From a919cf4b6b499416b6e2247dbc79196c4325f2e6 Mon Sep 17 00:00:00 2001 From: Sven Wegener Date: Thu, 14 Aug 2008 00:47:16 +0200 Subject: ipvs: Create init functions for estimator code Commit 8ab19ea36c5c5340ff598e4d15fc084eb65671dc ("ipvs: Fix possible deadlock in estimator code") fixed a deadlock condition, but that condition can only happen during unload of IPVS, because during normal operation there is at least our global stats structure in the estimator list. The mod_timer() and del_timer_sync() calls are actually initialization and cleanup code in disguise. Let's make it explicit and move them to their own init and cleanup function. Signed-off-by: Sven Wegener Signed-off-by: Simon Horman --- include/net/ip_vs.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/net') diff --git a/include/net/ip_vs.h b/include/net/ip_vs.h index 7312c3dd309..a25ad243031 100644 --- a/include/net/ip_vs.h +++ b/include/net/ip_vs.h @@ -683,6 +683,8 @@ extern void ip_vs_sync_conn(struct ip_vs_conn *cp); /* * IPVS rate estimator prototypes (from ip_vs_est.c) */ +extern int ip_vs_estimator_init(void); +extern void ip_vs_estimator_cleanup(void); extern void ip_vs_new_estimator(struct ip_vs_stats *stats); extern void ip_vs_kill_estimator(struct ip_vs_stats *stats); extern void ip_vs_zero_estimator(struct ip_vs_stats *stats); -- cgit v1.2.3-70-g09d2 From 92ab85354993ac3a364c65cab45745af470ffc67 Mon Sep 17 00:00:00 2001 From: Tomas Winkler Date: Thu, 24 Jul 2008 21:02:04 +0300 Subject: mac80211: add ieee80211_queue_stopped) This patch adds ieee80211_queue_stopped that let drivers to query queue status Signed-off-by: Tomas Winkler Signed-off-by: John W. Linville --- include/net/mac80211.h | 10 ++++++++++ net/mac80211/util.c | 7 +++++++ 2 files changed, 17 insertions(+) (limited to 'include/net') diff --git a/include/net/mac80211.h b/include/net/mac80211.h index ff137fd7714..25cc192cbe4 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -1607,6 +1607,16 @@ void ieee80211_wake_queue(struct ieee80211_hw *hw, int queue); */ void ieee80211_stop_queue(struct ieee80211_hw *hw, int queue); +/** + * ieee80211_queue_stopped - test status of the queue + * @hw: pointer as obtained from ieee80211_alloc_hw(). + * @queue: queue number (counted from zero). + * + * Drivers should use this function instead of netif_stop_queue. + */ + +int ieee80211_queue_stopped(struct ieee80211_hw *hw, int queue); + /** * ieee80211_stop_queues - stop all queues * @hw: pointer as obtained from ieee80211_alloc_hw(). diff --git a/net/mac80211/util.c b/net/mac80211/util.c index 0d463c80c40..24400618190 100644 --- a/net/mac80211/util.c +++ b/net/mac80211/util.c @@ -386,6 +386,13 @@ void ieee80211_stop_queues(struct ieee80211_hw *hw) } EXPORT_SYMBOL(ieee80211_stop_queues); +int ieee80211_queue_stopped(struct ieee80211_hw *hw, int queue) +{ + struct ieee80211_local *local = hw_to_local(hw); + return __netif_subqueue_stopped(local->mdev, queue); +} +EXPORT_SYMBOL(ieee80211_queue_stopped); + void ieee80211_wake_queues(struct ieee80211_hw *hw) { int i; -- cgit v1.2.3-70-g09d2 From b4f28bbb9bf0b2c829ecf97ce2173f204fde4f10 Mon Sep 17 00:00:00 2001 From: Bruno Randolf Date: Wed, 30 Jul 2008 17:19:55 +0200 Subject: mac80211: add rx status flag for short preamble and use it for the radiotap header Signed-off-by: Bruno Randolf Signed-off-by: John W. Linville --- include/net/mac80211.h | 2 ++ net/mac80211/rx.c | 2 ++ 2 files changed, 4 insertions(+) (limited to 'include/net') diff --git a/include/net/mac80211.h b/include/net/mac80211.h index 25cc192cbe4..dcc05a197dc 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -363,6 +363,7 @@ static inline struct ieee80211_tx_info *IEEE80211_SKB_CB(struct sk_buff *skb) * @RX_FLAG_TSFT: The timestamp passed in the RX status (@mactime field) * is valid. This is useful in monitor mode and necessary for beacon frames * to enable IBSS merging. + * @RX_FLAG_SHORTPRE: Short preamble was used for this frame */ enum mac80211_rx_flags { RX_FLAG_MMIC_ERROR = 1<<0, @@ -373,6 +374,7 @@ enum mac80211_rx_flags { RX_FLAG_FAILED_FCS_CRC = 1<<5, RX_FLAG_FAILED_PLCP_CRC = 1<<6, RX_FLAG_TSFT = 1<<7, + RX_FLAG_SHORTPRE = 1<<8 }; /** diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c index 6db85450519..ad47a614a20 100644 --- a/net/mac80211/rx.c +++ b/net/mac80211/rx.c @@ -143,6 +143,8 @@ ieee80211_add_rx_radiotap_header(struct ieee80211_local *local, /* IEEE80211_RADIOTAP_FLAGS */ if (local->hw.flags & IEEE80211_HW_RX_INCLUDES_FCS) *pos |= IEEE80211_RADIOTAP_F_FCS; + if (status->flag & RX_FLAG_SHORTPRE) + *pos |= IEEE80211_RADIOTAP_F_SHORTPRE; pos++; /* IEEE80211_RADIOTAP_RATE */ -- cgit v1.2.3-70-g09d2 From 6b644e524bbd4089a28e0711de4f1cf2daa5db50 Mon Sep 17 00:00:00 2001 From: Harvey Harrison Date: Tue, 15 Jul 2008 18:44:12 -0700 Subject: mac80211: remove ieee80211_get_hdrlen All users have been moved over to the version taking a le16 frame control rather than a cpu-endian value. Signed-off-by: Harvey Harrison Signed-off-by: John W. Linville --- include/net/mac80211.h | 10 ---------- net/mac80211/util.c | 39 --------------------------------------- 2 files changed, 49 deletions(-) (limited to 'include/net') diff --git a/include/net/mac80211.h b/include/net/mac80211.h index dcc05a197dc..0fdc3dabc96 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -1558,16 +1558,6 @@ ieee80211_get_buffered_bc(struct ieee80211_hw *hw, struct ieee80211_vif *vif); */ unsigned int ieee80211_get_hdrlen_from_skb(const struct sk_buff *skb); -/** - * ieee80211_get_hdrlen - get header length from frame control - * - * This function returns the 802.11 header length in bytes (not including - * encryption headers.) - * - * @fc: the frame control field (in CPU endianness) - */ -int ieee80211_get_hdrlen(u16 fc); - /** * ieee80211_hdrlen - get header length in bytes from frame control * @fc: frame control field in little-endian format diff --git a/net/mac80211/util.c b/net/mac80211/util.c index 24400618190..f40c060341a 100644 --- a/net/mac80211/util.c +++ b/net/mac80211/util.c @@ -91,45 +91,6 @@ u8 *ieee80211_get_bssid(struct ieee80211_hdr *hdr, size_t len, return NULL; } -int ieee80211_get_hdrlen(u16 fc) -{ - int hdrlen = 24; - - switch (fc & IEEE80211_FCTL_FTYPE) { - case IEEE80211_FTYPE_DATA: - if ((fc & IEEE80211_FCTL_FROMDS) && (fc & IEEE80211_FCTL_TODS)) - hdrlen = 30; /* Addr4 */ - /* - * The QoS Control field is two bytes and its presence is - * indicated by the IEEE80211_STYPE_QOS_DATA bit. Add 2 to - * hdrlen if that bit is set. - * This works by masking out the bit and shifting it to - * bit position 1 so the result has the value 0 or 2. - */ - hdrlen += (fc & IEEE80211_STYPE_QOS_DATA) - >> (ilog2(IEEE80211_STYPE_QOS_DATA)-1); - break; - case IEEE80211_FTYPE_CTL: - /* - * ACK and CTS are 10 bytes, all others 16. To see how - * to get this condition consider - * subtype mask: 0b0000000011110000 (0x00F0) - * ACK subtype: 0b0000000011010000 (0x00D0) - * CTS subtype: 0b0000000011000000 (0x00C0) - * bits that matter: ^^^ (0x00E0) - * value of those: 0b0000000011000000 (0x00C0) - */ - if ((fc & 0xE0) == 0xC0) - hdrlen = 10; - else - hdrlen = 16; - break; - } - - return hdrlen; -} -EXPORT_SYMBOL(ieee80211_get_hdrlen); - unsigned int ieee80211_hdrlen(__le16 fc) { unsigned int hdrlen = 24; -- cgit v1.2.3-70-g09d2 From af01d537463714e36e2c96d2da35902b76cd6827 Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Thu, 28 Aug 2008 02:53:51 -0700 Subject: net: more #ifdef CONFIG_COMPAT All users of struct proto::compat_[gs]etsockopt and struct inet_connection_sock_af_ops::compat_[gs]etsockopt are under #ifdef already, so use it in structure definition too. Signed-off-by: Alexey Dobriyan Signed-off-by: David S. Miller --- include/net/inet_connection_sock.h | 2 ++ include/net/sock.h | 2 ++ 2 files changed, 4 insertions(+) (limited to 'include/net') diff --git a/include/net/inet_connection_sock.h b/include/net/inet_connection_sock.h index 2ff545a56fb..03cffd9f64e 100644 --- a/include/net/inet_connection_sock.h +++ b/include/net/inet_connection_sock.h @@ -51,12 +51,14 @@ struct inet_connection_sock_af_ops { char __user *optval, int optlen); int (*getsockopt)(struct sock *sk, int level, int optname, char __user *optval, int __user *optlen); +#ifdef CONFIG_COMPAT int (*compat_setsockopt)(struct sock *sk, int level, int optname, char __user *optval, int optlen); int (*compat_getsockopt)(struct sock *sk, int level, int optname, char __user *optval, int __user *optlen); +#endif void (*addr2sockaddr)(struct sock *sk, struct sockaddr *); int (*bind_conflict)(const struct sock *sk, const struct inet_bind_bucket *tb); diff --git a/include/net/sock.h b/include/net/sock.h index 06c5259aff3..75a312d3888 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -532,6 +532,7 @@ struct proto { int (*getsockopt)(struct sock *sk, int level, int optname, char __user *optval, int __user *option); +#ifdef CONFIG_COMPAT int (*compat_setsockopt)(struct sock *sk, int level, int optname, char __user *optval, @@ -540,6 +541,7 @@ struct proto { int level, int optname, char __user *optval, int __user *option); +#endif int (*sendmsg)(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, size_t len); int (*recvmsg)(struct kiocb *iocb, struct sock *sk, -- cgit v1.2.3-70-g09d2 From 9f1ba9062e032fb7b395cd27fc564754fe4e9867 Mon Sep 17 00:00:00 2001 From: Jouni Malinen Date: Thu, 7 Aug 2008 20:07:01 +0300 Subject: mac80211/cfg80211: Add BSS configuration options for AP mode This change adds a new cfg80211 command, NL80211_CMD_SET_BSS, to allow AP mode BSS parameters to be changed from user space (e.g., hostapd). The drivers using mac80211 are expected to be modified with separate changes to use the new BSS info parameter for short slot time in the bss_info_changed() handler. Signed-off-by: Jouni Malinen Acked-by: Johannes Berg Signed-off-by: John W. Linville --- include/linux/nl80211.h | 19 ++++++++++++++++++ include/net/cfg80211.h | 22 +++++++++++++++++++++ include/net/mac80211.h | 9 +++++++++ net/mac80211/cfg.c | 37 +++++++++++++++++++++++++++++++++++ net/wireless/nl80211.c | 52 +++++++++++++++++++++++++++++++++++++++++++++++++ 5 files changed, 139 insertions(+) (limited to 'include/net') diff --git a/include/linux/nl80211.h b/include/linux/nl80211.h index 2be7c63bc0f..447c02a5190 100644 --- a/include/linux/nl80211.h +++ b/include/linux/nl80211.h @@ -89,6 +89,8 @@ * @NL80211_CMD_DEL_PATH: Remove a mesh path identified by %NL80211_ATTR_MAC * or, if no MAC address given, all mesh paths, on the interface identified * by %NL80211_ATTR_IFINDEX. + * @NL80211_CMD_SET_BSS: Set BSS attributes for BSS identified by + * %NL80211_ATTR_IFINDEX. * * @NL80211_CMD_MAX: highest used command number * @__NL80211_CMD_AFTER_LAST: internal use @@ -127,6 +129,8 @@ enum nl80211_commands { NL80211_CMD_NEW_MPATH, NL80211_CMD_DEL_MPATH, + NL80211_CMD_SET_BSS, + /* add commands here */ /* used to define NL80211_CMD_MAX below */ @@ -134,6 +138,11 @@ enum nl80211_commands { NL80211_CMD_MAX = __NL80211_CMD_AFTER_LAST - 1 }; +/* + * Allow user space programs to use #ifdef on new commands by defining them + * here + */ +#define NL80211_CMD_SET_BSS NL80211_CMD_SET_BSS /** * enum nl80211_attrs - nl80211 netlink attributes @@ -192,6 +201,12 @@ enum nl80211_commands { * @NL80211_ATTR_MNTR_FLAGS: flags, nested element with NLA_FLAG attributes of * &enum nl80211_mntr_flags. * + * @NL80211_ATTR_BSS_CTS_PROT: whether CTS protection is enabled (u8, 0 or 1) + * @NL80211_ATTR_BSS_SHORT_PREAMBLE: whether short preamble is enabled + * (u8, 0 or 1) + * @NL80211_ATTR_BSS_SHORT_SLOT_TIME: whether short slot time enabled + * (u8, 0 or 1) + * * @NL80211_ATTR_MAX: highest attribute number currently defined * @__NL80211_ATTR_AFTER_LAST: internal use */ @@ -235,6 +250,10 @@ enum nl80211_attrs { NL80211_ATTR_MPATH_NEXT_HOP, NL80211_ATTR_MPATH_INFO, + NL80211_ATTR_BSS_CTS_PROT, + NL80211_ATTR_BSS_SHORT_PREAMBLE, + NL80211_ATTR_BSS_SHORT_SLOT_TIME, + /* add attributes here, update the policy in nl80211.c */ __NL80211_ATTR_AFTER_LAST, diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index e00750836ba..7afef14d5c5 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -268,6 +268,23 @@ struct mpath_info { u8 flags; }; +/** + * struct bss_parameters - BSS parameters + * + * Used to change BSS parameters (mainly for AP mode). + * + * @use_cts_prot: Whether to use CTS protection + * (0 = no, 1 = yes, -1 = do not change) + * @use_short_preamble: Whether the use of short preambles is allowed + * (0 = no, 1 = yes, -1 = do not change) + * @use_short_slot_time: Whether the use of short slot time is allowed + * (0 = no, 1 = yes, -1 = do not change) + */ +struct bss_parameters { + int use_cts_prot; + int use_short_preamble; + int use_short_slot_time; +}; /* from net/wireless.h */ struct wiphy; @@ -318,6 +335,8 @@ struct wiphy; * @change_station: Modify a given station. * * @set_mesh_cfg: set mesh parameters (by now, just mesh id) + * + * @change_bss: Modify parameters for a given BSS. */ struct cfg80211_ops { int (*add_virtual_intf)(struct wiphy *wiphy, char *name, @@ -370,6 +389,9 @@ struct cfg80211_ops { int (*dump_mpath)(struct wiphy *wiphy, struct net_device *dev, int idx, u8 *dst, u8 *next_hop, struct mpath_info *pinfo); + + int (*change_bss)(struct wiphy *wiphy, struct net_device *dev, + struct bss_parameters *params); }; #endif /* __NET_CFG80211_H */ diff --git a/include/net/mac80211.h b/include/net/mac80211.h index 0fdc3dabc96..7c399a9c11d 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -158,12 +158,14 @@ struct ieee80211_low_level_stats { * also implies a change in the AID. * @BSS_CHANGED_ERP_CTS_PROT: CTS protection changed * @BSS_CHANGED_ERP_PREAMBLE: preamble changed + * @BSS_CHANGED_ERP_SLOT: slot timing changed * @BSS_CHANGED_HT: 802.11n parameters changed */ enum ieee80211_bss_change { BSS_CHANGED_ASSOC = 1<<0, BSS_CHANGED_ERP_CTS_PROT = 1<<1, BSS_CHANGED_ERP_PREAMBLE = 1<<2, + BSS_CHANGED_ERP_SLOT = 1<<3, BSS_CHANGED_HT = 1<<4, }; @@ -177,6 +179,7 @@ enum ieee80211_bss_change { * @aid: association ID number, valid only when @assoc is true * @use_cts_prot: use CTS protection * @use_short_preamble: use 802.11b short preamble + * @use_short_slot: use short slot time (only relevant for ERP) * @dtim_period: num of beacons before the next DTIM, for PSM * @timestamp: beacon timestamp * @beacon_int: beacon interval @@ -192,6 +195,7 @@ struct ieee80211_bss_conf { /* erp related data */ bool use_cts_prot; bool use_short_preamble; + bool use_short_slot; u8 dtim_period; u16 beacon_int; u16 assoc_capability; @@ -420,6 +424,11 @@ struct ieee80211_rx_status { * @IEEE80211_CONF_PS: Enable 802.11 power save mode */ enum ieee80211_conf_flags { + /* + * TODO: IEEE80211_CONF_SHORT_SLOT_TIME will be removed once drivers + * have been converted to use bss_info_changed() for slot time + * configuration + */ IEEE80211_CONF_SHORT_SLOT_TIME = (1<<0), IEEE80211_CONF_RADIOTAP = (1<<1), IEEE80211_CONF_SUPPORT_HT_MODE = (1<<2), diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index 6d2ad2bf3ab..2b19532f4c8 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -1010,6 +1010,42 @@ static int ieee80211_dump_mpath(struct wiphy *wiphy, struct net_device *dev, } #endif +static int ieee80211_change_bss(struct wiphy *wiphy, + struct net_device *dev, + struct bss_parameters *params) +{ + struct ieee80211_local *local = wiphy_priv(wiphy); + struct ieee80211_sub_if_data *sdata; + u32 changed = 0; + + if (dev == local->mdev) + return -EOPNOTSUPP; + + sdata = IEEE80211_DEV_TO_SUB_IF(dev); + + if (sdata->vif.type != IEEE80211_IF_TYPE_AP) + return -EINVAL; + + if (params->use_cts_prot >= 0) { + sdata->bss_conf.use_cts_prot = params->use_cts_prot; + changed |= BSS_CHANGED_ERP_CTS_PROT; + } + if (params->use_short_preamble >= 0) { + sdata->bss_conf.use_short_preamble = + params->use_short_preamble; + changed |= BSS_CHANGED_ERP_PREAMBLE; + } + if (params->use_short_slot_time >= 0) { + sdata->bss_conf.use_short_slot = + params->use_short_slot_time; + changed |= BSS_CHANGED_ERP_SLOT; + } + + ieee80211_bss_info_change_notify(sdata, changed); + + return 0; +} + struct cfg80211_ops mac80211_config_ops = { .add_virtual_intf = ieee80211_add_iface, .del_virtual_intf = ieee80211_del_iface, @@ -1033,4 +1069,5 @@ struct cfg80211_ops mac80211_config_ops = { .get_mpath = ieee80211_get_mpath, .dump_mpath = ieee80211_dump_mpath, #endif + .change_bss = ieee80211_change_bss, }; diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 59eb2cf42e5..47542ee01c5 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -87,6 +87,10 @@ static struct nla_policy nl80211_policy[NL80211_ATTR_MAX+1] __read_mostly = { [NL80211_ATTR_MESH_ID] = { .type = NLA_BINARY, .len = IEEE80211_MAX_MESH_ID_LEN }, [NL80211_ATTR_MPATH_NEXT_HOP] = { .type = NLA_U32 }, + + [NL80211_ATTR_BSS_CTS_PROT] = { .type = NLA_U8 }, + [NL80211_ATTR_BSS_SHORT_PREAMBLE] = { .type = NLA_U8 }, + [NL80211_ATTR_BSS_SHORT_SLOT_TIME] = { .type = NLA_U8 }, }; /* message building helper */ @@ -1525,6 +1529,48 @@ static int nl80211_del_mpath(struct sk_buff *skb, struct genl_info *info) return err; } +static int nl80211_set_bss(struct sk_buff *skb, struct genl_info *info) +{ + struct cfg80211_registered_device *drv; + int err; + struct net_device *dev; + struct bss_parameters params; + + memset(¶ms, 0, sizeof(params)); + /* default to not changing parameters */ + params.use_cts_prot = -1; + params.use_short_preamble = -1; + params.use_short_slot_time = -1; + + if (info->attrs[NL80211_ATTR_BSS_CTS_PROT]) + params.use_cts_prot = + nla_get_u8(info->attrs[NL80211_ATTR_BSS_CTS_PROT]); + if (info->attrs[NL80211_ATTR_BSS_SHORT_PREAMBLE]) + params.use_short_preamble = + nla_get_u8(info->attrs[NL80211_ATTR_BSS_SHORT_PREAMBLE]); + if (info->attrs[NL80211_ATTR_BSS_SHORT_SLOT_TIME]) + params.use_short_slot_time = + nla_get_u8(info->attrs[NL80211_ATTR_BSS_SHORT_SLOT_TIME]); + + err = get_drv_dev_by_info_ifindex(info->attrs, &drv, &dev); + if (err) + return err; + + if (!drv->ops->change_bss) { + err = -EOPNOTSUPP; + goto out; + } + + rtnl_lock(); + err = drv->ops->change_bss(&drv->wiphy, dev, ¶ms); + rtnl_unlock(); + + out: + cfg80211_put_dev(drv); + dev_put(dev); + return err; +} + static struct genl_ops nl80211_ops[] = { { .cmd = NL80211_CMD_GET_WIPHY, @@ -1656,6 +1702,12 @@ static struct genl_ops nl80211_ops[] = { .policy = nl80211_policy, .flags = GENL_ADMIN_PERM, }, + { + .cmd = NL80211_CMD_SET_BSS, + .doit = nl80211_set_bss, + .policy = nl80211_policy, + .flags = GENL_ADMIN_PERM, + }, }; /* multicast groups */ -- cgit v1.2.3-70-g09d2 From 36aedc903ea11a4188de0a118d26c9f20afdd272 Mon Sep 17 00:00:00 2001 From: Jouni Malinen Date: Mon, 25 Aug 2008 11:58:58 +0300 Subject: mac80211/cfg80211: HT capabilities for NEW_STA Allow userspace (e.g., hostapd) to set HT capabilities for associated STAs. This is based on a patch from Zhu Yi (only the NL80211_ATTR_HT_CAPABILITY for NEW_STA part is included here). Signed-off-by: Jouni Malinen Signed-off-by: John W. Linville --- include/linux/nl80211.h | 12 ++++++++++++ include/net/cfg80211.h | 1 + net/mac80211/cfg.c | 5 +++++ net/wireless/nl80211.c | 10 ++++++++++ 4 files changed, 28 insertions(+) (limited to 'include/net') diff --git a/include/linux/nl80211.h b/include/linux/nl80211.h index 447c02a5190..0c1147de3ec 100644 --- a/include/linux/nl80211.h +++ b/include/linux/nl80211.h @@ -207,6 +207,9 @@ enum nl80211_commands { * @NL80211_ATTR_BSS_SHORT_SLOT_TIME: whether short slot time enabled * (u8, 0 or 1) * + * @NL80211_ATTR_HT_CAPABILITY: HT Capability information element (from + * association request when used with NL80211_CMD_NEW_STATION) + * * @NL80211_ATTR_MAX: highest attribute number currently defined * @__NL80211_ATTR_AFTER_LAST: internal use */ @@ -254,16 +257,25 @@ enum nl80211_attrs { NL80211_ATTR_BSS_SHORT_PREAMBLE, NL80211_ATTR_BSS_SHORT_SLOT_TIME, + NL80211_ATTR_HT_CAPABILITY, + /* add attributes here, update the policy in nl80211.c */ __NL80211_ATTR_AFTER_LAST, NL80211_ATTR_MAX = __NL80211_ATTR_AFTER_LAST - 1 }; +/* + * Allow user space programs to use #ifdef on new attributes by defining them + * here + */ +#define NL80211_ATTR_HT_CAPABILITY NL80211_ATTR_HT_CAPABILITY + #define NL80211_MAX_SUPP_RATES 32 #define NL80211_TKIP_DATA_OFFSET_ENCR_KEY 0 #define NL80211_TKIP_DATA_OFFSET_TX_MIC_KEY 16 #define NL80211_TKIP_DATA_OFFSET_RX_MIC_KEY 24 +#define NL80211_HT_CAPABILITY_LEN 26 /** * enum nl80211_iftype - (virtual) interface types diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index 7afef14d5c5..0a72d1e3d3a 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -152,6 +152,7 @@ struct station_parameters { u16 aid; u8 supported_rates_len; u8 plink_action; + struct ieee80211_ht_cap *ht_capa; }; /** diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index 2b19532f4c8..928813ce08e 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -674,6 +674,11 @@ static void sta_apply_parameters(struct ieee80211_local *local, sta->supp_rates[local->oper_channel->band] = rates; } + if (params->ht_capa) { + ieee80211_ht_cap_ie_to_ht_info(params->ht_capa, + &sta->ht_info); + } + if (ieee80211_vif_is_mesh(&sdata->vif) && params->plink_action) { switch (params->plink_action) { case PLINK_ACTION_OPEN: diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 47542ee01c5..4d6c02afd6f 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -91,6 +91,9 @@ static struct nla_policy nl80211_policy[NL80211_ATTR_MAX+1] __read_mostly = { [NL80211_ATTR_BSS_CTS_PROT] = { .type = NLA_U8 }, [NL80211_ATTR_BSS_SHORT_PREAMBLE] = { .type = NLA_U8 }, [NL80211_ATTR_BSS_SHORT_SLOT_TIME] = { .type = NLA_U8 }, + + [NL80211_ATTR_HT_CAPABILITY] = { .type = NLA_BINARY, + .len = NL80211_HT_CAPABILITY_LEN }, }; /* message building helper */ @@ -1129,6 +1132,10 @@ static int nl80211_set_station(struct sk_buff *skb, struct genl_info *info) params.listen_interval = nla_get_u16(info->attrs[NL80211_ATTR_STA_LISTEN_INTERVAL]); + if (info->attrs[NL80211_ATTR_HT_CAPABILITY]) + params.ht_capa = + nla_data(info->attrs[NL80211_ATTR_HT_CAPABILITY]); + if (parse_station_flags(info->attrs[NL80211_ATTR_STA_FLAGS], ¶ms.station_flags)) return -EINVAL; @@ -1192,6 +1199,9 @@ static int nl80211_new_station(struct sk_buff *skb, struct genl_info *info) params.listen_interval = nla_get_u16(info->attrs[NL80211_ATTR_STA_LISTEN_INTERVAL]); params.aid = nla_get_u16(info->attrs[NL80211_ATTR_STA_AID]); + if (info->attrs[NL80211_ATTR_HT_CAPABILITY]) + params.ht_capa = + nla_data(info->attrs[NL80211_ATTR_HT_CAPABILITY]); if (parse_station_flags(info->attrs[NL80211_ATTR_STA_FLAGS], ¶ms.station_flags)) -- cgit v1.2.3-70-g09d2 From 2c10b32bf57db7ec6d4cca4c4aa3d86bacb01c8a Mon Sep 17 00:00:00 2001 From: Thomas Graf Date: Tue, 2 Sep 2008 17:30:27 -0700 Subject: netlink: Remove compat API for nested attributes Removes all _nested_compat() functions from the API. The prio qdisc no longer requires them and netem has its own format anyway. Their existance is only confusing. Resend: Also remove the wrapper macro. Signed-off-by: Thomas Graf Signed-off-by: David S. Miller --- include/net/netlink.h | 82 --------------------------------------------------- net/sched/sch_netem.c | 18 +++++++++-- net/sched/sch_prio.c | 6 +--- 3 files changed, 17 insertions(+), 89 deletions(-) (limited to 'include/net') diff --git a/include/net/netlink.h b/include/net/netlink.h index 18024b8cecb..76c43ff38f6 100644 --- a/include/net/netlink.h +++ b/include/net/netlink.h @@ -119,9 +119,6 @@ * Nested Attributes Construction: * nla_nest_start(skb, type) start a nested attribute * nla_nest_end(skb, nla) finalize a nested attribute - * nla_nest_compat_start(skb, type, start a nested compat attribute - * len, data) - * nla_nest_compat_end(skb, type) finalize a nested compat attribute * nla_nest_cancel(skb, nla) cancel nested attribute construction * * Attribute Length Calculations: @@ -156,7 +153,6 @@ * nla_find_nested() find attribute in nested attributes * nla_parse() parse and validate stream of attrs * nla_parse_nested() parse nested attribuets - * nla_parse_nested_compat() parse nested compat attributes * nla_for_each_attr() loop over all attributes * nla_for_each_nested() loop over the nested attributes *========================================================================= @@ -751,39 +747,6 @@ static inline int nla_parse_nested(struct nlattr *tb[], int maxtype, return nla_parse(tb, maxtype, nla_data(nla), nla_len(nla), policy); } -/** - * nla_parse_nested_compat - parse nested compat attributes - * @tb: destination array with maxtype+1 elements - * @maxtype: maximum attribute type to be expected - * @nla: attribute containing the nested attributes - * @data: pointer to point to contained structure - * @len: length of contained structure - * @policy: validation policy - * - * Parse a nested compat attribute. The compat attribute contains a structure - * and optionally a set of nested attributes. On success the data pointer - * points to the nested data and tb contains the parsed attributes - * (see nla_parse). - */ -static inline int __nla_parse_nested_compat(struct nlattr *tb[], int maxtype, - struct nlattr *nla, - const struct nla_policy *policy, - int len) -{ - int nested_len = nla_len(nla) - NLA_ALIGN(len); - - if (nested_len < 0) - return -EINVAL; - if (nested_len >= nla_attr_size(0)) - return nla_parse(tb, maxtype, nla_data(nla) + NLA_ALIGN(len), - nested_len, policy); - memset(tb, 0, sizeof(struct nlattr *) * (maxtype + 1)); - return 0; -} - -#define nla_parse_nested_compat(tb, maxtype, nla, policy, data, len) \ -({ data = nla_len(nla) >= len ? nla_data(nla) : NULL; \ - __nla_parse_nested_compat(tb, maxtype, nla, policy, len); }) /** * nla_put_u8 - Add a u8 netlink attribute to a socket buffer * @skb: socket buffer to add attribute to @@ -1030,51 +993,6 @@ static inline int nla_nest_end(struct sk_buff *skb, struct nlattr *start) return skb->len; } -/** - * nla_nest_compat_start - Start a new level of nested compat attributes - * @skb: socket buffer to add attributes to - * @attrtype: attribute type of container - * @attrlen: length of structure - * @data: pointer to structure - * - * Start a nested compat attribute that contains both a structure and - * a set of nested attributes. - * - * Returns the container attribute - */ -static inline struct nlattr *nla_nest_compat_start(struct sk_buff *skb, - int attrtype, int attrlen, - const void *data) -{ - struct nlattr *start = (struct nlattr *)skb_tail_pointer(skb); - - if (nla_put(skb, attrtype, attrlen, data) < 0) - return NULL; - if (nla_nest_start(skb, attrtype) == NULL) { - nlmsg_trim(skb, start); - return NULL; - } - return start; -} - -/** - * nla_nest_compat_end - Finalize nesting of compat attributes - * @skb: socket buffer the attributes are stored in - * @start: container attribute - * - * Corrects the container attribute header to include the all - * appeneded attributes. - * - * Returns the total data length of the skb. - */ -static inline int nla_nest_compat_end(struct sk_buff *skb, struct nlattr *start) -{ - struct nlattr *nest = (void *)start + NLMSG_ALIGN(start->nla_len); - - start->nla_len = skb_tail_pointer(skb) - (unsigned char *)start; - return nla_nest_end(skb, nest); -} - /** * nla_nest_cancel - Cancel nesting of attributes * @skb: socket buffer the message is stored in diff --git a/net/sched/sch_netem.c b/net/sched/sch_netem.c index 3781e55046d..a11959908d9 100644 --- a/net/sched/sch_netem.c +++ b/net/sched/sch_netem.c @@ -388,6 +388,20 @@ static const struct nla_policy netem_policy[TCA_NETEM_MAX + 1] = { [TCA_NETEM_CORRUPT] = { .len = sizeof(struct tc_netem_corrupt) }, }; +static int parse_attr(struct nlattr *tb[], int maxtype, struct nlattr *nla, + const struct nla_policy *policy, int len) +{ + int nested_len = nla_len(nla) - NLA_ALIGN(len); + + if (nested_len < 0) + return -EINVAL; + if (nested_len >= nla_attr_size(0)) + return nla_parse(tb, maxtype, nla_data(nla) + NLA_ALIGN(len), + nested_len, policy); + memset(tb, 0, sizeof(struct nlattr *) * (maxtype + 1)); + return 0; +} + /* Parse netlink message to set options */ static int netem_change(struct Qdisc *sch, struct nlattr *opt) { @@ -399,8 +413,8 @@ static int netem_change(struct Qdisc *sch, struct nlattr *opt) if (opt == NULL) return -EINVAL; - ret = nla_parse_nested_compat(tb, TCA_NETEM_MAX, opt, netem_policy, - qopt, sizeof(*qopt)); + qopt = nla_data(opt); + ret = parse_attr(tb, TCA_NETEM_MAX, opt, netem_policy, sizeof(*qopt)); if (ret < 0) return ret; diff --git a/net/sched/sch_prio.c b/net/sched/sch_prio.c index a6697c686c7..504a78cdb71 100644 --- a/net/sched/sch_prio.c +++ b/net/sched/sch_prio.c @@ -254,16 +254,12 @@ static int prio_dump(struct Qdisc *sch, struct sk_buff *skb) { struct prio_sched_data *q = qdisc_priv(sch); unsigned char *b = skb_tail_pointer(skb); - struct nlattr *nest; struct tc_prio_qopt opt; opt.bands = q->bands; memcpy(&opt.priomap, q->prio2band, TC_PRIO_MAX+1); - nest = nla_nest_compat_start(skb, TCA_OPTIONS, sizeof(opt), &opt); - if (nest == NULL) - goto nla_put_failure; - nla_nest_compat_end(skb, nest); + NLA_PUT(skb, TCA_OPTIONS, sizeof(opt), &opt); return skb->len; -- cgit v1.2.3-70-g09d2 From 6224877b2ca4be5de96270a8ae490fe2ba11b0e0 Mon Sep 17 00:00:00 2001 From: Gerrit Renker Date: Thu, 4 Sep 2008 07:30:19 +0200 Subject: tcp/dccp: Consolidate common code for RFC 3390 conversion This patch consolidates the code common to TCP and CCID-2: * TCP uses RFC 3390 in a packet-oriented manner (tcp_input.c) and * CCID-2 uses RFC 3390 in packet-oriented manner (RFC 4341). Signed-off-by: Gerrit Renker --- include/net/tcp.h | 15 +++++++++++++++ net/dccp/ccids/ccid2.c | 8 ++------ net/ipv4/tcp_input.c | 17 ++--------------- 3 files changed, 19 insertions(+), 21 deletions(-) (limited to 'include/net') diff --git a/include/net/tcp.h b/include/net/tcp.h index 8983386356a..6bc4b8148ca 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -782,6 +782,21 @@ static inline __u32 tcp_current_ssthresh(const struct sock *sk) /* Use define here intentionally to get WARN_ON location shown at the caller */ #define tcp_verify_left_out(tp) WARN_ON(tcp_left_out(tp) > tp->packets_out) +/* + * Convert RFC3390 larger initial windows into an equivalent number of packets. + * + * John Heffner states: + * + * The RFC specifies a window of no more than 4380 bytes + * unless 2*MSS > 4380. Reading the pseudocode in the RFC + * is a bit misleading because they use a clamp at 4380 bytes + * rather than a multiplier in the relevant range. + */ +static inline u32 rfc3390_bytes_to_packets(const u32 bytes) +{ + return bytes <= 1095 ? 4 : (bytes > 1460 ? 2 : 3); +} + extern void tcp_enter_cwr(struct sock *sk, const int set_ssthresh); extern __u32 tcp_init_cwnd(struct tcp_sock *tp, struct dst_entry *dst); diff --git a/net/dccp/ccids/ccid2.c b/net/dccp/ccids/ccid2.c index c539f79ab8e..fa713227c66 100644 --- a/net/dccp/ccids/ccid2.c +++ b/net/dccp/ccids/ccid2.c @@ -596,12 +596,8 @@ static int ccid2_hc_tx_init(struct ccid *ccid, struct sock *sk) /* RFC 4341, 5: initialise ssthresh to arbitrarily high (max) value */ hctx->ssthresh = ~0U; - /* - * RFC 4341, 5: "The cwnd parameter is initialized to at most four - * packets for new connections, following the rules from [RFC3390]". - * We need to convert the bytes of RFC3390 into the packets of RFC 4341. - */ - hctx->cwnd = clamp(4380U / dp->dccps_mss_cache, 2U, 4U); + /* Use larger initial windows (RFC 3390, rfc2581bis) */ + hctx->cwnd = rfc3390_bytes_to_packets(dp->dccps_mss_cache); /* Make sure that Ack Ratio is enabled and within bounds. */ max_ratio = DIV_ROUND_UP(hctx->cwnd, 2); diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 67ccce2a96b..16d0040de34 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -811,25 +811,12 @@ void tcp_update_metrics(struct sock *sk) } } -/* Numbers are taken from RFC3390. - * - * John Heffner states: - * - * The RFC specifies a window of no more than 4380 bytes - * unless 2*MSS > 4380. Reading the pseudocode in the RFC - * is a bit misleading because they use a clamp at 4380 bytes - * rather than use a multiplier in the relevant range. - */ __u32 tcp_init_cwnd(struct tcp_sock *tp, struct dst_entry *dst) { __u32 cwnd = (dst ? dst_metric(dst, RTAX_INITCWND) : 0); - if (!cwnd) { - if (tp->mss_cache > 1460) - cwnd = 2; - else - cwnd = (tp->mss_cache > 1095) ? 3 : 4; - } + if (!cwnd) + cwnd = rfc3390_bytes_to_packets(tp->mss_cache); return min_t(__u32, cwnd, tp->snd_cwnd_clamp); } -- cgit v1.2.3-70-g09d2 From e7ade46a53055c19a01c8becbe7807f9075d6fee Mon Sep 17 00:00:00 2001 From: Julius Volz Date: Tue, 2 Sep 2008 15:55:33 +0200 Subject: IPVS: Change IPVS data structures to support IPv6 addresses Introduce new 'af' fields into IPVS data structures for specifying an entry's address family. Convert IP addresses to be of type union nf_inet_addr. Signed-off-by: Julius Volz Signed-off-by: Simon Horman --- include/net/ip_vs.h | 18 ++++++++----- net/ipv4/ipvs/ip_vs_conn.c | 60 ++++++++++++++++++++--------------------- net/ipv4/ipvs/ip_vs_core.c | 28 +++++++++---------- net/ipv4/ipvs/ip_vs_ctl.c | 37 ++++++++++++------------- net/ipv4/ipvs/ip_vs_dh.c | 2 +- net/ipv4/ipvs/ip_vs_ftp.c | 18 ++++++------- net/ipv4/ipvs/ip_vs_lblc.c | 4 +-- net/ipv4/ipvs/ip_vs_lblcr.c | 8 +++--- net/ipv4/ipvs/ip_vs_lc.c | 2 +- net/ipv4/ipvs/ip_vs_nq.c | 2 +- net/ipv4/ipvs/ip_vs_proto_tcp.c | 16 +++++------ net/ipv4/ipvs/ip_vs_proto_udp.c | 12 ++++----- net/ipv4/ipvs/ip_vs_rr.c | 2 +- net/ipv4/ipvs/ip_vs_sed.c | 2 +- net/ipv4/ipvs/ip_vs_sh.c | 2 +- net/ipv4/ipvs/ip_vs_sync.c | 6 ++--- net/ipv4/ipvs/ip_vs_wlc.c | 2 +- net/ipv4/ipvs/ip_vs_wrr.c | 2 +- net/ipv4/ipvs/ip_vs_xmit.c | 12 ++++----- 19 files changed, 121 insertions(+), 114 deletions(-) (limited to 'include/net') diff --git a/include/net/ip_vs.h b/include/net/ip_vs.h index a25ad243031..d3282558550 100644 --- a/include/net/ip_vs.h +++ b/include/net/ip_vs.h @@ -21,6 +21,9 @@ #include #include +#include /* for union nf_inet_addr */ +#include /* for struct ipv6hdr */ +#include /* for ipv6_addr_copy */ #ifdef CONFIG_IP_VS_DEBUG #include @@ -259,9 +262,10 @@ struct ip_vs_conn { struct list_head c_list; /* hashed list heads */ /* Protocol, addresses and port numbers */ - __be32 caddr; /* client address */ - __be32 vaddr; /* virtual address */ - __be32 daddr; /* destination address */ + u16 af; /* address family */ + union nf_inet_addr caddr; /* client address */ + union nf_inet_addr vaddr; /* virtual address */ + union nf_inet_addr daddr; /* destination address */ __be16 cport; __be16 vport; __be16 dport; @@ -314,8 +318,9 @@ struct ip_vs_service { atomic_t refcnt; /* reference counter */ atomic_t usecnt; /* use counter */ + u16 af; /* address family */ __u16 protocol; /* which protocol (TCP/UDP) */ - __be32 addr; /* IP address for virtual service */ + union nf_inet_addr addr; /* IP address for virtual service */ __be16 port; /* port number for the service */ __u32 fwmark; /* firewall mark of the service */ unsigned flags; /* service status flags */ @@ -342,7 +347,8 @@ struct ip_vs_dest { struct list_head n_list; /* for the dests in the service */ struct list_head d_list; /* for table with all the dests */ - __be32 addr; /* IP address of the server */ + u16 af; /* address family */ + union nf_inet_addr addr; /* IP address of the server */ __be16 port; /* port number of the server */ volatile unsigned flags; /* dest status flags */ atomic_t conn_flags; /* flags to copy to conn */ @@ -366,7 +372,7 @@ struct ip_vs_dest { /* for virtual service */ struct ip_vs_service *svc; /* service it belongs to */ __u16 protocol; /* which protocol (TCP/UDP) */ - __be32 vaddr; /* virtual IP address */ + union nf_inet_addr vaddr; /* virtual IP address */ __be16 vport; /* virtual port number */ __u32 vfwmark; /* firewall mark of service */ }; diff --git a/net/ipv4/ipvs/ip_vs_conn.c b/net/ipv4/ipvs/ip_vs_conn.c index 44a6872dc24..639d4bc7fc1 100644 --- a/net/ipv4/ipvs/ip_vs_conn.c +++ b/net/ipv4/ipvs/ip_vs_conn.c @@ -131,7 +131,7 @@ static inline int ip_vs_conn_hash(struct ip_vs_conn *cp) int ret; /* Hash by protocol, client address and port */ - hash = ip_vs_conn_hashkey(cp->protocol, cp->caddr, cp->cport); + hash = ip_vs_conn_hashkey(cp->protocol, cp->caddr.ip, cp->cport); ct_write_lock(hash); @@ -162,7 +162,7 @@ static inline int ip_vs_conn_unhash(struct ip_vs_conn *cp) int ret; /* unhash it and decrease its reference counter */ - hash = ip_vs_conn_hashkey(cp->protocol, cp->caddr, cp->cport); + hash = ip_vs_conn_hashkey(cp->protocol, cp->caddr.ip, cp->cport); ct_write_lock(hash); @@ -197,10 +197,10 @@ static inline struct ip_vs_conn *__ip_vs_conn_in_get ct_read_lock(hash); list_for_each_entry(cp, &ip_vs_conn_tab[hash], c_list) { - if (s_addr==cp->caddr && s_port==cp->cport && - d_port==cp->vport && d_addr==cp->vaddr && + if (s_addr == cp->caddr.ip && s_port == cp->cport && + d_port == cp->vport && d_addr == cp->vaddr.ip && ((!s_port) ^ (!(cp->flags & IP_VS_CONN_F_NO_CPORT))) && - protocol==cp->protocol) { + protocol == cp->protocol) { /* HIT */ atomic_inc(&cp->refcnt); ct_read_unlock(hash); @@ -243,10 +243,10 @@ struct ip_vs_conn *ip_vs_ct_in_get ct_read_lock(hash); list_for_each_entry(cp, &ip_vs_conn_tab[hash], c_list) { - if (s_addr==cp->caddr && s_port==cp->cport && - d_port==cp->vport && d_addr==cp->vaddr && + if (s_addr == cp->caddr.ip && s_port == cp->cport && + d_port == cp->vport && d_addr == cp->vaddr.ip && cp->flags & IP_VS_CONN_F_TEMPLATE && - protocol==cp->protocol) { + protocol == cp->protocol) { /* HIT */ atomic_inc(&cp->refcnt); goto out; @@ -286,8 +286,8 @@ struct ip_vs_conn *ip_vs_conn_out_get ct_read_lock(hash); list_for_each_entry(cp, &ip_vs_conn_tab[hash], c_list) { - if (d_addr == cp->caddr && d_port == cp->cport && - s_port == cp->dport && s_addr == cp->daddr && + if (d_addr == cp->caddr.ip && d_port == cp->cport && + s_port == cp->dport && s_addr == cp->daddr.ip && protocol == cp->protocol) { /* HIT */ atomic_inc(&cp->refcnt); @@ -406,9 +406,9 @@ ip_vs_bind_dest(struct ip_vs_conn *cp, struct ip_vs_dest *dest) "d:%u.%u.%u.%u:%d fwd:%c s:%u conn->flags:%X conn->refcnt:%d " "dest->refcnt:%d\n", ip_vs_proto_name(cp->protocol), - NIPQUAD(cp->caddr), ntohs(cp->cport), - NIPQUAD(cp->vaddr), ntohs(cp->vport), - NIPQUAD(cp->daddr), ntohs(cp->dport), + NIPQUAD(cp->caddr.ip), ntohs(cp->cport), + NIPQUAD(cp->vaddr.ip), ntohs(cp->vport), + NIPQUAD(cp->daddr.ip), ntohs(cp->dport), ip_vs_fwd_tag(cp), cp->state, cp->flags, atomic_read(&cp->refcnt), atomic_read(&dest->refcnt)); @@ -444,8 +444,8 @@ struct ip_vs_dest *ip_vs_try_bind_dest(struct ip_vs_conn *cp) struct ip_vs_dest *dest; if ((cp) && (!cp->dest)) { - dest = ip_vs_find_dest(cp->daddr, cp->dport, - cp->vaddr, cp->vport, cp->protocol); + dest = ip_vs_find_dest(cp->daddr.ip, cp->dport, + cp->vaddr.ip, cp->vport, cp->protocol); ip_vs_bind_dest(cp, dest); return dest; } else @@ -468,9 +468,9 @@ static inline void ip_vs_unbind_dest(struct ip_vs_conn *cp) "d:%u.%u.%u.%u:%d fwd:%c s:%u conn->flags:%X conn->refcnt:%d " "dest->refcnt:%d\n", ip_vs_proto_name(cp->protocol), - NIPQUAD(cp->caddr), ntohs(cp->cport), - NIPQUAD(cp->vaddr), ntohs(cp->vport), - NIPQUAD(cp->daddr), ntohs(cp->dport), + NIPQUAD(cp->caddr.ip), ntohs(cp->cport), + NIPQUAD(cp->vaddr.ip), ntohs(cp->vport), + NIPQUAD(cp->daddr.ip), ntohs(cp->dport), ip_vs_fwd_tag(cp), cp->state, cp->flags, atomic_read(&cp->refcnt), atomic_read(&dest->refcnt)); @@ -530,9 +530,9 @@ int ip_vs_check_template(struct ip_vs_conn *ct) "protocol %s s:%u.%u.%u.%u:%d v:%u.%u.%u.%u:%d " "-> d:%u.%u.%u.%u:%d\n", ip_vs_proto_name(ct->protocol), - NIPQUAD(ct->caddr), ntohs(ct->cport), - NIPQUAD(ct->vaddr), ntohs(ct->vport), - NIPQUAD(ct->daddr), ntohs(ct->dport)); + NIPQUAD(ct->caddr.ip), ntohs(ct->cport), + NIPQUAD(ct->vaddr.ip), ntohs(ct->vport), + NIPQUAD(ct->daddr.ip), ntohs(ct->dport)); /* * Invalidate the connection template @@ -641,11 +641,11 @@ ip_vs_conn_new(int proto, __be32 caddr, __be16 cport, __be32 vaddr, __be16 vport INIT_LIST_HEAD(&cp->c_list); setup_timer(&cp->timer, ip_vs_conn_expire, (unsigned long)cp); cp->protocol = proto; - cp->caddr = caddr; + cp->caddr.ip = caddr; cp->cport = cport; - cp->vaddr = vaddr; + cp->vaddr.ip = vaddr; cp->vport = vport; - cp->daddr = daddr; + cp->daddr.ip = daddr; cp->dport = dport; cp->flags = flags; spin_lock_init(&cp->lock); @@ -763,9 +763,9 @@ static int ip_vs_conn_seq_show(struct seq_file *seq, void *v) seq_printf(seq, "%-3s %08X %04X %08X %04X %08X %04X %-11s %7lu\n", ip_vs_proto_name(cp->protocol), - ntohl(cp->caddr), ntohs(cp->cport), - ntohl(cp->vaddr), ntohs(cp->vport), - ntohl(cp->daddr), ntohs(cp->dport), + ntohl(cp->caddr.ip), ntohs(cp->cport), + ntohl(cp->vaddr.ip), ntohs(cp->vport), + ntohl(cp->daddr.ip), ntohs(cp->dport), ip_vs_state_name(cp->protocol, cp->state), (cp->timer.expires-jiffies)/HZ); } @@ -812,9 +812,9 @@ static int ip_vs_conn_sync_seq_show(struct seq_file *seq, void *v) seq_printf(seq, "%-3s %08X %04X %08X %04X %08X %04X %-11s %-6s %7lu\n", ip_vs_proto_name(cp->protocol), - ntohl(cp->caddr), ntohs(cp->cport), - ntohl(cp->vaddr), ntohs(cp->vport), - ntohl(cp->daddr), ntohs(cp->dport), + ntohl(cp->caddr.ip), ntohs(cp->cport), + ntohl(cp->vaddr.ip), ntohs(cp->vport), + ntohl(cp->daddr.ip), ntohs(cp->dport), ip_vs_state_name(cp->protocol, cp->state), ip_vs_origin_name(cp->flags), (cp->timer.expires-jiffies)/HZ); diff --git a/net/ipv4/ipvs/ip_vs_core.c b/net/ipv4/ipvs/ip_vs_core.c index 9fbf0a6d739..4a54f33b60d 100644 --- a/net/ipv4/ipvs/ip_vs_core.c +++ b/net/ipv4/ipvs/ip_vs_core.c @@ -232,14 +232,14 @@ ip_vs_sched_persist(struct ip_vs_service *svc, snet, 0, iph->daddr, ports[1], - dest->addr, dest->port, + dest->addr.ip, dest->port, IP_VS_CONN_F_TEMPLATE, dest); else ct = ip_vs_conn_new(iph->protocol, snet, 0, iph->daddr, 0, - dest->addr, 0, + dest->addr.ip, 0, IP_VS_CONN_F_TEMPLATE, dest); if (ct == NULL) @@ -286,14 +286,14 @@ ip_vs_sched_persist(struct ip_vs_service *svc, ct = ip_vs_conn_new(IPPROTO_IP, snet, 0, htonl(svc->fwmark), 0, - dest->addr, 0, + dest->addr.ip, 0, IP_VS_CONN_F_TEMPLATE, dest); else ct = ip_vs_conn_new(iph->protocol, snet, 0, iph->daddr, 0, - dest->addr, 0, + dest->addr.ip, 0, IP_VS_CONN_F_TEMPLATE, dest); if (ct == NULL) @@ -313,7 +313,7 @@ ip_vs_sched_persist(struct ip_vs_service *svc, cp = ip_vs_conn_new(iph->protocol, iph->saddr, ports[0], iph->daddr, ports[1], - dest->addr, dport, + dest->addr.ip, dport, 0, dest); if (cp == NULL) { @@ -380,7 +380,7 @@ ip_vs_schedule(struct ip_vs_service *svc, const struct sk_buff *skb) cp = ip_vs_conn_new(iph->protocol, iph->saddr, pptr[0], iph->daddr, pptr[1], - dest->addr, dest->port?dest->port:pptr[1], + dest->addr.ip, dest->port ? dest->port : pptr[1], 0, dest); if (cp == NULL) @@ -389,9 +389,9 @@ ip_vs_schedule(struct ip_vs_service *svc, const struct sk_buff *skb) IP_VS_DBG(6, "Schedule fwd:%c c:%u.%u.%u.%u:%u v:%u.%u.%u.%u:%u " "d:%u.%u.%u.%u:%u conn->flags:%X conn->refcnt:%d\n", ip_vs_fwd_tag(cp), - NIPQUAD(cp->caddr), ntohs(cp->cport), - NIPQUAD(cp->vaddr), ntohs(cp->vport), - NIPQUAD(cp->daddr), ntohs(cp->dport), + NIPQUAD(cp->caddr.ip), ntohs(cp->cport), + NIPQUAD(cp->vaddr.ip), ntohs(cp->vport), + NIPQUAD(cp->daddr.ip), ntohs(cp->dport), cp->flags, atomic_read(&cp->refcnt)); ip_vs_conn_stats(cp, svc); @@ -526,14 +526,14 @@ void ip_vs_nat_icmp(struct sk_buff *skb, struct ip_vs_protocol *pp, struct iphdr *ciph = (struct iphdr *)(icmph + 1); if (inout) { - iph->saddr = cp->vaddr; + iph->saddr = cp->vaddr.ip; ip_send_check(iph); - ciph->daddr = cp->vaddr; + ciph->daddr = cp->vaddr.ip; ip_send_check(ciph); } else { - iph->daddr = cp->daddr; + iph->daddr = cp->daddr.ip; ip_send_check(iph); - ciph->saddr = cp->daddr; + ciph->saddr = cp->daddr.ip; ip_send_check(ciph); } @@ -762,7 +762,7 @@ ip_vs_out(unsigned int hooknum, struct sk_buff *skb, /* mangle the packet */ if (pp->snat_handler && !pp->snat_handler(skb, pp, cp)) goto drop; - ip_hdr(skb)->saddr = cp->vaddr; + ip_hdr(skb)->saddr = cp->vaddr.ip; ip_send_check(ip_hdr(skb)); /* For policy routing, packets originating from this diff --git a/net/ipv4/ipvs/ip_vs_ctl.c b/net/ipv4/ipvs/ip_vs_ctl.c index ede101eeec1..3f2277b847d 100644 --- a/net/ipv4/ipvs/ip_vs_ctl.c +++ b/net/ipv4/ipvs/ip_vs_ctl.c @@ -317,7 +317,8 @@ static int ip_vs_svc_hash(struct ip_vs_service *svc) /* * Hash it by in ip_vs_svc_table */ - hash = ip_vs_svc_hashkey(svc->protocol, svc->addr, svc->port); + hash = ip_vs_svc_hashkey(svc->protocol, svc->addr.ip, + svc->port); list_add(&svc->s_list, &ip_vs_svc_table[hash]); } else { /* @@ -373,7 +374,7 @@ __ip_vs_service_get(__u16 protocol, __be32 vaddr, __be16 vport) hash = ip_vs_svc_hashkey(protocol, vaddr, vport); list_for_each_entry(svc, &ip_vs_svc_table[hash], s_list){ - if ((svc->addr == vaddr) + if ((svc->addr.ip == vaddr) && (svc->port == vport) && (svc->protocol == protocol)) { /* HIT */ @@ -503,7 +504,7 @@ static int ip_vs_rs_hash(struct ip_vs_dest *dest) * Hash by proto,addr,port, * which are the parameters of the real service. */ - hash = ip_vs_rs_hashkey(dest->addr, dest->port); + hash = ip_vs_rs_hashkey(dest->addr.ip, dest->port); list_add(&dest->d_list, &ip_vs_rtable[hash]); return 1; @@ -543,7 +544,7 @@ ip_vs_lookup_real_service(__u16 protocol, __be32 daddr, __be16 dport) read_lock(&__ip_vs_rs_lock); list_for_each_entry(dest, &ip_vs_rtable[hash], d_list) { - if ((dest->addr == daddr) + if ((dest->addr.ip == daddr) && (dest->port == dport) && ((dest->protocol == protocol) || dest->vfwmark)) { @@ -569,7 +570,7 @@ ip_vs_lookup_dest(struct ip_vs_service *svc, __be32 daddr, __be16 dport) * Find the destination for the given service */ list_for_each_entry(dest, &svc->destinations, n_list) { - if ((dest->addr == daddr) && (dest->port == dport)) { + if ((dest->addr.ip == daddr) && (dest->port == dport)) { /* HIT */ return dest; } @@ -626,14 +627,14 @@ ip_vs_trash_get_dest(struct ip_vs_service *svc, __be32 daddr, __be16 dport) IP_VS_DBG(3, "Destination %u/%u.%u.%u.%u:%u still in trash, " "dest->refcnt=%d\n", dest->vfwmark, - NIPQUAD(dest->addr), ntohs(dest->port), + NIPQUAD(dest->addr.ip), ntohs(dest->port), atomic_read(&dest->refcnt)); - if (dest->addr == daddr && + if (dest->addr.ip == daddr && dest->port == dport && dest->vfwmark == svc->fwmark && dest->protocol == svc->protocol && (svc->fwmark || - (dest->vaddr == svc->addr && + (dest->vaddr.ip == svc->addr.ip && dest->vport == svc->port))) { /* HIT */ return dest; @@ -646,7 +647,7 @@ ip_vs_trash_get_dest(struct ip_vs_service *svc, __be32 daddr, __be16 dport) IP_VS_DBG(3, "Removing destination %u/%u.%u.%u.%u:%u " "from trash\n", dest->vfwmark, - NIPQUAD(dest->addr), ntohs(dest->port)); + NIPQUAD(dest->addr.ip), ntohs(dest->port)); list_del(&dest->n_list); ip_vs_dst_reset(dest); __ip_vs_unbind_svc(dest); @@ -779,10 +780,10 @@ ip_vs_new_dest(struct ip_vs_service *svc, struct ip_vs_dest_user *udest, } dest->protocol = svc->protocol; - dest->vaddr = svc->addr; + dest->vaddr.ip = svc->addr.ip; dest->vport = svc->port; dest->vfwmark = svc->fwmark; - dest->addr = udest->addr; + dest->addr.ip = udest->addr; dest->port = udest->port; atomic_set(&dest->activeconns, 0); @@ -847,7 +848,7 @@ ip_vs_add_dest(struct ip_vs_service *svc, struct ip_vs_dest_user *udest) NIPQUAD(daddr), ntohs(dport), atomic_read(&dest->refcnt), dest->vfwmark, - NIPQUAD(dest->vaddr), + NIPQUAD(dest->vaddr.ip), ntohs(dest->vport)); __ip_vs_update_dest(svc, dest, udest); @@ -993,7 +994,7 @@ static void __ip_vs_del_dest(struct ip_vs_dest *dest) } else { IP_VS_DBG(3, "Moving dest %u.%u.%u.%u:%u into trash, " "dest->refcnt=%d\n", - NIPQUAD(dest->addr), ntohs(dest->port), + NIPQUAD(dest->addr.ip), ntohs(dest->port), atomic_read(&dest->refcnt)); list_add(&dest->n_list, &ip_vs_dest_trash); atomic_inc(&dest->refcnt); @@ -1101,7 +1102,7 @@ ip_vs_add_service(struct ip_vs_service_user *u, struct ip_vs_service **svc_p) atomic_set(&svc->refcnt, 0); svc->protocol = u->protocol; - svc->addr = u->addr; + svc->addr.ip = u->addr; svc->port = u->port; svc->fwmark = u->fwmark; svc->flags = u->flags; @@ -1751,7 +1752,7 @@ static int ip_vs_info_seq_show(struct seq_file *seq, void *v) if (iter->table == ip_vs_svc_table) seq_printf(seq, "%s %08X:%04X %s ", ip_vs_proto_name(svc->protocol), - ntohl(svc->addr), + ntohl(svc->addr.ip), ntohs(svc->port), svc->scheduler->name); else @@ -1768,7 +1769,7 @@ static int ip_vs_info_seq_show(struct seq_file *seq, void *v) list_for_each_entry(dest, &svc->destinations, n_list) { seq_printf(seq, " -> %08X:%04X %-7s %-6d %-10d %-10d\n", - ntohl(dest->addr), ntohs(dest->port), + ntohl(dest->addr.ip), ntohs(dest->port), ip_vs_fwd_name(atomic_read(&dest->conn_flags)), atomic_read(&dest->weight), atomic_read(&dest->activeconns), @@ -2040,7 +2041,7 @@ static void ip_vs_copy_service(struct ip_vs_service_entry *dst, struct ip_vs_service *src) { dst->protocol = src->protocol; - dst->addr = src->addr; + dst->addr = src->addr.ip; dst->port = src->port; dst->fwmark = src->fwmark; strlcpy(dst->sched_name, src->scheduler->name, sizeof(dst->sched_name)); @@ -2114,7 +2115,7 @@ __ip_vs_get_dest_entries(const struct ip_vs_get_dests *get, if (count >= get->num_dests) break; - entry.addr = dest->addr; + entry.addr = dest->addr.ip; entry.port = dest->port; entry.conn_flags = atomic_read(&dest->conn_flags); entry.weight = atomic_read(&dest->weight); diff --git a/net/ipv4/ipvs/ip_vs_dh.c b/net/ipv4/ipvs/ip_vs_dh.c index fa66824d264..9f9d795dbd7 100644 --- a/net/ipv4/ipvs/ip_vs_dh.c +++ b/net/ipv4/ipvs/ip_vs_dh.c @@ -218,7 +218,7 @@ ip_vs_dh_schedule(struct ip_vs_service *svc, const struct sk_buff *skb) IP_VS_DBG(6, "DH: destination IP address %u.%u.%u.%u " "--> server %u.%u.%u.%u:%d\n", NIPQUAD(iph->daddr), - NIPQUAD(dest->addr), + NIPQUAD(dest->addr.ip), ntohs(dest->port)); return dest; diff --git a/net/ipv4/ipvs/ip_vs_ftp.c b/net/ipv4/ipvs/ip_vs_ftp.c index c1c758e4f73..bfe5d7050a5 100644 --- a/net/ipv4/ipvs/ip_vs_ftp.c +++ b/net/ipv4/ipvs/ip_vs_ftp.c @@ -172,17 +172,17 @@ static int ip_vs_ftp_out(struct ip_vs_app *app, struct ip_vs_conn *cp, IP_VS_DBG(7, "PASV response (%u.%u.%u.%u:%d) -> " "%u.%u.%u.%u:%d detected\n", - NIPQUAD(from), ntohs(port), NIPQUAD(cp->caddr), 0); + NIPQUAD(from), ntohs(port), NIPQUAD(cp->caddr.ip), 0); /* * Now update or create an connection entry for it */ n_cp = ip_vs_conn_out_get(iph->protocol, from, port, - cp->caddr, 0); + cp->caddr.ip, 0); if (!n_cp) { n_cp = ip_vs_conn_new(IPPROTO_TCP, - cp->caddr, 0, - cp->vaddr, port, + cp->caddr.ip, 0, + cp->vaddr.ip, port, from, port, IP_VS_CONN_F_NO_CPORT, cp->dest); @@ -196,7 +196,7 @@ static int ip_vs_ftp_out(struct ip_vs_app *app, struct ip_vs_conn *cp, /* * Replace the old passive address with the new one */ - from = n_cp->vaddr; + from = n_cp->vaddr.ip; port = n_cp->vport; sprintf(buf,"%d,%d,%d,%d,%d,%d", NIPQUAD(from), (ntohs(port)>>8)&255, ntohs(port)&255); @@ -306,16 +306,16 @@ static int ip_vs_ftp_in(struct ip_vs_app *app, struct ip_vs_conn *cp, */ IP_VS_DBG(7, "protocol %s %u.%u.%u.%u:%d %u.%u.%u.%u:%d\n", ip_vs_proto_name(iph->protocol), - NIPQUAD(to), ntohs(port), NIPQUAD(cp->vaddr), 0); + NIPQUAD(to), ntohs(port), NIPQUAD(cp->vaddr.ip), 0); n_cp = ip_vs_conn_in_get(iph->protocol, to, port, - cp->vaddr, htons(ntohs(cp->vport)-1)); + cp->vaddr.ip, htons(ntohs(cp->vport)-1)); if (!n_cp) { n_cp = ip_vs_conn_new(IPPROTO_TCP, to, port, - cp->vaddr, htons(ntohs(cp->vport)-1), - cp->daddr, htons(ntohs(cp->dport)-1), + cp->vaddr.ip, htons(ntohs(cp->vport)-1), + cp->daddr.ip, htons(ntohs(cp->dport)-1), 0, cp->dest); if (!n_cp) diff --git a/net/ipv4/ipvs/ip_vs_lblc.c b/net/ipv4/ipvs/ip_vs_lblc.c index d2a43aa3fe4..69309edc0c4 100644 --- a/net/ipv4/ipvs/ip_vs_lblc.c +++ b/net/ipv4/ipvs/ip_vs_lblc.c @@ -422,7 +422,7 @@ __ip_vs_lblc_schedule(struct ip_vs_service *svc, struct iphdr *iph) IP_VS_DBG(6, "LBLC: server %d.%d.%d.%d:%d " "activeconns %d refcnt %d weight %d overhead %d\n", - NIPQUAD(least->addr), ntohs(least->port), + NIPQUAD(least->addr.ip), ntohs(least->port), atomic_read(&least->activeconns), atomic_read(&least->refcnt), atomic_read(&least->weight), loh); @@ -506,7 +506,7 @@ out: IP_VS_DBG(6, "LBLC: destination IP address %u.%u.%u.%u " "--> server %u.%u.%u.%u:%d\n", NIPQUAD(iph->daddr), - NIPQUAD(dest->addr), + NIPQUAD(dest->addr.ip), ntohs(dest->port)); return dest; diff --git a/net/ipv4/ipvs/ip_vs_lblcr.c b/net/ipv4/ipvs/ip_vs_lblcr.c index 375a1ffb6b6..51c746e2083 100644 --- a/net/ipv4/ipvs/ip_vs_lblcr.c +++ b/net/ipv4/ipvs/ip_vs_lblcr.c @@ -204,7 +204,7 @@ static inline struct ip_vs_dest *ip_vs_dest_set_min(struct ip_vs_dest_set *set) IP_VS_DBG(6, "ip_vs_dest_set_min: server %d.%d.%d.%d:%d " "activeconns %d refcnt %d weight %d overhead %d\n", - NIPQUAD(least->addr), ntohs(least->port), + NIPQUAD(least->addr.ip), ntohs(least->port), atomic_read(&least->activeconns), atomic_read(&least->refcnt), atomic_read(&least->weight), loh); @@ -250,7 +250,7 @@ static inline struct ip_vs_dest *ip_vs_dest_set_max(struct ip_vs_dest_set *set) IP_VS_DBG(6, "ip_vs_dest_set_max: server %d.%d.%d.%d:%d " "activeconns %d refcnt %d weight %d overhead %d\n", - NIPQUAD(most->addr), ntohs(most->port), + NIPQUAD(most->addr.ip), ntohs(most->port), atomic_read(&most->activeconns), atomic_read(&most->refcnt), atomic_read(&most->weight), moh); @@ -598,7 +598,7 @@ __ip_vs_lblcr_schedule(struct ip_vs_service *svc, struct iphdr *iph) IP_VS_DBG(6, "LBLCR: server %d.%d.%d.%d:%d " "activeconns %d refcnt %d weight %d overhead %d\n", - NIPQUAD(least->addr), ntohs(least->port), + NIPQUAD(least->addr.ip), ntohs(least->port), atomic_read(&least->activeconns), atomic_read(&least->refcnt), atomic_read(&least->weight), loh); @@ -706,7 +706,7 @@ out: IP_VS_DBG(6, "LBLCR: destination IP address %u.%u.%u.%u " "--> server %u.%u.%u.%u:%d\n", NIPQUAD(iph->daddr), - NIPQUAD(dest->addr), + NIPQUAD(dest->addr.ip), ntohs(dest->port)); return dest; diff --git a/net/ipv4/ipvs/ip_vs_lc.c b/net/ipv4/ipvs/ip_vs_lc.c index 2c3de1b6351..551d293347f 100644 --- a/net/ipv4/ipvs/ip_vs_lc.c +++ b/net/ipv4/ipvs/ip_vs_lc.c @@ -68,7 +68,7 @@ ip_vs_lc_schedule(struct ip_vs_service *svc, const struct sk_buff *skb) if (least) IP_VS_DBG(6, "LC: server %u.%u.%u.%u:%u activeconns %d inactconns %d\n", - NIPQUAD(least->addr), ntohs(least->port), + NIPQUAD(least->addr.ip), ntohs(least->port), atomic_read(&least->activeconns), atomic_read(&least->inactconns)); diff --git a/net/ipv4/ipvs/ip_vs_nq.c b/net/ipv4/ipvs/ip_vs_nq.c index 5330d5a2de1..aa0e32ad348 100644 --- a/net/ipv4/ipvs/ip_vs_nq.c +++ b/net/ipv4/ipvs/ip_vs_nq.c @@ -101,7 +101,7 @@ ip_vs_nq_schedule(struct ip_vs_service *svc, const struct sk_buff *skb) out: IP_VS_DBG(6, "NQ: server %u.%u.%u.%u:%u " "activeconns %d refcnt %d weight %d overhead %d\n", - NIPQUAD(least->addr), ntohs(least->port), + NIPQUAD(least->addr.ip), ntohs(least->port), atomic_read(&least->activeconns), atomic_read(&least->refcnt), atomic_read(&least->weight), loh); diff --git a/net/ipv4/ipvs/ip_vs_proto_tcp.c b/net/ipv4/ipvs/ip_vs_proto_tcp.c index d0ea467986a..15860e1441b 100644 --- a/net/ipv4/ipvs/ip_vs_proto_tcp.c +++ b/net/ipv4/ipvs/ip_vs_proto_tcp.c @@ -147,7 +147,7 @@ tcp_snat_handler(struct sk_buff *skb, /* Adjust TCP checksums */ if (!cp->app) { /* Only port and addr are changed, do fast csum update */ - tcp_fast_csum_update(tcph, cp->daddr, cp->vaddr, + tcp_fast_csum_update(tcph, cp->daddr.ip, cp->vaddr.ip, cp->dport, cp->vport); if (skb->ip_summed == CHECKSUM_COMPLETE) skb->ip_summed = CHECKSUM_NONE; @@ -155,7 +155,7 @@ tcp_snat_handler(struct sk_buff *skb, /* full checksum calculation */ tcph->check = 0; skb->csum = skb_checksum(skb, tcphoff, skb->len - tcphoff, 0); - tcph->check = csum_tcpudp_magic(cp->vaddr, cp->caddr, + tcph->check = csum_tcpudp_magic(cp->vaddr.ip, cp->caddr.ip, skb->len - tcphoff, cp->protocol, skb->csum); IP_VS_DBG(11, "O-pkt: %s O-csum=%d (+%zd)\n", @@ -198,7 +198,7 @@ tcp_dnat_handler(struct sk_buff *skb, */ if (!cp->app) { /* Only port and addr are changed, do fast csum update */ - tcp_fast_csum_update(tcph, cp->vaddr, cp->daddr, + tcp_fast_csum_update(tcph, cp->vaddr.ip, cp->daddr.ip, cp->vport, cp->dport); if (skb->ip_summed == CHECKSUM_COMPLETE) skb->ip_summed = CHECKSUM_NONE; @@ -206,7 +206,7 @@ tcp_dnat_handler(struct sk_buff *skb, /* full checksum calculation */ tcph->check = 0; skb->csum = skb_checksum(skb, tcphoff, skb->len - tcphoff, 0); - tcph->check = csum_tcpudp_magic(cp->caddr, cp->daddr, + tcph->check = csum_tcpudp_magic(cp->caddr.ip, cp->daddr.ip, skb->len - tcphoff, cp->protocol, skb->csum); skb->ip_summed = CHECKSUM_UNNECESSARY; @@ -427,8 +427,8 @@ set_tcp_state(struct ip_vs_protocol *pp, struct ip_vs_conn *cp, th->fin? 'F' : '.', th->ack? 'A' : '.', th->rst? 'R' : '.', - NIPQUAD(cp->daddr), ntohs(cp->dport), - NIPQUAD(cp->caddr), ntohs(cp->cport), + NIPQUAD(cp->daddr.ip), ntohs(cp->dport), + NIPQUAD(cp->caddr.ip), ntohs(cp->cport), tcp_state_name(cp->state), tcp_state_name(new_state), atomic_read(&cp->refcnt)); @@ -549,8 +549,8 @@ tcp_app_conn_bind(struct ip_vs_conn *cp) IP_VS_DBG(9, "%s: Binding conn %u.%u.%u.%u:%u->" "%u.%u.%u.%u:%u to app %s on port %u\n", __func__, - NIPQUAD(cp->caddr), ntohs(cp->cport), - NIPQUAD(cp->vaddr), ntohs(cp->vport), + NIPQUAD(cp->caddr.ip), ntohs(cp->cport), + NIPQUAD(cp->vaddr.ip), ntohs(cp->vport), inc->name, ntohs(inc->port)); cp->app = inc; if (inc->init_conn) diff --git a/net/ipv4/ipvs/ip_vs_proto_udp.c b/net/ipv4/ipvs/ip_vs_proto_udp.c index c6be5d56823..8dfad5db829 100644 --- a/net/ipv4/ipvs/ip_vs_proto_udp.c +++ b/net/ipv4/ipvs/ip_vs_proto_udp.c @@ -158,7 +158,7 @@ udp_snat_handler(struct sk_buff *skb, */ if (!cp->app && (udph->check != 0)) { /* Only port and addr are changed, do fast csum update */ - udp_fast_csum_update(udph, cp->daddr, cp->vaddr, + udp_fast_csum_update(udph, cp->daddr.ip, cp->vaddr.ip, cp->dport, cp->vport); if (skb->ip_summed == CHECKSUM_COMPLETE) skb->ip_summed = CHECKSUM_NONE; @@ -166,7 +166,7 @@ udp_snat_handler(struct sk_buff *skb, /* full checksum calculation */ udph->check = 0; skb->csum = skb_checksum(skb, udphoff, skb->len - udphoff, 0); - udph->check = csum_tcpudp_magic(cp->vaddr, cp->caddr, + udph->check = csum_tcpudp_magic(cp->vaddr.ip, cp->caddr.ip, skb->len - udphoff, cp->protocol, skb->csum); if (udph->check == 0) @@ -211,7 +211,7 @@ udp_dnat_handler(struct sk_buff *skb, */ if (!cp->app && (udph->check != 0)) { /* Only port and addr are changed, do fast csum update */ - udp_fast_csum_update(udph, cp->vaddr, cp->daddr, + udp_fast_csum_update(udph, cp->vaddr.ip, cp->daddr.ip, cp->vport, cp->dport); if (skb->ip_summed == CHECKSUM_COMPLETE) skb->ip_summed = CHECKSUM_NONE; @@ -219,7 +219,7 @@ udp_dnat_handler(struct sk_buff *skb, /* full checksum calculation */ udph->check = 0; skb->csum = skb_checksum(skb, udphoff, skb->len - udphoff, 0); - udph->check = csum_tcpudp_magic(cp->caddr, cp->daddr, + udph->check = csum_tcpudp_magic(cp->caddr.ip, cp->daddr.ip, skb->len - udphoff, cp->protocol, skb->csum); if (udph->check == 0) @@ -343,8 +343,8 @@ static int udp_app_conn_bind(struct ip_vs_conn *cp) IP_VS_DBG(9, "%s: Binding conn %u.%u.%u.%u:%u->" "%u.%u.%u.%u:%u to app %s on port %u\n", __func__, - NIPQUAD(cp->caddr), ntohs(cp->cport), - NIPQUAD(cp->vaddr), ntohs(cp->vport), + NIPQUAD(cp->caddr.ip), ntohs(cp->cport), + NIPQUAD(cp->vaddr.ip), ntohs(cp->vport), inc->name, ntohs(inc->port)); cp->app = inc; if (inc->init_conn) diff --git a/net/ipv4/ipvs/ip_vs_rr.c b/net/ipv4/ipvs/ip_vs_rr.c index f7492911753..27f0b624283 100644 --- a/net/ipv4/ipvs/ip_vs_rr.c +++ b/net/ipv4/ipvs/ip_vs_rr.c @@ -76,7 +76,7 @@ ip_vs_rr_schedule(struct ip_vs_service *svc, const struct sk_buff *skb) write_unlock(&svc->sched_lock); IP_VS_DBG(6, "RR: server %u.%u.%u.%u:%u " "activeconns %d refcnt %d weight %d\n", - NIPQUAD(dest->addr), ntohs(dest->port), + NIPQUAD(dest->addr.ip), ntohs(dest->port), atomic_read(&dest->activeconns), atomic_read(&dest->refcnt), atomic_read(&dest->weight)); diff --git a/net/ipv4/ipvs/ip_vs_sed.c b/net/ipv4/ipvs/ip_vs_sed.c index 53f73bea66c..38b574b2fdf 100644 --- a/net/ipv4/ipvs/ip_vs_sed.c +++ b/net/ipv4/ipvs/ip_vs_sed.c @@ -103,7 +103,7 @@ ip_vs_sed_schedule(struct ip_vs_service *svc, const struct sk_buff *skb) IP_VS_DBG(6, "SED: server %u.%u.%u.%u:%u " "activeconns %d refcnt %d weight %d overhead %d\n", - NIPQUAD(least->addr), ntohs(least->port), + NIPQUAD(least->addr.ip), ntohs(least->port), atomic_read(&least->activeconns), atomic_read(&least->refcnt), atomic_read(&least->weight), loh); diff --git a/net/ipv4/ipvs/ip_vs_sh.c b/net/ipv4/ipvs/ip_vs_sh.c index 7b979e22805..c9e54e2ec29 100644 --- a/net/ipv4/ipvs/ip_vs_sh.c +++ b/net/ipv4/ipvs/ip_vs_sh.c @@ -215,7 +215,7 @@ ip_vs_sh_schedule(struct ip_vs_service *svc, const struct sk_buff *skb) IP_VS_DBG(6, "SH: source IP address %u.%u.%u.%u " "--> server %u.%u.%u.%u:%d\n", NIPQUAD(iph->saddr), - NIPQUAD(dest->addr), + NIPQUAD(dest->addr.ip), ntohs(dest->port)); return dest; diff --git a/net/ipv4/ipvs/ip_vs_sync.c b/net/ipv4/ipvs/ip_vs_sync.c index a652da2c320..2cf47b2e166 100644 --- a/net/ipv4/ipvs/ip_vs_sync.c +++ b/net/ipv4/ipvs/ip_vs_sync.c @@ -256,9 +256,9 @@ void ip_vs_sync_conn(struct ip_vs_conn *cp) s->cport = cp->cport; s->vport = cp->vport; s->dport = cp->dport; - s->caddr = cp->caddr; - s->vaddr = cp->vaddr; - s->daddr = cp->daddr; + s->caddr = cp->caddr.ip; + s->vaddr = cp->vaddr.ip; + s->daddr = cp->daddr.ip; s->flags = htons(cp->flags & ~IP_VS_CONN_F_HASHED); s->state = htons(cp->state); if (cp->flags & IP_VS_CONN_F_SEQ_MASK) { diff --git a/net/ipv4/ipvs/ip_vs_wlc.c b/net/ipv4/ipvs/ip_vs_wlc.c index df7ad8d7476..09fd993040f 100644 --- a/net/ipv4/ipvs/ip_vs_wlc.c +++ b/net/ipv4/ipvs/ip_vs_wlc.c @@ -91,7 +91,7 @@ ip_vs_wlc_schedule(struct ip_vs_service *svc, const struct sk_buff *skb) IP_VS_DBG(6, "WLC: server %u.%u.%u.%u:%u " "activeconns %d refcnt %d weight %d overhead %d\n", - NIPQUAD(least->addr), ntohs(least->port), + NIPQUAD(least->addr.ip), ntohs(least->port), atomic_read(&least->activeconns), atomic_read(&least->refcnt), atomic_read(&least->weight), loh); diff --git a/net/ipv4/ipvs/ip_vs_wrr.c b/net/ipv4/ipvs/ip_vs_wrr.c index 0d86a79b87b..19c49b234f3 100644 --- a/net/ipv4/ipvs/ip_vs_wrr.c +++ b/net/ipv4/ipvs/ip_vs_wrr.c @@ -197,7 +197,7 @@ ip_vs_wrr_schedule(struct ip_vs_service *svc, const struct sk_buff *skb) IP_VS_DBG(6, "WRR: server %u.%u.%u.%u:%u " "activeconns %d refcnt %d weight %d\n", - NIPQUAD(dest->addr), ntohs(dest->port), + NIPQUAD(dest->addr.ip), ntohs(dest->port), atomic_read(&dest->activeconns), atomic_read(&dest->refcnt), atomic_read(&dest->weight)); diff --git a/net/ipv4/ipvs/ip_vs_xmit.c b/net/ipv4/ipvs/ip_vs_xmit.c index 9892d4aca42..88199c9f2d3 100644 --- a/net/ipv4/ipvs/ip_vs_xmit.c +++ b/net/ipv4/ipvs/ip_vs_xmit.c @@ -71,7 +71,7 @@ __ip_vs_get_out_rt(struct ip_vs_conn *cp, u32 rtos) .oif = 0, .nl_u = { .ip4_u = { - .daddr = dest->addr, + .daddr = dest->addr.ip, .saddr = 0, .tos = rtos, } }, }; @@ -80,12 +80,12 @@ __ip_vs_get_out_rt(struct ip_vs_conn *cp, u32 rtos) spin_unlock(&dest->dst_lock); IP_VS_DBG_RL("ip_route_output error, " "dest: %u.%u.%u.%u\n", - NIPQUAD(dest->addr)); + NIPQUAD(dest->addr.ip)); return NULL; } __ip_vs_dst_set(dest, rtos, dst_clone(&rt->u.dst)); IP_VS_DBG(10, "new dst %u.%u.%u.%u, refcnt=%d, rtos=%X\n", - NIPQUAD(dest->addr), + NIPQUAD(dest->addr.ip), atomic_read(&rt->u.dst.__refcnt), rtos); } spin_unlock(&dest->dst_lock); @@ -94,14 +94,14 @@ __ip_vs_get_out_rt(struct ip_vs_conn *cp, u32 rtos) .oif = 0, .nl_u = { .ip4_u = { - .daddr = cp->daddr, + .daddr = cp->daddr.ip, .saddr = 0, .tos = rtos, } }, }; if (ip_route_output_key(&init_net, &rt, &fl)) { IP_VS_DBG_RL("ip_route_output error, dest: " - "%u.%u.%u.%u\n", NIPQUAD(cp->daddr)); + "%u.%u.%u.%u\n", NIPQUAD(cp->daddr.ip)); return NULL; } } @@ -264,7 +264,7 @@ ip_vs_nat_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, /* mangle the packet */ if (pp->dnat_handler && !pp->dnat_handler(skb, pp, cp)) goto tx_error; - ip_hdr(skb)->daddr = cp->daddr; + ip_hdr(skb)->daddr = cp->daddr.ip; ip_send_check(ip_hdr(skb)); IP_VS_DBG_PKT(10, pp, skb, 0, "After DNAT"); -- cgit v1.2.3-70-g09d2 From 64aae3cb9fd22f33e491c4730d363eb2229ef910 Mon Sep 17 00:00:00 2001 From: Julius Volz Date: Tue, 2 Sep 2008 15:55:34 +0200 Subject: IPVS: Add general v4/v6 helper functions / data structures Add a struct ip_vs_iphdr for easier handling of common v4 and v6 header fields in the same code path. ip_vs_fill_iphdr() helps to fill this struct from an IPv4 or IPv6 header. Add further helper functions for copying and comparing addresses. Signed-off-by: Julius Volz Signed-off-by: Simon Horman --- include/net/ip_vs.h | 49 +++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 49 insertions(+) (limited to 'include/net') diff --git a/include/net/ip_vs.h b/include/net/ip_vs.h index d3282558550..5d6313d972f 100644 --- a/include/net/ip_vs.h +++ b/include/net/ip_vs.h @@ -25,6 +25,55 @@ #include /* for struct ipv6hdr */ #include /* for ipv6_addr_copy */ +struct ip_vs_iphdr { + int len; + __u8 protocol; + union nf_inet_addr saddr; + union nf_inet_addr daddr; +}; + +static inline void +ip_vs_fill_iphdr(int af, const void *nh, struct ip_vs_iphdr *iphdr) +{ +#ifdef CONFIG_IP_VS_IPV6 + if (af == AF_INET6) { + const struct ipv6hdr *iph = nh; + iphdr->len = sizeof(struct ipv6hdr); + iphdr->protocol = iph->nexthdr; + ipv6_addr_copy(&iphdr->saddr.in6, &iph->saddr); + ipv6_addr_copy(&iphdr->daddr.in6, &iph->daddr); + } else +#endif + { + const struct iphdr *iph = nh; + iphdr->len = iph->ihl * 4; + iphdr->protocol = iph->protocol; + iphdr->saddr.ip = iph->saddr; + iphdr->daddr.ip = iph->daddr; + } +} + +static inline void ip_vs_addr_copy(int af, union nf_inet_addr *dst, + const union nf_inet_addr *src) +{ +#ifdef CONFIG_IP_VS_IPV6 + if (af == AF_INET6) + ipv6_addr_copy(&dst->in6, &src->in6); + else +#endif + dst->ip = src->ip; +} + +static inline int ip_vs_addr_equal(int af, const union nf_inet_addr *a, + const union nf_inet_addr *b) +{ +#ifdef CONFIG_IP_VS_IPV6 + if (af == AF_INET6) + return ipv6_addr_equal(&a->in6, &b->in6); +#endif + return a->ip == b->ip; +} + #ifdef CONFIG_IP_VS_DEBUG #include -- cgit v1.2.3-70-g09d2 From c842a3ada9ba8f0cca38a70de3fe0effcfab254c Mon Sep 17 00:00:00 2001 From: Julius Volz Date: Tue, 2 Sep 2008 15:55:35 +0200 Subject: IPVS: Add debug macros for v4 and v6 address output Add some debugging macros that allow conditional output of either v4 or v6 addresses, depending on an 'af' parameter. This is done by creating a temporary string buffer in an outer debug macro and writing addresses' string representations into it from another macro which can only be used when inside the outer one. Signed-off-by: Julius Volz Signed-off-by: Simon Horman --- include/net/ip_vs.h | 42 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 42 insertions(+) (limited to 'include/net') diff --git a/include/net/ip_vs.h b/include/net/ip_vs.h index 5d6313d972f..0400e590b6a 100644 --- a/include/net/ip_vs.h +++ b/include/net/ip_vs.h @@ -78,6 +78,46 @@ static inline int ip_vs_addr_equal(int af, const union nf_inet_addr *a, #include extern int ip_vs_get_debug_level(void); + +static inline const char *ip_vs_dbg_addr(int af, char *buf, size_t buf_len, + const union nf_inet_addr *addr, + int *idx) +{ + int len; +#ifdef CONFIG_IP_VS_IPV6 + if (af == AF_INET6) + len = snprintf(&buf[*idx], buf_len - *idx, "[" NIP6_FMT "]", + NIP6(addr->in6)) + 1; + else +#endif + len = snprintf(&buf[*idx], buf_len - *idx, NIPQUAD_FMT, + NIPQUAD(addr->ip)) + 1; + + *idx += len; + BUG_ON(*idx > buf_len + 1); + return &buf[*idx - len]; +} + +#define IP_VS_DBG_BUF(level, msg...) \ + do { \ + char ip_vs_dbg_buf[160]; \ + int ip_vs_dbg_idx = 0; \ + if (level <= ip_vs_get_debug_level()) \ + printk(KERN_DEBUG "IPVS: " msg); \ + } while (0) +#define IP_VS_ERR_BUF(msg...) \ + do { \ + char ip_vs_dbg_buf[160]; \ + int ip_vs_dbg_idx = 0; \ + printk(KERN_ERR "IPVS: " msg); \ + } while (0) + +/* Only use from within IP_VS_DBG_BUF() or IP_VS_ERR_BUF macros */ +#define IP_VS_DBG_ADDR(af, addr) \ + ip_vs_dbg_addr(af, ip_vs_dbg_buf, \ + sizeof(ip_vs_dbg_buf), addr, \ + &ip_vs_dbg_idx) + #define IP_VS_DBG(level, msg...) \ do { \ if (level <= ip_vs_get_debug_level()) \ @@ -100,6 +140,8 @@ extern int ip_vs_get_debug_level(void); pp->debug_packet(pp, skb, ofs, msg); \ } while (0) #else /* NO DEBUGGING at ALL */ +#define IP_VS_DBG_BUF(level, msg...) do {} while (0) +#define IP_VS_ERR_BUF(msg...) do {} while (0) #define IP_VS_DBG(level, msg...) do {} while (0) #define IP_VS_DBG_RL(msg...) do {} while (0) #define IP_VS_DBG_PKT(level, pp, skb, ofs, msg) do {} while (0) -- cgit v1.2.3-70-g09d2 From c860c6b1479992440e4962e9c95d258bfdce4fca Mon Sep 17 00:00:00 2001 From: Julius Volz Date: Tue, 2 Sep 2008 15:55:36 +0200 Subject: IPVS: Add internal versions of sockopt interface structs Add extended internal versions of struct ip_vs_service_user and struct ip_vs_dest_user (the originals can't be modified as they are part of the old sockopt interface). Adjust ip_vs_ctl.c to work with the new data structures and add some minor AF-awareness. Signed-off-by: Julius Volz Signed-off-by: Simon Horman --- include/net/ip_vs.h | 39 +++++++++++++ net/ipv4/ipvs/ip_vs_ctl.c | 138 ++++++++++++++++++++++++++++++---------------- 2 files changed, 129 insertions(+), 48 deletions(-) (limited to 'include/net') diff --git a/include/net/ip_vs.h b/include/net/ip_vs.h index 0400e590b6a..1adf8a026e4 100644 --- a/include/net/ip_vs.h +++ b/include/net/ip_vs.h @@ -399,6 +399,45 @@ struct ip_vs_conn { }; +/* + * Extended internal versions of struct ip_vs_service_user and + * ip_vs_dest_user for IPv6 support. + * + * We need these to conveniently pass around service and destination + * options, but unfortunately, we also need to keep the old definitions to + * maintain userspace backwards compatibility for the setsockopt interface. + */ +struct ip_vs_service_user_kern { + /* virtual service addresses */ + u16 af; + u16 protocol; + union nf_inet_addr addr; /* virtual ip address */ + u16 port; + u32 fwmark; /* firwall mark of service */ + + /* virtual service options */ + char *sched_name; + unsigned flags; /* virtual service flags */ + unsigned timeout; /* persistent timeout in sec */ + u32 netmask; /* persistent netmask */ +}; + + +struct ip_vs_dest_user_kern { + /* destination server address */ + union nf_inet_addr addr; + u16 port; + + /* real server options */ + unsigned conn_flags; /* connection flags */ + int weight; /* destination weight */ + + /* thresholds for active connections */ + u32 u_threshold; /* upper threshold */ + u32 l_threshold; /* lower threshold */ +}; + + /* * The information about the virtual service offered to the net * and the forwarding entries diff --git a/net/ipv4/ipvs/ip_vs_ctl.c b/net/ipv4/ipvs/ip_vs_ctl.c index 3f2277b847d..a0c8b7bb553 100644 --- a/net/ipv4/ipvs/ip_vs_ctl.c +++ b/net/ipv4/ipvs/ip_vs_ctl.c @@ -708,7 +708,7 @@ ip_vs_zero_stats(struct ip_vs_stats *stats) */ static void __ip_vs_update_dest(struct ip_vs_service *svc, - struct ip_vs_dest *dest, struct ip_vs_dest_user *udest) + struct ip_vs_dest *dest, struct ip_vs_dest_user_kern *udest) { int conn_flags; @@ -717,7 +717,7 @@ __ip_vs_update_dest(struct ip_vs_service *svc, conn_flags = udest->conn_flags | IP_VS_CONN_F_INACTIVE; /* check if local node and update the flags */ - if (inet_addr_type(&init_net, udest->addr) == RTN_LOCAL) { + if (inet_addr_type(&init_net, udest->addr.ip) == RTN_LOCAL) { conn_flags = (conn_flags & ~IP_VS_CONN_F_FWD_MASK) | IP_VS_CONN_F_LOCALNODE; } @@ -761,7 +761,7 @@ __ip_vs_update_dest(struct ip_vs_service *svc, * Create a destination for the given service */ static int -ip_vs_new_dest(struct ip_vs_service *svc, struct ip_vs_dest_user *udest, +ip_vs_new_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest, struct ip_vs_dest **dest_p) { struct ip_vs_dest *dest; @@ -769,7 +769,7 @@ ip_vs_new_dest(struct ip_vs_service *svc, struct ip_vs_dest_user *udest, EnterFunction(2); - atype = inet_addr_type(&init_net, udest->addr); + atype = inet_addr_type(&init_net, udest->addr.ip); if (atype != RTN_LOCAL && atype != RTN_UNICAST) return -EINVAL; @@ -779,11 +779,12 @@ ip_vs_new_dest(struct ip_vs_service *svc, struct ip_vs_dest_user *udest, return -ENOMEM; } + dest->af = svc->af; dest->protocol = svc->protocol; - dest->vaddr.ip = svc->addr.ip; + dest->vaddr = svc->addr; dest->vport = svc->port; dest->vfwmark = svc->fwmark; - dest->addr.ip = udest->addr; + ip_vs_addr_copy(svc->af, &dest->addr, &udest->addr); dest->port = udest->port; atomic_set(&dest->activeconns, 0); @@ -808,10 +809,10 @@ ip_vs_new_dest(struct ip_vs_service *svc, struct ip_vs_dest_user *udest, * Add a destination into an existing service */ static int -ip_vs_add_dest(struct ip_vs_service *svc, struct ip_vs_dest_user *udest) +ip_vs_add_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest) { struct ip_vs_dest *dest; - __be32 daddr = udest->addr; + union nf_inet_addr daddr; __be16 dport = udest->port; int ret; @@ -828,10 +829,12 @@ ip_vs_add_dest(struct ip_vs_service *svc, struct ip_vs_dest_user *udest) return -ERANGE; } + ip_vs_addr_copy(svc->af, &daddr, &udest->addr); + /* * Check if the dest already exists in the list */ - dest = ip_vs_lookup_dest(svc, daddr, dport); + dest = ip_vs_lookup_dest(svc, daddr.ip, dport); if (dest != NULL) { IP_VS_DBG(1, "ip_vs_add_dest(): dest already exists\n"); return -EEXIST; @@ -841,7 +844,7 @@ ip_vs_add_dest(struct ip_vs_service *svc, struct ip_vs_dest_user *udest) * Check if the dest already exists in the trash and * is from the same service */ - dest = ip_vs_trash_get_dest(svc, daddr, dport); + dest = ip_vs_trash_get_dest(svc, daddr.ip, dport); if (dest != NULL) { IP_VS_DBG(3, "Get destination %u.%u.%u.%u:%u from trash, " "dest->refcnt=%d, service %u/%u.%u.%u.%u:%u\n", @@ -916,10 +919,10 @@ ip_vs_add_dest(struct ip_vs_service *svc, struct ip_vs_dest_user *udest) * Edit a destination in the given service */ static int -ip_vs_edit_dest(struct ip_vs_service *svc, struct ip_vs_dest_user *udest) +ip_vs_edit_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest) { struct ip_vs_dest *dest; - __be32 daddr = udest->addr; + union nf_inet_addr daddr; __be16 dport = udest->port; EnterFunction(2); @@ -935,10 +938,12 @@ ip_vs_edit_dest(struct ip_vs_service *svc, struct ip_vs_dest_user *udest) return -ERANGE; } + ip_vs_addr_copy(svc->af, &daddr, &udest->addr); + /* * Lookup the destination list */ - dest = ip_vs_lookup_dest(svc, daddr, dport); + dest = ip_vs_lookup_dest(svc, daddr.ip, dport); if (dest == NULL) { IP_VS_DBG(1, "ip_vs_edit_dest(): dest doesn't exist\n"); return -ENOENT; @@ -1029,15 +1034,15 @@ static void __ip_vs_unlink_dest(struct ip_vs_service *svc, * Delete a destination server in the given service */ static int -ip_vs_del_dest(struct ip_vs_service *svc,struct ip_vs_dest_user *udest) +ip_vs_del_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest) { struct ip_vs_dest *dest; - __be32 daddr = udest->addr; __be16 dport = udest->port; EnterFunction(2); - dest = ip_vs_lookup_dest(svc, daddr, dport); + dest = ip_vs_lookup_dest(svc, udest->addr.ip, dport); + if (dest == NULL) { IP_VS_DBG(1, "ip_vs_del_dest(): destination not found!\n"); return -ENOENT; @@ -1072,7 +1077,8 @@ ip_vs_del_dest(struct ip_vs_service *svc,struct ip_vs_dest_user *udest) * Add a service into the service hash table */ static int -ip_vs_add_service(struct ip_vs_service_user *u, struct ip_vs_service **svc_p) +ip_vs_add_service(struct ip_vs_service_user_kern *u, + struct ip_vs_service **svc_p) { int ret = 0; struct ip_vs_scheduler *sched = NULL; @@ -1101,8 +1107,9 @@ ip_vs_add_service(struct ip_vs_service_user *u, struct ip_vs_service **svc_p) atomic_set(&svc->usecnt, 1); atomic_set(&svc->refcnt, 0); + svc->af = u->af; svc->protocol = u->protocol; - svc->addr.ip = u->addr; + ip_vs_addr_copy(svc->af, &svc->addr, &u->addr); svc->port = u->port; svc->fwmark = u->fwmark; svc->flags = u->flags; @@ -1161,7 +1168,7 @@ ip_vs_add_service(struct ip_vs_service_user *u, struct ip_vs_service **svc_p) * Edit a service and bind it with a new scheduler */ static int -ip_vs_edit_service(struct ip_vs_service *svc, struct ip_vs_service_user *u) +ip_vs_edit_service(struct ip_vs_service *svc, struct ip_vs_service_user_kern *u) { struct ip_vs_scheduler *sched, *old_sched; int ret = 0; @@ -1905,14 +1912,44 @@ static const unsigned char set_arglen[SET_CMDID(IP_VS_SO_SET_MAX)+1] = { [SET_CMDID(IP_VS_SO_SET_ZERO)] = SERVICE_ARG_LEN, }; +static void ip_vs_copy_usvc_compat(struct ip_vs_service_user_kern *usvc, + struct ip_vs_service_user *usvc_compat) +{ + usvc->af = AF_INET; + usvc->protocol = usvc_compat->protocol; + usvc->addr.ip = usvc_compat->addr; + usvc->port = usvc_compat->port; + usvc->fwmark = usvc_compat->fwmark; + + /* Deep copy of sched_name is not needed here */ + usvc->sched_name = usvc_compat->sched_name; + + usvc->flags = usvc_compat->flags; + usvc->timeout = usvc_compat->timeout; + usvc->netmask = usvc_compat->netmask; +} + +static void ip_vs_copy_udest_compat(struct ip_vs_dest_user_kern *udest, + struct ip_vs_dest_user *udest_compat) +{ + udest->addr.ip = udest_compat->addr; + udest->port = udest_compat->port; + udest->conn_flags = udest_compat->conn_flags; + udest->weight = udest_compat->weight; + udest->u_threshold = udest_compat->u_threshold; + udest->l_threshold = udest_compat->l_threshold; +} + static int do_ip_vs_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len) { int ret; unsigned char arg[MAX_ARG_LEN]; - struct ip_vs_service_user *usvc; + struct ip_vs_service_user *usvc_compat; + struct ip_vs_service_user_kern usvc; struct ip_vs_service *svc; - struct ip_vs_dest_user *udest; + struct ip_vs_dest_user *udest_compat; + struct ip_vs_dest_user_kern udest; if (!capable(CAP_NET_ADMIN)) return -EPERM; @@ -1952,35 +1989,40 @@ do_ip_vs_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len) goto out_unlock; } - usvc = (struct ip_vs_service_user *)arg; - udest = (struct ip_vs_dest_user *)(usvc + 1); + usvc_compat = (struct ip_vs_service_user *)arg; + udest_compat = (struct ip_vs_dest_user *)(usvc_compat + 1); + + /* We only use the new structs internally, so copy userspace compat + * structs to extended internal versions */ + ip_vs_copy_usvc_compat(&usvc, usvc_compat); + ip_vs_copy_udest_compat(&udest, udest_compat); if (cmd == IP_VS_SO_SET_ZERO) { /* if no service address is set, zero counters in all */ - if (!usvc->fwmark && !usvc->addr && !usvc->port) { + if (!usvc.fwmark && !usvc.addr.ip && !usvc.port) { ret = ip_vs_zero_all(); goto out_unlock; } } /* Check for valid protocol: TCP or UDP, even for fwmark!=0 */ - if (usvc->protocol!=IPPROTO_TCP && usvc->protocol!=IPPROTO_UDP) { + if (usvc.protocol != IPPROTO_TCP && usvc.protocol != IPPROTO_UDP) { IP_VS_ERR("set_ctl: invalid protocol: %d %d.%d.%d.%d:%d %s\n", - usvc->protocol, NIPQUAD(usvc->addr), - ntohs(usvc->port), usvc->sched_name); + usvc.protocol, NIPQUAD(usvc.addr.ip), + ntohs(usvc.port), usvc.sched_name); ret = -EFAULT; goto out_unlock; } /* Lookup the exact service by or fwmark */ - if (usvc->fwmark == 0) - svc = __ip_vs_service_get(usvc->protocol, - usvc->addr, usvc->port); + if (usvc.fwmark == 0) + svc = __ip_vs_service_get(usvc.protocol, + usvc.addr.ip, usvc.port); else - svc = __ip_vs_svc_fwm_get(usvc->fwmark); + svc = __ip_vs_svc_fwm_get(usvc.fwmark); if (cmd != IP_VS_SO_SET_ADD - && (svc == NULL || svc->protocol != usvc->protocol)) { + && (svc == NULL || svc->protocol != usvc.protocol)) { ret = -ESRCH; goto out_unlock; } @@ -1990,10 +2032,10 @@ do_ip_vs_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len) if (svc != NULL) ret = -EEXIST; else - ret = ip_vs_add_service(usvc, &svc); + ret = ip_vs_add_service(&usvc, &svc); break; case IP_VS_SO_SET_EDIT: - ret = ip_vs_edit_service(svc, usvc); + ret = ip_vs_edit_service(svc, &usvc); break; case IP_VS_SO_SET_DEL: ret = ip_vs_del_service(svc); @@ -2004,13 +2046,13 @@ do_ip_vs_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len) ret = ip_vs_zero_service(svc); break; case IP_VS_SO_SET_ADDDEST: - ret = ip_vs_add_dest(svc, udest); + ret = ip_vs_add_dest(svc, &udest); break; case IP_VS_SO_SET_EDITDEST: - ret = ip_vs_edit_dest(svc, udest); + ret = ip_vs_edit_dest(svc, &udest); break; case IP_VS_SO_SET_DELDEST: - ret = ip_vs_del_dest(svc, udest); + ret = ip_vs_del_dest(svc, &udest); break; default: ret = -EINVAL; @@ -2517,7 +2559,7 @@ nla_put_failure: return skb->len; } -static int ip_vs_genl_parse_service(struct ip_vs_service_user *usvc, +static int ip_vs_genl_parse_service(struct ip_vs_service_user_kern *usvc, struct nlattr *nla, int full_entry) { struct nlattr *attrs[IPVS_SVC_ATTR_MAX + 1]; @@ -2537,6 +2579,7 @@ static int ip_vs_genl_parse_service(struct ip_vs_service_user *usvc, if (!(nla_af && (nla_fwmark || (nla_port && nla_protocol && nla_addr)))) return -EINVAL; + usvc->af = nla_get_u16(nla_af); /* For now, only support IPv4 */ if (nla_get_u16(nla_af) != AF_INET) return -EAFNOSUPPORT; @@ -2572,7 +2615,7 @@ static int ip_vs_genl_parse_service(struct ip_vs_service_user *usvc, if (usvc->fwmark) svc = __ip_vs_svc_fwm_get(usvc->fwmark); else - svc = __ip_vs_service_get(usvc->protocol, usvc->addr, + svc = __ip_vs_service_get(usvc->protocol, usvc->addr.ip, usvc->port); if (svc) { usvc->flags = svc->flags; @@ -2583,9 +2626,7 @@ static int ip_vs_genl_parse_service(struct ip_vs_service_user *usvc, /* set new flags from userland */ usvc->flags = (usvc->flags & ~flags.mask) | (flags.flags & flags.mask); - - strlcpy(usvc->sched_name, nla_data(nla_sched), - sizeof(usvc->sched_name)); + usvc->sched_name = nla_data(nla_sched); usvc->timeout = nla_get_u32(nla_timeout); usvc->netmask = nla_get_u32(nla_netmask); } @@ -2595,7 +2636,7 @@ static int ip_vs_genl_parse_service(struct ip_vs_service_user *usvc, static struct ip_vs_service *ip_vs_genl_find_service(struct nlattr *nla) { - struct ip_vs_service_user usvc; + struct ip_vs_service_user_kern usvc; int ret; ret = ip_vs_genl_parse_service(&usvc, nla, 0); @@ -2605,7 +2646,7 @@ static struct ip_vs_service *ip_vs_genl_find_service(struct nlattr *nla) if (usvc.fwmark) return __ip_vs_svc_fwm_get(usvc.fwmark); else - return __ip_vs_service_get(usvc.protocol, usvc.addr, + return __ip_vs_service_get(usvc.protocol, usvc.addr.ip, usvc.port); } @@ -2705,7 +2746,7 @@ out_err: return skb->len; } -static int ip_vs_genl_parse_dest(struct ip_vs_dest_user *udest, +static int ip_vs_genl_parse_dest(struct ip_vs_dest_user_kern *udest, struct nlattr *nla, int full_entry) { struct nlattr *attrs[IPVS_DEST_ATTR_MAX + 1]; @@ -2861,8 +2902,8 @@ static int ip_vs_genl_set_config(struct nlattr **attrs) static int ip_vs_genl_set_cmd(struct sk_buff *skb, struct genl_info *info) { struct ip_vs_service *svc = NULL; - struct ip_vs_service_user usvc; - struct ip_vs_dest_user udest; + struct ip_vs_service_user_kern usvc; + struct ip_vs_dest_user_kern udest; int ret = 0, cmd; int need_full_svc = 0, need_full_dest = 0; @@ -2914,7 +2955,8 @@ static int ip_vs_genl_set_cmd(struct sk_buff *skb, struct genl_info *info) /* Lookup the exact service by or fwmark */ if (usvc.fwmark == 0) - svc = __ip_vs_service_get(usvc.protocol, usvc.addr, usvc.port); + svc = __ip_vs_service_get(usvc.protocol, usvc.addr.ip, + usvc.port); else svc = __ip_vs_svc_fwm_get(usvc.fwmark); -- cgit v1.2.3-70-g09d2 From 3c2e0505d25cdc9425336f167fd4ff5f505aecff Mon Sep 17 00:00:00 2001 From: Julius Volz Date: Tue, 2 Sep 2008 15:55:38 +0200 Subject: IPVS: Add v6 support to ip_vs_service_get() Add support for selecting services based on their address family to ip_vs_service_get() and adjust the callers. Signed-off-by: Julius Volz Signed-off-by: Simon Horman --- include/net/ip_vs.h | 3 ++- net/ipv4/ipvs/ip_vs_ctl.c | 24 +++++++++++++----------- net/ipv4/ipvs/ip_vs_proto_tcp.c | 9 ++++++--- net/ipv4/ipvs/ip_vs_proto_udp.c | 11 +++++++---- 4 files changed, 28 insertions(+), 19 deletions(-) (limited to 'include/net') diff --git a/include/net/ip_vs.h b/include/net/ip_vs.h index 1adf8a026e4..30baed0e696 100644 --- a/include/net/ip_vs.h +++ b/include/net/ip_vs.h @@ -783,7 +783,8 @@ extern struct ip_vs_stats ip_vs_stats; extern const struct ctl_path net_vs_ctl_path[]; extern struct ip_vs_service * -ip_vs_service_get(__u32 fwmark, __u16 protocol, __be32 vaddr, __be16 vport); +ip_vs_service_get(int af, __u32 fwmark, __u16 protocol, + const union nf_inet_addr *vaddr, __be16 vport); static inline void ip_vs_service_put(struct ip_vs_service *svc) { diff --git a/net/ipv4/ipvs/ip_vs_ctl.c b/net/ipv4/ipvs/ip_vs_ctl.c index a2d69b2ce6a..1f3fc66e694 100644 --- a/net/ipv4/ipvs/ip_vs_ctl.c +++ b/net/ipv4/ipvs/ip_vs_ctl.c @@ -421,23 +421,24 @@ __ip_vs_svc_fwm_get(int af, __u32 fwmark) } struct ip_vs_service * -ip_vs_service_get(__u32 fwmark, __u16 protocol, __be32 vaddr, __be16 vport) +ip_vs_service_get(int af, __u32 fwmark, __u16 protocol, + const union nf_inet_addr *vaddr, __be16 vport) { struct ip_vs_service *svc; - union nf_inet_addr _vaddr = { .ip = vaddr }; + read_lock(&__ip_vs_svc_lock); /* * Check the table hashed by fwmark first */ - if (fwmark && (svc = __ip_vs_svc_fwm_get(AF_INET, fwmark))) + if (fwmark && (svc = __ip_vs_svc_fwm_get(af, fwmark))) goto out; /* * Check the table hashed by * for "full" addressed entries */ - svc = __ip_vs_service_get(AF_INET, protocol, &_vaddr, vport); + svc = __ip_vs_service_get(af, protocol, vaddr, vport); if (svc == NULL && protocol == IPPROTO_TCP @@ -447,7 +448,7 @@ ip_vs_service_get(__u32 fwmark, __u16 protocol, __be32 vaddr, __be16 vport) * Check if ftp service entry exists, the packet * might belong to FTP data connections. */ - svc = __ip_vs_service_get(AF_INET, protocol, &_vaddr, FTPPORT); + svc = __ip_vs_service_get(af, protocol, vaddr, FTPPORT); } if (svc == NULL @@ -455,16 +456,16 @@ ip_vs_service_get(__u32 fwmark, __u16 protocol, __be32 vaddr, __be16 vport) /* * Check if the catch-all port (port zero) exists */ - svc = __ip_vs_service_get(AF_INET, protocol, &_vaddr, 0); + svc = __ip_vs_service_get(af, protocol, vaddr, 0); } out: read_unlock(&__ip_vs_svc_lock); - IP_VS_DBG(9, "lookup service: fwm %u %s %u.%u.%u.%u:%u %s\n", - fwmark, ip_vs_proto_name(protocol), - NIPQUAD(vaddr), ntohs(vport), - svc?"hit":"not hit"); + IP_VS_DBG_BUF(9, "lookup service: fwm %u %s %s:%u %s\n", + fwmark, ip_vs_proto_name(protocol), + IP_VS_DBG_ADDR(af, vaddr), ntohs(vport), + svc ? "hit" : "not hit"); return svc; } @@ -605,8 +606,9 @@ struct ip_vs_dest *ip_vs_find_dest(__be32 daddr, __be16 dport, { struct ip_vs_dest *dest; struct ip_vs_service *svc; + union nf_inet_addr _vaddr = { .ip = vaddr }; - svc = ip_vs_service_get(0, protocol, vaddr, vport); + svc = ip_vs_service_get(AF_INET, 0, protocol, &_vaddr, vport); if (!svc) return NULL; dest = ip_vs_lookup_dest(svc, daddr, dport); diff --git a/net/ipv4/ipvs/ip_vs_proto_tcp.c b/net/ipv4/ipvs/ip_vs_proto_tcp.c index 15860e1441b..fe93c9e6ff6 100644 --- a/net/ipv4/ipvs/ip_vs_proto_tcp.c +++ b/net/ipv4/ipvs/ip_vs_proto_tcp.c @@ -74,16 +74,19 @@ tcp_conn_schedule(struct sk_buff *skb, { struct ip_vs_service *svc; struct tcphdr _tcph, *th; + struct ip_vs_iphdr iph; - th = skb_header_pointer(skb, ip_hdrlen(skb), sizeof(_tcph), &_tcph); + ip_vs_fill_iphdr(AF_INET, skb_network_header(skb), &iph); + + th = skb_header_pointer(skb, iph.len, sizeof(_tcph), &_tcph); if (th == NULL) { *verdict = NF_DROP; return 0; } if (th->syn && - (svc = ip_vs_service_get(skb->mark, ip_hdr(skb)->protocol, - ip_hdr(skb)->daddr, th->dest))) { + (svc = ip_vs_service_get(AF_INET, skb->mark, iph.protocol, + &iph.daddr, th->dest))) { if (ip_vs_todrop()) { /* * It seems that we are very loaded. diff --git a/net/ipv4/ipvs/ip_vs_proto_udp.c b/net/ipv4/ipvs/ip_vs_proto_udp.c index 8dfad5db829..d208ed6eb9f 100644 --- a/net/ipv4/ipvs/ip_vs_proto_udp.c +++ b/net/ipv4/ipvs/ip_vs_proto_udp.c @@ -80,16 +80,19 @@ udp_conn_schedule(struct sk_buff *skb, struct ip_vs_protocol *pp, { struct ip_vs_service *svc; struct udphdr _udph, *uh; + struct ip_vs_iphdr iph; - uh = skb_header_pointer(skb, ip_hdrlen(skb), - sizeof(_udph), &_udph); + ip_vs_fill_iphdr(AF_INET, skb_network_header(skb), &iph); + + uh = skb_header_pointer(skb, iph.len, sizeof(_udph), &_udph); if (uh == NULL) { *verdict = NF_DROP; return 0; } - if ((svc = ip_vs_service_get(skb->mark, ip_hdr(skb)->protocol, - ip_hdr(skb)->daddr, uh->dest))) { + svc = ip_vs_service_get(AF_INET, skb->mark, iph.protocol, + &iph.daddr, uh->dest); + if (svc) { if (ip_vs_todrop()) { /* * It seems that we are very loaded. -- cgit v1.2.3-70-g09d2 From b14198f6c1bea1687d20723db35d8effecd9d899 Mon Sep 17 00:00:00 2001 From: Julius Volz Date: Tue, 2 Sep 2008 15:55:39 +0200 Subject: IPVS: Add IPv6 support flag to schedulers Add 'supports_ipv6' flag to struct ip_vs_scheduler to indicate whether a scheduler supports IPv6. Set the flag to 1 in schedulers that work with IPv6, 0 otherwise. This flag is checked in a later patch while trying to add a service with a specific scheduler. Adjust debug in v6-supporting schedulers to work with both address families. Signed-off-by: Julius Volz Signed-off-by: Simon Horman --- include/net/ip_vs.h | 3 +++ net/ipv4/ipvs/ip_vs_dh.c | 3 +++ net/ipv4/ipvs/ip_vs_lblc.c | 3 +++ net/ipv4/ipvs/ip_vs_lblcr.c | 3 +++ net/ipv4/ipvs/ip_vs_lc.c | 11 +++++++---- net/ipv4/ipvs/ip_vs_nq.c | 15 +++++++++------ net/ipv4/ipvs/ip_vs_rr.c | 13 ++++++++----- net/ipv4/ipvs/ip_vs_sed.c | 15 +++++++++------ net/ipv4/ipvs/ip_vs_sh.c | 3 +++ net/ipv4/ipvs/ip_vs_wlc.c | 15 +++++++++------ net/ipv4/ipvs/ip_vs_wrr.c | 15 +++++++++------ 11 files changed, 66 insertions(+), 33 deletions(-) (limited to 'include/net') diff --git a/include/net/ip_vs.h b/include/net/ip_vs.h index 30baed0e696..3d3b699dc02 100644 --- a/include/net/ip_vs.h +++ b/include/net/ip_vs.h @@ -516,6 +516,9 @@ struct ip_vs_scheduler { char *name; /* scheduler name */ atomic_t refcnt; /* reference counter */ struct module *module; /* THIS_MODULE/NULL */ +#ifdef CONFIG_IP_VS_IPV6 + int supports_ipv6; /* scheduler has IPv6 support */ +#endif /* scheduler initializing service */ int (*init_service)(struct ip_vs_service *svc); diff --git a/net/ipv4/ipvs/ip_vs_dh.c b/net/ipv4/ipvs/ip_vs_dh.c index 9f9d795dbd7..a16943fd72f 100644 --- a/net/ipv4/ipvs/ip_vs_dh.c +++ b/net/ipv4/ipvs/ip_vs_dh.c @@ -234,6 +234,9 @@ static struct ip_vs_scheduler ip_vs_dh_scheduler = .refcnt = ATOMIC_INIT(0), .module = THIS_MODULE, .n_list = LIST_HEAD_INIT(ip_vs_dh_scheduler.n_list), +#ifdef CONFIG_IP_VS_IPV6 + .supports_ipv6 = 0, +#endif .init_service = ip_vs_dh_init_svc, .done_service = ip_vs_dh_done_svc, .update_service = ip_vs_dh_update_svc, diff --git a/net/ipv4/ipvs/ip_vs_lblc.c b/net/ipv4/ipvs/ip_vs_lblc.c index 69309edc0c4..6ecef3518ca 100644 --- a/net/ipv4/ipvs/ip_vs_lblc.c +++ b/net/ipv4/ipvs/ip_vs_lblc.c @@ -522,6 +522,9 @@ static struct ip_vs_scheduler ip_vs_lblc_scheduler = .refcnt = ATOMIC_INIT(0), .module = THIS_MODULE, .n_list = LIST_HEAD_INIT(ip_vs_lblc_scheduler.n_list), +#ifdef CONFIG_IP_VS_IPV6 + .supports_ipv6 = 0, +#endif .init_service = ip_vs_lblc_init_svc, .done_service = ip_vs_lblc_done_svc, .schedule = ip_vs_lblc_schedule, diff --git a/net/ipv4/ipvs/ip_vs_lblcr.c b/net/ipv4/ipvs/ip_vs_lblcr.c index 51c746e2083..1f75ea83bcf 100644 --- a/net/ipv4/ipvs/ip_vs_lblcr.c +++ b/net/ipv4/ipvs/ip_vs_lblcr.c @@ -722,6 +722,9 @@ static struct ip_vs_scheduler ip_vs_lblcr_scheduler = .refcnt = ATOMIC_INIT(0), .module = THIS_MODULE, .n_list = LIST_HEAD_INIT(ip_vs_lblcr_scheduler.n_list), +#ifdef CONFIG_IP_VS_IPV6 + .supports_ipv6 = 0, +#endif .init_service = ip_vs_lblcr_init_svc, .done_service = ip_vs_lblcr_done_svc, .schedule = ip_vs_lblcr_schedule, diff --git a/net/ipv4/ipvs/ip_vs_lc.c b/net/ipv4/ipvs/ip_vs_lc.c index 551d293347f..b69f808ac46 100644 --- a/net/ipv4/ipvs/ip_vs_lc.c +++ b/net/ipv4/ipvs/ip_vs_lc.c @@ -67,10 +67,10 @@ ip_vs_lc_schedule(struct ip_vs_service *svc, const struct sk_buff *skb) } if (least) - IP_VS_DBG(6, "LC: server %u.%u.%u.%u:%u activeconns %d inactconns %d\n", - NIPQUAD(least->addr.ip), ntohs(least->port), - atomic_read(&least->activeconns), - atomic_read(&least->inactconns)); + IP_VS_DBG_BUF(6, "LC: server %s:%u activeconns %d inactconns %d\n", + IP_VS_DBG_ADDR(svc->af, &least->addr), ntohs(least->port), + atomic_read(&least->activeconns), + atomic_read(&least->inactconns)); return least; } @@ -81,6 +81,9 @@ static struct ip_vs_scheduler ip_vs_lc_scheduler = { .refcnt = ATOMIC_INIT(0), .module = THIS_MODULE, .n_list = LIST_HEAD_INIT(ip_vs_lc_scheduler.n_list), +#ifdef CONFIG_IP_VS_IPV6 + .supports_ipv6 = 1, +#endif .schedule = ip_vs_lc_schedule, }; diff --git a/net/ipv4/ipvs/ip_vs_nq.c b/net/ipv4/ipvs/ip_vs_nq.c index aa0e32ad348..9a2d8033f08 100644 --- a/net/ipv4/ipvs/ip_vs_nq.c +++ b/net/ipv4/ipvs/ip_vs_nq.c @@ -99,12 +99,12 @@ ip_vs_nq_schedule(struct ip_vs_service *svc, const struct sk_buff *skb) return NULL; out: - IP_VS_DBG(6, "NQ: server %u.%u.%u.%u:%u " - "activeconns %d refcnt %d weight %d overhead %d\n", - NIPQUAD(least->addr.ip), ntohs(least->port), - atomic_read(&least->activeconns), - atomic_read(&least->refcnt), - atomic_read(&least->weight), loh); + IP_VS_DBG_BUF(6, "NQ: server %s:%u " + "activeconns %d refcnt %d weight %d overhead %d\n", + IP_VS_DBG_ADDR(svc->af, &least->addr), ntohs(least->port), + atomic_read(&least->activeconns), + atomic_read(&least->refcnt), + atomic_read(&least->weight), loh); return least; } @@ -116,6 +116,9 @@ static struct ip_vs_scheduler ip_vs_nq_scheduler = .refcnt = ATOMIC_INIT(0), .module = THIS_MODULE, .n_list = LIST_HEAD_INIT(ip_vs_nq_scheduler.n_list), +#ifdef CONFIG_IP_VS_IPV6 + .supports_ipv6 = 1, +#endif .schedule = ip_vs_nq_schedule, }; diff --git a/net/ipv4/ipvs/ip_vs_rr.c b/net/ipv4/ipvs/ip_vs_rr.c index 27f0b624283..a22195f68ac 100644 --- a/net/ipv4/ipvs/ip_vs_rr.c +++ b/net/ipv4/ipvs/ip_vs_rr.c @@ -74,11 +74,11 @@ ip_vs_rr_schedule(struct ip_vs_service *svc, const struct sk_buff *skb) out: svc->sched_data = q; write_unlock(&svc->sched_lock); - IP_VS_DBG(6, "RR: server %u.%u.%u.%u:%u " - "activeconns %d refcnt %d weight %d\n", - NIPQUAD(dest->addr.ip), ntohs(dest->port), - atomic_read(&dest->activeconns), - atomic_read(&dest->refcnt), atomic_read(&dest->weight)); + IP_VS_DBG_BUF(6, "RR: server %s:%u " + "activeconns %d refcnt %d weight %d\n", + IP_VS_DBG_ADDR(svc->af, &dest->addr), ntohs(dest->port), + atomic_read(&dest->activeconns), + atomic_read(&dest->refcnt), atomic_read(&dest->weight)); return dest; } @@ -89,6 +89,9 @@ static struct ip_vs_scheduler ip_vs_rr_scheduler = { .refcnt = ATOMIC_INIT(0), .module = THIS_MODULE, .n_list = LIST_HEAD_INIT(ip_vs_rr_scheduler.n_list), +#ifdef CONFIG_IP_VS_IPV6 + .supports_ipv6 = 1, +#endif .init_service = ip_vs_rr_init_svc, .update_service = ip_vs_rr_update_svc, .schedule = ip_vs_rr_schedule, diff --git a/net/ipv4/ipvs/ip_vs_sed.c b/net/ipv4/ipvs/ip_vs_sed.c index 38b574b2fdf..7d2f22f04b8 100644 --- a/net/ipv4/ipvs/ip_vs_sed.c +++ b/net/ipv4/ipvs/ip_vs_sed.c @@ -101,12 +101,12 @@ ip_vs_sed_schedule(struct ip_vs_service *svc, const struct sk_buff *skb) } } - IP_VS_DBG(6, "SED: server %u.%u.%u.%u:%u " - "activeconns %d refcnt %d weight %d overhead %d\n", - NIPQUAD(least->addr.ip), ntohs(least->port), - atomic_read(&least->activeconns), - atomic_read(&least->refcnt), - atomic_read(&least->weight), loh); + IP_VS_DBG_BUF(6, "SED: server %s:%u " + "activeconns %d refcnt %d weight %d overhead %d\n", + IP_VS_DBG_ADDR(svc->af, &least->addr), ntohs(least->port), + atomic_read(&least->activeconns), + atomic_read(&least->refcnt), + atomic_read(&least->weight), loh); return least; } @@ -118,6 +118,9 @@ static struct ip_vs_scheduler ip_vs_sed_scheduler = .refcnt = ATOMIC_INIT(0), .module = THIS_MODULE, .n_list = LIST_HEAD_INIT(ip_vs_sed_scheduler.n_list), +#ifdef CONFIG_IP_VS_IPV6 + .supports_ipv6 = 1, +#endif .schedule = ip_vs_sed_schedule, }; diff --git a/net/ipv4/ipvs/ip_vs_sh.c b/net/ipv4/ipvs/ip_vs_sh.c index c9e54e2ec29..1d96de27fef 100644 --- a/net/ipv4/ipvs/ip_vs_sh.c +++ b/net/ipv4/ipvs/ip_vs_sh.c @@ -231,6 +231,9 @@ static struct ip_vs_scheduler ip_vs_sh_scheduler = .refcnt = ATOMIC_INIT(0), .module = THIS_MODULE, .n_list = LIST_HEAD_INIT(ip_vs_sh_scheduler.n_list), +#ifdef CONFIG_IP_VS_IPV6 + .supports_ipv6 = 0, +#endif .init_service = ip_vs_sh_init_svc, .done_service = ip_vs_sh_done_svc, .update_service = ip_vs_sh_update_svc, diff --git a/net/ipv4/ipvs/ip_vs_wlc.c b/net/ipv4/ipvs/ip_vs_wlc.c index 09fd993040f..8c596e71259 100644 --- a/net/ipv4/ipvs/ip_vs_wlc.c +++ b/net/ipv4/ipvs/ip_vs_wlc.c @@ -89,12 +89,12 @@ ip_vs_wlc_schedule(struct ip_vs_service *svc, const struct sk_buff *skb) } } - IP_VS_DBG(6, "WLC: server %u.%u.%u.%u:%u " - "activeconns %d refcnt %d weight %d overhead %d\n", - NIPQUAD(least->addr.ip), ntohs(least->port), - atomic_read(&least->activeconns), - atomic_read(&least->refcnt), - atomic_read(&least->weight), loh); + IP_VS_DBG_BUF(6, "WLC: server %s:%u " + "activeconns %d refcnt %d weight %d overhead %d\n", + IP_VS_DBG_ADDR(svc->af, &least->addr), ntohs(least->port), + atomic_read(&least->activeconns), + atomic_read(&least->refcnt), + atomic_read(&least->weight), loh); return least; } @@ -106,6 +106,9 @@ static struct ip_vs_scheduler ip_vs_wlc_scheduler = .refcnt = ATOMIC_INIT(0), .module = THIS_MODULE, .n_list = LIST_HEAD_INIT(ip_vs_wlc_scheduler.n_list), +#ifdef CONFIG_IP_VS_IPV6 + .supports_ipv6 = 1, +#endif .schedule = ip_vs_wlc_schedule, }; diff --git a/net/ipv4/ipvs/ip_vs_wrr.c b/net/ipv4/ipvs/ip_vs_wrr.c index 19c49b234f3..7ea92fed50b 100644 --- a/net/ipv4/ipvs/ip_vs_wrr.c +++ b/net/ipv4/ipvs/ip_vs_wrr.c @@ -195,12 +195,12 @@ ip_vs_wrr_schedule(struct ip_vs_service *svc, const struct sk_buff *skb) } } - IP_VS_DBG(6, "WRR: server %u.%u.%u.%u:%u " - "activeconns %d refcnt %d weight %d\n", - NIPQUAD(dest->addr.ip), ntohs(dest->port), - atomic_read(&dest->activeconns), - atomic_read(&dest->refcnt), - atomic_read(&dest->weight)); + IP_VS_DBG_BUF(6, "WRR: server %s:%u " + "activeconns %d refcnt %d weight %d\n", + IP_VS_DBG_ADDR(svc->af, &dest->addr), ntohs(dest->port), + atomic_read(&dest->activeconns), + atomic_read(&dest->refcnt), + atomic_read(&dest->weight)); out: write_unlock(&svc->sched_lock); @@ -213,6 +213,9 @@ static struct ip_vs_scheduler ip_vs_wrr_scheduler = { .refcnt = ATOMIC_INIT(0), .module = THIS_MODULE, .n_list = LIST_HEAD_INIT(ip_vs_wrr_scheduler.n_list), +#ifdef CONFIG_IP_VS_IPV6 + .supports_ipv6 = 1, +#endif .init_service = ip_vs_wrr_init_svc, .done_service = ip_vs_wrr_done_svc, .update_service = ip_vs_wrr_update_svc, -- cgit v1.2.3-70-g09d2 From 51ef348b14183789e4cb3444d05ce83b1b69d8fb Mon Sep 17 00:00:00 2001 From: Julius Volz Date: Tue, 2 Sep 2008 15:55:40 +0200 Subject: IPVS: Add 'af' args to protocol handler functions Add 'af' arguments to conn_schedule(), conn_in_get(), conn_out_get() and csum_check() function pointers in struct ip_vs_protocol. Extend the respective functions for TCP, UDP, AH and ESP and adjust the callers. The changes in the callers need to be somewhat extensive, since they now need to pass a filled out struct ip_vs_iphdr * to the modified functions instead of a struct iphdr *. Signed-off-by: Julius Volz Signed-off-by: Simon Horman --- include/net/ip_vs.h | 15 ++++--- net/ipv4/ipvs/ip_vs_core.c | 64 +++++++++++++++--------------- net/ipv4/ipvs/ip_vs_proto_ah_esp.c | 56 +++++++++++++------------- net/ipv4/ipvs/ip_vs_proto_tcp.c | 79 ++++++++++++++++++++++++------------- net/ipv4/ipvs/ip_vs_proto_udp.c | 81 ++++++++++++++++++++++++-------------- 5 files changed, 171 insertions(+), 124 deletions(-) (limited to 'include/net') diff --git a/include/net/ip_vs.h b/include/net/ip_vs.h index 3d3b699dc02..68f004f9bcc 100644 --- a/include/net/ip_vs.h +++ b/include/net/ip_vs.h @@ -296,21 +296,23 @@ struct ip_vs_protocol { void (*exit)(struct ip_vs_protocol *pp); - int (*conn_schedule)(struct sk_buff *skb, + int (*conn_schedule)(int af, struct sk_buff *skb, struct ip_vs_protocol *pp, int *verdict, struct ip_vs_conn **cpp); struct ip_vs_conn * - (*conn_in_get)(const struct sk_buff *skb, + (*conn_in_get)(int af, + const struct sk_buff *skb, struct ip_vs_protocol *pp, - const struct iphdr *iph, + const struct ip_vs_iphdr *iph, unsigned int proto_off, int inverse); struct ip_vs_conn * - (*conn_out_get)(const struct sk_buff *skb, + (*conn_out_get)(int af, + const struct sk_buff *skb, struct ip_vs_protocol *pp, - const struct iphdr *iph, + const struct ip_vs_iphdr *iph, unsigned int proto_off, int inverse); @@ -320,7 +322,8 @@ struct ip_vs_protocol { int (*dnat_handler)(struct sk_buff *skb, struct ip_vs_protocol *pp, struct ip_vs_conn *cp); - int (*csum_check)(struct sk_buff *skb, struct ip_vs_protocol *pp); + int (*csum_check)(int af, struct sk_buff *skb, + struct ip_vs_protocol *pp); const char *(*state_name)(int state); diff --git a/net/ipv4/ipvs/ip_vs_core.c b/net/ipv4/ipvs/ip_vs_core.c index 4a54f33b60d..34aaa1480d9 100644 --- a/net/ipv4/ipvs/ip_vs_core.c +++ b/net/ipv4/ipvs/ip_vs_core.c @@ -572,6 +572,7 @@ static int ip_vs_out_icmp(struct sk_buff *skb, int *related) struct iphdr *iph; struct icmphdr _icmph, *ic; struct iphdr _ciph, *cih; /* The ip header contained within the ICMP */ + struct ip_vs_iphdr ciph; struct ip_vs_conn *cp; struct ip_vs_protocol *pp; unsigned int offset, ihl, verdict; @@ -627,8 +628,9 @@ static int ip_vs_out_icmp(struct sk_buff *skb, int *related) offset += cih->ihl * 4; + ip_vs_fill_iphdr(AF_INET, cih, &ciph); /* The embedded headers contain source and dest in reverse order */ - cp = pp->conn_out_get(skb, pp, cih, offset, 1); + cp = pp->conn_out_get(AF_INET, skb, pp, &ciph, offset, 1); if (!cp) return NF_ACCEPT; @@ -686,43 +688,41 @@ ip_vs_out(unsigned int hooknum, struct sk_buff *skb, const struct net_device *in, const struct net_device *out, int (*okfn)(struct sk_buff *)) { - struct iphdr *iph; + struct ip_vs_iphdr iph; struct ip_vs_protocol *pp; struct ip_vs_conn *cp; - int ihl; EnterFunction(11); if (skb->ipvs_property) return NF_ACCEPT; - iph = ip_hdr(skb); - if (unlikely(iph->protocol == IPPROTO_ICMP)) { + ip_vs_fill_iphdr(AF_INET, skb_network_header(skb), &iph); + if (unlikely(iph.protocol == IPPROTO_ICMP)) { int related, verdict = ip_vs_out_icmp(skb, &related); if (related) return verdict; - iph = ip_hdr(skb); + ip_vs_fill_iphdr(AF_INET, skb_network_header(skb), &iph); } - pp = ip_vs_proto_get(iph->protocol); + pp = ip_vs_proto_get(iph.protocol); if (unlikely(!pp)) return NF_ACCEPT; /* reassemble IP fragments */ - if (unlikely(iph->frag_off & htons(IP_MF|IP_OFFSET) && + if (unlikely(ip_hdr(skb)->frag_off & htons(IP_MF|IP_OFFSET) && !pp->dont_defrag)) { if (ip_vs_gather_frags(skb, IP_DEFRAG_VS_OUT)) return NF_STOLEN; - iph = ip_hdr(skb); - } - ihl = iph->ihl << 2; + ip_vs_fill_iphdr(AF_INET, skb_network_header(skb), &iph); + } /* * Check if the packet belongs to an existing entry */ - cp = pp->conn_out_get(skb, pp, iph, ihl, 0); + cp = pp->conn_out_get(AF_INET, skb, pp, &iph, iph.len, 0); if (unlikely(!cp)) { if (sysctl_ip_vs_nat_icmp_send && @@ -730,18 +730,18 @@ ip_vs_out(unsigned int hooknum, struct sk_buff *skb, pp->protocol == IPPROTO_UDP)) { __be16 _ports[2], *pptr; - pptr = skb_header_pointer(skb, ihl, + pptr = skb_header_pointer(skb, iph.len, sizeof(_ports), _ports); if (pptr == NULL) return NF_ACCEPT; /* Not for me */ - if (ip_vs_lookup_real_service(iph->protocol, - iph->saddr, pptr[0])) { + if (ip_vs_lookup_real_service(iph.protocol, + iph.saddr.ip, pptr[0])) { /* * Notify the real server: there is no * existing entry if it is not RST * packet or not TCP packet. */ - if (iph->protocol != IPPROTO_TCP + if (iph.protocol != IPPROTO_TCP || !is_tcp_reset(skb)) { icmp_send(skb,ICMP_DEST_UNREACH, ICMP_PORT_UNREACH, 0); @@ -756,7 +756,7 @@ ip_vs_out(unsigned int hooknum, struct sk_buff *skb, IP_VS_DBG_PKT(11, pp, skb, 0, "Outgoing packet"); - if (!skb_make_writable(skb, ihl)) + if (!skb_make_writable(skb, iph.len)) goto drop; /* mangle the packet */ @@ -804,6 +804,7 @@ ip_vs_in_icmp(struct sk_buff *skb, int *related, unsigned int hooknum) struct iphdr *iph; struct icmphdr _icmph, *ic; struct iphdr _ciph, *cih; /* The ip header contained within the ICMP */ + struct ip_vs_iphdr ciph; struct ip_vs_conn *cp; struct ip_vs_protocol *pp; unsigned int offset, ihl, verdict; @@ -860,8 +861,9 @@ ip_vs_in_icmp(struct sk_buff *skb, int *related, unsigned int hooknum) offset += cih->ihl * 4; + ip_vs_fill_iphdr(AF_INET, cih, &ciph); /* The embedded headers contain source and dest in reverse order */ - cp = pp->conn_in_get(skb, pp, cih, offset, 1); + cp = pp->conn_in_get(AF_INET, skb, pp, &ciph, offset, 1); if (!cp) return NF_ACCEPT; @@ -897,11 +899,12 @@ ip_vs_in(unsigned int hooknum, struct sk_buff *skb, const struct net_device *in, const struct net_device *out, int (*okfn)(struct sk_buff *)) { - struct iphdr *iph; + struct ip_vs_iphdr iph; struct ip_vs_protocol *pp; struct ip_vs_conn *cp; int ret, restart; - int ihl; + + ip_vs_fill_iphdr(AF_INET, skb_network_header(skb), &iph); /* * Big tappo: only PACKET_HOST (neither loopback nor mcasts) @@ -909,38 +912,35 @@ ip_vs_in(unsigned int hooknum, struct sk_buff *skb, */ if (unlikely(skb->pkt_type != PACKET_HOST || skb->dev->flags & IFF_LOOPBACK || skb->sk)) { - IP_VS_DBG(12, "packet type=%d proto=%d daddr=%d.%d.%d.%d ignored\n", - skb->pkt_type, - ip_hdr(skb)->protocol, - NIPQUAD(ip_hdr(skb)->daddr)); + IP_VS_DBG_BUF(12, "packet type=%d proto=%d daddr=%s ignored\n", + skb->pkt_type, + iph.protocol, + IP_VS_DBG_ADDR(AF_INET, &iph.daddr)); return NF_ACCEPT; } - iph = ip_hdr(skb); - if (unlikely(iph->protocol == IPPROTO_ICMP)) { + if (unlikely(iph.protocol == IPPROTO_ICMP)) { int related, verdict = ip_vs_in_icmp(skb, &related, hooknum); if (related) return verdict; - iph = ip_hdr(skb); + ip_vs_fill_iphdr(AF_INET, skb_network_header(skb), &iph); } /* Protocol supported? */ - pp = ip_vs_proto_get(iph->protocol); + pp = ip_vs_proto_get(iph.protocol); if (unlikely(!pp)) return NF_ACCEPT; - ihl = iph->ihl << 2; - /* * Check if the packet belongs to an existing connection entry */ - cp = pp->conn_in_get(skb, pp, iph, ihl, 0); + cp = pp->conn_in_get(AF_INET, skb, pp, &iph, iph.len, 0); if (unlikely(!cp)) { int v; - if (!pp->conn_schedule(skb, pp, &v, &cp)) + if (!pp->conn_schedule(AF_INET, skb, pp, &v, &cp)) return v; } diff --git a/net/ipv4/ipvs/ip_vs_proto_ah_esp.c b/net/ipv4/ipvs/ip_vs_proto_ah_esp.c index 3f9ebd7639a..2a361a99174 100644 --- a/net/ipv4/ipvs/ip_vs_proto_ah_esp.c +++ b/net/ipv4/ipvs/ip_vs_proto_ah_esp.c @@ -39,25 +39,23 @@ struct isakmp_hdr { static struct ip_vs_conn * -ah_esp_conn_in_get(const struct sk_buff *skb, - struct ip_vs_protocol *pp, - const struct iphdr *iph, - unsigned int proto_off, +ah_esp_conn_in_get(int af, const struct sk_buff *skb, struct ip_vs_protocol *pp, + const struct ip_vs_iphdr *iph, unsigned int proto_off, int inverse) { struct ip_vs_conn *cp; if (likely(!inverse)) { cp = ip_vs_conn_in_get(IPPROTO_UDP, - iph->saddr, + iph->saddr.ip, htons(PORT_ISAKMP), - iph->daddr, + iph->daddr.ip, htons(PORT_ISAKMP)); } else { cp = ip_vs_conn_in_get(IPPROTO_UDP, - iph->daddr, + iph->daddr.ip, htons(PORT_ISAKMP), - iph->saddr, + iph->saddr.ip, htons(PORT_ISAKMP)); } @@ -66,12 +64,12 @@ ah_esp_conn_in_get(const struct sk_buff *skb, * We are not sure if the packet is from our * service, so our conn_schedule hook should return NF_ACCEPT */ - IP_VS_DBG(12, "Unknown ISAKMP entry for outin packet " - "%s%s %u.%u.%u.%u->%u.%u.%u.%u\n", - inverse ? "ICMP+" : "", - pp->name, - NIPQUAD(iph->saddr), - NIPQUAD(iph->daddr)); + IP_VS_DBG_BUF(12, "Unknown ISAKMP entry for outin packet " + "%s%s %s->%s\n", + inverse ? "ICMP+" : "", + pp->name, + IP_VS_DBG_ADDR(af, &iph->saddr), + IP_VS_DBG_ADDR(af, &iph->daddr)); } return cp; @@ -79,32 +77,35 @@ ah_esp_conn_in_get(const struct sk_buff *skb, static struct ip_vs_conn * -ah_esp_conn_out_get(const struct sk_buff *skb, struct ip_vs_protocol *pp, - const struct iphdr *iph, unsigned int proto_off, int inverse) +ah_esp_conn_out_get(int af, const struct sk_buff *skb, + struct ip_vs_protocol *pp, + const struct ip_vs_iphdr *iph, + unsigned int proto_off, + int inverse) { struct ip_vs_conn *cp; if (likely(!inverse)) { cp = ip_vs_conn_out_get(IPPROTO_UDP, - iph->saddr, + iph->saddr.ip, htons(PORT_ISAKMP), - iph->daddr, + iph->daddr.ip, htons(PORT_ISAKMP)); } else { cp = ip_vs_conn_out_get(IPPROTO_UDP, - iph->daddr, + iph->daddr.ip, htons(PORT_ISAKMP), - iph->saddr, + iph->saddr.ip, htons(PORT_ISAKMP)); } if (!cp) { - IP_VS_DBG(12, "Unknown ISAKMP entry for inout packet " - "%s%s %u.%u.%u.%u->%u.%u.%u.%u\n", - inverse ? "ICMP+" : "", - pp->name, - NIPQUAD(iph->saddr), - NIPQUAD(iph->daddr)); + IP_VS_DBG_BUF(12, "Unknown ISAKMP entry for inout packet " + "%s%s %s->%s\n", + inverse ? "ICMP+" : "", + pp->name, + IP_VS_DBG_ADDR(af, &iph->saddr), + IP_VS_DBG_ADDR(af, &iph->daddr)); } return cp; @@ -112,8 +113,7 @@ ah_esp_conn_out_get(const struct sk_buff *skb, struct ip_vs_protocol *pp, static int -ah_esp_conn_schedule(struct sk_buff *skb, - struct ip_vs_protocol *pp, +ah_esp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_protocol *pp, int *verdict, struct ip_vs_conn **cpp) { /* diff --git a/net/ipv4/ipvs/ip_vs_proto_tcp.c b/net/ipv4/ipvs/ip_vs_proto_tcp.c index fe93c9e6ff6..9211afa8f30 100644 --- a/net/ipv4/ipvs/ip_vs_proto_tcp.c +++ b/net/ipv4/ipvs/ip_vs_proto_tcp.c @@ -25,8 +25,9 @@ static struct ip_vs_conn * -tcp_conn_in_get(const struct sk_buff *skb, struct ip_vs_protocol *pp, - const struct iphdr *iph, unsigned int proto_off, int inverse) +tcp_conn_in_get(int af, const struct sk_buff *skb, struct ip_vs_protocol *pp, + const struct ip_vs_iphdr *iph, unsigned int proto_off, + int inverse) { __be16 _ports[2], *pptr; @@ -36,18 +37,19 @@ tcp_conn_in_get(const struct sk_buff *skb, struct ip_vs_protocol *pp, if (likely(!inverse)) { return ip_vs_conn_in_get(iph->protocol, - iph->saddr, pptr[0], - iph->daddr, pptr[1]); + iph->saddr.ip, pptr[0], + iph->daddr.ip, pptr[1]); } else { return ip_vs_conn_in_get(iph->protocol, - iph->daddr, pptr[1], - iph->saddr, pptr[0]); + iph->daddr.ip, pptr[1], + iph->saddr.ip, pptr[0]); } } static struct ip_vs_conn * -tcp_conn_out_get(const struct sk_buff *skb, struct ip_vs_protocol *pp, - const struct iphdr *iph, unsigned int proto_off, int inverse) +tcp_conn_out_get(int af, const struct sk_buff *skb, struct ip_vs_protocol *pp, + const struct ip_vs_iphdr *iph, unsigned int proto_off, + int inverse) { __be16 _ports[2], *pptr; @@ -57,26 +59,25 @@ tcp_conn_out_get(const struct sk_buff *skb, struct ip_vs_protocol *pp, if (likely(!inverse)) { return ip_vs_conn_out_get(iph->protocol, - iph->saddr, pptr[0], - iph->daddr, pptr[1]); + iph->saddr.ip, pptr[0], + iph->daddr.ip, pptr[1]); } else { return ip_vs_conn_out_get(iph->protocol, - iph->daddr, pptr[1], - iph->saddr, pptr[0]); + iph->daddr.ip, pptr[1], + iph->saddr.ip, pptr[0]); } } static int -tcp_conn_schedule(struct sk_buff *skb, - struct ip_vs_protocol *pp, +tcp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_protocol *pp, int *verdict, struct ip_vs_conn **cpp) { struct ip_vs_service *svc; struct tcphdr _tcph, *th; struct ip_vs_iphdr iph; - ip_vs_fill_iphdr(AF_INET, skb_network_header(skb), &iph); + ip_vs_fill_iphdr(af, skb_network_header(skb), &iph); th = skb_header_pointer(skb, iph.len, sizeof(_tcph), &_tcph); if (th == NULL) { @@ -85,8 +86,8 @@ tcp_conn_schedule(struct sk_buff *skb, } if (th->syn && - (svc = ip_vs_service_get(AF_INET, skb->mark, iph.protocol, - &iph.daddr, th->dest))) { + (svc = ip_vs_service_get(af, skb->mark, iph.protocol, &iph.daddr, + th->dest))) { if (ip_vs_todrop()) { /* * It seems that we are very loaded. @@ -136,7 +137,7 @@ tcp_snat_handler(struct sk_buff *skb, if (unlikely(cp->app != NULL)) { /* Some checks before mangling */ - if (pp->csum_check && !pp->csum_check(skb, pp)) + if (pp->csum_check && !pp->csum_check(AF_INET, skb, pp)) return 0; /* Call application helper if needed */ @@ -182,7 +183,7 @@ tcp_dnat_handler(struct sk_buff *skb, if (unlikely(cp->app != NULL)) { /* Some checks before mangling */ - if (pp->csum_check && !pp->csum_check(skb, pp)) + if (pp->csum_check && !pp->csum_check(AF_INET, skb, pp)) return 0; /* @@ -219,21 +220,43 @@ tcp_dnat_handler(struct sk_buff *skb, static int -tcp_csum_check(struct sk_buff *skb, struct ip_vs_protocol *pp) +tcp_csum_check(int af, struct sk_buff *skb, struct ip_vs_protocol *pp) { - const unsigned int tcphoff = ip_hdrlen(skb); + unsigned int tcphoff; + +#ifdef CONFIG_IP_VS_IPV6 + if (af == AF_INET6) + tcphoff = sizeof(struct ipv6hdr); + else +#endif + tcphoff = ip_hdrlen(skb); switch (skb->ip_summed) { case CHECKSUM_NONE: skb->csum = skb_checksum(skb, tcphoff, skb->len - tcphoff, 0); case CHECKSUM_COMPLETE: - if (csum_tcpudp_magic(ip_hdr(skb)->saddr, ip_hdr(skb)->daddr, - skb->len - tcphoff, - ip_hdr(skb)->protocol, skb->csum)) { - IP_VS_DBG_RL_PKT(0, pp, skb, 0, - "Failed checksum for"); - return 0; - } +#ifdef CONFIG_IP_VS_IPV6 + if (af == AF_INET6) { + if (csum_ipv6_magic(&ipv6_hdr(skb)->saddr, + &ipv6_hdr(skb)->daddr, + skb->len - tcphoff, + ipv6_hdr(skb)->nexthdr, + skb->csum)) { + IP_VS_DBG_RL_PKT(0, pp, skb, 0, + "Failed checksum for"); + return 0; + } + } else +#endif + if (csum_tcpudp_magic(ip_hdr(skb)->saddr, + ip_hdr(skb)->daddr, + skb->len - tcphoff, + ip_hdr(skb)->protocol, + skb->csum)) { + IP_VS_DBG_RL_PKT(0, pp, skb, 0, + "Failed checksum for"); + return 0; + } break; default: /* No need to checksum. */ diff --git a/net/ipv4/ipvs/ip_vs_proto_udp.c b/net/ipv4/ipvs/ip_vs_proto_udp.c index d208ed6eb9f..d3a1b1f2d10 100644 --- a/net/ipv4/ipvs/ip_vs_proto_udp.c +++ b/net/ipv4/ipvs/ip_vs_proto_udp.c @@ -24,8 +24,9 @@ #include static struct ip_vs_conn * -udp_conn_in_get(const struct sk_buff *skb, struct ip_vs_protocol *pp, - const struct iphdr *iph, unsigned int proto_off, int inverse) +udp_conn_in_get(int af, const struct sk_buff *skb, struct ip_vs_protocol *pp, + const struct ip_vs_iphdr *iph, unsigned int proto_off, + int inverse) { struct ip_vs_conn *cp; __be16 _ports[2], *pptr; @@ -36,12 +37,12 @@ udp_conn_in_get(const struct sk_buff *skb, struct ip_vs_protocol *pp, if (likely(!inverse)) { cp = ip_vs_conn_in_get(iph->protocol, - iph->saddr, pptr[0], - iph->daddr, pptr[1]); + iph->saddr.ip, pptr[0], + iph->daddr.ip, pptr[1]); } else { cp = ip_vs_conn_in_get(iph->protocol, - iph->daddr, pptr[1], - iph->saddr, pptr[0]); + iph->daddr.ip, pptr[1], + iph->saddr.ip, pptr[0]); } return cp; @@ -49,25 +50,25 @@ udp_conn_in_get(const struct sk_buff *skb, struct ip_vs_protocol *pp, static struct ip_vs_conn * -udp_conn_out_get(const struct sk_buff *skb, struct ip_vs_protocol *pp, - const struct iphdr *iph, unsigned int proto_off, int inverse) +udp_conn_out_get(int af, const struct sk_buff *skb, struct ip_vs_protocol *pp, + const struct ip_vs_iphdr *iph, unsigned int proto_off, + int inverse) { struct ip_vs_conn *cp; __be16 _ports[2], *pptr; - pptr = skb_header_pointer(skb, ip_hdrlen(skb), - sizeof(_ports), _ports); + pptr = skb_header_pointer(skb, proto_off, sizeof(_ports), _ports); if (pptr == NULL) return NULL; if (likely(!inverse)) { cp = ip_vs_conn_out_get(iph->protocol, - iph->saddr, pptr[0], - iph->daddr, pptr[1]); + iph->saddr.ip, pptr[0], + iph->daddr.ip, pptr[1]); } else { cp = ip_vs_conn_out_get(iph->protocol, - iph->daddr, pptr[1], - iph->saddr, pptr[0]); + iph->daddr.ip, pptr[1], + iph->saddr.ip, pptr[0]); } return cp; @@ -75,14 +76,14 @@ udp_conn_out_get(const struct sk_buff *skb, struct ip_vs_protocol *pp, static int -udp_conn_schedule(struct sk_buff *skb, struct ip_vs_protocol *pp, +udp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_protocol *pp, int *verdict, struct ip_vs_conn **cpp) { struct ip_vs_service *svc; struct udphdr _udph, *uh; struct ip_vs_iphdr iph; - ip_vs_fill_iphdr(AF_INET, skb_network_header(skb), &iph); + ip_vs_fill_iphdr(af, skb_network_header(skb), &iph); uh = skb_header_pointer(skb, iph.len, sizeof(_udph), &_udph); if (uh == NULL) { @@ -90,7 +91,7 @@ udp_conn_schedule(struct sk_buff *skb, struct ip_vs_protocol *pp, return 0; } - svc = ip_vs_service_get(AF_INET, skb->mark, iph.protocol, + svc = ip_vs_service_get(af, skb->mark, iph.protocol, &iph.daddr, uh->dest); if (svc) { if (ip_vs_todrop()) { @@ -143,7 +144,7 @@ udp_snat_handler(struct sk_buff *skb, if (unlikely(cp->app != NULL)) { /* Some checks before mangling */ - if (pp->csum_check && !pp->csum_check(skb, pp)) + if (pp->csum_check && !pp->csum_check(AF_INET, skb, pp)) return 0; /* @@ -195,7 +196,7 @@ udp_dnat_handler(struct sk_buff *skb, if (unlikely(cp->app != NULL)) { /* Some checks before mangling */ - if (pp->csum_check && !pp->csum_check(skb, pp)) + if (pp->csum_check && !pp->csum_check(AF_INET, skb, pp)) return 0; /* @@ -234,10 +235,17 @@ udp_dnat_handler(struct sk_buff *skb, static int -udp_csum_check(struct sk_buff *skb, struct ip_vs_protocol *pp) +udp_csum_check(int af, struct sk_buff *skb, struct ip_vs_protocol *pp) { struct udphdr _udph, *uh; - const unsigned int udphoff = ip_hdrlen(skb); + unsigned int udphoff; + +#ifdef CONFIG_IP_VS_IPV6 + if (af == AF_INET6) + udphoff = sizeof(struct ipv6hdr); + else +#endif + udphoff = ip_hdrlen(skb); uh = skb_header_pointer(skb, udphoff, sizeof(_udph), &_udph); if (uh == NULL) @@ -249,15 +257,28 @@ udp_csum_check(struct sk_buff *skb, struct ip_vs_protocol *pp) skb->csum = skb_checksum(skb, udphoff, skb->len - udphoff, 0); case CHECKSUM_COMPLETE: - if (csum_tcpudp_magic(ip_hdr(skb)->saddr, - ip_hdr(skb)->daddr, - skb->len - udphoff, - ip_hdr(skb)->protocol, - skb->csum)) { - IP_VS_DBG_RL_PKT(0, pp, skb, 0, - "Failed checksum for"); - return 0; - } +#ifdef CONFIG_IP_VS_IPV6 + if (af == AF_INET6) { + if (csum_ipv6_magic(&ipv6_hdr(skb)->saddr, + &ipv6_hdr(skb)->daddr, + skb->len - udphoff, + ipv6_hdr(skb)->nexthdr, + skb->csum)) { + IP_VS_DBG_RL_PKT(0, pp, skb, 0, + "Failed checksum for"); + return 0; + } + } else +#endif + if (csum_tcpudp_magic(ip_hdr(skb)->saddr, + ip_hdr(skb)->daddr, + skb->len - udphoff, + ip_hdr(skb)->protocol, + skb->csum)) { + IP_VS_DBG_RL_PKT(0, pp, skb, 0, + "Failed checksum for"); + return 0; + } break; default: /* No need to checksum. */ -- cgit v1.2.3-70-g09d2 From 0bbdd42b7efa66685b6d74701bcde3a596a3a59d Mon Sep 17 00:00:00 2001 From: Julius Volz Date: Tue, 2 Sep 2008 15:55:42 +0200 Subject: IPVS: Extend protocol DNAT/SNAT and state handlers Extend protocol DNAT/SNAT and state handlers to work with IPv6. Also change/introduce new checksumming helper functions for this. Signed-off-by: Julius Volz Signed-off-by: Simon Horman --- include/net/ip_vs.h | 11 ++++++ net/ipv4/ipvs/ip_vs_proto_tcp.c | 85 ++++++++++++++++++++++++++++++++--------- net/ipv4/ipvs/ip_vs_proto_udp.c | 82 ++++++++++++++++++++++++++++++--------- 3 files changed, 142 insertions(+), 36 deletions(-) (limited to 'include/net') diff --git a/include/net/ip_vs.h b/include/net/ip_vs.h index 68f004f9bcc..c173f1a7de2 100644 --- a/include/net/ip_vs.h +++ b/include/net/ip_vs.h @@ -904,6 +904,17 @@ static inline __wsum ip_vs_check_diff4(__be32 old, __be32 new, __wsum oldsum) return csum_partial((char *) diff, sizeof(diff), oldsum); } +#ifdef CONFIG_IP_VS_IPV6 +static inline __wsum ip_vs_check_diff16(const __be32 *old, const __be32 *new, + __wsum oldsum) +{ + __be32 diff[8] = { ~old[3], ~old[2], ~old[1], ~old[0], + new[3], new[2], new[1], new[0] }; + + return csum_partial((char *) diff, sizeof(diff), oldsum); +} +#endif + static inline __wsum ip_vs_check_diff2(__be16 old, __be16 new, __wsum oldsum) { __be16 diff[2] = { ~old, new }; diff --git a/net/ipv4/ipvs/ip_vs_proto_tcp.c b/net/ipv4/ipvs/ip_vs_proto_tcp.c index 9211afa8f30..3daae43ae44 100644 --- a/net/ipv4/ipvs/ip_vs_proto_tcp.c +++ b/net/ipv4/ipvs/ip_vs_proto_tcp.c @@ -114,11 +114,21 @@ tcp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_protocol *pp, static inline void -tcp_fast_csum_update(struct tcphdr *tcph, __be32 oldip, __be32 newip, +tcp_fast_csum_update(int af, struct tcphdr *tcph, + const union nf_inet_addr *oldip, + const union nf_inet_addr *newip, __be16 oldport, __be16 newport) { +#ifdef CONFIG_IP_VS_IPV6 + if (af == AF_INET6) + tcph->check = + csum_fold(ip_vs_check_diff16(oldip->ip6, newip->ip6, + ip_vs_check_diff2(oldport, newport, + ~csum_unfold(tcph->check)))); + else +#endif tcph->check = - csum_fold(ip_vs_check_diff4(oldip, newip, + csum_fold(ip_vs_check_diff4(oldip->ip, newip->ip, ip_vs_check_diff2(oldport, newport, ~csum_unfold(tcph->check)))); } @@ -129,7 +139,14 @@ tcp_snat_handler(struct sk_buff *skb, struct ip_vs_protocol *pp, struct ip_vs_conn *cp) { struct tcphdr *tcph; - const unsigned int tcphoff = ip_hdrlen(skb); + unsigned int tcphoff; + +#ifdef CONFIG_IP_VS_IPV6 + if (cp->af == AF_INET6) + tcphoff = sizeof(struct ipv6hdr); + else +#endif + tcphoff = ip_hdrlen(skb); /* csum_check requires unshared skb */ if (!skb_make_writable(skb, tcphoff+sizeof(*tcph))) @@ -137,7 +154,7 @@ tcp_snat_handler(struct sk_buff *skb, if (unlikely(cp->app != NULL)) { /* Some checks before mangling */ - if (pp->csum_check && !pp->csum_check(AF_INET, skb, pp)) + if (pp->csum_check && !pp->csum_check(cp->af, skb, pp)) return 0; /* Call application helper if needed */ @@ -145,13 +162,13 @@ tcp_snat_handler(struct sk_buff *skb, return 0; } - tcph = (void *)ip_hdr(skb) + tcphoff; + tcph = (void *)skb_network_header(skb) + tcphoff; tcph->source = cp->vport; /* Adjust TCP checksums */ if (!cp->app) { /* Only port and addr are changed, do fast csum update */ - tcp_fast_csum_update(tcph, cp->daddr.ip, cp->vaddr.ip, + tcp_fast_csum_update(cp->af, tcph, &cp->daddr, &cp->vaddr, cp->dport, cp->vport); if (skb->ip_summed == CHECKSUM_COMPLETE) skb->ip_summed = CHECKSUM_NONE; @@ -159,9 +176,20 @@ tcp_snat_handler(struct sk_buff *skb, /* full checksum calculation */ tcph->check = 0; skb->csum = skb_checksum(skb, tcphoff, skb->len - tcphoff, 0); - tcph->check = csum_tcpudp_magic(cp->vaddr.ip, cp->caddr.ip, - skb->len - tcphoff, - cp->protocol, skb->csum); +#ifdef CONFIG_IP_VS_IPV6 + if (cp->af == AF_INET6) + tcph->check = csum_ipv6_magic(&cp->vaddr.in6, + &cp->caddr.in6, + skb->len - tcphoff, + cp->protocol, skb->csum); + else +#endif + tcph->check = csum_tcpudp_magic(cp->vaddr.ip, + cp->caddr.ip, + skb->len - tcphoff, + cp->protocol, + skb->csum); + IP_VS_DBG(11, "O-pkt: %s O-csum=%d (+%zd)\n", pp->name, tcph->check, (char*)&(tcph->check) - (char*)tcph); @@ -175,7 +203,14 @@ tcp_dnat_handler(struct sk_buff *skb, struct ip_vs_protocol *pp, struct ip_vs_conn *cp) { struct tcphdr *tcph; - const unsigned int tcphoff = ip_hdrlen(skb); + unsigned int tcphoff; + +#ifdef CONFIG_IP_VS_IPV6 + if (cp->af == AF_INET6) + tcphoff = sizeof(struct ipv6hdr); + else +#endif + tcphoff = ip_hdrlen(skb); /* csum_check requires unshared skb */ if (!skb_make_writable(skb, tcphoff+sizeof(*tcph))) @@ -183,7 +218,7 @@ tcp_dnat_handler(struct sk_buff *skb, if (unlikely(cp->app != NULL)) { /* Some checks before mangling */ - if (pp->csum_check && !pp->csum_check(AF_INET, skb, pp)) + if (pp->csum_check && !pp->csum_check(cp->af, skb, pp)) return 0; /* @@ -194,7 +229,7 @@ tcp_dnat_handler(struct sk_buff *skb, return 0; } - tcph = (void *)ip_hdr(skb) + tcphoff; + tcph = (void *)skb_network_header(skb) + tcphoff; tcph->dest = cp->dport; /* @@ -202,7 +237,7 @@ tcp_dnat_handler(struct sk_buff *skb, */ if (!cp->app) { /* Only port and addr are changed, do fast csum update */ - tcp_fast_csum_update(tcph, cp->vaddr.ip, cp->daddr.ip, + tcp_fast_csum_update(cp->af, tcph, &cp->vaddr, &cp->daddr, cp->vport, cp->dport); if (skb->ip_summed == CHECKSUM_COMPLETE) skb->ip_summed = CHECKSUM_NONE; @@ -210,9 +245,19 @@ tcp_dnat_handler(struct sk_buff *skb, /* full checksum calculation */ tcph->check = 0; skb->csum = skb_checksum(skb, tcphoff, skb->len - tcphoff, 0); - tcph->check = csum_tcpudp_magic(cp->caddr.ip, cp->daddr.ip, - skb->len - tcphoff, - cp->protocol, skb->csum); +#ifdef CONFIG_IP_VS_IPV6 + if (cp->af == AF_INET6) + tcph->check = csum_ipv6_magic(&cp->caddr.in6, + &cp->daddr.in6, + skb->len - tcphoff, + cp->protocol, skb->csum); + else +#endif + tcph->check = csum_tcpudp_magic(cp->caddr.ip, + cp->daddr.ip, + skb->len - tcphoff, + cp->protocol, + skb->csum); skb->ip_summed = CHECKSUM_UNNECESSARY; } return 1; @@ -487,7 +532,13 @@ tcp_state_transition(struct ip_vs_conn *cp, int direction, { struct tcphdr _tcph, *th; - th = skb_header_pointer(skb, ip_hdrlen(skb), sizeof(_tcph), &_tcph); +#ifdef CONFIG_IP_VS_IPV6 + int ihl = cp->af == AF_INET ? ip_hdrlen(skb) : sizeof(struct ipv6hdr); +#else + int ihl = ip_hdrlen(skb); +#endif + + th = skb_header_pointer(skb, ihl, sizeof(_tcph), &_tcph); if (th == NULL) return 0; diff --git a/net/ipv4/ipvs/ip_vs_proto_udp.c b/net/ipv4/ipvs/ip_vs_proto_udp.c index d3a1b1f2d10..6cca0ad8e32 100644 --- a/net/ipv4/ipvs/ip_vs_proto_udp.c +++ b/net/ipv4/ipvs/ip_vs_proto_udp.c @@ -120,13 +120,23 @@ udp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_protocol *pp, static inline void -udp_fast_csum_update(struct udphdr *uhdr, __be32 oldip, __be32 newip, +udp_fast_csum_update(int af, struct udphdr *uhdr, + const union nf_inet_addr *oldip, + const union nf_inet_addr *newip, __be16 oldport, __be16 newport) { - uhdr->check = - csum_fold(ip_vs_check_diff4(oldip, newip, - ip_vs_check_diff2(oldport, newport, - ~csum_unfold(uhdr->check)))); +#ifdef CONFIG_IP_VS_IPV6 + if (af == AF_INET6) + uhdr->check = + csum_fold(ip_vs_check_diff16(oldip->ip6, newip->ip6, + ip_vs_check_diff2(oldport, newport, + ~csum_unfold(uhdr->check)))); + else +#endif + uhdr->check = + csum_fold(ip_vs_check_diff4(oldip->ip, newip->ip, + ip_vs_check_diff2(oldport, newport, + ~csum_unfold(uhdr->check)))); if (!uhdr->check) uhdr->check = CSUM_MANGLED_0; } @@ -136,7 +146,14 @@ udp_snat_handler(struct sk_buff *skb, struct ip_vs_protocol *pp, struct ip_vs_conn *cp) { struct udphdr *udph; - const unsigned int udphoff = ip_hdrlen(skb); + unsigned int udphoff; + +#ifdef CONFIG_IP_VS_IPV6 + if (cp->af == AF_INET6) + udphoff = sizeof(struct ipv6hdr); + else +#endif + udphoff = ip_hdrlen(skb); /* csum_check requires unshared skb */ if (!skb_make_writable(skb, udphoff+sizeof(*udph))) @@ -144,7 +161,7 @@ udp_snat_handler(struct sk_buff *skb, if (unlikely(cp->app != NULL)) { /* Some checks before mangling */ - if (pp->csum_check && !pp->csum_check(AF_INET, skb, pp)) + if (pp->csum_check && !pp->csum_check(cp->af, skb, pp)) return 0; /* @@ -154,7 +171,7 @@ udp_snat_handler(struct sk_buff *skb, return 0; } - udph = (void *)ip_hdr(skb) + udphoff; + udph = (void *)skb_network_header(skb) + udphoff; udph->source = cp->vport; /* @@ -162,7 +179,7 @@ udp_snat_handler(struct sk_buff *skb, */ if (!cp->app && (udph->check != 0)) { /* Only port and addr are changed, do fast csum update */ - udp_fast_csum_update(udph, cp->daddr.ip, cp->vaddr.ip, + udp_fast_csum_update(cp->af, udph, &cp->daddr, &cp->vaddr, cp->dport, cp->vport); if (skb->ip_summed == CHECKSUM_COMPLETE) skb->ip_summed = CHECKSUM_NONE; @@ -170,9 +187,19 @@ udp_snat_handler(struct sk_buff *skb, /* full checksum calculation */ udph->check = 0; skb->csum = skb_checksum(skb, udphoff, skb->len - udphoff, 0); - udph->check = csum_tcpudp_magic(cp->vaddr.ip, cp->caddr.ip, - skb->len - udphoff, - cp->protocol, skb->csum); +#ifdef CONFIG_IP_VS_IPV6 + if (cp->af == AF_INET6) + udph->check = csum_ipv6_magic(&cp->vaddr.in6, + &cp->caddr.in6, + skb->len - udphoff, + cp->protocol, skb->csum); + else +#endif + udph->check = csum_tcpudp_magic(cp->vaddr.ip, + cp->caddr.ip, + skb->len - udphoff, + cp->protocol, + skb->csum); if (udph->check == 0) udph->check = CSUM_MANGLED_0; IP_VS_DBG(11, "O-pkt: %s O-csum=%d (+%zd)\n", @@ -188,7 +215,14 @@ udp_dnat_handler(struct sk_buff *skb, struct ip_vs_protocol *pp, struct ip_vs_conn *cp) { struct udphdr *udph; - unsigned int udphoff = ip_hdrlen(skb); + unsigned int udphoff; + +#ifdef CONFIG_IP_VS_IPV6 + if (cp->af == AF_INET6) + udphoff = sizeof(struct ipv6hdr); + else +#endif + udphoff = ip_hdrlen(skb); /* csum_check requires unshared skb */ if (!skb_make_writable(skb, udphoff+sizeof(*udph))) @@ -196,7 +230,7 @@ udp_dnat_handler(struct sk_buff *skb, if (unlikely(cp->app != NULL)) { /* Some checks before mangling */ - if (pp->csum_check && !pp->csum_check(AF_INET, skb, pp)) + if (pp->csum_check && !pp->csum_check(cp->af, skb, pp)) return 0; /* @@ -207,7 +241,7 @@ udp_dnat_handler(struct sk_buff *skb, return 0; } - udph = (void *)ip_hdr(skb) + udphoff; + udph = (void *)skb_network_header(skb) + udphoff; udph->dest = cp->dport; /* @@ -215,7 +249,7 @@ udp_dnat_handler(struct sk_buff *skb, */ if (!cp->app && (udph->check != 0)) { /* Only port and addr are changed, do fast csum update */ - udp_fast_csum_update(udph, cp->vaddr.ip, cp->daddr.ip, + udp_fast_csum_update(cp->af, udph, &cp->vaddr, &cp->daddr, cp->vport, cp->dport); if (skb->ip_summed == CHECKSUM_COMPLETE) skb->ip_summed = CHECKSUM_NONE; @@ -223,9 +257,19 @@ udp_dnat_handler(struct sk_buff *skb, /* full checksum calculation */ udph->check = 0; skb->csum = skb_checksum(skb, udphoff, skb->len - udphoff, 0); - udph->check = csum_tcpudp_magic(cp->caddr.ip, cp->daddr.ip, - skb->len - udphoff, - cp->protocol, skb->csum); +#ifdef CONFIG_IP_VS_IPV6 + if (cp->af == AF_INET6) + udph->check = csum_ipv6_magic(&cp->caddr.in6, + &cp->daddr.in6, + skb->len - udphoff, + cp->protocol, skb->csum); + else +#endif + udph->check = csum_tcpudp_magic(cp->caddr.ip, + cp->daddr.ip, + skb->len - udphoff, + cp->protocol, + skb->csum); if (udph->check == 0) udph->check = CSUM_MANGLED_0; skb->ip_summed = CHECKSUM_UNNECESSARY; -- cgit v1.2.3-70-g09d2 From 28364a59f3dfe7fed3560ec7aff9b7aeb02824fb Mon Sep 17 00:00:00 2001 From: Julius Volz Date: Tue, 2 Sep 2008 15:55:43 +0200 Subject: IPVS: Extend functions for getting/creating connections Extend functions for getting/creating connections and connection templates for IPv6 support and fix the callers. Signed-off-by: Julius Volz Signed-off-by: Simon Horman --- include/net/ip_vs.h | 16 ++++-- net/ipv4/ipvs/ip_vs_conn.c | 100 ++++++++++++++++++++------------- net/ipv4/ipvs/ip_vs_core.c | 112 ++++++++++++++++++++----------------- net/ipv4/ipvs/ip_vs_ftp.c | 45 +++++++-------- net/ipv4/ipvs/ip_vs_proto_ah_esp.c | 24 ++++---- net/ipv4/ipvs/ip_vs_proto_tcp.c | 24 ++++---- net/ipv4/ipvs/ip_vs_proto_udp.c | 24 ++++---- net/ipv4/ipvs/ip_vs_sync.c | 27 +++++---- 8 files changed, 209 insertions(+), 163 deletions(-) (limited to 'include/net') diff --git a/include/net/ip_vs.h b/include/net/ip_vs.h index c173f1a7de2..26893499eb6 100644 --- a/include/net/ip_vs.h +++ b/include/net/ip_vs.h @@ -642,11 +642,16 @@ enum { }; extern struct ip_vs_conn *ip_vs_conn_in_get -(int protocol, __be32 s_addr, __be16 s_port, __be32 d_addr, __be16 d_port); +(int af, int protocol, const union nf_inet_addr *s_addr, __be16 s_port, + const union nf_inet_addr *d_addr, __be16 d_port); + extern struct ip_vs_conn *ip_vs_ct_in_get -(int protocol, __be32 s_addr, __be16 s_port, __be32 d_addr, __be16 d_port); +(int af, int protocol, const union nf_inet_addr *s_addr, __be16 s_port, + const union nf_inet_addr *d_addr, __be16 d_port); + extern struct ip_vs_conn *ip_vs_conn_out_get -(int protocol, __be32 s_addr, __be16 s_port, __be32 d_addr, __be16 d_port); +(int af, int protocol, const union nf_inet_addr *s_addr, __be16 s_port, + const union nf_inet_addr *d_addr, __be16 d_port); /* put back the conn without restarting its timer */ static inline void __ip_vs_conn_put(struct ip_vs_conn *cp) @@ -657,8 +662,9 @@ extern void ip_vs_conn_put(struct ip_vs_conn *cp); extern void ip_vs_conn_fill_cport(struct ip_vs_conn *cp, __be16 cport); extern struct ip_vs_conn * -ip_vs_conn_new(int proto, __be32 caddr, __be16 cport, __be32 vaddr, __be16 vport, - __be32 daddr, __be16 dport, unsigned flags, +ip_vs_conn_new(int af, int proto, const union nf_inet_addr *caddr, __be16 cport, + const union nf_inet_addr *vaddr, __be16 vport, + const union nf_inet_addr *daddr, __be16 dport, unsigned flags, struct ip_vs_dest *dest); extern void ip_vs_conn_expire_now(struct ip_vs_conn *cp); diff --git a/net/ipv4/ipvs/ip_vs_conn.c b/net/ipv4/ipvs/ip_vs_conn.c index 639d4bc7fc1..15eec282b17 100644 --- a/net/ipv4/ipvs/ip_vs_conn.c +++ b/net/ipv4/ipvs/ip_vs_conn.c @@ -114,9 +114,18 @@ static inline void ct_write_unlock_bh(unsigned key) /* * Returns hash value for IPVS connection entry */ -static unsigned int ip_vs_conn_hashkey(unsigned proto, __be32 addr, __be16 port) +static unsigned int ip_vs_conn_hashkey(int af, unsigned proto, + const union nf_inet_addr *addr, + __be16 port) { - return jhash_3words((__force u32)addr, (__force u32)port, proto, ip_vs_conn_rnd) +#ifdef CONFIG_IP_VS_IPV6 + if (af == AF_INET6) + return jhash_3words(jhash(addr, 16, ip_vs_conn_rnd), + (__force u32)port, proto, ip_vs_conn_rnd) + & IP_VS_CONN_TAB_MASK; +#endif + return jhash_3words((__force u32)addr->ip, (__force u32)port, proto, + ip_vs_conn_rnd) & IP_VS_CONN_TAB_MASK; } @@ -131,7 +140,7 @@ static inline int ip_vs_conn_hash(struct ip_vs_conn *cp) int ret; /* Hash by protocol, client address and port */ - hash = ip_vs_conn_hashkey(cp->protocol, cp->caddr.ip, cp->cport); + hash = ip_vs_conn_hashkey(cp->af, cp->protocol, &cp->caddr, cp->cport); ct_write_lock(hash); @@ -162,7 +171,7 @@ static inline int ip_vs_conn_unhash(struct ip_vs_conn *cp) int ret; /* unhash it and decrease its reference counter */ - hash = ip_vs_conn_hashkey(cp->protocol, cp->caddr.ip, cp->cport); + hash = ip_vs_conn_hashkey(cp->af, cp->protocol, &cp->caddr, cp->cport); ct_write_lock(hash); @@ -187,18 +196,21 @@ static inline int ip_vs_conn_unhash(struct ip_vs_conn *cp) * d_addr, d_port: pkt dest address (load balancer) */ static inline struct ip_vs_conn *__ip_vs_conn_in_get -(int protocol, __be32 s_addr, __be16 s_port, __be32 d_addr, __be16 d_port) +(int af, int protocol, const union nf_inet_addr *s_addr, __be16 s_port, + const union nf_inet_addr *d_addr, __be16 d_port) { unsigned hash; struct ip_vs_conn *cp; - hash = ip_vs_conn_hashkey(protocol, s_addr, s_port); + hash = ip_vs_conn_hashkey(af, protocol, s_addr, s_port); ct_read_lock(hash); list_for_each_entry(cp, &ip_vs_conn_tab[hash], c_list) { - if (s_addr == cp->caddr.ip && s_port == cp->cport && - d_port == cp->vport && d_addr == cp->vaddr.ip && + if (cp->af == af && + ip_vs_addr_equal(af, s_addr, &cp->caddr) && + ip_vs_addr_equal(af, d_addr, &cp->vaddr) && + s_port == cp->cport && d_port == cp->vport && ((!s_port) ^ (!(cp->flags & IP_VS_CONN_F_NO_CPORT))) && protocol == cp->protocol) { /* HIT */ @@ -214,37 +226,42 @@ static inline struct ip_vs_conn *__ip_vs_conn_in_get } struct ip_vs_conn *ip_vs_conn_in_get -(int protocol, __be32 s_addr, __be16 s_port, __be32 d_addr, __be16 d_port) +(int af, int protocol, const union nf_inet_addr *s_addr, __be16 s_port, + const union nf_inet_addr *d_addr, __be16 d_port) { struct ip_vs_conn *cp; - cp = __ip_vs_conn_in_get(protocol, s_addr, s_port, d_addr, d_port); + cp = __ip_vs_conn_in_get(af, protocol, s_addr, s_port, d_addr, d_port); if (!cp && atomic_read(&ip_vs_conn_no_cport_cnt)) - cp = __ip_vs_conn_in_get(protocol, s_addr, 0, d_addr, d_port); + cp = __ip_vs_conn_in_get(af, protocol, s_addr, 0, d_addr, + d_port); - IP_VS_DBG(9, "lookup/in %s %u.%u.%u.%u:%d->%u.%u.%u.%u:%d %s\n", - ip_vs_proto_name(protocol), - NIPQUAD(s_addr), ntohs(s_port), - NIPQUAD(d_addr), ntohs(d_port), - cp?"hit":"not hit"); + IP_VS_DBG_BUF(9, "lookup/in %s %s:%d->%s:%d %s\n", + ip_vs_proto_name(protocol), + IP_VS_DBG_ADDR(af, s_addr), ntohs(s_port), + IP_VS_DBG_ADDR(af, d_addr), ntohs(d_port), + cp ? "hit" : "not hit"); return cp; } /* Get reference to connection template */ struct ip_vs_conn *ip_vs_ct_in_get -(int protocol, __be32 s_addr, __be16 s_port, __be32 d_addr, __be16 d_port) +(int af, int protocol, const union nf_inet_addr *s_addr, __be16 s_port, + const union nf_inet_addr *d_addr, __be16 d_port) { unsigned hash; struct ip_vs_conn *cp; - hash = ip_vs_conn_hashkey(protocol, s_addr, s_port); + hash = ip_vs_conn_hashkey(af, protocol, s_addr, s_port); ct_read_lock(hash); list_for_each_entry(cp, &ip_vs_conn_tab[hash], c_list) { - if (s_addr == cp->caddr.ip && s_port == cp->cport && - d_port == cp->vport && d_addr == cp->vaddr.ip && + if (cp->af == af && + ip_vs_addr_equal(af, s_addr, &cp->caddr) && + ip_vs_addr_equal(af, d_addr, &cp->vaddr) && + s_port == cp->cport && d_port == cp->vport && cp->flags & IP_VS_CONN_F_TEMPLATE && protocol == cp->protocol) { /* HIT */ @@ -257,11 +274,11 @@ struct ip_vs_conn *ip_vs_ct_in_get out: ct_read_unlock(hash); - IP_VS_DBG(9, "template lookup/in %s %u.%u.%u.%u:%d->%u.%u.%u.%u:%d %s\n", - ip_vs_proto_name(protocol), - NIPQUAD(s_addr), ntohs(s_port), - NIPQUAD(d_addr), ntohs(d_port), - cp?"hit":"not hit"); + IP_VS_DBG_BUF(9, "template lookup/in %s %s:%d->%s:%d %s\n", + ip_vs_proto_name(protocol), + IP_VS_DBG_ADDR(af, s_addr), ntohs(s_port), + IP_VS_DBG_ADDR(af, d_addr), ntohs(d_port), + cp ? "hit" : "not hit"); return cp; } @@ -273,7 +290,8 @@ struct ip_vs_conn *ip_vs_ct_in_get * d_addr, d_port: pkt dest address (foreign host) */ struct ip_vs_conn *ip_vs_conn_out_get -(int protocol, __be32 s_addr, __be16 s_port, __be32 d_addr, __be16 d_port) +(int af, int protocol, const union nf_inet_addr *s_addr, __be16 s_port, + const union nf_inet_addr *d_addr, __be16 d_port) { unsigned hash; struct ip_vs_conn *cp, *ret=NULL; @@ -281,13 +299,15 @@ struct ip_vs_conn *ip_vs_conn_out_get /* * Check for "full" addressed entries */ - hash = ip_vs_conn_hashkey(protocol, d_addr, d_port); + hash = ip_vs_conn_hashkey(af, protocol, d_addr, d_port); ct_read_lock(hash); list_for_each_entry(cp, &ip_vs_conn_tab[hash], c_list) { - if (d_addr == cp->caddr.ip && d_port == cp->cport && - s_port == cp->dport && s_addr == cp->daddr.ip && + if (cp->af == af && + ip_vs_addr_equal(af, d_addr, &cp->caddr) && + ip_vs_addr_equal(af, s_addr, &cp->daddr) && + d_port == cp->cport && s_port == cp->dport && protocol == cp->protocol) { /* HIT */ atomic_inc(&cp->refcnt); @@ -298,11 +318,11 @@ struct ip_vs_conn *ip_vs_conn_out_get ct_read_unlock(hash); - IP_VS_DBG(9, "lookup/out %s %u.%u.%u.%u:%d->%u.%u.%u.%u:%d %s\n", - ip_vs_proto_name(protocol), - NIPQUAD(s_addr), ntohs(s_port), - NIPQUAD(d_addr), ntohs(d_port), - ret?"hit":"not hit"); + IP_VS_DBG_BUF(9, "lookup/out %s %s:%d->%s:%d %s\n", + ip_vs_proto_name(protocol), + IP_VS_DBG_ADDR(af, s_addr), ntohs(s_port), + IP_VS_DBG_ADDR(af, d_addr), ntohs(d_port), + ret ? "hit" : "not hit"); return ret; } @@ -625,8 +645,9 @@ void ip_vs_conn_expire_now(struct ip_vs_conn *cp) * Create a new connection entry and hash it into the ip_vs_conn_tab */ struct ip_vs_conn * -ip_vs_conn_new(int proto, __be32 caddr, __be16 cport, __be32 vaddr, __be16 vport, - __be32 daddr, __be16 dport, unsigned flags, +ip_vs_conn_new(int af, int proto, const union nf_inet_addr *caddr, __be16 cport, + const union nf_inet_addr *vaddr, __be16 vport, + const union nf_inet_addr *daddr, __be16 dport, unsigned flags, struct ip_vs_dest *dest) { struct ip_vs_conn *cp; @@ -640,12 +661,13 @@ ip_vs_conn_new(int proto, __be32 caddr, __be16 cport, __be32 vaddr, __be16 vport INIT_LIST_HEAD(&cp->c_list); setup_timer(&cp->timer, ip_vs_conn_expire, (unsigned long)cp); + cp->af = af; cp->protocol = proto; - cp->caddr.ip = caddr; + ip_vs_addr_copy(af, &cp->caddr, caddr); cp->cport = cport; - cp->vaddr.ip = vaddr; + ip_vs_addr_copy(af, &cp->vaddr, vaddr); cp->vport = vport; - cp->daddr.ip = daddr; + ip_vs_addr_copy(af, &cp->daddr, daddr); cp->dport = dport; cp->flags = flags; spin_lock_init(&cp->lock); diff --git a/net/ipv4/ipvs/ip_vs_core.c b/net/ipv4/ipvs/ip_vs_core.c index 34aaa1480d9..2d5a4331709 100644 --- a/net/ipv4/ipvs/ip_vs_core.c +++ b/net/ipv4/ipvs/ip_vs_core.c @@ -173,19 +173,21 @@ ip_vs_sched_persist(struct ip_vs_service *svc, __be16 ports[2]) { struct ip_vs_conn *cp = NULL; - struct iphdr *iph = ip_hdr(skb); + struct ip_vs_iphdr iph; struct ip_vs_dest *dest; struct ip_vs_conn *ct; __be16 dport; /* destination port to forward */ - __be32 snet; /* source network of the client, after masking */ + union nf_inet_addr snet; /* source network of the client, + after masking */ + ip_vs_fill_iphdr(AF_INET, skb_network_header(skb), &iph); /* Mask saddr with the netmask to adjust template granularity */ - snet = iph->saddr & svc->netmask; + snet.ip = iph.saddr.ip & svc->netmask; IP_VS_DBG(6, "p-schedule: src %u.%u.%u.%u:%u dest %u.%u.%u.%u:%u " "mnet %u.%u.%u.%u\n", - NIPQUAD(iph->saddr), ntohs(ports[0]), - NIPQUAD(iph->daddr), ntohs(ports[1]), + NIPQUAD(iph.saddr.ip), ntohs(ports[0]), + NIPQUAD(iph.daddr.ip), ntohs(ports[1]), NIPQUAD(snet)); /* @@ -204,11 +206,11 @@ ip_vs_sched_persist(struct ip_vs_service *svc, if (ports[1] == svc->port) { /* Check if a template already exists */ if (svc->port != FTPPORT) - ct = ip_vs_ct_in_get(iph->protocol, snet, 0, - iph->daddr, ports[1]); + ct = ip_vs_ct_in_get(AF_INET, iph.protocol, &snet, 0, + &iph.daddr, ports[1]); else - ct = ip_vs_ct_in_get(iph->protocol, snet, 0, - iph->daddr, 0); + ct = ip_vs_ct_in_get(AF_INET, iph.protocol, &snet, 0, + &iph.daddr, 0); if (!ct || !ip_vs_check_template(ct)) { /* @@ -228,18 +230,18 @@ ip_vs_sched_persist(struct ip_vs_service *svc, * for ftp service. */ if (svc->port != FTPPORT) - ct = ip_vs_conn_new(iph->protocol, - snet, 0, - iph->daddr, + ct = ip_vs_conn_new(AF_INET, iph.protocol, + &snet, 0, + &iph.daddr, ports[1], - dest->addr.ip, dest->port, + &dest->addr, dest->port, IP_VS_CONN_F_TEMPLATE, dest); else - ct = ip_vs_conn_new(iph->protocol, - snet, 0, - iph->daddr, 0, - dest->addr.ip, 0, + ct = ip_vs_conn_new(AF_INET, iph.protocol, + &snet, 0, + &iph.daddr, 0, + &dest->addr, 0, IP_VS_CONN_F_TEMPLATE, dest); if (ct == NULL) @@ -258,12 +260,16 @@ ip_vs_sched_persist(struct ip_vs_service *svc, * fwmark template: * port zero template: */ - if (svc->fwmark) - ct = ip_vs_ct_in_get(IPPROTO_IP, snet, 0, - htonl(svc->fwmark), 0); - else - ct = ip_vs_ct_in_get(iph->protocol, snet, 0, - iph->daddr, 0); + if (svc->fwmark) { + union nf_inet_addr fwmark = { + .all = { 0, 0, 0, htonl(svc->fwmark) } + }; + + ct = ip_vs_ct_in_get(AF_INET, IPPROTO_IP, &snet, 0, + &fwmark, 0); + } else + ct = ip_vs_ct_in_get(AF_INET, iph.protocol, &snet, 0, + &iph.daddr, 0); if (!ct || !ip_vs_check_template(ct)) { /* @@ -282,18 +288,22 @@ ip_vs_sched_persist(struct ip_vs_service *svc, /* * Create a template according to the service */ - if (svc->fwmark) - ct = ip_vs_conn_new(IPPROTO_IP, - snet, 0, - htonl(svc->fwmark), 0, - dest->addr.ip, 0, + if (svc->fwmark) { + union nf_inet_addr fwmark = { + .all = { 0, 0, 0, htonl(svc->fwmark) } + }; + + ct = ip_vs_conn_new(AF_INET, IPPROTO_IP, + &snet, 0, + &fwmark, 0, + &dest->addr, 0, IP_VS_CONN_F_TEMPLATE, dest); - else - ct = ip_vs_conn_new(iph->protocol, - snet, 0, - iph->daddr, 0, - dest->addr.ip, 0, + } else + ct = ip_vs_conn_new(AF_INET, iph.protocol, + &snet, 0, + &iph.daddr, 0, + &dest->addr, 0, IP_VS_CONN_F_TEMPLATE, dest); if (ct == NULL) @@ -310,10 +320,10 @@ ip_vs_sched_persist(struct ip_vs_service *svc, /* * Create a new connection according to the template */ - cp = ip_vs_conn_new(iph->protocol, - iph->saddr, ports[0], - iph->daddr, ports[1], - dest->addr.ip, dport, + cp = ip_vs_conn_new(AF_INET, iph.protocol, + &iph.saddr, ports[0], + &iph.daddr, ports[1], + &dest->addr, dport, 0, dest); if (cp == NULL) { @@ -342,12 +352,12 @@ struct ip_vs_conn * ip_vs_schedule(struct ip_vs_service *svc, const struct sk_buff *skb) { struct ip_vs_conn *cp = NULL; - struct iphdr *iph = ip_hdr(skb); + struct ip_vs_iphdr iph; struct ip_vs_dest *dest; __be16 _ports[2], *pptr; - pptr = skb_header_pointer(skb, iph->ihl*4, - sizeof(_ports), _ports); + ip_vs_fill_iphdr(svc->af, skb_network_header(skb), &iph); + pptr = skb_header_pointer(skb, iph.len, sizeof(_ports), _ports); if (pptr == NULL) return NULL; @@ -377,10 +387,10 @@ ip_vs_schedule(struct ip_vs_service *svc, const struct sk_buff *skb) /* * Create a connection entry. */ - cp = ip_vs_conn_new(iph->protocol, - iph->saddr, pptr[0], - iph->daddr, pptr[1], - dest->addr.ip, dest->port ? dest->port : pptr[1], + cp = ip_vs_conn_new(AF_INET, iph.protocol, + &iph.saddr, pptr[0], + &iph.daddr, pptr[1], + &dest->addr, dest->port ? dest->port : pptr[1], 0, dest); if (cp == NULL) @@ -408,10 +418,10 @@ int ip_vs_leave(struct ip_vs_service *svc, struct sk_buff *skb, struct ip_vs_protocol *pp) { __be16 _ports[2], *pptr; - struct iphdr *iph = ip_hdr(skb); + struct ip_vs_iphdr iph; + ip_vs_fill_iphdr(AF_INET, skb_network_header(skb), &iph); - pptr = skb_header_pointer(skb, iph->ihl*4, - sizeof(_ports), _ports); + pptr = skb_header_pointer(skb, iph.len, sizeof(_ports), _ports); if (pptr == NULL) { ip_vs_service_put(svc); return NF_DROP; @@ -421,7 +431,7 @@ int ip_vs_leave(struct ip_vs_service *svc, struct sk_buff *skb, and the destination is RTN_UNICAST (and not local), then create a cache_bypass connection entry */ if (sysctl_ip_vs_cache_bypass && svc->fwmark - && (inet_addr_type(&init_net, iph->daddr) == RTN_UNICAST)) { + && (inet_addr_type(&init_net, iph.daddr.ip) == RTN_UNICAST)) { int ret, cs; struct ip_vs_conn *cp; @@ -429,9 +439,9 @@ int ip_vs_leave(struct ip_vs_service *svc, struct sk_buff *skb, /* create a new connection entry */ IP_VS_DBG(6, "ip_vs_leave: create a cache_bypass entry\n"); - cp = ip_vs_conn_new(iph->protocol, - iph->saddr, pptr[0], - iph->daddr, pptr[1], + cp = ip_vs_conn_new(AF_INET, iph.protocol, + &iph.saddr, pptr[0], + &iph.daddr, pptr[1], 0, 0, IP_VS_CONN_F_BYPASS, NULL); diff --git a/net/ipv4/ipvs/ip_vs_ftp.c b/net/ipv4/ipvs/ip_vs_ftp.c index bfe5d7050a5..0c3fbe0de5f 100644 --- a/net/ipv4/ipvs/ip_vs_ftp.c +++ b/net/ipv4/ipvs/ip_vs_ftp.c @@ -140,7 +140,7 @@ static int ip_vs_ftp_out(struct ip_vs_app *app, struct ip_vs_conn *cp, struct tcphdr *th; char *data, *data_limit; char *start, *end; - __be32 from; + union nf_inet_addr from; __be16 port; struct ip_vs_conn *n_cp; char buf[24]; /* xxx.xxx.xxx.xxx,ppp,ppp\000 */ @@ -166,24 +166,25 @@ static int ip_vs_ftp_out(struct ip_vs_app *app, struct ip_vs_conn *cp, if (ip_vs_ftp_get_addrport(data, data_limit, SERVER_STRING, sizeof(SERVER_STRING)-1, ')', - &from, &port, + &from.ip, &port, &start, &end) != 1) return 1; IP_VS_DBG(7, "PASV response (%u.%u.%u.%u:%d) -> " "%u.%u.%u.%u:%d detected\n", - NIPQUAD(from), ntohs(port), NIPQUAD(cp->caddr.ip), 0); + NIPQUAD(from.ip), ntohs(port), + NIPQUAD(cp->caddr.ip), 0); /* * Now update or create an connection entry for it */ - n_cp = ip_vs_conn_out_get(iph->protocol, from, port, - cp->caddr.ip, 0); + n_cp = ip_vs_conn_out_get(AF_INET, iph->protocol, &from, port, + &cp->caddr, 0); if (!n_cp) { - n_cp = ip_vs_conn_new(IPPROTO_TCP, - cp->caddr.ip, 0, - cp->vaddr.ip, port, - from, port, + n_cp = ip_vs_conn_new(AF_INET, IPPROTO_TCP, + &cp->caddr, 0, + &cp->vaddr, port, + &from, port, IP_VS_CONN_F_NO_CPORT, cp->dest); if (!n_cp) @@ -196,9 +197,9 @@ static int ip_vs_ftp_out(struct ip_vs_app *app, struct ip_vs_conn *cp, /* * Replace the old passive address with the new one */ - from = n_cp->vaddr.ip; + from.ip = n_cp->vaddr.ip; port = n_cp->vport; - sprintf(buf,"%d,%d,%d,%d,%d,%d", NIPQUAD(from), + sprintf(buf, "%d,%d,%d,%d,%d,%d", NIPQUAD(from.ip), (ntohs(port)>>8)&255, ntohs(port)&255); buf_len = strlen(buf); @@ -243,7 +244,7 @@ static int ip_vs_ftp_in(struct ip_vs_app *app, struct ip_vs_conn *cp, struct tcphdr *th; char *data, *data_start, *data_limit; char *start, *end; - __be32 to; + union nf_inet_addr to; __be16 port; struct ip_vs_conn *n_cp; @@ -291,12 +292,12 @@ static int ip_vs_ftp_in(struct ip_vs_app *app, struct ip_vs_conn *cp, */ if (ip_vs_ftp_get_addrport(data_start, data_limit, CLIENT_STRING, sizeof(CLIENT_STRING)-1, - '\r', &to, &port, + '\r', &to.ip, &port, &start, &end) != 1) return 1; IP_VS_DBG(7, "PORT %u.%u.%u.%u:%d detected\n", - NIPQUAD(to), ntohs(port)); + NIPQUAD(to.ip), ntohs(port)); /* Passive mode off */ cp->app_data = NULL; @@ -306,16 +307,16 @@ static int ip_vs_ftp_in(struct ip_vs_app *app, struct ip_vs_conn *cp, */ IP_VS_DBG(7, "protocol %s %u.%u.%u.%u:%d %u.%u.%u.%u:%d\n", ip_vs_proto_name(iph->protocol), - NIPQUAD(to), ntohs(port), NIPQUAD(cp->vaddr.ip), 0); + NIPQUAD(to.ip), ntohs(port), NIPQUAD(cp->vaddr.ip), 0); - n_cp = ip_vs_conn_in_get(iph->protocol, - to, port, - cp->vaddr.ip, htons(ntohs(cp->vport)-1)); + n_cp = ip_vs_conn_in_get(AF_INET, iph->protocol, + &to, port, + &cp->vaddr, htons(ntohs(cp->vport)-1)); if (!n_cp) { - n_cp = ip_vs_conn_new(IPPROTO_TCP, - to, port, - cp->vaddr.ip, htons(ntohs(cp->vport)-1), - cp->daddr.ip, htons(ntohs(cp->dport)-1), + n_cp = ip_vs_conn_new(AF_INET, IPPROTO_TCP, + &to, port, + &cp->vaddr, htons(ntohs(cp->vport)-1), + &cp->daddr, htons(ntohs(cp->dport)-1), 0, cp->dest); if (!n_cp) diff --git a/net/ipv4/ipvs/ip_vs_proto_ah_esp.c b/net/ipv4/ipvs/ip_vs_proto_ah_esp.c index 4b0b8f268d1..2b18a78d039 100644 --- a/net/ipv4/ipvs/ip_vs_proto_ah_esp.c +++ b/net/ipv4/ipvs/ip_vs_proto_ah_esp.c @@ -46,16 +46,16 @@ ah_esp_conn_in_get(int af, const struct sk_buff *skb, struct ip_vs_protocol *pp, struct ip_vs_conn *cp; if (likely(!inverse)) { - cp = ip_vs_conn_in_get(IPPROTO_UDP, - iph->saddr.ip, + cp = ip_vs_conn_in_get(af, IPPROTO_UDP, + &iph->saddr, htons(PORT_ISAKMP), - iph->daddr.ip, + &iph->daddr, htons(PORT_ISAKMP)); } else { - cp = ip_vs_conn_in_get(IPPROTO_UDP, - iph->daddr.ip, + cp = ip_vs_conn_in_get(af, IPPROTO_UDP, + &iph->daddr, htons(PORT_ISAKMP), - iph->saddr.ip, + &iph->saddr, htons(PORT_ISAKMP)); } @@ -86,16 +86,16 @@ ah_esp_conn_out_get(int af, const struct sk_buff *skb, struct ip_vs_conn *cp; if (likely(!inverse)) { - cp = ip_vs_conn_out_get(IPPROTO_UDP, - iph->saddr.ip, + cp = ip_vs_conn_out_get(af, IPPROTO_UDP, + &iph->saddr, htons(PORT_ISAKMP), - iph->daddr.ip, + &iph->daddr, htons(PORT_ISAKMP)); } else { - cp = ip_vs_conn_out_get(IPPROTO_UDP, - iph->daddr.ip, + cp = ip_vs_conn_out_get(af, IPPROTO_UDP, + &iph->daddr, htons(PORT_ISAKMP), - iph->saddr.ip, + &iph->saddr, htons(PORT_ISAKMP)); } diff --git a/net/ipv4/ipvs/ip_vs_proto_tcp.c b/net/ipv4/ipvs/ip_vs_proto_tcp.c index 3daae43ae44..3da2bb05ee7 100644 --- a/net/ipv4/ipvs/ip_vs_proto_tcp.c +++ b/net/ipv4/ipvs/ip_vs_proto_tcp.c @@ -36,13 +36,13 @@ tcp_conn_in_get(int af, const struct sk_buff *skb, struct ip_vs_protocol *pp, return NULL; if (likely(!inverse)) { - return ip_vs_conn_in_get(iph->protocol, - iph->saddr.ip, pptr[0], - iph->daddr.ip, pptr[1]); + return ip_vs_conn_in_get(af, iph->protocol, + &iph->saddr, pptr[0], + &iph->daddr, pptr[1]); } else { - return ip_vs_conn_in_get(iph->protocol, - iph->daddr.ip, pptr[1], - iph->saddr.ip, pptr[0]); + return ip_vs_conn_in_get(af, iph->protocol, + &iph->daddr, pptr[1], + &iph->saddr, pptr[0]); } } @@ -58,13 +58,13 @@ tcp_conn_out_get(int af, const struct sk_buff *skb, struct ip_vs_protocol *pp, return NULL; if (likely(!inverse)) { - return ip_vs_conn_out_get(iph->protocol, - iph->saddr.ip, pptr[0], - iph->daddr.ip, pptr[1]); + return ip_vs_conn_out_get(af, iph->protocol, + &iph->saddr, pptr[0], + &iph->daddr, pptr[1]); } else { - return ip_vs_conn_out_get(iph->protocol, - iph->daddr.ip, pptr[1], - iph->saddr.ip, pptr[0]); + return ip_vs_conn_out_get(af, iph->protocol, + &iph->daddr, pptr[1], + &iph->saddr, pptr[0]); } } diff --git a/net/ipv4/ipvs/ip_vs_proto_udp.c b/net/ipv4/ipvs/ip_vs_proto_udp.c index 6cca0ad8e32..fd8bd934cc0 100644 --- a/net/ipv4/ipvs/ip_vs_proto_udp.c +++ b/net/ipv4/ipvs/ip_vs_proto_udp.c @@ -36,13 +36,13 @@ udp_conn_in_get(int af, const struct sk_buff *skb, struct ip_vs_protocol *pp, return NULL; if (likely(!inverse)) { - cp = ip_vs_conn_in_get(iph->protocol, - iph->saddr.ip, pptr[0], - iph->daddr.ip, pptr[1]); + cp = ip_vs_conn_in_get(af, iph->protocol, + &iph->saddr, pptr[0], + &iph->daddr, pptr[1]); } else { - cp = ip_vs_conn_in_get(iph->protocol, - iph->daddr.ip, pptr[1], - iph->saddr.ip, pptr[0]); + cp = ip_vs_conn_in_get(af, iph->protocol, + &iph->daddr, pptr[1], + &iph->saddr, pptr[0]); } return cp; @@ -62,13 +62,13 @@ udp_conn_out_get(int af, const struct sk_buff *skb, struct ip_vs_protocol *pp, return NULL; if (likely(!inverse)) { - cp = ip_vs_conn_out_get(iph->protocol, - iph->saddr.ip, pptr[0], - iph->daddr.ip, pptr[1]); + cp = ip_vs_conn_out_get(af, iph->protocol, + &iph->saddr, pptr[0], + &iph->daddr, pptr[1]); } else { - cp = ip_vs_conn_out_get(iph->protocol, - iph->daddr.ip, pptr[1], - iph->saddr.ip, pptr[0]); + cp = ip_vs_conn_out_get(af, iph->protocol, + &iph->daddr, pptr[1], + &iph->saddr, pptr[0]); } return cp; diff --git a/net/ipv4/ipvs/ip_vs_sync.c b/net/ipv4/ipvs/ip_vs_sync.c index 2cf47b2e166..3ce1093e067 100644 --- a/net/ipv4/ipvs/ip_vs_sync.c +++ b/net/ipv4/ipvs/ip_vs_sync.c @@ -366,13 +366,17 @@ static void ip_vs_process_message(const char *buffer, const size_t buflen) } if (!(flags & IP_VS_CONN_F_TEMPLATE)) - cp = ip_vs_conn_in_get(s->protocol, - s->caddr, s->cport, - s->vaddr, s->vport); + cp = ip_vs_conn_in_get(AF_INET, s->protocol, + (union nf_inet_addr *)&s->caddr, + s->cport, + (union nf_inet_addr *)&s->vaddr, + s->vport); else - cp = ip_vs_ct_in_get(s->protocol, - s->caddr, s->cport, - s->vaddr, s->vport); + cp = ip_vs_ct_in_get(AF_INET, s->protocol, + (union nf_inet_addr *)&s->caddr, + s->cport, + (union nf_inet_addr *)&s->vaddr, + s->vport); if (!cp) { /* * Find the appropriate destination for the connection. @@ -389,10 +393,13 @@ static void ip_vs_process_message(const char *buffer, const size_t buflen) else flags &= ~IP_VS_CONN_F_INACTIVE; } - cp = ip_vs_conn_new(s->protocol, - s->caddr, s->cport, - s->vaddr, s->vport, - s->daddr, s->dport, + cp = ip_vs_conn_new(AF_INET, s->protocol, + (union nf_inet_addr *)s->caddr, + s->cport, + (union nf_inet_addr *)s->vaddr, + s->vport, + (union nf_inet_addr *)s->daddr, + s->dport, flags, dest); if (dest) atomic_dec(&dest->refcnt); -- cgit v1.2.3-70-g09d2 From b3cdd2a73867d309dca288b8e820c09e3b7f1da1 Mon Sep 17 00:00:00 2001 From: Julius Volz Date: Tue, 2 Sep 2008 15:55:45 +0200 Subject: IPVS: Add and bind IPv6 xmit functions Add xmit functions for IPv6. Also add the already needed __ip_vs_get_out_rt_v6() to ip_vs_core.c. Bind the new xmit functions to v6 connections. Signed-off-by: Julius Volz Signed-off-by: Simon Horman --- include/net/ip_vs.h | 20 ++- net/ipv4/ipvs/ip_vs_conn.c | 34 +++- net/ipv4/ipvs/ip_vs_core.c | 43 ++++++ net/ipv4/ipvs/ip_vs_xmit.c | 377 +++++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 472 insertions(+), 2 deletions(-) (limited to 'include/net') diff --git a/include/net/ip_vs.h b/include/net/ip_vs.h index 26893499eb6..ac709fa5a79 100644 --- a/include/net/ip_vs.h +++ b/include/net/ip_vs.h @@ -855,6 +855,19 @@ extern int ip_vs_icmp_xmit (struct sk_buff *skb, struct ip_vs_conn *cp, struct ip_vs_protocol *pp, int offset); extern void ip_vs_dst_reset(struct ip_vs_dest *dest); +#ifdef CONFIG_IP_VS_IPV6 +extern int ip_vs_bypass_xmit_v6 +(struct sk_buff *skb, struct ip_vs_conn *cp, struct ip_vs_protocol *pp); +extern int ip_vs_nat_xmit_v6 +(struct sk_buff *skb, struct ip_vs_conn *cp, struct ip_vs_protocol *pp); +extern int ip_vs_tunnel_xmit_v6 +(struct sk_buff *skb, struct ip_vs_conn *cp, struct ip_vs_protocol *pp); +extern int ip_vs_dr_xmit_v6 +(struct sk_buff *skb, struct ip_vs_conn *cp, struct ip_vs_protocol *pp); +extern int ip_vs_icmp_xmit_v6 +(struct sk_buff *skb, struct ip_vs_conn *cp, struct ip_vs_protocol *pp, + int offset); +#endif /* * This is a simple mechanism to ignore packets when @@ -899,7 +912,12 @@ static inline char ip_vs_fwd_tag(struct ip_vs_conn *cp) } extern void ip_vs_nat_icmp(struct sk_buff *skb, struct ip_vs_protocol *pp, - struct ip_vs_conn *cp, int dir); + struct ip_vs_conn *cp, int dir); + +#ifdef CONFIG_IP_VS_IPV6 +extern void ip_vs_nat_icmp_v6(struct sk_buff *skb, struct ip_vs_protocol *pp, + struct ip_vs_conn *cp, int dir); +#endif extern __sum16 ip_vs_checksum_complete(struct sk_buff *skb, int offset); diff --git a/net/ipv4/ipvs/ip_vs_conn.c b/net/ipv4/ipvs/ip_vs_conn.c index 15eec282b17..f5dddad6d5e 100644 --- a/net/ipv4/ipvs/ip_vs_conn.c +++ b/net/ipv4/ipvs/ip_vs_conn.c @@ -389,6 +389,33 @@ static inline void ip_vs_bind_xmit(struct ip_vs_conn *cp) } } +#ifdef CONFIG_IP_VS_IPV6 +static inline void ip_vs_bind_xmit_v6(struct ip_vs_conn *cp) +{ + switch (IP_VS_FWD_METHOD(cp)) { + case IP_VS_CONN_F_MASQ: + cp->packet_xmit = ip_vs_nat_xmit_v6; + break; + + case IP_VS_CONN_F_TUNNEL: + cp->packet_xmit = ip_vs_tunnel_xmit_v6; + break; + + case IP_VS_CONN_F_DROUTE: + cp->packet_xmit = ip_vs_dr_xmit_v6; + break; + + case IP_VS_CONN_F_LOCALNODE: + cp->packet_xmit = ip_vs_null_xmit; + break; + + case IP_VS_CONN_F_BYPASS: + cp->packet_xmit = ip_vs_bypass_xmit_v6; + break; + } +} +#endif + static inline int ip_vs_dest_totalconns(struct ip_vs_dest *dest) { @@ -694,7 +721,12 @@ ip_vs_conn_new(int af, int proto, const union nf_inet_addr *caddr, __be16 cport, cp->timeout = 3*HZ; /* Bind its packet transmitter */ - ip_vs_bind_xmit(cp); +#ifdef CONFIG_IP_VS_IPV6 + if (af == AF_INET6) + ip_vs_bind_xmit_v6(cp); + else +#endif + ip_vs_bind_xmit(cp); if (unlikely(pp && atomic_read(&pp->appcnt))) ip_vs_bind_app(cp, pp); diff --git a/net/ipv4/ipvs/ip_vs_core.c b/net/ipv4/ipvs/ip_vs_core.c index 2d5a4331709..d6f5bf9049a 100644 --- a/net/ipv4/ipvs/ip_vs_core.c +++ b/net/ipv4/ipvs/ip_vs_core.c @@ -570,6 +570,49 @@ void ip_vs_nat_icmp(struct sk_buff *skb, struct ip_vs_protocol *pp, "Forwarding altered incoming ICMP"); } +#ifdef CONFIG_IP_VS_IPV6 +void ip_vs_nat_icmp_v6(struct sk_buff *skb, struct ip_vs_protocol *pp, + struct ip_vs_conn *cp, int inout) +{ + struct ipv6hdr *iph = ipv6_hdr(skb); + unsigned int icmp_offset = sizeof(struct ipv6hdr); + struct icmp6hdr *icmph = (struct icmp6hdr *)(skb_network_header(skb) + + icmp_offset); + struct ipv6hdr *ciph = (struct ipv6hdr *)(icmph + 1); + + if (inout) { + iph->saddr = cp->vaddr.in6; + ciph->daddr = cp->vaddr.in6; + } else { + iph->daddr = cp->daddr.in6; + ciph->saddr = cp->daddr.in6; + } + + /* the TCP/UDP port */ + if (IPPROTO_TCP == ciph->nexthdr || IPPROTO_UDP == ciph->nexthdr) { + __be16 *ports = (void *)ciph + sizeof(struct ipv6hdr); + + if (inout) + ports[1] = cp->vport; + else + ports[0] = cp->dport; + } + + /* And finally the ICMP checksum */ + icmph->icmp6_cksum = 0; + /* TODO IPv6: is this correct for ICMPv6? */ + ip_vs_checksum_complete(skb, icmp_offset); + skb->ip_summed = CHECKSUM_UNNECESSARY; + + if (inout) + IP_VS_DBG_PKT(11, pp, skb, (void *)ciph - (void *)iph, + "Forwarding altered outgoing ICMPv6"); + else + IP_VS_DBG_PKT(11, pp, skb, (void *)ciph - (void *)iph, + "Forwarding altered incoming ICMPv6"); +} +#endif + /* * Handle ICMP messages in the inside-to-outside direction (outgoing). * Find any that might be relevant, check against existing connections, diff --git a/net/ipv4/ipvs/ip_vs_xmit.c b/net/ipv4/ipvs/ip_vs_xmit.c index fd8342ec7e1..02ddc2b3ce2 100644 --- a/net/ipv4/ipvs/ip_vs_xmit.c +++ b/net/ipv4/ipvs/ip_vs_xmit.c @@ -281,6 +281,70 @@ ip_vs_bypass_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, return NF_STOLEN; } +#ifdef CONFIG_IP_VS_IPV6 +int +ip_vs_bypass_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, + struct ip_vs_protocol *pp) +{ + struct rt6_info *rt; /* Route to the other host */ + struct ipv6hdr *iph = ipv6_hdr(skb); + int mtu; + struct flowi fl = { + .oif = 0, + .nl_u = { + .ip6_u = { + .daddr = iph->daddr, + .saddr = { .s6_addr32 = {0, 0, 0, 0} }, } }, + }; + + EnterFunction(10); + + rt = (struct rt6_info *)ip6_route_output(&init_net, NULL, &fl); + if (!rt) { + IP_VS_DBG_RL("ip_vs_bypass_xmit_v6(): ip6_route_output error, " + "dest: " NIP6_FMT "\n", NIP6(iph->daddr)); + goto tx_error_icmp; + } + + /* MTU checking */ + mtu = dst_mtu(&rt->u.dst); + if (skb->len > mtu) { + dst_release(&rt->u.dst); + icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu, skb->dev); + IP_VS_DBG_RL("ip_vs_bypass_xmit_v6(): frag needed\n"); + goto tx_error; + } + + /* + * Call ip_send_check because we are not sure it is called + * after ip_defrag. Is copy-on-write needed? + */ + skb = skb_share_check(skb, GFP_ATOMIC); + if (unlikely(skb == NULL)) { + dst_release(&rt->u.dst); + return NF_STOLEN; + } + + /* drop old route */ + dst_release(skb->dst); + skb->dst = &rt->u.dst; + + /* Another hack: avoid icmp_send in ip_fragment */ + skb->local_df = 1; + + IP_VS_XMIT(PF_INET6, skb, rt); + + LeaveFunction(10); + return NF_STOLEN; + + tx_error_icmp: + dst_link_failure(skb); + tx_error: + kfree_skb(skb); + LeaveFunction(10); + return NF_STOLEN; +} +#endif /* * NAT transmitter (only for outside-to-inside nat forwarding) @@ -360,6 +424,83 @@ ip_vs_nat_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, goto tx_error; } +#ifdef CONFIG_IP_VS_IPV6 +int +ip_vs_nat_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, + struct ip_vs_protocol *pp) +{ + struct rt6_info *rt; /* Route to the other host */ + int mtu; + + EnterFunction(10); + + /* check if it is a connection of no-client-port */ + if (unlikely(cp->flags & IP_VS_CONN_F_NO_CPORT)) { + __be16 _pt, *p; + p = skb_header_pointer(skb, sizeof(struct ipv6hdr), + sizeof(_pt), &_pt); + if (p == NULL) + goto tx_error; + ip_vs_conn_fill_cport(cp, *p); + IP_VS_DBG(10, "filled cport=%d\n", ntohs(*p)); + } + + rt = __ip_vs_get_out_rt_v6(cp); + if (!rt) + goto tx_error_icmp; + + /* MTU checking */ + mtu = dst_mtu(&rt->u.dst); + if (skb->len > mtu) { + dst_release(&rt->u.dst); + icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu, skb->dev); + IP_VS_DBG_RL_PKT(0, pp, skb, 0, + "ip_vs_nat_xmit_v6(): frag needed for"); + goto tx_error; + } + + /* copy-on-write the packet before mangling it */ + if (!skb_make_writable(skb, sizeof(struct ipv6hdr))) + goto tx_error_put; + + if (skb_cow(skb, rt->u.dst.dev->hard_header_len)) + goto tx_error_put; + + /* drop old route */ + dst_release(skb->dst); + skb->dst = &rt->u.dst; + + /* mangle the packet */ + if (pp->dnat_handler && !pp->dnat_handler(skb, pp, cp)) + goto tx_error; + ipv6_hdr(skb)->daddr = cp->daddr.in6; + + IP_VS_DBG_PKT(10, pp, skb, 0, "After DNAT"); + + /* FIXME: when application helper enlarges the packet and the length + is larger than the MTU of outgoing device, there will be still + MTU problem. */ + + /* Another hack: avoid icmp_send in ip_fragment */ + skb->local_df = 1; + + IP_VS_XMIT(PF_INET6, skb, rt); + + LeaveFunction(10); + return NF_STOLEN; + +tx_error_icmp: + dst_link_failure(skb); +tx_error: + LeaveFunction(10); + kfree_skb(skb); + return NF_STOLEN; +tx_error_put: + dst_release(&rt->u.dst); + goto tx_error; +} +#endif + /* * IP Tunneling transmitter @@ -491,6 +632,112 @@ ip_vs_tunnel_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, return NF_STOLEN; } +#ifdef CONFIG_IP_VS_IPV6 +int +ip_vs_tunnel_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, + struct ip_vs_protocol *pp) +{ + struct rt6_info *rt; /* Route to the other host */ + struct net_device *tdev; /* Device to other host */ + struct ipv6hdr *old_iph = ipv6_hdr(skb); + sk_buff_data_t old_transport_header = skb->transport_header; + struct ipv6hdr *iph; /* Our new IP header */ + unsigned int max_headroom; /* The extra header space needed */ + int mtu; + + EnterFunction(10); + + if (skb->protocol != htons(ETH_P_IPV6)) { + IP_VS_DBG_RL("ip_vs_tunnel_xmit_v6(): protocol error, " + "ETH_P_IPV6: %d, skb protocol: %d\n", + htons(ETH_P_IPV6), skb->protocol); + goto tx_error; + } + + rt = __ip_vs_get_out_rt_v6(cp); + if (!rt) + goto tx_error_icmp; + + tdev = rt->u.dst.dev; + + mtu = dst_mtu(&rt->u.dst) - sizeof(struct ipv6hdr); + /* TODO IPv6: do we need this check in IPv6? */ + if (mtu < 1280) { + dst_release(&rt->u.dst); + IP_VS_DBG_RL("ip_vs_tunnel_xmit_v6(): mtu less than 1280\n"); + goto tx_error; + } + if (skb->dst) + skb->dst->ops->update_pmtu(skb->dst, mtu); + + if (mtu < ntohs(old_iph->payload_len) + sizeof(struct ipv6hdr)) { + icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu, skb->dev); + dst_release(&rt->u.dst); + IP_VS_DBG_RL("ip_vs_tunnel_xmit_v6(): frag needed\n"); + goto tx_error; + } + + /* + * Okay, now see if we can stuff it in the buffer as-is. + */ + max_headroom = LL_RESERVED_SPACE(tdev) + sizeof(struct ipv6hdr); + + if (skb_headroom(skb) < max_headroom + || skb_cloned(skb) || skb_shared(skb)) { + struct sk_buff *new_skb = + skb_realloc_headroom(skb, max_headroom); + if (!new_skb) { + dst_release(&rt->u.dst); + kfree_skb(skb); + IP_VS_ERR_RL("ip_vs_tunnel_xmit_v6(): no memory\n"); + return NF_STOLEN; + } + kfree_skb(skb); + skb = new_skb; + old_iph = ipv6_hdr(skb); + } + + skb->transport_header = old_transport_header; + + skb_push(skb, sizeof(struct ipv6hdr)); + skb_reset_network_header(skb); + memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt)); + + /* drop old route */ + dst_release(skb->dst); + skb->dst = &rt->u.dst; + + /* + * Push down and install the IPIP header. + */ + iph = ipv6_hdr(skb); + iph->version = 6; + iph->nexthdr = IPPROTO_IPV6; + iph->payload_len = old_iph->payload_len + sizeof(old_iph); + iph->priority = old_iph->priority; + memset(&iph->flow_lbl, 0, sizeof(iph->flow_lbl)); + iph->daddr = rt->rt6i_dst.addr; + iph->saddr = cp->vaddr.in6; /* rt->rt6i_src.addr; */ + iph->hop_limit = old_iph->hop_limit; + + /* Another hack: avoid icmp_send in ip_fragment */ + skb->local_df = 1; + + ip6_local_out(skb); + + LeaveFunction(10); + + return NF_STOLEN; + +tx_error_icmp: + dst_link_failure(skb); +tx_error: + kfree_skb(skb); + LeaveFunction(10); + return NF_STOLEN; +} +#endif + /* * Direct Routing transmitter @@ -548,6 +795,60 @@ ip_vs_dr_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, return NF_STOLEN; } +#ifdef CONFIG_IP_VS_IPV6 +int +ip_vs_dr_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, + struct ip_vs_protocol *pp) +{ + struct rt6_info *rt; /* Route to the other host */ + int mtu; + + EnterFunction(10); + + rt = __ip_vs_get_out_rt_v6(cp); + if (!rt) + goto tx_error_icmp; + + /* MTU checking */ + mtu = dst_mtu(&rt->u.dst); + if (skb->len > mtu) { + icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu, skb->dev); + dst_release(&rt->u.dst); + IP_VS_DBG_RL("ip_vs_dr_xmit_v6(): frag needed\n"); + goto tx_error; + } + + /* + * Call ip_send_check because we are not sure it is called + * after ip_defrag. Is copy-on-write needed? + */ + skb = skb_share_check(skb, GFP_ATOMIC); + if (unlikely(skb == NULL)) { + dst_release(&rt->u.dst); + return NF_STOLEN; + } + + /* drop old route */ + dst_release(skb->dst); + skb->dst = &rt->u.dst; + + /* Another hack: avoid icmp_send in ip_fragment */ + skb->local_df = 1; + + IP_VS_XMIT(PF_INET6, skb, rt); + + LeaveFunction(10); + return NF_STOLEN; + +tx_error_icmp: + dst_link_failure(skb); +tx_error: + kfree_skb(skb); + LeaveFunction(10); + return NF_STOLEN; +} +#endif + /* * ICMP packet transmitter @@ -625,3 +926,79 @@ ip_vs_icmp_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, ip_rt_put(rt); goto tx_error; } + +#ifdef CONFIG_IP_VS_IPV6 +int +ip_vs_icmp_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, + struct ip_vs_protocol *pp, int offset) +{ + struct rt6_info *rt; /* Route to the other host */ + int mtu; + int rc; + + EnterFunction(10); + + /* The ICMP packet for VS/TUN, VS/DR and LOCALNODE will be + forwarded directly here, because there is no need to + translate address/port back */ + if (IP_VS_FWD_METHOD(cp) != IP_VS_CONN_F_MASQ) { + if (cp->packet_xmit) + rc = cp->packet_xmit(skb, cp, pp); + else + rc = NF_ACCEPT; + /* do not touch skb anymore */ + atomic_inc(&cp->in_pkts); + goto out; + } + + /* + * mangle and send the packet here (only for VS/NAT) + */ + + rt = __ip_vs_get_out_rt_v6(cp); + if (!rt) + goto tx_error_icmp; + + /* MTU checking */ + mtu = dst_mtu(&rt->u.dst); + if (skb->len > mtu) { + dst_release(&rt->u.dst); + icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu, skb->dev); + IP_VS_DBG_RL("ip_vs_in_icmp(): frag needed\n"); + goto tx_error; + } + + /* copy-on-write the packet before mangling it */ + if (!skb_make_writable(skb, offset)) + goto tx_error_put; + + if (skb_cow(skb, rt->u.dst.dev->hard_header_len)) + goto tx_error_put; + + /* drop the old route when skb is not shared */ + dst_release(skb->dst); + skb->dst = &rt->u.dst; + + ip_vs_nat_icmp_v6(skb, pp, cp, 0); + + /* Another hack: avoid icmp_send in ip_fragment */ + skb->local_df = 1; + + IP_VS_XMIT(PF_INET6, skb, rt); + + rc = NF_STOLEN; + goto out; + +tx_error_icmp: + dst_link_failure(skb); +tx_error: + dev_kfree_skb(skb); + rc = NF_STOLEN; +out: + LeaveFunction(10); + return rc; +tx_error_put: + dst_release(&rt->u.dst); + goto tx_error; +} +#endif -- cgit v1.2.3-70-g09d2 From 7937df1564783806c285d34a1c6fd63d8da29d7a Mon Sep 17 00:00:00 2001 From: Julius Volz Date: Tue, 2 Sep 2008 15:55:48 +0200 Subject: IPVS: Convert real server lookup functions Convert functions for looking up destinations (real servers) to support IPv6 services/dests. Signed-off-by: Julius Volz Signed-off-by: Simon Horman --- include/net/ip_vs.h | 8 +++-- net/ipv4/ipvs/ip_vs_conn.c | 5 +-- net/ipv4/ipvs/ip_vs_core.c | 4 +-- net/ipv4/ipvs/ip_vs_ctl.c | 80 ++++++++++++++++++++++++++++++---------------- net/ipv4/ipvs/ip_vs_sync.c | 7 ++-- 5 files changed, 67 insertions(+), 37 deletions(-) (limited to 'include/net') diff --git a/include/net/ip_vs.h b/include/net/ip_vs.h index ac709fa5a79..a719c0ef99e 100644 --- a/include/net/ip_vs.h +++ b/include/net/ip_vs.h @@ -804,14 +804,16 @@ static inline void ip_vs_service_put(struct ip_vs_service *svc) } extern struct ip_vs_dest * -ip_vs_lookup_real_service(__u16 protocol, __be32 daddr, __be16 dport); +ip_vs_lookup_real_service(int af, __u16 protocol, + const union nf_inet_addr *daddr, __be16 dport); + extern int ip_vs_use_count_inc(void); extern void ip_vs_use_count_dec(void); extern int ip_vs_control_init(void); extern void ip_vs_control_cleanup(void); extern struct ip_vs_dest * -ip_vs_find_dest(__be32 daddr, __be16 dport, - __be32 vaddr, __be16 vport, __u16 protocol); +ip_vs_find_dest(int af, const union nf_inet_addr *daddr, __be16 dport, + const union nf_inet_addr *vaddr, __be16 vport, __u16 protocol); extern struct ip_vs_dest *ip_vs_try_bind_dest(struct ip_vs_conn *cp); diff --git a/net/ipv4/ipvs/ip_vs_conn.c b/net/ipv4/ipvs/ip_vs_conn.c index f5dddad6d5e..c2a42a62433 100644 --- a/net/ipv4/ipvs/ip_vs_conn.c +++ b/net/ipv4/ipvs/ip_vs_conn.c @@ -491,8 +491,9 @@ struct ip_vs_dest *ip_vs_try_bind_dest(struct ip_vs_conn *cp) struct ip_vs_dest *dest; if ((cp) && (!cp->dest)) { - dest = ip_vs_find_dest(cp->daddr.ip, cp->dport, - cp->vaddr.ip, cp->vport, cp->protocol); + dest = ip_vs_find_dest(cp->af, &cp->daddr, cp->dport, + &cp->vaddr, cp->vport, + cp->protocol); ip_vs_bind_dest(cp, dest); return dest; } else diff --git a/net/ipv4/ipvs/ip_vs_core.c b/net/ipv4/ipvs/ip_vs_core.c index 035a511e12f..27bef1d67aa 100644 --- a/net/ipv4/ipvs/ip_vs_core.c +++ b/net/ipv4/ipvs/ip_vs_core.c @@ -957,8 +957,8 @@ ip_vs_out(unsigned int hooknum, struct sk_buff *skb, sizeof(_ports), _ports); if (pptr == NULL) return NF_ACCEPT; /* Not for me */ - if (ip_vs_lookup_real_service(iph.protocol, - iph.saddr.ip, + if (ip_vs_lookup_real_service(af, iph.protocol, + &iph.saddr, pptr[0])) { /* * Notify the real server: there is no diff --git a/net/ipv4/ipvs/ip_vs_ctl.c b/net/ipv4/ipvs/ip_vs_ctl.c index 1f3fc66e694..bb0e1e3c885 100644 --- a/net/ipv4/ipvs/ip_vs_ctl.c +++ b/net/ipv4/ipvs/ip_vs_ctl.c @@ -492,11 +492,20 @@ __ip_vs_unbind_svc(struct ip_vs_dest *dest) /* * Returns hash value for real service */ -static __inline__ unsigned ip_vs_rs_hashkey(__be32 addr, __be16 port) +static inline unsigned ip_vs_rs_hashkey(int af, + const union nf_inet_addr *addr, + __be16 port) { register unsigned porth = ntohs(port); + __be32 addr_fold = addr->ip; + +#ifdef CONFIG_IP_VS_IPV6 + if (af == AF_INET6) + addr_fold = addr->ip6[0]^addr->ip6[1]^ + addr->ip6[2]^addr->ip6[3]; +#endif - return (ntohl(addr)^(porth>>IP_VS_RTAB_BITS)^porth) + return (ntohl(addr_fold)^(porth>>IP_VS_RTAB_BITS)^porth) & IP_VS_RTAB_MASK; } @@ -516,7 +525,8 @@ static int ip_vs_rs_hash(struct ip_vs_dest *dest) * Hash by proto,addr,port, * which are the parameters of the real service. */ - hash = ip_vs_rs_hashkey(dest->addr.ip, dest->port); + hash = ip_vs_rs_hashkey(dest->af, &dest->addr, dest->port); + list_add(&dest->d_list, &ip_vs_rtable[hash]); return 1; @@ -543,7 +553,9 @@ static int ip_vs_rs_unhash(struct ip_vs_dest *dest) * Lookup real service by in the real service table. */ struct ip_vs_dest * -ip_vs_lookup_real_service(__u16 protocol, __be32 daddr, __be16 dport) +ip_vs_lookup_real_service(int af, __u16 protocol, + const union nf_inet_addr *daddr, + __be16 dport) { unsigned hash; struct ip_vs_dest *dest; @@ -552,11 +564,12 @@ ip_vs_lookup_real_service(__u16 protocol, __be32 daddr, __be16 dport) * Check for "full" addressed entries * Return the first found entry */ - hash = ip_vs_rs_hashkey(daddr, dport); + hash = ip_vs_rs_hashkey(af, daddr, dport); read_lock(&__ip_vs_rs_lock); list_for_each_entry(dest, &ip_vs_rtable[hash], d_list) { - if ((dest->addr.ip == daddr) + if ((dest->af == af) + && ip_vs_addr_equal(af, &dest->addr, daddr) && (dest->port == dport) && ((dest->protocol == protocol) || dest->vfwmark)) { @@ -574,7 +587,8 @@ ip_vs_lookup_real_service(__u16 protocol, __be32 daddr, __be16 dport) * Lookup destination by {addr,port} in the given service */ static struct ip_vs_dest * -ip_vs_lookup_dest(struct ip_vs_service *svc, __be32 daddr, __be16 dport) +ip_vs_lookup_dest(struct ip_vs_service *svc, const union nf_inet_addr *daddr, + __be16 dport) { struct ip_vs_dest *dest; @@ -582,7 +596,9 @@ ip_vs_lookup_dest(struct ip_vs_service *svc, __be32 daddr, __be16 dport) * Find the destination for the given service */ list_for_each_entry(dest, &svc->destinations, n_list) { - if ((dest->addr.ip == daddr) && (dest->port == dport)) { + if ((dest->af == svc->af) + && ip_vs_addr_equal(svc->af, &dest->addr, daddr) + && (dest->port == dport)) { /* HIT */ return dest; } @@ -601,14 +617,15 @@ ip_vs_lookup_dest(struct ip_vs_service *svc, __be32 daddr, __be16 dport) * ip_vs_lookup_real_service() looked promissing, but * seems not working as expected. */ -struct ip_vs_dest *ip_vs_find_dest(__be32 daddr, __be16 dport, - __be32 vaddr, __be16 vport, __u16 protocol) +struct ip_vs_dest *ip_vs_find_dest(int af, const union nf_inet_addr *daddr, + __be16 dport, + const union nf_inet_addr *vaddr, + __be16 vport, __u16 protocol) { struct ip_vs_dest *dest; struct ip_vs_service *svc; - union nf_inet_addr _vaddr = { .ip = vaddr }; - svc = ip_vs_service_get(AF_INET, 0, protocol, &_vaddr, vport); + svc = ip_vs_service_get(af, 0, protocol, vaddr, vport); if (!svc) return NULL; dest = ip_vs_lookup_dest(svc, daddr, dport); @@ -629,7 +646,8 @@ struct ip_vs_dest *ip_vs_find_dest(__be32 daddr, __be16 dport, * scheduling. */ static struct ip_vs_dest * -ip_vs_trash_get_dest(struct ip_vs_service *svc, __be32 daddr, __be16 dport) +ip_vs_trash_get_dest(struct ip_vs_service *svc, const union nf_inet_addr *daddr, + __be16 dport) { struct ip_vs_dest *dest, *nxt; @@ -637,17 +655,19 @@ ip_vs_trash_get_dest(struct ip_vs_service *svc, __be32 daddr, __be16 dport) * Find the destination in trash */ list_for_each_entry_safe(dest, nxt, &ip_vs_dest_trash, n_list) { - IP_VS_DBG(3, "Destination %u/%u.%u.%u.%u:%u still in trash, " - "dest->refcnt=%d\n", - dest->vfwmark, - NIPQUAD(dest->addr.ip), ntohs(dest->port), - atomic_read(&dest->refcnt)); - if (dest->addr.ip == daddr && + IP_VS_DBG_BUF(3, "Destination %u/%s:%u still in trash, " + "dest->refcnt=%d\n", + dest->vfwmark, + IP_VS_DBG_ADDR(svc->af, &dest->addr), + ntohs(dest->port), + atomic_read(&dest->refcnt)); + if (dest->af == svc->af && + ip_vs_addr_equal(svc->af, &dest->addr, daddr) && dest->port == dport && dest->vfwmark == svc->fwmark && dest->protocol == svc->protocol && (svc->fwmark || - (dest->vaddr.ip == svc->addr.ip && + (ip_vs_addr_equal(svc->af, &dest->vaddr, &svc->addr) && dest->vport == svc->port))) { /* HIT */ return dest; @@ -657,10 +677,11 @@ ip_vs_trash_get_dest(struct ip_vs_service *svc, __be32 daddr, __be16 dport) * Try to purge the destination from trash if not referenced */ if (atomic_read(&dest->refcnt) == 1) { - IP_VS_DBG(3, "Removing destination %u/%u.%u.%u.%u:%u " - "from trash\n", - dest->vfwmark, - NIPQUAD(dest->addr.ip), ntohs(dest->port)); + IP_VS_DBG_BUF(3, "Removing destination %u/%s:%u " + "from trash\n", + dest->vfwmark, + IP_VS_DBG_ADDR(svc->af, &dest->addr), + ntohs(dest->port)); list_del(&dest->n_list); ip_vs_dst_reset(dest); __ip_vs_unbind_svc(dest); @@ -847,7 +868,8 @@ ip_vs_add_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest) /* * Check if the dest already exists in the list */ - dest = ip_vs_lookup_dest(svc, daddr.ip, dport); + dest = ip_vs_lookup_dest(svc, &daddr, dport); + if (dest != NULL) { IP_VS_DBG(1, "ip_vs_add_dest(): dest already exists\n"); return -EEXIST; @@ -857,7 +879,8 @@ ip_vs_add_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest) * Check if the dest already exists in the trash and * is from the same service */ - dest = ip_vs_trash_get_dest(svc, daddr.ip, dport); + dest = ip_vs_trash_get_dest(svc, &daddr, dport); + if (dest != NULL) { IP_VS_DBG(3, "Get destination %u.%u.%u.%u:%u from trash, " "dest->refcnt=%d, service %u/%u.%u.%u.%u:%u\n", @@ -956,7 +979,8 @@ ip_vs_edit_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest) /* * Lookup the destination list */ - dest = ip_vs_lookup_dest(svc, daddr.ip, dport); + dest = ip_vs_lookup_dest(svc, &daddr, dport); + if (dest == NULL) { IP_VS_DBG(1, "ip_vs_edit_dest(): dest doesn't exist\n"); return -ENOENT; @@ -1054,7 +1078,7 @@ ip_vs_del_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest) EnterFunction(2); - dest = ip_vs_lookup_dest(svc, udest->addr.ip, dport); + dest = ip_vs_lookup_dest(svc, &udest->addr, dport); if (dest == NULL) { IP_VS_DBG(1, "ip_vs_del_dest(): destination not found!\n"); diff --git a/net/ipv4/ipvs/ip_vs_sync.c b/net/ipv4/ipvs/ip_vs_sync.c index 3ce1093e067..40647edf102 100644 --- a/net/ipv4/ipvs/ip_vs_sync.c +++ b/net/ipv4/ipvs/ip_vs_sync.c @@ -383,8 +383,11 @@ static void ip_vs_process_message(const char *buffer, const size_t buflen) * If it is not found the connection will remain unbound * but still handled. */ - dest = ip_vs_find_dest(s->daddr, s->dport, - s->vaddr, s->vport, + dest = ip_vs_find_dest(AF_INET, + (union nf_inet_addr *)&s->daddr, + s->dport, + (union nf_inet_addr *)&s->vaddr, + s->vport, s->protocol); /* Set the approprite ativity flag */ if (s->protocol == IPPROTO_TCP) { -- cgit v1.2.3-70-g09d2 From cfc78c5a09241a3a9561466834996a7fb90c4228 Mon Sep 17 00:00:00 2001 From: Julius Volz Date: Tue, 2 Sep 2008 15:55:53 +0200 Subject: IPVS: Adjust various debug outputs to use new macros Adjust various debug outputs to use the new *_BUF macro variants for correct output of v4/v6 addresses. Signed-off-by: Julius Volz Signed-off-by: Simon Horman --- include/net/ip_vs.h | 53 +++++++++++++++++++++++--------------- net/ipv4/ipvs/ip_vs_conn.c | 57 ++++++++++++++++++++++------------------- net/ipv4/ipvs/ip_vs_ctl.c | 24 +++++++++-------- net/ipv4/ipvs/ip_vs_proto_tcp.c | 45 ++++++++++++++++++-------------- net/ipv4/ipvs/ip_vs_proto_udp.c | 15 ++++++----- 5 files changed, 111 insertions(+), 83 deletions(-) (limited to 'include/net') diff --git a/include/net/ip_vs.h b/include/net/ip_vs.h index a719c0ef99e..1b13cef4b54 100644 --- a/include/net/ip_vs.h +++ b/include/net/ip_vs.h @@ -680,24 +680,32 @@ static inline void ip_vs_control_del(struct ip_vs_conn *cp) { struct ip_vs_conn *ctl_cp = cp->control; if (!ctl_cp) { - IP_VS_ERR("request control DEL for uncontrolled: " - "%d.%d.%d.%d:%d to %d.%d.%d.%d:%d\n", - NIPQUAD(cp->caddr),ntohs(cp->cport), - NIPQUAD(cp->vaddr),ntohs(cp->vport)); + IP_VS_ERR_BUF("request control DEL for uncontrolled: " + "%s:%d to %s:%d\n", + IP_VS_DBG_ADDR(cp->af, &cp->caddr), + ntohs(cp->cport), + IP_VS_DBG_ADDR(cp->af, &cp->vaddr), + ntohs(cp->vport)); + return; } - IP_VS_DBG(7, "DELeting control for: " - "cp.dst=%d.%d.%d.%d:%d ctl_cp.dst=%d.%d.%d.%d:%d\n", - NIPQUAD(cp->caddr),ntohs(cp->cport), - NIPQUAD(ctl_cp->caddr),ntohs(ctl_cp->cport)); + IP_VS_DBG_BUF(7, "DELeting control for: " + "cp.dst=%s:%d ctl_cp.dst=%s:%d\n", + IP_VS_DBG_ADDR(cp->af, &cp->caddr), + ntohs(cp->cport), + IP_VS_DBG_ADDR(cp->af, &ctl_cp->caddr), + ntohs(ctl_cp->cport)); cp->control = NULL; if (atomic_read(&ctl_cp->n_control) == 0) { - IP_VS_ERR("BUG control DEL with n=0 : " - "%d.%d.%d.%d:%d to %d.%d.%d.%d:%d\n", - NIPQUAD(cp->caddr),ntohs(cp->cport), - NIPQUAD(cp->vaddr),ntohs(cp->vport)); + IP_VS_ERR_BUF("BUG control DEL with n=0 : " + "%s:%d to %s:%d\n", + IP_VS_DBG_ADDR(cp->af, &cp->caddr), + ntohs(cp->cport), + IP_VS_DBG_ADDR(cp->af, &cp->vaddr), + ntohs(cp->vport)); + return; } atomic_dec(&ctl_cp->n_control); @@ -707,17 +715,22 @@ static inline void ip_vs_control_add(struct ip_vs_conn *cp, struct ip_vs_conn *ctl_cp) { if (cp->control) { - IP_VS_ERR("request control ADD for already controlled: " - "%d.%d.%d.%d:%d to %d.%d.%d.%d:%d\n", - NIPQUAD(cp->caddr),ntohs(cp->cport), - NIPQUAD(cp->vaddr),ntohs(cp->vport)); + IP_VS_ERR_BUF("request control ADD for already controlled: " + "%s:%d to %s:%d\n", + IP_VS_DBG_ADDR(cp->af, &cp->caddr), + ntohs(cp->cport), + IP_VS_DBG_ADDR(cp->af, &cp->vaddr), + ntohs(cp->vport)); + ip_vs_control_del(cp); } - IP_VS_DBG(7, "ADDing control for: " - "cp.dst=%d.%d.%d.%d:%d ctl_cp.dst=%d.%d.%d.%d:%d\n", - NIPQUAD(cp->caddr),ntohs(cp->cport), - NIPQUAD(ctl_cp->caddr),ntohs(ctl_cp->cport)); + IP_VS_DBG_BUF(7, "ADDing control for: " + "cp.dst=%s:%d ctl_cp.dst=%s:%d\n", + IP_VS_DBG_ADDR(cp->af, &cp->caddr), + ntohs(cp->cport), + IP_VS_DBG_ADDR(cp->af, &ctl_cp->caddr), + ntohs(ctl_cp->cport)); cp->control = ctl_cp; atomic_inc(&ctl_cp->n_control); diff --git a/net/ipv4/ipvs/ip_vs_conn.c b/net/ipv4/ipvs/ip_vs_conn.c index e7603d749c0..9a24332fbed 100644 --- a/net/ipv4/ipvs/ip_vs_conn.c +++ b/net/ipv4/ipvs/ip_vs_conn.c @@ -449,16 +449,16 @@ ip_vs_bind_dest(struct ip_vs_conn *cp, struct ip_vs_dest *dest) cp->flags |= atomic_read(&dest->conn_flags); cp->dest = dest; - IP_VS_DBG(7, "Bind-dest %s c:%u.%u.%u.%u:%d v:%u.%u.%u.%u:%d " - "d:%u.%u.%u.%u:%d fwd:%c s:%u conn->flags:%X conn->refcnt:%d " - "dest->refcnt:%d\n", - ip_vs_proto_name(cp->protocol), - NIPQUAD(cp->caddr.ip), ntohs(cp->cport), - NIPQUAD(cp->vaddr.ip), ntohs(cp->vport), - NIPQUAD(cp->daddr.ip), ntohs(cp->dport), - ip_vs_fwd_tag(cp), cp->state, - cp->flags, atomic_read(&cp->refcnt), - atomic_read(&dest->refcnt)); + IP_VS_DBG_BUF(7, "Bind-dest %s c:%s:%d v:%s:%d " + "d:%s:%d fwd:%c s:%u conn->flags:%X conn->refcnt:%d " + "dest->refcnt:%d\n", + ip_vs_proto_name(cp->protocol), + IP_VS_DBG_ADDR(cp->af, &cp->caddr), ntohs(cp->cport), + IP_VS_DBG_ADDR(cp->af, &cp->vaddr), ntohs(cp->vport), + IP_VS_DBG_ADDR(cp->af, &cp->daddr), ntohs(cp->dport), + ip_vs_fwd_tag(cp), cp->state, + cp->flags, atomic_read(&cp->refcnt), + atomic_read(&dest->refcnt)); /* Update the connection counters */ if (!(cp->flags & IP_VS_CONN_F_TEMPLATE)) { @@ -512,16 +512,16 @@ static inline void ip_vs_unbind_dest(struct ip_vs_conn *cp) if (!dest) return; - IP_VS_DBG(7, "Unbind-dest %s c:%u.%u.%u.%u:%d v:%u.%u.%u.%u:%d " - "d:%u.%u.%u.%u:%d fwd:%c s:%u conn->flags:%X conn->refcnt:%d " - "dest->refcnt:%d\n", - ip_vs_proto_name(cp->protocol), - NIPQUAD(cp->caddr.ip), ntohs(cp->cport), - NIPQUAD(cp->vaddr.ip), ntohs(cp->vport), - NIPQUAD(cp->daddr.ip), ntohs(cp->dport), - ip_vs_fwd_tag(cp), cp->state, - cp->flags, atomic_read(&cp->refcnt), - atomic_read(&dest->refcnt)); + IP_VS_DBG_BUF(7, "Unbind-dest %s c:%s:%d v:%s:%d " + "d:%s:%d fwd:%c s:%u conn->flags:%X conn->refcnt:%d " + "dest->refcnt:%d\n", + ip_vs_proto_name(cp->protocol), + IP_VS_DBG_ADDR(cp->af, &cp->caddr), ntohs(cp->cport), + IP_VS_DBG_ADDR(cp->af, &cp->vaddr), ntohs(cp->vport), + IP_VS_DBG_ADDR(cp->af, &cp->daddr), ntohs(cp->dport), + ip_vs_fwd_tag(cp), cp->state, + cp->flags, atomic_read(&cp->refcnt), + atomic_read(&dest->refcnt)); /* Update the connection counters */ if (!(cp->flags & IP_VS_CONN_F_TEMPLATE)) { @@ -574,13 +574,16 @@ int ip_vs_check_template(struct ip_vs_conn *ct) !(dest->flags & IP_VS_DEST_F_AVAILABLE) || (sysctl_ip_vs_expire_quiescent_template && (atomic_read(&dest->weight) == 0))) { - IP_VS_DBG(9, "check_template: dest not available for " - "protocol %s s:%u.%u.%u.%u:%d v:%u.%u.%u.%u:%d " - "-> d:%u.%u.%u.%u:%d\n", - ip_vs_proto_name(ct->protocol), - NIPQUAD(ct->caddr.ip), ntohs(ct->cport), - NIPQUAD(ct->vaddr.ip), ntohs(ct->vport), - NIPQUAD(ct->daddr.ip), ntohs(ct->dport)); + IP_VS_DBG_BUF(9, "check_template: dest not available for " + "protocol %s s:%s:%d v:%s:%d " + "-> d:%s:%d\n", + ip_vs_proto_name(ct->protocol), + IP_VS_DBG_ADDR(ct->af, &ct->caddr), + ntohs(ct->cport), + IP_VS_DBG_ADDR(ct->af, &ct->vaddr), + ntohs(ct->vport), + IP_VS_DBG_ADDR(ct->af, &ct->daddr), + ntohs(ct->dport)); /* * Invalidate the connection template diff --git a/net/ipv4/ipvs/ip_vs_ctl.c b/net/ipv4/ipvs/ip_vs_ctl.c index 640203a153c..6dbc527285f 100644 --- a/net/ipv4/ipvs/ip_vs_ctl.c +++ b/net/ipv4/ipvs/ip_vs_ctl.c @@ -924,13 +924,14 @@ ip_vs_add_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest) dest = ip_vs_trash_get_dest(svc, &daddr, dport); if (dest != NULL) { - IP_VS_DBG(3, "Get destination %u.%u.%u.%u:%u from trash, " - "dest->refcnt=%d, service %u/%u.%u.%u.%u:%u\n", - NIPQUAD(daddr), ntohs(dport), - atomic_read(&dest->refcnt), - dest->vfwmark, - NIPQUAD(dest->vaddr.ip), - ntohs(dest->vport)); + IP_VS_DBG_BUF(3, "Get destination %s:%u from trash, " + "dest->refcnt=%d, service %u/%s:%u\n", + IP_VS_DBG_ADDR(svc->af, &daddr), ntohs(dport), + atomic_read(&dest->refcnt), + dest->vfwmark, + IP_VS_DBG_ADDR(svc->af, &dest->vaddr), + ntohs(dest->vport)); + __ip_vs_update_dest(svc, dest, udest); /* @@ -1076,10 +1077,11 @@ static void __ip_vs_del_dest(struct ip_vs_dest *dest) atomic_dec(&dest->svc->refcnt); kfree(dest); } else { - IP_VS_DBG(3, "Moving dest %u.%u.%u.%u:%u into trash, " - "dest->refcnt=%d\n", - NIPQUAD(dest->addr.ip), ntohs(dest->port), - atomic_read(&dest->refcnt)); + IP_VS_DBG_BUF(3, "Moving dest %s:%u into trash, " + "dest->refcnt=%d\n", + IP_VS_DBG_ADDR(dest->af, &dest->addr), + ntohs(dest->port), + atomic_read(&dest->refcnt)); list_add(&dest->n_list, &ip_vs_dest_trash); atomic_inc(&dest->refcnt); } diff --git a/net/ipv4/ipvs/ip_vs_proto_tcp.c b/net/ipv4/ipvs/ip_vs_proto_tcp.c index 3da2bb05ee7..de8ed73997c 100644 --- a/net/ipv4/ipvs/ip_vs_proto_tcp.c +++ b/net/ipv4/ipvs/ip_vs_proto_tcp.c @@ -490,19 +490,23 @@ set_tcp_state(struct ip_vs_protocol *pp, struct ip_vs_conn *cp, if (new_state != cp->state) { struct ip_vs_dest *dest = cp->dest; - IP_VS_DBG(8, "%s %s [%c%c%c%c] %u.%u.%u.%u:%d->" - "%u.%u.%u.%u:%d state: %s->%s conn->refcnt:%d\n", - pp->name, - (state_off==TCP_DIR_OUTPUT)?"output ":"input ", - th->syn? 'S' : '.', - th->fin? 'F' : '.', - th->ack? 'A' : '.', - th->rst? 'R' : '.', - NIPQUAD(cp->daddr.ip), ntohs(cp->dport), - NIPQUAD(cp->caddr.ip), ntohs(cp->cport), - tcp_state_name(cp->state), - tcp_state_name(new_state), - atomic_read(&cp->refcnt)); + IP_VS_DBG_BUF(8, "%s %s [%c%c%c%c] %s:%d->" + "%s:%d state: %s->%s conn->refcnt:%d\n", + pp->name, + ((state_off == TCP_DIR_OUTPUT) ? + "output " : "input "), + th->syn ? 'S' : '.', + th->fin ? 'F' : '.', + th->ack ? 'A' : '.', + th->rst ? 'R' : '.', + IP_VS_DBG_ADDR(cp->af, &cp->daddr), + ntohs(cp->dport), + IP_VS_DBG_ADDR(cp->af, &cp->caddr), + ntohs(cp->cport), + tcp_state_name(cp->state), + tcp_state_name(new_state), + atomic_read(&cp->refcnt)); + if (dest) { if (!(cp->flags & IP_VS_CONN_F_INACTIVE) && (new_state != IP_VS_TCP_S_ESTABLISHED)) { @@ -623,12 +627,15 @@ tcp_app_conn_bind(struct ip_vs_conn *cp) break; spin_unlock(&tcp_app_lock); - IP_VS_DBG(9, "%s: Binding conn %u.%u.%u.%u:%u->" - "%u.%u.%u.%u:%u to app %s on port %u\n", - __func__, - NIPQUAD(cp->caddr.ip), ntohs(cp->cport), - NIPQUAD(cp->vaddr.ip), ntohs(cp->vport), - inc->name, ntohs(inc->port)); + IP_VS_DBG_BUF(9, "%s: Binding conn %s:%u->" + "%s:%u to app %s on port %u\n", + __func__, + IP_VS_DBG_ADDR(cp->af, &cp->caddr), + ntohs(cp->cport), + IP_VS_DBG_ADDR(cp->af, &cp->vaddr), + ntohs(cp->vport), + inc->name, ntohs(inc->port)); + cp->app = inc; if (inc->init_conn) result = inc->init_conn(inc, cp); diff --git a/net/ipv4/ipvs/ip_vs_proto_udp.c b/net/ipv4/ipvs/ip_vs_proto_udp.c index fd8bd934cc0..5f2073e41cf 100644 --- a/net/ipv4/ipvs/ip_vs_proto_udp.c +++ b/net/ipv4/ipvs/ip_vs_proto_udp.c @@ -408,12 +408,15 @@ static int udp_app_conn_bind(struct ip_vs_conn *cp) break; spin_unlock(&udp_app_lock); - IP_VS_DBG(9, "%s: Binding conn %u.%u.%u.%u:%u->" - "%u.%u.%u.%u:%u to app %s on port %u\n", - __func__, - NIPQUAD(cp->caddr.ip), ntohs(cp->cport), - NIPQUAD(cp->vaddr.ip), ntohs(cp->vport), - inc->name, ntohs(inc->port)); + IP_VS_DBG_BUF(9, "%s: Binding conn %s:%u->" + "%s:%u to app %s on port %u\n", + __func__, + IP_VS_DBG_ADDR(cp->af, &cp->caddr), + ntohs(cp->cport), + IP_VS_DBG_ADDR(cp->af, &cp->vaddr), + ntohs(cp->vport), + inc->name, ntohs(inc->port)); + cp->app = inc; if (inc->init_conn) result = inc->init_conn(inc, cp); -- cgit v1.2.3-70-g09d2 From f59ac0481660e66cec67f1d6b024e78b9dc715fe Mon Sep 17 00:00:00 2001 From: "Luis R. Rodriguez" Date: Fri, 29 Aug 2008 16:26:43 -0700 Subject: cfg80211: keep track of supported interface modes It is obviously good for userspace to know up front which interface modes a given piece of hardware might support (even if adding such an interface might fail later because of concurrency issues), so let's make cfg80211 aware of that. For good measure, disallow adding interfaces in all other modes so drivers don't forget to announce support for one mode when they add it. Signed-off-by: Johannes Berg Signed-off-by: Stephen Blackheath Signed-off-by: Ivo van Doorn Signed-off-by: Luis R. Rodriguez Signed-off-by: John W. Linville --- drivers/net/wireless/adm8211.c | 1 + drivers/net/wireless/ath5k/base.c | 6 ++++++ drivers/net/wireless/ath9k/main.c | 5 +++++ drivers/net/wireless/b43/main.c | 7 +++++++ drivers/net/wireless/b43legacy/main.c | 5 +++++ drivers/net/wireless/iwlwifi/iwl-core.c | 4 ++++ drivers/net/wireless/iwlwifi/iwl3945-base.c | 5 +++++ drivers/net/wireless/mac80211_hwsim.c | 3 +++ drivers/net/wireless/p54/p54common.c | 3 +++ drivers/net/wireless/rt2x00/rt2x00dev.c | 5 +++++ drivers/net/wireless/rtl8187_dev.c | 2 ++ drivers/net/wireless/zd1211rw/zd_mac.c | 5 +++++ include/linux/nl80211.h | 6 ++++++ include/net/wireless.h | 3 +++ net/mac80211/main.c | 7 +++++++ net/wireless/core.c | 9 ++++++++- net/wireless/nl80211.c | 22 ++++++++++++++++++++-- 17 files changed, 95 insertions(+), 3 deletions(-) (limited to 'include/net') diff --git a/drivers/net/wireless/adm8211.c b/drivers/net/wireless/adm8211.c index 3333d4596b8..c6a55cd12db 100644 --- a/drivers/net/wireless/adm8211.c +++ b/drivers/net/wireless/adm8211.c @@ -1884,6 +1884,7 @@ static int __devinit adm8211_probe(struct pci_dev *pdev, dev->extra_tx_headroom = sizeof(struct adm8211_tx_hdr); /* dev->flags = IEEE80211_HW_RX_INCLUDES_FCS in promisc mode */ dev->flags = IEEE80211_HW_SIGNAL_UNSPEC; + dev->wiphy->interface_modes = BIT(NL80211_IFTYPE_STATION); dev->channel_change_time = 1000; dev->max_signal = 100; /* FIXME: find better value */ diff --git a/drivers/net/wireless/ath5k/base.c b/drivers/net/wireless/ath5k/base.c index 7989ab5c2bb..85260c39aa2 100644 --- a/drivers/net/wireless/ath5k/base.c +++ b/drivers/net/wireless/ath5k/base.c @@ -485,6 +485,12 @@ ath5k_pci_probe(struct pci_dev *pdev, hw->flags = IEEE80211_HW_RX_INCLUDES_FCS | IEEE80211_HW_SIGNAL_DBM | IEEE80211_HW_NOISE_DBM; + + hw->wiphy->interface_modes = + BIT(NL80211_IFTYPE_STATION) | + BIT(NL80211_IFTYPE_ADHOC) | + BIT(NL80211_IFTYPE_MESH_POINT); + hw->extra_tx_headroom = 2; hw->channel_change_time = 5000; sc = hw->priv; diff --git a/drivers/net/wireless/ath9k/main.c b/drivers/net/wireless/ath9k/main.c index dc45eef3289..39a4a70d013 100644 --- a/drivers/net/wireless/ath9k/main.c +++ b/drivers/net/wireless/ath9k/main.c @@ -1482,6 +1482,11 @@ static int ath_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id) IEEE80211_HW_SIGNAL_DBM | IEEE80211_HW_NOISE_DBM; + hw->wiphy->interface_modes = + BIT(NL80211_IFTYPE_AP) | + BIT(NL80211_IFTYPE_STATION) | + BIT(NL80211_IFTYPE_ADHOC); + SET_IEEE80211_DEV(hw, &pdev->dev); pci_set_drvdata(pdev, hw); diff --git a/drivers/net/wireless/b43/main.c b/drivers/net/wireless/b43/main.c index 63bafc2f3f0..2d915c1a82a 100644 --- a/drivers/net/wireless/b43/main.c +++ b/drivers/net/wireless/b43/main.c @@ -4569,6 +4569,13 @@ static int b43_wireless_init(struct ssb_device *dev) IEEE80211_HW_SIGNAL_DBM | IEEE80211_HW_NOISE_DBM; + hw->wiphy->interface_modes = + BIT(NL80211_IFTYPE_AP) | + BIT(NL80211_IFTYPE_MESH_POINT) | + BIT(NL80211_IFTYPE_STATION) | + BIT(NL80211_IFTYPE_WDS) | + BIT(NL80211_IFTYPE_ADHOC); + hw->queues = b43_modparam_qos ? 4 : 1; SET_IEEE80211_DEV(hw, dev->dev); if (is_valid_ether_addr(sprom->et1mac)) diff --git a/drivers/net/wireless/b43legacy/main.c b/drivers/net/wireless/b43legacy/main.c index 1cb77db5c29..68f63f5093a 100644 --- a/drivers/net/wireless/b43legacy/main.c +++ b/drivers/net/wireless/b43legacy/main.c @@ -3704,6 +3704,11 @@ static int b43legacy_wireless_init(struct ssb_device *dev) hw->flags = IEEE80211_HW_RX_INCLUDES_FCS | IEEE80211_HW_SIGNAL_DBM | IEEE80211_HW_NOISE_DBM; + hw->wiphy->interface_modes = + BIT(NL80211_IFTYPE_AP) | + BIT(NL80211_IFTYPE_STATION) | + BIT(NL80211_IFTYPE_WDS) | + BIT(NL80211_IFTYPE_ADHOC); hw->queues = 1; /* FIXME: hardware has more queues */ SET_IEEE80211_DEV(hw, dev->dev); if (is_valid_ether_addr(sprom->et1mac)) diff --git a/drivers/net/wireless/iwlwifi/iwl-core.c b/drivers/net/wireless/iwlwifi/iwl-core.c index fbf75a62958..0a511ef8e35 100644 --- a/drivers/net/wireless/iwlwifi/iwl-core.c +++ b/drivers/net/wireless/iwlwifi/iwl-core.c @@ -819,6 +819,10 @@ int iwl_setup_mac(struct iwl_priv *priv) /* Tell mac80211 our characteristics */ hw->flags = IEEE80211_HW_SIGNAL_DBM | IEEE80211_HW_NOISE_DBM; + hw->wiphy->interface_modes = + BIT(NL80211_IFTYPE_AP) | + BIT(NL80211_IFTYPE_STATION) | + BIT(NL80211_IFTYPE_ADHOC); /* Default value; 4 EDCA QOS priorities */ hw->queues = 4; /* queues to support 11n aggregation */ diff --git a/drivers/net/wireless/iwlwifi/iwl3945-base.c b/drivers/net/wireless/iwlwifi/iwl3945-base.c index a622fc33590..cee3045f160 100644 --- a/drivers/net/wireless/iwlwifi/iwl3945-base.c +++ b/drivers/net/wireless/iwlwifi/iwl3945-base.c @@ -7888,6 +7888,11 @@ static int iwl3945_pci_probe(struct pci_dev *pdev, const struct pci_device_id *e hw->flags = IEEE80211_HW_SIGNAL_DBM | IEEE80211_HW_NOISE_DBM; + hw->wiphy->interface_modes = + BIT(NL80211_IFTYPE_AP) | + BIT(NL80211_IFTYPE_STATION) | + BIT(NL80211_IFTYPE_ADHOC); + /* 4 EDCA QOS priorities */ hw->queues = 4; diff --git a/drivers/net/wireless/mac80211_hwsim.c b/drivers/net/wireless/mac80211_hwsim.c index 732429d4912..6ba50f087f7 100644 --- a/drivers/net/wireless/mac80211_hwsim.c +++ b/drivers/net/wireless/mac80211_hwsim.c @@ -447,6 +447,9 @@ static int __init init_mac80211_hwsim(void) hw->channel_change_time = 1; hw->queues = 4; + hw->wiphy->interface_modes = + BIT(NL80211_IFTYPE_STATION) | + BIT(NL80211_IFTYPE_AP); hw->ampdu_queues = 1; memcpy(data->channels, hwsim_channels, sizeof(hwsim_channels)); diff --git a/drivers/net/wireless/p54/p54common.c b/drivers/net/wireless/p54/p54common.c index 17e06bbc996..6da98e6e6a9 100644 --- a/drivers/net/wireless/p54/p54common.c +++ b/drivers/net/wireless/p54/p54common.c @@ -1072,6 +1072,9 @@ struct ieee80211_hw *p54_init_common(size_t priv_data_len) dev->flags = IEEE80211_HW_HOST_BROADCAST_PS_BUFFERING | /* not sure */ IEEE80211_HW_RX_INCLUDES_FCS | IEEE80211_HW_SIGNAL_UNSPEC; + + dev->wiphy->interface_modes = BIT(NL80211_IFTYPE_STATION); + dev->channel_change_time = 1000; /* TODO: find actual value */ dev->max_signal = 127; diff --git a/drivers/net/wireless/rt2x00/rt2x00dev.c b/drivers/net/wireless/rt2x00/rt2x00dev.c index 369b0b2d864..2f3bfc60688 100644 --- a/drivers/net/wireless/rt2x00/rt2x00dev.c +++ b/drivers/net/wireless/rt2x00/rt2x00dev.c @@ -1052,6 +1052,11 @@ int rt2x00lib_probe_dev(struct rt2x00_dev *rt2x00dev) */ rt2x00dev->hw->vif_data_size = sizeof(struct rt2x00_intf); + rt2x00dev->hw->wiphy->interface_modes = + BIT(NL80211_IFTYPE_AP) | + BIT(NL80211_IFTYPE_STATION) | + BIT(NL80211_IFTYPE_ADHOC); + /* * Let the driver probe the device to detect the capabilities. */ diff --git a/drivers/net/wireless/rtl8187_dev.c b/drivers/net/wireless/rtl8187_dev.c index 060a2650535..8a42bfa6d4f 100644 --- a/drivers/net/wireless/rtl8187_dev.c +++ b/drivers/net/wireless/rtl8187_dev.c @@ -1184,6 +1184,8 @@ static int __devinit rtl8187_probe(struct usb_interface *intf, dev->max_signal = 65; } + dev->wiphy->interface_modes = BIT(NL80211_IFTYPE_STATION); + if ((id->driver_info == DEVICE_RTL8187) && priv->is_rtl8187b) printk(KERN_INFO "rtl8187: inconsistency between id with OEM" " info!\n"); diff --git a/drivers/net/wireless/zd1211rw/zd_mac.c b/drivers/net/wireless/zd1211rw/zd_mac.c index 4d7b98b0503..e019102b228 100644 --- a/drivers/net/wireless/zd1211rw/zd_mac.c +++ b/drivers/net/wireless/zd1211rw/zd_mac.c @@ -937,6 +937,11 @@ struct ieee80211_hw *zd_mac_alloc_hw(struct usb_interface *intf) hw->flags = IEEE80211_HW_RX_INCLUDES_FCS | IEEE80211_HW_SIGNAL_DB; + hw->wiphy->interface_modes = + BIT(NL80211_IFTYPE_MESH_POINT) | + BIT(NL80211_IFTYPE_STATION) | + BIT(NL80211_IFTYPE_ADHOC); + hw->max_signal = 100; hw->queues = 1; hw->extra_tx_headroom = sizeof(struct zd_ctrlset); diff --git a/include/linux/nl80211.h b/include/linux/nl80211.h index 0c1147de3ec..5e51f4e7600 100644 --- a/include/linux/nl80211.h +++ b/include/linux/nl80211.h @@ -210,6 +210,10 @@ enum nl80211_commands { * @NL80211_ATTR_HT_CAPABILITY: HT Capability information element (from * association request when used with NL80211_CMD_NEW_STATION) * + * @NL80211_ATTR_SUPPORTED_IFTYPES: nested attribute containing all + * supported interface types, each a flag attribute with the number + * of the interface mode. + * * @NL80211_ATTR_MAX: highest attribute number currently defined * @__NL80211_ATTR_AFTER_LAST: internal use */ @@ -259,6 +263,8 @@ enum nl80211_attrs { NL80211_ATTR_HT_CAPABILITY, + NL80211_ATTR_SUPPORTED_IFTYPES, + /* add attributes here, update the policy in nl80211.c */ __NL80211_ATTR_AFTER_LAST, diff --git a/include/net/wireless.h b/include/net/wireless.h index 9324f8dd183..1dc8ec3daa2 100644 --- a/include/net/wireless.h +++ b/include/net/wireless.h @@ -185,6 +185,9 @@ struct wiphy { /* permanent MAC address */ u8 perm_addr[ETH_ALEN]; + /* Supported interface modes, OR together BIT(NL80211_IFTYPE_...) */ + u16 interface_modes; + /* If multiple wiphys are registered and you're handed e.g. * a regular netdev with assigned ieee80211_ptr, you won't * know whether it points to a wiphy your driver has registered diff --git a/net/mac80211/main.c b/net/mac80211/main.c index 638b75f36e2..396cfb2d0f4 100644 --- a/net/mac80211/main.c +++ b/net/mac80211/main.c @@ -1675,6 +1675,13 @@ int ieee80211_register_hw(struct ieee80211_hw *hw) } } + /* if low-level driver supports AP, we also support VLAN */ + if (local->hw.wiphy->interface_modes & BIT(NL80211_IFTYPE_AP)) + local->hw.wiphy->interface_modes |= BIT(NL80211_IFTYPE_AP_VLAN); + + /* mac80211 always supports monitor */ + local->hw.wiphy->interface_modes |= BIT(NL80211_IFTYPE_MONITOR); + result = wiphy_register(local->hw.wiphy); if (result < 0) return result; diff --git a/net/wireless/core.c b/net/wireless/core.c index f1da0b93bc5..7e995ac06a0 100644 --- a/net/wireless/core.c +++ b/net/wireless/core.c @@ -1,7 +1,7 @@ /* * This is the linux wireless configuration interface. * - * Copyright 2006, 2007 Johannes Berg + * Copyright 2006-2008 Johannes Berg */ #include @@ -259,6 +259,13 @@ int wiphy_register(struct wiphy *wiphy) struct ieee80211_supported_band *sband; bool have_band = false; int i; + u16 ifmodes = wiphy->interface_modes; + + /* sanity check ifmodes */ + WARN_ON(!ifmodes); + ifmodes &= ((1 << __NL80211_IFTYPE_AFTER_LAST) - 1) & ~1; + if (WARN_ON(ifmodes != wiphy->interface_modes)) + wiphy->interface_modes = ifmodes; /* sanity check supported bands/channels */ for (band = 0; band < IEEE80211_NUM_BANDS; band++) { diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 4d6c02afd6f..77880ba8b61 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -113,10 +113,12 @@ static int nl80211_send_wiphy(struct sk_buff *msg, u32 pid, u32 seq, int flags, struct nlattr *nl_bands, *nl_band; struct nlattr *nl_freqs, *nl_freq; struct nlattr *nl_rates, *nl_rate; + struct nlattr *nl_modes; enum ieee80211_band band; struct ieee80211_channel *chan; struct ieee80211_rate *rate; int i; + u16 ifmodes = dev->wiphy.interface_modes; hdr = nl80211hdr_put(msg, pid, seq, flags, NL80211_CMD_NEW_WIPHY); if (!hdr) @@ -125,6 +127,20 @@ static int nl80211_send_wiphy(struct sk_buff *msg, u32 pid, u32 seq, int flags, NLA_PUT_U32(msg, NL80211_ATTR_WIPHY, dev->idx); NLA_PUT_STRING(msg, NL80211_ATTR_WIPHY_NAME, wiphy_name(&dev->wiphy)); + nl_modes = nla_nest_start(msg, NL80211_ATTR_SUPPORTED_IFTYPES); + if (!nl_modes) + goto nla_put_failure; + + i = 0; + while (ifmodes) { + if (ifmodes & 1) + NLA_PUT_FLAG(msg, i); + ifmodes >>= 1; + i++; + } + + nla_nest_end(msg, nl_modes); + nl_bands = nla_nest_start(msg, NL80211_ATTR_WIPHY_BANDS); if (!nl_bands) goto nla_put_failure; @@ -415,7 +431,8 @@ static int nl80211_set_interface(struct sk_buff *skb, struct genl_info *info) ifindex = dev->ifindex; dev_put(dev); - if (!drv->ops->change_virtual_intf) { + if (!drv->ops->change_virtual_intf || + !(drv->wiphy.interface_modes & (1 << type))) { err = -EOPNOTSUPP; goto unlock; } @@ -462,7 +479,8 @@ static int nl80211_new_interface(struct sk_buff *skb, struct genl_info *info) if (IS_ERR(drv)) return PTR_ERR(drv); - if (!drv->ops->add_virtual_intf) { + if (!drv->ops->add_virtual_intf || + !(drv->wiphy.interface_modes & (1 << type))) { err = -EOPNOTSUPP; goto unlock; } -- cgit v1.2.3-70-g09d2 From 2206a3f5b75be5dadf11541961bd7c924857eb5d Mon Sep 17 00:00:00 2001 From: Sven Wegener Date: Mon, 8 Sep 2008 13:38:11 +0200 Subject: ipvs: Restrict connection table size via Kconfig Instead of checking the value in include/net/ip_vs.h, we can just restrict the range in our Kconfig file. This will prevent values outside of the range early. Signed-off-by: Sven Wegener Reviewed-by: Julius Volz Signed-off-by: Simon Horman --- include/net/ip_vs.h | 10 +--------- net/ipv4/ipvs/Kconfig | 3 ++- 2 files changed, 3 insertions(+), 10 deletions(-) (limited to 'include/net') diff --git a/include/net/ip_vs.h b/include/net/ip_vs.h index 1b13cef4b54..38f4f690b18 100644 --- a/include/net/ip_vs.h +++ b/include/net/ip_vs.h @@ -621,16 +621,8 @@ extern void ip_vs_init_hash_table(struct list_head *table, int rows); #ifndef CONFIG_IP_VS_TAB_BITS #define CONFIG_IP_VS_TAB_BITS 12 #endif -/* make sure that IP_VS_CONN_TAB_BITS is located in [8, 20] */ -#if CONFIG_IP_VS_TAB_BITS < 8 -#define IP_VS_CONN_TAB_BITS 8 -#endif -#if CONFIG_IP_VS_TAB_BITS > 20 -#define IP_VS_CONN_TAB_BITS 20 -#endif -#if 8 <= CONFIG_IP_VS_TAB_BITS && CONFIG_IP_VS_TAB_BITS <= 20 + #define IP_VS_CONN_TAB_BITS CONFIG_IP_VS_TAB_BITS -#endif #define IP_VS_CONN_TAB_SIZE (1 << IP_VS_CONN_TAB_BITS) #define IP_VS_CONN_TAB_MASK (IP_VS_CONN_TAB_SIZE - 1) diff --git a/net/ipv4/ipvs/Kconfig b/net/ipv4/ipvs/Kconfig index 794cecb249a..de6004de80b 100644 --- a/net/ipv4/ipvs/Kconfig +++ b/net/ipv4/ipvs/Kconfig @@ -41,7 +41,8 @@ config IP_VS_DEBUG config IP_VS_TAB_BITS int "IPVS connection table size (the Nth power of 2)" - default "12" + range 8 20 + default 12 ---help--- The IPVS connection hash table uses the chaining scheme to handle hash collisions. Using a big IPVS connection hash table will greatly -- cgit v1.2.3-70-g09d2 From e9c0ce232e7a36daae1ca08282609d7f0c57c567 Mon Sep 17 00:00:00 2001 From: Sven Wegener Date: Mon, 8 Sep 2008 13:39:04 +0200 Subject: ipvs: Embed user stats structure into kernel stats structure Instead of duplicating the fields, integrate a user stats structure into the kernel stats structure. This is more robust when the members are changed, because they are now automatically kept in sync. Signed-off-by: Sven Wegener Reviewed-by: Julius Volz Signed-off-by: Simon Horman --- include/net/ip_vs.h | 21 ++---------------- net/ipv4/ipvs/ip_vs_core.c | 30 +++++++++++++------------- net/ipv4/ipvs/ip_vs_ctl.c | 53 ++++++++++++++++++---------------------------- net/ipv4/ipvs/ip_vs_est.c | 40 +++++++++++++++++----------------- 4 files changed, 58 insertions(+), 86 deletions(-) (limited to 'include/net') diff --git a/include/net/ip_vs.h b/include/net/ip_vs.h index 38f4f690b18..33e2ac6ceb3 100644 --- a/include/net/ip_vs.h +++ b/include/net/ip_vs.h @@ -254,27 +254,10 @@ struct ip_vs_estimator { struct ip_vs_stats { - __u32 conns; /* connections scheduled */ - __u32 inpkts; /* incoming packets */ - __u32 outpkts; /* outgoing packets */ - __u64 inbytes; /* incoming bytes */ - __u64 outbytes; /* outgoing bytes */ - - __u32 cps; /* current connection rate */ - __u32 inpps; /* current in packet rate */ - __u32 outpps; /* current out packet rate */ - __u32 inbps; /* current in byte rate */ - __u32 outbps; /* current out byte rate */ - - /* - * Don't add anything before the lock, because we use memcpy() to copy - * the members before the lock to struct ip_vs_stats_user in - * ip_vs_ctl.c. - */ + struct ip_vs_stats_user ustats; /* statistics */ + struct ip_vs_estimator est; /* estimator */ spinlock_t lock; /* spin lock */ - - struct ip_vs_estimator est; /* estimator */ }; struct dst_entry; diff --git a/net/ipv4/ipvs/ip_vs_core.c b/net/ipv4/ipvs/ip_vs_core.c index bdc92d73fbe..80a4fcf33a5 100644 --- a/net/ipv4/ipvs/ip_vs_core.c +++ b/net/ipv4/ipvs/ip_vs_core.c @@ -102,18 +102,18 @@ ip_vs_in_stats(struct ip_vs_conn *cp, struct sk_buff *skb) struct ip_vs_dest *dest = cp->dest; if (dest && (dest->flags & IP_VS_DEST_F_AVAILABLE)) { spin_lock(&dest->stats.lock); - dest->stats.inpkts++; - dest->stats.inbytes += skb->len; + dest->stats.ustats.inpkts++; + dest->stats.ustats.inbytes += skb->len; spin_unlock(&dest->stats.lock); spin_lock(&dest->svc->stats.lock); - dest->svc->stats.inpkts++; - dest->svc->stats.inbytes += skb->len; + dest->svc->stats.ustats.inpkts++; + dest->svc->stats.ustats.inbytes += skb->len; spin_unlock(&dest->svc->stats.lock); spin_lock(&ip_vs_stats.lock); - ip_vs_stats.inpkts++; - ip_vs_stats.inbytes += skb->len; + ip_vs_stats.ustats.inpkts++; + ip_vs_stats.ustats.inbytes += skb->len; spin_unlock(&ip_vs_stats.lock); } } @@ -125,18 +125,18 @@ ip_vs_out_stats(struct ip_vs_conn *cp, struct sk_buff *skb) struct ip_vs_dest *dest = cp->dest; if (dest && (dest->flags & IP_VS_DEST_F_AVAILABLE)) { spin_lock(&dest->stats.lock); - dest->stats.outpkts++; - dest->stats.outbytes += skb->len; + dest->stats.ustats.outpkts++; + dest->stats.ustats.outbytes += skb->len; spin_unlock(&dest->stats.lock); spin_lock(&dest->svc->stats.lock); - dest->svc->stats.outpkts++; - dest->svc->stats.outbytes += skb->len; + dest->svc->stats.ustats.outpkts++; + dest->svc->stats.ustats.outbytes += skb->len; spin_unlock(&dest->svc->stats.lock); spin_lock(&ip_vs_stats.lock); - ip_vs_stats.outpkts++; - ip_vs_stats.outbytes += skb->len; + ip_vs_stats.ustats.outpkts++; + ip_vs_stats.ustats.outbytes += skb->len; spin_unlock(&ip_vs_stats.lock); } } @@ -146,15 +146,15 @@ static inline void ip_vs_conn_stats(struct ip_vs_conn *cp, struct ip_vs_service *svc) { spin_lock(&cp->dest->stats.lock); - cp->dest->stats.conns++; + cp->dest->stats.ustats.conns++; spin_unlock(&cp->dest->stats.lock); spin_lock(&svc->stats.lock); - svc->stats.conns++; + svc->stats.ustats.conns++; spin_unlock(&svc->stats.lock); spin_lock(&ip_vs_stats.lock); - ip_vs_stats.conns++; + ip_vs_stats.ustats.conns++; spin_unlock(&ip_vs_stats.lock); } diff --git a/net/ipv4/ipvs/ip_vs_ctl.c b/net/ipv4/ipvs/ip_vs_ctl.c index e53efe41f01..993a83fb0d5 100644 --- a/net/ipv4/ipvs/ip_vs_ctl.c +++ b/net/ipv4/ipvs/ip_vs_ctl.c @@ -744,18 +744,7 @@ ip_vs_zero_stats(struct ip_vs_stats *stats) { spin_lock_bh(&stats->lock); - stats->conns = 0; - stats->inpkts = 0; - stats->outpkts = 0; - stats->inbytes = 0; - stats->outbytes = 0; - - stats->cps = 0; - stats->inpps = 0; - stats->outpps = 0; - stats->inbps = 0; - stats->outbps = 0; - + memset(&stats->ustats, 0, sizeof(stats->ustats)); ip_vs_zero_estimator(stats); spin_unlock_bh(&stats->lock); @@ -1964,20 +1953,20 @@ static int ip_vs_stats_show(struct seq_file *seq, void *v) " Conns Packets Packets Bytes Bytes\n"); spin_lock_bh(&ip_vs_stats.lock); - seq_printf(seq, "%8X %8X %8X %16LX %16LX\n\n", ip_vs_stats.conns, - ip_vs_stats.inpkts, ip_vs_stats.outpkts, - (unsigned long long) ip_vs_stats.inbytes, - (unsigned long long) ip_vs_stats.outbytes); + seq_printf(seq, "%8X %8X %8X %16LX %16LX\n\n", ip_vs_stats.ustats.conns, + ip_vs_stats.ustats.inpkts, ip_vs_stats.ustats.outpkts, + (unsigned long long) ip_vs_stats.ustats.inbytes, + (unsigned long long) ip_vs_stats.ustats.outbytes); /* 01234567 01234567 01234567 0123456701234567 0123456701234567 */ seq_puts(seq, " Conns/s Pkts/s Pkts/s Bytes/s Bytes/s\n"); seq_printf(seq,"%8X %8X %8X %16X %16X\n", - ip_vs_stats.cps, - ip_vs_stats.inpps, - ip_vs_stats.outpps, - ip_vs_stats.inbps, - ip_vs_stats.outbps); + ip_vs_stats.ustats.cps, + ip_vs_stats.ustats.inpps, + ip_vs_stats.ustats.outpps, + ip_vs_stats.ustats.inbps, + ip_vs_stats.ustats.outbps); spin_unlock_bh(&ip_vs_stats.lock); return 0; @@ -2215,7 +2204,7 @@ static void ip_vs_copy_stats(struct ip_vs_stats_user *dst, struct ip_vs_stats *src) { spin_lock_bh(&src->lock); - memcpy(dst, src, (char*)&src->lock - (char*)src); + memcpy(dst, &src->ustats, sizeof(*dst)); spin_unlock_bh(&src->lock); } @@ -2591,16 +2580,16 @@ static int ip_vs_genl_fill_stats(struct sk_buff *skb, int container_type, spin_lock_bh(&stats->lock); - NLA_PUT_U32(skb, IPVS_STATS_ATTR_CONNS, stats->conns); - NLA_PUT_U32(skb, IPVS_STATS_ATTR_INPKTS, stats->inpkts); - NLA_PUT_U32(skb, IPVS_STATS_ATTR_OUTPKTS, stats->outpkts); - NLA_PUT_U64(skb, IPVS_STATS_ATTR_INBYTES, stats->inbytes); - NLA_PUT_U64(skb, IPVS_STATS_ATTR_OUTBYTES, stats->outbytes); - NLA_PUT_U32(skb, IPVS_STATS_ATTR_CPS, stats->cps); - NLA_PUT_U32(skb, IPVS_STATS_ATTR_INPPS, stats->inpps); - NLA_PUT_U32(skb, IPVS_STATS_ATTR_OUTPPS, stats->outpps); - NLA_PUT_U32(skb, IPVS_STATS_ATTR_INBPS, stats->inbps); - NLA_PUT_U32(skb, IPVS_STATS_ATTR_OUTBPS, stats->outbps); + NLA_PUT_U32(skb, IPVS_STATS_ATTR_CONNS, stats->ustats.conns); + NLA_PUT_U32(skb, IPVS_STATS_ATTR_INPKTS, stats->ustats.inpkts); + NLA_PUT_U32(skb, IPVS_STATS_ATTR_OUTPKTS, stats->ustats.outpkts); + NLA_PUT_U64(skb, IPVS_STATS_ATTR_INBYTES, stats->ustats.inbytes); + NLA_PUT_U64(skb, IPVS_STATS_ATTR_OUTBYTES, stats->ustats.outbytes); + NLA_PUT_U32(skb, IPVS_STATS_ATTR_CPS, stats->ustats.cps); + NLA_PUT_U32(skb, IPVS_STATS_ATTR_INPPS, stats->ustats.inpps); + NLA_PUT_U32(skb, IPVS_STATS_ATTR_OUTPPS, stats->ustats.outpps); + NLA_PUT_U32(skb, IPVS_STATS_ATTR_INBPS, stats->ustats.inbps); + NLA_PUT_U32(skb, IPVS_STATS_ATTR_OUTBPS, stats->ustats.outbps); spin_unlock_bh(&stats->lock); diff --git a/net/ipv4/ipvs/ip_vs_est.c b/net/ipv4/ipvs/ip_vs_est.c index 4fb620ec208..2eb2860dabb 100644 --- a/net/ipv4/ipvs/ip_vs_est.c +++ b/net/ipv4/ipvs/ip_vs_est.c @@ -65,37 +65,37 @@ static void estimation_timer(unsigned long arg) s = container_of(e, struct ip_vs_stats, est); spin_lock(&s->lock); - n_conns = s->conns; - n_inpkts = s->inpkts; - n_outpkts = s->outpkts; - n_inbytes = s->inbytes; - n_outbytes = s->outbytes; + n_conns = s->ustats.conns; + n_inpkts = s->ustats.inpkts; + n_outpkts = s->ustats.outpkts; + n_inbytes = s->ustats.inbytes; + n_outbytes = s->ustats.outbytes; /* scaled by 2^10, but divided 2 seconds */ rate = (n_conns - e->last_conns)<<9; e->last_conns = n_conns; e->cps += ((long)rate - (long)e->cps)>>2; - s->cps = (e->cps+0x1FF)>>10; + s->ustats.cps = (e->cps+0x1FF)>>10; rate = (n_inpkts - e->last_inpkts)<<9; e->last_inpkts = n_inpkts; e->inpps += ((long)rate - (long)e->inpps)>>2; - s->inpps = (e->inpps+0x1FF)>>10; + s->ustats.inpps = (e->inpps+0x1FF)>>10; rate = (n_outpkts - e->last_outpkts)<<9; e->last_outpkts = n_outpkts; e->outpps += ((long)rate - (long)e->outpps)>>2; - s->outpps = (e->outpps+0x1FF)>>10; + s->ustats.outpps = (e->outpps+0x1FF)>>10; rate = (n_inbytes - e->last_inbytes)<<4; e->last_inbytes = n_inbytes; e->inbps += ((long)rate - (long)e->inbps)>>2; - s->inbps = (e->inbps+0xF)>>5; + s->ustats.inbps = (e->inbps+0xF)>>5; rate = (n_outbytes - e->last_outbytes)<<4; e->last_outbytes = n_outbytes; e->outbps += ((long)rate - (long)e->outbps)>>2; - s->outbps = (e->outbps+0xF)>>5; + s->ustats.outbps = (e->outbps+0xF)>>5; spin_unlock(&s->lock); } spin_unlock(&est_lock); @@ -108,20 +108,20 @@ void ip_vs_new_estimator(struct ip_vs_stats *stats) INIT_LIST_HEAD(&est->list); - est->last_conns = stats->conns; - est->cps = stats->cps<<10; + est->last_conns = stats->ustats.conns; + est->cps = stats->ustats.cps<<10; - est->last_inpkts = stats->inpkts; - est->inpps = stats->inpps<<10; + est->last_inpkts = stats->ustats.inpkts; + est->inpps = stats->ustats.inpps<<10; - est->last_outpkts = stats->outpkts; - est->outpps = stats->outpps<<10; + est->last_outpkts = stats->ustats.outpkts; + est->outpps = stats->ustats.outpps<<10; - est->last_inbytes = stats->inbytes; - est->inbps = stats->inbps<<5; + est->last_inbytes = stats->ustats.inbytes; + est->inbps = stats->ustats.inbps<<5; - est->last_outbytes = stats->outbytes; - est->outbps = stats->outbps<<5; + est->last_outbytes = stats->ustats.outbytes; + est->outbps = stats->ustats.outbps<<5; spin_lock_bh(&est_lock); list_add(&est->list, &est_list); -- cgit v1.2.3-70-g09d2 From 09ab6f4c2376a0fc31abde1e2991513f900ea825 Mon Sep 17 00:00:00 2001 From: Marcel Holtmann Date: Tue, 9 Sep 2008 07:19:20 +0200 Subject: [Bluetooth] Enforce correct authentication requirements With the introduction of Security Mode 4 and Simple Pairing from the Bluetooth 2.1 specification it became mandatory that the initiator requires authentication and encryption before any L2CAP channel can be established. The only exception here is PSM 1 for the service discovery protocol (SDP). It is meant to be used without any encryption since it contains only public information. This is how Bluetooth 2.0 and before handle connections on PSM 1. For Bluetooth 2.1 devices the pairing procedure differentiates between no bonding, general bonding and dedicated bonding. The L2CAP layer wrongly uses always general bonding when creating new connections, but it should not do this for SDP connections. In this case the authentication requirement should be no bonding and the just-works model should be used, but in case of non-SDP connection it is required to use general bonding. If the new connection requires man-in-the-middle (MITM) protection, it also first wrongly creates an unauthenticated link key and then later on requests an upgrade to an authenticated link key to provide full MITM protection. With Simple Pairing the link key generation is an expensive operation (compared to Bluetooth 2.0 and before) and doing this twice during a connection setup causes a noticeable delay when establishing a new connection. This should be avoided to not regress from the expected Bluetooth 2.0 connection times. The authentication requirements are known up-front and so enforce them. To fulfill these requirements the hci_connect() function has been extended with an authentication requirement parameter that will be stored inside the connection information and can be retrieved by userspace at any time. This allows the correct IO capabilities exchange and results in the expected behavior. Signed-off-by: Marcel Holtmann --- include/net/bluetooth/hci_core.h | 2 +- net/bluetooth/hci_conn.c | 8 +++++--- net/bluetooth/l2cap.c | 19 +++++++++++++++++-- net/bluetooth/sco.c | 2 +- 4 files changed, 24 insertions(+), 7 deletions(-) (limited to 'include/net') diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h index cbf75109468..5e785b968e7 100644 --- a/include/net/bluetooth/hci_core.h +++ b/include/net/bluetooth/hci_core.h @@ -325,7 +325,7 @@ int hci_conn_del(struct hci_conn *conn); void hci_conn_hash_flush(struct hci_dev *hdev); void hci_conn_check_pending(struct hci_dev *hdev); -struct hci_conn *hci_connect(struct hci_dev *hdev, int type, bdaddr_t *src); +struct hci_conn *hci_connect(struct hci_dev *hdev, int type, bdaddr_t *dst, __u8 auth_type); int hci_conn_auth(struct hci_conn *conn); int hci_conn_encrypt(struct hci_conn *conn); int hci_conn_change_link_key(struct hci_conn *conn); diff --git a/net/bluetooth/hci_conn.c b/net/bluetooth/hci_conn.c index ca8d05245ca..a2f9efaa336 100644 --- a/net/bluetooth/hci_conn.c +++ b/net/bluetooth/hci_conn.c @@ -330,7 +330,7 @@ EXPORT_SYMBOL(hci_get_route); /* Create SCO or ACL connection. * Device _must_ be locked */ -struct hci_conn *hci_connect(struct hci_dev *hdev, int type, bdaddr_t *dst) +struct hci_conn *hci_connect(struct hci_dev *hdev, int type, bdaddr_t *dst, __u8 auth_type) { struct hci_conn *acl; struct hci_conn *sco; @@ -344,8 +344,10 @@ struct hci_conn *hci_connect(struct hci_dev *hdev, int type, bdaddr_t *dst) hci_conn_hold(acl); - if (acl->state == BT_OPEN || acl->state == BT_CLOSED) + if (acl->state == BT_OPEN || acl->state == BT_CLOSED) { + acl->auth_type = auth_type; hci_acl_connect(acl); + } if (type == ACL_LINK) return acl; @@ -381,7 +383,7 @@ int hci_conn_auth(struct hci_conn *conn) if (conn->ssp_mode > 0 && conn->hdev->ssp_mode > 0) { if (!(conn->auth_type & 0x01)) { - conn->auth_type = HCI_AT_GENERAL_BONDING_MITM; + conn->auth_type |= 0x01; conn->link_mode &= ~HCI_LM_AUTH; } } diff --git a/net/bluetooth/l2cap.c b/net/bluetooth/l2cap.c index 3396d5bdef1..a96d6de80d1 100644 --- a/net/bluetooth/l2cap.c +++ b/net/bluetooth/l2cap.c @@ -55,7 +55,7 @@ #define BT_DBG(D...) #endif -#define VERSION "2.10" +#define VERSION "2.11" static u32 l2cap_feat_mask = 0x0000; @@ -778,6 +778,7 @@ static int l2cap_do_connect(struct sock *sk) struct l2cap_conn *conn; struct hci_conn *hcon; struct hci_dev *hdev; + __u8 auth_type; int err = 0; BT_DBG("%s -> %s psm 0x%2.2x", batostr(src), batostr(dst), l2cap_pi(sk)->psm); @@ -789,7 +790,21 @@ static int l2cap_do_connect(struct sock *sk) err = -ENOMEM; - hcon = hci_connect(hdev, ACL_LINK, dst); + if (l2cap_pi(sk)->link_mode & L2CAP_LM_AUTH || + l2cap_pi(sk)->link_mode & L2CAP_LM_ENCRYPT || + l2cap_pi(sk)->link_mode & L2CAP_LM_SECURE) { + if (l2cap_pi(sk)->psm == cpu_to_le16(0x0001)) + auth_type = HCI_AT_NO_BONDING_MITM; + else + auth_type = HCI_AT_GENERAL_BONDING_MITM; + } else { + if (l2cap_pi(sk)->psm == cpu_to_le16(0x0001)) + auth_type = HCI_AT_NO_BONDING; + else + auth_type = HCI_AT_GENERAL_BONDING; + } + + hcon = hci_connect(hdev, ACL_LINK, dst, auth_type); if (!hcon) goto done; diff --git a/net/bluetooth/sco.c b/net/bluetooth/sco.c index a16011fedc1..0cc91e6da76 100644 --- a/net/bluetooth/sco.c +++ b/net/bluetooth/sco.c @@ -200,7 +200,7 @@ static int sco_connect(struct sock *sk) else type = SCO_LINK; - hcon = hci_connect(hdev, type, dst); + hcon = hci_connect(hdev, type, dst, HCI_AT_NO_BONDING); if (!hcon) goto done; -- cgit v1.2.3-70-g09d2 From e7c29cb16c833441fd2160642bb13025f4e7ac70 Mon Sep 17 00:00:00 2001 From: Marcel Holtmann Date: Tue, 9 Sep 2008 07:19:20 +0200 Subject: [Bluetooth] Reject L2CAP connections on an insecure ACL link The Security Mode 4 of the Bluetooth 2.1 specification has strict authentication and encryption requirements. It is the initiators job to create a secure ACL link. However in case of malicious devices, the acceptor has to make sure that the ACL is encrypted before allowing any kind of L2CAP connection. The only exception here is the PSM 1 for the service discovery protocol, because that is allowed to run on an insecure ACL link. Previously it was enough to reject a L2CAP connection during the connection setup phase, but with Bluetooth 2.1 it is forbidden to do any L2CAP protocol exchange on an insecure link (except SDP). The new hci_conn_check_link_mode() function can be used to check the integrity of an ACL link. This functions also takes care of the cases where Security Mode 4 is disabled or one of the devices is based on an older specification. Signed-off-by: Marcel Holtmann --- include/net/bluetooth/hci_core.h | 1 + net/bluetooth/af_bluetooth.c | 2 +- net/bluetooth/hci_conn.c | 13 +++++++++++++ net/bluetooth/l2cap.c | 15 +++++++++++---- 4 files changed, 26 insertions(+), 5 deletions(-) (limited to 'include/net') diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h index 5e785b968e7..46a43b721dd 100644 --- a/include/net/bluetooth/hci_core.h +++ b/include/net/bluetooth/hci_core.h @@ -326,6 +326,7 @@ void hci_conn_hash_flush(struct hci_dev *hdev); void hci_conn_check_pending(struct hci_dev *hdev); struct hci_conn *hci_connect(struct hci_dev *hdev, int type, bdaddr_t *dst, __u8 auth_type); +int hci_conn_check_link_mode(struct hci_conn *conn); int hci_conn_auth(struct hci_conn *conn); int hci_conn_encrypt(struct hci_conn *conn); int hci_conn_change_link_key(struct hci_conn *conn); diff --git a/net/bluetooth/af_bluetooth.c b/net/bluetooth/af_bluetooth.c index 1edfdf4c095..f6348e078aa 100644 --- a/net/bluetooth/af_bluetooth.c +++ b/net/bluetooth/af_bluetooth.c @@ -49,7 +49,7 @@ #define BT_DBG(D...) #endif -#define VERSION "2.12" +#define VERSION "2.13" /* Bluetooth sockets */ #define BT_MAX_PROTO 8 diff --git a/net/bluetooth/hci_conn.c b/net/bluetooth/hci_conn.c index a2f9efaa336..b7002429f15 100644 --- a/net/bluetooth/hci_conn.c +++ b/net/bluetooth/hci_conn.c @@ -376,6 +376,19 @@ struct hci_conn *hci_connect(struct hci_dev *hdev, int type, bdaddr_t *dst, __u8 } EXPORT_SYMBOL(hci_connect); +/* Check link security requirement */ +int hci_conn_check_link_mode(struct hci_conn *conn) +{ + BT_DBG("conn %p", conn); + + if (conn->ssp_mode > 0 && conn->hdev->ssp_mode > 0 && + !(conn->link_mode & HCI_LM_ENCRYPT)) + return 0; + + return 1; +} +EXPORT_SYMBOL(hci_conn_check_link_mode); + /* Authenticate remote device */ int hci_conn_auth(struct hci_conn *conn) { diff --git a/net/bluetooth/l2cap.c b/net/bluetooth/l2cap.c index a96d6de80d1..9610a9c85b9 100644 --- a/net/bluetooth/l2cap.c +++ b/net/bluetooth/l2cap.c @@ -1568,10 +1568,10 @@ static inline int l2cap_connect_req(struct l2cap_conn *conn, struct l2cap_cmd_hd struct l2cap_conn_req *req = (struct l2cap_conn_req *) data; struct l2cap_conn_rsp rsp; struct sock *sk, *parent; - int result, status = 0; + int result, status = L2CAP_CS_NO_INFO; u16 dcid = 0, scid = __le16_to_cpu(req->scid); - __le16 psm = req->psm; + __le16 psm = req->psm; BT_DBG("psm 0x%2.2x scid 0x%4.4x", psm, scid); @@ -1582,6 +1582,13 @@ static inline int l2cap_connect_req(struct l2cap_conn *conn, struct l2cap_cmd_hd goto sendresp; } + /* Check if the ACL is secure enough (if not SDP) */ + if (psm != cpu_to_le16(0x0001) && + !hci_conn_check_link_mode(conn->hcon)) { + result = L2CAP_CR_SEC_BLOCK; + goto response; + } + result = L2CAP_CR_NO_MEM; /* Check for backlog size */ @@ -2239,7 +2246,7 @@ static int l2cap_auth_cfm(struct hci_conn *hcon, u8 status) rsp.scid = cpu_to_le16(l2cap_pi(sk)->dcid); rsp.dcid = cpu_to_le16(l2cap_pi(sk)->scid); rsp.result = cpu_to_le16(result); - rsp.status = cpu_to_le16(0); + rsp.status = cpu_to_le16(L2CAP_CS_NO_INFO); l2cap_send_cmd(conn, l2cap_pi(sk)->ident, L2CAP_CONN_RSP, sizeof(rsp), &rsp); } @@ -2311,7 +2318,7 @@ static int l2cap_encrypt_cfm(struct hci_conn *hcon, u8 status, u8 encrypt) rsp.scid = cpu_to_le16(l2cap_pi(sk)->dcid); rsp.dcid = cpu_to_le16(l2cap_pi(sk)->scid); rsp.result = cpu_to_le16(result); - rsp.status = cpu_to_le16(0); + rsp.status = cpu_to_le16(L2CAP_CS_NO_INFO); l2cap_send_cmd(conn, l2cap_pi(sk)->ident, L2CAP_CONN_RSP, sizeof(rsp), &rsp); } -- cgit v1.2.3-70-g09d2 From 410e27a49bb98bc7fa3ff5fc05cc313817b9f253 Mon Sep 17 00:00:00 2001 From: Gerrit Renker Date: Tue, 9 Sep 2008 13:27:22 +0200 Subject: This reverts "Merge branch 'dccp' of git://eden-feed.erg.abdn.ac.uk/dccp_exp" as it accentally contained the wrong set of patches. These will be submitted separately. Signed-off-by: Gerrit Renker --- Documentation/networking/dccp.txt | 54 +- include/linux/dccp.h | 122 ++- include/net/tcp.h | 15 - net/dccp/Kconfig | 3 + net/dccp/Makefile | 5 +- net/dccp/ackvec.c | 619 ++++++------ net/dccp/ackvec.h | 204 ++-- net/dccp/ccid.c | 101 +- net/dccp/ccid.h | 113 +-- net/dccp/ccids/Kconfig | 30 +- net/dccp/ccids/ccid2.c | 622 +++++++----- net/dccp/ccids/ccid2.h | 63 +- net/dccp/ccids/ccid3.c | 762 +++++++++------ net/dccp/ccids/ccid3.h | 153 +-- net/dccp/ccids/lib/loss_interval.c | 30 +- net/dccp/ccids/lib/loss_interval.h | 4 +- net/dccp/ccids/lib/packet_history.c | 282 +++--- net/dccp/ccids/lib/packet_history.h | 78 +- net/dccp/ccids/lib/tfrc.h | 16 - net/dccp/ccids/lib/tfrc_equation.c | 29 +- net/dccp/dccp.h | 104 +- net/dccp/diag.c | 2 +- net/dccp/feat.c | 1805 +++++++++-------------------------- net/dccp/feat.h | 144 +-- net/dccp/input.c | 164 ++-- net/dccp/ipv4.c | 4 +- net/dccp/ipv6.c | 4 +- net/dccp/minisocks.c | 87 +- net/dccp/options.c | 341 ++++--- net/dccp/output.c | 279 ++---- net/dccp/probe.c | 75 +- net/dccp/proto.c | 281 ++---- net/dccp/qpolicy.c | 137 --- net/dccp/sysctl.c | 64 +- net/dccp/timer.c | 42 +- net/ipv4/tcp_input.c | 17 +- 36 files changed, 2884 insertions(+), 3971 deletions(-) delete mode 100644 net/dccp/qpolicy.c (limited to 'include/net') diff --git a/Documentation/networking/dccp.txt b/Documentation/networking/dccp.txt index fcfc1253442..39131a3c78f 100644 --- a/Documentation/networking/dccp.txt +++ b/Documentation/networking/dccp.txt @@ -45,25 +45,6 @@ http://linux-net.osdl.org/index.php/DCCP_Testing#Experimental_DCCP_source_tree Socket options ============== -DCCP_SOCKOPT_QPOLICY_ID sets the dequeuing policy for outgoing packets. It takes -a policy ID as argument and can only be set before the connection (i.e. changes -during an established connection are not supported). Currently, two policies are -defined: the "simple" policy (DCCPQ_POLICY_SIMPLE), which does nothing special, -and a priority-based variant (DCCPQ_POLICY_PRIO). The latter allows to pass an -u32 priority value as ancillary data to sendmsg(), where higher numbers indicate -a higher packet priority (similar to SO_PRIORITY). This ancillary data needs to -be formatted using a cmsg(3) message header filled in as follows: - cmsg->cmsg_level = SOL_DCCP; - cmsg->cmsg_type = DCCP_SCM_PRIORITY; - cmsg->cmsg_len = CMSG_LEN(sizeof(uint32_t)); /* or CMSG_LEN(4) */ - -DCCP_SOCKOPT_QPOLICY_TXQLEN sets the maximum length of the output queue. A zero -value is always interpreted as unbounded queue length. If different from zero, -the interpretation of this parameter depends on the current dequeuing policy -(see above): the "simple" policy will enforce a fixed queue size by returning -EAGAIN, whereas the "prio" policy enforces a fixed queue length by dropping the -lowest-priority packet first. The default value for this parameter is -initialised from /proc/sys/net/dccp/default/tx_qlen. DCCP_SOCKOPT_SERVICE sets the service. The specification mandates use of service codes (RFC 4340, sec. 8.1.2); if this socket option is not set, @@ -76,24 +57,6 @@ can be set before calling bind(). DCCP_SOCKOPT_GET_CUR_MPS is read-only and retrieves the current maximum packet size (application payload size) in bytes, see RFC 4340, section 14. -DCCP_SOCKOPT_AVAILABLE_CCIDS is also read-only and returns the list of CCIDs -supported by the endpoint (see include/linux/dccp.h for symbolic constants). -The caller needs to provide a sufficiently large (> 2) array of type uint8_t. - -DCCP_SOCKOPT_CCID is write-only and sets both the TX and RX CCIDs at the same -time, combining the operation of the next two socket options. This option is -preferrable over the latter two, since often applications will use the same -type of CCID for both directions; and mixed use of CCIDs is not currently well -understood. This socket option takes as argument at least one uint8_t value, or -an array of uint8_t values, which must match available CCIDS (see above). CCIDs -must be registered on the socket before calling connect() or listen(). - -DCCP_SOCKOPT_TX_CCID is read/write. It returns the current CCID (if set) or sets -the preference list for the TX CCID, using the same format as DCCP_SOCKOPT_CCID. -Please note that the getsockopt argument type here is `int', not uint8_t. - -DCCP_SOCKOPT_RX_CCID is analogous to DCCP_SOCKOPT_TX_CCID, but for the RX CCID. - DCCP_SOCKOPT_SERVER_TIMEWAIT enables the server (listening socket) to hold timewait state when closing the connection (RFC 4340, 8.3). The usual case is that the closing server sends a CloseReq, whereupon the client holds timewait @@ -152,16 +115,23 @@ retries2 importance for retransmitted acknowledgments and feature negotiation, data packets are never retransmitted. Analogue of tcp_retries2. +send_ndp = 1 + Whether or not to send NDP count options (sec. 7.7.2). + +send_ackvec = 1 + Whether or not to send Ack Vector options (sec. 11.5). + +ack_ratio = 2 + The default Ack Ratio (sec. 11.3) to use. + tx_ccid = 2 - Default CCID for the sender-receiver half-connection. Depending on the - choice of CCID, the Send Ack Vector feature is enabled automatically. + Default CCID for the sender-receiver half-connection. rx_ccid = 2 - Default CCID for the receiver-sender half-connection; see tx_ccid. + Default CCID for the receiver-sender half-connection. seq_window = 100 - The initial sequence window (sec. 7.5.2) of the sender. This influences - the local ackno validity and the remote seqno validity windows (7.5.1). + The initial sequence window (sec. 7.5.2). tx_qlen = 5 The size of the transmit buffer in packets. A value of 0 corresponds diff --git a/include/linux/dccp.h b/include/linux/dccp.h index 010e2d87ed7..6080449fbec 100644 --- a/include/linux/dccp.h +++ b/include/linux/dccp.h @@ -165,13 +165,9 @@ enum { DCCPO_TIMESTAMP_ECHO = 42, DCCPO_ELAPSED_TIME = 43, DCCPO_MAX = 45, - DCCPO_MIN_RX_CCID_SPECIFIC = 128, /* from sender to receiver */ - DCCPO_MAX_RX_CCID_SPECIFIC = 191, - DCCPO_MIN_TX_CCID_SPECIFIC = 192, /* from receiver to sender */ - DCCPO_MAX_TX_CCID_SPECIFIC = 255, + DCCPO_MIN_CCID_SPECIFIC = 128, + DCCPO_MAX_CCID_SPECIFIC = 255, }; -/* maximum size of a single TLV-encoded DCCP option (sans type/len bytes) */ -#define DCCP_SINGLE_OPT_MAXLEN 253 /* DCCP CCIDS */ enum { @@ -180,36 +176,27 @@ enum { }; /* DCCP features (RFC 4340 section 6.4) */ -enum dccp_feature_numbers { +enum { DCCPF_RESERVED = 0, DCCPF_CCID = 1, - DCCPF_SHORT_SEQNOS = 2, + DCCPF_SHORT_SEQNOS = 2, /* XXX: not yet implemented */ DCCPF_SEQUENCE_WINDOW = 3, - DCCPF_ECN_INCAPABLE = 4, + DCCPF_ECN_INCAPABLE = 4, /* XXX: not yet implemented */ DCCPF_ACK_RATIO = 5, DCCPF_SEND_ACK_VECTOR = 6, DCCPF_SEND_NDP_COUNT = 7, DCCPF_MIN_CSUM_COVER = 8, - DCCPF_DATA_CHECKSUM = 9, + DCCPF_DATA_CHECKSUM = 9, /* XXX: not yet implemented */ /* 10-127 reserved */ DCCPF_MIN_CCID_SPECIFIC = 128, - DCCPF_SEND_LEV_RATE = 192, /* RFC 4342, sec. 8.4 */ DCCPF_MAX_CCID_SPECIFIC = 255, }; -/* DCCP socket control message types for cmsg */ -enum dccp_cmsg_type { - DCCP_SCM_PRIORITY = 1, - DCCP_SCM_QPOLICY_MAX = 0xFFFF, - /* ^-- Up to here reserved exclusively for qpolicy parameters */ - DCCP_SCM_MAX -}; - -/* DCCP priorities for outgoing/queued packets */ -enum dccp_packet_dequeueing_policy { - DCCPQ_POLICY_SIMPLE, - DCCPQ_POLICY_PRIO, - DCCPQ_POLICY_MAX +/* this structure is argument to DCCP_SOCKOPT_CHANGE_X */ +struct dccp_so_feat { + __u8 dccpsf_feat; + __u8 __user *dccpsf_val; + __u8 dccpsf_len; }; /* DCCP socket options */ @@ -221,12 +208,6 @@ enum dccp_packet_dequeueing_policy { #define DCCP_SOCKOPT_SERVER_TIMEWAIT 6 #define DCCP_SOCKOPT_SEND_CSCOV 10 #define DCCP_SOCKOPT_RECV_CSCOV 11 -#define DCCP_SOCKOPT_AVAILABLE_CCIDS 12 -#define DCCP_SOCKOPT_CCID 13 -#define DCCP_SOCKOPT_TX_CCID 14 -#define DCCP_SOCKOPT_RX_CCID 15 -#define DCCP_SOCKOPT_QPOLICY_ID 16 -#define DCCP_SOCKOPT_QPOLICY_TXQLEN 17 #define DCCP_SOCKOPT_CCID_RX_INFO 128 #define DCCP_SOCKOPT_CCID_TX_INFO 192 @@ -374,13 +355,62 @@ static inline unsigned int dccp_hdr_len(const struct sk_buff *skb) return __dccp_hdr_len(dccp_hdr(skb)); } + +/* initial values for each feature */ +#define DCCPF_INITIAL_SEQUENCE_WINDOW 100 +#define DCCPF_INITIAL_ACK_RATIO 2 +#define DCCPF_INITIAL_CCID DCCPC_CCID2 +#define DCCPF_INITIAL_SEND_ACK_VECTOR 1 +/* FIXME: for now we're default to 1 but it should really be 0 */ +#define DCCPF_INITIAL_SEND_NDP_COUNT 1 + +/** + * struct dccp_minisock - Minimal DCCP connection representation + * + * Will be used to pass the state from dccp_request_sock to dccp_sock. + * + * @dccpms_sequence_window - Sequence Window Feature (section 7.5.2) + * @dccpms_ccid - Congestion Control Id (CCID) (section 10) + * @dccpms_send_ack_vector - Send Ack Vector Feature (section 11.5) + * @dccpms_send_ndp_count - Send NDP Count Feature (7.7.2) + * @dccpms_ack_ratio - Ack Ratio Feature (section 11.3) + * @dccpms_pending - List of features being negotiated + * @dccpms_conf - + */ +struct dccp_minisock { + __u64 dccpms_sequence_window; + __u8 dccpms_rx_ccid; + __u8 dccpms_tx_ccid; + __u8 dccpms_send_ack_vector; + __u8 dccpms_send_ndp_count; + __u8 dccpms_ack_ratio; + struct list_head dccpms_pending; + struct list_head dccpms_conf; +}; + +struct dccp_opt_conf { + __u8 *dccpoc_val; + __u8 dccpoc_len; +}; + +struct dccp_opt_pend { + struct list_head dccpop_node; + __u8 dccpop_type; + __u8 dccpop_feat; + __u8 *dccpop_val; + __u8 dccpop_len; + int dccpop_conf; + struct dccp_opt_conf *dccpop_sc; +}; + +extern void dccp_minisock_init(struct dccp_minisock *dmsk); + /** * struct dccp_request_sock - represent DCCP-specific connection request * @dreq_inet_rsk: structure inherited from * @dreq_iss: initial sequence number sent on the Response (RFC 4340, 7.1) * @dreq_isr: initial sequence number received on the Request * @dreq_service: service code present on the Request (there is just one) - * @dreq_featneg: feature negotiation options for this connection * The following two fields are analogous to the ones in dccp_sock: * @dreq_timestamp_echo: last received timestamp to echo (13.1) * @dreq_timestamp_echo: the time of receiving the last @dreq_timestamp_echo @@ -390,7 +420,6 @@ struct dccp_request_sock { __u64 dreq_iss; __u64 dreq_isr; __be32 dreq_service; - struct list_head dreq_featneg; __u32 dreq_timestamp_echo; __u32 dreq_timestamp_time; }; @@ -462,28 +491,21 @@ struct dccp_ackvec; * @dccps_timestamp_time - time of receiving latest @dccps_timestamp_echo * @dccps_l_ack_ratio - feature-local Ack Ratio * @dccps_r_ack_ratio - feature-remote Ack Ratio - * @dccps_l_seq_win - local Sequence Window (influences ack number validity) - * @dccps_r_seq_win - remote Sequence Window (influences seq number validity) * @dccps_pcslen - sender partial checksum coverage (via sockopt) * @dccps_pcrlen - receiver partial checksum coverage (via sockopt) - * @dccps_send_ndp_count - local Send NDP Count feature (7.7.2) * @dccps_ndp_count - number of Non Data Packets since last data packet * @dccps_mss_cache - current value of MSS (path MTU minus header sizes) * @dccps_rate_last - timestamp for rate-limiting DCCP-Sync (RFC 4340, 7.5.4) - * @dccps_featneg - tracks feature-negotiation state (mostly during handshake) + * @dccps_minisock - associated minisock (accessed via dccp_msk) * @dccps_hc_rx_ackvec - rx half connection ack vector * @dccps_hc_rx_ccid - CCID used for the receiver (or receiving half-connection) * @dccps_hc_tx_ccid - CCID used for the sender (or sending half-connection) * @dccps_options_received - parsed set of retrieved options - * @dccps_qpolicy - TX dequeueing policy, one of %dccp_packet_dequeueing_policy - * @dccps_tx_qlen - maximum length of the TX queue * @dccps_role - role of this sock, one of %dccp_role * @dccps_hc_rx_insert_options - receiver wants to add options when acking * @dccps_hc_tx_insert_options - sender wants to add options when sending * @dccps_server_timewait - server holds timewait state on close (RFC 4340, 8.3) - * @dccps_sync_scheduled - flag which signals "send out-of-band message soon" - * @dccps_xmitlet - tasklet scheduled by the TX CCID to dequeue data packets - * @dccps_xmit_timer - used by the TX CCID to delay sending (rate-based pacing) + * @dccps_xmit_timer - timer for when CCID is not ready to send * @dccps_syn_rtt - RTT sample from Request/Response exchange (in usecs) */ struct dccp_sock { @@ -507,26 +529,19 @@ struct dccp_sock { __u32 dccps_timestamp_time; __u16 dccps_l_ack_ratio; __u16 dccps_r_ack_ratio; - __u64 dccps_l_seq_win:48; - __u64 dccps_r_seq_win:48; - __u8 dccps_pcslen:4; - __u8 dccps_pcrlen:4; - __u8 dccps_send_ndp_count:1; + __u16 dccps_pcslen; + __u16 dccps_pcrlen; __u64 dccps_ndp_count:48; unsigned long dccps_rate_last; - struct list_head dccps_featneg; + struct dccp_minisock dccps_minisock; struct dccp_ackvec *dccps_hc_rx_ackvec; struct ccid *dccps_hc_rx_ccid; struct ccid *dccps_hc_tx_ccid; struct dccp_options_received dccps_options_received; - __u8 dccps_qpolicy; - __u32 dccps_tx_qlen; enum dccp_role dccps_role:2; __u8 dccps_hc_rx_insert_options:1; __u8 dccps_hc_tx_insert_options:1; __u8 dccps_server_timewait:1; - __u8 dccps_sync_scheduled:1; - struct tasklet_struct dccps_xmitlet; struct timer_list dccps_xmit_timer; }; @@ -535,6 +550,11 @@ static inline struct dccp_sock *dccp_sk(const struct sock *sk) return (struct dccp_sock *)sk; } +static inline struct dccp_minisock *dccp_msk(const struct sock *sk) +{ + return (struct dccp_minisock *)&dccp_sk(sk)->dccps_minisock; +} + static inline const char *dccp_role(const struct sock *sk) { switch (dccp_sk(sk)->dccps_role) { diff --git a/include/net/tcp.h b/include/net/tcp.h index 6bc4b8148ca..8983386356a 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -782,21 +782,6 @@ static inline __u32 tcp_current_ssthresh(const struct sock *sk) /* Use define here intentionally to get WARN_ON location shown at the caller */ #define tcp_verify_left_out(tp) WARN_ON(tcp_left_out(tp) > tp->packets_out) -/* - * Convert RFC3390 larger initial windows into an equivalent number of packets. - * - * John Heffner states: - * - * The RFC specifies a window of no more than 4380 bytes - * unless 2*MSS > 4380. Reading the pseudocode in the RFC - * is a bit misleading because they use a clamp at 4380 bytes - * rather than a multiplier in the relevant range. - */ -static inline u32 rfc3390_bytes_to_packets(const u32 bytes) -{ - return bytes <= 1095 ? 4 : (bytes > 1460 ? 2 : 3); -} - extern void tcp_enter_cwr(struct sock *sk, const int set_ssthresh); extern __u32 tcp_init_cwnd(struct tcp_sock *tp, struct dst_entry *dst); diff --git a/net/dccp/Kconfig b/net/dccp/Kconfig index 206c16ad9c3..7aa2a7acc7e 100644 --- a/net/dccp/Kconfig +++ b/net/dccp/Kconfig @@ -25,6 +25,9 @@ config INET_DCCP_DIAG def_tristate y if (IP_DCCP = y && INET_DIAG = y) def_tristate m +config IP_DCCP_ACKVEC + bool + source "net/dccp/ccids/Kconfig" menu "DCCP Kernel Hacking" diff --git a/net/dccp/Makefile b/net/dccp/Makefile index 0c1c9af2bf7..f4f8793aaff 100644 --- a/net/dccp/Makefile +++ b/net/dccp/Makefile @@ -1,7 +1,6 @@ obj-$(CONFIG_IP_DCCP) += dccp.o dccp_ipv4.o -dccp-y := ccid.o feat.o input.o minisocks.o options.o \ - qpolicy.o output.o proto.o timer.o ackvec.o +dccp-y := ccid.o feat.o input.o minisocks.o options.o output.o proto.o timer.o dccp_ipv4-y := ipv4.o @@ -9,6 +8,8 @@ dccp_ipv4-y := ipv4.o obj-$(subst y,$(CONFIG_IP_DCCP),$(CONFIG_IPV6)) += dccp_ipv6.o dccp_ipv6-y := ipv6.o +dccp-$(CONFIG_IP_DCCP_ACKVEC) += ackvec.o + obj-$(CONFIG_INET_DCCP_DIAG) += dccp_diag.o obj-$(CONFIG_NET_DCCPPROBE) += dccp_probe.o diff --git a/net/dccp/ackvec.c b/net/dccp/ackvec.c index 41819848bdd..1e8be246ad1 100644 --- a/net/dccp/ackvec.c +++ b/net/dccp/ackvec.c @@ -1,375 +1,445 @@ /* * net/dccp/ackvec.c * - * An implementation of Ack Vectors for the DCCP protocol - * Copyright (c) 2007 University of Aberdeen, Scotland, UK + * An implementation of the DCCP protocol * Copyright (c) 2005 Arnaldo Carvalho de Melo * * This program is free software; you can redistribute it and/or modify it * under the terms of the GNU General Public License as published by the * Free Software Foundation; version 2 of the License; */ + +#include "ackvec.h" #include "dccp.h" + +#include +#include +#include #include +#include #include +#include + static struct kmem_cache *dccp_ackvec_slab; static struct kmem_cache *dccp_ackvec_record_slab; -struct dccp_ackvec *dccp_ackvec_alloc(const gfp_t priority) +static struct dccp_ackvec_record *dccp_ackvec_record_new(void) { - struct dccp_ackvec *av = kmem_cache_zalloc(dccp_ackvec_slab, priority); + struct dccp_ackvec_record *avr = + kmem_cache_alloc(dccp_ackvec_record_slab, GFP_ATOMIC); - if (av != NULL) { - av->av_buf_head = av->av_buf_tail = DCCPAV_MAX_ACKVEC_LEN - 1; - INIT_LIST_HEAD(&av->av_records); - } - return av; + if (avr != NULL) + INIT_LIST_HEAD(&avr->avr_node); + + return avr; } -static void dccp_ackvec_purge_records(struct dccp_ackvec *av) +static void dccp_ackvec_record_delete(struct dccp_ackvec_record *avr) { - struct dccp_ackvec_record *cur, *next; - - list_for_each_entry_safe(cur, next, &av->av_records, avr_node) - kmem_cache_free(dccp_ackvec_record_slab, cur); - INIT_LIST_HEAD(&av->av_records); + if (unlikely(avr == NULL)) + return; + /* Check if deleting a linked record */ + WARN_ON(!list_empty(&avr->avr_node)); + kmem_cache_free(dccp_ackvec_record_slab, avr); } -void dccp_ackvec_free(struct dccp_ackvec *av) +static void dccp_ackvec_insert_avr(struct dccp_ackvec *av, + struct dccp_ackvec_record *avr) { - if (likely(av != NULL)) { - dccp_ackvec_purge_records(av); - kmem_cache_free(dccp_ackvec_slab, av); + /* + * AVRs are sorted by seqno. Since we are sending them in order, we + * just add the AVR at the head of the list. + * -sorbo. + */ + if (!list_empty(&av->av_records)) { + const struct dccp_ackvec_record *head = + list_entry(av->av_records.next, + struct dccp_ackvec_record, + avr_node); + BUG_ON(before48(avr->avr_ack_seqno, head->avr_ack_seqno)); } + + list_add(&avr->avr_node, &av->av_records); } -/** - * dccp_ackvec_update_records - Record information about sent Ack Vectors - * @av: Ack Vector records to update - * @seqno: Sequence number of the packet carrying the Ack Vector just sent - * @nonce_sum: The sum of all buffer nonces contained in the Ack Vector - */ -int dccp_ackvec_update_records(struct dccp_ackvec *av, u64 seqno, u8 nonce_sum) +int dccp_insert_option_ackvec(struct sock *sk, struct sk_buff *skb) { + struct dccp_sock *dp = dccp_sk(sk); + struct dccp_ackvec *av = dp->dccps_hc_rx_ackvec; + /* Figure out how many options do we need to represent the ackvec */ + const u16 nr_opts = DIV_ROUND_UP(av->av_vec_len, DCCP_MAX_ACKVEC_OPT_LEN); + u16 len = av->av_vec_len + 2 * nr_opts, i; + u32 elapsed_time; + const unsigned char *tail, *from; + unsigned char *to; struct dccp_ackvec_record *avr; + suseconds_t delta; + + if (DCCP_SKB_CB(skb)->dccpd_opt_len + len > DCCP_MAX_OPT_LEN) + return -1; + + delta = ktime_us_delta(ktime_get_real(), av->av_time); + elapsed_time = delta / 10; - avr = kmem_cache_alloc(dccp_ackvec_record_slab, GFP_ATOMIC); + if (elapsed_time != 0 && + dccp_insert_option_elapsed_time(sk, skb, elapsed_time)) + return -1; + + avr = dccp_ackvec_record_new(); if (avr == NULL) - return -ENOBUFS; + return -1; + + DCCP_SKB_CB(skb)->dccpd_opt_len += len; + + to = skb_push(skb, len); + len = av->av_vec_len; + from = av->av_buf + av->av_buf_head; + tail = av->av_buf + DCCP_MAX_ACKVEC_LEN; + + for (i = 0; i < nr_opts; ++i) { + int copylen = len; + + if (len > DCCP_MAX_ACKVEC_OPT_LEN) + copylen = DCCP_MAX_ACKVEC_OPT_LEN; + + *to++ = DCCPO_ACK_VECTOR_0; + *to++ = copylen + 2; + + /* Check if buf_head wraps */ + if (from + copylen > tail) { + const u16 tailsize = tail - from; + + memcpy(to, from, tailsize); + to += tailsize; + len -= tailsize; + copylen -= tailsize; + from = av->av_buf; + } + + memcpy(to, from, copylen); + from += copylen; + to += copylen; + len -= copylen; + } - avr->avr_ack_seqno = seqno; - avr->avr_ack_ptr = av->av_buf_head; - avr->avr_ack_ackno = av->av_buf_ackno; - avr->avr_ack_nonce = nonce_sum; - avr->avr_ack_runlen = dccp_ackvec_runlen(av->av_buf + av->av_buf_head); - /* - * When the buffer overflows, we keep no more than one record. This is - * the simplest way of disambiguating sender-Acks dating from before the - * overflow from sender-Acks which refer to after the overflow; a simple - * solution is preferable here since we are handling an exception. - */ - if (av->av_overflow) - dccp_ackvec_purge_records(av); /* - * Since GSS is incremented for each packet, the list is automatically - * arranged in descending order of @ack_seqno. + * From RFC 4340, A.2: + * + * For each acknowledgement it sends, the HC-Receiver will add an + * acknowledgement record. ack_seqno will equal the HC-Receiver + * sequence number it used for the ack packet; ack_ptr will equal + * buf_head; ack_ackno will equal buf_ackno; and ack_nonce will + * equal buf_nonce. */ - list_add(&avr->avr_node, &av->av_records); + avr->avr_ack_seqno = DCCP_SKB_CB(skb)->dccpd_seq; + avr->avr_ack_ptr = av->av_buf_head; + avr->avr_ack_ackno = av->av_buf_ackno; + avr->avr_ack_nonce = av->av_buf_nonce; + avr->avr_sent_len = av->av_vec_len; - dccp_pr_debug("Added Vector, ack_seqno=%llu, ack_ackno=%llu (rl=%u)\n", + dccp_ackvec_insert_avr(av, avr); + + dccp_pr_debug("%s ACK Vector 0, len=%d, ack_seqno=%llu, " + "ack_ackno=%llu\n", + dccp_role(sk), avr->avr_sent_len, (unsigned long long)avr->avr_ack_seqno, - (unsigned long long)avr->avr_ack_ackno, - avr->avr_ack_runlen); + (unsigned long long)avr->avr_ack_ackno); return 0; } -static struct dccp_ackvec_record *dccp_ackvec_lookup(struct list_head *av_list, - const u64 ackno) +struct dccp_ackvec *dccp_ackvec_alloc(const gfp_t priority) { - struct dccp_ackvec_record *avr; - /* - * Exploit that records are inserted in descending order of sequence - * number, start with the oldest record first. If @ackno is `before' - * the earliest ack_ackno, the packet is too old to be considered. - */ - list_for_each_entry_reverse(avr, av_list, avr_node) { - if (avr->avr_ack_seqno == ackno) - return avr; - if (before48(ackno, avr->avr_ack_seqno)) - break; + struct dccp_ackvec *av = kmem_cache_alloc(dccp_ackvec_slab, priority); + + if (av != NULL) { + av->av_buf_head = DCCP_MAX_ACKVEC_LEN - 1; + av->av_buf_ackno = UINT48_MAX + 1; + av->av_buf_nonce = 0; + av->av_time = ktime_set(0, 0); + av->av_vec_len = 0; + INIT_LIST_HEAD(&av->av_records); } - return NULL; + + return av; } -/* - * Buffer index and length computation using modulo-buffersize arithmetic. - * Note that, as pointers move from right to left, head is `before' tail. - */ -static inline u16 __ackvec_idx_add(const u16 a, const u16 b) +void dccp_ackvec_free(struct dccp_ackvec *av) { - return (a + b) % DCCPAV_MAX_ACKVEC_LEN; + if (unlikely(av == NULL)) + return; + + if (!list_empty(&av->av_records)) { + struct dccp_ackvec_record *avr, *next; + + list_for_each_entry_safe(avr, next, &av->av_records, avr_node) { + list_del_init(&avr->avr_node); + dccp_ackvec_record_delete(avr); + } + } + + kmem_cache_free(dccp_ackvec_slab, av); } -static inline u16 __ackvec_idx_sub(const u16 a, const u16 b) +static inline u8 dccp_ackvec_state(const struct dccp_ackvec *av, + const u32 index) { - return __ackvec_idx_add(a, DCCPAV_MAX_ACKVEC_LEN - b); + return av->av_buf[index] & DCCP_ACKVEC_STATE_MASK; } -u16 dccp_ackvec_buflen(const struct dccp_ackvec *av) +static inline u8 dccp_ackvec_len(const struct dccp_ackvec *av, + const u32 index) { - if (unlikely(av->av_overflow)) - return DCCPAV_MAX_ACKVEC_LEN; - return __ackvec_idx_sub(av->av_buf_tail, av->av_buf_head); + return av->av_buf[index] & DCCP_ACKVEC_LEN_MASK; } -/** - * dccp_ackvec_update_old - Update previous state as per RFC 4340, 11.4.1 - * @av: non-empty buffer to update - * @distance: negative or zero distance of @seqno from buf_ackno downward - * @seqno: the (old) sequence number whose record is to be updated - * @state: state in which packet carrying @seqno was received +/* + * If several packets are missing, the HC-Receiver may prefer to enter multiple + * bytes with run length 0, rather than a single byte with a larger run length; + * this simplifies table updates if one of the missing packets arrives. */ -static void dccp_ackvec_update_old(struct dccp_ackvec *av, s64 distance, - u64 seqno, enum dccp_ackvec_states state) +static inline int dccp_ackvec_set_buf_head_state(struct dccp_ackvec *av, + const unsigned int packets, + const unsigned char state) { - u16 ptr = av->av_buf_head; + unsigned int gap; + long new_head; - BUG_ON(distance > 0); - if (unlikely(dccp_ackvec_is_empty(av))) - return; + if (av->av_vec_len + packets > DCCP_MAX_ACKVEC_LEN) + return -ENOBUFS; - do { - u8 runlen = dccp_ackvec_runlen(av->av_buf + ptr); + gap = packets - 1; + new_head = av->av_buf_head - packets; - if (distance + runlen >= 0) { - /* - * Only update the state if packet has not been received - * yet. This is OK as per the second table in RFC 4340, - * 11.4.1; i.e. here we are using the following table: - * RECEIVED - * 0 1 3 - * S +---+---+---+ - * T 0 | 0 | 0 | 0 | - * O +---+---+---+ - * R 1 | 1 | 1 | 1 | - * E +---+---+---+ - * D 3 | 0 | 1 | 3 | - * +---+---+---+ - * The "Not Received" state was set by reserve_seats(). - */ - if (av->av_buf[ptr] == DCCPAV_NOT_RECEIVED) - av->av_buf[ptr] = state; - else - dccp_pr_debug("Not changing %llu state to %u\n", - (unsigned long long)seqno, state); - break; + if (new_head < 0) { + if (gap > 0) { + memset(av->av_buf, DCCP_ACKVEC_STATE_NOT_RECEIVED, + gap + new_head + 1); + gap = -new_head; } + new_head += DCCP_MAX_ACKVEC_LEN; + } - distance += runlen + 1; - ptr = __ackvec_idx_add(ptr, 1); + av->av_buf_head = new_head; - } while (ptr != av->av_buf_tail); -} + if (gap > 0) + memset(av->av_buf + av->av_buf_head + 1, + DCCP_ACKVEC_STATE_NOT_RECEIVED, gap); -/* Mark @num entries after buf_head as "Not yet received". */ -static void dccp_ackvec_reserve_seats(struct dccp_ackvec *av, u16 num) -{ - u16 start = __ackvec_idx_add(av->av_buf_head, 1), - len = DCCPAV_MAX_ACKVEC_LEN - start; - - /* check for buffer wrap-around */ - if (num > len) { - memset(av->av_buf + start, DCCPAV_NOT_RECEIVED, len); - start = 0; - num -= len; - } - if (num) - memset(av->av_buf + start, DCCPAV_NOT_RECEIVED, num); + av->av_buf[av->av_buf_head] = state; + av->av_vec_len += packets; + return 0; } -/** - * dccp_ackvec_add_new - Record one or more new entries in Ack Vector buffer - * @av: container of buffer to update (can be empty or non-empty) - * @num_packets: number of packets to register (must be >= 1) - * @seqno: sequence number of the first packet in @num_packets - * @state: state in which packet carrying @seqno was received +/* + * Implements the RFC 4340, Appendix A */ -static void dccp_ackvec_add_new(struct dccp_ackvec *av, u32 num_packets, - u64 seqno, enum dccp_ackvec_states state) +int dccp_ackvec_add(struct dccp_ackvec *av, const struct sock *sk, + const u64 ackno, const u8 state) { - u32 num_cells = num_packets; + /* + * Check at the right places if the buffer is full, if it is, tell the + * caller to start dropping packets till the HC-Sender acks our ACK + * vectors, when we will free up space in av_buf. + * + * We may well decide to do buffer compression, etc, but for now lets + * just drop. + * + * From Appendix A.1.1 (`New Packets'): + * + * Of course, the circular buffer may overflow, either when the + * HC-Sender is sending data at a very high rate, when the + * HC-Receiver's acknowledgements are not reaching the HC-Sender, + * or when the HC-Sender is forgetting to acknowledge those acks + * (so the HC-Receiver is unable to clean up old state). In this + * case, the HC-Receiver should either compress the buffer (by + * increasing run lengths when possible), transfer its state to + * a larger buffer, or, as a last resort, drop all received + * packets, without processing them whatsoever, until its buffer + * shrinks again. + */ - if (num_packets > DCCPAV_BURST_THRESH) { - u32 lost_packets = num_packets - 1; + /* See if this is the first ackno being inserted */ + if (av->av_vec_len == 0) { + av->av_buf[av->av_buf_head] = state; + av->av_vec_len = 1; + } else if (after48(ackno, av->av_buf_ackno)) { + const u64 delta = dccp_delta_seqno(av->av_buf_ackno, ackno); - DCCP_WARN("Warning: large burst loss (%u)\n", lost_packets); /* - * We received 1 packet and have a loss of size "num_packets-1" - * which we squeeze into num_cells-1 rather than reserving an - * entire byte for each lost packet. - * The reason is that the vector grows in O(burst_length); when - * it grows too large there will no room left for the payload. - * This is a trade-off: if a few packets out of the burst show - * up later, their state will not be changed; it is simply too - * costly to reshuffle/reallocate/copy the buffer each time. - * Should such problems persist, we will need to switch to a - * different underlying data structure. + * Look if the state of this packet is the same as the + * previous ackno and if so if we can bump the head len. */ - for (num_packets = num_cells = 1; lost_packets; ++num_cells) { - u8 len = min(lost_packets, (u32)DCCPAV_MAX_RUNLEN); - - av->av_buf_head = __ackvec_idx_sub(av->av_buf_head, 1); - av->av_buf[av->av_buf_head] = DCCPAV_NOT_RECEIVED | len; + if (delta == 1 && + dccp_ackvec_state(av, av->av_buf_head) == state && + dccp_ackvec_len(av, av->av_buf_head) < DCCP_ACKVEC_LEN_MASK) + av->av_buf[av->av_buf_head]++; + else if (dccp_ackvec_set_buf_head_state(av, delta, state)) + return -ENOBUFS; + } else { + /* + * A.1.2. Old Packets + * + * When a packet with Sequence Number S <= buf_ackno + * arrives, the HC-Receiver will scan the table for + * the byte corresponding to S. (Indexing structures + * could reduce the complexity of this scan.) + */ + u64 delta = dccp_delta_seqno(ackno, av->av_buf_ackno); + u32 index = av->av_buf_head; - lost_packets -= len; + while (1) { + const u8 len = dccp_ackvec_len(av, index); + const u8 av_state = dccp_ackvec_state(av, index); + /* + * valid packets not yet in av_buf have a reserved + * entry, with a len equal to 0. + */ + if (av_state == DCCP_ACKVEC_STATE_NOT_RECEIVED && + len == 0 && delta == 0) { /* Found our + reserved seat! */ + dccp_pr_debug("Found %llu reserved seat!\n", + (unsigned long long)ackno); + av->av_buf[index] = state; + goto out; + } + /* len == 0 means one packet */ + if (delta < len + 1) + goto out_duplicate; + + delta -= len + 1; + if (++index == DCCP_MAX_ACKVEC_LEN) + index = 0; } } - if (num_cells + dccp_ackvec_buflen(av) >= DCCPAV_MAX_ACKVEC_LEN) { - DCCP_CRIT("Ack Vector buffer overflow: dropping old entries\n"); - av->av_overflow = true; - } - - av->av_buf_head = __ackvec_idx_sub(av->av_buf_head, num_packets); - if (av->av_overflow) - av->av_buf_tail = av->av_buf_head; - - av->av_buf[av->av_buf_head] = state; - av->av_buf_ackno = seqno; + av->av_buf_ackno = ackno; + av->av_time = ktime_get_real(); +out: + return 0; - if (num_packets > 1) - dccp_ackvec_reserve_seats(av, num_packets - 1); +out_duplicate: + /* Duplicate packet */ + dccp_pr_debug("Received a dup or already considered lost " + "packet: %llu\n", (unsigned long long)ackno); + return -EILSEQ; } -/** - * dccp_ackvec_input - Register incoming packet in the buffer - */ -void dccp_ackvec_input(struct dccp_ackvec *av, struct sk_buff *skb) +static void dccp_ackvec_throw_record(struct dccp_ackvec *av, + struct dccp_ackvec_record *avr) { - u64 seqno = DCCP_SKB_CB(skb)->dccpd_seq; - enum dccp_ackvec_states state = DCCPAV_RECEIVED; + struct dccp_ackvec_record *next; - if (dccp_ackvec_is_empty(av)) { - dccp_ackvec_add_new(av, 1, seqno, state); - av->av_tail_ackno = seqno; + /* sort out vector length */ + if (av->av_buf_head <= avr->avr_ack_ptr) + av->av_vec_len = avr->avr_ack_ptr - av->av_buf_head; + else + av->av_vec_len = DCCP_MAX_ACKVEC_LEN - 1 - + av->av_buf_head + avr->avr_ack_ptr; - } else { - s64 num_packets = dccp_delta_seqno(av->av_buf_ackno, seqno); - u8 *current_head = av->av_buf + av->av_buf_head; - - if (num_packets == 1 && - dccp_ackvec_state(current_head) == state && - dccp_ackvec_runlen(current_head) < DCCPAV_MAX_RUNLEN) { + /* free records */ + list_for_each_entry_safe_from(avr, next, &av->av_records, avr_node) { + list_del_init(&avr->avr_node); + dccp_ackvec_record_delete(avr); + } +} - *current_head += 1; - av->av_buf_ackno = seqno; +void dccp_ackvec_check_rcv_ackno(struct dccp_ackvec *av, struct sock *sk, + const u64 ackno) +{ + struct dccp_ackvec_record *avr; - } else if (num_packets > 0) { - dccp_ackvec_add_new(av, num_packets, seqno, state); - } else { - dccp_ackvec_update_old(av, num_packets, seqno, state); - } + /* + * If we traverse backwards, it should be faster when we have large + * windows. We will be receiving ACKs for stuff we sent a while back + * -sorbo. + */ + list_for_each_entry_reverse(avr, &av->av_records, avr_node) { + if (ackno == avr->avr_ack_seqno) { + dccp_pr_debug("%s ACK packet 0, len=%d, ack_seqno=%llu, " + "ack_ackno=%llu, ACKED!\n", + dccp_role(sk), 1, + (unsigned long long)avr->avr_ack_seqno, + (unsigned long long)avr->avr_ack_ackno); + dccp_ackvec_throw_record(av, avr); + break; + } else if (avr->avr_ack_seqno > ackno) + break; /* old news */ } } -/** - * dccp_ackvec_clear_state - Perform house-keeping / garbage-collection - * This routine is called when the peer acknowledges the receipt of Ack Vectors - * up to and including @ackno. While based on on section A.3 of RFC 4340, here - * are additional precautions to prevent corrupted buffer state. In particular, - * we use tail_ackno to identify outdated records; it always marks the earliest - * packet of group (2) in 11.4.2. - */ -void dccp_ackvec_clear_state(struct dccp_ackvec *av, const u64 ackno) - { - struct dccp_ackvec_record *avr, *next; - u8 runlen_now, eff_runlen; - s64 delta; +static void dccp_ackvec_check_rcv_ackvector(struct dccp_ackvec *av, + struct sock *sk, u64 *ackno, + const unsigned char len, + const unsigned char *vector) +{ + unsigned char i; + struct dccp_ackvec_record *avr; - avr = dccp_ackvec_lookup(&av->av_records, ackno); - if (avr == NULL) + /* Check if we actually sent an ACK vector */ + if (list_empty(&av->av_records)) return; - /* - * Deal with outdated acknowledgments: this arises when e.g. there are - * several old records and the acks from the peer come in slowly. In - * that case we may still have records that pre-date tail_ackno. - */ - delta = dccp_delta_seqno(av->av_tail_ackno, avr->avr_ack_ackno); - if (delta < 0) - goto free_records; - /* - * Deal with overlapping Ack Vectors: don't subtract more than the - * number of packets between tail_ackno and ack_ackno. - */ - eff_runlen = delta < avr->avr_ack_runlen ? delta : avr->avr_ack_runlen; - runlen_now = dccp_ackvec_runlen(av->av_buf + avr->avr_ack_ptr); + i = len; /* - * The run length of Ack Vector cells does not decrease over time. If - * the run length is the same as at the time the Ack Vector was sent, we - * free the ack_ptr cell. That cell can however not be freed if the run - * length has increased: in this case we need to move the tail pointer - * backwards (towards higher indices), to its next-oldest neighbour. + * XXX + * I think it might be more efficient to work backwards. See comment on + * rcv_ackno. -sorbo. */ - if (runlen_now > eff_runlen) { + avr = list_entry(av->av_records.next, struct dccp_ackvec_record, avr_node); + while (i--) { + const u8 rl = *vector & DCCP_ACKVEC_LEN_MASK; + u64 ackno_end_rl; - av->av_buf[avr->avr_ack_ptr] -= eff_runlen + 1; - av->av_buf_tail = __ackvec_idx_add(avr->avr_ack_ptr, 1); + dccp_set_seqno(&ackno_end_rl, *ackno - rl); - /* This move may not have cleared the overflow flag. */ - if (av->av_overflow) - av->av_overflow = (av->av_buf_head == av->av_buf_tail); - } else { - av->av_buf_tail = avr->avr_ack_ptr; /* - * We have made sure that avr points to a valid cell within the - * buffer. This cell is either older than head, or equals head - * (empty buffer): in both cases we no longer have any overflow. + * If our AVR sequence number is greater than the ack, go + * forward in the AVR list until it is not so. */ - av->av_overflow = 0; - } - - /* - * The peer has acknowledged up to and including ack_ackno. Hence the - * first packet in group (2) of 11.4.2 is the successor of ack_ackno. - */ - av->av_tail_ackno = ADD48(avr->avr_ack_ackno, 1); + list_for_each_entry_from(avr, &av->av_records, avr_node) { + if (!after48(avr->avr_ack_seqno, *ackno)) + goto found; + } + /* End of the av_records list, not found, exit */ + break; +found: + if (between48(avr->avr_ack_seqno, ackno_end_rl, *ackno)) { + const u8 state = *vector & DCCP_ACKVEC_STATE_MASK; + if (state != DCCP_ACKVEC_STATE_NOT_RECEIVED) { + dccp_pr_debug("%s ACK vector 0, len=%d, " + "ack_seqno=%llu, ack_ackno=%llu, " + "ACKED!\n", + dccp_role(sk), len, + (unsigned long long) + avr->avr_ack_seqno, + (unsigned long long) + avr->avr_ack_ackno); + dccp_ackvec_throw_record(av, avr); + break; + } + /* + * If it wasn't received, continue scanning... we might + * find another one. + */ + } -free_records: - list_for_each_entry_safe_from(avr, next, &av->av_records, avr_node) { - list_del(&avr->avr_node); - kmem_cache_free(dccp_ackvec_record_slab, avr); + dccp_set_seqno(ackno, ackno_end_rl - 1); + ++vector; } } -/* - * Routines to keep track of Ack Vectors received in an skb - */ -int dccp_ackvec_parsed_add(struct list_head *head, u8 *vec, u8 len, u8 nonce) +int dccp_ackvec_parse(struct sock *sk, const struct sk_buff *skb, + u64 *ackno, const u8 opt, const u8 *value, const u8 len) { - struct dccp_ackvec_parsed *new = kmalloc(sizeof(*new), GFP_ATOMIC); - - if (new == NULL) - return -ENOBUFS; - new->vec = vec; - new->len = len; - new->nonce = nonce; + if (len > DCCP_MAX_ACKVEC_OPT_LEN) + return -1; - list_add_tail(&new->node, head); + /* dccp_ackvector_print(DCCP_SKB_CB(skb)->dccpd_ack_seq, value, len); */ + dccp_ackvec_check_rcv_ackvector(dccp_sk(sk)->dccps_hc_rx_ackvec, sk, + ackno, len, value); return 0; } -EXPORT_SYMBOL_GPL(dccp_ackvec_parsed_add); - -void dccp_ackvec_parsed_cleanup(struct list_head *parsed_chunks) -{ - struct dccp_ackvec_parsed *cur, *next; - - list_for_each_entry_safe(cur, next, parsed_chunks, node) - kfree(cur); - INIT_LIST_HEAD(parsed_chunks); -} -EXPORT_SYMBOL_GPL(dccp_ackvec_parsed_cleanup); int __init dccp_ackvec_init(void) { @@ -379,9 +449,10 @@ int __init dccp_ackvec_init(void) if (dccp_ackvec_slab == NULL) goto out_err; - dccp_ackvec_record_slab = kmem_cache_create("dccp_ackvec_record", - sizeof(struct dccp_ackvec_record), - 0, SLAB_HWCACHE_ALIGN, NULL); + dccp_ackvec_record_slab = + kmem_cache_create("dccp_ackvec_record", + sizeof(struct dccp_ackvec_record), + 0, SLAB_HWCACHE_ALIGN, NULL); if (dccp_ackvec_record_slab == NULL) goto out_destroy_slab; diff --git a/net/dccp/ackvec.h b/net/dccp/ackvec.h index 6cdca79a99f..bcb64fb4ace 100644 --- a/net/dccp/ackvec.h +++ b/net/dccp/ackvec.h @@ -3,134 +3,156 @@ /* * net/dccp/ackvec.h * - * An implementation of Ack Vectors for the DCCP protocol - * Copyright (c) 2007 University of Aberdeen, Scotland, UK + * An implementation of the DCCP protocol * Copyright (c) 2005 Arnaldo Carvalho de Melo + * * This program is free software; you can redistribute it and/or modify it * under the terms of the GNU General Public License version 2 as * published by the Free Software Foundation. */ -#include #include +#include #include #include -/* - * Ack Vector buffer space is static, in multiples of %DCCP_SINGLE_OPT_MAXLEN, - * the maximum size of a single Ack Vector. Setting %DCCPAV_NUM_ACKVECS to 1 - * will be sufficient for most cases of low Ack Ratios, using a value of 2 gives - * more headroom if Ack Ratio is higher or when the sender acknowledges slowly. - * The maximum value is bounded by the u16 types for indices and functions. - */ -#define DCCPAV_NUM_ACKVECS 2 -#define DCCPAV_MAX_ACKVEC_LEN (DCCP_SINGLE_OPT_MAXLEN * DCCPAV_NUM_ACKVECS) - -/* Estimated minimum average Ack Vector length - used for updating MPS */ -#define DCCPAV_MIN_OPTLEN 16 - -/* Threshold for coping with large bursts of losses */ -#define DCCPAV_BURST_THRESH (DCCPAV_MAX_ACKVEC_LEN / 8) - -enum dccp_ackvec_states { - DCCPAV_RECEIVED = 0x00, - DCCPAV_ECN_MARKED = 0x40, - DCCPAV_RESERVED = 0x80, - DCCPAV_NOT_RECEIVED = 0xC0 -}; -#define DCCPAV_MAX_RUNLEN 0x3F +/* Read about the ECN nonce to see why it is 253 */ +#define DCCP_MAX_ACKVEC_OPT_LEN 253 +/* We can spread an ack vector across multiple options */ +#define DCCP_MAX_ACKVEC_LEN (DCCP_MAX_ACKVEC_OPT_LEN * 2) -static inline u8 dccp_ackvec_runlen(const u8 *cell) -{ - return *cell & DCCPAV_MAX_RUNLEN; -} +#define DCCP_ACKVEC_STATE_RECEIVED 0 +#define DCCP_ACKVEC_STATE_ECN_MARKED (1 << 6) +#define DCCP_ACKVEC_STATE_NOT_RECEIVED (3 << 6) -static inline u8 dccp_ackvec_state(const u8 *cell) -{ - return *cell & ~DCCPAV_MAX_RUNLEN; -} +#define DCCP_ACKVEC_STATE_MASK 0xC0 /* 11000000 */ +#define DCCP_ACKVEC_LEN_MASK 0x3F /* 00111111 */ -/** struct dccp_ackvec - Ack Vector main data structure +/** struct dccp_ackvec - ack vector + * + * This data structure is the one defined in RFC 4340, Appendix A. * - * This implements a fixed-size circular buffer within an array and is largely - * based on Appendix A of RFC 4340. + * @av_buf_head - circular buffer head + * @av_buf_tail - circular buffer tail + * @av_buf_ackno - ack # of the most recent packet acknowledgeable in the + * buffer (i.e. %av_buf_head) + * @av_buf_nonce - the one-bit sum of the ECN Nonces on all packets acked + * by the buffer with State 0 * - * @av_buf: circular buffer storage area - * @av_buf_head: head index; begin of live portion in @av_buf - * @av_buf_tail: tail index; first index _after_ the live portion in @av_buf - * @av_buf_ackno: highest seqno of acknowledgeable packet recorded in @av_buf - * @av_tail_ackno: lowest seqno of acknowledgeable packet recorded in @av_buf - * @av_buf_nonce: ECN nonce sums, each covering subsequent segments of up to - * %DCCP_SINGLE_OPT_MAXLEN cells in the live portion of @av_buf - * @av_overflow: if 1 then buf_head == buf_tail indicates buffer wraparound - * @av_records: list of %dccp_ackvec_record (Ack Vectors sent previously) + * Additionally, the HC-Receiver must keep some information about the + * Ack Vectors it has recently sent. For each packet sent carrying an + * Ack Vector, it remembers four variables: + * + * @av_records - list of dccp_ackvec_record + * @av_ack_nonce - the one-bit sum of the ECN Nonces for all State 0. + * + * @av_time - the time in usecs + * @av_buf - circular buffer of acknowledgeable packets */ struct dccp_ackvec { - u8 av_buf[DCCPAV_MAX_ACKVEC_LEN]; - u16 av_buf_head; - u16 av_buf_tail; - u64 av_buf_ackno:48; - u64 av_tail_ackno:48; - bool av_buf_nonce[DCCPAV_NUM_ACKVECS]; - u8 av_overflow:1; + u64 av_buf_ackno; struct list_head av_records; + ktime_t av_time; + u16 av_buf_head; + u16 av_vec_len; + u8 av_buf_nonce; + u8 av_ack_nonce; + u8 av_buf[DCCP_MAX_ACKVEC_LEN]; }; -/** struct dccp_ackvec_record - Records information about sent Ack Vectors +/** struct dccp_ackvec_record - ack vector record * - * These list entries define the additional information which the HC-Receiver - * keeps about recently-sent Ack Vectors; again refer to RFC 4340, Appendix A. + * ACK vector record as defined in Appendix A of spec. * - * @avr_node: the list node in @av_records - * @avr_ack_seqno: sequence number of the packet the Ack Vector was sent on - * @avr_ack_ackno: the Ack number that this record/Ack Vector refers to - * @avr_ack_ptr: pointer into @av_buf where this record starts - * @avr_ack_runlen: run length of @avr_ack_ptr at the time of sending - * @avr_ack_nonce: the sum of @av_buf_nonce's at the time this record was sent + * The list is sorted by avr_ack_seqno * - * The list as a whole is sorted in descending order by @avr_ack_seqno. + * @avr_node - node in av_records + * @avr_ack_seqno - sequence number of the packet this record was sent on + * @avr_ack_ackno - sequence number being acknowledged + * @avr_ack_ptr - pointer into av_buf where this record starts + * @avr_ack_nonce - av_ack_nonce at the time this record was sent + * @avr_sent_len - lenght of the record in av_buf */ struct dccp_ackvec_record { struct list_head avr_node; - u64 avr_ack_seqno:48; - u64 avr_ack_ackno:48; + u64 avr_ack_seqno; + u64 avr_ack_ackno; u16 avr_ack_ptr; - u8 avr_ack_runlen; - u8 avr_ack_nonce:1; + u16 avr_sent_len; + u8 avr_ack_nonce; }; -extern int dccp_ackvec_init(void); +struct sock; +struct sk_buff; + +#ifdef CONFIG_IP_DCCP_ACKVEC +extern int dccp_ackvec_init(void); extern void dccp_ackvec_exit(void); extern struct dccp_ackvec *dccp_ackvec_alloc(const gfp_t priority); extern void dccp_ackvec_free(struct dccp_ackvec *av); -extern void dccp_ackvec_input(struct dccp_ackvec *av, struct sk_buff *skb); -extern int dccp_ackvec_update_records(struct dccp_ackvec *av, u64 seq, u8 sum); -extern void dccp_ackvec_clear_state(struct dccp_ackvec *av, const u64 ackno); -extern u16 dccp_ackvec_buflen(const struct dccp_ackvec *av); +extern int dccp_ackvec_add(struct dccp_ackvec *av, const struct sock *sk, + const u64 ackno, const u8 state); + +extern void dccp_ackvec_check_rcv_ackno(struct dccp_ackvec *av, + struct sock *sk, const u64 ackno); +extern int dccp_ackvec_parse(struct sock *sk, const struct sk_buff *skb, + u64 *ackno, const u8 opt, + const u8 *value, const u8 len); -static inline bool dccp_ackvec_is_empty(const struct dccp_ackvec *av) +extern int dccp_insert_option_ackvec(struct sock *sk, struct sk_buff *skb); + +static inline int dccp_ackvec_pending(const struct dccp_ackvec *av) +{ + return av->av_vec_len; +} +#else /* CONFIG_IP_DCCP_ACKVEC */ +static inline int dccp_ackvec_init(void) { - return av->av_overflow == 0 && av->av_buf_head == av->av_buf_tail; + return 0; } -/** - * struct dccp_ackvec_parsed - Record offsets of Ack Vectors in skb - * @vec: start of vector (offset into skb) - * @len: length of @vec - * @nonce: whether @vec had an ECN nonce of 0 or 1 - * @node: FIFO - arranged in descending order of ack_ackno - * This structure is used by CCIDs to access Ack Vectors in a received skb. - */ -struct dccp_ackvec_parsed { - u8 *vec, - len, - nonce:1; - struct list_head node; -}; +static inline void dccp_ackvec_exit(void) +{ +} + +static inline struct dccp_ackvec *dccp_ackvec_alloc(const gfp_t priority) +{ + return NULL; +} + +static inline void dccp_ackvec_free(struct dccp_ackvec *av) +{ +} + +static inline int dccp_ackvec_add(struct dccp_ackvec *av, const struct sock *sk, + const u64 ackno, const u8 state) +{ + return -1; +} -extern int dccp_ackvec_parsed_add(struct list_head *head, - u8 *vec, u8 len, u8 nonce); -extern void dccp_ackvec_parsed_cleanup(struct list_head *parsed_chunks); +static inline void dccp_ackvec_check_rcv_ackno(struct dccp_ackvec *av, + struct sock *sk, const u64 ackno) +{ +} + +static inline int dccp_ackvec_parse(struct sock *sk, const struct sk_buff *skb, + const u64 *ackno, const u8 opt, + const u8 *value, const u8 len) +{ + return -1; +} + +static inline int dccp_insert_option_ackvec(const struct sock *sk, + const struct sk_buff *skb) +{ + return -1; +} + +static inline int dccp_ackvec_pending(const struct dccp_ackvec *av) +{ + return 0; +} +#endif /* CONFIG_IP_DCCP_ACKVEC */ #endif /* _ACKVEC_H */ diff --git a/net/dccp/ccid.c b/net/dccp/ccid.c index e3fb52b4f5c..4809753d12a 100644 --- a/net/dccp/ccid.c +++ b/net/dccp/ccid.c @@ -13,13 +13,6 @@ #include "ccid.h" -static u8 builtin_ccids[] = { - DCCPC_CCID2, /* CCID2 is supported by default */ -#if defined(CONFIG_IP_DCCP_CCID3) || defined(CONFIG_IP_DCCP_CCID3_MODULE) - DCCPC_CCID3, -#endif -}; - static struct ccid_operations *ccids[CCID_MAX]; #if defined(CONFIG_SMP) || defined(CONFIG_PREEMPT) static atomic_t ccids_lockct = ATOMIC_INIT(0); @@ -93,47 +86,6 @@ static void ccid_kmem_cache_destroy(struct kmem_cache *slab) } } -/* check that up to @array_len members in @ccid_array are supported */ -bool ccid_support_check(u8 const *ccid_array, u8 array_len) -{ - u8 i, j, found; - - for (i = 0, found = 0; i < array_len; i++, found = 0) { - for (j = 0; !found && j < ARRAY_SIZE(builtin_ccids); j++) - found = (ccid_array[i] == builtin_ccids[j]); - if (!found) - return false; - } - return true; -} - -/** - * ccid_get_builtin_ccids - Provide copy of `builtin' CCID array - * @ccid_array: pointer to copy into - * @array_len: value to return length into - * This function allocates memory - caller must see that it is freed after use. - */ -int ccid_get_builtin_ccids(u8 **ccid_array, u8 *array_len) -{ - *ccid_array = kmemdup(builtin_ccids, sizeof(builtin_ccids), gfp_any()); - if (*ccid_array == NULL) - return -ENOBUFS; - *array_len = ARRAY_SIZE(builtin_ccids); - return 0; -} - -int ccid_getsockopt_builtin_ccids(struct sock *sk, int len, - char __user *optval, int __user *optlen) -{ - if (len < sizeof(builtin_ccids)) - return -EINVAL; - - if (put_user(sizeof(builtin_ccids), optlen) || - copy_to_user(optval, builtin_ccids, sizeof(builtin_ccids))) - return -EFAULT; - return 0; -} - int ccid_register(struct ccid_operations *ccid_ops) { int err = -ENOBUFS; @@ -196,41 +148,22 @@ int ccid_unregister(struct ccid_operations *ccid_ops) EXPORT_SYMBOL_GPL(ccid_unregister); -/** - * ccid_request_module - Pre-load CCID module for later use - * This should be called only from process context (e.g. during connection - * setup) and is necessary for later calls to ccid_new (typically in software - * interrupt), so that it has the modules available when they are needed. - */ -static int ccid_request_module(u8 id) -{ - if (!in_atomic()) { - ccids_read_lock(); - if (ccids[id] == NULL) { - ccids_read_unlock(); - return request_module("net-dccp-ccid-%d", id); - } - ccids_read_unlock(); - } - return 0; -} - -int ccid_request_modules(u8 const *ccid_array, u8 array_len) -{ -#ifdef CONFIG_KMOD - while (array_len--) - if (ccid_request_module(ccid_array[array_len])) - return -1; -#endif - return 0; -} - struct ccid *ccid_new(unsigned char id, struct sock *sk, int rx, gfp_t gfp) { struct ccid_operations *ccid_ops; struct ccid *ccid = NULL; ccids_read_lock(); +#ifdef CONFIG_KMOD + if (ccids[id] == NULL) { + /* We only try to load if in process context */ + ccids_read_unlock(); + if (gfp & GFP_ATOMIC) + goto out; + request_module("net-dccp-ccid-%d", id); + ccids_read_lock(); + } +#endif ccid_ops = ccids[id]; if (ccid_ops == NULL) goto out_unlock; @@ -272,6 +205,20 @@ out_module_put: EXPORT_SYMBOL_GPL(ccid_new); +struct ccid *ccid_hc_rx_new(unsigned char id, struct sock *sk, gfp_t gfp) +{ + return ccid_new(id, sk, 1, gfp); +} + +EXPORT_SYMBOL_GPL(ccid_hc_rx_new); + +struct ccid *ccid_hc_tx_new(unsigned char id,struct sock *sk, gfp_t gfp) +{ + return ccid_new(id, sk, 0, gfp); +} + +EXPORT_SYMBOL_GPL(ccid_hc_tx_new); + static void ccid_delete(struct ccid *ccid, struct sock *sk, int rx) { struct ccid_operations *ccid_ops; diff --git a/net/dccp/ccid.h b/net/dccp/ccid.h index d27054ba215..fdeae7b5731 100644 --- a/net/dccp/ccid.h +++ b/net/dccp/ccid.h @@ -60,18 +60,22 @@ struct ccid_operations { void (*ccid_hc_tx_exit)(struct sock *sk); void (*ccid_hc_rx_packet_recv)(struct sock *sk, struct sk_buff *skb); - int (*ccid_hc_rx_parse_options)(struct sock *sk, u8 pkt, - u8 opt, u8 *val, u8 len); + int (*ccid_hc_rx_parse_options)(struct sock *sk, + unsigned char option, + unsigned char len, u16 idx, + unsigned char* value); int (*ccid_hc_rx_insert_options)(struct sock *sk, struct sk_buff *skb); void (*ccid_hc_tx_packet_recv)(struct sock *sk, struct sk_buff *skb); - int (*ccid_hc_tx_parse_options)(struct sock *sk, u8 pkt, - u8 opt, u8 *val, u8 len); + int (*ccid_hc_tx_parse_options)(struct sock *sk, + unsigned char option, + unsigned char len, u16 idx, + unsigned char* value); int (*ccid_hc_tx_send_packet)(struct sock *sk, struct sk_buff *skb); void (*ccid_hc_tx_packet_sent)(struct sock *sk, - unsigned int len); + int more, unsigned int len); void (*ccid_hc_rx_get_info)(struct sock *sk, struct tcp_info *info); void (*ccid_hc_tx_get_info)(struct sock *sk, @@ -99,78 +103,31 @@ static inline void *ccid_priv(const struct ccid *ccid) return (void *)ccid->ccid_priv; } -extern bool ccid_support_check(u8 const *ccid_array, u8 array_len); -extern int ccid_get_builtin_ccids(u8 **ccid_array, u8 *array_len); -extern int ccid_getsockopt_builtin_ccids(struct sock *sk, int len, - char __user *, int __user *); - -extern int ccid_request_modules(u8 const *ccid_array, u8 array_len); extern struct ccid *ccid_new(unsigned char id, struct sock *sk, int rx, gfp_t gfp); -static inline int ccid_get_current_rx_ccid(struct dccp_sock *dp) -{ - struct ccid *ccid = dp->dccps_hc_rx_ccid; - - if (ccid == NULL || ccid->ccid_ops == NULL) - return -1; - return ccid->ccid_ops->ccid_id; -} - -static inline int ccid_get_current_tx_ccid(struct dccp_sock *dp) -{ - struct ccid *ccid = dp->dccps_hc_tx_ccid; - - if (ccid == NULL || ccid->ccid_ops == NULL) - return -1; - return ccid->ccid_ops->ccid_id; -} +extern struct ccid *ccid_hc_rx_new(unsigned char id, struct sock *sk, + gfp_t gfp); +extern struct ccid *ccid_hc_tx_new(unsigned char id, struct sock *sk, + gfp_t gfp); extern void ccid_hc_rx_delete(struct ccid *ccid, struct sock *sk); extern void ccid_hc_tx_delete(struct ccid *ccid, struct sock *sk); -/* - * Congestion control of queued data packets via CCID decision. - * - * The TX CCID performs its congestion-control by indicating whether and when a - * queued packet may be sent, using the return code of ccid_hc_tx_send_packet(). - * The following modes are supported via the symbolic constants below: - * - timer-based pacing (CCID returns a delay value in milliseconds); - * - autonomous dequeueing (CCID internally schedules dccps_xmitlet). - */ - -enum ccid_dequeueing_decision { - CCID_PACKET_SEND_AT_ONCE = 0x00000, /* "green light": no delay */ - CCID_PACKET_DELAY_MAX = 0x0FFFF, /* maximum delay in msecs */ - CCID_PACKET_DELAY = 0x10000, /* CCID msec-delay mode */ - CCID_PACKET_WILL_DEQUEUE_LATER = 0x20000, /* CCID autonomous mode */ - CCID_PACKET_ERR = 0xF0000, /* error condition */ -}; - -static inline int ccid_packet_dequeue_eval(const int return_code) -{ - if (return_code < 0) - return CCID_PACKET_ERR; - if (return_code == 0) - return CCID_PACKET_SEND_AT_ONCE; - if (return_code <= CCID_PACKET_DELAY_MAX) - return CCID_PACKET_DELAY; - return return_code; -} - static inline int ccid_hc_tx_send_packet(struct ccid *ccid, struct sock *sk, struct sk_buff *skb) { + int rc = 0; if (ccid->ccid_ops->ccid_hc_tx_send_packet != NULL) - return ccid->ccid_ops->ccid_hc_tx_send_packet(sk, skb); - return CCID_PACKET_SEND_AT_ONCE; + rc = ccid->ccid_ops->ccid_hc_tx_send_packet(sk, skb); + return rc; } static inline void ccid_hc_tx_packet_sent(struct ccid *ccid, struct sock *sk, - unsigned int len) + int more, unsigned int len) { if (ccid->ccid_ops->ccid_hc_tx_packet_sent != NULL) - ccid->ccid_ops->ccid_hc_tx_packet_sent(sk, len); + ccid->ccid_ops->ccid_hc_tx_packet_sent(sk, more, len); } static inline void ccid_hc_rx_packet_recv(struct ccid *ccid, struct sock *sk, @@ -187,31 +144,27 @@ static inline void ccid_hc_tx_packet_recv(struct ccid *ccid, struct sock *sk, ccid->ccid_ops->ccid_hc_tx_packet_recv(sk, skb); } -/** - * ccid_hc_tx_parse_options - Parse CCID-specific options sent by the receiver - * @pkt: type of packet that @opt appears on (RFC 4340, 5.1) - * @opt: the CCID-specific option type (RFC 4340, 5.8 and 10.3) - * @val: value of @opt - * @len: length of @val in bytes - */ static inline int ccid_hc_tx_parse_options(struct ccid *ccid, struct sock *sk, - u8 pkt, u8 opt, u8 *val, u8 len) + unsigned char option, + unsigned char len, u16 idx, + unsigned char* value) { - if (ccid->ccid_ops->ccid_hc_tx_parse_options == NULL) - return 0; - return ccid->ccid_ops->ccid_hc_tx_parse_options(sk, pkt, opt, val, len); + int rc = 0; + if (ccid->ccid_ops->ccid_hc_tx_parse_options != NULL) + rc = ccid->ccid_ops->ccid_hc_tx_parse_options(sk, option, len, idx, + value); + return rc; } -/** - * ccid_hc_rx_parse_options - Parse CCID-specific options sent by the sender - * Arguments are analogous to ccid_hc_tx_parse_options() - */ static inline int ccid_hc_rx_parse_options(struct ccid *ccid, struct sock *sk, - u8 pkt, u8 opt, u8 *val, u8 len) + unsigned char option, + unsigned char len, u16 idx, + unsigned char* value) { - if (ccid->ccid_ops->ccid_hc_rx_parse_options == NULL) - return 0; - return ccid->ccid_ops->ccid_hc_rx_parse_options(sk, pkt, opt, val, len); + int rc = 0; + if (ccid->ccid_ops->ccid_hc_rx_parse_options != NULL) + rc = ccid->ccid_ops->ccid_hc_rx_parse_options(sk, option, len, idx, value); + return rc; } static inline int ccid_hc_rx_insert_options(struct ccid *ccid, struct sock *sk, diff --git a/net/dccp/ccids/Kconfig b/net/dccp/ccids/Kconfig index fb168be2cb4..12275943eab 100644 --- a/net/dccp/ccids/Kconfig +++ b/net/dccp/ccids/Kconfig @@ -1,8 +1,10 @@ menu "DCCP CCIDs Configuration (EXPERIMENTAL)" + depends on EXPERIMENTAL config IP_DCCP_CCID2 - tristate "CCID2 (TCP-Like)" + tristate "CCID2 (TCP-Like) (EXPERIMENTAL)" def_tristate IP_DCCP + select IP_DCCP_ACKVEC ---help--- CCID 2, TCP-like Congestion Control, denotes Additive Increase, Multiplicative Decrease (AIMD) congestion control with behavior @@ -34,7 +36,7 @@ config IP_DCCP_CCID2_DEBUG If in doubt, say N. config IP_DCCP_CCID3 - tristate "CCID3 (TCP-Friendly)" + tristate "CCID3 (TCP-Friendly) (EXPERIMENTAL)" def_tristate IP_DCCP select IP_DCCP_TFRC_LIB ---help--- @@ -62,9 +64,9 @@ config IP_DCCP_CCID3 If in doubt, say M. -if IP_DCCP_CCID3 config IP_DCCP_CCID3_DEBUG bool "CCID3 debugging messages" + depends on IP_DCCP_CCID3 ---help--- Enable CCID3-specific debugging messages. @@ -74,29 +76,10 @@ config IP_DCCP_CCID3_DEBUG If in doubt, say N. -choice - prompt "Select method for measuring the packet size s" - default IP_DCCP_CCID3_MEASURE_S_AS_MPS - -config IP_DCCP_CCID3_MEASURE_S_AS_MPS - bool "Always use MPS in place of s" - ---help--- - This use is recommended as it is consistent with the initialisation - of X and suggested when s varies (rfc3448bis, (1) in section 4.1). -config IP_DCCP_CCID3_MEASURE_S_AS_AVG - bool "Use moving average" - ---help--- - An alternative way of tracking s, also supported by rfc3448bis. - This used to be the default for CCID-3 in previous kernels. -config IP_DCCP_CCID3_MEASURE_S_AS_MAX - bool "Track the maximum payload length" - ---help--- - An experimental method based on tracking the maximum packet size. -endchoice - config IP_DCCP_CCID3_RTO int "Use higher bound for nofeedback timer" default 100 + depends on IP_DCCP_CCID3 && EXPERIMENTAL ---help--- Use higher lower bound for nofeedback timer expiration. @@ -123,7 +106,6 @@ config IP_DCCP_CCID3_RTO The purpose of the nofeedback timer is to slow DCCP down when there is serious network congestion: experimenting with larger values should therefore not be performed on WANs. -endif # IP_DCCP_CCID3 config IP_DCCP_TFRC_LIB tristate diff --git a/net/dccp/ccids/ccid2.c b/net/dccp/ccids/ccid2.c index fa713227c66..9a430734530 100644 --- a/net/dccp/ccids/ccid2.c +++ b/net/dccp/ccids/ccid2.c @@ -25,7 +25,7 @@ /* * This implementation should follow RFC 4341 */ -#include "../feat.h" + #include "../ccid.h" #include "../dccp.h" #include "ccid2.h" @@ -34,8 +34,51 @@ #ifdef CONFIG_IP_DCCP_CCID2_DEBUG static int ccid2_debug; #define ccid2_pr_debug(format, a...) DCCP_PR_DEBUG(ccid2_debug, format, ##a) + +static void ccid2_hc_tx_check_sanity(const struct ccid2_hc_tx_sock *hctx) +{ + int len = 0; + int pipe = 0; + struct ccid2_seq *seqp = hctx->ccid2hctx_seqh; + + /* there is data in the chain */ + if (seqp != hctx->ccid2hctx_seqt) { + seqp = seqp->ccid2s_prev; + len++; + if (!seqp->ccid2s_acked) + pipe++; + + while (seqp != hctx->ccid2hctx_seqt) { + struct ccid2_seq *prev = seqp->ccid2s_prev; + + len++; + if (!prev->ccid2s_acked) + pipe++; + + /* packets are sent sequentially */ + BUG_ON(dccp_delta_seqno(seqp->ccid2s_seq, + prev->ccid2s_seq ) >= 0); + BUG_ON(time_before(seqp->ccid2s_sent, + prev->ccid2s_sent)); + + seqp = prev; + } + } + + BUG_ON(pipe != hctx->ccid2hctx_pipe); + ccid2_pr_debug("len of chain=%d\n", len); + + do { + seqp = seqp->ccid2s_prev; + len++; + } while (seqp != hctx->ccid2hctx_seqh); + + ccid2_pr_debug("total len=%d\n", len); + BUG_ON(len != hctx->ccid2hctx_seqbufc * CCID2_SEQBUF_LEN); +} #else #define ccid2_pr_debug(format, a...) +#define ccid2_hc_tx_check_sanity(hctx) #endif static int ccid2_hc_tx_alloc_seq(struct ccid2_hc_tx_sock *hctx) @@ -44,7 +87,8 @@ static int ccid2_hc_tx_alloc_seq(struct ccid2_hc_tx_sock *hctx) int i; /* check if we have space to preserve the pointer to the buffer */ - if (hctx->seqbufc >= sizeof(hctx->seqbuf) / sizeof(struct ccid2_seq *)) + if (hctx->ccid2hctx_seqbufc >= (sizeof(hctx->ccid2hctx_seqbuf) / + sizeof(struct ccid2_seq*))) return -ENOMEM; /* allocate buffer and initialize linked list */ @@ -60,35 +104,38 @@ static int ccid2_hc_tx_alloc_seq(struct ccid2_hc_tx_sock *hctx) seqp->ccid2s_prev = &seqp[CCID2_SEQBUF_LEN - 1]; /* This is the first allocation. Initiate the head and tail. */ - if (hctx->seqbufc == 0) - hctx->seqh = hctx->seqt = seqp; + if (hctx->ccid2hctx_seqbufc == 0) + hctx->ccid2hctx_seqh = hctx->ccid2hctx_seqt = seqp; else { /* link the existing list with the one we just created */ - hctx->seqh->ccid2s_next = seqp; - seqp->ccid2s_prev = hctx->seqh; + hctx->ccid2hctx_seqh->ccid2s_next = seqp; + seqp->ccid2s_prev = hctx->ccid2hctx_seqh; - hctx->seqt->ccid2s_prev = &seqp[CCID2_SEQBUF_LEN - 1]; - seqp[CCID2_SEQBUF_LEN - 1].ccid2s_next = hctx->seqt; + hctx->ccid2hctx_seqt->ccid2s_prev = &seqp[CCID2_SEQBUF_LEN - 1]; + seqp[CCID2_SEQBUF_LEN - 1].ccid2s_next = hctx->ccid2hctx_seqt; } /* store the original pointer to the buffer so we can free it */ - hctx->seqbuf[hctx->seqbufc] = seqp; - hctx->seqbufc++; + hctx->ccid2hctx_seqbuf[hctx->ccid2hctx_seqbufc] = seqp; + hctx->ccid2hctx_seqbufc++; return 0; } static int ccid2_hc_tx_send_packet(struct sock *sk, struct sk_buff *skb) { - if (ccid2_cwnd_network_limited(ccid2_hc_tx_sk(sk))) - return CCID_PACKET_WILL_DEQUEUE_LATER; - return CCID_PACKET_SEND_AT_ONCE; + struct ccid2_hc_tx_sock *hctx = ccid2_hc_tx_sk(sk); + + if (hctx->ccid2hctx_pipe < hctx->ccid2hctx_cwnd) + return 0; + + return 1; /* XXX CCID should dequeue when ready instead of polling */ } static void ccid2_change_l_ack_ratio(struct sock *sk, u32 val) { struct dccp_sock *dp = dccp_sk(sk); - u32 max_ratio = DIV_ROUND_UP(ccid2_hc_tx_sk(sk)->cwnd, 2); + u32 max_ratio = DIV_ROUND_UP(ccid2_hc_tx_sk(sk)->ccid2hctx_cwnd, 2); /* * Ensure that Ack Ratio does not exceed ceil(cwnd/2), which is (2) from @@ -100,8 +147,8 @@ static void ccid2_change_l_ack_ratio(struct sock *sk, u32 val) DCCP_WARN("Limiting Ack Ratio (%u) to %u\n", val, max_ratio); val = max_ratio; } - if (val > DCCPF_ACK_RATIO_MAX) - val = DCCPF_ACK_RATIO_MAX; + if (val > 0xFFFF) /* RFC 4340, 11.3 */ + val = 0xFFFF; if (val == dp->dccps_l_ack_ratio) return; @@ -110,77 +157,99 @@ static void ccid2_change_l_ack_ratio(struct sock *sk, u32 val) dp->dccps_l_ack_ratio = val; } +static void ccid2_change_srtt(struct ccid2_hc_tx_sock *hctx, long val) +{ + ccid2_pr_debug("change SRTT to %ld\n", val); + hctx->ccid2hctx_srtt = val; +} + +static void ccid2_start_rto_timer(struct sock *sk); + static void ccid2_hc_tx_rto_expire(unsigned long data) { struct sock *sk = (struct sock *)data; struct ccid2_hc_tx_sock *hctx = ccid2_hc_tx_sk(sk); - const bool sender_was_blocked = ccid2_cwnd_network_limited(hctx); + long s; bh_lock_sock(sk); if (sock_owned_by_user(sk)) { - sk_reset_timer(sk, &hctx->rtotimer, jiffies + HZ / 5); + sk_reset_timer(sk, &hctx->ccid2hctx_rtotimer, + jiffies + HZ / 5); goto out; } ccid2_pr_debug("RTO_EXPIRE\n"); + ccid2_hc_tx_check_sanity(hctx); + /* back-off timer */ - hctx->rto <<= 1; - if (hctx->rto > DCCP_RTO_MAX) - hctx->rto = DCCP_RTO_MAX; + hctx->ccid2hctx_rto <<= 1; + + s = hctx->ccid2hctx_rto / HZ; + if (s > 60) + hctx->ccid2hctx_rto = 60 * HZ; + + ccid2_start_rto_timer(sk); /* adjust pipe, cwnd etc */ - hctx->ssthresh = hctx->cwnd / 2; - if (hctx->ssthresh < 2) - hctx->ssthresh = 2; - hctx->cwnd = 1; - hctx->pipe = 0; + hctx->ccid2hctx_ssthresh = hctx->ccid2hctx_cwnd / 2; + if (hctx->ccid2hctx_ssthresh < 2) + hctx->ccid2hctx_ssthresh = 2; + hctx->ccid2hctx_cwnd = 1; + hctx->ccid2hctx_pipe = 0; /* clear state about stuff we sent */ - hctx->seqt = hctx->seqh; - hctx->packets_acked = 0; + hctx->ccid2hctx_seqt = hctx->ccid2hctx_seqh; + hctx->ccid2hctx_packets_acked = 0; /* clear ack ratio state. */ - hctx->rpseq = 0; - hctx->rpdupack = -1; + hctx->ccid2hctx_rpseq = 0; + hctx->ccid2hctx_rpdupack = -1; ccid2_change_l_ack_ratio(sk, 1); - - /* if we were blocked before, we may now send cwnd=1 packet */ - if (sender_was_blocked) - tasklet_schedule(&dccp_sk(sk)->dccps_xmitlet); - /* restart backed-off timer */ - sk_reset_timer(sk, &hctx->rtotimer, jiffies + hctx->rto); + ccid2_hc_tx_check_sanity(hctx); out: bh_unlock_sock(sk); sock_put(sk); } -static void ccid2_hc_tx_packet_sent(struct sock *sk, unsigned int len) +static void ccid2_start_rto_timer(struct sock *sk) +{ + struct ccid2_hc_tx_sock *hctx = ccid2_hc_tx_sk(sk); + + ccid2_pr_debug("setting RTO timeout=%ld\n", hctx->ccid2hctx_rto); + + BUG_ON(timer_pending(&hctx->ccid2hctx_rtotimer)); + sk_reset_timer(sk, &hctx->ccid2hctx_rtotimer, + jiffies + hctx->ccid2hctx_rto); +} + +static void ccid2_hc_tx_packet_sent(struct sock *sk, int more, unsigned int len) { struct dccp_sock *dp = dccp_sk(sk); struct ccid2_hc_tx_sock *hctx = ccid2_hc_tx_sk(sk); struct ccid2_seq *next; - hctx->pipe++; + hctx->ccid2hctx_pipe++; - hctx->seqh->ccid2s_seq = dp->dccps_gss; - hctx->seqh->ccid2s_acked = 0; - hctx->seqh->ccid2s_sent = jiffies; + hctx->ccid2hctx_seqh->ccid2s_seq = dp->dccps_gss; + hctx->ccid2hctx_seqh->ccid2s_acked = 0; + hctx->ccid2hctx_seqh->ccid2s_sent = jiffies; - next = hctx->seqh->ccid2s_next; + next = hctx->ccid2hctx_seqh->ccid2s_next; /* check if we need to alloc more space */ - if (next == hctx->seqt) { + if (next == hctx->ccid2hctx_seqt) { if (ccid2_hc_tx_alloc_seq(hctx)) { DCCP_CRIT("packet history - out of memory!"); /* FIXME: find a more graceful way to bail out */ return; } - next = hctx->seqh->ccid2s_next; - BUG_ON(next == hctx->seqt); + next = hctx->ccid2hctx_seqh->ccid2s_next; + BUG_ON(next == hctx->ccid2hctx_seqt); } - hctx->seqh = next; + hctx->ccid2hctx_seqh = next; - ccid2_pr_debug("cwnd=%d pipe=%d\n", hctx->cwnd, hctx->pipe); + ccid2_pr_debug("cwnd=%d pipe=%d\n", hctx->ccid2hctx_cwnd, + hctx->ccid2hctx_pipe); /* * FIXME: The code below is broken and the variables have been removed @@ -203,12 +272,12 @@ static void ccid2_hc_tx_packet_sent(struct sock *sk, unsigned int len) */ #if 0 /* Ack Ratio. Need to maintain a concept of how many windows we sent */ - hctx->arsent++; + hctx->ccid2hctx_arsent++; /* We had an ack loss in this window... */ - if (hctx->ackloss) { - if (hctx->arsent >= hctx->cwnd) { - hctx->arsent = 0; - hctx->ackloss = 0; + if (hctx->ccid2hctx_ackloss) { + if (hctx->ccid2hctx_arsent >= hctx->ccid2hctx_cwnd) { + hctx->ccid2hctx_arsent = 0; + hctx->ccid2hctx_ackloss = 0; } } else { /* No acks lost up to now... */ @@ -218,28 +287,28 @@ static void ccid2_hc_tx_packet_sent(struct sock *sk, unsigned int len) int denom = dp->dccps_l_ack_ratio * dp->dccps_l_ack_ratio - dp->dccps_l_ack_ratio; - denom = hctx->cwnd * hctx->cwnd / denom; + denom = hctx->ccid2hctx_cwnd * hctx->ccid2hctx_cwnd / denom; - if (hctx->arsent >= denom) { + if (hctx->ccid2hctx_arsent >= denom) { ccid2_change_l_ack_ratio(sk, dp->dccps_l_ack_ratio - 1); - hctx->arsent = 0; + hctx->ccid2hctx_arsent = 0; } } else { /* we can't increase ack ratio further [1] */ - hctx->arsent = 0; /* or maybe set it to cwnd*/ + hctx->ccid2hctx_arsent = 0; /* or maybe set it to cwnd*/ } } #endif /* setup RTO timer */ - if (!timer_pending(&hctx->rtotimer)) - sk_reset_timer(sk, &hctx->rtotimer, jiffies + hctx->rto); + if (!timer_pending(&hctx->ccid2hctx_rtotimer)) + ccid2_start_rto_timer(sk); #ifdef CONFIG_IP_DCCP_CCID2_DEBUG do { - struct ccid2_seq *seqp = hctx->seqt; + struct ccid2_seq *seqp = hctx->ccid2hctx_seqt; - while (seqp != hctx->seqh) { + while (seqp != hctx->ccid2hctx_seqh) { ccid2_pr_debug("out seq=%llu acked=%d time=%lu\n", (unsigned long long)seqp->ccid2s_seq, seqp->ccid2s_acked, seqp->ccid2s_sent); @@ -247,158 +316,205 @@ static void ccid2_hc_tx_packet_sent(struct sock *sk, unsigned int len) } } while (0); ccid2_pr_debug("=========\n"); + ccid2_hc_tx_check_sanity(hctx); #endif } -/** - * ccid2_rtt_estimator - Sample RTT and compute RTO using RFC2988 algorithm - * This code is almost identical with TCP's tcp_rtt_estimator(), since - * - it has a higher sampling frequency (recommended by RFC 1323), - * - the RTO does not collapse into RTT due to RTTVAR going towards zero, - * - it is simple (cf. more complex proposals such as Eifel timer or research - * which suggests that the gain should be set according to window size), - * - in tests it was found to work well with CCID2 [gerrit]. +/* XXX Lame code duplication! + * returns -1 if none was found. + * else returns the next offset to use in the function call. */ -static void ccid2_rtt_estimator(struct sock *sk, const long mrtt) +static int ccid2_ackvector(struct sock *sk, struct sk_buff *skb, int offset, + unsigned char **vec, unsigned char *veclen) { - struct ccid2_hc_tx_sock *hctx = ccid2_hc_tx_sk(sk); - long m = mrtt ? : 1; - - if (hctx->srtt == 0) { - /* First measurement m */ - hctx->srtt = m << 3; - hctx->mdev = m << 1; - - hctx->mdev_max = max(TCP_RTO_MIN, hctx->mdev); - hctx->rttvar = hctx->mdev_max; - hctx->rtt_seq = dccp_sk(sk)->dccps_gss; - } else { - /* Update scaled SRTT as SRTT += 1/8 * (m - SRTT) */ - m -= (hctx->srtt >> 3); - hctx->srtt += m; - - /* Similarly, update scaled mdev with regard to |m| */ - if (m < 0) { - m = -m; - m -= (hctx->mdev >> 2); + const struct dccp_hdr *dh = dccp_hdr(skb); + unsigned char *options = (unsigned char *)dh + dccp_hdr_len(skb); + unsigned char *opt_ptr; + const unsigned char *opt_end = (unsigned char *)dh + + (dh->dccph_doff * 4); + unsigned char opt, len; + unsigned char *value; + + BUG_ON(offset < 0); + options += offset; + opt_ptr = options; + if (opt_ptr >= opt_end) + return -1; + + while (opt_ptr != opt_end) { + opt = *opt_ptr++; + len = 0; + value = NULL; + + /* Check if this isn't a single byte option */ + if (opt > DCCPO_MAX_RESERVED) { + if (opt_ptr == opt_end) + goto out_invalid_option; + + len = *opt_ptr++; + if (len < 3) + goto out_invalid_option; /* - * This neutralises RTO increase when RTT < SRTT - mdev - * (see P. Sarolahti, A. Kuznetsov,"Congestion Control - * in Linux TCP", USENIX 2002, pp. 49-62). + * Remove the type and len fields, leaving + * just the value size */ - if (m > 0) - m >>= 3; - } else { - m -= (hctx->mdev >> 2); - } - hctx->mdev += m; + len -= 2; + value = opt_ptr; + opt_ptr += len; - if (hctx->mdev > hctx->mdev_max) { - hctx->mdev_max = hctx->mdev; - if (hctx->mdev_max > hctx->rttvar) - hctx->rttvar = hctx->mdev_max; + if (opt_ptr > opt_end) + goto out_invalid_option; } - /* - * Decay RTTVAR at most once per flight, exploiting that - * 1) pipe <= cwnd <= Sequence_Window = W (RFC 4340, 7.5.2) - * 2) AWL = GSS-W+1 <= GAR <= GSS (RFC 4340, 7.5.1) - * GAR is a useful bound for FlightSize = pipe, AWL is probably - * too low as it over-estimates pipe. - */ - if (after48(dccp_sk(sk)->dccps_gar, hctx->rtt_seq)) { - if (hctx->mdev_max < hctx->rttvar) - hctx->rttvar -= (hctx->rttvar - - hctx->mdev_max) >> 2; - hctx->rtt_seq = dccp_sk(sk)->dccps_gss; - hctx->mdev_max = TCP_RTO_MIN; + switch (opt) { + case DCCPO_ACK_VECTOR_0: + case DCCPO_ACK_VECTOR_1: + *vec = value; + *veclen = len; + return offset + (opt_ptr - options); } } - /* - * Set RTO from SRTT and RTTVAR - * Clock granularity is ignored since the minimum error for RTTVAR is - * clamped to 50msec (corresponding to HZ=20). This leads to a minimum - * RTO of 200msec. This agrees with TCP and RFC 4341, 5.: "Because DCCP - * does not retransmit data, DCCP does not require TCP's recommended - * minimum timeout of one second". - */ - hctx->rto = (hctx->srtt >> 3) + hctx->rttvar; + return -1; - if (hctx->rto > DCCP_RTO_MAX) - hctx->rto = DCCP_RTO_MAX; +out_invalid_option: + DCCP_BUG("Invalid option - this should not happen (previous parsing)!"); + return -1; } -static void ccid2_new_ack(struct sock *sk, struct ccid2_seq *seqp, - unsigned int *maxincr) +static void ccid2_hc_tx_kill_rto_timer(struct sock *sk) { struct ccid2_hc_tx_sock *hctx = ccid2_hc_tx_sk(sk); - if (hctx->cwnd < hctx->ssthresh) { - if (*maxincr > 0 && ++hctx->packets_acked == 2) { - hctx->cwnd += 1; - *maxincr -= 1; - hctx->packets_acked = 0; - } - } else if (++hctx->packets_acked >= hctx->cwnd) { - hctx->cwnd += 1; - hctx->packets_acked = 0; - } - /* - * FIXME: RTT is sampled several times per acknowledgment (for each - * entry in the Ack Vector), instead of once per Ack (as in TCP SACK). - * This causes the RTT to be over-estimated, since the older entries - * in the Ack Vector have earlier sending times. - * The cleanest solution is to not use the ccid2s_sent field at all - * and instead use DCCP timestamps - need to be resolved at some time. - */ - ccid2_rtt_estimator(sk, jiffies - seqp->ccid2s_sent); + sk_stop_timer(sk, &hctx->ccid2hctx_rtotimer); + ccid2_pr_debug("deleted RTO timer\n"); } -static void ccid2_congestion_event(struct sock *sk, struct ccid2_seq *seqp) +static inline void ccid2_new_ack(struct sock *sk, + struct ccid2_seq *seqp, + unsigned int *maxincr) { struct ccid2_hc_tx_sock *hctx = ccid2_hc_tx_sk(sk); - if (time_before(seqp->ccid2s_sent, hctx->last_cong)) { - ccid2_pr_debug("Multiple losses in an RTT---treating as one\n"); - return; + if (hctx->ccid2hctx_cwnd < hctx->ccid2hctx_ssthresh) { + if (*maxincr > 0 && ++hctx->ccid2hctx_packets_acked == 2) { + hctx->ccid2hctx_cwnd += 1; + *maxincr -= 1; + hctx->ccid2hctx_packets_acked = 0; + } + } else if (++hctx->ccid2hctx_packets_acked >= hctx->ccid2hctx_cwnd) { + hctx->ccid2hctx_cwnd += 1; + hctx->ccid2hctx_packets_acked = 0; } - hctx->last_cong = jiffies; + /* update RTO */ + if (hctx->ccid2hctx_srtt == -1 || + time_after(jiffies, hctx->ccid2hctx_lastrtt + hctx->ccid2hctx_srtt)) { + unsigned long r = (long)jiffies - (long)seqp->ccid2s_sent; + int s; + + /* first measurement */ + if (hctx->ccid2hctx_srtt == -1) { + ccid2_pr_debug("R: %lu Time=%lu seq=%llu\n", + r, jiffies, + (unsigned long long)seqp->ccid2s_seq); + ccid2_change_srtt(hctx, r); + hctx->ccid2hctx_rttvar = r >> 1; + } else { + /* RTTVAR */ + long tmp = hctx->ccid2hctx_srtt - r; + long srtt; + + if (tmp < 0) + tmp *= -1; + + tmp >>= 2; + hctx->ccid2hctx_rttvar *= 3; + hctx->ccid2hctx_rttvar >>= 2; + hctx->ccid2hctx_rttvar += tmp; + + /* SRTT */ + srtt = hctx->ccid2hctx_srtt; + srtt *= 7; + srtt >>= 3; + tmp = r >> 3; + srtt += tmp; + ccid2_change_srtt(hctx, srtt); + } + s = hctx->ccid2hctx_rttvar << 2; + /* clock granularity is 1 when based on jiffies */ + if (!s) + s = 1; + hctx->ccid2hctx_rto = hctx->ccid2hctx_srtt + s; + + /* must be at least a second */ + s = hctx->ccid2hctx_rto / HZ; + /* DCCP doesn't require this [but I like it cuz my code sux] */ +#if 1 + if (s < 1) + hctx->ccid2hctx_rto = HZ; +#endif + /* max 60 seconds */ + if (s > 60) + hctx->ccid2hctx_rto = HZ * 60; - hctx->cwnd = hctx->cwnd / 2 ? : 1U; - hctx->ssthresh = max(hctx->cwnd, 2U); + hctx->ccid2hctx_lastrtt = jiffies; - /* Avoid spurious timeouts resulting from Ack Ratio > cwnd */ - if (dccp_sk(sk)->dccps_l_ack_ratio > hctx->cwnd) - ccid2_change_l_ack_ratio(sk, hctx->cwnd); + ccid2_pr_debug("srtt: %ld rttvar: %ld rto: %ld (HZ=%d) R=%lu\n", + hctx->ccid2hctx_srtt, hctx->ccid2hctx_rttvar, + hctx->ccid2hctx_rto, HZ, r); + } + + /* we got a new ack, so re-start RTO timer */ + ccid2_hc_tx_kill_rto_timer(sk); + ccid2_start_rto_timer(sk); } -static int ccid2_hc_tx_parse_options(struct sock *sk, u8 packet_type, - u8 option, u8 *optval, u8 optlen) +static void ccid2_hc_tx_dec_pipe(struct sock *sk) { struct ccid2_hc_tx_sock *hctx = ccid2_hc_tx_sk(sk); - switch (option) { - case DCCPO_ACK_VECTOR_0: - case DCCPO_ACK_VECTOR_1: - return dccp_ackvec_parsed_add(&hctx->av_chunks, optval, optlen, - option - DCCPO_ACK_VECTOR_0); + if (hctx->ccid2hctx_pipe == 0) + DCCP_BUG("pipe == 0"); + else + hctx->ccid2hctx_pipe--; + + if (hctx->ccid2hctx_pipe == 0) + ccid2_hc_tx_kill_rto_timer(sk); +} + +static void ccid2_congestion_event(struct sock *sk, struct ccid2_seq *seqp) +{ + struct ccid2_hc_tx_sock *hctx = ccid2_hc_tx_sk(sk); + + if (time_before(seqp->ccid2s_sent, hctx->ccid2hctx_last_cong)) { + ccid2_pr_debug("Multiple losses in an RTT---treating as one\n"); + return; } - return 0; + + hctx->ccid2hctx_last_cong = jiffies; + + hctx->ccid2hctx_cwnd = hctx->ccid2hctx_cwnd / 2 ? : 1U; + hctx->ccid2hctx_ssthresh = max(hctx->ccid2hctx_cwnd, 2U); + + /* Avoid spurious timeouts resulting from Ack Ratio > cwnd */ + if (dccp_sk(sk)->dccps_l_ack_ratio > hctx->ccid2hctx_cwnd) + ccid2_change_l_ack_ratio(sk, hctx->ccid2hctx_cwnd); } static void ccid2_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb) { struct dccp_sock *dp = dccp_sk(sk); struct ccid2_hc_tx_sock *hctx = ccid2_hc_tx_sk(sk); - const bool sender_was_blocked = ccid2_cwnd_network_limited(hctx); - struct dccp_ackvec_parsed *avp; u64 ackno, seqno; struct ccid2_seq *seqp; + unsigned char *vector; + unsigned char veclen; + int offset = 0; int done = 0; unsigned int maxincr = 0; + ccid2_hc_tx_check_sanity(hctx); /* check reverse path congestion */ seqno = DCCP_SKB_CB(skb)->dccpd_seq; @@ -407,21 +523,21 @@ static void ccid2_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb) * -sorbo. */ /* need to bootstrap */ - if (hctx->rpdupack == -1) { - hctx->rpdupack = 0; - hctx->rpseq = seqno; + if (hctx->ccid2hctx_rpdupack == -1) { + hctx->ccid2hctx_rpdupack = 0; + hctx->ccid2hctx_rpseq = seqno; } else { /* check if packet is consecutive */ - if (dccp_delta_seqno(hctx->rpseq, seqno) == 1) - hctx->rpseq = seqno; + if (dccp_delta_seqno(hctx->ccid2hctx_rpseq, seqno) == 1) + hctx->ccid2hctx_rpseq = seqno; /* it's a later packet */ - else if (after48(seqno, hctx->rpseq)) { - hctx->rpdupack++; + else if (after48(seqno, hctx->ccid2hctx_rpseq)) { + hctx->ccid2hctx_rpdupack++; /* check if we got enough dupacks */ - if (hctx->rpdupack >= NUMDUPACK) { - hctx->rpdupack = -1; /* XXX lame */ - hctx->rpseq = 0; + if (hctx->ccid2hctx_rpdupack >= NUMDUPACK) { + hctx->ccid2hctx_rpdupack = -1; /* XXX lame */ + hctx->ccid2hctx_rpseq = 0; ccid2_change_l_ack_ratio(sk, 2 * dp->dccps_l_ack_ratio); } @@ -429,22 +545,27 @@ static void ccid2_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb) } /* check forward path congestion */ - if (dccp_packet_without_ack(skb)) + /* still didn't send out new data packets */ + if (hctx->ccid2hctx_seqh == hctx->ccid2hctx_seqt) return; - /* still didn't send out new data packets */ - if (hctx->seqh == hctx->seqt) - goto done; + switch (DCCP_SKB_CB(skb)->dccpd_type) { + case DCCP_PKT_ACK: + case DCCP_PKT_DATAACK: + break; + default: + return; + } ackno = DCCP_SKB_CB(skb)->dccpd_ack_seq; - if (after48(ackno, hctx->high_ack)) - hctx->high_ack = ackno; + if (after48(ackno, hctx->ccid2hctx_high_ack)) + hctx->ccid2hctx_high_ack = ackno; - seqp = hctx->seqt; + seqp = hctx->ccid2hctx_seqt; while (before48(seqp->ccid2s_seq, ackno)) { seqp = seqp->ccid2s_next; - if (seqp == hctx->seqh) { - seqp = hctx->seqh->ccid2s_prev; + if (seqp == hctx->ccid2hctx_seqh) { + seqp = hctx->ccid2hctx_seqh->ccid2s_prev; break; } } @@ -454,26 +575,26 @@ static void ccid2_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb) * packets per acknowledgement. Rounding up avoids that cwnd is not * advanced when Ack Ratio is 1 and gives a slight edge otherwise. */ - if (hctx->cwnd < hctx->ssthresh) + if (hctx->ccid2hctx_cwnd < hctx->ccid2hctx_ssthresh) maxincr = DIV_ROUND_UP(dp->dccps_l_ack_ratio, 2); /* go through all ack vectors */ - list_for_each_entry(avp, &hctx->av_chunks, node) { + while ((offset = ccid2_ackvector(sk, skb, offset, + &vector, &veclen)) != -1) { /* go through this ack vector */ - for (; avp->len--; avp->vec++) { - u64 ackno_end_rl = SUB48(ackno, - dccp_ackvec_runlen(avp->vec)); + while (veclen--) { + const u8 rl = *vector & DCCP_ACKVEC_LEN_MASK; + u64 ackno_end_rl = SUB48(ackno, rl); - ccid2_pr_debug("ackvec %llu |%u,%u|\n", + ccid2_pr_debug("ackvec start:%llu end:%llu\n", (unsigned long long)ackno, - dccp_ackvec_state(avp->vec) >> 6, - dccp_ackvec_runlen(avp->vec)); + (unsigned long long)ackno_end_rl); /* if the seqno we are analyzing is larger than the * current ackno, then move towards the tail of our * seqnos. */ while (after48(seqp->ccid2s_seq, ackno)) { - if (seqp == hctx->seqt) { + if (seqp == hctx->ccid2hctx_seqt) { done = 1; break; } @@ -486,24 +607,26 @@ static void ccid2_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb) * run length */ while (between48(seqp->ccid2s_seq,ackno_end_rl,ackno)) { - const u8 state = dccp_ackvec_state(avp->vec); + const u8 state = *vector & + DCCP_ACKVEC_STATE_MASK; /* new packet received or marked */ - if (state != DCCPAV_NOT_RECEIVED && + if (state != DCCP_ACKVEC_STATE_NOT_RECEIVED && !seqp->ccid2s_acked) { - if (state == DCCPAV_ECN_MARKED) + if (state == + DCCP_ACKVEC_STATE_ECN_MARKED) { ccid2_congestion_event(sk, seqp); - else + } else ccid2_new_ack(sk, seqp, &maxincr); seqp->ccid2s_acked = 1; ccid2_pr_debug("Got ack for %llu\n", (unsigned long long)seqp->ccid2s_seq); - hctx->pipe--; + ccid2_hc_tx_dec_pipe(sk); } - if (seqp == hctx->seqt) { + if (seqp == hctx->ccid2hctx_seqt) { done = 1; break; } @@ -513,6 +636,7 @@ static void ccid2_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb) break; ackno = SUB48(ackno_end_rl, 1); + vector++; } if (done) break; @@ -521,11 +645,11 @@ static void ccid2_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb) /* The state about what is acked should be correct now * Check for NUMDUPACK */ - seqp = hctx->seqt; - while (before48(seqp->ccid2s_seq, hctx->high_ack)) { + seqp = hctx->ccid2hctx_seqt; + while (before48(seqp->ccid2s_seq, hctx->ccid2hctx_high_ack)) { seqp = seqp->ccid2s_next; - if (seqp == hctx->seqh) { - seqp = hctx->seqh->ccid2s_prev; + if (seqp == hctx->ccid2hctx_seqh) { + seqp = hctx->ccid2hctx_seqh->ccid2s_prev; break; } } @@ -536,7 +660,7 @@ static void ccid2_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb) if (done == NUMDUPACK) break; } - if (seqp == hctx->seqt) + if (seqp == hctx->ccid2hctx_seqt) break; seqp = seqp->ccid2s_prev; } @@ -557,34 +681,25 @@ static void ccid2_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb) * one ack vector. */ ccid2_congestion_event(sk, seqp); - hctx->pipe--; + ccid2_hc_tx_dec_pipe(sk); } - if (seqp == hctx->seqt) + if (seqp == hctx->ccid2hctx_seqt) break; seqp = seqp->ccid2s_prev; } - hctx->seqt = last_acked; + hctx->ccid2hctx_seqt = last_acked; } /* trim acked packets in tail */ - while (hctx->seqt != hctx->seqh) { - if (!hctx->seqt->ccid2s_acked) + while (hctx->ccid2hctx_seqt != hctx->ccid2hctx_seqh) { + if (!hctx->ccid2hctx_seqt->ccid2s_acked) break; - hctx->seqt = hctx->seqt->ccid2s_next; + hctx->ccid2hctx_seqt = hctx->ccid2hctx_seqt->ccid2s_next; } - /* restart RTO timer if not all outstanding data has been acked */ - if (hctx->pipe == 0) - sk_stop_timer(sk, &hctx->rtotimer); - else - sk_reset_timer(sk, &hctx->rtotimer, jiffies + hctx->rto); -done: - /* check if incoming Acks allow pending packets to be sent */ - if (sender_was_blocked && !ccid2_cwnd_network_limited(hctx)) - tasklet_schedule(&dccp_sk(sk)->dccps_xmitlet); - dccp_ackvec_parsed_cleanup(&hctx->av_chunks); + ccid2_hc_tx_check_sanity(hctx); } static int ccid2_hc_tx_init(struct ccid *ccid, struct sock *sk) @@ -594,13 +709,17 @@ static int ccid2_hc_tx_init(struct ccid *ccid, struct sock *sk) u32 max_ratio; /* RFC 4341, 5: initialise ssthresh to arbitrarily high (max) value */ - hctx->ssthresh = ~0U; + hctx->ccid2hctx_ssthresh = ~0U; - /* Use larger initial windows (RFC 3390, rfc2581bis) */ - hctx->cwnd = rfc3390_bytes_to_packets(dp->dccps_mss_cache); + /* + * RFC 4341, 5: "The cwnd parameter is initialized to at most four + * packets for new connections, following the rules from [RFC3390]". + * We need to convert the bytes of RFC3390 into the packets of RFC 4341. + */ + hctx->ccid2hctx_cwnd = clamp(4380U / dp->dccps_mss_cache, 2U, 4U); /* Make sure that Ack Ratio is enabled and within bounds. */ - max_ratio = DIV_ROUND_UP(hctx->cwnd, 2); + max_ratio = DIV_ROUND_UP(hctx->ccid2hctx_cwnd, 2); if (dp->dccps_l_ack_ratio == 0 || dp->dccps_l_ack_ratio > max_ratio) dp->dccps_l_ack_ratio = max_ratio; @@ -608,11 +727,15 @@ static int ccid2_hc_tx_init(struct ccid *ccid, struct sock *sk) if (ccid2_hc_tx_alloc_seq(hctx)) return -ENOMEM; - hctx->rto = DCCP_TIMEOUT_INIT; - hctx->rpdupack = -1; - hctx->last_cong = jiffies; - setup_timer(&hctx->rtotimer, ccid2_hc_tx_rto_expire, (unsigned long)sk); - INIT_LIST_HEAD(&hctx->av_chunks); + hctx->ccid2hctx_rto = 3 * HZ; + ccid2_change_srtt(hctx, -1); + hctx->ccid2hctx_rttvar = -1; + hctx->ccid2hctx_rpdupack = -1; + hctx->ccid2hctx_last_cong = jiffies; + setup_timer(&hctx->ccid2hctx_rtotimer, ccid2_hc_tx_rto_expire, + (unsigned long)sk); + + ccid2_hc_tx_check_sanity(hctx); return 0; } @@ -621,11 +744,11 @@ static void ccid2_hc_tx_exit(struct sock *sk) struct ccid2_hc_tx_sock *hctx = ccid2_hc_tx_sk(sk); int i; - sk_stop_timer(sk, &hctx->rtotimer); + ccid2_hc_tx_kill_rto_timer(sk); - for (i = 0; i < hctx->seqbufc; i++) - kfree(hctx->seqbuf[i]); - hctx->seqbufc = 0; + for (i = 0; i < hctx->ccid2hctx_seqbufc; i++) + kfree(hctx->ccid2hctx_seqbuf[i]); + hctx->ccid2hctx_seqbufc = 0; } static void ccid2_hc_rx_packet_recv(struct sock *sk, struct sk_buff *skb) @@ -636,28 +759,27 @@ static void ccid2_hc_rx_packet_recv(struct sock *sk, struct sk_buff *skb) switch (DCCP_SKB_CB(skb)->dccpd_type) { case DCCP_PKT_DATA: case DCCP_PKT_DATAACK: - hcrx->data++; - if (hcrx->data >= dp->dccps_r_ack_ratio) { + hcrx->ccid2hcrx_data++; + if (hcrx->ccid2hcrx_data >= dp->dccps_r_ack_ratio) { dccp_send_ack(sk); - hcrx->data = 0; + hcrx->ccid2hcrx_data = 0; } break; } } static struct ccid_operations ccid2 = { - .ccid_id = DCCPC_CCID2, - .ccid_name = "TCP-like", - .ccid_owner = THIS_MODULE, - .ccid_hc_tx_obj_size = sizeof(struct ccid2_hc_tx_sock), - .ccid_hc_tx_init = ccid2_hc_tx_init, - .ccid_hc_tx_exit = ccid2_hc_tx_exit, - .ccid_hc_tx_send_packet = ccid2_hc_tx_send_packet, - .ccid_hc_tx_packet_sent = ccid2_hc_tx_packet_sent, - .ccid_hc_tx_parse_options = ccid2_hc_tx_parse_options, - .ccid_hc_tx_packet_recv = ccid2_hc_tx_packet_recv, - .ccid_hc_rx_obj_size = sizeof(struct ccid2_hc_rx_sock), - .ccid_hc_rx_packet_recv = ccid2_hc_rx_packet_recv, + .ccid_id = DCCPC_CCID2, + .ccid_name = "TCP-like", + .ccid_owner = THIS_MODULE, + .ccid_hc_tx_obj_size = sizeof(struct ccid2_hc_tx_sock), + .ccid_hc_tx_init = ccid2_hc_tx_init, + .ccid_hc_tx_exit = ccid2_hc_tx_exit, + .ccid_hc_tx_send_packet = ccid2_hc_tx_send_packet, + .ccid_hc_tx_packet_sent = ccid2_hc_tx_packet_sent, + .ccid_hc_tx_packet_recv = ccid2_hc_tx_packet_recv, + .ccid_hc_rx_obj_size = sizeof(struct ccid2_hc_rx_sock), + .ccid_hc_rx_packet_recv = ccid2_hc_rx_packet_recv, }; #ifdef CONFIG_IP_DCCP_CCID2_DEBUG diff --git a/net/dccp/ccids/ccid2.h b/net/dccp/ccids/ccid2.h index 8b7a2dee2f6..2c94ca02901 100644 --- a/net/dccp/ccids/ccid2.h +++ b/net/dccp/ccids/ccid2.h @@ -42,49 +42,34 @@ struct ccid2_seq { /** struct ccid2_hc_tx_sock - CCID2 TX half connection * - * @{cwnd,ssthresh,pipe}: as per RFC 4341, section 5 - * @packets_acked: Ack counter for deriving cwnd growth (RFC 3465) - * @srtt: smoothed RTT estimate, scaled by 2^3 - * @mdev: smoothed RTT variation, scaled by 2^2 - * @mdev_max: maximum of @mdev during one flight - * @rttvar: moving average/maximum of @mdev_max - * @rto: RTO value deriving from SRTT and RTTVAR (RFC 2988) - * @rtt_seq: to decay RTTVAR at most once per flight - * @rpseq: last consecutive seqno - * @rpdupack: dupacks since rpseq - * @av_chunks: list of Ack Vectors received on current skb - */ + * @ccid2hctx_{cwnd,ssthresh,pipe}: as per RFC 4341, section 5 + * @ccid2hctx_packets_acked - Ack counter for deriving cwnd growth (RFC 3465) + * @ccid2hctx_lastrtt -time RTT was last measured + * @ccid2hctx_rpseq - last consecutive seqno + * @ccid2hctx_rpdupack - dupacks since rpseq +*/ struct ccid2_hc_tx_sock { - u32 cwnd; - u32 ssthresh; - u32 pipe; - u32 packets_acked; - struct ccid2_seq *seqbuf[CCID2_SEQBUF_MAX]; - int seqbufc; - struct ccid2_seq *seqh; - struct ccid2_seq *seqt; - /* RTT measurement: variables/principles are the same as in TCP */ - u32 srtt, - mdev, - mdev_max, - rttvar, - rto; - u64 rtt_seq:48; - struct timer_list rtotimer; - u64 rpseq; - int rpdupack; - unsigned long last_cong; - u64 high_ack; - struct list_head av_chunks; + u32 ccid2hctx_cwnd; + u32 ccid2hctx_ssthresh; + u32 ccid2hctx_pipe; + u32 ccid2hctx_packets_acked; + struct ccid2_seq *ccid2hctx_seqbuf[CCID2_SEQBUF_MAX]; + int ccid2hctx_seqbufc; + struct ccid2_seq *ccid2hctx_seqh; + struct ccid2_seq *ccid2hctx_seqt; + long ccid2hctx_rto; + long ccid2hctx_srtt; + long ccid2hctx_rttvar; + unsigned long ccid2hctx_lastrtt; + struct timer_list ccid2hctx_rtotimer; + u64 ccid2hctx_rpseq; + int ccid2hctx_rpdupack; + unsigned long ccid2hctx_last_cong; + u64 ccid2hctx_high_ack; }; -static inline bool ccid2_cwnd_network_limited(struct ccid2_hc_tx_sock *hctx) -{ - return (hctx->pipe >= hctx->cwnd); -} - struct ccid2_hc_rx_sock { - int data; + int ccid2hcrx_data; }; static inline struct ccid2_hc_tx_sock *ccid2_hc_tx_sk(const struct sock *sk) diff --git a/net/dccp/ccids/ccid3.c b/net/dccp/ccids/ccid3.c index 06cfdad84a6..3b8bd7ca676 100644 --- a/net/dccp/ccids/ccid3.c +++ b/net/dccp/ccids/ccid3.c @@ -49,41 +49,75 @@ static int ccid3_debug; /* * Transmitter Half-Connection Routines */ -/* Oscillation Prevention/Reduction: recommended by rfc3448bis, on by default */ -static int do_osc_prev = true; +#ifdef CONFIG_IP_DCCP_CCID3_DEBUG +static const char *ccid3_tx_state_name(enum ccid3_hc_tx_states state) +{ + static char *ccid3_state_names[] = { + [TFRC_SSTATE_NO_SENT] = "NO_SENT", + [TFRC_SSTATE_NO_FBACK] = "NO_FBACK", + [TFRC_SSTATE_FBACK] = "FBACK", + [TFRC_SSTATE_TERM] = "TERM", + }; + + return ccid3_state_names[state]; +} +#endif + +static void ccid3_hc_tx_set_state(struct sock *sk, + enum ccid3_hc_tx_states state) +{ + struct ccid3_hc_tx_sock *hctx = ccid3_hc_tx_sk(sk); + enum ccid3_hc_tx_states oldstate = hctx->ccid3hctx_state; + + ccid3_pr_debug("%s(%p) %-8.8s -> %s\n", + dccp_role(sk), sk, ccid3_tx_state_name(oldstate), + ccid3_tx_state_name(state)); + WARN_ON(state == oldstate); + hctx->ccid3hctx_state = state; +} /* * Compute the initial sending rate X_init in the manner of RFC 3390: * - * X_init = min(4 * MPS, max(2 * MPS, 4380 bytes)) / RTT + * X_init = min(4 * s, max(2 * s, 4380 bytes)) / RTT * + * Note that RFC 3390 uses MSS, RFC 4342 refers to RFC 3390, and rfc3448bis + * (rev-02) clarifies the use of RFC 3390 with regard to the above formula. * For consistency with other parts of the code, X_init is scaled by 2^6. */ static inline u64 rfc3390_initial_rate(struct sock *sk) { - const u32 mps = dccp_sk(sk)->dccps_mss_cache, - w_init = clamp(4380U, 2 * mps, 4 * mps); + const struct ccid3_hc_tx_sock *hctx = ccid3_hc_tx_sk(sk); + const __u32 w_init = clamp_t(__u32, 4380U, + 2 * hctx->ccid3hctx_s, 4 * hctx->ccid3hctx_s); - return scaled_div(w_init << 6, ccid3_hc_tx_sk(sk)->rtt); + return scaled_div(w_init << 6, hctx->ccid3hctx_rtt); } -/** - * ccid3_update_send_interval - Calculate new t_ipi = s / X - * This respects the granularity of X (64 * bytes/second) and enforces the - * scaled minimum of s * 64 / t_mbi = `s' bytes/second as per RFC 3448/4342. +/* + * Recalculate t_ipi and delta (should be called whenever X changes) */ static void ccid3_update_send_interval(struct ccid3_hc_tx_sock *hctx) { - if (unlikely(hctx->x <= hctx->s)) - hctx->x = hctx->s; - hctx->t_ipi = scaled_div32(((u64)hctx->s) << 6, hctx->x); + /* Calculate new t_ipi = s / X_inst (X_inst is in 64 * bytes/second) */ + hctx->ccid3hctx_t_ipi = scaled_div32(((u64)hctx->ccid3hctx_s) << 6, + hctx->ccid3hctx_x); + + /* Calculate new delta by delta = min(t_ipi / 2, t_gran / 2) */ + hctx->ccid3hctx_delta = min_t(u32, hctx->ccid3hctx_t_ipi / 2, + TFRC_OPSYS_HALF_TIME_GRAN); + + ccid3_pr_debug("t_ipi=%u, delta=%u, s=%u, X=%u\n", + hctx->ccid3hctx_t_ipi, hctx->ccid3hctx_delta, + hctx->ccid3hctx_s, (unsigned)(hctx->ccid3hctx_x >> 6)); + } static u32 ccid3_hc_tx_idle_rtt(struct ccid3_hc_tx_sock *hctx, ktime_t now) { - u32 delta = ktime_us_delta(now, hctx->t_last_win_count); + u32 delta = ktime_us_delta(now, hctx->ccid3hctx_t_last_win_count); - return delta / hctx->rtt; + return delta / hctx->ccid3hctx_rtt; } /** @@ -99,8 +133,8 @@ static u32 ccid3_hc_tx_idle_rtt(struct ccid3_hc_tx_sock *hctx, ktime_t now) static void ccid3_hc_tx_update_x(struct sock *sk, ktime_t *stamp) { struct ccid3_hc_tx_sock *hctx = ccid3_hc_tx_sk(sk); - u64 min_rate = 2 * hctx->x_recv; - const u64 old_x = hctx->x; + __u64 min_rate = 2 * hctx->ccid3hctx_x_recv; + const __u64 old_x = hctx->ccid3hctx_x; ktime_t now = stamp ? *stamp : ktime_get_real(); /* @@ -111,44 +145,50 @@ static void ccid3_hc_tx_update_x(struct sock *sk, ktime_t *stamp) */ if (ccid3_hc_tx_idle_rtt(hctx, now) >= 2) { min_rate = rfc3390_initial_rate(sk); - min_rate = max(min_rate, 2 * hctx->x_recv); + min_rate = max(min_rate, 2 * hctx->ccid3hctx_x_recv); } - if (hctx->p > 0) { + if (hctx->ccid3hctx_p > 0) { - hctx->x = min(((u64)hctx->x_calc) << 6, min_rate); + hctx->ccid3hctx_x = min(((__u64)hctx->ccid3hctx_x_calc) << 6, + min_rate); + hctx->ccid3hctx_x = max(hctx->ccid3hctx_x, + (((__u64)hctx->ccid3hctx_s) << 6) / + TFRC_T_MBI); - } else if (ktime_us_delta(now, hctx->t_ld) - (s64)hctx->rtt >= 0) { + } else if (ktime_us_delta(now, hctx->ccid3hctx_t_ld) + - (s64)hctx->ccid3hctx_rtt >= 0) { - hctx->x = min(2 * hctx->x, min_rate); - hctx->x = max(hctx->x, - scaled_div(((u64)hctx->s) << 6, hctx->rtt)); - hctx->t_ld = now; + hctx->ccid3hctx_x = min(2 * hctx->ccid3hctx_x, min_rate); + hctx->ccid3hctx_x = max(hctx->ccid3hctx_x, + scaled_div(((__u64)hctx->ccid3hctx_s) << 6, + hctx->ccid3hctx_rtt)); + hctx->ccid3hctx_t_ld = now; } - if (hctx->x != old_x) { + if (hctx->ccid3hctx_x != old_x) { ccid3_pr_debug("X_prev=%u, X_now=%u, X_calc=%u, " "X_recv=%u\n", (unsigned)(old_x >> 6), - (unsigned)(hctx->x >> 6), hctx->x_calc, - (unsigned)(hctx->x_recv >> 6)); + (unsigned)(hctx->ccid3hctx_x >> 6), + hctx->ccid3hctx_x_calc, + (unsigned)(hctx->ccid3hctx_x_recv >> 6)); ccid3_update_send_interval(hctx); } } /* - * ccid3_hc_tx_measure_packet_size - Measuring the packet size `s' (sec 4.1) - * @new_len: DCCP payload size in bytes (not used by all methods) + * Track the mean packet size `s' (cf. RFC 4342, 5.3 and RFC 3448, 4.1) + * @len: DCCP packet payload size in bytes */ -static u32 ccid3_hc_tx_measure_packet_size(struct sock *sk, const u16 new_len) +static inline void ccid3_hc_tx_update_s(struct ccid3_hc_tx_sock *hctx, int len) { -#if defined(CONFIG_IP_DCCP_CCID3_MEASURE_S_AS_AVG) - return tfrc_ewma(ccid3_hc_tx_sk(sk)->s, new_len, 9); -#elif defined(CONFIG_IP_DCCP_CCID3_MEASURE_S_AS_MAX) - return max(ccid3_hc_tx_sk(sk)->s, new_len); -#else /* CONFIG_IP_DCCP_CCID3_MEASURE_S_AS_MPS */ - return dccp_sk(sk)->dccps_mss_cache; -#endif + const u16 old_s = hctx->ccid3hctx_s; + + hctx->ccid3hctx_s = tfrc_ewma(hctx->ccid3hctx_s, len, 9); + + if (hctx->ccid3hctx_s != old_s) + ccid3_update_send_interval(hctx); } /* @@ -158,13 +198,13 @@ static u32 ccid3_hc_tx_measure_packet_size(struct sock *sk, const u16 new_len) static inline void ccid3_hc_tx_update_win_count(struct ccid3_hc_tx_sock *hctx, ktime_t now) { - u32 delta = ktime_us_delta(now, hctx->t_last_win_count), - quarter_rtts = (4 * delta) / hctx->rtt; + u32 delta = ktime_us_delta(now, hctx->ccid3hctx_t_last_win_count), + quarter_rtts = (4 * delta) / hctx->ccid3hctx_rtt; if (quarter_rtts > 0) { - hctx->t_last_win_count = now; - hctx->last_win_count += min(quarter_rtts, 5U); - hctx->last_win_count &= 0xF; /* mod 16 */ + hctx->ccid3hctx_t_last_win_count = now; + hctx->ccid3hctx_last_win_count += min(quarter_rtts, 5U); + hctx->ccid3hctx_last_win_count &= 0xF; /* mod 16 */ } } @@ -181,26 +221,25 @@ static void ccid3_hc_tx_no_feedback_timer(unsigned long data) goto restart_timer; } - ccid3_pr_debug("%s(%p) entry with%s feedback\n", dccp_role(sk), sk, - hctx->feedback ? "" : "out"); + ccid3_pr_debug("%s(%p, state=%s) - entry \n", dccp_role(sk), sk, + ccid3_tx_state_name(hctx->ccid3hctx_state)); - /* Ignore and do not restart after leaving the established state */ - if ((1 << sk->sk_state) & ~(DCCPF_OPEN | DCCPF_PARTOPEN)) + if (hctx->ccid3hctx_state == TFRC_SSTATE_FBACK) + ccid3_hc_tx_set_state(sk, TFRC_SSTATE_NO_FBACK); + else if (hctx->ccid3hctx_state != TFRC_SSTATE_NO_FBACK) goto out; - /* Reset feedback state to "no feedback received" */ - hctx->feedback = false; - /* * Determine new allowed sending rate X as per draft rfc3448bis-00, 4.4 - * RTO is 0 if and only if no feedback has been received yet. */ - if (hctx->t_rto == 0 || hctx->p == 0) { + if (hctx->ccid3hctx_t_rto == 0 || /* no feedback received yet */ + hctx->ccid3hctx_p == 0) { /* halve send rate directly */ - hctx->x /= 2; + hctx->ccid3hctx_x = max(hctx->ccid3hctx_x / 2, + (((__u64)hctx->ccid3hctx_s) << 6) / + TFRC_T_MBI); ccid3_update_send_interval(hctx); - } else { /* * Modify the cached value of X_recv @@ -212,41 +251,44 @@ static void ccid3_hc_tx_no_feedback_timer(unsigned long data) * * Note that X_recv is scaled by 2^6 while X_calc is not */ - BUG_ON(hctx->p && !hctx->x_calc); + BUG_ON(hctx->ccid3hctx_p && !hctx->ccid3hctx_x_calc); - if (hctx->x_calc > (hctx->x_recv >> 5)) - hctx->x_recv /= 2; + if (hctx->ccid3hctx_x_calc > (hctx->ccid3hctx_x_recv >> 5)) + hctx->ccid3hctx_x_recv = + max(hctx->ccid3hctx_x_recv / 2, + (((__u64)hctx->ccid3hctx_s) << 6) / + (2 * TFRC_T_MBI)); else { - hctx->x_recv = hctx->x_calc; - hctx->x_recv <<= 4; + hctx->ccid3hctx_x_recv = hctx->ccid3hctx_x_calc; + hctx->ccid3hctx_x_recv <<= 4; } ccid3_hc_tx_update_x(sk, NULL); } ccid3_pr_debug("Reduced X to %llu/64 bytes/sec\n", - (unsigned long long)hctx->x); + (unsigned long long)hctx->ccid3hctx_x); /* * Set new timeout for the nofeedback timer. * See comments in packet_recv() regarding the value of t_RTO. */ - if (unlikely(hctx->t_rto == 0)) /* no feedback received yet */ + if (unlikely(hctx->ccid3hctx_t_rto == 0)) /* no feedback yet */ t_nfb = TFRC_INITIAL_TIMEOUT; else - t_nfb = max(hctx->t_rto, 2 * hctx->t_ipi); + t_nfb = max(hctx->ccid3hctx_t_rto, 2 * hctx->ccid3hctx_t_ipi); restart_timer: - sk_reset_timer(sk, &hctx->no_feedback_timer, + sk_reset_timer(sk, &hctx->ccid3hctx_no_feedback_timer, jiffies + usecs_to_jiffies(t_nfb)); out: bh_unlock_sock(sk); sock_put(sk); } -/** - * ccid3_hc_tx_send_packet - Delay-based dequeueing of TX packets - * @skb: next packet candidate to send on @sk - * This function uses the convention of ccid_packet_dequeue_eval() and - * returns a millisecond-delay value between 0 and t_mbi = 64000 msec. +/* + * returns + * > 0: delay (in msecs) that should pass before actually sending + * = 0: can send immediately + * < 0: error condition; do not send packet */ static int ccid3_hc_tx_send_packet(struct sock *sk, struct sk_buff *skb) { @@ -263,14 +305,18 @@ static int ccid3_hc_tx_send_packet(struct sock *sk, struct sk_buff *skb) if (unlikely(skb->len == 0)) return -EBADMSG; - if (hctx->s == 0) { - sk_reset_timer(sk, &hctx->no_feedback_timer, (jiffies + + switch (hctx->ccid3hctx_state) { + case TFRC_SSTATE_NO_SENT: + sk_reset_timer(sk, &hctx->ccid3hctx_no_feedback_timer, + (jiffies + usecs_to_jiffies(TFRC_INITIAL_TIMEOUT))); - hctx->last_win_count = 0; - hctx->t_last_win_count = now; + hctx->ccid3hctx_last_win_count = 0; + hctx->ccid3hctx_t_last_win_count = now; /* Set t_0 for initial packet */ - hctx->t_nom = now; + hctx->ccid3hctx_t_nom = now; + + hctx->ccid3hctx_s = skb->len; /* * Use initial RTT sample when available: recommended by erratum @@ -279,9 +325,9 @@ static int ccid3_hc_tx_send_packet(struct sock *sk, struct sk_buff *skb) */ if (dp->dccps_syn_rtt) { ccid3_pr_debug("SYN RTT = %uus\n", dp->dccps_syn_rtt); - hctx->rtt = dp->dccps_syn_rtt; - hctx->x = rfc3390_initial_rate(sk); - hctx->t_ld = now; + hctx->ccid3hctx_rtt = dp->dccps_syn_rtt; + hctx->ccid3hctx_x = rfc3390_initial_rate(sk); + hctx->ccid3hctx_t_ld = now; } else { /* * Sender does not have RTT sample: @@ -289,20 +335,17 @@ static int ccid3_hc_tx_send_packet(struct sock *sk, struct sk_buff *skb) * is needed in several parts (e.g. window counter); * - set sending rate X_pps = 1pps as per RFC 3448, 4.2. */ - hctx->rtt = DCCP_FALLBACK_RTT; - hctx->x = dp->dccps_mss_cache; - hctx->x <<= 6; + hctx->ccid3hctx_rtt = DCCP_FALLBACK_RTT; + hctx->ccid3hctx_x = hctx->ccid3hctx_s; + hctx->ccid3hctx_x <<= 6; } - - /* Compute t_ipi = s / X */ - hctx->s = ccid3_hc_tx_measure_packet_size(sk, skb->len); ccid3_update_send_interval(hctx); - /* Seed value for Oscillation Prevention (sec. 4.5) */ - hctx->r_sqmean = tfrc_scaled_sqrt(hctx->rtt); - - } else { - delay = ktime_us_delta(hctx->t_nom, now); + ccid3_hc_tx_set_state(sk, TFRC_SSTATE_NO_FBACK); + break; + case TFRC_SSTATE_NO_FBACK: + case TFRC_SSTATE_FBACK: + delay = ktime_us_delta(hctx->ccid3hctx_t_nom, now); ccid3_pr_debug("delay=%ld\n", (long)delay); /* * Scheduling of packet transmissions [RFC 3448, 4.6] @@ -312,80 +355,99 @@ static int ccid3_hc_tx_send_packet(struct sock *sk, struct sk_buff *skb) * else * // send the packet in (t_nom - t_now) milliseconds. */ - if (delay >= TFRC_T_DELTA) - return (u32)delay / USEC_PER_MSEC; + if (delay - (s64)hctx->ccid3hctx_delta >= 1000) + return (u32)delay / 1000L; ccid3_hc_tx_update_win_count(hctx, now); + break; + case TFRC_SSTATE_TERM: + DCCP_BUG("%s(%p) - Illegal state TERM", dccp_role(sk), sk); + return -EINVAL; } /* prepare to send now (add options etc.) */ dp->dccps_hc_tx_insert_options = 1; - DCCP_SKB_CB(skb)->dccpd_ccval = hctx->last_win_count; + DCCP_SKB_CB(skb)->dccpd_ccval = hctx->ccid3hctx_last_win_count; /* set the nominal send time for the next following packet */ - hctx->t_nom = ktime_add_us(hctx->t_nom, hctx->t_ipi); - return CCID_PACKET_SEND_AT_ONCE; + hctx->ccid3hctx_t_nom = ktime_add_us(hctx->ccid3hctx_t_nom, + hctx->ccid3hctx_t_ipi); + return 0; } -static void ccid3_hc_tx_packet_sent(struct sock *sk, unsigned int len) +static void ccid3_hc_tx_packet_sent(struct sock *sk, int more, + unsigned int len) { struct ccid3_hc_tx_sock *hctx = ccid3_hc_tx_sk(sk); - /* Changes to s will become effective the next time X is computed */ - hctx->s = ccid3_hc_tx_measure_packet_size(sk, len); + ccid3_hc_tx_update_s(hctx, len); - if (tfrc_tx_hist_add(&hctx->hist, dccp_sk(sk)->dccps_gss)) + if (tfrc_tx_hist_add(&hctx->ccid3hctx_hist, dccp_sk(sk)->dccps_gss)) DCCP_CRIT("packet history - out of memory!"); } static void ccid3_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb) { struct ccid3_hc_tx_sock *hctx = ccid3_hc_tx_sk(sk); - struct tfrc_tx_hist_entry *acked; + struct ccid3_options_received *opt_recv; ktime_t now; unsigned long t_nfb; - u32 r_sample; + u32 pinv, r_sample; /* we are only interested in ACKs */ if (!(DCCP_SKB_CB(skb)->dccpd_type == DCCP_PKT_ACK || DCCP_SKB_CB(skb)->dccpd_type == DCCP_PKT_DATAACK)) return; - /* - * Locate the acknowledged packet in the TX history. - * - * Returning "entry not found" here can for instance happen when - * - the host has not sent out anything (e.g. a passive server), - * - the Ack is outdated (packet with higher Ack number was received), - * - it is a bogus Ack (for a packet not sent on this connection). - */ - acked = tfrc_tx_hist_find_entry(hctx->hist, dccp_hdr_ack_seq(skb)); - if (acked == NULL) + /* ... and only in the established state */ + if (hctx->ccid3hctx_state != TFRC_SSTATE_FBACK && + hctx->ccid3hctx_state != TFRC_SSTATE_NO_FBACK) + return; + + opt_recv = &hctx->ccid3hctx_options_received; + now = ktime_get_real(); + + /* Estimate RTT from history if ACK number is valid */ + r_sample = tfrc_tx_hist_rtt(hctx->ccid3hctx_hist, + DCCP_SKB_CB(skb)->dccpd_ack_seq, now); + if (r_sample == 0) { + DCCP_WARN("%s(%p): %s with bogus ACK-%llu\n", dccp_role(sk), sk, + dccp_packet_name(DCCP_SKB_CB(skb)->dccpd_type), + (unsigned long long)DCCP_SKB_CB(skb)->dccpd_ack_seq); return; - /* For the sake of RTT sampling, ignore/remove all older entries */ - tfrc_tx_hist_purge(&acked->next); + } - /* Update the moving average for the RTT estimate (RFC 3448, 4.3) */ - now = ktime_get_real(); - r_sample = dccp_sample_rtt(sk, ktime_us_delta(now, acked->stamp)); - hctx->rtt = tfrc_ewma(hctx->rtt, r_sample, 9); + /* Update receive rate in units of 64 * bytes/second */ + hctx->ccid3hctx_x_recv = opt_recv->ccid3or_receive_rate; + hctx->ccid3hctx_x_recv <<= 6; + /* Update loss event rate (which is scaled by 1e6) */ + pinv = opt_recv->ccid3or_loss_event_rate; + if (pinv == ~0U || pinv == 0) /* see RFC 4342, 8.5 */ + hctx->ccid3hctx_p = 0; + else /* can not exceed 100% */ + hctx->ccid3hctx_p = scaled_div(1, pinv); + /* + * Validate new RTT sample and update moving average + */ + r_sample = dccp_sample_rtt(sk, r_sample); + hctx->ccid3hctx_rtt = tfrc_ewma(hctx->ccid3hctx_rtt, r_sample, 9); /* * Update allowed sending rate X as per draft rfc3448bis-00, 4.2/3 */ - if (!hctx->feedback) { - hctx->feedback = true; + if (hctx->ccid3hctx_state == TFRC_SSTATE_NO_FBACK) { + ccid3_hc_tx_set_state(sk, TFRC_SSTATE_FBACK); - if (hctx->t_rto == 0) { + if (hctx->ccid3hctx_t_rto == 0) { /* * Initial feedback packet: Larger Initial Windows (4.2) */ - hctx->x = rfc3390_initial_rate(sk); - hctx->t_ld = now; + hctx->ccid3hctx_x = rfc3390_initial_rate(sk); + hctx->ccid3hctx_t_ld = now; ccid3_update_send_interval(hctx); goto done_computing_x; - } else if (hctx->p == 0) { + } else if (hctx->ccid3hctx_p == 0) { /* * First feedback after nofeedback timer expiry (4.3) */ @@ -394,52 +456,25 @@ static void ccid3_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb) } /* Update sending rate (step 4 of [RFC 3448, 4.3]) */ - if (hctx->p > 0) - hctx->x_calc = tfrc_calc_x(hctx->s, hctx->rtt, hctx->p); + if (hctx->ccid3hctx_p > 0) + hctx->ccid3hctx_x_calc = + tfrc_calc_x(hctx->ccid3hctx_s, + hctx->ccid3hctx_rtt, + hctx->ccid3hctx_p); ccid3_hc_tx_update_x(sk, &now); done_computing_x: ccid3_pr_debug("%s(%p), RTT=%uus (sample=%uus), s=%u, " "p=%u, X_calc=%u, X_recv=%u, X=%u\n", - dccp_role(sk), sk, hctx->rtt, r_sample, - hctx->s, hctx->p, hctx->x_calc, - (unsigned)(hctx->x_recv >> 6), - (unsigned)(hctx->x >> 6)); - /* - * Oscillation Reduction (RFC 3448, 4.5) - modifying t_ipi according to - * RTT changes, multiplying by X/X_inst = sqrt(R_sample)/R_sqmean. This - * can be useful if few connections share a link, avoiding that buffer - * fill levels (RTT) oscillate as a result of frequent adjustments to X. - * A useful presentation with background information is in - * Joerg Widmer, "Equation-Based Congestion Control", - * MSc Thesis, University of Mannheim, Germany, 2000 - * (sec. 3.6.4), who calls this ISM ("Inter-packet Space Modulation"). - */ - if (do_osc_prev) { - r_sample = tfrc_scaled_sqrt(r_sample); - /* - * The modulation can work in both ways: increase/decrease t_ipi - * according to long-term increases/decreases of the RTT. The - * former is a useful measure, since it works against queue - * build-up. The latter temporarily increases the sending rate, - * so that buffers fill up more quickly. This in turn causes - * the RTT to increase, so that either later reduction becomes - * necessary or the RTT stays at a very high level. Decreasing - * t_ipi is therefore not supported. - * Furthermore, during the initial slow-start phase the RTT - * naturally increases, where using the algorithm would cause - * delays. Hence it is disabled during the initial slow-start. - */ - if (r_sample > hctx->r_sqmean && hctx->p > 0) - hctx->t_ipi = div_u64((u64)hctx->t_ipi * (u64)r_sample, - hctx->r_sqmean); - hctx->t_ipi = min_t(u32, hctx->t_ipi, TFRC_T_MBI); - /* update R_sqmean _after_ computing the modulation factor */ - hctx->r_sqmean = tfrc_ewma(hctx->r_sqmean, r_sample, 9); - } + dccp_role(sk), + sk, hctx->ccid3hctx_rtt, r_sample, + hctx->ccid3hctx_s, hctx->ccid3hctx_p, + hctx->ccid3hctx_x_calc, + (unsigned)(hctx->ccid3hctx_x_recv >> 6), + (unsigned)(hctx->ccid3hctx_x >> 6)); /* unschedule no feedback timer */ - sk_stop_timer(sk, &hctx->no_feedback_timer); + sk_stop_timer(sk, &hctx->ccid3hctx_no_feedback_timer); /* * As we have calculated new ipi, delta, t_nom it is possible @@ -453,66 +488,95 @@ done_computing_x: * This can help avoid triggering the nofeedback timer too * often ('spinning') on LANs with small RTTs. */ - hctx->t_rto = max_t(u32, 4 * hctx->rtt, (CONFIG_IP_DCCP_CCID3_RTO * - (USEC_PER_SEC / 1000))); + hctx->ccid3hctx_t_rto = max_t(u32, 4 * hctx->ccid3hctx_rtt, + (CONFIG_IP_DCCP_CCID3_RTO * + (USEC_PER_SEC / 1000))); /* * Schedule no feedback timer to expire in * max(t_RTO, 2 * s/X) = max(t_RTO, 2 * t_ipi) */ - t_nfb = max(hctx->t_rto, 2 * hctx->t_ipi); + t_nfb = max(hctx->ccid3hctx_t_rto, 2 * hctx->ccid3hctx_t_ipi); ccid3_pr_debug("%s(%p), Scheduled no feedback timer to " "expire in %lu jiffies (%luus)\n", - dccp_role(sk), sk, usecs_to_jiffies(t_nfb), t_nfb); + dccp_role(sk), + sk, usecs_to_jiffies(t_nfb), t_nfb); - sk_reset_timer(sk, &hctx->no_feedback_timer, + sk_reset_timer(sk, &hctx->ccid3hctx_no_feedback_timer, jiffies + usecs_to_jiffies(t_nfb)); } -static int ccid3_hc_tx_parse_options(struct sock *sk, u8 packet_type, - u8 option, u8 *optval, u8 optlen) +static int ccid3_hc_tx_parse_options(struct sock *sk, unsigned char option, + unsigned char len, u16 idx, + unsigned char *value) { + int rc = 0; + const struct dccp_sock *dp = dccp_sk(sk); struct ccid3_hc_tx_sock *hctx = ccid3_hc_tx_sk(sk); + struct ccid3_options_received *opt_recv; __be32 opt_val; - switch (option) { - case TFRC_OPT_RECEIVE_RATE: - case TFRC_OPT_LOSS_EVENT_RATE: - /* Must be ignored on Data packets, cf. RFC 4342 8.3 and 8.5 */ - if (packet_type == DCCP_PKT_DATA) - break; - if (unlikely(optlen != 4)) { - DCCP_WARN("%s(%p), invalid len %d for %u\n", - dccp_role(sk), sk, optlen, option); - return -EINVAL; - } - opt_val = ntohl(get_unaligned((__be32 *)optval)); + opt_recv = &hctx->ccid3hctx_options_received; - if (option == TFRC_OPT_RECEIVE_RATE) { - /* Receive Rate is kept in units of 64 bytes/second */ - hctx->x_recv = opt_val; - hctx->x_recv <<= 6; + if (opt_recv->ccid3or_seqno != dp->dccps_gsr) { + opt_recv->ccid3or_seqno = dp->dccps_gsr; + opt_recv->ccid3or_loss_event_rate = ~0; + opt_recv->ccid3or_loss_intervals_idx = 0; + opt_recv->ccid3or_loss_intervals_len = 0; + opt_recv->ccid3or_receive_rate = 0; + } - ccid3_pr_debug("%s(%p), RECEIVE_RATE=%u\n", - dccp_role(sk), sk, opt_val); + switch (option) { + case TFRC_OPT_LOSS_EVENT_RATE: + if (unlikely(len != 4)) { + DCCP_WARN("%s(%p), invalid len %d " + "for TFRC_OPT_LOSS_EVENT_RATE\n", + dccp_role(sk), sk, len); + rc = -EINVAL; } else { - /* Update the fixpoint Loss Event Rate fraction */ - hctx->p = tfrc_invert_loss_event_rate(opt_val); - + opt_val = get_unaligned((__be32 *)value); + opt_recv->ccid3or_loss_event_rate = ntohl(opt_val); ccid3_pr_debug("%s(%p), LOSS_EVENT_RATE=%u\n", - dccp_role(sk), sk, opt_val); + dccp_role(sk), sk, + opt_recv->ccid3or_loss_event_rate); } + break; + case TFRC_OPT_LOSS_INTERVALS: + opt_recv->ccid3or_loss_intervals_idx = idx; + opt_recv->ccid3or_loss_intervals_len = len; + ccid3_pr_debug("%s(%p), LOSS_INTERVALS=(%u, %u)\n", + dccp_role(sk), sk, + opt_recv->ccid3or_loss_intervals_idx, + opt_recv->ccid3or_loss_intervals_len); + break; + case TFRC_OPT_RECEIVE_RATE: + if (unlikely(len != 4)) { + DCCP_WARN("%s(%p), invalid len %d " + "for TFRC_OPT_RECEIVE_RATE\n", + dccp_role(sk), sk, len); + rc = -EINVAL; + } else { + opt_val = get_unaligned((__be32 *)value); + opt_recv->ccid3or_receive_rate = ntohl(opt_val); + ccid3_pr_debug("%s(%p), RECEIVE_RATE=%u\n", + dccp_role(sk), sk, + opt_recv->ccid3or_receive_rate); + } + break; } - return 0; + + return rc; } static int ccid3_hc_tx_init(struct ccid *ccid, struct sock *sk) { struct ccid3_hc_tx_sock *hctx = ccid_priv(ccid); - hctx->hist = NULL; - setup_timer(&hctx->no_feedback_timer, - ccid3_hc_tx_no_feedback_timer, (unsigned long)sk); + hctx->ccid3hctx_state = TFRC_SSTATE_NO_SENT; + hctx->ccid3hctx_hist = NULL; + setup_timer(&hctx->ccid3hctx_no_feedback_timer, + ccid3_hc_tx_no_feedback_timer, (unsigned long)sk); + return 0; } @@ -520,36 +584,42 @@ static void ccid3_hc_tx_exit(struct sock *sk) { struct ccid3_hc_tx_sock *hctx = ccid3_hc_tx_sk(sk); - sk_stop_timer(sk, &hctx->no_feedback_timer); - tfrc_tx_hist_purge(&hctx->hist); + ccid3_hc_tx_set_state(sk, TFRC_SSTATE_TERM); + sk_stop_timer(sk, &hctx->ccid3hctx_no_feedback_timer); + + tfrc_tx_hist_purge(&hctx->ccid3hctx_hist); } static void ccid3_hc_tx_get_info(struct sock *sk, struct tcp_info *info) { - info->tcpi_rto = ccid3_hc_tx_sk(sk)->t_rto; - info->tcpi_rtt = ccid3_hc_tx_sk(sk)->rtt; + struct ccid3_hc_tx_sock *hctx; + + /* Listen socks doesn't have a private CCID block */ + if (sk->sk_state == DCCP_LISTEN) + return; + + hctx = ccid3_hc_tx_sk(sk); + info->tcpi_rto = hctx->ccid3hctx_t_rto; + info->tcpi_rtt = hctx->ccid3hctx_rtt; } static int ccid3_hc_tx_getsockopt(struct sock *sk, const int optname, int len, u32 __user *optval, int __user *optlen) { - const struct ccid3_hc_tx_sock *hctx = ccid3_hc_tx_sk(sk); - struct tfrc_tx_info tfrc; + const struct ccid3_hc_tx_sock *hctx; const void *val; + /* Listen socks doesn't have a private CCID block */ + if (sk->sk_state == DCCP_LISTEN) + return -EINVAL; + + hctx = ccid3_hc_tx_sk(sk); switch (optname) { case DCCP_SOCKOPT_CCID_TX_INFO: - if (len < sizeof(tfrc)) + if (len < sizeof(hctx->ccid3hctx_tfrc)) return -EINVAL; - tfrc.tfrctx_x = hctx->x; - tfrc.tfrctx_x_recv = hctx->x_recv; - tfrc.tfrctx_x_calc = hctx->x_calc; - tfrc.tfrctx_rtt = hctx->rtt; - tfrc.tfrctx_p = hctx->p; - tfrc.tfrctx_rto = hctx->t_rto; - tfrc.tfrctx_ipi = hctx->t_ipi; - len = sizeof(tfrc); - val = &tfrc; + len = sizeof(hctx->ccid3hctx_tfrc); + val = &hctx->ccid3hctx_tfrc; break; default: return -ENOPROTOOPT; @@ -564,82 +634,112 @@ static int ccid3_hc_tx_getsockopt(struct sock *sk, const int optname, int len, /* * Receiver Half-Connection Routines */ + +/* CCID3 feedback types */ +enum ccid3_fback_type { + CCID3_FBACK_NONE = 0, + CCID3_FBACK_INITIAL, + CCID3_FBACK_PERIODIC, + CCID3_FBACK_PARAM_CHANGE +}; + +#ifdef CONFIG_IP_DCCP_CCID3_DEBUG +static const char *ccid3_rx_state_name(enum ccid3_hc_rx_states state) +{ + static char *ccid3_rx_state_names[] = { + [TFRC_RSTATE_NO_DATA] = "NO_DATA", + [TFRC_RSTATE_DATA] = "DATA", + [TFRC_RSTATE_TERM] = "TERM", + }; + + return ccid3_rx_state_names[state]; +} +#endif + +static void ccid3_hc_rx_set_state(struct sock *sk, + enum ccid3_hc_rx_states state) +{ + struct ccid3_hc_rx_sock *hcrx = ccid3_hc_rx_sk(sk); + enum ccid3_hc_rx_states oldstate = hcrx->ccid3hcrx_state; + + ccid3_pr_debug("%s(%p) %-8.8s -> %s\n", + dccp_role(sk), sk, ccid3_rx_state_name(oldstate), + ccid3_rx_state_name(state)); + WARN_ON(state == oldstate); + hcrx->ccid3hcrx_state = state; +} + static void ccid3_hc_rx_send_feedback(struct sock *sk, const struct sk_buff *skb, enum ccid3_fback_type fbtype) { struct ccid3_hc_rx_sock *hcrx = ccid3_hc_rx_sk(sk); + struct dccp_sock *dp = dccp_sk(sk); + ktime_t now; + s64 delta = 0; + + if (unlikely(hcrx->ccid3hcrx_state == TFRC_RSTATE_TERM)) + return; + + now = ktime_get_real(); switch (fbtype) { case CCID3_FBACK_INITIAL: - hcrx->x_recv = 0; - hcrx->p_inverse = ~0U; /* see RFC 4342, 8.5 */ + hcrx->ccid3hcrx_x_recv = 0; + hcrx->ccid3hcrx_pinv = ~0U; /* see RFC 4342, 8.5 */ break; case CCID3_FBACK_PARAM_CHANGE: - if (unlikely(hcrx->feedback == CCID3_FBACK_NONE)) { - /* - * rfc3448bis-06, 6.3.1: First packet(s) lost or marked - * FIXME: in rfc3448bis the receiver returns X_recv=0 - * here as it normally would in the first feedback packet. - * However this is not possible yet, since the code still - * uses RFC 3448, i.e. - * If (p > 0) - * Calculate X_calc using the TCP throughput equation. - * X = max(min(X_calc, 2*X_recv), s/t_mbi); - * would bring X down to s/t_mbi. That is why we return - * X_recv according to rfc3448bis-06 for the moment. - */ - u32 s = tfrc_rx_hist_packet_size(&hcrx->hist), - rtt = tfrc_rx_hist_rtt(&hcrx->hist); - - hcrx->x_recv = scaled_div32(s, 2 * rtt); - break; - } /* * When parameters change (new loss or p > p_prev), we do not * have a reliable estimate for R_m of [RFC 3448, 6.2] and so - * always check whether at least RTT time units were covered. + * need to reuse the previous value of X_recv. However, when + * X_recv was 0 (due to early loss), this would kill X down to + * s/t_mbi (i.e. one packet in 64 seconds). + * To avoid such drastic reduction, we approximate X_recv as + * the number of bytes since last feedback. + * This is a safe fallback, since X is bounded above by X_calc. */ - hcrx->x_recv = tfrc_rx_hist_x_recv(&hcrx->hist, hcrx->x_recv); - break; + if (hcrx->ccid3hcrx_x_recv > 0) + break; + /* fall through */ case CCID3_FBACK_PERIODIC: - /* - * Step (2) of rfc3448bis-06, 6.2: - * - if no data packets have been received, just restart timer - * - if data packets have been received, re-compute X_recv - */ - if (hcrx->hist.bytes_recvd == 0) - goto prepare_for_next_time; - hcrx->x_recv = tfrc_rx_hist_x_recv(&hcrx->hist, hcrx->x_recv); + delta = ktime_us_delta(now, hcrx->ccid3hcrx_tstamp_last_feedback); + if (delta <= 0) + DCCP_BUG("delta (%ld) <= 0", (long)delta); + else + hcrx->ccid3hcrx_x_recv = + scaled_div32(hcrx->ccid3hcrx_bytes_recv, delta); break; default: return; } - ccid3_pr_debug("X_recv=%u, 1/p=%u\n", hcrx->x_recv, hcrx->p_inverse); + ccid3_pr_debug("Interval %ldusec, X_recv=%u, 1/p=%u\n", (long)delta, + hcrx->ccid3hcrx_x_recv, hcrx->ccid3hcrx_pinv); - dccp_sk(sk)->dccps_hc_rx_insert_options = 1; - dccp_send_ack(sk); + hcrx->ccid3hcrx_tstamp_last_feedback = now; + hcrx->ccid3hcrx_last_counter = dccp_hdr(skb)->dccph_ccval; + hcrx->ccid3hcrx_bytes_recv = 0; -prepare_for_next_time: - tfrc_rx_hist_restart_byte_counter(&hcrx->hist); - hcrx->last_counter = dccp_hdr(skb)->dccph_ccval; - hcrx->feedback = fbtype; + dp->dccps_hc_rx_insert_options = 1; + dccp_send_ack(sk); } static int ccid3_hc_rx_insert_options(struct sock *sk, struct sk_buff *skb) { - const struct ccid3_hc_rx_sock *hcrx = ccid3_hc_rx_sk(sk); + const struct ccid3_hc_rx_sock *hcrx; __be32 x_recv, pinv; if (!(sk->sk_state == DCCP_OPEN || sk->sk_state == DCCP_PARTOPEN)) return 0; + hcrx = ccid3_hc_rx_sk(sk); + if (dccp_packet_without_ack(skb)) return 0; - x_recv = htonl(hcrx->x_recv); - pinv = htonl(hcrx->p_inverse); + x_recv = htonl(hcrx->ccid3hcrx_x_recv); + pinv = htonl(hcrx->ccid3hcrx_pinv); if (dccp_insert_option(sk, skb, TFRC_OPT_LOSS_EVENT_RATE, &pinv, sizeof(pinv)) || @@ -662,95 +762,171 @@ static int ccid3_hc_rx_insert_options(struct sock *sk, struct sk_buff *skb) static u32 ccid3_first_li(struct sock *sk) { struct ccid3_hc_rx_sock *hcrx = ccid3_hc_rx_sk(sk); - u32 s = tfrc_rx_hist_packet_size(&hcrx->hist), - rtt = tfrc_rx_hist_rtt(&hcrx->hist), x_recv, p; + u32 x_recv, p, delta; u64 fval; - /* - * rfc3448bis-06, 6.3.1: First data packet(s) are marked or lost. Set p - * to give the equivalent of X_target = s/(2*R). Thus fval = 2 and so p - * is about 20.64%. This yields an interval length of 4.84 (rounded up). - */ - if (unlikely(hcrx->feedback == CCID3_FBACK_NONE)) - return 5; + if (hcrx->ccid3hcrx_rtt == 0) { + DCCP_WARN("No RTT estimate available, using fallback RTT\n"); + hcrx->ccid3hcrx_rtt = DCCP_FALLBACK_RTT; + } - x_recv = tfrc_rx_hist_x_recv(&hcrx->hist, hcrx->x_recv); - if (x_recv == 0) - goto failed; + delta = ktime_to_us(net_timedelta(hcrx->ccid3hcrx_tstamp_last_feedback)); + x_recv = scaled_div32(hcrx->ccid3hcrx_bytes_recv, delta); + if (x_recv == 0) { /* would also trigger divide-by-zero */ + DCCP_WARN("X_recv==0\n"); + if ((x_recv = hcrx->ccid3hcrx_x_recv) == 0) { + DCCP_BUG("stored value of X_recv is zero"); + return ~0U; + } + } - fval = scaled_div32(scaled_div(s, rtt), x_recv); + fval = scaled_div(hcrx->ccid3hcrx_s, hcrx->ccid3hcrx_rtt); + fval = scaled_div32(fval, x_recv); p = tfrc_calc_x_reverse_lookup(fval); ccid3_pr_debug("%s(%p), receive rate=%u bytes/s, implied " "loss rate=%u\n", dccp_role(sk), sk, x_recv, p); - if (p > 0) - return scaled_div(1, p); -failed: - return UINT_MAX; + return p == 0 ? ~0U : scaled_div(1, p); } static void ccid3_hc_rx_packet_recv(struct sock *sk, struct sk_buff *skb) { struct ccid3_hc_rx_sock *hcrx = ccid3_hc_rx_sk(sk); + enum ccid3_fback_type do_feedback = CCID3_FBACK_NONE; const u64 ndp = dccp_sk(sk)->dccps_options_received.dccpor_ndp; const bool is_data_packet = dccp_data_packet(skb); + if (unlikely(hcrx->ccid3hcrx_state == TFRC_RSTATE_NO_DATA)) { + if (is_data_packet) { + const u32 payload = skb->len - dccp_hdr(skb)->dccph_doff * 4; + do_feedback = CCID3_FBACK_INITIAL; + ccid3_hc_rx_set_state(sk, TFRC_RSTATE_DATA); + hcrx->ccid3hcrx_s = payload; + /* + * Not necessary to update ccid3hcrx_bytes_recv here, + * since X_recv = 0 for the first feedback packet (cf. + * RFC 3448, 6.3) -- gerrit + */ + } + goto update_records; + } + + if (tfrc_rx_hist_duplicate(&hcrx->ccid3hcrx_hist, skb)) + return; /* done receiving */ + + if (is_data_packet) { + const u32 payload = skb->len - dccp_hdr(skb)->dccph_doff * 4; + /* + * Update moving-average of s and the sum of received payload bytes + */ + hcrx->ccid3hcrx_s = tfrc_ewma(hcrx->ccid3hcrx_s, payload, 9); + hcrx->ccid3hcrx_bytes_recv += payload; + } + /* * Perform loss detection and handle pending losses */ - if (tfrc_rx_congestion_event(&hcrx->hist, &hcrx->li_hist, - skb, ndp, ccid3_first_li, sk)) - ccid3_hc_rx_send_feedback(sk, skb, CCID3_FBACK_PARAM_CHANGE); + if (tfrc_rx_handle_loss(&hcrx->ccid3hcrx_hist, &hcrx->ccid3hcrx_li_hist, + skb, ndp, ccid3_first_li, sk)) { + do_feedback = CCID3_FBACK_PARAM_CHANGE; + goto done_receiving; + } + + if (tfrc_rx_hist_loss_pending(&hcrx->ccid3hcrx_hist)) + return; /* done receiving */ + /* - * Feedback for first non-empty data packet (RFC 3448, 6.3) + * Handle data packets: RTT sampling and monitoring p */ - else if (unlikely(hcrx->feedback == CCID3_FBACK_NONE && is_data_packet)) - ccid3_hc_rx_send_feedback(sk, skb, CCID3_FBACK_INITIAL); + if (unlikely(!is_data_packet)) + goto update_records; + + if (!tfrc_lh_is_initialised(&hcrx->ccid3hcrx_li_hist)) { + const u32 sample = tfrc_rx_hist_sample_rtt(&hcrx->ccid3hcrx_hist, skb); + /* + * Empty loss history: no loss so far, hence p stays 0. + * Sample RTT values, since an RTT estimate is required for the + * computation of p when the first loss occurs; RFC 3448, 6.3.1. + */ + if (sample != 0) + hcrx->ccid3hcrx_rtt = tfrc_ewma(hcrx->ccid3hcrx_rtt, sample, 9); + + } else if (tfrc_lh_update_i_mean(&hcrx->ccid3hcrx_li_hist, skb)) { + /* + * Step (3) of [RFC 3448, 6.1]: Recompute I_mean and, if I_mean + * has decreased (resp. p has increased), send feedback now. + */ + do_feedback = CCID3_FBACK_PARAM_CHANGE; + } + /* * Check if the periodic once-per-RTT feedback is due; RFC 4342, 10.3 */ - else if (!tfrc_rx_hist_loss_pending(&hcrx->hist) && is_data_packet && - SUB16(dccp_hdr(skb)->dccph_ccval, hcrx->last_counter) > 3) - ccid3_hc_rx_send_feedback(sk, skb, CCID3_FBACK_PERIODIC); + if (SUB16(dccp_hdr(skb)->dccph_ccval, hcrx->ccid3hcrx_last_counter) > 3) + do_feedback = CCID3_FBACK_PERIODIC; + +update_records: + tfrc_rx_hist_add_packet(&hcrx->ccid3hcrx_hist, skb, ndp); + +done_receiving: + if (do_feedback) + ccid3_hc_rx_send_feedback(sk, skb, do_feedback); } static int ccid3_hc_rx_init(struct ccid *ccid, struct sock *sk) { struct ccid3_hc_rx_sock *hcrx = ccid_priv(ccid); - tfrc_lh_init(&hcrx->li_hist); - return tfrc_rx_hist_init(&hcrx->hist, sk); + hcrx->ccid3hcrx_state = TFRC_RSTATE_NO_DATA; + tfrc_lh_init(&hcrx->ccid3hcrx_li_hist); + return tfrc_rx_hist_alloc(&hcrx->ccid3hcrx_hist); } static void ccid3_hc_rx_exit(struct sock *sk) { struct ccid3_hc_rx_sock *hcrx = ccid3_hc_rx_sk(sk); - tfrc_rx_hist_purge(&hcrx->hist); - tfrc_lh_cleanup(&hcrx->li_hist); + ccid3_hc_rx_set_state(sk, TFRC_RSTATE_TERM); + + tfrc_rx_hist_purge(&hcrx->ccid3hcrx_hist); + tfrc_lh_cleanup(&hcrx->ccid3hcrx_li_hist); } static void ccid3_hc_rx_get_info(struct sock *sk, struct tcp_info *info) { + const struct ccid3_hc_rx_sock *hcrx; + + /* Listen socks doesn't have a private CCID block */ + if (sk->sk_state == DCCP_LISTEN) + return; + + hcrx = ccid3_hc_rx_sk(sk); + info->tcpi_ca_state = hcrx->ccid3hcrx_state; info->tcpi_options |= TCPI_OPT_TIMESTAMPS; - info->tcpi_rcv_rtt = tfrc_rx_hist_rtt(&ccid3_hc_rx_sk(sk)->hist); + info->tcpi_rcv_rtt = hcrx->ccid3hcrx_rtt; } static int ccid3_hc_rx_getsockopt(struct sock *sk, const int optname, int len, u32 __user *optval, int __user *optlen) { - const struct ccid3_hc_rx_sock *hcrx = ccid3_hc_rx_sk(sk); + const struct ccid3_hc_rx_sock *hcrx; struct tfrc_rx_info rx_info; const void *val; + /* Listen socks doesn't have a private CCID block */ + if (sk->sk_state == DCCP_LISTEN) + return -EINVAL; + + hcrx = ccid3_hc_rx_sk(sk); switch (optname) { case DCCP_SOCKOPT_CCID_RX_INFO: if (len < sizeof(rx_info)) return -EINVAL; - rx_info.tfrcrx_x_recv = hcrx->x_recv; - rx_info.tfrcrx_rtt = tfrc_rx_hist_rtt(&hcrx->hist); - rx_info.tfrcrx_p = tfrc_invert_loss_event_rate(hcrx->p_inverse); + rx_info.tfrcrx_x_recv = hcrx->ccid3hcrx_x_recv; + rx_info.tfrcrx_rtt = hcrx->ccid3hcrx_rtt; + rx_info.tfrcrx_p = hcrx->ccid3hcrx_pinv == 0 ? ~0U : + scaled_div(1, hcrx->ccid3hcrx_pinv); len = sizeof(rx_info); val = &rx_info; break; @@ -786,9 +962,6 @@ static struct ccid_operations ccid3 = { .ccid_hc_tx_getsockopt = ccid3_hc_tx_getsockopt, }; -module_param(do_osc_prev, bool, 0644); -MODULE_PARM_DESC(do_osc_prev, "Use Oscillation Prevention (RFC 3448, 4.5)"); - #ifdef CONFIG_IP_DCCP_CCID3_DEBUG module_param(ccid3_debug, bool, 0644); MODULE_PARM_DESC(ccid3_debug, "Enable debug messages"); @@ -796,19 +969,6 @@ MODULE_PARM_DESC(ccid3_debug, "Enable debug messages"); static __init int ccid3_module_init(void) { - struct timespec tp; - - /* - * Without a fine-grained clock resolution, RTTs/X_recv are not sampled - * correctly and feedback is sent either too early or too late. - */ - hrtimer_get_res(CLOCK_MONOTONIC, &tp); - if (tp.tv_sec || tp.tv_nsec > DCCP_TIME_RESOLUTION * NSEC_PER_USEC) { - printk(KERN_ERR "%s: Timer too coarse (%ld usec), need %u-usec" - " resolution - check your clocksource.\n", __func__, - tp.tv_nsec/NSEC_PER_USEC, DCCP_TIME_RESOLUTION); - return -ESOCKTNOSUPPORT; - } return ccid_register(&ccid3); } module_init(ccid3_module_init); diff --git a/net/dccp/ccids/ccid3.h b/net/dccp/ccids/ccid3.h index af6e1bf937d..49ca32bd7e7 100644 --- a/net/dccp/ccids/ccid3.h +++ b/net/dccp/ccids/ccid3.h @@ -47,22 +47,11 @@ /* Two seconds as per RFC 3448 4.2 */ #define TFRC_INITIAL_TIMEOUT (2 * USEC_PER_SEC) -/* Maximum backoff interval t_mbi (RFC 3448, 4.3) */ -#define TFRC_T_MBI (64 * USEC_PER_SEC) +/* In usecs - half the scheduling granularity as per RFC3448 4.6 */ +#define TFRC_OPSYS_HALF_TIME_GRAN (USEC_PER_SEC / (2 * HZ)) -/* - * The t_delta parameter (RFC 3448, 4.6): delays of less than %USEC_PER_MSEC are - * rounded down to 0, since sk_reset_timer() here uses millisecond granularity. - * Hence we can use a constant t_delta = %USEC_PER_MSEC when HZ >= 500. A coarse - * resolution of HZ < 500 means that the error is below one timer tick (t_gran) - * when using the constant t_delta = t_gran / 2 = %USEC_PER_SEC / (2 * HZ). - */ -#if (HZ >= 500) -# define TFRC_T_DELTA USEC_PER_MSEC -#else -# define TFRC_T_DELTA (USEC_PER_SEC / (2 * HZ)) -#warning Coarse CONFIG_HZ resolution -- higher value recommended for TFRC. -#endif +/* Parameter t_mbi from [RFC 3448, 4.3]: backoff interval in seconds */ +#define TFRC_T_MBI 64 enum ccid3_options { TFRC_OPT_LOSS_EVENT_RATE = 192, @@ -70,43 +59,62 @@ enum ccid3_options { TFRC_OPT_RECEIVE_RATE = 194, }; +struct ccid3_options_received { + u64 ccid3or_seqno:48, + ccid3or_loss_intervals_idx:16; + u16 ccid3or_loss_intervals_len; + u32 ccid3or_loss_event_rate; + u32 ccid3or_receive_rate; +}; + +/* TFRC sender states */ +enum ccid3_hc_tx_states { + TFRC_SSTATE_NO_SENT = 1, + TFRC_SSTATE_NO_FBACK, + TFRC_SSTATE_FBACK, + TFRC_SSTATE_TERM, +}; + /** struct ccid3_hc_tx_sock - CCID3 sender half-connection socket * - * @x - Current sending rate in 64 * bytes per second - * @x_recv - Receive rate in 64 * bytes per second - * @x_calc - Calculated rate in bytes per second - * @rtt - Estimate of current round trip time in usecs - * @r_sqmean - Estimate of long-term RTT (RFC 3448, 4.5) - * @p - Current loss event rate (0-1) scaled by 1000000 - * @s - Packet size in bytes - * @t_rto - Nofeedback Timer setting in usecs - * @t_ipi - Interpacket (send) interval (RFC 3448, 4.6) in usecs - * @feedback - Whether feedback has been received or not - * @last_win_count - Last window counter sent - * @t_last_win_count - Timestamp of earliest packet with - * last_win_count value sent - * @no_feedback_timer - Handle to no feedback timer - * @t_ld - Time last doubled during slow start - * @t_nom - Nominal send time of next packet - * @hist - Packet history + * @ccid3hctx_x - Current sending rate in 64 * bytes per second + * @ccid3hctx_x_recv - Receive rate in 64 * bytes per second + * @ccid3hctx_x_calc - Calculated rate in bytes per second + * @ccid3hctx_rtt - Estimate of current round trip time in usecs + * @ccid3hctx_p - Current loss event rate (0-1) scaled by 1000000 + * @ccid3hctx_s - Packet size in bytes + * @ccid3hctx_t_rto - Nofeedback Timer setting in usecs + * @ccid3hctx_t_ipi - Interpacket (send) interval (RFC 3448, 4.6) in usecs + * @ccid3hctx_state - Sender state, one of %ccid3_hc_tx_states + * @ccid3hctx_last_win_count - Last window counter sent + * @ccid3hctx_t_last_win_count - Timestamp of earliest packet + * with last_win_count value sent + * @ccid3hctx_no_feedback_timer - Handle to no feedback timer + * @ccid3hctx_t_ld - Time last doubled during slow start + * @ccid3hctx_t_nom - Nominal send time of next packet + * @ccid3hctx_delta - Send timer delta (RFC 3448, 4.6) in usecs + * @ccid3hctx_hist - Packet history + * @ccid3hctx_options_received - Parsed set of retrieved options */ struct ccid3_hc_tx_sock { - u64 x; - u64 x_recv; - u32 x_calc; - u32 rtt; - u16 r_sqmean; - u32 p; - u32 t_rto; - u32 t_ipi; - u16 s; - bool feedback:1; - u8 last_win_count; - ktime_t t_last_win_count; - struct timer_list no_feedback_timer; - ktime_t t_ld; - ktime_t t_nom; - struct tfrc_tx_hist_entry *hist; + struct tfrc_tx_info ccid3hctx_tfrc; +#define ccid3hctx_x ccid3hctx_tfrc.tfrctx_x +#define ccid3hctx_x_recv ccid3hctx_tfrc.tfrctx_x_recv +#define ccid3hctx_x_calc ccid3hctx_tfrc.tfrctx_x_calc +#define ccid3hctx_rtt ccid3hctx_tfrc.tfrctx_rtt +#define ccid3hctx_p ccid3hctx_tfrc.tfrctx_p +#define ccid3hctx_t_rto ccid3hctx_tfrc.tfrctx_rto +#define ccid3hctx_t_ipi ccid3hctx_tfrc.tfrctx_ipi + u16 ccid3hctx_s; + enum ccid3_hc_tx_states ccid3hctx_state:8; + u8 ccid3hctx_last_win_count; + ktime_t ccid3hctx_t_last_win_count; + struct timer_list ccid3hctx_no_feedback_timer; + ktime_t ccid3hctx_t_ld; + ktime_t ccid3hctx_t_nom; + u32 ccid3hctx_delta; + struct tfrc_tx_hist_entry *ccid3hctx_hist; + struct ccid3_options_received ccid3hctx_options_received; }; static inline struct ccid3_hc_tx_sock *ccid3_hc_tx_sk(const struct sock *sk) @@ -116,32 +124,41 @@ static inline struct ccid3_hc_tx_sock *ccid3_hc_tx_sk(const struct sock *sk) return hctx; } - -enum ccid3_fback_type { - CCID3_FBACK_NONE = 0, - CCID3_FBACK_INITIAL, - CCID3_FBACK_PERIODIC, - CCID3_FBACK_PARAM_CHANGE +/* TFRC receiver states */ +enum ccid3_hc_rx_states { + TFRC_RSTATE_NO_DATA = 1, + TFRC_RSTATE_DATA, + TFRC_RSTATE_TERM = 127, }; /** struct ccid3_hc_rx_sock - CCID3 receiver half-connection socket * - * @last_counter - Tracks window counter (RFC 4342, 8.1) - * @feedback - The type of the feedback last sent - * @x_recv - Receiver estimate of send rate (RFC 3448, sec. 4.3) - * @tstamp_last_feedback - Time at which last feedback was sent - * @hist - Packet history (loss detection + RTT sampling) - * @li_hist - Loss Interval database - * @p_inverse - Inverse of Loss Event Rate (RFC 4342, sec. 8.5) + * @ccid3hcrx_x_recv - Receiver estimate of send rate (RFC 3448 4.3) + * @ccid3hcrx_rtt - Receiver estimate of rtt (non-standard) + * @ccid3hcrx_p - Current loss event rate (RFC 3448 5.4) + * @ccid3hcrx_last_counter - Tracks window counter (RFC 4342, 8.1) + * @ccid3hcrx_state - Receiver state, one of %ccid3_hc_rx_states + * @ccid3hcrx_bytes_recv - Total sum of DCCP payload bytes + * @ccid3hcrx_x_recv - Receiver estimate of send rate (RFC 3448, sec. 4.3) + * @ccid3hcrx_rtt - Receiver estimate of RTT + * @ccid3hcrx_tstamp_last_feedback - Time at which last feedback was sent + * @ccid3hcrx_tstamp_last_ack - Time at which last feedback was sent + * @ccid3hcrx_hist - Packet history (loss detection + RTT sampling) + * @ccid3hcrx_li_hist - Loss Interval database + * @ccid3hcrx_s - Received packet size in bytes + * @ccid3hcrx_pinv - Inverse of Loss Event Rate (RFC 4342, sec. 8.5) */ struct ccid3_hc_rx_sock { - u8 last_counter:4; - enum ccid3_fback_type feedback:4; - u32 x_recv; - ktime_t tstamp_last_feedback; - struct tfrc_rx_hist hist; - struct tfrc_loss_hist li_hist; -#define p_inverse li_hist.i_mean + u8 ccid3hcrx_last_counter:4; + enum ccid3_hc_rx_states ccid3hcrx_state:8; + u32 ccid3hcrx_bytes_recv; + u32 ccid3hcrx_x_recv; + u32 ccid3hcrx_rtt; + ktime_t ccid3hcrx_tstamp_last_feedback; + struct tfrc_rx_hist ccid3hcrx_hist; + struct tfrc_loss_hist ccid3hcrx_li_hist; + u16 ccid3hcrx_s; +#define ccid3hcrx_pinv ccid3hcrx_li_hist.i_mean }; static inline struct ccid3_hc_rx_sock *ccid3_hc_rx_sk(const struct sock *sk) diff --git a/net/dccp/ccids/lib/loss_interval.c b/net/dccp/ccids/lib/loss_interval.c index b1ae8f8259e..5b3ce0688c5 100644 --- a/net/dccp/ccids/lib/loss_interval.c +++ b/net/dccp/ccids/lib/loss_interval.c @@ -86,26 +86,21 @@ static void tfrc_lh_calc_i_mean(struct tfrc_loss_hist *lh) /** * tfrc_lh_update_i_mean - Update the `open' loss interval I_0 - * This updates I_mean as the sequence numbers increase. As a consequence, the - * open loss interval I_0 increases, hence p = W_tot/max(I_tot0, I_tot1) - * decreases, and thus there is no need to send renewed feedback. + * For recomputing p: returns `true' if p > p_prev <=> 1/p < 1/p_prev */ -void tfrc_lh_update_i_mean(struct tfrc_loss_hist *lh, struct sk_buff *skb) +u8 tfrc_lh_update_i_mean(struct tfrc_loss_hist *lh, struct sk_buff *skb) { struct tfrc_loss_interval *cur = tfrc_lh_peek(lh); + u32 old_i_mean = lh->i_mean; s64 len; if (cur == NULL) /* not initialised */ - return; - - /* FIXME: should probably also count non-data packets (RFC 4342, 6.1) */ - if (!dccp_data_packet(skb)) - return; + return 0; len = dccp_delta_seqno(cur->li_seqno, DCCP_SKB_CB(skb)->dccpd_seq) + 1; if (len - (s64)cur->li_length <= 0) /* duplicate or reordered */ - return; + return 0; if (SUB16(dccp_hdr(skb)->dccph_ccval, cur->li_ccval) > 4) /* @@ -119,11 +114,14 @@ void tfrc_lh_update_i_mean(struct tfrc_loss_hist *lh, struct sk_buff *skb) cur->li_is_closed = 1; if (tfrc_lh_length(lh) == 1) /* due to RFC 3448, 6.3.1 */ - return; + return 0; cur->li_length = len; tfrc_lh_calc_i_mean(lh); + + return (lh->i_mean < old_i_mean); } +EXPORT_SYMBOL_GPL(tfrc_lh_update_i_mean); /* Determine if `new_loss' does begin a new loss interval [RFC 4342, 10.2] */ static inline u8 tfrc_lh_is_new_loss(struct tfrc_loss_interval *cur, @@ -140,18 +138,18 @@ static inline u8 tfrc_lh_is_new_loss(struct tfrc_loss_interval *cur, * @sk: Used by @calc_first_li in caller-specific way (subtyping) * Updates I_mean and returns 1 if a new interval has in fact been added to @lh. */ -bool tfrc_lh_interval_add(struct tfrc_loss_hist *lh, struct tfrc_rx_hist *rh, - u32 (*calc_first_li)(struct sock *), struct sock *sk) +int tfrc_lh_interval_add(struct tfrc_loss_hist *lh, struct tfrc_rx_hist *rh, + u32 (*calc_first_li)(struct sock *), struct sock *sk) { struct tfrc_loss_interval *cur = tfrc_lh_peek(lh), *new; if (cur != NULL && !tfrc_lh_is_new_loss(cur, tfrc_rx_hist_loss_prev(rh))) - return false; + return 0; new = tfrc_lh_demand_next(lh); if (unlikely(new == NULL)) { DCCP_CRIT("Cannot allocate/add loss record."); - return false; + return 0; } new->li_seqno = tfrc_rx_hist_loss_prev(rh)->tfrchrx_seqno; @@ -169,7 +167,7 @@ bool tfrc_lh_interval_add(struct tfrc_loss_hist *lh, struct tfrc_rx_hist *rh, tfrc_lh_calc_i_mean(lh); } - return true; + return 1; } EXPORT_SYMBOL_GPL(tfrc_lh_interval_add); diff --git a/net/dccp/ccids/lib/loss_interval.h b/net/dccp/ccids/lib/loss_interval.h index d08a226db43..246018a3b26 100644 --- a/net/dccp/ccids/lib/loss_interval.h +++ b/net/dccp/ccids/lib/loss_interval.h @@ -67,9 +67,9 @@ static inline u8 tfrc_lh_length(struct tfrc_loss_hist *lh) struct tfrc_rx_hist; -extern bool tfrc_lh_interval_add(struct tfrc_loss_hist *, struct tfrc_rx_hist *, +extern int tfrc_lh_interval_add(struct tfrc_loss_hist *, struct tfrc_rx_hist *, u32 (*first_li)(struct sock *), struct sock *); -extern void tfrc_lh_update_i_mean(struct tfrc_loss_hist *lh, struct sk_buff *); +extern u8 tfrc_lh_update_i_mean(struct tfrc_loss_hist *lh, struct sk_buff *); extern void tfrc_lh_cleanup(struct tfrc_loss_hist *lh); #endif /* _DCCP_LI_HIST_ */ diff --git a/net/dccp/ccids/lib/packet_history.c b/net/dccp/ccids/lib/packet_history.c index cce9f03bda3..6cc108afdc3 100644 --- a/net/dccp/ccids/lib/packet_history.c +++ b/net/dccp/ccids/lib/packet_history.c @@ -40,6 +40,18 @@ #include "packet_history.h" #include "../../dccp.h" +/** + * tfrc_tx_hist_entry - Simple singly-linked TX history list + * @next: next oldest entry (LIFO order) + * @seqno: sequence number of this entry + * @stamp: send time of packet with sequence number @seqno + */ +struct tfrc_tx_hist_entry { + struct tfrc_tx_hist_entry *next; + u64 seqno; + ktime_t stamp; +}; + /* * Transmitter History Routines */ @@ -61,6 +73,15 @@ void tfrc_tx_packet_history_exit(void) } } +static struct tfrc_tx_hist_entry * + tfrc_tx_hist_find_entry(struct tfrc_tx_hist_entry *head, u64 seqno) +{ + while (head != NULL && head->seqno != seqno) + head = head->next; + + return head; +} + int tfrc_tx_hist_add(struct tfrc_tx_hist_entry **headp, u64 seqno) { struct tfrc_tx_hist_entry *entry = kmem_cache_alloc(tfrc_tx_hist_slab, gfp_any()); @@ -90,6 +111,25 @@ void tfrc_tx_hist_purge(struct tfrc_tx_hist_entry **headp) } EXPORT_SYMBOL_GPL(tfrc_tx_hist_purge); +u32 tfrc_tx_hist_rtt(struct tfrc_tx_hist_entry *head, const u64 seqno, + const ktime_t now) +{ + u32 rtt = 0; + struct tfrc_tx_hist_entry *packet = tfrc_tx_hist_find_entry(head, seqno); + + if (packet != NULL) { + rtt = ktime_us_delta(now, packet->stamp); + /* + * Garbage-collect older (irrelevant) entries: + */ + tfrc_tx_hist_purge(&packet->next); + } + + return rtt; +} +EXPORT_SYMBOL_GPL(tfrc_tx_hist_rtt); + + /* * Receiver History Routines */ @@ -151,31 +191,14 @@ int tfrc_rx_hist_duplicate(struct tfrc_rx_hist *h, struct sk_buff *skb) } EXPORT_SYMBOL_GPL(tfrc_rx_hist_duplicate); - -static void __tfrc_rx_hist_swap(struct tfrc_rx_hist *h, const u8 a, const u8 b) -{ - struct tfrc_rx_hist_entry *tmp = h->ring[a]; - - h->ring[a] = h->ring[b]; - h->ring[b] = tmp; -} - static void tfrc_rx_hist_swap(struct tfrc_rx_hist *h, const u8 a, const u8 b) { - __tfrc_rx_hist_swap(h, tfrc_rx_hist_index(h, a), - tfrc_rx_hist_index(h, b)); -} + const u8 idx_a = tfrc_rx_hist_index(h, a), + idx_b = tfrc_rx_hist_index(h, b); + struct tfrc_rx_hist_entry *tmp = h->ring[idx_a]; -/** - * tfrc_rx_hist_resume_rtt_sampling - Prepare RX history for RTT sampling - * This is called after loss detection has finished, when the history entry - * with the index of `loss_count' holds the highest-received sequence number. - * RTT sampling requires this information at ring[0] (tfrc_rx_hist_sample_rtt). - */ -static inline void tfrc_rx_hist_resume_rtt_sampling(struct tfrc_rx_hist *h) -{ - __tfrc_rx_hist_swap(h, 0, tfrc_rx_hist_index(h, h->loss_count)); - h->loss_count = h->loss_start = 0; + h->ring[idx_a] = h->ring[idx_b]; + h->ring[idx_b] = tmp; } /* @@ -192,8 +215,10 @@ static void __do_track_loss(struct tfrc_rx_hist *h, struct sk_buff *skb, u64 n1) u64 s0 = tfrc_rx_hist_loss_prev(h)->tfrchrx_seqno, s1 = DCCP_SKB_CB(skb)->dccpd_seq; - if (!dccp_loss_free(s0, s1, n1)) /* gap between S0 and S1 */ + if (!dccp_loss_free(s0, s1, n1)) { /* gap between S0 and S1 */ h->loss_count = 1; + tfrc_rx_hist_entry_from_skb(tfrc_rx_hist_entry(h, 1), skb, n1); + } } static void __one_after_loss(struct tfrc_rx_hist *h, struct sk_buff *skb, u32 n2) @@ -215,7 +240,8 @@ static void __one_after_loss(struct tfrc_rx_hist *h, struct sk_buff *skb, u32 n2 if (dccp_loss_free(s2, s1, n1)) { /* hole is filled: S0, S2, and S1 are consecutive */ - tfrc_rx_hist_resume_rtt_sampling(h); + h->loss_count = 0; + h->loss_start = tfrc_rx_hist_index(h, 1); } else /* gap between S2 and S1: just update loss_prev */ tfrc_rx_hist_entry_from_skb(tfrc_rx_hist_loss_prev(h), skb, n2); @@ -268,7 +294,8 @@ static int __two_after_loss(struct tfrc_rx_hist *h, struct sk_buff *skb, u32 n3) if (dccp_loss_free(s1, s2, n2)) { /* entire hole filled by S0, S3, S1, S2 */ - tfrc_rx_hist_resume_rtt_sampling(h); + h->loss_start = tfrc_rx_hist_index(h, 2); + h->loss_count = 0; } else { /* gap remains between S1 and S2 */ h->loss_start = tfrc_rx_hist_index(h, 1); @@ -312,7 +339,8 @@ static void __three_after_loss(struct tfrc_rx_hist *h) if (dccp_loss_free(s2, s3, n3)) { /* no gap between S2 and S3: entire hole is filled */ - tfrc_rx_hist_resume_rtt_sampling(h); + h->loss_start = tfrc_rx_hist_index(h, 3); + h->loss_count = 0; } else { /* gap between S2 and S3 */ h->loss_start = tfrc_rx_hist_index(h, 2); @@ -326,13 +354,13 @@ static void __three_after_loss(struct tfrc_rx_hist *h) } /** - * tfrc_rx_congestion_event - Loss detection and further processing - * @h: The non-empty RX history object - * @lh: Loss Intervals database to update - * @skb: Currently received packet - * @ndp: The NDP count belonging to @skb - * @first_li: Caller-dependent computation of first loss interval in @lh - * @sk: Used by @calc_first_li (see tfrc_lh_interval_add) + * tfrc_rx_handle_loss - Loss detection and further processing + * @h: The non-empty RX history object + * @lh: Loss Intervals database to update + * @skb: Currently received packet + * @ndp: The NDP count belonging to @skb + * @calc_first_li: Caller-dependent computation of first loss interval in @lh + * @sk: Used by @calc_first_li (see tfrc_lh_interval_add) * Chooses action according to pending loss, updates LI database when a new * loss was detected, and does required post-processing. Returns 1 when caller * should send feedback, 0 otherwise. @@ -340,20 +368,15 @@ static void __three_after_loss(struct tfrc_rx_hist *h) * records accordingly, the caller should not perform any more RX history * operations when loss_count is greater than 0 after calling this function. */ -bool tfrc_rx_congestion_event(struct tfrc_rx_hist *h, - struct tfrc_loss_hist *lh, - struct sk_buff *skb, const u64 ndp, - u32 (*first_li)(struct sock *), struct sock *sk) +int tfrc_rx_handle_loss(struct tfrc_rx_hist *h, + struct tfrc_loss_hist *lh, + struct sk_buff *skb, const u64 ndp, + u32 (*calc_first_li)(struct sock *), struct sock *sk) { - bool new_event = false; - - if (tfrc_rx_hist_duplicate(h, skb)) - return 0; + int is_new_loss = 0; if (h->loss_count == 0) { __do_track_loss(h, skb, ndp); - tfrc_rx_hist_sample_rtt(h, skb); - tfrc_rx_hist_add_packet(h, skb, ndp); } else if (h->loss_count == 1) { __one_after_loss(h, skb, ndp); } else if (h->loss_count != 2) { @@ -362,57 +385,34 @@ bool tfrc_rx_congestion_event(struct tfrc_rx_hist *h, /* * Update Loss Interval database and recycle RX records */ - new_event = tfrc_lh_interval_add(lh, h, first_li, sk); + is_new_loss = tfrc_lh_interval_add(lh, h, calc_first_li, sk); __three_after_loss(h); } - - /* - * Update moving-average of `s' and the sum of received payload bytes. - */ - if (dccp_data_packet(skb)) { - const u32 payload = skb->len - dccp_hdr(skb)->dccph_doff * 4; - - h->packet_size = tfrc_ewma(h->packet_size, payload, 9); - h->bytes_recvd += payload; - } - - /* RFC 3448, 6.1: update I_0, whose growth implies p <= p_prev */ - if (!new_event) - tfrc_lh_update_i_mean(lh, skb); - - return new_event; + return is_new_loss; } -EXPORT_SYMBOL_GPL(tfrc_rx_congestion_event); +EXPORT_SYMBOL_GPL(tfrc_rx_handle_loss); -/* Compute the sending rate X_recv measured between feedback intervals */ -u32 tfrc_rx_hist_x_recv(struct tfrc_rx_hist *h, const u32 last_x_recv) +int tfrc_rx_hist_alloc(struct tfrc_rx_hist *h) { - u64 bytes = h->bytes_recvd, last_rtt = h->rtt_estimate; - s64 delta = ktime_to_us(net_timedelta(h->bytes_start)); - - WARN_ON(delta <= 0); - /* - * Ensure that the sampling interval for X_recv is at least one RTT, - * by extending the sampling interval backwards in time, over the last - * R_(m-1) seconds, as per rfc3448bis-06, 6.2. - * To reduce noise (e.g. when the RTT changes often), this is only - * done when delta is smaller than RTT/2. - */ - if (last_x_recv > 0 && delta < last_rtt/2) { - tfrc_pr_debug("delta < RTT ==> %ld us < %u us\n", - (long)delta, (unsigned)last_rtt); + int i; - delta = (bytes ? delta : 0) + last_rtt; - bytes += div_u64((u64)last_x_recv * last_rtt, USEC_PER_SEC); + for (i = 0; i <= TFRC_NDUPACK; i++) { + h->ring[i] = kmem_cache_alloc(tfrc_rx_hist_slab, GFP_ATOMIC); + if (h->ring[i] == NULL) + goto out_free; } - if (unlikely(bytes == 0)) { - DCCP_WARN("X_recv == 0, using old value of %u\n", last_x_recv); - return last_x_recv; + h->loss_count = h->loss_start = 0; + return 0; + +out_free: + while (i-- != 0) { + kmem_cache_free(tfrc_rx_hist_slab, h->ring[i]); + h->ring[i] = NULL; } - return scaled_div32(bytes, delta); + return -ENOBUFS; } -EXPORT_SYMBOL_GPL(tfrc_rx_hist_x_recv); +EXPORT_SYMBOL_GPL(tfrc_rx_hist_alloc); void tfrc_rx_hist_purge(struct tfrc_rx_hist *h) { @@ -426,81 +426,73 @@ void tfrc_rx_hist_purge(struct tfrc_rx_hist *h) } EXPORT_SYMBOL_GPL(tfrc_rx_hist_purge); -static int tfrc_rx_hist_alloc(struct tfrc_rx_hist *h) +/** + * tfrc_rx_hist_rtt_last_s - reference entry to compute RTT samples against + */ +static inline struct tfrc_rx_hist_entry * + tfrc_rx_hist_rtt_last_s(const struct tfrc_rx_hist *h) { - int i; - - memset(h, 0, sizeof(*h)); - - for (i = 0; i <= TFRC_NDUPACK; i++) { - h->ring[i] = kmem_cache_alloc(tfrc_rx_hist_slab, GFP_ATOMIC); - if (h->ring[i] == NULL) { - tfrc_rx_hist_purge(h); - return -ENOBUFS; - } - } - return 0; + return h->ring[0]; } -int tfrc_rx_hist_init(struct tfrc_rx_hist *h, struct sock *sk) +/** + * tfrc_rx_hist_rtt_prev_s: previously suitable (wrt rtt_last_s) RTT-sampling entry + */ +static inline struct tfrc_rx_hist_entry * + tfrc_rx_hist_rtt_prev_s(const struct tfrc_rx_hist *h) { - if (tfrc_rx_hist_alloc(h)) - return -ENOBUFS; - /* - * Initialise first entry with GSR to start loss detection as early as - * possible. Code using this must not use any other fields. The entry - * will be overwritten once the CCID updates its received packets. - */ - tfrc_rx_hist_loss_prev(h)->tfrchrx_seqno = dccp_sk(sk)->dccps_gsr; - return 0; + return h->ring[h->rtt_sample_prev]; } -EXPORT_SYMBOL_GPL(tfrc_rx_hist_init); /** * tfrc_rx_hist_sample_rtt - Sample RTT from timestamp / CCVal - * Based on ideas presented in RFC 4342, 8.1. This function expects that no loss - * is pending and uses the following history entries (via rtt_sample_prev): - * - h->ring[0] contains the most recent history entry prior to @skb; - * - h->ring[1] is an unused `dummy' entry when the current difference is 0; + * Based on ideas presented in RFC 4342, 8.1. Returns 0 if it was not able + * to compute a sample with given data - calling function should check this. */ -void tfrc_rx_hist_sample_rtt(struct tfrc_rx_hist *h, const struct sk_buff *skb) +u32 tfrc_rx_hist_sample_rtt(struct tfrc_rx_hist *h, const struct sk_buff *skb) { - struct tfrc_rx_hist_entry *last = h->ring[0]; - u32 sample, delta_v; - - /* - * When not to sample: - * - on non-data packets - * (RFC 4342, 8.1: CCVal only fully defined for data packets); - * - when no data packets have been received yet - * (FIXME: using sampled packet size as indicator here); - * - as long as there are gaps in the sequence space (pending loss). - */ - if (!dccp_data_packet(skb) || h->packet_size == 0 || - tfrc_rx_hist_loss_pending(h)) - return; + u32 sample = 0, + delta_v = SUB16(dccp_hdr(skb)->dccph_ccval, + tfrc_rx_hist_rtt_last_s(h)->tfrchrx_ccval); + + if (delta_v < 1 || delta_v > 4) { /* unsuitable CCVal delta */ + if (h->rtt_sample_prev == 2) { /* previous candidate stored */ + sample = SUB16(tfrc_rx_hist_rtt_prev_s(h)->tfrchrx_ccval, + tfrc_rx_hist_rtt_last_s(h)->tfrchrx_ccval); + if (sample) + sample = 4 / sample * + ktime_us_delta(tfrc_rx_hist_rtt_prev_s(h)->tfrchrx_tstamp, + tfrc_rx_hist_rtt_last_s(h)->tfrchrx_tstamp); + else /* + * FIXME: This condition is in principle not + * possible but occurs when CCID is used for + * two-way data traffic. I have tried to trace + * it, but the cause does not seem to be here. + */ + DCCP_BUG("please report to dccp@vger.kernel.org" + " => prev = %u, last = %u", + tfrc_rx_hist_rtt_prev_s(h)->tfrchrx_ccval, + tfrc_rx_hist_rtt_last_s(h)->tfrchrx_ccval); + } else if (delta_v < 1) { + h->rtt_sample_prev = 1; + goto keep_ref_for_next_time; + } - h->rtt_sample_prev = 0; /* reset previous candidate */ + } else if (delta_v == 4) /* optimal match */ + sample = ktime_to_us(net_timedelta(tfrc_rx_hist_rtt_last_s(h)->tfrchrx_tstamp)); + else { /* suboptimal match */ + h->rtt_sample_prev = 2; + goto keep_ref_for_next_time; + } - delta_v = SUB16(dccp_hdr(skb)->dccph_ccval, last->tfrchrx_ccval); - if (delta_v == 0) { /* less than RTT/4 difference */ - h->rtt_sample_prev = 1; - return; + if (unlikely(sample > DCCP_SANE_RTT_MAX)) { + DCCP_WARN("RTT sample %u too large, using max\n", sample); + sample = DCCP_SANE_RTT_MAX; } - sample = dccp_sane_rtt(ktime_to_us(net_timedelta(last->tfrchrx_tstamp))); - if (delta_v <= 4) /* between RTT/4 and RTT */ - sample *= 4 / delta_v; - else if (!(sample < h->rtt_estimate && sample > h->rtt_estimate/2)) - /* - * Optimisation: CCVal difference is greater than 1 RTT, yet the - * sample is less than the local RTT estimate; which means that - * the RTT estimate is too high. - * To avoid noise, it is not done if the sample is below RTT/2. - */ - return; + h->rtt_sample_prev = 0; /* use current entry as next reference */ +keep_ref_for_next_time: - /* Use a lower weight than usual to increase responsiveness */ - h->rtt_estimate = tfrc_ewma(h->rtt_estimate, sample, 5); + return sample; } EXPORT_SYMBOL_GPL(tfrc_rx_hist_sample_rtt); diff --git a/net/dccp/ccids/lib/packet_history.h b/net/dccp/ccids/lib/packet_history.h index 555e65cd73a..461cc91cce8 100644 --- a/net/dccp/ccids/lib/packet_history.h +++ b/net/dccp/ccids/lib/packet_history.h @@ -40,28 +40,12 @@ #include #include "tfrc.h" -/** - * tfrc_tx_hist_entry - Simple singly-linked TX history list - * @next: next oldest entry (LIFO order) - * @seqno: sequence number of this entry - * @stamp: send time of packet with sequence number @seqno - */ -struct tfrc_tx_hist_entry { - struct tfrc_tx_hist_entry *next; - u64 seqno; - ktime_t stamp; -}; - -static inline struct tfrc_tx_hist_entry * - tfrc_tx_hist_find_entry(struct tfrc_tx_hist_entry *head, u64 seqno) -{ - while (head != NULL && head->seqno != seqno) - head = head->next; - return head; -} +struct tfrc_tx_hist_entry; extern int tfrc_tx_hist_add(struct tfrc_tx_hist_entry **headp, u64 seqno); extern void tfrc_tx_hist_purge(struct tfrc_tx_hist_entry **headp); +extern u32 tfrc_tx_hist_rtt(struct tfrc_tx_hist_entry *head, + const u64 seqno, const ktime_t now); /* Subtraction a-b modulo-16, respects circular wrap-around */ #define SUB16(a, b) (((a) + 16 - (b)) & 0xF) @@ -91,22 +75,12 @@ struct tfrc_rx_hist_entry { * @loss_count: Number of entries in circular history * @loss_start: Movable index (for loss detection) * @rtt_sample_prev: Used during RTT sampling, points to candidate entry - * @rtt_estimate: Receiver RTT estimate - * @packet_size: Packet size in bytes (as per RFC 3448, 3.1) - * @bytes_recvd: Number of bytes received since @bytes_start - * @bytes_start: Start time for counting @bytes_recvd */ struct tfrc_rx_hist { struct tfrc_rx_hist_entry *ring[TFRC_NDUPACK + 1]; u8 loss_count:2, loss_start:2; - /* Receiver RTT sampling */ #define rtt_sample_prev loss_start - u32 rtt_estimate; - /* Receiver sampling of application payload lengths */ - u32 packet_size, - bytes_recvd; - ktime_t bytes_start; }; /** @@ -150,50 +124,20 @@ static inline bool tfrc_rx_hist_loss_pending(const struct tfrc_rx_hist *h) return h->loss_count > 0; } -/* - * Accessor functions to retrieve parameters sampled by the RX history - */ -static inline u32 tfrc_rx_hist_packet_size(const struct tfrc_rx_hist *h) -{ - if (h->packet_size == 0) { - DCCP_WARN("No sample for s, using fallback\n"); - return TCP_MIN_RCVMSS; - } - return h->packet_size; - -} -static inline u32 tfrc_rx_hist_rtt(const struct tfrc_rx_hist *h) -{ - if (h->rtt_estimate == 0) { - DCCP_WARN("No RTT estimate available, using fallback RTT\n"); - return DCCP_FALLBACK_RTT; - } - return h->rtt_estimate; -} - -static inline void tfrc_rx_hist_restart_byte_counter(struct tfrc_rx_hist *h) -{ - h->bytes_recvd = 0; - h->bytes_start = ktime_get_real(); -} - -extern u32 tfrc_rx_hist_x_recv(struct tfrc_rx_hist *h, const u32 last_x_recv); - - extern void tfrc_rx_hist_add_packet(struct tfrc_rx_hist *h, const struct sk_buff *skb, const u64 ndp); extern int tfrc_rx_hist_duplicate(struct tfrc_rx_hist *h, struct sk_buff *skb); struct tfrc_loss_hist; -extern bool tfrc_rx_congestion_event(struct tfrc_rx_hist *h, - struct tfrc_loss_hist *lh, - struct sk_buff *skb, const u64 ndp, - u32 (*first_li)(struct sock *sk), - struct sock *sk); -extern void tfrc_rx_hist_sample_rtt(struct tfrc_rx_hist *h, - const struct sk_buff *skb); -extern int tfrc_rx_hist_init(struct tfrc_rx_hist *h, struct sock *sk); +extern int tfrc_rx_handle_loss(struct tfrc_rx_hist *h, + struct tfrc_loss_hist *lh, + struct sk_buff *skb, const u64 ndp, + u32 (*first_li)(struct sock *sk), + struct sock *sk); +extern u32 tfrc_rx_hist_sample_rtt(struct tfrc_rx_hist *h, + const struct sk_buff *skb); +extern int tfrc_rx_hist_alloc(struct tfrc_rx_hist *h); extern void tfrc_rx_hist_purge(struct tfrc_rx_hist *h); #endif /* _DCCP_PKT_HIST_ */ diff --git a/net/dccp/ccids/lib/tfrc.h b/net/dccp/ccids/lib/tfrc.h index ede12f53de5..ed9857527ac 100644 --- a/net/dccp/ccids/lib/tfrc.h +++ b/net/dccp/ccids/lib/tfrc.h @@ -47,21 +47,6 @@ static inline u32 scaled_div32(u64 a, u64 b) return result; } -/** - * tfrc_scaled_sqrt - Compute scaled integer sqrt(x) for 0 < x < 2^22-1 - * Uses scaling to improve accuracy of the integer approximation of sqrt(). The - * scaling factor of 2^10 limits the maximum @sample to 4e6; this is okay for - * clamped RTT samples (dccp_sample_rtt). - * Should best be used for expressions of type sqrt(x)/sqrt(y), since then the - * scaling factor is neutralised. For this purpose, it avoids returning zero. - */ -static inline u16 tfrc_scaled_sqrt(const u32 sample) -{ - const unsigned long non_zero_sample = sample ? : 1; - - return int_sqrt(non_zero_sample << 10); -} - /** * tfrc_ewma - Exponentially weighted moving average * @weight: Weight to be used as damping factor, in units of 1/10 @@ -73,7 +58,6 @@ static inline u32 tfrc_ewma(const u32 avg, const u32 newval, const u8 weight) extern u32 tfrc_calc_x(u16 s, u32 R, u32 p); extern u32 tfrc_calc_x_reverse_lookup(u32 fvalue); -extern u32 tfrc_invert_loss_event_rate(u32 loss_event_rate); extern int tfrc_tx_packet_history_init(void); extern void tfrc_tx_packet_history_exit(void); diff --git a/net/dccp/ccids/lib/tfrc_equation.c b/net/dccp/ccids/lib/tfrc_equation.c index 38239c4d5e1..2f20a29cffe 100644 --- a/net/dccp/ccids/lib/tfrc_equation.c +++ b/net/dccp/ccids/lib/tfrc_equation.c @@ -632,16 +632,8 @@ u32 tfrc_calc_x(u16 s, u32 R, u32 p) if (p <= TFRC_CALC_X_SPLIT) { /* 0.0000 < p <= 0.05 */ if (p < TFRC_SMALLEST_P) { /* 0.0000 < p < 0.0001 */ - /* - * In the congestion-avoidance phase p decays towards 0 - * when there are no further losses, so this case is - * natural. Truncating to p_min = 0.01% means that the - * maximum achievable throughput is limited to about - * X_calc_max = 122.4 * s/RTT (see RFC 3448, 3.1); e.g. - * with s=1500 bytes, RTT=0.01 s: X_calc_max = 147 Mbps. - */ - tfrc_pr_debug("Value of p (%d) below resolution. " - "Substituting %d\n", p, TFRC_SMALLEST_P); + DCCP_WARN("Value of p (%d) below resolution. " + "Substituting %d\n", p, TFRC_SMALLEST_P); index = 0; } else /* 0.0001 <= p <= 0.05 */ index = p/TFRC_SMALLEST_P - 1; @@ -666,6 +658,7 @@ u32 tfrc_calc_x(u16 s, u32 R, u32 p) result = scaled_div(s, R); return scaled_div32(result, f); } + EXPORT_SYMBOL_GPL(tfrc_calc_x); /** @@ -700,19 +693,5 @@ u32 tfrc_calc_x_reverse_lookup(u32 fvalue) index = tfrc_binsearch(fvalue, 0); return (index + 1) * 1000000 / TFRC_CALC_X_ARRSIZE; } -EXPORT_SYMBOL_GPL(tfrc_calc_x_reverse_lookup); -/** - * tfrc_invert_loss_event_rate - Compute p so that 10^6 corresponds to 100% - * When @loss_event_rate is large, there is a chance that p is truncated to 0. - * To avoid re-entering slow-start in that case, we set p = TFRC_SMALLEST_P > 0. - */ -u32 tfrc_invert_loss_event_rate(u32 loss_event_rate) -{ - if (loss_event_rate == UINT_MAX) /* see RFC 4342, 8.5 */ - return 0; - if (unlikely(loss_event_rate == 0)) /* map 1/0 into 100% */ - return 1000000; - return max_t(u32, scaled_div(1, loss_event_rate), TFRC_SMALLEST_P); -} -EXPORT_SYMBOL_GPL(tfrc_invert_loss_event_rate); +EXPORT_SYMBOL_GPL(tfrc_calc_x_reverse_lookup); diff --git a/net/dccp/dccp.h b/net/dccp/dccp.h index 5281190aa19..b4bc6e095a0 100644 --- a/net/dccp/dccp.h +++ b/net/dccp/dccp.h @@ -42,11 +42,9 @@ extern int dccp_debug; #define dccp_pr_debug(format, a...) DCCP_PR_DEBUG(dccp_debug, format, ##a) #define dccp_pr_debug_cat(format, a...) DCCP_PRINTK(dccp_debug, format, ##a) -#define dccp_debug(fmt, a...) dccp_pr_debug_cat(KERN_DEBUG fmt, ##a) #else #define dccp_pr_debug(format, a...) #define dccp_pr_debug_cat(format, a...) -#define dccp_debug(format, a...) #endif extern struct inet_hashinfo dccp_hashinfo; @@ -63,14 +61,11 @@ extern void dccp_time_wait(struct sock *sk, int state, int timeo); * - DCCP-Reset with ACK Subheader and 4 bytes of Reset Code fields * Hence a safe upper bound for the maximum option length is 1020-28 = 992 */ -#define MAX_DCCP_SPECIFIC_HEADER (255 * sizeof(uint32_t)) +#define MAX_DCCP_SPECIFIC_HEADER (255 * sizeof(int)) #define DCCP_MAX_PACKET_HDR 28 #define DCCP_MAX_OPT_LEN (MAX_DCCP_SPECIFIC_HEADER - DCCP_MAX_PACKET_HDR) #define MAX_DCCP_HEADER (MAX_DCCP_SPECIFIC_HEADER + MAX_HEADER) -/* Upper bound for initial feature-negotiation overhead (padded to 32 bits) */ -#define DCCP_FEATNEG_OVERHEAD (32 * sizeof(uint32_t)) - #define DCCP_TIMEWAIT_LEN (60 * HZ) /* how long to wait to destroy TIME-WAIT * state, about 60 seconds */ @@ -86,13 +81,10 @@ extern void dccp_time_wait(struct sock *sk, int state, int timeo); */ #define DCCP_RTO_MAX ((unsigned)(64 * HZ)) -/* DCCP base time resolution - 10 microseconds (RFC 4340, 13.1 ... 13.3) */ -#define DCCP_TIME_RESOLUTION 10 - /* * RTT sampling: sanity bounds and fallback RTT value from RFC 4340, section 3.4 */ -#define DCCP_SANE_RTT_MIN (10 * DCCP_TIME_RESOLUTION) +#define DCCP_SANE_RTT_MIN 100 #define DCCP_FALLBACK_RTT (USEC_PER_SEC / 5) #define DCCP_SANE_RTT_MAX (3 * USEC_PER_SEC) @@ -103,6 +95,12 @@ extern void dccp_time_wait(struct sock *sk, int state, int timeo); extern int sysctl_dccp_request_retries; extern int sysctl_dccp_retries1; extern int sysctl_dccp_retries2; +extern int sysctl_dccp_feat_sequence_window; +extern int sysctl_dccp_feat_rx_ccid; +extern int sysctl_dccp_feat_tx_ccid; +extern int sysctl_dccp_feat_ack_ratio; +extern int sysctl_dccp_feat_send_ack_vector; +extern int sysctl_dccp_feat_send_ndp_count; extern int sysctl_dccp_tx_qlen; extern int sysctl_dccp_sync_ratelimit; @@ -237,22 +235,8 @@ extern void dccp_reqsk_send_ack(struct sock *sk, struct sk_buff *skb, extern void dccp_send_sync(struct sock *sk, const u64 seq, const enum dccp_pkt_type pkt_type); -/* - * TX Packet Dequeueing Interface - */ -extern void dccp_qpolicy_push(struct sock *sk, struct sk_buff *skb); -extern bool dccp_qpolicy_full(struct sock *sk); -extern void dccp_qpolicy_drop(struct sock *sk, struct sk_buff *skb); -extern struct sk_buff *dccp_qpolicy_top(struct sock *sk); -extern struct sk_buff *dccp_qpolicy_pop(struct sock *sk); -extern bool dccp_qpolicy_param_ok(struct sock *sk, __be32 param); - -/* - * TX Packet Output and TX Timers - */ -extern void dccp_write_xmit(struct sock *sk); +extern void dccp_write_xmit(struct sock *sk, int block); extern void dccp_write_space(struct sock *sk); -extern void dccp_flush_write_queue(struct sock *sk, long *time_budget); extern void dccp_init_xmit_timers(struct sock *sk); static inline void dccp_clear_xmit_timers(struct sock *sk) @@ -268,8 +252,7 @@ extern const char *dccp_state_name(const int state); extern void dccp_set_state(struct sock *sk, const int state); extern void dccp_done(struct sock *sk); -extern int dccp_reqsk_init(struct request_sock *rq, struct dccp_sock const *dp, - struct sk_buff const *skb); +extern void dccp_reqsk_init(struct request_sock *req, struct sk_buff *skb); extern int dccp_v4_conn_request(struct sock *sk, struct sk_buff *skb); @@ -334,14 +317,7 @@ extern struct sk_buff *dccp_ctl_make_reset(struct sock *sk, extern int dccp_send_reset(struct sock *sk, enum dccp_reset_codes code); extern void dccp_send_close(struct sock *sk, const int active); extern int dccp_invalid_packet(struct sk_buff *skb); - -static inline u32 dccp_sane_rtt(long usec_sample) -{ - if (unlikely(usec_sample <= 0 || usec_sample > DCCP_SANE_RTT_MAX)) - DCCP_WARN("RTT sample %ld out of bounds!\n", usec_sample); - return clamp_val(usec_sample, DCCP_SANE_RTT_MIN, DCCP_SANE_RTT_MAX); -} -extern u32 dccp_sample_rtt(struct sock *sk, long delta); +extern u32 dccp_sample_rtt(struct sock *sk, long delta); static inline int dccp_bad_service_code(const struct sock *sk, const __be32 service) @@ -435,62 +411,36 @@ static inline void dccp_hdr_set_ack(struct dccp_hdr_ack_bits *dhack, static inline void dccp_update_gsr(struct sock *sk, u64 seq) { struct dccp_sock *dp = dccp_sk(sk); + const struct dccp_minisock *dmsk = dccp_msk(sk); dp->dccps_gsr = seq; - /* Sequence validity window depends on remote Sequence Window (7.5.1) */ - dp->dccps_swl = SUB48(ADD48(dp->dccps_gsr, 1), dp->dccps_r_seq_win / 4); - /* - * Adjust SWL so that it is not below ISR. In contrast to RFC 4340, - * 7.5.1 we perform this check beyond the initial handshake: W/W' are - * always > 32, so for the first W/W' packets in the lifetime of a - * connection we always have to adjust SWL. - * A second reason why we are doing this is that the window depends on - * the feature-remote value of Sequence Window: nothing stops the peer - * from updating this value while we are busy adjusting SWL for the - * first W packets (we would have to count from scratch again then). - * Therefore it is safer to always make sure that the Sequence Window - * is not artificially extended by a peer who grows SWL downwards by - * continually updating the feature-remote Sequence-Window. - * If sequence numbers wrap it is bad luck. But that will take a while - * (48 bit), and this measure prevents Sequence-number attacks. - */ - if (before48(dp->dccps_swl, dp->dccps_isr)) - dp->dccps_swl = dp->dccps_isr; - dp->dccps_swh = ADD48(dp->dccps_gsr, (3 * dp->dccps_r_seq_win) / 4); + dccp_set_seqno(&dp->dccps_swl, + dp->dccps_gsr + 1 - (dmsk->dccpms_sequence_window / 4)); + dccp_set_seqno(&dp->dccps_swh, + dp->dccps_gsr + (3 * dmsk->dccpms_sequence_window) / 4); } static inline void dccp_update_gss(struct sock *sk, u64 seq) { struct dccp_sock *dp = dccp_sk(sk); - dp->dccps_gss = seq; - /* Ack validity window depends on local Sequence Window value (7.5.1) */ - dp->dccps_awl = SUB48(ADD48(dp->dccps_gss, 1), dp->dccps_l_seq_win); - /* Adjust AWL so that it is not below ISS - see comment above for SWL */ - if (before48(dp->dccps_awl, dp->dccps_iss)) - dp->dccps_awl = dp->dccps_iss; - dp->dccps_awh = dp->dccps_gss; -} - -static inline int dccp_ackvec_pending(const struct sock *sk) -{ - return dccp_sk(sk)->dccps_hc_rx_ackvec != NULL && - !dccp_ackvec_is_empty(dccp_sk(sk)->dccps_hc_rx_ackvec); + dp->dccps_awh = dp->dccps_gss = seq; + dccp_set_seqno(&dp->dccps_awl, + (dp->dccps_gss - + dccp_msk(sk)->dccpms_sequence_window + 1)); } static inline int dccp_ack_pending(const struct sock *sk) { - return dccp_ackvec_pending(sk) || inet_csk_ack_scheduled(sk); + const struct dccp_sock *dp = dccp_sk(sk); + return dp->dccps_timestamp_echo != 0 || +#ifdef CONFIG_IP_DCCP_ACKVEC + (dccp_msk(sk)->dccpms_send_ack_vector && + dccp_ackvec_pending(dp->dccps_hc_rx_ackvec)) || +#endif + inet_csk_ack_scheduled(sk); } -extern int dccp_feat_signal_nn_change(struct sock *sk, u8 feat, u64 nn_val); -extern int dccp_feat_finalise_settings(struct dccp_sock *dp); -extern int dccp_feat_server_ccid_dependencies(struct dccp_request_sock *dreq); -extern int dccp_feat_insert_opts(struct dccp_sock*, struct dccp_request_sock*, - struct sk_buff *skb); -extern int dccp_feat_activate_values(struct sock *sk, struct list_head *fn); -extern void dccp_feat_list_purge(struct list_head *fn_list); - extern int dccp_insert_options(struct sock *sk, struct sk_buff *skb); extern int dccp_insert_options_rsk(struct dccp_request_sock*, struct sk_buff*); extern int dccp_insert_option_elapsed_time(struct sock *sk, diff --git a/net/dccp/diag.c b/net/dccp/diag.c index 93aae7c9555..d8a3509b26f 100644 --- a/net/dccp/diag.c +++ b/net/dccp/diag.c @@ -29,7 +29,7 @@ static void dccp_get_info(struct sock *sk, struct tcp_info *info) info->tcpi_backoff = icsk->icsk_backoff; info->tcpi_pmtu = icsk->icsk_pmtu_cookie; - if (dp->dccps_hc_rx_ackvec != NULL) + if (dccp_msk(sk)->dccpms_send_ack_vector) info->tcpi_options |= TCPI_OPT_SACK; ccid_hc_rx_get_info(dp->dccps_hc_rx_ccid, sk, info); diff --git a/net/dccp/feat.c b/net/dccp/feat.c index f94c7c9d1a7..933a0ecf8d4 100644 --- a/net/dccp/feat.c +++ b/net/dccp/feat.c @@ -1,19 +1,11 @@ /* * net/dccp/feat.c * - * Feature negotiation for the DCCP protocol (RFC 4340, section 6) - * - * Copyright (c) 2008 The University of Aberdeen, Scotland, UK - * Copyright (c) 2008 Gerrit Renker - * Rewrote from scratch, some bits from earlier code by - * Copyright (c) 2005 Andrea Bittau - * + * An implementation of the DCCP protocol + * Andrea Bittau * * ASSUMPTIONS * ----------- - * o Feature negotiation is coordinated with connection setup (as in TCP), wild - * changes of parameters of an established connection are not supported. - * o Changing NN values (Ack Ratio only) is supported in state OPEN/PARTOPEN. * o All currently known SP features have 1-byte quantities. If in the future * extensions of RFCs 4340..42 define features with item lengths larger than * one byte, a feature-specific extension of the code will be required. @@ -23,1510 +15,635 @@ * as published by the Free Software Foundation; either version * 2 of the License, or (at your option) any later version. */ + #include + #include "ccid.h" #include "feat.h" -/* feature-specific sysctls - initialised to the defaults from RFC 4340, 6.4 */ -unsigned long sysctl_dccp_sequence_window __read_mostly = 100; -int sysctl_dccp_rx_ccid __read_mostly = 2, - sysctl_dccp_tx_ccid __read_mostly = 2; +#define DCCP_FEAT_SP_NOAGREE (-123) -/* - * Feature activation handlers. - * - * These all use an u64 argument, to provide enough room for NN/SP features. At - * this stage the negotiated values have been checked to be within their range. - */ -static int dccp_hdlr_ccid(struct sock *sk, u64 ccid, bool rx) +int dccp_feat_change(struct dccp_minisock *dmsk, u8 type, u8 feature, + u8 *val, u8 len, gfp_t gfp) { - struct dccp_sock *dp = dccp_sk(sk); - struct ccid *new_ccid = ccid_new(ccid, sk, rx, gfp_any()); + struct dccp_opt_pend *opt; - if (new_ccid == NULL) - return -ENOMEM; + dccp_feat_debug(type, feature, *val); - if (rx) { - ccid_hc_rx_delete(dp->dccps_hc_rx_ccid, sk); - dp->dccps_hc_rx_ccid = new_ccid; - } else { - ccid_hc_tx_delete(dp->dccps_hc_tx_ccid, sk); - dp->dccps_hc_tx_ccid = new_ccid; + if (len > 3) { + DCCP_WARN("invalid length %d\n", len); + return -EINVAL; + } + /* XXX add further sanity checks */ + + /* check if that feature is already being negotiated */ + list_for_each_entry(opt, &dmsk->dccpms_pending, dccpop_node) { + /* ok we found a negotiation for this option already */ + if (opt->dccpop_feat == feature && opt->dccpop_type == type) { + dccp_pr_debug("Replacing old\n"); + /* replace */ + BUG_ON(opt->dccpop_val == NULL); + kfree(opt->dccpop_val); + opt->dccpop_val = val; + opt->dccpop_len = len; + opt->dccpop_conf = 0; + return 0; + } } - return 0; -} -static int dccp_hdlr_seq_win(struct sock *sk, u64 seq_win, bool rx) -{ - struct dccp_sock *dp = dccp_sk(sk); + /* negotiation for a new feature */ + opt = kmalloc(sizeof(*opt), gfp); + if (opt == NULL) + return -ENOMEM; - if (rx) { - dp->dccps_r_seq_win = seq_win; - /* propagate changes to update SWL/SWH */ - dccp_update_gsr(sk, dp->dccps_gsr); - } else { - dp->dccps_l_seq_win = seq_win; - /* propagate changes to update AWL */ - dccp_update_gss(sk, dp->dccps_gss); - } - return 0; -} + opt->dccpop_type = type; + opt->dccpop_feat = feature; + opt->dccpop_len = len; + opt->dccpop_val = val; + opt->dccpop_conf = 0; + opt->dccpop_sc = NULL; -static int dccp_hdlr_ack_ratio(struct sock *sk, u64 ratio, bool rx) -{ -#ifndef __CCID2_COPES_GRACEFULLY_WITH_DYNAMIC_ACK_RATIO_UPDATES__ - /* - * FIXME: This is required until several problems in the CCID-2 code are - * resolved. The CCID-2 code currently does not cope well; using dynamic - * Ack Ratios greater than 1 caused instabilities. These were manifest - * in hangups and long RTO timeouts (1...3 seconds). Until this has been - * stabilised, it is safer not to activate dynamic Ack Ratio changes. - */ - dccp_pr_debug("Not changing %s Ack Ratio from 1 to %u\n", - rx ? "RX" : "TX", (u16)ratio); - ratio = 1; -#endif - if (rx) - dccp_sk(sk)->dccps_r_ack_ratio = ratio; - else - dccp_sk(sk)->dccps_l_ack_ratio = ratio; + BUG_ON(opt->dccpop_val == NULL); + + list_add_tail(&opt->dccpop_node, &dmsk->dccpms_pending); return 0; } -static int dccp_hdlr_ackvec(struct sock *sk, u64 enable, bool rx) +EXPORT_SYMBOL_GPL(dccp_feat_change); + +static int dccp_feat_update_ccid(struct sock *sk, u8 type, u8 new_ccid_nr) { struct dccp_sock *dp = dccp_sk(sk); + struct dccp_minisock *dmsk = dccp_msk(sk); + /* figure out if we are changing our CCID or the peer's */ + const int rx = type == DCCPO_CHANGE_R; + const u8 ccid_nr = rx ? dmsk->dccpms_rx_ccid : dmsk->dccpms_tx_ccid; + struct ccid *new_ccid; + + /* Check if nothing is being changed. */ + if (ccid_nr == new_ccid_nr) + return 0; + + new_ccid = ccid_new(new_ccid_nr, sk, rx, GFP_ATOMIC); + if (new_ccid == NULL) + return -ENOMEM; if (rx) { - if (enable && dp->dccps_hc_rx_ackvec == NULL) { - dp->dccps_hc_rx_ackvec = dccp_ackvec_alloc(gfp_any()); - if (dp->dccps_hc_rx_ackvec == NULL) - return -ENOMEM; - } else if (!enable) { - dccp_ackvec_free(dp->dccps_hc_rx_ackvec); - dp->dccps_hc_rx_ackvec = NULL; - } + ccid_hc_rx_delete(dp->dccps_hc_rx_ccid, sk); + dp->dccps_hc_rx_ccid = new_ccid; + dmsk->dccpms_rx_ccid = new_ccid_nr; + } else { + ccid_hc_tx_delete(dp->dccps_hc_tx_ccid, sk); + dp->dccps_hc_tx_ccid = new_ccid; + dmsk->dccpms_tx_ccid = new_ccid_nr; } - return 0; -} -static int dccp_hdlr_ndp(struct sock *sk, u64 enable, bool rx) -{ - if (!rx) - dccp_sk(sk)->dccps_send_ndp_count = (enable > 0); return 0; } -/* - * Minimum Checksum Coverage is located at the RX side (9.2.1). This means that - * `rx' holds when the sending peer informs about his partial coverage via a - * ChangeR() option. In the other case, we are the sender and the receiver - * announces its coverage via ChangeL() options. The policy here is to honour - * such communication by enabling the corresponding partial coverage - but only - * if it has not been set manually before; the warning here means that all - * packets will be dropped. - */ -static int dccp_hdlr_min_cscov(struct sock *sk, u64 cscov, bool rx) +static int dccp_feat_update(struct sock *sk, u8 type, u8 feat, u8 val) { - struct dccp_sock *dp = dccp_sk(sk); + dccp_feat_debug(type, feat, val); - if (rx) - dp->dccps_pcrlen = cscov; - else { - if (dp->dccps_pcslen == 0) - dp->dccps_pcslen = cscov; - else if (cscov > dp->dccps_pcslen) - DCCP_WARN("CsCov %u too small, peer requires >= %u\n", - dp->dccps_pcslen, (u8)cscov); + switch (feat) { + case DCCPF_CCID: + return dccp_feat_update_ccid(sk, type, val); + default: + dccp_pr_debug("UNIMPLEMENTED: %s(%d, ...)\n", + dccp_feat_typename(type), feat); + break; } return 0; } -static const struct { - u8 feat_num; /* DCCPF_xxx */ - enum dccp_feat_type rxtx; /* RX or TX */ - enum dccp_feat_type reconciliation; /* SP or NN */ - u8 default_value; /* as in 6.4 */ - int (*activation_hdlr)(struct sock *sk, u64 val, bool rx); -/* - * Lookup table for location and type of features (from RFC 4340/4342) - * +--------------------------+----+-----+----+----+---------+-----------+ - * | Feature | Location | Reconc. | Initial | Section | - * | | RX | TX | SP | NN | Value | Reference | - * +--------------------------+----+-----+----+----+---------+-----------+ - * | DCCPF_CCID | | X | X | | 2 | 10 | - * | DCCPF_SHORT_SEQNOS | | X | X | | 0 | 7.6.1 | - * | DCCPF_SEQUENCE_WINDOW | | X | | X | 100 | 7.5.2 | - * | DCCPF_ECN_INCAPABLE | X | | X | | 0 | 12.1 | - * | DCCPF_ACK_RATIO | | X | | X | 2 | 11.3 | - * | DCCPF_SEND_ACK_VECTOR | X | | X | | 0 | 11.5 | - * | DCCPF_SEND_NDP_COUNT | | X | X | | 0 | 7.7.2 | - * | DCCPF_MIN_CSUM_COVER | X | | X | | 0 | 9.2.1 | - * | DCCPF_DATA_CHECKSUM | X | | X | | 0 | 9.3.1 | - * | DCCPF_SEND_LEV_RATE | X | | X | | 0 | 4342/8.4 | - * +--------------------------+----+-----+----+----+---------+-----------+ - */ -} dccp_feat_table[] = { - { DCCPF_CCID, FEAT_AT_TX, FEAT_SP, 2, dccp_hdlr_ccid }, - { DCCPF_SHORT_SEQNOS, FEAT_AT_TX, FEAT_SP, 0, NULL }, - { DCCPF_SEQUENCE_WINDOW, FEAT_AT_TX, FEAT_NN, 100, dccp_hdlr_seq_win }, - { DCCPF_ECN_INCAPABLE, FEAT_AT_RX, FEAT_SP, 0, NULL }, - { DCCPF_ACK_RATIO, FEAT_AT_TX, FEAT_NN, 2, dccp_hdlr_ack_ratio}, - { DCCPF_SEND_ACK_VECTOR, FEAT_AT_RX, FEAT_SP, 0, dccp_hdlr_ackvec }, - { DCCPF_SEND_NDP_COUNT, FEAT_AT_TX, FEAT_SP, 0, dccp_hdlr_ndp }, - { DCCPF_MIN_CSUM_COVER, FEAT_AT_RX, FEAT_SP, 0, dccp_hdlr_min_cscov}, - { DCCPF_DATA_CHECKSUM, FEAT_AT_RX, FEAT_SP, 0, NULL }, - { DCCPF_SEND_LEV_RATE, FEAT_AT_RX, FEAT_SP, 0, NULL }, -}; -#define DCCP_FEAT_SUPPORTED_MAX ARRAY_SIZE(dccp_feat_table) - -/** - * dccp_feat_index - Hash function to map feature number into array position - * Returns consecutive array index or -1 if the feature is not understood. - */ -static int dccp_feat_index(u8 feat_num) +static int dccp_feat_reconcile(struct sock *sk, struct dccp_opt_pend *opt, + u8 *rpref, u8 rlen) { - /* The first 9 entries are occupied by the types from RFC 4340, 6.4 */ - if (feat_num > DCCPF_RESERVED && feat_num <= DCCPF_DATA_CHECKSUM) - return feat_num - 1; + struct dccp_sock *dp = dccp_sk(sk); + u8 *spref, slen, *res = NULL; + int i, j, rc, agree = 1; + BUG_ON(rpref == NULL); + + /* check if we are the black sheep */ + if (dp->dccps_role == DCCP_ROLE_CLIENT) { + spref = rpref; + slen = rlen; + rpref = opt->dccpop_val; + rlen = opt->dccpop_len; + } else { + spref = opt->dccpop_val; + slen = opt->dccpop_len; + } /* - * Other features: add cases for new feature types here after adding - * them to the above table. + * Now we have server preference list in spref and client preference in + * rpref */ - switch (feat_num) { - case DCCPF_SEND_LEV_RATE: - return DCCP_FEAT_SUPPORTED_MAX - 1; - } - return -1; -} - -static u8 dccp_feat_type(u8 feat_num) -{ - int idx = dccp_feat_index(feat_num); - - if (idx < 0) - return FEAT_UNKNOWN; - return dccp_feat_table[idx].reconciliation; -} + BUG_ON(spref == NULL); + BUG_ON(rpref == NULL); -static int dccp_feat_default_value(u8 feat_num) -{ - int idx = dccp_feat_index(feat_num); + /* FIXME sanity check vals */ - return idx < 0 ? : dccp_feat_table[idx].default_value; -} - -/* - * Debugging and verbose-printing section - */ -static const char *dccp_feat_fname(const u8 feat) -{ - static const char *feature_names[] = { - [DCCPF_RESERVED] = "Reserved", - [DCCPF_CCID] = "CCID", - [DCCPF_SHORT_SEQNOS] = "Allow Short Seqnos", - [DCCPF_SEQUENCE_WINDOW] = "Sequence Window", - [DCCPF_ECN_INCAPABLE] = "ECN Incapable", - [DCCPF_ACK_RATIO] = "Ack Ratio", - [DCCPF_SEND_ACK_VECTOR] = "Send ACK Vector", - [DCCPF_SEND_NDP_COUNT] = "Send NDP Count", - [DCCPF_MIN_CSUM_COVER] = "Min. Csum Coverage", - [DCCPF_DATA_CHECKSUM] = "Send Data Checksum", - }; - if (feat > DCCPF_DATA_CHECKSUM && feat < DCCPF_MIN_CCID_SPECIFIC) - return feature_names[DCCPF_RESERVED]; - - if (feat == DCCPF_SEND_LEV_RATE) - return "Send Loss Event Rate"; - if (feat >= DCCPF_MIN_CCID_SPECIFIC) - return "CCID-specific"; - - return feature_names[feat]; -} - -static const char *dccp_feat_sname[] = { "DEFAULT", "INITIALISING", "CHANGING", - "UNSTABLE", "STABLE" }; - -#ifdef CONFIG_IP_DCCP_DEBUG -static const char *dccp_feat_oname(const u8 opt) -{ - switch (opt) { - case DCCPO_CHANGE_L: return "Change_L"; - case DCCPO_CONFIRM_L: return "Confirm_L"; - case DCCPO_CHANGE_R: return "Change_R"; - case DCCPO_CONFIRM_R: return "Confirm_R"; + /* Are values in any order? XXX Lame "algorithm" here */ + for (i = 0; i < slen; i++) { + for (j = 0; j < rlen; j++) { + if (spref[i] == rpref[j]) { + res = &spref[i]; + break; + } + } + if (res) + break; } - return NULL; -} -static void dccp_feat_printval(u8 feat_num, dccp_feat_val const *val) -{ - u8 i, type = dccp_feat_type(feat_num); - - if (val == NULL || (type == FEAT_SP && val->sp.vec == NULL)) - dccp_pr_debug_cat("(NULL)"); - else if (type == FEAT_SP) - for (i = 0; i < val->sp.len; i++) - dccp_pr_debug_cat("%s%u", i ? " " : "", val->sp.vec[i]); - else if (type == FEAT_NN) - dccp_pr_debug_cat("%llu", (unsigned long long)val->nn); - else - dccp_pr_debug_cat("unknown type %u", type); -} - -static void dccp_feat_printvals(u8 feat_num, u8 *list, u8 len) -{ - u8 type = dccp_feat_type(feat_num); - dccp_feat_val fval = { .sp.vec = list, .sp.len = len }; - - if (type == FEAT_NN) - fval.nn = dccp_decode_value_var(list, len); - dccp_feat_printval(feat_num, &fval); -} + /* we didn't agree on anything */ + if (res == NULL) { + /* confirm previous value */ + switch (opt->dccpop_feat) { + case DCCPF_CCID: + /* XXX did i get this right? =P */ + if (opt->dccpop_type == DCCPO_CHANGE_L) + res = &dccp_msk(sk)->dccpms_tx_ccid; + else + res = &dccp_msk(sk)->dccpms_rx_ccid; + break; -static void dccp_feat_print_entry(struct dccp_feat_entry const *entry) -{ - dccp_debug(" * %s %s = ", entry->is_local ? "local" : "remote", - dccp_feat_fname(entry->feat_num)); - dccp_feat_printval(entry->feat_num, &entry->val); - dccp_pr_debug_cat(", state=%s %s\n", dccp_feat_sname[entry->state], - entry->needs_confirm ? "(Confirm pending)" : ""); -} + default: + DCCP_BUG("Fell through, feat=%d", opt->dccpop_feat); + /* XXX implement res */ + return -EFAULT; + } -#define dccp_feat_print_opt(opt, feat, val, len, mandatory) do { \ - dccp_pr_debug("%s(%s, ", dccp_feat_oname(opt), dccp_feat_fname(feat));\ - dccp_feat_printvals(feat, val, len); \ - dccp_pr_debug_cat(") %s\n", mandatory ? "!" : ""); } while (0) - -#define dccp_feat_print_fnlist(fn_list) { \ - const struct dccp_feat_entry *___entry; \ - \ - dccp_pr_debug("List Dump:\n"); \ - list_for_each_entry(___entry, fn_list, node) \ - dccp_feat_print_entry(___entry); \ -} -#else /* ! CONFIG_IP_DCCP_DEBUG */ -#define dccp_feat_print_opt(opt, feat, val, len, mandatory) -#define dccp_feat_print_fnlist(fn_list) -#endif + dccp_pr_debug("Don't agree... reconfirming %d\n", *res); + agree = 0; /* this is used for mandatory options... */ + } -static int __dccp_feat_activate(struct sock *sk, const int idx, - const bool is_local, dccp_feat_val const *fval) -{ - bool rx; - u64 val; + /* need to put result and our preference list */ + rlen = 1 + opt->dccpop_len; + rpref = kmalloc(rlen, GFP_ATOMIC); + if (rpref == NULL) + return -ENOMEM; - if (idx < 0 || idx >= DCCP_FEAT_SUPPORTED_MAX) - return -1; - if (dccp_feat_table[idx].activation_hdlr == NULL) - return 0; + *rpref = *res; + memcpy(&rpref[1], opt->dccpop_val, opt->dccpop_len); - if (fval == NULL) { - val = dccp_feat_table[idx].default_value; - } else if (dccp_feat_table[idx].reconciliation == FEAT_SP) { - if (fval->sp.vec == NULL) { - /* - * This can happen when an empty Confirm is sent - * for an SP (i.e. known) feature. In this case - * we would be using the default anyway. - */ - DCCP_CRIT("Feature #%d undefined: using default", idx); - val = dccp_feat_table[idx].default_value; - } else { - val = fval->sp.vec[0]; + /* put it in the "confirm queue" */ + if (opt->dccpop_sc == NULL) { + opt->dccpop_sc = kmalloc(sizeof(*opt->dccpop_sc), GFP_ATOMIC); + if (opt->dccpop_sc == NULL) { + kfree(rpref); + return -ENOMEM; } } else { - val = fval->nn; + /* recycle the confirm slot */ + BUG_ON(opt->dccpop_sc->dccpoc_val == NULL); + kfree(opt->dccpop_sc->dccpoc_val); + dccp_pr_debug("recycling confirm slot\n"); + } + memset(opt->dccpop_sc, 0, sizeof(*opt->dccpop_sc)); + + opt->dccpop_sc->dccpoc_val = rpref; + opt->dccpop_sc->dccpoc_len = rlen; + + /* update the option on our side [we are about to send the confirm] */ + rc = dccp_feat_update(sk, opt->dccpop_type, opt->dccpop_feat, *res); + if (rc) { + kfree(opt->dccpop_sc->dccpoc_val); + kfree(opt->dccpop_sc); + opt->dccpop_sc = NULL; + return rc; } - /* Location is RX if this is a local-RX or remote-TX feature */ - rx = (is_local == (dccp_feat_table[idx].rxtx == FEAT_AT_RX)); - - dccp_debug(" -> activating %s %s, %sval=%llu\n", rx ? "RX" : "TX", - dccp_feat_fname(dccp_feat_table[idx].feat_num), - fval ? "" : "default ", (unsigned long long)val); - - return dccp_feat_table[idx].activation_hdlr(sk, val, rx); -} - -/** - * dccp_feat_activate - Activate feature value on socket - * @sk: fully connected DCCP socket (after handshake is complete) - * @feat_num: feature to activate, one of %dccp_feature_numbers - * @local: whether local (1) or remote (0) @feat_num is meant - * @fval: the value (SP or NN) to activate, or NULL to use the default value - * For general use this function is preferable over __dccp_feat_activate(). - */ -static int dccp_feat_activate(struct sock *sk, u8 feat_num, bool local, - dccp_feat_val const *fval) -{ - return __dccp_feat_activate(sk, dccp_feat_index(feat_num), local, fval); -} - -/* Test for "Req'd" feature (RFC 4340, 6.4) */ -static inline int dccp_feat_must_be_understood(u8 feat_num) -{ - return feat_num == DCCPF_CCID || feat_num == DCCPF_SHORT_SEQNOS || - feat_num == DCCPF_SEQUENCE_WINDOW; -} + dccp_pr_debug("Will confirm %d\n", *rpref); -/* copy constructor, fval must not already contain allocated memory */ -static int dccp_feat_clone_sp_val(dccp_feat_val *fval, u8 const *val, u8 len) -{ - fval->sp.len = len; - if (fval->sp.len > 0) { - fval->sp.vec = kmemdup(val, len, gfp_any()); - if (fval->sp.vec == NULL) { - fval->sp.len = 0; - return -ENOBUFS; - } + /* say we want to change to X but we just got a confirm X, suppress our + * change + */ + if (!opt->dccpop_conf) { + if (*opt->dccpop_val == *res) + opt->dccpop_conf = 1; + dccp_pr_debug("won't ask for change of same feature\n"); } - return 0; -} -static void dccp_feat_val_destructor(u8 feat_num, dccp_feat_val *val) -{ - if (unlikely(val == NULL)) - return; - if (dccp_feat_type(feat_num) == FEAT_SP) - kfree(val->sp.vec); - memset(val, 0, sizeof(*val)); + return agree ? 0 : DCCP_FEAT_SP_NOAGREE; /* used for mandatory opts */ } -static struct dccp_feat_entry * - dccp_feat_clone_entry(struct dccp_feat_entry const *original) +static int dccp_feat_sp(struct sock *sk, u8 type, u8 feature, u8 *val, u8 len) { - struct dccp_feat_entry *new; - u8 type = dccp_feat_type(original->feat_num); - - if (type == FEAT_UNKNOWN) - return NULL; + struct dccp_minisock *dmsk = dccp_msk(sk); + struct dccp_opt_pend *opt; + int rc = 1; + u8 t; - new = kmemdup(original, sizeof(struct dccp_feat_entry), gfp_any()); - if (new == NULL) - return NULL; + /* + * We received a CHANGE. We gotta match it against our own preference + * list. If we got a CHANGE_R it means it's a change for us, so we need + * to compare our CHANGE_L list. + */ + if (type == DCCPO_CHANGE_L) + t = DCCPO_CHANGE_R; + else + t = DCCPO_CHANGE_L; - if (type == FEAT_SP && dccp_feat_clone_sp_val(&new->val, - original->val.sp.vec, - original->val.sp.len)) { - kfree(new); - return NULL; - } - return new; -} + /* find our preference list for this feature */ + list_for_each_entry(opt, &dmsk->dccpms_pending, dccpop_node) { + if (opt->dccpop_type != t || opt->dccpop_feat != feature) + continue; -static void dccp_feat_entry_destructor(struct dccp_feat_entry *entry) -{ - if (entry != NULL) { - dccp_feat_val_destructor(entry->feat_num, &entry->val); - kfree(entry); + /* find the winner from the two preference lists */ + rc = dccp_feat_reconcile(sk, opt, val, len); + break; } -} -/* - * List management functions - * - * Feature negotiation lists rely on and maintain the following invariants: - * - each feat_num in the list is known, i.e. we know its type and default value - * - each feat_num/is_local combination is unique (old entries are overwritten) - * - SP values are always freshly allocated - * - list is sorted in increasing order of feature number (faster lookup) - */ -static struct dccp_feat_entry *dccp_feat_list_lookup(struct list_head *fn_list, - u8 feat_num, bool is_local) -{ - struct dccp_feat_entry *entry; + /* We didn't deal with the change. This can happen if we have no + * preference list for the feature. In fact, it just shouldn't + * happen---if we understand a feature, we should have a preference list + * with at least the default value. + */ + BUG_ON(rc == 1); - list_for_each_entry(entry, fn_list, node) - if (entry->feat_num == feat_num && entry->is_local == is_local) - return entry; - else if (entry->feat_num > feat_num) - break; - return NULL; + return rc; } -/** - * dccp_feat_entry_new - Central list update routine (called by all others) - * @head: list to add to - * @feat: feature number - * @local: whether the local (1) or remote feature with number @feat is meant - * This is the only constructor and serves to ensure the above invariants. - */ -static struct dccp_feat_entry * - dccp_feat_entry_new(struct list_head *head, u8 feat, bool local) +static int dccp_feat_nn(struct sock *sk, u8 type, u8 feature, u8 *val, u8 len) { - struct dccp_feat_entry *entry; - - list_for_each_entry(entry, head, node) - if (entry->feat_num == feat && entry->is_local == local) { - dccp_feat_val_destructor(entry->feat_num, &entry->val); - return entry; - } else if (entry->feat_num > feat) { - head = &entry->node; - break; - } + struct dccp_opt_pend *opt; + struct dccp_minisock *dmsk = dccp_msk(sk); + u8 *copy; + int rc; - entry = kmalloc(sizeof(*entry), gfp_any()); - if (entry != NULL) { - entry->feat_num = feat; - entry->is_local = local; - list_add_tail(&entry->node, head); + /* NN features must be Change L (sec. 6.3.2) */ + if (type != DCCPO_CHANGE_L) { + dccp_pr_debug("received %s for NN feature %d\n", + dccp_feat_typename(type), feature); + return -EFAULT; } - return entry; -} -/** - * dccp_feat_push_change - Add/overwrite a Change option in the list - * @fn_list: feature-negotiation list to update - * @feat: one of %dccp_feature_numbers - * @local: whether local (1) or remote (0) @feat_num is meant - * @needs_mandatory: whether to use Mandatory feature negotiation options - * @fval: pointer to NN/SP value to be inserted (will be copied) - */ -static int dccp_feat_push_change(struct list_head *fn_list, u8 feat, u8 local, - u8 mandatory, dccp_feat_val *fval) -{ - struct dccp_feat_entry *new = dccp_feat_entry_new(fn_list, feat, local); + /* XXX sanity check opt val */ - if (new == NULL) + /* copy option so we can confirm it */ + opt = kzalloc(sizeof(*opt), GFP_ATOMIC); + if (opt == NULL) return -ENOMEM; - new->feat_num = feat; - new->is_local = local; - new->state = FEAT_INITIALISING; - new->needs_confirm = 0; - new->empty_confirm = 0; - new->val = *fval; - new->needs_mandatory = mandatory; + copy = kmemdup(val, len, GFP_ATOMIC); + if (copy == NULL) { + kfree(opt); + return -ENOMEM; + } - return 0; -} + opt->dccpop_type = DCCPO_CONFIRM_R; /* NN can only confirm R */ + opt->dccpop_feat = feature; + opt->dccpop_val = copy; + opt->dccpop_len = len; -/** - * dccp_feat_push_confirm - Add a Confirm entry to the FN list - * @fn_list: feature-negotiation list to add to - * @feat: one of %dccp_feature_numbers - * @local: whether local (1) or remote (0) @feat_num is being confirmed - * @fval: pointer to NN/SP value to be inserted or NULL - * Returns 0 on success, a Reset code for further processing otherwise. - */ -static int dccp_feat_push_confirm(struct list_head *fn_list, u8 feat, u8 local, - dccp_feat_val *fval) -{ - struct dccp_feat_entry *new = dccp_feat_entry_new(fn_list, feat, local); + /* change feature */ + rc = dccp_feat_update(sk, type, feature, *val); + if (rc) { + kfree(opt->dccpop_val); + kfree(opt); + return rc; + } - if (new == NULL) - return DCCP_RESET_CODE_TOO_BUSY; + dccp_feat_debug(type, feature, *copy); - new->feat_num = feat; - new->is_local = local; - new->state = FEAT_STABLE; /* transition in 6.6.2 */ - new->needs_confirm = 1; - new->empty_confirm = (fval == NULL); - new->val.nn = 0; /* zeroes the whole structure */ - if (!new->empty_confirm) - new->val = *fval; - new->needs_mandatory = 0; + list_add_tail(&opt->dccpop_node, &dmsk->dccpms_conf); return 0; } -static int dccp_push_empty_confirm(struct list_head *fn_list, u8 feat, u8 local) +static void dccp_feat_empty_confirm(struct dccp_minisock *dmsk, + u8 type, u8 feature) { - return dccp_feat_push_confirm(fn_list, feat, local, NULL); -} + /* XXX check if other confirms for that are queued and recycle slot */ + struct dccp_opt_pend *opt = kzalloc(sizeof(*opt), GFP_ATOMIC); -static inline void dccp_feat_list_pop(struct dccp_feat_entry *entry) -{ - list_del(&entry->node); - dccp_feat_entry_destructor(entry); -} - -void dccp_feat_list_purge(struct list_head *fn_list) -{ - struct dccp_feat_entry *entry, *next; - - list_for_each_entry_safe(entry, next, fn_list, node) - dccp_feat_entry_destructor(entry); - INIT_LIST_HEAD(fn_list); -} -EXPORT_SYMBOL_GPL(dccp_feat_list_purge); - -/* generate @to as full clone of @from - @to must not contain any nodes */ -int dccp_feat_clone_list(struct list_head const *from, struct list_head *to) -{ - struct dccp_feat_entry *entry, *new; - - INIT_LIST_HEAD(to); - list_for_each_entry(entry, from, node) { - new = dccp_feat_clone_entry(entry); - if (new == NULL) - goto cloning_failed; - list_add_tail(&new->node, to); + if (opt == NULL) { + /* XXX what do we do? Ignoring should be fine. It's a change + * after all =P + */ + return; } - return 0; -cloning_failed: - dccp_feat_list_purge(to); - return -ENOMEM; -} + switch (type) { + case DCCPO_CHANGE_L: + opt->dccpop_type = DCCPO_CONFIRM_R; + break; + case DCCPO_CHANGE_R: + opt->dccpop_type = DCCPO_CONFIRM_L; + break; + default: + DCCP_WARN("invalid type %d\n", type); + kfree(opt); + return; + } + opt->dccpop_feat = feature; + opt->dccpop_val = NULL; + opt->dccpop_len = 0; -/** - * dccp_feat_valid_nn_length - Enforce length constraints on NN options - * Length is between 0 and %DCCP_OPTVAL_MAXLEN. Used for outgoing packets only, - * incoming options are accepted as long as their values are valid. - */ -static u8 dccp_feat_valid_nn_length(u8 feat_num) -{ - if (feat_num == DCCPF_ACK_RATIO) /* RFC 4340, 11.3 and 6.6.8 */ - return 2; - if (feat_num == DCCPF_SEQUENCE_WINDOW) /* RFC 4340, 7.5.2 and 6.5 */ - return 6; - return 0; -} + /* change feature */ + dccp_pr_debug("Empty %s(%d)\n", dccp_feat_typename(type), feature); -static u8 dccp_feat_is_valid_nn_val(u8 feat_num, u64 val) -{ - switch (feat_num) { - case DCCPF_ACK_RATIO: - return val <= DCCPF_ACK_RATIO_MAX; - case DCCPF_SEQUENCE_WINDOW: - return val >= DCCPF_SEQ_WMIN && val <= DCCPF_SEQ_WMAX; - } - return 0; /* feature unknown - so we can't tell */ + list_add_tail(&opt->dccpop_node, &dmsk->dccpms_conf); } -/* check that SP values are within the ranges defined in RFC 4340 */ -static u8 dccp_feat_is_valid_sp_val(u8 feat_num, u8 val) +static void dccp_feat_flush_confirm(struct sock *sk) { - switch (feat_num) { - case DCCPF_CCID: - return val == DCCPC_CCID2 || val == DCCPC_CCID3; - /* Type-check Boolean feature values: */ - case DCCPF_SHORT_SEQNOS: - case DCCPF_ECN_INCAPABLE: - case DCCPF_SEND_ACK_VECTOR: - case DCCPF_SEND_NDP_COUNT: - case DCCPF_DATA_CHECKSUM: - case DCCPF_SEND_LEV_RATE: - return val < 2; - case DCCPF_MIN_CSUM_COVER: - return val < 16; - } - return 0; /* feature unknown */ -} + struct dccp_minisock *dmsk = dccp_msk(sk); + /* Check if there is anything to confirm in the first place */ + int yes = !list_empty(&dmsk->dccpms_conf); -static u8 dccp_feat_sp_list_ok(u8 feat_num, u8 const *sp_list, u8 sp_len) -{ - if (sp_list == NULL || sp_len < 1) - return 0; - while (sp_len--) - if (!dccp_feat_is_valid_sp_val(feat_num, *sp_list++)) - return 0; - return 1; -} + if (!yes) { + struct dccp_opt_pend *opt; -/** - * dccp_feat_insert_opts - Generate FN options from current list state - * @skb: next sk_buff to be sent to the peer - * @dp: for client during handshake and general negotiation - * @dreq: used by the server only (all Changes/Confirms in LISTEN/RESPOND) - */ -int dccp_feat_insert_opts(struct dccp_sock *dp, struct dccp_request_sock *dreq, - struct sk_buff *skb) -{ - struct list_head *fn = dreq ? &dreq->dreq_featneg : &dp->dccps_featneg; - struct dccp_feat_entry *pos, *next; - u8 opt, type, len, *ptr, nn_in_nbo[DCCP_OPTVAL_MAXLEN]; - bool rpt; - - /* put entries into @skb in the order they appear in the list */ - list_for_each_entry_safe_reverse(pos, next, fn, node) { - opt = dccp_feat_genopt(pos); - type = dccp_feat_type(pos->feat_num); - rpt = false; - - if (pos->empty_confirm) { - len = 0; - ptr = NULL; - } else { - if (type == FEAT_SP) { - len = pos->val.sp.len; - ptr = pos->val.sp.vec; - rpt = pos->needs_confirm; - } else if (type == FEAT_NN) { - len = dccp_feat_valid_nn_length(pos->feat_num); - ptr = nn_in_nbo; - dccp_encode_value_var(pos->val.nn, ptr, len); - } else { - DCCP_BUG("unknown feature %u", pos->feat_num); - return -1; + list_for_each_entry(opt, &dmsk->dccpms_pending, dccpop_node) { + if (opt->dccpop_conf) { + yes = 1; + break; } } - dccp_feat_print_opt(opt, pos->feat_num, ptr, len, 0); - - if (dccp_insert_fn_opt(skb, opt, pos->feat_num, ptr, len, rpt)) - return -1; - if (pos->needs_mandatory && dccp_insert_option_mandatory(skb)) - return -1; - /* - * Enter CHANGING after transmitting the Change option (6.6.2). - */ - if (pos->state == FEAT_INITIALISING) - pos->state = FEAT_CHANGING; } - return 0; -} - -/** - * __feat_register_nn - Register new NN value on socket - * @fn: feature-negotiation list to register with - * @feat: an NN feature from %dccp_feature_numbers - * @mandatory: use Mandatory option if 1 - * @nn_val: value to register (restricted to 4 bytes) - * Note that NN features are local by definition (RFC 4340, 6.3.2). - */ -static int __feat_register_nn(struct list_head *fn, u8 feat, - u8 mandatory, u64 nn_val) -{ - dccp_feat_val fval = { .nn = nn_val }; - - if (dccp_feat_type(feat) != FEAT_NN || - !dccp_feat_is_valid_nn_val(feat, nn_val)) - return -EINVAL; - - /* Don't bother with default values, they will be activated anyway. */ - if (nn_val - (u64)dccp_feat_default_value(feat) == 0) - return 0; - - return dccp_feat_push_change(fn, feat, 1, mandatory, &fval); -} - -/** - * __feat_register_sp - Register new SP value/list on socket - * @fn: feature-negotiation list to register with - * @feat: an SP feature from %dccp_feature_numbers - * @is_local: whether the local (1) or the remote (0) @feat is meant - * @mandatory: use Mandatory option if 1 - * @sp_val: SP value followed by optional preference list - * @sp_len: length of @sp_val in bytes - */ -static int __feat_register_sp(struct list_head *fn, u8 feat, u8 is_local, - u8 mandatory, u8 const *sp_val, u8 sp_len) -{ - dccp_feat_val fval; - if (dccp_feat_type(feat) != FEAT_SP || - !dccp_feat_sp_list_ok(feat, sp_val, sp_len)) - return -EINVAL; - - /* Avoid negotiating alien CCIDs by only advertising supported ones */ - if (feat == DCCPF_CCID && !ccid_support_check(sp_val, sp_len)) - return -EOPNOTSUPP; - - if (dccp_feat_clone_sp_val(&fval, sp_val, sp_len)) - return -ENOMEM; + if (!yes) + return; - return dccp_feat_push_change(fn, feat, is_local, mandatory, &fval); + /* OK there is something to confirm... */ + /* XXX check if packet is in flight? Send delayed ack?? */ + if (sk->sk_state == DCCP_OPEN) + dccp_send_ack(sk); } -/** - * dccp_feat_register_sp - Register requests to change SP feature values - * @sk: client or listening socket - * @feat: one of %dccp_feature_numbers - * @is_local: whether the local (1) or remote (0) @feat is meant - * @list: array of preferred values, in descending order of preference - * @len: length of @list in bytes - */ -int dccp_feat_register_sp(struct sock *sk, u8 feat, u8 is_local, - u8 const *list, u8 len) -{ /* any changes must be registered before establishing the connection */ - if (sk->sk_state != DCCP_CLOSED) - return -EISCONN; - if (dccp_feat_type(feat) != FEAT_SP) - return -EINVAL; - return __feat_register_sp(&dccp_sk(sk)->dccps_featneg, feat, is_local, - 0, list, len); -} - -/* Analogous to dccp_feat_register_sp(), but for non-negotiable values */ -int dccp_feat_register_nn(struct sock *sk, u8 feat, u64 val) +int dccp_feat_change_recv(struct sock *sk, u8 type, u8 feature, u8 *val, u8 len) { - /* any changes must be registered before establishing the connection */ - if (sk->sk_state != DCCP_CLOSED) - return -EISCONN; - if (dccp_feat_type(feat) != FEAT_NN) - return -EINVAL; - return __feat_register_nn(&dccp_sk(sk)->dccps_featneg, feat, 0, val); -} + int rc; -/** - * dccp_feat_signal_nn_change - Update NN values for an established connection - * @sk: DCCP socket of an established connection - * @feat: NN feature number from %dccp_feature_numbers - * @nn_val: the new value to use - * This function is used to communicate NN updates out-of-band. The difference - * to feature negotiation during connection setup is that values are activated - * immediately after validation, i.e. we don't wait for the Confirm: either the - * value is accepted by the peer (and then the waiting is futile), or it is not - * (Reset or empty Confirm). We don't accept empty Confirms - transmitted values - * are validated, and the peer "MUST accept any valid value" (RFC 4340, 6.3.2). - */ -int dccp_feat_signal_nn_change(struct sock *sk, u8 feat, u64 nn_val) -{ - struct list_head *fn = &dccp_sk(sk)->dccps_featneg; - dccp_feat_val fval = { .nn = nn_val }; - struct dccp_feat_entry *entry; + dccp_feat_debug(type, feature, *val); - if (sk->sk_state != DCCP_OPEN && sk->sk_state != DCCP_PARTOPEN) - return 0; + /* figure out if it's SP or NN feature */ + switch (feature) { + /* deal with SP features */ + case DCCPF_CCID: + rc = dccp_feat_sp(sk, type, feature, val, len); + break; - if (dccp_feat_type(feat) != FEAT_NN || - !dccp_feat_is_valid_nn_val(feat, nn_val)) - return -EINVAL; + /* deal with NN features */ + case DCCPF_ACK_RATIO: + rc = dccp_feat_nn(sk, type, feature, val, len); + break; - entry = dccp_feat_list_lookup(fn, feat, 1); - if (entry != NULL) { - dccp_pr_debug("Ignoring %llu, entry %llu exists in state %s\n", - (unsigned long long)nn_val, - (unsigned long long)entry->val.nn, - dccp_feat_sname[entry->state]); - return 0; + /* XXX implement other features */ + default: + dccp_pr_debug("UNIMPLEMENTED: not handling %s(%d, ...)\n", + dccp_feat_typename(type), feature); + rc = -EFAULT; + break; } - if (dccp_feat_activate(sk, feat, 1, &fval)) - return -EADV; - - inet_csk_schedule_ack(sk); - return dccp_feat_push_change(fn, feat, 1, 0, &fval); -} -EXPORT_SYMBOL_GPL(dccp_feat_signal_nn_change); - -/* - * Tracking features whose value depend on the choice of CCID - * - * This is designed with an extension in mind so that a list walk could be done - * before activating any features. However, the existing framework was found to - * work satisfactorily up until now, the automatic verification is left open. - * When adding new CCIDs, add a corresponding dependency table here. - */ -static const struct ccid_dependency *dccp_feat_ccid_deps(u8 ccid, bool is_local) -{ - static const struct ccid_dependency ccid2_dependencies[2][2] = { - /* - * CCID2 mandates Ack Vectors (RFC 4341, 4.): as CCID is a TX - * feature and Send Ack Vector is an RX feature, `is_local' - * needs to be reversed. + /* check if there were problems changing features */ + if (rc) { + /* If we don't agree on SP, we sent a confirm for old value. + * However we propagate rc to caller in case option was + * mandatory */ - { /* Dependencies of the receiver-side (remote) CCID2 */ - { - .dependent_feat = DCCPF_SEND_ACK_VECTOR, - .is_local = true, - .is_mandatory = true, - .val = 1 - }, - { 0, 0, 0, 0 } - }, - { /* Dependencies of the sender-side (local) CCID2 */ - { - .dependent_feat = DCCPF_SEND_ACK_VECTOR, - .is_local = false, - .is_mandatory = true, - .val = 1 - }, - { 0, 0, 0, 0 } - } - }; - static const struct ccid_dependency ccid3_dependencies[2][5] = { - { /* - * Dependencies of the receiver-side CCID3 - */ - { /* locally disable Ack Vectors */ - .dependent_feat = DCCPF_SEND_ACK_VECTOR, - .is_local = true, - .is_mandatory = false, - .val = 0 - }, - { /* see below why Send Loss Event Rate is on */ - .dependent_feat = DCCPF_SEND_LEV_RATE, - .is_local = true, - .is_mandatory = true, - .val = 1 - }, - { /* NDP Count is needed as per RFC 4342, 6.1.1 */ - .dependent_feat = DCCPF_SEND_NDP_COUNT, - .is_local = false, - .is_mandatory = true, - .val = 1 - }, - { 0, 0, 0, 0 }, - }, - { /* - * CCID3 at the TX side: we request that the HC-receiver - * will not send Ack Vectors (they will be ignored, so - * Mandatory is not set); we enable Send Loss Event Rate - * (Mandatory since the implementation does not support - * the Loss Intervals option of RFC 4342, 8.6). - * The last two options are for peer's information only. - */ - { - .dependent_feat = DCCPF_SEND_ACK_VECTOR, - .is_local = false, - .is_mandatory = false, - .val = 0 - }, - { - .dependent_feat = DCCPF_SEND_LEV_RATE, - .is_local = false, - .is_mandatory = true, - .val = 1 - }, - { /* this CCID does not support Ack Ratio */ - .dependent_feat = DCCPF_ACK_RATIO, - .is_local = true, - .is_mandatory = false, - .val = 0 - }, - { /* tell receiver we are sending NDP counts */ - .dependent_feat = DCCPF_SEND_NDP_COUNT, - .is_local = true, - .is_mandatory = false, - .val = 1 - }, - { 0, 0, 0, 0 } - } - }; - switch (ccid) { - case DCCPC_CCID2: - return ccid2_dependencies[is_local]; - case DCCPC_CCID3: - return ccid3_dependencies[is_local]; - default: - return NULL; + if (rc != DCCP_FEAT_SP_NOAGREE) + dccp_feat_empty_confirm(dccp_msk(sk), type, feature); } -} -/** - * dccp_feat_propagate_ccid - Resolve dependencies of features on choice of CCID - * @fn: feature-negotiation list to update - * @id: CCID number to track - * @is_local: whether TX CCID (1) or RX CCID (0) is meant - * This function needs to be called after registering all other features. - */ -static int dccp_feat_propagate_ccid(struct list_head *fn, u8 id, bool is_local) -{ - const struct ccid_dependency *table = dccp_feat_ccid_deps(id, is_local); - int i, rc = (table == NULL); - - for (i = 0; rc == 0 && table[i].dependent_feat != DCCPF_RESERVED; i++) - if (dccp_feat_type(table[i].dependent_feat) == FEAT_SP) - rc = __feat_register_sp(fn, table[i].dependent_feat, - table[i].is_local, - table[i].is_mandatory, - &table[i].val, 1); - else - rc = __feat_register_nn(fn, table[i].dependent_feat, - table[i].is_mandatory, - table[i].val); - return rc; -} - -/** - * dccp_feat_finalise_settings - Finalise settings before starting negotiation - * @dp: client or listening socket (settings will be inherited) - * This is called after all registrations (socket initialisation, sysctls, and - * sockopt calls), and before sending the first packet containing Change options - * (ie. client-Request or server-Response), to ensure internal consistency. - */ -int dccp_feat_finalise_settings(struct dccp_sock *dp) -{ - struct list_head *fn = &dp->dccps_featneg; - struct dccp_feat_entry *entry; - int i = 2, ccids[2] = { -1, -1 }; + /* generate the confirm [if required] */ + dccp_feat_flush_confirm(sk); - /* - * Propagating CCIDs: - * 1) not useful to propagate CCID settings if this host advertises more - * than one CCID: the choice of CCID may still change - if this is - * the client, or if this is the server and the client sends - * singleton CCID values. - * 2) since is that propagate_ccid changes the list, we defer changing - * the sorted list until after the traversal. - */ - list_for_each_entry(entry, fn, node) - if (entry->feat_num == DCCPF_CCID && entry->val.sp.len == 1) - ccids[entry->is_local] = entry->val.sp.vec[0]; - while (i--) - if (ccids[i] > 0 && dccp_feat_propagate_ccid(fn, ccids[i], i)) - return -1; - dccp_feat_print_fnlist(fn); - return 0; + return rc; } -/** - * dccp_feat_server_ccid_dependencies - Resolve CCID-dependent features - * It is the server which resolves the dependencies once the CCID has been - * fully negotiated. If no CCID has been negotiated, it uses the default CCID. - */ -int dccp_feat_server_ccid_dependencies(struct dccp_request_sock *dreq) -{ - struct list_head *fn = &dreq->dreq_featneg; - struct dccp_feat_entry *entry; - u8 is_local, ccid; - - for (is_local = 0; is_local <= 1; is_local++) { - entry = dccp_feat_list_lookup(fn, DCCPF_CCID, is_local); - - if (entry != NULL && !entry->empty_confirm) - ccid = entry->val.sp.vec[0]; - else - ccid = dccp_feat_default_value(DCCPF_CCID); - - if (dccp_feat_propagate_ccid(fn, ccid, is_local)) - return -1; - } - return 0; -} +EXPORT_SYMBOL_GPL(dccp_feat_change_recv); -/* Select the first entry in @servlist that also occurs in @clilist (6.3.1) */ -static int dccp_feat_preflist_match(u8 *servlist, u8 slen, u8 *clilist, u8 clen) +int dccp_feat_confirm_recv(struct sock *sk, u8 type, u8 feature, + u8 *val, u8 len) { - u8 c, s; + u8 t; + struct dccp_opt_pend *opt; + struct dccp_minisock *dmsk = dccp_msk(sk); + int found = 0; + int all_confirmed = 1; - for (s = 0; s < slen; s++) - for (c = 0; c < clen; c++) - if (servlist[s] == clilist[c]) - return servlist[s]; - return -1; -} + dccp_feat_debug(type, feature, *val); -/** - * dccp_feat_prefer - Move preferred entry to the start of array - * Reorder the @array_len elements in @array so that @preferred_value comes - * first. Returns >0 to indicate that @preferred_value does occur in @array. - */ -static u8 dccp_feat_prefer(u8 preferred_value, u8 *array, u8 array_len) -{ - u8 i, does_occur = 0; + /* locate our change request */ + switch (type) { + case DCCPO_CONFIRM_L: t = DCCPO_CHANGE_R; break; + case DCCPO_CONFIRM_R: t = DCCPO_CHANGE_L; break; + default: DCCP_WARN("invalid type %d\n", type); + return 1; - if (array != NULL) { - for (i = 0; i < array_len; i++) - if (array[i] == preferred_value) { - array[i] = array[0]; - does_occur++; - } - if (does_occur) - array[0] = preferred_value; } - return does_occur; -} + /* XXX sanity check feature value */ -/** - * dccp_feat_reconcile - Reconcile SP preference lists - * @fval: SP list to reconcile into - * @arr: received SP preference list - * @len: length of @arr in bytes - * @is_server: whether this side is the server (and @fv is the server's list) - * @reorder: whether to reorder the list in @fv after reconciling with @arr - * When successful, > 0 is returned and the reconciled list is in @fval. - * A value of 0 means that negotiation failed (no shared entry). - */ -static int dccp_feat_reconcile(dccp_feat_val *fv, u8 *arr, u8 len, - bool is_server, bool reorder) -{ - int rc; + list_for_each_entry(opt, &dmsk->dccpms_pending, dccpop_node) { + if (!opt->dccpop_conf && opt->dccpop_type == t && + opt->dccpop_feat == feature) { + found = 1; + dccp_pr_debug("feature %d found\n", opt->dccpop_feat); - if (!fv->sp.vec || !arr) { - DCCP_CRIT("NULL feature value or array"); - return 0; - } + /* XXX do sanity check */ - if (is_server) - rc = dccp_feat_preflist_match(fv->sp.vec, fv->sp.len, arr, len); - else - rc = dccp_feat_preflist_match(arr, len, fv->sp.vec, fv->sp.len); - - if (!reorder) - return rc; - if (rc < 0) - return 0; + opt->dccpop_conf = 1; - /* - * Reorder list: used for activating features and in dccp_insert_fn_opt. - */ - return dccp_feat_prefer(rc, fv->sp.vec, fv->sp.len); -} + /* We got a confirmation---change the option */ + dccp_feat_update(sk, opt->dccpop_type, + opt->dccpop_feat, *val); -/** - * dccp_feat_change_recv - Process incoming ChangeL/R options - * @fn: feature-negotiation list to update - * @is_mandatory: whether the Change was preceded by a Mandatory option - * @opt: %DCCPO_CHANGE_L or %DCCPO_CHANGE_R - * @feat: one of %dccp_feature_numbers - * @val: NN value or SP value/preference list - * @len: length of @val in bytes - * @server: whether this node is the server (1) or the client (0) - */ -static u8 dccp_feat_change_recv(struct list_head *fn, u8 is_mandatory, u8 opt, - u8 feat, u8 *val, u8 len, const bool server) -{ - u8 defval, type = dccp_feat_type(feat); - const bool local = (opt == DCCPO_CHANGE_R); - struct dccp_feat_entry *entry; - dccp_feat_val fval; - - if (len == 0 || type == FEAT_UNKNOWN) /* 6.1 and 6.6.8 */ - goto unknown_feature_or_value; - - dccp_feat_print_opt(opt, feat, val, len, is_mandatory); - - /* - * Negotiation of NN features: Change R is invalid, so there is no - * simultaneous negotiation; hence we do not look up in the list. - */ - if (type == FEAT_NN) { - if (local || len > sizeof(fval.nn)) - goto unknown_feature_or_value; - - /* 6.3.2: "The feature remote MUST accept any valid value..." */ - fval.nn = dccp_decode_value_var(val, len); - if (!dccp_feat_is_valid_nn_val(feat, fval.nn)) - goto unknown_feature_or_value; + /* XXX check the return value of dccp_feat_update */ + break; + } - return dccp_feat_push_confirm(fn, feat, local, &fval); + if (!opt->dccpop_conf) + all_confirmed = 0; } - /* - * Unidirectional/simultaneous negotiation of SP features (6.3.1) + /* fix re-transmit timer */ + /* XXX gotta make sure that no option negotiation occurs during + * connection shutdown. Consider that the CLOSEREQ is sent and timer is + * on. if all options are confirmed it might kill timer which should + * remain alive until close is received. */ - entry = dccp_feat_list_lookup(fn, feat, local); - if (entry == NULL) { - /* - * No particular preferences have been registered. We deal with - * this situation by assuming that all valid values are equally - * acceptable, and apply the following checks: - * - if the peer's list is a singleton, we accept a valid value; - * - if we are the server, we first try to see if the peer (the - * client) advertises the default value. If yes, we use it, - * otherwise we accept the preferred value; - * - else if we are the client, we use the first list element. - */ - if (dccp_feat_clone_sp_val(&fval, val, 1)) - return DCCP_RESET_CODE_TOO_BUSY; - - if (len > 1 && server) { - defval = dccp_feat_default_value(feat); - if (dccp_feat_preflist_match(&defval, 1, val, len) > -1) - fval.sp.vec[0] = defval; - } else if (!dccp_feat_is_valid_sp_val(feat, fval.sp.vec[0])) { - kfree(fval.sp.vec); - goto unknown_feature_or_value; - } - - /* Treat unsupported CCIDs like invalid values */ - if (feat == DCCPF_CCID && !ccid_support_check(fval.sp.vec, 1)) { - kfree(fval.sp.vec); - goto not_valid_or_not_known; - } - - return dccp_feat_push_confirm(fn, feat, local, &fval); - - } else if (entry->state == FEAT_UNSTABLE) { /* 6.6.2 */ - return 0; + if (all_confirmed) { + dccp_pr_debug("clear feat negotiation timer %p\n", sk); + inet_csk_clear_xmit_timer(sk, ICSK_TIME_RETRANS); } - if (dccp_feat_reconcile(&entry->val, val, len, server, true)) { - entry->empty_confirm = 0; - } else if (is_mandatory) { - return DCCP_RESET_CODE_MANDATORY_ERROR; - } else if (entry->state == FEAT_INITIALISING) { - /* - * Failed simultaneous negotiation (server only): try to `save' - * the connection by checking whether entry contains the default - * value for @feat. If yes, send an empty Confirm to signal that - * the received Change was not understood - which implies using - * the default value. - * If this also fails, we use Reset as the last resort. - */ - WARN_ON(!server); - defval = dccp_feat_default_value(feat); - if (!dccp_feat_reconcile(&entry->val, &defval, 1, server, true)) - return DCCP_RESET_CODE_OPTION_ERROR; - entry->empty_confirm = 1; - } - entry->needs_confirm = 1; - entry->needs_mandatory = 0; - entry->state = FEAT_STABLE; + if (!found) + dccp_pr_debug("%s(%d, ...) never requested\n", + dccp_feat_typename(type), feature); return 0; - -unknown_feature_or_value: - if (!is_mandatory) - return dccp_push_empty_confirm(fn, feat, local); - -not_valid_or_not_known: - return is_mandatory ? DCCP_RESET_CODE_MANDATORY_ERROR - : DCCP_RESET_CODE_OPTION_ERROR; } -/** - * dccp_feat_confirm_recv - Process received Confirm options - * @fn: feature-negotiation list to update - * @is_mandatory: whether @opt was preceded by a Mandatory option - * @opt: %DCCPO_CONFIRM_L or %DCCPO_CONFIRM_R - * @feat: one of %dccp_feature_numbers - * @val: NN value or SP value/preference list - * @len: length of @val in bytes - * @server: whether this node is server (1) or client (0) - */ -static u8 dccp_feat_confirm_recv(struct list_head *fn, u8 is_mandatory, u8 opt, - u8 feat, u8 *val, u8 len, const bool server) -{ - u8 *plist, plen, type = dccp_feat_type(feat); - const bool local = (opt == DCCPO_CONFIRM_R); - struct dccp_feat_entry *entry = dccp_feat_list_lookup(fn, feat, local); - - dccp_feat_print_opt(opt, feat, val, len, is_mandatory); - - if (entry == NULL) { /* nothing queued: ignore or handle error */ - if (is_mandatory && type == FEAT_UNKNOWN) - return DCCP_RESET_CODE_MANDATORY_ERROR; - - if (!local && type == FEAT_NN) /* 6.3.2 */ - goto confirmation_failed; - return 0; - } - - if (entry->state != FEAT_CHANGING) /* 6.6.2 */ - return 0; - - if (len == 0) { - if (dccp_feat_must_be_understood(feat)) /* 6.6.7 */ - goto confirmation_failed; - /* - * Empty Confirm during connection setup: this means reverting - * to the `old' value, which in this case is the default. Since - * we handle default values automatically when no other values - * have been set, we revert to the old value by removing this - * entry from the list. - */ - dccp_feat_list_pop(entry); - return 0; - } +EXPORT_SYMBOL_GPL(dccp_feat_confirm_recv); - if (type == FEAT_NN) { - if (len > sizeof(entry->val.nn)) - goto confirmation_failed; +void dccp_feat_clean(struct dccp_minisock *dmsk) +{ + struct dccp_opt_pend *opt, *next; - if (entry->val.nn == dccp_decode_value_var(val, len)) - goto confirmation_succeeded; + list_for_each_entry_safe(opt, next, &dmsk->dccpms_pending, + dccpop_node) { + BUG_ON(opt->dccpop_val == NULL); + kfree(opt->dccpop_val); - DCCP_WARN("Bogus Confirm for non-existing value\n"); - goto confirmation_failed; - } + if (opt->dccpop_sc != NULL) { + BUG_ON(opt->dccpop_sc->dccpoc_val == NULL); + kfree(opt->dccpop_sc->dccpoc_val); + kfree(opt->dccpop_sc); + } - /* - * Parsing SP Confirms: the first element of @val is the preferred - * SP value which the peer confirms, the remainder depends on @len. - * Note that only the confirmed value need to be a valid SP value. - */ - if (!dccp_feat_is_valid_sp_val(feat, *val)) - goto confirmation_failed; - - if (len == 1) { /* peer didn't supply a preference list */ - plist = val; - plen = len; - } else { /* preferred value + preference list */ - plist = val + 1; - plen = len - 1; + kfree(opt); } + INIT_LIST_HEAD(&dmsk->dccpms_pending); - /* Check whether the peer got the reconciliation right (6.6.8) */ - if (dccp_feat_reconcile(&entry->val, plist, plen, server, 0) != *val) { - DCCP_WARN("Confirm selected the wrong value %u\n", *val); - return DCCP_RESET_CODE_OPTION_ERROR; + list_for_each_entry_safe(opt, next, &dmsk->dccpms_conf, dccpop_node) { + BUG_ON(opt == NULL); + if (opt->dccpop_val != NULL) + kfree(opt->dccpop_val); + kfree(opt); } - entry->val.sp.vec[0] = *val; - -confirmation_succeeded: - entry->state = FEAT_STABLE; - return 0; - -confirmation_failed: - DCCP_WARN("Confirmation failed\n"); - return is_mandatory ? DCCP_RESET_CODE_MANDATORY_ERROR - : DCCP_RESET_CODE_OPTION_ERROR; + INIT_LIST_HEAD(&dmsk->dccpms_conf); } -/** - * dccp_feat_handle_nn_established - Fast-path reception of NN options - * @sk: socket of an established DCCP connection - * @mandatory: whether @opt was preceded by a Mandatory option - * @opt: %DCCPO_CHANGE_L | %DCCPO_CONFIRM_R (NN only) - * @feat: NN number, one of %dccp_feature_numbers - * @val: NN value - * @len: length of @val in bytes - * This function combines the functionality of change_recv/confirm_recv, with - * the following differences (reset codes are the same): - * - cleanup after receiving the Confirm; - * - values are directly activated after successful parsing; - * - deliberately restricted to NN features. - * The restriction to NN features is essential since SP features can have non- - * predictable outcomes (depending on the remote configuration), and are inter- - * dependent (CCIDs for instance cause further dependencies). +EXPORT_SYMBOL_GPL(dccp_feat_clean); + +/* this is to be called only when a listening sock creates its child. It is + * assumed by the function---the confirm is not duplicated, but rather it is + * "passed on". */ -static u8 dccp_feat_handle_nn_established(struct sock *sk, u8 mandatory, u8 opt, - u8 feat, u8 *val, u8 len) +int dccp_feat_clone(struct sock *oldsk, struct sock *newsk) { - struct list_head *fn = &dccp_sk(sk)->dccps_featneg; - const bool local = (opt == DCCPO_CONFIRM_R); - struct dccp_feat_entry *entry; - u8 type = dccp_feat_type(feat); - dccp_feat_val fval; + struct dccp_minisock *olddmsk = dccp_msk(oldsk); + struct dccp_minisock *newdmsk = dccp_msk(newsk); + struct dccp_opt_pend *opt; + int rc = 0; - dccp_feat_print_opt(opt, feat, val, len, mandatory); + INIT_LIST_HEAD(&newdmsk->dccpms_pending); + INIT_LIST_HEAD(&newdmsk->dccpms_conf); - /* Ignore non-mandatory unknown and non-NN features */ - if (type == FEAT_UNKNOWN) { - if (local && !mandatory) - return 0; - goto fast_path_unknown; - } else if (type != FEAT_NN) { - return 0; - } - - /* - * We don't accept empty Confirms, since in fast-path feature - * negotiation the values are enabled immediately after sending - * the Change option. - * Empty Changes on the other hand are invalid (RFC 4340, 6.1). - */ - if (len == 0 || len > sizeof(fval.nn)) - goto fast_path_unknown; - - if (opt == DCCPO_CHANGE_L) { - fval.nn = dccp_decode_value_var(val, len); - if (!dccp_feat_is_valid_nn_val(feat, fval.nn)) - goto fast_path_unknown; + list_for_each_entry(opt, &olddmsk->dccpms_pending, dccpop_node) { + struct dccp_opt_pend *newopt; + /* copy the value of the option */ + u8 *val = kmemdup(opt->dccpop_val, opt->dccpop_len, GFP_ATOMIC); - if (dccp_feat_push_confirm(fn, feat, local, &fval) || - dccp_feat_activate(sk, feat, local, &fval)) - return DCCP_RESET_CODE_TOO_BUSY; + if (val == NULL) + goto out_clean; - /* set the `Ack Pending' flag to piggyback a Confirm */ - inet_csk_schedule_ack(sk); - - } else if (opt == DCCPO_CONFIRM_R) { - entry = dccp_feat_list_lookup(fn, feat, local); - if (entry == NULL || entry->state != FEAT_CHANGING) - return 0; - - fval.nn = dccp_decode_value_var(val, len); - if (fval.nn != entry->val.nn) { - DCCP_WARN("Bogus Confirm for non-existing value\n"); - goto fast_path_failed; + newopt = kmemdup(opt, sizeof(*newopt), GFP_ATOMIC); + if (newopt == NULL) { + kfree(val); + goto out_clean; } - /* It has been confirmed - so remove the entry */ - dccp_feat_list_pop(entry); + /* insert the option */ + newopt->dccpop_val = val; + list_add_tail(&newopt->dccpop_node, &newdmsk->dccpms_pending); - } else { - DCCP_WARN("Received illegal option %u\n", opt); - goto fast_path_failed; + /* XXX what happens with backlogs and multiple connections at + * once... + */ + /* the master socket no longer needs to worry about confirms */ + opt->dccpop_sc = NULL; /* it's not a memleak---new socket has it */ + + /* reset state for a new socket */ + opt->dccpop_conf = 0; } - return 0; -fast_path_unknown: - if (!mandatory) - return dccp_push_empty_confirm(fn, feat, local); + /* XXX not doing anything about the conf queue */ + +out: + return rc; -fast_path_failed: - return mandatory ? DCCP_RESET_CODE_MANDATORY_ERROR - : DCCP_RESET_CODE_OPTION_ERROR; +out_clean: + dccp_feat_clean(newdmsk); + rc = -ENOMEM; + goto out; } -/** - * dccp_feat_parse_options - Process Feature-Negotiation Options - * @sk: for general use and used by the client during connection setup - * @dreq: used by the server during connection setup - * @mandatory: whether @opt was preceded by a Mandatory option - * @opt: %DCCPO_CHANGE_L | %DCCPO_CHANGE_R | %DCCPO_CONFIRM_L | %DCCPO_CONFIRM_R - * @feat: one of %dccp_feature_numbers - * @val: value contents of @opt - * @len: length of @val in bytes - * Returns 0 on success, a Reset code for ending the connection otherwise. - */ -int dccp_feat_parse_options(struct sock *sk, struct dccp_request_sock *dreq, - u8 mandatory, u8 opt, u8 feat, u8 *val, u8 len) +EXPORT_SYMBOL_GPL(dccp_feat_clone); + +static int __dccp_feat_init(struct dccp_minisock *dmsk, u8 type, u8 feat, + u8 *val, u8 len) { - struct dccp_sock *dp = dccp_sk(sk); - struct list_head *fn = dreq ? &dreq->dreq_featneg : &dp->dccps_featneg; - bool server = false; + int rc = -ENOMEM; + u8 *copy = kmemdup(val, len, GFP_KERNEL); - switch (sk->sk_state) { - /* - * Negotiation during connection setup - */ - case DCCP_LISTEN: - server = true; /* fall through */ - case DCCP_REQUESTING: - switch (opt) { - case DCCPO_CHANGE_L: - case DCCPO_CHANGE_R: - return dccp_feat_change_recv(fn, mandatory, opt, feat, - val, len, server); - case DCCPO_CONFIRM_R: - case DCCPO_CONFIRM_L: - return dccp_feat_confirm_recv(fn, mandatory, opt, feat, - val, len, server); - } - break; - /* - * Support for exchanging NN options on an established connection - * This is currently restricted to Ack Ratio (RFC 4341, 6.1.2) - */ - case DCCP_OPEN: - case DCCP_PARTOPEN: - return dccp_feat_handle_nn_established(sk, mandatory, opt, feat, - val, len); + if (copy != NULL) { + rc = dccp_feat_change(dmsk, type, feat, copy, len, GFP_KERNEL); + if (rc) + kfree(copy); } - return 0; /* ignore FN options in all other states */ + return rc; } -/** - * dccp_feat_init - Seed feature negotiation with host-specific defaults - * This initialises global defaults, depending on the value of the sysctls. - * These can later be overridden by registering changes via setsockopt calls. - * The last link in the chain is finalise_settings, to make sure that between - * here and the start of actual feature negotiation no inconsistencies enter. - * - * All features not appearing below use either defaults or are otherwise - * later adjusted through dccp_feat_finalise_settings(). - */ -int dccp_feat_init(struct sock *sk) +int dccp_feat_init(struct dccp_minisock *dmsk) { - struct list_head *fn = &dccp_sk(sk)->dccps_featneg; - u8 on = 1, off = 0; int rc; - struct { - u8 *val; - u8 len; - } tx, rx; - - /* Non-negotiable (NN) features */ - rc = __feat_register_nn(fn, DCCPF_SEQUENCE_WINDOW, 0, - sysctl_dccp_sequence_window); - if (rc) - return rc; - /* Server-priority (SP) features */ - - /* Advertise that short seqnos are not supported (7.6.1) */ - rc = __feat_register_sp(fn, DCCPF_SHORT_SEQNOS, true, true, &off, 1); - if (rc) - return rc; + INIT_LIST_HEAD(&dmsk->dccpms_pending); + INIT_LIST_HEAD(&dmsk->dccpms_conf); - /* RFC 4340 12.1: "If a DCCP is not ECN capable, ..." */ - rc = __feat_register_sp(fn, DCCPF_ECN_INCAPABLE, true, true, &on, 1); + /* CCID L */ + rc = __dccp_feat_init(dmsk, DCCPO_CHANGE_L, DCCPF_CCID, + &dmsk->dccpms_tx_ccid, 1); if (rc) - return rc; - - /* - * We advertise the available list of CCIDs and reorder according to - * preferences, to avoid failure resulting from negotiating different - * singleton values (which always leads to failure). - * These settings can still (later) be overridden via sockopts. - */ - if (ccid_get_builtin_ccids(&tx.val, &tx.len) || - ccid_get_builtin_ccids(&rx.val, &rx.len)) - return -ENOBUFS; - - /* Pre-load all CCID modules that are going to be advertised */ - rc = -EUNATCH; - if (ccid_request_modules(tx.val, tx.len)) - goto free_ccid_lists; - - if (!dccp_feat_prefer(sysctl_dccp_tx_ccid, tx.val, tx.len) || - !dccp_feat_prefer(sysctl_dccp_rx_ccid, rx.val, rx.len)) - goto free_ccid_lists; + goto out; - rc = __feat_register_sp(fn, DCCPF_CCID, true, false, tx.val, tx.len); + /* CCID R */ + rc = __dccp_feat_init(dmsk, DCCPO_CHANGE_R, DCCPF_CCID, + &dmsk->dccpms_rx_ccid, 1); if (rc) - goto free_ccid_lists; + goto out; - rc = __feat_register_sp(fn, DCCPF_CCID, false, false, rx.val, rx.len); - -free_ccid_lists: - kfree(tx.val); - kfree(rx.val); + /* Ack ratio */ + rc = __dccp_feat_init(dmsk, DCCPO_CHANGE_L, DCCPF_ACK_RATIO, + &dmsk->dccpms_ack_ratio, 1); +out: return rc; } -int dccp_feat_activate_values(struct sock *sk, struct list_head *fn_list) -{ - struct dccp_sock *dp = dccp_sk(sk); - struct dccp_feat_entry *cur, *next; - int idx; - dccp_feat_val *fvals[DCCP_FEAT_SUPPORTED_MAX][2] = { - [0 ... DCCP_FEAT_SUPPORTED_MAX-1] = { NULL, NULL } - }; - - list_for_each_entry(cur, fn_list, node) { - /* - * An empty Confirm means that either an unknown feature type - * or an invalid value was present. In the first case there is - * nothing to activate, in the other the default value is used. - */ - if (cur->empty_confirm) - continue; +EXPORT_SYMBOL_GPL(dccp_feat_init); - idx = dccp_feat_index(cur->feat_num); - if (idx < 0) { - DCCP_BUG("Unknown feature %u", cur->feat_num); - goto activation_failed; - } - if (cur->state != FEAT_STABLE) { - DCCP_CRIT("Negotiation of %s %s failed in state %s", - cur->is_local ? "local" : "remote", - dccp_feat_fname(cur->feat_num), - dccp_feat_sname[cur->state]); - goto activation_failed; - } - fvals[idx][cur->is_local] = &cur->val; +#ifdef CONFIG_IP_DCCP_DEBUG +const char *dccp_feat_typename(const u8 type) +{ + switch(type) { + case DCCPO_CHANGE_L: return("ChangeL"); + case DCCPO_CONFIRM_L: return("ConfirmL"); + case DCCPO_CHANGE_R: return("ChangeR"); + case DCCPO_CONFIRM_R: return("ConfirmR"); + /* the following case must not appear in feature negotation */ + default: dccp_pr_debug("unknown type %d [BUG!]\n", type); } + return NULL; +} - /* - * Activate in decreasing order of index, so that the CCIDs are always - * activated as the last feature. This avoids the case where a CCID - * relies on the initialisation of one or more features that it depends - * on (e.g. Send NDP Count, Send Ack Vector, and Ack Ratio features). - */ - for (idx = DCCP_FEAT_SUPPORTED_MAX; --idx >= 0;) - if (__dccp_feat_activate(sk, idx, 0, fvals[idx][0]) || - __dccp_feat_activate(sk, idx, 1, fvals[idx][1])) { - DCCP_CRIT("Could not activate %d", idx); - goto activation_failed; - } +EXPORT_SYMBOL_GPL(dccp_feat_typename); - /* Clean up Change options which have been confirmed already */ - list_for_each_entry_safe(cur, next, fn_list, node) - if (!cur->needs_confirm) - dccp_feat_list_pop(cur); +const char *dccp_feat_name(const u8 feat) +{ + static const char *feature_names[] = { + [DCCPF_RESERVED] = "Reserved", + [DCCPF_CCID] = "CCID", + [DCCPF_SHORT_SEQNOS] = "Allow Short Seqnos", + [DCCPF_SEQUENCE_WINDOW] = "Sequence Window", + [DCCPF_ECN_INCAPABLE] = "ECN Incapable", + [DCCPF_ACK_RATIO] = "Ack Ratio", + [DCCPF_SEND_ACK_VECTOR] = "Send ACK Vector", + [DCCPF_SEND_NDP_COUNT] = "Send NDP Count", + [DCCPF_MIN_CSUM_COVER] = "Min. Csum Coverage", + [DCCPF_DATA_CHECKSUM] = "Send Data Checksum", + }; + if (feat > DCCPF_DATA_CHECKSUM && feat < DCCPF_MIN_CCID_SPECIFIC) + return feature_names[DCCPF_RESERVED]; - dccp_pr_debug("Activation OK\n"); - return 0; + if (feat >= DCCPF_MIN_CCID_SPECIFIC) + return "CCID-specific"; -activation_failed: - /* - * We clean up everything that may have been allocated, since - * it is difficult to track at which stage negotiation failed. - * This is ok, since all allocation functions below are robust - * against NULL arguments. - */ - ccid_hc_rx_delete(dp->dccps_hc_rx_ccid, sk); - ccid_hc_tx_delete(dp->dccps_hc_tx_ccid, sk); - dp->dccps_hc_rx_ccid = dp->dccps_hc_tx_ccid = NULL; - dccp_ackvec_free(dp->dccps_hc_rx_ackvec); - dp->dccps_hc_rx_ackvec = NULL; - return -1; + return feature_names[feat]; } + +EXPORT_SYMBOL_GPL(dccp_feat_name); +#endif /* CONFIG_IP_DCCP_DEBUG */ diff --git a/net/dccp/feat.h b/net/dccp/feat.h index 2217066e22d..e272222c7ac 100644 --- a/net/dccp/feat.h +++ b/net/dccp/feat.h @@ -3,134 +3,38 @@ /* * net/dccp/feat.h * - * Feature negotiation for the DCCP protocol (RFC 4340, section 6) - * Copyright (c) 2008 Gerrit Renker + * An implementation of the DCCP protocol * Copyright (c) 2005 Andrea Bittau * - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. */ + #include #include "dccp.h" -/* - * Known limit values - */ -/* Ack Ratio takes 2-byte integer values (11.3) */ -#define DCCPF_ACK_RATIO_MAX 0xFFFF -/* Wmin=32 and Wmax=2^46-1 from 7.5.2 */ -#define DCCPF_SEQ_WMIN 32 -#define DCCPF_SEQ_WMAX 0x3FFFFFFFFFFFull -/* Maximum number of SP values that fit in a single (Confirm) option */ -#define DCCP_FEAT_MAX_SP_VALS (DCCP_SINGLE_OPT_MAXLEN - 2) - -enum dccp_feat_type { - FEAT_AT_RX = 1, /* located at RX side of half-connection */ - FEAT_AT_TX = 2, /* located at TX side of half-connection */ - FEAT_SP = 4, /* server-priority reconciliation (6.3.1) */ - FEAT_NN = 8, /* non-negotiable reconciliation (6.3.2) */ - FEAT_UNKNOWN = 0xFF /* not understood or invalid feature */ -}; - -enum dccp_feat_state { - FEAT_DEFAULT = 0, /* using default values from 6.4 */ - FEAT_INITIALISING, /* feature is being initialised */ - FEAT_CHANGING, /* Change sent but not confirmed yet */ - FEAT_UNSTABLE, /* local modification in state CHANGING */ - FEAT_STABLE /* both ends (think they) agree */ -}; +#ifdef CONFIG_IP_DCCP_DEBUG +extern const char *dccp_feat_typename(const u8 type); +extern const char *dccp_feat_name(const u8 feat); -/** - * dccp_feat_val - Container for SP or NN feature values - * @nn: single NN value - * @sp.vec: single SP value plus optional preference list - * @sp.len: length of @sp.vec in bytes - */ -typedef union { - u64 nn; - struct { - u8 *vec; - u8 len; - } sp; -} dccp_feat_val; - -/** - * struct feat_entry - Data structure to perform feature negotiation - * @feat_num: one of %dccp_feature_numbers - * @val: feature's current value (SP features may have preference list) - * @state: feature's current state - * @needs_mandatory: whether Mandatory options should be sent - * @needs_confirm: whether to send a Confirm instead of a Change - * @empty_confirm: whether to send an empty Confirm (depends on @needs_confirm) - * @is_local: feature location (1) or feature-remote (0) - * @node: list pointers, entries arranged in FIFO order - */ -struct dccp_feat_entry { - u8 feat_num; - dccp_feat_val val; - enum dccp_feat_state state:8; - bool needs_mandatory:1, - needs_confirm:1, - empty_confirm:1, - is_local:1; - - struct list_head node; -}; - -static inline u8 dccp_feat_genopt(struct dccp_feat_entry *entry) +static inline void dccp_feat_debug(const u8 type, const u8 feat, const u8 val) { - if (entry->needs_confirm) - return entry->is_local ? DCCPO_CONFIRM_L : DCCPO_CONFIRM_R; - return entry->is_local ? DCCPO_CHANGE_L : DCCPO_CHANGE_R; + dccp_pr_debug("%s(%s (%d), %d)\n", dccp_feat_typename(type), + dccp_feat_name(feat), feat, val); } +#else +#define dccp_feat_debug(type, feat, val) +#endif /* CONFIG_IP_DCCP_DEBUG */ + +extern int dccp_feat_change(struct dccp_minisock *dmsk, u8 type, u8 feature, + u8 *val, u8 len, gfp_t gfp); +extern int dccp_feat_change_recv(struct sock *sk, u8 type, u8 feature, + u8 *val, u8 len); +extern int dccp_feat_confirm_recv(struct sock *sk, u8 type, u8 feature, + u8 *val, u8 len); +extern void dccp_feat_clean(struct dccp_minisock *dmsk); +extern int dccp_feat_clone(struct sock *oldsk, struct sock *newsk); +extern int dccp_feat_init(struct dccp_minisock *dmsk); -/** - * struct ccid_dependency - Track changes resulting from choosing a CCID - * @dependent_feat: one of %dccp_feature_numbers - * @is_local: local (1) or remote (0) @dependent_feat - * @is_mandatory: whether presence of @dependent_feat is mission-critical or not - * @val: corresponding default value for @dependent_feat (u8 is sufficient here) - */ -struct ccid_dependency { - u8 dependent_feat; - bool is_local:1, - is_mandatory:1; - u8 val; -}; - -/* - * Sysctls to seed defaults for feature negotiation - */ -extern unsigned long sysctl_dccp_sequence_window; -extern int sysctl_dccp_rx_ccid; -extern int sysctl_dccp_tx_ccid; - -extern int dccp_feat_init(struct sock *sk); -extern void dccp_feat_initialise_sysctls(void); -extern int dccp_feat_register_sp(struct sock *sk, u8 feat, u8 is_local, - u8 const *list, u8 len); -extern int dccp_feat_register_nn(struct sock *sk, u8 feat, u64 val); -extern int dccp_feat_parse_options(struct sock *, struct dccp_request_sock *, - u8 mand, u8 opt, u8 feat, u8 *val, u8 len); -extern int dccp_feat_clone_list(struct list_head const *, struct list_head *); - -/* - * Encoding variable-length options and their maximum length. - * - * This affects NN options (SP options are all u8) and other variable-length - * options (see table 3 in RFC 4340). The limit is currently given the Sequence - * Window NN value (sec. 7.5.2) and the NDP count (sec. 7.7) option, all other - * options consume less than 6 bytes (timestamps are 4 bytes). - * When updating this constant (e.g. due to new internet drafts / RFCs), make - * sure that you also update all code which refers to it. - */ -#define DCCP_OPTVAL_MAXLEN 6 - -extern void dccp_encode_value_var(const u64 value, u8 *to, const u8 len); -extern u64 dccp_decode_value_var(const u8 *bf, const u8 len); - -extern int dccp_insert_option_mandatory(struct sk_buff *skb); -extern int dccp_insert_fn_opt(struct sk_buff *skb, u8 type, u8 feat, - u8 *val, u8 len, bool repeat_first); #endif /* _DCCP_FEAT_H */ diff --git a/net/dccp/input.c b/net/dccp/input.c index df0e6714aa1..779d0ed9ae9 100644 --- a/net/dccp/input.c +++ b/net/dccp/input.c @@ -159,15 +159,13 @@ static void dccp_rcv_reset(struct sock *sk, struct sk_buff *skb) dccp_time_wait(sk, DCCP_TIME_WAIT, 0); } -static void dccp_handle_ackvec_processing(struct sock *sk, struct sk_buff *skb) +static void dccp_event_ack_recv(struct sock *sk, struct sk_buff *skb) { - struct dccp_ackvec *av = dccp_sk(sk)->dccps_hc_rx_ackvec; + struct dccp_sock *dp = dccp_sk(sk); - if (av == NULL) - return; - if (DCCP_SKB_CB(skb)->dccpd_ack_seq != DCCP_PKT_WITHOUT_ACK_SEQ) - dccp_ackvec_clear_state(av, DCCP_SKB_CB(skb)->dccpd_ack_seq); - dccp_ackvec_input(av, skb); + if (dccp_msk(sk)->dccpms_send_ack_vector) + dccp_ackvec_check_rcv_ackno(dp->dccps_hc_rx_ackvec, sk, + DCCP_SKB_CB(skb)->dccpd_ack_seq); } static void dccp_deliver_input_to_ccids(struct sock *sk, struct sk_buff *skb) @@ -366,13 +364,22 @@ discard: int dccp_rcv_established(struct sock *sk, struct sk_buff *skb, const struct dccp_hdr *dh, const unsigned len) { + struct dccp_sock *dp = dccp_sk(sk); + if (dccp_check_seqno(sk, skb)) goto discard; if (dccp_parse_options(sk, NULL, skb)) return 1; - dccp_handle_ackvec_processing(sk, skb); + if (DCCP_SKB_CB(skb)->dccpd_ack_seq != DCCP_PKT_WITHOUT_ACK_SEQ) + dccp_event_ack_recv(sk, skb); + + if (dccp_msk(sk)->dccpms_send_ack_vector && + dccp_ackvec_add(dp->dccps_hc_rx_ackvec, sk, + DCCP_SKB_CB(skb)->dccpd_seq, + DCCP_ACKVEC_STATE_RECEIVED)) + goto discard; dccp_deliver_input_to_ccids(sk, skb); return __dccp_rcv_established(sk, skb, dh, len); @@ -414,33 +421,40 @@ static int dccp_rcv_request_sent_state_process(struct sock *sk, goto out_invalid_packet; } - /* - * If option processing (Step 8) failed, return 1 here so that - * dccp_v4_do_rcv() sends a Reset. The Reset code depends on - * the option type and is set in dccp_parse_options(). - */ if (dccp_parse_options(sk, NULL, skb)) - return 1; + goto out_invalid_packet; /* Obtain usec RTT sample from SYN exchange (used by CCID 3) */ if (likely(dp->dccps_options_received.dccpor_timestamp_echo)) dp->dccps_syn_rtt = dccp_sample_rtt(sk, 10 * (tstamp - dp->dccps_options_received.dccpor_timestamp_echo)); + if (dccp_msk(sk)->dccpms_send_ack_vector && + dccp_ackvec_add(dp->dccps_hc_rx_ackvec, sk, + DCCP_SKB_CB(skb)->dccpd_seq, + DCCP_ACKVEC_STATE_RECEIVED)) + goto out_invalid_packet; /* FIXME: change error code */ + /* Stop the REQUEST timer */ inet_csk_clear_xmit_timer(sk, ICSK_TIME_RETRANS); WARN_ON(sk->sk_send_head == NULL); kfree_skb(sk->sk_send_head); sk->sk_send_head = NULL; + dp->dccps_isr = DCCP_SKB_CB(skb)->dccpd_seq; + dccp_update_gsr(sk, dp->dccps_isr); /* - * Set ISR, GSR from packet. ISS was set in dccp_v{4,6}_connect - * and GSS in dccp_transmit_skb(). Setting AWL/AWH and SWL/SWH - * is done as part of activating the feature values below, since - * these settings depend on the local/remote Sequence Window - * features, which were undefined or not confirmed until now. + * SWL and AWL are initially adjusted so that they are not less than + * the initial Sequence Numbers received and sent, respectively: + * SWL := max(GSR + 1 - floor(W/4), ISR), + * AWL := max(GSS - W' + 1, ISS). + * These adjustments MUST be applied only at the beginning of the + * connection. + * + * AWL was adjusted in dccp_v4_connect -acme */ - dp->dccps_gsr = dp->dccps_isr = DCCP_SKB_CB(skb)->dccpd_seq; + dccp_set_seqno(&dp->dccps_swl, + max48(dp->dccps_swl, dp->dccps_isr)); dccp_sync_mss(sk, icsk->icsk_pmtu_cookie); @@ -461,15 +475,6 @@ static int dccp_rcv_request_sent_state_process(struct sock *sk, */ dccp_set_state(sk, DCCP_PARTOPEN); - /* - * If feature negotiation was successful, activate features now; - * an activation failure means that this host could not activate - * one ore more features (e.g. insufficient memory), which would - * leave at least one feature in an undefined state. - */ - if (dccp_feat_activate_values(sk, &dp->dccps_featneg)) - goto unable_to_proceed; - /* Make sure socket is routed, for correct metrics. */ icsk->icsk_af_ops->rebuild_header(sk); @@ -504,16 +509,6 @@ out_invalid_packet: /* dccp_v4_do_rcv will send a reset */ DCCP_SKB_CB(skb)->dccpd_reset_code = DCCP_RESET_CODE_PACKET_ERROR; return 1; - -unable_to_proceed: - DCCP_SKB_CB(skb)->dccpd_reset_code = DCCP_RESET_CODE_ABORTED; - /* - * We mark this socket as no longer usable, so that the loop in - * dccp_sendmsg() terminates and the application gets notified. - */ - dccp_set_state(sk, DCCP_CLOSED); - sk->sk_err = ECOMM; - return 1; } static int dccp_rcv_respond_partopen_state_process(struct sock *sk, @@ -595,6 +590,8 @@ int dccp_rcv_state_process(struct sock *sk, struct sk_buff *skb, if (inet_csk(sk)->icsk_af_ops->conn_request(sk, skb) < 0) return 1; + + /* FIXME: do congestion control initialization */ goto discard; } if (dh->dccph_type == DCCP_PKT_RESET) @@ -603,35 +600,29 @@ int dccp_rcv_state_process(struct sock *sk, struct sk_buff *skb, /* Caller (dccp_v4_do_rcv) will send Reset */ dcb->dccpd_reset_code = DCCP_RESET_CODE_NO_CONNECTION; return 1; - } else if (sk->sk_state == DCCP_CLOSED) { - dcb->dccpd_reset_code = DCCP_RESET_CODE_NO_CONNECTION; - return 1; } - /* Step 6: Check sequence numbers (omitted in LISTEN/REQUEST state) */ - if (sk->sk_state != DCCP_REQUESTING && dccp_check_seqno(sk, skb)) - goto discard; + if (sk->sk_state != DCCP_REQUESTING) { + if (dccp_check_seqno(sk, skb)) + goto discard; - /* - * Step 7: Check for unexpected packet types - * If (S.is_server and P.type == Response) - * or (S.is_client and P.type == Request) - * or (S.state == RESPOND and P.type == Data), - * Send Sync packet acknowledging P.seqno - * Drop packet and return - */ - if ((dp->dccps_role != DCCP_ROLE_CLIENT && - dh->dccph_type == DCCP_PKT_RESPONSE) || - (dp->dccps_role == DCCP_ROLE_CLIENT && - dh->dccph_type == DCCP_PKT_REQUEST) || - (sk->sk_state == DCCP_RESPOND && dh->dccph_type == DCCP_PKT_DATA)) { - dccp_send_sync(sk, dcb->dccpd_seq, DCCP_PKT_SYNC); - goto discard; - } + /* + * Step 8: Process options and mark acknowledgeable + */ + if (dccp_parse_options(sk, NULL, skb)) + return 1; - /* Step 8: Process options */ - if (dccp_parse_options(sk, NULL, skb)) - return 1; + if (dcb->dccpd_ack_seq != DCCP_PKT_WITHOUT_ACK_SEQ) + dccp_event_ack_recv(sk, skb); + + if (dccp_msk(sk)->dccpms_send_ack_vector && + dccp_ackvec_add(dp->dccps_hc_rx_ackvec, sk, + DCCP_SKB_CB(skb)->dccpd_seq, + DCCP_ACKVEC_STATE_RECEIVED)) + goto discard; + + dccp_deliver_input_to_ccids(sk, skb); + } /* * Step 9: Process Reset @@ -640,22 +631,44 @@ int dccp_rcv_state_process(struct sock *sk, struct sk_buff *skb, * S.state := TIMEWAIT * Set TIMEWAIT timer * Drop packet and return - */ + */ if (dh->dccph_type == DCCP_PKT_RESET) { dccp_rcv_reset(sk, skb); return 0; - } else if (dh->dccph_type == DCCP_PKT_CLOSEREQ) { /* Step 13 */ + /* + * Step 7: Check for unexpected packet types + * If (S.is_server and P.type == Response) + * or (S.is_client and P.type == Request) + * or (S.state == RESPOND and P.type == Data), + * Send Sync packet acknowledging P.seqno + * Drop packet and return + */ + } else if ((dp->dccps_role != DCCP_ROLE_CLIENT && + dh->dccph_type == DCCP_PKT_RESPONSE) || + (dp->dccps_role == DCCP_ROLE_CLIENT && + dh->dccph_type == DCCP_PKT_REQUEST) || + (sk->sk_state == DCCP_RESPOND && + dh->dccph_type == DCCP_PKT_DATA)) { + dccp_send_sync(sk, dcb->dccpd_seq, DCCP_PKT_SYNC); + goto discard; + } else if (dh->dccph_type == DCCP_PKT_CLOSEREQ) { if (dccp_rcv_closereq(sk, skb)) return 0; goto discard; - } else if (dh->dccph_type == DCCP_PKT_CLOSE) { /* Step 14 */ + } else if (dh->dccph_type == DCCP_PKT_CLOSE) { if (dccp_rcv_close(sk, skb)) return 0; goto discard; } switch (sk->sk_state) { + case DCCP_CLOSED: + dcb->dccpd_reset_code = DCCP_RESET_CODE_NO_CONNECTION; + return 1; + case DCCP_REQUESTING: + /* FIXME: do congestion control initialization */ + queued = dccp_rcv_request_sent_state_process(sk, skb, dh, len); if (queued >= 0) return queued; @@ -663,12 +676,8 @@ int dccp_rcv_state_process(struct sock *sk, struct sk_buff *skb, __kfree_skb(skb); return 0; - case DCCP_PARTOPEN: - /* Step 8: if using Ack Vectors, mark packet acknowledgeable */ - dccp_handle_ackvec_processing(sk, skb); - dccp_deliver_input_to_ccids(sk, skb); - /* fall through */ case DCCP_RESPOND: + case DCCP_PARTOPEN: queued = dccp_rcv_respond_partopen_state_process(sk, skb, dh, len); break; @@ -707,7 +716,16 @@ u32 dccp_sample_rtt(struct sock *sk, long delta) /* dccpor_elapsed_time is either zeroed out or set and > 0 */ delta -= dccp_sk(sk)->dccps_options_received.dccpor_elapsed_time * 10; - return dccp_sane_rtt(delta); + if (unlikely(delta <= 0)) { + DCCP_WARN("unusable RTT sample %ld, using min\n", delta); + return DCCP_SANE_RTT_MIN; + } + if (unlikely(delta > DCCP_SANE_RTT_MAX)) { + DCCP_WARN("RTT sample %ld too large, using max\n", delta); + return DCCP_SANE_RTT_MAX; + } + + return delta; } EXPORT_SYMBOL_GPL(dccp_sample_rtt); diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c index b623f6b2548..882c5c4de69 100644 --- a/net/dccp/ipv4.c +++ b/net/dccp/ipv4.c @@ -545,7 +545,6 @@ out: static void dccp_v4_reqsk_destructor(struct request_sock *req) { - dccp_feat_list_purge(&dccp_rsk(req)->dreq_featneg); kfree(inet_rsk(req)->opt); } @@ -596,8 +595,7 @@ int dccp_v4_conn_request(struct sock *sk, struct sk_buff *skb) if (req == NULL) goto drop; - if (dccp_reqsk_init(req, dccp_sk(sk), skb)) - goto drop_and_free; + dccp_reqsk_init(req, skb); dreq = dccp_rsk(req); if (dccp_parse_options(sk, dreq, skb)) diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c index ad6212e0043..5e1ee0da2c4 100644 --- a/net/dccp/ipv6.c +++ b/net/dccp/ipv6.c @@ -302,7 +302,6 @@ done: static void dccp_v6_reqsk_destructor(struct request_sock *req) { - dccp_feat_list_purge(&dccp_rsk(req)->dreq_featneg); if (inet6_rsk(req)->pktopts != NULL) kfree_skb(inet6_rsk(req)->pktopts); } @@ -425,8 +424,7 @@ static int dccp_v6_conn_request(struct sock *sk, struct sk_buff *skb) if (req == NULL) goto drop; - if (dccp_reqsk_init(req, dccp_sk(sk), skb)) - goto drop_and_free; + dccp_reqsk_init(req, skb); dreq = dccp_rsk(req); if (dccp_parse_options(sk, dreq, skb)) diff --git a/net/dccp/minisocks.c b/net/dccp/minisocks.c index f4d9c8f60ed..b2804e2d1b8 100644 --- a/net/dccp/minisocks.c +++ b/net/dccp/minisocks.c @@ -42,6 +42,16 @@ struct inet_timewait_death_row dccp_death_row = { EXPORT_SYMBOL_GPL(dccp_death_row); +void dccp_minisock_init(struct dccp_minisock *dmsk) +{ + dmsk->dccpms_sequence_window = sysctl_dccp_feat_sequence_window; + dmsk->dccpms_rx_ccid = sysctl_dccp_feat_rx_ccid; + dmsk->dccpms_tx_ccid = sysctl_dccp_feat_tx_ccid; + dmsk->dccpms_ack_ratio = sysctl_dccp_feat_ack_ratio; + dmsk->dccpms_send_ack_vector = sysctl_dccp_feat_send_ack_vector; + dmsk->dccpms_send_ndp_count = sysctl_dccp_feat_send_ndp_count; +} + void dccp_time_wait(struct sock *sk, int state, int timeo) { struct inet_timewait_sock *tw = NULL; @@ -102,9 +112,10 @@ struct sock *dccp_create_openreq_child(struct sock *sk, struct sock *newsk = inet_csk_clone(sk, req, GFP_ATOMIC); if (newsk != NULL) { - struct dccp_request_sock *dreq = dccp_rsk(req); + const struct dccp_request_sock *dreq = dccp_rsk(req); struct inet_connection_sock *newicsk = inet_csk(newsk); struct dccp_sock *newdp = dccp_sk(newsk); + struct dccp_minisock *newdmsk = dccp_msk(newsk); newdp->dccps_role = DCCP_ROLE_SERVER; newdp->dccps_hc_rx_ackvec = NULL; @@ -114,32 +125,65 @@ struct sock *dccp_create_openreq_child(struct sock *sk, newdp->dccps_timestamp_time = dreq->dreq_timestamp_time; newicsk->icsk_rto = DCCP_TIMEOUT_INIT; - INIT_LIST_HEAD(&newdp->dccps_featneg); + if (dccp_feat_clone(sk, newsk)) + goto out_free; + + if (newdmsk->dccpms_send_ack_vector) { + newdp->dccps_hc_rx_ackvec = + dccp_ackvec_alloc(GFP_ATOMIC); + if (unlikely(newdp->dccps_hc_rx_ackvec == NULL)) + goto out_free; + } + + newdp->dccps_hc_rx_ccid = + ccid_hc_rx_new(newdmsk->dccpms_rx_ccid, + newsk, GFP_ATOMIC); + newdp->dccps_hc_tx_ccid = + ccid_hc_tx_new(newdmsk->dccpms_tx_ccid, + newsk, GFP_ATOMIC); + if (unlikely(newdp->dccps_hc_rx_ccid == NULL || + newdp->dccps_hc_tx_ccid == NULL)) { + dccp_ackvec_free(newdp->dccps_hc_rx_ackvec); + ccid_hc_rx_delete(newdp->dccps_hc_rx_ccid, newsk); + ccid_hc_tx_delete(newdp->dccps_hc_tx_ccid, newsk); +out_free: + /* It is still raw copy of parent, so invalidate + * destructor and make plain sk_free() */ + newsk->sk_destruct = NULL; + sk_free(newsk); + return NULL; + } + /* * Step 3: Process LISTEN state * * Choose S.ISS (initial seqno) or set from Init Cookies * Initialize S.GAR := S.ISS - * Set S.ISR, S.GSR from packet (or Init Cookies) - * - * Setting AWL/AWH and SWL/SWH happens as part of the feature - * activation below, as these windows all depend on the local - * and remote Sequence Window feature values (7.5.2). + * Set S.ISR, S.GSR, S.SWL, S.SWH from packet or Init Cookies */ - newdp->dccps_gss = newdp->dccps_iss = dreq->dreq_iss; - newdp->dccps_gar = newdp->dccps_iss; - newdp->dccps_gsr = newdp->dccps_isr = dreq->dreq_isr; + + /* See dccp_v4_conn_request */ + newdmsk->dccpms_sequence_window = req->rcv_wnd; + + newdp->dccps_gar = newdp->dccps_iss = dreq->dreq_iss; + dccp_update_gss(newsk, dreq->dreq_iss); + + newdp->dccps_isr = dreq->dreq_isr; + dccp_update_gsr(newsk, dreq->dreq_isr); /* - * Activate features: initialise CCIDs, sequence windows etc. + * SWL and AWL are initially adjusted so that they are not less than + * the initial Sequence Numbers received and sent, respectively: + * SWL := max(GSR + 1 - floor(W/4), ISR), + * AWL := max(GSS - W' + 1, ISS). + * These adjustments MUST be applied only at the beginning of the + * connection. */ - if (dccp_feat_activate_values(newsk, &dreq->dreq_featneg)) { - /* It is still raw copy of parent, so invalidate - * destructor and make plain sk_free() */ - newsk->sk_destruct = NULL; - sk_free(newsk); - return NULL; - } + dccp_set_seqno(&newdp->dccps_swl, + max48(newdp->dccps_swl, newdp->dccps_isr)); + dccp_set_seqno(&newdp->dccps_awl, + max48(newdp->dccps_awl, newdp->dccps_iss)); + dccp_init_xmit_timers(newsk); DCCP_INC_STATS_BH(DCCP_MIB_PASSIVEOPENS); @@ -260,17 +304,14 @@ void dccp_reqsk_send_ack(struct sock *sk, struct sk_buff *skb, EXPORT_SYMBOL_GPL(dccp_reqsk_send_ack); -int dccp_reqsk_init(struct request_sock *req, - struct dccp_sock const *dp, struct sk_buff const *skb) +void dccp_reqsk_init(struct request_sock *req, struct sk_buff *skb) { struct dccp_request_sock *dreq = dccp_rsk(req); inet_rsk(req)->rmt_port = dccp_hdr(skb)->dccph_sport; inet_rsk(req)->acked = 0; + req->rcv_wnd = sysctl_dccp_feat_sequence_window; dreq->dreq_timestamp_echo = 0; - - /* inherit feature negotiation options from listening socket */ - return dccp_feat_clone_list(&dp->dccps_featneg, &dreq->dreq_featneg); } EXPORT_SYMBOL_GPL(dccp_reqsk_init); diff --git a/net/dccp/options.c b/net/dccp/options.c index e5a32979d7d..0809b63cb05 100644 --- a/net/dccp/options.c +++ b/net/dccp/options.c @@ -23,20 +23,23 @@ #include "dccp.h" #include "feat.h" -u64 dccp_decode_value_var(const u8 *bf, const u8 len) +int sysctl_dccp_feat_sequence_window = DCCPF_INITIAL_SEQUENCE_WINDOW; +int sysctl_dccp_feat_rx_ccid = DCCPF_INITIAL_CCID; +int sysctl_dccp_feat_tx_ccid = DCCPF_INITIAL_CCID; +int sysctl_dccp_feat_ack_ratio = DCCPF_INITIAL_ACK_RATIO; +int sysctl_dccp_feat_send_ack_vector = DCCPF_INITIAL_SEND_ACK_VECTOR; +int sysctl_dccp_feat_send_ndp_count = DCCPF_INITIAL_SEND_NDP_COUNT; + +static u32 dccp_decode_value_var(const unsigned char *bf, const u8 len) { - u64 value = 0; + u32 value = 0; - if (len >= DCCP_OPTVAL_MAXLEN) - value += ((u64)*bf++) << 40; - if (len > 4) - value += ((u64)*bf++) << 32; if (len > 3) - value += ((u64)*bf++) << 24; + value += *bf++ << 24; if (len > 2) - value += ((u64)*bf++) << 16; + value += *bf++ << 16; if (len > 1) - value += ((u64)*bf++) << 8; + value += *bf++ << 8; if (len > 0) value += *bf; @@ -54,6 +57,7 @@ int dccp_parse_options(struct sock *sk, struct dccp_request_sock *dreq, struct dccp_sock *dp = dccp_sk(sk); const struct dccp_hdr *dh = dccp_hdr(skb); const u8 pkt_type = DCCP_SKB_CB(skb)->dccpd_type; + u64 ackno = DCCP_SKB_CB(skb)->dccpd_ack_seq; unsigned char *options = (unsigned char *)dh + dccp_hdr_len(skb); unsigned char *opt_ptr = options; const unsigned char *opt_end = (unsigned char *)dh + @@ -95,11 +99,18 @@ int dccp_parse_options(struct sock *sk, struct dccp_request_sock *dreq, } /* + * CCID-Specific Options (from RFC 4340, sec. 10.3): + * + * Option numbers 128 through 191 are for options sent from the + * HC-Sender to the HC-Receiver; option numbers 192 through 255 + * are for options sent from the HC-Receiver to the HC-Sender. + * * CCID-specific options are ignored during connection setup, as * negotiation may still be in progress (see RFC 4340, 10.3). * The same applies to Ack Vectors, as these depend on the CCID. + * */ - if (dreq != NULL && (opt >= DCCPO_MIN_RX_CCID_SPECIFIC || + if (dreq != NULL && (opt >= 128 || opt == DCCPO_ACK_VECTOR_0 || opt == DCCPO_ACK_VECTOR_1)) goto ignore_option; @@ -120,13 +131,43 @@ int dccp_parse_options(struct sock *sk, struct dccp_request_sock *dreq, dccp_pr_debug("%s opt: NDP count=%llu\n", dccp_role(sk), (unsigned long long)opt_recv->dccpor_ndp); break; - case DCCPO_CHANGE_L ... DCCPO_CONFIRM_R: - if (pkt_type == DCCP_PKT_DATA) /* RFC 4340, 6 */ + case DCCPO_CHANGE_L: + /* fall through */ + case DCCPO_CHANGE_R: + if (pkt_type == DCCP_PKT_DATA) break; - rc = dccp_feat_parse_options(sk, dreq, mandatory, opt, - *value, value + 1, len - 1); - if (rc) - goto out_featneg_failed; + if (len < 2) + goto out_invalid_option; + rc = dccp_feat_change_recv(sk, opt, *value, value + 1, + len - 1); + /* + * When there is a change error, change_recv is + * responsible for dealing with it. i.e. reply with an + * empty confirm. + * If the change was mandatory, then we need to die. + */ + if (rc && mandatory) + goto out_invalid_option; + break; + case DCCPO_CONFIRM_L: + /* fall through */ + case DCCPO_CONFIRM_R: + if (pkt_type == DCCP_PKT_DATA) + break; + if (len < 2) /* FIXME this disallows empty confirm */ + goto out_invalid_option; + if (dccp_feat_confirm_recv(sk, opt, *value, + value + 1, len - 1)) + goto out_invalid_option; + break; + case DCCPO_ACK_VECTOR_0: + case DCCPO_ACK_VECTOR_1: + if (dccp_packet_without_ack(skb)) /* RFC 4340, 11.4 */ + break; + + if (dccp_msk(sk)->dccpms_send_ack_vector && + dccp_ackvec_parse(sk, skb, &ackno, opt, value, len)) + goto out_invalid_option; break; case DCCPO_TIMESTAMP: if (len != 4) @@ -154,8 +195,6 @@ int dccp_parse_options(struct sock *sk, struct dccp_request_sock *dreq, dccp_role(sk), ntohl(opt_val), (unsigned long long) DCCP_SKB_CB(skb)->dccpd_ack_seq); - /* schedule an Ack in case this sender is quiescent */ - inet_csk_schedule_ack(sk); break; case DCCPO_TIMESTAMP_ECHO: if (len != 4 && len != 6 && len != 8) @@ -212,25 +251,23 @@ int dccp_parse_options(struct sock *sk, struct dccp_request_sock *dreq, dccp_pr_debug("%s rx opt: ELAPSED_TIME=%d\n", dccp_role(sk), elapsed_time); break; - case DCCPO_MIN_RX_CCID_SPECIFIC ... DCCPO_MAX_RX_CCID_SPECIFIC: + case 128 ... 191: { + const u16 idx = value - options; + if (ccid_hc_rx_parse_options(dp->dccps_hc_rx_ccid, sk, - pkt_type, opt, value, len)) + opt, len, idx, + value) != 0) goto out_invalid_option; + } break; - case DCCPO_ACK_VECTOR_0: - case DCCPO_ACK_VECTOR_1: - if (dccp_packet_without_ack(skb)) /* RFC 4340, 11.4 */ - break; - /* - * Ack vectors are processed by the TX CCID if it is - * interested. The RX CCID need not parse Ack Vectors, - * since it is only interested in clearing old state. - * Fall through. - */ - case DCCPO_MIN_TX_CCID_SPECIFIC ... DCCPO_MAX_TX_CCID_SPECIFIC: + case 192 ... 255: { + const u16 idx = value - options; + if (ccid_hc_tx_parse_options(dp->dccps_hc_tx_ccid, sk, - pkt_type, opt, value, len)) + opt, len, idx, + value) != 0) goto out_invalid_option; + } break; default: DCCP_CRIT("DCCP(%p): option %d(len=%d) not " @@ -252,10 +289,8 @@ out_nonsensical_length: out_invalid_option: DCCP_INC_STATS_BH(DCCP_MIB_INVALIDOPT); - rc = DCCP_RESET_CODE_OPTION_ERROR; -out_featneg_failed: - DCCP_WARN("DCCP(%p): Option %d (len=%d) error=%u\n", sk, opt, len, rc); - DCCP_SKB_CB(skb)->dccpd_reset_code = rc; + DCCP_SKB_CB(skb)->dccpd_reset_code = DCCP_RESET_CODE_OPTION_ERROR; + DCCP_WARN("DCCP(%p): invalid option %d, len=%d", sk, opt, len); DCCP_SKB_CB(skb)->dccpd_reset_data[0] = opt; DCCP_SKB_CB(skb)->dccpd_reset_data[1] = len > 0 ? value[0] : 0; DCCP_SKB_CB(skb)->dccpd_reset_data[2] = len > 1 ? value[1] : 0; @@ -264,12 +299,9 @@ out_featneg_failed: EXPORT_SYMBOL_GPL(dccp_parse_options); -void dccp_encode_value_var(const u64 value, u8 *to, const u8 len) +static void dccp_encode_value_var(const u32 value, unsigned char *to, + const unsigned int len) { - if (len >= DCCP_OPTVAL_MAXLEN) - *to++ = (value & 0xFF0000000000ull) >> 40; - if (len > 4) - *to++ = (value & 0xFF00000000ull) >> 32; if (len > 3) *to++ = (value & 0xFF000000) >> 24; if (len > 2) @@ -429,140 +461,92 @@ static int dccp_insert_option_timestamp_echo(struct dccp_sock *dp, return 0; } -static int dccp_insert_option_ackvec(struct sock *sk, struct sk_buff *skb) +static int dccp_insert_feat_opt(struct sk_buff *skb, u8 type, u8 feat, + u8 *val, u8 len) { - struct dccp_sock *dp = dccp_sk(sk); - struct dccp_ackvec *av = dp->dccps_hc_rx_ackvec; - struct dccp_skb_cb *dcb = DCCP_SKB_CB(skb); - const u16 buflen = dccp_ackvec_buflen(av); - /* Figure out how many options do we need to represent the ackvec */ - const u8 nr_opts = DIV_ROUND_UP(buflen, DCCP_SINGLE_OPT_MAXLEN); - u16 len = buflen + 2 * nr_opts; - u8 i, nonce = 0; - const unsigned char *tail, *from; - unsigned char *to; + u8 *to; - if (dcb->dccpd_opt_len + len > DCCP_MAX_OPT_LEN) { - DCCP_WARN("Lacking space for %u bytes on %s packet\n", len, - dccp_packet_name(dcb->dccpd_type)); + if (DCCP_SKB_CB(skb)->dccpd_opt_len + len + 3 > DCCP_MAX_OPT_LEN) { + DCCP_WARN("packet too small for feature %d option!\n", feat); return -1; } - /* - * Since Ack Vectors are variable-length, we can not always predict - * their size. To catch exception cases where the space is running out - * on the skb, a separate Sync is scheduled to carry the Ack Vector. - */ - if (len > DCCPAV_MIN_OPTLEN && - len + dcb->dccpd_opt_len + skb->len > dp->dccps_mss_cache) { - DCCP_WARN("No space left for Ack Vector (%u) on skb (%u+%u), " - "MPS=%u ==> reduce payload size?\n", len, skb->len, - dcb->dccpd_opt_len, dp->dccps_mss_cache); - dp->dccps_sync_scheduled = 1; - return 0; - } - dcb->dccpd_opt_len += len; - to = skb_push(skb, len); - len = buflen; - from = av->av_buf + av->av_buf_head; - tail = av->av_buf + DCCPAV_MAX_ACKVEC_LEN; + DCCP_SKB_CB(skb)->dccpd_opt_len += len + 3; - for (i = 0; i < nr_opts; ++i) { - int copylen = len; - - if (len > DCCP_SINGLE_OPT_MAXLEN) - copylen = DCCP_SINGLE_OPT_MAXLEN; - - /* - * RFC 4340, 12.2: Encode the Nonce Echo for this Ack Vector via - * its type; ack_nonce is the sum of all individual buf_nonce's. - */ - nonce ^= av->av_buf_nonce[i]; - - *to++ = DCCPO_ACK_VECTOR_0 + av->av_buf_nonce[i]; - *to++ = copylen + 2; - - /* Check if buf_head wraps */ - if (from + copylen > tail) { - const u16 tailsize = tail - from; - - memcpy(to, from, tailsize); - to += tailsize; - len -= tailsize; - copylen -= tailsize; - from = av->av_buf; - } - - memcpy(to, from, copylen); - from += copylen; - to += copylen; - len -= copylen; - } - /* - * Each sent Ack Vector is recorded in the list, as per A.2 of RFC 4340. - */ - if (dccp_ackvec_update_records(av, dcb->dccpd_seq, nonce)) - return -ENOBUFS; - return 0; -} + to = skb_push(skb, len + 3); + *to++ = type; + *to++ = len + 3; + *to++ = feat; -/** - * dccp_insert_option_mandatory - Mandatory option (5.8.2) - * Note that since we are using skb_push, this function needs to be called - * _after_ inserting the option it is supposed to influence (stack order). - */ -int dccp_insert_option_mandatory(struct sk_buff *skb) -{ - if (DCCP_SKB_CB(skb)->dccpd_opt_len >= DCCP_MAX_OPT_LEN) - return -1; + if (len) + memcpy(to, val, len); - DCCP_SKB_CB(skb)->dccpd_opt_len++; - *skb_push(skb, 1) = DCCPO_MANDATORY; + dccp_pr_debug("%s(%s (%d), ...), length %d\n", + dccp_feat_typename(type), + dccp_feat_name(feat), feat, len); return 0; } -/** - * dccp_insert_fn_opt - Insert single Feature-Negotiation option into @skb - * @type: %DCCPO_CHANGE_L, %DCCPO_CHANGE_R, %DCCPO_CONFIRM_L, %DCCPO_CONFIRM_R - * @feat: one out of %dccp_feature_numbers - * @val: NN value or SP array (preferred element first) to copy - * @len: true length of @val in bytes (excluding first element repetition) - * @repeat_first: whether to copy the first element of @val twice - * The last argument is used to construct Confirm options, where the preferred - * value and the preference list appear separately (RFC 4340, 6.3.1). Preference - * lists are kept such that the preferred entry is always first, so we only need - * to copy twice, and avoid the overhead of cloning into a bigger array. - */ -int dccp_insert_fn_opt(struct sk_buff *skb, u8 type, u8 feat, - u8 *val, u8 len, bool repeat_first) +static int dccp_insert_options_feat(struct sock *sk, struct sk_buff *skb) { - u8 tot_len, *to; + struct dccp_sock *dp = dccp_sk(sk); + struct dccp_minisock *dmsk = dccp_msk(sk); + struct dccp_opt_pend *opt, *next; + int change = 0; + + /* confirm any options [NN opts] */ + list_for_each_entry_safe(opt, next, &dmsk->dccpms_conf, dccpop_node) { + dccp_insert_feat_opt(skb, opt->dccpop_type, + opt->dccpop_feat, opt->dccpop_val, + opt->dccpop_len); + /* fear empty confirms */ + if (opt->dccpop_val) + kfree(opt->dccpop_val); + kfree(opt); + } + INIT_LIST_HEAD(&dmsk->dccpms_conf); + + /* see which features we need to send */ + list_for_each_entry(opt, &dmsk->dccpms_pending, dccpop_node) { + /* see if we need to send any confirm */ + if (opt->dccpop_sc) { + dccp_insert_feat_opt(skb, opt->dccpop_type + 1, + opt->dccpop_feat, + opt->dccpop_sc->dccpoc_val, + opt->dccpop_sc->dccpoc_len); + + BUG_ON(!opt->dccpop_sc->dccpoc_val); + kfree(opt->dccpop_sc->dccpoc_val); + kfree(opt->dccpop_sc); + opt->dccpop_sc = NULL; + } - /* take the `Feature' field and possible repetition into account */ - if (len > (DCCP_SINGLE_OPT_MAXLEN - 2)) { - DCCP_WARN("length %u for feature %u too large\n", len, feat); - return -1; + /* any option not confirmed, re-send it */ + if (!opt->dccpop_conf) { + dccp_insert_feat_opt(skb, opt->dccpop_type, + opt->dccpop_feat, opt->dccpop_val, + opt->dccpop_len); + change++; + } } - if (unlikely(val == NULL || len == 0)) - len = repeat_first = 0; - tot_len = 3 + repeat_first + len; + /* Retransmit timer. + * If this is the master listening sock, we don't set a timer on it. It + * should be fine because if the dude doesn't receive our RESPONSE + * [which will contain the CHANGE] he will send another REQUEST which + * will "retrnasmit" the change. + */ + if (change && dp->dccps_role != DCCP_ROLE_LISTEN) { + dccp_pr_debug("reset feat negotiation timer %p\n", sk); - if (DCCP_SKB_CB(skb)->dccpd_opt_len + tot_len > DCCP_MAX_OPT_LEN) { - DCCP_WARN("packet too small for feature %d option!\n", feat); - return -1; + /* XXX don't reset the timer on re-transmissions. I.e. reset it + * only when sending new stuff i guess. Currently the timer + * never backs off because on re-transmission it just resets it! + */ + inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, + inet_csk(sk)->icsk_rto, DCCP_RTO_MAX); } - DCCP_SKB_CB(skb)->dccpd_opt_len += tot_len; - - to = skb_push(skb, tot_len); - *to++ = type; - *to++ = tot_len; - *to++ = feat; - if (repeat_first) - *to++ = *val; - if (len) - memcpy(to, val, len); return 0; } @@ -581,30 +565,19 @@ static void dccp_insert_option_padding(struct sk_buff *skb) int dccp_insert_options(struct sock *sk, struct sk_buff *skb) { struct dccp_sock *dp = dccp_sk(sk); + struct dccp_minisock *dmsk = dccp_msk(sk); DCCP_SKB_CB(skb)->dccpd_opt_len = 0; - if (dp->dccps_send_ndp_count && dccp_insert_option_ndp(sk, skb)) + if (dmsk->dccpms_send_ndp_count && + dccp_insert_option_ndp(sk, skb)) return -1; - if (DCCP_SKB_CB(skb)->dccpd_type != DCCP_PKT_DATA) { - - /* Feature Negotiation */ - if (dccp_feat_insert_opts(dp, NULL, skb)) + if (!dccp_packet_without_ack(skb)) { + if (dmsk->dccpms_send_ack_vector && + dccp_ackvec_pending(dp->dccps_hc_rx_ackvec) && + dccp_insert_option_ackvec(sk, skb)) return -1; - - if (DCCP_SKB_CB(skb)->dccpd_type == DCCP_PKT_REQUEST) { - /* - * Obtain RTT sample from Request/Response exchange. - * This is currently used in CCID 3 initialisation. - */ - if (dccp_insert_option_timestamp(sk, skb)) - return -1; - - } else if (dccp_ackvec_pending(sk) && - dccp_insert_option_ackvec(sk, skb)) { - return -1; - } } if (dp->dccps_hc_rx_insert_options) { @@ -613,6 +586,21 @@ int dccp_insert_options(struct sock *sk, struct sk_buff *skb) dp->dccps_hc_rx_insert_options = 0; } + /* Feature negotiation */ + /* Data packets can't do feat negotiation */ + if (DCCP_SKB_CB(skb)->dccpd_type != DCCP_PKT_DATA && + DCCP_SKB_CB(skb)->dccpd_type != DCCP_PKT_DATAACK && + dccp_insert_options_feat(sk, skb)) + return -1; + + /* + * Obtain RTT sample from Request/Response exchange. + * This is currently used in CCID 3 initialisation. + */ + if (DCCP_SKB_CB(skb)->dccpd_type == DCCP_PKT_REQUEST && + dccp_insert_option_timestamp(sk, skb)) + return -1; + if (dp->dccps_timestamp_echo != 0 && dccp_insert_option_timestamp_echo(dp, NULL, skb)) return -1; @@ -625,9 +613,6 @@ int dccp_insert_options_rsk(struct dccp_request_sock *dreq, struct sk_buff *skb) { DCCP_SKB_CB(skb)->dccpd_opt_len = 0; - if (dccp_feat_insert_opts(NULL, dreq, skb)) - return -1; - if (dreq->dreq_timestamp_echo != 0 && dccp_insert_option_timestamp_echo(NULL, dreq, skb)) return -1; diff --git a/net/dccp/output.c b/net/dccp/output.c index 2532797a800..d06945c7d3d 100644 --- a/net/dccp/output.c +++ b/net/dccp/output.c @@ -26,13 +26,11 @@ static inline void dccp_event_ack_sent(struct sock *sk) inet_csk_clear_xmit_timer(sk, ICSK_TIME_DACK); } -/* enqueue @skb on sk_send_head for retransmission, return clone to send now */ -static struct sk_buff *dccp_skb_entail(struct sock *sk, struct sk_buff *skb) +static void dccp_skb_entail(struct sock *sk, struct sk_buff *skb) { skb_set_owner_w(skb, sk); WARN_ON(sk->sk_send_head); sk->sk_send_head = skb; - return skb_clone(sk->sk_send_head, gfp_any()); } /* @@ -163,27 +161,21 @@ unsigned int dccp_sync_mss(struct sock *sk, u32 pmtu) struct inet_connection_sock *icsk = inet_csk(sk); struct dccp_sock *dp = dccp_sk(sk); u32 ccmps = dccp_determine_ccmps(dp); - u32 cur_mps = ccmps ? min(pmtu, ccmps) : pmtu; + int cur_mps = ccmps ? min(pmtu, ccmps) : pmtu; /* Account for header lengths and IPv4/v6 option overhead */ cur_mps -= (icsk->icsk_af_ops->net_header_len + icsk->icsk_ext_hdr_len + sizeof(struct dccp_hdr) + sizeof(struct dccp_hdr_ext)); /* - * Leave enough headroom for common DCCP header options. - * This only considers options which may appear on DCCP-Data packets, as - * per table 3 in RFC 4340, 5.8. When running out of space for other - * options (eg. Ack Vector which can take up to 255 bytes), it is better - * to schedule a separate Ack. Thus we leave headroom for the following: - * - 1 byte for Slow Receiver (11.6) - * - 6 bytes for Timestamp (13.1) - * - 10 bytes for Timestamp Echo (13.3) - * - 8 bytes for NDP count (7.7, when activated) - * - 6 bytes for Data Checksum (9.3) - * - %DCCPAV_MIN_OPTLEN bytes for Ack Vector size (11.4, when enabled) + * FIXME: this should come from the CCID infrastructure, where, say, + * TFRC will say it wants TIMESTAMPS, ELAPSED time, etc, for now lets + * put a rough estimate for NDP + TIMESTAMP + TIMESTAMP_ECHO + ELAPSED + * TIME + TFRC_OPT_LOSS_EVENT_RATE + TFRC_OPT_RECEIVE_RATE + padding to + * make it a multiple of 4 */ - cur_mps -= roundup(1 + 6 + 10 + dp->dccps_send_ndp_count * 8 + 6 + - (dp->dccps_hc_rx_ackvec ? DCCPAV_MIN_OPTLEN : 0), 4); + + cur_mps -= ((5 + 6 + 10 + 6 + 6 + 6 + 3) / 4) * 4; /* And store cached results */ icsk->icsk_pmtu_cookie = pmtu; @@ -208,158 +200,95 @@ void dccp_write_space(struct sock *sk) } /** - * dccp_wait_for_ccid - Await CCID send permission + * dccp_wait_for_ccid - Wait for ccid to tell us we can send a packet * @sk: socket to wait for - * @delay: timeout in jiffies - * This is used by CCIDs which need to delay the send time in process context. + * @skb: current skb to pass on for waiting + * @delay: sleep timeout in milliseconds (> 0) + * This function is called by default when the socket is closed, and + * when a non-zero linger time is set on the socket. For consistency */ -static int dccp_wait_for_ccid(struct sock *sk, unsigned long delay) +static int dccp_wait_for_ccid(struct sock *sk, struct sk_buff *skb, int delay) { + struct dccp_sock *dp = dccp_sk(sk); DEFINE_WAIT(wait); - long remaining; - - prepare_to_wait(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE); - sk->sk_write_pending++; - release_sock(sk); + unsigned long jiffdelay; + int rc; - remaining = schedule_timeout(delay); - - lock_sock(sk); - sk->sk_write_pending--; - finish_wait(sk->sk_sleep, &wait); + do { + dccp_pr_debug("delayed send by %d msec\n", delay); + jiffdelay = msecs_to_jiffies(delay); - if (signal_pending(current) || sk->sk_err) - return -1; - return remaining; -} - -/** - * dccp_xmit_packet - Send data packet under control of CCID - * Transmits next-queued payload and informs CCID to account for the packet. - */ -static void dccp_xmit_packet(struct sock *sk) -{ - int err, len; - struct dccp_sock *dp = dccp_sk(sk); - struct sk_buff *skb = dccp_qpolicy_pop(sk); + prepare_to_wait(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE); - if (unlikely(skb == NULL)) - return; - len = skb->len; + sk->sk_write_pending++; + release_sock(sk); + schedule_timeout(jiffdelay); + lock_sock(sk); + sk->sk_write_pending--; - if (sk->sk_state == DCCP_PARTOPEN) { - const u32 cur_mps = dp->dccps_mss_cache - DCCP_FEATNEG_OVERHEAD; - /* - * See 8.1.5 - Handshake Completion. - * - * For robustness we resend Confirm options until the client has - * entered OPEN. During the initial feature negotiation, the MPS - * is smaller than usual, reduced by the Change/Confirm options. - */ - if (!list_empty(&dp->dccps_featneg) && len > cur_mps) { - DCCP_WARN("Payload too large (%d) for featneg.\n", len); - dccp_send_ack(sk); - dccp_feat_list_purge(&dp->dccps_featneg); - } + if (sk->sk_err) + goto do_error; + if (signal_pending(current)) + goto do_interrupted; - inet_csk_schedule_ack(sk); - inet_csk_reset_xmit_timer(sk, ICSK_TIME_DACK, - inet_csk(sk)->icsk_rto, - DCCP_RTO_MAX); - DCCP_SKB_CB(skb)->dccpd_type = DCCP_PKT_DATAACK; - } else if (dccp_ack_pending(sk)) { - DCCP_SKB_CB(skb)->dccpd_type = DCCP_PKT_DATAACK; - } else { - DCCP_SKB_CB(skb)->dccpd_type = DCCP_PKT_DATA; - } - - err = dccp_transmit_skb(sk, skb); - if (err) - dccp_pr_debug("transmit_skb() returned err=%d\n", err); - /* - * Register this one as sent even if an error occurred. To the remote - * end a local packet drop is indistinguishable from network loss, i.e. - * any local drop will eventually be reported via receiver feedback. - */ - ccid_hc_tx_packet_sent(dp->dccps_hc_tx_ccid, sk, len); - - /* - * If the CCID needs to transfer additional header options out-of-band - * (e.g. Ack Vectors or feature-negotiation options), it activates this - * flag to schedule a Sync. The Sync will automatically incorporate all - * currently pending header options, thus clearing the backlog. - */ - if (dp->dccps_sync_scheduled) - dccp_send_sync(sk, dp->dccps_gsr, DCCP_PKT_SYNC); + rc = ccid_hc_tx_send_packet(dp->dccps_hc_tx_ccid, sk, skb); + } while ((delay = rc) > 0); +out: + finish_wait(sk->sk_sleep, &wait); + return rc; + +do_error: + rc = -EPIPE; + goto out; +do_interrupted: + rc = -EINTR; + goto out; } -/** - * dccp_flush_write_queue - Drain queue at end of connection - * Since dccp_sendmsg queues packets without waiting for them to be sent, it may - * happen that the TX queue is not empty at the end of a connection. We give the - * HC-sender CCID a grace period of up to @time_budget jiffies. If this function - * returns with a non-empty write queue, it will be purged later. - */ -void dccp_flush_write_queue(struct sock *sk, long *time_budget) +void dccp_write_xmit(struct sock *sk, int block) { struct dccp_sock *dp = dccp_sk(sk); struct sk_buff *skb; - long delay, rc; - - while (*time_budget > 0 && (skb = skb_peek(&sk->sk_write_queue))) { - rc = ccid_hc_tx_send_packet(dp->dccps_hc_tx_ccid, sk, skb); - switch (ccid_packet_dequeue_eval(rc)) { - case CCID_PACKET_WILL_DEQUEUE_LATER: - /* - * If the CCID determines when to send, the next sending - * time is unknown or the CCID may not even send again - * (e.g. remote host crashes or lost Ack packets). - */ - DCCP_WARN("CCID did not manage to send all packets\n"); - return; - case CCID_PACKET_DELAY: - delay = msecs_to_jiffies(rc); - if (delay > *time_budget) - return; - rc = dccp_wait_for_ccid(sk, delay); - if (rc < 0) - return; - *time_budget -= (delay - rc); - /* check again if we can send now */ - break; - case CCID_PACKET_SEND_AT_ONCE: - dccp_xmit_packet(sk); - break; - case CCID_PACKET_ERR: - skb_dequeue(&sk->sk_write_queue); - kfree_skb(skb); - dccp_pr_debug("packet discarded due to err=%ld\n", rc); + while ((skb = skb_peek(&sk->sk_write_queue))) { + int err = ccid_hc_tx_send_packet(dp->dccps_hc_tx_ccid, sk, skb); + + if (err > 0) { + if (!block) { + sk_reset_timer(sk, &dp->dccps_xmit_timer, + msecs_to_jiffies(err)+jiffies); + break; + } else + err = dccp_wait_for_ccid(sk, skb, err); + if (err && err != -EINTR) + DCCP_BUG("err=%d after dccp_wait_for_ccid", err); } - } -} -void dccp_write_xmit(struct sock *sk) -{ - struct dccp_sock *dp = dccp_sk(sk); - struct sk_buff *skb; + skb_dequeue(&sk->sk_write_queue); + if (err == 0) { + struct dccp_skb_cb *dcb = DCCP_SKB_CB(skb); + const int len = skb->len; - while ((skb = dccp_qpolicy_top(sk))) { - int rc = ccid_hc_tx_send_packet(dp->dccps_hc_tx_ccid, sk, skb); - - switch (ccid_packet_dequeue_eval(rc)) { - case CCID_PACKET_WILL_DEQUEUE_LATER: - return; - case CCID_PACKET_DELAY: - sk_reset_timer(sk, &dp->dccps_xmit_timer, - jiffies + msecs_to_jiffies(rc)); - return; - case CCID_PACKET_SEND_AT_ONCE: - dccp_xmit_packet(sk); - break; - case CCID_PACKET_ERR: - dccp_qpolicy_drop(sk, skb); - dccp_pr_debug("packet discarded due to err=%d\n", rc); + if (sk->sk_state == DCCP_PARTOPEN) { + /* See 8.1.5. Handshake Completion */ + inet_csk_schedule_ack(sk); + inet_csk_reset_xmit_timer(sk, ICSK_TIME_DACK, + inet_csk(sk)->icsk_rto, + DCCP_RTO_MAX); + dcb->dccpd_type = DCCP_PKT_DATAACK; + } else if (dccp_ack_pending(sk)) + dcb->dccpd_type = DCCP_PKT_DATAACK; + else + dcb->dccpd_type = DCCP_PKT_DATA; + + err = dccp_transmit_skb(sk, skb); + ccid_hc_tx_packet_sent(dp->dccps_hc_tx_ccid, sk, 0, len); + if (err) + DCCP_BUG("err=%d after ccid_hc_tx_packet_sent", + err); + } else { + dccp_pr_debug("packet discarded due to err=%d\n", err); + kfree_skb(skb); } } } @@ -410,12 +339,10 @@ struct sk_buff *dccp_make_response(struct sock *sk, struct dst_entry *dst, DCCP_SKB_CB(skb)->dccpd_type = DCCP_PKT_RESPONSE; DCCP_SKB_CB(skb)->dccpd_seq = dreq->dreq_iss; - /* Resolve feature dependencies resulting from choice of CCID */ - if (dccp_feat_server_ccid_dependencies(dreq)) - goto response_failed; - - if (dccp_insert_options_rsk(dreq, skb)) - goto response_failed; + if (dccp_insert_options_rsk(dreq, skb)) { + kfree_skb(skb); + return NULL; + } /* Build and checksum header */ dh = dccp_zeroed_hdr(skb, dccp_header_size); @@ -436,9 +363,6 @@ struct sk_buff *dccp_make_response(struct sock *sk, struct dst_entry *dst, inet_rsk(req)->acked = 1; DCCP_INC_STATS(DCCP_MIB_OUTSEGS); return skb; -response_failed: - kfree_skb(skb); - return NULL; } EXPORT_SYMBOL_GPL(dccp_make_response); @@ -523,9 +447,8 @@ int dccp_send_reset(struct sock *sk, enum dccp_reset_codes code) /* * Do all connect socket setups that can be done AF independent. */ -int dccp_connect(struct sock *sk) +static inline void dccp_connect_init(struct sock *sk) { - struct sk_buff *skb; struct dccp_sock *dp = dccp_sk(sk); struct dst_entry *dst = __sk_dst_get(sk); struct inet_connection_sock *icsk = inet_csk(sk); @@ -535,13 +458,19 @@ int dccp_connect(struct sock *sk) dccp_sync_mss(sk, dst_mtu(dst)); - /* do not connect if feature negotiation setup fails */ - if (dccp_feat_finalise_settings(dccp_sk(sk))) - return -EPROTO; - /* Initialise GAR as per 8.5; AWL/AWH are set in dccp_transmit_skb() */ dp->dccps_gar = dp->dccps_iss; + icsk->icsk_retransmits = 0; +} + +int dccp_connect(struct sock *sk) +{ + struct sk_buff *skb; + struct inet_connection_sock *icsk = inet_csk(sk); + + dccp_connect_init(sk); + skb = alloc_skb(sk->sk_prot->max_header, sk->sk_allocation); if (unlikely(skb == NULL)) return -ENOBUFS; @@ -551,11 +480,11 @@ int dccp_connect(struct sock *sk) DCCP_SKB_CB(skb)->dccpd_type = DCCP_PKT_REQUEST; - dccp_transmit_skb(sk, dccp_skb_entail(sk, skb)); + dccp_skb_entail(sk, skb); + dccp_transmit_skb(sk, skb_clone(skb, GFP_KERNEL)); DCCP_INC_STATS(DCCP_MIB_ACTIVEOPENS); /* Timer for repeating the REQUEST until an answer. */ - icsk->icsk_retransmits = 0; inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, icsk->icsk_rto, DCCP_RTO_MAX); return 0; @@ -642,12 +571,6 @@ void dccp_send_sync(struct sock *sk, const u64 ackno, DCCP_SKB_CB(skb)->dccpd_type = pkt_type; DCCP_SKB_CB(skb)->dccpd_ack_seq = ackno; - /* - * Clear the flag in case the Sync was scheduled for out-of-band data, - * such as carrying a long Ack Vector. - */ - dccp_sk(sk)->dccps_sync_scheduled = 0; - dccp_transmit_skb(sk, skb); } @@ -676,7 +599,9 @@ void dccp_send_close(struct sock *sk, const int active) DCCP_SKB_CB(skb)->dccpd_type = DCCP_PKT_CLOSE; if (active) { - skb = dccp_skb_entail(sk, skb); + dccp_write_xmit(sk, 1); + dccp_skb_entail(sk, skb); + dccp_transmit_skb(sk, skb_clone(skb, prio)); /* * Retransmission timer for active-close: RFC 4340, 8.3 requires * to retransmit the Close/CloseReq until the CLOSING/CLOSEREQ @@ -689,6 +614,6 @@ void dccp_send_close(struct sock *sk, const int active) */ inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, DCCP_TIMEOUT_INIT, DCCP_RTO_MAX); - } - dccp_transmit_skb(sk, skb); + } else + dccp_transmit_skb(sk, skb); } diff --git a/net/dccp/probe.c b/net/dccp/probe.c index eaa59d82ab0..81368a7f537 100644 --- a/net/dccp/probe.c +++ b/net/dccp/probe.c @@ -46,54 +46,75 @@ static struct { struct kfifo *fifo; spinlock_t lock; wait_queue_head_t wait; - ktime_t start; + struct timespec tstart; } dccpw; -static void jdccp_write_xmit(struct sock *sk) +static void printl(const char *fmt, ...) { - const struct inet_sock *inet = inet_sk(sk); - struct ccid3_hc_tx_sock *hctx = NULL; - struct timespec tv; - char buf[256]; - int len, ccid = ccid_get_current_tx_ccid(dccp_sk(sk)); + va_list args; + int len; + struct timespec now; + char tbuf[256]; - if (ccid == DCCPC_CCID3) - hctx = ccid3_hc_tx_sk(sk); + va_start(args, fmt); + getnstimeofday(&now); - if (!port || ntohs(inet->dport) == port || ntohs(inet->sport) == port) { + now = timespec_sub(now, dccpw.tstart); - tv = ktime_to_timespec(ktime_sub(ktime_get(), dccpw.start)); - len = sprintf(buf, "%lu.%09lu %d.%d.%d.%d:%u %d.%d.%d.%d:%u %d", - (unsigned long)tv.tv_sec, - (unsigned long)tv.tv_nsec, - NIPQUAD(inet->saddr), ntohs(inet->sport), - NIPQUAD(inet->daddr), ntohs(inet->dport), ccid); + len = sprintf(tbuf, "%lu.%06lu ", + (unsigned long) now.tv_sec, + (unsigned long) now.tv_nsec / NSEC_PER_USEC); + len += vscnprintf(tbuf+len, sizeof(tbuf)-len, fmt, args); + va_end(args); + kfifo_put(dccpw.fifo, tbuf, len); + wake_up(&dccpw.wait); +} + +static int jdccp_sendmsg(struct kiocb *iocb, struct sock *sk, + struct msghdr *msg, size_t size) +{ + const struct dccp_minisock *dmsk = dccp_msk(sk); + const struct inet_sock *inet = inet_sk(sk); + const struct ccid3_hc_tx_sock *hctx; + + if (dmsk->dccpms_tx_ccid == DCCPC_CCID3) + hctx = ccid3_hc_tx_sk(sk); + else + hctx = NULL; + + if (port == 0 || ntohs(inet->dport) == port || + ntohs(inet->sport) == port) { if (hctx) - len += sprintf(buf + len, " %d %d %d %u %u %u %d", - hctx->s, hctx->rtt, hctx->p, hctx->x_calc, - (unsigned)(hctx->x_recv >> 6), - (unsigned)(hctx->x >> 6), hctx->t_ipi); - - len += sprintf(buf + len, "\n"); - kfifo_put(dccpw.fifo, buf, len); - wake_up(&dccpw.wait); + printl("%d.%d.%d.%d:%u %d.%d.%d.%d:%u %d %d %d %d %u " + "%llu %llu %d\n", + NIPQUAD(inet->saddr), ntohs(inet->sport), + NIPQUAD(inet->daddr), ntohs(inet->dport), size, + hctx->ccid3hctx_s, hctx->ccid3hctx_rtt, + hctx->ccid3hctx_p, hctx->ccid3hctx_x_calc, + hctx->ccid3hctx_x_recv >> 6, + hctx->ccid3hctx_x >> 6, hctx->ccid3hctx_t_ipi); + else + printl("%d.%d.%d.%d:%u %d.%d.%d.%d:%u %d\n", + NIPQUAD(inet->saddr), ntohs(inet->sport), + NIPQUAD(inet->daddr), ntohs(inet->dport), size); } jprobe_return(); + return 0; } static struct jprobe dccp_send_probe = { .kp = { - .symbol_name = "dccp_write_xmit", + .symbol_name = "dccp_sendmsg", }, - .entry = jdccp_write_xmit, + .entry = jdccp_sendmsg, }; static int dccpprobe_open(struct inode *inode, struct file *file) { kfifo_reset(dccpw.fifo); - dccpw.start = ktime_get(); + getnstimeofday(&dccpw.tstart); return 0; } diff --git a/net/dccp/proto.c b/net/dccp/proto.c index ecf3be961e1..d0bd3481976 100644 --- a/net/dccp/proto.c +++ b/net/dccp/proto.c @@ -67,9 +67,6 @@ void dccp_set_state(struct sock *sk, const int state) case DCCP_OPEN: if (oldstate != DCCP_OPEN) DCCP_INC_STATS(DCCP_MIB_CURRESTAB); - /* Client retransmits all Confirm options until entering OPEN */ - if (oldstate == DCCP_PARTOPEN) - dccp_feat_list_purge(&dccp_sk(sk)->dccps_featneg); break; case DCCP_CLOSED: @@ -178,25 +175,63 @@ EXPORT_SYMBOL_GPL(dccp_state_name); int dccp_init_sock(struct sock *sk, const __u8 ctl_sock_initialized) { struct dccp_sock *dp = dccp_sk(sk); + struct dccp_minisock *dmsk = dccp_msk(sk); struct inet_connection_sock *icsk = inet_csk(sk); + dccp_minisock_init(&dp->dccps_minisock); + icsk->icsk_rto = DCCP_TIMEOUT_INIT; icsk->icsk_syn_retries = sysctl_dccp_request_retries; sk->sk_state = DCCP_CLOSED; sk->sk_write_space = dccp_write_space; icsk->icsk_sync_mss = dccp_sync_mss; - dp->dccps_mss_cache = TCP_MIN_RCVMSS; + dp->dccps_mss_cache = 536; dp->dccps_rate_last = jiffies; dp->dccps_role = DCCP_ROLE_UNDEFINED; dp->dccps_service = DCCP_SERVICE_CODE_IS_ABSENT; - dp->dccps_tx_qlen = sysctl_dccp_tx_qlen; + dp->dccps_l_ack_ratio = dp->dccps_r_ack_ratio = 1; dccp_init_xmit_timers(sk); - INIT_LIST_HEAD(&dp->dccps_featneg); - /* control socket doesn't need feat nego */ - if (likely(ctl_sock_initialized)) - return dccp_feat_init(sk); + /* + * FIXME: We're hardcoding the CCID, and doing this at this point makes + * the listening (master) sock get CCID control blocks, which is not + * necessary, but for now, to not mess with the test userspace apps, + * lets leave it here, later the real solution is to do this in a + * setsockopt(CCIDs-I-want/accept). -acme + */ + if (likely(ctl_sock_initialized)) { + int rc = dccp_feat_init(dmsk); + + if (rc) + return rc; + + if (dmsk->dccpms_send_ack_vector) { + dp->dccps_hc_rx_ackvec = dccp_ackvec_alloc(GFP_KERNEL); + if (dp->dccps_hc_rx_ackvec == NULL) + return -ENOMEM; + } + dp->dccps_hc_rx_ccid = ccid_hc_rx_new(dmsk->dccpms_rx_ccid, + sk, GFP_KERNEL); + dp->dccps_hc_tx_ccid = ccid_hc_tx_new(dmsk->dccpms_tx_ccid, + sk, GFP_KERNEL); + if (unlikely(dp->dccps_hc_rx_ccid == NULL || + dp->dccps_hc_tx_ccid == NULL)) { + ccid_hc_rx_delete(dp->dccps_hc_rx_ccid, sk); + ccid_hc_tx_delete(dp->dccps_hc_tx_ccid, sk); + if (dmsk->dccpms_send_ack_vector) { + dccp_ackvec_free(dp->dccps_hc_rx_ackvec); + dp->dccps_hc_rx_ackvec = NULL; + } + dp->dccps_hc_rx_ccid = dp->dccps_hc_tx_ccid = NULL; + return -ENOMEM; + } + } else { + /* control socket doesn't need feat nego */ + INIT_LIST_HEAD(&dmsk->dccpms_pending); + INIT_LIST_HEAD(&dmsk->dccpms_conf); + } + return 0; } @@ -205,6 +240,7 @@ EXPORT_SYMBOL_GPL(dccp_init_sock); void dccp_destroy_sock(struct sock *sk) { struct dccp_sock *dp = dccp_sk(sk); + struct dccp_minisock *dmsk = dccp_msk(sk); /* * DCCP doesn't use sk_write_queue, just sk_send_head @@ -222,7 +258,7 @@ void dccp_destroy_sock(struct sock *sk) kfree(dp->dccps_service_list); dp->dccps_service_list = NULL; - if (dp->dccps_hc_rx_ackvec != NULL) { + if (dmsk->dccpms_send_ack_vector) { dccp_ackvec_free(dp->dccps_hc_rx_ackvec); dp->dccps_hc_rx_ackvec = NULL; } @@ -231,7 +267,7 @@ void dccp_destroy_sock(struct sock *sk) dp->dccps_hc_rx_ccid = dp->dccps_hc_tx_ccid = NULL; /* clean up feature negotiation state */ - dccp_feat_list_purge(&dp->dccps_featneg); + dccp_feat_clean(dmsk); } EXPORT_SYMBOL_GPL(dccp_destroy_sock); @@ -241,9 +277,6 @@ static inline int dccp_listen_start(struct sock *sk, int backlog) struct dccp_sock *dp = dccp_sk(sk); dp->dccps_role = DCCP_ROLE_LISTEN; - /* do not start to listen if feature negotiation setup fails */ - if (dccp_feat_finalise_settings(dp)) - return -EPROTO; return inet_csk_listen_start(sk, backlog); } @@ -433,70 +466,42 @@ static int dccp_setsockopt_service(struct sock *sk, const __be32 service, return 0; } -static int dccp_setsockopt_cscov(struct sock *sk, int cscov, bool rx) +/* byte 1 is feature. the rest is the preference list */ +static int dccp_setsockopt_change(struct sock *sk, int type, + struct dccp_so_feat __user *optval) { - u8 *list, len; - int i, rc; + struct dccp_so_feat opt; + u8 *val; + int rc; - if (cscov < 0 || cscov > 15) - return -EINVAL; + if (copy_from_user(&opt, optval, sizeof(opt))) + return -EFAULT; /* - * Populate a list of permissible values, in the range cscov...15. This - * is necessary since feature negotiation of single values only works if - * both sides incidentally choose the same value. Since the list starts - * lowest-value first, negotiation will pick the smallest shared value. + * rfc4340: 6.1. Change Options */ - if (cscov == 0) - return 0; - len = 16 - cscov; - - list = kmalloc(len, GFP_KERNEL); - if (list == NULL) - return -ENOBUFS; - - for (i = 0; i < len; i++) - list[i] = cscov++; - - rc = dccp_feat_register_sp(sk, DCCPF_MIN_CSUM_COVER, rx, list, len); - - if (rc == 0) { - if (rx) - dccp_sk(sk)->dccps_pcrlen = cscov; - else - dccp_sk(sk)->dccps_pcslen = cscov; - } - kfree(list); - return rc; -} - -static int dccp_setsockopt_ccid(struct sock *sk, int type, - char __user *optval, int optlen) -{ - u8 *val; - int rc = 0; - - if (optlen < 1 || optlen > DCCP_FEAT_MAX_SP_VALS) + if (opt.dccpsf_len < 1) return -EINVAL; - val = kmalloc(optlen, GFP_KERNEL); - if (val == NULL) + val = kmalloc(opt.dccpsf_len, GFP_KERNEL); + if (!val) return -ENOMEM; - if (copy_from_user(val, optval, optlen)) { - kfree(val); - return -EFAULT; + if (copy_from_user(val, opt.dccpsf_val, opt.dccpsf_len)) { + rc = -EFAULT; + goto out_free_val; } - lock_sock(sk); - if (type == DCCP_SOCKOPT_TX_CCID || type == DCCP_SOCKOPT_CCID) - rc = dccp_feat_register_sp(sk, DCCPF_CCID, 1, val, optlen); + rc = dccp_feat_change(dccp_msk(sk), type, opt.dccpsf_feat, + val, opt.dccpsf_len, GFP_KERNEL); + if (rc) + goto out_free_val; - if (!rc && (type == DCCP_SOCKOPT_RX_CCID || type == DCCP_SOCKOPT_CCID)) - rc = dccp_feat_register_sp(sk, DCCPF_CCID, 0, val, optlen); - release_sock(sk); +out: + return rc; +out_free_val: kfree(val); - return rc; + goto out; } static int do_dccp_setsockopt(struct sock *sk, int level, int optname, @@ -505,21 +510,7 @@ static int do_dccp_setsockopt(struct sock *sk, int level, int optname, struct dccp_sock *dp = dccp_sk(sk); int val, err = 0; - switch (optname) { - case DCCP_SOCKOPT_PACKET_SIZE: - DCCP_WARN("sockopt(PACKET_SIZE) is deprecated: fix your app\n"); - return 0; - case DCCP_SOCKOPT_CHANGE_L: - case DCCP_SOCKOPT_CHANGE_R: - DCCP_WARN("sockopt(CHANGE_L/R) is deprecated: fix your app\n"); - return 0; - case DCCP_SOCKOPT_CCID: - case DCCP_SOCKOPT_RX_CCID: - case DCCP_SOCKOPT_TX_CCID: - return dccp_setsockopt_ccid(sk, optname, optval, optlen); - } - - if (optlen < (int)sizeof(int)) + if (optlen < sizeof(int)) return -EINVAL; if (get_user(val, (int __user *)optval)) @@ -530,38 +521,53 @@ static int do_dccp_setsockopt(struct sock *sk, int level, int optname, lock_sock(sk); switch (optname) { + case DCCP_SOCKOPT_PACKET_SIZE: + DCCP_WARN("sockopt(PACKET_SIZE) is deprecated: fix your app\n"); + err = 0; + break; + case DCCP_SOCKOPT_CHANGE_L: + if (optlen != sizeof(struct dccp_so_feat)) + err = -EINVAL; + else + err = dccp_setsockopt_change(sk, DCCPO_CHANGE_L, + (struct dccp_so_feat __user *) + optval); + break; + case DCCP_SOCKOPT_CHANGE_R: + if (optlen != sizeof(struct dccp_so_feat)) + err = -EINVAL; + else + err = dccp_setsockopt_change(sk, DCCPO_CHANGE_R, + (struct dccp_so_feat __user *) + optval); + break; case DCCP_SOCKOPT_SERVER_TIMEWAIT: if (dp->dccps_role != DCCP_ROLE_SERVER) err = -EOPNOTSUPP; else dp->dccps_server_timewait = (val != 0); break; - case DCCP_SOCKOPT_SEND_CSCOV: - err = dccp_setsockopt_cscov(sk, val, false); - break; - case DCCP_SOCKOPT_RECV_CSCOV: - err = dccp_setsockopt_cscov(sk, val, true); - break; - case DCCP_SOCKOPT_QPOLICY_ID: - if (sk->sk_state != DCCP_CLOSED) - err = -EISCONN; - else if (val < 0 || val >= DCCPQ_POLICY_MAX) + case DCCP_SOCKOPT_SEND_CSCOV: /* sender side, RFC 4340, sec. 9.2 */ + if (val < 0 || val > 15) err = -EINVAL; else - dp->dccps_qpolicy = val; + dp->dccps_pcslen = val; break; - case DCCP_SOCKOPT_QPOLICY_TXQLEN: - if (val < 0) + case DCCP_SOCKOPT_RECV_CSCOV: /* receiver side, RFC 4340 sec. 9.2.1 */ + if (val < 0 || val > 15) err = -EINVAL; - else - dp->dccps_tx_qlen = val; + else { + dp->dccps_pcrlen = val; + /* FIXME: add feature negotiation, + * ChangeL(MinimumChecksumCoverage, val) */ + } break; default: err = -ENOPROTOOPT; break; } - release_sock(sk); + release_sock(sk); return err; } @@ -642,18 +648,6 @@ static int do_dccp_getsockopt(struct sock *sk, int level, int optname, case DCCP_SOCKOPT_GET_CUR_MPS: val = dp->dccps_mss_cache; break; - case DCCP_SOCKOPT_AVAILABLE_CCIDS: - return ccid_getsockopt_builtin_ccids(sk, len, optval, optlen); - case DCCP_SOCKOPT_TX_CCID: - val = ccid_get_current_tx_ccid(dp); - if (val < 0) - return -ENOPROTOOPT; - break; - case DCCP_SOCKOPT_RX_CCID: - val = ccid_get_current_rx_ccid(dp); - if (val < 0) - return -ENOPROTOOPT; - break; case DCCP_SOCKOPT_SERVER_TIMEWAIT: val = dp->dccps_server_timewait; break; @@ -663,12 +657,6 @@ static int do_dccp_getsockopt(struct sock *sk, int level, int optname, case DCCP_SOCKOPT_RECV_CSCOV: val = dp->dccps_pcrlen; break; - case DCCP_SOCKOPT_QPOLICY_ID: - val = dp->dccps_qpolicy; - break; - case DCCP_SOCKOPT_QPOLICY_TXQLEN: - val = dp->dccps_tx_qlen; - break; case 128 ... 191: return ccid_hc_rx_getsockopt(dp->dccps_hc_rx_ccid, sk, optname, len, (u32 __user *)optval, optlen); @@ -711,47 +699,6 @@ int compat_dccp_getsockopt(struct sock *sk, int level, int optname, EXPORT_SYMBOL_GPL(compat_dccp_getsockopt); #endif -static int dccp_msghdr_parse(struct msghdr *msg, struct sk_buff *skb) -{ - struct cmsghdr *cmsg = CMSG_FIRSTHDR(msg); - - /* - * Assign an (opaque) qpolicy priority value to skb->priority. - * - * We are overloading this skb field for use with the qpolicy subystem. - * The skb->priority is normally used for the SO_PRIORITY option, which - * is initialised from sk_priority. Since the assignment of sk_priority - * to skb->priority happens later (on layer 3), we overload this field - * for use with queueing priorities as long as the skb is on layer 4. - * The default priority value (if nothing is set) is 0. - */ - skb->priority = 0; - - for (; cmsg != NULL; cmsg = CMSG_NXTHDR(msg, cmsg)) { - - if (!CMSG_OK(msg, cmsg)) - return -EINVAL; - - if (cmsg->cmsg_level != SOL_DCCP) - continue; - - if (cmsg->cmsg_type <= DCCP_SCM_QPOLICY_MAX && - !dccp_qpolicy_param_ok(skb->sk, cmsg->cmsg_type)) - return -EINVAL; - - switch (cmsg->cmsg_type) { - case DCCP_SCM_PRIORITY: - if (cmsg->cmsg_len != CMSG_LEN(sizeof(__u32))) - return -EINVAL; - skb->priority = *(__u32 *)CMSG_DATA(cmsg); - break; - default: - return -EINVAL; - } - } - return 0; -} - int dccp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, size_t len) { @@ -767,7 +714,8 @@ int dccp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, lock_sock(sk); - if (dccp_qpolicy_full(sk)) { + if (sysctl_dccp_tx_qlen && + (sk->sk_write_queue.qlen >= sysctl_dccp_tx_qlen)) { rc = -EAGAIN; goto out_release; } @@ -795,12 +743,8 @@ int dccp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, if (rc != 0) goto out_discard; - rc = dccp_msghdr_parse(msg, skb); - if (rc != 0) - goto out_discard; - - dccp_qpolicy_push(sk, skb); - dccp_write_xmit(sk); + skb_queue_tail(&sk->sk_write_queue, skb); + dccp_write_xmit(sk,0); out_release: release_sock(sk); return rc ? : len; @@ -1023,22 +967,9 @@ void dccp_close(struct sock *sk, long timeout) /* Check zero linger _after_ checking for unread data. */ sk->sk_prot->disconnect(sk, 0); } else if (sk->sk_state != DCCP_CLOSED) { - /* - * Normal connection termination. May need to wait if there are - * still packets in the TX queue that are delayed by the CCID. - */ - dccp_flush_write_queue(sk, &timeout); dccp_terminate_connection(sk); } - /* - * Flush write queue. This may be necessary in several cases: - * - we have been closed by the peer but still have application data; - * - abortive termination (unread data or zero linger time), - * - normal termination but queue could not be flushed within time limit - */ - __skb_queue_purge(&sk->sk_write_queue); - sk_stream_wait_close(sk, timeout); adjudge_to_death: diff --git a/net/dccp/qpolicy.c b/net/dccp/qpolicy.c deleted file mode 100644 index 27383f88c75..00000000000 --- a/net/dccp/qpolicy.c +++ /dev/null @@ -1,137 +0,0 @@ -/* - * net/dccp/qpolicy.c - * - * Policy-based packet dequeueing interface for DCCP. - * - * Copyright (c) 2008 Tomasz Grobelny - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License v2 - * as published by the Free Software Foundation. - */ -#include "dccp.h" - -/* - * Simple Dequeueing Policy: - * If tx_qlen is different from 0, enqueue up to tx_qlen elements. - */ -static void qpolicy_simple_push(struct sock *sk, struct sk_buff *skb) -{ - skb_queue_tail(&sk->sk_write_queue, skb); -} - -static bool qpolicy_simple_full(struct sock *sk) -{ - return dccp_sk(sk)->dccps_tx_qlen && - sk->sk_write_queue.qlen >= dccp_sk(sk)->dccps_tx_qlen; -} - -static struct sk_buff *qpolicy_simple_top(struct sock *sk) -{ - return skb_peek(&sk->sk_write_queue); -} - -/* - * Priority-based Dequeueing Policy: - * If tx_qlen is different from 0 and the queue has reached its upper bound - * of tx_qlen elements, replace older packets lowest-priority-first. - */ -static struct sk_buff *qpolicy_prio_best_skb(struct sock *sk) -{ - struct sk_buff *skb, *best = NULL; - - skb_queue_walk(&sk->sk_write_queue, skb) - if (best == NULL || skb->priority > best->priority) - best = skb; - return best; -} - -static struct sk_buff *qpolicy_prio_worst_skb(struct sock *sk) -{ - struct sk_buff *skb, *worst = NULL; - - skb_queue_walk(&sk->sk_write_queue, skb) - if (worst == NULL || skb->priority < worst->priority) - worst = skb; - return worst; -} - -static bool qpolicy_prio_full(struct sock *sk) -{ - if (qpolicy_simple_full(sk)) - dccp_qpolicy_drop(sk, qpolicy_prio_worst_skb(sk)); - return false; -} - -/** - * struct dccp_qpolicy_operations - TX Packet Dequeueing Interface - * @push: add a new @skb to the write queue - * @full: indicates that no more packets will be admitted - * @top: peeks at whatever the queueing policy defines as its `top' - */ -static struct dccp_qpolicy_operations { - void (*push) (struct sock *sk, struct sk_buff *skb); - bool (*full) (struct sock *sk); - struct sk_buff* (*top) (struct sock *sk); - __be32 params; - -} qpol_table[DCCPQ_POLICY_MAX] = { - [DCCPQ_POLICY_SIMPLE] = { - .push = qpolicy_simple_push, - .full = qpolicy_simple_full, - .top = qpolicy_simple_top, - .params = 0, - }, - [DCCPQ_POLICY_PRIO] = { - .push = qpolicy_simple_push, - .full = qpolicy_prio_full, - .top = qpolicy_prio_best_skb, - .params = DCCP_SCM_PRIORITY, - }, -}; - -/* - * Externally visible interface - */ -void dccp_qpolicy_push(struct sock *sk, struct sk_buff *skb) -{ - qpol_table[dccp_sk(sk)->dccps_qpolicy].push(sk, skb); -} - -bool dccp_qpolicy_full(struct sock *sk) -{ - return qpol_table[dccp_sk(sk)->dccps_qpolicy].full(sk); -} - -void dccp_qpolicy_drop(struct sock *sk, struct sk_buff *skb) -{ - if (skb != NULL) { - skb_unlink(skb, &sk->sk_write_queue); - kfree_skb(skb); - } -} - -struct sk_buff *dccp_qpolicy_top(struct sock *sk) -{ - return qpol_table[dccp_sk(sk)->dccps_qpolicy].top(sk); -} - -struct sk_buff *dccp_qpolicy_pop(struct sock *sk) -{ - struct sk_buff *skb = dccp_qpolicy_top(sk); - - /* Clear any skb fields that we used internally */ - skb->priority = 0; - - if (skb) - skb_unlink(skb, &sk->sk_write_queue); - return skb; -} - -bool dccp_qpolicy_param_ok(struct sock *sk, __be32 param) -{ - /* check if exactly one bit is set */ - if (!param || (param & (param - 1))) - return false; - return (qpol_table[dccp_sk(sk)->dccps_qpolicy].params & param) == param; -} diff --git a/net/dccp/sysctl.c b/net/dccp/sysctl.c index a5a1856234e..21295993fdb 100644 --- a/net/dccp/sysctl.c +++ b/net/dccp/sysctl.c @@ -18,72 +18,76 @@ #error This file should not be compiled without CONFIG_SYSCTL defined #endif -/* Boundary values */ -static int zero = 0, - u8_max = 0xFF; -static unsigned long seqw_min = 32; - static struct ctl_table dccp_default_table[] = { { .procname = "seq_window", - .data = &sysctl_dccp_sequence_window, - .maxlen = sizeof(sysctl_dccp_sequence_window), + .data = &sysctl_dccp_feat_sequence_window, + .maxlen = sizeof(sysctl_dccp_feat_sequence_window), .mode = 0644, - .proc_handler = proc_doulongvec_minmax, - .extra1 = &seqw_min, /* RFC 4340, 7.5.2 */ + .proc_handler = proc_dointvec, }, { .procname = "rx_ccid", - .data = &sysctl_dccp_rx_ccid, - .maxlen = sizeof(sysctl_dccp_rx_ccid), + .data = &sysctl_dccp_feat_rx_ccid, + .maxlen = sizeof(sysctl_dccp_feat_rx_ccid), .mode = 0644, - .proc_handler = proc_dointvec_minmax, - .extra1 = &zero, - .extra2 = &u8_max, /* RFC 4340, 10. */ + .proc_handler = proc_dointvec, }, { .procname = "tx_ccid", - .data = &sysctl_dccp_tx_ccid, - .maxlen = sizeof(sysctl_dccp_tx_ccid), + .data = &sysctl_dccp_feat_tx_ccid, + .maxlen = sizeof(sysctl_dccp_feat_tx_ccid), + .mode = 0644, + .proc_handler = proc_dointvec, + }, + { + .procname = "ack_ratio", + .data = &sysctl_dccp_feat_ack_ratio, + .maxlen = sizeof(sysctl_dccp_feat_ack_ratio), + .mode = 0644, + .proc_handler = proc_dointvec, + }, + { + .procname = "send_ackvec", + .data = &sysctl_dccp_feat_send_ack_vector, + .maxlen = sizeof(sysctl_dccp_feat_send_ack_vector), + .mode = 0644, + .proc_handler = proc_dointvec, + }, + { + .procname = "send_ndp", + .data = &sysctl_dccp_feat_send_ndp_count, + .maxlen = sizeof(sysctl_dccp_feat_send_ndp_count), .mode = 0644, - .proc_handler = proc_dointvec_minmax, - .extra1 = &zero, - .extra2 = &u8_max, /* RFC 4340, 10. */ + .proc_handler = proc_dointvec, }, { .procname = "request_retries", .data = &sysctl_dccp_request_retries, .maxlen = sizeof(sysctl_dccp_request_retries), .mode = 0644, - .proc_handler = proc_dointvec_minmax, - .extra1 = &zero, - .extra2 = &u8_max, + .proc_handler = proc_dointvec, }, { .procname = "retries1", .data = &sysctl_dccp_retries1, .maxlen = sizeof(sysctl_dccp_retries1), .mode = 0644, - .proc_handler = proc_dointvec_minmax, - .extra1 = &zero, - .extra2 = &u8_max, + .proc_handler = proc_dointvec, }, { .procname = "retries2", .data = &sysctl_dccp_retries2, .maxlen = sizeof(sysctl_dccp_retries2), .mode = 0644, - .proc_handler = proc_dointvec_minmax, - .extra1 = &zero, - .extra2 = &u8_max, + .proc_handler = proc_dointvec, }, { .procname = "tx_qlen", .data = &sysctl_dccp_tx_qlen, .maxlen = sizeof(sysctl_dccp_tx_qlen), .mode = 0644, - .proc_handler = proc_dointvec_minmax, - .extra1 = &zero, + .proc_handler = proc_dointvec, }, { .procname = "sync_ratelimit", diff --git a/net/dccp/timer.c b/net/dccp/timer.c index 16359e29e7f..54b3c7e9e01 100644 --- a/net/dccp/timer.c +++ b/net/dccp/timer.c @@ -87,6 +87,17 @@ static void dccp_retransmit_timer(struct sock *sk) { struct inet_connection_sock *icsk = inet_csk(sk); + /* retransmit timer is used for feature negotiation throughout + * connection. In this case, no packet is re-transmitted, but rather an + * ack is generated and pending changes are placed into its options. + */ + if (sk->sk_send_head == NULL) { + dccp_pr_debug("feat negotiation retransmit timeout %p\n", sk); + if (sk->sk_state == DCCP_OPEN) + dccp_send_ack(sk); + goto backoff; + } + /* * More than than 4MSL (8 minutes) has passed, a RESET(aborted) was * sent, no need to retransmit, this sock is dead. @@ -115,6 +126,7 @@ static void dccp_retransmit_timer(struct sock *sk) return; } +backoff: icsk->icsk_backoff++; icsk->icsk_rto = min(icsk->icsk_rto << 1, DCCP_RTO_MAX); @@ -237,35 +249,32 @@ out: sock_put(sk); } -/** - * dccp_write_xmitlet - Workhorse for CCID packet dequeueing interface - * See the comments above %ccid_dequeueing_decision for supported modes. - */ -static void dccp_write_xmitlet(unsigned long data) +/* Transmit-delay timer: used by the CCIDs to delay actual send time */ +static void dccp_write_xmit_timer(unsigned long data) { struct sock *sk = (struct sock *)data; + struct dccp_sock *dp = dccp_sk(sk); bh_lock_sock(sk); if (sock_owned_by_user(sk)) - sk_reset_timer(sk, &dccp_sk(sk)->dccps_xmit_timer, jiffies + 1); + sk_reset_timer(sk, &dp->dccps_xmit_timer, jiffies+1); else - dccp_write_xmit(sk); + dccp_write_xmit(sk, 0); bh_unlock_sock(sk); + sock_put(sk); } -static void dccp_write_xmit_timer(unsigned long data) +static void dccp_init_write_xmit_timer(struct sock *sk) { - dccp_write_xmitlet(data); - sock_put((struct sock *)data); + struct dccp_sock *dp = dccp_sk(sk); + + setup_timer(&dp->dccps_xmit_timer, dccp_write_xmit_timer, + (unsigned long)sk); } void dccp_init_xmit_timers(struct sock *sk) { - struct dccp_sock *dp = dccp_sk(sk); - - tasklet_init(&dp->dccps_xmitlet, dccp_write_xmitlet, (unsigned long)sk); - setup_timer(&dp->dccps_xmit_timer, dccp_write_xmit_timer, - (unsigned long)sk); + dccp_init_write_xmit_timer(sk); inet_csk_init_xmit_timers(sk, &dccp_write_timer, &dccp_delack_timer, &dccp_keepalive_timer); } @@ -281,7 +290,8 @@ u32 dccp_timestamp(void) { s64 delta = ktime_us_delta(ktime_get_real(), dccp_timestamp_seed); - return div_u64(delta, DCCP_TIME_RESOLUTION); + do_div(delta, 10); + return delta; } EXPORT_SYMBOL_GPL(dccp_timestamp); diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 9da9f19ece8..f79a5160729 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -811,12 +811,25 @@ void tcp_update_metrics(struct sock *sk) } } +/* Numbers are taken from RFC3390. + * + * John Heffner states: + * + * The RFC specifies a window of no more than 4380 bytes + * unless 2*MSS > 4380. Reading the pseudocode in the RFC + * is a bit misleading because they use a clamp at 4380 bytes + * rather than use a multiplier in the relevant range. + */ __u32 tcp_init_cwnd(struct tcp_sock *tp, struct dst_entry *dst) { __u32 cwnd = (dst ? dst_metric(dst, RTAX_INITCWND) : 0); - if (!cwnd) - cwnd = rfc3390_bytes_to_packets(tp->mss_cache); + if (!cwnd) { + if (tp->mss_cache > 1460) + cwnd = 2; + else + cwnd = (tp->mss_cache > 1095) ? 3 : 4; + } return min_t(__u32, cwnd, tp->snd_cwnd_clamp); } -- cgit v1.2.3-70-g09d2 From abb81c4f3cb9b8d421f1e5474811ef1d461d341c Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Tue, 9 Sep 2008 19:58:29 -0700 Subject: ipsec: Use RCU-like construct for saved state within a walk Now that we save states within a walk we need synchronisation so that the list the saved state is on doesn't disappear from under us. As it stands this is done by keeping the state on the list which is bad because it gets in the way of the management of the state life-cycle. An alternative is to make our own pseudo-RCU system where we use counters to indicate which state can't be freed immediately as it may be referenced by an ongoing walk when that resumes. Signed-off-by: Herbert Xu Signed-off-by: David S. Miller --- include/net/xfrm.h | 15 +++++---------- net/xfrm/xfrm_state.c | 52 ++++++++++++++++++++++++++++++++++++++------------- 2 files changed, 44 insertions(+), 23 deletions(-) (limited to 'include/net') diff --git a/include/net/xfrm.h b/include/net/xfrm.h index 2933d7474a7..4bb94992b5f 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -120,9 +120,11 @@ extern struct mutex xfrm_cfg_mutex; /* Full description of state of transformer. */ struct xfrm_state { - /* Note: bydst is re-used during gc */ struct list_head all; - struct hlist_node bydst; + union { + struct list_head gclist; + struct hlist_node bydst; + }; struct hlist_node bysrc; struct hlist_node byspi; @@ -1286,16 +1288,9 @@ static inline void xfrm_state_walk_init(struct xfrm_state_walk *walk, u8 proto) walk->count = 0; } -static inline void xfrm_state_walk_done(struct xfrm_state_walk *walk) -{ - if (walk->state != NULL) { - xfrm_state_put(walk->state); - walk->state = NULL; - } -} - extern int xfrm_state_walk(struct xfrm_state_walk *walk, int (*func)(struct xfrm_state *, int, void*), void *); +extern void xfrm_state_walk_done(struct xfrm_state_walk *walk); extern struct xfrm_state *xfrm_state_alloc(void); extern struct xfrm_state *xfrm_state_find(xfrm_address_t *daddr, xfrm_address_t *saddr, struct flowi *fl, struct xfrm_tmpl *tmpl, diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c index 0a8f09c3144..aaafcee02fc 100644 --- a/net/xfrm/xfrm_state.c +++ b/net/xfrm/xfrm_state.c @@ -59,6 +59,11 @@ static unsigned int xfrm_state_hashmax __read_mostly = 1 * 1024 * 1024; static unsigned int xfrm_state_num; static unsigned int xfrm_state_genid; +/* Counter indicating ongoing walk, protected by xfrm_state_lock. */ +static unsigned long xfrm_state_walk_ongoing; +/* Counter indicating walk completion, protected by xfrm_cfg_mutex. */ +static unsigned long xfrm_state_walk_completed; + static struct xfrm_state_afinfo *xfrm_state_get_afinfo(unsigned int family); static void xfrm_state_put_afinfo(struct xfrm_state_afinfo *afinfo); @@ -191,7 +196,8 @@ static DEFINE_RWLOCK(xfrm_state_afinfo_lock); static struct xfrm_state_afinfo *xfrm_state_afinfo[NPROTO]; static struct work_struct xfrm_state_gc_work; -static HLIST_HEAD(xfrm_state_gc_list); +static LIST_HEAD(xfrm_state_gc_leftovers); +static LIST_HEAD(xfrm_state_gc_list); static DEFINE_SPINLOCK(xfrm_state_gc_lock); int __xfrm_state_delete(struct xfrm_state *x); @@ -403,17 +409,22 @@ static void xfrm_state_gc_destroy(struct xfrm_state *x) static void xfrm_state_gc_task(struct work_struct *data) { - struct xfrm_state *x; - struct hlist_node *entry, *tmp; - struct hlist_head gc_list; + struct xfrm_state *x, *tmp; + unsigned long completed; + mutex_lock(&xfrm_cfg_mutex); spin_lock_bh(&xfrm_state_gc_lock); - gc_list.first = xfrm_state_gc_list.first; - INIT_HLIST_HEAD(&xfrm_state_gc_list); + list_splice_tail_init(&xfrm_state_gc_list, &xfrm_state_gc_leftovers); spin_unlock_bh(&xfrm_state_gc_lock); - hlist_for_each_entry_safe(x, entry, tmp, &gc_list, bydst) + completed = xfrm_state_walk_completed; + mutex_unlock(&xfrm_cfg_mutex); + + list_for_each_entry_safe(x, tmp, &xfrm_state_gc_leftovers, gclist) { + if ((long)(x->lastused - completed) > 0) + break; xfrm_state_gc_destroy(x); + } wake_up(&km_waitq); } @@ -540,12 +551,8 @@ void __xfrm_state_destroy(struct xfrm_state *x) { WARN_ON(x->km.state != XFRM_STATE_DEAD); - spin_lock_bh(&xfrm_state_lock); - list_del(&x->all); - spin_unlock_bh(&xfrm_state_lock); - spin_lock_bh(&xfrm_state_gc_lock); - hlist_add_head(&x->bydst, &xfrm_state_gc_list); + list_add_tail(&x->gclist, &xfrm_state_gc_list); spin_unlock_bh(&xfrm_state_gc_lock); schedule_work(&xfrm_state_gc_work); } @@ -558,6 +565,8 @@ int __xfrm_state_delete(struct xfrm_state *x) if (x->km.state != XFRM_STATE_DEAD) { x->km.state = XFRM_STATE_DEAD; spin_lock(&xfrm_state_lock); + x->lastused = xfrm_state_walk_ongoing; + list_del_rcu(&x->all); hlist_del(&x->bydst); hlist_del(&x->bysrc); if (x->id.spi) @@ -1574,6 +1583,7 @@ int xfrm_state_walk(struct xfrm_state_walk *walk, if (err) { xfrm_state_hold(last); walk->state = last; + xfrm_state_walk_ongoing++; goto out; } } @@ -1588,12 +1598,28 @@ int xfrm_state_walk(struct xfrm_state_walk *walk, err = func(last, 0, data); out: spin_unlock_bh(&xfrm_state_lock); - if (old != NULL) + if (old != NULL) { xfrm_state_put(old); + xfrm_state_walk_completed++; + if (!list_empty(&xfrm_state_gc_leftovers)) + schedule_work(&xfrm_state_gc_work); + } return err; } EXPORT_SYMBOL(xfrm_state_walk); +void xfrm_state_walk_done(struct xfrm_state_walk *walk) +{ + if (walk->state != NULL) { + xfrm_state_put(walk->state); + walk->state = NULL; + xfrm_state_walk_completed++; + if (!list_empty(&xfrm_state_gc_leftovers)) + schedule_work(&xfrm_state_gc_work); + } +} +EXPORT_SYMBOL(xfrm_state_walk_done); + void xfrm_replay_notify(struct xfrm_state *x, int event) { -- cgit v1.2.3-70-g09d2 From fe3fa827314b877486c515a001c3e6f604f6f16f Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Mon, 8 Sep 2008 11:05:09 +0200 Subject: mac80211: make conf_tx non-atomic The conf_tx callback currently needs to be atomic, this requirement is just because it can be called from scanning. This rearranges it slightly to only update while not scanning (which is fine, we'll be getting beacons when associated) and thus removes the atomic requirement. Signed-off-by: Johannes Berg Signed-off-by: John W. Linville --- include/net/mac80211.h | 2 +- net/mac80211/mlme.c | 13 ++++++++----- 2 files changed, 9 insertions(+), 6 deletions(-) (limited to 'include/net') diff --git a/include/net/mac80211.h b/include/net/mac80211.h index 7c399a9c11d..fb9e62211c3 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -1142,7 +1142,7 @@ enum ieee80211_ampdu_mlme_action { * of assocaited station or AP. * * @conf_tx: Configure TX queue parameters (EDCF (aifs, cw_min, cw_max), - * bursting) for a hardware TX queue. Must be atomic. + * bursting) for a hardware TX queue. * * @get_tx_stats: Get statistics of the current TX queue status. This is used * to get number of currently queued packets (queue length), maximum queue diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index 0abd5a4fe38..a03245255ed 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -2872,15 +2872,18 @@ static void ieee80211_rx_mgmt_beacon(struct ieee80211_sub_if_data *sdata, memcmp(ifsta->bssid, mgmt->bssid, ETH_ALEN) != 0) return; - ieee80211_sta_wmm_params(local, ifsta, elems.wmm_param, - elems.wmm_param_len); - /* Do not send changes to driver if we are scanning. This removes - * requirement that driver's bss_info_changed function needs to be - * atomic. */ + * requirement that a driver's bss_info_changed/conf_tx functions + * need to be atomic. + * This is really ugly code, we should rewrite scanning and make + * all this more understandable for humans. + */ if (local->sta_sw_scanning || local->sta_hw_scanning) return; + ieee80211_sta_wmm_params(local, ifsta, elems.wmm_param, + elems.wmm_param_len); + if (elems.erp_info && elems.erp_info_len >= 1) changed |= ieee80211_handle_erp_ie(sdata, elems.erp_info[0]); else { -- cgit v1.2.3-70-g09d2 From 1045b03e07d85f3545118510a587035536030c1c Mon Sep 17 00:00:00 2001 From: Vegard Nossum Date: Thu, 11 Sep 2008 19:05:29 -0700 Subject: netlink: fix overrun in attribute iteration kmemcheck reported this: kmemcheck: Caught 16-bit read from uninitialized memory (f6c1ba30) 0500110001508abf050010000500000002017300140000006f72672e66726565 i i i i i i i i i i i i i u u u u u u u u u u u u u u u u u u u ^ Pid: 3462, comm: wpa_supplicant Not tainted (2.6.27-rc3-00054-g6397ab9-dirty #13) EIP: 0060:[] EFLAGS: 00010296 CPU: 0 EIP is at nla_parse+0x5a/0xf0 EAX: 00000008 EBX: fffffffd ECX: c06f16c0 EDX: 00000005 ESI: 00000010 EDI: f6c1ba30 EBP: f6367c6c ESP: c0a11e88 DS: 007b ES: 007b FS: 00d8 GS: 0033 SS: 0068 CR0: 8005003b CR2: f781cc84 CR3: 3632f000 CR4: 000006d0 DR0: c0ead9bc DR1: 00000000 DR2: 00000000 DR3: 00000000 DR6: ffff4ff0 DR7: 00000400 [] rtnl_setlink+0x63/0x130 [] rtnetlink_rcv_msg+0x165/0x200 [] netlink_rcv_skb+0x76/0xa0 [] rtnetlink_rcv+0x1e/0x30 [] netlink_unicast+0x281/0x290 [] netlink_sendmsg+0x1b9/0x2b0 [] sock_sendmsg+0xd2/0x100 [] sys_sendto+0xa5/0xd0 [] sys_send+0x36/0x40 [] sys_socketcall+0x1e6/0x2c0 [] sysenter_do_call+0x12/0x3f [] 0xffffffff This is the line in nla_ok(): /** * nla_ok - check if the netlink attribute fits into the remaining bytes * @nla: netlink attribute * @remaining: number of bytes remaining in attribute stream */ static inline int nla_ok(const struct nlattr *nla, int remaining) { return remaining >= sizeof(*nla) && nla->nla_len >= sizeof(*nla) && nla->nla_len <= remaining; } It turns out that remaining can become negative due to alignment in nla_next(). But GCC promotes "remaining" to unsigned in the test against sizeof(*nla) above. Therefore the test succeeds, and the nla_for_each_attr() may access memory outside the received buffer. A short example illustrating this point is here: #include main(void) { printf("%d\n", -1 >= sizeof(int)); } ...which prints "1". This patch adds a cast in front of the sizeof so that GCC will make a signed comparison and fix the illegal memory dereference. With the patch applied, there is no kmemcheck report. Signed-off-by: Vegard Nossum Acked-by: Thomas Graf Signed-off-by: David S. Miller --- include/net/netlink.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/net') diff --git a/include/net/netlink.h b/include/net/netlink.h index 18024b8cecb..208fe5a3854 100644 --- a/include/net/netlink.h +++ b/include/net/netlink.h @@ -702,7 +702,7 @@ static inline int nla_len(const struct nlattr *nla) */ static inline int nla_ok(const struct nlattr *nla, int remaining) { - return remaining >= sizeof(*nla) && + return remaining >= (int) sizeof(*nla) && nla->nla_len >= sizeof(*nla) && nla->nla_len <= remaining; } -- cgit v1.2.3-70-g09d2 From ca9b0e27e072be4cef2f5f0cbc0b0fd94eae3520 Mon Sep 17 00:00:00 2001 From: Alexander Duyck Date: Fri, 12 Sep 2008 16:30:20 -0700 Subject: pkt_action: add new action skbedit This new action will have the ability to change the priority and/or queue_mapping fields on an sk_buff. Signed-off-by: Alexander Duyck Signed-off-by: Jeff Kirsher Signed-off-by: David S. Miller --- Documentation/networking/multiqueue.txt | 9 +- include/linux/tc_act/Kbuild | 1 + include/linux/tc_act/tc_skbedit.h | 44 +++++++ include/net/tc_act/tc_skbedit.h | 34 ++++++ net/sched/Kconfig | 11 ++ net/sched/Makefile | 1 + net/sched/act_skbedit.c | 203 ++++++++++++++++++++++++++++++++ 7 files changed, 302 insertions(+), 1 deletion(-) create mode 100644 include/linux/tc_act/tc_skbedit.h create mode 100644 include/net/tc_act/tc_skbedit.h create mode 100644 net/sched/act_skbedit.c (limited to 'include/net') diff --git a/Documentation/networking/multiqueue.txt b/Documentation/networking/multiqueue.txt index 5787ee6eca4..10113ffa807 100644 --- a/Documentation/networking/multiqueue.txt +++ b/Documentation/networking/multiqueue.txt @@ -66,7 +66,14 @@ band 3 => queue 3 Traffic will begin flowing through each queue if your base device has either the default simple_tx_hash or a custom netdev->select_queue() defined. -The behavior of tc filters remains the same. +The behavior of tc filters remains the same. However a new tc action, +skbedit, has been added. Assuming you wanted to route all traffic to a +specific host, for example 192.168.0.3, though a specific queue you could use +this action and establish a filter such as: + +tc filter add dev eth0 parent 1: protocol ip prio 1 u32 \ + match ip dst 192.168.0.3 \ + action skbedit queue_mapping 3 Author: Alexander Duyck Original Author: Peter P. Waskiewicz Jr. diff --git a/include/linux/tc_act/Kbuild b/include/linux/tc_act/Kbuild index 6dac0d7365c..76990937f4c 100644 --- a/include/linux/tc_act/Kbuild +++ b/include/linux/tc_act/Kbuild @@ -3,3 +3,4 @@ header-y += tc_ipt.h header-y += tc_mirred.h header-y += tc_pedit.h header-y += tc_nat.h +header-y += tc_skbedit.h diff --git a/include/linux/tc_act/tc_skbedit.h b/include/linux/tc_act/tc_skbedit.h new file mode 100644 index 00000000000..a14e461a7af --- /dev/null +++ b/include/linux/tc_act/tc_skbedit.h @@ -0,0 +1,44 @@ +/* + * Copyright (c) 2008, Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc., 59 Temple + * Place - Suite 330, Boston, MA 02111-1307 USA. + * + * Author: Alexander Duyck + */ + +#ifndef __LINUX_TC_SKBEDIT_H +#define __LINUX_TC_SKBEDIT_H + +#include + +#define TCA_ACT_SKBEDIT 11 + +#define SKBEDIT_F_PRIORITY 0x1 +#define SKBEDIT_F_QUEUE_MAPPING 0x2 + +struct tc_skbedit { + tc_gen; +}; + +enum { + TCA_SKBEDIT_UNSPEC, + TCA_SKBEDIT_TM, + TCA_SKBEDIT_PARMS, + TCA_SKBEDIT_PRIORITY, + TCA_SKBEDIT_QUEUE_MAPPING, + __TCA_SKBEDIT_MAX +}; +#define TCA_SKBEDIT_MAX (__TCA_SKBEDIT_MAX - 1) + +#endif diff --git a/include/net/tc_act/tc_skbedit.h b/include/net/tc_act/tc_skbedit.h new file mode 100644 index 00000000000..6abb3ed3ebf --- /dev/null +++ b/include/net/tc_act/tc_skbedit.h @@ -0,0 +1,34 @@ +/* + * Copyright (c) 2008, Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc., 59 Temple + * Place - Suite 330, Boston, MA 02111-1307 USA. + * + * Author: Alexander Duyck + */ + +#ifndef __NET_TC_SKBEDIT_H +#define __NET_TC_SKBEDIT_H + +#include + +struct tcf_skbedit { + struct tcf_common common; + u32 flags; + u32 priority; + u16 queue_mapping; +}; +#define to_skbedit(pc) \ + container_of(pc, struct tcf_skbedit, common) + +#endif /* __NET_TC_SKBEDIT_H */ diff --git a/net/sched/Kconfig b/net/sched/Kconfig index efaa7a75e7f..6767e54155d 100644 --- a/net/sched/Kconfig +++ b/net/sched/Kconfig @@ -485,6 +485,17 @@ config NET_ACT_SIMP To compile this code as a module, choose M here: the module will be called simple. +config NET_ACT_SKBEDIT + tristate "SKB Editing" + depends on NET_CLS_ACT + ---help--- + Say Y here to change skb priority or queue_mapping settings. + + If unsure, say N. + + To compile this code as a module, choose M here: the + module will be called skbedit. + config NET_CLS_IND bool "Incoming device classification" depends on NET_CLS_U32 || NET_CLS_FW diff --git a/net/sched/Makefile b/net/sched/Makefile index 3d9b953f7f6..e60c9925b26 100644 --- a/net/sched/Makefile +++ b/net/sched/Makefile @@ -14,6 +14,7 @@ obj-$(CONFIG_NET_ACT_IPT) += act_ipt.o obj-$(CONFIG_NET_ACT_NAT) += act_nat.o obj-$(CONFIG_NET_ACT_PEDIT) += act_pedit.o obj-$(CONFIG_NET_ACT_SIMP) += act_simple.o +obj-$(CONFIG_NET_ACT_SKBEDIT) += act_skbedit.o obj-$(CONFIG_NET_SCH_FIFO) += sch_fifo.o obj-$(CONFIG_NET_SCH_CBQ) += sch_cbq.o obj-$(CONFIG_NET_SCH_HTB) += sch_htb.o diff --git a/net/sched/act_skbedit.c b/net/sched/act_skbedit.c new file mode 100644 index 00000000000..fe9777e77f3 --- /dev/null +++ b/net/sched/act_skbedit.c @@ -0,0 +1,203 @@ +/* + * Copyright (c) 2008, Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc., 59 Temple + * Place - Suite 330, Boston, MA 02111-1307 USA. + * + * Author: Alexander Duyck + */ + +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +#define SKBEDIT_TAB_MASK 15 +static struct tcf_common *tcf_skbedit_ht[SKBEDIT_TAB_MASK + 1]; +static u32 skbedit_idx_gen; +static DEFINE_RWLOCK(skbedit_lock); + +static struct tcf_hashinfo skbedit_hash_info = { + .htab = tcf_skbedit_ht, + .hmask = SKBEDIT_TAB_MASK, + .lock = &skbedit_lock, +}; + +static int tcf_skbedit(struct sk_buff *skb, struct tc_action *a, + struct tcf_result *res) +{ + struct tcf_skbedit *d = a->priv; + + spin_lock(&d->tcf_lock); + d->tcf_tm.lastuse = jiffies; + d->tcf_bstats.bytes += qdisc_pkt_len(skb); + d->tcf_bstats.packets++; + + if (d->flags & SKBEDIT_F_PRIORITY) + skb->priority = d->priority; + if (d->flags & SKBEDIT_F_QUEUE_MAPPING && + skb->dev->real_num_tx_queues > d->queue_mapping) + skb_set_queue_mapping(skb, d->queue_mapping); + + spin_unlock(&d->tcf_lock); + return d->tcf_action; +} + +static const struct nla_policy skbedit_policy[TCA_SKBEDIT_MAX + 1] = { + [TCA_SKBEDIT_PARMS] = { .len = sizeof(struct tc_skbedit) }, + [TCA_SKBEDIT_PRIORITY] = { .len = sizeof(u32) }, + [TCA_SKBEDIT_QUEUE_MAPPING] = { .len = sizeof(u16) }, +}; + +static int tcf_skbedit_init(struct nlattr *nla, struct nlattr *est, + struct tc_action *a, int ovr, int bind) +{ + struct nlattr *tb[TCA_SKBEDIT_MAX + 1]; + struct tc_skbedit *parm; + struct tcf_skbedit *d; + struct tcf_common *pc; + u32 flags = 0, *priority = NULL; + u16 *queue_mapping = NULL; + int ret = 0, err; + + if (nla == NULL) + return -EINVAL; + + err = nla_parse_nested(tb, TCA_SKBEDIT_MAX, nla, skbedit_policy); + if (err < 0) + return err; + + if (tb[TCA_SKBEDIT_PARMS] == NULL) + return -EINVAL; + + if (tb[TCA_SKBEDIT_PRIORITY] != NULL) { + flags |= SKBEDIT_F_PRIORITY; + priority = nla_data(tb[TCA_SKBEDIT_PRIORITY]); + } + + if (tb[TCA_SKBEDIT_QUEUE_MAPPING] != NULL) { + flags |= SKBEDIT_F_QUEUE_MAPPING; + queue_mapping = nla_data(tb[TCA_SKBEDIT_QUEUE_MAPPING]); + } + if (!flags) + return -EINVAL; + + parm = nla_data(tb[TCA_SKBEDIT_PARMS]); + + pc = tcf_hash_check(parm->index, a, bind, &skbedit_hash_info); + if (!pc) { + pc = tcf_hash_create(parm->index, est, a, sizeof(*d), bind, + &skbedit_idx_gen, &skbedit_hash_info); + if (unlikely(!pc)) + return -ENOMEM; + + d = to_skbedit(pc); + ret = ACT_P_CREATED; + } else { + d = to_skbedit(pc); + if (!ovr) { + tcf_hash_release(pc, bind, &skbedit_hash_info); + return -EEXIST; + } + } + + spin_lock_bh(&d->tcf_lock); + + d->flags = flags; + if (flags & SKBEDIT_F_PRIORITY) + d->priority = *priority; + if (flags & SKBEDIT_F_QUEUE_MAPPING) + d->queue_mapping = *queue_mapping; + d->tcf_action = parm->action; + + spin_unlock_bh(&d->tcf_lock); + + if (ret == ACT_P_CREATED) + tcf_hash_insert(pc, &skbedit_hash_info); + return ret; +} + +static inline int tcf_skbedit_cleanup(struct tc_action *a, int bind) +{ + struct tcf_skbedit *d = a->priv; + + if (d) + return tcf_hash_release(&d->common, bind, &skbedit_hash_info); + return 0; +} + +static inline int tcf_skbedit_dump(struct sk_buff *skb, struct tc_action *a, + int bind, int ref) +{ + unsigned char *b = skb_tail_pointer(skb); + struct tcf_skbedit *d = a->priv; + struct tc_skbedit opt; + struct tcf_t t; + + opt.index = d->tcf_index; + opt.refcnt = d->tcf_refcnt - ref; + opt.bindcnt = d->tcf_bindcnt - bind; + opt.action = d->tcf_action; + NLA_PUT(skb, TCA_SKBEDIT_PARMS, sizeof(opt), &opt); + if (d->flags & SKBEDIT_F_PRIORITY) + NLA_PUT(skb, TCA_SKBEDIT_PRIORITY, sizeof(d->priority), + &d->priority); + if (d->flags & SKBEDIT_F_QUEUE_MAPPING) + NLA_PUT(skb, TCA_SKBEDIT_QUEUE_MAPPING, + sizeof(d->queue_mapping), &d->queue_mapping); + t.install = jiffies_to_clock_t(jiffies - d->tcf_tm.install); + t.lastuse = jiffies_to_clock_t(jiffies - d->tcf_tm.lastuse); + t.expires = jiffies_to_clock_t(d->tcf_tm.expires); + NLA_PUT(skb, TCA_SKBEDIT_TM, sizeof(t), &t); + return skb->len; + +nla_put_failure: + nlmsg_trim(skb, b); + return -1; +} + +static struct tc_action_ops act_skbedit_ops = { + .kind = "skbedit", + .hinfo = &skbedit_hash_info, + .type = TCA_ACT_SKBEDIT, + .capab = TCA_CAP_NONE, + .owner = THIS_MODULE, + .act = tcf_skbedit, + .dump = tcf_skbedit_dump, + .cleanup = tcf_skbedit_cleanup, + .init = tcf_skbedit_init, + .walk = tcf_generic_walker, +}; + +MODULE_AUTHOR("Alexander Duyck, "); +MODULE_DESCRIPTION("SKB Editing"); +MODULE_LICENSE("GPL"); + +static int __init skbedit_init_module(void) +{ + return tcf_register_action(&act_skbedit_ops); +} + +static void __exit skbedit_cleanup_module(void) +{ + tcf_unregister_action(&act_skbedit_ops); +} + +module_init(skbedit_init_module); +module_exit(skbedit_cleanup_module); -- cgit v1.2.3-70-g09d2 From b2e1b30290539b344cbaff0d9da38012e03aa347 Mon Sep 17 00:00:00 2001 From: "Luis R. Rodriguez" Date: Tue, 9 Sep 2008 23:19:48 -0700 Subject: cfg80211: Add new wireless regulatory infrastructure This adds the new wireless regulatory infrastructure. The main motiviation behind this was to centralize regulatory code as each driver was implementing their own regulatory solution, and to replace the initial centralized code we have where: * only 3 regulatory domains are supported: US, JP and EU * regulatory domains can only be changed through module parameter * all rules were built statically in the kernel We now have support for regulatory domains for many countries and regulatory domains are now queried through a userspace agent through udev allowing distributions to update regulatory rules without updating the kernel. Each driver can regulatory_hint() a regulatory domain based on either their EEPROM mapped regulatory domain value to a respective ISO/IEC 3166-1 country code or pass an internally built regulatory domain. We also add support to let the user set the regulatory domain through userspace in case of faulty EEPROMs to further help compliance. Support for world roaming will be added soon for cards capable of this. For more information see: http://wireless.kernel.org/en/developers/Regulatory/CRDA For now we leave an option to enable the old module parameter, ieee80211_regdom, and to build the 3 old regdomains statically (US, JP and EU). This option is CONFIG_WIRELESS_OLD_REGULATORY. These old static definitions and the module parameter is being scheduled for removal for 2.6.29. Note that if you use this you won't make use of a world regulatory domain as its pointless. If you leave this option enabled and if CRDA is present and you use US or JP we will try to ask CRDA to update us a regulatory domain for us. Signed-off-by: Luis R. Rodriguez Signed-off-by: John W. Linville --- Documentation/feature-removal-schedule.txt | 18 + Documentation/networking/regulatory.txt | 194 +++++++ include/linux/nl80211.h | 96 +++- include/net/cfg80211.h | 60 +++ include/net/mac80211.h | 2 + include/net/wireless.h | 58 +++ net/mac80211/cfg.c | 7 + net/wireless/Kconfig | 32 ++ net/wireless/core.c | 162 +++++- net/wireless/core.h | 2 +- net/wireless/nl80211.c | 151 ++++++ net/wireless/reg.c | 805 +++++++++++++++++++++++++---- net/wireless/reg.h | 44 ++ 13 files changed, 1513 insertions(+), 118 deletions(-) create mode 100644 Documentation/networking/regulatory.txt create mode 100644 net/wireless/reg.h (limited to 'include/net') diff --git a/Documentation/feature-removal-schedule.txt b/Documentation/feature-removal-schedule.txt index eb1a47b9742..c93fcdec246 100644 --- a/Documentation/feature-removal-schedule.txt +++ b/Documentation/feature-removal-schedule.txt @@ -6,6 +6,24 @@ be removed from this file. --------------------------- +What: old static regulatory information and ieee80211_regdom module parameter +When: 2.6.29 +Why: The old regulatory infrastructure has been replaced with a new one + which does not require statically defined regulatory domains. We do + not want to keep static regulatory domains in the kernel due to the + the dynamic nature of regulatory law and localization. We kept around + the old static definitions for the regulatory domains of: + * US + * JP + * EU + and used by default the US when CONFIG_WIRELESS_OLD_REGULATORY was + set. We also kept around the ieee80211_regdom module parameter in case + some applications were relying on it. Changing regulatory domains + can now be done instead by using nl80211, as is done with iw. +Who: Luis R. Rodriguez + +--------------------------- + What: dev->power.power_state When: July 2007 Why: Broken design for runtime control over driver power states, confusing diff --git a/Documentation/networking/regulatory.txt b/Documentation/networking/regulatory.txt new file mode 100644 index 00000000000..a96989a8ff3 --- /dev/null +++ b/Documentation/networking/regulatory.txt @@ -0,0 +1,194 @@ +Linux wireless regulatory documentation +--------------------------------------- + +This document gives a brief review over how the Linux wireless +regulatory infrastructure works. + +More up to date information can be obtained at the project's web page: + +http://wireless.kernel.org/en/developers/Regulatory + +Keeping regulatory domains in userspace +--------------------------------------- + +Due to the dynamic nature of regulatory domains we keep them +in userspace and provide a framework for userspace to upload +to the kernel one regulatory domain to be used as the central +core regulatory domain all wireless devices should adhere to. + +How to get regulatory domains to the kernel +------------------------------------------- + +Userspace gets a regulatory domain in the kernel by having +a userspace agent build it and send it via nl80211. Only +expected regulatory domains will be respected by the kernel. + +A currently available userspace agent which can accomplish this +is CRDA - central regulatory domain agent. Its documented here: + +http://wireless.kernel.org/en/developers/Regulatory/CRDA + +Essentially the kernel will send a udev event when it knows +it needs a new regulatory domain. A udev rule can be put in place +to trigger crda to send the respective regulatory domain for a +specific ISO/IEC 3166 alpha2. + +Below is an example udev rule which can be used: + +# Example file, should be put in /etc/udev/rules.d/regulatory.rules +KERNEL=="regulatory*", ACTION=="change", SUBSYSTEM=="platform", RUN+="/sbin/crda" + +The alpha2 is passed as an environment variable under the variable COUNTRY. + +Who asks for regulatory domains? +-------------------------------- + +* Users + +Users can use iw: + +http://wireless.kernel.org/en/users/Documentation/iw + +An example: + + # set regulatory domain to "Costa Rica" + iw reg set CR + +This will request the kernel to set the regulatory domain to +the specificied alpha2. The kernel in turn will then ask userspace +to provide a regulatory domain for the alpha2 specified by the user +by sending a uevent. + +* Wireless subsystems for Country Information elements + +The kernel will send a uevent to inform userspace a new +regulatory domain is required. More on this to be added +as its integration is added. + +* Drivers + +If drivers determine they need a specific regulatory domain +set they can inform the wireless core using regulatory_hint(). +They have two options -- they either provide an alpha2 so that +crda can provide back a regulatory domain for that country or +they can build their own regulatory domain based on internal +custom knowledge so the wireless core can respect it. + +*Most* drivers will rely on the first mechanism of providing a +regulatory hint with an alpha2. For these drivers there is an additional +check that can be used to ensure compliance based on custom EEPROM +regulatory data. This additional check can be used by drivers by +registering on its struct wiphy a reg_notifier() callback. This notifier +is called when the core's regulatory domain has been changed. The driver +can use this to review the changes made and also review who made them +(driver, user, country IE) and determine what to allow based on its +internal EEPROM data. Devices drivers wishing to be capable of world +roaming should use this callback. More on world roaming will be +added to this document when its support is enabled. + +Device drivers who provide their own built regulatory domain +do not need a callback as the channels registered by them are +the only ones that will be allowed and therefore *additional* +cannels cannot be enabled. + +Example code - drivers hinting an alpha2: +------------------------------------------ + +This example comes from the zd1211rw device driver. You can start +by having a mapping of your device's EEPROM country/regulatory +domain value to to a specific alpha2 as follows: + +static struct zd_reg_alpha2_map reg_alpha2_map[] = { + { ZD_REGDOMAIN_FCC, "US" }, + { ZD_REGDOMAIN_IC, "CA" }, + { ZD_REGDOMAIN_ETSI, "DE" }, /* Generic ETSI, use most restrictive */ + { ZD_REGDOMAIN_JAPAN, "JP" }, + { ZD_REGDOMAIN_JAPAN_ADD, "JP" }, + { ZD_REGDOMAIN_SPAIN, "ES" }, + { ZD_REGDOMAIN_FRANCE, "FR" }, + +Then you can define a routine to map your read EEPROM value to an alpha2, +as follows: + +static int zd_reg2alpha2(u8 regdomain, char *alpha2) +{ + unsigned int i; + struct zd_reg_alpha2_map *reg_map; + for (i = 0; i < ARRAY_SIZE(reg_alpha2_map); i++) { + reg_map = ®_alpha2_map[i]; + if (regdomain == reg_map->reg) { + alpha2[0] = reg_map->alpha2[0]; + alpha2[1] = reg_map->alpha2[1]; + return 0; + } + } + return 1; +} + +Lastly, you can then hint to the core of your discovered alpha2, if a match +was found. You need to do this after you have registered your wiphy. You +are expected to do this during initialization. + + r = zd_reg2alpha2(mac->regdomain, alpha2); + if (!r) + regulatory_hint(hw->wiphy, alpha2, NULL); + +Example code - drivers providing a built in regulatory domain: +-------------------------------------------------------------- + +If you have regulatory information you can obtain from your +driver and you *need* to use this we let you build a regulatory domain +structure and pass it to the wireless core. To do this you should +kmalloc() a structure big enough to hold your regulatory domain +structure and you should then fill it with your data. Finally you simply +call regulatory_hint() with the regulatory domain structure in it. + +Bellow is a simple example, with a regulatory domain cached using the stack. +Your implementation may vary (read EEPROM cache instead, for example). + +Example cache of some regulatory domain + +struct ieee80211_regdomain mydriver_jp_regdom = { + .n_reg_rules = 3, + .alpha2 = "JP", + //.alpha2 = "99", /* If I have no alpha2 to map it to */ + .reg_rules = { + /* IEEE 802.11b/g, channels 1..14 */ + REG_RULE(2412-20, 2484+20, 40, 6, 20, 0), + /* IEEE 802.11a, channels 34..48 */ + REG_RULE(5170-20, 5240+20, 40, 6, 20, + NL80211_RRF_PASSIVE_SCAN), + /* IEEE 802.11a, channels 52..64 */ + REG_RULE(5260-20, 5320+20, 40, 6, 20, + NL80211_RRF_NO_IBSS | + NL80211_RRF_DFS), + } +}; + +Then in some part of your code after your wiphy has been registered: + + int r; + struct ieee80211_regdomain *rd; + int size_of_regd; + int num_rules = mydriver_jp_regdom.n_reg_rules; + unsigned int i; + + size_of_regd = sizeof(struct ieee80211_regdomain) + + (num_rules * sizeof(struct ieee80211_reg_rule)); + + rd = kzalloc(size_of_regd, GFP_KERNEL); + if (!rd) + return -ENOMEM; + + memcpy(rd, &mydriver_jp_regdom, sizeof(struct ieee80211_regdomain)); + + for (i=0; i < num_rules; i++) { + memcpy(&rd->reg_rules[i], &mydriver_jp_regdom.reg_rules[i], + sizeof(struct ieee80211_reg_rule)); + } + r = regulatory_hint(hw->wiphy, NULL, rd); + if (r) { + kfree(rd); + return r; + } + diff --git a/include/linux/nl80211.h b/include/linux/nl80211.h index 5e51f4e7600..9bad65400fb 100644 --- a/include/linux/nl80211.h +++ b/include/linux/nl80211.h @@ -92,6 +92,20 @@ * @NL80211_CMD_SET_BSS: Set BSS attributes for BSS identified by * %NL80211_ATTR_IFINDEX. * + * @NL80211_CMD_SET_REG: Set current regulatory domain. CRDA sends this command + * after being queried by the kernel. CRDA replies by sending a regulatory + * domain structure which consists of %NL80211_ATTR_REG_ALPHA set to our + * current alpha2 if it found a match. It also provides + * NL80211_ATTR_REG_RULE_FLAGS, and a set of regulatory rules. Each + * regulatory rule is a nested set of attributes given by + * %NL80211_ATTR_REG_RULE_FREQ_[START|END] and + * %NL80211_ATTR_FREQ_RANGE_MAX_BW with an attached power rule given by + * %NL80211_ATTR_REG_RULE_POWER_MAX_ANT_GAIN and + * %NL80211_ATTR_REG_RULE_POWER_MAX_EIRP. + * @NL80211_CMD_REQ_SET_REG: ask the wireless core to set the regulatory domain + * to the the specified ISO/IEC 3166-1 alpha2 country code. The core will + * store this as a valid request and then query userspace for it. + * * @NL80211_CMD_MAX: highest used command number * @__NL80211_CMD_AFTER_LAST: internal use */ @@ -131,7 +145,10 @@ enum nl80211_commands { NL80211_CMD_SET_BSS, - /* add commands here */ + NL80211_CMD_SET_REG, + NL80211_CMD_REQ_SET_REG, + + /* add new commands above here */ /* used to define NL80211_CMD_MAX below */ __NL80211_CMD_AFTER_LAST, @@ -197,10 +214,21 @@ enum nl80211_commands { * info given for %NL80211_CMD_GET_MPATH, nested attribute described at * &enum nl80211_mpath_info. * - * * @NL80211_ATTR_MNTR_FLAGS: flags, nested element with NLA_FLAG attributes of * &enum nl80211_mntr_flags. * + * @NL80211_ATTR_REG_ALPHA2: an ISO-3166-alpha2 country code for which the + * current regulatory domain should be set to or is already set to. + * For example, 'CR', for Costa Rica. This attribute is used by the kernel + * to query the CRDA to retrieve one regulatory domain. This attribute can + * also be used by userspace to query the kernel for the currently set + * regulatory domain. We chose an alpha2 as that is also used by the + * IEEE-802.11d country information element to identify a country. + * Users can also simply ask the wireless core to set regulatory domain + * to a specific alpha2. + * @NL80211_ATTR_REG_RULES: a nested array of regulatory domain regulatory + * rules. + * * @NL80211_ATTR_BSS_CTS_PROT: whether CTS protection is enabled (u8, 0 or 1) * @NL80211_ATTR_BSS_SHORT_PREAMBLE: whether short preamble is enabled * (u8, 0 or 1) @@ -265,6 +293,9 @@ enum nl80211_attrs { NL80211_ATTR_SUPPORTED_IFTYPES, + NL80211_ATTR_REG_ALPHA2, + NL80211_ATTR_REG_RULES, + /* add attributes here, update the policy in nl80211.c */ __NL80211_ATTR_AFTER_LAST, @@ -278,6 +309,7 @@ enum nl80211_attrs { #define NL80211_ATTR_HT_CAPABILITY NL80211_ATTR_HT_CAPABILITY #define NL80211_MAX_SUPP_RATES 32 +#define NL80211_MAX_SUPP_REG_RULES 32 #define NL80211_TKIP_DATA_OFFSET_ENCR_KEY 0 #define NL80211_TKIP_DATA_OFFSET_TX_MIC_KEY 16 #define NL80211_TKIP_DATA_OFFSET_RX_MIC_KEY 24 @@ -472,6 +504,66 @@ enum nl80211_bitrate_attr { NL80211_BITRATE_ATTR_MAX = __NL80211_BITRATE_ATTR_AFTER_LAST - 1 }; +/** + * enum nl80211_reg_rule_attr - regulatory rule attributes + * @NL80211_ATTR_REG_RULE_FLAGS: a set of flags which specify additional + * considerations for a given frequency range. These are the + * &enum nl80211_reg_rule_flags. + * @NL80211_ATTR_FREQ_RANGE_START: starting frequencry for the regulatory + * rule in KHz. This is not a center of frequency but an actual regulatory + * band edge. + * @NL80211_ATTR_FREQ_RANGE_END: ending frequency for the regulatory rule + * in KHz. This is not a center a frequency but an actual regulatory + * band edge. + * @NL80211_ATTR_FREQ_RANGE_MAX_BW: maximum allowed bandwidth for this + * frequency range, in KHz. + * @NL80211_ATTR_POWER_RULE_MAX_ANT_GAIN: the maximum allowed antenna gain + * for a given frequency range. The value is in mBi (100 * dBi). + * If you don't have one then don't send this. + * @NL80211_ATTR_POWER_RULE_MAX_EIRP: the maximum allowed EIRP for + * a given frequency range. The value is in mBm (100 * dBm). + */ +enum nl80211_reg_rule_attr { + __NL80211_REG_RULE_ATTR_INVALID, + NL80211_ATTR_REG_RULE_FLAGS, + + NL80211_ATTR_FREQ_RANGE_START, + NL80211_ATTR_FREQ_RANGE_END, + NL80211_ATTR_FREQ_RANGE_MAX_BW, + + NL80211_ATTR_POWER_RULE_MAX_ANT_GAIN, + NL80211_ATTR_POWER_RULE_MAX_EIRP, + + /* keep last */ + __NL80211_REG_RULE_ATTR_AFTER_LAST, + NL80211_REG_RULE_ATTR_MAX = __NL80211_REG_RULE_ATTR_AFTER_LAST - 1 +}; + +/** + * enum nl80211_reg_rule_flags - regulatory rule flags + * + * @NL80211_RRF_NO_OFDM: OFDM modulation not allowed + * @NL80211_RRF_NO_CCK: CCK modulation not allowed + * @NL80211_RRF_NO_INDOOR: indoor operation not allowed + * @NL80211_RRF_NO_OUTDOOR: outdoor operation not allowed + * @NL80211_RRF_DFS: DFS support is required to be used + * @NL80211_RRF_PTP_ONLY: this is only for Point To Point links + * @NL80211_RRF_PTMP_ONLY: this is only for Point To Multi Point links + * @NL80211_RRF_PASSIVE_SCAN: passive scan is required + * @NL80211_RRF_NO_IBSS: no IBSS is allowed + */ +enum nl80211_reg_rule_flags { + NL80211_RRF_NO_OFDM = 1<<0, + NL80211_RRF_NO_CCK = 1<<1, + NL80211_RRF_NO_INDOOR = 1<<2, + NL80211_RRF_NO_OUTDOOR = 1<<3, + NL80211_RRF_DFS = 1<<4, + NL80211_RRF_PTP_ONLY = 1<<5, + NL80211_RRF_PTMP_ONLY = 1<<6, + NL80211_RRF_PASSIVE_SCAN = 1<<7, + NL80211_RRF_NO_IBSS = 1<<8, +}; + /** * enum nl80211_mntr_flags - monitor configuration flags * diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index 0a72d1e3d3a..9f40c4d417d 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -287,6 +287,66 @@ struct bss_parameters { int use_short_slot_time; }; +/** + * enum reg_set_by - Indicates who is trying to set the regulatory domain + * @REGDOM_SET_BY_INIT: regulatory domain was set by initialization. We will be + * using a static world regulatory domain by default. + * @REGDOM_SET_BY_CORE: Core queried CRDA for a dynamic world regulatory domain. + * @REGDOM_SET_BY_USER: User asked the wireless core to set the + * regulatory domain. + * @REGDOM_SET_BY_DRIVER: a wireless drivers has hinted to the wireless core + * it thinks its knows the regulatory domain we should be in. + * @REGDOM_SET_BY_COUNTRY_IE: the wireless core has received an 802.11 country + * information element with regulatory information it thinks we + * should consider. + */ +enum reg_set_by { + REGDOM_SET_BY_INIT, + REGDOM_SET_BY_CORE, + REGDOM_SET_BY_USER, + REGDOM_SET_BY_DRIVER, + REGDOM_SET_BY_COUNTRY_IE, +}; + +struct ieee80211_freq_range { + u32 start_freq_khz; + u32 end_freq_khz; + u32 max_bandwidth_khz; +}; + +struct ieee80211_power_rule { + u32 max_antenna_gain; + u32 max_eirp; +}; + +struct ieee80211_reg_rule { + struct ieee80211_freq_range freq_range; + struct ieee80211_power_rule power_rule; + u32 flags; +}; + +struct ieee80211_regdomain { + u32 n_reg_rules; + char alpha2[2]; + struct ieee80211_reg_rule reg_rules[]; +}; + +#define MHZ_TO_KHZ(freq) (freq * 1000) +#define KHZ_TO_MHZ(freq) (freq / 1000) +#define DBI_TO_MBI(gain) (gain * 100) +#define MBI_TO_DBI(gain) (gain / 100) +#define DBM_TO_MBM(gain) (gain * 100) +#define MBM_TO_DBM(gain) (gain / 100) + +#define REG_RULE(start, end, bw, gain, eirp, reg_flags) { \ + .freq_range.start_freq_khz = (start) * 1000, \ + .freq_range.end_freq_khz = (end) * 1000, \ + .freq_range.max_bandwidth_khz = (bw) * 1000, \ + .power_rule.max_antenna_gain = (gain) * 100, \ + .power_rule.max_eirp = (eirp) * 100, \ + .flags = reg_flags, \ + } + /* from net/wireless.h */ struct wiphy; diff --git a/include/net/mac80211.h b/include/net/mac80211.h index fb9e62211c3..f504e3eca7d 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -833,6 +833,8 @@ struct ieee80211_hw { s8 max_signal; }; +struct ieee80211_hw *wiphy_to_hw(struct wiphy *wiphy); + /** * SET_IEEE80211_DEV - set device for 802.11 hardware * diff --git a/include/net/wireless.h b/include/net/wireless.h index 1dc8ec3daa2..e4378cc6bf8 100644 --- a/include/net/wireless.h +++ b/include/net/wireless.h @@ -60,6 +60,7 @@ enum ieee80211_channel_flags { * with cfg80211. * * @center_freq: center frequency in MHz + * @max_bandwidth: maximum allowed bandwidth for this channel, in MHz * @hw_value: hardware-specific value for the channel * @flags: channel flags from &enum ieee80211_channel_flags. * @orig_flags: channel flags at registration time, used by regulatory @@ -73,6 +74,7 @@ enum ieee80211_channel_flags { struct ieee80211_channel { enum ieee80211_band band; u16 center_freq; + u8 max_bandwidth; u16 hw_value; u32 flags; int max_antenna_gain; @@ -178,6 +180,7 @@ struct ieee80211_supported_band { * struct wiphy - wireless hardware description * @idx: the wiphy index assigned to this item * @class_dev: the class device representing /sys/class/ieee80211/ + * @reg_notifier: the driver's regulatory notification callback */ struct wiphy { /* assign these fields before you register the wiphy */ @@ -197,6 +200,9 @@ struct wiphy { struct ieee80211_supported_band *bands[IEEE80211_NUM_BANDS]; + /* Lets us get back the wiphy on the callback */ + int (*reg_notifier)(struct wiphy *wiphy, enum reg_set_by setby); + /* fields below are read-only, assigned by cfg80211 */ /* the item in /sys/class/ieee80211/ points to this, @@ -322,6 +328,58 @@ extern int ieee80211_frequency_to_channel(int freq); */ extern struct ieee80211_channel *__ieee80211_get_channel(struct wiphy *wiphy, int freq); +/** + * __regulatory_hint - hint to the wireless core a regulatory domain + * @wiphy: if a driver is providing the hint this is the driver's very + * own &struct wiphy + * @alpha2: the ISO/IEC 3166 alpha2 being claimed the regulatory domain + * should be in. If @rd is set this should be NULL + * @rd: a complete regulatory domain, if passed the caller need not worry + * about freeing it + * + * The Wireless subsystem can use this function to hint to the wireless core + * what it believes should be the current regulatory domain by + * giving it an ISO/IEC 3166 alpha2 country code it knows its regulatory + * domain should be in or by providing a completely build regulatory domain. + * + * Returns -EALREADY if *a regulatory domain* has already been set. Note that + * this could be by another driver. It is safe for drivers to continue if + * -EALREADY is returned, if drivers are not capable of world roaming they + * should not register more channels than they support. Right now we only + * support listening to the first driver hint. If the driver is capable + * of world roaming but wants to respect its own EEPROM mappings for + * specific regulatory domains it should register the @reg_notifier callback + * on the &struct wiphy. Returns 0 if the hint went through fine or through an + * intersection operation. Otherwise a standard error code is returned. + * + */ +extern int __regulatory_hint(struct wiphy *wiphy, enum reg_set_by set_by, + const char *alpha2, struct ieee80211_regdomain *rd); +/** + * regulatory_hint - driver hint to the wireless core a regulatory domain + * @wiphy: the driver's very own &struct wiphy + * @alpha2: the ISO/IEC 3166 alpha2 the driver claims its regulatory domain + * should be in. If @rd is set this should be NULL. Note that if you + * set this to NULL you should still set rd->alpha2 to some accepted + * alpha2. + * @rd: a complete regulatory domain provided by the driver. If passed + * the driver does not need to worry about freeing it. + * + * Wireless drivers can use this function to hint to the wireless core + * what it believes should be the current regulatory domain by + * giving it an ISO/IEC 3166 alpha2 country code it knows its regulatory + * domain should be in or by providing a completely build regulatory domain. + * If the driver provides an ISO/IEC 3166 alpha2 userspace will be queried + * for a regulatory domain structure for the respective country. If + * a regulatory domain is build and passed you should set the alpha2 + * if possible, otherwise set it to the special value of "99" which tells + * the wireless core it is unknown. If you pass a built regulatory domain + * and we return non zero you are in charge of kfree()'ing the structure. + * + * See __regulatory_hint() documentation for possible return values. + */ +extern int regulatory_hint(struct wiphy *wiphy, + const char *alpha2, struct ieee80211_regdomain *rd); /** * ieee80211_get_channel - get channel struct from wiphy for specified frequency diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index 928813ce08e..5a3bdaad6c1 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -17,6 +17,13 @@ #include "rate.h" #include "mesh.h" +struct ieee80211_hw *wiphy_to_hw(struct wiphy *wiphy) +{ + struct ieee80211_local *local = wiphy_priv(wiphy); + return &local->hw; +} +EXPORT_SYMBOL(wiphy_to_hw); + static enum ieee80211_if_types nl80211_type_to_mac80211_type(enum nl80211_iftype type) { diff --git a/net/wireless/Kconfig b/net/wireless/Kconfig index 833b024f8f6..b97bd9fe6b7 100644 --- a/net/wireless/Kconfig +++ b/net/wireless/Kconfig @@ -14,6 +14,38 @@ config NL80211 If unsure, say Y. +config WIRELESS_OLD_REGULATORY + bool "Old wireless static regulatory defintions" + default n + ---help--- + This option enables the old static regulatory information + and uses it within the new framework. This is available + temporarily as an option to help prevent immediate issues + due to the switch to the new regulatory framework which + does require a new userspace application which has the + database of regulatory information (CRDA) and another for + setting regulatory domains (iw). + + For more information see: + + http://wireless.kernel.org/en/developers/Regulatory/CRDA + http://wireless.kernel.org/en/users/Documentation/iw + + It is important to note though that if you *do* have CRDA present + and if this option is enabled CRDA *will* be called to update the + regulatory domain (for US and JP only). Support for letting the user + set the regulatory domain through iw is also supported. This option + mainly exists to leave around for a kernel release some old static + regulatory domains that were defined and to keep around the old + ieee80211_regdom module parameter. This is being phased out and you + should stop using them ASAP. + + Say N unless you cannot install a new userspace application + or have one currently depending on the ieee80211_regdom module + parameter and cannot port it to use the new userspace interfaces. + + This is scheduled for removal for 2.6.29. + config WIRELESS_EXT bool "Wireless extensions" default n diff --git a/net/wireless/core.c b/net/wireless/core.c index 7e995ac06a0..a910cd2d0fd 100644 --- a/net/wireless/core.c +++ b/net/wireless/core.c @@ -13,12 +13,14 @@ #include #include #include +#include #include #include #include #include "nl80211.h" #include "core.h" #include "sysfs.h" +#include "reg.h" /* name for sysfs, %d is appended */ #define PHY_NAME "phy" @@ -27,6 +29,107 @@ MODULE_AUTHOR("Johannes Berg"); MODULE_LICENSE("GPL"); MODULE_DESCRIPTION("wireless configuration support"); +struct list_head regulatory_requests; + +/* Central wireless core regulatory domains, we only need two, + * the current one and a world regulatory domain in case we have no + * information to give us an alpha2 */ +struct ieee80211_regdomain *cfg80211_regdomain; + +/* We keep a static world regulatory domain in case of the absence of CRDA */ +const struct ieee80211_regdomain world_regdom = { + .n_reg_rules = 1, + .alpha2 = "00", + .reg_rules = { + REG_RULE(2402, 2472, 40, 6, 20, + NL80211_RRF_PASSIVE_SCAN | + NL80211_RRF_NO_IBSS), + } +}; + +#ifdef CONFIG_WIRELESS_OLD_REGULATORY +/* All this fucking static junk will be removed soon, so + * don't fucking count on it !@#$ */ + +static char *ieee80211_regdom = "US"; +module_param(ieee80211_regdom, charp, 0444); +MODULE_PARM_DESC(ieee80211_regdom, "IEEE 802.11 regulatory domain code"); + +/* We assume 40 MHz bandwidth for the old regulatory work. + * We make emphasis we are using the exact same frequencies + * as before */ + +const struct ieee80211_regdomain us_regdom = { + .n_reg_rules = 6, + .alpha2 = "US", + .reg_rules = { + /* IEEE 802.11b/g, channels 1..11 */ + REG_RULE(2412-20, 2462+20, 40, 6, 27, 0), + /* IEEE 802.11a, channel 36 */ + REG_RULE(5180-20, 5180+20, 40, 6, 23, 0), + /* IEEE 802.11a, channel 40 */ + REG_RULE(5200-20, 5200+20, 40, 6, 23, 0), + /* IEEE 802.11a, channel 44 */ + REG_RULE(5220-20, 5220+20, 40, 6, 23, 0), + /* IEEE 802.11a, channels 48..64 */ + REG_RULE(5240-20, 5320+20, 40, 6, 23, 0), + /* IEEE 802.11a, channels 149..165, outdoor */ + REG_RULE(5745-20, 5825+20, 40, 6, 30, 0), + } +}; + +const struct ieee80211_regdomain jp_regdom = { + .n_reg_rules = 3, + .alpha2 = "JP", + .reg_rules = { + /* IEEE 802.11b/g, channels 1..14 */ + REG_RULE(2412-20, 2484+20, 40, 6, 20, 0), + /* IEEE 802.11a, channels 34..48 */ + REG_RULE(5170-20, 5240+20, 40, 6, 20, + NL80211_RRF_PASSIVE_SCAN), + /* IEEE 802.11a, channels 52..64 */ + REG_RULE(5260-20, 5320+20, 40, 6, 20, + NL80211_RRF_NO_IBSS | + NL80211_RRF_DFS), + } +}; + +const struct ieee80211_regdomain eu_regdom = { + .n_reg_rules = 6, + /* This alpha2 is bogus, we leave it here just for stupid + * backward compatibility */ + .alpha2 = "EU", + .reg_rules = { + /* IEEE 802.11b/g, channels 1..13 */ + REG_RULE(2412-20, 2472+20, 40, 6, 20, 0), + /* IEEE 802.11a, channel 36 */ + REG_RULE(5180-20, 5180+20, 40, 6, 23, + NL80211_RRF_PASSIVE_SCAN), + /* IEEE 802.11a, channel 40 */ + REG_RULE(5200-20, 5200+20, 40, 6, 23, + NL80211_RRF_PASSIVE_SCAN), + /* IEEE 802.11a, channel 44 */ + REG_RULE(5220-20, 5220+20, 40, 6, 23, + NL80211_RRF_PASSIVE_SCAN), + /* IEEE 802.11a, channels 48..64 */ + REG_RULE(5240-20, 5320+20, 40, 6, 20, + NL80211_RRF_NO_IBSS | + NL80211_RRF_DFS), + /* IEEE 802.11a, channels 100..140 */ + REG_RULE(5500-20, 5700+20, 40, 6, 30, + NL80211_RRF_NO_IBSS | + NL80211_RRF_DFS), + } +}; + +#endif + +struct ieee80211_regdomain *cfg80211_world_regdom = + (struct ieee80211_regdomain *) &world_regdom; + +LIST_HEAD(regulatory_requests); +DEFINE_MUTEX(cfg80211_reg_mutex); + /* RCU might be appropriate here since we usually * only read the list, and that can happen quite * often because we need to do it for each command */ @@ -302,7 +405,9 @@ int wiphy_register(struct wiphy *wiphy) ieee80211_set_bitrate_flags(wiphy); /* set up regulatory info */ - wiphy_update_regulatory(wiphy); + mutex_lock(&cfg80211_reg_mutex); + wiphy_update_regulatory(wiphy, REGDOM_SET_BY_CORE); + mutex_unlock(&cfg80211_reg_mutex); mutex_lock(&cfg80211_drv_mutex); @@ -409,9 +514,35 @@ static struct notifier_block cfg80211_netdev_notifier = { .notifier_call = cfg80211_netdev_notifier_call, }; +#ifdef CONFIG_WIRELESS_OLD_REGULATORY +const struct ieee80211_regdomain *static_regdom(char *alpha2) +{ + if (alpha2[0] == 'U' && alpha2[1] == 'S') + return &us_regdom; + if (alpha2[0] == 'J' && alpha2[1] == 'P') + return &jp_regdom; + if (alpha2[0] == 'E' && alpha2[1] == 'U') + return &eu_regdom; + /* Default, as per the old rules */ + return &us_regdom; +} +#endif + static int cfg80211_init(void) { - int err = wiphy_sysfs_init(); + int err; + +#ifdef CONFIG_WIRELESS_OLD_REGULATORY + cfg80211_regdomain = + (struct ieee80211_regdomain *) static_regdom(ieee80211_regdom); + /* Used during reset_regdomains_static() */ + cfg80211_world_regdom = cfg80211_regdomain; +#else + cfg80211_regdomain = + (struct ieee80211_regdomain *) cfg80211_world_regdom; +#endif + + err = wiphy_sysfs_init(); if (err) goto out_fail_sysfs; @@ -425,8 +556,33 @@ static int cfg80211_init(void) ieee80211_debugfs_dir = debugfs_create_dir("ieee80211", NULL); + err = regulatory_init(); + if (err) + goto out_fail_reg; + +#ifdef CONFIG_WIRELESS_OLD_REGULATORY + printk(KERN_INFO "cfg80211: Using old static regulatory domain:\n"); + print_regdomain_info(cfg80211_regdomain); + /* The old code still requests for a new regdomain and if + * you have CRDA you get it updated, otherwise you get + * stuck with the static values. We ignore "EU" code as + * that is not a valid ISO / IEC 3166 alpha2 */ + if (ieee80211_regdom[0] != 'E' && + ieee80211_regdom[1] != 'U') + err = __regulatory_hint(NULL, REGDOM_SET_BY_CORE, + ieee80211_regdom, NULL); +#else + err = __regulatory_hint(NULL, REGDOM_SET_BY_CORE, "00", NULL); + if (err) + printk(KERN_ERR "cfg80211: calling CRDA failed - " + "unable to update world regulatory domain, " + "using static definition\n"); +#endif + return 0; +out_fail_reg: + debugfs_remove(ieee80211_debugfs_dir); out_fail_nl80211: unregister_netdevice_notifier(&cfg80211_netdev_notifier); out_fail_notifier: @@ -434,6 +590,7 @@ out_fail_notifier: out_fail_sysfs: return err; } + subsys_initcall(cfg80211_init); static void cfg80211_exit(void) @@ -442,5 +599,6 @@ static void cfg80211_exit(void) nl80211_exit(); unregister_netdevice_notifier(&cfg80211_netdev_notifier); wiphy_sysfs_exit(); + regulatory_exit(); } module_exit(cfg80211_exit); diff --git a/net/wireless/core.h b/net/wireless/core.h index 7a02c356d63..771cc5cc765 100644 --- a/net/wireless/core.h +++ b/net/wireless/core.h @@ -79,6 +79,6 @@ extern int cfg80211_dev_rename(struct cfg80211_registered_device *drv, char *newname); void ieee80211_set_bitrate_flags(struct wiphy *wiphy); -void wiphy_update_regulatory(struct wiphy *wiphy); +void wiphy_update_regulatory(struct wiphy *wiphy, enum reg_set_by setby); #endif /* __NET_WIRELESS_CORE_H */ diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 77880ba8b61..1221d726ed5 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -18,6 +18,7 @@ #include #include "core.h" #include "nl80211.h" +#include "reg.h" /* the netlink family */ static struct genl_family nl80211_fam = { @@ -88,6 +89,9 @@ static struct nla_policy nl80211_policy[NL80211_ATTR_MAX+1] __read_mostly = { .len = IEEE80211_MAX_MESH_ID_LEN }, [NL80211_ATTR_MPATH_NEXT_HOP] = { .type = NLA_U32 }, + [NL80211_ATTR_REG_ALPHA2] = { .type = NLA_STRING, .len = 2 }, + [NL80211_ATTR_REG_RULES] = { .type = NLA_NESTED }, + [NL80211_ATTR_BSS_CTS_PROT] = { .type = NLA_U8 }, [NL80211_ATTR_BSS_SHORT_PREAMBLE] = { .type = NLA_U8 }, [NL80211_ATTR_BSS_SHORT_SLOT_TIME] = { .type = NLA_U8 }, @@ -1599,6 +1603,141 @@ static int nl80211_set_bss(struct sk_buff *skb, struct genl_info *info) return err; } +static const struct nla_policy + reg_rule_policy[NL80211_REG_RULE_ATTR_MAX + 1] = { + [NL80211_ATTR_REG_RULE_FLAGS] = { .type = NLA_U32 }, + [NL80211_ATTR_FREQ_RANGE_START] = { .type = NLA_U32 }, + [NL80211_ATTR_FREQ_RANGE_END] = { .type = NLA_U32 }, + [NL80211_ATTR_FREQ_RANGE_MAX_BW] = { .type = NLA_U32 }, + [NL80211_ATTR_POWER_RULE_MAX_ANT_GAIN] = { .type = NLA_U32 }, + [NL80211_ATTR_POWER_RULE_MAX_EIRP] = { .type = NLA_U32 }, +}; + +static int parse_reg_rule(struct nlattr *tb[], + struct ieee80211_reg_rule *reg_rule) +{ + struct ieee80211_freq_range *freq_range = ®_rule->freq_range; + struct ieee80211_power_rule *power_rule = ®_rule->power_rule; + + if (!tb[NL80211_ATTR_REG_RULE_FLAGS]) + return -EINVAL; + if (!tb[NL80211_ATTR_FREQ_RANGE_START]) + return -EINVAL; + if (!tb[NL80211_ATTR_FREQ_RANGE_END]) + return -EINVAL; + if (!tb[NL80211_ATTR_FREQ_RANGE_MAX_BW]) + return -EINVAL; + if (!tb[NL80211_ATTR_POWER_RULE_MAX_EIRP]) + return -EINVAL; + + reg_rule->flags = nla_get_u32(tb[NL80211_ATTR_REG_RULE_FLAGS]); + + freq_range->start_freq_khz = + nla_get_u32(tb[NL80211_ATTR_FREQ_RANGE_START]); + freq_range->end_freq_khz = + nla_get_u32(tb[NL80211_ATTR_FREQ_RANGE_END]); + freq_range->max_bandwidth_khz = + nla_get_u32(tb[NL80211_ATTR_FREQ_RANGE_MAX_BW]); + + power_rule->max_eirp = + nla_get_u32(tb[NL80211_ATTR_POWER_RULE_MAX_EIRP]); + + if (tb[NL80211_ATTR_POWER_RULE_MAX_ANT_GAIN]) + power_rule->max_antenna_gain = + nla_get_u32(tb[NL80211_ATTR_POWER_RULE_MAX_ANT_GAIN]); + + return 0; +} + +static int nl80211_req_set_reg(struct sk_buff *skb, struct genl_info *info) +{ + int r; + char *data = NULL; + + if (!info->attrs[NL80211_ATTR_REG_ALPHA2]) + return -EINVAL; + + data = nla_data(info->attrs[NL80211_ATTR_REG_ALPHA2]); + +#ifdef CONFIG_WIRELESS_OLD_REGULATORY + /* We ignore world regdom requests with the old regdom setup */ + if (is_world_regdom(data)) + return -EINVAL; +#endif + mutex_lock(&cfg80211_drv_mutex); + r = __regulatory_hint(NULL, REGDOM_SET_BY_USER, data, NULL); + mutex_unlock(&cfg80211_drv_mutex); + return r; +} + +static int nl80211_set_reg(struct sk_buff *skb, struct genl_info *info) +{ + struct nlattr *tb[NL80211_REG_RULE_ATTR_MAX + 1]; + struct nlattr *nl_reg_rule; + char *alpha2 = NULL; + int rem_reg_rules = 0, r = 0; + u32 num_rules = 0, rule_idx = 0, size_of_regd; + struct ieee80211_regdomain *rd = NULL; + + if (!info->attrs[NL80211_ATTR_REG_ALPHA2]) + return -EINVAL; + + if (!info->attrs[NL80211_ATTR_REG_RULES]) + return -EINVAL; + + alpha2 = nla_data(info->attrs[NL80211_ATTR_REG_ALPHA2]); + + nla_for_each_nested(nl_reg_rule, info->attrs[NL80211_ATTR_REG_RULES], + rem_reg_rules) { + num_rules++; + if (num_rules > NL80211_MAX_SUPP_REG_RULES) + goto bad_reg; + } + + if (!reg_is_valid_request(alpha2)) + return -EINVAL; + + size_of_regd = sizeof(struct ieee80211_regdomain) + + (num_rules * sizeof(struct ieee80211_reg_rule)); + + rd = kzalloc(size_of_regd, GFP_KERNEL); + if (!rd) + return -ENOMEM; + + rd->n_reg_rules = num_rules; + rd->alpha2[0] = alpha2[0]; + rd->alpha2[1] = alpha2[1]; + + nla_for_each_nested(nl_reg_rule, info->attrs[NL80211_ATTR_REG_RULES], + rem_reg_rules) { + nla_parse(tb, NL80211_REG_RULE_ATTR_MAX, + nla_data(nl_reg_rule), nla_len(nl_reg_rule), + reg_rule_policy); + r = parse_reg_rule(tb, &rd->reg_rules[rule_idx]); + if (r) + goto bad_reg; + + rule_idx++; + + if (rule_idx > NL80211_MAX_SUPP_REG_RULES) + goto bad_reg; + } + + BUG_ON(rule_idx != num_rules); + + mutex_lock(&cfg80211_drv_mutex); + r = set_regdom(rd); + mutex_unlock(&cfg80211_drv_mutex); + if (r) + goto bad_reg; + + return r; + +bad_reg: + kfree(rd); + return -EINVAL; +} + static struct genl_ops nl80211_ops[] = { { .cmd = NL80211_CMD_GET_WIPHY, @@ -1736,6 +1875,18 @@ static struct genl_ops nl80211_ops[] = { .policy = nl80211_policy, .flags = GENL_ADMIN_PERM, }, + { + .cmd = NL80211_CMD_SET_REG, + .doit = nl80211_set_reg, + .policy = nl80211_policy, + .flags = GENL_ADMIN_PERM, + }, + { + .cmd = NL80211_CMD_REQ_SET_REG, + .doit = nl80211_req_set_reg, + .policy = nl80211_policy, + .flags = GENL_ADMIN_PERM, + }, }; /* multicast groups */ diff --git a/net/wireless/reg.c b/net/wireless/reg.c index 855bff4b325..592b2e391d4 100644 --- a/net/wireless/reg.c +++ b/net/wireless/reg.c @@ -2,179 +2,758 @@ * Copyright 2002-2005, Instant802 Networks, Inc. * Copyright 2005-2006, Devicescape Software, Inc. * Copyright 2007 Johannes Berg + * Copyright 2008 Luis R. Rodriguez * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as * published by the Free Software Foundation. */ -/* - * This regulatory domain control implementation is highly incomplete, it - * only exists for the purpose of not regressing mac80211. - * - * For now, drivers can restrict the set of allowed channels by either - * not registering those channels or setting the IEEE80211_CHAN_DISABLED - * flag; that flag will only be *set* by this code, never *cleared. +/** + * DOC: Wireless regulatory infrastructure * * The usual implementation is for a driver to read a device EEPROM to * determine which regulatory domain it should be operating under, then * looking up the allowable channels in a driver-local table and finally * registering those channels in the wiphy structure. * - * Alternatively, drivers that trust the regulatory domain control here - * will register a complete set of capabilities and the control code - * will restrict the set by setting the IEEE80211_CHAN_* flags. + * Another set of compliance enforcement is for drivers to use their + * own compliance limits which can be stored on the EEPROM. The host + * driver or firmware may ensure these are used. + * + * In addition to all this we provide an extra layer of regulatory + * conformance. For drivers which do not have any regulatory + * information CRDA provides the complete regulatory solution. + * For others it provides a community effort on further restrictions + * to enhance compliance. + * + * Note: When number of rules --> infinity we will not be able to + * index on alpha2 any more, instead we'll probably have to + * rely on some SHA1 checksum of the regdomain for example. + * */ #include +#include +#include +#include +#include #include +#include #include "core.h" +#include "reg.h" -static char *ieee80211_regdom = "US"; -module_param(ieee80211_regdom, charp, 0444); -MODULE_PARM_DESC(ieee80211_regdom, "IEEE 802.11 regulatory domain code"); +/* To trigger userspace events */ +static struct platform_device *reg_pdev; -struct ieee80211_channel_range { - short start_freq; - short end_freq; - int max_power; - int max_antenna_gain; - u32 flags; +/* Keep the ordering from large to small */ +static u32 supported_bandwidths[] = { + MHZ_TO_KHZ(40), + MHZ_TO_KHZ(20), }; -struct ieee80211_regdomain { - const char *code; - const struct ieee80211_channel_range *ranges; - int n_ranges; -}; +bool is_world_regdom(char *alpha2) +{ + if (!alpha2) + return false; + if (alpha2[0] == '0' && alpha2[1] == '0') + return true; + return false; +} -#define RANGE_PWR(_start, _end, _pwr, _ag, _flags) \ - { _start, _end, _pwr, _ag, _flags } +static bool is_alpha2_set(char *alpha2) +{ + if (!alpha2) + return false; + if (alpha2[0] != 0 && alpha2[1] != 0) + return true; + return false; +} +static bool is_alpha_upper(char letter) +{ + /* ASCII A - Z */ + if (letter >= 65 && letter <= 90) + return true; + return false; +} -/* - * Ideally, in the future, these definitions will be loaded from a - * userspace table via some daemon. - */ -static const struct ieee80211_channel_range ieee80211_US_channels[] = { - /* IEEE 802.11b/g, channels 1..11 */ - RANGE_PWR(2412, 2462, 27, 6, 0), - /* IEEE 802.11a, channel 36*/ - RANGE_PWR(5180, 5180, 23, 6, 0), - /* IEEE 802.11a, channel 40*/ - RANGE_PWR(5200, 5200, 23, 6, 0), - /* IEEE 802.11a, channel 44*/ - RANGE_PWR(5220, 5220, 23, 6, 0), - /* IEEE 802.11a, channels 48..64 */ - RANGE_PWR(5240, 5320, 23, 6, 0), - /* IEEE 802.11a, channels 149..165, outdoor */ - RANGE_PWR(5745, 5825, 30, 6, 0), -}; +static bool is_unknown_alpha2(char *alpha2) +{ + if (!alpha2) + return false; + /* Special case where regulatory domain was built by driver + * but a specific alpha2 cannot be determined */ + if (alpha2[0] == '9' && alpha2[1] == '9') + return true; + return false; +} -static const struct ieee80211_channel_range ieee80211_JP_channels[] = { - /* IEEE 802.11b/g, channels 1..14 */ - RANGE_PWR(2412, 2484, 20, 6, 0), - /* IEEE 802.11a, channels 34..48 */ - RANGE_PWR(5170, 5240, 20, 6, IEEE80211_CHAN_PASSIVE_SCAN), - /* IEEE 802.11a, channels 52..64 */ - RANGE_PWR(5260, 5320, 20, 6, IEEE80211_CHAN_NO_IBSS | - IEEE80211_CHAN_RADAR), -}; +static bool is_an_alpha2(char *alpha2) +{ + if (!alpha2) + return false; + if (is_alpha_upper(alpha2[0]) && is_alpha_upper(alpha2[1])) + return true; + return false; +} -static const struct ieee80211_channel_range ieee80211_EU_channels[] = { - /* IEEE 802.11b/g, channels 1..13 */ - RANGE_PWR(2412, 2472, 20, 6, 0), - /* IEEE 802.11a, channel 36*/ - RANGE_PWR(5180, 5180, 23, 6, IEEE80211_CHAN_PASSIVE_SCAN), - /* IEEE 802.11a, channel 40*/ - RANGE_PWR(5200, 5200, 23, 6, IEEE80211_CHAN_PASSIVE_SCAN), - /* IEEE 802.11a, channel 44*/ - RANGE_PWR(5220, 5220, 23, 6, IEEE80211_CHAN_PASSIVE_SCAN), - /* IEEE 802.11a, channels 48..64 */ - RANGE_PWR(5240, 5320, 23, 6, IEEE80211_CHAN_NO_IBSS | - IEEE80211_CHAN_RADAR), - /* IEEE 802.11a, channels 100..140 */ - RANGE_PWR(5500, 5700, 30, 6, IEEE80211_CHAN_NO_IBSS | - IEEE80211_CHAN_RADAR), -}; +static bool alpha2_equal(char *alpha2_x, char *alpha2_y) +{ + if (!alpha2_x || !alpha2_y) + return false; + if (alpha2_x[0] == alpha2_y[0] && + alpha2_x[1] == alpha2_y[1]) + return true; + return false; +} + +static bool regdom_changed(char *alpha2) +{ + if (!cfg80211_regdomain) + return true; + if (alpha2_equal(cfg80211_regdomain->alpha2, alpha2)) + return false; + return true; +} + +/* This lets us keep regulatory code which is updated on a regulatory + * basis in userspace. */ +static int call_crda(const char *alpha2) +{ + char country_env[9 + 2] = "COUNTRY="; + char *envp[] = { + country_env, + NULL + }; + + if (!is_world_regdom((char *) alpha2)) + printk(KERN_INFO "cfg80211: Calling CRDA for country: %c%c\n", + alpha2[0], alpha2[1]); + else +#ifdef CONFIG_WIRELESS_OLD_REGULATORY + return -EINVAL; +#else + printk(KERN_INFO "cfg80211: Calling CRDA to update world " + "regulatory domain\n"); +#endif + + country_env[8] = alpha2[0]; + country_env[9] = alpha2[1]; + + return kobject_uevent_env(®_pdev->dev.kobj, KOBJ_CHANGE, envp); +} + +/* This has the logic which determines when a new request + * should be ignored. */ +static int ignore_request(struct wiphy *wiphy, enum reg_set_by set_by, + char *alpha2, struct ieee80211_regdomain *rd) +{ + struct regulatory_request *last_request = NULL; -#define REGDOM(_code) \ - { \ - .code = __stringify(_code), \ - .ranges = ieee80211_ ##_code## _channels, \ - .n_ranges = ARRAY_SIZE(ieee80211_ ##_code## _channels), \ + /* All initial requests are respected */ + if (list_empty(®ulatory_requests)) + return 0; + + last_request = list_first_entry(®ulatory_requests, + struct regulatory_request, list); + + switch (set_by) { + case REGDOM_SET_BY_INIT: + return -EINVAL; + case REGDOM_SET_BY_CORE: + /* Always respect new wireless core hints, should only + * come in for updating the world regulatory domain at init + * anyway */ + return 0; + case REGDOM_SET_BY_COUNTRY_IE: + if (last_request->initiator == set_by) { + if (last_request->wiphy != wiphy) { + /* Two cards with two APs claiming different + * different Country IE alpha2s! + * You're special!! */ + if (!alpha2_equal(last_request->alpha2, + cfg80211_regdomain->alpha2)) { + /* XXX: Deal with conflict, consider + * building a new one out of the + * intersection */ + WARN_ON(1); + return -EOPNOTSUPP; + } + return -EALREADY; + } + /* Two consecutive Country IE hints on the same wiphy */ + if (!alpha2_equal(cfg80211_regdomain->alpha2, alpha2)) + return 0; + return -EALREADY; + } + if (WARN_ON(!is_alpha2_set(alpha2) || !is_an_alpha2(alpha2)), + "Invalid Country IE regulatory hint passed " + "to the wireless core\n") + return -EINVAL; + /* We ignore Country IE hints for now, as we haven't yet + * added the dot11MultiDomainCapabilityEnabled flag + * for wiphys */ + return 1; + case REGDOM_SET_BY_DRIVER: + BUG_ON(!wiphy); + if (last_request->initiator == set_by) { + /* Two separate drivers hinting different things, + * this is possible if you have two devices present + * on a system with different EEPROM regulatory + * readings. XXX: Do intersection, we support only + * the first regulatory hint for now */ + if (last_request->wiphy != wiphy) + return -EALREADY; + if (rd) + return -EALREADY; + /* Driver should not be trying to hint different + * regulatory domains! */ + BUG_ON(!alpha2_equal(alpha2, + cfg80211_regdomain->alpha2)); + return -EALREADY; + } + if (last_request->initiator == REGDOM_SET_BY_CORE) + return 0; + /* XXX: Handle intersection, and add the + * dot11MultiDomainCapabilityEnabled flag to wiphy. For now + * we assume the driver has this set to false, following the + * 802.11d dot11MultiDomainCapabilityEnabled documentation */ + if (last_request->initiator == REGDOM_SET_BY_COUNTRY_IE) + return 0; + return 0; + case REGDOM_SET_BY_USER: + if (last_request->initiator == set_by || + last_request->initiator == REGDOM_SET_BY_CORE) + return 0; + /* Drivers can use their wiphy's reg_notifier() + * to override any information */ + if (last_request->initiator == REGDOM_SET_BY_DRIVER) + return 0; + /* XXX: Handle intersection */ + if (last_request->initiator == REGDOM_SET_BY_COUNTRY_IE) + return -EOPNOTSUPP; + return 0; + default: + return -EINVAL; } +} -static const struct ieee80211_regdomain ieee80211_regdoms[] = { - REGDOM(US), - REGDOM(JP), - REGDOM(EU), -}; +static bool __reg_is_valid_request(char *alpha2, + struct regulatory_request **request) +{ + struct regulatory_request *req; + if (list_empty(®ulatory_requests)) + return false; + list_for_each_entry(req, ®ulatory_requests, list) { + if (alpha2_equal(req->alpha2, alpha2)) { + *request = req; + return true; + } + } + return false; +} +/* Used by nl80211 before kmalloc'ing our regulatory domain */ +bool reg_is_valid_request(char *alpha2) +{ + struct regulatory_request *request = NULL; + return __reg_is_valid_request(alpha2, &request); +} -static const struct ieee80211_regdomain *get_regdom(void) +/* Sanity check on a regulatory rule */ +static bool is_valid_reg_rule(struct ieee80211_reg_rule *rule) { - static const struct ieee80211_channel_range - ieee80211_world_channels[] = { - /* IEEE 802.11b/g, channels 1..11 */ - RANGE_PWR(2412, 2462, 27, 6, 0), - }; - static const struct ieee80211_regdomain regdom_world = REGDOM(world); - int i; + struct ieee80211_freq_range *freq_range = &rule->freq_range; + u32 freq_diff; + + if (freq_range->start_freq_khz == 0 || freq_range->end_freq_khz == 0) + return false; + + if (freq_range->start_freq_khz > freq_range->end_freq_khz) + return false; + + freq_diff = freq_range->end_freq_khz - freq_range->start_freq_khz; + + if (freq_range->max_bandwidth_khz > freq_diff) + return false; + + return true; +} + +static bool is_valid_rd(struct ieee80211_regdomain *rd) +{ + struct ieee80211_reg_rule *reg_rule = NULL; + unsigned int i; - for (i = 0; i < ARRAY_SIZE(ieee80211_regdoms); i++) - if (strcmp(ieee80211_regdom, ieee80211_regdoms[i].code) == 0) - return &ieee80211_regdoms[i]; + if (!rd->n_reg_rules) + return false; - return ®dom_world; + for (i = 0; i < rd->n_reg_rules; i++) { + reg_rule = &rd->reg_rules[i]; + if (!is_valid_reg_rule(reg_rule)) + return false; + } + + return true; } +/* Returns value in KHz */ +static u32 freq_max_bandwidth(const struct ieee80211_freq_range *freq_range, + u32 freq) +{ + unsigned int i; + for (i = 0; i < ARRAY_SIZE(supported_bandwidths); i++) { + u32 start_freq_khz = freq - supported_bandwidths[i]/2; + u32 end_freq_khz = freq + supported_bandwidths[i]/2; + if (start_freq_khz >= freq_range->start_freq_khz && + end_freq_khz <= freq_range->end_freq_khz) + return supported_bandwidths[i]; + } + return 0; +} -static void handle_channel(struct ieee80211_channel *chan, - const struct ieee80211_regdomain *rd) +/* XXX: add support for the rest of enum nl80211_reg_rule_flags, we may + * want to just have the channel structure use these */ +static u32 map_regdom_flags(u32 rd_flags) +{ + u32 channel_flags = 0; + if (rd_flags & NL80211_RRF_PASSIVE_SCAN) + channel_flags |= IEEE80211_CHAN_PASSIVE_SCAN; + if (rd_flags & NL80211_RRF_NO_IBSS) + channel_flags |= IEEE80211_CHAN_NO_IBSS; + if (rd_flags & NL80211_RRF_DFS) + channel_flags |= IEEE80211_CHAN_RADAR; + return channel_flags; +} + +/** + * freq_reg_info - get regulatory information for the given frequency + * @center_freq: Frequency in KHz for which we want regulatory information for + * @bandwidth: the bandwidth requirement you have in KHz, if you do not have one + * you can set this to 0. If this frequency is allowed we then set + * this value to the maximum allowed bandwidth. + * @reg_rule: the regulatory rule which we have for this frequency + * + * Use this function to get the regulatory rule for a specific frequency. + */ +static int freq_reg_info(u32 center_freq, u32 *bandwidth, + const struct ieee80211_reg_rule **reg_rule) { int i; - u32 flags = chan->orig_flags; - const struct ieee80211_channel_range *rg = NULL; + u32 max_bandwidth = 0; - for (i = 0; i < rd->n_ranges; i++) { - if (rd->ranges[i].start_freq <= chan->center_freq && - chan->center_freq <= rd->ranges[i].end_freq) { - rg = &rd->ranges[i]; + if (!cfg80211_regdomain) + return -EINVAL; + + for (i = 0; i < cfg80211_regdomain->n_reg_rules; i++) { + const struct ieee80211_reg_rule *rr; + const struct ieee80211_freq_range *fr = NULL; + const struct ieee80211_power_rule *pr = NULL; + + rr = &cfg80211_regdomain->reg_rules[i]; + fr = &rr->freq_range; + pr = &rr->power_rule; + max_bandwidth = freq_max_bandwidth(fr, center_freq); + if (max_bandwidth && *bandwidth <= max_bandwidth) { + *reg_rule = rr; + *bandwidth = max_bandwidth; break; } } - if (!rg) { - /* not found */ + return !max_bandwidth; +} + +static void handle_channel(struct ieee80211_channel *chan) +{ + int r; + u32 flags = chan->orig_flags; + u32 max_bandwidth = 0; + const struct ieee80211_reg_rule *reg_rule = NULL; + const struct ieee80211_power_rule *power_rule = NULL; + + r = freq_reg_info(MHZ_TO_KHZ(chan->center_freq), + &max_bandwidth, ®_rule); + + if (r) { flags |= IEEE80211_CHAN_DISABLED; chan->flags = flags; return; } - chan->flags = flags; + power_rule = ®_rule->power_rule; + + chan->flags = flags | map_regdom_flags(reg_rule->flags); chan->max_antenna_gain = min(chan->orig_mag, - rg->max_antenna_gain); + (int) MBI_TO_DBI(power_rule->max_antenna_gain)); + chan->max_bandwidth = KHZ_TO_MHZ(max_bandwidth); if (chan->orig_mpwr) - chan->max_power = min(chan->orig_mpwr, rg->max_power); + chan->max_power = min(chan->orig_mpwr, + (int) MBM_TO_DBM(power_rule->max_eirp)); else - chan->max_power = rg->max_power; + chan->max_power = (int) MBM_TO_DBM(power_rule->max_eirp); } -static void handle_band(struct ieee80211_supported_band *sband, - const struct ieee80211_regdomain *rd) +static void handle_band(struct ieee80211_supported_band *sband) { int i; for (i = 0; i < sband->n_channels; i++) - handle_channel(&sband->channels[i], rd); + handle_channel(&sband->channels[i]); } -void wiphy_update_regulatory(struct wiphy *wiphy) +static void update_all_wiphy_regulatory(enum reg_set_by setby) { - enum ieee80211_band band; - const struct ieee80211_regdomain *rd = get_regdom(); + struct cfg80211_registered_device *drv; - for (band = 0; band < IEEE80211_NUM_BANDS; band++) + list_for_each_entry(drv, &cfg80211_drv_list, list) + wiphy_update_regulatory(&drv->wiphy, setby); +} + +void wiphy_update_regulatory(struct wiphy *wiphy, enum reg_set_by setby) +{ + enum ieee80211_band band; + for (band = 0; band < IEEE80211_NUM_BANDS; band++) { if (wiphy->bands[band]) - handle_band(wiphy->bands[band], rd); + handle_band(wiphy->bands[band]); + if (wiphy->reg_notifier) + wiphy->reg_notifier(wiphy, setby); + } +} + +/* Caller must hold &cfg80211_drv_mutex */ +int __regulatory_hint(struct wiphy *wiphy, enum reg_set_by set_by, + const char *alpha2, struct ieee80211_regdomain *rd) +{ + struct regulatory_request *request; + char *rd_alpha2; + int r = 0; + + r = ignore_request(wiphy, set_by, (char *) alpha2, rd); + if (r) + return r; + + if (rd) + rd_alpha2 = rd->alpha2; + else + rd_alpha2 = (char *) alpha2; + + switch (set_by) { + case REGDOM_SET_BY_CORE: + case REGDOM_SET_BY_COUNTRY_IE: + case REGDOM_SET_BY_DRIVER: + case REGDOM_SET_BY_USER: + request = kzalloc(sizeof(struct regulatory_request), + GFP_KERNEL); + if (!request) + return -ENOMEM; + + request->alpha2[0] = rd_alpha2[0]; + request->alpha2[1] = rd_alpha2[1]; + request->initiator = set_by; + request->wiphy = wiphy; + + list_add_tail(&request->list, ®ulatory_requests); + if (rd) + break; + r = call_crda(alpha2); +#ifndef CONFIG_WIRELESS_OLD_REGULATORY + if (r) + printk(KERN_ERR "cfg80211: Failed calling CRDA\n"); +#endif + break; + default: + r = -ENOTSUPP; + break; + } + + return r; +} + +/* If rd is not NULL and if this call fails the caller must free it */ +int regulatory_hint(struct wiphy *wiphy, const char *alpha2, + struct ieee80211_regdomain *rd) +{ + int r; + BUG_ON(!rd && !alpha2); + + mutex_lock(&cfg80211_drv_mutex); + + r = __regulatory_hint(wiphy, REGDOM_SET_BY_DRIVER, alpha2, rd); + if (r || !rd) + goto unlock_and_exit; + + /* If the driver passed a regulatory domain we skipped asking + * userspace for one so we can now go ahead and set it */ + r = set_regdom(rd); + +unlock_and_exit: + mutex_unlock(&cfg80211_drv_mutex); + return r; +} +EXPORT_SYMBOL(regulatory_hint); + + +static void print_rd_rules(struct ieee80211_regdomain *rd) +{ + unsigned int i; + struct ieee80211_reg_rule *reg_rule = NULL; + struct ieee80211_freq_range *freq_range = NULL; + struct ieee80211_power_rule *power_rule = NULL; + + printk(KERN_INFO "\t(start_freq - end_freq @ bandwidth), " + "(max_antenna_gain, max_eirp)\n"); + + for (i = 0; i < rd->n_reg_rules; i++) { + reg_rule = &rd->reg_rules[i]; + freq_range = ®_rule->freq_range; + power_rule = ®_rule->power_rule; + + /* There may not be documentation for max antenna gain + * in certain regions */ + if (power_rule->max_antenna_gain) + printk(KERN_INFO "\t(%d KHz - %d KHz @ %d KHz), " + "(%d mBi, %d mBm)\n", + freq_range->start_freq_khz, + freq_range->end_freq_khz, + freq_range->max_bandwidth_khz, + power_rule->max_antenna_gain, + power_rule->max_eirp); + else + printk(KERN_INFO "\t(%d KHz - %d KHz @ %d KHz), " + "(N/A, %d mBm)\n", + freq_range->start_freq_khz, + freq_range->end_freq_khz, + freq_range->max_bandwidth_khz, + power_rule->max_eirp); + } +} + +static void print_regdomain(struct ieee80211_regdomain *rd) +{ + + if (is_world_regdom(rd->alpha2)) + printk(KERN_INFO "cfg80211: World regulatory " + "domain updated:\n"); + else { + if (is_unknown_alpha2(rd->alpha2)) + printk(KERN_INFO "cfg80211: Regulatory domain " + "changed to driver built-in settings " + "(unknown country)\n"); + else + printk(KERN_INFO "cfg80211: Regulatory domain " + "changed to country: %c%c\n", + rd->alpha2[0], rd->alpha2[1]); + } + print_rd_rules(rd); +} + +void print_regdomain_info(struct ieee80211_regdomain *rd) +{ + printk(KERN_INFO "cfg80211: Regulatory domain: %c%c\n", + rd->alpha2[0], rd->alpha2[1]); + print_rd_rules(rd); +} + +#ifdef CONFIG_WIRELESS_OLD_REGULATORY + +static bool is_old_static_regdom(struct ieee80211_regdomain *rd) +{ + if (rd == &us_regdom || rd == &jp_regdom || rd == &eu_regdom) + return true; + return false; +} + +/* The old crap never deals with a world regulatory domain, it only + * deals with the static regulatory domain passed and if possible + * an updated "US" or "JP" regulatory domain. We do however store the + * old static regulatory domain in cfg80211_world_regdom for convenience + * of use here */ +static void reset_regdomains_static(void) +{ + if (!is_old_static_regdom(cfg80211_regdomain)) + kfree(cfg80211_regdomain); + /* This is setting the regdom to the old static regdom */ + cfg80211_regdomain = + (struct ieee80211_regdomain *) cfg80211_world_regdom; +} +#else +static void reset_regdomains(void) +{ + if (cfg80211_world_regdom && cfg80211_world_regdom != &world_regdom) { + if (cfg80211_world_regdom == cfg80211_regdomain) { + kfree(cfg80211_regdomain); + } else { + kfree(cfg80211_world_regdom); + kfree(cfg80211_regdomain); + } + } else if (cfg80211_regdomain && cfg80211_regdomain != &world_regdom) + kfree(cfg80211_regdomain); + + cfg80211_world_regdom = (struct ieee80211_regdomain *) &world_regdom; + cfg80211_regdomain = NULL; +} + +/* Dynamic world regulatory domain requested by the wireless + * core upon initialization */ +static void update_world_regdomain(struct ieee80211_regdomain *rd) +{ + BUG_ON(list_empty(®ulatory_requests)); + + reset_regdomains(); + + cfg80211_world_regdom = rd; + cfg80211_regdomain = rd; +} +#endif + +static int __set_regdom(struct ieee80211_regdomain *rd) +{ + struct regulatory_request *request = NULL; + + /* Some basic sanity checks first */ + +#ifdef CONFIG_WIRELESS_OLD_REGULATORY + /* We ignore the world regdom with the old static regdomains setup + * as there is no point to it with satic regulatory definitions :( + * Don't worry this shit will be removed soon... */ + if (is_world_regdom(rd->alpha2)) + return -EINVAL; +#else + if (is_world_regdom(rd->alpha2)) { + if (WARN_ON(!__reg_is_valid_request(rd->alpha2, &request))) + return -EINVAL; + update_world_regdomain(rd); + return 0; + } +#endif + + if (!is_alpha2_set(rd->alpha2) && !is_an_alpha2(rd->alpha2) && + !is_unknown_alpha2(rd->alpha2)) + return -EINVAL; + + if (list_empty(®ulatory_requests)) + return -EINVAL; + +#ifdef CONFIG_WIRELESS_OLD_REGULATORY + /* Static "US" and "JP" will be overridden, but just once */ + if (!is_old_static_regdom(cfg80211_regdomain) && + !regdom_changed(rd->alpha2)) + return -EINVAL; +#else + if (!regdom_changed(rd->alpha2)) + return -EINVAL; +#endif + + /* Now lets set the regulatory domain, update all driver channels + * and finally inform them of what we have done, in case they want + * to review or adjust their own settings based on their own + * internal EEPROM data */ + + if (WARN_ON(!__reg_is_valid_request(rd->alpha2, &request))) + return -EINVAL; + +#ifdef CONFIG_WIRELESS_OLD_REGULATORY + reset_regdomains_static(); +#else + reset_regdomains(); +#endif + + /* Country IE parsing coming soon */ + switch (request->initiator) { + case REGDOM_SET_BY_CORE: + case REGDOM_SET_BY_DRIVER: + case REGDOM_SET_BY_USER: + if (!is_valid_rd(rd)) { + printk(KERN_ERR "cfg80211: Invalid " + "regulatory domain detected:\n"); + print_regdomain_info(rd); + return -EINVAL; + } + break; + case REGDOM_SET_BY_COUNTRY_IE: /* Not yet */ + WARN_ON(1); + default: + return -EOPNOTSUPP; + } + + /* Tada! */ + cfg80211_regdomain = rd; + request->granted = 1; + + return 0; +} + + +/* Use this call to set the current regulatory domain. Conflicts with + * multiple drivers can be ironed out later. Caller must've already + * kmalloc'd the rd structure. If this calls fails you should kfree() + * the passed rd. Caller must hold cfg80211_drv_mutex */ +int set_regdom(struct ieee80211_regdomain *rd) +{ + struct regulatory_request *this_request = NULL, *prev_request = NULL; + int r; + + if (!list_empty(®ulatory_requests)) + prev_request = list_first_entry(®ulatory_requests, + struct regulatory_request, list); + + /* Note that this doesn't update the wiphys, this is done below */ + r = __set_regdom(rd); + if (r) + return r; + + BUG_ON((!__reg_is_valid_request(rd->alpha2, &this_request))); + + /* The initial standard core update of the world regulatory domain, no + * need to keep that request info around if it didn't fail. */ + if (is_world_regdom(rd->alpha2) && + this_request->initiator == REGDOM_SET_BY_CORE && + this_request->granted) { + list_del(&this_request->list); + kfree(this_request); + this_request = NULL; + } + + /* Remove old requests, we only leave behind the last one */ + if (prev_request) { + list_del(&prev_request->list); + kfree(prev_request); + prev_request = NULL; + } + + /* This would make this whole thing pointless */ + BUG_ON(rd != cfg80211_regdomain); + + /* update all wiphys now with the new established regulatory domain */ + update_all_wiphy_regulatory(this_request->initiator); + + print_regdomain(rd); + + return r; +} + +int regulatory_init(void) +{ + reg_pdev = platform_device_register_simple("regulatory", 0, NULL, 0); + if (IS_ERR(reg_pdev)) + return PTR_ERR(reg_pdev); + return 0; +} + +void regulatory_exit(void) +{ + struct regulatory_request *req, *req_tmp; + mutex_lock(&cfg80211_drv_mutex); +#ifdef CONFIG_WIRELESS_OLD_REGULATORY + reset_regdomains_static(); +#else + reset_regdomains(); +#endif + list_for_each_entry_safe(req, req_tmp, ®ulatory_requests, list) { + list_del(&req->list); + kfree(req); + } + platform_device_unregister(reg_pdev); + mutex_unlock(&cfg80211_drv_mutex); } diff --git a/net/wireless/reg.h b/net/wireless/reg.h new file mode 100644 index 00000000000..d75fd023297 --- /dev/null +++ b/net/wireless/reg.h @@ -0,0 +1,44 @@ +#ifndef __NET_WIRELESS_REG_H +#define __NET_WIRELESS_REG_H + +extern const struct ieee80211_regdomain world_regdom; +#ifdef CONFIG_WIRELESS_OLD_REGULATORY +extern const struct ieee80211_regdomain us_regdom; +extern const struct ieee80211_regdomain jp_regdom; +extern const struct ieee80211_regdomain eu_regdom; +#endif + +extern struct ieee80211_regdomain *cfg80211_regdomain; +extern struct ieee80211_regdomain *cfg80211_world_regdom; +extern struct list_head regulatory_requests; + +struct regdom_last_setby { + struct wiphy *wiphy; + u8 initiator; +}; + +/* wiphy is set if this request's initiator is REGDOM_SET_BY_DRIVER */ +struct regulatory_request { + struct list_head list; + struct wiphy *wiphy; + int granted; + enum reg_set_by initiator; + char alpha2[2]; +}; + +bool is_world_regdom(char *alpha2); +bool reg_is_valid_request(char *alpha2); + +int set_regdom(struct ieee80211_regdomain *rd); +int __regulatory_hint_alpha2(struct wiphy *wiphy, enum reg_set_by set_by, + const char *alpha2); + +int regulatory_init(void); +void regulatory_exit(void); + +void print_regdomain_info(struct ieee80211_regdomain *); + +/* If a char is A-Z */ +#define IS_ALPHA(letter) (letter >= 65 && letter <= 90) + +#endif /* __NET_WIRELESS_REG_H */ -- cgit v1.2.3-70-g09d2 From 5bc75728fd43bb15b46f16ef465bcf9d487393cf Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Thu, 11 Sep 2008 00:01:51 +0200 Subject: mac80211: fix scan vs. interface removal race When we remove an interface, we can currently end up having a pointer to it left in local->scan_sdata after it has been set down, and then with a hardware scan the scan completion can try to access it which is a bug. Alternatively, a scan that started as a hardware scan may terminate as though it was a software scan, if the timing is just right. On SMP systems, software scan also has a similar problem, just canceling the delayed work and setting a flag isn't enough since it may be running concurrently; in this case we would also never restore state of other interfaces. This patch hopefully fixes the problems by always invoking ieee80211_scan_completed or requiring it to be invoked by the driver, I suspect the drivers that have ->hw_scan() are buggy. The bug will not manifest itself unless you remove the interface while hw-scanning which will also turn off the hw, and then add a new interface which will be unusable until you scan once. Signed-off-by: Johannes Berg Signed-off-by: John W. Linville --- include/net/mac80211.h | 4 +++- net/mac80211/main.c | 33 +++++++++++++++++++++++++-------- net/mac80211/mlme.c | 2 +- net/mac80211/scan.c | 38 +++++++++++++++++++++++++++----------- 4 files changed, 56 insertions(+), 21 deletions(-) (limited to 'include/net') diff --git a/include/net/mac80211.h b/include/net/mac80211.h index f504e3eca7d..d67882dd360 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -1124,7 +1124,9 @@ enum ieee80211_ampdu_mlme_action { * @hw_scan: Ask the hardware to service the scan request, no need to start * the scan state machine in stack. The scan must honour the channel * configuration done by the regulatory agent in the wiphy's registered - * bands. + * bands. When the scan finishes, ieee80211_scan_completed() must be + * called; note that it also must be called when the scan cannot finish + * because the hardware is turned off! Anything else is a bug! * * @get_stats: return low-level statistics * diff --git a/net/mac80211/main.c b/net/mac80211/main.c index ebdec7106d6..4bfac4b41c5 100644 --- a/net/mac80211/main.c +++ b/net/mac80211/main.c @@ -564,14 +564,6 @@ static int ieee80211_stop(struct net_device *dev) synchronize_rcu(); skb_queue_purge(&sdata->u.sta.skb_queue); - if (local->scan_sdata == sdata) { - if (!local->ops->hw_scan) { - local->sta_sw_scanning = 0; - cancel_delayed_work(&local->scan_work); - } else - local->sta_hw_scanning = 0; - } - sdata->u.sta.flags &= ~IEEE80211_STA_PRIVACY_INVOKED; kfree(sdata->u.sta.extra_ie); sdata->u.sta.extra_ie = NULL; @@ -585,6 +577,31 @@ static int ieee80211_stop(struct net_device *dev) } /* fall through */ default: + if (local->scan_sdata == sdata) { + if (!local->ops->hw_scan) + cancel_delayed_work_sync(&local->scan_work); + /* + * The software scan can no longer run now, so we can + * clear out the scan_sdata reference. However, the + * hardware scan may still be running. The complete + * function must be prepared to handle a NULL value. + */ + local->scan_sdata = NULL; + /* + * The memory barrier guarantees that another CPU + * that is hardware-scanning will now see the fact + * that this interface is gone. + */ + smp_mb(); + /* + * If software scanning, complete the scan but since + * the scan_sdata is NULL already don't send out a + * scan event to userspace -- the scan is incomplete. + */ + if (local->sta_sw_scanning) + ieee80211_scan_completed(&local->hw); + } + conf.vif = &sdata->vif; conf.type = sdata->vif.type; conf.mac_addr = dev->dev_addr; diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index 9e20a0c20a4..19c7f21e49d 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -2530,7 +2530,7 @@ void ieee80211_mlme_notify_scan_completed(struct ieee80211_local *local) struct ieee80211_sub_if_data *sdata = local->scan_sdata; struct ieee80211_if_sta *ifsta; - if (sdata->vif.type == IEEE80211_IF_TYPE_IBSS) { + if (sdata && sdata->vif.type == IEEE80211_IF_TYPE_IBSS) { ifsta = &sdata->u.sta; if (!(ifsta->flags & IEEE80211_STA_BSSID_SET) || (!(ifsta->state == IEEE80211_STA_MLME_IBSS_JOINED) && diff --git a/net/mac80211/scan.c b/net/mac80211/scan.c index f4399e9ac92..27727027d76 100644 --- a/net/mac80211/scan.c +++ b/net/mac80211/scan.c @@ -430,9 +430,20 @@ void ieee80211_scan_completed(struct ieee80211_hw *hw) struct ieee80211_sub_if_data *sdata; union iwreq_data wrqu; + if (WARN_ON(!local->sta_hw_scanning && !local->sta_sw_scanning)) + return; + local->last_scan_completed = jiffies; memset(&wrqu, 0, sizeof(wrqu)); - wireless_send_event(local->scan_sdata->dev, SIOCGIWSCAN, &wrqu, NULL); + + /* + * local->scan_sdata could have been NULLed by the interface + * down code in case we were scanning on an interface that is + * being taken down. + */ + sdata = local->scan_sdata; + if (sdata) + wireless_send_event(sdata->dev, SIOCGIWSCAN, &wrqu, NULL); if (local->sta_hw_scanning) { local->sta_hw_scanning = 0; @@ -491,7 +502,10 @@ void ieee80211_sta_scan_work(struct work_struct *work) int skip; unsigned long next_delay = 0; - if (!local->sta_sw_scanning) + /* + * Avoid re-scheduling when the sdata is going away. + */ + if (!netif_running(sdata->dev)) return; switch (local->scan_state) { @@ -570,9 +584,8 @@ void ieee80211_sta_scan_work(struct work_struct *work) break; } - if (local->sta_sw_scanning) - queue_delayed_work(local->hw.workqueue, &local->scan_work, - next_delay); + queue_delayed_work(local->hw.workqueue, &local->scan_work, + next_delay); } @@ -609,13 +622,16 @@ int ieee80211_sta_start_scan(struct ieee80211_sub_if_data *scan_sdata, } if (local->ops->hw_scan) { - int rc = local->ops->hw_scan(local_to_hw(local), - ssid, ssid_len); - if (!rc) { - local->sta_hw_scanning = 1; - local->scan_sdata = scan_sdata; + int rc; + + local->sta_hw_scanning = 1; + rc = local->ops->hw_scan(local_to_hw(local), ssid, ssid_len); + if (rc) { + local->sta_hw_scanning = 0; + return rc; } - return rc; + local->scan_sdata = scan_sdata; + return 0; } local->sta_sw_scanning = 1; -- cgit v1.2.3-70-g09d2 From 96dd22ac06b0dbfb069fdf530c72046a941e9694 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Thu, 11 Sep 2008 00:01:57 +0200 Subject: mac80211: inform driver of basic rateset Drivers need to know the basic rateset to be able to configure the ACK/CTS programming in hardware correctly. Signed-off-by: Johannes Berg Signed-off-by: John W. Linville --- include/net/mac80211.h | 6 ++++++ net/mac80211/ieee80211_i.h | 7 ++----- net/mac80211/iface.c | 4 +++- net/mac80211/mlme.c | 40 +++++++++------------------------------- net/mac80211/tx.c | 4 ++-- net/mac80211/util.c | 28 ++++++++++++++++++++++++++++ 6 files changed, 50 insertions(+), 39 deletions(-) (limited to 'include/net') diff --git a/include/net/mac80211.h b/include/net/mac80211.h index d67882dd360..c81e579c17f 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -160,6 +160,7 @@ struct ieee80211_low_level_stats { * @BSS_CHANGED_ERP_PREAMBLE: preamble changed * @BSS_CHANGED_ERP_SLOT: slot timing changed * @BSS_CHANGED_HT: 802.11n parameters changed + * @BSS_CHANGED_BASIC_RATES: Basic rateset changed */ enum ieee80211_bss_change { BSS_CHANGED_ASSOC = 1<<0, @@ -167,6 +168,7 @@ enum ieee80211_bss_change { BSS_CHANGED_ERP_PREAMBLE = 1<<2, BSS_CHANGED_ERP_SLOT = 1<<3, BSS_CHANGED_HT = 1<<4, + BSS_CHANGED_BASIC_RATES = 1<<5, }; /** @@ -187,6 +189,9 @@ enum ieee80211_bss_change { * @assoc_ht: association in HT mode * @ht_conf: ht capabilities * @ht_bss_conf: ht extended capabilities + * @basic_rates: bitmap of basic rates, each bit stands for an + * index into the rate table configured by the driver in + * the current band. */ struct ieee80211_bss_conf { /* association related data */ @@ -200,6 +205,7 @@ struct ieee80211_bss_conf { u16 beacon_int; u16 assoc_capability; u64 timestamp; + u64 basic_rates; /* ht related data */ bool assoc_ht; struct ieee80211_ht_info *ht_conf; diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index 1f9336a30e3..21cc6d07b02 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -433,11 +433,6 @@ struct ieee80211_sub_if_data { int drop_unencrypted; - /* - * basic rates of this AP or the AP we're associated to - */ - u64 basic_rates; - /* Fragment table for host-based reassembly */ struct ieee80211_fragment_entry fragments[IEEE80211_FRAGMENT_MAX]; unsigned int fragment_next; @@ -1017,6 +1012,8 @@ void ieee80211_tx_skb(struct ieee80211_sub_if_data *sdata, struct sk_buff *skb, void ieee802_11_parse_elems(u8 *start, size_t len, struct ieee802_11_elems *elems); int ieee80211_set_freq(struct ieee80211_sub_if_data *sdata, int freq); +u64 ieee80211_mandatory_rates(struct ieee80211_local *local, + enum ieee80211_band band); #ifdef CONFIG_MAC80211_NOINLINE #define debug_noinline noinline diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c index 61b19340488..dab8eba2602 100644 --- a/net/mac80211/iface.c +++ b/net/mac80211/iface.c @@ -144,7 +144,9 @@ int ieee80211_if_change_type(struct ieee80211_sub_if_data *sdata, ieee80211_setup_sdata(sdata, type); /* reset some values that shouldn't be kept across type changes */ - sdata->basic_rates = 0; + sdata->bss_conf.basic_rates = + ieee80211_mandatory_rates(sdata->local, + sdata->local->hw.conf.channel->band); sdata->drop_unencrypted = 0; return 0; diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index 5b748447eb7..55bc6071393 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -710,6 +710,12 @@ static void ieee80211_set_associated(struct ieee80211_sub_if_data *sdata, ieee80211_led_assoc(local, 1); sdata->bss_conf.assoc = 1; + /* + * For now just always ask the driver to update the basic rateset + * when we have associated, we aren't checking whether it actually + * changed or not. + */ + changed |= BSS_CHANGED_BASIC_RATES; ieee80211_bss_info_change_notify(sdata, changed); netif_tx_start_all_queues(sdata->dev); @@ -1296,7 +1302,7 @@ static void ieee80211_rx_mgmt_assoc_resp(struct ieee80211_sub_if_data *sdata, } sta->supp_rates[local->hw.conf.channel->band] = rates; - sdata->basic_rates = basic_rates; + sdata->bss_conf.basic_rates = basic_rates; /* cf. IEEE 802.11 9.2.12 */ if (local->hw.conf.channel->band == IEEE80211_BAND_2GHZ && @@ -1453,34 +1459,6 @@ static int ieee80211_sta_join_ibss(struct ieee80211_sub_if_data *sdata, return res; } -static u64 ieee80211_sta_get_mandatory_rates(struct ieee80211_local *local, - enum ieee80211_band band) -{ - struct ieee80211_supported_band *sband; - struct ieee80211_rate *bitrates; - u64 mandatory_rates; - enum ieee80211_rate_flags mandatory_flag; - int i; - - sband = local->hw.wiphy->bands[band]; - if (!sband) { - WARN_ON(1); - sband = local->hw.wiphy->bands[local->hw.conf.channel->band]; - } - - if (band == IEEE80211_BAND_2GHZ) - mandatory_flag = IEEE80211_RATE_MANDATORY_B; - else - mandatory_flag = IEEE80211_RATE_MANDATORY_A; - - bitrates = sband->bitrates; - mandatory_rates = 0; - for (i = 0; i < sband->n_bitrates; i++) - if (bitrates[i].flags & mandatory_flag) - mandatory_rates |= BIT(i); - return mandatory_rates; -} - static void ieee80211_rx_bss_info(struct ieee80211_sub_if_data *sdata, struct ieee80211_mgmt *mgmt, size_t len, @@ -1522,7 +1500,7 @@ static void ieee80211_rx_bss_info(struct ieee80211_sub_if_data *sdata, prev_rates = sta->supp_rates[band]; /* make sure mandatory rates are always added */ sta->supp_rates[band] = supp_rates | - ieee80211_sta_get_mandatory_rates(local, band); + ieee80211_mandatory_rates(local, band); #ifdef CONFIG_MAC80211_IBSS_DEBUG if (sta->supp_rates[band] != prev_rates) @@ -2361,7 +2339,7 @@ struct sta_info *ieee80211_ibss_add_sta(struct ieee80211_sub_if_data *sdata, /* make sure mandatory rates are always added */ sta->supp_rates[band] = supp_rates | - ieee80211_sta_get_mandatory_rates(local, band); + ieee80211_mandatory_rates(local, band); rate_control_rate_init(sta, local); diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c index 1bed3be01c2..a523189f10c 100644 --- a/net/mac80211/tx.c +++ b/net/mac80211/tx.c @@ -153,7 +153,7 @@ static __le16 ieee80211_duration(struct ieee80211_tx_data *tx, int group_addr, if (r->bitrate > txrate->bitrate) break; - if (tx->sdata->basic_rates & BIT(i)) + if (tx->sdata->bss_conf.basic_rates & BIT(i)) rate = r->bitrate; switch (sband->band) { @@ -594,7 +594,7 @@ ieee80211_tx_h_misc(struct ieee80211_tx_data *tx) for (idx = 0; idx < sband->n_bitrates; idx++) { if (sband->bitrates[idx].bitrate > rate->bitrate) continue; - if (tx->sdata->basic_rates & BIT(idx) && + if (tx->sdata->bss_conf.basic_rates & BIT(idx) && (baserate < 0 || (sband->bitrates[baserate].bitrate < sband->bitrates[idx].bitrate))) diff --git a/net/mac80211/util.c b/net/mac80211/util.c index a7968df9dac..d6aca91e612 100644 --- a/net/mac80211/util.c +++ b/net/mac80211/util.c @@ -640,3 +640,31 @@ int ieee80211_set_freq(struct ieee80211_sub_if_data *sdata, int freqMHz) return ret; } + +u64 ieee80211_mandatory_rates(struct ieee80211_local *local, + enum ieee80211_band band) +{ + struct ieee80211_supported_band *sband; + struct ieee80211_rate *bitrates; + u64 mandatory_rates; + enum ieee80211_rate_flags mandatory_flag; + int i; + + sband = local->hw.wiphy->bands[band]; + if (!sband) { + WARN_ON(1); + sband = local->hw.wiphy->bands[local->hw.conf.channel->band]; + } + + if (band == IEEE80211_BAND_2GHZ) + mandatory_flag = IEEE80211_RATE_MANDATORY_B; + else + mandatory_flag = IEEE80211_RATE_MANDATORY_A; + + bitrates = sband->bitrates; + mandatory_rates = 0; + for (i = 0; i < sband->n_bitrates; i++) + if (bitrates[i].flags & mandatory_flag) + mandatory_rates |= BIT(i); + return mandatory_rates; +} -- cgit v1.2.3-70-g09d2 From 05c914fe330fa8e1cc67870dc0d3809dfd96c107 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Thu, 11 Sep 2008 00:01:58 +0200 Subject: mac80211: use nl80211 interface types There's really no reason for mac80211 to be using its own interface type defines. Use the nl80211 types and simplify the configuration code a bit: there's no need to translate them any more now. Signed-off-by: Johannes Berg Signed-off-by: John W. Linville --- drivers/net/wireless/adm8211.c | 22 +++---- drivers/net/wireless/ath5k/ath5k.h | 4 +- drivers/net/wireless/ath5k/attach.c | 2 +- drivers/net/wireless/ath5k/base.c | 50 ++++++++-------- drivers/net/wireless/ath5k/base.h | 2 +- drivers/net/wireless/ath5k/pcu.c | 12 ++-- drivers/net/wireless/ath5k/reset.c | 2 +- drivers/net/wireless/ath9k/main.c | 28 ++++----- drivers/net/wireless/b43/main.c | 38 ++++++------ drivers/net/wireless/b43/phy_common.c | 4 +- drivers/net/wireless/b43legacy/main.c | 30 +++++----- drivers/net/wireless/b43legacy/phy.c | 4 +- drivers/net/wireless/iwlwifi/iwl-3945-rs.c | 2 +- drivers/net/wireless/iwlwifi/iwl-3945.c | 6 +- drivers/net/wireless/iwlwifi/iwl-3945.h | 2 +- drivers/net/wireless/iwlwifi/iwl-agn-rs.c | 8 +-- drivers/net/wireless/iwlwifi/iwl-agn.c | 64 ++++++++++----------- drivers/net/wireless/iwlwifi/iwl-core.c | 8 +-- drivers/net/wireless/iwlwifi/iwl-dev.h | 2 +- drivers/net/wireless/iwlwifi/iwl-power.c | 2 +- drivers/net/wireless/iwlwifi/iwl-rx.c | 8 +-- drivers/net/wireless/iwlwifi/iwl-scan.c | 4 +- drivers/net/wireless/iwlwifi/iwl-sta.c | 20 +++---- drivers/net/wireless/iwlwifi/iwl-tx.c | 4 +- drivers/net/wireless/iwlwifi/iwl3945-base.c | 84 +++++++++++++-------------- drivers/net/wireless/libertas_tf/main.c | 18 +++--- drivers/net/wireless/mac80211_hwsim.c | 2 +- drivers/net/wireless/p54/p54common.c | 14 ++--- drivers/net/wireless/p54/p54pci.c | 4 +- drivers/net/wireless/rt2x00/rt2500usb.c | 2 +- drivers/net/wireless/rt2x00/rt2x00.h | 2 +- drivers/net/wireless/rt2x00/rt2x00config.c | 8 +-- drivers/net/wireless/rt2x00/rt2x00dev.c | 8 +-- drivers/net/wireless/rt2x00/rt2x00lib.h | 2 +- drivers/net/wireless/rt2x00/rt2x00mac.c | 18 +++--- drivers/net/wireless/rtl8180_dev.c | 10 ++-- drivers/net/wireless/rtl8187_dev.c | 8 +-- drivers/net/wireless/zd1211rw/zd_mac.c | 20 +++---- include/net/mac80211.h | 35 ++---------- net/mac80211/cfg.c | 57 ++++++++---------- net/mac80211/debugfs_netdev.c | 28 ++++----- net/mac80211/ht.c | 6 +- net/mac80211/ieee80211_i.h | 6 +- net/mac80211/iface.c | 40 +++++++------ net/mac80211/key.c | 6 +- net/mac80211/main.c | 89 ++++++++++++++--------------- net/mac80211/mlme.c | 40 ++++++------- net/mac80211/rx.c | 65 ++++++++++----------- net/mac80211/scan.c | 8 +-- net/mac80211/sta_info.c | 4 +- net/mac80211/tx.c | 28 ++++----- net/mac80211/util.c | 44 +++++++------- net/mac80211/wext.c | 84 +++++++++++++-------------- 53 files changed, 517 insertions(+), 551 deletions(-) (limited to 'include/net') diff --git a/drivers/net/wireless/adm8211.c b/drivers/net/wireless/adm8211.c index c6a55cd12db..b2c050b6889 100644 --- a/drivers/net/wireless/adm8211.c +++ b/drivers/net/wireless/adm8211.c @@ -765,11 +765,11 @@ static void adm8211_update_mode(struct ieee80211_hw *dev) priv->soft_rx_crc = 0; switch (priv->mode) { - case IEEE80211_IF_TYPE_STA: + case NL80211_IFTYPE_STATION: priv->nar &= ~(ADM8211_NAR_PR | ADM8211_NAR_EA); priv->nar |= ADM8211_NAR_ST | ADM8211_NAR_SR; break; - case IEEE80211_IF_TYPE_IBSS: + case NL80211_IFTYPE_ADHOC: priv->nar &= ~ADM8211_NAR_PR; priv->nar |= ADM8211_NAR_EA | ADM8211_NAR_ST | ADM8211_NAR_SR; @@ -777,7 +777,7 @@ static void adm8211_update_mode(struct ieee80211_hw *dev) if (priv->pdev->revision >= ADM8211_REV_BA) priv->soft_rx_crc = 1; break; - case IEEE80211_IF_TYPE_MNTR: + case NL80211_IFTYPE_MONITOR: priv->nar &= ~(ADM8211_NAR_EA | ADM8211_NAR_ST); priv->nar |= ADM8211_NAR_PR | ADM8211_NAR_SR; break; @@ -1410,11 +1410,11 @@ static int adm8211_add_interface(struct ieee80211_hw *dev, struct ieee80211_if_init_conf *conf) { struct adm8211_priv *priv = dev->priv; - if (priv->mode != IEEE80211_IF_TYPE_MNTR) + if (priv->mode != NL80211_IFTYPE_MONITOR) return -EOPNOTSUPP; switch (conf->type) { - case IEEE80211_IF_TYPE_STA: + case NL80211_IFTYPE_STATION: priv->mode = conf->type; break; default: @@ -1437,7 +1437,7 @@ static void adm8211_remove_interface(struct ieee80211_hw *dev, struct ieee80211_if_init_conf *conf) { struct adm8211_priv *priv = dev->priv; - priv->mode = IEEE80211_IF_TYPE_MNTR; + priv->mode = NL80211_IFTYPE_MONITOR; } static int adm8211_init_rings(struct ieee80211_hw *dev) @@ -1556,7 +1556,7 @@ static int adm8211_start(struct ieee80211_hw *dev) ADM8211_CSR_WRITE(IER, ADM8211_IER_NIE | ADM8211_IER_AIE | ADM8211_IER_RCIE | ADM8211_IER_TCIE | ADM8211_IER_TDUIE | ADM8211_IER_GPTIE); - priv->mode = IEEE80211_IF_TYPE_MNTR; + priv->mode = NL80211_IFTYPE_MONITOR; adm8211_update_mode(dev); ADM8211_CSR_WRITE(RDR, 0); @@ -1571,7 +1571,7 @@ static void adm8211_stop(struct ieee80211_hw *dev) { struct adm8211_priv *priv = dev->priv; - priv->mode = IEEE80211_IF_TYPE_INVALID; + priv->mode = NL80211_IFTYPE_UNSPECIFIED; priv->nar = 0; ADM8211_CSR_WRITE(NAR, 0); ADM8211_CSR_WRITE(IER, 0); @@ -1896,7 +1896,7 @@ static int __devinit adm8211_probe(struct pci_dev *pdev, priv->tx_power = 0x40; priv->lpf_cutoff = 0xFF; priv->lnags_threshold = 0xFF; - priv->mode = IEEE80211_IF_TYPE_INVALID; + priv->mode = NL80211_IFTYPE_UNSPECIFIED; /* Power-on issue. EEPROM won't read correctly without */ if (pdev->revision >= ADM8211_REV_BA) { @@ -1986,7 +1986,7 @@ static int adm8211_suspend(struct pci_dev *pdev, pm_message_t state) struct ieee80211_hw *dev = pci_get_drvdata(pdev); struct adm8211_priv *priv = dev->priv; - if (priv->mode != IEEE80211_IF_TYPE_INVALID) { + if (priv->mode != NL80211_IFTYPE_UNSPECIFIED) { ieee80211_stop_queues(dev); adm8211_stop(dev); } @@ -2004,7 +2004,7 @@ static int adm8211_resume(struct pci_dev *pdev) pci_set_power_state(pdev, PCI_D0); pci_restore_state(pdev); - if (priv->mode != IEEE80211_IF_TYPE_INVALID) { + if (priv->mode != NL80211_IFTYPE_UNSPECIFIED) { adm8211_start(dev); ieee80211_wake_queues(dev); } diff --git a/drivers/net/wireless/ath5k/ath5k.h b/drivers/net/wireless/ath5k/ath5k.h index 4c0211798a7..20018869051 100644 --- a/drivers/net/wireless/ath5k/ath5k.h +++ b/drivers/net/wireless/ath5k/ath5k.h @@ -1008,7 +1008,7 @@ struct ath5k_hw { enum ath5k_int ah_imr; - enum ieee80211_if_types ah_op_mode; + enum nl80211_iftype ah_op_mode; enum ath5k_power_mode ah_power_mode; struct ieee80211_channel ah_current_channel; bool ah_turbo; @@ -1117,7 +1117,7 @@ extern void ath5k_hw_detach(struct ath5k_hw *ah); /* Reset Functions */ extern int ath5k_hw_nic_wakeup(struct ath5k_hw *ah, int flags, bool initial); -extern int ath5k_hw_reset(struct ath5k_hw *ah, enum ieee80211_if_types op_mode, struct ieee80211_channel *channel, bool change_channel); +extern int ath5k_hw_reset(struct ath5k_hw *ah, enum nl80211_iftype op_mode, struct ieee80211_channel *channel, bool change_channel); /* Power management functions */ extern int ath5k_hw_set_power(struct ath5k_hw *ah, enum ath5k_power_mode mode, bool set_chip, u16 sleep_duration); diff --git a/drivers/net/wireless/ath5k/attach.c b/drivers/net/wireless/ath5k/attach.c index 0eb2511fe14..153c4111fab 100644 --- a/drivers/net/wireless/ath5k/attach.c +++ b/drivers/net/wireless/ath5k/attach.c @@ -124,7 +124,7 @@ struct ath5k_hw *ath5k_hw_attach(struct ath5k_softc *sc, u8 mac_version) /* * HW information */ - ah->ah_op_mode = IEEE80211_IF_TYPE_STA; + ah->ah_op_mode = NL80211_IFTYPE_STATION; ah->ah_radar.r_enabled = AR5K_TUNE_RADAR_ALERT; ah->ah_turbo = false; ah->ah_txpower.txp_tpc = AR5K_TUNE_TPC_TXPOWER; diff --git a/drivers/net/wireless/ath5k/base.c b/drivers/net/wireless/ath5k/base.c index 85260c39aa2..e09ed2ce675 100644 --- a/drivers/net/wireless/ath5k/base.c +++ b/drivers/net/wireless/ath5k/base.c @@ -507,7 +507,7 @@ ath5k_pci_probe(struct pci_dev *pdev, sc->iobase = mem; /* So we can unmap it on detach */ sc->cachelsz = csz * sizeof(u32); /* convert to bytes */ - sc->opmode = IEEE80211_IF_TYPE_STA; + sc->opmode = NL80211_IFTYPE_STATION; mutex_init(&sc->lock); spin_lock_init(&sc->rxbuflock); spin_lock_init(&sc->txbuflock); @@ -1377,8 +1377,8 @@ ath5k_beaconq_config(struct ath5k_softc *sc) ret = ath5k_hw_get_tx_queueprops(ah, sc->bhalq, &qi); if (ret) return ret; - if (sc->opmode == IEEE80211_IF_TYPE_AP || - sc->opmode == IEEE80211_IF_TYPE_MESH_POINT) { + if (sc->opmode == NL80211_IFTYPE_AP || + sc->opmode == NL80211_IFTYPE_MESH_POINT) { /* * Always burst out beacon and CAB traffic * (aifs = cwmin = cwmax = 0) @@ -1386,7 +1386,7 @@ ath5k_beaconq_config(struct ath5k_softc *sc) qi.tqi_aifs = 0; qi.tqi_cw_min = 0; qi.tqi_cw_max = 0; - } else if (sc->opmode == IEEE80211_IF_TYPE_IBSS) { + } else if (sc->opmode == NL80211_IFTYPE_ADHOC) { /* * Adhoc mode; backoff between 0 and (2 * cw_min). */ @@ -1714,7 +1714,7 @@ ath5k_tasklet_rx(unsigned long data) /* let crypto-error packets fall through in MNTR */ if ((rs.rs_status & ~(AR5K_RXERR_DECRYPT|AR5K_RXERR_MIC)) || - sc->opmode != IEEE80211_IF_TYPE_MNTR) + sc->opmode != NL80211_IFTYPE_MONITOR) goto next; } accept: @@ -1777,7 +1777,7 @@ accept: ath5k_debug_dump_skb(sc, skb, "RX ", 0); /* check beacons in IBSS mode */ - if (sc->opmode == IEEE80211_IF_TYPE_IBSS) + if (sc->opmode == NL80211_IFTYPE_ADHOC) ath5k_check_ibss_tsf(sc, skb, &rxs); __ieee80211_rx(sc->hw, skb, &rxs); @@ -1892,7 +1892,7 @@ ath5k_beacon_setup(struct ath5k_softc *sc, struct ath5k_buf *bf) ds = bf->desc; flags = AR5K_TXDESC_NOACK; - if (sc->opmode == IEEE80211_IF_TYPE_IBSS && ath5k_hw_hasveol(ah)) { + if (sc->opmode == NL80211_IFTYPE_ADHOC && ath5k_hw_hasveol(ah)) { ds->ds_link = bf->daddr; /* self-linked */ flags |= AR5K_TXDESC_VEOL; /* @@ -1941,8 +1941,8 @@ ath5k_beacon_send(struct ath5k_softc *sc) ATH5K_DBG_UNLIMIT(sc, ATH5K_DEBUG_BEACON, "in beacon_send\n"); - if (unlikely(bf->skb == NULL || sc->opmode == IEEE80211_IF_TYPE_STA || - sc->opmode == IEEE80211_IF_TYPE_MNTR)) { + if (unlikely(bf->skb == NULL || sc->opmode == NL80211_IFTYPE_STATION || + sc->opmode == NL80211_IFTYPE_MONITOR)) { ATH5K_WARN(sc, "bf=%p bf_skb=%p\n", bf, bf ? bf->skb : NULL); return; } @@ -2116,9 +2116,9 @@ ath5k_beacon_config(struct ath5k_softc *sc) sc->bmisscount = 0; sc->imask &= ~(AR5K_INT_BMISS | AR5K_INT_SWBA); - if (sc->opmode == IEEE80211_IF_TYPE_STA) { + if (sc->opmode == NL80211_IFTYPE_STATION) { sc->imask |= AR5K_INT_BMISS; - } else if (sc->opmode == IEEE80211_IF_TYPE_IBSS) { + } else if (sc->opmode == NL80211_IFTYPE_ADHOC) { /* * In IBSS mode we use a self-linked tx descriptor and let the * hardware send the beacons automatically. We have to load it @@ -2323,7 +2323,7 @@ ath5k_intr(int irq, void *dev_id) * transmission time) in order to detect wether * automatic TSF updates happened. */ - if (sc->opmode == IEEE80211_IF_TYPE_IBSS) { + if (sc->opmode == NL80211_IFTYPE_ADHOC) { /* XXX: only if VEOL suppported */ u64 tsf = ath5k_hw_get_tsf64(ah); sc->nexttbtt += sc->bintval; @@ -2553,7 +2553,7 @@ ath5k_tx(struct ieee80211_hw *hw, struct sk_buff *skb) ath5k_debug_dump_skb(sc, skb, "TX ", 1); - if (sc->opmode == IEEE80211_IF_TYPE_MNTR) + if (sc->opmode == NL80211_IFTYPE_MONITOR) ATH5K_DBG(sc, ATH5K_DEBUG_XMIT, "tx in monitor (scan?)\n"); /* @@ -2688,9 +2688,9 @@ static int ath5k_add_interface(struct ieee80211_hw *hw, sc->vif = conf->vif; switch (conf->type) { - case IEEE80211_IF_TYPE_STA: - case IEEE80211_IF_TYPE_IBSS: - case IEEE80211_IF_TYPE_MNTR: + case NL80211_IFTYPE_STATION: + case NL80211_IFTYPE_ADHOC: + case NL80211_IFTYPE_MONITOR: sc->opmode = conf->type; break; default: @@ -2761,7 +2761,7 @@ ath5k_config_interface(struct ieee80211_hw *hw, struct ieee80211_vif *vif, } if (conf->changed & IEEE80211_IFCC_BEACON && - vif->type == IEEE80211_IF_TYPE_IBSS) { + vif->type == NL80211_IFTYPE_ADHOC) { struct sk_buff *beacon = ieee80211_beacon_get(hw, vif); if (!beacon) { ret = -ENOMEM; @@ -2880,17 +2880,17 @@ static void ath5k_configure_filter(struct ieee80211_hw *hw, /* XXX move these to mac80211, and add a beacon IFF flag to mac80211 */ - if (sc->opmode == IEEE80211_IF_TYPE_MNTR) + if (sc->opmode == NL80211_IFTYPE_MONITOR) rfilt |= AR5K_RX_FILTER_CONTROL | AR5K_RX_FILTER_BEACON | AR5K_RX_FILTER_PROBEREQ | AR5K_RX_FILTER_PROM; - if (sc->opmode != IEEE80211_IF_TYPE_STA) + if (sc->opmode != NL80211_IFTYPE_STATION) rfilt |= AR5K_RX_FILTER_PROBEREQ; - if (sc->opmode != IEEE80211_IF_TYPE_AP && - sc->opmode != IEEE80211_IF_TYPE_MESH_POINT && + if (sc->opmode != NL80211_IFTYPE_AP && + sc->opmode != NL80211_IFTYPE_MESH_POINT && test_bit(ATH_STAT_PROMISC, sc->status)) rfilt |= AR5K_RX_FILTER_PROM; - if (sc->opmode == IEEE80211_IF_TYPE_STA || - sc->opmode == IEEE80211_IF_TYPE_IBSS) { + if (sc->opmode == NL80211_IFTYPE_STATION || + sc->opmode == NL80211_IFTYPE_ADHOC) { rfilt |= AR5K_RX_FILTER_BEACON; } @@ -2995,7 +2995,7 @@ ath5k_reset_tsf(struct ieee80211_hw *hw) * in IBSS mode we need to update the beacon timers too. * this will also reset the TSF if we call it with 0 */ - if (sc->opmode == IEEE80211_IF_TYPE_IBSS) + if (sc->opmode == NL80211_IFTYPE_ADHOC) ath5k_beacon_update_timers(sc, 0); else ath5k_hw_reset_tsf(sc->ah); @@ -3010,7 +3010,7 @@ ath5k_beacon_update(struct ieee80211_hw *hw, struct sk_buff *skb) ath5k_debug_dump_skb(sc, skb, "BC ", 1); - if (sc->opmode != IEEE80211_IF_TYPE_IBSS) { + if (sc->opmode != NL80211_IFTYPE_ADHOC) { ret = -EIO; goto end; } diff --git a/drivers/net/wireless/ath5k/base.h b/drivers/net/wireless/ath5k/base.h index 1549b63d613..9d0b728928e 100644 --- a/drivers/net/wireless/ath5k/base.h +++ b/drivers/net/wireless/ath5k/base.h @@ -113,7 +113,7 @@ struct ath5k_softc { struct ieee80211_channel channels[ATH_CHAN_MAX]; struct ieee80211_rate rates[IEEE80211_NUM_BANDS][AR5K_MAX_RATES]; u8 rate_idx[IEEE80211_NUM_BANDS][AR5K_MAX_RATES]; - enum ieee80211_if_types opmode; + enum nl80211_iftype opmode; struct ath5k_hw *ah; /* Atheros HW */ struct ieee80211_supported_band *curband; diff --git a/drivers/net/wireless/ath5k/pcu.c b/drivers/net/wireless/ath5k/pcu.c index 5a896d1e2a2..c77cee2a558 100644 --- a/drivers/net/wireless/ath5k/pcu.c +++ b/drivers/net/wireless/ath5k/pcu.c @@ -52,26 +52,26 @@ int ath5k_hw_set_opmode(struct ath5k_hw *ah) ATH5K_TRACE(ah->ah_sc); switch (ah->ah_op_mode) { - case IEEE80211_IF_TYPE_IBSS: + case NL80211_IFTYPE_ADHOC: pcu_reg |= AR5K_STA_ID1_ADHOC | AR5K_STA_ID1_DESC_ANTENNA | (ah->ah_version == AR5K_AR5210 ? AR5K_STA_ID1_NO_PSPOLL : 0); beacon_reg |= AR5K_BCR_ADHOC; break; - case IEEE80211_IF_TYPE_AP: - case IEEE80211_IF_TYPE_MESH_POINT: + case NL80211_IFTYPE_AP: + case NL80211_IFTYPE_MESH_POINT: pcu_reg |= AR5K_STA_ID1_AP | AR5K_STA_ID1_RTS_DEF_ANTENNA | (ah->ah_version == AR5K_AR5210 ? AR5K_STA_ID1_NO_PSPOLL : 0); beacon_reg |= AR5K_BCR_AP; break; - case IEEE80211_IF_TYPE_STA: + case NL80211_IFTYPE_STATION: pcu_reg |= AR5K_STA_ID1_DEFAULT_ANTENNA | (ah->ah_version == AR5K_AR5210 ? AR5K_STA_ID1_PWR_SV : 0); - case IEEE80211_IF_TYPE_MNTR: + case NL80211_IFTYPE_MONITOR: pcu_reg |= AR5K_STA_ID1_DEFAULT_ANTENNA | (ah->ah_version == AR5K_AR5210 ? AR5K_STA_ID1_NO_PSPOLL : 0); @@ -649,7 +649,7 @@ void ath5k_hw_init_beacon(struct ath5k_hw *ah, u32 next_beacon, u32 interval) * Set the additional timers by mode */ switch (ah->ah_op_mode) { - case IEEE80211_IF_TYPE_STA: + case NL80211_IFTYPE_STATION: if (ah->ah_version == AR5K_AR5210) { timer1 = 0xffffffff; timer2 = 0xffffffff; diff --git a/drivers/net/wireless/ath5k/reset.c b/drivers/net/wireless/ath5k/reset.c index d260fba0180..f5c3de890cd 100644 --- a/drivers/net/wireless/ath5k/reset.c +++ b/drivers/net/wireless/ath5k/reset.c @@ -399,7 +399,7 @@ int ath5k_hw_nic_wakeup(struct ath5k_hw *ah, int flags, bool initial) /* * Main reset function */ -int ath5k_hw_reset(struct ath5k_hw *ah, enum ieee80211_if_types op_mode, +int ath5k_hw_reset(struct ath5k_hw *ah, enum nl80211_iftype op_mode, struct ieee80211_channel *channel, bool change_channel) { struct ath5k_eeprom_info *ee = &ah->ah_capabilities.cap_eeprom; diff --git a/drivers/net/wireless/ath9k/main.c b/drivers/net/wireless/ath9k/main.c index b493dff5643..2a6e089062f 100644 --- a/drivers/net/wireless/ath9k/main.c +++ b/drivers/net/wireless/ath9k/main.c @@ -140,7 +140,7 @@ static int ath_key_config(struct ath_softc *sc, struct ath9k_keyval hk; const u8 *mac = NULL; int ret = 0; - enum ieee80211_if_types opmode; + enum nl80211_iftype opmode; memset(&hk, 0, sizeof(hk)); @@ -179,14 +179,14 @@ static int ath_key_config(struct ath_softc *sc, */ if (is_broadcast_ether_addr(addr)) { switch (opmode) { - case IEEE80211_IF_TYPE_STA: + case NL80211_IFTYPE_STATION: /* default key: could be group WPA key * or could be static WEP key */ mac = NULL; break; - case IEEE80211_IF_TYPE_IBSS: + case NL80211_IFTYPE_ADHOC: break; - case IEEE80211_IF_TYPE_AP: + case NL80211_IFTYPE_AP: break; default: ASSERT(0); @@ -1147,13 +1147,13 @@ static int ath9k_add_interface(struct ieee80211_hw *hw, return -ENOBUFS; switch (conf->type) { - case IEEE80211_IF_TYPE_STA: + case NL80211_IFTYPE_STATION: ic_opmode = ATH9K_M_STA; break; - case IEEE80211_IF_TYPE_IBSS: + case NL80211_IFTYPE_ADHOC: ic_opmode = ATH9K_M_IBSS; break; - case IEEE80211_IF_TYPE_AP: + case NL80211_IFTYPE_AP: ic_opmode = ATH9K_M_HOSTAP; break; default: @@ -1275,7 +1275,7 @@ static int ath9k_config_interface(struct ieee80211_hw *hw, /* TODO: Need to decide which hw opmode to use for multi-interface * cases */ - if (vif->type == IEEE80211_IF_TYPE_AP && + if (vif->type == NL80211_IFTYPE_AP && ah->ah_opmode != ATH9K_M_HOSTAP) { ah->ah_opmode = ATH9K_M_HOSTAP; ath9k_hw_setopmode(ah); @@ -1287,8 +1287,8 @@ static int ath9k_config_interface(struct ieee80211_hw *hw, if ((conf->changed & IEEE80211_IFCC_BSSID) && !is_zero_ether_addr(conf->bssid)) { switch (vif->type) { - case IEEE80211_IF_TYPE_STA: - case IEEE80211_IF_TYPE_IBSS: + case NL80211_IFTYPE_STATION: + case NL80211_IFTYPE_ADHOC: /* Update ratectrl about the new state */ ath_rate_newstate(sc, avp); @@ -1333,8 +1333,8 @@ static int ath9k_config_interface(struct ieee80211_hw *hw, } if ((conf->changed & IEEE80211_IFCC_BEACON) && - ((vif->type == IEEE80211_IF_TYPE_IBSS) || - (vif->type == IEEE80211_IF_TYPE_AP))) { + ((vif->type == NL80211_IFTYPE_ADHOC) || + (vif->type == NL80211_IFTYPE_AP))) { /* * Allocate and setup the beacon frame. * @@ -1353,7 +1353,7 @@ static int ath9k_config_interface(struct ieee80211_hw *hw, } /* Check for WLAN_CAPABILITY_PRIVACY ? */ - if ((avp->av_opmode != IEEE80211_IF_TYPE_STA)) { + if ((avp->av_opmode != NL80211_IFTYPE_STATION)) { for (i = 0; i < IEEE80211_WEP_NKID; i++) if (ath9k_hw_keyisvalid(sc->sc_ah, (u16)i)) ath9k_hw_keysetmac(sc->sc_ah, @@ -1362,7 +1362,7 @@ static int ath9k_config_interface(struct ieee80211_hw *hw, } /* Only legacy IBSS for now */ - if (vif->type == IEEE80211_IF_TYPE_IBSS) + if (vif->type == NL80211_IFTYPE_ADHOC) ath_update_chainmask(sc, 0); return 0; diff --git a/drivers/net/wireless/b43/main.c b/drivers/net/wireless/b43/main.c index d4a356b1163..df7a1e7f4a5 100644 --- a/drivers/net/wireless/b43/main.c +++ b/drivers/net/wireless/b43/main.c @@ -1244,13 +1244,13 @@ generate_new: static void handle_irq_tbtt_indication(struct b43_wldev *dev) { - if (b43_is_mode(dev->wl, IEEE80211_IF_TYPE_AP)) { + if (b43_is_mode(dev->wl, NL80211_IFTYPE_AP)) { ///TODO: PS TBTT } else { if (1 /*FIXME: the last PSpoll frame was sent successfully */ ) b43_power_saving_ctl_bits(dev, 0); } - if (b43_is_mode(dev->wl, IEEE80211_IF_TYPE_IBSS)) + if (b43_is_mode(dev->wl, NL80211_IFTYPE_ADHOC)) dev->dfq_valid = 1; } @@ -1599,8 +1599,8 @@ static void handle_irq_beacon(struct b43_wldev *dev) struct b43_wl *wl = dev->wl; u32 cmd, beacon0_valid, beacon1_valid; - if (!b43_is_mode(wl, IEEE80211_IF_TYPE_AP) && - !b43_is_mode(wl, IEEE80211_IF_TYPE_MESH_POINT)) + if (!b43_is_mode(wl, NL80211_IFTYPE_AP) && + !b43_is_mode(wl, NL80211_IFTYPE_MESH_POINT)) return; /* This is the bottom half of the asynchronous beacon update. */ @@ -2568,10 +2568,10 @@ static void b43_adjust_opmode(struct b43_wldev *dev) ctl &= ~B43_MACCTL_BEACPROMISC; ctl |= B43_MACCTL_INFRA; - if (b43_is_mode(wl, IEEE80211_IF_TYPE_AP) || - b43_is_mode(wl, IEEE80211_IF_TYPE_MESH_POINT)) + if (b43_is_mode(wl, NL80211_IFTYPE_AP) || + b43_is_mode(wl, NL80211_IFTYPE_MESH_POINT)) ctl |= B43_MACCTL_AP; - else if (b43_is_mode(wl, IEEE80211_IF_TYPE_IBSS)) + else if (b43_is_mode(wl, NL80211_IFTYPE_ADHOC)) ctl &= ~B43_MACCTL_INFRA; if (wl->filter_flags & FIF_CONTROL) @@ -3406,8 +3406,8 @@ static int b43_op_config(struct ieee80211_hw *hw, struct ieee80211_conf *conf) phy->ops->set_rx_antenna(dev, antenna); /* Update templates for AP/mesh mode. */ - if (b43_is_mode(wl, IEEE80211_IF_TYPE_AP) || - b43_is_mode(wl, IEEE80211_IF_TYPE_MESH_POINT)) + if (b43_is_mode(wl, NL80211_IFTYPE_AP) || + b43_is_mode(wl, NL80211_IFTYPE_MESH_POINT)) b43_set_beacon_int(dev, conf->beacon_int); if (!!conf->radio_enabled != phy->radio_on) { @@ -3595,14 +3595,14 @@ static int b43_op_config_interface(struct ieee80211_hw *hw, else memset(wl->bssid, 0, ETH_ALEN); if (b43_status(dev) >= B43_STAT_INITIALIZED) { - if (b43_is_mode(wl, IEEE80211_IF_TYPE_AP) || - b43_is_mode(wl, IEEE80211_IF_TYPE_MESH_POINT)) { + if (b43_is_mode(wl, NL80211_IFTYPE_AP) || + b43_is_mode(wl, NL80211_IFTYPE_MESH_POINT)) { B43_WARN_ON(vif->type != wl->if_type); if (conf->changed & IEEE80211_IFCC_SSID) b43_set_ssid(dev, conf->ssid, conf->ssid_len); if (conf->changed & IEEE80211_IFCC_BEACON) b43_update_templates(wl); - } else if (b43_is_mode(wl, IEEE80211_IF_TYPE_IBSS)) { + } else if (b43_is_mode(wl, NL80211_IFTYPE_ADHOC)) { if (conf->changed & IEEE80211_IFCC_BEACON) b43_update_templates(wl); } @@ -3903,7 +3903,7 @@ static void b43_set_synth_pu_delay(struct b43_wldev *dev, bool idle) pu_delay = 3700; else pu_delay = 1050; - if (b43_is_mode(dev->wl, IEEE80211_IF_TYPE_IBSS) || idle) + if (b43_is_mode(dev->wl, NL80211_IFTYPE_ADHOC) || idle) pu_delay = 500; if ((dev->phy.radio_ver == 0x2050) && (dev->phy.radio_rev == 8)) pu_delay = max(pu_delay, (u16)2400); @@ -3917,7 +3917,7 @@ static void b43_set_pretbtt(struct b43_wldev *dev) u16 pretbtt; /* The time value is in microseconds. */ - if (b43_is_mode(dev->wl, IEEE80211_IF_TYPE_IBSS)) { + if (b43_is_mode(dev->wl, NL80211_IFTYPE_ADHOC)) { pretbtt = 2; } else { if (dev->phy.type == B43_PHYTYPE_A) @@ -4084,11 +4084,11 @@ static int b43_op_add_interface(struct ieee80211_hw *hw, /* TODO: allow WDS/AP devices to coexist */ - if (conf->type != IEEE80211_IF_TYPE_AP && - conf->type != IEEE80211_IF_TYPE_MESH_POINT && - conf->type != IEEE80211_IF_TYPE_STA && - conf->type != IEEE80211_IF_TYPE_WDS && - conf->type != IEEE80211_IF_TYPE_IBSS) + if (conf->type != NL80211_IFTYPE_AP && + conf->type != NL80211_IFTYPE_MESH_POINT && + conf->type != NL80211_IFTYPE_STATION && + conf->type != NL80211_IFTYPE_WDS && + conf->type != NL80211_IFTYPE_ADHOC) return -EOPNOTSUPP; mutex_lock(&wl->mutex); diff --git a/drivers/net/wireless/b43/phy_common.c b/drivers/net/wireless/b43/phy_common.c index 4d4345d76ab..af37abccccb 100644 --- a/drivers/net/wireless/b43/phy_common.c +++ b/drivers/net/wireless/b43/phy_common.c @@ -162,7 +162,7 @@ void b43_phy_lock(struct b43_wldev *dev) #endif B43_WARN_ON(dev->dev->id.revision < 3); - if (!b43_is_mode(dev->wl, IEEE80211_IF_TYPE_AP)) + if (!b43_is_mode(dev->wl, NL80211_IFTYPE_AP)) b43_power_saving_ctl_bits(dev, B43_PS_AWAKE); } @@ -174,7 +174,7 @@ void b43_phy_unlock(struct b43_wldev *dev) #endif B43_WARN_ON(dev->dev->id.revision < 3); - if (!b43_is_mode(dev->wl, IEEE80211_IF_TYPE_AP)) + if (!b43_is_mode(dev->wl, NL80211_IFTYPE_AP)) b43_power_saving_ctl_bits(dev, 0); } diff --git a/drivers/net/wireless/b43legacy/main.c b/drivers/net/wireless/b43legacy/main.c index 68f63f5093a..6e425410c99 100644 --- a/drivers/net/wireless/b43legacy/main.c +++ b/drivers/net/wireless/b43legacy/main.c @@ -888,13 +888,13 @@ generate_new: static void handle_irq_tbtt_indication(struct b43legacy_wldev *dev) { - if (b43legacy_is_mode(dev->wl, IEEE80211_IF_TYPE_AP)) { + if (b43legacy_is_mode(dev->wl, NL80211_IFTYPE_AP)) { /* TODO: PS TBTT */ } else { if (1/*FIXME: the last PSpoll frame was sent successfully */) b43legacy_power_saving_ctl_bits(dev, -1, -1); } - if (b43legacy_is_mode(dev->wl, IEEE80211_IF_TYPE_IBSS)) + if (b43legacy_is_mode(dev->wl, NL80211_IFTYPE_ADHOC)) dev->dfq_valid = 1; } @@ -1201,7 +1201,7 @@ static void handle_irq_beacon(struct b43legacy_wldev *dev) struct b43legacy_wl *wl = dev->wl; u32 cmd; - if (!b43legacy_is_mode(wl, IEEE80211_IF_TYPE_AP)) + if (!b43legacy_is_mode(wl, NL80211_IFTYPE_AP)) return; /* This is the bottom half of the asynchronous beacon update. */ @@ -1936,9 +1936,9 @@ static void b43legacy_adjust_opmode(struct b43legacy_wldev *dev) ctl &= ~B43legacy_MACCTL_BEACPROMISC; ctl |= B43legacy_MACCTL_INFRA; - if (b43legacy_is_mode(wl, IEEE80211_IF_TYPE_AP)) + if (b43legacy_is_mode(wl, NL80211_IFTYPE_AP)) ctl |= B43legacy_MACCTL_AP; - else if (b43legacy_is_mode(wl, IEEE80211_IF_TYPE_IBSS)) + else if (b43legacy_is_mode(wl, NL80211_IFTYPE_ADHOC)) ctl &= ~B43legacy_MACCTL_INFRA; if (wl->filter_flags & FIF_CONTROL) @@ -2646,7 +2646,7 @@ static int b43legacy_op_dev_config(struct ieee80211_hw *hw, b43legacy_mgmtframe_txantenna(dev, antenna_tx); /* Update templates for AP mode. */ - if (b43legacy_is_mode(wl, IEEE80211_IF_TYPE_AP)) + if (b43legacy_is_mode(wl, NL80211_IFTYPE_AP)) b43legacy_set_beacon_int(dev, conf->beacon_int); @@ -2733,12 +2733,12 @@ static int b43legacy_op_config_interface(struct ieee80211_hw *hw, else memset(wl->bssid, 0, ETH_ALEN); if (b43legacy_status(dev) >= B43legacy_STAT_INITIALIZED) { - if (b43legacy_is_mode(wl, IEEE80211_IF_TYPE_AP)) { - B43legacy_WARN_ON(vif->type != IEEE80211_IF_TYPE_AP); + if (b43legacy_is_mode(wl, NL80211_IFTYPE_AP)) { + B43legacy_WARN_ON(vif->type != NL80211_IFTYPE_AP); b43legacy_set_ssid(dev, conf->ssid, conf->ssid_len); if (conf->changed & IEEE80211_IFCC_BEACON) b43legacy_update_templates(wl); - } else if (b43legacy_is_mode(wl, IEEE80211_IF_TYPE_IBSS)) { + } else if (b43legacy_is_mode(wl, NL80211_IFTYPE_ADHOC)) { if (conf->changed & IEEE80211_IFCC_BEACON) b43legacy_update_templates(wl); } @@ -3020,7 +3020,7 @@ static void b43legacy_set_synth_pu_delay(struct b43legacy_wldev *dev, bool idle) { u16 pu_delay = 1050; - if (b43legacy_is_mode(dev->wl, IEEE80211_IF_TYPE_IBSS) || idle) + if (b43legacy_is_mode(dev->wl, NL80211_IFTYPE_ADHOC) || idle) pu_delay = 500; if ((dev->phy.radio_ver == 0x2050) && (dev->phy.radio_rev == 8)) pu_delay = max(pu_delay, (u16)2400); @@ -3035,7 +3035,7 @@ static void b43legacy_set_pretbtt(struct b43legacy_wldev *dev) u16 pretbtt; /* The time value is in microseconds. */ - if (b43legacy_is_mode(dev->wl, IEEE80211_IF_TYPE_IBSS)) + if (b43legacy_is_mode(dev->wl, NL80211_IFTYPE_ADHOC)) pretbtt = 2; else pretbtt = 250; @@ -3259,10 +3259,10 @@ static int b43legacy_op_add_interface(struct ieee80211_hw *hw, /* TODO: allow WDS/AP devices to coexist */ - if (conf->type != IEEE80211_IF_TYPE_AP && - conf->type != IEEE80211_IF_TYPE_STA && - conf->type != IEEE80211_IF_TYPE_WDS && - conf->type != IEEE80211_IF_TYPE_IBSS) + if (conf->type != NL80211_IFTYPE_AP && + conf->type != NL80211_IFTYPE_STATION && + conf->type != NL80211_IFTYPE_WDS && + conf->type != NL80211_IFTYPE_ADHOC) return -EOPNOTSUPP; mutex_lock(&wl->mutex); diff --git a/drivers/net/wireless/b43legacy/phy.c b/drivers/net/wireless/b43legacy/phy.c index bed67a54250..4c9442b16f3 100644 --- a/drivers/net/wireless/b43legacy/phy.c +++ b/drivers/net/wireless/b43legacy/phy.c @@ -103,7 +103,7 @@ void b43legacy_phy_lock(struct b43legacy_wldev *dev) if (dev->dev->id.revision < 3) { b43legacy_mac_suspend(dev); } else { - if (!b43legacy_is_mode(dev->wl, IEEE80211_IF_TYPE_AP)) + if (!b43legacy_is_mode(dev->wl, NL80211_IFTYPE_AP)) b43legacy_power_saving_ctl_bits(dev, -1, 1); } } @@ -118,7 +118,7 @@ void b43legacy_phy_unlock(struct b43legacy_wldev *dev) if (dev->dev->id.revision < 3) { b43legacy_mac_enable(dev); } else { - if (!b43legacy_is_mode(dev->wl, IEEE80211_IF_TYPE_AP)) + if (!b43legacy_is_mode(dev->wl, NL80211_IFTYPE_AP)) b43legacy_power_saving_ctl_bits(dev, -1, -1); } } diff --git a/drivers/net/wireless/iwlwifi/iwl-3945-rs.c b/drivers/net/wireless/iwlwifi/iwl-3945-rs.c index 10c64bdb314..da23c927380 100644 --- a/drivers/net/wireless/iwlwifi/iwl-3945-rs.c +++ b/drivers/net/wireless/iwlwifi/iwl-3945-rs.c @@ -682,7 +682,7 @@ static void rs_get_rate(void *priv_rate, struct net_device *dev, rs_sta = (void *)sta->rate_ctrl_priv; - if ((priv->iw_mode == IEEE80211_IF_TYPE_IBSS) && + if ((priv->iw_mode == NL80211_IFTYPE_ADHOC) && !rs_sta->ibss_sta_added) { u8 sta_id = iwl3945_hw_find_station(priv, hdr->addr1); diff --git a/drivers/net/wireless/iwlwifi/iwl-3945.c b/drivers/net/wireless/iwlwifi/iwl-3945.c index 3d100e89824..7ca5627cc07 100644 --- a/drivers/net/wireless/iwlwifi/iwl-3945.c +++ b/drivers/net/wireless/iwlwifi/iwl-3945.c @@ -520,10 +520,10 @@ static int iwl3945_is_network_packet(struct iwl3945_priv *priv, /* Filter incoming packets to determine if they are targeted toward * this network, discarding packets coming from ourselves */ switch (priv->iw_mode) { - case IEEE80211_IF_TYPE_IBSS: /* Header: Dest. | Source | BSSID */ + case NL80211_IFTYPE_ADHOC: /* Header: Dest. | Source | BSSID */ /* packets to our IBSS update information */ return !compare_ether_addr(header->addr3, priv->bssid); - case IEEE80211_IF_TYPE_STA: /* Header: Dest. | AP{BSSID} | Source */ + case NL80211_IFTYPE_STATION: /* Header: Dest. | AP{BSSID} | Source */ /* packets to our IBSS update information */ return !compare_ether_addr(header->addr2, priv->bssid); default: @@ -807,7 +807,7 @@ void iwl3945_hw_build_tx_cmd_rate(struct iwl3945_priv *priv, priv->stations[sta_id].current_rate.rate_n_flags = rate; - if ((priv->iw_mode == IEEE80211_IF_TYPE_IBSS) && + if ((priv->iw_mode == NL80211_IFTYPE_ADHOC) && (sta_id != priv->hw_setting.bcast_sta_id) && (sta_id != IWL_MULTICAST_ID)) priv->stations[IWL_STA_ID].current_rate.rate_n_flags = rate; diff --git a/drivers/net/wireless/iwlwifi/iwl-3945.h b/drivers/net/wireless/iwlwifi/iwl-3945.h index 9bbbc9d7c0e..2a4933b5fb6 100644 --- a/drivers/net/wireless/iwlwifi/iwl-3945.h +++ b/drivers/net/wireless/iwlwifi/iwl-3945.h @@ -851,7 +851,7 @@ struct iwl3945_priv { /* eeprom */ struct iwl3945_eeprom eeprom; - enum ieee80211_if_types iw_mode; + enum nl80211_iftype iw_mode; struct sk_buff *ibss_beacon; diff --git a/drivers/net/wireless/iwlwifi/iwl-agn-rs.c b/drivers/net/wireless/iwlwifi/iwl-agn-rs.c index c293e5b6cbb..700da67ac28 100644 --- a/drivers/net/wireless/iwlwifi/iwl-agn-rs.c +++ b/drivers/net/wireless/iwlwifi/iwl-agn-rs.c @@ -821,7 +821,7 @@ static void rs_tx_status(void *priv_rate, struct net_device *dev, lq_sta = (struct iwl_lq_sta *)sta->rate_ctrl_priv; - if ((priv->iw_mode == IEEE80211_IF_TYPE_IBSS) && + if ((priv->iw_mode == NL80211_IFTYPE_ADHOC) && !lq_sta->ibss_sta_added) goto out; @@ -2093,7 +2093,7 @@ static void rs_initialize_lq(struct iwl_priv *priv, i = sta->last_txrate_idx; if ((lq_sta->lq.sta_id == 0xff) && - (priv->iw_mode == IEEE80211_IF_TYPE_IBSS)) + (priv->iw_mode == NL80211_IFTYPE_ADHOC)) goto out; valid_tx_ant = priv->hw_params.valid_tx_ant; @@ -2163,7 +2163,7 @@ static void rs_get_rate(void *priv_rate, struct net_device *dev, lq_sta = (struct iwl_lq_sta *)sta->rate_ctrl_priv; i = sta->last_txrate_idx; - if ((priv->iw_mode == IEEE80211_IF_TYPE_IBSS) && + if ((priv->iw_mode == NL80211_IFTYPE_ADHOC) && !lq_sta->ibss_sta_added) { u8 sta_id = iwl_find_station(priv, hdr->addr1); DECLARE_MAC_BUF(mac); @@ -2243,7 +2243,7 @@ static void rs_rate_init(void *priv_rate, void *priv_sta, * after assoc.. */ lq_sta->ibss_sta_added = 0; - if (priv->iw_mode == IEEE80211_IF_TYPE_AP) { + if (priv->iw_mode == NL80211_IFTYPE_AP) { u8 sta_id = iwl_find_station(priv, sta->addr); DECLARE_MAC_BUF(mac); diff --git a/drivers/net/wireless/iwlwifi/iwl-agn.c b/drivers/net/wireless/iwlwifi/iwl-agn.c index 31ea2885889..e8db33bf5e5 100644 --- a/drivers/net/wireless/iwlwifi/iwl-agn.c +++ b/drivers/net/wireless/iwlwifi/iwl-agn.c @@ -337,7 +337,7 @@ static int iwl4965_commit_rxon(struct iwl_priv *priv) /* If we have set the ASSOC_MSK and we are in BSS mode then * add the IWL_AP_ID to the station rate table */ if (new_assoc) { - if (priv->iw_mode == IEEE80211_IF_TYPE_STA) { + if (priv->iw_mode == NL80211_IFTYPE_STATION) { ret = iwl_rxon_add_station(priv, priv->active_rxon.bssid_addr, 1); if (ret == IWL_INVALID_STATION) { @@ -448,8 +448,8 @@ static unsigned int iwl_fill_beacon_frame(struct iwl_priv *priv, const u8 *dest, int left) { if (!iwl_is_associated(priv) || !priv->ibss_beacon || - ((priv->iw_mode != IEEE80211_IF_TYPE_IBSS) && - (priv->iw_mode != IEEE80211_IF_TYPE_AP))) + ((priv->iw_mode != NL80211_IFTYPE_ADHOC) && + (priv->iw_mode != NL80211_IFTYPE_AP))) return 0; if (priv->ibss_beacon->len > left) @@ -672,7 +672,7 @@ static void iwl4965_setup_rxon_timing(struct iwl_priv *priv) beacon_int = priv->beacon_int; spin_unlock_irqrestore(&priv->lock, flags); - if (priv->iw_mode == IEEE80211_IF_TYPE_STA) { + if (priv->iw_mode == NL80211_IFTYPE_STATION) { if (beacon_int == 0) { priv->rxon_timing.beacon_interval = cpu_to_le16(100); priv->rxon_timing.beacon_init_val = cpu_to_le32(102400); @@ -721,7 +721,7 @@ static void iwl_set_flags_for_band(struct iwl_priv *priv, else priv->staging_rxon.flags &= ~RXON_FLG_SHORT_SLOT_MSK; - if (priv->iw_mode == IEEE80211_IF_TYPE_IBSS) + if (priv->iw_mode == NL80211_IFTYPE_ADHOC) priv->staging_rxon.flags &= ~RXON_FLG_SHORT_SLOT_MSK; priv->staging_rxon.flags |= RXON_FLG_BAND_24G_MSK; @@ -740,23 +740,23 @@ static void iwl4965_connection_init_rx_config(struct iwl_priv *priv) memset(&priv->staging_rxon, 0, sizeof(priv->staging_rxon)); switch (priv->iw_mode) { - case IEEE80211_IF_TYPE_AP: + case NL80211_IFTYPE_AP: priv->staging_rxon.dev_type = RXON_DEV_TYPE_AP; break; - case IEEE80211_IF_TYPE_STA: + case NL80211_IFTYPE_STATION: priv->staging_rxon.dev_type = RXON_DEV_TYPE_ESS; priv->staging_rxon.filter_flags = RXON_FILTER_ACCEPT_GRP_MSK; break; - case IEEE80211_IF_TYPE_IBSS: + case NL80211_IFTYPE_ADHOC: priv->staging_rxon.dev_type = RXON_DEV_TYPE_IBSS; priv->staging_rxon.flags = RXON_FLG_SHORT_PREAMBLE_MSK; priv->staging_rxon.filter_flags = RXON_FILTER_BCON_AWARE_MSK | RXON_FILTER_ACCEPT_GRP_MSK; break; - case IEEE80211_IF_TYPE_MNTR: + case NL80211_IFTYPE_MONITOR: priv->staging_rxon.dev_type = RXON_DEV_TYPE_SNIFFER; priv->staging_rxon.filter_flags = RXON_FILTER_PROMISC_MSK | RXON_FILTER_CTL2HOST_MSK | RXON_FILTER_ACCEPT_GRP_MSK; @@ -785,7 +785,7 @@ static void iwl4965_connection_init_rx_config(struct iwl_priv *priv) * in some case A channels are all non IBSS * in this case force B/G channel */ - if ((priv->iw_mode == IEEE80211_IF_TYPE_IBSS) && + if ((priv->iw_mode == NL80211_IFTYPE_ADHOC) && !(is_channel_ibss(ch_info))) ch_info = &priv->channel_info[0]; @@ -1182,7 +1182,7 @@ static void iwl4965_rx_beacon_notif(struct iwl_priv *priv, le32_to_cpu(beacon->low_tsf), rate); #endif - if ((priv->iw_mode == IEEE80211_IF_TYPE_AP) && + if ((priv->iw_mode == NL80211_IFTYPE_AP) && (!test_bit(STATUS_EXIT_PENDING, &priv->status))) queue_work(priv->workqueue, &priv->beacon_update); } @@ -2388,7 +2388,7 @@ static void iwl4965_bg_set_monitor(struct work_struct *work) mutex_lock(&priv->mutex); - ret = iwl4965_set_mode(priv, IEEE80211_IF_TYPE_MNTR); + ret = iwl4965_set_mode(priv, NL80211_IFTYPE_MONITOR); if (ret) { if (ret == -EAGAIN) @@ -2469,7 +2469,7 @@ static void iwl4965_post_associate(struct iwl_priv *priv) DECLARE_MAC_BUF(mac); unsigned long flags; - if (priv->iw_mode == IEEE80211_IF_TYPE_AP) { + if (priv->iw_mode == NL80211_IFTYPE_AP) { IWL_ERROR("%s Should not be called in AP mode\n", __func__); return; } @@ -2524,7 +2524,7 @@ static void iwl4965_post_associate(struct iwl_priv *priv) else priv->staging_rxon.flags &= ~RXON_FLG_SHORT_SLOT_MSK; - if (priv->iw_mode == IEEE80211_IF_TYPE_IBSS) + if (priv->iw_mode == NL80211_IFTYPE_ADHOC) priv->staging_rxon.flags &= ~RXON_FLG_SHORT_SLOT_MSK; } @@ -2532,10 +2532,10 @@ static void iwl4965_post_associate(struct iwl_priv *priv) iwl4965_commit_rxon(priv); switch (priv->iw_mode) { - case IEEE80211_IF_TYPE_STA: + case NL80211_IFTYPE_STATION: break; - case IEEE80211_IF_TYPE_IBSS: + case NL80211_IFTYPE_ADHOC: /* assume default assoc id */ priv->assoc_id = 1; @@ -2551,7 +2551,7 @@ static void iwl4965_post_associate(struct iwl_priv *priv) break; } - if (priv->iw_mode == IEEE80211_IF_TYPE_IBSS) + if (priv->iw_mode == NL80211_IFTYPE_ADHOC) priv->assoc_station_added = 1; spin_lock_irqsave(&priv->lock, flags); @@ -2828,7 +2828,7 @@ static int iwl4965_mac_config(struct ieee80211_hw *hw, struct ieee80211_conf *co goto out; } - if (priv->iw_mode == IEEE80211_IF_TYPE_IBSS && + if (priv->iw_mode == NL80211_IFTYPE_ADHOC && !is_channel_ibss(ch_info)) { IWL_ERROR("channel %d in band %d not IBSS channel\n", conf->channel->hw_value, conf->channel->band); @@ -2943,7 +2943,7 @@ static void iwl4965_config_ap(struct iwl_priv *priv) priv->staging_rxon.flags &= ~RXON_FLG_SHORT_SLOT_MSK; - if (priv->iw_mode == IEEE80211_IF_TYPE_IBSS) + if (priv->iw_mode == NL80211_IFTYPE_ADHOC) priv->staging_rxon.flags &= ~RXON_FLG_SHORT_SLOT_MSK; } @@ -2982,7 +2982,7 @@ static int iwl4965_mac_config_interface(struct ieee80211_hw *hw, return 0; } - if (priv->iw_mode == IEEE80211_IF_TYPE_IBSS && + if (priv->iw_mode == NL80211_IFTYPE_ADHOC && conf->changed & IEEE80211_IFCC_BEACON) { struct sk_buff *beacon = ieee80211_beacon_get(hw, vif); if (!beacon) @@ -2992,7 +2992,7 @@ static int iwl4965_mac_config_interface(struct ieee80211_hw *hw, return rc; } - if ((priv->iw_mode == IEEE80211_IF_TYPE_AP) && + if ((priv->iw_mode == NL80211_IFTYPE_AP) && (!conf->ssid_len)) { IWL_DEBUG_MAC80211 ("Leaving in AP mode because HostAPD is not ready.\n"); @@ -3015,7 +3015,7 @@ static int iwl4965_mac_config_interface(struct ieee80211_hw *hw, !(priv->hw->flags & IEEE80211_HW_NO_PROBE_FILTERING)) { */ - if (priv->iw_mode == IEEE80211_IF_TYPE_AP) { + if (priv->iw_mode == NL80211_IFTYPE_AP) { if (!conf->bssid) { conf->bssid = priv->mac_addr; memcpy(priv->bssid, priv->mac_addr, ETH_ALEN); @@ -3050,11 +3050,11 @@ static int iwl4965_mac_config_interface(struct ieee80211_hw *hw, * to verify) - jpk */ memcpy(priv->bssid, conf->bssid, ETH_ALEN); - if (priv->iw_mode == IEEE80211_IF_TYPE_AP) + if (priv->iw_mode == NL80211_IFTYPE_AP) iwl4965_config_ap(priv); else { rc = iwl4965_commit_rxon(priv); - if ((priv->iw_mode == IEEE80211_IF_TYPE_STA) && rc) + if ((priv->iw_mode == NL80211_IFTYPE_STATION) && rc) iwl_rxon_add_station( priv, priv->active_rxon.bssid_addr, 1); } @@ -3090,7 +3090,7 @@ static void iwl4965_configure_filter(struct ieee80211_hw *hw, if (changed_flags & (*total_flags) & FIF_OTHER_BSS) { IWL_DEBUG_MAC80211("Enter: type %d (0x%x, 0x%x)\n", - IEEE80211_IF_TYPE_MNTR, + NL80211_IFTYPE_MONITOR, changed_flags, *total_flags); /* queue work 'cuz mac80211 is holding a lock which * prevents us from issuing (synchronous) f/w cmds */ @@ -3204,7 +3204,7 @@ static int iwl_mac_hw_scan(struct ieee80211_hw *hw, u8 *ssid, size_t ssid_len) goto out_unlock; } - if (priv->iw_mode == IEEE80211_IF_TYPE_AP) { /* APs don't scan */ + if (priv->iw_mode == NL80211_IFTYPE_AP) { /* APs don't scan */ ret = -EIO; IWL_ERROR("ERROR: APs don't scan\n"); goto out_unlock; @@ -3329,7 +3329,7 @@ static int iwl4965_mac_set_key(struct ieee80211_hw *hw, enum set_key_cmd cmd, * in 1X mode. * In legacy wep mode, we use another host command to the uCode */ if (key->alg == ALG_WEP && sta_id == priv->hw_params.bcast_sta_id && - priv->iw_mode != IEEE80211_IF_TYPE_AP) { + priv->iw_mode != NL80211_IFTYPE_AP) { if (cmd == SET_KEY) is_default_wep_key = !priv->key_mapping_key; else @@ -3400,7 +3400,7 @@ static int iwl4965_mac_conf_tx(struct ieee80211_hw *hw, u16 queue, priv->qos_data.def_qos_parm.ac[q].reserved1 = 0; priv->qos_data.qos_active = 1; - if (priv->iw_mode == IEEE80211_IF_TYPE_AP) + if (priv->iw_mode == NL80211_IFTYPE_AP) iwl_activate_qos(priv, 1); else if (priv->assoc_id && iwl_is_associated(priv)) iwl_activate_qos(priv, 0); @@ -3518,7 +3518,7 @@ static void iwl4965_mac_reset_tsf(struct ieee80211_hw *hw) priv->beacon_int = priv->hw->conf.beacon_int; priv->timestamp = 0; - if ((priv->iw_mode == IEEE80211_IF_TYPE_STA)) + if ((priv->iw_mode == NL80211_IFTYPE_STATION)) priv->beacon_int = 0; spin_unlock_irqrestore(&priv->lock, flags); @@ -3532,7 +3532,7 @@ static void iwl4965_mac_reset_tsf(struct ieee80211_hw *hw) /* we are restarting association process * clear RXON_FILTER_ASSOC_MSK bit */ - if (priv->iw_mode != IEEE80211_IF_TYPE_AP) { + if (priv->iw_mode != NL80211_IFTYPE_AP) { iwl_scan_cancel_timeout(priv, 100); priv->staging_rxon.filter_flags &= ~RXON_FILTER_ASSOC_MSK; iwl4965_commit_rxon(priv); @@ -3541,7 +3541,7 @@ static void iwl4965_mac_reset_tsf(struct ieee80211_hw *hw) iwl_power_update_mode(priv, 0); /* Per mac80211.h: This is only used in IBSS mode... */ - if (priv->iw_mode != IEEE80211_IF_TYPE_IBSS) { + if (priv->iw_mode != NL80211_IFTYPE_ADHOC) { /* switch to CAM during association period. * the ucode will block any association/authentication @@ -3580,7 +3580,7 @@ static int iwl4965_mac_beacon_update(struct ieee80211_hw *hw, struct sk_buff *sk return -EIO; } - if (priv->iw_mode != IEEE80211_IF_TYPE_IBSS) { + if (priv->iw_mode != NL80211_IFTYPE_ADHOC) { IWL_DEBUG_MAC80211("leave - not IBSS\n"); mutex_unlock(&priv->mutex); return -EIO; diff --git a/drivers/net/wireless/iwlwifi/iwl-core.c b/drivers/net/wireless/iwlwifi/iwl-core.c index 36d08b0eec4..d80184ee911 100644 --- a/drivers/net/wireless/iwlwifi/iwl-core.c +++ b/drivers/net/wireless/iwlwifi/iwl-core.c @@ -306,14 +306,14 @@ void iwl_reset_qos(struct iwl_priv *priv) spin_lock_irqsave(&priv->lock, flags); priv->qos_data.qos_active = 0; - if (priv->iw_mode == IEEE80211_IF_TYPE_IBSS) { + if (priv->iw_mode == NL80211_IFTYPE_ADHOC) { if (priv->qos_data.qos_enable) priv->qos_data.qos_active = 1; if (!(priv->active_rate & 0xfff0)) { cw_min = 31; is_legacy = 1; } - } else if (priv->iw_mode == IEEE80211_IF_TYPE_AP) { + } else if (priv->iw_mode == NL80211_IFTYPE_AP) { if (priv->qos_data.qos_enable) priv->qos_data.qos_active = 1; } else if (!(priv->staging_rxon.flags & RXON_FLG_SHORT_SLOT_MSK)) { @@ -932,7 +932,7 @@ int iwl_init_drv(struct iwl_priv *priv) priv->ieee_rates = NULL; priv->band = IEEE80211_BAND_2GHZ; - priv->iw_mode = IEEE80211_IF_TYPE_STA; + priv->iw_mode = NL80211_IFTYPE_STATION; priv->use_ant_b_for_management_frame = 1; /* start with ant B */ priv->current_ht_config.sm_ps = WLAN_HT_CAP_SM_PS_DISABLED; @@ -1396,7 +1396,7 @@ void iwl_radio_kill_sw_disable_radio(struct iwl_priv *priv) iwl_scan_cancel(priv); /* FIXME: This is a workaround for AP */ - if (priv->iw_mode != IEEE80211_IF_TYPE_AP) { + if (priv->iw_mode != NL80211_IFTYPE_AP) { spin_lock_irqsave(&priv->lock, flags); iwl_write32(priv, CSR_UCODE_DRV_GP1_SET, CSR_UCODE_SW_BIT_RFKILL); diff --git a/drivers/net/wireless/iwlwifi/iwl-dev.h b/drivers/net/wireless/iwlwifi/iwl-dev.h index 1823687e582..c018121085e 100644 --- a/drivers/net/wireless/iwlwifi/iwl-dev.h +++ b/drivers/net/wireless/iwlwifi/iwl-dev.h @@ -954,7 +954,7 @@ struct iwl_priv { u8 *eeprom; struct iwl_eeprom_calib_info *calib_info; - enum ieee80211_if_types iw_mode; + enum nl80211_iftype iw_mode; struct sk_buff *ibss_beacon; diff --git a/drivers/net/wireless/iwlwifi/iwl-power.c b/drivers/net/wireless/iwlwifi/iwl-power.c index 55ec31ec9e1..60a03d2d2d0 100644 --- a/drivers/net/wireless/iwlwifi/iwl-power.c +++ b/drivers/net/wireless/iwlwifi/iwl-power.c @@ -290,7 +290,7 @@ int iwl_power_update_mode(struct iwl_priv *priv, bool force) final_mode = setting->critical_power_setting; /* driver only support CAM for non STA network */ - if (priv->iw_mode != IEEE80211_IF_TYPE_STA) + if (priv->iw_mode != NL80211_IFTYPE_STATION) final_mode = IWL_POWER_MODE_CAM; if (!iwl_is_rfkill(priv) && !setting->power_disabled && diff --git a/drivers/net/wireless/iwlwifi/iwl-rx.c b/drivers/net/wireless/iwlwifi/iwl-rx.c index faad4d3f56e..38b2946b1d8 100644 --- a/drivers/net/wireless/iwlwifi/iwl-rx.c +++ b/drivers/net/wireless/iwlwifi/iwl-rx.c @@ -1026,10 +1026,10 @@ static int iwl_is_network_packet(struct iwl_priv *priv, /* Filter incoming packets to determine if they are targeted toward * this network, discarding packets coming from ourselves */ switch (priv->iw_mode) { - case IEEE80211_IF_TYPE_IBSS: /* Header: Dest. | Source | BSSID */ + case NL80211_IFTYPE_ADHOC: /* Header: Dest. | Source | BSSID */ /* packets to our IBSS update information */ return !compare_ether_addr(header->addr3, priv->bssid); - case IEEE80211_IF_TYPE_STA: /* Header: Dest. | AP{BSSID} | Source */ + case NL80211_IFTYPE_STATION: /* Header: Dest. | AP{BSSID} | Source */ /* packets to our IBSS update information */ return !compare_ether_addr(header->addr2, priv->bssid); default: @@ -1169,7 +1169,7 @@ void iwl_rx_reply_rx(struct iwl_priv *priv, rx_status.flag |= RX_FLAG_SHORTPRE; /* Take shortcut when only in monitor mode */ - if (priv->iw_mode == IEEE80211_IF_TYPE_MNTR) { + if (priv->iw_mode == NL80211_IFTYPE_MONITOR) { iwl_pass_packet_to_mac80211(priv, include_phy, rxb, &rx_status); return; @@ -1186,7 +1186,7 @@ void iwl_rx_reply_rx(struct iwl_priv *priv, switch (fc & IEEE80211_FCTL_FTYPE) { case IEEE80211_FTYPE_MGMT: case IEEE80211_FTYPE_DATA: - if (priv->iw_mode == IEEE80211_IF_TYPE_AP) + if (priv->iw_mode == NL80211_IFTYPE_AP) iwl_update_ps_mode(priv, fc & IEEE80211_FCTL_PM, header->addr2); /* fall through */ diff --git a/drivers/net/wireless/iwlwifi/iwl-scan.c b/drivers/net/wireless/iwlwifi/iwl-scan.c index d026aaf6233..09c264be049 100644 --- a/drivers/net/wireless/iwlwifi/iwl-scan.c +++ b/drivers/net/wireless/iwlwifi/iwl-scan.c @@ -463,7 +463,7 @@ void iwl_init_scan_params(struct iwl_priv *priv) int iwl_scan_initiate(struct iwl_priv *priv) { - if (priv->iw_mode == IEEE80211_IF_TYPE_AP) { + if (priv->iw_mode == NL80211_IFTYPE_AP) { IWL_ERROR("APs don't scan.\n"); return 0; } @@ -868,7 +868,7 @@ static void iwl_bg_request_scan(struct work_struct *data) scan->tx_cmd.len = cpu_to_le16(cmd_len); - if (priv->iw_mode == IEEE80211_IF_TYPE_MNTR) + if (priv->iw_mode == NL80211_IFTYPE_MONITOR) scan->filter_flags = RXON_FILTER_PROMISC_MSK; scan->filter_flags |= (RXON_FILTER_ACCEPT_GRP_MSK | diff --git a/drivers/net/wireless/iwlwifi/iwl-sta.c b/drivers/net/wireless/iwlwifi/iwl-sta.c index a72569f1acb..61797f3f8d5 100644 --- a/drivers/net/wireless/iwlwifi/iwl-sta.c +++ b/drivers/net/wireless/iwlwifi/iwl-sta.c @@ -47,8 +47,8 @@ u8 iwl_find_station(struct iwl_priv *priv, const u8 *addr) unsigned long flags; DECLARE_MAC_BUF(mac); - if ((priv->iw_mode == IEEE80211_IF_TYPE_IBSS) || - (priv->iw_mode == IEEE80211_IF_TYPE_AP)) + if ((priv->iw_mode == NL80211_IFTYPE_ADHOC) || + (priv->iw_mode == NL80211_IFTYPE_AP)) start = IWL_STA_ID; if (is_broadcast_ether_addr(addr)) @@ -74,7 +74,7 @@ EXPORT_SYMBOL(iwl_find_station); int iwl_get_ra_sta_id(struct iwl_priv *priv, struct ieee80211_hdr *hdr) { - if (priv->iw_mode == IEEE80211_IF_TYPE_STA) { + if (priv->iw_mode == NL80211_IFTYPE_STATION) { return IWL_AP_ID; } else { u8 *da = ieee80211_get_DA(hdr); @@ -286,7 +286,7 @@ u8 iwl_add_station_flags(struct iwl_priv *priv, const u8 *addr, int is_ap, /* BCAST station and IBSS stations do not work in HT mode */ if (sta_id != priv->hw_params.bcast_sta_id && - priv->iw_mode != IEEE80211_IF_TYPE_IBSS) + priv->iw_mode != NL80211_IFTYPE_ADHOC) iwl_set_ht_add_station(priv, sta_id, ht_info); spin_unlock_irqrestore(&priv->sta_lock, flags_spin); @@ -817,7 +817,7 @@ int iwl_send_lq_cmd(struct iwl_priv *priv, }; if ((lq->sta_id == 0xFF) && - (priv->iw_mode == IEEE80211_IF_TYPE_IBSS)) + (priv->iw_mode == NL80211_IFTYPE_ADHOC)) return -EINVAL; if (lq->sta_id == 0xFF) @@ -904,7 +904,7 @@ int iwl_rxon_add_station(struct iwl_priv *priv, const u8 *addr, int is_ap) if ((is_ap) && (conf->flags & IEEE80211_CONF_SUPPORT_HT_MODE) && - (priv->iw_mode == IEEE80211_IF_TYPE_STA)) + (priv->iw_mode == NL80211_IFTYPE_STATION)) sta_id = iwl_add_station_flags(priv, addr, is_ap, 0, cur_ht_config); else @@ -938,11 +938,11 @@ int iwl_get_sta_id(struct iwl_priv *priv, struct ieee80211_hdr *hdr) /* If we are a client station in a BSS network, use the special * AP station entry (that's the only station we communicate with) */ - case IEEE80211_IF_TYPE_STA: + case NL80211_IFTYPE_STATION: return IWL_AP_ID; /* If we are an AP, then find the station, or use BCAST */ - case IEEE80211_IF_TYPE_AP: + case NL80211_IFTYPE_AP: sta_id = iwl_find_station(priv, hdr->addr1); if (sta_id != IWL_INVALID_STATION) return sta_id; @@ -950,7 +950,7 @@ int iwl_get_sta_id(struct iwl_priv *priv, struct ieee80211_hdr *hdr) /* If this frame is going out to an IBSS network, find the station, * or create a new station table entry */ - case IEEE80211_IF_TYPE_IBSS: + case NL80211_IFTYPE_ADHOC: sta_id = iwl_find_station(priv, hdr->addr1); if (sta_id != IWL_INVALID_STATION) return sta_id; @@ -970,7 +970,7 @@ int iwl_get_sta_id(struct iwl_priv *priv, struct ieee80211_hdr *hdr) /* If we are in monitor mode, use BCAST. This is required for * packet injection. */ - case IEEE80211_IF_TYPE_MNTR: + case NL80211_IFTYPE_MONITOR: return priv->hw_params.bcast_sta_id; default: diff --git a/drivers/net/wireless/iwlwifi/iwl-tx.c b/drivers/net/wireless/iwlwifi/iwl-tx.c index 9d5bcf46cbe..e9feca4033f 100644 --- a/drivers/net/wireless/iwlwifi/iwl-tx.c +++ b/drivers/net/wireless/iwlwifi/iwl-tx.c @@ -814,10 +814,10 @@ int iwl_tx_skb(struct iwl_priv *priv, struct sk_buff *skb) /* drop all data frame if we are not associated */ if (ieee80211_is_data(fc) && - (priv->iw_mode != IEEE80211_IF_TYPE_MNTR || + (priv->iw_mode != NL80211_IFTYPE_MONITOR || !(info->flags & IEEE80211_TX_CTL_INJECTED)) && /* packet injection */ (!iwl_is_associated(priv) || - ((priv->iw_mode == IEEE80211_IF_TYPE_STA) && !priv->assoc_id) || + ((priv->iw_mode == NL80211_IFTYPE_STATION) && !priv->assoc_id) || !priv->assoc_station_added)) { IWL_DEBUG_DROP("Dropping - !iwl_is_associated\n"); goto drop_unlock; diff --git a/drivers/net/wireless/iwlwifi/iwl3945-base.c b/drivers/net/wireless/iwlwifi/iwl3945-base.c index cbbe73a1288..e6c6ef6ad62 100644 --- a/drivers/net/wireless/iwlwifi/iwl3945-base.c +++ b/drivers/net/wireless/iwlwifi/iwl3945-base.c @@ -1160,7 +1160,7 @@ static int iwl3945_commit_rxon(struct iwl3945_priv *priv) /* If we have set the ASSOC_MSK and we are in BSS mode then * add the IWL_AP_ID to the station rate table */ if (iwl3945_is_associated(priv) && - (priv->iw_mode == IEEE80211_IF_TYPE_STA)) + (priv->iw_mode == NL80211_IFTYPE_STATION)) if (iwl3945_add_station(priv, priv->active_rxon.bssid_addr, 1, 0) == IWL_INVALID_STATION) { IWL_ERROR("Error adding AP address for transmit.\n"); @@ -1447,8 +1447,8 @@ unsigned int iwl3945_fill_beacon_frame(struct iwl3945_priv *priv, { if (!iwl3945_is_associated(priv) || !priv->ibss_beacon || - ((priv->iw_mode != IEEE80211_IF_TYPE_IBSS) && - (priv->iw_mode != IEEE80211_IF_TYPE_AP))) + ((priv->iw_mode != NL80211_IFTYPE_ADHOC) && + (priv->iw_mode != NL80211_IFTYPE_AP))) return 0; if (priv->ibss_beacon->len > left) @@ -1746,14 +1746,14 @@ static void iwl3945_reset_qos(struct iwl3945_priv *priv) spin_lock_irqsave(&priv->lock, flags); priv->qos_data.qos_active = 0; - if (priv->iw_mode == IEEE80211_IF_TYPE_IBSS) { + if (priv->iw_mode == NL80211_IFTYPE_ADHOC) { if (priv->qos_data.qos_enable) priv->qos_data.qos_active = 1; if (!(priv->active_rate & 0xfff0)) { cw_min = 31; is_legacy = 1; } - } else if (priv->iw_mode == IEEE80211_IF_TYPE_AP) { + } else if (priv->iw_mode == NL80211_IFTYPE_AP) { if (priv->qos_data.qos_enable) priv->qos_data.qos_active = 1; } else if (!(priv->staging_rxon.flags & RXON_FLG_SHORT_SLOT_MSK)) { @@ -2120,7 +2120,7 @@ static void iwl3945_setup_rxon_timing(struct iwl3945_priv *priv) beacon_int = priv->beacon_int; spin_unlock_irqrestore(&priv->lock, flags); - if (priv->iw_mode == IEEE80211_IF_TYPE_STA) { + if (priv->iw_mode == NL80211_IFTYPE_STATION) { if (beacon_int == 0) { priv->rxon_timing.beacon_interval = cpu_to_le16(100); priv->rxon_timing.beacon_init_val = cpu_to_le32(102400); @@ -2156,7 +2156,7 @@ static void iwl3945_setup_rxon_timing(struct iwl3945_priv *priv) static int iwl3945_scan_initiate(struct iwl3945_priv *priv) { - if (priv->iw_mode == IEEE80211_IF_TYPE_AP) { + if (priv->iw_mode == NL80211_IFTYPE_AP) { IWL_ERROR("APs don't scan.\n"); return 0; } @@ -2218,7 +2218,7 @@ static void iwl3945_set_flags_for_phymode(struct iwl3945_priv *priv, else priv->staging_rxon.flags &= ~RXON_FLG_SHORT_SLOT_MSK; - if (priv->iw_mode == IEEE80211_IF_TYPE_IBSS) + if (priv->iw_mode == NL80211_IFTYPE_ADHOC) priv->staging_rxon.flags &= ~RXON_FLG_SHORT_SLOT_MSK; priv->staging_rxon.flags |= RXON_FLG_BAND_24G_MSK; @@ -2237,23 +2237,23 @@ static void iwl3945_connection_init_rx_config(struct iwl3945_priv *priv) memset(&priv->staging_rxon, 0, sizeof(priv->staging_rxon)); switch (priv->iw_mode) { - case IEEE80211_IF_TYPE_AP: + case NL80211_IFTYPE_AP: priv->staging_rxon.dev_type = RXON_DEV_TYPE_AP; break; - case IEEE80211_IF_TYPE_STA: + case NL80211_IFTYPE_STATION: priv->staging_rxon.dev_type = RXON_DEV_TYPE_ESS; priv->staging_rxon.filter_flags = RXON_FILTER_ACCEPT_GRP_MSK; break; - case IEEE80211_IF_TYPE_IBSS: + case NL80211_IFTYPE_ADHOC: priv->staging_rxon.dev_type = RXON_DEV_TYPE_IBSS; priv->staging_rxon.flags = RXON_FLG_SHORT_PREAMBLE_MSK; priv->staging_rxon.filter_flags = RXON_FILTER_BCON_AWARE_MSK | RXON_FILTER_ACCEPT_GRP_MSK; break; - case IEEE80211_IF_TYPE_MNTR: + case NL80211_IFTYPE_MONITOR: priv->staging_rxon.dev_type = RXON_DEV_TYPE_SNIFFER; priv->staging_rxon.filter_flags = RXON_FILTER_PROMISC_MSK | RXON_FILTER_CTL2HOST_MSK | RXON_FILTER_ACCEPT_GRP_MSK; @@ -2282,7 +2282,7 @@ static void iwl3945_connection_init_rx_config(struct iwl3945_priv *priv) * in some case A channels are all non IBSS * in this case force B/G channel */ - if ((priv->iw_mode == IEEE80211_IF_TYPE_IBSS) && + if ((priv->iw_mode == NL80211_IFTYPE_ADHOC) && !(is_channel_ibss(ch_info))) ch_info = &priv->channel_info[0]; @@ -2302,7 +2302,7 @@ static void iwl3945_connection_init_rx_config(struct iwl3945_priv *priv) static int iwl3945_set_mode(struct iwl3945_priv *priv, int mode) { - if (mode == IEEE80211_IF_TYPE_IBSS) { + if (mode == NL80211_IFTYPE_ADHOC) { const struct iwl3945_channel_info *ch_info; ch_info = iwl3945_get_channel_info(priv, @@ -2469,11 +2469,11 @@ static int iwl3945_get_sta_id(struct iwl3945_priv *priv, struct ieee80211_hdr *h /* If we are a client station in a BSS network, use the special * AP station entry (that's the only station we communicate with) */ - case IEEE80211_IF_TYPE_STA: + case NL80211_IFTYPE_STATION: return IWL_AP_ID; /* If we are an AP, then find the station, or use BCAST */ - case IEEE80211_IF_TYPE_AP: + case NL80211_IFTYPE_AP: sta_id = iwl3945_hw_find_station(priv, hdr->addr1); if (sta_id != IWL_INVALID_STATION) return sta_id; @@ -2481,7 +2481,7 @@ static int iwl3945_get_sta_id(struct iwl3945_priv *priv, struct ieee80211_hdr *h /* If this frame is going out to an IBSS network, find the station, * or create a new station table entry */ - case IEEE80211_IF_TYPE_IBSS: { + case NL80211_IFTYPE_ADHOC: { DECLARE_MAC_BUF(mac); /* Create new station table entry */ @@ -2502,7 +2502,7 @@ static int iwl3945_get_sta_id(struct iwl3945_priv *priv, struct ieee80211_hdr *h } /* If we are in monitor mode, use BCAST. This is required for * packet injection. */ - case IEEE80211_IF_TYPE_MNTR: + case NL80211_IFTYPE_MONITOR: return priv->hw_setting.bcast_sta_id; default: @@ -2565,9 +2565,9 @@ static int iwl3945_tx_skb(struct iwl3945_priv *priv, struct sk_buff *skb) /* drop all data frame if we are not associated */ if (ieee80211_is_data(fc) && - (priv->iw_mode != IEEE80211_IF_TYPE_MNTR) && /* packet injection */ + (priv->iw_mode != NL80211_IFTYPE_MONITOR) && /* packet injection */ (!iwl3945_is_associated(priv) || - ((priv->iw_mode == IEEE80211_IF_TYPE_STA) && !priv->assoc_id))) { + ((priv->iw_mode == NL80211_IFTYPE_STATION) && !priv->assoc_id))) { IWL_DEBUG_DROP("Dropping - !iwl3945_is_associated\n"); goto drop_unlock; } @@ -2806,7 +2806,7 @@ static void iwl3945_radio_kill_sw(struct iwl3945_priv *priv, int disable_radio) if (disable_radio) { iwl3945_scan_cancel(priv); /* FIXME: This is a workaround for AP */ - if (priv->iw_mode != IEEE80211_IF_TYPE_AP) { + if (priv->iw_mode != NL80211_IFTYPE_AP) { spin_lock_irqsave(&priv->lock, flags); iwl3945_write32(priv, CSR_UCODE_DRV_GP1_SET, CSR_UCODE_SW_BIT_RFKILL); @@ -3161,7 +3161,7 @@ static void iwl3945_rx_beacon_notif(struct iwl3945_priv *priv, le32_to_cpu(beacon->low_tsf), rate); #endif - if ((priv->iw_mode == IEEE80211_IF_TYPE_AP) && + if ((priv->iw_mode == NL80211_IFTYPE_AP) && (!test_bit(STATUS_EXIT_PENDING, &priv->status))) queue_work(priv->workqueue, &priv->beacon_update); } @@ -6059,7 +6059,7 @@ static void iwl3945_bg_set_monitor(struct work_struct *work) if (!iwl3945_is_ready(priv)) IWL_DEBUG(IWL_DL_STATE, "leave - not ready\n"); else - if (iwl3945_set_mode(priv, IEEE80211_IF_TYPE_MNTR) != 0) + if (iwl3945_set_mode(priv, NL80211_IFTYPE_MONITOR) != 0) IWL_ERROR("iwl3945_set_mode() failed\n"); mutex_unlock(&priv->mutex); @@ -6248,7 +6248,7 @@ static void iwl3945_bg_request_scan(struct work_struct *data) /* select Rx antennas */ scan->flags |= iwl3945_get_antenna_flags(priv); - if (priv->iw_mode == IEEE80211_IF_TYPE_MNTR) + if (priv->iw_mode == NL80211_IFTYPE_MONITOR) scan->filter_flags = RXON_FILTER_PROMISC_MSK; scan->channel_count = @@ -6323,7 +6323,7 @@ static void iwl3945_post_associate(struct iwl3945_priv *priv) struct ieee80211_conf *conf = NULL; DECLARE_MAC_BUF(mac); - if (priv->iw_mode == IEEE80211_IF_TYPE_AP) { + if (priv->iw_mode == NL80211_IFTYPE_AP) { IWL_ERROR("%s Should not be called in AP mode\n", __func__); return; } @@ -6372,7 +6372,7 @@ static void iwl3945_post_associate(struct iwl3945_priv *priv) else priv->staging_rxon.flags &= ~RXON_FLG_SHORT_SLOT_MSK; - if (priv->iw_mode == IEEE80211_IF_TYPE_IBSS) + if (priv->iw_mode == NL80211_IFTYPE_ADHOC) priv->staging_rxon.flags &= ~RXON_FLG_SHORT_SLOT_MSK; } @@ -6380,11 +6380,11 @@ static void iwl3945_post_associate(struct iwl3945_priv *priv) iwl3945_commit_rxon(priv); switch (priv->iw_mode) { - case IEEE80211_IF_TYPE_STA: + case NL80211_IFTYPE_STATION: iwl3945_rate_scale_init(priv->hw, IWL_AP_ID); break; - case IEEE80211_IF_TYPE_IBSS: + case NL80211_IFTYPE_ADHOC: /* clear out the station table */ iwl3945_clear_stations_table(priv); @@ -6754,7 +6754,7 @@ static void iwl3945_config_ap(struct iwl3945_priv *priv) priv->staging_rxon.flags &= ~RXON_FLG_SHORT_SLOT_MSK; - if (priv->iw_mode == IEEE80211_IF_TYPE_IBSS) + if (priv->iw_mode == NL80211_IFTYPE_ADHOC) priv->staging_rxon.flags &= ~RXON_FLG_SHORT_SLOT_MSK; } @@ -6791,7 +6791,7 @@ static int iwl3945_mac_config_interface(struct ieee80211_hw *hw, } /* handle this temporarily here */ - if (priv->iw_mode == IEEE80211_IF_TYPE_IBSS && + if (priv->iw_mode == NL80211_IFTYPE_ADHOC && conf->changed & IEEE80211_IFCC_BEACON) { struct sk_buff *beacon = ieee80211_beacon_get(hw, vif); if (!beacon) @@ -6803,7 +6803,7 @@ static int iwl3945_mac_config_interface(struct ieee80211_hw *hw, /* XXX: this MUST use conf->mac_addr */ - if ((priv->iw_mode == IEEE80211_IF_TYPE_AP) && + if ((priv->iw_mode == NL80211_IFTYPE_AP) && (!conf->ssid_len)) { IWL_DEBUG_MAC80211 ("Leaving in AP mode because HostAPD is not ready.\n"); @@ -6826,7 +6826,7 @@ static int iwl3945_mac_config_interface(struct ieee80211_hw *hw, !(priv->hw->flags & IEEE80211_HW_NO_PROBE_FILTERING)) { */ - if (priv->iw_mode == IEEE80211_IF_TYPE_AP) { + if (priv->iw_mode == NL80211_IFTYPE_AP) { if (!conf->bssid) { conf->bssid = priv->mac_addr; memcpy(priv->bssid, priv->mac_addr, ETH_ALEN); @@ -6861,11 +6861,11 @@ static int iwl3945_mac_config_interface(struct ieee80211_hw *hw, * to verify) - jpk */ memcpy(priv->bssid, conf->bssid, ETH_ALEN); - if (priv->iw_mode == IEEE80211_IF_TYPE_AP) + if (priv->iw_mode == NL80211_IFTYPE_AP) iwl3945_config_ap(priv); else { rc = iwl3945_commit_rxon(priv); - if ((priv->iw_mode == IEEE80211_IF_TYPE_STA) && rc) + if ((priv->iw_mode == NL80211_IFTYPE_STATION) && rc) iwl3945_add_station(priv, priv->active_rxon.bssid_addr, 1, 0); } @@ -6901,7 +6901,7 @@ static void iwl3945_configure_filter(struct ieee80211_hw *hw, if (changed_flags & (*total_flags) & FIF_OTHER_BSS) { IWL_DEBUG_MAC80211("Enter: type %d (0x%x, 0x%x)\n", - IEEE80211_IF_TYPE_MNTR, + NL80211_IFTYPE_MONITOR, changed_flags, *total_flags); /* queue work 'cuz mac80211 is holding a lock which * prevents us from issuing (synchronous) f/w cmds */ @@ -7010,7 +7010,7 @@ static int iwl3945_mac_hw_scan(struct ieee80211_hw *hw, u8 *ssid, size_t len) goto out_unlock; } - if (priv->iw_mode == IEEE80211_IF_TYPE_AP) { /* APs don't scan */ + if (priv->iw_mode == NL80211_IFTYPE_AP) { /* APs don't scan */ rc = -EIO; IWL_ERROR("ERROR: APs don't scan\n"); goto out_unlock; @@ -7152,7 +7152,7 @@ static int iwl3945_mac_conf_tx(struct ieee80211_hw *hw, u16 queue, spin_unlock_irqrestore(&priv->lock, flags); mutex_lock(&priv->mutex); - if (priv->iw_mode == IEEE80211_IF_TYPE_AP) + if (priv->iw_mode == NL80211_IFTYPE_AP) iwl3945_activate_qos(priv, 1); else if (priv->assoc_id && iwl3945_is_associated(priv)) iwl3945_activate_qos(priv, 0); @@ -7239,7 +7239,7 @@ static void iwl3945_mac_reset_tsf(struct ieee80211_hw *hw) priv->beacon_int = priv->hw->conf.beacon_int; priv->timestamp1 = 0; priv->timestamp0 = 0; - if ((priv->iw_mode == IEEE80211_IF_TYPE_STA)) + if ((priv->iw_mode == NL80211_IFTYPE_STATION)) priv->beacon_int = 0; spin_unlock_irqrestore(&priv->lock, flags); @@ -7253,14 +7253,14 @@ static void iwl3945_mac_reset_tsf(struct ieee80211_hw *hw) /* we are restarting association process * clear RXON_FILTER_ASSOC_MSK bit */ - if (priv->iw_mode != IEEE80211_IF_TYPE_AP) { + if (priv->iw_mode != NL80211_IFTYPE_AP) { iwl3945_scan_cancel_timeout(priv, 100); priv->staging_rxon.filter_flags &= ~RXON_FILTER_ASSOC_MSK; iwl3945_commit_rxon(priv); } /* Per mac80211.h: This is only used in IBSS mode... */ - if (priv->iw_mode != IEEE80211_IF_TYPE_IBSS) { + if (priv->iw_mode != NL80211_IFTYPE_ADHOC) { IWL_DEBUG_MAC80211("leave - not in IBSS\n"); mutex_unlock(&priv->mutex); @@ -7289,7 +7289,7 @@ static int iwl3945_mac_beacon_update(struct ieee80211_hw *hw, struct sk_buff *sk return -EIO; } - if (priv->iw_mode != IEEE80211_IF_TYPE_IBSS) { + if (priv->iw_mode != NL80211_IFTYPE_ADHOC) { IWL_DEBUG_MAC80211("leave - not IBSS\n"); mutex_unlock(&priv->mutex); return -EIO; @@ -7996,7 +7996,7 @@ static int iwl3945_pci_probe(struct pci_dev *pdev, const struct pci_device_id *e IWL_DEBUG_INFO("Radio disabled.\n"); } - priv->iw_mode = IEEE80211_IF_TYPE_STA; + priv->iw_mode = NL80211_IFTYPE_STATION; printk(KERN_INFO DRV_NAME ": Detected Intel Wireless WiFi Link %s\n", priv->cfg->name); diff --git a/drivers/net/wireless/libertas_tf/main.c b/drivers/net/wireless/libertas_tf/main.c index c948021bff6..feff945ad85 100644 --- a/drivers/net/wireless/libertas_tf/main.c +++ b/drivers/net/wireless/libertas_tf/main.c @@ -219,7 +219,7 @@ static void lbtf_tx_work(struct work_struct *work) struct sk_buff *skb = NULL; int err; - if ((priv->vif->type == IEEE80211_IF_TYPE_AP) && + if ((priv->vif->type == NL80211_IFTYPE_AP) && (!skb_queue_empty(&priv->bc_ps_buf))) skb = skb_dequeue(&priv->bc_ps_buf); else if (priv->skb_to_tx) { @@ -326,11 +326,11 @@ static int lbtf_op_add_interface(struct ieee80211_hw *hw, priv->vif = conf->vif; switch (conf->type) { - case IEEE80211_IF_TYPE_MESH_POINT: - case IEEE80211_IF_TYPE_AP: + case NL80211_IFTYPE_MESH_POINT: + case NL80211_IFTYPE_AP: lbtf_set_mode(priv, LBTF_AP_MODE); break; - case IEEE80211_IF_TYPE_STA: + case NL80211_IFTYPE_STATION: lbtf_set_mode(priv, LBTF_STA_MODE); break; default: @@ -346,8 +346,8 @@ static void lbtf_op_remove_interface(struct ieee80211_hw *hw, { struct lbtf_private *priv = hw->priv; - if (priv->vif->type == IEEE80211_IF_TYPE_AP || - priv->vif->type == IEEE80211_IF_TYPE_MESH_POINT) + if (priv->vif->type == NL80211_IFTYPE_AP || + priv->vif->type == NL80211_IFTYPE_MESH_POINT) lbtf_beacon_ctrl(priv, 0, 0); lbtf_set_mode(priv, LBTF_PASSIVE_MODE); lbtf_set_bssid(priv, 0, NULL); @@ -372,8 +372,8 @@ static int lbtf_op_config_interface(struct ieee80211_hw *hw, struct sk_buff *beacon; switch (priv->vif->type) { - case IEEE80211_IF_TYPE_AP: - case IEEE80211_IF_TYPE_MESH_POINT: + case NL80211_IFTYPE_AP: + case NL80211_IFTYPE_MESH_POINT: beacon = ieee80211_beacon_get(hw, vif); if (beacon) { lbtf_beacon_set(priv, beacon); @@ -614,7 +614,7 @@ void lbtf_bcn_sent(struct lbtf_private *priv) { struct sk_buff *skb = NULL; - if (priv->vif->type != IEEE80211_IF_TYPE_AP) + if (priv->vif->type != NL80211_IFTYPE_AP) return; if (skb_queue_empty(&priv->bc_ps_buf)) { diff --git a/drivers/net/wireless/mac80211_hwsim.c b/drivers/net/wireless/mac80211_hwsim.c index 6ba50f087f7..e855211a90f 100644 --- a/drivers/net/wireless/mac80211_hwsim.c +++ b/drivers/net/wireless/mac80211_hwsim.c @@ -267,7 +267,7 @@ static void mac80211_hwsim_beacon_tx(void *arg, u8 *mac, struct sk_buff *skb; struct ieee80211_tx_info *info; - if (vif->type != IEEE80211_IF_TYPE_AP) + if (vif->type != NL80211_IFTYPE_AP) return; skb = ieee80211_beacon_get(hw, vif); diff --git a/drivers/net/wireless/p54/p54common.c b/drivers/net/wireless/p54/p54common.c index da51786254d..bac58ed03e5 100644 --- a/drivers/net/wireless/p54/p54common.c +++ b/drivers/net/wireless/p54/p54common.c @@ -1139,7 +1139,7 @@ static int p54_start(struct ieee80211_hw *dev) err = priv->open(dev); if (!err) - priv->mode = IEEE80211_IF_TYPE_MNTR; + priv->mode = NL80211_IFTYPE_MONITOR; p54_init_vdcf(dev); @@ -1157,7 +1157,7 @@ static void p54_stop(struct ieee80211_hw *dev) kfree_skb(skb); priv->stop(dev); priv->tsf_high32 = priv->tsf_low32 = 0; - priv->mode = IEEE80211_IF_TYPE_INVALID; + priv->mode = NL80211_IFTYPE_UNSPECIFIED; } static int p54_add_interface(struct ieee80211_hw *dev, @@ -1165,11 +1165,11 @@ static int p54_add_interface(struct ieee80211_hw *dev, { struct p54_common *priv = dev->priv; - if (priv->mode != IEEE80211_IF_TYPE_MNTR) + if (priv->mode != NL80211_IFTYPE_MONITOR) return -EOPNOTSUPP; switch (conf->type) { - case IEEE80211_IF_TYPE_STA: + case NL80211_IFTYPE_STATION: priv->mode = conf->type; break; default: @@ -1181,7 +1181,7 @@ static int p54_add_interface(struct ieee80211_hw *dev, p54_set_filter(dev, 0, NULL); switch (conf->type) { - case IEEE80211_IF_TYPE_STA: + case NL80211_IFTYPE_STATION: p54_set_filter(dev, 1, NULL); break; default: @@ -1198,7 +1198,7 @@ static void p54_remove_interface(struct ieee80211_hw *dev, struct ieee80211_if_init_conf *conf) { struct p54_common *priv = dev->priv; - priv->mode = IEEE80211_IF_TYPE_MNTR; + priv->mode = NL80211_IFTYPE_MONITOR; memset(priv->mac_addr, 0, ETH_ALEN); p54_set_filter(dev, 0, NULL); } @@ -1380,7 +1380,7 @@ struct ieee80211_hw *p54_init_common(size_t priv_data_len) return NULL; priv = dev->priv; - priv->mode = IEEE80211_IF_TYPE_INVALID; + priv->mode = NL80211_IFTYPE_UNSPECIFIED; skb_queue_head_init(&priv->tx_queue); dev->flags = IEEE80211_HW_HOST_BROADCAST_PS_BUFFERING | /* not sure */ IEEE80211_HW_RX_INCLUDES_FCS | diff --git a/drivers/net/wireless/p54/p54pci.c b/drivers/net/wireless/p54/p54pci.c index 1594786205f..1c2a02a741a 100644 --- a/drivers/net/wireless/p54/p54pci.c +++ b/drivers/net/wireless/p54/p54pci.c @@ -616,7 +616,7 @@ static int p54p_suspend(struct pci_dev *pdev, pm_message_t state) struct ieee80211_hw *dev = pci_get_drvdata(pdev); struct p54p_priv *priv = dev->priv; - if (priv->common.mode != IEEE80211_IF_TYPE_INVALID) { + if (priv->common.mode != NL80211_IFTYPE_UNSPECIFIED) { ieee80211_stop_queues(dev); p54p_stop(dev); } @@ -634,7 +634,7 @@ static int p54p_resume(struct pci_dev *pdev) pci_set_power_state(pdev, PCI_D0); pci_restore_state(pdev); - if (priv->common.mode != IEEE80211_IF_TYPE_INVALID) { + if (priv->common.mode != NL80211_IFTYPE_UNSPECIFIED) { p54p_open(dev); ieee80211_wake_queues(dev); } diff --git a/drivers/net/wireless/rt2x00/rt2500usb.c b/drivers/net/wireless/rt2x00/rt2500usb.c index cb5f2d01a9c..d3bf7bba611 100644 --- a/drivers/net/wireless/rt2x00/rt2500usb.c +++ b/drivers/net/wireless/rt2x00/rt2500usb.c @@ -384,7 +384,7 @@ static void rt2500usb_config_intf(struct rt2x00_dev *rt2x00dev, rt2500usb_register_read(rt2x00dev, TXRX_CSR20, ®); rt2x00_set_field16(®, TXRX_CSR20_OFFSET, bcn_preload >> 6); rt2x00_set_field16(®, TXRX_CSR20_BCN_EXPECT_WINDOW, - 2 * (conf->type != IEEE80211_IF_TYPE_STA)); + 2 * (conf->type != NL80211_IFTYPE_STATION)); rt2500usb_register_write(rt2x00dev, TXRX_CSR20, reg); /* diff --git a/drivers/net/wireless/rt2x00/rt2x00.h b/drivers/net/wireless/rt2x00/rt2x00.h index 6f296cef76a..1359a376840 100644 --- a/drivers/net/wireless/rt2x00/rt2x00.h +++ b/drivers/net/wireless/rt2x00/rt2x00.h @@ -483,7 +483,7 @@ struct rt2x00intf_conf { /* * Interface type */ - enum ieee80211_if_types type; + enum nl80211_iftype type; /* * TSF sync value, this is dependant on the operation type. diff --git a/drivers/net/wireless/rt2x00/rt2x00config.c b/drivers/net/wireless/rt2x00/rt2x00config.c index ca051f50ef1..4d5e87b015a 100644 --- a/drivers/net/wireless/rt2x00/rt2x00config.c +++ b/drivers/net/wireless/rt2x00/rt2x00config.c @@ -31,7 +31,7 @@ void rt2x00lib_config_intf(struct rt2x00_dev *rt2x00dev, struct rt2x00_intf *intf, - enum ieee80211_if_types type, + enum nl80211_iftype type, u8 *mac, u8 *bssid) { struct rt2x00intf_conf conf; @@ -40,11 +40,11 @@ void rt2x00lib_config_intf(struct rt2x00_dev *rt2x00dev, conf.type = type; switch (type) { - case IEEE80211_IF_TYPE_IBSS: - case IEEE80211_IF_TYPE_AP: + case NL80211_IFTYPE_ADHOC: + case NL80211_IFTYPE_AP: conf.sync = TSF_SYNC_BEACON; break; - case IEEE80211_IF_TYPE_STA: + case NL80211_IFTYPE_STATION: conf.sync = TSF_SYNC_INFRA; break; default: diff --git a/drivers/net/wireless/rt2x00/rt2x00dev.c b/drivers/net/wireless/rt2x00/rt2x00dev.c index 2f3bfc60688..86840e3585e 100644 --- a/drivers/net/wireless/rt2x00/rt2x00dev.c +++ b/drivers/net/wireless/rt2x00/rt2x00dev.c @@ -467,8 +467,8 @@ static void rt2x00lib_beacondone_iter(void *data, u8 *mac, struct rt2x00_dev *rt2x00dev = data; struct rt2x00_intf *intf = vif_to_intf(vif); - if (vif->type != IEEE80211_IF_TYPE_AP && - vif->type != IEEE80211_IF_TYPE_IBSS) + if (vif->type != NL80211_IFTYPE_AP && + vif->type != NL80211_IFTYPE_ADHOC) return; /* @@ -1212,8 +1212,8 @@ static void rt2x00lib_resume_intf(void *data, u8 *mac, /* * Master or Ad-hoc mode require a new beacon update. */ - if (vif->type == IEEE80211_IF_TYPE_AP || - vif->type == IEEE80211_IF_TYPE_IBSS) + if (vif->type == NL80211_IFTYPE_AP || + vif->type == NL80211_IFTYPE_ADHOC) intf->delayed_flags |= DELAYED_UPDATE_BEACON; spin_unlock(&intf->lock); diff --git a/drivers/net/wireless/rt2x00/rt2x00lib.h b/drivers/net/wireless/rt2x00/rt2x00lib.h index 7bbc16b1b6c..797eb619aa0 100644 --- a/drivers/net/wireless/rt2x00/rt2x00lib.h +++ b/drivers/net/wireless/rt2x00/rt2x00lib.h @@ -88,7 +88,7 @@ void rt2x00lib_stop(struct rt2x00_dev *rt2x00dev); */ void rt2x00lib_config_intf(struct rt2x00_dev *rt2x00dev, struct rt2x00_intf *intf, - enum ieee80211_if_types type, + enum nl80211_iftype type, u8 *mac, u8 *bssid); void rt2x00lib_config_erp(struct rt2x00_dev *rt2x00dev, struct rt2x00_intf *intf, diff --git a/drivers/net/wireless/rt2x00/rt2x00mac.c b/drivers/net/wireless/rt2x00/rt2x00mac.c index 56829fad347..485c40de5cc 100644 --- a/drivers/net/wireless/rt2x00/rt2x00mac.c +++ b/drivers/net/wireless/rt2x00/rt2x00mac.c @@ -211,7 +211,7 @@ int rt2x00mac_add_interface(struct ieee80211_hw *hw, return -ENODEV; switch (conf->type) { - case IEEE80211_IF_TYPE_AP: + case NL80211_IFTYPE_AP: /* * We don't support mixed combinations of * sta and ap interfaces. @@ -227,8 +227,8 @@ int rt2x00mac_add_interface(struct ieee80211_hw *hw, return -ENOBUFS; break; - case IEEE80211_IF_TYPE_STA: - case IEEE80211_IF_TYPE_IBSS: + case NL80211_IFTYPE_STATION: + case NL80211_IFTYPE_ADHOC: /* * We don't support mixed combinations of * sta and ap interfaces. @@ -268,7 +268,7 @@ int rt2x00mac_add_interface(struct ieee80211_hw *hw, * increase interface count and start initialization. */ - if (conf->type == IEEE80211_IF_TYPE_AP) + if (conf->type == NL80211_IFTYPE_AP) rt2x00dev->intf_ap_count++; else rt2x00dev->intf_sta_count++; @@ -277,7 +277,7 @@ int rt2x00mac_add_interface(struct ieee80211_hw *hw, spin_lock_init(&intf->seqlock); intf->beacon = entry; - if (conf->type == IEEE80211_IF_TYPE_AP) + if (conf->type == NL80211_IFTYPE_AP) memcpy(&intf->bssid, conf->mac_addr, ETH_ALEN); memcpy(&intf->mac, conf->mac_addr, ETH_ALEN); @@ -311,11 +311,11 @@ void rt2x00mac_remove_interface(struct ieee80211_hw *hw, * no interface is present. */ if (!test_bit(DEVICE_STATE_PRESENT, &rt2x00dev->flags) || - (conf->type == IEEE80211_IF_TYPE_AP && !rt2x00dev->intf_ap_count) || - (conf->type != IEEE80211_IF_TYPE_AP && !rt2x00dev->intf_sta_count)) + (conf->type == NL80211_IFTYPE_AP && !rt2x00dev->intf_ap_count) || + (conf->type != NL80211_IFTYPE_AP && !rt2x00dev->intf_sta_count)) return; - if (conf->type == IEEE80211_IF_TYPE_AP) + if (conf->type == NL80211_IFTYPE_AP) rt2x00dev->intf_ap_count--; else rt2x00dev->intf_sta_count--; @@ -331,7 +331,7 @@ void rt2x00mac_remove_interface(struct ieee80211_hw *hw, * are cleared to prevent false ACKing of frames. */ rt2x00lib_config_intf(rt2x00dev, intf, - IEEE80211_IF_TYPE_INVALID, NULL, NULL); + NL80211_IFTYPE_UNSPECIFIED, NULL, NULL); } EXPORT_SYMBOL_GPL(rt2x00mac_remove_interface); diff --git a/drivers/net/wireless/rtl8180_dev.c b/drivers/net/wireless/rtl8180_dev.c index 861c76a65d6..abcd641c54b 100644 --- a/drivers/net/wireless/rtl8180_dev.c +++ b/drivers/net/wireless/rtl8180_dev.c @@ -615,7 +615,7 @@ static int rtl8180_start(struct ieee80211_hw *dev) reg |= RTL818X_CMD_TX_ENABLE; rtl818x_iowrite8(priv, &priv->map->CMD, reg); - priv->mode = IEEE80211_IF_TYPE_MNTR; + priv->mode = NL80211_IFTYPE_MONITOR; return 0; err_free_rings: @@ -633,7 +633,7 @@ static void rtl8180_stop(struct ieee80211_hw *dev) u8 reg; int i; - priv->mode = IEEE80211_IF_TYPE_INVALID; + priv->mode = NL80211_IFTYPE_UNSPECIFIED; rtl818x_iowrite16(priv, &priv->map->INT_MASK, 0); @@ -661,11 +661,11 @@ static int rtl8180_add_interface(struct ieee80211_hw *dev, { struct rtl8180_priv *priv = dev->priv; - if (priv->mode != IEEE80211_IF_TYPE_MNTR) + if (priv->mode != NL80211_IFTYPE_MONITOR) return -EOPNOTSUPP; switch (conf->type) { - case IEEE80211_IF_TYPE_STA: + case NL80211_IFTYPE_STATION: priv->mode = conf->type; break; default: @@ -688,7 +688,7 @@ static void rtl8180_remove_interface(struct ieee80211_hw *dev, struct ieee80211_if_init_conf *conf) { struct rtl8180_priv *priv = dev->priv; - priv->mode = IEEE80211_IF_TYPE_MNTR; + priv->mode = NL80211_IFTYPE_MONITOR; priv->vif = NULL; } diff --git a/drivers/net/wireless/rtl8187_dev.c b/drivers/net/wireless/rtl8187_dev.c index 8a42bfa6d4f..e9902613e2e 100644 --- a/drivers/net/wireless/rtl8187_dev.c +++ b/drivers/net/wireless/rtl8187_dev.c @@ -836,11 +836,11 @@ static int rtl8187_add_interface(struct ieee80211_hw *dev, struct rtl8187_priv *priv = dev->priv; int i; - if (priv->mode != IEEE80211_IF_TYPE_MNTR) + if (priv->mode != NL80211_IFTYPE_MONITOR) return -EOPNOTSUPP; switch (conf->type) { - case IEEE80211_IF_TYPE_STA: + case NL80211_IFTYPE_STATION: priv->mode = conf->type; break; default: @@ -865,7 +865,7 @@ static void rtl8187_remove_interface(struct ieee80211_hw *dev, { struct rtl8187_priv *priv = dev->priv; mutex_lock(&priv->conf_mutex); - priv->mode = IEEE80211_IF_TYPE_MNTR; + priv->mode = NL80211_IFTYPE_MONITOR; priv->vif = NULL; mutex_unlock(&priv->conf_mutex); } @@ -1057,7 +1057,7 @@ static int __devinit rtl8187_probe(struct usb_interface *intf, dev->wiphy->bands[IEEE80211_BAND_2GHZ] = &priv->band; - priv->mode = IEEE80211_IF_TYPE_MNTR; + priv->mode = NL80211_IFTYPE_MONITOR; dev->flags = IEEE80211_HW_HOST_BROADCAST_PS_BUFFERING | IEEE80211_HW_RX_INCLUDES_FCS; diff --git a/drivers/net/wireless/zd1211rw/zd_mac.c b/drivers/net/wireless/zd1211rw/zd_mac.c index 3005dd1fde4..a3da014f928 100644 --- a/drivers/net/wireless/zd1211rw/zd_mac.c +++ b/drivers/net/wireless/zd1211rw/zd_mac.c @@ -716,15 +716,15 @@ static int zd_op_add_interface(struct ieee80211_hw *hw, { struct zd_mac *mac = zd_hw_mac(hw); - /* using IEEE80211_IF_TYPE_INVALID to indicate no mode selected */ - if (mac->type != IEEE80211_IF_TYPE_INVALID) + /* using NL80211_IFTYPE_UNSPECIFIED to indicate no mode selected */ + if (mac->type != NL80211_IFTYPE_UNSPECIFIED) return -EOPNOTSUPP; switch (conf->type) { - case IEEE80211_IF_TYPE_MNTR: - case IEEE80211_IF_TYPE_MESH_POINT: - case IEEE80211_IF_TYPE_STA: - case IEEE80211_IF_TYPE_IBSS: + case NL80211_IFTYPE_MONITOR: + case NL80211_IFTYPE_MESH_POINT: + case NL80211_IFTYPE_STATION: + case NL80211_IFTYPE_ADHOC: mac->type = conf->type; break; default: @@ -738,7 +738,7 @@ static void zd_op_remove_interface(struct ieee80211_hw *hw, struct ieee80211_if_init_conf *conf) { struct zd_mac *mac = zd_hw_mac(hw); - mac->type = IEEE80211_IF_TYPE_INVALID; + mac->type = NL80211_IFTYPE_UNSPECIFIED; zd_set_beacon_interval(&mac->chip, 0); zd_write_mac_addr(&mac->chip, NULL); } @@ -757,8 +757,8 @@ static int zd_op_config_interface(struct ieee80211_hw *hw, int associated; int r; - if (mac->type == IEEE80211_IF_TYPE_MESH_POINT || - mac->type == IEEE80211_IF_TYPE_IBSS) { + if (mac->type == NL80211_IFTYPE_MESH_POINT || + mac->type == NL80211_IFTYPE_ADHOC) { associated = true; if (conf->changed & IEEE80211_IFCC_BEACON) { struct sk_buff *beacon = ieee80211_beacon_get(hw, vif); @@ -955,7 +955,7 @@ struct ieee80211_hw *zd_mac_alloc_hw(struct usb_interface *intf) spin_lock_init(&mac->lock); mac->hw = hw; - mac->type = IEEE80211_IF_TYPE_INVALID; + mac->type = NL80211_IFTYPE_UNSPECIFIED; memcpy(mac->channels, zd_channels, sizeof(zd_channels)); memcpy(mac->rates, zd_rates, sizeof(zd_rates)); diff --git a/include/net/mac80211.h b/include/net/mac80211.h index c81e579c17f..7f5ea55e00f 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -477,33 +477,6 @@ struct ieee80211_conf { struct ieee80211_ht_bss_info ht_bss_conf; }; -/** - * enum ieee80211_if_types - types of 802.11 network interfaces - * - * @IEEE80211_IF_TYPE_INVALID: invalid interface type, not used - * by mac80211 itself - * @IEEE80211_IF_TYPE_AP: interface in AP mode. - * @IEEE80211_IF_TYPE_MGMT: special interface for communication with hostap - * daemon. Drivers should never see this type. - * @IEEE80211_IF_TYPE_STA: interface in STA (client) mode. - * @IEEE80211_IF_TYPE_IBSS: interface in IBSS (ad-hoc) mode. - * @IEEE80211_IF_TYPE_MNTR: interface in monitor (rfmon) mode. - * @IEEE80211_IF_TYPE_WDS: interface in WDS mode. - * @IEEE80211_IF_TYPE_VLAN: VLAN interface bound to an AP, drivers - * will never see this type. - * @IEEE80211_IF_TYPE_MESH_POINT: 802.11s mesh point - */ -enum ieee80211_if_types { - IEEE80211_IF_TYPE_INVALID, - IEEE80211_IF_TYPE_AP, - IEEE80211_IF_TYPE_STA, - IEEE80211_IF_TYPE_IBSS, - IEEE80211_IF_TYPE_MESH_POINT, - IEEE80211_IF_TYPE_MNTR, - IEEE80211_IF_TYPE_WDS, - IEEE80211_IF_TYPE_VLAN, -}; - /** * struct ieee80211_vif - per-interface data * @@ -515,7 +488,7 @@ enum ieee80211_if_types { * sizeof(void *). */ struct ieee80211_vif { - enum ieee80211_if_types type; + enum nl80211_iftype type; /* must be last */ u8 drv_priv[0] __attribute__((__aligned__(sizeof(void *)))); }; @@ -523,7 +496,7 @@ struct ieee80211_vif { static inline bool ieee80211_vif_is_mesh(struct ieee80211_vif *vif) { #ifdef CONFIG_MAC80211_MESH - return vif->type == IEEE80211_IF_TYPE_MESH_POINT; + return vif->type == NL80211_IFTYPE_MESH_POINT; #endif return false; } @@ -534,7 +507,7 @@ static inline bool ieee80211_vif_is_mesh(struct ieee80211_vif *vif) * @vif: pointer to a driver-use per-interface structure. The pointer * itself is also used for various functions including * ieee80211_beacon_get() and ieee80211_get_buffered_bc(). - * @type: one of &enum ieee80211_if_types constants. Determines the type of + * @type: one of &enum nl80211_iftype constants. Determines the type of * added/removed interface. * @mac_addr: pointer to MAC address of the interface. This pointer is valid * until the interface is removed (i.e. it cannot be used after @@ -550,7 +523,7 @@ static inline bool ieee80211_vif_is_mesh(struct ieee80211_vif *vif) * in pure monitor mode. */ struct ieee80211_if_init_conf { - enum ieee80211_if_types type; + enum nl80211_iftype type; struct ieee80211_vif *vif; void *mac_addr; }; diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index 6ec2127f9a6..d004351050c 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -24,26 +24,19 @@ struct ieee80211_hw *wiphy_to_hw(struct wiphy *wiphy) } EXPORT_SYMBOL(wiphy_to_hw); -static enum ieee80211_if_types -nl80211_type_to_mac80211_type(enum nl80211_iftype type) +static bool nl80211_type_check(enum nl80211_iftype type) { switch (type) { - case NL80211_IFTYPE_UNSPECIFIED: - return IEEE80211_IF_TYPE_STA; case NL80211_IFTYPE_ADHOC: - return IEEE80211_IF_TYPE_IBSS; case NL80211_IFTYPE_STATION: - return IEEE80211_IF_TYPE_STA; case NL80211_IFTYPE_MONITOR: - return IEEE80211_IF_TYPE_MNTR; #ifdef CONFIG_MAC80211_MESH case NL80211_IFTYPE_MESH_POINT: - return IEEE80211_IF_TYPE_MESH_POINT; #endif case NL80211_IFTYPE_WDS: - return IEEE80211_IF_TYPE_WDS; + return true; default: - return IEEE80211_IF_TYPE_INVALID; + return false; } } @@ -52,17 +45,15 @@ static int ieee80211_add_iface(struct wiphy *wiphy, char *name, struct vif_params *params) { struct ieee80211_local *local = wiphy_priv(wiphy); - enum ieee80211_if_types itype; struct net_device *dev; struct ieee80211_sub_if_data *sdata; int err; - itype = nl80211_type_to_mac80211_type(type); - if (itype == IEEE80211_IF_TYPE_INVALID) + if (!nl80211_type_check(type)) return -EINVAL; - err = ieee80211_if_add(local, name, &dev, itype, params); - if (err || itype != IEEE80211_IF_TYPE_MNTR || !flags) + err = ieee80211_if_add(local, name, &dev, type, params); + if (err || type != NL80211_IFTYPE_MONITOR || !flags) return err; sdata = IEEE80211_DEV_TO_SUB_IF(dev); @@ -93,7 +84,6 @@ static int ieee80211_change_iface(struct wiphy *wiphy, int ifindex, { struct ieee80211_local *local = wiphy_priv(wiphy); struct net_device *dev; - enum ieee80211_if_types itype; struct ieee80211_sub_if_data *sdata; int ret; @@ -102,8 +92,7 @@ static int ieee80211_change_iface(struct wiphy *wiphy, int ifindex, if (!dev) return -ENODEV; - itype = nl80211_type_to_mac80211_type(type); - if (itype == IEEE80211_IF_TYPE_INVALID) + if (!nl80211_type_check(type)) return -EINVAL; if (dev == local->mdev) @@ -111,7 +100,7 @@ static int ieee80211_change_iface(struct wiphy *wiphy, int ifindex, sdata = IEEE80211_DEV_TO_SUB_IF(dev); - ret = ieee80211_if_change_type(sdata, itype); + ret = ieee80211_if_change_type(sdata, type); if (ret) return ret; @@ -120,7 +109,7 @@ static int ieee80211_change_iface(struct wiphy *wiphy, int ifindex, params->mesh_id_len, params->mesh_id); - if (sdata->vif.type != IEEE80211_IF_TYPE_MNTR || !flags) + if (sdata->vif.type != NL80211_IFTYPE_MONITOR || !flags) return 0; sdata->u.mntr_flags = *flags; @@ -516,7 +505,7 @@ static int ieee80211_add_beacon(struct wiphy *wiphy, struct net_device *dev, sdata = IEEE80211_DEV_TO_SUB_IF(dev); - if (sdata->vif.type != IEEE80211_IF_TYPE_AP) + if (sdata->vif.type != NL80211_IFTYPE_AP) return -EINVAL; old = sdata->u.ap.beacon; @@ -539,7 +528,7 @@ static int ieee80211_set_beacon(struct wiphy *wiphy, struct net_device *dev, sdata = IEEE80211_DEV_TO_SUB_IF(dev); - if (sdata->vif.type != IEEE80211_IF_TYPE_AP) + if (sdata->vif.type != NL80211_IFTYPE_AP) return -EINVAL; old = sdata->u.ap.beacon; @@ -561,7 +550,7 @@ static int ieee80211_del_beacon(struct wiphy *wiphy, struct net_device *dev) sdata = IEEE80211_DEV_TO_SUB_IF(dev); - if (sdata->vif.type != IEEE80211_IF_TYPE_AP) + if (sdata->vif.type != NL80211_IFTYPE_AP) return -EINVAL; old = sdata->u.ap.beacon; @@ -716,8 +705,8 @@ static int ieee80211_add_station(struct wiphy *wiphy, struct net_device *dev, if (params->vlan) { sdata = IEEE80211_DEV_TO_SUB_IF(params->vlan); - if (sdata->vif.type != IEEE80211_IF_TYPE_VLAN && - sdata->vif.type != IEEE80211_IF_TYPE_AP) + if (sdata->vif.type != NL80211_IFTYPE_AP_VLAN && + sdata->vif.type != NL80211_IFTYPE_AP) return -EINVAL; } else sdata = IEEE80211_DEV_TO_SUB_IF(dev); @@ -747,8 +736,8 @@ static int ieee80211_add_station(struct wiphy *wiphy, struct net_device *dev, return err; } - if (sdata->vif.type == IEEE80211_IF_TYPE_VLAN || - sdata->vif.type == IEEE80211_IF_TYPE_AP) + if (sdata->vif.type == NL80211_IFTYPE_AP_VLAN || + sdata->vif.type == NL80211_IFTYPE_AP) ieee80211_send_layer2_update(sta); rcu_read_unlock(); @@ -812,8 +801,8 @@ static int ieee80211_change_station(struct wiphy *wiphy, if (params->vlan && params->vlan != sta->sdata->dev) { vlansdata = IEEE80211_DEV_TO_SUB_IF(params->vlan); - if (vlansdata->vif.type != IEEE80211_IF_TYPE_VLAN && - vlansdata->vif.type != IEEE80211_IF_TYPE_AP) { + if (vlansdata->vif.type != NL80211_IFTYPE_AP_VLAN && + vlansdata->vif.type != NL80211_IFTYPE_AP) { rcu_read_unlock(); return -EINVAL; } @@ -847,7 +836,7 @@ static int ieee80211_add_mpath(struct wiphy *wiphy, struct net_device *dev, sdata = IEEE80211_DEV_TO_SUB_IF(dev); - if (sdata->vif.type != IEEE80211_IF_TYPE_MESH_POINT) + if (sdata->vif.type != NL80211_IFTYPE_MESH_POINT) return -ENOTSUPP; rcu_read_lock(); @@ -903,7 +892,7 @@ static int ieee80211_change_mpath(struct wiphy *wiphy, sdata = IEEE80211_DEV_TO_SUB_IF(dev); - if (sdata->vif.type != IEEE80211_IF_TYPE_MESH_POINT) + if (sdata->vif.type != NL80211_IFTYPE_MESH_POINT) return -ENOTSUPP; rcu_read_lock(); @@ -978,7 +967,7 @@ static int ieee80211_get_mpath(struct wiphy *wiphy, struct net_device *dev, sdata = IEEE80211_DEV_TO_SUB_IF(dev); - if (sdata->vif.type != IEEE80211_IF_TYPE_MESH_POINT) + if (sdata->vif.type != NL80211_IFTYPE_MESH_POINT) return -ENOTSUPP; rcu_read_lock(); @@ -1006,7 +995,7 @@ static int ieee80211_dump_mpath(struct wiphy *wiphy, struct net_device *dev, sdata = IEEE80211_DEV_TO_SUB_IF(dev); - if (sdata->vif.type != IEEE80211_IF_TYPE_MESH_POINT) + if (sdata->vif.type != NL80211_IFTYPE_MESH_POINT) return -ENOTSUPP; rcu_read_lock(); @@ -1035,7 +1024,7 @@ static int ieee80211_change_bss(struct wiphy *wiphy, sdata = IEEE80211_DEV_TO_SUB_IF(dev); - if (sdata->vif.type != IEEE80211_IF_TYPE_AP) + if (sdata->vif.type != NL80211_IFTYPE_AP) return -EINVAL; if (params->use_cts_prot >= 0) { diff --git a/net/mac80211/debugfs_netdev.c b/net/mac80211/debugfs_netdev.c index 0fa7681a3d2..1b33cad24ab 100644 --- a/net/mac80211/debugfs_netdev.c +++ b/net/mac80211/debugfs_netdev.c @@ -345,26 +345,26 @@ static void add_files(struct ieee80211_sub_if_data *sdata) return; switch (sdata->vif.type) { - case IEEE80211_IF_TYPE_MESH_POINT: + case NL80211_IFTYPE_MESH_POINT: #ifdef CONFIG_MAC80211_MESH add_mesh_stats(sdata); add_mesh_config(sdata); #endif break; - case IEEE80211_IF_TYPE_STA: - case IEEE80211_IF_TYPE_IBSS: + case NL80211_IFTYPE_STATION: + case NL80211_IFTYPE_ADHOC: add_sta_files(sdata); break; - case IEEE80211_IF_TYPE_AP: + case NL80211_IFTYPE_AP: add_ap_files(sdata); break; - case IEEE80211_IF_TYPE_WDS: + case NL80211_IFTYPE_WDS: add_wds_files(sdata); break; - case IEEE80211_IF_TYPE_MNTR: + case NL80211_IFTYPE_MONITOR: add_monitor_files(sdata); break; - case IEEE80211_IF_TYPE_VLAN: + case NL80211_IFTYPE_AP_VLAN: add_vlan_files(sdata); break; default: @@ -482,26 +482,26 @@ static void del_files(struct ieee80211_sub_if_data *sdata) return; switch (sdata->vif.type) { - case IEEE80211_IF_TYPE_MESH_POINT: + case NL80211_IFTYPE_MESH_POINT: #ifdef CONFIG_MAC80211_MESH del_mesh_stats(sdata); del_mesh_config(sdata); #endif break; - case IEEE80211_IF_TYPE_STA: - case IEEE80211_IF_TYPE_IBSS: + case NL80211_IFTYPE_STATION: + case NL80211_IFTYPE_ADHOC: del_sta_files(sdata); break; - case IEEE80211_IF_TYPE_AP: + case NL80211_IFTYPE_AP: del_ap_files(sdata); break; - case IEEE80211_IF_TYPE_WDS: + case NL80211_IFTYPE_WDS: del_wds_files(sdata); break; - case IEEE80211_IF_TYPE_MNTR: + case NL80211_IFTYPE_MONITOR: del_monitor_files(sdata); break; - case IEEE80211_IF_TYPE_VLAN: + case NL80211_IFTYPE_AP_VLAN: del_vlan_files(sdata); break; default: diff --git a/net/mac80211/ht.c b/net/mac80211/ht.c index 4dc35c9dabc..bc3c71ad7ae 100644 --- a/net/mac80211/ht.c +++ b/net/mac80211/ht.c @@ -89,7 +89,7 @@ static void ieee80211_send_addba_request(struct ieee80211_sub_if_data *sdata, memset(mgmt, 0, 24); memcpy(mgmt->da, da, ETH_ALEN); memcpy(mgmt->sa, sdata->dev->dev_addr, ETH_ALEN); - if (sdata->vif.type == IEEE80211_IF_TYPE_AP) + if (sdata->vif.type == NL80211_IFTYPE_AP) memcpy(mgmt->bssid, sdata->dev->dev_addr, ETH_ALEN); else memcpy(mgmt->bssid, ifsta->bssid, ETH_ALEN); @@ -139,7 +139,7 @@ static void ieee80211_send_addba_resp(struct ieee80211_sub_if_data *sdata, u8 *d memset(mgmt, 0, 24); memcpy(mgmt->da, da, ETH_ALEN); memcpy(mgmt->sa, sdata->dev->dev_addr, ETH_ALEN); - if (sdata->vif.type == IEEE80211_IF_TYPE_AP) + if (sdata->vif.type == NL80211_IFTYPE_AP) memcpy(mgmt->bssid, sdata->dev->dev_addr, ETH_ALEN); else memcpy(mgmt->bssid, ifsta->bssid, ETH_ALEN); @@ -185,7 +185,7 @@ static void ieee80211_send_delba(struct ieee80211_sub_if_data *sdata, memset(mgmt, 0, 24); memcpy(mgmt->da, da, ETH_ALEN); memcpy(mgmt->sa, sdata->dev->dev_addr, ETH_ALEN); - if (sdata->vif.type == IEEE80211_IF_TYPE_AP) + if (sdata->vif.type == NL80211_IFTYPE_AP) memcpy(mgmt->bssid, sdata->dev->dev_addr, ETH_ALEN); else memcpy(mgmt->bssid, ifsta->bssid, ETH_ALEN); diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index 21cc6d07b02..80d88f5ff90 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -954,10 +954,10 @@ void ieee80211_rx_bss_put(struct ieee80211_local *local, /* interface handling */ void ieee80211_if_setup(struct net_device *dev); int ieee80211_if_add(struct ieee80211_local *local, const char *name, - struct net_device **new_dev, enum ieee80211_if_types type, + struct net_device **new_dev, enum nl80211_iftype type, struct vif_params *params); int ieee80211_if_change_type(struct ieee80211_sub_if_data *sdata, - enum ieee80211_if_types type); + enum nl80211_iftype type); void ieee80211_if_remove(struct ieee80211_sub_if_data *sdata); void ieee80211_remove_interfaces(struct ieee80211_local *local); @@ -1001,7 +1001,7 @@ extern void *mac80211_wiphy_privid; /* for wiphy privid */ extern const unsigned char rfc1042_header[6]; extern const unsigned char bridge_tunnel_header[6]; u8 *ieee80211_get_bssid(struct ieee80211_hdr *hdr, size_t len, - enum ieee80211_if_types type); + enum nl80211_iftype type); int ieee80211_frame_duration(struct ieee80211_local *local, size_t len, int rate, int erp, int short_preamble); void mac80211_ev_michael_mic_failure(struct ieee80211_sub_if_data *sdata, int keyidx, diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c index dab8eba2602..004fb23241d 100644 --- a/net/mac80211/iface.c +++ b/net/mac80211/iface.c @@ -41,7 +41,7 @@ static void ieee80211_teardown_sdata(struct net_device *dev) sdata->fragment_next = 0; switch (sdata->vif.type) { - case IEEE80211_IF_TYPE_AP: + case NL80211_IFTYPE_AP: beacon = sdata->u.ap.beacon; rcu_assign_pointer(sdata->u.ap.beacon, NULL); synchronize_rcu(); @@ -53,22 +53,23 @@ static void ieee80211_teardown_sdata(struct net_device *dev) } break; - case IEEE80211_IF_TYPE_MESH_POINT: + case NL80211_IFTYPE_MESH_POINT: if (ieee80211_vif_is_mesh(&sdata->vif)) mesh_rmc_free(sdata); break; - case IEEE80211_IF_TYPE_STA: - case IEEE80211_IF_TYPE_IBSS: + case NL80211_IFTYPE_STATION: + case NL80211_IFTYPE_ADHOC: kfree(sdata->u.sta.extra_ie); kfree(sdata->u.sta.assocreq_ies); kfree(sdata->u.sta.assocresp_ies); kfree_skb(sdata->u.sta.probe_resp); break; - case IEEE80211_IF_TYPE_WDS: - case IEEE80211_IF_TYPE_VLAN: - case IEEE80211_IF_TYPE_MNTR: + case NL80211_IFTYPE_WDS: + case NL80211_IFTYPE_AP_VLAN: + case NL80211_IFTYPE_MONITOR: break; - case IEEE80211_IF_TYPE_INVALID: + case NL80211_IFTYPE_UNSPECIFIED: + case __NL80211_IFTYPE_AFTER_LAST: BUG(); break; } @@ -81,7 +82,7 @@ static void ieee80211_teardown_sdata(struct net_device *dev) * Helper function to initialise an interface to a specific type. */ static void ieee80211_setup_sdata(struct ieee80211_sub_if_data *sdata, - enum ieee80211_if_types type) + enum nl80211_iftype type) { /* clear type-dependent union */ memset(&sdata->u, 0, sizeof(sdata->u)); @@ -93,28 +94,29 @@ static void ieee80211_setup_sdata(struct ieee80211_sub_if_data *sdata, sdata->dev->type = ARPHRD_ETHER; switch (type) { - case IEEE80211_IF_TYPE_AP: + case NL80211_IFTYPE_AP: skb_queue_head_init(&sdata->u.ap.ps_bc_buf); INIT_LIST_HEAD(&sdata->u.ap.vlans); break; - case IEEE80211_IF_TYPE_STA: - case IEEE80211_IF_TYPE_IBSS: + case NL80211_IFTYPE_STATION: + case NL80211_IFTYPE_ADHOC: ieee80211_sta_setup_sdata(sdata); break; - case IEEE80211_IF_TYPE_MESH_POINT: + case NL80211_IFTYPE_MESH_POINT: if (ieee80211_vif_is_mesh(&sdata->vif)) ieee80211_mesh_init_sdata(sdata); break; - case IEEE80211_IF_TYPE_MNTR: + case NL80211_IFTYPE_MONITOR: sdata->dev->type = ARPHRD_IEEE80211_RADIOTAP; sdata->dev->hard_start_xmit = ieee80211_monitor_start_xmit; sdata->u.mntr_flags = MONITOR_FLAG_CONTROL | MONITOR_FLAG_OTHER_BSS; break; - case IEEE80211_IF_TYPE_WDS: - case IEEE80211_IF_TYPE_VLAN: + case NL80211_IFTYPE_WDS: + case NL80211_IFTYPE_AP_VLAN: break; - case IEEE80211_IF_TYPE_INVALID: + case NL80211_IFTYPE_UNSPECIFIED: + case __NL80211_IFTYPE_AFTER_LAST: BUG(); break; } @@ -123,7 +125,7 @@ static void ieee80211_setup_sdata(struct ieee80211_sub_if_data *sdata, } int ieee80211_if_change_type(struct ieee80211_sub_if_data *sdata, - enum ieee80211_if_types type) + enum nl80211_iftype type) { ASSERT_RTNL(); @@ -153,7 +155,7 @@ int ieee80211_if_change_type(struct ieee80211_sub_if_data *sdata, } int ieee80211_if_add(struct ieee80211_local *local, const char *name, - struct net_device **new_dev, enum ieee80211_if_types type, + struct net_device **new_dev, enum nl80211_iftype type, struct vif_params *params) { struct net_device *ndev; diff --git a/net/mac80211/key.c b/net/mac80211/key.c index 6597c779e35..d5b95748db2 100644 --- a/net/mac80211/key.c +++ b/net/mac80211/key.c @@ -118,8 +118,8 @@ static const u8 *get_mac_for_key(struct ieee80211_key *key) * address to indicate a transmit-only key. */ if (key->conf.alg != ALG_WEP && - (key->sdata->vif.type == IEEE80211_IF_TYPE_AP || - key->sdata->vif.type == IEEE80211_IF_TYPE_VLAN)) + (key->sdata->vif.type == NL80211_IFTYPE_AP || + key->sdata->vif.type == NL80211_IFTYPE_AP_VLAN)) addr = zero_addr; if (key->sta) @@ -331,7 +331,7 @@ void ieee80211_key_link(struct ieee80211_key *key, */ key->conf.flags |= IEEE80211_KEY_FLAG_PAIRWISE; } else { - if (sdata->vif.type == IEEE80211_IF_TYPE_STA) { + if (sdata->vif.type == NL80211_IFTYPE_STATION) { struct sta_info *ap; /* diff --git a/net/mac80211/main.c b/net/mac80211/main.c index 4c424acc01a..584a75bd6cf 100644 --- a/net/mac80211/main.c +++ b/net/mac80211/main.c @@ -146,7 +146,7 @@ static int ieee80211_change_mtu(struct net_device *dev, int new_mtu) int meshhdrlen; struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); - meshhdrlen = (sdata->vif.type == IEEE80211_IF_TYPE_MESH_POINT) ? 5 : 0; + meshhdrlen = (sdata->vif.type == NL80211_IFTYPE_MESH_POINT) ? 5 : 0; /* FIX: what would be proper limits for MTU? * This interface uses 802.3 frames. */ @@ -164,18 +164,16 @@ static int ieee80211_change_mtu(struct net_device *dev, int new_mtu) static inline int identical_mac_addr_allowed(int type1, int type2) { - return (type1 == IEEE80211_IF_TYPE_MNTR || - type2 == IEEE80211_IF_TYPE_MNTR || - (type1 == IEEE80211_IF_TYPE_AP && - type2 == IEEE80211_IF_TYPE_WDS) || - (type1 == IEEE80211_IF_TYPE_WDS && - (type2 == IEEE80211_IF_TYPE_WDS || - type2 == IEEE80211_IF_TYPE_AP)) || - (type1 == IEEE80211_IF_TYPE_AP && - type2 == IEEE80211_IF_TYPE_VLAN) || - (type1 == IEEE80211_IF_TYPE_VLAN && - (type2 == IEEE80211_IF_TYPE_AP || - type2 == IEEE80211_IF_TYPE_VLAN))); + return type1 == NL80211_IFTYPE_MONITOR || + type2 == NL80211_IFTYPE_MONITOR || + (type1 == NL80211_IFTYPE_AP && type2 == NL80211_IFTYPE_WDS) || + (type1 == NL80211_IFTYPE_WDS && + (type2 == NL80211_IFTYPE_WDS || + type2 == NL80211_IFTYPE_AP)) || + (type1 == NL80211_IFTYPE_AP && type2 == NL80211_IFTYPE_AP_VLAN) || + (type1 == NL80211_IFTYPE_AP_VLAN && + (type2 == NL80211_IFTYPE_AP || + type2 == NL80211_IFTYPE_AP_VLAN)); } static int ieee80211_open(struct net_device *dev) @@ -211,8 +209,8 @@ static int ieee80211_open(struct net_device *dev) * belonging to the same hardware. Then, however, we're * faced with having to adopt two different TSF timers... */ - if (sdata->vif.type == IEEE80211_IF_TYPE_IBSS && - nsdata->vif.type == IEEE80211_IF_TYPE_IBSS) + if (sdata->vif.type == NL80211_IFTYPE_ADHOC && + nsdata->vif.type == NL80211_IFTYPE_ADHOC) return -EBUSY; /* @@ -232,37 +230,38 @@ static int ieee80211_open(struct net_device *dev) /* * can only add VLANs to enabled APs */ - if (sdata->vif.type == IEEE80211_IF_TYPE_VLAN && - nsdata->vif.type == IEEE80211_IF_TYPE_AP) + if (sdata->vif.type == NL80211_IFTYPE_AP_VLAN && + nsdata->vif.type == NL80211_IFTYPE_AP) sdata->bss = &nsdata->u.ap; } } switch (sdata->vif.type) { - case IEEE80211_IF_TYPE_WDS: + case NL80211_IFTYPE_WDS: if (!is_valid_ether_addr(sdata->u.wds.remote_addr)) return -ENOLINK; break; - case IEEE80211_IF_TYPE_VLAN: + case NL80211_IFTYPE_AP_VLAN: if (!sdata->bss) return -ENOLINK; list_add(&sdata->u.vlan.list, &sdata->bss->vlans); break; - case IEEE80211_IF_TYPE_AP: + case NL80211_IFTYPE_AP: sdata->bss = &sdata->u.ap; break; - case IEEE80211_IF_TYPE_MESH_POINT: + case NL80211_IFTYPE_MESH_POINT: if (!ieee80211_vif_is_mesh(&sdata->vif)) break; /* mesh ifaces must set allmulti to forward mcast traffic */ atomic_inc(&local->iff_allmultis); break; - case IEEE80211_IF_TYPE_STA: - case IEEE80211_IF_TYPE_MNTR: - case IEEE80211_IF_TYPE_IBSS: + case NL80211_IFTYPE_STATION: + case NL80211_IFTYPE_MONITOR: + case NL80211_IFTYPE_ADHOC: /* no special treatment */ break; - case IEEE80211_IF_TYPE_INVALID: + case NL80211_IFTYPE_UNSPECIFIED: + case __NL80211_IFTYPE_AFTER_LAST: /* cannot happen */ WARN_ON(1); break; @@ -309,10 +308,10 @@ static int ieee80211_open(struct net_device *dev) } switch (sdata->vif.type) { - case IEEE80211_IF_TYPE_VLAN: + case NL80211_IFTYPE_AP_VLAN: /* no need to tell driver */ break; - case IEEE80211_IF_TYPE_MNTR: + case NL80211_IFTYPE_MONITOR: if (sdata->u.mntr_flags & MONITOR_FLAG_COOK_FRAMES) { local->cooked_mntrs++; break; @@ -336,8 +335,8 @@ static int ieee80211_open(struct net_device *dev) ieee80211_configure_filter(local); netif_addr_unlock_bh(local->mdev); break; - case IEEE80211_IF_TYPE_STA: - case IEEE80211_IF_TYPE_IBSS: + case NL80211_IFTYPE_STATION: + case NL80211_IFTYPE_ADHOC: sdata->u.sta.flags &= ~IEEE80211_STA_PREV_BSSID_SET; /* fall through */ default: @@ -354,14 +353,14 @@ static int ieee80211_open(struct net_device *dev) ieee80211_bss_info_change_notify(sdata, changed); ieee80211_enable_keys(sdata); - if (sdata->vif.type == IEEE80211_IF_TYPE_STA && + if (sdata->vif.type == NL80211_IFTYPE_STATION && !(sdata->flags & IEEE80211_SDATA_USERSPACE_MLME)) netif_carrier_off(dev); else netif_carrier_on(dev); } - if (sdata->vif.type == IEEE80211_IF_TYPE_WDS) { + if (sdata->vif.type == NL80211_IFTYPE_WDS) { /* Create STA entry for the WDS peer */ sta = sta_info_alloc(sdata, sdata->u.wds.remote_addr, GFP_KERNEL); @@ -417,8 +416,8 @@ static int ieee80211_open(struct net_device *dev) * yet be effective. Trigger execution of ieee80211_sta_work * to fix this. */ - if (sdata->vif.type == IEEE80211_IF_TYPE_STA || - sdata->vif.type == IEEE80211_IF_TYPE_IBSS) { + if (sdata->vif.type == NL80211_IFTYPE_STATION || + sdata->vif.type == NL80211_IFTYPE_ADHOC) { struct ieee80211_if_sta *ifsta = &sdata->u.sta; queue_work(local->hw.workqueue, &ifsta->work); } @@ -433,7 +432,7 @@ static int ieee80211_open(struct net_device *dev) local->ops->stop(local_to_hw(local)); err_del_bss: sdata->bss = NULL; - if (sdata->vif.type == IEEE80211_IF_TYPE_VLAN) + if (sdata->vif.type == NL80211_IFTYPE_AP_VLAN) list_del(&sdata->u.vlan.list); return res; } @@ -496,7 +495,7 @@ static int ieee80211_stop(struct net_device *dev) dev_mc_unsync(local->mdev, dev); /* APs need special treatment */ - if (sdata->vif.type == IEEE80211_IF_TYPE_AP) { + if (sdata->vif.type == NL80211_IFTYPE_AP) { struct ieee80211_sub_if_data *vlan, *tmp; struct beacon_data *old_beacon = sdata->u.ap.beacon; @@ -515,11 +514,11 @@ static int ieee80211_stop(struct net_device *dev) local->open_count--; switch (sdata->vif.type) { - case IEEE80211_IF_TYPE_VLAN: + case NL80211_IFTYPE_AP_VLAN: list_del(&sdata->u.vlan.list); /* no need to tell driver */ break; - case IEEE80211_IF_TYPE_MNTR: + case NL80211_IFTYPE_MONITOR: if (sdata->u.mntr_flags & MONITOR_FLAG_COOK_FRAMES) { local->cooked_mntrs--; break; @@ -542,8 +541,8 @@ static int ieee80211_stop(struct net_device *dev) ieee80211_configure_filter(local); netif_addr_unlock_bh(local->mdev); break; - case IEEE80211_IF_TYPE_STA: - case IEEE80211_IF_TYPE_IBSS: + case NL80211_IFTYPE_STATION: + case NL80211_IFTYPE_ADHOC: sdata->u.sta.state = IEEE80211_STA_MLME_DISABLED; memset(sdata->u.sta.bssid, 0, ETH_ALEN); del_timer_sync(&sdata->u.sta.timer); @@ -569,7 +568,7 @@ static int ieee80211_stop(struct net_device *dev) sdata->u.sta.extra_ie = NULL; sdata->u.sta.extra_ie_len = 0; /* fall through */ - case IEEE80211_IF_TYPE_MESH_POINT: + case NL80211_IFTYPE_MESH_POINT: if (ieee80211_vif_is_mesh(&sdata->vif)) { /* allmulti is always set on mesh ifaces */ atomic_dec(&local->iff_allmultis); @@ -698,12 +697,12 @@ int ieee80211_if_config(struct ieee80211_sub_if_data *sdata, u32 changed) memset(&conf, 0, sizeof(conf)); conf.changed = changed; - if (sdata->vif.type == IEEE80211_IF_TYPE_STA || - sdata->vif.type == IEEE80211_IF_TYPE_IBSS) { + if (sdata->vif.type == NL80211_IFTYPE_STATION || + sdata->vif.type == NL80211_IFTYPE_ADHOC) { conf.bssid = sdata->u.sta.bssid; conf.ssid = sdata->u.sta.ssid; conf.ssid_len = sdata->u.sta.ssid_len; - } else if (sdata->vif.type == IEEE80211_IF_TYPE_AP) { + } else if (sdata->vif.type == NL80211_IFTYPE_AP) { conf.bssid = sdata->dev->dev_addr; conf.ssid = sdata->u.ap.ssid; conf.ssid_len = sdata->u.ap.ssid_len; @@ -1204,7 +1203,7 @@ void ieee80211_tx_status(struct ieee80211_hw *hw, struct sk_buff *skb) rcu_read_lock(); list_for_each_entry_rcu(sdata, &local->interfaces, list) { - if (sdata->vif.type == IEEE80211_IF_TYPE_MNTR) { + if (sdata->vif.type == NL80211_IFTYPE_MONITOR) { if (!netif_running(sdata->dev)) continue; @@ -1450,7 +1449,7 @@ int ieee80211_register_hw(struct ieee80211_hw *hw) /* add one default STA interface */ result = ieee80211_if_add(local, "wlan%d", NULL, - IEEE80211_IF_TYPE_STA, NULL); + NL80211_IFTYPE_STATION, NULL); if (result) printk(KERN_WARNING "%s: Failed to add default virtual iface\n", wiphy_name(local->hw.wiphy)); diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index 55bc6071393..8a2cfd3609b 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -678,7 +678,7 @@ static void ieee80211_set_associated(struct ieee80211_sub_if_data *sdata, ifsta->flags |= IEEE80211_STA_ASSOCIATED; - if (sdata->vif.type != IEEE80211_IF_TYPE_STA) + if (sdata->vif.type != NL80211_IFTYPE_STATION) return; bss = ieee80211_rx_bss_get(local, ifsta->bssid, @@ -1002,17 +1002,17 @@ static void ieee80211_rx_mgmt_auth(struct ieee80211_sub_if_data *sdata, DECLARE_MAC_BUF(mac); if (ifsta->state != IEEE80211_STA_MLME_AUTHENTICATE && - sdata->vif.type != IEEE80211_IF_TYPE_IBSS) + sdata->vif.type != NL80211_IFTYPE_ADHOC) return; if (len < 24 + 6) return; - if (sdata->vif.type != IEEE80211_IF_TYPE_IBSS && + if (sdata->vif.type != NL80211_IFTYPE_ADHOC && memcmp(ifsta->bssid, mgmt->sa, ETH_ALEN) != 0) return; - if (sdata->vif.type != IEEE80211_IF_TYPE_IBSS && + if (sdata->vif.type != NL80211_IFTYPE_ADHOC && memcmp(ifsta->bssid, mgmt->bssid, ETH_ALEN) != 0) return; @@ -1020,7 +1020,7 @@ static void ieee80211_rx_mgmt_auth(struct ieee80211_sub_if_data *sdata, auth_transaction = le16_to_cpu(mgmt->u.auth.auth_transaction); status_code = le16_to_cpu(mgmt->u.auth.status_code); - if (sdata->vif.type == IEEE80211_IF_TYPE_IBSS) { + if (sdata->vif.type == NL80211_IFTYPE_ADHOC) { /* * IEEE 802.11 standard does not require authentication in IBSS * networks and most implementations do not seem to use it. @@ -1487,7 +1487,7 @@ static void ieee80211_rx_bss_info(struct ieee80211_sub_if_data *sdata, if (!channel || channel->flags & IEEE80211_CHAN_DISABLED) return; - if (sdata->vif.type == IEEE80211_IF_TYPE_IBSS && elems->supp_rates && + if (sdata->vif.type == NL80211_IFTYPE_ADHOC && elems->supp_rates && memcmp(mgmt->bssid, sdata->u.sta.bssid, ETH_ALEN) == 0) { supp_rates = ieee80211_sta_get_rates(local, elems, band); @@ -1532,14 +1532,14 @@ static void ieee80211_rx_bss_info(struct ieee80211_sub_if_data *sdata, * In STA mode, the remaining parameters should not be overridden * by beacons because they're not necessarily accurate there. */ - if (sdata->vif.type != IEEE80211_IF_TYPE_IBSS && + if (sdata->vif.type != NL80211_IFTYPE_ADHOC && bss->last_probe_resp && beacon) { ieee80211_rx_bss_put(local, bss); return; } /* check if we need to merge IBSS */ - if (sdata->vif.type == IEEE80211_IF_TYPE_IBSS && beacon && + if (sdata->vif.type == NL80211_IFTYPE_ADHOC && beacon && bss->capability & WLAN_CAPABILITY_IBSS && bss->freq == local->oper_channel->center_freq && elems->ssid_len == sdata->u.sta.ssid_len && @@ -1649,7 +1649,7 @@ static void ieee80211_rx_mgmt_beacon(struct ieee80211_sub_if_data *sdata, ieee80211_rx_bss_info(sdata, mgmt, len, rx_status, &elems, true); - if (sdata->vif.type != IEEE80211_IF_TYPE_STA) + if (sdata->vif.type != NL80211_IFTYPE_STATION) return; ifsta = &sdata->u.sta; @@ -1700,7 +1700,7 @@ static void ieee80211_rx_mgmt_probe_req(struct ieee80211_sub_if_data *sdata, DECLARE_MAC_BUF(mac3); #endif - if (sdata->vif.type != IEEE80211_IF_TYPE_IBSS || + if (sdata->vif.type != NL80211_IFTYPE_ADHOC || ifsta->state != IEEE80211_STA_MLME_IBSS_JOINED || len < 24 + 2 || !ifsta->probe_resp) return; @@ -2212,8 +2212,8 @@ static void ieee80211_sta_work(struct work_struct *work) if (local->sw_scanning || local->hw_scanning) return; - if (WARN_ON(sdata->vif.type != IEEE80211_IF_TYPE_STA && - sdata->vif.type != IEEE80211_IF_TYPE_IBSS)) + if (WARN_ON(sdata->vif.type != NL80211_IFTYPE_STATION && + sdata->vif.type != NL80211_IFTYPE_ADHOC)) return; ifsta = &sdata->u.sta; @@ -2273,7 +2273,7 @@ static void ieee80211_sta_work(struct work_struct *work) static void ieee80211_restart_sta_timer(struct ieee80211_sub_if_data *sdata) { - if (sdata->vif.type == IEEE80211_IF_TYPE_STA) + if (sdata->vif.type == NL80211_IFTYPE_STATION) queue_work(sdata->local->hw.workqueue, &sdata->u.sta.work); } @@ -2355,7 +2355,7 @@ void ieee80211_sta_req_auth(struct ieee80211_sub_if_data *sdata, { struct ieee80211_local *local = sdata->local; - if (sdata->vif.type != IEEE80211_IF_TYPE_STA) + if (sdata->vif.type != NL80211_IFTYPE_STATION) return; if ((ifsta->flags & (IEEE80211_STA_BSSID_SET | @@ -2407,7 +2407,7 @@ int ieee80211_sta_set_ssid(struct ieee80211_sub_if_data *sdata, char *ssid, size else ifsta->flags &= ~IEEE80211_STA_SSID_SET; - if (sdata->vif.type == IEEE80211_IF_TYPE_IBSS && + if (sdata->vif.type == NL80211_IFTYPE_ADHOC && !(ifsta->flags & IEEE80211_STA_BSSID_SET)) { ifsta->ibss_join_req = jiffies; ifsta->state = IEEE80211_STA_MLME_IBSS_SEARCH; @@ -2482,8 +2482,8 @@ int ieee80211_sta_deauthenticate(struct ieee80211_sub_if_data *sdata, u16 reason printk(KERN_DEBUG "%s: deauthenticating by local choice (reason=%d)\n", sdata->dev->name, reason); - if (sdata->vif.type != IEEE80211_IF_TYPE_STA && - sdata->vif.type != IEEE80211_IF_TYPE_IBSS) + if (sdata->vif.type != NL80211_IFTYPE_STATION && + sdata->vif.type != NL80211_IFTYPE_ADHOC) return -EINVAL; ieee80211_set_disassoc(sdata, ifsta, true, true, reason); @@ -2497,7 +2497,7 @@ int ieee80211_sta_disassociate(struct ieee80211_sub_if_data *sdata, u16 reason) printk(KERN_DEBUG "%s: disassociating by local choice (reason=%d)\n", sdata->dev->name, reason); - if (sdata->vif.type != IEEE80211_IF_TYPE_STA) + if (sdata->vif.type != NL80211_IFTYPE_STATION) return -EINVAL; if (!(ifsta->flags & IEEE80211_STA_ASSOCIATED)) @@ -2513,7 +2513,7 @@ void ieee80211_mlme_notify_scan_completed(struct ieee80211_local *local) struct ieee80211_sub_if_data *sdata = local->scan_sdata; struct ieee80211_if_sta *ifsta; - if (sdata && sdata->vif.type == IEEE80211_IF_TYPE_IBSS) { + if (sdata && sdata->vif.type == NL80211_IFTYPE_ADHOC) { ifsta = &sdata->u.sta; if (!(ifsta->flags & IEEE80211_STA_BSSID_SET) || (!(ifsta->state == IEEE80211_STA_MLME_IBSS_JOINED) && @@ -2539,7 +2539,7 @@ void ieee80211_notify_mac(struct ieee80211_hw *hw, case IEEE80211_NOTIFY_RE_ASSOC: rcu_read_lock(); list_for_each_entry_rcu(sdata, &local->interfaces, list) { - if (sdata->vif.type != IEEE80211_IF_TYPE_STA) + if (sdata->vif.type != NL80211_IFTYPE_STATION) continue; ieee80211_sta_req_auth(sdata, &sdata->u.sta); diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c index 33530b29c42..8c3dda5f00b 100644 --- a/net/mac80211/rx.c +++ b/net/mac80211/rx.c @@ -295,7 +295,7 @@ ieee80211_rx_monitor(struct ieee80211_local *local, struct sk_buff *origskb, if (!netif_running(sdata->dev)) continue; - if (sdata->vif.type != IEEE80211_IF_TYPE_MNTR) + if (sdata->vif.type != NL80211_IFTYPE_MONITOR) continue; if (sdata->u.mntr_flags & MONITOR_FLAG_COOK_FRAMES) @@ -512,7 +512,7 @@ ieee80211_rx_h_check(struct ieee80211_rx_data *rx) if (unlikely((ieee80211_is_data(hdr->frame_control) || ieee80211_is_pspoll(hdr->frame_control)) && - rx->sdata->vif.type != IEEE80211_IF_TYPE_IBSS && + rx->sdata->vif.type != NL80211_IFTYPE_ADHOC && (!rx->sta || !test_sta_flags(rx->sta, WLAN_STA_ASSOC)))) { if ((!ieee80211_has_fromds(hdr->frame_control) && !ieee80211_has_tods(hdr->frame_control) && @@ -724,14 +724,14 @@ ieee80211_rx_h_sta_process(struct ieee80211_rx_data *rx) /* Update last_rx only for IBSS packets which are for the current * BSSID to avoid keeping the current IBSS network alive in cases where * other STAs are using different BSSID. */ - if (rx->sdata->vif.type == IEEE80211_IF_TYPE_IBSS) { + if (rx->sdata->vif.type == NL80211_IFTYPE_ADHOC) { u8 *bssid = ieee80211_get_bssid(hdr, rx->skb->len, - IEEE80211_IF_TYPE_IBSS); + NL80211_IFTYPE_ADHOC); if (compare_ether_addr(bssid, rx->sdata->u.sta.bssid) == 0) sta->last_rx = jiffies; } else if (!is_multicast_ether_addr(hdr->addr1) || - rx->sdata->vif.type == IEEE80211_IF_TYPE_STA) { + rx->sdata->vif.type == NL80211_IFTYPE_STATION) { /* Update last_rx only for unicast frames in order to prevent * the Probe Request frames (the only broadcast frames from a * STA in infrastructure mode) from keeping a connection alive. @@ -751,8 +751,8 @@ ieee80211_rx_h_sta_process(struct ieee80211_rx_data *rx) sta->last_noise = rx->status->noise; if (!ieee80211_has_morefrags(hdr->frame_control) && - (rx->sdata->vif.type == IEEE80211_IF_TYPE_AP || - rx->sdata->vif.type == IEEE80211_IF_TYPE_VLAN)) { + (rx->sdata->vif.type == NL80211_IFTYPE_AP || + rx->sdata->vif.type == NL80211_IFTYPE_AP_VLAN)) { /* Change STA power saving mode only in the end of a frame * exchange sequence */ if (test_sta_flags(sta, WLAN_STA_PS) && @@ -982,8 +982,8 @@ ieee80211_rx_h_ps_poll(struct ieee80211_rx_data *rx) !(rx->flags & IEEE80211_RX_RA_MATCH))) return RX_CONTINUE; - if ((sdata->vif.type != IEEE80211_IF_TYPE_AP) && - (sdata->vif.type != IEEE80211_IF_TYPE_VLAN)) + if ((sdata->vif.type != NL80211_IFTYPE_AP) && + (sdata->vif.type != NL80211_IFTYPE_AP_VLAN)) return RX_DROP_UNUSABLE; skb = skb_dequeue(&rx->sta->tx_filtered); @@ -1131,23 +1131,23 @@ ieee80211_data_to_8023(struct ieee80211_rx_data *rx) switch (hdr->frame_control & cpu_to_le16(IEEE80211_FCTL_TODS | IEEE80211_FCTL_FROMDS)) { case __constant_cpu_to_le16(IEEE80211_FCTL_TODS): - if (unlikely(sdata->vif.type != IEEE80211_IF_TYPE_AP && - sdata->vif.type != IEEE80211_IF_TYPE_VLAN)) + if (unlikely(sdata->vif.type != NL80211_IFTYPE_AP && + sdata->vif.type != NL80211_IFTYPE_AP_VLAN)) return -1; break; case __constant_cpu_to_le16(IEEE80211_FCTL_TODS | IEEE80211_FCTL_FROMDS): - if (unlikely(sdata->vif.type != IEEE80211_IF_TYPE_WDS && - sdata->vif.type != IEEE80211_IF_TYPE_MESH_POINT)) + if (unlikely(sdata->vif.type != NL80211_IFTYPE_WDS && + sdata->vif.type != NL80211_IFTYPE_MESH_POINT)) return -1; break; case __constant_cpu_to_le16(IEEE80211_FCTL_FROMDS): - if (sdata->vif.type != IEEE80211_IF_TYPE_STA || + if (sdata->vif.type != NL80211_IFTYPE_STATION || (is_multicast_ether_addr(dst) && !compare_ether_addr(src, dev->dev_addr))) return -1; break; case __constant_cpu_to_le16(0): - if (sdata->vif.type != IEEE80211_IF_TYPE_IBSS) + if (sdata->vif.type != NL80211_IFTYPE_ADHOC) return -1; break; } @@ -1221,8 +1221,8 @@ ieee80211_deliver_skb(struct ieee80211_rx_data *rx) skb = rx->skb; xmit_skb = NULL; - if ((sdata->vif.type == IEEE80211_IF_TYPE_AP || - sdata->vif.type == IEEE80211_IF_TYPE_VLAN) && + if ((sdata->vif.type == NL80211_IFTYPE_AP || + sdata->vif.type == NL80211_IFTYPE_AP_VLAN) && !(sdata->flags & IEEE80211_SDATA_DONT_BRIDGE_PACKETS) && (rx->flags & IEEE80211_RX_RA_MATCH)) { if (is_multicast_ether_addr(ehdr->h_dest)) { @@ -1536,8 +1536,8 @@ ieee80211_rx_h_action(struct ieee80211_rx_data *rx) * FIXME: revisit this, I'm sure we should handle most * of these frames in other modes as well! */ - if (sdata->vif.type != IEEE80211_IF_TYPE_STA && - sdata->vif.type != IEEE80211_IF_TYPE_IBSS) + if (sdata->vif.type != NL80211_IFTYPE_STATION && + sdata->vif.type != NL80211_IFTYPE_ADHOC) return RX_DROP_MONITOR; switch (mgmt->u.action.category) { @@ -1595,8 +1595,8 @@ ieee80211_rx_h_mgmt(struct ieee80211_rx_data *rx) if (ieee80211_vif_is_mesh(&sdata->vif)) return ieee80211_mesh_rx_mgmt(sdata, rx->skb, rx->status); - if (sdata->vif.type != IEEE80211_IF_TYPE_STA && - sdata->vif.type != IEEE80211_IF_TYPE_IBSS) + if (sdata->vif.type != NL80211_IFTYPE_STATION && + sdata->vif.type != NL80211_IFTYPE_ADHOC) return RX_DROP_MONITOR; if (sdata->flags & IEEE80211_SDATA_USERSPACE_MLME) @@ -1632,7 +1632,7 @@ static void ieee80211_rx_michael_mic_report(struct net_device *dev, if (!ieee80211_has_protected(hdr->frame_control)) goto ignore; - if (rx->sdata->vif.type == IEEE80211_IF_TYPE_AP && keyidx) { + if (rx->sdata->vif.type == NL80211_IFTYPE_AP && keyidx) { /* * APs with pairwise keys should never receive Michael MIC * errors for non-zero keyidx because these are reserved for @@ -1702,7 +1702,7 @@ static void ieee80211_rx_cooked_monitor(struct ieee80211_rx_data *rx) if (!netif_running(sdata->dev)) continue; - if (sdata->vif.type != IEEE80211_IF_TYPE_MNTR || + if (sdata->vif.type != NL80211_IFTYPE_MONITOR || !(sdata->u.mntr_flags & MONITOR_FLAG_COOK_FRAMES)) continue; @@ -1801,7 +1801,7 @@ static int prepare_for_handlers(struct ieee80211_sub_if_data *sdata, int multicast = is_multicast_ether_addr(hdr->addr1); switch (sdata->vif.type) { - case IEEE80211_IF_TYPE_STA: + case NL80211_IFTYPE_STATION: if (!bssid) return 0; if (!ieee80211_bssid_match(bssid, sdata->u.sta.bssid)) { @@ -1816,7 +1816,7 @@ static int prepare_for_handlers(struct ieee80211_sub_if_data *sdata, rx->flags &= ~IEEE80211_RX_RA_MATCH; } break; - case IEEE80211_IF_TYPE_IBSS: + case NL80211_IFTYPE_ADHOC: if (!bssid) return 0; if (ieee80211_is_beacon(hdr->frame_control)) { @@ -1837,7 +1837,7 @@ static int prepare_for_handlers(struct ieee80211_sub_if_data *sdata, bssid, hdr->addr2, BIT(rx->status->rate_idx)); break; - case IEEE80211_IF_TYPE_MESH_POINT: + case NL80211_IFTYPE_MESH_POINT: if (!multicast && compare_ether_addr(sdata->dev->dev_addr, hdr->addr1) != 0) { @@ -1847,8 +1847,8 @@ static int prepare_for_handlers(struct ieee80211_sub_if_data *sdata, rx->flags &= ~IEEE80211_RX_RA_MATCH; } break; - case IEEE80211_IF_TYPE_VLAN: - case IEEE80211_IF_TYPE_AP: + case NL80211_IFTYPE_AP_VLAN: + case NL80211_IFTYPE_AP: if (!bssid) { if (compare_ether_addr(sdata->dev->dev_addr, hdr->addr1)) @@ -1860,16 +1860,17 @@ static int prepare_for_handlers(struct ieee80211_sub_if_data *sdata, rx->flags &= ~IEEE80211_RX_RA_MATCH; } break; - case IEEE80211_IF_TYPE_WDS: + case NL80211_IFTYPE_WDS: if (bssid || !ieee80211_is_data(hdr->frame_control)) return 0; if (compare_ether_addr(sdata->u.wds.remote_addr, hdr->addr2)) return 0; break; - case IEEE80211_IF_TYPE_MNTR: + case NL80211_IFTYPE_MONITOR: /* take everything */ break; - case IEEE80211_IF_TYPE_INVALID: + case NL80211_IFTYPE_UNSPECIFIED: + case __NL80211_IFTYPE_AFTER_LAST: /* should never get here */ WARN_ON(1); break; @@ -1930,7 +1931,7 @@ static void __ieee80211_rx_handle_packet(struct ieee80211_hw *hw, if (!netif_running(sdata->dev)) continue; - if (sdata->vif.type == IEEE80211_IF_TYPE_MNTR) + if (sdata->vif.type == NL80211_IFTYPE_MONITOR) continue; bssid = ieee80211_get_bssid(hdr, skb->len, sdata->vif.type); diff --git a/net/mac80211/scan.c b/net/mac80211/scan.c index 5e719e7b720..8e6685e7ae8 100644 --- a/net/mac80211/scan.c +++ b/net/mac80211/scan.c @@ -475,7 +475,7 @@ void ieee80211_scan_completed(struct ieee80211_hw *hw) rcu_read_lock(); list_for_each_entry_rcu(sdata, &local->interfaces, list) { /* Tell AP we're back */ - if (sdata->vif.type == IEEE80211_IF_TYPE_STA) { + if (sdata->vif.type == NL80211_IFTYPE_STATION) { if (sdata->u.sta.flags & IEEE80211_STA_ASSOCIATED) { ieee80211_send_nullfunc(local, sdata, 0); netif_tx_wake_all_queues(sdata->dev); @@ -539,7 +539,7 @@ void ieee80211_scan_work(struct work_struct *work) chan = &sband->channels[local->scan_channel_idx]; if (chan->flags & IEEE80211_CHAN_DISABLED || - (sdata->vif.type == IEEE80211_IF_TYPE_IBSS && + (sdata->vif.type == NL80211_IFTYPE_ADHOC && chan->flags & IEEE80211_CHAN_NO_IBSS)) skip = 1; @@ -638,7 +638,7 @@ int ieee80211_start_scan(struct ieee80211_sub_if_data *scan_sdata, rcu_read_lock(); list_for_each_entry_rcu(sdata, &local->interfaces, list) { - if (sdata->vif.type == IEEE80211_IF_TYPE_STA) { + if (sdata->vif.type == NL80211_IFTYPE_STATION) { if (sdata->u.sta.flags & IEEE80211_STA_ASSOCIATED) { netif_tx_stop_all_queues(sdata->dev); ieee80211_send_nullfunc(local, sdata, 1); @@ -681,7 +681,7 @@ int ieee80211_request_scan(struct ieee80211_sub_if_data *sdata, struct ieee80211_local *local = sdata->local; struct ieee80211_if_sta *ifsta; - if (sdata->vif.type != IEEE80211_IF_TYPE_STA) + if (sdata->vif.type != NL80211_IFTYPE_STATION) return ieee80211_start_scan(sdata, ssid, ssid_len); /* diff --git a/net/mac80211/sta_info.c b/net/mac80211/sta_info.c index 3370b262563..31246d8e532 100644 --- a/net/mac80211/sta_info.c +++ b/net/mac80211/sta_info.c @@ -319,7 +319,7 @@ int sta_info_insert(struct sta_info *sta) /* notify driver */ if (local->ops->sta_notify) { - if (sdata->vif.type == IEEE80211_IF_TYPE_VLAN) + if (sdata->vif.type == NL80211_IFTYPE_AP_VLAN) sdata = container_of(sdata->bss, struct ieee80211_sub_if_data, u.ap); @@ -456,7 +456,7 @@ static void __sta_info_unlink(struct sta_info **sta) local->num_sta--; if (local->ops->sta_notify) { - if (sdata->vif.type == IEEE80211_IF_TYPE_VLAN) + if (sdata->vif.type == NL80211_IFTYPE_AP_VLAN) sdata = container_of(sdata->bss, struct ieee80211_sub_if_data, u.ap); diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c index a523189f10c..f4bcc589d67 100644 --- a/net/mac80211/tx.c +++ b/net/mac80211/tx.c @@ -226,7 +226,7 @@ ieee80211_tx_h_check_assoc(struct ieee80211_tx_data *tx) !ieee80211_is_probe_req(hdr->frame_control)) return TX_DROP; - if (tx->sdata->vif.type == IEEE80211_IF_TYPE_MESH_POINT) + if (tx->sdata->vif.type == NL80211_IFTYPE_MESH_POINT) return TX_CONTINUE; if (tx->flags & IEEE80211_TX_PS_BUFFERED) @@ -236,7 +236,7 @@ ieee80211_tx_h_check_assoc(struct ieee80211_tx_data *tx) if (likely(tx->flags & IEEE80211_TX_UNICAST)) { if (unlikely(!(sta_flags & WLAN_STA_ASSOC) && - tx->sdata->vif.type != IEEE80211_IF_TYPE_IBSS && + tx->sdata->vif.type != NL80211_IFTYPE_ADHOC && ieee80211_is_data(hdr->frame_control))) { #ifdef CONFIG_MAC80211_VERBOSE_DEBUG DECLARE_MAC_BUF(mac); @@ -250,7 +250,7 @@ ieee80211_tx_h_check_assoc(struct ieee80211_tx_data *tx) } else { if (unlikely(ieee80211_is_data(hdr->frame_control) && tx->local->num_sta == 0 && - tx->sdata->vif.type != IEEE80211_IF_TYPE_IBSS)) { + tx->sdata->vif.type != NL80211_IFTYPE_ADHOC)) { /* * No associated STAs - no need to send multicast * frames. @@ -281,7 +281,7 @@ static void purge_old_ps_buffers(struct ieee80211_local *local) list_for_each_entry_rcu(sdata, &local->interfaces, list) { struct ieee80211_if_ap *ap; - if (sdata->vif.type != IEEE80211_IF_TYPE_AP) + if (sdata->vif.type != NL80211_IFTYPE_AP) continue; ap = &sdata->u.ap; skb = skb_dequeue(&ap->ps_bc_buf); @@ -979,7 +979,7 @@ __ieee80211_tx_prepare(struct ieee80211_tx_data *tx, /* process and remove the injection radiotap header */ sdata = IEEE80211_DEV_TO_SUB_IF(dev); - if (unlikely(sdata->vif.type == IEEE80211_IF_TYPE_MNTR)) { + if (unlikely(sdata->vif.type == NL80211_IFTYPE_MONITOR)) { if (__ieee80211_parse_tx_radiotap(tx, skb) == TX_DROP) return TX_DROP; @@ -1457,8 +1457,8 @@ int ieee80211_subif_start_xmit(struct sk_buff *skb, fc = cpu_to_le16(IEEE80211_FTYPE_DATA | IEEE80211_STYPE_DATA); switch (sdata->vif.type) { - case IEEE80211_IF_TYPE_AP: - case IEEE80211_IF_TYPE_VLAN: + case NL80211_IFTYPE_AP: + case NL80211_IFTYPE_AP_VLAN: fc |= cpu_to_le16(IEEE80211_FCTL_FROMDS); /* DA BSSID SA */ memcpy(hdr.addr1, skb->data, ETH_ALEN); @@ -1466,7 +1466,7 @@ int ieee80211_subif_start_xmit(struct sk_buff *skb, memcpy(hdr.addr3, skb->data + ETH_ALEN, ETH_ALEN); hdrlen = 24; break; - case IEEE80211_IF_TYPE_WDS: + case NL80211_IFTYPE_WDS: fc |= cpu_to_le16(IEEE80211_FCTL_FROMDS | IEEE80211_FCTL_TODS); /* RA TA DA SA */ memcpy(hdr.addr1, sdata->u.wds.remote_addr, ETH_ALEN); @@ -1476,7 +1476,7 @@ int ieee80211_subif_start_xmit(struct sk_buff *skb, hdrlen = 30; break; #ifdef CONFIG_MAC80211_MESH - case IEEE80211_IF_TYPE_MESH_POINT: + case NL80211_IFTYPE_MESH_POINT: fc |= cpu_to_le16(IEEE80211_FCTL_FROMDS | IEEE80211_FCTL_TODS); /* RA TA DA SA */ memset(hdr.addr1, 0, ETH_ALEN); @@ -1493,7 +1493,7 @@ int ieee80211_subif_start_xmit(struct sk_buff *skb, hdrlen = 30; break; #endif - case IEEE80211_IF_TYPE_STA: + case NL80211_IFTYPE_STATION: fc |= cpu_to_le16(IEEE80211_FCTL_TODS); /* BSSID SA DA */ memcpy(hdr.addr1, sdata->u.sta.bssid, ETH_ALEN); @@ -1501,7 +1501,7 @@ int ieee80211_subif_start_xmit(struct sk_buff *skb, memcpy(hdr.addr3, skb->data, ETH_ALEN); hdrlen = 24; break; - case IEEE80211_IF_TYPE_IBSS: + case NL80211_IFTYPE_ADHOC: /* DA SA BSSID */ memcpy(hdr.addr1, skb->data, ETH_ALEN); memcpy(hdr.addr2, skb->data + ETH_ALEN, ETH_ALEN); @@ -1812,7 +1812,7 @@ struct sk_buff *ieee80211_beacon_get(struct ieee80211_hw *hw, sdata = vif_to_sdata(vif); bdev = sdata->dev; - if (sdata->vif.type == IEEE80211_IF_TYPE_AP) { + if (sdata->vif.type == NL80211_IFTYPE_AP) { ap = &sdata->u.ap; beacon = rcu_dereference(ap->beacon); if (ap && beacon) { @@ -1854,7 +1854,7 @@ struct sk_buff *ieee80211_beacon_get(struct ieee80211_hw *hw, num_beacons = &ap->num_beacons; } else goto out; - } else if (sdata->vif.type == IEEE80211_IF_TYPE_IBSS) { + } else if (sdata->vif.type == NL80211_IFTYPE_ADHOC) { struct ieee80211_hdr *hdr; ifsta = &sdata->u.sta; @@ -1999,7 +1999,7 @@ ieee80211_get_buffered_bc(struct ieee80211_hw *hw, rcu_read_lock(); beacon = rcu_dereference(bss->beacon); - if (sdata->vif.type != IEEE80211_IF_TYPE_AP || !beacon || !beacon->head) + if (sdata->vif.type != NL80211_IFTYPE_AP || !beacon || !beacon->head) goto out; if (bss->dtim_count != 0) diff --git a/net/mac80211/util.c b/net/mac80211/util.c index d6aca91e612..6eb222369bc 100644 --- a/net/mac80211/util.c +++ b/net/mac80211/util.c @@ -43,7 +43,7 @@ const unsigned char bridge_tunnel_header[] __aligned(2) = u8 *ieee80211_get_bssid(struct ieee80211_hdr *hdr, size_t len, - enum ieee80211_if_types type) + enum nl80211_iftype type) { __le16 fc = hdr->frame_control; @@ -77,10 +77,10 @@ u8 *ieee80211_get_bssid(struct ieee80211_hdr *hdr, size_t len, if (ieee80211_is_back_req(fc)) { switch (type) { - case IEEE80211_IF_TYPE_STA: + case NL80211_IFTYPE_STATION: return hdr->addr2; - case IEEE80211_IF_TYPE_AP: - case IEEE80211_IF_TYPE_VLAN: + case NL80211_IFTYPE_AP: + case NL80211_IFTYPE_AP_VLAN: return hdr->addr1; default: break; /* fall through to the return */ @@ -376,15 +376,16 @@ void ieee80211_iterate_active_interfaces( list_for_each_entry(sdata, &local->interfaces, list) { switch (sdata->vif.type) { - case IEEE80211_IF_TYPE_INVALID: - case IEEE80211_IF_TYPE_MNTR: - case IEEE80211_IF_TYPE_VLAN: + case __NL80211_IFTYPE_AFTER_LAST: + case NL80211_IFTYPE_UNSPECIFIED: + case NL80211_IFTYPE_MONITOR: + case NL80211_IFTYPE_AP_VLAN: continue; - case IEEE80211_IF_TYPE_AP: - case IEEE80211_IF_TYPE_STA: - case IEEE80211_IF_TYPE_IBSS: - case IEEE80211_IF_TYPE_WDS: - case IEEE80211_IF_TYPE_MESH_POINT: + case NL80211_IFTYPE_AP: + case NL80211_IFTYPE_STATION: + case NL80211_IFTYPE_ADHOC: + case NL80211_IFTYPE_WDS: + case NL80211_IFTYPE_MESH_POINT: break; } if (netif_running(sdata->dev)) @@ -409,15 +410,16 @@ void ieee80211_iterate_active_interfaces_atomic( list_for_each_entry_rcu(sdata, &local->interfaces, list) { switch (sdata->vif.type) { - case IEEE80211_IF_TYPE_INVALID: - case IEEE80211_IF_TYPE_MNTR: - case IEEE80211_IF_TYPE_VLAN: + case __NL80211_IFTYPE_AFTER_LAST: + case NL80211_IFTYPE_UNSPECIFIED: + case NL80211_IFTYPE_MONITOR: + case NL80211_IFTYPE_AP_VLAN: continue; - case IEEE80211_IF_TYPE_AP: - case IEEE80211_IF_TYPE_STA: - case IEEE80211_IF_TYPE_IBSS: - case IEEE80211_IF_TYPE_WDS: - case IEEE80211_IF_TYPE_MESH_POINT: + case NL80211_IFTYPE_AP: + case NL80211_IFTYPE_STATION: + case NL80211_IFTYPE_ADHOC: + case NL80211_IFTYPE_WDS: + case NL80211_IFTYPE_MESH_POINT: break; } if (netif_running(sdata->dev)) @@ -622,7 +624,7 @@ int ieee80211_set_freq(struct ieee80211_sub_if_data *sdata, int freqMHz) chan = ieee80211_get_channel(local->hw.wiphy, freqMHz); if (chan && !(chan->flags & IEEE80211_CHAN_DISABLED)) { - if (sdata->vif.type == IEEE80211_IF_TYPE_IBSS && + if (sdata->vif.type == NL80211_IFTYPE_ADHOC && chan->flags & IEEE80211_CHAN_NO_IBSS) { printk(KERN_DEBUG "%s: IBSS not allowed on frequency " "%d MHz\n", sdata->dev->name, chan->center_freq); diff --git a/net/mac80211/wext.c b/net/mac80211/wext.c index 77b68ed8b83..aef9707700f 100644 --- a/net/mac80211/wext.c +++ b/net/mac80211/wext.c @@ -122,8 +122,8 @@ static int ieee80211_ioctl_siwgenie(struct net_device *dev, if (sdata->flags & IEEE80211_SDATA_USERSPACE_MLME) return -EOPNOTSUPP; - if (sdata->vif.type == IEEE80211_IF_TYPE_STA || - sdata->vif.type == IEEE80211_IF_TYPE_IBSS) { + if (sdata->vif.type == NL80211_IFTYPE_STATION || + sdata->vif.type == NL80211_IFTYPE_ADHOC) { int ret = ieee80211_sta_set_extra_ie(sdata, extra, data->length); if (ret) return ret; @@ -273,21 +273,21 @@ static int ieee80211_ioctl_siwmode(struct net_device *dev, struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); int type; - if (sdata->vif.type == IEEE80211_IF_TYPE_VLAN) + if (sdata->vif.type == NL80211_IFTYPE_AP_VLAN) return -EOPNOTSUPP; switch (*mode) { case IW_MODE_INFRA: - type = IEEE80211_IF_TYPE_STA; + type = NL80211_IFTYPE_STATION; break; case IW_MODE_ADHOC: - type = IEEE80211_IF_TYPE_IBSS; + type = NL80211_IFTYPE_ADHOC; break; case IW_MODE_REPEAT: - type = IEEE80211_IF_TYPE_WDS; + type = NL80211_IFTYPE_WDS; break; case IW_MODE_MONITOR: - type = IEEE80211_IF_TYPE_MNTR; + type = NL80211_IFTYPE_MONITOR; break; default: return -EINVAL; @@ -305,22 +305,22 @@ static int ieee80211_ioctl_giwmode(struct net_device *dev, sdata = IEEE80211_DEV_TO_SUB_IF(dev); switch (sdata->vif.type) { - case IEEE80211_IF_TYPE_AP: + case NL80211_IFTYPE_AP: *mode = IW_MODE_MASTER; break; - case IEEE80211_IF_TYPE_STA: + case NL80211_IFTYPE_STATION: *mode = IW_MODE_INFRA; break; - case IEEE80211_IF_TYPE_IBSS: + case NL80211_IFTYPE_ADHOC: *mode = IW_MODE_ADHOC; break; - case IEEE80211_IF_TYPE_MNTR: + case NL80211_IFTYPE_MONITOR: *mode = IW_MODE_MONITOR; break; - case IEEE80211_IF_TYPE_WDS: + case NL80211_IFTYPE_WDS: *mode = IW_MODE_REPEAT; break; - case IEEE80211_IF_TYPE_VLAN: + case NL80211_IFTYPE_AP_VLAN: *mode = IW_MODE_SECOND; /* FIXME */ break; default: @@ -336,13 +336,13 @@ static int ieee80211_ioctl_siwfreq(struct net_device *dev, { struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); - if (sdata->vif.type == IEEE80211_IF_TYPE_STA) + if (sdata->vif.type == NL80211_IFTYPE_STATION) sdata->u.sta.flags &= ~IEEE80211_STA_AUTO_CHANNEL_SEL; /* freq->e == 0: freq->m = channel; otherwise freq = m * 10^e */ if (freq->e == 0) { if (freq->m < 0) { - if (sdata->vif.type == IEEE80211_IF_TYPE_STA) + if (sdata->vif.type == NL80211_IFTYPE_STATION) sdata->u.sta.flags |= IEEE80211_STA_AUTO_CHANNEL_SEL; return 0; @@ -386,8 +386,8 @@ static int ieee80211_ioctl_siwessid(struct net_device *dev, len--; sdata = IEEE80211_DEV_TO_SUB_IF(dev); - if (sdata->vif.type == IEEE80211_IF_TYPE_STA || - sdata->vif.type == IEEE80211_IF_TYPE_IBSS) { + if (sdata->vif.type == NL80211_IFTYPE_STATION || + sdata->vif.type == NL80211_IFTYPE_ADHOC) { int ret; if (sdata->flags & IEEE80211_SDATA_USERSPACE_MLME) { if (len > IEEE80211_MAX_SSID_LEN) @@ -407,7 +407,7 @@ static int ieee80211_ioctl_siwessid(struct net_device *dev, return 0; } - if (sdata->vif.type == IEEE80211_IF_TYPE_AP) { + if (sdata->vif.type == NL80211_IFTYPE_AP) { memcpy(sdata->u.ap.ssid, ssid, len); memset(sdata->u.ap.ssid + len, 0, IEEE80211_MAX_SSID_LEN - len); @@ -426,8 +426,8 @@ static int ieee80211_ioctl_giwessid(struct net_device *dev, struct ieee80211_sub_if_data *sdata; sdata = IEEE80211_DEV_TO_SUB_IF(dev); - if (sdata->vif.type == IEEE80211_IF_TYPE_STA || - sdata->vif.type == IEEE80211_IF_TYPE_IBSS) { + if (sdata->vif.type == NL80211_IFTYPE_STATION || + sdata->vif.type == NL80211_IFTYPE_ADHOC) { int res = ieee80211_sta_get_ssid(sdata, ssid, &len); if (res == 0) { data->length = len; @@ -437,7 +437,7 @@ static int ieee80211_ioctl_giwessid(struct net_device *dev, return res; } - if (sdata->vif.type == IEEE80211_IF_TYPE_AP) { + if (sdata->vif.type == NL80211_IFTYPE_AP) { len = sdata->u.ap.ssid_len; if (len > IW_ESSID_MAX_SIZE) len = IW_ESSID_MAX_SIZE; @@ -457,8 +457,8 @@ static int ieee80211_ioctl_siwap(struct net_device *dev, struct ieee80211_sub_if_data *sdata; sdata = IEEE80211_DEV_TO_SUB_IF(dev); - if (sdata->vif.type == IEEE80211_IF_TYPE_STA || - sdata->vif.type == IEEE80211_IF_TYPE_IBSS) { + if (sdata->vif.type == NL80211_IFTYPE_STATION || + sdata->vif.type == NL80211_IFTYPE_ADHOC) { int ret; if (sdata->flags & IEEE80211_SDATA_USERSPACE_MLME) { memcpy(sdata->u.sta.bssid, (u8 *) &ap_addr->sa_data, @@ -477,7 +477,7 @@ static int ieee80211_ioctl_siwap(struct net_device *dev, return ret; ieee80211_sta_req_auth(sdata, &sdata->u.sta); return 0; - } else if (sdata->vif.type == IEEE80211_IF_TYPE_WDS) { + } else if (sdata->vif.type == NL80211_IFTYPE_WDS) { /* * If it is necessary to update the WDS peer address * while the interface is running, then we need to do @@ -505,8 +505,8 @@ static int ieee80211_ioctl_giwap(struct net_device *dev, struct ieee80211_sub_if_data *sdata; sdata = IEEE80211_DEV_TO_SUB_IF(dev); - if (sdata->vif.type == IEEE80211_IF_TYPE_STA || - sdata->vif.type == IEEE80211_IF_TYPE_IBSS) { + if (sdata->vif.type == NL80211_IFTYPE_STATION || + sdata->vif.type == NL80211_IFTYPE_ADHOC) { if (sdata->u.sta.state == IEEE80211_STA_MLME_ASSOCIATED || sdata->u.sta.state == IEEE80211_STA_MLME_IBSS_JOINED) { ap_addr->sa_family = ARPHRD_ETHER; @@ -516,7 +516,7 @@ static int ieee80211_ioctl_giwap(struct net_device *dev, memset(&ap_addr->sa_data, 0, ETH_ALEN); return 0; } - } else if (sdata->vif.type == IEEE80211_IF_TYPE_WDS) { + } else if (sdata->vif.type == NL80211_IFTYPE_WDS) { ap_addr->sa_family = ARPHRD_ETHER; memcpy(&ap_addr->sa_data, sdata->u.wds.remote_addr, ETH_ALEN); return 0; @@ -538,10 +538,10 @@ static int ieee80211_ioctl_siwscan(struct net_device *dev, if (!netif_running(dev)) return -ENETDOWN; - if (sdata->vif.type != IEEE80211_IF_TYPE_STA && - sdata->vif.type != IEEE80211_IF_TYPE_IBSS && - sdata->vif.type != IEEE80211_IF_TYPE_MESH_POINT && - sdata->vif.type != IEEE80211_IF_TYPE_AP) + if (sdata->vif.type != NL80211_IFTYPE_STATION && + sdata->vif.type != NL80211_IFTYPE_ADHOC && + sdata->vif.type != NL80211_IFTYPE_MESH_POINT && + sdata->vif.type != NL80211_IFTYPE_AP) return -EOPNOTSUPP; /* if SSID was specified explicitly then use that */ @@ -627,7 +627,7 @@ static int ieee80211_ioctl_giwrate(struct net_device *dev, sdata = IEEE80211_DEV_TO_SUB_IF(dev); - if (sdata->vif.type != IEEE80211_IF_TYPE_STA) + if (sdata->vif.type != NL80211_IFTYPE_STATION) return -EOPNOTSUPP; sband = local->hw.wiphy->bands[local->hw.conf.channel->band]; @@ -858,8 +858,8 @@ static int ieee80211_ioctl_siwmlme(struct net_device *dev, struct iw_mlme *mlme = (struct iw_mlme *) extra; sdata = IEEE80211_DEV_TO_SUB_IF(dev); - if (sdata->vif.type != IEEE80211_IF_TYPE_STA && - sdata->vif.type != IEEE80211_IF_TYPE_IBSS) + if (sdata->vif.type != NL80211_IFTYPE_STATION && + sdata->vif.type != NL80211_IFTYPE_ADHOC) return -EINVAL; switch (mlme->cmd) { @@ -954,7 +954,7 @@ static int ieee80211_ioctl_giwencode(struct net_device *dev, erq->length = sdata->keys[idx]->conf.keylen; erq->flags |= IW_ENCODE_ENABLED; - if (sdata->vif.type == IEEE80211_IF_TYPE_STA) { + if (sdata->vif.type == NL80211_IFTYPE_STATION) { struct ieee80211_if_sta *ifsta = &sdata->u.sta; switch (ifsta->auth_alg) { case WLAN_AUTH_OPEN: @@ -1028,7 +1028,7 @@ static int ieee80211_ioctl_siwauth(struct net_device *dev, sdata->drop_unencrypted = !!data->value; break; case IW_AUTH_PRIVACY_INVOKED: - if (sdata->vif.type != IEEE80211_IF_TYPE_STA) + if (sdata->vif.type != NL80211_IFTYPE_STATION) ret = -EINVAL; else { sdata->u.sta.flags &= ~IEEE80211_STA_PRIVACY_INVOKED; @@ -1043,8 +1043,8 @@ static int ieee80211_ioctl_siwauth(struct net_device *dev, } break; case IW_AUTH_80211_AUTH_ALG: - if (sdata->vif.type == IEEE80211_IF_TYPE_STA || - sdata->vif.type == IEEE80211_IF_TYPE_IBSS) + if (sdata->vif.type == NL80211_IFTYPE_STATION || + sdata->vif.type == NL80211_IFTYPE_ADHOC) sdata->u.sta.auth_algs = data->value; else ret = -EOPNOTSUPP; @@ -1066,8 +1066,8 @@ static struct iw_statistics *ieee80211_get_wireless_stats(struct net_device *dev rcu_read_lock(); - if (sdata->vif.type == IEEE80211_IF_TYPE_STA || - sdata->vif.type == IEEE80211_IF_TYPE_IBSS) + if (sdata->vif.type == NL80211_IFTYPE_STATION || + sdata->vif.type == NL80211_IFTYPE_ADHOC) sta = sta_info_get(local, sdata->u.sta.bssid); if (!sta) { wstats->discard.fragment = 0; @@ -1097,8 +1097,8 @@ static int ieee80211_ioctl_giwauth(struct net_device *dev, switch (data->flags & IW_AUTH_INDEX) { case IW_AUTH_80211_AUTH_ALG: - if (sdata->vif.type == IEEE80211_IF_TYPE_STA || - sdata->vif.type == IEEE80211_IF_TYPE_IBSS) + if (sdata->vif.type == NL80211_IFTYPE_STATION || + sdata->vif.type == NL80211_IFTYPE_ADHOC) data->value = sdata->u.sta.auth_algs; else ret = -EOPNOTSUPP; -- cgit v1.2.3-70-g09d2 From 17741cdc264e4d768167766a252210e201c1519a Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Thu, 11 Sep 2008 00:02:02 +0200 Subject: mac80211: share STA information with driver This patch changes mac80211 to share some more data about stations with drivers. Should help iwlwifi and ath9k when they get around to updating, and might also help with implementing rate control algorithms without internals. Signed-off-by: Johannes Berg Cc: Sujith Manoharan Signed-off-by: John W. Linville --- drivers/net/wireless/ath9k/main.c | 30 ++++++++-------- drivers/net/wireless/b43/main.c | 5 +-- drivers/net/wireless/b43legacy/main.c | 2 +- drivers/net/wireless/iwlwifi/iwl-agn-rs.c | 12 +++---- drivers/net/wireless/iwlwifi/iwl-agn.c | 12 +++---- drivers/net/wireless/mac80211_hwsim.c | 3 +- include/net/mac80211.h | 53 ++++++++++++++++++++++++--- net/mac80211/cfg.c | 12 +++---- net/mac80211/debugfs_key.c | 3 +- net/mac80211/debugfs_sta.c | 6 ++-- net/mac80211/ht.c | 22 ++++++------ net/mac80211/iface.c | 3 +- net/mac80211/key.c | 2 +- net/mac80211/mesh_hwmp.c | 8 ++--- net/mac80211/mesh_plink.c | 44 +++++++++++------------ net/mac80211/mlme.c | 5 +-- net/mac80211/rx.c | 12 +++---- net/mac80211/sta_info.c | 59 +++++++++++++++++++------------ net/mac80211/sta_info.h | 7 ++-- net/mac80211/tkip.c | 2 +- net/mac80211/tx.c | 10 +++--- net/mac80211/wme.c | 2 +- net/mac80211/wpa.c | 2 +- 23 files changed, 189 insertions(+), 127 deletions(-) (limited to 'include/net') diff --git a/drivers/net/wireless/ath9k/main.c b/drivers/net/wireless/ath9k/main.c index 2a6e089062f..1ba18006f47 100644 --- a/drivers/net/wireless/ath9k/main.c +++ b/drivers/net/wireless/ath9k/main.c @@ -1405,7 +1405,7 @@ static void ath9k_configure_filter(struct ieee80211_hw *hw, static void ath9k_sta_notify(struct ieee80211_hw *hw, struct ieee80211_vif *vif, enum sta_notify_cmd cmd, - const u8 *addr) + struct ieee80211_sta *sta) { struct ath_softc *sc = hw->priv; struct ath_node *an; @@ -1413,19 +1413,18 @@ static void ath9k_sta_notify(struct ieee80211_hw *hw, DECLARE_MAC_BUF(mac); spin_lock_irqsave(&sc->node_lock, flags); - an = ath_node_find(sc, (u8 *) addr); + an = ath_node_find(sc, sta->addr); spin_unlock_irqrestore(&sc->node_lock, flags); switch (cmd) { case STA_NOTIFY_ADD: spin_lock_irqsave(&sc->node_lock, flags); if (!an) { - ath_node_attach(sc, (u8 *)addr, 0); + ath_node_attach(sc, sta->addr, 0); DPRINTF(sc, ATH_DBG_CONFIG, "%s: Attach a node: %s\n", - __func__, - print_mac(mac, addr)); + __func__, print_mac(mac, sta->addr)); } else { - ath_node_get(sc, (u8 *)addr); + ath_node_get(sc, sta->addr); } spin_unlock_irqrestore(&sc->node_lock, flags); break; @@ -1438,7 +1437,7 @@ static void ath9k_sta_notify(struct ieee80211_hw *hw, ath_node_put(sc, an, ATH9K_BH_STATUS_INTACT); DPRINTF(sc, ATH_DBG_CONFIG, "%s: Put a node: %s\n", __func__, - print_mac(mac, addr)); + print_mac(mac, sta->addr)); } break; default: @@ -1581,45 +1580,44 @@ static void ath9k_reset_tsf(struct ieee80211_hw *hw) static int ath9k_ampdu_action(struct ieee80211_hw *hw, enum ieee80211_ampdu_mlme_action action, - const u8 *addr, - u16 tid, - u16 *ssn) + struct ieee80211_sta *sta, + u16 tid, u16 *ssn) { struct ath_softc *sc = hw->priv; int ret = 0; switch (action) { case IEEE80211_AMPDU_RX_START: - ret = ath_rx_aggr_start(sc, addr, tid, ssn); + ret = ath_rx_aggr_start(sc, sta->addr, tid, ssn); if (ret < 0) DPRINTF(sc, ATH_DBG_FATAL, "%s: Unable to start RX aggregation\n", __func__); break; case IEEE80211_AMPDU_RX_STOP: - ret = ath_rx_aggr_stop(sc, addr, tid); + ret = ath_rx_aggr_stop(sc, sta->addr, tid); if (ret < 0) DPRINTF(sc, ATH_DBG_FATAL, "%s: Unable to stop RX aggregation\n", __func__); break; case IEEE80211_AMPDU_TX_START: - ret = ath_tx_aggr_start(sc, addr, tid, ssn); + ret = ath_tx_aggr_start(sc, sta->addr, tid, ssn); if (ret < 0) DPRINTF(sc, ATH_DBG_FATAL, "%s: Unable to start TX aggregation\n", __func__); else - ieee80211_start_tx_ba_cb_irqsafe(hw, (u8 *)addr, tid); + ieee80211_start_tx_ba_cb_irqsafe(hw, sta->addr, tid); break; case IEEE80211_AMPDU_TX_STOP: - ret = ath_tx_aggr_stop(sc, addr, tid); + ret = ath_tx_aggr_stop(sc, sta->addr, tid); if (ret < 0) DPRINTF(sc, ATH_DBG_FATAL, "%s: Unable to stop TX aggregation\n", __func__); - ieee80211_stop_tx_ba_cb_irqsafe(hw, (u8 *)addr, tid); + ieee80211_stop_tx_ba_cb_irqsafe(hw, sta->addr, tid); break; default: DPRINTF(sc, ATH_DBG_FATAL, diff --git a/drivers/net/wireless/b43/main.c b/drivers/net/wireless/b43/main.c index df7a1e7f4a5..0f628a29d83 100644 --- a/drivers/net/wireless/b43/main.c +++ b/drivers/net/wireless/b43/main.c @@ -4234,7 +4234,8 @@ out_unlock: return err; } -static int b43_op_beacon_set_tim(struct ieee80211_hw *hw, int aid, int set) +static int b43_op_beacon_set_tim(struct ieee80211_hw *hw, + struct ieee80211_sta *sta, bool set) { struct b43_wl *wl = hw_to_b43_wl(hw); unsigned long flags; @@ -4249,7 +4250,7 @@ static int b43_op_beacon_set_tim(struct ieee80211_hw *hw, int aid, int set) static void b43_op_sta_notify(struct ieee80211_hw *hw, struct ieee80211_vif *vif, enum sta_notify_cmd notify_cmd, - const u8 *addr) + struct ieee80211_sta *sta) { struct b43_wl *wl = hw_to_b43_wl(hw); diff --git a/drivers/net/wireless/b43legacy/main.c b/drivers/net/wireless/b43legacy/main.c index 6e425410c99..9fb1421cbec 100644 --- a/drivers/net/wireless/b43legacy/main.c +++ b/drivers/net/wireless/b43legacy/main.c @@ -3403,7 +3403,7 @@ out_unlock: } static int b43legacy_op_beacon_set_tim(struct ieee80211_hw *hw, - int aid, int set) + struct ieee80211_sta *sta, bool set) { struct b43legacy_wl *wl = hw_to_b43legacy_wl(hw); unsigned long flags; diff --git a/drivers/net/wireless/iwlwifi/iwl-agn-rs.c b/drivers/net/wireless/iwlwifi/iwl-agn-rs.c index 700da67ac28..af4e0b994e4 100644 --- a/drivers/net/wireless/iwlwifi/iwl-agn-rs.c +++ b/drivers/net/wireless/iwlwifi/iwl-agn-rs.c @@ -366,8 +366,8 @@ static void rs_tl_turn_on_agg_for_tid(struct iwl_priv *priv, if (state == HT_AGG_STATE_IDLE && rs_tl_get_load(lq_data, tid) > IWL_AGG_LOAD_THRESHOLD) { IWL_DEBUG_HT("Starting Tx agg: STA: %s tid: %d\n", - print_mac(mac, sta->addr), tid); - ieee80211_start_tx_ba_session(priv->hw, sta->addr, tid); + print_mac(mac, sta->sta.addr), tid); + ieee80211_start_tx_ba_session(priv->hw, sta->sta.addr, tid); } } @@ -2244,17 +2244,17 @@ static void rs_rate_init(void *priv_rate, void *priv_sta, lq_sta->ibss_sta_added = 0; if (priv->iw_mode == NL80211_IFTYPE_AP) { - u8 sta_id = iwl_find_station(priv, sta->addr); + u8 sta_id = iwl_find_station(priv, sta->sta.addr); DECLARE_MAC_BUF(mac); /* for IBSS the call are from tasklet */ IWL_DEBUG_RATE("LQ: ADD station %s\n", - print_mac(mac, sta->addr)); + print_mac(mac, sta->sta.addr)); if (sta_id == IWL_INVALID_STATION) { IWL_DEBUG_RATE("LQ: ADD station %s\n", - print_mac(mac, sta->addr)); - sta_id = iwl_add_station_flags(priv, sta->addr, + print_mac(mac, sta->sta.addr)); + sta_id = iwl_add_station_flags(priv, sta->sta.addr, 0, CMD_ASYNC, NULL); } if ((sta_id != IWL_INVALID_STATION)) { diff --git a/drivers/net/wireless/iwlwifi/iwl-agn.c b/drivers/net/wireless/iwlwifi/iwl-agn.c index e8db33bf5e5..5eeffb41d8c 100644 --- a/drivers/net/wireless/iwlwifi/iwl-agn.c +++ b/drivers/net/wireless/iwlwifi/iwl-agn.c @@ -3413,13 +3413,13 @@ static int iwl4965_mac_conf_tx(struct ieee80211_hw *hw, u16 queue, static int iwl4965_mac_ampdu_action(struct ieee80211_hw *hw, enum ieee80211_ampdu_mlme_action action, - const u8 *addr, u16 tid, u16 *ssn) + struct ieee80211_sta *sta, u16 tid, u16 *ssn) { struct iwl_priv *priv = hw->priv; DECLARE_MAC_BUF(mac); IWL_DEBUG_HT("A-MPDU action on addr %s tid %d\n", - print_mac(mac, addr), tid); + print_mac(mac, sta->addr), tid); if (!(priv->cfg->sku & IWL_SKU_N)) return -EACCES; @@ -3427,16 +3427,16 @@ static int iwl4965_mac_ampdu_action(struct ieee80211_hw *hw, switch (action) { case IEEE80211_AMPDU_RX_START: IWL_DEBUG_HT("start Rx\n"); - return iwl_rx_agg_start(priv, addr, tid, *ssn); + return iwl_rx_agg_start(priv, sta->addr, tid, *ssn); case IEEE80211_AMPDU_RX_STOP: IWL_DEBUG_HT("stop Rx\n"); - return iwl_rx_agg_stop(priv, addr, tid); + return iwl_rx_agg_stop(priv, sta->addr, tid); case IEEE80211_AMPDU_TX_START: IWL_DEBUG_HT("start Tx\n"); - return iwl_tx_agg_start(priv, addr, tid, ssn); + return iwl_tx_agg_start(priv, sta->addr, tid, ssn); case IEEE80211_AMPDU_TX_STOP: IWL_DEBUG_HT("stop Tx\n"); - return iwl_tx_agg_stop(priv, addr, tid); + return iwl_tx_agg_stop(priv, sta->addr, tid); default: IWL_DEBUG_HT("unknown\n"); return -EINVAL; diff --git a/drivers/net/wireless/mac80211_hwsim.c b/drivers/net/wireless/mac80211_hwsim.c index bdedf10fc86..173dd5d2c62 100644 --- a/drivers/net/wireless/mac80211_hwsim.c +++ b/drivers/net/wireless/mac80211_hwsim.c @@ -390,7 +390,8 @@ static void mac80211_hwsim_bss_info_changed(struct ieee80211_hw *hw, static void mac80211_hwsim_sta_notify(struct ieee80211_hw *hw, struct ieee80211_vif *vif, - enum sta_notify_cmd cmd, const u8 *addr) + enum sta_notify_cmd cmd, + struct ieee80211_sta *sta) { hwsim_check_magic(vif); } diff --git a/include/net/mac80211.h b/include/net/mac80211.h index 7f5ea55e00f..5a6a029da4d 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -300,6 +300,9 @@ enum mac80211_tx_control_flags { * (2) driver internal use (if applicable) * (3) TX status information - driver tells mac80211 what happened * + * The TX control's sta pointer is only valid during the ->tx call, + * it may be NULL. + * * @flags: transmit info flags, defined above * @band: TBD * @tx_rate_idx: TBD @@ -329,8 +332,8 @@ struct ieee80211_tx_info { struct { struct ieee80211_vif *vif; struct ieee80211_key_conf *hw_key; + struct ieee80211_sta *sta; unsigned long jiffies; - u16 aid; s8 rts_cts_rate_idx, alt_retry_rate_idx; u8 retry_limit; u8 icv_len; @@ -651,6 +654,29 @@ enum set_key_cmd { SET_KEY, DISABLE_KEY, }; +/** + * struct ieee80211_sta - station table entry + * + * A station table entry represents a station we are possibly + * communicating with. Since stations are RCU-managed in + * mac80211, any ieee80211_sta pointer you get access to must + * either be protected by rcu_read_lock() explicitly or implicitly, + * or you must take good care to not use such a pointer after a + * call to your sta_notify callback that removed it. + * + * @addr: MAC address + * @aid: AID we assigned to the station if we're an AP + * @drv_priv: data area for driver use, will always be aligned to + * sizeof(void *), size is determined in hw information. + */ +struct ieee80211_sta { + u8 addr[ETH_ALEN]; + u16 aid; + + /* must be last */ + u8 drv_priv[0] __attribute__((__aligned__(sizeof(void *)))); +}; + /** * enum sta_notify_cmd - sta notify command * @@ -795,6 +821,8 @@ enum ieee80211_hw_flags { * * @vif_data_size: size (in bytes) of the drv_priv data area * within &struct ieee80211_vif. + * @sta_data_size: size (in bytes) of the drv_priv data area + * within &struct ieee80211_sta. */ struct ieee80211_hw { struct ieee80211_conf conf; @@ -806,6 +834,7 @@ struct ieee80211_hw { unsigned int extra_tx_headroom; int channel_change_time; int vif_data_size; + int sta_data_size; u16 queues; u16 ampdu_queues; u16 max_listen_interval; @@ -1089,7 +1118,7 @@ enum ieee80211_ampdu_mlme_action { * This callback must be implemented and atomic. * * @set_tim: Set TIM bit. mac80211 calls this function when a TIM bit - * must be set or cleared for a given AID. Must be atomic. + * must be set or cleared for a given STA. Must be atomic. * * @set_key: See the section "Hardware crypto acceleration" * This callback can sleep, and is only called between add_interface @@ -1175,7 +1204,8 @@ struct ieee80211_ops { unsigned int changed_flags, unsigned int *total_flags, int mc_count, struct dev_addr_list *mc_list); - int (*set_tim)(struct ieee80211_hw *hw, int aid, int set); + int (*set_tim)(struct ieee80211_hw *hw, struct ieee80211_sta *sta, + bool set); int (*set_key)(struct ieee80211_hw *hw, enum set_key_cmd cmd, const u8 *local_address, const u8 *address, struct ieee80211_key_conf *key); @@ -1192,7 +1222,7 @@ struct ieee80211_ops { int (*set_retry_limit)(struct ieee80211_hw *hw, u32 short_retry, u32 long_retr); void (*sta_notify)(struct ieee80211_hw *hw, struct ieee80211_vif *vif, - enum sta_notify_cmd, const u8 *addr); + enum sta_notify_cmd, struct ieee80211_sta *sta); int (*conf_tx)(struct ieee80211_hw *hw, u16 queue, const struct ieee80211_tx_queue_params *params); int (*get_tx_stats)(struct ieee80211_hw *hw, @@ -1202,7 +1232,7 @@ struct ieee80211_ops { int (*tx_last_beacon)(struct ieee80211_hw *hw); int (*ampdu_action)(struct ieee80211_hw *hw, enum ieee80211_ampdu_mlme_action action, - const u8 *addr, u16 tid, u16 *ssn); + struct ieee80211_sta *sta, u16 tid, u16 *ssn); }; /** @@ -1752,4 +1782,17 @@ void ieee80211_stop_tx_ba_cb_irqsafe(struct ieee80211_hw *hw, const u8 *ra, */ void ieee80211_notify_mac(struct ieee80211_hw *hw, enum ieee80211_notification_types notif_type); + +/** + * ieee80211_find_sta - find a station + * + * @hw: pointer as obtained from ieee80211_alloc_hw() + * @addr: station's address + * + * This function must be called under RCU lock and the + * resulting pointer is only valid under RCU lock as well. + */ +struct ieee80211_sta *ieee80211_find_sta(struct ieee80211_hw *hw, + const u8 *addr); + #endif /* MAC80211_H */ diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index d004351050c..ed5e77ce627 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -364,7 +364,7 @@ static int ieee80211_dump_station(struct wiphy *wiphy, struct net_device *dev, sta = sta_info_get_by_idx(local, idx, dev); if (sta) { ret = 0; - memcpy(mac, sta->addr, ETH_ALEN); + memcpy(mac, sta->sta.addr, ETH_ALEN); sta_set_sinfo(sta, sinfo); } @@ -593,7 +593,7 @@ static void ieee80211_send_layer2_update(struct sta_info *sta) * Update response frame; IEEE Std 802.2-1998, 5.4.1.2.1 */ memset(msg->da, 0xff, ETH_ALEN); - memcpy(msg->sa, sta->addr, ETH_ALEN); + memcpy(msg->sa, sta->sta.addr, ETH_ALEN); msg->len = htons(6); msg->dsap = 0; msg->ssap = 0x01; /* NULL LSAP, CR Bit: Response */ @@ -648,9 +648,9 @@ static void sta_apply_parameters(struct ieee80211_local *local, */ if (params->aid) { - sta->aid = params->aid; - if (sta->aid > IEEE80211_MAX_AID) - sta->aid = 0; /* XXX: should this be an error? */ + sta->sta.aid = params->aid; + if (sta->sta.aid > IEEE80211_MAX_AID) + sta->sta.aid = 0; /* XXX: should this be an error? */ } if (params->listen_interval >= 0) @@ -919,7 +919,7 @@ static void mpath_set_pinfo(struct mesh_path *mpath, u8 *next_hop, struct mpath_info *pinfo) { if (mpath->next_hop) - memcpy(next_hop, mpath->next_hop->addr, ETH_ALEN); + memcpy(next_hop, mpath->next_hop->sta.addr, ETH_ALEN); else memset(next_hop, 0, ETH_ALEN); diff --git a/net/mac80211/debugfs_key.c b/net/mac80211/debugfs_key.c index cf82acec913..a3294d10932 100644 --- a/net/mac80211/debugfs_key.c +++ b/net/mac80211/debugfs_key.c @@ -206,7 +206,8 @@ void ieee80211_debugfs_key_add(struct ieee80211_key *key) rcu_read_lock(); sta = rcu_dereference(key->sta); if (sta) - sprintf(buf, "../../stations/%s", print_mac(mac, sta->addr)); + sprintf(buf, "../../stations/%s", + print_mac(mac, sta->sta.addr)); rcu_read_unlock(); /* using sta as a boolean is fine outside RCU lock */ diff --git a/net/mac80211/debugfs_sta.c b/net/mac80211/debugfs_sta.c index 6abe5427752..81f350eaf8a 100644 --- a/net/mac80211/debugfs_sta.c +++ b/net/mac80211/debugfs_sta.c @@ -50,7 +50,7 @@ static const struct file_operations sta_ ##name## _ops = { \ STA_READ_##format(name, field) \ STA_OPS(name) -STA_FILE(aid, aid, D); +STA_FILE(aid, sta.aid, D); STA_FILE(dev, sdata->dev->name, S); STA_FILE(rx_packets, rx_packets, LU); STA_FILE(tx_packets, tx_packets, LU); @@ -176,7 +176,7 @@ static ssize_t sta_agg_status_write(struct file *file, struct net_device *dev = sta->sdata->dev; struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr); struct ieee80211_hw *hw = &local->hw; - u8 *da = sta->addr; + u8 *da = sta->sta.addr; static int tid_static_tx[16] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}; static int tid_static_rx[16] = {1, 1, 1, 1, 1, 1, 1, 1, @@ -253,7 +253,7 @@ void ieee80211_sta_debugfs_add(struct sta_info *sta) if (!stations_dir) return; - mac = print_mac(mbuf, sta->addr); + mac = print_mac(mbuf, sta->sta.addr); sta->debugfs.dir = debugfs_create_dir(mac, stations_dir); if (!sta->debugfs.dir) diff --git a/net/mac80211/ht.c b/net/mac80211/ht.c index bc3c71ad7ae..dc7d9a3d70d 100644 --- a/net/mac80211/ht.c +++ b/net/mac80211/ht.c @@ -274,7 +274,7 @@ void ieee80211_sta_stop_rx_ba_session(struct ieee80211_sub_if_data *sdata, u8 *r #endif /* CONFIG_MAC80211_HT_DEBUG */ ret = local->ops->ampdu_action(hw, IEEE80211_AMPDU_RX_STOP, - ra, tid, NULL); + &sta->sta, tid, NULL); if (ret) printk(KERN_DEBUG "HW problem - can not stop rx " "aggregation for tid %d\n", tid); @@ -328,7 +328,7 @@ static void sta_addba_resp_timer_expired(unsigned long data) rcu_read_lock(); - sta = sta_info_get(local, temp_sta->addr); + sta = sta_info_get(local, temp_sta->sta.addr); if (!sta) { rcu_read_unlock(); return; @@ -354,7 +354,7 @@ static void sta_addba_resp_timer_expired(unsigned long data) /* go through the state check in stop_BA_session */ *state = HT_AGG_STATE_OPERATIONAL; spin_unlock_bh(&sta->lock); - ieee80211_stop_tx_ba_session(hw, temp_sta->addr, tid, + ieee80211_stop_tx_ba_session(hw, temp_sta->sta.addr, tid, WLAN_BACK_INITIATOR); timer_expired_exit: @@ -465,7 +465,7 @@ int ieee80211_start_tx_ba_session(struct ieee80211_hw *hw, u8 *ra, u16 tid) if (local->ops->ampdu_action) ret = local->ops->ampdu_action(hw, IEEE80211_AMPDU_TX_START, - ra, tid, &start_seq_num); + &sta->sta, tid, &start_seq_num); if (ret) { /* No need to requeue the packets in the agg queue, since we @@ -557,7 +557,7 @@ int ieee80211_stop_tx_ba_session(struct ieee80211_hw *hw, if (local->ops->ampdu_action) ret = local->ops->ampdu_action(hw, IEEE80211_AMPDU_TX_STOP, - ra, tid, NULL); + &sta->sta, tid, NULL); /* case HW denied going back to legacy */ if (ret) { @@ -767,7 +767,7 @@ static void sta_rx_agg_session_timer_expired(unsigned long data) #ifdef CONFIG_MAC80211_HT_DEBUG printk(KERN_DEBUG "rx session timer expired on tid %d\n", (u16)*ptid); #endif - ieee80211_sta_stop_rx_ba_session(sta->sdata, sta->addr, + ieee80211_sta_stop_rx_ba_session(sta->sdata, sta->sta.addr, (u16)*ptid, WLAN_BACK_TIMER, WLAN_REASON_QSTA_TIMEOUT); } @@ -874,7 +874,7 @@ void ieee80211_process_addba_request(struct ieee80211_local *local, if (local->ops->ampdu_action) ret = local->ops->ampdu_action(hw, IEEE80211_AMPDU_RX_START, - sta->addr, tid, &start_seq_num); + &sta->sta, tid, &start_seq_num); #ifdef CONFIG_MAC80211_HT_DEBUG printk(KERN_DEBUG "Rx A-MPDU request on tid %d result %d\n", tid, ret); #endif /* CONFIG_MAC80211_HT_DEBUG */ @@ -899,7 +899,7 @@ end: spin_unlock_bh(&sta->lock); end_no_lock: - ieee80211_send_addba_resp(sta->sdata, sta->addr, tid, + ieee80211_send_addba_resp(sta->sdata, sta->sta.addr, tid, dialog_token, status, 1, buf_size, timeout); } @@ -952,7 +952,7 @@ void ieee80211_process_addba_resp(struct ieee80211_local *local, /* this will allow the state check in stop_BA_session */ *state = HT_AGG_STATE_OPERATIONAL; spin_unlock_bh(&sta->lock); - ieee80211_stop_tx_ba_session(hw, sta->addr, tid, + ieee80211_stop_tx_ba_session(hw, sta->sta.addr, tid, WLAN_BACK_INITIATOR); } } @@ -979,14 +979,14 @@ void ieee80211_process_delba(struct ieee80211_sub_if_data *sdata, #endif /* CONFIG_MAC80211_HT_DEBUG */ if (initiator == WLAN_BACK_INITIATOR) - ieee80211_sta_stop_rx_ba_session(sdata, sta->addr, tid, + ieee80211_sta_stop_rx_ba_session(sdata, sta->sta.addr, tid, WLAN_BACK_INITIATOR, 0); else { /* WLAN_BACK_RECIPIENT */ spin_lock_bh(&sta->lock); sta->ampdu_mlme.tid_state_tx[tid] = HT_AGG_STATE_OPERATIONAL; spin_unlock_bh(&sta->lock); - ieee80211_stop_tx_ba_session(&local->hw, sta->addr, tid, + ieee80211_stop_tx_ba_session(&local->hw, sta->sta.addr, tid, WLAN_BACK_RECIPIENT); } } diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c index f528962b13e..a7ef0289fbd 100644 --- a/net/mac80211/iface.c +++ b/net/mac80211/iface.c @@ -336,7 +336,8 @@ static int ieee80211_stop(struct net_device *dev) list_for_each_entry_rcu(sta, &local->sta_list, list) { if (sta->sdata == sdata) - ieee80211_sta_tear_down_BA_sessions(sdata, sta->addr); + ieee80211_sta_tear_down_BA_sessions(sdata, + sta->sta.addr); } rcu_read_unlock(); diff --git a/net/mac80211/key.c b/net/mac80211/key.c index d5b95748db2..57afcd38cd9 100644 --- a/net/mac80211/key.c +++ b/net/mac80211/key.c @@ -123,7 +123,7 @@ static const u8 *get_mac_for_key(struct ieee80211_key *key) addr = zero_addr; if (key->sta) - addr = key->sta->addr; + addr = key->sta->sta.addr; return addr; } diff --git a/net/mac80211/mesh_hwmp.c b/net/mac80211/mesh_hwmp.c index 1fad792ad25..15a5c99270a 100644 --- a/net/mac80211/mesh_hwmp.c +++ b/net/mac80211/mesh_hwmp.c @@ -517,7 +517,7 @@ static void hwmp_prep_frame_process(struct ieee80211_sub_if_data *sdata, spin_unlock_bh(&mpath->state_lock); goto fail; } - memcpy(next_hop, mpath->next_hop->addr, ETH_ALEN); + memcpy(next_hop, mpath->next_hop->sta.addr, ETH_ALEN); spin_unlock_bh(&mpath->state_lock); --ttl; flags = PREP_IE_FLAGS(prep_elem); @@ -529,7 +529,7 @@ static void hwmp_prep_frame_process(struct ieee80211_sub_if_data *sdata, mesh_path_sel_frame_tx(MPATH_PREP, flags, orig_addr, cpu_to_le32(orig_dsn), 0, dst_addr, - cpu_to_le32(dst_dsn), mpath->next_hop->addr, hopcount, ttl, + cpu_to_le32(dst_dsn), mpath->next_hop->sta.addr, hopcount, ttl, cpu_to_le32(lifetime), cpu_to_le32(metric), 0, sdata); rcu_read_unlock(); @@ -557,7 +557,7 @@ static void hwmp_perr_frame_process(struct ieee80211_sub_if_data *sdata, if (mpath) { spin_lock_bh(&mpath->state_lock); if (mpath->flags & MESH_PATH_ACTIVE && - memcmp(ta, mpath->next_hop->addr, ETH_ALEN) == 0 && + memcmp(ta, mpath->next_hop->sta.addr, ETH_ALEN) == 0 && (!(mpath->flags & MESH_PATH_DSN_VALID) || DSN_GT(dst_dsn, mpath->dsn))) { mpath->flags &= ~MESH_PATH_ACTIVE; @@ -799,7 +799,7 @@ int mesh_nexthop_lookup(struct sk_buff *skb, mesh_queue_preq(mpath, PREQ_Q_F_START | PREQ_Q_F_REFRESH); } - memcpy(hdr->addr1, mpath->next_hop->addr, + memcpy(hdr->addr1, mpath->next_hop->sta.addr, ETH_ALEN); } else { if (!(mpath->flags & MESH_PATH_RESOLVING)) { diff --git a/net/mac80211/mesh_plink.c b/net/mac80211/mesh_plink.c index 990a4b7f6bc..debf7834dbc 100644 --- a/net/mac80211/mesh_plink.c +++ b/net/mac80211/mesh_plink.c @@ -275,7 +275,7 @@ static void mesh_plink_timer(unsigned long data) return; } mpl_dbg("Mesh plink timer for %s fired on state %d\n", - print_mac(mac, sta->addr), sta->plink_state); + print_mac(mac, sta->sta.addr), sta->plink_state); reason = 0; llid = sta->llid; plid = sta->plid; @@ -288,7 +288,7 @@ static void mesh_plink_timer(unsigned long data) if (sta->plink_retries < dot11MeshMaxRetries(sdata)) { u32 rand; mpl_dbg("Mesh plink for %s (retry, timeout): %d %d\n", - print_mac(mac, sta->addr), + print_mac(mac, sta->sta.addr), sta->plink_retries, sta->plink_timeout); get_random_bytes(&rand, sizeof(u32)); sta->plink_timeout = sta->plink_timeout + @@ -296,7 +296,7 @@ static void mesh_plink_timer(unsigned long data) ++sta->plink_retries; mod_plink_timer(sta, sta->plink_timeout); spin_unlock_bh(&sta->lock); - mesh_plink_frame_tx(sdata, PLINK_OPEN, sta->addr, llid, + mesh_plink_frame_tx(sdata, PLINK_OPEN, sta->sta.addr, llid, 0, 0); break; } @@ -309,7 +309,7 @@ static void mesh_plink_timer(unsigned long data) sta->plink_state = PLINK_HOLDING; mod_plink_timer(sta, dot11MeshHoldingTimeout(sdata)); spin_unlock_bh(&sta->lock); - mesh_plink_frame_tx(sdata, PLINK_CLOSE, sta->addr, llid, plid, + mesh_plink_frame_tx(sdata, PLINK_CLOSE, sta->sta.addr, llid, plid, reason); break; case PLINK_HOLDING: @@ -352,10 +352,10 @@ int mesh_plink_open(struct sta_info *sta) mesh_plink_timer_set(sta, dot11MeshRetryTimeout(sdata)); spin_unlock_bh(&sta->lock); mpl_dbg("Mesh plink: starting establishment with %s\n", - print_mac(mac, sta->addr)); + print_mac(mac, sta->sta.addr)); return mesh_plink_frame_tx(sdata, PLINK_OPEN, - sta->addr, llid, 0, 0); + sta->sta.addr, llid, 0, 0); } void mesh_plink_block(struct sta_info *sta) @@ -379,7 +379,7 @@ int mesh_plink_close(struct sta_info *sta) #endif mpl_dbg("Mesh plink: closing link with %s\n", - print_mac(mac, sta->addr)); + print_mac(mac, sta->sta.addr)); spin_lock_bh(&sta->lock); sta->reason = cpu_to_le16(MESH_LINK_CANCELLED); reason = sta->reason; @@ -400,7 +400,7 @@ int mesh_plink_close(struct sta_info *sta) llid = sta->llid; plid = sta->plid; spin_unlock_bh(&sta->lock); - mesh_plink_frame_tx(sta->sdata, PLINK_CLOSE, sta->addr, llid, + mesh_plink_frame_tx(sta->sdata, PLINK_CLOSE, sta->sta.addr, llid, plid, reason); return 0; } @@ -577,9 +577,9 @@ void mesh_rx_plink_frame(struct ieee80211_sub_if_data *sdata, struct ieee80211_m sta->llid = llid; mesh_plink_timer_set(sta, dot11MeshRetryTimeout(sdata)); spin_unlock_bh(&sta->lock); - mesh_plink_frame_tx(sdata, PLINK_OPEN, sta->addr, llid, + mesh_plink_frame_tx(sdata, PLINK_OPEN, sta->sta.addr, llid, 0, 0); - mesh_plink_frame_tx(sdata, PLINK_CONFIRM, sta->addr, + mesh_plink_frame_tx(sdata, PLINK_CONFIRM, sta->sta.addr, llid, plid, 0); break; default: @@ -604,7 +604,7 @@ void mesh_rx_plink_frame(struct ieee80211_sub_if_data *sdata, struct ieee80211_m llid = sta->llid; spin_unlock_bh(&sta->lock); - mesh_plink_frame_tx(sdata, PLINK_CLOSE, sta->addr, llid, + mesh_plink_frame_tx(sdata, PLINK_CLOSE, sta->sta.addr, llid, plid, reason); break; case OPN_ACPT: @@ -613,7 +613,7 @@ void mesh_rx_plink_frame(struct ieee80211_sub_if_data *sdata, struct ieee80211_m sta->plid = plid; llid = sta->llid; spin_unlock_bh(&sta->lock); - mesh_plink_frame_tx(sdata, PLINK_CONFIRM, sta->addr, llid, + mesh_plink_frame_tx(sdata, PLINK_CONFIRM, sta->sta.addr, llid, plid, 0); break; case CNF_ACPT: @@ -646,13 +646,13 @@ void mesh_rx_plink_frame(struct ieee80211_sub_if_data *sdata, struct ieee80211_m llid = sta->llid; spin_unlock_bh(&sta->lock); - mesh_plink_frame_tx(sdata, PLINK_CLOSE, sta->addr, llid, + mesh_plink_frame_tx(sdata, PLINK_CLOSE, sta->sta.addr, llid, plid, reason); break; case OPN_ACPT: llid = sta->llid; spin_unlock_bh(&sta->lock); - mesh_plink_frame_tx(sdata, PLINK_CONFIRM, sta->addr, llid, + mesh_plink_frame_tx(sdata, PLINK_CONFIRM, sta->sta.addr, llid, plid, 0); break; case CNF_ACPT: @@ -661,7 +661,7 @@ void mesh_rx_plink_frame(struct ieee80211_sub_if_data *sdata, struct ieee80211_m mesh_plink_inc_estab_count(sdata); spin_unlock_bh(&sta->lock); mpl_dbg("Mesh plink with %s ESTABLISHED\n", - print_mac(mac, sta->addr)); + print_mac(mac, sta->sta.addr)); break; default: spin_unlock_bh(&sta->lock); @@ -685,7 +685,7 @@ void mesh_rx_plink_frame(struct ieee80211_sub_if_data *sdata, struct ieee80211_m llid = sta->llid; spin_unlock_bh(&sta->lock); - mesh_plink_frame_tx(sdata, PLINK_CLOSE, sta->addr, llid, + mesh_plink_frame_tx(sdata, PLINK_CLOSE, sta->sta.addr, llid, plid, reason); break; case OPN_ACPT: @@ -694,8 +694,8 @@ void mesh_rx_plink_frame(struct ieee80211_sub_if_data *sdata, struct ieee80211_m mesh_plink_inc_estab_count(sdata); spin_unlock_bh(&sta->lock); mpl_dbg("Mesh plink with %s ESTABLISHED\n", - print_mac(mac, sta->addr)); - mesh_plink_frame_tx(sdata, PLINK_CONFIRM, sta->addr, llid, + print_mac(mac, sta->sta.addr)); + mesh_plink_frame_tx(sdata, PLINK_CONFIRM, sta->sta.addr, llid, plid, 0); break; default: @@ -714,13 +714,13 @@ void mesh_rx_plink_frame(struct ieee80211_sub_if_data *sdata, struct ieee80211_m llid = sta->llid; mod_plink_timer(sta, dot11MeshHoldingTimeout(sdata)); spin_unlock_bh(&sta->lock); - mesh_plink_frame_tx(sdata, PLINK_CLOSE, sta->addr, llid, + mesh_plink_frame_tx(sdata, PLINK_CLOSE, sta->sta.addr, llid, plid, reason); break; case OPN_ACPT: llid = sta->llid; spin_unlock_bh(&sta->lock); - mesh_plink_frame_tx(sdata, PLINK_CONFIRM, sta->addr, llid, + mesh_plink_frame_tx(sdata, PLINK_CONFIRM, sta->sta.addr, llid, plid, 0); break; default: @@ -743,8 +743,8 @@ void mesh_rx_plink_frame(struct ieee80211_sub_if_data *sdata, struct ieee80211_m llid = sta->llid; reason = sta->reason; spin_unlock_bh(&sta->lock); - mesh_plink_frame_tx(sdata, PLINK_CLOSE, sta->addr, llid, - plid, reason); + mesh_plink_frame_tx(sdata, PLINK_CLOSE, sta->sta.addr, + llid, plid, reason); break; default: spin_unlock_bh(&sta->lock); diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index 8a2cfd3609b..35c421b89dd 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -804,7 +804,7 @@ static void ieee80211_set_disassoc(struct ieee80211_sub_if_data *sdata, netif_tx_stop_all_queues(sdata->dev); netif_carrier_off(sdata->dev); - ieee80211_sta_tear_down_BA_sessions(sdata, sta->addr); + ieee80211_sta_tear_down_BA_sessions(sdata, sta->sta.addr); if (self_disconnected) { if (deauth) @@ -1507,7 +1507,8 @@ static void ieee80211_rx_bss_info(struct ieee80211_sub_if_data *sdata, printk(KERN_DEBUG "%s: updated supp_rates set " "for %s based on beacon info (0x%llx | " "0x%llx -> 0x%llx)\n", - sdata->dev->name, print_mac(mac, sta->addr), + sdata->dev->name, + print_mac(mac, sta->sta.addr), (unsigned long long) prev_rates, (unsigned long long) supp_rates, (unsigned long long) sta->supp_rates[band]); diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c index 8c3dda5f00b..92d898b901e 100644 --- a/net/mac80211/rx.c +++ b/net/mac80211/rx.c @@ -661,7 +661,7 @@ static void ap_sta_ps_start(struct net_device *dev, struct sta_info *sta) set_and_clear_sta_flags(sta, WLAN_STA_PS, WLAN_STA_PSPOLL); #ifdef CONFIG_MAC80211_VERBOSE_PS_DEBUG printk(KERN_DEBUG "%s: STA %s aid %d enters power save mode\n", - dev->name, print_mac(mac, sta->addr), sta->aid); + dev->name, print_mac(mac, sta->sta.addr), sta->sta.aid); #endif /* CONFIG_MAC80211_VERBOSE_PS_DEBUG */ } @@ -685,7 +685,7 @@ static int ap_sta_ps_end(struct net_device *dev, struct sta_info *sta) #ifdef CONFIG_MAC80211_VERBOSE_PS_DEBUG printk(KERN_DEBUG "%s: STA %s aid %d exits power save mode\n", - dev->name, print_mac(mac, sta->addr), sta->aid); + dev->name, print_mac(mac, sta->sta.addr), sta->sta.aid); #endif /* CONFIG_MAC80211_VERBOSE_PS_DEBUG */ /* Send all buffered frames to the station */ @@ -702,7 +702,7 @@ static int ap_sta_ps_end(struct net_device *dev, struct sta_info *sta) #ifdef CONFIG_MAC80211_VERBOSE_PS_DEBUG printk(KERN_DEBUG "%s: STA %s aid %d send PS frame " "since STA not sleeping anymore\n", dev->name, - print_mac(mac, sta->addr), sta->aid); + print_mac(mac, sta->sta.addr), sta->sta.aid); #endif /* CONFIG_MAC80211_VERBOSE_PS_DEBUG */ info->flags |= IEEE80211_TX_CTL_REQUEUE; dev_queue_xmit(skb); @@ -1007,7 +1007,7 @@ ieee80211_rx_h_ps_poll(struct ieee80211_rx_data *rx) #ifdef CONFIG_MAC80211_VERBOSE_PS_DEBUG printk(KERN_DEBUG "STA %s aid %d: PS Poll (entries after %d)\n", - print_mac(mac, rx->sta->addr), rx->sta->aid, + print_mac(mac, rx->sta->sta.addr), rx->sta->sta.aid, skb_queue_len(&rx->sta->ps_tx_buf)); #endif /* CONFIG_MAC80211_VERBOSE_PS_DEBUG */ @@ -1032,7 +1032,7 @@ ieee80211_rx_h_ps_poll(struct ieee80211_rx_data *rx) */ printk(KERN_DEBUG "%s: STA %s sent PS Poll even " "though there are no buffered frames for it\n", - rx->dev->name, print_mac(mac, rx->sta->addr)); + rx->dev->name, print_mac(mac, rx->sta->sta.addr)); #endif /* CONFIG_MAC80211_VERBOSE_PS_DEBUG */ } @@ -2140,7 +2140,7 @@ static u8 ieee80211_rx_reorder_ampdu(struct ieee80211_local *local, /* if this mpdu is fragmented - terminate rx aggregation session */ sc = le16_to_cpu(hdr->seq_ctrl); if (sc & IEEE80211_SCTL_FRAG) { - ieee80211_sta_stop_rx_ba_session(sta->sdata, sta->addr, + ieee80211_sta_stop_rx_ba_session(sta->sdata, sta->sta.addr, tid, 0, WLAN_REASON_QSTA_REQUIRE_SETUP); ret = 1; goto end_reorder; diff --git a/net/mac80211/sta_info.c b/net/mac80211/sta_info.c index 31246d8e532..d9774ac2e0f 100644 --- a/net/mac80211/sta_info.c +++ b/net/mac80211/sta_info.c @@ -73,11 +73,11 @@ static int sta_info_hash_del(struct ieee80211_local *local, { struct sta_info *s; - s = local->sta_hash[STA_HASH(sta->addr)]; + s = local->sta_hash[STA_HASH(sta->sta.addr)]; if (!s) return -ENOENT; if (s == sta) { - rcu_assign_pointer(local->sta_hash[STA_HASH(sta->addr)], + rcu_assign_pointer(local->sta_hash[STA_HASH(sta->sta.addr)], s->hnext); return 0; } @@ -94,13 +94,13 @@ static int sta_info_hash_del(struct ieee80211_local *local, /* protected by RCU */ static struct sta_info *__sta_info_find(struct ieee80211_local *local, - u8 *addr) + const u8 *addr) { struct sta_info *sta; sta = rcu_dereference(local->sta_hash[STA_HASH(addr)]); while (sta) { - if (compare_ether_addr(sta->addr, addr) == 0) + if (compare_ether_addr(sta->sta.addr, addr) == 0) break; sta = rcu_dereference(sta->hnext); } @@ -151,7 +151,7 @@ static void __sta_info_free(struct ieee80211_local *local, #ifdef CONFIG_MAC80211_VERBOSE_DEBUG printk(KERN_DEBUG "%s: Destroyed STA %s\n", - wiphy_name(local->hw.wiphy), print_mac(mbuf, sta->addr)); + wiphy_name(local->hw.wiphy), print_mac(mbuf, sta->sta.addr)); #endif /* CONFIG_MAC80211_VERBOSE_DEBUG */ kfree(sta); @@ -219,8 +219,8 @@ void sta_info_destroy(struct sta_info *sta) static void sta_info_hash_add(struct ieee80211_local *local, struct sta_info *sta) { - sta->hnext = local->sta_hash[STA_HASH(sta->addr)]; - rcu_assign_pointer(local->sta_hash[STA_HASH(sta->addr)], sta); + sta->hnext = local->sta_hash[STA_HASH(sta->sta.addr)]; + rcu_assign_pointer(local->sta_hash[STA_HASH(sta->sta.addr)], sta); } struct sta_info *sta_info_alloc(struct ieee80211_sub_if_data *sdata, @@ -231,14 +231,14 @@ struct sta_info *sta_info_alloc(struct ieee80211_sub_if_data *sdata, int i; DECLARE_MAC_BUF(mbuf); - sta = kzalloc(sizeof(*sta), gfp); + sta = kzalloc(sizeof(*sta) + local->hw.sta_data_size, gfp); if (!sta) return NULL; spin_lock_init(&sta->lock); spin_lock_init(&sta->flaglock); - memcpy(sta->addr, addr, ETH_ALEN); + memcpy(sta->sta.addr, addr, ETH_ALEN); sta->local = local; sta->sdata = sdata; @@ -271,7 +271,7 @@ struct sta_info *sta_info_alloc(struct ieee80211_sub_if_data *sdata, #ifdef CONFIG_MAC80211_VERBOSE_DEBUG printk(KERN_DEBUG "%s: Allocated STA %s\n", - wiphy_name(local->hw.wiphy), print_mac(mbuf, sta->addr)); + wiphy_name(local->hw.wiphy), print_mac(mbuf, sta->sta.addr)); #endif /* CONFIG_MAC80211_VERBOSE_DEBUG */ #ifdef CONFIG_MAC80211_MESH @@ -300,15 +300,15 @@ int sta_info_insert(struct sta_info *sta) goto out_free; } - if (WARN_ON(compare_ether_addr(sta->addr, sdata->dev->dev_addr) == 0 || - is_multicast_ether_addr(sta->addr))) { + if (WARN_ON(compare_ether_addr(sta->sta.addr, sdata->dev->dev_addr) == 0 || + is_multicast_ether_addr(sta->sta.addr))) { err = -EINVAL; goto out_free; } spin_lock_irqsave(&local->sta_lock, flags); /* check if STA exists already */ - if (__sta_info_find(local, sta->addr)) { + if (__sta_info_find(local, sta->sta.addr)) { spin_unlock_irqrestore(&local->sta_lock, flags); err = -EEXIST; goto out_free; @@ -325,12 +325,12 @@ int sta_info_insert(struct sta_info *sta) u.ap); local->ops->sta_notify(local_to_hw(local), &sdata->vif, - STA_NOTIFY_ADD, sta->addr); + STA_NOTIFY_ADD, &sta->sta); } #ifdef CONFIG_MAC80211_VERBOSE_DEBUG printk(KERN_DEBUG "%s: Inserted STA %s\n", - wiphy_name(local->hw.wiphy), print_mac(mac, sta->addr)); + wiphy_name(local->hw.wiphy), print_mac(mac, sta->sta.addr)); #endif /* CONFIG_MAC80211_VERBOSE_DEBUG */ spin_unlock_irqrestore(&local->sta_lock, flags); @@ -379,11 +379,12 @@ static void __sta_info_set_tim_bit(struct ieee80211_if_ap *bss, { BUG_ON(!bss); - __bss_tim_set(bss, sta->aid); + __bss_tim_set(bss, sta->sta.aid); if (sta->local->ops->set_tim) { sta->local->tim_in_locked_section = true; - sta->local->ops->set_tim(local_to_hw(sta->local), sta->aid, 1); + sta->local->ops->set_tim(local_to_hw(sta->local), + &sta->sta, true); sta->local->tim_in_locked_section = false; } } @@ -404,11 +405,12 @@ static void __sta_info_clear_tim_bit(struct ieee80211_if_ap *bss, { BUG_ON(!bss); - __bss_tim_clear(bss, sta->aid); + __bss_tim_clear(bss, sta->sta.aid); if (sta->local->ops->set_tim) { sta->local->tim_in_locked_section = true; - sta->local->ops->set_tim(local_to_hw(sta->local), sta->aid, 0); + sta->local->ops->set_tim(local_to_hw(sta->local), + &sta->sta, false); sta->local->tim_in_locked_section = false; } } @@ -462,7 +464,7 @@ static void __sta_info_unlink(struct sta_info **sta) u.ap); local->ops->sta_notify(local_to_hw(local), &sdata->vif, - STA_NOTIFY_REMOVE, (*sta)->addr); + STA_NOTIFY_REMOVE, &(*sta)->sta); } if (ieee80211_vif_is_mesh(&sdata->vif)) { @@ -474,7 +476,7 @@ static void __sta_info_unlink(struct sta_info **sta) #ifdef CONFIG_MAC80211_VERBOSE_DEBUG printk(KERN_DEBUG "%s: Removed STA %s\n", - wiphy_name(local->hw.wiphy), print_mac(mbuf, (*sta)->addr)); + wiphy_name(local->hw.wiphy), print_mac(mbuf, (*sta)->sta.addr)); #endif /* CONFIG_MAC80211_VERBOSE_DEBUG */ /* @@ -570,7 +572,7 @@ static void sta_info_cleanup_expire_buffered(struct ieee80211_local *local, local->total_ps_buffered--; #ifdef CONFIG_MAC80211_VERBOSE_PS_DEBUG printk(KERN_DEBUG "Buffered frame expired (STA " - "%s)\n", print_mac(mac, sta->addr)); + "%s)\n", print_mac(mac, sta->sta.addr)); #endif dev_kfree_skb(skb); @@ -817,7 +819,7 @@ void ieee80211_sta_expire(struct ieee80211_sub_if_data *sdata, if (time_after(jiffies, sta->last_rx + exp_time)) { #ifdef CONFIG_MAC80211_IBSS_DEBUG printk(KERN_DEBUG "%s: expiring inactive STA %s\n", - sdata->dev->name, print_mac(mac, sta->addr)); + sdata->dev->name, print_mac(mac, sta->sta.addr)); #endif __sta_info_unlink(&sta); if (sta) @@ -828,3 +830,14 @@ void ieee80211_sta_expire(struct ieee80211_sub_if_data *sdata, list_for_each_entry_safe(sta, tmp, &tmp_list, list) sta_info_destroy(sta); } + +struct ieee80211_sta *ieee80211_find_sta(struct ieee80211_hw *hw, + const u8 *addr) +{ + struct sta_info *sta = __sta_info_find(hw_to_local(hw), addr); + + if (!sta) + return NULL; + return &sta->sta; +} +EXPORT_SYMBOL(ieee80211_find_sta); diff --git a/net/mac80211/sta_info.h b/net/mac80211/sta_info.h index 22007990099..e7ce12dbf27 100644 --- a/net/mac80211/sta_info.h +++ b/net/mac80211/sta_info.h @@ -218,6 +218,7 @@ struct sta_ampdu_mlme { * @plink_timeout: TBD * @plink_timer: TBD * @debugfs: debug filesystem info + * @sta: station information we share with the driver */ struct sta_info { /* General information, mostly static */ @@ -232,8 +233,7 @@ struct sta_info { spinlock_t flaglock; struct ieee80211_ht_info ht_info; u64 supp_rates[IEEE80211_NUM_BANDS]; - u8 addr[ETH_ALEN]; - u16 aid; + u16 listen_interval; /* @@ -327,6 +327,9 @@ struct sta_info { struct dentry *agg_status; } debugfs; #endif + + /* keep last! */ + struct ieee80211_sta sta; }; static inline enum plink_state sta_plink_state(struct sta_info *sta) diff --git a/net/mac80211/tkip.c b/net/mac80211/tkip.c index 995f7af3d25..34b32bc8f60 100644 --- a/net/mac80211/tkip.c +++ b/net/mac80211/tkip.c @@ -304,7 +304,7 @@ int ieee80211_tkip_decrypt_data(struct crypto_blkcipher *tfm, key->flags & KEY_FLAG_UPLOADED_TO_HARDWARE) { u8 bcast[ETH_ALEN] = {0xff, 0xff, 0xff, 0xff, 0xff, 0xff}; - u8 *sta_addr = key->sta->addr; + u8 *sta_addr = key->sta->sta.addr; if (is_multicast_ether_addr(ra)) sta_addr = bcast; diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c index f4bcc589d67..07bf228d0b1 100644 --- a/net/mac80211/tx.c +++ b/net/mac80211/tx.c @@ -381,7 +381,7 @@ ieee80211_tx_h_unicast_ps_buf(struct ieee80211_tx_data *tx) #ifdef CONFIG_MAC80211_VERBOSE_PS_DEBUG printk(KERN_DEBUG "STA %s aid %d: PS buffer (entries " "before %d)\n", - print_mac(mac, sta->addr), sta->aid, + print_mac(mac, sta->sta.addr), sta->sta.aid, skb_queue_len(&sta->ps_tx_buf)); #endif /* CONFIG_MAC80211_VERBOSE_PS_DEBUG */ if (tx->local->total_ps_buffered >= TOTAL_MAX_TX_BUFFER) @@ -392,7 +392,7 @@ ieee80211_tx_h_unicast_ps_buf(struct ieee80211_tx_data *tx) if (net_ratelimit()) { printk(KERN_DEBUG "%s: STA %s TX " "buffer full - dropping oldest frame\n", - tx->dev->name, print_mac(mac, sta->addr)); + tx->dev->name, print_mac(mac, sta->sta.addr)); } #endif dev_kfree_skb(old); @@ -411,7 +411,7 @@ ieee80211_tx_h_unicast_ps_buf(struct ieee80211_tx_data *tx) else if (unlikely(test_sta_flags(sta, WLAN_STA_PS))) { printk(KERN_DEBUG "%s: STA %s in PS mode, but pspoll " "set -> send frame\n", tx->dev->name, - print_mac(mac, sta->addr)); + print_mac(mac, sta->sta.addr)); } #endif /* CONFIG_MAC80211_VERBOSE_PS_DEBUG */ clear_sta_flags(sta, WLAN_STA_PSPOLL); @@ -528,7 +528,7 @@ ieee80211_tx_h_misc(struct ieee80211_tx_data *tx) sband = tx->local->hw.wiphy->bands[tx->channel->band]; if (tx->sta) - info->control.aid = tx->sta->aid; + info->control.sta = &tx->sta->sta; if (!info->control.retry_limit) { if (!is_multicast_ether_addr(hdr->addr1)) { @@ -608,7 +608,7 @@ ieee80211_tx_h_misc(struct ieee80211_tx_data *tx) } if (tx->sta) - info->control.aid = tx->sta->aid; + info->control.sta = &tx->sta->sta; return TX_CONTINUE; } diff --git a/net/mac80211/wme.c b/net/mac80211/wme.c index 7229e958879..6748dedcab5 100644 --- a/net/mac80211/wme.c +++ b/net/mac80211/wme.c @@ -210,7 +210,7 @@ int ieee80211_ht_agg_queue_add(struct ieee80211_local *local, DECLARE_MAC_BUF(mac); printk(KERN_DEBUG "allocated aggregation queue" " %d tid %d addr %s pool=0x%lX\n", - i, tid, print_mac(mac, sta->addr), + i, tid, print_mac(mac, sta->sta.addr), local->queue_pool[0]); } #endif /* CONFIG_MAC80211_HT_DEBUG */ diff --git a/net/mac80211/wpa.c b/net/mac80211/wpa.c index 78021780b88..37ae9a959f6 100644 --- a/net/mac80211/wpa.c +++ b/net/mac80211/wpa.c @@ -256,7 +256,7 @@ ieee80211_crypto_tkip_decrypt(struct ieee80211_rx_data *rx) res = ieee80211_tkip_decrypt_data(rx->local->wep_rx_tfm, key, skb->data + hdrlen, - skb->len - hdrlen, rx->sta->addr, + skb->len - hdrlen, rx->sta->sta.addr, hdr->addr1, hwaccel, rx->queue, &rx->tkip_iv32, &rx->tkip_iv16); -- cgit v1.2.3-70-g09d2 From 323ce79a9cdbf838ea577677b1ddace8e0b4d4c6 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Thu, 11 Sep 2008 02:45:11 +0200 Subject: mac80211: share sta->supp_rates As more preparation for a saner rate control algorithm API, share the supported rates bitmap in the public API. Signed-off-by: Johannes Berg Signed-off-by: John W. Linville --- drivers/net/wireless/ath9k/rc.c | 2 +- drivers/net/wireless/iwlwifi/iwl-3945-rs.c | 4 ++-- drivers/net/wireless/iwlwifi/iwl-agn-rs.c | 6 +++--- include/net/mac80211.h | 2 ++ net/mac80211/cfg.c | 2 +- net/mac80211/mesh_plink.c | 4 ++-- net/mac80211/mlme.c | 12 ++++++------ net/mac80211/rate.h | 2 +- net/mac80211/sta_info.h | 2 -- 9 files changed, 18 insertions(+), 18 deletions(-) (limited to 'include/net') diff --git a/drivers/net/wireless/ath9k/rc.c b/drivers/net/wireless/ath9k/rc.c index 390019ed398..226d70c73fe 100644 --- a/drivers/net/wireless/ath9k/rc.c +++ b/drivers/net/wireless/ath9k/rc.c @@ -1825,7 +1825,7 @@ static void ath_setup_rates(struct ieee80211_local *local, struct sta_info *sta) sband = local->hw.wiphy->bands[local->hw.conf.channel->band]; for (i = 0; i < sband->n_bitrates; i++) { - if (sta->supp_rates[local->hw.conf.channel->band] & BIT(i)) { + if (sta->sta.supp_rates[local->hw.conf.channel->band] & BIT(i)) { rc_priv->neg_rates.rs_rates[j] = (sband->bitrates[i].bitrate * 2) / 10; j++; diff --git a/drivers/net/wireless/iwlwifi/iwl-3945-rs.c b/drivers/net/wireless/iwlwifi/iwl-3945-rs.c index 46b672c3458..f751b909759 100644 --- a/drivers/net/wireless/iwlwifi/iwl-3945-rs.c +++ b/drivers/net/wireless/iwlwifi/iwl-3945-rs.c @@ -333,7 +333,7 @@ static void rs_rate_init(void *priv_rate, void *priv_sta, * after assoc.. */ for (i = IWL_RATE_COUNT - 1; i >= 0; i--) { - if (sta->supp_rates[local->hw.conf.channel->band] & (1 << i)) { + if (sta->sta.supp_rates[local->hw.conf.channel->band] & (1 << i)) { sta->txrate_idx = i; break; } @@ -680,7 +680,7 @@ static void rs_get_rate(void *priv_rate, struct net_device *dev, rs_sta = (void *)sta->rate_ctrl_priv; - rate_mask = sta->supp_rates[sband->band]; + rate_mask = sta->sta.supp_rates[sband->band]; index = min(rs_sta->last_txrate_idx & 0xffff, IWL_RATE_COUNT - 1); if (sband->band == IEEE80211_BAND_5GHZ) diff --git a/drivers/net/wireless/iwlwifi/iwl-agn-rs.c b/drivers/net/wireless/iwlwifi/iwl-agn-rs.c index 54f076bb202..f7191a9a7fb 100644 --- a/drivers/net/wireless/iwlwifi/iwl-agn-rs.c +++ b/drivers/net/wireless/iwlwifi/iwl-agn-rs.c @@ -1731,7 +1731,7 @@ static void rs_rate_scale_perform(struct iwl_priv *priv, return; lq_sta = (struct iwl_lq_sta *)sta->rate_ctrl_priv; - lq_sta->supp_rates = sta->supp_rates[lq_sta->band]; + lq_sta->supp_rates = sta->sta.supp_rates[lq_sta->band]; tid = rs_tl_add_packet(lq_sta, hdr); @@ -2233,7 +2233,7 @@ static void rs_rate_init(void *priv_rate, void *priv_sta, sband = local->hw.wiphy->bands[local->hw.conf.channel->band]; lq_sta->flush_timer = 0; - lq_sta->supp_rates = sta->supp_rates[sband->band]; + lq_sta->supp_rates = sta->sta.supp_rates[sband->band]; sta->txrate_idx = 3; for (j = 0; j < LQ_SIZE; j++) for (i = 0; i < IWL_RATE_COUNT; i++) @@ -2270,7 +2270,7 @@ static void rs_rate_init(void *priv_rate, void *priv_sta, /* Find highest tx rate supported by hardware and destination station */ for (i = 0; i < sband->n_bitrates; i++) - if (sta->supp_rates[sband->band] & BIT(i)) + if (sta->sta.supp_rates[sband->band] & BIT(i)) sta->txrate_idx = i; lq_sta->last_txrate_idx = sta->txrate_idx; diff --git a/include/net/mac80211.h b/include/net/mac80211.h index 5a6a029da4d..ef8e4cc32c2 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -666,10 +666,12 @@ enum set_key_cmd { * * @addr: MAC address * @aid: AID we assigned to the station if we're an AP + * @supp_rates: Bitmap of supported rates (per band) * @drv_priv: data area for driver use, will always be aligned to * sizeof(void *), size is determined in hw information. */ struct ieee80211_sta { + u64 supp_rates[IEEE80211_NUM_BANDS]; u8 addr[ETH_ALEN]; u16 aid; diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index ed5e77ce627..47988d2eb15 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -667,7 +667,7 @@ static void sta_apply_parameters(struct ieee80211_local *local, rates |= BIT(j); } } - sta->supp_rates[local->oper_channel->band] = rates; + sta->sta.supp_rates[local->oper_channel->band] = rates; } if (params->ht_capa) { diff --git a/net/mac80211/mesh_plink.c b/net/mac80211/mesh_plink.c index debf7834dbc..faac101c0f8 100644 --- a/net/mac80211/mesh_plink.c +++ b/net/mac80211/mesh_plink.c @@ -106,7 +106,7 @@ static struct sta_info *mesh_plink_alloc(struct ieee80211_sub_if_data *sdata, return NULL; sta->flags = WLAN_STA_AUTHORIZED; - sta->supp_rates[local->hw.conf.channel->band] = rates; + sta->sta.supp_rates[local->hw.conf.channel->band] = rates; return sta; } @@ -243,7 +243,7 @@ void mesh_neighbour_update(u8 *hw_addr, u64 rates, struct ieee80211_sub_if_data } sta->last_rx = jiffies; - sta->supp_rates[local->hw.conf.channel->band] = rates; + sta->sta.supp_rates[local->hw.conf.channel->band] = rates; if (peer_accepting_plinks && sta->plink_state == PLINK_LISTEN && sdata->u.mesh.accepting_plinks && sdata->u.mesh.mshcfg.auto_open_plinks) diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index 35c421b89dd..c049f336e58 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -1301,7 +1301,7 @@ static void ieee80211_rx_mgmt_assoc_resp(struct ieee80211_sub_if_data *sdata, } } - sta->supp_rates[local->hw.conf.channel->band] = rates; + sta->sta.supp_rates[local->hw.conf.channel->band] = rates; sdata->bss_conf.basic_rates = basic_rates; /* cf. IEEE 802.11 9.2.12 */ @@ -1497,13 +1497,13 @@ static void ieee80211_rx_bss_info(struct ieee80211_sub_if_data *sdata, if (sta) { u64 prev_rates; - prev_rates = sta->supp_rates[band]; + prev_rates = sta->sta.supp_rates[band]; /* make sure mandatory rates are always added */ - sta->supp_rates[band] = supp_rates | + sta->sta.supp_rates[band] = supp_rates | ieee80211_mandatory_rates(local, band); #ifdef CONFIG_MAC80211_IBSS_DEBUG - if (sta->supp_rates[band] != prev_rates) + if (sta->sta.supp_rates[band] != prev_rates) printk(KERN_DEBUG "%s: updated supp_rates set " "for %s based on beacon info (0x%llx | " "0x%llx -> 0x%llx)\n", @@ -1511,7 +1511,7 @@ static void ieee80211_rx_bss_info(struct ieee80211_sub_if_data *sdata, print_mac(mac, sta->sta.addr), (unsigned long long) prev_rates, (unsigned long long) supp_rates, - (unsigned long long) sta->supp_rates[band]); + (unsigned long long) sta->sta.supp_rates[band]); #endif } else { ieee80211_ibss_add_sta(sdata, NULL, mgmt->bssid, @@ -2339,7 +2339,7 @@ struct sta_info *ieee80211_ibss_add_sta(struct ieee80211_sub_if_data *sdata, set_sta_flags(sta, WLAN_STA_AUTHORIZED); /* make sure mandatory rates are always added */ - sta->supp_rates[band] = supp_rates | + sta->sta.supp_rates[band] = supp_rates | ieee80211_mandatory_rates(local, band); rate_control_rate_init(sta, local); diff --git a/net/mac80211/rate.h b/net/mac80211/rate.h index ede7ab56f65..5f18c27eb90 100644 --- a/net/mac80211/rate.h +++ b/net/mac80211/rate.h @@ -134,7 +134,7 @@ static inline int rate_supported(struct sta_info *sta, enum ieee80211_band band, int index) { - return (sta == NULL || sta->supp_rates[band] & BIT(index)); + return (sta == NULL || sta->sta.supp_rates[band] & BIT(index)); } static inline s8 diff --git a/net/mac80211/sta_info.h b/net/mac80211/sta_info.h index df42d181545..4dafa044b2f 100644 --- a/net/mac80211/sta_info.h +++ b/net/mac80211/sta_info.h @@ -168,7 +168,6 @@ struct sta_ampdu_mlme { * in the header file. * @flaglock: spinlock for flags accesses * @ht_info: HT capabilities of this STA - * @supp_rates: Bitmap of supported rates (per band) * @addr: MAC address of this STA * @aid: STA's unique AID (1..2007, 0 = not assigned yet), * only used in AP (and IBSS?) mode @@ -228,7 +227,6 @@ struct sta_info { spinlock_t lock; spinlock_t flaglock; struct ieee80211_ht_info ht_info; - u64 supp_rates[IEEE80211_NUM_BANDS]; u16 listen_interval; -- cgit v1.2.3-70-g09d2 From 687c7c0807371aeaa94ff2fff511eeb326b5c5de Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Thu, 11 Sep 2008 03:14:11 +0200 Subject: mac80211: share sta_info->ht_info Rate control algorithms may need access to a station's HT capabilities, so share the ht_info struct in the public station API. Signed-off-by: Johannes Berg Signed-off-by: John W. Linville --- drivers/net/wireless/iwlwifi/iwl-agn-rs.c | 6 +++--- include/net/mac80211.h | 2 ++ net/mac80211/cfg.c | 2 +- net/mac80211/mlme.c | 4 ++-- net/mac80211/sta_info.h | 2 -- 5 files changed, 8 insertions(+), 8 deletions(-) (limited to 'include/net') diff --git a/drivers/net/wireless/iwlwifi/iwl-agn-rs.c b/drivers/net/wireless/iwlwifi/iwl-agn-rs.c index a8711c314e6..f45a752e93c 100644 --- a/drivers/net/wireless/iwlwifi/iwl-agn-rs.c +++ b/drivers/net/wireless/iwlwifi/iwl-agn-rs.c @@ -1154,10 +1154,10 @@ static int rs_switch_to_mimo2(struct iwl_priv *priv, s8 is_green = lq_sta->is_green; if (!(conf->flags & IEEE80211_CONF_SUPPORT_HT_MODE) || - !sta->ht_info.ht_supported) + !sta->sta.ht_info.ht_supported) return -1; - if (((sta->ht_info.cap & IEEE80211_HT_CAP_SM_PS) >> 2) + if (((sta->sta.ht_info.cap & IEEE80211_HT_CAP_SM_PS) >> 2) == WLAN_HT_CAP_SM_PS_STATIC) return -1; @@ -1222,7 +1222,7 @@ static int rs_switch_to_siso(struct iwl_priv *priv, s32 rate; if (!(conf->flags & IEEE80211_CONF_SUPPORT_HT_MODE) || - !sta->ht_info.ht_supported) + !sta->sta.ht_info.ht_supported) return -1; IWL_DEBUG_RATE("LQ: try to switch to SISO\n"); diff --git a/include/net/mac80211.h b/include/net/mac80211.h index ef8e4cc32c2..d6669fd3ffa 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -667,6 +667,7 @@ enum set_key_cmd { * @addr: MAC address * @aid: AID we assigned to the station if we're an AP * @supp_rates: Bitmap of supported rates (per band) + * @ht_info: HT capabilities of this STA * @drv_priv: data area for driver use, will always be aligned to * sizeof(void *), size is determined in hw information. */ @@ -674,6 +675,7 @@ struct ieee80211_sta { u64 supp_rates[IEEE80211_NUM_BANDS]; u8 addr[ETH_ALEN]; u16 aid; + struct ieee80211_ht_info ht_info; /* must be last */ u8 drv_priv[0] __attribute__((__aligned__(sizeof(void *)))); diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index 47988d2eb15..e2574885db4 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -672,7 +672,7 @@ static void sta_apply_parameters(struct ieee80211_local *local, if (params->ht_capa) { ieee80211_ht_cap_ie_to_ht_info(params->ht_capa, - &sta->ht_info); + &sta->sta.ht_info); } if (ieee80211_vif_is_mesh(&sdata->vif) && params->plink_action) { diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index c049f336e58..8611a8318c9 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -1316,11 +1316,11 @@ static void ieee80211_rx_mgmt_assoc_resp(struct ieee80211_sub_if_data *sdata, struct ieee80211_ht_bss_info bss_info; ieee80211_ht_cap_ie_to_ht_info( (struct ieee80211_ht_cap *) - elems.ht_cap_elem, &sta->ht_info); + elems.ht_cap_elem, &sta->sta.ht_info); ieee80211_ht_addt_info_ie_to_ht_bss_info( (struct ieee80211_ht_addt_info *) elems.ht_info_elem, &bss_info); - ieee80211_handle_ht(local, 1, &sta->ht_info, &bss_info); + ieee80211_handle_ht(local, 1, &sta->sta.ht_info, &bss_info); } rate_control_rate_init(sta, local); diff --git a/net/mac80211/sta_info.h b/net/mac80211/sta_info.h index 5d8fabf7a68..b773c7b8d29 100644 --- a/net/mac80211/sta_info.h +++ b/net/mac80211/sta_info.h @@ -167,7 +167,6 @@ struct sta_ampdu_mlme { * @lock: used for locking all fields that require locking, see comments * in the header file. * @flaglock: spinlock for flags accesses - * @ht_info: HT capabilities of this STA * @addr: MAC address of this STA * @aid: STA's unique AID (1..2007, 0 = not assigned yet), * only used in AP (and IBSS?) mode @@ -226,7 +225,6 @@ struct sta_info { void *rate_ctrl_priv; spinlock_t lock; spinlock_t flaglock; - struct ieee80211_ht_info ht_info; u16 listen_interval; -- cgit v1.2.3-70-g09d2 From 25d834e16294c8dfd923dae6bdb8a055391a99a5 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Fri, 12 Sep 2008 22:52:47 +0200 Subject: mac80211: fix virtual interfaces vs. injection Currently, virtual interface pointers passed to drivers might be from monitor interfaces and as such completely uninitialised because we do not tell the driver about monitor interfaces when those are created. Instead of passing them, we should therefore indicate to the driver that there is no information; do that by passing a NULL value and adjust drivers to cope with it. As a result, some mac80211 API functions also need to cope with a NULL vif pointer so drivers can still call them unconditionally. Also, when injecting frames we really don't want to pass NULL all the time, if we know we are the source address of a frame and have a local interface for that address, we can to use that interface. This also helps with processing the frame correctly for that interface which will help the 802.11w implementation. It's not entirely correct for VLANs or WDS interfaces because there the MAC address isn't unique, but it's already a lot better than what we do now. Finally, when injecting without a matching local interface, don't assign sequence numbers at all. Signed-off-by: Johannes Berg Signed-off-by: John W. Linville --- drivers/net/wireless/mac80211_hwsim.c | 3 +- drivers/net/wireless/rt2x00/rt2x00queue.c | 21 ++++++---- include/net/mac80211.h | 1 + net/mac80211/tx.c | 64 +++++++++++++++++++++++++++++-- net/mac80211/util.c | 37 ++++++++++++------ 5 files changed, 101 insertions(+), 25 deletions(-) (limited to 'include/net') diff --git a/drivers/net/wireless/mac80211_hwsim.c b/drivers/net/wireless/mac80211_hwsim.c index da8aa83481f..631d65b73ed 100644 --- a/drivers/net/wireless/mac80211_hwsim.c +++ b/drivers/net/wireless/mac80211_hwsim.c @@ -258,7 +258,8 @@ static int mac80211_hwsim_tx(struct ieee80211_hw *hw, struct sk_buff *skb) txi = IEEE80211_SKB_CB(skb); - hwsim_check_magic(txi->control.vif); + if (txi->control.vif) + hwsim_check_magic(txi->control.vif); if (txi->control.sta) hwsim_check_sta_magic(txi->control.sta); diff --git a/drivers/net/wireless/rt2x00/rt2x00queue.c b/drivers/net/wireless/rt2x00/rt2x00queue.c index a5e965068c8..b7f4fe8fba6 100644 --- a/drivers/net/wireless/rt2x00/rt2x00queue.c +++ b/drivers/net/wireless/rt2x00/rt2x00queue.c @@ -155,7 +155,6 @@ static void rt2x00queue_create_tx_descriptor(struct queue_entry *entry, { struct rt2x00_dev *rt2x00dev = entry->queue->rt2x00dev; struct ieee80211_tx_info *tx_info = IEEE80211_SKB_CB(entry->skb); - struct rt2x00_intf *intf = vif_to_intf(tx_info->control.vif); struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)entry->skb->data; struct ieee80211_rate *rate = ieee80211_get_tx_rate(rt2x00dev->hw, tx_info); @@ -278,16 +277,22 @@ static void rt2x00queue_create_tx_descriptor(struct queue_entry *entry, * sequence counter given by mac80211. */ if (tx_info->flags & IEEE80211_TX_CTL_ASSIGN_SEQ) { - spin_lock_irqsave(&intf->seqlock, irqflags); + if (likely(tx_info->control.vif)) { + struct rt2x00_intf *intf; - if (test_bit(ENTRY_TXD_FIRST_FRAGMENT, &txdesc->flags)) - intf->seqno += 0x10; - hdr->seq_ctrl &= cpu_to_le16(IEEE80211_SCTL_FRAG); - hdr->seq_ctrl |= cpu_to_le16(intf->seqno); + intf = vif_to_intf(tx_info->control.vif); - spin_unlock_irqrestore(&intf->seqlock, irqflags); + spin_lock_irqsave(&intf->seqlock, irqflags); - __set_bit(ENTRY_TXD_GENERATE_SEQ, &txdesc->flags); + if (test_bit(ENTRY_TXD_FIRST_FRAGMENT, &txdesc->flags)) + intf->seqno += 0x10; + hdr->seq_ctrl &= cpu_to_le16(IEEE80211_SCTL_FRAG); + hdr->seq_ctrl |= cpu_to_le16(intf->seqno); + + spin_unlock_irqrestore(&intf->seqlock, irqflags); + + __set_bit(ENTRY_TXD_GENERATE_SEQ, &txdesc->flags); + } } /* diff --git a/include/net/mac80211.h b/include/net/mac80211.h index d6669fd3ffa..003e4a03874 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -330,6 +330,7 @@ struct ieee80211_tx_info { union { struct { + /* NB: vif can be NULL for injected frames */ struct ieee80211_vif *vif; struct ieee80211_key_conf *hw_key; struct ieee80211_sta *sta; diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c index 698c8233e6b..c12f361d718 100644 --- a/net/mac80211/tx.c +++ b/net/mac80211/tx.c @@ -624,7 +624,14 @@ ieee80211_tx_h_sequence(struct ieee80211_tx_data *tx) u8 *qc; int tid; - /* only for injected frames */ + /* + * Packet injection may want to control the sequence + * number, if we have no matching interface then we + * neither assign one ourselves nor ask the driver to. + */ + if (unlikely(!info->control.vif)) + return TX_CONTINUE; + if (unlikely(ieee80211_is_ctl(hdr->frame_control))) return TX_CONTINUE; @@ -849,7 +856,6 @@ __ieee80211_parse_tx_radiotap(struct ieee80211_tx_data *tx, sband = tx->local->hw.wiphy->bands[tx->channel->band]; skb->do_not_encrypt = 1; - info->flags |= IEEE80211_TX_CTL_INJECTED; tx->flags &= ~IEEE80211_TX_FRAGMENTED; /* @@ -981,7 +987,7 @@ __ieee80211_tx_prepare(struct ieee80211_tx_data *tx, /* process and remove the injection radiotap header */ sdata = IEEE80211_DEV_TO_SUB_IF(dev); - if (unlikely(sdata->vif.type == NL80211_IFTYPE_MONITOR)) { + if (unlikely(info->flags & IEEE80211_TX_CTL_INJECTED)) { if (__ieee80211_parse_tx_radiotap(tx, skb) == TX_DROP) return TX_DROP; @@ -1300,6 +1306,11 @@ int ieee80211_master_start_xmit(struct sk_buff *skb, struct ieee80211_sub_if_data *osdata; int headroom; bool may_encrypt; + enum { + NOT_MONITOR, + FOUND_SDATA, + UNKNOWN_ADDRESS, + } monitor_iface = NOT_MONITOR; int ret; if (skb->iif) @@ -1335,6 +1346,50 @@ int ieee80211_master_start_xmit(struct sk_buff *skb, IEEE80211_IFSTA_MESH_CTR_INC(&osdata->u.mesh, fwded_frames); } + } else if (unlikely(osdata->vif.type == NL80211_IFTYPE_MONITOR)) { + struct ieee80211_sub_if_data *sdata; + struct ieee80211_local *local = osdata->local; + struct ieee80211_hdr *hdr; + int hdrlen; + u16 len_rthdr; + + info->flags |= IEEE80211_TX_CTL_INJECTED; + monitor_iface = UNKNOWN_ADDRESS; + + len_rthdr = ieee80211_get_radiotap_len(skb->data); + hdr = (struct ieee80211_hdr *)skb->data + len_rthdr; + hdrlen = ieee80211_hdrlen(hdr->frame_control); + + /* check the header is complete in the frame */ + if (likely(skb->len >= len_rthdr + hdrlen)) { + /* + * We process outgoing injected frames that have a + * local address we handle as though they are our + * own frames. + * This code here isn't entirely correct, the local + * MAC address is not necessarily enough to find + * the interface to use; for that proper VLAN/WDS + * support we will need a different mechanism. + */ + + rcu_read_lock(); + list_for_each_entry_rcu(sdata, &local->interfaces, + list) { + if (!netif_running(sdata->dev)) + continue; + if (compare_ether_addr(sdata->dev->dev_addr, + hdr->addr2)) { + dev_hold(sdata->dev); + dev_put(odev); + osdata = sdata; + odev = osdata->dev; + skb->iif = sdata->dev->ifindex; + monitor_iface = FOUND_SDATA; + break; + } + } + rcu_read_unlock(); + } } may_encrypt = !skb->do_not_encrypt; @@ -1355,7 +1410,8 @@ int ieee80211_master_start_xmit(struct sk_buff *skb, osdata = container_of(osdata->bss, struct ieee80211_sub_if_data, u.ap); - info->control.vif = &osdata->vif; + if (likely(monitor_iface != UNKNOWN_ADDRESS)) + info->control.vif = &osdata->vif; ret = ieee80211_tx(odev, skb); dev_put(odev); diff --git a/net/mac80211/util.c b/net/mac80211/util.c index 6eb222369bc..f32561ec224 100644 --- a/net/mac80211/util.c +++ b/net/mac80211/util.c @@ -231,16 +231,21 @@ __le16 ieee80211_generic_frame_duration(struct ieee80211_hw *hw, struct ieee80211_rate *rate) { struct ieee80211_local *local = hw_to_local(hw); - struct ieee80211_sub_if_data *sdata = vif_to_sdata(vif); + struct ieee80211_sub_if_data *sdata; u16 dur; int erp; + bool short_preamble = false; erp = 0; - if (sdata->flags & IEEE80211_SDATA_OPERATING_GMODE) - erp = rate->flags & IEEE80211_RATE_ERP_G; + if (vif) { + sdata = vif_to_sdata(vif); + short_preamble = sdata->bss_conf.use_short_preamble; + if (sdata->flags & IEEE80211_SDATA_OPERATING_GMODE) + erp = rate->flags & IEEE80211_RATE_ERP_G; + } dur = ieee80211_frame_duration(local, frame_len, rate->bitrate, erp, - sdata->bss_conf.use_short_preamble); + short_preamble); return cpu_to_le16(dur); } @@ -252,7 +257,7 @@ __le16 ieee80211_rts_duration(struct ieee80211_hw *hw, { struct ieee80211_local *local = hw_to_local(hw); struct ieee80211_rate *rate; - struct ieee80211_sub_if_data *sdata = vif_to_sdata(vif); + struct ieee80211_sub_if_data *sdata; bool short_preamble; int erp; u16 dur; @@ -260,13 +265,17 @@ __le16 ieee80211_rts_duration(struct ieee80211_hw *hw, sband = local->hw.wiphy->bands[local->hw.conf.channel->band]; - short_preamble = sdata->bss_conf.use_short_preamble; + short_preamble = false; rate = &sband->bitrates[frame_txctl->control.rts_cts_rate_idx]; erp = 0; - if (sdata->flags & IEEE80211_SDATA_OPERATING_GMODE) - erp = rate->flags & IEEE80211_RATE_ERP_G; + if (vif) { + sdata = vif_to_sdata(vif); + short_preamble = sdata->bss_conf.use_short_preamble; + if (sdata->flags & IEEE80211_SDATA_OPERATING_GMODE) + erp = rate->flags & IEEE80211_RATE_ERP_G; + } /* CTS duration */ dur = ieee80211_frame_duration(local, 10, rate->bitrate, @@ -289,7 +298,7 @@ __le16 ieee80211_ctstoself_duration(struct ieee80211_hw *hw, { struct ieee80211_local *local = hw_to_local(hw); struct ieee80211_rate *rate; - struct ieee80211_sub_if_data *sdata = vif_to_sdata(vif); + struct ieee80211_sub_if_data *sdata; bool short_preamble; int erp; u16 dur; @@ -297,12 +306,16 @@ __le16 ieee80211_ctstoself_duration(struct ieee80211_hw *hw, sband = local->hw.wiphy->bands[local->hw.conf.channel->band]; - short_preamble = sdata->bss_conf.use_short_preamble; + short_preamble = false; rate = &sband->bitrates[frame_txctl->control.rts_cts_rate_idx]; erp = 0; - if (sdata->flags & IEEE80211_SDATA_OPERATING_GMODE) - erp = rate->flags & IEEE80211_RATE_ERP_G; + if (vif) { + sdata = vif_to_sdata(vif); + short_preamble = sdata->bss_conf.use_short_preamble; + if (sdata->flags & IEEE80211_SDATA_OPERATING_GMODE) + erp = rate->flags & IEEE80211_RATE_ERP_G; + } /* Data frame duration */ dur = ieee80211_frame_duration(local, frame_len, rate->bitrate, -- cgit v1.2.3-70-g09d2 From 64edc2736e23994e0334b70c5ff08dc33e2ebbd9 Mon Sep 17 00:00:00 2001 From: Ilpo Järvinen Date: Sat, 20 Sep 2008 21:18:32 -0700 Subject: tcp: Partial hint clearing has again become meaningless MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Ie., the difference between partial and all clearing doesn't exists anymore since the SACK optimizations got dropped by an sacktag rewrite. Signed-off-by: Ilpo Järvinen Signed-off-by: David S. Miller --- include/net/tcp.h | 7 +------ net/ipv4/tcp_input.c | 5 ++--- net/ipv4/tcp_output.c | 4 ++-- 3 files changed, 5 insertions(+), 11 deletions(-) (limited to 'include/net') diff --git a/include/net/tcp.h b/include/net/tcp.h index 8983386356a..b7167632695 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -1035,7 +1035,7 @@ static inline void tcp_mib_init(struct net *net) } /* from STCP */ -static inline void tcp_clear_retrans_hints_partial(struct tcp_sock *tp) +static inline void tcp_clear_all_retrans_hints(struct tcp_sock *tp) { tp->lost_skb_hint = NULL; tp->scoreboard_skb_hint = NULL; @@ -1043,11 +1043,6 @@ static inline void tcp_clear_retrans_hints_partial(struct tcp_sock *tp) tp->forward_skb_hint = NULL; } -static inline void tcp_clear_all_retrans_hints(struct tcp_sock *tp) -{ - tcp_clear_retrans_hints_partial(tp); -} - /* MD5 Signature */ struct crypto_hash; diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index f79a5160729..7306bfb16cd 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -1883,7 +1883,7 @@ static void tcp_enter_frto_loss(struct sock *sk, int allowed_segments, int flag) tp->high_seq = tp->snd_nxt; TCP_ECN_queue_cwr(tp); - tcp_clear_retrans_hints_partial(tp); + tcp_clear_all_retrans_hints(tp); } static void tcp_clear_retrans_partial(struct tcp_sock *tp) @@ -1934,12 +1934,11 @@ void tcp_enter_loss(struct sock *sk, int how) /* Push undo marker, if it was plain RTO and nothing * was retransmitted. */ tp->undo_marker = tp->snd_una; - tcp_clear_retrans_hints_partial(tp); } else { tp->sacked_out = 0; tp->fackets_out = 0; - tcp_clear_all_retrans_hints(tp); } + tcp_clear_all_retrans_hints(tp); tcp_for_write_queue(skb, sk) { if (skb == tcp_send_head(sk)) diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 8165f5aa8c7..11490958a09 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -750,7 +750,7 @@ int tcp_fragment(struct sock *sk, struct sk_buff *skb, u32 len, BUG_ON(len > skb->len); - tcp_clear_retrans_hints_partial(tp); + tcp_clear_all_retrans_hints(tp); nsize = skb_headlen(skb) - len; if (nsize < 0) nsize = 0; @@ -1823,7 +1823,7 @@ static void tcp_retrans_try_collapse(struct sock *sk, struct sk_buff *skb, tp->packets_out -= tcp_skb_pcount(next_skb); /* changed transmit queue under us so clear hints */ - tcp_clear_retrans_hints_partial(tp); + tcp_clear_all_retrans_hints(tp); sk_wmem_free_skb(sk, next_skb); } -- cgit v1.2.3-70-g09d2 From 006f582c73f4eda35e06fd323193c3df43fb3459 Mon Sep 17 00:00:00 2001 From: Ilpo Järvinen Date: Sat, 20 Sep 2008 21:20:20 -0700 Subject: tcp: convert retransmit_cnt_hint to seqno MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Main benefit in this is that we can then freely point the retransmit_skb_hint to anywhere we want to because there's no longer need to know what would be the count changes involve, and since this is really used only as a terminator, unnecessary work is one time walk at most, and if some retransmissions are necessary after that point later on, the walk is not full waste of time anyway. Since retransmit_high must be kept valid, all lost markers must ensure that. Now I also have learned how those "holes" in the rexmittable skbs can appear, mtu probe does them. So I removed the misleading comment as well. Signed-off-by: Ilpo Järvinen Signed-off-by: David S. Miller --- include/linux/tcp.h | 2 +- include/net/tcp.h | 2 ++ net/ipv4/tcp_input.c | 34 ++++++++++++++++++++-------------- net/ipv4/tcp_output.c | 25 +++++++------------------ 4 files changed, 30 insertions(+), 33 deletions(-) (limited to 'include/net') diff --git a/include/linux/tcp.h b/include/linux/tcp.h index 2e2557388e3..d7637c4b284 100644 --- a/include/linux/tcp.h +++ b/include/linux/tcp.h @@ -358,7 +358,7 @@ struct tcp_sock { */ int lost_cnt_hint; - int retransmit_cnt_hint; + u32 retransmit_high; /* L-bits may be on up to this seqno */ u32 lost_retrans_low; /* Sent seq after any rxmit (lowest) */ diff --git a/include/net/tcp.h b/include/net/tcp.h index b7167632695..d0e90c50722 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -472,6 +472,8 @@ extern void tcp_send_delayed_ack(struct sock *sk); /* tcp_input.c */ extern void tcp_cwnd_application_limited(struct sock *sk); +extern void tcp_skb_mark_lost_uncond_verify(struct tcp_sock *tp, + struct sk_buff *skb); /* tcp_timer.c */ extern void tcp_init_xmit_timers(struct sock *); diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 12512336dbd..d271cc82500 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -979,17 +979,17 @@ static void tcp_update_reordering(struct sock *sk, const int metric, } } -/* RFC: This is from the original, I doubt that this is necessary at all: - * clear xmit_retrans hint if seq of this skb is beyond hint. How could we - * retransmitted past LOST markings in the first place? I'm not fully sure - * about undo and end of connection cases, which can cause R without L? - */ +/* This must be called before lost_out is incremented */ static void tcp_verify_retransmit_hint(struct tcp_sock *tp, struct sk_buff *skb) { - if ((tp->retransmit_skb_hint != NULL) && + if ((tp->retransmit_skb_hint == NULL) || before(TCP_SKB_CB(skb)->seq, TCP_SKB_CB(tp->retransmit_skb_hint)->seq)) - tp->retransmit_skb_hint = NULL; + tp->retransmit_skb_hint = skb; + + if (!tp->lost_out || + after(TCP_SKB_CB(skb)->end_seq, tp->retransmit_high)) + tp->retransmit_high = TCP_SKB_CB(skb)->end_seq; } static void tcp_skb_mark_lost(struct tcp_sock *tp, struct sk_buff *skb) @@ -1002,6 +1002,16 @@ static void tcp_skb_mark_lost(struct tcp_sock *tp, struct sk_buff *skb) } } +void tcp_skb_mark_lost_uncond_verify(struct tcp_sock *tp, struct sk_buff *skb) +{ + tcp_verify_retransmit_hint(tp, skb); + + if (!(TCP_SKB_CB(skb)->sacked & (TCPCB_LOST|TCPCB_SACKED_ACKED))) { + tp->lost_out += tcp_skb_pcount(skb); + TCP_SKB_CB(skb)->sacked |= TCPCB_LOST; + } +} + /* This procedure tags the retransmission queue when SACKs arrive. * * We have three tag bits: SACKED(S), RETRANS(R) and LOST(L). @@ -1178,13 +1188,7 @@ static void tcp_mark_lost_retrans(struct sock *sk) TCP_SKB_CB(skb)->sacked &= ~TCPCB_SACKED_RETRANS; tp->retrans_out -= tcp_skb_pcount(skb); - /* clear lost hint */ - tp->retransmit_skb_hint = NULL; - - if (!(TCP_SKB_CB(skb)->sacked & (TCPCB_LOST|TCPCB_SACKED_ACKED))) { - tp->lost_out += tcp_skb_pcount(skb); - TCP_SKB_CB(skb)->sacked |= TCPCB_LOST; - } + tcp_skb_mark_lost_uncond_verify(tp, skb); NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPLOSTRETRANSMIT); } else { if (before(ack_seq, new_low_seq)) @@ -1890,6 +1894,7 @@ static void tcp_enter_frto_loss(struct sock *sk, int allowed_segments, int flag) if (!(TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED)) { TCP_SKB_CB(skb)->sacked |= TCPCB_LOST; tp->lost_out += tcp_skb_pcount(skb); + tp->retransmit_high = TCP_SKB_CB(skb)->end_seq; } } tcp_verify_left_out(tp); @@ -1974,6 +1979,7 @@ void tcp_enter_loss(struct sock *sk, int how) TCP_SKB_CB(skb)->sacked &= ~TCPCB_SACKED_ACKED; TCP_SKB_CB(skb)->sacked |= TCPCB_LOST; tp->lost_out += tcp_skb_pcount(skb); + tp->retransmit_high = TCP_SKB_CB(skb)->end_seq; } } tcp_verify_left_out(tp); diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 11490958a09..cfae61b40c4 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -1838,7 +1838,7 @@ void tcp_simple_retransmit(struct sock *sk) struct tcp_sock *tp = tcp_sk(sk); struct sk_buff *skb; unsigned int mss = tcp_current_mss(sk, 0); - int lost = 0; + u32 prior_lost = tp->lost_out; tcp_for_write_queue(skb, sk) { if (skb == tcp_send_head(sk)) @@ -1849,17 +1849,13 @@ void tcp_simple_retransmit(struct sock *sk) TCP_SKB_CB(skb)->sacked &= ~TCPCB_SACKED_RETRANS; tp->retrans_out -= tcp_skb_pcount(skb); } - if (!(TCP_SKB_CB(skb)->sacked & TCPCB_LOST)) { - TCP_SKB_CB(skb)->sacked |= TCPCB_LOST; - tp->lost_out += tcp_skb_pcount(skb); - lost = 1; - } + tcp_skb_mark_lost_uncond_verify(tp, skb); } } tcp_clear_all_retrans_hints(tp); - if (!lost) + if (prior_lost == tp->lost_out) return; if (tcp_is_reno(tp)) @@ -2009,15 +2005,11 @@ void tcp_xmit_retransmit_queue(struct sock *sk) const struct inet_connection_sock *icsk = inet_csk(sk); struct tcp_sock *tp = tcp_sk(sk); struct sk_buff *skb; - int packet_cnt; - if (tp->retransmit_skb_hint) { + if (tp->retransmit_skb_hint) skb = tp->retransmit_skb_hint; - packet_cnt = tp->retransmit_cnt_hint; - } else { + else skb = tcp_write_queue_head(sk); - packet_cnt = 0; - } /* First pass: retransmit lost packets. */ if (tp->lost_out) { @@ -2028,7 +2020,6 @@ void tcp_xmit_retransmit_queue(struct sock *sk) break; /* we could do better than to assign each time */ tp->retransmit_skb_hint = skb; - tp->retransmit_cnt_hint = packet_cnt; /* Assume this retransmit will generate * only one packet for congestion window @@ -2039,6 +2030,8 @@ void tcp_xmit_retransmit_queue(struct sock *sk) */ if (tcp_packets_in_flight(tp) >= tp->snd_cwnd) return; + if (!before(TCP_SKB_CB(skb)->seq, tp->retransmit_high)) + break; if (sacked & TCPCB_LOST) { if (!(sacked & (TCPCB_SACKED_ACKED|TCPCB_SACKED_RETRANS))) { @@ -2059,10 +2052,6 @@ void tcp_xmit_retransmit_queue(struct sock *sk) inet_csk(sk)->icsk_rto, TCP_RTO_MAX); } - - packet_cnt += tcp_skb_pcount(skb); - if (packet_cnt >= tp->lost_out) - break; } } } -- cgit v1.2.3-70-g09d2 From 0e1c54c2a405494281e0639aacc90db03b50ae77 Mon Sep 17 00:00:00 2001 From: Ilpo Järvinen Date: Sat, 20 Sep 2008 21:24:21 -0700 Subject: tcp: reorganize retransmit code loops MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Both loops are quite similar, so they can be combined with little effort. As a result, forward_skb_hint becomes obsolete as well. Signed-off-by: Ilpo Järvinen Signed-off-by: David S. Miller --- include/linux/tcp.h | 1 - include/net/tcp.h | 1 - net/ipv4/tcp_output.c | 79 +++++++++++++++++++++------------------------------ 3 files changed, 33 insertions(+), 48 deletions(-) (limited to 'include/net') diff --git a/include/linux/tcp.h b/include/linux/tcp.h index d7637c4b284..76729062829 100644 --- a/include/linux/tcp.h +++ b/include/linux/tcp.h @@ -342,7 +342,6 @@ struct tcp_sock { struct sk_buff* lost_skb_hint; struct sk_buff *scoreboard_skb_hint; struct sk_buff *retransmit_skb_hint; - struct sk_buff *forward_skb_hint; struct sk_buff_head out_of_order_queue; /* Out of order segments go here */ diff --git a/include/net/tcp.h b/include/net/tcp.h index d0e90c50722..220f54cf42e 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -1042,7 +1042,6 @@ static inline void tcp_clear_all_retrans_hints(struct tcp_sock *tp) tp->lost_skb_hint = NULL; tp->scoreboard_skb_hint = NULL; tp->retransmit_skb_hint = NULL; - tp->forward_skb_hint = NULL; } /* MD5 Signature */ diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 9f44be633ef..b5b4ddcdda4 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -2032,7 +2032,9 @@ void tcp_xmit_retransmit_queue(struct sock *sk) const struct inet_connection_sock *icsk = inet_csk(sk); struct tcp_sock *tp = tcp_sk(sk); struct sk_buff *skb; + struct sk_buff *hole = NULL; int mib_idx; + int fwd_rexmitting = 0; if (!tp->lost_out) tp->retransmit_high = tp->snd_una; @@ -2049,7 +2051,8 @@ void tcp_xmit_retransmit_queue(struct sock *sk) if (skb == tcp_send_head(sk)) break; /* we could do better than to assign each time */ - tp->retransmit_skb_hint = skb; + if (hole == NULL) + tp->retransmit_skb_hint = skb; /* Assume this retransmit will generate * only one packet for congestion window @@ -2060,65 +2063,49 @@ void tcp_xmit_retransmit_queue(struct sock *sk) */ if (tcp_packets_in_flight(tp) >= tp->snd_cwnd) return; - if (!before(TCP_SKB_CB(skb)->seq, tp->retransmit_high)) - break; - if (sacked & (TCPCB_SACKED_ACKED|TCPCB_SACKED_RETRANS)) - continue; - - if (!(sacked & TCPCB_LOST)) - continue; - - if (tcp_retransmit_skb(sk, skb)) { - tp->retransmit_skb_hint = NULL; - return; - } - if (icsk->icsk_ca_state != TCP_CA_Loss) - mib_idx = LINUX_MIB_TCPFASTRETRANS; - else - mib_idx = LINUX_MIB_TCPSLOWSTARTRETRANS; - NET_INC_STATS_BH(sock_net(sk), mib_idx); - - if (skb == tcp_write_queue_head(sk)) - inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, - inet_csk(sk)->icsk_rto, - TCP_RTO_MAX); - } - - /* OK, demanded retransmission is finished. */ - if (!tcp_can_forward_retransmit(sk)) - return; - if (tp->forward_skb_hint) - skb = tp->forward_skb_hint; - else - skb = tcp_write_queue_head(sk); + if (fwd_rexmitting) { +begin_fwd: + if (!before(TCP_SKB_CB(skb)->seq, tcp_highest_sack_seq(tp))) + break; + mib_idx = LINUX_MIB_TCPFORWARDRETRANS; - tcp_for_write_queue_from(skb, sk) { - if (skb == tcp_send_head(sk)) - break; - tp->forward_skb_hint = skb; + } else if (!before(TCP_SKB_CB(skb)->seq, tp->retransmit_high)) { + if (!tcp_can_forward_retransmit(sk)) + break; + /* Backtrack if necessary to non-L'ed skb */ + if (hole != NULL) { + skb = hole; + hole = NULL; + } + fwd_rexmitting = 1; + goto begin_fwd; - if (!before(TCP_SKB_CB(skb)->seq, tcp_highest_sack_seq(tp))) - break; + } else if (!(sacked & TCPCB_LOST)) { + if (hole == NULL && !(sacked & TCPCB_SACKED_RETRANS)) + hole = skb; + continue; - if (tcp_packets_in_flight(tp) >= tp->snd_cwnd) - break; + } else { + if (icsk->icsk_ca_state != TCP_CA_Loss) + mib_idx = LINUX_MIB_TCPFASTRETRANS; + else + mib_idx = LINUX_MIB_TCPSLOWSTARTRETRANS; + } - if (TCP_SKB_CB(skb)->sacked & TCPCB_TAGBITS) + if (sacked & (TCPCB_SACKED_ACKED|TCPCB_SACKED_RETRANS)) continue; - /* Ok, retransmit it. */ if (tcp_retransmit_skb(sk, skb)) { - tp->forward_skb_hint = NULL; - break; + tp->retransmit_skb_hint = NULL; + return; } + NET_INC_STATS_BH(sock_net(sk), mib_idx); if (skb == tcp_write_queue_head(sk)) inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, inet_csk(sk)->icsk_rto, TCP_RTO_MAX); - - NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPFORWARDRETRANS); } } -- cgit v1.2.3-70-g09d2 From ef9da47c7cc64d69526331f315e76b5680d4048f Mon Sep 17 00:00:00 2001 From: Ilpo Järvinen Date: Sat, 20 Sep 2008 21:25:15 -0700 Subject: tcp: don't clear retransmit_skb_hint when not necessary MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Most importantly avoid doing it with cumulative ACK. Not clearing means that we no longer need n^2 processing in resolution of each fast recovery. Signed-off-by: Ilpo Järvinen Signed-off-by: David S. Miller --- include/net/tcp.h | 7 ++++++- net/ipv4/tcp_input.c | 4 +++- net/ipv4/tcp_output.c | 8 +++++--- 3 files changed, 14 insertions(+), 5 deletions(-) (limited to 'include/net') diff --git a/include/net/tcp.h b/include/net/tcp.h index 220f54cf42e..ea815723d41 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -1037,10 +1037,15 @@ static inline void tcp_mib_init(struct net *net) } /* from STCP */ -static inline void tcp_clear_all_retrans_hints(struct tcp_sock *tp) +static inline void tcp_clear_retrans_hints_partial(struct tcp_sock *tp) { tp->lost_skb_hint = NULL; tp->scoreboard_skb_hint = NULL; +} + +static inline void tcp_clear_all_retrans_hints(struct tcp_sock *tp) +{ + tcp_clear_retrans_hints_partial(tp); tp->retransmit_skb_hint = NULL; } diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index d017aed6edd..44a4fffc2cc 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -2925,7 +2925,9 @@ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets) tcp_unlink_write_queue(skb, sk); sk_wmem_free_skb(sk, skb); - tcp_clear_all_retrans_hints(tp); + tcp_clear_retrans_hints_partial(tp); + if (skb == tp->retransmit_skb_hint) + tp->retransmit_skb_hint = NULL; } if (skb && (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED)) diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index f900fae8b87..239cea7b6c0 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -750,7 +750,7 @@ int tcp_fragment(struct sock *sk, struct sk_buff *skb, u32 len, BUG_ON(len > skb->len); - tcp_clear_all_retrans_hints(tp); + tcp_clear_retrans_hints_partial(tp); nsize = skb_headlen(skb) - len; if (nsize < 0) nsize = 0; @@ -1823,7 +1823,9 @@ static void tcp_retrans_try_collapse(struct sock *sk, struct sk_buff *skb, tp->packets_out -= tcp_skb_pcount(next_skb); /* changed transmit queue under us so clear hints */ - tcp_clear_all_retrans_hints(tp); + tcp_clear_retrans_hints_partial(tp); + if (next_skb == tp->retransmit_skb_hint) + tp->retransmit_skb_hint = skb; sk_wmem_free_skb(sk, next_skb); } @@ -1853,7 +1855,7 @@ void tcp_simple_retransmit(struct sock *sk) } } - tcp_clear_all_retrans_hints(tp); + tcp_clear_retrans_hints_partial(tp); if (prior_lost == tp->lost_out) return; -- cgit v1.2.3-70-g09d2 From 43f59c89399fd76883a06c551f24794e98409432 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Sun, 21 Sep 2008 21:28:51 -0700 Subject: net: Remove __skb_insert() calls outside of skbuff internals. This minor cleanup simplifies later changes which will convert struct sk_buff and friends over to using struct list_head. Signed-off-by: David S. Miller --- drivers/net/cassini.c | 2 +- drivers/net/ppp_generic.c | 2 +- drivers/net/pppol2tp.c | 2 +- include/net/tcp.h | 4 ++-- net/ipv4/tcp_input.c | 4 ++-- net/sctp/ulpqueue.c | 5 ++--- 6 files changed, 9 insertions(+), 10 deletions(-) (limited to 'include/net') diff --git a/drivers/net/cassini.c b/drivers/net/cassini.c index f1936d51b45..40ff6a90d0d 100644 --- a/drivers/net/cassini.c +++ b/drivers/net/cassini.c @@ -2182,7 +2182,7 @@ static inline void cas_rx_flow_pkt(struct cas *cp, const u64 *words, * do any additional locking here. stick the buffer * at the end. */ - __skb_insert(skb, flow->prev, (struct sk_buff *) flow, flow); + __skb_queue_tail(flow, skb); if (words[0] & RX_COMP1_RELEASE_FLOW) { while ((skb = __skb_dequeue(flow))) { cas_skb_release(skb); diff --git a/drivers/net/ppp_generic.c b/drivers/net/ppp_generic.c index ddccc074a76..98e04958fef 100644 --- a/drivers/net/ppp_generic.c +++ b/drivers/net/ppp_generic.c @@ -1864,7 +1864,7 @@ ppp_mp_insert(struct ppp *ppp, struct sk_buff *skb) for (p = list->next; p != (struct sk_buff *)list; p = p->next) if (seq_before(seq, p->sequence)) break; - __skb_insert(skb, p->prev, p, list); + __skb_queue_before(list, p, skb); } /* diff --git a/drivers/net/pppol2tp.c b/drivers/net/pppol2tp.c index ff175e8f36b..185b1dff10a 100644 --- a/drivers/net/pppol2tp.c +++ b/drivers/net/pppol2tp.c @@ -353,7 +353,7 @@ static void pppol2tp_recv_queue_skb(struct pppol2tp_session *session, struct sk_ spin_lock_bh(&session->reorder_q.lock); skb_queue_walk_safe(&session->reorder_q, skbp, tmp) { if (PPPOL2TP_SKB_CB(skbp)->ns > ns) { - __skb_insert(skb, skbp->prev, skbp, &session->reorder_q); + __skb_queue_before(&session->reorder_q, skbp, skb); PRINTK(session->debug, PPPOL2TP_MSG_SEQ, KERN_DEBUG, "%s: pkt %hu, inserted before %hu, reorder_q len=%d\n", session->name, ns, PPPOL2TP_SKB_CB(skbp)->ns, diff --git a/include/net/tcp.h b/include/net/tcp.h index ea815723d41..f857c3eff71 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -1268,12 +1268,12 @@ static inline void tcp_insert_write_queue_after(struct sk_buff *skb, __skb_queue_after(&sk->sk_write_queue, skb, buff); } -/* Insert skb between prev and next on the write queue of sk. */ +/* Insert new before skb on the write queue of sk. */ static inline void tcp_insert_write_queue_before(struct sk_buff *new, struct sk_buff *skb, struct sock *sk) { - __skb_insert(new, skb->prev, skb, &sk->sk_write_queue); + __skb_queue_before(&sk->sk_write_queue, skb, new); if (sk->sk_send_head == skb) sk->sk_send_head = new; diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 85627f83665..cbfe13d5f42 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -4156,7 +4156,7 @@ drop: skb1 = skb1->prev; } } - __skb_insert(skb, skb1, skb1->next, &tp->out_of_order_queue); + __skb_queue_after(&tp->out_of_order_queue, skb1, skb); /* And clean segments covered by new one as whole. */ while ((skb1 = skb->next) != @@ -4254,7 +4254,7 @@ tcp_collapse(struct sock *sk, struct sk_buff_head *list, memcpy(nskb->head, skb->head, header); memcpy(nskb->cb, skb->cb, sizeof(skb->cb)); TCP_SKB_CB(nskb)->seq = TCP_SKB_CB(nskb)->end_seq = start; - __skb_insert(nskb, skb->prev, skb, list); + __skb_queue_before(list, skb, nskb); skb_set_owner_r(nskb, sk); /* Copy data, releasing collapsed skbs. */ diff --git a/net/sctp/ulpqueue.c b/net/sctp/ulpqueue.c index 5061a26c502..7b23803343c 100644 --- a/net/sctp/ulpqueue.c +++ b/net/sctp/ulpqueue.c @@ -317,7 +317,7 @@ static void sctp_ulpq_store_reasm(struct sctp_ulpq *ulpq, } /* Insert before pos. */ - __skb_insert(sctp_event2skb(event), pos->prev, pos, &ulpq->reasm); + __skb_queue_before(&ulpq->reasm, pos, sctp_event2skb(event)); } @@ -825,8 +825,7 @@ static void sctp_ulpq_store_ordered(struct sctp_ulpq *ulpq, /* Insert before pos. */ - __skb_insert(sctp_event2skb(event), pos->prev, pos, &ulpq->lobby); - + __skb_queue_before(&ulpq->lobby, pos, sctp_event2skb(event)); } static struct sctp_ulpevent *sctp_ulpq_order(struct sctp_ulpq *ulpq, -- cgit v1.2.3-70-g09d2 From 5c1824587f0797373c95719a196f6098f7c6d20c Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Mon, 22 Sep 2008 19:48:19 -0700 Subject: ipsec: Fix xfrm_state_walk race MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit As discovered by Timo Teräs, the currently xfrm_state_walk scheme is racy because if a second dump finishes before the first, we may free xfrm states that the first dump would walk over later. This patch fixes this by storing the dumps in a list in order to calculate the correct completion counter which cures this problem. I've expanded netlink_cb in order to accomodate the extra state related to this. It shouldn't be a big deal since netlink_cb is kmalloced for each dump and we're just increasing it by 4 or 8 bytes. Signed-off-by: Herbert Xu Signed-off-by: David S. Miller --- include/linux/netlink.h | 2 +- include/net/xfrm.h | 10 +++------- net/xfrm/xfrm_state.c | 39 ++++++++++++++++++++++++++++++--------- 3 files changed, 34 insertions(+), 17 deletions(-) (limited to 'include/net') diff --git a/include/linux/netlink.h b/include/linux/netlink.h index 9ff1b54908f..cbba7760545 100644 --- a/include/linux/netlink.h +++ b/include/linux/netlink.h @@ -220,7 +220,7 @@ struct netlink_callback int (*dump)(struct sk_buff * skb, struct netlink_callback *cb); int (*done)(struct netlink_callback *cb); int family; - long args[6]; + long args[7]; }; struct netlink_notify diff --git a/include/net/xfrm.h b/include/net/xfrm.h index 4bb94992b5f..48630b26659 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -1246,6 +1246,8 @@ struct xfrm6_tunnel { }; struct xfrm_state_walk { + struct list_head list; + unsigned long genid; struct xfrm_state *state; int count; u8 proto; @@ -1281,13 +1283,7 @@ static inline void xfrm6_fini(void) extern int xfrm_proc_init(void); #endif -static inline void xfrm_state_walk_init(struct xfrm_state_walk *walk, u8 proto) -{ - walk->proto = proto; - walk->state = NULL; - walk->count = 0; -} - +extern void xfrm_state_walk_init(struct xfrm_state_walk *walk, u8 proto); extern int xfrm_state_walk(struct xfrm_state_walk *walk, int (*func)(struct xfrm_state *, int, void*), void *); extern void xfrm_state_walk_done(struct xfrm_state_walk *walk); diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c index abbe2702c40..053970e8765 100644 --- a/net/xfrm/xfrm_state.c +++ b/net/xfrm/xfrm_state.c @@ -64,6 +64,9 @@ static unsigned long xfrm_state_walk_ongoing; /* Counter indicating walk completion, protected by xfrm_cfg_mutex. */ static unsigned long xfrm_state_walk_completed; +/* List of outstanding state walks used to set the completed counter. */ +static LIST_HEAD(xfrm_state_walks); + static struct xfrm_state_afinfo *xfrm_state_get_afinfo(unsigned int family); static void xfrm_state_put_afinfo(struct xfrm_state_afinfo *afinfo); @@ -1584,7 +1587,6 @@ int xfrm_state_walk(struct xfrm_state_walk *walk, if (err) { xfrm_state_hold(last); walk->state = last; - xfrm_state_walk_ongoing++; goto out; } } @@ -1599,25 +1601,44 @@ int xfrm_state_walk(struct xfrm_state_walk *walk, err = func(last, 0, data); out: spin_unlock_bh(&xfrm_state_lock); - if (old != NULL) { + if (old != NULL) xfrm_state_put(old); - xfrm_state_walk_completed++; - if (!list_empty(&xfrm_state_gc_leftovers)) - schedule_work(&xfrm_state_gc_work); - } return err; } EXPORT_SYMBOL(xfrm_state_walk); +void xfrm_state_walk_init(struct xfrm_state_walk *walk, u8 proto) +{ + walk->proto = proto; + walk->state = NULL; + walk->count = 0; + list_add_tail(&walk->list, &xfrm_state_walks); + walk->genid = ++xfrm_state_walk_ongoing; +} +EXPORT_SYMBOL(xfrm_state_walk_init); + void xfrm_state_walk_done(struct xfrm_state_walk *walk) { + struct list_head *prev; + if (walk->state != NULL) { xfrm_state_put(walk->state); walk->state = NULL; - xfrm_state_walk_completed++; - if (!list_empty(&xfrm_state_gc_leftovers)) - schedule_work(&xfrm_state_gc_work); } + + prev = walk->list.prev; + list_del(&walk->list); + + if (prev != &xfrm_state_walks) { + list_entry(prev, struct xfrm_state_walk, list)->genid = + walk->genid; + return; + } + + xfrm_state_walk_completed = walk->genid; + + if (!list_empty(&xfrm_state_gc_leftovers)) + schedule_work(&xfrm_state_gc_work); } EXPORT_SYMBOL(xfrm_state_walk_done); -- cgit v1.2.3-70-g09d2 From 4b07b3f69a8471cdc142c51461a331226fef248a Mon Sep 17 00:00:00 2001 From: Remi Denis-Courmont Date: Mon, 22 Sep 2008 20:02:10 -0700 Subject: Phonet: PF_PHONET protocol family support This is the basis for the Phonet protocol families, and introduces the ETH_P_PHONET packet type and the PF_PHONET socket family. Signed-off-by: Remi Denis-Courmont Signed-off-by: David S. Miller --- include/net/phonet/phonet.h | 74 +++++++++++++++ net/phonet/af_phonet.c | 216 ++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 290 insertions(+) create mode 100644 include/net/phonet/phonet.h create mode 100644 net/phonet/af_phonet.c (limited to 'include/net') diff --git a/include/net/phonet/phonet.h b/include/net/phonet/phonet.h new file mode 100644 index 00000000000..c53f2abc059 --- /dev/null +++ b/include/net/phonet/phonet.h @@ -0,0 +1,74 @@ +/* + * File: af_phonet.h + * + * Phonet sockets kernel definitions + * + * Copyright (C) 2008 Nokia Corporation. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * version 2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA + * 02110-1301 USA + */ + +#ifndef AF_PHONET_H +#define AF_PHONET_H + +/* + * The lower layers may not require more space, ever. Make sure it's + * enough. + */ +#define MAX_PHONET_HEADER 8 + +static inline struct phonethdr *pn_hdr(struct sk_buff *skb) +{ + return (struct phonethdr *)skb_network_header(skb); +} + +/* + * Get the other party's sockaddr from received skb. The skb begins + * with a Phonet header. + */ +static inline +void pn_skb_get_src_sockaddr(struct sk_buff *skb, struct sockaddr_pn *sa) +{ + struct phonethdr *ph = pn_hdr(skb); + u16 obj = pn_object(ph->pn_sdev, ph->pn_sobj); + + sa->spn_family = AF_PHONET; + pn_sockaddr_set_object(sa, obj); + pn_sockaddr_set_resource(sa, ph->pn_res); + memset(sa->spn_zero, 0, sizeof(sa->spn_zero)); +} + +static inline +void pn_skb_get_dst_sockaddr(struct sk_buff *skb, struct sockaddr_pn *sa) +{ + struct phonethdr *ph = pn_hdr(skb); + u16 obj = pn_object(ph->pn_rdev, ph->pn_robj); + + sa->spn_family = AF_PHONET; + pn_sockaddr_set_object(sa, obj); + pn_sockaddr_set_resource(sa, ph->pn_res); + memset(sa->spn_zero, 0, sizeof(sa->spn_zero)); +} + +/* Protocols in Phonet protocol family. */ +struct phonet_protocol { + struct proto *prot; + int sock_type; +}; + +int phonet_proto_register(int protocol, struct phonet_protocol *pp); +void phonet_proto_unregister(int protocol, struct phonet_protocol *pp); + +#endif diff --git a/net/phonet/af_phonet.c b/net/phonet/af_phonet.c new file mode 100644 index 00000000000..0cfea9bc994 --- /dev/null +++ b/net/phonet/af_phonet.c @@ -0,0 +1,216 @@ +/* + * File: af_phonet.c + * + * Phonet protocols family + * + * Copyright (C) 2008 Nokia Corporation. + * + * Contact: Remi Denis-Courmont + * Original author: Sakari Ailus + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * version 2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA + * 02110-1301 USA + */ + +#include +#include +#include +#include + +#include +#include +#include + +static struct net_proto_family phonet_proto_family; +static struct phonet_protocol *phonet_proto_get(int protocol); +static inline void phonet_proto_put(struct phonet_protocol *pp); + +/* protocol family functions */ + +static int pn_socket_create(struct net *net, struct socket *sock, int protocol) +{ + struct phonet_protocol *pnp; + int err; + + if (net != &init_net) + return -EAFNOSUPPORT; + + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + + if (protocol == 0) { + /* Default protocol selection */ + switch (sock->type) { + case SOCK_DGRAM: + protocol = PN_PROTO_PHONET; + break; + default: + return -EPROTONOSUPPORT; + } + } + + pnp = phonet_proto_get(protocol); + if (pnp == NULL) + return -EPROTONOSUPPORT; + if (sock->type != pnp->sock_type) { + err = -EPROTONOSUPPORT; + goto out; + } + + /* TODO: create and init the struct sock */ + err = -EPROTONOSUPPORT; + +out: + phonet_proto_put(pnp); + return err; +} + +static struct net_proto_family phonet_proto_family = { + .family = AF_PHONET, + .create = pn_socket_create, + .owner = THIS_MODULE, +}; + +/* packet type functions */ + +/* + * Stuff received packets to associated sockets. + * On error, returns non-zero and releases the skb. + */ +static int phonet_rcv(struct sk_buff *skb, struct net_device *dev, + struct packet_type *pkttype, + struct net_device *orig_dev) +{ + struct phonethdr *ph; + struct sockaddr_pn sa; + u16 len; + + if (dev_net(dev) != &init_net) + goto out; + + /* check we have at least a full Phonet header */ + if (!pskb_pull(skb, sizeof(struct phonethdr))) + goto out; + + /* check that the advertised length is correct */ + ph = pn_hdr(skb); + len = get_unaligned_be16(&ph->pn_length); + if (len < 2) + goto out; + len -= 2; + if ((len > skb->len) || pskb_trim(skb, len)) + goto out; + skb_reset_transport_header(skb); + + pn_skb_get_dst_sockaddr(skb, &sa); + if (pn_sockaddr_get_addr(&sa) == 0) + goto out; /* currently, we cannot be device 0 */ + + /* TODO: put packets to sockets backlog */ + +out: + kfree_skb(skb); + return NET_RX_DROP; +} + +static struct packet_type phonet_packet_type = { + .type = __constant_htons(ETH_P_PHONET), + .dev = NULL, + .func = phonet_rcv, +}; + +/* Transport protocol registration */ +static struct phonet_protocol *proto_tab[PHONET_NPROTO] __read_mostly; +static DEFINE_SPINLOCK(proto_tab_lock); + +int __init_or_module phonet_proto_register(int protocol, + struct phonet_protocol *pp) +{ + int err = 0; + + if (protocol >= PHONET_NPROTO) + return -EINVAL; + + err = proto_register(pp->prot, 1); + if (err) + return err; + + spin_lock(&proto_tab_lock); + if (proto_tab[protocol]) + err = -EBUSY; + else + proto_tab[protocol] = pp; + spin_unlock(&proto_tab_lock); + + return err; +} +EXPORT_SYMBOL(phonet_proto_register); + +void phonet_proto_unregister(int protocol, struct phonet_protocol *pp) +{ + spin_lock(&proto_tab_lock); + BUG_ON(proto_tab[protocol] != pp); + proto_tab[protocol] = NULL; + spin_unlock(&proto_tab_lock); + proto_unregister(pp->prot); +} +EXPORT_SYMBOL(phonet_proto_unregister); + +static struct phonet_protocol *phonet_proto_get(int protocol) +{ + struct phonet_protocol *pp; + + if (protocol >= PHONET_NPROTO) + return NULL; + + spin_lock(&proto_tab_lock); + pp = proto_tab[protocol]; + if (pp && !try_module_get(pp->prot->owner)) + pp = NULL; + spin_unlock(&proto_tab_lock); + + return pp; +} + +static inline void phonet_proto_put(struct phonet_protocol *pp) +{ + module_put(pp->prot->owner); +} + +/* Module registration */ +static int __init phonet_init(void) +{ + int err; + + err = sock_register(&phonet_proto_family); + if (err) { + printk(KERN_ALERT + "phonet protocol family initialization failed\n"); + return err; + } + + dev_add_pack(&phonet_packet_type); + return 0; +} + +static void __exit phonet_exit(void) +{ + sock_unregister(AF_PHONET); + dev_remove_pack(&phonet_packet_type); +} + +module_init(phonet_init); +module_exit(phonet_exit); +MODULE_DESCRIPTION("Phonet protocol stack for Linux"); +MODULE_LICENSE("GPL"); -- cgit v1.2.3-70-g09d2 From f8ff60283de2b6775d7a14619056a08e3083bd40 Mon Sep 17 00:00:00 2001 From: Remi Denis-Courmont Date: Mon, 22 Sep 2008 20:03:44 -0700 Subject: Phonet: network device and address handling MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This provides support for adding Phonet addresses to and removing Phonet addresses from network devices. Signed-off-by: Rémi Denis-Courmont Signed-off-by: David S. Miller --- include/net/phonet/pn_dev.h | 50 +++++++++++ net/phonet/Makefile | 1 + net/phonet/af_phonet.c | 3 + net/phonet/pn_dev.c | 208 ++++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 262 insertions(+) create mode 100644 include/net/phonet/pn_dev.h create mode 100644 net/phonet/pn_dev.c (limited to 'include/net') diff --git a/include/net/phonet/pn_dev.h b/include/net/phonet/pn_dev.h new file mode 100644 index 00000000000..bbd2a836e04 --- /dev/null +++ b/include/net/phonet/pn_dev.h @@ -0,0 +1,50 @@ +/* + * File: pn_dev.h + * + * Phonet network device + * + * Copyright (C) 2008 Nokia Corporation. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * version 2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA + * 02110-1301 USA + */ + +#ifndef PN_DEV_H +#define PN_DEV_H + +struct phonet_device_list { + struct list_head list; + spinlock_t lock; +}; + +extern struct phonet_device_list pndevs; + +struct phonet_device { + struct list_head list; + struct net_device *netdev; + DECLARE_BITMAP(addrs, 64); +}; + +void phonet_device_init(void); +void phonet_device_exit(void); +struct net_device *phonet_device_get(struct net *net); + +int phonet_address_add(struct net_device *dev, u8 addr); +int phonet_address_del(struct net_device *dev, u8 addr); +u8 phonet_address_get(struct net_device *dev, u8 addr); +int phonet_address_lookup(u8 addr); + +#define PN_NO_ADDR 0xff + +#endif diff --git a/net/phonet/Makefile b/net/phonet/Makefile index 5dbff68a6f3..980a3866c9a 100644 --- a/net/phonet/Makefile +++ b/net/phonet/Makefile @@ -1,4 +1,5 @@ obj-$(CONFIG_PHONET) += phonet.o phonet-objs := \ + pn_dev.o \ af_phonet.o diff --git a/net/phonet/af_phonet.c b/net/phonet/af_phonet.c index 0cfea9bc994..a8ba6f177b2 100644 --- a/net/phonet/af_phonet.c +++ b/net/phonet/af_phonet.c @@ -31,6 +31,7 @@ #include #include #include +#include static struct net_proto_family phonet_proto_family; static struct phonet_protocol *phonet_proto_get(int protocol); @@ -200,6 +201,7 @@ static int __init phonet_init(void) return err; } + phonet_device_init(); dev_add_pack(&phonet_packet_type); return 0; } @@ -208,6 +210,7 @@ static void __exit phonet_exit(void) { sock_unregister(AF_PHONET); dev_remove_pack(&phonet_packet_type); + phonet_device_exit(); } module_init(phonet_init); diff --git a/net/phonet/pn_dev.c b/net/phonet/pn_dev.c new file mode 100644 index 00000000000..53be9fc82aa --- /dev/null +++ b/net/phonet/pn_dev.c @@ -0,0 +1,208 @@ +/* + * File: pn_dev.c + * + * Phonet network device + * + * Copyright (C) 2008 Nokia Corporation. + * + * Contact: Remi Denis-Courmont + * Original author: Sakari Ailus + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * version 2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA + * 02110-1301 USA + */ + +#include +#include +#include +#include +#include +#include + +/* when accessing, remember to lock with spin_lock(&pndevs.lock); */ +struct phonet_device_list pndevs = { + .list = LIST_HEAD_INIT(pndevs.list), + .lock = __SPIN_LOCK_UNLOCKED(pndevs.lock), +}; + +/* Allocate new Phonet device. */ +static struct phonet_device *__phonet_device_alloc(struct net_device *dev) +{ + struct phonet_device *pnd = kmalloc(sizeof(*pnd), GFP_ATOMIC); + if (pnd == NULL) + return NULL; + pnd->netdev = dev; + bitmap_zero(pnd->addrs, 64); + + list_add(&pnd->list, &pndevs.list); + return pnd; +} + +static struct phonet_device *__phonet_get(struct net_device *dev) +{ + struct phonet_device *pnd; + + list_for_each_entry(pnd, &pndevs.list, list) { + if (pnd->netdev == dev) + return pnd; + } + return NULL; +} + +static void __phonet_device_free(struct phonet_device *pnd) +{ + list_del(&pnd->list); + kfree(pnd); +} + +struct net_device *phonet_device_get(struct net *net) +{ + struct phonet_device *pnd; + struct net_device *dev; + + spin_lock_bh(&pndevs.lock); + list_for_each_entry(pnd, &pndevs.list, list) { + dev = pnd->netdev; + BUG_ON(!dev); + + if (dev_net(dev) == net && + (dev->reg_state == NETREG_REGISTERED) && + ((pnd->netdev->flags & IFF_UP)) == IFF_UP) + break; + dev = NULL; + } + if (dev) + dev_hold(dev); + spin_unlock_bh(&pndevs.lock); + return dev; +} + +int phonet_address_add(struct net_device *dev, u8 addr) +{ + struct phonet_device *pnd; + int err = 0; + + spin_lock_bh(&pndevs.lock); + /* Find or create Phonet-specific device data */ + pnd = __phonet_get(dev); + if (pnd == NULL) + pnd = __phonet_device_alloc(dev); + if (unlikely(pnd == NULL)) + err = -ENOMEM; + else if (test_and_set_bit(addr >> 2, pnd->addrs)) + err = -EEXIST; + spin_unlock_bh(&pndevs.lock); + return err; +} + +int phonet_address_del(struct net_device *dev, u8 addr) +{ + struct phonet_device *pnd; + int err = 0; + + spin_lock_bh(&pndevs.lock); + pnd = __phonet_get(dev); + if (!pnd || !test_and_clear_bit(addr >> 2, pnd->addrs)) + err = -EADDRNOTAVAIL; + if (bitmap_empty(pnd->addrs, 64)) + __phonet_device_free(pnd); + spin_unlock_bh(&pndevs.lock); + return err; +} + +/* Gets a source address toward a destination, through a interface. */ +u8 phonet_address_get(struct net_device *dev, u8 addr) +{ + struct phonet_device *pnd; + + spin_lock_bh(&pndevs.lock); + pnd = __phonet_get(dev); + if (pnd) { + BUG_ON(bitmap_empty(pnd->addrs, 64)); + + /* Use same source address as destination, if possible */ + if (!test_bit(addr >> 2, pnd->addrs)) + addr = find_first_bit(pnd->addrs, 64) << 2; + } else + addr = PN_NO_ADDR; + spin_unlock_bh(&pndevs.lock); + return addr; +} + +int phonet_address_lookup(u8 addr) +{ + struct phonet_device *pnd; + + spin_lock_bh(&pndevs.lock); + list_for_each_entry(pnd, &pndevs.list, list) { + /* Don't allow unregistering devices! */ + if ((pnd->netdev->reg_state != NETREG_REGISTERED) || + ((pnd->netdev->flags & IFF_UP)) != IFF_UP) + continue; + + if (test_bit(addr >> 2, pnd->addrs)) { + spin_unlock_bh(&pndevs.lock); + return 0; + } + } + spin_unlock_bh(&pndevs.lock); + return -EADDRNOTAVAIL; +} + +/* notify Phonet of device events */ +static int phonet_device_notify(struct notifier_block *me, unsigned long what, + void *arg) +{ + struct net_device *dev = arg; + + if (what == NETDEV_UNREGISTER) { + struct phonet_device *pnd; + + /* Destroy phonet-specific device data */ + spin_lock_bh(&pndevs.lock); + pnd = __phonet_get(dev); + if (pnd) + __phonet_device_free(pnd); + spin_unlock_bh(&pndevs.lock); + } + return 0; + +} + +static struct notifier_block phonet_device_notifier = { + .notifier_call = phonet_device_notify, + .priority = 0, +}; + +/* Initialize Phonet devices list */ +void phonet_device_init(void) +{ + register_netdevice_notifier(&phonet_device_notifier); +} + +void phonet_device_exit(void) +{ + struct phonet_device *pnd, *n; + + rtnl_unregister_all(PF_PHONET); + rtnl_lock(); + spin_lock_bh(&pndevs.lock); + + list_for_each_entry_safe(pnd, n, &pndevs.list, list) + __phonet_device_free(pnd); + + spin_unlock_bh(&pndevs.lock); + rtnl_unlock(); + unregister_netdevice_notifier(&phonet_device_notifier); +} -- cgit v1.2.3-70-g09d2 From 8fb397406f6470f79040c41eec49af20900a9e3b Mon Sep 17 00:00:00 2001 From: Remi Denis-Courmont Date: Mon, 22 Sep 2008 20:04:30 -0700 Subject: Phonet: Netlink interface This provides support for configuring Phonet addresses, notifying Phonet configuration changes, and dumping the configuration. Signed-off-by: Remi Denis-Courmont Signed-off-by: David S. Miller --- include/net/phonet/phonet.h | 1 + net/phonet/Makefile | 1 + net/phonet/af_phonet.c | 1 + net/phonet/pn_netlink.c | 186 ++++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 189 insertions(+) create mode 100644 net/phonet/pn_netlink.c (limited to 'include/net') diff --git a/include/net/phonet/phonet.h b/include/net/phonet/phonet.h index c53f2abc059..8b777943d20 100644 --- a/include/net/phonet/phonet.h +++ b/include/net/phonet/phonet.h @@ -71,4 +71,5 @@ struct phonet_protocol { int phonet_proto_register(int protocol, struct phonet_protocol *pp); void phonet_proto_unregister(int protocol, struct phonet_protocol *pp); +void phonet_netlink_register(void); #endif diff --git a/net/phonet/Makefile b/net/phonet/Makefile index 980a3866c9a..4143c3e1dfd 100644 --- a/net/phonet/Makefile +++ b/net/phonet/Makefile @@ -2,4 +2,5 @@ obj-$(CONFIG_PHONET) += phonet.o phonet-objs := \ pn_dev.o \ + pn_netlink.o \ af_phonet.o diff --git a/net/phonet/af_phonet.c b/net/phonet/af_phonet.c index a8ba6f177b2..5c729ba5693 100644 --- a/net/phonet/af_phonet.c +++ b/net/phonet/af_phonet.c @@ -203,6 +203,7 @@ static int __init phonet_init(void) phonet_device_init(); dev_add_pack(&phonet_packet_type); + phonet_netlink_register(); return 0; } diff --git a/net/phonet/pn_netlink.c b/net/phonet/pn_netlink.c new file mode 100644 index 00000000000..b1ea19a230d --- /dev/null +++ b/net/phonet/pn_netlink.c @@ -0,0 +1,186 @@ +/* + * File: pn_netlink.c + * + * Phonet netlink interface + * + * Copyright (C) 2008 Nokia Corporation. + * + * Contact: Remi Denis-Courmont + * Original author: Sakari Ailus + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * version 2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA + * 02110-1301 USA + */ + +#include +#include +#include +#include +#include + +static int fill_addr(struct sk_buff *skb, struct net_device *dev, u8 addr, + u32 pid, u32 seq, int event); + +static void rtmsg_notify(int event, struct net_device *dev, u8 addr) +{ + struct sk_buff *skb; + int err = -ENOBUFS; + + skb = nlmsg_new(NLMSG_ALIGN(sizeof(struct ifaddrmsg)) + + nla_total_size(1), GFP_KERNEL); + if (skb == NULL) + goto errout; + err = fill_addr(skb, dev, addr, 0, 0, event); + if (err < 0) { + WARN_ON(err == -EMSGSIZE); + kfree_skb(skb); + goto errout; + } + err = rtnl_notify(skb, dev_net(dev), 0, + RTNLGRP_PHONET_IFADDR, NULL, GFP_KERNEL); +errout: + if (err < 0) + rtnl_set_sk_err(dev_net(dev), RTNLGRP_PHONET_IFADDR, err); +} + +static int newaddr_doit(struct sk_buff *skb, struct nlmsghdr *nlm, void *attr) +{ + struct rtattr **rta = attr; + struct ifaddrmsg *ifm = NLMSG_DATA(nlm); + struct net_device *dev; + int err; + u8 pnaddr; + + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + + ASSERT_RTNL(); + + if (rta[IFA_LOCAL - 1] == NULL) + return -EINVAL; + + dev = __dev_get_by_index(&init_net, ifm->ifa_index); + if (dev == NULL) + return -ENODEV; + + if (ifm->ifa_prefixlen > 0) + return -EINVAL; + + memcpy(&pnaddr, RTA_DATA(rta[IFA_LOCAL - 1]), 1); + + err = phonet_address_add(dev, pnaddr); + if (!err) + rtmsg_notify(RTM_NEWADDR, dev, pnaddr); + return err; +} + +static int deladdr_doit(struct sk_buff *skb, struct nlmsghdr *nlm, void *attr) +{ + struct rtattr **rta = attr; + struct ifaddrmsg *ifm = NLMSG_DATA(nlm); + struct net_device *dev; + int err; + u8 pnaddr; + + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + + ASSERT_RTNL(); + + if (rta[IFA_LOCAL - 1] == NULL) + return -EINVAL; + + dev = __dev_get_by_index(&init_net, ifm->ifa_index); + if (dev == NULL) + return -ENODEV; + + if (ifm->ifa_prefixlen > 0) + return -EADDRNOTAVAIL; + + memcpy(&pnaddr, RTA_DATA(rta[IFA_LOCAL - 1]), 1); + + err = phonet_address_del(dev, pnaddr); + if (!err) + rtmsg_notify(RTM_DELADDR, dev, pnaddr); + return err; +} + +static int fill_addr(struct sk_buff *skb, struct net_device *dev, u8 addr, + u32 pid, u32 seq, int event) +{ + struct ifaddrmsg *ifm; + struct nlmsghdr *nlh; + unsigned int orig_len = skb->len; + + nlh = NLMSG_PUT(skb, pid, seq, event, sizeof(struct ifaddrmsg)); + ifm = NLMSG_DATA(nlh); + ifm->ifa_family = AF_PHONET; + ifm->ifa_prefixlen = 0; + ifm->ifa_flags = IFA_F_PERMANENT; + ifm->ifa_scope = RT_SCOPE_HOST; + ifm->ifa_index = dev->ifindex; + RTA_PUT(skb, IFA_LOCAL, 1, &addr); + nlh->nlmsg_len = skb->len - orig_len; + + return 0; + +nlmsg_failure: +rtattr_failure: + skb_trim(skb, orig_len); + + return -1; +} + +static int getaddr_dumpit(struct sk_buff *skb, struct netlink_callback *cb) +{ + struct phonet_device *pnd; + int dev_idx = 0, dev_start_idx = cb->args[0]; + int addr_idx = 0, addr_start_idx = cb->args[1]; + + spin_lock_bh(&pndevs.lock); + list_for_each_entry(pnd, &pndevs.list, list) { + u8 addr; + + if (dev_idx > dev_start_idx) + addr_start_idx = 0; + if (dev_idx++ < dev_start_idx) + continue; + + addr_idx = 0; + for (addr = find_first_bit(pnd->addrs, 64); addr < 64; + addr = find_next_bit(pnd->addrs, 64, 1+addr)) { + if (addr_idx++ < addr_start_idx) + continue; + + if (fill_addr(skb, pnd->netdev, addr << 2, + NETLINK_CB(cb->skb).pid, + cb->nlh->nlmsg_seq, RTM_NEWADDR)) + goto out; + } + } + +out: + spin_unlock_bh(&pndevs.lock); + cb->args[0] = dev_idx; + cb->args[1] = addr_idx; + + return skb->len; +} + +void __init phonet_netlink_register(void) +{ + rtnl_register(PF_PHONET, RTM_NEWADDR, newaddr_doit, NULL); + rtnl_register(PF_PHONET, RTM_DELADDR, deladdr_doit, NULL); + rtnl_register(PF_PHONET, RTM_GETADDR, NULL, getaddr_dumpit); +} -- cgit v1.2.3-70-g09d2 From ba113a94b7503ee23ffe819e7045134b0c1d31de Mon Sep 17 00:00:00 2001 From: Remi Denis-Courmont Date: Mon, 22 Sep 2008 20:05:19 -0700 Subject: Phonet: common socket glue This provides the socket API for the Phonet protocols family. Signed-off-by: Remi Denis-Courmont Signed-off-by: David S. Miller --- include/linux/phonet.h | 3 + include/net/phonet/phonet.h | 23 ++++ net/phonet/Makefile | 1 + net/phonet/af_phonet.c | 28 +++- net/phonet/socket.c | 311 ++++++++++++++++++++++++++++++++++++++++++++ 5 files changed, 363 insertions(+), 3 deletions(-) create mode 100644 net/phonet/socket.c (limited to 'include/net') diff --git a/include/linux/phonet.h b/include/linux/phonet.h index 6a764f8584a..001c0e67909 100644 --- a/include/linux/phonet.h +++ b/include/linux/phonet.h @@ -32,6 +32,9 @@ #define PNADDR_ANY 0 #define PNPORT_RESOURCE_ROUTING 0 +/* ioctls */ +#define SIOCPNGETOBJECT (SIOCPROTOPRIVATE + 0) + /* Phonet protocol header */ struct phonethdr { __u8 pn_rdev; diff --git a/include/net/phonet/phonet.h b/include/net/phonet/phonet.h index 8b777943d20..2ae5cbb59b6 100644 --- a/include/net/phonet/phonet.h +++ b/include/net/phonet/phonet.h @@ -29,6 +29,28 @@ */ #define MAX_PHONET_HEADER 8 +/* + * Every Phonet* socket has this structure first in its + * protocol-specific structure under name c. + */ +struct pn_sock { + struct sock sk; + u16 sobject; + u8 resource; +}; + +static inline struct pn_sock *pn_sk(struct sock *sk) +{ + return (struct pn_sock *)sk; +} + +extern const struct proto_ops phonet_dgram_ops; + +struct sock *pn_find_sock_by_sa(const struct sockaddr_pn *sa); +void pn_sock_hash(struct sock *sk); +void pn_sock_unhash(struct sock *sk); +int pn_sock_get_port(struct sock *sk, unsigned short sport); + static inline struct phonethdr *pn_hdr(struct sk_buff *skb) { return (struct phonethdr *)skb_network_header(skb); @@ -64,6 +86,7 @@ void pn_skb_get_dst_sockaddr(struct sk_buff *skb, struct sockaddr_pn *sa) /* Protocols in Phonet protocol family. */ struct phonet_protocol { + const struct proto_ops *ops; struct proto *prot; int sock_type; }; diff --git a/net/phonet/Makefile b/net/phonet/Makefile index 4143c3e1dfd..c1d671de783 100644 --- a/net/phonet/Makefile +++ b/net/phonet/Makefile @@ -3,4 +3,5 @@ obj-$(CONFIG_PHONET) += phonet.o phonet-objs := \ pn_dev.o \ pn_netlink.o \ + socket.o \ af_phonet.o diff --git a/net/phonet/af_phonet.c b/net/phonet/af_phonet.c index 5c729ba5693..ba54d53020f 100644 --- a/net/phonet/af_phonet.c +++ b/net/phonet/af_phonet.c @@ -41,6 +41,8 @@ static inline void phonet_proto_put(struct phonet_protocol *pp); static int pn_socket_create(struct net *net, struct socket *sock, int protocol) { + struct sock *sk; + struct pn_sock *pn; struct phonet_protocol *pnp; int err; @@ -69,8 +71,22 @@ static int pn_socket_create(struct net *net, struct socket *sock, int protocol) goto out; } - /* TODO: create and init the struct sock */ - err = -EPROTONOSUPPORT; + sk = sk_alloc(net, PF_PHONET, GFP_KERNEL, pnp->prot); + if (sk == NULL) { + err = -ENOMEM; + goto out; + } + + sock_init_data(sock, sk); + sock->state = SS_UNCONNECTED; + sock->ops = pnp->ops; + sk->sk_backlog_rcv = sk->sk_prot->backlog_rcv; + sk->sk_protocol = protocol; + pn = pn_sk(sk); + pn->sobject = 0; + pn->resource = 0; + sk->sk_prot->init(sk); + err = 0; out: phonet_proto_put(pnp); @@ -94,6 +110,7 @@ static int phonet_rcv(struct sk_buff *skb, struct net_device *dev, struct net_device *orig_dev) { struct phonethdr *ph; + struct sock *sk; struct sockaddr_pn sa; u16 len; @@ -118,7 +135,12 @@ static int phonet_rcv(struct sk_buff *skb, struct net_device *dev, if (pn_sockaddr_get_addr(&sa) == 0) goto out; /* currently, we cannot be device 0 */ - /* TODO: put packets to sockets backlog */ + sk = pn_find_sock_by_sa(&sa); + if (sk == NULL) + goto out; + + /* Push data to the socket (or other sockets connected to it). */ + return sk_receive_skb(sk, skb, 0); out: kfree_skb(skb); diff --git a/net/phonet/socket.c b/net/phonet/socket.c new file mode 100644 index 00000000000..99a4945d565 --- /dev/null +++ b/net/phonet/socket.c @@ -0,0 +1,311 @@ +/* + * File: socket.c + * + * Phonet sockets + * + * Copyright (C) 2008 Nokia Corporation. + * + * Contact: Remi Denis-Courmont + * Original author: Sakari Ailus + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * version 2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA + * 02110-1301 USA + */ + +#include +#include +#include +#include + +#include +#include +#include + +static int pn_socket_release(struct socket *sock) +{ + struct sock *sk = sock->sk; + + if (sk) { + sock->sk = NULL; + sk->sk_prot->close(sk, 0); + } + return 0; +} + +static struct { + struct hlist_head hlist; + spinlock_t lock; +} pnsocks = { + .hlist = HLIST_HEAD_INIT, + .lock = __SPIN_LOCK_UNLOCKED(pnsocks.lock), +}; + +/* + * Find address based on socket address, match only certain fields. + * Also grab sock if it was found. Remember to sock_put it later. + */ +struct sock *pn_find_sock_by_sa(const struct sockaddr_pn *spn) +{ + struct hlist_node *node; + struct sock *sknode; + struct sock *rval = NULL; + u16 obj = pn_sockaddr_get_object(spn); + u8 res = spn->spn_resource; + + spin_lock_bh(&pnsocks.lock); + + sk_for_each(sknode, node, &pnsocks.hlist) { + struct pn_sock *pn = pn_sk(sknode); + BUG_ON(!pn->sobject); /* unbound socket */ + + if (pn_port(obj)) { + /* Look up socket by port */ + if (pn_port(pn->sobject) != pn_port(obj)) + continue; + } else { + /* If port is zero, look up by resource */ + if (pn->resource != res) + continue; + } + if (pn_addr(pn->sobject) + && pn_addr(pn->sobject) != pn_addr(obj)) + continue; + + rval = sknode; + sock_hold(sknode); + break; + } + + spin_unlock_bh(&pnsocks.lock); + + return rval; + +} + +void pn_sock_hash(struct sock *sk) +{ + spin_lock_bh(&pnsocks.lock); + sk_add_node(sk, &pnsocks.hlist); + spin_unlock_bh(&pnsocks.lock); +} +EXPORT_SYMBOL(pn_sock_hash); + +void pn_sock_unhash(struct sock *sk) +{ + spin_lock_bh(&pnsocks.lock); + sk_del_node_init(sk); + spin_unlock_bh(&pnsocks.lock); +} +EXPORT_SYMBOL(pn_sock_unhash); + +static int pn_socket_bind(struct socket *sock, struct sockaddr *addr, int len) +{ + struct sock *sk = sock->sk; + struct pn_sock *pn = pn_sk(sk); + struct sockaddr_pn *spn = (struct sockaddr_pn *)addr; + int err; + u16 handle; + u8 saddr; + + if (sk->sk_prot->bind) + return sk->sk_prot->bind(sk, addr, len); + + if (len < sizeof(struct sockaddr_pn)) + return -EINVAL; + if (spn->spn_family != AF_PHONET) + return -EAFNOSUPPORT; + + handle = pn_sockaddr_get_object((struct sockaddr_pn *)addr); + saddr = pn_addr(handle); + if (saddr && phonet_address_lookup(saddr)) + return -EADDRNOTAVAIL; + + lock_sock(sk); + if (sk->sk_state != TCP_CLOSE || pn_port(pn->sobject)) { + err = -EINVAL; /* attempt to rebind */ + goto out; + } + err = sk->sk_prot->get_port(sk, pn_port(handle)); + if (err) + goto out; + + /* get_port() sets the port, bind() sets the address if applicable */ + pn->sobject = pn_object(saddr, pn_port(pn->sobject)); + pn->resource = spn->spn_resource; + + /* Enable RX on the socket */ + sk->sk_prot->hash(sk); +out: + release_sock(sk); + return err; +} + +static int pn_socket_autobind(struct socket *sock) +{ + struct sockaddr_pn sa; + int err; + + memset(&sa, 0, sizeof(sa)); + sa.spn_family = AF_PHONET; + err = pn_socket_bind(sock, (struct sockaddr *)&sa, + sizeof(struct sockaddr_pn)); + if (err != -EINVAL) + return err; + BUG_ON(!pn_port(pn_sk(sock->sk)->sobject)); + return 0; /* socket was already bound */ +} + +static int pn_socket_getname(struct socket *sock, struct sockaddr *addr, + int *sockaddr_len, int peer) +{ + struct sock *sk = sock->sk; + struct pn_sock *pn = pn_sk(sk); + + memset(addr, 0, sizeof(struct sockaddr_pn)); + addr->sa_family = AF_PHONET; + if (!peer) /* Race with bind() here is userland's problem. */ + pn_sockaddr_set_object((struct sockaddr_pn *)addr, + pn->sobject); + + *sockaddr_len = sizeof(struct sockaddr_pn); + return 0; +} + +static int pn_socket_ioctl(struct socket *sock, unsigned int cmd, + unsigned long arg) +{ + struct sock *sk = sock->sk; + struct pn_sock *pn = pn_sk(sk); + + if (cmd == SIOCPNGETOBJECT) { + struct net_device *dev; + u16 handle; + u8 saddr; + + if (get_user(handle, (__u16 __user *)arg)) + return -EFAULT; + + lock_sock(sk); + if (sk->sk_bound_dev_if) + dev = dev_get_by_index(sock_net(sk), + sk->sk_bound_dev_if); + else + dev = phonet_device_get(sock_net(sk)); + if (dev && (dev->flags & IFF_UP)) + saddr = phonet_address_get(dev, pn_addr(handle)); + else + saddr = PN_NO_ADDR; + release_sock(sk); + + if (dev) + dev_put(dev); + if (saddr == PN_NO_ADDR) + return -EHOSTUNREACH; + + handle = pn_object(saddr, pn_port(pn->sobject)); + return put_user(handle, (__u16 __user *)arg); + } + + return sk->sk_prot->ioctl(sk, cmd, arg); +} + +static int pn_socket_sendmsg(struct kiocb *iocb, struct socket *sock, + struct msghdr *m, size_t total_len) +{ + struct sock *sk = sock->sk; + + if (pn_socket_autobind(sock)) + return -EAGAIN; + + return sk->sk_prot->sendmsg(iocb, sk, m, total_len); +} + +const struct proto_ops phonet_dgram_ops = { + .family = AF_PHONET, + .owner = THIS_MODULE, + .release = pn_socket_release, + .bind = pn_socket_bind, + .connect = sock_no_connect, + .socketpair = sock_no_socketpair, + .accept = sock_no_accept, + .getname = pn_socket_getname, + .poll = datagram_poll, + .ioctl = pn_socket_ioctl, + .listen = sock_no_listen, + .shutdown = sock_no_shutdown, + .setsockopt = sock_no_setsockopt, + .getsockopt = sock_no_getsockopt, +#ifdef CONFIG_COMPAT + .compat_setsockopt = sock_no_setsockopt, + .compat_getsockopt = sock_no_getsockopt, +#endif + .sendmsg = pn_socket_sendmsg, + .recvmsg = sock_common_recvmsg, + .mmap = sock_no_mmap, + .sendpage = sock_no_sendpage, +}; + +static DEFINE_MUTEX(port_mutex); + +/* allocate port for a socket */ +int pn_sock_get_port(struct sock *sk, unsigned short sport) +{ + static int port_cur; + struct pn_sock *pn = pn_sk(sk); + struct sockaddr_pn try_sa; + struct sock *tmpsk; + + memset(&try_sa, 0, sizeof(struct sockaddr_pn)); + try_sa.spn_family = AF_PHONET; + + mutex_lock(&port_mutex); + + if (!sport) { + /* search free port */ + int port, pmin = 0x40, pmax = 0x7f; + + for (port = pmin; port <= pmax; port++) { + port_cur++; + if (port_cur < pmin || port_cur > pmax) + port_cur = pmin; + + pn_sockaddr_set_port(&try_sa, port_cur); + tmpsk = pn_find_sock_by_sa(&try_sa); + if (tmpsk == NULL) { + sport = port_cur; + goto found; + } else + sock_put(tmpsk); + } + } else { + /* try to find specific port */ + pn_sockaddr_set_port(&try_sa, sport); + tmpsk = pn_find_sock_by_sa(&try_sa); + if (tmpsk == NULL) + /* No sock there! We can use that port... */ + goto found; + else + sock_put(tmpsk); + } + mutex_unlock(&port_mutex); + + /* the port must be in use already */ + return -EADDRINUSE; + +found: + mutex_unlock(&port_mutex); + pn->sobject = pn_object(pn_addr(pn->sobject), sport); + return 0; +} +EXPORT_SYMBOL(pn_sock_get_port); -- cgit v1.2.3-70-g09d2 From 107d0d9b8d9a236883db72841fb61cedd5be845e Mon Sep 17 00:00:00 2001 From: Remi Denis-Courmont Date: Mon, 22 Sep 2008 20:05:57 -0700 Subject: Phonet: Phonet datagram transport protocol This provides the basic SOCK_DGRAM transport protocol for Phonet. Signed-off-by: Remi Denis-Courmont Signed-off-by: David S. Miller --- include/net/phonet/phonet.h | 6 ++ net/phonet/Makefile | 1 + net/phonet/af_phonet.c | 106 ++++++++++++++++++++++++ net/phonet/datagram.c | 197 ++++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 310 insertions(+) create mode 100644 net/phonet/datagram.c (limited to 'include/net') diff --git a/include/net/phonet/phonet.h b/include/net/phonet/phonet.h index 2ae5cbb59b6..d3957d3be0f 100644 --- a/include/net/phonet/phonet.h +++ b/include/net/phonet/phonet.h @@ -51,6 +51,9 @@ void pn_sock_hash(struct sock *sk); void pn_sock_unhash(struct sock *sk); int pn_sock_get_port(struct sock *sk, unsigned short sport); +int pn_skb_send(struct sock *sk, struct sk_buff *skb, + const struct sockaddr_pn *target); + static inline struct phonethdr *pn_hdr(struct sk_buff *skb) { return (struct phonethdr *)skb_network_header(skb); @@ -95,4 +98,7 @@ int phonet_proto_register(int protocol, struct phonet_protocol *pp); void phonet_proto_unregister(int protocol, struct phonet_protocol *pp); void phonet_netlink_register(void); +int isi_register(void); +void isi_unregister(void); + #endif diff --git a/net/phonet/Makefile b/net/phonet/Makefile index c1d671de783..d218abc3f06 100644 --- a/net/phonet/Makefile +++ b/net/phonet/Makefile @@ -4,4 +4,5 @@ phonet-objs := \ pn_dev.o \ pn_netlink.o \ socket.o \ + datagram.o \ af_phonet.o diff --git a/net/phonet/af_phonet.c b/net/phonet/af_phonet.c index ba54d53020f..e6771d3961c 100644 --- a/net/phonet/af_phonet.c +++ b/net/phonet/af_phonet.c @@ -99,6 +99,101 @@ static struct net_proto_family phonet_proto_family = { .owner = THIS_MODULE, }; +/* + * Prepends an ISI header and sends a datagram. + */ +static int pn_send(struct sk_buff *skb, struct net_device *dev, + u16 dst, u16 src, u8 res) +{ + struct phonethdr *ph; + int err; + + if (skb->len + 2 > 0xffff) { + /* Phonet length field would overflow */ + err = -EMSGSIZE; + goto drop; + } + + skb_reset_transport_header(skb); + WARN_ON(skb_headroom(skb) & 1); /* HW assumes word alignment */ + skb_push(skb, sizeof(struct phonethdr)); + skb_reset_network_header(skb); + ph = pn_hdr(skb); + ph->pn_rdev = pn_dev(dst); + ph->pn_sdev = pn_dev(src); + ph->pn_res = res; + ph->pn_length = __cpu_to_be16(skb->len + 2 - sizeof(*ph)); + ph->pn_robj = pn_obj(dst); + ph->pn_sobj = pn_obj(src); + + skb->protocol = htons(ETH_P_PHONET); + skb->priority = 0; + skb->dev = dev; + + if (pn_addr(src) == pn_addr(dst)) { + skb_reset_mac_header(skb); + skb->pkt_type = PACKET_LOOPBACK; + skb_orphan(skb); + netif_rx_ni(skb); + err = 0; + } else { + err = dev_hard_header(skb, dev, ntohs(skb->protocol), + NULL, NULL, skb->len); + if (err < 0) { + err = -EHOSTUNREACH; + goto drop; + } + err = dev_queue_xmit(skb); + } + + return err; +drop: + kfree_skb(skb); + return err; +} + +/* + * Create a Phonet header for the skb and send it out. Returns + * non-zero error code if failed. The skb is freed then. + */ +int pn_skb_send(struct sock *sk, struct sk_buff *skb, + const struct sockaddr_pn *target) +{ + struct net_device *dev; + struct pn_sock *pn = pn_sk(sk); + int err; + u16 src; + u8 daddr = pn_sockaddr_get_addr(target), saddr = PN_NO_ADDR; + + err = -EHOSTUNREACH; + if (sk->sk_bound_dev_if) + dev = dev_get_by_index(sock_net(sk), sk->sk_bound_dev_if); + else + dev = phonet_device_get(sock_net(sk)); + if (!dev || !(dev->flags & IFF_UP)) + goto drop; + + saddr = phonet_address_get(dev, daddr); + if (saddr == PN_NO_ADDR) + goto drop; + + src = pn->sobject; + if (!pn_addr(src)) + src = pn_object(saddr, pn_obj(src)); + + err = pn_send(skb, dev, pn_sockaddr_get_object(target), + src, pn_sockaddr_get_resource(target)); + dev_put(dev); + return err; + +drop: + kfree_skb(skb); + if (dev) + dev_put(dev); + return err; +} +EXPORT_SYMBOL(pn_skb_send); + /* packet type functions */ /* @@ -226,11 +321,22 @@ static int __init phonet_init(void) phonet_device_init(); dev_add_pack(&phonet_packet_type); phonet_netlink_register(); + + err = isi_register(); + if (err) + goto err; return 0; + +err: + sock_unregister(AF_PHONET); + dev_remove_pack(&phonet_packet_type); + phonet_device_exit(); + return err; } static void __exit phonet_exit(void) { + isi_unregister(); sock_unregister(AF_PHONET); dev_remove_pack(&phonet_packet_type); phonet_device_exit(); diff --git a/net/phonet/datagram.c b/net/phonet/datagram.c new file mode 100644 index 00000000000..e087862ed7e --- /dev/null +++ b/net/phonet/datagram.c @@ -0,0 +1,197 @@ +/* + * File: datagram.c + * + * Datagram (ISI) Phonet sockets + * + * Copyright (C) 2008 Nokia Corporation. + * + * Contact: Remi Denis-Courmont + * Original author: Sakari Ailus + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * version 2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA + * 02110-1301 USA + */ + +#include +#include +#include +#include + +#include +#include + +static int pn_backlog_rcv(struct sock *sk, struct sk_buff *skb); + +/* associated socket ceases to exist */ +static void pn_sock_close(struct sock *sk, long timeout) +{ + sk_common_release(sk); +} + +static int pn_ioctl(struct sock *sk, int cmd, unsigned long arg) +{ + struct sk_buff *skb; + int answ; + + switch (cmd) { + case SIOCINQ: + lock_sock(sk); + skb = skb_peek(&sk->sk_receive_queue); + answ = skb ? skb->len : 0; + release_sock(sk); + return put_user(answ, (int __user *)arg); + } + + return -ENOIOCTLCMD; +} + +/* Destroy socket. All references are gone. */ +static void pn_destruct(struct sock *sk) +{ + skb_queue_purge(&sk->sk_receive_queue); +} + +static int pn_init(struct sock *sk) +{ + sk->sk_destruct = pn_destruct; + return 0; +} + +static int pn_sendmsg(struct kiocb *iocb, struct sock *sk, + struct msghdr *msg, size_t len) +{ + struct sockaddr_pn *target; + struct sk_buff *skb; + int err; + + if (msg->msg_flags & MSG_OOB) + return -EOPNOTSUPP; + + if (msg->msg_name == NULL) + return -EDESTADDRREQ; + + if (msg->msg_namelen < sizeof(struct sockaddr_pn)) + return -EINVAL; + + target = (struct sockaddr_pn *)msg->msg_name; + if (target->spn_family != AF_PHONET) + return -EAFNOSUPPORT; + + skb = sock_alloc_send_skb(sk, MAX_PHONET_HEADER + len, + msg->msg_flags & MSG_DONTWAIT, &err); + if (skb == NULL) + return err; + skb_reserve(skb, MAX_PHONET_HEADER); + + err = memcpy_fromiovec((void *)skb_put(skb, len), msg->msg_iov, len); + if (err < 0) { + kfree_skb(skb); + return err; + } + + /* + * Fill in the Phonet header and + * finally pass the packet forwards. + */ + err = pn_skb_send(sk, skb, target); + + /* If ok, return len. */ + return (err >= 0) ? len : err; +} + +static int pn_recvmsg(struct kiocb *iocb, struct sock *sk, + struct msghdr *msg, size_t len, int noblock, + int flags, int *addr_len) +{ + struct sk_buff *skb = NULL; + struct sockaddr_pn sa; + int rval = -EOPNOTSUPP; + int copylen; + + if (flags & MSG_OOB) + goto out_nofree; + + if (addr_len) + *addr_len = sizeof(sa); + + skb = skb_recv_datagram(sk, flags, noblock, &rval); + if (skb == NULL) + goto out_nofree; + + pn_skb_get_src_sockaddr(skb, &sa); + + copylen = skb->len; + if (len < copylen) { + msg->msg_flags |= MSG_TRUNC; + copylen = len; + } + + rval = skb_copy_datagram_iovec(skb, 0, msg->msg_iov, copylen); + if (rval) { + rval = -EFAULT; + goto out; + } + + rval = (flags & MSG_TRUNC) ? skb->len : copylen; + + if (msg->msg_name != NULL) + memcpy(msg->msg_name, &sa, sizeof(struct sockaddr_pn)); + +out: + skb_free_datagram(sk, skb); + +out_nofree: + return rval; +} + +/* Queue an skb for a sock. */ +static int pn_backlog_rcv(struct sock *sk, struct sk_buff *skb) +{ + int err = sock_queue_rcv_skb(sk, skb); + if (err < 0) + kfree_skb(skb); + return err ? NET_RX_DROP : NET_RX_SUCCESS; +} + +/* Module registration */ +static struct proto pn_proto = { + .close = pn_sock_close, + .ioctl = pn_ioctl, + .init = pn_init, + .sendmsg = pn_sendmsg, + .recvmsg = pn_recvmsg, + .backlog_rcv = pn_backlog_rcv, + .hash = pn_sock_hash, + .unhash = pn_sock_unhash, + .get_port = pn_sock_get_port, + .obj_size = sizeof(struct pn_sock), + .owner = THIS_MODULE, + .name = "PHONET", +}; + +static struct phonet_protocol pn_dgram_proto = { + .ops = &phonet_dgram_ops, + .prot = &pn_proto, + .sock_type = SOCK_DGRAM, +}; + +int __init isi_register(void) +{ + return phonet_proto_register(PN_PROTO_PHONET, &pn_dgram_proto); +} + +void __exit isi_unregister(void) +{ + phonet_proto_unregister(PN_PROTO_PHONET, &pn_dgram_proto); +} -- cgit v1.2.3-70-g09d2 From 87ab4e20b445c6d2d2727ab4f96fa17f7259511e Mon Sep 17 00:00:00 2001 From: Remi Denis-Courmont Date: Mon, 22 Sep 2008 20:08:39 -0700 Subject: Phonet: proc interface for port range Phonet endpoints are bound to individual ports. This provides a /proc/sys/net/phonet (or sysctl) interface for selecting the range of automatically allocated ports (much like the ip_local_port_range with IPv4). Signed-off-by: Remi Denis-Courmont Signed-off-by: David S. Miller --- include/net/phonet/phonet.h | 3 ++ net/phonet/Makefile | 1 + net/phonet/af_phonet.c | 3 ++ net/phonet/socket.c | 3 +- net/phonet/sysctl.c | 113 ++++++++++++++++++++++++++++++++++++++++++++ 5 files changed, 122 insertions(+), 1 deletion(-) create mode 100644 net/phonet/sysctl.c (limited to 'include/net') diff --git a/include/net/phonet/phonet.h b/include/net/phonet/phonet.h index d3957d3be0f..1c6f7e7d5fe 100644 --- a/include/net/phonet/phonet.h +++ b/include/net/phonet/phonet.h @@ -47,6 +47,7 @@ static inline struct pn_sock *pn_sk(struct sock *sk) extern const struct proto_ops phonet_dgram_ops; struct sock *pn_find_sock_by_sa(const struct sockaddr_pn *sa); +void phonet_get_local_port_range(int *min, int *max); void pn_sock_hash(struct sock *sk); void pn_sock_unhash(struct sock *sk); int pn_sock_get_port(struct sock *sk, unsigned short sport); @@ -97,6 +98,8 @@ struct phonet_protocol { int phonet_proto_register(int protocol, struct phonet_protocol *pp); void phonet_proto_unregister(int protocol, struct phonet_protocol *pp); +int phonet_sysctl_init(void); +void phonet_sysctl_exit(void); void phonet_netlink_register(void); int isi_register(void); void isi_unregister(void); diff --git a/net/phonet/Makefile b/net/phonet/Makefile index d218abc3f06..ae9c3ed5be8 100644 --- a/net/phonet/Makefile +++ b/net/phonet/Makefile @@ -5,4 +5,5 @@ phonet-objs := \ pn_netlink.o \ socket.o \ datagram.o \ + sysctl.o \ af_phonet.o diff --git a/net/phonet/af_phonet.c b/net/phonet/af_phonet.c index 51397ff308b..50dc258d5aa 100644 --- a/net/phonet/af_phonet.c +++ b/net/phonet/af_phonet.c @@ -350,6 +350,7 @@ static int __init phonet_init(void) phonet_device_init(); dev_add_pack(&phonet_packet_type); phonet_netlink_register(); + phonet_sysctl_init(); err = isi_register(); if (err) @@ -357,6 +358,7 @@ static int __init phonet_init(void) return 0; err: + phonet_sysctl_exit(); sock_unregister(AF_PHONET); dev_remove_pack(&phonet_packet_type); phonet_device_exit(); @@ -366,6 +368,7 @@ err: static void __exit phonet_exit(void) { isi_unregister(); + phonet_sysctl_exit(); sock_unregister(AF_PHONET); dev_remove_pack(&phonet_packet_type); phonet_device_exit(); diff --git a/net/phonet/socket.c b/net/phonet/socket.c index 99a4945d565..dfd4061646d 100644 --- a/net/phonet/socket.c +++ b/net/phonet/socket.c @@ -273,8 +273,9 @@ int pn_sock_get_port(struct sock *sk, unsigned short sport) if (!sport) { /* search free port */ - int port, pmin = 0x40, pmax = 0x7f; + int port, pmin, pmax; + phonet_get_local_port_range(&pmin, &pmax); for (port = pmin; port <= pmax; port++) { port_cur++; if (port_cur < pmin || port_cur > pmax) diff --git a/net/phonet/sysctl.c b/net/phonet/sysctl.c new file mode 100644 index 00000000000..600a4309b8c --- /dev/null +++ b/net/phonet/sysctl.c @@ -0,0 +1,113 @@ +/* + * File: sysctl.c + * + * Phonet /proc/sys/net/phonet interface implementation + * + * Copyright (C) 2008 Nokia Corporation. + * + * Contact: Remi Denis-Courmont + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * version 2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA + * 02110-1301 USA + */ + +#include +#include +#include +#include + +#define DYNAMIC_PORT_MIN 0x40 +#define DYNAMIC_PORT_MAX 0x7f + +static DEFINE_SEQLOCK(local_port_range_lock); +static int local_port_range_min[2] = {0, 0}; +static int local_port_range_max[2] = {1023, 1023}; +static int local_port_range[2] = {DYNAMIC_PORT_MIN, DYNAMIC_PORT_MAX}; +static struct ctl_table_header *phonet_table_hrd; + +static void set_local_port_range(int range[2]) +{ + write_seqlock(&local_port_range_lock); + local_port_range[0] = range[0]; + local_port_range[1] = range[1]; + write_sequnlock(&local_port_range_lock); +} + +void phonet_get_local_port_range(int *min, int *max) +{ + unsigned seq; + do { + seq = read_seqbegin(&local_port_range_lock); + if (min) + *min = local_port_range[0]; + if (max) + *max = local_port_range[1]; + } while (read_seqretry(&local_port_range_lock, seq)); +} + +static int proc_local_port_range(ctl_table *table, int write, struct file *filp, + void __user *buffer, + size_t *lenp, loff_t *ppos) +{ + int ret; + int range[2] = {local_port_range[0], local_port_range[1]}; + ctl_table tmp = { + .data = &range, + .maxlen = sizeof(range), + .mode = table->mode, + .extra1 = &local_port_range_min, + .extra2 = &local_port_range_max, + }; + + ret = proc_dointvec_minmax(&tmp, write, filp, buffer, lenp, ppos); + + if (write && ret == 0) { + if (range[1] < range[0]) + ret = -EINVAL; + else + set_local_port_range(range); + } + + return ret; +} + +static struct ctl_table phonet_table[] = { + { + .ctl_name = CTL_UNNUMBERED, + .procname = "local_port_range", + .data = &local_port_range, + .maxlen = sizeof(local_port_range), + .mode = 0644, + .proc_handler = &proc_local_port_range, + .strategy = NULL, + }, + { .ctl_name = 0 } +}; + +struct ctl_path phonet_ctl_path[] = { + { .procname = "net", .ctl_name = CTL_NET, }, + { .procname = "phonet", .ctl_name = CTL_UNNUMBERED, }, + { }, +}; + +int __init phonet_sysctl_init(void) +{ + phonet_table_hrd = register_sysctl_paths(phonet_ctl_path, phonet_table); + return phonet_table_hrd == NULL ? -ENOMEM : 0; +} + +void phonet_sysctl_exit(void) +{ + unregister_sysctl_table(phonet_table_hrd); +} -- cgit v1.2.3-70-g09d2 From be0c52bfed7f7828494fa00060efd5d758e92580 Mon Sep 17 00:00:00 2001 From: Remi Denis-Courmont Date: Mon, 22 Sep 2008 20:09:13 -0700 Subject: Phonet: emit errors when a packet cannot be delivered locally When there is no listener socket for a received packet, send an error back to the sender. Signed-off-by: Remi Denis-Courmont Signed-off-by: David S. Miller --- include/linux/phonet.h | 32 +++++++++++++++ include/net/phonet/phonet.h | 5 +++ net/phonet/af_phonet.c | 96 +++++++++++++++++++++++++++++++++++++++++++-- 3 files changed, 129 insertions(+), 4 deletions(-) (limited to 'include/net') diff --git a/include/linux/phonet.h b/include/linux/phonet.h index 001c0e67909..3a027f588a4 100644 --- a/include/linux/phonet.h +++ b/include/linux/phonet.h @@ -45,6 +45,38 @@ struct phonethdr { __u8 pn_sobj; } __attribute__((packed)); +/* Common Phonet payload header */ +struct phonetmsg { + __u8 pn_trans_id; /* transaction ID */ + __u8 pn_msg_id; /* message type */ + union { + struct { + __u8 pn_submsg_id; /* message subtype */ + __u8 pn_data[5]; + } base; + struct { + __u16 pn_e_res_id; /* extended resource ID */ + __u8 pn_e_submsg_id; /* message subtype */ + __u8 pn_e_data[3]; + } ext; + } pn_msg_u; +}; +#define PN_COMMON_MESSAGE 0xF0 +#define PN_PREFIX 0xE0 /* resource for extended messages */ +#define pn_submsg_id pn_msg_u.base.pn_submsg_id +#define pn_e_submsg_id pn_msg_u.ext.pn_e_submsg_id +#define pn_e_res_id pn_msg_u.ext.pn_e_res_id +#define pn_data pn_msg_u.base.pn_data +#define pn_e_data pn_msg_u.ext.pn_e_data + +/* data for unreachable errors */ +#define PN_COMM_SERVICE_NOT_IDENTIFIED_RESP 0x01 +#define PN_COMM_ISA_ENTITY_NOT_REACHABLE_RESP 0x14 +#define pn_orig_msg_id pn_data[0] +#define pn_status pn_data[1] +#define pn_e_orig_msg_id pn_e_data[0] +#define pn_e_status pn_e_data[1] + /* Phonet socket address structure */ struct sockaddr_pn { sa_family_t spn_family; diff --git a/include/net/phonet/phonet.h b/include/net/phonet/phonet.h index 1c6f7e7d5fe..d4e72508e14 100644 --- a/include/net/phonet/phonet.h +++ b/include/net/phonet/phonet.h @@ -60,6 +60,11 @@ static inline struct phonethdr *pn_hdr(struct sk_buff *skb) return (struct phonethdr *)skb_network_header(skb); } +static inline struct phonetmsg *pn_msg(struct sk_buff *skb) +{ + return (struct phonetmsg *)skb_transport_header(skb); +} + /* * Get the other party's sockaddr from received skb. The skb begins * with a Phonet header. diff --git a/net/phonet/af_phonet.c b/net/phonet/af_phonet.c index 50dc258d5aa..1d8df6b7e3d 100644 --- a/net/phonet/af_phonet.c +++ b/net/phonet/af_phonet.c @@ -132,7 +132,7 @@ EXPORT_SYMBOL(phonet_header_ops); * Prepends an ISI header and sends a datagram. */ static int pn_send(struct sk_buff *skb, struct net_device *dev, - u16 dst, u16 src, u8 res) + u16 dst, u16 src, u8 res, u8 irq) { struct phonethdr *ph; int err; @@ -163,7 +163,10 @@ static int pn_send(struct sk_buff *skb, struct net_device *dev, skb_reset_mac_header(skb); skb->pkt_type = PACKET_LOOPBACK; skb_orphan(skb); - netif_rx_ni(skb); + if (irq) + netif_rx(skb); + else + netif_rx_ni(skb); err = 0; } else { err = dev_hard_header(skb, dev, ntohs(skb->protocol), @@ -181,6 +184,19 @@ drop: return err; } +static int pn_raw_send(const void *data, int len, struct net_device *dev, + u16 dst, u16 src, u8 res) +{ + struct sk_buff *skb = alloc_skb(MAX_PHONET_HEADER + len, GFP_ATOMIC); + if (skb == NULL) + return -ENOMEM; + + skb_reserve(skb, MAX_PHONET_HEADER); + __skb_put(skb, len); + skb_copy_to_linear_data(skb, data, len); + return pn_send(skb, dev, dst, src, res, 1); +} + /* * Create a Phonet header for the skb and send it out. Returns * non-zero error code if failed. The skb is freed then. @@ -211,7 +227,7 @@ int pn_skb_send(struct sock *sk, struct sk_buff *skb, src = pn_object(saddr, pn_obj(src)); err = pn_send(skb, dev, pn_sockaddr_get_object(target), - src, pn_sockaddr_get_resource(target)); + src, pn_sockaddr_get_resource(target), 0); dev_put(dev); return err; @@ -223,6 +239,73 @@ drop: } EXPORT_SYMBOL(pn_skb_send); +/* Do not send an error message in response to an error message */ +static inline int can_respond(struct sk_buff *skb) +{ + const struct phonethdr *ph; + const struct phonetmsg *pm; + u8 submsg_id; + + if (!pskb_may_pull(skb, 3)) + return 0; + + ph = pn_hdr(skb); + if (phonet_address_get(skb->dev, ph->pn_rdev) != ph->pn_rdev) + return 0; /* we are not the destination */ + if (ph->pn_res == PN_PREFIX && !pskb_may_pull(skb, 5)) + return 0; + + ph = pn_hdr(skb); /* re-acquires the pointer */ + pm = pn_msg(skb); + if (pm->pn_msg_id != PN_COMMON_MESSAGE) + return 1; + submsg_id = (ph->pn_res == PN_PREFIX) + ? pm->pn_e_submsg_id : pm->pn_submsg_id; + if (submsg_id != PN_COMM_ISA_ENTITY_NOT_REACHABLE_RESP && + pm->pn_e_submsg_id != PN_COMM_SERVICE_NOT_IDENTIFIED_RESP) + return 1; + return 0; +} + +static int send_obj_unreachable(struct sk_buff *rskb) +{ + const struct phonethdr *oph = pn_hdr(rskb); + const struct phonetmsg *opm = pn_msg(rskb); + struct phonetmsg resp; + + memset(&resp, 0, sizeof(resp)); + resp.pn_trans_id = opm->pn_trans_id; + resp.pn_msg_id = PN_COMMON_MESSAGE; + if (oph->pn_res == PN_PREFIX) { + resp.pn_e_res_id = opm->pn_e_res_id; + resp.pn_e_submsg_id = PN_COMM_ISA_ENTITY_NOT_REACHABLE_RESP; + resp.pn_e_orig_msg_id = opm->pn_msg_id; + resp.pn_e_status = 0; + } else { + resp.pn_submsg_id = PN_COMM_ISA_ENTITY_NOT_REACHABLE_RESP; + resp.pn_orig_msg_id = opm->pn_msg_id; + resp.pn_status = 0; + } + return pn_raw_send(&resp, sizeof(resp), rskb->dev, + pn_object(oph->pn_sdev, oph->pn_sobj), + pn_object(oph->pn_rdev, oph->pn_robj), + oph->pn_res); +} + +static int send_reset_indications(struct sk_buff *rskb) +{ + struct phonethdr *oph = pn_hdr(rskb); + static const u8 data[4] = { + 0x00 /* trans ID */, 0x10 /* subscribe msg */, + 0x00 /* subscription count */, 0x00 /* dummy */ + }; + + return pn_raw_send(data, sizeof(data), rskb->dev, + pn_object(oph->pn_sdev, 0x00), + pn_object(oph->pn_rdev, oph->pn_robj), 0x10); +} + + /* packet type functions */ /* @@ -260,8 +343,13 @@ static int phonet_rcv(struct sk_buff *skb, struct net_device *dev, goto out; /* currently, we cannot be device 0 */ sk = pn_find_sock_by_sa(&sa); - if (sk == NULL) + if (sk == NULL) { + if (can_respond(skb)) { + send_obj_unreachable(skb); + send_reset_indications(skb); + } goto out; + } /* Push data to the socket (or other sockets connected to it). */ return sk_receive_skb(sk, skb, 0); -- cgit v1.2.3-70-g09d2 From 3d09274cc9d816d62945408840a9cb76a5e7aac7 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Mon, 22 Sep 2008 22:14:36 -0700 Subject: sctp: Use skb_queue_walk_safe() and skb_queue_split_tail_init(). Signed-off-by: David S. Miller --- include/net/sctp/sctp.h | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) (limited to 'include/net') diff --git a/include/net/sctp/sctp.h b/include/net/sctp/sctp.h index 17b932b8a55..703305d0036 100644 --- a/include/net/sctp/sctp.h +++ b/include/net/sctp/sctp.h @@ -406,10 +406,7 @@ struct sctp_association *sctp_id2assoc(struct sock *sk, sctp_assoc_t id); /* A macro to walk a list of skbs. */ #define sctp_skb_for_each(pos, head, tmp) \ -for (pos = (head)->next;\ - tmp = (pos)->next, pos != ((struct sk_buff *)(head));\ - pos = tmp) - + skb_queue_walk_safe(head, pos, tmp) /* A helper to append an entire skb list (list) to another (head). */ static inline void sctp_skb_list_tail(struct sk_buff_head *list, @@ -420,10 +417,7 @@ static inline void sctp_skb_list_tail(struct sk_buff_head *list, sctp_spin_lock_irqsave(&head->lock, flags); sctp_spin_lock(&list->lock); - list_splice((struct list_head *)list, (struct list_head *)head->prev); - - head->qlen += list->qlen; - list->qlen = 0; + skb_queue_splice_tail_init(list, head); sctp_spin_unlock(&list->lock); sctp_spin_unlock_irqrestore(&head->lock, flags); -- cgit v1.2.3-70-g09d2 From 242f8bfefe4bed626df4e4727ac8f315d80b567a Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Mon, 22 Sep 2008 22:15:30 -0700 Subject: pkt_sched: Make qdisc->gso_skb a list. The idea is that we can use this to get rid of ->requeue(). Signed-off-by: David S. Miller --- include/net/sch_generic.h | 2 +- net/sched/sch_generic.c | 12 +++++++----- 2 files changed, 8 insertions(+), 6 deletions(-) (limited to 'include/net') diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h index e5569625d2a..3b983e8a055 100644 --- a/include/net/sch_generic.h +++ b/include/net/sch_generic.h @@ -52,7 +52,7 @@ struct Qdisc u32 parent; atomic_t refcnt; unsigned long state; - struct sk_buff *gso_skb; + struct sk_buff_head requeue; struct sk_buff_head q; struct netdev_queue *dev_queue; struct Qdisc *next_sched; diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c index ec0a0839ce5..5961536be60 100644 --- a/net/sched/sch_generic.c +++ b/net/sched/sch_generic.c @@ -45,7 +45,7 @@ static inline int qdisc_qlen(struct Qdisc *q) static inline int dev_requeue_skb(struct sk_buff *skb, struct Qdisc *q) { if (unlikely(skb->next)) - q->gso_skb = skb; + __skb_queue_head(&q->requeue, skb); else q->ops->requeue(skb, q); @@ -57,9 +57,8 @@ static inline struct sk_buff *dequeue_skb(struct Qdisc *q) { struct sk_buff *skb; - if ((skb = q->gso_skb)) - q->gso_skb = NULL; - else + skb = __skb_dequeue(&q->requeue); + if (!skb) skb = q->dequeue(q); return skb; @@ -327,6 +326,7 @@ struct Qdisc noop_qdisc = { .flags = TCQ_F_BUILTIN, .ops = &noop_qdisc_ops, .list = LIST_HEAD_INIT(noop_qdisc.list), + .requeue.lock = __SPIN_LOCK_UNLOCKED(noop_qdisc.q.lock), .q.lock = __SPIN_LOCK_UNLOCKED(noop_qdisc.q.lock), .dev_queue = &noop_netdev_queue, }; @@ -352,6 +352,7 @@ static struct Qdisc noqueue_qdisc = { .flags = TCQ_F_BUILTIN, .ops = &noqueue_qdisc_ops, .list = LIST_HEAD_INIT(noqueue_qdisc.list), + .requeue.lock = __SPIN_LOCK_UNLOCKED(noqueue_qdisc.q.lock), .q.lock = __SPIN_LOCK_UNLOCKED(noqueue_qdisc.q.lock), .dev_queue = &noqueue_netdev_queue, }; @@ -472,6 +473,7 @@ struct Qdisc *qdisc_alloc(struct netdev_queue *dev_queue, sch->padded = (char *) sch - (char *) p; INIT_LIST_HEAD(&sch->list); + skb_queue_head_init(&sch->requeue); skb_queue_head_init(&sch->q); sch->ops = ops; sch->enqueue = ops->enqueue; @@ -539,7 +541,7 @@ void qdisc_destroy(struct Qdisc *qdisc) module_put(ops->owner); dev_put(qdisc_dev(qdisc)); - kfree_skb(qdisc->gso_skb); + __skb_queue_purge(&qdisc->requeue); kfree((char *) qdisc - qdisc->padded); } -- cgit v1.2.3-70-g09d2 From d258b4914bcda9177bcc7bbd8e1a97b281b460af Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Tue, 23 Sep 2008 00:34:37 -0700 Subject: tcp: Use skb_queue_is_last() instead of by-hand version. Signed-off-by: David S. Miller --- include/net/tcp.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include/net') diff --git a/include/net/tcp.h b/include/net/tcp.h index f857c3eff71..5c5327e9a55 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -1284,10 +1284,10 @@ static inline void tcp_unlink_write_queue(struct sk_buff *skb, struct sock *sk) __skb_unlink(skb, &sk->sk_write_queue); } -static inline int tcp_skb_is_last(const struct sock *sk, - const struct sk_buff *skb) +static inline bool tcp_skb_is_last(const struct sock *sk, + const struct sk_buff *skb) { - return skb->next == (struct sk_buff *)&sk->sk_write_queue; + return skb_queue_is_last(&sk->sk_write_queue, skb); } static inline int tcp_write_queue_empty(struct sock *sk) -- cgit v1.2.3-70-g09d2 From cd07a8ea0dd4b204919b4c9ced8d9efdd9924495 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Tue, 23 Sep 2008 00:50:13 -0700 Subject: tcp: Use SKB queue handling interfaces instead of by-hand versions. Signed-off-by: David S. Miller --- include/net/tcp.h | 40 +++++++++++++++------------------------- 1 file changed, 15 insertions(+), 25 deletions(-) (limited to 'include/net') diff --git a/include/net/tcp.h b/include/net/tcp.h index 5c5327e9a55..12c9b4fec04 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -1181,49 +1181,45 @@ static inline void tcp_write_queue_purge(struct sock *sk) static inline struct sk_buff *tcp_write_queue_head(struct sock *sk) { - struct sk_buff *skb = sk->sk_write_queue.next; - if (skb == (struct sk_buff *) &sk->sk_write_queue) - return NULL; - return skb; + return skb_peek(&sk->sk_write_queue); } static inline struct sk_buff *tcp_write_queue_tail(struct sock *sk) { - struct sk_buff *skb = sk->sk_write_queue.prev; - if (skb == (struct sk_buff *) &sk->sk_write_queue) - return NULL; - return skb; + return skb_peek_tail(&sk->sk_write_queue); } static inline struct sk_buff *tcp_write_queue_next(struct sock *sk, struct sk_buff *skb) { - return skb->next; + return skb_queue_next(&sk->sk_write_queue, skb); } #define tcp_for_write_queue(skb, sk) \ - for (skb = (sk)->sk_write_queue.next; \ - (skb != (struct sk_buff *)&(sk)->sk_write_queue); \ - skb = skb->next) + skb_queue_walk(&(sk)->sk_write_queue, skb) #define tcp_for_write_queue_from(skb, sk) \ - for (; (skb != (struct sk_buff *)&(sk)->sk_write_queue);\ - skb = skb->next) + skb_queue_walk_from(&(sk)->sk_write_queue, skb) #define tcp_for_write_queue_from_safe(skb, tmp, sk) \ - for (tmp = skb->next; \ - (skb != (struct sk_buff *)&(sk)->sk_write_queue); \ - skb = tmp, tmp = skb->next) + skb_queue_walk_from_safe(&(sk)->sk_write_queue, skb, tmp) static inline struct sk_buff *tcp_send_head(struct sock *sk) { return sk->sk_send_head; } +static inline bool tcp_skb_is_last(const struct sock *sk, + const struct sk_buff *skb) +{ + return skb_queue_is_last(&sk->sk_write_queue, skb); +} + static inline void tcp_advance_send_head(struct sock *sk, struct sk_buff *skb) { - sk->sk_send_head = skb->next; - if (sk->sk_send_head == (struct sk_buff *)&sk->sk_write_queue) + if (tcp_skb_is_last(sk, skb)) sk->sk_send_head = NULL; + else + sk->sk_send_head = tcp_write_queue_next(sk, skb); } static inline void tcp_check_send_head(struct sock *sk, struct sk_buff *skb_unlinked) @@ -1284,12 +1280,6 @@ static inline void tcp_unlink_write_queue(struct sk_buff *skb, struct sock *sk) __skb_unlink(skb, &sk->sk_write_queue); } -static inline bool tcp_skb_is_last(const struct sock *sk, - const struct sk_buff *skb) -{ - return skb_queue_is_last(&sk->sk_write_queue, skb); -} - static inline int tcp_write_queue_empty(struct sock *sk) { return skb_queue_empty(&sk->sk_write_queue); -- cgit v1.2.3-70-g09d2 From f4ab543201992fe499bef5c406e09f23aa97b4d5 Mon Sep 17 00:00:00 2001 From: Jarek Poplawski Date: Tue, 23 Sep 2008 01:05:56 -0700 Subject: pkt_sched: Remove the tx queue state check in qdisc_run() The current check wrongly uses the state of one (currently the first) tx queue for all tx queues in case of non-default qdiscs. This check mainly prevented requeuing loop with __netif_schedule(), but now it's controlled inside __qdisc_run(), while dequeuing. The wrongness of this check was first noticed by Herbert Xu. Signed-off-by: Jarek Poplawski Signed-off-by: David S. Miller --- include/net/pkt_sched.h | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) (limited to 'include/net') diff --git a/include/net/pkt_sched.h b/include/net/pkt_sched.h index b786a5b0925..4082f39f507 100644 --- a/include/net/pkt_sched.h +++ b/include/net/pkt_sched.h @@ -90,10 +90,7 @@ extern void __qdisc_run(struct Qdisc *q); static inline void qdisc_run(struct Qdisc *q) { - struct netdev_queue *txq = q->dev_queue; - - if (!netif_tx_queue_stopped(txq) && - !test_and_set_bit(__QDISC_STATE_RUNNING, &q->state)) + if (!test_and_set_bit(__QDISC_STATE_RUNNING, &q->state)) __qdisc_run(q); } -- cgit v1.2.3-70-g09d2 From e07aa3783e9f66b03d72e7afd9f709d7f7059662 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Mon, 15 Sep 2008 13:11:19 +0200 Subject: cfg80211: fix code ordering in header file Luis added the regulatory hint stuff to this file without observing that __ieee80211_get_channel and ieee80211_get_channel really belong together. Signed-off-by: Johannes Berg Signed-off-by: John W. Linville --- include/net/wireless.h | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) (limited to 'include/net') diff --git a/include/net/wireless.h b/include/net/wireless.h index e4378cc6bf8..0de7fde9f33 100644 --- a/include/net/wireless.h +++ b/include/net/wireless.h @@ -328,6 +328,15 @@ extern int ieee80211_frequency_to_channel(int freq); */ extern struct ieee80211_channel *__ieee80211_get_channel(struct wiphy *wiphy, int freq); +/** + * ieee80211_get_channel - get channel struct from wiphy for specified frequency + */ +static inline struct ieee80211_channel * +ieee80211_get_channel(struct wiphy *wiphy, int freq) +{ + return __ieee80211_get_channel(wiphy, freq); +} + /** * __regulatory_hint - hint to the wireless core a regulatory domain * @wiphy: if a driver is providing the hint this is the driver's very @@ -380,13 +389,4 @@ extern int __regulatory_hint(struct wiphy *wiphy, enum reg_set_by set_by, */ extern int regulatory_hint(struct wiphy *wiphy, const char *alpha2, struct ieee80211_regdomain *rd); - -/** - * ieee80211_get_channel - get channel struct from wiphy for specified frequency - */ -static inline struct ieee80211_channel * -ieee80211_get_channel(struct wiphy *wiphy, int freq) -{ - return __ieee80211_get_channel(wiphy, freq); -} #endif /* __NET_WIRELESS_H */ -- cgit v1.2.3-70-g09d2 From 60719ffd721f6764b7d07ca188c0d944a4330b69 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Tue, 16 Sep 2008 14:55:09 +0200 Subject: cfg80211: show interface type This patch makes cfg80211 show the interface in the nl80211 information about a specific interface. API users are required to keep the type updated (everything else is fairly complicated) but you will get a warning if you fail to keep it updated. Signed-off-by: Johannes Berg Signed-off-by: John W. Linville --- include/net/cfg80211.h | 6 ++++-- include/net/wireless.h | 2 ++ net/mac80211/iface.c | 1 + net/wireless/core.c | 2 ++ net/wireless/nl80211.c | 6 +++++- 5 files changed, 14 insertions(+), 3 deletions(-) (limited to 'include/net') diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index 9f40c4d417d..0e85ec39b63 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -363,11 +363,13 @@ struct wiphy; * wireless extensions but this is subject to reevaluation as soon as this * code is used more widely and we have a first user without wext. * - * @add_virtual_intf: create a new virtual interface with the given name + * @add_virtual_intf: create a new virtual interface with the given name, + * must set the struct wireless_dev's iftype. * * @del_virtual_intf: remove the virtual interface determined by ifindex. * - * @change_virtual_intf: change type of virtual interface + * @change_virtual_intf: change type/configuration of virtual interface, + * keep the struct wireless_dev's iftype updated. * * @add_key: add a key with the given parameters. @mac_addr will be %NULL * when adding a group key. diff --git a/include/net/wireless.h b/include/net/wireless.h index 0de7fde9f33..721efb363db 100644 --- a/include/net/wireless.h +++ b/include/net/wireless.h @@ -223,9 +223,11 @@ struct wiphy { * the netdev.) * * @wiphy: pointer to hardware description + * @iftype: interface type */ struct wireless_dev { struct wiphy *wiphy; + enum nl80211_iftype iftype; /* private to the generic wireless code */ struct list_head list; diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c index a72fbebb8ea..b5cd91e8971 100644 --- a/net/mac80211/iface.c +++ b/net/mac80211/iface.c @@ -625,6 +625,7 @@ static void ieee80211_setup_sdata(struct ieee80211_sub_if_data *sdata, /* and set some type-dependent values */ sdata->vif.type = type; sdata->dev->hard_start_xmit = ieee80211_subif_start_xmit; + sdata->wdev.iftype = type; /* only monitor differs */ sdata->dev->type = ARPHRD_ETHER; diff --git a/net/wireless/core.c b/net/wireless/core.c index 88cb7339486..d6940085d59 100644 --- a/net/wireless/core.c +++ b/net/wireless/core.c @@ -384,6 +384,8 @@ static int cfg80211_netdev_notifier_call(struct notifier_block * nb, rdev = wiphy_to_dev(dev->ieee80211_ptr->wiphy); + WARN_ON(dev->ieee80211_ptr->iftype == NL80211_IFTYPE_UNSPECIFIED); + switch (state) { case NETDEV_REGISTER: mutex_lock(&rdev->devlist_mtx); diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 1221d726ed5..44771a690d5 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -299,7 +299,7 @@ static int nl80211_send_iface(struct sk_buff *msg, u32 pid, u32 seq, int flags, NLA_PUT_U32(msg, NL80211_ATTR_IFINDEX, dev->ifindex); NLA_PUT_STRING(msg, NL80211_ATTR_IFNAME, dev->name); - /* TODO: interface type */ + NLA_PUT_U32(msg, NL80211_ATTR_IFTYPE, dev->ieee80211_ptr->iftype); return genlmsg_end(msg, hdr); nla_put_failure: @@ -453,6 +453,10 @@ static int nl80211_set_interface(struct sk_buff *skb, struct genl_info *info) &flags); err = drv->ops->change_virtual_intf(&drv->wiphy, ifindex, type, err ? NULL : &flags, ¶ms); + + dev = __dev_get_by_index(&init_net, ifindex); + WARN_ON(!dev || (!err && dev->ieee80211_ptr->iftype != type)); + rtnl_unlock(); unlock: -- cgit v1.2.3-70-g09d2 From 4b7679a561e552eeda1e3567119bef2bca99b66e Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Thu, 18 Sep 2008 18:14:18 +0200 Subject: mac80211: clean up rate control API Long awaited, hard work. This patch totally cleans up the rate control API to remove the requirement to include internal headers outside of net/mac80211/. There's one internal use in the PID algorithm left for mesh networking, we'll have to figure out a way to clean that one up and decide how to do the peer link evaluation, possibly independent of the rate control algorithm or via new API. Additionally, ath9k is left using the cross-inclusion hack for now, we will add new API where necessary to make this work properly, but right now I'm not expert enough to do it. It's still off better than before. Signed-off-by: Johannes Berg Signed-off-by: John W. Linville --- drivers/net/wireless/ath9k/rc.c | 98 +++++++-------- drivers/net/wireless/iwlwifi/iwl-3945-rs.c | 182 ++++++---------------------- drivers/net/wireless/iwlwifi/iwl-3945-rs.h | 9 -- drivers/net/wireless/iwlwifi/iwl-3945.h | 4 + drivers/net/wireless/iwlwifi/iwl-agn-rs.c | 146 +++++++++------------- drivers/net/wireless/iwlwifi/iwl3945-base.c | 11 +- include/net/mac80211.h | 68 +++++++++++ net/mac80211/cfg.c | 2 +- net/mac80211/ieee80211_i.h | 2 + net/mac80211/main.c | 4 +- net/mac80211/mlme.c | 4 +- net/mac80211/rate.c | 71 ++++++++--- net/mac80211/rate.h | 102 ++++------------ net/mac80211/rc80211_pid.h | 2 - net/mac80211/rc80211_pid_algo.c | 158 +++++++++--------------- net/mac80211/sta_info.c | 17 +-- net/mac80211/sta_info.h | 2 +- net/mac80211/tx.c | 5 +- 18 files changed, 373 insertions(+), 514 deletions(-) (limited to 'include/net') diff --git a/drivers/net/wireless/ath9k/rc.c b/drivers/net/wireless/ath9k/rc.c index 1cc9daf4455..cca2fc5b076 100644 --- a/drivers/net/wireless/ath9k/rc.c +++ b/drivers/net/wireless/ath9k/rc.c @@ -20,6 +20,7 @@ */ #include "core.h" +/* FIXME: remove this include! */ #include "../net/mac80211/rate.h" static u32 tx_triglevel_max; @@ -1812,20 +1813,18 @@ static void ath_rc_sib_init(struct ath_rate_node *ath_rc_priv) } -static void ath_setup_rates(struct ieee80211_local *local, struct sta_info *sta) +static void ath_setup_rates(struct ath_softc *sc, + struct ieee80211_supported_band *sband, + struct ieee80211_sta *sta, + struct ath_rate_node *rc_priv) { - struct ieee80211_supported_band *sband; - struct ieee80211_hw *hw = local_to_hw(local); - struct ath_softc *sc = hw->priv; - struct ath_rate_node *rc_priv = sta->rate_ctrl_priv; int i, j = 0; DPRINTF(sc, ATH_DBG_RATE, "%s\n", __func__); - sband = local->hw.wiphy->bands[local->hw.conf.channel->band]; for (i = 0; i < sband->n_bitrates; i++) { - if (sta->sta.supp_rates[local->hw.conf.channel->band] & BIT(i)) { + if (sta->supp_rates[sband->band] & BIT(i)) { rc_priv->neg_rates.rs_rates[j] = (sband->bitrates[i].bitrate * 2) / 10; j++; @@ -1852,19 +1851,17 @@ void ath_rc_node_update(struct ieee80211_hw *hw, struct ath_rate_node *rc_priv) } /* Rate Control callbacks */ -static void ath_tx_status(void *priv, struct net_device *dev, +static void ath_tx_status(void *priv, struct ieee80211_supported_band *sband, + struct ieee80211_sta *sta, void *priv_sta, struct sk_buff *skb) { struct ath_softc *sc = priv; struct ath_tx_info_priv *tx_info_priv; struct ath_node *an; - struct sta_info *sta; - struct ieee80211_local *local; struct ieee80211_tx_info *tx_info = IEEE80211_SKB_CB(skb); struct ieee80211_hdr *hdr; __le16 fc; - local = hw_to_local(sc->hw); hdr = (struct ieee80211_hdr *)skb->data; fc = hdr->frame_control; tx_info_priv = (struct ath_tx_info_priv *)tx_info->driver_data[0]; @@ -1873,8 +1870,7 @@ static void ath_tx_status(void *priv, struct net_device *dev, an = ath_node_find(sc, hdr->addr1); spin_unlock_bh(&sc->node_lock); - sta = sta_info_get(local, hdr->addr1); - if (!an || !sta || !ieee80211_is_data(fc)) { + if (!an || !priv_sta || !ieee80211_is_data(fc)) { if (tx_info->driver_data[0] != NULL) { kfree(tx_info->driver_data[0]); tx_info->driver_data[0] = NULL; @@ -1882,24 +1878,22 @@ static void ath_tx_status(void *priv, struct net_device *dev, return; } if (tx_info->driver_data[0] != NULL) { - ath_rate_tx_complete(sc, an, sta->rate_ctrl_priv, tx_info_priv); + ath_rate_tx_complete(sc, an, priv_sta, tx_info_priv); kfree(tx_info->driver_data[0]); tx_info->driver_data[0] = NULL; } } static void ath_tx_aggr_resp(struct ath_softc *sc, - struct sta_info *sta, + struct ieee80211_supported_band *sband, + struct ieee80211_sta *sta, struct ath_node *an, u8 tidno) { - struct ieee80211_hw *hw = sc->hw; - struct ieee80211_local *local; struct ath_atx_tid *txtid; - struct ieee80211_supported_band *sband; u16 buffersize = 0; int state; - DECLARE_MAC_BUF(mac); + struct sta_info *si; if (!(sc->sc_flags & SC_OP_TXAGGR)) return; @@ -1908,11 +1902,16 @@ static void ath_tx_aggr_resp(struct ath_softc *sc, if (!txtid->paused) return; - local = hw_to_local(sc->hw); - sband = hw->wiphy->bands[hw->conf.channel->band]; + /* + * XXX: This is entirely busted, we aren't supposed to + * access the sta from here because it's internal + * to mac80211, and looking at the state without + * locking is wrong too. + */ + si = container_of(sta, struct sta_info, sta); buffersize = IEEE80211_MIN_AMPDU_BUF << sband->ht_info.ampdu_factor; /* FIXME */ - state = sta->ampdu_mlme.tid_state_tx[tidno]; + state = si->ampdu_mlme.tid_state_tx[tidno]; if (state & HT_ADDBA_RECEIVED_MSK) { txtid->addba_exchangecomplete = 1; @@ -1928,18 +1927,15 @@ static void ath_tx_aggr_resp(struct ath_softc *sc, } } -static void ath_get_rate(void *priv, struct net_device *dev, - struct ieee80211_supported_band *sband, - struct sk_buff *skb, - struct rate_selection *sel) +static void ath_get_rate(void *priv, struct ieee80211_supported_band *sband, + struct ieee80211_sta *sta, void *priv_sta, + struct sk_buff *skb, struct rate_selection *sel) { struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)skb->data; - struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr); - struct sta_info *sta; - struct ath_softc *sc = (struct ath_softc *)priv; + struct ath_softc *sc = priv; struct ieee80211_hw *hw = sc->hw; struct ath_tx_info_priv *tx_info_priv; - struct ath_rate_node *ath_rc_priv; + struct ath_rate_node *ath_rc_priv = priv_sta; struct ath_node *an; struct ieee80211_tx_info *tx_info = IEEE80211_SKB_CB(skb); int is_probe = FALSE, chk, ret; @@ -1955,8 +1951,7 @@ static void ath_get_rate(void *priv, struct net_device *dev, ASSERT(tx_info->driver_data[0] != NULL); tx_info_priv = (struct ath_tx_info_priv *)tx_info->driver_data[0]; - sta = sta_info_get(local, hdr->addr1); - lowest_idx = rate_lowest_index(local, sband, sta); + lowest_idx = rate_lowest_index(sband, sta); tx_info_priv->min_rate = (sband->bitrates[lowest_idx].bitrate * 2) / 10; /* lowest rate for management and multicast/broadcast frames */ if (!ieee80211_is_data(fc) || @@ -1965,8 +1960,6 @@ static void ath_get_rate(void *priv, struct net_device *dev, return; } - ath_rc_priv = sta->rate_ctrl_priv; - /* Find tx rate for unicast frames */ ath_rate_findrate(sc, ath_rc_priv, ATH_11N_TXMAXTRY, 4, @@ -1975,8 +1968,7 @@ static void ath_get_rate(void *priv, struct net_device *dev, &is_probe, false); if (is_probe) - sel->probe_idx = ((struct ath_tx_ratectrl *) - sta->rate_ctrl_priv)->probe_rate; + sel->probe_idx = ath_rc_priv->tx_ratectrl.probe_rate; /* Ratecontrol sometimes returns invalid rate index */ if (tx_info_priv->rcs[0].rix != 0xff) @@ -2020,37 +2012,31 @@ static void ath_get_rate(void *priv, struct net_device *dev, __func__, print_mac(mac, hdr->addr1)); } else if (chk == AGGR_EXCHANGE_PROGRESS) - ath_tx_aggr_resp(sc, sta, an, tid); + ath_tx_aggr_resp(sc, sband, sta, an, tid); } } } -static void ath_rate_init(void *priv, void *priv_sta, - struct ieee80211_local *local, - struct sta_info *sta) +static void ath_rate_init(void *priv, struct ieee80211_supported_band *sband, + struct ieee80211_sta *sta, void *priv_sta) { - struct ieee80211_supported_band *sband; - struct ieee80211_hw *hw = local_to_hw(local); - struct ieee80211_conf *conf = &local->hw.conf; - struct ath_softc *sc = hw->priv; + struct ath_softc *sc = priv; struct ath_rate_node *ath_rc_priv = priv_sta; int i, j = 0; DPRINTF(sc, ATH_DBG_RATE, "%s\n", __func__); - sband = local->hw.wiphy->bands[local->hw.conf.channel->band]; - - ath_setup_rates(local, sta); - if (conf->flags & IEEE80211_CONF_SUPPORT_HT_MODE) { + ath_setup_rates(sc, sband, sta, ath_rc_priv); + if (sc->hw->conf.flags & IEEE80211_CONF_SUPPORT_HT_MODE) { for (i = 0; i < MCS_SET_SIZE; i++) { - if (conf->ht_conf.supp_mcs_set[i/8] & (1<<(i%8))) + if (sc->hw->conf.ht_conf.supp_mcs_set[i/8] & (1<<(i%8))) ath_rc_priv->neg_ht_rates.rs_rates[j++] = i; if (j == ATH_RATE_MAX) break; } ath_rc_priv->neg_ht_rates.rs_nrates = j; } - ath_rc_node_update(hw, priv_sta); + ath_rc_node_update(sc->hw, priv_sta); } static void ath_rate_clear(void *priv) @@ -2058,13 +2044,12 @@ static void ath_rate_clear(void *priv) return; } -static void *ath_rate_alloc(struct ieee80211_local *local) +static void *ath_rate_alloc(struct ieee80211_hw *hw, struct dentry *debugfsdir) { - struct ieee80211_hw *hw = local_to_hw(local); struct ath_softc *sc = hw->priv; DPRINTF(sc, ATH_DBG_RATE, "%s\n", __func__); - return local->hw.priv; + return hw->priv; } static void ath_rate_free(void *priv) @@ -2072,7 +2057,7 @@ static void ath_rate_free(void *priv) return; } -static void *ath_rate_alloc_sta(void *priv, gfp_t gfp) +static void *ath_rate_alloc_sta(void *priv, struct ieee80211_sta *sta, gfp_t gfp) { struct ath_softc *sc = priv; struct ath_vap *avp = sc->sc_vaps[0]; @@ -2092,7 +2077,8 @@ static void *ath_rate_alloc_sta(void *priv, gfp_t gfp) return rate_priv; } -static void ath_rate_free_sta(void *priv, void *priv_sta) +static void ath_rate_free_sta(void *priv, struct ieee80211_sta *sta, + void *priv_sta) { struct ath_rate_node *rate_priv = priv_sta; struct ath_softc *sc = priv; @@ -2111,7 +2097,7 @@ static struct rate_control_ops ath_rate_ops = { .alloc = ath_rate_alloc, .free = ath_rate_free, .alloc_sta = ath_rate_alloc_sta, - .free_sta = ath_rate_free_sta + .free_sta = ath_rate_free_sta, }; int ath_rate_control_register(void) diff --git a/drivers/net/wireless/iwlwifi/iwl-3945-rs.c b/drivers/net/wireless/iwlwifi/iwl-3945-rs.c index a279bf1dc9b..6fc5e7361f2 100644 --- a/drivers/net/wireless/iwlwifi/iwl-3945-rs.c +++ b/drivers/net/wireless/iwlwifi/iwl-3945-rs.c @@ -36,8 +36,6 @@ #include -#include "../net/mac80211/rate.h" - #include "iwl-3945.h" #define RS_NAME "iwl-3945-rs" @@ -319,10 +317,10 @@ static void iwl3945_collect_tx_data(struct iwl3945_rs_sta *rs_sta, } } -static void rs_rate_init(void *priv_rate, void *priv_sta, - struct ieee80211_local *local, struct sta_info *sta) +static void rs_rate_init(void *priv, struct ieee80211_supported_band *sband, + struct ieee80211_sta *sta, void *priv_sta) { - struct iwl3945_rs_sta *rs_sta = (void *)sta->rate_ctrl_priv; + struct iwl3945_rs_sta *rs_sta = priv_sta; int i; IWL_DEBUG_RATE("enter\n"); @@ -333,22 +331,22 @@ static void rs_rate_init(void *priv_rate, void *priv_sta, * after assoc.. */ for (i = IWL_RATE_COUNT - 1; i >= 0; i--) { - if (sta->sta.supp_rates[local->hw.conf.channel->band] & (1 << i)) { + if (sta->supp_rates[sband->band] & (1 << i)) { rs_sta->last_txrate_idx = i; break; } } /* For 5 GHz band it start at IWL_FIRST_OFDM_RATE */ - if (local->hw.conf.channel->band == IEEE80211_BAND_5GHZ) + if (sband->band == IEEE80211_BAND_5GHZ) rs_sta->last_txrate_idx += IWL_FIRST_OFDM_RATE; IWL_DEBUG_RATE("leave\n"); } -static void *rs_alloc(struct ieee80211_local *local) +static void *rs_alloc(struct ieee80211_hw *hw, struct dentry *debugfsdir) { - return local->hw.priv; + return hw->priv; } /* rate scale requires free function to be implemented */ @@ -356,17 +354,24 @@ static void rs_free(void *priv) { return; } + static void rs_clear(void *priv) { return; } -static void *rs_alloc_sta(void *priv, gfp_t gfp) +static void *rs_alloc_sta(void *priv, struct ieee80211_sta *sta, gfp_t gfp) { struct iwl3945_rs_sta *rs_sta; + struct iwl3945_sta_priv *psta = (void *) sta->drv_priv; int i; + /* + * XXX: If it's using sta->drv_priv anyway, it might + * as well just put all the information there. + */ + IWL_DEBUG_RATE("enter\n"); rs_sta = kzalloc(sizeof(struct iwl3945_rs_sta), gfp); @@ -375,6 +380,8 @@ static void *rs_alloc_sta(void *priv, gfp_t gfp) return NULL; } + psta->rs_sta = rs_sta; + spin_lock_init(&rs_sta->lock); rs_sta->start_rate = IWL_RATE_INVALID; @@ -400,10 +407,14 @@ static void *rs_alloc_sta(void *priv, gfp_t gfp) return rs_sta; } -static void rs_free_sta(void *priv, void *priv_sta) +static void rs_free_sta(void *priv, struct ieee80211_sta *sta, + void *priv_sta) { + struct iwl3945_sta_priv *psta = (void *) sta->drv_priv; struct iwl3945_rs_sta *rs_sta = priv_sta; + psta->rs_sta = NULL; + IWL_DEBUG_RATE("enter\n"); del_timer_sync(&rs_sta->rate_scale_flush); kfree(rs_sta); @@ -445,26 +456,19 @@ static int rs_adjust_next_rate(struct iwl3945_priv *priv, int rate) * NOTE: Uses iwl3945_priv->retry_rate for the # of retries attempted by * the hardware for each rate. */ -static void rs_tx_status(void *priv_rate, - struct net_device *dev, +static void rs_tx_status(void *priv_rate, struct ieee80211_supported_band *sband, + struct ieee80211_sta *sta, void *priv_sta, struct sk_buff *skb) { u8 retries, current_count; int scale_rate_index, first_index, last_index; unsigned long flags; - struct sta_info *sta; - struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)skb->data; struct iwl3945_priv *priv = (struct iwl3945_priv *)priv_rate; - struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr); - struct iwl3945_rs_sta *rs_sta; - struct ieee80211_supported_band *sband; + struct iwl3945_rs_sta *rs_sta = priv_sta; struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb); IWL_DEBUG_RATE("enter\n"); - sband = local->hw.wiphy->bands[local->hw.conf.channel->band]; - - retries = info->status.retry_count; first_index = sband->bitrates[info->tx_rate_idx].hw_value; if ((first_index < 0) || (first_index >= IWL_RATE_COUNT)) { @@ -472,17 +476,11 @@ static void rs_tx_status(void *priv_rate, return; } - rcu_read_lock(); - - sta = sta_info_get(local, hdr->addr1); - if (!sta || !sta->rate_ctrl_priv) { - rcu_read_unlock(); + if (!priv_sta) { IWL_DEBUG_RATE("leave: No STA priv data to update!\n"); return; } - rs_sta = (void *)sta->rate_ctrl_priv; - rs_sta->tx_packets++; scale_rate_index = first_index; @@ -549,8 +547,6 @@ static void rs_tx_status(void *priv_rate, spin_unlock_irqrestore(&rs_sta->lock, flags); - rcu_read_unlock(); - IWL_DEBUG_RATE("leave\n"); return; @@ -634,16 +630,15 @@ static u16 iwl3945_get_adjacent_rate(struct iwl3945_rs_sta *rs_sta, * rate table and must reference the driver allocated rate table * */ -static void rs_get_rate(void *priv_rate, struct net_device *dev, - struct ieee80211_supported_band *sband, - struct sk_buff *skb, - struct rate_selection *sel) +static void rs_get_rate(void *priv_r, struct ieee80211_supported_band *sband, + struct ieee80211_sta *sta, void *priv_sta, + struct sk_buff *skb, struct rate_selection *sel) { u8 low = IWL_RATE_INVALID; u8 high = IWL_RATE_INVALID; u16 high_low; int index; - struct iwl3945_rs_sta *rs_sta; + struct iwl3945_rs_sta *rs_sta = priv_sta; struct iwl3945_rate_scale_data *window = NULL; int current_tpt = IWL_INV_TPT; int low_tpt = IWL_INV_TPT; @@ -651,34 +646,25 @@ static void rs_get_rate(void *priv_rate, struct net_device *dev, u32 fail_count; s8 scale_action = 0; unsigned long flags; - struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr); struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)skb->data; - struct sta_info *sta; u16 fc, rate_mask; - struct iwl3945_priv *priv = (struct iwl3945_priv *)priv_rate; + struct iwl3945_priv *priv = (struct iwl3945_priv *)priv_r; DECLARE_MAC_BUF(mac); IWL_DEBUG_RATE("enter\n"); - rcu_read_lock(); - - sta = sta_info_get(local, hdr->addr1); - /* Send management frames and broadcast/multicast data using lowest * rate. */ fc = le16_to_cpu(hdr->frame_control); if ((fc & IEEE80211_FCTL_FTYPE) != IEEE80211_FTYPE_DATA || is_multicast_ether_addr(hdr->addr1) || - !sta || !sta->rate_ctrl_priv) { + !sta || !priv_sta) { IWL_DEBUG_RATE("leave: No STA priv data to update!\n"); - sel->rate_idx = rate_lowest_index(local, sband, sta); - rcu_read_unlock(); + sel->rate_idx = rate_lowest_index(sband, sta); return; } - rs_sta = (void *)sta->rate_ctrl_priv; - - rate_mask = sta->sta.supp_rates[sband->band]; + rate_mask = sta->supp_rates[sband->band]; index = min(rs_sta->last_txrate_idx & 0xffff, IWL_RATE_COUNT - 1); if (sband->band == IEEE80211_BAND_5GHZ) @@ -811,8 +797,6 @@ static void rs_get_rate(void *priv_rate, struct net_device *dev, else sel->rate_idx = rs_sta->last_txrate_idx; - rcu_read_unlock(); - IWL_DEBUG_RATE("leave: %d\n", index); } @@ -829,114 +813,28 @@ static struct rate_control_ops rs_ops = { .free_sta = rs_free_sta, }; -int iwl3945_fill_rs_info(struct ieee80211_hw *hw, char *buf, u8 sta_id) -{ - struct ieee80211_local *local = hw_to_local(hw); - struct iwl3945_priv *priv = hw->priv; - struct iwl3945_rs_sta *rs_sta; - struct sta_info *sta; - unsigned long flags; - int count = 0, i; - u32 samples = 0, success = 0, good = 0; - unsigned long now = jiffies; - u32 max_time = 0; - - rcu_read_lock(); - - sta = sta_info_get(local, priv->stations[sta_id].sta.sta.addr); - if (!sta || !sta->rate_ctrl_priv) { - if (sta) - IWL_DEBUG_RATE("leave - no private rate data!\n"); - else - IWL_DEBUG_RATE("leave - no station!\n"); - rcu_read_unlock(); - return sprintf(buf, "station %d not found\n", sta_id); - } - - rs_sta = (void *)sta->rate_ctrl_priv; - spin_lock_irqsave(&rs_sta->lock, flags); - i = IWL_RATE_54M_INDEX; - while (1) { - u64 mask; - int j; - - count += - sprintf(&buf[count], " %2dMbs: ", iwl3945_rates[i].ieee / 2); - - mask = (1ULL << (IWL_RATE_MAX_WINDOW - 1)); - for (j = 0; j < IWL_RATE_MAX_WINDOW; j++, mask >>= 1) - buf[count++] = - (rs_sta->win[i].data & mask) ? '1' : '0'; - - samples += rs_sta->win[i].counter; - good += rs_sta->win[i].success_counter; - success += rs_sta->win[i].success_counter * - iwl3945_rates[i].ieee; - - if (rs_sta->win[i].stamp) { - int delta = - jiffies_to_msecs(now - rs_sta->win[i].stamp); - - if (delta > max_time) - max_time = delta; - - count += sprintf(&buf[count], "%5dms\n", delta); - } else - buf[count++] = '\n'; - - j = iwl3945_get_prev_ieee_rate(i); - if (j == i) - break; - i = j; - } - spin_unlock_irqrestore(&rs_sta->lock, flags); - rcu_read_unlock(); - - /* Display the average rate of all samples taken. - * - * NOTE: We multiple # of samples by 2 since the IEEE measurement - * added from iwl3945_rates is actually 2X the rate */ - if (samples) - count += sprintf( - &buf[count], - "\nAverage rate is %3d.%02dMbs over last %4dms\n" - "%3d%% success (%d good packets over %d tries)\n", - success / (2 * samples), (success * 5 / samples) % 10, - max_time, good * 100 / samples, good, samples); - else - count += sprintf(&buf[count], "\nAverage rate: 0Mbs\n"); - - return count; -} - void iwl3945_rate_scale_init(struct ieee80211_hw *hw, s32 sta_id) { struct iwl3945_priv *priv = hw->priv; s32 rssi = 0; unsigned long flags; - struct ieee80211_local *local = hw_to_local(hw); struct iwl3945_rs_sta *rs_sta; - struct sta_info *sta; + struct ieee80211_sta *sta; + struct iwl3945_sta_priv *psta; IWL_DEBUG_RATE("enter\n"); - if (!local->rate_ctrl->ops->name || - strcmp(local->rate_ctrl->ops->name, RS_NAME)) { - IWL_WARNING("iwl-3945-rs not selected as rate control algo!\n"); - IWL_DEBUG_RATE("leave - mac80211 picked the wrong RC algo.\n"); - return; - } - rcu_read_lock(); - sta = sta_info_get(local, priv->stations[sta_id].sta.sta.addr); - if (!sta || !sta->rate_ctrl_priv) { + sta = ieee80211_find_sta(hw, priv->stations[sta_id].sta.sta.addr); + psta = (void *) sta->drv_priv; + if (!sta || !psta) { IWL_DEBUG_RATE("leave - no private rate data!\n"); rcu_read_unlock(); return; } - rs_sta = (void *)sta->rate_ctrl_priv; + rs_sta = psta->rs_sta; spin_lock_irqsave(&rs_sta->lock, flags); diff --git a/drivers/net/wireless/iwlwifi/iwl-3945-rs.h b/drivers/net/wireless/iwlwifi/iwl-3945-rs.h index f085d330bdc..98b17ae6ef2 100644 --- a/drivers/net/wireless/iwlwifi/iwl-3945-rs.h +++ b/drivers/net/wireless/iwlwifi/iwl-3945-rs.h @@ -175,15 +175,6 @@ static inline u8 iwl3945_get_prev_ieee_rate(u8 rate_index) return rate; } -/** - * iwl3945_fill_rs_info - Fill an output text buffer with the rate representation - * - * NOTE: This is provided as a quick mechanism for a user to visualize - * the performance of the rate control algorithm and is not meant to be - * parsed software. - */ -extern int iwl3945_fill_rs_info(struct ieee80211_hw *, char *buf, u8 sta_id); - /** * iwl3945_rate_scale_init - Initialize the rate scale table based on assoc info * diff --git a/drivers/net/wireless/iwlwifi/iwl-3945.h b/drivers/net/wireless/iwlwifi/iwl-3945.h index 2a4933b5fb6..bdd32475b99 100644 --- a/drivers/net/wireless/iwlwifi/iwl-3945.h +++ b/drivers/net/wireless/iwlwifi/iwl-3945.h @@ -73,6 +73,10 @@ extern struct pci_device_id iwl3945_hw_card_ids[]; extern int iwl3945_param_hwcrypto; extern int iwl3945_param_queues_num; +struct iwl3945_sta_priv { + struct iwl3945_rs_sta *rs_sta; +}; + enum iwl3945_antenna { IWL_ANTENNA_DIVERSITY, IWL_ANTENNA_MAIN, diff --git a/drivers/net/wireless/iwlwifi/iwl-agn-rs.c b/drivers/net/wireless/iwlwifi/iwl-agn-rs.c index 8b57b390c8b..93944de923c 100644 --- a/drivers/net/wireless/iwlwifi/iwl-agn-rs.c +++ b/drivers/net/wireless/iwlwifi/iwl-agn-rs.c @@ -35,8 +35,6 @@ #include -#include "../net/mac80211/rate.h" - #include "iwl-dev.h" #include "iwl-sta.h" #include "iwl-core.h" @@ -169,9 +167,9 @@ struct iwl_lq_sta { }; static void rs_rate_scale_perform(struct iwl_priv *priv, - struct net_device *dev, struct ieee80211_hdr *hdr, - struct sta_info *sta); + struct ieee80211_sta *sta, + struct iwl_lq_sta *lq_sta); static void rs_fill_link_cmd(const struct iwl_priv *priv, struct iwl_lq_sta *lq_sta, u32 rate_n_flags); @@ -357,20 +355,20 @@ static u32 rs_tl_get_load(struct iwl_lq_sta *lq_data, u8 tid) static void rs_tl_turn_on_agg_for_tid(struct iwl_priv *priv, struct iwl_lq_sta *lq_data, u8 tid, - struct sta_info *sta) + struct ieee80211_sta *sta) { DECLARE_MAC_BUF(mac); if (rs_tl_get_load(lq_data, tid) > IWL_AGG_LOAD_THRESHOLD) { IWL_DEBUG_HT("Starting Tx agg: STA: %s tid: %d\n", - print_mac(mac, sta->sta.addr), tid); - ieee80211_start_tx_ba_session(priv->hw, sta->sta.addr, tid); + print_mac(mac, sta->addr), tid); + ieee80211_start_tx_ba_session(priv->hw, sta->addr, tid); } } static void rs_tl_turn_on_agg(struct iwl_priv *priv, u8 tid, struct iwl_lq_sta *lq_data, - struct sta_info *sta) + struct ieee80211_sta *sta) { if ((tid < TID_MAX_LOAD_COUNT)) rs_tl_turn_on_agg_for_tid(priv, lq_data, tid, sta); @@ -770,7 +768,8 @@ out: /* * mac80211 sends us Tx status */ -static void rs_tx_status(void *priv_rate, struct net_device *dev, +static void rs_tx_status(void *priv_r, struct ieee80211_supported_band *sband, + struct ieee80211_sta *sta, void *priv_sta, struct sk_buff *skb) { int status; @@ -778,11 +777,9 @@ static void rs_tx_status(void *priv_rate, struct net_device *dev, int rs_index, index = 0; struct iwl_lq_sta *lq_sta; struct iwl_link_quality_cmd *table; - struct sta_info *sta; struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)skb->data; - struct iwl_priv *priv = (struct iwl_priv *)priv_rate; - struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr); - struct ieee80211_hw *hw = local_to_hw(local); + struct iwl_priv *priv = (struct iwl_priv *)priv_r; + struct ieee80211_hw *hw = priv->hw; struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb); struct iwl_rate_scale_data *window = NULL; struct iwl_rate_scale_data *search_win = NULL; @@ -808,15 +805,7 @@ static void rs_tx_status(void *priv_rate, struct net_device *dev, if (retries > 15) retries = 15; - rcu_read_lock(); - - sta = sta_info_get(local, hdr->addr1); - - if (!sta || !sta->rate_ctrl_priv) - goto out; - - - lq_sta = (struct iwl_lq_sta *)sta->rate_ctrl_priv; + lq_sta = (struct iwl_lq_sta *)priv_sta; if ((priv->iw_mode == NL80211_IFTYPE_ADHOC) && !lq_sta->ibss_sta_added) @@ -962,9 +951,8 @@ static void rs_tx_status(void *priv_rate, struct net_device *dev, } /* See if there's a better rate or modulation mode to try. */ - rs_rate_scale_perform(priv, dev, hdr, sta); + rs_rate_scale_perform(priv, hdr, sta, lq_sta); out: - rcu_read_unlock(); return; } @@ -1140,7 +1128,7 @@ static s32 rs_get_best_rate(struct iwl_priv *priv, static int rs_switch_to_mimo2(struct iwl_priv *priv, struct iwl_lq_sta *lq_sta, struct ieee80211_conf *conf, - struct sta_info *sta, + struct ieee80211_sta *sta, struct iwl_scale_tbl_info *tbl, int index) { u16 rate_mask; @@ -1148,10 +1136,10 @@ static int rs_switch_to_mimo2(struct iwl_priv *priv, s8 is_green = lq_sta->is_green; if (!(conf->flags & IEEE80211_CONF_SUPPORT_HT_MODE) || - !sta->sta.ht_info.ht_supported) + !sta->ht_info.ht_supported) return -1; - if (((sta->sta.ht_info.cap & IEEE80211_HT_CAP_SM_PS) >> 2) + if (((sta->ht_info.cap & IEEE80211_HT_CAP_SM_PS) >> 2) == WLAN_HT_CAP_SM_PS_STATIC) return -1; @@ -1208,7 +1196,7 @@ static int rs_switch_to_mimo2(struct iwl_priv *priv, static int rs_switch_to_siso(struct iwl_priv *priv, struct iwl_lq_sta *lq_sta, struct ieee80211_conf *conf, - struct sta_info *sta, + struct ieee80211_sta *sta, struct iwl_scale_tbl_info *tbl, int index) { u16 rate_mask; @@ -1216,7 +1204,7 @@ static int rs_switch_to_siso(struct iwl_priv *priv, s32 rate; if (!(conf->flags & IEEE80211_CONF_SUPPORT_HT_MODE) || - !sta->sta.ht_info.ht_supported) + !sta->ht_info.ht_supported) return -1; IWL_DEBUG_RATE("LQ: try to switch to SISO\n"); @@ -1268,7 +1256,7 @@ static int rs_switch_to_siso(struct iwl_priv *priv, static int rs_move_legacy_other(struct iwl_priv *priv, struct iwl_lq_sta *lq_sta, struct ieee80211_conf *conf, - struct sta_info *sta, + struct ieee80211_sta *sta, int index) { struct iwl_scale_tbl_info *tbl = &(lq_sta->lq_info[lq_sta->active_tbl]); @@ -1376,7 +1364,7 @@ out: static int rs_move_siso_to_other(struct iwl_priv *priv, struct iwl_lq_sta *lq_sta, struct ieee80211_conf *conf, - struct sta_info *sta, int index) + struct ieee80211_sta *sta, int index) { u8 is_green = lq_sta->is_green; struct iwl_scale_tbl_info *tbl = &(lq_sta->lq_info[lq_sta->active_tbl]); @@ -1487,7 +1475,7 @@ static int rs_move_siso_to_other(struct iwl_priv *priv, static int rs_move_mimo_to_other(struct iwl_priv *priv, struct iwl_lq_sta *lq_sta, struct ieee80211_conf *conf, - struct sta_info *sta, int index) + struct ieee80211_sta *sta, int index) { s8 is_green = lq_sta->is_green; struct iwl_scale_tbl_info *tbl = &(lq_sta->lq_info[lq_sta->active_tbl]); @@ -1680,12 +1668,11 @@ static void rs_stay_in_table(struct iwl_lq_sta *lq_sta) * Do rate scaling and search for new modulation mode. */ static void rs_rate_scale_perform(struct iwl_priv *priv, - struct net_device *dev, struct ieee80211_hdr *hdr, - struct sta_info *sta) + struct ieee80211_sta *sta, + struct iwl_lq_sta *lq_sta) { - struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr); - struct ieee80211_hw *hw = local_to_hw(local); + struct ieee80211_hw *hw = priv->hw; struct ieee80211_conf *conf = &hw->conf; int low = IWL_RATE_INVALID; int high = IWL_RATE_INVALID; @@ -1700,7 +1687,6 @@ static void rs_rate_scale_perform(struct iwl_priv *priv, __le16 fc; u16 rate_mask; u8 update_lq = 0; - struct iwl_lq_sta *lq_sta; struct iwl_scale_tbl_info *tbl, *tbl1; u16 rate_scale_index_msk = 0; u32 rate; @@ -1721,11 +1707,10 @@ static void rs_rate_scale_perform(struct iwl_priv *priv, return; } - if (!sta || !sta->rate_ctrl_priv) + if (!sta || !lq_sta) return; - lq_sta = (struct iwl_lq_sta *)sta->rate_ctrl_priv; - lq_sta->supp_rates = sta->sta.supp_rates[lq_sta->band]; + lq_sta->supp_rates = sta->supp_rates[lq_sta->band]; tid = rs_tl_add_packet(lq_sta, hdr); @@ -2064,9 +2049,9 @@ out: static void rs_initialize_lq(struct iwl_priv *priv, struct ieee80211_conf *conf, - struct sta_info *sta) + struct ieee80211_sta *sta, + struct iwl_lq_sta *lq_sta) { - struct iwl_lq_sta *lq_sta; struct iwl_scale_tbl_info *tbl; int rate_idx; int i; @@ -2075,10 +2060,9 @@ static void rs_initialize_lq(struct iwl_priv *priv, u8 active_tbl = 0; u8 valid_tx_ant; - if (!sta || !sta->rate_ctrl_priv) + if (!sta || !lq_sta) goto out; - lq_sta = (struct iwl_lq_sta *)sta->rate_ctrl_priv; i = lq_sta->last_txrate_idx; if ((lq_sta->lq.sta_id == 0xff) && @@ -2119,37 +2103,30 @@ static void rs_initialize_lq(struct iwl_priv *priv, return; } -static void rs_get_rate(void *priv_rate, struct net_device *dev, - struct ieee80211_supported_band *sband, - struct sk_buff *skb, - struct rate_selection *sel) +static void rs_get_rate(void *priv_r, struct ieee80211_supported_band *sband, + struct ieee80211_sta *sta, void *priv_sta, + struct sk_buff *skb, struct rate_selection *sel) { int i; - struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr); - struct ieee80211_conf *conf = &local->hw.conf; + struct iwl_priv *priv = (struct iwl_priv *)priv_r; + struct ieee80211_conf *conf = &priv->hw->conf; struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)skb->data; - struct sta_info *sta; __le16 fc; - struct iwl_priv *priv = (struct iwl_priv *)priv_rate; struct iwl_lq_sta *lq_sta; IWL_DEBUG_RATE_LIMIT("rate scale calculate new rate for skb\n"); - rcu_read_lock(); - - sta = sta_info_get(local, hdr->addr1); - /* Send management frames and broadcast/multicast data using lowest * rate. */ fc = hdr->frame_control; if (!ieee80211_is_data(fc) || is_multicast_ether_addr(hdr->addr1) || - !sta || !sta->rate_ctrl_priv) { - sel->rate_idx = rate_lowest_index(local, sband, sta); - goto out; + !sta || !priv_sta) { + sel->rate_idx = rate_lowest_index(sband, sta); + return; } - lq_sta = (struct iwl_lq_sta *)sta->rate_ctrl_priv; + lq_sta = (struct iwl_lq_sta *)priv_sta; i = lq_sta->last_txrate_idx; if ((priv->iw_mode == NL80211_IFTYPE_ADHOC) && @@ -2167,23 +2144,22 @@ static void rs_get_rate(void *priv_rate, struct net_device *dev, lq_sta->lq.sta_id = sta_id; lq_sta->lq.rs_table[0].rate_n_flags = 0; lq_sta->ibss_sta_added = 1; - rs_initialize_lq(priv, conf, sta); + rs_initialize_lq(priv, conf, sta, lq_sta); } } if ((i < 0) || (i > IWL_RATE_COUNT)) { - sel->rate_idx = rate_lowest_index(local, sband, sta); - goto out; + sel->rate_idx = rate_lowest_index(sband, sta); + return; } if (sband->band == IEEE80211_BAND_5GHZ) i -= IWL_FIRST_OFDM_RATE; sel->rate_idx = i; -out: - rcu_read_unlock(); } -static void *rs_alloc_sta(void *priv_rate, gfp_t gfp) +static void *rs_alloc_sta(void *priv_rate, struct ieee80211_sta *sta, + gfp_t gfp) { struct iwl_lq_sta *lq_sta; struct iwl_priv *priv; @@ -2206,20 +2182,16 @@ static void *rs_alloc_sta(void *priv_rate, gfp_t gfp) return lq_sta; } -static void rs_rate_init(void *priv_rate, void *priv_sta, - struct ieee80211_local *local, - struct sta_info *sta) +static void rs_rate_init(void *priv_r, struct ieee80211_supported_band *sband, + struct ieee80211_sta *sta, void *priv_sta) { int i, j; - struct ieee80211_conf *conf = &local->hw.conf; - struct ieee80211_supported_band *sband; - struct iwl_priv *priv = (struct iwl_priv *)priv_rate; + struct iwl_priv *priv = (struct iwl_priv *)priv_r; + struct ieee80211_conf *conf = &priv->hw->conf; struct iwl_lq_sta *lq_sta = priv_sta; - sband = local->hw.wiphy->bands[local->hw.conf.channel->band]; - lq_sta->flush_timer = 0; - lq_sta->supp_rates = sta->sta.supp_rates[sband->band]; + lq_sta->supp_rates = sta->supp_rates[sband->band]; for (j = 0; j < LQ_SIZE; j++) for (i = 0; i < IWL_RATE_COUNT; i++) rs_rate_scale_clear_window(&lq_sta->lq_info[j].win[i]); @@ -2232,17 +2204,17 @@ static void rs_rate_init(void *priv_rate, void *priv_sta, lq_sta->ibss_sta_added = 0; if (priv->iw_mode == NL80211_IFTYPE_AP) { - u8 sta_id = iwl_find_station(priv, sta->sta.addr); + u8 sta_id = iwl_find_station(priv, sta->addr); DECLARE_MAC_BUF(mac); /* for IBSS the call are from tasklet */ IWL_DEBUG_RATE("LQ: ADD station %s\n", - print_mac(mac, sta->sta.addr)); + print_mac(mac, sta->addr)); if (sta_id == IWL_INVALID_STATION) { IWL_DEBUG_RATE("LQ: ADD station %s\n", - print_mac(mac, sta->sta.addr)); - sta_id = iwl_add_station_flags(priv, sta->sta.addr, + print_mac(mac, sta->addr)); + sta_id = iwl_add_station_flags(priv, sta->addr, 0, CMD_ASYNC, NULL); } if ((sta_id != IWL_INVALID_STATION)) { @@ -2256,11 +2228,11 @@ static void rs_rate_init(void *priv_rate, void *priv_sta, /* Find highest tx rate supported by hardware and destination station */ lq_sta->last_txrate_idx = 3; for (i = 0; i < sband->n_bitrates; i++) - if (sta->sta.supp_rates[sband->band] & BIT(i)) + if (sta->supp_rates[sband->band] & BIT(i)) lq_sta->last_txrate_idx = i; /* For MODE_IEEE80211A, skip over cck rates in global rate table */ - if (local->hw.conf.channel->band == IEEE80211_BAND_5GHZ) + if (sband->band == IEEE80211_BAND_5GHZ) lq_sta->last_txrate_idx += IWL_FIRST_OFDM_RATE; lq_sta->is_dup = 0; @@ -2301,7 +2273,7 @@ static void rs_rate_init(void *priv_rate, void *priv_sta, lq_sta->tx_agg_tid_en = IWL_AGG_ALL_TID; lq_sta->drv = priv; - rs_initialize_lq(priv, conf, sta); + rs_initialize_lq(priv, conf, sta, lq_sta); } static void rs_fill_link_cmd(const struct iwl_priv *priv, @@ -2423,9 +2395,9 @@ static void rs_fill_link_cmd(const struct iwl_priv *priv, lq_cmd->agg_params.agg_time_limit = cpu_to_le16(4000); } -static void *rs_alloc(struct ieee80211_local *local) +static void *rs_alloc(struct ieee80211_hw *hw, struct dentry *debugfsdir) { - return local->hw.priv; + return hw->priv; } /* rate scale requires free function to be implemented */ static void rs_free(void *priv_rate) @@ -2446,12 +2418,12 @@ static void rs_clear(void *priv_rate) #endif /* CONFIG_IWLWIFI_DEBUG */ } -static void rs_free_sta(void *priv_rate, void *priv_sta) +static void rs_free_sta(void *priv_r, struct ieee80211_sta *sta, + void *priv_sta) { struct iwl_lq_sta *lq_sta = priv_sta; - struct iwl_priv *priv; + struct iwl_priv *priv = priv_r; - priv = (struct iwl_priv *)priv_rate; IWL_DEBUG_RATE("enter\n"); kfree(lq_sta); IWL_DEBUG_RATE("leave\n"); diff --git a/drivers/net/wireless/iwlwifi/iwl3945-base.c b/drivers/net/wireless/iwlwifi/iwl3945-base.c index 62b26befddc..d15a2c99795 100644 --- a/drivers/net/wireless/iwlwifi/iwl3945-base.c +++ b/drivers/net/wireless/iwlwifi/iwl3945-base.c @@ -7370,15 +7370,6 @@ static ssize_t show_temperature(struct device *d, static DEVICE_ATTR(temperature, S_IRUGO, show_temperature, NULL); -static ssize_t show_rs_window(struct device *d, - struct device_attribute *attr, - char *buf) -{ - struct iwl3945_priv *priv = d->driver_data; - return iwl3945_fill_rs_info(priv->hw, buf, IWL_AP_ID); -} -static DEVICE_ATTR(rs_window, S_IRUGO, show_rs_window, NULL); - static ssize_t show_tx_power(struct device *d, struct device_attribute *attr, char *buf) { @@ -7840,7 +7831,6 @@ static struct attribute *iwl3945_sysfs_entries[] = { #endif &dev_attr_power_level.attr, &dev_attr_retry_rate.attr, - &dev_attr_rs_window.attr, &dev_attr_statistics.attr, &dev_attr_status.attr, &dev_attr_temperature.attr, @@ -7908,6 +7898,7 @@ static int iwl3945_pci_probe(struct pci_dev *pdev, const struct pci_device_id *e SET_IEEE80211_DEV(hw, &pdev->dev); hw->rate_control_algorithm = "iwl-3945-rs"; + hw->sta_data_size = sizeof(struct iwl3945_sta_priv); IWL_DEBUG_INFO("*** LOAD DRIVER ***\n"); priv = hw->priv; diff --git a/include/net/mac80211.h b/include/net/mac80211.h index 003e4a03874..f5f5b1ff158 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -1800,4 +1800,72 @@ void ieee80211_notify_mac(struct ieee80211_hw *hw, struct ieee80211_sta *ieee80211_find_sta(struct ieee80211_hw *hw, const u8 *addr); + +/* Rate control API */ +/** + * struct rate_selection - rate information for/from rate control algorithms + * + * @rate_idx: selected transmission rate index + * @nonerp_idx: Non-ERP rate to use instead if ERP cannot be used + * @probe_idx: rate for probing (or -1) + * @max_rate_idx: maximum rate index that can be used, this is + * input to the algorithm and will be enforced + */ +struct rate_selection { + s8 rate_idx, nonerp_idx, probe_idx, max_rate_idx; +}; + +struct rate_control_ops { + struct module *module; + const char *name; + void *(*alloc)(struct ieee80211_hw *hw, struct dentry *debugfsdir); + void (*clear)(void *priv); + void (*free)(void *priv); + + void *(*alloc_sta)(void *priv, struct ieee80211_sta *sta, gfp_t gfp); + void (*rate_init)(void *priv, struct ieee80211_supported_band *sband, + struct ieee80211_sta *sta, void *priv_sta); + void (*free_sta)(void *priv, struct ieee80211_sta *sta, + void *priv_sta); + + void (*tx_status)(void *priv, struct ieee80211_supported_band *sband, + struct ieee80211_sta *sta, void *priv_sta, + struct sk_buff *skb); + void (*get_rate)(void *priv, struct ieee80211_supported_band *sband, + struct ieee80211_sta *sta, void *priv_sta, + struct sk_buff *skb, + struct rate_selection *sel); + + void (*add_sta_debugfs)(void *priv, void *priv_sta, + struct dentry *dir); + void (*remove_sta_debugfs)(void *priv, void *priv_sta); +}; + +static inline int rate_supported(struct ieee80211_sta *sta, + enum ieee80211_band band, + int index) +{ + return (sta == NULL || sta->supp_rates[band] & BIT(index)); +} + +static inline s8 +rate_lowest_index(struct ieee80211_supported_band *sband, + struct ieee80211_sta *sta) +{ + int i; + + for (i = 0; i < sband->n_bitrates; i++) + if (rate_supported(sta, sband->band, i)) + return i; + + /* warn when we cannot find a rate. */ + WARN_ON(1); + + return 0; +} + + +int ieee80211_rate_control_register(struct rate_control_ops *ops); +void ieee80211_rate_control_unregister(struct rate_control_ops *ops); + #endif /* MAC80211_H */ diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index 89a183c2327..855126a3039 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -693,7 +693,7 @@ static int ieee80211_add_station(struct wiphy *wiphy, struct net_device *dev, sta_apply_parameters(local, sta, params); - rate_control_rate_init(sta, local); + rate_control_rate_init(sta); rcu_read_lock(); diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index 0b25b0f46b1..8025b294588 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -724,6 +724,8 @@ struct ieee80211_local { #ifdef CONFIG_MAC80211_DEBUGFS struct local_debugfsdentries { + struct dentry *rcdir; + struct dentry *rcname; struct dentry *frequency; struct dentry *antenna_sel_tx; struct dentry *antenna_sel_rx; diff --git a/net/mac80211/main.c b/net/mac80211/main.c index 7d2d5a041e2..d608c44047c 100644 --- a/net/mac80211/main.c +++ b/net/mac80211/main.c @@ -542,6 +542,7 @@ void ieee80211_tx_status(struct ieee80211_hw *hw, struct sk_buff *skb) struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb); u16 frag, type; __le16 fc; + struct ieee80211_supported_band *sband; struct ieee80211_tx_status_rtap_hdr *rthdr; struct ieee80211_sub_if_data *sdata; struct net_device *prev_dev = NULL; @@ -588,7 +589,8 @@ void ieee80211_tx_status(struct ieee80211_hw *hw, struct sk_buff *skb) sta->tx_retry_count += info->status.retry_count; } - rate_control_tx_status(local->mdev, skb); + sband = local->hw.wiphy->bands[info->band]; + rate_control_tx_status(local, sband, sta, skb); } rcu_read_unlock(); diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index 8611a8318c9..109c3a7e63a 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -1323,7 +1323,7 @@ static void ieee80211_rx_mgmt_assoc_resp(struct ieee80211_sub_if_data *sdata, ieee80211_handle_ht(local, 1, &sta->sta.ht_info, &bss_info); } - rate_control_rate_init(sta, local); + rate_control_rate_init(sta); if (elems.wmm_param) { set_sta_flags(sta, WLAN_STA_WME); @@ -2342,7 +2342,7 @@ struct sta_info *ieee80211_ibss_add_sta(struct ieee80211_sub_if_data *sdata, sta->sta.supp_rates[band] = supp_rates | ieee80211_mandatory_rates(local, band); - rate_control_rate_init(sta, local); + rate_control_rate_init(sta); if (sta_info_insert(sta)) return NULL; diff --git a/net/mac80211/rate.c b/net/mac80211/rate.c index 0388c090dfe..5d786720d93 100644 --- a/net/mac80211/rate.c +++ b/net/mac80211/rate.c @@ -12,6 +12,7 @@ #include #include "rate.h" #include "ieee80211_i.h" +#include "debugfs.h" struct rate_control_alg { struct list_head list; @@ -127,19 +128,46 @@ static void ieee80211_rate_control_ops_put(struct rate_control_ops *ops) module_put(ops->module); } +#ifdef CONFIG_MAC80211_DEBUGFS +static ssize_t rcname_read(struct file *file, char __user *userbuf, + size_t count, loff_t *ppos) +{ + struct rate_control_ref *ref = file->private_data; + int len = strlen(ref->ops->name); + + return simple_read_from_buffer(userbuf, count, ppos, + ref->ops->name, len); +} + +static const struct file_operations rcname_ops = { + .read = rcname_read, + .open = mac80211_open_file_generic, +}; +#endif + struct rate_control_ref *rate_control_alloc(const char *name, struct ieee80211_local *local) { + struct dentry *debugfsdir = NULL; struct rate_control_ref *ref; ref = kmalloc(sizeof(struct rate_control_ref), GFP_KERNEL); if (!ref) goto fail_ref; kref_init(&ref->kref); + ref->local = local; ref->ops = ieee80211_rate_control_ops_get(name); if (!ref->ops) goto fail_ops; - ref->priv = ref->ops->alloc(local); + +#ifdef CONFIG_MAC80211_DEBUGFS + debugfsdir = debugfs_create_dir("rc", local->hw.wiphy->debugfsdir); + local->debugfs.rcdir = debugfsdir; + local->debugfs.rcname = debugfs_create_file("name", 0400, debugfsdir, + ref, &rcname_ops); +#endif + + ref->priv = ref->ops->alloc(&local->hw, debugfsdir); if (!ref->priv) goto fail_priv; return ref; @@ -158,29 +186,46 @@ static void rate_control_release(struct kref *kref) ctrl_ref = container_of(kref, struct rate_control_ref, kref); ctrl_ref->ops->free(ctrl_ref->priv); + +#ifdef CONFIG_MAC80211_DEBUGFS + debugfs_remove(ctrl_ref->local->debugfs.rcname); + ctrl_ref->local->debugfs.rcname = NULL; + debugfs_remove(ctrl_ref->local->debugfs.rcdir); + ctrl_ref->local->debugfs.rcdir = NULL; +#endif + ieee80211_rate_control_ops_put(ctrl_ref->ops); kfree(ctrl_ref); } -void rate_control_get_rate(struct net_device *dev, +void rate_control_get_rate(struct ieee80211_sub_if_data *sdata, struct ieee80211_supported_band *sband, - struct sk_buff *skb, + struct sta_info *sta, struct sk_buff *skb, struct rate_selection *sel) { - struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr); - struct rate_control_ref *ref = local->rate_ctrl; - struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) skb->data; - struct sta_info *sta; + struct rate_control_ref *ref = sdata->local->rate_ctrl; + void *priv_sta = NULL; + struct ieee80211_sta *ista = NULL; int i; - rcu_read_lock(); - sta = sta_info_get(local, hdr->addr1); - sel->rate_idx = -1; sel->nonerp_idx = -1; sel->probe_idx = -1; + sel->max_rate_idx = sdata->max_ratectrl_rateidx; + + if (sta) { + ista = &sta->sta; + priv_sta = sta->rate_ctrl_priv; + } + + if (sta && sdata->force_unicast_rateidx > -1) + sel->rate_idx = sdata->force_unicast_rateidx; + else + ref->ops->get_rate(ref->priv, sband, ista, priv_sta, skb, sel); - ref->ops->get_rate(ref->priv, dev, sband, skb, sel); + if (sdata->max_ratectrl_rateidx > -1 && + sel->rate_idx > sdata->max_ratectrl_rateidx) + sel->rate_idx = sdata->max_ratectrl_rateidx; BUG_ON(sel->rate_idx < 0); @@ -191,13 +236,11 @@ void rate_control_get_rate(struct net_device *dev, if (sband->bitrates[sel->rate_idx].bitrate < rate->bitrate) break; - if (rate_supported(sta, sband->band, i) && + if (rate_supported(ista, sband->band, i) && !(rate->flags & IEEE80211_RATE_ERP_G)) sel->nonerp_idx = i; } } - - rcu_read_unlock(); } struct rate_control_ref *rate_control_get(struct rate_control_ref *ref) diff --git a/net/mac80211/rate.h b/net/mac80211/rate.h index 5f18c27eb90..eb94e584d24 100644 --- a/net/mac80211/rate.h +++ b/net/mac80211/rate.h @@ -19,77 +19,48 @@ #include "ieee80211_i.h" #include "sta_info.h" -/** - * struct rate_selection - rate selection for rate control algos - * @rate: selected transmission rate index - * @nonerp: Non-ERP rate to use instead if ERP cannot be used - * @probe: rate for probing (or -1) - * - */ -struct rate_selection { - s8 rate_idx, nonerp_idx, probe_idx; -}; - -struct rate_control_ops { - struct module *module; - const char *name; - void (*tx_status)(void *priv, struct net_device *dev, - struct sk_buff *skb); - void (*get_rate)(void *priv, struct net_device *dev, - struct ieee80211_supported_band *band, - struct sk_buff *skb, - struct rate_selection *sel); - void (*rate_init)(void *priv, void *priv_sta, - struct ieee80211_local *local, struct sta_info *sta); - void (*clear)(void *priv); - - void *(*alloc)(struct ieee80211_local *local); - void (*free)(void *priv); - void *(*alloc_sta)(void *priv, gfp_t gfp); - void (*free_sta)(void *priv, void *priv_sta); - - int (*add_attrs)(void *priv, struct kobject *kobj); - void (*remove_attrs)(void *priv, struct kobject *kobj); - void (*add_sta_debugfs)(void *priv, void *priv_sta, - struct dentry *dir); - void (*remove_sta_debugfs)(void *priv, void *priv_sta); -}; - struct rate_control_ref { + struct ieee80211_local *local; struct rate_control_ops *ops; void *priv; struct kref kref; }; -int ieee80211_rate_control_register(struct rate_control_ops *ops); -void ieee80211_rate_control_unregister(struct rate_control_ops *ops); - /* Get a reference to the rate control algorithm. If `name' is NULL, get the * first available algorithm. */ struct rate_control_ref *rate_control_alloc(const char *name, struct ieee80211_local *local); -void rate_control_get_rate(struct net_device *dev, +void rate_control_get_rate(struct ieee80211_sub_if_data *sdata, struct ieee80211_supported_band *sband, - struct sk_buff *skb, + struct sta_info *sta, struct sk_buff *skb, struct rate_selection *sel); struct rate_control_ref *rate_control_get(struct rate_control_ref *ref); void rate_control_put(struct rate_control_ref *ref); -static inline void rate_control_tx_status(struct net_device *dev, +static inline void rate_control_tx_status(struct ieee80211_local *local, + struct ieee80211_supported_band *sband, + struct sta_info *sta, struct sk_buff *skb) { - struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr); struct rate_control_ref *ref = local->rate_ctrl; + struct ieee80211_sta *ista = &sta->sta; + void *priv_sta = sta->rate_ctrl_priv; - ref->ops->tx_status(ref->priv, dev, skb); + ref->ops->tx_status(ref->priv, sband, ista, priv_sta, skb); } -static inline void rate_control_rate_init(struct sta_info *sta, - struct ieee80211_local *local) +static inline void rate_control_rate_init(struct sta_info *sta) { + struct ieee80211_local *local = sta->sdata->local; struct rate_control_ref *ref = sta->rate_ctrl; - ref->ops->rate_init(ref->priv, sta->rate_ctrl_priv, local, sta); + struct ieee80211_sta *ista = &sta->sta; + void *priv_sta = sta->rate_ctrl_priv; + struct ieee80211_supported_band *sband; + + sband = local->hw.wiphy->bands[local->hw.conf.channel->band]; + + ref->ops->rate_init(ref->priv, sband, ista, priv_sta); } @@ -100,15 +71,19 @@ static inline void rate_control_clear(struct ieee80211_local *local) } static inline void *rate_control_alloc_sta(struct rate_control_ref *ref, + struct ieee80211_sta *sta, gfp_t gfp) { - return ref->ops->alloc_sta(ref->priv, gfp); + return ref->ops->alloc_sta(ref->priv, sta, gfp); } -static inline void rate_control_free_sta(struct rate_control_ref *ref, - void *priv) +static inline void rate_control_free_sta(struct sta_info *sta) { - ref->ops->free_sta(ref->priv, priv); + struct rate_control_ref *ref = sta->rate_ctrl; + struct ieee80211_sta *ista = &sta->sta; + void *priv_sta = sta->rate_ctrl_priv; + + ref->ops->free_sta(ref->priv, ista, priv_sta); } static inline void rate_control_add_sta_debugfs(struct sta_info *sta) @@ -130,31 +105,6 @@ static inline void rate_control_remove_sta_debugfs(struct sta_info *sta) #endif } -static inline int rate_supported(struct sta_info *sta, - enum ieee80211_band band, - int index) -{ - return (sta == NULL || sta->sta.supp_rates[band] & BIT(index)); -} - -static inline s8 -rate_lowest_index(struct ieee80211_local *local, - struct ieee80211_supported_band *sband, - struct sta_info *sta) -{ - int i; - - for (i = 0; i < sband->n_bitrates; i++) - if (rate_supported(sta, sband->band, i)) - return i; - - /* warn when we cannot find a rate. */ - WARN_ON(1); - - return 0; -} - - /* functions for rate control related to a device */ int ieee80211_init_rate_ctrl_alg(struct ieee80211_local *local, const char *name); diff --git a/net/mac80211/rc80211_pid.h b/net/mac80211/rc80211_pid.h index ffafc5da572..01d64d53f3b 100644 --- a/net/mac80211/rc80211_pid.h +++ b/net/mac80211/rc80211_pid.h @@ -124,7 +124,6 @@ struct rc_pid_events_file_info { * struct rc_pid_debugfs_entries - tunable parameters * * Algorithm parameters, tunable via debugfs. - * @dir: the debugfs directory for a specific phy * @target: target percentage for failed frames * @sampling_period: error sampling interval in milliseconds * @coeff_p: absolute value of the proportional coefficient @@ -143,7 +142,6 @@ struct rc_pid_events_file_info { * ordering of rates) */ struct rc_pid_debugfs_entries { - struct dentry *dir; struct dentry *target; struct dentry *sampling_period; struct dentry *coeff_p; diff --git a/net/mac80211/rc80211_pid_algo.c b/net/mac80211/rc80211_pid_algo.c index bc1c4569caa..86eb374e3b8 100644 --- a/net/mac80211/rc80211_pid_algo.c +++ b/net/mac80211/rc80211_pid_algo.c @@ -68,18 +68,14 @@ * exhibited a worse failed frames behaviour and we'll choose the highest rate * whose failed frames behaviour is not worse than the one of the original rate * target. While at it, check that the new rate is valid. */ -static void rate_control_pid_adjust_rate(struct ieee80211_local *local, - struct sta_info *sta, int adj, +static void rate_control_pid_adjust_rate(struct ieee80211_supported_band *sband, + struct ieee80211_sta *sta, + struct rc_pid_sta_info *spinfo, int adj, struct rc_pid_rateinfo *rinfo) { - struct ieee80211_sub_if_data *sdata; - struct ieee80211_supported_band *sband; int cur_sorted, new_sorted, probe, tmp, n_bitrates, band; - struct rc_pid_sta_info *spinfo = (void *)sta->rate_ctrl_priv; int cur = spinfo->txrate_idx; - sdata = sta->sdata; - sband = local->hw.wiphy->bands[local->hw.conf.channel->band]; band = sband->band; n_bitrates = sband->n_bitrates; @@ -146,13 +142,11 @@ static void rate_control_pid_normalize(struct rc_pid_info *pinfo, int l) } static void rate_control_pid_sample(struct rc_pid_info *pinfo, - struct ieee80211_local *local, - struct sta_info *sta) + struct ieee80211_supported_band *sband, + struct ieee80211_sta *sta, + struct rc_pid_sta_info *spinfo) { - struct ieee80211_sub_if_data *sdata = sta->sdata; - struct rc_pid_sta_info *spinfo = sta->rate_ctrl_priv; struct rc_pid_rateinfo *rinfo = pinfo->rinfo; - struct ieee80211_supported_band *sband; u32 pf; s32 err_avg; u32 err_prop; @@ -161,9 +155,6 @@ static void rate_control_pid_sample(struct rc_pid_info *pinfo, int adj, i, j, tmp; unsigned long period; - sband = local->hw.wiphy->bands[local->hw.conf.channel->band]; - spinfo = sta->rate_ctrl_priv; - /* In case nothing happened during the previous control interval, turn * the sharpening factor on. */ period = (HZ * pinfo->sampling_period + 500) / 1000; @@ -179,11 +170,15 @@ static void rate_control_pid_sample(struct rc_pid_info *pinfo, if (unlikely(spinfo->tx_num_xmit == 0)) pf = spinfo->last_pf; else { + /* XXX: BAD HACK!!! */ + struct sta_info *si = container_of(sta, struct sta_info, sta); + pf = spinfo->tx_num_failed * 100 / spinfo->tx_num_xmit; - if (ieee80211_vif_is_mesh(&sdata->vif) && pf == 100) - mesh_plink_broken(sta); + + if (ieee80211_vif_is_mesh(&si->sdata->vif) && pf == 100) + mesh_plink_broken(si); pf <<= RC_PID_ARITH_SHIFT; - sta->fail_avg = ((pf + (spinfo->last_pf << 3)) / 9) + si->fail_avg = ((pf + (spinfo->last_pf << 3)) / 9) >> RC_PID_ARITH_SHIFT; } @@ -229,43 +224,25 @@ static void rate_control_pid_sample(struct rc_pid_info *pinfo, /* Change rate. */ if (adj) - rate_control_pid_adjust_rate(local, sta, adj, rinfo); + rate_control_pid_adjust_rate(sband, sta, spinfo, adj, rinfo); } -static void rate_control_pid_tx_status(void *priv, struct net_device *dev, +static void rate_control_pid_tx_status(void *priv, struct ieee80211_supported_band *sband, + struct ieee80211_sta *sta, void *priv_sta, struct sk_buff *skb) { - struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr); - struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) skb->data; - struct ieee80211_sub_if_data *sdata; struct rc_pid_info *pinfo = priv; - struct sta_info *sta; - struct rc_pid_sta_info *spinfo; + struct rc_pid_sta_info *spinfo = priv_sta; unsigned long period; - struct ieee80211_supported_band *sband; struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb); - rcu_read_lock(); - - sta = sta_info_get(local, hdr->addr1); - sband = local->hw.wiphy->bands[local->hw.conf.channel->band]; - - if (!sta) - goto unlock; - - spinfo = sta->rate_ctrl_priv; - - /* Don't update the state if we're not controlling the rate. */ - sdata = sta->sdata; - if (sdata->force_unicast_rateidx > -1) { - spinfo->txrate_idx = sdata->max_ratectrl_rateidx; - goto unlock; - } + if (!spinfo) + return; /* Ignore all frames that were sent with a different rate than the rate * we currently advise mac80211 to use. */ if (info->tx_rate_idx != spinfo->txrate_idx) - goto unlock; + return; spinfo->tx_num_xmit++; @@ -289,78 +266,63 @@ static void rate_control_pid_tx_status(void *priv, struct net_device *dev, if (!period) period = 1; if (time_after(jiffies, spinfo->last_sample + period)) - rate_control_pid_sample(pinfo, local, sta); - - unlock: - rcu_read_unlock(); + rate_control_pid_sample(pinfo, sband, sta, spinfo); } -static void rate_control_pid_get_rate(void *priv, struct net_device *dev, - struct ieee80211_supported_band *sband, - struct sk_buff *skb, - struct rate_selection *sel) +static void +rate_control_pid_get_rate(void *priv, struct ieee80211_supported_band *sband, + struct ieee80211_sta *sta, void *priv_sta, + struct sk_buff *skb, + struct rate_selection *sel) { - struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr); struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) skb->data; - struct ieee80211_sub_if_data *sdata; - struct rc_pid_sta_info *spinfo; - struct sta_info *sta; + struct rc_pid_sta_info *spinfo = priv_sta; int rateidx; u16 fc; - rcu_read_lock(); - - sta = sta_info_get(local, hdr->addr1); - /* Send management frames and broadcast/multicast data using lowest * rate. */ fc = le16_to_cpu(hdr->frame_control); - if ((fc & IEEE80211_FCTL_FTYPE) != IEEE80211_FTYPE_DATA || - is_multicast_ether_addr(hdr->addr1) || !sta) { - sel->rate_idx = rate_lowest_index(local, sband, sta); - rcu_read_unlock(); + if (!sta || !spinfo || + (fc & IEEE80211_FCTL_FTYPE) != IEEE80211_FTYPE_DATA || + is_multicast_ether_addr(hdr->addr1)) { + sel->rate_idx = rate_lowest_index(sband, sta); return; } - /* If a forced rate is in effect, select it. */ - sdata = IEEE80211_DEV_TO_SUB_IF(dev); - spinfo = (struct rc_pid_sta_info *)sta->rate_ctrl_priv; - if (sdata->force_unicast_rateidx > -1) - spinfo->txrate_idx = sdata->force_unicast_rateidx; - rateidx = spinfo->txrate_idx; if (rateidx >= sband->n_bitrates) rateidx = sband->n_bitrates - 1; - rcu_read_unlock(); - sel->rate_idx = rateidx; #ifdef CONFIG_MAC80211_DEBUGFS - rate_control_pid_event_tx_rate( - &((struct rc_pid_sta_info *) sta->rate_ctrl_priv)->events, + rate_control_pid_event_tx_rate(&spinfo->events, rateidx, sband->bitrates[rateidx].bitrate); #endif } -static void rate_control_pid_rate_init(void *priv, void *priv_sta, - struct ieee80211_local *local, - struct sta_info *sta) +static void +rate_control_pid_rate_init(void *priv, struct ieee80211_supported_band *sband, + struct ieee80211_sta *sta, void *priv_sta) { + struct rc_pid_sta_info *spinfo = priv_sta; + struct sta_info *si; + /* TODO: This routine should consider using RSSI from previous packets * as we need to have IEEE 802.1X auth succeed immediately after assoc.. * Until that method is implemented, we will use the lowest supported * rate as a workaround. */ - struct ieee80211_supported_band *sband; - struct rc_pid_sta_info *spinfo = (void *)sta->rate_ctrl_priv; - sband = local->hw.wiphy->bands[local->hw.conf.channel->band]; - spinfo->txrate_idx = rate_lowest_index(local, sband, sta); - sta->fail_avg = 0; + spinfo->txrate_idx = rate_lowest_index(sband, sta); + /* HACK */ + si = container_of(sta, struct sta_info, sta); + si->fail_avg = 0; } -static void *rate_control_pid_alloc(struct ieee80211_local *local) +static void *rate_control_pid_alloc(struct ieee80211_hw *hw, + struct dentry *debugfsdir) { struct rc_pid_info *pinfo; struct rc_pid_rateinfo *rinfo; @@ -371,7 +333,7 @@ static void *rate_control_pid_alloc(struct ieee80211_local *local) struct rc_pid_debugfs_entries *de; #endif - sband = local->hw.wiphy->bands[local->hw.conf.channel->band]; + sband = hw->wiphy->bands[hw->conf.channel->band]; pinfo = kmalloc(sizeof(*pinfo), GFP_ATOMIC); if (!pinfo) @@ -426,30 +388,28 @@ static void *rate_control_pid_alloc(struct ieee80211_local *local) #ifdef CONFIG_MAC80211_DEBUGFS de = &pinfo->dentries; - de->dir = debugfs_create_dir("rc80211_pid", - local->hw.wiphy->debugfsdir); de->target = debugfs_create_u32("target_pf", S_IRUSR | S_IWUSR, - de->dir, &pinfo->target); + debugfsdir, &pinfo->target); de->sampling_period = debugfs_create_u32("sampling_period", - S_IRUSR | S_IWUSR, de->dir, + S_IRUSR | S_IWUSR, debugfsdir, &pinfo->sampling_period); de->coeff_p = debugfs_create_u32("coeff_p", S_IRUSR | S_IWUSR, - de->dir, &pinfo->coeff_p); + debugfsdir, &pinfo->coeff_p); de->coeff_i = debugfs_create_u32("coeff_i", S_IRUSR | S_IWUSR, - de->dir, &pinfo->coeff_i); + debugfsdir, &pinfo->coeff_i); de->coeff_d = debugfs_create_u32("coeff_d", S_IRUSR | S_IWUSR, - de->dir, &pinfo->coeff_d); + debugfsdir, &pinfo->coeff_d); de->smoothing_shift = debugfs_create_u32("smoothing_shift", - S_IRUSR | S_IWUSR, de->dir, + S_IRUSR | S_IWUSR, debugfsdir, &pinfo->smoothing_shift); de->sharpen_factor = debugfs_create_u32("sharpen_factor", - S_IRUSR | S_IWUSR, de->dir, + S_IRUSR | S_IWUSR, debugfsdir, &pinfo->sharpen_factor); de->sharpen_duration = debugfs_create_u32("sharpen_duration", - S_IRUSR | S_IWUSR, de->dir, + S_IRUSR | S_IWUSR, debugfsdir, &pinfo->sharpen_duration); de->norm_offset = debugfs_create_u32("norm_offset", - S_IRUSR | S_IWUSR, de->dir, + S_IRUSR | S_IWUSR, debugfsdir, &pinfo->norm_offset); #endif @@ -471,7 +431,6 @@ static void rate_control_pid_free(void *priv) debugfs_remove(de->coeff_p); debugfs_remove(de->sampling_period); debugfs_remove(de->target); - debugfs_remove(de->dir); #endif kfree(pinfo->rinfo); @@ -482,7 +441,8 @@ static void rate_control_pid_clear(void *priv) { } -static void *rate_control_pid_alloc_sta(void *priv, gfp_t gfp) +static void *rate_control_pid_alloc_sta(void *priv, struct ieee80211_sta *sta, + gfp_t gfp) { struct rc_pid_sta_info *spinfo; @@ -500,10 +460,10 @@ static void *rate_control_pid_alloc_sta(void *priv, gfp_t gfp) return spinfo; } -static void rate_control_pid_free_sta(void *priv, void *priv_sta) +static void rate_control_pid_free_sta(void *priv, struct ieee80211_sta *sta, + void *priv_sta) { - struct rc_pid_sta_info *spinfo = priv_sta; - kfree(spinfo); + kfree(priv_sta); } static struct rate_control_ops mac80211_rcpid = { diff --git a/net/mac80211/sta_info.c b/net/mac80211/sta_info.c index d9774ac2e0f..9b72d15bc8d 100644 --- a/net/mac80211/sta_info.c +++ b/net/mac80211/sta_info.c @@ -93,8 +93,7 @@ static int sta_info_hash_del(struct ieee80211_local *local, } /* protected by RCU */ -static struct sta_info *__sta_info_find(struct ieee80211_local *local, - const u8 *addr) +struct sta_info *sta_info_get(struct ieee80211_local *local, const u8 *addr) { struct sta_info *sta; @@ -107,12 +106,6 @@ static struct sta_info *__sta_info_find(struct ieee80211_local *local, return sta; } -struct sta_info *sta_info_get(struct ieee80211_local *local, u8 *addr) -{ - return __sta_info_find(local, addr); -} -EXPORT_SYMBOL(sta_info_get); - struct sta_info *sta_info_get_by_idx(struct ieee80211_local *local, int idx, struct net_device *dev) { @@ -146,7 +139,7 @@ static void __sta_info_free(struct ieee80211_local *local, { DECLARE_MAC_BUF(mbuf); - rate_control_free_sta(sta->rate_ctrl, sta->rate_ctrl_priv); + rate_control_free_sta(sta); rate_control_put(sta->rate_ctrl); #ifdef CONFIG_MAC80211_VERBOSE_DEBUG @@ -244,7 +237,7 @@ struct sta_info *sta_info_alloc(struct ieee80211_sub_if_data *sdata, sta->rate_ctrl = rate_control_get(local->rate_ctrl); sta->rate_ctrl_priv = rate_control_alloc_sta(sta->rate_ctrl, - gfp); + &sta->sta, gfp); if (!sta->rate_ctrl_priv) { rate_control_put(sta->rate_ctrl); kfree(sta); @@ -308,7 +301,7 @@ int sta_info_insert(struct sta_info *sta) spin_lock_irqsave(&local->sta_lock, flags); /* check if STA exists already */ - if (__sta_info_find(local, sta->sta.addr)) { + if (sta_info_get(local, sta->sta.addr)) { spin_unlock_irqrestore(&local->sta_lock, flags); err = -EEXIST; goto out_free; @@ -834,7 +827,7 @@ void ieee80211_sta_expire(struct ieee80211_sub_if_data *sdata, struct ieee80211_sta *ieee80211_find_sta(struct ieee80211_hw *hw, const u8 *addr) { - struct sta_info *sta = __sta_info_find(hw_to_local(hw), addr); + struct sta_info *sta = sta_info_get(hw_to_local(hw), addr); if (!sta) return NULL; diff --git a/net/mac80211/sta_info.h b/net/mac80211/sta_info.h index daedfa9e1c6..c3f43696462 100644 --- a/net/mac80211/sta_info.h +++ b/net/mac80211/sta_info.h @@ -416,7 +416,7 @@ static inline u32 get_sta_flags(struct sta_info *sta) /* * Get a STA info, must have be under RCU read lock. */ -struct sta_info *sta_info_get(struct ieee80211_local *local, u8 *addr); +struct sta_info *sta_info_get(struct ieee80211_local *local, const u8 *addr); /* * Get STA info by index, BROKEN! */ diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c index 00d96e63dce..0cc2e23f082 100644 --- a/net/mac80211/tx.c +++ b/net/mac80211/tx.c @@ -446,7 +446,8 @@ ieee80211_tx_h_rate_ctrl(struct ieee80211_tx_data *tx) sband = tx->local->hw.wiphy->bands[tx->channel->band]; if (likely(tx->rate_idx < 0)) { - rate_control_get_rate(tx->dev, sband, tx->skb, &rsel); + rate_control_get_rate(tx->sdata, sband, tx->sta, + tx->skb, &rsel); if (tx->sta) tx->sta->last_txrate_idx = rsel.rate_idx; tx->rate_idx = rsel.rate_idx; @@ -1955,7 +1956,7 @@ struct sk_buff *ieee80211_beacon_get(struct ieee80211_hw *hw, skb->do_not_encrypt = 1; info->band = band; - rate_control_get_rate(local->mdev, sband, skb, &rsel); + rate_control_get_rate(sdata, sband, NULL, skb, &rsel); if (unlikely(rsel.rate_idx < 0)) { if (net_ratelimit()) { -- cgit v1.2.3-70-g09d2 From 72029fe85d8d060b3f966f2dbc36b3c75b5a6532 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Wed, 24 Sep 2008 16:22:23 -0500 Subject: 9p: implement proper trans module refcounting and unregistration 9p trans modules aren't refcounted nor were they unregistered properly. Fix it. * Add 9p_trans_module->owner and reference the module on each trans instance creation and put it on destruction. * Protect v9fs_trans_list with a spinlock. This isn't strictly necessary as the list is manipulated only during module loading / unloading but it's a good idea to make the API safe. * Unregister trans modules when the corresponding module is being unloaded. * While at it, kill unnecessary EXPORT_SYMBOL on p9_trans_fd_init(). Signed-off-by: Tejun Heo Signed-off-by: Eric Van Hensbergen --- include/net/9p/9p.h | 1 + include/net/9p/transport.h | 9 ++++- net/9p/client.c | 10 ++++- net/9p/mod.c | 92 +++++++++++++++++++++++++++++++++------------- net/9p/trans_fd.c | 11 +++++- net/9p/trans_virtio.c | 2 + 6 files changed, 95 insertions(+), 30 deletions(-) (limited to 'include/net') diff --git a/include/net/9p/9p.h b/include/net/9p/9p.h index b3d3e27c629..c3626c0ba9d 100644 --- a/include/net/9p/9p.h +++ b/include/net/9p/9p.h @@ -596,4 +596,5 @@ int p9_idpool_check(int id, struct p9_idpool *p); int p9_error_init(void); int p9_errstr2errno(char *, int); int p9_trans_fd_init(void); +void p9_trans_fd_exit(void); #endif /* NET_9P_H */ diff --git a/include/net/9p/transport.h b/include/net/9p/transport.h index 0db3a4038dc..3ca737120a9 100644 --- a/include/net/9p/transport.h +++ b/include/net/9p/transport.h @@ -26,6 +26,8 @@ #ifndef NET_9P_TRANSPORT_H #define NET_9P_TRANSPORT_H +#include + /** * enum p9_trans_status - different states of underlying transports * @Connected: transport is connected and healthy @@ -91,9 +93,12 @@ struct p9_trans_module { int maxsize; /* max message size of transport */ int def; /* this transport should be default */ struct p9_trans * (*create)(const char *, char *, int, unsigned char); + struct module *owner; }; void v9fs_register_trans(struct p9_trans_module *m); -struct p9_trans_module *v9fs_match_trans(const substring_t *name); -struct p9_trans_module *v9fs_default_trans(void); +void v9fs_unregister_trans(struct p9_trans_module *m); +struct p9_trans_module *v9fs_get_trans_by_name(const substring_t *name); +struct p9_trans_module *v9fs_get_default_trans(void); +void v9fs_put_trans(struct p9_trans_module *m); #endif /* NET_9P_TRANSPORT_H */ diff --git a/net/9p/client.c b/net/9p/client.c index 2ffe40cf2f0..10e320307ec 100644 --- a/net/9p/client.c +++ b/net/9p/client.c @@ -75,7 +75,6 @@ static int parse_opts(char *opts, struct p9_client *clnt) int option; int ret = 0; - clnt->trans_mod = v9fs_default_trans(); clnt->dotu = 1; clnt->msize = 8192; @@ -108,7 +107,7 @@ static int parse_opts(char *opts, struct p9_client *clnt) clnt->msize = option; break; case Opt_trans: - clnt->trans_mod = v9fs_match_trans(&args[0]); + clnt->trans_mod = v9fs_get_trans_by_name(&args[0]); break; case Opt_legacy: clnt->dotu = 0; @@ -117,6 +116,10 @@ static int parse_opts(char *opts, struct p9_client *clnt) continue; } } + + if (!clnt->trans_mod) + clnt->trans_mod = v9fs_get_default_trans(); + kfree(options); return ret; } @@ -150,6 +153,7 @@ struct p9_client *p9_client_create(const char *dev_name, char *options) if (!clnt) return ERR_PTR(-ENOMEM); + clnt->trans_mod = NULL; clnt->trans = NULL; spin_lock_init(&clnt->lock); INIT_LIST_HEAD(&clnt->fidlist); @@ -235,6 +239,8 @@ void p9_client_destroy(struct p9_client *clnt) clnt->trans = NULL; } + v9fs_put_trans(clnt->trans_mod); + list_for_each_entry_safe(fid, fidptr, &clnt->fidlist, flist) p9_fid_destroy(fid); diff --git a/net/9p/mod.c b/net/9p/mod.c index bdee1fb7cc6..1084feb24cb 100644 --- a/net/9p/mod.c +++ b/net/9p/mod.c @@ -31,6 +31,7 @@ #include #include #include +#include #ifdef CONFIG_NET_9P_DEBUG unsigned int p9_debug_level = 0; /* feature-rific global debug level */ @@ -44,8 +45,8 @@ MODULE_PARM_DESC(debug, "9P debugging level"); * */ +static DEFINE_SPINLOCK(v9fs_trans_lock); static LIST_HEAD(v9fs_trans_list); -static struct p9_trans_module *v9fs_default_transport; /** * v9fs_register_trans - register a new transport with 9p @@ -54,48 +55,87 @@ static struct p9_trans_module *v9fs_default_transport; */ void v9fs_register_trans(struct p9_trans_module *m) { + spin_lock(&v9fs_trans_lock); list_add_tail(&m->list, &v9fs_trans_list); - if (m->def) - v9fs_default_transport = m; + spin_unlock(&v9fs_trans_lock); } EXPORT_SYMBOL(v9fs_register_trans); /** - * v9fs_match_trans - match transport versus registered transports + * v9fs_unregister_trans - unregister a 9p transport + * @m: the transport to remove + * + */ +void v9fs_unregister_trans(struct p9_trans_module *m) +{ + spin_lock(&v9fs_trans_lock); + list_del_init(&m->list); + spin_unlock(&v9fs_trans_lock); +} +EXPORT_SYMBOL(v9fs_unregister_trans); + +/** + * v9fs_get_trans_by_name - get transport with the matching name * @name: string identifying transport * */ -struct p9_trans_module *v9fs_match_trans(const substring_t *name) +struct p9_trans_module *v9fs_get_trans_by_name(const substring_t *name) { - struct list_head *p; - struct p9_trans_module *t = NULL; - - list_for_each(p, &v9fs_trans_list) { - t = list_entry(p, struct p9_trans_module, list); - if (strncmp(t->name, name->from, name->to-name->from) == 0) - return t; - } - return NULL; + struct p9_trans_module *t, *found = NULL; + + spin_lock(&v9fs_trans_lock); + + list_for_each_entry(t, &v9fs_trans_list, list) + if (strncmp(t->name, name->from, name->to-name->from) == 0 && + try_module_get(t->owner)) { + found = t; + break; + } + + spin_unlock(&v9fs_trans_lock); + return found; } -EXPORT_SYMBOL(v9fs_match_trans); +EXPORT_SYMBOL(v9fs_get_trans_by_name); /** - * v9fs_default_trans - returns pointer to default transport + * v9fs_get_default_trans - get the default transport * */ -struct p9_trans_module *v9fs_default_trans(void) +struct p9_trans_module *v9fs_get_default_trans(void) { - if (v9fs_default_transport) - return v9fs_default_transport; - else if (!list_empty(&v9fs_trans_list)) - return list_first_entry(&v9fs_trans_list, - struct p9_trans_module, list); - else - return NULL; + struct p9_trans_module *t, *found = NULL; + + spin_lock(&v9fs_trans_lock); + + list_for_each_entry(t, &v9fs_trans_list, list) + if (t->def && try_module_get(t->owner)) { + found = t; + break; + } + + if (!found) + list_for_each_entry(t, &v9fs_trans_list, list) + if (try_module_get(t->owner)) { + found = t; + break; + } + + spin_unlock(&v9fs_trans_lock); + return found; } -EXPORT_SYMBOL(v9fs_default_trans); +EXPORT_SYMBOL(v9fs_get_default_trans); +/** + * v9fs_put_trans - put trans + * @m: transport to put + * + */ +void v9fs_put_trans(struct p9_trans_module *m) +{ + if (m) + module_put(m->owner); +} /** * v9fs_init - Initialize module @@ -120,6 +160,8 @@ static int __init init_p9(void) static void __exit exit_p9(void) { printk(KERN_INFO "Unloading 9P2000 support\n"); + + p9_trans_fd_exit(); } module_init(init_p9) diff --git a/net/9p/trans_fd.c b/net/9p/trans_fd.c index cdf137af7ad..6a32ffdb942 100644 --- a/net/9p/trans_fd.c +++ b/net/9p/trans_fd.c @@ -1629,6 +1629,7 @@ static struct p9_trans_module p9_tcp_trans = { .maxsize = MAX_SOCK_BUF, .def = 1, .create = p9_trans_create_tcp, + .owner = THIS_MODULE, }; static struct p9_trans_module p9_unix_trans = { @@ -1636,6 +1637,7 @@ static struct p9_trans_module p9_unix_trans = { .maxsize = MAX_SOCK_BUF, .def = 0, .create = p9_trans_create_unix, + .owner = THIS_MODULE, }; static struct p9_trans_module p9_fd_trans = { @@ -1643,6 +1645,7 @@ static struct p9_trans_module p9_fd_trans = { .maxsize = MAX_SOCK_BUF, .def = 0, .create = p9_trans_create_fd, + .owner = THIS_MODULE, }; int p9_trans_fd_init(void) @@ -1659,4 +1662,10 @@ int p9_trans_fd_init(void) return 0; } -EXPORT_SYMBOL(p9_trans_fd_init); + +void p9_trans_fd_exit(void) +{ + v9fs_unregister_trans(&p9_tcp_trans); + v9fs_unregister_trans(&p9_unix_trans); + v9fs_unregister_trans(&p9_fd_trans); +} diff --git a/net/9p/trans_virtio.c b/net/9p/trans_virtio.c index 42adc052b14..94912e077a5 100644 --- a/net/9p/trans_virtio.c +++ b/net/9p/trans_virtio.c @@ -528,6 +528,7 @@ static struct p9_trans_module p9_virtio_trans = { .create = p9_virtio_create, .maxsize = PAGE_SIZE*16, .def = 0, + .owner = THIS_MODULE, }; /* The standard init function */ @@ -545,6 +546,7 @@ static int __init p9_virtio_init(void) static void __exit p9_virtio_cleanup(void) { unregister_virtio_driver(&p9_virtio_drv); + v9fs_unregister_trans(&p9_virtio_trans); } module_init(p9_virtio_init); -- cgit v1.2.3-70-g09d2 From ba0166708ef4da7eeb61dd92bbba4d5a749d6561 Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Tue, 30 Sep 2008 05:32:24 -0700 Subject: sctp: Fix kernel panic while process protocol violation parameter Since call to function sctp_sf_abort_violation() need paramter 'arg' with 'struct sctp_chunk' type, it will read the chunk type and chunk length from the chunk_hdr member of chunk. But call to sctp_sf_violation_paramlen() always with 'struct sctp_paramhdr' type's parameter, it will be passed to sctp_sf_abort_violation(). This may cause kernel panic. sctp_sf_violation_paramlen() |-- sctp_sf_abort_violation() |-- sctp_make_abort_violation() This patch fixed this problem. This patch also fix two place which called sctp_sf_violation_paramlen() with wrong paramter type. Signed-off-by: Wei Yongjun Signed-off-by: Vlad Yasevich Signed-off-by: David S. Miller --- include/net/sctp/sm.h | 3 +++ net/sctp/sm_make_chunk.c | 37 ++++++++++++++++++++++++------------- net/sctp/sm_statefuns.c | 48 +++++++++++++++++++++++++++++++++++++----------- 3 files changed, 64 insertions(+), 24 deletions(-) (limited to 'include/net') diff --git a/include/net/sctp/sm.h b/include/net/sctp/sm.h index 24811732bdb..029a54a0239 100644 --- a/include/net/sctp/sm.h +++ b/include/net/sctp/sm.h @@ -227,6 +227,9 @@ struct sctp_chunk *sctp_make_abort_violation(const struct sctp_association *, const struct sctp_chunk *, const __u8 *, const size_t ); +struct sctp_chunk *sctp_make_violation_paramlen(const struct sctp_association *, + const struct sctp_chunk *, + struct sctp_paramhdr *); struct sctp_chunk *sctp_make_heartbeat(const struct sctp_association *, const struct sctp_transport *, const void *payload, diff --git a/net/sctp/sm_make_chunk.c b/net/sctp/sm_make_chunk.c index b599cbba4fb..d68869f966c 100644 --- a/net/sctp/sm_make_chunk.c +++ b/net/sctp/sm_make_chunk.c @@ -1012,6 +1012,29 @@ end: return retval; } +struct sctp_chunk *sctp_make_violation_paramlen( + const struct sctp_association *asoc, + const struct sctp_chunk *chunk, + struct sctp_paramhdr *param) +{ + struct sctp_chunk *retval; + static const char error[] = "The following parameter had invalid length:"; + size_t payload_len = sizeof(error) + sizeof(sctp_errhdr_t) + + sizeof(sctp_paramhdr_t); + + retval = sctp_make_abort(asoc, chunk, payload_len); + if (!retval) + goto nodata; + + sctp_init_cause(retval, SCTP_ERROR_PROTO_VIOLATION, + sizeof(error) + sizeof(sctp_paramhdr_t)); + sctp_addto_chunk(retval, sizeof(error), error); + sctp_addto_param(retval, sizeof(sctp_paramhdr_t), param); + +nodata: + return retval; +} + /* Make a HEARTBEAT chunk. */ struct sctp_chunk *sctp_make_heartbeat(const struct sctp_association *asoc, const struct sctp_transport *transport, @@ -1782,11 +1805,6 @@ static int sctp_process_inv_paramlength(const struct sctp_association *asoc, const struct sctp_chunk *chunk, struct sctp_chunk **errp) { - static const char error[] = "The following parameter had invalid length:"; - size_t payload_len = WORD_ROUND(sizeof(error)) + - sizeof(sctp_paramhdr_t); - - /* This is a fatal error. Any accumulated non-fatal errors are * not reported. */ @@ -1794,14 +1812,7 @@ static int sctp_process_inv_paramlength(const struct sctp_association *asoc, sctp_chunk_free(*errp); /* Create an error chunk and fill it in with our payload. */ - *errp = sctp_make_op_error_space(asoc, chunk, payload_len); - - if (*errp) { - sctp_init_cause(*errp, SCTP_ERROR_PROTO_VIOLATION, - sizeof(error) + sizeof(sctp_paramhdr_t)); - sctp_addto_chunk(*errp, sizeof(error), error); - sctp_addto_param(*errp, sizeof(sctp_paramhdr_t), param); - } + *errp = sctp_make_violation_paramlen(asoc, chunk, param); return 0; } diff --git a/net/sctp/sm_statefuns.c b/net/sctp/sm_statefuns.c index 8848d329aa2..7c622af2ce5 100644 --- a/net/sctp/sm_statefuns.c +++ b/net/sctp/sm_statefuns.c @@ -119,7 +119,7 @@ static sctp_disposition_t sctp_sf_violation_paramlen( const struct sctp_endpoint *ep, const struct sctp_association *asoc, const sctp_subtype_t type, - void *arg, + void *arg, void *ext, sctp_cmd_seq_t *commands); static sctp_disposition_t sctp_sf_violation_ctsn( @@ -3425,7 +3425,7 @@ sctp_disposition_t sctp_sf_do_asconf(const struct sctp_endpoint *ep, addr_param = (union sctp_addr_param *)hdr->params; length = ntohs(addr_param->p.length); if (length < sizeof(sctp_paramhdr_t)) - return sctp_sf_violation_paramlen(ep, asoc, type, + return sctp_sf_violation_paramlen(ep, asoc, type, arg, (void *)addr_param, commands); /* Verify the ASCONF chunk before processing it. */ @@ -3433,8 +3433,8 @@ sctp_disposition_t sctp_sf_do_asconf(const struct sctp_endpoint *ep, (sctp_paramhdr_t *)((void *)addr_param + length), (void *)chunk->chunk_end, &err_param)) - return sctp_sf_violation_paramlen(ep, asoc, type, - (void *)&err_param, commands); + return sctp_sf_violation_paramlen(ep, asoc, type, arg, + (void *)err_param, commands); /* ADDIP 5.2 E1) Compare the value of the serial number to the value * the endpoint stored in a new association variable @@ -3542,8 +3542,8 @@ sctp_disposition_t sctp_sf_do_asconf_ack(const struct sctp_endpoint *ep, (sctp_paramhdr_t *)addip_hdr->params, (void *)asconf_ack->chunk_end, &err_param)) - return sctp_sf_violation_paramlen(ep, asoc, type, - (void *)&err_param, commands); + return sctp_sf_violation_paramlen(ep, asoc, type, arg, + (void *)err_param, commands); if (last_asconf) { addip_hdr = (sctp_addiphdr_t *)last_asconf->subh.addip_hdr; @@ -4240,12 +4240,38 @@ static sctp_disposition_t sctp_sf_violation_paramlen( const struct sctp_endpoint *ep, const struct sctp_association *asoc, const sctp_subtype_t type, - void *arg, - sctp_cmd_seq_t *commands) { - static const char err_str[] = "The following parameter had invalid length:"; + void *arg, void *ext, + sctp_cmd_seq_t *commands) +{ + struct sctp_chunk *chunk = arg; + struct sctp_paramhdr *param = ext; + struct sctp_chunk *abort = NULL; - return sctp_sf_abort_violation(ep, asoc, arg, commands, err_str, - sizeof(err_str)); + if (sctp_auth_recv_cid(SCTP_CID_ABORT, asoc)) + goto discard; + + /* Make the abort chunk. */ + abort = sctp_make_violation_paramlen(asoc, chunk, param); + if (!abort) + goto nomem; + + sctp_add_cmd_sf(commands, SCTP_CMD_REPLY, SCTP_CHUNK(abort)); + SCTP_INC_STATS(SCTP_MIB_OUTCTRLCHUNKS); + + sctp_add_cmd_sf(commands, SCTP_CMD_SET_SK_ERR, + SCTP_ERROR(ECONNABORTED)); + sctp_add_cmd_sf(commands, SCTP_CMD_ASSOC_FAILED, + SCTP_PERR(SCTP_ERROR_PROTO_VIOLATION)); + SCTP_DEC_STATS(SCTP_MIB_CURRESTAB); + +discard: + sctp_sf_pdiscard(ep, asoc, SCTP_ST_CHUNK(0), arg, commands); + + SCTP_INC_STATS(SCTP_MIB_ABORTEDS); + + return SCTP_DISPOSITION_ABORT; +nomem: + return SCTP_DISPOSITION_NOMEM; } /* Handle a protocol violation when the peer trying to advance the -- cgit v1.2.3-70-g09d2 From 55ad175fb65a4a3a7e4d1aa13c460de281b4e8ac Mon Sep 17 00:00:00 2001 From: "John W. Linville" Date: Mon, 29 Sep 2008 16:28:21 -0400 Subject: ieee80211.h: remove superfluous ETH_P_PAE definition Signed-off-by: John W. Linville --- include/net/ieee80211.h | 4 ---- 1 file changed, 4 deletions(-) (limited to 'include/net') diff --git a/include/net/ieee80211.h b/include/net/ieee80211.h index b31399e1fd8..6048579d0b2 100644 --- a/include/net/ieee80211.h +++ b/include/net/ieee80211.h @@ -190,10 +190,6 @@ const char *escape_essid(const char *essid, u8 essid_len); #endif #include /* new driver API */ -#ifndef ETH_P_PAE -#define ETH_P_PAE 0x888E /* Port Access Entity (IEEE 802.1X) */ -#endif /* ETH_P_PAE */ - #define ETH_P_PREAUTH 0x88C7 /* IEEE 802.11i pre-authentication */ #ifndef ETH_P_80211_RAW -- cgit v1.2.3-70-g09d2 From 93c8b90f01f0dc73891da4e84b26524b61d29d66 Mon Sep 17 00:00:00 2001 From: Ilpo Järvinen Date: Wed, 1 Oct 2008 02:48:31 -0700 Subject: ipv6: almost identical frag hashing funcs combined MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit $ diff-funcs ip6qhashfn reassembly.c netfilter/nf_conntrack_reasm.c --- reassembly.c:ip6qhashfn() +++ netfilter/nf_conntrack_reasm.c:ip6qhashfn() @@ -1,5 +1,5 @@ -static unsigned int ip6qhashfn(__be32 id, struct in6_addr *saddr, - struct in6_addr *daddr) +static unsigned int ip6qhashfn(__be32 id, const struct in6_addr *saddr, + const struct in6_addr *daddr) { u32 a, b, c; @@ -9,7 +9,7 @@ a += JHASH_GOLDEN_RATIO; b += JHASH_GOLDEN_RATIO; - c += ip6_frags.rnd; + c += nf_frags.rnd; __jhash_mix(a, b, c); a += (__force u32)saddr->s6_addr32[3]; And codiff xx.o.old xx.o.new: net/ipv6/netfilter/nf_conntrack_reasm.c: ip6qhashfn | -512 nf_hashfn | +6 nf_ct_frag6_gather | +36 3 functions changed, 42 bytes added, 512 bytes removed, diff: -470 net/ipv6/reassembly.c: ip6qhashfn | -512 ip6_hashfn | +7 ipv6_frag_rcv | +89 3 functions changed, 96 bytes added, 512 bytes removed, diff: -416 net/ipv6/reassembly.c: inet6_hash_frag | +510 1 function changed, 510 bytes added, diff: +510 Total: -376 Compile tested. Signed-off-by: Ilpo Järvinen Acked-by: Arnaldo Carvalho de Melo Signed-off-by: David S. Miller --- include/net/ipv6.h | 2 ++ net/ipv6/netfilter/nf_conntrack_reasm.c | 32 ++------------------------------ net/ipv6/reassembly.c | 11 ++++++----- 3 files changed, 10 insertions(+), 35 deletions(-) (limited to 'include/net') diff --git a/include/net/ipv6.h b/include/net/ipv6.h index 113028fb8f6..dfa7ae3c560 100644 --- a/include/net/ipv6.h +++ b/include/net/ipv6.h @@ -576,6 +576,8 @@ extern int ip6_mc_msfilter(struct sock *sk, struct group_filter *gsf); extern int ip6_mc_msfget(struct sock *sk, struct group_filter *gsf, struct group_filter __user *optval, int __user *optlen); +extern unsigned int inet6_hash_frag(__be32 id, const struct in6_addr *saddr, + const struct in6_addr *daddr, u32 rnd); #ifdef CONFIG_PROC_FS extern int ac6_proc_init(struct net *net); diff --git a/net/ipv6/netfilter/nf_conntrack_reasm.c b/net/ipv6/netfilter/nf_conntrack_reasm.c index 52d06dd4b81..9967ac7a01a 100644 --- a/net/ipv6/netfilter/nf_conntrack_reasm.c +++ b/net/ipv6/netfilter/nf_conntrack_reasm.c @@ -27,7 +27,6 @@ #include #include #include -#include #include #include @@ -103,39 +102,12 @@ struct ctl_table nf_ct_ipv6_sysctl_table[] = { }; #endif -static unsigned int ip6qhashfn(__be32 id, const struct in6_addr *saddr, - const struct in6_addr *daddr) -{ - u32 a, b, c; - - a = (__force u32)saddr->s6_addr32[0]; - b = (__force u32)saddr->s6_addr32[1]; - c = (__force u32)saddr->s6_addr32[2]; - - a += JHASH_GOLDEN_RATIO; - b += JHASH_GOLDEN_RATIO; - c += nf_frags.rnd; - __jhash_mix(a, b, c); - - a += (__force u32)saddr->s6_addr32[3]; - b += (__force u32)daddr->s6_addr32[0]; - c += (__force u32)daddr->s6_addr32[1]; - __jhash_mix(a, b, c); - - a += (__force u32)daddr->s6_addr32[2]; - b += (__force u32)daddr->s6_addr32[3]; - c += (__force u32)id; - __jhash_mix(a, b, c); - - return c & (INETFRAGS_HASHSZ - 1); -} - static unsigned int nf_hashfn(struct inet_frag_queue *q) { const struct nf_ct_frag6_queue *nq; nq = container_of(q, struct nf_ct_frag6_queue, q); - return ip6qhashfn(nq->id, &nq->saddr, &nq->daddr); + return inet6_hash_frag(nq->id, &nq->saddr, &nq->daddr, nf_frags.rnd); } static void nf_skb_free(struct sk_buff *skb) @@ -209,7 +181,7 @@ fq_find(__be32 id, struct in6_addr *src, struct in6_addr *dst) arg.dst = dst; read_lock_bh(&nf_frags.lock); - hash = ip6qhashfn(id, src, dst); + hash = inet6_hash_frag(id, src, dst, nf_frags.rnd); q = inet_frag_find(&nf_init_frags, &nf_frags, &arg, hash); local_bh_enable(); diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c index 89184b576e2..2eeadfa039c 100644 --- a/net/ipv6/reassembly.c +++ b/net/ipv6/reassembly.c @@ -99,8 +99,8 @@ static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff *prev, * callers should be careful not to use the hash value outside the ipfrag_lock * as doing so could race with ipfrag_hash_rnd being recalculated. */ -static unsigned int ip6qhashfn(__be32 id, struct in6_addr *saddr, - struct in6_addr *daddr) +unsigned int inet6_hash_frag(__be32 id, const struct in6_addr *saddr, + const struct in6_addr *daddr, u32 rnd) { u32 a, b, c; @@ -110,7 +110,7 @@ static unsigned int ip6qhashfn(__be32 id, struct in6_addr *saddr, a += JHASH_GOLDEN_RATIO; b += JHASH_GOLDEN_RATIO; - c += ip6_frags.rnd; + c += rnd; __jhash_mix(a, b, c); a += (__force u32)saddr->s6_addr32[3]; @@ -125,13 +125,14 @@ static unsigned int ip6qhashfn(__be32 id, struct in6_addr *saddr, return c & (INETFRAGS_HASHSZ - 1); } +EXPORT_SYMBOL_GPL(inet6_hash_frag); static unsigned int ip6_hashfn(struct inet_frag_queue *q) { struct frag_queue *fq; fq = container_of(q, struct frag_queue, q); - return ip6qhashfn(fq->id, &fq->saddr, &fq->daddr); + return inet6_hash_frag(fq->id, &fq->saddr, &fq->daddr, ip6_frags.rnd); } int ip6_frag_match(struct inet_frag_queue *q, void *a) @@ -247,7 +248,7 @@ fq_find(struct net *net, __be32 id, struct in6_addr *src, struct in6_addr *dst, arg.dst = dst; read_lock(&ip6_frags.lock); - hash = ip6qhashfn(id, src, dst); + hash = inet6_hash_frag(id, src, dst, ip6_frags.rnd); q = inet_frag_find(&net->ipv6.frags, &ip6_frags, &arg, hash); if (q == NULL) -- cgit v1.2.3-70-g09d2 From 12a169e7d8f4b1c95252d8b04ed0f1033ed7cfe2 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Wed, 1 Oct 2008 07:03:24 -0700 Subject: ipsec: Put dumpers on the dump list MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Herbert Xu came up with the idea and the original patch to make xfrm_state dump list contain also dumpers: As it is we go to extraordinary lengths to ensure that states don't go away while dumpers go to sleep. It's much easier if we just put the dumpers themselves on the list since they can't go away while they're going. I've also changed the order of addition on new states to prevent a never-ending dump. Timo Teräs improved the patch to apply cleanly to latest tree, modified iteration code to be more readable by using a common struct for entries in the list, implemented the same idea for xfrm_policy dumping and moved the af_key specific "last" entry caching to af_key. Signed-off-by: Herbert Xu Signed-off-by: Timo Teras Signed-off-by: David S. Miller --- include/linux/netlink.h | 2 +- include/net/xfrm.h | 70 ++++++++++++------------------ net/key/af_key.c | 38 ++++++++++++++--- net/xfrm/xfrm_policy.c | 111 +++++++++++++++++++++++++----------------------- net/xfrm/xfrm_state.c | 109 ++++++++++++++++------------------------------- net/xfrm/xfrm_user.c | 4 +- 6 files changed, 159 insertions(+), 175 deletions(-) (limited to 'include/net') diff --git a/include/linux/netlink.h b/include/linux/netlink.h index cbba7760545..9ff1b54908f 100644 --- a/include/linux/netlink.h +++ b/include/linux/netlink.h @@ -220,7 +220,7 @@ struct netlink_callback int (*dump)(struct sk_buff * skb, struct netlink_callback *cb); int (*done)(struct netlink_callback *cb); int family; - long args[7]; + long args[6]; }; struct netlink_notify diff --git a/include/net/xfrm.h b/include/net/xfrm.h index 48630b26659..b98d2056f27 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -117,12 +117,21 @@ extern struct mutex xfrm_cfg_mutex; metrics. Plus, it will be made via sk->sk_dst_cache. Solved. */ +struct xfrm_state_walk { + struct list_head all; + u8 state; + union { + u8 dying; + u8 proto; + }; + u32 seq; +}; + /* Full description of state of transformer. */ struct xfrm_state { - struct list_head all; union { - struct list_head gclist; + struct hlist_node gclist; struct hlist_node bydst; }; struct hlist_node bysrc; @@ -136,12 +145,8 @@ struct xfrm_state u32 genid; - /* Key manger bits */ - struct { - u8 state; - u8 dying; - u32 seq; - } km; + /* Key manager bits */ + struct xfrm_state_walk km; /* Parameters of this state. */ struct { @@ -449,10 +454,20 @@ struct xfrm_tmpl #define XFRM_MAX_DEPTH 6 +struct xfrm_policy_walk_entry { + struct list_head all; + u8 dead; +}; + +struct xfrm_policy_walk { + struct xfrm_policy_walk_entry walk; + u8 type; + u32 seq; +}; + struct xfrm_policy { struct xfrm_policy *next; - struct list_head bytype; struct hlist_node bydst; struct hlist_node byidx; @@ -467,13 +482,12 @@ struct xfrm_policy struct xfrm_lifetime_cfg lft; struct xfrm_lifetime_cur curlft; struct dst_entry *bundles; - u16 family; + struct xfrm_policy_walk_entry walk; u8 type; u8 action; u8 flags; - u8 dead; u8 xfrm_nr; - /* XXX 1 byte hole, try to pack */ + u16 family; struct xfrm_sec_ctx *security; struct xfrm_tmpl xfrm_vec[XFRM_MAX_DEPTH]; }; @@ -1245,20 +1259,6 @@ struct xfrm6_tunnel { int priority; }; -struct xfrm_state_walk { - struct list_head list; - unsigned long genid; - struct xfrm_state *state; - int count; - u8 proto; -}; - -struct xfrm_policy_walk { - struct xfrm_policy *policy; - int count; - u8 type, cur_type; -}; - extern void xfrm_init(void); extern void xfrm4_init(void); extern void xfrm_state_init(void); @@ -1410,24 +1410,10 @@ static inline int xfrm4_udp_encap_rcv(struct sock *sk, struct sk_buff *skb) struct xfrm_policy *xfrm_policy_alloc(gfp_t gfp); -static inline void xfrm_policy_walk_init(struct xfrm_policy_walk *walk, u8 type) -{ - walk->cur_type = XFRM_POLICY_TYPE_MAIN; - walk->type = type; - walk->policy = NULL; - walk->count = 0; -} - -static inline void xfrm_policy_walk_done(struct xfrm_policy_walk *walk) -{ - if (walk->policy != NULL) { - xfrm_pol_put(walk->policy); - walk->policy = NULL; - } -} - +extern void xfrm_policy_walk_init(struct xfrm_policy_walk *walk, u8 type); extern int xfrm_policy_walk(struct xfrm_policy_walk *walk, int (*func)(struct xfrm_policy *, int, int, void*), void *); +extern void xfrm_policy_walk_done(struct xfrm_policy_walk *walk); int xfrm_policy_insert(int dir, struct xfrm_policy *policy, int excl); struct xfrm_policy *xfrm_policy_bysel_ctx(u8 type, int dir, struct xfrm_selector *sel, diff --git a/net/key/af_key.c b/net/key/af_key.c index b7f5a1c353e..7ae641df70b 100644 --- a/net/key/af_key.c +++ b/net/key/af_key.c @@ -58,6 +58,7 @@ struct pfkey_sock { struct xfrm_policy_walk policy; struct xfrm_state_walk state; } u; + struct sk_buff *skb; } dump; }; @@ -76,6 +77,10 @@ static int pfkey_can_dump(struct sock *sk) static void pfkey_terminate_dump(struct pfkey_sock *pfk) { if (pfk->dump.dump) { + if (pfk->dump.skb) { + kfree_skb(pfk->dump.skb); + pfk->dump.skb = NULL; + } pfk->dump.done(pfk); pfk->dump.dump = NULL; pfk->dump.done = NULL; @@ -308,12 +313,25 @@ static int pfkey_broadcast(struct sk_buff *skb, gfp_t allocation, static int pfkey_do_dump(struct pfkey_sock *pfk) { + struct sadb_msg *hdr; int rc; rc = pfk->dump.dump(pfk); if (rc == -ENOBUFS) return 0; + if (pfk->dump.skb) { + if (!pfkey_can_dump(&pfk->sk)) + return 0; + + hdr = (struct sadb_msg *) pfk->dump.skb->data; + hdr->sadb_msg_seq = 0; + hdr->sadb_msg_errno = rc; + pfkey_broadcast(pfk->dump.skb, GFP_ATOMIC, BROADCAST_ONE, + &pfk->sk); + pfk->dump.skb = NULL; + } + pfkey_terminate_dump(pfk); return rc; } @@ -1744,9 +1762,14 @@ static int dump_sa(struct xfrm_state *x, int count, void *ptr) out_hdr->sadb_msg_satype = pfkey_proto2satype(x->id.proto); out_hdr->sadb_msg_errno = 0; out_hdr->sadb_msg_reserved = 0; - out_hdr->sadb_msg_seq = count; + out_hdr->sadb_msg_seq = count + 1; out_hdr->sadb_msg_pid = pfk->dump.msg_pid; - pfkey_broadcast(out_skb, GFP_ATOMIC, BROADCAST_ONE, &pfk->sk); + + if (pfk->dump.skb) + pfkey_broadcast(pfk->dump.skb, GFP_ATOMIC, BROADCAST_ONE, + &pfk->sk); + pfk->dump.skb = out_skb; + return 0; } @@ -2245,7 +2268,7 @@ static int pfkey_spdadd(struct sock *sk, struct sk_buff *skb, struct sadb_msg *h return 0; out: - xp->dead = 1; + xp->walk.dead = 1; xfrm_policy_destroy(xp); return err; } @@ -2583,9 +2606,14 @@ static int dump_sp(struct xfrm_policy *xp, int dir, int count, void *ptr) out_hdr->sadb_msg_type = SADB_X_SPDDUMP; out_hdr->sadb_msg_satype = SADB_SATYPE_UNSPEC; out_hdr->sadb_msg_errno = 0; - out_hdr->sadb_msg_seq = count; + out_hdr->sadb_msg_seq = count + 1; out_hdr->sadb_msg_pid = pfk->dump.msg_pid; - pfkey_broadcast(out_skb, GFP_ATOMIC, BROADCAST_ONE, &pfk->sk); + + if (pfk->dump.skb) + pfkey_broadcast(pfk->dump.skb, GFP_ATOMIC, BROADCAST_ONE, + &pfk->sk); + pfk->dump.skb = out_skb; + return 0; } diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index ef9ccbc3875..b7ec08025ff 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -46,7 +46,7 @@ EXPORT_SYMBOL(xfrm_cfg_mutex); static DEFINE_RWLOCK(xfrm_policy_lock); -static struct list_head xfrm_policy_bytype[XFRM_POLICY_TYPE_MAX]; +static struct list_head xfrm_policy_all; unsigned int xfrm_policy_count[XFRM_POLICY_MAX*2]; EXPORT_SYMBOL(xfrm_policy_count); @@ -164,7 +164,7 @@ static void xfrm_policy_timer(unsigned long data) read_lock(&xp->lock); - if (xp->dead) + if (xp->walk.dead) goto out; dir = xfrm_policy_id2dir(xp->index); @@ -236,7 +236,7 @@ struct xfrm_policy *xfrm_policy_alloc(gfp_t gfp) policy = kzalloc(sizeof(struct xfrm_policy), gfp); if (policy) { - INIT_LIST_HEAD(&policy->bytype); + INIT_LIST_HEAD(&policy->walk.all); INIT_HLIST_NODE(&policy->bydst); INIT_HLIST_NODE(&policy->byidx); rwlock_init(&policy->lock); @@ -252,17 +252,13 @@ EXPORT_SYMBOL(xfrm_policy_alloc); void xfrm_policy_destroy(struct xfrm_policy *policy) { - BUG_ON(!policy->dead); + BUG_ON(!policy->walk.dead); BUG_ON(policy->bundles); if (del_timer(&policy->timer)) BUG(); - write_lock_bh(&xfrm_policy_lock); - list_del(&policy->bytype); - write_unlock_bh(&xfrm_policy_lock); - security_xfrm_policy_free(policy->security); kfree(policy); } @@ -310,8 +306,8 @@ static void xfrm_policy_kill(struct xfrm_policy *policy) int dead; write_lock_bh(&policy->lock); - dead = policy->dead; - policy->dead = 1; + dead = policy->walk.dead; + policy->walk.dead = 1; write_unlock_bh(&policy->lock); if (unlikely(dead)) { @@ -609,6 +605,7 @@ int xfrm_policy_insert(int dir, struct xfrm_policy *policy, int excl) if (delpol) { hlist_del(&delpol->bydst); hlist_del(&delpol->byidx); + list_del(&delpol->walk.all); xfrm_policy_count[dir]--; } policy->index = delpol ? delpol->index : xfrm_gen_index(policy->type, dir); @@ -617,7 +614,7 @@ int xfrm_policy_insert(int dir, struct xfrm_policy *policy, int excl) policy->curlft.use_time = 0; if (!mod_timer(&policy->timer, jiffies + HZ)) xfrm_pol_hold(policy); - list_add_tail(&policy->bytype, &xfrm_policy_bytype[policy->type]); + list_add(&policy->walk.all, &xfrm_policy_all); write_unlock_bh(&xfrm_policy_lock); if (delpol) @@ -684,6 +681,7 @@ struct xfrm_policy *xfrm_policy_bysel_ctx(u8 type, int dir, } hlist_del(&pol->bydst); hlist_del(&pol->byidx); + list_del(&pol->walk.all); xfrm_policy_count[dir]--; } ret = pol; @@ -727,6 +725,7 @@ struct xfrm_policy *xfrm_policy_byid(u8 type, int dir, u32 id, int delete, } hlist_del(&pol->bydst); hlist_del(&pol->byidx); + list_del(&pol->walk.all); xfrm_policy_count[dir]--; } ret = pol; @@ -840,6 +839,7 @@ int xfrm_policy_flush(u8 type, struct xfrm_audit *audit_info) continue; hlist_del(&pol->bydst); hlist_del(&pol->byidx); + list_del(&pol->walk.all); write_unlock_bh(&xfrm_policy_lock); xfrm_audit_policy_delete(pol, 1, @@ -867,60 +867,68 @@ int xfrm_policy_walk(struct xfrm_policy_walk *walk, int (*func)(struct xfrm_policy *, int, int, void*), void *data) { - struct xfrm_policy *old, *pol, *last = NULL; + struct xfrm_policy *pol; + struct xfrm_policy_walk_entry *x; int error = 0; if (walk->type >= XFRM_POLICY_TYPE_MAX && walk->type != XFRM_POLICY_TYPE_ANY) return -EINVAL; - if (walk->policy == NULL && walk->count != 0) + if (list_empty(&walk->walk.all) && walk->seq != 0) return 0; - old = pol = walk->policy; - walk->policy = NULL; - read_lock_bh(&xfrm_policy_lock); - - for (; walk->cur_type < XFRM_POLICY_TYPE_MAX; walk->cur_type++) { - if (walk->type != walk->cur_type && - walk->type != XFRM_POLICY_TYPE_ANY) + write_lock_bh(&xfrm_policy_lock); + if (list_empty(&walk->walk.all)) + x = list_first_entry(&xfrm_policy_all, struct xfrm_policy_walk_entry, all); + else + x = list_entry(&walk->walk.all, struct xfrm_policy_walk_entry, all); + list_for_each_entry_from(x, &xfrm_policy_all, all) { + if (x->dead) continue; - - if (pol == NULL) { - pol = list_first_entry(&xfrm_policy_bytype[walk->cur_type], - struct xfrm_policy, bytype); - } - list_for_each_entry_from(pol, &xfrm_policy_bytype[walk->cur_type], bytype) { - if (pol->dead) - continue; - if (last) { - error = func(last, xfrm_policy_id2dir(last->index), - walk->count, data); - if (error) { - xfrm_pol_hold(last); - walk->policy = last; - goto out; - } - } - last = pol; - walk->count++; + pol = container_of(x, struct xfrm_policy, walk); + if (walk->type != XFRM_POLICY_TYPE_ANY && + walk->type != pol->type) + continue; + error = func(pol, xfrm_policy_id2dir(pol->index), + walk->seq, data); + if (error) { + list_move_tail(&walk->walk.all, &x->all); + goto out; } - pol = NULL; + walk->seq++; } - if (walk->count == 0) { + if (walk->seq == 0) { error = -ENOENT; goto out; } - if (last) - error = func(last, xfrm_policy_id2dir(last->index), 0, data); + list_del_init(&walk->walk.all); out: - read_unlock_bh(&xfrm_policy_lock); - if (old != NULL) - xfrm_pol_put(old); + write_unlock_bh(&xfrm_policy_lock); return error; } EXPORT_SYMBOL(xfrm_policy_walk); +void xfrm_policy_walk_init(struct xfrm_policy_walk *walk, u8 type) +{ + INIT_LIST_HEAD(&walk->walk.all); + walk->walk.dead = 1; + walk->type = type; + walk->seq = 0; +} +EXPORT_SYMBOL(xfrm_policy_walk_init); + +void xfrm_policy_walk_done(struct xfrm_policy_walk *walk) +{ + if (list_empty(&walk->walk.all)) + return; + + write_lock_bh(&xfrm_policy_lock); + list_del(&walk->walk.all); + write_unlock_bh(&xfrm_policy_lock); +} +EXPORT_SYMBOL(xfrm_policy_walk_done); + /* * Find policy to apply to this flow. * @@ -1077,7 +1085,7 @@ static void __xfrm_policy_link(struct xfrm_policy *pol, int dir) struct hlist_head *chain = policy_hash_bysel(&pol->selector, pol->family, dir); - list_add_tail(&pol->bytype, &xfrm_policy_bytype[pol->type]); + list_add(&pol->walk.all, &xfrm_policy_all); hlist_add_head(&pol->bydst, chain); hlist_add_head(&pol->byidx, xfrm_policy_byidx+idx_hash(pol->index)); xfrm_policy_count[dir]++; @@ -1095,6 +1103,7 @@ static struct xfrm_policy *__xfrm_policy_unlink(struct xfrm_policy *pol, hlist_del(&pol->bydst); hlist_del(&pol->byidx); + list_del(&pol->walk.all); xfrm_policy_count[dir]--; return pol; @@ -1720,7 +1729,7 @@ restart: for (pi = 0; pi < npols; pi++) { read_lock_bh(&pols[pi]->lock); - pol_dead |= pols[pi]->dead; + pol_dead |= pols[pi]->walk.dead; read_unlock_bh(&pols[pi]->lock); } @@ -2415,9 +2424,7 @@ static void __init xfrm_policy_init(void) panic("XFRM: failed to allocate bydst hash\n"); } - for (dir = 0; dir < XFRM_POLICY_TYPE_MAX; dir++) - INIT_LIST_HEAD(&xfrm_policy_bytype[dir]); - + INIT_LIST_HEAD(&xfrm_policy_all); INIT_WORK(&xfrm_policy_gc_work, xfrm_policy_gc_task); register_netdevice_notifier(&xfrm_dev_notifier); } @@ -2601,7 +2608,7 @@ static int xfrm_policy_migrate(struct xfrm_policy *pol, int i, j, n = 0; write_lock_bh(&pol->lock); - if (unlikely(pol->dead)) { + if (unlikely(pol->walk.dead)) { /* target policy has been deleted */ write_unlock_bh(&pol->lock); return -ENOENT; diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c index 053970e8765..747fd8c291a 100644 --- a/net/xfrm/xfrm_state.c +++ b/net/xfrm/xfrm_state.c @@ -59,14 +59,6 @@ static unsigned int xfrm_state_hashmax __read_mostly = 1 * 1024 * 1024; static unsigned int xfrm_state_num; static unsigned int xfrm_state_genid; -/* Counter indicating ongoing walk, protected by xfrm_state_lock. */ -static unsigned long xfrm_state_walk_ongoing; -/* Counter indicating walk completion, protected by xfrm_cfg_mutex. */ -static unsigned long xfrm_state_walk_completed; - -/* List of outstanding state walks used to set the completed counter. */ -static LIST_HEAD(xfrm_state_walks); - static struct xfrm_state_afinfo *xfrm_state_get_afinfo(unsigned int family); static void xfrm_state_put_afinfo(struct xfrm_state_afinfo *afinfo); @@ -199,8 +191,7 @@ static DEFINE_RWLOCK(xfrm_state_afinfo_lock); static struct xfrm_state_afinfo *xfrm_state_afinfo[NPROTO]; static struct work_struct xfrm_state_gc_work; -static LIST_HEAD(xfrm_state_gc_leftovers); -static LIST_HEAD(xfrm_state_gc_list); +static HLIST_HEAD(xfrm_state_gc_list); static DEFINE_SPINLOCK(xfrm_state_gc_lock); int __xfrm_state_delete(struct xfrm_state *x); @@ -412,23 +403,16 @@ static void xfrm_state_gc_destroy(struct xfrm_state *x) static void xfrm_state_gc_task(struct work_struct *data) { - struct xfrm_state *x, *tmp; - unsigned long completed; + struct xfrm_state *x; + struct hlist_node *entry, *tmp; + struct hlist_head gc_list; - mutex_lock(&xfrm_cfg_mutex); spin_lock_bh(&xfrm_state_gc_lock); - list_splice_tail_init(&xfrm_state_gc_list, &xfrm_state_gc_leftovers); + hlist_move_list(&xfrm_state_gc_list, &gc_list); spin_unlock_bh(&xfrm_state_gc_lock); - completed = xfrm_state_walk_completed; - mutex_unlock(&xfrm_cfg_mutex); - - list_for_each_entry_safe(x, tmp, &xfrm_state_gc_leftovers, gclist) { - if ((long)(x->lastused - completed) > 0) - break; - list_del(&x->gclist); + hlist_for_each_entry_safe(x, entry, tmp, &gc_list, gclist) xfrm_state_gc_destroy(x); - } wake_up(&km_waitq); } @@ -529,7 +513,7 @@ struct xfrm_state *xfrm_state_alloc(void) if (x) { atomic_set(&x->refcnt, 1); atomic_set(&x->tunnel_users, 0); - INIT_LIST_HEAD(&x->all); + INIT_LIST_HEAD(&x->km.all); INIT_HLIST_NODE(&x->bydst); INIT_HLIST_NODE(&x->bysrc); INIT_HLIST_NODE(&x->byspi); @@ -556,7 +540,7 @@ void __xfrm_state_destroy(struct xfrm_state *x) WARN_ON(x->km.state != XFRM_STATE_DEAD); spin_lock_bh(&xfrm_state_gc_lock); - list_add_tail(&x->gclist, &xfrm_state_gc_list); + hlist_add_head(&x->gclist, &xfrm_state_gc_list); spin_unlock_bh(&xfrm_state_gc_lock); schedule_work(&xfrm_state_gc_work); } @@ -569,8 +553,7 @@ int __xfrm_state_delete(struct xfrm_state *x) if (x->km.state != XFRM_STATE_DEAD) { x->km.state = XFRM_STATE_DEAD; spin_lock(&xfrm_state_lock); - x->lastused = xfrm_state_walk_ongoing; - list_del_rcu(&x->all); + list_del(&x->km.all); hlist_del(&x->bydst); hlist_del(&x->bysrc); if (x->id.spi) @@ -871,7 +854,7 @@ xfrm_state_find(xfrm_address_t *daddr, xfrm_address_t *saddr, if (km_query(x, tmpl, pol) == 0) { x->km.state = XFRM_STATE_ACQ; - list_add_tail(&x->all, &xfrm_state_all); + list_add(&x->km.all, &xfrm_state_all); hlist_add_head(&x->bydst, xfrm_state_bydst+h); h = xfrm_src_hash(daddr, saddr, family); hlist_add_head(&x->bysrc, xfrm_state_bysrc+h); @@ -940,7 +923,7 @@ static void __xfrm_state_insert(struct xfrm_state *x) x->genid = ++xfrm_state_genid; - list_add_tail(&x->all, &xfrm_state_all); + list_add(&x->km.all, &xfrm_state_all); h = xfrm_dst_hash(&x->id.daddr, &x->props.saddr, x->props.reqid, x->props.family); @@ -1069,7 +1052,7 @@ static struct xfrm_state *__find_acq_core(unsigned short family, u8 mode, u32 re xfrm_state_hold(x); x->timer.expires = jiffies + sysctl_xfrm_acq_expires*HZ; add_timer(&x->timer); - list_add_tail(&x->all, &xfrm_state_all); + list_add(&x->km.all, &xfrm_state_all); hlist_add_head(&x->bydst, xfrm_state_bydst+h); h = xfrm_src_hash(daddr, saddr, family); hlist_add_head(&x->bysrc, xfrm_state_bysrc+h); @@ -1566,79 +1549,59 @@ int xfrm_state_walk(struct xfrm_state_walk *walk, int (*func)(struct xfrm_state *, int, void*), void *data) { - struct xfrm_state *old, *x, *last = NULL; + struct xfrm_state *state; + struct xfrm_state_walk *x; int err = 0; - if (walk->state == NULL && walk->count != 0) + if (walk->seq != 0 && list_empty(&walk->all)) return 0; - old = x = walk->state; - walk->state = NULL; spin_lock_bh(&xfrm_state_lock); - if (x == NULL) - x = list_first_entry(&xfrm_state_all, struct xfrm_state, all); + if (list_empty(&walk->all)) + x = list_first_entry(&xfrm_state_all, struct xfrm_state_walk, all); + else + x = list_entry(&walk->all, struct xfrm_state_walk, all); list_for_each_entry_from(x, &xfrm_state_all, all) { - if (x->km.state == XFRM_STATE_DEAD) + if (x->state == XFRM_STATE_DEAD) continue; - if (!xfrm_id_proto_match(x->id.proto, walk->proto)) + state = container_of(x, struct xfrm_state, km); + if (!xfrm_id_proto_match(state->id.proto, walk->proto)) continue; - if (last) { - err = func(last, walk->count, data); - if (err) { - xfrm_state_hold(last); - walk->state = last; - goto out; - } + err = func(state, walk->seq, data); + if (err) { + list_move_tail(&walk->all, &x->all); + goto out; } - last = x; - walk->count++; + walk->seq++; } - if (walk->count == 0) { + if (walk->seq == 0) { err = -ENOENT; goto out; } - if (last) - err = func(last, 0, data); + list_del_init(&walk->all); out: spin_unlock_bh(&xfrm_state_lock); - if (old != NULL) - xfrm_state_put(old); return err; } EXPORT_SYMBOL(xfrm_state_walk); void xfrm_state_walk_init(struct xfrm_state_walk *walk, u8 proto) { + INIT_LIST_HEAD(&walk->all); walk->proto = proto; - walk->state = NULL; - walk->count = 0; - list_add_tail(&walk->list, &xfrm_state_walks); - walk->genid = ++xfrm_state_walk_ongoing; + walk->state = XFRM_STATE_DEAD; + walk->seq = 0; } EXPORT_SYMBOL(xfrm_state_walk_init); void xfrm_state_walk_done(struct xfrm_state_walk *walk) { - struct list_head *prev; - - if (walk->state != NULL) { - xfrm_state_put(walk->state); - walk->state = NULL; - } - - prev = walk->list.prev; - list_del(&walk->list); - - if (prev != &xfrm_state_walks) { - list_entry(prev, struct xfrm_state_walk, list)->genid = - walk->genid; + if (list_empty(&walk->all)) return; - } - - xfrm_state_walk_completed = walk->genid; - if (!list_empty(&xfrm_state_gc_leftovers)) - schedule_work(&xfrm_state_gc_work); + spin_lock_bh(&xfrm_state_lock); + list_del(&walk->all); + spin_lock_bh(&xfrm_state_lock); } EXPORT_SYMBOL(xfrm_state_walk_done); diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c index 04c41504f84..76f75df21e1 100644 --- a/net/xfrm/xfrm_user.c +++ b/net/xfrm/xfrm_user.c @@ -1102,7 +1102,7 @@ static struct xfrm_policy *xfrm_policy_construct(struct xfrm_userpolicy_info *p, return xp; error: *errp = err; - xp->dead = 1; + xp->walk.dead = 1; xfrm_policy_destroy(xp); return NULL; } @@ -1595,7 +1595,7 @@ static int xfrm_add_pol_expire(struct sk_buff *skb, struct nlmsghdr *nlh, return -ENOENT; read_lock(&xp->lock); - if (xp->dead) { + if (xp->walk.dead) { read_unlock(&xp->lock); goto out; } -- cgit v1.2.3-70-g09d2 From a210d01ae3ee006b59e54e772a7f212486e0f021 Mon Sep 17 00:00:00 2001 From: Julian Anastasov Date: Wed, 1 Oct 2008 07:28:28 -0700 Subject: ipv4: Loosen source address check on IPv4 output ip_route_output() contains a check to make sure that no flows with non-local source IP addresses are routed. This obviously makes using such addresses impossible. This patch introduces a flowi flag which makes omitting this check possible. The new flag provides a way of handling transparent and non-transparent connections differently. Signed-off-by: Julian Anastasov Signed-off-by: KOVACS Krisztian Signed-off-by: David S. Miller --- include/net/flow.h | 2 ++ net/ipv4/route.c | 20 +++++++++++++------- 2 files changed, 15 insertions(+), 7 deletions(-) (limited to 'include/net') diff --git a/include/net/flow.h b/include/net/flow.h index 228b2477cee..b45a5e4fcad 100644 --- a/include/net/flow.h +++ b/include/net/flow.h @@ -47,6 +47,8 @@ struct flowi { #define fl4_scope nl_u.ip4_u.scope __u8 proto; + __u8 flags; +#define FLOWI_FLAG_ANYSRC 0x01 union { struct { __be16 sport; diff --git a/net/ipv4/route.c b/net/ipv4/route.c index f62187bb6d0..a6d7c584f53 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -2361,11 +2361,6 @@ static int ip_route_output_slow(struct net *net, struct rtable **rp, ipv4_is_zeronet(oldflp->fl4_src)) goto out; - /* It is equivalent to inet_addr_type(saddr) == RTN_LOCAL */ - dev_out = ip_dev_find(net, oldflp->fl4_src); - if (dev_out == NULL) - goto out; - /* I removed check for oif == dev_out->oif here. It was wrong for two reasons: 1. ip_dev_find(net, saddr) can return wrong iface, if saddr @@ -2377,6 +2372,11 @@ static int ip_route_output_slow(struct net *net, struct rtable **rp, if (oldflp->oif == 0 && (ipv4_is_multicast(oldflp->fl4_dst) || oldflp->fl4_dst == htonl(0xFFFFFFFF))) { + /* It is equivalent to inet_addr_type(saddr) == RTN_LOCAL */ + dev_out = ip_dev_find(net, oldflp->fl4_src); + if (dev_out == NULL) + goto out; + /* Special hack: user can direct multicasts and limited broadcast via necessary interface without fiddling with IP_MULTICAST_IF or IP_PKTINFO. @@ -2395,9 +2395,15 @@ static int ip_route_output_slow(struct net *net, struct rtable **rp, fl.oif = dev_out->ifindex; goto make_route; } - if (dev_out) + + if (!(oldflp->flags & FLOWI_FLAG_ANYSRC)) { + /* It is equivalent to inet_addr_type(saddr) == RTN_LOCAL */ + dev_out = ip_dev_find(net, oldflp->fl4_src); + if (dev_out == NULL) + goto out; dev_put(dev_out); - dev_out = NULL; + dev_out = NULL; + } } -- cgit v1.2.3-70-g09d2 From f5715aea4564f233767ea1d944b2637a5fd7cd2e Mon Sep 17 00:00:00 2001 From: KOVACS Krisztian Date: Wed, 1 Oct 2008 07:30:02 -0700 Subject: ipv4: Implement IP_TRANSPARENT socket option This patch introduces the IP_TRANSPARENT socket option: enabling that will make the IPv4 routing omit the non-local source address check on output. Setting IP_TRANSPARENT requires NET_ADMIN capability. Signed-off-by: KOVACS Krisztian Signed-off-by: David S. Miller --- include/linux/in.h | 1 + include/net/inet_sock.h | 3 ++- include/net/inet_timewait_sock.h | 3 ++- net/ipv4/inet_timewait_sock.c | 1 + net/ipv4/ip_sockglue.c | 15 ++++++++++++++- 5 files changed, 20 insertions(+), 3 deletions(-) (limited to 'include/net') diff --git a/include/linux/in.h b/include/linux/in.h index 4065313cd7e..db458beef19 100644 --- a/include/linux/in.h +++ b/include/linux/in.h @@ -75,6 +75,7 @@ struct in_addr { #define IP_IPSEC_POLICY 16 #define IP_XFRM_POLICY 17 #define IP_PASSSEC 18 +#define IP_TRANSPARENT 19 /* BSD compatibility */ #define IP_RECVRETOPTS IP_RETOPTS diff --git a/include/net/inet_sock.h b/include/net/inet_sock.h index 643e26be058..e97b66e2a9d 100644 --- a/include/net/inet_sock.h +++ b/include/net/inet_sock.h @@ -129,7 +129,8 @@ struct inet_sock { is_icsk:1, freebind:1, hdrincl:1, - mc_loop:1; + mc_loop:1, + transparent:1; int mc_index; __be32 mc_addr; struct ip_mc_socklist *mc_list; diff --git a/include/net/inet_timewait_sock.h b/include/net/inet_timewait_sock.h index 91324908fcc..80e4977631b 100644 --- a/include/net/inet_timewait_sock.h +++ b/include/net/inet_timewait_sock.h @@ -128,7 +128,8 @@ struct inet_timewait_sock { __be16 tw_dport; __u16 tw_num; /* And these are ours. */ - __u8 tw_ipv6only:1; + __u8 tw_ipv6only:1, + tw_transparent:1; /* 15 bits hole, try to pack */ __u16 tw_ipv6_offset; unsigned long tw_ttd; diff --git a/net/ipv4/inet_timewait_sock.c b/net/ipv4/inet_timewait_sock.c index 743f011b9a8..1c5fd38f882 100644 --- a/net/ipv4/inet_timewait_sock.c +++ b/net/ipv4/inet_timewait_sock.c @@ -126,6 +126,7 @@ struct inet_timewait_sock *inet_twsk_alloc(const struct sock *sk, const int stat tw->tw_reuse = sk->sk_reuse; tw->tw_hash = sk->sk_hash; tw->tw_ipv6only = 0; + tw->tw_transparent = inet->transparent; tw->tw_prot = sk->sk_prot_creator; twsk_net_set(tw, hold_net(sock_net(sk))); atomic_set(&tw->tw_refcnt, 1); diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c index 105d92a039b..465abf0a986 100644 --- a/net/ipv4/ip_sockglue.c +++ b/net/ipv4/ip_sockglue.c @@ -419,7 +419,7 @@ static int do_ip_setsockopt(struct sock *sk, int level, (1<= sizeof(int)) { @@ -878,6 +878,16 @@ static int do_ip_setsockopt(struct sock *sk, int level, err = xfrm_user_policy(sk, optname, optval, optlen); break; + case IP_TRANSPARENT: + if (!capable(CAP_NET_ADMIN)) { + err = -EPERM; + break; + } + if (optlen < 1) + goto e_inval; + inet->transparent = !!val; + break; + default: err = -ENOPROTOOPT; break; @@ -1130,6 +1140,9 @@ static int do_ip_getsockopt(struct sock *sk, int level, int optname, case IP_FREEBIND: val = inet->freebind; break; + case IP_TRANSPARENT: + val = inet->transparent; + break; default: release_sock(sk); return -ENOPROTOOPT; -- cgit v1.2.3-70-g09d2 From 1668e010cbe1a7567c81d4c02d31dde9859e9da1 Mon Sep 17 00:00:00 2001 From: KOVACS Krisztian Date: Wed, 1 Oct 2008 07:33:10 -0700 Subject: ipv4: Make inet_sock.h independent of route.h inet_iif() in inet_sock.h requires route.h. Since users of inet_iif() usually require other route.h functionality anyway this patch moves inet_iif() to route.h. Signed-off-by: KOVACS Krisztian Signed-off-by: David S. Miller --- include/net/inet_sock.h | 7 ------- include/net/ip_vs.h | 1 + include/net/route.h | 5 +++++ net/ipv4/netfilter/nf_nat_helper.c | 1 + net/ipv6/af_inet6.c | 1 + 5 files changed, 8 insertions(+), 7 deletions(-) (limited to 'include/net') diff --git a/include/net/inet_sock.h b/include/net/inet_sock.h index e97b66e2a9d..139b78b4dfe 100644 --- a/include/net/inet_sock.h +++ b/include/net/inet_sock.h @@ -24,7 +24,6 @@ #include #include #include -#include #include /** struct ip_options - IP Options @@ -195,12 +194,6 @@ static inline int inet_sk_ehashfn(const struct sock *sk) return inet_ehashfn(net, laddr, lport, faddr, fport); } - -static inline int inet_iif(const struct sk_buff *skb) -{ - return skb->rtable->rt_iif; -} - static inline struct request_sock *inet_reqsk_alloc(struct request_sock_ops *ops) { struct request_sock *req = reqsk_alloc(ops); diff --git a/include/net/ip_vs.h b/include/net/ip_vs.h index 33e2ac6ceb3..0b2071d9326 100644 --- a/include/net/ip_vs.h +++ b/include/net/ip_vs.h @@ -22,6 +22,7 @@ #include #include /* for union nf_inet_addr */ +#include #include /* for struct ipv6hdr */ #include /* for ipv6_addr_copy */ diff --git a/include/net/route.h b/include/net/route.h index 4f0d8c14736..31d1485b624 100644 --- a/include/net/route.h +++ b/include/net/route.h @@ -204,4 +204,9 @@ static inline struct inet_peer *rt_get_peer(struct rtable *rt) return rt->peer; } +static inline int inet_iif(const struct sk_buff *skb) +{ + return skb->rtable->rt_iif; +} + #endif /* _ROUTE_H */ diff --git a/net/ipv4/netfilter/nf_nat_helper.c b/net/ipv4/netfilter/nf_nat_helper.c index 11976ea2988..112dcfa1290 100644 --- a/net/ipv4/netfilter/nf_nat_helper.c +++ b/net/ipv4/netfilter/nf_nat_helper.c @@ -16,6 +16,7 @@ #include #include #include +#include #include #include diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c index 95055f8c3f3..f018704ecb8 100644 --- a/net/ipv6/af_inet6.c +++ b/net/ipv6/af_inet6.c @@ -50,6 +50,7 @@ #include #include #include +#include #include #include #include -- cgit v1.2.3-70-g09d2 From 79876874ce20d37ecdc7f481ebf142466999152f Mon Sep 17 00:00:00 2001 From: KOVACS Krisztian Date: Wed, 1 Oct 2008 07:35:39 -0700 Subject: ipv4: Conditionally enable transparent flow flag when connecting Set FLOWI_FLAG_ANYSRC in flowi->flags if the socket has the transparent socket option set. This way we selectively enable certain connections with non-local source addresses to be routed. Signed-off-by: KOVACS Krisztian Signed-off-by: David S. Miller --- include/net/route.h | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'include/net') diff --git a/include/net/route.h b/include/net/route.h index 31d1485b624..4e8cae0e584 100644 --- a/include/net/route.h +++ b/include/net/route.h @@ -27,7 +27,7 @@ #include #include #include -#include +#include #include #include #include @@ -161,6 +161,10 @@ static inline int ip_route_connect(struct rtable **rp, __be32 dst, int err; struct net *net = sock_net(sk); + + if (inet_sk(sk)->transparent) + fl.flags |= FLOWI_FLAG_ANYSRC; + if (!dst || !src) { err = __ip_route_output_key(net, rp, &fl); if (err) -- cgit v1.2.3-70-g09d2 From 88ef4a5a78e63420dd1dd770f1bd1dc198926b04 Mon Sep 17 00:00:00 2001 From: KOVACS Krisztian Date: Wed, 1 Oct 2008 07:41:00 -0700 Subject: tcp: Handle TCP SYN+ACK/ACK/RST transparency The TCP stack sends out SYN+ACK/ACK/RST reply packets in response to incoming packets. The non-local source address check on output bites us again, as replies for transparently redirected traffic won't have a chance to leave the node. This patch selectively sets the FLOWI_FLAG_ANYSRC flag when doing the route lookup for those replies. Transparent replies are enabled if the listening socket has the transparent socket flag set. Signed-off-by: KOVACS Krisztian Signed-off-by: David S. Miller --- include/net/inet_sock.h | 8 +++++++- include/net/ip.h | 3 +++ net/ipv4/tcp_ipv4.c | 12 +++++++++--- 3 files changed, 19 insertions(+), 4 deletions(-) (limited to 'include/net') diff --git a/include/net/inet_sock.h b/include/net/inet_sock.h index 139b78b4dfe..dced3f64f97 100644 --- a/include/net/inet_sock.h +++ b/include/net/inet_sock.h @@ -72,7 +72,8 @@ struct inet_request_sock { sack_ok : 1, wscale_ok : 1, ecn_ok : 1, - acked : 1; + acked : 1, + no_srccheck: 1; struct ip_options *opt; }; @@ -204,4 +205,9 @@ static inline struct request_sock *inet_reqsk_alloc(struct request_sock_ops *ops return req; } +static inline __u8 inet_sk_flowi_flags(const struct sock *sk) +{ + return inet_sk(sk)->transparent ? FLOWI_FLAG_ANYSRC : 0; +} + #endif /* _INET_SOCK_H */ diff --git a/include/net/ip.h b/include/net/ip.h index 250e6ef025a..90b27f634b7 100644 --- a/include/net/ip.h +++ b/include/net/ip.h @@ -140,12 +140,15 @@ static inline void ip_tr_mc_map(__be32 addr, char *buf) struct ip_reply_arg { struct kvec iov[1]; + int flags; __wsum csum; int csumoffset; /* u16 offset of csum in iov[0].iov_base */ /* -1 if not needed */ int bound_dev_if; }; +#define IP_REPLY_ARG_NOSRCCHECK 1 + void ip_send_reply(struct sock *sk, struct sk_buff *skb, struct ip_reply_arg *arg, unsigned int len); diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index d13688e3558..8b24bd833cb 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -591,6 +591,7 @@ static void tcp_v4_send_reset(struct sock *sk, struct sk_buff *skb) ip_hdr(skb)->saddr, /* XXX */ sizeof(struct tcphdr), IPPROTO_TCP, 0); arg.csumoffset = offsetof(struct tcphdr, check) / 2; + arg.flags = (sk && inet_sk(sk)->transparent) ? IP_REPLY_ARG_NOSRCCHECK : 0; net = dev_net(skb->dst->dev); ip_send_reply(net->ipv4.tcp_sock, skb, @@ -606,7 +607,8 @@ static void tcp_v4_send_reset(struct sock *sk, struct sk_buff *skb) static void tcp_v4_send_ack(struct sk_buff *skb, u32 seq, u32 ack, u32 win, u32 ts, int oif, - struct tcp_md5sig_key *key) + struct tcp_md5sig_key *key, + int reply_flags) { struct tcphdr *th = tcp_hdr(skb); struct { @@ -659,6 +661,7 @@ static void tcp_v4_send_ack(struct sk_buff *skb, u32 seq, u32 ack, ip_hdr(skb)->daddr, &rep.th); } #endif + arg.flags = reply_flags; arg.csum = csum_tcpudp_nofold(ip_hdr(skb)->daddr, ip_hdr(skb)->saddr, /* XXX */ arg.iov[0].iov_len, IPPROTO_TCP, 0); @@ -681,7 +684,8 @@ static void tcp_v4_timewait_ack(struct sock *sk, struct sk_buff *skb) tcptw->tw_rcv_wnd >> tw->tw_rcv_wscale, tcptw->tw_ts_recent, tw->tw_bound_dev_if, - tcp_twsk_md5_key(tcptw) + tcp_twsk_md5_key(tcptw), + tw->tw_transparent ? IP_REPLY_ARG_NOSRCCHECK : 0 ); inet_twsk_put(tw); @@ -694,7 +698,8 @@ static void tcp_v4_reqsk_send_ack(struct sock *sk, struct sk_buff *skb, tcp_rsk(req)->rcv_isn + 1, req->rcv_wnd, req->ts_recent, 0, - tcp_v4_md5_do_lookup(sk, ip_hdr(skb)->daddr)); + tcp_v4_md5_do_lookup(sk, ip_hdr(skb)->daddr), + inet_rsk(req)->no_srccheck ? IP_REPLY_ARG_NOSRCCHECK : 0); } /* @@ -1244,6 +1249,7 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb) ireq = inet_rsk(req); ireq->loc_addr = daddr; ireq->rmt_addr = saddr; + ireq->no_srccheck = inet_sk(sk)->transparent; ireq->opt = tcp_v4_save_options(sk, skb); if (!want_cookie) TCP_ECN_create_request(req, tcp_hdr(skb)); -- cgit v1.2.3-70-g09d2 From 86b08d867d7de001ab224180ed7865fab93fd56e Mon Sep 17 00:00:00 2001 From: KOVACS Krisztian Date: Wed, 1 Oct 2008 07:44:42 -0700 Subject: ipv4: Make Netfilter's ip_route_me_harder() non-local address compatible Netfilter's ip_route_me_harder() tries to re-route packets either generated or re-routed by Netfilter. This patch changes ip_route_me_harder() to handle packets from non-locally-bound sockets with IP_TRANSPARENT set as local and to set the appropriate flowi flags when re-doing the routing lookup. Signed-off-by: KOVACS Krisztian Signed-off-by: David S. Miller --- include/net/ip.h | 6 ++++++ net/ipv4/inet_connection_sock.c | 1 + net/ipv4/ip_output.c | 4 +++- net/ipv4/netfilter.c | 3 +++ net/ipv4/syncookies.c | 2 ++ 5 files changed, 15 insertions(+), 1 deletion(-) (limited to 'include/net') diff --git a/include/net/ip.h b/include/net/ip.h index 90b27f634b7..d678ea3d474 100644 --- a/include/net/ip.h +++ b/include/net/ip.h @@ -29,6 +29,7 @@ #include #include +#include struct sock; @@ -149,6 +150,11 @@ struct ip_reply_arg { #define IP_REPLY_ARG_NOSRCCHECK 1 +static inline __u8 ip_reply_arg_flowi_flags(const struct ip_reply_arg *arg) +{ + return (arg->flags & IP_REPLY_ARG_NOSRCCHECK) ? FLOWI_FLAG_ANYSRC : 0; +} + void ip_send_reply(struct sock *sk, struct sk_buff *skb, struct ip_reply_arg *arg, unsigned int len); diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c index 0c1ae68ee84..432c570c9f5 100644 --- a/net/ipv4/inet_connection_sock.c +++ b/net/ipv4/inet_connection_sock.c @@ -335,6 +335,7 @@ struct dst_entry* inet_csk_route_req(struct sock *sk, .saddr = ireq->loc_addr, .tos = RT_CONN_FLAGS(sk) } }, .proto = sk->sk_protocol, + .flags = inet_sk_flowi_flags(sk), .uli_u = { .ports = { .sport = inet_sk(sk)->sport, .dport = ireq->rmt_port } } }; diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index d533a89e08d..d2a8f8bb78a 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -340,6 +340,7 @@ int ip_queue_xmit(struct sk_buff *skb, int ipfragok) .saddr = inet->saddr, .tos = RT_CONN_FLAGS(sk) } }, .proto = sk->sk_protocol, + .flags = inet_sk_flowi_flags(sk), .uli_u = { .ports = { .sport = inet->sport, .dport = inet->dport } } }; @@ -1371,7 +1372,8 @@ void ip_send_reply(struct sock *sk, struct sk_buff *skb, struct ip_reply_arg *ar .uli_u = { .ports = { .sport = tcp_hdr(skb)->dest, .dport = tcp_hdr(skb)->source } }, - .proto = sk->sk_protocol }; + .proto = sk->sk_protocol, + .flags = ip_reply_arg_flowi_flags(arg) }; security_skb_classify_flow(skb, &fl); if (ip_route_output_key(sock_net(sk), &rt, &fl)) return; diff --git a/net/ipv4/netfilter.c b/net/ipv4/netfilter.c index f8edacdf991..01671ad51ed 100644 --- a/net/ipv4/netfilter.c +++ b/net/ipv4/netfilter.c @@ -20,6 +20,8 @@ int ip_route_me_harder(struct sk_buff *skb, unsigned addr_type) unsigned int type; type = inet_addr_type(&init_net, iph->saddr); + if (skb->sk && inet_sk(skb->sk)->transparent) + type = RTN_LOCAL; if (addr_type == RTN_UNSPEC) addr_type = type; @@ -33,6 +35,7 @@ int ip_route_me_harder(struct sk_buff *skb, unsigned addr_type) fl.nl_u.ip4_u.tos = RT_TOS(iph->tos); fl.oif = skb->sk ? skb->sk->sk_bound_dev_if : 0; fl.mark = skb->mark; + fl.flags = skb->sk ? inet_sk_flowi_flags(skb->sk) : 0; if (ip_route_output_key(&init_net, &rt, &fl) != 0) return -1; diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c index 9d38005abba..929302b2ba9 100644 --- a/net/ipv4/syncookies.c +++ b/net/ipv4/syncookies.c @@ -16,6 +16,7 @@ #include #include #include +#include /* Timestamps: lowest 9 bits store TCP options */ #define TSBITS 9 @@ -337,6 +338,7 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb, .saddr = ireq->loc_addr, .tos = RT_CONN_FLAGS(sk) } }, .proto = IPPROTO_TCP, + .flags = inet_sk_flowi_flags(sk), .uli_u = { .ports = { .sport = th->dest, .dport = th->source } } }; -- cgit v1.2.3-70-g09d2 From a3116ac5c216fc3c145906a46df9ce542ff7dcf2 Mon Sep 17 00:00:00 2001 From: KOVACS Krisztian Date: Wed, 1 Oct 2008 07:46:49 -0700 Subject: tcp: Port redirection support for TCP Current TCP code relies on the local port of the listening socket being the same as the destination address of the incoming connection. Port redirection used by many transparent proxying techniques obviously breaks this, so we have to store the original destination port address. This patch extends struct inet_request_sock and stores the incoming destination port value there. It also modifies the handshake code to use that value as the source port when sending reply packets. Signed-off-by: KOVACS Krisztian Signed-off-by: David S. Miller --- include/net/inet_sock.h | 2 +- include/net/tcp.h | 1 + net/ipv4/inet_connection_sock.c | 2 ++ net/ipv4/syncookies.c | 1 + net/ipv4/tcp_output.c | 2 +- 5 files changed, 6 insertions(+), 2 deletions(-) (limited to 'include/net') diff --git a/include/net/inet_sock.h b/include/net/inet_sock.h index dced3f64f97..de0ecc71cf0 100644 --- a/include/net/inet_sock.h +++ b/include/net/inet_sock.h @@ -61,8 +61,8 @@ struct inet_request_sock { struct request_sock req; #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) u16 inet6_rsk_offset; - /* 2 bytes hole, try to pack */ #endif + __be16 loc_port; __be32 loc_addr; __be32 rmt_addr; __be16 rmt_port; diff --git a/include/net/tcp.h b/include/net/tcp.h index 12c9b4fec04..f6cc3414315 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -976,6 +976,7 @@ static inline void tcp_openreq_init(struct request_sock *req, ireq->acked = 0; ireq->ecn_ok = 0; ireq->rmt_port = tcp_hdr(skb)->source; + ireq->loc_port = tcp_hdr(skb)->dest; } extern void tcp_enter_memory_pressure(struct sock *sk); diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c index 432c570c9f5..21fcc5a9045 100644 --- a/net/ipv4/inet_connection_sock.c +++ b/net/ipv4/inet_connection_sock.c @@ -516,6 +516,8 @@ struct sock *inet_csk_clone(struct sock *sk, const struct request_sock *req, newicsk->icsk_bind_hash = NULL; inet_sk(newsk)->dport = inet_rsk(req)->rmt_port; + inet_sk(newsk)->num = ntohs(inet_rsk(req)->loc_port); + inet_sk(newsk)->sport = inet_rsk(req)->loc_port; newsk->sk_write_space = sk_stream_write_space; newicsk->icsk_retransmits = 0; diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c index 929302b2ba9..d346c22aa6a 100644 --- a/net/ipv4/syncookies.c +++ b/net/ipv4/syncookies.c @@ -297,6 +297,7 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb, treq->rcv_isn = ntohl(th->seq) - 1; treq->snt_isn = cookie; req->mss = mss; + ireq->loc_port = th->dest; ireq->rmt_port = th->source; ireq->loc_addr = ip_hdr(skb)->daddr; ireq->rmt_addr = ip_hdr(skb)->saddr; diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index a8499ef3234..493553c71d3 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -2275,7 +2275,7 @@ struct sk_buff *tcp_make_synack(struct sock *sk, struct dst_entry *dst, th->syn = 1; th->ack = 1; TCP_ECN_make_synack(req, th); - th->source = inet_sk(sk)->sport; + th->source = ireq->loc_port; th->dest = ireq->rmt_port; /* Setting of flags are superfluous here for callers (and ECE is * not even correctly set) -- cgit v1.2.3-70-g09d2 From bcd41303f422015ab662c9276d108414aa75b796 Mon Sep 17 00:00:00 2001 From: KOVACS Krisztian Date: Wed, 1 Oct 2008 07:48:10 -0700 Subject: udp: Export UDP socket lookup function The iptables tproxy code has to be able to do UDP socket hash lookups, so we have to provide an exported lookup function for this purpose. Signed-off-by: KOVACS Krisztian Signed-off-by: David S. Miller --- include/net/udp.h | 4 ++++ net/ipv4/udp.c | 7 +++++++ 2 files changed, 11 insertions(+) (limited to 'include/net') diff --git a/include/net/udp.h b/include/net/udp.h index addcdc67234..d38f6f2419f 100644 --- a/include/net/udp.h +++ b/include/net/udp.h @@ -148,6 +148,10 @@ extern int udp_lib_setsockopt(struct sock *sk, int level, int optname, char __user *optval, int optlen, int (*push_pending_frames)(struct sock *)); +extern struct sock *udp4_lib_lookup(struct net *net, __be32 saddr, __be16 sport, + __be32 daddr, __be16 dport, + int dif); + DECLARE_SNMP_STAT(struct udp_mib, udp_stats_in6); /* UDP-Lite does not have a standardized MIB yet, so we inherit from UDP */ diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 57e26fa6618..c83d0ef469c 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -302,6 +302,13 @@ static struct sock *__udp4_lib_lookup(struct net *net, __be32 saddr, return result; } +struct sock *udp4_lib_lookup(struct net *net, __be32 saddr, __be16 sport, + __be32 daddr, __be16 dport, int dif) +{ + return __udp4_lib_lookup(net, saddr, sport, daddr, dport, dif, udp_hash); +} +EXPORT_SYMBOL_GPL(udp4_lib_lookup); + static inline struct sock *udp_v4_mcast_next(struct sock *sk, __be16 loc_port, __be32 loc_addr, __be16 rmt_port, __be32 rmt_addr, -- cgit v1.2.3-70-g09d2 From c226ef9b83694311327f3ab0036c6de9c22e9daf Mon Sep 17 00:00:00 2001 From: Neil Horman Date: Fri, 25 Jul 2008 12:44:09 -0400 Subject: sctp: reduce memory footprint of sctp_chunk structure sctp_chunks should be put on a diet. This is some of the low hanging fruit that we can strip out. Changes all the __s8/__u8 flags to bitfields. Saves 12 bytes per chunk. Signed-off-by: Neil Horman Signed-off-by: Vlad Yasevich --- include/net/sctp/structs.h | 31 +++++++++++++++++-------------- net/sctp/output.c | 2 +- net/sctp/outqueue.c | 14 +++++++------- net/sctp/sm_make_chunk.c | 2 +- 4 files changed, 26 insertions(+), 23 deletions(-) (limited to 'include/net') diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h index ab1c472ea75..0dc1b7267c3 100644 --- a/include/net/sctp/structs.h +++ b/include/net/sctp/structs.h @@ -731,20 +731,23 @@ struct sctp_chunk { */ struct sk_buff *auth_chunk; - __u8 rtt_in_progress; /* Is this chunk used for RTT calculation? */ - __u8 resent; /* Has this chunk ever been retransmitted. */ - __u8 has_tsn; /* Does this chunk have a TSN yet? */ - __u8 has_ssn; /* Does this chunk have a SSN yet? */ - __u8 singleton; /* Was this the only chunk in the packet? */ - __u8 end_of_packet; /* Was this the last chunk in the packet? */ - __u8 ecn_ce_done; /* Have we processed the ECN CE bit? */ - __u8 pdiscard; /* Discard the whole packet now? */ - __u8 tsn_gap_acked; /* Is this chunk acked by a GAP ACK? */ - __s8 fast_retransmit; /* Is this chunk fast retransmitted? */ - __u8 tsn_missing_report; /* Data chunk missing counter. */ - __u8 data_accepted; /* At least 1 chunk in this packet accepted */ - __u8 auth; /* IN: was auth'ed | OUT: needs auth */ - __u8 has_asconf; /* IN: have seen an asconf before */ +#define SCTP_CAN_FRTX 0x0 +#define SCTP_NEED_FRTX 0x1 +#define SCTP_DONT_FRTX 0x2 + __u16 rtt_in_progress:1, /* This chunk used for RTT calc? */ + resent:1, /* Has this chunk ever been resent. */ + has_tsn:1, /* Does this chunk have a TSN yet? */ + has_ssn:1, /* Does this chunk have a SSN yet? */ + singleton:1, /* Only chunk in the packet? */ + end_of_packet:1, /* Last chunk in the packet? */ + ecn_ce_done:1, /* Have we processed the ECN CE bit? */ + pdiscard:1, /* Discard the whole packet now? */ + tsn_gap_acked:1, /* Is this chunk acked by a GAP ACK? */ + data_accepted:1, /* At least 1 chunk accepted */ + auth:1, /* IN: was auth'ed | OUT: needs auth */ + has_asconf:1, /* IN: have seen an asconf before */ + tsn_missing_report:2, /* Data chunk missing counter. */ + fast_retransmit:2; /* Is this chunk fast retransmitted? */ }; void sctp_chunk_hold(struct sctp_chunk *); diff --git a/net/sctp/output.c b/net/sctp/output.c index 225c7123c41..c3f417f7ec6 100644 --- a/net/sctp/output.c +++ b/net/sctp/output.c @@ -699,7 +699,7 @@ static sctp_xmit_t sctp_packet_append_data(struct sctp_packet *packet, * When a Fast Retransmit is being performed the sender SHOULD * ignore the value of cwnd and SHOULD NOT delay retransmission. */ - if (chunk->fast_retransmit <= 0) + if (chunk->fast_retransmit != SCTP_NEED_FRTX) if (transport->flight_size >= transport->cwnd) { retval = SCTP_XMIT_RWND_FULL; goto finish; diff --git a/net/sctp/outqueue.c b/net/sctp/outqueue.c index da8d846301c..247ebc95c1e 100644 --- a/net/sctp/outqueue.c +++ b/net/sctp/outqueue.c @@ -420,7 +420,7 @@ void sctp_retransmit_mark(struct sctp_outq *q, * be added to the retransmit queue. */ if ((reason == SCTP_RTXR_FAST_RTX && - (chunk->fast_retransmit > 0)) || + (chunk->fast_retransmit == SCTP_NEED_FRTX)) || (reason != SCTP_RTXR_FAST_RTX && !chunk->tsn_gap_acked)) { /* If this chunk was sent less then 1 rto ago, do not * retransmit this chunk, but give the peer time @@ -650,8 +650,8 @@ static int sctp_outq_flush_rtx(struct sctp_outq *q, struct sctp_packet *pkt, /* Mark the chunk as ineligible for fast retransmit * after it is retransmitted. */ - if (chunk->fast_retransmit > 0) - chunk->fast_retransmit = -1; + if (chunk->fast_retransmit == SCTP_NEED_FRTX) + chunk->fast_retransmit = SCTP_DONT_FRTX; /* Force start T3-rtx timer when fast retransmitting * the earliest outstanding TSN @@ -680,8 +680,8 @@ static int sctp_outq_flush_rtx(struct sctp_outq *q, struct sctp_packet *pkt, */ if (rtx_timeout || fast_rtx) { list_for_each_entry(chunk1, lqueue, transmitted_list) { - if (chunk1->fast_retransmit > 0) - chunk1->fast_retransmit = -1; + if (chunk1->fast_retransmit == SCTP_NEED_FRTX) + chunk1->fast_retransmit = SCTP_DONT_FRTX; } } @@ -1656,7 +1656,7 @@ static void sctp_mark_missing(struct sctp_outq *q, * chunk if it has NOT been fast retransmitted or marked for * fast retransmit already. */ - if (!chunk->fast_retransmit && + if (chunk->fast_retransmit == SCTP_CAN_FRTX && !chunk->tsn_gap_acked && TSN_lt(tsn, highest_new_tsn_in_sack)) { @@ -1681,7 +1681,7 @@ static void sctp_mark_missing(struct sctp_outq *q, */ if (chunk->tsn_missing_report >= 3) { - chunk->fast_retransmit = 1; + chunk->fast_retransmit = SCTP_NEED_FRTX; do_fast_retransmit = 1; } } diff --git a/net/sctp/sm_make_chunk.c b/net/sctp/sm_make_chunk.c index d68869f966c..99fe0747cc9 100644 --- a/net/sctp/sm_make_chunk.c +++ b/net/sctp/sm_make_chunk.c @@ -1211,7 +1211,7 @@ struct sctp_chunk *sctp_chunkify(struct sk_buff *skb, */ retval->tsn_missing_report = 0; retval->tsn_gap_acked = 0; - retval->fast_retransmit = 0; + retval->fast_retransmit = SCTP_CAN_FRTX; /* If this is a fragmented message, track all fragments * of the message (for SEND_FAILED). -- cgit v1.2.3-70-g09d2 From 52cae8f06babf9eed327479c1aa024ce3732f912 Mon Sep 17 00:00:00 2001 From: Vlad Yasevich Date: Mon, 18 Aug 2008 10:34:34 -0400 Subject: sctp: try harder to figure out address family when checking wildcards sctp_is_any() function that is used to check for wildcard addresses only looks at the address itself to determine the address family. This function is used in the API to check the address passed in from the user. If the user simply zerroes out the sockaddr_storage and pass that in, we'll end up failing. So, let's try harder to determine the address family by also checking the socket if it's possible. Signed-off-by: Vlad Yasevich --- include/net/sctp/structs.h | 2 +- net/sctp/bind_addr.c | 16 +++++++++++++--- net/sctp/ipv6.c | 5 +++-- net/sctp/socket.c | 10 +++++----- 4 files changed, 22 insertions(+), 11 deletions(-) (limited to 'include/net') diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h index 0dc1b7267c3..94c62e4ddea 100644 --- a/include/net/sctp/structs.h +++ b/include/net/sctp/structs.h @@ -1228,7 +1228,7 @@ int sctp_raw_to_bind_addrs(struct sctp_bind_addr *bp, __u8 *raw, int len, sctp_scope_t sctp_scope(const union sctp_addr *); int sctp_in_scope(const union sctp_addr *addr, const sctp_scope_t scope); -int sctp_is_any(const union sctp_addr *addr); +int sctp_is_any(struct sock *sk, const union sctp_addr *addr); int sctp_addr_is_valid(const union sctp_addr *addr); diff --git a/net/sctp/bind_addr.c b/net/sctp/bind_addr.c index f62bc246893..6d5944a745d 100644 --- a/net/sctp/bind_addr.c +++ b/net/sctp/bind_addr.c @@ -457,7 +457,7 @@ static int sctp_copy_one_addr(struct sctp_bind_addr *dest, { int error = 0; - if (sctp_is_any(addr)) { + if (sctp_is_any(NULL, addr)) { error = sctp_copy_local_addr_list(dest, scope, gfp, flags); } else if (sctp_in_scope(addr, scope)) { /* Now that the address is in scope, check to see if @@ -477,11 +477,21 @@ static int sctp_copy_one_addr(struct sctp_bind_addr *dest, } /* Is this a wildcard address? */ -int sctp_is_any(const union sctp_addr *addr) +int sctp_is_any(struct sock *sk, const union sctp_addr *addr) { - struct sctp_af *af = sctp_get_af_specific(addr->sa.sa_family); + unsigned short fam = 0; + struct sctp_af *af; + + /* Try to get the right address family */ + if (addr->sa.sa_family != AF_UNSPEC) + fam = addr->sa.sa_family; + else if (sk) + fam = sk->sk_family; + + af = sctp_get_af_specific(fam); if (!af) return 0; + return af->is_any(addr); } diff --git a/net/sctp/ipv6.c b/net/sctp/ipv6.c index 47f91afa021..c78da3c9dd3 100644 --- a/net/sctp/ipv6.c +++ b/net/sctp/ipv6.c @@ -837,6 +837,7 @@ static int sctp_inet6_cmp_addr(const union sctp_addr *addr1, struct sctp_sock *opt) { struct sctp_af *af1, *af2; + struct sock *sk = sctp_opt2sk(opt); af1 = sctp_get_af_specific(addr1->sa.sa_family); af2 = sctp_get_af_specific(addr2->sa.sa_family); @@ -845,11 +846,11 @@ static int sctp_inet6_cmp_addr(const union sctp_addr *addr1, return 0; /* If the socket is IPv6 only, v4 addrs will not match */ - if (__ipv6_only_sock(sctp_opt2sk(opt)) && af1 != af2) + if (__ipv6_only_sock(sk) && af1 != af2) return 0; /* Today, wildcard AF_INET/AF_INET6. */ - if (sctp_is_any(addr1) || sctp_is_any(addr2)) + if (sctp_is_any(sk, addr1) || sctp_is_any(sk, addr2)) return 1; if (addr1->sa.sa_family != addr2->sa.sa_family) diff --git a/net/sctp/socket.c b/net/sctp/socket.c index 5ffb9dec1c3..a1b904529d5 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -2309,7 +2309,7 @@ static int sctp_setsockopt_peer_addr_params(struct sock *sk, /* If an address other than INADDR_ANY is specified, and * no transport is found, then the request is invalid. */ - if (!sctp_is_any(( union sctp_addr *)¶ms.spp_address)) { + if (!sctp_is_any(sk, ( union sctp_addr *)¶ms.spp_address)) { trans = sctp_addr_id2transport(sk, ¶ms.spp_address, params.spp_assoc_id); if (!trans) @@ -4062,7 +4062,7 @@ static int sctp_getsockopt_peer_addr_params(struct sock *sk, int len, /* If an address other than INADDR_ANY is specified, and * no transport is found, then the request is invalid. */ - if (!sctp_is_any(( union sctp_addr *)¶ms.spp_address)) { + if (!sctp_is_any(sk, ( union sctp_addr *)¶ms.spp_address)) { trans = sctp_addr_id2transport(sk, ¶ms.spp_address, params.spp_assoc_id); if (!trans) { @@ -4414,7 +4414,7 @@ static int sctp_getsockopt_local_addrs_num_old(struct sock *sk, int len, if (sctp_list_single_entry(&bp->address_list)) { addr = list_entry(bp->address_list.next, struct sctp_sockaddr_entry, list); - if (sctp_is_any(&addr->a)) { + if (sctp_is_any(sk, &addr->a)) { rcu_read_lock(); list_for_each_entry_rcu(addr, &sctp_local_addr_list, list) { @@ -4602,7 +4602,7 @@ static int sctp_getsockopt_local_addrs_old(struct sock *sk, int len, if (sctp_list_single_entry(&bp->address_list)) { addr = list_entry(bp->address_list.next, struct sctp_sockaddr_entry, list); - if (sctp_is_any(&addr->a)) { + if (sctp_is_any(sk, &addr->a)) { cnt = sctp_copy_laddrs_old(sk, bp->port, getaddrs.addr_num, addrs, &bytes_copied); @@ -4695,7 +4695,7 @@ static int sctp_getsockopt_local_addrs(struct sock *sk, int len, if (sctp_list_single_entry(&bp->address_list)) { addr = list_entry(bp->address_list.next, struct sctp_sockaddr_entry, list); - if (sctp_is_any(&addr->a)) { + if (sctp_is_any(sk, &addr->a)) { cnt = sctp_copy_laddrs(sk, bp->port, addrs, space_left, &bytes_copied); if (cnt < 0) { -- cgit v1.2.3-70-g09d2 From 9995a32b4d14dcda2f8df58030526bee91114c16 Mon Sep 17 00:00:00 2001 From: Rémi Denis-Courmont Date: Sun, 5 Oct 2008 11:14:48 -0700 Subject: Phonet: connected sockets glue MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Rémi Denis-Courmont Acked-by: Arnaldo Carvalho de Melo Signed-off-by: David S. Miller --- include/net/phonet/pep.h | 43 +++++++++++++++++++++ net/phonet/socket.c | 97 ++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 140 insertions(+) create mode 100644 include/net/phonet/pep.h (limited to 'include/net') diff --git a/include/net/phonet/pep.h b/include/net/phonet/pep.h new file mode 100644 index 00000000000..b2f8c54c533 --- /dev/null +++ b/include/net/phonet/pep.h @@ -0,0 +1,43 @@ +/* + * File: pep.h + * + * Phonet Pipe End Point sockets definitions + * + * Copyright (C) 2008 Nokia Corporation. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * version 2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA + * 02110-1301 USA + */ + +#ifndef NET_PHONET_PEP_H +#define NET_PHONET_PEP_H + +struct pep_sock { + struct pn_sock pn_sk; + + /* Listening socket stuff: */ + struct hlist_head ackq; + + /* Connected socket stuff: */ + u8 tx_credits; +}; + +static inline struct pep_sock *pep_sk(struct sock *sk) +{ + return (struct pep_sock *)sk; +} + +extern const struct proto_ops phonet_stream_ops; + +#endif diff --git a/net/phonet/socket.c b/net/phonet/socket.c index dfd4061646d..cea1136cbe4 100644 --- a/net/phonet/socket.c +++ b/net/phonet/socket.c @@ -25,11 +25,13 @@ #include #include +#include #include #include #include #include +#include #include static int pn_socket_release(struct socket *sock) @@ -166,6 +168,24 @@ static int pn_socket_autobind(struct socket *sock) return 0; /* socket was already bound */ } +static int pn_socket_accept(struct socket *sock, struct socket *newsock, + int flags) +{ + struct sock *sk = sock->sk; + struct sock *newsk; + int err; + + newsk = sk->sk_prot->accept(sk, flags, &err); + if (!newsk) + return err; + + lock_sock(newsk); + sock_graft(newsk, newsock); + newsock->state = SS_CONNECTED; + release_sock(newsk); + return 0; +} + static int pn_socket_getname(struct socket *sock, struct sockaddr *addr, int *sockaddr_len, int peer) { @@ -182,6 +202,33 @@ static int pn_socket_getname(struct socket *sock, struct sockaddr *addr, return 0; } +static unsigned int pn_socket_poll(struct file *file, struct socket *sock, + poll_table *wait) +{ + struct sock *sk = sock->sk; + struct pep_sock *pn = pep_sk(sk); + unsigned int mask = 0; + + poll_wait(file, &sock->wait, wait); + + switch (sk->sk_state) { + case TCP_LISTEN: + return hlist_empty(&pn->ackq) ? 0 : POLLIN; + case TCP_CLOSE: + return POLLERR; + } + + if (!skb_queue_empty(&sk->sk_receive_queue)) + mask |= POLLIN | POLLRDNORM; + else if (sk->sk_state == TCP_CLOSE_WAIT) + return POLLHUP; + + if (sk->sk_state == TCP_ESTABLISHED && pn->tx_credits) + mask |= POLLOUT | POLLWRNORM | POLLWRBAND; + + return mask; +} + static int pn_socket_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) { @@ -220,6 +267,30 @@ static int pn_socket_ioctl(struct socket *sock, unsigned int cmd, return sk->sk_prot->ioctl(sk, cmd, arg); } +static int pn_socket_listen(struct socket *sock, int backlog) +{ + struct sock *sk = sock->sk; + int err = 0; + + if (sock->state != SS_UNCONNECTED) + return -EINVAL; + if (pn_socket_autobind(sock)) + return -ENOBUFS; + + lock_sock(sk); + if (sk->sk_state != TCP_CLOSE) { + err = -EINVAL; + goto out; + } + + sk->sk_state = TCP_LISTEN; + sk->sk_ack_backlog = 0; + sk->sk_max_ack_backlog = backlog; +out: + release_sock(sk); + return err; +} + static int pn_socket_sendmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *m, size_t total_len) { @@ -256,6 +327,32 @@ const struct proto_ops phonet_dgram_ops = { .sendpage = sock_no_sendpage, }; +const struct proto_ops phonet_stream_ops = { + .family = AF_PHONET, + .owner = THIS_MODULE, + .release = pn_socket_release, + .bind = pn_socket_bind, + .connect = sock_no_connect, + .socketpair = sock_no_socketpair, + .accept = pn_socket_accept, + .getname = pn_socket_getname, + .poll = pn_socket_poll, + .ioctl = pn_socket_ioctl, + .listen = pn_socket_listen, + .shutdown = sock_no_shutdown, + .setsockopt = sock_no_setsockopt, + .getsockopt = sock_no_getsockopt, +#ifdef CONFIG_COMPAT + .compat_setsockopt = sock_no_setsockopt, + .compat_getsockopt = compat_sock_no_getsockopt, +#endif + .sendmsg = pn_socket_sendmsg, + .recvmsg = sock_common_recvmsg, + .mmap = sock_no_mmap, + .sendpage = sock_no_sendpage, +}; +EXPORT_SYMBOL(phonet_stream_ops); + static DEFINE_MUTEX(port_mutex); /* allocate port for a socket */ -- cgit v1.2.3-70-g09d2 From 9641458d3ec42def729fde64669abf07f3220cd5 Mon Sep 17 00:00:00 2001 From: Rémi Denis-Courmont Date: Sun, 5 Oct 2008 11:15:13 -0700 Subject: Phonet: Pipe End Point for Phonet Pipes protocol MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This protocol provides some connection handling and negotiated congestion control. Nokia cellular modems use it for bulk transfers. It provides packet boundaries (hence SOCK_SEQPACKET). Congestion control is per packet rather per byte, so we do not re-use the generic socket memory accounting. Signed-off-by: Rémi Denis-Courmont Signed-off-by: David S. Miller --- include/linux/phonet.h | 4 +- include/net/phonet/pep.h | 114 ++++++ net/phonet/Makefile | 4 +- net/phonet/af_phonet.c | 3 + net/phonet/pep.c | 908 +++++++++++++++++++++++++++++++++++++++++++++++ 5 files changed, 1031 insertions(+), 2 deletions(-) create mode 100644 net/phonet/pep.c (limited to 'include/net') diff --git a/include/linux/phonet.h b/include/linux/phonet.h index 3a027f588a4..f9218524207 100644 --- a/include/linux/phonet.h +++ b/include/linux/phonet.h @@ -27,7 +27,9 @@ #define PN_PROTO_TRANSPORT 0 /* Phonet datagram socket */ #define PN_PROTO_PHONET 1 -#define PHONET_NPROTO 2 +/* Phonet pipe */ +#define PN_PROTO_PIPE 2 +#define PHONET_NPROTO 3 #define PNADDR_ANY 0 #define PNPORT_RESOURCE_ROUTING 0 diff --git a/include/net/phonet/pep.h b/include/net/phonet/pep.h index b2f8c54c533..fb024e18686 100644 --- a/include/net/phonet/pep.h +++ b/include/net/phonet/pep.h @@ -26,11 +26,21 @@ struct pep_sock { struct pn_sock pn_sk; + /* XXX: union-ify listening vs connected stuff ? */ /* Listening socket stuff: */ struct hlist_head ackq; + struct hlist_head hlist; /* Connected socket stuff: */ + struct sock *listener; + u16 peer_type; /* peer type/subtype */ + u8 pipe_handle; + + u8 rx_credits; u8 tx_credits; + u8 rx_fc; /* RX flow control */ + u8 tx_fc; /* TX flow control */ + u8 init_enable; /* auto-enable at creation */ }; static inline struct pep_sock *pep_sk(struct sock *sk) @@ -40,4 +50,108 @@ static inline struct pep_sock *pep_sk(struct sock *sk) extern const struct proto_ops phonet_stream_ops; +/* Pipe protocol definitions */ +struct pnpipehdr { + u8 utid; /* transaction ID */ + u8 message_id; + u8 pipe_handle; + union { + u8 state_after_connect; /* connect request */ + u8 state_after_reset; /* reset request */ + u8 error_code; /* any response */ + u8 pep_type; /* status indication */ + u8 data[1]; + }; +}; +#define other_pep_type data[1] + +static inline struct pnpipehdr *pnp_hdr(struct sk_buff *skb) +{ + return (struct pnpipehdr *)skb_transport_header(skb); +} + +#define MAX_PNPIPE_HEADER (MAX_PHONET_HEADER + 4) + +enum { + PNS_PIPE_DATA = 0x20, + + PNS_PEP_CONNECT_REQ = 0x40, + PNS_PEP_CONNECT_RESP, + PNS_PEP_DISCONNECT_REQ, + PNS_PEP_DISCONNECT_RESP, + PNS_PEP_RESET_REQ, + PNS_PEP_RESET_RESP, + PNS_PEP_ENABLE_REQ, + PNS_PEP_ENABLE_RESP, + PNS_PEP_CTRL_REQ, + PNS_PEP_CTRL_RESP, + PNS_PEP_DISABLE_REQ = 0x4C, + PNS_PEP_DISABLE_RESP, + + PNS_PEP_STATUS_IND = 0x60, + PNS_PIPE_CREATED_IND, + PNS_PIPE_RESET_IND = 0x63, + PNS_PIPE_ENABLED_IND, + PNS_PIPE_REDIRECTED_IND, + PNS_PIPE_DISABLED_IND = 0x66, +}; + +#define PN_PIPE_INVALID_HANDLE 0xff +#define PN_PEP_TYPE_COMMON 0x00 + +/* Phonet pipe status indication */ +enum { + PN_PEP_IND_FLOW_CONTROL, + PN_PEP_IND_ID_MCFC_GRANT_CREDITS, +}; + +/* Phonet pipe error codes */ +enum { + PN_PIPE_NO_ERROR, + PN_PIPE_ERR_INVALID_PARAM, + PN_PIPE_ERR_INVALID_HANDLE, + PN_PIPE_ERR_INVALID_CTRL_ID, + PN_PIPE_ERR_NOT_ALLOWED, + PN_PIPE_ERR_PEP_IN_USE, + PN_PIPE_ERR_OVERLOAD, + PN_PIPE_ERR_DEV_DISCONNECTED, + PN_PIPE_ERR_TIMEOUT, + PN_PIPE_ERR_ALL_PIPES_IN_USE, + PN_PIPE_ERR_GENERAL, + PN_PIPE_ERR_NOT_SUPPORTED, +}; + +/* Phonet pipe states */ +enum { + PN_PIPE_DISABLE, + PN_PIPE_ENABLE, +}; + +/* Phonet pipe sub-block types */ +enum { + PN_PIPE_SB_CREATE_REQ_PEP_SUB_TYPE, + PN_PIPE_SB_CONNECT_REQ_PEP_SUB_TYPE, + PN_PIPE_SB_REDIRECT_REQ_PEP_SUB_TYPE, + PN_PIPE_SB_NEGOTIATED_FC, + PN_PIPE_SB_REQUIRED_FC_TX, + PN_PIPE_SB_PREFERRED_FC_RX, +}; + +/* Phonet pipe flow control models */ +enum { + PN_NO_FLOW_CONTROL, + PN_LEGACY_FLOW_CONTROL, + PN_ONE_CREDIT_FLOW_CONTROL, + PN_MULTI_CREDIT_FLOW_CONTROL, +}; + +#define pn_flow_safe(fc) ((fc) >> 1) + +/* Phonet pipe flow control states */ +enum { + PEP_IND_EMPTY, + PEP_IND_BUSY, + PEP_IND_READY, +}; + #endif diff --git a/net/phonet/Makefile b/net/phonet/Makefile index ae9c3ed5be8..505df2a04a8 100644 --- a/net/phonet/Makefile +++ b/net/phonet/Makefile @@ -1,4 +1,4 @@ -obj-$(CONFIG_PHONET) += phonet.o +obj-$(CONFIG_PHONET) += phonet.o pn_pep.o phonet-objs := \ pn_dev.o \ @@ -7,3 +7,5 @@ phonet-objs := \ datagram.o \ sysctl.o \ af_phonet.o + +pn_pep-objs := pep.o diff --git a/net/phonet/af_phonet.c b/net/phonet/af_phonet.c index 0a74aeaf5ad..9e9c6fce11a 100644 --- a/net/phonet/af_phonet.c +++ b/net/phonet/af_phonet.c @@ -58,6 +58,9 @@ static int pn_socket_create(struct net *net, struct socket *sock, int protocol) case SOCK_DGRAM: protocol = PN_PROTO_PHONET; break; + case SOCK_SEQPACKET: + protocol = PN_PROTO_PIPE; + break; default: return -EPROTONOSUPPORT; } diff --git a/net/phonet/pep.c b/net/phonet/pep.c new file mode 100644 index 00000000000..c5dfecb207d --- /dev/null +++ b/net/phonet/pep.c @@ -0,0 +1,908 @@ +/* + * File: pep.c + * + * Phonet pipe protocol end point socket + * + * Copyright (C) 2008 Nokia Corporation. + * + * Author: Rémi Denis-Courmont + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * version 2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA + * 02110-1301 USA + */ + +#include +#include +#include +#include +#include + +#include +#include +#include + +/* sk_state values: + * TCP_CLOSE sock not in use yet + * TCP_CLOSE_WAIT disconnected pipe + * TCP_LISTEN listening pipe endpoint + * TCP_SYN_RECV connected pipe in disabled state + * TCP_ESTABLISHED connected pipe in enabled state + * + * pep_sock locking: + * - sk_state, ackq, hlist: sock lock needed + * - listener: read only + * - pipe_handle: read only + */ + +#define CREDITS_MAX 10 +#define CREDITS_THR 7 + +static const struct sockaddr_pn pipe_srv = { + .spn_family = AF_PHONET, + .spn_resource = 0xD9, /* pipe service */ +}; + +#define pep_sb_size(s) (((s) + 5) & ~3) /* 2-bytes head, 32-bits aligned */ + +/* Get the next TLV sub-block. */ +static unsigned char *pep_get_sb(struct sk_buff *skb, u8 *ptype, u8 *plen, + void *buf) +{ + void *data = NULL; + struct { + u8 sb_type; + u8 sb_len; + } *ph, h; + int buflen = *plen; + + ph = skb_header_pointer(skb, 0, 2, &h); + if (ph == NULL || ph->sb_len < 2 || !pskb_may_pull(skb, ph->sb_len)) + return NULL; + ph->sb_len -= 2; + *ptype = ph->sb_type; + *plen = ph->sb_len; + + if (buflen > ph->sb_len) + buflen = ph->sb_len; + data = skb_header_pointer(skb, 2, buflen, buf); + __skb_pull(skb, 2 + ph->sb_len); + return data; +} + +static int pep_reply(struct sock *sk, struct sk_buff *oskb, + u8 code, const void *data, int len, gfp_t priority) +{ + const struct pnpipehdr *oph = pnp_hdr(oskb); + struct pnpipehdr *ph; + struct sk_buff *skb; + + skb = alloc_skb(MAX_PNPIPE_HEADER + len, priority); + if (!skb) + return -ENOMEM; + skb_set_owner_w(skb, sk); + + skb_reserve(skb, MAX_PNPIPE_HEADER); + __skb_put(skb, len); + skb_copy_to_linear_data(skb, data, len); + __skb_push(skb, sizeof(*ph)); + skb_reset_transport_header(skb); + ph = pnp_hdr(skb); + ph->utid = oph->utid; + ph->message_id = oph->message_id + 1; /* REQ -> RESP */ + ph->pipe_handle = oph->pipe_handle; + ph->error_code = code; + + return pn_skb_send(sk, skb, &pipe_srv); +} + +#define PAD 0x00 +static int pep_accept_conn(struct sock *sk, struct sk_buff *skb) +{ + static const u8 data[20] = { + PAD, PAD, PAD, 2 /* sub-blocks */, + PN_PIPE_SB_REQUIRED_FC_TX, pep_sb_size(5), 3, PAD, + PN_MULTI_CREDIT_FLOW_CONTROL, + PN_ONE_CREDIT_FLOW_CONTROL, + PN_LEGACY_FLOW_CONTROL, + PAD, + PN_PIPE_SB_PREFERRED_FC_RX, pep_sb_size(5), 3, PAD, + PN_MULTI_CREDIT_FLOW_CONTROL, + PN_ONE_CREDIT_FLOW_CONTROL, + PN_LEGACY_FLOW_CONTROL, + PAD, + }; + + might_sleep(); + return pep_reply(sk, skb, PN_PIPE_NO_ERROR, data, sizeof(data), + GFP_KERNEL); +} + +static int pep_reject_conn(struct sock *sk, struct sk_buff *skb, u8 code) +{ + static const u8 data[4] = { PAD, PAD, PAD, 0 /* sub-blocks */ }; + WARN_ON(code == PN_PIPE_NO_ERROR); + return pep_reply(sk, skb, code, data, sizeof(data), GFP_ATOMIC); +} + +/* Control requests are not sent by the pipe service and have a specific + * message format. */ +static int pep_ctrlreq_error(struct sock *sk, struct sk_buff *oskb, u8 code) +{ + const struct pnpipehdr *oph = pnp_hdr(oskb); + struct sk_buff *skb; + struct pnpipehdr *ph; + struct sockaddr_pn dst; + + skb = alloc_skb(MAX_PNPIPE_HEADER + 4, GFP_ATOMIC); + if (!skb) + return -ENOMEM; + skb_set_owner_w(skb, sk); + + skb_reserve(skb, MAX_PHONET_HEADER); + ph = (struct pnpipehdr *)skb_put(skb, sizeof(*ph) + 4); + + ph->utid = oph->utid; + ph->message_id = PNS_PEP_CTRL_RESP; + ph->pipe_handle = oph->pipe_handle; + ph->data[0] = oph->data[1]; /* CTRL id */ + ph->data[1] = oph->data[0]; /* PEP type */ + ph->data[2] = code; /* error code, at an usual offset */ + ph->data[3] = PAD; + ph->data[4] = PAD; + + pn_skb_get_src_sockaddr(oskb, &dst); + return pn_skb_send(sk, skb, &dst); +} + +static int pipe_snd_status(struct sock *sk, u8 type, u8 status, gfp_t priority) +{ + struct pep_sock *pn = pep_sk(sk); + struct pnpipehdr *ph; + struct sk_buff *skb; + + skb = alloc_skb(MAX_PNPIPE_HEADER + 4, priority); + if (!skb) + return -ENOMEM; + skb_set_owner_w(skb, sk); + + skb_reserve(skb, MAX_PNPIPE_HEADER + 4); + __skb_push(skb, sizeof(*ph) + 4); + skb_reset_transport_header(skb); + ph = pnp_hdr(skb); + ph->utid = 0; + ph->message_id = PNS_PEP_STATUS_IND; + ph->pipe_handle = pn->pipe_handle; + ph->pep_type = PN_PEP_TYPE_COMMON; + ph->data[1] = type; + ph->data[2] = PAD; + ph->data[3] = PAD; + ph->data[4] = status; + + return pn_skb_send(sk, skb, &pipe_srv); +} + +/* Send our RX flow control information to the sender. + * Socket must be locked. */ +static void pipe_grant_credits(struct sock *sk) +{ + struct pep_sock *pn = pep_sk(sk); + + BUG_ON(sk->sk_state != TCP_ESTABLISHED); + + switch (pn->rx_fc) { + case PN_LEGACY_FLOW_CONTROL: /* TODO */ + break; + case PN_ONE_CREDIT_FLOW_CONTROL: + pipe_snd_status(sk, PN_PEP_IND_FLOW_CONTROL, + PEP_IND_READY, GFP_ATOMIC); + pn->rx_credits = 1; + break; + case PN_MULTI_CREDIT_FLOW_CONTROL: + if ((pn->rx_credits + CREDITS_THR) > CREDITS_MAX) + break; + if (pipe_snd_status(sk, PN_PEP_IND_ID_MCFC_GRANT_CREDITS, + CREDITS_MAX - pn->rx_credits, + GFP_ATOMIC) == 0) + pn->rx_credits = CREDITS_MAX; + break; + } +} + +static int pipe_rcv_status(struct sock *sk, struct sk_buff *skb) +{ + struct pep_sock *pn = pep_sk(sk); + struct pnpipehdr *hdr = pnp_hdr(skb); + + if (!pskb_may_pull(skb, sizeof(*hdr) + 4)) + return -EINVAL; + + if (hdr->data[0] != PN_PEP_TYPE_COMMON) { + LIMIT_NETDEBUG(KERN_DEBUG"Phonet unknown PEP type: %u\n", + (unsigned)hdr->data[0]); + return -EOPNOTSUPP; + } + + switch (hdr->data[1]) { + case PN_PEP_IND_FLOW_CONTROL: + switch (pn->tx_fc) { + case PN_LEGACY_FLOW_CONTROL: + switch (hdr->data[4]) { + case PEP_IND_BUSY: + pn->tx_credits = 0; + break; + case PEP_IND_READY: + pn->tx_credits = 1; + break; + } + break; + case PN_ONE_CREDIT_FLOW_CONTROL: + if (hdr->data[4] == PEP_IND_READY) + pn->tx_credits = 1; + break; + } + break; + + case PN_PEP_IND_ID_MCFC_GRANT_CREDITS: + if (pn->tx_fc != PN_MULTI_CREDIT_FLOW_CONTROL) + break; + if (pn->tx_credits + hdr->data[4] > 0xff) + pn->tx_credits = 0xff; + else + pn->tx_credits += hdr->data[4]; + break; + + default: + LIMIT_NETDEBUG(KERN_DEBUG"Phonet unknown PEP indication: %u\n", + (unsigned)hdr->data[1]); + return -EOPNOTSUPP; + } + if (pn->tx_credits) + sk->sk_write_space(sk); + return 0; +} + +static int pipe_rcv_created(struct sock *sk, struct sk_buff *skb) +{ + struct pep_sock *pn = pep_sk(sk); + struct pnpipehdr *hdr = pnp_hdr(skb); + u8 n_sb = hdr->data[0]; + + pn->rx_fc = pn->tx_fc = PN_LEGACY_FLOW_CONTROL; + __skb_pull(skb, sizeof(*hdr)); + while (n_sb > 0) { + u8 type, buf[2], len = sizeof(buf); + u8 *data = pep_get_sb(skb, &type, &len, buf); + + if (data == NULL) + return -EINVAL; + switch (type) { + case PN_PIPE_SB_NEGOTIATED_FC: + if (len < 2 || (data[0] | data[1]) > 3) + break; + pn->tx_fc = data[0] & 3; + pn->rx_fc = data[1] & 3; + break; + } + n_sb--; + } + return 0; +} + +/* Queue an skb to a connected sock. + * Socket lock must be held. */ +static int pipe_do_rcv(struct sock *sk, struct sk_buff *skb) +{ + struct pep_sock *pn = pep_sk(sk); + struct pnpipehdr *hdr = pnp_hdr(skb); + int err = 0; + + BUG_ON(sk->sk_state == TCP_CLOSE_WAIT); + + switch (hdr->message_id) { + case PNS_PEP_CONNECT_REQ: + pep_reject_conn(sk, skb, PN_PIPE_ERR_PEP_IN_USE); + break; + + case PNS_PEP_DISCONNECT_REQ: + pep_reply(sk, skb, PN_PIPE_NO_ERROR, NULL, 0, GFP_ATOMIC); + sk->sk_state = TCP_CLOSE_WAIT; + if (!sock_flag(sk, SOCK_DEAD)) + sk->sk_state_change(sk); + break; + + case PNS_PEP_ENABLE_REQ: + /* Wait for PNS_PIPE_(ENABLED|REDIRECTED)_IND */ + pep_reply(sk, skb, PN_PIPE_NO_ERROR, NULL, 0, GFP_ATOMIC); + break; + + case PNS_PEP_RESET_REQ: + switch (hdr->state_after_reset) { + case PN_PIPE_DISABLE: + pn->init_enable = 0; + break; + case PN_PIPE_ENABLE: + pn->init_enable = 1; + break; + default: /* not allowed to send an error here!? */ + err = -EINVAL; + goto out; + } + /* fall through */ + case PNS_PEP_DISABLE_REQ: + pn->tx_credits = 0; + pep_reply(sk, skb, PN_PIPE_NO_ERROR, NULL, 0, GFP_ATOMIC); + break; + + case PNS_PEP_CTRL_REQ: + /* TODO */ + pep_ctrlreq_error(sk, skb, PN_PIPE_NO_ERROR); + break; + + case PNS_PIPE_DATA: + __skb_pull(skb, 3); /* Pipe data header */ + if (!pn_flow_safe(pn->rx_fc)) { + err = sock_queue_rcv_skb(sk, skb); + if (!err) + return 0; + break; + } + + if (pn->rx_credits == 0) { + err = -ENOBUFS; + break; + } + pn->rx_credits--; + skb->dev = NULL; + skb_set_owner_r(skb, sk); + err = skb->len; + skb_queue_tail(&sk->sk_receive_queue, skb); + if (!sock_flag(sk, SOCK_DEAD)) + sk->sk_data_ready(sk, err); + return 0; + + case PNS_PEP_STATUS_IND: + pipe_rcv_status(sk, skb); + break; + + case PNS_PIPE_REDIRECTED_IND: + err = pipe_rcv_created(sk, skb); + break; + + case PNS_PIPE_CREATED_IND: + err = pipe_rcv_created(sk, skb); + if (err) + break; + /* fall through */ + case PNS_PIPE_RESET_IND: + if (!pn->init_enable) + break; + /* fall through */ + case PNS_PIPE_ENABLED_IND: + if (!pn_flow_safe(pn->tx_fc)) { + pn->tx_credits = 1; + sk->sk_write_space(sk); + } + if (sk->sk_state == TCP_ESTABLISHED) + break; /* Nothing to do */ + sk->sk_state = TCP_ESTABLISHED; + pipe_grant_credits(sk); + break; + + case PNS_PIPE_DISABLED_IND: + sk->sk_state = TCP_SYN_RECV; + pn->rx_credits = 0; + break; + + default: + LIMIT_NETDEBUG(KERN_DEBUG"Phonet unknown PEP message: %u\n", + hdr->message_id); + err = -EINVAL; + } +out: + kfree_skb(skb); + return err; +} + +/* Destroy connected sock. */ +static void pipe_destruct(struct sock *sk) +{ + skb_queue_purge(&sk->sk_receive_queue); +} + +static int pep_connreq_rcv(struct sock *sk, struct sk_buff *skb) +{ + struct sock *newsk; + struct pep_sock *newpn, *pn = pep_sk(sk); + struct pnpipehdr *hdr; + struct sockaddr_pn dst; + u16 peer_type; + u8 pipe_handle, enabled, n_sb; + + if (!pskb_pull(skb, sizeof(*hdr) + 4)) + return -EINVAL; + + hdr = pnp_hdr(skb); + pipe_handle = hdr->pipe_handle; + switch (hdr->state_after_connect) { + case PN_PIPE_DISABLE: + enabled = 0; + break; + case PN_PIPE_ENABLE: + enabled = 1; + break; + default: + pep_reject_conn(sk, skb, PN_PIPE_ERR_INVALID_PARAM); + return -EINVAL; + } + peer_type = hdr->other_pep_type << 8; + + if (unlikely(sk->sk_state != TCP_LISTEN) || sk_acceptq_is_full(sk)) { + pep_reject_conn(sk, skb, PN_PIPE_ERR_PEP_IN_USE); + return -ENOBUFS; + } + + /* Parse sub-blocks (options) */ + n_sb = hdr->data[4]; + while (n_sb > 0) { + u8 type, buf[1], len = sizeof(buf); + const u8 *data = pep_get_sb(skb, &type, &len, buf); + + if (data == NULL) + return -EINVAL; + switch (type) { + case PN_PIPE_SB_CONNECT_REQ_PEP_SUB_TYPE: + if (len < 1) + return -EINVAL; + peer_type = (peer_type & 0xff00) | data[0]; + break; + } + n_sb--; + } + + skb = skb_clone(skb, GFP_ATOMIC); + if (!skb) + return -ENOMEM; + + /* Create a new to-be-accepted sock */ + newsk = sk_alloc(sock_net(sk), PF_PHONET, GFP_ATOMIC, sk->sk_prot); + if (!newsk) { + kfree_skb(skb); + return -ENOMEM; + } + sock_init_data(NULL, newsk); + newsk->sk_state = TCP_SYN_RECV; + newsk->sk_backlog_rcv = pipe_do_rcv; + newsk->sk_protocol = sk->sk_protocol; + newsk->sk_destruct = pipe_destruct; + + newpn = pep_sk(newsk); + pn_skb_get_dst_sockaddr(skb, &dst); + newpn->pn_sk.sobject = pn_sockaddr_get_object(&dst); + newpn->pn_sk.resource = pn->pn_sk.resource; + newpn->pipe_handle = pipe_handle; + newpn->peer_type = peer_type; + newpn->rx_credits = newpn->tx_credits = 0; + newpn->rx_fc = newpn->tx_fc = PN_LEGACY_FLOW_CONTROL; + newpn->init_enable = enabled; + + BUG_ON(!skb_queue_empty(&newsk->sk_receive_queue)); + skb_queue_head(&newsk->sk_receive_queue, skb); + if (!sock_flag(sk, SOCK_DEAD)) + sk->sk_data_ready(sk, 0); + + sk_acceptq_added(sk); + sk_add_node(newsk, &pn->ackq); + return 0; +} + +/* Listening sock must be locked */ +static struct sock *pep_find_pipe(const struct hlist_head *hlist, + const struct sockaddr_pn *dst, + u8 pipe_handle) +{ + struct hlist_node *node; + struct sock *sknode; + u16 dobj = pn_sockaddr_get_object(dst); + + sk_for_each(sknode, node, hlist) { + struct pep_sock *pnnode = pep_sk(sknode); + + /* Ports match, but addresses might not: */ + if (pnnode->pn_sk.sobject != dobj) + continue; + if (pnnode->pipe_handle != pipe_handle) + continue; + if (sknode->sk_state == TCP_CLOSE_WAIT) + continue; + + sock_hold(sknode); + return sknode; + } + return NULL; +} + +/* + * Deliver an skb to a listening sock. + * Socket lock must be held. + * We then queue the skb to the right connected sock (if any). + */ +static int pep_do_rcv(struct sock *sk, struct sk_buff *skb) +{ + struct pep_sock *pn = pep_sk(sk); + struct sock *sknode; + struct pnpipehdr *hdr = pnp_hdr(skb); + struct sockaddr_pn dst; + int err = NET_RX_SUCCESS; + u8 pipe_handle; + + if (!pskb_may_pull(skb, sizeof(*hdr))) + goto drop; + + hdr = pnp_hdr(skb); + pipe_handle = hdr->pipe_handle; + if (pipe_handle == PN_PIPE_INVALID_HANDLE) + goto drop; + + pn_skb_get_dst_sockaddr(skb, &dst); + + /* Look for an existing pipe handle */ + sknode = pep_find_pipe(&pn->hlist, &dst, pipe_handle); + if (sknode) + return sk_receive_skb(sknode, skb, 1); + + /* Look for a pipe handle pending accept */ + sknode = pep_find_pipe(&pn->ackq, &dst, pipe_handle); + if (sknode) { + sock_put(sknode); + if (net_ratelimit()) + printk(KERN_WARNING"Phonet unconnected PEP ignored"); + err = NET_RX_DROP; + goto drop; + } + + switch (hdr->message_id) { + case PNS_PEP_CONNECT_REQ: + err = pep_connreq_rcv(sk, skb); + break; + + case PNS_PEP_DISCONNECT_REQ: + pep_reply(sk, skb, PN_PIPE_NO_ERROR, NULL, 0, GFP_ATOMIC); + break; + + case PNS_PEP_CTRL_REQ: + pep_ctrlreq_error(sk, skb, PN_PIPE_INVALID_HANDLE); + break; + + case PNS_PEP_RESET_REQ: + case PNS_PEP_ENABLE_REQ: + case PNS_PEP_DISABLE_REQ: + /* invalid handle is not even allowed here! */ + default: + err = NET_RX_DROP; + } +drop: + kfree_skb(skb); + return err; +} + +/* associated socket ceases to exist */ +static void pep_sock_close(struct sock *sk, long timeout) +{ + struct pep_sock *pn = pep_sk(sk); + + sk_common_release(sk); + + lock_sock(sk); + if (sk->sk_state == TCP_LISTEN) { + /* Destroy the listen queue */ + struct sock *sknode; + struct hlist_node *p, *n; + + sk_for_each_safe(sknode, p, n, &pn->ackq) + sk_del_node_init(sknode); + sk->sk_state = TCP_CLOSE; + } + release_sock(sk); +} + +static int pep_wait_connreq(struct sock *sk, int noblock) +{ + struct task_struct *tsk = current; + struct pep_sock *pn = pep_sk(sk); + long timeo = sock_rcvtimeo(sk, noblock); + + for (;;) { + DEFINE_WAIT(wait); + + if (sk->sk_state != TCP_LISTEN) + return -EINVAL; + if (!hlist_empty(&pn->ackq)) + break; + if (!timeo) + return -EWOULDBLOCK; + if (signal_pending(tsk)) + return sock_intr_errno(timeo); + + prepare_to_wait_exclusive(&sk->sk_socket->wait, &wait, + TASK_INTERRUPTIBLE); + release_sock(sk); + timeo = schedule_timeout(timeo); + lock_sock(sk); + finish_wait(&sk->sk_socket->wait, &wait); + } + + return 0; +} + +static struct sock *pep_sock_accept(struct sock *sk, int flags, int *errp) +{ + struct pep_sock *pn = pep_sk(sk); + struct sock *newsk = NULL; + struct sk_buff *oskb; + int err; + + lock_sock(sk); + err = pep_wait_connreq(sk, flags & O_NONBLOCK); + if (err) + goto out; + + newsk = __sk_head(&pn->ackq); + + oskb = skb_dequeue(&newsk->sk_receive_queue); + err = pep_accept_conn(newsk, oskb); + if (err) { + skb_queue_head(&newsk->sk_receive_queue, oskb); + newsk = NULL; + goto out; + } + + sock_hold(sk); + pep_sk(newsk)->listener = sk; + + sock_hold(newsk); + sk_del_node_init(newsk); + sk_acceptq_removed(sk); + sk_add_node(newsk, &pn->hlist); + __sock_put(newsk); + +out: + release_sock(sk); + *errp = err; + return newsk; +} + +static int pep_ioctl(struct sock *sk, int cmd, unsigned long arg) +{ + int answ; + + switch (cmd) { + case SIOCINQ: + if (sk->sk_state == TCP_LISTEN) + return -EINVAL; + + lock_sock(sk); + if (!skb_queue_empty(&sk->sk_receive_queue)) + answ = skb_peek(&sk->sk_receive_queue)->len; + else + answ = 0; + release_sock(sk); + return put_user(answ, (int __user *)arg); + } + + return -ENOIOCTLCMD; +} + +static int pep_init(struct sock *sk) +{ + struct pep_sock *pn = pep_sk(sk); + + INIT_HLIST_HEAD(&pn->ackq); + INIT_HLIST_HEAD(&pn->hlist); + pn->pipe_handle = PN_PIPE_INVALID_HANDLE; + return 0; +} + +static int pep_sendmsg(struct kiocb *iocb, struct sock *sk, + struct msghdr *msg, size_t len) +{ + struct pep_sock *pn = pep_sk(sk); + struct sk_buff *skb = NULL; + struct pnpipehdr *ph; + long timeo; + int flags = msg->msg_flags; + int err, done; + + if (msg->msg_flags & MSG_OOB || !(msg->msg_flags & MSG_EOR)) + return -EOPNOTSUPP; + + lock_sock(sk); + timeo = sock_sndtimeo(sk, flags & MSG_DONTWAIT); + if ((1 << sk->sk_state) & (TCPF_LISTEN|TCPF_CLOSE)) { + err = -ENOTCONN; + goto out; + } + if (sk->sk_state != TCP_ESTABLISHED) { + /* Wait until the pipe gets to enabled state */ +disabled: + err = sk_stream_wait_connect(sk, &timeo); + if (err) + goto out; + + if (sk->sk_state == TCP_CLOSE_WAIT) { + err = -ECONNRESET; + goto out; + } + } + BUG_ON(sk->sk_state != TCP_ESTABLISHED); + + /* Wait until flow control allows TX */ + done = pn->tx_credits > 0; + while (!done) { + DEFINE_WAIT(wait); + + if (!timeo) { + err = -EAGAIN; + goto out; + } + if (signal_pending(current)) { + err = sock_intr_errno(timeo); + goto out; + } + + prepare_to_wait(&sk->sk_socket->wait, &wait, + TASK_INTERRUPTIBLE); + done = sk_wait_event(sk, &timeo, pn->tx_credits > 0); + finish_wait(&sk->sk_socket->wait, &wait); + + if (sk->sk_state != TCP_ESTABLISHED) + goto disabled; + } + + if (!skb) { + skb = sock_alloc_send_skb(sk, MAX_PNPIPE_HEADER + len, + flags & MSG_DONTWAIT, &err); + if (skb == NULL) + goto out; + skb_reserve(skb, MAX_PHONET_HEADER + 3); + + if (sk->sk_state != TCP_ESTABLISHED || !pn->tx_credits) + goto disabled; /* sock_alloc_send_skb might sleep */ + } + + err = memcpy_fromiovec(skb_put(skb, len), msg->msg_iov, len); + if (err < 0) + goto out; + + __skb_push(skb, 3); + skb_reset_transport_header(skb); + ph = pnp_hdr(skb); + ph->utid = 0; + ph->message_id = PNS_PIPE_DATA; + ph->pipe_handle = pn->pipe_handle; + if (pn_flow_safe(pn->tx_fc)) /* credit-based flow control */ + pn->tx_credits--; + + err = pn_skb_send(sk, skb, &pipe_srv); + if (err >= 0) + err = len; /* success! */ + skb = NULL; +out: + release_sock(sk); + kfree_skb(skb); + return err; +} + +static int pep_recvmsg(struct kiocb *iocb, struct sock *sk, + struct msghdr *msg, size_t len, int noblock, + int flags, int *addr_len) +{ + struct sk_buff *skb; + int err; + + if (unlikely(flags & MSG_OOB)) + return -EOPNOTSUPP; + if (unlikely(1 << sk->sk_state & (TCPF_LISTEN | TCPF_CLOSE))) + return -ENOTCONN; + + skb = skb_recv_datagram(sk, flags, noblock, &err); + lock_sock(sk); + if (skb == NULL) { + if (err == -ENOTCONN && sk->sk_state == TCP_CLOSE_WAIT) + err = -ECONNRESET; + release_sock(sk); + return err; + } + + if (sk->sk_state == TCP_ESTABLISHED) + pipe_grant_credits(sk); + release_sock(sk); + + msg->msg_flags |= MSG_EOR; + + if (skb->len > len) + msg->msg_flags |= MSG_TRUNC; + else + len = skb->len; + + err = skb_copy_datagram_iovec(skb, 0, msg->msg_iov, len); + if (!err) + err = (flags & MSG_TRUNC) ? skb->len : len; + + skb_free_datagram(sk, skb); + return err; +} + +static void pep_sock_unhash(struct sock *sk) +{ + struct pep_sock *pn = pep_sk(sk); + struct sock *skparent = NULL; + + lock_sock(sk); + if ((1 << sk->sk_state) & ~(TCPF_CLOSE|TCPF_LISTEN)) { + skparent = pn->listener; + sk_del_node_init(sk); + release_sock(sk); + + sk = skparent; + pn = pep_sk(skparent); + lock_sock(sk); + } + /* Unhash a listening sock only when it is closed + * and all of its active connected pipes are closed. */ + if (hlist_empty(&pn->hlist)) + pn_sock_unhash(&pn->pn_sk.sk); + release_sock(sk); + + if (skparent) + sock_put(skparent); +} + +static struct proto pep_proto = { + .close = pep_sock_close, + .accept = pep_sock_accept, + .ioctl = pep_ioctl, + .init = pep_init, + .sendmsg = pep_sendmsg, + .recvmsg = pep_recvmsg, + .backlog_rcv = pep_do_rcv, + .hash = pn_sock_hash, + .unhash = pep_sock_unhash, + .get_port = pn_sock_get_port, + .obj_size = sizeof(struct pep_sock), + .owner = THIS_MODULE, + .name = "PNPIPE", +}; + +static struct phonet_protocol pep_pn_proto = { + .ops = &phonet_stream_ops, + .prot = &pep_proto, + .sock_type = SOCK_SEQPACKET, +}; + +static int __init pep_register(void) +{ + return phonet_proto_register(PN_PROTO_PIPE, &pep_pn_proto); +} + +static void __exit pep_unregister(void) +{ + phonet_proto_unregister(PN_PROTO_PIPE, &pep_pn_proto); +} + +module_init(pep_register); +module_exit(pep_unregister); +MODULE_AUTHOR("Remi Denis-Courmont, Nokia"); +MODULE_DESCRIPTION("Phonet pipe protocol"); +MODULE_LICENSE("GPL"); +MODULE_ALIAS_NET_PF_PROTO(PF_PHONET, PN_PROTO_PIPE); -- cgit v1.2.3-70-g09d2 From c41bd97f815720f9404f97da0c4f4400b52c243d Mon Sep 17 00:00:00 2001 From: Rémi Denis-Courmont Date: Sun, 5 Oct 2008 11:15:43 -0700 Subject: Phonet: receive pipe control requests as out-of-band data MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Rémi Denis-Courmont Signed-off-by: David S. Miller --- include/net/phonet/pep.h | 2 ++ net/phonet/pep.c | 65 ++++++++++++++++++++++++++++++++++-------------- net/phonet/socket.c | 4 ++- 3 files changed, 52 insertions(+), 19 deletions(-) (limited to 'include/net') diff --git a/include/net/phonet/pep.h b/include/net/phonet/pep.h index fb024e18686..4d79564850a 100644 --- a/include/net/phonet/pep.h +++ b/include/net/phonet/pep.h @@ -33,6 +33,8 @@ struct pep_sock { /* Connected socket stuff: */ struct sock *listener; + struct sk_buff_head ctrlreq_queue; +#define PNPIPE_CTRLREQ_MAX 10 u16 peer_type; /* peer type/subtype */ u8 pipe_handle; diff --git a/net/phonet/pep.c b/net/phonet/pep.c index c5dfecb207d..d564d07cb79 100644 --- a/net/phonet/pep.c +++ b/net/phonet/pep.c @@ -137,14 +137,15 @@ static int pep_reject_conn(struct sock *sk, struct sk_buff *skb, u8 code) /* Control requests are not sent by the pipe service and have a specific * message format. */ -static int pep_ctrlreq_error(struct sock *sk, struct sk_buff *oskb, u8 code) +static int pep_ctrlreq_error(struct sock *sk, struct sk_buff *oskb, u8 code, + gfp_t priority) { const struct pnpipehdr *oph = pnp_hdr(oskb); struct sk_buff *skb; struct pnpipehdr *ph; struct sockaddr_pn dst; - skb = alloc_skb(MAX_PNPIPE_HEADER + 4, GFP_ATOMIC); + skb = alloc_skb(MAX_PNPIPE_HEADER + 4, priority); if (!skb) return -ENOMEM; skb_set_owner_w(skb, sk); @@ -305,6 +306,7 @@ static int pipe_do_rcv(struct sock *sk, struct sk_buff *skb) { struct pep_sock *pn = pep_sk(sk); struct pnpipehdr *hdr = pnp_hdr(skb); + struct sk_buff_head *queue; int err = 0; BUG_ON(sk->sk_state == TCP_CLOSE_WAIT); @@ -345,9 +347,11 @@ static int pipe_do_rcv(struct sock *sk, struct sk_buff *skb) break; case PNS_PEP_CTRL_REQ: - /* TODO */ - pep_ctrlreq_error(sk, skb, PN_PIPE_NO_ERROR); - break; + if (skb_queue_len(&pn->ctrlreq_queue) >= PNPIPE_CTRLREQ_MAX) + break; + __skb_pull(skb, 4); + queue = &pn->ctrlreq_queue; + goto queue; case PNS_PIPE_DATA: __skb_pull(skb, 3); /* Pipe data header */ @@ -363,13 +367,8 @@ static int pipe_do_rcv(struct sock *sk, struct sk_buff *skb) break; } pn->rx_credits--; - skb->dev = NULL; - skb_set_owner_r(skb, sk); - err = skb->len; - skb_queue_tail(&sk->sk_receive_queue, skb); - if (!sock_flag(sk, SOCK_DEAD)) - sk->sk_data_ready(sk, err); - return 0; + queue = &sk->sk_receive_queue; + goto queue; case PNS_PEP_STATUS_IND: pipe_rcv_status(sk, skb); @@ -412,12 +411,24 @@ static int pipe_do_rcv(struct sock *sk, struct sk_buff *skb) out: kfree_skb(skb); return err; + +queue: + skb->dev = NULL; + skb_set_owner_r(skb, sk); + err = skb->len; + skb_queue_tail(queue, skb); + if (!sock_flag(sk, SOCK_DEAD)) + sk->sk_data_ready(sk, err); + return 0; } /* Destroy connected sock. */ static void pipe_destruct(struct sock *sk) { + struct pep_sock *pn = pep_sk(sk); + skb_queue_purge(&sk->sk_receive_queue); + skb_queue_purge(&pn->ctrlreq_queue); } static int pep_connreq_rcv(struct sock *sk, struct sk_buff *skb) @@ -490,6 +501,7 @@ static int pep_connreq_rcv(struct sock *sk, struct sk_buff *skb) pn_skb_get_dst_sockaddr(skb, &dst); newpn->pn_sk.sobject = pn_sockaddr_get_object(&dst); newpn->pn_sk.resource = pn->pn_sk.resource; + skb_queue_head_init(&newpn->ctrlreq_queue); newpn->pipe_handle = pipe_handle; newpn->peer_type = peer_type; newpn->rx_credits = newpn->tx_credits = 0; @@ -581,7 +593,7 @@ static int pep_do_rcv(struct sock *sk, struct sk_buff *skb) break; case PNS_PEP_CTRL_REQ: - pep_ctrlreq_error(sk, skb, PN_PIPE_INVALID_HANDLE); + pep_ctrlreq_error(sk, skb, PN_PIPE_INVALID_HANDLE, GFP_ATOMIC); break; case PNS_PEP_RESET_REQ: @@ -684,6 +696,7 @@ out: static int pep_ioctl(struct sock *sk, int cmd, unsigned long arg) { + struct pep_sock *pn = pep_sk(sk); int answ; switch (cmd) { @@ -692,7 +705,10 @@ static int pep_ioctl(struct sock *sk, int cmd, unsigned long arg) return -EINVAL; lock_sock(sk); - if (!skb_queue_empty(&sk->sk_receive_queue)) + if (sock_flag(sk, SOCK_URGINLINE) + && !skb_queue_empty(&pn->ctrlreq_queue)) + answ = skb_peek(&pn->ctrlreq_queue)->len; + else if (!skb_queue_empty(&sk->sk_receive_queue)) answ = skb_peek(&sk->sk_receive_queue)->len; else answ = 0; @@ -709,6 +725,7 @@ static int pep_init(struct sock *sk) INIT_HLIST_HEAD(&pn->ackq); INIT_HLIST_HEAD(&pn->hlist); + skb_queue_head_init(&pn->ctrlreq_queue); pn->pipe_handle = PN_PIPE_INVALID_HANDLE; return 0; } @@ -810,11 +827,24 @@ static int pep_recvmsg(struct kiocb *iocb, struct sock *sk, struct sk_buff *skb; int err; - if (unlikely(flags & MSG_OOB)) - return -EOPNOTSUPP; if (unlikely(1 << sk->sk_state & (TCPF_LISTEN | TCPF_CLOSE))) return -ENOTCONN; + if ((flags & MSG_OOB) || sock_flag(sk, SOCK_URGINLINE)) { + /* Dequeue and acknowledge control request */ + struct pep_sock *pn = pep_sk(sk); + + skb = skb_dequeue(&pn->ctrlreq_queue); + if (skb) { + pep_ctrlreq_error(sk, skb, PN_PIPE_NO_ERROR, + GFP_KERNEL); + msg->msg_flags |= MSG_OOB; + goto copy; + } + if (flags & MSG_OOB) + return -EINVAL; + } + skb = skb_recv_datagram(sk, flags, noblock, &err); lock_sock(sk); if (skb == NULL) { @@ -827,9 +857,8 @@ static int pep_recvmsg(struct kiocb *iocb, struct sock *sk, if (sk->sk_state == TCP_ESTABLISHED) pipe_grant_credits(sk); release_sock(sk); - +copy: msg->msg_flags |= MSG_EOR; - if (skb->len > len) msg->msg_flags |= MSG_TRUNC; else diff --git a/net/phonet/socket.c b/net/phonet/socket.c index cea1136cbe4..a9c3d1f2e9d 100644 --- a/net/phonet/socket.c +++ b/net/phonet/socket.c @@ -220,7 +220,9 @@ static unsigned int pn_socket_poll(struct file *file, struct socket *sock, if (!skb_queue_empty(&sk->sk_receive_queue)) mask |= POLLIN | POLLRDNORM; - else if (sk->sk_state == TCP_CLOSE_WAIT) + if (!skb_queue_empty(&pn->ctrlreq_queue)) + mask |= POLLPRI; + if (!mask && sk->sk_state == TCP_CLOSE_WAIT) return POLLHUP; if (sk->sk_state == TCP_ESTABLISHED && pn->tx_credits) -- cgit v1.2.3-70-g09d2 From 02a47617cdce440f60c71a51f3a93f9f5fcc5a7a Mon Sep 17 00:00:00 2001 From: Rémi Denis-Courmont Date: Sun, 5 Oct 2008 11:16:16 -0700 Subject: Phonet: implement GPRS virtual interface over PEP socket MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Rémi Denis-Courmont Signed-off-by: David S. Miller --- include/linux/phonet.h | 8 ++ include/linux/socket.h | 1 + include/net/phonet/gprs.h | 38 +++++ include/net/phonet/pep.h | 1 + net/phonet/Makefile | 2 +- net/phonet/pep-gprs.c | 347 ++++++++++++++++++++++++++++++++++++++++++++++ net/phonet/pep.c | 161 +++++++++++++++++++-- net/phonet/socket.c | 8 +- 8 files changed, 550 insertions(+), 16 deletions(-) create mode 100644 include/net/phonet/gprs.h create mode 100644 net/phonet/pep-gprs.c (limited to 'include/net') diff --git a/include/linux/phonet.h b/include/linux/phonet.h index f9218524207..c9609f9aeda 100644 --- a/include/linux/phonet.h +++ b/include/linux/phonet.h @@ -31,9 +31,17 @@ #define PN_PROTO_PIPE 2 #define PHONET_NPROTO 3 +/* Socket options for SOL_PNPIPE level */ +#define PNPIPE_ENCAP 1 +#define PNPIPE_IFINDEX 2 + #define PNADDR_ANY 0 #define PNPORT_RESOURCE_ROUTING 0 +/* Values for PNPIPE_ENCAP option */ +#define PNPIPE_ENCAP_NONE 0 +#define PNPIPE_ENCAP_IP 1 + /* ioctls */ #define SIOCPNGETOBJECT (SIOCPROTOPRIVATE + 0) diff --git a/include/linux/socket.h b/include/linux/socket.h index 818ca33bf79..20fc4bbfca4 100644 --- a/include/linux/socket.h +++ b/include/linux/socket.h @@ -297,6 +297,7 @@ struct ucred { #define SOL_RXRPC 272 #define SOL_PPPOL2TP 273 #define SOL_BLUETOOTH 274 +#define SOL_PNPIPE 275 /* IPX options */ #define IPX_TYPE 1 diff --git a/include/net/phonet/gprs.h b/include/net/phonet/gprs.h new file mode 100644 index 00000000000..928daf595be --- /dev/null +++ b/include/net/phonet/gprs.h @@ -0,0 +1,38 @@ +/* + * File: pep_gprs.h + * + * GPRS over Phonet pipe end point socket + * + * Copyright (C) 2008 Nokia Corporation. + * + * Author: Rémi Denis-Courmont + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * version 2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA + * 02110-1301 USA + */ + +#ifndef NET_PHONET_GPRS_H +#define NET_PHONET_GPRS_H + +struct sock; +struct sk_buff; + +int pep_writeable(struct sock *sk); +int pep_write(struct sock *sk, struct sk_buff *skb); +struct sk_buff *pep_read(struct sock *sk); + +int gprs_attach(struct sock *sk); +void gprs_detach(struct sock *sk); + +#endif diff --git a/include/net/phonet/pep.h b/include/net/phonet/pep.h index 4d79564850a..fcd793030e4 100644 --- a/include/net/phonet/pep.h +++ b/include/net/phonet/pep.h @@ -35,6 +35,7 @@ struct pep_sock { struct sock *listener; struct sk_buff_head ctrlreq_queue; #define PNPIPE_CTRLREQ_MAX 10 + int ifindex; u16 peer_type; /* peer type/subtype */ u8 pipe_handle; diff --git a/net/phonet/Makefile b/net/phonet/Makefile index 505df2a04a8..d62bbba649b 100644 --- a/net/phonet/Makefile +++ b/net/phonet/Makefile @@ -8,4 +8,4 @@ phonet-objs := \ sysctl.o \ af_phonet.o -pn_pep-objs := pep.o +pn_pep-objs := pep.o pep-gprs.o diff --git a/net/phonet/pep-gprs.c b/net/phonet/pep-gprs.c new file mode 100644 index 00000000000..9978afbd9f2 --- /dev/null +++ b/net/phonet/pep-gprs.c @@ -0,0 +1,347 @@ +/* + * File: pep-gprs.c + * + * GPRS over Phonet pipe end point socket + * + * Copyright (C) 2008 Nokia Corporation. + * + * Author: Rémi Denis-Courmont + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * version 2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA + * 02110-1301 USA + */ + +#include +#include +#include +#include +#include + +#include +#include +#include + +#define GPRS_DEFAULT_MTU 1400 + +struct gprs_dev { + struct sock *sk; + void (*old_state_change)(struct sock *); + void (*old_data_ready)(struct sock *, int); + void (*old_write_space)(struct sock *); + + struct net_device *net; + struct net_device_stats stats; + + struct sk_buff_head tx_queue; + struct work_struct tx_work; + spinlock_t tx_lock; + unsigned tx_max; +}; + +static int gprs_type_trans(struct sk_buff *skb) +{ + const u8 *pvfc; + u8 buf; + + pvfc = skb_header_pointer(skb, 0, 1, &buf); + if (!pvfc) + return 0; + /* Look at IP version field */ + switch (*pvfc >> 4) { + case 4: + return htons(ETH_P_IP); + case 6: + return htons(ETH_P_IPV6); + } + return 0; +} + +/* + * Socket callbacks + */ + +static void gprs_state_change(struct sock *sk) +{ + struct gprs_dev *dev = sk->sk_user_data; + + if (sk->sk_state == TCP_CLOSE_WAIT) { + netif_stop_queue(dev->net); + netif_carrier_off(dev->net); + } +} + +static int gprs_recv(struct gprs_dev *dev, struct sk_buff *skb) +{ + int err = 0; + u16 protocol = gprs_type_trans(skb); + + if (!protocol) { + err = -EINVAL; + goto drop; + } + + if (likely(skb_headroom(skb) & 3)) { + struct sk_buff *rskb, *fs; + int flen = 0; + + /* Phonet Pipe data header is misaligned (3 bytes), + * so wrap the IP packet as a single fragment of an head-less + * socket buffer. The network stack will pull what it needs, + * but at least, the whole IP payload is not memcpy'd. */ + rskb = netdev_alloc_skb(dev->net, 0); + if (!rskb) { + err = -ENOBUFS; + goto drop; + } + skb_shinfo(rskb)->frag_list = skb; + rskb->len += skb->len; + rskb->data_len += rskb->len; + rskb->truesize += rskb->len; + + /* Avoid nested fragments */ + for (fs = skb_shinfo(skb)->frag_list; fs; fs = fs->next) + flen += fs->len; + skb->next = skb_shinfo(skb)->frag_list; + skb_shinfo(skb)->frag_list = NULL; + skb->len -= flen; + skb->data_len -= flen; + skb->truesize -= flen; + + skb = rskb; + } + + skb->protocol = protocol; + skb_reset_mac_header(skb); + skb->dev = dev->net; + + if (likely(dev->net->flags & IFF_UP)) { + dev->stats.rx_packets++; + dev->stats.rx_bytes += skb->len; + netif_rx(skb); + skb = NULL; + } else + err = -ENODEV; + +drop: + if (skb) { + dev_kfree_skb(skb); + dev->stats.rx_dropped++; + } + return err; +} + +static void gprs_data_ready(struct sock *sk, int len) +{ + struct gprs_dev *dev = sk->sk_user_data; + struct sk_buff *skb; + + while ((skb = pep_read(sk)) != NULL) { + skb_orphan(skb); + gprs_recv(dev, skb); + } +} + +static void gprs_write_space(struct sock *sk) +{ + struct gprs_dev *dev = sk->sk_user_data; + unsigned credits = pep_writeable(sk); + + spin_lock_bh(&dev->tx_lock); + dev->tx_max = credits; + if (credits > skb_queue_len(&dev->tx_queue)) + netif_wake_queue(dev->net); + spin_unlock_bh(&dev->tx_lock); +} + +/* + * Network device callbacks + */ + +static int gprs_xmit(struct sk_buff *skb, struct net_device *net) +{ + struct gprs_dev *dev = netdev_priv(net); + + switch (skb->protocol) { + case htons(ETH_P_IP): + case htons(ETH_P_IPV6): + break; + default: + dev_kfree_skb(skb); + return 0; + } + + spin_lock(&dev->tx_lock); + if (likely(skb_queue_len(&dev->tx_queue) < dev->tx_max)) { + skb_queue_tail(&dev->tx_queue, skb); + skb = NULL; + } + if (skb_queue_len(&dev->tx_queue) >= dev->tx_max) + netif_stop_queue(net); + spin_unlock(&dev->tx_lock); + + schedule_work(&dev->tx_work); + if (unlikely(skb)) + dev_kfree_skb(skb); + return 0; +} + +static void gprs_tx(struct work_struct *work) +{ + struct gprs_dev *dev = container_of(work, struct gprs_dev, tx_work); + struct sock *sk = dev->sk; + struct sk_buff *skb; + + while ((skb = skb_dequeue(&dev->tx_queue)) != NULL) { + int err; + + dev->stats.tx_bytes += skb->len; + dev->stats.tx_packets++; + + skb_orphan(skb); + skb_set_owner_w(skb, sk); + + lock_sock(sk); + err = pep_write(sk, skb); + if (err) { + LIMIT_NETDEBUG(KERN_WARNING"%s: TX error (%d)\n", + dev->net->name, err); + dev->stats.tx_aborted_errors++; + dev->stats.tx_errors++; + } + release_sock(sk); + } + + lock_sock(sk); + gprs_write_space(sk); + release_sock(sk); +} + +static int gprs_set_mtu(struct net_device *net, int new_mtu) +{ + if ((new_mtu < 576) || (new_mtu > (PHONET_MAX_MTU - 11))) + return -EINVAL; + + net->mtu = new_mtu; + return 0; +} + +static struct net_device_stats *gprs_get_stats(struct net_device *net) +{ + struct gprs_dev *dev = netdev_priv(net); + + return &dev->stats; +} + +static void gprs_setup(struct net_device *net) +{ + net->features = NETIF_F_FRAGLIST; + net->type = ARPHRD_NONE; + net->flags = IFF_POINTOPOINT | IFF_NOARP; + net->mtu = GPRS_DEFAULT_MTU; + net->hard_header_len = 0; + net->addr_len = 0; + net->tx_queue_len = 10; + + net->destructor = free_netdev; + net->hard_start_xmit = gprs_xmit; /* mandatory */ + net->change_mtu = gprs_set_mtu; + net->get_stats = gprs_get_stats; +} + +/* + * External interface + */ + +/* + * Attach a GPRS interface to a datagram socket. + * Returns the interface index on success, negative error code on error. + */ +int gprs_attach(struct sock *sk) +{ + static const char ifname[] = "gprs%d"; + struct gprs_dev *dev; + struct net_device *net; + int err; + + if (unlikely(sk->sk_type == SOCK_STREAM)) + return -EINVAL; /* need packet boundaries */ + + /* Create net device */ + net = alloc_netdev(sizeof(*dev), ifname, gprs_setup); + if (!net) + return -ENOMEM; + dev = netdev_priv(net); + dev->net = net; + dev->tx_max = 0; + spin_lock_init(&dev->tx_lock); + skb_queue_head_init(&dev->tx_queue); + INIT_WORK(&dev->tx_work, gprs_tx); + + netif_stop_queue(net); + err = register_netdev(net); + if (err) { + free_netdev(net); + return err; + } + + lock_sock(sk); + if (unlikely(sk->sk_user_data)) { + err = -EBUSY; + goto out_rel; + } + if (unlikely((1 << sk->sk_state & (TCPF_CLOSE|TCPF_LISTEN)) || + sock_flag(sk, SOCK_DEAD))) { + err = -EINVAL; + goto out_rel; + } + sk->sk_user_data = dev; + dev->old_state_change = sk->sk_state_change; + dev->old_data_ready = sk->sk_data_ready; + dev->old_write_space = sk->sk_write_space; + sk->sk_state_change = gprs_state_change; + sk->sk_data_ready = gprs_data_ready; + sk->sk_write_space = gprs_write_space; + release_sock(sk); + + sock_hold(sk); + dev->sk = sk; + + printk(KERN_DEBUG"%s: attached\n", net->name); + gprs_write_space(sk); /* kick off TX */ + return net->ifindex; + +out_rel: + release_sock(sk); + unregister_netdev(net); + return err; +} + +void gprs_detach(struct sock *sk) +{ + struct gprs_dev *dev = sk->sk_user_data; + struct net_device *net = dev->net; + + lock_sock(sk); + sk->sk_user_data = NULL; + sk->sk_state_change = dev->old_state_change; + sk->sk_data_ready = dev->old_data_ready; + sk->sk_write_space = dev->old_write_space; + release_sock(sk); + + printk(KERN_DEBUG"%s: detached\n", net->name); + unregister_netdev(net); + flush_scheduled_work(); + sock_put(sk); + skb_queue_purge(&dev->tx_queue); +} diff --git a/net/phonet/pep.c b/net/phonet/pep.c index d564d07cb79..bc6d50f8324 100644 --- a/net/phonet/pep.c +++ b/net/phonet/pep.c @@ -31,6 +31,7 @@ #include #include #include +#include /* sk_state values: * TCP_CLOSE sock not in use yet @@ -612,6 +613,7 @@ drop: static void pep_sock_close(struct sock *sk, long timeout) { struct pep_sock *pn = pep_sk(sk); + int ifindex = 0; sk_common_release(sk); @@ -625,7 +627,12 @@ static void pep_sock_close(struct sock *sk, long timeout) sk_del_node_init(sknode); sk->sk_state = TCP_CLOSE; } + ifindex = pn->ifindex; + pn->ifindex = 0; release_sock(sk); + + if (ifindex) + gprs_detach(sk); } static int pep_wait_connreq(struct sock *sk, int noblock) @@ -730,12 +737,107 @@ static int pep_init(struct sock *sk) return 0; } +static int pep_setsockopt(struct sock *sk, int level, int optname, + char __user *optval, int optlen) +{ + struct pep_sock *pn = pep_sk(sk); + int val = 0, err = 0; + + if (level != SOL_PNPIPE) + return -ENOPROTOOPT; + if (optlen >= sizeof(int)) { + if (get_user(val, (int __user *) optval)) + return -EFAULT; + } + + lock_sock(sk); + switch (optname) { + case PNPIPE_ENCAP: + if (val && val != PNPIPE_ENCAP_IP) { + err = -EINVAL; + break; + } + if (!pn->ifindex == !val) + break; /* Nothing to do! */ + if (!capable(CAP_NET_ADMIN)) { + err = -EPERM; + break; + } + if (val) { + release_sock(sk); + err = gprs_attach(sk); + if (err > 0) { + pn->ifindex = err; + err = 0; + } + } else { + pn->ifindex = 0; + release_sock(sk); + gprs_detach(sk); + err = 0; + } + goto out_norel; + default: + err = -ENOPROTOOPT; + } + release_sock(sk); + +out_norel: + return err; +} + +static int pep_getsockopt(struct sock *sk, int level, int optname, + char __user *optval, int __user *optlen) +{ + struct pep_sock *pn = pep_sk(sk); + int len, val; + + if (level != SOL_PNPIPE) + return -ENOPROTOOPT; + if (get_user(len, optlen)) + return -EFAULT; + + switch (optname) { + case PNPIPE_ENCAP: + val = pn->ifindex ? PNPIPE_ENCAP_IP : PNPIPE_ENCAP_NONE; + break; + case PNPIPE_IFINDEX: + val = pn->ifindex; + break; + default: + return -ENOPROTOOPT; + } + + len = min_t(unsigned int, sizeof(int), len); + if (put_user(len, optlen)) + return -EFAULT; + if (put_user(val, (int __user *) optval)) + return -EFAULT; + return 0; +} + +static int pipe_skb_send(struct sock *sk, struct sk_buff *skb) +{ + struct pep_sock *pn = pep_sk(sk); + struct pnpipehdr *ph; + + skb_push(skb, 3); + skb_reset_transport_header(skb); + ph = pnp_hdr(skb); + ph->utid = 0; + ph->message_id = PNS_PIPE_DATA; + ph->pipe_handle = pn->pipe_handle; + if (pn_flow_safe(pn->tx_fc) && pn->tx_credits) + pn->tx_credits--; + + return pn_skb_send(sk, skb, &pipe_srv); +} + static int pep_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, size_t len) { struct pep_sock *pn = pep_sk(sk); struct sk_buff *skb = NULL; - struct pnpipehdr *ph; long timeo; int flags = msg->msg_flags; int err, done; @@ -801,16 +903,7 @@ disabled: if (err < 0) goto out; - __skb_push(skb, 3); - skb_reset_transport_header(skb); - ph = pnp_hdr(skb); - ph->utid = 0; - ph->message_id = PNS_PIPE_DATA; - ph->pipe_handle = pn->pipe_handle; - if (pn_flow_safe(pn->tx_fc)) /* credit-based flow control */ - pn->tx_credits--; - - err = pn_skb_send(sk, skb, &pipe_srv); + err = pipe_skb_send(sk, skb); if (err >= 0) err = len; /* success! */ skb = NULL; @@ -820,6 +913,50 @@ out: return err; } +int pep_writeable(struct sock *sk) +{ + struct pep_sock *pn = pep_sk(sk); + + return (sk->sk_state == TCP_ESTABLISHED) ? pn->tx_credits : 0; +} + +int pep_write(struct sock *sk, struct sk_buff *skb) +{ + struct sk_buff *rskb, *fs; + int flen = 0; + + rskb = alloc_skb(MAX_PNPIPE_HEADER, GFP_ATOMIC); + if (!rskb) { + kfree_skb(skb); + return -ENOMEM; + } + skb_shinfo(rskb)->frag_list = skb; + rskb->len += skb->len; + rskb->data_len += rskb->len; + rskb->truesize += rskb->len; + + /* Avoid nested fragments */ + for (fs = skb_shinfo(skb)->frag_list; fs; fs = fs->next) + flen += fs->len; + skb->next = skb_shinfo(skb)->frag_list; + skb_shinfo(skb)->frag_list = NULL; + skb->len -= flen; + skb->data_len -= flen; + skb->truesize -= flen; + + skb_reserve(rskb, MAX_PHONET_HEADER + 3); + return pipe_skb_send(sk, rskb); +} + +struct sk_buff *pep_read(struct sock *sk) +{ + struct sk_buff *skb = skb_dequeue(&sk->sk_receive_queue); + + if (sk->sk_state == TCP_ESTABLISHED) + pipe_grant_credits(sk); + return skb; +} + static int pep_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, size_t len, int noblock, int flags, int *addr_len) @@ -902,6 +1039,8 @@ static struct proto pep_proto = { .accept = pep_sock_accept, .ioctl = pep_ioctl, .init = pep_init, + .setsockopt = pep_setsockopt, + .getsockopt = pep_getsockopt, .sendmsg = pep_sendmsg, .recvmsg = pep_recvmsg, .backlog_rcv = pep_do_rcv, diff --git a/net/phonet/socket.c b/net/phonet/socket.c index a9c3d1f2e9d..d81740187fb 100644 --- a/net/phonet/socket.c +++ b/net/phonet/socket.c @@ -342,11 +342,11 @@ const struct proto_ops phonet_stream_ops = { .ioctl = pn_socket_ioctl, .listen = pn_socket_listen, .shutdown = sock_no_shutdown, - .setsockopt = sock_no_setsockopt, - .getsockopt = sock_no_getsockopt, + .setsockopt = sock_common_setsockopt, + .getsockopt = sock_common_getsockopt, #ifdef CONFIG_COMPAT - .compat_setsockopt = sock_no_setsockopt, - .compat_getsockopt = compat_sock_no_getsockopt, + .compat_setsockopt = compat_sock_common_setsockopt, + .compat_getsockopt = compat_sock_common_getsockopt, #endif .sendmsg = pn_socket_sendmsg, .recvmsg = sock_common_recvmsg, -- cgit v1.2.3-70-g09d2 From 13c1d18931ebb5cf407cb348ef2cd6284d68902d Mon Sep 17 00:00:00 2001 From: Arnaud Ebalard Date: Sun, 5 Oct 2008 13:33:42 -0700 Subject: xfrm: MIGRATE enhancements (draft-ebalard-mext-pfkey-enhanced-migrate) Provides implementation of the enhancements of XFRM/PF_KEY MIGRATE mechanism specified in draft-ebalard-mext-pfkey-enhanced-migrate-00. Defines associated PF_KEY SADB_X_EXT_KMADDRESS extension and XFRM/netlink XFRMA_KMADDRESS attribute. Signed-off-by: Arnaud Ebalard Signed-off-by: David S. Miller --- include/linux/pfkeyv2.h | 13 +++++++- include/linux/xfrm.h | 10 ++++++ include/net/xfrm.h | 15 +++++++-- net/key/af_key.c | 86 +++++++++++++++++++++++++++++++++++++++---------- net/xfrm/xfrm_policy.c | 5 +-- net/xfrm/xfrm_state.c | 5 +-- net/xfrm/xfrm_user.c | 57 +++++++++++++++++++++++++------- 7 files changed, 154 insertions(+), 37 deletions(-) (limited to 'include/net') diff --git a/include/linux/pfkeyv2.h b/include/linux/pfkeyv2.h index 700725ddcaa..01b262959f2 100644 --- a/include/linux/pfkeyv2.h +++ b/include/linux/pfkeyv2.h @@ -226,6 +226,15 @@ struct sadb_x_sec_ctx { } __attribute__((packed)); /* sizeof(struct sadb_sec_ctx) = 8 */ +/* Used by MIGRATE to pass addresses IKE will use to perform + * negotiation with the peer */ +struct sadb_x_kmaddress { + uint16_t sadb_x_kmaddress_len; + uint16_t sadb_x_kmaddress_exttype; + uint32_t sadb_x_kmaddress_reserved; +} __attribute__((packed)); +/* sizeof(struct sadb_x_kmaddress) == 8 */ + /* Message types */ #define SADB_RESERVED 0 #define SADB_GETSPI 1 @@ -346,7 +355,9 @@ struct sadb_x_sec_ctx { #define SADB_X_EXT_NAT_T_DPORT 22 #define SADB_X_EXT_NAT_T_OA 23 #define SADB_X_EXT_SEC_CTX 24 -#define SADB_EXT_MAX 24 +/* Used with MIGRATE to pass @ to IKE for negotiation */ +#define SADB_X_EXT_KMADDRESS 25 +#define SADB_EXT_MAX 25 /* Identity Extension values */ #define SADB_IDENTTYPE_RESERVED 0 diff --git a/include/linux/xfrm.h b/include/linux/xfrm.h index fb0c215a305..4bc1e6b86cb 100644 --- a/include/linux/xfrm.h +++ b/include/linux/xfrm.h @@ -279,6 +279,7 @@ enum xfrm_attr_type_t { XFRMA_POLICY_TYPE, /* struct xfrm_userpolicy_type */ XFRMA_MIGRATE, XFRMA_ALG_AEAD, /* struct xfrm_algo_aead */ + XFRMA_KMADDRESS, /* struct xfrm_user_kmaddress */ __XFRMA_MAX #define XFRMA_MAX (__XFRMA_MAX - 1) @@ -415,6 +416,15 @@ struct xfrm_user_report { struct xfrm_selector sel; }; +/* Used by MIGRATE to pass addresses IKE should use to perform + * SA negotiation with the peer */ +struct xfrm_user_kmaddress { + xfrm_address_t local; + xfrm_address_t remote; + __u32 reserved; + __u16 family; +}; + struct xfrm_user_migrate { xfrm_address_t old_daddr; xfrm_address_t old_saddr; diff --git a/include/net/xfrm.h b/include/net/xfrm.h index b98d2056f27..11c890ad8eb 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -492,6 +492,13 @@ struct xfrm_policy struct xfrm_tmpl xfrm_vec[XFRM_MAX_DEPTH]; }; +struct xfrm_kmaddress { + xfrm_address_t local; + xfrm_address_t remote; + u32 reserved; + u16 family; +}; + struct xfrm_migrate { xfrm_address_t old_daddr; xfrm_address_t old_saddr; @@ -531,7 +538,7 @@ struct xfrm_mgr int (*new_mapping)(struct xfrm_state *x, xfrm_address_t *ipaddr, __be16 sport); int (*notify_policy)(struct xfrm_policy *x, int dir, struct km_event *c); int (*report)(u8 proto, struct xfrm_selector *sel, xfrm_address_t *addr); - int (*migrate)(struct xfrm_selector *sel, u8 dir, u8 type, struct xfrm_migrate *m, int num_bundles); + int (*migrate)(struct xfrm_selector *sel, u8 dir, u8 type, struct xfrm_migrate *m, int num_bundles, struct xfrm_kmaddress *k); }; extern int xfrm_register_km(struct xfrm_mgr *km); @@ -1432,12 +1439,14 @@ extern int xfrm_bundle_ok(struct xfrm_policy *pol, struct xfrm_dst *xdst, #ifdef CONFIG_XFRM_MIGRATE extern int km_migrate(struct xfrm_selector *sel, u8 dir, u8 type, - struct xfrm_migrate *m, int num_bundles); + struct xfrm_migrate *m, int num_bundles, + struct xfrm_kmaddress *k); extern struct xfrm_state * xfrm_migrate_state_find(struct xfrm_migrate *m); extern struct xfrm_state * xfrm_state_migrate(struct xfrm_state *x, struct xfrm_migrate *m); extern int xfrm_migrate(struct xfrm_selector *sel, u8 dir, u8 type, - struct xfrm_migrate *m, int num_bundles); + struct xfrm_migrate *m, int num_bundles, + struct xfrm_kmaddress *k); #endif extern wait_queue_head_t km_waitq; diff --git a/net/key/af_key.c b/net/key/af_key.c index 7ae641df70b..362fe317e1f 100644 --- a/net/key/af_key.c +++ b/net/key/af_key.c @@ -398,6 +398,7 @@ static u8 sadb_ext_min_len[] = { [SADB_X_EXT_NAT_T_DPORT] = (u8) sizeof(struct sadb_x_nat_t_port), [SADB_X_EXT_NAT_T_OA] = (u8) sizeof(struct sadb_address), [SADB_X_EXT_SEC_CTX] = (u8) sizeof(struct sadb_x_sec_ctx), + [SADB_X_EXT_KMADDRESS] = (u8) sizeof(struct sadb_x_kmaddress), }; /* Verify sadb_address_{len,prefixlen} against sa_family. */ @@ -2384,24 +2385,21 @@ static int pfkey_sockaddr_pair_size(sa_family_t family) return PFKEY_ALIGN8(pfkey_sockaddr_len(family) * 2); } -static int parse_sockaddr_pair(struct sadb_x_ipsecrequest *rq, +static int parse_sockaddr_pair(struct sockaddr *sa, int ext_len, xfrm_address_t *saddr, xfrm_address_t *daddr, u16 *family) { - u8 *sa = (u8 *) (rq + 1); int af, socklen; - if (rq->sadb_x_ipsecrequest_len < - pfkey_sockaddr_pair_size(((struct sockaddr *)sa)->sa_family)) + if (ext_len < pfkey_sockaddr_pair_size(sa->sa_family)) return -EINVAL; - af = pfkey_sockaddr_extract((struct sockaddr *) sa, - saddr); + af = pfkey_sockaddr_extract(sa, saddr); if (!af) return -EINVAL; socklen = pfkey_sockaddr_len(af); - if (pfkey_sockaddr_extract((struct sockaddr *) (sa + socklen), + if (pfkey_sockaddr_extract((struct sockaddr *) (((u8 *)sa) + socklen), daddr) != af) return -EINVAL; @@ -2421,7 +2419,9 @@ static int ipsecrequests_to_migrate(struct sadb_x_ipsecrequest *rq1, int len, return -EINVAL; /* old endoints */ - err = parse_sockaddr_pair(rq1, &m->old_saddr, &m->old_daddr, + err = parse_sockaddr_pair((struct sockaddr *)(rq1 + 1), + rq1->sadb_x_ipsecrequest_len, + &m->old_saddr, &m->old_daddr, &m->old_family); if (err) return err; @@ -2434,7 +2434,9 @@ static int ipsecrequests_to_migrate(struct sadb_x_ipsecrequest *rq1, int len, return -EINVAL; /* new endpoints */ - err = parse_sockaddr_pair(rq2, &m->new_saddr, &m->new_daddr, + err = parse_sockaddr_pair((struct sockaddr *)(rq2 + 1), + rq2->sadb_x_ipsecrequest_len, + &m->new_saddr, &m->new_daddr, &m->new_family); if (err) return err; @@ -2460,29 +2462,40 @@ static int pfkey_migrate(struct sock *sk, struct sk_buff *skb, int i, len, ret, err = -EINVAL; u8 dir; struct sadb_address *sa; + struct sadb_x_kmaddress *kma; struct sadb_x_policy *pol; struct sadb_x_ipsecrequest *rq; struct xfrm_selector sel; struct xfrm_migrate m[XFRM_MAX_DEPTH]; + struct xfrm_kmaddress k; if (!present_and_same_family(ext_hdrs[SADB_EXT_ADDRESS_SRC - 1], - ext_hdrs[SADB_EXT_ADDRESS_DST - 1]) || + ext_hdrs[SADB_EXT_ADDRESS_DST - 1]) || !ext_hdrs[SADB_X_EXT_POLICY - 1]) { err = -EINVAL; goto out; } + kma = ext_hdrs[SADB_X_EXT_KMADDRESS - 1]; pol = ext_hdrs[SADB_X_EXT_POLICY - 1]; - if (!pol) { - err = -EINVAL; - goto out; - } if (pol->sadb_x_policy_dir >= IPSEC_DIR_MAX) { err = -EINVAL; goto out; } + if (kma) { + /* convert sadb_x_kmaddress to xfrm_kmaddress */ + k.reserved = kma->sadb_x_kmaddress_reserved; + ret = parse_sockaddr_pair((struct sockaddr *)(kma + 1), + 8*(kma->sadb_x_kmaddress_len) - sizeof(*kma), + &k.local, &k.remote, &k.family); + if (ret < 0) { + err = ret; + goto out; + } + } + dir = pol->sadb_x_policy_dir - 1; memset(&sel, 0, sizeof(sel)); @@ -2527,7 +2540,8 @@ static int pfkey_migrate(struct sock *sk, struct sk_buff *skb, goto out; } - return xfrm_migrate(&sel, dir, XFRM_POLICY_TYPE_MAIN, m, i); + return xfrm_migrate(&sel, dir, XFRM_POLICY_TYPE_MAIN, m, i, + kma ? &k : NULL); out: return err; @@ -3319,6 +3333,32 @@ static int set_sadb_address(struct sk_buff *skb, int sasize, int type, return 0; } + +static int set_sadb_kmaddress(struct sk_buff *skb, struct xfrm_kmaddress *k) +{ + struct sadb_x_kmaddress *kma; + u8 *sa; + int family = k->family; + int socklen = pfkey_sockaddr_len(family); + int size_req; + + size_req = (sizeof(struct sadb_x_kmaddress) + + pfkey_sockaddr_pair_size(family)); + + kma = (struct sadb_x_kmaddress *)skb_put(skb, size_req); + memset(kma, 0, size_req); + kma->sadb_x_kmaddress_len = size_req / 8; + kma->sadb_x_kmaddress_exttype = SADB_X_EXT_KMADDRESS; + kma->sadb_x_kmaddress_reserved = k->reserved; + + sa = (u8 *)(kma + 1); + if (!pfkey_sockaddr_fill(&k->local, 0, (struct sockaddr *)sa, family) || + !pfkey_sockaddr_fill(&k->remote, 0, (struct sockaddr *)(sa+socklen), family)) + return -EINVAL; + + return 0; +} + static int set_ipsecrequest(struct sk_buff *skb, uint8_t proto, uint8_t mode, int level, uint32_t reqid, uint8_t family, @@ -3351,7 +3391,8 @@ static int set_ipsecrequest(struct sk_buff *skb, #ifdef CONFIG_NET_KEY_MIGRATE static int pfkey_send_migrate(struct xfrm_selector *sel, u8 dir, u8 type, - struct xfrm_migrate *m, int num_bundles) + struct xfrm_migrate *m, int num_bundles, + struct xfrm_kmaddress *k) { int i; int sasize_sel; @@ -3368,6 +3409,12 @@ static int pfkey_send_migrate(struct xfrm_selector *sel, u8 dir, u8 type, if (num_bundles <= 0 || num_bundles > XFRM_MAX_DEPTH) return -EINVAL; + if (k != NULL) { + /* addresses for KM */ + size += PFKEY_ALIGN8(sizeof(struct sadb_x_kmaddress) + + pfkey_sockaddr_pair_size(k->family)); + } + /* selector */ sasize_sel = pfkey_sockaddr_size(sel->family); if (!sasize_sel) @@ -3404,6 +3451,10 @@ static int pfkey_send_migrate(struct xfrm_selector *sel, u8 dir, u8 type, hdr->sadb_msg_seq = 0; hdr->sadb_msg_pid = 0; + /* Addresses to be used by KM for negotiation, if ext is available */ + if (k != NULL && (set_sadb_kmaddress(skb, k) < 0)) + return -EINVAL; + /* selector src */ set_sadb_address(skb, sasize_sel, SADB_EXT_ADDRESS_SRC, sel); @@ -3449,7 +3500,8 @@ err: } #else static int pfkey_send_migrate(struct xfrm_selector *sel, u8 dir, u8 type, - struct xfrm_migrate *m, int num_bundles) + struct xfrm_migrate *m, int num_bundles, + struct xfrm_kmaddress *k) { return -ENOPROTOOPT; } diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index b7ec08025ff..832b47c1de8 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -2679,7 +2679,8 @@ static int xfrm_migrate_check(struct xfrm_migrate *m, int num_migrate) } int xfrm_migrate(struct xfrm_selector *sel, u8 dir, u8 type, - struct xfrm_migrate *m, int num_migrate) + struct xfrm_migrate *m, int num_migrate, + struct xfrm_kmaddress *k) { int i, err, nx_cur = 0, nx_new = 0; struct xfrm_policy *pol = NULL; @@ -2723,7 +2724,7 @@ int xfrm_migrate(struct xfrm_selector *sel, u8 dir, u8 type, } /* Stage 5 - announce */ - km_migrate(sel, dir, type, m, num_migrate); + km_migrate(sel, dir, type, m, num_migrate, k); xfrm_pol_put(pol); diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c index 747fd8c291a..508337f9724 100644 --- a/net/xfrm/xfrm_state.c +++ b/net/xfrm/xfrm_state.c @@ -1814,7 +1814,8 @@ EXPORT_SYMBOL(km_policy_expired); #ifdef CONFIG_XFRM_MIGRATE int km_migrate(struct xfrm_selector *sel, u8 dir, u8 type, - struct xfrm_migrate *m, int num_migrate) + struct xfrm_migrate *m, int num_migrate, + struct xfrm_kmaddress *k) { int err = -EINVAL; int ret; @@ -1823,7 +1824,7 @@ int km_migrate(struct xfrm_selector *sel, u8 dir, u8 type, read_lock(&xfrm_km_lock); list_for_each_entry(km, &xfrm_km_list, list) { if (km->migrate) { - ret = km->migrate(sel, dir, type, m, num_migrate); + ret = km->migrate(sel, dir, type, m, num_migrate, k); if (!ret) err = ret; } diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c index 76f75df21e1..4a8a1abb59e 100644 --- a/net/xfrm/xfrm_user.c +++ b/net/xfrm/xfrm_user.c @@ -1710,12 +1710,23 @@ static int xfrm_add_acquire(struct sk_buff *skb, struct nlmsghdr *nlh, #ifdef CONFIG_XFRM_MIGRATE static int copy_from_user_migrate(struct xfrm_migrate *ma, + struct xfrm_kmaddress *k, struct nlattr **attrs, int *num) { struct nlattr *rt = attrs[XFRMA_MIGRATE]; struct xfrm_user_migrate *um; int i, num_migrate; + if (k != NULL) { + struct xfrm_user_kmaddress *uk; + + uk = nla_data(attrs[XFRMA_KMADDRESS]); + memcpy(&k->local, &uk->local, sizeof(k->local)); + memcpy(&k->remote, &uk->remote, sizeof(k->remote)); + k->family = uk->family; + k->reserved = uk->reserved; + } + um = nla_data(rt); num_migrate = nla_len(rt) / sizeof(*um); @@ -1745,6 +1756,7 @@ static int xfrm_do_migrate(struct sk_buff *skb, struct nlmsghdr *nlh, { struct xfrm_userpolicy_id *pi = nlmsg_data(nlh); struct xfrm_migrate m[XFRM_MAX_DEPTH]; + struct xfrm_kmaddress km, *kmp; u8 type; int err; int n = 0; @@ -1752,19 +1764,20 @@ static int xfrm_do_migrate(struct sk_buff *skb, struct nlmsghdr *nlh, if (attrs[XFRMA_MIGRATE] == NULL) return -EINVAL; + kmp = attrs[XFRMA_KMADDRESS] ? &km : NULL; + err = copy_from_user_policy_type(&type, attrs); if (err) return err; - err = copy_from_user_migrate((struct xfrm_migrate *)m, - attrs, &n); + err = copy_from_user_migrate((struct xfrm_migrate *)m, kmp, attrs, &n); if (err) return err; if (!n) return 0; - xfrm_migrate(&pi->sel, pi->dir, type, m, n); + xfrm_migrate(&pi->sel, pi->dir, type, m, n, kmp); return 0; } @@ -1795,16 +1808,30 @@ static int copy_to_user_migrate(struct xfrm_migrate *m, struct sk_buff *skb) return nla_put(skb, XFRMA_MIGRATE, sizeof(um), &um); } -static inline size_t xfrm_migrate_msgsize(int num_migrate) +static int copy_to_user_kmaddress(struct xfrm_kmaddress *k, struct sk_buff *skb) +{ + struct xfrm_user_kmaddress uk; + + memset(&uk, 0, sizeof(uk)); + uk.family = k->family; + uk.reserved = k->reserved; + memcpy(&uk.local, &k->local, sizeof(uk.local)); + memcpy(&uk.remote, &k->local, sizeof(uk.remote)); + + return nla_put(skb, XFRMA_KMADDRESS, sizeof(uk), &uk); +} + +static inline size_t xfrm_migrate_msgsize(int num_migrate, int with_kma) { return NLMSG_ALIGN(sizeof(struct xfrm_userpolicy_id)) - + nla_total_size(sizeof(struct xfrm_user_migrate) * num_migrate) - + userpolicy_type_attrsize(); + + (with_kma ? nla_total_size(sizeof(struct xfrm_kmaddress)) : 0) + + nla_total_size(sizeof(struct xfrm_user_migrate) * num_migrate) + + userpolicy_type_attrsize(); } static int build_migrate(struct sk_buff *skb, struct xfrm_migrate *m, - int num_migrate, struct xfrm_selector *sel, - u8 dir, u8 type) + int num_migrate, struct xfrm_kmaddress *k, + struct xfrm_selector *sel, u8 dir, u8 type) { struct xfrm_migrate *mp; struct xfrm_userpolicy_id *pol_id; @@ -1821,6 +1848,9 @@ static int build_migrate(struct sk_buff *skb, struct xfrm_migrate *m, memcpy(&pol_id->sel, sel, sizeof(pol_id->sel)); pol_id->dir = dir; + if (k != NULL && (copy_to_user_kmaddress(k, skb) < 0)) + goto nlmsg_failure; + if (copy_to_user_policy_type(type, skb) < 0) goto nlmsg_failure; @@ -1836,23 +1866,25 @@ nlmsg_failure: } static int xfrm_send_migrate(struct xfrm_selector *sel, u8 dir, u8 type, - struct xfrm_migrate *m, int num_migrate) + struct xfrm_migrate *m, int num_migrate, + struct xfrm_kmaddress *k) { struct sk_buff *skb; - skb = nlmsg_new(xfrm_migrate_msgsize(num_migrate), GFP_ATOMIC); + skb = nlmsg_new(xfrm_migrate_msgsize(num_migrate, !!k), GFP_ATOMIC); if (skb == NULL) return -ENOMEM; /* build migrate */ - if (build_migrate(skb, m, num_migrate, sel, dir, type) < 0) + if (build_migrate(skb, m, num_migrate, k, sel, dir, type) < 0) BUG(); return nlmsg_multicast(xfrm_nl, skb, 0, XFRMNLGRP_MIGRATE, GFP_ATOMIC); } #else static int xfrm_send_migrate(struct xfrm_selector *sel, u8 dir, u8 type, - struct xfrm_migrate *m, int num_migrate) + struct xfrm_migrate *m, int num_migrate, + struct xfrm_kmaddress *k) { return -ENOPROTOOPT; } @@ -1901,6 +1933,7 @@ static const struct nla_policy xfrma_policy[XFRMA_MAX+1] = { [XFRMA_COADDR] = { .len = sizeof(xfrm_address_t) }, [XFRMA_POLICY_TYPE] = { .len = sizeof(struct xfrm_userpolicy_type)}, [XFRMA_MIGRATE] = { .len = sizeof(struct xfrm_user_migrate) }, + [XFRMA_KMADDRESS] = { .len = sizeof(struct xfrm_user_kmaddress) }, }; static struct xfrm_link { -- cgit v1.2.3-70-g09d2 From 554794de7949d1a6279336404c066f974d4c2bde Mon Sep 17 00:00:00 2001 From: Jarek Poplawski Date: Mon, 6 Oct 2008 09:54:39 -0700 Subject: pkt_sched: Fix handling of gso skbs on requeuing Jay Cliburn noticed and diagnosed a bug triggered in dev_gso_skb_destructor() after last change from qdisc->gso_skb to qdisc->requeue list. Since gso_segmented skbs can't be queued to another list this patch brings back qdisc->gso_skb for them. Reported-by: Jay Cliburn Signed-off-by: Jarek Poplawski Signed-off-by: David S. Miller --- include/net/sch_generic.h | 1 + net/sched/sch_generic.c | 22 +++++++++++++++++----- 2 files changed, 18 insertions(+), 5 deletions(-) (limited to 'include/net') diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h index 3b983e8a055..3fe49d80895 100644 --- a/include/net/sch_generic.h +++ b/include/net/sch_generic.h @@ -52,6 +52,7 @@ struct Qdisc u32 parent; atomic_t refcnt; unsigned long state; + struct sk_buff *gso_skb; struct sk_buff_head requeue; struct sk_buff_head q; struct netdev_queue *dev_queue; diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c index 5e7e0bd38fe..3db4cf1bd26 100644 --- a/net/sched/sch_generic.c +++ b/net/sched/sch_generic.c @@ -44,7 +44,10 @@ static inline int qdisc_qlen(struct Qdisc *q) static inline int dev_requeue_skb(struct sk_buff *skb, struct Qdisc *q) { - __skb_queue_head(&q->requeue, skb); + if (unlikely(skb->next)) + q->gso_skb = skb; + else + __skb_queue_head(&q->requeue, skb); __netif_schedule(q); return 0; @@ -52,7 +55,10 @@ static inline int dev_requeue_skb(struct sk_buff *skb, struct Qdisc *q) static inline struct sk_buff *dequeue_skb(struct Qdisc *q) { - struct sk_buff *skb = skb_peek(&q->requeue); + struct sk_buff *skb = q->gso_skb; + + if (!skb) + skb = skb_peek(&q->requeue); if (unlikely(skb)) { struct net_device *dev = qdisc_dev(q); @@ -60,10 +66,15 @@ static inline struct sk_buff *dequeue_skb(struct Qdisc *q) /* check the reason of requeuing without tx lock first */ txq = netdev_get_tx_queue(dev, skb_get_queue_mapping(skb)); - if (!netif_tx_queue_stopped(txq) && !netif_tx_queue_frozen(txq)) - __skb_unlink(skb, &q->requeue); - else + if (!netif_tx_queue_stopped(txq) && + !netif_tx_queue_frozen(txq)) { + if (q->gso_skb) + q->gso_skb = NULL; + else + __skb_unlink(skb, &q->requeue); + } else { skb = NULL; + } } else { skb = q->dequeue(q); } @@ -548,6 +559,7 @@ void qdisc_destroy(struct Qdisc *qdisc) module_put(ops->owner); dev_put(qdisc_dev(qdisc)); + kfree_skb(qdisc->gso_skb); __skb_queue_purge(&qdisc->requeue); kfree((char *) qdisc - qdisc->padded); -- cgit v1.2.3-70-g09d2 From 76708dee382a69b2f9d0e50f413f99fefb2dc509 Mon Sep 17 00:00:00 2001 From: Felix Fietkau Date: Sun, 5 Oct 2008 18:02:48 +0200 Subject: mac80211: free up 2 bytes in skb->cb Free up 2 bytes in skb->cb to be used for multi-rate retry later. Move iv_len and icv_len initialization into key alloc. Signed-off-by: Felix Fietkau Signed-off-by: John W. Linville --- drivers/net/wireless/ath5k/base.c | 2 +- drivers/net/wireless/ath9k/xmit.c | 2 +- drivers/net/wireless/b43/xmit.c | 4 ++-- drivers/net/wireless/b43legacy/xmit.c | 4 ++-- drivers/net/wireless/rt2x00/rt2x00crypto.c | 4 ++-- drivers/net/wireless/rt2x00/rt2x00queue.c | 7 +++++-- include/net/mac80211.h | 4 ++-- net/mac80211/key.c | 14 ++++++++++++++ net/mac80211/wep.c | 3 --- net/mac80211/wpa.c | 6 ------ 10 files changed, 29 insertions(+), 21 deletions(-) (limited to 'include/net') diff --git a/drivers/net/wireless/ath5k/base.c b/drivers/net/wireless/ath5k/base.c index c151588aa48..47be49acf55 100644 --- a/drivers/net/wireless/ath5k/base.c +++ b/drivers/net/wireless/ath5k/base.c @@ -1188,7 +1188,7 @@ ath5k_txbuf_setup(struct ath5k_softc *sc, struct ath5k_buf *bf) if (info->control.hw_key) { keyidx = info->control.hw_key->hw_key_idx; - pktlen += info->control.icv_len; + pktlen += info->control.hw_key->icv_len; } ret = ah->ah_setup_tx_desc(ah, ds, pktlen, ieee80211_get_hdrlen_from_skb(skb), AR5K_PKT_TYPE_NORMAL, diff --git a/drivers/net/wireless/ath9k/xmit.c b/drivers/net/wireless/ath9k/xmit.c index bdcb9e1799c..3a4757942b3 100644 --- a/drivers/net/wireless/ath9k/xmit.c +++ b/drivers/net/wireless/ath9k/xmit.c @@ -237,7 +237,7 @@ static int ath_tx_prepare(struct ath_softc *sc, if (tx_info->control.hw_key) { txctl->keyix = tx_info->control.hw_key->hw_key_idx; - txctl->frmlen += tx_info->control.icv_len; + txctl->frmlen += tx_info->control.hw_key->icv_len; if (tx_info->control.hw_key->alg == ALG_WEP) txctl->keytype = ATH9K_KEY_TYPE_WEP; diff --git a/drivers/net/wireless/b43/xmit.c b/drivers/net/wireless/b43/xmit.c index 5e0b71c3ad0..e0749c0074c 100644 --- a/drivers/net/wireless/b43/xmit.c +++ b/drivers/net/wireless/b43/xmit.c @@ -252,7 +252,7 @@ int b43_generate_txhdr(struct b43_wldev *dev, } /* Hardware appends ICV. */ - plcp_fragment_len += info->control.icv_len; + plcp_fragment_len += info->control.hw_key->icv_len; key_idx = b43_kidx_to_fw(dev, key_idx); mac_ctl |= (key_idx << B43_TXH_MAC_KEYIDX_SHIFT) & @@ -260,7 +260,7 @@ int b43_generate_txhdr(struct b43_wldev *dev, mac_ctl |= (key->algorithm << B43_TXH_MAC_KEYALG_SHIFT) & B43_TXH_MAC_KEYALG; wlhdr_len = ieee80211_hdrlen(fctl); - iv_len = min((size_t) info->control.iv_len, + iv_len = min((size_t) info->control.hw_key->iv_len, ARRAY_SIZE(txhdr->iv)); memcpy(txhdr->iv, ((u8 *) wlhdr) + wlhdr_len, iv_len); } diff --git a/drivers/net/wireless/b43legacy/xmit.c b/drivers/net/wireless/b43legacy/xmit.c index 6835064758f..a894169411c 100644 --- a/drivers/net/wireless/b43legacy/xmit.c +++ b/drivers/net/wireless/b43legacy/xmit.c @@ -243,7 +243,7 @@ static int generate_txhdr_fw3(struct b43legacy_wldev *dev, if (key->enabled) { /* Hardware appends ICV. */ - plcp_fragment_len += info->control.icv_len; + plcp_fragment_len += info->control.hw_key->icv_len; key_idx = b43legacy_kidx_to_fw(dev, key_idx); mac_ctl |= (key_idx << B43legacy_TX4_MAC_KEYIDX_SHIFT) & @@ -252,7 +252,7 @@ static int generate_txhdr_fw3(struct b43legacy_wldev *dev, B43legacy_TX4_MAC_KEYALG_SHIFT) & B43legacy_TX4_MAC_KEYALG; wlhdr_len = ieee80211_hdrlen(wlhdr->frame_control); - iv_len = min((size_t)info->control.iv_len, + iv_len = min((size_t)info->control.hw_key->iv_len, ARRAY_SIZE(txhdr->iv)); memcpy(txhdr->iv, ((u8 *)wlhdr) + wlhdr_len, iv_len); } else { diff --git a/drivers/net/wireless/rt2x00/rt2x00crypto.c b/drivers/net/wireless/rt2x00/rt2x00crypto.c index e1448cfa944..5a858e5106c 100644 --- a/drivers/net/wireless/rt2x00/rt2x00crypto.c +++ b/drivers/net/wireless/rt2x00/rt2x00crypto.c @@ -56,10 +56,10 @@ unsigned int rt2x00crypto_tx_overhead(struct ieee80211_tx_info *tx_info) * note that these lengths should only be added when * mac80211 does not generate it. */ - overhead += tx_info->control.icv_len; + overhead += key->icv_len; if (!(key->flags & IEEE80211_KEY_FLAG_GENERATE_IV)) - overhead += tx_info->control.iv_len; + overhead += key->iv_len; if (!(key->flags & IEEE80211_KEY_FLAG_GENERATE_MMIC)) { if (key->alg == ALG_TKIP) diff --git a/drivers/net/wireless/rt2x00/rt2x00queue.c b/drivers/net/wireless/rt2x00/rt2x00queue.c index b7f4fe8fba6..1676ac48479 100644 --- a/drivers/net/wireless/rt2x00/rt2x00queue.c +++ b/drivers/net/wireless/rt2x00/rt2x00queue.c @@ -374,7 +374,7 @@ int rt2x00queue_write_tx_frame(struct data_queue *queue, struct sk_buff *skb) struct queue_entry *entry = rt2x00queue_get_entry(queue, Q_INDEX); struct txentry_desc txdesc; struct skb_frame_desc *skbdesc; - unsigned int iv_len = IEEE80211_SKB_CB(skb)->control.iv_len; + unsigned int iv_len; if (unlikely(rt2x00queue_full(queue))) return -EINVAL; @@ -410,8 +410,11 @@ int rt2x00queue_write_tx_frame(struct data_queue *queue, struct sk_buff *skb) * the frame so we can provide it to the driver seperately. */ if (test_bit(ENTRY_TXD_ENCRYPT, &txdesc.flags) && - !test_bit(ENTRY_TXD_ENCRYPT_IV, &txdesc.flags)) + !test_bit(ENTRY_TXD_ENCRYPT_IV, &txdesc.flags) && + (IEEE80211_SKB_CB(skb)->control.hw_key != NULL)) { + iv_len = IEEE80211_SKB_CB(skb)->control.hw_key->iv_len; rt2x00crypto_tx_remove_iv(skb, iv_len); + } /* * It could be possible that the queue was corrupted and this diff --git a/include/net/mac80211.h b/include/net/mac80211.h index f5f5b1ff158..feb3be81dfa 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -337,8 +337,6 @@ struct ieee80211_tx_info { unsigned long jiffies; s8 rts_cts_rate_idx, alt_retry_rate_idx; u8 retry_limit; - u8 icv_len; - u8 iv_len; } control; struct { u64 ampdu_ack_map; @@ -635,6 +633,8 @@ enum ieee80211_key_flags { */ struct ieee80211_key_conf { enum ieee80211_key_alg alg; + u8 icv_len; + u8 iv_len; u8 hw_key_idx; u8 flags; s8 keyidx; diff --git a/net/mac80211/key.c b/net/mac80211/key.c index 57afcd38cd9..a5b06fe7198 100644 --- a/net/mac80211/key.c +++ b/net/mac80211/key.c @@ -281,6 +281,20 @@ struct ieee80211_key *ieee80211_key_alloc(enum ieee80211_key_alg alg, key->conf.alg = alg; key->conf.keyidx = idx; key->conf.keylen = key_len; + switch (alg) { + case ALG_WEP: + key->conf.iv_len = WEP_IV_LEN; + key->conf.icv_len = WEP_ICV_LEN; + break; + case ALG_TKIP: + key->conf.iv_len = TKIP_IV_LEN; + key->conf.icv_len = TKIP_ICV_LEN; + break; + case ALG_CCMP: + key->conf.iv_len = CCMP_HDR_LEN; + key->conf.icv_len = CCMP_MIC_LEN; + break; + } memcpy(key->conf.key, key_data, key_len); INIT_LIST_HEAD(&key->list); INIT_LIST_HEAD(&key->todo); diff --git a/net/mac80211/wep.c b/net/mac80211/wep.c index 376c84987e4..f0e2d3ecb5c 100644 --- a/net/mac80211/wep.c +++ b/net/mac80211/wep.c @@ -313,9 +313,6 @@ static int wep_encrypt_skb(struct ieee80211_tx_data *tx, struct sk_buff *skb) { struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb); - info->control.iv_len = WEP_IV_LEN; - info->control.icv_len = WEP_ICV_LEN; - if (!(tx->key->flags & KEY_FLAG_UPLOADED_TO_HARDWARE)) { if (ieee80211_wep_encrypt(tx->local, skb, tx->key)) return -1; diff --git a/net/mac80211/wpa.c b/net/mac80211/wpa.c index 37ae9a959f6..6db649480e8 100644 --- a/net/mac80211/wpa.c +++ b/net/mac80211/wpa.c @@ -152,9 +152,6 @@ static int tkip_encrypt_skb(struct ieee80211_tx_data *tx, struct sk_buff *skb) int len, tail; u8 *pos; - info->control.icv_len = TKIP_ICV_LEN; - info->control.iv_len = TKIP_IV_LEN; - if ((tx->key->flags & KEY_FLAG_UPLOADED_TO_HARDWARE) && !(tx->key->conf.flags & IEEE80211_KEY_FLAG_GENERATE_IV)) { /* hwaccel - with no need for preallocated room for IV/ICV */ @@ -374,9 +371,6 @@ static int ccmp_encrypt_skb(struct ieee80211_tx_data *tx, struct sk_buff *skb) u8 *pos, *pn; int i; - info->control.icv_len = CCMP_MIC_LEN; - info->control.iv_len = CCMP_HDR_LEN; - if ((tx->key->flags & KEY_FLAG_UPLOADED_TO_HARDWARE) && !(tx->key->conf.flags & IEEE80211_KEY_FLAG_GENERATE_IV)) { /* hwaccel - with no need for preallocated room for CCMP " -- cgit v1.2.3-70-g09d2 From 870abdf67170daa9f1022e55a35c469239fcc74c Mon Sep 17 00:00:00 2001 From: Felix Fietkau Date: Sun, 5 Oct 2008 18:04:24 +0200 Subject: mac80211: add multi-rate retry support This patch adjusts the rate control API to allow multi-rate retry if supported by the driver. The ieee80211_hw struct specifies how many alternate rate selections the driver supports. Signed-off-by: Felix Fietkau Signed-off-by: John W. Linville --- drivers/net/wireless/b43/main.c | 1 + drivers/net/wireless/b43/xmit.c | 2 +- drivers/net/wireless/b43legacy/main.c | 1 + drivers/net/wireless/b43legacy/xmit.c | 2 +- drivers/net/wireless/rtl8180_dev.c | 5 +++-- include/net/mac80211.h | 29 +++++++++++++++++++++++++---- net/mac80211/tx.c | 13 +++++++------ 7 files changed, 39 insertions(+), 14 deletions(-) (limited to 'include/net') diff --git a/drivers/net/wireless/b43/main.c b/drivers/net/wireless/b43/main.c index 3bf74e236ab..14c44df584d 100644 --- a/drivers/net/wireless/b43/main.c +++ b/drivers/net/wireless/b43/main.c @@ -4588,6 +4588,7 @@ static int b43_wireless_init(struct ssb_device *dev) BIT(NL80211_IFTYPE_ADHOC); hw->queues = b43_modparam_qos ? 4 : 1; + hw->max_altrates = 1; SET_IEEE80211_DEV(hw, dev->dev); if (is_valid_ether_addr(sprom->et1mac)) SET_IEEE80211_PERM_ADDR(hw, sprom->et1mac); diff --git a/drivers/net/wireless/b43/xmit.c b/drivers/net/wireless/b43/xmit.c index e0749c0074c..2fabcf8f047 100644 --- a/drivers/net/wireless/b43/xmit.c +++ b/drivers/net/wireless/b43/xmit.c @@ -208,7 +208,7 @@ int b43_generate_txhdr(struct b43_wldev *dev, txrate = ieee80211_get_tx_rate(dev->wl->hw, info); rate = txrate ? txrate->hw_value : B43_CCK_RATE_1MB; rate_ofdm = b43_is_ofdm_rate(rate); - fbrate = ieee80211_get_alt_retry_rate(dev->wl->hw, info) ? : txrate; + fbrate = ieee80211_get_alt_retry_rate(dev->wl->hw, info, 0) ? : txrate; rate_fb = fbrate->hw_value; rate_fb_ofdm = b43_is_ofdm_rate(rate_fb); diff --git a/drivers/net/wireless/b43legacy/main.c b/drivers/net/wireless/b43legacy/main.c index 9fb1421cbec..c66d57560e7 100644 --- a/drivers/net/wireless/b43legacy/main.c +++ b/drivers/net/wireless/b43legacy/main.c @@ -3710,6 +3710,7 @@ static int b43legacy_wireless_init(struct ssb_device *dev) BIT(NL80211_IFTYPE_WDS) | BIT(NL80211_IFTYPE_ADHOC); hw->queues = 1; /* FIXME: hardware has more queues */ + hw->max_altrates = 1; SET_IEEE80211_DEV(hw, dev->dev); if (is_valid_ether_addr(sprom->et1mac)) SET_IEEE80211_PERM_ADDR(hw, sprom->et1mac); diff --git a/drivers/net/wireless/b43legacy/xmit.c b/drivers/net/wireless/b43legacy/xmit.c index a894169411c..65e83378160 100644 --- a/drivers/net/wireless/b43legacy/xmit.c +++ b/drivers/net/wireless/b43legacy/xmit.c @@ -210,7 +210,7 @@ static int generate_txhdr_fw3(struct b43legacy_wldev *dev, rate = tx_rate->hw_value; rate_ofdm = b43legacy_is_ofdm_rate(rate); - rate_fb = ieee80211_get_alt_retry_rate(dev->wl->hw, info) ? : tx_rate; + rate_fb = ieee80211_get_alt_retry_rate(dev->wl->hw, info, 0) ? : tx_rate; rate_fb_ofdm = b43legacy_is_ofdm_rate(rate_fb->hw_value); txhdr->mac_frame_ctl = wlhdr->frame_control; diff --git a/drivers/net/wireless/rtl8180_dev.c b/drivers/net/wireless/rtl8180_dev.c index abcd641c54b..df7e78ee8a8 100644 --- a/drivers/net/wireless/rtl8180_dev.c +++ b/drivers/net/wireless/rtl8180_dev.c @@ -292,8 +292,8 @@ static int rtl8180_tx(struct ieee80211_hw *dev, struct sk_buff *skb) entry->plcp_len = cpu_to_le16(plcp_len); entry->tx_buf = cpu_to_le32(mapping); entry->frame_len = cpu_to_le32(skb->len); - entry->flags2 = info->control.alt_retry_rate_idx >= 0 ? - ieee80211_get_alt_retry_rate(dev, info)->bitrate << 4 : 0; + entry->flags2 = info->control.retries[0].rate_idx >= 0 ? + ieee80211_get_alt_retry_rate(dev, info, 0)->bitrate << 4 : 0; entry->retry_limit = info->control.retry_limit; entry->flags = cpu_to_le32(tx_flags); __skb_queue_tail(&ring->queue, skb); @@ -855,6 +855,7 @@ static int __devinit rtl8180_probe(struct pci_dev *pdev, priv = dev->priv; priv->pdev = pdev; + dev->max_altrates = 1; SET_IEEE80211_DEV(dev, &pdev->dev); pci_set_drvdata(pdev, dev); diff --git a/include/net/mac80211.h b/include/net/mac80211.h index feb3be81dfa..5617a1613c9 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -292,6 +292,20 @@ enum mac80211_tx_control_flags { #define IEEE80211_TX_INFO_DRIVER_DATA_PTRS \ (IEEE80211_TX_INFO_DRIVER_DATA_SIZE / sizeof(void *)) +/* maximum number of alternate rate retry stages */ +#define IEEE80211_TX_MAX_ALTRATE 3 + +/** + * struct ieee80211_tx_altrate - alternate rate selection/status + * + * @rate_idx: rate index to attempt to send with + * @limit: number of retries before fallback + */ +struct ieee80211_tx_altrate { + s8 rate_idx; + u8 limit; +}; + /** * struct ieee80211_tx_info - skb transmit information * @@ -335,12 +349,14 @@ struct ieee80211_tx_info { struct ieee80211_key_conf *hw_key; struct ieee80211_sta *sta; unsigned long jiffies; - s8 rts_cts_rate_idx, alt_retry_rate_idx; + s8 rts_cts_rate_idx; u8 retry_limit; + struct ieee80211_tx_altrate retries[IEEE80211_TX_MAX_ALTRATE]; } control; struct { u64 ampdu_ack_map; int ack_signal; + struct ieee80211_tx_altrate retries[IEEE80211_TX_MAX_ALTRATE + 1]; u8 retry_count; bool excessive_retries; u8 ampdu_ack_len; @@ -828,6 +844,9 @@ enum ieee80211_hw_flags { * within &struct ieee80211_vif. * @sta_data_size: size (in bytes) of the drv_priv data area * within &struct ieee80211_sta. + * + * @max_altrates: maximum number of alternate rate retry stages + * @max_altrate_tries: maximum number of tries for each stage */ struct ieee80211_hw { struct ieee80211_conf conf; @@ -844,6 +863,8 @@ struct ieee80211_hw { u16 ampdu_queues; u16 max_listen_interval; s8 max_signal; + u8 max_altrates; + u8 max_altrate_tries; }; struct ieee80211_hw *wiphy_to_hw(struct wiphy *wiphy); @@ -900,11 +921,11 @@ ieee80211_get_rts_cts_rate(const struct ieee80211_hw *hw, static inline struct ieee80211_rate * ieee80211_get_alt_retry_rate(const struct ieee80211_hw *hw, - const struct ieee80211_tx_info *c) + const struct ieee80211_tx_info *c, int idx) { - if (c->control.alt_retry_rate_idx < 0) + if (c->control.retries[idx].rate_idx < 0) return NULL; - return &hw->wiphy->bands[c->band]->bitrates[c->control.alt_retry_rate_idx]; + return &hw->wiphy->bands[c->band]->bitrates[c->control.retries[idx].rate_idx]; } /** diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c index d7153bbcdb2..1460537faf3 100644 --- a/net/mac80211/tx.c +++ b/net/mac80211/tx.c @@ -454,15 +454,16 @@ ieee80211_tx_h_rate_ctrl(struct ieee80211_tx_data *tx) if (unlikely(rsel.probe_idx >= 0)) { info->flags |= IEEE80211_TX_CTL_RATE_CTRL_PROBE; tx->flags |= IEEE80211_TX_PROBE_LAST_FRAG; - info->control.alt_retry_rate_idx = tx->rate_idx; + info->control.retries[0].rate_idx = tx->rate_idx; + info->control.retries[0].limit = tx->local->hw.max_altrate_tries; tx->rate_idx = rsel.probe_idx; - } else - info->control.alt_retry_rate_idx = -1; + } else if (info->control.retries[0].limit == 0) + info->control.retries[0].rate_idx = -1; if (unlikely(tx->rate_idx < 0)) return TX_DROP; } else - info->control.alt_retry_rate_idx = -1; + info->control.retries[0].rate_idx = -1; if (tx->sdata->bss_conf.use_cts_prot && (tx->flags & IEEE80211_TX_FRAGMENTED) && (rsel.nonerp_idx >= 0)) { @@ -521,7 +522,7 @@ ieee80211_tx_h_misc(struct ieee80211_tx_data *tx) * frames. * TODO: The last fragment could still use multiple retry * rates. */ - info->control.alt_retry_rate_idx = -1; + info->control.retries[0].rate_idx = -1; } /* Use CTS protection for unicast frames sent using extended rates if @@ -551,7 +552,7 @@ ieee80211_tx_h_misc(struct ieee80211_tx_data *tx) int idx; /* Do not use multiple retry rates when using RTS/CTS */ - info->control.alt_retry_rate_idx = -1; + info->control.retries[0].rate_idx = -1; /* Use min(data rate, max base rate) as CTS/RTS rate */ rate = &sband->bitrates[tx->rate_idx]; -- cgit v1.2.3-70-g09d2 From 9a1f27c48065ce713eb47f2fd475b717e63ef239 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 7 Oct 2008 11:41:57 -0700 Subject: inet_hashtables: Add inet_lookup_skb helpers To be able to use the cached socket reference in the skb during input processing we add a new set of lookup functions that receive the skb on their argument list. Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: KOVACS Krisztian Signed-off-by: David S. Miller --- include/net/inet6_hashtables.h | 11 +++++++++++ include/net/inet_hashtables.h | 14 ++++++++++++++ net/dccp/ipv4.c | 5 ++--- net/dccp/ipv6.c | 6 ++---- net/ipv4/tcp_ipv4.c | 3 +-- net/ipv6/tcp_ipv6.c | 6 +----- 6 files changed, 31 insertions(+), 14 deletions(-) (limited to 'include/net') diff --git a/include/net/inet6_hashtables.h b/include/net/inet6_hashtables.h index e48989f04c2..995efbb031a 100644 --- a/include/net/inet6_hashtables.h +++ b/include/net/inet6_hashtables.h @@ -91,6 +91,17 @@ static inline struct sock *__inet6_lookup(struct net *net, return inet6_lookup_listener(net, hashinfo, daddr, hnum, dif); } +static inline struct sock *__inet6_lookup_skb(struct inet_hashinfo *hashinfo, + struct sk_buff *skb, + const __be16 sport, + const __be16 dport) +{ + return __inet6_lookup(dev_net(skb->dst->dev), hashinfo, + &ipv6_hdr(skb)->saddr, sport, + &ipv6_hdr(skb)->daddr, ntohs(dport), + inet6_iif(skb)); +} + extern struct sock *inet6_lookup(struct net *net, struct inet_hashinfo *hashinfo, const struct in6_addr *saddr, const __be16 sport, const struct in6_addr *daddr, const __be16 dport, diff --git a/include/net/inet_hashtables.h b/include/net/inet_hashtables.h index bb619d80f2e..3522bbcd546 100644 --- a/include/net/inet_hashtables.h +++ b/include/net/inet_hashtables.h @@ -16,6 +16,7 @@ #include +#include #include #include #include @@ -28,6 +29,7 @@ #include #include #include +#include #include #include @@ -371,6 +373,18 @@ static inline struct sock *inet_lookup(struct net *net, return sk; } +static inline struct sock *__inet_lookup_skb(struct inet_hashinfo *hashinfo, + struct sk_buff *skb, + const __be16 sport, + const __be16 dport) +{ + const struct iphdr *iph = ip_hdr(skb); + + return __inet_lookup(dev_net(skb->dst->dev), hashinfo, + iph->saddr, sport, + iph->daddr, dport, inet_iif(skb)); +} + extern int __inet_hash_connect(struct inet_timewait_death_row *death_row, struct sock *sk, u32 port_offset, int (*check_established)(struct inet_timewait_death_row *, diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c index 882c5c4de69..e3dfddab21c 100644 --- a/net/dccp/ipv4.c +++ b/net/dccp/ipv4.c @@ -811,9 +811,8 @@ static int dccp_v4_rcv(struct sk_buff *skb) /* Step 2: * Look up flow ID in table and get corresponding socket */ - sk = __inet_lookup(dev_net(skb->dst->dev), &dccp_hashinfo, - iph->saddr, dh->dccph_sport, - iph->daddr, dh->dccph_dport, inet_iif(skb)); + sk = __inet_lookup_skb(&dccp_hashinfo, skb, + dh->dccph_sport, dh->dccph_dport); /* * Step 2: * If no socket ... diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c index 5e1ee0da2c4..caa7f346962 100644 --- a/net/dccp/ipv6.c +++ b/net/dccp/ipv6.c @@ -805,10 +805,8 @@ static int dccp_v6_rcv(struct sk_buff *skb) /* Step 2: * Look up flow ID in table and get corresponding socket */ - sk = __inet6_lookup(dev_net(skb->dst->dev), &dccp_hashinfo, - &ipv6_hdr(skb)->saddr, dh->dccph_sport, - &ipv6_hdr(skb)->daddr, ntohs(dh->dccph_dport), - inet6_iif(skb)); + sk = __inet6_lookup_skb(&dccp_hashinfo, skb, + dh->dccph_sport, dh->dccph_dport); /* * Step 2: * If no socket ... diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 8b24bd833cb..24ffc5e1d3d 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -1577,8 +1577,7 @@ int tcp_v4_rcv(struct sk_buff *skb) TCP_SKB_CB(skb)->flags = iph->tos; TCP_SKB_CB(skb)->sacked = 0; - sk = __inet_lookup(net, &tcp_hashinfo, iph->saddr, - th->source, iph->daddr, th->dest, inet_iif(skb)); + sk = __inet_lookup_skb(&tcp_hashinfo, skb, th->source, th->dest); if (!sk) goto no_tcp_socket; diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index df16b68644e..6268d266c03 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -1681,11 +1681,7 @@ static int tcp_v6_rcv(struct sk_buff *skb) TCP_SKB_CB(skb)->flags = ipv6_get_dsfield(ipv6_hdr(skb)); TCP_SKB_CB(skb)->sacked = 0; - sk = __inet6_lookup(net, &tcp_hashinfo, - &ipv6_hdr(skb)->saddr, th->source, - &ipv6_hdr(skb)->daddr, ntohs(th->dest), - inet6_iif(skb)); - + sk = __inet6_lookup_skb(&tcp_hashinfo, skb, th->source, th->dest); if (!sk) goto no_tcp_socket; -- cgit v1.2.3-70-g09d2 From 23542618deb77cfed312842fe8c41ed19fb16470 Mon Sep 17 00:00:00 2001 From: KOVACS Krisztian Date: Tue, 7 Oct 2008 12:41:01 -0700 Subject: inet: Don't lookup the socket if there's a socket attached to the skb Use the socket cached in the skb if it's present. Signed-off-by: KOVACS Krisztian Acked-by: Arnaldo Carvalho de Melo Signed-off-by: David S. Miller --- include/net/inet6_hashtables.h | 12 ++++++++---- include/net/inet_hashtables.h | 10 +++++++--- include/net/sock.h | 12 ++++++++++++ net/ipv4/udp.c | 10 +++++++--- net/ipv6/udp.c | 10 +++++++--- 5 files changed, 41 insertions(+), 13 deletions(-) (limited to 'include/net') diff --git a/include/net/inet6_hashtables.h b/include/net/inet6_hashtables.h index 995efbb031a..f74665d7bea 100644 --- a/include/net/inet6_hashtables.h +++ b/include/net/inet6_hashtables.h @@ -96,10 +96,14 @@ static inline struct sock *__inet6_lookup_skb(struct inet_hashinfo *hashinfo, const __be16 sport, const __be16 dport) { - return __inet6_lookup(dev_net(skb->dst->dev), hashinfo, - &ipv6_hdr(skb)->saddr, sport, - &ipv6_hdr(skb)->daddr, ntohs(dport), - inet6_iif(skb)); + struct sock *sk; + + if (unlikely(sk = skb_steal_sock(skb))) + return sk; + else return __inet6_lookup(dev_net(skb->dst->dev), hashinfo, + &ipv6_hdr(skb)->saddr, sport, + &ipv6_hdr(skb)->daddr, ntohs(dport), + inet6_iif(skb)); } extern struct sock *inet6_lookup(struct net *net, struct inet_hashinfo *hashinfo, diff --git a/include/net/inet_hashtables.h b/include/net/inet_hashtables.h index 3522bbcd546..5cc182f9eca 100644 --- a/include/net/inet_hashtables.h +++ b/include/net/inet_hashtables.h @@ -378,11 +378,15 @@ static inline struct sock *__inet_lookup_skb(struct inet_hashinfo *hashinfo, const __be16 sport, const __be16 dport) { + struct sock *sk; const struct iphdr *iph = ip_hdr(skb); - return __inet_lookup(dev_net(skb->dst->dev), hashinfo, - iph->saddr, sport, - iph->daddr, dport, inet_iif(skb)); + if (unlikely(sk = skb_steal_sock(skb))) + return sk; + else + return __inet_lookup(dev_net(skb->dst->dev), hashinfo, + iph->saddr, sport, + iph->daddr, dport, inet_iif(skb)); } extern int __inet_hash_connect(struct inet_timewait_death_row *death_row, diff --git a/include/net/sock.h b/include/net/sock.h index 75a312d3888..18f96708f3a 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -1324,6 +1324,18 @@ static inline void sk_change_net(struct sock *sk, struct net *net) sock_net_set(sk, hold_net(net)); } +static inline struct sock *skb_steal_sock(struct sk_buff *skb) +{ + if (unlikely(skb->sk)) { + struct sock *sk = skb->sk; + + skb->destructor = NULL; + skb->sk = NULL; + return sk; + } + return NULL; +} + extern void sock_enable_timestamp(struct sock *sk); extern int sock_get_timestamp(struct sock *, struct timeval __user *); extern int sock_get_timestampns(struct sock *, struct timespec __user *); diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index c7a90b546b2..822c9deac83 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -306,11 +306,15 @@ static inline struct sock *__udp4_lib_lookup_skb(struct sk_buff *skb, __be16 sport, __be16 dport, struct hlist_head udptable[]) { + struct sock *sk; const struct iphdr *iph = ip_hdr(skb); - return __udp4_lib_lookup(dev_net(skb->dst->dev), iph->saddr, sport, - iph->daddr, dport, inet_iif(skb), - udptable); + if (unlikely(sk = skb_steal_sock(skb))) + return sk; + else + return __udp4_lib_lookup(dev_net(skb->dst->dev), iph->saddr, sport, + iph->daddr, dport, inet_iif(skb), + udptable); } struct sock *udp4_lib_lookup(struct net *net, __be32 saddr, __be16 sport, diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index ce26c41d157..e51da8c092f 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -111,11 +111,15 @@ static struct sock *__udp6_lib_lookup_skb(struct sk_buff *skb, __be16 sport, __be16 dport, struct hlist_head udptable[]) { + struct sock *sk; struct ipv6hdr *iph = ipv6_hdr(skb); - return __udp6_lib_lookup(dev_net(skb->dst->dev), &iph->saddr, sport, - &iph->daddr, dport, inet6_iif(skb), - udptable); + if (unlikely(sk = skb_steal_sock(skb))) + return sk; + else + return __udp6_lib_lookup(dev_net(skb->dst->dev), &iph->saddr, sport, + &iph->daddr, dport, inet6_iif(skb), + udptable); } /* -- cgit v1.2.3-70-g09d2 From c57943a1c96214ee68f3890bb6772841ffbfd606 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 7 Oct 2008 14:18:42 -0700 Subject: net: wrap sk->sk_backlog_rcv() Wrap calling sk->sk_backlog_rcv() in a function. This will allow extending the generic sk_backlog_rcv behaviour. Signed-off-by: Peter Zijlstra Signed-off-by: David S. Miller --- include/net/sock.h | 5 +++++ include/net/tcp.h | 2 +- net/core/sock.c | 4 ++-- net/ipv4/tcp.c | 2 +- net/ipv4/tcp_timer.c | 2 +- 5 files changed, 10 insertions(+), 5 deletions(-) (limited to 'include/net') diff --git a/include/net/sock.h b/include/net/sock.h index 18f96708f3a..ada50c04d09 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -482,6 +482,11 @@ static inline void sk_add_backlog(struct sock *sk, struct sk_buff *skb) skb->next = NULL; } +static inline int sk_backlog_rcv(struct sock *sk, struct sk_buff *skb) +{ + return sk->sk_backlog_rcv(sk, skb); +} + #define sk_wait_event(__sk, __timeo, __condition) \ ({ int __rc; \ release_sock(__sk); \ diff --git a/include/net/tcp.h b/include/net/tcp.h index f6cc3414315..438014d5761 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -896,7 +896,7 @@ static inline int tcp_prequeue(struct sock *sk, struct sk_buff *skb) BUG_ON(sock_owned_by_user(sk)); while ((skb1 = __skb_dequeue(&tp->ucopy.prequeue)) != NULL) { - sk->sk_backlog_rcv(sk, skb1); + sk_backlog_rcv(sk, skb1); NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPPREQUEUEDROPPED); } diff --git a/net/core/sock.c b/net/core/sock.c index 2d358dd8a03..5e2a3132a8c 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -327,7 +327,7 @@ int sk_receive_skb(struct sock *sk, struct sk_buff *skb, const int nested) */ mutex_acquire(&sk->sk_lock.dep_map, 0, 1, _RET_IP_); - rc = sk->sk_backlog_rcv(sk, skb); + rc = sk_backlog_rcv(sk, skb); mutex_release(&sk->sk_lock.dep_map, 1, _RET_IP_); } else @@ -1374,7 +1374,7 @@ static void __release_sock(struct sock *sk) struct sk_buff *next = skb->next; skb->next = NULL; - sk->sk_backlog_rcv(sk, skb); + sk_backlog_rcv(sk, skb); /* * We are in process context here with softirqs diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 7d81a1ee550..7d3fe571d15 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -1161,7 +1161,7 @@ static void tcp_prequeue_process(struct sock *sk) * necessary */ local_bh_disable(); while ((skb = __skb_dequeue(&tp->ucopy.prequeue)) != NULL) - sk->sk_backlog_rcv(sk, skb); + sk_backlog_rcv(sk, skb); local_bh_enable(); /* Clear memory counter. */ diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c index 5ab6ba19c3c..6b6dff1164b 100644 --- a/net/ipv4/tcp_timer.c +++ b/net/ipv4/tcp_timer.c @@ -201,7 +201,7 @@ static void tcp_delack_timer(unsigned long data) NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPSCHEDULERFAILED); while ((skb = __skb_dequeue(&tp->ucopy.prequeue)) != NULL) - sk->sk_backlog_rcv(sk, skb); + sk_backlog_rcv(sk, skb); tp->ucopy.memory = 0; } -- cgit v1.2.3-70-g09d2 From 835bcc0497e18f54153ac9e32b598dd8ffb7aa66 Mon Sep 17 00:00:00 2001 From: "Denis V. Lunev" Date: Tue, 7 Oct 2008 14:45:55 -0700 Subject: netns: move /proc/net/dev_snmp6 to struct net Signed-off-by: Denis V. Lunev Signed-off-by: David S. Miller --- include/net/netns/mib.h | 4 ++++ net/ipv6/proc.c | 20 +++++++++++--------- 2 files changed, 15 insertions(+), 9 deletions(-) (limited to 'include/net') diff --git a/include/net/netns/mib.h b/include/net/netns/mib.h index 44914760464..ffcef5de3d1 100644 --- a/include/net/netns/mib.h +++ b/include/net/netns/mib.h @@ -11,6 +11,10 @@ struct netns_mib { DEFINE_SNMP_STAT(struct udp_mib, udplite_statistics); DEFINE_SNMP_STAT(struct icmp_mib, icmp_statistics); DEFINE_SNMP_STAT(struct icmpmsg_mib, icmpmsg_statistics); + +#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) + struct proc_dir_entry *proc_net_devsnmp6; +#endif }; #endif diff --git a/net/ipv6/proc.c b/net/ipv6/proc.c index 0179b66864f..16ebf85d4ad 100644 --- a/net/ipv6/proc.c +++ b/net/ipv6/proc.c @@ -29,8 +29,6 @@ #include #include -static struct proc_dir_entry *proc_net_devsnmp6; - static int sockstat6_seq_show(struct seq_file *seq, void *v) { struct net *net = seq->private; @@ -210,18 +208,20 @@ static const struct file_operations snmp6_seq_fops = { int snmp6_register_dev(struct inet6_dev *idev) { struct proc_dir_entry *p; + struct net *net; if (!idev || !idev->dev) return -EINVAL; - if (!net_eq(dev_net(idev->dev), &init_net)) + net = dev_net(idev->dev); + if (!net_eq(net, &init_net)) return 0; - if (!proc_net_devsnmp6) + if (!net->mib.proc_net_devsnmp6) return -ENOENT; p = proc_create_data(idev->dev->name, S_IRUGO, - proc_net_devsnmp6, &snmp6_seq_fops, idev); + net->mib.proc_net_devsnmp6, &snmp6_seq_fops, idev); if (!p) return -ENOMEM; @@ -231,12 +231,13 @@ int snmp6_register_dev(struct inet6_dev *idev) int snmp6_unregister_dev(struct inet6_dev *idev) { - if (!proc_net_devsnmp6) + struct net *net = dev_net(idev->dev); + if (!net->mib.proc_net_devsnmp6) return -ENOENT; if (!idev || !idev->stats.proc_dir_entry) return -EINVAL; remove_proc_entry(idev->stats.proc_dir_entry->name, - proc_net_devsnmp6); + net->mib.proc_net_devsnmp6); idev->stats.proc_dir_entry = NULL; return 0; } @@ -269,8 +270,9 @@ int __init ipv6_misc_proc_init(void) if (!proc_net_fops_create(&init_net, "snmp6", S_IRUGO, &snmp6_seq_fops)) goto proc_snmp6_fail; - proc_net_devsnmp6 = proc_mkdir("dev_snmp6", init_net.proc_net); - if (!proc_net_devsnmp6) + init_net.mib.proc_net_devsnmp6 = + proc_mkdir("dev_snmp6", init_net.proc_net); + if (!init_net.mib.proc_net_devsnmp6) goto proc_dev_snmp6_fail; out: return rc; -- cgit v1.2.3-70-g09d2 From 0c7ed677fb7013c8028045d409a48ac42151187a Mon Sep 17 00:00:00 2001 From: "Denis V. Lunev" Date: Tue, 7 Oct 2008 14:49:36 -0700 Subject: netns: make udpv6 mib per/namespace Signed-off-by: Denis V. Lunev Signed-off-by: David S. Miller --- include/net/netns/mib.h | 1 + include/net/udp.h | 12 ++++++------ net/ipv4/udp.c | 3 --- net/ipv6/af_inet6.c | 9 ++++----- net/ipv6/proc.c | 3 ++- 5 files changed, 13 insertions(+), 15 deletions(-) (limited to 'include/net') diff --git a/include/net/netns/mib.h b/include/net/netns/mib.h index ffcef5de3d1..ba622b24b84 100644 --- a/include/net/netns/mib.h +++ b/include/net/netns/mib.h @@ -14,6 +14,7 @@ struct netns_mib { #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) struct proc_dir_entry *proc_net_devsnmp6; + DEFINE_SNMP_STAT(struct udp_mib, udp_stats_in6); #endif }; diff --git a/include/net/udp.h b/include/net/udp.h index d38f6f2419f..72f28c3ddaa 100644 --- a/include/net/udp.h +++ b/include/net/udp.h @@ -152,8 +152,6 @@ extern struct sock *udp4_lib_lookup(struct net *net, __be32 saddr, __be16 sport, __be32 daddr, __be16 dport, int dif); -DECLARE_SNMP_STAT(struct udp_mib, udp_stats_in6); - /* UDP-Lite does not have a standardized MIB yet, so we inherit from UDP */ DECLARE_SNMP_STAT(struct udp_mib, udplite_stats_in6); @@ -167,12 +165,14 @@ DECLARE_SNMP_STAT(struct udp_mib, udplite_stats_in6); if (is_udplite) SNMP_INC_STATS_BH((net)->mib.udplite_statistics, field); \ else SNMP_INC_STATS_BH((net)->mib.udp_statistics, field); } while(0) -#define UDP6_INC_STATS_BH(net, field, is_udplite) do { (void)net; \ +#define UDP6_INC_STATS_BH(net, field, is_udplite) do { \ if (is_udplite) SNMP_INC_STATS_BH(udplite_stats_in6, field); \ - else SNMP_INC_STATS_BH(udp_stats_in6, field); } while(0) -#define UDP6_INC_STATS_USER(net, field, is_udplite) do { (void)net; \ + else SNMP_INC_STATS_BH((net)->mib.udp_stats_in6, field); \ +} while(0) +#define UDP6_INC_STATS_USER(net, field, is_udplite) do { \ if (is_udplite) SNMP_INC_STATS_USER(udplite_stats_in6, field); \ - else SNMP_INC_STATS_USER(udp_stats_in6, field); } while(0) + else SNMP_INC_STATS_USER((net)->mib.udp_stats_in6, field); \ +} while(0) #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) #define UDPX_INC_STATS_BH(sk, field) \ diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 822c9deac83..85f8e8e10b1 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -108,9 +108,6 @@ * Snmp MIB for the UDP layer */ -DEFINE_SNMP_STAT(struct udp_mib, udp_stats_in6) __read_mostly; -EXPORT_SYMBOL(udp_stats_in6); - struct hlist_head udp_hash[UDP_HTABLE_SIZE]; DEFINE_RWLOCK(udp_hash_lock); diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c index e8f82eca160..e09139122ef 100644 --- a/net/ipv6/af_inet6.c +++ b/net/ipv6/af_inet6.c @@ -806,16 +806,12 @@ static int __init init_ipv6_mibs(void) if (snmp_mib_init((void **)icmpv6msg_statistics, sizeof(struct icmpv6msg_mib)) < 0) goto err_icmpmsg_mib; - if (snmp_mib_init((void **)udp_stats_in6, sizeof (struct udp_mib)) < 0) - goto err_udp_mib; if (snmp_mib_init((void **)udplite_stats_in6, sizeof (struct udp_mib)) < 0) goto err_udplite_mib; return 0; err_udplite_mib: - snmp_mib_free((void **)udp_stats_in6); -err_udp_mib: snmp_mib_free((void **)icmpv6msg_statistics); err_icmpmsg_mib: snmp_mib_free((void **)icmpv6_statistics); @@ -831,17 +827,20 @@ static void cleanup_ipv6_mibs(void) snmp_mib_free((void **)ipv6_statistics); snmp_mib_free((void **)icmpv6_statistics); snmp_mib_free((void **)icmpv6msg_statistics); - snmp_mib_free((void **)udp_stats_in6); snmp_mib_free((void **)udplite_stats_in6); } static int __net_init ipv6_init_mibs(struct net *net) { + if (snmp_mib_init((void **)net->mib.udp_stats_in6, + sizeof (struct udp_mib)) < 0) + return -ENOMEM; return 0; } static void __net_exit ipv6_cleanup_mibs(struct net *net) { + snmp_mib_free((void **)net->mib.udp_stats_in6); } static int inet6_net_init(struct net *net) diff --git a/net/ipv6/proc.c b/net/ipv6/proc.c index 23e567fb1d3..3eaf20bf998 100644 --- a/net/ipv6/proc.c +++ b/net/ipv6/proc.c @@ -180,7 +180,8 @@ static int snmp6_seq_show(struct seq_file *seq, void *v) snmp6_seq_show_item(seq, (void **)ipv6_statistics, snmp6_ipstats_list); snmp6_seq_show_item(seq, (void **)icmpv6_statistics, snmp6_icmp6_list); snmp6_seq_show_icmpv6msg(seq, (void **)icmpv6msg_statistics); - snmp6_seq_show_item(seq, (void **)udp_stats_in6, snmp6_udp6_list); + snmp6_seq_show_item(seq, (void **)net->mib.udp_stats_in6, + snmp6_udp6_list); snmp6_seq_show_item(seq, (void **)udplite_stats_in6, snmp6_udplite6_list); return 0; } -- cgit v1.2.3-70-g09d2 From be713a443ee019489890e93654557916fbf72612 Mon Sep 17 00:00:00 2001 From: "Denis V. Lunev" Date: Tue, 7 Oct 2008 14:50:06 -0700 Subject: netns: make uplitev6 mib per/namespace Signed-off-by: Denis V. Lunev Signed-off-by: David S. Miller --- include/net/netns/mib.h | 1 + include/net/udp.h | 11 ++++------- net/ipv6/af_inet6.c | 14 ++++++++------ net/ipv6/proc.c | 3 ++- net/ipv6/udplite.c | 2 -- 5 files changed, 15 insertions(+), 16 deletions(-) (limited to 'include/net') diff --git a/include/net/netns/mib.h b/include/net/netns/mib.h index ba622b24b84..4e58f0519ce 100644 --- a/include/net/netns/mib.h +++ b/include/net/netns/mib.h @@ -15,6 +15,7 @@ struct netns_mib { #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) struct proc_dir_entry *proc_net_devsnmp6; DEFINE_SNMP_STAT(struct udp_mib, udp_stats_in6); + DEFINE_SNMP_STAT(struct udp_mib, udplite_stats_in6); #endif }; diff --git a/include/net/udp.h b/include/net/udp.h index 72f28c3ddaa..1e205095ea6 100644 --- a/include/net/udp.h +++ b/include/net/udp.h @@ -152,9 +152,6 @@ extern struct sock *udp4_lib_lookup(struct net *net, __be32 saddr, __be16 sport, __be32 daddr, __be16 dport, int dif); -/* UDP-Lite does not have a standardized MIB yet, so we inherit from UDP */ -DECLARE_SNMP_STAT(struct udp_mib, udplite_stats_in6); - /* * SNMP statistics for UDP and UDP-Lite */ @@ -166,12 +163,12 @@ DECLARE_SNMP_STAT(struct udp_mib, udplite_stats_in6); else SNMP_INC_STATS_BH((net)->mib.udp_statistics, field); } while(0) #define UDP6_INC_STATS_BH(net, field, is_udplite) do { \ - if (is_udplite) SNMP_INC_STATS_BH(udplite_stats_in6, field); \ + if (is_udplite) SNMP_INC_STATS_BH((net)->mib.udplite_stats_in6, field);\ else SNMP_INC_STATS_BH((net)->mib.udp_stats_in6, field); \ } while(0) -#define UDP6_INC_STATS_USER(net, field, is_udplite) do { \ - if (is_udplite) SNMP_INC_STATS_USER(udplite_stats_in6, field); \ - else SNMP_INC_STATS_USER((net)->mib.udp_stats_in6, field); \ +#define UDP6_INC_STATS_USER(net, field, __lite) do { \ + if (__lite) SNMP_INC_STATS_USER((net)->mib.udplite_stats_in6, field); \ + else SNMP_INC_STATS_USER((net)->mib.udp_stats_in6, field); \ } while(0) #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c index e09139122ef..127b240d2d8 100644 --- a/net/ipv6/af_inet6.c +++ b/net/ipv6/af_inet6.c @@ -806,13 +806,8 @@ static int __init init_ipv6_mibs(void) if (snmp_mib_init((void **)icmpv6msg_statistics, sizeof(struct icmpv6msg_mib)) < 0) goto err_icmpmsg_mib; - if (snmp_mib_init((void **)udplite_stats_in6, - sizeof (struct udp_mib)) < 0) - goto err_udplite_mib; return 0; -err_udplite_mib: - snmp_mib_free((void **)icmpv6msg_statistics); err_icmpmsg_mib: snmp_mib_free((void **)icmpv6_statistics); err_icmp_mib: @@ -827,7 +822,6 @@ static void cleanup_ipv6_mibs(void) snmp_mib_free((void **)ipv6_statistics); snmp_mib_free((void **)icmpv6_statistics); snmp_mib_free((void **)icmpv6msg_statistics); - snmp_mib_free((void **)udplite_stats_in6); } static int __net_init ipv6_init_mibs(struct net *net) @@ -835,12 +829,20 @@ static int __net_init ipv6_init_mibs(struct net *net) if (snmp_mib_init((void **)net->mib.udp_stats_in6, sizeof (struct udp_mib)) < 0) return -ENOMEM; + if (snmp_mib_init((void **)net->mib.udplite_stats_in6, + sizeof (struct udp_mib)) < 0) + goto err_udplite_mib; return 0; + +err_udplite_mib: + snmp_mib_free((void **)net->mib.udp_stats_in6); + return -ENOMEM; } static void __net_exit ipv6_cleanup_mibs(struct net *net) { snmp_mib_free((void **)net->mib.udp_stats_in6); + snmp_mib_free((void **)net->mib.udplite_stats_in6); } static int inet6_net_init(struct net *net) diff --git a/net/ipv6/proc.c b/net/ipv6/proc.c index 3eaf20bf998..c78cf754ef3 100644 --- a/net/ipv6/proc.c +++ b/net/ipv6/proc.c @@ -182,7 +182,8 @@ static int snmp6_seq_show(struct seq_file *seq, void *v) snmp6_seq_show_icmpv6msg(seq, (void **)icmpv6msg_statistics); snmp6_seq_show_item(seq, (void **)net->mib.udp_stats_in6, snmp6_udp6_list); - snmp6_seq_show_item(seq, (void **)udplite_stats_in6, snmp6_udplite6_list); + snmp6_seq_show_item(seq, (void **)net->mib.udplite_stats_in6, + snmp6_udplite6_list); return 0; } diff --git a/net/ipv6/udplite.c b/net/ipv6/udplite.c index f6cdcb348e0..3cd1a1ac3d6 100644 --- a/net/ipv6/udplite.c +++ b/net/ipv6/udplite.c @@ -13,8 +13,6 @@ */ #include "udp_impl.h" -DEFINE_SNMP_STAT(struct udp_mib, udplite_stats_in6) __read_mostly; - static int udplitev6_rcv(struct sk_buff *skb) { return __udp6_lib_rcv(skb, udplite_hash, IPPROTO_UDPLITE); -- cgit v1.2.3-70-g09d2 From 76108cea065cda58366d16a7eb6ca90d717a1396 Mon Sep 17 00:00:00 2001 From: Jan Engelhardt Date: Wed, 8 Oct 2008 11:35:00 +0200 Subject: netfilter: Use unsigned types for hooknum and pf vars and (try to) consistently use u_int8_t for the L3 family. Signed-off-by: Jan Engelhardt Signed-off-by: Patrick McHardy --- include/linux/netfilter.h | 30 ++++++++-------- include/linux/netfilter/x_tables.h | 28 +++++++-------- include/net/netfilter/nf_conntrack_core.h | 2 +- include/net/netfilter/nf_conntrack_expect.h | 2 +- include/net/netfilter/nf_conntrack_l4proto.h | 4 +-- include/net/netfilter/nf_log.h | 8 ++--- include/net/netfilter/nf_queue.h | 6 ++-- net/bridge/br_netfilter.c | 4 +-- net/bridge/netfilter/ebt_log.c | 2 +- net/bridge/netfilter/ebt_ulog.c | 2 +- net/ipv4/netfilter/ipt_LOG.c | 2 +- net/ipv4/netfilter/ipt_ULOG.c | 2 +- net/ipv4/netfilter/nf_conntrack_proto_icmp.c | 4 +-- net/ipv6/netfilter/ip6t_LOG.c | 2 +- net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c | 4 +-- net/netfilter/core.c | 4 +-- net/netfilter/nf_conntrack_core.c | 6 ++-- net/netfilter/nf_conntrack_expect.c | 2 +- net/netfilter/nf_conntrack_h323_main.c | 3 +- net/netfilter/nf_conntrack_proto_dccp.c | 4 +-- net/netfilter/nf_conntrack_proto_generic.c | 2 +- net/netfilter/nf_conntrack_proto_gre.c | 2 +- net/netfilter/nf_conntrack_proto_sctp.c | 2 +- net/netfilter/nf_conntrack_proto_tcp.c | 6 ++-- net/netfilter/nf_conntrack_proto_udp.c | 4 +-- net/netfilter/nf_conntrack_proto_udplite.c | 4 +-- net/netfilter/nf_internals.h | 4 +-- net/netfilter/nf_log.c | 6 ++-- net/netfilter/nf_queue.c | 10 +++--- net/netfilter/nf_sockopt.c | 15 ++++---- net/netfilter/nfnetlink_log.c | 4 +-- net/netfilter/x_tables.c | 47 ++++++++++++++------------ net/netfilter/xt_connlimit.c | 2 +- net/netfilter/xt_conntrack.c | 8 ++--- net/netfilter/xt_hashlimit.c | 11 +++--- 35 files changed, 127 insertions(+), 121 deletions(-) (limited to 'include/net') diff --git a/include/linux/netfilter.h b/include/linux/netfilter.h index 0c5eb7ed8b3..8c83d2e23bd 100644 --- a/include/linux/netfilter.h +++ b/include/linux/netfilter.h @@ -92,8 +92,8 @@ struct nf_hook_ops /* User fills in from here down. */ nf_hookfn *hook; struct module *owner; - int pf; - int hooknum; + u_int8_t pf; + unsigned int hooknum; /* Hooks are ordered in ascending priority. */ int priority; }; @@ -102,7 +102,7 @@ struct nf_sockopt_ops { struct list_head list; - int pf; + u_int8_t pf; /* Non-inclusive ranges: use 0/0/NULL to never get called. */ int set_optmin; @@ -140,7 +140,7 @@ extern struct ctl_path nf_net_ipv4_netfilter_sysctl_path[]; extern struct list_head nf_hooks[NPROTO][NF_MAX_HOOKS]; -int nf_hook_slow(int pf, unsigned int hook, struct sk_buff *skb, +int nf_hook_slow(u_int8_t pf, unsigned int hook, struct sk_buff *skb, struct net_device *indev, struct net_device *outdev, int (*okfn)(struct sk_buff *), int thresh); @@ -151,7 +151,7 @@ int nf_hook_slow(int pf, unsigned int hook, struct sk_buff *skb, * okfn must be invoked by the caller in this case. Any other return * value indicates the packet has been consumed by the hook. */ -static inline int nf_hook_thresh(int pf, unsigned int hook, +static inline int nf_hook_thresh(u_int8_t pf, unsigned int hook, struct sk_buff *skb, struct net_device *indev, struct net_device *outdev, @@ -167,7 +167,7 @@ static inline int nf_hook_thresh(int pf, unsigned int hook, return nf_hook_slow(pf, hook, skb, indev, outdev, okfn, thresh); } -static inline int nf_hook(int pf, unsigned int hook, struct sk_buff *skb, +static inline int nf_hook(u_int8_t pf, unsigned int hook, struct sk_buff *skb, struct net_device *indev, struct net_device *outdev, int (*okfn)(struct sk_buff *)) { @@ -212,14 +212,14 @@ __ret;}) NF_HOOK_THRESH(pf, hook, skb, indev, outdev, okfn, INT_MIN) /* Call setsockopt() */ -int nf_setsockopt(struct sock *sk, int pf, int optval, char __user *opt, +int nf_setsockopt(struct sock *sk, u_int8_t pf, int optval, char __user *opt, int len); -int nf_getsockopt(struct sock *sk, int pf, int optval, char __user *opt, +int nf_getsockopt(struct sock *sk, u_int8_t pf, int optval, char __user *opt, int *len); -int compat_nf_setsockopt(struct sock *sk, int pf, int optval, +int compat_nf_setsockopt(struct sock *sk, u_int8_t pf, int optval, char __user *opt, int len); -int compat_nf_getsockopt(struct sock *sk, int pf, int optval, +int compat_nf_getsockopt(struct sock *sk, u_int8_t pf, int optval, char __user *opt, int *len); /* Call this before modifying an existing packet: ensures it is @@ -292,7 +292,7 @@ extern void nf_unregister_afinfo(const struct nf_afinfo *afinfo); extern void (*ip_nat_decode_session)(struct sk_buff *, struct flowi *); static inline void -nf_nat_decode_session(struct sk_buff *skb, struct flowi *fl, int family) +nf_nat_decode_session(struct sk_buff *skb, struct flowi *fl, u_int8_t family) { #ifdef CONFIG_NF_NAT_NEEDED void (*decodefn)(struct sk_buff *, struct flowi *); @@ -315,7 +315,7 @@ extern struct proc_dir_entry *proc_net_netfilter; #else /* !CONFIG_NETFILTER */ #define NF_HOOK(pf, hook, skb, indev, outdev, okfn) (okfn)(skb) #define NF_HOOK_COND(pf, hook, skb, indev, outdev, okfn, cond) (okfn)(skb) -static inline int nf_hook_thresh(int pf, unsigned int hook, +static inline int nf_hook_thresh(u_int8_t pf, unsigned int hook, struct sk_buff *skb, struct net_device *indev, struct net_device *outdev, @@ -324,7 +324,7 @@ static inline int nf_hook_thresh(int pf, unsigned int hook, { return okfn(skb); } -static inline int nf_hook(int pf, unsigned int hook, struct sk_buff *skb, +static inline int nf_hook(u_int8_t pf, unsigned int hook, struct sk_buff *skb, struct net_device *indev, struct net_device *outdev, int (*okfn)(struct sk_buff *)) { @@ -332,7 +332,9 @@ static inline int nf_hook(int pf, unsigned int hook, struct sk_buff *skb, } struct flowi; static inline void -nf_nat_decode_session(struct sk_buff *skb, struct flowi *fl, int family) {} +nf_nat_decode_session(struct sk_buff *skb, struct flowi *fl, u_int8_t family) +{ +} #endif /*CONFIG_NETFILTER*/ #if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE) diff --git a/include/linux/netfilter/x_tables.h b/include/linux/netfilter/x_tables.h index 2326296b6f2..6989b22716e 100644 --- a/include/linux/netfilter/x_tables.h +++ b/include/linux/netfilter/x_tables.h @@ -292,7 +292,7 @@ struct xt_table /* Set this to THIS_MODULE if you are a module, otherwise NULL */ struct module *me; - int af; /* address/protocol family */ + u_int8_t af; /* address/protocol family */ }; #include @@ -346,19 +346,19 @@ extern struct xt_table_info *xt_replace_table(struct xt_table *table, struct xt_table_info *newinfo, int *error); -extern struct xt_match *xt_find_match(int af, const char *name, u8 revision); -extern struct xt_target *xt_find_target(int af, const char *name, u8 revision); -extern struct xt_target *xt_request_find_target(int af, const char *name, +extern struct xt_match *xt_find_match(u8 af, const char *name, u8 revision); +extern struct xt_target *xt_find_target(u8 af, const char *name, u8 revision); +extern struct xt_target *xt_request_find_target(u8 af, const char *name, u8 revision); -extern int xt_find_revision(int af, const char *name, u8 revision, int target, - int *err); +extern int xt_find_revision(u8 af, const char *name, u8 revision, + int target, int *err); -extern struct xt_table *xt_find_table_lock(struct net *net, int af, +extern struct xt_table *xt_find_table_lock(struct net *net, u_int8_t af, const char *name); extern void xt_table_unlock(struct xt_table *t); -extern int xt_proto_init(struct net *net, int af); -extern void xt_proto_fini(struct net *net, int af); +extern int xt_proto_init(struct net *net, u_int8_t af); +extern void xt_proto_fini(struct net *net, u_int8_t af); extern struct xt_table_info *xt_alloc_table_info(unsigned int size); extern void xt_free_table_info(struct xt_table_info *info); @@ -423,12 +423,12 @@ struct compat_xt_counters_info #define COMPAT_XT_ALIGN(s) (((s) + (__alignof__(struct compat_xt_counters)-1)) \ & ~(__alignof__(struct compat_xt_counters)-1)) -extern void xt_compat_lock(int af); -extern void xt_compat_unlock(int af); +extern void xt_compat_lock(u_int8_t af); +extern void xt_compat_unlock(u_int8_t af); -extern int xt_compat_add_offset(int af, unsigned int offset, short delta); -extern void xt_compat_flush_offsets(int af); -extern short xt_compat_calc_jump(int af, unsigned int offset); +extern int xt_compat_add_offset(u_int8_t af, unsigned int offset, short delta); +extern void xt_compat_flush_offsets(u_int8_t af); +extern short xt_compat_calc_jump(u_int8_t af, unsigned int offset); extern int xt_compat_match_offset(const struct xt_match *match); extern int xt_compat_match_from_user(struct xt_entry_match *m, diff --git a/include/net/netfilter/nf_conntrack_core.h b/include/net/netfilter/nf_conntrack_core.h index a8177121093..05760d6a706 100644 --- a/include/net/netfilter/nf_conntrack_core.h +++ b/include/net/netfilter/nf_conntrack_core.h @@ -20,7 +20,7 @@ /* This header is used to share core functionality between the standalone connection tracking module, and the compatibility layer's use of connection tracking. */ -extern unsigned int nf_conntrack_in(int pf, +extern unsigned int nf_conntrack_in(u_int8_t pf, unsigned int hooknum, struct sk_buff *skb); diff --git a/include/net/netfilter/nf_conntrack_expect.h b/include/net/netfilter/nf_conntrack_expect.h index dfdf4b45947..4c4d894cb9b 100644 --- a/include/net/netfilter/nf_conntrack_expect.h +++ b/include/net/netfilter/nf_conntrack_expect.h @@ -86,7 +86,7 @@ void nf_ct_unexpect_related(struct nf_conntrack_expect *exp); /* Allocate space for an expectation: this is mandatory before calling nf_ct_expect_related. You will have to call put afterwards. */ struct nf_conntrack_expect *nf_ct_expect_alloc(struct nf_conn *me); -void nf_ct_expect_init(struct nf_conntrack_expect *, unsigned int, int, +void nf_ct_expect_init(struct nf_conntrack_expect *, unsigned int, u_int8_t, const union nf_inet_addr *, const union nf_inet_addr *, u_int8_t, const __be16 *, const __be16 *); diff --git a/include/net/netfilter/nf_conntrack_l4proto.h b/include/net/netfilter/nf_conntrack_l4proto.h index 723df9d1cc3..d4376e97bae 100644 --- a/include/net/netfilter/nf_conntrack_l4proto.h +++ b/include/net/netfilter/nf_conntrack_l4proto.h @@ -39,7 +39,7 @@ struct nf_conntrack_l4proto const struct sk_buff *skb, unsigned int dataoff, enum ip_conntrack_info ctinfo, - int pf, + u_int8_t pf, unsigned int hooknum); /* Called when a new connection for this protocol found; @@ -52,7 +52,7 @@ struct nf_conntrack_l4proto int (*error)(struct sk_buff *skb, unsigned int dataoff, enum ip_conntrack_info *ctinfo, - int pf, unsigned int hooknum); + u_int8_t pf, unsigned int hooknum); /* Print out the per-protocol part of the tuple. Return like seq_* */ int (*print_tuple)(struct seq_file *s, diff --git a/include/net/netfilter/nf_log.h b/include/net/netfilter/nf_log.h index 8c6b5ae4553..7182c06974f 100644 --- a/include/net/netfilter/nf_log.h +++ b/include/net/netfilter/nf_log.h @@ -28,7 +28,7 @@ struct nf_loginfo { } u; }; -typedef void nf_logfn(unsigned int pf, +typedef void nf_logfn(u_int8_t pf, unsigned int hooknum, const struct sk_buff *skb, const struct net_device *in, @@ -43,12 +43,12 @@ struct nf_logger { }; /* Function to register/unregister log function. */ -int nf_log_register(int pf, const struct nf_logger *logger); +int nf_log_register(u_int8_t pf, const struct nf_logger *logger); void nf_log_unregister(const struct nf_logger *logger); -void nf_log_unregister_pf(int pf); +void nf_log_unregister_pf(u_int8_t pf); /* Calls the registered backend logging function */ -void nf_log_packet(int pf, +void nf_log_packet(u_int8_t pf, unsigned int hooknum, const struct sk_buff *skb, const struct net_device *in, diff --git a/include/net/netfilter/nf_queue.h b/include/net/netfilter/nf_queue.h index d030044e923..252fd1010b7 100644 --- a/include/net/netfilter/nf_queue.h +++ b/include/net/netfilter/nf_queue.h @@ -8,7 +8,7 @@ struct nf_queue_entry { unsigned int id; struct nf_hook_ops *elem; - int pf; + u_int8_t pf; unsigned int hook; struct net_device *indev; struct net_device *outdev; @@ -24,9 +24,9 @@ struct nf_queue_handler { char *name; }; -extern int nf_register_queue_handler(int pf, +extern int nf_register_queue_handler(u_int8_t pf, const struct nf_queue_handler *qh); -extern int nf_unregister_queue_handler(int pf, +extern int nf_unregister_queue_handler(u_int8_t pf, const struct nf_queue_handler *qh); extern void nf_unregister_queue_handlers(const struct nf_queue_handler *qh); extern void nf_reinject(struct nf_queue_entry *entry, unsigned int verdict); diff --git a/net/bridge/br_netfilter.c b/net/bridge/br_netfilter.c index 6a9a6cd74b1..a4abed5b4c4 100644 --- a/net/bridge/br_netfilter.c +++ b/net/bridge/br_netfilter.c @@ -657,7 +657,7 @@ static unsigned int br_nf_forward_ip(unsigned int hook, struct sk_buff *skb, { struct nf_bridge_info *nf_bridge; struct net_device *parent; - int pf; + u_int8_t pf; if (!skb->nf_bridge) return NF_ACCEPT; @@ -791,7 +791,7 @@ static unsigned int br_nf_post_routing(unsigned int hook, struct sk_buff *skb, { struct nf_bridge_info *nf_bridge = skb->nf_bridge; struct net_device *realoutdev = bridge_parent(skb->dev); - int pf; + u_int8_t pf; #ifdef CONFIG_NETFILTER_DEBUG /* Be very paranoid. This probably won't happen anymore, but let's diff --git a/net/bridge/netfilter/ebt_log.c b/net/bridge/netfilter/ebt_log.c index 2f430d4ae91..3770cd8a7b3 100644 --- a/net/bridge/netfilter/ebt_log.c +++ b/net/bridge/netfilter/ebt_log.c @@ -84,7 +84,7 @@ print_ports(const struct sk_buff *skb, uint8_t protocol, int offset) #define myNIPQUAD(a) a[0], a[1], a[2], a[3] static void -ebt_log_packet(unsigned int pf, unsigned int hooknum, +ebt_log_packet(u_int8_t pf, unsigned int hooknum, const struct sk_buff *skb, const struct net_device *in, const struct net_device *out, const struct nf_loginfo *loginfo, const char *prefix) diff --git a/net/bridge/netfilter/ebt_ulog.c b/net/bridge/netfilter/ebt_ulog.c index 2d4c9ef909f..c84bda61afb 100644 --- a/net/bridge/netfilter/ebt_ulog.c +++ b/net/bridge/netfilter/ebt_ulog.c @@ -223,7 +223,7 @@ alloc_failure: } /* this function is registered with the netfilter core */ -static void ebt_log_packet(unsigned int pf, unsigned int hooknum, +static void ebt_log_packet(u_int8_t pf, unsigned int hooknum, const struct sk_buff *skb, const struct net_device *in, const struct net_device *out, const struct nf_loginfo *li, const char *prefix) diff --git a/net/ipv4/netfilter/ipt_LOG.c b/net/ipv4/netfilter/ipt_LOG.c index 0af14137137..9330ba3577e 100644 --- a/net/ipv4/netfilter/ipt_LOG.c +++ b/net/ipv4/netfilter/ipt_LOG.c @@ -375,7 +375,7 @@ static struct nf_loginfo default_loginfo = { }; static void -ipt_log_packet(unsigned int pf, +ipt_log_packet(u_int8_t pf, unsigned int hooknum, const struct sk_buff *skb, const struct net_device *in, diff --git a/net/ipv4/netfilter/ipt_ULOG.c b/net/ipv4/netfilter/ipt_ULOG.c index b192756c6d0..d8241e6b077 100644 --- a/net/ipv4/netfilter/ipt_ULOG.c +++ b/net/ipv4/netfilter/ipt_ULOG.c @@ -292,7 +292,7 @@ ulog_tg(struct sk_buff *skb, const struct net_device *in, return XT_CONTINUE; } -static void ipt_logfn(unsigned int pf, +static void ipt_logfn(u_int8_t pf, unsigned int hooknum, const struct sk_buff *skb, const struct net_device *in, diff --git a/net/ipv4/netfilter/nf_conntrack_proto_icmp.c b/net/ipv4/netfilter/nf_conntrack_proto_icmp.c index 97791048fa9..da8edcdaef3 100644 --- a/net/ipv4/netfilter/nf_conntrack_proto_icmp.c +++ b/net/ipv4/netfilter/nf_conntrack_proto_icmp.c @@ -79,7 +79,7 @@ static int icmp_packet(struct nf_conn *ct, const struct sk_buff *skb, unsigned int dataoff, enum ip_conntrack_info ctinfo, - int pf, + u_int8_t pf, unsigned int hooknum) { /* Try to delete connection immediately after all replies: @@ -173,7 +173,7 @@ icmp_error_message(struct sk_buff *skb, /* Small and modified version of icmp_rcv */ static int icmp_error(struct sk_buff *skb, unsigned int dataoff, - enum ip_conntrack_info *ctinfo, int pf, unsigned int hooknum) + enum ip_conntrack_info *ctinfo, u_int8_t pf, unsigned int hooknum) { const struct icmphdr *icmph; struct icmphdr _ih; diff --git a/net/ipv6/netfilter/ip6t_LOG.c b/net/ipv6/netfilter/ip6t_LOG.c index 3a2316974f8..0716f8afa0b 100644 --- a/net/ipv6/netfilter/ip6t_LOG.c +++ b/net/ipv6/netfilter/ip6t_LOG.c @@ -385,7 +385,7 @@ static struct nf_loginfo default_loginfo = { }; static void -ip6t_log_packet(unsigned int pf, +ip6t_log_packet(u_int8_t pf, unsigned int hooknum, const struct sk_buff *skb, const struct net_device *in, diff --git a/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c b/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c index 14d47d83354..5756f30ebc6 100644 --- a/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c +++ b/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c @@ -81,7 +81,7 @@ static int icmpv6_packet(struct nf_conn *ct, const struct sk_buff *skb, unsigned int dataoff, enum ip_conntrack_info ctinfo, - int pf, + u_int8_t pf, unsigned int hooknum) { /* Try to delete connection immediately after all replies: @@ -173,7 +173,7 @@ icmpv6_error_message(struct sk_buff *skb, static int icmpv6_error(struct sk_buff *skb, unsigned int dataoff, - enum ip_conntrack_info *ctinfo, int pf, unsigned int hooknum) + enum ip_conntrack_info *ctinfo, u_int8_t pf, unsigned int hooknum) { const struct icmp6hdr *icmp6h; struct icmp6hdr _ih; diff --git a/net/netfilter/core.c b/net/netfilter/core.c index 292fa28146f..26b8f489d7a 100644 --- a/net/netfilter/core.c +++ b/net/netfilter/core.c @@ -113,7 +113,7 @@ EXPORT_SYMBOL(nf_unregister_hooks); unsigned int nf_iterate(struct list_head *head, struct sk_buff *skb, - int hook, + unsigned int hook, const struct net_device *indev, const struct net_device *outdev, struct list_head **i, @@ -155,7 +155,7 @@ unsigned int nf_iterate(struct list_head *head, /* Returns 1 if okfn() needs to be executed by the caller, * -EPERM for NF_DROP, 0 otherwise. */ -int nf_hook_slow(int pf, unsigned int hook, struct sk_buff *skb, +int nf_hook_slow(u_int8_t pf, unsigned int hook, struct sk_buff *skb, struct net_device *indev, struct net_device *outdev, int (*okfn)(struct sk_buff *), diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index 9d1830da8e8..6aaf64b5ded 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -665,7 +665,7 @@ resolve_normal_ct(struct sk_buff *skb, } unsigned int -nf_conntrack_in(int pf, unsigned int hooknum, struct sk_buff *skb) +nf_conntrack_in(u_int8_t pf, unsigned int hooknum, struct sk_buff *skb) { struct nf_conn *ct; enum ip_conntrack_info ctinfo; @@ -683,7 +683,7 @@ nf_conntrack_in(int pf, unsigned int hooknum, struct sk_buff *skb) } /* rcu_read_lock()ed by nf_hook_slow */ - l3proto = __nf_ct_l3proto_find((u_int16_t)pf); + l3proto = __nf_ct_l3proto_find(pf); ret = l3proto->get_l4proto(skb, skb_network_offset(skb), &dataoff, &protonum); if (ret <= 0) { @@ -693,7 +693,7 @@ nf_conntrack_in(int pf, unsigned int hooknum, struct sk_buff *skb) return -ret; } - l4proto = __nf_ct_l4proto_find((u_int16_t)pf, protonum); + l4proto = __nf_ct_l4proto_find(pf, protonum); /* It may be an special packet, error, unclean... * inverse of the return code tells to the netfilter diff --git a/net/netfilter/nf_conntrack_expect.c b/net/netfilter/nf_conntrack_expect.c index e8f0dead267..990fa12f2ee 100644 --- a/net/netfilter/nf_conntrack_expect.c +++ b/net/netfilter/nf_conntrack_expect.c @@ -241,7 +241,7 @@ struct nf_conntrack_expect *nf_ct_expect_alloc(struct nf_conn *me) EXPORT_SYMBOL_GPL(nf_ct_expect_alloc); void nf_ct_expect_init(struct nf_conntrack_expect *exp, unsigned int class, - int family, + u_int8_t family, const union nf_inet_addr *saddr, const union nf_inet_addr *daddr, u_int8_t proto, const __be16 *src, const __be16 *dst) diff --git a/net/netfilter/nf_conntrack_h323_main.c b/net/netfilter/nf_conntrack_h323_main.c index 2f83c158934..5dc0478108a 100644 --- a/net/netfilter/nf_conntrack_h323_main.c +++ b/net/netfilter/nf_conntrack_h323_main.c @@ -709,7 +709,8 @@ static int expect_h245(struct sk_buff *skb, struct nf_conn *ct, /* If the calling party is on the same side of the forward-to party, * we don't need to track the second call */ static int callforward_do_filter(const union nf_inet_addr *src, - const union nf_inet_addr *dst, int family) + const union nf_inet_addr *dst, + u_int8_t family) { const struct nf_afinfo *afinfo; struct flowi fl1, fl2; diff --git a/net/netfilter/nf_conntrack_proto_dccp.c b/net/netfilter/nf_conntrack_proto_dccp.c index e7866dd3cde..edc30358dc1 100644 --- a/net/netfilter/nf_conntrack_proto_dccp.c +++ b/net/netfilter/nf_conntrack_proto_dccp.c @@ -461,7 +461,7 @@ static u64 dccp_ack_seq(const struct dccp_hdr *dh) static int dccp_packet(struct nf_conn *ct, const struct sk_buff *skb, unsigned int dataoff, enum ip_conntrack_info ctinfo, - int pf, unsigned int hooknum) + u_int8_t pf, unsigned int hooknum) { enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo); struct dccp_hdr _dh, *dh; @@ -546,7 +546,7 @@ static int dccp_packet(struct nf_conn *ct, const struct sk_buff *skb, } static int dccp_error(struct sk_buff *skb, unsigned int dataoff, - enum ip_conntrack_info *ctinfo, int pf, + enum ip_conntrack_info *ctinfo, u_int8_t pf, unsigned int hooknum) { struct dccp_hdr _dh, *dh; diff --git a/net/netfilter/nf_conntrack_proto_generic.c b/net/netfilter/nf_conntrack_proto_generic.c index e31b0e7bd0b..dbe680af85d 100644 --- a/net/netfilter/nf_conntrack_proto_generic.c +++ b/net/netfilter/nf_conntrack_proto_generic.c @@ -45,7 +45,7 @@ static int packet(struct nf_conn *ct, const struct sk_buff *skb, unsigned int dataoff, enum ip_conntrack_info ctinfo, - int pf, + u_int8_t pf, unsigned int hooknum) { nf_ct_refresh_acct(ct, ctinfo, skb, nf_ct_generic_timeout); diff --git a/net/netfilter/nf_conntrack_proto_gre.c b/net/netfilter/nf_conntrack_proto_gre.c index 9bd03967fea..c5a78220fa3 100644 --- a/net/netfilter/nf_conntrack_proto_gre.c +++ b/net/netfilter/nf_conntrack_proto_gre.c @@ -219,7 +219,7 @@ static int gre_packet(struct nf_conn *ct, const struct sk_buff *skb, unsigned int dataoff, enum ip_conntrack_info ctinfo, - int pf, + u_int8_t pf, unsigned int hooknum) { /* If we've seen traffic both ways, this is a GRE connection. diff --git a/net/netfilter/nf_conntrack_proto_sctp.c b/net/netfilter/nf_conntrack_proto_sctp.c index 30aa5b94a77..b5a90596d3f 100644 --- a/net/netfilter/nf_conntrack_proto_sctp.c +++ b/net/netfilter/nf_conntrack_proto_sctp.c @@ -287,7 +287,7 @@ static int sctp_packet(struct nf_conn *ct, const struct sk_buff *skb, unsigned int dataoff, enum ip_conntrack_info ctinfo, - int pf, + u_int8_t pf, unsigned int hooknum) { enum sctp_conntrack new_state, old_state; diff --git a/net/netfilter/nf_conntrack_proto_tcp.c b/net/netfilter/nf_conntrack_proto_tcp.c index 6f61261888e..539a8202025 100644 --- a/net/netfilter/nf_conntrack_proto_tcp.c +++ b/net/netfilter/nf_conntrack_proto_tcp.c @@ -486,7 +486,7 @@ static bool tcp_in_window(const struct nf_conn *ct, const struct sk_buff *skb, unsigned int dataoff, const struct tcphdr *tcph, - int pf) + u_int8_t pf) { struct ip_ct_tcp_state *sender = &state->seen[dir]; struct ip_ct_tcp_state *receiver = &state->seen[!dir]; @@ -749,7 +749,7 @@ static const u8 tcp_valid_flags[(TH_FIN|TH_SYN|TH_RST|TH_ACK|TH_URG) + 1] = static int tcp_error(struct sk_buff *skb, unsigned int dataoff, enum ip_conntrack_info *ctinfo, - int pf, + u_int8_t pf, unsigned int hooknum) { const struct tcphdr *th; @@ -804,7 +804,7 @@ static int tcp_packet(struct nf_conn *ct, const struct sk_buff *skb, unsigned int dataoff, enum ip_conntrack_info ctinfo, - int pf, + u_int8_t pf, unsigned int hooknum) { struct nf_conntrack_tuple *tuple; diff --git a/net/netfilter/nf_conntrack_proto_udp.c b/net/netfilter/nf_conntrack_proto_udp.c index 8b21762e65d..2a965c4a0ea 100644 --- a/net/netfilter/nf_conntrack_proto_udp.c +++ b/net/netfilter/nf_conntrack_proto_udp.c @@ -66,7 +66,7 @@ static int udp_packet(struct nf_conn *ct, const struct sk_buff *skb, unsigned int dataoff, enum ip_conntrack_info ctinfo, - int pf, + u_int8_t pf, unsigned int hooknum) { /* If we've seen traffic both ways, this is some kind of UDP @@ -91,7 +91,7 @@ static bool udp_new(struct nf_conn *ct, const struct sk_buff *skb, static int udp_error(struct sk_buff *skb, unsigned int dataoff, enum ip_conntrack_info *ctinfo, - int pf, + u_int8_t pf, unsigned int hooknum) { unsigned int udplen = skb->len - dataoff; diff --git a/net/netfilter/nf_conntrack_proto_udplite.c b/net/netfilter/nf_conntrack_proto_udplite.c index 1fa62f3c24f..4fb6c8d83a8 100644 --- a/net/netfilter/nf_conntrack_proto_udplite.c +++ b/net/netfilter/nf_conntrack_proto_udplite.c @@ -65,7 +65,7 @@ static int udplite_packet(struct nf_conn *ct, const struct sk_buff *skb, unsigned int dataoff, enum ip_conntrack_info ctinfo, - int pf, + u_int8_t pf, unsigned int hooknum) { /* If we've seen traffic both ways, this is some kind of UDP @@ -91,7 +91,7 @@ static bool udplite_new(struct nf_conn *ct, const struct sk_buff *skb, static int udplite_error(struct sk_buff *skb, unsigned int dataoff, enum ip_conntrack_info *ctinfo, - int pf, + u_int8_t pf, unsigned int hooknum) { unsigned int udplen = skb->len - dataoff; diff --git a/net/netfilter/nf_internals.h b/net/netfilter/nf_internals.h index 196269c1e58..bf6609978af 100644 --- a/net/netfilter/nf_internals.h +++ b/net/netfilter/nf_internals.h @@ -15,7 +15,7 @@ /* core.c */ extern unsigned int nf_iterate(struct list_head *head, struct sk_buff *skb, - int hook, + unsigned int hook, const struct net_device *indev, const struct net_device *outdev, struct list_head **i, @@ -25,7 +25,7 @@ extern unsigned int nf_iterate(struct list_head *head, /* nf_queue.c */ extern int nf_queue(struct sk_buff *skb, struct list_head *elem, - int pf, unsigned int hook, + u_int8_t pf, unsigned int hook, struct net_device *indev, struct net_device *outdev, int (*okfn)(struct sk_buff *), diff --git a/net/netfilter/nf_log.c b/net/netfilter/nf_log.c index 9fda6ee95a3..5c2f7332015 100644 --- a/net/netfilter/nf_log.c +++ b/net/netfilter/nf_log.c @@ -20,7 +20,7 @@ static DEFINE_MUTEX(nf_log_mutex); /* return EBUSY if somebody else is registered, EEXIST if the same logger * is registred, 0 on success. */ -int nf_log_register(int pf, const struct nf_logger *logger) +int nf_log_register(u_int8_t pf, const struct nf_logger *logger) { int ret; @@ -45,7 +45,7 @@ int nf_log_register(int pf, const struct nf_logger *logger) } EXPORT_SYMBOL(nf_log_register); -void nf_log_unregister_pf(int pf) +void nf_log_unregister_pf(u_int8_t pf) { if (pf >= NPROTO) return; @@ -73,7 +73,7 @@ void nf_log_unregister(const struct nf_logger *logger) } EXPORT_SYMBOL(nf_log_unregister); -void nf_log_packet(int pf, +void nf_log_packet(u_int8_t pf, unsigned int hooknum, const struct sk_buff *skb, const struct net_device *in, diff --git a/net/netfilter/nf_queue.c b/net/netfilter/nf_queue.c index 582ec3efc8a..f285086f629 100644 --- a/net/netfilter/nf_queue.c +++ b/net/netfilter/nf_queue.c @@ -22,7 +22,7 @@ static DEFINE_MUTEX(queue_handler_mutex); /* return EBUSY when somebody else is registered, return EEXIST if the * same handler is registered, return 0 in case of success. */ -int nf_register_queue_handler(int pf, const struct nf_queue_handler *qh) +int nf_register_queue_handler(u_int8_t pf, const struct nf_queue_handler *qh) { int ret; @@ -45,7 +45,7 @@ int nf_register_queue_handler(int pf, const struct nf_queue_handler *qh) EXPORT_SYMBOL(nf_register_queue_handler); /* The caller must flush their queue before this */ -int nf_unregister_queue_handler(int pf, const struct nf_queue_handler *qh) +int nf_unregister_queue_handler(u_int8_t pf, const struct nf_queue_handler *qh) { if (pf >= NPROTO) return -EINVAL; @@ -67,7 +67,7 @@ EXPORT_SYMBOL(nf_unregister_queue_handler); void nf_unregister_queue_handlers(const struct nf_queue_handler *qh) { - int pf; + u_int8_t pf; mutex_lock(&queue_handler_mutex); for (pf = 0; pf < NPROTO; pf++) { @@ -107,7 +107,7 @@ static void nf_queue_entry_release_refs(struct nf_queue_entry *entry) */ static int __nf_queue(struct sk_buff *skb, struct list_head *elem, - int pf, unsigned int hook, + u_int8_t pf, unsigned int hook, struct net_device *indev, struct net_device *outdev, int (*okfn)(struct sk_buff *), @@ -191,7 +191,7 @@ err: int nf_queue(struct sk_buff *skb, struct list_head *elem, - int pf, unsigned int hook, + u_int8_t pf, unsigned int hook, struct net_device *indev, struct net_device *outdev, int (*okfn)(struct sk_buff *), diff --git a/net/netfilter/nf_sockopt.c b/net/netfilter/nf_sockopt.c index 01489681fa9..f9b46de6a3d 100644 --- a/net/netfilter/nf_sockopt.c +++ b/net/netfilter/nf_sockopt.c @@ -60,7 +60,7 @@ void nf_unregister_sockopt(struct nf_sockopt_ops *reg) } EXPORT_SYMBOL(nf_unregister_sockopt); -static struct nf_sockopt_ops *nf_sockopt_find(struct sock *sk, int pf, +static struct nf_sockopt_ops *nf_sockopt_find(struct sock *sk, u_int8_t pf, int val, int get) { struct nf_sockopt_ops *ops; @@ -96,7 +96,7 @@ out: } /* Call get/setsockopt() */ -static int nf_sockopt(struct sock *sk, int pf, int val, +static int nf_sockopt(struct sock *sk, u_int8_t pf, int val, char __user *opt, int *len, int get) { struct nf_sockopt_ops *ops; @@ -115,21 +115,22 @@ static int nf_sockopt(struct sock *sk, int pf, int val, return ret; } -int nf_setsockopt(struct sock *sk, int pf, int val, char __user *opt, +int nf_setsockopt(struct sock *sk, u_int8_t pf, int val, char __user *opt, int len) { return nf_sockopt(sk, pf, val, opt, &len, 0); } EXPORT_SYMBOL(nf_setsockopt); -int nf_getsockopt(struct sock *sk, int pf, int val, char __user *opt, int *len) +int nf_getsockopt(struct sock *sk, u_int8_t pf, int val, char __user *opt, + int *len) { return nf_sockopt(sk, pf, val, opt, len, 1); } EXPORT_SYMBOL(nf_getsockopt); #ifdef CONFIG_COMPAT -static int compat_nf_sockopt(struct sock *sk, int pf, int val, +static int compat_nf_sockopt(struct sock *sk, u_int8_t pf, int val, char __user *opt, int *len, int get) { struct nf_sockopt_ops *ops; @@ -155,14 +156,14 @@ static int compat_nf_sockopt(struct sock *sk, int pf, int val, return ret; } -int compat_nf_setsockopt(struct sock *sk, int pf, +int compat_nf_setsockopt(struct sock *sk, u_int8_t pf, int val, char __user *opt, int len) { return compat_nf_sockopt(sk, pf, val, opt, &len, 0); } EXPORT_SYMBOL(compat_nf_setsockopt); -int compat_nf_getsockopt(struct sock *sk, int pf, +int compat_nf_getsockopt(struct sock *sk, u_int8_t pf, int val, char __user *opt, int *len) { return compat_nf_sockopt(sk, pf, val, opt, len, 1); diff --git a/net/netfilter/nfnetlink_log.c b/net/netfilter/nfnetlink_log.c index 9a35b57ab76..41e0105d382 100644 --- a/net/netfilter/nfnetlink_log.c +++ b/net/netfilter/nfnetlink_log.c @@ -359,7 +359,7 @@ static inline int __build_packet_message(struct nfulnl_instance *inst, const struct sk_buff *skb, unsigned int data_len, - unsigned int pf, + u_int8_t pf, unsigned int hooknum, const struct net_device *indev, const struct net_device *outdev, @@ -534,7 +534,7 @@ static struct nf_loginfo default_loginfo = { /* log handler for internal netfilter logging api */ static void -nfulnl_log_packet(unsigned int pf, +nfulnl_log_packet(u_int8_t pf, unsigned int hooknum, const struct sk_buff *skb, const struct net_device *in, diff --git a/net/netfilter/x_tables.c b/net/netfilter/x_tables.c index 5d75cd86ebb..cf2f3e90cef 100644 --- a/net/netfilter/x_tables.c +++ b/net/netfilter/x_tables.c @@ -68,7 +68,8 @@ static const char *const xt_prefix[NPROTO] = { int xt_register_target(struct xt_target *target) { - int ret, af = target->family; + u_int8_t af = target->family; + int ret; ret = mutex_lock_interruptible(&xt[af].mutex); if (ret != 0) @@ -82,7 +83,7 @@ EXPORT_SYMBOL(xt_register_target); void xt_unregister_target(struct xt_target *target) { - int af = target->family; + u_int8_t af = target->family; mutex_lock(&xt[af].mutex); list_del(&target->list); @@ -123,7 +124,8 @@ EXPORT_SYMBOL(xt_unregister_targets); int xt_register_match(struct xt_match *match) { - int ret, af = match->family; + u_int8_t af = match->family; + int ret; ret = mutex_lock_interruptible(&xt[af].mutex); if (ret != 0) @@ -139,7 +141,7 @@ EXPORT_SYMBOL(xt_register_match); void xt_unregister_match(struct xt_match *match) { - int af = match->family; + u_int8_t af = match->family; mutex_lock(&xt[af].mutex); list_del(&match->list); @@ -185,7 +187,7 @@ EXPORT_SYMBOL(xt_unregister_matches); */ /* Find match, grabs ref. Returns ERR_PTR() on error. */ -struct xt_match *xt_find_match(int af, const char *name, u8 revision) +struct xt_match *xt_find_match(u8 af, const char *name, u8 revision) { struct xt_match *m; int err = 0; @@ -210,7 +212,7 @@ struct xt_match *xt_find_match(int af, const char *name, u8 revision) EXPORT_SYMBOL(xt_find_match); /* Find target, grabs ref. Returns ERR_PTR() on error. */ -struct xt_target *xt_find_target(int af, const char *name, u8 revision) +struct xt_target *xt_find_target(u8 af, const char *name, u8 revision) { struct xt_target *t; int err = 0; @@ -234,7 +236,7 @@ struct xt_target *xt_find_target(int af, const char *name, u8 revision) } EXPORT_SYMBOL(xt_find_target); -struct xt_target *xt_request_find_target(int af, const char *name, u8 revision) +struct xt_target *xt_request_find_target(u8 af, const char *name, u8 revision) { struct xt_target *target; @@ -246,7 +248,7 @@ struct xt_target *xt_request_find_target(int af, const char *name, u8 revision) } EXPORT_SYMBOL_GPL(xt_request_find_target); -static int match_revfn(int af, const char *name, u8 revision, int *bestp) +static int match_revfn(u8 af, const char *name, u8 revision, int *bestp) { const struct xt_match *m; int have_rev = 0; @@ -262,7 +264,7 @@ static int match_revfn(int af, const char *name, u8 revision, int *bestp) return have_rev; } -static int target_revfn(int af, const char *name, u8 revision, int *bestp) +static int target_revfn(u8 af, const char *name, u8 revision, int *bestp) { const struct xt_target *t; int have_rev = 0; @@ -279,7 +281,7 @@ static int target_revfn(int af, const char *name, u8 revision, int *bestp) } /* Returns true or false (if no such extension at all) */ -int xt_find_revision(int af, const char *name, u8 revision, int target, +int xt_find_revision(u8 af, const char *name, u8 revision, int target, int *err) { int have_rev, best = -1; @@ -337,7 +339,7 @@ int xt_check_match(const struct xt_match *match, unsigned short family, EXPORT_SYMBOL_GPL(xt_check_match); #ifdef CONFIG_COMPAT -int xt_compat_add_offset(int af, unsigned int offset, short delta) +int xt_compat_add_offset(u_int8_t af, unsigned int offset, short delta) { struct compat_delta *tmp; @@ -359,7 +361,7 @@ int xt_compat_add_offset(int af, unsigned int offset, short delta) } EXPORT_SYMBOL_GPL(xt_compat_add_offset); -void xt_compat_flush_offsets(int af) +void xt_compat_flush_offsets(u_int8_t af) { struct compat_delta *tmp, *next; @@ -373,7 +375,7 @@ void xt_compat_flush_offsets(int af) } EXPORT_SYMBOL_GPL(xt_compat_flush_offsets); -short xt_compat_calc_jump(int af, unsigned int offset) +short xt_compat_calc_jump(u_int8_t af, unsigned int offset) { struct compat_delta *tmp; short delta; @@ -590,7 +592,8 @@ void xt_free_table_info(struct xt_table_info *info) EXPORT_SYMBOL(xt_free_table_info); /* Find table by name, grabs mutex & ref. Returns ERR_PTR() on error. */ -struct xt_table *xt_find_table_lock(struct net *net, int af, const char *name) +struct xt_table *xt_find_table_lock(struct net *net, u_int8_t af, + const char *name) { struct xt_table *t; @@ -612,13 +615,13 @@ void xt_table_unlock(struct xt_table *table) EXPORT_SYMBOL_GPL(xt_table_unlock); #ifdef CONFIG_COMPAT -void xt_compat_lock(int af) +void xt_compat_lock(u_int8_t af) { mutex_lock(&xt[af].compat_mutex); } EXPORT_SYMBOL_GPL(xt_compat_lock); -void xt_compat_unlock(int af) +void xt_compat_unlock(u_int8_t af) { mutex_unlock(&xt[af].compat_mutex); } @@ -722,13 +725,13 @@ EXPORT_SYMBOL_GPL(xt_unregister_table); #ifdef CONFIG_PROC_FS struct xt_names_priv { struct seq_net_private p; - int af; + u_int8_t af; }; static void *xt_table_seq_start(struct seq_file *seq, loff_t *pos) { struct xt_names_priv *priv = seq->private; struct net *net = seq_file_net(seq); - int af = priv->af; + u_int8_t af = priv->af; mutex_lock(&xt[af].mutex); return seq_list_start(&net->xt.tables[af], *pos); @@ -738,7 +741,7 @@ static void *xt_table_seq_next(struct seq_file *seq, void *v, loff_t *pos) { struct xt_names_priv *priv = seq->private; struct net *net = seq_file_net(seq); - int af = priv->af; + u_int8_t af = priv->af; return seq_list_next(v, &net->xt.tables[af], pos); } @@ -746,7 +749,7 @@ static void *xt_table_seq_next(struct seq_file *seq, void *v, loff_t *pos) static void xt_table_seq_stop(struct seq_file *seq, void *v) { struct xt_names_priv *priv = seq->private; - int af = priv->af; + u_int8_t af = priv->af; mutex_unlock(&xt[af].mutex); } @@ -922,7 +925,7 @@ static const struct file_operations xt_target_ops = { #endif /* CONFIG_PROC_FS */ -int xt_proto_init(struct net *net, int af) +int xt_proto_init(struct net *net, u_int8_t af) { #ifdef CONFIG_PROC_FS char buf[XT_FUNCTION_MAXNAMELEN]; @@ -974,7 +977,7 @@ out: } EXPORT_SYMBOL_GPL(xt_proto_init); -void xt_proto_fini(struct net *net, int af) +void xt_proto_fini(struct net *net, u_int8_t af) { #ifdef CONFIG_PROC_FS char buf[XT_FUNCTION_MAXNAMELEN]; diff --git a/net/netfilter/xt_connlimit.c b/net/netfilter/xt_connlimit.c index 70907f6baac..1655e2cf25c 100644 --- a/net/netfilter/xt_connlimit.c +++ b/net/netfilter/xt_connlimit.c @@ -82,7 +82,7 @@ static inline bool already_closed(const struct nf_conn *conn) static inline unsigned int same_source_net(const union nf_inet_addr *addr, const union nf_inet_addr *mask, - const union nf_inet_addr *u3, unsigned int family) + const union nf_inet_addr *u3, u_int8_t family) { if (family == AF_INET) { return (addr->ip & mask->ip) == (u3->ip & mask->ip); diff --git a/net/netfilter/xt_conntrack.c b/net/netfilter/xt_conntrack.c index d61412f58ef..28a42a3fbff 100644 --- a/net/netfilter/xt_conntrack.c +++ b/net/netfilter/xt_conntrack.c @@ -133,7 +133,7 @@ conntrack_addrcmp(const union nf_inet_addr *kaddr, static inline bool conntrack_mt_origsrc(const struct nf_conn *ct, const struct xt_conntrack_mtinfo1 *info, - unsigned int family) + u_int8_t family) { return conntrack_addrcmp(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.u3, &info->origsrc_addr, &info->origsrc_mask, family); @@ -142,7 +142,7 @@ conntrack_mt_origsrc(const struct nf_conn *ct, static inline bool conntrack_mt_origdst(const struct nf_conn *ct, const struct xt_conntrack_mtinfo1 *info, - unsigned int family) + u_int8_t family) { return conntrack_addrcmp(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.u3, &info->origdst_addr, &info->origdst_mask, family); @@ -151,7 +151,7 @@ conntrack_mt_origdst(const struct nf_conn *ct, static inline bool conntrack_mt_replsrc(const struct nf_conn *ct, const struct xt_conntrack_mtinfo1 *info, - unsigned int family) + u_int8_t family) { return conntrack_addrcmp(&ct->tuplehash[IP_CT_DIR_REPLY].tuple.src.u3, &info->replsrc_addr, &info->replsrc_mask, family); @@ -160,7 +160,7 @@ conntrack_mt_replsrc(const struct nf_conn *ct, static inline bool conntrack_mt_repldst(const struct nf_conn *ct, const struct xt_conntrack_mtinfo1 *info, - unsigned int family) + u_int8_t family) { return conntrack_addrcmp(&ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.u3, &info->repldst_addr, &info->repldst_mask, family); diff --git a/net/netfilter/xt_hashlimit.c b/net/netfilter/xt_hashlimit.c index d9418a26781..0c9268fd2e1 100644 --- a/net/netfilter/xt_hashlimit.c +++ b/net/netfilter/xt_hashlimit.c @@ -80,7 +80,7 @@ struct dsthash_ent { struct xt_hashlimit_htable { struct hlist_node node; /* global list of all htables */ atomic_t use; - int family; + u_int8_t family; struct hashlimit_cfg1 cfg; /* config */ @@ -185,7 +185,7 @@ dsthash_free(struct xt_hashlimit_htable *ht, struct dsthash_ent *ent) } static void htable_gc(unsigned long htlong); -static int htable_create_v0(struct xt_hashlimit_info *minfo, int family) +static int htable_create_v0(struct xt_hashlimit_info *minfo, u_int8_t family) { struct xt_hashlimit_htable *hinfo; unsigned int size; @@ -258,8 +258,7 @@ static int htable_create_v0(struct xt_hashlimit_info *minfo, int family) return 0; } -static int htable_create(struct xt_hashlimit_mtinfo1 *minfo, - unsigned int family) +static int htable_create(struct xt_hashlimit_mtinfo1 *minfo, u_int8_t family) { struct xt_hashlimit_htable *hinfo; unsigned int size; @@ -378,7 +377,7 @@ static void htable_destroy(struct xt_hashlimit_htable *hinfo) } static struct xt_hashlimit_htable *htable_find_get(const char *name, - int family) + u_int8_t family) { struct xt_hashlimit_htable *hinfo; struct hlist_node *pos; @@ -901,7 +900,7 @@ static void dl_seq_stop(struct seq_file *s, void *v) spin_unlock_bh(&htable->lock); } -static int dl_seq_real_show(struct dsthash_ent *ent, int family, +static int dl_seq_real_show(struct dsthash_ent *ent, u_int8_t family, struct seq_file *s) { /* recalculate to show accurate numbers */ -- cgit v1.2.3-70-g09d2 From dfdb8d791877052bbb527d9688d94a064721d8f7 Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Wed, 8 Oct 2008 11:35:02 +0200 Subject: netfilter: netns nf_conntrack: add netns boilerplate One comment: #ifdefs around #include is necessary to overcome amazing compile breakages in NOTRACK-in-netns patch (see below). Signed-off-by: Alexey Dobriyan Signed-off-by: Patrick McHardy --- include/net/net_namespace.h | 6 ++++++ include/net/netfilter/nf_conntrack_core.h | 4 ++-- include/net/netns/conntrack.h | 6 ++++++ net/netfilter/nf_conntrack_core.c | 4 ++-- net/netfilter/nf_conntrack_expect.c | 4 ++-- net/netfilter/nf_conntrack_standalone.c | 21 ++++++++++++++++++--- 6 files changed, 36 insertions(+), 9 deletions(-) create mode 100644 include/net/netns/conntrack.h (limited to 'include/net') diff --git a/include/net/net_namespace.h b/include/net/net_namespace.h index a8eb43cf0c7..708009be88b 100644 --- a/include/net/net_namespace.h +++ b/include/net/net_namespace.h @@ -16,6 +16,9 @@ #include #include #include +#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE) +#include +#endif struct proc_dir_entry; struct net_device; @@ -67,6 +70,9 @@ struct net { #endif #ifdef CONFIG_NETFILTER struct netns_xt xt; +#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE) + struct netns_ct ct; +#endif #endif struct net_generic *gen; }; diff --git a/include/net/netfilter/nf_conntrack_core.h b/include/net/netfilter/nf_conntrack_core.h index 05760d6a706..532aa200cbc 100644 --- a/include/net/netfilter/nf_conntrack_core.h +++ b/include/net/netfilter/nf_conntrack_core.h @@ -24,8 +24,8 @@ extern unsigned int nf_conntrack_in(u_int8_t pf, unsigned int hooknum, struct sk_buff *skb); -extern int nf_conntrack_init(void); -extern void nf_conntrack_cleanup(void); +extern int nf_conntrack_init(struct net *net); +extern void nf_conntrack_cleanup(struct net *net); extern int nf_conntrack_proto_init(void); extern void nf_conntrack_proto_fini(void); diff --git a/include/net/netns/conntrack.h b/include/net/netns/conntrack.h new file mode 100644 index 00000000000..82d80b83477 --- /dev/null +++ b/include/net/netns/conntrack.h @@ -0,0 +1,6 @@ +#ifndef __NETNS_CONNTRACK_H +#define __NETNS_CONNTRACK_H + +struct netns_ct { +}; +#endif diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index 6aaf64b5ded..ee79e932589 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -1006,7 +1006,7 @@ EXPORT_SYMBOL_GPL(nf_conntrack_flush); /* Mishearing the voices in his head, our hero wonders how he's supposed to kill the mall. */ -void nf_conntrack_cleanup(void) +void nf_conntrack_cleanup(struct net *net) { rcu_assign_pointer(ip_ct_attach, NULL); @@ -1120,7 +1120,7 @@ EXPORT_SYMBOL_GPL(nf_conntrack_set_hashsize); module_param_call(hashsize, nf_conntrack_set_hashsize, param_get_uint, &nf_conntrack_htable_size, 0600); -int __init nf_conntrack_init(void) +int nf_conntrack_init(struct net *net) { int max_factor = 8; int ret; diff --git a/net/netfilter/nf_conntrack_expect.c b/net/netfilter/nf_conntrack_expect.c index 990fa12f2ee..e6a79f2a7c5 100644 --- a/net/netfilter/nf_conntrack_expect.c +++ b/net/netfilter/nf_conntrack_expect.c @@ -537,7 +537,7 @@ static const struct file_operations exp_file_ops = { }; #endif /* CONFIG_PROC_FS */ -static int __init exp_proc_init(void) +static int exp_proc_init(void) { #ifdef CONFIG_PROC_FS struct proc_dir_entry *proc; @@ -558,7 +558,7 @@ static void exp_proc_remove(void) module_param_named(expect_hashsize, nf_ct_expect_hsize, uint, 0600); -int __init nf_conntrack_expect_init(void) +int nf_conntrack_expect_init(void) { int err = -ENOMEM; diff --git a/net/netfilter/nf_conntrack_standalone.c b/net/netfilter/nf_conntrack_standalone.c index 8509db14670..81dec17196d 100644 --- a/net/netfilter/nf_conntrack_standalone.c +++ b/net/netfilter/nf_conntrack_standalone.c @@ -440,11 +440,26 @@ static void nf_conntrack_standalone_fini_sysctl(void) } #endif /* CONFIG_SYSCTL */ +static int nf_conntrack_net_init(struct net *net) +{ + return nf_conntrack_init(net); +} + +static void nf_conntrack_net_exit(struct net *net) +{ + nf_conntrack_cleanup(net); +} + +static struct pernet_operations nf_conntrack_net_ops = { + .init = nf_conntrack_net_init, + .exit = nf_conntrack_net_exit, +}; + static int __init nf_conntrack_standalone_init(void) { int ret; - ret = nf_conntrack_init(); + ret = register_pernet_subsys(&nf_conntrack_net_ops); if (ret < 0) goto out; ret = nf_conntrack_standalone_init_proc(); @@ -458,7 +473,7 @@ static int __init nf_conntrack_standalone_init(void) out_sysctl: nf_conntrack_standalone_fini_proc(); out_proc: - nf_conntrack_cleanup(); + unregister_pernet_subsys(&nf_conntrack_net_ops); out: return ret; } @@ -467,7 +482,7 @@ static void __exit nf_conntrack_standalone_fini(void) { nf_conntrack_standalone_fini_sysctl(); nf_conntrack_standalone_fini_proc(); - nf_conntrack_cleanup(); + unregister_pernet_subsys(&nf_conntrack_net_ops); } module_init(nf_conntrack_standalone_init); -- cgit v1.2.3-70-g09d2 From 5a1fb391d881905e89623d78858d05b248cbc86a Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Wed, 8 Oct 2008 11:35:02 +0200 Subject: netfilter: netns nf_conntrack: add ->ct_net -- pointer from conntrack to netns Conntrack (struct nf_conn) gets pointer to netns: ->ct_net -- netns in which it was created. It comes from netdevice. ->ct_net is write-once field. Every conntrack in system has ->ct_net initialized, no exceptions. ->ct_net doesn't pin netns: conntracks are recycled after timeouts and pinning background traffic will prevent netns from even starting shutdown sequence. Right now every conntrack is created in init_net. Signed-off-by: Alexey Dobriyan Signed-off-by: Patrick McHardy --- include/net/netfilter/nf_conntrack.h | 18 ++++++++++++++++-- net/netfilter/nf_conntrack_core.c | 17 +++++++++++++---- net/netfilter/nf_conntrack_netlink.c | 2 +- 3 files changed, 30 insertions(+), 7 deletions(-) (limited to 'include/net') diff --git a/include/net/netfilter/nf_conntrack.h b/include/net/netfilter/nf_conntrack.h index 0741ad592da..2b8d6efecf3 100644 --- a/include/net/netfilter/nf_conntrack.h +++ b/include/net/netfilter/nf_conntrack.h @@ -123,7 +123,9 @@ struct nf_conn /* Extensions */ struct nf_ct_ext *ext; - +#ifdef CONFIG_NET_NS + struct net *ct_net; +#endif struct rcu_head rcu; }; @@ -147,6 +149,17 @@ static inline u_int8_t nf_ct_protonum(const struct nf_conn *ct) /* get master conntrack via master expectation */ #define master_ct(conntr) (conntr->master) +extern struct net init_net; + +static inline struct net *nf_ct_net(const struct nf_conn *ct) +{ +#ifdef CONFIG_NET_NS + return ct->ct_net; +#else + return &init_net; +#endif +} + /* Alter reply tuple (maybe alter helper). */ extern void nf_conntrack_alter_reply(struct nf_conn *ct, @@ -251,7 +264,8 @@ extern void nf_ct_iterate_cleanup(int (*iter)(struct nf_conn *i, void *data), void *data); extern void nf_conntrack_free(struct nf_conn *ct); extern struct nf_conn * -nf_conntrack_alloc(const struct nf_conntrack_tuple *orig, +nf_conntrack_alloc(struct net *net, + const struct nf_conntrack_tuple *orig, const struct nf_conntrack_tuple *repl, gfp_t gfp); diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index ee79e932589..cefc338f6e5 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -464,7 +464,8 @@ static noinline int early_drop(unsigned int hash) return dropped; } -struct nf_conn *nf_conntrack_alloc(const struct nf_conntrack_tuple *orig, +struct nf_conn *nf_conntrack_alloc(struct net *net, + const struct nf_conntrack_tuple *orig, const struct nf_conntrack_tuple *repl, gfp_t gfp) { @@ -503,6 +504,9 @@ struct nf_conn *nf_conntrack_alloc(const struct nf_conntrack_tuple *orig, ct->tuplehash[IP_CT_DIR_REPLY].tuple = *repl; /* Don't set timer yet: wait for confirmation */ setup_timer(&ct->timeout, death_by_timeout, (unsigned long)ct); +#ifdef CONFIG_NET_NS + ct->ct_net = net; +#endif INIT_RCU_HEAD(&ct->rcu); return ct; @@ -528,7 +532,8 @@ EXPORT_SYMBOL_GPL(nf_conntrack_free); /* Allocate a new conntrack: we return -ENOMEM if classification failed due to stress. Otherwise it really is unclassifiable. */ static struct nf_conntrack_tuple_hash * -init_conntrack(const struct nf_conntrack_tuple *tuple, +init_conntrack(struct net *net, + const struct nf_conntrack_tuple *tuple, struct nf_conntrack_l3proto *l3proto, struct nf_conntrack_l4proto *l4proto, struct sk_buff *skb, @@ -544,7 +549,7 @@ init_conntrack(const struct nf_conntrack_tuple *tuple, return NULL; } - ct = nf_conntrack_alloc(tuple, &repl_tuple, GFP_ATOMIC); + ct = nf_conntrack_alloc(net, tuple, &repl_tuple, GFP_ATOMIC); if (ct == NULL || IS_ERR(ct)) { pr_debug("Can't allocate conntrack.\n"); return (struct nf_conntrack_tuple_hash *)ct; @@ -631,7 +636,8 @@ resolve_normal_ct(struct sk_buff *skb, /* look for tuple match */ h = nf_conntrack_find_get(&tuple); if (!h) { - h = init_conntrack(&tuple, l3proto, l4proto, skb, dataoff); + h = init_conntrack(&init_net, &tuple, l3proto, l4proto, skb, + dataoff); if (!h) return NULL; if (IS_ERR(h)) @@ -1185,6 +1191,9 @@ int nf_conntrack_init(struct net *net) /* Set up fake conntrack: - to never be deleted, not in any hashes */ +#ifdef CONFIG_NET_NS + nf_conntrack_untracked.ct_net = &init_net; +#endif atomic_set(&nf_conntrack_untracked.ct_general.use, 1); /* - and look it like as a confirmed connection */ set_bit(IPS_CONFIRMED_BIT, &nf_conntrack_untracked.status); diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c index a8752031adc..da3cdc8db70 100644 --- a/net/netfilter/nf_conntrack_netlink.c +++ b/net/netfilter/nf_conntrack_netlink.c @@ -1125,7 +1125,7 @@ ctnetlink_create_conntrack(struct nlattr *cda[], struct nf_conn_help *help; struct nf_conntrack_helper *helper; - ct = nf_conntrack_alloc(otuple, rtuple, GFP_KERNEL); + ct = nf_conntrack_alloc(&init_net, otuple, rtuple, GFP_KERNEL); if (ct == NULL || IS_ERR(ct)) return -ENOMEM; -- cgit v1.2.3-70-g09d2 From 49ac8713b6d064adf7474080fdccebd7cce76be0 Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Wed, 8 Oct 2008 11:35:03 +0200 Subject: netfilter: netns nf_conntrack: per-netns conntrack count Sysctls and proc files are stubbed to init_net's one. This is temporary. Signed-off-by: Alexey Dobriyan Signed-off-by: Patrick McHardy --- include/net/netfilter/nf_conntrack.h | 1 - include/net/netns/conntrack.h | 3 +++ net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c | 2 +- net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c | 2 +- net/netfilter/nf_conntrack_core.c | 18 ++++++++---------- net/netfilter/nf_conntrack_standalone.c | 4 ++-- 6 files changed, 15 insertions(+), 15 deletions(-) (limited to 'include/net') diff --git a/include/net/netfilter/nf_conntrack.h b/include/net/netfilter/nf_conntrack.h index 2b8d6efecf3..5999c5313d0 100644 --- a/include/net/netfilter/nf_conntrack.h +++ b/include/net/netfilter/nf_conntrack.h @@ -288,7 +288,6 @@ static inline int nf_ct_is_untracked(const struct sk_buff *skb) extern int nf_conntrack_set_hashsize(const char *val, struct kernel_param *kp); extern unsigned int nf_conntrack_htable_size; extern int nf_conntrack_checksum; -extern atomic_t nf_conntrack_count; extern int nf_conntrack_max; DECLARE_PER_CPU(struct ip_conntrack_stat, nf_conntrack_stat); diff --git a/include/net/netns/conntrack.h b/include/net/netns/conntrack.h index 82d80b83477..edf84714d7c 100644 --- a/include/net/netns/conntrack.h +++ b/include/net/netns/conntrack.h @@ -1,6 +1,9 @@ #ifndef __NETNS_CONNTRACK_H #define __NETNS_CONNTRACK_H +#include + struct netns_ct { + atomic_t count; }; #endif diff --git a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c index 5a955c44036..31abee3e29f 100644 --- a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c +++ b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c @@ -254,7 +254,7 @@ static ctl_table ip_ct_sysctl_table[] = { { .ctl_name = NET_IPV4_NF_CONNTRACK_COUNT, .procname = "ip_conntrack_count", - .data = &nf_conntrack_count, + .data = &init_net.ct.count, .maxlen = sizeof(int), .mode = 0444, .proc_handler = &proc_dointvec, diff --git a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c index 3a020720e40..4556805027f 100644 --- a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c +++ b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c @@ -314,7 +314,7 @@ static void ct_cpu_seq_stop(struct seq_file *seq, void *v) static int ct_cpu_seq_show(struct seq_file *seq, void *v) { - unsigned int nr_conntracks = atomic_read(&nf_conntrack_count); + unsigned int nr_conntracks = atomic_read(&init_net.ct.count); const struct ip_conntrack_stat *st = v; if (v == SEQ_START_TOKEN) { diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index cefc338f6e5..8299b3490e7 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -44,10 +44,6 @@ DEFINE_SPINLOCK(nf_conntrack_lock); EXPORT_SYMBOL_GPL(nf_conntrack_lock); -/* nf_conntrack_standalone needs this */ -atomic_t nf_conntrack_count = ATOMIC_INIT(0); -EXPORT_SYMBOL_GPL(nf_conntrack_count); - unsigned int nf_conntrack_htable_size __read_mostly; EXPORT_SYMBOL_GPL(nf_conntrack_htable_size); @@ -477,13 +473,13 @@ struct nf_conn *nf_conntrack_alloc(struct net *net, } /* We don't want any race condition at early drop stage */ - atomic_inc(&nf_conntrack_count); + atomic_inc(&net->ct.count); if (nf_conntrack_max && - unlikely(atomic_read(&nf_conntrack_count) > nf_conntrack_max)) { + unlikely(atomic_read(&net->ct.count) > nf_conntrack_max)) { unsigned int hash = hash_conntrack(orig); if (!early_drop(hash)) { - atomic_dec(&nf_conntrack_count); + atomic_dec(&net->ct.count); if (net_ratelimit()) printk(KERN_WARNING "nf_conntrack: table full, dropping" @@ -495,7 +491,7 @@ struct nf_conn *nf_conntrack_alloc(struct net *net, ct = kmem_cache_zalloc(nf_conntrack_cachep, gfp); if (ct == NULL) { pr_debug("nf_conntrack_alloc: Can't alloc conntrack.\n"); - atomic_dec(&nf_conntrack_count); + atomic_dec(&net->ct.count); return ERR_PTR(-ENOMEM); } @@ -516,10 +512,11 @@ EXPORT_SYMBOL_GPL(nf_conntrack_alloc); static void nf_conntrack_free_rcu(struct rcu_head *head) { struct nf_conn *ct = container_of(head, struct nf_conn, rcu); + struct net *net = nf_ct_net(ct); nf_ct_ext_free(ct); kmem_cache_free(nf_conntrack_cachep, ct); - atomic_dec(&nf_conntrack_count); + atomic_dec(&net->ct.count); } void nf_conntrack_free(struct nf_conn *ct) @@ -1024,7 +1021,7 @@ void nf_conntrack_cleanup(struct net *net) nf_ct_event_cache_flush(); i_see_dead_people: nf_conntrack_flush(); - if (atomic_read(&nf_conntrack_count) != 0) { + if (atomic_read(&net->ct.count) != 0) { schedule(); goto i_see_dead_people; } @@ -1148,6 +1145,7 @@ int nf_conntrack_init(struct net *net) * entries. */ max_factor = 4; } + atomic_set(&net->ct.count, 0); nf_conntrack_hash = nf_ct_alloc_hashtable(&nf_conntrack_htable_size, &nf_conntrack_vmalloc); if (!nf_conntrack_hash) { diff --git a/net/netfilter/nf_conntrack_standalone.c b/net/netfilter/nf_conntrack_standalone.c index 81dec17196d..021b505907d 100644 --- a/net/netfilter/nf_conntrack_standalone.c +++ b/net/netfilter/nf_conntrack_standalone.c @@ -226,7 +226,7 @@ static void ct_cpu_seq_stop(struct seq_file *seq, void *v) static int ct_cpu_seq_show(struct seq_file *seq, void *v) { - unsigned int nr_conntracks = atomic_read(&nf_conntrack_count); + unsigned int nr_conntracks = atomic_read(&init_net.ct.count); const struct ip_conntrack_stat *st = v; if (v == SEQ_START_TOKEN) { @@ -338,7 +338,7 @@ static ctl_table nf_ct_sysctl_table[] = { { .ctl_name = NET_NF_CONNTRACK_COUNT, .procname = "nf_conntrack_count", - .data = &nf_conntrack_count, + .data = &init_net.ct.count, .maxlen = sizeof(int), .mode = 0444, .proc_handler = &proc_dointvec, -- cgit v1.2.3-70-g09d2 From 400dad39d1c33fe797e47326d87a3f54d0ac5181 Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Wed, 8 Oct 2008 11:35:03 +0200 Subject: netfilter: netns nf_conntrack: per-netns conntrack hash * make per-netns conntrack hash Other solution is to add ->ct_net pointer to tuplehashes and still has one hash, I tried that it's ugly and requires more code deep down in protocol modules et al. * propagate netns pointer to where needed, e. g. to conntrack iterators. Signed-off-by: Alexey Dobriyan Signed-off-by: Patrick McHardy --- include/net/netfilter/nf_conntrack.h | 6 +- include/net/netfilter/nf_conntrack_core.h | 3 +- include/net/netns/conntrack.h | 2 + net/ipv4/netfilter/ipt_MASQUERADE.c | 3 +- net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c | 2 +- .../netfilter/nf_conntrack_l3proto_ipv4_compat.c | 4 +- net/ipv4/netfilter/nf_conntrack_proto_icmp.c | 2 +- net/ipv4/netfilter/nf_nat_core.c | 2 +- net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c | 2 +- net/netfilter/nf_conntrack_core.c | 74 +++++++++++----------- net/netfilter/nf_conntrack_helper.c | 2 +- net/netfilter/nf_conntrack_netlink.c | 16 ++--- net/netfilter/nf_conntrack_pptp.c | 2 +- net/netfilter/nf_conntrack_proto.c | 4 +- net/netfilter/nf_conntrack_standalone.c | 4 +- net/netfilter/xt_connlimit.c | 2 +- 16 files changed, 67 insertions(+), 63 deletions(-) (limited to 'include/net') diff --git a/include/net/netfilter/nf_conntrack.h b/include/net/netfilter/nf_conntrack.h index 5999c5313d0..f5447f14304 100644 --- a/include/net/netfilter/nf_conntrack.h +++ b/include/net/netfilter/nf_conntrack.h @@ -195,11 +195,11 @@ extern void nf_ct_free_hashtable(struct hlist_head *hash, int vmalloced, unsigned int size); extern struct nf_conntrack_tuple_hash * -__nf_conntrack_find(const struct nf_conntrack_tuple *tuple); +__nf_conntrack_find(struct net *net, const struct nf_conntrack_tuple *tuple); extern void nf_conntrack_hash_insert(struct nf_conn *ct); -extern void nf_conntrack_flush(void); +extern void nf_conntrack_flush(struct net *net); extern bool nf_ct_get_tuplepr(const struct sk_buff *skb, unsigned int nhoff, u_int16_t l3num, @@ -261,7 +261,7 @@ extern struct nf_conn nf_conntrack_untracked; /* Iterate over all conntracks: if iter returns true, it's deleted. */ extern void -nf_ct_iterate_cleanup(int (*iter)(struct nf_conn *i, void *data), void *data); +nf_ct_iterate_cleanup(struct net *net, int (*iter)(struct nf_conn *i, void *data), void *data); extern void nf_conntrack_free(struct nf_conn *ct); extern struct nf_conn * nf_conntrack_alloc(struct net *net, diff --git a/include/net/netfilter/nf_conntrack_core.h b/include/net/netfilter/nf_conntrack_core.h index 532aa200cbc..1c373564396 100644 --- a/include/net/netfilter/nf_conntrack_core.h +++ b/include/net/netfilter/nf_conntrack_core.h @@ -48,7 +48,7 @@ nf_ct_invert_tuple(struct nf_conntrack_tuple *inverse, /* Find a connection corresponding to a tuple. */ extern struct nf_conntrack_tuple_hash * -nf_conntrack_find_get(const struct nf_conntrack_tuple *tuple); +nf_conntrack_find_get(struct net *net, const struct nf_conntrack_tuple *tuple); extern int __nf_conntrack_confirm(struct sk_buff *skb); @@ -71,7 +71,6 @@ print_tuple(struct seq_file *s, const struct nf_conntrack_tuple *tuple, const struct nf_conntrack_l3proto *l3proto, const struct nf_conntrack_l4proto *proto); -extern struct hlist_head *nf_conntrack_hash; extern spinlock_t nf_conntrack_lock ; extern struct hlist_head unconfirmed; diff --git a/include/net/netns/conntrack.h b/include/net/netns/conntrack.h index edf84714d7c..b767683f112 100644 --- a/include/net/netns/conntrack.h +++ b/include/net/netns/conntrack.h @@ -5,5 +5,7 @@ struct netns_ct { atomic_t count; + struct hlist_head *hash; + int hash_vmalloc; }; #endif diff --git a/net/ipv4/netfilter/ipt_MASQUERADE.c b/net/ipv4/netfilter/ipt_MASQUERADE.c index 9a4822f8243..5e1c81791e5 100644 --- a/net/ipv4/netfilter/ipt_MASQUERADE.c +++ b/net/ipv4/netfilter/ipt_MASQUERADE.c @@ -129,7 +129,8 @@ static int masq_device_event(struct notifier_block *this, and forget them. */ NF_CT_ASSERT(dev->ifindex != 0); - nf_ct_iterate_cleanup(device_cmp, (void *)(long)dev->ifindex); + nf_ct_iterate_cleanup(&init_net, device_cmp, + (void *)(long)dev->ifindex); } return NOTIFY_DONE; diff --git a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c index 31abee3e29f..03dd108015c 100644 --- a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c +++ b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c @@ -323,7 +323,7 @@ getorigdst(struct sock *sk, int optval, void __user *user, int *len) return -EINVAL; } - h = nf_conntrack_find_get(&tuple); + h = nf_conntrack_find_get(sock_net(sk), &tuple); if (h) { struct sockaddr_in sin; struct nf_conn *ct = nf_ct_tuplehash_to_ctrack(h); diff --git a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c index 4556805027f..8e0afdc2b13 100644 --- a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c +++ b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c @@ -32,7 +32,7 @@ static struct hlist_node *ct_get_first(struct seq_file *seq) for (st->bucket = 0; st->bucket < nf_conntrack_htable_size; st->bucket++) { - n = rcu_dereference(nf_conntrack_hash[st->bucket].first); + n = rcu_dereference(init_net.ct.hash[st->bucket].first); if (n) return n; } @@ -48,7 +48,7 @@ static struct hlist_node *ct_get_next(struct seq_file *seq, while (head == NULL) { if (++st->bucket >= nf_conntrack_htable_size) return NULL; - head = rcu_dereference(nf_conntrack_hash[st->bucket].first); + head = rcu_dereference(init_net.ct.hash[st->bucket].first); } return head; } diff --git a/net/ipv4/netfilter/nf_conntrack_proto_icmp.c b/net/ipv4/netfilter/nf_conntrack_proto_icmp.c index da8edcdaef3..daf346377b6 100644 --- a/net/ipv4/netfilter/nf_conntrack_proto_icmp.c +++ b/net/ipv4/netfilter/nf_conntrack_proto_icmp.c @@ -155,7 +155,7 @@ icmp_error_message(struct sk_buff *skb, *ctinfo = IP_CT_RELATED; - h = nf_conntrack_find_get(&innertuple); + h = nf_conntrack_find_get(&init_net, &innertuple); if (!h) { pr_debug("icmp_error_message: no match\n"); return -NF_ACCEPT; diff --git a/net/ipv4/netfilter/nf_nat_core.c b/net/ipv4/netfilter/nf_nat_core.c index 6c6a3cba8d5..5d4a5b70da2 100644 --- a/net/ipv4/netfilter/nf_nat_core.c +++ b/net/ipv4/netfilter/nf_nat_core.c @@ -643,7 +643,7 @@ static int clean_nat(struct nf_conn *i, void *data) static void __exit nf_nat_cleanup(void) { - nf_ct_iterate_cleanup(&clean_nat, NULL); + nf_ct_iterate_cleanup(&init_net, &clean_nat, NULL); synchronize_rcu(); nf_ct_free_hashtable(bysource, nf_nat_vmalloced, nf_nat_htable_size); nf_ct_l3proto_put(l3proto); diff --git a/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c b/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c index 5756f30ebc6..548cf4f15c0 100644 --- a/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c +++ b/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c @@ -156,7 +156,7 @@ icmpv6_error_message(struct sk_buff *skb, *ctinfo = IP_CT_RELATED; - h = nf_conntrack_find_get(&intuple); + h = nf_conntrack_find_get(&init_net, &intuple); if (!h) { pr_debug("icmpv6_error: no match\n"); return -NF_ACCEPT; diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index 8299b3490e7..da56b260552 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -50,15 +50,11 @@ EXPORT_SYMBOL_GPL(nf_conntrack_htable_size); int nf_conntrack_max __read_mostly; EXPORT_SYMBOL_GPL(nf_conntrack_max); -struct hlist_head *nf_conntrack_hash __read_mostly; -EXPORT_SYMBOL_GPL(nf_conntrack_hash); - struct nf_conn nf_conntrack_untracked __read_mostly; EXPORT_SYMBOL_GPL(nf_conntrack_untracked); unsigned int nf_ct_log_invalid __read_mostly; HLIST_HEAD(unconfirmed); -static int nf_conntrack_vmalloc __read_mostly; static struct kmem_cache *nf_conntrack_cachep __read_mostly; DEFINE_PER_CPU(struct ip_conntrack_stat, nf_conntrack_stat); @@ -242,7 +238,7 @@ static void death_by_timeout(unsigned long ul_conntrack) } struct nf_conntrack_tuple_hash * -__nf_conntrack_find(const struct nf_conntrack_tuple *tuple) +__nf_conntrack_find(struct net *net, const struct nf_conntrack_tuple *tuple) { struct nf_conntrack_tuple_hash *h; struct hlist_node *n; @@ -252,7 +248,7 @@ __nf_conntrack_find(const struct nf_conntrack_tuple *tuple) * at least once for the stats anyway. */ local_bh_disable(); - hlist_for_each_entry_rcu(h, n, &nf_conntrack_hash[hash], hnode) { + hlist_for_each_entry_rcu(h, n, &net->ct.hash[hash], hnode) { if (nf_ct_tuple_equal(tuple, &h->tuple)) { NF_CT_STAT_INC(found); local_bh_enable(); @@ -268,13 +264,13 @@ EXPORT_SYMBOL_GPL(__nf_conntrack_find); /* Find a connection corresponding to a tuple. */ struct nf_conntrack_tuple_hash * -nf_conntrack_find_get(const struct nf_conntrack_tuple *tuple) +nf_conntrack_find_get(struct net *net, const struct nf_conntrack_tuple *tuple) { struct nf_conntrack_tuple_hash *h; struct nf_conn *ct; rcu_read_lock(); - h = __nf_conntrack_find(tuple); + h = __nf_conntrack_find(net, tuple); if (h) { ct = nf_ct_tuplehash_to_ctrack(h); if (unlikely(!atomic_inc_not_zero(&ct->ct_general.use))) @@ -290,10 +286,12 @@ static void __nf_conntrack_hash_insert(struct nf_conn *ct, unsigned int hash, unsigned int repl_hash) { + struct net *net = nf_ct_net(ct); + hlist_add_head_rcu(&ct->tuplehash[IP_CT_DIR_ORIGINAL].hnode, - &nf_conntrack_hash[hash]); + &net->ct.hash[hash]); hlist_add_head_rcu(&ct->tuplehash[IP_CT_DIR_REPLY].hnode, - &nf_conntrack_hash[repl_hash]); + &net->ct.hash[repl_hash]); } void nf_conntrack_hash_insert(struct nf_conn *ct) @@ -319,8 +317,10 @@ __nf_conntrack_confirm(struct sk_buff *skb) struct nf_conn_help *help; struct hlist_node *n; enum ip_conntrack_info ctinfo; + struct net *net; ct = nf_ct_get(skb, &ctinfo); + net = nf_ct_net(ct); /* ipt_REJECT uses nf_conntrack_attach to attach related ICMP/TCP RST packets in other direction. Actual packet @@ -347,11 +347,11 @@ __nf_conntrack_confirm(struct sk_buff *skb) /* See if there's one in the list already, including reverse: NAT could have grabbed it without realizing, since we're not in the hash. If there is, we lost race. */ - hlist_for_each_entry(h, n, &nf_conntrack_hash[hash], hnode) + hlist_for_each_entry(h, n, &net->ct.hash[hash], hnode) if (nf_ct_tuple_equal(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple, &h->tuple)) goto out; - hlist_for_each_entry(h, n, &nf_conntrack_hash[repl_hash], hnode) + hlist_for_each_entry(h, n, &net->ct.hash[repl_hash], hnode) if (nf_ct_tuple_equal(&ct->tuplehash[IP_CT_DIR_REPLY].tuple, &h->tuple)) goto out; @@ -394,6 +394,7 @@ int nf_conntrack_tuple_taken(const struct nf_conntrack_tuple *tuple, const struct nf_conn *ignored_conntrack) { + struct net *net = nf_ct_net(ignored_conntrack); struct nf_conntrack_tuple_hash *h; struct hlist_node *n; unsigned int hash = hash_conntrack(tuple); @@ -402,7 +403,7 @@ nf_conntrack_tuple_taken(const struct nf_conntrack_tuple *tuple, * least once for the stats anyway. */ rcu_read_lock_bh(); - hlist_for_each_entry_rcu(h, n, &nf_conntrack_hash[hash], hnode) { + hlist_for_each_entry_rcu(h, n, &net->ct.hash[hash], hnode) { if (nf_ct_tuplehash_to_ctrack(h) != ignored_conntrack && nf_ct_tuple_equal(tuple, &h->tuple)) { NF_CT_STAT_INC(found); @@ -421,7 +422,7 @@ EXPORT_SYMBOL_GPL(nf_conntrack_tuple_taken); /* There's a small race here where we may free a just-assured connection. Too bad: we're in trouble anyway. */ -static noinline int early_drop(unsigned int hash) +static noinline int early_drop(struct net *net, unsigned int hash) { /* Use oldest entry, which is roughly LRU */ struct nf_conntrack_tuple_hash *h; @@ -432,7 +433,7 @@ static noinline int early_drop(unsigned int hash) rcu_read_lock(); for (i = 0; i < nf_conntrack_htable_size; i++) { - hlist_for_each_entry_rcu(h, n, &nf_conntrack_hash[hash], + hlist_for_each_entry_rcu(h, n, &net->ct.hash[hash], hnode) { tmp = nf_ct_tuplehash_to_ctrack(h); if (!test_bit(IPS_ASSURED_BIT, &tmp->status)) @@ -478,7 +479,7 @@ struct nf_conn *nf_conntrack_alloc(struct net *net, if (nf_conntrack_max && unlikely(atomic_read(&net->ct.count) > nf_conntrack_max)) { unsigned int hash = hash_conntrack(orig); - if (!early_drop(hash)) { + if (!early_drop(net, hash)) { atomic_dec(&net->ct.count); if (net_ratelimit()) printk(KERN_WARNING @@ -631,7 +632,7 @@ resolve_normal_ct(struct sk_buff *skb, } /* look for tuple match */ - h = nf_conntrack_find_get(&tuple); + h = nf_conntrack_find_get(&init_net, &tuple); if (!h) { h = init_conntrack(&init_net, &tuple, l3proto, l4proto, skb, dataoff); @@ -941,7 +942,7 @@ static void nf_conntrack_attach(struct sk_buff *nskb, struct sk_buff *skb) /* Bring out ya dead! */ static struct nf_conn * -get_next_corpse(int (*iter)(struct nf_conn *i, void *data), +get_next_corpse(struct net *net, int (*iter)(struct nf_conn *i, void *data), void *data, unsigned int *bucket) { struct nf_conntrack_tuple_hash *h; @@ -950,7 +951,7 @@ get_next_corpse(int (*iter)(struct nf_conn *i, void *data), spin_lock_bh(&nf_conntrack_lock); for (; *bucket < nf_conntrack_htable_size; (*bucket)++) { - hlist_for_each_entry(h, n, &nf_conntrack_hash[*bucket], hnode) { + hlist_for_each_entry(h, n, &net->ct.hash[*bucket], hnode) { ct = nf_ct_tuplehash_to_ctrack(h); if (iter(ct, data)) goto found; @@ -969,13 +970,14 @@ found: return ct; } -void -nf_ct_iterate_cleanup(int (*iter)(struct nf_conn *i, void *data), void *data) +void nf_ct_iterate_cleanup(struct net *net, + int (*iter)(struct nf_conn *i, void *data), + void *data) { struct nf_conn *ct; unsigned int bucket = 0; - while ((ct = get_next_corpse(iter, data, &bucket)) != NULL) { + while ((ct = get_next_corpse(net, iter, data, &bucket)) != NULL) { /* Time to push up daises... */ if (del_timer(&ct->timeout)) death_by_timeout((unsigned long)ct); @@ -1001,9 +1003,9 @@ void nf_ct_free_hashtable(struct hlist_head *hash, int vmalloced, unsigned int s } EXPORT_SYMBOL_GPL(nf_ct_free_hashtable); -void nf_conntrack_flush(void) +void nf_conntrack_flush(struct net *net) { - nf_ct_iterate_cleanup(kill_all, NULL); + nf_ct_iterate_cleanup(net, kill_all, NULL); } EXPORT_SYMBOL_GPL(nf_conntrack_flush); @@ -1020,7 +1022,7 @@ void nf_conntrack_cleanup(struct net *net) nf_ct_event_cache_flush(); i_see_dead_people: - nf_conntrack_flush(); + nf_conntrack_flush(net); if (atomic_read(&net->ct.count) != 0) { schedule(); goto i_see_dead_people; @@ -1032,7 +1034,7 @@ void nf_conntrack_cleanup(struct net *net) rcu_assign_pointer(nf_ct_destroy, NULL); kmem_cache_destroy(nf_conntrack_cachep); - nf_ct_free_hashtable(nf_conntrack_hash, nf_conntrack_vmalloc, + nf_ct_free_hashtable(net->ct.hash, net->ct.hash_vmalloc, nf_conntrack_htable_size); nf_conntrack_acct_fini(); @@ -1097,8 +1099,8 @@ int nf_conntrack_set_hashsize(const char *val, struct kernel_param *kp) */ spin_lock_bh(&nf_conntrack_lock); for (i = 0; i < nf_conntrack_htable_size; i++) { - while (!hlist_empty(&nf_conntrack_hash[i])) { - h = hlist_entry(nf_conntrack_hash[i].first, + while (!hlist_empty(&init_net.ct.hash[i])) { + h = hlist_entry(init_net.ct.hash[i].first, struct nf_conntrack_tuple_hash, hnode); hlist_del_rcu(&h->hnode); bucket = __hash_conntrack(&h->tuple, hashsize, rnd); @@ -1106,12 +1108,12 @@ int nf_conntrack_set_hashsize(const char *val, struct kernel_param *kp) } } old_size = nf_conntrack_htable_size; - old_vmalloced = nf_conntrack_vmalloc; - old_hash = nf_conntrack_hash; + old_vmalloced = init_net.ct.hash_vmalloc; + old_hash = init_net.ct.hash; nf_conntrack_htable_size = hashsize; - nf_conntrack_vmalloc = vmalloced; - nf_conntrack_hash = hash; + init_net.ct.hash_vmalloc = vmalloced; + init_net.ct.hash = hash; nf_conntrack_hash_rnd = rnd; spin_unlock_bh(&nf_conntrack_lock); @@ -1146,9 +1148,9 @@ int nf_conntrack_init(struct net *net) max_factor = 4; } atomic_set(&net->ct.count, 0); - nf_conntrack_hash = nf_ct_alloc_hashtable(&nf_conntrack_htable_size, - &nf_conntrack_vmalloc); - if (!nf_conntrack_hash) { + net->ct.hash = nf_ct_alloc_hashtable(&nf_conntrack_htable_size, + &net->ct.hash_vmalloc); + if (!net->ct.hash) { printk(KERN_ERR "Unable to create nf_conntrack_hash\n"); goto err_out; } @@ -1207,7 +1209,7 @@ out_fini_proto: err_free_conntrack_slab: kmem_cache_destroy(nf_conntrack_cachep); err_free_hash: - nf_ct_free_hashtable(nf_conntrack_hash, nf_conntrack_vmalloc, + nf_ct_free_hashtable(net->ct.hash, net->ct.hash_vmalloc, nf_conntrack_htable_size); err_out: return -ENOMEM; diff --git a/net/netfilter/nf_conntrack_helper.c b/net/netfilter/nf_conntrack_helper.c index 8e0b4c8f62a..d91278dfdaf 100644 --- a/net/netfilter/nf_conntrack_helper.c +++ b/net/netfilter/nf_conntrack_helper.c @@ -159,7 +159,7 @@ void nf_conntrack_helper_unregister(struct nf_conntrack_helper *me) hlist_for_each_entry(h, n, &unconfirmed, hnode) unhelp(h, me); for (i = 0; i < nf_conntrack_htable_size; i++) { - hlist_for_each_entry(h, n, &nf_conntrack_hash[i], hnode) + hlist_for_each_entry(h, n, &init_net.ct.hash[i], hnode) unhelp(h, me); } spin_unlock_bh(&nf_conntrack_lock); diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c index da3cdc8db70..918a3358a12 100644 --- a/net/netfilter/nf_conntrack_netlink.c +++ b/net/netfilter/nf_conntrack_netlink.c @@ -549,7 +549,7 @@ ctnetlink_dump_table(struct sk_buff *skb, struct netlink_callback *cb) last = (struct nf_conn *)cb->args[1]; for (; cb->args[0] < nf_conntrack_htable_size; cb->args[0]++) { restart: - hlist_for_each_entry_rcu(h, n, &nf_conntrack_hash[cb->args[0]], + hlist_for_each_entry_rcu(h, n, &init_net.ct.hash[cb->args[0]], hnode) { if (NF_CT_DIRECTION(h) != IP_CT_DIR_ORIGINAL) continue; @@ -794,14 +794,14 @@ ctnetlink_del_conntrack(struct sock *ctnl, struct sk_buff *skb, err = ctnetlink_parse_tuple(cda, &tuple, CTA_TUPLE_REPLY, u3); else { /* Flush the whole table */ - nf_conntrack_flush(); + nf_conntrack_flush(&init_net); return 0; } if (err < 0) return err; - h = nf_conntrack_find_get(&tuple); + h = nf_conntrack_find_get(&init_net, &tuple); if (!h) return -ENOENT; @@ -847,7 +847,7 @@ ctnetlink_get_conntrack(struct sock *ctnl, struct sk_buff *skb, if (err < 0) return err; - h = nf_conntrack_find_get(&tuple); + h = nf_conntrack_find_get(&init_net, &tuple); if (!h) return -ENOENT; @@ -1213,9 +1213,9 @@ ctnetlink_new_conntrack(struct sock *ctnl, struct sk_buff *skb, spin_lock_bh(&nf_conntrack_lock); if (cda[CTA_TUPLE_ORIG]) - h = __nf_conntrack_find(&otuple); + h = __nf_conntrack_find(&init_net, &otuple); else if (cda[CTA_TUPLE_REPLY]) - h = __nf_conntrack_find(&rtuple); + h = __nf_conntrack_find(&init_net, &rtuple); if (h == NULL) { struct nf_conntrack_tuple master; @@ -1230,7 +1230,7 @@ ctnetlink_new_conntrack(struct sock *ctnl, struct sk_buff *skb, if (err < 0) goto out_unlock; - master_h = __nf_conntrack_find(&master); + master_h = __nf_conntrack_find(&init_net, &master); if (master_h == NULL) { err = -ENOENT; goto out_unlock; @@ -1670,7 +1670,7 @@ ctnetlink_create_expect(struct nlattr *cda[], u_int8_t u3) return err; /* Look for master conntrack of this expectation */ - h = nf_conntrack_find_get(&master_tuple); + h = nf_conntrack_find_get(&init_net, &master_tuple); if (!h) return -ENOENT; ct = nf_ct_tuplehash_to_ctrack(h); diff --git a/net/netfilter/nf_conntrack_pptp.c b/net/netfilter/nf_conntrack_pptp.c index 97e54b0e43a..7caf45b59d2 100644 --- a/net/netfilter/nf_conntrack_pptp.c +++ b/net/netfilter/nf_conntrack_pptp.c @@ -143,7 +143,7 @@ static int destroy_sibling_or_exp(const struct nf_conntrack_tuple *t) pr_debug("trying to timeout ct or exp for tuple "); nf_ct_dump_tuple(t); - h = nf_conntrack_find_get(t); + h = nf_conntrack_find_get(&init_net, t); if (h) { sibling = nf_ct_tuplehash_to_ctrack(h); pr_debug("setting timeout of conntrack %p to 0\n", sibling); diff --git a/net/netfilter/nf_conntrack_proto.c b/net/netfilter/nf_conntrack_proto.c index a49fc932629..3a2f7ef997f 100644 --- a/net/netfilter/nf_conntrack_proto.c +++ b/net/netfilter/nf_conntrack_proto.c @@ -219,7 +219,7 @@ void nf_conntrack_l3proto_unregister(struct nf_conntrack_l3proto *proto) synchronize_rcu(); /* Remove all contrack entries for this protocol */ - nf_ct_iterate_cleanup(kill_l3proto, proto); + nf_ct_iterate_cleanup(&init_net, kill_l3proto, proto); } EXPORT_SYMBOL_GPL(nf_conntrack_l3proto_unregister); @@ -328,7 +328,7 @@ void nf_conntrack_l4proto_unregister(struct nf_conntrack_l4proto *l4proto) synchronize_rcu(); /* Remove all contrack entries for this protocol */ - nf_ct_iterate_cleanup(kill_l4proto, l4proto); + nf_ct_iterate_cleanup(&init_net, kill_l4proto, l4proto); } EXPORT_SYMBOL_GPL(nf_conntrack_l4proto_unregister); diff --git a/net/netfilter/nf_conntrack_standalone.c b/net/netfilter/nf_conntrack_standalone.c index 021b505907d..5456e4b9424 100644 --- a/net/netfilter/nf_conntrack_standalone.c +++ b/net/netfilter/nf_conntrack_standalone.c @@ -51,7 +51,7 @@ static struct hlist_node *ct_get_first(struct seq_file *seq) for (st->bucket = 0; st->bucket < nf_conntrack_htable_size; st->bucket++) { - n = rcu_dereference(nf_conntrack_hash[st->bucket].first); + n = rcu_dereference(init_net.ct.hash[st->bucket].first); if (n) return n; } @@ -67,7 +67,7 @@ static struct hlist_node *ct_get_next(struct seq_file *seq, while (head == NULL) { if (++st->bucket >= nf_conntrack_htable_size) return NULL; - head = rcu_dereference(nf_conntrack_hash[st->bucket].first); + head = rcu_dereference(init_net.ct.hash[st->bucket].first); } return head; } diff --git a/net/netfilter/xt_connlimit.c b/net/netfilter/xt_connlimit.c index d2453d182d6..bd00830ff69 100644 --- a/net/netfilter/xt_connlimit.c +++ b/net/netfilter/xt_connlimit.c @@ -123,7 +123,7 @@ static int count_them(struct xt_connlimit_data *data, /* check the saved connections */ list_for_each_entry_safe(conn, tmp, hash, list) { - found = __nf_conntrack_find(&conn->tuple); + found = __nf_conntrack_find(&init_net, &conn->tuple); found_ct = NULL; if (found != NULL) -- cgit v1.2.3-70-g09d2 From 9b03f38d0487f3908696242286d934c9b38f9d2a Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Wed, 8 Oct 2008 11:35:03 +0200 Subject: netfilter: netns nf_conntrack: per-netns expectations Make per-netns a) expectation hash and b) expectations count. Expectations always belongs to netns to which it's master conntrack belong. This is natural and doesn't bloat expectation. Proc files and leaf users are stubbed to init_net, this is temporary. Signed-off-by: Alexey Dobriyan Signed-off-by: Patrick McHardy --- include/net/netfilter/nf_conntrack_expect.h | 20 +++++--- include/net/netns/conntrack.h | 3 ++ .../netfilter/nf_conntrack_l3proto_ipv4_compat.c | 6 ++- net/ipv4/netfilter/nf_nat_pptp.c | 2 +- net/netfilter/nf_conntrack_core.c | 8 ++-- net/netfilter/nf_conntrack_expect.c | 55 +++++++++++----------- net/netfilter/nf_conntrack_h323_main.c | 2 +- net/netfilter/nf_conntrack_helper.c | 2 +- net/netfilter/nf_conntrack_netlink.c | 13 ++--- net/netfilter/nf_conntrack_pptp.c | 4 +- net/netfilter/nf_conntrack_sip.c | 2 +- 11 files changed, 66 insertions(+), 51 deletions(-) (limited to 'include/net') diff --git a/include/net/netfilter/nf_conntrack_expect.h b/include/net/netfilter/nf_conntrack_expect.h index 4c4d894cb9b..37a7fc1164b 100644 --- a/include/net/netfilter/nf_conntrack_expect.h +++ b/include/net/netfilter/nf_conntrack_expect.h @@ -6,7 +6,6 @@ #define _NF_CONNTRACK_EXPECT_H #include -extern struct hlist_head *nf_ct_expect_hash; extern unsigned int nf_ct_expect_hsize; extern unsigned int nf_ct_expect_max; @@ -56,6 +55,15 @@ struct nf_conntrack_expect struct rcu_head rcu; }; +static inline struct net *nf_ct_exp_net(struct nf_conntrack_expect *exp) +{ +#ifdef CONFIG_NET_NS + return exp->master->ct_net; /* by definition */ +#else + return &init_net; +#endif +} + struct nf_conntrack_expect_policy { unsigned int max_expected; @@ -67,17 +75,17 @@ struct nf_conntrack_expect_policy #define NF_CT_EXPECT_PERMANENT 0x1 #define NF_CT_EXPECT_INACTIVE 0x2 -int nf_conntrack_expect_init(void); -void nf_conntrack_expect_fini(void); +int nf_conntrack_expect_init(struct net *net); +void nf_conntrack_expect_fini(struct net *net); struct nf_conntrack_expect * -__nf_ct_expect_find(const struct nf_conntrack_tuple *tuple); +__nf_ct_expect_find(struct net *net, const struct nf_conntrack_tuple *tuple); struct nf_conntrack_expect * -nf_ct_expect_find_get(const struct nf_conntrack_tuple *tuple); +nf_ct_expect_find_get(struct net *net, const struct nf_conntrack_tuple *tuple); struct nf_conntrack_expect * -nf_ct_find_expectation(const struct nf_conntrack_tuple *tuple); +nf_ct_find_expectation(struct net *net, const struct nf_conntrack_tuple *tuple); void nf_ct_unlink_expect(struct nf_conntrack_expect *exp); void nf_ct_remove_expectations(struct nf_conn *ct); diff --git a/include/net/netns/conntrack.h b/include/net/netns/conntrack.h index b767683f112..e453a33f3e9 100644 --- a/include/net/netns/conntrack.h +++ b/include/net/netns/conntrack.h @@ -5,7 +5,10 @@ struct netns_ct { atomic_t count; + unsigned int expect_count; struct hlist_head *hash; + struct hlist_head *expect_hash; int hash_vmalloc; + int expect_vmalloc; }; #endif diff --git a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c index 8e0afdc2b13..f8636a57e8c 100644 --- a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c +++ b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c @@ -177,11 +177,12 @@ struct ct_expect_iter_state { static struct hlist_node *ct_expect_get_first(struct seq_file *seq) { + struct net *net = &init_net; struct ct_expect_iter_state *st = seq->private; struct hlist_node *n; for (st->bucket = 0; st->bucket < nf_ct_expect_hsize; st->bucket++) { - n = rcu_dereference(nf_ct_expect_hash[st->bucket].first); + n = rcu_dereference(net->ct.expect_hash[st->bucket].first); if (n) return n; } @@ -191,13 +192,14 @@ static struct hlist_node *ct_expect_get_first(struct seq_file *seq) static struct hlist_node *ct_expect_get_next(struct seq_file *seq, struct hlist_node *head) { + struct net *net = &init_net; struct ct_expect_iter_state *st = seq->private; head = rcu_dereference(head->next); while (head == NULL) { if (++st->bucket >= nf_ct_expect_hsize) return NULL; - head = rcu_dereference(nf_ct_expect_hash[st->bucket].first); + head = rcu_dereference(net->ct.expect_hash[st->bucket].first); } return head; } diff --git a/net/ipv4/netfilter/nf_nat_pptp.c b/net/ipv4/netfilter/nf_nat_pptp.c index da3d91a5ef5..e4bdddc6034 100644 --- a/net/ipv4/netfilter/nf_nat_pptp.c +++ b/net/ipv4/netfilter/nf_nat_pptp.c @@ -73,7 +73,7 @@ static void pptp_nat_expected(struct nf_conn *ct, pr_debug("trying to unexpect other dir: "); nf_ct_dump_tuple_ip(&t); - other_exp = nf_ct_expect_find_get(&t); + other_exp = nf_ct_expect_find_get(&init_net, &t); if (other_exp) { nf_ct_unexpect_related(other_exp); nf_ct_expect_put(other_exp); diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index da56b260552..c188edea249 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -562,7 +562,7 @@ init_conntrack(struct net *net, nf_ct_acct_ext_add(ct, GFP_ATOMIC); spin_lock_bh(&nf_conntrack_lock); - exp = nf_ct_find_expectation(tuple); + exp = nf_ct_find_expectation(net, tuple); if (exp) { pr_debug("conntrack: expectation arrives ct=%p exp=%p\n", ct, exp); @@ -1038,7 +1038,7 @@ void nf_conntrack_cleanup(struct net *net) nf_conntrack_htable_size); nf_conntrack_acct_fini(); - nf_conntrack_expect_fini(); + nf_conntrack_expect_fini(net); nf_conntrack_helper_fini(); nf_conntrack_proto_fini(); } @@ -1173,7 +1173,7 @@ int nf_conntrack_init(struct net *net) if (ret < 0) goto err_free_conntrack_slab; - ret = nf_conntrack_expect_init(); + ret = nf_conntrack_expect_init(net); if (ret < 0) goto out_fini_proto; @@ -1203,7 +1203,7 @@ int nf_conntrack_init(struct net *net) out_fini_helper: nf_conntrack_helper_fini(); out_fini_expect: - nf_conntrack_expect_fini(); + nf_conntrack_expect_fini(net); out_fini_proto: nf_conntrack_proto_fini(); err_free_conntrack_slab: diff --git a/net/netfilter/nf_conntrack_expect.c b/net/netfilter/nf_conntrack_expect.c index e6a79f2a7c5..5307316356e 100644 --- a/net/netfilter/nf_conntrack_expect.c +++ b/net/netfilter/nf_conntrack_expect.c @@ -28,17 +28,12 @@ #include #include -struct hlist_head *nf_ct_expect_hash __read_mostly; -EXPORT_SYMBOL_GPL(nf_ct_expect_hash); - unsigned int nf_ct_expect_hsize __read_mostly; EXPORT_SYMBOL_GPL(nf_ct_expect_hsize); static unsigned int nf_ct_expect_hash_rnd __read_mostly; -static unsigned int nf_ct_expect_count; unsigned int nf_ct_expect_max __read_mostly; static int nf_ct_expect_hash_rnd_initted __read_mostly; -static int nf_ct_expect_vmalloc; static struct kmem_cache *nf_ct_expect_cachep __read_mostly; @@ -46,12 +41,13 @@ static struct kmem_cache *nf_ct_expect_cachep __read_mostly; void nf_ct_unlink_expect(struct nf_conntrack_expect *exp) { struct nf_conn_help *master_help = nfct_help(exp->master); + struct net *net = nf_ct_exp_net(exp); NF_CT_ASSERT(master_help); NF_CT_ASSERT(!timer_pending(&exp->timeout)); hlist_del_rcu(&exp->hnode); - nf_ct_expect_count--; + net->ct.expect_count--; hlist_del(&exp->lnode); master_help->expecting[exp->class]--; @@ -87,17 +83,17 @@ static unsigned int nf_ct_expect_dst_hash(const struct nf_conntrack_tuple *tuple } struct nf_conntrack_expect * -__nf_ct_expect_find(const struct nf_conntrack_tuple *tuple) +__nf_ct_expect_find(struct net *net, const struct nf_conntrack_tuple *tuple) { struct nf_conntrack_expect *i; struct hlist_node *n; unsigned int h; - if (!nf_ct_expect_count) + if (!net->ct.expect_count) return NULL; h = nf_ct_expect_dst_hash(tuple); - hlist_for_each_entry_rcu(i, n, &nf_ct_expect_hash[h], hnode) { + hlist_for_each_entry_rcu(i, n, &net->ct.expect_hash[h], hnode) { if (nf_ct_tuple_mask_cmp(tuple, &i->tuple, &i->mask)) return i; } @@ -107,12 +103,12 @@ EXPORT_SYMBOL_GPL(__nf_ct_expect_find); /* Just find a expectation corresponding to a tuple. */ struct nf_conntrack_expect * -nf_ct_expect_find_get(const struct nf_conntrack_tuple *tuple) +nf_ct_expect_find_get(struct net *net, const struct nf_conntrack_tuple *tuple) { struct nf_conntrack_expect *i; rcu_read_lock(); - i = __nf_ct_expect_find(tuple); + i = __nf_ct_expect_find(net, tuple); if (i && !atomic_inc_not_zero(&i->use)) i = NULL; rcu_read_unlock(); @@ -124,17 +120,17 @@ EXPORT_SYMBOL_GPL(nf_ct_expect_find_get); /* If an expectation for this connection is found, it gets delete from * global list then returned. */ struct nf_conntrack_expect * -nf_ct_find_expectation(const struct nf_conntrack_tuple *tuple) +nf_ct_find_expectation(struct net *net, const struct nf_conntrack_tuple *tuple) { struct nf_conntrack_expect *i, *exp = NULL; struct hlist_node *n; unsigned int h; - if (!nf_ct_expect_count) + if (!net->ct.expect_count) return NULL; h = nf_ct_expect_dst_hash(tuple); - hlist_for_each_entry(i, n, &nf_ct_expect_hash[h], hnode) { + hlist_for_each_entry(i, n, &net->ct.expect_hash[h], hnode) { if (!(i->flags & NF_CT_EXPECT_INACTIVE) && nf_ct_tuple_mask_cmp(tuple, &i->tuple, &i->mask)) { exp = i; @@ -311,6 +307,7 @@ EXPORT_SYMBOL_GPL(nf_ct_expect_put); static void nf_ct_expect_insert(struct nf_conntrack_expect *exp) { struct nf_conn_help *master_help = nfct_help(exp->master); + struct net *net = nf_ct_exp_net(exp); const struct nf_conntrack_expect_policy *p; unsigned int h = nf_ct_expect_dst_hash(&exp->tuple); @@ -319,8 +316,8 @@ static void nf_ct_expect_insert(struct nf_conntrack_expect *exp) hlist_add_head(&exp->lnode, &master_help->expectations); master_help->expecting[exp->class]++; - hlist_add_head_rcu(&exp->hnode, &nf_ct_expect_hash[h]); - nf_ct_expect_count++; + hlist_add_head_rcu(&exp->hnode, &net->ct.expect_hash[h]); + net->ct.expect_count++; setup_timer(&exp->timeout, nf_ct_expectation_timed_out, (unsigned long)exp); @@ -371,6 +368,7 @@ int nf_ct_expect_related(struct nf_conntrack_expect *expect) struct nf_conntrack_expect *i; struct nf_conn *master = expect->master; struct nf_conn_help *master_help = nfct_help(master); + struct net *net = nf_ct_exp_net(expect); struct hlist_node *n; unsigned int h; int ret; @@ -383,7 +381,7 @@ int nf_ct_expect_related(struct nf_conntrack_expect *expect) goto out; } h = nf_ct_expect_dst_hash(&expect->tuple); - hlist_for_each_entry(i, n, &nf_ct_expect_hash[h], hnode) { + hlist_for_each_entry(i, n, &net->ct.expect_hash[h], hnode) { if (expect_matches(i, expect)) { /* Refresh timer: if it's dying, ignore.. */ if (refresh_timer(i)) { @@ -406,7 +404,7 @@ int nf_ct_expect_related(struct nf_conntrack_expect *expect) } } - if (nf_ct_expect_count >= nf_ct_expect_max) { + if (net->ct.expect_count >= nf_ct_expect_max) { if (net_ratelimit()) printk(KERN_WARNING "nf_conntrack: expectation table full\n"); @@ -430,11 +428,12 @@ struct ct_expect_iter_state { static struct hlist_node *ct_expect_get_first(struct seq_file *seq) { + struct net *net = &init_net; struct ct_expect_iter_state *st = seq->private; struct hlist_node *n; for (st->bucket = 0; st->bucket < nf_ct_expect_hsize; st->bucket++) { - n = rcu_dereference(nf_ct_expect_hash[st->bucket].first); + n = rcu_dereference(net->ct.expect_hash[st->bucket].first); if (n) return n; } @@ -444,13 +443,14 @@ static struct hlist_node *ct_expect_get_first(struct seq_file *seq) static struct hlist_node *ct_expect_get_next(struct seq_file *seq, struct hlist_node *head) { + struct net *net = &init_net; struct ct_expect_iter_state *st = seq->private; head = rcu_dereference(head->next); while (head == NULL) { if (++st->bucket >= nf_ct_expect_hsize) return NULL; - head = rcu_dereference(nf_ct_expect_hash[st->bucket].first); + head = rcu_dereference(net->ct.expect_hash[st->bucket].first); } return head; } @@ -558,7 +558,7 @@ static void exp_proc_remove(void) module_param_named(expect_hashsize, nf_ct_expect_hsize, uint, 0600); -int nf_conntrack_expect_init(void) +int nf_conntrack_expect_init(struct net *net) { int err = -ENOMEM; @@ -569,9 +569,10 @@ int nf_conntrack_expect_init(void) } nf_ct_expect_max = nf_ct_expect_hsize * 4; - nf_ct_expect_hash = nf_ct_alloc_hashtable(&nf_ct_expect_hsize, - &nf_ct_expect_vmalloc); - if (nf_ct_expect_hash == NULL) + net->ct.expect_count = 0; + net->ct.expect_hash = nf_ct_alloc_hashtable(&nf_ct_expect_hsize, + &net->ct.expect_vmalloc); + if (net->ct.expect_hash == NULL) goto err1; nf_ct_expect_cachep = kmem_cache_create("nf_conntrack_expect", @@ -589,16 +590,16 @@ int nf_conntrack_expect_init(void) err3: kmem_cache_destroy(nf_ct_expect_cachep); err2: - nf_ct_free_hashtable(nf_ct_expect_hash, nf_ct_expect_vmalloc, + nf_ct_free_hashtable(net->ct.expect_hash, net->ct.expect_vmalloc, nf_ct_expect_hsize); err1: return err; } -void nf_conntrack_expect_fini(void) +void nf_conntrack_expect_fini(struct net *net) { exp_proc_remove(); kmem_cache_destroy(nf_ct_expect_cachep); - nf_ct_free_hashtable(nf_ct_expect_hash, nf_ct_expect_vmalloc, + nf_ct_free_hashtable(net->ct.expect_hash, net->ct.expect_vmalloc, nf_ct_expect_hsize); } diff --git a/net/netfilter/nf_conntrack_h323_main.c b/net/netfilter/nf_conntrack_h323_main.c index 5dc0478108a..dfb826c973d 100644 --- a/net/netfilter/nf_conntrack_h323_main.c +++ b/net/netfilter/nf_conntrack_h323_main.c @@ -1219,7 +1219,7 @@ static struct nf_conntrack_expect *find_expect(struct nf_conn *ct, tuple.dst.u.tcp.port = port; tuple.dst.protonum = IPPROTO_TCP; - exp = __nf_ct_expect_find(&tuple); + exp = __nf_ct_expect_find(&init_net, &tuple); if (exp && exp->master == ct) return exp; return NULL; diff --git a/net/netfilter/nf_conntrack_helper.c b/net/netfilter/nf_conntrack_helper.c index d91278dfdaf..c793db810cd 100644 --- a/net/netfilter/nf_conntrack_helper.c +++ b/net/netfilter/nf_conntrack_helper.c @@ -145,7 +145,7 @@ void nf_conntrack_helper_unregister(struct nf_conntrack_helper *me) /* Get rid of expectations */ for (i = 0; i < nf_ct_expect_hsize; i++) { hlist_for_each_entry_safe(exp, n, next, - &nf_ct_expect_hash[i], hnode) { + &init_net.ct.expect_hash[i], hnode) { struct nf_conn_help *help = nfct_help(exp->master); if ((help->helper == me || exp->helper == me) && del_timer(&exp->timeout)) { diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c index 918a3358a12..cadfd15b44f 100644 --- a/net/netfilter/nf_conntrack_netlink.c +++ b/net/netfilter/nf_conntrack_netlink.c @@ -1458,6 +1458,7 @@ static int ctnetlink_exp_done(struct netlink_callback *cb) static int ctnetlink_exp_dump_table(struct sk_buff *skb, struct netlink_callback *cb) { + struct net *net = &init_net; struct nf_conntrack_expect *exp, *last; struct nfgenmsg *nfmsg = NLMSG_DATA(cb->nlh); struct hlist_node *n; @@ -1467,7 +1468,7 @@ ctnetlink_exp_dump_table(struct sk_buff *skb, struct netlink_callback *cb) last = (struct nf_conntrack_expect *)cb->args[1]; for (; cb->args[0] < nf_ct_expect_hsize; cb->args[0]++) { restart: - hlist_for_each_entry(exp, n, &nf_ct_expect_hash[cb->args[0]], + hlist_for_each_entry(exp, n, &net->ct.expect_hash[cb->args[0]], hnode) { if (l3proto && exp->tuple.src.l3num != l3proto) continue; @@ -1529,7 +1530,7 @@ ctnetlink_get_expect(struct sock *ctnl, struct sk_buff *skb, if (err < 0) return err; - exp = nf_ct_expect_find_get(&tuple); + exp = nf_ct_expect_find_get(&init_net, &tuple); if (!exp) return -ENOENT; @@ -1583,7 +1584,7 @@ ctnetlink_del_expect(struct sock *ctnl, struct sk_buff *skb, return err; /* bump usage count to 2 */ - exp = nf_ct_expect_find_get(&tuple); + exp = nf_ct_expect_find_get(&init_net, &tuple); if (!exp) return -ENOENT; @@ -1613,7 +1614,7 @@ ctnetlink_del_expect(struct sock *ctnl, struct sk_buff *skb, } for (i = 0; i < nf_ct_expect_hsize; i++) { hlist_for_each_entry_safe(exp, n, next, - &nf_ct_expect_hash[i], + &init_net.ct.expect_hash[i], hnode) { m_help = nfct_help(exp->master); if (m_help->helper == h @@ -1629,7 +1630,7 @@ ctnetlink_del_expect(struct sock *ctnl, struct sk_buff *skb, spin_lock_bh(&nf_conntrack_lock); for (i = 0; i < nf_ct_expect_hsize; i++) { hlist_for_each_entry_safe(exp, n, next, - &nf_ct_expect_hash[i], + &init_net.ct.expect_hash[i], hnode) { if (del_timer(&exp->timeout)) { nf_ct_unlink_expect(exp); @@ -1724,7 +1725,7 @@ ctnetlink_new_expect(struct sock *ctnl, struct sk_buff *skb, return err; spin_lock_bh(&nf_conntrack_lock); - exp = __nf_ct_expect_find(&tuple); + exp = __nf_ct_expect_find(&init_net, &tuple); if (!exp) { spin_unlock_bh(&nf_conntrack_lock); diff --git a/net/netfilter/nf_conntrack_pptp.c b/net/netfilter/nf_conntrack_pptp.c index 7caf45b59d2..5db7df5d19b 100644 --- a/net/netfilter/nf_conntrack_pptp.c +++ b/net/netfilter/nf_conntrack_pptp.c @@ -121,7 +121,7 @@ static void pptp_expectfn(struct nf_conn *ct, pr_debug("trying to unexpect other dir: "); nf_ct_dump_tuple(&inv_t); - exp_other = nf_ct_expect_find_get(&inv_t); + exp_other = nf_ct_expect_find_get(&init_net, &inv_t); if (exp_other) { /* delete other expectation. */ pr_debug("found\n"); @@ -154,7 +154,7 @@ static int destroy_sibling_or_exp(const struct nf_conntrack_tuple *t) nf_ct_put(sibling); return 1; } else { - exp = nf_ct_expect_find_get(t); + exp = nf_ct_expect_find_get(&init_net, t); if (exp) { pr_debug("unexpect_related of expect %p\n", exp); nf_ct_unexpect_related(exp); diff --git a/net/netfilter/nf_conntrack_sip.c b/net/netfilter/nf_conntrack_sip.c index 1fa306be60f..a006080eb38 100644 --- a/net/netfilter/nf_conntrack_sip.c +++ b/net/netfilter/nf_conntrack_sip.c @@ -775,7 +775,7 @@ static int set_expected_rtp_rtcp(struct sk_buff *skb, rcu_read_lock(); do { - exp = __nf_ct_expect_find(&tuple); + exp = __nf_ct_expect_find(&init_net, &tuple); if (!exp || exp->master == ct || nfct_help(exp->master)->helper != nfct_help(ct)->helper || -- cgit v1.2.3-70-g09d2 From 63c9a26264be108b52de087724673f8664570e34 Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Wed, 8 Oct 2008 11:35:04 +0200 Subject: netfilter: netns nf_conntrack: per-netns unconfirmed list What is confirmed connection in one netns can very well be unconfirmed in another one. Signed-off-by: Alexey Dobriyan Signed-off-by: Patrick McHardy --- include/net/netfilter/nf_conntrack_core.h | 1 - include/net/netns/conntrack.h | 2 ++ net/netfilter/nf_conntrack_core.c | 7 ++++--- net/netfilter/nf_conntrack_helper.c | 2 +- 4 files changed, 7 insertions(+), 5 deletions(-) (limited to 'include/net') diff --git a/include/net/netfilter/nf_conntrack_core.h b/include/net/netfilter/nf_conntrack_core.h index 1c373564396..b4b45c541da 100644 --- a/include/net/netfilter/nf_conntrack_core.h +++ b/include/net/netfilter/nf_conntrack_core.h @@ -72,6 +72,5 @@ print_tuple(struct seq_file *s, const struct nf_conntrack_tuple *tuple, const struct nf_conntrack_l4proto *proto); extern spinlock_t nf_conntrack_lock ; -extern struct hlist_head unconfirmed; #endif /* _NF_CONNTRACK_CORE_H */ diff --git a/include/net/netns/conntrack.h b/include/net/netns/conntrack.h index e453a33f3e9..6ddf58e142a 100644 --- a/include/net/netns/conntrack.h +++ b/include/net/netns/conntrack.h @@ -1,6 +1,7 @@ #ifndef __NETNS_CONNTRACK_H #define __NETNS_CONNTRACK_H +#include #include struct netns_ct { @@ -8,6 +9,7 @@ struct netns_ct { unsigned int expect_count; struct hlist_head *hash; struct hlist_head *expect_hash; + struct hlist_head unconfirmed; int hash_vmalloc; int expect_vmalloc; }; diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index c188edea249..2a105db1330 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -54,7 +54,6 @@ struct nf_conn nf_conntrack_untracked __read_mostly; EXPORT_SYMBOL_GPL(nf_conntrack_untracked); unsigned int nf_ct_log_invalid __read_mostly; -HLIST_HEAD(unconfirmed); static struct kmem_cache *nf_conntrack_cachep __read_mostly; DEFINE_PER_CPU(struct ip_conntrack_stat, nf_conntrack_stat); @@ -596,7 +595,8 @@ init_conntrack(struct net *net, } /* Overload tuple linked list to put us in unconfirmed list. */ - hlist_add_head(&ct->tuplehash[IP_CT_DIR_ORIGINAL].hnode, &unconfirmed); + hlist_add_head(&ct->tuplehash[IP_CT_DIR_ORIGINAL].hnode, + &net->ct.unconfirmed); spin_unlock_bh(&nf_conntrack_lock); @@ -957,7 +957,7 @@ get_next_corpse(struct net *net, int (*iter)(struct nf_conn *i, void *data), goto found; } } - hlist_for_each_entry(h, n, &unconfirmed, hnode) { + hlist_for_each_entry(h, n, &net->ct.unconfirmed, hnode) { ct = nf_ct_tuplehash_to_ctrack(h); if (iter(ct, data)) set_bit(IPS_DYING_BIT, &ct->status); @@ -1154,6 +1154,7 @@ int nf_conntrack_init(struct net *net) printk(KERN_ERR "Unable to create nf_conntrack_hash\n"); goto err_out; } + INIT_HLIST_HEAD(&net->ct.unconfirmed); nf_conntrack_max = max_factor * nf_conntrack_htable_size; diff --git a/net/netfilter/nf_conntrack_helper.c b/net/netfilter/nf_conntrack_helper.c index c793db810cd..920e778539a 100644 --- a/net/netfilter/nf_conntrack_helper.c +++ b/net/netfilter/nf_conntrack_helper.c @@ -156,7 +156,7 @@ void nf_conntrack_helper_unregister(struct nf_conntrack_helper *me) } /* Get rid of expecteds, set helpers to NULL. */ - hlist_for_each_entry(h, n, &unconfirmed, hnode) + hlist_for_each_entry(h, n, &init_net.ct.unconfirmed, hnode) unhelp(h, me); for (i = 0; i < nf_conntrack_htable_size; i++) { hlist_for_each_entry(h, n, &init_net.ct.hash[i], hnode) -- cgit v1.2.3-70-g09d2 From a702a65fc1376fc1f6757ec2a6960348af3f1876 Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Wed, 8 Oct 2008 11:35:04 +0200 Subject: netfilter: netns nf_conntrack: pass netns pointer to nf_conntrack_in() It's deducible from skb->dev or skb->dst->dev, but we know netns at the moment of call, so pass it down and use for finding and creating conntracks. Signed-off-by: Alexey Dobriyan Signed-off-by: Patrick McHardy --- include/net/netfilter/nf_conntrack_core.h | 3 ++- net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c | 4 ++-- net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c | 24 ++++++++++++++++-------- net/netfilter/nf_conntrack_core.c | 15 ++++++++------- 4 files changed, 28 insertions(+), 18 deletions(-) (limited to 'include/net') diff --git a/include/net/netfilter/nf_conntrack_core.h b/include/net/netfilter/nf_conntrack_core.h index b4b45c541da..e78afe7f28e 100644 --- a/include/net/netfilter/nf_conntrack_core.h +++ b/include/net/netfilter/nf_conntrack_core.h @@ -20,7 +20,8 @@ /* This header is used to share core functionality between the standalone connection tracking module, and the compatibility layer's use of connection tracking. */ -extern unsigned int nf_conntrack_in(u_int8_t pf, +extern unsigned int nf_conntrack_in(struct net *net, + u_int8_t pf, unsigned int hooknum, struct sk_buff *skb); diff --git a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c index 03dd108015c..2e4dd3fb002 100644 --- a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c +++ b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c @@ -172,7 +172,7 @@ static unsigned int ipv4_conntrack_in(unsigned int hooknum, const struct net_device *out, int (*okfn)(struct sk_buff *)) { - return nf_conntrack_in(PF_INET, hooknum, skb); + return nf_conntrack_in(dev_net(in), PF_INET, hooknum, skb); } static unsigned int ipv4_conntrack_local(unsigned int hooknum, @@ -188,7 +188,7 @@ static unsigned int ipv4_conntrack_local(unsigned int hooknum, printk("ipt_hook: happy cracking.\n"); return NF_ACCEPT; } - return nf_conntrack_in(PF_INET, hooknum, skb); + return nf_conntrack_in(dev_net(out), PF_INET, hooknum, skb); } /* Connection tracking may drop packets, but never alters them, so diff --git a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c index 85050c072ab..e91db16611d 100644 --- a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c +++ b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c @@ -211,11 +211,10 @@ static unsigned int ipv6_defrag(unsigned int hooknum, return NF_STOLEN; } -static unsigned int ipv6_conntrack_in(unsigned int hooknum, - struct sk_buff *skb, - const struct net_device *in, - const struct net_device *out, - int (*okfn)(struct sk_buff *)) +static unsigned int __ipv6_conntrack_in(struct net *net, + unsigned int hooknum, + struct sk_buff *skb, + int (*okfn)(struct sk_buff *)) { struct sk_buff *reasm = skb->nfct_reasm; @@ -225,7 +224,7 @@ static unsigned int ipv6_conntrack_in(unsigned int hooknum, if (!reasm->nfct) { unsigned int ret; - ret = nf_conntrack_in(PF_INET6, hooknum, reasm); + ret = nf_conntrack_in(net, PF_INET6, hooknum, reasm); if (ret != NF_ACCEPT) return ret; } @@ -235,7 +234,16 @@ static unsigned int ipv6_conntrack_in(unsigned int hooknum, return NF_ACCEPT; } - return nf_conntrack_in(PF_INET6, hooknum, skb); + return nf_conntrack_in(net, PF_INET6, hooknum, skb); +} + +static unsigned int ipv6_conntrack_in(unsigned int hooknum, + struct sk_buff *skb, + const struct net_device *in, + const struct net_device *out, + int (*okfn)(struct sk_buff *)) +{ + return __ipv6_conntrack_in(dev_net(in), hooknum, skb, okfn); } static unsigned int ipv6_conntrack_local(unsigned int hooknum, @@ -250,7 +258,7 @@ static unsigned int ipv6_conntrack_local(unsigned int hooknum, printk("ipv6_conntrack_local: packet too short\n"); return NF_ACCEPT; } - return ipv6_conntrack_in(hooknum, skb, in, out, okfn); + return __ipv6_conntrack_in(dev_net(out), hooknum, skb, okfn); } static struct nf_hook_ops ipv6_conntrack_ops[] __read_mostly = { diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index 2a105db1330..5c96d9732c7 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -611,7 +611,8 @@ init_conntrack(struct net *net, /* On success, returns conntrack ptr, sets skb->nfct and ctinfo */ static inline struct nf_conn * -resolve_normal_ct(struct sk_buff *skb, +resolve_normal_ct(struct net *net, + struct sk_buff *skb, unsigned int dataoff, u_int16_t l3num, u_int8_t protonum, @@ -632,10 +633,9 @@ resolve_normal_ct(struct sk_buff *skb, } /* look for tuple match */ - h = nf_conntrack_find_get(&init_net, &tuple); + h = nf_conntrack_find_get(net, &tuple); if (!h) { - h = init_conntrack(&init_net, &tuple, l3proto, l4proto, skb, - dataoff); + h = init_conntrack(net, &tuple, l3proto, l4proto, skb, dataoff); if (!h) return NULL; if (IS_ERR(h)) @@ -669,7 +669,8 @@ resolve_normal_ct(struct sk_buff *skb, } unsigned int -nf_conntrack_in(u_int8_t pf, unsigned int hooknum, struct sk_buff *skb) +nf_conntrack_in(struct net *net, u_int8_t pf, unsigned int hooknum, + struct sk_buff *skb) { struct nf_conn *ct; enum ip_conntrack_info ctinfo; @@ -709,8 +710,8 @@ nf_conntrack_in(u_int8_t pf, unsigned int hooknum, struct sk_buff *skb) return -ret; } - ct = resolve_normal_ct(skb, dataoff, pf, protonum, l3proto, l4proto, - &set_reply, &ctinfo); + ct = resolve_normal_ct(net, skb, dataoff, pf, protonum, + l3proto, l4proto, &set_reply, &ctinfo); if (!ct) { /* Not valid part of a connection */ NF_CT_STAT_INC_ATOMIC(invalid); -- cgit v1.2.3-70-g09d2 From 74c51a1497033e6ff7b8096797daca233a4a30df Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Wed, 8 Oct 2008 11:35:05 +0200 Subject: netfilter: netns nf_conntrack: pass netns pointer to L4 protocol's ->error hook Again, it's deducible from skb, but we're going to use it for nf_conntrack_checksum and statistics, so just pass it from upper layer. Signed-off-by: Alexey Dobriyan Signed-off-by: Patrick McHardy --- include/net/netfilter/nf_conntrack_l4proto.h | 2 +- net/ipv4/netfilter/nf_conntrack_proto_icmp.c | 8 ++++---- net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c | 9 +++++---- net/netfilter/nf_conntrack_core.c | 12 +++++++----- net/netfilter/nf_conntrack_proto_dccp.c | 6 +++--- net/netfilter/nf_conntrack_proto_tcp.c | 3 ++- net/netfilter/nf_conntrack_proto_udp.c | 2 +- net/netfilter/nf_conntrack_proto_udplite.c | 4 +++- 8 files changed, 26 insertions(+), 20 deletions(-) (limited to 'include/net') diff --git a/include/net/netfilter/nf_conntrack_l4proto.h b/include/net/netfilter/nf_conntrack_l4proto.h index d4376e97bae..97723d33c95 100644 --- a/include/net/netfilter/nf_conntrack_l4proto.h +++ b/include/net/netfilter/nf_conntrack_l4proto.h @@ -50,7 +50,7 @@ struct nf_conntrack_l4proto /* Called when a conntrack entry is destroyed */ void (*destroy)(struct nf_conn *ct); - int (*error)(struct sk_buff *skb, unsigned int dataoff, + int (*error)(struct net *net, struct sk_buff *skb, unsigned int dataoff, enum ip_conntrack_info *ctinfo, u_int8_t pf, unsigned int hooknum); diff --git a/net/ipv4/netfilter/nf_conntrack_proto_icmp.c b/net/ipv4/netfilter/nf_conntrack_proto_icmp.c index daf346377b6..8c7ed5bc959 100644 --- a/net/ipv4/netfilter/nf_conntrack_proto_icmp.c +++ b/net/ipv4/netfilter/nf_conntrack_proto_icmp.c @@ -123,7 +123,7 @@ static bool icmp_new(struct nf_conn *ct, const struct sk_buff *skb, /* Returns conntrack if it dealt with ICMP, and filled in skb fields */ static int -icmp_error_message(struct sk_buff *skb, +icmp_error_message(struct net *net, struct sk_buff *skb, enum ip_conntrack_info *ctinfo, unsigned int hooknum) { @@ -155,7 +155,7 @@ icmp_error_message(struct sk_buff *skb, *ctinfo = IP_CT_RELATED; - h = nf_conntrack_find_get(&init_net, &innertuple); + h = nf_conntrack_find_get(net, &innertuple); if (!h) { pr_debug("icmp_error_message: no match\n"); return -NF_ACCEPT; @@ -172,7 +172,7 @@ icmp_error_message(struct sk_buff *skb, /* Small and modified version of icmp_rcv */ static int -icmp_error(struct sk_buff *skb, unsigned int dataoff, +icmp_error(struct net *net, struct sk_buff *skb, unsigned int dataoff, enum ip_conntrack_info *ctinfo, u_int8_t pf, unsigned int hooknum) { const struct icmphdr *icmph; @@ -217,7 +217,7 @@ icmp_error(struct sk_buff *skb, unsigned int dataoff, && icmph->type != ICMP_REDIRECT) return NF_ACCEPT; - return icmp_error_message(skb, ctinfo, hooknum); + return icmp_error_message(net, skb, ctinfo, hooknum); } #if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE) diff --git a/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c b/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c index 548cf4f15c0..aabddfe2127 100644 --- a/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c +++ b/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c @@ -122,7 +122,8 @@ static bool icmpv6_new(struct nf_conn *ct, const struct sk_buff *skb, } static int -icmpv6_error_message(struct sk_buff *skb, +icmpv6_error_message(struct net *net, + struct sk_buff *skb, unsigned int icmp6off, enum ip_conntrack_info *ctinfo, unsigned int hooknum) @@ -156,7 +157,7 @@ icmpv6_error_message(struct sk_buff *skb, *ctinfo = IP_CT_RELATED; - h = nf_conntrack_find_get(&init_net, &intuple); + h = nf_conntrack_find_get(net, &intuple); if (!h) { pr_debug("icmpv6_error: no match\n"); return -NF_ACCEPT; @@ -172,7 +173,7 @@ icmpv6_error_message(struct sk_buff *skb, } static int -icmpv6_error(struct sk_buff *skb, unsigned int dataoff, +icmpv6_error(struct net *net, struct sk_buff *skb, unsigned int dataoff, enum ip_conntrack_info *ctinfo, u_int8_t pf, unsigned int hooknum) { const struct icmp6hdr *icmp6h; @@ -197,7 +198,7 @@ icmpv6_error(struct sk_buff *skb, unsigned int dataoff, if (icmp6h->icmp6_type >= 128) return NF_ACCEPT; - return icmpv6_error_message(skb, dataoff, ctinfo, hooknum); + return icmpv6_error_message(net, skb, dataoff, ctinfo, hooknum); } #if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE) diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index 5c96d9732c7..251f020c7c1 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -703,11 +703,13 @@ nf_conntrack_in(struct net *net, u_int8_t pf, unsigned int hooknum, /* It may be an special packet, error, unclean... * inverse of the return code tells to the netfilter * core what to do with the packet. */ - if (l4proto->error != NULL && - (ret = l4proto->error(skb, dataoff, &ctinfo, pf, hooknum)) <= 0) { - NF_CT_STAT_INC_ATOMIC(error); - NF_CT_STAT_INC_ATOMIC(invalid); - return -ret; + if (l4proto->error != NULL) { + ret = l4proto->error(net, skb, dataoff, &ctinfo, pf, hooknum); + if (ret <= 0) { + NF_CT_STAT_INC_ATOMIC(error); + NF_CT_STAT_INC_ATOMIC(invalid); + return -ret; + } } ct = resolve_normal_ct(net, skb, dataoff, pf, protonum, diff --git a/net/netfilter/nf_conntrack_proto_dccp.c b/net/netfilter/nf_conntrack_proto_dccp.c index edc30358dc1..6ead8da3e9e 100644 --- a/net/netfilter/nf_conntrack_proto_dccp.c +++ b/net/netfilter/nf_conntrack_proto_dccp.c @@ -545,9 +545,9 @@ static int dccp_packet(struct nf_conn *ct, const struct sk_buff *skb, return NF_ACCEPT; } -static int dccp_error(struct sk_buff *skb, unsigned int dataoff, - enum ip_conntrack_info *ctinfo, u_int8_t pf, - unsigned int hooknum) +static int dccp_error(struct net *net, struct sk_buff *skb, + unsigned int dataoff, enum ip_conntrack_info *ctinfo, + u_int8_t pf, unsigned int hooknum) { struct dccp_hdr _dh, *dh; unsigned int dccp_len = skb->len - dataoff; diff --git a/net/netfilter/nf_conntrack_proto_tcp.c b/net/netfilter/nf_conntrack_proto_tcp.c index 539a8202025..4e71de2405f 100644 --- a/net/netfilter/nf_conntrack_proto_tcp.c +++ b/net/netfilter/nf_conntrack_proto_tcp.c @@ -746,7 +746,8 @@ static const u8 tcp_valid_flags[(TH_FIN|TH_SYN|TH_RST|TH_ACK|TH_URG) + 1] = }; /* Protect conntrack agaist broken packets. Code taken from ipt_unclean.c. */ -static int tcp_error(struct sk_buff *skb, +static int tcp_error(struct net *net, + struct sk_buff *skb, unsigned int dataoff, enum ip_conntrack_info *ctinfo, u_int8_t pf, diff --git a/net/netfilter/nf_conntrack_proto_udp.c b/net/netfilter/nf_conntrack_proto_udp.c index 2a965c4a0ea..8a245beb2c9 100644 --- a/net/netfilter/nf_conntrack_proto_udp.c +++ b/net/netfilter/nf_conntrack_proto_udp.c @@ -89,7 +89,7 @@ static bool udp_new(struct nf_conn *ct, const struct sk_buff *skb, return true; } -static int udp_error(struct sk_buff *skb, unsigned int dataoff, +static int udp_error(struct net *net, struct sk_buff *skb, unsigned int dataoff, enum ip_conntrack_info *ctinfo, u_int8_t pf, unsigned int hooknum) diff --git a/net/netfilter/nf_conntrack_proto_udplite.c b/net/netfilter/nf_conntrack_proto_udplite.c index 4fb6c8d83a8..981701919a7 100644 --- a/net/netfilter/nf_conntrack_proto_udplite.c +++ b/net/netfilter/nf_conntrack_proto_udplite.c @@ -89,7 +89,9 @@ static bool udplite_new(struct nf_conn *ct, const struct sk_buff *skb, return true; } -static int udplite_error(struct sk_buff *skb, unsigned int dataoff, +static int udplite_error(struct net *net, + struct sk_buff *skb, + unsigned int dataoff, enum ip_conntrack_info *ctinfo, u_int8_t pf, unsigned int hooknum) -- cgit v1.2.3-70-g09d2 From a71996fccce4b2086a26036aa3c915365ca36926 Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Wed, 8 Oct 2008 11:35:07 +0200 Subject: netfilter: netns nf_conntrack: pass conntrack to nf_conntrack_event_cache() not skb This is cleaner, we already know conntrack to which event is relevant. Signed-off-by: Alexey Dobriyan Signed-off-by: Patrick McHardy --- include/net/netfilter/nf_conntrack_ecache.h | 4 +--- net/ipv4/netfilter/nf_conntrack_proto_icmp.c | 2 +- net/ipv4/netfilter/nf_nat_helper.c | 2 +- net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c | 2 +- net/netfilter/nf_conntrack_core.c | 10 +++++----- net/netfilter/nf_conntrack_ftp.c | 9 +++++---- net/netfilter/nf_conntrack_proto_gre.c | 2 +- net/netfilter/nf_conntrack_proto_sctp.c | 4 ++-- net/netfilter/nf_conntrack_proto_tcp.c | 6 +++--- net/netfilter/nf_conntrack_proto_udp.c | 2 +- net/netfilter/nf_conntrack_proto_udplite.c | 2 +- net/netfilter/xt_CONNMARK.c | 8 ++++---- net/netfilter/xt_CONNSECMARK.c | 2 +- 13 files changed, 27 insertions(+), 28 deletions(-) (limited to 'include/net') diff --git a/include/net/netfilter/nf_conntrack_ecache.h b/include/net/netfilter/nf_conntrack_ecache.h index f0b9078235c..c1b406cecf9 100644 --- a/include/net/netfilter/nf_conntrack_ecache.h +++ b/include/net/netfilter/nf_conntrack_ecache.h @@ -28,10 +28,8 @@ extern void __nf_ct_event_cache_init(struct nf_conn *ct); extern void nf_ct_event_cache_flush(void); static inline void -nf_conntrack_event_cache(enum ip_conntrack_events event, - const struct sk_buff *skb) +nf_conntrack_event_cache(enum ip_conntrack_events event, struct nf_conn *ct) { - struct nf_conn *ct = (struct nf_conn *)skb->nfct; struct nf_conntrack_ecache *ecache; local_bh_disable(); diff --git a/net/ipv4/netfilter/nf_conntrack_proto_icmp.c b/net/ipv4/netfilter/nf_conntrack_proto_icmp.c index 8c7ed5bc959..205ba399d4a 100644 --- a/net/ipv4/netfilter/nf_conntrack_proto_icmp.c +++ b/net/ipv4/netfilter/nf_conntrack_proto_icmp.c @@ -91,7 +91,7 @@ static int icmp_packet(struct nf_conn *ct, nf_ct_kill_acct(ct, ctinfo, skb); } else { atomic_inc(&ct->proto.icmp.count); - nf_conntrack_event_cache(IPCT_PROTOINFO_VOLATILE, skb); + nf_conntrack_event_cache(IPCT_PROTOINFO_VOLATILE, ct); nf_ct_refresh_acct(ct, ctinfo, skb, nf_ct_icmp_timeout); } diff --git a/net/ipv4/netfilter/nf_nat_helper.c b/net/ipv4/netfilter/nf_nat_helper.c index 112dcfa1290..cf7a42bf982 100644 --- a/net/ipv4/netfilter/nf_nat_helper.c +++ b/net/ipv4/netfilter/nf_nat_helper.c @@ -193,7 +193,7 @@ nf_nat_mangle_tcp_packet(struct sk_buff *skb, nf_conntrack_tcp_update(skb, ip_hdrlen(skb), ct, CTINFO2DIR(ctinfo)); - nf_conntrack_event_cache(IPCT_NATSEQADJ, skb); + nf_conntrack_event_cache(IPCT_NATSEQADJ, ct); } return 1; } diff --git a/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c b/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c index aabddfe2127..df04de91e6e 100644 --- a/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c +++ b/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c @@ -93,7 +93,7 @@ static int icmpv6_packet(struct nf_conn *ct, nf_ct_kill_acct(ct, ctinfo, skb); } else { atomic_inc(&ct->proto.icmp.count); - nf_conntrack_event_cache(IPCT_PROTOINFO_VOLATILE, skb); + nf_conntrack_event_cache(IPCT_PROTOINFO_VOLATILE, ct); nf_ct_refresh_acct(ct, ctinfo, skb, nf_ct_icmpv6_timeout); } diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index 251f020c7c1..01f59c57730 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -370,14 +370,14 @@ __nf_conntrack_confirm(struct sk_buff *skb) spin_unlock_bh(&nf_conntrack_lock); help = nfct_help(ct); if (help && help->helper) - nf_conntrack_event_cache(IPCT_HELPER, skb); + nf_conntrack_event_cache(IPCT_HELPER, ct); #ifdef CONFIG_NF_NAT_NEEDED if (test_bit(IPS_SRC_NAT_DONE_BIT, &ct->status) || test_bit(IPS_DST_NAT_DONE_BIT, &ct->status)) - nf_conntrack_event_cache(IPCT_NATINFO, skb); + nf_conntrack_event_cache(IPCT_NATINFO, ct); #endif nf_conntrack_event_cache(master_ct(ct) ? - IPCT_RELATED : IPCT_NEW, skb); + IPCT_RELATED : IPCT_NEW, ct); return NF_ACCEPT; out: @@ -740,7 +740,7 @@ nf_conntrack_in(struct net *net, u_int8_t pf, unsigned int hooknum, } if (set_reply && !test_and_set_bit(IPS_SEEN_REPLY_BIT, &ct->status)) - nf_conntrack_event_cache(IPCT_STATUS, skb); + nf_conntrack_event_cache(IPCT_STATUS, ct); return ret; } @@ -853,7 +853,7 @@ acct: /* must be unlocked when calling event cache */ if (event) - nf_conntrack_event_cache(event, skb); + nf_conntrack_event_cache(event, ct); } EXPORT_SYMBOL_GPL(__nf_ct_refresh_acct); diff --git a/net/netfilter/nf_conntrack_ftp.c b/net/netfilter/nf_conntrack_ftp.c index bb20672fe03..4f7107107e9 100644 --- a/net/netfilter/nf_conntrack_ftp.c +++ b/net/netfilter/nf_conntrack_ftp.c @@ -318,7 +318,8 @@ static int find_nl_seq(u32 seq, const struct nf_ct_ftp_master *info, int dir) } /* We don't update if it's older than what we have. */ -static void update_nl_seq(u32 nl_seq, struct nf_ct_ftp_master *info, int dir, +static void update_nl_seq(struct nf_conn *ct, u32 nl_seq, + struct nf_ct_ftp_master *info, int dir, struct sk_buff *skb) { unsigned int i, oldest = NUM_SEQ_TO_REMEMBER; @@ -336,11 +337,11 @@ static void update_nl_seq(u32 nl_seq, struct nf_ct_ftp_master *info, int dir, if (info->seq_aft_nl_num[dir] < NUM_SEQ_TO_REMEMBER) { info->seq_aft_nl[dir][info->seq_aft_nl_num[dir]++] = nl_seq; - nf_conntrack_event_cache(IPCT_HELPINFO_VOLATILE, skb); + nf_conntrack_event_cache(IPCT_HELPINFO_VOLATILE, ct); } else if (oldest != NUM_SEQ_TO_REMEMBER && after(nl_seq, info->seq_aft_nl[dir][oldest])) { info->seq_aft_nl[dir][oldest] = nl_seq; - nf_conntrack_event_cache(IPCT_HELPINFO_VOLATILE, skb); + nf_conntrack_event_cache(IPCT_HELPINFO_VOLATILE, ct); } } @@ -509,7 +510,7 @@ out_update_nl: /* Now if this ends in \n, update ftp info. Seq may have been * adjusted by NAT code. */ if (ends_in_nl) - update_nl_seq(seq, ct_ftp_info, dir, skb); + update_nl_seq(ct, seq, ct_ftp_info, dir, skb); out: spin_unlock_bh(&nf_ftp_lock); return ret; diff --git a/net/netfilter/nf_conntrack_proto_gre.c b/net/netfilter/nf_conntrack_proto_gre.c index c5a78220fa3..5b1273a01fe 100644 --- a/net/netfilter/nf_conntrack_proto_gre.c +++ b/net/netfilter/nf_conntrack_proto_gre.c @@ -229,7 +229,7 @@ static int gre_packet(struct nf_conn *ct, ct->proto.gre.stream_timeout); /* Also, more likely to be important, and not a probe. */ set_bit(IPS_ASSURED_BIT, &ct->status); - nf_conntrack_event_cache(IPCT_STATUS, skb); + nf_conntrack_event_cache(IPCT_STATUS, ct); } else nf_ct_refresh_acct(ct, ctinfo, skb, ct->proto.gre.timeout); diff --git a/net/netfilter/nf_conntrack_proto_sctp.c b/net/netfilter/nf_conntrack_proto_sctp.c index b5a90596d3f..ae8c2609e23 100644 --- a/net/netfilter/nf_conntrack_proto_sctp.c +++ b/net/netfilter/nf_conntrack_proto_sctp.c @@ -369,7 +369,7 @@ static int sctp_packet(struct nf_conn *ct, ct->proto.sctp.state = new_state; if (old_state != new_state) - nf_conntrack_event_cache(IPCT_PROTOINFO, skb); + nf_conntrack_event_cache(IPCT_PROTOINFO, ct); } write_unlock_bh(&sctp_lock); @@ -380,7 +380,7 @@ static int sctp_packet(struct nf_conn *ct, new_state == SCTP_CONNTRACK_ESTABLISHED) { pr_debug("Setting assured bit\n"); set_bit(IPS_ASSURED_BIT, &ct->status); - nf_conntrack_event_cache(IPCT_STATUS, skb); + nf_conntrack_event_cache(IPCT_STATUS, ct); } return NF_ACCEPT; diff --git a/net/netfilter/nf_conntrack_proto_tcp.c b/net/netfilter/nf_conntrack_proto_tcp.c index 4e71de2405f..b5d62d66e02 100644 --- a/net/netfilter/nf_conntrack_proto_tcp.c +++ b/net/netfilter/nf_conntrack_proto_tcp.c @@ -969,9 +969,9 @@ static int tcp_packet(struct nf_conn *ct, timeout = tcp_timeouts[new_state]; write_unlock_bh(&tcp_lock); - nf_conntrack_event_cache(IPCT_PROTOINFO_VOLATILE, skb); + nf_conntrack_event_cache(IPCT_PROTOINFO_VOLATILE, ct); if (new_state != old_state) - nf_conntrack_event_cache(IPCT_PROTOINFO, skb); + nf_conntrack_event_cache(IPCT_PROTOINFO, ct); if (!test_bit(IPS_SEEN_REPLY_BIT, &ct->status)) { /* If only reply is a RST, we can consider ourselves not to @@ -990,7 +990,7 @@ static int tcp_packet(struct nf_conn *ct, after SYN_RECV or a valid answer for a picked up connection. */ set_bit(IPS_ASSURED_BIT, &ct->status); - nf_conntrack_event_cache(IPCT_STATUS, skb); + nf_conntrack_event_cache(IPCT_STATUS, ct); } nf_ct_refresh_acct(ct, ctinfo, skb, timeout); diff --git a/net/netfilter/nf_conntrack_proto_udp.c b/net/netfilter/nf_conntrack_proto_udp.c index 8a245beb2c9..e0ee89e179c 100644 --- a/net/netfilter/nf_conntrack_proto_udp.c +++ b/net/netfilter/nf_conntrack_proto_udp.c @@ -75,7 +75,7 @@ static int udp_packet(struct nf_conn *ct, nf_ct_refresh_acct(ct, ctinfo, skb, nf_ct_udp_timeout_stream); /* Also, more likely to be important, and not a probe */ if (!test_and_set_bit(IPS_ASSURED_BIT, &ct->status)) - nf_conntrack_event_cache(IPCT_STATUS, skb); + nf_conntrack_event_cache(IPCT_STATUS, ct); } else nf_ct_refresh_acct(ct, ctinfo, skb, nf_ct_udp_timeout); diff --git a/net/netfilter/nf_conntrack_proto_udplite.c b/net/netfilter/nf_conntrack_proto_udplite.c index 981701919a7..c5b77c8f86c 100644 --- a/net/netfilter/nf_conntrack_proto_udplite.c +++ b/net/netfilter/nf_conntrack_proto_udplite.c @@ -75,7 +75,7 @@ static int udplite_packet(struct nf_conn *ct, nf_ct_udplite_timeout_stream); /* Also, more likely to be important, and not a probe */ if (!test_and_set_bit(IPS_ASSURED_BIT, &ct->status)) - nf_conntrack_event_cache(IPCT_STATUS, skb); + nf_conntrack_event_cache(IPCT_STATUS, ct); } else nf_ct_refresh_acct(ct, ctinfo, skb, nf_ct_udplite_timeout); diff --git a/net/netfilter/xt_CONNMARK.c b/net/netfilter/xt_CONNMARK.c index e72e5d01752..e1415c3f5c9 100644 --- a/net/netfilter/xt_CONNMARK.c +++ b/net/netfilter/xt_CONNMARK.c @@ -54,7 +54,7 @@ connmark_tg_v0(struct sk_buff *skb, const struct net_device *in, newmark = (ct->mark & ~markinfo->mask) | markinfo->mark; if (newmark != ct->mark) { ct->mark = newmark; - nf_conntrack_event_cache(IPCT_MARK, skb); + nf_conntrack_event_cache(IPCT_MARK, ct); } break; case XT_CONNMARK_SAVE: @@ -62,7 +62,7 @@ connmark_tg_v0(struct sk_buff *skb, const struct net_device *in, (skb->mark & markinfo->mask); if (ct->mark != newmark) { ct->mark = newmark; - nf_conntrack_event_cache(IPCT_MARK, skb); + nf_conntrack_event_cache(IPCT_MARK, ct); } break; case XT_CONNMARK_RESTORE: @@ -95,7 +95,7 @@ connmark_tg(struct sk_buff *skb, const struct net_device *in, newmark = (ct->mark & ~info->ctmask) ^ info->ctmark; if (ct->mark != newmark) { ct->mark = newmark; - nf_conntrack_event_cache(IPCT_MARK, skb); + nf_conntrack_event_cache(IPCT_MARK, ct); } break; case XT_CONNMARK_SAVE: @@ -103,7 +103,7 @@ connmark_tg(struct sk_buff *skb, const struct net_device *in, (skb->mark & info->nfmask); if (ct->mark != newmark) { ct->mark = newmark; - nf_conntrack_event_cache(IPCT_MARK, skb); + nf_conntrack_event_cache(IPCT_MARK, ct); } break; case XT_CONNMARK_RESTORE: diff --git a/net/netfilter/xt_CONNSECMARK.c b/net/netfilter/xt_CONNSECMARK.c index ae939e54dfa..5f221c3bd35 100644 --- a/net/netfilter/xt_CONNSECMARK.c +++ b/net/netfilter/xt_CONNSECMARK.c @@ -43,7 +43,7 @@ static void secmark_save(const struct sk_buff *skb) ct = nf_ct_get(skb, &ctinfo); if (ct && !ct->secmark) { ct->secmark = skb->secmark; - nf_conntrack_event_cache(IPCT_SECMARK, skb); + nf_conntrack_event_cache(IPCT_SECMARK, ct); } } } -- cgit v1.2.3-70-g09d2 From 6058fa6bb96a5b6145cba10c5171f09c2783ca69 Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Wed, 8 Oct 2008 11:35:07 +0200 Subject: netfilter: netns nf_conntrack: per-netns event cache Heh, last minute proof-reading of this patch made me think, that this is actually unneeded, simply because "ct" pointers will be different for different conntracks in different netns, just like they are different in one netns. Not so sure anymore. [Patrick: pointers will be different, flushing can only be done while inactive though and thus it needs to be per netns] Signed-off-by: Alexey Dobriyan Signed-off-by: Patrick McHardy --- include/net/netfilter/nf_conntrack_ecache.h | 22 ++++++++++++++++------ include/net/netns/conntrack.h | 5 +++++ net/netfilter/nf_conntrack_core.c | 12 +++++++++--- net/netfilter/nf_conntrack_ecache.c | 26 +++++++++++++++++++------- 4 files changed, 49 insertions(+), 16 deletions(-) (limited to 'include/net') diff --git a/include/net/netfilter/nf_conntrack_ecache.h b/include/net/netfilter/nf_conntrack_ecache.h index c1b406cecf9..35f814c1e2c 100644 --- a/include/net/netfilter/nf_conntrack_ecache.h +++ b/include/net/netfilter/nf_conntrack_ecache.h @@ -8,6 +8,7 @@ #include #include +#include #include #ifdef CONFIG_NF_CONNTRACK_EVENTS @@ -15,9 +16,6 @@ struct nf_conntrack_ecache { struct nf_conn *ct; unsigned int events; }; -DECLARE_PER_CPU(struct nf_conntrack_ecache, nf_conntrack_ecache); - -#define CONNTRACK_ECACHE(x) (__get_cpu_var(nf_conntrack_ecache).x) extern struct atomic_notifier_head nf_conntrack_chain; extern int nf_conntrack_register_notifier(struct notifier_block *nb); @@ -25,15 +23,16 @@ extern int nf_conntrack_unregister_notifier(struct notifier_block *nb); extern void nf_ct_deliver_cached_events(const struct nf_conn *ct); extern void __nf_ct_event_cache_init(struct nf_conn *ct); -extern void nf_ct_event_cache_flush(void); +extern void nf_ct_event_cache_flush(struct net *net); static inline void nf_conntrack_event_cache(enum ip_conntrack_events event, struct nf_conn *ct) { + struct net *net = nf_ct_net(ct); struct nf_conntrack_ecache *ecache; local_bh_disable(); - ecache = &__get_cpu_var(nf_conntrack_ecache); + ecache = per_cpu_ptr(net->ct.ecache, raw_smp_processor_id()); if (ct != ecache->ct) __nf_ct_event_cache_init(ct); ecache->events |= event; @@ -58,6 +57,9 @@ nf_ct_expect_event(enum ip_conntrack_expect_events event, atomic_notifier_call_chain(&nf_ct_expect_chain, event, exp); } +extern int nf_conntrack_ecache_init(struct net *net); +extern void nf_conntrack_ecache_fini(struct net *net); + #else /* CONFIG_NF_CONNTRACK_EVENTS */ static inline void nf_conntrack_event_cache(enum ip_conntrack_events event, @@ -67,7 +69,15 @@ static inline void nf_conntrack_event(enum ip_conntrack_events event, static inline void nf_ct_deliver_cached_events(const struct nf_conn *ct) {} static inline void nf_ct_expect_event(enum ip_conntrack_expect_events event, struct nf_conntrack_expect *exp) {} -static inline void nf_ct_event_cache_flush(void) {} +static inline void nf_ct_event_cache_flush(struct net *net) {} + +static inline int nf_conntrack_ecache_init(struct net *net) +{ + return 0; + +static inline void nf_conntrack_ecache_fini(struct net *net) +{ +} #endif /* CONFIG_NF_CONNTRACK_EVENTS */ #endif /*_NF_CONNTRACK_ECACHE_H*/ diff --git a/include/net/netns/conntrack.h b/include/net/netns/conntrack.h index 6ddf58e142a..9d5c1623c51 100644 --- a/include/net/netns/conntrack.h +++ b/include/net/netns/conntrack.h @@ -4,12 +4,17 @@ #include #include +struct nf_conntrack_ecache; + struct netns_ct { atomic_t count; unsigned int expect_count; struct hlist_head *hash; struct hlist_head *expect_hash; struct hlist_head unconfirmed; +#ifdef CONFIG_NF_CONNTRACK_EVENTS + struct nf_conntrack_ecache *ecache; +#endif int hash_vmalloc; int expect_vmalloc; }; diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index 01f59c57730..b55944e5e4e 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -1023,7 +1023,8 @@ void nf_conntrack_cleanup(struct net *net) delete... */ synchronize_net(); - nf_ct_event_cache_flush(); + nf_ct_event_cache_flush(net); + nf_conntrack_ecache_fini(net); i_see_dead_people: nf_conntrack_flush(net); if (atomic_read(&net->ct.count) != 0) { @@ -1151,11 +1152,14 @@ int nf_conntrack_init(struct net *net) max_factor = 4; } atomic_set(&net->ct.count, 0); + ret = nf_conntrack_ecache_init(net); + if (ret < 0) + goto err_ecache; net->ct.hash = nf_ct_alloc_hashtable(&nf_conntrack_htable_size, &net->ct.hash_vmalloc); if (!net->ct.hash) { printk(KERN_ERR "Unable to create nf_conntrack_hash\n"); - goto err_out; + goto err_hash; } INIT_HLIST_HEAD(&net->ct.unconfirmed); @@ -1215,6 +1219,8 @@ err_free_conntrack_slab: err_free_hash: nf_ct_free_hashtable(net->ct.hash, net->ct.hash_vmalloc, nf_conntrack_htable_size); -err_out: +err_hash: + nf_conntrack_ecache_fini(net); +err_ecache: return -ENOMEM; } diff --git a/net/netfilter/nf_conntrack_ecache.c b/net/netfilter/nf_conntrack_ecache.c index 83c41ac3505..a5f5e2e65d1 100644 --- a/net/netfilter/nf_conntrack_ecache.c +++ b/net/netfilter/nf_conntrack_ecache.c @@ -29,9 +29,6 @@ EXPORT_SYMBOL_GPL(nf_conntrack_chain); ATOMIC_NOTIFIER_HEAD(nf_ct_expect_chain); EXPORT_SYMBOL_GPL(nf_ct_expect_chain); -DEFINE_PER_CPU(struct nf_conntrack_ecache, nf_conntrack_ecache); -EXPORT_PER_CPU_SYMBOL_GPL(nf_conntrack_ecache); - /* deliver cached events and clear cache entry - must be called with locally * disabled softirqs */ static inline void @@ -51,10 +48,11 @@ __nf_ct_deliver_cached_events(struct nf_conntrack_ecache *ecache) * by code prior to async packet handling for freeing the skb */ void nf_ct_deliver_cached_events(const struct nf_conn *ct) { + struct net *net = nf_ct_net(ct); struct nf_conntrack_ecache *ecache; local_bh_disable(); - ecache = &__get_cpu_var(nf_conntrack_ecache); + ecache = per_cpu_ptr(net->ct.ecache, raw_smp_processor_id()); if (ecache->ct == ct) __nf_ct_deliver_cached_events(ecache); local_bh_enable(); @@ -64,10 +62,11 @@ EXPORT_SYMBOL_GPL(nf_ct_deliver_cached_events); /* Deliver cached events for old pending events, if current conntrack != old */ void __nf_ct_event_cache_init(struct nf_conn *ct) { + struct net *net = nf_ct_net(ct); struct nf_conntrack_ecache *ecache; /* take care of delivering potentially old events */ - ecache = &__get_cpu_var(nf_conntrack_ecache); + ecache = per_cpu_ptr(net->ct.ecache, raw_smp_processor_id()); BUG_ON(ecache->ct == ct); if (ecache->ct) __nf_ct_deliver_cached_events(ecache); @@ -79,18 +78,31 @@ EXPORT_SYMBOL_GPL(__nf_ct_event_cache_init); /* flush the event cache - touches other CPU's data and must not be called * while packets are still passing through the code */ -void nf_ct_event_cache_flush(void) +void nf_ct_event_cache_flush(struct net *net) { struct nf_conntrack_ecache *ecache; int cpu; for_each_possible_cpu(cpu) { - ecache = &per_cpu(nf_conntrack_ecache, cpu); + ecache = per_cpu_ptr(net->ct.ecache, cpu); if (ecache->ct) nf_ct_put(ecache->ct); } } +int nf_conntrack_ecache_init(struct net *net) +{ + net->ct.ecache = alloc_percpu(struct nf_conntrack_ecache); + if (!net->ct.ecache) + return -ENOMEM; + return 0; +} + +void nf_conntrack_ecache_fini(struct net *net) +{ + free_percpu(net->ct.ecache); +} + int nf_conntrack_register_notifier(struct notifier_block *nb) { return atomic_notifier_chain_register(&nf_conntrack_chain, nb); -- cgit v1.2.3-70-g09d2 From 0d55af8791bfb42e04cc456b348910582f230343 Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Wed, 8 Oct 2008 11:35:07 +0200 Subject: netfilter: netns nf_conntrack: per-netns statistics Signed-off-by: Alexey Dobriyan Signed-off-by: Patrick McHardy --- include/net/netfilter/nf_conntrack.h | 8 ++-- include/net/netns/conntrack.h | 1 + .../netfilter/nf_conntrack_l3proto_ipv4_compat.c | 4 +- net/netfilter/nf_conntrack_core.c | 49 ++++++++++++---------- net/netfilter/nf_conntrack_expect.c | 4 +- net/netfilter/nf_conntrack_standalone.c | 4 +- 6 files changed, 38 insertions(+), 32 deletions(-) (limited to 'include/net') diff --git a/include/net/netfilter/nf_conntrack.h b/include/net/netfilter/nf_conntrack.h index f5447f14304..c95561050f7 100644 --- a/include/net/netfilter/nf_conntrack.h +++ b/include/net/netfilter/nf_conntrack.h @@ -290,12 +290,12 @@ extern unsigned int nf_conntrack_htable_size; extern int nf_conntrack_checksum; extern int nf_conntrack_max; -DECLARE_PER_CPU(struct ip_conntrack_stat, nf_conntrack_stat); -#define NF_CT_STAT_INC(count) (__get_cpu_var(nf_conntrack_stat).count++) -#define NF_CT_STAT_INC_ATOMIC(count) \ +#define NF_CT_STAT_INC(net, count) \ + (per_cpu_ptr((net)->ct.stat, raw_smp_processor_id())->count++) +#define NF_CT_STAT_INC_ATOMIC(net, count) \ do { \ local_bh_disable(); \ - __get_cpu_var(nf_conntrack_stat).count++; \ + per_cpu_ptr((net)->ct.stat, raw_smp_processor_id())->count++; \ local_bh_enable(); \ } while (0) diff --git a/include/net/netns/conntrack.h b/include/net/netns/conntrack.h index 9d5c1623c51..fc0a46d64cc 100644 --- a/include/net/netns/conntrack.h +++ b/include/net/netns/conntrack.h @@ -12,6 +12,7 @@ struct netns_ct { struct hlist_head *hash; struct hlist_head *expect_hash; struct hlist_head unconfirmed; + struct ip_conntrack_stat *stat; #ifdef CONFIG_NF_CONNTRACK_EVENTS struct nf_conntrack_ecache *ecache; #endif diff --git a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c index b2940836d10..fdc85b37078 100644 --- a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c +++ b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c @@ -294,7 +294,7 @@ static void *ct_cpu_seq_start(struct seq_file *seq, loff_t *pos) if (!cpu_possible(cpu)) continue; *pos = cpu+1; - return &per_cpu(nf_conntrack_stat, cpu); + return per_cpu_ptr(init_net.ct.stat, cpu); } return NULL; @@ -308,7 +308,7 @@ static void *ct_cpu_seq_next(struct seq_file *seq, void *v, loff_t *pos) if (!cpu_possible(cpu)) continue; *pos = cpu+1; - return &per_cpu(nf_conntrack_stat, cpu); + return per_cpu_ptr(init_net.ct.stat, cpu); } return NULL; diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index b55944e5e4e..1e87fa0cd3a 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -56,9 +56,6 @@ EXPORT_SYMBOL_GPL(nf_conntrack_untracked); unsigned int nf_ct_log_invalid __read_mostly; static struct kmem_cache *nf_conntrack_cachep __read_mostly; -DEFINE_PER_CPU(struct ip_conntrack_stat, nf_conntrack_stat); -EXPORT_PER_CPU_SYMBOL(nf_conntrack_stat); - static int nf_conntrack_hash_rnd_initted; static unsigned int nf_conntrack_hash_rnd; @@ -171,6 +168,7 @@ static void destroy_conntrack(struct nf_conntrack *nfct) { struct nf_conn *ct = (struct nf_conn *)nfct; + struct net *net = nf_ct_net(ct); struct nf_conntrack_l4proto *l4proto; pr_debug("destroy_conntrack(%p)\n", ct); @@ -203,7 +201,7 @@ destroy_conntrack(struct nf_conntrack *nfct) hlist_del(&ct->tuplehash[IP_CT_DIR_ORIGINAL].hnode); } - NF_CT_STAT_INC(delete); + NF_CT_STAT_INC(net, delete); spin_unlock_bh(&nf_conntrack_lock); if (ct->master) @@ -216,6 +214,7 @@ destroy_conntrack(struct nf_conntrack *nfct) static void death_by_timeout(unsigned long ul_conntrack) { struct nf_conn *ct = (void *)ul_conntrack; + struct net *net = nf_ct_net(ct); struct nf_conn_help *help = nfct_help(ct); struct nf_conntrack_helper *helper; @@ -230,7 +229,7 @@ static void death_by_timeout(unsigned long ul_conntrack) spin_lock_bh(&nf_conntrack_lock); /* Inside lock so preempt is disabled on module removal path. * Otherwise we can get spurious warnings. */ - NF_CT_STAT_INC(delete_list); + NF_CT_STAT_INC(net, delete_list); clean_from_lists(ct); spin_unlock_bh(&nf_conntrack_lock); nf_ct_put(ct); @@ -249,11 +248,11 @@ __nf_conntrack_find(struct net *net, const struct nf_conntrack_tuple *tuple) local_bh_disable(); hlist_for_each_entry_rcu(h, n, &net->ct.hash[hash], hnode) { if (nf_ct_tuple_equal(tuple, &h->tuple)) { - NF_CT_STAT_INC(found); + NF_CT_STAT_INC(net, found); local_bh_enable(); return h; } - NF_CT_STAT_INC(searched); + NF_CT_STAT_INC(net, searched); } local_bh_enable(); @@ -366,7 +365,7 @@ __nf_conntrack_confirm(struct sk_buff *skb) add_timer(&ct->timeout); atomic_inc(&ct->ct_general.use); set_bit(IPS_CONFIRMED_BIT, &ct->status); - NF_CT_STAT_INC(insert); + NF_CT_STAT_INC(net, insert); spin_unlock_bh(&nf_conntrack_lock); help = nfct_help(ct); if (help && help->helper) @@ -381,7 +380,7 @@ __nf_conntrack_confirm(struct sk_buff *skb) return NF_ACCEPT; out: - NF_CT_STAT_INC(insert_failed); + NF_CT_STAT_INC(net, insert_failed); spin_unlock_bh(&nf_conntrack_lock); return NF_DROP; } @@ -405,11 +404,11 @@ nf_conntrack_tuple_taken(const struct nf_conntrack_tuple *tuple, hlist_for_each_entry_rcu(h, n, &net->ct.hash[hash], hnode) { if (nf_ct_tuplehash_to_ctrack(h) != ignored_conntrack && nf_ct_tuple_equal(tuple, &h->tuple)) { - NF_CT_STAT_INC(found); + NF_CT_STAT_INC(net, found); rcu_read_unlock_bh(); return 1; } - NF_CT_STAT_INC(searched); + NF_CT_STAT_INC(net, searched); } rcu_read_unlock_bh(); @@ -454,7 +453,7 @@ static noinline int early_drop(struct net *net, unsigned int hash) if (del_timer(&ct->timeout)) { death_by_timeout((unsigned long)ct); dropped = 1; - NF_CT_STAT_INC_ATOMIC(early_drop); + NF_CT_STAT_INC_ATOMIC(net, early_drop); } nf_ct_put(ct); return dropped; @@ -581,7 +580,7 @@ init_conntrack(struct net *net, ct->secmark = exp->master->secmark; #endif nf_conntrack_get(&ct->master->ct_general); - NF_CT_STAT_INC(expect_new); + NF_CT_STAT_INC(net, expect_new); } else { struct nf_conntrack_helper *helper; @@ -591,7 +590,7 @@ init_conntrack(struct net *net, if (help) rcu_assign_pointer(help->helper, helper); } - NF_CT_STAT_INC(new); + NF_CT_STAT_INC(net, new); } /* Overload tuple linked list to put us in unconfirmed list. */ @@ -683,7 +682,7 @@ nf_conntrack_in(struct net *net, u_int8_t pf, unsigned int hooknum, /* Previously seen (loopback or untracked)? Ignore. */ if (skb->nfct) { - NF_CT_STAT_INC_ATOMIC(ignore); + NF_CT_STAT_INC_ATOMIC(net, ignore); return NF_ACCEPT; } @@ -693,8 +692,8 @@ nf_conntrack_in(struct net *net, u_int8_t pf, unsigned int hooknum, &dataoff, &protonum); if (ret <= 0) { pr_debug("not prepared to track yet or error occured\n"); - NF_CT_STAT_INC_ATOMIC(error); - NF_CT_STAT_INC_ATOMIC(invalid); + NF_CT_STAT_INC_ATOMIC(net, error); + NF_CT_STAT_INC_ATOMIC(net, invalid); return -ret; } @@ -706,8 +705,8 @@ nf_conntrack_in(struct net *net, u_int8_t pf, unsigned int hooknum, if (l4proto->error != NULL) { ret = l4proto->error(net, skb, dataoff, &ctinfo, pf, hooknum); if (ret <= 0) { - NF_CT_STAT_INC_ATOMIC(error); - NF_CT_STAT_INC_ATOMIC(invalid); + NF_CT_STAT_INC_ATOMIC(net, error); + NF_CT_STAT_INC_ATOMIC(net, invalid); return -ret; } } @@ -716,13 +715,13 @@ nf_conntrack_in(struct net *net, u_int8_t pf, unsigned int hooknum, l3proto, l4proto, &set_reply, &ctinfo); if (!ct) { /* Not valid part of a connection */ - NF_CT_STAT_INC_ATOMIC(invalid); + NF_CT_STAT_INC_ATOMIC(net, invalid); return NF_ACCEPT; } if (IS_ERR(ct)) { /* Too stressed to deal. */ - NF_CT_STAT_INC_ATOMIC(drop); + NF_CT_STAT_INC_ATOMIC(net, drop); return NF_DROP; } @@ -735,7 +734,7 @@ nf_conntrack_in(struct net *net, u_int8_t pf, unsigned int hooknum, pr_debug("nf_conntrack_in: Can't track with proto module\n"); nf_conntrack_put(skb->nfct); skb->nfct = NULL; - NF_CT_STAT_INC_ATOMIC(invalid); + NF_CT_STAT_INC_ATOMIC(net, invalid); return -ret; } @@ -1043,6 +1042,7 @@ void nf_conntrack_cleanup(struct net *net) nf_conntrack_acct_fini(); nf_conntrack_expect_fini(net); + free_percpu(net->ct.stat); nf_conntrack_helper_fini(); nf_conntrack_proto_fini(); } @@ -1152,6 +1152,9 @@ int nf_conntrack_init(struct net *net) max_factor = 4; } atomic_set(&net->ct.count, 0); + net->ct.stat = alloc_percpu(struct ip_conntrack_stat); + if (!net->ct.stat) + goto err_stat; ret = nf_conntrack_ecache_init(net); if (ret < 0) goto err_ecache; @@ -1222,5 +1225,7 @@ err_free_hash: err_hash: nf_conntrack_ecache_fini(net); err_ecache: + free_percpu(net->ct.stat); +err_stat: return -ENOMEM; } diff --git a/net/netfilter/nf_conntrack_expect.c b/net/netfilter/nf_conntrack_expect.c index 6a09200049e..b7f75117161 100644 --- a/net/netfilter/nf_conntrack_expect.c +++ b/net/netfilter/nf_conntrack_expect.c @@ -53,7 +53,7 @@ void nf_ct_unlink_expect(struct nf_conntrack_expect *exp) master_help->expecting[exp->class]--; nf_ct_expect_put(exp); - NF_CT_STAT_INC(expect_delete); + NF_CT_STAT_INC(net, expect_delete); } EXPORT_SYMBOL_GPL(nf_ct_unlink_expect); @@ -326,7 +326,7 @@ static void nf_ct_expect_insert(struct nf_conntrack_expect *exp) add_timer(&exp->timeout); atomic_inc(&exp->use); - NF_CT_STAT_INC(expect_create); + NF_CT_STAT_INC(net, expect_create); } /* Race with expectations being used means we could have none to find; OK. */ diff --git a/net/netfilter/nf_conntrack_standalone.c b/net/netfilter/nf_conntrack_standalone.c index 02eaf872277..a4fdbbf363a 100644 --- a/net/netfilter/nf_conntrack_standalone.c +++ b/net/netfilter/nf_conntrack_standalone.c @@ -203,7 +203,7 @@ static void *ct_cpu_seq_start(struct seq_file *seq, loff_t *pos) if (!cpu_possible(cpu)) continue; *pos = cpu + 1; - return &per_cpu(nf_conntrack_stat, cpu); + return per_cpu_ptr(init_net.ct.stat, cpu); } return NULL; @@ -217,7 +217,7 @@ static void *ct_cpu_seq_next(struct seq_file *seq, void *v, loff_t *pos) if (!cpu_possible(cpu)) continue; *pos = cpu + 1; - return &per_cpu(nf_conntrack_stat, cpu); + return per_cpu_ptr(init_net.ct.stat, cpu); } return NULL; -- cgit v1.2.3-70-g09d2 From 802507071b72ed5025747126099cbc6d1542f596 Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Wed, 8 Oct 2008 11:35:08 +0200 Subject: netfilter: netns nf_conntrack: per-netns net.netfilter.nf_conntrack_count sysctl Note, sysctl table is always duplicated, this is simpler and less special-cased. Signed-off-by: Alexey Dobriyan Signed-off-by: Patrick McHardy --- include/net/netns/conntrack.h | 4 ++ net/netfilter/nf_conntrack_standalone.c | 73 ++++++++++++++++++--------------- 2 files changed, 45 insertions(+), 32 deletions(-) (limited to 'include/net') diff --git a/include/net/netns/conntrack.h b/include/net/netns/conntrack.h index fc0a46d64cc..2b50758df6a 100644 --- a/include/net/netns/conntrack.h +++ b/include/net/netns/conntrack.h @@ -4,6 +4,7 @@ #include #include +struct ctl_table_header; struct nf_conntrack_ecache; struct netns_ct { @@ -15,6 +16,9 @@ struct netns_ct { struct ip_conntrack_stat *stat; #ifdef CONFIG_NF_CONNTRACK_EVENTS struct nf_conntrack_ecache *ecache; +#endif +#ifdef CONFIG_SYSCTL + struct ctl_table_header *sysctl_header; #endif int hash_vmalloc; int expect_vmalloc; diff --git a/net/netfilter/nf_conntrack_standalone.c b/net/netfilter/nf_conntrack_standalone.c index 169760ddc16..64b4f95b367 100644 --- a/net/netfilter/nf_conntrack_standalone.c +++ b/net/netfilter/nf_conntrack_standalone.c @@ -330,7 +330,6 @@ EXPORT_SYMBOL_GPL(nf_conntrack_checksum); static int log_invalid_proto_min = 0; static int log_invalid_proto_max = 255; -static struct ctl_table_header *nf_ct_sysctl_header; static struct ctl_table_header *nf_ct_netfilter_header; static ctl_table nf_ct_sysctl_table[] = { @@ -409,40 +408,58 @@ static struct ctl_path nf_ct_path[] = { EXPORT_SYMBOL_GPL(nf_ct_log_invalid); -static int nf_conntrack_standalone_init_sysctl(void) +static int nf_conntrack_standalone_init_sysctl(struct net *net) { - nf_ct_netfilter_header = - register_sysctl_paths(nf_ct_path, nf_ct_netfilter_table); - if (!nf_ct_netfilter_header) - goto out; - - nf_ct_sysctl_header = - register_sysctl_paths(nf_net_netfilter_sysctl_path, - nf_ct_sysctl_table); - if (!nf_ct_sysctl_header) + struct ctl_table *table; + + if (net_eq(net, &init_net)) { + nf_ct_netfilter_header = + register_sysctl_paths(nf_ct_path, nf_ct_netfilter_table); + if (!nf_ct_netfilter_header) + goto out; + } + + table = kmemdup(nf_ct_sysctl_table, sizeof(nf_ct_sysctl_table), + GFP_KERNEL); + if (!table) + goto out_kmemdup; + + table[1].data = &net->ct.count; + + net->ct.sysctl_header = register_net_sysctl_table(net, + nf_net_netfilter_sysctl_path, table); + if (!net->ct.sysctl_header) goto out_unregister_netfilter; return 0; out_unregister_netfilter: - unregister_sysctl_table(nf_ct_netfilter_header); + kfree(table); +out_kmemdup: + if (net_eq(net, &init_net)) + unregister_sysctl_table(nf_ct_netfilter_header); out: printk("nf_conntrack: can't register to sysctl.\n"); return -ENOMEM; } -static void nf_conntrack_standalone_fini_sysctl(void) +static void nf_conntrack_standalone_fini_sysctl(struct net *net) { - unregister_sysctl_table(nf_ct_netfilter_header); - unregister_sysctl_table(nf_ct_sysctl_header); + struct ctl_table *table; + + if (net_eq(net, &init_net)) + unregister_sysctl_table(nf_ct_netfilter_header); + table = net->ct.sysctl_header->ctl_table_arg; + unregister_net_sysctl_table(net->ct.sysctl_header); + kfree(table); } #else -static int nf_conntrack_standalone_init_sysctl(void) +static int nf_conntrack_standalone_init_sysctl(struct net *net) { return 0; } -static void nf_conntrack_standalone_fini_sysctl(void) +static void nf_conntrack_standalone_fini_sysctl(struct net *net) { } #endif /* CONFIG_SYSCTL */ @@ -457,8 +474,13 @@ static int nf_conntrack_net_init(struct net *net) ret = nf_conntrack_standalone_init_proc(net); if (ret < 0) goto out_proc; + ret = nf_conntrack_standalone_init_sysctl(net); + if (ret < 0) + goto out_sysctl; return 0; +out_sysctl: + nf_conntrack_standalone_fini_proc(net); out_proc: nf_conntrack_cleanup(net); out_init: @@ -467,6 +489,7 @@ out_init: static void nf_conntrack_net_exit(struct net *net) { + nf_conntrack_standalone_fini_sysctl(net); nf_conntrack_standalone_fini_proc(net); nf_conntrack_cleanup(net); } @@ -478,25 +501,11 @@ static struct pernet_operations nf_conntrack_net_ops = { static int __init nf_conntrack_standalone_init(void) { - int ret; - - ret = register_pernet_subsys(&nf_conntrack_net_ops); - if (ret < 0) - goto out; - ret = nf_conntrack_standalone_init_sysctl(); - if (ret < 0) - goto out_sysctl; - return 0; - -out_sysctl: - unregister_pernet_subsys(&nf_conntrack_net_ops); -out: - return ret; + return register_pernet_subsys(&nf_conntrack_net_ops); } static void __exit nf_conntrack_standalone_fini(void) { - nf_conntrack_standalone_fini_sysctl(); unregister_pernet_subsys(&nf_conntrack_net_ops); } -- cgit v1.2.3-70-g09d2 From c04d05529a6e0bf97183a2caf76a0c7f07f5b78c Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Wed, 8 Oct 2008 11:35:08 +0200 Subject: netfilter: netns nf_conntrack: per-netns net.netfilter.nf_conntrack_checksum sysctl Signed-off-by: Alexey Dobriyan Signed-off-by: Patrick McHardy --- include/net/netfilter/nf_conntrack.h | 1 - include/net/netns/conntrack.h | 1 + net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c | 2 +- net/ipv4/netfilter/nf_conntrack_proto_icmp.c | 2 +- net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c | 2 +- net/netfilter/nf_conntrack_proto_dccp.c | 2 +- net/netfilter/nf_conntrack_proto_tcp.c | 2 +- net/netfilter/nf_conntrack_proto_udp.c | 2 +- net/netfilter/nf_conntrack_proto_udplite.c | 2 +- net/netfilter/nf_conntrack_standalone.c | 7 +++---- 10 files changed, 11 insertions(+), 12 deletions(-) (limited to 'include/net') diff --git a/include/net/netfilter/nf_conntrack.h b/include/net/netfilter/nf_conntrack.h index c95561050f7..b76a8685b5b 100644 --- a/include/net/netfilter/nf_conntrack.h +++ b/include/net/netfilter/nf_conntrack.h @@ -287,7 +287,6 @@ static inline int nf_ct_is_untracked(const struct sk_buff *skb) extern int nf_conntrack_set_hashsize(const char *val, struct kernel_param *kp); extern unsigned int nf_conntrack_htable_size; -extern int nf_conntrack_checksum; extern int nf_conntrack_max; #define NF_CT_STAT_INC(net, count) \ diff --git a/include/net/netns/conntrack.h b/include/net/netns/conntrack.h index 2b50758df6a..38b6dae4d3d 100644 --- a/include/net/netns/conntrack.h +++ b/include/net/netns/conntrack.h @@ -17,6 +17,7 @@ struct netns_ct { #ifdef CONFIG_NF_CONNTRACK_EVENTS struct nf_conntrack_ecache *ecache; #endif + int sysctl_checksum; #ifdef CONFIG_SYSCTL struct ctl_table_header *sysctl_header; #endif diff --git a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c index 2e4dd3fb002..75871b1dd8a 100644 --- a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c +++ b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c @@ -270,7 +270,7 @@ static ctl_table ip_ct_sysctl_table[] = { { .ctl_name = NET_IPV4_NF_CONNTRACK_CHECKSUM, .procname = "ip_conntrack_checksum", - .data = &nf_conntrack_checksum, + .data = &init_net.ct.sysctl_checksum, .maxlen = sizeof(int), .mode = 0644, .proc_handler = &proc_dointvec, diff --git a/net/ipv4/netfilter/nf_conntrack_proto_icmp.c b/net/ipv4/netfilter/nf_conntrack_proto_icmp.c index 205ba399d4a..ace66cbf921 100644 --- a/net/ipv4/netfilter/nf_conntrack_proto_icmp.c +++ b/net/ipv4/netfilter/nf_conntrack_proto_icmp.c @@ -188,7 +188,7 @@ icmp_error(struct net *net, struct sk_buff *skb, unsigned int dataoff, } /* See ip_conntrack_proto_tcp.c */ - if (nf_conntrack_checksum && hooknum == NF_INET_PRE_ROUTING && + if (net->ct.sysctl_checksum && hooknum == NF_INET_PRE_ROUTING && nf_ip_checksum(skb, hooknum, dataoff, 0)) { if (LOG_INVALID(IPPROTO_ICMP)) nf_log_packet(PF_INET, 0, skb, NULL, NULL, NULL, diff --git a/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c b/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c index df04de91e6e..fa12e57749a 100644 --- a/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c +++ b/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c @@ -187,7 +187,7 @@ icmpv6_error(struct net *net, struct sk_buff *skb, unsigned int dataoff, return -NF_ACCEPT; } - if (nf_conntrack_checksum && hooknum == NF_INET_PRE_ROUTING && + if (net->ct.sysctl_checksum && hooknum == NF_INET_PRE_ROUTING && nf_ip6_checksum(skb, hooknum, dataoff, IPPROTO_ICMPV6)) { nf_log_packet(PF_INET6, 0, skb, NULL, NULL, NULL, "nf_ct_icmpv6: ICMPv6 checksum failed\n"); diff --git a/net/netfilter/nf_conntrack_proto_dccp.c b/net/netfilter/nf_conntrack_proto_dccp.c index 6ead8da3e9e..769680e68b5 100644 --- a/net/netfilter/nf_conntrack_proto_dccp.c +++ b/net/netfilter/nf_conntrack_proto_dccp.c @@ -575,7 +575,7 @@ static int dccp_error(struct net *net, struct sk_buff *skb, } } - if (nf_conntrack_checksum && hooknum == NF_INET_PRE_ROUTING && + if (net->ct.sysctl_checksum && hooknum == NF_INET_PRE_ROUTING && nf_checksum_partial(skb, hooknum, dataoff, cscov, IPPROTO_DCCP, pf)) { msg = "nf_ct_dccp: bad checksum "; diff --git a/net/netfilter/nf_conntrack_proto_tcp.c b/net/netfilter/nf_conntrack_proto_tcp.c index b5d62d66e02..131c9be4470 100644 --- a/net/netfilter/nf_conntrack_proto_tcp.c +++ b/net/netfilter/nf_conntrack_proto_tcp.c @@ -780,7 +780,7 @@ static int tcp_error(struct net *net, * because the checksum is assumed to be correct. */ /* FIXME: Source route IP option packets --RR */ - if (nf_conntrack_checksum && hooknum == NF_INET_PRE_ROUTING && + if (net->ct.sysctl_checksum && hooknum == NF_INET_PRE_ROUTING && nf_checksum(skb, hooknum, dataoff, IPPROTO_TCP, pf)) { if (LOG_INVALID(IPPROTO_TCP)) nf_log_packet(pf, 0, skb, NULL, NULL, NULL, diff --git a/net/netfilter/nf_conntrack_proto_udp.c b/net/netfilter/nf_conntrack_proto_udp.c index e0ee89e179c..3d3fffe3f8b 100644 --- a/net/netfilter/nf_conntrack_proto_udp.c +++ b/net/netfilter/nf_conntrack_proto_udp.c @@ -123,7 +123,7 @@ static int udp_error(struct net *net, struct sk_buff *skb, unsigned int dataoff, * We skip checking packets on the outgoing path * because the checksum is assumed to be correct. * FIXME: Source route IP option packets --RR */ - if (nf_conntrack_checksum && hooknum == NF_INET_PRE_ROUTING && + if (net->ct.sysctl_checksum && hooknum == NF_INET_PRE_ROUTING && nf_checksum(skb, hooknum, dataoff, IPPROTO_UDP, pf)) { if (LOG_INVALID(IPPROTO_UDP)) nf_log_packet(pf, 0, skb, NULL, NULL, NULL, diff --git a/net/netfilter/nf_conntrack_proto_udplite.c b/net/netfilter/nf_conntrack_proto_udplite.c index c5b77c8f86c..3d1697c4f91 100644 --- a/net/netfilter/nf_conntrack_proto_udplite.c +++ b/net/netfilter/nf_conntrack_proto_udplite.c @@ -129,7 +129,7 @@ static int udplite_error(struct net *net, } /* Checksum invalid? Ignore. */ - if (nf_conntrack_checksum && hooknum == NF_INET_PRE_ROUTING && + if (net->ct.sysctl_checksum && hooknum == NF_INET_PRE_ROUTING && nf_checksum_partial(skb, hooknum, dataoff, cscov, IPPROTO_UDP, pf)) { if (LOG_INVALID(IPPROTO_UDPLITE)) diff --git a/net/netfilter/nf_conntrack_standalone.c b/net/netfilter/nf_conntrack_standalone.c index 64b4f95b367..5cd06637977 100644 --- a/net/netfilter/nf_conntrack_standalone.c +++ b/net/netfilter/nf_conntrack_standalone.c @@ -322,9 +322,6 @@ static void nf_conntrack_standalone_fini_proc(struct net *net) /* Sysctl support */ -int nf_conntrack_checksum __read_mostly = 1; -EXPORT_SYMBOL_GPL(nf_conntrack_checksum); - #ifdef CONFIG_SYSCTL /* Log invalid packets of a given protocol */ static int log_invalid_proto_min = 0; @@ -360,7 +357,7 @@ static ctl_table nf_ct_sysctl_table[] = { { .ctl_name = NET_NF_CONNTRACK_CHECKSUM, .procname = "nf_conntrack_checksum", - .data = &nf_conntrack_checksum, + .data = &init_net.ct.sysctl_checksum, .maxlen = sizeof(unsigned int), .mode = 0644, .proc_handler = &proc_dointvec, @@ -425,6 +422,7 @@ static int nf_conntrack_standalone_init_sysctl(struct net *net) goto out_kmemdup; table[1].data = &net->ct.count; + table[3].data = &net->ct.sysctl_checksum; net->ct.sysctl_header = register_net_sysctl_table(net, nf_net_netfilter_sysctl_path, table); @@ -474,6 +472,7 @@ static int nf_conntrack_net_init(struct net *net) ret = nf_conntrack_standalone_init_proc(net); if (ret < 0) goto out_proc; + net->ct.sysctl_checksum = 1; ret = nf_conntrack_standalone_init_sysctl(net); if (ret < 0) goto out_sysctl; -- cgit v1.2.3-70-g09d2 From c2a2c7e0cc39e7f9336cd67e8307a110bdba82f3 Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Wed, 8 Oct 2008 11:35:08 +0200 Subject: netfilter: netns nf_conntrack: per-netns net.netfilter.nf_conntrack_log_invalid sysctl Signed-off-by: Alexey Dobriyan Signed-off-by: Patrick McHardy --- include/net/netfilter/nf_conntrack_l4proto.h | 15 +++++++-------- include/net/netns/conntrack.h | 1 + net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c | 2 +- net/ipv4/netfilter/nf_conntrack_proto_icmp.c | 6 +++--- net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c | 2 +- net/netfilter/nf_conntrack_core.c | 1 - net/netfilter/nf_conntrack_proto_dccp.c | 10 ++++++---- net/netfilter/nf_conntrack_proto_tcp.c | 18 ++++++++++-------- net/netfilter/nf_conntrack_proto_udp.c | 6 +++--- net/netfilter/nf_conntrack_proto_udplite.c | 8 ++++---- net/netfilter/nf_conntrack_standalone.c | 6 +++--- 11 files changed, 39 insertions(+), 36 deletions(-) (limited to 'include/net') diff --git a/include/net/netfilter/nf_conntrack_l4proto.h b/include/net/netfilter/nf_conntrack_l4proto.h index 97723d33c95..7f2f43c7728 100644 --- a/include/net/netfilter/nf_conntrack_l4proto.h +++ b/include/net/netfilter/nf_conntrack_l4proto.h @@ -117,20 +117,19 @@ extern int nf_ct_port_nlattr_to_tuple(struct nlattr *tb[], struct nf_conntrack_tuple *t); extern const struct nla_policy nf_ct_port_nla_policy[]; -/* Log invalid packets */ -extern unsigned int nf_ct_log_invalid; - #ifdef CONFIG_SYSCTL #ifdef DEBUG_INVALID_PACKETS -#define LOG_INVALID(proto) \ - (nf_ct_log_invalid == (proto) || nf_ct_log_invalid == IPPROTO_RAW) +#define LOG_INVALID(net, proto) \ + ((net)->ct.sysctl_log_invalid == (proto) || \ + (net)->ct.sysctl_log_invalid == IPPROTO_RAW) #else -#define LOG_INVALID(proto) \ - ((nf_ct_log_invalid == (proto) || nf_ct_log_invalid == IPPROTO_RAW) \ +#define LOG_INVALID(net, proto) \ + (((net)->ct.sysctl_log_invalid == (proto) || \ + (net)->ct.sysctl_log_invalid == IPPROTO_RAW) \ && net_ratelimit()) #endif #else -#define LOG_INVALID(proto) 0 +#define LOG_INVALID(net, proto) 0 #endif /* CONFIG_SYSCTL */ #endif /*_NF_CONNTRACK_PROTOCOL_H*/ diff --git a/include/net/netns/conntrack.h b/include/net/netns/conntrack.h index 38b6dae4d3d..503e37551b1 100644 --- a/include/net/netns/conntrack.h +++ b/include/net/netns/conntrack.h @@ -18,6 +18,7 @@ struct netns_ct { struct nf_conntrack_ecache *ecache; #endif int sysctl_checksum; + unsigned int sysctl_log_invalid; /* Log invalid packets */ #ifdef CONFIG_SYSCTL struct ctl_table_header *sysctl_header; #endif diff --git a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c index 75871b1dd8a..af69acc1d0f 100644 --- a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c +++ b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c @@ -278,7 +278,7 @@ static ctl_table ip_ct_sysctl_table[] = { { .ctl_name = NET_IPV4_NF_CONNTRACK_LOG_INVALID, .procname = "ip_conntrack_log_invalid", - .data = &nf_ct_log_invalid, + .data = &init_net.ct.sysctl_log_invalid, .maxlen = sizeof(unsigned int), .mode = 0644, .proc_handler = &proc_dointvec_minmax, diff --git a/net/ipv4/netfilter/nf_conntrack_proto_icmp.c b/net/ipv4/netfilter/nf_conntrack_proto_icmp.c index ace66cbf921..4e887922022 100644 --- a/net/ipv4/netfilter/nf_conntrack_proto_icmp.c +++ b/net/ipv4/netfilter/nf_conntrack_proto_icmp.c @@ -181,7 +181,7 @@ icmp_error(struct net *net, struct sk_buff *skb, unsigned int dataoff, /* Not enough header? */ icmph = skb_header_pointer(skb, ip_hdrlen(skb), sizeof(_ih), &_ih); if (icmph == NULL) { - if (LOG_INVALID(IPPROTO_ICMP)) + if (LOG_INVALID(net, IPPROTO_ICMP)) nf_log_packet(PF_INET, 0, skb, NULL, NULL, NULL, "nf_ct_icmp: short packet "); return -NF_ACCEPT; @@ -190,7 +190,7 @@ icmp_error(struct net *net, struct sk_buff *skb, unsigned int dataoff, /* See ip_conntrack_proto_tcp.c */ if (net->ct.sysctl_checksum && hooknum == NF_INET_PRE_ROUTING && nf_ip_checksum(skb, hooknum, dataoff, 0)) { - if (LOG_INVALID(IPPROTO_ICMP)) + if (LOG_INVALID(net, IPPROTO_ICMP)) nf_log_packet(PF_INET, 0, skb, NULL, NULL, NULL, "nf_ct_icmp: bad HW ICMP checksum "); return -NF_ACCEPT; @@ -203,7 +203,7 @@ icmp_error(struct net *net, struct sk_buff *skb, unsigned int dataoff, * discarded. */ if (icmph->type > NR_ICMP_TYPES) { - if (LOG_INVALID(IPPROTO_ICMP)) + if (LOG_INVALID(net, IPPROTO_ICMP)) nf_log_packet(PF_INET, 0, skb, NULL, NULL, NULL, "nf_ct_icmp: invalid ICMP type "); return -NF_ACCEPT; diff --git a/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c b/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c index fa12e57749a..05726177903 100644 --- a/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c +++ b/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c @@ -181,7 +181,7 @@ icmpv6_error(struct net *net, struct sk_buff *skb, unsigned int dataoff, icmp6h = skb_header_pointer(skb, dataoff, sizeof(_ih), &_ih); if (icmp6h == NULL) { - if (LOG_INVALID(IPPROTO_ICMPV6)) + if (LOG_INVALID(net, IPPROTO_ICMPV6)) nf_log_packet(PF_INET6, 0, skb, NULL, NULL, NULL, "nf_ct_icmpv6: short packet "); return -NF_ACCEPT; diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index 1e87fa0cd3a..ade0bb3ab2e 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -53,7 +53,6 @@ EXPORT_SYMBOL_GPL(nf_conntrack_max); struct nf_conn nf_conntrack_untracked __read_mostly; EXPORT_SYMBOL_GPL(nf_conntrack_untracked); -unsigned int nf_ct_log_invalid __read_mostly; static struct kmem_cache *nf_conntrack_cachep __read_mostly; static int nf_conntrack_hash_rnd_initted; diff --git a/net/netfilter/nf_conntrack_proto_dccp.c b/net/netfilter/nf_conntrack_proto_dccp.c index 769680e68b5..8fcf1762fab 100644 --- a/net/netfilter/nf_conntrack_proto_dccp.c +++ b/net/netfilter/nf_conntrack_proto_dccp.c @@ -418,6 +418,7 @@ static bool dccp_invert_tuple(struct nf_conntrack_tuple *inv, static bool dccp_new(struct nf_conn *ct, const struct sk_buff *skb, unsigned int dataoff) { + struct net *net = nf_ct_net(ct); struct dccp_hdr _dh, *dh; const char *msg; u_int8_t state; @@ -445,7 +446,7 @@ static bool dccp_new(struct nf_conn *ct, const struct sk_buff *skb, return true; out_invalid: - if (LOG_INVALID(IPPROTO_DCCP)) + if (LOG_INVALID(net, IPPROTO_DCCP)) nf_log_packet(nf_ct_l3num(ct), 0, skb, NULL, NULL, NULL, msg); return false; } @@ -463,6 +464,7 @@ static int dccp_packet(struct nf_conn *ct, const struct sk_buff *skb, unsigned int dataoff, enum ip_conntrack_info ctinfo, u_int8_t pf, unsigned int hooknum) { + struct net *net = nf_ct_net(ct); enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo); struct dccp_hdr _dh, *dh; u_int8_t type, old_state, new_state; @@ -524,13 +526,13 @@ static int dccp_packet(struct nf_conn *ct, const struct sk_buff *skb, ct->proto.dccp.last_pkt = type; write_unlock_bh(&dccp_lock); - if (LOG_INVALID(IPPROTO_DCCP)) + if (LOG_INVALID(net, IPPROTO_DCCP)) nf_log_packet(pf, 0, skb, NULL, NULL, NULL, "nf_ct_dccp: invalid packet ignored "); return NF_ACCEPT; case CT_DCCP_INVALID: write_unlock_bh(&dccp_lock); - if (LOG_INVALID(IPPROTO_DCCP)) + if (LOG_INVALID(net, IPPROTO_DCCP)) nf_log_packet(pf, 0, skb, NULL, NULL, NULL, "nf_ct_dccp: invalid state transition "); return -NF_ACCEPT; @@ -590,7 +592,7 @@ static int dccp_error(struct net *net, struct sk_buff *skb, return NF_ACCEPT; out_invalid: - if (LOG_INVALID(IPPROTO_DCCP)) + if (LOG_INVALID(net, IPPROTO_DCCP)) nf_log_packet(pf, 0, skb, NULL, NULL, NULL, msg); return -NF_ACCEPT; } diff --git a/net/netfilter/nf_conntrack_proto_tcp.c b/net/netfilter/nf_conntrack_proto_tcp.c index 131c9be4470..f947ec41e39 100644 --- a/net/netfilter/nf_conntrack_proto_tcp.c +++ b/net/netfilter/nf_conntrack_proto_tcp.c @@ -488,6 +488,7 @@ static bool tcp_in_window(const struct nf_conn *ct, const struct tcphdr *tcph, u_int8_t pf) { + struct net *net = nf_ct_net(ct); struct ip_ct_tcp_state *sender = &state->seen[dir]; struct ip_ct_tcp_state *receiver = &state->seen[!dir]; const struct nf_conntrack_tuple *tuple = &ct->tuplehash[dir].tuple; @@ -668,7 +669,7 @@ static bool tcp_in_window(const struct nf_conn *ct, if (sender->flags & IP_CT_TCP_FLAG_BE_LIBERAL || nf_ct_tcp_be_liberal) res = true; - if (!res && LOG_INVALID(IPPROTO_TCP)) + if (!res && LOG_INVALID(net, IPPROTO_TCP)) nf_log_packet(pf, 0, skb, NULL, NULL, NULL, "nf_ct_tcp: %s ", before(seq, sender->td_maxend + 1) ? @@ -761,7 +762,7 @@ static int tcp_error(struct net *net, /* Smaller that minimal TCP header? */ th = skb_header_pointer(skb, dataoff, sizeof(_tcph), &_tcph); if (th == NULL) { - if (LOG_INVALID(IPPROTO_TCP)) + if (LOG_INVALID(net, IPPROTO_TCP)) nf_log_packet(pf, 0, skb, NULL, NULL, NULL, "nf_ct_tcp: short packet "); return -NF_ACCEPT; @@ -769,7 +770,7 @@ static int tcp_error(struct net *net, /* Not whole TCP header or malformed packet */ if (th->doff*4 < sizeof(struct tcphdr) || tcplen < th->doff*4) { - if (LOG_INVALID(IPPROTO_TCP)) + if (LOG_INVALID(net, IPPROTO_TCP)) nf_log_packet(pf, 0, skb, NULL, NULL, NULL, "nf_ct_tcp: truncated/malformed packet "); return -NF_ACCEPT; @@ -782,7 +783,7 @@ static int tcp_error(struct net *net, /* FIXME: Source route IP option packets --RR */ if (net->ct.sysctl_checksum && hooknum == NF_INET_PRE_ROUTING && nf_checksum(skb, hooknum, dataoff, IPPROTO_TCP, pf)) { - if (LOG_INVALID(IPPROTO_TCP)) + if (LOG_INVALID(net, IPPROTO_TCP)) nf_log_packet(pf, 0, skb, NULL, NULL, NULL, "nf_ct_tcp: bad TCP checksum "); return -NF_ACCEPT; @@ -791,7 +792,7 @@ static int tcp_error(struct net *net, /* Check TCP flags. */ tcpflags = (((u_int8_t *)th)[13] & ~(TH_ECE|TH_CWR|TH_PUSH)); if (!tcp_valid_flags[tcpflags]) { - if (LOG_INVALID(IPPROTO_TCP)) + if (LOG_INVALID(net, IPPROTO_TCP)) nf_log_packet(pf, 0, skb, NULL, NULL, NULL, "nf_ct_tcp: invalid TCP flag combination "); return -NF_ACCEPT; @@ -808,6 +809,7 @@ static int tcp_packet(struct nf_conn *ct, u_int8_t pf, unsigned int hooknum) { + struct net *net = nf_ct_net(ct); struct nf_conntrack_tuple *tuple; enum tcp_conntrack new_state, old_state; enum ip_conntrack_dir dir; @@ -886,7 +888,7 @@ static int tcp_packet(struct nf_conn *ct, * thus initiate a clean new session. */ write_unlock_bh(&tcp_lock); - if (LOG_INVALID(IPPROTO_TCP)) + if (LOG_INVALID(net, IPPROTO_TCP)) nf_log_packet(pf, 0, skb, NULL, NULL, NULL, "nf_ct_tcp: killing out of sync session "); nf_ct_kill(ct); @@ -899,7 +901,7 @@ static int tcp_packet(struct nf_conn *ct, segment_seq_plus_len(ntohl(th->seq), skb->len, dataoff, th); write_unlock_bh(&tcp_lock); - if (LOG_INVALID(IPPROTO_TCP)) + if (LOG_INVALID(net, IPPROTO_TCP)) nf_log_packet(pf, 0, skb, NULL, NULL, NULL, "nf_ct_tcp: invalid packet ignored "); return NF_ACCEPT; @@ -908,7 +910,7 @@ static int tcp_packet(struct nf_conn *ct, pr_debug("nf_ct_tcp: Invalid dir=%i index=%u ostate=%u\n", dir, get_conntrack_index(th), old_state); write_unlock_bh(&tcp_lock); - if (LOG_INVALID(IPPROTO_TCP)) + if (LOG_INVALID(net, IPPROTO_TCP)) nf_log_packet(pf, 0, skb, NULL, NULL, NULL, "nf_ct_tcp: invalid state "); return -NF_ACCEPT; diff --git a/net/netfilter/nf_conntrack_proto_udp.c b/net/netfilter/nf_conntrack_proto_udp.c index 3d3fffe3f8b..7c2ca48698b 100644 --- a/net/netfilter/nf_conntrack_proto_udp.c +++ b/net/netfilter/nf_conntrack_proto_udp.c @@ -101,7 +101,7 @@ static int udp_error(struct net *net, struct sk_buff *skb, unsigned int dataoff, /* Header is too small? */ hdr = skb_header_pointer(skb, dataoff, sizeof(_hdr), &_hdr); if (hdr == NULL) { - if (LOG_INVALID(IPPROTO_UDP)) + if (LOG_INVALID(net, IPPROTO_UDP)) nf_log_packet(pf, 0, skb, NULL, NULL, NULL, "nf_ct_udp: short packet "); return -NF_ACCEPT; @@ -109,7 +109,7 @@ static int udp_error(struct net *net, struct sk_buff *skb, unsigned int dataoff, /* Truncated/malformed packets */ if (ntohs(hdr->len) > udplen || ntohs(hdr->len) < sizeof(*hdr)) { - if (LOG_INVALID(IPPROTO_UDP)) + if (LOG_INVALID(net, IPPROTO_UDP)) nf_log_packet(pf, 0, skb, NULL, NULL, NULL, "nf_ct_udp: truncated/malformed packet "); return -NF_ACCEPT; @@ -125,7 +125,7 @@ static int udp_error(struct net *net, struct sk_buff *skb, unsigned int dataoff, * FIXME: Source route IP option packets --RR */ if (net->ct.sysctl_checksum && hooknum == NF_INET_PRE_ROUTING && nf_checksum(skb, hooknum, dataoff, IPPROTO_UDP, pf)) { - if (LOG_INVALID(IPPROTO_UDP)) + if (LOG_INVALID(net, IPPROTO_UDP)) nf_log_packet(pf, 0, skb, NULL, NULL, NULL, "nf_ct_udp: bad UDP checksum "); return -NF_ACCEPT; diff --git a/net/netfilter/nf_conntrack_proto_udplite.c b/net/netfilter/nf_conntrack_proto_udplite.c index 3d1697c4f91..d22d839e4f9 100644 --- a/net/netfilter/nf_conntrack_proto_udplite.c +++ b/net/netfilter/nf_conntrack_proto_udplite.c @@ -104,7 +104,7 @@ static int udplite_error(struct net *net, /* Header is too small? */ hdr = skb_header_pointer(skb, dataoff, sizeof(_hdr), &_hdr); if (hdr == NULL) { - if (LOG_INVALID(IPPROTO_UDPLITE)) + if (LOG_INVALID(net, IPPROTO_UDPLITE)) nf_log_packet(pf, 0, skb, NULL, NULL, NULL, "nf_ct_udplite: short packet "); return -NF_ACCEPT; @@ -114,7 +114,7 @@ static int udplite_error(struct net *net, if (cscov == 0) cscov = udplen; else if (cscov < sizeof(*hdr) || cscov > udplen) { - if (LOG_INVALID(IPPROTO_UDPLITE)) + if (LOG_INVALID(net, IPPROTO_UDPLITE)) nf_log_packet(pf, 0, skb, NULL, NULL, NULL, "nf_ct_udplite: invalid checksum coverage "); return -NF_ACCEPT; @@ -122,7 +122,7 @@ static int udplite_error(struct net *net, /* UDPLITE mandates checksums */ if (!hdr->check) { - if (LOG_INVALID(IPPROTO_UDPLITE)) + if (LOG_INVALID(net, IPPROTO_UDPLITE)) nf_log_packet(pf, 0, skb, NULL, NULL, NULL, "nf_ct_udplite: checksum missing "); return -NF_ACCEPT; @@ -132,7 +132,7 @@ static int udplite_error(struct net *net, if (net->ct.sysctl_checksum && hooknum == NF_INET_PRE_ROUTING && nf_checksum_partial(skb, hooknum, dataoff, cscov, IPPROTO_UDP, pf)) { - if (LOG_INVALID(IPPROTO_UDPLITE)) + if (LOG_INVALID(net, IPPROTO_UDPLITE)) nf_log_packet(pf, 0, skb, NULL, NULL, NULL, "nf_ct_udplite: bad UDPLite checksum "); return -NF_ACCEPT; diff --git a/net/netfilter/nf_conntrack_standalone.c b/net/netfilter/nf_conntrack_standalone.c index 5cd06637977..98106d4e89f 100644 --- a/net/netfilter/nf_conntrack_standalone.c +++ b/net/netfilter/nf_conntrack_standalone.c @@ -365,7 +365,7 @@ static ctl_table nf_ct_sysctl_table[] = { { .ctl_name = NET_NF_CONNTRACK_LOG_INVALID, .procname = "nf_conntrack_log_invalid", - .data = &nf_ct_log_invalid, + .data = &init_net.ct.sysctl_log_invalid, .maxlen = sizeof(unsigned int), .mode = 0644, .proc_handler = &proc_dointvec_minmax, @@ -403,8 +403,6 @@ static struct ctl_path nf_ct_path[] = { { } }; -EXPORT_SYMBOL_GPL(nf_ct_log_invalid); - static int nf_conntrack_standalone_init_sysctl(struct net *net) { struct ctl_table *table; @@ -423,6 +421,7 @@ static int nf_conntrack_standalone_init_sysctl(struct net *net) table[1].data = &net->ct.count; table[3].data = &net->ct.sysctl_checksum; + table[4].data = &net->ct.sysctl_log_invalid; net->ct.sysctl_header = register_net_sysctl_table(net, nf_net_netfilter_sysctl_path, table); @@ -473,6 +472,7 @@ static int nf_conntrack_net_init(struct net *net) if (ret < 0) goto out_proc; net->ct.sysctl_checksum = 1; + net->ct.sysctl_log_invalid = 0; ret = nf_conntrack_standalone_init_sysctl(net); if (ret < 0) goto out_sysctl; -- cgit v1.2.3-70-g09d2 From d716a4dfbbdf0d4731d596a96e5f4b0d892ac168 Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Wed, 8 Oct 2008 11:35:09 +0200 Subject: netfilter: netns nf_conntrack: per-netns conntrack accounting Signed-off-by: Alexey Dobriyan Signed-off-by: Patrick McHardy --- include/net/netfilter/nf_conntrack_acct.h | 10 +-- include/net/netns/conntrack.h | 2 + net/netfilter/nf_conntrack_acct.c | 100 +++++++++++++++++++++--------- net/netfilter/nf_conntrack_core.c | 4 +- 4 files changed, 81 insertions(+), 35 deletions(-) (limited to 'include/net') diff --git a/include/net/netfilter/nf_conntrack_acct.h b/include/net/netfilter/nf_conntrack_acct.h index 5d5ae55d54c..03e218f0be4 100644 --- a/include/net/netfilter/nf_conntrack_acct.h +++ b/include/net/netfilter/nf_conntrack_acct.h @@ -8,6 +8,7 @@ #ifndef _NF_CONNTRACK_ACCT_H #define _NF_CONNTRACK_ACCT_H +#include #include #include #include @@ -18,8 +19,6 @@ struct nf_conn_counter { u_int64_t bytes; }; -extern int nf_ct_acct; - static inline struct nf_conn_counter *nf_conn_acct_find(const struct nf_conn *ct) { @@ -29,9 +28,10 @@ struct nf_conn_counter *nf_conn_acct_find(const struct nf_conn *ct) static inline struct nf_conn_counter *nf_ct_acct_ext_add(struct nf_conn *ct, gfp_t gfp) { + struct net *net = nf_ct_net(ct); struct nf_conn_counter *acct; - if (!nf_ct_acct) + if (!net->ct.sysctl_acct) return NULL; acct = nf_ct_ext_add(ct, NF_CT_EXT_ACCT, gfp); @@ -45,7 +45,7 @@ struct nf_conn_counter *nf_ct_acct_ext_add(struct nf_conn *ct, gfp_t gfp) extern unsigned int seq_print_acct(struct seq_file *s, const struct nf_conn *ct, int dir); -extern int nf_conntrack_acct_init(void); -extern void nf_conntrack_acct_fini(void); +extern int nf_conntrack_acct_init(struct net *net); +extern void nf_conntrack_acct_fini(struct net *net); #endif /* _NF_CONNTRACK_ACCT_H */ diff --git a/include/net/netns/conntrack.h b/include/net/netns/conntrack.h index 503e37551b1..f4498a62881 100644 --- a/include/net/netns/conntrack.h +++ b/include/net/netns/conntrack.h @@ -17,10 +17,12 @@ struct netns_ct { #ifdef CONFIG_NF_CONNTRACK_EVENTS struct nf_conntrack_ecache *ecache; #endif + int sysctl_acct; int sysctl_checksum; unsigned int sysctl_log_invalid; /* Log invalid packets */ #ifdef CONFIG_SYSCTL struct ctl_table_header *sysctl_header; + struct ctl_table_header *acct_sysctl_header; #endif int hash_vmalloc; int expect_vmalloc; diff --git a/net/netfilter/nf_conntrack_acct.c b/net/netfilter/nf_conntrack_acct.c index 59bd8b903a1..03591d37b9c 100644 --- a/net/netfilter/nf_conntrack_acct.c +++ b/net/netfilter/nf_conntrack_acct.c @@ -22,19 +22,17 @@ #define NF_CT_ACCT_DEFAULT 0 #endif -int nf_ct_acct __read_mostly = NF_CT_ACCT_DEFAULT; -EXPORT_SYMBOL_GPL(nf_ct_acct); +static int nf_ct_acct __read_mostly = NF_CT_ACCT_DEFAULT; module_param_named(acct, nf_ct_acct, bool, 0644); MODULE_PARM_DESC(acct, "Enable connection tracking flow accounting."); #ifdef CONFIG_SYSCTL -static struct ctl_table_header *acct_sysctl_header; static struct ctl_table acct_sysctl_table[] = { { .ctl_name = CTL_UNNUMBERED, .procname = "nf_conntrack_acct", - .data = &nf_ct_acct, + .data = &init_net.ct.sysctl_acct, .maxlen = sizeof(unsigned int), .mode = 0644, .proc_handler = &proc_dointvec, @@ -64,41 +62,87 @@ static struct nf_ct_ext_type acct_extend __read_mostly = { .id = NF_CT_EXT_ACCT, }; -int nf_conntrack_acct_init(void) +#ifdef CONFIG_SYSCTL +static int nf_conntrack_acct_init_sysctl(struct net *net) { - int ret; + struct ctl_table *table; -#ifdef CONFIG_NF_CT_ACCT - printk(KERN_WARNING "CONFIG_NF_CT_ACCT is deprecated and will be removed soon. Plase use\n"); - printk(KERN_WARNING "nf_conntrack.acct=1 kernel paramater, acct=1 nf_conntrack module option or\n"); - printk(KERN_WARNING "sysctl net.netfilter.nf_conntrack_acct=1 to enable it.\n"); -#endif + table = kmemdup(acct_sysctl_table, sizeof(acct_sysctl_table), + GFP_KERNEL); + if (!table) + goto out; + + table[0].data = &net->ct.sysctl_acct; - ret = nf_ct_extend_register(&acct_extend); - if (ret < 0) { - printk(KERN_ERR "nf_conntrack_acct: Unable to register extension\n"); - return ret; + net->ct.acct_sysctl_header = register_net_sysctl_table(net, + nf_net_netfilter_sysctl_path, table); + if (!net->ct.acct_sysctl_header) { + printk(KERN_ERR "nf_conntrack_acct: can't register to sysctl.\n"); + goto out_register; } + return 0; -#ifdef CONFIG_SYSCTL - acct_sysctl_header = register_sysctl_paths(nf_net_netfilter_sysctl_path, - acct_sysctl_table); +out_register: + kfree(table); +out: + return -ENOMEM; +} - if (!acct_sysctl_header) { - nf_ct_extend_unregister(&acct_extend); +static void nf_conntrack_acct_fini_sysctl(struct net *net) +{ + struct ctl_table *table; - printk(KERN_ERR "nf_conntrack_acct: can't register to sysctl.\n"); - return -ENOMEM; - } + table = net->ct.acct_sysctl_header->ctl_table_arg; + unregister_net_sysctl_table(net->ct.acct_sysctl_header); + kfree(table); +} +#else +static int nf_conntrack_acct_init_sysctl(struct net *net) +{ + return 0; +} + +static void nf_conntrack_acct_fini_sysctl(struct net *net) +{ +} +#endif + +int nf_conntrack_acct_init(struct net *net) +{ + int ret; + + net->ct.sysctl_acct = nf_ct_acct; + + if (net_eq(net, &init_net)) { +#ifdef CONFIG_NF_CT_ACCT + printk(KERN_WARNING "CONFIG_NF_CT_ACCT is deprecated and will be removed soon. Plase use\n"); + printk(KERN_WARNING "nf_conntrack.acct=1 kernel paramater, acct=1 nf_conntrack module option or\n"); + printk(KERN_WARNING "sysctl net.netfilter.nf_conntrack_acct=1 to enable it.\n"); #endif + ret = nf_ct_extend_register(&acct_extend); + if (ret < 0) { + printk(KERN_ERR "nf_conntrack_acct: Unable to register extension\n"); + goto out_extend_register; + } + } + + ret = nf_conntrack_acct_init_sysctl(net); + if (ret < 0) + goto out_sysctl; + return 0; + +out_sysctl: + if (net_eq(net, &init_net)) + nf_ct_extend_unregister(&acct_extend); +out_extend_register: + return ret; } -void nf_conntrack_acct_fini(void) +void nf_conntrack_acct_fini(struct net *net) { -#ifdef CONFIG_SYSCTL - unregister_sysctl_table(acct_sysctl_header); -#endif - nf_ct_extend_unregister(&acct_extend); + nf_conntrack_acct_fini_sysctl(net); + if (net_eq(net, &init_net)) + nf_ct_extend_unregister(&acct_extend); } diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index ade0bb3ab2e..bb26460d897 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -1039,7 +1039,7 @@ void nf_conntrack_cleanup(struct net *net) nf_ct_free_hashtable(net->ct.hash, net->ct.hash_vmalloc, nf_conntrack_htable_size); - nf_conntrack_acct_fini(); + nf_conntrack_acct_fini(net); nf_conntrack_expect_fini(net); free_percpu(net->ct.stat); nf_conntrack_helper_fini(); @@ -1191,7 +1191,7 @@ int nf_conntrack_init(struct net *net) if (ret < 0) goto out_fini_expect; - ret = nf_conntrack_acct_init(); + ret = nf_conntrack_acct_init(net); if (ret < 0) goto out_fini_helper; -- cgit v1.2.3-70-g09d2 From e099a173573ce1ba171092aee7bb3c72ea686e59 Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Wed, 8 Oct 2008 11:35:10 +0200 Subject: netfilter: netns nat: per-netns NAT table Same story as with iptable_filter, iptables_raw tables. Signed-off-by: Alexey Dobriyan Signed-off-by: Patrick McHardy --- include/net/netns/ipv4.h | 1 + net/ipv4/netfilter/nf_nat_rule.c | 40 +++++++++++++++++++++++++++++----------- 2 files changed, 30 insertions(+), 11 deletions(-) (limited to 'include/net') diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h index a6ed83853dc..b286b840493 100644 --- a/include/net/netns/ipv4.h +++ b/include/net/netns/ipv4.h @@ -38,6 +38,7 @@ struct netns_ipv4 { struct xt_table *iptable_raw; struct xt_table *arptable_filter; struct xt_table *iptable_security; + struct xt_table *nat_table; #endif int sysctl_icmp_echo_ignore_all; diff --git a/net/ipv4/netfilter/nf_nat_rule.c b/net/ipv4/netfilter/nf_nat_rule.c index e8b4d0d4439..0a02a8caf3b 100644 --- a/net/ipv4/netfilter/nf_nat_rule.c +++ b/net/ipv4/netfilter/nf_nat_rule.c @@ -33,7 +33,7 @@ static struct struct ipt_replace repl; struct ipt_standard entries[3]; struct ipt_error term; -} nat_initial_table __initdata = { +} nat_initial_table __net_initdata = { .repl = { .name = "nat", .valid_hooks = NAT_VALID_HOOKS, @@ -58,14 +58,13 @@ static struct .term = IPT_ERROR_INIT, /* ERROR */ }; -static struct xt_table __nat_table = { +static struct xt_table nat_table = { .name = "nat", .valid_hooks = NAT_VALID_HOOKS, .lock = __RW_LOCK_UNLOCKED(__nat_table.lock), .me = THIS_MODULE, .af = AF_INET, }; -static struct xt_table *nat_table; /* Source NAT */ static unsigned int ipt_snat_target(struct sk_buff *skb, @@ -194,9 +193,10 @@ int nf_nat_rule_find(struct sk_buff *skb, const struct net_device *out, struct nf_conn *ct) { + struct net *net = nf_ct_net(ct); int ret; - ret = ipt_do_table(skb, hooknum, in, out, nat_table); + ret = ipt_do_table(skb, hooknum, in, out, net->ipv4.nat_table); if (ret == NF_ACCEPT) { if (!nf_nat_initialized(ct, HOOK2MANIP(hooknum))) @@ -226,14 +226,32 @@ static struct xt_target ipt_dnat_reg __read_mostly = { .family = AF_INET, }; +static int __net_init nf_nat_rule_net_init(struct net *net) +{ + net->ipv4.nat_table = ipt_register_table(net, &nat_table, + &nat_initial_table.repl); + if (IS_ERR(net->ipv4.nat_table)) + return PTR_ERR(net->ipv4.nat_table); + return 0; +} + +static void __net_exit nf_nat_rule_net_exit(struct net *net) +{ + ipt_unregister_table(net->ipv4.nat_table); +} + +static struct pernet_operations nf_nat_rule_net_ops = { + .init = nf_nat_rule_net_init, + .exit = nf_nat_rule_net_exit, +}; + int __init nf_nat_rule_init(void) { int ret; - nat_table = ipt_register_table(&init_net, &__nat_table, - &nat_initial_table.repl); - if (IS_ERR(nat_table)) - return PTR_ERR(nat_table); + ret = register_pernet_subsys(&nf_nat_rule_net_ops); + if (ret != 0) + goto out; ret = xt_register_target(&ipt_snat_reg); if (ret != 0) goto unregister_table; @@ -247,8 +265,8 @@ int __init nf_nat_rule_init(void) unregister_snat: xt_unregister_target(&ipt_snat_reg); unregister_table: - ipt_unregister_table(nat_table); - + unregister_pernet_subsys(&nf_nat_rule_net_ops); + out: return ret; } @@ -256,5 +274,5 @@ void nf_nat_rule_cleanup(void) { xt_unregister_target(&ipt_dnat_reg); xt_unregister_target(&ipt_snat_reg); - ipt_unregister_table(nat_table); + unregister_pernet_subsys(&nf_nat_rule_net_ops); } -- cgit v1.2.3-70-g09d2 From 0c4c9288ada0e6642d511ef872f10a4781a896ff Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Wed, 8 Oct 2008 11:35:11 +0200 Subject: netfilter: netns nat: per-netns bysource hash Signed-off-by: Alexey Dobriyan Signed-off-by: Patrick McHardy --- include/net/netns/ipv4.h | 2 ++ net/ipv4/netfilter/nf_nat_core.c | 72 +++++++++++++++++++++++++--------------- 2 files changed, 47 insertions(+), 27 deletions(-) (limited to 'include/net') diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h index b286b840493..ece1c926b5d 100644 --- a/include/net/netns/ipv4.h +++ b/include/net/netns/ipv4.h @@ -39,6 +39,8 @@ struct netns_ipv4 { struct xt_table *arptable_filter; struct xt_table *iptable_security; struct xt_table *nat_table; + struct hlist_head *nat_bysource; + int nat_vmalloced; #endif int sysctl_icmp_echo_ignore_all; diff --git a/net/ipv4/netfilter/nf_nat_core.c b/net/ipv4/netfilter/nf_nat_core.c index 5d4a5b70da2..2ac9eaf1a8c 100644 --- a/net/ipv4/netfilter/nf_nat_core.c +++ b/net/ipv4/netfilter/nf_nat_core.c @@ -37,9 +37,6 @@ static struct nf_conntrack_l3proto *l3proto __read_mostly; /* Calculated at init based on memory size */ static unsigned int nf_nat_htable_size __read_mostly; -static int nf_nat_vmalloced; - -static struct hlist_head *bysource __read_mostly; #define MAX_IP_NAT_PROTO 256 static const struct nf_nat_protocol *nf_nat_protos[MAX_IP_NAT_PROTO] @@ -145,7 +142,8 @@ same_src(const struct nf_conn *ct, /* Only called for SRC manip */ static int -find_appropriate_src(const struct nf_conntrack_tuple *tuple, +find_appropriate_src(struct net *net, + const struct nf_conntrack_tuple *tuple, struct nf_conntrack_tuple *result, const struct nf_nat_range *range) { @@ -155,7 +153,7 @@ find_appropriate_src(const struct nf_conntrack_tuple *tuple, const struct hlist_node *n; rcu_read_lock(); - hlist_for_each_entry_rcu(nat, n, &bysource[h], bysource) { + hlist_for_each_entry_rcu(nat, n, &net->ipv4.nat_bysource[h], bysource) { ct = nat->ct; if (same_src(ct, tuple)) { /* Copy source part from reply tuple. */ @@ -231,6 +229,7 @@ get_unique_tuple(struct nf_conntrack_tuple *tuple, struct nf_conn *ct, enum nf_nat_manip_type maniptype) { + struct net *net = nf_ct_net(ct); const struct nf_nat_protocol *proto; /* 1) If this srcip/proto/src-proto-part is currently mapped, @@ -242,7 +241,7 @@ get_unique_tuple(struct nf_conntrack_tuple *tuple, manips not an issue. */ if (maniptype == IP_NAT_MANIP_SRC && !(range->flags & IP_NAT_RANGE_PROTO_RANDOM)) { - if (find_appropriate_src(orig_tuple, tuple, range)) { + if (find_appropriate_src(net, orig_tuple, tuple, range)) { pr_debug("get_unique_tuple: Found current src map\n"); if (!nf_nat_used_tuple(tuple, ct)) return; @@ -283,6 +282,7 @@ nf_nat_setup_info(struct nf_conn *ct, const struct nf_nat_range *range, enum nf_nat_manip_type maniptype) { + struct net *net = nf_ct_net(ct); struct nf_conntrack_tuple curr_tuple, new_tuple; struct nf_conn_nat *nat; int have_to_hash = !(ct->status & IPS_NAT_DONE_MASK); @@ -334,7 +334,8 @@ nf_nat_setup_info(struct nf_conn *ct, /* nf_conntrack_alter_reply might re-allocate exntension aera */ nat = nfct_nat(ct); nat->ct = ct; - hlist_add_head_rcu(&nat->bysource, &bysource[srchash]); + hlist_add_head_rcu(&nat->bysource, + &net->ipv4.nat_bysource[srchash]); spin_unlock_bh(&nf_nat_lock); } @@ -583,6 +584,40 @@ static struct nf_ct_ext_type nat_extend __read_mostly = { .flags = NF_CT_EXT_F_PREALLOC, }; +static int __net_init nf_nat_net_init(struct net *net) +{ + net->ipv4.nat_bysource = nf_ct_alloc_hashtable(&nf_nat_htable_size, + &net->ipv4.nat_vmalloced); + if (!net->ipv4.nat_bysource) + return -ENOMEM; + return 0; +} + +/* Clear NAT section of all conntracks, in case we're loaded again. */ +static int clean_nat(struct nf_conn *i, void *data) +{ + struct nf_conn_nat *nat = nfct_nat(i); + + if (!nat) + return 0; + memset(nat, 0, sizeof(*nat)); + i->status &= ~(IPS_NAT_MASK | IPS_NAT_DONE_MASK | IPS_SEQ_ADJUST); + return 0; +} + +static void __net_exit nf_nat_net_exit(struct net *net) +{ + nf_ct_iterate_cleanup(net, &clean_nat, NULL); + synchronize_rcu(); + nf_ct_free_hashtable(net->ipv4.nat_bysource, net->ipv4.nat_vmalloced, + nf_nat_htable_size); +} + +static struct pernet_operations nf_nat_net_ops = { + .init = nf_nat_net_init, + .exit = nf_nat_net_exit, +}; + static int __init nf_nat_init(void) { size_t i; @@ -599,12 +634,9 @@ static int __init nf_nat_init(void) /* Leave them the same for the moment. */ nf_nat_htable_size = nf_conntrack_htable_size; - bysource = nf_ct_alloc_hashtable(&nf_nat_htable_size, - &nf_nat_vmalloced); - if (!bysource) { - ret = -ENOMEM; + ret = register_pernet_subsys(&nf_nat_net_ops); + if (ret < 0) goto cleanup_extend; - } /* Sew in builtin protocols. */ spin_lock_bh(&nf_nat_lock); @@ -629,23 +661,9 @@ static int __init nf_nat_init(void) return ret; } -/* Clear NAT section of all conntracks, in case we're loaded again. */ -static int clean_nat(struct nf_conn *i, void *data) -{ - struct nf_conn_nat *nat = nfct_nat(i); - - if (!nat) - return 0; - memset(nat, 0, sizeof(*nat)); - i->status &= ~(IPS_NAT_MASK | IPS_NAT_DONE_MASK | IPS_SEQ_ADJUST); - return 0; -} - static void __exit nf_nat_cleanup(void) { - nf_ct_iterate_cleanup(&init_net, &clean_nat, NULL); - synchronize_rcu(); - nf_ct_free_hashtable(bysource, nf_nat_vmalloced, nf_nat_htable_size); + unregister_pernet_subsys(&nf_nat_net_ops); nf_ct_l3proto_put(l3proto); nf_ct_extend_unregister(&nat_extend); rcu_assign_pointer(nf_nat_seq_adjust_hook, NULL); -- cgit v1.2.3-70-g09d2 From 73e4022f78acdbe420e8c24a7afbd90f4c8f5077 Mon Sep 17 00:00:00 2001 From: KOVACS Krisztian Date: Wed, 8 Oct 2008 11:35:12 +0200 Subject: netfilter: split netfilter IPv4 defragmentation into a separate module Netfilter connection tracking requires all IPv4 packets to be defragmented. Both the socket match and the TPROXY target depend on this functionality, so this patch separates the Netfilter IPv4 defrag hooks into a separate module. Signed-off-by: KOVACS Krisztian Signed-off-by: Patrick McHardy --- include/net/netfilter/ipv4/nf_defrag_ipv4.h | 6 ++ net/ipv4/netfilter/Kconfig | 5 ++ net/ipv4/netfilter/Makefile | 3 + net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c | 56 +-------------- net/ipv4/netfilter/nf_defrag_ipv4.c | 96 ++++++++++++++++++++++++++ 5 files changed, 113 insertions(+), 53 deletions(-) create mode 100644 include/net/netfilter/ipv4/nf_defrag_ipv4.h create mode 100644 net/ipv4/netfilter/nf_defrag_ipv4.c (limited to 'include/net') diff --git a/include/net/netfilter/ipv4/nf_defrag_ipv4.h b/include/net/netfilter/ipv4/nf_defrag_ipv4.h new file mode 100644 index 00000000000..6b00ea38546 --- /dev/null +++ b/include/net/netfilter/ipv4/nf_defrag_ipv4.h @@ -0,0 +1,6 @@ +#ifndef _NF_DEFRAG_IPV4_H +#define _NF_DEFRAG_IPV4_H + +extern void nf_defrag_ipv4_enable(void); + +#endif /* _NF_DEFRAG_IPV4_H */ diff --git a/net/ipv4/netfilter/Kconfig b/net/ipv4/netfilter/Kconfig index 4e842d56642..07757ac8d5d 100644 --- a/net/ipv4/netfilter/Kconfig +++ b/net/ipv4/netfilter/Kconfig @@ -5,10 +5,15 @@ menu "IP: Netfilter Configuration" depends on INET && NETFILTER +config NF_DEFRAG_IPV4 + tristate + default n + config NF_CONNTRACK_IPV4 tristate "IPv4 connection tracking support (required for NAT)" depends on NF_CONNTRACK default m if NETFILTER_ADVANCED=n + select NF_DEFRAG_IPV4 ---help--- Connection tracking keeps a record of what packets have passed through your machine, in order to figure out how they are related diff --git a/net/ipv4/netfilter/Makefile b/net/ipv4/netfilter/Makefile index 1107edbe478..5f9b650d90f 100644 --- a/net/ipv4/netfilter/Makefile +++ b/net/ipv4/netfilter/Makefile @@ -18,6 +18,9 @@ obj-$(CONFIG_NF_CONNTRACK_IPV4) += nf_conntrack_ipv4.o obj-$(CONFIG_NF_NAT) += nf_nat.o +# defrag +obj-$(CONFIG_NF_DEFRAG_IPV4) += nf_defrag_ipv4.o + # NAT helpers (nf_conntrack) obj-$(CONFIG_NF_NAT_AMANDA) += nf_nat_amanda.o obj-$(CONFIG_NF_NAT_FTP) += nf_nat_ftp.o diff --git a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c index af69acc1d0f..4a7c3527539 100644 --- a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c +++ b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c @@ -1,3 +1,4 @@ + /* (C) 1999-2001 Paul `Rusty' Russell * (C) 2002-2004 Netfilter Core Team * @@ -24,6 +25,7 @@ #include #include #include +#include int (*nf_nat_seq_adjust_hook)(struct sk_buff *skb, struct nf_conn *ct, @@ -63,23 +65,6 @@ static int ipv4_print_tuple(struct seq_file *s, NIPQUAD(tuple->dst.u3.ip)); } -/* Returns new sk_buff, or NULL */ -static int nf_ct_ipv4_gather_frags(struct sk_buff *skb, u_int32_t user) -{ - int err; - - skb_orphan(skb); - - local_bh_disable(); - err = ip_defrag(skb, user); - local_bh_enable(); - - if (!err) - ip_send_check(ip_hdr(skb)); - - return err; -} - static int ipv4_get_l4proto(const struct sk_buff *skb, unsigned int nhoff, unsigned int *dataoff, u_int8_t *protonum) { @@ -144,28 +129,6 @@ out: return nf_conntrack_confirm(skb); } -static unsigned int ipv4_conntrack_defrag(unsigned int hooknum, - struct sk_buff *skb, - const struct net_device *in, - const struct net_device *out, - int (*okfn)(struct sk_buff *)) -{ - /* Previously seen (loopback)? Ignore. Do this before - fragment check. */ - if (skb->nfct) - return NF_ACCEPT; - - /* Gather fragments. */ - if (ip_hdr(skb)->frag_off & htons(IP_MF | IP_OFFSET)) { - if (nf_ct_ipv4_gather_frags(skb, - hooknum == NF_INET_PRE_ROUTING ? - IP_DEFRAG_CONNTRACK_IN : - IP_DEFRAG_CONNTRACK_OUT)) - return NF_STOLEN; - } - return NF_ACCEPT; -} - static unsigned int ipv4_conntrack_in(unsigned int hooknum, struct sk_buff *skb, const struct net_device *in, @@ -194,13 +157,6 @@ static unsigned int ipv4_conntrack_local(unsigned int hooknum, /* Connection tracking may drop packets, but never alters them, so make it the first hook. */ static struct nf_hook_ops ipv4_conntrack_ops[] __read_mostly = { - { - .hook = ipv4_conntrack_defrag, - .owner = THIS_MODULE, - .pf = PF_INET, - .hooknum = NF_INET_PRE_ROUTING, - .priority = NF_IP_PRI_CONNTRACK_DEFRAG, - }, { .hook = ipv4_conntrack_in, .owner = THIS_MODULE, @@ -208,13 +164,6 @@ static struct nf_hook_ops ipv4_conntrack_ops[] __read_mostly = { .hooknum = NF_INET_PRE_ROUTING, .priority = NF_IP_PRI_CONNTRACK, }, - { - .hook = ipv4_conntrack_defrag, - .owner = THIS_MODULE, - .pf = PF_INET, - .hooknum = NF_INET_LOCAL_OUT, - .priority = NF_IP_PRI_CONNTRACK_DEFRAG, - }, { .hook = ipv4_conntrack_local, .owner = THIS_MODULE, @@ -422,6 +371,7 @@ static int __init nf_conntrack_l3proto_ipv4_init(void) int ret = 0; need_conntrack(); + nf_defrag_ipv4_enable(); ret = nf_register_sockopt(&so_getorigdst); if (ret < 0) { diff --git a/net/ipv4/netfilter/nf_defrag_ipv4.c b/net/ipv4/netfilter/nf_defrag_ipv4.c new file mode 100644 index 00000000000..aa2c50a180f --- /dev/null +++ b/net/ipv4/netfilter/nf_defrag_ipv4.c @@ -0,0 +1,96 @@ +/* (C) 1999-2001 Paul `Rusty' Russell + * (C) 2002-2004 Netfilter Core Team + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +/* Returns new sk_buff, or NULL */ +static int nf_ct_ipv4_gather_frags(struct sk_buff *skb, u_int32_t user) +{ + int err; + + skb_orphan(skb); + + local_bh_disable(); + err = ip_defrag(skb, user); + local_bh_enable(); + + if (!err) + ip_send_check(ip_hdr(skb)); + + return err; +} + +static unsigned int ipv4_conntrack_defrag(unsigned int hooknum, + struct sk_buff *skb, + const struct net_device *in, + const struct net_device *out, + int (*okfn)(struct sk_buff *)) +{ +#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE) + /* Previously seen (loopback)? Ignore. Do this before + fragment check. */ + if (skb->nfct) + return NF_ACCEPT; +#endif + + /* Gather fragments. */ + if (ip_hdr(skb)->frag_off & htons(IP_MF | IP_OFFSET)) { + if (nf_ct_ipv4_gather_frags(skb, + hooknum == NF_INET_PRE_ROUTING ? + IP_DEFRAG_CONNTRACK_IN : + IP_DEFRAG_CONNTRACK_OUT)) + return NF_STOLEN; + } + return NF_ACCEPT; +} + +static struct nf_hook_ops ipv4_defrag_ops[] = { + { + .hook = ipv4_conntrack_defrag, + .owner = THIS_MODULE, + .pf = PF_INET, + .hooknum = NF_INET_PRE_ROUTING, + .priority = NF_IP_PRI_CONNTRACK_DEFRAG, + }, + { + .hook = ipv4_conntrack_defrag, + .owner = THIS_MODULE, + .pf = PF_INET, + .hooknum = NF_INET_LOCAL_OUT, + .priority = NF_IP_PRI_CONNTRACK_DEFRAG, + }, +}; + +static int __init nf_defrag_init(void) +{ + return nf_register_hooks(ipv4_defrag_ops, ARRAY_SIZE(ipv4_defrag_ops)); +} + +static void __exit nf_defrag_fini(void) +{ + nf_unregister_hooks(ipv4_defrag_ops, ARRAY_SIZE(ipv4_defrag_ops)); +} + +void nf_defrag_ipv4_enable(void) +{ +} +EXPORT_SYMBOL_GPL(nf_defrag_ipv4_enable); + +module_init(nf_defrag_init); +module_exit(nf_defrag_fini); + +MODULE_LICENSE("GPL"); -- cgit v1.2.3-70-g09d2 From 9ad2d745a23853927a19789b034d9eb2e62d78ee Mon Sep 17 00:00:00 2001 From: KOVACS Krisztian Date: Wed, 8 Oct 2008 11:35:12 +0200 Subject: netfilter: iptables tproxy core The iptables tproxy core is a module that contains the common routines used by various tproxy related modules (TPROXY target and socket match) Signed-off-by: KOVACS Krisztian Signed-off-by: Patrick McHardy --- include/net/netfilter/nf_tproxy_core.h | 32 ++++++++++++ net/netfilter/Kconfig | 15 ++++++ net/netfilter/Makefile | 3 ++ net/netfilter/nf_tproxy_core.c | 96 ++++++++++++++++++++++++++++++++++ 4 files changed, 146 insertions(+) create mode 100644 include/net/netfilter/nf_tproxy_core.h create mode 100644 net/netfilter/nf_tproxy_core.c (limited to 'include/net') diff --git a/include/net/netfilter/nf_tproxy_core.h b/include/net/netfilter/nf_tproxy_core.h new file mode 100644 index 00000000000..208b46f4d6d --- /dev/null +++ b/include/net/netfilter/nf_tproxy_core.h @@ -0,0 +1,32 @@ +#ifndef _NF_TPROXY_CORE_H +#define _NF_TPROXY_CORE_H + +#include +#include +#include +#include +#include +#include + +/* look up and get a reference to a matching socket */ +extern struct sock * +nf_tproxy_get_sock_v4(struct net *net, const u8 protocol, + const __be32 saddr, const __be32 daddr, + const __be16 sport, const __be16 dport, + const struct net_device *in, bool listening); + +static inline void +nf_tproxy_put_sock(struct sock *sk) +{ + /* TIME_WAIT inet sockets have to be handled differently */ + if ((sk->sk_protocol == IPPROTO_TCP) && (sk->sk_state == TCP_TIME_WAIT)) + inet_twsk_put(inet_twsk(sk)); + else + sock_put(sk); +} + +/* assign a socket to the skb -- consumes sk */ +int +nf_tproxy_assign_sock(struct sk_buff *skb, struct sock *sk); + +#endif diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig index 4a464857f21..ed1dcfb61e1 100644 --- a/net/netfilter/Kconfig +++ b/net/netfilter/Kconfig @@ -287,6 +287,21 @@ config NF_CT_NETLINK help This option enables support for a netlink-based userspace interface +# transparent proxy support +config NETFILTER_TPROXY + tristate "Transparent proxying support (EXPERIMENTAL)" + depends on EXPERIMENTAL + depends on IP_NF_MANGLE + depends on NETFILTER_ADVANCED + help + This option enables transparent proxying support, that is, + support for handling non-locally bound IPv4 TCP and UDP sockets. + For it to work you will have to configure certain iptables rules + and use policy routing. For more information on how to set it up + see Documentation/networking/tproxy.txt. + + To compile it as a module, choose M here. If unsure, say N. + config NETFILTER_XTABLES tristate "Netfilter Xtables support (required for ip_tables)" default m if NETFILTER_ADVANCED=n diff --git a/net/netfilter/Makefile b/net/netfilter/Makefile index f101cf61e6f..fc8bbb48d38 100644 --- a/net/netfilter/Makefile +++ b/net/netfilter/Makefile @@ -34,6 +34,9 @@ obj-$(CONFIG_NF_CONNTRACK_SANE) += nf_conntrack_sane.o obj-$(CONFIG_NF_CONNTRACK_SIP) += nf_conntrack_sip.o obj-$(CONFIG_NF_CONNTRACK_TFTP) += nf_conntrack_tftp.o +# transparent proxy support +obj-$(CONFIG_NETFILTER_TPROXY) += nf_tproxy_core.o + # generic X tables obj-$(CONFIG_NETFILTER_XTABLES) += x_tables.o xt_tcpudp.o diff --git a/net/netfilter/nf_tproxy_core.c b/net/netfilter/nf_tproxy_core.c new file mode 100644 index 00000000000..fe34f4bf74c --- /dev/null +++ b/net/netfilter/nf_tproxy_core.c @@ -0,0 +1,96 @@ +/* + * Transparent proxy support for Linux/iptables + * + * Copyright (c) 2006-2007 BalaBit IT Ltd. + * Author: Balazs Scheidler, Krisztian Kovacs + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + */ + +#include +#include + +#include +#include +#include +#include +#include + +struct sock * +nf_tproxy_get_sock_v4(struct net *net, const u8 protocol, + const __be32 saddr, const __be32 daddr, + const __be16 sport, const __be16 dport, + const struct net_device *in, bool listening_only) +{ + struct sock *sk; + + /* look up socket */ + switch (protocol) { + case IPPROTO_TCP: + if (listening_only) + sk = __inet_lookup_listener(net, &tcp_hashinfo, + daddr, ntohs(dport), + in->ifindex); + else + sk = __inet_lookup(net, &tcp_hashinfo, + saddr, sport, daddr, dport, + in->ifindex); + break; + case IPPROTO_UDP: + sk = udp4_lib_lookup(net, saddr, sport, daddr, dport, + in->ifindex); + break; + default: + WARN_ON(1); + sk = NULL; + } + + pr_debug("tproxy socket lookup: proto %u %08x:%u -> %08x:%u, listener only: %d, sock %p\n", + protocol, ntohl(saddr), ntohs(sport), ntohl(daddr), ntohs(dport), listening_only, sk); + + return sk; +} +EXPORT_SYMBOL_GPL(nf_tproxy_get_sock_v4); + +static void +nf_tproxy_destructor(struct sk_buff *skb) +{ + struct sock *sk = skb->sk; + + skb->sk = NULL; + skb->destructor = NULL; + + if (sk) + nf_tproxy_put_sock(sk); +} + +/* consumes sk */ +int +nf_tproxy_assign_sock(struct sk_buff *skb, struct sock *sk) +{ + if (inet_sk(sk)->transparent) { + skb->sk = sk; + skb->destructor = nf_tproxy_destructor; + return 1; + } else + nf_tproxy_put_sock(sk); + + return 0; +} +EXPORT_SYMBOL_GPL(nf_tproxy_assign_sock); + +static int __init nf_tproxy_init(void) +{ + pr_info("NF_TPROXY: Transparent proxy support initialized, version 4.1.0\n"); + pr_info("NF_TPROXY: Copyright (c) 2006-2007 BalaBit IT Ltd.\n"); + return 0; +} + +module_init(nf_tproxy_init); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Krisztian Kovacs"); +MODULE_DESCRIPTION("Transparent proxy support core routines"); -- cgit v1.2.3-70-g09d2 From 3bd653c8455bc7991bae77968702b31c8f5df883 Mon Sep 17 00:00:00 2001 From: "Denis V. Lunev" Date: Wed, 8 Oct 2008 10:54:51 -0700 Subject: netns: add net parameter to IP6_INC_STATS Signed-off-by: Denis V. Lunev Signed-off-by: David S. Miller --- include/net/ipv6.h | 3 ++- net/ipv6/icmp.c | 2 +- net/ipv6/ip6_output.c | 56 +++++++++++++++++++++++++++++++-------------------- net/ipv6/mcast.c | 10 ++++----- net/ipv6/ndisc.c | 4 ++-- net/ipv6/netfilter.c | 3 ++- net/ipv6/raw.c | 4 ++-- net/ipv6/reassembly.c | 8 ++++---- net/ipv6/route.c | 7 +++++-- 9 files changed, 57 insertions(+), 40 deletions(-) (limited to 'include/net') diff --git a/include/net/ipv6.h b/include/net/ipv6.h index dfa7ae3c560..26c17988b90 100644 --- a/include/net/ipv6.h +++ b/include/net/ipv6.h @@ -129,7 +129,8 @@ extern struct ctl_path net_ipv6_ctl_path[]; /* MIBs */ DECLARE_SNMP_STAT(struct ipstats_mib, ipv6_statistics); -#define IP6_INC_STATS(idev,field) _DEVINC(ipv6, , idev, field) +#define IP6_INC_STATS(net, idev,field) ({ (void)(net); \ + _DEVINC(ipv6, , idev, field); }) #define IP6_INC_STATS_BH(idev,field) _DEVINC(ipv6, _BH, idev, field) #define IP6_ADD_STATS_BH(idev,field,val) _DEVADD(ipv6, _BH, idev, field, val) diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c index b3157a0cc15..758cdd7dc27 100644 --- a/net/ipv6/icmp.c +++ b/net/ipv6/icmp.c @@ -183,7 +183,7 @@ static inline int icmpv6_xrlim_allow(struct sock *sk, int type, */ dst = ip6_route_output(net, sk, fl); if (dst->error) { - IP6_INC_STATS(ip6_dst_idev(dst), + IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTNOROUTES); } else if (dst->dev && (dst->dev->flags&IFF_LOOPBACK)) { res = 1; diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index f22393e1166..db28c208f32 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -150,13 +150,14 @@ static int ip6_output2(struct sk_buff *skb) ip6_dev_loopback_xmit); if (ipv6_hdr(skb)->hop_limit == 0) { - IP6_INC_STATS(idev, IPSTATS_MIB_OUTDISCARDS); + IP6_INC_STATS(dev_net(dev), idev, + IPSTATS_MIB_OUTDISCARDS); kfree_skb(skb); return 0; } } - IP6_INC_STATS(idev, IPSTATS_MIB_OUTMCASTPKTS); + IP6_INC_STATS(dev_net(dev), idev, IPSTATS_MIB_OUTMCASTPKTS); } return NF_HOOK(PF_INET6, NF_INET_POST_ROUTING, skb, NULL, skb->dev, @@ -175,7 +176,8 @@ int ip6_output(struct sk_buff *skb) { struct inet6_dev *idev = ip6_dst_idev(skb->dst); if (unlikely(idev->cnf.disable_ipv6)) { - IP6_INC_STATS(idev, IPSTATS_MIB_OUTDISCARDS); + IP6_INC_STATS(dev_net(skb->dst->dev), idev, + IPSTATS_MIB_OUTDISCARDS); kfree_skb(skb); return 0; } @@ -194,6 +196,7 @@ int ip6_output(struct sk_buff *skb) int ip6_xmit(struct sock *sk, struct sk_buff *skb, struct flowi *fl, struct ipv6_txoptions *opt, int ipfragok) { + struct net *net = sock_net(sk); struct ipv6_pinfo *np = inet6_sk(sk); struct in6_addr *first_hop = &fl->fl6_dst; struct dst_entry *dst = skb->dst; @@ -216,7 +219,7 @@ int ip6_xmit(struct sock *sk, struct sk_buff *skb, struct flowi *fl, if (skb_headroom(skb) < head_room) { struct sk_buff *skb2 = skb_realloc_headroom(skb, head_room); if (skb2 == NULL) { - IP6_INC_STATS(ip6_dst_idev(skb->dst), + IP6_INC_STATS(net, ip6_dst_idev(skb->dst), IPSTATS_MIB_OUTDISCARDS); kfree_skb(skb); return -ENOBUFS; @@ -270,7 +273,7 @@ int ip6_xmit(struct sock *sk, struct sk_buff *skb, struct flowi *fl, mtu = dst_mtu(dst); if ((skb->len <= mtu) || skb->local_df || skb_is_gso(skb)) { - IP6_INC_STATS(ip6_dst_idev(skb->dst), + IP6_INC_STATS(net, ip6_dst_idev(skb->dst), IPSTATS_MIB_OUTREQUESTS); return NF_HOOK(PF_INET6, NF_INET_LOCAL_OUT, skb, NULL, dst->dev, dst_output); @@ -280,7 +283,7 @@ int ip6_xmit(struct sock *sk, struct sk_buff *skb, struct flowi *fl, printk(KERN_DEBUG "IPv6: sending pkt_too_big to self\n"); skb->dev = dst->dev; icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu, skb->dev); - IP6_INC_STATS(ip6_dst_idev(skb->dst), IPSTATS_MIB_FRAGFAILS); + IP6_INC_STATS(net, ip6_dst_idev(skb->dst), IPSTATS_MIB_FRAGFAILS); kfree_skb(skb); return -EMSGSIZE; } @@ -422,7 +425,7 @@ int ip6_forward(struct sk_buff *skb) goto drop; if (!xfrm6_policy_check(NULL, XFRM_POLICY_FWD, skb)) { - IP6_INC_STATS(ip6_dst_idev(dst), IPSTATS_MIB_INDISCARDS); + IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_INDISCARDS); goto drop; } @@ -468,13 +471,14 @@ int ip6_forward(struct sk_buff *skb) if (proxied > 0) return ip6_input(skb); else if (proxied < 0) { - IP6_INC_STATS(ip6_dst_idev(dst), IPSTATS_MIB_INDISCARDS); + IP6_INC_STATS(net, ip6_dst_idev(dst), + IPSTATS_MIB_INDISCARDS); goto drop; } } if (!xfrm6_route_forward(skb)) { - IP6_INC_STATS(ip6_dst_idev(dst), IPSTATS_MIB_INDISCARDS); + IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_INDISCARDS); goto drop; } dst = skb->dst; @@ -530,7 +534,7 @@ int ip6_forward(struct sk_buff *skb) } if (skb_cow(skb, dst->dev->hard_header_len)) { - IP6_INC_STATS(ip6_dst_idev(dst), IPSTATS_MIB_OUTDISCARDS); + IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTDISCARDS); goto drop; } @@ -622,6 +626,7 @@ static int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *)) __be32 frag_id = 0; int ptr, offset = 0, err=0; u8 *prevhdr, nexthdr = 0; + struct net *net = dev_net(skb->dst->dev); hlen = ip6_find_1stfragopt(skb, &prevhdr); nexthdr = *prevhdr; @@ -635,7 +640,8 @@ static int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *)) if (!skb->local_df) { skb->dev = skb->dst->dev; icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu, skb->dev); - IP6_INC_STATS(ip6_dst_idev(skb->dst), IPSTATS_MIB_FRAGFAILS); + IP6_INC_STATS(net, ip6_dst_idev(skb->dst), + IPSTATS_MIB_FRAGFAILS); kfree_skb(skb); return -EMSGSIZE; } @@ -684,7 +690,8 @@ static int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *)) *prevhdr = NEXTHDR_FRAGMENT; tmp_hdr = kmemdup(skb_network_header(skb), hlen, GFP_ATOMIC); if (!tmp_hdr) { - IP6_INC_STATS(ip6_dst_idev(skb->dst), IPSTATS_MIB_FRAGFAILS); + IP6_INC_STATS(net, ip6_dst_idev(skb->dst), + IPSTATS_MIB_FRAGFAILS); return -ENOMEM; } @@ -735,7 +742,8 @@ static int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *)) err = output(skb); if(!err) - IP6_INC_STATS(ip6_dst_idev(&rt->u.dst), IPSTATS_MIB_FRAGCREATES); + IP6_INC_STATS(net, ip6_dst_idev(&rt->u.dst), + IPSTATS_MIB_FRAGCREATES); if (err || !frag) break; @@ -748,7 +756,8 @@ static int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *)) kfree(tmp_hdr); if (err == 0) { - IP6_INC_STATS(ip6_dst_idev(&rt->u.dst), IPSTATS_MIB_FRAGOKS); + IP6_INC_STATS(net, ip6_dst_idev(&rt->u.dst), + IPSTATS_MIB_FRAGOKS); dst_release(&rt->u.dst); return 0; } @@ -759,7 +768,8 @@ static int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *)) frag = skb; } - IP6_INC_STATS(ip6_dst_idev(&rt->u.dst), IPSTATS_MIB_FRAGFAILS); + IP6_INC_STATS(net, ip6_dst_idev(&rt->u.dst), + IPSTATS_MIB_FRAGFAILS); dst_release(&rt->u.dst); return err; } @@ -793,7 +803,7 @@ slow_path: if ((frag = alloc_skb(len+hlen+sizeof(struct frag_hdr)+LL_ALLOCATED_SPACE(rt->u.dst.dev), GFP_ATOMIC)) == NULL) { NETDEBUG(KERN_INFO "IPv6: frag: no memory for new fragment!\n"); - IP6_INC_STATS(ip6_dst_idev(skb->dst), + IP6_INC_STATS(net, ip6_dst_idev(skb->dst), IPSTATS_MIB_FRAGFAILS); err = -ENOMEM; goto fail; @@ -857,15 +867,16 @@ slow_path: if (err) goto fail; - IP6_INC_STATS(ip6_dst_idev(skb->dst), IPSTATS_MIB_FRAGCREATES); + IP6_INC_STATS(net, ip6_dst_idev(skb->dst), + IPSTATS_MIB_FRAGCREATES); } - IP6_INC_STATS(ip6_dst_idev(skb->dst), + IP6_INC_STATS(net, ip6_dst_idev(skb->dst), IPSTATS_MIB_FRAGOKS); kfree_skb(skb); return err; fail: - IP6_INC_STATS(ip6_dst_idev(skb->dst), + IP6_INC_STATS(net, ip6_dst_idev(skb->dst), IPSTATS_MIB_FRAGFAILS); kfree_skb(skb); return err; @@ -1385,7 +1396,7 @@ alloc_new_skb: return 0; error: inet->cork.length -= length; - IP6_INC_STATS(rt->rt6i_idev, IPSTATS_MIB_OUTDISCARDS); + IP6_INC_STATS(sock_net(sk), rt->rt6i_idev, IPSTATS_MIB_OUTDISCARDS); return err; } @@ -1409,6 +1420,7 @@ int ip6_push_pending_frames(struct sock *sk) struct in6_addr final_dst_buf, *final_dst = &final_dst_buf; struct inet_sock *inet = inet_sk(sk); struct ipv6_pinfo *np = inet6_sk(sk); + struct net *net = sock_net(sk); struct ipv6hdr *hdr; struct ipv6_txoptions *opt = np->cork.opt; struct rt6_info *rt = (struct rt6_info *)inet->cork.dst; @@ -1462,7 +1474,7 @@ int ip6_push_pending_frames(struct sock *sk) skb->mark = sk->sk_mark; skb->dst = dst_clone(&rt->u.dst); - IP6_INC_STATS(rt->rt6i_idev, IPSTATS_MIB_OUTREQUESTS); + IP6_INC_STATS(net, rt->rt6i_idev, IPSTATS_MIB_OUTREQUESTS); if (proto == IPPROTO_ICMPV6) { struct inet6_dev *idev = ip6_dst_idev(skb->dst); @@ -1491,7 +1503,7 @@ void ip6_flush_pending_frames(struct sock *sk) while ((skb = __skb_dequeue_tail(&sk->sk_write_queue)) != NULL) { if (skb->dst) - IP6_INC_STATS(ip6_dst_idev(skb->dst), + IP6_INC_STATS(sock_net(sk), ip6_dst_idev(skb->dst), IPSTATS_MIB_OUTDISCARDS); kfree_skb(skb); } diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c index e7c03bcc278..a1d588d0b5d 100644 --- a/net/ipv6/mcast.c +++ b/net/ipv6/mcast.c @@ -1446,7 +1446,7 @@ static void mld_sendpack(struct sk_buff *skb) int err; struct flowi fl; - IP6_INC_STATS(idev, IPSTATS_MIB_OUTREQUESTS); + IP6_INC_STATS(net, idev, IPSTATS_MIB_OUTREQUESTS); payload_len = (skb->tail - skb->network_header) - sizeof(*pip6); mldlen = skb->tail - skb->transport_header; pip6->payload_len = htons(payload_len); @@ -1771,7 +1771,7 @@ static void igmp6_send(struct in6_addr *addr, struct net_device *dev, int type) struct flowi fl; rcu_read_lock(); - IP6_INC_STATS(__in6_dev_get(dev), + IP6_INC_STATS(net, __in6_dev_get(dev), IPSTATS_MIB_OUTREQUESTS); rcu_read_unlock(); if (type == ICMPV6_MGM_REDUCTION) @@ -1787,7 +1787,7 @@ static void igmp6_send(struct in6_addr *addr, struct net_device *dev, int type) if (skb == NULL) { rcu_read_lock(); - IP6_INC_STATS(__in6_dev_get(dev), + IP6_INC_STATS(net, __in6_dev_get(dev), IPSTATS_MIB_OUTDISCARDS); rcu_read_unlock(); return; @@ -1841,9 +1841,9 @@ out: if (!err) { ICMP6MSGOUT_INC_STATS(idev, type); ICMP6_INC_STATS(idev, ICMP6_MIB_OUTMSGS); - IP6_INC_STATS(idev, IPSTATS_MIB_OUTMCASTPKTS); + IP6_INC_STATS(net, idev, IPSTATS_MIB_OUTMCASTPKTS); } else - IP6_INC_STATS(idev, IPSTATS_MIB_OUTDISCARDS); + IP6_INC_STATS(net, idev, IPSTATS_MIB_OUTDISCARDS); if (likely(idev != NULL)) in6_dev_put(idev); diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c index f1c62ba0f56..ce5b61763fd 100644 --- a/net/ipv6/ndisc.c +++ b/net/ipv6/ndisc.c @@ -516,7 +516,7 @@ static void __ndisc_send(struct net_device *dev, skb->dst = dst; idev = in6_dev_get(dst->dev); - IP6_INC_STATS(idev, IPSTATS_MIB_OUTREQUESTS); + IP6_INC_STATS(net, idev, IPSTATS_MIB_OUTREQUESTS); err = NF_HOOK(PF_INET6, NF_INET_LOCAL_OUT, skb, NULL, dst->dev, dst_output); @@ -1581,7 +1581,7 @@ void ndisc_send_redirect(struct sk_buff *skb, struct neighbour *neigh, buff->dst = dst; idev = in6_dev_get(dst->dev); - IP6_INC_STATS(idev, IPSTATS_MIB_OUTREQUESTS); + IP6_INC_STATS(net, idev, IPSTATS_MIB_OUTREQUESTS); err = NF_HOOK(PF_INET6, NF_INET_LOCAL_OUT, buff, NULL, dst->dev, dst_output); if (!err) { diff --git a/net/ipv6/netfilter.c b/net/ipv6/netfilter.c index 4cb4844a322..6b29b03925f 100644 --- a/net/ipv6/netfilter.c +++ b/net/ipv6/netfilter.c @@ -33,7 +33,8 @@ int ip6_route_me_harder(struct sk_buff *skb) #endif if (dst->error) { - IP6_INC_STATS(ip6_dst_idev(dst), IPSTATS_MIB_OUTNOROUTES); + IP6_INC_STATS(&init_net, ip6_dst_idev(dst), + IPSTATS_MIB_OUTNOROUTES); LIMIT_NETDEBUG(KERN_DEBUG "ip6_route_me_harder: No more route.\n"); dst_release(dst); return -EINVAL; diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c index e53e493606c..2ba04d41dc2 100644 --- a/net/ipv6/raw.c +++ b/net/ipv6/raw.c @@ -638,7 +638,7 @@ static int rawv6_send_hdrinc(struct sock *sk, void *from, int length, if (err) goto error_fault; - IP6_INC_STATS(rt->rt6i_idev, IPSTATS_MIB_OUTREQUESTS); + IP6_INC_STATS(sock_net(sk), rt->rt6i_idev, IPSTATS_MIB_OUTREQUESTS); err = NF_HOOK(PF_INET6, NF_INET_LOCAL_OUT, skb, NULL, rt->u.dst.dev, dst_output); if (err > 0) @@ -652,7 +652,7 @@ error_fault: err = -EFAULT; kfree_skb(skb); error: - IP6_INC_STATS(rt->rt6i_idev, IPSTATS_MIB_OUTDISCARDS); + IP6_INC_STATS(sock_net(sk), rt->rt6i_idev, IPSTATS_MIB_OUTDISCARDS); return err; } diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c index f4f62f08609..63644075f2d 100644 --- a/net/ipv6/reassembly.c +++ b/net/ipv6/reassembly.c @@ -433,7 +433,8 @@ static int ip6_frag_queue(struct frag_queue *fq, struct sk_buff *skb, return -1; err: - IP6_INC_STATS(ip6_dst_idev(skb->dst), IPSTATS_MIB_REASMFAILS); + IP6_INC_STATS(dev_net(skb->dst->dev), ip6_dst_idev(skb->dst), + IPSTATS_MIB_REASMFAILS); kfree_skb(skb); return -1; } @@ -573,7 +574,7 @@ static int ipv6_frag_rcv(struct sk_buff *skb) struct frag_hdr *fhdr; struct frag_queue *fq; struct ipv6hdr *hdr = ipv6_hdr(skb); - struct net *net; + struct net *net = dev_net(skb->dst->dev); IP6_INC_STATS_BH(ip6_dst_idev(skb->dst), IPSTATS_MIB_REASMREQDS); @@ -597,7 +598,6 @@ static int ipv6_frag_rcv(struct sk_buff *skb) return 1; } - net = dev_net(skb->dev); if (atomic_read(&net->ipv6.frags.mem) > net->ipv6.frags.high_thresh) ip6_evictor(net, ip6_dst_idev(skb->dst)); @@ -619,7 +619,7 @@ static int ipv6_frag_rcv(struct sk_buff *skb) return -1; fail_hdr: - IP6_INC_STATS(ip6_dst_idev(skb->dst), IPSTATS_MIB_INHDRERRORS); + IP6_INC_STATS(net, ip6_dst_idev(skb->dst), IPSTATS_MIB_INHDRERRORS); icmpv6_param_prob(skb, ICMPV6_HDR_FIELD, skb_network_header_len(skb)); return -1; } diff --git a/net/ipv6/route.c b/net/ipv6/route.c index e10a1701550..89dc6992434 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -1833,16 +1833,19 @@ int ipv6_route_ioctl(struct net *net, unsigned int cmd, void __user *arg) static int ip6_pkt_drop(struct sk_buff *skb, int code, int ipstats_mib_noroutes) { int type; + struct dst_entry *dst = skb->dst; switch (ipstats_mib_noroutes) { case IPSTATS_MIB_INNOROUTES: type = ipv6_addr_type(&ipv6_hdr(skb)->daddr); if (type == IPV6_ADDR_ANY || type == IPV6_ADDR_RESERVED) { - IP6_INC_STATS(ip6_dst_idev(skb->dst), IPSTATS_MIB_INADDRERRORS); + IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst), + IPSTATS_MIB_INADDRERRORS); break; } /* FALLTHROUGH */ case IPSTATS_MIB_OUTNOROUTES: - IP6_INC_STATS(ip6_dst_idev(skb->dst), ipstats_mib_noroutes); + IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst), + ipstats_mib_noroutes); break; } icmpv6_send(skb, ICMPV6_DEST_UNREACH, code, 0, skb->dev); -- cgit v1.2.3-70-g09d2 From 483a47d2fe794328d29950fe00ce26dd405d9437 Mon Sep 17 00:00:00 2001 From: "Denis V. Lunev" Date: Wed, 8 Oct 2008 11:09:27 -0700 Subject: ipv6: added net argument to IP6_INC_STATS_BH Signed-off-by: Denis V. Lunev Signed-off-by: David S. Miller --- include/net/ipv6.h | 3 ++- net/ipv6/exthdrs.c | 42 ++++++++++++++++++++++++------------------ net/ipv6/ip6_input.c | 27 ++++++++++++++++----------- net/ipv6/ip6_output.c | 18 +++++++++++------- net/ipv6/ip6mr.c | 3 ++- net/ipv6/mcast.c | 4 ++-- net/ipv6/reassembly.c | 26 +++++++++++++++----------- 7 files changed, 72 insertions(+), 51 deletions(-) (limited to 'include/net') diff --git a/include/net/ipv6.h b/include/net/ipv6.h index 26c17988b90..e7732d3a3f9 100644 --- a/include/net/ipv6.h +++ b/include/net/ipv6.h @@ -131,7 +131,8 @@ DECLARE_SNMP_STAT(struct ipstats_mib, ipv6_statistics); #define IP6_INC_STATS(net, idev,field) ({ (void)(net); \ _DEVINC(ipv6, , idev, field); }) -#define IP6_INC_STATS_BH(idev,field) _DEVINC(ipv6, _BH, idev, field) +#define IP6_INC_STATS_BH(net, idev,field) ({ (void)(net); \ + _DEVINC(ipv6, _BH, idev, field); }) #define IP6_ADD_STATS_BH(idev,field,val) _DEVADD(ipv6, _BH, idev, field, val) DECLARE_SNMP_STAT(struct icmpv6_mib, icmpv6_statistics); diff --git a/net/ipv6/exthdrs.c b/net/ipv6/exthdrs.c index 837c830d6d8..6bfffec2371 100644 --- a/net/ipv6/exthdrs.c +++ b/net/ipv6/exthdrs.c @@ -277,7 +277,7 @@ static int ipv6_destopt_rcv(struct sk_buff *skb) if (!pskb_may_pull(skb, skb_transport_offset(skb) + 8) || !pskb_may_pull(skb, (skb_transport_offset(skb) + ((skb_transport_header(skb)[1] + 1) << 3)))) { - IP6_INC_STATS_BH(ip6_dst_idev(skb->dst), + IP6_INC_STATS_BH(dev_net(skb->dst->dev), ip6_dst_idev(skb->dst), IPSTATS_MIB_INHDRERRORS); kfree_skb(skb); return -1; @@ -301,7 +301,8 @@ static int ipv6_destopt_rcv(struct sk_buff *skb) return 1; } - IP6_INC_STATS_BH(ip6_dst_idev(dst), IPSTATS_MIB_INHDRERRORS); + IP6_INC_STATS_BH(dev_net(dst->dev), + ip6_dst_idev(dst), IPSTATS_MIB_INHDRERRORS); dst_release(dst); return -1; } @@ -319,7 +320,8 @@ static int ipv6_rthdr_rcv(struct sk_buff *skb) int n, i; struct ipv6_rt_hdr *hdr; struct rt0_hdr *rthdr; - int accept_source_route = dev_net(skb->dev)->ipv6.devconf_all->accept_source_route; + struct net *net = dev_net(skb->dev); + int accept_source_route = net->ipv6.devconf_all->accept_source_route; idev = in6_dev_get(skb->dev); if (idev) { @@ -331,7 +333,7 @@ static int ipv6_rthdr_rcv(struct sk_buff *skb) if (!pskb_may_pull(skb, skb_transport_offset(skb) + 8) || !pskb_may_pull(skb, (skb_transport_offset(skb) + ((skb_transport_header(skb)[1] + 1) << 3)))) { - IP6_INC_STATS_BH(ip6_dst_idev(skb->dst), + IP6_INC_STATS_BH(net, ip6_dst_idev(skb->dst), IPSTATS_MIB_INHDRERRORS); kfree_skb(skb); return -1; @@ -341,7 +343,7 @@ static int ipv6_rthdr_rcv(struct sk_buff *skb) if (ipv6_addr_is_multicast(&ipv6_hdr(skb)->daddr) || skb->pkt_type != PACKET_HOST) { - IP6_INC_STATS_BH(ip6_dst_idev(skb->dst), + IP6_INC_STATS_BH(net, ip6_dst_idev(skb->dst), IPSTATS_MIB_INADDRERRORS); kfree_skb(skb); return -1; @@ -356,7 +358,7 @@ looped_back: * processed by own */ if (!addr) { - IP6_INC_STATS_BH(ip6_dst_idev(skb->dst), + IP6_INC_STATS_BH(net, ip6_dst_idev(skb->dst), IPSTATS_MIB_INADDRERRORS); kfree_skb(skb); return -1; @@ -382,7 +384,7 @@ looped_back: goto unknown_rh; /* Silently discard invalid RTH type 2 */ if (hdr->hdrlen != 2 || hdr->segments_left != 1) { - IP6_INC_STATS_BH(ip6_dst_idev(skb->dst), + IP6_INC_STATS_BH(net, ip6_dst_idev(skb->dst), IPSTATS_MIB_INHDRERRORS); kfree_skb(skb); return -1; @@ -401,7 +403,7 @@ looped_back: n = hdr->hdrlen >> 1; if (hdr->segments_left > n) { - IP6_INC_STATS_BH(ip6_dst_idev(skb->dst), + IP6_INC_STATS_BH(net, ip6_dst_idev(skb->dst), IPSTATS_MIB_INHDRERRORS); icmpv6_param_prob(skb, ICMPV6_HDR_FIELD, ((&hdr->segments_left) - @@ -415,7 +417,7 @@ looped_back: if (skb_cloned(skb)) { /* the copy is a forwarded packet */ if (pskb_expand_head(skb, 0, 0, GFP_ATOMIC)) { - IP6_INC_STATS_BH(ip6_dst_idev(skb->dst), + IP6_INC_STATS_BH(net, ip6_dst_idev(skb->dst), IPSTATS_MIB_OUTDISCARDS); kfree_skb(skb); return -1; @@ -438,13 +440,13 @@ looped_back: if (xfrm6_input_addr(skb, (xfrm_address_t *)addr, (xfrm_address_t *)&ipv6_hdr(skb)->saddr, IPPROTO_ROUTING) < 0) { - IP6_INC_STATS_BH(ip6_dst_idev(skb->dst), + IP6_INC_STATS_BH(net, ip6_dst_idev(skb->dst), IPSTATS_MIB_INADDRERRORS); kfree_skb(skb); return -1; } if (!ipv6_chk_home_addr(dev_net(skb->dst->dev), addr)) { - IP6_INC_STATS_BH(ip6_dst_idev(skb->dst), + IP6_INC_STATS_BH(net, ip6_dst_idev(skb->dst), IPSTATS_MIB_INADDRERRORS); kfree_skb(skb); return -1; @@ -456,7 +458,7 @@ looped_back: } if (ipv6_addr_is_multicast(addr)) { - IP6_INC_STATS_BH(ip6_dst_idev(skb->dst), + IP6_INC_STATS_BH(net, ip6_dst_idev(skb->dst), IPSTATS_MIB_INADDRERRORS); kfree_skb(skb); return -1; @@ -476,7 +478,7 @@ looped_back: if (skb->dst->dev->flags&IFF_LOOPBACK) { if (ipv6_hdr(skb)->hop_limit <= 1) { - IP6_INC_STATS_BH(ip6_dst_idev(skb->dst), + IP6_INC_STATS_BH(net, ip6_dst_idev(skb->dst), IPSTATS_MIB_INHDRERRORS); icmpv6_send(skb, ICMPV6_TIME_EXCEED, ICMPV6_EXC_HOPLIMIT, 0, skb->dev); @@ -492,7 +494,7 @@ looped_back: return -1; unknown_rh: - IP6_INC_STATS_BH(ip6_dst_idev(skb->dst), IPSTATS_MIB_INHDRERRORS); + IP6_INC_STATS_BH(net, ip6_dst_idev(skb->dst), IPSTATS_MIB_INHDRERRORS); icmpv6_param_prob(skb, ICMPV6_HDR_FIELD, (&hdr->type) - skb_network_header(skb)); return -1; @@ -579,29 +581,33 @@ static int ipv6_hop_jumbo(struct sk_buff *skb, int optoff) { const unsigned char *nh = skb_network_header(skb); u32 pkt_len; + struct net *net = dev_net(skb->dst->dev); if (nh[optoff + 1] != 4 || (optoff & 3) != 2) { LIMIT_NETDEBUG(KERN_DEBUG "ipv6_hop_jumbo: wrong jumbo opt length/alignment %d\n", nh[optoff+1]); - IP6_INC_STATS_BH(ipv6_skb_idev(skb), + IP6_INC_STATS_BH(net, ipv6_skb_idev(skb), IPSTATS_MIB_INHDRERRORS); goto drop; } pkt_len = ntohl(*(__be32 *)(nh + optoff + 2)); if (pkt_len <= IPV6_MAXPLEN) { - IP6_INC_STATS_BH(ipv6_skb_idev(skb), IPSTATS_MIB_INHDRERRORS); + IP6_INC_STATS_BH(net, ipv6_skb_idev(skb), + IPSTATS_MIB_INHDRERRORS); icmpv6_param_prob(skb, ICMPV6_HDR_FIELD, optoff+2); return 0; } if (ipv6_hdr(skb)->payload_len) { - IP6_INC_STATS_BH(ipv6_skb_idev(skb), IPSTATS_MIB_INHDRERRORS); + IP6_INC_STATS_BH(net, ipv6_skb_idev(skb), + IPSTATS_MIB_INHDRERRORS); icmpv6_param_prob(skb, ICMPV6_HDR_FIELD, optoff); return 0; } if (pkt_len > skb->len - sizeof(struct ipv6hdr)) { - IP6_INC_STATS_BH(ipv6_skb_idev(skb), IPSTATS_MIB_INTRUNCATEDPKTS); + IP6_INC_STATS_BH(net, ipv6_skb_idev(skb), + IPSTATS_MIB_INTRUNCATEDPKTS); goto drop; } diff --git a/net/ipv6/ip6_input.c b/net/ipv6/ip6_input.c index 7e14cccd056..936f48946e2 100644 --- a/net/ipv6/ip6_input.c +++ b/net/ipv6/ip6_input.c @@ -59,6 +59,7 @@ int ipv6_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt struct ipv6hdr *hdr; u32 pkt_len; struct inet6_dev *idev; + struct net *net = dev_net(skb->dev); if (skb->pkt_type == PACKET_OTHERHOST) { kfree_skb(skb); @@ -69,11 +70,11 @@ int ipv6_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt idev = __in6_dev_get(skb->dev); - IP6_INC_STATS_BH(idev, IPSTATS_MIB_INRECEIVES); + IP6_INC_STATS_BH(net, idev, IPSTATS_MIB_INRECEIVES); if ((skb = skb_share_check(skb, GFP_ATOMIC)) == NULL || !idev || unlikely(idev->cnf.disable_ipv6)) { - IP6_INC_STATS_BH(idev, IPSTATS_MIB_INDISCARDS); + IP6_INC_STATS_BH(net, idev, IPSTATS_MIB_INDISCARDS); rcu_read_unlock(); goto out; } @@ -118,11 +119,12 @@ int ipv6_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt /* pkt_len may be zero if Jumbo payload option is present */ if (pkt_len || hdr->nexthdr != NEXTHDR_HOP) { if (pkt_len + sizeof(struct ipv6hdr) > skb->len) { - IP6_INC_STATS_BH(idev, IPSTATS_MIB_INTRUNCATEDPKTS); + IP6_INC_STATS_BH(net, + idev, IPSTATS_MIB_INTRUNCATEDPKTS); goto drop; } if (pskb_trim_rcsum(skb, pkt_len + sizeof(struct ipv6hdr))) { - IP6_INC_STATS_BH(idev, IPSTATS_MIB_INHDRERRORS); + IP6_INC_STATS_BH(net, idev, IPSTATS_MIB_INHDRERRORS); goto drop; } hdr = ipv6_hdr(skb); @@ -130,7 +132,7 @@ int ipv6_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt if (hdr->nexthdr == NEXTHDR_HOP) { if (ipv6_parse_hopopts(skb) < 0) { - IP6_INC_STATS_BH(idev, IPSTATS_MIB_INHDRERRORS); + IP6_INC_STATS_BH(net, idev, IPSTATS_MIB_INHDRERRORS); rcu_read_unlock(); return 0; } @@ -141,7 +143,7 @@ int ipv6_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt return NF_HOOK(PF_INET6, NF_INET_PRE_ROUTING, skb, dev, NULL, ip6_rcv_finish); err: - IP6_INC_STATS_BH(idev, IPSTATS_MIB_INHDRERRORS); + IP6_INC_STATS_BH(net, idev, IPSTATS_MIB_INHDRERRORS); drop: rcu_read_unlock(); kfree_skb(skb); @@ -161,6 +163,7 @@ static int ip6_input_finish(struct sk_buff *skb) int nexthdr, raw; u8 hash; struct inet6_dev *idev; + struct net *net = dev_net(skb->dst->dev); /* * Parse extension headers @@ -205,24 +208,25 @@ resubmit: if (ret > 0) goto resubmit; else if (ret == 0) - IP6_INC_STATS_BH(idev, IPSTATS_MIB_INDELIVERS); + IP6_INC_STATS_BH(net, idev, IPSTATS_MIB_INDELIVERS); } else { if (!raw) { if (xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb)) { - IP6_INC_STATS_BH(idev, IPSTATS_MIB_INUNKNOWNPROTOS); + IP6_INC_STATS_BH(net, idev, + IPSTATS_MIB_INUNKNOWNPROTOS); icmpv6_send(skb, ICMPV6_PARAMPROB, ICMPV6_UNK_NEXTHDR, nhoff, skb->dev); } } else - IP6_INC_STATS_BH(idev, IPSTATS_MIB_INDELIVERS); + IP6_INC_STATS_BH(net, idev, IPSTATS_MIB_INDELIVERS); kfree_skb(skb); } rcu_read_unlock(); return 0; discard: - IP6_INC_STATS_BH(idev, IPSTATS_MIB_INDISCARDS); + IP6_INC_STATS_BH(net, idev, IPSTATS_MIB_INDISCARDS); rcu_read_unlock(); kfree_skb(skb); return 0; @@ -240,7 +244,8 @@ int ip6_mc_input(struct sk_buff *skb) struct ipv6hdr *hdr; int deliver; - IP6_INC_STATS_BH(ip6_dst_idev(skb->dst), IPSTATS_MIB_INMCASTPKTS); + IP6_INC_STATS_BH(dev_net(skb->dst->dev), + ip6_dst_idev(skb->dst), IPSTATS_MIB_INMCASTPKTS); hdr = ipv6_hdr(skb); deliver = ipv6_chk_mcast_addr(skb->dev, &hdr->daddr, NULL); diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index db28c208f32..f0fded630f5 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -103,7 +103,8 @@ static int ip6_output_finish(struct sk_buff *skb) else if (dst->neighbour) return dst->neighbour->output(skb); - IP6_INC_STATS_BH(ip6_dst_idev(dst), IPSTATS_MIB_OUTNOROUTES); + IP6_INC_STATS_BH(dev_net(dst->dev), + ip6_dst_idev(dst), IPSTATS_MIB_OUTNOROUTES); kfree_skb(skb); return -EINVAL; @@ -458,7 +459,8 @@ int ip6_forward(struct sk_buff *skb) skb->dev = dst->dev; icmpv6_send(skb, ICMPV6_TIME_EXCEED, ICMPV6_EXC_HOPLIMIT, 0, skb->dev); - IP6_INC_STATS_BH(ip6_dst_idev(dst), IPSTATS_MIB_INHDRERRORS); + IP6_INC_STATS_BH(net, + ip6_dst_idev(dst), IPSTATS_MIB_INHDRERRORS); kfree_skb(skb); return -ETIMEDOUT; @@ -527,8 +529,10 @@ int ip6_forward(struct sk_buff *skb) /* Again, force OUTPUT device used as source address */ skb->dev = dst->dev; icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, dst_mtu(dst), skb->dev); - IP6_INC_STATS_BH(ip6_dst_idev(dst), IPSTATS_MIB_INTOOBIGERRORS); - IP6_INC_STATS_BH(ip6_dst_idev(dst), IPSTATS_MIB_FRAGFAILS); + IP6_INC_STATS_BH(net, + ip6_dst_idev(dst), IPSTATS_MIB_INTOOBIGERRORS); + IP6_INC_STATS_BH(net, + ip6_dst_idev(dst), IPSTATS_MIB_FRAGFAILS); kfree_skb(skb); return -EMSGSIZE; } @@ -544,12 +548,12 @@ int ip6_forward(struct sk_buff *skb) hdr->hop_limit--; - IP6_INC_STATS_BH(ip6_dst_idev(dst), IPSTATS_MIB_OUTFORWDATAGRAMS); + IP6_INC_STATS_BH(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTFORWDATAGRAMS); return NF_HOOK(PF_INET6, NF_INET_FORWARD, skb, skb->dev, dst->dev, ip6_forward_finish); error: - IP6_INC_STATS_BH(ip6_dst_idev(dst), IPSTATS_MIB_INADDRERRORS); + IP6_INC_STATS_BH(net, ip6_dst_idev(dst), IPSTATS_MIB_INADDRERRORS); drop: kfree_skb(skb); return -EINVAL; @@ -991,7 +995,7 @@ static int ip6_dst_lookup_tail(struct sock *sk, out_err_release: if (err == -ENETUNREACH) - IP6_INC_STATS_BH(NULL, IPSTATS_MIB_OUTNOROUTES); + IP6_INC_STATS_BH(net, NULL, IPSTATS_MIB_OUTNOROUTES); dst_release(*dst); *dst = NULL; return err; diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c index 095bc453ff4..182f8a177e7 100644 --- a/net/ipv6/ip6mr.c +++ b/net/ipv6/ip6mr.c @@ -1383,7 +1383,8 @@ int ip6mr_ioctl(struct sock *sk, int cmd, void __user *arg) static inline int ip6mr_forward2_finish(struct sk_buff *skb) { - IP6_INC_STATS_BH(ip6_dst_idev(skb->dst), IPSTATS_MIB_OUTFORWDATAGRAMS); + IP6_INC_STATS_BH(dev_net(skb->dst->dev), ip6_dst_idev(skb->dst), + IPSTATS_MIB_OUTFORWDATAGRAMS); return dst_output(skb); } diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c index a1d588d0b5d..88811ebbe4b 100644 --- a/net/ipv6/mcast.c +++ b/net/ipv6/mcast.c @@ -1476,9 +1476,9 @@ out: if (!err) { ICMP6MSGOUT_INC_STATS_BH(idev, ICMPV6_MLD2_REPORT); ICMP6_INC_STATS_BH(idev, ICMP6_MIB_OUTMSGS); - IP6_INC_STATS_BH(idev, IPSTATS_MIB_OUTMCASTPKTS); + IP6_INC_STATS_BH(net, idev, IPSTATS_MIB_OUTMCASTPKTS); } else - IP6_INC_STATS_BH(idev, IPSTATS_MIB_OUTDISCARDS); + IP6_INC_STATS_BH(net, idev, IPSTATS_MIB_OUTDISCARDS); if (likely(idev != NULL)) in6_dev_put(idev); diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c index 63644075f2d..693d20836b3 100644 --- a/net/ipv6/reassembly.c +++ b/net/ipv6/reassembly.c @@ -213,8 +213,8 @@ static void ip6_frag_expire(unsigned long data) goto out; rcu_read_lock(); - IP6_INC_STATS_BH(__in6_dev_get(dev), IPSTATS_MIB_REASMTIMEOUT); - IP6_INC_STATS_BH(__in6_dev_get(dev), IPSTATS_MIB_REASMFAILS); + IP6_INC_STATS_BH(net, __in6_dev_get(dev), IPSTATS_MIB_REASMTIMEOUT); + IP6_INC_STATS_BH(net, __in6_dev_get(dev), IPSTATS_MIB_REASMFAILS); rcu_read_unlock(); /* Don't send error if the first segment did not arrive. */ @@ -257,7 +257,7 @@ fq_find(struct net *net, __be32 id, struct in6_addr *src, struct in6_addr *dst, return container_of(q, struct frag_queue, q); oom: - IP6_INC_STATS_BH(idev, IPSTATS_MIB_REASMFAILS); + IP6_INC_STATS_BH(net, idev, IPSTATS_MIB_REASMFAILS); return NULL; } @@ -267,6 +267,7 @@ static int ip6_frag_queue(struct frag_queue *fq, struct sk_buff *skb, struct sk_buff *prev, *next; struct net_device *dev; int offset, end; + struct net *net = dev_net(skb->dst->dev); if (fq->q.last_in & INET_FRAG_COMPLETE) goto err; @@ -276,7 +277,7 @@ static int ip6_frag_queue(struct frag_queue *fq, struct sk_buff *skb, ((u8 *)(fhdr + 1) - (u8 *)(ipv6_hdr(skb) + 1))); if ((unsigned int)end > IPV6_MAXPLEN) { - IP6_INC_STATS_BH(ip6_dst_idev(skb->dst), + IP6_INC_STATS_BH(net, ip6_dst_idev(skb->dst), IPSTATS_MIB_INHDRERRORS); icmpv6_param_prob(skb, ICMPV6_HDR_FIELD, ((u8 *)&fhdr->frag_off - @@ -309,7 +310,7 @@ static int ip6_frag_queue(struct frag_queue *fq, struct sk_buff *skb, /* RFC2460 says always send parameter problem in * this case. -DaveM */ - IP6_INC_STATS_BH(ip6_dst_idev(skb->dst), + IP6_INC_STATS_BH(net, ip6_dst_idev(skb->dst), IPSTATS_MIB_INHDRERRORS); icmpv6_param_prob(skb, ICMPV6_HDR_FIELD, offsetof(struct ipv6hdr, payload_len)); @@ -433,7 +434,7 @@ static int ip6_frag_queue(struct frag_queue *fq, struct sk_buff *skb, return -1; err: - IP6_INC_STATS(dev_net(skb->dst->dev), ip6_dst_idev(skb->dst), + IP6_INC_STATS(net, ip6_dst_idev(skb->dst), IPSTATS_MIB_REASMFAILS); kfree_skb(skb); return -1; @@ -550,7 +551,8 @@ static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff *prev, head->csum); rcu_read_lock(); - IP6_INC_STATS_BH(__in6_dev_get(dev), IPSTATS_MIB_REASMOKS); + IP6_INC_STATS_BH(dev_net(dev), + __in6_dev_get(dev), IPSTATS_MIB_REASMOKS); rcu_read_unlock(); fq->q.fragments = NULL; return 1; @@ -564,7 +566,8 @@ out_oom: printk(KERN_DEBUG "ip6_frag_reasm: no memory for reassembly\n"); out_fail: rcu_read_lock(); - IP6_INC_STATS_BH(__in6_dev_get(dev), IPSTATS_MIB_REASMFAILS); + IP6_INC_STATS_BH(dev_net(dev), + __in6_dev_get(dev), IPSTATS_MIB_REASMFAILS); rcu_read_unlock(); return -1; } @@ -576,7 +579,7 @@ static int ipv6_frag_rcv(struct sk_buff *skb) struct ipv6hdr *hdr = ipv6_hdr(skb); struct net *net = dev_net(skb->dst->dev); - IP6_INC_STATS_BH(ip6_dst_idev(skb->dst), IPSTATS_MIB_REASMREQDS); + IP6_INC_STATS_BH(net, ip6_dst_idev(skb->dst), IPSTATS_MIB_REASMREQDS); /* Jumbo payload inhibits frag. header */ if (hdr->payload_len==0) @@ -592,7 +595,8 @@ static int ipv6_frag_rcv(struct sk_buff *skb) if (!(fhdr->frag_off & htons(0xFFF9))) { /* It is not a fragmented frame */ skb->transport_header += sizeof(struct frag_hdr); - IP6_INC_STATS_BH(ip6_dst_idev(skb->dst), IPSTATS_MIB_REASMOKS); + IP6_INC_STATS_BH(net, + ip6_dst_idev(skb->dst), IPSTATS_MIB_REASMOKS); IP6CB(skb)->nhoff = (u8 *)fhdr - skb_network_header(skb); return 1; @@ -614,7 +618,7 @@ static int ipv6_frag_rcv(struct sk_buff *skb) return ret; } - IP6_INC_STATS_BH(ip6_dst_idev(skb->dst), IPSTATS_MIB_REASMFAILS); + IP6_INC_STATS_BH(net, ip6_dst_idev(skb->dst), IPSTATS_MIB_REASMFAILS); kfree_skb(skb); return -1; -- cgit v1.2.3-70-g09d2 From 821d57776d4dda47ef5f0c33fdb3c761214b2f9f Mon Sep 17 00:00:00 2001 From: "Denis V. Lunev" Date: Wed, 8 Oct 2008 10:32:43 -0700 Subject: ipv6: added net argument to IP6_ADD_STATS_BH Signed-off-by: Denis V. Lunev Signed-off-by: David S. Miller --- include/net/ipv6.h | 3 ++- net/ipv6/reassembly.c | 2 +- 2 files changed, 3 insertions(+), 2 deletions(-) (limited to 'include/net') diff --git a/include/net/ipv6.h b/include/net/ipv6.h index e7732d3a3f9..ac0487bab8b 100644 --- a/include/net/ipv6.h +++ b/include/net/ipv6.h @@ -133,7 +133,8 @@ DECLARE_SNMP_STAT(struct ipstats_mib, ipv6_statistics); _DEVINC(ipv6, , idev, field); }) #define IP6_INC_STATS_BH(net, idev,field) ({ (void)(net); \ _DEVINC(ipv6, _BH, idev, field); }) -#define IP6_ADD_STATS_BH(idev,field,val) _DEVADD(ipv6, _BH, idev, field, val) +#define IP6_ADD_STATS_BH(net, idev,field,val) ({ (void)(net); \ + _DEVADD(ipv6, _BH, idev, field, val); }) DECLARE_SNMP_STAT(struct icmpv6_mib, icmpv6_statistics); DECLARE_SNMP_STAT(struct icmpv6msg_mib, icmpv6msg_statistics); diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c index 693d20836b3..af12de071f4 100644 --- a/net/ipv6/reassembly.c +++ b/net/ipv6/reassembly.c @@ -189,7 +189,7 @@ static void ip6_evictor(struct net *net, struct inet6_dev *idev) evicted = inet_frag_evictor(&net->ipv6.frags, &ip6_frags); if (evicted) - IP6_ADD_STATS_BH(idev, IPSTATS_MIB_REASMFAILS, evicted); + IP6_ADD_STATS_BH(net, idev, IPSTATS_MIB_REASMFAILS, evicted); } static void ip6_frag_expire(unsigned long data) -- cgit v1.2.3-70-g09d2 From a862f6a6dc89c57dd3a959a1636b59f0c27169c2 Mon Sep 17 00:00:00 2001 From: "Denis V. Lunev" Date: Wed, 8 Oct 2008 10:33:06 -0700 Subject: ipv6: added net argument to ICMP6_INC_STATS Signed-off-by: Denis V. Lunev Signed-off-by: David S. Miller --- include/net/ipv6.h | 3 ++- net/ipv6/mcast.c | 2 +- net/ipv6/ndisc.c | 4 ++-- 3 files changed, 5 insertions(+), 4 deletions(-) (limited to 'include/net') diff --git a/include/net/ipv6.h b/include/net/ipv6.h index ac0487bab8b..744fe7443cb 100644 --- a/include/net/ipv6.h +++ b/include/net/ipv6.h @@ -139,7 +139,8 @@ DECLARE_SNMP_STAT(struct ipstats_mib, ipv6_statistics); DECLARE_SNMP_STAT(struct icmpv6_mib, icmpv6_statistics); DECLARE_SNMP_STAT(struct icmpv6msg_mib, icmpv6msg_statistics); -#define ICMP6_INC_STATS(idev, field) _DEVINC(icmpv6, , idev, field) +#define ICMP6_INC_STATS(net, idev, field) ({ (void)(net); \ + _DEVINC(icmpv6, , idev, field); }) #define ICMP6_INC_STATS_BH(idev, field) _DEVINC(icmpv6, _BH, idev, field) #define ICMP6MSGOUT_INC_STATS(idev, field) \ diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c index 88811ebbe4b..fa413afeb99 100644 --- a/net/ipv6/mcast.c +++ b/net/ipv6/mcast.c @@ -1840,7 +1840,7 @@ static void igmp6_send(struct in6_addr *addr, struct net_device *dev, int type) out: if (!err) { ICMP6MSGOUT_INC_STATS(idev, type); - ICMP6_INC_STATS(idev, ICMP6_MIB_OUTMSGS); + ICMP6_INC_STATS(net, idev, ICMP6_MIB_OUTMSGS); IP6_INC_STATS(net, idev, IPSTATS_MIB_OUTMCASTPKTS); } else IP6_INC_STATS(net, idev, IPSTATS_MIB_OUTDISCARDS); diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c index ce5b61763fd..6ce238ca538 100644 --- a/net/ipv6/ndisc.c +++ b/net/ipv6/ndisc.c @@ -522,7 +522,7 @@ static void __ndisc_send(struct net_device *dev, dst_output); if (!err) { ICMP6MSGOUT_INC_STATS(idev, type); - ICMP6_INC_STATS(idev, ICMP6_MIB_OUTMSGS); + ICMP6_INC_STATS(net, idev, ICMP6_MIB_OUTMSGS); } if (likely(idev != NULL)) @@ -1586,7 +1586,7 @@ void ndisc_send_redirect(struct sk_buff *skb, struct neighbour *neigh, dst_output); if (!err) { ICMP6MSGOUT_INC_STATS(idev, NDISC_REDIRECT); - ICMP6_INC_STATS(idev, ICMP6_MIB_OUTMSGS); + ICMP6_INC_STATS(net, idev, ICMP6_MIB_OUTMSGS); } if (likely(idev != NULL)) -- cgit v1.2.3-70-g09d2 From e41b5368e029e79d11acb5952bc73284e5026c62 Mon Sep 17 00:00:00 2001 From: "Denis V. Lunev" Date: Wed, 8 Oct 2008 10:33:26 -0700 Subject: ipv6: added net argument to ICMP6_INC_STATS_BH Signed-off-by: Denis V. Lunev Signed-off-by: David S. Miller --- include/net/ipv6.h | 3 ++- net/dccp/ipv6.c | 6 ++++-- net/ipv6/icmp.c | 4 ++-- net/ipv6/ip6_output.c | 2 +- net/ipv6/mcast.c | 2 +- net/ipv6/tcp_ipv6.c | 3 ++- net/sctp/ipv6.c | 2 +- 7 files changed, 13 insertions(+), 9 deletions(-) (limited to 'include/net') diff --git a/include/net/ipv6.h b/include/net/ipv6.h index 744fe7443cb..5107cd92a46 100644 --- a/include/net/ipv6.h +++ b/include/net/ipv6.h @@ -141,7 +141,8 @@ DECLARE_SNMP_STAT(struct icmpv6msg_mib, icmpv6msg_statistics); #define ICMP6_INC_STATS(net, idev, field) ({ (void)(net); \ _DEVINC(icmpv6, , idev, field); }) -#define ICMP6_INC_STATS_BH(idev, field) _DEVINC(icmpv6, _BH, idev, field) +#define ICMP6_INC_STATS_BH(net, idev, field) ({ (void)(net); \ + _DEVINC(icmpv6, _BH, idev, field); }) #define ICMP6MSGOUT_INC_STATS(idev, field) \ _DEVINC(icmpv6msg, , idev, field +256) diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c index caa7f346962..11062780bb0 100644 --- a/net/dccp/ipv6.c +++ b/net/dccp/ipv6.c @@ -98,7 +98,8 @@ static void dccp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, if (skb->len < offset + sizeof(*dh) || skb->len < offset + __dccp_basic_hdr_len(dh)) { - ICMP6_INC_STATS_BH(__in6_dev_get(skb->dev), ICMP6_MIB_INERRORS); + ICMP6_INC_STATS_BH(net, __in6_dev_get(skb->dev), + ICMP6_MIB_INERRORS); return; } @@ -107,7 +108,8 @@ static void dccp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, &hdr->saddr, dh->dccph_sport, inet6_iif(skb)); if (sk == NULL) { - ICMP6_INC_STATS_BH(__in6_dev_get(skb->dev), ICMP6_MIB_INERRORS); + ICMP6_INC_STATS_BH(net, __in6_dev_get(skb->dev), + ICMP6_MIB_INERRORS); return; } diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c index 758cdd7dc27..4c961556306 100644 --- a/net/ipv6/icmp.c +++ b/net/ipv6/icmp.c @@ -664,7 +664,7 @@ static int icmpv6_rcv(struct sk_buff *skb) skb_set_network_header(skb, nh); } - ICMP6_INC_STATS_BH(idev, ICMP6_MIB_INMSGS); + ICMP6_INC_STATS_BH(dev_net(dev), idev, ICMP6_MIB_INMSGS); saddr = &ipv6_hdr(skb)->saddr; daddr = &ipv6_hdr(skb)->daddr; @@ -772,7 +772,7 @@ static int icmpv6_rcv(struct sk_buff *skb) return 0; discard_it: - ICMP6_INC_STATS_BH(idev, ICMP6_MIB_INERRORS); + ICMP6_INC_STATS_BH(dev_net(dev), idev, ICMP6_MIB_INERRORS); drop_no_count: kfree_skb(skb); return 0; diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index f0fded630f5..e7eff320619 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -1483,7 +1483,7 @@ int ip6_push_pending_frames(struct sock *sk) struct inet6_dev *idev = ip6_dst_idev(skb->dst); ICMP6MSGOUT_INC_STATS_BH(idev, icmp6_hdr(skb)->icmp6_type); - ICMP6_INC_STATS_BH(idev, ICMP6_MIB_OUTMSGS); + ICMP6_INC_STATS_BH(net, idev, ICMP6_MIB_OUTMSGS); } err = ip6_local_out(skb); diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c index fa413afeb99..f06ceea1ffa 100644 --- a/net/ipv6/mcast.c +++ b/net/ipv6/mcast.c @@ -1475,7 +1475,7 @@ static void mld_sendpack(struct sk_buff *skb) out: if (!err) { ICMP6MSGOUT_INC_STATS_BH(idev, ICMPV6_MLD2_REPORT); - ICMP6_INC_STATS_BH(idev, ICMP6_MIB_OUTMSGS); + ICMP6_INC_STATS_BH(net, idev, ICMP6_MIB_OUTMSGS); IP6_INC_STATS_BH(net, idev, IPSTATS_MIB_OUTMCASTPKTS); } else IP6_INC_STATS_BH(net, idev, IPSTATS_MIB_OUTDISCARDS); diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 6268d266c03..424d9c4a67a 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -330,7 +330,8 @@ static void tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, th->dest, &hdr->saddr, th->source, skb->dev->ifindex); if (sk == NULL) { - ICMP6_INC_STATS_BH(__in6_dev_get(skb->dev), ICMP6_MIB_INERRORS); + ICMP6_INC_STATS_BH(net, __in6_dev_get(skb->dev), + ICMP6_MIB_INERRORS); return; } diff --git a/net/sctp/ipv6.c b/net/sctp/ipv6.c index c78da3c9dd3..4124bbb9994 100644 --- a/net/sctp/ipv6.c +++ b/net/sctp/ipv6.c @@ -156,7 +156,7 @@ SCTP_STATIC void sctp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, skb->network_header = saveip; skb->transport_header = savesctp; if (!sk) { - ICMP6_INC_STATS_BH(idev, ICMP6_MIB_INERRORS); + ICMP6_INC_STATS_BH(dev_net(skb->dev), idev, ICMP6_MIB_INERRORS); goto out; } -- cgit v1.2.3-70-g09d2 From 5c5d244bd388fe498dd7f5f57cb7770aae40b9ab Mon Sep 17 00:00:00 2001 From: "Denis V. Lunev" Date: Wed, 8 Oct 2008 10:33:50 -0700 Subject: ipv6: added net argument to ICMP6MSGOUT_INC_STATS Signed-off-by: Denis V. Lunev Signed-off-by: David S. Miller --- include/net/ipv6.h | 4 ++-- net/ipv6/mcast.c | 2 +- net/ipv6/ndisc.c | 4 ++-- 3 files changed, 5 insertions(+), 5 deletions(-) (limited to 'include/net') diff --git a/include/net/ipv6.h b/include/net/ipv6.h index 5107cd92a46..7f5a8dec1ae 100644 --- a/include/net/ipv6.h +++ b/include/net/ipv6.h @@ -144,8 +144,8 @@ DECLARE_SNMP_STAT(struct icmpv6msg_mib, icmpv6msg_statistics); #define ICMP6_INC_STATS_BH(net, idev, field) ({ (void)(net); \ _DEVINC(icmpv6, _BH, idev, field); }) -#define ICMP6MSGOUT_INC_STATS(idev, field) \ - _DEVINC(icmpv6msg, , idev, field +256) +#define ICMP6MSGOUT_INC_STATS(net, idev, field) ({ (void)(net); \ + _DEVINC(icmpv6msg, , idev, field +256); }) #define ICMP6MSGOUT_INC_STATS_BH(idev, field) \ _DEVINC(icmpv6msg, _BH, idev, field +256) #define ICMP6MSGIN_INC_STATS(idev, field) \ diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c index f06ceea1ffa..a96e4235fbf 100644 --- a/net/ipv6/mcast.c +++ b/net/ipv6/mcast.c @@ -1839,7 +1839,7 @@ static void igmp6_send(struct in6_addr *addr, struct net_device *dev, int type) dst_output); out: if (!err) { - ICMP6MSGOUT_INC_STATS(idev, type); + ICMP6MSGOUT_INC_STATS(net, idev, type); ICMP6_INC_STATS(net, idev, ICMP6_MIB_OUTMSGS); IP6_INC_STATS(net, idev, IPSTATS_MIB_OUTMCASTPKTS); } else diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c index 6ce238ca538..840b15780a3 100644 --- a/net/ipv6/ndisc.c +++ b/net/ipv6/ndisc.c @@ -521,7 +521,7 @@ static void __ndisc_send(struct net_device *dev, err = NF_HOOK(PF_INET6, NF_INET_LOCAL_OUT, skb, NULL, dst->dev, dst_output); if (!err) { - ICMP6MSGOUT_INC_STATS(idev, type); + ICMP6MSGOUT_INC_STATS(net, idev, type); ICMP6_INC_STATS(net, idev, ICMP6_MIB_OUTMSGS); } @@ -1585,7 +1585,7 @@ void ndisc_send_redirect(struct sk_buff *skb, struct neighbour *neigh, err = NF_HOOK(PF_INET6, NF_INET_LOCAL_OUT, buff, NULL, dst->dev, dst_output); if (!err) { - ICMP6MSGOUT_INC_STATS(idev, NDISC_REDIRECT); + ICMP6MSGOUT_INC_STATS(net, idev, NDISC_REDIRECT); ICMP6_INC_STATS(net, idev, ICMP6_MIB_OUTMSGS); } -- cgit v1.2.3-70-g09d2 From 5a57d4c7fdac0e227efe8c5739fcbb263d9ae993 Mon Sep 17 00:00:00 2001 From: "Denis V. Lunev" Date: Wed, 8 Oct 2008 10:34:14 -0700 Subject: ipv6: added net argument to ICMP6MSGOUT_INC_STATS_BH Signed-off-by: Denis V. Lunev Signed-off-by: David S. Miller --- include/net/ipv6.h | 4 ++-- net/ipv6/ip6_output.c | 2 +- net/ipv6/mcast.c | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) (limited to 'include/net') diff --git a/include/net/ipv6.h b/include/net/ipv6.h index 7f5a8dec1ae..4736d8f1f28 100644 --- a/include/net/ipv6.h +++ b/include/net/ipv6.h @@ -146,8 +146,8 @@ DECLARE_SNMP_STAT(struct icmpv6msg_mib, icmpv6msg_statistics); #define ICMP6MSGOUT_INC_STATS(net, idev, field) ({ (void)(net); \ _DEVINC(icmpv6msg, , idev, field +256); }) -#define ICMP6MSGOUT_INC_STATS_BH(idev, field) \ - _DEVINC(icmpv6msg, _BH, idev, field +256) +#define ICMP6MSGOUT_INC_STATS_BH(net, idev, field) ({ (void)(net); \ + _DEVINC(icmpv6msg, _BH, idev, field +256); }) #define ICMP6MSGIN_INC_STATS(idev, field) \ _DEVINC(icmpv6msg, , idev, field) #define ICMP6MSGIN_INC_STATS_BH(idev, field) \ diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index e7eff320619..c77db0b95e2 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -1482,7 +1482,7 @@ int ip6_push_pending_frames(struct sock *sk) if (proto == IPPROTO_ICMPV6) { struct inet6_dev *idev = ip6_dst_idev(skb->dst); - ICMP6MSGOUT_INC_STATS_BH(idev, icmp6_hdr(skb)->icmp6_type); + ICMP6MSGOUT_INC_STATS_BH(net, idev, icmp6_hdr(skb)->icmp6_type); ICMP6_INC_STATS_BH(net, idev, ICMP6_MIB_OUTMSGS); } diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c index a96e4235fbf..d7b3c6d398a 100644 --- a/net/ipv6/mcast.c +++ b/net/ipv6/mcast.c @@ -1474,7 +1474,7 @@ static void mld_sendpack(struct sk_buff *skb) dst_output); out: if (!err) { - ICMP6MSGOUT_INC_STATS_BH(idev, ICMPV6_MLD2_REPORT); + ICMP6MSGOUT_INC_STATS_BH(net, idev, ICMPV6_MLD2_REPORT); ICMP6_INC_STATS_BH(net, idev, ICMP6_MIB_OUTMSGS); IP6_INC_STATS_BH(net, idev, IPSTATS_MIB_OUTMCASTPKTS); } else -- cgit v1.2.3-70-g09d2 From a712d3e859b78edc44d5664d867626d3022bd18e Mon Sep 17 00:00:00 2001 From: "Denis V. Lunev" Date: Wed, 8 Oct 2008 10:34:35 -0700 Subject: ipv6: ICMP6MSGIN_INC_STATS is not used Removed. Signed-off-by: Denis V. Lunev Signed-off-by: David S. Miller --- include/net/ipv6.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/net') diff --git a/include/net/ipv6.h b/include/net/ipv6.h index 4736d8f1f28..01da23c061e 100644 --- a/include/net/ipv6.h +++ b/include/net/ipv6.h @@ -148,8 +148,6 @@ DECLARE_SNMP_STAT(struct icmpv6msg_mib, icmpv6msg_statistics); _DEVINC(icmpv6msg, , idev, field +256); }) #define ICMP6MSGOUT_INC_STATS_BH(net, idev, field) ({ (void)(net); \ _DEVINC(icmpv6msg, _BH, idev, field +256); }) -#define ICMP6MSGIN_INC_STATS(idev, field) \ - _DEVINC(icmpv6msg, , idev, field) #define ICMP6MSGIN_INC_STATS_BH(idev, field) \ _DEVINC(icmpv6msg, _BH, idev, field) -- cgit v1.2.3-70-g09d2 From 55d43808eb26e689dacb95b11f956a3b1a56a5f3 Mon Sep 17 00:00:00 2001 From: "Denis V. Lunev" Date: Wed, 8 Oct 2008 10:34:54 -0700 Subject: ipv6: added net argument to ICMP6MSGIN_INC_STATS_BH Signed-off-by: Denis V. Lunev Signed-off-by: David S. Miller --- include/net/ipv6.h | 4 ++-- net/ipv6/icmp.c | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) (limited to 'include/net') diff --git a/include/net/ipv6.h b/include/net/ipv6.h index 01da23c061e..47a76bfbf76 100644 --- a/include/net/ipv6.h +++ b/include/net/ipv6.h @@ -148,8 +148,8 @@ DECLARE_SNMP_STAT(struct icmpv6msg_mib, icmpv6msg_statistics); _DEVINC(icmpv6msg, , idev, field +256); }) #define ICMP6MSGOUT_INC_STATS_BH(net, idev, field) ({ (void)(net); \ _DEVINC(icmpv6msg, _BH, idev, field +256); }) -#define ICMP6MSGIN_INC_STATS_BH(idev, field) \ - _DEVINC(icmpv6msg, _BH, idev, field) +#define ICMP6MSGIN_INC_STATS_BH(net, idev, field) ({ (void)(net); \ + _DEVINC(icmpv6msg, _BH, idev, field); }) struct ip6_ra_chain { diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c index 4c961556306..9b7d19ae5ce 100644 --- a/net/ipv6/icmp.c +++ b/net/ipv6/icmp.c @@ -693,7 +693,7 @@ static int icmpv6_rcv(struct sk_buff *skb) type = hdr->icmp6_type; - ICMP6MSGIN_INC_STATS_BH(idev, type); + ICMP6MSGIN_INC_STATS_BH(dev_net(dev), idev, type); switch (type) { case ICMPV6_ECHO_REQUEST: -- cgit v1.2.3-70-g09d2 From 087fe24033c4280a15b03cce41eaec844c92f8e5 Mon Sep 17 00:00:00 2001 From: "Denis V. Lunev" Date: Wed, 8 Oct 2008 10:35:11 -0700 Subject: ipv6: added net argument to _DEVINC/_DEVADD Signed-off-by: Denis V. Lunev Signed-off-by: David S. Miller --- include/net/ipv6.h | 40 +++++++++++++++++++++------------------- 1 file changed, 21 insertions(+), 19 deletions(-) (limited to 'include/net') diff --git a/include/net/ipv6.h b/include/net/ipv6.h index 47a76bfbf76..d0538dd2c44 100644 --- a/include/net/ipv6.h +++ b/include/net/ipv6.h @@ -110,17 +110,19 @@ struct frag_hdr { extern int sysctl_mld_max_msf; extern struct ctl_path net_ipv6_ctl_path[]; -#define _DEVINC(statname, modifier, idev, field) \ +#define _DEVINC(net, statname, modifier, idev, field) \ ({ \ struct inet6_dev *_idev = (idev); \ + (void)(net); \ if (likely(_idev != NULL)) \ SNMP_INC_STATS##modifier((_idev)->stats.statname, (field)); \ SNMP_INC_STATS##modifier(statname##_statistics, (field)); \ }) -#define _DEVADD(statname, modifier, idev, field, val) \ +#define _DEVADD(net, statname, modifier, idev, field, val) \ ({ \ struct inet6_dev *_idev = (idev); \ + (void)(net); \ if (likely(_idev != NULL)) \ SNMP_ADD_STATS##modifier((_idev)->stats.statname, (field), (val)); \ SNMP_ADD_STATS##modifier(statname##_statistics, (field), (val));\ @@ -129,27 +131,27 @@ extern struct ctl_path net_ipv6_ctl_path[]; /* MIBs */ DECLARE_SNMP_STAT(struct ipstats_mib, ipv6_statistics); -#define IP6_INC_STATS(net, idev,field) ({ (void)(net); \ - _DEVINC(ipv6, , idev, field); }) -#define IP6_INC_STATS_BH(net, idev,field) ({ (void)(net); \ - _DEVINC(ipv6, _BH, idev, field); }) -#define IP6_ADD_STATS_BH(net, idev,field,val) ({ (void)(net); \ - _DEVADD(ipv6, _BH, idev, field, val); }) +#define IP6_INC_STATS(net, idev,field) \ + _DEVINC(net, ipv6, , idev, field) +#define IP6_INC_STATS_BH(net, idev,field) \ + _DEVINC(net, ipv6, _BH, idev, field) +#define IP6_ADD_STATS_BH(net, idev,field,val) \ + _DEVADD(net, ipv6, _BH, idev, field, val) DECLARE_SNMP_STAT(struct icmpv6_mib, icmpv6_statistics); DECLARE_SNMP_STAT(struct icmpv6msg_mib, icmpv6msg_statistics); -#define ICMP6_INC_STATS(net, idev, field) ({ (void)(net); \ - _DEVINC(icmpv6, , idev, field); }) -#define ICMP6_INC_STATS_BH(net, idev, field) ({ (void)(net); \ - _DEVINC(icmpv6, _BH, idev, field); }) - -#define ICMP6MSGOUT_INC_STATS(net, idev, field) ({ (void)(net); \ - _DEVINC(icmpv6msg, , idev, field +256); }) -#define ICMP6MSGOUT_INC_STATS_BH(net, idev, field) ({ (void)(net); \ - _DEVINC(icmpv6msg, _BH, idev, field +256); }) -#define ICMP6MSGIN_INC_STATS_BH(net, idev, field) ({ (void)(net); \ - _DEVINC(icmpv6msg, _BH, idev, field); }) +#define ICMP6_INC_STATS(net, idev, field) \ + _DEVINC(net, icmpv6, , idev, field) +#define ICMP6_INC_STATS_BH(net, idev, field) \ + _DEVINC(net, icmpv6, _BH, idev, field) + +#define ICMP6MSGOUT_INC_STATS(net, idev, field) \ + _DEVINC(net, icmpv6msg, , idev, field +256) +#define ICMP6MSGOUT_INC_STATS_BH(net, idev, field) \ + _DEVINC(net, icmpv6msg, _BH, idev, field +256) +#define ICMP6MSGIN_INC_STATS_BH(net, idev, field) \ + _DEVINC(net, icmpv6msg, _BH, idev, field) struct ip6_ra_chain { -- cgit v1.2.3-70-g09d2 From 9261e53701121f83eb9482347d68833e95315362 Mon Sep 17 00:00:00 2001 From: "Denis V. Lunev" Date: Wed, 8 Oct 2008 10:36:03 -0700 Subject: ipv6: making ip and icmp statistics per/namespace Signed-off-by: Denis V. Lunev Signed-off-by: David S. Miller --- include/net/ipv6.h | 10 ++-------- include/net/netns/mib.h | 3 +++ net/ipv6/af_inet6.c | 38 ++++++++++++++++++-------------------- net/ipv6/proc.c | 8 +++++--- 4 files changed, 28 insertions(+), 31 deletions(-) (limited to 'include/net') diff --git a/include/net/ipv6.h b/include/net/ipv6.h index d0538dd2c44..6d5b58a1c74 100644 --- a/include/net/ipv6.h +++ b/include/net/ipv6.h @@ -113,23 +113,20 @@ extern struct ctl_path net_ipv6_ctl_path[]; #define _DEVINC(net, statname, modifier, idev, field) \ ({ \ struct inet6_dev *_idev = (idev); \ - (void)(net); \ if (likely(_idev != NULL)) \ SNMP_INC_STATS##modifier((_idev)->stats.statname, (field)); \ - SNMP_INC_STATS##modifier(statname##_statistics, (field)); \ + SNMP_INC_STATS##modifier((net)->mib.statname##_statistics, (field));\ }) #define _DEVADD(net, statname, modifier, idev, field, val) \ ({ \ struct inet6_dev *_idev = (idev); \ - (void)(net); \ if (likely(_idev != NULL)) \ SNMP_ADD_STATS##modifier((_idev)->stats.statname, (field), (val)); \ - SNMP_ADD_STATS##modifier(statname##_statistics, (field), (val));\ + SNMP_ADD_STATS##modifier((net)->mib.statname##_statistics, (field), (val));\ }) /* MIBs */ -DECLARE_SNMP_STAT(struct ipstats_mib, ipv6_statistics); #define IP6_INC_STATS(net, idev,field) \ _DEVINC(net, ipv6, , idev, field) @@ -138,9 +135,6 @@ DECLARE_SNMP_STAT(struct ipstats_mib, ipv6_statistics); #define IP6_ADD_STATS_BH(net, idev,field,val) \ _DEVADD(net, ipv6, _BH, idev, field, val) -DECLARE_SNMP_STAT(struct icmpv6_mib, icmpv6_statistics); -DECLARE_SNMP_STAT(struct icmpv6msg_mib, icmpv6msg_statistics); - #define ICMP6_INC_STATS(net, idev, field) \ _DEVINC(net, icmpv6, , idev, field) #define ICMP6_INC_STATS_BH(net, idev, field) \ diff --git a/include/net/netns/mib.h b/include/net/netns/mib.h index 4e58f0519ce..10cb7c336de 100644 --- a/include/net/netns/mib.h +++ b/include/net/netns/mib.h @@ -16,6 +16,9 @@ struct netns_mib { struct proc_dir_entry *proc_net_devsnmp6; DEFINE_SNMP_STAT(struct udp_mib, udp_stats_in6); DEFINE_SNMP_STAT(struct udp_mib, udplite_stats_in6); + DEFINE_SNMP_STAT(struct ipstats_mib, ipv6_statistics); + DEFINE_SNMP_STAT(struct icmpv6_mib, icmpv6_statistics); + DEFINE_SNMP_STAT(struct icmpv6msg_mib, icmpv6msg_statistics); #endif }; diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c index 127b240d2d8..6b509d7700d 100644 --- a/net/ipv6/af_inet6.c +++ b/net/ipv6/af_inet6.c @@ -797,31 +797,11 @@ static void ipv6_packet_cleanup(void) static int __init init_ipv6_mibs(void) { - if (snmp_mib_init((void **)ipv6_statistics, - sizeof(struct ipstats_mib)) < 0) - goto err_ip_mib; - if (snmp_mib_init((void **)icmpv6_statistics, - sizeof(struct icmpv6_mib)) < 0) - goto err_icmp_mib; - if (snmp_mib_init((void **)icmpv6msg_statistics, - sizeof(struct icmpv6msg_mib)) < 0) - goto err_icmpmsg_mib; return 0; - -err_icmpmsg_mib: - snmp_mib_free((void **)icmpv6_statistics); -err_icmp_mib: - snmp_mib_free((void **)ipv6_statistics); -err_ip_mib: - return -ENOMEM; - } static void cleanup_ipv6_mibs(void) { - snmp_mib_free((void **)ipv6_statistics); - snmp_mib_free((void **)icmpv6_statistics); - snmp_mib_free((void **)icmpv6msg_statistics); } static int __net_init ipv6_init_mibs(struct net *net) @@ -832,8 +812,23 @@ static int __net_init ipv6_init_mibs(struct net *net) if (snmp_mib_init((void **)net->mib.udplite_stats_in6, sizeof (struct udp_mib)) < 0) goto err_udplite_mib; + if (snmp_mib_init((void **)net->mib.ipv6_statistics, + sizeof(struct ipstats_mib)) < 0) + goto err_ip_mib; + if (snmp_mib_init((void **)net->mib.icmpv6_statistics, + sizeof(struct icmpv6_mib)) < 0) + goto err_icmp_mib; + if (snmp_mib_init((void **)net->mib.icmpv6msg_statistics, + sizeof(struct icmpv6msg_mib)) < 0) + goto err_icmpmsg_mib; return 0; +err_icmpmsg_mib: + snmp_mib_free((void **)net->mib.icmpv6_statistics); +err_icmp_mib: + snmp_mib_free((void **)net->mib.ipv6_statistics); +err_ip_mib: + snmp_mib_free((void **)net->mib.udplite_stats_in6); err_udplite_mib: snmp_mib_free((void **)net->mib.udp_stats_in6); return -ENOMEM; @@ -843,6 +838,9 @@ static void __net_exit ipv6_cleanup_mibs(struct net *net) { snmp_mib_free((void **)net->mib.udp_stats_in6); snmp_mib_free((void **)net->mib.udplite_stats_in6); + snmp_mib_free((void **)net->mib.ipv6_statistics); + snmp_mib_free((void **)net->mib.icmpv6_statistics); + snmp_mib_free((void **)net->mib.icmpv6msg_statistics); } static int inet6_net_init(struct net *net) diff --git a/net/ipv6/proc.c b/net/ipv6/proc.c index c78cf754ef3..07f0b76e742 100644 --- a/net/ipv6/proc.c +++ b/net/ipv6/proc.c @@ -177,9 +177,11 @@ static int snmp6_seq_show(struct seq_file *seq, void *v) { struct net *net = (struct net *)seq->private; - snmp6_seq_show_item(seq, (void **)ipv6_statistics, snmp6_ipstats_list); - snmp6_seq_show_item(seq, (void **)icmpv6_statistics, snmp6_icmp6_list); - snmp6_seq_show_icmpv6msg(seq, (void **)icmpv6msg_statistics); + snmp6_seq_show_item(seq, (void **)net->mib.ipv6_statistics, + snmp6_ipstats_list); + snmp6_seq_show_item(seq, (void **)net->mib.icmpv6_statistics, + snmp6_icmp6_list); + snmp6_seq_show_icmpv6msg(seq, (void **)net->mib.icmpv6msg_statistics); snmp6_seq_show_item(seq, (void **)net->mib.udp_stats_in6, snmp6_udp6_list); snmp6_seq_show_item(seq, (void **)net->mib.udplite_stats_in6, -- cgit v1.2.3-70-g09d2 From 3c689b7320ae6f20dba6a8b71806a6c6fd604ee8 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 8 Oct 2008 14:18:04 -0700 Subject: inet: cleanup of local_port_range I noticed sysctl_local_port_range[] and its associated seqlock sysctl_local_port_range_lock were on separate cache lines. Moreover, sysctl_local_port_range[] was close to unrelated variables, highly modified, leading to cache misses. Moving these two variables in a structure can help data locality and moving this structure to read_mostly section helps sharing of this data among cpus. Cleanup of extern declarations (moved in include file where they belong), and use of inet_get_local_port_range() accessor instead of direct access to ports values. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/net/ip.h | 4 ++++ net/ipv4/inet_connection_sock.c | 16 +++++++++------- net/ipv4/sysctl_net_ipv4.c | 23 ++++++++++------------- 3 files changed, 23 insertions(+), 20 deletions(-) (limited to 'include/net') diff --git a/include/net/ip.h b/include/net/ip.h index d678ea3d474..1cbccaf0de3 100644 --- a/include/net/ip.h +++ b/include/net/ip.h @@ -178,6 +178,10 @@ extern unsigned long snmp_fold_field(void *mib[], int offt); extern int snmp_mib_init(void *ptr[2], size_t mibsize); extern void snmp_mib_free(void *ptr[2]); +extern struct local_ports { + seqlock_t lock; + int range[2]; +} sysctl_local_ports; extern void inet_get_local_port_range(int *low, int *high); extern int sysctl_ip_default_ttl; diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c index 21fcc5a9045..bd1278a2d82 100644 --- a/net/ipv4/inet_connection_sock.c +++ b/net/ipv4/inet_connection_sock.c @@ -30,20 +30,22 @@ EXPORT_SYMBOL(inet_csk_timer_bug_msg); #endif /* - * This array holds the first and last local port number. + * This struct holds the first and last local port number. */ -int sysctl_local_port_range[2] = { 32768, 61000 }; -DEFINE_SEQLOCK(sysctl_port_range_lock); +struct local_ports sysctl_local_ports __read_mostly = { + .lock = SEQLOCK_UNLOCKED, + .range = { 32768, 61000 }, +}; void inet_get_local_port_range(int *low, int *high) { unsigned seq; do { - seq = read_seqbegin(&sysctl_port_range_lock); + seq = read_seqbegin(&sysctl_local_ports.lock); - *low = sysctl_local_port_range[0]; - *high = sysctl_local_port_range[1]; - } while (read_seqretry(&sysctl_port_range_lock, seq)); + *low = sysctl_local_ports.range[0]; + *high = sysctl_local_ports.range[1]; + } while (read_seqretry(&sysctl_local_ports.lock, seq)); } EXPORT_SYMBOL(inet_get_local_port_range); diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c index e0689fd7b79..276d047fb85 100644 --- a/net/ipv4/sysctl_net_ipv4.c +++ b/net/ipv4/sysctl_net_ipv4.c @@ -26,16 +26,13 @@ static int tcp_retr1_max = 255; static int ip_local_port_range_min[] = { 1, 1 }; static int ip_local_port_range_max[] = { 65535, 65535 }; -extern seqlock_t sysctl_port_range_lock; -extern int sysctl_local_port_range[2]; - /* Update system visible IP port range */ static void set_local_port_range(int range[2]) { - write_seqlock(&sysctl_port_range_lock); - sysctl_local_port_range[0] = range[0]; - sysctl_local_port_range[1] = range[1]; - write_sequnlock(&sysctl_port_range_lock); + write_seqlock(&sysctl_local_ports.lock); + sysctl_local_ports.range[0] = range[0]; + sysctl_local_ports.range[1] = range[1]; + write_sequnlock(&sysctl_local_ports.lock); } /* Validate changes from /proc interface. */ @@ -44,8 +41,7 @@ static int ipv4_local_port_range(ctl_table *table, int write, struct file *filp, size_t *lenp, loff_t *ppos) { int ret; - int range[2] = { sysctl_local_port_range[0], - sysctl_local_port_range[1] }; + int range[2]; ctl_table tmp = { .data = &range, .maxlen = sizeof(range), @@ -54,6 +50,7 @@ static int ipv4_local_port_range(ctl_table *table, int write, struct file *filp, .extra2 = &ip_local_port_range_max, }; + inet_get_local_port_range(range, range + 1); ret = proc_dointvec_minmax(&tmp, write, filp, buffer, lenp, ppos); if (write && ret == 0) { @@ -73,8 +70,7 @@ static int ipv4_sysctl_local_port_range(ctl_table *table, int __user *name, void __user *newval, size_t newlen) { int ret; - int range[2] = { sysctl_local_port_range[0], - sysctl_local_port_range[1] }; + int range[2]; ctl_table tmp = { .data = &range, .maxlen = sizeof(range), @@ -83,6 +79,7 @@ static int ipv4_sysctl_local_port_range(ctl_table *table, int __user *name, .extra2 = &ip_local_port_range_max, }; + inet_get_local_port_range(range, range + 1); ret = sysctl_intvec(&tmp, name, nlen, oldval, oldlenp, newval, newlen); if (ret == 0 && newval && newlen) { if (range[1] < range[0]) @@ -396,8 +393,8 @@ static struct ctl_table ipv4_table[] = { { .ctl_name = NET_IPV4_LOCAL_PORT_RANGE, .procname = "ip_local_port_range", - .data = &sysctl_local_port_range, - .maxlen = sizeof(sysctl_local_port_range), + .data = &sysctl_local_ports.range, + .maxlen = sizeof(sysctl_local_ports.range), .mode = 0644, .proc_handler = &ipv4_local_port_range, .strategy = &ipv4_sysctl_local_port_range, -- cgit v1.2.3-70-g09d2 From 8e1ee18c332e08bee9d8bd66e63cd564fbf17fc2 Mon Sep 17 00:00:00 2001 From: Vlad Yasevich Date: Wed, 8 Oct 2008 14:18:39 -0700 Subject: sctp: Rework the tsn map to use generic bitmap. The tsn map currently use is 4K large and is stuck inside the sctp_association structure making memory references REALLY expensive. What we really need is at most 4K worth of bits so the biggest map we would have is 512 bytes. Also, the map is only really usefull when we have gaps to store and report. As such, starting with minimal map of say 32 TSNs (bits) should be enough for normal low-loss operations. We can grow the map by some multiple of 32 along with some extra room any time we receive the TSN which would put us outside of the map boundry. As we close gaps, we can shift the map to rebase it on the latest TSN we've seen. This saves 4088 bytes per association just in the map alone along savings from the now unnecessary structure members. Signed-off-by: Vlad Yasevich Signed-off-by: David S. Miller --- include/net/sctp/constants.h | 4 +- include/net/sctp/structs.h | 1 - include/net/sctp/tsnmap.h | 39 ++--- net/sctp/associola.c | 9 +- net/sctp/sm_make_chunk.c | 5 +- net/sctp/sm_sideeffect.c | 3 +- net/sctp/tsnmap.c | 331 +++++++++++++++++++------------------------ net/sctp/ulpevent.c | 10 +- 8 files changed, 178 insertions(+), 224 deletions(-) (limited to 'include/net') diff --git a/include/net/sctp/constants.h b/include/net/sctp/constants.h index c32ddf0279c..b05b0557211 100644 --- a/include/net/sctp/constants.h +++ b/include/net/sctp/constants.h @@ -261,7 +261,9 @@ enum { SCTP_ARBITRARY_COOKIE_ECHO_LEN = 200 }; * must be less than 65535 (2^16 - 1), or we will have overflow * problems creating SACK's. */ -#define SCTP_TSN_MAP_SIZE 2048 +#define SCTP_TSN_MAP_INITIAL BITS_PER_LONG +#define SCTP_TSN_MAP_INCREMENT SCTP_TSN_MAP_INITIAL +#define SCTP_TSN_MAP_SIZE 4096 #define SCTP_TSN_MAX_GAP 65535 /* We will not record more than this many duplicate TSNs between two diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h index 94c62e4ddea..9661d7b765f 100644 --- a/include/net/sctp/structs.h +++ b/include/net/sctp/structs.h @@ -1545,7 +1545,6 @@ struct sctp_association { * in tsn_map--we get it by calling sctp_tsnmap_get_ctsn(). */ struct sctp_tsnmap tsn_map; - __u8 _map[sctp_tsnmap_storage_size(SCTP_TSN_MAP_SIZE)]; /* Ack State : This flag indicates if the next received * : packet is to be responded to with a diff --git a/include/net/sctp/tsnmap.h b/include/net/sctp/tsnmap.h index 099211bf998..6dabbee8bbd 100644 --- a/include/net/sctp/tsnmap.h +++ b/include/net/sctp/tsnmap.h @@ -60,18 +60,7 @@ struct sctp_tsnmap { * It points at one of the two buffers with which we will * ping-pong between. */ - __u8 *tsn_map; - - /* This marks the tsn which overflows the tsn_map, when the - * cumulative ack point reaches this point we know we can switch - * maps (tsn_map and overflow_map swap). - */ - __u32 overflow_tsn; - - /* This is the overflow array for tsn_map. - * It points at one of the other ping-pong buffers. - */ - __u8 *overflow_map; + unsigned long *tsn_map; /* This is the TSN at tsn_map[0]. */ __u32 base_tsn; @@ -89,15 +78,15 @@ struct sctp_tsnmap { */ __u32 cumulative_tsn_ack_point; + /* This is the highest TSN we've marked. */ + __u32 max_tsn_seen; + /* This is the minimum number of TSNs we can track. This corresponds * to the size of tsn_map. Note: the overflow_map allows us to * potentially track more than this quantity. */ __u16 len; - /* This is the highest TSN we've marked. */ - __u32 max_tsn_seen; - /* Data chunks pending receipt. used by SCTP_STATUS sockopt */ __u16 pending_data; @@ -110,24 +99,17 @@ struct sctp_tsnmap { /* Record gap ack block information here. */ struct sctp_gap_ack_block gabs[SCTP_MAX_GABS]; - - int malloced; - - __u8 raw_map[0]; }; struct sctp_tsnmap_iter { __u32 start; }; -/* This macro assists in creation of external storage for variable length - * internal buffers. We double allocate so the overflow map works. - */ -#define sctp_tsnmap_storage_size(count) (sizeof(__u8) * (count) * 2) - /* Initialize a block of memory as a tsnmap. */ struct sctp_tsnmap *sctp_tsnmap_init(struct sctp_tsnmap *, __u16 len, - __u32 initial_tsn); + __u32 initial_tsn, gfp_t gfp); + +void sctp_tsnmap_free(struct sctp_tsnmap *map); /* Test the tracking state of this TSN. * Returns: @@ -138,7 +120,7 @@ struct sctp_tsnmap *sctp_tsnmap_init(struct sctp_tsnmap *, __u16 len, int sctp_tsnmap_check(const struct sctp_tsnmap *, __u32 tsn); /* Mark this TSN as seen. */ -void sctp_tsnmap_mark(struct sctp_tsnmap *, __u32 tsn); +int sctp_tsnmap_mark(struct sctp_tsnmap *, __u32 tsn); /* Mark this TSN and all lower as seen. */ void sctp_tsnmap_skip(struct sctp_tsnmap *map, __u32 tsn); @@ -183,10 +165,7 @@ static inline struct sctp_gap_ack_block *sctp_tsnmap_get_gabs(struct sctp_tsnmap /* Is there a gap in the TSN map? */ static inline int sctp_tsnmap_has_gap(const struct sctp_tsnmap *map) { - int has_gap; - - has_gap = (map->cumulative_tsn_ack_point != map->max_tsn_seen); - return has_gap; + return (map->cumulative_tsn_ack_point != map->max_tsn_seen); } /* Mark a duplicate TSN. Note: limit the storage of duplicate TSN diff --git a/net/sctp/associola.c b/net/sctp/associola.c index abd51cef241..f4b23043b61 100644 --- a/net/sctp/associola.c +++ b/net/sctp/associola.c @@ -283,8 +283,7 @@ static struct sctp_association *sctp_association_init(struct sctp_association *a if (!sctp_ulpq_init(&asoc->ulpq, asoc)) goto fail_init; - /* Set up the tsn tracking. */ - sctp_tsnmap_init(&asoc->peer.tsn_map, SCTP_TSN_MAP_SIZE, 0); + memset(&asoc->peer.tsn_map, 0, sizeof(struct sctp_tsnmap)); asoc->need_ecne = 0; @@ -402,6 +401,8 @@ void sctp_association_free(struct sctp_association *asoc) /* Dispose of any pending chunks on the inqueue. */ sctp_inq_free(&asoc->base.inqueue); + sctp_tsnmap_free(&asoc->peer.tsn_map); + /* Free ssnmap storage. */ sctp_ssnmap_free(asoc->ssnmap); @@ -1122,8 +1123,8 @@ void sctp_assoc_update(struct sctp_association *asoc, asoc->peer.rwnd = new->peer.rwnd; asoc->peer.sack_needed = new->peer.sack_needed; asoc->peer.i = new->peer.i; - sctp_tsnmap_init(&asoc->peer.tsn_map, SCTP_TSN_MAP_SIZE, - asoc->peer.i.initial_tsn); + sctp_tsnmap_init(&asoc->peer.tsn_map, SCTP_TSN_MAP_INITIAL, + asoc->peer.i.initial_tsn, GFP_ATOMIC); /* Remove any peer addresses not present in the new association. */ list_for_each_safe(pos, temp, &asoc->peer.transport_addr_list) { diff --git a/net/sctp/sm_make_chunk.c b/net/sctp/sm_make_chunk.c index 76726bcff3e..6dd9b3ef33d 100644 --- a/net/sctp/sm_make_chunk.c +++ b/net/sctp/sm_make_chunk.c @@ -2288,8 +2288,9 @@ int sctp_process_init(struct sctp_association *asoc, sctp_cid_t cid, } /* Set up the TSN tracking pieces. */ - sctp_tsnmap_init(&asoc->peer.tsn_map, SCTP_TSN_MAP_SIZE, - asoc->peer.i.initial_tsn); + if (!sctp_tsnmap_init(&asoc->peer.tsn_map, SCTP_TSN_MAP_INITIAL, + asoc->peer.i.initial_tsn, gfp)) + goto clean_up; /* RFC 2960 6.5 Stream Identifier and Stream Sequence Number * diff --git a/net/sctp/sm_sideeffect.c b/net/sctp/sm_sideeffect.c index 13d9eea5cf1..e1d6076b4f5 100644 --- a/net/sctp/sm_sideeffect.c +++ b/net/sctp/sm_sideeffect.c @@ -1152,7 +1152,8 @@ static int sctp_cmd_interpreter(sctp_event_t event_type, case SCTP_CMD_REPORT_TSN: /* Record the arrival of a TSN. */ - sctp_tsnmap_mark(&asoc->peer.tsn_map, cmd->obj.u32); + error = sctp_tsnmap_mark(&asoc->peer.tsn_map, + cmd->obj.u32); break; case SCTP_CMD_REPORT_FWDTSN: diff --git a/net/sctp/tsnmap.c b/net/sctp/tsnmap.c index f3e58b27590..142ed7ca424 100644 --- a/net/sctp/tsnmap.c +++ b/net/sctp/tsnmap.c @@ -43,37 +43,44 @@ */ #include +#include #include #include static void sctp_tsnmap_update(struct sctp_tsnmap *map); -static void sctp_tsnmap_find_gap_ack(__u8 *map, __u16 off, - __u16 len, __u16 base, - int *started, __u16 *start, - int *ended, __u16 *end); +static void sctp_tsnmap_find_gap_ack(unsigned long *map, __u16 off, + __u16 len, __u16 *start, __u16 *end); +static int sctp_tsnmap_grow(struct sctp_tsnmap *map, u16 gap); /* Initialize a block of memory as a tsnmap. */ struct sctp_tsnmap *sctp_tsnmap_init(struct sctp_tsnmap *map, __u16 len, - __u32 initial_tsn) + __u32 initial_tsn, gfp_t gfp) { - map->tsn_map = map->raw_map; - map->overflow_map = map->tsn_map + len; - map->len = len; - - /* Clear out a TSN ack status. */ - memset(map->tsn_map, 0x00, map->len + map->len); + if (!map->tsn_map) { + map->tsn_map = kzalloc(len>>3, gfp); + if (map->tsn_map == NULL) + return NULL; + + map->len = len; + } else { + bitmap_zero(map->tsn_map, map->len); + } /* Keep track of TSNs represented by tsn_map. */ map->base_tsn = initial_tsn; - map->overflow_tsn = initial_tsn + map->len; map->cumulative_tsn_ack_point = initial_tsn - 1; map->max_tsn_seen = map->cumulative_tsn_ack_point; - map->malloced = 0; map->num_dup_tsns = 0; return map; } +void sctp_tsnmap_free(struct sctp_tsnmap *map) +{ + map->len = 0; + kfree(map->tsn_map); +} + /* Test the tracking state of this TSN. * Returns: * 0 if the TSN has not yet been seen @@ -82,66 +89,69 @@ struct sctp_tsnmap *sctp_tsnmap_init(struct sctp_tsnmap *map, __u16 len, */ int sctp_tsnmap_check(const struct sctp_tsnmap *map, __u32 tsn) { - __s32 gap; - int dup; + u32 gap; + + /* Check to see if this is an old TSN */ + if (TSN_lte(tsn, map->cumulative_tsn_ack_point)) + return 1; + + /* Verify that we can hold this TSN and that it will not + * overlfow our map + */ + if (!TSN_lt(tsn, map->base_tsn + SCTP_TSN_MAP_SIZE)) + return -1; /* Calculate the index into the mapping arrays. */ gap = tsn - map->base_tsn; - /* Verify that we can hold this TSN. */ - if (gap >= (/* base */ map->len + /* overflow */ map->len)) { - dup = -1; - goto out; - } - - /* Honk if we've already seen this TSN. - * We have three cases: - * 1. The TSN is ancient or belongs to a previous tsn_map. - * 2. The TSN is already marked in the tsn_map. - * 3. The TSN is already marked in the tsn_map_overflow. - */ - if (gap < 0 || - (gap < map->len && map->tsn_map[gap]) || - (gap >= map->len && map->overflow_map[gap - map->len])) - dup = 1; + /* Check to see if TSN has already been recorded. */ + if (gap < map->len && test_bit(gap, map->tsn_map)) + return 1; else - dup = 0; - -out: - return dup; + return 0; } /* Mark this TSN as seen. */ -void sctp_tsnmap_mark(struct sctp_tsnmap *map, __u32 tsn) +int sctp_tsnmap_mark(struct sctp_tsnmap *map, __u32 tsn) { - __s32 gap; + u16 gap; - /* Vacuously mark any TSN which precedes the map base or - * exceeds the end of the map. - */ if (TSN_lt(tsn, map->base_tsn)) - return; - if (!TSN_lt(tsn, map->base_tsn + map->len + map->len)) - return; - - /* Bump the max. */ - if (TSN_lt(map->max_tsn_seen, tsn)) - map->max_tsn_seen = tsn; + return 0; - /* Assert: TSN is in range. */ gap = tsn - map->base_tsn; - /* Mark the TSN as received. */ - if (gap < map->len) - map->tsn_map[gap]++; - else - map->overflow_map[gap - map->len]++; + if (gap >= map->len && !sctp_tsnmap_grow(map, gap)) + return -ENOMEM; - /* Go fixup any internal TSN mapping variables including - * cumulative_tsn_ack_point. - */ - sctp_tsnmap_update(map); + if (!sctp_tsnmap_has_gap(map) && gap == 0) { + /* In this case the map has no gaps and the tsn we are + * recording is the next expected tsn. We don't touch + * the map but simply bump the values. + */ + map->max_tsn_seen++; + map->cumulative_tsn_ack_point++; + map->base_tsn++; + } else { + /* Either we already have a gap, or about to record a gap, so + * have work to do. + * + * Bump the max. + */ + if (TSN_lt(map->max_tsn_seen, tsn)) + map->max_tsn_seen = tsn; + + /* Mark the TSN as received. */ + set_bit(gap, map->tsn_map); + + /* Go fixup any internal TSN mapping variables including + * cumulative_tsn_ack_point. + */ + sctp_tsnmap_update(map); + } + + return 0; } @@ -160,66 +170,34 @@ SCTP_STATIC int sctp_tsnmap_next_gap_ack(const struct sctp_tsnmap *map, struct sctp_tsnmap_iter *iter, __u16 *start, __u16 *end) { - int started, ended; - __u16 start_, end_, offset; - - /* We haven't found a gap yet. */ - started = ended = 0; + int ended = 0; + __u16 start_ = 0, end_ = 0, offset; /* If there are no more gap acks possible, get out fast. */ if (TSN_lte(map->max_tsn_seen, iter->start)) return 0; - /* Search the first mapping array. */ - if (iter->start - map->base_tsn < map->len) { - - offset = iter->start - map->base_tsn; - sctp_tsnmap_find_gap_ack(map->tsn_map, offset, map->len, 0, - &started, &start_, &ended, &end_); - } - - /* Do we need to check the overflow map? */ - if (!ended) { - /* Fix up where we'd like to start searching in the - * overflow map. - */ - if (iter->start - map->base_tsn < map->len) - offset = 0; - else - offset = iter->start - map->base_tsn - map->len; - - /* Search the overflow map. */ - sctp_tsnmap_find_gap_ack(map->overflow_map, - offset, - map->len, - map->len, - &started, &start_, - &ended, &end_); - } + offset = iter->start - map->base_tsn; + sctp_tsnmap_find_gap_ack(map->tsn_map, offset, map->len, + &start_, &end_); - /* The Gap Ack Block happens to end at the end of the - * overflow map. - */ - if (started && !ended) { - ended++; - end_ = map->len + map->len - 1; - } + /* The Gap Ack Block happens to end at the end of the map. */ + if (start_ && !end_) + end_ = map->len - 1; /* If we found a Gap Ack Block, return the start and end and * bump the iterator forward. */ - if (ended) { + if (end_) { /* Fix up the start and end based on the - * Cumulative TSN Ack offset into the map. + * Cumulative TSN Ack which is always 1 behind base. */ - int gap = map->cumulative_tsn_ack_point - - map->base_tsn; - - *start = start_ - gap; - *end = end_ - gap; + *start = start_ + 1; + *end = end_ + 1; /* Move the iterator forward. */ iter->start = map->cumulative_tsn_ack_point + *end + 1; + ended = 1; } return ended; @@ -228,35 +206,33 @@ SCTP_STATIC int sctp_tsnmap_next_gap_ack(const struct sctp_tsnmap *map, /* Mark this and any lower TSN as seen. */ void sctp_tsnmap_skip(struct sctp_tsnmap *map, __u32 tsn) { - __s32 gap; + u32 gap; - /* Vacuously mark any TSN which precedes the map base or - * exceeds the end of the map. - */ if (TSN_lt(tsn, map->base_tsn)) return; - if (!TSN_lt(tsn, map->base_tsn + map->len + map->len)) + if (!TSN_lt(tsn, map->base_tsn + SCTP_TSN_MAP_SIZE)) return; /* Bump the max. */ if (TSN_lt(map->max_tsn_seen, tsn)) map->max_tsn_seen = tsn; - /* Assert: TSN is in range. */ gap = tsn - map->base_tsn + 1; - /* Mark the TSNs as received. */ - if (gap <= map->len) - memset(map->tsn_map, 0x01, gap); - else { - memset(map->tsn_map, 0x01, map->len); - memset(map->overflow_map, 0x01, (gap - map->len)); + map->base_tsn += gap; + map->cumulative_tsn_ack_point += gap; + if (gap >= map->len) { + /* If our gap is larger then the map size, just + * zero out the map. + */ + bitmap_zero(map->tsn_map, map->len); + } else { + /* If the gap is smaller then the map size, + * shift the map by 'gap' bits and update further. + */ + bitmap_shift_right(map->tsn_map, map->tsn_map, gap, map->len); + sctp_tsnmap_update(map); } - - /* Go fixup any internal TSN mapping variables including - * cumulative_tsn_ack_point. - */ - sctp_tsnmap_update(map); } /******************************************************************** @@ -268,27 +244,19 @@ void sctp_tsnmap_skip(struct sctp_tsnmap *map, __u32 tsn) */ static void sctp_tsnmap_update(struct sctp_tsnmap *map) { - __u32 ctsn; - - ctsn = map->cumulative_tsn_ack_point; - do { - ctsn++; - if (ctsn == map->overflow_tsn) { - /* Now tsn_map must have been all '1's, - * so we swap the map and check the overflow table - */ - __u8 *tmp = map->tsn_map; - memset(tmp, 0, map->len); - map->tsn_map = map->overflow_map; - map->overflow_map = tmp; - - /* Update the tsn_map boundaries. */ - map->base_tsn += map->len; - map->overflow_tsn += map->len; - } - } while (map->tsn_map[ctsn - map->base_tsn]); + u16 len; + unsigned long zero_bit; + + + len = map->max_tsn_seen - map->cumulative_tsn_ack_point; + zero_bit = find_first_zero_bit(map->tsn_map, len); + if (!zero_bit) + return; /* The first 0-bit is bit 0. nothing to do */ + + map->base_tsn += zero_bit; + map->cumulative_tsn_ack_point += zero_bit; - map->cumulative_tsn_ack_point = ctsn - 1; /* Back up one. */ + bitmap_shift_right(map->tsn_map, map->tsn_map, zero_bit, map->len); } /* How many data chunks are we missing from our peer? @@ -299,31 +267,19 @@ __u16 sctp_tsnmap_pending(struct sctp_tsnmap *map) __u32 max_tsn = map->max_tsn_seen; __u32 base_tsn = map->base_tsn; __u16 pending_data; - __s32 gap, start, end, i; + u32 gap, i; pending_data = max_tsn - cum_tsn; gap = max_tsn - base_tsn; - if (gap <= 0 || gap >= (map->len + map->len)) + if (gap == 0 || gap >= map->len) goto out; - start = ((cum_tsn >= base_tsn) ? (cum_tsn - base_tsn + 1) : 0); - end = ((gap > map->len ) ? map->len : gap + 1); - - for (i = start; i < end; i++) { - if (map->tsn_map[i]) + for (i = 0; i < gap+1; i++) { + if (test_bit(i, map->tsn_map)) pending_data--; } - if (gap >= map->len) { - start = 0; - end = gap - map->len + 1; - for (i = start; i < end; i++) { - if (map->overflow_map[i]) - pending_data--; - } - } - out: return pending_data; } @@ -334,10 +290,8 @@ out: * The flags "started" and "ended" tell is if we found the beginning * or (respectively) the end of a Gap Ack Block. */ -static void sctp_tsnmap_find_gap_ack(__u8 *map, __u16 off, - __u16 len, __u16 base, - int *started, __u16 *start, - int *ended, __u16 *end) +static void sctp_tsnmap_find_gap_ack(unsigned long *map, __u16 off, + __u16 len, __u16 *start, __u16 *end) { int i = off; @@ -348,49 +302,36 @@ static void sctp_tsnmap_find_gap_ack(__u8 *map, __u16 off, /* Also, stop looking past the maximum TSN seen. */ /* Look for the start. */ - if (!(*started)) { - for (; i < len; i++) { - if (map[i]) { - (*started)++; - *start = base + i; - break; - } - } - } + i = find_next_bit(map, len, off); + if (i < len) + *start = i; /* Look for the end. */ - if (*started) { + if (*start) { /* We have found the start, let's find the * end. If we find the end, break out. */ - for (; i < len; i++) { - if (!map[i]) { - (*ended)++; - *end = base + i - 1; - break; - } - } + i = find_next_zero_bit(map, len, i); + if (i < len) + *end = i - 1; } } /* Renege that we have seen a TSN. */ void sctp_tsnmap_renege(struct sctp_tsnmap *map, __u32 tsn) { - __s32 gap; + u32 gap; if (TSN_lt(tsn, map->base_tsn)) return; - if (!TSN_lt(tsn, map->base_tsn + map->len + map->len)) + /* Assert: TSN is in range. */ + if (!TSN_lt(tsn, map->base_tsn + map->len)) return; - /* Assert: TSN is in range. */ gap = tsn - map->base_tsn; /* Pretend we never saw the TSN. */ - if (gap < map->len) - map->tsn_map[gap] = 0; - else - map->overflow_map[gap - map->len] = 0; + clear_bit(gap, map->tsn_map); } /* How many gap ack blocks do we have recorded? */ @@ -416,3 +357,27 @@ __u16 sctp_tsnmap_num_gabs(struct sctp_tsnmap *map) } return gabs; } + +static int sctp_tsnmap_grow(struct sctp_tsnmap *map, u16 gap) +{ + unsigned long *new; + unsigned long inc; + u16 len; + + if (gap >= SCTP_TSN_MAP_SIZE) + return 0; + + inc = ALIGN((gap - map->len),BITS_PER_LONG) + SCTP_TSN_MAP_INCREMENT; + len = min_t(u16, map->len + inc, SCTP_TSN_MAP_SIZE); + + new = kzalloc(len>>3, GFP_ATOMIC); + if (!new) + return 0; + + bitmap_copy(new, map->tsn_map, map->max_tsn_seen - map->base_tsn); + kfree(map->tsn_map); + map->tsn_map = new; + map->len = len; + + return 1; +} diff --git a/net/sctp/ulpevent.c b/net/sctp/ulpevent.c index a1f654aea26..5f186ca550d 100644 --- a/net/sctp/ulpevent.c +++ b/net/sctp/ulpevent.c @@ -713,7 +713,9 @@ struct sctp_ulpevent *sctp_ulpevent_make_rcvmsg(struct sctp_association *asoc, /* Now that all memory allocations for this chunk succeeded, we * can mark it as received so the tsn_map is updated correctly. */ - sctp_tsnmap_mark(&asoc->peer.tsn_map, ntohl(chunk->subh.data_hdr->tsn)); + if (sctp_tsnmap_mark(&asoc->peer.tsn_map, + ntohl(chunk->subh.data_hdr->tsn))) + goto fail_mark; /* First calculate the padding, so we don't inadvertently * pass up the wrong length to the user. @@ -755,8 +757,12 @@ struct sctp_ulpevent *sctp_ulpevent_make_rcvmsg(struct sctp_association *asoc, event->msg_flags |= chunk->chunk_hdr->flags; event->iif = sctp_chunk_iif(chunk); -fail: return event; + +fail_mark: + kfree_skb(skb); +fail: + return NULL; } /* Create a partial delivery related event. -- cgit v1.2.3-70-g09d2 From 02015180e2509afd2e3fe3790a333b30708a116b Mon Sep 17 00:00:00 2001 From: Vlad Yasevich Date: Wed, 8 Oct 2008 14:19:01 -0700 Subject: sctp: shrink sctp_tsnmap some more by removing gabs array The gabs array in the sctp_tsnmap structure is only used in one place, sctp_make_sack(). As such, carrying the array around in the sctp_tsnmap and thus directly in the sctp_association is rather pointless since most of the time it's just taking up space. Now, let sctp_make_sack create and populate it and then throw it away when it's done. Signed-off-by: Vlad Yasevich Signed-off-by: David S. Miller --- include/net/sctp/tsnmap.h | 14 +++----------- net/sctp/sm_make_chunk.c | 6 ++++-- net/sctp/tsnmap.c | 15 ++++++++------- 3 files changed, 15 insertions(+), 20 deletions(-) (limited to 'include/net') diff --git a/include/net/sctp/tsnmap.h b/include/net/sctp/tsnmap.h index 6dabbee8bbd..4aabc5a96cf 100644 --- a/include/net/sctp/tsnmap.h +++ b/include/net/sctp/tsnmap.h @@ -94,11 +94,8 @@ struct sctp_tsnmap { * every SACK. Store up to SCTP_MAX_DUP_TSNS worth of * information. */ - __be32 dup_tsns[SCTP_MAX_DUP_TSNS]; __u16 num_dup_tsns; - - /* Record gap ack block information here. */ - struct sctp_gap_ack_block gabs[SCTP_MAX_GABS]; + __be32 dup_tsns[SCTP_MAX_DUP_TSNS]; }; struct sctp_tsnmap_iter { @@ -151,17 +148,12 @@ static inline __be32 *sctp_tsnmap_get_dups(struct sctp_tsnmap *map) } /* How many gap ack blocks do we have recorded? */ -__u16 sctp_tsnmap_num_gabs(struct sctp_tsnmap *map); +__u16 sctp_tsnmap_num_gabs(struct sctp_tsnmap *map, + struct sctp_gap_ack_block *gabs); /* Refresh the count on pending data. */ __u16 sctp_tsnmap_pending(struct sctp_tsnmap *map); -/* Return pointer to gap ack blocks as needed by SACK. */ -static inline struct sctp_gap_ack_block *sctp_tsnmap_get_gabs(struct sctp_tsnmap *map) -{ - return map->gabs; -} - /* Is there a gap in the TSN map? */ static inline int sctp_tsnmap_has_gap(const struct sctp_tsnmap *map) { diff --git a/net/sctp/sm_make_chunk.c b/net/sctp/sm_make_chunk.c index 6dd9b3ef33d..fd8acb48c3f 100644 --- a/net/sctp/sm_make_chunk.c +++ b/net/sctp/sm_make_chunk.c @@ -702,12 +702,14 @@ struct sctp_chunk *sctp_make_sack(const struct sctp_association *asoc) __u32 ctsn; __u16 num_gabs, num_dup_tsns; struct sctp_tsnmap *map = (struct sctp_tsnmap *)&asoc->peer.tsn_map; + struct sctp_gap_ack_block gabs[SCTP_MAX_GABS]; + memset(gabs, 0, sizeof(gabs)); ctsn = sctp_tsnmap_get_ctsn(map); SCTP_DEBUG_PRINTK("sackCTSNAck sent: 0x%x.\n", ctsn); /* How much room is needed in the chunk? */ - num_gabs = sctp_tsnmap_num_gabs(map); + num_gabs = sctp_tsnmap_num_gabs(map, gabs); num_dup_tsns = sctp_tsnmap_num_dups(map); /* Initialize the SACK header. */ @@ -763,7 +765,7 @@ struct sctp_chunk *sctp_make_sack(const struct sctp_association *asoc) /* Add the gap ack block information. */ if (num_gabs) sctp_addto_chunk(retval, sizeof(__u32) * num_gabs, - sctp_tsnmap_get_gabs(map)); + gabs); /* Add the duplicate TSN information. */ if (num_dup_tsns) diff --git a/net/sctp/tsnmap.c b/net/sctp/tsnmap.c index 142ed7ca424..35c73e82553 100644 --- a/net/sctp/tsnmap.c +++ b/net/sctp/tsnmap.c @@ -335,10 +335,11 @@ void sctp_tsnmap_renege(struct sctp_tsnmap *map, __u32 tsn) } /* How many gap ack blocks do we have recorded? */ -__u16 sctp_tsnmap_num_gabs(struct sctp_tsnmap *map) +__u16 sctp_tsnmap_num_gabs(struct sctp_tsnmap *map, + struct sctp_gap_ack_block *gabs) { struct sctp_tsnmap_iter iter; - int gabs = 0; + int ngaps = 0; /* Refresh the gap ack information. */ if (sctp_tsnmap_has_gap(map)) { @@ -348,14 +349,14 @@ __u16 sctp_tsnmap_num_gabs(struct sctp_tsnmap *map) &start, &end)) { - map->gabs[gabs].start = htons(start); - map->gabs[gabs].end = htons(end); - gabs++; - if (gabs >= SCTP_MAX_GABS) + gabs[ngaps].start = htons(start); + gabs[ngaps].end = htons(end); + ngaps++; + if (ngaps >= SCTP_MAX_GABS) break; } } - return gabs; + return ngaps; } static int sctp_tsnmap_grow(struct sctp_tsnmap *map, u16 gap) -- cgit v1.2.3-70-g09d2 From 91da11f870f00a3322b81c73042291d7f0be5a17 Mon Sep 17 00:00:00 2001 From: Lennert Buytenhek Date: Tue, 7 Oct 2008 13:44:02 +0000 Subject: net: Distributed Switch Architecture protocol support Distributed Switch Architecture is a protocol for managing hardware switch chips. It consists of a set of MII management registers and commands to configure the switch, and an ethernet header format to signal which of the ports of the switch a packet was received from or is intended to be sent to. The switches that this driver supports are typically embedded in access points and routers, and a typical setup with a DSA switch looks something like this: +-----------+ +-----------+ | | RGMII | | | +-------+ +------ 1000baseT MDI ("WAN") | | | 6-port +------ 1000baseT MDI ("LAN1") | CPU | | ethernet +------ 1000baseT MDI ("LAN2") | |MIImgmt| switch +------ 1000baseT MDI ("LAN3") | +-------+ w/5 PHYs +------ 1000baseT MDI ("LAN4") | | | | +-----------+ +-----------+ The switch driver presents each port on the switch as a separate network interface to Linux, polls the switch to maintain software link state of those ports, forwards MII management interface accesses to those network interfaces (e.g. as done by ethtool) to the switch, and exposes the switch's hardware statistics counters via the appropriate Linux kernel interfaces. This initial patch supports the MII management interface register layout of the Marvell 88E6123, 88E6161 and 88E6165 switch chips, and supports the "Ethertype DSA" packet tagging format. (There is no officially registered ethertype for the Ethertype DSA packet format, so we just grab a random one. The ethertype to use is programmed into the switch, and the switch driver uses the value of ETH_P_EDSA for this, so this define can be changed at any time in the future if the one we chose is allocated to another protocol or if Ethertype DSA gets its own officially registered ethertype, and everything will continue to work.) Signed-off-by: Lennert Buytenhek Tested-by: Nicolas Pitre Tested-by: Byron Bradley Tested-by: Tim Ellis Tested-by: Peter van Valderen Tested-by: Dirk Teurlings Signed-off-by: David S. Miller --- include/linux/if_ether.h | 1 + include/linux/netdevice.h | 3 + include/net/dsa.h | 34 ++++ net/Kconfig | 1 + net/Makefile | 1 + net/dsa/Kconfig | 31 ++++ net/dsa/Makefile | 9 + net/dsa/dsa.c | 369 ++++++++++++++++++++++++++++++++++++++++ net/dsa/dsa_priv.h | 110 ++++++++++++ net/dsa/mv88e6123_61_65.c | 417 ++++++++++++++++++++++++++++++++++++++++++++++ net/dsa/mv88e6xxx.c | 377 +++++++++++++++++++++++++++++++++++++++++ net/dsa/mv88e6xxx.h | 77 +++++++++ net/dsa/slave.c | 288 ++++++++++++++++++++++++++++++++ net/dsa/tag_edsa.c | 213 +++++++++++++++++++++++ 14 files changed, 1931 insertions(+) create mode 100644 include/net/dsa.h create mode 100644 net/dsa/Kconfig create mode 100644 net/dsa/Makefile create mode 100644 net/dsa/dsa.c create mode 100644 net/dsa/dsa_priv.h create mode 100644 net/dsa/mv88e6123_61_65.c create mode 100644 net/dsa/mv88e6xxx.c create mode 100644 net/dsa/mv88e6xxx.h create mode 100644 net/dsa/slave.c create mode 100644 net/dsa/tag_edsa.c (limited to 'include/net') diff --git a/include/linux/if_ether.h b/include/linux/if_ether.h index 723a1c5fbc6..2140aacb633 100644 --- a/include/linux/if_ether.h +++ b/include/linux/if_ether.h @@ -77,6 +77,7 @@ #define ETH_P_PAE 0x888E /* Port Access Entity (IEEE 802.1X) */ #define ETH_P_AOE 0x88A2 /* ATA over Ethernet */ #define ETH_P_TIPC 0x88CA /* TIPC */ +#define ETH_P_EDSA 0xDADA /* Ethertype DSA [ NOT AN OFFICIALLY REGISTERED ID ] */ /* * Non DIX types. Won't clash for 1500 types. diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 9cfd20be8b7..794eeb4b346 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -607,6 +607,9 @@ struct net_device /* Protocol specific pointers */ +#ifdef CONFIG_NET_DSA + void *dsa_ptr; /* dsa specific data */ +#endif void *atalk_ptr; /* AppleTalk link */ void *ip_ptr; /* IPv4 specific data */ void *dn_ptr; /* DECnet specific data */ diff --git a/include/net/dsa.h b/include/net/dsa.h new file mode 100644 index 00000000000..dc4784f5452 --- /dev/null +++ b/include/net/dsa.h @@ -0,0 +1,34 @@ +/* + * include/net/dsa.h - Driver for Distributed Switch Architecture switch chips + * Copyright (c) 2008 Marvell Semiconductor + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + */ + +#ifndef __LINUX_NET_DSA_H +#define __LINUX_NET_DSA_H + +#define DSA_MAX_PORTS 12 + +struct dsa_platform_data { + /* + * Reference to a Linux network interface that connects + * to the switch chip. + */ + struct device *netdev; + + /* + * How to access the switch configuration registers, and + * the names of the switch ports (use "cpu" to designate + * the switch port that the cpu is connected to). + */ + struct device *mii_bus; + int sw_addr; + char *port_names[DSA_MAX_PORTS]; +}; + + +#endif diff --git a/net/Kconfig b/net/Kconfig index 9103a16a77b..d789d79551a 100644 --- a/net/Kconfig +++ b/net/Kconfig @@ -180,6 +180,7 @@ source "net/tipc/Kconfig" source "net/atm/Kconfig" source "net/802/Kconfig" source "net/bridge/Kconfig" +source "net/dsa/Kconfig" source "net/8021q/Kconfig" source "net/decnet/Kconfig" source "net/llc/Kconfig" diff --git a/net/Makefile b/net/Makefile index acaf819f24a..27d1f10dc0e 100644 --- a/net/Makefile +++ b/net/Makefile @@ -26,6 +26,7 @@ obj-$(CONFIG_PACKET) += packet/ obj-$(CONFIG_NET_KEY) += key/ obj-$(CONFIG_NET_SCHED) += sched/ obj-$(CONFIG_BRIDGE) += bridge/ +obj-$(CONFIG_NET_DSA) += dsa/ obj-$(CONFIG_IPX) += ipx/ obj-$(CONFIG_ATALK) += appletalk/ obj-$(CONFIG_WAN_ROUTER) += wanrouter/ diff --git a/net/dsa/Kconfig b/net/dsa/Kconfig new file mode 100644 index 00000000000..7cf55e5eb39 --- /dev/null +++ b/net/dsa/Kconfig @@ -0,0 +1,31 @@ +menuconfig NET_DSA + bool "Distributed Switch Architecture support" + default n + depends on EXPERIMENTAL + ---help--- + This allows you to use hardware switch chips that use + the Distributed Switch Architecture. + + +if NET_DSA + +# tagging formats +config NET_DSA_TAG_EDSA + bool + default n + + +# switch drivers +config NET_DSA_MV88E6XXX + bool + default n + +config NET_DSA_MV88E6123_61_65 + bool "Marvell 88E6123/6161/6165 ethernet switch chip support" + select NET_DSA_MV88E6XXX + select NET_DSA_TAG_EDSA + ---help--- + This enables support for the Marvell 88E6123/6161/6165 + ethernet switch chips. + +endif diff --git a/net/dsa/Makefile b/net/dsa/Makefile new file mode 100644 index 00000000000..b59a6f6bcf5 --- /dev/null +++ b/net/dsa/Makefile @@ -0,0 +1,9 @@ +# tagging formats +obj-$(CONFIG_NET_DSA_TAG_EDSA) += tag_edsa.o + +# switch drivers +obj-$(CONFIG_NET_DSA_MV88E6XXX) += mv88e6xxx.o +obj-$(CONFIG_NET_DSA_MV88E6123_61_65) += mv88e6123_61_65.o + +# the core +obj-$(CONFIG_NET_DSA) += dsa.o slave.o diff --git a/net/dsa/dsa.c b/net/dsa/dsa.c new file mode 100644 index 00000000000..6cc5be2ec7f --- /dev/null +++ b/net/dsa/dsa.c @@ -0,0 +1,369 @@ +/* + * net/dsa/dsa.c - Hardware switch handling + * Copyright (c) 2008 Marvell Semiconductor + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + */ + +#include +#include +#include +#include +#include "dsa_priv.h" + +char dsa_driver_version[] = "0.1"; + + +/* switch driver registration ***********************************************/ +static DEFINE_MUTEX(dsa_switch_drivers_mutex); +static LIST_HEAD(dsa_switch_drivers); + +void register_switch_driver(struct dsa_switch_driver *drv) +{ + mutex_lock(&dsa_switch_drivers_mutex); + list_add_tail(&drv->list, &dsa_switch_drivers); + mutex_unlock(&dsa_switch_drivers_mutex); +} + +void unregister_switch_driver(struct dsa_switch_driver *drv) +{ + mutex_lock(&dsa_switch_drivers_mutex); + list_del_init(&drv->list); + mutex_unlock(&dsa_switch_drivers_mutex); +} + +static struct dsa_switch_driver * +dsa_switch_probe(struct mii_bus *bus, int sw_addr, char **_name) +{ + struct dsa_switch_driver *ret; + struct list_head *list; + char *name; + + ret = NULL; + name = NULL; + + mutex_lock(&dsa_switch_drivers_mutex); + list_for_each(list, &dsa_switch_drivers) { + struct dsa_switch_driver *drv; + + drv = list_entry(list, struct dsa_switch_driver, list); + + name = drv->probe(bus, sw_addr); + if (name != NULL) { + ret = drv; + break; + } + } + mutex_unlock(&dsa_switch_drivers_mutex); + + *_name = name; + + return ret; +} + + +/* basic switch operations **************************************************/ +static struct dsa_switch * +dsa_switch_setup(struct device *parent, struct dsa_platform_data *pd, + struct mii_bus *bus, struct net_device *dev) +{ + struct dsa_switch *ds; + int ret; + struct dsa_switch_driver *drv; + char *name; + int i; + + /* + * Probe for switch model. + */ + drv = dsa_switch_probe(bus, pd->sw_addr, &name); + if (drv == NULL) { + printk(KERN_ERR "%s: could not detect attached switch\n", + dev->name); + return ERR_PTR(-EINVAL); + } + printk(KERN_INFO "%s: detected a %s switch\n", dev->name, name); + + + /* + * Allocate and initialise switch state. + */ + ds = kzalloc(sizeof(*ds) + drv->priv_size, GFP_KERNEL); + if (ds == NULL) + return ERR_PTR(-ENOMEM); + + ds->pd = pd; + ds->master_netdev = dev; + ds->master_mii_bus = bus; + + ds->drv = drv; + ds->tag_protocol = drv->tag_protocol; + + + /* + * Validate supplied switch configuration. + */ + ds->cpu_port = -1; + for (i = 0; i < DSA_MAX_PORTS; i++) { + char *name; + + name = pd->port_names[i]; + if (name == NULL) + continue; + + if (!strcmp(name, "cpu")) { + if (ds->cpu_port != -1) { + printk(KERN_ERR "multiple cpu ports?!\n"); + ret = -EINVAL; + goto out; + } + ds->cpu_port = i; + } else { + ds->valid_port_mask |= 1 << i; + } + } + + if (ds->cpu_port == -1) { + printk(KERN_ERR "no cpu port?!\n"); + ret = -EINVAL; + goto out; + } + + + /* + * If we use a tagging format that doesn't have an ethertype + * field, make sure that all packets from this point on get + * sent to the tag format's receive function. (Which will + * discard received packets until we set ds->ports[] below.) + */ + wmb(); + dev->dsa_ptr = (void *)ds; + + + /* + * Do basic register setup. + */ + ret = drv->setup(ds); + if (ret < 0) + goto out; + + ret = drv->set_addr(ds, dev->dev_addr); + if (ret < 0) + goto out; + + ds->slave_mii_bus = mdiobus_alloc(); + if (ds->slave_mii_bus == NULL) { + ret = -ENOMEM; + goto out; + } + dsa_slave_mii_bus_init(ds); + + ret = mdiobus_register(ds->slave_mii_bus); + if (ret < 0) + goto out_free; + + + /* + * Create network devices for physical switch ports. + */ + wmb(); + for (i = 0; i < DSA_MAX_PORTS; i++) { + struct net_device *slave_dev; + + if (!(ds->valid_port_mask & (1 << i))) + continue; + + slave_dev = dsa_slave_create(ds, parent, i, pd->port_names[i]); + if (slave_dev == NULL) { + printk(KERN_ERR "%s: can't create dsa slave " + "device for port %d(%s)\n", + dev->name, i, pd->port_names[i]); + continue; + } + + ds->ports[i] = slave_dev; + } + + return ds; + +out_free: + mdiobus_free(ds->slave_mii_bus); +out: + dev->dsa_ptr = NULL; + kfree(ds); + return ERR_PTR(ret); +} + +static void dsa_switch_destroy(struct dsa_switch *ds) +{ +} + + +/* link polling *************************************************************/ +static void dsa_link_poll_work(struct work_struct *ugly) +{ + struct dsa_switch *ds; + + ds = container_of(ugly, struct dsa_switch, link_poll_work); + + ds->drv->poll_link(ds); + mod_timer(&ds->link_poll_timer, round_jiffies(jiffies + HZ)); +} + +static void dsa_link_poll_timer(unsigned long _ds) +{ + struct dsa_switch *ds = (void *)_ds; + + schedule_work(&ds->link_poll_work); +} + + +/* platform driver init and cleanup *****************************************/ +static int dev_is_class(struct device *dev, void *class) +{ + if (dev->class != NULL && !strcmp(dev->class->name, class)) + return 1; + + return 0; +} + +static struct device *dev_find_class(struct device *parent, char *class) +{ + if (dev_is_class(parent, class)) { + get_device(parent); + return parent; + } + + return device_find_child(parent, class, dev_is_class); +} + +static struct mii_bus *dev_to_mii_bus(struct device *dev) +{ + struct device *d; + + d = dev_find_class(dev, "mdio_bus"); + if (d != NULL) { + struct mii_bus *bus; + + bus = to_mii_bus(d); + put_device(d); + + return bus; + } + + return NULL; +} + +static struct net_device *dev_to_net_device(struct device *dev) +{ + struct device *d; + + d = dev_find_class(dev, "net"); + if (d != NULL) { + struct net_device *nd; + + nd = to_net_dev(d); + dev_hold(nd); + put_device(d); + + return nd; + } + + return NULL; +} + +static int dsa_probe(struct platform_device *pdev) +{ + static int dsa_version_printed; + struct dsa_platform_data *pd = pdev->dev.platform_data; + struct net_device *dev; + struct mii_bus *bus; + struct dsa_switch *ds; + + if (!dsa_version_printed++) + printk(KERN_NOTICE "Distributed Switch Architecture " + "driver version %s\n", dsa_driver_version); + + if (pd == NULL || pd->mii_bus == NULL || pd->netdev == NULL) + return -EINVAL; + + bus = dev_to_mii_bus(pd->mii_bus); + if (bus == NULL) + return -EINVAL; + + dev = dev_to_net_device(pd->netdev); + if (dev == NULL) + return -EINVAL; + + if (dev->dsa_ptr != NULL) { + dev_put(dev); + return -EEXIST; + } + + ds = dsa_switch_setup(&pdev->dev, pd, bus, dev); + if (IS_ERR(ds)) { + dev_put(dev); + return PTR_ERR(ds); + } + + if (ds->drv->poll_link != NULL) { + INIT_WORK(&ds->link_poll_work, dsa_link_poll_work); + init_timer(&ds->link_poll_timer); + ds->link_poll_timer.data = (unsigned long)ds; + ds->link_poll_timer.function = dsa_link_poll_timer; + ds->link_poll_timer.expires = round_jiffies(jiffies + HZ); + add_timer(&ds->link_poll_timer); + } + + platform_set_drvdata(pdev, ds); + + return 0; +} + +static int dsa_remove(struct platform_device *pdev) +{ + struct dsa_switch *ds = platform_get_drvdata(pdev); + + if (ds->drv->poll_link != NULL) + del_timer_sync(&ds->link_poll_timer); + + flush_scheduled_work(); + + dsa_switch_destroy(ds); + + return 0; +} + +static void dsa_shutdown(struct platform_device *pdev) +{ +} + +static struct platform_driver dsa_driver = { + .probe = dsa_probe, + .remove = dsa_remove, + .shutdown = dsa_shutdown, + .driver = { + .name = "dsa", + .owner = THIS_MODULE, + }, +}; + +static int __init dsa_init_module(void) +{ + return platform_driver_register(&dsa_driver); +} +module_init(dsa_init_module); + +static void __exit dsa_cleanup_module(void) +{ + platform_driver_unregister(&dsa_driver); +} +module_exit(dsa_cleanup_module); + +MODULE_AUTHOR("Lennert Buytenhek ") +MODULE_DESCRIPTION("Driver for Distributed Switch Architecture switch chips"); +MODULE_LICENSE("GPL"); +MODULE_ALIAS("platform:dsa"); diff --git a/net/dsa/dsa_priv.h b/net/dsa/dsa_priv.h new file mode 100644 index 00000000000..21ee9052079 --- /dev/null +++ b/net/dsa/dsa_priv.h @@ -0,0 +1,110 @@ +/* + * net/dsa/dsa_priv.h - Hardware switch handling + * Copyright (c) 2008 Marvell Semiconductor + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + */ + +#ifndef __DSA_PRIV_H +#define __DSA_PRIV_H + +#include +#include +#include +#include +#include + +struct dsa_switch { + /* + * Configuration data for the platform device that owns + * this dsa switch instance. + */ + struct dsa_platform_data *pd; + + /* + * References to network device and mii bus to use. + */ + struct net_device *master_netdev; + struct mii_bus *master_mii_bus; + + /* + * The used switch driver and frame tagging type. + */ + struct dsa_switch_driver *drv; + __be16 tag_protocol; + + /* + * Slave mii_bus and devices for the individual ports. + */ + int cpu_port; + u32 valid_port_mask; + struct mii_bus *slave_mii_bus; + struct net_device *ports[DSA_MAX_PORTS]; + + /* + * Link state polling. + */ + struct work_struct link_poll_work; + struct timer_list link_poll_timer; +}; + +struct dsa_slave_priv { + struct net_device *dev; + struct dsa_switch *parent; + int port; + struct phy_device *phy; +}; + +struct dsa_switch_driver { + struct list_head list; + + __be16 tag_protocol; + int priv_size; + + /* + * Probing and setup. + */ + char *(*probe)(struct mii_bus *bus, int sw_addr); + int (*setup)(struct dsa_switch *ds); + int (*set_addr)(struct dsa_switch *ds, u8 *addr); + + /* + * Access to the switch's PHY registers. + */ + int (*phy_read)(struct dsa_switch *ds, int port, int regnum); + int (*phy_write)(struct dsa_switch *ds, int port, + int regnum, u16 val); + + /* + * Link state polling and IRQ handling. + */ + void (*poll_link)(struct dsa_switch *ds); + + /* + * ethtool hardware statistics. + */ + void (*get_strings)(struct dsa_switch *ds, int port, uint8_t *data); + void (*get_ethtool_stats)(struct dsa_switch *ds, + int port, uint64_t *data); + int (*get_sset_count)(struct dsa_switch *ds); +}; + +/* dsa.c */ +extern char dsa_driver_version[]; +void register_switch_driver(struct dsa_switch_driver *type); +void unregister_switch_driver(struct dsa_switch_driver *type); + +/* slave.c */ +void dsa_slave_mii_bus_init(struct dsa_switch *ds); +struct net_device *dsa_slave_create(struct dsa_switch *ds, + struct device *parent, + int port, char *name); + +/* tag_edsa.c */ +int edsa_xmit(struct sk_buff *skb, struct net_device *dev); + + +#endif diff --git a/net/dsa/mv88e6123_61_65.c b/net/dsa/mv88e6123_61_65.c new file mode 100644 index 00000000000..147818cc706 --- /dev/null +++ b/net/dsa/mv88e6123_61_65.c @@ -0,0 +1,417 @@ +/* + * net/dsa/mv88e6123_61_65.c - Marvell 88e6123/6161/6165 switch chip support + * Copyright (c) 2008 Marvell Semiconductor + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + */ + +#include +#include +#include +#include "dsa_priv.h" +#include "mv88e6xxx.h" + +static char *mv88e6123_61_65_probe(struct mii_bus *bus, int sw_addr) +{ + int ret; + + ret = __mv88e6xxx_reg_read(bus, sw_addr, REG_PORT(0), 0x03); + if (ret >= 0) { + ret &= 0xfff0; + if (ret == 0x1210) + return "Marvell 88E6123"; + if (ret == 0x1610) + return "Marvell 88E6161"; + if (ret == 0x1650) + return "Marvell 88E6165"; + } + + return NULL; +} + +static int mv88e6123_61_65_switch_reset(struct dsa_switch *ds) +{ + int i; + int ret; + + /* + * Set all ports to the disabled state. + */ + for (i = 0; i < 8; i++) { + ret = REG_READ(REG_PORT(i), 0x04); + REG_WRITE(REG_PORT(i), 0x04, ret & 0xfffc); + } + + /* + * Wait for transmit queues to drain. + */ + msleep(2); + + /* + * Reset the switch. + */ + REG_WRITE(REG_GLOBAL, 0x04, 0xc400); + + /* + * Wait up to one second for reset to complete. + */ + for (i = 0; i < 1000; i++) { + ret = REG_READ(REG_GLOBAL, 0x00); + if ((ret & 0xc800) == 0xc800) + break; + + msleep(1); + } + if (i == 1000) + return -ETIMEDOUT; + + return 0; +} + +static int mv88e6123_61_65_setup_global(struct dsa_switch *ds) +{ + int ret; + int i; + + /* + * Disable the PHY polling unit (since there won't be any + * external PHYs to poll), don't discard packets with + * excessive collisions, and mask all interrupt sources. + */ + REG_WRITE(REG_GLOBAL, 0x04, 0x0000); + + /* + * Set the default address aging time to 5 minutes, and + * enable address learn messages to be sent to all message + * ports. + */ + REG_WRITE(REG_GLOBAL, 0x0a, 0x0148); + + /* + * Configure the priority mapping registers. + */ + ret = mv88e6xxx_config_prio(ds); + if (ret < 0) + return ret; + + /* + * Configure the cpu port, and configure the cpu port as the + * port to which ingress and egress monitor frames are to be + * sent. + */ + REG_WRITE(REG_GLOBAL, 0x1a, (ds->cpu_port * 0x1110)); + + /* + * Disable remote management for now, and set the switch's + * DSA device number to zero. + */ + REG_WRITE(REG_GLOBAL, 0x1c, 0x0000); + + /* + * Send all frames with destination addresses matching + * 01:80:c2:00:00:2x to the CPU port. + */ + REG_WRITE(REG_GLOBAL2, 0x02, 0xffff); + + /* + * Send all frames with destination addresses matching + * 01:80:c2:00:00:0x to the CPU port. + */ + REG_WRITE(REG_GLOBAL2, 0x03, 0xffff); + + /* + * Disable the loopback filter, disable flow control + * messages, disable flood broadcast override, disable + * removing of provider tags, disable ATU age violation + * interrupts, disable tag flow control, force flow + * control priority to the highest, and send all special + * multicast frames to the CPU at the highest priority. + */ + REG_WRITE(REG_GLOBAL2, 0x05, 0x00ff); + + /* + * Map all DSA device IDs to the CPU port. + */ + for (i = 0; i < 32; i++) + REG_WRITE(REG_GLOBAL2, 0x06, 0x8000 | (i << 8) | ds->cpu_port); + + /* + * Clear all trunk masks. + */ + for (i = 0; i < 8; i++) + REG_WRITE(REG_GLOBAL2, 0x07, 0x8000 | (i << 12) | 0xff); + + /* + * Clear all trunk mappings. + */ + for (i = 0; i < 16; i++) + REG_WRITE(REG_GLOBAL2, 0x08, 0x8000 | (i << 11)); + + /* + * Disable ingress rate limiting by resetting all ingress + * rate limit registers to their initial state. + */ + for (i = 0; i < 6; i++) + REG_WRITE(REG_GLOBAL2, 0x09, 0x9000 | (i << 8)); + + /* + * Initialise cross-chip port VLAN table to reset defaults. + */ + REG_WRITE(REG_GLOBAL2, 0x0b, 0x9000); + + /* + * Clear the priority override table. + */ + for (i = 0; i < 16; i++) + REG_WRITE(REG_GLOBAL2, 0x0f, 0x8000 | (i << 8)); + + /* @@@ initialise AVB (22/23) watchdog (27) sdet (29) registers */ + + return 0; +} + +static int mv88e6123_61_65_setup_port(struct dsa_switch *ds, int p) +{ + int addr = REG_PORT(p); + + /* + * MAC Forcing register: don't force link, speed, duplex + * or flow control state to any particular values. + */ + REG_WRITE(addr, 0x01, 0x0003); + + /* + * Do not limit the period of time that this port can be + * paused for by the remote end or the period of time that + * this port can pause the remote end. + */ + REG_WRITE(addr, 0x02, 0x0000); + + /* + * Port Control: disable Drop-on-Unlock, disable Drop-on-Lock, + * configure the EDSA tagging mode if this is the CPU port, + * disable Header mode, enable IGMP/MLD snooping, disable VLAN + * tunneling, determine priority by looking at 802.1p and IP + * priority fields (IP prio has precedence), and set STP state + * to Forwarding. Finally, if this is the CPU port, additionally + * enable forwarding of unknown unicast and multicast addresses. + */ + REG_WRITE(addr, 0x04, + (p == ds->cpu_port) ? 0x373f : 0x0433); + + /* + * Port Control 1: disable trunking. Also, if this is the + * CPU port, enable learn messages to be sent to this port. + */ + REG_WRITE(addr, 0x05, (p == ds->cpu_port) ? 0x8000 : 0x0000); + + /* + * Port based VLAN map: give each port its own address + * database, allow the CPU port to talk to each of the 'real' + * ports, and allow each of the 'real' ports to only talk to + * the CPU port. + */ + REG_WRITE(addr, 0x06, + ((p & 0xf) << 12) | + ((p == ds->cpu_port) ? + ds->valid_port_mask : + (1 << ds->cpu_port))); + + /* + * Default VLAN ID and priority: don't set a default VLAN + * ID, and set the default packet priority to zero. + */ + REG_WRITE(addr, 0x07, 0x0000); + + /* + * Port Control 2: don't force a good FCS, set the maximum + * frame size to 10240 bytes, don't let the switch add or + * strip 802.1q tags, don't discard tagged or untagged frames + * on this port, do a destination address lookup on all + * received packets as usual, disable ARP mirroring and don't + * send a copy of all transmitted/received frames on this port + * to the CPU. + */ + REG_WRITE(addr, 0x08, 0x2080); + + /* + * Egress rate control: disable egress rate control. + */ + REG_WRITE(addr, 0x09, 0x0001); + + /* + * Egress rate control 2: disable egress rate control. + */ + REG_WRITE(addr, 0x0a, 0x0000); + + /* + * Port Association Vector: when learning source addresses + * of packets, add the address to the address database using + * a port bitmap that has only the bit for this port set and + * the other bits clear. + */ + REG_WRITE(addr, 0x0b, 1 << p); + + /* + * Port ATU control: disable limiting the number of address + * database entries that this port is allowed to use. + */ + REG_WRITE(addr, 0x0c, 0x0000); + + /* + * Priorit Override: disable DA, SA and VTU priority override. + */ + REG_WRITE(addr, 0x0d, 0x0000); + + /* + * Port Ethertype: use the Ethertype DSA Ethertype value. + */ + REG_WRITE(addr, 0x0f, ETH_P_EDSA); + + /* + * Tag Remap: use an identity 802.1p prio -> switch prio + * mapping. + */ + REG_WRITE(addr, 0x18, 0x3210); + + /* + * Tag Remap 2: use an identity 802.1p prio -> switch prio + * mapping. + */ + REG_WRITE(addr, 0x19, 0x7654); + + return 0; +} + +static int mv88e6123_61_65_setup(struct dsa_switch *ds) +{ + struct mv88e6xxx_priv_state *ps = (void *)(ds + 1); + int i; + int ret; + + mutex_init(&ps->smi_mutex); + mutex_init(&ps->stats_mutex); + + ret = mv88e6123_61_65_switch_reset(ds); + if (ret < 0) + return ret; + + /* @@@ initialise vtu and atu */ + + ret = mv88e6123_61_65_setup_global(ds); + if (ret < 0) + return ret; + + for (i = 0; i < 6; i++) { + ret = mv88e6123_61_65_setup_port(ds, i); + if (ret < 0) + return ret; + } + + return 0; +} + +static int mv88e6123_61_65_port_to_phy_addr(int port) +{ + if (port >= 0 && port <= 4) + return port; + return -1; +} + +static int +mv88e6123_61_65_phy_read(struct dsa_switch *ds, int port, int regnum) +{ + int addr = mv88e6123_61_65_port_to_phy_addr(port); + return mv88e6xxx_phy_read(ds, addr, regnum); +} + +static int +mv88e6123_61_65_phy_write(struct dsa_switch *ds, + int port, int regnum, u16 val) +{ + int addr = mv88e6123_61_65_port_to_phy_addr(port); + return mv88e6xxx_phy_write(ds, addr, regnum, val); +} + +static struct mv88e6xxx_hw_stat mv88e6123_61_65_hw_stats[] = { + { "in_good_octets", 8, 0x00, }, + { "in_bad_octets", 4, 0x02, }, + { "in_unicast", 4, 0x04, }, + { "in_broadcasts", 4, 0x06, }, + { "in_multicasts", 4, 0x07, }, + { "in_pause", 4, 0x16, }, + { "in_undersize", 4, 0x18, }, + { "in_fragments", 4, 0x19, }, + { "in_oversize", 4, 0x1a, }, + { "in_jabber", 4, 0x1b, }, + { "in_rx_error", 4, 0x1c, }, + { "in_fcs_error", 4, 0x1d, }, + { "out_octets", 8, 0x0e, }, + { "out_unicast", 4, 0x10, }, + { "out_broadcasts", 4, 0x13, }, + { "out_multicasts", 4, 0x12, }, + { "out_pause", 4, 0x15, }, + { "excessive", 4, 0x11, }, + { "collisions", 4, 0x1e, }, + { "deferred", 4, 0x05, }, + { "single", 4, 0x14, }, + { "multiple", 4, 0x17, }, + { "out_fcs_error", 4, 0x03, }, + { "late", 4, 0x1f, }, + { "hist_64bytes", 4, 0x08, }, + { "hist_65_127bytes", 4, 0x09, }, + { "hist_128_255bytes", 4, 0x0a, }, + { "hist_256_511bytes", 4, 0x0b, }, + { "hist_512_1023bytes", 4, 0x0c, }, + { "hist_1024_max_bytes", 4, 0x0d, }, +}; + +static void +mv88e6123_61_65_get_strings(struct dsa_switch *ds, int port, uint8_t *data) +{ + mv88e6xxx_get_strings(ds, ARRAY_SIZE(mv88e6123_61_65_hw_stats), + mv88e6123_61_65_hw_stats, port, data); +} + +static void +mv88e6123_61_65_get_ethtool_stats(struct dsa_switch *ds, + int port, uint64_t *data) +{ + mv88e6xxx_get_ethtool_stats(ds, ARRAY_SIZE(mv88e6123_61_65_hw_stats), + mv88e6123_61_65_hw_stats, port, data); +} + +static int mv88e6123_61_65_get_sset_count(struct dsa_switch *ds) +{ + return ARRAY_SIZE(mv88e6123_61_65_hw_stats); +} + +static struct dsa_switch_driver mv88e6123_61_65_switch_driver = { + .tag_protocol = __constant_htons(ETH_P_EDSA), + .priv_size = sizeof(struct mv88e6xxx_priv_state), + .probe = mv88e6123_61_65_probe, + .setup = mv88e6123_61_65_setup, + .set_addr = mv88e6xxx_set_addr_indirect, + .phy_read = mv88e6123_61_65_phy_read, + .phy_write = mv88e6123_61_65_phy_write, + .poll_link = mv88e6xxx_poll_link, + .get_strings = mv88e6123_61_65_get_strings, + .get_ethtool_stats = mv88e6123_61_65_get_ethtool_stats, + .get_sset_count = mv88e6123_61_65_get_sset_count, +}; + +int __init mv88e6123_61_65_init(void) +{ + register_switch_driver(&mv88e6123_61_65_switch_driver); + return 0; +} +module_init(mv88e6123_61_65_init); + +void __exit mv88e6123_61_65_cleanup(void) +{ + unregister_switch_driver(&mv88e6123_61_65_switch_driver); +} +module_exit(mv88e6123_61_65_cleanup); diff --git a/net/dsa/mv88e6xxx.c b/net/dsa/mv88e6xxx.c new file mode 100644 index 00000000000..13d2328a240 --- /dev/null +++ b/net/dsa/mv88e6xxx.c @@ -0,0 +1,377 @@ +/* + * net/dsa/mv88e6xxx.c - Marvell 88e6xxx switch chip support + * Copyright (c) 2008 Marvell Semiconductor + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + */ + +#include +#include +#include +#include "dsa_priv.h" +#include "mv88e6xxx.h" + +/* + * If the switch's ADDR[4:0] strap pins are strapped to zero, it will + * use all 32 SMI bus addresses on its SMI bus, and all switch registers + * will be directly accessible on some {device address,register address} + * pair. If the ADDR[4:0] pins are not strapped to zero, the switch + * will only respond to SMI transactions to that specific address, and + * an indirect addressing mechanism needs to be used to access its + * registers. + */ +static int mv88e6xxx_reg_wait_ready(struct mii_bus *bus, int sw_addr) +{ + int ret; + int i; + + for (i = 0; i < 16; i++) { + ret = mdiobus_read(bus, sw_addr, 0); + if (ret < 0) + return ret; + + if ((ret & 0x8000) == 0) + return 0; + } + + return -ETIMEDOUT; +} + +int __mv88e6xxx_reg_read(struct mii_bus *bus, int sw_addr, int addr, int reg) +{ + int ret; + + if (sw_addr == 0) + return mdiobus_read(bus, addr, reg); + + /* + * Wait for the bus to become free. + */ + ret = mv88e6xxx_reg_wait_ready(bus, sw_addr); + if (ret < 0) + return ret; + + /* + * Transmit the read command. + */ + ret = mdiobus_write(bus, sw_addr, 0, 0x9800 | (addr << 5) | reg); + if (ret < 0) + return ret; + + /* + * Wait for the read command to complete. + */ + ret = mv88e6xxx_reg_wait_ready(bus, sw_addr); + if (ret < 0) + return ret; + + /* + * Read the data. + */ + ret = mdiobus_read(bus, sw_addr, 1); + if (ret < 0) + return ret; + + return ret & 0xffff; +} + +int mv88e6xxx_reg_read(struct dsa_switch *ds, int addr, int reg) +{ + struct mv88e6xxx_priv_state *ps = (void *)(ds + 1); + int ret; + + mutex_lock(&ps->smi_mutex); + ret = __mv88e6xxx_reg_read(ds->master_mii_bus, + ds->pd->sw_addr, addr, reg); + mutex_unlock(&ps->smi_mutex); + + return ret; +} + +int __mv88e6xxx_reg_write(struct mii_bus *bus, int sw_addr, int addr, + int reg, u16 val) +{ + int ret; + + if (sw_addr == 0) + return mdiobus_write(bus, addr, reg, val); + + /* + * Wait for the bus to become free. + */ + ret = mv88e6xxx_reg_wait_ready(bus, sw_addr); + if (ret < 0) + return ret; + + /* + * Transmit the data to write. + */ + ret = mdiobus_write(bus, sw_addr, 1, val); + if (ret < 0) + return ret; + + /* + * Transmit the write command. + */ + ret = mdiobus_write(bus, sw_addr, 0, 0x9400 | (addr << 5) | reg); + if (ret < 0) + return ret; + + /* + * Wait for the write command to complete. + */ + ret = mv88e6xxx_reg_wait_ready(bus, sw_addr); + if (ret < 0) + return ret; + + return 0; +} + +int mv88e6xxx_reg_write(struct dsa_switch *ds, int addr, int reg, u16 val) +{ + struct mv88e6xxx_priv_state *ps = (void *)(ds + 1); + int ret; + + mutex_lock(&ps->smi_mutex); + ret = __mv88e6xxx_reg_write(ds->master_mii_bus, + ds->pd->sw_addr, addr, reg, val); + mutex_unlock(&ps->smi_mutex); + + return ret; +} + +int mv88e6xxx_config_prio(struct dsa_switch *ds) +{ + /* + * Configure the IP ToS mapping registers. + */ + REG_WRITE(REG_GLOBAL, 0x10, 0x0000); + REG_WRITE(REG_GLOBAL, 0x11, 0x0000); + REG_WRITE(REG_GLOBAL, 0x12, 0x5555); + REG_WRITE(REG_GLOBAL, 0x13, 0x5555); + REG_WRITE(REG_GLOBAL, 0x14, 0xaaaa); + REG_WRITE(REG_GLOBAL, 0x15, 0xaaaa); + REG_WRITE(REG_GLOBAL, 0x16, 0xffff); + REG_WRITE(REG_GLOBAL, 0x17, 0xffff); + + /* + * Configure the IEEE 802.1p priority mapping register. + */ + REG_WRITE(REG_GLOBAL, 0x18, 0xfa41); + + return 0; +} + +int mv88e6xxx_set_addr_indirect(struct dsa_switch *ds, u8 *addr) +{ + int i; + int ret; + + for (i = 0; i < 6; i++) { + int j; + + /* + * Write the MAC address byte. + */ + REG_WRITE(REG_GLOBAL2, 0x0d, 0x8000 | (i << 8) | addr[i]); + + /* + * Wait for the write to complete. + */ + for (j = 0; j < 16; j++) { + ret = REG_READ(REG_GLOBAL2, 0x0d); + if ((ret & 0x8000) == 0) + break; + } + if (j == 16) + return -ETIMEDOUT; + } + + return 0; +} + +int mv88e6xxx_phy_read(struct dsa_switch *ds, int addr, int regnum) +{ + if (addr >= 0) + return mv88e6xxx_reg_read(ds, addr, regnum); + return 0xffff; +} + +int mv88e6xxx_phy_write(struct dsa_switch *ds, int addr, int regnum, u16 val) +{ + if (addr >= 0) + return mv88e6xxx_reg_write(ds, addr, regnum, val); + return 0; +} + +void mv88e6xxx_poll_link(struct dsa_switch *ds) +{ + int i; + + for (i = 0; i < DSA_MAX_PORTS; i++) { + struct net_device *dev; + int port_status; + int link; + int speed; + int duplex; + int fc; + + dev = ds->ports[i]; + if (dev == NULL) + continue; + + link = 0; + if (dev->flags & IFF_UP) { + port_status = mv88e6xxx_reg_read(ds, REG_PORT(i), 0x00); + if (port_status < 0) + continue; + + link = !!(port_status & 0x0800); + } + + if (!link) { + if (netif_carrier_ok(dev)) { + printk(KERN_INFO "%s: link down\n", dev->name); + netif_carrier_off(dev); + } + continue; + } + + switch (port_status & 0x0300) { + case 0x0000: + speed = 10; + break; + case 0x0100: + speed = 100; + break; + case 0x0200: + speed = 1000; + break; + default: + speed = -1; + break; + } + duplex = (port_status & 0x0400) ? 1 : 0; + fc = (port_status & 0x8000) ? 1 : 0; + + if (!netif_carrier_ok(dev)) { + printk(KERN_INFO "%s: link up, %d Mb/s, %s duplex, " + "flow control %sabled\n", dev->name, + speed, duplex ? "full" : "half", + fc ? "en" : "dis"); + netif_carrier_on(dev); + } + } +} + +static int mv88e6xxx_stats_wait(struct dsa_switch *ds) +{ + int ret; + int i; + + for (i = 0; i < 10; i++) { + ret = REG_READ(REG_GLOBAL2, 0x1d); + if ((ret & 0x8000) == 0) + return 0; + } + + return -ETIMEDOUT; +} + +static int mv88e6xxx_stats_snapshot(struct dsa_switch *ds, int port) +{ + int ret; + + /* + * Snapshot the hardware statistics counters for this port. + */ + REG_WRITE(REG_GLOBAL, 0x1d, 0xdc00 | port); + + /* + * Wait for the snapshotting to complete. + */ + ret = mv88e6xxx_stats_wait(ds); + if (ret < 0) + return ret; + + return 0; +} + +static void mv88e6xxx_stats_read(struct dsa_switch *ds, int stat, u32 *val) +{ + u32 _val; + int ret; + + *val = 0; + + ret = mv88e6xxx_reg_write(ds, REG_GLOBAL, 0x1d, 0xcc00 | stat); + if (ret < 0) + return; + + ret = mv88e6xxx_stats_wait(ds); + if (ret < 0) + return; + + ret = mv88e6xxx_reg_read(ds, REG_GLOBAL, 0x1e); + if (ret < 0) + return; + + _val = ret << 16; + + ret = mv88e6xxx_reg_read(ds, REG_GLOBAL, 0x1f); + if (ret < 0) + return; + + *val = _val | ret; +} + +void mv88e6xxx_get_strings(struct dsa_switch *ds, + int nr_stats, struct mv88e6xxx_hw_stat *stats, + int port, uint8_t *data) +{ + int i; + + for (i = 0; i < nr_stats; i++) { + memcpy(data + i * ETH_GSTRING_LEN, + stats[i].string, ETH_GSTRING_LEN); + } +} + +void mv88e6xxx_get_ethtool_stats(struct dsa_switch *ds, + int nr_stats, struct mv88e6xxx_hw_stat *stats, + int port, uint64_t *data) +{ + struct mv88e6xxx_priv_state *ps = (void *)(ds + 1); + int ret; + int i; + + mutex_lock(&ps->stats_mutex); + + ret = mv88e6xxx_stats_snapshot(ds, port); + if (ret < 0) { + mutex_unlock(&ps->stats_mutex); + return; + } + + /* + * Read each of the counters. + */ + for (i = 0; i < nr_stats; i++) { + struct mv88e6xxx_hw_stat *s = stats + i; + u32 low; + u32 high; + + mv88e6xxx_stats_read(ds, s->reg, &low); + if (s->sizeof_stat == 8) + mv88e6xxx_stats_read(ds, s->reg + 1, &high); + else + high = 0; + + data[i] = (((u64)high) << 32) | low; + } + + mutex_unlock(&ps->stats_mutex); +} diff --git a/net/dsa/mv88e6xxx.h b/net/dsa/mv88e6xxx.h new file mode 100644 index 00000000000..a004d4d0208 --- /dev/null +++ b/net/dsa/mv88e6xxx.h @@ -0,0 +1,77 @@ +/* + * net/dsa/mv88e6xxx.h - Marvell 88e6xxx switch chip support + * Copyright (c) 2008 Marvell Semiconductor + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + */ + +#ifndef __MV88E6XXX_H +#define __MV88E6XXX_H + +#define REG_PORT(p) (0x10 + (p)) +#define REG_GLOBAL 0x1b +#define REG_GLOBAL2 0x1c + +struct mv88e6xxx_priv_state { + /* + * When using multi-chip addressing, this mutex protects + * access to the indirect access registers. (In single-chip + * mode, this mutex is effectively useless.) + */ + struct mutex smi_mutex; + + /* + * This mutex serialises access to the statistics unit. + * Hold this mutex over snapshot + dump sequences. + */ + struct mutex stats_mutex; +}; + +struct mv88e6xxx_hw_stat { + char string[ETH_GSTRING_LEN]; + int sizeof_stat; + int reg; +}; + +int __mv88e6xxx_reg_read(struct mii_bus *bus, int sw_addr, int addr, int reg); +int mv88e6xxx_reg_read(struct dsa_switch *ds, int addr, int reg); +int __mv88e6xxx_reg_write(struct mii_bus *bus, int sw_addr, int addr, + int reg, u16 val); +int mv88e6xxx_reg_write(struct dsa_switch *ds, int addr, int reg, u16 val); +int mv88e6xxx_config_prio(struct dsa_switch *ds); +int mv88e6xxx_set_addr_indirect(struct dsa_switch *ds, u8 *addr); +int mv88e6xxx_phy_read(struct dsa_switch *ds, int addr, int regnum); +int mv88e6xxx_phy_write(struct dsa_switch *ds, int addr, int regnum, u16 val); +void mv88e6xxx_poll_link(struct dsa_switch *ds); +void mv88e6xxx_get_strings(struct dsa_switch *ds, + int nr_stats, struct mv88e6xxx_hw_stat *stats, + int port, uint8_t *data); +void mv88e6xxx_get_ethtool_stats(struct dsa_switch *ds, + int nr_stats, struct mv88e6xxx_hw_stat *stats, + int port, uint64_t *data); + +#define REG_READ(addr, reg) \ + ({ \ + int __ret; \ + \ + __ret = mv88e6xxx_reg_read(ds, addr, reg); \ + if (__ret < 0) \ + return __ret; \ + __ret; \ + }) + +#define REG_WRITE(addr, reg, val) \ + ({ \ + int __ret; \ + \ + __ret = mv88e6xxx_reg_write(ds, addr, reg, val); \ + if (__ret < 0) \ + return __ret; \ + }) + + + +#endif diff --git a/net/dsa/slave.c b/net/dsa/slave.c new file mode 100644 index 00000000000..3cb331e98b8 --- /dev/null +++ b/net/dsa/slave.c @@ -0,0 +1,288 @@ +/* + * net/dsa/slave.c - Slave device handling + * Copyright (c) 2008 Marvell Semiconductor + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + */ + +#include +#include +#include +#include "dsa_priv.h" + +/* slave mii_bus handling ***************************************************/ +static int dsa_slave_phy_read(struct mii_bus *bus, int addr, int reg) +{ + struct dsa_switch *ds = bus->priv; + + if (ds->valid_port_mask & (1 << addr)) + return ds->drv->phy_read(ds, addr, reg); + + return 0xffff; +} + +static int dsa_slave_phy_write(struct mii_bus *bus, int addr, int reg, u16 val) +{ + struct dsa_switch *ds = bus->priv; + + if (ds->valid_port_mask & (1 << addr)) + return ds->drv->phy_write(ds, addr, reg, val); + + return 0; +} + +void dsa_slave_mii_bus_init(struct dsa_switch *ds) +{ + ds->slave_mii_bus->priv = (void *)ds; + ds->slave_mii_bus->name = "dsa slave smi"; + ds->slave_mii_bus->read = dsa_slave_phy_read; + ds->slave_mii_bus->write = dsa_slave_phy_write; + snprintf(ds->slave_mii_bus->id, MII_BUS_ID_SIZE, "%s:%.2x", + ds->master_mii_bus->id, ds->pd->sw_addr); + ds->slave_mii_bus->parent = &(ds->master_mii_bus->dev); +} + + +/* slave device handling ****************************************************/ +static int dsa_slave_open(struct net_device *dev) +{ + return 0; +} + +static int dsa_slave_close(struct net_device *dev) +{ + return 0; +} + +static void dsa_slave_change_rx_flags(struct net_device *dev, int change) +{ + struct dsa_slave_priv *p = netdev_priv(dev); + struct net_device *master = p->parent->master_netdev; + + if (change & IFF_ALLMULTI) + dev_set_allmulti(master, dev->flags & IFF_ALLMULTI ? 1 : -1); + if (change & IFF_PROMISC) + dev_set_promiscuity(master, dev->flags & IFF_PROMISC ? 1 : -1); +} + +static void dsa_slave_set_rx_mode(struct net_device *dev) +{ + struct dsa_slave_priv *p = netdev_priv(dev); + struct net_device *master = p->parent->master_netdev; + + dev_mc_sync(master, dev); + dev_unicast_sync(master, dev); +} + +static int dsa_slave_set_mac_address(struct net_device *dev, void *addr) +{ + memcpy(dev->dev_addr, addr + 2, 6); + + return 0; +} + +static int dsa_slave_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) +{ + struct dsa_slave_priv *p = netdev_priv(dev); + struct mii_ioctl_data *mii_data = if_mii(ifr); + + if (p->phy != NULL) + return phy_mii_ioctl(p->phy, mii_data, cmd); + + return -EOPNOTSUPP; +} + + +/* ethtool operations *******************************************************/ +static int +dsa_slave_get_settings(struct net_device *dev, struct ethtool_cmd *cmd) +{ + struct dsa_slave_priv *p = netdev_priv(dev); + int err; + + err = -EOPNOTSUPP; + if (p->phy != NULL) { + err = phy_read_status(p->phy); + if (err == 0) + err = phy_ethtool_gset(p->phy, cmd); + } + + return err; +} + +static int +dsa_slave_set_settings(struct net_device *dev, struct ethtool_cmd *cmd) +{ + struct dsa_slave_priv *p = netdev_priv(dev); + + if (p->phy != NULL) + return phy_ethtool_sset(p->phy, cmd); + + return -EOPNOTSUPP; +} + +static void dsa_slave_get_drvinfo(struct net_device *dev, + struct ethtool_drvinfo *drvinfo) +{ + strncpy(drvinfo->driver, "dsa", 32); + strncpy(drvinfo->version, dsa_driver_version, 32); + strncpy(drvinfo->fw_version, "N/A", 32); + strncpy(drvinfo->bus_info, "platform", 32); +} + +static int dsa_slave_nway_reset(struct net_device *dev) +{ + struct dsa_slave_priv *p = netdev_priv(dev); + + if (p->phy != NULL) + return genphy_restart_aneg(p->phy); + + return -EOPNOTSUPP; +} + +static u32 dsa_slave_get_link(struct net_device *dev) +{ + struct dsa_slave_priv *p = netdev_priv(dev); + + if (p->phy != NULL) { + genphy_update_link(p->phy); + return p->phy->link; + } + + return -EOPNOTSUPP; +} + +static void dsa_slave_get_strings(struct net_device *dev, + uint32_t stringset, uint8_t *data) +{ + struct dsa_slave_priv *p = netdev_priv(dev); + struct dsa_switch *ds = p->parent; + + if (stringset == ETH_SS_STATS) { + int len = ETH_GSTRING_LEN; + + strncpy(data, "tx_packets", len); + strncpy(data + len, "tx_bytes", len); + strncpy(data + 2 * len, "rx_packets", len); + strncpy(data + 3 * len, "rx_bytes", len); + if (ds->drv->get_strings != NULL) + ds->drv->get_strings(ds, p->port, data + 4 * len); + } +} + +static void dsa_slave_get_ethtool_stats(struct net_device *dev, + struct ethtool_stats *stats, + uint64_t *data) +{ + struct dsa_slave_priv *p = netdev_priv(dev); + struct dsa_switch *ds = p->parent; + + data[0] = p->dev->stats.tx_packets; + data[1] = p->dev->stats.tx_bytes; + data[2] = p->dev->stats.rx_packets; + data[3] = p->dev->stats.rx_bytes; + if (ds->drv->get_ethtool_stats != NULL) + ds->drv->get_ethtool_stats(ds, p->port, data + 4); +} + +static int dsa_slave_get_sset_count(struct net_device *dev, int sset) +{ + struct dsa_slave_priv *p = netdev_priv(dev); + struct dsa_switch *ds = p->parent; + + if (sset == ETH_SS_STATS) { + int count; + + count = 4; + if (ds->drv->get_sset_count != NULL) + count += ds->drv->get_sset_count(ds); + + return count; + } + + return -EOPNOTSUPP; +} + +static const struct ethtool_ops dsa_slave_ethtool_ops = { + .get_settings = dsa_slave_get_settings, + .set_settings = dsa_slave_set_settings, + .get_drvinfo = dsa_slave_get_drvinfo, + .nway_reset = dsa_slave_nway_reset, + .get_link = dsa_slave_get_link, + .set_sg = ethtool_op_set_sg, + .get_strings = dsa_slave_get_strings, + .get_ethtool_stats = dsa_slave_get_ethtool_stats, + .get_sset_count = dsa_slave_get_sset_count, +}; + + +/* slave device setup *******************************************************/ +struct net_device * +dsa_slave_create(struct dsa_switch *ds, struct device *parent, + int port, char *name) +{ + struct net_device *master = ds->master_netdev; + struct net_device *slave_dev; + struct dsa_slave_priv *p; + int ret; + + slave_dev = alloc_netdev(sizeof(struct dsa_slave_priv), + name, ether_setup); + if (slave_dev == NULL) + return slave_dev; + + slave_dev->features = master->vlan_features; + SET_ETHTOOL_OPS(slave_dev, &dsa_slave_ethtool_ops); + memcpy(slave_dev->dev_addr, master->dev_addr, ETH_ALEN); + slave_dev->tx_queue_len = 0; + switch (ds->tag_protocol) { +#ifdef CONFIG_NET_DSA_TAG_EDSA + case htons(ETH_P_EDSA): + slave_dev->hard_start_xmit = edsa_xmit; + break; +#endif + default: + BUG(); + } + slave_dev->open = dsa_slave_open; + slave_dev->stop = dsa_slave_close; + slave_dev->change_rx_flags = dsa_slave_change_rx_flags; + slave_dev->set_rx_mode = dsa_slave_set_rx_mode; + slave_dev->set_multicast_list = dsa_slave_set_rx_mode; + slave_dev->set_mac_address = dsa_slave_set_mac_address; + slave_dev->do_ioctl = dsa_slave_ioctl; + SET_NETDEV_DEV(slave_dev, parent); + slave_dev->vlan_features = master->vlan_features; + + p = netdev_priv(slave_dev); + p->dev = slave_dev; + p->parent = ds; + p->port = port; + p->phy = ds->slave_mii_bus->phy_map[port]; + + ret = register_netdev(slave_dev); + if (ret) { + printk(KERN_ERR "%s: error %d registering interface %s\n", + master->name, ret, slave_dev->name); + free_netdev(slave_dev); + return NULL; + } + + netif_carrier_off(slave_dev); + + if (p->phy != NULL) { + phy_attach(slave_dev, p->phy->dev.bus_id, + 0, PHY_INTERFACE_MODE_GMII); + + p->phy->autoneg = AUTONEG_ENABLE; + p->phy->speed = 0; + p->phy->duplex = 0; + p->phy->advertising = p->phy->supported | ADVERTISED_Autoneg; + phy_start_aneg(p->phy); + } + + return slave_dev; +} diff --git a/net/dsa/tag_edsa.c b/net/dsa/tag_edsa.c new file mode 100644 index 00000000000..f985ea99384 --- /dev/null +++ b/net/dsa/tag_edsa.c @@ -0,0 +1,213 @@ +/* + * net/dsa/tag_edsa.c - Ethertype DSA tagging + * Copyright (c) 2008 Marvell Semiconductor + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + */ + +#include +#include +#include +#include "dsa_priv.h" + +#define DSA_HLEN 4 +#define EDSA_HLEN 8 + +int edsa_xmit(struct sk_buff *skb, struct net_device *dev) +{ + struct dsa_slave_priv *p = netdev_priv(dev); + u8 *edsa_header; + + dev->stats.tx_packets++; + dev->stats.tx_bytes += skb->len; + + /* + * Convert the outermost 802.1q tag to a DSA tag and prepend + * a DSA ethertype field is the packet is tagged, or insert + * a DSA ethertype plus DSA tag between the addresses and the + * current ethertype field if the packet is untagged. + */ + if (skb->protocol == htons(ETH_P_8021Q)) { + if (skb_cow_head(skb, DSA_HLEN) < 0) + goto out_free; + skb_push(skb, DSA_HLEN); + + memmove(skb->data, skb->data + DSA_HLEN, 2 * ETH_ALEN); + + /* + * Construct tagged FROM_CPU DSA tag from 802.1q tag. + */ + edsa_header = skb->data + 2 * ETH_ALEN; + edsa_header[0] = (ETH_P_EDSA >> 8) & 0xff; + edsa_header[1] = ETH_P_EDSA & 0xff; + edsa_header[2] = 0x00; + edsa_header[3] = 0x00; + edsa_header[4] = 0x60; + edsa_header[5] = p->port << 3; + + /* + * Move CFI field from byte 6 to byte 5. + */ + if (edsa_header[6] & 0x10) { + edsa_header[5] |= 0x01; + edsa_header[6] &= ~0x10; + } + } else { + if (skb_cow_head(skb, EDSA_HLEN) < 0) + goto out_free; + skb_push(skb, EDSA_HLEN); + + memmove(skb->data, skb->data + EDSA_HLEN, 2 * ETH_ALEN); + + /* + * Construct untagged FROM_CPU DSA tag. + */ + edsa_header = skb->data + 2 * ETH_ALEN; + edsa_header[0] = (ETH_P_EDSA >> 8) & 0xff; + edsa_header[1] = ETH_P_EDSA & 0xff; + edsa_header[2] = 0x00; + edsa_header[3] = 0x00; + edsa_header[4] = 0x40; + edsa_header[5] = p->port << 3; + edsa_header[6] = 0x00; + edsa_header[7] = 0x00; + } + + skb->protocol = htons(ETH_P_EDSA); + + skb->dev = p->parent->master_netdev; + dev_queue_xmit(skb); + + return NETDEV_TX_OK; + +out_free: + kfree_skb(skb); + return NETDEV_TX_OK; +} + +static int edsa_rcv(struct sk_buff *skb, struct net_device *dev, + struct packet_type *pt, struct net_device *orig_dev) +{ + struct dsa_switch *ds = dev->dsa_ptr; + u8 *edsa_header; + int source_port; + + if (unlikely(ds == NULL)) + goto out_drop; + + skb = skb_unshare(skb, GFP_ATOMIC); + if (skb == NULL) + goto out; + + if (unlikely(!pskb_may_pull(skb, EDSA_HLEN))) + goto out_drop; + + /* + * Skip the two null bytes after the ethertype. + */ + edsa_header = skb->data + 2; + + /* + * Check that frame type is either TO_CPU or FORWARD, and + * that the source device is zero. + */ + if ((edsa_header[0] & 0xdf) != 0x00 && (edsa_header[0] & 0xdf) != 0xc0) + goto out_drop; + + /* + * Check that the source port is a registered DSA port. + */ + source_port = (edsa_header[1] >> 3) & 0x1f; + if (source_port >= DSA_MAX_PORTS || ds->ports[source_port] == NULL) + goto out_drop; + + /* + * If the 'tagged' bit is set, convert the DSA tag to a 802.1q + * tag and delete the ethertype part. If the 'tagged' bit is + * clear, delete the ethertype and the DSA tag parts. + */ + if (edsa_header[0] & 0x20) { + u8 new_header[4]; + + /* + * Insert 802.1q ethertype and copy the VLAN-related + * fields, but clear the bit that will hold CFI (since + * DSA uses that bit location for another purpose). + */ + new_header[0] = (ETH_P_8021Q >> 8) & 0xff; + new_header[1] = ETH_P_8021Q & 0xff; + new_header[2] = edsa_header[2] & ~0x10; + new_header[3] = edsa_header[3]; + + /* + * Move CFI bit from its place in the DSA header to + * its 802.1q-designated place. + */ + if (edsa_header[1] & 0x01) + new_header[2] |= 0x10; + + skb_pull_rcsum(skb, DSA_HLEN); + + /* + * Update packet checksum if skb is CHECKSUM_COMPLETE. + */ + if (skb->ip_summed == CHECKSUM_COMPLETE) { + __wsum c = skb->csum; + c = csum_add(c, csum_partial(new_header + 2, 2, 0)); + c = csum_sub(c, csum_partial(edsa_header + 2, 2, 0)); + skb->csum = c; + } + + memcpy(edsa_header, new_header, DSA_HLEN); + + memmove(skb->data - ETH_HLEN, + skb->data - ETH_HLEN - DSA_HLEN, + 2 * ETH_ALEN); + } else { + /* + * Remove DSA tag and update checksum. + */ + skb_pull_rcsum(skb, EDSA_HLEN); + memmove(skb->data - ETH_HLEN, + skb->data - ETH_HLEN - EDSA_HLEN, + 2 * ETH_ALEN); + } + + skb->dev = ds->ports[source_port]; + skb_push(skb, ETH_HLEN); + skb->protocol = eth_type_trans(skb, skb->dev); + + skb->dev->last_rx = jiffies; + skb->dev->stats.rx_packets++; + skb->dev->stats.rx_bytes += skb->len; + + netif_receive_skb(skb); + + return 0; + +out_drop: + kfree_skb(skb); +out: + return 0; +} + +static struct packet_type edsa_packet_type = { + .type = __constant_htons(ETH_P_EDSA), + .func = edsa_rcv, +}; + +static int __init edsa_init_module(void) +{ + dev_add_pack(&edsa_packet_type); + return 0; +} +module_init(edsa_init_module); + +static void __exit edsa_cleanup_module(void) +{ + dev_remove_pack(&edsa_packet_type); +} +module_exit(edsa_cleanup_module); -- cgit v1.2.3-70-g09d2 From cf85d08fdf4548ee46657ccfb7f9949a85145db5 Mon Sep 17 00:00:00 2001 From: Lennert Buytenhek Date: Tue, 7 Oct 2008 13:45:02 +0000 Subject: dsa: add support for original DSA tagging format Most of the DSA switches currently in the field do not support the Ethertype DSA tagging format that one of the previous patches added support for, but only the original DSA tagging format. The original DSA tagging format carries the same information as the Ethertype DSA tagging format, but with the difference that it does not have an ethertype field. In other words, when receiving a packet that is tagged with an original DSA tag, there is no way of telling in eth_type_trans() that this packet is in fact a DSA-tagged packet. This patch adds a hook into eth_type_trans() which is only compiled in if support for a switch chip that doesn't support Ethertype DSA is selected, and which checks whether there is a DSA switch driver instance attached to this network device which uses the old tag format. If so, it sets the protocol field to ETH_P_DSA without looking at the packet, so that the packet ends up in the right place. Signed-off-by: Lennert Buytenhek Tested-by: Nicolas Pitre Tested-by: Peter van Valderen Tested-by: Dirk Teurlings Signed-off-by: David S. Miller --- include/linux/if_ether.h | 1 + include/linux/netdevice.h | 11 +++ include/net/dsa.h | 2 + net/dsa/Kconfig | 4 + net/dsa/Makefile | 1 + net/dsa/dsa.c | 16 ++++ net/dsa/dsa_priv.h | 3 + net/dsa/mv88e6123_61_65.c | 18 +++-- net/dsa/slave.c | 5 ++ net/dsa/tag_dsa.c | 194 ++++++++++++++++++++++++++++++++++++++++++++++ net/ethernet/eth.c | 10 +++ 11 files changed, 258 insertions(+), 7 deletions(-) create mode 100644 net/dsa/tag_dsa.c (limited to 'include/net') diff --git a/include/linux/if_ether.h b/include/linux/if_ether.h index 2140aacb633..32b9dcda68c 100644 --- a/include/linux/if_ether.h +++ b/include/linux/if_ether.h @@ -101,6 +101,7 @@ #define ETH_P_ECONET 0x0018 /* Acorn Econet */ #define ETH_P_HDLC 0x0019 /* HDLC frames */ #define ETH_P_ARCNET 0x001A /* 1A for ArcNet :-) */ +#define ETH_P_DSA 0x001B /* Distributed Switch Arch. */ #define ETH_P_PHONET 0x00F5 /* Nokia Phonet frames */ /* diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 794eeb4b346..97f0c64c152 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -42,6 +42,7 @@ #include #include +#include struct vlan_group; struct ethtool_ops; @@ -801,6 +802,16 @@ void dev_net_set(struct net_device *dev, struct net *net) #endif } +static inline bool netdev_uses_dsa_tags(struct net_device *dev) +{ +#ifdef CONFIG_NET_DSA_TAG_DSA + if (dev->dsa_ptr != NULL) + return dsa_uses_dsa_tags(dev->dsa_ptr); +#endif + + return 0; +} + /** * netdev_priv - access network device private data * @dev: network device diff --git a/include/net/dsa.h b/include/net/dsa.h index dc4784f5452..72e509b6a12 100644 --- a/include/net/dsa.h +++ b/include/net/dsa.h @@ -30,5 +30,7 @@ struct dsa_platform_data { char *port_names[DSA_MAX_PORTS]; }; +extern bool dsa_uses_dsa_tags(void *dsa_ptr); + #endif diff --git a/net/dsa/Kconfig b/net/dsa/Kconfig index 7cf55e5eb39..6b68016827d 100644 --- a/net/dsa/Kconfig +++ b/net/dsa/Kconfig @@ -10,6 +10,10 @@ menuconfig NET_DSA if NET_DSA # tagging formats +config NET_DSA_TAG_DSA + bool + default n + config NET_DSA_TAG_EDSA bool default n diff --git a/net/dsa/Makefile b/net/dsa/Makefile index b59a6f6bcf5..8b92123315b 100644 --- a/net/dsa/Makefile +++ b/net/dsa/Makefile @@ -1,4 +1,5 @@ # tagging formats +obj-$(CONFIG_NET_DSA_TAG_DSA) += tag_dsa.o obj-$(CONFIG_NET_DSA_TAG_EDSA) += tag_edsa.o # switch drivers diff --git a/net/dsa/dsa.c b/net/dsa/dsa.c index 6cc5be2ec7f..f8c549281c3 100644 --- a/net/dsa/dsa.c +++ b/net/dsa/dsa.c @@ -202,6 +202,22 @@ static void dsa_switch_destroy(struct dsa_switch *ds) } +/* hooks for ethertype-less tagging formats *********************************/ +/* + * The original DSA tag format and some other tag formats have no + * ethertype, which means that we need to add a little hack to the + * networking receive path to make sure that received frames get + * the right ->protocol assigned to them when one of those tag + * formats is in use. + */ +bool dsa_uses_dsa_tags(void *dsa_ptr) +{ + struct dsa_switch *ds = dsa_ptr; + + return !!(ds->tag_protocol == htons(ETH_P_DSA)); +} + + /* link polling *************************************************************/ static void dsa_link_poll_work(struct work_struct *ugly) { diff --git a/net/dsa/dsa_priv.h b/net/dsa/dsa_priv.h index 21ee9052079..2f1d68c495e 100644 --- a/net/dsa/dsa_priv.h +++ b/net/dsa/dsa_priv.h @@ -103,6 +103,9 @@ struct net_device *dsa_slave_create(struct dsa_switch *ds, struct device *parent, int port, char *name); +/* tag_dsa.c */ +int dsa_xmit(struct sk_buff *skb, struct net_device *dev); + /* tag_edsa.c */ int edsa_xmit(struct sk_buff *skb, struct net_device *dev); diff --git a/net/dsa/mv88e6123_61_65.c b/net/dsa/mv88e6123_61_65.c index 147818cc706..555b164082f 100644 --- a/net/dsa/mv88e6123_61_65.c +++ b/net/dsa/mv88e6123_61_65.c @@ -192,15 +192,19 @@ static int mv88e6123_61_65_setup_port(struct dsa_switch *ds, int p) /* * Port Control: disable Drop-on-Unlock, disable Drop-on-Lock, - * configure the EDSA tagging mode if this is the CPU port, - * disable Header mode, enable IGMP/MLD snooping, disable VLAN - * tunneling, determine priority by looking at 802.1p and IP - * priority fields (IP prio has precedence), and set STP state - * to Forwarding. Finally, if this is the CPU port, additionally - * enable forwarding of unknown unicast and multicast addresses. + * configure the requested (DSA/EDSA) tagging mode if this is + * the CPU port, disable Header mode, enable IGMP/MLD snooping, + * disable VLAN tunneling, determine priority by looking at + * 802.1p and IP priority fields (IP prio has precedence), and + * set STP state to Forwarding. Finally, if this is the CPU + * port, additionally enable forwarding of unknown unicast and + * multicast addresses. */ REG_WRITE(addr, 0x04, - (p == ds->cpu_port) ? 0x373f : 0x0433); + (p == ds->cpu_port) ? + (ds->tag_protocol == htons(ETH_P_DSA)) ? + 0x053f : 0x373f : + 0x0433); /* * Port Control 1: disable trunking. Also, if this is the diff --git a/net/dsa/slave.c b/net/dsa/slave.c index 3cb331e98b8..8f8868dd430 100644 --- a/net/dsa/slave.c +++ b/net/dsa/slave.c @@ -239,6 +239,11 @@ dsa_slave_create(struct dsa_switch *ds, struct device *parent, memcpy(slave_dev->dev_addr, master->dev_addr, ETH_ALEN); slave_dev->tx_queue_len = 0; switch (ds->tag_protocol) { +#ifdef CONFIG_NET_DSA_TAG_DSA + case htons(ETH_P_DSA): + slave_dev->hard_start_xmit = dsa_xmit; + break; +#endif #ifdef CONFIG_NET_DSA_TAG_EDSA case htons(ETH_P_EDSA): slave_dev->hard_start_xmit = edsa_xmit; diff --git a/net/dsa/tag_dsa.c b/net/dsa/tag_dsa.c new file mode 100644 index 00000000000..bdc0510b53b --- /dev/null +++ b/net/dsa/tag_dsa.c @@ -0,0 +1,194 @@ +/* + * net/dsa/tag_dsa.c - (Non-ethertype) DSA tagging + * Copyright (c) 2008 Marvell Semiconductor + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + */ + +#include +#include +#include +#include "dsa_priv.h" + +#define DSA_HLEN 4 + +int dsa_xmit(struct sk_buff *skb, struct net_device *dev) +{ + struct dsa_slave_priv *p = netdev_priv(dev); + u8 *dsa_header; + + dev->stats.tx_packets++; + dev->stats.tx_bytes += skb->len; + + /* + * Convert the outermost 802.1q tag to a DSA tag for tagged + * packets, or insert a DSA tag between the addresses and + * the ethertype field for untagged packets. + */ + if (skb->protocol == htons(ETH_P_8021Q)) { + if (skb_cow_head(skb, 0) < 0) + goto out_free; + + /* + * Construct tagged FROM_CPU DSA tag from 802.1q tag. + */ + dsa_header = skb->data + 2 * ETH_ALEN; + dsa_header[0] = 0x60; + dsa_header[1] = p->port << 3; + + /* + * Move CFI field from byte 2 to byte 1. + */ + if (dsa_header[2] & 0x10) { + dsa_header[1] |= 0x01; + dsa_header[2] &= ~0x10; + } + } else { + if (skb_cow_head(skb, DSA_HLEN) < 0) + goto out_free; + skb_push(skb, DSA_HLEN); + + memmove(skb->data, skb->data + DSA_HLEN, 2 * ETH_ALEN); + + /* + * Construct untagged FROM_CPU DSA tag. + */ + dsa_header = skb->data + 2 * ETH_ALEN; + dsa_header[0] = 0x40; + dsa_header[1] = p->port << 3; + dsa_header[2] = 0x00; + dsa_header[3] = 0x00; + } + + skb->protocol = htons(ETH_P_DSA); + + skb->dev = p->parent->master_netdev; + dev_queue_xmit(skb); + + return NETDEV_TX_OK; + +out_free: + kfree_skb(skb); + return NETDEV_TX_OK; +} + +static int dsa_rcv(struct sk_buff *skb, struct net_device *dev, + struct packet_type *pt, struct net_device *orig_dev) +{ + struct dsa_switch *ds = dev->dsa_ptr; + u8 *dsa_header; + int source_port; + + if (unlikely(ds == NULL)) + goto out_drop; + + skb = skb_unshare(skb, GFP_ATOMIC); + if (skb == NULL) + goto out; + + if (unlikely(!pskb_may_pull(skb, DSA_HLEN))) + goto out_drop; + + /* + * The ethertype field is part of the DSA header. + */ + dsa_header = skb->data - 2; + + /* + * Check that frame type is either TO_CPU or FORWARD, and + * that the source device is zero. + */ + if ((dsa_header[0] & 0xdf) != 0x00 && (dsa_header[0] & 0xdf) != 0xc0) + goto out_drop; + + /* + * Check that the source port is a registered DSA port. + */ + source_port = (dsa_header[1] >> 3) & 0x1f; + if (source_port >= DSA_MAX_PORTS || ds->ports[source_port] == NULL) + goto out_drop; + + /* + * Convert the DSA header to an 802.1q header if the 'tagged' + * bit in the DSA header is set. If the 'tagged' bit is clear, + * delete the DSA header entirely. + */ + if (dsa_header[0] & 0x20) { + u8 new_header[4]; + + /* + * Insert 802.1q ethertype and copy the VLAN-related + * fields, but clear the bit that will hold CFI (since + * DSA uses that bit location for another purpose). + */ + new_header[0] = (ETH_P_8021Q >> 8) & 0xff; + new_header[1] = ETH_P_8021Q & 0xff; + new_header[2] = dsa_header[2] & ~0x10; + new_header[3] = dsa_header[3]; + + /* + * Move CFI bit from its place in the DSA header to + * its 802.1q-designated place. + */ + if (dsa_header[1] & 0x01) + new_header[2] |= 0x10; + + /* + * Update packet checksum if skb is CHECKSUM_COMPLETE. + */ + if (skb->ip_summed == CHECKSUM_COMPLETE) { + __wsum c = skb->csum; + c = csum_add(c, csum_partial(new_header + 2, 2, 0)); + c = csum_sub(c, csum_partial(dsa_header + 2, 2, 0)); + skb->csum = c; + } + + memcpy(dsa_header, new_header, DSA_HLEN); + } else { + /* + * Remove DSA tag and update checksum. + */ + skb_pull_rcsum(skb, DSA_HLEN); + memmove(skb->data - ETH_HLEN, + skb->data - ETH_HLEN - DSA_HLEN, + 2 * ETH_ALEN); + } + + skb->dev = ds->ports[source_port]; + skb_push(skb, ETH_HLEN); + skb->protocol = eth_type_trans(skb, skb->dev); + + skb->dev->last_rx = jiffies; + skb->dev->stats.rx_packets++; + skb->dev->stats.rx_bytes += skb->len; + + netif_receive_skb(skb); + + return 0; + +out_drop: + kfree_skb(skb); +out: + return 0; +} + +static struct packet_type dsa_packet_type = { + .type = __constant_htons(ETH_P_DSA), + .func = dsa_rcv, +}; + +static int __init dsa_init_module(void) +{ + dev_add_pack(&dsa_packet_type); + return 0; +} +module_init(dsa_init_module); + +static void __exit dsa_cleanup_module(void) +{ + dev_remove_pack(&dsa_packet_type); +} +module_exit(dsa_cleanup_module); diff --git a/net/ethernet/eth.c b/net/ethernet/eth.c index 647a9edee37..dae47e7a44d 100644 --- a/net/ethernet/eth.c +++ b/net/ethernet/eth.c @@ -57,6 +57,7 @@ #include #include #include +#include #include #include @@ -184,6 +185,15 @@ __be16 eth_type_trans(struct sk_buff *skb, struct net_device *dev) skb->pkt_type = PACKET_OTHERHOST; } + /* + * Some variants of DSA tagging don't have an ethertype field + * at all, so we check here whether one of those tagging + * variants has been configured on the receiving interface, + * and if so, set skb->protocol without looking at the packet. + */ + if (netdev_uses_dsa_tags(dev)) + return htons(ETH_P_DSA); + if (ntohs(eth->h_proto) >= 1536) return eth->h_proto; -- cgit v1.2.3-70-g09d2 From 396138f03f4521c55ecc3a5dd75d4c56e6323244 Mon Sep 17 00:00:00 2001 From: Lennert Buytenhek Date: Tue, 7 Oct 2008 13:46:07 +0000 Subject: dsa: add support for Trailer tagging format This adds support for the Trailer switch tagging format. This is another tagging that doesn't explicitly mark tagged packets with a distinct ethertype, so that we need to add a similar hack in the receive path as for the Original DSA tagging format. Signed-off-by: Lennert Buytenhek Tested-by: Byron Bradley Tested-by: Tim Ellis Signed-off-by: David S. Miller --- include/linux/if_ether.h | 1 + include/linux/netdevice.h | 10 ++++ include/net/dsa.h | 1 + net/dsa/Kconfig | 4 ++ net/dsa/Makefile | 1 + net/dsa/dsa.c | 7 +++ net/dsa/dsa_priv.h | 3 ++ net/dsa/slave.c | 5 ++ net/dsa/tag_trailer.c | 130 ++++++++++++++++++++++++++++++++++++++++++++++ net/ethernet/eth.c | 2 + 10 files changed, 164 insertions(+) create mode 100644 net/dsa/tag_trailer.c (limited to 'include/net') diff --git a/include/linux/if_ether.h b/include/linux/if_ether.h index 32b9dcda68c..a0099e98b5c 100644 --- a/include/linux/if_ether.h +++ b/include/linux/if_ether.h @@ -102,6 +102,7 @@ #define ETH_P_HDLC 0x0019 /* HDLC frames */ #define ETH_P_ARCNET 0x001A /* 1A for ArcNet :-) */ #define ETH_P_DSA 0x001B /* Distributed Switch Arch. */ +#define ETH_P_TRAILER 0x001C /* Trailer switch tagging */ #define ETH_P_PHONET 0x00F5 /* Nokia Phonet frames */ /* diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 97f0c64c152..d3ea3de70a8 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -812,6 +812,16 @@ static inline bool netdev_uses_dsa_tags(struct net_device *dev) return 0; } +static inline bool netdev_uses_trailer_tags(struct net_device *dev) +{ +#ifdef CONFIG_NET_DSA_TAG_TRAILER + if (dev->dsa_ptr != NULL) + return dsa_uses_trailer_tags(dev->dsa_ptr); +#endif + + return 0; +} + /** * netdev_priv - access network device private data * @dev: network device diff --git a/include/net/dsa.h b/include/net/dsa.h index 72e509b6a12..52e97bfca5a 100644 --- a/include/net/dsa.h +++ b/include/net/dsa.h @@ -31,6 +31,7 @@ struct dsa_platform_data { }; extern bool dsa_uses_dsa_tags(void *dsa_ptr); +extern bool dsa_uses_trailer_tags(void *dsa_ptr); #endif diff --git a/net/dsa/Kconfig b/net/dsa/Kconfig index 79bcd76d3f1..505aa14e67f 100644 --- a/net/dsa/Kconfig +++ b/net/dsa/Kconfig @@ -18,6 +18,10 @@ config NET_DSA_TAG_EDSA bool default n +config NET_DSA_TAG_TRAILER + bool + default n + # switch drivers config NET_DSA_MV88E6XXX diff --git a/net/dsa/Makefile b/net/dsa/Makefile index 7fb6f85a69e..63d3c44908b 100644 --- a/net/dsa/Makefile +++ b/net/dsa/Makefile @@ -1,6 +1,7 @@ # tagging formats obj-$(CONFIG_NET_DSA_TAG_DSA) += tag_dsa.o obj-$(CONFIG_NET_DSA_TAG_EDSA) += tag_edsa.o +obj-$(CONFIG_NET_DSA_TAG_TRAILER) += tag_trailer.o # switch drivers obj-$(CONFIG_NET_DSA_MV88E6XXX) += mv88e6xxx.o diff --git a/net/dsa/dsa.c b/net/dsa/dsa.c index f8c549281c3..33e99462023 100644 --- a/net/dsa/dsa.c +++ b/net/dsa/dsa.c @@ -217,6 +217,13 @@ bool dsa_uses_dsa_tags(void *dsa_ptr) return !!(ds->tag_protocol == htons(ETH_P_DSA)); } +bool dsa_uses_trailer_tags(void *dsa_ptr) +{ + struct dsa_switch *ds = dsa_ptr; + + return !!(ds->tag_protocol == htons(ETH_P_TRAILER)); +} + /* link polling *************************************************************/ static void dsa_link_poll_work(struct work_struct *ugly) diff --git a/net/dsa/dsa_priv.h b/net/dsa/dsa_priv.h index 2f1d68c495e..7063378a1eb 100644 --- a/net/dsa/dsa_priv.h +++ b/net/dsa/dsa_priv.h @@ -109,5 +109,8 @@ int dsa_xmit(struct sk_buff *skb, struct net_device *dev); /* tag_edsa.c */ int edsa_xmit(struct sk_buff *skb, struct net_device *dev); +/* tag_trailer.c */ +int trailer_xmit(struct sk_buff *skb, struct net_device *dev); + #endif diff --git a/net/dsa/slave.c b/net/dsa/slave.c index 8f8868dd430..37616884b8a 100644 --- a/net/dsa/slave.c +++ b/net/dsa/slave.c @@ -248,6 +248,11 @@ dsa_slave_create(struct dsa_switch *ds, struct device *parent, case htons(ETH_P_EDSA): slave_dev->hard_start_xmit = edsa_xmit; break; +#endif +#ifdef CONFIG_NET_DSA_TAG_TRAILER + case htons(ETH_P_TRAILER): + slave_dev->hard_start_xmit = trailer_xmit; + break; #endif default: BUG(); diff --git a/net/dsa/tag_trailer.c b/net/dsa/tag_trailer.c new file mode 100644 index 00000000000..d3117764b2c --- /dev/null +++ b/net/dsa/tag_trailer.c @@ -0,0 +1,130 @@ +/* + * net/dsa/tag_trailer.c - Trailer tag format handling + * Copyright (c) 2008 Marvell Semiconductor + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + */ + +#include +#include +#include +#include "dsa_priv.h" + +int trailer_xmit(struct sk_buff *skb, struct net_device *dev) +{ + struct dsa_slave_priv *p = netdev_priv(dev); + struct sk_buff *nskb; + int padlen; + u8 *trailer; + + dev->stats.tx_packets++; + dev->stats.tx_bytes += skb->len; + + /* + * We have to make sure that the trailer ends up as the very + * last 4 bytes of the packet. This means that we have to pad + * the packet to the minimum ethernet frame size, if necessary, + * before adding the trailer. + */ + padlen = 0; + if (skb->len < 60) + padlen = 60 - skb->len; + + nskb = alloc_skb(NET_IP_ALIGN + skb->len + padlen + 4, GFP_ATOMIC); + if (nskb == NULL) { + kfree_skb(skb); + return NETDEV_TX_OK; + } + skb_reserve(nskb, NET_IP_ALIGN); + + skb_reset_mac_header(nskb); + skb_set_network_header(nskb, skb_network_header(skb) - skb->head); + skb_set_transport_header(nskb, skb_transport_header(skb) - skb->head); + skb_copy_and_csum_dev(skb, skb_put(nskb, skb->len)); + kfree_skb(skb); + + if (padlen) { + u8 *pad = skb_put(nskb, padlen); + memset(pad, 0, padlen); + } + + trailer = skb_put(nskb, 4); + trailer[0] = 0x80; + trailer[1] = 1 << p->port; + trailer[2] = 0x10; + trailer[3] = 0x00; + + nskb->protocol = htons(ETH_P_TRAILER); + + nskb->dev = p->parent->master_netdev; + dev_queue_xmit(nskb); + + return NETDEV_TX_OK; +} + +static int trailer_rcv(struct sk_buff *skb, struct net_device *dev, + struct packet_type *pt, struct net_device *orig_dev) +{ + struct dsa_switch *ds = dev->dsa_ptr; + u8 *trailer; + int source_port; + + if (unlikely(ds == NULL)) + goto out_drop; + + skb = skb_unshare(skb, GFP_ATOMIC); + if (skb == NULL) + goto out; + + if (skb_linearize(skb)) + goto out_drop; + + trailer = skb_tail_pointer(skb) - 4; + if (trailer[0] != 0x80 || (trailer[1] & 0xf8) != 0x00 || + (trailer[3] & 0xef) != 0x00 || trailer[3] != 0x00) + goto out_drop; + + source_port = trailer[1] & 7; + if (source_port >= DSA_MAX_PORTS || ds->ports[source_port] == NULL) + goto out_drop; + + pskb_trim_rcsum(skb, skb->len - 4); + + skb->dev = ds->ports[source_port]; + skb_push(skb, ETH_HLEN); + skb->protocol = eth_type_trans(skb, skb->dev); + + skb->dev->last_rx = jiffies; + skb->dev->stats.rx_packets++; + skb->dev->stats.rx_bytes += skb->len; + + netif_receive_skb(skb); + + return 0; + +out_drop: + kfree_skb(skb); +out: + return 0; +} + +static struct packet_type trailer_packet_type = { + .type = __constant_htons(ETH_P_TRAILER), + .func = trailer_rcv, +}; + +static int __init trailer_init_module(void) +{ + dev_add_pack(&trailer_packet_type); + return 0; +} +module_init(trailer_init_module); + +static void __exit trailer_cleanup_module(void) +{ + dev_remove_pack(&trailer_packet_type); +} +module_exit(trailer_cleanup_module); diff --git a/net/ethernet/eth.c b/net/ethernet/eth.c index dae47e7a44d..b9d85af2dd3 100644 --- a/net/ethernet/eth.c +++ b/net/ethernet/eth.c @@ -193,6 +193,8 @@ __be16 eth_type_trans(struct sk_buff *skb, struct net_device *dev) */ if (netdev_uses_dsa_tags(dev)) return htons(ETH_P_DSA); + if (netdev_uses_trailer_tags(dev)) + return htons(ETH_P_TRAILER); if (ntohs(eth->h_proto) >= 1536) return eth->h_proto; -- cgit v1.2.3-70-g09d2 From 64194c31a0b6f5d84703b772113aafc400eeaad6 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Thu, 9 Oct 2008 12:03:17 -0700 Subject: inet: Make tunnel RX/TX byte counters more consistent This patch makes the RX/TX byte counters for IPIP, GRE and SIT more consistent. Previously we included the external IP headers on the way out but not when the packet is inbound. The new scheme is to count payload only in both directions. For IPIP and SIT this simply means the exclusion of the external IP header. For GRE this means that we exclude the GRE header as well. Signed-off-by: Herbert Xu Signed-off-by: David S. Miller --- include/net/ipip.h | 2 +- net/ipv4/ip_gre.c | 7 +++++-- 2 files changed, 6 insertions(+), 3 deletions(-) (limited to 'include/net') diff --git a/include/net/ipip.h b/include/net/ipip.h index a85bda64b85..fdf9bd74370 100644 --- a/include/net/ipip.h +++ b/include/net/ipip.h @@ -37,7 +37,7 @@ struct ip_tunnel_prl_entry #define IPTUNNEL_XMIT() do { \ int err; \ - int pkt_len = skb->len; \ + int pkt_len = skb->len - skb_transport_offset(skb); \ \ skb->ip_summed = CHECKSUM_NONE; \ ip_select_ident(iph, &rt->u.dst, NULL); \ diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c index 44ed9487fa1..0d5e35b0ed5 100644 --- a/net/ipv4/ip_gre.c +++ b/net/ipv4/ip_gre.c @@ -477,6 +477,7 @@ static int ipgre_rcv(struct sk_buff *skb) struct ip_tunnel *tunnel; int offset = 4; __be16 gre_proto; + unsigned int len; if (!pskb_may_pull(skb, 16)) goto drop_nolock; @@ -567,6 +568,8 @@ static int ipgre_rcv(struct sk_buff *skb) tunnel->i_seqno = seqno + 1; } + len = skb->len; + /* Warning: All skb pointers will be invalidated! */ if (tunnel->dev->type == ARPHRD_ETHER) { if (!pskb_may_pull(skb, ETH_HLEN)) { @@ -581,7 +584,7 @@ static int ipgre_rcv(struct sk_buff *skb) } stats->rx_packets++; - stats->rx_bytes += skb->len; + stats->rx_bytes += len; skb->dev = tunnel->dev; dst_release(skb->dst); skb->dst = NULL; @@ -770,7 +773,7 @@ static int ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev) old_iph = ip_hdr(skb); } - skb->transport_header = skb->network_header; + skb_reset_transport_header(skb); skb_push(skb, gre_hlen); skb_reset_network_header(skb); memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt)); -- cgit v1.2.3-70-g09d2 From bb21c95e2d3325fcb53c591686dbbf4068a165bc Mon Sep 17 00:00:00 2001 From: Guo-Fu Tseng Date: Thu, 9 Oct 2008 21:10:36 -0700 Subject: nf_conntrack_ecache.h: Fix missing braces This patch add missing braces of today's net-next-2.6: include/net/netfilter/nf_conntrack_ecache.h Signed-off-by: Guo-Fu Tseng Signed-off-by: David S. Miller --- include/net/netfilter/nf_conntrack_ecache.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/net') diff --git a/include/net/netfilter/nf_conntrack_ecache.h b/include/net/netfilter/nf_conntrack_ecache.h index 35f814c1e2c..11480e633a9 100644 --- a/include/net/netfilter/nf_conntrack_ecache.h +++ b/include/net/netfilter/nf_conntrack_ecache.h @@ -74,6 +74,7 @@ static inline void nf_ct_event_cache_flush(struct net *net) {} static inline int nf_conntrack_ecache_init(struct net *net) { return 0; +} static inline void nf_conntrack_ecache_fini(struct net *net) { -- cgit v1.2.3-70-g09d2 From 948a72438d4178d0728c4b0a38836d280b846939 Mon Sep 17 00:00:00 2001 From: Paul Moore Date: Fri, 10 Oct 2008 10:16:30 -0400 Subject: netlabel: Remove unneeded in-kernel API functions After some discussions with the Smack folks, well just Casey, I now have a better idea of what Smack wants out of NetLabel in the future so I think it is now safe to do some API "pruning". If another LSM comes along that needs this functionality we can always add it back in, but I don't see any LSMs on the horizon which might make use of these functions. Thanks to Rami Rosen who suggested removing netlbl_cfg_cipsov4_del() back in February 2008. Signed-off-by: Paul Moore Reviewed-by: James Morris --- include/net/netlabel.h | 13 ------- net/netlabel/netlabel_kapi.c | 84 ++++++++++++-------------------------------- 2 files changed, 23 insertions(+), 74 deletions(-) (limited to 'include/net') diff --git a/include/net/netlabel.h b/include/net/netlabel.h index e4d2d6baa98..5303749b709 100644 --- a/include/net/netlabel.h +++ b/include/net/netlabel.h @@ -352,12 +352,9 @@ static inline void netlbl_secattr_free(struct netlbl_lsm_secattr *secattr) int netlbl_cfg_map_del(const char *domain, struct netlbl_audit *audit_info); int netlbl_cfg_unlbl_add_map(const char *domain, struct netlbl_audit *audit_info); -int netlbl_cfg_cipsov4_add(struct cipso_v4_doi *doi_def, - struct netlbl_audit *audit_info); int netlbl_cfg_cipsov4_add_map(struct cipso_v4_doi *doi_def, const char *domain, struct netlbl_audit *audit_info); -int netlbl_cfg_cipsov4_del(u32 doi, struct netlbl_audit *audit_info); /* * LSM security attribute operations @@ -404,22 +401,12 @@ static inline int netlbl_cfg_unlbl_add_map(const char *domain, { return -ENOSYS; } -static inline int netlbl_cfg_cipsov4_add(struct cipso_v4_doi *doi_def, - struct netlbl_audit *audit_info) -{ - return -ENOSYS; -} static inline int netlbl_cfg_cipsov4_add_map(struct cipso_v4_doi *doi_def, const char *domain, struct netlbl_audit *audit_info) { return -ENOSYS; } -static inline int netlbl_cfg_cipsov4_del(u32 doi, - struct netlbl_audit *audit_info) -{ - return -ENOSYS; -} static inline int netlbl_secattr_catmap_walk( struct netlbl_lsm_secattr_catmap *catmap, u32 offset) diff --git a/net/netlabel/netlabel_kapi.c b/net/netlabel/netlabel_kapi.c index 39793a1a93a..6c211fe9778 100644 --- a/net/netlabel/netlabel_kapi.c +++ b/net/netlabel/netlabel_kapi.c @@ -82,7 +82,7 @@ int netlbl_cfg_unlbl_add_map(const char *domain, entry = kzalloc(sizeof(*entry), GFP_ATOMIC); if (entry == NULL) - goto cfg_unlbl_add_map_failure; + return -ENOMEM; if (domain != NULL) { entry->domain = kstrdup(domain, GFP_ATOMIC); if (entry->domain == NULL) @@ -103,49 +103,6 @@ cfg_unlbl_add_map_failure: return ret_val; } -/** - * netlbl_cfg_cipsov4_add - Add a new CIPSOv4 DOI definition - * @doi_def: the DOI definition - * @audit_info: NetLabel audit information - * - * Description: - * Add a new CIPSOv4 DOI definition to the NetLabel subsystem. Returns zero on - * success, negative values on failure. - * - */ -int netlbl_cfg_cipsov4_add(struct cipso_v4_doi *doi_def, - struct netlbl_audit *audit_info) -{ - int ret_val; - const char *type_str; - struct audit_buffer *audit_buf; - - ret_val = cipso_v4_doi_add(doi_def); - - audit_buf = netlbl_audit_start_common(AUDIT_MAC_CIPSOV4_ADD, - audit_info); - if (audit_buf != NULL) { - switch (doi_def->type) { - case CIPSO_V4_MAP_STD: - type_str = "std"; - break; - case CIPSO_V4_MAP_PASS: - type_str = "pass"; - break; - default: - type_str = "(unknown)"; - } - audit_log_format(audit_buf, - " cipso_doi=%u cipso_type=%s res=%u", - doi_def->doi, - type_str, - ret_val == 0 ? 1 : 0); - audit_log_end(audit_buf); - } - - return ret_val; -} - /** * netlbl_cfg_cipsov4_add_map - Add a new CIPSOv4 DOI definition and mapping * @doi_def: the DOI definition @@ -165,10 +122,12 @@ int netlbl_cfg_cipsov4_add_map(struct cipso_v4_doi *doi_def, { int ret_val = -ENOMEM; struct netlbl_dom_map *entry; + const char *type_str; + struct audit_buffer *audit_buf; entry = kzalloc(sizeof(*entry), GFP_ATOMIC); if (entry == NULL) - goto cfg_cipsov4_add_map_failure; + return -ENOMEM; if (domain != NULL) { entry->domain = kstrdup(domain, GFP_ATOMIC); if (entry->domain == NULL) @@ -182,7 +141,7 @@ int netlbl_cfg_cipsov4_add_map(struct cipso_v4_doi *doi_def, * domain mapping for it. */ rcu_read_lock(); - ret_val = netlbl_cfg_cipsov4_add(doi_def, audit_info); + ret_val = cipso_v4_doi_add(doi_def); if (ret_val != 0) goto cfg_cipsov4_add_map_failure_unlock; ret_val = netlbl_domhsh_add(entry, audit_info); @@ -196,6 +155,24 @@ cfg_cipsov4_add_map_failure_remove_doi: cipso_v4_doi_remove(doi_def->doi, audit_info, netlbl_cipsov4_doi_free); cfg_cipsov4_add_map_failure_unlock: rcu_read_unlock(); + audit_buf = netlbl_audit_start_common(AUDIT_MAC_CIPSOV4_ADD, + audit_info); + if (audit_buf != NULL) { + switch (doi_def->type) { + case CIPSO_V4_MAP_STD: + type_str = "std"; + break; + case CIPSO_V4_MAP_PASS: + type_str = "pass"; + break; + default: + type_str = "(unknown)"; + } + audit_log_format(audit_buf, + " cipso_doi=%u cipso_type=%s res=%u", + doi_def->doi, type_str, ret_val == 0 ? 1 : 0); + audit_log_end(audit_buf); + } cfg_cipsov4_add_map_failure: if (entry != NULL) kfree(entry->domain); @@ -203,21 +180,6 @@ cfg_cipsov4_add_map_failure: return ret_val; } -/** - * netlbl_cfg_cipsov4_del - Removean existing CIPSOv4 DOI definition - * @doi: the CIPSO DOI value - * @audit_info: NetLabel audit information - * - * Description: - * Removes an existing CIPSOv4 DOI definition from the NetLabel subsystem. - * Returns zero on success, negative values on failure. - * - */ -int netlbl_cfg_cipsov4_del(u32 doi, struct netlbl_audit *audit_info) -{ - return cipso_v4_doi_remove(doi, audit_info, netlbl_cipsov4_doi_free); -} - /* * Security Attribute Functions */ -- cgit v1.2.3-70-g09d2 From dfaebe9825ff34983778f287101bc5f3bce00640 Mon Sep 17 00:00:00 2001 From: Paul Moore Date: Fri, 10 Oct 2008 10:16:31 -0400 Subject: selinux: Fix missing calls to netlbl_skbuff_err() At some point I think I messed up and dropped the calls to netlbl_skbuff_err() which are necessary for CIPSO to send error notifications to remote systems. This patch re-introduces the error handling calls into the SELinux code. Signed-off-by: Paul Moore Acked-by: James Morris --- include/net/netlabel.h | 6 ++++-- net/netlabel/netlabel_kapi.c | 5 +++-- security/selinux/hooks.c | 19 +++++++++++++++---- security/selinux/include/netlabel.h | 9 +++++++++ security/selinux/netlabel.c | 20 +++++++++++++++++++- 5 files changed, 50 insertions(+), 9 deletions(-) (limited to 'include/net') diff --git a/include/net/netlabel.h b/include/net/netlabel.h index 5303749b709..e16db096126 100644 --- a/include/net/netlabel.h +++ b/include/net/netlabel.h @@ -382,7 +382,7 @@ int netlbl_sock_getattr(struct sock *sk, int netlbl_skbuff_getattr(const struct sk_buff *skb, u16 family, struct netlbl_lsm_secattr *secattr); -void netlbl_skbuff_err(struct sk_buff *skb, int error); +void netlbl_skbuff_err(struct sk_buff *skb, int error, int gateway); /* * LSM label mapping cache operations @@ -454,7 +454,9 @@ static inline int netlbl_skbuff_getattr(const struct sk_buff *skb, { return -ENOSYS; } -static inline void netlbl_skbuff_err(struct sk_buff *skb, int error) +static inline void netlbl_skbuff_err(struct sk_buff *skb, + int error, + int gateway) { return; } diff --git a/net/netlabel/netlabel_kapi.c b/net/netlabel/netlabel_kapi.c index 6c211fe9778..22faba620e4 100644 --- a/net/netlabel/netlabel_kapi.c +++ b/net/netlabel/netlabel_kapi.c @@ -490,6 +490,7 @@ int netlbl_skbuff_getattr(const struct sk_buff *skb, * netlbl_skbuff_err - Handle a LSM error on a sk_buff * @skb: the packet * @error: the error code + * @gateway: true if host is acting as a gateway, false otherwise * * Description: * Deal with a LSM problem when handling the packet in @skb, typically this is @@ -497,10 +498,10 @@ int netlbl_skbuff_getattr(const struct sk_buff *skb, * according to the packet's labeling protocol. * */ -void netlbl_skbuff_err(struct sk_buff *skb, int error) +void netlbl_skbuff_err(struct sk_buff *skb, int error, int gateway) { if (CIPSO_V4_OPTEXIST(skb)) - cipso_v4_error(skb, error, 0); + cipso_v4_error(skb, error, gateway); } /** diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c index b520667a24b..a91146a6b37 100644 --- a/security/selinux/hooks.c +++ b/security/selinux/hooks.c @@ -4101,6 +4101,8 @@ static int selinux_sock_rcv_skb_compat(struct sock *sk, struct sk_buff *skb, return err; err = avc_has_perm(sk_sid, peer_sid, SECCLASS_PEER, PEER__RECV, &ad); + if (err) + selinux_netlbl_err(skb, err, 0); } else { err = selinux_netlbl_sock_rcv_skb(sksec, skb, family, &ad); if (err) @@ -4156,10 +4158,14 @@ static int selinux_socket_sock_rcv_skb(struct sock *sk, struct sk_buff *skb) return err; err = selinux_inet_sys_rcv_skb(skb->iif, addrp, family, peer_sid, &ad); - if (err) + if (err) { + selinux_netlbl_err(skb, err, 0); return err; + } err = avc_has_perm(sk_sid, peer_sid, SECCLASS_PEER, PEER__RECV, &ad); + if (err) + selinux_netlbl_err(skb, err, 0); } if (secmark_active) { @@ -4396,6 +4402,7 @@ out: static unsigned int selinux_ip_forward(struct sk_buff *skb, int ifindex, u16 family) { + int err; char *addrp; u32 peer_sid; struct avc_audit_data ad; @@ -4419,10 +4426,14 @@ static unsigned int selinux_ip_forward(struct sk_buff *skb, int ifindex, if (selinux_parse_skb(skb, &ad, &addrp, 1, NULL) != 0) return NF_DROP; - if (peerlbl_active) - if (selinux_inet_sys_rcv_skb(ifindex, addrp, family, - peer_sid, &ad) != 0) + if (peerlbl_active) { + err = selinux_inet_sys_rcv_skb(ifindex, addrp, family, + peer_sid, &ad); + if (err) { + selinux_netlbl_err(skb, err, 1); return NF_DROP; + } + } if (secmark_active) if (avc_has_perm(peer_sid, skb->secmark, diff --git a/security/selinux/include/netlabel.h b/security/selinux/include/netlabel.h index 487a7d81fe2..d4e3ac8a7fb 100644 --- a/security/selinux/include/netlabel.h +++ b/security/selinux/include/netlabel.h @@ -39,6 +39,8 @@ #ifdef CONFIG_NETLABEL void selinux_netlbl_cache_invalidate(void); +void selinux_netlbl_err(struct sk_buff *skb, int error, int gateway); + void selinux_netlbl_sk_security_reset(struct sk_security_struct *ssec, int family); @@ -63,6 +65,13 @@ static inline void selinux_netlbl_cache_invalidate(void) return; } +static inline void selinux_netlbl_err(struct sk_buff *skb, + int error, + int gateway) +{ + return; +} + static inline void selinux_netlbl_sk_security_reset( struct sk_security_struct *ssec, int family) diff --git a/security/selinux/netlabel.c b/security/selinux/netlabel.c index b9ce5fcf343..4053f7fc95f 100644 --- a/security/selinux/netlabel.c +++ b/security/selinux/netlabel.c @@ -107,6 +107,24 @@ void selinux_netlbl_cache_invalidate(void) netlbl_cache_invalidate(); } +/** + * selinux_netlbl_err - Handle a NetLabel packet error + * @skb: the packet + * @error: the error code + * @gateway: true if host is acting as a gateway, false otherwise + * + * Description: + * When a packet is dropped due to a call to avc_has_perm() pass the error + * code to the NetLabel subsystem so any protocol specific processing can be + * done. This is safe to call even if you are unsure if NetLabel labeling is + * present on the packet, NetLabel is smart enough to only act when it should. + * + */ +void selinux_netlbl_err(struct sk_buff *skb, int error, int gateway) +{ + netlbl_skbuff_err(skb, error, gateway); +} + /** * selinux_netlbl_sk_security_reset - Reset the NetLabel fields * @ssec: the sk_security_struct @@ -289,7 +307,7 @@ int selinux_netlbl_sock_rcv_skb(struct sk_security_struct *sksec, return 0; if (nlbl_sid != SECINITSID_UNLABELED) - netlbl_skbuff_err(skb, rc); + netlbl_skbuff_err(skb, rc, 0); return rc; } -- cgit v1.2.3-70-g09d2 From b1edeb102397546438ab4624489c6ccd7b410d97 Mon Sep 17 00:00:00 2001 From: Paul Moore Date: Fri, 10 Oct 2008 10:16:31 -0400 Subject: netlabel: Replace protocol/NetLabel linking with refrerence counts NetLabel has always had a list of backpointers in the CIPSO DOI definition structure which pointed to the NetLabel LSM domain mapping structures which referenced the CIPSO DOI struct. The rationale for this was that when an administrator removed a CIPSO DOI from the system all of the associated NetLabel LSM domain mappings should be removed as well; a list of backpointers made this a simple operation. Unfortunately, while the backpointers did make the removal easier they were a bit of a mess from an implementation point of view which was making further development difficult. Since the removal of a CIPSO DOI is a realtively rare event it seems to make sense to remove this backpointer list as the optimization was hurting us more then it was helping. However, we still need to be able to track when a CIPSO DOI definition is being used so replace the backpointer list with a reference count. In order to preserve the current functionality of removing the associated LSM domain mappings when a CIPSO DOI is removed we walk the LSM domain mapping table, removing the relevant entries. Signed-off-by: Paul Moore Reviewed-by: James Morris --- include/net/cipso_ipv4.h | 21 ++-- net/ipv4/cipso_ipv4.c | 235 ++++++++++++++++--------------------- net/netlabel/netlabel_cipso_v4.c | 77 ++++++------ net/netlabel/netlabel_domainhash.c | 95 ++++++++------- net/netlabel/netlabel_domainhash.h | 2 + net/netlabel/netlabel_kapi.c | 43 ++++--- net/netlabel/netlabel_mgmt.c | 24 +--- security/smack/smackfs.c | 4 +- 8 files changed, 235 insertions(+), 266 deletions(-) (limited to 'include/net') diff --git a/include/net/cipso_ipv4.h b/include/net/cipso_ipv4.h index a6bb94530cf..5fe6556fb3c 100644 --- a/include/net/cipso_ipv4.h +++ b/include/net/cipso_ipv4.h @@ -40,6 +40,7 @@ #include #include #include +#include /* known doi values */ #define CIPSO_V4_DOI_UNKNOWN 0x00000000 @@ -79,10 +80,9 @@ struct cipso_v4_doi { } map; u8 tags[CIPSO_V4_TAG_MAXCNT]; - u32 valid; + atomic_t refcount; struct list_head list; struct rcu_head rcu; - struct list_head dom_list; }; /* Standard CIPSO mapping table */ @@ -128,25 +128,26 @@ extern int cipso_v4_rbm_strictvalid; #ifdef CONFIG_NETLABEL int cipso_v4_doi_add(struct cipso_v4_doi *doi_def); -int cipso_v4_doi_remove(u32 doi, - struct netlbl_audit *audit_info, - void (*callback) (struct rcu_head * head)); +void cipso_v4_doi_free(struct cipso_v4_doi *doi_def); +int cipso_v4_doi_remove(u32 doi, struct netlbl_audit *audit_info); struct cipso_v4_doi *cipso_v4_doi_getdef(u32 doi); +void cipso_v4_doi_putdef(struct cipso_v4_doi *doi_def); int cipso_v4_doi_walk(u32 *skip_cnt, int (*callback) (struct cipso_v4_doi *doi_def, void *arg), void *cb_arg); -int cipso_v4_doi_domhsh_add(struct cipso_v4_doi *doi_def, const char *domain); -int cipso_v4_doi_domhsh_remove(struct cipso_v4_doi *doi_def, - const char *domain); #else static inline int cipso_v4_doi_add(struct cipso_v4_doi *doi_def) { return -ENOSYS; } +static inline void cipso_v4_doi_free(struct cipso_v4_doi *doi_def) +{ + return; +} + static inline int cipso_v4_doi_remove(u32 doi, - struct netlbl_audit *audit_info, - void (*callback) (struct rcu_head * head)) + struct netlbl_audit *audit_info) { return 0; } diff --git a/net/ipv4/cipso_ipv4.c b/net/ipv4/cipso_ipv4.c index 2c0e4572cc9..bf87eddfec3 100644 --- a/net/ipv4/cipso_ipv4.c +++ b/net/ipv4/cipso_ipv4.c @@ -47,17 +47,7 @@ #include #include -struct cipso_v4_domhsh_entry { - char *domain; - u32 valid; - struct list_head list; - struct rcu_head rcu; -}; - /* List of available DOI definitions */ -/* XXX - Updates should be minimal so having a single lock for the - * cipso_v4_doi_list and the cipso_v4_doi_list->dom_list should be - * okay. */ /* XXX - This currently assumes a minimal number of different DOIs in use, * if in practice there are a lot of different DOIs this list should * probably be turned into a hash table or something similar so we @@ -193,25 +183,6 @@ static void cipso_v4_bitmap_setbit(unsigned char *bitmap, bitmap[byte_spot] &= ~bitmask; } -/** - * cipso_v4_doi_domhsh_free - Frees a domain list entry - * @entry: the entry's RCU field - * - * Description: - * This function is designed to be used as a callback to the call_rcu() - * function so that the memory allocated to a domain list entry can be released - * safely. - * - */ -static void cipso_v4_doi_domhsh_free(struct rcu_head *entry) -{ - struct cipso_v4_domhsh_entry *ptr; - - ptr = container_of(entry, struct cipso_v4_domhsh_entry, rcu); - kfree(ptr->domain); - kfree(ptr); -} - /** * cipso_v4_cache_entry_free - Frees a cache entry * @entry: the entry to free @@ -457,7 +428,7 @@ static struct cipso_v4_doi *cipso_v4_doi_search(u32 doi) struct cipso_v4_doi *iter; list_for_each_entry_rcu(iter, &cipso_v4_doi_list, list) - if (iter->doi == doi && iter->valid) + if (iter->doi == doi && atomic_read(&iter->refcount)) return iter; return NULL; } @@ -501,9 +472,8 @@ int cipso_v4_doi_add(struct cipso_v4_doi *doi_def) } } - doi_def->valid = 1; + atomic_set(&doi_def->refcount, 1); INIT_RCU_HEAD(&doi_def->rcu); - INIT_LIST_HEAD(&doi_def->dom_list); spin_lock(&cipso_v4_doi_list_lock); if (cipso_v4_doi_search(doi_def->doi) != NULL) @@ -518,60 +488,130 @@ doi_add_failure: return -EEXIST; } +/** + * cipso_v4_doi_free - Frees a DOI definition + * @entry: the entry's RCU field + * + * Description: + * This function frees all of the memory associated with a DOI definition. + * + */ +void cipso_v4_doi_free(struct cipso_v4_doi *doi_def) +{ + if (doi_def == NULL) + return; + + switch (doi_def->type) { + case CIPSO_V4_MAP_STD: + kfree(doi_def->map.std->lvl.cipso); + kfree(doi_def->map.std->lvl.local); + kfree(doi_def->map.std->cat.cipso); + kfree(doi_def->map.std->cat.local); + break; + } + kfree(doi_def); +} + +/** + * cipso_v4_doi_free_rcu - Frees a DOI definition via the RCU pointer + * @entry: the entry's RCU field + * + * Description: + * This function is designed to be used as a callback to the call_rcu() + * function so that the memory allocated to the DOI definition can be released + * safely. + * + */ +static void cipso_v4_doi_free_rcu(struct rcu_head *entry) +{ + struct cipso_v4_doi *doi_def; + + doi_def = container_of(entry, struct cipso_v4_doi, rcu); + cipso_v4_doi_free(doi_def); +} + /** * cipso_v4_doi_remove - Remove an existing DOI from the CIPSO protocol engine * @doi: the DOI value * @audit_secid: the LSM secid to use in the audit message - * @callback: the DOI cleanup/free callback * * Description: - * Removes a DOI definition from the CIPSO engine, @callback is called to - * free any memory. The NetLabel routines will be called to release their own - * LSM domain mappings as well as our own domain list. Returns zero on - * success and negative values on failure. + * Removes a DOI definition from the CIPSO engine. The NetLabel routines will + * be called to release their own LSM domain mappings as well as our own + * domain list. Returns zero on success and negative values on failure. * */ -int cipso_v4_doi_remove(u32 doi, - struct netlbl_audit *audit_info, - void (*callback) (struct rcu_head * head)) +int cipso_v4_doi_remove(u32 doi, struct netlbl_audit *audit_info) { struct cipso_v4_doi *doi_def; - struct cipso_v4_domhsh_entry *dom_iter; spin_lock(&cipso_v4_doi_list_lock); doi_def = cipso_v4_doi_search(doi); - if (doi_def != NULL) { - doi_def->valid = 0; - list_del_rcu(&doi_def->list); + if (doi_def == NULL) { spin_unlock(&cipso_v4_doi_list_lock); - rcu_read_lock(); - list_for_each_entry_rcu(dom_iter, &doi_def->dom_list, list) - if (dom_iter->valid) - netlbl_cfg_map_del(dom_iter->domain, - audit_info); - rcu_read_unlock(); - cipso_v4_cache_invalidate(); - call_rcu(&doi_def->rcu, callback); - return 0; + return -ENOENT; + } + if (!atomic_dec_and_test(&doi_def->refcount)) { + spin_unlock(&cipso_v4_doi_list_lock); + return -EBUSY; } + list_del_rcu(&doi_def->list); spin_unlock(&cipso_v4_doi_list_lock); - return -ENOENT; + cipso_v4_cache_invalidate(); + call_rcu(&doi_def->rcu, cipso_v4_doi_free_rcu); + + return 0; } /** - * cipso_v4_doi_getdef - Returns a pointer to a valid DOI definition + * cipso_v4_doi_getdef - Returns a reference to a valid DOI definition * @doi: the DOI value * * Description: * Searches for a valid DOI definition and if one is found it is returned to * the caller. Otherwise NULL is returned. The caller must ensure that - * rcu_read_lock() is held while accessing the returned definition. + * rcu_read_lock() is held while accessing the returned definition and the DOI + * definition reference count is decremented when the caller is done. * */ struct cipso_v4_doi *cipso_v4_doi_getdef(u32 doi) { - return cipso_v4_doi_search(doi); + struct cipso_v4_doi *doi_def; + + rcu_read_lock(); + doi_def = cipso_v4_doi_search(doi); + if (doi_def == NULL) + goto doi_getdef_return; + if (!atomic_inc_not_zero(&doi_def->refcount)) + doi_def = NULL; + +doi_getdef_return: + rcu_read_unlock(); + return doi_def; +} + +/** + * cipso_v4_doi_putdef - Releases a reference for the given DOI definition + * @doi_def: the DOI definition + * + * Description: + * Releases a DOI definition reference obtained from cipso_v4_doi_getdef(). + * + */ +void cipso_v4_doi_putdef(struct cipso_v4_doi *doi_def) +{ + if (doi_def == NULL) + return; + + if (!atomic_dec_and_test(&doi_def->refcount)) + return; + spin_lock(&cipso_v4_doi_list_lock); + list_del_rcu(&doi_def->list); + spin_unlock(&cipso_v4_doi_list_lock); + + cipso_v4_cache_invalidate(); + call_rcu(&doi_def->rcu, cipso_v4_doi_free_rcu); } /** @@ -597,7 +637,7 @@ int cipso_v4_doi_walk(u32 *skip_cnt, rcu_read_lock(); list_for_each_entry_rcu(iter_doi, &cipso_v4_doi_list, list) - if (iter_doi->valid) { + if (atomic_read(&iter_doi->refcount) > 0) { if (doi_cnt++ < *skip_cnt) continue; ret_val = callback(iter_doi, cb_arg); @@ -613,85 +653,6 @@ doi_walk_return: return ret_val; } -/** - * cipso_v4_doi_domhsh_add - Adds a domain entry to a DOI definition - * @doi_def: the DOI definition - * @domain: the domain to add - * - * Description: - * Adds the @domain to the DOI specified by @doi_def, this function - * should only be called by external functions (i.e. NetLabel). This function - * does allocate memory. Returns zero on success, negative values on failure. - * - */ -int cipso_v4_doi_domhsh_add(struct cipso_v4_doi *doi_def, const char *domain) -{ - struct cipso_v4_domhsh_entry *iter; - struct cipso_v4_domhsh_entry *new_dom; - - new_dom = kzalloc(sizeof(*new_dom), GFP_KERNEL); - if (new_dom == NULL) - return -ENOMEM; - if (domain) { - new_dom->domain = kstrdup(domain, GFP_KERNEL); - if (new_dom->domain == NULL) { - kfree(new_dom); - return -ENOMEM; - } - } - new_dom->valid = 1; - INIT_RCU_HEAD(&new_dom->rcu); - - spin_lock(&cipso_v4_doi_list_lock); - list_for_each_entry(iter, &doi_def->dom_list, list) - if (iter->valid && - ((domain != NULL && iter->domain != NULL && - strcmp(iter->domain, domain) == 0) || - (domain == NULL && iter->domain == NULL))) { - spin_unlock(&cipso_v4_doi_list_lock); - kfree(new_dom->domain); - kfree(new_dom); - return -EEXIST; - } - list_add_tail_rcu(&new_dom->list, &doi_def->dom_list); - spin_unlock(&cipso_v4_doi_list_lock); - - return 0; -} - -/** - * cipso_v4_doi_domhsh_remove - Removes a domain entry from a DOI definition - * @doi_def: the DOI definition - * @domain: the domain to remove - * - * Description: - * Removes the @domain from the DOI specified by @doi_def, this function - * should only be called by external functions (i.e. NetLabel). Returns zero - * on success and negative values on error. - * - */ -int cipso_v4_doi_domhsh_remove(struct cipso_v4_doi *doi_def, - const char *domain) -{ - struct cipso_v4_domhsh_entry *iter; - - spin_lock(&cipso_v4_doi_list_lock); - list_for_each_entry(iter, &doi_def->dom_list, list) - if (iter->valid && - ((domain != NULL && iter->domain != NULL && - strcmp(iter->domain, domain) == 0) || - (domain == NULL && iter->domain == NULL))) { - iter->valid = 0; - list_del_rcu(&iter->list); - spin_unlock(&cipso_v4_doi_list_lock); - call_rcu(&iter->rcu, cipso_v4_doi_domhsh_free); - return 0; - } - spin_unlock(&cipso_v4_doi_list_lock); - - return -ENOENT; -} - /* * Label Mapping Functions */ diff --git a/net/netlabel/netlabel_cipso_v4.c b/net/netlabel/netlabel_cipso_v4.c index aaf50032b3a..5c4f60bbc82 100644 --- a/net/netlabel/netlabel_cipso_v4.c +++ b/net/netlabel/netlabel_cipso_v4.c @@ -43,6 +43,7 @@ #include "netlabel_user.h" #include "netlabel_cipso_v4.h" #include "netlabel_mgmt.h" +#include "netlabel_domainhash.h" /* Argument struct for cipso_v4_doi_walk() */ struct netlbl_cipsov4_doiwalk_arg { @@ -51,6 +52,12 @@ struct netlbl_cipsov4_doiwalk_arg { u32 seq; }; +/* Argument struct for netlbl_domhsh_walk() */ +struct netlbl_domhsh_walk_arg { + struct netlbl_audit *audit_info; + u32 doi; +}; + /* NetLabel Generic NETLINK CIPSOv4 family */ static struct genl_family netlbl_cipsov4_gnl_family = { .id = GENL_ID_GENERATE, @@ -80,32 +87,6 @@ static const struct nla_policy netlbl_cipsov4_genl_policy[NLBL_CIPSOV4_A_MAX + 1 * Helper Functions */ -/** - * netlbl_cipsov4_doi_free - Frees a CIPSO V4 DOI definition - * @entry: the entry's RCU field - * - * Description: - * This function is designed to be used as a callback to the call_rcu() - * function so that the memory allocated to the DOI definition can be released - * safely. - * - */ -void netlbl_cipsov4_doi_free(struct rcu_head *entry) -{ - struct cipso_v4_doi *ptr; - - ptr = container_of(entry, struct cipso_v4_doi, rcu); - switch (ptr->type) { - case CIPSO_V4_MAP_STD: - kfree(ptr->map.std->lvl.cipso); - kfree(ptr->map.std->lvl.local); - kfree(ptr->map.std->cat.cipso); - kfree(ptr->map.std->cat.local); - break; - } - kfree(ptr); -} - /** * netlbl_cipsov4_add_common - Parse the common sections of a ADD message * @info: the Generic NETLINK info block @@ -342,7 +323,7 @@ static int netlbl_cipsov4_add_std(struct genl_info *info) add_std_failure: if (doi_def) - netlbl_cipsov4_doi_free(&doi_def->rcu); + cipso_v4_doi_free(doi_def); return ret_val; } @@ -379,7 +360,7 @@ static int netlbl_cipsov4_add_pass(struct genl_info *info) return 0; add_pass_failure: - netlbl_cipsov4_doi_free(&doi_def->rcu); + cipso_v4_doi_free(doi_def); return ret_val; } @@ -667,6 +648,29 @@ static int netlbl_cipsov4_listall(struct sk_buff *skb, return skb->len; } +/** + * netlbl_cipsov4_remove_cb - netlbl_cipsov4_remove() callback for REMOVE + * @entry: LSM domain mapping entry + * @arg: the netlbl_domhsh_walk_arg structure + * + * Description: + * This function is intended for use by netlbl_cipsov4_remove() as the callback + * for the netlbl_domhsh_walk() function; it removes LSM domain map entries + * which are associated with the CIPSO DOI specified in @arg. Returns zero on + * success, negative values on failure. + * + */ +static int netlbl_cipsov4_remove_cb(struct netlbl_dom_map *entry, void *arg) +{ + struct netlbl_domhsh_walk_arg *cb_arg = arg; + + if (entry->type == NETLBL_NLTYPE_CIPSOV4 && + entry->type_def.cipsov4->doi == cb_arg->doi) + return netlbl_domhsh_remove_entry(entry, cb_arg->audit_info); + + return 0; +} + /** * netlbl_cipsov4_remove - Handle a REMOVE message * @skb: the NETLINK buffer @@ -681,8 +685,11 @@ static int netlbl_cipsov4_remove(struct sk_buff *skb, struct genl_info *info) { int ret_val = -EINVAL; u32 doi = 0; + struct netlbl_domhsh_walk_arg cb_arg; struct audit_buffer *audit_buf; struct netlbl_audit audit_info; + u32 skip_bkt = 0; + u32 skip_chain = 0; if (!info->attrs[NLBL_CIPSOV4_A_DOI]) return -EINVAL; @@ -690,11 +697,15 @@ static int netlbl_cipsov4_remove(struct sk_buff *skb, struct genl_info *info) doi = nla_get_u32(info->attrs[NLBL_CIPSOV4_A_DOI]); netlbl_netlink_auditinfo(skb, &audit_info); - ret_val = cipso_v4_doi_remove(doi, - &audit_info, - netlbl_cipsov4_doi_free); - if (ret_val == 0) - atomic_dec(&netlabel_mgmt_protocount); + cb_arg.doi = doi; + cb_arg.audit_info = &audit_info; + ret_val = netlbl_domhsh_walk(&skip_bkt, &skip_chain, + netlbl_cipsov4_remove_cb, &cb_arg); + if (ret_val == 0 || ret_val == -ENOENT) { + ret_val = cipso_v4_doi_remove(doi, &audit_info); + if (ret_val == 0) + atomic_dec(&netlabel_mgmt_protocount); + } audit_buf = netlbl_audit_start_common(AUDIT_MAC_CIPSOV4_DEL, &audit_info); diff --git a/net/netlabel/netlabel_domainhash.c b/net/netlabel/netlabel_domainhash.c index dc42206c431..0243f0c57b4 100644 --- a/net/netlabel/netlabel_domainhash.c +++ b/net/netlabel/netlabel_domainhash.c @@ -217,20 +217,6 @@ int netlbl_domhsh_add(struct netlbl_dom_map *entry, u32 bkt; struct audit_buffer *audit_buf; - switch (entry->type) { - case NETLBL_NLTYPE_UNLABELED: - ret_val = 0; - break; - case NETLBL_NLTYPE_CIPSOV4: - ret_val = cipso_v4_doi_domhsh_add(entry->type_def.cipsov4, - entry->domain); - break; - default: - return -EINVAL; - } - if (ret_val != 0) - return ret_val; - entry->valid = 1; INIT_RCU_HEAD(&entry->rcu); @@ -271,16 +257,6 @@ int netlbl_domhsh_add(struct netlbl_dom_map *entry, } rcu_read_unlock(); - if (ret_val != 0) { - switch (entry->type) { - case NETLBL_NLTYPE_CIPSOV4: - if (cipso_v4_doi_domhsh_remove(entry->type_def.cipsov4, - entry->domain) != 0) - BUG(); - break; - } - } - return ret_val; } @@ -302,35 +278,26 @@ int netlbl_domhsh_add_default(struct netlbl_dom_map *entry, } /** - * netlbl_domhsh_remove - Removes an entry from the domain hash table - * @domain: the domain to remove + * netlbl_domhsh_remove_entry - Removes a given entry from the domain table + * @entry: the entry to remove * @audit_info: NetLabel audit information * * Description: * Removes an entry from the domain hash table and handles any updates to the - * lower level protocol handler (i.e. CIPSO). Returns zero on success, - * negative on failure. + * lower level protocol handler (i.e. CIPSO). Caller is responsible for + * ensuring that the RCU read lock is held. Returns zero on success, negative + * on failure. * */ -int netlbl_domhsh_remove(const char *domain, struct netlbl_audit *audit_info) +int netlbl_domhsh_remove_entry(struct netlbl_dom_map *entry, + struct netlbl_audit *audit_info) { - int ret_val = -ENOENT; - struct netlbl_dom_map *entry; + int ret_val = 0; struct audit_buffer *audit_buf; - rcu_read_lock(); - if (domain) - entry = netlbl_domhsh_search(domain); - else - entry = netlbl_domhsh_search_def(domain); if (entry == NULL) - goto remove_return; - switch (entry->type) { - case NETLBL_NLTYPE_CIPSOV4: - cipso_v4_doi_domhsh_remove(entry->type_def.cipsov4, - entry->domain); - break; - } + return -ENOENT; + spin_lock(&netlbl_domhsh_lock); if (entry->valid) { entry->valid = 0; @@ -338,8 +305,8 @@ int netlbl_domhsh_remove(const char *domain, struct netlbl_audit *audit_info) list_del_rcu(&entry->list); else rcu_assign_pointer(netlbl_domhsh_def, NULL); - ret_val = 0; - } + } else + ret_val = -ENOENT; spin_unlock(&netlbl_domhsh_lock); audit_buf = netlbl_audit_start_common(AUDIT_MAC_MAP_DEL, audit_info); @@ -351,10 +318,42 @@ int netlbl_domhsh_remove(const char *domain, struct netlbl_audit *audit_info) audit_log_end(audit_buf); } -remove_return: - rcu_read_unlock(); - if (ret_val == 0) + if (ret_val == 0) { + switch (entry->type) { + case NETLBL_NLTYPE_CIPSOV4: + cipso_v4_doi_putdef(entry->type_def.cipsov4); + break; + } call_rcu(&entry->rcu, netlbl_domhsh_free_entry); + } + + return ret_val; +} + +/** + * netlbl_domhsh_remove - Removes an entry from the domain hash table + * @domain: the domain to remove + * @audit_info: NetLabel audit information + * + * Description: + * Removes an entry from the domain hash table and handles any updates to the + * lower level protocol handler (i.e. CIPSO). Returns zero on success, + * negative on failure. + * + */ +int netlbl_domhsh_remove(const char *domain, struct netlbl_audit *audit_info) +{ + int ret_val; + struct netlbl_dom_map *entry; + + rcu_read_lock(); + if (domain) + entry = netlbl_domhsh_search(domain); + else + entry = netlbl_domhsh_search_def(domain); + ret_val = netlbl_domhsh_remove_entry(entry, audit_info); + rcu_read_unlock(); + return ret_val; } diff --git a/net/netlabel/netlabel_domainhash.h b/net/netlabel/netlabel_domainhash.h index 8220990ceb9..afcc41a7432 100644 --- a/net/netlabel/netlabel_domainhash.h +++ b/net/netlabel/netlabel_domainhash.h @@ -61,6 +61,8 @@ int netlbl_domhsh_add(struct netlbl_dom_map *entry, struct netlbl_audit *audit_info); int netlbl_domhsh_add_default(struct netlbl_dom_map *entry, struct netlbl_audit *audit_info); +int netlbl_domhsh_remove_entry(struct netlbl_dom_map *entry, + struct netlbl_audit *audit_info); int netlbl_domhsh_remove(const char *domain, struct netlbl_audit *audit_info); int netlbl_domhsh_remove_default(struct netlbl_audit *audit_info); struct netlbl_dom_map *netlbl_domhsh_getentry(const char *domain); diff --git a/net/netlabel/netlabel_kapi.c b/net/netlabel/netlabel_kapi.c index 22faba620e4..7d8ecea9391 100644 --- a/net/netlabel/netlabel_kapi.c +++ b/net/netlabel/netlabel_kapi.c @@ -121,10 +121,15 @@ int netlbl_cfg_cipsov4_add_map(struct cipso_v4_doi *doi_def, struct netlbl_audit *audit_info) { int ret_val = -ENOMEM; + u32 doi; + u32 doi_type; struct netlbl_dom_map *entry; const char *type_str; struct audit_buffer *audit_buf; + doi = doi_def->doi; + doi_type = doi_def->type; + entry = kzalloc(sizeof(*entry), GFP_ATOMIC); if (entry == NULL) return -ENOMEM; @@ -133,32 +138,25 @@ int netlbl_cfg_cipsov4_add_map(struct cipso_v4_doi *doi_def, if (entry->domain == NULL) goto cfg_cipsov4_add_map_failure; } - entry->type = NETLBL_NLTYPE_CIPSOV4; - entry->type_def.cipsov4 = doi_def; - - /* Grab a RCU read lock here so nothing happens to the doi_def variable - * between adding it to the CIPSOv4 protocol engine and adding a - * domain mapping for it. */ - rcu_read_lock(); ret_val = cipso_v4_doi_add(doi_def); if (ret_val != 0) - goto cfg_cipsov4_add_map_failure_unlock; + goto cfg_cipsov4_add_map_failure_remove_doi; + entry->type = NETLBL_NLTYPE_CIPSOV4; + entry->type_def.cipsov4 = cipso_v4_doi_getdef(doi); + if (entry->type_def.cipsov4 == NULL) { + ret_val = -ENOENT; + goto cfg_cipsov4_add_map_failure_remove_doi; + } ret_val = netlbl_domhsh_add(entry, audit_info); if (ret_val != 0) - goto cfg_cipsov4_add_map_failure_remove_doi; - rcu_read_unlock(); - - return 0; + goto cfg_cipsov4_add_map_failure_release_doi; -cfg_cipsov4_add_map_failure_remove_doi: - cipso_v4_doi_remove(doi_def->doi, audit_info, netlbl_cipsov4_doi_free); -cfg_cipsov4_add_map_failure_unlock: - rcu_read_unlock(); +cfg_cipsov4_add_map_return: audit_buf = netlbl_audit_start_common(AUDIT_MAC_CIPSOV4_ADD, audit_info); if (audit_buf != NULL) { - switch (doi_def->type) { + switch (doi_type) { case CIPSO_V4_MAP_STD: type_str = "std"; break; @@ -170,14 +168,21 @@ cfg_cipsov4_add_map_failure_unlock: } audit_log_format(audit_buf, " cipso_doi=%u cipso_type=%s res=%u", - doi_def->doi, type_str, ret_val == 0 ? 1 : 0); + doi, type_str, ret_val == 0 ? 1 : 0); audit_log_end(audit_buf); } + + return ret_val; + +cfg_cipsov4_add_map_failure_release_doi: + cipso_v4_doi_putdef(doi_def); +cfg_cipsov4_add_map_failure_remove_doi: + cipso_v4_doi_remove(doi, audit_info); cfg_cipsov4_add_map_failure: if (entry != NULL) kfree(entry->domain); kfree(entry); - return ret_val; + goto cfg_cipsov4_add_map_return; } /* diff --git a/net/netlabel/netlabel_mgmt.c b/net/netlabel/netlabel_mgmt.c index 44be5d5261f..c4e18c7bc0c 100644 --- a/net/netlabel/netlabel_mgmt.c +++ b/net/netlabel/netlabel_mgmt.c @@ -122,18 +122,12 @@ static int netlbl_mgmt_add(struct sk_buff *skb, struct genl_info *info) goto add_failure; tmp_val = nla_get_u32(info->attrs[NLBL_MGMT_A_CV4DOI]); - /* We should be holding a rcu_read_lock() here while we hold - * the result but since the entry will always be deleted when - * the CIPSO DOI is deleted we aren't going to keep the - * lock. */ - rcu_read_lock(); entry->type_def.cipsov4 = cipso_v4_doi_getdef(tmp_val); - if (entry->type_def.cipsov4 == NULL) { - rcu_read_unlock(); + if (entry->type_def.cipsov4 == NULL) goto add_failure; - } ret_val = netlbl_domhsh_add(entry, &audit_info); - rcu_read_unlock(); + if (ret_val != 0) + cipso_v4_doi_putdef(entry->type_def.cipsov4); break; default: goto add_failure; @@ -294,18 +288,12 @@ static int netlbl_mgmt_adddef(struct sk_buff *skb, struct genl_info *info) goto adddef_failure; tmp_val = nla_get_u32(info->attrs[NLBL_MGMT_A_CV4DOI]); - /* We should be holding a rcu_read_lock() here while we hold - * the result but since the entry will always be deleted when - * the CIPSO DOI is deleted we aren't going to keep the - * lock. */ - rcu_read_lock(); entry->type_def.cipsov4 = cipso_v4_doi_getdef(tmp_val); - if (entry->type_def.cipsov4 == NULL) { - rcu_read_unlock(); + if (entry->type_def.cipsov4 == NULL) goto adddef_failure; - } ret_val = netlbl_domhsh_add_default(entry, &audit_info); - rcu_read_unlock(); + if (ret_val != 0) + cipso_v4_doi_putdef(entry->type_def.cipsov4); break; default: goto adddef_failure; diff --git a/security/smack/smackfs.c b/security/smack/smackfs.c index 271a835fbbe..9733f8eb1a2 100644 --- a/security/smack/smackfs.c +++ b/security/smack/smackfs.c @@ -343,9 +343,11 @@ static void smk_cipso_doi(void) doip->tags[rc] = CIPSO_V4_TAG_INVALID; rc = netlbl_cfg_cipsov4_add_map(doip, NULL, &audit_info); - if (rc != 0) + if (rc != 0) { printk(KERN_WARNING "%s:%d add rc = %d\n", __func__, __LINE__, rc); + kfree(doip); + } } /** -- cgit v1.2.3-70-g09d2 From 63c41688743760631188cf0f4ae986a6793ccb0a Mon Sep 17 00:00:00 2001 From: Paul Moore Date: Fri, 10 Oct 2008 10:16:32 -0400 Subject: netlabel: Add network address selectors to the NetLabel/LSM domain mapping This patch extends the NetLabel traffic labeling capabilities to individual packets based not only on the LSM domain but the by the destination address as well. The changes here only affect the core NetLabel infrastructre, changes to the NetLabel KAPI and individial protocol engines are also required but are split out into a different patch to ease review. Signed-off-by: Paul Moore Reviewed-by: James Morris --- include/net/netlabel.h | 7 +- net/netlabel/netlabel_addrlist.c | 130 ++++++++++++ net/netlabel/netlabel_addrlist.h | 15 ++ net/netlabel/netlabel_domainhash.c | 290 +++++++++++++++++++++++---- net/netlabel/netlabel_domainhash.h | 38 +++- net/netlabel/netlabel_kapi.c | 7 +- net/netlabel/netlabel_mgmt.c | 398 ++++++++++++++++++++++++++++--------- net/netlabel/netlabel_mgmt.h | 59 +++++- net/netlabel/netlabel_unlabeled.c | 96 ++------- 9 files changed, 816 insertions(+), 224 deletions(-) (limited to 'include/net') diff --git a/include/net/netlabel.h b/include/net/netlabel.h index e16db096126..0729f8ce504 100644 --- a/include/net/netlabel.h +++ b/include/net/netlabel.h @@ -9,7 +9,7 @@ */ /* - * (c) Copyright Hewlett-Packard Development Company, L.P., 2006 + * (c) Copyright Hewlett-Packard Development Company, L.P., 2006, 2008 * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -72,8 +72,9 @@ struct cipso_v4_doi; /* NetLabel NETLINK protocol version * 1: initial version * 2: added static labels for unlabeled connections + * 3: network selectors added to the NetLabel/LSM domain mapping */ -#define NETLBL_PROTO_VERSION 2 +#define NETLBL_PROTO_VERSION 3 /* NetLabel NETLINK types/families */ #define NETLBL_NLTYPE_NONE 0 @@ -87,6 +88,8 @@ struct cipso_v4_doi; #define NETLBL_NLTYPE_CIPSOV6_NAME "NLBL_CIPSOv6" #define NETLBL_NLTYPE_UNLABELED 5 #define NETLBL_NLTYPE_UNLABELED_NAME "NLBL_UNLBL" +#define NETLBL_NLTYPE_ADDRSELECT 6 +#define NETLBL_NLTYPE_ADDRSELECT_NAME "NLBL_ADRSEL" /* * NetLabel - Kernel API for accessing the network packet label mappings. diff --git a/net/netlabel/netlabel_addrlist.c b/net/netlabel/netlabel_addrlist.c index dd928aa52db..b0925a30335 100644 --- a/net/netlabel/netlabel_addrlist.c +++ b/net/netlabel/netlabel_addrlist.c @@ -39,6 +39,7 @@ #include #include #include +#include #include "netlabel_addrlist.h" @@ -69,6 +70,32 @@ struct netlbl_af4list *netlbl_af4list_search(__be32 addr, return NULL; } +/** + * netlbl_af4list_search_exact - Search for an exact IPv4 address entry + * @addr: IPv4 address + * @mask: IPv4 address mask + * @head: the list head + * + * Description: + * Searches the IPv4 address list given by @head. If an exact match if found + * it is returned, otherwise NULL is returned. The caller is responsible for + * calling the rcu_read_[un]lock() functions. + * + */ +struct netlbl_af4list *netlbl_af4list_search_exact(__be32 addr, + __be32 mask, + struct list_head *head) +{ + struct netlbl_af4list *iter; + + list_for_each_entry_rcu(iter, head, list) + if (iter->valid && iter->addr == addr && iter->mask == mask) + return iter; + + return NULL; +} + + #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) /** * netlbl_af6list_search - Search for a matching IPv6 address entry @@ -93,6 +120,33 @@ struct netlbl_af6list *netlbl_af6list_search(const struct in6_addr *addr, return NULL; } + +/** + * netlbl_af6list_search_exact - Search for an exact IPv6 address entry + * @addr: IPv6 address + * @mask: IPv6 address mask + * @head: the list head + * + * Description: + * Searches the IPv6 address list given by @head. If an exact match if found + * it is returned, otherwise NULL is returned. The caller is responsible for + * calling the rcu_read_[un]lock() functions. + * + */ +struct netlbl_af6list *netlbl_af6list_search_exact(const struct in6_addr *addr, + const struct in6_addr *mask, + struct list_head *head) +{ + struct netlbl_af6list *iter; + + list_for_each_entry_rcu(iter, head, list) + if (iter->valid && + ipv6_addr_equal(&iter->addr, addr) && + ipv6_addr_equal(&iter->mask, mask)) + return iter; + + return NULL; +} #endif /* IPv6 */ /** @@ -256,3 +310,79 @@ struct netlbl_af6list *netlbl_af6list_remove(const struct in6_addr *addr, return NULL; } #endif /* IPv6 */ + +/* + * Audit Helper Functions + */ + +/** + * netlbl_af4list_audit_addr - Audit an IPv4 address + * @audit_buf: audit buffer + * @src: true if source address, false if destination + * @dev: network interface + * @addr: IP address + * @mask: IP address mask + * + * Description: + * Write the IPv4 address and address mask, if necessary, to @audit_buf. + * + */ +void netlbl_af4list_audit_addr(struct audit_buffer *audit_buf, + int src, const char *dev, + __be32 addr, __be32 mask) +{ + u32 mask_val = ntohl(mask); + char *dir = (src ? "src" : "dst"); + + if (dev != NULL) + audit_log_format(audit_buf, " netif=%s", dev); + audit_log_format(audit_buf, " %s=" NIPQUAD_FMT, dir, NIPQUAD(addr)); + if (mask_val != 0xffffffff) { + u32 mask_len = 0; + while (mask_val > 0) { + mask_val <<= 1; + mask_len++; + } + audit_log_format(audit_buf, " %s_prefixlen=%d", dir, mask_len); + } +} + +#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) +/** + * netlbl_af6list_audit_addr - Audit an IPv6 address + * @audit_buf: audit buffer + * @src: true if source address, false if destination + * @dev: network interface + * @addr: IP address + * @mask: IP address mask + * + * Description: + * Write the IPv6 address and address mask, if necessary, to @audit_buf. + * + */ +void netlbl_af6list_audit_addr(struct audit_buffer *audit_buf, + int src, + const char *dev, + const struct in6_addr *addr, + const struct in6_addr *mask) +{ + char *dir = (src ? "src" : "dst"); + + if (dev != NULL) + audit_log_format(audit_buf, " netif=%s", dev); + audit_log_format(audit_buf, " %s=" NIP6_FMT, dir, NIP6(*addr)); + if (ntohl(mask->s6_addr32[3]) != 0xffffffff) { + u32 mask_len = 0; + u32 mask_val; + int iter = -1; + while (ntohl(mask->s6_addr32[++iter]) == 0xffffffff) + mask_len += 32; + mask_val = ntohl(mask->s6_addr32[iter]); + while (mask_val > 0) { + mask_val <<= 1; + mask_len++; + } + audit_log_format(audit_buf, " %s_prefixlen=%d", dir, mask_len); + } +} +#endif /* IPv6 */ diff --git a/net/netlabel/netlabel_addrlist.h b/net/netlabel/netlabel_addrlist.h index 0c41df057fa..0242bead405 100644 --- a/net/netlabel/netlabel_addrlist.h +++ b/net/netlabel/netlabel_addrlist.h @@ -36,6 +36,7 @@ #include #include #include +#include /** * struct netlbl_af4list - NetLabel IPv4 address list @@ -116,6 +117,12 @@ struct netlbl_af4list *netlbl_af4list_remove(__be32 addr, __be32 mask, void netlbl_af4list_remove_entry(struct netlbl_af4list *entry); struct netlbl_af4list *netlbl_af4list_search(__be32 addr, struct list_head *head); +struct netlbl_af4list *netlbl_af4list_search_exact(__be32 addr, + __be32 mask, + struct list_head *head); +void netlbl_af4list_audit_addr(struct audit_buffer *audit_buf, + int src, const char *dev, + __be32 addr, __be32 mask); #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) @@ -169,6 +176,14 @@ struct netlbl_af6list *netlbl_af6list_remove(const struct in6_addr *addr, void netlbl_af6list_remove_entry(struct netlbl_af6list *entry); struct netlbl_af6list *netlbl_af6list_search(const struct in6_addr *addr, struct list_head *head); +struct netlbl_af6list *netlbl_af6list_search_exact(const struct in6_addr *addr, + const struct in6_addr *mask, + struct list_head *head); +void netlbl_af6list_audit_addr(struct audit_buffer *audit_buf, + int src, + const char *dev, + const struct in6_addr *addr, + const struct in6_addr *mask); #endif /* IPV6 */ #endif diff --git a/net/netlabel/netlabel_domainhash.c b/net/netlabel/netlabel_domainhash.c index 0243f0c57b4..5fadf10e5dd 100644 --- a/net/netlabel/netlabel_domainhash.c +++ b/net/netlabel/netlabel_domainhash.c @@ -11,7 +11,7 @@ */ /* - * (c) Copyright Hewlett-Packard Development Company, L.P., 2006 + * (c) Copyright Hewlett-Packard Development Company, L.P., 2006, 2008 * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -40,6 +40,7 @@ #include #include "netlabel_mgmt.h" +#include "netlabel_addrlist.h" #include "netlabel_domainhash.h" #include "netlabel_user.h" @@ -72,8 +73,28 @@ static struct netlbl_dom_map *netlbl_domhsh_def = NULL; static void netlbl_domhsh_free_entry(struct rcu_head *entry) { struct netlbl_dom_map *ptr; + struct netlbl_af4list *iter4; + struct netlbl_af4list *tmp4; +#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) + struct netlbl_af6list *iter6; + struct netlbl_af6list *tmp6; +#endif /* IPv6 */ ptr = container_of(entry, struct netlbl_dom_map, rcu); + if (ptr->type == NETLBL_NLTYPE_ADDRSELECT) { + netlbl_af4list_foreach_safe(iter4, tmp4, + &ptr->type_def.addrsel->list4) { + netlbl_af4list_remove_entry(iter4); + kfree(netlbl_domhsh_addr4_entry(iter4)); + } +#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) + netlbl_af6list_foreach_safe(iter6, tmp6, + &ptr->type_def.addrsel->list6) { + netlbl_af6list_remove_entry(iter6); + kfree(netlbl_domhsh_addr6_entry(iter6)); + } +#endif /* IPv6 */ + } kfree(ptr->domain); kfree(ptr); } @@ -156,6 +177,69 @@ static struct netlbl_dom_map *netlbl_domhsh_search_def(const char *domain) return entry; } +/** + * netlbl_domhsh_audit_add - Generate an audit entry for an add event + * @entry: the entry being added + * @addr4: the IPv4 address information + * @addr6: the IPv6 address information + * @result: the result code + * @audit_info: NetLabel audit information + * + * Description: + * Generate an audit record for adding a new NetLabel/LSM mapping entry with + * the given information. Caller is responsibile for holding the necessary + * locks. + * + */ +static void netlbl_domhsh_audit_add(struct netlbl_dom_map *entry, + struct netlbl_af4list *addr4, + struct netlbl_af6list *addr6, + int result, + struct netlbl_audit *audit_info) +{ + struct audit_buffer *audit_buf; + struct cipso_v4_doi *cipsov4 = NULL; + u32 type; + + audit_buf = netlbl_audit_start_common(AUDIT_MAC_MAP_ADD, audit_info); + if (audit_buf != NULL) { + audit_log_format(audit_buf, " nlbl_domain=%s", + entry->domain ? entry->domain : "(default)"); + if (addr4 != NULL) { + struct netlbl_domaddr4_map *map4; + map4 = netlbl_domhsh_addr4_entry(addr4); + type = map4->type; + cipsov4 = map4->type_def.cipsov4; + netlbl_af4list_audit_addr(audit_buf, 0, NULL, + addr4->addr, addr4->mask); +#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) + } else if (addr6 != NULL) { + struct netlbl_domaddr6_map *map6; + map6 = netlbl_domhsh_addr6_entry(addr6); + type = map6->type; + netlbl_af6list_audit_addr(audit_buf, 0, NULL, + &addr6->addr, &addr6->mask); +#endif /* IPv6 */ + } else { + type = entry->type; + cipsov4 = entry->type_def.cipsov4; + } + switch (type) { + case NETLBL_NLTYPE_UNLABELED: + audit_log_format(audit_buf, " nlbl_protocol=unlbl"); + break; + case NETLBL_NLTYPE_CIPSOV4: + BUG_ON(cipsov4 == NULL); + audit_log_format(audit_buf, + " nlbl_protocol=cipsov4 cipso_doi=%u", + cipsov4->doi); + break; + } + audit_log_format(audit_buf, " res=%u", result == 0 ? 1 : 0); + audit_log_end(audit_buf); + } +} + /* * Domain Hash Table Functions */ @@ -213,50 +297,106 @@ int __init netlbl_domhsh_init(u32 size) int netlbl_domhsh_add(struct netlbl_dom_map *entry, struct netlbl_audit *audit_info) { - int ret_val; - u32 bkt; - struct audit_buffer *audit_buf; - - entry->valid = 1; - INIT_RCU_HEAD(&entry->rcu); + int ret_val = 0; + struct netlbl_dom_map *entry_old; + struct netlbl_af4list *iter4; + struct netlbl_af4list *tmp4; +#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) + struct netlbl_af6list *iter6; + struct netlbl_af6list *tmp6; +#endif /* IPv6 */ rcu_read_lock(); + spin_lock(&netlbl_domhsh_lock); - if (entry->domain != NULL) { - bkt = netlbl_domhsh_hash(entry->domain); - if (netlbl_domhsh_search(entry->domain) == NULL) + if (entry->domain != NULL) + entry_old = netlbl_domhsh_search(entry->domain); + else + entry_old = netlbl_domhsh_search_def(entry->domain); + if (entry_old == NULL) { + entry->valid = 1; + INIT_RCU_HEAD(&entry->rcu); + + if (entry->domain != NULL) { + u32 bkt = netlbl_domhsh_hash(entry->domain); list_add_tail_rcu(&entry->list, &rcu_dereference(netlbl_domhsh)->tbl[bkt]); - else - ret_val = -EEXIST; - } else { - INIT_LIST_HEAD(&entry->list); - if (rcu_dereference(netlbl_domhsh_def) == NULL) + } else { + INIT_LIST_HEAD(&entry->list); rcu_assign_pointer(netlbl_domhsh_def, entry); - else - ret_val = -EEXIST; - } - spin_unlock(&netlbl_domhsh_lock); - audit_buf = netlbl_audit_start_common(AUDIT_MAC_MAP_ADD, audit_info); - if (audit_buf != NULL) { - audit_log_format(audit_buf, - " nlbl_domain=%s", - entry->domain ? entry->domain : "(default)"); - switch (entry->type) { - case NETLBL_NLTYPE_UNLABELED: - audit_log_format(audit_buf, " nlbl_protocol=unlbl"); - break; - case NETLBL_NLTYPE_CIPSOV4: - audit_log_format(audit_buf, - " nlbl_protocol=cipsov4 cipso_doi=%u", - entry->type_def.cipsov4->doi); - break; } - audit_log_format(audit_buf, " res=%u", ret_val == 0 ? 1 : 0); - audit_log_end(audit_buf); - } - rcu_read_unlock(); + if (entry->type == NETLBL_NLTYPE_ADDRSELECT) { + netlbl_af4list_foreach_rcu(iter4, + &entry->type_def.addrsel->list4) + netlbl_domhsh_audit_add(entry, iter4, NULL, + ret_val, audit_info); +#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) + netlbl_af6list_foreach_rcu(iter6, + &entry->type_def.addrsel->list6) + netlbl_domhsh_audit_add(entry, NULL, iter6, + ret_val, audit_info); +#endif /* IPv6 */ + } else + netlbl_domhsh_audit_add(entry, NULL, NULL, + ret_val, audit_info); + } else if (entry_old->type == NETLBL_NLTYPE_ADDRSELECT && + entry->type == NETLBL_NLTYPE_ADDRSELECT) { + struct list_head *old_list4; + struct list_head *old_list6; + + old_list4 = &entry_old->type_def.addrsel->list4; + old_list6 = &entry_old->type_def.addrsel->list6; + + /* we only allow the addition of address selectors if all of + * the selectors do not exist in the existing domain map */ + netlbl_af4list_foreach_rcu(iter4, + &entry->type_def.addrsel->list4) + if (netlbl_af4list_search_exact(iter4->addr, + iter4->mask, + old_list4)) { + ret_val = -EEXIST; + goto add_return; + } +#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) + netlbl_af6list_foreach_rcu(iter6, + &entry->type_def.addrsel->list6) + if (netlbl_af6list_search_exact(&iter6->addr, + &iter6->mask, + old_list6)) { + ret_val = -EEXIST; + goto add_return; + } +#endif /* IPv6 */ + + netlbl_af4list_foreach_safe(iter4, tmp4, + &entry->type_def.addrsel->list4) { + netlbl_af4list_remove_entry(iter4); + iter4->valid = 1; + ret_val = netlbl_af4list_add(iter4, old_list4); + netlbl_domhsh_audit_add(entry_old, iter4, NULL, + ret_val, audit_info); + if (ret_val != 0) + goto add_return; + } +#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) + netlbl_af6list_foreach_safe(iter6, tmp6, + &entry->type_def.addrsel->list6) { + netlbl_af6list_remove_entry(iter6); + iter6->valid = 1; + ret_val = netlbl_af6list_add(iter6, old_list6); + netlbl_domhsh_audit_add(entry_old, NULL, iter6, + ret_val, audit_info); + if (ret_val != 0) + goto add_return; + } +#endif /* IPv6 */ + } else + ret_val = -EINVAL; + +add_return: + spin_unlock(&netlbl_domhsh_lock); + rcu_read_unlock(); return ret_val; } @@ -319,7 +459,19 @@ int netlbl_domhsh_remove_entry(struct netlbl_dom_map *entry, } if (ret_val == 0) { + struct netlbl_af4list *iter4; + struct netlbl_domaddr4_map *map4; + switch (entry->type) { + case NETLBL_NLTYPE_ADDRSELECT: + netlbl_af4list_foreach_rcu(iter4, + &entry->type_def.addrsel->list4) { + map4 = netlbl_domhsh_addr4_entry(iter4); + cipso_v4_doi_putdef(map4->type_def.cipsov4); + } + /* no need to check the IPv6 list since we currently + * support only unlabeled protocols for IPv6 */ + break; case NETLBL_NLTYPE_CIPSOV4: cipso_v4_doi_putdef(entry->type_def.cipsov4); break; @@ -387,6 +539,70 @@ struct netlbl_dom_map *netlbl_domhsh_getentry(const char *domain) return netlbl_domhsh_search_def(domain); } +/** + * netlbl_domhsh_getentry_af4 - Get an entry from the domain hash table + * @domain: the domain name to search for + * @addr: the IP address to search for + * + * Description: + * Look through the domain hash table searching for an entry to match @domain + * and @addr, return a pointer to a copy of the entry or NULL. The caller is + * responsible for ensuring that rcu_read_[un]lock() is called. + * + */ +struct netlbl_domaddr4_map *netlbl_domhsh_getentry_af4(const char *domain, + __be32 addr) +{ + struct netlbl_dom_map *dom_iter; + struct netlbl_af4list *addr_iter; + + dom_iter = netlbl_domhsh_search_def(domain); + if (dom_iter == NULL) + return NULL; + if (dom_iter->type != NETLBL_NLTYPE_ADDRSELECT) + return NULL; + + addr_iter = netlbl_af4list_search(addr, + &dom_iter->type_def.addrsel->list4); + if (addr_iter == NULL) + return NULL; + + return netlbl_domhsh_addr4_entry(addr_iter); +} + +#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) +/** + * netlbl_domhsh_getentry_af6 - Get an entry from the domain hash table + * @domain: the domain name to search for + * @addr: the IP address to search for + * + * Description: + * Look through the domain hash table searching for an entry to match @domain + * and @addr, return a pointer to a copy of the entry or NULL. The caller is + * responsible for ensuring that rcu_read_[un]lock() is called. + * + */ +struct netlbl_domaddr6_map *netlbl_domhsh_getentry_af6(const char *domain, + const struct in6_addr *addr) +{ + struct netlbl_dom_map *dom_iter; + struct netlbl_af6list *addr_iter; + + dom_iter = netlbl_domhsh_search_def(domain); + if (dom_iter == NULL) + return NULL; + if (dom_iter->type != NETLBL_NLTYPE_ADDRSELECT) + return NULL; + + addr_iter = netlbl_af6list_search(addr, + &dom_iter->type_def.addrsel->list6); + if (addr_iter == NULL) + return NULL; + + return netlbl_domhsh_addr6_entry(addr_iter); +} +#endif /* IPv6 */ + /** * netlbl_domhsh_walk - Iterate through the domain mapping hash table * @skip_bkt: the number of buckets to skip at the start diff --git a/net/netlabel/netlabel_domainhash.h b/net/netlabel/netlabel_domainhash.h index afcc41a7432..bfcb6763a1a 100644 --- a/net/netlabel/netlabel_domainhash.h +++ b/net/netlabel/netlabel_domainhash.h @@ -11,7 +11,7 @@ */ /* - * (c) Copyright Hewlett-Packard Development Company, L.P., 2006 + * (c) Copyright Hewlett-Packard Development Company, L.P., 2006, 2008 * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -36,16 +36,43 @@ #include #include +#include "netlabel_addrlist.h" + /* Domain hash table size */ /* XXX - currently this number is an uneducated guess */ #define NETLBL_DOMHSH_BITSIZE 7 -/* Domain mapping definition struct */ +/* Domain mapping definition structures */ +#define netlbl_domhsh_addr4_entry(iter) \ + container_of(iter, struct netlbl_domaddr4_map, list) +struct netlbl_domaddr4_map { + u32 type; + union { + struct cipso_v4_doi *cipsov4; + } type_def; + + struct netlbl_af4list list; +}; +#define netlbl_domhsh_addr6_entry(iter) \ + container_of(iter, struct netlbl_domaddr6_map, list) +struct netlbl_domaddr6_map { + u32 type; + + /* NOTE: no 'type_def' union needed at present since we don't currently + * support any IPv6 labeling protocols */ + + struct netlbl_af6list list; +}; +struct netlbl_domaddr_map { + struct list_head list4; + struct list_head list6; +}; struct netlbl_dom_map { char *domain; u32 type; union { struct cipso_v4_doi *cipsov4; + struct netlbl_domaddr_map *addrsel; } type_def; u32 valid; @@ -66,9 +93,16 @@ int netlbl_domhsh_remove_entry(struct netlbl_dom_map *entry, int netlbl_domhsh_remove(const char *domain, struct netlbl_audit *audit_info); int netlbl_domhsh_remove_default(struct netlbl_audit *audit_info); struct netlbl_dom_map *netlbl_domhsh_getentry(const char *domain); +struct netlbl_domaddr4_map *netlbl_domhsh_getentry_af4(const char *domain, + __be32 addr); int netlbl_domhsh_walk(u32 *skip_bkt, u32 *skip_chain, int (*callback) (struct netlbl_dom_map *entry, void *arg), void *cb_arg); +#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) +struct netlbl_domaddr6_map *netlbl_domhsh_getentry_af6(const char *domain, + const struct in6_addr *addr); +#endif /* IPv6 */ + #endif diff --git a/net/netlabel/netlabel_kapi.c b/net/netlabel/netlabel_kapi.c index 7d8ecea9391..8b820dc9806 100644 --- a/net/netlabel/netlabel_kapi.c +++ b/net/netlabel/netlabel_kapi.c @@ -419,7 +419,9 @@ int netlbl_enabled(void) * Attach the correct label to the given socket using the security attributes * specified in @secattr. This function requires exclusive access to @sk, * which means it either needs to be in the process of being created or locked. - * Returns zero on success, negative values on failure. + * Returns zero on success, -EDESTADDRREQ if the domain is configured to use + * network address selectors (can't blindly label the socket), and negative + * values on all other failures. * */ int netlbl_sock_setattr(struct sock *sk, @@ -433,6 +435,9 @@ int netlbl_sock_setattr(struct sock *sk, if (dom_entry == NULL) goto socket_setattr_return; switch (dom_entry->type) { + case NETLBL_NLTYPE_ADDRSELECT: + ret_val = -EDESTADDRREQ; + break; case NETLBL_NLTYPE_CIPSOV4: ret_val = cipso_v4_sock_setattr(sk, dom_entry->type_def.cipsov4, diff --git a/net/netlabel/netlabel_mgmt.c b/net/netlabel/netlabel_mgmt.c index c4e18c7bc0c..ee769ecaa13 100644 --- a/net/netlabel/netlabel_mgmt.c +++ b/net/netlabel/netlabel_mgmt.c @@ -10,7 +10,7 @@ */ /* - * (c) Copyright Hewlett-Packard Development Company, L.P., 2006 + * (c) Copyright Hewlett-Packard Development Company, L.P., 2006, 2008 * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -32,9 +32,13 @@ #include #include #include +#include +#include #include #include #include +#include +#include #include #include #include @@ -71,79 +75,336 @@ static const struct nla_policy netlbl_mgmt_genl_policy[NLBL_MGMT_A_MAX + 1] = { }; /* - * NetLabel Command Handlers + * Helper Functions */ /** * netlbl_mgmt_add - Handle an ADD message - * @skb: the NETLINK buffer * @info: the Generic NETLINK info block + * @audit_info: NetLabel audit information * * Description: - * Process a user generated ADD message and add the domains from the message - * to the hash table. See netlabel.h for a description of the message format. - * Returns zero on success, negative values on failure. + * Helper function for the ADD and ADDDEF messages to add the domain mappings + * from the message to the hash table. See netlabel.h for a description of the + * message format. Returns zero on success, negative values on failure. * */ -static int netlbl_mgmt_add(struct sk_buff *skb, struct genl_info *info) +static int netlbl_mgmt_add_common(struct genl_info *info, + struct netlbl_audit *audit_info) { int ret_val = -EINVAL; struct netlbl_dom_map *entry = NULL; - size_t tmp_size; + struct netlbl_domaddr_map *addrmap = NULL; + struct cipso_v4_doi *cipsov4 = NULL; u32 tmp_val; - struct netlbl_audit audit_info; - - if (!info->attrs[NLBL_MGMT_A_DOMAIN] || - !info->attrs[NLBL_MGMT_A_PROTOCOL]) - goto add_failure; - - netlbl_netlink_auditinfo(skb, &audit_info); entry = kzalloc(sizeof(*entry), GFP_KERNEL); if (entry == NULL) { ret_val = -ENOMEM; goto add_failure; } - tmp_size = nla_len(info->attrs[NLBL_MGMT_A_DOMAIN]); - entry->domain = kmalloc(tmp_size, GFP_KERNEL); - if (entry->domain == NULL) { - ret_val = -ENOMEM; - goto add_failure; - } entry->type = nla_get_u32(info->attrs[NLBL_MGMT_A_PROTOCOL]); - nla_strlcpy(entry->domain, info->attrs[NLBL_MGMT_A_DOMAIN], tmp_size); + if (info->attrs[NLBL_MGMT_A_DOMAIN]) { + size_t tmp_size = nla_len(info->attrs[NLBL_MGMT_A_DOMAIN]); + entry->domain = kmalloc(tmp_size, GFP_KERNEL); + if (entry->domain == NULL) { + ret_val = -ENOMEM; + goto add_failure; + } + nla_strlcpy(entry->domain, + info->attrs[NLBL_MGMT_A_DOMAIN], tmp_size); + } + + /* NOTE: internally we allow/use a entry->type value of + * NETLBL_NLTYPE_ADDRSELECT but we don't currently allow users + * to pass that as a protocol value because we need to know the + * "real" protocol */ switch (entry->type) { case NETLBL_NLTYPE_UNLABELED: - ret_val = netlbl_domhsh_add(entry, &audit_info); break; case NETLBL_NLTYPE_CIPSOV4: if (!info->attrs[NLBL_MGMT_A_CV4DOI]) goto add_failure; tmp_val = nla_get_u32(info->attrs[NLBL_MGMT_A_CV4DOI]); - entry->type_def.cipsov4 = cipso_v4_doi_getdef(tmp_val); - if (entry->type_def.cipsov4 == NULL) + cipsov4 = cipso_v4_doi_getdef(tmp_val); + if (cipsov4 == NULL) goto add_failure; - ret_val = netlbl_domhsh_add(entry, &audit_info); - if (ret_val != 0) - cipso_v4_doi_putdef(entry->type_def.cipsov4); + entry->type_def.cipsov4 = cipsov4; break; default: goto add_failure; } + + if (info->attrs[NLBL_MGMT_A_IPV4ADDR]) { + struct in_addr *addr; + struct in_addr *mask; + struct netlbl_domaddr4_map *map; + + addrmap = kzalloc(sizeof(*addrmap), GFP_KERNEL); + if (addrmap == NULL) { + ret_val = -ENOMEM; + goto add_failure; + } + INIT_LIST_HEAD(&addrmap->list4); + INIT_LIST_HEAD(&addrmap->list6); + + if (nla_len(info->attrs[NLBL_MGMT_A_IPV4ADDR]) != + sizeof(struct in_addr)) { + ret_val = -EINVAL; + goto add_failure; + } + if (nla_len(info->attrs[NLBL_MGMT_A_IPV4MASK]) != + sizeof(struct in_addr)) { + ret_val = -EINVAL; + goto add_failure; + } + addr = nla_data(info->attrs[NLBL_MGMT_A_IPV4ADDR]); + mask = nla_data(info->attrs[NLBL_MGMT_A_IPV4MASK]); + + map = kzalloc(sizeof(*map), GFP_KERNEL); + if (map == NULL) { + ret_val = -ENOMEM; + goto add_failure; + } + map->list.addr = addr->s_addr & mask->s_addr; + map->list.mask = mask->s_addr; + map->list.valid = 1; + map->type = entry->type; + if (cipsov4) + map->type_def.cipsov4 = cipsov4; + + ret_val = netlbl_af4list_add(&map->list, &addrmap->list4); + if (ret_val != 0) { + kfree(map); + goto add_failure; + } + + entry->type = NETLBL_NLTYPE_ADDRSELECT; + entry->type_def.addrsel = addrmap; +#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) + } else if (info->attrs[NLBL_MGMT_A_IPV6ADDR]) { + struct in6_addr *addr; + struct in6_addr *mask; + struct netlbl_domaddr6_map *map; + + addrmap = kzalloc(sizeof(*addrmap), GFP_KERNEL); + if (addrmap == NULL) { + ret_val = -ENOMEM; + goto add_failure; + } + INIT_LIST_HEAD(&addrmap->list4); + INIT_LIST_HEAD(&addrmap->list6); + + if (nla_len(info->attrs[NLBL_MGMT_A_IPV6ADDR]) != + sizeof(struct in6_addr)) { + ret_val = -EINVAL; + goto add_failure; + } + if (nla_len(info->attrs[NLBL_MGMT_A_IPV6MASK]) != + sizeof(struct in6_addr)) { + ret_val = -EINVAL; + goto add_failure; + } + addr = nla_data(info->attrs[NLBL_MGMT_A_IPV6ADDR]); + mask = nla_data(info->attrs[NLBL_MGMT_A_IPV6MASK]); + + map = kzalloc(sizeof(*map), GFP_KERNEL); + if (map == NULL) { + ret_val = -ENOMEM; + goto add_failure; + } + ipv6_addr_copy(&map->list.addr, addr); + map->list.addr.s6_addr32[0] &= mask->s6_addr32[0]; + map->list.addr.s6_addr32[1] &= mask->s6_addr32[1]; + map->list.addr.s6_addr32[2] &= mask->s6_addr32[2]; + map->list.addr.s6_addr32[3] &= mask->s6_addr32[3]; + ipv6_addr_copy(&map->list.mask, mask); + map->list.valid = 1; + map->type = entry->type; + + ret_val = netlbl_af6list_add(&map->list, &addrmap->list6); + if (ret_val != 0) { + kfree(map); + goto add_failure; + } + + entry->type = NETLBL_NLTYPE_ADDRSELECT; + entry->type_def.addrsel = addrmap; +#endif /* IPv6 */ + } + + ret_val = netlbl_domhsh_add(entry, audit_info); if (ret_val != 0) goto add_failure; return 0; add_failure: + if (cipsov4) + cipso_v4_doi_putdef(cipsov4); if (entry) kfree(entry->domain); + kfree(addrmap); kfree(entry); return ret_val; } +/** + * netlbl_mgmt_listentry - List a NetLabel/LSM domain map entry + * @skb: the NETLINK buffer + * @entry: the map entry + * + * Description: + * This function is a helper function used by the LISTALL and LISTDEF command + * handlers. The caller is responsibile for ensuring that the RCU read lock + * is held. Returns zero on success, negative values on failure. + * + */ +static int netlbl_mgmt_listentry(struct sk_buff *skb, + struct netlbl_dom_map *entry) +{ + int ret_val; + struct nlattr *nla_a; + struct nlattr *nla_b; + struct netlbl_af4list *iter4; +#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) + struct netlbl_af6list *iter6; +#endif + + if (entry->domain != NULL) { + ret_val = nla_put_string(skb, + NLBL_MGMT_A_DOMAIN, entry->domain); + if (ret_val != 0) + return ret_val; + } + + switch (entry->type) { + case NETLBL_NLTYPE_ADDRSELECT: + nla_a = nla_nest_start(skb, NLBL_MGMT_A_SELECTORLIST); + if (nla_a == NULL) + return -ENOMEM; + + netlbl_af4list_foreach_rcu(iter4, + &entry->type_def.addrsel->list4) { + struct netlbl_domaddr4_map *map4; + struct in_addr addr_struct; + + nla_b = nla_nest_start(skb, NLBL_MGMT_A_ADDRSELECTOR); + if (nla_b == NULL) + return -ENOMEM; + + addr_struct.s_addr = iter4->addr; + ret_val = nla_put(skb, NLBL_MGMT_A_IPV4ADDR, + sizeof(struct in_addr), + &addr_struct); + if (ret_val != 0) + return ret_val; + addr_struct.s_addr = iter4->mask; + ret_val = nla_put(skb, NLBL_MGMT_A_IPV4MASK, + sizeof(struct in_addr), + &addr_struct); + if (ret_val != 0) + return ret_val; + map4 = netlbl_domhsh_addr4_entry(iter4); + ret_val = nla_put_u32(skb, NLBL_MGMT_A_PROTOCOL, + map4->type); + if (ret_val != 0) + return ret_val; + switch (map4->type) { + case NETLBL_NLTYPE_CIPSOV4: + ret_val = nla_put_u32(skb, NLBL_MGMT_A_CV4DOI, + map4->type_def.cipsov4->doi); + if (ret_val != 0) + return ret_val; + break; + } + + nla_nest_end(skb, nla_b); + } +#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) + netlbl_af6list_foreach_rcu(iter6, + &entry->type_def.addrsel->list6) { + struct netlbl_domaddr6_map *map6; + + nla_b = nla_nest_start(skb, NLBL_MGMT_A_ADDRSELECTOR); + if (nla_b == NULL) + return -ENOMEM; + + ret_val = nla_put(skb, NLBL_MGMT_A_IPV6ADDR, + sizeof(struct in6_addr), + &iter6->addr); + if (ret_val != 0) + return ret_val; + ret_val = nla_put(skb, NLBL_MGMT_A_IPV6MASK, + sizeof(struct in6_addr), + &iter6->mask); + if (ret_val != 0) + return ret_val; + map6 = netlbl_domhsh_addr6_entry(iter6); + ret_val = nla_put_u32(skb, NLBL_MGMT_A_PROTOCOL, + map6->type); + if (ret_val != 0) + return ret_val; + + nla_nest_end(skb, nla_b); + } +#endif /* IPv6 */ + + nla_nest_end(skb, nla_a); + break; + case NETLBL_NLTYPE_UNLABELED: + ret_val = nla_put_u32(skb, NLBL_MGMT_A_PROTOCOL, entry->type); + break; + case NETLBL_NLTYPE_CIPSOV4: + ret_val = nla_put_u32(skb, NLBL_MGMT_A_PROTOCOL, entry->type); + if (ret_val != 0) + return ret_val; + ret_val = nla_put_u32(skb, NLBL_MGMT_A_CV4DOI, + entry->type_def.cipsov4->doi); + break; + } + + return ret_val; +} + +/* + * NetLabel Command Handlers + */ + +/** + * netlbl_mgmt_add - Handle an ADD message + * @skb: the NETLINK buffer + * @info: the Generic NETLINK info block + * + * Description: + * Process a user generated ADD message and add the domains from the message + * to the hash table. See netlabel.h for a description of the message format. + * Returns zero on success, negative values on failure. + * + */ +static int netlbl_mgmt_add(struct sk_buff *skb, struct genl_info *info) +{ + struct netlbl_audit audit_info; + + if ((!info->attrs[NLBL_MGMT_A_DOMAIN]) || + (!info->attrs[NLBL_MGMT_A_PROTOCOL]) || + (info->attrs[NLBL_MGMT_A_IPV4ADDR] && + info->attrs[NLBL_MGMT_A_IPV6ADDR]) || + (info->attrs[NLBL_MGMT_A_IPV4MASK] && + info->attrs[NLBL_MGMT_A_IPV6MASK]) || + ((info->attrs[NLBL_MGMT_A_IPV4ADDR] != NULL) ^ + (info->attrs[NLBL_MGMT_A_IPV4MASK] != NULL)) || + ((info->attrs[NLBL_MGMT_A_IPV6ADDR] != NULL) ^ + (info->attrs[NLBL_MGMT_A_IPV6MASK] != NULL))) + return -EINVAL; + + netlbl_netlink_auditinfo(skb, &audit_info); + + return netlbl_mgmt_add_common(info, &audit_info); +} + /** * netlbl_mgmt_remove - Handle a REMOVE message * @skb: the NETLINK buffer @@ -192,23 +453,9 @@ static int netlbl_mgmt_listall_cb(struct netlbl_dom_map *entry, void *arg) if (data == NULL) goto listall_cb_failure; - ret_val = nla_put_string(cb_arg->skb, - NLBL_MGMT_A_DOMAIN, - entry->domain); + ret_val = netlbl_mgmt_listentry(cb_arg->skb, entry); if (ret_val != 0) goto listall_cb_failure; - ret_val = nla_put_u32(cb_arg->skb, NLBL_MGMT_A_PROTOCOL, entry->type); - if (ret_val != 0) - goto listall_cb_failure; - switch (entry->type) { - case NETLBL_NLTYPE_CIPSOV4: - ret_val = nla_put_u32(cb_arg->skb, - NLBL_MGMT_A_CV4DOI, - entry->type_def.cipsov4->doi); - if (ret_val != 0) - goto listall_cb_failure; - break; - } cb_arg->seq++; return genlmsg_end(cb_arg->skb, data); @@ -262,50 +509,22 @@ static int netlbl_mgmt_listall(struct sk_buff *skb, */ static int netlbl_mgmt_adddef(struct sk_buff *skb, struct genl_info *info) { - int ret_val = -EINVAL; - struct netlbl_dom_map *entry = NULL; - u32 tmp_val; struct netlbl_audit audit_info; - if (!info->attrs[NLBL_MGMT_A_PROTOCOL]) - goto adddef_failure; + if ((!info->attrs[NLBL_MGMT_A_PROTOCOL]) || + (info->attrs[NLBL_MGMT_A_IPV4ADDR] && + info->attrs[NLBL_MGMT_A_IPV6ADDR]) || + (info->attrs[NLBL_MGMT_A_IPV4MASK] && + info->attrs[NLBL_MGMT_A_IPV6MASK]) || + ((info->attrs[NLBL_MGMT_A_IPV4ADDR] != NULL) ^ + (info->attrs[NLBL_MGMT_A_IPV4MASK] != NULL)) || + ((info->attrs[NLBL_MGMT_A_IPV6ADDR] != NULL) ^ + (info->attrs[NLBL_MGMT_A_IPV6MASK] != NULL))) + return -EINVAL; netlbl_netlink_auditinfo(skb, &audit_info); - entry = kzalloc(sizeof(*entry), GFP_KERNEL); - if (entry == NULL) { - ret_val = -ENOMEM; - goto adddef_failure; - } - entry->type = nla_get_u32(info->attrs[NLBL_MGMT_A_PROTOCOL]); - - switch (entry->type) { - case NETLBL_NLTYPE_UNLABELED: - ret_val = netlbl_domhsh_add_default(entry, &audit_info); - break; - case NETLBL_NLTYPE_CIPSOV4: - if (!info->attrs[NLBL_MGMT_A_CV4DOI]) - goto adddef_failure; - - tmp_val = nla_get_u32(info->attrs[NLBL_MGMT_A_CV4DOI]); - entry->type_def.cipsov4 = cipso_v4_doi_getdef(tmp_val); - if (entry->type_def.cipsov4 == NULL) - goto adddef_failure; - ret_val = netlbl_domhsh_add_default(entry, &audit_info); - if (ret_val != 0) - cipso_v4_doi_putdef(entry->type_def.cipsov4); - break; - default: - goto adddef_failure; - } - if (ret_val != 0) - goto adddef_failure; - - return 0; - -adddef_failure: - kfree(entry); - return ret_val; + return netlbl_mgmt_add_common(info, &audit_info); } /** @@ -359,19 +578,10 @@ static int netlbl_mgmt_listdef(struct sk_buff *skb, struct genl_info *info) ret_val = -ENOENT; goto listdef_failure_lock; } - ret_val = nla_put_u32(ans_skb, NLBL_MGMT_A_PROTOCOL, entry->type); - if (ret_val != 0) - goto listdef_failure_lock; - switch (entry->type) { - case NETLBL_NLTYPE_CIPSOV4: - ret_val = nla_put_u32(ans_skb, - NLBL_MGMT_A_CV4DOI, - entry->type_def.cipsov4->doi); - if (ret_val != 0) - goto listdef_failure_lock; - break; - } + ret_val = netlbl_mgmt_listentry(ans_skb, entry); rcu_read_unlock(); + if (ret_val != 0) + goto listdef_failure; genlmsg_end(ans_skb, data); return genlmsg_reply(ans_skb, info); diff --git a/net/netlabel/netlabel_mgmt.h b/net/netlabel/netlabel_mgmt.h index a43bff169d6..05d96431f81 100644 --- a/net/netlabel/netlabel_mgmt.h +++ b/net/netlabel/netlabel_mgmt.h @@ -45,6 +45,16 @@ * NLBL_MGMT_A_DOMAIN * NLBL_MGMT_A_PROTOCOL * + * If IPv4 is specified the following attributes are required: + * + * NLBL_MGMT_A_IPV4ADDR + * NLBL_MGMT_A_IPV4MASK + * + * If IPv6 is specified the following attributes are required: + * + * NLBL_MGMT_A_IPV6ADDR + * NLBL_MGMT_A_IPV6MASK + * * If using NETLBL_NLTYPE_CIPSOV4 the following attributes are required: * * NLBL_MGMT_A_CV4DOI @@ -68,13 +78,24 @@ * Required attributes: * * NLBL_MGMT_A_DOMAIN + * + * If the IP address selectors are not used the following attribute is + * required: + * * NLBL_MGMT_A_PROTOCOL * - * If using NETLBL_NLTYPE_CIPSOV4 the following attributes are required: + * If the IP address selectors are used then the following attritbute is + * required: + * + * NLBL_MGMT_A_SELECTORLIST + * + * If the mapping is using the NETLBL_NLTYPE_CIPSOV4 type then the following + * attributes are required: * * NLBL_MGMT_A_CV4DOI * - * If using NETLBL_NLTYPE_UNLABELED no other attributes are required. + * If the mapping is using the NETLBL_NLTYPE_UNLABELED type no other + * attributes are required. * * o ADDDEF: * Sent by an application to set the default domain mapping for the NetLabel @@ -100,15 +121,23 @@ * application there is no payload. On success the kernel should send a * response using the following format. * - * Required attributes: + * If the IP address selectors are not used the following attribute is + * required: * * NLBL_MGMT_A_PROTOCOL * - * If using NETLBL_NLTYPE_CIPSOV4 the following attributes are required: + * If the IP address selectors are used then the following attritbute is + * required: + * + * NLBL_MGMT_A_SELECTORLIST + * + * If the mapping is using the NETLBL_NLTYPE_CIPSOV4 type then the following + * attributes are required: * * NLBL_MGMT_A_CV4DOI * - * If using NETLBL_NLTYPE_UNLABELED no other attributes are required. + * If the mapping is using the NETLBL_NLTYPE_UNLABELED type no other + * attributes are required. * * o PROTOCOLS: * Sent by an application to request a list of configured NetLabel protocols @@ -162,6 +191,26 @@ enum { NLBL_MGMT_A_CV4DOI, /* (NLA_U32) * the CIPSOv4 DOI value */ + NLBL_MGMT_A_IPV6ADDR, + /* (NLA_BINARY, struct in6_addr) + * an IPv6 address */ + NLBL_MGMT_A_IPV6MASK, + /* (NLA_BINARY, struct in6_addr) + * an IPv6 address mask */ + NLBL_MGMT_A_IPV4ADDR, + /* (NLA_BINARY, struct in_addr) + * an IPv4 address */ + NLBL_MGMT_A_IPV4MASK, + /* (NLA_BINARY, struct in_addr) + * and IPv4 address mask */ + NLBL_MGMT_A_ADDRSELECTOR, + /* (NLA_NESTED) + * an IP address selector, must contain an address, mask, and protocol + * attribute plus any protocol specific attributes */ + NLBL_MGMT_A_SELECTORLIST, + /* (NLA_NESTED) + * the selector list, there must be at least one + * NLBL_MGMT_A_ADDRSELECTOR attribute */ __NLBL_MGMT_A_MAX, }; #define NLBL_MGMT_A_MAX (__NLBL_MGMT_A_MAX - 1) diff --git a/net/netlabel/netlabel_unlabeled.c b/net/netlabel/netlabel_unlabeled.c index ab8131a8e48..e8a5c32b0f1 100644 --- a/net/netlabel/netlabel_unlabeled.c +++ b/net/netlabel/netlabel_unlabeled.c @@ -145,76 +145,6 @@ static const struct nla_policy netlbl_unlabel_genl_policy[NLBL_UNLABEL_A_MAX + 1 [NLBL_UNLABEL_A_SECCTX] = { .type = NLA_BINARY } }; -/* - * Audit Helper Functions - */ - -/** - * netlbl_unlabel_audit_addr4 - Audit an IPv4 address - * @audit_buf: audit buffer - * @dev: network interface - * @addr: IP address - * @mask: IP address mask - * - * Description: - * Write the IPv4 address and address mask, if necessary, to @audit_buf. - * - */ -static void netlbl_unlabel_audit_addr4(struct audit_buffer *audit_buf, - const char *dev, - __be32 addr, __be32 mask) -{ - u32 mask_val = ntohl(mask); - - if (dev != NULL) - audit_log_format(audit_buf, " netif=%s", dev); - audit_log_format(audit_buf, " src=" NIPQUAD_FMT, NIPQUAD(addr)); - if (mask_val != 0xffffffff) { - u32 mask_len = 0; - while (mask_val > 0) { - mask_val <<= 1; - mask_len++; - } - audit_log_format(audit_buf, " src_prefixlen=%d", mask_len); - } -} - -#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) -/** - * netlbl_unlabel_audit_addr6 - Audit an IPv6 address - * @audit_buf: audit buffer - * @dev: network interface - * @addr: IP address - * @mask: IP address mask - * - * Description: - * Write the IPv6 address and address mask, if necessary, to @audit_buf. - * - */ -static void netlbl_unlabel_audit_addr6(struct audit_buffer *audit_buf, - const char *dev, - const struct in6_addr *addr, - const struct in6_addr *mask) -{ - if (dev != NULL) - audit_log_format(audit_buf, " netif=%s", dev); - audit_log_format(audit_buf, " src=" NIP6_FMT, NIP6(*addr)); - if (ntohl(mask->s6_addr32[3]) != 0xffffffff) { - u32 mask_len = 0; - u32 mask_val; - int iter = -1; - while (ntohl(mask->s6_addr32[++iter]) == 0xffffffff) - mask_len += 32; - mask_val = ntohl(mask->s6_addr32[iter]); - while (mask_val > 0) { - mask_val <<= 1; - mask_len++; - } - audit_log_format(audit_buf, " src_prefixlen=%d", mask_len); - } -} -#endif /* IPv6 */ - /* * Unlabeled Connection Hash Table Functions */ @@ -571,10 +501,10 @@ static int netlbl_unlhsh_add(struct net *net, mask4 = (struct in_addr *)mask; ret_val = netlbl_unlhsh_add_addr4(iface, addr4, mask4, secid); if (audit_buf != NULL) - netlbl_unlabel_audit_addr4(audit_buf, - dev_name, - addr4->s_addr, - mask4->s_addr); + netlbl_af4list_audit_addr(audit_buf, 1, + dev_name, + addr4->s_addr, + mask4->s_addr); break; } #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) @@ -585,9 +515,9 @@ static int netlbl_unlhsh_add(struct net *net, mask6 = (struct in6_addr *)mask; ret_val = netlbl_unlhsh_add_addr6(iface, addr6, mask6, secid); if (audit_buf != NULL) - netlbl_unlabel_audit_addr6(audit_buf, - dev_name, - addr6, mask6); + netlbl_af6list_audit_addr(audit_buf, 1, + dev_name, + addr6, mask6); break; } #endif /* IPv6 */ @@ -652,9 +582,9 @@ static int netlbl_unlhsh_remove_addr4(struct net *net, audit_info); if (audit_buf != NULL) { dev = dev_get_by_index(net, iface->ifindex); - netlbl_unlabel_audit_addr4(audit_buf, - (dev != NULL ? dev->name : NULL), - addr->s_addr, mask->s_addr); + netlbl_af4list_audit_addr(audit_buf, 1, + (dev != NULL ? dev->name : NULL), + addr->s_addr, mask->s_addr); if (dev != NULL) dev_put(dev); if (entry && security_secid_to_secctx(entry->secid, @@ -712,9 +642,9 @@ static int netlbl_unlhsh_remove_addr6(struct net *net, audit_info); if (audit_buf != NULL) { dev = dev_get_by_index(net, iface->ifindex); - netlbl_unlabel_audit_addr6(audit_buf, - (dev != NULL ? dev->name : NULL), - addr, mask); + netlbl_af6list_audit_addr(audit_buf, 1, + (dev != NULL ? dev->name : NULL), + addr, mask); if (dev != NULL) dev_put(dev); if (entry && security_secid_to_secctx(entry->secid, -- cgit v1.2.3-70-g09d2 From 948bf85c1bc9a84754786a9d5dd99b7ecc46451e Mon Sep 17 00:00:00 2001 From: Paul Moore Date: Fri, 10 Oct 2008 10:16:32 -0400 Subject: netlabel: Add functionality to set the security attributes of a packet This patch builds upon the new NetLabel address selector functionality by providing the NetLabel KAPI and CIPSO engine support needed to enable the new packet-based labeling. The only new addition to the NetLabel KAPI at this point is shown below: * int netlbl_skbuff_setattr(skb, family, secattr) ... and is designed to be called from a Netfilter hook after the packet's IP header has been populated such as in the FORWARD or LOCAL_OUT hooks. This patch also provides the necessary SELinux hooks to support this new functionality. Smack support is not currently included due to uncertainty regarding the permissions needed to expand the Smack network access controls. Signed-off-by: Paul Moore Reviewed-by: James Morris --- include/net/cipso_ipv4.h | 16 +++ include/net/netlabel.h | 9 ++ net/ipv4/cipso_ipv4.c | 222 +++++++++++++++++++++++++++++------- net/netlabel/netlabel_kapi.c | 60 ++++++++++ security/selinux/hooks.c | 50 +++++++- security/selinux/include/netlabel.h | 9 ++ security/selinux/include/objsec.h | 1 + security/selinux/netlabel.c | 68 ++++++++++- 8 files changed, 393 insertions(+), 42 deletions(-) (limited to 'include/net') diff --git a/include/net/cipso_ipv4.h b/include/net/cipso_ipv4.h index 5fe6556fb3c..2ce093ba553 100644 --- a/include/net/cipso_ipv4.h +++ b/include/net/cipso_ipv4.h @@ -208,6 +208,10 @@ int cipso_v4_sock_setattr(struct sock *sk, const struct cipso_v4_doi *doi_def, const struct netlbl_lsm_secattr *secattr); int cipso_v4_sock_getattr(struct sock *sk, struct netlbl_lsm_secattr *secattr); +int cipso_v4_skbuff_setattr(struct sk_buff *skb, + const struct cipso_v4_doi *doi_def, + const struct netlbl_lsm_secattr *secattr); +int cipso_v4_skbuff_delattr(struct sk_buff *skb); int cipso_v4_skbuff_getattr(const struct sk_buff *skb, struct netlbl_lsm_secattr *secattr); int cipso_v4_validate(unsigned char **option); @@ -232,6 +236,18 @@ static inline int cipso_v4_sock_getattr(struct sock *sk, return -ENOSYS; } +static inline int cipso_v4_skbuff_setattr(struct sk_buff *skb, + const struct cipso_v4_doi *doi_def, + const struct netlbl_lsm_secattr *secattr) +{ + return -ENOSYS; +} + +static inline int cipso_v4_skbuff_delattr(struct sk_buff *skb) +{ + return -ENOSYS; +} + static inline int cipso_v4_skbuff_getattr(const struct sk_buff *skb, struct netlbl_lsm_secattr *secattr) { diff --git a/include/net/netlabel.h b/include/net/netlabel.h index 0729f8ce504..3f67e6d49e4 100644 --- a/include/net/netlabel.h +++ b/include/net/netlabel.h @@ -382,6 +382,9 @@ int netlbl_sock_setattr(struct sock *sk, const struct netlbl_lsm_secattr *secattr); int netlbl_sock_getattr(struct sock *sk, struct netlbl_lsm_secattr *secattr); +int netlbl_skbuff_setattr(struct sk_buff *skb, + u16 family, + const struct netlbl_lsm_secattr *secattr); int netlbl_skbuff_getattr(const struct sk_buff *skb, u16 family, struct netlbl_lsm_secattr *secattr); @@ -451,6 +454,12 @@ static inline int netlbl_sock_getattr(struct sock *sk, { return -ENOSYS; } +static inline int netlbl_skbuff_setattr(struct sk_buff *skb, + u16 family, + const struct netlbl_lsm_secattr *secattr) +{ + return -ENOSYS; +} static inline int netlbl_skbuff_getattr(const struct sk_buff *skb, u16 family, struct netlbl_lsm_secattr *secattr) diff --git a/net/ipv4/cipso_ipv4.c b/net/ipv4/cipso_ipv4.c index bf87eddfec3..e13d6dbb66a 100644 --- a/net/ipv4/cipso_ipv4.c +++ b/net/ipv4/cipso_ipv4.c @@ -13,7 +13,7 @@ */ /* - * (c) Copyright Hewlett-Packard Development Company, L.P., 2006 + * (c) Copyright Hewlett-Packard Development Company, L.P., 2006, 2008 * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -1665,48 +1665,27 @@ void cipso_v4_error(struct sk_buff *skb, int error, u32 gateway) } /** - * cipso_v4_sock_setattr - Add a CIPSO option to a socket - * @sk: the socket + * cipso_v4_genopt - Generate a CIPSO option + * @buf: the option buffer + * @buf_len: the size of opt_buf * @doi_def: the CIPSO DOI to use - * @secattr: the specific security attributes of the socket + * @secattr: the security attributes * * Description: - * Set the CIPSO option on the given socket using the DOI definition and - * security attributes passed to the function. This function requires - * exclusive access to @sk, which means it either needs to be in the - * process of being created or locked. Returns zero on success and negative - * values on failure. + * Generate a CIPSO option using the DOI definition and security attributes + * passed to the function. Returns the length of the option on success and + * negative values on failure. * */ -int cipso_v4_sock_setattr(struct sock *sk, - const struct cipso_v4_doi *doi_def, - const struct netlbl_lsm_secattr *secattr) +static int cipso_v4_genopt(unsigned char *buf, u32 buf_len, + const struct cipso_v4_doi *doi_def, + const struct netlbl_lsm_secattr *secattr) { - int ret_val = -EPERM; + int ret_val; u32 iter; - unsigned char *buf; - u32 buf_len = 0; - u32 opt_len; - struct ip_options *opt = NULL; - struct inet_sock *sk_inet; - struct inet_connection_sock *sk_conn; - - /* In the case of sock_create_lite(), the sock->sk field is not - * defined yet but it is not a problem as the only users of these - * "lite" PF_INET sockets are functions which do an accept() call - * afterwards so we will label the socket as part of the accept(). */ - if (sk == NULL) - return 0; - /* We allocate the maximum CIPSO option size here so we are probably - * being a little wasteful, but it makes our life _much_ easier later - * on and after all we are only talking about 40 bytes. */ - buf_len = CIPSO_V4_OPT_LEN_MAX; - buf = kmalloc(buf_len, GFP_ATOMIC); - if (buf == NULL) { - ret_val = -ENOMEM; - goto socket_setattr_failure; - } + if (buf_len <= CIPSO_V4_HDR_LEN) + return -ENOSPC; /* XXX - This code assumes only one tag per CIPSO option which isn't * really a good assumption to make but since we only support the MAC @@ -1734,8 +1713,7 @@ int cipso_v4_sock_setattr(struct sock *sk, buf_len - CIPSO_V4_HDR_LEN); break; default: - ret_val = -EPERM; - goto socket_setattr_failure; + return -EPERM; } iter++; @@ -1743,9 +1721,58 @@ int cipso_v4_sock_setattr(struct sock *sk, iter < CIPSO_V4_TAG_MAXCNT && doi_def->tags[iter] != CIPSO_V4_TAG_INVALID); if (ret_val < 0) - goto socket_setattr_failure; + return ret_val; cipso_v4_gentag_hdr(doi_def, buf, ret_val); - buf_len = CIPSO_V4_HDR_LEN + ret_val; + return CIPSO_V4_HDR_LEN + ret_val; +} + +/** + * cipso_v4_sock_setattr - Add a CIPSO option to a socket + * @sk: the socket + * @doi_def: the CIPSO DOI to use + * @secattr: the specific security attributes of the socket + * + * Description: + * Set the CIPSO option on the given socket using the DOI definition and + * security attributes passed to the function. This function requires + * exclusive access to @sk, which means it either needs to be in the + * process of being created or locked. Returns zero on success and negative + * values on failure. + * + */ +int cipso_v4_sock_setattr(struct sock *sk, + const struct cipso_v4_doi *doi_def, + const struct netlbl_lsm_secattr *secattr) +{ + int ret_val = -EPERM; + unsigned char *buf = NULL; + u32 buf_len; + u32 opt_len; + struct ip_options *opt = NULL; + struct inet_sock *sk_inet; + struct inet_connection_sock *sk_conn; + + /* In the case of sock_create_lite(), the sock->sk field is not + * defined yet but it is not a problem as the only users of these + * "lite" PF_INET sockets are functions which do an accept() call + * afterwards so we will label the socket as part of the accept(). */ + if (sk == NULL) + return 0; + + /* We allocate the maximum CIPSO option size here so we are probably + * being a little wasteful, but it makes our life _much_ easier later + * on and after all we are only talking about 40 bytes. */ + buf_len = CIPSO_V4_OPT_LEN_MAX; + buf = kmalloc(buf_len, GFP_ATOMIC); + if (buf == NULL) { + ret_val = -ENOMEM; + goto socket_setattr_failure; + } + + ret_val = cipso_v4_genopt(buf, buf_len, doi_def, secattr); + if (ret_val < 0) + goto socket_setattr_failure; + buf_len = ret_val; /* We can't use ip_options_get() directly because it makes a call to * ip_options_get_alloc() which allocates memory with GFP_KERNEL and @@ -1853,6 +1880,123 @@ int cipso_v4_sock_getattr(struct sock *sk, struct netlbl_lsm_secattr *secattr) secattr); } +/** + * cipso_v4_skbuff_setattr - Set the CIPSO option on a packet + * @skb: the packet + * @secattr: the security attributes + * + * Description: + * Set the CIPSO option on the given packet based on the security attributes. + * Returns a pointer to the IP header on success and NULL on failure. + * + */ +int cipso_v4_skbuff_setattr(struct sk_buff *skb, + const struct cipso_v4_doi *doi_def, + const struct netlbl_lsm_secattr *secattr) +{ + int ret_val; + struct iphdr *iph; + struct ip_options *opt = &IPCB(skb)->opt; + unsigned char buf[CIPSO_V4_OPT_LEN_MAX]; + u32 buf_len = CIPSO_V4_OPT_LEN_MAX; + u32 opt_len; + int len_delta; + + buf_len = cipso_v4_genopt(buf, buf_len, doi_def, secattr); + if (buf_len < 0) + return buf_len; + opt_len = (buf_len + 3) & ~3; + + /* we overwrite any existing options to ensure that we have enough + * room for the CIPSO option, the reason is that we _need_ to guarantee + * that the security label is applied to the packet - we do the same + * thing when using the socket options and it hasn't caused a problem, + * if we need to we can always revisit this choice later */ + + len_delta = opt_len - opt->optlen; + /* if we don't ensure enough headroom we could panic on the skb_push() + * call below so make sure we have enough, we are also "mangling" the + * packet so we should probably do a copy-on-write call anyway */ + ret_val = skb_cow(skb, skb_headroom(skb) + len_delta); + if (ret_val < 0) + return ret_val; + + if (len_delta > 0) { + /* we assume that the header + opt->optlen have already been + * "pushed" in ip_options_build() or similar */ + iph = ip_hdr(skb); + skb_push(skb, len_delta); + memmove((char *)iph - len_delta, iph, iph->ihl << 2); + skb_reset_network_header(skb); + iph = ip_hdr(skb); + } else if (len_delta < 0) { + iph = ip_hdr(skb); + memset(iph + 1, IPOPT_NOP, opt->optlen); + } else + iph = ip_hdr(skb); + + if (opt->optlen > 0) + memset(opt, 0, sizeof(*opt)); + opt->optlen = opt_len; + opt->cipso = sizeof(struct iphdr); + opt->is_changed = 1; + + /* we have to do the following because we are being called from a + * netfilter hook which means the packet already has had the header + * fields populated and the checksum calculated - yes this means we + * are doing more work than needed but we do it to keep the core + * stack clean and tidy */ + memcpy(iph + 1, buf, buf_len); + if (opt_len > buf_len) + memset((char *)(iph + 1) + buf_len, 0, opt_len - buf_len); + if (len_delta != 0) { + iph->ihl = 5 + (opt_len >> 2); + iph->tot_len = htons(skb->len); + } + ip_send_check(iph); + + return 0; +} + +/** + * cipso_v4_skbuff_delattr - Delete any CIPSO options from a packet + * @skb: the packet + * + * Description: + * Removes any and all CIPSO options from the given packet. Returns zero on + * success, negative values on failure. + * + */ +int cipso_v4_skbuff_delattr(struct sk_buff *skb) +{ + int ret_val; + struct iphdr *iph; + struct ip_options *opt = &IPCB(skb)->opt; + unsigned char *cipso_ptr; + + if (opt->cipso == 0) + return 0; + + /* since we are changing the packet we should make a copy */ + ret_val = skb_cow(skb, skb_headroom(skb)); + if (ret_val < 0) + return ret_val; + + /* the easiest thing to do is just replace the cipso option with noop + * options since we don't change the size of the packet, although we + * still need to recalculate the checksum */ + + iph = ip_hdr(skb); + cipso_ptr = (unsigned char *)iph + opt->cipso; + memset(cipso_ptr, IPOPT_NOOP, cipso_ptr[1]); + opt->cipso = 0; + opt->is_changed = 1; + + ip_send_check(iph); + + return 0; +} + /** * cipso_v4_skbuff_getattr - Get the security attributes from the CIPSO option * @skb: the packet diff --git a/net/netlabel/netlabel_kapi.c b/net/netlabel/netlabel_kapi.c index 8b820dc9806..cc8047d1f50 100644 --- a/net/netlabel/netlabel_kapi.c +++ b/net/netlabel/netlabel_kapi.c @@ -472,6 +472,66 @@ int netlbl_sock_getattr(struct sock *sk, struct netlbl_lsm_secattr *secattr) return cipso_v4_sock_getattr(sk, secattr); } +/** + * netlbl_skbuff_setattr - Label a packet using the correct protocol + * @skb: the packet + * @family: protocol family + * @secattr: the security attributes + * + * Description: + * Attach the correct label to the given packet using the security attributes + * specified in @secattr. Returns zero on success, negative values on failure. + * + */ +int netlbl_skbuff_setattr(struct sk_buff *skb, + u16 family, + const struct netlbl_lsm_secattr *secattr) +{ + int ret_val; + struct iphdr *hdr4; + struct netlbl_domaddr4_map *af4_entry; + + rcu_read_lock(); + switch (family) { + case AF_INET: + hdr4 = ip_hdr(skb); + af4_entry = netlbl_domhsh_getentry_af4(secattr->domain, + hdr4->daddr); + if (af4_entry == NULL) { + ret_val = -ENOENT; + goto skbuff_setattr_return; + } + switch (af4_entry->type) { + case NETLBL_NLTYPE_CIPSOV4: + ret_val = cipso_v4_skbuff_setattr(skb, + af4_entry->type_def.cipsov4, + secattr); + break; + case NETLBL_NLTYPE_UNLABELED: + /* just delete the protocols we support for right now + * but we could remove other protocols if needed */ + ret_val = cipso_v4_skbuff_delattr(skb); + break; + default: + ret_val = -ENOENT; + } + break; +#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) + case AF_INET6: + /* since we don't support any IPv6 labeling protocols right + * now we can optimize everything away until we do */ + ret_val = 0; + break; +#endif /* IPv6 */ + default: + ret_val = 0; + } + +skbuff_setattr_return: + rcu_read_unlock(); + return ret_val; +} + /** * netlbl_skbuff_getattr - Determine the security attributes of a packet * @skb: the packet diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c index a91146a6b37..7432bdd5d36 100644 --- a/security/selinux/hooks.c +++ b/security/selinux/hooks.c @@ -4407,13 +4407,15 @@ static unsigned int selinux_ip_forward(struct sk_buff *skb, int ifindex, u32 peer_sid; struct avc_audit_data ad; u8 secmark_active; + u8 netlbl_active; u8 peerlbl_active; if (!selinux_policycap_netpeer) return NF_ACCEPT; secmark_active = selinux_secmark_enabled(); - peerlbl_active = netlbl_enabled() || selinux_xfrm_enabled(); + netlbl_active = netlbl_enabled(); + peerlbl_active = netlbl_active || selinux_xfrm_enabled(); if (!secmark_active && !peerlbl_active) return NF_ACCEPT; @@ -4440,6 +4442,14 @@ static unsigned int selinux_ip_forward(struct sk_buff *skb, int ifindex, SECCLASS_PACKET, PACKET__FORWARD_IN, &ad)) return NF_DROP; + if (netlbl_active) + /* we do this in the FORWARD path and not the POST_ROUTING + * path because we want to make sure we apply the necessary + * labeling before IPsec is applied so we can leverage AH + * protection */ + if (selinux_netlbl_skbuff_setsid(skb, family, peer_sid) != 0) + return NF_DROP; + return NF_ACCEPT; } @@ -4463,6 +4473,37 @@ static unsigned int selinux_ipv6_forward(unsigned int hooknum, } #endif /* IPV6 */ +static unsigned int selinux_ip_output(struct sk_buff *skb, + u16 family) +{ + u32 sid; + + if (!netlbl_enabled()) + return NF_ACCEPT; + + /* we do this in the LOCAL_OUT path and not the POST_ROUTING path + * because we want to make sure we apply the necessary labeling + * before IPsec is applied so we can leverage AH protection */ + if (skb->sk) { + struct sk_security_struct *sksec = skb->sk->sk_security; + sid = sksec->sid; + } else + sid = SECINITSID_KERNEL; + if (selinux_netlbl_skbuff_setsid(skb, family, sid) != 0) + return NF_DROP; + + return NF_ACCEPT; +} + +static unsigned int selinux_ipv4_output(unsigned int hooknum, + struct sk_buff *skb, + const struct net_device *in, + const struct net_device *out, + int (*okfn)(struct sk_buff *)) +{ + return selinux_ip_output(skb, PF_INET); +} + static int selinux_ip_postroute_iptables_compat(struct sock *sk, int ifindex, struct avc_audit_data *ad, @@ -5700,6 +5741,13 @@ static struct nf_hook_ops selinux_ipv4_ops[] = { .pf = PF_INET, .hooknum = NF_INET_FORWARD, .priority = NF_IP_PRI_SELINUX_FIRST, + }, + { + .hook = selinux_ipv4_output, + .owner = THIS_MODULE, + .pf = PF_INET, + .hooknum = NF_INET_LOCAL_OUT, + .priority = NF_IP_PRI_SELINUX_FIRST, } }; diff --git a/security/selinux/include/netlabel.h b/security/selinux/include/netlabel.h index d4e3ac8a7fb..b3e6ae071fc 100644 --- a/security/selinux/include/netlabel.h +++ b/security/selinux/include/netlabel.h @@ -48,6 +48,9 @@ int selinux_netlbl_skbuff_getsid(struct sk_buff *skb, u16 family, u32 *type, u32 *sid); +int selinux_netlbl_skbuff_setsid(struct sk_buff *skb, + u16 family, + u32 sid); void selinux_netlbl_sock_graft(struct sock *sk, struct socket *sock); int selinux_netlbl_socket_post_create(struct socket *sock); @@ -88,6 +91,12 @@ static inline int selinux_netlbl_skbuff_getsid(struct sk_buff *skb, *sid = SECSID_NULL; return 0; } +static inline int selinux_netlbl_skbuff_setsid(struct sk_buff *skb, + u16 family, + u32 sid) +{ + return 0; +} static inline void selinux_netlbl_sock_graft(struct sock *sk, struct socket *sock) diff --git a/security/selinux/include/objsec.h b/security/selinux/include/objsec.h index 91070ab874c..f46dd1c3d01 100644 --- a/security/selinux/include/objsec.h +++ b/security/selinux/include/objsec.h @@ -117,6 +117,7 @@ struct sk_security_struct { NLBL_UNSET = 0, NLBL_REQUIRE, NLBL_LABELED, + NLBL_REQSKB, } nlbl_state; #endif }; diff --git a/security/selinux/netlabel.c b/security/selinux/netlabel.c index 4053f7fc95f..090404d6e51 100644 --- a/security/selinux/netlabel.c +++ b/security/selinux/netlabel.c @@ -9,7 +9,7 @@ */ /* - * (c) Copyright Hewlett-Packard Development Company, L.P., 2007 + * (c) Copyright Hewlett-Packard Development Company, L.P., 2007, 2008 * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -31,6 +31,8 @@ #include #include #include +#include +#include #include "objsec.h" #include "security.h" @@ -77,6 +79,8 @@ static int selinux_netlbl_sock_setsid(struct sock *sk) int rc; struct sk_security_struct *sksec = sk->sk_security; struct netlbl_lsm_secattr secattr; + struct inet_sock *sk_inet; + struct inet_connection_sock *sk_conn; if (sksec->nlbl_state != NLBL_REQUIRE) return 0; @@ -87,8 +91,29 @@ static int selinux_netlbl_sock_setsid(struct sock *sk) if (rc != 0) goto sock_setsid_return; rc = netlbl_sock_setattr(sk, &secattr); - if (rc == 0) + switch (rc) { + case 0: sksec->nlbl_state = NLBL_LABELED; + break; + case -EDESTADDRREQ: + /* we are going to possibly end up labeling the individual + * packets later which is problematic for stream sockets + * because of the additional IP header size, our solution is to + * allow for the maximum IP header length (40 bytes for IPv4, + * we don't have to worry about IPv6 yet) just in case */ + sk_inet = inet_sk(sk); + if (sk_inet->is_icsk) { + sk_conn = inet_csk(sk); + if (sk_inet->opt) + sk_conn->icsk_ext_hdr_len -= + sk_inet->opt->optlen; + sk_conn->icsk_ext_hdr_len += 40; + sk_conn->icsk_sync_mss(sk, sk_conn->icsk_pmtu_cookie); + } + sksec->nlbl_state = NLBL_REQSKB; + rc = 0; + break; + } sock_setsid_return: netlbl_secattr_destroy(&secattr); @@ -182,6 +207,45 @@ int selinux_netlbl_skbuff_getsid(struct sk_buff *skb, return rc; } +/** + * selinux_netlbl_skbuff_setsid - Set the NetLabel on a packet given a sid + * @skb: the packet + * @family: protocol family + * @sid: the SID + * + * Description + * Call the NetLabel mechanism to set the label of a packet using @sid. + * Returns zero on auccess, negative values on failure. + * + */ +int selinux_netlbl_skbuff_setsid(struct sk_buff *skb, + u16 family, + u32 sid) +{ + int rc; + struct netlbl_lsm_secattr secattr; + struct sock *sk; + + /* if this is a locally generated packet check to see if it is already + * being labeled by it's parent socket, if it is just exit */ + sk = skb->sk; + if (sk != NULL) { + struct sk_security_struct *sksec = sk->sk_security; + if (sksec->nlbl_state != NLBL_REQSKB) + return 0; + } + + netlbl_secattr_init(&secattr); + rc = security_netlbl_sid_to_secattr(sid, &secattr); + if (rc != 0) + goto skbuff_setsid_return; + rc = netlbl_skbuff_setattr(skb, family, &secattr); + +skbuff_setsid_return: + netlbl_secattr_destroy(&secattr); + return rc; +} + /** * selinux_netlbl_sock_graft - Netlabel the new socket * @sk: the new connection -- cgit v1.2.3-70-g09d2 From 014ab19a69c325f52d7bae54ceeda73d6307ae0c Mon Sep 17 00:00:00 2001 From: Paul Moore Date: Fri, 10 Oct 2008 10:16:33 -0400 Subject: selinux: Set socket NetLabel based on connection endpoint Previous work enabled the use of address based NetLabel selectors, which while highly useful, brought the potential for additional per-packet overhead when used. This patch attempts to solve that by applying NetLabel socket labels when sockets are connect()'d. This should alleviate the per-packet NetLabel labeling for all connected sockets (yes, it even works for connected DGRAM sockets). Signed-off-by: Paul Moore Reviewed-by: James Morris --- include/net/cipso_ipv4.h | 5 ++ include/net/netlabel.h | 13 ++++ net/ipv4/cipso_ipv4.c | 74 ++++++++++++++++++ net/netlabel/netlabel_kapi.c | 78 ++++++++++++++++++- security/selinux/hooks.c | 11 +-- security/selinux/include/netlabel.h | 19 ++++- security/selinux/include/objsec.h | 1 + security/selinux/netlabel.c | 147 +++++++++++++++++++++++++++++------- 8 files changed, 311 insertions(+), 37 deletions(-) (limited to 'include/net') diff --git a/include/net/cipso_ipv4.h b/include/net/cipso_ipv4.h index 2ce093ba553..811febf97ca 100644 --- a/include/net/cipso_ipv4.h +++ b/include/net/cipso_ipv4.h @@ -207,6 +207,7 @@ void cipso_v4_error(struct sk_buff *skb, int error, u32 gateway); int cipso_v4_sock_setattr(struct sock *sk, const struct cipso_v4_doi *doi_def, const struct netlbl_lsm_secattr *secattr); +void cipso_v4_sock_delattr(struct sock *sk); int cipso_v4_sock_getattr(struct sock *sk, struct netlbl_lsm_secattr *secattr); int cipso_v4_skbuff_setattr(struct sk_buff *skb, const struct cipso_v4_doi *doi_def, @@ -230,6 +231,10 @@ static inline int cipso_v4_sock_setattr(struct sock *sk, return -ENOSYS; } +static inline void cipso_v4_sock_delattr(struct sock *sk) +{ +} + static inline int cipso_v4_sock_getattr(struct sock *sk, struct netlbl_lsm_secattr *secattr) { diff --git a/include/net/netlabel.h b/include/net/netlabel.h index 3f67e6d49e4..074cad40ac6 100644 --- a/include/net/netlabel.h +++ b/include/net/netlabel.h @@ -380,8 +380,12 @@ int netlbl_secattr_catmap_setrng(struct netlbl_lsm_secattr_catmap *catmap, int netlbl_enabled(void); int netlbl_sock_setattr(struct sock *sk, const struct netlbl_lsm_secattr *secattr); +void netlbl_sock_delattr(struct sock *sk); int netlbl_sock_getattr(struct sock *sk, struct netlbl_lsm_secattr *secattr); +int netlbl_conn_setattr(struct sock *sk, + struct sockaddr *addr, + const struct netlbl_lsm_secattr *secattr); int netlbl_skbuff_setattr(struct sk_buff *skb, u16 family, const struct netlbl_lsm_secattr *secattr); @@ -449,11 +453,20 @@ static inline int netlbl_sock_setattr(struct sock *sk, { return -ENOSYS; } +static inline void netlbl_sock_delattr(struct sock *sk) +{ +} static inline int netlbl_sock_getattr(struct sock *sk, struct netlbl_lsm_secattr *secattr) { return -ENOSYS; } +static inline int netlbl_conn_setattr(struct sock *sk, + struct sockaddr *addr, + const struct netlbl_lsm_secattr *secattr) +{ + return -ENOSYS; +} static inline int netlbl_skbuff_setattr(struct sk_buff *skb, u16 family, const struct netlbl_lsm_secattr *secattr) diff --git a/net/ipv4/cipso_ipv4.c b/net/ipv4/cipso_ipv4.c index e13d6dbb66a..23768b9d6b6 100644 --- a/net/ipv4/cipso_ipv4.c +++ b/net/ipv4/cipso_ipv4.c @@ -1809,6 +1809,80 @@ socket_setattr_failure: return ret_val; } +/** + * cipso_v4_sock_delattr - Delete the CIPSO option from a socket + * @sk: the socket + * + * Description: + * Removes the CIPSO option from a socket, if present. + * + */ +void cipso_v4_sock_delattr(struct sock *sk) +{ + u8 hdr_delta; + struct ip_options *opt; + struct inet_sock *sk_inet; + + sk_inet = inet_sk(sk); + opt = sk_inet->opt; + if (opt == NULL || opt->cipso == 0) + return; + + if (opt->srr || opt->rr || opt->ts || opt->router_alert) { + u8 cipso_len; + u8 cipso_off; + unsigned char *cipso_ptr; + int iter; + int optlen_new; + + cipso_off = opt->cipso - sizeof(struct iphdr); + cipso_ptr = &opt->__data[cipso_off]; + cipso_len = cipso_ptr[1]; + + if (opt->srr > opt->cipso) + opt->srr -= cipso_len; + if (opt->rr > opt->cipso) + opt->rr -= cipso_len; + if (opt->ts > opt->cipso) + opt->ts -= cipso_len; + if (opt->router_alert > opt->cipso) + opt->router_alert -= cipso_len; + opt->cipso = 0; + + memmove(cipso_ptr, cipso_ptr + cipso_len, + opt->optlen - cipso_off - cipso_len); + + /* determining the new total option length is tricky because of + * the padding necessary, the only thing i can think to do at + * this point is walk the options one-by-one, skipping the + * padding at the end to determine the actual option size and + * from there we can determine the new total option length */ + iter = 0; + optlen_new = 0; + while (iter < opt->optlen) + if (opt->__data[iter] != IPOPT_NOP) { + iter += opt->__data[iter + 1]; + optlen_new = iter; + } else + iter++; + hdr_delta = opt->optlen; + opt->optlen = (optlen_new + 3) & ~3; + hdr_delta -= opt->optlen; + } else { + /* only the cipso option was present on the socket so we can + * remove the entire option struct */ + sk_inet->opt = NULL; + hdr_delta = opt->optlen; + kfree(opt); + } + + if (sk_inet->is_icsk && hdr_delta > 0) { + struct inet_connection_sock *sk_conn = inet_csk(sk); + sk_conn->icsk_ext_hdr_len -= hdr_delta; + sk_conn->icsk_sync_mss(sk, sk_conn->icsk_pmtu_cookie); + } +} + /** * cipso_v4_getattr - Helper function for the cipso_v4_*_getattr functions * @cipso: the CIPSO v4 option diff --git a/net/netlabel/netlabel_kapi.c b/net/netlabel/netlabel_kapi.c index cc8047d1f50..78fc557689b 100644 --- a/net/netlabel/netlabel_kapi.c +++ b/net/netlabel/netlabel_kapi.c @@ -10,7 +10,7 @@ */ /* - * (c) Copyright Hewlett-Packard Development Company, L.P., 2006 + * (c) Copyright Hewlett-Packard Development Company, L.P., 2006, 2008 * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -455,6 +455,20 @@ socket_setattr_return: return ret_val; } +/** + * netlbl_sock_delattr - Delete all the NetLabel labels on a socket + * @sk: the socket + * + * Description: + * Remove all the NetLabel labeling from @sk. The caller is responsible for + * ensuring that @sk is locked. + * + */ +void netlbl_sock_delattr(struct sock *sk) +{ + cipso_v4_sock_delattr(sk); +} + /** * netlbl_sock_getattr - Determine the security attributes of a sock * @sk: the sock @@ -472,6 +486,68 @@ int netlbl_sock_getattr(struct sock *sk, struct netlbl_lsm_secattr *secattr) return cipso_v4_sock_getattr(sk, secattr); } +/** + * netlbl_conn_setattr - Label a connected socket using the correct protocol + * @sk: the socket to label + * @addr: the destination address + * @secattr: the security attributes + * + * Description: + * Attach the correct label to the given connected socket using the security + * attributes specified in @secattr. The caller is responsible for ensuring + * that @sk is locked. Returns zero on success, negative values on failure. + * + */ +int netlbl_conn_setattr(struct sock *sk, + struct sockaddr *addr, + const struct netlbl_lsm_secattr *secattr) +{ + int ret_val; + struct sockaddr_in *addr4; + struct netlbl_domaddr4_map *af4_entry; + + rcu_read_lock(); + switch (addr->sa_family) { + case AF_INET: + addr4 = (struct sockaddr_in *)addr; + af4_entry = netlbl_domhsh_getentry_af4(secattr->domain, + addr4->sin_addr.s_addr); + if (af4_entry == NULL) { + ret_val = -ENOENT; + goto conn_setattr_return; + } + switch (af4_entry->type) { + case NETLBL_NLTYPE_CIPSOV4: + ret_val = cipso_v4_sock_setattr(sk, + af4_entry->type_def.cipsov4, + secattr); + break; + case NETLBL_NLTYPE_UNLABELED: + /* just delete the protocols we support for right now + * but we could remove other protocols if needed */ + cipso_v4_sock_delattr(sk); + ret_val = 0; + break; + default: + ret_val = -ENOENT; + } + break; +#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) + case AF_INET6: + /* since we don't support any IPv6 labeling protocols right + * now we can optimize everything away until we do */ + ret_val = 0; + break; +#endif /* IPv6 */ + default: + ret_val = 0; + } + +conn_setattr_return: + rcu_read_unlock(); + return ret_val; +} + /** * netlbl_skbuff_setattr - Label a packet using the correct protocol * @skb: the packet diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c index 7432bdd5d36..632ac3e80a6 100644 --- a/security/selinux/hooks.c +++ b/security/selinux/hooks.c @@ -3794,6 +3794,7 @@ out: static int selinux_socket_connect(struct socket *sock, struct sockaddr *address, int addrlen) { + struct sock *sk = sock->sk; struct inode_security_struct *isec; int err; @@ -3807,7 +3808,6 @@ static int selinux_socket_connect(struct socket *sock, struct sockaddr *address, isec = SOCK_INODE(sock)->i_security; if (isec->sclass == SECCLASS_TCP_SOCKET || isec->sclass == SECCLASS_DCCP_SOCKET) { - struct sock *sk = sock->sk; struct avc_audit_data ad; struct sockaddr_in *addr4 = NULL; struct sockaddr_in6 *addr6 = NULL; @@ -3841,6 +3841,8 @@ static int selinux_socket_connect(struct socket *sock, struct sockaddr *address, goto out; } + err = selinux_netlbl_socket_connect(sk, address); + out: return err; } @@ -4290,8 +4292,6 @@ static void selinux_sock_graft(struct sock *sk, struct socket *parent) sk->sk_family == PF_UNIX) isec->sid = sksec->sid; sksec->sclass = isec->sclass; - - selinux_netlbl_sock_graft(sk, parent); } static int selinux_inet_conn_request(struct sock *sk, struct sk_buff *skb, @@ -4342,8 +4342,7 @@ static void selinux_inet_csk_clone(struct sock *newsk, selinux_netlbl_sk_security_reset(newsksec, req->rsk_ops->family); } -static void selinux_inet_conn_established(struct sock *sk, - struct sk_buff *skb) +static void selinux_inet_conn_established(struct sock *sk, struct sk_buff *skb) { u16 family = sk->sk_family; struct sk_security_struct *sksec = sk->sk_security; @@ -4353,6 +4352,8 @@ static void selinux_inet_conn_established(struct sock *sk, family = PF_INET; selinux_skb_peerlbl_sid(skb, family, &sksec->peer_sid); + + selinux_netlbl_inet_conn_established(sk, family); } static void selinux_req_classify_flow(const struct request_sock *req, diff --git a/security/selinux/include/netlabel.h b/security/selinux/include/netlabel.h index b3e6ae071fc..982bac0ac32 100644 --- a/security/selinux/include/netlabel.h +++ b/security/selinux/include/netlabel.h @@ -52,7 +52,7 @@ int selinux_netlbl_skbuff_setsid(struct sk_buff *skb, u16 family, u32 sid); -void selinux_netlbl_sock_graft(struct sock *sk, struct socket *sock); +void selinux_netlbl_inet_conn_established(struct sock *sk, u16 family); int selinux_netlbl_socket_post_create(struct socket *sock); int selinux_netlbl_inode_permission(struct inode *inode, int mask); int selinux_netlbl_sock_rcv_skb(struct sk_security_struct *sksec, @@ -62,6 +62,8 @@ int selinux_netlbl_sock_rcv_skb(struct sk_security_struct *sksec, int selinux_netlbl_socket_setsockopt(struct socket *sock, int level, int optname); +int selinux_netlbl_socket_connect(struct sock *sk, struct sockaddr *addr); + #else static inline void selinux_netlbl_cache_invalidate(void) { @@ -98,8 +100,14 @@ static inline int selinux_netlbl_skbuff_setsid(struct sk_buff *skb, return 0; } -static inline void selinux_netlbl_sock_graft(struct sock *sk, - struct socket *sock) +static inline int selinux_netlbl_conn_setsid(struct sock *sk, + struct sockaddr *addr) +{ + return 0; +} + +static inline void selinux_netlbl_inet_conn_established(struct sock *sk, + u16 family) { return; } @@ -125,6 +133,11 @@ static inline int selinux_netlbl_socket_setsockopt(struct socket *sock, { return 0; } +static inline int selinux_netlbl_socket_connect(struct sock *sk, + struct sockaddr *addr) +{ + return 0; +} #endif /* CONFIG_NETLABEL */ #endif diff --git a/security/selinux/include/objsec.h b/security/selinux/include/objsec.h index f46dd1c3d01..ad34787c6c0 100644 --- a/security/selinux/include/objsec.h +++ b/security/selinux/include/objsec.h @@ -118,6 +118,7 @@ struct sk_security_struct { NLBL_REQUIRE, NLBL_LABELED, NLBL_REQSKB, + NLBL_CONNLABELED, } nlbl_state; #endif }; diff --git a/security/selinux/netlabel.c b/security/selinux/netlabel.c index 090404d6e51..b22b7dafa0e 100644 --- a/security/selinux/netlabel.c +++ b/security/selinux/netlabel.c @@ -29,10 +29,12 @@ #include #include +#include +#include #include #include -#include -#include +#include +#include #include "objsec.h" #include "security.h" @@ -79,8 +81,6 @@ static int selinux_netlbl_sock_setsid(struct sock *sk) int rc; struct sk_security_struct *sksec = sk->sk_security; struct netlbl_lsm_secattr secattr; - struct inet_sock *sk_inet; - struct inet_connection_sock *sk_conn; if (sksec->nlbl_state != NLBL_REQUIRE) return 0; @@ -96,20 +96,6 @@ static int selinux_netlbl_sock_setsid(struct sock *sk) sksec->nlbl_state = NLBL_LABELED; break; case -EDESTADDRREQ: - /* we are going to possibly end up labeling the individual - * packets later which is problematic for stream sockets - * because of the additional IP header size, our solution is to - * allow for the maximum IP header length (40 bytes for IPv4, - * we don't have to worry about IPv6 yet) just in case */ - sk_inet = inet_sk(sk); - if (sk_inet->is_icsk) { - sk_conn = inet_csk(sk); - if (sk_inet->opt) - sk_conn->icsk_ext_hdr_len -= - sk_inet->opt->optlen; - sk_conn->icsk_ext_hdr_len += 40; - sk_conn->icsk_sync_mss(sk, sk_conn->icsk_pmtu_cookie); - } sksec->nlbl_state = NLBL_REQSKB; rc = 0; break; @@ -247,21 +233,77 @@ skbuff_setsid_return: } /** - * selinux_netlbl_sock_graft - Netlabel the new socket + * selinux_netlbl_inet_conn_established - Netlabel the newly accepted connection * @sk: the new connection - * @sock: the new socket * * Description: - * The connection represented by @sk is being grafted onto @sock so set the - * socket's NetLabel to match the SID of @sk. + * A new connection has been established on @sk so make sure it is labeled + * correctly with the NetLabel susbsystem. * */ -void selinux_netlbl_sock_graft(struct sock *sk, struct socket *sock) +void selinux_netlbl_inet_conn_established(struct sock *sk, u16 family) { - /* Try to set the NetLabel on the socket to save time later, if we fail - * here we will pick up the pieces in later calls to - * selinux_netlbl_inode_permission(). */ - selinux_netlbl_sock_setsid(sk); + int rc; + struct sk_security_struct *sksec = sk->sk_security; + struct netlbl_lsm_secattr secattr; + struct inet_sock *sk_inet = inet_sk(sk); + struct sockaddr_in addr; + + if (sksec->nlbl_state != NLBL_REQUIRE) + return; + + netlbl_secattr_init(&secattr); + if (security_netlbl_sid_to_secattr(sksec->sid, &secattr) != 0) + goto inet_conn_established_return; + + rc = netlbl_sock_setattr(sk, &secattr); + switch (rc) { + case 0: + sksec->nlbl_state = NLBL_LABELED; + break; + case -EDESTADDRREQ: + /* no PF_INET6 support yet because we don't support any IPv6 + * labeling protocols */ + if (family != PF_INET) { + sksec->nlbl_state = NLBL_UNSET; + goto inet_conn_established_return; + } + + addr.sin_family = family; + addr.sin_addr.s_addr = sk_inet->daddr; + if (netlbl_conn_setattr(sk, (struct sockaddr *)&addr, + &secattr) != 0) { + /* we failed to label the connected socket (could be + * for a variety of reasons, the actual "why" isn't + * important here) so we have to go to our backup plan, + * labeling the packets individually in the netfilter + * local output hook. this is okay but we need to + * adjust the MSS of the connection to take into + * account any labeling overhead, since we don't know + * the exact overhead at this point we'll use the worst + * case value which is 40 bytes for IPv4 */ + struct inet_connection_sock *sk_conn = inet_csk(sk); + sk_conn->icsk_ext_hdr_len += 40 - + (sk_inet->opt ? sk_inet->opt->optlen : 0); + sk_conn->icsk_sync_mss(sk, sk_conn->icsk_pmtu_cookie); + + sksec->nlbl_state = NLBL_REQSKB; + } else + sksec->nlbl_state = NLBL_CONNLABELED; + break; + default: + /* note that we are failing to label the socket which could be + * a bad thing since it means traffic could leave the system + * without the desired labeling, however, all is not lost as + * we have a check in selinux_netlbl_inode_permission() to + * pick up the pieces that we might drop here because we can't + * return an error code */ + break; + } + +inet_conn_established_return: + netlbl_secattr_destroy(&secattr); + return; } /** @@ -398,7 +440,8 @@ int selinux_netlbl_socket_setsockopt(struct socket *sock, struct netlbl_lsm_secattr secattr; if (level == IPPROTO_IP && optname == IP_OPTIONS && - sksec->nlbl_state == NLBL_LABELED) { + (sksec->nlbl_state == NLBL_LABELED || + sksec->nlbl_state == NLBL_CONNLABELED)) { netlbl_secattr_init(&secattr); lock_sock(sk); rc = netlbl_sock_getattr(sk, &secattr); @@ -410,3 +453,51 @@ int selinux_netlbl_socket_setsockopt(struct socket *sock, return rc; } + +/** + * selinux_netlbl_socket_connect - Label a client-side socket on connect + * @sk: the socket to label + * @addr: the destination address + * + * Description: + * Attempt to label a connected socket with NetLabel using the given address. + * Returns zero values on success, negative values on failure. + * + */ +int selinux_netlbl_socket_connect(struct sock *sk, struct sockaddr *addr) +{ + int rc; + struct sk_security_struct *sksec = sk->sk_security; + struct netlbl_lsm_secattr secattr; + + if (sksec->nlbl_state != NLBL_REQSKB && + sksec->nlbl_state != NLBL_CONNLABELED) + return 0; + + netlbl_secattr_init(&secattr); + local_bh_disable(); + bh_lock_sock_nested(sk); + + /* connected sockets are allowed to disconnect when the address family + * is set to AF_UNSPEC, if that is what is happening we want to reset + * the socket */ + if (addr->sa_family == AF_UNSPEC) { + netlbl_sock_delattr(sk); + sksec->nlbl_state = NLBL_REQSKB; + rc = 0; + goto socket_connect_return; + } + rc = security_netlbl_sid_to_secattr(sksec->sid, &secattr); + if (rc != 0) + goto socket_connect_return; + rc = netlbl_conn_setattr(sk, addr, &secattr); + if (rc != 0) + goto socket_connect_return; + sksec->nlbl_state = NLBL_CONNLABELED; + +socket_connect_return: + bh_unlock_sock(sk); + local_bh_enable(); + netlbl_secattr_destroy(&secattr); + return rc; +} -- cgit v1.2.3-70-g09d2 From 8d75899d033617316e06296b7c0729612f56aba0 Mon Sep 17 00:00:00 2001 From: Paul Moore Date: Fri, 10 Oct 2008 10:16:33 -0400 Subject: netlabel: Changes to the NetLabel security attributes to allow LSMs to pass full contexts This patch provides support for including the LSM's secid in addition to the LSM's MLS information in the NetLabel security attributes structure. Signed-off-by: Paul Moore Acked-by: James Morris --- include/net/netlabel.h | 2 +- security/selinux/ss/services.c | 3 ++- 2 files changed, 3 insertions(+), 2 deletions(-) (limited to 'include/net') diff --git a/include/net/netlabel.h b/include/net/netlabel.h index 074cad40ac6..d56517ac3ba 100644 --- a/include/net/netlabel.h +++ b/include/net/netlabel.h @@ -203,7 +203,7 @@ struct netlbl_lsm_secattr { u32 type; char *domain; struct netlbl_lsm_cache *cache; - union { + struct { struct { struct netlbl_lsm_secattr_catmap *cat; u32 lvl; diff --git a/security/selinux/ss/services.c b/security/selinux/ss/services.c index c8f688a1004..ed0ca649d7d 100644 --- a/security/selinux/ss/services.c +++ b/security/selinux/ss/services.c @@ -2803,7 +2803,8 @@ int security_netlbl_sid_to_secattr(u32 sid, struct netlbl_lsm_secattr *secattr) rc = -ENOMEM; goto netlbl_sid_to_secattr_failure; } - secattr->flags |= NETLBL_SECATTR_DOMAIN_CPY; + secattr->attr.secid = sid; + secattr->flags |= NETLBL_SECATTR_DOMAIN_CPY | NETLBL_SECATTR_SECID; mls_export_netlbl_lvl(ctx, secattr); rc = mls_export_netlbl_cat(ctx, secattr); if (rc != 0) -- cgit v1.2.3-70-g09d2 From 15c45f7b2e81655f6eb500ec949c8bd70a04325a Mon Sep 17 00:00:00 2001 From: Paul Moore Date: Fri, 10 Oct 2008 10:16:34 -0400 Subject: cipso: Add support for native local labeling and fixup mapping names This patch accomplishes three minor tasks: add a new tag type for local labeling, rename the CIPSO_V4_MAP_STD define to CIPSO_V4_MAP_TRANS and replace some of the CIPSO "magic numbers" with constants from the header file. The first change allows CIPSO to support full LSM labels/contexts, not just MLS attributes. The second change brings the mapping names inline with what userspace is using, compatibility is preserved since we don't actually change the value. The last change is to aid readability and help prevent mistakes. Signed-off-by: Paul Moore --- include/net/cipso_ipv4.h | 13 ++-- net/ipv4/cipso_ipv4.c | 127 +++++++++++++++++++++++++++++++++------ net/ipv4/ip_options.c | 2 +- net/netlabel/netlabel_cipso_v4.c | 14 ++--- net/netlabel/netlabel_cipso_v4.h | 4 +- net/netlabel/netlabel_kapi.c | 4 +- 6 files changed, 128 insertions(+), 36 deletions(-) (limited to 'include/net') diff --git a/include/net/cipso_ipv4.h b/include/net/cipso_ipv4.h index 811febf97ca..9909774eb99 100644 --- a/include/net/cipso_ipv4.h +++ b/include/net/cipso_ipv4.h @@ -45,7 +45,7 @@ /* known doi values */ #define CIPSO_V4_DOI_UNKNOWN 0x00000000 -/* tag types */ +/* standard tag types */ #define CIPSO_V4_TAG_INVALID 0 #define CIPSO_V4_TAG_RBITMAP 1 #define CIPSO_V4_TAG_ENUM 2 @@ -53,10 +53,14 @@ #define CIPSO_V4_TAG_PBITMAP 6 #define CIPSO_V4_TAG_FREEFORM 7 +/* non-standard tag types (tags > 127) */ +#define CIPSO_V4_TAG_LOCAL 128 + /* doi mapping types */ #define CIPSO_V4_MAP_UNKNOWN 0 -#define CIPSO_V4_MAP_STD 1 +#define CIPSO_V4_MAP_TRANS 1 #define CIPSO_V4_MAP_PASS 2 +#define CIPSO_V4_MAP_LOCAL 3 /* limits */ #define CIPSO_V4_MAX_REM_LVLS 255 @@ -215,7 +219,7 @@ int cipso_v4_skbuff_setattr(struct sk_buff *skb, int cipso_v4_skbuff_delattr(struct sk_buff *skb); int cipso_v4_skbuff_getattr(const struct sk_buff *skb, struct netlbl_lsm_secattr *secattr); -int cipso_v4_validate(unsigned char **option); +int cipso_v4_validate(const struct sk_buff *skb, unsigned char **option); #else static inline void cipso_v4_error(struct sk_buff *skb, int error, @@ -259,7 +263,8 @@ static inline int cipso_v4_skbuff_getattr(const struct sk_buff *skb, return -ENOSYS; } -static inline int cipso_v4_validate(unsigned char **option) +static inline int cipso_v4_validate(const struct sk_buff *skb, + unsigned char **option) { return -ENOSYS; } diff --git a/net/ipv4/cipso_ipv4.c b/net/ipv4/cipso_ipv4.c index 23768b9d6b6..490e035c6d9 100644 --- a/net/ipv4/cipso_ipv4.c +++ b/net/ipv4/cipso_ipv4.c @@ -109,6 +109,19 @@ int cipso_v4_rbm_strictvalid = 1; * be omitted. */ #define CIPSO_V4_TAG_RNG_CAT_MAX 8 +/* Base length of the local tag (non-standard tag). + * Tag definition (may change between kernel versions) + * + * 0 8 16 24 32 + * +----------+----------+----------+----------+ + * | 10000000 | 00000110 | 32-bit secid value | + * +----------+----------+----------+----------+ + * | in (host byte order)| + * +----------+----------+ + * + */ +#define CIPSO_V4_TAG_LOC_BLEN 6 + /* * Helper Functions */ @@ -467,6 +480,10 @@ int cipso_v4_doi_add(struct cipso_v4_doi *doi_def) if (doi_def->type != CIPSO_V4_MAP_PASS) return -EINVAL; break; + case CIPSO_V4_TAG_LOCAL: + if (doi_def->type != CIPSO_V4_MAP_LOCAL) + return -EINVAL; + break; default: return -EINVAL; } @@ -502,7 +519,7 @@ void cipso_v4_doi_free(struct cipso_v4_doi *doi_def) return; switch (doi_def->type) { - case CIPSO_V4_MAP_STD: + case CIPSO_V4_MAP_TRANS: kfree(doi_def->map.std->lvl.cipso); kfree(doi_def->map.std->lvl.local); kfree(doi_def->map.std->cat.cipso); @@ -673,7 +690,7 @@ static int cipso_v4_map_lvl_valid(const struct cipso_v4_doi *doi_def, u8 level) switch (doi_def->type) { case CIPSO_V4_MAP_PASS: return 0; - case CIPSO_V4_MAP_STD: + case CIPSO_V4_MAP_TRANS: if (doi_def->map.std->lvl.cipso[level] < CIPSO_V4_INV_LVL) return 0; break; @@ -702,7 +719,7 @@ static int cipso_v4_map_lvl_hton(const struct cipso_v4_doi *doi_def, case CIPSO_V4_MAP_PASS: *net_lvl = host_lvl; return 0; - case CIPSO_V4_MAP_STD: + case CIPSO_V4_MAP_TRANS: if (host_lvl < doi_def->map.std->lvl.local_size && doi_def->map.std->lvl.local[host_lvl] < CIPSO_V4_INV_LVL) { *net_lvl = doi_def->map.std->lvl.local[host_lvl]; @@ -736,7 +753,7 @@ static int cipso_v4_map_lvl_ntoh(const struct cipso_v4_doi *doi_def, case CIPSO_V4_MAP_PASS: *host_lvl = net_lvl; return 0; - case CIPSO_V4_MAP_STD: + case CIPSO_V4_MAP_TRANS: map_tbl = doi_def->map.std; if (net_lvl < map_tbl->lvl.cipso_size && map_tbl->lvl.cipso[net_lvl] < CIPSO_V4_INV_LVL) { @@ -773,7 +790,7 @@ static int cipso_v4_map_cat_rbm_valid(const struct cipso_v4_doi *doi_def, switch (doi_def->type) { case CIPSO_V4_MAP_PASS: return 0; - case CIPSO_V4_MAP_STD: + case CIPSO_V4_MAP_TRANS: cipso_cat_size = doi_def->map.std->cat.cipso_size; cipso_array = doi_def->map.std->cat.cipso; for (;;) { @@ -821,7 +838,7 @@ static int cipso_v4_map_cat_rbm_hton(const struct cipso_v4_doi *doi_def, u32 host_cat_size = 0; u32 *host_cat_array = NULL; - if (doi_def->type == CIPSO_V4_MAP_STD) { + if (doi_def->type == CIPSO_V4_MAP_TRANS) { host_cat_size = doi_def->map.std->cat.local_size; host_cat_array = doi_def->map.std->cat.local; } @@ -836,7 +853,7 @@ static int cipso_v4_map_cat_rbm_hton(const struct cipso_v4_doi *doi_def, case CIPSO_V4_MAP_PASS: net_spot = host_spot; break; - case CIPSO_V4_MAP_STD: + case CIPSO_V4_MAP_TRANS: if (host_spot >= host_cat_size) return -EPERM; net_spot = host_cat_array[host_spot]; @@ -882,7 +899,7 @@ static int cipso_v4_map_cat_rbm_ntoh(const struct cipso_v4_doi *doi_def, u32 net_cat_size = 0; u32 *net_cat_array = NULL; - if (doi_def->type == CIPSO_V4_MAP_STD) { + if (doi_def->type == CIPSO_V4_MAP_TRANS) { net_cat_size = doi_def->map.std->cat.cipso_size; net_cat_array = doi_def->map.std->cat.cipso; } @@ -902,7 +919,7 @@ static int cipso_v4_map_cat_rbm_ntoh(const struct cipso_v4_doi *doi_def, case CIPSO_V4_MAP_PASS: host_spot = net_spot; break; - case CIPSO_V4_MAP_STD: + case CIPSO_V4_MAP_TRANS: if (net_spot >= net_cat_size) return -EPERM; host_spot = net_cat_array[net_spot]; @@ -1238,7 +1255,7 @@ static int cipso_v4_gentag_rbm(const struct cipso_v4_doi *doi_def, } else tag_len = 4; - buffer[0] = 0x01; + buffer[0] = CIPSO_V4_TAG_RBITMAP; buffer[1] = tag_len; buffer[3] = level; @@ -1334,7 +1351,7 @@ static int cipso_v4_gentag_enum(const struct cipso_v4_doi *doi_def, } else tag_len = 4; - buffer[0] = 0x02; + buffer[0] = CIPSO_V4_TAG_ENUM; buffer[1] = tag_len; buffer[3] = level; @@ -1430,7 +1447,7 @@ static int cipso_v4_gentag_rng(const struct cipso_v4_doi *doi_def, } else tag_len = 4; - buffer[0] = 0x05; + buffer[0] = CIPSO_V4_TAG_RANGE; buffer[1] = tag_len; buffer[3] = level; @@ -1483,6 +1500,54 @@ static int cipso_v4_parsetag_rng(const struct cipso_v4_doi *doi_def, return 0; } +/** + * cipso_v4_gentag_loc - Generate a CIPSO local tag (non-standard) + * @doi_def: the DOI definition + * @secattr: the security attributes + * @buffer: the option buffer + * @buffer_len: length of buffer in bytes + * + * Description: + * Generate a CIPSO option using the local tag. Returns the size of the tag + * on success, negative values on failure. + * + */ +static int cipso_v4_gentag_loc(const struct cipso_v4_doi *doi_def, + const struct netlbl_lsm_secattr *secattr, + unsigned char *buffer, + u32 buffer_len) +{ + if (!(secattr->flags & NETLBL_SECATTR_SECID)) + return -EPERM; + + buffer[0] = CIPSO_V4_TAG_LOCAL; + buffer[1] = CIPSO_V4_TAG_LOC_BLEN; + *(u32 *)&buffer[2] = secattr->attr.secid; + + return CIPSO_V4_TAG_LOC_BLEN; +} + +/** + * cipso_v4_parsetag_loc - Parse a CIPSO local tag + * @doi_def: the DOI definition + * @tag: the CIPSO tag + * @secattr: the security attributes + * + * Description: + * Parse a CIPSO local tag and return the security attributes in @secattr. + * Return zero on success, negatives values on failure. + * + */ +static int cipso_v4_parsetag_loc(const struct cipso_v4_doi *doi_def, + const unsigned char *tag, + struct netlbl_lsm_secattr *secattr) +{ + secattr->attr.secid = *(u32 *)&tag[2]; + secattr->flags |= NETLBL_SECATTR_SECID; + + return 0; +} + /** * cipso_v4_validate - Validate a CIPSO option * @option: the start of the option, on error it is set to point to the error @@ -1502,7 +1567,7 @@ static int cipso_v4_parsetag_rng(const struct cipso_v4_doi *doi_def, * that is unrecognized." * */ -int cipso_v4_validate(unsigned char **option) +int cipso_v4_validate(const struct sk_buff *skb, unsigned char **option) { unsigned char *opt = *option; unsigned char *tag; @@ -1527,7 +1592,7 @@ int cipso_v4_validate(unsigned char **option) goto validate_return_locked; } - opt_iter = 6; + opt_iter = CIPSO_V4_HDR_LEN; tag = opt + opt_iter; while (opt_iter < opt_len) { for (tag_iter = 0; doi_def->tags[tag_iter] != tag[0];) @@ -1545,7 +1610,7 @@ int cipso_v4_validate(unsigned char **option) switch (tag[0]) { case CIPSO_V4_TAG_RBITMAP: - if (tag_len < 4) { + if (tag_len < CIPSO_V4_TAG_RBM_BLEN) { err_offset = opt_iter + 1; goto validate_return_locked; } @@ -1563,7 +1628,7 @@ int cipso_v4_validate(unsigned char **option) err_offset = opt_iter + 3; goto validate_return_locked; } - if (tag_len > 4 && + if (tag_len > CIPSO_V4_TAG_RBM_BLEN && cipso_v4_map_cat_rbm_valid(doi_def, &tag[4], tag_len - 4) < 0) { @@ -1573,7 +1638,7 @@ int cipso_v4_validate(unsigned char **option) } break; case CIPSO_V4_TAG_ENUM: - if (tag_len < 4) { + if (tag_len < CIPSO_V4_TAG_ENUM_BLEN) { err_offset = opt_iter + 1; goto validate_return_locked; } @@ -1583,7 +1648,7 @@ int cipso_v4_validate(unsigned char **option) err_offset = opt_iter + 3; goto validate_return_locked; } - if (tag_len > 4 && + if (tag_len > CIPSO_V4_TAG_ENUM_BLEN && cipso_v4_map_cat_enum_valid(doi_def, &tag[4], tag_len - 4) < 0) { @@ -1592,7 +1657,7 @@ int cipso_v4_validate(unsigned char **option) } break; case CIPSO_V4_TAG_RANGE: - if (tag_len < 4) { + if (tag_len < CIPSO_V4_TAG_RNG_BLEN) { err_offset = opt_iter + 1; goto validate_return_locked; } @@ -1602,7 +1667,7 @@ int cipso_v4_validate(unsigned char **option) err_offset = opt_iter + 3; goto validate_return_locked; } - if (tag_len > 4 && + if (tag_len > CIPSO_V4_TAG_RNG_BLEN && cipso_v4_map_cat_rng_valid(doi_def, &tag[4], tag_len - 4) < 0) { @@ -1610,6 +1675,19 @@ int cipso_v4_validate(unsigned char **option) goto validate_return_locked; } break; + case CIPSO_V4_TAG_LOCAL: + /* This is a non-standard tag that we only allow for + * local connections, so if the incoming interface is + * not the loopback device drop the packet. */ + if (!(skb->dev->flags & IFF_LOOPBACK)) { + err_offset = opt_iter; + goto validate_return_locked; + } + if (tag_len != CIPSO_V4_TAG_LOC_BLEN) { + err_offset = opt_iter + 1; + goto validate_return_locked; + } + break; default: err_offset = opt_iter; goto validate_return_locked; @@ -1712,6 +1790,12 @@ static int cipso_v4_genopt(unsigned char *buf, u32 buf_len, &buf[CIPSO_V4_HDR_LEN], buf_len - CIPSO_V4_HDR_LEN); break; + case CIPSO_V4_TAG_LOCAL: + ret_val = cipso_v4_gentag_loc(doi_def, + secattr, + &buf[CIPSO_V4_HDR_LEN], + buf_len - CIPSO_V4_HDR_LEN); + break; default: return -EPERM; } @@ -1921,6 +2005,9 @@ static int cipso_v4_getattr(const unsigned char *cipso, case CIPSO_V4_TAG_RANGE: ret_val = cipso_v4_parsetag_rng(doi_def, &cipso[6], secattr); break; + case CIPSO_V4_TAG_LOCAL: + ret_val = cipso_v4_parsetag_loc(doi_def, &cipso[6], secattr); + break; } if (ret_val == 0) secattr->type = NETLBL_NLTYPE_CIPSOV4; diff --git a/net/ipv4/ip_options.c b/net/ipv4/ip_options.c index be3f18a7a40..2c88da6e786 100644 --- a/net/ipv4/ip_options.c +++ b/net/ipv4/ip_options.c @@ -438,7 +438,7 @@ int ip_options_compile(struct net *net, goto error; } opt->cipso = optptr - iph; - if (cipso_v4_validate(&optptr)) { + if (cipso_v4_validate(skb, &optptr)) { pp_ptr = optptr; goto error; } diff --git a/net/netlabel/netlabel_cipso_v4.c b/net/netlabel/netlabel_cipso_v4.c index 5c4f60bbc82..db83a67cbc7 100644 --- a/net/netlabel/netlabel_cipso_v4.c +++ b/net/netlabel/netlabel_cipso_v4.c @@ -132,9 +132,9 @@ static int netlbl_cipsov4_add_common(struct genl_info *info, * @info: the Generic NETLINK info block * * Description: - * Create a new CIPSO_V4_MAP_STD DOI definition based on the given ADD message - * and add it to the CIPSO V4 engine. Return zero on success and non-zero on - * error. + * Create a new CIPSO_V4_MAP_TRANS DOI definition based on the given ADD + * message and add it to the CIPSO V4 engine. Return zero on success and + * non-zero on error. * */ static int netlbl_cipsov4_add_std(struct genl_info *info) @@ -164,7 +164,7 @@ static int netlbl_cipsov4_add_std(struct genl_info *info) ret_val = -ENOMEM; goto add_std_failure; } - doi_def->type = CIPSO_V4_MAP_STD; + doi_def->type = CIPSO_V4_MAP_TRANS; ret_val = netlbl_cipsov4_add_common(info, doi_def); if (ret_val != 0) @@ -393,8 +393,8 @@ static int netlbl_cipsov4_add(struct sk_buff *skb, struct genl_info *info) type = nla_get_u32(info->attrs[NLBL_CIPSOV4_A_MTYPE]); switch (type) { - case CIPSO_V4_MAP_STD: - type_str = "std"; + case CIPSO_V4_MAP_TRANS: + type_str = "trans"; ret_val = netlbl_cipsov4_add_std(info); break; case CIPSO_V4_MAP_PASS: @@ -497,7 +497,7 @@ list_start: nla_nest_end(ans_skb, nla_a); switch (doi_def->type) { - case CIPSO_V4_MAP_STD: + case CIPSO_V4_MAP_TRANS: nla_a = nla_nest_start(ans_skb, NLBL_CIPSOV4_A_MLSLVLLST); if (nla_a == NULL) { ret_val = -ENOMEM; diff --git a/net/netlabel/netlabel_cipso_v4.h b/net/netlabel/netlabel_cipso_v4.h index 220cb9d06b4..fb3957f1d69 100644 --- a/net/netlabel/netlabel_cipso_v4.h +++ b/net/netlabel/netlabel_cipso_v4.h @@ -45,7 +45,7 @@ * NLBL_CIPSOV4_A_MTYPE * NLBL_CIPSOV4_A_TAGLST * - * If using CIPSO_V4_MAP_STD the following attributes are required: + * If using CIPSO_V4_MAP_TRANS the following attributes are required: * * NLBL_CIPSOV4_A_MLSLVLLST * NLBL_CIPSOV4_A_MLSCATLST @@ -76,7 +76,7 @@ * NLBL_CIPSOV4_A_MTYPE * NLBL_CIPSOV4_A_TAGLST * - * If using CIPSO_V4_MAP_STD the following attributes are required: + * If using CIPSO_V4_MAP_TRANS the following attributes are required: * * NLBL_CIPSOV4_A_MLSLVLLST * NLBL_CIPSOV4_A_MLSCATLST diff --git a/net/netlabel/netlabel_kapi.c b/net/netlabel/netlabel_kapi.c index 78fc557689b..8435b15c3f7 100644 --- a/net/netlabel/netlabel_kapi.c +++ b/net/netlabel/netlabel_kapi.c @@ -157,8 +157,8 @@ cfg_cipsov4_add_map_return: audit_info); if (audit_buf != NULL) { switch (doi_type) { - case CIPSO_V4_MAP_STD: - type_str = "std"; + case CIPSO_V4_MAP_TRANS: + type_str = "trans"; break; case CIPSO_V4_MAP_PASS: type_str = "pass"; -- cgit v1.2.3-70-g09d2 From d91d40799165b0c84c97e7c71fb8039494ff07dc Mon Sep 17 00:00:00 2001 From: Paul Moore Date: Fri, 10 Oct 2008 10:16:34 -0400 Subject: netlabel: Add configuration support for local labeling Add the necessary NetLabel support for the new CIPSO mapping, CIPSO_V4_MAP_LOCAL, which allows full LSM label/context support. Signed-off-by: Paul Moore Reviewed-by: James Morris --- include/net/netlabel.h | 3 ++- net/netlabel/netlabel_cipso_v4.c | 41 ++++++++++++++++++++++++++++++++++++++++ net/netlabel/netlabel_cipso_v4.h | 6 ++++-- net/netlabel/netlabel_kapi.c | 3 +++ 4 files changed, 50 insertions(+), 3 deletions(-) (limited to 'include/net') diff --git a/include/net/netlabel.h b/include/net/netlabel.h index d56517ac3ba..17c442a4514 100644 --- a/include/net/netlabel.h +++ b/include/net/netlabel.h @@ -72,7 +72,8 @@ struct cipso_v4_doi; /* NetLabel NETLINK protocol version * 1: initial version * 2: added static labels for unlabeled connections - * 3: network selectors added to the NetLabel/LSM domain mapping + * 3: network selectors added to the NetLabel/LSM domain mapping and the + * CIPSO_V4_MAP_LOCAL CIPSO mapping was added */ #define NETLBL_PROTO_VERSION 3 diff --git a/net/netlabel/netlabel_cipso_v4.c b/net/netlabel/netlabel_cipso_v4.c index db83a67cbc7..fff32b70efa 100644 --- a/net/netlabel/netlabel_cipso_v4.c +++ b/net/netlabel/netlabel_cipso_v4.c @@ -364,6 +364,43 @@ add_pass_failure: return ret_val; } +/** + * netlbl_cipsov4_add_local - Adds a CIPSO V4 DOI definition + * @info: the Generic NETLINK info block + * + * Description: + * Create a new CIPSO_V4_MAP_LOCAL DOI definition based on the given ADD + * message and add it to the CIPSO V4 engine. Return zero on success and + * non-zero on error. + * + */ +static int netlbl_cipsov4_add_local(struct genl_info *info) +{ + int ret_val; + struct cipso_v4_doi *doi_def = NULL; + + if (!info->attrs[NLBL_CIPSOV4_A_TAGLST]) + return -EINVAL; + + doi_def = kmalloc(sizeof(*doi_def), GFP_KERNEL); + if (doi_def == NULL) + return -ENOMEM; + doi_def->type = CIPSO_V4_MAP_LOCAL; + + ret_val = netlbl_cipsov4_add_common(info, doi_def); + if (ret_val != 0) + goto add_local_failure; + + ret_val = cipso_v4_doi_add(doi_def); + if (ret_val != 0) + goto add_local_failure; + return 0; + +add_local_failure: + cipso_v4_doi_free(doi_def); + return ret_val; +} + /** * netlbl_cipsov4_add - Handle an ADD message * @skb: the NETLINK buffer @@ -401,6 +438,10 @@ static int netlbl_cipsov4_add(struct sk_buff *skb, struct genl_info *info) type_str = "pass"; ret_val = netlbl_cipsov4_add_pass(info); break; + case CIPSO_V4_MAP_LOCAL: + type_str = "local"; + ret_val = netlbl_cipsov4_add_local(info); + break; } if (ret_val == 0) atomic_inc(&netlabel_mgmt_protocount); diff --git a/net/netlabel/netlabel_cipso_v4.h b/net/netlabel/netlabel_cipso_v4.h index fb3957f1d69..c8a4079261f 100644 --- a/net/netlabel/netlabel_cipso_v4.h +++ b/net/netlabel/netlabel_cipso_v4.h @@ -50,7 +50,8 @@ * NLBL_CIPSOV4_A_MLSLVLLST * NLBL_CIPSOV4_A_MLSCATLST * - * If using CIPSO_V4_MAP_PASS no additional attributes are required. + * If using CIPSO_V4_MAP_PASS or CIPSO_V4_MAP_LOCAL no additional attributes + * are required. * * o REMOVE: * Sent by an application to remove a specific DOI mapping table from the @@ -81,7 +82,8 @@ * NLBL_CIPSOV4_A_MLSLVLLST * NLBL_CIPSOV4_A_MLSCATLST * - * If using CIPSO_V4_MAP_PASS no additional attributes are required. + * If using CIPSO_V4_MAP_PASS or CIPSO_V4_MAP_LOCAL no additional attributes + * are required. * * o LISTALL: * This message is sent by an application to list the valid DOIs on the diff --git a/net/netlabel/netlabel_kapi.c b/net/netlabel/netlabel_kapi.c index 8435b15c3f7..b32eceb3ab0 100644 --- a/net/netlabel/netlabel_kapi.c +++ b/net/netlabel/netlabel_kapi.c @@ -163,6 +163,9 @@ cfg_cipsov4_add_map_return: case CIPSO_V4_MAP_PASS: type_str = "pass"; break; + case CIPSO_V4_MAP_LOCAL: + type_str = "local"; + break; default: type_str = "(unknown)"; } -- cgit v1.2.3-70-g09d2 From 64f1b65382054f8bfd528f2c4253297c232816eb Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sat, 11 Oct 2008 09:46:24 -0700 Subject: net: fix dummy 'nf_conntrack_event_cache()' The dummy version of 'nf_conntrack_event_cache()' (used when the NF_CONNTRACK_EVENTS config option is not enabled) had not been updated when the calling convention changed. This was introduced by commit a71996fccce4b2086a26036aa3c915365ca36926 ("netfilter: netns nf_conntrack: pass conntrack to nf_conntrack_event_cache() not skb") Tssk. Cc: Alexey Dobriyan Cc: Patrick McHardy Cc: David Miller Signed-off-by: Linus Torvalds --- include/net/netfilter/nf_conntrack_ecache.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/net') diff --git a/include/net/netfilter/nf_conntrack_ecache.h b/include/net/netfilter/nf_conntrack_ecache.h index 11480e633a9..1285ff26a01 100644 --- a/include/net/netfilter/nf_conntrack_ecache.h +++ b/include/net/netfilter/nf_conntrack_ecache.h @@ -63,7 +63,7 @@ extern void nf_conntrack_ecache_fini(struct net *net); #else /* CONFIG_NF_CONNTRACK_EVENTS */ static inline void nf_conntrack_event_cache(enum ip_conntrack_events event, - const struct sk_buff *skb) {} + struct nf_conn *ct) {} static inline void nf_conntrack_event(enum ip_conntrack_events event, struct nf_conn *ct) {} static inline void nf_ct_deliver_cached_events(const struct nf_conn *ct) {} -- cgit v1.2.3-70-g09d2