Diffstat (limited to 'net'): 185 files changed, 2319 insertions, 2051 deletions
diff --git a/net/8021q/vlan.c b/net/8021q/vlan.c index bd93c45778d..de78c9dd713 100644 --- a/net/8021q/vlan.c +++ b/net/8021q/vlan.c @@ -240,10 +240,8 @@ static int unregister_vlan_dev(struct net_device *real_dev, * interlock with HW accelerating devices or SW vlan * input packet processing. */ - if (real_dev->features & - (NETIF_F_HW_VLAN_RX | NETIF_F_HW_VLAN_FILTER)) { + if (real_dev->features & NETIF_F_HW_VLAN_FILTER) real_dev->vlan_rx_kill_vid(real_dev, vlan_id); - } vlan_group_set_device(grp, vlan_id, NULL); synchronize_net(); @@ -409,16 +407,14 @@ static struct net_device *register_vlan_device(const char *eth_IF_name, } if ((real_dev->features & NETIF_F_HW_VLAN_RX) && - (real_dev->vlan_rx_register == NULL || - real_dev->vlan_rx_kill_vid == NULL)) { + !real_dev->vlan_rx_register) { printk(VLAN_DBG "%s: Device %s has buggy VLAN hw accel.\n", __FUNCTION__, real_dev->name); goto out_put_dev; } if ((real_dev->features & NETIF_F_HW_VLAN_FILTER) && - (real_dev->vlan_rx_add_vid == NULL || - real_dev->vlan_rx_kill_vid == NULL)) { + (!real_dev->vlan_rx_add_vid || !real_dev->vlan_rx_kill_vid)) { printk(VLAN_DBG "%s: Device %s has buggy VLAN hw accel.\n", __FUNCTION__, real_dev->name); goto out_put_dev; @@ -740,8 +736,7 @@ static int vlan_ioctl_handler(void __user *arg) case SET_VLAN_NAME_TYPE_CMD: if (!capable(CAP_NET_ADMIN)) return -EPERM; - if ((args.u.name_type >= 0) && - (args.u.name_type < VLAN_NAME_TYPE_HIGHEST)) { + if (args.u.name_type < VLAN_NAME_TYPE_HIGHEST) { vlan_name_type = args.u.name_type; err = 0; } else { diff --git a/net/Kconfig b/net/Kconfig index caeacd16656..f3de72978ab 100644 --- a/net/Kconfig +++ b/net/Kconfig @@ -218,6 +218,7 @@ config FIB_RULES bool menu "Wireless" + depends on !S390 source "net/wireless/Kconfig" source "net/mac80211/Kconfig" diff --git a/net/appletalk/ddp.c b/net/appletalk/ddp.c index f6a92a0b7aa..fbdfb1224ae 100644 --- a/net/appletalk/ddp.c +++ b/net/appletalk/ddp.c @@ -1844,7 +1844,6 @@ static const struct proto_ops SOCKOPS_WRAPPED(atalk_dgram_ops) = { .sendpage = sock_no_sendpage, }; -#include <linux/smp_lock.h> SOCKOPS_WRAP(atalk_dgram, PF_APPLETALK); static struct notifier_block ddp_notifier = { diff --git a/net/ax25/Kconfig b/net/ax25/Kconfig index 43dd86fca4d..2a72aa96a56 100644 --- a/net/ax25/Kconfig +++ b/net/ax25/Kconfig @@ -3,7 +3,7 @@ # menuconfig HAMRADIO - depends on NET + depends on NET && !S390 bool "Amateur Radio support" help If you want to connect your Linux box to an amateur radio, answer Y diff --git a/net/ax25/af_ax25.c b/net/ax25/af_ax25.c index 6ded95272a5..429e13a6c6a 100644 --- a/net/ax25/af_ax25.c +++ b/net/ax25/af_ax25.c @@ -23,7 +23,6 @@ #include <linux/sched.h> #include <linux/timer.h> #include <linux/string.h> -#include <linux/smp_lock.h> #include <linux/sockios.h> #include <linux/net.h> #include <net/ax25.h> diff --git a/net/bluetooth/Kconfig b/net/bluetooth/Kconfig index 6929490d095..7725da95a76 100644 --- a/net/bluetooth/Kconfig +++ b/net/bluetooth/Kconfig @@ -3,7 +3,7 @@ # menuconfig BT - depends on NET + depends on NET && !S390 tristate "Bluetooth subsystem support" help Bluetooth is low-cost, low-power, short-range wireless technology. 
diff --git a/net/bluetooth/bnep/core.c b/net/bluetooth/bnep/core.c index ab2db55982c..1c8f4a0c5f4 100644 --- a/net/bluetooth/bnep/core.c +++ b/net/bluetooth/bnep/core.c @@ -37,7 +37,6 @@ #include <linux/init.h> #include <linux/wait.h> #include <linux/errno.h> -#include <linux/smp_lock.h> #include <linux/net.h> #include <net/sock.h> diff --git a/net/bluetooth/hci_sock.c b/net/bluetooth/hci_sock.c index bfc9a35bad3..1dae3dfc66a 100644 --- a/net/bluetooth/hci_sock.c +++ b/net/bluetooth/hci_sock.c @@ -665,7 +665,8 @@ static int hci_sock_dev_event(struct notifier_block *this, unsigned long event, /* Detach sockets from device */ read_lock(&hci_sk_list.lock); sk_for_each(sk, node, &hci_sk_list.head) { - lock_sock(sk); + local_bh_disable(); + bh_lock_sock_nested(sk); if (hci_pi(sk)->hdev == hdev) { hci_pi(sk)->hdev = NULL; sk->sk_err = EPIPE; @@ -674,7 +675,8 @@ static int hci_sock_dev_event(struct notifier_block *this, unsigned long event, hci_dev_put(hdev); } - release_sock(sk); + bh_unlock_sock(sk); + local_bh_enable(); } read_unlock(&hci_sk_list.lock); } diff --git a/net/bluetooth/hidp/core.c b/net/bluetooth/hidp/core.c index d342e89b8bd..ceadfcf457c 100644 --- a/net/bluetooth/hidp/core.c +++ b/net/bluetooth/hidp/core.c @@ -174,7 +174,7 @@ static inline int hidp_queue_event(struct hidp_session *session, struct input_de static int hidp_hidinput_event(struct input_dev *dev, unsigned int type, unsigned int code, int value) { - struct hid_device *hid = dev->private; + struct hid_device *hid = input_get_drvdata(dev); struct hidp_session *session = hid->driver_data; return hidp_queue_event(session, dev, type, code, value); @@ -182,7 +182,7 @@ static int hidp_hidinput_event(struct input_dev *dev, unsigned int type, unsigne static int hidp_input_event(struct input_dev *dev, unsigned int type, unsigned int code, int value) { - struct hidp_session *session = dev->private; + struct hidp_session *session = input_get_drvdata(dev); return hidp_queue_event(session, dev, type, code, value); } @@ -630,7 +630,7 @@ static inline void hidp_setup_input(struct hidp_session *session, struct hidp_co struct input_dev *input = session->input; int i; - input->private = session; + input_set_drvdata(input, session); input->name = "Bluetooth HID Boot Protocol Device"; @@ -663,7 +663,7 @@ static inline void hidp_setup_input(struct hidp_session *session, struct hidp_co input->relbit[0] |= BIT(REL_WHEEL); } - input->cdev.dev = hidp_get_device(session); + input->dev.parent = hidp_get_device(session); input->event = hidp_input_event; @@ -737,10 +737,8 @@ static inline void hidp_setup_hid(struct hidp_session *session, struct hidp_conn list_for_each_entry(report, &hid->report_enum[HID_FEATURE_REPORT].report_list, list) hidp_send_report(session, report); - if (hidinput_connect(hid) == 0) { + if (hidinput_connect(hid) == 0) hid->claimed |= HID_CLAIMED_INPUT; - hid_ff_init(hid); - } } int hidp_add_connection(struct hidp_connadd_req *req, struct socket *ctrl_sock, struct socket *intr_sock) @@ -864,7 +862,7 @@ failed: if (session->hid) hid_free_device(session->hid); - kfree(session->input); + input_free_device(session->input); kfree(session); return err; } diff --git a/net/bluetooth/l2cap.c b/net/bluetooth/l2cap.c index a59b1fb63b7..670ff95ca64 100644 --- a/net/bluetooth/l2cap.c +++ b/net/bluetooth/l2cap.c @@ -507,6 +507,7 @@ static void l2cap_sock_init(struct sock *sk, struct sock *parent) } /* Default config options */ + pi->conf_len = 0; pi->conf_mtu = L2CAP_DEFAULT_MTU; pi->flush_to = L2CAP_DEFAULT_FLUSH_TO; } @@ -1271,42 +1272,6 
@@ static inline int l2cap_get_conf_opt(void **ptr, int *type, int *olen, unsigned return len; } -static inline void l2cap_parse_conf_req(struct sock *sk, void *data, int len) -{ - int type, hint, olen; - unsigned long val; - void *ptr = data; - - BT_DBG("sk %p len %d", sk, len); - - while (len >= L2CAP_CONF_OPT_SIZE) { - len -= l2cap_get_conf_opt(&ptr, &type, &olen, &val); - - hint = type & 0x80; - type &= 0x7f; - - switch (type) { - case L2CAP_CONF_MTU: - l2cap_pi(sk)->conf_mtu = val; - break; - - case L2CAP_CONF_FLUSH_TO: - l2cap_pi(sk)->flush_to = val; - break; - - case L2CAP_CONF_QOS: - break; - - default: - if (hint) - break; - - /* FIXME: Reject unknown option */ - break; - } - } -} - static void l2cap_add_conf_opt(void **ptr, u8 type, u8 len, unsigned long val) { struct l2cap_conf_opt *opt = *ptr; @@ -1358,39 +1323,75 @@ static int l2cap_build_conf_req(struct sock *sk, void *data) return ptr - data; } -static inline int l2cap_conf_output(struct sock *sk, void **ptr) +static int l2cap_parse_conf_req(struct sock *sk, void *data) { struct l2cap_pinfo *pi = l2cap_pi(sk); - int result = 0; + struct l2cap_conf_rsp *rsp = data; + void *ptr = rsp->data; + void *req = pi->conf_req; + int len = pi->conf_len; + int type, hint, olen; + unsigned long val; + u16 result = L2CAP_CONF_SUCCESS; - /* Configure output options and let the other side know - * which ones we don't like. */ - if (pi->conf_mtu < pi->omtu) - result = L2CAP_CONF_UNACCEPT; - else - pi->omtu = pi->conf_mtu; + BT_DBG("sk %p", sk); + + while (len >= L2CAP_CONF_OPT_SIZE) { + len -= l2cap_get_conf_opt(&req, &type, &olen, &val); - l2cap_add_conf_opt(ptr, L2CAP_CONF_MTU, 2, pi->omtu); + hint = type & 0x80; + type &= 0x7f; + + switch (type) { + case L2CAP_CONF_MTU: + pi->conf_mtu = val; + break; + + case L2CAP_CONF_FLUSH_TO: + pi->flush_to = val; + break; + + case L2CAP_CONF_QOS: + break; + + default: + if (hint) + break; + + result = L2CAP_CONF_UNKNOWN; + *((u8 *) ptr++) = type; + break; + } + } + + if (result == L2CAP_CONF_SUCCESS) { + /* Configure output options and let the other side know + * which ones we don't like. */ + + if (pi->conf_mtu < pi->omtu) + result = L2CAP_CONF_UNACCEPT; + else + pi->omtu = pi->conf_mtu; + + l2cap_add_conf_opt(&ptr, L2CAP_CONF_MTU, 2, pi->omtu); + } - BT_DBG("sk %p result %d", sk, result); - return result; + rsp->scid = cpu_to_le16(pi->dcid); + rsp->result = cpu_to_le16(result); + rsp->flags = cpu_to_le16(0x0000); + + return ptr - data; } -static int l2cap_build_conf_rsp(struct sock *sk, void *data, int *result) +static int l2cap_build_conf_rsp(struct sock *sk, void *data, u16 result, u16 flags) { struct l2cap_conf_rsp *rsp = data; void *ptr = rsp->data; - u16 flags = 0; - - BT_DBG("sk %p complete %d", sk, result ? 1 : 0); - if (result) - *result = l2cap_conf_output(sk, &ptr); - else - flags = 0x0001; + BT_DBG("sk %p", sk); rsp->scid = cpu_to_le16(l2cap_pi(sk)->dcid); - rsp->result = cpu_to_le16(result ? *result : 0); + rsp->result = cpu_to_le16(result); rsp->flags = cpu_to_le16(flags); return ptr - data; @@ -1535,7 +1536,7 @@ static inline int l2cap_config_req(struct l2cap_conn *conn, struct l2cap_cmd_hdr u16 dcid, flags; u8 rsp[64]; struct sock *sk; - int result; + int len; dcid = __le16_to_cpu(req->dcid); flags = __le16_to_cpu(req->flags); @@ -1548,25 +1549,40 @@ static inline int l2cap_config_req(struct l2cap_conn *conn, struct l2cap_cmd_hdr if (sk->sk_state == BT_DISCONN) goto unlock; - l2cap_parse_conf_req(sk, req->data, cmd->len - sizeof(*req)); + /* Reject if config buffer is too small. 
*/ + len = cmd->len - sizeof(*req); + if (l2cap_pi(sk)->conf_len + len > sizeof(l2cap_pi(sk)->conf_req)) { + l2cap_send_cmd(conn, cmd->ident, L2CAP_CONF_RSP, + l2cap_build_conf_rsp(sk, rsp, + L2CAP_CONF_REJECT, flags), rsp); + goto unlock; + } + + /* Store config. */ + memcpy(l2cap_pi(sk)->conf_req + l2cap_pi(sk)->conf_len, req->data, len); + l2cap_pi(sk)->conf_len += len; if (flags & 0x0001) { /* Incomplete config. Send empty response. */ l2cap_send_cmd(conn, cmd->ident, L2CAP_CONF_RSP, - l2cap_build_conf_rsp(sk, rsp, NULL), rsp); + l2cap_build_conf_rsp(sk, rsp, + L2CAP_CONF_SUCCESS, 0x0001), rsp); goto unlock; } /* Complete config. */ - l2cap_send_cmd(conn, cmd->ident, L2CAP_CONF_RSP, - l2cap_build_conf_rsp(sk, rsp, &result), rsp); - - if (result) + len = l2cap_parse_conf_req(sk, rsp); + if (len < 0) goto unlock; - /* Output config done */ + l2cap_send_cmd(conn, cmd->ident, L2CAP_CONF_RSP, len, rsp); + + /* Output config done. */ l2cap_pi(sk)->conf_state |= L2CAP_CONF_OUTPUT_DONE; + /* Reset config buffer. */ + l2cap_pi(sk)->conf_len = 0; + if (l2cap_pi(sk)->conf_state & L2CAP_CONF_INPUT_DONE) { sk->sk_state = BT_CONNECTED; l2cap_chan_ready(sk); diff --git a/net/bridge/br_fdb.c b/net/bridge/br_fdb.c index 91b017016d5..3fc69729381 100644 --- a/net/bridge/br_fdb.c +++ b/net/bridge/br_fdb.c @@ -121,6 +121,7 @@ void br_fdb_cleanup(unsigned long _data) { struct net_bridge *br = (struct net_bridge *)_data; unsigned long delay = hold_time(br); + unsigned long next_timer = jiffies + br->forward_delay; int i; spin_lock_bh(&br->hash_lock); @@ -129,14 +130,21 @@ void br_fdb_cleanup(unsigned long _data) struct hlist_node *h, *n; hlist_for_each_entry_safe(f, h, n, &br->hash[i], hlist) { - if (!f->is_static && - time_before_eq(f->ageing_timer + delay, jiffies)) + unsigned long this_timer; + if (f->is_static) + continue; + this_timer = f->ageing_timer + delay; + if (time_before_eq(this_timer, jiffies)) fdb_delete(f); + else if (this_timer < next_timer) + next_timer = this_timer; } } spin_unlock_bh(&br->hash_lock); - mod_timer(&br->gc_timer, jiffies + HZ/10); + /* Add HZ/4 to ensure we round the jiffies upwards to be after the next + * timer, otherwise we might round down and will have no-op run. */ + mod_timer(&br->gc_timer, round_jiffies(next_timer + HZ/4)); } /* Completely flush all dynamic entries in forwarding database.*/ diff --git a/net/bridge/br_stp.c b/net/bridge/br_stp.c index ebb0861e9bd..e38034aa56f 100644 --- a/net/bridge/br_stp.c +++ b/net/bridge/br_stp.c @@ -13,7 +13,6 @@ * 2 of the License, or (at your option) any later version. 
*/ #include <linux/kernel.h> -#include <linux/smp_lock.h> #include "br_private.h" #include "br_private_stp.h" @@ -179,7 +178,8 @@ void br_transmit_config(struct net_bridge_port *p) br_send_config_bpdu(p, &bpdu); p->topology_change_ack = 0; p->config_pending = 0; - mod_timer(&p->hold_timer, jiffies + BR_HOLD_TIME); + mod_timer(&p->hold_timer, + round_jiffies(jiffies + BR_HOLD_TIME)); } } diff --git a/net/bridge/br_stp_if.c b/net/bridge/br_stp_if.c index 3e246b37020..a786e786320 100644 --- a/net/bridge/br_stp_if.c +++ b/net/bridge/br_stp_if.c @@ -14,7 +14,6 @@ */ #include <linux/kernel.h> -#include <linux/smp_lock.h> #include <linux/etherdevice.h> #include <linux/rtnetlink.h> diff --git a/net/bridge/br_stp_timer.c b/net/bridge/br_stp_timer.c index 030aa798fea..77f5255e691 100644 --- a/net/bridge/br_stp_timer.c +++ b/net/bridge/br_stp_timer.c @@ -15,7 +15,6 @@ #include <linux/kernel.h> #include <linux/times.h> -#include <linux/smp_lock.h> #include "br_private.h" #include "br_private_stp.h" @@ -43,7 +42,7 @@ static void br_hello_timer_expired(unsigned long arg) if (br->dev->flags & IFF_UP) { br_config_bpdu_generation(br); - mod_timer(&br->hello_timer, jiffies + br->hello_time); + mod_timer(&br->hello_timer, round_jiffies(jiffies + br->hello_time)); } spin_unlock(&br->lock); } diff --git a/net/core/dev.c b/net/core/dev.c index 4317c1be4d3..ee051bb398a 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -116,6 +116,7 @@ #include <linux/dmaengine.h> #include <linux/err.h> #include <linux/ctype.h> +#include <linux/if_arp.h> /* * The list of packet types we will receive (as opposed to discard) @@ -217,6 +218,73 @@ extern void netdev_unregister_sysfs(struct net_device *); #define netdev_unregister_sysfs(dev) do { } while(0) #endif +#ifdef CONFIG_DEBUG_LOCK_ALLOC +/* + * register_netdevice() inits dev->_xmit_lock and sets lockdep class + * according to dev->type + */ +static const unsigned short netdev_lock_type[] = + {ARPHRD_NETROM, ARPHRD_ETHER, ARPHRD_EETHER, ARPHRD_AX25, + ARPHRD_PRONET, ARPHRD_CHAOS, ARPHRD_IEEE802, ARPHRD_ARCNET, + ARPHRD_APPLETLK, ARPHRD_DLCI, ARPHRD_ATM, ARPHRD_METRICOM, + ARPHRD_IEEE1394, ARPHRD_EUI64, ARPHRD_INFINIBAND, ARPHRD_SLIP, + ARPHRD_CSLIP, ARPHRD_SLIP6, ARPHRD_CSLIP6, ARPHRD_RSRVD, + ARPHRD_ADAPT, ARPHRD_ROSE, ARPHRD_X25, ARPHRD_HWX25, + ARPHRD_PPP, ARPHRD_CISCO, ARPHRD_LAPB, ARPHRD_DDCMP, + ARPHRD_RAWHDLC, ARPHRD_TUNNEL, ARPHRD_TUNNEL6, ARPHRD_FRAD, + ARPHRD_SKIP, ARPHRD_LOOPBACK, ARPHRD_LOCALTLK, ARPHRD_FDDI, + ARPHRD_BIF, ARPHRD_SIT, ARPHRD_IPDDP, ARPHRD_IPGRE, + ARPHRD_PIMREG, ARPHRD_HIPPI, ARPHRD_ASH, ARPHRD_ECONET, + ARPHRD_IRDA, ARPHRD_FCPP, ARPHRD_FCAL, ARPHRD_FCPL, + ARPHRD_FCFABRIC, ARPHRD_IEEE802_TR, ARPHRD_IEEE80211, + ARPHRD_IEEE80211_PRISM, ARPHRD_IEEE80211_RADIOTAP, ARPHRD_VOID, + ARPHRD_NONE}; + +static const char *netdev_lock_name[] = + {"_xmit_NETROM", "_xmit_ETHER", "_xmit_EETHER", "_xmit_AX25", + "_xmit_PRONET", "_xmit_CHAOS", "_xmit_IEEE802", "_xmit_ARCNET", + "_xmit_APPLETLK", "_xmit_DLCI", "_xmit_ATM", "_xmit_METRICOM", + "_xmit_IEEE1394", "_xmit_EUI64", "_xmit_INFINIBAND", "_xmit_SLIP", + "_xmit_CSLIP", "_xmit_SLIP6", "_xmit_CSLIP6", "_xmit_RSRVD", + "_xmit_ADAPT", "_xmit_ROSE", "_xmit_X25", "_xmit_HWX25", + "_xmit_PPP", "_xmit_CISCO", "_xmit_LAPB", "_xmit_DDCMP", + "_xmit_RAWHDLC", "_xmit_TUNNEL", "_xmit_TUNNEL6", "_xmit_FRAD", + "_xmit_SKIP", "_xmit_LOOPBACK", "_xmit_LOCALTLK", "_xmit_FDDI", + "_xmit_BIF", "_xmit_SIT", "_xmit_IPDDP", "_xmit_IPGRE", + "_xmit_PIMREG", "_xmit_HIPPI", "_xmit_ASH", "_xmit_ECONET", + "_xmit_IRDA", "_xmit_FCPP", 
"_xmit_FCAL", "_xmit_FCPL", + "_xmit_FCFABRIC", "_xmit_IEEE802_TR", "_xmit_IEEE80211", + "_xmit_IEEE80211_PRISM", "_xmit_IEEE80211_RADIOTAP", "_xmit_VOID", + "_xmit_NONE"}; + +static struct lock_class_key netdev_xmit_lock_key[ARRAY_SIZE(netdev_lock_type)]; + +static inline unsigned short netdev_lock_pos(unsigned short dev_type) +{ + int i; + + for (i = 0; i < ARRAY_SIZE(netdev_lock_type); i++) + if (netdev_lock_type[i] == dev_type) + return i; + /* the last key is used by default */ + return ARRAY_SIZE(netdev_lock_type) - 1; +} + +static inline void netdev_set_lockdep_class(spinlock_t *lock, + unsigned short dev_type) +{ + int i; + + i = netdev_lock_pos(dev_type); + lockdep_set_class_and_name(lock, &netdev_xmit_lock_key[i], + netdev_lock_name[i]); +} +#else +static inline void netdev_set_lockdep_class(spinlock_t *lock, + unsigned short dev_type) +{ +} +#endif /******************************************************************************* @@ -1941,6 +2009,7 @@ static void net_rx_action(struct softirq_action *h) } } out: + local_irq_enable(); #ifdef CONFIG_NET_DMA /* * There may not be any more sk_buffs coming right now, so push @@ -1954,7 +2023,6 @@ out: rcu_read_unlock(); } #endif - local_irq_enable(); return; softnet_break: @@ -2509,7 +2577,7 @@ unsigned dev_get_flags(const struct net_device *dev) int dev_change_flags(struct net_device *dev, unsigned flags) { - int ret; + int ret, changes; int old_flags = dev->flags; /* @@ -2564,8 +2632,10 @@ int dev_change_flags(struct net_device *dev, unsigned flags) dev_set_allmulti(dev, inc); } - if (old_flags ^ dev->flags) - rtmsg_ifinfo(RTM_NEWLINK, dev, old_flags ^ dev->flags); + /* Exclude state transition flags, already notified */ + changes = (old_flags ^ dev->flags) & ~(IFF_UP | IFF_RUNNING); + if (changes) + rtmsg_ifinfo(RTM_NEWLINK, dev, changes); return ret; } @@ -3001,6 +3071,7 @@ int register_netdevice(struct net_device *dev) spin_lock_init(&dev->queue_lock); spin_lock_init(&dev->_xmit_lock); + netdev_set_lockdep_class(&dev->_xmit_lock, dev->type); dev->xmit_lock_owner = -1; spin_lock_init(&dev->ingress_lock); @@ -3245,7 +3316,6 @@ void netdev_run_todo(void) continue; } - netdev_unregister_sysfs(dev); dev->reg_state = NETREG_UNREGISTERED; netdev_wait_allrefs(dev); @@ -3256,11 +3326,11 @@ void netdev_run_todo(void) BUG_TRAP(!dev->ip6_ptr); BUG_TRAP(!dev->dn_ptr); - /* It must be the very last action, - * after this 'dev' may point to freed up memory. - */ if (dev->destructor) dev->destructor(dev); + + /* Free network device */ + kobject_put(&dev->dev.kobj); } out: @@ -3411,6 +3481,9 @@ void unregister_netdevice(struct net_device *dev) /* Notifier chain MUST detach us from master device. 
*/ BUG_TRAP(!dev->master); + /* Remove entries from sysfs */ + netdev_unregister_sysfs(dev); + /* Finish processing unregister after unlock */ net_set_todo(dev); @@ -3450,7 +3523,7 @@ static int dev_cpu_callback(struct notifier_block *nfb, unsigned int cpu, oldcpu = (unsigned long)ocpu; struct softnet_data *sd, *oldsd; - if (action != CPU_DEAD) + if (action != CPU_DEAD && action != CPU_DEAD_FROZEN) return NOTIFY_OK; local_irq_disable(); diff --git a/net/core/dst.c b/net/core/dst.c index 764bccb3d99..c6a05879d58 100644 --- a/net/core/dst.c +++ b/net/core/dst.c @@ -111,13 +111,7 @@ out: spin_unlock(&dst_lock); } -static int dst_discard_in(struct sk_buff *skb) -{ - kfree_skb(skb); - return 0; -} - -static int dst_discard_out(struct sk_buff *skb) +static int dst_discard(struct sk_buff *skb) { kfree_skb(skb); return 0; @@ -138,8 +132,7 @@ void * dst_alloc(struct dst_ops * ops) dst->ops = ops; dst->lastuse = jiffies; dst->path = dst; - dst->input = dst_discard_in; - dst->output = dst_discard_out; + dst->input = dst->output = dst_discard; #if RT_CACHE_DEBUG >= 2 atomic_inc(&dst_total); #endif @@ -153,8 +146,7 @@ static void ___dst_free(struct dst_entry * dst) protocol module is unloaded. */ if (dst->dev == NULL || !(dst->dev->flags&IFF_UP)) { - dst->input = dst_discard_in; - dst->output = dst_discard_out; + dst->input = dst->output = dst_discard; } dst->obsolete = 2; } @@ -242,8 +234,7 @@ static inline void dst_ifdown(struct dst_entry *dst, struct net_device *dev, return; if (!unregister) { - dst->input = dst_discard_in; - dst->output = dst_discard_out; + dst->input = dst->output = dst_discard; } else { dst->dev = &loopback_dev; dev_hold(&loopback_dev); diff --git a/net/core/flow.c b/net/core/flow.c index 5d25697920b..051430545a0 100644 --- a/net/core/flow.c +++ b/net/core/flow.c @@ -338,7 +338,7 @@ static int flow_cache_cpu(struct notifier_block *nfb, unsigned long action, void *hcpu) { - if (action == CPU_DEAD) + if (action == CPU_DEAD || action == CPU_DEAD_FROZEN) __flow_cache_shrink((unsigned long)hcpu, 0); return NOTIFY_OK; } diff --git a/net/core/link_watch.c b/net/core/link_watch.c index e3c26a9ccad..a5e372b9ec4 100644 --- a/net/core/link_watch.c +++ b/net/core/link_watch.c @@ -19,7 +19,6 @@ #include <linux/rtnetlink.h> #include <linux/jiffies.h> #include <linux/spinlock.h> -#include <linux/list.h> #include <linux/slab.h> #include <linux/workqueue.h> #include <linux/bitops.h> @@ -27,8 +26,7 @@ enum lw_bits { - LW_RUNNING = 0, - LW_SE_USED + LW_URGENT = 0, }; static unsigned long linkwatch_flags; @@ -37,17 +35,9 @@ static unsigned long linkwatch_nextevent; static void linkwatch_event(struct work_struct *dummy); static DECLARE_DELAYED_WORK(linkwatch_work, linkwatch_event); -static LIST_HEAD(lweventlist); +static struct net_device *lweventlist; static DEFINE_SPINLOCK(lweventlist_lock); -struct lw_event { - struct list_head list; - struct net_device *dev; -}; - -/* Avoid kmalloc() for most systems */ -static struct lw_event singleevent; - static unsigned char default_operstate(const struct net_device *dev) { if (!netif_carrier_ok(dev)) @@ -87,25 +77,102 @@ static void rfc2863_policy(struct net_device *dev) } -/* Must be called with the rtnl semaphore held */ -void linkwatch_run_queue(void) +static int linkwatch_urgent_event(struct net_device *dev) { - struct list_head head, *n, *next; + return netif_running(dev) && netif_carrier_ok(dev) && + dev->qdisc != dev->qdisc_sleeping; +} + + +static void linkwatch_add_event(struct net_device *dev) +{ + unsigned long flags; + + 
spin_lock_irqsave(&lweventlist_lock, flags); + dev->link_watch_next = lweventlist; + lweventlist = dev; + spin_unlock_irqrestore(&lweventlist_lock, flags); +} + + +static void linkwatch_schedule_work(int urgent) +{ + unsigned long delay = linkwatch_nextevent - jiffies; + + if (test_bit(LW_URGENT, &linkwatch_flags)) + return; + + /* Minimise down-time: drop delay for up event. */ + if (urgent) { + if (test_and_set_bit(LW_URGENT, &linkwatch_flags)) + return; + delay = 0; + } + + /* If we wrap around we'll delay it by at most HZ. */ + if (delay > HZ) + delay = 0; + + /* + * This is true if we've scheduled it immeditately or if we don't + * need an immediate execution and it's already pending. + */ + if (schedule_delayed_work(&linkwatch_work, delay) == !delay) + return; + + /* Don't bother if there is nothing urgent. */ + if (!test_bit(LW_URGENT, &linkwatch_flags)) + return; + + /* It's already running which is good enough. */ + if (!cancel_delayed_work(&linkwatch_work)) + return; + + /* Otherwise we reschedule it again for immediate exection. */ + schedule_delayed_work(&linkwatch_work, 0); +} + + +static void __linkwatch_run_queue(int urgent_only) +{ + struct net_device *next; + + /* + * Limit the number of linkwatch events to one + * per second so that a runaway driver does not + * cause a storm of messages on the netlink + * socket. This limit does not apply to up events + * while the device qdisc is down. + */ + if (!urgent_only) + linkwatch_nextevent = jiffies + HZ; + /* Limit wrap-around effect on delay. */ + else if (time_after(linkwatch_nextevent, jiffies + HZ)) + linkwatch_nextevent = jiffies; + + clear_bit(LW_URGENT, &linkwatch_flags); spin_lock_irq(&lweventlist_lock); - list_replace_init(&lweventlist, &head); + next = lweventlist; + lweventlist = NULL; spin_unlock_irq(&lweventlist_lock); - list_for_each_safe(n, next, &head) { - struct lw_event *event = list_entry(n, struct lw_event, list); - struct net_device *dev = event->dev; + while (next) { + struct net_device *dev = next; - if (event == &singleevent) { - clear_bit(LW_SE_USED, &linkwatch_flags); - } else { - kfree(event); + next = dev->link_watch_next; + + if (urgent_only && !linkwatch_urgent_event(dev)) { + linkwatch_add_event(dev); + continue; } + /* + * Make sure the above read is complete since it can be + * rewritten as soon as we clear the bit below. 
+ */ + smp_mb__before_clear_bit(); + /* We are about to handle this device, * so new events can be accepted */ @@ -124,58 +191,39 @@ void linkwatch_run_queue(void) dev_put(dev); } + + if (lweventlist) + linkwatch_schedule_work(0); } -static void linkwatch_event(struct work_struct *dummy) +/* Must be called with the rtnl semaphore held */ +void linkwatch_run_queue(void) { - /* Limit the number of linkwatch events to one - * per second so that a runaway driver does not - * cause a storm of messages on the netlink - * socket - */ - linkwatch_nextevent = jiffies + HZ; - clear_bit(LW_RUNNING, &linkwatch_flags); + __linkwatch_run_queue(0); +} + +static void linkwatch_event(struct work_struct *dummy) +{ rtnl_lock(); - linkwatch_run_queue(); + __linkwatch_run_queue(time_after(linkwatch_nextevent, jiffies)); rtnl_unlock(); } void linkwatch_fire_event(struct net_device *dev) { - if (!test_and_set_bit(__LINK_STATE_LINKWATCH_PENDING, &dev->state)) { - unsigned long flags; - struct lw_event *event; - - if (test_and_set_bit(LW_SE_USED, &linkwatch_flags)) { - event = kmalloc(sizeof(struct lw_event), GFP_ATOMIC); - - if (unlikely(event == NULL)) { - clear_bit(__LINK_STATE_LINKWATCH_PENDING, &dev->state); - return; - } - } else { - event = &singleevent; - } + int urgent = linkwatch_urgent_event(dev); + if (!test_and_set_bit(__LINK_STATE_LINKWATCH_PENDING, &dev->state)) { dev_hold(dev); - event->dev = dev; - - spin_lock_irqsave(&lweventlist_lock, flags); - list_add_tail(&event->list, &lweventlist); - spin_unlock_irqrestore(&lweventlist_lock, flags); - if (!test_and_set_bit(LW_RUNNING, &linkwatch_flags)) { - unsigned long delay = linkwatch_nextevent - jiffies; + linkwatch_add_event(dev); + } else if (!urgent) + return; - /* If we wrap around we'll delay it by at most HZ. */ - if (delay > HZ) - delay = 0; - schedule_delayed_work(&linkwatch_work, delay); - } - } + linkwatch_schedule_work(urgent); } EXPORT_SYMBOL(linkwatch_fire_event); diff --git a/net/core/neighbour.c b/net/core/neighbour.c index 6f3bb73053c..9df26a07f06 100644 --- a/net/core/neighbour.c +++ b/net/core/neighbour.c @@ -1761,7 +1761,7 @@ static inline struct neigh_parms *lookup_neigh_params(struct neigh_table *tbl, return NULL; } -static struct nla_policy nl_neightbl_policy[NDTA_MAX+1] __read_mostly = { +static const struct nla_policy nl_neightbl_policy[NDTA_MAX+1] = { [NDTA_NAME] = { .type = NLA_STRING }, [NDTA_THRESH1] = { .type = NLA_U32 }, [NDTA_THRESH2] = { .type = NLA_U32 }, @@ -1770,7 +1770,7 @@ static struct nla_policy nl_neightbl_policy[NDTA_MAX+1] __read_mostly = { [NDTA_PARMS] = { .type = NLA_NESTED }, }; -static struct nla_policy nl_ntbl_parm_policy[NDTPA_MAX+1] __read_mostly = { +static const struct nla_policy nl_ntbl_parm_policy[NDTPA_MAX+1] = { [NDTPA_IFINDEX] = { .type = NLA_U32 }, [NDTPA_QUEUE_LEN] = { .type = NLA_U32 }, [NDTPA_PROXY_QLEN] = { .type = NLA_U32 }, diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c index b21307b15b8..5c19b0646d7 100644 --- a/net/core/net-sysfs.c +++ b/net/core/net-sysfs.c @@ -456,9 +456,15 @@ static struct class net_class = { #endif }; +/* Delete sysfs entries but hold kobject reference until after all + * netdev references are gone. + */ void netdev_unregister_sysfs(struct net_device * net) { - device_del(&(net->dev)); + struct device *dev = &(net->dev); + + kobject_get(&dev->kobj); + device_del(dev); } /* Create sysfs entries for network device. 
*/ diff --git a/net/core/netpoll.c b/net/core/netpoll.c index b316435b0e2..cf40ff91ac0 100644 --- a/net/core/netpoll.c +++ b/net/core/netpoll.c @@ -9,7 +9,6 @@ * Copyright (C) 2002 Red Hat, Inc. */ -#include <linux/smp_lock.h> #include <linux/netdevice.h> #include <linux/etherdevice.h> #include <linux/string.h> @@ -73,7 +72,8 @@ static void queue_process(struct work_struct *work) netif_tx_unlock(dev); local_irq_restore(flags); - schedule_delayed_work(&npinfo->tx_work, HZ/10); + if (atomic_read(&npinfo->refcnt)) + schedule_delayed_work(&npinfo->tx_work, HZ/10); return; } netif_tx_unlock(dev); @@ -251,22 +251,23 @@ static void netpoll_send_skb(struct netpoll *np, struct sk_buff *skb) unsigned long flags; local_irq_save(flags); - if (netif_tx_trylock(dev)) { - /* try until next clock tick */ - for (tries = jiffies_to_usecs(1)/USEC_PER_POLL; - tries > 0; --tries) { + /* try until next clock tick */ + for (tries = jiffies_to_usecs(1)/USEC_PER_POLL; + tries > 0; --tries) { + if (netif_tx_trylock(dev)) { if (!netif_queue_stopped(dev)) status = dev->hard_start_xmit(skb, dev); + netif_tx_unlock(dev); if (status == NETDEV_TX_OK) break; - /* tickle device maybe there is some cleanup */ - netpoll_poll(np); - - udelay(USEC_PER_POLL); } - netif_tx_unlock(dev); + + /* tickle device maybe there is some cleanup */ + netpoll_poll(np); + + udelay(USEC_PER_POLL); } local_irq_restore(flags); } @@ -785,9 +786,15 @@ void netpoll_cleanup(struct netpoll *np) if (atomic_dec_and_test(&npinfo->refcnt)) { skb_queue_purge(&npinfo->arp_tx); skb_queue_purge(&npinfo->txq); - cancel_rearming_delayed_work(&npinfo->tx_work); + cancel_delayed_work(&npinfo->tx_work); flush_scheduled_work(); + /* clean after last, unfinished work */ + if (!skb_queue_empty(&npinfo->txq)) { + struct sk_buff *skb; + skb = __skb_dequeue(&npinfo->txq); + kfree_skb(skb); + } kfree(npinfo); } } diff --git a/net/core/pktgen.c b/net/core/pktgen.c index b92a322872a..9cd3a1cb60e 100644 --- a/net/core/pktgen.c +++ b/net/core/pktgen.c @@ -117,7 +117,6 @@ #include <linux/module.h> #include <linux/moduleparam.h> #include <linux/kernel.h> -#include <linux/smp_lock.h> #include <linux/mutex.h> #include <linux/sched.h> #include <linux/slab.h> diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 8c971a2efe2..02e8bf08427 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -437,7 +437,7 @@ static void copy_rtnl_link_stats(struct rtnl_link_stats *a, a->tx_compressed = b->tx_compressed; }; -static inline size_t if_nlmsg_size(int iwbuflen) +static inline size_t if_nlmsg_size(void) { return NLMSG_ALIGN(sizeof(struct ifinfomsg)) + nla_total_size(IFNAMSIZ) /* IFLA_IFNAME */ @@ -452,13 +452,12 @@ static inline size_t if_nlmsg_size(int iwbuflen) + nla_total_size(4) /* IFLA_LINK */ + nla_total_size(4) /* IFLA_MASTER */ + nla_total_size(1) /* IFLA_OPERSTATE */ - + nla_total_size(1) /* IFLA_LINKMODE */ - + nla_total_size(iwbuflen); + + nla_total_size(1); /* IFLA_LINKMODE */ } static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev, - void *iwbuf, int iwbuflen, int type, u32 pid, - u32 seq, u32 change, unsigned int flags) + int type, u32 pid, u32 seq, u32 change, + unsigned int flags) { struct ifinfomsg *ifm; struct nlmsghdr *nlh; @@ -523,9 +522,6 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev, } } - if (iwbuf) - NLA_PUT(skb, IFLA_WIRELESS, iwbuflen, iwbuf); - return nlmsg_end(skb, nlh); nla_put_failure: @@ -543,7 +539,7 @@ static int rtnl_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb) 
for_each_netdev(dev) { if (idx < s_idx) goto cont; - if (rtnl_fill_ifinfo(skb, dev, NULL, 0, RTM_NEWLINK, + if (rtnl_fill_ifinfo(skb, dev, RTM_NEWLINK, NETLINK_CB(cb->skb).pid, cb->nlh->nlmsg_seq, 0, NLM_F_MULTI) <= 0) break; @@ -555,7 +551,7 @@ cont: return skb->len; } -static struct nla_policy ifla_policy[IFLA_MAX+1] __read_mostly = { +static const struct nla_policy ifla_policy[IFLA_MAX+1] = { [IFLA_IFNAME] = { .type = NLA_STRING, .len = IFNAMSIZ-1 }, [IFLA_MAP] = { .len = sizeof(struct rtnl_link_ifmap) }, [IFLA_MTU] = { .type = NLA_U32 }, @@ -584,7 +580,7 @@ static int rtnl_setlink(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) err = -EINVAL; ifm = nlmsg_data(nlh); - if (ifm->ifi_index >= 0) + if (ifm->ifi_index > 0) dev = dev_get_by_index(ifm->ifi_index); else if (tb[IFLA_IFNAME]) dev = dev_get_by_name(ifname); @@ -676,7 +672,7 @@ static int rtnl_setlink(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) * name provided implies that a name change has been * requested. */ - if (ifm->ifi_index >= 0 && ifname[0]) { + if (ifm->ifi_index > 0 && ifname[0]) { err = dev_change_name(dev, ifname); if (err < 0) goto errout_dev; @@ -689,8 +685,15 @@ static int rtnl_setlink(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) } - if (ifm->ifi_flags) - dev_change_flags(dev, ifm->ifi_flags); + if (ifm->ifi_flags || ifm->ifi_change) { + unsigned int flags = ifm->ifi_flags; + + /* bugwards compatibility: ifi_change == 0 is treated as ~0 */ + if (ifm->ifi_change) + flags = (flags & ifm->ifi_change) | + (dev->flags & ~ifm->ifi_change); + dev_change_flags(dev, flags); + } if (tb[IFLA_TXQLEN]) dev->tx_queue_len = nla_get_u32(tb[IFLA_TXQLEN]); @@ -730,8 +733,6 @@ static int rtnl_getlink(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg) struct nlattr *tb[IFLA_MAX+1]; struct net_device *dev = NULL; struct sk_buff *nskb; - char *iw_buf = NULL, *iw = NULL; - int iw_buf_len = 0; int err; err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFLA_MAX, ifla_policy); @@ -739,21 +740,21 @@ static int rtnl_getlink(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg) return err; ifm = nlmsg_data(nlh); - if (ifm->ifi_index >= 0) { + if (ifm->ifi_index > 0) { dev = dev_get_by_index(ifm->ifi_index); if (dev == NULL) return -ENODEV; } else return -EINVAL; - nskb = nlmsg_new(if_nlmsg_size(iw_buf_len), GFP_KERNEL); + nskb = nlmsg_new(if_nlmsg_size(), GFP_KERNEL); if (nskb == NULL) { err = -ENOBUFS; goto errout; } - err = rtnl_fill_ifinfo(nskb, dev, iw, iw_buf_len, RTM_NEWLINK, - NETLINK_CB(skb).pid, nlh->nlmsg_seq, 0, 0); + err = rtnl_fill_ifinfo(nskb, dev, RTM_NEWLINK, NETLINK_CB(skb).pid, + nlh->nlmsg_seq, 0, 0); if (err < 0) { /* -EMSGSIZE implies BUG in if_nlmsg_size */ WARN_ON(err == -EMSGSIZE); @@ -762,7 +763,6 @@ static int rtnl_getlink(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg) } err = rtnl_unicast(nskb, NETLINK_CB(skb).pid); errout: - kfree(iw_buf); dev_put(dev); return err; @@ -797,11 +797,11 @@ void rtmsg_ifinfo(int type, struct net_device *dev, unsigned change) struct sk_buff *skb; int err = -ENOBUFS; - skb = nlmsg_new(if_nlmsg_size(0), GFP_KERNEL); + skb = nlmsg_new(if_nlmsg_size(), GFP_KERNEL); if (skb == NULL) goto errout; - err = rtnl_fill_ifinfo(skb, dev, NULL, 0, type, 0, 0, change, 0); + err = rtnl_fill_ifinfo(skb, dev, type, 0, 0, change, 0); if (err < 0) { /* -EMSGSIZE implies BUG in if_nlmsg_size() */ WARN_ON(err == -EMSGSIZE); diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 142257307fa..27cfe5fe4bb 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -434,8 +434,8 @@ struct 
sk_buff *skb_clone(struct sk_buff *skb, gfp_t gfp_mask) n->tc_verd = CLR_TC_MUNGED(n->tc_verd); C(iif); #endif - skb_copy_secmark(n, skb); #endif + skb_copy_secmark(n, skb); C(truesize); atomic_set(&n->users, 1); C(head); @@ -644,11 +644,10 @@ int pskb_expand_head(struct sk_buff *skb, int nhead, int ntail, /* Copy only real data... and, alas, header. This should be * optimized for the cases when header is void. */ - memcpy(data + nhead, skb->head, #ifdef NET_SKBUFF_DATA_USES_OFFSET - skb->tail); + memcpy(data + nhead, skb->head, skb->tail); #else - skb->tail - skb->head); + memcpy(data + nhead, skb->head, skb->tail - skb->head); #endif memcpy(data + size, skb_end_pointer(skb), sizeof(struct skb_shared_info)); @@ -1707,6 +1706,11 @@ next_skb: st->stepped_offset += frag->size; } + if (st->frag_data) { + kunmap_skb_frag(st->frag_data); + st->frag_data = NULL; + } + if (st->cur_skb->next) { st->cur_skb = st->cur_skb->next; st->frag_idx = 0; diff --git a/net/core/sock.c b/net/core/sock.c index 22183c2ef28..c14ce0198d2 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -206,7 +206,19 @@ static int sock_set_timeout(long *timeo_p, char __user *optval, int optlen) return -EINVAL; if (copy_from_user(&tv, optval, sizeof(tv))) return -EFAULT; - + if (tv.tv_usec < 0 || tv.tv_usec >= USEC_PER_SEC) + return -EDOM; + + if (tv.tv_sec < 0) { + static int warned = 0; + *timeo_p = 0; + if (warned < 10 && net_ratelimit()) + warned++; + printk(KERN_INFO "sock_set_timeout: `%s' (pid %d) " + "tries to set negative timeout\n", + current->comm, current->pid); + return 0; + } *timeo_p = MAX_SCHEDULE_TIMEOUT; if (tv.tv_sec == 0 && tv.tv_usec == 0) return 0; @@ -986,7 +998,7 @@ void sk_setup_caps(struct sock *sk, struct dst_entry *dst) __sk_dst_set(sk, dst); sk->sk_route_caps = dst->dev->features; if (sk->sk_route_caps & NETIF_F_GSO) - sk->sk_route_caps |= NETIF_F_GSO_MASK; + sk->sk_route_caps |= NETIF_F_GSO_SOFTWARE; if (sk_can_gso(sk)) { if (dst->header_len) sk->sk_route_caps &= ~NETIF_F_GSO_MASK; diff --git a/net/core/sysctl_net_core.c b/net/core/sysctl_net_core.c index b29712033dd..6d5ea976204 100644 --- a/net/core/sysctl_net_core.c +++ b/net/core/sysctl_net_core.c @@ -24,6 +24,8 @@ extern int sysctl_core_destroy_delay; #ifdef CONFIG_XFRM extern u32 sysctl_xfrm_aevent_etime; extern u32 sysctl_xfrm_aevent_rseqth; +extern int sysctl_xfrm_larval_drop; +extern u32 sysctl_xfrm_acq_expires; #endif ctl_table core_table[] = { @@ -118,6 +120,22 @@ ctl_table core_table[] = { .mode = 0644, .proc_handler = &proc_dointvec }, + { + .ctl_name = CTL_UNNUMBERED, + .procname = "xfrm_larval_drop", + .data = &sysctl_xfrm_larval_drop, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = &proc_dointvec + }, + { + .ctl_name = CTL_UNNUMBERED, + .procname = "xfrm_acq_expires", + .data = &sysctl_xfrm_acq_expires, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = &proc_dointvec + }, #endif /* CONFIG_XFRM */ #endif /* CONFIG_NET */ { diff --git a/net/core/utils.c b/net/core/utils.c index adecfd281ae..2030bb8c2d3 100644 --- a/net/core/utils.c +++ b/net/core/utils.c @@ -139,16 +139,16 @@ int in4_pton(const char *src, int srclen, while(1) { int c; c = xdigit2bin(srclen > 0 ? 
*s : '\0', delim); - if (!(c & (IN6PTON_DIGIT | IN6PTON_DOT | IN6PTON_DELIM))) { + if (!(c & (IN6PTON_DIGIT | IN6PTON_DOT | IN6PTON_DELIM | IN6PTON_COLON_MASK))) { goto out; } - if (c & (IN6PTON_DOT | IN6PTON_DELIM)) { + if (c & (IN6PTON_DOT | IN6PTON_DELIM | IN6PTON_COLON_MASK)) { if (w == 0) goto out; *d++ = w & 0xff; w = 0; i++; - if (c & IN6PTON_DELIM) { + if (c & (IN6PTON_DELIM | IN6PTON_COLON_MASK)) { if (i != 4) goto out; break; diff --git a/net/dccp/Kconfig b/net/dccp/Kconfig index b8a68dd4100..0549e4719b1 100644 --- a/net/dccp/Kconfig +++ b/net/dccp/Kconfig @@ -1,8 +1,6 @@ -menu "DCCP Configuration (EXPERIMENTAL)" - depends on INET && EXPERIMENTAL - -config IP_DCCP +menuconfig IP_DCCP tristate "The DCCP Protocol (EXPERIMENTAL)" + depends on INET && EXPERIMENTAL ---help--- Datagram Congestion Control Protocol (RFC 4340) @@ -19,19 +17,20 @@ config IP_DCCP If in doubt, say N. +if IP_DCCP + config INET_DCCP_DIAG - depends on IP_DCCP && INET_DIAG + depends on INET_DIAG def_tristate y if (IP_DCCP = y && INET_DIAG = y) def_tristate m config IP_DCCP_ACKVEC - depends on IP_DCCP bool source "net/dccp/ccids/Kconfig" menu "DCCP Kernel Hacking" - depends on IP_DCCP && DEBUG_KERNEL=y + depends on DEBUG_KERNEL=y config IP_DCCP_DEBUG bool "DCCP debug messages" @@ -61,4 +60,4 @@ config NET_DCCPPROBE endmenu -endmenu +endif # IP_DDCP diff --git a/net/dccp/ccids/ccid3.c b/net/dccp/ccids/ccid3.c index d7d9ce73724..ec7fa4d67f0 100644 --- a/net/dccp/ccids/ccid3.c +++ b/net/dccp/ccids/ccid3.c @@ -419,7 +419,6 @@ static void ccid3_hc_tx_packet_sent(struct sock *sk, int more, static void ccid3_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb) { - const struct dccp_sock *dp = dccp_sk(sk); struct ccid3_hc_tx_sock *hctx = ccid3_hc_tx_sk(sk); struct ccid3_options_received *opt_recv; struct dccp_tx_hist_entry *packet; @@ -491,7 +490,7 @@ static void ccid3_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb) ccid3_pr_debug("%s(%p), s=%u, MSS=%u, " "R_sample=%uus, X=%u\n", dccp_role(sk), sk, hctx->ccid3hctx_s, - dp->dccps_mss_cache, r_sample, + dccp_sk(sk)->dccps_mss_cache, r_sample, (unsigned)(hctx->ccid3hctx_x >> 6)); ccid3_hc_tx_set_state(sk, TFRC_SSTATE_FBACK); diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c index 64eac2515aa..31737cdf156 100644 --- a/net/dccp/ipv6.c +++ b/net/dccp/ipv6.c @@ -1043,9 +1043,13 @@ static int dccp_v6_connect(struct sock *sk, struct sockaddr *uaddr, if (final_p) ipv6_addr_copy(&fl.fl6_dst, final_p); - err = xfrm_lookup(&dst, &fl, sk, 1); - if (err < 0) - goto failure; + err = __xfrm_lookup(&dst, &fl, sk, 1); + if (err < 0) { + if (err == -EREMOTE) + err = ip6_dst_blackhole(sk, &dst, &fl); + if (err < 0) + goto failure; + } if (saddr == NULL) { saddr = &fl.fl6_src; diff --git a/net/dccp/probe.c b/net/dccp/probe.c index 1f5e3ba6206..43a3adb027e 100644 --- a/net/dccp/probe.c +++ b/net/dccp/probe.c @@ -128,7 +128,7 @@ static ssize_t dccpprobe_read(struct file *file, char __user *buf, int error = 0, cnt = 0; unsigned char *tbuf; - if (!buf || len < 0) + if (!buf) return -EINVAL; if (len == 0) diff --git a/net/decnet/af_decnet.c b/net/decnet/af_decnet.c index 9fbe87c9380..bfa910b6ad2 100644 --- a/net/decnet/af_decnet.c +++ b/net/decnet/af_decnet.c @@ -1839,7 +1839,7 @@ static inline int dn_queue_too_long(struct dn_scp *scp, struct sk_buff_head *que } /* - * The DECnet spec requires the the "routing layer" accepts packets which + * The DECnet spec requires that the "routing layer" accepts packets which * are at least 230 bytes in size. 
This excludes any headers which the NSP * layer might add, so we always assume that we'll be using the maximal * length header on data packets. The variation in length is due to the diff --git a/net/decnet/dn_dev.c b/net/decnet/dn_dev.c index 764a56a13e3..ab41c1879fd 100644 --- a/net/decnet/dn_dev.c +++ b/net/decnet/dn_dev.c @@ -638,7 +638,7 @@ static struct dn_dev *dn_dev_by_index(int ifindex) return dn_dev; } -static struct nla_policy dn_ifa_policy[IFA_MAX+1] __read_mostly = { +static const struct nla_policy dn_ifa_policy[IFA_MAX+1] = { [IFA_ADDRESS] = { .type = NLA_U16 }, [IFA_LOCAL] = { .type = NLA_U16 }, [IFA_LABEL] = { .type = NLA_STRING, diff --git a/net/decnet/dn_rules.c b/net/decnet/dn_rules.c index 17a1932216d..84ff3dd3707 100644 --- a/net/decnet/dn_rules.c +++ b/net/decnet/dn_rules.c @@ -108,7 +108,7 @@ errout: return err; } -static struct nla_policy dn_fib_rule_policy[FRA_MAX+1] __read_mostly = { +static const struct nla_policy dn_fib_rule_policy[FRA_MAX+1] = { FRA_GENERIC_POLICY, }; diff --git a/net/ieee80211/ieee80211_geo.c b/net/ieee80211/ieee80211_geo.c index 305a09de85a..960ad13f5e9 100644 --- a/net/ieee80211/ieee80211_geo.c +++ b/net/ieee80211/ieee80211_geo.c @@ -94,6 +94,21 @@ int ieee80211_channel_to_index(struct ieee80211_device *ieee, u8 channel) return -1; } +u32 ieee80211_channel_to_freq(struct ieee80211_device * ieee, u8 channel) +{ + const struct ieee80211_channel * ch; + + /* Driver needs to initialize the geography map before using + * these helper functions */ + if (ieee->geo.bg_channels == 0 && ieee->geo.a_channels == 0) + return 0; + + ch = ieee80211_get_channel(ieee, channel); + if (!ch->channel) + return 0; + return ch->freq; +} + u8 ieee80211_freq_to_channel(struct ieee80211_device * ieee, u32 freq) { int i; @@ -174,6 +189,7 @@ EXPORT_SYMBOL(ieee80211_get_channel); EXPORT_SYMBOL(ieee80211_get_channel_flags); EXPORT_SYMBOL(ieee80211_is_valid_channel); EXPORT_SYMBOL(ieee80211_freq_to_channel); +EXPORT_SYMBOL(ieee80211_channel_to_freq); EXPORT_SYMBOL(ieee80211_channel_to_index); EXPORT_SYMBOL(ieee80211_set_geo); EXPORT_SYMBOL(ieee80211_get_geo); diff --git a/net/ieee80211/ieee80211_module.c b/net/ieee80211/ieee80211_module.c index 7ec6610841b..17ad278696e 100644 --- a/net/ieee80211/ieee80211_module.c +++ b/net/ieee80211/ieee80211_module.c @@ -140,7 +140,7 @@ struct net_device *alloc_ieee80211(int sizeof_priv) dev = alloc_etherdev(sizeof(struct ieee80211_device) + sizeof_priv); if (!dev) { - IEEE80211_ERROR("Unable to network device.\n"); + IEEE80211_ERROR("Unable to allocate network device.\n"); goto failed; } ieee = netdev_priv(dev); diff --git a/net/ieee80211/ieee80211_wx.c b/net/ieee80211/ieee80211_wx.c index cee5e13bc42..523a137d49d 100644 --- a/net/ieee80211/ieee80211_wx.c +++ b/net/ieee80211/ieee80211_wx.c @@ -89,15 +89,17 @@ static char *ieee80211_translate_scan(struct ieee80211_device *ieee, start = iwe_stream_add_event(start, stop, &iwe, IW_EV_UINT_LEN); } - /* Add frequency/channel */ + /* Add channel and frequency */ iwe.cmd = SIOCGIWFREQ; -/* iwe.u.freq.m = ieee80211_frequency(network->channel, network->mode); - iwe.u.freq.e = 3; */ iwe.u.freq.m = network->channel; iwe.u.freq.e = 0; iwe.u.freq.i = 0; start = iwe_stream_add_event(start, stop, &iwe, IW_EV_FREQ_LEN); + iwe.u.freq.m = ieee80211_channel_to_freq(ieee, network->channel); + iwe.u.freq.e = 6; + start = iwe_stream_add_event(start, stop, &iwe, IW_EV_FREQ_LEN); + /* Add encryption capability */ iwe.cmd = SIOCGIWENCODE; if (network->capability & WLAN_CAPABILITY_PRIVACY) diff --git 
a/net/ieee80211/softmac/ieee80211softmac_module.c b/net/ieee80211/softmac/ieee80211softmac_module.c index e9cdc6615dd..c308756c2f9 100644 --- a/net/ieee80211/softmac/ieee80211softmac_module.c +++ b/net/ieee80211/softmac/ieee80211softmac_module.c @@ -33,7 +33,10 @@ struct net_device *alloc_ieee80211softmac(int sizeof_priv) struct ieee80211softmac_device *softmac; struct net_device *dev; - dev = alloc_ieee80211(sizeof(struct ieee80211softmac_device) + sizeof_priv); + dev = alloc_ieee80211(sizeof(*softmac) + sizeof_priv); + if (!dev) + return NULL; + softmac = ieee80211_priv(dev); softmac->dev = dev; softmac->ieee = netdev_priv(dev); diff --git a/net/ipv4/Kconfig b/net/ipv4/Kconfig index e62aee0ec4c..010fbb2d45e 100644 --- a/net/ipv4/Kconfig +++ b/net/ipv4/Kconfig @@ -43,11 +43,11 @@ config IP_ADVANCED_ROUTER asymmetric routing (packets from you to a host take a different path than packets from that host to you) or if you operate a non-routing host which has several IP addresses on different interfaces. To turn - rp_filter off use: + rp_filter on use: - echo 0 > /proc/sys/net/ipv4/conf/<device>/rp_filter + echo 1 > /proc/sys/net/ipv4/conf/<device>/rp_filter or - echo 0 > /proc/sys/net/ipv4/conf/all/rp_filter + echo 1 > /proc/sys/net/ipv4/conf/all/rp_filter If unsure, say N here. @@ -130,7 +130,7 @@ config IP_ROUTE_MULTIPATH_RR tristate "MULTIPATH: round robin algorithm" depends on IP_ROUTE_MULTIPATH_CACHED help - Mulitpath routes are chosen according to Round Robin + Multipath routes are chosen according to Round Robin config IP_ROUTE_MULTIPATH_RANDOM tristate "MULTIPATH: random algorithm" @@ -577,6 +577,7 @@ config TCP_CONG_VENO config TCP_CONG_YEAH tristate "YeAH TCP" depends on EXPERIMENTAL + select TCP_CONG_VEGAS default n ---help--- YeAH-TCP is a sender-side high-speed enabled TCP congestion control @@ -651,7 +652,7 @@ config TCP_MD5SIG select CRYPTO select CRYPTO_MD5 ---help--- - RFC2385 specifices a method of giving MD5 protection to TCP sessions. + RFC2385 specifies a method of giving MD5 protection to TCP sessions. Its main (only?) use is to protect BGP sessions between core routers on the Internet. diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index 16aae8ef555..041fba3fa0a 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -92,7 +92,6 @@ #include <asm/uaccess.h> #include <asm/system.h> -#include <linux/smp_lock.h> #include <linux/inet.h> #include <linux/igmp.h> #include <linux/inetdevice.h> diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c index 7110779a024..e00767e8ebd 100644 --- a/net/ipv4/arp.c +++ b/net/ipv4/arp.c @@ -877,7 +877,7 @@ static int arp_process(struct sk_buff *skb) n = __neigh_lookup(&arp_tbl, &sip, dev, 0); - if (ipv4_devconf.arp_accept) { + if (IPV4_DEVCONF_ALL(ARP_ACCEPT)) { /* Unsolicited ARP is not accepted by default. 
It is possible, that this option should be enabled for some devices (strip is candidate) @@ -987,11 +987,11 @@ static int arp_req_set(struct arpreq *r, struct net_device * dev) return 0; } if (dev == NULL) { - ipv4_devconf.proxy_arp = 1; + IPV4_DEVCONF_ALL(PROXY_ARP) = 1; return 0; } if (__in_dev_get_rtnl(dev)) { - __in_dev_get_rtnl(dev)->cnf.proxy_arp = 1; + IN_DEV_CONF_SET(__in_dev_get_rtnl(dev), PROXY_ARP, 1); return 0; } return -ENXIO; @@ -1093,11 +1093,12 @@ static int arp_req_delete(struct arpreq *r, struct net_device * dev) return pneigh_delete(&arp_tbl, &ip, dev); if (mask == 0) { if (dev == NULL) { - ipv4_devconf.proxy_arp = 0; + IPV4_DEVCONF_ALL(PROXY_ARP) = 0; return 0; } if (__in_dev_get_rtnl(dev)) { - __in_dev_get_rtnl(dev)->cnf.proxy_arp = 0; + IN_DEV_CONF_SET(__in_dev_get_rtnl(dev), + PROXY_ARP, 0); return 0; } return -ENXIO; diff --git a/net/ipv4/cipso_ipv4.c b/net/ipv4/cipso_ipv4.c index e1f18489db1..ab56a052ce3 100644 --- a/net/ipv4/cipso_ipv4.c +++ b/net/ipv4/cipso_ipv4.c @@ -45,6 +45,7 @@ #include <net/cipso_ipv4.h> #include <asm/atomic.h> #include <asm/bug.h> +#include <asm/unaligned.h> struct cipso_v4_domhsh_entry { char *domain; @@ -629,7 +630,7 @@ doi_walk_return: * @domain: the domain to add * * Description: - * Adds the @domain to the the DOI specified by @doi_def, this function + * Adds the @domain to the DOI specified by @doi_def, this function * should only be called by external functions (i.e. NetLabel). This function * does allocate memory. Returns zero on success, negative values on failure. * @@ -1000,7 +1001,7 @@ static int cipso_v4_map_cat_enum_valid(const struct cipso_v4_doi *doi_def, return -EFAULT; for (iter = 0; iter < enumcat_len; iter += 2) { - cat = ntohs(*((__be16 *)&enumcat[iter])); + cat = ntohs(get_unaligned((__be16 *)&enumcat[iter])); if (cat <= cat_prev) return -EFAULT; cat_prev = cat; @@ -1068,8 +1069,8 @@ static int cipso_v4_map_cat_enum_ntoh(const struct cipso_v4_doi *doi_def, for (iter = 0; iter < net_cat_len; iter += 2) { ret_val = netlbl_secattr_catmap_setbit(secattr->mls_cat, - ntohs(*((__be16 *)&net_cat[iter])), - GFP_ATOMIC); + ntohs(get_unaligned((__be16 *)&net_cat[iter])), + GFP_ATOMIC); if (ret_val != 0) return ret_val; } @@ -1102,9 +1103,10 @@ static int cipso_v4_map_cat_rng_valid(const struct cipso_v4_doi *doi_def, return -EFAULT; for (iter = 0; iter < rngcat_len; iter += 4) { - cat_high = ntohs(*((__be16 *)&rngcat[iter])); + cat_high = ntohs(get_unaligned((__be16 *)&rngcat[iter])); if ((iter + 4) <= rngcat_len) - cat_low = ntohs(*((__be16 *)&rngcat[iter + 2])); + cat_low = ntohs( + get_unaligned((__be16 *)&rngcat[iter + 2])); else cat_low = 0; @@ -1201,9 +1203,10 @@ static int cipso_v4_map_cat_rng_ntoh(const struct cipso_v4_doi *doi_def, u16 cat_high; for (net_iter = 0; net_iter < net_cat_len; net_iter += 4) { - cat_high = ntohs(*((__be16 *)&net_cat[net_iter])); + cat_high = ntohs(get_unaligned((__be16 *)&net_cat[net_iter])); if ((net_iter + 4) <= net_cat_len) - cat_low = ntohs(*((__be16 *)&net_cat[net_iter + 2])); + cat_low = ntohs( + get_unaligned((__be16 *)&net_cat[net_iter + 2])); else cat_low = 0; @@ -1565,7 +1568,7 @@ int cipso_v4_validate(unsigned char **option) } rcu_read_lock(); - doi_def = cipso_v4_doi_search(ntohl(*((__be32 *)&opt[2]))); + doi_def = cipso_v4_doi_search(ntohl(get_unaligned((__be32 *)&opt[2]))); if (doi_def == NULL) { err_offset = 2; goto validate_return_locked; @@ -1709,22 +1712,22 @@ void cipso_v4_error(struct sk_buff *skb, int error, u32 gateway) } /** - * cipso_v4_socket_setattr - Add a CIPSO 
option to a socket - * @sock: the socket + * cipso_v4_sock_setattr - Add a CIPSO option to a socket + * @sk: the socket * @doi_def: the CIPSO DOI to use * @secattr: the specific security attributes of the socket * * Description: * Set the CIPSO option on the given socket using the DOI definition and * security attributes passed to the function. This function requires - * exclusive access to @sock->sk, which means it either needs to be in the - * process of being created or locked via lock_sock(sock->sk). Returns zero on - * success and negative values on failure. + * exclusive access to @sk, which means it either needs to be in the + * process of being created or locked. Returns zero on success and negative + * values on failure. * */ -int cipso_v4_socket_setattr(const struct socket *sock, - const struct cipso_v4_doi *doi_def, - const struct netlbl_lsm_secattr *secattr) +int cipso_v4_sock_setattr(struct sock *sk, + const struct cipso_v4_doi *doi_def, + const struct netlbl_lsm_secattr *secattr) { int ret_val = -EPERM; u32 iter; @@ -1732,7 +1735,6 @@ int cipso_v4_socket_setattr(const struct socket *sock, u32 buf_len = 0; u32 opt_len; struct ip_options *opt = NULL; - struct sock *sk; struct inet_sock *sk_inet; struct inet_connection_sock *sk_conn; @@ -1740,7 +1742,6 @@ int cipso_v4_socket_setattr(const struct socket *sock, * defined yet but it is not a problem as the only users of these * "lite" PF_INET sockets are functions which do an accept() call * afterwards so we will label the socket as part of the accept(). */ - sk = sock->sk; if (sk == NULL) return 0; @@ -1858,7 +1859,7 @@ int cipso_v4_sock_getattr(struct sock *sk, struct netlbl_lsm_secattr *secattr) if (ret_val == 0) return ret_val; - doi = ntohl(*(__be32 *)&cipso_ptr[2]); + doi = ntohl(get_unaligned((__be32 *)&cipso_ptr[2])); rcu_read_lock(); doi_def = cipso_v4_doi_search(doi); if (doi_def == NULL) { @@ -1892,29 +1893,6 @@ int cipso_v4_sock_getattr(struct sock *sk, struct netlbl_lsm_secattr *secattr) } /** - * cipso_v4_socket_getattr - Get the security attributes from a socket - * @sock: the socket - * @secattr: the security attributes - * - * Description: - * Query @sock to see if there is a CIPSO option attached to the socket and if - * there is return the CIPSO security attributes in @secattr. Returns zero on - * success and negative values on failure. 
- * - */ -int cipso_v4_socket_getattr(const struct socket *sock, - struct netlbl_lsm_secattr *secattr) -{ - int ret_val; - - lock_sock(sock->sk); - ret_val = cipso_v4_sock_getattr(sock->sk, secattr); - release_sock(sock->sk); - - return ret_val; -} - -/** * cipso_v4_skbuff_getattr - Get the security attributes from the CIPSO option * @skb: the packet * @secattr: the security attributes @@ -1936,7 +1914,7 @@ int cipso_v4_skbuff_getattr(const struct sk_buff *skb, if (cipso_v4_cache_check(cipso_ptr, cipso_ptr[1], secattr) == 0) return 0; - doi = ntohl(*(__be32 *)&cipso_ptr[2]); + doi = ntohl(get_unaligned((__be32 *)&cipso_ptr[2])); rcu_read_lock(); doi_def = cipso_v4_doi_search(doi); if (doi_def == NULL) diff --git a/net/ipv4/datagram.c b/net/ipv4/datagram.c index dd02a45d0f6..0301dd468cf 100644 --- a/net/ipv4/datagram.c +++ b/net/ipv4/datagram.c @@ -50,8 +50,12 @@ int ip4_datagram_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) RT_CONN_FLAGS(sk), oif, sk->sk_protocol, inet->sport, usin->sin_port, sk, 1); - if (err) + if (err) { + if (err == -ENETUNREACH) + IP_INC_STATS_BH(IPSTATS_MIB_OUTNOROUTES); return err; + } + if ((rt->rt_flags & RTCF_BROADCAST) && !sock_flag(sk, SOCK_BROADCAST)) { ip_rt_put(rt); return -EACCES; diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c index 7f95e6e9bee..abf6352f990 100644 --- a/net/ipv4/devinet.c +++ b/net/ipv4/devinet.c @@ -64,21 +64,27 @@ #include <net/rtnetlink.h> struct ipv4_devconf ipv4_devconf = { - .accept_redirects = 1, - .send_redirects = 1, - .secure_redirects = 1, - .shared_media = 1, + .data = { + [NET_IPV4_CONF_ACCEPT_REDIRECTS - 1] = 1, + [NET_IPV4_CONF_SEND_REDIRECTS - 1] = 1, + [NET_IPV4_CONF_SECURE_REDIRECTS - 1] = 1, + [NET_IPV4_CONF_SHARED_MEDIA - 1] = 1, + }, }; static struct ipv4_devconf ipv4_devconf_dflt = { - .accept_redirects = 1, - .send_redirects = 1, - .secure_redirects = 1, - .shared_media = 1, - .accept_source_route = 1, + .data = { + [NET_IPV4_CONF_ACCEPT_REDIRECTS - 1] = 1, + [NET_IPV4_CONF_SEND_REDIRECTS - 1] = 1, + [NET_IPV4_CONF_SECURE_REDIRECTS - 1] = 1, + [NET_IPV4_CONF_SHARED_MEDIA - 1] = 1, + [NET_IPV4_CONF_ACCEPT_SOURCE_ROUTE - 1] = 1, + }, }; -static struct nla_policy ifa_ipv4_policy[IFA_MAX+1] __read_mostly = { +#define IPV4_DEVCONF_DFLT(attr) IPV4_DEVCONF(ipv4_devconf_dflt, attr) + +static const struct nla_policy ifa_ipv4_policy[IFA_MAX+1] = { [IFA_LOCAL] = { .type = NLA_U32 }, [IFA_ADDRESS] = { .type = NLA_U32 }, [IFA_BROADCAST] = { .type = NLA_U32 }, @@ -141,7 +147,7 @@ void in_dev_finish_destroy(struct in_device *idev) } } -struct in_device *inetdev_init(struct net_device *dev) +static struct in_device *inetdev_init(struct net_device *dev) { struct in_device *in_dev; @@ -321,12 +327,8 @@ static void __inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap, } } - if (destroy) { + if (destroy) inet_free_ifa(ifa1); - - if (!in_dev->ifa_list) - inetdev_destroy(in_dev); - } } static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap, @@ -399,12 +401,10 @@ static int inet_set_ifa(struct net_device *dev, struct in_ifaddr *ifa) ASSERT_RTNL(); if (!in_dev) { - in_dev = inetdev_init(dev); - if (!in_dev) { - inet_free_ifa(ifa); - return -ENOBUFS; - } + inet_free_ifa(ifa); + return -ENOBUFS; } + ipv4_devconf_setall(in_dev); if (ifa->ifa_dev != in_dev) { BUG_TRAP(!ifa->ifa_dev); in_dev_hold(in_dev); @@ -514,13 +514,12 @@ static struct in_ifaddr *rtm_to_ifaddr(struct nlmsghdr *nlh) in_dev = __in_dev_get_rtnl(dev); if (in_dev == NULL) { - in_dev = inetdev_init(dev); - if (in_dev == NULL) 
{ - err = -ENOBUFS; - goto errout; - } + err = -ENOBUFS; + goto errout; } + ipv4_devconf_setall(in_dev); + ifa = inet_alloc_ifa(); if (ifa == NULL) { /* @@ -1057,11 +1056,12 @@ static int inetdev_event(struct notifier_block *this, unsigned long event, if (!in_dev) { if (event == NETDEV_REGISTER) { in_dev = inetdev_init(dev); - if (!in_dev) - panic("devinet: Failed to create loopback\n"); if (dev == &loopback_dev) { - in_dev->cnf.no_xfrm = 1; - in_dev->cnf.no_policy = 1; + if (!in_dev) + panic("devinet: " + "Failed to create loopback\n"); + IN_DEV_CONF_SET(in_dev, NOXFRM, 1); + IN_DEV_CONF_SET(in_dev, NOPOLICY, 1); } } goto out; @@ -1237,13 +1237,98 @@ errout: #ifdef CONFIG_SYSCTL +static void devinet_copy_dflt_conf(int i) +{ + struct net_device *dev; + + read_lock(&dev_base_lock); + for_each_netdev(dev) { + struct in_device *in_dev; + rcu_read_lock(); + in_dev = __in_dev_get_rcu(dev); + if (in_dev && !test_bit(i, in_dev->cnf.state)) + in_dev->cnf.data[i] = ipv4_devconf_dflt.data[i]; + rcu_read_unlock(); + } + read_unlock(&dev_base_lock); +} + +static int devinet_conf_proc(ctl_table *ctl, int write, + struct file* filp, void __user *buffer, + size_t *lenp, loff_t *ppos) +{ + int ret = proc_dointvec(ctl, write, filp, buffer, lenp, ppos); + + if (write) { + struct ipv4_devconf *cnf = ctl->extra1; + int i = (int *)ctl->data - cnf->data; + + set_bit(i, cnf->state); + + if (cnf == &ipv4_devconf_dflt) + devinet_copy_dflt_conf(i); + } + + return ret; +} + +static int devinet_conf_sysctl(ctl_table *table, int __user *name, int nlen, + void __user *oldval, size_t __user *oldlenp, + void __user *newval, size_t newlen) +{ + struct ipv4_devconf *cnf; + int *valp = table->data; + int new; + int i; + + if (!newval || !newlen) + return 0; + + if (newlen != sizeof(int)) + return -EINVAL; + + if (get_user(new, (int __user *)newval)) + return -EFAULT; + + if (new == *valp) + return 0; + + if (oldval && oldlenp) { + size_t len; + + if (get_user(len, oldlenp)) + return -EFAULT; + + if (len) { + if (len > table->maxlen) + len = table->maxlen; + if (copy_to_user(oldval, valp, len)) + return -EFAULT; + if (put_user(len, oldlenp)) + return -EFAULT; + } + } + + *valp = new; + + cnf = table->extra1; + i = (int *)table->data - cnf->data; + + set_bit(i, cnf->state); + + if (cnf == &ipv4_devconf_dflt) + devinet_copy_dflt_conf(i); + + return 1; +} + void inet_forward_change(void) { struct net_device *dev; - int on = ipv4_devconf.forwarding; + int on = IPV4_DEVCONF_ALL(FORWARDING); - ipv4_devconf.accept_redirects = !on; - ipv4_devconf_dflt.forwarding = on; + IPV4_DEVCONF_ALL(ACCEPT_REDIRECTS) = !on; + IPV4_DEVCONF_DFLT(FORWARDING) = on; read_lock(&dev_base_lock); for_each_netdev(dev) { @@ -1251,7 +1336,7 @@ void inet_forward_change(void) rcu_read_lock(); in_dev = __in_dev_get_rcu(dev); if (in_dev) - in_dev->cnf.forwarding = on; + IN_DEV_CONF_SET(in_dev, FORWARDING, on); rcu_read_unlock(); } read_unlock(&dev_base_lock); @@ -1268,9 +1353,9 @@ static int devinet_sysctl_forward(ctl_table *ctl, int write, int ret = proc_dointvec(ctl, write, filp, buffer, lenp, ppos); if (write && *valp != val) { - if (valp == &ipv4_devconf.forwarding) + if (valp == &IPV4_DEVCONF_ALL(FORWARDING)) inet_forward_change(); - else if (valp != &ipv4_devconf_dflt.forwarding) + else if (valp != &IPV4_DEVCONF_DFLT(FORWARDING)) rt_cache_flush(0); } @@ -1295,42 +1380,43 @@ int ipv4_doint_and_flush_strategy(ctl_table *table, int __user *name, int nlen, void __user *oldval, size_t __user *oldlenp, void __user *newval, size_t newlen) { - int *valp = 
table->data; - int new; + int ret = devinet_conf_sysctl(table, name, nlen, oldval, oldlenp, + newval, newlen); - if (!newval || !newlen) - return 0; + if (ret == 1) + rt_cache_flush(0); - if (newlen != sizeof(int)) - return -EINVAL; + return ret; +} - if (get_user(new, (int __user *)newval)) - return -EFAULT; - if (new == *valp) - return 0; +#define DEVINET_SYSCTL_ENTRY(attr, name, mval, proc, sysctl) \ + { \ + .ctl_name = NET_IPV4_CONF_ ## attr, \ + .procname = name, \ + .data = ipv4_devconf.data + \ + NET_IPV4_CONF_ ## attr - 1, \ + .maxlen = sizeof(int), \ + .mode = mval, \ + .proc_handler = proc, \ + .strategy = sysctl, \ + .extra1 = &ipv4_devconf, \ + } - if (oldval && oldlenp) { - size_t len; +#define DEVINET_SYSCTL_RW_ENTRY(attr, name) \ + DEVINET_SYSCTL_ENTRY(attr, name, 0644, devinet_conf_proc, \ + devinet_conf_sysctl) - if (get_user(len, oldlenp)) - return -EFAULT; +#define DEVINET_SYSCTL_RO_ENTRY(attr, name) \ + DEVINET_SYSCTL_ENTRY(attr, name, 0444, devinet_conf_proc, \ + devinet_conf_sysctl) - if (len) { - if (len > table->maxlen) - len = table->maxlen; - if (copy_to_user(oldval, valp, len)) - return -EFAULT; - if (put_user(len, oldlenp)) - return -EFAULT; - } - } - - *valp = new; - rt_cache_flush(0); - return 1; -} +#define DEVINET_SYSCTL_COMPLEX_ENTRY(attr, name, proc, sysctl) \ + DEVINET_SYSCTL_ENTRY(attr, name, 0644, proc, sysctl) +#define DEVINET_SYSCTL_FLUSHING_ENTRY(attr, name) \ + DEVINET_SYSCTL_COMPLEX_ENTRY(attr, name, ipv4_doint_and_flush, \ + ipv4_doint_and_flush_strategy) static struct devinet_sysctl_table { struct ctl_table_header *sysctl_header; @@ -1341,178 +1427,34 @@ static struct devinet_sysctl_table { ctl_table devinet_root_dir[2]; } devinet_sysctl = { .devinet_vars = { - { - .ctl_name = NET_IPV4_CONF_FORWARDING, - .procname = "forwarding", - .data = &ipv4_devconf.forwarding, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = &devinet_sysctl_forward, - }, - { - .ctl_name = NET_IPV4_CONF_MC_FORWARDING, - .procname = "mc_forwarding", - .data = &ipv4_devconf.mc_forwarding, - .maxlen = sizeof(int), - .mode = 0444, - .proc_handler = &proc_dointvec, - }, - { - .ctl_name = NET_IPV4_CONF_ACCEPT_REDIRECTS, - .procname = "accept_redirects", - .data = &ipv4_devconf.accept_redirects, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = &proc_dointvec, - }, - { - .ctl_name = NET_IPV4_CONF_SECURE_REDIRECTS, - .procname = "secure_redirects", - .data = &ipv4_devconf.secure_redirects, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = &proc_dointvec, - }, - { - .ctl_name = NET_IPV4_CONF_SHARED_MEDIA, - .procname = "shared_media", - .data = &ipv4_devconf.shared_media, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = &proc_dointvec, - }, - { - .ctl_name = NET_IPV4_CONF_RP_FILTER, - .procname = "rp_filter", - .data = &ipv4_devconf.rp_filter, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = &proc_dointvec, - }, - { - .ctl_name = NET_IPV4_CONF_SEND_REDIRECTS, - .procname = "send_redirects", - .data = &ipv4_devconf.send_redirects, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = &proc_dointvec, - }, - { - .ctl_name = NET_IPV4_CONF_ACCEPT_SOURCE_ROUTE, - .procname = "accept_source_route", - .data = &ipv4_devconf.accept_source_route, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = &proc_dointvec, - }, - { - .ctl_name = NET_IPV4_CONF_PROXY_ARP, - .procname = "proxy_arp", - .data = &ipv4_devconf.proxy_arp, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = &proc_dointvec, - }, - { - .ctl_name = 
NET_IPV4_CONF_MEDIUM_ID, - .procname = "medium_id", - .data = &ipv4_devconf.medium_id, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = &proc_dointvec, - }, - { - .ctl_name = NET_IPV4_CONF_BOOTP_RELAY, - .procname = "bootp_relay", - .data = &ipv4_devconf.bootp_relay, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = &proc_dointvec, - }, - { - .ctl_name = NET_IPV4_CONF_LOG_MARTIANS, - .procname = "log_martians", - .data = &ipv4_devconf.log_martians, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = &proc_dointvec, - }, - { - .ctl_name = NET_IPV4_CONF_TAG, - .procname = "tag", - .data = &ipv4_devconf.tag, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = &proc_dointvec, - }, - { - .ctl_name = NET_IPV4_CONF_ARPFILTER, - .procname = "arp_filter", - .data = &ipv4_devconf.arp_filter, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = &proc_dointvec, - }, - { - .ctl_name = NET_IPV4_CONF_ARP_ANNOUNCE, - .procname = "arp_announce", - .data = &ipv4_devconf.arp_announce, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = &proc_dointvec, - }, - { - .ctl_name = NET_IPV4_CONF_ARP_IGNORE, - .procname = "arp_ignore", - .data = &ipv4_devconf.arp_ignore, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = &proc_dointvec, - }, - { - .ctl_name = NET_IPV4_CONF_ARP_ACCEPT, - .procname = "arp_accept", - .data = &ipv4_devconf.arp_accept, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = &proc_dointvec, - }, - { - .ctl_name = NET_IPV4_CONF_NOXFRM, - .procname = "disable_xfrm", - .data = &ipv4_devconf.no_xfrm, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = &ipv4_doint_and_flush, - .strategy = &ipv4_doint_and_flush_strategy, - }, - { - .ctl_name = NET_IPV4_CONF_NOPOLICY, - .procname = "disable_policy", - .data = &ipv4_devconf.no_policy, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = &ipv4_doint_and_flush, - .strategy = &ipv4_doint_and_flush_strategy, - }, - { - .ctl_name = NET_IPV4_CONF_FORCE_IGMP_VERSION, - .procname = "force_igmp_version", - .data = &ipv4_devconf.force_igmp_version, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = &ipv4_doint_and_flush, - .strategy = &ipv4_doint_and_flush_strategy, - }, - { - .ctl_name = NET_IPV4_CONF_PROMOTE_SECONDARIES, - .procname = "promote_secondaries", - .data = &ipv4_devconf.promote_secondaries, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = &ipv4_doint_and_flush, - .strategy = &ipv4_doint_and_flush_strategy, - }, + DEVINET_SYSCTL_COMPLEX_ENTRY(FORWARDING, "forwarding", + devinet_sysctl_forward, + devinet_conf_sysctl), + DEVINET_SYSCTL_RO_ENTRY(MC_FORWARDING, "mc_forwarding"), + + DEVINET_SYSCTL_RW_ENTRY(ACCEPT_REDIRECTS, "accept_redirects"), + DEVINET_SYSCTL_RW_ENTRY(SECURE_REDIRECTS, "secure_redirects"), + DEVINET_SYSCTL_RW_ENTRY(SHARED_MEDIA, "shared_media"), + DEVINET_SYSCTL_RW_ENTRY(RP_FILTER, "rp_filter"), + DEVINET_SYSCTL_RW_ENTRY(SEND_REDIRECTS, "send_redirects"), + DEVINET_SYSCTL_RW_ENTRY(ACCEPT_SOURCE_ROUTE, + "accept_source_route"), + DEVINET_SYSCTL_RW_ENTRY(PROXY_ARP, "proxy_arp"), + DEVINET_SYSCTL_RW_ENTRY(MEDIUM_ID, "medium_id"), + DEVINET_SYSCTL_RW_ENTRY(BOOTP_RELAY, "bootp_relay"), + DEVINET_SYSCTL_RW_ENTRY(LOG_MARTIANS, "log_martians"), + DEVINET_SYSCTL_RW_ENTRY(TAG, "tag"), + DEVINET_SYSCTL_RW_ENTRY(ARPFILTER, "arp_filter"), + DEVINET_SYSCTL_RW_ENTRY(ARP_ANNOUNCE, "arp_announce"), + DEVINET_SYSCTL_RW_ENTRY(ARP_IGNORE, "arp_ignore"), + DEVINET_SYSCTL_RW_ENTRY(ARP_ACCEPT, "arp_accept"), + + DEVINET_SYSCTL_FLUSHING_ENTRY(NOXFRM, "disable_xfrm"), 
+ DEVINET_SYSCTL_FLUSHING_ENTRY(NOPOLICY, "disable_policy"), + DEVINET_SYSCTL_FLUSHING_ENTRY(FORCE_IGMP_VERSION, + "force_igmp_version"), + DEVINET_SYSCTL_FLUSHING_ENTRY(PROMOTE_SECONDARIES, + "promote_secondaries"), }, .devinet_dev = { { @@ -1561,6 +1503,7 @@ static void devinet_sysctl_register(struct in_device *in_dev, return; for (i = 0; i < ARRAY_SIZE(t->devinet_vars) - 1; i++) { t->devinet_vars[i].data += (char *)p - (char *)&ipv4_devconf; + t->devinet_vars[i].extra1 = p; } if (dev) { diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c index 837f2957fa8..311d633f7f3 100644 --- a/net/ipv4/fib_frontend.c +++ b/net/ipv4/fib_frontend.c @@ -250,8 +250,6 @@ e_inval: return -EINVAL; } -#ifndef CONFIG_IP_NOSIOCRT - static inline __be32 sk_extract_addr(struct sockaddr *addr) { return ((struct sockaddr_in *) addr)->sin_addr.s_addr; @@ -443,16 +441,7 @@ int ip_rt_ioctl(unsigned int cmd, void __user *arg) return -EINVAL; } -#else - -int ip_rt_ioctl(unsigned int cmd, void *arg) -{ - return -EINVAL; -} - -#endif - -struct nla_policy rtm_ipv4_policy[RTA_MAX+1] __read_mostly = { +const struct nla_policy rtm_ipv4_policy[RTA_MAX+1] = { [RTA_DST] = { .type = NLA_U32 }, [RTA_SRC] = { .type = NLA_U32 }, [RTA_IIF] = { .type = NLA_U32 }, diff --git a/net/ipv4/fib_hash.c b/net/ipv4/fib_hash.c index 9cfecf1215c..07e843a47dd 100644 --- a/net/ipv4/fib_hash.c +++ b/net/ipv4/fib_hash.c @@ -456,6 +456,8 @@ static int fn_hash_insert(struct fib_table *tb, struct fib_config *cfg) fib_release_info(fi_drop); if (state & FA_S_ACCESSED) rt_cache_flush(-1); + rtmsg_fib(RTM_NEWROUTE, key, fa, cfg->fc_dst_len, tb->tb_id, + &cfg->fc_nlinfo, NLM_F_REPLACE); return 0; } @@ -523,7 +525,7 @@ static int fn_hash_insert(struct fib_table *tb, struct fib_config *cfg) rt_cache_flush(-1); rtmsg_fib(RTM_NEWROUTE, key, new_fa, cfg->fc_dst_len, tb->tb_id, - &cfg->fc_nlinfo); + &cfg->fc_nlinfo, 0); return 0; out_free_new_fa: @@ -589,7 +591,7 @@ static int fn_hash_delete(struct fib_table *tb, struct fib_config *cfg) fa = fa_to_delete; rtmsg_fib(RTM_DELROUTE, key, fa, cfg->fc_dst_len, - tb->tb_id, &cfg->fc_nlinfo); + tb->tb_id, &cfg->fc_nlinfo, 0); kill_fn = 0; write_lock_bh(&fib_hash_lock); diff --git a/net/ipv4/fib_lookup.h b/net/ipv4/fib_lookup.h index 0e8b70bad4e..eef9eec17e0 100644 --- a/net/ipv4/fib_lookup.h +++ b/net/ipv4/fib_lookup.h @@ -30,7 +30,8 @@ extern int fib_dump_info(struct sk_buff *skb, u32 pid, u32 seq, int event, int dst_len, u8 tos, struct fib_info *fi, unsigned int); extern void rtmsg_fib(int event, __be32 key, struct fib_alias *fa, - int dst_len, u32 tb_id, struct nl_info *info); + int dst_len, u32 tb_id, struct nl_info *info, + unsigned int nlm_flags); extern struct fib_alias *fib_find_alias(struct list_head *fah, u8 tos, u32 prio); extern int fib_detect_death(struct fib_info *fi, int order, diff --git a/net/ipv4/fib_rules.c b/net/ipv4/fib_rules.c index 33083ad52e9..2a947840210 100644 --- a/net/ipv4/fib_rules.c +++ b/net/ipv4/fib_rules.c @@ -169,7 +169,7 @@ static struct fib_table *fib_empty_table(void) return NULL; } -static struct nla_policy fib4_rule_policy[FRA_MAX+1] __read_mostly = { +static const struct nla_policy fib4_rule_policy[FRA_MAX+1] = { FRA_GENERIC_POLICY, [FRA_FLOW] = { .type = NLA_U32 }, }; diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c index 406ea7050ae..bb94550d95c 100644 --- a/net/ipv4/fib_semantics.c +++ b/net/ipv4/fib_semantics.c @@ -301,7 +301,8 @@ static inline size_t fib_nlmsg_size(struct fib_info *fi) } void rtmsg_fib(int event, __be32 key, struct fib_alias *fa, - 
int dst_len, u32 tb_id, struct nl_info *info) + int dst_len, u32 tb_id, struct nl_info *info, + unsigned int nlm_flags) { struct sk_buff *skb; u32 seq = info->nlh ? info->nlh->nlmsg_seq : 0; @@ -313,7 +314,7 @@ void rtmsg_fib(int event, __be32 key, struct fib_alias *fa, err = fib_dump_info(skb, info->pid, seq, event, tb_id, fa->fa_type, fa->fa_scope, key, dst_len, - fa->fa_tos, fa->fa_info, 0); + fa->fa_tos, fa->fa_info, nlm_flags); if (err < 0) { /* -EMSGSIZE implies BUG in fib_nlmsg_size() */ WARN_ON(err == -EMSGSIZE); diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c index 9be7da7c3a8..30e332ade61 100644 --- a/net/ipv4/fib_trie.c +++ b/net/ipv4/fib_trie.c @@ -1226,6 +1226,8 @@ static int fn_trie_insert(struct fib_table *tb, struct fib_config *cfg) fib_release_info(fi_drop); if (state & FA_S_ACCESSED) rt_cache_flush(-1); + rtmsg_fib(RTM_NEWROUTE, htonl(key), new_fa, plen, + tb->tb_id, &cfg->fc_nlinfo, NLM_F_REPLACE); goto succeeded; } @@ -1278,7 +1280,7 @@ static int fn_trie_insert(struct fib_table *tb, struct fib_config *cfg) rt_cache_flush(-1); rtmsg_fib(RTM_NEWROUTE, htonl(key), new_fa, plen, tb->tb_id, - &cfg->fc_nlinfo); + &cfg->fc_nlinfo, 0); succeeded: return 0; @@ -1624,7 +1626,7 @@ static int fn_trie_delete(struct fib_table *tb, struct fib_config *cfg) fa = fa_to_delete; rtmsg_fib(RTM_DELROUTE, htonl(key), fa, plen, tb->tb_id, - &cfg->fc_nlinfo); + &cfg->fc_nlinfo, 0); l = fib_find_node(t, key); li = find_leaf_info(l, plen); diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c index d38cbba92a4..02a899bec19 100644 --- a/net/ipv4/icmp.c +++ b/net/ipv4/icmp.c @@ -514,9 +514,15 @@ void icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info) saddr = iph->daddr; if (!(rt->rt_flags & RTCF_LOCAL)) { - if (sysctl_icmp_errors_use_inbound_ifaddr) - saddr = inet_select_addr(skb_in->dev, 0, RT_SCOPE_LINK); - else + struct net_device *dev = NULL; + + if (rt->fl.iif && sysctl_icmp_errors_use_inbound_ifaddr) + dev = dev_get_by_index(rt->fl.iif); + + if (dev) { + saddr = inet_select_addr(dev, 0, RT_SCOPE_LINK); + dev_put(dev); + } else saddr = 0; } diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c index f4dd4745310..a646409c2d0 100644 --- a/net/ipv4/igmp.c +++ b/net/ipv4/igmp.c @@ -128,14 +128,16 @@ * contradict to specs provided this delay is small enough. 
*/ -#define IGMP_V1_SEEN(in_dev) (ipv4_devconf.force_igmp_version == 1 || \ - (in_dev)->cnf.force_igmp_version == 1 || \ - ((in_dev)->mr_v1_seen && \ - time_before(jiffies, (in_dev)->mr_v1_seen))) -#define IGMP_V2_SEEN(in_dev) (ipv4_devconf.force_igmp_version == 2 || \ - (in_dev)->cnf.force_igmp_version == 2 || \ - ((in_dev)->mr_v2_seen && \ - time_before(jiffies, (in_dev)->mr_v2_seen))) +#define IGMP_V1_SEEN(in_dev) \ + (IPV4_DEVCONF_ALL(FORCE_IGMP_VERSION) == 1 || \ + IN_DEV_CONF_GET((in_dev), FORCE_IGMP_VERSION) == 1 || \ + ((in_dev)->mr_v1_seen && \ + time_before(jiffies, (in_dev)->mr_v1_seen))) +#define IGMP_V2_SEEN(in_dev) \ + (IPV4_DEVCONF_ALL(FORCE_IGMP_VERSION) == 2 || \ + IN_DEV_CONF_GET((in_dev), FORCE_IGMP_VERSION) == 2 || \ + ((in_dev)->mr_v2_seen && \ + time_before(jiffies, (in_dev)->mr_v2_seen))) static void igmpv3_add_delrec(struct in_device *in_dev, struct ip_mc_list *im); static void igmpv3_del_delrec(struct in_device *in_dev, __be32 multiaddr); diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c index 43fb1600f1f..fbe7714f21d 100644 --- a/net/ipv4/inet_connection_sock.c +++ b/net/ipv4/inet_connection_sock.c @@ -31,10 +31,8 @@ EXPORT_SYMBOL(inet_csk_timer_bug_msg); /* * This array holds the first and last local port number. - * For high-usage systems, use sysctl to change this to - * 32768-61000 */ -int sysctl_local_port_range[2] = { 1024, 4999 }; +int sysctl_local_port_range[2] = { 32768, 61000 }; int inet_csk_bind_conflict(const struct sock *sk, const struct inet_bind_bucket *tb) diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index d6427d91851..34ea4547ebb 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -1352,7 +1352,8 @@ void ip_send_reply(struct sock *sk, struct sk_buff *skb, struct ip_reply_arg *ar } { - struct flowi fl = { .nl_u = { .ip4_u = + struct flowi fl = { .oif = arg->bound_dev_if, + .nl_u = { .ip4_u = { .daddr = daddr, .saddr = rt->rt_spec_dst, .tos = RT_TOS(ip_hdr(skb)->tos) } }, @@ -1376,6 +1377,7 @@ void ip_send_reply(struct sock *sk, struct sk_buff *skb, struct ip_reply_arg *ar inet->tos = ip_hdr(skb)->tos; sk->sk_priority = skb->priority; sk->sk_protocol = ip_hdr(skb)->protocol; + sk->sk_bound_dev_if = arg->bound_dev_if; ip_append_data(sk, ip_reply_glue_bits, arg->iov->iov_base, len, 0, &ipc, rt, MSG_DONTWAIT); if ((skb = skb_peek(&sk->sk_write_queue)) != NULL) { diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c index 0ebae413ae8..d96582acdf6 100644 --- a/net/ipv4/ipmr.c +++ b/net/ipv4/ipmr.c @@ -152,9 +152,11 @@ struct net_device *ipmr_new_tunnel(struct vifctl *v) dev->flags |= IFF_MULTICAST; in_dev = __in_dev_get_rtnl(dev); - if (in_dev == NULL && (in_dev = inetdev_init(dev)) == NULL) + if (in_dev == NULL) goto failure; - in_dev->cnf.rp_filter = 0; + + ipv4_devconf_setall(in_dev); + IPV4_DEVCONF(in_dev->cnf, RP_FILTER) = 0; if (dev_open(dev)) goto failure; @@ -218,10 +220,15 @@ static struct net_device *ipmr_reg_vif(void) } dev->iflink = 0; - if ((in_dev = inetdev_init(dev)) == NULL) + rcu_read_lock(); + if ((in_dev = __in_dev_get_rcu(dev)) == NULL) { + rcu_read_unlock(); goto failure; + } - in_dev->cnf.rp_filter = 0; + ipv4_devconf_setall(in_dev); + IPV4_DEVCONF(in_dev->cnf, RP_FILTER) = 0; + rcu_read_unlock(); if (dev_open(dev)) goto failure; @@ -281,7 +288,7 @@ static int vif_delete(int vifi) dev_set_allmulti(dev, -1); if ((in_dev = __in_dev_get_rtnl(dev)) != NULL) { - in_dev->cnf.mc_forwarding--; + IPV4_DEVCONF(in_dev->cnf, MC_FORWARDING)--; ip_rt_multicast_event(in_dev); } @@ -426,7 +433,7 @@ 
static int vif_add(struct vifctl *vifc, int mrtsock) if ((in_dev = __in_dev_get_rtnl(dev)) == NULL) return -EADDRNOTAVAIL; - in_dev->cnf.mc_forwarding++; + IPV4_DEVCONF(in_dev->cnf, MC_FORWARDING)++; dev_set_allmulti(dev, +1); ip_rt_multicast_event(in_dev); @@ -841,7 +848,7 @@ static void mrtsock_destruct(struct sock *sk) { rtnl_lock(); if (sk == mroute_socket) { - ipv4_devconf.mc_forwarding--; + IPV4_DEVCONF_ALL(MC_FORWARDING)--; write_lock_bh(&mrt_lock); mroute_socket=NULL; @@ -890,7 +897,7 @@ int ip_mroute_setsockopt(struct sock *sk,int optname,char __user *optval,int opt mroute_socket=sk; write_unlock_bh(&mrt_lock); - ipv4_devconf.mc_forwarding++; + IPV4_DEVCONF_ALL(MC_FORWARDING)++; } rtnl_unlock(); return ret; diff --git a/net/ipv4/ipvs/Kconfig b/net/ipv4/ipvs/Kconfig index 891b9355cf9..09d0c3f3566 100644 --- a/net/ipv4/ipvs/Kconfig +++ b/net/ipv4/ipvs/Kconfig @@ -1,10 +1,7 @@ # # IP Virtual Server configuration # -menu "IP: Virtual Server Configuration" - depends on NETFILTER - -config IP_VS +menuconfig IP_VS tristate "IP virtual server support (EXPERIMENTAL)" depends on NETFILTER ---help--- @@ -25,9 +22,10 @@ config IP_VS If you want to compile it in kernel, say Y. To compile it as a module, choose M here. If unsure, say N. +if IP_VS + config IP_VS_DEBUG bool "IP virtual server debugging" - depends on IP_VS ---help--- Say Y here if you want to get additional messages useful in debugging the IP virtual server code. You can change the debug @@ -35,7 +33,6 @@ config IP_VS_DEBUG config IP_VS_TAB_BITS int "IPVS connection table size (the Nth power of 2)" - depends on IP_VS default "12" ---help--- The IPVS connection hash table uses the chaining scheme to handle @@ -61,42 +58,35 @@ config IP_VS_TAB_BITS needed for your box. comment "IPVS transport protocol load balancing support" - depends on IP_VS config IP_VS_PROTO_TCP bool "TCP load balancing support" - depends on IP_VS ---help--- This option enables support for load balancing TCP transport protocol. Say Y if unsure. config IP_VS_PROTO_UDP bool "UDP load balancing support" - depends on IP_VS ---help--- This option enables support for load balancing UDP transport protocol. Say Y if unsure. config IP_VS_PROTO_ESP bool "ESP load balancing support" - depends on IP_VS ---help--- This option enables support for load balancing ESP (Encapsulation Security Payload) transport protocol. Say Y if unsure. config IP_VS_PROTO_AH bool "AH load balancing support" - depends on IP_VS ---help--- This option enables support for load balancing AH (Authentication Header) transport protocol. Say Y if unsure. comment "IPVS scheduler" - depends on IP_VS config IP_VS_RR tristate "round-robin scheduling" - depends on IP_VS ---help--- The robin-robin scheduling algorithm simply directs network connections to different real servers in a round-robin manner. 
@@ -106,7 +96,6 @@ config IP_VS_RR config IP_VS_WRR tristate "weighted round-robin scheduling" - depends on IP_VS ---help--- The weighted robin-robin scheduling algorithm directs network connections to different real servers based on server weights @@ -120,7 +109,6 @@ config IP_VS_WRR config IP_VS_LC tristate "least-connection scheduling" - depends on IP_VS ---help--- The least-connection scheduling algorithm directs network connections to the server with the least number of active @@ -131,7 +119,6 @@ config IP_VS_LC config IP_VS_WLC tristate "weighted least-connection scheduling" - depends on IP_VS ---help--- The weighted least-connection scheduling algorithm directs network connections to the server with the least active connections @@ -142,7 +129,6 @@ config IP_VS_WLC config IP_VS_LBLC tristate "locality-based least-connection scheduling" - depends on IP_VS ---help--- The locality-based least-connection scheduling algorithm is for destination IP load balancing. It is usually used in cache cluster. @@ -157,7 +143,6 @@ config IP_VS_LBLC config IP_VS_LBLCR tristate "locality-based least-connection with replication scheduling" - depends on IP_VS ---help--- The locality-based least-connection with replication scheduling algorithm is also for destination IP load balancing. It is @@ -176,7 +161,6 @@ config IP_VS_LBLCR config IP_VS_DH tristate "destination hashing scheduling" - depends on IP_VS ---help--- The destination hashing scheduling algorithm assigns network connections to the servers through looking up a statically assigned @@ -187,7 +171,6 @@ config IP_VS_DH config IP_VS_SH tristate "source hashing scheduling" - depends on IP_VS ---help--- The source hashing scheduling algorithm assigns network connections to the servers through looking up a statically assigned @@ -198,7 +181,6 @@ config IP_VS_SH config IP_VS_SED tristate "shortest expected delay scheduling" - depends on IP_VS ---help--- The shortest expected delay scheduling algorithm assigns network connections to the server with the shortest expected delay. The @@ -212,7 +194,6 @@ config IP_VS_SED config IP_VS_NQ tristate "never queue scheduling" - depends on IP_VS ---help--- The never queue scheduling algorithm adopts a two-speed model. When there is an idle server available, the job will be sent to @@ -225,11 +206,10 @@ config IP_VS_NQ module, choose M here. If unsure, say N. comment 'IPVS application helper' - depends on IP_VS config IP_VS_FTP tristate "FTP protocol helper" - depends on IP_VS && IP_VS_PROTO_TCP + depends on IP_VS_PROTO_TCP ---help--- FTP is a protocol that transfers IP address and/or port number in the payload. In the virtual server via Network Address Translation, @@ -241,4 +221,4 @@ config IP_VS_FTP If you want to compile it in kernel, say Y. To compile it as a module, choose M here. If unsure, say N. 
-endmenu +endif # IP_VS diff --git a/net/ipv4/ipvs/ip_vs_ctl.c b/net/ipv4/ipvs/ip_vs_ctl.c index b3050a6817e..68fe1d4d021 100644 --- a/net/ipv4/ipvs/ip_vs_ctl.c +++ b/net/ipv4/ipvs/ip_vs_ctl.c @@ -2387,6 +2387,7 @@ void ip_vs_control_cleanup(void) EnterFunction(2); ip_vs_trash_cleanup(); cancel_rearming_delayed_work(&defense_work); + cancel_work_sync(&defense_work.work); ip_vs_kill_estimator(&ip_vs_stats); unregister_sysctl_table(sysctl_header); proc_net_remove("ip_vs_stats"); diff --git a/net/ipv4/ipvs/ip_vs_sed.c b/net/ipv4/ipvs/ip_vs_sed.c index ff366f7390d..dd7c128f9db 100644 --- a/net/ipv4/ipvs/ip_vs_sed.c +++ b/net/ipv4/ipvs/ip_vs_sed.c @@ -18,7 +18,7 @@ * The SED algorithm attempts to minimize each job's expected delay until * completion. The expected delay that the job will experience is * (Ci + 1) / Ui if sent to the ith server, in which Ci is the number of - * jobs on the the ith server and Ui is the fixed service rate (weight) of + * jobs on the ith server and Ui is the fixed service rate (weight) of * the ith server. The SED algorithm adopts a greedy policy that each does * what is in its own best interest, i.e. to join the queue which would * minimize its expected delay of completion. diff --git a/net/ipv4/ipvs/ip_vs_sync.c b/net/ipv4/ipvs/ip_vs_sync.c index 7ea2d981a93..356f067484e 100644 --- a/net/ipv4/ipvs/ip_vs_sync.c +++ b/net/ipv4/ipvs/ip_vs_sync.c @@ -67,6 +67,11 @@ struct ip_vs_sync_conn_options { struct ip_vs_seq out_seq; /* outgoing seq. struct */ }; +struct ip_vs_sync_thread_data { + struct completion *startup; + int state; +}; + #define IP_VS_SYNC_CONN_TIMEOUT (3*60*HZ) #define SIMPLE_CONN_SIZE (sizeof(struct ip_vs_sync_conn)) #define FULL_CONN_SIZE \ @@ -751,6 +756,7 @@ static int sync_thread(void *startup) mm_segment_t oldmm; int state; const char *name; + struct ip_vs_sync_thread_data *tinfo = startup; /* increase the module use count */ ip_vs_use_count_inc(); @@ -789,7 +795,14 @@ static int sync_thread(void *startup) add_wait_queue(&sync_wait, &wait); set_sync_pid(state, current->pid); - complete((struct completion *)startup); + complete(tinfo->startup); + + /* + * Once we signal the completion above, we should + * NULL out that reference, since it's allocated on the + * stack of the creating kernel thread. + */ + tinfo->startup = NULL; /* processing master/backup loop here */ if (state == IP_VS_STATE_MASTER) @@ -801,6 +814,14 @@ static int sync_thread(void *startup) remove_wait_queue(&sync_wait, &wait); /* thread exits */ + + /* + * If we weren't explicitly stopped, then we + * exited in error, and should undo our state + */ + if ((!stop_master_sync) && (!stop_backup_sync)) + ip_vs_sync_state -= tinfo->state; + set_sync_pid(state, 0); IP_VS_INFO("sync thread stopped!\n"); @@ -812,6 +833,11 @@ static int sync_thread(void *startup) set_stop_sync(state, 0); wake_up(&stop_sync_wait); + /* + * we need to free the structure that was allocated + * for us in start_sync_thread + */ + kfree(tinfo); return 0; } @@ -838,11 +864,19 @@ int start_sync_thread(int state, char *mcast_ifn, __u8 syncid) { DECLARE_COMPLETION_ONSTACK(startup); pid_t pid; + struct ip_vs_sync_thread_data *tinfo; if ((state == IP_VS_STATE_MASTER && sync_master_pid) || (state == IP_VS_STATE_BACKUP && sync_backup_pid)) return -EEXIST; + /* + * Note that tinfo will be freed in sync_thread on exit + */ + tinfo = kmalloc(sizeof(struct ip_vs_sync_thread_data), GFP_KERNEL); + if (!tinfo) + return -ENOMEM; + IP_VS_DBG(7, "%s: pid %d\n", __FUNCTION__, current->pid); IP_VS_DBG(7, "Each ip_vs_sync_conn entry 
need %Zd bytes\n", sizeof(struct ip_vs_sync_conn)); @@ -858,8 +892,11 @@ int start_sync_thread(int state, char *mcast_ifn, __u8 syncid) ip_vs_backup_syncid = syncid; } + tinfo->state = state; + tinfo->startup = &startup; + repeat: - if ((pid = kernel_thread(fork_sync_thread, &startup, 0)) < 0) { + if ((pid = kernel_thread(fork_sync_thread, tinfo, 0)) < 0) { IP_VS_ERR("could not create fork_sync_thread due to %d... " "retrying.\n", pid); msleep_interruptible(1000); diff --git a/net/ipv4/netfilter/arptable_filter.c b/net/ipv4/netfilter/arptable_filter.c index 7edea2a1696..75c02306253 100644 --- a/net/ipv4/netfilter/arptable_filter.c +++ b/net/ipv4/netfilter/arptable_filter.c @@ -15,128 +15,34 @@ MODULE_DESCRIPTION("arptables filter table"); #define FILTER_VALID_HOOKS ((1 << NF_ARP_IN) | (1 << NF_ARP_OUT) | \ (1 << NF_ARP_FORWARD)) -/* Standard entry. */ -struct arpt_standard -{ - struct arpt_entry entry; - struct arpt_standard_target target; -}; - -struct arpt_error_target -{ - struct arpt_entry_target target; - char errorname[ARPT_FUNCTION_MAXNAMELEN]; -}; - -struct arpt_error -{ - struct arpt_entry entry; - struct arpt_error_target target; -}; - static struct { struct arpt_replace repl; struct arpt_standard entries[3]; struct arpt_error term; -} initial_table __initdata -= { { "filter", FILTER_VALID_HOOKS, 4, - sizeof(struct arpt_standard) * 3 + sizeof(struct arpt_error), - { [NF_ARP_IN] = 0, - [NF_ARP_OUT] = sizeof(struct arpt_standard), - [NF_ARP_FORWARD] = 2 * sizeof(struct arpt_standard), }, - { [NF_ARP_IN] = 0, - [NF_ARP_OUT] = sizeof(struct arpt_standard), - [NF_ARP_FORWARD] = 2 * sizeof(struct arpt_standard), }, - 0, NULL, { } }, - { - /* ARP_IN */ - { - { - { - { 0 }, { 0 }, { 0 }, { 0 }, - 0, 0, - { { 0, }, { 0, } }, - { { 0, }, { 0, } }, - 0, 0, - 0, 0, - 0, 0, - "", "", { 0 }, { 0 }, - 0, 0 - }, - sizeof(struct arpt_entry), - sizeof(struct arpt_standard), - 0, - { 0, 0 }, { } }, - { { { { ARPT_ALIGN(sizeof(struct arpt_standard_target)), "" } }, { } }, - -NF_ACCEPT - 1 } - }, - /* ARP_OUT */ - { - { - { - { 0 }, { 0 }, { 0 }, { 0 }, - 0, 0, - { { 0, }, { 0, } }, - { { 0, }, { 0, } }, - 0, 0, - 0, 0, - 0, 0, - "", "", { 0 }, { 0 }, - 0, 0 - }, - sizeof(struct arpt_entry), - sizeof(struct arpt_standard), - 0, - { 0, 0 }, { } }, - { { { { ARPT_ALIGN(sizeof(struct arpt_standard_target)), "" } }, { } }, - -NF_ACCEPT - 1 } - }, - /* ARP_FORWARD */ - { - { - { - { 0 }, { 0 }, { 0 }, { 0 }, - 0, 0, - { { 0, }, { 0, } }, - { { 0, }, { 0, } }, - 0, 0, - 0, 0, - 0, 0, - "", "", { 0 }, { 0 }, - 0, 0 - }, - sizeof(struct arpt_entry), - sizeof(struct arpt_standard), - 0, - { 0, 0 }, { } }, - { { { { ARPT_ALIGN(sizeof(struct arpt_standard_target)), "" } }, { } }, - -NF_ACCEPT - 1 } - } - }, - /* ERROR */ - { - { - { - { 0 }, { 0 }, { 0 }, { 0 }, - 0, 0, - { { 0, }, { 0, } }, - { { 0, }, { 0, } }, - 0, 0, - 0, 0, - 0, 0, - "", "", { 0 }, { 0 }, - 0, 0 - }, - sizeof(struct arpt_entry), - sizeof(struct arpt_error), - 0, - { 0, 0 }, { } }, - { { { { ARPT_ALIGN(sizeof(struct arpt_error_target)), ARPT_ERROR_TARGET } }, - { } }, - "ERROR" - } - } +} initial_table __initdata = { + .repl = { + .name = "filter", + .valid_hooks = FILTER_VALID_HOOKS, + .num_entries = 4, + .size = sizeof(struct arpt_standard) * 3 + sizeof(struct arpt_error), + .hook_entry = { + [NF_ARP_IN] = 0, + [NF_ARP_OUT] = sizeof(struct arpt_standard), + [NF_ARP_FORWARD] = 2 * sizeof(struct arpt_standard), + }, + .underflow = { + [NF_ARP_IN] = 0, + [NF_ARP_OUT] = sizeof(struct arpt_standard), + [NF_ARP_FORWARD] = 2 * sizeof(struct 
arpt_standard), + }, + }, + .entries = { + ARPT_STANDARD_INIT(NF_ACCEPT), /* ARP_IN */ + ARPT_STANDARD_INIT(NF_ACCEPT), /* ARP_OUT */ + ARPT_STANDARD_INIT(NF_ACCEPT), /* ARP_FORWARD */ + }, + .term = ARPT_ERROR_INIT, }; static struct arpt_table packet_filter = { diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c index e3f83bf160d..9bacf1a0363 100644 --- a/net/ipv4/netfilter/ip_tables.c +++ b/net/ipv4/netfilter/ip_tables.c @@ -499,7 +499,8 @@ check_entry(struct ipt_entry *e, const char *name) } static inline int check_match(struct ipt_entry_match *m, const char *name, - const struct ipt_ip *ip, unsigned int hookmask) + const struct ipt_ip *ip, unsigned int hookmask, + unsigned int *i) { struct xt_match *match; int ret; @@ -515,6 +516,8 @@ static inline int check_match(struct ipt_entry_match *m, const char *name, m->u.kernel.match->name); ret = -EINVAL; } + if (!ret) + (*i)++; return ret; } @@ -537,11 +540,10 @@ find_check_match(struct ipt_entry_match *m, } m->u.kernel.match = match; - ret = check_match(m, name, ip, hookmask); + ret = check_match(m, name, ip, hookmask, i); if (ret) goto err; - (*i)++; return 0; err: module_put(m->u.kernel.match->me); @@ -1425,7 +1427,7 @@ out: } static inline int -compat_check_calc_match(struct ipt_entry_match *m, +compat_find_calc_match(struct ipt_entry_match *m, const char *name, const struct ipt_ip *ip, unsigned int hookmask, @@ -1449,6 +1451,31 @@ compat_check_calc_match(struct ipt_entry_match *m, } static inline int +compat_release_match(struct ipt_entry_match *m, unsigned int *i) +{ + if (i && (*i)-- == 0) + return 1; + + module_put(m->u.kernel.match->me); + return 0; +} + +static inline int +compat_release_entry(struct ipt_entry *e, unsigned int *i) +{ + struct ipt_entry_target *t; + + if (i && (*i)-- == 0) + return 1; + + /* Cleanup all matches */ + IPT_MATCH_ITERATE(e, compat_release_match, NULL); + t = ipt_get_target(e); + module_put(t->u.kernel.target->me); + return 0; +} + +static inline int check_compat_entry_size_and_hooks(struct ipt_entry *e, struct xt_table_info *newinfo, unsigned int *size, @@ -1485,10 +1512,10 @@ check_compat_entry_size_and_hooks(struct ipt_entry *e, off = 0; entry_offset = (void *)e - (void *)base; j = 0; - ret = IPT_MATCH_ITERATE(e, compat_check_calc_match, name, &e->ip, + ret = IPT_MATCH_ITERATE(e, compat_find_calc_match, name, &e->ip, e->comefrom, &off, &j); if (ret != 0) - goto cleanup_matches; + goto release_matches; t = ipt_get_target(e); target = try_then_request_module(xt_find_target(AF_INET, @@ -1499,7 +1526,7 @@ check_compat_entry_size_and_hooks(struct ipt_entry *e, duprintf("check_compat_entry_size_and_hooks: `%s' not found\n", t->u.user.name); ret = target ? 
PTR_ERR(target) : -ENOENT; - goto cleanup_matches; + goto release_matches; } t->u.kernel.target = target; @@ -1526,8 +1553,8 @@ check_compat_entry_size_and_hooks(struct ipt_entry *e, out: module_put(t->u.kernel.target->me); -cleanup_matches: - IPT_MATCH_ITERATE(e, cleanup_match, &j); +release_matches: + IPT_MATCH_ITERATE(e, compat_release_match, &j); return ret; } @@ -1574,15 +1601,26 @@ static int compat_copy_entry_from_user(struct ipt_entry *e, void **dstptr, return ret; } -static inline int compat_check_entry(struct ipt_entry *e, const char *name) +static inline int compat_check_entry(struct ipt_entry *e, const char *name, + unsigned int *i) { - int ret; + int j, ret; - ret = IPT_MATCH_ITERATE(e, check_match, name, &e->ip, e->comefrom); + j = 0; + ret = IPT_MATCH_ITERATE(e, check_match, name, &e->ip, e->comefrom, &j); if (ret) - return ret; + goto cleanup_matches; + + ret = check_target(e, name); + if (ret) + goto cleanup_matches; - return check_target(e, name); + (*i)++; + return 0; + + cleanup_matches: + IPT_MATCH_ITERATE(e, cleanup_match, &j); + return ret; } static int @@ -1673,10 +1711,17 @@ translate_compat_table(const char *name, if (!mark_source_chains(newinfo, valid_hooks, entry1)) goto free_newinfo; + i = 0; ret = IPT_ENTRY_ITERATE(entry1, newinfo->size, compat_check_entry, - name); - if (ret) - goto free_newinfo; + name, &i); + if (ret) { + j -= i; + IPT_ENTRY_ITERATE_CONTINUE(entry1, newinfo->size, i, + compat_release_entry, &j); + IPT_ENTRY_ITERATE(entry1, newinfo->size, cleanup_entry, &i); + xt_free_table_info(newinfo); + return ret; + } /* And one copy for every other CPU */ for_each_possible_cpu(i) @@ -1691,7 +1736,7 @@ translate_compat_table(const char *name, free_newinfo: xt_free_table_info(newinfo); out: - IPT_ENTRY_ITERATE(entry0, total_size, cleanup_entry, &j); + IPT_ENTRY_ITERATE(entry0, total_size, compat_release_entry, &j); return ret; out_unlock: compat_flush_offsets(); diff --git a/net/ipv4/netfilter/iptable_filter.c b/net/ipv4/netfilter/iptable_filter.c index 42728909eba..4f51c1d7d2d 100644 --- a/net/ipv4/netfilter/iptable_filter.c +++ b/net/ipv4/netfilter/iptable_filter.c @@ -26,53 +26,29 @@ static struct struct ipt_replace repl; struct ipt_standard entries[3]; struct ipt_error term; -} initial_table __initdata -= { { "filter", FILTER_VALID_HOOKS, 4, - sizeof(struct ipt_standard) * 3 + sizeof(struct ipt_error), - { [NF_IP_LOCAL_IN] = 0, - [NF_IP_FORWARD] = sizeof(struct ipt_standard), - [NF_IP_LOCAL_OUT] = sizeof(struct ipt_standard) * 2 }, - { [NF_IP_LOCAL_IN] = 0, - [NF_IP_FORWARD] = sizeof(struct ipt_standard), - [NF_IP_LOCAL_OUT] = sizeof(struct ipt_standard) * 2 }, - 0, NULL, { } }, - { - /* LOCAL_IN */ - { { { { 0 }, { 0 }, { 0 }, { 0 }, "", "", { 0 }, { 0 }, 0, 0, 0 }, - 0, - sizeof(struct ipt_entry), - sizeof(struct ipt_standard), - 0, { 0, 0 }, { } }, - { { { { IPT_ALIGN(sizeof(struct ipt_standard_target)), "" } }, { } }, - -NF_ACCEPT - 1 } }, - /* FORWARD */ - { { { { 0 }, { 0 }, { 0 }, { 0 }, "", "", { 0 }, { 0 }, 0, 0, 0 }, - 0, - sizeof(struct ipt_entry), - sizeof(struct ipt_standard), - 0, { 0, 0 }, { } }, - { { { { IPT_ALIGN(sizeof(struct ipt_standard_target)), "" } }, { } }, - -NF_ACCEPT - 1 } }, - /* LOCAL_OUT */ - { { { { 0 }, { 0 }, { 0 }, { 0 }, "", "", { 0 }, { 0 }, 0, 0, 0 }, - 0, - sizeof(struct ipt_entry), - sizeof(struct ipt_standard), - 0, { 0, 0 }, { } }, - { { { { IPT_ALIGN(sizeof(struct ipt_standard_target)), "" } }, { } }, - -NF_ACCEPT - 1 } } - }, - /* ERROR */ - { { { { 0 }, { 0 }, { 0 }, { 0 }, "", "", { 0 }, { 0 }, 0, 0, 0 }, 
- 0, - sizeof(struct ipt_entry), - sizeof(struct ipt_error), - 0, { 0, 0 }, { } }, - { { { { IPT_ALIGN(sizeof(struct ipt_error_target)), IPT_ERROR_TARGET } }, - { } }, - "ERROR" - } - } +} initial_table __initdata = { + .repl = { + .name = "filter", + .valid_hooks = FILTER_VALID_HOOKS, + .num_entries = 4, + .size = sizeof(struct ipt_standard) * 3 + sizeof(struct ipt_error), + .hook_entry = { + [NF_IP_LOCAL_IN] = 0, + [NF_IP_FORWARD] = sizeof(struct ipt_standard), + [NF_IP_LOCAL_OUT] = sizeof(struct ipt_standard) * 2, + }, + .underflow = { + [NF_IP_LOCAL_IN] = 0, + [NF_IP_FORWARD] = sizeof(struct ipt_standard), + [NF_IP_LOCAL_OUT] = sizeof(struct ipt_standard) * 2, + }, + }, + .entries = { + IPT_STANDARD_INIT(NF_ACCEPT), /* LOCAL_IN */ + IPT_STANDARD_INIT(NF_ACCEPT), /* FORWARD */ + IPT_STANDARD_INIT(NF_ACCEPT), /* LOCAL_OUT */ + }, + .term = IPT_ERROR_INIT, /* ERROR */ }; static struct xt_table packet_filter = { @@ -105,7 +81,8 @@ ipt_local_out_hook(unsigned int hook, if ((*pskb)->len < sizeof(struct iphdr) || ip_hdrlen(*pskb) < sizeof(struct iphdr)) { if (net_ratelimit()) - printk("ipt_hook: happy cracking.\n"); + printk("iptable_filter: ignoring short SOCK_RAW " + "packet.\n"); return NF_ACCEPT; } diff --git a/net/ipv4/netfilter/iptable_mangle.c b/net/ipv4/netfilter/iptable_mangle.c index 9278802f274..902446f7cbc 100644 --- a/net/ipv4/netfilter/iptable_mangle.c +++ b/net/ipv4/netfilter/iptable_mangle.c @@ -33,73 +33,35 @@ static struct struct ipt_replace repl; struct ipt_standard entries[5]; struct ipt_error term; -} initial_table __initdata -= { { "mangle", MANGLE_VALID_HOOKS, 6, - sizeof(struct ipt_standard) * 5 + sizeof(struct ipt_error), - { [NF_IP_PRE_ROUTING] = 0, - [NF_IP_LOCAL_IN] = sizeof(struct ipt_standard), - [NF_IP_FORWARD] = sizeof(struct ipt_standard) * 2, - [NF_IP_LOCAL_OUT] = sizeof(struct ipt_standard) * 3, - [NF_IP_POST_ROUTING] = sizeof(struct ipt_standard) * 4 }, - { [NF_IP_PRE_ROUTING] = 0, - [NF_IP_LOCAL_IN] = sizeof(struct ipt_standard), - [NF_IP_FORWARD] = sizeof(struct ipt_standard) * 2, - [NF_IP_LOCAL_OUT] = sizeof(struct ipt_standard) * 3, - [NF_IP_POST_ROUTING] = sizeof(struct ipt_standard) * 4 }, - 0, NULL, { } }, - { - /* PRE_ROUTING */ - { { { { 0 }, { 0 }, { 0 }, { 0 }, "", "", { 0 }, { 0 }, 0, 0, 0 }, - 0, - sizeof(struct ipt_entry), - sizeof(struct ipt_standard), - 0, { 0, 0 }, { } }, - { { { { IPT_ALIGN(sizeof(struct ipt_standard_target)), "" } }, { } }, - -NF_ACCEPT - 1 } }, - /* LOCAL_IN */ - { { { { 0 }, { 0 }, { 0 }, { 0 }, "", "", { 0 }, { 0 }, 0, 0, 0 }, - 0, - sizeof(struct ipt_entry), - sizeof(struct ipt_standard), - 0, { 0, 0 }, { } }, - { { { { IPT_ALIGN(sizeof(struct ipt_standard_target)), "" } }, { } }, - -NF_ACCEPT - 1 } }, - /* FORWARD */ - { { { { 0 }, { 0 }, { 0 }, { 0 }, "", "", { 0 }, { 0 }, 0, 0, 0 }, - 0, - sizeof(struct ipt_entry), - sizeof(struct ipt_standard), - 0, { 0, 0 }, { } }, - { { { { IPT_ALIGN(sizeof(struct ipt_standard_target)), "" } }, { } }, - -NF_ACCEPT - 1 } }, - /* LOCAL_OUT */ - { { { { 0 }, { 0 }, { 0 }, { 0 }, "", "", { 0 }, { 0 }, 0, 0, 0 }, - 0, - sizeof(struct ipt_entry), - sizeof(struct ipt_standard), - 0, { 0, 0 }, { } }, - { { { { IPT_ALIGN(sizeof(struct ipt_standard_target)), "" } }, { } }, - -NF_ACCEPT - 1 } }, - /* POST_ROUTING */ - { { { { 0 }, { 0 }, { 0 }, { 0 }, "", "", { 0 }, { 0 }, 0, 0, 0 }, - 0, - sizeof(struct ipt_entry), - sizeof(struct ipt_standard), - 0, { 0, 0 }, { } }, - { { { { IPT_ALIGN(sizeof(struct ipt_standard_target)), "" } }, { } }, - -NF_ACCEPT - 1 } }, - }, - /* ERROR */ - { { { { 
0 }, { 0 }, { 0 }, { 0 }, "", "", { 0 }, { 0 }, 0, 0, 0 }, - 0, - sizeof(struct ipt_entry), - sizeof(struct ipt_error), - 0, { 0, 0 }, { } }, - { { { { IPT_ALIGN(sizeof(struct ipt_error_target)), IPT_ERROR_TARGET } }, - { } }, - "ERROR" - } - } +} initial_table __initdata = { + .repl = { + .name = "mangle", + .valid_hooks = MANGLE_VALID_HOOKS, + .num_entries = 6, + .size = sizeof(struct ipt_standard) * 5 + sizeof(struct ipt_error), + .hook_entry = { + [NF_IP_PRE_ROUTING] = 0, + [NF_IP_LOCAL_IN] = sizeof(struct ipt_standard), + [NF_IP_FORWARD] = sizeof(struct ipt_standard) * 2, + [NF_IP_LOCAL_OUT] = sizeof(struct ipt_standard) * 3, + [NF_IP_POST_ROUTING] = sizeof(struct ipt_standard) * 4, + }, + .underflow = { + [NF_IP_PRE_ROUTING] = 0, + [NF_IP_LOCAL_IN] = sizeof(struct ipt_standard), + [NF_IP_FORWARD] = sizeof(struct ipt_standard) * 2, + [NF_IP_LOCAL_OUT] = sizeof(struct ipt_standard) * 3, + [NF_IP_POST_ROUTING] = sizeof(struct ipt_standard) * 4, + }, + }, + .entries = { + IPT_STANDARD_INIT(NF_ACCEPT), /* PRE_ROUTING */ + IPT_STANDARD_INIT(NF_ACCEPT), /* LOCAL_IN */ + IPT_STANDARD_INIT(NF_ACCEPT), /* FORWARD */ + IPT_STANDARD_INIT(NF_ACCEPT), /* LOCAL_OUT */ + IPT_STANDARD_INIT(NF_ACCEPT), /* POST_ROUTING */ + }, + .term = IPT_ERROR_INIT, /* ERROR */ }; static struct xt_table packet_mangler = { @@ -138,7 +100,8 @@ ipt_local_hook(unsigned int hook, if ((*pskb)->len < sizeof(struct iphdr) || ip_hdrlen(*pskb) < sizeof(struct iphdr)) { if (net_ratelimit()) - printk("ipt_hook: happy cracking.\n"); + printk("iptable_mangle: ignoring short SOCK_RAW " + "packet.\n"); return NF_ACCEPT; } diff --git a/net/ipv4/netfilter/iptable_raw.c b/net/ipv4/netfilter/iptable_raw.c index 18c3d4c9ff5..d6e50339568 100644 --- a/net/ipv4/netfilter/iptable_raw.c +++ b/net/ipv4/netfilter/iptable_raw.c @@ -5,6 +5,7 @@ */ #include <linux/module.h> #include <linux/netfilter_ipv4/ip_tables.h> +#include <net/ip.h> #define RAW_VALID_HOOKS ((1 << NF_IP_PRE_ROUTING) | (1 << NF_IP_LOCAL_OUT)) @@ -21,62 +22,18 @@ static struct .size = sizeof(struct ipt_standard) * 2 + sizeof(struct ipt_error), .hook_entry = { [NF_IP_PRE_ROUTING] = 0, - [NF_IP_LOCAL_OUT] = sizeof(struct ipt_standard) }, + [NF_IP_LOCAL_OUT] = sizeof(struct ipt_standard) + }, .underflow = { [NF_IP_PRE_ROUTING] = 0, - [NF_IP_LOCAL_OUT] = sizeof(struct ipt_standard) }, + [NF_IP_LOCAL_OUT] = sizeof(struct ipt_standard) + }, }, .entries = { - /* PRE_ROUTING */ - { - .entry = { - .target_offset = sizeof(struct ipt_entry), - .next_offset = sizeof(struct ipt_standard), - }, - .target = { - .target = { - .u = { - .target_size = IPT_ALIGN(sizeof(struct ipt_standard_target)), - }, - }, - .verdict = -NF_ACCEPT - 1, - }, - }, - - /* LOCAL_OUT */ - { - .entry = { - .target_offset = sizeof(struct ipt_entry), - .next_offset = sizeof(struct ipt_standard), - }, - .target = { - .target = { - .u = { - .target_size = IPT_ALIGN(sizeof(struct ipt_standard_target)), - }, - }, - .verdict = -NF_ACCEPT - 1, - }, - }, + IPT_STANDARD_INIT(NF_ACCEPT), /* PRE_ROUTING */ + IPT_STANDARD_INIT(NF_ACCEPT), /* LOCAL_OUT */ }, - /* ERROR */ - .term = { - .entry = { - .target_offset = sizeof(struct ipt_entry), - .next_offset = sizeof(struct ipt_error), - }, - .target = { - .target = { - .u = { - .user = { - .target_size = IPT_ALIGN(sizeof(struct ipt_error_target)), - .name = IPT_ERROR_TARGET, - }, - }, - }, - .errorname = "ERROR", - }, - } + .term = IPT_ERROR_INIT, /* ERROR */ }; static struct xt_table packet_raw = { @@ -98,6 +55,24 @@ ipt_hook(unsigned int hook, return ipt_do_table(pskb, hook, in, 
out, &packet_raw); } +static unsigned int +ipt_local_hook(unsigned int hook, + struct sk_buff **pskb, + const struct net_device *in, + const struct net_device *out, + int (*okfn)(struct sk_buff *)) +{ + /* root is playing with raw sockets. */ + if ((*pskb)->len < sizeof(struct iphdr) || + ip_hdrlen(*pskb) < sizeof(struct iphdr)) { + if (net_ratelimit()) + printk("iptable_raw: ignoring short SOCK_RAW " + "packet.\n"); + return NF_ACCEPT; + } + return ipt_do_table(pskb, hook, in, out, &packet_raw); +} + /* 'raw' is the very first table. */ static struct nf_hook_ops ipt_ops[] = { { @@ -108,7 +83,7 @@ static struct nf_hook_ops ipt_ops[] = { .owner = THIS_MODULE, }, { - .hook = ipt_hook, + .hook = ipt_local_hook, .pf = PF_INET, .hooknum = NF_IP_LOCAL_OUT, .priority = NF_IP_PRI_RAW, diff --git a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c index 0654eaae70c..6dc72a815f7 100644 --- a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c +++ b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c @@ -133,6 +133,7 @@ static unsigned int ipv4_conntrack_help(unsigned int hooknum, struct nf_conn *ct; enum ip_conntrack_info ctinfo; struct nf_conn_help *help; + struct nf_conntrack_helper *helper; /* This is where we call the helper: as the packet goes out. */ ct = nf_ct_get(*pskb, &ctinfo); @@ -140,12 +141,14 @@ static unsigned int ipv4_conntrack_help(unsigned int hooknum, return NF_ACCEPT; help = nfct_help(ct); - if (!help || !help->helper) + if (!help) return NF_ACCEPT; - - return help->helper->help(pskb, - skb_network_offset(*pskb) + ip_hdrlen(*pskb), - ct, ctinfo); + /* rcu_read_lock()ed by nf_hook_slow */ + helper = rcu_dereference(help->helper); + if (!helper) + return NF_ACCEPT; + return helper->help(pskb, skb_network_offset(*pskb) + ip_hdrlen(*pskb), + ct, ctinfo); } static unsigned int ipv4_conntrack_defrag(unsigned int hooknum, @@ -154,12 +157,10 @@ static unsigned int ipv4_conntrack_defrag(unsigned int hooknum, const struct net_device *out, int (*okfn)(struct sk_buff *)) { -#if !defined(CONFIG_IP_NF_NAT) && !defined(CONFIG_IP_NF_NAT_MODULE) /* Previously seen (loopback)? Ignore. Do this before fragment check. */ if ((*pskb)->nfct) return NF_ACCEPT; -#endif /* Gather fragments. 
*/ if (ip_hdr(*pskb)->frag_off & htons(IP_MF | IP_OFFSET)) { diff --git a/net/ipv4/netfilter/nf_nat_ftp.c b/net/ipv4/netfilter/nf_nat_ftp.c index 751b5980175..e6bc8e5a72f 100644 --- a/net/ipv4/netfilter/nf_nat_ftp.c +++ b/net/ipv4/netfilter/nf_nat_ftp.c @@ -40,8 +40,7 @@ mangle_rfc959_packet(struct sk_buff **pskb, unsigned int matchoff, unsigned int matchlen, struct nf_conn *ct, - enum ip_conntrack_info ctinfo, - u32 *seq) + enum ip_conntrack_info ctinfo) { char buffer[sizeof("nnn,nnn,nnn,nnn,nnn,nnn")]; @@ -50,7 +49,6 @@ mangle_rfc959_packet(struct sk_buff **pskb, DEBUGP("calling nf_nat_mangle_tcp_packet\n"); - *seq += strlen(buffer) - matchlen; return nf_nat_mangle_tcp_packet(pskb, ct, ctinfo, matchoff, matchlen, buffer, strlen(buffer)); } @@ -63,8 +61,7 @@ mangle_eprt_packet(struct sk_buff **pskb, unsigned int matchoff, unsigned int matchlen, struct nf_conn *ct, - enum ip_conntrack_info ctinfo, - u32 *seq) + enum ip_conntrack_info ctinfo) { char buffer[sizeof("|1|255.255.255.255|65535|")]; @@ -72,7 +69,6 @@ mangle_eprt_packet(struct sk_buff **pskb, DEBUGP("calling nf_nat_mangle_tcp_packet\n"); - *seq += strlen(buffer) - matchlen; return nf_nat_mangle_tcp_packet(pskb, ct, ctinfo, matchoff, matchlen, buffer, strlen(buffer)); } @@ -85,8 +81,7 @@ mangle_epsv_packet(struct sk_buff **pskb, unsigned int matchoff, unsigned int matchlen, struct nf_conn *ct, - enum ip_conntrack_info ctinfo, - u32 *seq) + enum ip_conntrack_info ctinfo) { char buffer[sizeof("|||65535|")]; @@ -94,14 +89,13 @@ mangle_epsv_packet(struct sk_buff **pskb, DEBUGP("calling nf_nat_mangle_tcp_packet\n"); - *seq += strlen(buffer) - matchlen; return nf_nat_mangle_tcp_packet(pskb, ct, ctinfo, matchoff, matchlen, buffer, strlen(buffer)); } static int (*mangle[])(struct sk_buff **, __be32, u_int16_t, unsigned int, unsigned int, struct nf_conn *, - enum ip_conntrack_info, u32 *seq) + enum ip_conntrack_info) = { [NF_CT_FTP_PORT] = mangle_rfc959_packet, [NF_CT_FTP_PASV] = mangle_rfc959_packet, @@ -116,8 +110,7 @@ static unsigned int nf_nat_ftp(struct sk_buff **pskb, enum nf_ct_ftp_type type, unsigned int matchoff, unsigned int matchlen, - struct nf_conntrack_expect *exp, - u32 *seq) + struct nf_conntrack_expect *exp) { __be32 newip; u_int16_t port; @@ -145,8 +138,7 @@ static unsigned int nf_nat_ftp(struct sk_buff **pskb, if (port == 0) return NF_DROP; - if (!mangle[type](pskb, newip, port, matchoff, matchlen, ct, ctinfo, - seq)) { + if (!mangle[type](pskb, newip, port, matchoff, matchlen, ct, ctinfo)) { nf_conntrack_unexpect_related(exp); return NF_DROP; } diff --git a/net/ipv4/netfilter/nf_nat_h323.c b/net/ipv4/netfilter/nf_nat_h323.c index fcebc968d37..c5d2a2d690b 100644 --- a/net/ipv4/netfilter/nf_nat_h323.c +++ b/net/ipv4/netfilter/nf_nat_h323.c @@ -455,9 +455,9 @@ static int nat_q931(struct sk_buff **pskb, struct nf_conn *ct, if (idx > 0 && get_h225_addr(ct, *data, &taddr[0], &addr, &port) && (ntohl(addr.ip) & 0xff000000) == 0x7f000000) { - set_h225_addr_hook(pskb, data, 0, &taddr[0], - &ct->tuplehash[!dir].tuple.dst.u3, - info->sig_port[!dir]); + set_h225_addr(pskb, data, 0, &taddr[0], + &ct->tuplehash[!dir].tuple.dst.u3, + info->sig_port[!dir]); } } else { nf_conntrack_unexpect_related(exp); diff --git a/net/ipv4/netfilter/nf_nat_rule.c b/net/ipv4/netfilter/nf_nat_rule.c index 2534f718ab9..6740736c5e7 100644 --- a/net/ipv4/netfilter/nf_nat_rule.c +++ b/net/ipv4/netfilter/nf_nat_rule.c @@ -46,77 +46,20 @@ static struct .hook_entry = { [NF_IP_PRE_ROUTING] = 0, [NF_IP_POST_ROUTING] = sizeof(struct ipt_standard), - 
[NF_IP_LOCAL_OUT] = sizeof(struct ipt_standard) * 2 }, + [NF_IP_LOCAL_OUT] = sizeof(struct ipt_standard) * 2 + }, .underflow = { [NF_IP_PRE_ROUTING] = 0, [NF_IP_POST_ROUTING] = sizeof(struct ipt_standard), - [NF_IP_LOCAL_OUT] = sizeof(struct ipt_standard) * 2 }, + [NF_IP_LOCAL_OUT] = sizeof(struct ipt_standard) * 2 + }, }, .entries = { - /* PRE_ROUTING */ - { - .entry = { - .target_offset = sizeof(struct ipt_entry), - .next_offset = sizeof(struct ipt_standard), - }, - .target = { - .target = { - .u = { - .target_size = IPT_ALIGN(sizeof(struct ipt_standard_target)), - }, - }, - .verdict = -NF_ACCEPT - 1, - }, - }, - /* POST_ROUTING */ - { - .entry = { - .target_offset = sizeof(struct ipt_entry), - .next_offset = sizeof(struct ipt_standard), - }, - .target = { - .target = { - .u = { - .target_size = IPT_ALIGN(sizeof(struct ipt_standard_target)), - }, - }, - .verdict = -NF_ACCEPT - 1, - }, - }, - /* LOCAL_OUT */ - { - .entry = { - .target_offset = sizeof(struct ipt_entry), - .next_offset = sizeof(struct ipt_standard), - }, - .target = { - .target = { - .u = { - .target_size = IPT_ALIGN(sizeof(struct ipt_standard_target)), - }, - }, - .verdict = -NF_ACCEPT - 1, - }, - }, + IPT_STANDARD_INIT(NF_ACCEPT), /* PRE_ROUTING */ + IPT_STANDARD_INIT(NF_ACCEPT), /* POST_ROUTING */ + IPT_STANDARD_INIT(NF_ACCEPT), /* LOCAL_OUT */ }, - /* ERROR */ - .term = { - .entry = { - .target_offset = sizeof(struct ipt_entry), - .next_offset = sizeof(struct ipt_error), - }, - .target = { - .target = { - .u = { - .user = { - .target_size = IPT_ALIGN(sizeof(struct ipt_error_target)), - .name = IPT_ERROR_TARGET, - }, - }, - }, - .errorname = "ERROR", - }, - } + .term = IPT_ERROR_INIT, /* ERROR */ }; static struct xt_table nat_table = { @@ -230,9 +173,7 @@ static int ipt_dnat_checkentry(const char *tablename, } inline unsigned int -alloc_null_binding(struct nf_conn *ct, - struct nf_nat_info *info, - unsigned int hooknum) +alloc_null_binding(struct nf_conn *ct, unsigned int hooknum) { /* Force range to this IP; let proto decide mapping for per-proto parts (hence not IP_NAT_RANGE_PROTO_SPECIFIED). @@ -251,9 +192,7 @@ alloc_null_binding(struct nf_conn *ct, } unsigned int -alloc_null_binding_confirmed(struct nf_conn *ct, - struct nf_nat_info *info, - unsigned int hooknum) +alloc_null_binding_confirmed(struct nf_conn *ct, unsigned int hooknum) { __be32 ip = (HOOK2MANIP(hooknum) == IP_NAT_MANIP_SRC @@ -275,8 +214,7 @@ int nf_nat_rule_find(struct sk_buff **pskb, unsigned int hooknum, const struct net_device *in, const struct net_device *out, - struct nf_conn *ct, - struct nf_nat_info *info) + struct nf_conn *ct) { int ret; @@ -285,7 +223,7 @@ int nf_nat_rule_find(struct sk_buff **pskb, if (ret == NF_ACCEPT) { if (!nf_nat_initialized(ct, HOOK2MANIP(hooknum))) /* NUL mapping */ - ret = alloc_null_binding(ct, info, hooknum); + ret = alloc_null_binding(ct, hooknum); } return ret; } diff --git a/net/ipv4/netfilter/nf_nat_standalone.c b/net/ipv4/netfilter/nf_nat_standalone.c index 64bbed2ba78..55dac36dbc8 100644 --- a/net/ipv4/netfilter/nf_nat_standalone.c +++ b/net/ipv4/netfilter/nf_nat_standalone.c @@ -80,7 +80,6 @@ nf_nat_fn(unsigned int hooknum, struct nf_conn *ct; enum ip_conntrack_info ctinfo; struct nf_conn_nat *nat; - struct nf_nat_info *info; /* maniptype == SRC for postrouting. */ enum nf_nat_manip_type maniptype = HOOK2MANIP(hooknum); @@ -129,7 +128,6 @@ nf_nat_fn(unsigned int hooknum, } /* Fall thru... (Only ICMPs can be IP_CT_IS_REPLY) */ case IP_CT_NEW: - info = &nat->info; /* Seen it before? 
This can happen for loopback, retrans, or local packets.. */ @@ -138,14 +136,13 @@ nf_nat_fn(unsigned int hooknum, if (unlikely(nf_ct_is_confirmed(ct))) /* NAT module was loaded late */ - ret = alloc_null_binding_confirmed(ct, info, - hooknum); + ret = alloc_null_binding_confirmed(ct, hooknum); else if (hooknum == NF_IP_LOCAL_IN) /* LOCAL_IN hook doesn't have a chain! */ - ret = alloc_null_binding(ct, info, hooknum); + ret = alloc_null_binding(ct, hooknum); else ret = nf_nat_rule_find(pskb, hooknum, in, out, - ct, info); + ct); if (ret != NF_ACCEPT) { return ret; @@ -160,10 +157,8 @@ nf_nat_fn(unsigned int hooknum, /* ESTABLISHED */ NF_CT_ASSERT(ctinfo == IP_CT_ESTABLISHED || ctinfo == (IP_CT_ESTABLISHED+IP_CT_IS_REPLY)); - info = &nat->info; } - NF_CT_ASSERT(info); return nf_nat_packet(ct, ctinfo, hooknum, pskb); } diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c index 37ab5802ca0..3b690cf2a4e 100644 --- a/net/ipv4/proc.c +++ b/net/ipv4/proc.c @@ -109,6 +109,17 @@ static const struct snmp_mib snmp4_ipstats_list[] = { SNMP_MIB_SENTINEL }; +/* Following RFC4293 items are displayed in /proc/net/netstat */ +static const struct snmp_mib snmp4_ipextstats_list[] = { + SNMP_MIB_ITEM("InNoRoutes", IPSTATS_MIB_INNOROUTES), + SNMP_MIB_ITEM("InTruncatedPkts", IPSTATS_MIB_INTRUNCATEDPKTS), + SNMP_MIB_ITEM("InMcastPkts", IPSTATS_MIB_INMCASTPKTS), + SNMP_MIB_ITEM("OutMcastPkts", IPSTATS_MIB_OUTMCASTPKTS), + SNMP_MIB_ITEM("InBcastPkts", IPSTATS_MIB_INBCASTPKTS), + SNMP_MIB_ITEM("OutBcastPkts", IPSTATS_MIB_OUTBCASTPKTS), + SNMP_MIB_SENTINEL +}; + static const struct snmp_mib snmp4_icmp_list[] = { SNMP_MIB_ITEM("InMsgs", ICMP_MIB_INMSGS), SNMP_MIB_ITEM("InErrors", ICMP_MIB_INERRORS), @@ -249,7 +260,7 @@ static int snmp_seq_show(struct seq_file *seq, void *v) seq_printf(seq, " %s", snmp4_ipstats_list[i].name); seq_printf(seq, "\nIp: %d %d", - ipv4_devconf.forwarding ? 1 : 2, sysctl_ip_default_ttl); + IPV4_DEVCONF_ALL(FORWARDING) ? 
1 : 2, sysctl_ip_default_ttl); for (i = 0; snmp4_ipstats_list[i].name != NULL; i++) seq_printf(seq, " %lu", @@ -338,6 +349,16 @@ static int netstat_seq_show(struct seq_file *seq, void *v) snmp_fold_field((void **)net_statistics, snmp4_net_list[i].entry)); + seq_puts(seq, "\nIpExt:"); + for (i = 0; snmp4_ipextstats_list[i].name != NULL; i++) + seq_printf(seq, " %s", snmp4_ipextstats_list[i].name); + + seq_puts(seq, "\nIpExt:"); + for (i = 0; snmp4_ipextstats_list[i].name != NULL; i++) + seq_printf(seq, " %lu", + snmp_fold_field((void **)ip_statistics, + snmp4_ipextstats_list[i].entry)); + seq_putc(seq, '\n'); return 0; } diff --git a/net/ipv4/route.c b/net/ipv4/route.c index cb76e3c725a..29ca63e81ce 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -1636,7 +1636,7 @@ static int ip_route_input_mc(struct sk_buff *skb, __be32 daddr, __be32 saddr, atomic_set(&rth->u.dst.__refcnt, 1); rth->u.dst.flags= DST_HOST; - if (in_dev->cnf.no_policy) + if (IN_DEV_CONF_GET(in_dev, NOPOLICY)) rth->u.dst.flags |= DST_NOPOLICY; rth->fl.fl4_dst = daddr; rth->rt_dst = daddr; @@ -1778,9 +1778,9 @@ static inline int __mkroute_input(struct sk_buff *skb, if (res->fi->fib_nhs > 1) rth->u.dst.flags |= DST_BALANCED; #endif - if (in_dev->cnf.no_policy) + if (IN_DEV_CONF_GET(in_dev, NOPOLICY)) rth->u.dst.flags |= DST_NOPOLICY; - if (out_dev->cnf.no_xfrm) + if (IN_DEV_CONF_GET(out_dev, NOXFRM)) rth->u.dst.flags |= DST_NOXFRM; rth->fl.fl4_dst = daddr; rth->rt_dst = daddr; @@ -2021,7 +2021,7 @@ local_input: atomic_set(&rth->u.dst.__refcnt, 1); rth->u.dst.flags= DST_HOST; - if (in_dev->cnf.no_policy) + if (IN_DEV_CONF_GET(in_dev, NOPOLICY)) rth->u.dst.flags |= DST_NOPOLICY; rth->fl.fl4_dst = daddr; rth->rt_dst = daddr; @@ -2218,9 +2218,9 @@ static inline int __mkroute_output(struct rtable **result, rth->u.dst.flags |= DST_BALANCED; } #endif - if (in_dev->cnf.no_xfrm) + if (IN_DEV_CONF_GET(in_dev, NOXFRM)) rth->u.dst.flags |= DST_NOXFRM; - if (in_dev->cnf.no_policy) + if (IN_DEV_CONF_GET(in_dev, NOPOLICY)) rth->u.dst.flags |= DST_NOPOLICY; rth->fl.fl4_dst = oldflp->fl4_dst; @@ -2396,7 +2396,7 @@ static int ip_route_output_slow(struct rtable **rp, const struct flowi *oldflp) /* It is equivalent to inet_addr_type(saddr) == RTN_LOCAL */ dev_out = ip_dev_find(oldflp->fl4_src); - if ((dev_out == NULL) && !(sysctl_ip_nonlocal_bind)) + if (dev_out == NULL) goto out; /* I removed check for oif == dev_out->oif here. @@ -2407,7 +2407,7 @@ static int ip_route_output_slow(struct rtable **rp, const struct flowi *oldflp) of another iface. 
--ANK */ - if (dev_out && oldflp->oif == 0 + if (oldflp->oif == 0 && (MULTICAST(oldflp->fl4_dst) || oldflp->fl4_dst == htonl(0xFFFFFFFF))) { /* Special hack: user can direct multicasts and limited broadcast via necessary interface @@ -2598,6 +2598,69 @@ int __ip_route_output_key(struct rtable **rp, const struct flowi *flp) EXPORT_SYMBOL_GPL(__ip_route_output_key); +static void ipv4_rt_blackhole_update_pmtu(struct dst_entry *dst, u32 mtu) +{ +} + +static struct dst_ops ipv4_dst_blackhole_ops = { + .family = AF_INET, + .protocol = __constant_htons(ETH_P_IP), + .destroy = ipv4_dst_destroy, + .check = ipv4_dst_check, + .update_pmtu = ipv4_rt_blackhole_update_pmtu, + .entry_size = sizeof(struct rtable), +}; + + +static int ipv4_blackhole_output(struct sk_buff *skb) +{ + kfree_skb(skb); + return 0; +} + +static int ipv4_dst_blackhole(struct rtable **rp, struct flowi *flp, struct sock *sk) +{ + struct rtable *ort = *rp; + struct rtable *rt = (struct rtable *) + dst_alloc(&ipv4_dst_blackhole_ops); + + if (rt) { + struct dst_entry *new = &rt->u.dst; + + atomic_set(&new->__refcnt, 1); + new->__use = 1; + new->input = ipv4_blackhole_output; + new->output = ipv4_blackhole_output; + memcpy(new->metrics, ort->u.dst.metrics, RTAX_MAX*sizeof(u32)); + + new->dev = ort->u.dst.dev; + if (new->dev) + dev_hold(new->dev); + + rt->fl = ort->fl; + + rt->idev = ort->idev; + if (rt->idev) + in_dev_hold(rt->idev); + rt->rt_flags = ort->rt_flags; + rt->rt_type = ort->rt_type; + rt->rt_dst = ort->rt_dst; + rt->rt_src = ort->rt_src; + rt->rt_iif = ort->rt_iif; + rt->rt_gateway = ort->rt_gateway; + rt->rt_spec_dst = ort->rt_spec_dst; + rt->peer = ort->peer; + if (rt->peer) + atomic_inc(&rt->peer->refcnt); + + dst_free(new); + } + + dst_release(&(*rp)->u.dst); + *rp = rt; + return (rt ? 
0 : -ENOMEM); +} + int ip_route_output_flow(struct rtable **rp, struct flowi *flp, struct sock *sk, int flags) { int err; @@ -2610,7 +2673,11 @@ int ip_route_output_flow(struct rtable **rp, struct flowi *flp, struct sock *sk, flp->fl4_src = (*rp)->rt_src; if (!flp->fl4_dst) flp->fl4_dst = (*rp)->rt_dst; - return xfrm_lookup((struct dst_entry **)rp, flp, sk, flags); + err = __xfrm_lookup((struct dst_entry **)rp, flp, sk, flags); + if (err == -EREMOTE) + err = ipv4_dst_blackhole(rp, flp, sk); + + return err; } return 0; @@ -2692,7 +2759,7 @@ static int rt_fill_info(struct sk_buff *skb, u32 pid, u32 seq, int event, __be32 dst = rt->rt_dst; if (MULTICAST(dst) && !LOCAL_MCAST(dst) && - ipv4_devconf.mc_forwarding) { + IPV4_DEVCONF_ALL(MC_FORWARDING)) { int err = ipmr_get_route(skb, r, nowait); if (err <= 0) { if (!nowait) { @@ -3139,6 +3206,8 @@ int __init ip_rt_init(void) kmem_cache_create("ip_dst_cache", sizeof(struct rtable), 0, SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL, NULL); + ipv4_dst_blackhole_ops.kmem_cachep = ipv4_dst_ops.kmem_cachep; + rt_hash_table = (struct rt_hash_bucket *) alloc_large_system_hash("IP route cache", sizeof(struct rt_hash_bucket), diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c index 6817d6485df..53ef0f4bbda 100644 --- a/net/ipv4/sysctl_net_ipv4.c +++ b/net/ipv4/sysctl_net_ipv4.c @@ -37,12 +37,12 @@ static int ipv4_sysctl_forward(ctl_table *ctl, int write, struct file * filp, void __user *buffer, size_t *lenp, loff_t *ppos) { - int val = ipv4_devconf.forwarding; + int val = IPV4_DEVCONF_ALL(FORWARDING); int ret; ret = proc_dointvec(ctl, write, filp, buffer, lenp, ppos); - if (write && ipv4_devconf.forwarding != val) + if (write && IPV4_DEVCONF_ALL(FORWARDING) != val) inet_forward_change(); return ret; @@ -222,7 +222,7 @@ ctl_table ipv4_table[] = { { .ctl_name = NET_IPV4_FORWARD, .procname = "ip_forward", - .data = &ipv4_devconf.forwarding, + .data = &IPV4_DEVCONF_ALL(FORWARDING), .maxlen = sizeof(int), .mode = 0644, .proc_handler = &ipv4_sysctl_forward, diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 8b124eafbb9..450f44bb2c8 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -252,7 +252,6 @@ #include <linux/fcntl.h> #include <linux/poll.h> #include <linux/init.h> -#include <linux/smp_lock.h> #include <linux/fs.h> #include <linux/random.h> #include <linux/bootmem.h> @@ -1065,7 +1064,11 @@ int tcp_read_sock(struct sock *sk, read_descriptor_t *desc, break; } used = recv_actor(desc, skb, offset, len); - if (used <= len) { + if (used < 0) { + if (!copied) + copied = used; + break; + } else if (used <= len) { seq += used; copied += used; offset += used; @@ -1087,7 +1090,7 @@ int tcp_read_sock(struct sock *sk, read_descriptor_t *desc, tcp_rcv_space_adjust(sk); /* Clean up data we have read: This will do ACK frames. 
*/ - if (copied) + if (copied > 0) tcp_cleanup_rbuf(sk, copied); return copied; } @@ -1675,9 +1678,8 @@ adjudge_to_death: } if (sk->sk_state != TCP_CLOSE) { sk_stream_mem_reclaim(sk); - if (atomic_read(sk->sk_prot->orphan_count) > sysctl_tcp_max_orphans || - (sk->sk_wmem_queued > SOCK_MIN_SNDBUF && - atomic_read(&tcp_memory_allocated) > sysctl_tcp_mem[2])) { + if (tcp_too_many_orphans(sk, + atomic_read(sk->sk_prot->orphan_count))) { if (net_ratelimit()) printk(KERN_INFO "TCP: too many of orphaned " "sockets\n"); @@ -2466,13 +2468,10 @@ void __init tcp_init(void) order++) ; if (order >= 4) { - sysctl_local_port_range[0] = 32768; - sysctl_local_port_range[1] = 61000; tcp_death_row.sysctl_max_tw_buckets = 180000; sysctl_tcp_max_orphans = 4096 << (order - 4); sysctl_max_syn_backlog = 1024; } else if (order < 3) { - sysctl_local_port_range[0] = 1024 * (3 - order); tcp_death_row.sysctl_max_tw_buckets >>= (3 - order); sysctl_tcp_max_orphans >>= (3 - order); sysctl_max_syn_backlog = 128; diff --git a/net/ipv4/tcp_bic.c b/net/ipv4/tcp_bic.c index 281c9f91325..dd9ef65ad3f 100644 --- a/net/ipv4/tcp_bic.c +++ b/net/ipv4/tcp_bic.c @@ -29,7 +29,7 @@ static int fast_convergence = 1; static int max_increment = 16; static int low_window = 14; static int beta = 819; /* = 819/1024 (BICTCP_BETA_SCALE) */ -static int initial_ssthresh = 100; +static int initial_ssthresh; static int smooth_part = 20; module_param(fast_convergence, int, 0644); diff --git a/net/ipv4/tcp_cong.c b/net/ipv4/tcp_cong.c index 86b26539e54..1260e52ad77 100644 --- a/net/ipv4/tcp_cong.c +++ b/net/ipv4/tcp_cong.c @@ -276,30 +276,34 @@ int tcp_set_congestion_control(struct sock *sk, const char *name) /* - * Slow start (exponential increase) with - * RFC3742 Limited Slow Start (fast linear increase) support. + * Slow start is used when congestion window is less than slow start + * threshold. 
This version implements the basic RFC2581 version + * and optionally supports: + * RFC3742 Limited Slow Start - growth limited to max_ssthresh + * RFC3465 Appropriate Byte Counting - growth limited by bytes acknowledged */ void tcp_slow_start(struct tcp_sock *tp) { - int cnt = 0; - - if (sysctl_tcp_abc) { - /* RFC3465: Slow Start - * TCP sender SHOULD increase cwnd by the number of - * previously unacknowledged bytes ACKed by each incoming - * acknowledgment, provided the increase is not more than L - */ - if (tp->bytes_acked < tp->mss_cache) - return; - } + int cnt; /* increase in packets */ + + /* RFC3465: ABC Slow start + * Increase only after a full MSS of bytes is acked + * + * TCP sender SHOULD increase cwnd by the number of + * previously unacknowledged bytes ACKed by each incoming + * acknowledgment, provided the increase is not more than L + */ + if (sysctl_tcp_abc && tp->bytes_acked < tp->mss_cache) + return; - if (sysctl_tcp_max_ssthresh > 0 && - tp->snd_cwnd > sysctl_tcp_max_ssthresh) - cnt += sysctl_tcp_max_ssthresh>>1; + if (sysctl_tcp_max_ssthresh > 0 && tp->snd_cwnd > sysctl_tcp_max_ssthresh) + cnt = sysctl_tcp_max_ssthresh >> 1; /* limited slow start */ else - cnt += tp->snd_cwnd; + cnt = tp->snd_cwnd; /* exponential increase */ - /* RFC3465: We MAY increase by 2 if discovered delayed ack */ + /* RFC3465: ABC + * We MAY increase by 2 if discovered delayed ack + */ if (sysctl_tcp_abc > 1 && tp->bytes_acked >= 2*tp->mss_cache) cnt <<= 1; tp->bytes_acked = 0; diff --git a/net/ipv4/tcp_cubic.c b/net/ipv4/tcp_cubic.c index 14224487b16..ebfaac2f9f4 100644 --- a/net/ipv4/tcp_cubic.c +++ b/net/ipv4/tcp_cubic.c @@ -29,7 +29,7 @@ static int fast_convergence __read_mostly = 1; static int max_increment __read_mostly = 16; static int beta __read_mostly = 819; /* = 819/1024 (BICTCP_BETA_SCALE) */ -static int initial_ssthresh __read_mostly = 100; +static int initial_ssthresh __read_mostly; static int bic_scale __read_mostly = 41; static int tcp_friendliness __read_mostly = 1; diff --git a/net/ipv4/tcp_illinois.c b/net/ipv4/tcp_illinois.c index 4adc47c5535..b2b2256d3b8 100644 --- a/net/ipv4/tcp_illinois.c +++ b/net/ipv4/tcp_illinois.c @@ -90,6 +90,9 @@ static void tcp_illinois_acked(struct sock *sk, u32 pkts_acked, ktime_t last) ca->acked = pkts_acked; + if (ktime_equal(last, net_invalid_timestamp())) + return; + rtt = ktime_to_us(net_timedelta(last)); /* ignore bogus values, this prevents wraparound in alpha math */ diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 7641b2761a1..69f9f1ef3ef 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -953,7 +953,7 @@ tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb, u32 prior_snd_ int prior_fackets; u32 lost_retrans = 0; int flag = 0; - int dup_sack = 0; + int found_dup_sack = 0; int cached_fack_count; int i; int first_sack_index; @@ -964,20 +964,20 @@ tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb, u32 prior_snd_ /* Check for D-SACK. */ if (before(ntohl(sp[0].start_seq), TCP_SKB_CB(ack_skb)->ack_seq)) { - dup_sack = 1; + found_dup_sack = 1; tp->rx_opt.sack_ok |= 4; NET_INC_STATS_BH(LINUX_MIB_TCPDSACKRECV); } else if (num_sacks > 1 && !after(ntohl(sp[0].end_seq), ntohl(sp[1].end_seq)) && !before(ntohl(sp[0].start_seq), ntohl(sp[1].start_seq))) { - dup_sack = 1; + found_dup_sack = 1; tp->rx_opt.sack_ok |= 4; NET_INC_STATS_BH(LINUX_MIB_TCPDSACKOFORECV); } /* D-SACK for already forgotten data... * Do dumb counting. 
*/ - if (dup_sack && + if (found_dup_sack && !after(ntohl(sp[0].end_seq), prior_snd_una) && after(ntohl(sp[0].end_seq), tp->undo_marker)) tp->undo_retrans--; @@ -1058,6 +1058,7 @@ tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb, u32 prior_snd_ __u32 start_seq = ntohl(sp->start_seq); __u32 end_seq = ntohl(sp->end_seq); int fack_count; + int dup_sack = (found_dup_sack && (i == first_sack_index)); skb = cached_skb; fack_count = cached_fack_count; @@ -1501,6 +1502,8 @@ void tcp_enter_loss(struct sock *sk, int how) tcp_set_ca_state(sk, TCP_CA_Loss); tp->high_seq = tp->snd_nxt; TCP_ECN_queue_cwr(tp); + /* Abort FRTO algorithm if one is in progress */ + tp->frto_counter = 0; clear_all_retrans_hints(tp); } @@ -2035,7 +2038,7 @@ static void tcp_try_to_open(struct sock *sk, int flag) { struct tcp_sock *tp = tcp_sk(sk); - tp->left_out = tp->sacked_out; + tcp_sync_left_out(tp); if (tp->retrans_out == 0) tp->retrans_stamp = 0; @@ -2405,9 +2408,9 @@ static int tcp_clean_rtx_queue(struct sock *sk, __s32 *seq_rtt_p) struct sk_buff *skb; __u32 now = tcp_time_stamp; int acked = 0; + int prior_packets = tp->packets_out; __s32 seq_rtt = -1; - u32 pkts_acked = 0; - ktime_t last_ackt = ktime_set(0,0); + ktime_t last_ackt = net_invalid_timestamp(); while ((skb = tcp_write_queue_head(sk)) && skb != tcp_send_head(sk)) { @@ -2435,7 +2438,6 @@ static int tcp_clean_rtx_queue(struct sock *sk, __s32 *seq_rtt_p) */ if (!(scb->flags & TCPCB_FLAG_SYN)) { acked |= FLAG_DATA_ACKED; - ++pkts_acked; } else { acked |= FLAG_SYN_ACKED; tp->retrans_stamp = 0; @@ -2479,12 +2481,17 @@ static int tcp_clean_rtx_queue(struct sock *sk, __s32 *seq_rtt_p) } if (acked&FLAG_ACKED) { + u32 pkts_acked = prior_packets - tp->packets_out; const struct tcp_congestion_ops *ca_ops = inet_csk(sk)->icsk_ca_ops; tcp_ack_update_rtt(sk, acked, seq_rtt); tcp_ack_packets_out(sk); + /* Is the ACK triggering packet unambiguous? 
*/ + if (acked & FLAG_RETRANS_DATA_ACKED) + last_ackt = net_invalid_timestamp(); + if (ca_ops->pkts_acked) ca_ops->pkts_acked(sk, pkts_acked, last_ackt); } @@ -2608,6 +2615,7 @@ static void tcp_conservative_spur_to_response(struct tcp_sock *tp) { tp->snd_cwnd = min(tp->snd_cwnd, tp->snd_ssthresh); tp->snd_cwnd_cnt = 0; + TCP_ECN_queue_cwr(tp); tcp_moderate_cwnd(tp); } @@ -2929,6 +2937,7 @@ void tcp_parse_options(struct sk_buff *skb, struct tcp_options_received *opt_rx, opt_rx->sack_ok) { TCP_SKB_CB(skb)->sacked = (ptr - 2) - (unsigned char *)th; } + break; #ifdef CONFIG_TCP_MD5SIG case TCPOPT_MD5SIG: /* diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 5a3e7f839fc..354721d67f6 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -192,8 +192,11 @@ int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) RT_CONN_FLAGS(sk), sk->sk_bound_dev_if, IPPROTO_TCP, inet->sport, usin->sin_port, sk, 1); - if (tmp < 0) + if (tmp < 0) { + if (tmp == -ENETUNREACH) + IP_INC_STATS_BH(IPSTATS_MIB_OUTNOROUTES); return tmp; + } if (rt->rt_flags & (RTCF_MULTICAST | RTCF_BROADCAST)) { ip_rt_put(rt); @@ -702,6 +705,8 @@ static void tcp_v4_send_ack(struct tcp_timewait_sock *twsk, ip_hdr(skb)->saddr, /* XXX */ arg.iov[0].iov_len, IPPROTO_TCP, 0); arg.csumoffset = offsetof(struct tcphdr, check) / 2; + if (twsk) + arg.bound_dev_if = twsk->tw_sk.tw_bound_dev_if; ip_send_reply(tcp_socket->sk, skb, &arg, arg.iov[0].iov_len); @@ -873,6 +878,7 @@ int tcp_v4_md5_do_add(struct sock *sk, __be32 addr, kfree(newkey); return -ENOMEM; } + sk->sk_route_caps &= ~NETIF_F_GSO_MASK; } if (tcp_alloc_md5sig_pool() == NULL) { kfree(newkey); @@ -1002,7 +1008,7 @@ static int tcp_v4_parse_md5_keys(struct sock *sk, char __user *optval, return -EINVAL; tp->md5sig_info = p; - + sk->sk_route_caps &= ~NETIF_F_GSO_MASK; } newkey = kmemdup(cmd.tcpm_key, cmd.tcpm_keylen, GFP_KERNEL); diff --git a/net/ipv4/tcp_lp.c b/net/ipv4/tcp_lp.c index 43294ad9f63..e49836ce012 100644 --- a/net/ipv4/tcp_lp.c +++ b/net/ipv4/tcp_lp.c @@ -266,7 +266,8 @@ static void tcp_lp_pkts_acked(struct sock *sk, u32 num_acked, ktime_t last) struct tcp_sock *tp = tcp_sk(sk); struct lp *lp = inet_csk_ca(sk); - tcp_lp_rtt_sample(sk, ktime_to_us(net_timedelta(last))); + if (!ktime_equal(last, net_invalid_timestamp())) + tcp_lp_rtt_sample(sk, ktime_to_us(net_timedelta(last))); /* calc inference */ if (tcp_time_stamp > tp->rx_opt.rcv_tsecr) diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 0faacf9c419..53232dd6fb4 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -40,7 +40,6 @@ #include <linux/compiler.h> #include <linux/module.h> -#include <linux/smp_lock.h> /* People can turn this off for buggy TCP's found in printers etc. */ int sysctl_tcp_retrans_collapse __read_mostly = 1; diff --git a/net/ipv4/tcp_probe.c b/net/ipv4/tcp_probe.c index 3938d5dbdf2..d9323dfff82 100644 --- a/net/ipv4/tcp_probe.c +++ b/net/ipv4/tcp_probe.c @@ -63,6 +63,9 @@ struct { * FIXME: causes an extra copy */ static void printl(const char *fmt, ...) + __attribute__ ((format (printf, 1, 2))); + +static void printl(const char *fmt, ...) 
{ va_list args; int len; @@ -95,7 +98,7 @@ static int jtcp_rcv_established(struct sock *sk, struct sk_buff *skb, /* Only update if port matches */ if ((port == 0 || ntohs(inet->dport) == port || ntohs(inet->sport) == port) && (full || tp->snd_cwnd != tcpw.lastcwnd)) { - printl("%d.%d.%d.%d:%u %d.%d.%d.%d:%u %d %#x %#x %u %u %u\n", + printl("%d.%d.%d.%d:%u %d.%d.%d.%d:%u %d %#x %#x %u %u %u %u\n", NIPQUAD(inet->saddr), ntohs(inet->sport), NIPQUAD(inet->daddr), ntohs(inet->dport), skb->len, tp->snd_nxt, tp->snd_una, diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c index 2ca97b20929..e9b151b3a59 100644 --- a/net/ipv4/tcp_timer.c +++ b/net/ipv4/tcp_timer.c @@ -78,9 +78,7 @@ static int tcp_out_of_resources(struct sock *sk, int do_reset) if (sk->sk_err_soft) orphans <<= 1; - if (orphans >= sysctl_tcp_max_orphans || - (sk->sk_wmem_queued > SOCK_MIN_SNDBUF && - atomic_read(&tcp_memory_allocated) > sysctl_tcp_mem[2])) { + if (tcp_too_many_orphans(sk, orphans)) { if (net_ratelimit()) printk(KERN_INFO "Out of socket memory\n"); @@ -294,9 +292,9 @@ static void tcp_retransmit_timer(struct sock *sk) * we cannot allow such beasts to hang infinitely. */ #ifdef TCP_DEBUG - if (net_ratelimit()) { + if (1) { struct inet_sock *inet = inet_sk(sk); - printk(KERN_DEBUG "TCP: Treason uncloaked! Peer %u.%u.%u.%u:%u/%u shrinks window %u:%u. Repaired.\n", + LIMIT_NETDEBUG(KERN_DEBUG "TCP: Treason uncloaked! Peer %u.%u.%u.%u:%u/%u shrinks window %u:%u. Repaired.\n", NIPQUAD(inet->daddr), ntohs(inet->dport), inet->num, tp->snd_una, tp->snd_nxt); } diff --git a/net/ipv4/tcp_vegas.c b/net/ipv4/tcp_vegas.c index 73e19cf7df2..e218a51cece 100644 --- a/net/ipv4/tcp_vegas.c +++ b/net/ipv4/tcp_vegas.c @@ -117,6 +117,9 @@ void tcp_vegas_pkts_acked(struct sock *sk, u32 cnt, ktime_t last) struct vegas *vegas = inet_csk_ca(sk); u32 vrtt; + if (ktime_equal(last, net_invalid_timestamp())) + return; + /* Never allow zero rtt or baseRTT */ vrtt = ktime_to_us(net_timedelta(last)) + 1; diff --git a/net/ipv4/tcp_veno.c b/net/ipv4/tcp_veno.c index 9edb340f2f9..ec854cc5fad 100644 --- a/net/ipv4/tcp_veno.c +++ b/net/ipv4/tcp_veno.c @@ -74,6 +74,9 @@ static void tcp_veno_pkts_acked(struct sock *sk, u32 cnt, ktime_t last) struct veno *veno = inet_csk_ca(sk); u32 vrtt; + if (ktime_equal(last, net_invalid_timestamp())) + return; + /* Never allow zero rtt or baseRTT */ vrtt = ktime_to_us(net_timedelta(last)) + 1; diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 113e0c4c8a9..facb7e29304 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -114,33 +114,14 @@ DEFINE_RWLOCK(udp_hash_lock); static int udp_port_rover; -/* - * Note about this hash function : - * Typical use is probably daddr = 0, only dport is going to vary hash - */ -static inline unsigned int hash_port_and_addr(__u16 port, __be32 addr) -{ - addr ^= addr >> 16; - addr ^= addr >> 8; - return port ^ addr; -} - -static inline int __udp_lib_port_inuse(unsigned int hash, int port, - __be32 daddr, struct hlist_head udptable[]) +static inline int __udp_lib_lport_inuse(__u16 num, struct hlist_head udptable[]) { struct sock *sk; struct hlist_node *node; - struct inet_sock *inet; - sk_for_each(sk, node, &udptable[hash & (UDP_HTABLE_SIZE - 1)]) { - if (sk->sk_hash != hash) - continue; - inet = inet_sk(sk); - if (inet->num != port) - continue; - if (inet->rcv_saddr == daddr) + sk_for_each(sk, node, &udptable[num & (UDP_HTABLE_SIZE - 1)]) + if (sk->sk_hash == num) return 1; - } return 0; } @@ -161,7 +142,6 @@ int __udp_lib_get_port(struct sock *sk, unsigned short snum, struct hlist_node 
*node; struct hlist_head *head; struct sock *sk2; - unsigned int hash; int error = 1; write_lock_bh(&udp_hash_lock); @@ -176,9 +156,7 @@ int __udp_lib_get_port(struct sock *sk, unsigned short snum, for (i = 0; i < UDP_HTABLE_SIZE; i++, result++) { int size; - hash = hash_port_and_addr(result, - inet_sk(sk)->rcv_saddr); - head = &udptable[hash & (UDP_HTABLE_SIZE - 1)]; + head = &udptable[result & (UDP_HTABLE_SIZE - 1)]; if (hlist_empty(head)) { if (result > sysctl_local_port_range[1]) result = sysctl_local_port_range[0] + @@ -203,17 +181,7 @@ int __udp_lib_get_port(struct sock *sk, unsigned short snum, result = sysctl_local_port_range[0] + ((result - sysctl_local_port_range[0]) & (UDP_HTABLE_SIZE - 1)); - hash = hash_port_and_addr(result, 0); - if (__udp_lib_port_inuse(hash, result, - 0, udptable)) - continue; - if (!inet_sk(sk)->rcv_saddr) - break; - - hash = hash_port_and_addr(result, - inet_sk(sk)->rcv_saddr); - if (! __udp_lib_port_inuse(hash, result, - inet_sk(sk)->rcv_saddr, udptable)) + if (! __udp_lib_lport_inuse(result, udptable)) break; } if (i >= (1 << 16) / UDP_HTABLE_SIZE) @@ -221,41 +189,21 @@ int __udp_lib_get_port(struct sock *sk, unsigned short snum, gotit: *port_rover = snum = result; } else { - hash = hash_port_and_addr(snum, 0); - head = &udptable[hash & (UDP_HTABLE_SIZE - 1)]; + head = &udptable[snum & (UDP_HTABLE_SIZE - 1)]; sk_for_each(sk2, node, head) - if (sk2->sk_hash == hash && - sk2 != sk && - inet_sk(sk2)->num == snum && - (!sk2->sk_reuse || !sk->sk_reuse) && - (!sk2->sk_bound_dev_if || !sk->sk_bound_dev_if || - sk2->sk_bound_dev_if == sk->sk_bound_dev_if) && - (*saddr_comp)(sk, sk2)) + if (sk2->sk_hash == snum && + sk2 != sk && + (!sk2->sk_reuse || !sk->sk_reuse) && + (!sk2->sk_bound_dev_if || !sk->sk_bound_dev_if + || sk2->sk_bound_dev_if == sk->sk_bound_dev_if) && + (*saddr_comp)(sk, sk2) ) goto fail; - - if (inet_sk(sk)->rcv_saddr) { - hash = hash_port_and_addr(snum, - inet_sk(sk)->rcv_saddr); - head = &udptable[hash & (UDP_HTABLE_SIZE - 1)]; - - sk_for_each(sk2, node, head) - if (sk2->sk_hash == hash && - sk2 != sk && - inet_sk(sk2)->num == snum && - (!sk2->sk_reuse || !sk->sk_reuse) && - (!sk2->sk_bound_dev_if || - !sk->sk_bound_dev_if || - sk2->sk_bound_dev_if == - sk->sk_bound_dev_if) && - (*saddr_comp)(sk, sk2)) - goto fail; - } } inet_sk(sk)->num = snum; - sk->sk_hash = hash; + sk->sk_hash = snum; if (sk_unhashed(sk)) { - head = &udptable[hash & (UDP_HTABLE_SIZE - 1)]; + head = &udptable[snum & (UDP_HTABLE_SIZE - 1)]; sk_add_node(sk, head); sock_prot_inc_use(sk->sk_prot); } @@ -294,77 +242,63 @@ static struct sock *__udp4_lib_lookup(__be32 saddr, __be16 sport, { struct sock *sk, *result = NULL; struct hlist_node *node; - unsigned int hash, hashwild; - int score, best = -1, hport = ntohs(dport); - - hash = hash_port_and_addr(hport, daddr); - hashwild = hash_port_and_addr(hport, 0); + unsigned short hnum = ntohs(dport); + int badness = -1; read_lock(&udp_hash_lock); - -lookup: - - sk_for_each(sk, node, &udptable[hash & (UDP_HTABLE_SIZE - 1)]) { + sk_for_each(sk, node, &udptable[hnum & (UDP_HTABLE_SIZE - 1)]) { struct inet_sock *inet = inet_sk(sk); - if (sk->sk_hash != hash || ipv6_only_sock(sk) || - inet->num != hport) - continue; - - score = (sk->sk_family == PF_INET ? 
1 : 0); - if (inet->rcv_saddr) { - if (inet->rcv_saddr != daddr) - continue; - score+=2; - } - if (inet->daddr) { - if (inet->daddr != saddr) - continue; - score+=2; - } - if (inet->dport) { - if (inet->dport != sport) - continue; - score+=2; - } - if (sk->sk_bound_dev_if) { - if (sk->sk_bound_dev_if != dif) - continue; - score+=2; - } - if (score == 9) { - result = sk; - goto found; - } else if (score > best) { - result = sk; - best = score; + if (sk->sk_hash == hnum && !ipv6_only_sock(sk)) { + int score = (sk->sk_family == PF_INET ? 1 : 0); + if (inet->rcv_saddr) { + if (inet->rcv_saddr != daddr) + continue; + score+=2; + } + if (inet->daddr) { + if (inet->daddr != saddr) + continue; + score+=2; + } + if (inet->dport) { + if (inet->dport != sport) + continue; + score+=2; + } + if (sk->sk_bound_dev_if) { + if (sk->sk_bound_dev_if != dif) + continue; + score+=2; + } + if (score == 9) { + result = sk; + break; + } else if (score > badness) { + result = sk; + badness = score; + } } } - - if (hash != hashwild) { - hash = hashwild; - goto lookup; - } -found: if (result) sock_hold(result); read_unlock(&udp_hash_lock); return result; } -static inline struct sock *udp_v4_mcast_next(struct sock *sk, unsigned int hnum, - int hport, __be32 loc_addr, +static inline struct sock *udp_v4_mcast_next(struct sock *sk, + __be16 loc_port, __be32 loc_addr, __be16 rmt_port, __be32 rmt_addr, int dif) { struct hlist_node *node; struct sock *s = sk; + unsigned short hnum = ntohs(loc_port); sk_for_each_from(s, node) { struct inet_sock *inet = inet_sk(s); if (s->sk_hash != hnum || - inet->num != hport || (inet->daddr && inet->daddr != rmt_addr) || (inet->dport != rmt_port && inet->dport) || (inet->rcv_saddr && inet->rcv_saddr != loc_addr) || @@ -699,8 +633,11 @@ int udp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, .dport = dport } } }; security_sk_classify_flow(sk, &fl); err = ip_route_output_flow(&rt, &fl, sk, 1); - if (err) + if (err) { + if (err == -ENETUNREACH) + IP_INC_STATS_BH(IPSTATS_MIB_OUTNOROUTES); goto out; + } err = -EACCES; if ((rt->rt_flags & RTCF_BROADCAST) && @@ -983,7 +920,7 @@ int udp_disconnect(struct sock *sk, int flags) } /* return: - * 1 if the the UDP system should process it + * 1 if the UDP system should process it * 0 if we should drop this packet * -1 if it should get processed by xfrm4_rcv_encap */ @@ -1195,45 +1132,29 @@ static int __udp4_lib_mcast_deliver(struct sk_buff *skb, __be32 saddr, __be32 daddr, struct hlist_head udptable[]) { - struct sock *sk, *skw, *sknext; + struct sock *sk; int dif; - int hport = ntohs(uh->dest); - unsigned int hash = hash_port_and_addr(hport, daddr); - unsigned int hashwild = hash_port_and_addr(hport, 0); - - dif = skb->dev->ifindex; read_lock(&udp_hash_lock); - - sk = sk_head(&udptable[hash & (UDP_HTABLE_SIZE - 1)]); - skw = sk_head(&udptable[hashwild & (UDP_HTABLE_SIZE - 1)]); - - sk = udp_v4_mcast_next(sk, hash, hport, daddr, uh->source, saddr, dif); - if (!sk) { - hash = hashwild; - sk = udp_v4_mcast_next(skw, hash, hport, daddr, uh->source, - saddr, dif); - } + sk = sk_head(&udptable[ntohs(uh->dest) & (UDP_HTABLE_SIZE - 1)]); + dif = skb->dev->ifindex; + sk = udp_v4_mcast_next(sk, uh->dest, daddr, uh->source, saddr, dif); if (sk) { + struct sock *sknext = NULL; + do { struct sk_buff *skb1 = skb; - sknext = udp_v4_mcast_next(sk_next(sk), hash, hport, - daddr, uh->source, saddr, dif); - if (!sknext && hash != hashwild) { - hash = hashwild; - sknext = udp_v4_mcast_next(skw, hash, hport, - daddr, uh->source, saddr, dif); - } + + 
sknext = udp_v4_mcast_next(sk_next(sk), uh->dest, daddr, + uh->source, saddr, dif); if (sknext) skb1 = skb_clone(skb, GFP_ATOMIC); if (skb1) { int ret = udp_queue_rcv_skb(sk, skb1); if (ret > 0) - /* - * we should probably re-process - * instead of dropping packets here. - */ + /* we should probably re-process instead + * of dropping packets here. */ kfree_skb(skb1); } sk = sknext; @@ -1320,7 +1241,7 @@ int __udp4_lib_rcv(struct sk_buff *skb, struct hlist_head udptable[], return __udp4_lib_mcast_deliver(skb, uh, saddr, daddr, udptable); sk = __udp4_lib_lookup(saddr, uh->source, daddr, uh->dest, - skb->dev->ifindex, udptable); + skb->dev->ifindex, udptable ); if (sk != NULL) { int ret = udp_queue_rcv_skb(sk, skb); diff --git a/net/ipv4/xfrm4_input.c b/net/ipv4/xfrm4_input.c index 5ceca951d73..fa1902dc81b 100644 --- a/net/ipv4/xfrm4_input.c +++ b/net/ipv4/xfrm4_input.c @@ -139,10 +139,8 @@ int xfrm4_rcv_encap(struct sk_buff *skb, __u16 encap_type) nf_reset(skb); if (decaps) { - if (!(skb->dev->flags&IFF_LOOPBACK)) { - dst_release(skb->dst); - skb->dst = NULL; - } + dst_release(skb->dst); + skb->dst = NULL; netif_rx(skb); return 0; } else { diff --git a/net/ipv4/xfrm4_mode_tunnel.c b/net/ipv4/xfrm4_mode_tunnel.c index a2f2e6a5ec5..9963700e74c 100644 --- a/net/ipv4/xfrm4_mode_tunnel.c +++ b/net/ipv4/xfrm4_mode_tunnel.c @@ -85,6 +85,8 @@ static int xfrm4_tunnel_output(struct xfrm_state *x, struct sk_buff *skb) top_iph->saddr = x->props.saddr.a4; top_iph->daddr = x->id.daddr.a4; + skb->protocol = htons(ETH_P_IP); + memset(&(IPCB(skb)->opt), 0, sizeof(struct ip_options)); return 0; } diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index d02685c6bc6..f96ed76d8fa 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -2259,7 +2259,7 @@ static int addrconf_notify(struct notifier_block *this, unsigned long event, switch(event) { case NETDEV_REGISTER: - if (!idev) { + if (!idev && dev->mtu >= IPV6_MIN_MTU) { idev = ipv6_add_dev(dev); if (!idev) printk(KERN_WARNING "IPv6: add_dev failed for %s\n", @@ -2979,7 +2979,7 @@ static struct in6_addr *extract_addr(struct nlattr *addr, struct nlattr *local) return pfx; } -static struct nla_policy ifa_ipv6_policy[IFA_MAX+1] __read_mostly = { +static const struct nla_policy ifa_ipv6_policy[IFA_MAX+1] = { [IFA_ADDRESS] = { .len = sizeof(struct in6_addr) }, [IFA_LOCAL] = { .len = sizeof(struct in6_addr) }, [IFA_CACHEINFO] = { .len = sizeof(struct ifa_cacheinfo) }, @@ -4204,6 +4204,10 @@ int __init addrconf_init(void) return err; ip6_null_entry.rt6i_idev = in6_dev_get(&loopback_dev); +#ifdef CONFIG_IPV6_MULTIPLE_TABLES + ip6_prohibit_entry.rt6i_idev = in6_dev_get(&loopback_dev); + ip6_blk_hole_entry.rt6i_idev = in6_dev_get(&loopback_dev); +#endif register_netdevice_notifier(&ipv6_dev_notf); diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c index 18cb928c8d9..6dd377253cf 100644 --- a/net/ipv6/af_inet6.c +++ b/net/ipv6/af_inet6.c @@ -42,7 +42,6 @@ #include <linux/inet.h> #include <linux/netdevice.h> #include <linux/icmpv6.h> -#include <linux/smp_lock.h> #include <linux/netfilter_ipv6.h> #include <net/ip.h> diff --git a/net/ipv6/ah6.c b/net/ipv6/ah6.c index b696c840120..128f94c79c6 100644 --- a/net/ipv6/ah6.c +++ b/net/ipv6/ah6.c @@ -247,7 +247,7 @@ static int ah6_output(struct xfrm_state *x, struct sk_buff *skb) memcpy(tmp_base, top_iph, sizeof(tmp_base)); tmp_ext = NULL; - extlen = skb_transport_offset(skb) + sizeof(struct ipv6hdr); + extlen = skb_transport_offset(skb) - sizeof(struct ipv6hdr); if (extlen) { extlen += sizeof(*tmp_ext); tmp_ext = 
kmalloc(extlen, GFP_ATOMIC); diff --git a/net/ipv6/datagram.c b/net/ipv6/datagram.c index 403eee66b9c..b1fe7ac5dc9 100644 --- a/net/ipv6/datagram.c +++ b/net/ipv6/datagram.c @@ -177,8 +177,12 @@ ipv4_connected: if (final_p) ipv6_addr_copy(&fl.fl6_dst, final_p); - if ((err = xfrm_lookup(&dst, &fl, sk, 1)) < 0) - goto out; + if ((err = __xfrm_lookup(&dst, &fl, sk, 1)) < 0) { + if (err == -EREMOTE) + err = ip6_dst_blackhole(sk, &dst, &fl); + if (err < 0) + goto out; + } /* source address lookup done in ip6_dst_lookup */ diff --git a/net/ipv6/exthdrs.c b/net/ipv6/exthdrs.c index 6d8e4ac7bda..14be0b9b77a 100644 --- a/net/ipv6/exthdrs.c +++ b/net/ipv6/exthdrs.c @@ -660,6 +660,14 @@ EXPORT_SYMBOL_GPL(ipv6_invert_rthdr); Hop-by-hop options. **********************************/ +/* + * Note: we cannot rely on skb->dst before we assign it in ip6_route_input(). + */ +static inline struct inet6_dev *ipv6_skb_idev(struct sk_buff *skb) +{ + return skb->dst ? ip6_dst_idev(skb->dst) : __in6_dev_get(skb->dev); +} + /* Router Alert as of RFC 2711 */ static int ipv6_hop_ra(struct sk_buff **skbp, int optoff) @@ -688,25 +696,25 @@ static int ipv6_hop_jumbo(struct sk_buff **skbp, int optoff) if (nh[optoff + 1] != 4 || (optoff & 3) != 2) { LIMIT_NETDEBUG(KERN_DEBUG "ipv6_hop_jumbo: wrong jumbo opt length/alignment %d\n", nh[optoff+1]); - IP6_INC_STATS_BH(ip6_dst_idev(skb->dst), + IP6_INC_STATS_BH(ipv6_skb_idev(skb), IPSTATS_MIB_INHDRERRORS); goto drop; } pkt_len = ntohl(*(__be32 *)(nh + optoff + 2)); if (pkt_len <= IPV6_MAXPLEN) { - IP6_INC_STATS_BH(ip6_dst_idev(skb->dst), IPSTATS_MIB_INHDRERRORS); + IP6_INC_STATS_BH(ipv6_skb_idev(skb), IPSTATS_MIB_INHDRERRORS); icmpv6_param_prob(skb, ICMPV6_HDR_FIELD, optoff+2); return 0; } if (ipv6_hdr(skb)->payload_len) { - IP6_INC_STATS_BH(ip6_dst_idev(skb->dst), IPSTATS_MIB_INHDRERRORS); + IP6_INC_STATS_BH(ipv6_skb_idev(skb), IPSTATS_MIB_INHDRERRORS); icmpv6_param_prob(skb, ICMPV6_HDR_FIELD, optoff); return 0; } if (pkt_len > skb->len - sizeof(struct ipv6hdr)) { - IP6_INC_STATS_BH(ip6_dst_idev(skb->dst), IPSTATS_MIB_INTRUNCATEDPKTS); + IP6_INC_STATS_BH(ipv6_skb_idev(skb), IPSTATS_MIB_INTRUNCATEDPKTS); goto drop; } diff --git a/net/ipv6/fib6_rules.c b/net/ipv6/fib6_rules.c index fc3882c9060..53b3998a486 100644 --- a/net/ipv6/fib6_rules.c +++ b/net/ipv6/fib6_rules.c @@ -157,7 +157,7 @@ static int fib6_rule_match(struct fib_rule *rule, struct flowi *fl, int flags) return 1; } -static struct nla_policy fib6_rule_policy[FRA_MAX+1] __read_mostly = { +static const struct nla_policy fib6_rule_policy[FRA_MAX+1] = { FRA_GENERIC_POLICY, }; diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c index ca08ee88d07..662a7d9681f 100644 --- a/net/ipv6/ip6_fib.c +++ b/net/ipv6/ip6_fib.c @@ -619,14 +619,6 @@ static int fib6_add_rt2node(struct fib6_node *fn, struct rt6_info *rt, ins = &fn->leaf; - if (fn->fn_flags&RTN_TL_ROOT && - fn->leaf == &ip6_null_entry && - !(rt->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF)) ){ - fn->leaf = rt; - rt->u.dst.rt6_next = NULL; - goto out; - } - for (iter = fn->leaf; iter; iter=iter->u.dst.rt6_next) { /* * Search for duplicates @@ -666,7 +658,6 @@ static int fib6_add_rt2node(struct fib6_node *fn, struct rt6_info *rt, * insert node */ -out: rt->u.dst.rt6_next = iter; *ins = rt; rt->rt6i_node = fn; diff --git a/net/ipv6/ip6_input.c b/net/ipv6/ip6_input.c index be0ee8a34f9..30a5cb1b203 100644 --- a/net/ipv6/ip6_input.c +++ b/net/ipv6/ip6_input.c @@ -235,7 +235,7 @@ int ip6_mc_input(struct sk_buff *skb) IP6_INC_STATS_BH(ip6_dst_idev(skb->dst), IPSTATS_MIB_INMCASTPKTS); 
hdr = ipv6_hdr(skb); - deliver = likely(!(skb->dev->flags & (IFF_PROMISC|IFF_ALLMULTI))) || + deliver = unlikely(skb->dev->flags & (IFF_PROMISC|IFF_ALLMULTI)) || ipv6_chk_mcast_addr(skb->dev, &hdr->daddr, NULL); /* diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index f508171bab7..4704b5fc308 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -463,10 +463,17 @@ int ip6_forward(struct sk_buff *skb) */ if (xrlim_allow(dst, 1*HZ)) ndisc_send_redirect(skb, n, target); - } else if (ipv6_addr_type(&hdr->saddr)&(IPV6_ADDR_MULTICAST|IPV6_ADDR_LOOPBACK - |IPV6_ADDR_LINKLOCAL)) { + } else { + int addrtype = ipv6_addr_type(&hdr->saddr); + /* This check is security critical. */ - goto error; + if (addrtype & (IPV6_ADDR_MULTICAST|IPV6_ADDR_LOOPBACK)) + goto error; + if (addrtype & IPV6_ADDR_LINKLOCAL) { + icmpv6_send(skb, ICMPV6_DEST_UNREACH, + ICMPV6_NOT_NEIGHBOUR, 0, skb->dev); + goto error; + } } if (skb->len > dst_mtu(dst)) { diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c index d8b36451bad..0358e6066a4 100644 --- a/net/ipv6/ndisc.c +++ b/net/ipv6/ndisc.c @@ -1062,7 +1062,7 @@ static void ndisc_router_discovery(struct sk_buff *skb) pref = ra_msg->icmph.icmp6_router_pref; /* 10b is handled as if it were 00b (medium) */ if (pref == ICMPV6_ROUTER_PREF_INVALID || - in6_dev->cnf.accept_ra_rtr_pref) + !in6_dev->cnf.accept_ra_rtr_pref) pref = ICMPV6_ROUTER_PREF_MEDIUM; #endif diff --git a/net/ipv6/netfilter/Kconfig b/net/ipv6/netfilter/Kconfig index bbe99f842f9..838b8ddee8c 100644 --- a/net/ipv6/netfilter/Kconfig +++ b/net/ipv6/netfilter/Kconfig @@ -28,7 +28,7 @@ config IP6_NF_QUEUE packets which enables users to receive the filtered packets with QUEUE target using libipq. - THis option enables the old IPv6-only "ip6_queue" implementation + This option enables the old IPv6-only "ip6_queue" implementation which has been obsoleted by the new "nfnetlink_queue" code (see CONFIG_NETFILTER_NETLINK_QUEUE). 
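
The ip6_output.c hunk above reworks the source-address sanity check in ip6_forward(): multicast and loopback sources are still dropped silently, while a link-local source now gets an ICMPV6_DEST_UNREACH / ICMPV6_NOT_NEIGHBOUR error before the packet is discarded. The sketch below is only a user-space illustration of that classification, not the kernel path; the helper name and the printf reporting are invented for the example, and it relies solely on the standard IN6_IS_ADDR_* macros.

/* Sketch: classify a source address the way the reworked ip6_forward()
 * check does.  Hypothetical user-space helper, not kernel code. */
#include <stdio.h>
#include <arpa/inet.h>
#include <netinet/in.h>

static void classify_forward_src(const char *addr_str)
{
	struct in6_addr a;

	if (inet_pton(AF_INET6, addr_str, &a) != 1) {
		printf("%-20s invalid address\n", addr_str);
		return;
	}
	if (IN6_IS_ADDR_MULTICAST(&a) || IN6_IS_ADDR_LOOPBACK(&a))
		printf("%-20s drop silently (security check)\n", addr_str);
	else if (IN6_IS_ADDR_LINKLOCAL(&a))
		printf("%-20s send ICMPV6_NOT_NEIGHBOUR, then drop\n", addr_str);
	else
		printf("%-20s forward normally\n", addr_str);
}

int main(void)
{
	classify_forward_src("ff02::1");     /* multicast  -> silent drop */
	classify_forward_src("::1");         /* loopback   -> silent drop */
	classify_forward_src("fe80::1");     /* link-local -> ICMP error  */
	classify_forward_src("2001:db8::1"); /* global     -> forwarded   */
	return 0;
}
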
diff --git a/net/ipv6/netfilter/ip6table_filter.c b/net/ipv6/netfilter/ip6table_filter.c index 76f0cf66f95..7e32e2aaf7f 100644 --- a/net/ipv6/netfilter/ip6table_filter.c +++ b/net/ipv6/netfilter/ip6table_filter.c @@ -24,53 +24,29 @@ static struct struct ip6t_replace repl; struct ip6t_standard entries[3]; struct ip6t_error term; -} initial_table __initdata -= { { "filter", FILTER_VALID_HOOKS, 4, - sizeof(struct ip6t_standard) * 3 + sizeof(struct ip6t_error), - { [NF_IP6_LOCAL_IN] = 0, - [NF_IP6_FORWARD] = sizeof(struct ip6t_standard), - [NF_IP6_LOCAL_OUT] = sizeof(struct ip6t_standard) * 2 }, - { [NF_IP6_LOCAL_IN] = 0, - [NF_IP6_FORWARD] = sizeof(struct ip6t_standard), - [NF_IP6_LOCAL_OUT] = sizeof(struct ip6t_standard) * 2 }, - 0, NULL, { } }, - { - /* LOCAL_IN */ - { { { { { { 0 } } }, { { { 0 } } }, { { { 0 } } }, { { { 0 } } }, "", "", { 0 }, { 0 }, 0, 0, 0 }, - 0, - sizeof(struct ip6t_entry), - sizeof(struct ip6t_standard), - 0, { 0, 0 }, { } }, - { { { { IP6T_ALIGN(sizeof(struct ip6t_standard_target)), "" } }, { } }, - -NF_ACCEPT - 1 } }, - /* FORWARD */ - { { { { { { 0 } } }, { { { 0 } } }, { { { 0 } } }, { { { 0 } } }, "", "", { 0 }, { 0 }, 0, 0, 0 }, - 0, - sizeof(struct ip6t_entry), - sizeof(struct ip6t_standard), - 0, { 0, 0 }, { } }, - { { { { IP6T_ALIGN(sizeof(struct ip6t_standard_target)), "" } }, { } }, - -NF_ACCEPT - 1 } }, - /* LOCAL_OUT */ - { { { { { { 0 } } }, { { { 0 } } }, { { { 0 } } }, { { { 0 } } }, "", "", { 0 }, { 0 }, 0, 0, 0 }, - 0, - sizeof(struct ip6t_entry), - sizeof(struct ip6t_standard), - 0, { 0, 0 }, { } }, - { { { { IP6T_ALIGN(sizeof(struct ip6t_standard_target)), "" } }, { } }, - -NF_ACCEPT - 1 } } - }, - /* ERROR */ - { { { { { { 0 } } }, { { { 0 } } }, { { { 0 } } }, { { { 0 } } }, "", "", { 0 }, { 0 }, 0, 0, 0 }, - 0, - sizeof(struct ip6t_entry), - sizeof(struct ip6t_error), - 0, { 0, 0 }, { } }, - { { { { IP6T_ALIGN(sizeof(struct ip6t_error_target)), IP6T_ERROR_TARGET } }, - { } }, - "ERROR" - } - } +} initial_table __initdata = { + .repl = { + .name = "filter", + .valid_hooks = FILTER_VALID_HOOKS, + .num_entries = 4, + .size = sizeof(struct ip6t_standard) * 3 + sizeof(struct ip6t_error), + .hook_entry = { + [NF_IP6_LOCAL_IN] = 0, + [NF_IP6_FORWARD] = sizeof(struct ip6t_standard), + [NF_IP6_LOCAL_OUT] = sizeof(struct ip6t_standard) * 2 + }, + .underflow = { + [NF_IP6_LOCAL_IN] = 0, + [NF_IP6_FORWARD] = sizeof(struct ip6t_standard), + [NF_IP6_LOCAL_OUT] = sizeof(struct ip6t_standard) * 2 + }, + }, + .entries = { + IP6T_STANDARD_INIT(NF_ACCEPT), /* LOCAL_IN */ + IP6T_STANDARD_INIT(NF_ACCEPT), /* FORWARD */ + IP6T_STANDARD_INIT(NF_ACCEPT), /* LOCAL_OUT */ + }, + .term = IP6T_ERROR_INIT, /* ERROR */ }; static struct xt_table packet_filter = { diff --git a/net/ipv6/netfilter/ip6table_mangle.c b/net/ipv6/netfilter/ip6table_mangle.c index a9f10e32c16..f2d26495f41 100644 --- a/net/ipv6/netfilter/ip6table_mangle.c +++ b/net/ipv6/netfilter/ip6table_mangle.c @@ -32,73 +32,35 @@ static struct struct ip6t_replace repl; struct ip6t_standard entries[5]; struct ip6t_error term; -} initial_table __initdata -= { { "mangle", MANGLE_VALID_HOOKS, 6, - sizeof(struct ip6t_standard) * 5 + sizeof(struct ip6t_error), - { [NF_IP6_PRE_ROUTING] = 0, - [NF_IP6_LOCAL_IN] = sizeof(struct ip6t_standard), - [NF_IP6_FORWARD] = sizeof(struct ip6t_standard) * 2, - [NF_IP6_LOCAL_OUT] = sizeof(struct ip6t_standard) * 3, - [NF_IP6_POST_ROUTING] = sizeof(struct ip6t_standard) * 4}, - { [NF_IP6_PRE_ROUTING] = 0, - [NF_IP6_LOCAL_IN] = sizeof(struct ip6t_standard), - [NF_IP6_FORWARD] = 
sizeof(struct ip6t_standard) * 2, - [NF_IP6_LOCAL_OUT] = sizeof(struct ip6t_standard) * 3, - [NF_IP6_POST_ROUTING] = sizeof(struct ip6t_standard) * 4}, - 0, NULL, { } }, - { - /* PRE_ROUTING */ - { { { { { { 0 } } }, { { { 0 } } }, { { { 0 } } }, { { { 0 } } }, "", "", { 0 }, { 0 }, 0, 0, 0 }, - 0, - sizeof(struct ip6t_entry), - sizeof(struct ip6t_standard), - 0, { 0, 0 }, { } }, - { { { { IP6T_ALIGN(sizeof(struct ip6t_standard_target)), "" } }, { } }, - -NF_ACCEPT - 1 } }, - /* LOCAL_IN */ - { { { { { { 0 } } }, { { { 0 } } }, { { { 0 } } }, { { { 0 } } }, "", "", { 0 }, { 0 }, 0, 0, 0 }, - 0, - sizeof(struct ip6t_entry), - sizeof(struct ip6t_standard), - 0, { 0, 0 }, { } }, - { { { { IP6T_ALIGN(sizeof(struct ip6t_standard_target)), "" } }, { } }, - -NF_ACCEPT - 1 } }, - /* FORWARD */ - { { { { { { 0 } } }, { { { 0 } } }, { { { 0 } } }, { { { 0 } } }, "", "", { 0 }, { 0 }, 0, 0, 0 }, - 0, - sizeof(struct ip6t_entry), - sizeof(struct ip6t_standard), - 0, { 0, 0 }, { } }, - { { { { IP6T_ALIGN(sizeof(struct ip6t_standard_target)), "" } }, { } }, - -NF_ACCEPT - 1 } }, - /* LOCAL_OUT */ - { { { { { { 0 } } }, { { { 0 } } }, { { { 0 } } }, { { { 0 } } }, "", "", { 0 }, { 0 }, 0, 0, 0 }, - 0, - sizeof(struct ip6t_entry), - sizeof(struct ip6t_standard), - 0, { 0, 0 }, { } }, - { { { { IP6T_ALIGN(sizeof(struct ip6t_standard_target)), "" } }, { } }, - -NF_ACCEPT - 1 } }, - /* POST_ROUTING */ - { { { { { { 0 } } }, { { { 0 } } }, { { { 0 } } }, { { { 0 } } }, "", "", { 0 }, { 0 }, 0, 0, 0 }, - 0, - sizeof(struct ip6t_entry), - sizeof(struct ip6t_standard), - 0, { 0, 0 }, { } }, - { { { { IP6T_ALIGN(sizeof(struct ip6t_standard_target)), "" } }, { } }, - -NF_ACCEPT - 1 } } - }, - /* ERROR */ - { { { { { { 0 } } }, { { { 0 } } }, { { { 0 } } }, { { { 0 } } }, "", "", { 0 }, { 0 }, 0, 0, 0 }, - 0, - sizeof(struct ip6t_entry), - sizeof(struct ip6t_error), - 0, { 0, 0 }, { } }, - { { { { IP6T_ALIGN(sizeof(struct ip6t_error_target)), IP6T_ERROR_TARGET } }, - { } }, - "ERROR" - } - } +} initial_table __initdata = { + .repl = { + .name = "mangle", + .valid_hooks = MANGLE_VALID_HOOKS, + .num_entries = 6, + .size = sizeof(struct ip6t_standard) * 5 + sizeof(struct ip6t_error), + .hook_entry = { + [NF_IP6_PRE_ROUTING] = 0, + [NF_IP6_LOCAL_IN] = sizeof(struct ip6t_standard), + [NF_IP6_FORWARD] = sizeof(struct ip6t_standard) * 2, + [NF_IP6_LOCAL_OUT] = sizeof(struct ip6t_standard) * 3, + [NF_IP6_POST_ROUTING] = sizeof(struct ip6t_standard) * 4, + }, + .underflow = { + [NF_IP6_PRE_ROUTING] = 0, + [NF_IP6_LOCAL_IN] = sizeof(struct ip6t_standard), + [NF_IP6_FORWARD] = sizeof(struct ip6t_standard) * 2, + [NF_IP6_LOCAL_OUT] = sizeof(struct ip6t_standard) * 3, + [NF_IP6_POST_ROUTING] = sizeof(struct ip6t_standard) * 4, + }, + }, + .entries = { + IP6T_STANDARD_INIT(NF_ACCEPT), /* PRE_ROUTING */ + IP6T_STANDARD_INIT(NF_ACCEPT), /* LOCAL_IN */ + IP6T_STANDARD_INIT(NF_ACCEPT), /* FORWARD */ + IP6T_STANDARD_INIT(NF_ACCEPT), /* LOCAL_OUT */ + IP6T_STANDARD_INIT(NF_ACCEPT), /* POST_ROUTING */ + }, + .term = IP6T_ERROR_INIT, /* ERROR */ }; static struct xt_table packet_mangler = { diff --git a/net/ipv6/netfilter/ip6table_raw.c b/net/ipv6/netfilter/ip6table_raw.c index a3eb5b8ce18..0acda45d455 100644 --- a/net/ipv6/netfilter/ip6table_raw.c +++ b/net/ipv6/netfilter/ip6table_raw.c @@ -35,56 +35,10 @@ static struct }, }, .entries = { - /* PRE_ROUTING */ - { - .entry = { - .target_offset = sizeof(struct ip6t_entry), - .next_offset = sizeof(struct ip6t_standard), - }, - .target = { - .target = { - .u = { - .target_size = 
IP6T_ALIGN(sizeof(struct ip6t_standard_target)), - }, - }, - .verdict = -NF_ACCEPT - 1, - }, - }, - - /* LOCAL_OUT */ - { - .entry = { - .target_offset = sizeof(struct ip6t_entry), - .next_offset = sizeof(struct ip6t_standard), - }, - .target = { - .target = { - .u = { - .target_size = IP6T_ALIGN(sizeof(struct ip6t_standard_target)), - }, - }, - .verdict = -NF_ACCEPT - 1, - }, - }, + IP6T_STANDARD_INIT(NF_ACCEPT), /* PRE_ROUTING */ + IP6T_STANDARD_INIT(NF_ACCEPT), /* LOCAL_OUT */ }, - /* ERROR */ - .term = { - .entry = { - .target_offset = sizeof(struct ip6t_entry), - .next_offset = sizeof(struct ip6t_error), - }, - .target = { - .target = { - .u = { - .user = { - .target_size = IP6T_ALIGN(sizeof(struct ip6t_error_target)), - .name = IP6T_ERROR_TARGET, - }, - }, - }, - .errorname = "ERROR", - }, - } + .term = IP6T_ERROR_INIT, /* ERROR */ }; static struct xt_table packet_raw = { diff --git a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c index 6d2a0820511..1b1797f1f33 100644 --- a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c +++ b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c @@ -160,6 +160,7 @@ static unsigned int ipv6_confirm(unsigned int hooknum, { struct nf_conn *ct; struct nf_conn_help *help; + struct nf_conntrack_helper *helper; enum ip_conntrack_info ctinfo; unsigned int ret, protoff; unsigned int extoff = (u8 *)(ipv6_hdr(*pskb) + 1) - (*pskb)->data; @@ -172,18 +173,21 @@ static unsigned int ipv6_confirm(unsigned int hooknum, goto out; help = nfct_help(ct); - if (!help || !help->helper) + if (!help) + goto out; + /* rcu_read_lock()ed by nf_hook_slow */ + helper = rcu_dereference(help->helper); + if (!helper) goto out; protoff = nf_ct_ipv6_skip_exthdr(*pskb, extoff, &pnum, (*pskb)->len - extoff); - if (protoff < 0 || protoff > (*pskb)->len || - pnum == NEXTHDR_FRAGMENT) { + if (protoff > (*pskb)->len || pnum == NEXTHDR_FRAGMENT) { DEBUGP("proto header not found\n"); return NF_ACCEPT; } - ret = help->helper->help(pskb, protoff, ct, ctinfo); + ret = helper->help(pskb, protoff, ct, ctinfo); if (ret != NF_ACCEPT) return ret; out: diff --git a/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c b/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c index 0be790d250f..8814b95b232 100644 --- a/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c +++ b/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c @@ -168,8 +168,7 @@ icmpv6_error_message(struct sk_buff *skb, skb->len - inip6off - sizeof(struct ipv6hdr)); - if ((inprotoff < 0) || (inprotoff > skb->len) || - (inprotonum == NEXTHDR_FRAGMENT)) { + if ((inprotoff > skb->len) || (inprotonum == NEXTHDR_FRAGMENT)) { DEBUGP("icmpv6_error: Can't get protocol header in ICMPv6 payload.\n"); return -NF_ACCEPT; } diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c index 009a1047fc3..a58459a7668 100644 --- a/net/ipv6/raw.c +++ b/net/ipv6/raw.c @@ -818,8 +818,12 @@ static int rawv6_sendmsg(struct kiocb *iocb, struct sock *sk, if (final_p) ipv6_addr_copy(&fl.fl6_dst, final_p); - if ((err = xfrm_lookup(&dst, &fl, sk, 1)) < 0) - goto out; + if ((err = __xfrm_lookup(&dst, &fl, sk, 1)) < 0) { + if (err == -EREMOTE) + err = ip6_dst_blackhole(sk, &dst, &fl); + if (err < 0) + goto out; + } if (hlimit < 0) { if (ipv6_addr_is_multicast(&fl.fl6_dst)) diff --git a/net/ipv6/route.c b/net/ipv6/route.c index b46ad53044b..fe8d9837f9f 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -119,6 +119,19 @@ static struct dst_ops ip6_dst_ops = { .entry_size = sizeof(struct rt6_info), }; +static void ip6_rt_blackhole_update_pmtu(struct dst_entry 
*dst, u32 mtu) +{ +} + +static struct dst_ops ip6_dst_blackhole_ops = { + .family = AF_INET6, + .protocol = __constant_htons(ETH_P_IPV6), + .destroy = ip6_dst_destroy, + .check = ip6_dst_check, + .update_pmtu = ip6_rt_blackhole_update_pmtu, + .entry_size = sizeof(struct rt6_info), +}; + struct rt6_info ip6_null_entry = { .u = { .dst = { @@ -833,6 +846,54 @@ struct dst_entry * ip6_route_output(struct sock *sk, struct flowi *fl) EXPORT_SYMBOL(ip6_route_output); +static int ip6_blackhole_output(struct sk_buff *skb) +{ + kfree_skb(skb); + return 0; +} + +int ip6_dst_blackhole(struct sock *sk, struct dst_entry **dstp, struct flowi *fl) +{ + struct rt6_info *ort = (struct rt6_info *) *dstp; + struct rt6_info *rt = (struct rt6_info *) + dst_alloc(&ip6_dst_blackhole_ops); + struct dst_entry *new = NULL; + + if (rt) { + new = &rt->u.dst; + + atomic_set(&new->__refcnt, 1); + new->__use = 1; + new->input = ip6_blackhole_output; + new->output = ip6_blackhole_output; + + memcpy(new->metrics, ort->u.dst.metrics, RTAX_MAX*sizeof(u32)); + new->dev = ort->u.dst.dev; + if (new->dev) + dev_hold(new->dev); + rt->rt6i_idev = ort->rt6i_idev; + if (rt->rt6i_idev) + in6_dev_hold(rt->rt6i_idev); + rt->rt6i_expires = 0; + + ipv6_addr_copy(&rt->rt6i_gateway, &ort->rt6i_gateway); + rt->rt6i_flags = ort->rt6i_flags & ~RTF_EXPIRES; + rt->rt6i_metric = 0; + + memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key)); +#ifdef CONFIG_IPV6_SUBTREES + memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key)); +#endif + + dst_free(new); + } + + dst_release(*dstp); + *dstp = new; + return (new ? 0 : -ENOMEM); +} +EXPORT_SYMBOL_GPL(ip6_dst_blackhole); + /* * Destination cache support functions */ @@ -1938,7 +1999,7 @@ void rt6_mtu_change(struct net_device *dev, unsigned mtu) fib6_clean_all(rt6_mtu_change_route, 0, &arg); } -static struct nla_policy rtm_ipv6_policy[RTA_MAX+1] __read_mostly = { +static const struct nla_policy rtm_ipv6_policy[RTA_MAX+1] = { [RTA_GATEWAY] = { .len = sizeof(struct in6_addr) }, [RTA_OIF] = { .type = NLA_U32 }, [RTA_IIF] = { .type = NLA_U32 }, @@ -2495,6 +2556,8 @@ void __init ip6_route_init(void) ip6_dst_ops.kmem_cachep = kmem_cache_create("ip6_dst_cache", sizeof(struct rt6_info), 0, SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL, NULL); + ip6_dst_blackhole_ops.kmem_cachep = ip6_dst_ops.kmem_cachep; + fib6_init(); #ifdef CONFIG_PROC_FS p = proc_net_create("ipv6_route", 0, rt6_proc_info); diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index e2f25ea43b6..193d9d60bb7 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -265,8 +265,12 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr, if (final_p) ipv6_addr_copy(&fl.fl6_dst, final_p); - if ((err = xfrm_lookup(&dst, &fl, sk, 1)) < 0) - goto failure; + if ((err = __xfrm_lookup(&dst, &fl, sk, 1)) < 0) { + if (err == -EREMOTE) + err = ip6_dst_blackhole(sk, &dst, &fl); + if (err < 0) + goto failure; + } if (saddr == NULL) { saddr = &fl.fl6_src; @@ -586,6 +590,7 @@ static int tcp_v6_md5_do_add(struct sock *sk, struct in6_addr *peer, kfree(newkey); return -ENOMEM; } + sk->sk_route_caps &= ~NETIF_F_GSO_MASK; } tcp_alloc_md5sig_pool(); if (tp->md5sig_info->alloced6 == tp->md5sig_info->entries6) { @@ -720,6 +725,7 @@ static int tcp_v6_parse_md5_keys (struct sock *sk, char __user *optval, return -ENOMEM; tp->md5sig_info = p; + sk->sk_route_caps &= ~NETIF_F_GSO_MASK; } newkey = kmemdup(cmd.tcpm_key, cmd.tcpm_keylen, GFP_KERNEL); diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index b083c09e3d2..4210951edb6 100644 --- a/net/ipv6/udp.c +++ 
b/net/ipv6/udp.c @@ -748,8 +748,12 @@ do_udp_sendmsg: if (final_p) ipv6_addr_copy(&fl.fl6_dst, final_p); - if ((err = xfrm_lookup(&dst, &fl, sk, 1)) < 0) - goto out; + if ((err = __xfrm_lookup(&dst, &fl, sk, 1)) < 0) { + if (err == -EREMOTE) + err = ip6_dst_blackhole(sk, &dst, &fl); + if (err < 0) + goto out; + } if (hlimit < 0) { if (ipv6_addr_is_multicast(&fl.fl6_dst)) diff --git a/net/ipv6/xfrm6_input.c b/net/ipv6/xfrm6_input.c index d7ed8aa56ec..c858537cec4 100644 --- a/net/ipv6/xfrm6_input.c +++ b/net/ipv6/xfrm6_input.c @@ -104,10 +104,8 @@ int xfrm6_rcv_spi(struct sk_buff *skb, __be32 spi) nf_reset(skb); if (decaps) { - if (!(skb->dev->flags&IFF_LOOPBACK)) { - dst_release(skb->dst); - skb->dst = NULL; - } + dst_release(skb->dst); + skb->dst = NULL; netif_rx(skb); return -1; } else { diff --git a/net/ipv6/xfrm6_mode_tunnel.c b/net/ipv6/xfrm6_mode_tunnel.c index a6c0cdf46ad..9fc95bc6509 100644 --- a/net/ipv6/xfrm6_mode_tunnel.c +++ b/net/ipv6/xfrm6_mode_tunnel.c @@ -80,6 +80,7 @@ static int xfrm6_tunnel_output(struct xfrm_state *x, struct sk_buff *skb) top_iph->hop_limit = dst_metric(dst->child, RTAX_HOPLIMIT); ipv6_addr_copy(&top_iph->saddr, (struct in6_addr *)&x->props.saddr); ipv6_addr_copy(&top_iph->daddr, (struct in6_addr *)&x->id.daddr); + skb->protocol = htons(ETH_P_IPV6); return 0; } diff --git a/net/ipx/af_ipx.c b/net/ipx/af_ipx.c index 392f8bc9269..8400525177a 100644 --- a/net/ipx/af_ipx.c +++ b/net/ipx/af_ipx.c @@ -87,7 +87,7 @@ extern int ipxrtr_add_route(__be32 network, struct ipx_interface *intrfc, unsigned char *node); extern void ipxrtr_del_routes(struct ipx_interface *intrfc); extern int ipxrtr_route_packet(struct sock *sk, struct sockaddr_ipx *usipx, - struct iovec *iov, int len, int noblock); + struct iovec *iov, size_t len, int noblock); extern int ipxrtr_route_skb(struct sk_buff *skb); extern struct ipx_route *ipxrtr_lookup(__be32 net); extern int ipxrtr_ioctl(unsigned int cmd, void __user *arg); @@ -1961,7 +1961,6 @@ static const struct proto_ops SOCKOPS_WRAPPED(ipx_dgram_ops) = { .sendpage = sock_no_sendpage, }; -#include <linux/smp_lock.h> SOCKOPS_WRAP(ipx_dgram, PF_IPX); static struct packet_type ipx_8023_packet_type = { diff --git a/net/irda/Kconfig b/net/irda/Kconfig index 9efb17ba48a..c8671a7ffb3 100644 --- a/net/irda/Kconfig +++ b/net/irda/Kconfig @@ -3,7 +3,7 @@ # menuconfig IRDA - depends on NET + depends on NET && !S390 tristate "IrDA (infrared) subsystem support" select CRC_CCITT ---help--- diff --git a/net/irda/af_irda.c b/net/irda/af_irda.c index 06c97c60d54..dcd7e325b28 100644 --- a/net/irda/af_irda.c +++ b/net/irda/af_irda.c @@ -2538,7 +2538,6 @@ static const struct proto_ops SOCKOPS_WRAPPED(irda_ultra_ops) = { }; #endif /* CONFIG_IRDA_ULTRA */ -#include <linux/smp_lock.h> SOCKOPS_WRAP(irda_stream, PF_IRDA); SOCKOPS_WRAP(irda_seqpacket, PF_IRDA); SOCKOPS_WRAP(irda_dgram, PF_IRDA); diff --git a/net/irda/irlap_event.c b/net/irda/irlap_event.c index 0b02073ffdf..a8b8873aa26 100644 --- a/net/irda/irlap_event.c +++ b/net/irda/irlap_event.c @@ -317,23 +317,6 @@ void irlap_do_event(struct irlap_cb *self, IRLAP_EVENT event, } /* - * Function irlap_next_state (self, state) - * - * Switches state and provides debug information - * - */ -static inline void irlap_next_state(struct irlap_cb *self, IRLAP_STATE state) -{ - /* - if (!self || self->magic != LAP_MAGIC) - return; - - IRDA_DEBUG(4, "next LAP state = %s\n", irlap_state[state]); - */ - self->state = state; -} - -/* * Function irlap_state_ndm (event, skb, frame) * * NDM (Normal Disconnected Mode) state @@ 
-1086,7 +1069,6 @@ static int irlap_state_xmit_p(struct irlap_cb *self, IRLAP_EVENT event, } else { /* Final packet of window */ irlap_send_data_primary_poll(self, skb); - irlap_next_state(self, LAP_NRM_P); /* * Make sure state machine does not try to send @@ -1436,14 +1418,14 @@ static int irlap_state_nrm_p(struct irlap_cb *self, IRLAP_EVENT event, */ self->remote_busy = FALSE; + /* Stop final timer */ + del_timer(&self->final_timer); + /* * Nr as expected? */ ret = irlap_validate_nr_received(self, info->nr); if (ret == NR_EXPECTED) { - /* Stop final timer */ - del_timer(&self->final_timer); - /* Update Nr received */ irlap_update_nr_received(self, info->nr); @@ -1475,14 +1457,12 @@ static int irlap_state_nrm_p(struct irlap_cb *self, IRLAP_EVENT event, /* Resend rejected frames */ irlap_resend_rejected_frames(self, CMD_FRAME); - - /* Final timer ??? Jean II */ + irlap_start_final_timer(self, self->final_timeout * 2); irlap_next_state(self, LAP_NRM_P); } else if (ret == NR_INVALID) { IRDA_DEBUG(1, "%s(), Received RR with " "invalid nr !\n", __FUNCTION__); - del_timer(&self->final_timer); irlap_next_state(self, LAP_RESET_WAIT); diff --git a/net/irda/irlap_frame.c b/net/irda/irlap_frame.c index 3c5a68e3641..3013c49ab97 100644 --- a/net/irda/irlap_frame.c +++ b/net/irda/irlap_frame.c @@ -798,16 +798,19 @@ void irlap_send_data_primary_poll(struct irlap_cb *self, struct sk_buff *skb) self->vs = (self->vs + 1) % 8; self->ack_required = FALSE; + irlap_next_state(self, LAP_NRM_P); irlap_send_i_frame(self, tx_skb, CMD_FRAME); } else { IRDA_DEBUG(4, "%s(), sending unreliable frame\n", __FUNCTION__); if (self->ack_required) { irlap_send_ui_frame(self, skb_get(skb), self->caddr, CMD_FRAME); + irlap_next_state(self, LAP_NRM_P); irlap_send_rr_frame(self, CMD_FRAME); self->ack_required = FALSE; } else { skb->data[1] |= PF_BIT; + irlap_next_state(self, LAP_NRM_P); irlap_send_ui_frame(self, skb_get(skb), self->caddr, CMD_FRAME); } } diff --git a/net/iucv/iucv.c b/net/iucv/iucv.c index fb3faf72e85..b7333061016 100644 --- a/net/iucv/iucv.c +++ b/net/iucv/iucv.c @@ -556,6 +556,7 @@ static int __cpuinit iucv_cpu_notify(struct notifier_block *self, switch (action) { case CPU_UP_PREPARE: + case CPU_UP_PREPARE_FROZEN: if (!percpu_populate(iucv_irq_data, sizeof(struct iucv_irq_data), GFP_KERNEL|GFP_DMA, cpu)) @@ -567,15 +568,20 @@ static int __cpuinit iucv_cpu_notify(struct notifier_block *self, } break; case CPU_UP_CANCELED: + case CPU_UP_CANCELED_FROZEN: case CPU_DEAD: + case CPU_DEAD_FROZEN: percpu_depopulate(iucv_param, cpu); percpu_depopulate(iucv_irq_data, cpu); break; case CPU_ONLINE: + case CPU_ONLINE_FROZEN: case CPU_DOWN_FAILED: + case CPU_DOWN_FAILED_FROZEN: smp_call_function_on(iucv_declare_cpu, NULL, 0, 1, cpu); break; case CPU_DOWN_PREPARE: + case CPU_DOWN_PREPARE_FROZEN: cpumask = iucv_buffer_cpumask; cpu_clear(cpu, cpumask); if (cpus_empty(cpumask)) diff --git a/net/key/af_key.c b/net/key/af_key.c index a99444142dc..0f8304b0246 100644 --- a/net/key/af_key.c +++ b/net/key/af_key.c @@ -1448,8 +1448,6 @@ static int pfkey_add(struct sock *sk, struct sk_buff *skb, struct sadb_msg *hdr, int err; struct km_event c; - xfrm_probe_algs(); - x = pfkey_msg2xfrm_state(hdr, ext_hdrs); if (IS_ERR(x)) return PTR_ERR(x); @@ -1684,6 +1682,7 @@ static int pfkey_flush(struct sock *sk, struct sk_buff *skb, struct sadb_msg *hd unsigned proto; struct km_event c; struct xfrm_audit audit_info; + int err; proto = pfkey_satype2proto(hdr->sadb_msg_satype); if (proto == 0) @@ -1691,7 +1690,9 @@ static int pfkey_flush(struct 
sock *sk, struct sk_buff *skb, struct sadb_msg *hd audit_info.loginuid = audit_get_loginuid(current->audit_context); audit_info.secid = 0; - xfrm_state_flush(proto, &audit_info); + err = xfrm_state_flush(proto, &audit_info); + if (err) + return err; c.data.proto = proto; c.seq = hdr->sadb_msg_seq; c.pid = hdr->sadb_msg_pid; @@ -2685,10 +2686,13 @@ static int pfkey_spdflush(struct sock *sk, struct sk_buff *skb, struct sadb_msg { struct km_event c; struct xfrm_audit audit_info; + int err; audit_info.loginuid = audit_get_loginuid(current->audit_context); audit_info.secid = 0; - xfrm_policy_flush(XFRM_POLICY_TYPE_MAIN, &audit_info); + err = xfrm_policy_flush(XFRM_POLICY_TYPE_MAIN, &audit_info); + if (err) + return err; c.data.type = XFRM_POLICY_TYPE_MAIN; c.event = XFRM_MSG_FLUSHPOLICY; c.pid = hdr->sadb_msg_pid; diff --git a/net/llc/af_llc.c b/net/llc/af_llc.c index 7d9fa38b6a7..6b8a103cf9e 100644 --- a/net/llc/af_llc.c +++ b/net/llc/af_llc.c @@ -324,7 +324,7 @@ static int llc_ui_bind(struct socket *sock, struct sockaddr *uaddr, int addrlen) memset(&laddr, 0, sizeof(laddr)); memset(&daddr, 0, sizeof(daddr)); /* - * FIXME: check if the the address is multicast, + * FIXME: check if the address is multicast, * only SOCK_DGRAM can do this. */ memcpy(laddr.mac, addr->sllc_mac, IFHWADDRLEN); diff --git a/net/mac80211/debugfs.c b/net/mac80211/debugfs.c index bb6c0feb2d4..476c8486f78 100644 --- a/net/mac80211/debugfs.c +++ b/net/mac80211/debugfs.c @@ -112,7 +112,7 @@ DEBUGFS_READONLY_FILE(wep_iv, 20, "%#06x", local->wep_iv & 0xffffff); DEBUGFS_READONLY_FILE(tx_power_reduction, 20, "%d.%d dBm", local->hw.conf.tx_power_reduction / 10, - local->hw.conf.tx_power_reduction & 10); + local->hw.conf.tx_power_reduction % 10); DEBUGFS_READONLY_FILE(rate_ctrl_alg, 100, "%s", local->rate_ctrl ? 
local->rate_ctrl->ops->name : "<unset>"); diff --git a/net/mac80211/ieee80211.c b/net/mac80211/ieee80211.c index 6e36df67f8d..4e84f24fd43 100644 --- a/net/mac80211/ieee80211.c +++ b/net/mac80211/ieee80211.c @@ -2474,6 +2474,8 @@ static int ieee80211_open(struct net_device *dev) if (sdata->type == IEEE80211_IF_TYPE_STA && !local->user_space_mlme) netif_carrier_off(dev); + else + netif_carrier_on(dev); netif_start_queue(dev); return 0; @@ -3278,8 +3280,10 @@ ieee80211_rx_h_defragment(struct ieee80211_txrx_data *rx) return TXRX_DROP; } } - while ((skb = __skb_dequeue(&entry->skb_list))) + while ((skb = __skb_dequeue(&entry->skb_list))) { memcpy(skb_put(rx->skb, skb->len), skb->data, skb->len); + dev_kfree_skb(skb); + } /* Complete frame has been reassembled - process it now */ rx->fragmented = 1; diff --git a/net/mac80211/ieee80211_sta.c b/net/mac80211/ieee80211_sta.c index 822917debef..91b545c144c 100644 --- a/net/mac80211/ieee80211_sta.c +++ b/net/mac80211/ieee80211_sta.c @@ -17,6 +17,7 @@ * scan result table filtering (by capability (privacy, IBSS/BSS, WPA/RSN IE, * SSID) */ +#include <linux/delay.h> #include <linux/if_ether.h> #include <linux/skbuff.h> #include <linux/netdevice.h> @@ -27,7 +28,6 @@ #include <linux/rtnetlink.h> #include <net/iw_handler.h> #include <asm/types.h> -#include <asm/delay.h> #include <net/mac80211.h> #include "ieee80211_i.h" @@ -1155,6 +1155,8 @@ static void ieee80211_rx_mgmt_assoc_resp(struct net_device *dev, if (status_code != WLAN_STATUS_SUCCESS) { printk(KERN_DEBUG "%s: AP denied association (code=%d)\n", dev->name, status_code); + if (status_code == WLAN_STATUS_REASSOC_NO_ASSOC) + ifsta->prev_bssid_set = 0; return; } @@ -2590,11 +2592,17 @@ void ieee80211_scan_completed(struct ieee80211_hw *hw) read_lock(&local->sub_if_lock); list_for_each_entry(sdata, &local->sub_if_list, list) { + + /* No need to wake the master device. */ + if (sdata->dev == local->mdev) + continue; + if (sdata->type == IEEE80211_IF_TYPE_STA) { if (sdata->u.sta.associated) ieee80211_send_nullfunc(local, sdata, 0); ieee80211_sta_timer((unsigned long)sdata); } + netif_wake_queue(sdata->dev); } read_unlock(&local->sub_if_lock); @@ -2736,6 +2744,12 @@ static int ieee80211_sta_start_scan(struct net_device *dev, read_lock(&local->sub_if_lock); list_for_each_entry(sdata, &local->sub_if_list, list) { + + /* Don't stop the master interface, otherwise we can't transmit + * probes! */ + if (sdata->dev == local->mdev) + continue; + netif_stop_queue(sdata->dev); if (sdata->type == IEEE80211_IF_TYPE_STA && sdata->u.sta.associated) @@ -2995,7 +3009,7 @@ struct sta_info * ieee80211_ibss_add_sta(struct net_device *dev, { struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr); struct sta_info *sta; - struct ieee80211_sub_if_data *sdata = NULL; + struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); /* TODO: Could consider removing the least recently used entry and * allow new one to be added. */ diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig index ea6211cade0..a567dae8e5f 100644 --- a/net/netfilter/Kconfig +++ b/net/netfilter/Kconfig @@ -197,7 +197,7 @@ config NF_CONNTRACK_PPTP Please note that not all PPTP modes of operation are supported yet. Specifically these limitations exist: - - Blindy assumes that control connections are always established + - Blindly assumes that control connections are always established in PNS->PAC direction. This is a violation of RFC2637. 
- Only supports a single call within each session diff --git a/net/netfilter/nf_conntrack_amanda.c b/net/netfilter/nf_conntrack_amanda.c index b8869eab765..0568f2e86b5 100644 --- a/net/netfilter/nf_conntrack_amanda.c +++ b/net/netfilter/nf_conntrack_amanda.c @@ -208,13 +208,14 @@ static int __init nf_conntrack_amanda_init(void) { int ret, i; - ret = -ENOMEM; for (i = 0; i < ARRAY_SIZE(search); i++) { search[i].ts = textsearch_prepare(ts_algo, search[i].string, search[i].len, GFP_KERNEL, TS_AUTOLOAD); - if (search[i].ts == NULL) + if (IS_ERR(search[i].ts)) { + ret = PTR_ERR(search[i].ts); goto err1; + } } ret = nf_conntrack_helper_register(&amanda_helper[0]); if (ret < 0) @@ -227,10 +228,9 @@ static int __init nf_conntrack_amanda_init(void) err2: nf_conntrack_helper_unregister(&amanda_helper[0]); err1: - for (; i >= 0; i--) { - if (search[i].ts) - textsearch_destroy(search[i].ts); - } + while (--i >= 0) + textsearch_destroy(search[i].ts); + return ret; } diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index e132c8ae878..7a15e30356f 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -298,8 +298,6 @@ static void destroy_conntrack(struct nf_conntrack *nfct) { struct nf_conn *ct = (struct nf_conn *)nfct; - struct nf_conn_help *help = nfct_help(ct); - struct nf_conntrack_l3proto *l3proto; struct nf_conntrack_l4proto *l4proto; typeof(nf_conntrack_destroyed) destroyed; @@ -310,17 +308,10 @@ destroy_conntrack(struct nf_conntrack *nfct) nf_conntrack_event(IPCT_DESTROY, ct); set_bit(IPS_DYING_BIT, &ct->status); - if (help && help->helper && help->helper->destroy) - help->helper->destroy(ct); - /* To make sure we don't get any weird locking issues here: * destroy_conntrack() MUST NOT be called with a write lock * to nf_conntrack_lock!!! -HW */ rcu_read_lock(); - l3proto = __nf_ct_l3proto_find(ct->tuplehash[IP_CT_DIR_REPLY].tuple.src.l3num); - if (l3proto && l3proto->destroy) - l3proto->destroy(ct); - l4proto = __nf_ct_l4proto_find(ct->tuplehash[IP_CT_DIR_REPLY].tuple.src.l3num, ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.protonum); if (l4proto && l4proto->destroy) @@ -358,6 +349,16 @@ destroy_conntrack(struct nf_conntrack *nfct) static void death_by_timeout(unsigned long ul_conntrack) { struct nf_conn *ct = (void *)ul_conntrack; + struct nf_conn_help *help = nfct_help(ct); + struct nf_conntrack_helper *helper; + + if (help) { + rcu_read_lock(); + helper = rcu_dereference(help->helper); + if (helper && helper->destroy) + helper->destroy(ct); + rcu_read_unlock(); + } write_lock_bh(&nf_conntrack_lock); /* Inside lock so preempt is disabled on module removal path. 
@@ -666,6 +667,7 @@ init_conntrack(const struct nf_conntrack_tuple *tuple, unsigned int dataoff) { struct nf_conn *conntrack; + struct nf_conn_help *help; struct nf_conntrack_tuple repl_tuple; struct nf_conntrack_expect *exp; u_int32_t features = 0; @@ -696,6 +698,7 @@ init_conntrack(const struct nf_conntrack_tuple *tuple, write_lock_bh(&nf_conntrack_lock); exp = find_expectation(tuple); + help = nfct_help(conntrack); if (exp) { DEBUGP("conntrack: expectation arrives ct=%p exp=%p\n", conntrack, exp); @@ -703,7 +706,7 @@ init_conntrack(const struct nf_conntrack_tuple *tuple, __set_bit(IPS_EXPECTED_BIT, &conntrack->status); conntrack->master = exp->master; if (exp->helper) - nfct_help(conntrack)->helper = exp->helper; + rcu_assign_pointer(help->helper, exp->helper); #ifdef CONFIG_NF_CONNTRACK_MARK conntrack->mark = exp->master->mark; #endif @@ -713,10 +716,11 @@ init_conntrack(const struct nf_conntrack_tuple *tuple, nf_conntrack_get(&conntrack->master->ct_general); NF_CT_STAT_INC(expect_new); } else { - struct nf_conn_help *help = nfct_help(conntrack); - - if (help) - help->helper = __nf_ct_helper_find(&repl_tuple); + if (help) { + /* not in hash table yet, so not strictly necessary */ + rcu_assign_pointer(help->helper, + __nf_ct_helper_find(&repl_tuple)); + } NF_CT_STAT_INC(new); } @@ -893,8 +897,14 @@ void nf_conntrack_alter_reply(struct nf_conn *ct, NF_CT_DUMP_TUPLE(newreply); ct->tuplehash[IP_CT_DIR_REPLY].tuple = *newreply; - if (!ct->master && help && help->expecting == 0) - help->helper = __nf_ct_helper_find(newreply); + if (!ct->master && help && help->expecting == 0) { + struct nf_conntrack_helper *helper; + helper = __nf_ct_helper_find(newreply); + if (helper) + memset(&help->help, 0, sizeof(help->help)); + /* not in hash table yet, so not strictly necessary */ + rcu_assign_pointer(help->helper, helper); + } write_unlock_bh(&nf_conntrack_lock); } EXPORT_SYMBOL_GPL(nf_conntrack_alter_reply); diff --git a/net/netfilter/nf_conntrack_expect.c b/net/netfilter/nf_conntrack_expect.c index c31af29a443..504fb6c083f 100644 --- a/net/netfilter/nf_conntrack_expect.c +++ b/net/netfilter/nf_conntrack_expect.c @@ -177,7 +177,7 @@ void nf_conntrack_unexpect_related(struct nf_conntrack_expect *exp) struct nf_conntrack_expect *i; write_lock_bh(&nf_conntrack_lock); - /* choose the the oldest expectation to evict */ + /* choose the oldest expectation to evict */ list_for_each_entry_reverse(i, &nf_conntrack_expect_list, list) { if (expect_matches(i, exp) && del_timer(&i->timeout)) { nf_ct_unlink_expect(i); @@ -337,6 +337,10 @@ int nf_conntrack_expect_related(struct nf_conntrack_expect *expect) NF_CT_ASSERT(master_help); write_lock_bh(&nf_conntrack_lock); + if (!master_help->helper) { + ret = -ESHUTDOWN; + goto out; + } list_for_each_entry(i, &nf_conntrack_expect_list, list) { if (expect_matches(i, expect)) { /* Refresh timer: if it's dying, ignore.. 
*/ diff --git a/net/netfilter/nf_conntrack_ftp.c b/net/netfilter/nf_conntrack_ftp.c index a186799f654..82db2aa53bf 100644 --- a/net/netfilter/nf_conntrack_ftp.c +++ b/net/netfilter/nf_conntrack_ftp.c @@ -48,8 +48,7 @@ unsigned int (*nf_nat_ftp_hook)(struct sk_buff **pskb, enum nf_ct_ftp_type type, unsigned int matchoff, unsigned int matchlen, - struct nf_conntrack_expect *exp, - u32 *seq); + struct nf_conntrack_expect *exp); EXPORT_SYMBOL_GPL(nf_nat_ftp_hook); #if 0 @@ -335,15 +334,17 @@ static void update_nl_seq(u32 nl_seq, struct nf_ct_ftp_master *info, int dir, if (info->seq_aft_nl[dir][i] == nl_seq) return; - if (oldest == info->seq_aft_nl_num[dir] - || before(info->seq_aft_nl[dir][i], oldest)) + if (oldest == info->seq_aft_nl_num[dir] || + before(info->seq_aft_nl[dir][i], + info->seq_aft_nl[dir][oldest])) oldest = i; } if (info->seq_aft_nl_num[dir] < NUM_SEQ_TO_REMEMBER) { info->seq_aft_nl[dir][info->seq_aft_nl_num[dir]++] = nl_seq; nf_conntrack_event_cache(IPCT_HELPINFO_VOLATILE, skb); - } else if (oldest != NUM_SEQ_TO_REMEMBER) { + } else if (oldest != NUM_SEQ_TO_REMEMBER && + after(nl_seq, info->seq_aft_nl[dir][oldest])) { info->seq_aft_nl[dir][oldest] = nl_seq; nf_conntrack_event_cache(IPCT_HELPINFO_VOLATILE, skb); } @@ -519,7 +520,7 @@ static int help(struct sk_buff **pskb, nf_nat_ftp = rcu_dereference(nf_nat_ftp_hook); if (nf_nat_ftp && ct->status & IPS_NAT_MASK) ret = nf_nat_ftp(pskb, ctinfo, search[dir][i].ftptype, - matchoff, matchlen, exp, &seq); + matchoff, matchlen, exp); else { /* Can't expect this? Best to drop packet now. */ if (nf_conntrack_expect_related(exp) != 0) diff --git a/net/netfilter/nf_conntrack_h323_main.c b/net/netfilter/nf_conntrack_h323_main.c index b284db73ca7..a1b95acad29 100644 --- a/net/netfilter/nf_conntrack_h323_main.c +++ b/net/netfilter/nf_conntrack_h323_main.c @@ -520,6 +520,16 @@ static int process_olca(struct sk_buff **pskb, struct nf_conn *ct, } } + if ((olca->options & eOpenLogicalChannelAck_separateStack) && + olca->separateStack.networkAddress.choice == + eNetworkAccessParameters_networkAddress_localAreaAddress) { + ret = expect_t120(pskb, ct, ctinfo, data, dataoff, + &olca->separateStack.networkAddress. 
+ localAreaAddress); + if (ret < 0) + return -1; + } + return 0; } @@ -640,7 +650,7 @@ int get_h225_addr(struct nf_conn *ct, unsigned char *data, case eTransportAddress_ip6Address: if (family != AF_INET6) return 0; - p = data + taddr->ip6Address.ip6; + p = data + taddr->ip6Address.ip; len = 16; break; default: @@ -977,30 +987,6 @@ static int process_alerting(struct sk_buff **pskb, struct nf_conn *ct, } /****************************************************************************/ -static int process_information(struct sk_buff **pskb, - struct nf_conn *ct, - enum ip_conntrack_info ctinfo, - unsigned char **data, int dataoff, - Information_UUIE *info) -{ - int ret; - int i; - - DEBUGP("nf_ct_q931: Information\n"); - - if (info->options & eInformation_UUIE_fastStart) { - for (i = 0; i < info->fastStart.count; i++) { - ret = process_olc(pskb, ct, ctinfo, data, dataoff, - &info->fastStart.item[i]); - if (ret < 0) - return -1; - } - } - - return 0; -} - -/****************************************************************************/ static int process_facility(struct sk_buff **pskb, struct nf_conn *ct, enum ip_conntrack_info ctinfo, unsigned char **data, int dataoff, @@ -1096,11 +1082,6 @@ static int process_q931(struct sk_buff **pskb, struct nf_conn *ct, ret = process_alerting(pskb, ct, ctinfo, data, dataoff, &pdu->h323_message_body.alerting); break; - case eH323_UU_PDU_h323_message_body_information: - ret = process_information(pskb, ct, ctinfo, data, dataoff, - &pdu->h323_message_body. - information); - break; case eH323_UU_PDU_h323_message_body_facility: ret = process_facility(pskb, ct, ctinfo, data, dataoff, &pdu->h323_message_body.facility); diff --git a/net/netfilter/nf_conntrack_h323_types.c b/net/netfilter/nf_conntrack_h323_types.c index 4c6f8b3b120..3a21fdf1a26 100644 --- a/net/netfilter/nf_conntrack_h323_types.c +++ b/net/netfilter/nf_conntrack_h323_types.c @@ -1,4 +1,4 @@ -/* Generated by Jing Min Zhao's ASN.1 parser, Apr 20 2006 +/* Generated by Jing Min Zhao's ASN.1 parser, May 16 2007 * * Copyright (c) 2006 Jing Min Zhao <zhaojingmin@users.sourceforge.net> * @@ -37,7 +37,7 @@ static field_t _TransportAddress_ipxAddress[] = { /* SEQUENCE */ static field_t _TransportAddress_ip6Address[] = { /* SEQUENCE */ {FNAME("ip") OCTSTR, FIXD, 16, 0, DECODE, - offsetof(TransportAddress_ip6Address, ip6), NULL}, + offsetof(TransportAddress_ip6Address, ip), NULL}, {FNAME("port") INT, WORD, 0, 0, SKIP, 0, NULL}, }; @@ -67,7 +67,8 @@ static field_t _TransportAddress[] = { /* CHOICE */ {FNAME("ipxAddress") SEQ, 0, 3, 3, SKIP, 0, _TransportAddress_ipxAddress}, {FNAME("ip6Address") SEQ, 0, 2, 2, DECODE | EXT, - offsetof(TransportAddress, ip6Address), _TransportAddress_ip6Address}, + offsetof(TransportAddress, ip6Address), + _TransportAddress_ip6Address}, {FNAME("netBios") OCTSTR, FIXD, 16, 0, SKIP, 0, NULL}, {FNAME("nsap") OCTSTR, 5, 1, 0, SKIP, 0, NULL}, {FNAME("nonStandardAddress") SEQ, 0, 2, 2, SKIP, 0, @@ -638,7 +639,8 @@ static field_t _UnicastAddress_iPXAddress[] = { /* SEQUENCE */ }; static field_t _UnicastAddress_iP6Address[] = { /* SEQUENCE */ - {FNAME("network") OCTSTR, FIXD, 16, 0, SKIP, 0, NULL}, + {FNAME("network") OCTSTR, FIXD, 16, 0, DECODE, + offsetof(UnicastAddress_iP6Address, network), NULL}, {FNAME("tsapIdentifier") INT, WORD, 0, 0, SKIP, 0, NULL}, }; @@ -665,8 +667,8 @@ static field_t _UnicastAddress[] = { /* CHOICE */ offsetof(UnicastAddress, iPAddress), _UnicastAddress_iPAddress}, {FNAME("iPXAddress") SEQ, 0, 3, 3, SKIP | EXT, 0, _UnicastAddress_iPXAddress}, - {FNAME("iP6Address") 
SEQ, 0, 2, 2, SKIP | EXT, 0, - _UnicastAddress_iP6Address}, + {FNAME("iP6Address") SEQ, 0, 2, 2, DECODE | EXT, + offsetof(UnicastAddress, iP6Address), _UnicastAddress_iP6Address}, {FNAME("netBios") OCTSTR, FIXD, 16, 0, SKIP, 0, NULL}, {FNAME("iPSourceRouteAddress") SEQ, 0, 4, 4, SKIP | EXT, 0, _UnicastAddress_iPSourceRouteAddress}, @@ -984,19 +986,12 @@ static field_t _Alerting_UUIE[] = { /* SEQUENCE */ {FNAME("featureSet") SEQ, 3, 4, 4, SKIP | EXT | OPT, 0, NULL}, }; -static field_t _Information_UUIE_fastStart[] = { /* SEQUENCE OF */ - {FNAME("item") SEQ, 1, 3, 5, DECODE | OPEN | EXT, - sizeof(OpenLogicalChannel), _OpenLogicalChannel} - , -}; - static field_t _Information_UUIE[] = { /* SEQUENCE */ {FNAME("protocolIdentifier") OID, BYTE, 0, 0, SKIP, 0, NULL}, {FNAME("callIdentifier") SEQ, 0, 1, 1, SKIP | EXT, 0, NULL}, {FNAME("tokens") SEQOF, SEMI, 0, 0, SKIP | OPT, 0, NULL}, {FNAME("cryptoTokens") SEQOF, SEMI, 0, 0, SKIP | OPT, 0, NULL}, - {FNAME("fastStart") SEQOF, SEMI, 0, 30, DECODE | OPT, - offsetof(Information_UUIE, fastStart), _Information_UUIE_fastStart}, + {FNAME("fastStart") SEQOF, SEMI, 0, 30, SKIP | OPT, 0, NULL}, {FNAME("fastConnectRefused") NUL, FIXD, 0, 0, SKIP | OPT, 0, NULL}, {FNAME("circuitInfo") SEQ, 3, 3, 3, SKIP | EXT | OPT, 0, NULL}, }; @@ -1343,9 +1338,7 @@ static field_t _H323_UU_PDU_h323_message_body[] = { /* CHOICE */ offsetof(H323_UU_PDU_h323_message_body, connect), _Connect_UUIE}, {FNAME("alerting") SEQ, 1, 3, 17, DECODE | EXT, offsetof(H323_UU_PDU_h323_message_body, alerting), _Alerting_UUIE}, - {FNAME("information") SEQ, 0, 1, 7, DECODE | EXT, - offsetof(H323_UU_PDU_h323_message_body, information), - _Information_UUIE}, + {FNAME("information") SEQ, 0, 1, 7, SKIP | EXT, 0, _Information_UUIE}, {FNAME("releaseComplete") SEQ, 1, 2, 11, SKIP | EXT, 0, _ReleaseComplete_UUIE}, {FNAME("facility") SEQ, 3, 5, 21, DECODE | EXT, @@ -1430,7 +1423,9 @@ static field_t _OpenLogicalChannelAck[] = { /* SEQUENCE */ DECODE | EXT | OPT, offsetof(OpenLogicalChannelAck, reverseLogicalChannelParameters), _OpenLogicalChannelAck_reverseLogicalChannelParameters}, - {FNAME("separateStack") SEQ, 2, 4, 5, SKIP | EXT | OPT, 0, NULL}, + {FNAME("separateStack") SEQ, 2, 4, 5, DECODE | EXT | OPT, + offsetof(OpenLogicalChannelAck, separateStack), + _NetworkAccessParameters}, {FNAME("forwardMultiplexAckParameters") CHOICE, 0, 1, 1, DECODE | EXT | OPT, offsetof(OpenLogicalChannelAck, forwardMultiplexAckParameters), diff --git a/net/netfilter/nf_conntrack_helper.c b/net/netfilter/nf_conntrack_helper.c index 0743be4434b..f868b7fbd9b 100644 --- a/net/netfilter/nf_conntrack_helper.c +++ b/net/netfilter/nf_conntrack_helper.c @@ -93,7 +93,7 @@ static inline int unhelp(struct nf_conntrack_tuple_hash *i, if (help && help->helper == me) { nf_conntrack_event(IPCT_HELPER, ct); - help->helper = NULL; + rcu_assign_pointer(help->helper, NULL); } return 0; } diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c index aa1a97ee514..d0fe3d76982 100644 --- a/net/netfilter/nf_conntrack_netlink.c +++ b/net/netfilter/nf_conntrack_netlink.c @@ -171,21 +171,29 @@ ctnetlink_dump_helpinfo(struct sk_buff *skb, const struct nf_conn *ct) { struct nfattr *nest_helper; const struct nf_conn_help *help = nfct_help(ct); + struct nf_conntrack_helper *helper; - if (!help || !help->helper) + if (!help) return 0; + rcu_read_lock(); + helper = rcu_dereference(help->helper); + if (!helper) + goto out; + nest_helper = NFA_NEST(skb, CTA_HELP); - NFA_PUT(skb, CTA_HELP_NAME, strlen(help->helper->name), 
help->helper->name); + NFA_PUT(skb, CTA_HELP_NAME, strlen(helper->name), helper->name); - if (help->helper->to_nfattr) - help->helper->to_nfattr(skb, ct); + if (helper->to_nfattr) + helper->to_nfattr(skb, ct); NFA_NEST_END(skb, nest_helper); - +out: + rcu_read_unlock(); return 0; nfattr_failure: + rcu_read_unlock(); return -1; } @@ -830,11 +838,6 @@ ctnetlink_change_helper(struct nf_conn *ct, struct nfattr *cda[]) char *helpname; int err; - if (!help) { - /* FIXME: we need to reallocate and rehash */ - return -EBUSY; - } - /* don't change helper of sibling connections */ if (ct->master) return -EINVAL; @@ -843,26 +846,34 @@ ctnetlink_change_helper(struct nf_conn *ct, struct nfattr *cda[]) if (err < 0) return err; - helper = __nf_conntrack_helper_find_byname(helpname); - if (!helper) { - if (!strcmp(helpname, "")) - helper = NULL; - else - return -EINVAL; - } - - if (help->helper) { - if (!helper) { + if (!strcmp(helpname, "")) { + if (help && help->helper) { /* we had a helper before ... */ nf_ct_remove_expectations(ct); - help->helper = NULL; - } else { - /* need to zero data of old helper */ - memset(&help->help, 0, sizeof(help->help)); + rcu_assign_pointer(help->helper, NULL); } + + return 0; } - help->helper = helper; + if (!help) { + /* FIXME: we need to reallocate and rehash */ + return -EBUSY; + } + + helper = __nf_conntrack_helper_find_byname(helpname); + if (helper == NULL) + return -EINVAL; + + if (help->helper == helper) + return 0; + + if (help->helper) + return -EBUSY; + + /* need to zero data of old helper */ + memset(&help->help, 0, sizeof(help->help)); + rcu_assign_pointer(help->helper, helper); return 0; } @@ -946,6 +957,7 @@ ctnetlink_create_conntrack(struct nfattr *cda[], struct nf_conn *ct; int err = -EINVAL; struct nf_conn_help *help; + struct nf_conntrack_helper *helper = NULL; ct = nf_conntrack_alloc(otuple, rtuple); if (ct == NULL || IS_ERR(ct)) @@ -976,14 +988,17 @@ ctnetlink_create_conntrack(struct nfattr *cda[], #endif help = nfct_help(ct); - if (help) - help->helper = nf_ct_helper_find_get(rtuple); + if (help) { + helper = nf_ct_helper_find_get(rtuple); + /* not in hash table yet so not strictly necessary */ + rcu_assign_pointer(help->helper, helper); + } add_timer(&ct->timeout); nf_conntrack_hash_insert(ct); - if (help && help->helper) - nf_ct_helper_put(help->helper); + if (helper) + nf_ct_helper_put(helper); return 0; diff --git a/net/netfilter/nf_conntrack_proto_gre.c b/net/netfilter/nf_conntrack_proto_gre.c index 5434472420f..339c397d1b5 100644 --- a/net/netfilter/nf_conntrack_proto_gre.c +++ b/net/netfilter/nf_conntrack_proto_gre.c @@ -100,7 +100,6 @@ int nf_ct_gre_keymap_add(struct nf_conn *ct, enum ip_conntrack_dir dir, struct nf_conn_help *help = nfct_help(ct); struct nf_ct_gre_keymap **kmp, *km; - BUG_ON(strcmp(help->helper->name, "pptp")); kmp = &help->help.ct_pptp_info.keymap[dir]; if (*kmp) { /* check whether it's a retransmission */ @@ -137,7 +136,6 @@ void nf_ct_gre_keymap_destroy(struct nf_conn *ct) enum ip_conntrack_dir dir; DEBUGP("entering for ct %p\n", ct); - BUG_ON(strcmp(help->helper->name, "pptp")); write_lock_bh(&nf_ct_gre_lock); for (dir = IP_CT_DIR_ORIGINAL; dir < IP_CT_DIR_MAX; dir++) { diff --git a/net/netfilter/nf_conntrack_sip.c b/net/netfilter/nf_conntrack_sip.c index 7aaa8c91b29..1b5c6c1055f 100644 --- a/net/netfilter/nf_conntrack_sip.c +++ b/net/netfilter/nf_conntrack_sip.c @@ -442,6 +442,9 @@ static int sip_help(struct sk_buff **pskb, /* RTP info only in some SDP pkts */ if (memcmp(dptr, "INVITE", sizeof("INVITE") - 1) != 0 && + 
memcmp(dptr, "UPDATE", sizeof("UPDATE") - 1) != 0 && + memcmp(dptr, "SIP/2.0 180", sizeof("SIP/2.0 180") - 1) != 0 && + memcmp(dptr, "SIP/2.0 183", sizeof("SIP/2.0 183") - 1) != 0 && memcmp(dptr, "SIP/2.0 200", sizeof("SIP/2.0 200") - 1) != 0) { goto out; } diff --git a/net/netfilter/xt_conntrack.c b/net/netfilter/xt_conntrack.c index f4ea8fe07a5..189ded5f378 100644 --- a/net/netfilter/xt_conntrack.c +++ b/net/netfilter/xt_conntrack.c @@ -134,12 +134,66 @@ static void destroy(const struct xt_match *match, void *matchinfo) nf_ct_l3proto_module_put(match->family); } +#ifdef CONFIG_COMPAT +struct compat_xt_conntrack_info +{ + compat_uint_t statemask; + compat_uint_t statusmask; + struct ip_conntrack_old_tuple tuple[IP_CT_DIR_MAX]; + struct in_addr sipmsk[IP_CT_DIR_MAX]; + struct in_addr dipmsk[IP_CT_DIR_MAX]; + compat_ulong_t expires_min; + compat_ulong_t expires_max; + u_int8_t flags; + u_int8_t invflags; +}; + +static void compat_from_user(void *dst, void *src) +{ + struct compat_xt_conntrack_info *cm = src; + struct xt_conntrack_info m = { + .statemask = cm->statemask, + .statusmask = cm->statusmask, + .expires_min = cm->expires_min, + .expires_max = cm->expires_max, + .flags = cm->flags, + .invflags = cm->invflags, + }; + memcpy(m.tuple, cm->tuple, sizeof(m.tuple)); + memcpy(m.sipmsk, cm->sipmsk, sizeof(m.sipmsk)); + memcpy(m.dipmsk, cm->dipmsk, sizeof(m.dipmsk)); + memcpy(dst, &m, sizeof(m)); +} + +static int compat_to_user(void __user *dst, void *src) +{ + struct xt_conntrack_info *m = src; + struct compat_xt_conntrack_info cm = { + .statemask = m->statemask, + .statusmask = m->statusmask, + .expires_min = m->expires_min, + .expires_max = m->expires_max, + .flags = m->flags, + .invflags = m->invflags, + }; + memcpy(cm.tuple, m->tuple, sizeof(cm.tuple)); + memcpy(cm.sipmsk, m->sipmsk, sizeof(cm.sipmsk)); + memcpy(cm.dipmsk, m->dipmsk, sizeof(cm.dipmsk)); + return copy_to_user(dst, &cm, sizeof(cm)) ? -EFAULT : 0; +} +#endif + static struct xt_match conntrack_match = { .name = "conntrack", .match = match, .checkentry = checkentry, .destroy = destroy, .matchsize = sizeof(struct xt_conntrack_info), +#ifdef CONFIG_COMPAT + .compatsize = sizeof(struct compat_xt_conntrack_info), + .compat_from_user = compat_from_user, + .compat_to_user = compat_to_user, +#endif .family = AF_INET, .me = THIS_MODULE, }; diff --git a/net/netlabel/netlabel_cipso_v4.c b/net/netlabel/netlabel_cipso_v4.c index 07e47dbcb0a..24b660f16ce 100644 --- a/net/netlabel/netlabel_cipso_v4.c +++ b/net/netlabel/netlabel_cipso_v4.c @@ -59,7 +59,7 @@ static struct genl_family netlbl_cipsov4_gnl_family = { }; /* NetLabel Netlink attribute policy */ -static struct nla_policy netlbl_cipsov4_genl_policy[NLBL_CIPSOV4_A_MAX + 1] = { +static const struct nla_policy netlbl_cipsov4_genl_policy[NLBL_CIPSOV4_A_MAX + 1] = { [NLBL_CIPSOV4_A_DOI] = { .type = NLA_U32 }, [NLBL_CIPSOV4_A_MTYPE] = { .type = NLA_U32 }, [NLBL_CIPSOV4_A_TAG] = { .type = NLA_U8 }, diff --git a/net/netlabel/netlabel_kapi.c b/net/netlabel/netlabel_kapi.c index f2535e7f286..b165712aaa7 100644 --- a/net/netlabel/netlabel_kapi.c +++ b/net/netlabel/netlabel_kapi.c @@ -246,19 +246,18 @@ int netlbl_secattr_catmap_setrng(struct netlbl_lsm_secattr_catmap *catmap, /** * netlbl_socket_setattr - Label a socket using the correct protocol - * @sock: the socket to label + * @sk: the socket to label * @secattr: the security attributes * * Description: * Attach the correct label to the given socket using the security attributes - * specified in @secattr. 
This function requires exclusive access to - * @sock->sk, which means it either needs to be in the process of being - * created or locked via lock_sock(sock->sk). Returns zero on success, - * negative values on failure. + * specified in @secattr. This function requires exclusive access to @sk, + * which means it either needs to be in the process of being created or locked. + * Returns zero on success, negative values on failure. * */ -int netlbl_socket_setattr(const struct socket *sock, - const struct netlbl_lsm_secattr *secattr) +int netlbl_sock_setattr(struct sock *sk, + const struct netlbl_lsm_secattr *secattr) { int ret_val = -ENOENT; struct netlbl_dom_map *dom_entry; @@ -269,9 +268,9 @@ int netlbl_socket_setattr(const struct socket *sock, goto socket_setattr_return; switch (dom_entry->type) { case NETLBL_NLTYPE_CIPSOV4: - ret_val = cipso_v4_socket_setattr(sock, - dom_entry->type_def.cipsov4, - secattr); + ret_val = cipso_v4_sock_setattr(sk, + dom_entry->type_def.cipsov4, + secattr); break; case NETLBL_NLTYPE_UNLABELED: ret_val = 0; @@ -309,30 +308,6 @@ int netlbl_sock_getattr(struct sock *sk, struct netlbl_lsm_secattr *secattr) } /** - * netlbl_socket_getattr - Determine the security attributes of a socket - * @sock: the socket - * @secattr: the security attributes - * - * Description: - * Examines the given socket to see any NetLabel style labeling has been - * applied to the socket, if so it parses the socket label and returns the - * security attributes in @secattr. Returns zero on success, negative values - * on failure. - * - */ -int netlbl_socket_getattr(const struct socket *sock, - struct netlbl_lsm_secattr *secattr) -{ - int ret_val; - - ret_val = cipso_v4_socket_getattr(sock, secattr); - if (ret_val == 0) - return 0; - - return netlbl_unlabel_getattr(secattr); -} - -/** * netlbl_skbuff_getattr - Determine the security attributes of a packet * @skb: the packet * @secattr: the security attributes diff --git a/net/netlabel/netlabel_mgmt.c b/net/netlabel/netlabel_mgmt.c index e8c80f33f3d..e00fc219c72 100644 --- a/net/netlabel/netlabel_mgmt.c +++ b/net/netlabel/netlabel_mgmt.c @@ -59,7 +59,7 @@ static struct genl_family netlbl_mgmt_gnl_family = { }; /* NetLabel Netlink attribute policy */ -static struct nla_policy netlbl_mgmt_genl_policy[NLBL_MGMT_A_MAX + 1] = { +static const struct nla_policy netlbl_mgmt_genl_policy[NLBL_MGMT_A_MAX + 1] = { [NLBL_MGMT_A_DOMAIN] = { .type = NLA_NUL_STRING }, [NLBL_MGMT_A_PROTOCOL] = { .type = NLA_U32 }, [NLBL_MGMT_A_VERSION] = { .type = NLA_U32 }, diff --git a/net/netlabel/netlabel_unlabeled.c b/net/netlabel/netlabel_unlabeled.c index b931edee4b8..5c303c68af1 100644 --- a/net/netlabel/netlabel_unlabeled.c +++ b/net/netlabel/netlabel_unlabeled.c @@ -61,7 +61,7 @@ static struct genl_family netlbl_unlabel_gnl_family = { }; /* NetLabel Netlink attribute policy */ -static struct nla_policy netlbl_unlabel_genl_policy[NLBL_UNLABEL_A_MAX + 1] = { +static const struct nla_policy netlbl_unlabel_genl_policy[NLBL_UNLABEL_A_MAX + 1] = { [NLBL_UNLABEL_A_ACPTFLG] = { .type = NLA_U8 }, }; diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index 507828d7d4a..1f15821c8da 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -45,7 +45,6 @@ #include <linux/rtnetlink.h> #include <linux/proc_fs.h> #include <linux/seq_file.h> -#include <linux/smp_lock.h> #include <linux/notifier.h> #include <linux/security.h> #include <linux/jhash.h> diff --git a/net/netlink/attr.c b/net/netlink/attr.c index df5f820a4c3..c591212793e 100644 --- 
a/net/netlink/attr.c +++ b/net/netlink/attr.c @@ -24,9 +24,9 @@ static u16 nla_attr_minlen[NLA_TYPE_MAX+1] __read_mostly = { }; static int validate_nla(struct nlattr *nla, int maxtype, - struct nla_policy *policy) + const struct nla_policy *policy) { - struct nla_policy *pt; + const struct nla_policy *pt; int minlen = 0, attrlen = nla_len(nla); if (nla->nla_type <= 0 || nla->nla_type > maxtype) @@ -99,7 +99,7 @@ static int validate_nla(struct nlattr *nla, int maxtype, * Returns 0 on success or a negative error code. */ int nla_validate(struct nlattr *head, int len, int maxtype, - struct nla_policy *policy) + const struct nla_policy *policy) { struct nlattr *nla; int rem, err; @@ -130,7 +130,7 @@ errout: * Returns 0 on success or a negative error code. */ int nla_parse(struct nlattr *tb[], int maxtype, struct nlattr *head, int len, - struct nla_policy *policy) + const struct nla_policy *policy) { struct nlattr *nla; int rem, err; diff --git a/net/netlink/genetlink.c b/net/netlink/genetlink.c index 6e31234a419..b9ab62f938d 100644 --- a/net/netlink/genetlink.c +++ b/net/netlink/genetlink.c @@ -472,7 +472,7 @@ static struct sk_buff *ctrl_build_msg(struct genl_family *family, u32 pid, return skb; } -static struct nla_policy ctrl_policy[CTRL_ATTR_MAX+1] __read_mostly = { +static const struct nla_policy ctrl_policy[CTRL_ATTR_MAX+1] = { [CTRL_ATTR_FAMILY_ID] = { .type = NLA_U16 }, [CTRL_ATTR_FAMILY_NAME] = { .type = NLA_NUL_STRING, .len = GENL_NAMSIZ - 1 }, diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index 02e401cd683..f8b83014ccc 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -83,22 +83,6 @@ #include <net/inet_common.h> #endif -#define CONFIG_SOCK_PACKET 1 - -/* - Proposed replacement for SIOC{ADD,DEL}MULTI and - IFF_PROMISC, IFF_ALLMULTI flags. - - It is more expensive, but I believe, - it is really correct solution: reentereble, safe and fault tolerant. - - IFF_PROMISC/IFF_ALLMULTI/SIOC{ADD/DEL}MULTI are faked by keeping - reference count and global flag, so that real status is - (gflag|(count != 0)), so that we can use obsolete faulty interface - not harming clever users. - */ -#define CONFIG_PACKET_MULTICAST 1 - /* Assumptions: - if device has no dev->hard_header routine, it adds and removes ll header @@ -159,7 +143,6 @@ static atomic_t packet_socks_nr; /* Private packet socket structures. 
*/ -#ifdef CONFIG_PACKET_MULTICAST struct packet_mclist { struct packet_mclist *next; @@ -179,7 +162,7 @@ struct packet_mreq_max unsigned short mr_alen; unsigned char mr_address[MAX_ADDR_LEN]; }; -#endif + #ifdef CONFIG_PACKET_MMAP static int packet_set_ring(struct sock *sk, struct tpacket_req *req, int closing); #endif @@ -205,9 +188,7 @@ struct packet_sock { origdev:1; int ifindex; /* bound device */ __be16 num; -#ifdef CONFIG_PACKET_MULTICAST struct packet_mclist *mclist; -#endif #ifdef CONFIG_PACKET_MMAP atomic_t mapped; unsigned int pg_vec_order; @@ -263,7 +244,6 @@ static void packet_sock_destruct(struct sock *sk) static const struct proto_ops packet_ops; -#ifdef CONFIG_SOCK_PACKET static const struct proto_ops packet_ops_spkt; static int packet_rcv_spkt(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt, struct net_device *orig_dev) @@ -435,7 +415,6 @@ out_unlock: dev_put(dev); return err; } -#endif static inline unsigned int run_filter(struct sk_buff *skb, struct sock *sk, unsigned int res) @@ -851,9 +830,7 @@ static int packet_release(struct socket *sock) __sock_put(sk); } -#ifdef CONFIG_PACKET_MULTICAST packet_flush_mclist(sk); -#endif #ifdef CONFIG_PACKET_MMAP if (po->pg_vec) { @@ -936,8 +913,6 @@ out_unlock: * Bind a packet socket to a device */ -#ifdef CONFIG_SOCK_PACKET - static int packet_bind_spkt(struct socket *sock, struct sockaddr *uaddr, int addr_len) { struct sock *sk=sock->sk; @@ -960,7 +935,6 @@ static int packet_bind_spkt(struct socket *sock, struct sockaddr *uaddr, int add } return err; } -#endif static int packet_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) { @@ -1012,11 +986,8 @@ static int packet_create(struct socket *sock, int protocol) if (!capable(CAP_NET_RAW)) return -EPERM; - if (sock->type != SOCK_DGRAM && sock->type != SOCK_RAW -#ifdef CONFIG_SOCK_PACKET - && sock->type != SOCK_PACKET -#endif - ) + if (sock->type != SOCK_DGRAM && sock->type != SOCK_RAW && + sock->type != SOCK_PACKET) return -ESOCKTNOSUPPORT; sock->state = SS_UNCONNECTED; @@ -1027,10 +998,9 @@ static int packet_create(struct socket *sock, int protocol) goto out; sock->ops = &packet_ops; -#ifdef CONFIG_SOCK_PACKET if (sock->type == SOCK_PACKET) sock->ops = &packet_ops_spkt; -#endif + sock_init_data(sock, sk); po = pkt_sk(sk); @@ -1046,10 +1016,10 @@ static int packet_create(struct socket *sock, int protocol) spin_lock_init(&po->bind_lock); po->prot_hook.func = packet_rcv; -#ifdef CONFIG_SOCK_PACKET + if (sock->type == SOCK_PACKET) po->prot_hook.func = packet_rcv_spkt; -#endif + po->prot_hook.af_packet_priv = sk; if (proto) { @@ -1169,7 +1139,6 @@ out: return err; } -#ifdef CONFIG_SOCK_PACKET static int packet_getname_spkt(struct socket *sock, struct sockaddr *uaddr, int *uaddr_len, int peer) { @@ -1190,7 +1159,6 @@ static int packet_getname_spkt(struct socket *sock, struct sockaddr *uaddr, return 0; } -#endif static int packet_getname(struct socket *sock, struct sockaddr *uaddr, int *uaddr_len, int peer) @@ -1221,7 +1189,6 @@ static int packet_getname(struct socket *sock, struct sockaddr *uaddr, return 0; } -#ifdef CONFIG_PACKET_MULTICAST static void packet_dev_mc(struct net_device *dev, struct packet_mclist *i, int what) { switch (i->type) { @@ -1349,7 +1316,6 @@ static void packet_flush_mclist(struct sock *sk) } rtnl_unlock(); } -#endif static int packet_setsockopt(struct socket *sock, int level, int optname, char __user *optval, int optlen) @@ -1362,7 +1328,6 @@ packet_setsockopt(struct socket *sock, int level, int optname, char __user *optv return 
-ENOPROTOOPT; switch(optname) { -#ifdef CONFIG_PACKET_MULTICAST case PACKET_ADD_MEMBERSHIP: case PACKET_DROP_MEMBERSHIP: { @@ -1383,7 +1348,7 @@ packet_setsockopt(struct socket *sock, int level, int optname, char __user *optv ret = packet_mc_drop(sk, &mreq); return ret; } -#endif + #ifdef CONFIG_PACKET_MMAP case PACKET_RX_RING: { @@ -1506,11 +1471,10 @@ static int packet_notifier(struct notifier_block *this, unsigned long msg, void switch (msg) { case NETDEV_UNREGISTER: -#ifdef CONFIG_PACKET_MULTICAST if (po->mclist) packet_dev_mclist(dev, po->mclist, -1); - // fallthrough -#endif + /* fallthrough */ + case NETDEV_DOWN: if (dev->ifindex == po->ifindex) { spin_lock(&po->bind_lock); @@ -1856,7 +1820,6 @@ out: #endif -#ifdef CONFIG_SOCK_PACKET static const struct proto_ops packet_ops_spkt = { .family = PF_PACKET, .owner = THIS_MODULE, @@ -1877,7 +1840,6 @@ static const struct proto_ops packet_ops_spkt = { .mmap = sock_no_mmap, .sendpage = sock_no_sendpage, }; -#endif static const struct proto_ops packet_ops = { .family = PF_PACKET, diff --git a/net/rfkill/rfkill.c b/net/rfkill/rfkill.c index a973603e388..f3986d498b4 100644 --- a/net/rfkill/rfkill.c +++ b/net/rfkill/rfkill.c @@ -296,7 +296,7 @@ struct rfkill *rfkill_allocate(struct device *parent, enum rfkill_type type) struct device *dev; rfkill = kzalloc(sizeof(struct rfkill), GFP_KERNEL); - if (rfkill) + if (!rfkill) return NULL; mutex_init(&rfkill->mutex); diff --git a/net/rxrpc/Kconfig b/net/rxrpc/Kconfig index 91b3d52f6f1..e662f1d0766 100644 --- a/net/rxrpc/Kconfig +++ b/net/rxrpc/Kconfig @@ -4,7 +4,7 @@ config AF_RXRPC tristate "RxRPC session sockets" - depends on EXPERIMENTAL + depends on INET && EXPERIMENTAL select KEYS help Say Y or M here to include support for RxRPC session sockets (just diff --git a/net/rxrpc/ar-call.c b/net/rxrpc/ar-call.c index 4d92d88ff1f..3c04b00dab7 100644 --- a/net/rxrpc/ar-call.c +++ b/net/rxrpc/ar-call.c @@ -15,6 +15,25 @@ #include <net/af_rxrpc.h> #include "ar-internal.h" +const char *rxrpc_call_states[] = { + [RXRPC_CALL_CLIENT_SEND_REQUEST] = "ClSndReq", + [RXRPC_CALL_CLIENT_AWAIT_REPLY] = "ClAwtRpl", + [RXRPC_CALL_CLIENT_RECV_REPLY] = "ClRcvRpl", + [RXRPC_CALL_CLIENT_FINAL_ACK] = "ClFnlACK", + [RXRPC_CALL_SERVER_SECURING] = "SvSecure", + [RXRPC_CALL_SERVER_ACCEPTING] = "SvAccept", + [RXRPC_CALL_SERVER_RECV_REQUEST] = "SvRcvReq", + [RXRPC_CALL_SERVER_ACK_REQUEST] = "SvAckReq", + [RXRPC_CALL_SERVER_SEND_REPLY] = "SvSndRpl", + [RXRPC_CALL_SERVER_AWAIT_ACK] = "SvAwtACK", + [RXRPC_CALL_COMPLETE] = "Complete", + [RXRPC_CALL_SERVER_BUSY] = "SvBusy ", + [RXRPC_CALL_REMOTELY_ABORTED] = "RmtAbort", + [RXRPC_CALL_LOCALLY_ABORTED] = "LocAbort", + [RXRPC_CALL_NETWORK_ERROR] = "NetError", + [RXRPC_CALL_DEAD] = "Dead ", +}; + struct kmem_cache *rxrpc_call_jar; LIST_HEAD(rxrpc_calls); DEFINE_RWLOCK(rxrpc_call_lock); diff --git a/net/rxrpc/ar-connection.c b/net/rxrpc/ar-connection.c index 43cb3e051ec..482750efc23 100644 --- a/net/rxrpc/ar-connection.c +++ b/net/rxrpc/ar-connection.c @@ -211,7 +211,7 @@ static struct rxrpc_connection *rxrpc_alloc_connection(gfp_t gfp) conn->header_size = sizeof(struct rxrpc_header); } - _leave(" = %p{%d}", conn, conn->debug_id); + _leave(" = %p{%d}", conn, conn ? 
conn->debug_id : 0); return conn; } diff --git a/net/rxrpc/ar-output.c b/net/rxrpc/ar-output.c index 591c4422205..cc9102c5b58 100644 --- a/net/rxrpc/ar-output.c +++ b/net/rxrpc/ar-output.c @@ -640,6 +640,7 @@ static int rxrpc_send_data(struct kiocb *iocb, goto efault; sp->remain -= copy; skb->mark += copy; + copied += copy; len -= copy; segment -= copy; @@ -709,6 +710,8 @@ static int rxrpc_send_data(struct kiocb *iocb, } while (segment > 0); +success: + ret = copied; out: call->tx_pending = skb; _leave(" = %d", ret); @@ -725,7 +728,7 @@ call_aborted: maybe_error: if (copied) - ret = copied; + goto success; goto out; efault: diff --git a/net/rxrpc/ar-peer.c b/net/rxrpc/ar-peer.c index ce08b78647c..90fa107a8af 100644 --- a/net/rxrpc/ar-peer.c +++ b/net/rxrpc/ar-peer.c @@ -59,14 +59,14 @@ static void rxrpc_assess_MTU_size(struct rxrpc_peer *peer) ret = ip_route_output_key(&rt, &fl); if (ret < 0) { - kleave(" [route err %d]", ret); + _leave(" [route err %d]", ret); return; } peer->if_mtu = dst_mtu(&rt->u.dst); dst_release(&rt->u.dst); - kleave(" [if_mtu %u]", peer->if_mtu); + _leave(" [if_mtu %u]", peer->if_mtu); } /* diff --git a/net/rxrpc/ar-proc.c b/net/rxrpc/ar-proc.c index 58f4b4e5cec..1c0be0e77b1 100644 --- a/net/rxrpc/ar-proc.c +++ b/net/rxrpc/ar-proc.c @@ -25,25 +25,6 @@ static const char *rxrpc_conn_states[] = { [RXRPC_CONN_NETWORK_ERROR] = "NetError", }; -const char *rxrpc_call_states[] = { - [RXRPC_CALL_CLIENT_SEND_REQUEST] = "ClSndReq", - [RXRPC_CALL_CLIENT_AWAIT_REPLY] = "ClAwtRpl", - [RXRPC_CALL_CLIENT_RECV_REPLY] = "ClRcvRpl", - [RXRPC_CALL_CLIENT_FINAL_ACK] = "ClFnlACK", - [RXRPC_CALL_SERVER_SECURING] = "SvSecure", - [RXRPC_CALL_SERVER_ACCEPTING] = "SvAccept", - [RXRPC_CALL_SERVER_RECV_REQUEST] = "SvRcvReq", - [RXRPC_CALL_SERVER_ACK_REQUEST] = "SvAckReq", - [RXRPC_CALL_SERVER_SEND_REPLY] = "SvSndRpl", - [RXRPC_CALL_SERVER_AWAIT_ACK] = "SvAwtACK", - [RXRPC_CALL_COMPLETE] = "Complete", - [RXRPC_CALL_SERVER_BUSY] = "SvBusy ", - [RXRPC_CALL_REMOTELY_ABORTED] = "RmtAbort", - [RXRPC_CALL_LOCALLY_ABORTED] = "LocAbort", - [RXRPC_CALL_NETWORK_ERROR] = "NetError", - [RXRPC_CALL_DEAD] = "Dead ", -}; - /* * generate a list of extant and dead calls in /proc/net/rxrpc_calls */ diff --git a/net/sched/act_pedit.c b/net/sched/act_pedit.c index 45b3cda86a2..6f8684b5617 100644 --- a/net/sched/act_pedit.c +++ b/net/sched/act_pedit.c @@ -164,8 +164,7 @@ static int tcf_pedit(struct sk_buff *skb, struct tc_action *a, printk("offset must be on 32 bit boundaries\n"); goto bad; } - if (skb->len < 0 || - (offset > 0 && offset > skb->len)) { + if (offset > 0 && offset > skb->len) { printk("offset %d cant exceed pkt length %d\n", offset, skb->len); goto bad; diff --git a/net/sched/sch_atm.c b/net/sched/sch_atm.c index be7d299acd7..d1c383fca82 100644 --- a/net/sched/sch_atm.c +++ b/net/sched/sch_atm.c @@ -599,6 +599,7 @@ static void atm_tc_destroy(struct Qdisc *sch) /* races ? */ while ((flow = p->flows)) { tcf_destroy_chain(flow->filter_list); + flow->filter_list = NULL; if (flow->ref > 1) printk(KERN_ERR "atm_destroy: %p->ref = %d\n",flow, flow->ref); diff --git a/net/sched/sch_cbq.c b/net/sched/sch_cbq.c index a294542cb8e..ee2d5967d10 100644 --- a/net/sched/sch_cbq.c +++ b/net/sched/sch_cbq.c @@ -1748,10 +1748,12 @@ cbq_destroy(struct Qdisc* sch) * classes from root to leafs which means that filters can still * be bound to classes which have been destroyed already. 
--TGR '04 */ - for (h = 0; h < 16; h++) - for (cl = q->classes[h]; cl; cl = cl->next) + for (h = 0; h < 16; h++) { + for (cl = q->classes[h]; cl; cl = cl->next) { tcf_destroy_chain(cl->filter_list); - + cl->filter_list = NULL; + } + } for (h = 0; h < 16; h++) { struct cbq_class *next; diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c index 3385ee59254..f4d34480a09 100644 --- a/net/sched/sch_generic.c +++ b/net/sched/sch_generic.c @@ -71,12 +71,9 @@ void qdisc_unlock_tree(struct net_device *dev) /* Kick device. - Note, that this procedure can be called by a watchdog timer, so that - we do not check dev->tbusy flag here. - Returns: 0 - queue is empty. - >0 - queue is not empty, but throttled. - <0 - queue is not empty. Device is throttled, if dev->tbusy != 0. + Returns: 0 - queue is empty or throttled. + >0 - queue is not empty. NOTE: Called under dev->queue_lock with locally disabled BH. */ @@ -115,7 +112,7 @@ static inline int qdisc_restart(struct net_device *dev) kfree_skb(skb); if (net_ratelimit()) printk(KERN_DEBUG "Dead loop on netdevice %s, fix it urgently!\n", dev->name); - return -1; + goto out; } __get_cpu_var(netdev_rx_stat).cpu_collision++; goto requeue; @@ -135,10 +132,12 @@ static inline int qdisc_restart(struct net_device *dev) netif_tx_unlock(dev); } spin_lock(&dev->queue_lock); - return -1; + q = dev->qdisc; + goto out; } if (ret == NETDEV_TX_LOCKED && nolock) { spin_lock(&dev->queue_lock); + q = dev->qdisc; goto collision; } } @@ -163,26 +162,28 @@ static inline int qdisc_restart(struct net_device *dev) */ requeue: - if (skb->next) + if (unlikely(q == &noop_qdisc)) + kfree_skb(skb); + else if (skb->next) dev->gso_skb = skb; else q->ops->requeue(skb, q); netif_schedule(dev); - return 1; } + return 0; + +out: BUG_ON((int) q->q.qlen < 0); return q->q.qlen; } void __qdisc_run(struct net_device *dev) { - if (unlikely(dev->qdisc == &noop_qdisc)) - goto out; + do { + if (!qdisc_restart(dev)) + break; + } while (!netif_queue_stopped(dev)); - while (qdisc_restart(dev) < 0 && !netif_queue_stopped(dev)) - /* NOTHING */; - -out: clear_bit(__LINK_STATE_QDISC_RUNNING, &dev->state); } @@ -223,7 +224,8 @@ void __netdev_watchdog_up(struct net_device *dev) if (dev->tx_timeout) { if (dev->watchdog_timeo <= 0) dev->watchdog_timeo = 5*HZ; - if (!mod_timer(&dev->watchdog_timer, jiffies + dev->watchdog_timeo)) + if (!mod_timer(&dev->watchdog_timer, + round_jiffies(jiffies + dev->watchdog_timeo))) dev_hold(dev); } } @@ -544,6 +546,7 @@ void dev_activate(struct net_device *dev) void dev_deactivate(struct net_device *dev) { struct Qdisc *qdisc; + struct sk_buff *skb; spin_lock_bh(&dev->queue_lock); qdisc = dev->qdisc; @@ -551,8 +554,12 @@ void dev_deactivate(struct net_device *dev) qdisc_reset(qdisc); + skb = dev->gso_skb; + dev->gso_skb = NULL; spin_unlock_bh(&dev->queue_lock); + kfree_skb(skb); + dev_watchdog_down(dev); /* Wait for outstanding dev_queue_xmit calls. */ @@ -561,11 +568,6 @@ void dev_deactivate(struct net_device *dev) /* Wait for outstanding qdisc_run calls. 
*/ while (test_bit(__LINK_STATE_QDISC_RUNNING, &dev->state)) yield(); - - if (dev->gso_skb) { - kfree_skb(dev->gso_skb); - dev->gso_skb = NULL; - } } void dev_init_scheduler(struct net_device *dev) diff --git a/net/sched/sch_htb.c b/net/sched/sch_htb.c index 99bcec8dd04..035788c5b7f 100644 --- a/net/sched/sch_htb.c +++ b/net/sched/sch_htb.c @@ -976,8 +976,9 @@ static struct sk_buff *htb_dequeue(struct Qdisc *sch) if (q->now >= q->near_ev_cache[level]) { event = htb_do_events(q, level); - q->near_ev_cache[level] = event ? event : - PSCHED_TICKS_PER_SEC; + if (!event) + event = q->now + PSCHED_TICKS_PER_SEC; + q->near_ev_cache[level] = event; } else event = q->near_ev_cache[level]; diff --git a/net/sched/sch_prio.c b/net/sched/sch_prio.c index 269a6e17c6c..6d7542c26e4 100644 --- a/net/sched/sch_prio.c +++ b/net/sched/sch_prio.c @@ -75,7 +75,7 @@ prio_classify(struct sk_buff *skb, struct Qdisc *sch, int *qerr) band = res.classid; } band = TC_H_MIN(band) - 1; - if (band > q->bands) + if (band >= q->bands) return q->queues[q->prio2band[0]]; return q->queues[band]; diff --git a/net/sched/sch_teql.c b/net/sched/sch_teql.c index d24914db786..f05ad9a30b4 100644 --- a/net/sched/sch_teql.c +++ b/net/sched/sch_teql.c @@ -94,14 +94,13 @@ teql_enqueue(struct sk_buff *skb, struct Qdisc* sch) struct net_device *dev = sch->dev; struct teql_sched_data *q = qdisc_priv(sch); - __skb_queue_tail(&q->q, skb); - if (q->q.qlen <= dev->tx_queue_len) { + if (q->q.qlen < dev->tx_queue_len) { + __skb_queue_tail(&q->q, skb); sch->bstats.bytes += skb->len; sch->bstats.packets++; return 0; } - __skb_unlink(skb, &q->q); kfree_skb(skb); sch->qstats.drops++; return NET_XMIT_DROP; diff --git a/net/sctp/Kconfig b/net/sctp/Kconfig index 9cba49e2ad4..8210f549c49 100644 --- a/net/sctp/Kconfig +++ b/net/sctp/Kconfig @@ -2,11 +2,9 @@ # SCTP configuration # -menu "SCTP Configuration (EXPERIMENTAL)" - depends on INET && EXPERIMENTAL - -config IP_SCTP +menuconfig IP_SCTP tristate "The SCTP Protocol (EXPERIMENTAL)" + depends on INET && EXPERIMENTAL depends on IPV6 || IPV6=n select CRYPTO if SCTP_HMAC_SHA1 || SCTP_HMAC_MD5 select CRYPTO_HMAC if SCTP_HMAC_SHA1 || SCTP_HMAC_MD5 @@ -36,9 +34,10 @@ config IP_SCTP If in doubt, say N. +if IP_SCTP + config SCTP_DBG_MSG bool "SCTP: Debug messages" - depends on IP_SCTP help If you say Y, this will enable verbose debugging messages. @@ -47,7 +46,6 @@ config SCTP_DBG_MSG config SCTP_DBG_OBJCNT bool "SCTP: Debug object counts" - depends on IP_SCTP help If you say Y, this will enable debugging support for counting the type of objects that are currently allocated. This is useful for @@ -59,7 +57,6 @@ config SCTP_DBG_OBJCNT choice prompt "SCTP: Cookie HMAC Algorithm" - depends on IP_SCTP default SCTP_HMAC_MD5 help HMAC algorithm to be used during association initialization. It @@ -86,4 +83,5 @@ config SCTP_HMAC_MD5 advised to use either HMAC-MD5 or HMAC-SHA1. endchoice -endmenu + +endif # IP_SCTP diff --git a/net/sctp/associola.c b/net/sctp/associola.c index df94e3cdfba..498edb0cd4e 100644 --- a/net/sctp/associola.c +++ b/net/sctp/associola.c @@ -1231,6 +1231,10 @@ void sctp_assoc_sync_pmtu(struct sctp_association *asoc) /* Get the lowest pmtu of all the transports. 
*/ list_for_each(pos, &asoc->peer.transport_addr_list) { t = list_entry(pos, struct sctp_transport, transports); + if (t->pmtu_pending && t->dst) { + sctp_transport_update_pmtu(t, dst_mtu(t->dst)); + t->pmtu_pending = 0; + } if (!pmtu || (t->pathmtu < pmtu)) pmtu = t->pathmtu; } diff --git a/net/sctp/chunk.c b/net/sctp/chunk.c index 83ef411772f..77fb7b06a9c 100644 --- a/net/sctp/chunk.c +++ b/net/sctp/chunk.c @@ -3,7 +3,7 @@ * * This file is part of the SCTP kernel reference Implementation * - * This file contains the code relating the the chunk abstraction. + * This file contains the code relating the chunk abstraction. * * The SCTP reference implementation is free software; * you can redistribute it and/or modify it under the terms of diff --git a/net/sctp/debug.c b/net/sctp/debug.c index e8c0f7435d7..80f70aa5338 100644 --- a/net/sctp/debug.c +++ b/net/sctp/debug.c @@ -77,8 +77,6 @@ static const char *sctp_cid_tbl[SCTP_NUM_BASE_CHUNK_TYPES] = { /* Lookup "chunk type" debug name. */ const char *sctp_cname(const sctp_subtype_t cid) { - if (cid.chunk < 0) - return "illegal chunk id"; if (cid.chunk <= SCTP_CID_BASE_MAX) return sctp_cid_tbl[cid.chunk]; @@ -146,8 +144,6 @@ static const char *sctp_primitive_tbl[SCTP_NUM_PRIMITIVE_TYPES] = { /* Lookup primitive debug name. */ const char *sctp_pname(const sctp_subtype_t id) { - if (id.primitive < 0) - return "illegal primitive"; if (id.primitive <= SCTP_EVENT_PRIMITIVE_MAX) return sctp_primitive_tbl[id.primitive]; return "unknown_primitive"; @@ -161,8 +157,6 @@ static const char *sctp_other_tbl[] = { /* Lookup "other" debug name. */ const char *sctp_oname(const sctp_subtype_t id) { - if (id.other < 0) - return "illegal 'other' event"; if (id.other <= SCTP_EVENT_OTHER_MAX) return sctp_other_tbl[id.other]; return "unknown 'other' event"; @@ -184,8 +178,6 @@ static const char *sctp_timer_tbl[] = { /* Lookup timer debug name. */ const char *sctp_tname(const sctp_subtype_t id) { - if (id.timeout < 0) - return "illegal 'timer' event"; if (id.timeout <= SCTP_EVENT_TIMEOUT_MAX) return sctp_timer_tbl[id.timeout]; return "unknown_timer"; diff --git a/net/sctp/input.c b/net/sctp/input.c index 885109fb3dd..d57ff7f3c57 100644 --- a/net/sctp/input.c +++ b/net/sctp/input.c @@ -367,24 +367,18 @@ static void sctp_add_backlog(struct sock *sk, struct sk_buff *skb) void sctp_icmp_frag_needed(struct sock *sk, struct sctp_association *asoc, struct sctp_transport *t, __u32 pmtu) { - if (sock_owned_by_user(sk) || !t || (t->pathmtu == pmtu)) + if (!t || (t->pathmtu == pmtu)) return; + if (sock_owned_by_user(sk)) { + asoc->pmtu_pending = 1; + t->pmtu_pending = 1; + return; + } + if (t->param_flags & SPP_PMTUD_ENABLE) { - if (unlikely(pmtu < SCTP_DEFAULT_MINSEGMENT)) { - printk(KERN_WARNING "%s: Reported pmtu %d too low, " - "using default minimum of %d\n", - __FUNCTION__, pmtu, - SCTP_DEFAULT_MINSEGMENT); - /* Use default minimum segment size and disable - * pmtu discovery on this transport. - */ - t->pathmtu = SCTP_DEFAULT_MINSEGMENT; - t->param_flags = (t->param_flags & ~SPP_PMTUD) | - SPP_PMTUD_DISABLE; - } else { - t->pathmtu = pmtu; - } + /* Update transports view of the MTU */ + sctp_transport_update_pmtu(t, pmtu); /* Update association pmtu. 
*/ sctp_assoc_sync_pmtu(asoc); diff --git a/net/sctp/sm_statetable.c b/net/sctp/sm_statetable.c index 523071c7902..70a91ece3c4 100644 --- a/net/sctp/sm_statetable.c +++ b/net/sctp/sm_statetable.c @@ -960,7 +960,7 @@ static const sctp_sm_table_entry_t *sctp_chunk_event_lookup(sctp_cid_t cid, if (state > SCTP_STATE_MAX) return &bug; - if (cid >= 0 && cid <= SCTP_CID_BASE_MAX) + if (cid <= SCTP_CID_BASE_MAX) return &chunk_event_table[cid][state]; if (sctp_prsctp_enable) { diff --git a/net/sctp/socket.c b/net/sctp/socket.c index 9f1a908776d..67861a8f00c 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -333,12 +333,19 @@ SCTP_STATIC int sctp_do_bind(struct sock *sk, union sctp_addr *addr, int len) if (!sp->pf->bind_verify(sp, addr)) return -EADDRNOTAVAIL; - /* We must either be unbound, or bind to the same port. */ - if (bp->port && (snum != bp->port)) { - SCTP_DEBUG_PRINTK("sctp_do_bind:" + /* We must either be unbound, or bind to the same port. + * It's OK to allow 0 ports if we are already bound. + * We'll just inhert an already bound port in this case + */ + if (bp->port) { + if (!snum) + snum = bp->port; + else if (snum != bp->port) { + SCTP_DEBUG_PRINTK("sctp_do_bind:" " New port %d does not match existing port " "%d.\n", snum, bp->port); - return -EINVAL; + return -EINVAL; + } } if (snum && snum < PROT_SOCK && !capable(CAP_NET_BIND_SERVICE)) @@ -1655,6 +1662,9 @@ SCTP_STATIC int sctp_sendmsg(struct kiocb *iocb, struct sock *sk, goto out_free; } + if (asoc->pmtu_pending) + sctp_assoc_pending_pmtu(asoc); + /* If fragmentation is disabled and the message length exceeds the * association fragmentation point, return EMSGSIZE. The I-D * does not specify what this error is, but this looks like @@ -2586,7 +2596,7 @@ static int sctp_setsockopt_rtoinfo(struct sock *sk, char __user *optval, int opt * * 7.1.2 SCTP_ASSOCINFO * - * This option is used to tune the the maximum retransmission attempts + * This option is used to tune the maximum retransmission attempts * of the association. * Returns an error if the new association retransmission value is * greater than the sum of the retransmission value of the peer. 
@@ -3365,12 +3375,13 @@ static int sctp_getsockopt_sctp_status(struct sock *sk, int len, sctp_assoc_t associd; int retval = 0; - if (len != sizeof(status)) { + if (len < sizeof(status)) { retval = -EINVAL; goto out; } - if (copy_from_user(&status, optval, sizeof(status))) { + len = sizeof(status); + if (copy_from_user(&status, optval, len)) { retval = -EFAULT; goto out; } @@ -3442,12 +3453,13 @@ static int sctp_getsockopt_peer_addr_info(struct sock *sk, int len, struct sctp_transport *transport; int retval = 0; - if (len != sizeof(pinfo)) { + if (len < sizeof(pinfo)) { retval = -EINVAL; goto out; } - if (copy_from_user(&pinfo, optval, sizeof(pinfo))) { + len = sizeof(pinfo); + if (copy_from_user(&pinfo, optval, len)) { retval = -EFAULT; goto out; } @@ -3513,8 +3525,11 @@ static int sctp_getsockopt_disable_fragments(struct sock *sk, int len, static int sctp_getsockopt_events(struct sock *sk, int len, char __user *optval, int __user *optlen) { - if (len != sizeof(struct sctp_event_subscribe)) + if (len < sizeof(struct sctp_event_subscribe)) return -EINVAL; + len = sizeof(struct sctp_event_subscribe); + if (put_user(len, optlen)) + return -EFAULT; if (copy_to_user(optval, &sctp_sk(sk)->subscribe, len)) return -EFAULT; return 0; @@ -3536,9 +3551,12 @@ static int sctp_getsockopt_autoclose(struct sock *sk, int len, char __user *optv /* Applicable to UDP-style socket only */ if (sctp_style(sk, TCP)) return -EOPNOTSUPP; - if (len != sizeof(int)) + if (len < sizeof(int)) return -EINVAL; - if (copy_to_user(optval, &sctp_sk(sk)->autoclose, len)) + len = sizeof(int); + if (put_user(len, optlen)) + return -EFAULT; + if (copy_to_user(optval, &sctp_sk(sk)->autoclose, sizeof(int))) return -EFAULT; return 0; } @@ -3550,6 +3568,7 @@ SCTP_STATIC int sctp_do_peeloff(struct sctp_association *asoc, struct sock *sk = asoc->base.sk; struct socket *sock; struct inet_sock *inetsk; + struct sctp_af *af; int err = 0; /* An association cannot be branched off from an already peeled-off @@ -3571,8 +3590,9 @@ SCTP_STATIC int sctp_do_peeloff(struct sctp_association *asoc, /* Make peeled-off sockets more like 1-1 accepted sockets. * Set the daddr and initialize id to something more random */ + af = sctp_get_af_specific(asoc->peer.primary_addr.sa.sa_family); + af->to_sk_daddr(&asoc->peer.primary_addr, sk); inetsk = inet_sk(sock->sk); - inetsk->daddr = asoc->peer.primary_addr.v4.sin_addr.s_addr; inetsk->id = asoc->next_tsn ^ jiffies; *sockp = sock; @@ -3587,8 +3607,9 @@ static int sctp_getsockopt_peeloff(struct sock *sk, int len, char __user *optval int retval = 0; struct sctp_association *asoc; - if (len != sizeof(sctp_peeloff_arg_t)) + if (len < sizeof(sctp_peeloff_arg_t)) return -EINVAL; + len = sizeof(sctp_peeloff_arg_t); if (copy_from_user(&peeloff, optval, len)) return -EFAULT; @@ -3616,6 +3637,8 @@ static int sctp_getsockopt_peeloff(struct sock *sk, int len, char __user *optval /* Return the fd mapped to the new socket. 
*/ peeloff.sd = retval; + if (put_user(len, optlen)) + return -EFAULT; if (copy_to_user(optval, &peeloff, len)) retval = -EFAULT; @@ -3724,9 +3747,9 @@ static int sctp_getsockopt_peer_addr_params(struct sock *sk, int len, struct sctp_association *asoc = NULL; struct sctp_sock *sp = sctp_sk(sk); - if (len != sizeof(struct sctp_paddrparams)) + if (len < sizeof(struct sctp_paddrparams)) return -EINVAL; - + len = sizeof(struct sctp_paddrparams); if (copy_from_user(¶ms, optval, len)) return -EFAULT; @@ -3825,9 +3848,11 @@ static int sctp_getsockopt_delayed_ack_time(struct sock *sk, int len, struct sctp_association *asoc = NULL; struct sctp_sock *sp = sctp_sk(sk); - if (len != sizeof(struct sctp_assoc_value)) + if (len < sizeof(struct sctp_assoc_value)) return - EINVAL; + len = sizeof(struct sctp_assoc_value); + if (copy_from_user(¶ms, optval, len)) return -EFAULT; @@ -3876,8 +3901,11 @@ static int sctp_getsockopt_delayed_ack_time(struct sock *sk, int len, */ static int sctp_getsockopt_initmsg(struct sock *sk, int len, char __user *optval, int __user *optlen) { - if (len != sizeof(struct sctp_initmsg)) + if (len < sizeof(struct sctp_initmsg)) return -EINVAL; + len = sizeof(struct sctp_initmsg); + if (put_user(len, optlen)) + return -EFAULT; if (copy_to_user(optval, &sctp_sk(sk)->initmsg, len)) return -EFAULT; return 0; @@ -3892,7 +3920,7 @@ static int sctp_getsockopt_peer_addrs_num_old(struct sock *sk, int len, struct list_head *pos; int cnt = 0; - if (len != sizeof(sctp_assoc_t)) + if (len < sizeof(sctp_assoc_t)) return -EINVAL; if (copy_from_user(&id, optval, sizeof(sctp_assoc_t))) @@ -3928,10 +3956,12 @@ static int sctp_getsockopt_peer_addrs_old(struct sock *sk, int len, struct sctp_sock *sp = sctp_sk(sk); int addrlen; - if (len != sizeof(struct sctp_getaddrs_old)) + if (len < sizeof(struct sctp_getaddrs_old)) return -EINVAL; - if (copy_from_user(&getaddrs, optval, sizeof(struct sctp_getaddrs_old))) + len = sizeof(struct sctp_getaddrs_old); + + if (copy_from_user(&getaddrs, optval, len)) return -EFAULT; if (getaddrs.addr_num <= 0) return -EINVAL; @@ -3954,7 +3984,9 @@ static int sctp_getsockopt_peer_addrs_old(struct sock *sk, int len, if (cnt >= getaddrs.addr_num) break; } getaddrs.addr_num = cnt; - if (copy_to_user(optval, &getaddrs, sizeof(struct sctp_getaddrs_old))) + if (put_user(len, optlen)) + return -EFAULT; + if (copy_to_user(optval, &getaddrs, len)) return -EFAULT; return 0; @@ -3987,8 +4019,7 @@ static int sctp_getsockopt_peer_addrs(struct sock *sk, int len, return -EINVAL; to = optval + offsetof(struct sctp_getaddrs,addrs); - space_left = len - sizeof(struct sctp_getaddrs) - - offsetof(struct sctp_getaddrs,addrs); + space_left = len - offsetof(struct sctp_getaddrs,addrs); list_for_each(pos, &asoc->peer.transport_addr_list) { from = list_entry(pos, struct sctp_transport, transports); @@ -4025,7 +4056,7 @@ static int sctp_getsockopt_local_addrs_num_old(struct sock *sk, int len, rwlock_t *addr_lock; int cnt = 0; - if (len != sizeof(sctp_assoc_t)) + if (len < sizeof(sctp_assoc_t)) return -EINVAL; if (copy_from_user(&id, optval, sizeof(sctp_assoc_t))) @@ -4164,12 +4195,14 @@ static int sctp_getsockopt_local_addrs_old(struct sock *sk, int len, rwlock_t *addr_lock; int err = 0; void *addrs; + void *buf; int bytes_copied = 0; - if (len != sizeof(struct sctp_getaddrs_old)) + if (len < sizeof(struct sctp_getaddrs_old)) return -EINVAL; - if (copy_from_user(&getaddrs, optval, sizeof(struct sctp_getaddrs_old))) + len = sizeof(struct sctp_getaddrs_old); + if (copy_from_user(&getaddrs, optval, 
len)) return -EFAULT; if (getaddrs.addr_num <= 0) return -EINVAL; @@ -4217,13 +4250,14 @@ static int sctp_getsockopt_local_addrs_old(struct sock *sk, int len, } } + buf = addrs; list_for_each(pos, &bp->address_list) { addr = list_entry(pos, struct sctp_sockaddr_entry, list); memcpy(&temp, &addr->a, sizeof(temp)); sctp_get_pf_specific(sk->sk_family)->addr_v4map(sp, &temp); addrlen = sctp_get_af_specific(temp.sa.sa_family)->sockaddr_len; - memcpy(addrs, &temp, addrlen); - to += addrlen; + memcpy(buf, &temp, addrlen); + buf += addrlen; bytes_copied += addrlen; cnt ++; if (cnt >= getaddrs.addr_num) break; @@ -4240,7 +4274,7 @@ copy_getaddrs: /* copy the leading structure back to user */ getaddrs.addr_num = cnt; - if (copy_to_user(optval, &getaddrs, sizeof(struct sctp_getaddrs_old))) + if (copy_to_user(optval, &getaddrs, len)) err = -EFAULT; error: @@ -4266,8 +4300,9 @@ static int sctp_getsockopt_local_addrs(struct sock *sk, int len, size_t space_left; int bytes_copied = 0; void *addrs; + void *buf; - if (len <= sizeof(struct sctp_getaddrs)) + if (len < sizeof(struct sctp_getaddrs)) return -EINVAL; if (copy_from_user(&getaddrs, optval, sizeof(struct sctp_getaddrs))) @@ -4291,8 +4326,8 @@ static int sctp_getsockopt_local_addrs(struct sock *sk, int len, } to = optval + offsetof(struct sctp_getaddrs,addrs); - space_left = len - sizeof(struct sctp_getaddrs) - - offsetof(struct sctp_getaddrs,addrs); + space_left = len - offsetof(struct sctp_getaddrs,addrs); + addrs = kmalloc(space_left, GFP_KERNEL); if (!addrs) return -ENOMEM; @@ -4316,6 +4351,7 @@ static int sctp_getsockopt_local_addrs(struct sock *sk, int len, } } + buf = addrs; list_for_each(pos, &bp->address_list) { addr = list_entry(pos, struct sctp_sockaddr_entry, list); memcpy(&temp, &addr->a, sizeof(temp)); @@ -4325,8 +4361,8 @@ static int sctp_getsockopt_local_addrs(struct sock *sk, int len, err = -ENOMEM; /*fixme: right error?*/ goto error; } - memcpy(addrs, &temp, addrlen); - to += addrlen; + memcpy(buf, &temp, addrlen); + buf += addrlen; bytes_copied += addrlen; cnt ++; space_left -= addrlen; @@ -4339,11 +4375,12 @@ copy_getaddrs: err = -EFAULT; goto error; } - if (put_user(cnt, &((struct sctp_getaddrs __user *)optval)->addr_num)) - return -EFAULT; + if (put_user(cnt, &((struct sctp_getaddrs __user *)optval)->addr_num)) { + err = -EFAULT; + goto error; + } if (put_user(bytes_copied, optlen)) - return -EFAULT; - + err = -EFAULT; error: kfree(addrs); return err; @@ -4362,10 +4399,12 @@ static int sctp_getsockopt_primary_addr(struct sock *sk, int len, struct sctp_association *asoc; struct sctp_sock *sp = sctp_sk(sk); - if (len != sizeof(struct sctp_prim)) + if (len < sizeof(struct sctp_prim)) return -EINVAL; - if (copy_from_user(&prim, optval, sizeof(struct sctp_prim))) + len = sizeof(struct sctp_prim); + + if (copy_from_user(&prim, optval, len)) return -EFAULT; asoc = sctp_id2assoc(sk, prim.ssp_assoc_id); @@ -4381,7 +4420,9 @@ static int sctp_getsockopt_primary_addr(struct sock *sk, int len, sctp_get_pf_specific(sk->sk_family)->addr_v4map(sp, (union sctp_addr *)&prim.ssp_addr); - if (copy_to_user(optval, &prim, sizeof(struct sctp_prim))) + if (put_user(len, optlen)) + return -EFAULT; + if (copy_to_user(optval, &prim, len)) return -EFAULT; return 0; @@ -4398,10 +4439,15 @@ static int sctp_getsockopt_adaptation_layer(struct sock *sk, int len, { struct sctp_setadaptation adaptation; - if (len != sizeof(struct sctp_setadaptation)) + if (len < sizeof(struct sctp_setadaptation)) return -EINVAL; + len = sizeof(struct sctp_setadaptation); + 
adaptation.ssb_adaptation_ind = sctp_sk(sk)->adaptation_ind; + + if (put_user(len, optlen)) + return -EFAULT; if (copy_to_user(optval, &adaptation, len)) return -EFAULT; @@ -4435,9 +4481,12 @@ static int sctp_getsockopt_default_send_param(struct sock *sk, struct sctp_association *asoc; struct sctp_sock *sp = sctp_sk(sk); - if (len != sizeof(struct sctp_sndrcvinfo)) + if (len < sizeof(struct sctp_sndrcvinfo)) return -EINVAL; - if (copy_from_user(&info, optval, sizeof(struct sctp_sndrcvinfo))) + + len = sizeof(struct sctp_sndrcvinfo); + + if (copy_from_user(&info, optval, len)) return -EFAULT; asoc = sctp_id2assoc(sk, info.sinfo_assoc_id); @@ -4458,7 +4507,9 @@ static int sctp_getsockopt_default_send_param(struct sock *sk, info.sinfo_timetolive = sp->default_timetolive; } - if (copy_to_user(optval, &info, sizeof(struct sctp_sndrcvinfo))) + if (put_user(len, optlen)) + return -EFAULT; + if (copy_to_user(optval, &info, len)) return -EFAULT; return 0; @@ -4509,10 +4560,12 @@ static int sctp_getsockopt_rtoinfo(struct sock *sk, int len, struct sctp_rtoinfo rtoinfo; struct sctp_association *asoc; - if (len != sizeof (struct sctp_rtoinfo)) + if (len < sizeof (struct sctp_rtoinfo)) return -EINVAL; - if (copy_from_user(&rtoinfo, optval, sizeof (struct sctp_rtoinfo))) + len = sizeof(struct sctp_rtoinfo); + + if (copy_from_user(&rtoinfo, optval, len)) return -EFAULT; asoc = sctp_id2assoc(sk, rtoinfo.srto_assoc_id); @@ -4547,7 +4600,7 @@ static int sctp_getsockopt_rtoinfo(struct sock *sk, int len, * * 7.1.2 SCTP_ASSOCINFO * - * This option is used to tune the the maximum retransmission attempts + * This option is used to tune the maximum retransmission attempts * of the association. * Returns an error if the new association retransmission value is * greater than the sum of the retransmission value of the peer. @@ -4564,11 +4617,12 @@ static int sctp_getsockopt_associnfo(struct sock *sk, int len, struct list_head *pos; int cnt = 0; - if (len != sizeof (struct sctp_assocparams)) + if (len < sizeof (struct sctp_assocparams)) return -EINVAL; - if (copy_from_user(&assocparams, optval, - sizeof (struct sctp_assocparams))) + len = sizeof(struct sctp_assocparams); + + if (copy_from_user(&assocparams, optval, len)) return -EFAULT; asoc = sctp_id2assoc(sk, assocparams.sasoc_assoc_id); @@ -4654,9 +4708,11 @@ static int sctp_getsockopt_context(struct sock *sk, int len, struct sctp_sock *sp; struct sctp_association *asoc; - if (len != sizeof(struct sctp_assoc_value)) + if (len < sizeof(struct sctp_assoc_value)) return -EINVAL; + len = sizeof(struct sctp_assoc_value); + if (copy_from_user(¶ms, optval, len)) return -EFAULT; @@ -5227,7 +5283,12 @@ int sctp_inet_listen(struct socket *sock, int backlog) /* Allocate HMAC for generating cookie. */ if (sctp_hmac_alg) { tfm = crypto_alloc_hash(sctp_hmac_alg, 0, CRYPTO_ALG_ASYNC); - if (!tfm) { + if (IS_ERR(tfm)) { + if (net_ratelimit()) { + printk(KERN_INFO + "SCTP: failed to load transform for %s: %ld\n", + sctp_hmac_alg, PTR_ERR(tfm)); + } err = -ENOSYS; goto out; } @@ -6062,8 +6123,11 @@ static void sctp_sock_migrate(struct sock *oldsk, struct sock *newsk, * queued to the backlog. This prevents a potential race between * backlog processing on the old socket and new-packet processing * on the new socket. + * + * The caller has just allocated newsk so we can guarantee that other + * paths won't try to lock it and then oldsk. 
*/ - sctp_lock_sock(newsk); + lock_sock_nested(newsk, SINGLE_DEPTH_NESTING); sctp_assoc_migrate(assoc, newsk); /* If the association on the newsk is already closed before accept() diff --git a/net/sctp/transport.c b/net/sctp/transport.c index 961df275d5b..5f467c914f8 100644 --- a/net/sctp/transport.c +++ b/net/sctp/transport.c @@ -241,6 +241,45 @@ void sctp_transport_pmtu(struct sctp_transport *transport) transport->pathmtu = SCTP_DEFAULT_MAXSEGMENT; } +/* this is a complete rip-off from __sk_dst_check + * the cookie is always 0 since this is how it's used in the + * pmtu code + */ +static struct dst_entry *sctp_transport_dst_check(struct sctp_transport *t) +{ + struct dst_entry *dst = t->dst; + + if (dst && dst->obsolete && dst->ops->check(dst, 0) == NULL) { + dst_release(t->dst); + t->dst = NULL; + return NULL; + } + + return dst; +} + +void sctp_transport_update_pmtu(struct sctp_transport *t, u32 pmtu) +{ + struct dst_entry *dst; + + if (unlikely(pmtu < SCTP_DEFAULT_MINSEGMENT)) { + printk(KERN_WARNING "%s: Reported pmtu %d too low, " + "using default minimum of %d\n", + __FUNCTION__, pmtu, + SCTP_DEFAULT_MINSEGMENT); + /* Use default minimum segment size and disable + * pmtu discovery on this transport. + */ + t->pathmtu = SCTP_DEFAULT_MINSEGMENT; + } else { + t->pathmtu = pmtu; + } + + dst = sctp_transport_dst_check(t); + if (dst) + dst->ops->update_pmtu(dst, pmtu); +} + /* Caches the dst entry and source address for a transport's destination * address. */ diff --git a/net/sctp/ulpevent.c b/net/sctp/ulpevent.c index 661ea2dd78b..bfecb353ab3 100644 --- a/net/sctp/ulpevent.c +++ b/net/sctp/ulpevent.c @@ -141,11 +141,6 @@ struct sctp_ulpevent *sctp_ulpevent_make_assoc_change( * an ABORT, so we need to include it in the sac_info. */ if (chunk) { - /* sctp_inqu_pop() has allready pulled off the chunk - * header. We need to put it back temporarily - */ - skb_push(chunk->skb, sizeof(sctp_chunkhdr_t)); - /* Copy the chunk data to a new skb and reserve enough * head room to use as notification. */ @@ -155,9 +150,6 @@ struct sctp_ulpevent *sctp_ulpevent_make_assoc_change( if (!skb) goto fail; - /* put back the chunk header now that we have a copy */ - skb_pull(chunk->skb, sizeof(sctp_chunkhdr_t)); - /* Embed the event fields inside the cloned skb. */ event = sctp_skb2event(skb); sctp_ulpevent_init(event, MSG_NOTIFICATION, skb->truesize); @@ -168,7 +160,8 @@ struct sctp_ulpevent *sctp_ulpevent_make_assoc_change( /* Trim the buffer to the right length. */ skb_trim(skb, sizeof(struct sctp_assoc_change) + - ntohs(chunk->chunk_hdr->length)); + ntohs(chunk->chunk_hdr->length) - + sizeof(sctp_chunkhdr_t)); } else { event = sctp_ulpevent_new(sizeof(struct sctp_assoc_change), MSG_NOTIFICATION, gfp); diff --git a/net/socket.c b/net/socket.c index 759825b7ca2..f4530196a70 100644 --- a/net/socket.c +++ b/net/socket.c @@ -261,8 +261,7 @@ static void init_once(void *foo, struct kmem_cache *cachep, unsigned long flags) { struct socket_alloc *ei = (struct socket_alloc *)foo; - if (flags & SLAB_CTOR_CONSTRUCTOR) - inode_init_once(&ei->vfs_inode); + inode_init_once(&ei->vfs_inode); } static int init_inodecache(void) @@ -313,8 +312,19 @@ static int sockfs_delete_dentry(struct dentry *dentry) dentry->d_flags |= DCACHE_UNHASHED; return 0; } + +/* + * sockfs_dname() is called from d_path(). 
+ */ +static char *sockfs_dname(struct dentry *dentry, char *buffer, int buflen) +{ + return dynamic_dname(dentry, buffer, buflen, "socket:[%lu]", + dentry->d_inode->i_ino); +} + static struct dentry_operations sockfs_dentry_operations = { .d_delete = sockfs_delete_dentry, + .d_dname = sockfs_dname, }; /* @@ -354,14 +364,9 @@ static int sock_alloc_fd(struct file **filep) static int sock_attach_fd(struct socket *sock, struct file *file) { - struct qstr this; - char name[32]; - - this.len = sprintf(name, "[%lu]", SOCK_INODE(sock)->i_ino); - this.name = name; - this.hash = 0; + struct qstr name = { .name = "" }; - file->f_path.dentry = d_alloc(sock_mnt->mnt_sb->s_root, &this); + file->f_path.dentry = d_alloc(sock_mnt->mnt_sb->s_root, &name); if (unlikely(!file->f_path.dentry)) return -ENOMEM; diff --git a/net/sunrpc/auth_gss/svcauth_gss.c b/net/sunrpc/auth_gss/svcauth_gss.c index db298b501c8..099a983797d 100644 --- a/net/sunrpc/auth_gss/svcauth_gss.c +++ b/net/sunrpc/auth_gss/svcauth_gss.c @@ -924,6 +924,7 @@ static inline int gss_write_init_verf(struct svc_rqst *rqstp, struct rsi *rsip) { struct rsc *rsci; + int rc; if (rsip->major_status != GSS_S_COMPLETE) return gss_write_null_verf(rqstp); @@ -932,7 +933,9 @@ gss_write_init_verf(struct svc_rqst *rqstp, struct rsi *rsip) rsip->major_status = GSS_S_NO_CONTEXT; return gss_write_null_verf(rqstp); } - return gss_write_verf(rqstp, rsci->mechctx, GSS_SEQ_WIN); + rc = gss_write_verf(rqstp, rsci->mechctx, GSS_SEQ_WIN); + cache_put(&rsci->h, &rsc_cache); + return rc; } /* @@ -1089,6 +1092,8 @@ svcauth_gss_accept(struct svc_rqst *rqstp, __be32 *authp) } goto complete; case RPC_GSS_PROC_DESTROY: + if (gss_write_verf(rqstp, rsci->mechctx, gc->gc_seq)) + goto auth_err; set_bit(CACHE_NEGATIVE, &rsci->h.flags); if (resv->iov_len + 4 > PAGE_SIZE) goto drop; @@ -1196,13 +1201,7 @@ svcauth_gss_wrap_resp_integ(struct svc_rqst *rqstp) if (xdr_buf_subsegment(resbuf, &integ_buf, integ_offset, integ_len)) BUG(); - if (resbuf->page_len == 0 - && resbuf->head[0].iov_len + RPC_MAX_AUTH_SIZE - < PAGE_SIZE) { - BUG_ON(resbuf->tail[0].iov_len); - /* Use head for everything */ - resv = &resbuf->head[0]; - } else if (resbuf->tail[0].iov_base == NULL) { + if (resbuf->tail[0].iov_base == NULL) { if (resbuf->head[0].iov_len + RPC_MAX_AUTH_SIZE > PAGE_SIZE) goto out_err; resbuf->tail[0].iov_base = resbuf->head[0].iov_base diff --git a/net/sunrpc/rpc_pipe.c b/net/sunrpc/rpc_pipe.c index ad39b47e05b..5887457dc93 100644 --- a/net/sunrpc/rpc_pipe.c +++ b/net/sunrpc/rpc_pipe.c @@ -828,23 +828,23 @@ init_once(void * foo, struct kmem_cache * cachep, unsigned long flags) { struct rpc_inode *rpci = (struct rpc_inode *) foo; - if (flags & SLAB_CTOR_CONSTRUCTOR) { - inode_init_once(&rpci->vfs_inode); - rpci->private = NULL; - rpci->nreaders = 0; - rpci->nwriters = 0; - INIT_LIST_HEAD(&rpci->in_upcall); - INIT_LIST_HEAD(&rpci->pipe); - rpci->pipelen = 0; - init_waitqueue_head(&rpci->waitq); - INIT_DELAYED_WORK(&rpci->queue_timeout, - rpc_timeout_upcall_queue); - rpci->ops = NULL; - } + inode_init_once(&rpci->vfs_inode); + rpci->private = NULL; + rpci->nreaders = 0; + rpci->nwriters = 0; + INIT_LIST_HEAD(&rpci->in_upcall); + INIT_LIST_HEAD(&rpci->pipe); + rpci->pipelen = 0; + init_waitqueue_head(&rpci->waitq); + INIT_DELAYED_WORK(&rpci->queue_timeout, + rpc_timeout_upcall_queue); + rpci->ops = NULL; } int register_rpc_pipefs(void) { + int err; + rpc_inode_cachep = kmem_cache_create("rpc_inode_cache", sizeof(struct rpc_inode), 0, (SLAB_HWCACHE_ALIGN|SLAB_RECLAIM_ACCOUNT| @@ -852,7 +852,12 
@@ int register_rpc_pipefs(void) init_once, NULL); if (!rpc_inode_cachep) return -ENOMEM; - register_filesystem(&rpc_pipe_fs_type); + err = register_filesystem(&rpc_pipe_fs_type); + if (err) { + kmem_cache_destroy(rpc_inode_cachep); + return err; + } + return 0; } diff --git a/net/sunrpc/sched.c b/net/sunrpc/sched.c index 4a53e94f813..944d75396fb 100644 --- a/net/sunrpc/sched.c +++ b/net/sunrpc/sched.c @@ -736,6 +736,11 @@ static void rpc_async_schedule(struct work_struct *work) __rpc_execute(container_of(work, struct rpc_task, u.tk_work)); } +struct rpc_buffer { + size_t len; + char data[]; +}; + /** * rpc_malloc - allocate an RPC buffer * @task: RPC task that will use this buffer @@ -754,18 +759,22 @@ static void rpc_async_schedule(struct work_struct *work) */ void *rpc_malloc(struct rpc_task *task, size_t size) { - size_t *buf; + struct rpc_buffer *buf; gfp_t gfp = RPC_IS_SWAPPER(task) ? GFP_ATOMIC : GFP_NOWAIT; - size += sizeof(size_t); + size += sizeof(struct rpc_buffer); if (size <= RPC_BUFFER_MAXSIZE) buf = mempool_alloc(rpc_buffer_mempool, gfp); else buf = kmalloc(size, gfp); - *buf = size; - dprintk("RPC: %5u allocated buffer of size %u at %p\n", + + if (!buf) + return NULL; + + buf->len = size; + dprintk("RPC: %5u allocated buffer of size %zu at %p\n", task->tk_pid, size, buf); - return (void *) ++buf; + return &buf->data; } /** @@ -775,15 +784,18 @@ void *rpc_malloc(struct rpc_task *task, size_t size) */ void rpc_free(void *buffer) { - size_t size, *buf = (size_t *) buffer; + size_t size; + struct rpc_buffer *buf; if (!buffer) return; - size = *buf; - buf--; - dprintk("RPC: freeing buffer of size %u at %p\n", + buf = container_of(buffer, struct rpc_buffer, data); + size = buf->len; + + dprintk("RPC: freeing buffer of size %zu at %p\n", size, buf); + if (size <= RPC_BUFFER_MAXSIZE) mempool_free(buf, rpc_buffer_mempool); else @@ -977,8 +989,6 @@ void rpc_killall_tasks(struct rpc_clnt *clnt) spin_unlock(&rpc_sched_lock); } -static DECLARE_MUTEX_LOCKED(rpciod_running); - static void rpciod_killall(void) { unsigned long flags; diff --git a/net/sunrpc/sunrpc_syms.c b/net/sunrpc/sunrpc_syms.c index 43ecf62f12e..73075dec83c 100644 --- a/net/sunrpc/sunrpc_syms.c +++ b/net/sunrpc/sunrpc_syms.c @@ -134,11 +134,7 @@ EXPORT_SYMBOL(nfsd_debug); EXPORT_SYMBOL(nlm_debug); #endif -extern int register_rpc_pipefs(void); -extern void unregister_rpc_pipefs(void); extern struct cache_detail ip_map_cache, unix_gid_cache; -extern int init_socket_xprt(void); -extern void cleanup_socket_xprt(void); static int __init init_sunrpc(void) @@ -146,9 +142,11 @@ init_sunrpc(void) int err = register_rpc_pipefs(); if (err) goto out; - err = rpc_init_mempool() != 0; - if (err) + err = rpc_init_mempool(); + if (err) { + unregister_rpc_pipefs(); goto out; + } #ifdef RPC_DEBUG rpc_register_sysctl(); #endif diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c index b7503c103ae..e673ef99390 100644 --- a/net/sunrpc/svc.c +++ b/net/sunrpc/svc.c @@ -907,7 +907,7 @@ svc_process(struct svc_rqst *rqstp) * better idea of reply size */ if (procp->pc_xdrressize) - svc_reserve(rqstp, procp->pc_xdrressize<<2); + svc_reserve_auth(rqstp, procp->pc_xdrressize<<2); /* Call the function that processes the request. */ if (!versp->vs_dispatch) { diff --git a/net/sunrpc/svcauth.c b/net/sunrpc/svcauth.c index f5c3808bf85..af7c5f05c6e 100644 --- a/net/sunrpc/svcauth.c +++ b/net/sunrpc/svcauth.c @@ -64,7 +64,7 @@ int svc_set_client(struct svc_rqst *rqstp) } /* A request, which was authenticated, has now executed. 
- * Time to finalise the the credentials and verifier + * Time to finalise the credentials and verifier * and release and resources */ int svc_authorise(struct svc_rqst *rqstp) diff --git a/net/sunrpc/svcauth_unix.c b/net/sunrpc/svcauth_unix.c index 2bd23ea2aa8..07dcd20cbee 100644 --- a/net/sunrpc/svcauth_unix.c +++ b/net/sunrpc/svcauth_unix.c @@ -385,7 +385,7 @@ ip_map_cached_get(struct svc_rqst *rqstp) { struct ip_map *ipm; struct svc_sock *svsk = rqstp->rq_sock; - spin_lock_bh(&svsk->sk_defer_lock); + spin_lock(&svsk->sk_lock); ipm = svsk->sk_info_authunix; if (ipm != NULL) { if (!cache_valid(&ipm->h)) { @@ -395,13 +395,13 @@ ip_map_cached_get(struct svc_rqst *rqstp) * same IP address. */ svsk->sk_info_authunix = NULL; - spin_unlock_bh(&svsk->sk_defer_lock); + spin_unlock(&svsk->sk_lock); cache_put(&ipm->h, &ip_map_cache); return NULL; } cache_get(&ipm->h); } - spin_unlock_bh(&svsk->sk_defer_lock); + spin_unlock(&svsk->sk_lock); return ipm; } @@ -410,14 +410,14 @@ ip_map_cached_put(struct svc_rqst *rqstp, struct ip_map *ipm) { struct svc_sock *svsk = rqstp->rq_sock; - spin_lock_bh(&svsk->sk_defer_lock); + spin_lock(&svsk->sk_lock); if (svsk->sk_sock->type == SOCK_STREAM && svsk->sk_info_authunix == NULL) { /* newly cached, keep the reference */ svsk->sk_info_authunix = ipm; ipm = NULL; } - spin_unlock_bh(&svsk->sk_defer_lock); + spin_unlock(&svsk->sk_lock); if (ipm) cache_put(&ipm->h, &ip_map_cache); } diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c index 22f61aee482..5baf48de255 100644 --- a/net/sunrpc/svcsock.c +++ b/net/sunrpc/svcsock.c @@ -53,7 +53,8 @@ * svc_serv->sv_lock protects sv_tempsocks, sv_permsocks, sv_tmpcnt. * when both need to be taken (rare), svc_serv->sv_lock is first. * BKL protects svc_serv->sv_nrthread. - * svc_sock->sk_defer_lock protects the svc_sock->sk_deferred list + * svc_sock->sk_lock protects the svc_sock->sk_deferred list + * and the ->sk_info_authunix cache. * svc_sock->sk_flags.SK_BUSY prevents a svc_sock being enqueued multiply. 
* * Some flags can be set to certain values at any time @@ -787,15 +788,20 @@ svc_udp_recvfrom(struct svc_rqst *rqstp) } clear_bit(SK_DATA, &svsk->sk_flags); - while ((err = kernel_recvmsg(svsk->sk_sock, &msg, NULL, - 0, 0, MSG_PEEK | MSG_DONTWAIT)) < 0 || - (skb = skb_recv_datagram(svsk->sk_sk, 0, 1, &err)) == NULL) { - if (err == -EAGAIN) { - svc_sock_received(svsk); - return err; + skb = NULL; + err = kernel_recvmsg(svsk->sk_sock, &msg, NULL, + 0, 0, MSG_PEEK | MSG_DONTWAIT); + if (err >= 0) + skb = skb_recv_datagram(svsk->sk_sk, 0, 1, &err); + + if (skb == NULL) { + if (err != -EAGAIN) { + /* possibly an icmp error */ + dprintk("svc: recvfrom returned error %d\n", -err); + set_bit(SK_DATA, &svsk->sk_flags); } - /* possibly an icmp error */ - dprintk("svc: recvfrom returned error %d\n", -err); + svc_sock_received(svsk); + return -EAGAIN; } rqstp->rq_addrlen = sizeof(rqstp->rq_addr); if (skb->tstamp.tv64 == 0) { @@ -1633,7 +1639,7 @@ static struct svc_sock *svc_setup_socket(struct svc_serv *serv, svsk->sk_server = serv; atomic_set(&svsk->sk_inuse, 1); svsk->sk_lastrecv = get_seconds(); - spin_lock_init(&svsk->sk_defer_lock); + spin_lock_init(&svsk->sk_lock); INIT_LIST_HEAD(&svsk->sk_deferred); INIT_LIST_HEAD(&svsk->sk_ready); mutex_init(&svsk->sk_mutex); @@ -1857,9 +1863,9 @@ static void svc_revisit(struct cache_deferred_req *dreq, int too_many) dprintk("revisit queued\n"); svsk = dr->svsk; dr->svsk = NULL; - spin_lock_bh(&svsk->sk_defer_lock); + spin_lock(&svsk->sk_lock); list_add(&dr->handle.recent, &svsk->sk_deferred); - spin_unlock_bh(&svsk->sk_defer_lock); + spin_unlock(&svsk->sk_lock); set_bit(SK_DEFERRED, &svsk->sk_flags); svc_sock_enqueue(svsk); svc_sock_put(svsk); @@ -1925,7 +1931,7 @@ static struct svc_deferred_req *svc_deferred_dequeue(struct svc_sock *svsk) if (!test_bit(SK_DEFERRED, &svsk->sk_flags)) return NULL; - spin_lock_bh(&svsk->sk_defer_lock); + spin_lock(&svsk->sk_lock); clear_bit(SK_DEFERRED, &svsk->sk_flags); if (!list_empty(&svsk->sk_deferred)) { dr = list_entry(svsk->sk_deferred.next, @@ -1934,6 +1940,6 @@ static struct svc_deferred_req *svc_deferred_dequeue(struct svc_sock *svsk) list_del_init(&dr->handle.recent); set_bit(SK_DEFERRED, &svsk->sk_flags); } - spin_unlock_bh(&svsk->sk_defer_lock); + spin_unlock(&svsk->sk_lock); return dr; } diff --git a/net/tipc/Kconfig b/net/tipc/Kconfig index f9e367d946e..3b30d1130b6 100644 --- a/net/tipc/Kconfig +++ b/net/tipc/Kconfig @@ -2,11 +2,9 @@ # TIPC configuration # -menu "TIPC Configuration (EXPERIMENTAL)" - depends on INET && EXPERIMENTAL - -config TIPC +menuconfig TIPC tristate "The TIPC Protocol (EXPERIMENTAL)" + depends on INET && EXPERIMENTAL ---help--- The Transparent Inter Process Communication (TIPC) protocol is specially designed for intra cluster communication. This protocol @@ -22,9 +20,10 @@ config TIPC If in doubt, say N. +if TIPC + config TIPC_ADVANCED bool "TIPC: Advanced configuration" - depends on TIPC default n help Saying Y here will open some advanced configuration @@ -33,7 +32,7 @@ config TIPC_ADVANCED config TIPC_ZONES int "Maximum number of zones in network" - depends on TIPC && TIPC_ADVANCED + depends on TIPC_ADVANCED default "3" help Max number of zones inside TIPC network. Max supported value @@ -44,7 +43,7 @@ config TIPC_ZONES config TIPC_CLUSTERS int "Maximum number of clusters in a zone" - depends on TIPC && TIPC_ADVANCED + depends on TIPC_ADVANCED default "1" help ***Only 1 (one cluster in a zone) is supported by current code. 
@@ -59,7 +58,7 @@ config TIPC_CLUSTERS config TIPC_NODES int "Maximum number of nodes in cluster" - depends on TIPC && TIPC_ADVANCED + depends on TIPC_ADVANCED default "255" help Maximum number of nodes inside a TIPC cluster. Maximum @@ -70,7 +69,7 @@ config TIPC_NODES config TIPC_SLAVE_NODES int "Maximum number of slave nodes in cluster" - depends on TIPC && TIPC_ADVANCED + depends on TIPC_ADVANCED default "0" help ***This capability is not supported by current code.*** @@ -83,7 +82,7 @@ config TIPC_SLAVE_NODES config TIPC_PORTS int "Maximum number of ports in a node" - depends on TIPC && TIPC_ADVANCED + depends on TIPC_ADVANCED default "8191" help Maximum number of ports within a node. Maximum @@ -94,7 +93,7 @@ config TIPC_PORTS config TIPC_LOG int "Size of log buffer" - depends on TIPC && TIPC_ADVANCED + depends on TIPC_ADVANCED default 0 help Size (in bytes) of TIPC's internal log buffer, which records the @@ -106,7 +105,6 @@ config TIPC_LOG config TIPC_DEBUG bool "Enable debugging support" - depends on TIPC default n help This will enable debugging of TIPC. @@ -114,4 +112,4 @@ config TIPC_DEBUG Only say Y here if you are having trouble with TIPC. It will enable the display of detailed information about what is going on. -endmenu +endif # TIPC diff --git a/net/tipc/eth_media.c b/net/tipc/eth_media.c index 0ee6ded18f3..77d2d9ce896 100644 --- a/net/tipc/eth_media.c +++ b/net/tipc/eth_media.c @@ -120,18 +120,20 @@ static int recv_msg(struct sk_buff *buf, struct net_device *dev, static int enable_bearer(struct tipc_bearer *tb_ptr) { - struct net_device *dev, *pdev; + struct net_device *dev = NULL; + struct net_device *pdev = NULL; struct eth_bearer *eb_ptr = ð_bearers[0]; struct eth_bearer *stop = ð_bearers[MAX_ETH_BEARERS]; char *driver_name = strchr((const char *)tb_ptr->name, ':') + 1; /* Find device with specified name */ - dev = NULL; - for_each_netdev(pdev) - if (!strncmp(dev->name, driver_name, IFNAMSIZ)) { + + for_each_netdev(pdev){ + if (!strncmp(pdev->name, driver_name, IFNAMSIZ)) { dev = pdev; break; } + } if (!dev) return -ENODEV; diff --git a/net/tipc/netlink.c b/net/tipc/netlink.c index 4cdafa2d1d4..6a7f7b4c259 100644 --- a/net/tipc/netlink.c +++ b/net/tipc/netlink.c @@ -60,7 +60,7 @@ static int handle_cmd(struct sk_buff *skb, struct genl_info *info) rep_nlh = nlmsg_hdr(rep_buf); memcpy(rep_nlh, req_nlh, hdr_space); rep_nlh->nlmsg_len = rep_buf->len; - genlmsg_unicast(rep_buf, req_nlh->nlmsg_pid); + genlmsg_unicast(rep_buf, NETLINK_CB(skb).pid); } return 0; diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index aec8cf165e1..d70fa30d429 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -111,7 +111,6 @@ #include <net/scm.h> #include <linux/init.h> #include <linux/poll.h> -#include <linux/smp_lock.h> #include <linux/rtnetlink.h> #include <linux/mount.h> #include <net/checksum.h> @@ -175,11 +174,11 @@ static struct sock *unix_peer_get(struct sock *s) { struct sock *peer; - unix_state_rlock(s); + unix_state_lock(s); peer = unix_peer(s); if (peer) sock_hold(peer); - unix_state_runlock(s); + unix_state_unlock(s); return peer; } @@ -370,7 +369,7 @@ static int unix_release_sock (struct sock *sk, int embrion) unix_remove_socket(sk); /* Clear state */ - unix_state_wlock(sk); + unix_state_lock(sk); sock_orphan(sk); sk->sk_shutdown = SHUTDOWN_MASK; dentry = u->dentry; @@ -379,7 +378,7 @@ static int unix_release_sock (struct sock *sk, int embrion) u->mnt = NULL; state = sk->sk_state; sk->sk_state = TCP_CLOSE; - unix_state_wunlock(sk); + unix_state_unlock(sk); 
wake_up_interruptible_all(&u->peer_wait); @@ -387,12 +386,12 @@ static int unix_release_sock (struct sock *sk, int embrion) if (skpair!=NULL) { if (sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET) { - unix_state_wlock(skpair); + unix_state_lock(skpair); /* No more writes */ skpair->sk_shutdown = SHUTDOWN_MASK; if (!skb_queue_empty(&sk->sk_receive_queue) || embrion) skpair->sk_err = ECONNRESET; - unix_state_wunlock(skpair); + unix_state_unlock(skpair); skpair->sk_state_change(skpair); read_lock(&skpair->sk_callback_lock); sk_wake_async(skpair,1,POLL_HUP); @@ -449,7 +448,7 @@ static int unix_listen(struct socket *sock, int backlog) err = -EINVAL; if (!u->addr) goto out; /* No listens on an unbound socket */ - unix_state_wlock(sk); + unix_state_lock(sk); if (sk->sk_state != TCP_CLOSE && sk->sk_state != TCP_LISTEN) goto out_unlock; if (backlog > sk->sk_max_ack_backlog) @@ -463,7 +462,7 @@ static int unix_listen(struct socket *sock, int backlog) err = 0; out_unlock: - unix_state_wunlock(sk); + unix_state_unlock(sk); out: return err; } @@ -859,6 +858,31 @@ out_mknod_parent: goto out_up; } +static void unix_state_double_lock(struct sock *sk1, struct sock *sk2) +{ + if (unlikely(sk1 == sk2) || !sk2) { + unix_state_lock(sk1); + return; + } + if (sk1 < sk2) { + unix_state_lock(sk1); + unix_state_lock_nested(sk2); + } else { + unix_state_lock(sk2); + unix_state_lock_nested(sk1); + } +} + +static void unix_state_double_unlock(struct sock *sk1, struct sock *sk2) +{ + if (unlikely(sk1 == sk2) || !sk2) { + unix_state_unlock(sk1); + return; + } + unix_state_unlock(sk1); + unix_state_unlock(sk2); +} + static int unix_dgram_connect(struct socket *sock, struct sockaddr *addr, int alen, int flags) { @@ -878,11 +902,19 @@ static int unix_dgram_connect(struct socket *sock, struct sockaddr *addr, !unix_sk(sk)->addr && (err = unix_autobind(sock)) != 0) goto out; +restart: other=unix_find_other(sunaddr, alen, sock->type, hash, &err); if (!other) goto out; - unix_state_wlock(sk); + unix_state_double_lock(sk, other); + + /* Apparently VFS overslept socket death. Retry. */ + if (sock_flag(other, SOCK_DEAD)) { + unix_state_double_unlock(sk, other); + sock_put(other); + goto restart; + } err = -EPERM; if (!unix_may_send(sk, other)) @@ -897,7 +929,7 @@ static int unix_dgram_connect(struct socket *sock, struct sockaddr *addr, * 1003.1g breaking connected state with AF_UNSPEC */ other = NULL; - unix_state_wlock(sk); + unix_state_double_lock(sk, other); } /* @@ -906,19 +938,19 @@ static int unix_dgram_connect(struct socket *sock, struct sockaddr *addr, if (unix_peer(sk)) { struct sock *old_peer = unix_peer(sk); unix_peer(sk)=other; - unix_state_wunlock(sk); + unix_state_double_unlock(sk, other); if (other != old_peer) unix_dgram_disconnected(sk, old_peer); sock_put(old_peer); } else { unix_peer(sk)=other; - unix_state_wunlock(sk); + unix_state_double_unlock(sk, other); } return 0; out_unlock: - unix_state_wunlock(sk); + unix_state_double_unlock(sk, other); sock_put(other); out: return err; @@ -937,7 +969,7 @@ static long unix_wait_for_peer(struct sock *other, long timeo) (skb_queue_len(&other->sk_receive_queue) > other->sk_max_ack_backlog); - unix_state_runlock(other); + unix_state_unlock(other); if (sched) timeo = schedule_timeout(timeo); @@ -995,11 +1027,11 @@ restart: goto out; /* Latch state of peer */ - unix_state_rlock(other); + unix_state_lock(other); /* Apparently VFS overslept socket death. Retry. 
*/ if (sock_flag(other, SOCK_DEAD)) { - unix_state_runlock(other); + unix_state_unlock(other); sock_put(other); goto restart; } @@ -1049,18 +1081,18 @@ restart: goto out_unlock; } - unix_state_wlock_nested(sk); + unix_state_lock_nested(sk); if (sk->sk_state != st) { - unix_state_wunlock(sk); - unix_state_runlock(other); + unix_state_unlock(sk); + unix_state_unlock(other); sock_put(other); goto restart; } err = security_unix_stream_connect(sock, other->sk_socket, newsk); if (err) { - unix_state_wunlock(sk); + unix_state_unlock(sk); goto out_unlock; } @@ -1097,7 +1129,7 @@ restart: smp_mb__after_atomic_inc(); /* sock_hold() does an atomic_inc() */ unix_peer(sk) = newsk; - unix_state_wunlock(sk); + unix_state_unlock(sk); /* take ten and and send info to listening sock */ spin_lock(&other->sk_receive_queue.lock); @@ -1106,14 +1138,14 @@ restart: * is installed to listening socket. */ atomic_inc(&newu->inflight); spin_unlock(&other->sk_receive_queue.lock); - unix_state_runlock(other); + unix_state_unlock(other); other->sk_data_ready(other, 0); sock_put(other); return 0; out_unlock: if (other) - unix_state_runlock(other); + unix_state_unlock(other); out: if (skb) @@ -1179,10 +1211,10 @@ static int unix_accept(struct socket *sock, struct socket *newsock, int flags) wake_up_interruptible(&unix_sk(sk)->peer_wait); /* attach accepted sock to socket */ - unix_state_wlock(tsk); + unix_state_lock(tsk); newsock->state = SS_CONNECTED; sock_graft(tsk, newsock); - unix_state_wunlock(tsk); + unix_state_unlock(tsk); return 0; out: @@ -1209,7 +1241,7 @@ static int unix_getname(struct socket *sock, struct sockaddr *uaddr, int *uaddr_ } u = unix_sk(sk); - unix_state_rlock(sk); + unix_state_lock(sk); if (!u->addr) { sunaddr->sun_family = AF_UNIX; sunaddr->sun_path[0] = 0; @@ -1220,7 +1252,7 @@ static int unix_getname(struct socket *sock, struct sockaddr *uaddr, int *uaddr_ *uaddr_len = addr->len; memcpy(sunaddr, addr->name, *uaddr_len); } - unix_state_runlock(sk); + unix_state_unlock(sk); sock_put(sk); out: return err; @@ -1338,7 +1370,7 @@ restart: goto out_free; } - unix_state_rlock(other); + unix_state_lock(other); err = -EPERM; if (!unix_may_send(sk, other)) goto out_unlock; @@ -1348,20 +1380,20 @@ restart: * Check with 1003.1g - what should * datagram error */ - unix_state_runlock(other); + unix_state_unlock(other); sock_put(other); err = 0; - unix_state_wlock(sk); + unix_state_lock(sk); if (unix_peer(sk) == other) { unix_peer(sk)=NULL; - unix_state_wunlock(sk); + unix_state_unlock(sk); unix_dgram_disconnected(sk, other); sock_put(other); err = -ECONNREFUSED; } else { - unix_state_wunlock(sk); + unix_state_unlock(sk); } other = NULL; @@ -1398,14 +1430,14 @@ restart: } skb_queue_tail(&other->sk_receive_queue, skb); - unix_state_runlock(other); + unix_state_unlock(other); other->sk_data_ready(other, len); sock_put(other); scm_destroy(siocb->scm); return len; out_unlock: - unix_state_runlock(other); + unix_state_unlock(other); out_free: kfree_skb(skb); out: @@ -1495,14 +1527,14 @@ static int unix_stream_sendmsg(struct kiocb *kiocb, struct socket *sock, goto out_err; } - unix_state_rlock(other); + unix_state_lock(other); if (sock_flag(other, SOCK_DEAD) || (other->sk_shutdown & RCV_SHUTDOWN)) goto pipe_err_free; skb_queue_tail(&other->sk_receive_queue, skb); - unix_state_runlock(other); + unix_state_unlock(other); other->sk_data_ready(other, size); sent+=size; } @@ -1513,7 +1545,7 @@ static int unix_stream_sendmsg(struct kiocb *kiocb, struct socket *sock, return sent; pipe_err_free: - unix_state_runlock(other); 
+ unix_state_unlock(other); kfree_skb(skb); pipe_err: if (sent==0 && !(msg->msg_flags&MSG_NOSIGNAL)) @@ -1642,7 +1674,7 @@ static long unix_stream_data_wait(struct sock * sk, long timeo) { DEFINE_WAIT(wait); - unix_state_rlock(sk); + unix_state_lock(sk); for (;;) { prepare_to_wait(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE); @@ -1655,14 +1687,14 @@ static long unix_stream_data_wait(struct sock * sk, long timeo) break; set_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags); - unix_state_runlock(sk); + unix_state_unlock(sk); timeo = schedule_timeout(timeo); - unix_state_rlock(sk); + unix_state_lock(sk); clear_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags); } finish_wait(sk->sk_sleep, &wait); - unix_state_runlock(sk); + unix_state_unlock(sk); return timeo; } @@ -1712,20 +1744,23 @@ static int unix_stream_recvmsg(struct kiocb *iocb, struct socket *sock, int chunk; struct sk_buff *skb; + unix_state_lock(sk); skb = skb_dequeue(&sk->sk_receive_queue); if (skb==NULL) { if (copied >= target) - break; + goto unlock; /* * POSIX 1003.1g mandates this order. */ if ((err = sock_error(sk)) != 0) - break; + goto unlock; if (sk->sk_shutdown & RCV_SHUTDOWN) - break; + goto unlock; + + unix_state_unlock(sk); err = -EAGAIN; if (!timeo) break; @@ -1739,7 +1774,11 @@ static int unix_stream_recvmsg(struct kiocb *iocb, struct socket *sock, } mutex_lock(&u->readlock); continue; + unlock: + unix_state_unlock(sk); + break; } + unix_state_unlock(sk); if (check_creds) { /* Never glue messages from different writers */ @@ -1817,12 +1856,12 @@ static int unix_shutdown(struct socket *sock, int mode) mode = (mode+1)&(RCV_SHUTDOWN|SEND_SHUTDOWN); if (mode) { - unix_state_wlock(sk); + unix_state_lock(sk); sk->sk_shutdown |= mode; other=unix_peer(sk); if (other) sock_hold(other); - unix_state_wunlock(sk); + unix_state_unlock(sk); sk->sk_state_change(sk); if (other && @@ -1834,9 +1873,9 @@ static int unix_shutdown(struct socket *sock, int mode) peer_mode |= SEND_SHUTDOWN; if (mode&SEND_SHUTDOWN) peer_mode |= RCV_SHUTDOWN; - unix_state_wlock(other); + unix_state_lock(other); other->sk_shutdown |= peer_mode; - unix_state_wunlock(other); + unix_state_unlock(other); other->sk_state_change(other); read_lock(&other->sk_callback_lock); if (peer_mode == SHUTDOWN_MASK) @@ -1974,7 +2013,7 @@ static int unix_seq_show(struct seq_file *seq, void *v) else { struct sock *s = v; struct unix_sock *u = unix_sk(s); - unix_state_rlock(s); + unix_state_lock(s); seq_printf(seq, "%p: %08X %08X %08X %04X %02X %5lu", s, @@ -2002,7 +2041,7 @@ static int unix_seq_show(struct seq_file *seq, void *v) for ( ; i < len; i++) seq_putc(seq, u->addr->name->sun_path[i]); } - unix_state_runlock(s); + unix_state_unlock(s); seq_putc(seq, '\n'); } diff --git a/net/wanrouter/wanmain.c b/net/wanrouter/wanmain.c index 7a19e0ede28..849cc06bd91 100644 --- a/net/wanrouter/wanmain.c +++ b/net/wanrouter/wanmain.c @@ -454,7 +454,7 @@ static int wanrouter_device_setup(struct wan_device *wandev, } if (conf->data_size && conf->data) { - if (conf->data_size > 128000 || conf->data_size < 0) { + if (conf->data_size > 128000) { printk(KERN_INFO "%s: ERROR, Invalid firmware data size %i !\n", wandev->name, conf->data_size); diff --git a/net/wireless/sysfs.c b/net/wireless/sysfs.c index 3ebae144296..88aaacd9f82 100644 --- a/net/wireless/sysfs.c +++ b/net/wireless/sysfs.c @@ -33,7 +33,7 @@ static ssize_t _show_permaddr(struct device *dev, struct device_attribute *attr, char *buf) { - char *addr = dev_to_rdev(dev)->wiphy.perm_addr; + unsigned char *addr = dev_to_rdev(dev)->wiphy.perm_addr; 
return sprintf(buf, "%.2x:%.2x:%.2x:%.2x:%.2x:%.2x\n", addr[0], addr[1], addr[2], addr[3], addr[4], addr[5]); diff --git a/net/x25/af_x25.c b/net/x25/af_x25.c index 0d6002fc77b..479927cb45c 100644 --- a/net/x25/af_x25.c +++ b/net/x25/af_x25.c @@ -1605,7 +1605,6 @@ static const struct proto_ops SOCKOPS_WRAPPED(x25_proto_ops) = { .sendpage = sock_no_sendpage, }; -#include <linux/smp_lock.h> SOCKOPS_WRAP(x25_proto, AF_X25); static struct packet_type x25_packet_type = { diff --git a/net/xfrm/xfrm_algo.c b/net/xfrm/xfrm_algo.c index 6249a9405bb..5ced62c19c6 100644 --- a/net/xfrm/xfrm_algo.c +++ b/net/xfrm/xfrm_algo.c @@ -347,67 +347,44 @@ static inline int calg_entries(void) return ARRAY_SIZE(calg_list); } -/* Todo: generic iterators */ -struct xfrm_algo_desc *xfrm_aalg_get_byid(int alg_id) -{ - int i; - - for (i = 0; i < aalg_entries(); i++) { - if (aalg_list[i].desc.sadb_alg_id == alg_id) { - if (aalg_list[i].available) - return &aalg_list[i]; - else - break; - } - } - return NULL; -} -EXPORT_SYMBOL_GPL(xfrm_aalg_get_byid); - -struct xfrm_algo_desc *xfrm_ealg_get_byid(int alg_id) -{ - int i; +struct xfrm_algo_list { + struct xfrm_algo_desc *algs; + int entries; + u32 type; + u32 mask; +}; - for (i = 0; i < ealg_entries(); i++) { - if (ealg_list[i].desc.sadb_alg_id == alg_id) { - if (ealg_list[i].available) - return &ealg_list[i]; - else - break; - } - } - return NULL; -} -EXPORT_SYMBOL_GPL(xfrm_ealg_get_byid); +static const struct xfrm_algo_list xfrm_aalg_list = { + .algs = aalg_list, + .entries = ARRAY_SIZE(aalg_list), + .type = CRYPTO_ALG_TYPE_HASH, + .mask = CRYPTO_ALG_TYPE_HASH_MASK | CRYPTO_ALG_ASYNC, +}; -struct xfrm_algo_desc *xfrm_calg_get_byid(int alg_id) -{ - int i; +static const struct xfrm_algo_list xfrm_ealg_list = { + .algs = ealg_list, + .entries = ARRAY_SIZE(ealg_list), + .type = CRYPTO_ALG_TYPE_BLKCIPHER, + .mask = CRYPTO_ALG_TYPE_MASK | CRYPTO_ALG_ASYNC, +}; - for (i = 0; i < calg_entries(); i++) { - if (calg_list[i].desc.sadb_alg_id == alg_id) { - if (calg_list[i].available) - return &calg_list[i]; - else - break; - } - } - return NULL; -} -EXPORT_SYMBOL_GPL(xfrm_calg_get_byid); +static const struct xfrm_algo_list xfrm_calg_list = { + .algs = calg_list, + .entries = ARRAY_SIZE(calg_list), + .type = CRYPTO_ALG_TYPE_COMPRESS, + .mask = CRYPTO_ALG_TYPE_MASK | CRYPTO_ALG_ASYNC, +}; -static struct xfrm_algo_desc *xfrm_get_byname(struct xfrm_algo_desc *list, - int entries, u32 type, u32 mask, - char *name, int probe) +static struct xfrm_algo_desc *xfrm_find_algo( + const struct xfrm_algo_list *algo_list, + int match(const struct xfrm_algo_desc *entry, const void *data), + const void *data, int probe) { + struct xfrm_algo_desc *list = algo_list->algs; int i, status; - if (!name) - return NULL; - - for (i = 0; i < entries; i++) { - if (strcmp(name, list[i].name) && - (!list[i].compat || strcmp(name, list[i].compat))) + for (i = 0; i < algo_list->entries; i++) { + if (!match(list + i, data)) continue; if (list[i].available) @@ -416,8 +393,8 @@ static struct xfrm_algo_desc *xfrm_get_byname(struct xfrm_algo_desc *list, if (!probe) break; - status = crypto_has_alg(list[i].name, type, - mask | CRYPTO_ALG_ASYNC); + status = crypto_has_alg(list[i].name, algo_list->type, + algo_list->mask); if (!status) break; @@ -427,27 +404,60 @@ static struct xfrm_algo_desc *xfrm_get_byname(struct xfrm_algo_desc *list, return NULL; } +static int xfrm_alg_id_match(const struct xfrm_algo_desc *entry, + const void *data) +{ + return entry->desc.sadb_alg_id == (unsigned long)data; +} + +struct 
xfrm_algo_desc *xfrm_aalg_get_byid(int alg_id) +{ + return xfrm_find_algo(&xfrm_aalg_list, xfrm_alg_id_match, + (void *)(unsigned long)alg_id, 1); +} +EXPORT_SYMBOL_GPL(xfrm_aalg_get_byid); + +struct xfrm_algo_desc *xfrm_ealg_get_byid(int alg_id) +{ + return xfrm_find_algo(&xfrm_ealg_list, xfrm_alg_id_match, + (void *)(unsigned long)alg_id, 1); +} +EXPORT_SYMBOL_GPL(xfrm_ealg_get_byid); + +struct xfrm_algo_desc *xfrm_calg_get_byid(int alg_id) +{ + return xfrm_find_algo(&xfrm_calg_list, xfrm_alg_id_match, + (void *)(unsigned long)alg_id, 1); +} +EXPORT_SYMBOL_GPL(xfrm_calg_get_byid); + +static int xfrm_alg_name_match(const struct xfrm_algo_desc *entry, + const void *data) +{ + const char *name = data; + + return name && (!strcmp(name, entry->name) || + (entry->compat && !strcmp(name, entry->compat))); +} + struct xfrm_algo_desc *xfrm_aalg_get_byname(char *name, int probe) { - return xfrm_get_byname(aalg_list, aalg_entries(), - CRYPTO_ALG_TYPE_HASH, CRYPTO_ALG_TYPE_HASH_MASK, - name, probe); + return xfrm_find_algo(&xfrm_aalg_list, xfrm_alg_name_match, name, + probe); } EXPORT_SYMBOL_GPL(xfrm_aalg_get_byname); struct xfrm_algo_desc *xfrm_ealg_get_byname(char *name, int probe) { - return xfrm_get_byname(ealg_list, ealg_entries(), - CRYPTO_ALG_TYPE_BLKCIPHER, CRYPTO_ALG_TYPE_MASK, - name, probe); + return xfrm_find_algo(&xfrm_ealg_list, xfrm_alg_name_match, name, + probe); } EXPORT_SYMBOL_GPL(xfrm_ealg_get_byname); struct xfrm_algo_desc *xfrm_calg_get_byname(char *name, int probe) { - return xfrm_get_byname(calg_list, calg_entries(), - CRYPTO_ALG_TYPE_COMPRESS, CRYPTO_ALG_TYPE_MASK, - name, probe); + return xfrm_find_algo(&xfrm_calg_list, xfrm_alg_name_match, name, + probe); } EXPORT_SYMBOL_GPL(xfrm_calg_get_byname); diff --git a/net/xfrm/xfrm_hash.c b/net/xfrm/xfrm_hash.c index 37643bb8768..55ab5792af5 100644 --- a/net/xfrm/xfrm_hash.c +++ b/net/xfrm/xfrm_hash.c @@ -22,7 +22,8 @@ struct hlist_head *xfrm_hash_alloc(unsigned int sz) n = __vmalloc(sz, GFP_KERNEL, PAGE_KERNEL); else n = (struct hlist_head *) - __get_free_pages(GFP_KERNEL, get_order(sz)); + __get_free_pages(GFP_KERNEL | __GFP_NOWARN, + get_order(sz)); if (n) memset(n, 0, sz); diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index 95271e8426a..157bfbd250b 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -26,9 +26,12 @@ #include <net/xfrm.h> #include <net/ip.h> #include <linux/audit.h> +#include <linux/cache.h> #include "xfrm_hash.h" +int sysctl_xfrm_larval_drop __read_mostly; + DEFINE_MUTEX(xfrm_cfg_mutex); EXPORT_SYMBOL(xfrm_cfg_mutex); @@ -796,6 +799,10 @@ struct xfrm_policy *xfrm_policy_byid(u8 type, int dir, u32 id, int delete, struct hlist_head *chain; struct hlist_node *entry; + *err = -ENOENT; + if (xfrm_policy_id2dir(id) != dir) + return NULL; + *err = 0; write_lock_bh(&xfrm_policy_lock); chain = xfrm_policy_byidx + idx_hash(id); @@ -827,11 +834,67 @@ struct xfrm_policy *xfrm_policy_byid(u8 type, int dir, u32 id, int delete, } EXPORT_SYMBOL(xfrm_policy_byid); -void xfrm_policy_flush(u8 type, struct xfrm_audit *audit_info) +#ifdef CONFIG_SECURITY_NETWORK_XFRM +static inline int +xfrm_policy_flush_secctx_check(u8 type, struct xfrm_audit *audit_info) { - int dir; + int dir, err = 0; + + for (dir = 0; dir < XFRM_POLICY_MAX; dir++) { + struct xfrm_policy *pol; + struct hlist_node *entry; + int i; + + hlist_for_each_entry(pol, entry, + &xfrm_policy_inexact[dir], bydst) { + if (pol->type != type) + continue; + err = security_xfrm_policy_delete(pol); + if (err) { + xfrm_audit_log(audit_info->loginuid, 
+ audit_info->secid, + AUDIT_MAC_IPSEC_DELSPD, 0, + pol, NULL); + return err; + } + } + for (i = xfrm_policy_bydst[dir].hmask; i >= 0; i--) { + hlist_for_each_entry(pol, entry, + xfrm_policy_bydst[dir].table + i, + bydst) { + if (pol->type != type) + continue; + err = security_xfrm_policy_delete(pol); + if (err) { + xfrm_audit_log(audit_info->loginuid, + audit_info->secid, + AUDIT_MAC_IPSEC_DELSPD, + 0, pol, NULL); + return err; + } + } + } + } + return err; +} +#else +static inline int +xfrm_policy_flush_secctx_check(u8 type, struct xfrm_audit *audit_info) +{ + return 0; +} +#endif + +int xfrm_policy_flush(u8 type, struct xfrm_audit *audit_info) +{ + int dir, err = 0; write_lock_bh(&xfrm_policy_lock); + + err = xfrm_policy_flush_secctx_check(type, audit_info); + if (err) + goto out; + for (dir = 0; dir < XFRM_POLICY_MAX; dir++) { struct xfrm_policy *pol; struct hlist_node *entry; @@ -884,7 +947,9 @@ void xfrm_policy_flush(u8 type, struct xfrm_audit *audit_info) xfrm_policy_count[dir] -= killed; } atomic_inc(&flow_cache_genid); +out: write_unlock_bh(&xfrm_policy_lock); + return err; } EXPORT_SYMBOL(xfrm_policy_flush); @@ -1386,8 +1451,8 @@ static int stale_bundle(struct dst_entry *dst); * At the moment we eat a raw IP route. Mostly to speed up lookups * on interfaces with disabled IPsec. */ -int xfrm_lookup(struct dst_entry **dst_p, struct flowi *fl, - struct sock *sk, int flags) +int __xfrm_lookup(struct dst_entry **dst_p, struct flowi *fl, + struct sock *sk, int flags) { struct xfrm_policy *policy; struct xfrm_policy *pols[XFRM_POLICY_TYPE_MAX]; @@ -1505,6 +1570,13 @@ restart: if (unlikely(nx<0)) { err = nx; + if (err == -EAGAIN && sysctl_xfrm_larval_drop) { + /* EREMOTE tells the caller to generate + * a one-shot blackhole route. + */ + xfrm_pol_put(policy); + return -EREMOTE; + } if (err == -EAGAIN && flags) { DECLARE_WAITQUEUE(wait, current); @@ -1594,6 +1666,21 @@ error: *dst_p = NULL; return err; } +EXPORT_SYMBOL(__xfrm_lookup); + +int xfrm_lookup(struct dst_entry **dst_p, struct flowi *fl, + struct sock *sk, int flags) +{ + int err = __xfrm_lookup(dst_p, fl, sk, flags); + + if (err == -EREMOTE) { + dst_release(*dst_p); + *dst_p = NULL; + err = -EAGAIN; + } + + return err; +} EXPORT_SYMBOL(xfrm_lookup); static inline int @@ -2554,4 +2641,3 @@ restore_state: } EXPORT_SYMBOL(xfrm_migrate); #endif - diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c index 9955ff4da0a..dfacb9c2a6e 100644 --- a/net/xfrm/xfrm_state.c +++ b/net/xfrm/xfrm_state.c @@ -21,18 +21,21 @@ #include <linux/cache.h> #include <asm/uaccess.h> #include <linux/audit.h> +#include <linux/cache.h> #include "xfrm_hash.h" struct sock *xfrm_nl; EXPORT_SYMBOL(xfrm_nl); -u32 sysctl_xfrm_aevent_etime = XFRM_AE_ETIME; +u32 sysctl_xfrm_aevent_etime __read_mostly = XFRM_AE_ETIME; EXPORT_SYMBOL(sysctl_xfrm_aevent_etime); -u32 sysctl_xfrm_aevent_rseqth = XFRM_AE_SEQT_SIZE; +u32 sysctl_xfrm_aevent_rseqth __read_mostly = XFRM_AE_SEQT_SIZE; EXPORT_SYMBOL(sysctl_xfrm_aevent_rseqth); +u32 sysctl_xfrm_acq_expires __read_mostly = 30; + /* Each xfrm_state may be linked to two tables: 1. Hash table by (spi,daddr,ah/esp) to find SA by SPI. 
(input,ctl) @@ -388,12 +391,48 @@ int xfrm_state_delete(struct xfrm_state *x) } EXPORT_SYMBOL(xfrm_state_delete); -void xfrm_state_flush(u8 proto, struct xfrm_audit *audit_info) +#ifdef CONFIG_SECURITY_NETWORK_XFRM +static inline int +xfrm_state_flush_secctx_check(u8 proto, struct xfrm_audit *audit_info) { - int i; - int err = 0; + int i, err = 0; + + for (i = 0; i <= xfrm_state_hmask; i++) { + struct hlist_node *entry; + struct xfrm_state *x; + + hlist_for_each_entry(x, entry, xfrm_state_bydst+i, bydst) { + if (xfrm_id_proto_match(x->id.proto, proto) && + (err = security_xfrm_state_delete(x)) != 0) { + xfrm_audit_log(audit_info->loginuid, + audit_info->secid, + AUDIT_MAC_IPSEC_DELSA, + 0, NULL, x); + + return err; + } + } + } + + return err; +} +#else +static inline int +xfrm_state_flush_secctx_check(u8 proto, struct xfrm_audit *audit_info) +{ + return 0; +} +#endif + +int xfrm_state_flush(u8 proto, struct xfrm_audit *audit_info) +{ + int i, err = 0; spin_lock_bh(&xfrm_state_lock); + err = xfrm_state_flush_secctx_check(proto, audit_info); + if (err) + goto out; + for (i = 0; i <= xfrm_state_hmask; i++) { struct hlist_node *entry; struct xfrm_state *x; @@ -416,8 +455,12 @@ restart: } } } + err = 0; + +out: spin_unlock_bh(&xfrm_state_lock); wake_up(&km_waitq); + return err; } EXPORT_SYMBOL(xfrm_state_flush); @@ -622,8 +665,8 @@ xfrm_state_find(xfrm_address_t *daddr, xfrm_address_t *saddr, h = xfrm_spi_hash(&x->id.daddr, x->id.spi, x->id.proto, family); hlist_add_head(&x->byspi, xfrm_state_byspi+h); } - x->lft.hard_add_expires_seconds = XFRM_ACQ_EXPIRES; - x->timer.expires = jiffies + XFRM_ACQ_EXPIRES*HZ; + x->lft.hard_add_expires_seconds = sysctl_xfrm_acq_expires; + x->timer.expires = jiffies + sysctl_xfrm_acq_expires*HZ; add_timer(&x->timer); xfrm_state_num++; xfrm_hash_grow_check(x->bydst.next != NULL); @@ -772,9 +815,9 @@ static struct xfrm_state *__find_acq_core(unsigned short family, u8 mode, u32 re x->props.family = family; x->props.mode = mode; x->props.reqid = reqid; - x->lft.hard_add_expires_seconds = XFRM_ACQ_EXPIRES; + x->lft.hard_add_expires_seconds = sysctl_xfrm_acq_expires; xfrm_state_hold(x); - x->timer.expires = jiffies + XFRM_ACQ_EXPIRES*HZ; + x->timer.expires = jiffies + sysctl_xfrm_acq_expires*HZ; add_timer(&x->timer); hlist_add_head(&x->bydst, xfrm_state_bydst+h); h = xfrm_src_hash(daddr, saddr, family); @@ -1686,7 +1729,7 @@ int xfrm_state_mtu(struct xfrm_state *x, int mtu) x->type && x->type->get_mtu) res = x->type->get_mtu(x, mtu); else - res = mtu; + res = mtu - x->props.header_len; spin_unlock_bh(&x->lock); return res; } diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c index b14c7e590c3..c06883bf620 100644 --- a/net/xfrm/xfrm_user.c +++ b/net/xfrm/xfrm_user.c @@ -1418,10 +1418,13 @@ static int xfrm_flush_sa(struct sk_buff *skb, struct nlmsghdr *nlh, struct km_event c; struct xfrm_usersa_flush *p = NLMSG_DATA(nlh); struct xfrm_audit audit_info; + int err; audit_info.loginuid = NETLINK_CB(skb).loginuid; audit_info.secid = NETLINK_CB(skb).sid; - xfrm_state_flush(p->proto, &audit_info); + err = xfrm_state_flush(p->proto, &audit_info); + if (err) + return err; c.data.proto = p->proto; c.event = nlh->nlmsg_type; c.seq = nlh->nlmsg_seq; @@ -1582,7 +1585,9 @@ static int xfrm_flush_policy(struct sk_buff *skb, struct nlmsghdr *nlh, audit_info.loginuid = NETLINK_CB(skb).loginuid; audit_info.secid = NETLINK_CB(skb).sid; - xfrm_policy_flush(type, &audit_info); + err = xfrm_policy_flush(type, &audit_info); + if (err) + return err; c.data.type = type; c.event = 
nlh->nlmsg_type; c.seq = nlh->nlmsg_seq;
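The af_unix changes above add unix_state_double_lock(), which takes the two socket state locks in a fixed order based on pointer address so that concurrent datagram connects on the same pair cannot deadlock. A minimal, self-contained sketch of that ordering idea, using hypothetical names and pthread mutexes rather than the kernel's sock locks:

        /* Hypothetical illustration of address-ordered locking: whichever
         * object has the lower address is locked first, so two threads
         * locking the same pair in opposite directions cannot deadlock.
         */
        #include <pthread.h>
        #include <stddef.h>

        struct node {
                pthread_mutex_t lock;
                /* ... */
        };

        static void node_double_lock(struct node *a, struct node *b)
        {
                if (a == b || b == NULL) {
                        pthread_mutex_lock(&a->lock);
                        return;
                }
                if (a < b) {
                        pthread_mutex_lock(&a->lock);
                        pthread_mutex_lock(&b->lock);
                } else {
                        pthread_mutex_lock(&b->lock);
                        pthread_mutex_lock(&a->lock);
                }
        }

        static void node_double_unlock(struct node *a, struct node *b)
        {
                pthread_mutex_unlock(&a->lock);
                if (b != NULL && b != a)
                        pthread_mutex_unlock(&b->lock);
        }

Whichever caller gets there first, both threads acquire the pair in the same order; that is the same property unix_dgram_connect() relies on before it re-checks SOCK_DEAD and swaps unix_peer() under the double lock.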