summaryrefslogtreecommitdiffstats
path: root/net
diff options
context:
space:
mode:
Diffstat (limited to 'net')
-rw-r--r--net/802/mrp.c4
-rw-r--r--net/8021q/vlan.c2
-rw-r--r--net/Kconfig25
-rw-r--r--net/Makefile1
-rw-r--r--net/appletalk/aarp.c2
-rw-r--r--net/appletalk/ddp.c2
-rw-r--r--net/atm/clip.c8
-rw-r--r--net/atm/mpc.c6
-rw-r--r--net/ax25/af_ax25.c6
-rw-r--r--net/batman-adv/Makefile1
-rw-r--r--net/batman-adv/bat_iv_ogm.c122
-rw-r--r--net/batman-adv/bridge_loop_avoidance.c94
-rw-r--r--net/batman-adv/bridge_loop_avoidance.h12
-rw-r--r--net/batman-adv/distributed-arp-table.c95
-rw-r--r--net/batman-adv/hard-interface.c98
-rw-r--r--net/batman-adv/icmp_socket.c4
-rw-r--r--net/batman-adv/main.c19
-rw-r--r--net/batman-adv/main.h18
-rw-r--r--net/batman-adv/network-coding.c30
-rw-r--r--net/batman-adv/network-coding.h6
-rw-r--r--net/batman-adv/originator.c22
-rw-r--r--net/batman-adv/originator.h3
-rw-r--r--net/batman-adv/ring_buffer.c51
-rw-r--r--net/batman-adv/ring_buffer.h27
-rw-r--r--net/batman-adv/routing.c64
-rw-r--r--net/batman-adv/routing.h1
-rw-r--r--net/batman-adv/send.c36
-rw-r--r--net/batman-adv/send.h6
-rw-r--r--net/batman-adv/soft-interface.c7
-rw-r--r--net/batman-adv/translation-table.c81
-rw-r--r--net/batman-adv/translation-table.h2
-rw-r--r--net/batman-adv/types.h6
-rw-r--r--net/batman-adv/unicast.c2
-rw-r--r--net/batman-adv/vis.c19
-rw-r--r--net/bridge/br_device.c21
-rw-r--r--net/bridge/br_forward.c14
-rw-r--r--net/bridge/br_if.c2
-rw-r--r--net/bridge/br_input.c15
-rw-r--r--net/bridge/br_multicast.c70
-rw-r--r--net/bridge/br_netlink.c10
-rw-r--r--net/bridge/br_notify.c2
-rw-r--r--net/bridge/br_private.h9
-rw-r--r--net/bridge/br_sysfs_br.c26
-rw-r--r--net/bridge/br_sysfs_if.c4
-rw-r--r--net/bridge/netfilter/ebt_log.c11
-rw-r--r--net/bridge/netfilter/ebt_ulog.c24
-rw-r--r--net/bridge/netfilter/ebtables.c6
-rw-r--r--net/caif/caif_dev.c4
-rw-r--r--net/caif/caif_usb.c4
-rw-r--r--net/can/af_can.c4
-rw-r--r--net/can/bcm.c4
-rw-r--r--net/can/gw.c4
-rw-r--r--net/can/raw.c4
-rw-r--r--net/ceph/osd_client.c5
-rw-r--r--net/compat.c13
-rw-r--r--net/core/datagram.c4
-rw-r--r--net/core/dev.c195
-rw-r--r--net/core/dev_addr_lists.c17
-rw-r--r--net/core/drop_monitor.c4
-rw-r--r--net/core/dst.c2
-rw-r--r--net/core/ethtool.c3
-rw-r--r--net/core/fib_rules.c4
-rw-r--r--net/core/gen_estimator.c12
-rw-r--r--net/core/gen_stats.c22
-rw-r--r--net/core/iovec.c50
-rw-r--r--net/core/net-procfs.c16
-rw-r--r--net/core/netpoll.c16
-rw-r--r--net/core/netprio_cgroup.c2
-rw-r--r--net/core/pktgen.c29
-rw-r--r--net/core/rtnetlink.c2
-rw-r--r--net/core/skbuff.c42
-rw-r--r--net/core/sock.c24
-rw-r--r--net/core/sysctl_net_core.c114
-rw-r--r--net/decnet/af_decnet.c4
-rw-r--r--net/ieee802154/6lowpan.c5
-rw-r--r--net/ipv4/Kconfig11
-rw-r--r--net/ipv4/Makefile2
-rw-r--r--net/ipv4/af_inet.c17
-rw-r--r--net/ipv4/ah4.c7
-rw-r--r--net/ipv4/arp.c8
-rw-r--r--net/ipv4/devinet.c2
-rw-r--r--net/ipv4/esp4.c7
-rw-r--r--net/ipv4/fib_frontend.c2
-rw-r--r--net/ipv4/icmp.c51
-rw-r--r--net/ipv4/igmp.c2
-rw-r--r--net/ipv4/ip_gre.c5
-rw-r--r--net/ipv4/ip_output.c2
-rw-r--r--net/ipv4/ip_tunnel.c6
-rw-r--r--net/ipv4/ipcomp.c7
-rw-r--r--net/ipv4/ipip.c2
-rw-r--r--net/ipv4/ipmr.c4
-rw-r--r--net/ipv4/netfilter/Kconfig2
-rw-r--r--net/ipv4/netfilter/ipt_MASQUERADE.c7
-rw-r--r--net/ipv4/netfilter/ipt_ULOG.c25
-rw-r--r--net/ipv4/ping.c641
-rw-r--r--net/ipv4/proc.c1
-rw-r--r--net/ipv4/route.c84
-rw-r--r--net/ipv4/tcp.c331
-rw-r--r--net/ipv4/tcp_input.c496
-rw-r--r--net/ipv4/tcp_ipv4.c78
-rw-r--r--net/ipv4/tcp_minisocks.c6
-rw-r--r--net/ipv4/tcp_offload.c332
-rw-r--r--net/ipv4/tcp_output.c10
-rw-r--r--net/ipv4/udp.c10
-rw-r--r--net/ipv4/xfrm4_tunnel.c2
-rw-r--r--net/ipv6/Makefile2
-rw-r--r--net/ipv6/addrconf.c84
-rw-r--r--net/ipv6/af_inet6.c12
-rw-r--r--net/ipv6/datagram.c27
-rw-r--r--net/ipv6/exthdrs_core.c2
-rw-r--r--net/ipv6/icmp.c21
-rw-r--r--net/ipv6/ip6_gre.c2
-rw-r--r--net/ipv6/ip6_offload.c1
-rw-r--r--net/ipv6/ip6_output.c2
-rw-r--r--net/ipv6/ip6mr.c2
-rw-r--r--net/ipv6/mcast.c5
-rw-r--r--net/ipv6/mip6.c6
-rw-r--r--net/ipv6/ndisc.c11
-rw-r--r--net/ipv6/netfilter.c7
-rw-r--r--net/ipv6/netfilter/ip6t_MASQUERADE.c6
-rw-r--r--net/ipv6/output_core.c3
-rw-r--r--net/ipv6/ping.c272
-rw-r--r--net/ipv6/proc.c2
-rw-r--r--net/ipv6/raw.c48
-rw-r--r--net/ipv6/route.c6
-rw-r--r--net/ipv6/sit.c119
-rw-r--r--net/ipv6/tcp_ipv6.c14
-rw-r--r--net/ipv6/udp.c68
-rw-r--r--net/ipv6/udp_impl.h2
-rw-r--r--net/ipv6/udp_offload.c23
-rw-r--r--net/ipv6/udplite.c2
-rw-r--r--net/ipv6/xfrm6_policy.c4
-rw-r--r--net/ipx/af_ipx.c2
-rw-r--r--net/irda/irlap_frame.c2
-rw-r--r--net/iucv/af_iucv.c2
-rw-r--r--net/key/af_key.c4
-rw-r--r--net/mac80211/iface.c33
-rw-r--r--net/mpls/Kconfig9
-rw-r--r--net/mpls/Makefile4
-rw-r--r--net/mpls/mpls_gso.c108
-rw-r--r--net/netfilter/core.c23
-rw-r--r--net/netfilter/ipvs/ip_vs_conn.c23
-rw-r--r--net/netfilter/ipvs/ip_vs_core.c35
-rw-r--r--net/netfilter/ipvs/ip_vs_ctl.c8
-rw-r--r--net/netfilter/ipvs/ip_vs_sh.c2
-rw-r--r--net/netfilter/nf_conntrack_ftp.c73
-rw-r--r--net/netfilter/nf_log.c11
-rw-r--r--net/netfilter/nf_nat_helper.c2
-rw-r--r--net/netfilter/nfnetlink_log.c6
-rw-r--r--net/netfilter/nfnetlink_queue_core.c33
-rw-r--r--net/netfilter/xt_CT.c10
-rw-r--r--net/netfilter/xt_LOG.c15
-rw-r--r--net/netfilter/xt_NFLOG.c3
-rw-r--r--net/netfilter/xt_TCPOPTSTRIP.c17
-rw-r--r--net/netfilter/xt_TEE.c2
-rw-r--r--net/netfilter/xt_addrtype.c27
-rw-r--r--net/netfilter/xt_rateest.c2
-rw-r--r--net/netfilter/xt_socket.c26
-rw-r--r--net/netlabel/netlabel_domainhash.c69
-rw-r--r--net/netlabel/netlabel_unlabeled.c7
-rw-r--r--net/netlink/af_netlink.c72
-rw-r--r--net/netlink/af_netlink.h1
-rw-r--r--net/netrom/af_netrom.c2
-rw-r--r--net/nfc/Makefile1
-rw-r--r--net/openvswitch/dp_notify.c2
-rw-r--r--net/packet/af_packet.c5
-rw-r--r--net/phonet/pn_dev.c4
-rw-r--r--net/rose/af_rose.c6
-rw-r--r--net/sched/act_mirred.c2
-rw-r--r--net/sched/act_police.c8
-rw-r--r--net/sched/sch_cbq.c2
-rw-r--r--net/sched/sch_drr.c2
-rw-r--r--net/sched/sch_generic.c8
-rw-r--r--net/sched/sch_hfsc.c2
-rw-r--r--net/sched/sch_htb.c44
-rw-r--r--net/sched/sch_qfq.c2
-rw-r--r--net/sched/sch_tbf.c55
-rw-r--r--net/sctp/input.c2
-rw-r--r--net/sctp/ipv6.c2
-rw-r--r--net/socket.c71
-rw-r--r--net/sunrpc/auth_gss/auth_gss.c62
-rw-r--r--net/sunrpc/auth_gss/svcauth_gss.c8
-rw-r--r--net/sunrpc/netns.h4
-rw-r--r--net/sunrpc/rpc_pipe.c5
-rw-r--r--net/sunrpc/sched.c8
-rw-r--r--net/sunrpc/svcauth_unix.c12
-rw-r--r--net/tipc/eth_media.c4
-rw-r--r--net/tipc/ib_media.c4
-rw-r--r--net/wireless/core.c5
-rw-r--r--net/x25/af_x25.c2
-rw-r--r--net/xfrm/xfrm_output.c10
-rw-r--r--net/xfrm/xfrm_policy.c5
-rw-r--r--net/xfrm/xfrm_user.c2
193 files changed, 3592 insertions, 2008 deletions
diff --git a/net/802/mrp.c b/net/802/mrp.c
index e085bcc754f..1eb05d80b07 100644
--- a/net/802/mrp.c
+++ b/net/802/mrp.c
@@ -871,10 +871,10 @@ void mrp_uninit_applicant(struct net_device *dev, struct mrp_application *appl)
*/
del_timer_sync(&app->join_timer);
- spin_lock(&app->lock);
+ spin_lock_bh(&app->lock);
mrp_mad_event(app, MRP_EVENT_TX);
mrp_pdu_queue(app);
- spin_unlock(&app->lock);
+ spin_unlock_bh(&app->lock);
mrp_queue_xmit(app);
diff --git a/net/8021q/vlan.c b/net/8021q/vlan.c
index 9424f3718ea..2fb2d88e8c2 100644
--- a/net/8021q/vlan.c
+++ b/net/8021q/vlan.c
@@ -341,7 +341,7 @@ static void __vlan_device_event(struct net_device *dev, unsigned long event)
static int vlan_device_event(struct notifier_block *unused, unsigned long event,
void *ptr)
{
- struct net_device *dev = ptr;
+ struct net_device *dev = netdev_notifier_info_to_dev(ptr);
struct vlan_group *grp;
struct vlan_info *vlan_info;
int i, flgs;
diff --git a/net/Kconfig b/net/Kconfig
index 2ddc9046868..d6a9ce6e180 100644
--- a/net/Kconfig
+++ b/net/Kconfig
@@ -218,6 +218,7 @@ source "net/batman-adv/Kconfig"
source "net/openvswitch/Kconfig"
source "net/vmw_vsock/Kconfig"
source "net/netlink/Kconfig"
+source "net/mpls/Kconfig"
config RPS
boolean
@@ -242,6 +243,18 @@ config NETPRIO_CGROUP
Cgroup subsystem for use in assigning processes to network priorities on
a per-interface basis
+config NET_LL_RX_POLL
+ bool "Low Latency Receive Poll"
+ depends on X86_TSC
+ default n
+ ---help---
+ Support Low Latency Receive Queue Poll.
+ (For network card drivers which support this option.)
+ When waiting for data in read or poll call directly into the device driver
+ to flush packets which may be pending on the device queues into the stack.
+
+ If unsure, say N.
+
config BQL
boolean
depends on SYSFS
@@ -259,6 +272,18 @@ config BPF_JIT
packet sniffing (libpcap/tcpdump). Note : Admin should enable
this feature changing /proc/sys/net/core/bpf_jit_enable
+config NET_FLOW_LIMIT
+ boolean
+ depends on RPS
+ default y
+ ---help---
+ The network stack has to drop packets when a receive processing CPU's
+ backlog reaches netdev_max_backlog. If a few out of many active flows
+ generate the vast majority of load, drop their traffic earlier to
+ maintain capacity for the other flows. This feature provides servers
+ with many clients some protection against DoS by a single (spoofed)
+ flow that greatly exceeds average workload.
+
menu "Network testing"
config NET_PKTGEN
diff --git a/net/Makefile b/net/Makefile
index 091e7b04f30..9492e8cb64e 100644
--- a/net/Makefile
+++ b/net/Makefile
@@ -70,3 +70,4 @@ obj-$(CONFIG_BATMAN_ADV) += batman-adv/
obj-$(CONFIG_NFC) += nfc/
obj-$(CONFIG_OPENVSWITCH) += openvswitch/
obj-$(CONFIG_VSOCKETS) += vmw_vsock/
+obj-$(CONFIG_NET_MPLS_GSO) += mpls/
diff --git a/net/appletalk/aarp.c b/net/appletalk/aarp.c
index 173a2e82f48..690356fa52b 100644
--- a/net/appletalk/aarp.c
+++ b/net/appletalk/aarp.c
@@ -332,7 +332,7 @@ static void aarp_expire_timeout(unsigned long unused)
static int aarp_device_event(struct notifier_block *this, unsigned long event,
void *ptr)
{
- struct net_device *dev = ptr;
+ struct net_device *dev = netdev_notifier_info_to_dev(ptr);
int ct;
if (!net_eq(dev_net(dev), &init_net))
diff --git a/net/appletalk/ddp.c b/net/appletalk/ddp.c
index ef12839a7cf..7fee50d637f 100644
--- a/net/appletalk/ddp.c
+++ b/net/appletalk/ddp.c
@@ -644,7 +644,7 @@ static inline void atalk_dev_down(struct net_device *dev)
static int ddp_device_event(struct notifier_block *this, unsigned long event,
void *ptr)
{
- struct net_device *dev = ptr;
+ struct net_device *dev = netdev_notifier_info_to_dev(ptr);
if (!net_eq(dev_net(dev), &init_net))
return NOTIFY_DONE;
diff --git a/net/atm/clip.c b/net/atm/clip.c
index 8ae3a787933..8215f7cb170 100644
--- a/net/atm/clip.c
+++ b/net/atm/clip.c
@@ -539,9 +539,9 @@ static int clip_create(int number)
}
static int clip_device_event(struct notifier_block *this, unsigned long event,
- void *arg)
+ void *ptr)
{
- struct net_device *dev = arg;
+ struct net_device *dev = netdev_notifier_info_to_dev(ptr);
if (!net_eq(dev_net(dev), &init_net))
return NOTIFY_DONE;
@@ -575,6 +575,7 @@ static int clip_inet_event(struct notifier_block *this, unsigned long event,
void *ifa)
{
struct in_device *in_dev;
+ struct netdev_notifier_info info;
in_dev = ((struct in_ifaddr *)ifa)->ifa_dev;
/*
@@ -583,7 +584,8 @@ static int clip_inet_event(struct notifier_block *this, unsigned long event,
*/
if (event != NETDEV_UP)
return NOTIFY_DONE;
- return clip_device_event(this, NETDEV_CHANGE, in_dev->dev);
+ netdev_notifier_info_init(&info, in_dev->dev);
+ return clip_device_event(this, NETDEV_CHANGE, &info);
}
static struct notifier_block clip_dev_notifier = {
diff --git a/net/atm/mpc.c b/net/atm/mpc.c
index d4cc1be5c36..3af12755cd0 100644
--- a/net/atm/mpc.c
+++ b/net/atm/mpc.c
@@ -998,14 +998,12 @@ int msg_to_mpoad(struct k_message *mesg, struct mpoa_client *mpc)
}
static int mpoa_event_listener(struct notifier_block *mpoa_notifier,
- unsigned long event, void *dev_ptr)
+ unsigned long event, void *ptr)
{
- struct net_device *dev;
+ struct net_device *dev = netdev_notifier_info_to_dev(ptr);
struct mpoa_client *mpc;
struct lec_priv *priv;
- dev = dev_ptr;
-
if (!net_eq(dev_net(dev), &init_net))
return NOTIFY_DONE;
diff --git a/net/ax25/af_ax25.c b/net/ax25/af_ax25.c
index e277e38f736..4b4d2b779ec 100644
--- a/net/ax25/af_ax25.c
+++ b/net/ax25/af_ax25.c
@@ -111,9 +111,9 @@ again:
* Handle device status changes.
*/
static int ax25_device_event(struct notifier_block *this, unsigned long event,
- void *ptr)
+ void *ptr)
{
- struct net_device *dev = (struct net_device *)ptr;
+ struct net_device *dev = netdev_notifier_info_to_dev(ptr);
if (!net_eq(dev_net(dev), &init_net))
return NOTIFY_DONE;
@@ -1974,7 +1974,7 @@ static struct packet_type ax25_packet_type __read_mostly = {
};
static struct notifier_block ax25_dev_notifier = {
- .notifier_call =ax25_device_event,
+ .notifier_call = ax25_device_event,
};
static int __init ax25_init(void)
diff --git a/net/batman-adv/Makefile b/net/batman-adv/Makefile
index acbac2a9c62..489bb36f1b9 100644
--- a/net/batman-adv/Makefile
+++ b/net/batman-adv/Makefile
@@ -32,7 +32,6 @@ batman-adv-y += icmp_socket.o
batman-adv-y += main.o
batman-adv-$(CONFIG_BATMAN_ADV_NC) += network-coding.o
batman-adv-y += originator.o
-batman-adv-y += ring_buffer.o
batman-adv-y += routing.o
batman-adv-y += send.o
batman-adv-y += soft-interface.o
diff --git a/net/batman-adv/bat_iv_ogm.c b/net/batman-adv/bat_iv_ogm.c
index 071f288b77a..d07323b3e9b 100644
--- a/net/batman-adv/bat_iv_ogm.c
+++ b/net/batman-adv/bat_iv_ogm.c
@@ -19,7 +19,6 @@
#include "main.h"
#include "translation-table.h"
-#include "ring_buffer.h"
#include "originator.h"
#include "routing.h"
#include "gateway_common.h"
@@ -29,16 +28,57 @@
#include "bat_algo.h"
#include "network-coding.h"
+/**
+ * batadv_ring_buffer_set - update the ring buffer with the given value
+ * @lq_recv: pointer to the ring buffer
+ * @lq_index: index to store the value at
+ * @value: value to store in the ring buffer
+ */
+static void batadv_ring_buffer_set(uint8_t lq_recv[], uint8_t *lq_index,
+ uint8_t value)
+{
+ lq_recv[*lq_index] = value;
+ *lq_index = (*lq_index + 1) % BATADV_TQ_GLOBAL_WINDOW_SIZE;
+}
+
+/**
+ * batadv_ring_buffer_avg - compute the average of all non-zero values stored
+ * in the given ring buffer
+ * @lq_recv: pointer to the ring buffer
+ *
+ * Returns computed average value.
+ */
+static uint8_t batadv_ring_buffer_avg(const uint8_t lq_recv[])
+{
+ const uint8_t *ptr;
+ uint16_t count = 0, i = 0, sum = 0;
+
+ ptr = lq_recv;
+
+ while (i < BATADV_TQ_GLOBAL_WINDOW_SIZE) {
+ if (*ptr != 0) {
+ count++;
+ sum += *ptr;
+ }
+
+ i++;
+ ptr++;
+ }
+
+ if (count == 0)
+ return 0;
+
+ return (uint8_t)(sum / count);
+}
static struct batadv_neigh_node *
batadv_iv_ogm_neigh_new(struct batadv_hard_iface *hard_iface,
const uint8_t *neigh_addr,
struct batadv_orig_node *orig_node,
- struct batadv_orig_node *orig_neigh, __be32 seqno)
+ struct batadv_orig_node *orig_neigh)
{
struct batadv_neigh_node *neigh_node;
- neigh_node = batadv_neigh_node_new(hard_iface, neigh_addr,
- ntohl(seqno));
+ neigh_node = batadv_neigh_node_new(hard_iface, neigh_addr);
if (!neigh_node)
goto out;
@@ -413,18 +453,16 @@ static void batadv_iv_ogm_aggregate_new(const unsigned char *packet_buff,
else
skb_size = packet_len;
- skb_size += ETH_HLEN + NET_IP_ALIGN;
+ skb_size += ETH_HLEN;
- forw_packet_aggr->skb = dev_alloc_skb(skb_size);
+ forw_packet_aggr->skb = netdev_alloc_skb_ip_align(NULL, skb_size);
if (!forw_packet_aggr->skb) {
if (!own_packet)
atomic_inc(&bat_priv->batman_queue_left);
kfree(forw_packet_aggr);
goto out;
}
- skb_reserve(forw_packet_aggr->skb, ETH_HLEN + NET_IP_ALIGN);
-
- INIT_HLIST_NODE(&forw_packet_aggr->list);
+ skb_reserve(forw_packet_aggr->skb, ETH_HLEN);
skb_buff = skb_put(forw_packet_aggr->skb, packet_len);
forw_packet_aggr->packet_len = packet_len;
@@ -590,6 +628,41 @@ static void batadv_iv_ogm_forward(struct batadv_orig_node *orig_node,
if_incoming, 0, batadv_iv_ogm_fwd_send_time());
}
+/**
+ * batadv_iv_ogm_slide_own_bcast_window - bitshift own OGM broadcast windows for
+ * the given interface
+ * @hard_iface: the interface for which the windows have to be shifted
+ */
+static void
+batadv_iv_ogm_slide_own_bcast_window(struct batadv_hard_iface *hard_iface)
+{
+ struct batadv_priv *bat_priv = netdev_priv(hard_iface->soft_iface);
+ struct batadv_hashtable *hash = bat_priv->orig_hash;
+ struct hlist_head *head;
+ struct batadv_orig_node *orig_node;
+ unsigned long *word;
+ uint32_t i;
+ size_t word_index;
+ uint8_t *w;
+
+ for (i = 0; i < hash->size; i++) {
+ head = &hash->table[i];
+
+ rcu_read_lock();
+ hlist_for_each_entry_rcu(orig_node, head, hash_entry) {
+ spin_lock_bh(&orig_node->ogm_cnt_lock);
+ word_index = hard_iface->if_num * BATADV_NUM_WORDS;
+ word = &(orig_node->bcast_own[word_index]);
+
+ batadv_bit_get_packet(bat_priv, word, 1, 0);
+ w = &orig_node->bcast_own_sum[hard_iface->if_num];
+ *w = bitmap_weight(word, BATADV_TQ_LOCAL_WINDOW_SIZE);
+ spin_unlock_bh(&orig_node->ogm_cnt_lock);
+ }
+ rcu_read_unlock();
+ }
+}
+
static void batadv_iv_ogm_schedule(struct batadv_hard_iface *hard_iface)
{
struct batadv_priv *bat_priv = netdev_priv(hard_iface->soft_iface);
@@ -634,7 +707,7 @@ static void batadv_iv_ogm_schedule(struct batadv_hard_iface *hard_iface)
batadv_ogm_packet->gw_flags = BATADV_NO_FLAGS;
}
- batadv_slide_own_bcast_window(hard_iface);
+ batadv_iv_ogm_slide_own_bcast_window(hard_iface);
batadv_iv_ogm_queue_add(bat_priv, hard_iface->bat_iv.ogm_buff,
hard_iface->bat_iv.ogm_buff_len, hard_iface, 1,
batadv_iv_ogm_emit_send_time(bat_priv));
@@ -670,7 +743,7 @@ batadv_iv_ogm_orig_update(struct batadv_priv *bat_priv,
if (batadv_compare_eth(neigh_addr, ethhdr->h_source) &&
tmp_neigh_node->if_incoming == if_incoming &&
atomic_inc_not_zero(&tmp_neigh_node->refcount)) {
- if (neigh_node)
+ if (WARN(neigh_node, "too many matching neigh_nodes"))
batadv_neigh_node_free_ref(neigh_node);
neigh_node = tmp_neigh_node;
continue;
@@ -696,8 +769,7 @@ batadv_iv_ogm_orig_update(struct batadv_priv *bat_priv,
neigh_node = batadv_iv_ogm_neigh_new(if_incoming,
ethhdr->h_source,
- orig_node, orig_tmp,
- batadv_ogm_packet->seqno);
+ orig_node, orig_tmp);
batadv_orig_node_free_ref(orig_tmp);
if (!neigh_node)
@@ -829,8 +901,7 @@ static int batadv_iv_ogm_calc_tq(struct batadv_orig_node *orig_node,
neigh_node = batadv_iv_ogm_neigh_new(if_incoming,
orig_neigh_node->orig,
orig_neigh_node,
- orig_neigh_node,
- batadv_ogm_packet->seqno);
+ orig_neigh_node);
if (!neigh_node)
goto out;
@@ -991,7 +1062,7 @@ static void batadv_iv_ogm_process(const struct ethhdr *ethhdr,
struct batadv_neigh_node *orig_neigh_router = NULL;
int has_directlink_flag;
int is_my_addr = 0, is_my_orig = 0, is_my_oldorig = 0;
- int is_broadcast = 0, is_bidirect;
+ int is_bidirect;
bool is_single_hop_neigh = false;
bool is_from_best_next_hop = false;
int is_duplicate, sameseq, simlar_ttl;
@@ -1054,19 +1125,9 @@ static void batadv_iv_ogm_process(const struct ethhdr *ethhdr,
if (batadv_compare_eth(batadv_ogm_packet->prev_sender,
hard_iface->net_dev->dev_addr))
is_my_oldorig = 1;
-
- if (is_broadcast_ether_addr(ethhdr->h_source))
- is_broadcast = 1;
}
rcu_read_unlock();
- if (batadv_ogm_packet->header.version != BATADV_COMPAT_VERSION) {
- batadv_dbg(BATADV_DBG_BATMAN, bat_priv,
- "Drop packet: incompatible batman version (%i)\n",
- batadv_ogm_packet->header.version);
- return;
- }
-
if (is_my_addr) {
batadv_dbg(BATADV_DBG_BATMAN, bat_priv,
"Drop packet: received my own broadcast (sender: %pM)\n",
@@ -1074,13 +1135,6 @@ static void batadv_iv_ogm_process(const struct ethhdr *ethhdr,
return;
}
- if (is_broadcast) {
- batadv_dbg(BATADV_DBG_BATMAN, bat_priv,
- "Drop packet: ignoring all packets with broadcast source addr (sender: %pM)\n",
- ethhdr->h_source);
- return;
- }
-
if (is_my_orig) {
unsigned long *word;
int offset;
@@ -1288,7 +1342,7 @@ static int batadv_iv_ogm_receive(struct sk_buff *skb,
skb->len + ETH_HLEN);
packet_len = skb_headlen(skb);
- ethhdr = (struct ethhdr *)skb_mac_header(skb);
+ ethhdr = eth_hdr(skb);
packet_buff = skb->data;
batadv_ogm_packet = (struct batadv_ogm_packet *)packet_buff;
diff --git a/net/batman-adv/bridge_loop_avoidance.c b/net/batman-adv/bridge_loop_avoidance.c
index 379061c7254..e9d8e0b3c3d 100644
--- a/net/batman-adv/bridge_loop_avoidance.c
+++ b/net/batman-adv/bridge_loop_avoidance.c
@@ -180,7 +180,7 @@ static struct batadv_bla_claim
*/
static struct batadv_bla_backbone_gw *
batadv_backbone_hash_find(struct batadv_priv *bat_priv,
- uint8_t *addr, short vid)
+ uint8_t *addr, unsigned short vid)
{
struct batadv_hashtable *hash = bat_priv->bla.backbone_hash;
struct hlist_head *head;
@@ -257,7 +257,7 @@ batadv_bla_del_backbone_claims(struct batadv_bla_backbone_gw *backbone_gw)
* @claimtype: the type of the claim (CLAIM, UNCLAIM, ANNOUNCE, ...)
*/
static void batadv_bla_send_claim(struct batadv_priv *bat_priv, uint8_t *mac,
- short vid, int claimtype)
+ unsigned short vid, int claimtype)
{
struct sk_buff *skb;
struct ethhdr *ethhdr;
@@ -307,7 +307,8 @@ static void batadv_bla_send_claim(struct batadv_priv *bat_priv, uint8_t *mac,
*/
memcpy(ethhdr->h_source, mac, ETH_ALEN);
batadv_dbg(BATADV_DBG_BLA, bat_priv,
- "bla_send_claim(): CLAIM %pM on vid %d\n", mac, vid);
+ "bla_send_claim(): CLAIM %pM on vid %d\n", mac,
+ BATADV_PRINT_VID(vid));
break;
case BATADV_CLAIM_TYPE_UNCLAIM:
/* unclaim frame
@@ -316,7 +317,7 @@ static void batadv_bla_send_claim(struct batadv_priv *bat_priv, uint8_t *mac,
memcpy(hw_src, mac, ETH_ALEN);
batadv_dbg(BATADV_DBG_BLA, bat_priv,
"bla_send_claim(): UNCLAIM %pM on vid %d\n", mac,
- vid);
+ BATADV_PRINT_VID(vid));
break;
case BATADV_CLAIM_TYPE_ANNOUNCE:
/* announcement frame
@@ -325,7 +326,7 @@ static void batadv_bla_send_claim(struct batadv_priv *bat_priv, uint8_t *mac,
memcpy(hw_src, mac, ETH_ALEN);
batadv_dbg(BATADV_DBG_BLA, bat_priv,
"bla_send_claim(): ANNOUNCE of %pM on vid %d\n",
- ethhdr->h_source, vid);
+ ethhdr->h_source, BATADV_PRINT_VID(vid));
break;
case BATADV_CLAIM_TYPE_REQUEST:
/* request frame
@@ -335,13 +336,15 @@ static void batadv_bla_send_claim(struct batadv_priv *bat_priv, uint8_t *mac,
memcpy(hw_src, mac, ETH_ALEN);
memcpy(ethhdr->h_dest, mac, ETH_ALEN);
batadv_dbg(BATADV_DBG_BLA, bat_priv,
- "bla_send_claim(): REQUEST of %pM to %pMon vid %d\n",
- ethhdr->h_source, ethhdr->h_dest, vid);
+ "bla_send_claim(): REQUEST of %pM to %pM on vid %d\n",
+ ethhdr->h_source, ethhdr->h_dest,
+ BATADV_PRINT_VID(vid));
break;
}
- if (vid != -1)
- skb = vlan_insert_tag(skb, htons(ETH_P_8021Q), vid);
+ if (vid & BATADV_VLAN_HAS_TAG)
+ skb = vlan_insert_tag(skb, htons(ETH_P_8021Q),
+ vid & VLAN_VID_MASK);
skb_reset_mac_header(skb);
skb->protocol = eth_type_trans(skb, soft_iface);
@@ -367,7 +370,7 @@ out:
*/
static struct batadv_bla_backbone_gw *
batadv_bla_get_backbone_gw(struct batadv_priv *bat_priv, uint8_t *orig,
- short vid, bool own_backbone)
+ unsigned short vid, bool own_backbone)
{
struct batadv_bla_backbone_gw *entry;
struct batadv_orig_node *orig_node;
@@ -380,7 +383,7 @@ batadv_bla_get_backbone_gw(struct batadv_priv *bat_priv, uint8_t *orig,
batadv_dbg(BATADV_DBG_BLA, bat_priv,
"bla_get_backbone_gw(): not found (%pM, %d), creating new entry\n",
- orig, vid);
+ orig, BATADV_PRINT_VID(vid));
entry = kzalloc(sizeof(*entry), GFP_ATOMIC);
if (!entry)
@@ -434,7 +437,7 @@ batadv_bla_get_backbone_gw(struct batadv_priv *bat_priv, uint8_t *orig,
static void
batadv_bla_update_own_backbone_gw(struct batadv_priv *bat_priv,
struct batadv_hard_iface *primary_if,
- short vid)
+ unsigned short vid)
{
struct batadv_bla_backbone_gw *backbone_gw;
@@ -456,7 +459,7 @@ batadv_bla_update_own_backbone_gw(struct batadv_priv *bat_priv,
*/
static void batadv_bla_answer_request(struct batadv_priv *bat_priv,
struct batadv_hard_iface *primary_if,
- short vid)
+ unsigned short vid)
{
struct hlist_head *head;
struct batadv_hashtable *hash;
@@ -547,7 +550,7 @@ static void batadv_bla_send_announce(struct batadv_priv *bat_priv,
* @backbone_gw: the backbone gateway which claims it
*/
static void batadv_bla_add_claim(struct batadv_priv *bat_priv,
- const uint8_t *mac, const short vid,
+ const uint8_t *mac, const unsigned short vid,
struct batadv_bla_backbone_gw *backbone_gw)
{
struct batadv_bla_claim *claim;
@@ -572,7 +575,7 @@ static void batadv_bla_add_claim(struct batadv_priv *bat_priv,
atomic_set(&claim->refcount, 2);
batadv_dbg(BATADV_DBG_BLA, bat_priv,
"bla_add_claim(): adding new entry %pM, vid %d to hash ...\n",
- mac, vid);
+ mac, BATADV_PRINT_VID(vid));
hash_added = batadv_hash_add(bat_priv->bla.claim_hash,
batadv_compare_claim,
batadv_choose_claim, claim,
@@ -591,7 +594,7 @@ static void batadv_bla_add_claim(struct batadv_priv *bat_priv,
batadv_dbg(BATADV_DBG_BLA, bat_priv,
"bla_add_claim(): changing ownership for %pM, vid %d\n",
- mac, vid);
+ mac, BATADV_PRINT_VID(vid));
claim->backbone_gw->crc ^= crc16(0, claim->addr, ETH_ALEN);
batadv_backbone_gw_free_ref(claim->backbone_gw);
@@ -611,7 +614,7 @@ claim_free_ref:
* given mac address and vid.
*/
static void batadv_bla_del_claim(struct batadv_priv *bat_priv,
- const uint8_t *mac, const short vid)
+ const uint8_t *mac, const unsigned short vid)
{
struct batadv_bla_claim search_claim, *claim;
@@ -622,7 +625,7 @@ static void batadv_bla_del_claim(struct batadv_priv *bat_priv,
return;
batadv_dbg(BATADV_DBG_BLA, bat_priv, "bla_del_claim(): %pM, vid %d\n",
- mac, vid);
+ mac, BATADV_PRINT_VID(vid));
batadv_hash_remove(bat_priv->bla.claim_hash, batadv_compare_claim,
batadv_choose_claim, claim);
@@ -637,7 +640,7 @@ static void batadv_bla_del_claim(struct batadv_priv *bat_priv,
/* check for ANNOUNCE frame, return 1 if handled */
static int batadv_handle_announce(struct batadv_priv *bat_priv,
uint8_t *an_addr, uint8_t *backbone_addr,
- short vid)
+ unsigned short vid)
{
struct batadv_bla_backbone_gw *backbone_gw;
uint16_t crc;
@@ -658,12 +661,13 @@ static int batadv_handle_announce(struct batadv_priv *bat_priv,
batadv_dbg(BATADV_DBG_BLA, bat_priv,
"handle_announce(): ANNOUNCE vid %d (sent by %pM)... CRC = %#.4x\n",
- vid, backbone_gw->orig, crc);
+ BATADV_PRINT_VID(vid), backbone_gw->orig, crc);
if (backbone_gw->crc != crc) {
batadv_dbg(BATADV_DBG_BLA, backbone_gw->bat_priv,
"handle_announce(): CRC FAILED for %pM/%d (my = %#.4x, sent = %#.4x)\n",
- backbone_gw->orig, backbone_gw->vid,
+ backbone_gw->orig,
+ BATADV_PRINT_VID(backbone_gw->vid),
backbone_gw->crc, crc);
batadv_bla_send_request(backbone_gw);
@@ -685,7 +689,7 @@ static int batadv_handle_announce(struct batadv_priv *bat_priv,
static int batadv_handle_request(struct batadv_priv *bat_priv,
struct batadv_hard_iface *primary_if,
uint8_t *backbone_addr,
- struct ethhdr *ethhdr, short vid)
+ struct ethhdr *ethhdr, unsigned short vid)
{
/* check for REQUEST frame */
if (!batadv_compare_eth(backbone_addr, ethhdr->h_dest))
@@ -699,7 +703,7 @@ static int batadv_handle_request(struct batadv_priv *bat_priv,
batadv_dbg(BATADV_DBG_BLA, bat_priv,
"handle_request(): REQUEST vid %d (sent by %pM)...\n",
- vid, ethhdr->h_source);
+ BATADV_PRINT_VID(vid), ethhdr->h_source);
batadv_bla_answer_request(bat_priv, primary_if, vid);
return 1;
@@ -709,7 +713,7 @@ static int batadv_handle_request(struct batadv_priv *bat_priv,
static int batadv_handle_unclaim(struct batadv_priv *bat_priv,
struct batadv_hard_iface *primary_if,
uint8_t *backbone_addr,
- uint8_t *claim_addr, short vid)
+ uint8_t *claim_addr, unsigned short vid)
{
struct batadv_bla_backbone_gw *backbone_gw;
@@ -727,7 +731,7 @@ static int batadv_handle_unclaim(struct batadv_priv *bat_priv,
/* this must be an UNCLAIM frame */
batadv_dbg(BATADV_DBG_BLA, bat_priv,
"handle_unclaim(): UNCLAIM %pM on vid %d (sent by %pM)...\n",
- claim_addr, vid, backbone_gw->orig);
+ claim_addr, BATADV_PRINT_VID(vid), backbone_gw->orig);
batadv_bla_del_claim(bat_priv, claim_addr, vid);
batadv_backbone_gw_free_ref(backbone_gw);
@@ -738,7 +742,7 @@ static int batadv_handle_unclaim(struct batadv_priv *bat_priv,
static int batadv_handle_claim(struct batadv_priv *bat_priv,
struct batadv_hard_iface *primary_if,
uint8_t *backbone_addr, uint8_t *claim_addr,
- short vid)
+ unsigned short vid)
{
struct batadv_bla_backbone_gw *backbone_gw;
@@ -861,14 +865,15 @@ static int batadv_bla_process_claim(struct batadv_priv *bat_priv,
struct batadv_bla_claim_dst *bla_dst;
uint16_t proto;
int headlen;
- short vid = -1;
+ unsigned short vid = BATADV_NO_FLAGS;
int ret;
- ethhdr = (struct ethhdr *)skb_mac_header(skb);
+ ethhdr = eth_hdr(skb);
if (ntohs(ethhdr->h_proto) == ETH_P_8021Q) {
vhdr = (struct vlan_ethhdr *)ethhdr;
vid = ntohs(vhdr->h_vlan_TCI) & VLAN_VID_MASK;
+ vid |= BATADV_VLAN_HAS_TAG;
proto = ntohs(vhdr->h_vlan_encapsulated_proto);
headlen = sizeof(*vhdr);
} else {
@@ -885,7 +890,7 @@ static int batadv_bla_process_claim(struct batadv_priv *bat_priv,
return 0;
/* pskb_may_pull() may have modified the pointers, get ethhdr again */
- ethhdr = (struct ethhdr *)skb_mac_header(skb);
+ ethhdr = eth_hdr(skb);
arphdr = (struct arphdr *)((uint8_t *)ethhdr + headlen);
/* Check whether the ARP frame carries a valid
@@ -910,7 +915,8 @@ static int batadv_bla_process_claim(struct batadv_priv *bat_priv,
if (ret == 1)
batadv_dbg(BATADV_DBG_BLA, bat_priv,
"bla_process_claim(): received a claim frame from another group. From: %pM on vid %d ...(hw_src %pM, hw_dst %pM)\n",
- ethhdr->h_source, vid, hw_src, hw_dst);
+ ethhdr->h_source, BATADV_PRINT_VID(vid), hw_src,
+ hw_dst);
if (ret < 2)
return ret;
@@ -945,7 +951,7 @@ static int batadv_bla_process_claim(struct batadv_priv *bat_priv,
batadv_dbg(BATADV_DBG_BLA, bat_priv,
"bla_process_claim(): ERROR - this looks like a claim frame, but is useless. eth src %pM on vid %d ...(hw_src %pM, hw_dst %pM)\n",
- ethhdr->h_source, vid, hw_src, hw_dst);
+ ethhdr->h_source, BATADV_PRINT_VID(vid), hw_src, hw_dst);
return 1;
}
@@ -1358,7 +1364,7 @@ int batadv_bla_is_backbone_gw(struct sk_buff *skb,
struct ethhdr *ethhdr;
struct vlan_ethhdr *vhdr;
struct batadv_bla_backbone_gw *backbone_gw;
- short vid = -1;
+ unsigned short vid = BATADV_NO_FLAGS;
if (!atomic_read(&orig_node->bat_priv->bridge_loop_avoidance))
return 0;
@@ -1375,6 +1381,7 @@ int batadv_bla_is_backbone_gw(struct sk_buff *skb,
vhdr = (struct vlan_ethhdr *)(skb->data + hdr_size);
vid = ntohs(vhdr->h_vlan_TCI) & VLAN_VID_MASK;
+ vid |= BATADV_VLAN_HAS_TAG;
}
/* see if this originator is a backbone gw for this VLAN */
@@ -1424,15 +1431,15 @@ void batadv_bla_free(struct batadv_priv *bat_priv)
* returns 1, otherwise it returns 0 and the caller shall further
* process the skb.
*/
-int batadv_bla_rx(struct batadv_priv *bat_priv, struct sk_buff *skb, short vid,
- bool is_bcast)
+int batadv_bla_rx(struct batadv_priv *bat_priv, struct sk_buff *skb,
+ unsigned short vid, bool is_bcast)
{
struct ethhdr *ethhdr;
struct batadv_bla_claim search_claim, *claim = NULL;
struct batadv_hard_iface *primary_if;
int ret;
- ethhdr = (struct ethhdr *)skb_mac_header(skb);
+ ethhdr = eth_hdr(skb);
primary_if = batadv_primary_if_get_selected(bat_priv);
if (!primary_if)
@@ -1519,7 +1526,8 @@ out:
* returns 1, otherwise it returns 0 and the caller shall further
* process the skb.
*/
-int batadv_bla_tx(struct batadv_priv *bat_priv, struct sk_buff *skb, short vid)
+int batadv_bla_tx(struct batadv_priv *bat_priv, struct sk_buff *skb,
+ unsigned short vid)
{
struct ethhdr *ethhdr;
struct batadv_bla_claim search_claim, *claim = NULL;
@@ -1539,7 +1547,7 @@ int batadv_bla_tx(struct batadv_priv *bat_priv, struct sk_buff *skb, short vid)
if (batadv_bla_process_claim(bat_priv, primary_if, skb))
goto handled;
- ethhdr = (struct ethhdr *)skb_mac_header(skb);
+ ethhdr = eth_hdr(skb);
if (unlikely(atomic_read(&bat_priv->bla.num_requests)))
/* don't allow broadcasts while requests are in flight */
@@ -1623,8 +1631,8 @@ int batadv_bla_claim_table_seq_print_text(struct seq_file *seq, void *offset)
hlist_for_each_entry_rcu(claim, head, hash_entry) {
is_own = batadv_compare_eth(claim->backbone_gw->orig,
primary_addr);
- seq_printf(seq, " * %pM on % 5d by %pM [%c] (%#.4x)\n",
- claim->addr, claim->vid,
+ seq_printf(seq, " * %pM on %5d by %pM [%c] (%#.4x)\n",
+ claim->addr, BATADV_PRINT_VID(claim->vid),
claim->backbone_gw->orig,
(is_own ? 'x' : ' '),
claim->backbone_gw->crc);
@@ -1676,10 +1684,10 @@ int batadv_bla_backbone_table_seq_print_text(struct seq_file *seq, void *offset)
if (is_own)
continue;
- seq_printf(seq,
- " * %pM on % 5d % 4i.%03is (%#.4x)\n",
- backbone_gw->orig, backbone_gw->vid,
- secs, msecs, backbone_gw->crc);
+ seq_printf(seq, " * %pM on %5d %4i.%03is (%#.4x)\n",
+ backbone_gw->orig,
+ BATADV_PRINT_VID(backbone_gw->vid), secs,
+ msecs, backbone_gw->crc);
}
rcu_read_unlock();
}
diff --git a/net/batman-adv/bridge_loop_avoidance.h b/net/batman-adv/bridge_loop_avoidance.h
index dea2fbc5d98..4b102e71e5b 100644
--- a/net/batman-adv/bridge_loop_avoidance.h
+++ b/net/batman-adv/bridge_loop_avoidance.h
@@ -21,9 +21,10 @@
#define _NET_BATMAN_ADV_BLA_H_
#ifdef CONFIG_BATMAN_ADV_BLA
-int batadv_bla_rx(struct batadv_priv *bat_priv, struct sk_buff *skb, short vid,
- bool is_bcast);
-int batadv_bla_tx(struct batadv_priv *bat_priv, struct sk_buff *skb, short vid);
+int batadv_bla_rx(struct batadv_priv *bat_priv, struct sk_buff *skb,
+ unsigned short vid, bool is_bcast);
+int batadv_bla_tx(struct batadv_priv *bat_priv, struct sk_buff *skb,
+ unsigned short vid);
int batadv_bla_is_backbone_gw(struct sk_buff *skb,
struct batadv_orig_node *orig_node, int hdr_size);
int batadv_bla_claim_table_seq_print_text(struct seq_file *seq, void *offset);
@@ -42,13 +43,14 @@ void batadv_bla_free(struct batadv_priv *bat_priv);
#else /* ifdef CONFIG_BATMAN_ADV_BLA */
static inline int batadv_bla_rx(struct batadv_priv *bat_priv,
- struct sk_buff *skb, short vid, bool is_bcast)
+ struct sk_buff *skb, unsigned short vid,
+ bool is_bcast)
{
return 0;
}
static inline int batadv_bla_tx(struct batadv_priv *bat_priv,
- struct sk_buff *skb, short vid)
+ struct sk_buff *skb, unsigned short vid)
{
return 0;
}
diff --git a/net/batman-adv/distributed-arp-table.c b/net/batman-adv/distributed-arp-table.c
index 8e15d966d9b..06345d40158 100644
--- a/net/batman-adv/distributed-arp-table.c
+++ b/net/batman-adv/distributed-arp-table.c
@@ -45,9 +45,9 @@ static void batadv_dat_start_timer(struct batadv_priv *bat_priv)
}
/**
- * batadv_dat_entry_free_ref - decrements the dat_entry refcounter and possibly
+ * batadv_dat_entry_free_ref - decrement the dat_entry refcounter and possibly
* free it
- * @dat_entry: the oentry to free
+ * @dat_entry: the entry to free
*/
static void batadv_dat_entry_free_ref(struct batadv_dat_entry *dat_entry)
{
@@ -56,10 +56,10 @@ static void batadv_dat_entry_free_ref(struct batadv_dat_entry *dat_entry)
}
/**
- * batadv_dat_to_purge - checks whether a dat_entry has to be purged or not
+ * batadv_dat_to_purge - check whether a dat_entry has to be purged or not
* @dat_entry: the entry to check
*
- * Returns true if the entry has to be purged now, false otherwise
+ * Returns true if the entry has to be purged now, false otherwise.
*/
static bool batadv_dat_to_purge(struct batadv_dat_entry *dat_entry)
{
@@ -75,8 +75,8 @@ static bool batadv_dat_to_purge(struct batadv_dat_entry *dat_entry)
* returns a boolean value: true is the entry has to be deleted,
* false otherwise
*
- * Loops over each entry in the DAT local storage and delete it if and only if
- * the to_purge function passed as argument returns true
+ * Loops over each entry in the DAT local storage and deletes it if and only if
+ * the to_purge function passed as argument returns true.
*/
static void __batadv_dat_purge(struct batadv_priv *bat_priv,
bool (*to_purge)(struct batadv_dat_entry *))
@@ -97,7 +97,7 @@ static void __batadv_dat_purge(struct batadv_priv *bat_priv,
spin_lock_bh(list_lock);
hlist_for_each_entry_safe(dat_entry, node_tmp, head,
hash_entry) {
- /* if an helper function has been passed as parameter,
+ /* if a helper function has been passed as parameter,
* ask it if the entry has to be purged or not
*/
if (to_purge && !to_purge(dat_entry))
@@ -134,7 +134,7 @@ static void batadv_dat_purge(struct work_struct *work)
* @node: node in the local table
* @data2: second object to compare the node to
*
- * Returns 1 if the two entry are the same, 0 otherwise
+ * Returns 1 if the two entries are the same, 0 otherwise.
*/
static int batadv_compare_dat(const struct hlist_node *node, const void *data2)
{
@@ -149,7 +149,7 @@ static int batadv_compare_dat(const struct hlist_node *node, const void *data2)
* @skb: ARP packet
* @hdr_size: size of the possible header before the ARP packet
*
- * Returns the value of the hw_src field in the ARP packet
+ * Returns the value of the hw_src field in the ARP packet.
*/
static uint8_t *batadv_arp_hw_src(struct sk_buff *skb, int hdr_size)
{
@@ -166,7 +166,7 @@ static uint8_t *batadv_arp_hw_src(struct sk_buff *skb, int hdr_size)
* @skb: ARP packet
* @hdr_size: size of the possible header before the ARP packet
*
- * Returns the value of the ip_src field in the ARP packet
+ * Returns the value of the ip_src field in the ARP packet.
*/
static __be32 batadv_arp_ip_src(struct sk_buff *skb, int hdr_size)
{
@@ -178,7 +178,7 @@ static __be32 batadv_arp_ip_src(struct sk_buff *skb, int hdr_size)
* @skb: ARP packet
* @hdr_size: size of the possible header before the ARP packet
*
- * Returns the value of the hw_dst field in the ARP packet
+ * Returns the value of the hw_dst field in the ARP packet.
*/
static uint8_t *batadv_arp_hw_dst(struct sk_buff *skb, int hdr_size)
{
@@ -190,7 +190,7 @@ static uint8_t *batadv_arp_hw_dst(struct sk_buff *skb, int hdr_size)
* @skb: ARP packet
* @hdr_size: size of the possible header before the ARP packet
*
- * Returns the value of the ip_dst field in the ARP packet
+ * Returns the value of the ip_dst field in the ARP packet.
*/
static __be32 batadv_arp_ip_dst(struct sk_buff *skb, int hdr_size)
{
@@ -202,7 +202,7 @@ static __be32 batadv_arp_ip_dst(struct sk_buff *skb, int hdr_size)
* @data: data to hash
* @size: size of the hash table
*
- * Returns the selected index in the hash table for the given data
+ * Returns the selected index in the hash table for the given data.
*/
static uint32_t batadv_hash_dat(const void *data, uint32_t size)
{
@@ -224,12 +224,12 @@ static uint32_t batadv_hash_dat(const void *data, uint32_t size)
}
/**
- * batadv_dat_entry_hash_find - looks for a given dat_entry in the local hash
+ * batadv_dat_entry_hash_find - look for a given dat_entry in the local hash
* table
* @bat_priv: the bat priv with all the soft interface information
* @ip: search key
*
- * Returns the dat_entry if found, NULL otherwise
+ * Returns the dat_entry if found, NULL otherwise.
*/
static struct batadv_dat_entry *
batadv_dat_entry_hash_find(struct batadv_priv *bat_priv, __be32 ip)
@@ -343,9 +343,6 @@ static void batadv_dbg_arp(struct batadv_priv *bat_priv, struct sk_buff *skb,
if (hdr_size == 0)
return;
- /* if the ARP packet is encapsulated in a batman packet, let's print
- * some debug messages
- */
unicast_4addr_packet = (struct batadv_unicast_4addr_packet *)skb->data;
switch (unicast_4addr_packet->u.header.packet_type) {
@@ -409,7 +406,8 @@ static void batadv_dbg_arp(struct batadv_priv *bat_priv, struct sk_buff *skb,
* @candidate: orig_node under evaluation
* @max_orig_node: last selected candidate
*
- * Returns true if the node has been elected as next candidate or false othrwise
+ * Returns true if the node has been elected as next candidate or false
+ * otherwise.
*/
static bool batadv_is_orig_node_eligible(struct batadv_dat_candidate *res,
int select, batadv_dat_addr_t tmp_max,
@@ -472,7 +470,7 @@ static void batadv_choose_next_candidate(struct batadv_priv *bat_priv,
*/
cands[select].type = BATADV_DAT_CANDIDATE_NOT_FOUND;
- /* iterate over the originator list and find the node with closest
+ /* iterate over the originator list and find the node with the closest
* dat_address which has not been selected yet
*/
for (i = 0; i < hash->size; i++) {
@@ -480,7 +478,7 @@ static void batadv_choose_next_candidate(struct batadv_priv *bat_priv,
rcu_read_lock();
hlist_for_each_entry_rcu(orig_node, head, hash_entry) {
- /* the dht space is a ring and addresses are unsigned */
+ /* the dht space is a ring using unsigned addresses */
tmp_max = BATADV_DAT_ADDR_MAX - orig_node->dat_addr +
ip_key;
@@ -512,7 +510,7 @@ static void batadv_choose_next_candidate(struct batadv_priv *bat_priv,
}
/**
- * batadv_dat_select_candidates - selects the nodes which the DHT message has to
+ * batadv_dat_select_candidates - select the nodes which the DHT message has to
* be sent to
* @bat_priv: the bat priv with all the soft interface information
* @ip_dst: ipv4 to look up in the DHT
@@ -521,7 +519,7 @@ static void batadv_choose_next_candidate(struct batadv_priv *bat_priv,
* closest values (from the LEFT, with wrap around if needed) then the hash
* value of the key. ip_dst is the key.
*
- * Returns the candidate array of size BATADV_DAT_CANDIDATE_NUM
+ * Returns the candidate array of size BATADV_DAT_CANDIDATE_NUM.
*/
static struct batadv_dat_candidate *
batadv_dat_select_candidates(struct batadv_priv *bat_priv, __be32 ip_dst)
@@ -558,10 +556,11 @@ batadv_dat_select_candidates(struct batadv_priv *bat_priv, __be32 ip_dst)
* @ip: the DHT key
* @packet_subtype: unicast4addr packet subtype to use
*
- * In this function the skb is copied by means of pskb_copy() and is sent as
- * unicast packet to each of the selected candidates
+ * This function copies the skb with pskb_copy() and sends it as a unicast
+ * packet to each of the selected candidates.
*
- * Returns true if the packet is sent to at least one candidate, false otherwise
+ * Returns true if the packet is sent to at least one candidate, false
+ * otherwise.
*/
static bool batadv_dat_send_data(struct batadv_priv *bat_priv,
struct sk_buff *skb, __be32 ip,
@@ -727,7 +726,7 @@ out:
* @skb: packet to analyse
* @hdr_size: size of the possible header before the ARP packet in the skb
*
- * Returns the ARP type if the skb contains a valid ARP packet, 0 otherwise
+ * Returns the ARP type if the skb contains a valid ARP packet, 0 otherwise.
*/
static uint16_t batadv_arp_get_type(struct batadv_priv *bat_priv,
struct sk_buff *skb, int hdr_size)
@@ -754,9 +753,7 @@ static uint16_t batadv_arp_get_type(struct batadv_priv *bat_priv,
arphdr = (struct arphdr *)(skb->data + hdr_size + ETH_HLEN);
- /* Check whether the ARP packet carries a valid
- * IP information
- */
+ /* check whether the ARP packet carries valid IP information */
if (arphdr->ar_hrd != htons(ARPHRD_ETHER))
goto out;
@@ -784,7 +781,7 @@ static uint16_t batadv_arp_get_type(struct batadv_priv *bat_priv,
if (is_zero_ether_addr(hw_src) || is_multicast_ether_addr(hw_src))
goto out;
- /* we don't care about the destination MAC address in ARP requests */
+ /* don't care about the destination MAC address in ARP requests */
if (arphdr->ar_op != htons(ARPOP_REQUEST)) {
hw_dst = batadv_arp_hw_dst(skb, hdr_size);
if (is_zero_ether_addr(hw_dst) ||
@@ -804,8 +801,8 @@ out:
* @skb: packet to check
*
* Returns true if the message has been sent to the dht candidates, false
- * otherwise. In case of true the message has to be enqueued to permit the
- * fallback
+ * otherwise. In case of a positive return value the message has to be enqueued
+ * to permit the fallback.
*/
bool batadv_dat_snoop_outgoing_arp_request(struct batadv_priv *bat_priv,
struct sk_buff *skb)
@@ -837,6 +834,19 @@ bool batadv_dat_snoop_outgoing_arp_request(struct batadv_priv *bat_priv,
dat_entry = batadv_dat_entry_hash_find(bat_priv, ip_dst);
if (dat_entry) {
+ /* If the ARP request is destined for a local client the local
+ * client will answer itself. DAT would only generate a
+ * duplicate packet.
+ *
+ * Moreover, if the soft-interface is enslaved into a bridge, an
+ * additional DAT answer may trigger kernel warnings about
+ * a packet coming from the wrong port.
+ */
+ if (batadv_is_my_client(bat_priv, dat_entry->mac_addr)) {
+ ret = true;
+ goto out;
+ }
+
skb_new = arp_create(ARPOP_REPLY, ETH_P_ARP, ip_src,
bat_priv->soft_iface, ip_dst, hw_src,
dat_entry->mac_addr, hw_src);
@@ -854,7 +864,7 @@ bool batadv_dat_snoop_outgoing_arp_request(struct batadv_priv *bat_priv,
batadv_dbg(BATADV_DBG_DAT, bat_priv, "ARP request replied locally\n");
ret = true;
} else {
- /* Send the request on the DHT */
+ /* Send the request to the DHT */
ret = batadv_dat_send_data(bat_priv, skb, ip_dst,
BATADV_P_DAT_DHT_GET);
}
@@ -871,7 +881,7 @@ out:
* @skb: packet to check
* @hdr_size: size of the encapsulation header
*
- * Returns true if the request has been answered, false otherwise
+ * Returns true if the request has been answered, false otherwise.
*/
bool batadv_dat_snoop_incoming_arp_request(struct batadv_priv *bat_priv,
struct sk_buff *skb, int hdr_size)
@@ -911,10 +921,9 @@ bool batadv_dat_snoop_incoming_arp_request(struct batadv_priv *bat_priv,
if (!skb_new)
goto out;
- /* to preserve backwards compatibility, here the node has to answer
- * using the same packet type it received for the request. This is due
- * to that if a node is not using the 4addr packet format it may not
- * support it.
+ /* To preserve backwards compatibility, the node has to choose the
+ * outgoing format based on the incoming request packet type, assuming
+ * that a node not using the 4addr packet format doesn't support it.
*/
if (hdr_size == sizeof(struct batadv_unicast_4addr_packet))
err = batadv_unicast_4addr_send_skb(bat_priv, skb_new,
@@ -964,7 +973,7 @@ void batadv_dat_snoop_outgoing_arp_reply(struct batadv_priv *bat_priv,
batadv_dat_entry_add(bat_priv, ip_dst, hw_dst);
/* Send the ARP reply to the candidates for both the IP addresses that
- * the node got within the ARP reply
+ * the node obtained from the ARP reply
*/
batadv_dat_send_data(bat_priv, skb, ip_src, BATADV_P_DAT_DHT_PUT);
batadv_dat_send_data(bat_priv, skb, ip_dst, BATADV_P_DAT_DHT_PUT);
@@ -974,7 +983,7 @@ void batadv_dat_snoop_outgoing_arp_reply(struct batadv_priv *bat_priv,
* DAT storage only
* @bat_priv: the bat priv with all the soft interface information
* @skb: packet to check
- * @hdr_size: siaze of the encapsulation header
+ * @hdr_size: size of the encapsulation header
*/
bool batadv_dat_snoop_incoming_arp_reply(struct batadv_priv *bat_priv,
struct sk_buff *skb, int hdr_size)
@@ -1018,11 +1027,11 @@ out:
/**
* batadv_dat_drop_broadcast_packet - check if an ARP request has to be dropped
- * (because the node has already got the reply via DAT) or not
+ * (because the node has already obtained the reply via DAT) or not
* @bat_priv: the bat priv with all the soft interface information
* @forw_packet: the broadcast packet
*
- * Returns true if the node can drop the packet, false otherwise
+ * Returns true if the node can drop the packet, false otherwise.
*/
bool batadv_dat_drop_broadcast_packet(struct batadv_priv *bat_priv,
struct batadv_forw_packet *forw_packet)
diff --git a/net/batman-adv/hard-interface.c b/net/batman-adv/hard-interface.c
index 522243aff2f..c478e6bcf89 100644
--- a/net/batman-adv/hard-interface.c
+++ b/net/batman-adv/hard-interface.c
@@ -117,6 +117,58 @@ static int batadv_is_valid_iface(const struct net_device *net_dev)
return 1;
}
+/**
+ * batadv_is_wifi_netdev - check if the given net_device struct is a wifi
+ * interface
+ * @net_device: the device to check
+ *
+ * Returns true if the net device is an 802.11 wireless device, false otherwise.
+ */
+static bool batadv_is_wifi_netdev(struct net_device *net_device)
+{
+#ifdef CONFIG_WIRELESS_EXT
+ /* pre-cfg80211 drivers have to implement WEXT, so it is possible to
+ * check for wireless_handlers != NULL
+ */
+ if (net_device->wireless_handlers)
+ return true;
+#endif
+
+ /* cfg80211 drivers have to set ieee80211_ptr */
+ if (net_device->ieee80211_ptr)
+ return true;
+
+ return false;
+}
+
+/**
+ * batadv_is_wifi_iface - check if the given interface represented by ifindex
+ * is a wifi interface
+ * @ifindex: interface index to check
+ *
+ * Returns true if the interface represented by ifindex is an 802.11 wireless
+ * device, false otherwise.
+ */
+bool batadv_is_wifi_iface(int ifindex)
+{
+ struct net_device *net_device = NULL;
+ bool ret = false;
+
+ if (ifindex == BATADV_NULL_IFINDEX)
+ goto out;
+
+ net_device = dev_get_by_index(&init_net, ifindex);
+ if (!net_device)
+ goto out;
+
+ ret = batadv_is_wifi_netdev(net_device);
+
+out:
+ if (net_device)
+ dev_put(net_device);
+ return ret;
+}
+
static struct batadv_hard_iface *
batadv_hardif_get_active(const struct net_device *soft_iface)
{
@@ -525,7 +577,7 @@ batadv_hardif_add_interface(struct net_device *net_dev)
dev_hold(net_dev);
- hard_iface = kmalloc(sizeof(*hard_iface), GFP_ATOMIC);
+ hard_iface = kzalloc(sizeof(*hard_iface), GFP_ATOMIC);
if (!hard_iface)
goto release_dev;
@@ -541,18 +593,16 @@ batadv_hardif_add_interface(struct net_device *net_dev)
INIT_WORK(&hard_iface->cleanup_work,
batadv_hardif_remove_interface_finish);
+ hard_iface->num_bcasts = BATADV_NUM_BCASTS_DEFAULT;
+ if (batadv_is_wifi_netdev(net_dev))
+ hard_iface->num_bcasts = BATADV_NUM_BCASTS_WIRELESS;
+
/* extra reference for return */
atomic_set(&hard_iface->refcount, 2);
batadv_check_known_mac_addr(hard_iface->net_dev);
list_add_tail_rcu(&hard_iface->list, &batadv_hardif_list);
- /* This can't be called via a bat_priv callback because
- * we have no bat_priv yet.
- */
- atomic_set(&hard_iface->bat_iv.ogm_seqno, 1);
- hard_iface->bat_iv.ogm_buff = NULL;
-
return hard_iface;
free_if:
@@ -595,7 +645,7 @@ void batadv_hardif_remove_interfaces(void)
static int batadv_hard_if_event(struct notifier_block *this,
unsigned long event, void *ptr)
{
- struct net_device *net_dev = ptr;
+ struct net_device *net_dev = netdev_notifier_info_to_dev(ptr);
struct batadv_hard_iface *hard_iface;
struct batadv_hard_iface *primary_if = NULL;
struct batadv_priv *bat_priv;
@@ -657,38 +707,6 @@ out:
return NOTIFY_DONE;
}
-/* This function returns true if the interface represented by ifindex is a
- * 802.11 wireless device
- */
-bool batadv_is_wifi_iface(int ifindex)
-{
- struct net_device *net_device = NULL;
- bool ret = false;
-
- if (ifindex == BATADV_NULL_IFINDEX)
- goto out;
-
- net_device = dev_get_by_index(&init_net, ifindex);
- if (!net_device)
- goto out;
-
-#ifdef CONFIG_WIRELESS_EXT
- /* pre-cfg80211 drivers have to implement WEXT, so it is possible to
- * check for wireless_handlers != NULL
- */
- if (net_device->wireless_handlers)
- ret = true;
- else
-#endif
- /* cfg80211 drivers have to set ieee80211_ptr */
- if (net_device->ieee80211_ptr)
- ret = true;
-out:
- if (net_device)
- dev_put(net_device);
- return ret;
-}
-
struct notifier_block batadv_hard_if_notifier = {
.notifier_call = batadv_hard_if_event,
};
diff --git a/net/batman-adv/icmp_socket.c b/net/batman-adv/icmp_socket.c
index 0ba6c899b2d..b27508b8085 100644
--- a/net/batman-adv/icmp_socket.c
+++ b/net/batman-adv/icmp_socket.c
@@ -177,13 +177,13 @@ static ssize_t batadv_socket_write(struct file *file, const char __user *buff,
if (len >= sizeof(struct batadv_icmp_packet_rr))
packet_len = sizeof(struct batadv_icmp_packet_rr);
- skb = dev_alloc_skb(packet_len + ETH_HLEN + NET_IP_ALIGN);
+ skb = netdev_alloc_skb_ip_align(NULL, packet_len + ETH_HLEN);
if (!skb) {
len = -ENOMEM;
goto out;
}
- skb_reserve(skb, ETH_HLEN + NET_IP_ALIGN);
+ skb_reserve(skb, ETH_HLEN);
icmp_packet = (struct batadv_icmp_packet_rr *)skb_put(skb, packet_len);
if (copy_from_user(icmp_packet, buff, packet_len)) {
diff --git a/net/batman-adv/main.c b/net/batman-adv/main.c
index 3e30a0f1b90..51aafd669cb 100644
--- a/net/batman-adv/main.c
+++ b/net/batman-adv/main.c
@@ -163,16 +163,25 @@ void batadv_mesh_free(struct net_device *soft_iface)
batadv_vis_quit(bat_priv);
batadv_gw_node_purge(bat_priv);
- batadv_originator_free(bat_priv);
batadv_nc_free(bat_priv);
+ batadv_dat_free(bat_priv);
+ batadv_bla_free(bat_priv);
+ /* Free the TT and the originator tables only after having terminated
+ * all the other depending components which may use these structures for
+ * their purposes.
+ */
batadv_tt_free(bat_priv);
- batadv_bla_free(bat_priv);
-
- batadv_dat_free(bat_priv);
+ /* Since the originator table clean up routine is accessing the TT
+ * tables as well, it has to be invoked after the TT tables have been
+ * freed and marked as empty. This ensures that no cleanup RCU callbacks
+ * accessing the TT data are scheduled for later execution.
+ */
+ batadv_originator_free(bat_priv);
free_percpu(bat_priv->bat_counters);
+ bat_priv->bat_counters = NULL;
atomic_set(&bat_priv->mesh_state, BATADV_MESH_INACTIVE);
}
@@ -475,7 +484,7 @@ static int batadv_param_set_ra(const char *val, const struct kernel_param *kp)
char *algo_name = (char *)val;
size_t name_len = strlen(algo_name);
- if (algo_name[name_len - 1] == '\n')
+ if (name_len > 0 && algo_name[name_len - 1] == '\n')
algo_name[name_len - 1] = '\0';
bat_algo_ops = batadv_algo_get(algo_name);
diff --git a/net/batman-adv/main.h b/net/batman-adv/main.h
index 59a0d6af15c..5e9aebb7d56 100644
--- a/net/batman-adv/main.h
+++ b/net/batman-adv/main.h
@@ -26,7 +26,7 @@
#define BATADV_DRIVER_DEVICE "batman-adv"
#ifndef BATADV_SOURCE_VERSION
-#define BATADV_SOURCE_VERSION "2013.2.0"
+#define BATADV_SOURCE_VERSION "2013.3.0"
#endif
/* B.A.T.M.A.N. parameters */
@@ -76,6 +76,11 @@
#define BATADV_LOG_BUF_LEN 8192 /* has to be a power of 2 */
+/* number of packets to send for broadcasts on different interface types */
+#define BATADV_NUM_BCASTS_DEFAULT 1
+#define BATADV_NUM_BCASTS_WIRELESS 3
+#define BATADV_NUM_BCASTS_MAX 3
+
/* msecs after which an ARP_REQUEST is sent in broadcast as fallback */
#define ARP_REQ_DELAY 250
/* numbers of originator to contact for any PUT/GET DHT operation */
@@ -157,6 +162,17 @@ enum batadv_uev_type {
#include <linux/seq_file.h>
#include "types.h"
+/**
+ * enum batadv_vlan_flags - flags for the four MSB of any vlan ID field
+ * @BATADV_VLAN_HAS_TAG: whether the field contains a valid vlan tag or not
+ */
+enum batadv_vlan_flags {
+ BATADV_VLAN_HAS_TAG = BIT(15),
+};
+
+#define BATADV_PRINT_VID(vid) ((vid) & BATADV_VLAN_HAS_TAG ? \
+ (int)((vid) & VLAN_VID_MASK) : -1)
+
extern char batadv_routing_algo[];
extern struct list_head batadv_hardif_list;
diff --git a/net/batman-adv/network-coding.c b/net/batman-adv/network-coding.c
index f7c54305a91..a487d46e0ae 100644
--- a/net/batman-adv/network-coding.c
+++ b/net/batman-adv/network-coding.c
@@ -1245,7 +1245,7 @@ static void batadv_nc_skb_store_before_coding(struct batadv_priv *bat_priv,
return;
/* Set the mac header as if we actually sent the packet uncoded */
- ethhdr = (struct ethhdr *)skb_mac_header(skb);
+ ethhdr = eth_hdr(skb);
memcpy(ethhdr->h_source, ethhdr->h_dest, ETH_ALEN);
memcpy(ethhdr->h_dest, eth_dst_new, ETH_ALEN);
@@ -1359,18 +1359,17 @@ static bool batadv_nc_skb_add_to_path(struct sk_buff *skb,
* buffer
* @skb: data skb to forward
* @neigh_node: next hop to forward packet to
- * @ethhdr: pointer to the ethernet header inside the skb
*
* Returns true if the skb was consumed (encoded packet sent) or false otherwise
*/
bool batadv_nc_skb_forward(struct sk_buff *skb,
- struct batadv_neigh_node *neigh_node,
- struct ethhdr *ethhdr)
+ struct batadv_neigh_node *neigh_node)
{
const struct net_device *netdev = neigh_node->if_incoming->soft_iface;
struct batadv_priv *bat_priv = netdev_priv(netdev);
struct batadv_unicast_packet *packet;
struct batadv_nc_path *nc_path;
+ struct ethhdr *ethhdr = eth_hdr(skb);
__be32 packet_id;
u8 *payload;
@@ -1423,7 +1422,7 @@ void batadv_nc_skb_store_for_decoding(struct batadv_priv *bat_priv,
{
struct batadv_unicast_packet *packet;
struct batadv_nc_path *nc_path;
- struct ethhdr *ethhdr = (struct ethhdr *)skb_mac_header(skb);
+ struct ethhdr *ethhdr = eth_hdr(skb);
__be32 packet_id;
u8 *payload;
@@ -1482,7 +1481,7 @@ out:
void batadv_nc_skb_store_sniffed_unicast(struct batadv_priv *bat_priv,
struct sk_buff *skb)
{
- struct ethhdr *ethhdr = (struct ethhdr *)skb_mac_header(skb);
+ struct ethhdr *ethhdr = eth_hdr(skb);
if (batadv_is_my_mac(bat_priv, ethhdr->h_dest))
return;
@@ -1514,6 +1513,7 @@ batadv_nc_skb_decode_packet(struct batadv_priv *bat_priv, struct sk_buff *skb,
struct ethhdr *ethhdr, ethhdr_tmp;
uint8_t *orig_dest, ttl, ttvn;
unsigned int coding_len;
+ int err;
/* Save headers temporarily */
memcpy(&coded_packet_tmp, skb->data, sizeof(coded_packet_tmp));
@@ -1532,7 +1532,7 @@ batadv_nc_skb_decode_packet(struct batadv_priv *bat_priv, struct sk_buff *skb,
skb_reset_network_header(skb);
/* Reconstruct original mac header */
- ethhdr = (struct ethhdr *)skb_mac_header(skb);
+ ethhdr = eth_hdr(skb);
memcpy(ethhdr, &ethhdr_tmp, sizeof(*ethhdr));
/* Select the correct unicast header information based on the location
@@ -1568,8 +1568,11 @@ batadv_nc_skb_decode_packet(struct batadv_priv *bat_priv, struct sk_buff *skb,
coding_len);
/* Resize decoded skb if decoded with larger packet */
- if (nc_packet->skb->len > coding_len + h_size)
- pskb_trim_rcsum(skb, coding_len + h_size);
+ if (nc_packet->skb->len > coding_len + h_size) {
+ err = pskb_trim_rcsum(skb, coding_len + h_size);
+ if (err)
+ return NULL;
+ }
/* Create decoded unicast packet */
unicast_packet = (struct batadv_unicast_packet *)skb->data;
@@ -1673,7 +1676,7 @@ static int batadv_nc_recv_coded_packet(struct sk_buff *skb,
return NET_RX_DROP;
coded_packet = (struct batadv_coded_packet *)skb->data;
- ethhdr = (struct ethhdr *)skb_mac_header(skb);
+ ethhdr = eth_hdr(skb);
/* Verify frame is destined for us */
if (!batadv_is_my_mac(bat_priv, ethhdr->h_dest) &&
@@ -1759,6 +1762,13 @@ int batadv_nc_nodes_seq_print_text(struct seq_file *seq, void *offset)
/* For each orig_node in this bin */
rcu_read_lock();
hlist_for_each_entry_rcu(orig_node, head, hash_entry) {
+ /* no need to print the orig node if it does not have
+ * network coding neighbors
+ */
+ if (list_empty(&orig_node->in_coding_list) &&
+ list_empty(&orig_node->out_coding_list))
+ continue;
+
seq_printf(seq, "Node: %pM\n", orig_node->orig);
seq_puts(seq, " Ingoing: ");
diff --git a/net/batman-adv/network-coding.h b/net/batman-adv/network-coding.h
index 4fa6d0caddb..85a4ec81ad5 100644
--- a/net/batman-adv/network-coding.h
+++ b/net/batman-adv/network-coding.h
@@ -36,8 +36,7 @@ void batadv_nc_purge_orig(struct batadv_priv *bat_priv,
void batadv_nc_init_bat_priv(struct batadv_priv *bat_priv);
void batadv_nc_init_orig(struct batadv_orig_node *orig_node);
bool batadv_nc_skb_forward(struct sk_buff *skb,
- struct batadv_neigh_node *neigh_node,
- struct ethhdr *ethhdr);
+ struct batadv_neigh_node *neigh_node);
void batadv_nc_skb_store_for_decoding(struct batadv_priv *bat_priv,
struct sk_buff *skb);
void batadv_nc_skb_store_sniffed_unicast(struct batadv_priv *bat_priv,
@@ -87,8 +86,7 @@ static inline void batadv_nc_init_orig(struct batadv_orig_node *orig_node)
}
static inline bool batadv_nc_skb_forward(struct sk_buff *skb,
- struct batadv_neigh_node *neigh_node,
- struct ethhdr *ethhdr)
+ struct batadv_neigh_node *neigh_node)
{
return false;
}
diff --git a/net/batman-adv/originator.c b/net/batman-adv/originator.c
index 2f345254663..f50553a7de6 100644
--- a/net/batman-adv/originator.c
+++ b/net/batman-adv/originator.c
@@ -92,7 +92,7 @@ batadv_orig_node_get_router(struct batadv_orig_node *orig_node)
struct batadv_neigh_node *
batadv_neigh_node_new(struct batadv_hard_iface *hard_iface,
- const uint8_t *neigh_addr, uint32_t seqno)
+ const uint8_t *neigh_addr)
{
struct batadv_priv *bat_priv = netdev_priv(hard_iface->soft_iface);
struct batadv_neigh_node *neigh_node;
@@ -110,8 +110,8 @@ batadv_neigh_node_new(struct batadv_hard_iface *hard_iface,
atomic_set(&neigh_node->refcount, 2);
batadv_dbg(BATADV_DBG_BATMAN, bat_priv,
- "Creating new neighbor %pM, initial seqno %d\n",
- neigh_addr, seqno);
+ "Creating new neighbor %pM on interface %s\n", neigh_addr,
+ hard_iface->net_dev->name);
out:
return neigh_node;
@@ -156,12 +156,28 @@ static void batadv_orig_node_free_rcu(struct rcu_head *rcu)
kfree(orig_node);
}
+/**
+ * batadv_orig_node_free_ref - decrement the orig node refcounter and possibly
+ * schedule an rcu callback for freeing it
+ * @orig_node: the orig node to free
+ */
void batadv_orig_node_free_ref(struct batadv_orig_node *orig_node)
{
if (atomic_dec_and_test(&orig_node->refcount))
call_rcu(&orig_node->rcu, batadv_orig_node_free_rcu);
}
+/**
+ * batadv_orig_node_free_ref_now - decrement the orig node refcounter and
+ * possibly free it (without rcu callback)
+ * @orig_node: the orig node to free
+ */
+void batadv_orig_node_free_ref_now(struct batadv_orig_node *orig_node)
+{
+ if (atomic_dec_and_test(&orig_node->refcount))
+ batadv_orig_node_free_rcu(&orig_node->rcu);
+}
+
void batadv_originator_free(struct batadv_priv *bat_priv)
{
struct batadv_hashtable *hash = bat_priv->orig_hash;
diff --git a/net/batman-adv/originator.h b/net/batman-adv/originator.h
index 7df48fa7669..7887b84a9af 100644
--- a/net/batman-adv/originator.h
+++ b/net/batman-adv/originator.h
@@ -26,11 +26,12 @@ int batadv_originator_init(struct batadv_priv *bat_priv);
void batadv_originator_free(struct batadv_priv *bat_priv);
void batadv_purge_orig_ref(struct batadv_priv *bat_priv);
void batadv_orig_node_free_ref(struct batadv_orig_node *orig_node);
+void batadv_orig_node_free_ref_now(struct batadv_orig_node *orig_node);
struct batadv_orig_node *batadv_get_orig_node(struct batadv_priv *bat_priv,
const uint8_t *addr);
struct batadv_neigh_node *
batadv_neigh_node_new(struct batadv_hard_iface *hard_iface,
- const uint8_t *neigh_addr, uint32_t seqno);
+ const uint8_t *neigh_addr);
void batadv_neigh_node_free_ref(struct batadv_neigh_node *neigh_node);
struct batadv_neigh_node *
batadv_orig_node_get_router(struct batadv_orig_node *orig_node);
diff --git a/net/batman-adv/ring_buffer.c b/net/batman-adv/ring_buffer.c
deleted file mode 100644
index ccab0bbdbb5..00000000000
--- a/net/batman-adv/ring_buffer.c
+++ /dev/null
@@ -1,51 +0,0 @@
-/* Copyright (C) 2007-2013 B.A.T.M.A.N. contributors:
- *
- * Marek Lindner
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of version 2 of the GNU General Public
- * License as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
- * 02110-1301, USA
- */
-
-#include "main.h"
-#include "ring_buffer.h"
-
-void batadv_ring_buffer_set(uint8_t lq_recv[], uint8_t *lq_index,
- uint8_t value)
-{
- lq_recv[*lq_index] = value;
- *lq_index = (*lq_index + 1) % BATADV_TQ_GLOBAL_WINDOW_SIZE;
-}
-
-uint8_t batadv_ring_buffer_avg(const uint8_t lq_recv[])
-{
- const uint8_t *ptr;
- uint16_t count = 0, i = 0, sum = 0;
-
- ptr = lq_recv;
-
- while (i < BATADV_TQ_GLOBAL_WINDOW_SIZE) {
- if (*ptr != 0) {
- count++;
- sum += *ptr;
- }
-
- i++;
- ptr++;
- }
-
- if (count == 0)
- return 0;
-
- return (uint8_t)(sum / count);
-}
diff --git a/net/batman-adv/ring_buffer.h b/net/batman-adv/ring_buffer.h
deleted file mode 100644
index 3f92ae248e8..00000000000
--- a/net/batman-adv/ring_buffer.h
+++ /dev/null
@@ -1,27 +0,0 @@
-/* Copyright (C) 2007-2013 B.A.T.M.A.N. contributors:
- *
- * Marek Lindner
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of version 2 of the GNU General Public
- * License as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
- * 02110-1301, USA
- */
-
-#ifndef _NET_BATMAN_ADV_RING_BUFFER_H_
-#define _NET_BATMAN_ADV_RING_BUFFER_H_
-
-void batadv_ring_buffer_set(uint8_t lq_recv[], uint8_t *lq_index,
- uint8_t value);
-uint8_t batadv_ring_buffer_avg(const uint8_t lq_recv[]);
-
-#endif /* _NET_BATMAN_ADV_RING_BUFFER_H_ */
diff --git a/net/batman-adv/routing.c b/net/batman-adv/routing.c
index b27a4d792d1..2f0bd3ffe6e 100644
--- a/net/batman-adv/routing.c
+++ b/net/batman-adv/routing.c
@@ -34,35 +34,6 @@
static int batadv_route_unicast_packet(struct sk_buff *skb,
struct batadv_hard_iface *recv_if);
-void batadv_slide_own_bcast_window(struct batadv_hard_iface *hard_iface)
-{
- struct batadv_priv *bat_priv = netdev_priv(hard_iface->soft_iface);
- struct batadv_hashtable *hash = bat_priv->orig_hash;
- struct hlist_head *head;
- struct batadv_orig_node *orig_node;
- unsigned long *word;
- uint32_t i;
- size_t word_index;
- uint8_t *w;
-
- for (i = 0; i < hash->size; i++) {
- head = &hash->table[i];
-
- rcu_read_lock();
- hlist_for_each_entry_rcu(orig_node, head, hash_entry) {
- spin_lock_bh(&orig_node->ogm_cnt_lock);
- word_index = hard_iface->if_num * BATADV_NUM_WORDS;
- word = &(orig_node->bcast_own[word_index]);
-
- batadv_bit_get_packet(bat_priv, word, 1, 0);
- w = &orig_node->bcast_own_sum[hard_iface->if_num];
- *w = bitmap_weight(word, BATADV_TQ_LOCAL_WINDOW_SIZE);
- spin_unlock_bh(&orig_node->ogm_cnt_lock);
- }
- rcu_read_unlock();
- }
-}
-
static void _batadv_update_route(struct batadv_priv *bat_priv,
struct batadv_orig_node *orig_node,
struct batadv_neigh_node *neigh_node)
@@ -256,7 +227,7 @@ bool batadv_check_management_packet(struct sk_buff *skb,
if (unlikely(!pskb_may_pull(skb, header_len)))
return false;
- ethhdr = (struct ethhdr *)skb_mac_header(skb);
+ ethhdr = eth_hdr(skb);
/* packet with broadcast indication but unicast recipient */
if (!is_broadcast_ether_addr(ethhdr->h_dest))
@@ -314,7 +285,7 @@ static int batadv_recv_my_icmp_packet(struct batadv_priv *bat_priv,
icmp_packet->msg_type = BATADV_ECHO_REPLY;
icmp_packet->header.ttl = BATADV_TTL;
- if (batadv_send_skb_to_orig(skb, orig_node, NULL))
+ if (batadv_send_skb_to_orig(skb, orig_node, NULL) != NET_XMIT_DROP)
ret = NET_RX_SUCCESS;
out:
@@ -362,7 +333,7 @@ static int batadv_recv_icmp_ttl_exceeded(struct batadv_priv *bat_priv,
icmp_packet->msg_type = BATADV_TTL_EXCEEDED;
icmp_packet->header.ttl = BATADV_TTL;
- if (batadv_send_skb_to_orig(skb, orig_node, NULL))
+ if (batadv_send_skb_to_orig(skb, orig_node, NULL) != NET_XMIT_DROP)
ret = NET_RX_SUCCESS;
out:
@@ -392,7 +363,7 @@ int batadv_recv_icmp_packet(struct sk_buff *skb,
if (unlikely(!pskb_may_pull(skb, hdr_size)))
goto out;
- ethhdr = (struct ethhdr *)skb_mac_header(skb);
+ ethhdr = eth_hdr(skb);
/* packet with unicast indication but broadcast recipient */
if (is_broadcast_ether_addr(ethhdr->h_dest))
@@ -439,7 +410,7 @@ int batadv_recv_icmp_packet(struct sk_buff *skb,
icmp_packet->header.ttl--;
/* route it */
- if (batadv_send_skb_to_orig(skb, orig_node, recv_if))
+ if (batadv_send_skb_to_orig(skb, orig_node, recv_if) != NET_XMIT_DROP)
ret = NET_RX_SUCCESS;
out:
@@ -569,7 +540,7 @@ static int batadv_check_unicast_packet(struct batadv_priv *bat_priv,
if (unlikely(!pskb_may_pull(skb, hdr_size)))
return -ENODATA;
- ethhdr = (struct ethhdr *)skb_mac_header(skb);
+ ethhdr = eth_hdr(skb);
/* packet with unicast indication but broadcast recipient */
if (is_broadcast_ether_addr(ethhdr->h_dest))
@@ -803,8 +774,8 @@ static int batadv_route_unicast_packet(struct sk_buff *skb,
struct batadv_orig_node *orig_node = NULL;
struct batadv_neigh_node *neigh_node = NULL;
struct batadv_unicast_packet *unicast_packet;
- struct ethhdr *ethhdr = (struct ethhdr *)skb_mac_header(skb);
- int ret = NET_RX_DROP;
+ struct ethhdr *ethhdr = eth_hdr(skb);
+ int res, ret = NET_RX_DROP;
struct sk_buff *new_skb;
unicast_packet = (struct batadv_unicast_packet *)skb->data;
@@ -864,16 +835,19 @@ static int batadv_route_unicast_packet(struct sk_buff *skb,
/* decrement ttl */
unicast_packet->header.ttl--;
- /* network code packet if possible */
- if (batadv_nc_skb_forward(skb, neigh_node, ethhdr)) {
- ret = NET_RX_SUCCESS;
- } else if (batadv_send_skb_to_orig(skb, orig_node, recv_if)) {
- ret = NET_RX_SUCCESS;
+ res = batadv_send_skb_to_orig(skb, orig_node, recv_if);
- /* Update stats counter */
+ /* translate transmit result into receive result */
+ if (res == NET_XMIT_SUCCESS) {
+ /* skb was transmitted and consumed */
batadv_inc_counter(bat_priv, BATADV_CNT_FORWARD);
batadv_add_counter(bat_priv, BATADV_CNT_FORWARD_BYTES,
skb->len + ETH_HLEN);
+
+ ret = NET_RX_SUCCESS;
+ } else if (res == NET_XMIT_POLICED) {
+ /* skb was buffered and consumed */
+ ret = NET_RX_SUCCESS;
}
out:
@@ -1165,7 +1139,7 @@ int batadv_recv_bcast_packet(struct sk_buff *skb,
if (unlikely(!pskb_may_pull(skb, hdr_size)))
goto out;
- ethhdr = (struct ethhdr *)skb_mac_header(skb);
+ ethhdr = eth_hdr(skb);
/* packet with broadcast indication but unicast recipient */
if (!is_broadcast_ether_addr(ethhdr->h_dest))
@@ -1265,7 +1239,7 @@ int batadv_recv_vis_packet(struct sk_buff *skb,
return NET_RX_DROP;
vis_packet = (struct batadv_vis_packet *)skb->data;
- ethhdr = (struct ethhdr *)skb_mac_header(skb);
+ ethhdr = eth_hdr(skb);
/* not for me */
if (!batadv_is_my_mac(bat_priv, ethhdr->h_dest))
diff --git a/net/batman-adv/routing.h b/net/batman-adv/routing.h
index 99eeafaba40..72a29bde201 100644
--- a/net/batman-adv/routing.h
+++ b/net/batman-adv/routing.h
@@ -20,7 +20,6 @@
#ifndef _NET_BATMAN_ADV_ROUTING_H_
#define _NET_BATMAN_ADV_ROUTING_H_
-void batadv_slide_own_bcast_window(struct batadv_hard_iface *hard_iface);
bool batadv_check_management_packet(struct sk_buff *skb,
struct batadv_hard_iface *hard_iface,
int header_len);
diff --git a/net/batman-adv/send.c b/net/batman-adv/send.c
index 263cfd1ccee..e9ff8d80120 100644
--- a/net/batman-adv/send.c
+++ b/net/batman-adv/send.c
@@ -61,7 +61,7 @@ int batadv_send_skb_packet(struct sk_buff *skb,
skb_reset_mac_header(skb);
- ethhdr = (struct ethhdr *)skb_mac_header(skb);
+ ethhdr = eth_hdr(skb);
memcpy(ethhdr->h_source, hard_iface->net_dev->dev_addr, ETH_ALEN);
memcpy(ethhdr->h_dest, dst_addr, ETH_ALEN);
ethhdr->h_proto = __constant_htons(ETH_P_BATMAN);
@@ -96,26 +96,37 @@ send_skb_err:
* host, NULL can be passed as recv_if and no interface alternating is
* attempted.
*
- * Returns TRUE on success; FALSE otherwise.
+ * Returns NET_XMIT_SUCCESS on success, NET_XMIT_DROP on failure, or
+ * NET_XMIT_POLICED if the skb is buffered for later transmit.
*/
-bool batadv_send_skb_to_orig(struct sk_buff *skb,
- struct batadv_orig_node *orig_node,
- struct batadv_hard_iface *recv_if)
+int batadv_send_skb_to_orig(struct sk_buff *skb,
+ struct batadv_orig_node *orig_node,
+ struct batadv_hard_iface *recv_if)
{
struct batadv_priv *bat_priv = orig_node->bat_priv;
struct batadv_neigh_node *neigh_node;
+ int ret = NET_XMIT_DROP;
/* batadv_find_router() increases neigh_nodes refcount if found. */
neigh_node = batadv_find_router(bat_priv, orig_node, recv_if);
if (!neigh_node)
- return false;
+ return ret;
- /* route it */
- batadv_send_skb_packet(skb, neigh_node->if_incoming, neigh_node->addr);
+ /* try to network code the packet, if it is received on an interface
+ * (i.e. being forwarded). If the packet originates from this node or if
+ * network coding fails, then send the packet as usual.
+ */
+ if (recv_if && batadv_nc_skb_forward(skb, neigh_node)) {
+ ret = NET_XMIT_POLICED;
+ } else {
+ batadv_send_skb_packet(skb, neigh_node->if_incoming,
+ neigh_node->addr);
+ ret = NET_XMIT_SUCCESS;
+ }
batadv_neigh_node_free_ref(neigh_node);
- return true;
+ return ret;
}
void batadv_schedule_bat_ogm(struct batadv_hard_iface *hard_iface)
@@ -152,8 +163,6 @@ _batadv_add_bcast_packet_to_list(struct batadv_priv *bat_priv,
struct batadv_forw_packet *forw_packet,
unsigned long send_time)
{
- INIT_HLIST_NODE(&forw_packet->list);
-
/* add new packet to packet list */
spin_lock_bh(&bat_priv->forw_bcast_list_lock);
hlist_add_head(&forw_packet->list, &bat_priv->forw_bcast_list);
@@ -260,6 +269,9 @@ static void batadv_send_outstanding_bcast_packet(struct work_struct *work)
if (hard_iface->soft_iface != soft_iface)
continue;
+ if (forw_packet->num_packets >= hard_iface->num_bcasts)
+ continue;
+
/* send a copy of the saved skb */
skb1 = skb_clone(forw_packet->skb, GFP_ATOMIC);
if (skb1)
@@ -271,7 +283,7 @@ static void batadv_send_outstanding_bcast_packet(struct work_struct *work)
forw_packet->num_packets++;
/* if we still have some more bcasts to send */
- if (forw_packet->num_packets < 3) {
+ if (forw_packet->num_packets < BATADV_NUM_BCASTS_MAX) {
_batadv_add_bcast_packet_to_list(bat_priv, forw_packet,
msecs_to_jiffies(5));
return;
diff --git a/net/batman-adv/send.h b/net/batman-adv/send.h
index 38e662f619a..e7b17880fca 100644
--- a/net/batman-adv/send.h
+++ b/net/batman-adv/send.h
@@ -23,9 +23,9 @@
int batadv_send_skb_packet(struct sk_buff *skb,
struct batadv_hard_iface *hard_iface,
const uint8_t *dst_addr);
-bool batadv_send_skb_to_orig(struct sk_buff *skb,
- struct batadv_orig_node *orig_node,
- struct batadv_hard_iface *recv_if);
+int batadv_send_skb_to_orig(struct sk_buff *skb,
+ struct batadv_orig_node *orig_node,
+ struct batadv_hard_iface *recv_if);
void batadv_schedule_bat_ogm(struct batadv_hard_iface *hard_iface);
int batadv_add_bcast_packet_to_list(struct batadv_priv *bat_priv,
const struct sk_buff *skb,
diff --git a/net/batman-adv/soft-interface.c b/net/batman-adv/soft-interface.c
index 6f20d339e33..700d0b49742 100644
--- a/net/batman-adv/soft-interface.c
+++ b/net/batman-adv/soft-interface.c
@@ -154,7 +154,7 @@ static int batadv_interface_tx(struct sk_buff *skb,
0x00, 0x00};
unsigned int header_len = 0;
int data_len = skb->len, ret;
- short vid __maybe_unused = -1;
+ unsigned short vid __maybe_unused = BATADV_NO_FLAGS;
bool do_bcast = false;
uint32_t seqno;
unsigned long brd_delay = 1;
@@ -303,7 +303,7 @@ void batadv_interface_rx(struct net_device *soft_iface,
struct ethhdr *ethhdr;
struct vlan_ethhdr *vhdr;
struct batadv_header *batadv_header = (struct batadv_header *)skb->data;
- short vid __maybe_unused = -1;
+ unsigned short vid __maybe_unused = BATADV_NO_FLAGS;
__be16 ethertype = __constant_htons(ETH_P_BATMAN);
bool is_bcast;
@@ -316,7 +316,7 @@ void batadv_interface_rx(struct net_device *soft_iface,
skb_pull_rcsum(skb, hdr_size);
skb_reset_mac_header(skb);
- ethhdr = (struct ethhdr *)skb_mac_header(skb);
+ ethhdr = eth_hdr(skb);
switch (ntohs(ethhdr->h_proto)) {
case ETH_P_8021Q:
@@ -505,6 +505,7 @@ unreg_debugfs:
batadv_debugfs_del_meshif(dev);
free_bat_counters:
free_percpu(bat_priv->bat_counters);
+ bat_priv->bat_counters = NULL;
return ret;
}
diff --git a/net/batman-adv/translation-table.c b/net/batman-adv/translation-table.c
index 5e89deeb954..429aeef3d8b 100644
--- a/net/batman-adv/translation-table.c
+++ b/net/batman-adv/translation-table.c
@@ -144,7 +144,12 @@ static void batadv_tt_orig_list_entry_free_rcu(struct rcu_head *rcu)
struct batadv_tt_orig_list_entry *orig_entry;
orig_entry = container_of(rcu, struct batadv_tt_orig_list_entry, rcu);
- batadv_orig_node_free_ref(orig_entry->orig_node);
+
+ /* We are in an rcu callback here, therefore we cannot use
+ * batadv_orig_node_free_ref() and its call_rcu():
+ * An rcu_barrier() wouldn't wait for that to finish
+ */
+ batadv_orig_node_free_ref_now(orig_entry->orig_node);
kfree(orig_entry);
}
@@ -158,10 +163,19 @@ batadv_tt_orig_list_entry_free_ref(struct batadv_tt_orig_list_entry *orig_entry)
call_rcu(&orig_entry->rcu, batadv_tt_orig_list_entry_free_rcu);
}
+/**
+ * batadv_tt_local_event - store a local TT event (ADD/DEL)
+ * @bat_priv: the bat priv with all the soft interface information
+ * @tt_local_entry: the TT entry involved in the event
+ * @event_flags: flags to store in the event structure
+ */
static void batadv_tt_local_event(struct batadv_priv *bat_priv,
- const uint8_t *addr, uint8_t flags)
+ struct batadv_tt_local_entry *tt_local_entry,
+ uint8_t event_flags)
{
struct batadv_tt_change_node *tt_change_node, *entry, *safe;
+ struct batadv_tt_common_entry *common = &tt_local_entry->common;
+ uint8_t flags = common->flags | event_flags;
bool event_removed = false;
bool del_op_requested, del_op_entry;
@@ -171,7 +185,7 @@ static void batadv_tt_local_event(struct batadv_priv *bat_priv,
return;
tt_change_node->change.flags = flags;
- memcpy(tt_change_node->change.addr, addr, ETH_ALEN);
+ memcpy(tt_change_node->change.addr, common->addr, ETH_ALEN);
del_op_requested = flags & BATADV_TT_CLIENT_DEL;
@@ -179,7 +193,7 @@ static void batadv_tt_local_event(struct batadv_priv *bat_priv,
spin_lock_bh(&bat_priv->tt.changes_list_lock);
list_for_each_entry_safe(entry, safe, &bat_priv->tt.changes_list,
list) {
- if (!batadv_compare_eth(entry->change.addr, addr))
+ if (!batadv_compare_eth(entry->change.addr, common->addr))
continue;
/* DEL+ADD in the same orig interval have no effect and can be
@@ -327,7 +341,7 @@ void batadv_tt_local_add(struct net_device *soft_iface, const uint8_t *addr,
}
add_event:
- batadv_tt_local_event(bat_priv, addr, tt_local->common.flags);
+ batadv_tt_local_event(bat_priv, tt_local, BATADV_NO_FLAGS);
check_roaming:
/* Check whether it is a roaming, but don't do anything if the roaming
@@ -524,8 +538,7 @@ batadv_tt_local_set_pending(struct batadv_priv *bat_priv,
struct batadv_tt_local_entry *tt_local_entry,
uint16_t flags, const char *message)
{
- batadv_tt_local_event(bat_priv, tt_local_entry->common.addr,
- tt_local_entry->common.flags | flags);
+ batadv_tt_local_event(bat_priv, tt_local_entry, flags);
/* The local client has to be marked as "pending to be removed" but has
* to be kept in the table in order to send it in a full table
@@ -579,8 +592,7 @@ uint16_t batadv_tt_local_remove(struct batadv_priv *bat_priv,
/* if this client has been added right now, it is possible to
* immediately purge it
*/
- batadv_tt_local_event(bat_priv, tt_local_entry->common.addr,
- curr_flags | BATADV_TT_CLIENT_DEL);
+ batadv_tt_local_event(bat_priv, tt_local_entry, BATADV_TT_CLIENT_DEL);
hlist_del_rcu(&tt_local_entry->common.hash_entry);
batadv_tt_local_entry_free_ref(tt_local_entry);
@@ -786,10 +798,25 @@ out:
batadv_tt_orig_list_entry_free_ref(orig_entry);
}
-/* caller must hold orig_node refcount */
+/**
+ * batadv_tt_global_add - add a new TT global entry or update an existing one
+ * @bat_priv: the bat priv with all the soft interface information
+ * @orig_node: the originator announcing the client
+ * @tt_addr: the mac address of the non-mesh client
+ * @flags: TT flags that have to be set for this non-mesh client
+ * @ttvn: the tt version number ever announcing this non-mesh client
+ *
+ * Add a new TT global entry for the given originator. If the entry already
+ * exists add a new reference to the given originator (a global entry can have
+ * references to multiple originators) and adjust the flags attribute to reflect
+ * the function argument.
+ * If a TT local entry exists for this non-mesh client remove it.
+ *
+ * The caller must hold orig_node refcount.
+ */
int batadv_tt_global_add(struct batadv_priv *bat_priv,
struct batadv_orig_node *orig_node,
- const unsigned char *tt_addr, uint8_t flags,
+ const unsigned char *tt_addr, uint16_t flags,
uint8_t ttvn)
{
struct batadv_tt_global_entry *tt_global_entry;
@@ -1595,11 +1622,11 @@ batadv_tt_response_fill_table(uint16_t tt_len, uint8_t ttvn,
tt_tot = tt_len / sizeof(struct batadv_tt_change);
len = tt_query_size + tt_len;
- skb = dev_alloc_skb(len + ETH_HLEN + NET_IP_ALIGN);
+ skb = netdev_alloc_skb_ip_align(NULL, len + ETH_HLEN);
if (!skb)
goto out;
- skb_reserve(skb, ETH_HLEN + NET_IP_ALIGN);
+ skb_reserve(skb, ETH_HLEN);
tt_response = (struct batadv_tt_query_packet *)skb_put(skb, len);
tt_response->ttvn = ttvn;
@@ -1660,11 +1687,11 @@ static int batadv_send_tt_request(struct batadv_priv *bat_priv,
if (!tt_req_node)
goto out;
- skb = dev_alloc_skb(sizeof(*tt_request) + ETH_HLEN + NET_IP_ALIGN);
+ skb = netdev_alloc_skb_ip_align(NULL, sizeof(*tt_request) + ETH_HLEN);
if (!skb)
goto out;
- skb_reserve(skb, ETH_HLEN + NET_IP_ALIGN);
+ skb_reserve(skb, ETH_HLEN);
tt_req_len = sizeof(*tt_request);
tt_request = (struct batadv_tt_query_packet *)skb_put(skb, tt_req_len);
@@ -1686,7 +1713,7 @@ static int batadv_send_tt_request(struct batadv_priv *bat_priv,
batadv_inc_counter(bat_priv, BATADV_CNT_TT_REQUEST_TX);
- if (batadv_send_skb_to_orig(skb, dst_orig_node, NULL))
+ if (batadv_send_skb_to_orig(skb, dst_orig_node, NULL) != NET_XMIT_DROP)
ret = 0;
out:
@@ -1710,7 +1737,7 @@ batadv_send_other_tt_response(struct batadv_priv *bat_priv,
struct batadv_orig_node *req_dst_orig_node;
struct batadv_orig_node *res_dst_orig_node = NULL;
uint8_t orig_ttvn, req_ttvn, ttvn;
- int ret = false;
+ int res, ret = false;
unsigned char *tt_buff;
bool full_table;
uint16_t tt_len, tt_tot;
@@ -1757,11 +1784,11 @@ batadv_send_other_tt_response(struct batadv_priv *bat_priv,
tt_tot = tt_len / sizeof(struct batadv_tt_change);
len = sizeof(*tt_response) + tt_len;
- skb = dev_alloc_skb(len + ETH_HLEN + NET_IP_ALIGN);
+ skb = netdev_alloc_skb_ip_align(NULL, len + ETH_HLEN);
if (!skb)
goto unlock;
- skb_reserve(skb, ETH_HLEN + NET_IP_ALIGN);
+ skb_reserve(skb, ETH_HLEN);
packet_pos = skb_put(skb, len);
tt_response = (struct batadv_tt_query_packet *)packet_pos;
tt_response->ttvn = req_ttvn;
@@ -1805,8 +1832,10 @@ batadv_send_other_tt_response(struct batadv_priv *bat_priv,
batadv_inc_counter(bat_priv, BATADV_CNT_TT_RESPONSE_TX);
- if (batadv_send_skb_to_orig(skb, res_dst_orig_node, NULL))
+ res = batadv_send_skb_to_orig(skb, res_dst_orig_node, NULL);
+ if (res != NET_XMIT_DROP)
ret = true;
+
goto out;
unlock:
@@ -1873,11 +1902,11 @@ batadv_send_my_tt_response(struct batadv_priv *bat_priv,
tt_tot = tt_len / sizeof(struct batadv_tt_change);
len = sizeof(*tt_response) + tt_len;
- skb = dev_alloc_skb(len + ETH_HLEN + NET_IP_ALIGN);
+ skb = netdev_alloc_skb_ip_align(NULL, len + ETH_HLEN);
if (!skb)
goto unlock;
- skb_reserve(skb, ETH_HLEN + NET_IP_ALIGN);
+ skb_reserve(skb, ETH_HLEN);
packet_pos = skb_put(skb, len);
tt_response = (struct batadv_tt_query_packet *)packet_pos;
tt_response->ttvn = req_ttvn;
@@ -1920,7 +1949,7 @@ batadv_send_my_tt_response(struct batadv_priv *bat_priv,
batadv_inc_counter(bat_priv, BATADV_CNT_TT_RESPONSE_TX);
- if (batadv_send_skb_to_orig(skb, orig_node, NULL))
+ if (batadv_send_skb_to_orig(skb, orig_node, NULL) != NET_XMIT_DROP)
ret = true;
goto out;
@@ -2207,11 +2236,11 @@ static void batadv_send_roam_adv(struct batadv_priv *bat_priv, uint8_t *client,
if (!batadv_tt_check_roam_count(bat_priv, client))
goto out;
- skb = dev_alloc_skb(sizeof(*roam_adv_packet) + ETH_HLEN + NET_IP_ALIGN);
+ skb = netdev_alloc_skb_ip_align(NULL, len + ETH_HLEN);
if (!skb)
goto out;
- skb_reserve(skb, ETH_HLEN + NET_IP_ALIGN);
+ skb_reserve(skb, ETH_HLEN);
roam_adv_packet = (struct batadv_roam_adv_packet *)skb_put(skb, len);
@@ -2233,7 +2262,7 @@ static void batadv_send_roam_adv(struct batadv_priv *bat_priv, uint8_t *client,
batadv_inc_counter(bat_priv, BATADV_CNT_TT_ROAM_ADV_TX);
- if (batadv_send_skb_to_orig(skb, orig_node, NULL))
+ if (batadv_send_skb_to_orig(skb, orig_node, NULL) != NET_XMIT_DROP)
ret = 0;
out:
diff --git a/net/batman-adv/translation-table.h b/net/batman-adv/translation-table.h
index ab8e683b402..659a3bb759c 100644
--- a/net/batman-adv/translation-table.h
+++ b/net/batman-adv/translation-table.h
@@ -33,7 +33,7 @@ void batadv_tt_global_add_orig(struct batadv_priv *bat_priv,
const unsigned char *tt_buff, int tt_buff_len);
int batadv_tt_global_add(struct batadv_priv *bat_priv,
struct batadv_orig_node *orig_node,
- const unsigned char *addr, uint8_t flags,
+ const unsigned char *addr, uint16_t flags,
uint8_t ttvn);
int batadv_tt_global_seq_print_text(struct seq_file *seq, void *offset);
void batadv_tt_global_del_orig(struct batadv_priv *bat_priv,
diff --git a/net/batman-adv/types.h b/net/batman-adv/types.h
index aba8364c368..b2c94e13931 100644
--- a/net/batman-adv/types.h
+++ b/net/batman-adv/types.h
@@ -61,6 +61,7 @@ struct batadv_hard_iface_bat_iv {
* @if_status: status of the interface for batman-adv
* @net_dev: pointer to the net_device
* @frag_seqno: last fragment sequence number sent by this interface
+ * @num_bcasts: number of payload re-broadcasts on this interface (ARQ)
* @hardif_obj: kobject of the per interface sysfs "mesh" directory
* @refcount: number of contexts the object is used
* @batman_adv_ptype: packet type describing packets that should be processed by
@@ -76,6 +77,7 @@ struct batadv_hard_iface {
char if_status;
struct net_device *net_dev;
atomic_t frag_seqno;
+ uint8_t num_bcasts;
struct kobject *hardif_obj;
atomic_t refcount;
struct packet_type batman_adv_ptype;
@@ -640,7 +642,7 @@ struct batadv_socket_packet {
#ifdef CONFIG_BATMAN_ADV_BLA
struct batadv_bla_backbone_gw {
uint8_t orig[ETH_ALEN];
- short vid;
+ unsigned short vid;
struct hlist_node hash_entry;
struct batadv_priv *bat_priv;
unsigned long lasttime;
@@ -663,7 +665,7 @@ struct batadv_bla_backbone_gw {
*/
struct batadv_bla_claim {
uint8_t addr[ETH_ALEN];
- short vid;
+ unsigned short vid;
struct batadv_bla_backbone_gw *backbone_gw;
unsigned long lasttime;
struct hlist_node hash_entry;
diff --git a/net/batman-adv/unicast.c b/net/batman-adv/unicast.c
index 0bb3b5982f9..dc8b5d4dd63 100644
--- a/net/batman-adv/unicast.c
+++ b/net/batman-adv/unicast.c
@@ -464,7 +464,7 @@ find_router:
goto out;
}
- if (batadv_send_skb_to_orig(skb, orig_node, NULL))
+ if (batadv_send_skb_to_orig(skb, orig_node, NULL) != NET_XMIT_DROP)
ret = 0;
out:
diff --git a/net/batman-adv/vis.c b/net/batman-adv/vis.c
index 1625e5793a8..4983340f194 100644
--- a/net/batman-adv/vis.c
+++ b/net/batman-adv/vis.c
@@ -392,12 +392,12 @@ batadv_add_packet(struct batadv_priv *bat_priv,
return NULL;
len = sizeof(*packet) + vis_info_len;
- info->skb_packet = dev_alloc_skb(len + ETH_HLEN + NET_IP_ALIGN);
+ info->skb_packet = netdev_alloc_skb_ip_align(NULL, len + ETH_HLEN);
if (!info->skb_packet) {
kfree(info);
return NULL;
}
- skb_reserve(info->skb_packet, ETH_HLEN + NET_IP_ALIGN);
+ skb_reserve(info->skb_packet, ETH_HLEN);
packet = (struct batadv_vis_packet *)skb_put(info->skb_packet, len);
kref_init(&info->refcount);
@@ -697,7 +697,7 @@ static void batadv_broadcast_vis_packet(struct batadv_priv *bat_priv,
struct batadv_orig_node *orig_node;
struct batadv_vis_packet *packet;
struct sk_buff *skb;
- uint32_t i;
+ uint32_t i, res;
packet = (struct batadv_vis_packet *)info->skb_packet->data;
@@ -724,7 +724,8 @@ static void batadv_broadcast_vis_packet(struct batadv_priv *bat_priv,
if (!skb)
continue;
- if (!batadv_send_skb_to_orig(skb, orig_node, NULL))
+ res = batadv_send_skb_to_orig(skb, orig_node, NULL);
+ if (res == NET_XMIT_DROP)
kfree_skb(skb);
}
rcu_read_unlock();
@@ -748,7 +749,7 @@ static void batadv_unicast_vis_packet(struct batadv_priv *bat_priv,
if (!skb)
goto out;
- if (!batadv_send_skb_to_orig(skb, orig_node, NULL))
+ if (batadv_send_skb_to_orig(skb, orig_node, NULL) == NET_XMIT_DROP)
kfree_skb(skb);
out:
@@ -854,13 +855,13 @@ int batadv_vis_init(struct batadv_priv *bat_priv)
if (!bat_priv->vis.my_info)
goto err;
- len = sizeof(*packet) + BATADV_MAX_VIS_PACKET_SIZE;
- len += ETH_HLEN + NET_IP_ALIGN;
- bat_priv->vis.my_info->skb_packet = dev_alloc_skb(len);
+ len = sizeof(*packet) + BATADV_MAX_VIS_PACKET_SIZE + ETH_HLEN;
+ bat_priv->vis.my_info->skb_packet = netdev_alloc_skb_ip_align(NULL,
+ len);
if (!bat_priv->vis.my_info->skb_packet)
goto free_info;
- skb_reserve(bat_priv->vis.my_info->skb_packet, ETH_HLEN + NET_IP_ALIGN);
+ skb_reserve(bat_priv->vis.my_info->skb_packet, ETH_HLEN);
tmp_skb = bat_priv->vis.my_info->skb_packet;
packet = (struct batadv_vis_packet *)skb_put(tmp_skb, sizeof(*packet));
diff --git a/net/bridge/br_device.c b/net/bridge/br_device.c
index 967312803e4..2ef66781fed 100644
--- a/net/bridge/br_device.c
+++ b/net/bridge/br_device.c
@@ -22,6 +22,9 @@
#include <asm/uaccess.h>
#include "br_private.h"
+#define COMMON_FEATURES (NETIF_F_SG | NETIF_F_FRAGLIST | NETIF_F_HIGHDMA | \
+ NETIF_F_GSO_MASK | NETIF_F_HW_CSUM)
+
/* net device transmit always called with BH disabled */
netdev_tx_t br_dev_xmit(struct sk_buff *skb, struct net_device *dev)
{
@@ -55,10 +58,10 @@ netdev_tx_t br_dev_xmit(struct sk_buff *skb, struct net_device *dev)
skb_pull(skb, ETH_HLEN);
if (is_broadcast_ether_addr(dest))
- br_flood_deliver(br, skb);
+ br_flood_deliver(br, skb, false);
else if (is_multicast_ether_addr(dest)) {
if (unlikely(netpoll_tx_running(dev))) {
- br_flood_deliver(br, skb);
+ br_flood_deliver(br, skb, false);
goto out;
}
if (br_multicast_rcv(br, NULL, skb)) {
@@ -70,11 +73,11 @@ netdev_tx_t br_dev_xmit(struct sk_buff *skb, struct net_device *dev)
if (mdst || BR_INPUT_SKB_CB_MROUTERS_ONLY(skb))
br_multicast_deliver(mdst, skb);
else
- br_flood_deliver(br, skb);
+ br_flood_deliver(br, skb, false);
} else if ((dst = __br_fdb_get(br, dest, vid)) != NULL)
br_deliver(dst->dst, skb);
else
- br_flood_deliver(br, skb);
+ br_flood_deliver(br, skb, true);
out:
rcu_read_unlock();
@@ -346,12 +349,10 @@ void br_dev_setup(struct net_device *dev)
dev->tx_queue_len = 0;
dev->priv_flags = IFF_EBRIDGE;
- dev->features = NETIF_F_SG | NETIF_F_FRAGLIST | NETIF_F_HIGHDMA |
- NETIF_F_GSO_MASK | NETIF_F_HW_CSUM | NETIF_F_LLTX |
- NETIF_F_NETNS_LOCAL | NETIF_F_HW_VLAN_CTAG_TX;
- dev->hw_features = NETIF_F_SG | NETIF_F_FRAGLIST | NETIF_F_HIGHDMA |
- NETIF_F_GSO_MASK | NETIF_F_HW_CSUM |
- NETIF_F_HW_VLAN_CTAG_TX;
+ dev->features = COMMON_FEATURES | NETIF_F_LLTX | NETIF_F_NETNS_LOCAL |
+ NETIF_F_HW_VLAN_CTAG_TX;
+ dev->hw_features = COMMON_FEATURES | NETIF_F_HW_VLAN_CTAG_TX;
+ dev->vlan_features = COMMON_FEATURES;
br->dev = dev;
spin_lock_init(&br->lock);
diff --git a/net/bridge/br_forward.c b/net/bridge/br_forward.c
index 092b20e4ee4..4b81b147178 100644
--- a/net/bridge/br_forward.c
+++ b/net/bridge/br_forward.c
@@ -174,7 +174,8 @@ out:
static void br_flood(struct net_bridge *br, struct sk_buff *skb,
struct sk_buff *skb0,
void (*__packet_hook)(const struct net_bridge_port *p,
- struct sk_buff *skb))
+ struct sk_buff *skb),
+ bool unicast)
{
struct net_bridge_port *p;
struct net_bridge_port *prev;
@@ -182,6 +183,9 @@ static void br_flood(struct net_bridge *br, struct sk_buff *skb,
prev = NULL;
list_for_each_entry_rcu(p, &br->port_list, list) {
+ /* Do not flood unicast traffic to ports that turn it off */
+ if (unicast && !(p->flags & BR_FLOOD))
+ continue;
prev = maybe_deliver(prev, p, skb, __packet_hook);
if (IS_ERR(prev))
goto out;
@@ -203,16 +207,16 @@ out:
/* called with rcu_read_lock */
-void br_flood_deliver(struct net_bridge *br, struct sk_buff *skb)
+void br_flood_deliver(struct net_bridge *br, struct sk_buff *skb, bool unicast)
{
- br_flood(br, skb, NULL, __br_deliver);
+ br_flood(br, skb, NULL, __br_deliver, unicast);
}
/* called under bridge lock */
void br_flood_forward(struct net_bridge *br, struct sk_buff *skb,
- struct sk_buff *skb2)
+ struct sk_buff *skb2, bool unicast)
{
- br_flood(br, skb, skb2, __br_forward);
+ br_flood(br, skb, skb2, __br_forward, unicast);
}
#ifdef CONFIG_BRIDGE_IGMP_SNOOPING
diff --git a/net/bridge/br_if.c b/net/bridge/br_if.c
index 4cdba60926f..5623be6b9ec 100644
--- a/net/bridge/br_if.c
+++ b/net/bridge/br_if.c
@@ -221,7 +221,7 @@ static struct net_bridge_port *new_nbp(struct net_bridge *br,
p->path_cost = port_cost(dev);
p->priority = 0x8000 >> BR_PORT_BITS;
p->port_no = index;
- p->flags = 0;
+ p->flags = BR_LEARNING | BR_FLOOD;
br_init_port(p);
p->state = BR_STATE_DISABLED;
br_stp_port_timer_init(p);
diff --git a/net/bridge/br_input.c b/net/bridge/br_input.c
index 828e2bcc1f5..1b8b8b824cd 100644
--- a/net/bridge/br_input.c
+++ b/net/bridge/br_input.c
@@ -65,6 +65,7 @@ int br_handle_frame_finish(struct sk_buff *skb)
struct net_bridge_fdb_entry *dst;
struct net_bridge_mdb_entry *mdst;
struct sk_buff *skb2;
+ bool unicast = true;
u16 vid = 0;
if (!p || p->state == BR_STATE_DISABLED)
@@ -75,7 +76,8 @@ int br_handle_frame_finish(struct sk_buff *skb)
/* insert into forwarding database after filtering to avoid spoofing */
br = p->br;
- br_fdb_update(br, p, eth_hdr(skb)->h_source, vid);
+ if (p->flags & BR_LEARNING)
+ br_fdb_update(br, p, eth_hdr(skb)->h_source, vid);
if (!is_broadcast_ether_addr(dest) && is_multicast_ether_addr(dest) &&
br_multicast_rcv(br, p, skb))
@@ -94,9 +96,10 @@ int br_handle_frame_finish(struct sk_buff *skb)
dst = NULL;
- if (is_broadcast_ether_addr(dest))
+ if (is_broadcast_ether_addr(dest)) {
skb2 = skb;
- else if (is_multicast_ether_addr(dest)) {
+ unicast = false;
+ } else if (is_multicast_ether_addr(dest)) {
mdst = br_mdb_get(br, skb, vid);
if (mdst || BR_INPUT_SKB_CB_MROUTERS_ONLY(skb)) {
if ((mdst && mdst->mglist) ||
@@ -109,6 +112,7 @@ int br_handle_frame_finish(struct sk_buff *skb)
} else
skb2 = skb;
+ unicast = false;
br->dev->stats.multicast++;
} else if ((dst = __br_fdb_get(br, dest, vid)) &&
dst->is_local) {
@@ -122,7 +126,7 @@ int br_handle_frame_finish(struct sk_buff *skb)
dst->used = jiffies;
br_forward(dst->dst, skb, skb2);
} else
- br_flood_forward(br, skb, skb2);
+ br_flood_forward(br, skb, skb2, unicast);
}
if (skb2)
@@ -142,7 +146,8 @@ static int br_handle_local_finish(struct sk_buff *skb)
u16 vid = 0;
br_vlan_get_tag(skb, &vid);
- br_fdb_update(p->br, p, eth_hdr(skb)->h_source, vid);
+ if (p->flags & BR_LEARNING)
+ br_fdb_update(p->br, p, eth_hdr(skb)->h_source, vid);
return 0; /* process further */
}
diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c
index 81f2389f78e..37a46769796 100644
--- a/net/bridge/br_multicast.c
+++ b/net/bridge/br_multicast.c
@@ -23,6 +23,7 @@
#include <linux/skbuff.h>
#include <linux/slab.h>
#include <linux/timer.h>
+#include <linux/inetdevice.h>
#include <net/ip.h>
#if IS_ENABLED(CONFIG_IPV6)
#include <net/ipv6.h>
@@ -381,7 +382,8 @@ static struct sk_buff *br_ip4_multicast_alloc_query(struct net_bridge *br,
iph->frag_off = htons(IP_DF);
iph->ttl = 1;
iph->protocol = IPPROTO_IGMP;
- iph->saddr = 0;
+ iph->saddr = br->multicast_query_use_ifaddr ?
+ inet_select_addr(br->dev, 0, RT_SCOPE_LINK) : 0;
iph->daddr = htonl(INADDR_ALLHOSTS_GROUP);
((u8 *)&iph[1])[0] = IPOPT_RA;
((u8 *)&iph[1])[1] = 4;
@@ -615,8 +617,6 @@ rehash:
mp->br = br;
mp->addr = *group;
- setup_timer(&mp->timer, br_multicast_group_expired,
- (unsigned long)mp);
hlist_add_head_rcu(&mp->hlist[mdb->ver], &mdb->mhash[hash]);
mdb->size++;
@@ -654,7 +654,6 @@ static int br_multicast_add_group(struct net_bridge *br,
struct net_bridge_mdb_entry *mp;
struct net_bridge_port_group *p;
struct net_bridge_port_group __rcu **pp;
- unsigned long now = jiffies;
int err;
spin_lock(&br->multicast_lock);
@@ -669,7 +668,6 @@ static int br_multicast_add_group(struct net_bridge *br,
if (!port) {
mp->mglist = true;
- mod_timer(&mp->timer, now + br->multicast_membership_interval);
goto out;
}
@@ -677,7 +675,7 @@ static int br_multicast_add_group(struct net_bridge *br,
(p = mlock_dereference(*pp, br)) != NULL;
pp = &p->next) {
if (p->port == port)
- goto found;
+ goto out;
if ((unsigned long)p->port < (unsigned long)port)
break;
}
@@ -688,8 +686,6 @@ static int br_multicast_add_group(struct net_bridge *br,
rcu_assign_pointer(*pp, p);
br_mdb_notify(br->dev, port, group, RTM_NEWMDB);
-found:
- mod_timer(&p->timer, now + br->multicast_membership_interval);
out:
err = 0;
@@ -1129,6 +1125,10 @@ static int br_ip4_multicast_query(struct net_bridge *br,
if (!mp)
goto out;
+ setup_timer(&mp->timer, br_multicast_group_expired, (unsigned long)mp);
+ mod_timer(&mp->timer, now + br->multicast_membership_interval);
+ mp->timer_armed = true;
+
max_delay *= br->multicast_last_member_count;
if (mp->mglist &&
@@ -1203,6 +1203,10 @@ static int br_ip6_multicast_query(struct net_bridge *br,
if (!mp)
goto out;
+ setup_timer(&mp->timer, br_multicast_group_expired, (unsigned long)mp);
+ mod_timer(&mp->timer, now + br->multicast_membership_interval);
+ mp->timer_armed = true;
+
max_delay *= br->multicast_last_member_count;
if (mp->mglist &&
(timer_pending(&mp->timer) ?
@@ -1246,6 +1250,32 @@ static void br_multicast_leave_group(struct net_bridge *br,
if (!mp)
goto out;
+ if (br->multicast_querier &&
+ !timer_pending(&br->multicast_querier_timer)) {
+ __br_multicast_send_query(br, port, &mp->addr);
+
+ time = jiffies + br->multicast_last_member_count *
+ br->multicast_last_member_interval;
+ mod_timer(port ? &port->multicast_query_timer :
+ &br->multicast_query_timer, time);
+
+ for (p = mlock_dereference(mp->ports, br);
+ p != NULL;
+ p = mlock_dereference(p->next, br)) {
+ if (p->port != port)
+ continue;
+
+ if (!hlist_unhashed(&p->mglist) &&
+ (timer_pending(&p->timer) ?
+ time_after(p->timer.expires, time) :
+ try_to_del_timer_sync(&p->timer) >= 0)) {
+ mod_timer(&p->timer, time);
+ }
+
+ break;
+ }
+ }
+
if (port && (port->flags & BR_MULTICAST_FAST_LEAVE)) {
struct net_bridge_port_group __rcu **pp;
@@ -1261,7 +1291,7 @@ static void br_multicast_leave_group(struct net_bridge *br,
call_rcu_bh(&p->rcu, br_multicast_free_pg);
br_mdb_notify(br->dev, port, group, RTM_DELMDB);
- if (!mp->ports && !mp->mglist &&
+ if (!mp->ports && !mp->mglist && mp->timer_armed &&
netif_running(br->dev))
mod_timer(&mp->timer, jiffies);
}
@@ -1273,30 +1303,12 @@ static void br_multicast_leave_group(struct net_bridge *br,
br->multicast_last_member_interval;
if (!port) {
- if (mp->mglist &&
+ if (mp->mglist && mp->timer_armed &&
(timer_pending(&mp->timer) ?
time_after(mp->timer.expires, time) :
try_to_del_timer_sync(&mp->timer) >= 0)) {
mod_timer(&mp->timer, time);
}
-
- goto out;
- }
-
- for (p = mlock_dereference(mp->ports, br);
- p != NULL;
- p = mlock_dereference(p->next, br)) {
- if (p->port != port)
- continue;
-
- if (!hlist_unhashed(&p->mglist) &&
- (timer_pending(&p->timer) ?
- time_after(p->timer.expires, time) :
- try_to_del_timer_sync(&p->timer) >= 0)) {
- mod_timer(&p->timer, time);
- }
-
- break;
}
out:
@@ -1618,6 +1630,7 @@ void br_multicast_init(struct net_bridge *br)
br->multicast_router = 1;
br->multicast_querier = 0;
+ br->multicast_query_use_ifaddr = 0;
br->multicast_last_member_count = 2;
br->multicast_startup_query_count = 2;
@@ -1671,6 +1684,7 @@ void br_multicast_stop(struct net_bridge *br)
hlist_for_each_entry_safe(mp, n, &mdb->mhash[i],
hlist[ver]) {
del_timer(&mp->timer);
+ mp->timer_armed = false;
call_rcu_bh(&mp->rcu, br_multicast_free_group);
}
}
diff --git a/net/bridge/br_netlink.c b/net/bridge/br_netlink.c
index 8e3abf56479..1fc30abd3a5 100644
--- a/net/bridge/br_netlink.c
+++ b/net/bridge/br_netlink.c
@@ -30,6 +30,8 @@ static inline size_t br_port_info_size(void)
+ nla_total_size(1) /* IFLA_BRPORT_GUARD */
+ nla_total_size(1) /* IFLA_BRPORT_PROTECT */
+ nla_total_size(1) /* IFLA_BRPORT_FAST_LEAVE */
+ + nla_total_size(1) /* IFLA_BRPORT_LEARNING */
+ + nla_total_size(1) /* IFLA_BRPORT_UNICAST_FLOOD */
+ 0;
}
@@ -56,7 +58,9 @@ static int br_port_fill_attrs(struct sk_buff *skb,
nla_put_u8(skb, IFLA_BRPORT_MODE, mode) ||
nla_put_u8(skb, IFLA_BRPORT_GUARD, !!(p->flags & BR_BPDU_GUARD)) ||
nla_put_u8(skb, IFLA_BRPORT_PROTECT, !!(p->flags & BR_ROOT_BLOCK)) ||
- nla_put_u8(skb, IFLA_BRPORT_FAST_LEAVE, !!(p->flags & BR_MULTICAST_FAST_LEAVE)))
+ nla_put_u8(skb, IFLA_BRPORT_FAST_LEAVE, !!(p->flags & BR_MULTICAST_FAST_LEAVE)) ||
+ nla_put_u8(skb, IFLA_BRPORT_LEARNING, !!(p->flags & BR_LEARNING)) ||
+ nla_put_u8(skb, IFLA_BRPORT_UNICAST_FLOOD, !!(p->flags & BR_FLOOD)))
return -EMSGSIZE;
return 0;
@@ -281,6 +285,8 @@ static const struct nla_policy ifla_brport_policy[IFLA_BRPORT_MAX + 1] = {
[IFLA_BRPORT_MODE] = { .type = NLA_U8 },
[IFLA_BRPORT_GUARD] = { .type = NLA_U8 },
[IFLA_BRPORT_PROTECT] = { .type = NLA_U8 },
+ [IFLA_BRPORT_LEARNING] = { .type = NLA_U8 },
+ [IFLA_BRPORT_UNICAST_FLOOD] = { .type = NLA_U8 },
};
/* Change the state of the port and notify spanning tree */
@@ -328,6 +334,8 @@ static int br_setport(struct net_bridge_port *p, struct nlattr *tb[])
br_set_port_flag(p, tb, IFLA_BRPORT_GUARD, BR_BPDU_GUARD);
br_set_port_flag(p, tb, IFLA_BRPORT_FAST_LEAVE, BR_MULTICAST_FAST_LEAVE);
br_set_port_flag(p, tb, IFLA_BRPORT_PROTECT, BR_ROOT_BLOCK);
+ br_set_port_flag(p, tb, IFLA_BRPORT_LEARNING, BR_LEARNING);
+ br_set_port_flag(p, tb, IFLA_BRPORT_UNICAST_FLOOD, BR_FLOOD);
if (tb[IFLA_BRPORT_COST]) {
err = br_stp_set_path_cost(p, nla_get_u32(tb[IFLA_BRPORT_COST]));
diff --git a/net/bridge/br_notify.c b/net/bridge/br_notify.c
index 1644b3e1f94..3a3f371b284 100644
--- a/net/bridge/br_notify.c
+++ b/net/bridge/br_notify.c
@@ -31,7 +31,7 @@ struct notifier_block br_device_notifier = {
*/
static int br_device_event(struct notifier_block *unused, unsigned long event, void *ptr)
{
- struct net_device *dev = ptr;
+ struct net_device *dev = netdev_notifier_info_to_dev(ptr);
struct net_bridge_port *p;
struct net_bridge *br;
bool changed_addr;
diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h
index d2c043a857b..3be89b3ce17 100644
--- a/net/bridge/br_private.h
+++ b/net/bridge/br_private.h
@@ -112,6 +112,7 @@ struct net_bridge_mdb_entry
struct timer_list timer;
struct br_ip addr;
bool mglist;
+ bool timer_armed;
};
struct net_bridge_mdb_htable
@@ -157,6 +158,8 @@ struct net_bridge_port
#define BR_ROOT_BLOCK 0x00000004
#define BR_MULTICAST_FAST_LEAVE 0x00000008
#define BR_ADMIN_COST 0x00000010
+#define BR_LEARNING 0x00000020
+#define BR_FLOOD 0x00000040
#ifdef CONFIG_BRIDGE_IGMP_SNOOPING
u32 multicast_startup_queries_sent;
@@ -249,6 +252,7 @@ struct net_bridge
u8 multicast_disabled:1;
u8 multicast_querier:1;
+ u8 multicast_query_use_ifaddr:1;
u32 hash_elasticity;
u32 hash_max;
@@ -411,9 +415,10 @@ extern int br_dev_queue_push_xmit(struct sk_buff *skb);
extern void br_forward(const struct net_bridge_port *to,
struct sk_buff *skb, struct sk_buff *skb0);
extern int br_forward_finish(struct sk_buff *skb);
-extern void br_flood_deliver(struct net_bridge *br, struct sk_buff *skb);
+extern void br_flood_deliver(struct net_bridge *br, struct sk_buff *skb,
+ bool unicast);
extern void br_flood_forward(struct net_bridge *br, struct sk_buff *skb,
- struct sk_buff *skb2);
+ struct sk_buff *skb2, bool unicast);
/* br_if.c */
extern void br_port_carrier_check(struct net_bridge_port *p);
diff --git a/net/bridge/br_sysfs_br.c b/net/bridge/br_sysfs_br.c
index 8baa9c08e1a..394bb96b608 100644
--- a/net/bridge/br_sysfs_br.c
+++ b/net/bridge/br_sysfs_br.c
@@ -375,6 +375,31 @@ static ssize_t store_multicast_snooping(struct device *d,
static DEVICE_ATTR(multicast_snooping, S_IRUGO | S_IWUSR,
show_multicast_snooping, store_multicast_snooping);
+static ssize_t show_multicast_query_use_ifaddr(struct device *d,
+ struct device_attribute *attr,
+ char *buf)
+{
+ struct net_bridge *br = to_bridge(d);
+ return sprintf(buf, "%d\n", br->multicast_query_use_ifaddr);
+}
+
+static int set_query_use_ifaddr(struct net_bridge *br, unsigned long val)
+{
+ br->multicast_query_use_ifaddr = !!val;
+ return 0;
+}
+
+static ssize_t
+store_multicast_query_use_ifaddr(struct device *d,
+ struct device_attribute *attr,
+ const char *buf, size_t len)
+{
+ return store_bridge_parm(d, buf, len, set_query_use_ifaddr);
+}
+static DEVICE_ATTR(multicast_query_use_ifaddr, S_IRUGO | S_IWUSR,
+ show_multicast_query_use_ifaddr,
+ store_multicast_query_use_ifaddr);
+
static ssize_t show_multicast_querier(struct device *d,
struct device_attribute *attr,
char *buf)
@@ -734,6 +759,7 @@ static struct attribute *bridge_attrs[] = {
&dev_attr_multicast_router.attr,
&dev_attr_multicast_snooping.attr,
&dev_attr_multicast_querier.attr,
+ &dev_attr_multicast_query_use_ifaddr.attr,
&dev_attr_hash_elasticity.attr,
&dev_attr_hash_max.attr,
&dev_attr_multicast_last_member_count.attr,
diff --git a/net/bridge/br_sysfs_if.c b/net/bridge/br_sysfs_if.c
index a1ef1b6e14d..2a2cdb756d5 100644
--- a/net/bridge/br_sysfs_if.c
+++ b/net/bridge/br_sysfs_if.c
@@ -158,6 +158,8 @@ static BRPORT_ATTR(flush, S_IWUSR, NULL, store_flush);
BRPORT_ATTR_FLAG(hairpin_mode, BR_HAIRPIN_MODE);
BRPORT_ATTR_FLAG(bpdu_guard, BR_BPDU_GUARD);
BRPORT_ATTR_FLAG(root_block, BR_ROOT_BLOCK);
+BRPORT_ATTR_FLAG(learning, BR_LEARNING);
+BRPORT_ATTR_FLAG(unicast_flood, BR_FLOOD);
#ifdef CONFIG_BRIDGE_IGMP_SNOOPING
static ssize_t show_multicast_router(struct net_bridge_port *p, char *buf)
@@ -195,6 +197,8 @@ static const struct brport_attribute *brport_attrs[] = {
&brport_attr_hairpin_mode,
&brport_attr_bpdu_guard,
&brport_attr_root_block,
+ &brport_attr_learning,
+ &brport_attr_unicast_flood,
#ifdef CONFIG_BRIDGE_IGMP_SNOOPING
&brport_attr_multicast_router,
&brport_attr_multicast_fast_leave,
diff --git a/net/bridge/netfilter/ebt_log.c b/net/bridge/netfilter/ebt_log.c
index 9878eb8204c..19c37a4929b 100644
--- a/net/bridge/netfilter/ebt_log.c
+++ b/net/bridge/netfilter/ebt_log.c
@@ -72,13 +72,12 @@ print_ports(const struct sk_buff *skb, uint8_t protocol, int offset)
}
static void
-ebt_log_packet(u_int8_t pf, unsigned int hooknum,
- const struct sk_buff *skb, const struct net_device *in,
- const struct net_device *out, const struct nf_loginfo *loginfo,
- const char *prefix)
+ebt_log_packet(struct net *net, u_int8_t pf, unsigned int hooknum,
+ const struct sk_buff *skb, const struct net_device *in,
+ const struct net_device *out, const struct nf_loginfo *loginfo,
+ const char *prefix)
{
unsigned int bitmask;
- struct net *net = dev_net(in ? in : out);
/* FIXME: Disabled from containers until syslog ns is supported */
if (!net_eq(net, &init_net))
@@ -191,7 +190,7 @@ ebt_log_tg(struct sk_buff *skb, const struct xt_action_param *par)
nf_log_packet(net, NFPROTO_BRIDGE, par->hooknum, skb,
par->in, par->out, &li, "%s", info->prefix);
else
- ebt_log_packet(NFPROTO_BRIDGE, par->hooknum, skb, par->in,
+ ebt_log_packet(net, NFPROTO_BRIDGE, par->hooknum, skb, par->in,
par->out, &li, info->prefix);
return EBT_CONTINUE;
}
diff --git a/net/bridge/netfilter/ebt_ulog.c b/net/bridge/netfilter/ebt_ulog.c
index fc1905c5141..518093802d1 100644
--- a/net/bridge/netfilter/ebt_ulog.c
+++ b/net/bridge/netfilter/ebt_ulog.c
@@ -131,14 +131,16 @@ static struct sk_buff *ulog_alloc_skb(unsigned int size)
return skb;
}
-static void ebt_ulog_packet(unsigned int hooknr, const struct sk_buff *skb,
- const struct net_device *in, const struct net_device *out,
- const struct ebt_ulog_info *uloginfo, const char *prefix)
+static void ebt_ulog_packet(struct net *net, unsigned int hooknr,
+ const struct sk_buff *skb,
+ const struct net_device *in,
+ const struct net_device *out,
+ const struct ebt_ulog_info *uloginfo,
+ const char *prefix)
{
ebt_ulog_packet_msg_t *pm;
size_t size, copy_len;
struct nlmsghdr *nlh;
- struct net *net = dev_net(in ? in : out);
struct ebt_ulog_net *ebt = ebt_ulog_pernet(net);
unsigned int group = uloginfo->nlgroup;
ebt_ulog_buff_t *ub = &ebt->ulog_buffers[group];
@@ -233,7 +235,7 @@ unlock:
}
/* this function is registered with the netfilter core */
-static void ebt_log_packet(u_int8_t pf, unsigned int hooknum,
+static void ebt_log_packet(struct net *net, u_int8_t pf, unsigned int hooknum,
const struct sk_buff *skb, const struct net_device *in,
const struct net_device *out, const struct nf_loginfo *li,
const char *prefix)
@@ -252,13 +254,15 @@ static void ebt_log_packet(u_int8_t pf, unsigned int hooknum,
strlcpy(loginfo.prefix, prefix, sizeof(loginfo.prefix));
}
- ebt_ulog_packet(hooknum, skb, in, out, &loginfo, prefix);
+ ebt_ulog_packet(net, hooknum, skb, in, out, &loginfo, prefix);
}
static unsigned int
ebt_ulog_tg(struct sk_buff *skb, const struct xt_action_param *par)
{
- ebt_ulog_packet(par->hooknum, skb, par->in, par->out,
+ struct net *net = dev_net(par->in ? par->in : par->out);
+
+ ebt_ulog_packet(net, par->hooknum, skb, par->in, par->out,
par->targinfo, NULL);
return EBT_CONTINUE;
}
@@ -267,6 +271,12 @@ static int ebt_ulog_tg_check(const struct xt_tgchk_param *par)
{
struct ebt_ulog_info *uloginfo = par->targinfo;
+ if (!par->net->xt.ebt_ulog_warn_deprecated) {
+ pr_info("ebt_ulog is deprecated and it will be removed soon, "
+ "use ebt_nflog instead\n");
+ par->net->xt.ebt_ulog_warn_deprecated = true;
+ }
+
if (uloginfo->nlgroup > 31)
return -EINVAL;
diff --git a/net/bridge/netfilter/ebtables.c b/net/bridge/netfilter/ebtables.c
index 3d110c4fc78..ac780242838 100644
--- a/net/bridge/netfilter/ebtables.c
+++ b/net/bridge/netfilter/ebtables.c
@@ -1339,7 +1339,7 @@ static inline int ebt_make_matchname(const struct ebt_entry_match *m,
/* ebtables expects 32 bytes long names but xt_match names are 29 bytes
long. Copy 29 bytes and fill remaining bytes with zeroes. */
- strncpy(name, m->u.match->name, sizeof(name));
+ strlcpy(name, m->u.match->name, sizeof(name));
if (copy_to_user(hlp, name, EBT_FUNCTION_MAXNAMELEN))
return -EFAULT;
return 0;
@@ -1351,7 +1351,7 @@ static inline int ebt_make_watchername(const struct ebt_entry_watcher *w,
char __user *hlp = ubase + ((char *)w - base);
char name[EBT_FUNCTION_MAXNAMELEN] = {};
- strncpy(name, w->u.watcher->name, sizeof(name));
+ strlcpy(name, w->u.watcher->name, sizeof(name));
if (copy_to_user(hlp , name, EBT_FUNCTION_MAXNAMELEN))
return -EFAULT;
return 0;
@@ -1377,7 +1377,7 @@ ebt_make_names(struct ebt_entry *e, const char *base, char __user *ubase)
ret = EBT_WATCHER_ITERATE(e, ebt_make_watchername, base, ubase);
if (ret != 0)
return ret;
- strncpy(name, t->u.target->name, sizeof(name));
+ strlcpy(name, t->u.target->name, sizeof(name));
if (copy_to_user(hlp, name, EBT_FUNCTION_MAXNAMELEN))
return -EFAULT;
return 0;
diff --git a/net/caif/caif_dev.c b/net/caif/caif_dev.c
index 1f9ece1a9c3..4dca159435c 100644
--- a/net/caif/caif_dev.c
+++ b/net/caif/caif_dev.c
@@ -352,9 +352,9 @@ EXPORT_SYMBOL(caif_enroll_dev);
/* notify Caif of device events */
static int caif_device_notify(struct notifier_block *me, unsigned long what,
- void *arg)
+ void *ptr)
{
- struct net_device *dev = arg;
+ struct net_device *dev = netdev_notifier_info_to_dev(ptr);
struct caif_device_entry *caifd = NULL;
struct caif_dev_common *caifdev;
struct cfcnfg *cfg;
diff --git a/net/caif/caif_usb.c b/net/caif/caif_usb.c
index 942e00a425f..75ed04b78fa 100644
--- a/net/caif/caif_usb.c
+++ b/net/caif/caif_usb.c
@@ -121,9 +121,9 @@ static struct packet_type caif_usb_type __read_mostly = {
};
static int cfusbl_device_notify(struct notifier_block *me, unsigned long what,
- void *arg)
+ void *ptr)
{
- struct net_device *dev = arg;
+ struct net_device *dev = netdev_notifier_info_to_dev(ptr);
struct caif_dev_common common;
struct cflayer *layer, *link_support;
struct usbnet *usbnet;
diff --git a/net/can/af_can.c b/net/can/af_can.c
index c4e50852c9f..3ab8dd2e128 100644
--- a/net/can/af_can.c
+++ b/net/can/af_can.c
@@ -794,9 +794,9 @@ EXPORT_SYMBOL(can_proto_unregister);
* af_can notifier to create/remove CAN netdevice specific structs
*/
static int can_notifier(struct notifier_block *nb, unsigned long msg,
- void *data)
+ void *ptr)
{
- struct net_device *dev = (struct net_device *)data;
+ struct net_device *dev = netdev_notifier_info_to_dev(ptr);
struct dev_rcv_lists *d;
if (!net_eq(dev_net(dev), &init_net))
diff --git a/net/can/bcm.c b/net/can/bcm.c
index 8f113e6ff32..46f20bfafc0 100644
--- a/net/can/bcm.c
+++ b/net/can/bcm.c
@@ -1350,9 +1350,9 @@ static int bcm_sendmsg(struct kiocb *iocb, struct socket *sock,
* notification handler for netdevice status changes
*/
static int bcm_notifier(struct notifier_block *nb, unsigned long msg,
- void *data)
+ void *ptr)
{
- struct net_device *dev = (struct net_device *)data;
+ struct net_device *dev = netdev_notifier_info_to_dev(ptr);
struct bcm_sock *bo = container_of(nb, struct bcm_sock, notifier);
struct sock *sk = &bo->sk;
struct bcm_op *op;
diff --git a/net/can/gw.c b/net/can/gw.c
index 3ee690e8c7d..2f291f961a1 100644
--- a/net/can/gw.c
+++ b/net/can/gw.c
@@ -445,9 +445,9 @@ static inline void cgw_unregister_filter(struct cgw_job *gwj)
}
static int cgw_notifier(struct notifier_block *nb,
- unsigned long msg, void *data)
+ unsigned long msg, void *ptr)
{
- struct net_device *dev = (struct net_device *)data;
+ struct net_device *dev = netdev_notifier_info_to_dev(ptr);
if (!net_eq(dev_net(dev), &init_net))
return NOTIFY_DONE;
diff --git a/net/can/raw.c b/net/can/raw.c
index 1085e65f848..641e1c89512 100644
--- a/net/can/raw.c
+++ b/net/can/raw.c
@@ -239,9 +239,9 @@ static int raw_enable_allfilters(struct net_device *dev, struct sock *sk)
}
static int raw_notifier(struct notifier_block *nb,
- unsigned long msg, void *data)
+ unsigned long msg, void *ptr)
{
- struct net_device *dev = (struct net_device *)data;
+ struct net_device *dev = netdev_notifier_info_to_dev(ptr);
struct raw_sock *ro = container_of(nb, struct raw_sock, notifier);
struct sock *sk = &ro->sk;
diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c
index a3395fdfbd4..d5953b87918 100644
--- a/net/ceph/osd_client.c
+++ b/net/ceph/osd_client.c
@@ -1204,6 +1204,7 @@ void ceph_osdc_unregister_linger_request(struct ceph_osd_client *osdc,
mutex_lock(&osdc->request_mutex);
if (req->r_linger) {
__unregister_linger_request(osdc, req);
+ req->r_linger = 0;
ceph_osdc_put_request(req);
}
mutex_unlock(&osdc->request_mutex);
@@ -2120,7 +2121,9 @@ int ceph_osdc_start_request(struct ceph_osd_client *osdc,
down_read(&osdc->map_sem);
mutex_lock(&osdc->request_mutex);
__register_request(osdc, req);
- WARN_ON(req->r_sent);
+ req->r_sent = 0;
+ req->r_got_reply = 0;
+ req->r_completed = 0;
rc = __map_request(osdc, req, 0);
if (rc < 0) {
if (nofail) {
diff --git a/net/compat.c b/net/compat.c
index 79ae8848500..f0a1ba6c808 100644
--- a/net/compat.c
+++ b/net/compat.c
@@ -734,19 +734,25 @@ static unsigned char nas[21] = {
asmlinkage long compat_sys_sendmsg(int fd, struct compat_msghdr __user *msg, unsigned int flags)
{
- return sys_sendmsg(fd, (struct msghdr __user *)msg, flags | MSG_CMSG_COMPAT);
+ if (flags & MSG_CMSG_COMPAT)
+ return -EINVAL;
+ return __sys_sendmsg(fd, (struct msghdr __user *)msg, flags | MSG_CMSG_COMPAT);
}
asmlinkage long compat_sys_sendmmsg(int fd, struct compat_mmsghdr __user *mmsg,
unsigned int vlen, unsigned int flags)
{
+ if (flags & MSG_CMSG_COMPAT)
+ return -EINVAL;
return __sys_sendmmsg(fd, (struct mmsghdr __user *)mmsg, vlen,
flags | MSG_CMSG_COMPAT);
}
asmlinkage long compat_sys_recvmsg(int fd, struct compat_msghdr __user *msg, unsigned int flags)
{
- return sys_recvmsg(fd, (struct msghdr __user *)msg, flags | MSG_CMSG_COMPAT);
+ if (flags & MSG_CMSG_COMPAT)
+ return -EINVAL;
+ return __sys_recvmsg(fd, (struct msghdr __user *)msg, flags | MSG_CMSG_COMPAT);
}
asmlinkage long compat_sys_recv(int fd, void __user *buf, size_t len, unsigned int flags)
@@ -768,6 +774,9 @@ asmlinkage long compat_sys_recvmmsg(int fd, struct compat_mmsghdr __user *mmsg,
int datagrams;
struct timespec ktspec;
+ if (flags & MSG_CMSG_COMPAT)
+ return -EINVAL;
+
if (COMPAT_USE_64BIT_TIME)
return __sys_recvmmsg(fd, (struct mmsghdr __user *)mmsg, vlen,
flags | MSG_CMSG_COMPAT,
diff --git a/net/core/datagram.c b/net/core/datagram.c
index b71423db778..9cbaba98ce4 100644
--- a/net/core/datagram.c
+++ b/net/core/datagram.c
@@ -56,6 +56,7 @@
#include <net/sock.h>
#include <net/tcp_states.h>
#include <trace/events/skb.h>
+#include <net/ll_poll.h>
/*
* Is a socket 'connection oriented' ?
@@ -207,6 +208,9 @@ struct sk_buff *__skb_recv_datagram(struct sock *sk, unsigned int flags,
}
spin_unlock_irqrestore(&queue->lock, cpu_flags);
+ if (sk_valid_ll(sk) && sk_poll_ll(sk, flags & MSG_DONTWAIT))
+ continue;
+
/* User doesn't want to wait */
error = -EAGAIN;
if (!timeo)
diff --git a/net/core/dev.c b/net/core/dev.c
index fc1e289397f..fa007dba6be 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -129,6 +129,7 @@
#include <linux/inetdevice.h>
#include <linux/cpu_rmap.h>
#include <linux/static_key.h>
+#include <linux/hashtable.h>
#include "net-sysfs.h"
@@ -166,6 +167,12 @@ static struct list_head offload_base __read_mostly;
DEFINE_RWLOCK(dev_base_lock);
EXPORT_SYMBOL(dev_base_lock);
+/* protects napi_hash addition/deletion and napi_gen_id */
+static DEFINE_SPINLOCK(napi_hash_lock);
+
+static unsigned int napi_gen_id;
+static DEFINE_HASHTABLE(napi_hash, 8);
+
seqcount_t devnet_rename_seq;
static inline void dev_base_seq_inc(struct net *net)
@@ -1198,9 +1205,7 @@ static int __dev_open(struct net_device *dev)
* If we don't do this there is a chance ndo_poll_controller
* or ndo_poll may be running while we open the device
*/
- ret = netpoll_rx_disable(dev);
- if (ret)
- return ret;
+ netpoll_rx_disable(dev);
ret = call_netdevice_notifiers(NETDEV_PRE_UP, dev);
ret = notifier_to_errno(ret);
@@ -1309,9 +1314,7 @@ static int __dev_close(struct net_device *dev)
LIST_HEAD(single);
/* Temporarily disable netpoll until the interface is down */
- retval = netpoll_rx_disable(dev);
- if (retval)
- return retval;
+ netpoll_rx_disable(dev);
list_add(&dev->unreg_list, &single);
retval = __dev_close_many(&single);
@@ -1353,14 +1356,11 @@ static int dev_close_many(struct list_head *head)
*/
int dev_close(struct net_device *dev)
{
- int ret = 0;
if (dev->flags & IFF_UP) {
LIST_HEAD(single);
/* Block netpoll rx while the interface is going down */
- ret = netpoll_rx_disable(dev);
- if (ret)
- return ret;
+ netpoll_rx_disable(dev);
list_add(&dev->unreg_list, &single);
dev_close_many(&single);
@@ -1368,7 +1368,7 @@ int dev_close(struct net_device *dev)
netpoll_rx_enable(dev);
}
- return ret;
+ return 0;
}
EXPORT_SYMBOL(dev_close);
@@ -1398,6 +1398,14 @@ void dev_disable_lro(struct net_device *dev)
}
EXPORT_SYMBOL(dev_disable_lro);
+static int call_netdevice_notifier(struct notifier_block *nb, unsigned long val,
+ struct net_device *dev)
+{
+ struct netdev_notifier_info info;
+
+ netdev_notifier_info_init(&info, dev);
+ return nb->notifier_call(nb, val, &info);
+}
static int dev_boot_phase = 1;
@@ -1430,7 +1438,7 @@ int register_netdevice_notifier(struct notifier_block *nb)
goto unlock;
for_each_net(net) {
for_each_netdev(net, dev) {
- err = nb->notifier_call(nb, NETDEV_REGISTER, dev);
+ err = call_netdevice_notifier(nb, NETDEV_REGISTER, dev);
err = notifier_to_errno(err);
if (err)
goto rollback;
@@ -1438,7 +1446,7 @@ int register_netdevice_notifier(struct notifier_block *nb)
if (!(dev->flags & IFF_UP))
continue;
- nb->notifier_call(nb, NETDEV_UP, dev);
+ call_netdevice_notifier(nb, NETDEV_UP, dev);
}
}
@@ -1454,10 +1462,11 @@ rollback:
goto outroll;
if (dev->flags & IFF_UP) {
- nb->notifier_call(nb, NETDEV_GOING_DOWN, dev);
- nb->notifier_call(nb, NETDEV_DOWN, dev);
+ call_netdevice_notifier(nb, NETDEV_GOING_DOWN,
+ dev);
+ call_netdevice_notifier(nb, NETDEV_DOWN, dev);
}
- nb->notifier_call(nb, NETDEV_UNREGISTER, dev);
+ call_netdevice_notifier(nb, NETDEV_UNREGISTER, dev);
}
}
@@ -1495,10 +1504,11 @@ int unregister_netdevice_notifier(struct notifier_block *nb)
for_each_net(net) {
for_each_netdev(net, dev) {
if (dev->flags & IFF_UP) {
- nb->notifier_call(nb, NETDEV_GOING_DOWN, dev);
- nb->notifier_call(nb, NETDEV_DOWN, dev);
+ call_netdevice_notifier(nb, NETDEV_GOING_DOWN,
+ dev);
+ call_netdevice_notifier(nb, NETDEV_DOWN, dev);
}
- nb->notifier_call(nb, NETDEV_UNREGISTER, dev);
+ call_netdevice_notifier(nb, NETDEV_UNREGISTER, dev);
}
}
unlock:
@@ -1508,6 +1518,25 @@ unlock:
EXPORT_SYMBOL(unregister_netdevice_notifier);
/**
+ * call_netdevice_notifiers_info - call all network notifier blocks
+ * @val: value passed unmodified to notifier function
+ * @dev: net_device pointer passed unmodified to notifier function
+ * @info: notifier information data
+ *
+ * Call all network notifier blocks. Parameters and return value
+ * are as for raw_notifier_call_chain().
+ */
+
+int call_netdevice_notifiers_info(unsigned long val, struct net_device *dev,
+ struct netdev_notifier_info *info)
+{
+ ASSERT_RTNL();
+ netdev_notifier_info_init(info, dev);
+ return raw_notifier_call_chain(&netdev_chain, val, info);
+}
+EXPORT_SYMBOL(call_netdevice_notifiers_info);
+
+/**
* call_netdevice_notifiers - call all network notifier blocks
* @val: value passed unmodified to notifier function
* @dev: net_device pointer passed unmodified to notifier function
@@ -1518,8 +1547,9 @@ EXPORT_SYMBOL(unregister_netdevice_notifier);
int call_netdevice_notifiers(unsigned long val, struct net_device *dev)
{
- ASSERT_RTNL();
- return raw_notifier_call_chain(&netdev_chain, val, dev);
+ struct netdev_notifier_info info;
+
+ return call_netdevice_notifiers_info(val, dev, &info);
}
EXPORT_SYMBOL(call_netdevice_notifiers);
@@ -1629,7 +1659,6 @@ int dev_forward_skb(struct net_device *dev, struct sk_buff *skb)
return NET_RX_DROP;
}
skb->skb_iif = 0;
- skb->dev = dev;
skb_dst_drop(skb);
skb->tstamp.tv64 = 0;
skb->pkt_type = PACKET_HOST;
@@ -1702,7 +1731,7 @@ static void dev_queue_xmit_nit(struct sk_buff *skb, struct net_device *dev)
skb_reset_mac_header(skb2);
if (skb_network_header(skb2) < skb2->data ||
- skb2->network_header > skb2->tail) {
+ skb_network_header(skb2) > skb_tail_pointer(skb2)) {
net_crit_ratelimited("protocol %04x is buggy, dev %s\n",
ntohs(skb2->protocol),
dev->name);
@@ -3065,6 +3094,46 @@ static int rps_ipi_queued(struct softnet_data *sd)
return 0;
}
+#ifdef CONFIG_NET_FLOW_LIMIT
+int netdev_flow_limit_table_len __read_mostly = (1 << 12);
+#endif
+
+static bool skb_flow_limit(struct sk_buff *skb, unsigned int qlen)
+{
+#ifdef CONFIG_NET_FLOW_LIMIT
+ struct sd_flow_limit *fl;
+ struct softnet_data *sd;
+ unsigned int old_flow, new_flow;
+
+ if (qlen < (netdev_max_backlog >> 1))
+ return false;
+
+ sd = &__get_cpu_var(softnet_data);
+
+ rcu_read_lock();
+ fl = rcu_dereference(sd->flow_limit);
+ if (fl) {
+ new_flow = skb_get_rxhash(skb) & (fl->num_buckets - 1);
+ old_flow = fl->history[fl->history_head];
+ fl->history[fl->history_head] = new_flow;
+
+ fl->history_head++;
+ fl->history_head &= FLOW_LIMIT_HISTORY - 1;
+
+ if (likely(fl->buckets[old_flow]))
+ fl->buckets[old_flow]--;
+
+ if (++fl->buckets[new_flow] > (FLOW_LIMIT_HISTORY >> 1)) {
+ fl->count++;
+ rcu_read_unlock();
+ return true;
+ }
+ }
+ rcu_read_unlock();
+#endif
+ return false;
+}
+
/*
* enqueue_to_backlog is called to queue an skb to a per CPU backlog
* queue (may be a remote CPU queue).
@@ -3074,13 +3143,15 @@ static int enqueue_to_backlog(struct sk_buff *skb, int cpu,
{
struct softnet_data *sd;
unsigned long flags;
+ unsigned int qlen;
sd = &per_cpu(softnet_data, cpu);
local_irq_save(flags);
rps_lock(sd);
- if (skb_queue_len(&sd->input_pkt_queue) <= netdev_max_backlog) {
+ qlen = skb_queue_len(&sd->input_pkt_queue);
+ if (qlen <= netdev_max_backlog && !skb_flow_limit(skb, qlen)) {
if (skb_queue_len(&sd->input_pkt_queue)) {
enqueue:
__skb_queue_tail(&sd->input_pkt_queue, skb);
@@ -3828,7 +3899,7 @@ static void skb_gro_reset_offset(struct sk_buff *skb)
NAPI_GRO_CB(skb)->frag0 = NULL;
NAPI_GRO_CB(skb)->frag0_len = 0;
- if (skb->mac_header == skb->tail &&
+ if (skb_mac_header(skb) == skb_tail_pointer(skb) &&
pinfo->nr_frags &&
!PageHighMem(skb_frag_page(frag0))) {
NAPI_GRO_CB(skb)->frag0 = skb_frag_address(frag0);
@@ -4072,6 +4143,58 @@ void napi_complete(struct napi_struct *n)
}
EXPORT_SYMBOL(napi_complete);
+/* must be called under rcu_read_lock(), as we dont take a reference */
+struct napi_struct *napi_by_id(unsigned int napi_id)
+{
+ unsigned int hash = napi_id % HASH_SIZE(napi_hash);
+ struct napi_struct *napi;
+
+ hlist_for_each_entry_rcu(napi, &napi_hash[hash], napi_hash_node)
+ if (napi->napi_id == napi_id)
+ return napi;
+
+ return NULL;
+}
+EXPORT_SYMBOL_GPL(napi_by_id);
+
+void napi_hash_add(struct napi_struct *napi)
+{
+ if (!test_and_set_bit(NAPI_STATE_HASHED, &napi->state)) {
+
+ spin_lock(&napi_hash_lock);
+
+ /* 0 is not a valid id, we also skip an id that is taken
+ * we expect both events to be extremely rare
+ */
+ napi->napi_id = 0;
+ while (!napi->napi_id) {
+ napi->napi_id = ++napi_gen_id;
+ if (napi_by_id(napi->napi_id))
+ napi->napi_id = 0;
+ }
+
+ hlist_add_head_rcu(&napi->napi_hash_node,
+ &napi_hash[napi->napi_id % HASH_SIZE(napi_hash)]);
+
+ spin_unlock(&napi_hash_lock);
+ }
+}
+EXPORT_SYMBOL_GPL(napi_hash_add);
+
+/* Warning : caller is responsible to make sure rcu grace period
+ * is respected before freeing memory containing @napi
+ */
+void napi_hash_del(struct napi_struct *napi)
+{
+ spin_lock(&napi_hash_lock);
+
+ if (test_and_clear_bit(NAPI_STATE_HASHED, &napi->state))
+ hlist_del_rcu(&napi->napi_hash_node);
+
+ spin_unlock(&napi_hash_lock);
+}
+EXPORT_SYMBOL_GPL(napi_hash_del);
+
void netif_napi_add(struct net_device *dev, struct napi_struct *napi,
int (*poll)(struct napi_struct *, int), int weight)
{
@@ -4370,7 +4493,7 @@ static int __netdev_upper_dev_link(struct net_device *dev,
else
list_add_tail_rcu(&upper->list, &dev->upper_dev_list);
dev_hold(upper_dev);
-
+ call_netdevice_notifiers(NETDEV_CHANGEUPPER, dev);
return 0;
}
@@ -4430,6 +4553,7 @@ void netdev_upper_dev_unlink(struct net_device *dev,
list_del_rcu(&upper->list);
dev_put(upper_dev);
kfree_rcu(upper, rcu);
+ call_netdevice_notifiers(NETDEV_CHANGEUPPER, dev);
}
EXPORT_SYMBOL(netdev_upper_dev_unlink);
@@ -4700,8 +4824,13 @@ void __dev_notify_flags(struct net_device *dev, unsigned int old_flags)
}
if (dev->flags & IFF_UP &&
- (changes & ~(IFF_UP | IFF_PROMISC | IFF_ALLMULTI | IFF_VOLATILE)))
- call_netdevice_notifiers(NETDEV_CHANGE, dev);
+ (changes & ~(IFF_UP | IFF_PROMISC | IFF_ALLMULTI | IFF_VOLATILE))) {
+ struct netdev_notifier_change_info change_info;
+
+ change_info.flags_changed = changes;
+ call_netdevice_notifiers_info(NETDEV_CHANGE, dev,
+ &change_info.info);
+ }
}
/**
@@ -5235,6 +5364,10 @@ int register_netdevice(struct net_device *dev)
*/
dev->hw_enc_features |= NETIF_F_SG;
+ /* Make NETIF_F_SG inheritable to MPLS.
+ */
+ dev->mpls_features |= NETIF_F_SG;
+
ret = call_netdevice_notifiers(NETDEV_POST_INIT, dev);
ret = notifier_to_errno(ret);
if (ret)
@@ -6014,7 +6147,7 @@ netdev_features_t netdev_increment_features(netdev_features_t all,
}
EXPORT_SYMBOL(netdev_increment_features);
-static struct hlist_head *netdev_create_hash(void)
+static struct hlist_head * __net_init netdev_create_hash(void)
{
int i;
struct hlist_head *hash;
@@ -6270,6 +6403,10 @@ static int __init net_dev_init(void)
sd->backlog.weight = weight_p;
sd->backlog.gro_list = NULL;
sd->backlog.gro_count = 0;
+
+#ifdef CONFIG_NET_FLOW_LIMIT
+ sd->flow_limit = NULL;
+#endif
}
dev_boot_phase = 0;
diff --git a/net/core/dev_addr_lists.c b/net/core/dev_addr_lists.c
index c013f38482a..6cda4e2c213 100644
--- a/net/core/dev_addr_lists.c
+++ b/net/core/dev_addr_lists.c
@@ -39,6 +39,7 @@ static int __hw_addr_create_ex(struct netdev_hw_addr_list *list,
ha->refcount = 1;
ha->global_use = global;
ha->synced = sync;
+ ha->sync_cnt = 0;
list_add_tail_rcu(&ha->list, &list->list);
list->count++;
@@ -66,7 +67,7 @@ static int __hw_addr_add_ex(struct netdev_hw_addr_list *list,
}
if (sync) {
if (ha->synced)
- return 0;
+ return -EEXIST;
else
ha->synced = true;
}
@@ -139,10 +140,13 @@ static int __hw_addr_sync_one(struct netdev_hw_addr_list *to_list,
err = __hw_addr_add_ex(to_list, ha->addr, addr_len, ha->type,
false, true);
- if (err)
+ if (err && err != -EEXIST)
return err;
- ha->sync_cnt++;
- ha->refcount++;
+
+ if (!err) {
+ ha->sync_cnt++;
+ ha->refcount++;
+ }
return 0;
}
@@ -159,7 +163,8 @@ static void __hw_addr_unsync_one(struct netdev_hw_addr_list *to_list,
if (err)
return;
ha->sync_cnt--;
- __hw_addr_del_entry(from_list, ha, false, true);
+ /* address on from list is not marked synced */
+ __hw_addr_del_entry(from_list, ha, false, false);
}
static int __hw_addr_sync_multiple(struct netdev_hw_addr_list *to_list,
@@ -796,7 +801,7 @@ int dev_mc_sync_multiple(struct net_device *to, struct net_device *from)
return -EINVAL;
netif_addr_lock_nested(to);
- err = __hw_addr_sync(&to->mc, &from->mc, to->addr_len);
+ err = __hw_addr_sync_multiple(&to->mc, &from->mc, to->addr_len);
if (!err)
__dev_set_rx_mode(to);
netif_addr_unlock(to);
diff --git a/net/core/drop_monitor.c b/net/core/drop_monitor.c
index d23b6682f4e..5e78d44333b 100644
--- a/net/core/drop_monitor.c
+++ b/net/core/drop_monitor.c
@@ -295,9 +295,9 @@ static int net_dm_cmd_trace(struct sk_buff *skb,
}
static int dropmon_net_event(struct notifier_block *ev_block,
- unsigned long event, void *ptr)
+ unsigned long event, void *ptr)
{
- struct net_device *dev = ptr;
+ struct net_device *dev = netdev_notifier_info_to_dev(ptr);
struct dm_hw_stat_delta *new_stat = NULL;
struct dm_hw_stat_delta *tmp;
diff --git a/net/core/dst.c b/net/core/dst.c
index df9cc810ec8..ca4231ec734 100644
--- a/net/core/dst.c
+++ b/net/core/dst.c
@@ -372,7 +372,7 @@ static void dst_ifdown(struct dst_entry *dst, struct net_device *dev,
static int dst_dev_event(struct notifier_block *this, unsigned long event,
void *ptr)
{
- struct net_device *dev = ptr;
+ struct net_device *dev = netdev_notifier_info_to_dev(ptr);
struct dst_entry *dst, *last = NULL;
switch (event) {
diff --git a/net/core/ethtool.c b/net/core/ethtool.c
index 22efdaa76eb..cd23d314d68 100644
--- a/net/core/ethtool.c
+++ b/net/core/ethtool.c
@@ -82,6 +82,7 @@ static const char netdev_features_strings[NETDEV_FEATURE_COUNT][ETH_GSTRING_LEN]
[NETIF_F_FSO_BIT] = "tx-fcoe-segmentation",
[NETIF_F_GSO_GRE_BIT] = "tx-gre-segmentation",
[NETIF_F_GSO_UDP_TUNNEL_BIT] = "tx-udp_tnl-segmentation",
+ [NETIF_F_GSO_MPLS_BIT] = "tx-mpls-segmentation",
[NETIF_F_FCOE_CRC_BIT] = "tx-checksum-fcoe-crc",
[NETIF_F_SCTP_CSUM_BIT] = "tx-checksum-sctp",
@@ -1413,7 +1414,7 @@ static int ethtool_get_module_eeprom(struct net_device *dev,
modinfo.eeprom_len);
}
-/* The main entry point in this file. Called from net/core/dev.c */
+/* The main entry point in this file. Called from net/core/dev_ioctl.c */
int dev_ethtool(struct net *net, struct ifreq *ifr)
{
diff --git a/net/core/fib_rules.c b/net/core/fib_rules.c
index d5a9f8ead0d..21735440c44 100644
--- a/net/core/fib_rules.c
+++ b/net/core/fib_rules.c
@@ -705,9 +705,9 @@ static void detach_rules(struct list_head *rules, struct net_device *dev)
static int fib_rules_event(struct notifier_block *this, unsigned long event,
- void *ptr)
+ void *ptr)
{
- struct net_device *dev = ptr;
+ struct net_device *dev = netdev_notifier_info_to_dev(ptr);
struct net *net = dev_net(dev);
struct fib_rules_ops *ops;
diff --git a/net/core/gen_estimator.c b/net/core/gen_estimator.c
index d9d198aa9fe..6b5b6e7013c 100644
--- a/net/core/gen_estimator.c
+++ b/net/core/gen_estimator.c
@@ -82,7 +82,7 @@ struct gen_estimator
{
struct list_head list;
struct gnet_stats_basic_packed *bstats;
- struct gnet_stats_rate_est *rate_est;
+ struct gnet_stats_rate_est64 *rate_est;
spinlock_t *stats_lock;
int ewma_log;
u64 last_bytes;
@@ -167,7 +167,7 @@ static void gen_add_node(struct gen_estimator *est)
static
struct gen_estimator *gen_find_node(const struct gnet_stats_basic_packed *bstats,
- const struct gnet_stats_rate_est *rate_est)
+ const struct gnet_stats_rate_est64 *rate_est)
{
struct rb_node *p = est_root.rb_node;
@@ -203,7 +203,7 @@ struct gen_estimator *gen_find_node(const struct gnet_stats_basic_packed *bstats
*
*/
int gen_new_estimator(struct gnet_stats_basic_packed *bstats,
- struct gnet_stats_rate_est *rate_est,
+ struct gnet_stats_rate_est64 *rate_est,
spinlock_t *stats_lock,
struct nlattr *opt)
{
@@ -258,7 +258,7 @@ EXPORT_SYMBOL(gen_new_estimator);
* Note : Caller should respect an RCU grace period before freeing stats_lock
*/
void gen_kill_estimator(struct gnet_stats_basic_packed *bstats,
- struct gnet_stats_rate_est *rate_est)
+ struct gnet_stats_rate_est64 *rate_est)
{
struct gen_estimator *e;
@@ -290,7 +290,7 @@ EXPORT_SYMBOL(gen_kill_estimator);
* Returns 0 on success or a negative error code.
*/
int gen_replace_estimator(struct gnet_stats_basic_packed *bstats,
- struct gnet_stats_rate_est *rate_est,
+ struct gnet_stats_rate_est64 *rate_est,
spinlock_t *stats_lock, struct nlattr *opt)
{
gen_kill_estimator(bstats, rate_est);
@@ -306,7 +306,7 @@ EXPORT_SYMBOL(gen_replace_estimator);
* Returns true if estimator is active, and false if not.
*/
bool gen_estimator_active(const struct gnet_stats_basic_packed *bstats,
- const struct gnet_stats_rate_est *rate_est)
+ const struct gnet_stats_rate_est64 *rate_est)
{
bool res;
diff --git a/net/core/gen_stats.c b/net/core/gen_stats.c
index ddedf211e58..9d3d9e78397 100644
--- a/net/core/gen_stats.c
+++ b/net/core/gen_stats.c
@@ -143,18 +143,30 @@ EXPORT_SYMBOL(gnet_stats_copy_basic);
int
gnet_stats_copy_rate_est(struct gnet_dump *d,
const struct gnet_stats_basic_packed *b,
- struct gnet_stats_rate_est *r)
+ struct gnet_stats_rate_est64 *r)
{
+ struct gnet_stats_rate_est est;
+ int res;
+
if (b && !gen_estimator_active(b, r))
return 0;
+ est.bps = min_t(u64, UINT_MAX, r->bps);
+ /* we have some time before reaching 2^32 packets per second */
+ est.pps = r->pps;
+
if (d->compat_tc_stats) {
- d->tc_stats.bps = r->bps;
- d->tc_stats.pps = r->pps;
+ d->tc_stats.bps = est.bps;
+ d->tc_stats.pps = est.pps;
}
- if (d->tail)
- return gnet_stats_copy(d, TCA_STATS_RATE_EST, r, sizeof(*r));
+ if (d->tail) {
+ res = gnet_stats_copy(d, TCA_STATS_RATE_EST, &est, sizeof(est));
+ if (res < 0 || est.bps == r->bps)
+ return res;
+ /* emit 64bit stats only if needed */
+ return gnet_stats_copy(d, TCA_STATS_RATE_EST64, r, sizeof(*r));
+ }
return 0;
}
diff --git a/net/core/iovec.c b/net/core/iovec.c
index 7e7aeb01de4..de178e46268 100644
--- a/net/core/iovec.c
+++ b/net/core/iovec.c
@@ -75,31 +75,6 @@ int verify_iovec(struct msghdr *m, struct iovec *iov, struct sockaddr_storage *a
/*
* Copy kernel to iovec. Returns -EFAULT on error.
- *
- * Note: this modifies the original iovec.
- */
-
-int memcpy_toiovec(struct iovec *iov, unsigned char *kdata, int len)
-{
- while (len > 0) {
- if (iov->iov_len) {
- int copy = min_t(unsigned int, iov->iov_len, len);
- if (copy_to_user(iov->iov_base, kdata, copy))
- return -EFAULT;
- kdata += copy;
- len -= copy;
- iov->iov_len -= copy;
- iov->iov_base += copy;
- }
- iov++;
- }
-
- return 0;
-}
-EXPORT_SYMBOL(memcpy_toiovec);
-
-/*
- * Copy kernel to iovec. Returns -EFAULT on error.
*/
int memcpy_toiovecend(const struct iovec *iov, unsigned char *kdata,
@@ -125,31 +100,6 @@ int memcpy_toiovecend(const struct iovec *iov, unsigned char *kdata,
EXPORT_SYMBOL(memcpy_toiovecend);
/*
- * Copy iovec to kernel. Returns -EFAULT on error.
- *
- * Note: this modifies the original iovec.
- */
-
-int memcpy_fromiovec(unsigned char *kdata, struct iovec *iov, int len)
-{
- while (len > 0) {
- if (iov->iov_len) {
- int copy = min_t(unsigned int, len, iov->iov_len);
- if (copy_from_user(kdata, iov->iov_base, copy))
- return -EFAULT;
- len -= copy;
- kdata += copy;
- iov->iov_base += copy;
- iov->iov_len -= copy;
- }
- iov++;
- }
-
- return 0;
-}
-EXPORT_SYMBOL(memcpy_fromiovec);
-
-/*
* Copy iovec from kernel. Returns -EFAULT on error.
*/
diff --git a/net/core/net-procfs.c b/net/core/net-procfs.c
index 569d355fec3..2bf83299600 100644
--- a/net/core/net-procfs.c
+++ b/net/core/net-procfs.c
@@ -146,11 +146,23 @@ static void softnet_seq_stop(struct seq_file *seq, void *v)
static int softnet_seq_show(struct seq_file *seq, void *v)
{
struct softnet_data *sd = v;
+ unsigned int flow_limit_count = 0;
- seq_printf(seq, "%08x %08x %08x %08x %08x %08x %08x %08x %08x %08x\n",
+#ifdef CONFIG_NET_FLOW_LIMIT
+ struct sd_flow_limit *fl;
+
+ rcu_read_lock();
+ fl = rcu_dereference(sd->flow_limit);
+ if (fl)
+ flow_limit_count = fl->count;
+ rcu_read_unlock();
+#endif
+
+ seq_printf(seq,
+ "%08x %08x %08x %08x %08x %08x %08x %08x %08x %08x %08x\n",
sd->processed, sd->dropped, sd->time_squeeze, 0,
0, 0, 0, 0, /* was fastroute */
- sd->cpu_collision, sd->received_rps);
+ sd->cpu_collision, sd->received_rps, flow_limit_count);
return 0;
}
diff --git a/net/core/netpoll.c b/net/core/netpoll.c
index cec074be8c4..03c8ec3edc7 100644
--- a/net/core/netpoll.c
+++ b/net/core/netpoll.c
@@ -247,7 +247,7 @@ static void netpoll_poll_dev(struct net_device *dev)
zap_completion_queue();
}
-int netpoll_rx_disable(struct net_device *dev)
+void netpoll_rx_disable(struct net_device *dev)
{
struct netpoll_info *ni;
int idx;
@@ -257,7 +257,6 @@ int netpoll_rx_disable(struct net_device *dev)
if (ni)
down(&ni->dev_lock);
srcu_read_unlock(&netpoll_srcu, idx);
- return 0;
}
EXPORT_SYMBOL(netpoll_rx_disable);
@@ -690,25 +689,20 @@ static void netpoll_neigh_reply(struct sk_buff *skb, struct netpoll_info *npinfo
send_skb->dev = skb->dev;
skb_reset_network_header(send_skb);
- skb_put(send_skb, sizeof(struct ipv6hdr));
- hdr = ipv6_hdr(send_skb);
-
+ hdr = (struct ipv6hdr *) skb_put(send_skb, sizeof(struct ipv6hdr));
*(__be32*)hdr = htonl(0x60000000);
-
hdr->payload_len = htons(size);
hdr->nexthdr = IPPROTO_ICMPV6;
hdr->hop_limit = 255;
hdr->saddr = *saddr;
hdr->daddr = *daddr;
- send_skb->transport_header = send_skb->tail;
- skb_put(send_skb, size);
-
- icmp6h = (struct icmp6hdr *)skb_transport_header(skb);
+ icmp6h = (struct icmp6hdr *) skb_put(send_skb, sizeof(struct icmp6hdr));
icmp6h->icmp6_type = NDISC_NEIGHBOUR_ADVERTISEMENT;
icmp6h->icmp6_router = 0;
icmp6h->icmp6_solicited = 1;
- target = (struct in6_addr *)(skb_transport_header(send_skb) + sizeof(struct icmp6hdr));
+
+ target = (struct in6_addr *) skb_put(send_skb, sizeof(struct in6_addr));
*target = msg->target;
icmp6h->icmp6_cksum = csum_ipv6_magic(saddr, daddr, size,
IPPROTO_ICMPV6,
diff --git a/net/core/netprio_cgroup.c b/net/core/netprio_cgroup.c
index 0777d0aa18c..e533259dce3 100644
--- a/net/core/netprio_cgroup.c
+++ b/net/core/netprio_cgroup.c
@@ -261,7 +261,7 @@ struct cgroup_subsys net_prio_subsys = {
static int netprio_device_event(struct notifier_block *unused,
unsigned long event, void *ptr)
{
- struct net_device *dev = ptr;
+ struct net_device *dev = netdev_notifier_info_to_dev(ptr);
struct netprio_map *old;
/*
diff --git a/net/core/pktgen.c b/net/core/pktgen.c
index 11f2704c381..303412d8332 100644
--- a/net/core/pktgen.c
+++ b/net/core/pktgen.c
@@ -1921,7 +1921,7 @@ static void pktgen_change_name(const struct pktgen_net *pn, struct net_device *d
static int pktgen_device_event(struct notifier_block *unused,
unsigned long event, void *ptr)
{
- struct net_device *dev = ptr;
+ struct net_device *dev = netdev_notifier_info_to_dev(ptr);
struct pktgen_net *pn = net_generic(dev_net(dev), pg_net_id);
if (pn->pktgen_exiting)
@@ -2708,15 +2708,15 @@ static struct sk_buff *fill_packet_ipv4(struct net_device *odev,
*vlan_encapsulated_proto = htons(ETH_P_IP);
}
- skb->network_header = skb->tail;
- skb->transport_header = skb->network_header + sizeof(struct iphdr);
- skb_put(skb, sizeof(struct iphdr) + sizeof(struct udphdr));
+ skb_set_mac_header(skb, 0);
+ skb_set_network_header(skb, skb->len);
+ iph = (struct iphdr *) skb_put(skb, sizeof(struct iphdr));
+
+ skb_set_transport_header(skb, skb->len);
+ udph = (struct udphdr *) skb_put(skb, sizeof(struct udphdr));
skb_set_queue_mapping(skb, queue_map);
skb->priority = pkt_dev->skb_priority;
- iph = ip_hdr(skb);
- udph = udp_hdr(skb);
-
memcpy(eth, pkt_dev->hh, 12);
*(__be16 *) & eth[12] = protocol;
@@ -2746,8 +2746,6 @@ static struct sk_buff *fill_packet_ipv4(struct net_device *odev,
iph->check = 0;
iph->check = ip_fast_csum((void *)iph, iph->ihl);
skb->protocol = protocol;
- skb->mac_header = (skb->network_header - ETH_HLEN -
- pkt_dev->pkt_overhead);
skb->dev = odev;
skb->pkt_type = PACKET_HOST;
pktgen_finalize_skb(pkt_dev, skb, datalen);
@@ -2822,13 +2820,14 @@ static struct sk_buff *fill_packet_ipv6(struct net_device *odev,
*vlan_encapsulated_proto = htons(ETH_P_IPV6);
}
- skb->network_header = skb->tail;
- skb->transport_header = skb->network_header + sizeof(struct ipv6hdr);
- skb_put(skb, sizeof(struct ipv6hdr) + sizeof(struct udphdr));
+ skb_set_mac_header(skb, 0);
+ skb_set_network_header(skb, skb->len);
+ iph = (struct ipv6hdr *) skb_put(skb, sizeof(struct ipv6hdr));
+
+ skb_set_transport_header(skb, skb->len);
+ udph = (struct udphdr *) skb_put(skb, sizeof(struct udphdr));
skb_set_queue_mapping(skb, queue_map);
skb->priority = pkt_dev->skb_priority;
- iph = ipv6_hdr(skb);
- udph = udp_hdr(skb);
memcpy(eth, pkt_dev->hh, 12);
*(__be16 *) &eth[12] = protocol;
@@ -2863,8 +2862,6 @@ static struct sk_buff *fill_packet_ipv6(struct net_device *odev,
iph->daddr = pkt_dev->cur_in6_daddr;
iph->saddr = pkt_dev->cur_in6_saddr;
- skb->mac_header = (skb->network_header - ETH_HLEN -
- pkt_dev->pkt_overhead);
skb->protocol = protocol;
skb->dev = odev;
skb->pkt_type = PACKET_HOST;
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index a08bd2b7fe3..49c14451d8a 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -2667,7 +2667,7 @@ static void rtnetlink_rcv(struct sk_buff *skb)
static int rtnetlink_event(struct notifier_block *this, unsigned long event, void *ptr)
{
- struct net_device *dev = ptr;
+ struct net_device *dev = netdev_notifier_info_to_dev(ptr);
switch (event) {
case NETDEV_UP:
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index af9185d0be6..edf37578e21 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -195,13 +195,11 @@ struct sk_buff *__alloc_skb_head(gfp_t gfp_mask, int node)
* the tail pointer in struct sk_buff!
*/
memset(skb, 0, offsetof(struct sk_buff, tail));
- skb->data = NULL;
+ skb->head = NULL;
skb->truesize = sizeof(struct sk_buff);
atomic_set(&skb->users, 1);
-#ifdef NET_SKBUFF_DATA_USES_OFFSET
- skb->mac_header = ~0U;
-#endif
+ skb->mac_header = (typeof(skb->mac_header))~0U;
out:
return skb;
}
@@ -275,10 +273,8 @@ struct sk_buff *__alloc_skb(unsigned int size, gfp_t gfp_mask,
skb->data = data;
skb_reset_tail_pointer(skb);
skb->end = skb->tail + size;
-#ifdef NET_SKBUFF_DATA_USES_OFFSET
- skb->mac_header = ~0U;
- skb->transport_header = ~0U;
-#endif
+ skb->mac_header = (typeof(skb->mac_header))~0U;
+ skb->transport_header = (typeof(skb->transport_header))~0U;
/* make sure we initialize shinfo sequentially */
shinfo = skb_shinfo(skb);
@@ -344,10 +340,8 @@ struct sk_buff *build_skb(void *data, unsigned int frag_size)
skb->data = data;
skb_reset_tail_pointer(skb);
skb->end = skb->tail + size;
-#ifdef NET_SKBUFF_DATA_USES_OFFSET
- skb->mac_header = ~0U;
- skb->transport_header = ~0U;
-#endif
+ skb->mac_header = (typeof(skb->mac_header))~0U;
+ skb->transport_header = (typeof(skb->transport_header))~0U;
/* make sure we initialize shinfo sequentially */
shinfo = skb_shinfo(skb);
@@ -611,7 +605,7 @@ static void skb_release_head_state(struct sk_buff *skb)
static void skb_release_all(struct sk_buff *skb)
{
skb_release_head_state(skb);
- if (likely(skb->data))
+ if (likely(skb->head))
skb_release_data(skb);
}
@@ -739,6 +733,10 @@ static void __copy_skb_header(struct sk_buff *new, const struct sk_buff *old)
new->vlan_tci = old->vlan_tci;
skb_copy_secmark(new, old);
+
+#ifdef CONFIG_NET_LL_RX_POLL
+ new->napi_id = old->napi_id;
+#endif
}
/*
@@ -911,18 +909,8 @@ static void skb_headers_offset_update(struct sk_buff *skb, int off)
static void copy_skb_header(struct sk_buff *new, const struct sk_buff *old)
{
-#ifndef NET_SKBUFF_DATA_USES_OFFSET
- /*
- * Shift between the two data areas in bytes
- */
- unsigned long offset = new->data - old->data;
-#endif
-
__copy_skb_header(new, old);
-#ifndef NET_SKBUFF_DATA_USES_OFFSET
- skb_headers_offset_update(new, offset);
-#endif
skb_shinfo(new)->gso_size = skb_shinfo(old)->gso_size;
skb_shinfo(new)->gso_segs = skb_shinfo(old)->gso_segs;
skb_shinfo(new)->gso_type = skb_shinfo(old)->gso_type;
@@ -1114,7 +1102,7 @@ int pskb_expand_head(struct sk_buff *skb, int nhead, int ntail,
skb->end = skb->head + size;
#endif
skb->tail += off;
- skb_headers_offset_update(skb, off);
+ skb_headers_offset_update(skb, nhead);
/* Only adjust this if it actually is csum_start rather than csum */
if (skb->ip_summed == CHECKSUM_PARTIAL)
skb->csum_start += nhead;
@@ -1209,9 +1197,8 @@ struct sk_buff *skb_copy_expand(const struct sk_buff *skb,
off = newheadroom - oldheadroom;
if (n->ip_summed == CHECKSUM_PARTIAL)
n->csum_start += off;
-#ifdef NET_SKBUFF_DATA_USES_OFFSET
+
skb_headers_offset_update(n, off);
-#endif
return n;
}
@@ -2853,7 +2840,7 @@ struct sk_buff *skb_segment(struct sk_buff *skb, netdev_features_t features)
doffset + tnl_hlen);
if (fskb != skb_shinfo(skb)->frag_list)
- continue;
+ goto perform_csum_check;
if (!sg) {
nskb->ip_summed = CHECKSUM_NONE;
@@ -2917,6 +2904,7 @@ skip_fraglist:
nskb->len += nskb->data_len;
nskb->truesize += nskb->data_len;
+perform_csum_check:
if (!csum) {
nskb->csum = skb_checksum(nskb, doffset,
nskb->len - doffset, 0);
diff --git a/net/core/sock.c b/net/core/sock.c
index d4f4cea726e..788c0da5eed 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -139,6 +139,8 @@
#include <net/tcp.h>
#endif
+#include <net/ll_poll.h>
+
static DEFINE_MUTEX(proto_list_mutex);
static LIST_HEAD(proto_list);
@@ -210,7 +212,7 @@ static const char *const af_family_key_strings[AF_MAX+1] = {
"sk_lock-AF_TIPC" , "sk_lock-AF_BLUETOOTH", "sk_lock-IUCV" ,
"sk_lock-AF_RXRPC" , "sk_lock-AF_ISDN" , "sk_lock-AF_PHONET" ,
"sk_lock-AF_IEEE802154", "sk_lock-AF_CAIF" , "sk_lock-AF_ALG" ,
- "sk_lock-AF_NFC" , "sk_lock-AF_MAX"
+ "sk_lock-AF_NFC" , "sk_lock-AF_VSOCK" , "sk_lock-AF_MAX"
};
static const char *const af_family_slock_key_strings[AF_MAX+1] = {
"slock-AF_UNSPEC", "slock-AF_UNIX" , "slock-AF_INET" ,
@@ -226,7 +228,7 @@ static const char *const af_family_slock_key_strings[AF_MAX+1] = {
"slock-AF_TIPC" , "slock-AF_BLUETOOTH", "slock-AF_IUCV" ,
"slock-AF_RXRPC" , "slock-AF_ISDN" , "slock-AF_PHONET" ,
"slock-AF_IEEE802154", "slock-AF_CAIF" , "slock-AF_ALG" ,
- "slock-AF_NFC" , "slock-AF_MAX"
+ "slock-AF_NFC" , "slock-AF_VSOCK" ,"slock-AF_MAX"
};
static const char *const af_family_clock_key_strings[AF_MAX+1] = {
"clock-AF_UNSPEC", "clock-AF_UNIX" , "clock-AF_INET" ,
@@ -242,7 +244,7 @@ static const char *const af_family_clock_key_strings[AF_MAX+1] = {
"clock-AF_TIPC" , "clock-AF_BLUETOOTH", "clock-AF_IUCV" ,
"clock-AF_RXRPC" , "clock-AF_ISDN" , "clock-AF_PHONET" ,
"clock-AF_IEEE802154", "clock-AF_CAIF" , "clock-AF_ALG" ,
- "clock-AF_NFC" , "clock-AF_MAX"
+ "clock-AF_NFC" , "clock-AF_VSOCK" , "clock-AF_MAX"
};
/*
@@ -1217,18 +1219,6 @@ static void sock_copy(struct sock *nsk, const struct sock *osk)
#endif
}
-/*
- * caches using SLAB_DESTROY_BY_RCU should let .next pointer from nulls nodes
- * un-modified. Special care is taken when initializing object to zero.
- */
-static inline void sk_prot_clear_nulls(struct sock *sk, int size)
-{
- if (offsetof(struct sock, sk_node.next) != 0)
- memset(sk, 0, offsetof(struct sock, sk_node.next));
- memset(&sk->sk_node.pprev, 0,
- size - offsetof(struct sock, sk_node.pprev));
-}
-
void sk_prot_clear_portaddr_nulls(struct sock *sk, int size)
{
unsigned long nulls1, nulls2;
@@ -2296,6 +2286,10 @@ void sock_init_data(struct socket *sock, struct sock *sk)
sk->sk_stamp = ktime_set(-1L, 0);
+#ifdef CONFIG_NET_LL_RX_POLL
+ sk->sk_napi_id = 0;
+#endif
+
/*
* Before updating sk_refcnt, we must commit prior changes to memory
* (Documentation/RCU/rculist_nulls.txt for details)
diff --git a/net/core/sysctl_net_core.c b/net/core/sysctl_net_core.c
index cfdb46ab3a7..4b48f39582b 100644
--- a/net/core/sysctl_net_core.c
+++ b/net/core/sysctl_net_core.c
@@ -19,6 +19,7 @@
#include <net/ip.h>
#include <net/sock.h>
#include <net/net_ratelimit.h>
+#include <net/ll_poll.h>
static int one = 1;
@@ -87,6 +88,96 @@ static int rps_sock_flow_sysctl(ctl_table *table, int write,
}
#endif /* CONFIG_RPS */
+#ifdef CONFIG_NET_FLOW_LIMIT
+static DEFINE_MUTEX(flow_limit_update_mutex);
+
+static int flow_limit_cpu_sysctl(ctl_table *table, int write,
+ void __user *buffer, size_t *lenp,
+ loff_t *ppos)
+{
+ struct sd_flow_limit *cur;
+ struct softnet_data *sd;
+ cpumask_var_t mask;
+ int i, len, ret = 0;
+
+ if (!alloc_cpumask_var(&mask, GFP_KERNEL))
+ return -ENOMEM;
+
+ if (write) {
+ ret = cpumask_parse_user(buffer, *lenp, mask);
+ if (ret)
+ goto done;
+
+ mutex_lock(&flow_limit_update_mutex);
+ len = sizeof(*cur) + netdev_flow_limit_table_len;
+ for_each_possible_cpu(i) {
+ sd = &per_cpu(softnet_data, i);
+ cur = rcu_dereference_protected(sd->flow_limit,
+ lockdep_is_held(&flow_limit_update_mutex));
+ if (cur && !cpumask_test_cpu(i, mask)) {
+ RCU_INIT_POINTER(sd->flow_limit, NULL);
+ synchronize_rcu();
+ kfree(cur);
+ } else if (!cur && cpumask_test_cpu(i, mask)) {
+ cur = kzalloc(len, GFP_KERNEL);
+ if (!cur) {
+ /* not unwinding previous changes */
+ ret = -ENOMEM;
+ goto write_unlock;
+ }
+ cur->num_buckets = netdev_flow_limit_table_len;
+ rcu_assign_pointer(sd->flow_limit, cur);
+ }
+ }
+write_unlock:
+ mutex_unlock(&flow_limit_update_mutex);
+ } else {
+ if (*ppos || !*lenp) {
+ *lenp = 0;
+ goto done;
+ }
+
+ cpumask_clear(mask);
+ rcu_read_lock();
+ for_each_possible_cpu(i) {
+ sd = &per_cpu(softnet_data, i);
+ if (rcu_dereference(sd->flow_limit))
+ cpumask_set_cpu(i, mask);
+ }
+ rcu_read_unlock();
+
+ len = cpumask_scnprintf(buffer, *lenp, mask);
+ *lenp = len + 1;
+ *ppos += len + 1;
+ }
+
+done:
+ free_cpumask_var(mask);
+ return ret;
+}
+
+static int flow_limit_table_len_sysctl(ctl_table *table, int write,
+ void __user *buffer, size_t *lenp,
+ loff_t *ppos)
+{
+ unsigned int old, *ptr;
+ int ret;
+
+ mutex_lock(&flow_limit_update_mutex);
+
+ ptr = table->data;
+ old = *ptr;
+ ret = proc_dointvec(table, write, buffer, lenp, ppos);
+ if (!ret && write && !is_power_of_2(*ptr)) {
+ *ptr = old;
+ ret = -EINVAL;
+ }
+
+ mutex_unlock(&flow_limit_update_mutex);
+ return ret;
+}
+#endif /* CONFIG_NET_FLOW_LIMIT */
+
static struct ctl_table net_core_table[] = {
#ifdef CONFIG_NET
{
@@ -180,6 +271,29 @@ static struct ctl_table net_core_table[] = {
.proc_handler = rps_sock_flow_sysctl
},
#endif
+#ifdef CONFIG_NET_FLOW_LIMIT
+ {
+ .procname = "flow_limit_cpu_bitmap",
+ .mode = 0644,
+ .proc_handler = flow_limit_cpu_sysctl
+ },
+ {
+ .procname = "flow_limit_table_len",
+ .data = &netdev_flow_limit_table_len,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = flow_limit_table_len_sysctl
+ },
+#endif /* CONFIG_NET_FLOW_LIMIT */
+#ifdef CONFIG_NET_LL_RX_POLL
+ {
+ .procname = "low_latency_poll",
+ .data = &sysctl_net_ll_poll,
+ .maxlen = sizeof(unsigned long),
+ .mode = 0644,
+ .proc_handler = proc_doulongvec_minmax
+ },
+#endif
#endif /* CONFIG_NET */
{
.procname = "netdev_budget",
diff --git a/net/decnet/af_decnet.c b/net/decnet/af_decnet.c
index c21f200eed9..dd4d506ef92 100644
--- a/net/decnet/af_decnet.c
+++ b/net/decnet/af_decnet.c
@@ -2078,9 +2078,9 @@ out_err:
}
static int dn_device_event(struct notifier_block *this, unsigned long event,
- void *ptr)
+ void *ptr)
{
- struct net_device *dev = (struct net_device *)ptr;
+ struct net_device *dev = netdev_notifier_info_to_dev(ptr);
if (!net_eq(dev_net(dev), &init_net))
return NOTIFY_DONE;
diff --git a/net/ieee802154/6lowpan.c b/net/ieee802154/6lowpan.c
index 55e1fd5b3e5..3b9d5f20bd1 100644
--- a/net/ieee802154/6lowpan.c
+++ b/net/ieee802154/6lowpan.c
@@ -1352,10 +1352,9 @@ static inline void lowpan_netlink_fini(void)
}
static int lowpan_device_event(struct notifier_block *unused,
- unsigned long event,
- void *ptr)
+ unsigned long event, void *ptr)
{
- struct net_device *dev = ptr;
+ struct net_device *dev = netdev_notifier_info_to_dev(ptr);
LIST_HEAD(del_list);
struct lowpan_dev_record *entry, *tmp;
diff --git a/net/ipv4/Kconfig b/net/ipv4/Kconfig
index 8603ca82710..37cf1a6ea3a 100644
--- a/net/ipv4/Kconfig
+++ b/net/ipv4/Kconfig
@@ -9,10 +9,7 @@ config IP_MULTICAST
intend to participate in the MBONE, a high bandwidth network on top
of the Internet which carries audio and video broadcasts. More
information about the MBONE is on the WWW at
- <http://www.savetz.com/mbone/>. Information about the multicast
- capabilities of the various network cards is contained in
- <file:Documentation/networking/multicast.txt>. For most people, it's
- safe to say N.
+ <http://www.savetz.com/mbone/>. For most people, it's safe to say N.
config IP_ADVANCED_ROUTER
bool "IP: advanced router"
@@ -223,10 +220,8 @@ config IP_MROUTE
packets that have several destination addresses. It is needed on the
MBONE, a high bandwidth network on top of the Internet which carries
audio and video broadcasts. In order to do that, you would most
- likely run the program mrouted. Information about the multicast
- capabilities of the various network cards is contained in
- <file:Documentation/networking/multicast.txt>. If you haven't heard
- about it, you don't need it.
+ likely run the program mrouted. If you haven't heard about it, you
+ don't need it.
config IP_MROUTE_MULTIPLE_TABLES
bool "IP: multicast policy routing"
diff --git a/net/ipv4/Makefile b/net/ipv4/Makefile
index 089cb9f3638..4d3e138c564 100644
--- a/net/ipv4/Makefile
+++ b/net/ipv4/Makefile
@@ -8,7 +8,7 @@ obj-y := route.o inetpeer.o protocol.o \
inet_timewait_sock.o inet_connection_sock.o \
tcp.o tcp_input.o tcp_output.o tcp_timer.o tcp_ipv4.o \
tcp_minisocks.o tcp_cong.o tcp_metrics.o tcp_fastopen.o \
- datagram.o raw.o udp.o udplite.o \
+ tcp_offload.o datagram.o raw.o udp.o udplite.o \
arp.o icmp.o devinet.o af_inet.o igmp.o \
fib_frontend.o fib_semantics.o fib_trie.o \
inet_fragment.o ping.o
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index d01be2a3ae5..7b514290efc 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -1295,6 +1295,7 @@ static struct sk_buff *inet_gso_segment(struct sk_buff *skb,
SKB_GSO_GRE |
SKB_GSO_TCPV6 |
SKB_GSO_UDP_TUNNEL |
+ SKB_GSO_MPLS |
0)))
goto out;
@@ -1384,7 +1385,7 @@ static struct sk_buff **inet_gro_receive(struct sk_buff **head,
goto out_unlock;
id = ntohl(*(__be32 *)&iph->id);
- flush = (u16)((ntohl(*(__be32 *)iph) ^ skb_gro_len(skb)) | (id ^ IP_DF));
+ flush = (u16)((ntohl(*(__be32 *)iph) ^ skb_gro_len(skb)) | (id & ~IP_DF));
id >>= 16;
for (p = *head; p; p = p->next) {
@@ -1406,6 +1407,7 @@ static struct sk_buff **inet_gro_receive(struct sk_buff **head,
NAPI_GRO_CB(p)->flush |=
(iph->ttl ^ iph2->ttl) |
(iph->tos ^ iph2->tos) |
+ ((iph->frag_off ^ iph2->frag_off) & htons(IP_DF)) |
((u16)(ntohs(iph2->id) + NAPI_GRO_CB(p)->count) ^ id);
NAPI_GRO_CB(p)->flush |= flush;
@@ -1557,15 +1559,6 @@ static const struct net_protocol tcp_protocol = {
.netns_ok = 1,
};
-static const struct net_offload tcp_offload = {
- .callbacks = {
- .gso_send_check = tcp_v4_gso_send_check,
- .gso_segment = tcp_tso_segment,
- .gro_receive = tcp4_gro_receive,
- .gro_complete = tcp4_gro_complete,
- },
-};
-
static const struct net_protocol udp_protocol = {
.handler = udp_rcv,
.err_handler = udp_err,
@@ -1681,8 +1674,8 @@ static int __init ipv4_offload_init(void)
*/
if (inet_add_offload(&udp_offload, IPPROTO_UDP) < 0)
pr_crit("%s: Cannot add UDP protocol offload\n", __func__);
- if (inet_add_offload(&tcp_offload, IPPROTO_TCP) < 0)
- pr_crit("%s: Cannot add TCP protocol offlaod\n", __func__);
+ if (tcpv4_offload_init() < 0)
+ pr_crit("%s: Cannot add TCP protocol offload\n", __func__);
dev_add_offload(&ip_packet_offload);
return 0;
diff --git a/net/ipv4/ah4.c b/net/ipv4/ah4.c
index 2e7f1948216..717902669d2 100644
--- a/net/ipv4/ah4.c
+++ b/net/ipv4/ah4.c
@@ -419,12 +419,9 @@ static void ah4_err(struct sk_buff *skb, u32 info)
if (!x)
return;
- if (icmp_hdr(skb)->type == ICMP_DEST_UNREACH) {
- atomic_inc(&flow_cache_genid);
- rt_genid_bump(net);
-
+ if (icmp_hdr(skb)->type == ICMP_DEST_UNREACH)
ipv4_update_pmtu(skb, net, info, 0, 0, IPPROTO_AH, 0);
- } else
+ else
ipv4_redirect(skb, net, 0, 0, IPPROTO_AH, 0);
xfrm_state_put(x);
}
diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c
index 247ec1951c3..4429b013f26 100644
--- a/net/ipv4/arp.c
+++ b/net/ipv4/arp.c
@@ -1234,13 +1234,19 @@ out:
static int arp_netdev_event(struct notifier_block *this, unsigned long event,
void *ptr)
{
- struct net_device *dev = ptr;
+ struct net_device *dev = netdev_notifier_info_to_dev(ptr);
+ struct netdev_notifier_change_info *change_info;
switch (event) {
case NETDEV_CHANGEADDR:
neigh_changeaddr(&arp_tbl, dev);
rt_cache_flush(dev_net(dev));
break;
+ case NETDEV_CHANGE:
+ change_info = ptr;
+ if (change_info->flags_changed & IFF_NOARP)
+ neigh_changeaddr(&arp_tbl, dev);
+ break;
default:
break;
}
diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c
index dfc39d4d48b..b047e2d8a61 100644
--- a/net/ipv4/devinet.c
+++ b/net/ipv4/devinet.c
@@ -1333,7 +1333,7 @@ static void inetdev_send_gratuitous_arp(struct net_device *dev,
static int inetdev_event(struct notifier_block *this, unsigned long event,
void *ptr)
{
- struct net_device *dev = ptr;
+ struct net_device *dev = netdev_notifier_info_to_dev(ptr);
struct in_device *in_dev = __in_dev_get_rtnl(dev);
ASSERT_RTNL();
diff --git a/net/ipv4/esp4.c b/net/ipv4/esp4.c
index 4cfe34d4cc9..ab3d814bc80 100644
--- a/net/ipv4/esp4.c
+++ b/net/ipv4/esp4.c
@@ -502,12 +502,9 @@ static void esp4_err(struct sk_buff *skb, u32 info)
if (!x)
return;
- if (icmp_hdr(skb)->type == ICMP_DEST_UNREACH) {
- atomic_inc(&flow_cache_genid);
- rt_genid_bump(net);
-
+ if (icmp_hdr(skb)->type == ICMP_DEST_UNREACH)
ipv4_update_pmtu(skb, net, info, 0, 0, IPPROTO_ESP, 0);
- } else
+ else
ipv4_redirect(skb, net, 0, 0, IPPROTO_ESP, 0);
xfrm_state_put(x);
}
diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c
index c7629a209f9..05a4888dede 100644
--- a/net/ipv4/fib_frontend.c
+++ b/net/ipv4/fib_frontend.c
@@ -1038,7 +1038,7 @@ static int fib_inetaddr_event(struct notifier_block *this, unsigned long event,
static int fib_netdev_event(struct notifier_block *this, unsigned long event, void *ptr)
{
- struct net_device *dev = ptr;
+ struct net_device *dev = netdev_notifier_info_to_dev(ptr);
struct in_device *in_dev;
struct net *net = dev_net(dev);
diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c
index 76e10b47e05..5f7d11a4587 100644
--- a/net/ipv4/icmp.c
+++ b/net/ipv4/icmp.c
@@ -482,7 +482,7 @@ void icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info)
{
struct iphdr *iph;
int room;
- struct icmp_bxm icmp_param;
+ struct icmp_bxm *icmp_param;
struct rtable *rt = skb_rtable(skb_in);
struct ipcm_cookie ipc;
struct flowi4 fl4;
@@ -503,7 +503,8 @@ void icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info)
iph = ip_hdr(skb_in);
if ((u8 *)iph < skb_in->head ||
- (skb_in->network_header + sizeof(*iph)) > skb_in->tail)
+ (skb_network_header(skb_in) + sizeof(*iph)) >
+ skb_tail_pointer(skb_in))
goto out;
/*
@@ -557,9 +558,13 @@ void icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info)
}
}
+ icmp_param = kmalloc(sizeof(*icmp_param), GFP_ATOMIC);
+ if (!icmp_param)
+ return;
+
sk = icmp_xmit_lock(net);
if (sk == NULL)
- return;
+ goto out_free;
/*
* Construct source address and options.
@@ -585,7 +590,7 @@ void icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info)
IPTOS_PREC_INTERNETCONTROL) :
iph->tos;
- if (ip_options_echo(&icmp_param.replyopts.opt.opt, skb_in))
+ if (ip_options_echo(&icmp_param->replyopts.opt.opt, skb_in))
goto out_unlock;
@@ -593,19 +598,19 @@ void icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info)
* Prepare data for ICMP header.
*/
- icmp_param.data.icmph.type = type;
- icmp_param.data.icmph.code = code;
- icmp_param.data.icmph.un.gateway = info;
- icmp_param.data.icmph.checksum = 0;
- icmp_param.skb = skb_in;
- icmp_param.offset = skb_network_offset(skb_in);
+ icmp_param->data.icmph.type = type;
+ icmp_param->data.icmph.code = code;
+ icmp_param->data.icmph.un.gateway = info;
+ icmp_param->data.icmph.checksum = 0;
+ icmp_param->skb = skb_in;
+ icmp_param->offset = skb_network_offset(skb_in);
inet_sk(sk)->tos = tos;
ipc.addr = iph->saddr;
- ipc.opt = &icmp_param.replyopts.opt;
+ ipc.opt = &icmp_param->replyopts.opt;
ipc.tx_flags = 0;
rt = icmp_route_lookup(net, &fl4, skb_in, iph, saddr, tos,
- type, code, &icmp_param);
+ type, code, icmp_param);
if (IS_ERR(rt))
goto out_unlock;
@@ -617,19 +622,21 @@ void icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info)
room = dst_mtu(&rt->dst);
if (room > 576)
room = 576;
- room -= sizeof(struct iphdr) + icmp_param.replyopts.opt.opt.optlen;
+ room -= sizeof(struct iphdr) + icmp_param->replyopts.opt.opt.optlen;
room -= sizeof(struct icmphdr);
- icmp_param.data_len = skb_in->len - icmp_param.offset;
- if (icmp_param.data_len > room)
- icmp_param.data_len = room;
- icmp_param.head_len = sizeof(struct icmphdr);
+ icmp_param->data_len = skb_in->len - icmp_param->offset;
+ if (icmp_param->data_len > room)
+ icmp_param->data_len = room;
+ icmp_param->head_len = sizeof(struct icmphdr);
- icmp_push_reply(&icmp_param, &fl4, &ipc, &rt);
+ icmp_push_reply(icmp_param, &fl4, &ipc, &rt);
ende:
ip_rt_put(rt);
out_unlock:
icmp_xmit_unlock(sk);
+out_free:
+ kfree(icmp_param);
out:;
}
EXPORT_SYMBOL(icmp_send);
@@ -657,7 +664,8 @@ static void icmp_socket_deliver(struct sk_buff *skb, u32 info)
}
/*
- * Handle ICMP_DEST_UNREACH, ICMP_TIME_EXCEED, and ICMP_QUENCH.
+ * Handle ICMP_DEST_UNREACH, ICMP_TIME_EXCEED, ICMP_QUENCH, and
+ * ICMP_PARAMETERPROB.
*/
static void icmp_unreach(struct sk_buff *skb)
@@ -939,7 +947,8 @@ error:
void icmp_err(struct sk_buff *skb, u32 info)
{
struct iphdr *iph = (struct iphdr *)skb->data;
- struct icmphdr *icmph = (struct icmphdr *)(skb->data+(iph->ihl<<2));
+ int offset = iph->ihl<<2;
+ struct icmphdr *icmph = (struct icmphdr *)(skb->data + offset);
int type = icmp_hdr(skb)->type;
int code = icmp_hdr(skb)->code;
struct net *net = dev_net(skb->dev);
@@ -949,7 +958,7 @@ void icmp_err(struct sk_buff *skb, u32 info)
* triggered by ICMP_ECHOREPLY which sent from kernel.
*/
if (icmph->type != ICMP_ECHOREPLY) {
- ping_err(skb, info);
+ ping_err(skb, offset, info);
return;
}
diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c
index d8c232794bc..450f625361e 100644
--- a/net/ipv4/igmp.c
+++ b/net/ipv4/igmp.c
@@ -363,7 +363,7 @@ static struct sk_buff *igmpv3_newpack(struct net_device *dev, int size)
static int igmpv3_sendpack(struct sk_buff *skb)
{
struct igmphdr *pig = igmp_hdr(skb);
- const int igmplen = skb->tail - skb->transport_header;
+ const int igmplen = skb_tail_pointer(skb) - skb_transport_header(skb);
pig->csum = ip_compute_csum(igmp_hdr(skb), igmplen);
diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c
index c625e4dad4b..a982657d05e 100644
--- a/net/ipv4/ip_gre.c
+++ b/net/ipv4/ip_gre.c
@@ -235,7 +235,7 @@ static void ipgre_err(struct sk_buff *skb, u32 info)
*/
struct net *net = dev_net(skb->dev);
struct ip_tunnel_net *itn;
- const struct iphdr *iph = (const struct iphdr *)skb->data;
+ const struct iphdr *iph;
const int type = icmp_hdr(skb)->type;
const int code = icmp_hdr(skb)->code;
struct ip_tunnel *t;
@@ -281,6 +281,7 @@ static void ipgre_err(struct sk_buff *skb, u32 info)
else
itn = net_generic(net, ipgre_net_id);
+ iph = (const struct iphdr *)skb->data;
t = ip_tunnel_lookup(itn, skb->dev->ifindex, tpi.flags,
iph->daddr, iph->saddr, tpi.key);
@@ -428,7 +429,7 @@ static void __gre_xmit(struct sk_buff *skb, struct net_device *dev,
return;
}
- ip_tunnel_xmit(skb, dev, tnl_params);
+ ip_tunnel_xmit(skb, dev, tnl_params, tnl_params->protocol);
}
static netdev_tx_t ipgre_xmit(struct sk_buff *skb,
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index 147abf5275a..4bcabf3ab4c 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -84,7 +84,7 @@ int sysctl_ip_default_ttl __read_mostly = IPDEFTTL;
EXPORT_SYMBOL(sysctl_ip_default_ttl);
/* Generate a checksum for an outgoing IP datagram. */
-__inline__ void ip_send_check(struct iphdr *iph)
+void ip_send_check(struct iphdr *iph)
{
iph->check = 0;
iph->check = ip_fast_csum((unsigned char *)iph, iph->ihl);
diff --git a/net/ipv4/ip_tunnel.c b/net/ipv4/ip_tunnel.c
index e4147ec1665..7c79cf8ad44 100644
--- a/net/ipv4/ip_tunnel.c
+++ b/net/ipv4/ip_tunnel.c
@@ -487,7 +487,7 @@ drop:
EXPORT_SYMBOL_GPL(ip_tunnel_rcv);
void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev,
- const struct iphdr *tnl_params)
+ const struct iphdr *tnl_params, const u8 protocol)
{
struct ip_tunnel *tunnel = netdev_priv(dev);
const struct iphdr *inner_iph;
@@ -503,6 +503,7 @@ void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev,
inner_iph = (const struct iphdr *)skb_inner_network_header(skb);
+ memset(IPCB(skb), 0, sizeof(*IPCB(skb)));
dst = tnl_params->daddr;
if (dst == 0) {
/* NBMA tunnel */
@@ -658,7 +659,6 @@ void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev,
skb_dst_drop(skb);
skb_dst_set(skb, &rt->dst);
- memset(IPCB(skb), 0, sizeof(*IPCB(skb)));
/* Push down and install the IP header. */
skb_push(skb, sizeof(struct iphdr));
@@ -670,7 +670,7 @@ void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev,
iph->version = 4;
iph->ihl = sizeof(struct iphdr) >> 2;
iph->frag_off = df;
- iph->protocol = tnl_params->protocol;
+ iph->protocol = protocol;
iph->tos = ip_tunnel_ecn_encap(tos, inner_iph, skb);
iph->daddr = fl4.daddr;
iph->saddr = fl4.saddr;
diff --git a/net/ipv4/ipcomp.c b/net/ipv4/ipcomp.c
index 59cb8c76905..826be4cb482 100644
--- a/net/ipv4/ipcomp.c
+++ b/net/ipv4/ipcomp.c
@@ -47,12 +47,9 @@ static void ipcomp4_err(struct sk_buff *skb, u32 info)
if (!x)
return;
- if (icmp_hdr(skb)->type == ICMP_DEST_UNREACH) {
- atomic_inc(&flow_cache_genid);
- rt_genid_bump(net);
-
+ if (icmp_hdr(skb)->type == ICMP_DEST_UNREACH)
ipv4_update_pmtu(skb, net, info, 0, 0, IPPROTO_COMP, 0);
- } else
+ else
ipv4_redirect(skb, net, 0, 0, IPPROTO_COMP, 0);
xfrm_state_put(x);
}
diff --git a/net/ipv4/ipip.c b/net/ipv4/ipip.c
index 77bfcce64fe..9df7ecd393f 100644
--- a/net/ipv4/ipip.c
+++ b/net/ipv4/ipip.c
@@ -222,7 +222,7 @@ static netdev_tx_t ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
skb->encapsulation = 1;
}
- ip_tunnel_xmit(skb, dev, tiph);
+ ip_tunnel_xmit(skb, dev, tiph, tiph->protocol);
return NETDEV_TX_OK;
tx_error:
diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c
index 9d9610ae785..132a0966470 100644
--- a/net/ipv4/ipmr.c
+++ b/net/ipv4/ipmr.c
@@ -980,7 +980,7 @@ static int ipmr_cache_report(struct mr_table *mrt,
/* Copy the IP header */
- skb->network_header = skb->tail;
+ skb_set_network_header(skb, skb->len);
skb_put(skb, ihl);
skb_copy_to_linear_data(skb, pkt->data, ihl);
ip_hdr(skb)->protocol = 0; /* Flag to the kernel this is a route add */
@@ -1609,7 +1609,7 @@ int ipmr_compat_ioctl(struct sock *sk, unsigned int cmd, void __user *arg)
static int ipmr_device_event(struct notifier_block *this, unsigned long event, void *ptr)
{
- struct net_device *dev = ptr;
+ struct net_device *dev = netdev_notifier_info_to_dev(ptr);
struct net *net = dev_net(dev);
struct mr_table *mrt;
struct vif_device *v;
diff --git a/net/ipv4/netfilter/Kconfig b/net/ipv4/netfilter/Kconfig
index e7916c19393..4e902801742 100644
--- a/net/ipv4/netfilter/Kconfig
+++ b/net/ipv4/netfilter/Kconfig
@@ -111,7 +111,7 @@ config IP_NF_TARGET_REJECT
To compile it as a module, choose M here. If unsure, say N.
config IP_NF_TARGET_ULOG
- tristate "ULOG target support"
+ tristate "ULOG target support (obsolete)"
default m if NETFILTER_ADVANCED=n
---help---
diff --git a/net/ipv4/netfilter/ipt_MASQUERADE.c b/net/ipv4/netfilter/ipt_MASQUERADE.c
index 5d5d4d1be9c..30e4de94056 100644
--- a/net/ipv4/netfilter/ipt_MASQUERADE.c
+++ b/net/ipv4/netfilter/ipt_MASQUERADE.c
@@ -108,7 +108,7 @@ static int masq_device_event(struct notifier_block *this,
unsigned long event,
void *ptr)
{
- const struct net_device *dev = ptr;
+ const struct net_device *dev = netdev_notifier_info_to_dev(ptr);
struct net *net = dev_net(dev);
if (event == NETDEV_DOWN) {
@@ -129,7 +129,10 @@ static int masq_inet_event(struct notifier_block *this,
void *ptr)
{
struct net_device *dev = ((struct in_ifaddr *)ptr)->ifa_dev->dev;
- return masq_device_event(this, event, dev);
+ struct netdev_notifier_info info;
+
+ netdev_notifier_info_init(&info, dev);
+ return masq_device_event(this, event, &info);
}
static struct notifier_block masq_dev_notifier = {
diff --git a/net/ipv4/netfilter/ipt_ULOG.c b/net/ipv4/netfilter/ipt_ULOG.c
index f8a222cb644..57c671152c4 100644
--- a/net/ipv4/netfilter/ipt_ULOG.c
+++ b/net/ipv4/netfilter/ipt_ULOG.c
@@ -162,7 +162,8 @@ static struct sk_buff *ulog_alloc_skb(unsigned int size)
return skb;
}
-static void ipt_ulog_packet(unsigned int hooknum,
+static void ipt_ulog_packet(struct net *net,
+ unsigned int hooknum,
const struct sk_buff *skb,
const struct net_device *in,
const struct net_device *out,
@@ -174,7 +175,6 @@ static void ipt_ulog_packet(unsigned int hooknum,
size_t size, copy_len;
struct nlmsghdr *nlh;
struct timeval tv;
- struct net *net = dev_net(in ? in : out);
struct ulog_net *ulog = ulog_pernet(net);
/* ffs == find first bit set, necessary because userspace
@@ -231,8 +231,10 @@ static void ipt_ulog_packet(unsigned int hooknum,
put_unaligned(tv.tv_usec, &pm->timestamp_usec);
put_unaligned(skb->mark, &pm->mark);
pm->hook = hooknum;
- if (prefix != NULL)
- strncpy(pm->prefix, prefix, sizeof(pm->prefix));
+ if (prefix != NULL) {
+ strncpy(pm->prefix, prefix, sizeof(pm->prefix) - 1);
+ pm->prefix[sizeof(pm->prefix) - 1] = '\0';
+ }
else if (loginfo->prefix[0] != '\0')
strncpy(pm->prefix, loginfo->prefix, sizeof(pm->prefix));
else
@@ -291,12 +293,15 @@ alloc_failure:
static unsigned int
ulog_tg(struct sk_buff *skb, const struct xt_action_param *par)
{
- ipt_ulog_packet(par->hooknum, skb, par->in, par->out,
+ struct net *net = dev_net(par->in ? par->in : par->out);
+
+ ipt_ulog_packet(net, par->hooknum, skb, par->in, par->out,
par->targinfo, NULL);
return XT_CONTINUE;
}
-static void ipt_logfn(u_int8_t pf,
+static void ipt_logfn(struct net *net,
+ u_int8_t pf,
unsigned int hooknum,
const struct sk_buff *skb,
const struct net_device *in,
@@ -318,13 +323,19 @@ static void ipt_logfn(u_int8_t pf,
strlcpy(loginfo.prefix, prefix, sizeof(loginfo.prefix));
}
- ipt_ulog_packet(hooknum, skb, in, out, &loginfo, prefix);
+ ipt_ulog_packet(net, hooknum, skb, in, out, &loginfo, prefix);
}
static int ulog_tg_check(const struct xt_tgchk_param *par)
{
const struct ipt_ulog_info *loginfo = par->targinfo;
+ if (!par->net->xt.ulog_warn_deprecated) {
+ pr_info("ULOG is deprecated and it will be removed soon, "
+ "use NFLOG instead\n");
+ par->net->xt.ulog_warn_deprecated = true;
+ }
+
if (loginfo->prefix[sizeof(loginfo->prefix) - 1] != '\0') {
pr_debug("prefix not null-terminated\n");
return -EINVAL;
diff --git a/net/ipv4/ping.c b/net/ipv4/ping.c
index 7d93d62cd5f..1f1b2dd9027 100644
--- a/net/ipv4/ping.c
+++ b/net/ipv4/ping.c
@@ -33,7 +33,6 @@
#include <linux/netdevice.h>
#include <net/snmp.h>
#include <net/ip.h>
-#include <net/ipv6.h>
#include <net/icmp.h>
#include <net/protocol.h>
#include <linux/skbuff.h>
@@ -46,8 +45,18 @@
#include <net/inet_common.h>
#include <net/checksum.h>
+#if IS_ENABLED(CONFIG_IPV6)
+#include <linux/in6.h>
+#include <linux/icmpv6.h>
+#include <net/addrconf.h>
+#include <net/ipv6.h>
+#include <net/transp_v6.h>
+#endif
-static struct ping_table ping_table;
+
+struct ping_table ping_table;
+struct pingv6_ops pingv6_ops;
+EXPORT_SYMBOL_GPL(pingv6_ops);
static u16 ping_port_rover;
@@ -58,6 +67,7 @@ static inline int ping_hashfn(struct net *net, unsigned int num, unsigned int ma
pr_debug("hash(%d) = %d\n", num, res);
return res;
}
+EXPORT_SYMBOL_GPL(ping_hash);
static inline struct hlist_nulls_head *ping_hashslot(struct ping_table *table,
struct net *net, unsigned int num)
@@ -65,7 +75,7 @@ static inline struct hlist_nulls_head *ping_hashslot(struct ping_table *table,
return &table->hash[ping_hashfn(net, num, PING_HTABLE_MASK)];
}
-static int ping_v4_get_port(struct sock *sk, unsigned short ident)
+int ping_get_port(struct sock *sk, unsigned short ident)
{
struct hlist_nulls_node *node;
struct hlist_nulls_head *hlist;
@@ -103,6 +113,10 @@ next_port:
ping_portaddr_for_each_entry(sk2, node, hlist) {
isk2 = inet_sk(sk2);
+ /* BUG? Why is this reuse and not reuseaddr? ping.c
+ * doesn't turn off SO_REUSEADDR, and it doesn't expect
+ * that other ping processes can steal its packets.
+ */
if ((isk2->inet_num == ident) &&
(sk2 != sk) &&
(!sk2->sk_reuse || !sk->sk_reuse))
@@ -125,17 +139,18 @@ fail:
write_unlock_bh(&ping_table.lock);
return 1;
}
+EXPORT_SYMBOL_GPL(ping_get_port);
-static void ping_v4_hash(struct sock *sk)
+void ping_hash(struct sock *sk)
{
- pr_debug("ping_v4_hash(sk->port=%u)\n", inet_sk(sk)->inet_num);
+ pr_debug("ping_hash(sk->port=%u)\n", inet_sk(sk)->inet_num);
BUG(); /* "Please do not press this button again." */
}
-static void ping_v4_unhash(struct sock *sk)
+void ping_unhash(struct sock *sk)
{
struct inet_sock *isk = inet_sk(sk);
- pr_debug("ping_v4_unhash(isk=%p,isk->num=%u)\n", isk, isk->inet_num);
+ pr_debug("ping_unhash(isk=%p,isk->num=%u)\n", isk, isk->inet_num);
if (sk_hashed(sk)) {
write_lock_bh(&ping_table.lock);
hlist_nulls_del(&sk->sk_nulls_node);
@@ -146,31 +161,61 @@ static void ping_v4_unhash(struct sock *sk)
write_unlock_bh(&ping_table.lock);
}
}
+EXPORT_SYMBOL_GPL(ping_unhash);
-static struct sock *ping_v4_lookup(struct net *net, __be32 saddr, __be32 daddr,
- u16 ident, int dif)
+static struct sock *ping_lookup(struct net *net, struct sk_buff *skb, u16 ident)
{
struct hlist_nulls_head *hslot = ping_hashslot(&ping_table, net, ident);
struct sock *sk = NULL;
struct inet_sock *isk;
struct hlist_nulls_node *hnode;
+ int dif = skb->dev->ifindex;
+
+ if (skb->protocol == htons(ETH_P_IP)) {
+ pr_debug("try to find: num = %d, daddr = %pI4, dif = %d\n",
+ (int)ident, &ip_hdr(skb)->daddr, dif);
+#if IS_ENABLED(CONFIG_IPV6)
+ } else if (skb->protocol == htons(ETH_P_IPV6)) {
+ pr_debug("try to find: num = %d, daddr = %pI6c, dif = %d\n",
+ (int)ident, &ipv6_hdr(skb)->daddr, dif);
+#endif
+ }
- pr_debug("try to find: num = %d, daddr = %pI4, dif = %d\n",
- (int)ident, &daddr, dif);
read_lock_bh(&ping_table.lock);
ping_portaddr_for_each_entry(sk, hnode, hslot) {
isk = inet_sk(sk);
- pr_debug("found: %p: num = %d, daddr = %pI4, dif = %d\n", sk,
- (int)isk->inet_num, &isk->inet_rcv_saddr,
- sk->sk_bound_dev_if);
-
pr_debug("iterate\n");
if (isk->inet_num != ident)
continue;
- if (isk->inet_rcv_saddr && isk->inet_rcv_saddr != daddr)
- continue;
+
+ if (skb->protocol == htons(ETH_P_IP) &&
+ sk->sk_family == AF_INET) {
+ pr_debug("found: %p: num=%d, daddr=%pI4, dif=%d\n", sk,
+ (int) isk->inet_num, &isk->inet_rcv_saddr,
+ sk->sk_bound_dev_if);
+
+ if (isk->inet_rcv_saddr &&
+ isk->inet_rcv_saddr != ip_hdr(skb)->daddr)
+ continue;
+#if IS_ENABLED(CONFIG_IPV6)
+ } else if (skb->protocol == htons(ETH_P_IPV6) &&
+ sk->sk_family == AF_INET6) {
+ struct ipv6_pinfo *np = inet6_sk(sk);
+
+ pr_debug("found: %p: num=%d, daddr=%pI6c, dif=%d\n", sk,
+ (int) isk->inet_num,
+ &inet6_sk(sk)->rcv_saddr,
+ sk->sk_bound_dev_if);
+
+ if (!ipv6_addr_any(&np->rcv_saddr) &&
+ !ipv6_addr_equal(&np->rcv_saddr,
+ &ipv6_hdr(skb)->daddr))
+ continue;
+#endif
+ }
+
if (sk->sk_bound_dev_if && sk->sk_bound_dev_if != dif)
continue;
@@ -200,7 +245,7 @@ static void inet_get_ping_group_range_net(struct net *net, kgid_t *low,
}
-static int ping_init_sock(struct sock *sk)
+int ping_init_sock(struct sock *sk)
{
struct net *net = sock_net(sk);
kgid_t group = current_egid();
@@ -225,8 +270,9 @@ static int ping_init_sock(struct sock *sk)
return -EACCES;
}
+EXPORT_SYMBOL_GPL(ping_init_sock);
-static void ping_close(struct sock *sk, long timeout)
+void ping_close(struct sock *sk, long timeout)
{
pr_debug("ping_close(sk=%p,sk->num=%u)\n",
inet_sk(sk), inet_sk(sk)->inet_num);
@@ -234,36 +280,122 @@ static void ping_close(struct sock *sk, long timeout)
sk_common_release(sk);
}
+EXPORT_SYMBOL_GPL(ping_close);
+
+/* Checks the bind address and possibly modifies sk->sk_bound_dev_if. */
+int ping_check_bind_addr(struct sock *sk, struct inet_sock *isk,
+ struct sockaddr *uaddr, int addr_len) {
+ struct net *net = sock_net(sk);
+ if (sk->sk_family == AF_INET) {
+ struct sockaddr_in *addr = (struct sockaddr_in *) uaddr;
+ int chk_addr_ret;
+
+ if (addr_len < sizeof(*addr))
+ return -EINVAL;
+
+ pr_debug("ping_check_bind_addr(sk=%p,addr=%pI4,port=%d)\n",
+ sk, &addr->sin_addr.s_addr, ntohs(addr->sin_port));
+
+ chk_addr_ret = inet_addr_type(net, addr->sin_addr.s_addr);
+
+ if (addr->sin_addr.s_addr == htonl(INADDR_ANY))
+ chk_addr_ret = RTN_LOCAL;
+
+ if ((sysctl_ip_nonlocal_bind == 0 &&
+ isk->freebind == 0 && isk->transparent == 0 &&
+ chk_addr_ret != RTN_LOCAL) ||
+ chk_addr_ret == RTN_MULTICAST ||
+ chk_addr_ret == RTN_BROADCAST)
+ return -EADDRNOTAVAIL;
+
+#if IS_ENABLED(CONFIG_IPV6)
+ } else if (sk->sk_family == AF_INET6) {
+ struct sockaddr_in6 *addr = (struct sockaddr_in6 *) uaddr;
+ int addr_type, scoped, has_addr;
+ struct net_device *dev = NULL;
+
+ if (addr_len < sizeof(*addr))
+ return -EINVAL;
+
+ pr_debug("ping_check_bind_addr(sk=%p,addr=%pI6c,port=%d)\n",
+ sk, addr->sin6_addr.s6_addr, ntohs(addr->sin6_port));
+
+ addr_type = ipv6_addr_type(&addr->sin6_addr);
+ scoped = __ipv6_addr_needs_scope_id(addr_type);
+ if ((addr_type != IPV6_ADDR_ANY &&
+ !(addr_type & IPV6_ADDR_UNICAST)) ||
+ (scoped && !addr->sin6_scope_id))
+ return -EINVAL;
+
+ rcu_read_lock();
+ if (addr->sin6_scope_id) {
+ dev = dev_get_by_index_rcu(net, addr->sin6_scope_id);
+ if (!dev) {
+ rcu_read_unlock();
+ return -ENODEV;
+ }
+ }
+ has_addr = pingv6_ops.ipv6_chk_addr(net, &addr->sin6_addr, dev,
+ scoped);
+ rcu_read_unlock();
+
+ if (!(isk->freebind || isk->transparent || has_addr ||
+ addr_type == IPV6_ADDR_ANY))
+ return -EADDRNOTAVAIL;
+
+ if (scoped)
+ sk->sk_bound_dev_if = addr->sin6_scope_id;
+#endif
+ } else {
+ return -EAFNOSUPPORT;
+ }
+ return 0;
+}
+
+void ping_set_saddr(struct sock *sk, struct sockaddr *saddr)
+{
+ if (saddr->sa_family == AF_INET) {
+ struct inet_sock *isk = inet_sk(sk);
+ struct sockaddr_in *addr = (struct sockaddr_in *) saddr;
+ isk->inet_rcv_saddr = isk->inet_saddr = addr->sin_addr.s_addr;
+#if IS_ENABLED(CONFIG_IPV6)
+ } else if (saddr->sa_family == AF_INET6) {
+ struct sockaddr_in6 *addr = (struct sockaddr_in6 *) saddr;
+ struct ipv6_pinfo *np = inet6_sk(sk);
+ np->rcv_saddr = np->saddr = addr->sin6_addr;
+#endif
+ }
+}
+void ping_clear_saddr(struct sock *sk, int dif)
+{
+ sk->sk_bound_dev_if = dif;
+ if (sk->sk_family == AF_INET) {
+ struct inet_sock *isk = inet_sk(sk);
+ isk->inet_rcv_saddr = isk->inet_saddr = 0;
+#if IS_ENABLED(CONFIG_IPV6)
+ } else if (sk->sk_family == AF_INET6) {
+ struct ipv6_pinfo *np = inet6_sk(sk);
+ memset(&np->rcv_saddr, 0, sizeof(np->rcv_saddr));
+ memset(&np->saddr, 0, sizeof(np->saddr));
+#endif
+ }
+}
/*
* We need our own bind because there are no privileged id's == local ports.
* Moreover, we don't allow binding to multi- and broadcast addresses.
*/
-static int ping_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len)
+int ping_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len)
{
- struct sockaddr_in *addr = (struct sockaddr_in *)uaddr;
struct inet_sock *isk = inet_sk(sk);
unsigned short snum;
- int chk_addr_ret;
int err;
+ int dif = sk->sk_bound_dev_if;
- if (addr_len < sizeof(struct sockaddr_in))
- return -EINVAL;
-
- pr_debug("ping_v4_bind(sk=%p,sa_addr=%08x,sa_port=%d)\n",
- sk, addr->sin_addr.s_addr, ntohs(addr->sin_port));
-
- chk_addr_ret = inet_addr_type(sock_net(sk), addr->sin_addr.s_addr);
- if (addr->sin_addr.s_addr == htonl(INADDR_ANY))
- chk_addr_ret = RTN_LOCAL;
-
- if ((sysctl_ip_nonlocal_bind == 0 &&
- isk->freebind == 0 && isk->transparent == 0 &&
- chk_addr_ret != RTN_LOCAL) ||
- chk_addr_ret == RTN_MULTICAST ||
- chk_addr_ret == RTN_BROADCAST)
- return -EADDRNOTAVAIL;
+ err = ping_check_bind_addr(sk, isk, uaddr, addr_len);
+ if (err)
+ return err;
lock_sock(sk);
@@ -272,42 +404,50 @@ static int ping_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len)
goto out;
err = -EADDRINUSE;
- isk->inet_rcv_saddr = isk->inet_saddr = addr->sin_addr.s_addr;
- snum = ntohs(addr->sin_port);
- if (ping_v4_get_port(sk, snum) != 0) {
- isk->inet_saddr = isk->inet_rcv_saddr = 0;
+ ping_set_saddr(sk, uaddr);
+ snum = ntohs(((struct sockaddr_in *)uaddr)->sin_port);
+ if (ping_get_port(sk, snum) != 0) {
+ ping_clear_saddr(sk, dif);
goto out;
}
- pr_debug("after bind(): num = %d, daddr = %pI4, dif = %d\n",
+ pr_debug("after bind(): num = %d, dif = %d\n",
(int)isk->inet_num,
- &isk->inet_rcv_saddr,
(int)sk->sk_bound_dev_if);
err = 0;
- if (isk->inet_rcv_saddr)
+ if ((sk->sk_family == AF_INET && isk->inet_rcv_saddr) ||
+ (sk->sk_family == AF_INET6 &&
+ !ipv6_addr_any(&inet6_sk(sk)->rcv_saddr)))
sk->sk_userlocks |= SOCK_BINDADDR_LOCK;
+
if (snum)
sk->sk_userlocks |= SOCK_BINDPORT_LOCK;
isk->inet_sport = htons(isk->inet_num);
isk->inet_daddr = 0;
isk->inet_dport = 0;
+
+#if IS_ENABLED(CONFIG_IPV6)
+ if (sk->sk_family == AF_INET6)
+ memset(&inet6_sk(sk)->daddr, 0, sizeof(inet6_sk(sk)->daddr));
+#endif
+
sk_dst_reset(sk);
out:
release_sock(sk);
pr_debug("ping_v4_bind -> %d\n", err);
return err;
}
+EXPORT_SYMBOL_GPL(ping_bind);
/*
* Is this a supported type of ICMP message?
*/
-static inline int ping_supported(int type, int code)
+static inline int ping_supported(int family, int type, int code)
{
- if (type == ICMP_ECHO && code == 0)
- return 1;
- return 0;
+ return (family == AF_INET && type == ICMP_ECHO && code == 0) ||
+ (family == AF_INET6 && type == ICMPV6_ECHO_REQUEST && code == 0);
}
/*
@@ -315,30 +455,42 @@ static inline int ping_supported(int type, int code)
* sort of error condition.
*/
-static int ping_queue_rcv_skb(struct sock *sk, struct sk_buff *skb);
-
-void ping_err(struct sk_buff *skb, u32 info)
+void ping_err(struct sk_buff *skb, int offset, u32 info)
{
- struct iphdr *iph = (struct iphdr *)skb->data;
- struct icmphdr *icmph = (struct icmphdr *)(skb->data+(iph->ihl<<2));
+ int family;
+ struct icmphdr *icmph;
struct inet_sock *inet_sock;
- int type = icmp_hdr(skb)->type;
- int code = icmp_hdr(skb)->code;
+ int type;
+ int code;
struct net *net = dev_net(skb->dev);
struct sock *sk;
int harderr;
int err;
+ if (skb->protocol == htons(ETH_P_IP)) {
+ family = AF_INET;
+ type = icmp_hdr(skb)->type;
+ code = icmp_hdr(skb)->code;
+ icmph = (struct icmphdr *)(skb->data + offset);
+ } else if (skb->protocol == htons(ETH_P_IPV6)) {
+ family = AF_INET6;
+ type = icmp6_hdr(skb)->icmp6_type;
+ code = icmp6_hdr(skb)->icmp6_code;
+ icmph = (struct icmphdr *) (skb->data + offset);
+ } else {
+ BUG();
+ }
+
/* We assume the packet has already been checked by icmp_unreach */
- if (!ping_supported(icmph->type, icmph->code))
+ if (!ping_supported(family, icmph->type, icmph->code))
return;
- pr_debug("ping_err(type=%04x,code=%04x,id=%04x,seq=%04x)\n", type,
- code, ntohs(icmph->un.echo.id), ntohs(icmph->un.echo.sequence));
+ pr_debug("ping_err(proto=0x%x,type=%d,code=%d,id=%04x,seq=%04x)\n",
+ skb->protocol, type, code, ntohs(icmph->un.echo.id),
+ ntohs(icmph->un.echo.sequence));
- sk = ping_v4_lookup(net, iph->daddr, iph->saddr,
- ntohs(icmph->un.echo.id), skb->dev->ifindex);
+ sk = ping_lookup(net, skb, ntohs(icmph->un.echo.id));
if (sk == NULL) {
pr_debug("no socket, dropping\n");
return; /* No socket for error */
@@ -349,72 +501,83 @@ void ping_err(struct sk_buff *skb, u32 info)
harderr = 0;
inet_sock = inet_sk(sk);
- switch (type) {
- default:
- case ICMP_TIME_EXCEEDED:
- err = EHOSTUNREACH;
- break;
- case ICMP_SOURCE_QUENCH:
- /* This is not a real error but ping wants to see it.
- * Report it with some fake errno. */
- err = EREMOTEIO;
- break;
- case ICMP_PARAMETERPROB:
- err = EPROTO;
- harderr = 1;
- break;
- case ICMP_DEST_UNREACH:
- if (code == ICMP_FRAG_NEEDED) { /* Path MTU discovery */
- ipv4_sk_update_pmtu(skb, sk, info);
- if (inet_sock->pmtudisc != IP_PMTUDISC_DONT) {
- err = EMSGSIZE;
- harderr = 1;
- break;
+ if (skb->protocol == htons(ETH_P_IP)) {
+ switch (type) {
+ default:
+ case ICMP_TIME_EXCEEDED:
+ err = EHOSTUNREACH;
+ break;
+ case ICMP_SOURCE_QUENCH:
+ /* This is not a real error but ping wants to see it.
+ * Report it with some fake errno.
+ */
+ err = EREMOTEIO;
+ break;
+ case ICMP_PARAMETERPROB:
+ err = EPROTO;
+ harderr = 1;
+ break;
+ case ICMP_DEST_UNREACH:
+ if (code == ICMP_FRAG_NEEDED) { /* Path MTU discovery */
+ ipv4_sk_update_pmtu(skb, sk, info);
+ if (inet_sock->pmtudisc != IP_PMTUDISC_DONT) {
+ err = EMSGSIZE;
+ harderr = 1;
+ break;
+ }
+ goto out;
}
- goto out;
- }
- err = EHOSTUNREACH;
- if (code <= NR_ICMP_UNREACH) {
- harderr = icmp_err_convert[code].fatal;
- err = icmp_err_convert[code].errno;
+ err = EHOSTUNREACH;
+ if (code <= NR_ICMP_UNREACH) {
+ harderr = icmp_err_convert[code].fatal;
+ err = icmp_err_convert[code].errno;
+ }
+ break;
+ case ICMP_REDIRECT:
+ /* See ICMP_SOURCE_QUENCH */
+ ipv4_sk_redirect(skb, sk);
+ err = EREMOTEIO;
+ break;
}
- break;
- case ICMP_REDIRECT:
- /* See ICMP_SOURCE_QUENCH */
- ipv4_sk_redirect(skb, sk);
- err = EREMOTEIO;
- break;
+#if IS_ENABLED(CONFIG_IPV6)
+ } else if (skb->protocol == htons(ETH_P_IPV6)) {
+ harderr = pingv6_ops.icmpv6_err_convert(type, code, &err);
+#endif
}
/*
* RFC1122: OK. Passes ICMP errors back to application, as per
* 4.1.3.3.
*/
- if (!inet_sock->recverr) {
+ if ((family == AF_INET && !inet_sock->recverr) ||
+ (family == AF_INET6 && !inet6_sk(sk)->recverr)) {
if (!harderr || sk->sk_state != TCP_ESTABLISHED)
goto out;
} else {
- ip_icmp_error(sk, skb, err, 0 /* no remote port */,
- info, (u8 *)icmph);
+ if (family == AF_INET) {
+ ip_icmp_error(sk, skb, err, 0 /* no remote port */,
+ info, (u8 *)icmph);
+#if IS_ENABLED(CONFIG_IPV6)
+ } else if (family == AF_INET6) {
+ pingv6_ops.ipv6_icmp_error(sk, skb, err, 0,
+ info, (u8 *)icmph);
+#endif
+ }
}
sk->sk_err = err;
sk->sk_error_report(sk);
out:
sock_put(sk);
}
+EXPORT_SYMBOL_GPL(ping_err);
/*
- * Copy and checksum an ICMP Echo packet from user space into a buffer.
+ * Copy and checksum an ICMP Echo packet from user space into a buffer
+ * starting from the payload.
*/
-struct pingfakehdr {
- struct icmphdr icmph;
- struct iovec *iov;
- __wsum wcheck;
-};
-
-static int ping_getfrag(void *from, char *to,
- int offset, int fraglen, int odd, struct sk_buff *skb)
+int ping_getfrag(void *from, char *to,
+ int offset, int fraglen, int odd, struct sk_buff *skb)
{
struct pingfakehdr *pfh = (struct pingfakehdr *)from;
@@ -425,20 +588,33 @@ static int ping_getfrag(void *from, char *to,
pfh->iov, 0, fraglen - sizeof(struct icmphdr),
&pfh->wcheck))
return -EFAULT;
+ } else if (offset < sizeof(struct icmphdr)) {
+ BUG();
+ } else {
+ if (csum_partial_copy_fromiovecend
+ (to, pfh->iov, offset - sizeof(struct icmphdr),
+ fraglen, &pfh->wcheck))
+ return -EFAULT;
+ }
- return 0;
+#if IS_ENABLED(CONFIG_IPV6)
+ /* For IPv6, checksum each skb as we go along, as expected by
+ * icmpv6_push_pending_frames. For IPv4, accumulate the checksum in
+ * wcheck, it will be finalized in ping_v4_push_pending_frames.
+ */
+ if (pfh->family == AF_INET6) {
+ skb->csum = pfh->wcheck;
+ skb->ip_summed = CHECKSUM_NONE;
+ pfh->wcheck = 0;
}
- if (offset < sizeof(struct icmphdr))
- BUG();
- if (csum_partial_copy_fromiovecend
- (to, pfh->iov, offset - sizeof(struct icmphdr),
- fraglen, &pfh->wcheck))
- return -EFAULT;
+#endif
+
return 0;
}
+EXPORT_SYMBOL_GPL(ping_getfrag);
-static int ping_push_pending_frames(struct sock *sk, struct pingfakehdr *pfh,
- struct flowi4 *fl4)
+static int ping_v4_push_pending_frames(struct sock *sk, struct pingfakehdr *pfh,
+ struct flowi4 *fl4)
{
struct sk_buff *skb = skb_peek(&sk->sk_write_queue);
@@ -450,24 +626,9 @@ static int ping_push_pending_frames(struct sock *sk, struct pingfakehdr *pfh,
return ip_push_pending_frames(sk, fl4);
}
-static int ping_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
- size_t len)
-{
- struct net *net = sock_net(sk);
- struct flowi4 fl4;
- struct inet_sock *inet = inet_sk(sk);
- struct ipcm_cookie ipc;
- struct icmphdr user_icmph;
- struct pingfakehdr pfh;
- struct rtable *rt = NULL;
- struct ip_options_data opt_copy;
- int free = 0;
- __be32 saddr, daddr, faddr;
- u8 tos;
- int err;
-
- pr_debug("ping_sendmsg(sk=%p,sk->num=%u)\n", inet, inet->inet_num);
-
+int ping_common_sendmsg(int family, struct msghdr *msg, size_t len,
+ void *user_icmph, size_t icmph_len) {
+ u8 type, code;
if (len > 0xFFFF)
return -EMSGSIZE;
@@ -482,15 +643,53 @@ static int ping_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
/*
* Fetch the ICMP header provided by the userland.
- * iovec is modified!
+ * iovec is modified! The ICMP header is consumed.
*/
-
- if (memcpy_fromiovec((u8 *)&user_icmph, msg->msg_iov,
- sizeof(struct icmphdr)))
+ if (memcpy_fromiovec(user_icmph, msg->msg_iov, icmph_len))
return -EFAULT;
- if (!ping_supported(user_icmph.type, user_icmph.code))
+
+ if (family == AF_INET) {
+ type = ((struct icmphdr *) user_icmph)->type;
+ code = ((struct icmphdr *) user_icmph)->code;
+#if IS_ENABLED(CONFIG_IPV6)
+ } else if (family == AF_INET6) {
+ type = ((struct icmp6hdr *) user_icmph)->icmp6_type;
+ code = ((struct icmp6hdr *) user_icmph)->icmp6_code;
+#endif
+ } else {
+ BUG();
+ }
+
+ if (!ping_supported(family, type, code))
return -EINVAL;
+ return 0;
+}
+EXPORT_SYMBOL_GPL(ping_common_sendmsg);
+
+int ping_v4_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
+ size_t len)
+{
+ struct net *net = sock_net(sk);
+ struct flowi4 fl4;
+ struct inet_sock *inet = inet_sk(sk);
+ struct ipcm_cookie ipc;
+ struct icmphdr user_icmph;
+ struct pingfakehdr pfh;
+ struct rtable *rt = NULL;
+ struct ip_options_data opt_copy;
+ int free = 0;
+ __be32 saddr, daddr, faddr;
+ u8 tos;
+ int err;
+
+ pr_debug("ping_v4_sendmsg(sk=%p,sk->num=%u)\n", inet, inet->inet_num);
+
+ err = ping_common_sendmsg(AF_INET, msg, len, &user_icmph,
+ sizeof(user_icmph));
+ if (err)
+ return err;
+
/*
* Get and verify the address.
*/
@@ -595,13 +794,14 @@ back_from_confirm:
pfh.icmph.un.echo.sequence = user_icmph.un.echo.sequence;
pfh.iov = msg->msg_iov;
pfh.wcheck = 0;
+ pfh.family = AF_INET;
err = ip_append_data(sk, &fl4, ping_getfrag, &pfh, len,
0, &ipc, &rt, msg->msg_flags);
if (err)
ip_flush_pending_frames(sk);
else
- err = ping_push_pending_frames(sk, &pfh, &fl4);
+ err = ping_v4_push_pending_frames(sk, &pfh, &fl4);
release_sock(sk);
out:
@@ -622,11 +822,13 @@ do_confirm:
goto out;
}
-static int ping_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
- size_t len, int noblock, int flags, int *addr_len)
+int ping_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
+ size_t len, int noblock, int flags, int *addr_len)
{
struct inet_sock *isk = inet_sk(sk);
- struct sockaddr_in *sin = (struct sockaddr_in *)msg->msg_name;
+ int family = sk->sk_family;
+ struct sockaddr_in *sin;
+ struct sockaddr_in6 *sin6;
struct sk_buff *skb;
int copied, err;
@@ -636,11 +838,22 @@ static int ping_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
if (flags & MSG_OOB)
goto out;
- if (addr_len)
- *addr_len = sizeof(*sin);
+ if (addr_len) {
+ if (family == AF_INET)
+ *addr_len = sizeof(*sin);
+ else if (family == AF_INET6 && addr_len)
+ *addr_len = sizeof(*sin6);
+ }
- if (flags & MSG_ERRQUEUE)
- return ip_recv_error(sk, msg, len);
+ if (flags & MSG_ERRQUEUE) {
+ if (family == AF_INET) {
+ return ip_recv_error(sk, msg, len);
+#if IS_ENABLED(CONFIG_IPV6)
+ } else if (family == AF_INET6) {
+ return pingv6_ops.ipv6_recv_error(sk, msg, len);
+#endif
+ }
+ }
skb = skb_recv_datagram(sk, flags, noblock, &err);
if (!skb)
@@ -659,15 +872,40 @@ static int ping_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
sock_recv_timestamp(msg, sk, skb);
- /* Copy the address. */
- if (sin) {
+ /* Copy the address and add cmsg data. */
+ if (family == AF_INET) {
+ sin = (struct sockaddr_in *) msg->msg_name;
sin->sin_family = AF_INET;
sin->sin_port = 0 /* skb->h.uh->source */;
sin->sin_addr.s_addr = ip_hdr(skb)->saddr;
memset(sin->sin_zero, 0, sizeof(sin->sin_zero));
+
+ if (isk->cmsg_flags)
+ ip_cmsg_recv(msg, skb);
+
+#if IS_ENABLED(CONFIG_IPV6)
+ } else if (family == AF_INET6) {
+ struct ipv6_pinfo *np = inet6_sk(sk);
+ struct ipv6hdr *ip6 = ipv6_hdr(skb);
+ sin6 = (struct sockaddr_in6 *) msg->msg_name;
+ sin6->sin6_family = AF_INET6;
+ sin6->sin6_port = 0;
+ sin6->sin6_addr = ip6->saddr;
+
+ sin6->sin6_flowinfo = 0;
+ if (np->sndflow)
+ sin6->sin6_flowinfo = ip6_flowinfo(ip6);
+
+ sin6->sin6_scope_id = ipv6_iface_scope_id(&sin6->sin6_addr,
+ IP6CB(skb)->iif);
+
+ if (inet6_sk(sk)->rxopt.all)
+ pingv6_ops.ip6_datagram_recv_ctl(sk, msg, skb);
+#endif
+ } else {
+ BUG();
}
- if (isk->cmsg_flags)
- ip_cmsg_recv(msg, skb);
+
err = copied;
done:
@@ -676,8 +914,9 @@ out:
pr_debug("ping_recvmsg -> %d\n", err);
return err;
}
+EXPORT_SYMBOL_GPL(ping_recvmsg);
-static int ping_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
+int ping_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
{
pr_debug("ping_queue_rcv_skb(sk=%p,sk->num=%d,skb=%p)\n",
inet_sk(sk), inet_sk(sk)->inet_num, skb);
@@ -688,6 +927,7 @@ static int ping_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
}
return 0;
}
+EXPORT_SYMBOL_GPL(ping_queue_rcv_skb);
/*
@@ -698,10 +938,7 @@ void ping_rcv(struct sk_buff *skb)
{
struct sock *sk;
struct net *net = dev_net(skb->dev);
- struct iphdr *iph = ip_hdr(skb);
struct icmphdr *icmph = icmp_hdr(skb);
- __be32 saddr = iph->saddr;
- __be32 daddr = iph->daddr;
/* We assume the packet has already been checked by icmp_rcv */
@@ -711,8 +948,7 @@ void ping_rcv(struct sk_buff *skb)
/* Push ICMP header back */
skb_push(skb, skb->data - (u8 *)icmph);
- sk = ping_v4_lookup(net, saddr, daddr, ntohs(icmph->un.echo.id),
- skb->dev->ifindex);
+ sk = ping_lookup(net, skb, ntohs(icmph->un.echo.id));
if (sk != NULL) {
pr_debug("rcv on socket %p\n", sk);
ping_queue_rcv_skb(sk, skb_get(skb));
@@ -723,6 +959,7 @@ void ping_rcv(struct sk_buff *skb)
/* We're called from icmp_rcv(). kfree_skb() is done there. */
}
+EXPORT_SYMBOL_GPL(ping_rcv);
struct proto ping_prot = {
.name = "PING",
@@ -733,14 +970,14 @@ struct proto ping_prot = {
.disconnect = udp_disconnect,
.setsockopt = ip_setsockopt,
.getsockopt = ip_getsockopt,
- .sendmsg = ping_sendmsg,
+ .sendmsg = ping_v4_sendmsg,
.recvmsg = ping_recvmsg,
.bind = ping_bind,
.backlog_rcv = ping_queue_rcv_skb,
.release_cb = ip4_datagram_release_cb,
- .hash = ping_v4_hash,
- .unhash = ping_v4_unhash,
- .get_port = ping_v4_get_port,
+ .hash = ping_hash,
+ .unhash = ping_unhash,
+ .get_port = ping_get_port,
.obj_size = sizeof(struct inet_sock),
};
EXPORT_SYMBOL(ping_prot);
@@ -764,7 +1001,8 @@ static struct sock *ping_get_first(struct seq_file *seq, int start)
continue;
sk_nulls_for_each(sk, node, hslot) {
- if (net_eq(sock_net(sk), net))
+ if (net_eq(sock_net(sk), net) &&
+ sk->sk_family == state->family)
goto found;
}
}
@@ -797,17 +1035,24 @@ static struct sock *ping_get_idx(struct seq_file *seq, loff_t pos)
return pos ? NULL : sk;
}
-static void *ping_seq_start(struct seq_file *seq, loff_t *pos)
+void *ping_seq_start(struct seq_file *seq, loff_t *pos, sa_family_t family)
{
struct ping_iter_state *state = seq->private;
state->bucket = 0;
+ state->family = family;
read_lock_bh(&ping_table.lock);
return *pos ? ping_get_idx(seq, *pos-1) : SEQ_START_TOKEN;
}
+EXPORT_SYMBOL_GPL(ping_seq_start);
+
+static void *ping_v4_seq_start(struct seq_file *seq, loff_t *pos)
+{
+ return ping_seq_start(seq, pos, AF_INET); /* IPv4 wrapper: fixes the family argument to AF_INET */
+}
-static void *ping_seq_next(struct seq_file *seq, void *v, loff_t *pos)
+void *ping_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
struct sock *sk;
@@ -819,13 +1064,15 @@ static void *ping_seq_next(struct seq_file *seq, void *v, loff_t *pos)
++*pos;
return sk;
}
+EXPORT_SYMBOL_GPL(ping_seq_next);
-static void ping_seq_stop(struct seq_file *seq, void *v)
+void ping_seq_stop(struct seq_file *seq, void *v)
{
read_unlock_bh(&ping_table.lock);
}
+EXPORT_SYMBOL_GPL(ping_seq_stop);
-static void ping_format_sock(struct sock *sp, struct seq_file *f,
+static void ping_v4_format_sock(struct sock *sp, struct seq_file *f,
int bucket, int *len)
{
struct inet_sock *inet = inet_sk(sp);
@@ -846,7 +1093,7 @@ static void ping_format_sock(struct sock *sp, struct seq_file *f,
atomic_read(&sp->sk_drops), len);
}
-static int ping_seq_show(struct seq_file *seq, void *v)
+static int ping_v4_seq_show(struct seq_file *seq, void *v)
{
if (v == SEQ_START_TOKEN)
seq_printf(seq, "%-127s\n",
@@ -857,72 +1104,86 @@ static int ping_seq_show(struct seq_file *seq, void *v)
struct ping_iter_state *state = seq->private;
int len;
- ping_format_sock(v, seq, state->bucket, &len);
+ ping_v4_format_sock(v, seq, state->bucket, &len);
seq_printf(seq, "%*s\n", 127 - len, "");
}
return 0;
}
-static const struct seq_operations ping_seq_ops = {
- .show = ping_seq_show,
- .start = ping_seq_start,
+static const struct seq_operations ping_v4_seq_ops = {
+ .show = ping_v4_seq_show,
+ .start = ping_v4_seq_start,
.next = ping_seq_next,
.stop = ping_seq_stop,
};
static int ping_seq_open(struct inode *inode, struct file *file)
{
- return seq_open_net(inode, file, &ping_seq_ops,
+ struct ping_seq_afinfo *afinfo = PDE_DATA(inode);
+ return seq_open_net(inode, file, &afinfo->seq_ops,
sizeof(struct ping_iter_state));
}
-static const struct file_operations ping_seq_fops = {
+const struct file_operations ping_seq_fops = {
.open = ping_seq_open,
.read = seq_read,
.llseek = seq_lseek,
.release = seq_release_net,
};
+EXPORT_SYMBOL_GPL(ping_seq_fops);
+
+static struct ping_seq_afinfo ping_v4_seq_afinfo = {
+ .name = "icmp", /* proc entry name used by ping_proc_register()/ping_proc_unregister() */
+ .family = AF_INET,
+ .seq_fops = &ping_seq_fops, /* file operations passed to proc_create_data() */
+ .seq_ops = { /* iterator callbacks that ping_seq_open() hands to seq_open_net() */
+ .start = ping_v4_seq_start,
+ .show = ping_v4_seq_show,
+ .next = ping_seq_next,
+ .stop = ping_seq_stop,
+ },
+};
-static int ping_proc_register(struct net *net)
+int ping_proc_register(struct net *net, struct ping_seq_afinfo *afinfo)
{
struct proc_dir_entry *p;
- int rc = 0;
-
- p = proc_create("icmp", S_IRUGO, net->proc_net, &ping_seq_fops);
+ p = proc_create_data(afinfo->name, S_IRUGO, net->proc_net,
+ afinfo->seq_fops, afinfo);
if (!p)
- rc = -ENOMEM;
- return rc;
+ return -ENOMEM;
+ return 0;
}
+EXPORT_SYMBOL_GPL(ping_proc_register);
-static void ping_proc_unregister(struct net *net)
+void ping_proc_unregister(struct net *net, struct ping_seq_afinfo *afinfo)
{
- remove_proc_entry("icmp", net->proc_net);
+ remove_proc_entry(afinfo->name, net->proc_net);
}
+EXPORT_SYMBOL_GPL(ping_proc_unregister);
-
-static int __net_init ping_proc_init_net(struct net *net)
+static int __net_init ping_v4_proc_init_net(struct net *net)
{
- return ping_proc_register(net);
+ return ping_proc_register(net, &ping_v4_seq_afinfo);
}
-static void __net_exit ping_proc_exit_net(struct net *net)
+static void __net_exit ping_v4_proc_exit_net(struct net *net)
{
- ping_proc_unregister(net);
+ ping_proc_unregister(net, &ping_v4_seq_afinfo);
}
-static struct pernet_operations ping_net_ops = {
- .init = ping_proc_init_net,
- .exit = ping_proc_exit_net,
+static struct pernet_operations ping_v4_net_ops = {
+ .init = ping_v4_proc_init_net,
+ .exit = ping_v4_proc_exit_net,
};
int __init ping_proc_init(void)
{
- return register_pernet_subsys(&ping_net_ops);
+ return register_pernet_subsys(&ping_v4_net_ops);
}
void ping_proc_exit(void)
{
- unregister_pernet_subsys(&ping_net_ops);
+ unregister_pernet_subsys(&ping_v4_net_ops);
}
#endif
diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c
index 2a5bf86d241..6577a1149a4 100644
--- a/net/ipv4/proc.c
+++ b/net/ipv4/proc.c
@@ -273,6 +273,7 @@ static const struct snmp_mib snmp4_net_list[] = {
SNMP_MIB_ITEM("TCPFastOpenListenOverflow", LINUX_MIB_TCPFASTOPENLISTENOVERFLOW),
SNMP_MIB_ITEM("TCPFastOpenCookieReqd", LINUX_MIB_TCPFASTOPENCOOKIEREQD),
SNMP_MIB_ITEM("TCPSpuriousRtxHostQueues", LINUX_MIB_TCPSPURIOUS_RTX_HOSTQUEUES),
+ SNMP_MIB_ITEM("LowLatencyRxPackets", LINUX_MIB_LOWLATENCYRXPACKETS),
SNMP_MIB_SENTINEL
};
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index 550781a17b3..198ea596f2d 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -594,11 +594,25 @@ static inline u32 fnhe_hashfun(__be32 daddr)
return hval & (FNHE_HASH_SIZE - 1);
}
+static void fill_route_from_fnhe(struct rtable *rt, struct fib_nh_exception *fnhe)
+{
+ rt->rt_pmtu = fnhe->fnhe_pmtu; /* PMTU and expiry are copied unconditionally, even when zero */
+ rt->dst.expires = fnhe->fnhe_expires;
+
+ if (fnhe->fnhe_gw) { /* exception carries a gateway: flag rt as redirected and route via it */
+ rt->rt_flags |= RTCF_REDIRECTED;
+ rt->rt_gateway = fnhe->fnhe_gw;
+ rt->rt_uses_gateway = 1;
+ }
+}
+
static void update_or_create_fnhe(struct fib_nh *nh, __be32 daddr, __be32 gw,
u32 pmtu, unsigned long expires)
{
struct fnhe_hash_bucket *hash;
struct fib_nh_exception *fnhe;
+ struct rtable *rt;
+ unsigned int i;
int depth;
u32 hval = fnhe_hashfun(daddr);
@@ -627,8 +641,12 @@ static void update_or_create_fnhe(struct fib_nh *nh, __be32 daddr, __be32 gw,
fnhe->fnhe_gw = gw;
if (pmtu) {
fnhe->fnhe_pmtu = pmtu;
- fnhe->fnhe_expires = expires;
+ fnhe->fnhe_expires = max(1UL, expires);
}
+ /* Update all cached dsts too */
+ rt = rcu_dereference(fnhe->fnhe_rth);
+ if (rt)
+ fill_route_from_fnhe(rt, fnhe);
} else {
if (depth > FNHE_RECLAIM_DEPTH)
fnhe = fnhe_oldest(hash);
@@ -640,10 +658,23 @@ static void update_or_create_fnhe(struct fib_nh *nh, __be32 daddr, __be32 gw,
fnhe->fnhe_next = hash->chain;
rcu_assign_pointer(hash->chain, fnhe);
}
+ fnhe->fnhe_genid = fnhe_genid(dev_net(nh->nh_dev));
fnhe->fnhe_daddr = daddr;
fnhe->fnhe_gw = gw;
fnhe->fnhe_pmtu = pmtu;
fnhe->fnhe_expires = expires;
+
+ /* Exception created; mark the cached routes for the nexthop
+ * stale, so anyone caching it rechecks if this exception
+ * applies to them.
+ */
+ for_each_possible_cpu(i) {
+ struct rtable __rcu **prt;
+ prt = per_cpu_ptr(nh->nh_pcpu_rth_output, i);
+ rt = rcu_dereference(*prt);
+ if (rt)
+ rt->dst.obsolete = DST_OBSOLETE_KILL;
+ }
}
fnhe->fnhe_stamp = jiffies;
@@ -737,10 +768,15 @@ static void ip_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_buf
{
struct rtable *rt;
struct flowi4 fl4;
+ const struct iphdr *iph = (const struct iphdr *) skb->data;
+ int oif = skb->dev->ifindex;
+ u8 tos = RT_TOS(iph->tos);
+ u8 prot = iph->protocol;
+ u32 mark = skb->mark;
rt = (struct rtable *) dst;
- ip_rt_build_flow_key(&fl4, sk, skb);
+ __build_flow_key(&fl4, sk, iph, oif, tos, prot, mark, 0);
__ip_do_redirect(rt, skb, &fl4, true);
}
@@ -917,12 +953,9 @@ static void __ip_rt_update_pmtu(struct rtable *rt, struct flowi4 *fl4, u32 mtu)
if (mtu < ip_rt_min_pmtu)
mtu = ip_rt_min_pmtu;
- if (!rt->rt_pmtu) {
- dst->obsolete = DST_OBSOLETE_KILL;
- } else {
- rt->rt_pmtu = mtu;
- dst->expires = max(1UL, jiffies + ip_rt_mtu_expires);
- }
+ if (rt->rt_pmtu == mtu &&
+ time_before(jiffies, dst->expires - ip_rt_mtu_expires / 2))
+ return;
rcu_read_lock();
if (fib_lookup(dev_net(dst->dev), fl4, &res) == 0) {
@@ -1063,11 +1096,11 @@ static struct dst_entry *ipv4_dst_check(struct dst_entry *dst, u32 cookie)
* DST_OBSOLETE_FORCE_CHK which forces validation calls down
* into this function always.
*
- * When a PMTU/redirect information update invalidates a
- * route, this is indicated by setting obsolete to
- * DST_OBSOLETE_KILL.
+ * When a PMTU/redirect information update invalidates a route,
+ * this is indicated by setting obsolete to DST_OBSOLETE_KILL or
+ * DST_OBSOLETE_DEAD by dst_free().
*/
- if (dst->obsolete == DST_OBSOLETE_KILL || rt_is_expired(rt))
+ if (dst->obsolete != DST_OBSOLETE_FORCE_CHK || rt_is_expired(rt))
return NULL;
return dst;
}
@@ -1209,26 +1242,17 @@ static bool rt_bind_exception(struct rtable *rt, struct fib_nh_exception *fnhe,
spin_lock_bh(&fnhe_lock);
if (daddr == fnhe->fnhe_daddr) {
+ int genid = fnhe_genid(dev_net(rt->dst.dev));
struct rtable *orig = rcu_dereference(fnhe->fnhe_rth);
- if (orig && rt_is_expired(orig)) {
+
+ if (fnhe->fnhe_genid != genid) {
+ fnhe->fnhe_genid = genid;
fnhe->fnhe_gw = 0;
fnhe->fnhe_pmtu = 0;
fnhe->fnhe_expires = 0;
}
- if (fnhe->fnhe_pmtu) {
- unsigned long expires = fnhe->fnhe_expires;
- unsigned long diff = expires - jiffies;
-
- if (time_before(jiffies, expires)) {
- rt->rt_pmtu = fnhe->fnhe_pmtu;
- dst_set_expires(&rt->dst, diff);
- }
- }
- if (fnhe->fnhe_gw) {
- rt->rt_flags |= RTCF_REDIRECTED;
- rt->rt_gateway = fnhe->fnhe_gw;
- rt->rt_uses_gateway = 1;
- } else if (!rt->rt_gateway)
+ fill_route_from_fnhe(rt, fnhe);
+ if (!rt->rt_gateway)
rt->rt_gateway = daddr;
rcu_assign_pointer(fnhe->fnhe_rth, rt);
@@ -2428,8 +2452,11 @@ static int ipv4_sysctl_rtcache_flush(ctl_table *__ctl, int write,
void __user *buffer,
size_t *lenp, loff_t *ppos)
{
+ struct net *net = (struct net *)__ctl->extra1;
+
if (write) {
- rt_cache_flush((struct net *)__ctl->extra1);
+ rt_cache_flush(net);
+ fnhe_genid_bump(net);
return 0;
}
@@ -2604,6 +2631,7 @@ static __net_initdata struct pernet_operations sysctl_route_ops = {
static __net_init int rt_genid_init(struct net *net)
{
atomic_set(&net->rt_genid, 0);
+ atomic_set(&net->fnhe_genid, 0);
get_random_bytes(&net->ipv4.dev_addr_genid,
sizeof(net->ipv4.dev_addr_genid));
return 0;
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index dcb116dde21..46ed9afd1f5 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -279,6 +279,7 @@
#include <asm/uaccess.h>
#include <asm/ioctls.h>
+#include <net/ll_poll.h>
int sysctl_tcp_fin_timeout __read_mostly = TCP_FIN_TIMEOUT;
@@ -436,6 +437,8 @@ unsigned int tcp_poll(struct file *file, struct socket *sock, poll_table *wait)
struct sock *sk = sock->sk;
const struct tcp_sock *tp = tcp_sk(sk);
+ sock_rps_record_flow(sk);
+
sock_poll_wait(file, sk_sleep(sk), wait);
if (sk->sk_state == TCP_LISTEN)
return inet_csk_listen_poll(sk);
@@ -1551,6 +1554,10 @@ int tcp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
struct sk_buff *skb;
u32 urg_hole = 0;
+ if (sk_valid_ll(sk) && skb_queue_empty(&sk->sk_receive_queue)
+ && (sk->sk_state == TCP_ESTABLISHED))
+ sk_poll_ll(sk, nonblock);
+
lock_sock(sk);
err = -ENOTCONN;
@@ -2875,229 +2882,9 @@ int compat_tcp_getsockopt(struct sock *sk, int level, int optname,
EXPORT_SYMBOL(compat_tcp_getsockopt);
#endif
-struct sk_buff *tcp_tso_segment(struct sk_buff *skb,
- netdev_features_t features)
-{
- struct sk_buff *segs = ERR_PTR(-EINVAL);
- struct tcphdr *th;
- unsigned int thlen;
- unsigned int seq;
- __be32 delta;
- unsigned int oldlen;
- unsigned int mss;
- struct sk_buff *gso_skb = skb;
- __sum16 newcheck;
-
- if (!pskb_may_pull(skb, sizeof(*th)))
- goto out;
-
- th = tcp_hdr(skb);
- thlen = th->doff * 4;
- if (thlen < sizeof(*th))
- goto out;
-
- if (!pskb_may_pull(skb, thlen))
- goto out;
-
- oldlen = (u16)~skb->len;
- __skb_pull(skb, thlen);
-
- mss = skb_shinfo(skb)->gso_size;
- if (unlikely(skb->len <= mss))
- goto out;
-
- if (skb_gso_ok(skb, features | NETIF_F_GSO_ROBUST)) {
- /* Packet is from an untrusted source, reset gso_segs. */
- int type = skb_shinfo(skb)->gso_type;
-
- if (unlikely(type &
- ~(SKB_GSO_TCPV4 |
- SKB_GSO_DODGY |
- SKB_GSO_TCP_ECN |
- SKB_GSO_TCPV6 |
- SKB_GSO_GRE |
- SKB_GSO_UDP_TUNNEL |
- 0) ||
- !(type & (SKB_GSO_TCPV4 | SKB_GSO_TCPV6))))
- goto out;
-
- skb_shinfo(skb)->gso_segs = DIV_ROUND_UP(skb->len, mss);
-
- segs = NULL;
- goto out;
- }
-
- segs = skb_segment(skb, features);
- if (IS_ERR(segs))
- goto out;
-
- delta = htonl(oldlen + (thlen + mss));
-
- skb = segs;
- th = tcp_hdr(skb);
- seq = ntohl(th->seq);
-
- newcheck = ~csum_fold((__force __wsum)((__force u32)th->check +
- (__force u32)delta));
-
- do {
- th->fin = th->psh = 0;
- th->check = newcheck;
-
- if (skb->ip_summed != CHECKSUM_PARTIAL)
- th->check =
- csum_fold(csum_partial(skb_transport_header(skb),
- thlen, skb->csum));
-
- seq += mss;
- skb = skb->next;
- th = tcp_hdr(skb);
-
- th->seq = htonl(seq);
- th->cwr = 0;
- } while (skb->next);
-
- /* Following permits TCP Small Queues to work well with GSO :
- * The callback to TCP stack will be called at the time last frag
- * is freed at TX completion, and not right now when gso_skb
- * is freed by GSO engine
- */
- if (gso_skb->destructor == tcp_wfree) {
- swap(gso_skb->sk, skb->sk);
- swap(gso_skb->destructor, skb->destructor);
- swap(gso_skb->truesize, skb->truesize);
- }
-
- delta = htonl(oldlen + (skb->tail - skb->transport_header) +
- skb->data_len);
- th->check = ~csum_fold((__force __wsum)((__force u32)th->check +
- (__force u32)delta));
- if (skb->ip_summed != CHECKSUM_PARTIAL)
- th->check = csum_fold(csum_partial(skb_transport_header(skb),
- thlen, skb->csum));
-
-out:
- return segs;
-}
-EXPORT_SYMBOL(tcp_tso_segment);
-
-struct sk_buff **tcp_gro_receive(struct sk_buff **head, struct sk_buff *skb)
-{
- struct sk_buff **pp = NULL;
- struct sk_buff *p;
- struct tcphdr *th;
- struct tcphdr *th2;
- unsigned int len;
- unsigned int thlen;
- __be32 flags;
- unsigned int mss = 1;
- unsigned int hlen;
- unsigned int off;
- int flush = 1;
- int i;
-
- off = skb_gro_offset(skb);
- hlen = off + sizeof(*th);
- th = skb_gro_header_fast(skb, off);
- if (skb_gro_header_hard(skb, hlen)) {
- th = skb_gro_header_slow(skb, hlen, off);
- if (unlikely(!th))
- goto out;
- }
-
- thlen = th->doff * 4;
- if (thlen < sizeof(*th))
- goto out;
-
- hlen = off + thlen;
- if (skb_gro_header_hard(skb, hlen)) {
- th = skb_gro_header_slow(skb, hlen, off);
- if (unlikely(!th))
- goto out;
- }
-
- skb_gro_pull(skb, thlen);
-
- len = skb_gro_len(skb);
- flags = tcp_flag_word(th);
-
- for (; (p = *head); head = &p->next) {
- if (!NAPI_GRO_CB(p)->same_flow)
- continue;
-
- th2 = tcp_hdr(p);
-
- if (*(u32 *)&th->source ^ *(u32 *)&th2->source) {
- NAPI_GRO_CB(p)->same_flow = 0;
- continue;
- }
-
- goto found;
- }
-
- goto out_check_final;
-
-found:
- flush = NAPI_GRO_CB(p)->flush;
- flush |= (__force int)(flags & TCP_FLAG_CWR);
- flush |= (__force int)((flags ^ tcp_flag_word(th2)) &
- ~(TCP_FLAG_CWR | TCP_FLAG_FIN | TCP_FLAG_PSH));
- flush |= (__force int)(th->ack_seq ^ th2->ack_seq);
- for (i = sizeof(*th); i < thlen; i += 4)
- flush |= *(u32 *)((u8 *)th + i) ^
- *(u32 *)((u8 *)th2 + i);
-
- mss = skb_shinfo(p)->gso_size;
-
- flush |= (len - 1) >= mss;
- flush |= (ntohl(th2->seq) + skb_gro_len(p)) ^ ntohl(th->seq);
-
- if (flush || skb_gro_receive(head, skb)) {
- mss = 1;
- goto out_check_final;
- }
-
- p = *head;
- th2 = tcp_hdr(p);
- tcp_flag_word(th2) |= flags & (TCP_FLAG_FIN | TCP_FLAG_PSH);
-
-out_check_final:
- flush = len < mss;
- flush |= (__force int)(flags & (TCP_FLAG_URG | TCP_FLAG_PSH |
- TCP_FLAG_RST | TCP_FLAG_SYN |
- TCP_FLAG_FIN));
-
- if (p && (!NAPI_GRO_CB(skb)->same_flow || flush))
- pp = head;
-
-out:
- NAPI_GRO_CB(skb)->flush |= flush;
-
- return pp;
-}
-EXPORT_SYMBOL(tcp_gro_receive);
-
-int tcp_gro_complete(struct sk_buff *skb)
-{
- struct tcphdr *th = tcp_hdr(skb);
-
- skb->csum_start = skb_transport_header(skb) - skb->head;
- skb->csum_offset = offsetof(struct tcphdr, check);
- skb->ip_summed = CHECKSUM_PARTIAL;
-
- skb_shinfo(skb)->gso_segs = NAPI_GRO_CB(skb)->count;
-
- if (th->cwr)
- skb_shinfo(skb)->gso_type |= SKB_GSO_TCP_ECN;
-
- return 0;
-}
-EXPORT_SYMBOL(tcp_gro_complete);
-
#ifdef CONFIG_TCP_MD5SIG
-static unsigned long tcp_md5sig_users;
-static struct tcp_md5sig_pool __percpu *tcp_md5sig_pool;
-static DEFINE_SPINLOCK(tcp_md5sig_pool_lock);
+static struct tcp_md5sig_pool __percpu *tcp_md5sig_pool __read_mostly;
+static DEFINE_MUTEX(tcp_md5sig_mutex);
static void __tcp_free_md5sig_pool(struct tcp_md5sig_pool __percpu *pool)
{
@@ -3112,30 +2899,14 @@ static void __tcp_free_md5sig_pool(struct tcp_md5sig_pool __percpu *pool)
free_percpu(pool);
}
-void tcp_free_md5sig_pool(void)
-{
- struct tcp_md5sig_pool __percpu *pool = NULL;
-
- spin_lock_bh(&tcp_md5sig_pool_lock);
- if (--tcp_md5sig_users == 0) {
- pool = tcp_md5sig_pool;
- tcp_md5sig_pool = NULL;
- }
- spin_unlock_bh(&tcp_md5sig_pool_lock);
- if (pool)
- __tcp_free_md5sig_pool(pool);
-}
-EXPORT_SYMBOL(tcp_free_md5sig_pool);
-
-static struct tcp_md5sig_pool __percpu *
-__tcp_alloc_md5sig_pool(struct sock *sk)
+static void __tcp_alloc_md5sig_pool(void)
{
int cpu;
struct tcp_md5sig_pool __percpu *pool;
pool = alloc_percpu(struct tcp_md5sig_pool);
if (!pool)
- return NULL;
+ return;
for_each_possible_cpu(cpu) {
struct crypto_hash *hash;
@@ -3146,53 +2917,27 @@ __tcp_alloc_md5sig_pool(struct sock *sk)
per_cpu_ptr(pool, cpu)->md5_desc.tfm = hash;
}
- return pool;
+ /* before setting tcp_md5sig_pool, we must commit all writes
+ * to memory. See ACCESS_ONCE() in tcp_get_md5sig_pool()
+ */
+ smp_wmb();
+ tcp_md5sig_pool = pool;
+ return;
out_free:
__tcp_free_md5sig_pool(pool);
- return NULL;
}
-struct tcp_md5sig_pool __percpu *tcp_alloc_md5sig_pool(struct sock *sk)
+bool tcp_alloc_md5sig_pool(void)
{
- struct tcp_md5sig_pool __percpu *pool;
- bool alloc = false;
-
-retry:
- spin_lock_bh(&tcp_md5sig_pool_lock);
- pool = tcp_md5sig_pool;
- if (tcp_md5sig_users++ == 0) {
- alloc = true;
- spin_unlock_bh(&tcp_md5sig_pool_lock);
- } else if (!pool) {
- tcp_md5sig_users--;
- spin_unlock_bh(&tcp_md5sig_pool_lock);
- cpu_relax();
- goto retry;
- } else
- spin_unlock_bh(&tcp_md5sig_pool_lock);
-
- if (alloc) {
- /* we cannot hold spinlock here because this may sleep. */
- struct tcp_md5sig_pool __percpu *p;
-
- p = __tcp_alloc_md5sig_pool(sk);
- spin_lock_bh(&tcp_md5sig_pool_lock);
- if (!p) {
- tcp_md5sig_users--;
- spin_unlock_bh(&tcp_md5sig_pool_lock);
- return NULL;
- }
- pool = tcp_md5sig_pool;
- if (pool) {
- /* oops, it has already been assigned. */
- spin_unlock_bh(&tcp_md5sig_pool_lock);
- __tcp_free_md5sig_pool(p);
- } else {
- tcp_md5sig_pool = pool = p;
- spin_unlock_bh(&tcp_md5sig_pool_lock);
- }
+ if (unlikely(!tcp_md5sig_pool)) {
+ mutex_lock(&tcp_md5sig_mutex);
+
+ if (!tcp_md5sig_pool)
+ __tcp_alloc_md5sig_pool();
+
+ mutex_unlock(&tcp_md5sig_mutex);
}
- return pool;
+ return tcp_md5sig_pool != NULL;
}
EXPORT_SYMBOL(tcp_alloc_md5sig_pool);
@@ -3209,28 +2954,15 @@ struct tcp_md5sig_pool *tcp_get_md5sig_pool(void)
struct tcp_md5sig_pool __percpu *p;
local_bh_disable();
-
- spin_lock(&tcp_md5sig_pool_lock);
- p = tcp_md5sig_pool;
- if (p)
- tcp_md5sig_users++;
- spin_unlock(&tcp_md5sig_pool_lock);
-
+ p = ACCESS_ONCE(tcp_md5sig_pool);
if (p)
- return this_cpu_ptr(p);
+ return __this_cpu_ptr(p);
local_bh_enable();
return NULL;
}
EXPORT_SYMBOL(tcp_get_md5sig_pool);
-void tcp_put_md5sig_pool(void)
-{
- local_bh_enable();
- tcp_free_md5sig_pool();
-}
-EXPORT_SYMBOL(tcp_put_md5sig_pool);
-
int tcp_md5_hash_header(struct tcp_md5sig_pool *hp,
const struct tcphdr *th)
{
@@ -3269,8 +3001,11 @@ int tcp_md5_hash_skb_data(struct tcp_md5sig_pool *hp,
for (i = 0; i < shi->nr_frags; ++i) {
const struct skb_frag_struct *f = &shi->frags[i];
- struct page *page = skb_frag_page(f);
- sg_set_page(&sg, page, skb_frag_size(f), f->page_offset);
+ unsigned int offset = f->page_offset;
+ struct page *page = skb_frag_page(f) + (offset >> PAGE_SHIFT);
+
+ sg_set_page(&sg, page, skb_frag_size(f),
+ offset_in_page(offset));
if (crypto_hash_update(desc, &sg, skb_frag_size(f)))
return 1;
}
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 08bbe609652..907311c9a01 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -360,9 +360,7 @@ static void tcp_fixup_rcvbuf(struct sock *sk)
if (mss > 1460)
icwnd = max_t(u32, (1460 * TCP_DEFAULT_INIT_RCVWND) / mss, 2);
- rcvmem = SKB_TRUESIZE(mss + MAX_TCP_HEADER);
- while (tcp_win_from_space(rcvmem) < mss)
- rcvmem += 128;
+ rcvmem = 2 * SKB_TRUESIZE(mss + MAX_TCP_HEADER);
rcvmem *= icwnd;
@@ -1257,8 +1255,6 @@ static bool tcp_shifted_skb(struct sock *sk, struct sk_buff *skb,
if (skb == tp->retransmit_skb_hint)
tp->retransmit_skb_hint = prev;
- if (skb == tp->scoreboard_skb_hint)
- tp->scoreboard_skb_hint = prev;
if (skb == tp->lost_skb_hint) {
tp->lost_skb_hint = prev;
tp->lost_cnt_hint -= tcp_skb_pcount(prev);
@@ -1966,20 +1962,6 @@ static bool tcp_pause_early_retransmit(struct sock *sk, int flag)
return true;
}
-static inline int tcp_skb_timedout(const struct sock *sk,
- const struct sk_buff *skb)
-{
- return tcp_time_stamp - TCP_SKB_CB(skb)->when > inet_csk(sk)->icsk_rto;
-}
-
-static inline int tcp_head_timedout(const struct sock *sk)
-{
- const struct tcp_sock *tp = tcp_sk(sk);
-
- return tp->packets_out &&
- tcp_skb_timedout(sk, tcp_write_queue_head(sk));
-}
-
/* Linux NewReno/SACK/FACK/ECN state machine.
* --------------------------------------
*
@@ -2086,12 +2068,6 @@ static bool tcp_time_to_recover(struct sock *sk, int flag)
if (tcp_dupack_heuristics(tp) > tp->reordering)
return true;
- /* Trick#3 : when we use RFC2988 timer restart, fast
- * retransmit can be triggered by timeout of queue head.
- */
- if (tcp_is_fack(tp) && tcp_head_timedout(sk))
- return true;
-
/* Trick#4: It is still not OK... But will it be useful to delay
* recovery more?
*/
@@ -2128,44 +2104,6 @@ static bool tcp_time_to_recover(struct sock *sk, int flag)
return false;
}
-/* New heuristics: it is possible only after we switched to restart timer
- * each time when something is ACKed. Hence, we can detect timed out packets
- * during fast retransmit without falling to slow start.
- *
- * Usefulness of this as is very questionable, since we should know which of
- * the segments is the next to timeout which is relatively expensive to find
- * in general case unless we add some data structure just for that. The
- * current approach certainly won't find the right one too often and when it
- * finally does find _something_ it usually marks large part of the window
- * right away (because a retransmission with a larger timestamp blocks the
- * loop from advancing). -ij
- */
-static void tcp_timeout_skbs(struct sock *sk)
-{
- struct tcp_sock *tp = tcp_sk(sk);
- struct sk_buff *skb;
-
- if (!tcp_is_fack(tp) || !tcp_head_timedout(sk))
- return;
-
- skb = tp->scoreboard_skb_hint;
- if (tp->scoreboard_skb_hint == NULL)
- skb = tcp_write_queue_head(sk);
-
- tcp_for_write_queue_from(skb, sk) {
- if (skb == tcp_send_head(sk))
- break;
- if (!tcp_skb_timedout(sk, skb))
- break;
-
- tcp_skb_mark_lost(tp, skb);
- }
-
- tp->scoreboard_skb_hint = skb;
-
- tcp_verify_left_out(tp);
-}
-
/* Detect loss in event "A" above by marking head of queue up as lost.
* For FACK or non-SACK(Reno) senders, the first "packets" number of segments
* are considered lost. For RFC3517 SACK, a segment is considered lost if it
@@ -2251,8 +2189,6 @@ static void tcp_update_scoreboard(struct sock *sk, int fast_rexmit)
else if (fast_rexmit)
tcp_mark_head_lost(sk, 1, 1);
}
-
- tcp_timeout_skbs(sk);
}
/* CWND moderation, preventing bursts due to too big ACKs
@@ -2307,10 +2243,22 @@ static void DBGUNDO(struct sock *sk, const char *msg)
#define DBGUNDO(x...) do { } while (0)
#endif
-static void tcp_undo_cwr(struct sock *sk, const bool undo_ssthresh)
+static void tcp_undo_cwnd_reduction(struct sock *sk, bool unmark_loss)
{
struct tcp_sock *tp = tcp_sk(sk);
+ if (unmark_loss) {
+ struct sk_buff *skb;
+
+ tcp_for_write_queue(skb, sk) {
+ if (skb == tcp_send_head(sk))
+ break;
+ TCP_SKB_CB(skb)->sacked &= ~TCPCB_LOST;
+ }
+ tp->lost_out = 0;
+ tcp_clear_all_retrans_hints(tp);
+ }
+
if (tp->prior_ssthresh) {
const struct inet_connection_sock *icsk = inet_csk(sk);
@@ -2319,7 +2267,7 @@ static void tcp_undo_cwr(struct sock *sk, const bool undo_ssthresh)
else
tp->snd_cwnd = max(tp->snd_cwnd, tp->snd_ssthresh << 1);
- if (undo_ssthresh && tp->prior_ssthresh > tp->snd_ssthresh) {
+ if (tp->prior_ssthresh > tp->snd_ssthresh) {
tp->snd_ssthresh = tp->prior_ssthresh;
TCP_ECN_withdraw_cwr(tp);
}
@@ -2327,6 +2275,7 @@ static void tcp_undo_cwr(struct sock *sk, const bool undo_ssthresh)
tp->snd_cwnd = max(tp->snd_cwnd, tp->snd_ssthresh);
}
tp->snd_cwnd_stamp = tcp_time_stamp;
+ tp->undo_marker = 0;
}
static inline bool tcp_may_undo(const struct tcp_sock *tp)
@@ -2346,14 +2295,13 @@ static bool tcp_try_undo_recovery(struct sock *sk)
* or our original transmission succeeded.
*/
DBGUNDO(sk, inet_csk(sk)->icsk_ca_state == TCP_CA_Loss ? "loss" : "retrans");
- tcp_undo_cwr(sk, true);
+ tcp_undo_cwnd_reduction(sk, false);
if (inet_csk(sk)->icsk_ca_state == TCP_CA_Loss)
mib_idx = LINUX_MIB_TCPLOSSUNDO;
else
mib_idx = LINUX_MIB_TCPFULLUNDO;
NET_INC_STATS_BH(sock_net(sk), mib_idx);
- tp->undo_marker = 0;
}
if (tp->snd_una == tp->high_seq && tcp_is_reno(tp)) {
/* Hold old state until something *above* high_seq
@@ -2367,16 +2315,17 @@ static bool tcp_try_undo_recovery(struct sock *sk)
}
/* Try to undo cwnd reduction, because D-SACKs acked all retransmitted data */
-static void tcp_try_undo_dsack(struct sock *sk)
+static bool tcp_try_undo_dsack(struct sock *sk)
{
struct tcp_sock *tp = tcp_sk(sk);
if (tp->undo_marker && !tp->undo_retrans) {
DBGUNDO(sk, "D-SACK");
- tcp_undo_cwr(sk, true);
- tp->undo_marker = 0;
+ tcp_undo_cwnd_reduction(sk, false);
NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPDSACKUNDO);
+ return true;
}
+ return false;
}
/* We can clear retrans_stamp when there are no retransmissions in the
@@ -2408,60 +2357,20 @@ static bool tcp_any_retrans_done(const struct sock *sk)
return false;
}
-/* Undo during fast recovery after partial ACK. */
-
-static int tcp_try_undo_partial(struct sock *sk, int acked)
-{
- struct tcp_sock *tp = tcp_sk(sk);
- /* Partial ACK arrived. Force Hoe's retransmit. */
- int failed = tcp_is_reno(tp) || (tcp_fackets_out(tp) > tp->reordering);
-
- if (tcp_may_undo(tp)) {
- /* Plain luck! Hole if filled with delayed
- * packet, rather than with a retransmit.
- */
- if (!tcp_any_retrans_done(sk))
- tp->retrans_stamp = 0;
-
- tcp_update_reordering(sk, tcp_fackets_out(tp) + acked, 1);
-
- DBGUNDO(sk, "Hoe");
- tcp_undo_cwr(sk, false);
- NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPPARTIALUNDO);
-
- /* So... Do not make Hoe's retransmit yet.
- * If the first packet was delayed, the rest
- * ones are most probably delayed as well.
- */
- failed = 0;
- }
- return failed;
-}
-
/* Undo during loss recovery after partial ACK or using F-RTO. */
static bool tcp_try_undo_loss(struct sock *sk, bool frto_undo)
{
struct tcp_sock *tp = tcp_sk(sk);
if (frto_undo || tcp_may_undo(tp)) {
- struct sk_buff *skb;
- tcp_for_write_queue(skb, sk) {
- if (skb == tcp_send_head(sk))
- break;
- TCP_SKB_CB(skb)->sacked &= ~TCPCB_LOST;
- }
-
- tcp_clear_all_retrans_hints(tp);
+ tcp_undo_cwnd_reduction(sk, true);
DBGUNDO(sk, "partial loss");
- tp->lost_out = 0;
- tcp_undo_cwr(sk, true);
NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPLOSSUNDO);
if (frto_undo)
NET_INC_STATS_BH(sock_net(sk),
LINUX_MIB_TCPSPURIOUSRTOS);
inet_csk(sk)->icsk_retransmits = 0;
- tp->undo_marker = 0;
if (frto_undo || tcp_is_sack(tp))
tcp_set_ca_state(sk, TCP_CA_Open);
return true;
@@ -2494,12 +2403,14 @@ static void tcp_init_cwnd_reduction(struct sock *sk, const bool set_ssthresh)
TCP_ECN_queue_cwr(tp);
}
-static void tcp_cwnd_reduction(struct sock *sk, int newly_acked_sacked,
+static void tcp_cwnd_reduction(struct sock *sk, const int prior_unsacked,
int fast_rexmit)
{
struct tcp_sock *tp = tcp_sk(sk);
int sndcnt = 0;
int delta = tp->snd_ssthresh - tcp_packets_in_flight(tp);
+ int newly_acked_sacked = prior_unsacked -
+ (tp->packets_out - tp->sacked_out);
tp->prr_delivered += newly_acked_sacked;
if (tcp_packets_in_flight(tp) > tp->snd_ssthresh) {
@@ -2556,7 +2467,7 @@ static void tcp_try_keep_open(struct sock *sk)
}
}
-static void tcp_try_to_open(struct sock *sk, int flag, int newly_acked_sacked)
+static void tcp_try_to_open(struct sock *sk, int flag, const int prior_unsacked)
{
struct tcp_sock *tp = tcp_sk(sk);
@@ -2573,7 +2484,7 @@ static void tcp_try_to_open(struct sock *sk, int flag, int newly_acked_sacked)
if (inet_csk(sk)->icsk_ca_state != TCP_CA_Open)
tcp_moderate_cwnd(tp);
} else {
- tcp_cwnd_reduction(sk, newly_acked_sacked, 0);
+ tcp_cwnd_reduction(sk, prior_unsacked, 0);
}
}
@@ -2731,6 +2642,40 @@ static void tcp_process_loss(struct sock *sk, int flag, bool is_dupack)
tcp_xmit_retransmit_queue(sk);
}
+/* Undo during fast recovery after partial ACK. Returns true when the ACK was handled here. */
+static bool tcp_try_undo_partial(struct sock *sk, const int acked,
+ const int prior_unsacked)
+{
+ struct tcp_sock *tp = tcp_sk(sk);
+
+ if (tp->undo_marker && tcp_packet_delayed(tp)) {
+ /* Plain luck! Hole is filled with a delayed
+ * packet, rather than with a retransmit.
+ */
+ tcp_update_reordering(sk, tcp_fackets_out(tp) + acked, 1);
+
+ /* We are getting evidence that the reordering degree is higher
+ * than we realized. If there are no retransmits out then we
+ * can undo. Otherwise we clock out new packets but do not
+ * mark more packets lost or retransmit more.
+ */
+ if (tp->retrans_out) {
+ tcp_cwnd_reduction(sk, prior_unsacked, 0);
+ return true; /* retransmits still out: keep reducing cwnd, no undo yet */
+ }
+
+ if (!tcp_any_retrans_done(sk))
+ tp->retrans_stamp = 0;
+
+ DBGUNDO(sk, "partial recovery");
+ tcp_undo_cwnd_reduction(sk, true); /* true: also clear the LOST marks on queued skbs */
+ NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPPARTIALUNDO);
+ tcp_try_keep_open(sk);
+ return true;
+ }
+ return false; /* undo precondition not met; nothing was changed here */
+}
+
/* Process an event, which can update packets-in-flight not trivially.
* Main goal of this function is to calculate new estimate for left_out,
* taking into account both packets sitting in receiver's buffer and
@@ -2742,15 +2687,14 @@ static void tcp_process_loss(struct sock *sk, int flag, bool is_dupack)
* It does _not_ decide what to send, it is made in function
* tcp_xmit_retransmit_queue().
*/
-static void tcp_fastretrans_alert(struct sock *sk, int pkts_acked,
- int prior_sacked, bool is_dupack,
- int flag)
+static void tcp_fastretrans_alert(struct sock *sk, const int acked,
+ const int prior_unsacked,
+ bool is_dupack, int flag)
{
struct inet_connection_sock *icsk = inet_csk(sk);
struct tcp_sock *tp = tcp_sk(sk);
- int do_lost = is_dupack || ((flag & FLAG_DATA_SACKED) &&
+ bool do_lost = is_dupack || ((flag & FLAG_DATA_SACKED) &&
(tcp_fackets_out(tp) > tp->reordering));
- int newly_acked_sacked = 0;
int fast_rexmit = 0;
if (WARN_ON(!tp->packets_out && tp->sacked_out))
@@ -2802,9 +2746,17 @@ static void tcp_fastretrans_alert(struct sock *sk, int pkts_acked,
if (!(flag & FLAG_SND_UNA_ADVANCED)) {
if (tcp_is_reno(tp) && is_dupack)
tcp_add_reno_sack(sk);
- } else
- do_lost = tcp_try_undo_partial(sk, pkts_acked);
- newly_acked_sacked = pkts_acked + tp->sacked_out - prior_sacked;
+ } else {
+ if (tcp_try_undo_partial(sk, acked, prior_unsacked))
+ return;
+ /* Partial ACK arrived. Force fast retransmit. */
+ do_lost = tcp_is_reno(tp) ||
+ tcp_fackets_out(tp) > tp->reordering;
+ }
+ if (tcp_try_undo_dsack(sk)) {
+ tcp_try_keep_open(sk);
+ return;
+ }
break;
case TCP_CA_Loss:
tcp_process_loss(sk, flag, is_dupack);
@@ -2818,13 +2770,12 @@ static void tcp_fastretrans_alert(struct sock *sk, int pkts_acked,
if (is_dupack)
tcp_add_reno_sack(sk);
}
- newly_acked_sacked = pkts_acked + tp->sacked_out - prior_sacked;
if (icsk->icsk_ca_state <= TCP_CA_Disorder)
tcp_try_undo_dsack(sk);
if (!tcp_time_to_recover(sk, flag)) {
- tcp_try_to_open(sk, flag, newly_acked_sacked);
+ tcp_try_to_open(sk, flag, prior_unsacked);
return;
}
@@ -2844,9 +2795,9 @@ static void tcp_fastretrans_alert(struct sock *sk, int pkts_acked,
fast_rexmit = 1;
}
- if (do_lost || (tcp_is_fack(tp) && tcp_head_timedout(sk)))
+ if (do_lost)
tcp_update_scoreboard(sk, fast_rexmit);
- tcp_cwnd_reduction(sk, newly_acked_sacked, fast_rexmit);
+ tcp_cwnd_reduction(sk, prior_unsacked, fast_rexmit);
tcp_xmit_retransmit_queue(sk);
}
@@ -3077,7 +3028,6 @@ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets,
tcp_unlink_write_queue(skb, sk);
sk_wmem_free_skb(sk, skb);
- tp->scoreboard_skb_hint = NULL;
if (skb == tp->retransmit_skb_hint)
tp->retransmit_skb_hint = NULL;
if (skb == tp->lost_skb_hint)
@@ -3330,9 +3280,9 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag)
bool is_dupack = false;
u32 prior_in_flight;
u32 prior_fackets;
- int prior_packets;
- int prior_sacked = tp->sacked_out;
- int pkts_acked = 0;
+ int prior_packets = tp->packets_out;
+ const int prior_unsacked = tp->packets_out - tp->sacked_out;
+ int acked = 0; /* Number of packets newly acked */
/* If the ack is older than previous acks
* then we can probably ignore it.
@@ -3403,21 +3353,20 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag)
sk->sk_err_soft = 0;
icsk->icsk_probes_out = 0;
tp->rcv_tstamp = tcp_time_stamp;
- prior_packets = tp->packets_out;
if (!prior_packets)
goto no_queue;
/* See if we can take anything off of the retransmit queue. */
+ acked = tp->packets_out;
flag |= tcp_clean_rtx_queue(sk, prior_fackets, prior_snd_una);
-
- pkts_acked = prior_packets - tp->packets_out;
+ acked -= tp->packets_out;
if (tcp_ack_is_dubious(sk, flag)) {
/* Advance CWND, if state allows this. */
if ((flag & FLAG_DATA_ACKED) && tcp_may_raise_cwnd(sk, flag))
tcp_cong_avoid(sk, ack, prior_in_flight);
is_dupack = !(flag & (FLAG_SND_UNA_ADVANCED | FLAG_NOT_DUP));
- tcp_fastretrans_alert(sk, pkts_acked, prior_sacked,
+ tcp_fastretrans_alert(sk, acked, prior_unsacked,
is_dupack, flag);
} else {
if (flag & FLAG_DATA_ACKED)
@@ -3440,7 +3389,7 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag)
no_queue:
/* If data was DSACKed, see if we can undo a cwnd reduction. */
if (flag & FLAG_DSACKING_ACK)
- tcp_fastretrans_alert(sk, pkts_acked, prior_sacked,
+ tcp_fastretrans_alert(sk, acked, prior_unsacked,
is_dupack, flag);
/* If this ack opens up a zero window, clear backoff. It was
* being used to time the probes, and is probably far higher than
@@ -3463,7 +3412,7 @@ old_ack:
*/
if (TCP_SKB_CB(skb)->sacked) {
flag |= tcp_sacktag_write_queue(sk, skb, prior_snd_una);
- tcp_fastretrans_alert(sk, pkts_acked, prior_sacked,
+ tcp_fastretrans_alert(sk, acked, prior_unsacked,
is_dupack, flag);
}
@@ -5598,6 +5547,7 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb,
struct inet_connection_sock *icsk = inet_csk(sk);
struct request_sock *req;
int queued = 0;
+ bool acceptable;
tp->rx_opt.saw_tstamp = 0;
@@ -5668,157 +5618,147 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb,
return 0;
/* step 5: check the ACK field */
- if (true) {
- int acceptable = tcp_ack(sk, skb, FLAG_SLOWPATH |
- FLAG_UPDATE_TS_RECENT) > 0;
-
- switch (sk->sk_state) {
- case TCP_SYN_RECV:
- if (acceptable) {
- /* Once we leave TCP_SYN_RECV, we no longer
- * need req so release it.
- */
- if (req) {
- tcp_synack_rtt_meas(sk, req);
- tp->total_retrans = req->num_retrans;
-
- reqsk_fastopen_remove(sk, req, false);
- } else {
- /* Make sure socket is routed, for
- * correct metrics.
- */
- icsk->icsk_af_ops->rebuild_header(sk);
- tcp_init_congestion_control(sk);
+ acceptable = tcp_ack(sk, skb, FLAG_SLOWPATH |
+ FLAG_UPDATE_TS_RECENT) > 0;
- tcp_mtup_init(sk);
- tcp_init_buffer_space(sk);
- tp->copied_seq = tp->rcv_nxt;
- }
- smp_mb();
- tcp_set_state(sk, TCP_ESTABLISHED);
- sk->sk_state_change(sk);
-
- /* Note, that this wakeup is only for marginal
- * crossed SYN case. Passively open sockets
- * are not waked up, because sk->sk_sleep ==
- * NULL and sk->sk_socket == NULL.
- */
- if (sk->sk_socket)
- sk_wake_async(sk,
- SOCK_WAKE_IO, POLL_OUT);
-
- tp->snd_una = TCP_SKB_CB(skb)->ack_seq;
- tp->snd_wnd = ntohs(th->window) <<
- tp->rx_opt.snd_wscale;
- tcp_init_wl(tp, TCP_SKB_CB(skb)->seq);
-
- if (tp->rx_opt.tstamp_ok)
- tp->advmss -= TCPOLEN_TSTAMP_ALIGNED;
-
- if (req) {
- /* Re-arm the timer because data may
- * have been sent out. This is similar
- * to the regular data transmission case
- * when new data has just been ack'ed.
- *
- * (TFO) - we could try to be more
- * aggressive and retranmitting any data
- * sooner based on when they were sent
- * out.
- */
- tcp_rearm_rto(sk);
- } else
- tcp_init_metrics(sk);
+ switch (sk->sk_state) {
+ case TCP_SYN_RECV:
+ if (!acceptable)
+ return 1;
- /* Prevent spurious tcp_cwnd_restart() on
- * first data packet.
- */
- tp->lsndtime = tcp_time_stamp;
+ /* Once we leave TCP_SYN_RECV, we no longer need req
+ * so release it.
+ */
+ if (req) {
+ tcp_synack_rtt_meas(sk, req);
+ tp->total_retrans = req->num_retrans;
- tcp_initialize_rcv_mss(sk);
- tcp_fast_path_on(tp);
- } else {
- return 1;
- }
- break;
+ reqsk_fastopen_remove(sk, req, false);
+ } else {
+ /* Make sure socket is routed, for correct metrics. */
+ icsk->icsk_af_ops->rebuild_header(sk);
+ tcp_init_congestion_control(sk);
+
+ tcp_mtup_init(sk);
+ tcp_init_buffer_space(sk);
+ tp->copied_seq = tp->rcv_nxt;
+ }
+ smp_mb();
+ tcp_set_state(sk, TCP_ESTABLISHED);
+ sk->sk_state_change(sk);
+
+ /* Note, that this wakeup is only for marginal crossed SYN case.
+ * Passively open sockets are not waked up, because
+ * sk->sk_sleep == NULL and sk->sk_socket == NULL.
+ */
+ if (sk->sk_socket)
+ sk_wake_async(sk, SOCK_WAKE_IO, POLL_OUT);
+
+ tp->snd_una = TCP_SKB_CB(skb)->ack_seq;
+ tp->snd_wnd = ntohs(th->window) << tp->rx_opt.snd_wscale;
+ tcp_init_wl(tp, TCP_SKB_CB(skb)->seq);
+
+ if (tp->rx_opt.tstamp_ok)
+ tp->advmss -= TCPOLEN_TSTAMP_ALIGNED;
- case TCP_FIN_WAIT1:
- /* If we enter the TCP_FIN_WAIT1 state and we are a
- * Fast Open socket and this is the first acceptable
- * ACK we have received, this would have acknowledged
- * our SYNACK so stop the SYNACK timer.
+ if (req) {
+ /* Re-arm the timer because data may have been sent out.
+ * This is similar to the regular data transmission case
+ * when new data has just been ack'ed.
+ *
+ * (TFO) - we could try to be more aggressive and
+ * retransmit any data sooner based on when they
+ * are sent out.
*/
- if (req != NULL) {
- /* Return RST if ack_seq is invalid.
- * Note that RFC793 only says to generate a
- * DUPACK for it but for TCP Fast Open it seems
- * better to treat this case like TCP_SYN_RECV
- * above.
- */
- if (!acceptable)
- return 1;
- /* We no longer need the request sock. */
- reqsk_fastopen_remove(sk, req, false);
- tcp_rearm_rto(sk);
- }
- if (tp->snd_una == tp->write_seq) {
- struct dst_entry *dst;
-
- tcp_set_state(sk, TCP_FIN_WAIT2);
- sk->sk_shutdown |= SEND_SHUTDOWN;
-
- dst = __sk_dst_get(sk);
- if (dst)
- dst_confirm(dst);
-
- if (!sock_flag(sk, SOCK_DEAD))
- /* Wake up lingering close() */
- sk->sk_state_change(sk);
- else {
- int tmo;
-
- if (tp->linger2 < 0 ||
- (TCP_SKB_CB(skb)->end_seq != TCP_SKB_CB(skb)->seq &&
- after(TCP_SKB_CB(skb)->end_seq - th->fin, tp->rcv_nxt))) {
- tcp_done(sk);
- NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPABORTONDATA);
- return 1;
- }
+ tcp_rearm_rto(sk);
+ } else
+ tcp_init_metrics(sk);
- tmo = tcp_fin_time(sk);
- if (tmo > TCP_TIMEWAIT_LEN) {
- inet_csk_reset_keepalive_timer(sk, tmo - TCP_TIMEWAIT_LEN);
- } else if (th->fin || sock_owned_by_user(sk)) {
- /* Bad case. We could lose such FIN otherwise.
- * It is not a big problem, but it looks confusing
- * and not so rare event. We still can lose it now,
- * if it spins in bh_lock_sock(), but it is really
- * marginal case.
- */
- inet_csk_reset_keepalive_timer(sk, tmo);
- } else {
- tcp_time_wait(sk, TCP_FIN_WAIT2, tmo);
- goto discard;
- }
- }
- }
- break;
+ /* Prevent spurious tcp_cwnd_restart() on first data packet */
+ tp->lsndtime = tcp_time_stamp;
- case TCP_CLOSING:
- if (tp->snd_una == tp->write_seq) {
- tcp_time_wait(sk, TCP_TIME_WAIT, 0);
- goto discard;
- }
+ tcp_initialize_rcv_mss(sk);
+ tcp_fast_path_on(tp);
+ break;
+
+ case TCP_FIN_WAIT1: {
+ struct dst_entry *dst;
+ int tmo;
+
+ /* If we enter the TCP_FIN_WAIT1 state and we are a
+ * Fast Open socket and this is the first acceptable
+ * ACK we have received, this would have acknowledged
+ * our SYNACK so stop the SYNACK timer.
+ */
+ if (req != NULL) {
+ /* Return RST if ack_seq is invalid.
+ * Note that RFC793 only says to generate a
+ * DUPACK for it but for TCP Fast Open it seems
+ * better to treat this case like TCP_SYN_RECV
+ * above.
+ */
+ if (!acceptable)
+ return 1;
+ /* We no longer need the request sock. */
+ reqsk_fastopen_remove(sk, req, false);
+ tcp_rearm_rto(sk);
+ }
+ if (tp->snd_una != tp->write_seq)
break;
- case TCP_LAST_ACK:
- if (tp->snd_una == tp->write_seq) {
- tcp_update_metrics(sk);
- tcp_done(sk);
- goto discard;
- }
+ tcp_set_state(sk, TCP_FIN_WAIT2);
+ sk->sk_shutdown |= SEND_SHUTDOWN;
+
+ dst = __sk_dst_get(sk);
+ if (dst)
+ dst_confirm(dst);
+
+ if (!sock_flag(sk, SOCK_DEAD)) {
+ /* Wake up lingering close() */
+ sk->sk_state_change(sk);
break;
}
+
+ if (tp->linger2 < 0 ||
+ (TCP_SKB_CB(skb)->end_seq != TCP_SKB_CB(skb)->seq &&
+ after(TCP_SKB_CB(skb)->end_seq - th->fin, tp->rcv_nxt))) {
+ tcp_done(sk);
+ NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPABORTONDATA);
+ return 1;
+ }
+
+ tmo = tcp_fin_time(sk);
+ if (tmo > TCP_TIMEWAIT_LEN) {
+ inet_csk_reset_keepalive_timer(sk, tmo - TCP_TIMEWAIT_LEN);
+ } else if (th->fin || sock_owned_by_user(sk)) {
+ /* Bad case. We could lose such FIN otherwise.
+ * It is not a big problem, but it looks confusing
+ * and not so rare event. We still can lose it now,
+ * if it spins in bh_lock_sock(), but it is really
+ * marginal case.
+ */
+ inet_csk_reset_keepalive_timer(sk, tmo);
+ } else {
+ tcp_time_wait(sk, TCP_FIN_WAIT2, tmo);
+ goto discard;
+ }
+ break;
+ }
+
+ case TCP_CLOSING:
+ if (tp->snd_una == tp->write_seq) {
+ tcp_time_wait(sk, TCP_TIME_WAIT, 0);
+ goto discard;
+ }
+ break;
+
+ case TCP_LAST_ACK:
+ if (tp->snd_una == tp->write_seq) {
+ tcp_update_metrics(sk);
+ tcp_done(sk);
+ goto discard;
+ }
+ break;
}
/* step 6: check the URG bit */
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 719652305a2..1063bb83e34 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -75,6 +75,7 @@
#include <net/netdma.h>
#include <net/secure_seq.h>
#include <net/tcp_memcontrol.h>
+#include <net/ll_poll.h>
#include <linux/inet.h>
#include <linux/ipv6.h>
@@ -545,8 +546,7 @@ out:
sock_put(sk);
}
-static void __tcp_v4_send_check(struct sk_buff *skb,
- __be32 saddr, __be32 daddr)
+void __tcp_v4_send_check(struct sk_buff *skb, __be32 saddr, __be32 daddr)
{
struct tcphdr *th = tcp_hdr(skb);
@@ -571,23 +571,6 @@ void tcp_v4_send_check(struct sock *sk, struct sk_buff *skb)
}
EXPORT_SYMBOL(tcp_v4_send_check);
-int tcp_v4_gso_send_check(struct sk_buff *skb)
-{
- const struct iphdr *iph;
- struct tcphdr *th;
-
- if (!pskb_may_pull(skb, sizeof(*th)))
- return -EINVAL;
-
- iph = ip_hdr(skb);
- th = tcp_hdr(skb);
-
- th->check = 0;
- skb->ip_summed = CHECKSUM_PARTIAL;
- __tcp_v4_send_check(skb, iph->saddr, iph->daddr);
- return 0;
-}
-
/*
* This routine will send an RST to the other tcp.
*
@@ -1026,7 +1009,7 @@ int tcp_md5_do_add(struct sock *sk, const union tcp_md5_addr *addr,
key = sock_kmalloc(sk, sizeof(*key), gfp);
if (!key)
return -ENOMEM;
- if (hlist_empty(&md5sig->head) && !tcp_alloc_md5sig_pool(sk)) {
+ if (!tcp_alloc_md5sig_pool()) {
sock_kfree_s(sk, key, sizeof(*key));
return -ENOMEM;
}
@@ -1044,9 +1027,7 @@ EXPORT_SYMBOL(tcp_md5_do_add);
int tcp_md5_do_del(struct sock *sk, const union tcp_md5_addr *addr, int family)
{
- struct tcp_sock *tp = tcp_sk(sk);
struct tcp_md5sig_key *key;
- struct tcp_md5sig_info *md5sig;
key = tcp_md5_do_lookup(sk, (union tcp_md5_addr *)&addr, AF_INET);
if (!key)
@@ -1054,10 +1035,6 @@ int tcp_md5_do_del(struct sock *sk, const union tcp_md5_addr *addr, int family)
hlist_del_rcu(&key->node);
atomic_sub(sizeof(*key), &sk->sk_omem_alloc);
kfree_rcu(key, rcu);
- md5sig = rcu_dereference_protected(tp->md5sig_info,
- sock_owned_by_user(sk));
- if (hlist_empty(&md5sig->head))
- tcp_free_md5sig_pool();
return 0;
}
EXPORT_SYMBOL(tcp_md5_do_del);
@@ -1071,8 +1048,6 @@ static void tcp_clear_md5_list(struct sock *sk)
md5sig = rcu_dereference_protected(tp->md5sig_info, 1);
- if (!hlist_empty(&md5sig->head))
- tcp_free_md5sig_pool();
hlist_for_each_entry_safe(key, n, &md5sig->head, node) {
hlist_del_rcu(&key->node);
atomic_sub(sizeof(*key), &sk->sk_omem_alloc);
@@ -2019,6 +1994,7 @@ process:
if (sk_filter(sk, skb))
goto discard_and_relse;
+ sk_mark_ll(sk, skb);
skb->dev = NULL;
bh_lock_sock_nested(sk);
@@ -2803,52 +2779,6 @@ void tcp4_proc_exit(void)
}
#endif /* CONFIG_PROC_FS */
-struct sk_buff **tcp4_gro_receive(struct sk_buff **head, struct sk_buff *skb)
-{
- const struct iphdr *iph = skb_gro_network_header(skb);
- __wsum wsum;
- __sum16 sum;
-
- switch (skb->ip_summed) {
- case CHECKSUM_COMPLETE:
- if (!tcp_v4_check(skb_gro_len(skb), iph->saddr, iph->daddr,
- skb->csum)) {
- skb->ip_summed = CHECKSUM_UNNECESSARY;
- break;
- }
-flush:
- NAPI_GRO_CB(skb)->flush = 1;
- return NULL;
-
- case CHECKSUM_NONE:
- wsum = csum_tcpudp_nofold(iph->saddr, iph->daddr,
- skb_gro_len(skb), IPPROTO_TCP, 0);
- sum = csum_fold(skb_checksum(skb,
- skb_gro_offset(skb),
- skb_gro_len(skb),
- wsum));
- if (sum)
- goto flush;
-
- skb->ip_summed = CHECKSUM_UNNECESSARY;
- break;
- }
-
- return tcp_gro_receive(head, skb);
-}
-
-int tcp4_gro_complete(struct sk_buff *skb)
-{
- const struct iphdr *iph = ip_hdr(skb);
- struct tcphdr *th = tcp_hdr(skb);
-
- th->check = ~tcp_v4_check(skb->len - skb_transport_offset(skb),
- iph->saddr, iph->daddr, 0);
- skb_shinfo(skb)->gso_type = SKB_GSO_TCPV4;
-
- return tcp_gro_complete(skb);
-}
-
struct proto tcp_prot = {
.name = "TCP",
.owner = THIS_MODULE,
diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c
index 0f017882725..ab1c0865852 100644
--- a/net/ipv4/tcp_minisocks.c
+++ b/net/ipv4/tcp_minisocks.c
@@ -317,7 +317,7 @@ void tcp_time_wait(struct sock *sk, int state, int timeo)
key = tp->af_specific->md5_lookup(sk, sk);
if (key != NULL) {
tcptw->tw_md5_key = kmemdup(key, sizeof(*key), GFP_ATOMIC);
- if (tcptw->tw_md5_key && tcp_alloc_md5sig_pool(sk) == NULL)
+ if (tcptw->tw_md5_key && !tcp_alloc_md5sig_pool())
BUG();
}
} while (0);
@@ -358,10 +358,8 @@ void tcp_twsk_destructor(struct sock *sk)
#ifdef CONFIG_TCP_MD5SIG
struct tcp_timewait_sock *twsk = tcp_twsk(sk);
- if (twsk->tw_md5_key) {
- tcp_free_md5sig_pool();
+ if (twsk->tw_md5_key)
kfree_rcu(twsk->tw_md5_key, rcu);
- }
#endif
}
EXPORT_SYMBOL_GPL(tcp_twsk_destructor);
diff --git a/net/ipv4/tcp_offload.c b/net/ipv4/tcp_offload.c
new file mode 100644
index 00000000000..3a7525e6c08
--- /dev/null
+++ b/net/ipv4/tcp_offload.c
@@ -0,0 +1,332 @@
+/*
+ * IPV4 GSO/GRO offload support
+ * Linux INET implementation
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ * TCPv4 GSO/GRO support
+ */
+
+#include <linux/skbuff.h>
+#include <net/tcp.h>
+#include <net/protocol.h>
+
+struct sk_buff *tcp_tso_segment(struct sk_buff *skb,
+ netdev_features_t features)
+{
+ struct sk_buff *segs = ERR_PTR(-EINVAL);
+ struct tcphdr *th;
+ unsigned int thlen;
+ unsigned int seq;
+ __be32 delta;
+ unsigned int oldlen;
+ unsigned int mss;
+ struct sk_buff *gso_skb = skb;
+ __sum16 newcheck;
+ bool ooo_okay, copy_destructor;
+
+ if (!pskb_may_pull(skb, sizeof(*th)))
+ goto out;
+
+ th = tcp_hdr(skb);
+ thlen = th->doff * 4;
+ if (thlen < sizeof(*th))
+ goto out;
+
+ if (!pskb_may_pull(skb, thlen))
+ goto out;
+
+ oldlen = (u16)~skb->len;
+ __skb_pull(skb, thlen);
+
+ mss = tcp_skb_mss(skb);
+ if (unlikely(skb->len <= mss))
+ goto out;
+
+ if (skb_gso_ok(skb, features | NETIF_F_GSO_ROBUST)) {
+ /* Packet is from an untrusted source, reset gso_segs. */
+ int type = skb_shinfo(skb)->gso_type;
+
+ if (unlikely(type &
+ ~(SKB_GSO_TCPV4 |
+ SKB_GSO_DODGY |
+ SKB_GSO_TCP_ECN |
+ SKB_GSO_TCPV6 |
+ SKB_GSO_GRE |
+ SKB_GSO_MPLS |
+ SKB_GSO_UDP_TUNNEL |
+ 0) ||
+ !(type & (SKB_GSO_TCPV4 | SKB_GSO_TCPV6))))
+ goto out;
+
+ skb_shinfo(skb)->gso_segs = DIV_ROUND_UP(skb->len, mss);
+
+ segs = NULL;
+ goto out;
+ }
+
+ copy_destructor = gso_skb->destructor == tcp_wfree;
+ ooo_okay = gso_skb->ooo_okay;
+ /* All segments but the first should have ooo_okay cleared */
+ skb->ooo_okay = 0;
+
+ segs = skb_segment(skb, features);
+ if (IS_ERR(segs))
+ goto out;
+
+ /* Only first segment might have ooo_okay set */
+ segs->ooo_okay = ooo_okay;
+
+ delta = htonl(oldlen + (thlen + mss));
+
+ skb = segs;
+ th = tcp_hdr(skb);
+ seq = ntohl(th->seq);
+
+ newcheck = ~csum_fold((__force __wsum)((__force u32)th->check +
+ (__force u32)delta));
+
+ do {
+ th->fin = th->psh = 0;
+ th->check = newcheck;
+
+ if (skb->ip_summed != CHECKSUM_PARTIAL)
+ th->check =
+ csum_fold(csum_partial(skb_transport_header(skb),
+ thlen, skb->csum));
+
+ seq += mss;
+ if (copy_destructor) {
+ skb->destructor = gso_skb->destructor;
+ skb->sk = gso_skb->sk;
+ /* {tcp|sock}_wfree() use exact truesize accounting :
+ * sum(skb->truesize) MUST be exactly gso_skb->truesize
+ * So we account mss bytes of 'true size' for each segment.
+ * The last segment will contain the remaining.
+ */
+ skb->truesize = mss;
+ gso_skb->truesize -= mss;
+ }
+ skb = skb->next;
+ th = tcp_hdr(skb);
+
+ th->seq = htonl(seq);
+ th->cwr = 0;
+ } while (skb->next);
+
+ /* Following permits TCP Small Queues to work well with GSO :
+ * The callback to TCP stack will be called at the time last frag
+ * is freed at TX completion, and not right now when gso_skb
+ * is freed by GSO engine
+ */
+ if (copy_destructor) {
+ swap(gso_skb->sk, skb->sk);
+ swap(gso_skb->destructor, skb->destructor);
+ swap(gso_skb->truesize, skb->truesize);
+ }
+
+ delta = htonl(oldlen + (skb_tail_pointer(skb) -
+ skb_transport_header(skb)) +
+ skb->data_len);
+ th->check = ~csum_fold((__force __wsum)((__force u32)th->check +
+ (__force u32)delta));
+ if (skb->ip_summed != CHECKSUM_PARTIAL)
+ th->check = csum_fold(csum_partial(skb_transport_header(skb),
+ thlen, skb->csum));
+out:
+ return segs;
+}
+EXPORT_SYMBOL(tcp_tso_segment);
+
+struct sk_buff **tcp_gro_receive(struct sk_buff **head, struct sk_buff *skb)
+{
+ struct sk_buff **pp = NULL;
+ struct sk_buff *p;
+ struct tcphdr *th;
+ struct tcphdr *th2;
+ unsigned int len;
+ unsigned int thlen;
+ __be32 flags;
+ unsigned int mss = 1;
+ unsigned int hlen;
+ unsigned int off;
+ int flush = 1;
+ int i;
+
+ off = skb_gro_offset(skb);
+ hlen = off + sizeof(*th);
+ th = skb_gro_header_fast(skb, off);
+ if (skb_gro_header_hard(skb, hlen)) {
+ th = skb_gro_header_slow(skb, hlen, off);
+ if (unlikely(!th))
+ goto out;
+ }
+
+ thlen = th->doff * 4;
+ if (thlen < sizeof(*th))
+ goto out;
+
+ hlen = off + thlen;
+ if (skb_gro_header_hard(skb, hlen)) {
+ th = skb_gro_header_slow(skb, hlen, off);
+ if (unlikely(!th))
+ goto out;
+ }
+
+ skb_gro_pull(skb, thlen);
+
+ len = skb_gro_len(skb);
+ flags = tcp_flag_word(th);
+
+ for (; (p = *head); head = &p->next) {
+ if (!NAPI_GRO_CB(p)->same_flow)
+ continue;
+
+ th2 = tcp_hdr(p);
+
+ if (*(u32 *)&th->source ^ *(u32 *)&th2->source) {
+ NAPI_GRO_CB(p)->same_flow = 0;
+ continue;
+ }
+
+ goto found;
+ }
+
+ goto out_check_final;
+
+found:
+ flush = NAPI_GRO_CB(p)->flush;
+ flush |= (__force int)(flags & TCP_FLAG_CWR);
+ flush |= (__force int)((flags ^ tcp_flag_word(th2)) &
+ ~(TCP_FLAG_CWR | TCP_FLAG_FIN | TCP_FLAG_PSH));
+ flush |= (__force int)(th->ack_seq ^ th2->ack_seq);
+ for (i = sizeof(*th); i < thlen; i += 4)
+ flush |= *(u32 *)((u8 *)th + i) ^
+ *(u32 *)((u8 *)th2 + i);
+
+ mss = tcp_skb_mss(p);
+
+ flush |= (len - 1) >= mss;
+ flush |= (ntohl(th2->seq) + skb_gro_len(p)) ^ ntohl(th->seq);
+
+ if (flush || skb_gro_receive(head, skb)) {
+ mss = 1;
+ goto out_check_final;
+ }
+
+ p = *head;
+ th2 = tcp_hdr(p);
+ tcp_flag_word(th2) |= flags & (TCP_FLAG_FIN | TCP_FLAG_PSH);
+
+out_check_final:
+ flush = len < mss;
+ flush |= (__force int)(flags & (TCP_FLAG_URG | TCP_FLAG_PSH |
+ TCP_FLAG_RST | TCP_FLAG_SYN |
+ TCP_FLAG_FIN));
+
+ if (p && (!NAPI_GRO_CB(skb)->same_flow || flush))
+ pp = head;
+
+out:
+ NAPI_GRO_CB(skb)->flush |= flush;
+
+ return pp;
+}
+EXPORT_SYMBOL(tcp_gro_receive);
+
+int tcp_gro_complete(struct sk_buff *skb)
+{
+ struct tcphdr *th = tcp_hdr(skb);
+
+ skb->csum_start = skb_transport_header(skb) - skb->head;
+ skb->csum_offset = offsetof(struct tcphdr, check);
+ skb->ip_summed = CHECKSUM_PARTIAL;
+
+ skb_shinfo(skb)->gso_segs = NAPI_GRO_CB(skb)->count;
+
+ if (th->cwr)
+ skb_shinfo(skb)->gso_type |= SKB_GSO_TCP_ECN;
+
+ return 0;
+}
+EXPORT_SYMBOL(tcp_gro_complete);
+
+static int tcp_v4_gso_send_check(struct sk_buff *skb)
+{
+ const struct iphdr *iph;
+ struct tcphdr *th;
+
+ if (!pskb_may_pull(skb, sizeof(*th)))
+ return -EINVAL;
+
+ iph = ip_hdr(skb);
+ th = tcp_hdr(skb);
+
+ th->check = 0;
+ skb->ip_summed = CHECKSUM_PARTIAL;
+ __tcp_v4_send_check(skb, iph->saddr, iph->daddr);
+ return 0;
+}
+
+static struct sk_buff **tcp4_gro_receive(struct sk_buff **head, struct sk_buff *skb)
+{
+ const struct iphdr *iph = skb_gro_network_header(skb);
+ __wsum wsum;
+ __sum16 sum;
+
+ switch (skb->ip_summed) {
+ case CHECKSUM_COMPLETE:
+ if (!tcp_v4_check(skb_gro_len(skb), iph->saddr, iph->daddr,
+ skb->csum)) {
+ skb->ip_summed = CHECKSUM_UNNECESSARY;
+ break;
+ }
+flush:
+ NAPI_GRO_CB(skb)->flush = 1;
+ return NULL;
+
+ case CHECKSUM_NONE:
+ wsum = csum_tcpudp_nofold(iph->saddr, iph->daddr,
+ skb_gro_len(skb), IPPROTO_TCP, 0);
+ sum = csum_fold(skb_checksum(skb,
+ skb_gro_offset(skb),
+ skb_gro_len(skb),
+ wsum));
+ if (sum)
+ goto flush;
+
+ skb->ip_summed = CHECKSUM_UNNECESSARY;
+ break;
+ }
+
+ return tcp_gro_receive(head, skb);
+}
+
+static int tcp4_gro_complete(struct sk_buff *skb)
+{
+ const struct iphdr *iph = ip_hdr(skb);
+ struct tcphdr *th = tcp_hdr(skb);
+
+ th->check = ~tcp_v4_check(skb->len - skb_transport_offset(skb),
+ iph->saddr, iph->daddr, 0);
+ skb_shinfo(skb)->gso_type = SKB_GSO_TCPV4;
+
+ return tcp_gro_complete(skb);
+}
+
+static const struct net_offload tcpv4_offload = {
+ .callbacks = {
+ .gso_send_check = tcp_v4_gso_send_check,
+ .gso_segment = tcp_tso_segment,
+ .gro_receive = tcp4_gro_receive,
+ .gro_complete = tcp4_gro_complete,
+ },
+};
+
+int __init tcpv4_offload_init(void)
+{
+ return inet_add_offload(&tcpv4_offload, IPPROTO_TCP);
+}
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 536d40929ba..ec335fabd5c 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -874,11 +874,13 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it,
&md5);
tcp_header_size = tcp_options_size + sizeof(struct tcphdr);
- if (tcp_packets_in_flight(tp) == 0) {
+ if (tcp_packets_in_flight(tp) == 0)
tcp_ca_event(sk, CA_EVENT_TX_START);
- skb->ooo_okay = 1;
- } else
- skb->ooo_okay = 0;
+
+ /* if no packet is in qdisc/device queue, then allow XPS to select
+ * another queue.
+ */
+ skb->ooo_okay = sk_wmem_alloc_get(sk) == 0;
skb_push(skb, tcp_header_size);
skb_reset_transport_header(skb);
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index 0bf5d399a03..2955b25aee6 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -109,6 +109,7 @@
#include <trace/events/udp.h>
#include <linux/static_key.h>
#include <trace/events/skb.h>
+#include <net/ll_poll.h>
#include "udp_impl.h"
struct udp_table udp_table __read_mostly;
@@ -1709,7 +1710,10 @@ int __udp4_lib_rcv(struct sk_buff *skb, struct udp_table *udptable,
sk = __udp4_lib_lookup_skb(skb, uh->source, uh->dest, udptable);
if (sk != NULL) {
- int ret = udp_queue_rcv_skb(sk, skb);
+ int ret;
+
+ sk_mark_ll(sk, skb);
+ ret = udp_queue_rcv_skb(sk, skb);
sock_put(sk);
/* a return value > 0 means to resubmit the input, but
@@ -1967,6 +1971,8 @@ unsigned int udp_poll(struct file *file, struct socket *sock, poll_table *wait)
unsigned int mask = datagram_poll(file, sock, wait);
struct sock *sk = sock->sk;
+ sock_rps_record_flow(sk);
+
/* Check for false positives due to checksum errors */
if ((mask & POLLRDNORM) && !(file->f_flags & O_NONBLOCK) &&
!(sk->sk_shutdown & RCV_SHUTDOWN) && !first_packet_length(sk))
@@ -2381,7 +2387,7 @@ struct sk_buff *udp4_ufo_fragment(struct sk_buff *skb,
if (unlikely(type & ~(SKB_GSO_UDP | SKB_GSO_DODGY |
SKB_GSO_UDP_TUNNEL |
- SKB_GSO_GRE) ||
+ SKB_GSO_GRE | SKB_GSO_MPLS) ||
!(type & (SKB_GSO_UDP))))
goto out;
diff --git a/net/ipv4/xfrm4_tunnel.c b/net/ipv4/xfrm4_tunnel.c
index 05a5df2febc..06347dbd32c 100644
--- a/net/ipv4/xfrm4_tunnel.c
+++ b/net/ipv4/xfrm4_tunnel.c
@@ -63,7 +63,7 @@ static int xfrm_tunnel_err(struct sk_buff *skb, u32 info)
static struct xfrm_tunnel xfrm_tunnel_handler __read_mostly = {
.handler = xfrm_tunnel_rcv,
.err_handler = xfrm_tunnel_err,
- .priority = 2,
+ .priority = 3,
};
#if IS_ENABLED(CONFIG_IPV6)
diff --git a/net/ipv6/Makefile b/net/ipv6/Makefile
index 9af088d2cda..470a9c008e9 100644
--- a/net/ipv6/Makefile
+++ b/net/ipv6/Makefile
@@ -7,7 +7,7 @@ obj-$(CONFIG_IPV6) += ipv6.o
ipv6-objs := af_inet6.o anycast.o ip6_output.o ip6_input.o addrconf.o \
addrlabel.o \
route.o ip6_fib.o ipv6_sockglue.o ndisc.o udp.o udplite.o \
- raw.o icmp.o mcast.o reassembly.o tcp_ipv6.o \
+ raw.o icmp.o mcast.o reassembly.o tcp_ipv6.o ping.o \
exthdrs.o datagram.o ip6_flowlabel.o inet6_connection_sock.o
ipv6-offload := ip6_offload.o tcpv6_offload.o udp_offload.o exthdrs_offload.o
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index d1ab6ab29a5..21010fddb20 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -1126,8 +1126,7 @@ retry:
ift = !max_addresses ||
ipv6_count_addresses(idev) < max_addresses ?
- ipv6_add_addr(idev, &addr, tmp_plen,
- ipv6_addr_type(&addr)&IPV6_ADDR_SCOPE_MASK,
+ ipv6_add_addr(idev, &addr, tmp_plen, ipv6_addr_scope(&addr),
addr_flags) : NULL;
if (IS_ERR_OR_NULL(ift)) {
in6_ifa_put(ifp);
@@ -1487,7 +1486,7 @@ static int ipv6_count_addresses(struct inet6_dev *idev)
}
int ipv6_chk_addr(struct net *net, const struct in6_addr *addr,
- struct net_device *dev, int strict)
+ const struct net_device *dev, int strict)
{
struct inet6_ifaddr *ifp;
unsigned int hash = inet6_addr_hash(addr);
@@ -2402,6 +2401,7 @@ err_exit:
* Manual configuration of address on an interface
*/
static int inet6_addr_add(struct net *net, int ifindex, const struct in6_addr *pfx,
+ const struct in6_addr *peer_pfx,
unsigned int plen, __u8 ifa_flags, __u32 prefered_lft,
__u32 valid_lft)
{
@@ -2457,6 +2457,8 @@ static int inet6_addr_add(struct net *net, int ifindex, const struct in6_addr *p
ifp->valid_lft = valid_lft;
ifp->prefered_lft = prefered_lft;
ifp->tstamp = jiffies;
+ if (peer_pfx)
+ ifp->peer_addr = *peer_pfx;
spin_unlock_bh(&ifp->lock);
addrconf_prefix_route(&ifp->addr, ifp->prefix_len, dev,
@@ -2526,7 +2528,7 @@ int addrconf_add_ifaddr(struct net *net, void __user *arg)
return -EFAULT;
rtnl_lock();
- err = inet6_addr_add(net, ireq.ifr6_ifindex, &ireq.ifr6_addr,
+ err = inet6_addr_add(net, ireq.ifr6_ifindex, &ireq.ifr6_addr, NULL,
ireq.ifr6_prefixlen, IFA_F_PERMANENT,
INFINITY_LIFE_TIME, INFINITY_LIFE_TIME);
rtnl_unlock();
@@ -2658,8 +2660,10 @@ static void init_loopback(struct net_device *dev)
sp_rt = addrconf_dst_alloc(idev, &sp_ifa->addr, 0);
/* Failure cases are ignored */
- if (!IS_ERR(sp_rt))
+ if (!IS_ERR(sp_rt)) {
+ sp_ifa->rt = sp_rt;
ip6_ins_rt(sp_rt);
+ }
}
read_unlock_bh(&idev->lock);
}
@@ -2824,9 +2828,9 @@ static void addrconf_ip6_tnl_config(struct net_device *dev)
}
static int addrconf_notify(struct notifier_block *this, unsigned long event,
- void *data)
+ void *ptr)
{
- struct net_device *dev = (struct net_device *) data;
+ struct net_device *dev = netdev_notifier_info_to_dev(ptr);
struct inet6_dev *idev = __in6_dev_get(dev);
int run_pending = 0;
int err;
@@ -3610,18 +3614,20 @@ restart:
rcu_read_unlock_bh();
}
-static struct in6_addr *extract_addr(struct nlattr *addr, struct nlattr *local)
+static struct in6_addr *extract_addr(struct nlattr *addr, struct nlattr *local,
+ struct in6_addr **peer_pfx)
{
struct in6_addr *pfx = NULL;
+ *peer_pfx = NULL;
+
if (addr)
pfx = nla_data(addr);
if (local) {
if (pfx && nla_memcmp(local, pfx, sizeof(*pfx)))
- pfx = NULL;
- else
- pfx = nla_data(local);
+ *peer_pfx = pfx;
+ pfx = nla_data(local);
}
return pfx;
@@ -3639,7 +3645,7 @@ inet6_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh)
struct net *net = sock_net(skb->sk);
struct ifaddrmsg *ifm;
struct nlattr *tb[IFA_MAX+1];
- struct in6_addr *pfx;
+ struct in6_addr *pfx, *peer_pfx;
int err;
err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv6_policy);
@@ -3647,7 +3653,7 @@ inet6_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh)
return err;
ifm = nlmsg_data(nlh);
- pfx = extract_addr(tb[IFA_ADDRESS], tb[IFA_LOCAL]);
+ pfx = extract_addr(tb[IFA_ADDRESS], tb[IFA_LOCAL], &peer_pfx);
if (pfx == NULL)
return -EINVAL;
@@ -3705,7 +3711,7 @@ inet6_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh)
struct net *net = sock_net(skb->sk);
struct ifaddrmsg *ifm;
struct nlattr *tb[IFA_MAX+1];
- struct in6_addr *pfx;
+ struct in6_addr *pfx, *peer_pfx;
struct inet6_ifaddr *ifa;
struct net_device *dev;
u32 valid_lft = INFINITY_LIFE_TIME, preferred_lft = INFINITY_LIFE_TIME;
@@ -3717,7 +3723,7 @@ inet6_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh)
return err;
ifm = nlmsg_data(nlh);
- pfx = extract_addr(tb[IFA_ADDRESS], tb[IFA_LOCAL]);
+ pfx = extract_addr(tb[IFA_ADDRESS], tb[IFA_LOCAL], &peer_pfx);
if (pfx == NULL)
return -EINVAL;
@@ -3745,7 +3751,7 @@ inet6_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh)
* It would be best to check for !NLM_F_CREATE here but
* userspace alreay relies on not having to provide this.
*/
- return inet6_addr_add(net, ifm->ifa_index, pfx,
+ return inet6_addr_add(net, ifm->ifa_index, pfx, peer_pfx,
ifm->ifa_prefixlen, ifa_flags,
preferred_lft, valid_lft);
}
@@ -3802,6 +3808,7 @@ static inline int rt_scope(int ifa_scope)
static inline int inet6_ifaddr_msgsize(void)
{
return NLMSG_ALIGN(sizeof(struct ifaddrmsg))
+ + nla_total_size(16) /* IFA_LOCAL */
+ nla_total_size(16) /* IFA_ADDRESS */
+ nla_total_size(sizeof(struct ifa_cacheinfo));
}
@@ -3840,13 +3847,22 @@ static int inet6_fill_ifaddr(struct sk_buff *skb, struct inet6_ifaddr *ifa,
valid = INFINITY_LIFE_TIME;
}
- if (nla_put(skb, IFA_ADDRESS, 16, &ifa->addr) < 0 ||
- put_cacheinfo(skb, ifa->cstamp, ifa->tstamp, preferred, valid) < 0) {
- nlmsg_cancel(skb, nlh);
- return -EMSGSIZE;
- }
+ if (!ipv6_addr_any(&ifa->peer_addr)) {
+ if (nla_put(skb, IFA_LOCAL, 16, &ifa->addr) < 0 ||
+ nla_put(skb, IFA_ADDRESS, 16, &ifa->peer_addr) < 0)
+ goto error;
+ } else
+ if (nla_put(skb, IFA_ADDRESS, 16, &ifa->addr) < 0)
+ goto error;
+
+ if (put_cacheinfo(skb, ifa->cstamp, ifa->tstamp, preferred, valid) < 0)
+ goto error;
return nlmsg_end(skb, nlh);
+
+error:
+ nlmsg_cancel(skb, nlh);
+ return -EMSGSIZE;
}
static int inet6_fill_ifmcaddr(struct sk_buff *skb, struct ifmcaddr6 *ifmca,
@@ -4046,7 +4062,7 @@ static int inet6_rtm_getaddr(struct sk_buff *in_skb, struct nlmsghdr *nlh)
struct net *net = sock_net(in_skb->sk);
struct ifaddrmsg *ifm;
struct nlattr *tb[IFA_MAX+1];
- struct in6_addr *addr = NULL;
+ struct in6_addr *addr = NULL, *peer;
struct net_device *dev = NULL;
struct inet6_ifaddr *ifa;
struct sk_buff *skb;
@@ -4056,7 +4072,7 @@ static int inet6_rtm_getaddr(struct sk_buff *in_skb, struct nlmsghdr *nlh)
if (err < 0)
goto errout;
- addr = extract_addr(tb[IFA_ADDRESS], tb[IFA_LOCAL]);
+ addr = extract_addr(tb[IFA_ADDRESS], tb[IFA_LOCAL], &peer);
if (addr == NULL) {
err = -EINVAL;
goto errout;
@@ -4564,11 +4580,26 @@ static void __ipv6_ifa_notify(int event, struct inet6_ifaddr *ifp)
ip6_ins_rt(ifp->rt);
if (ifp->idev->cnf.forwarding)
addrconf_join_anycast(ifp);
+ if (!ipv6_addr_any(&ifp->peer_addr))
+ addrconf_prefix_route(&ifp->peer_addr, 128,
+ ifp->idev->dev, 0, 0);
break;
case RTM_DELADDR:
if (ifp->idev->cnf.forwarding)
addrconf_leave_anycast(ifp);
addrconf_leave_solict(ifp->idev, &ifp->addr);
+ if (!ipv6_addr_any(&ifp->peer_addr)) {
+ struct rt6_info *rt;
+ struct net_device *dev = ifp->idev->dev;
+
+ rt = rt6_lookup(dev_net(dev), &ifp->peer_addr, NULL,
+ dev->ifindex, 1);
+ if (rt) {
+ dst_hold(&rt->dst);
+ if (ip6_del_rt(rt))
+ dst_free(&rt->dst);
+ }
+ }
dst_hold(&ifp->rt->dst);
if (ip6_del_rt(ifp->rt))
@@ -4616,13 +4647,16 @@ int addrconf_sysctl_forward(ctl_table *ctl, int write,
static void dev_disable_change(struct inet6_dev *idev)
{
+ struct netdev_notifier_info info;
+
if (!idev || !idev->dev)
return;
+ netdev_notifier_info_init(&info, idev->dev);
if (idev->cnf.disable_ipv6)
- addrconf_notify(NULL, NETDEV_DOWN, idev->dev);
+ addrconf_notify(NULL, NETDEV_DOWN, &info);
else
- addrconf_notify(NULL, NETDEV_UP, idev->dev);
+ addrconf_notify(NULL, NETDEV_UP, &info);
}
static void addrconf_disable_change(struct net *net, __s32 newf)
diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c
index ab5c7ad482c..a5ac969aeef 100644
--- a/net/ipv6/af_inet6.c
+++ b/net/ipv6/af_inet6.c
@@ -49,6 +49,7 @@
#include <net/udp.h>
#include <net/udplite.h>
#include <net/tcp.h>
+#include <net/ping.h>
#include <net/protocol.h>
#include <net/inet_common.h>
#include <net/route.h>
@@ -840,6 +841,9 @@ static int __init inet6_init(void)
if (err)
goto out_unregister_udplite_proto;
+ err = proto_register(&pingv6_prot, 1);
+ if (err)
+ goto out_unregister_ping_proto;
/* We MUST register RAW sockets before we create the ICMP6,
* IGMP6, or NDISC control sockets.
@@ -930,6 +934,10 @@ static int __init inet6_init(void)
if (err)
goto ipv6_packet_fail;
+ err = pingv6_init();
+ if (err)
+ goto pingv6_fail;
+
#ifdef CONFIG_SYSCTL
err = ipv6_sysctl_register();
if (err)
@@ -942,6 +950,8 @@ out:
sysctl_fail:
ipv6_packet_cleanup();
#endif
+pingv6_fail:
+ pingv6_exit();
ipv6_packet_fail:
tcpv6_exit();
tcpv6_fail:
@@ -985,6 +995,8 @@ register_pernet_fail:
rtnl_unregister_all(PF_INET6);
out_sock_register_fail:
rawv6_exit();
+out_unregister_ping_proto:
+ proto_unregister(&pingv6_prot);
out_unregister_raw_proto:
proto_unregister(&rawv6_prot);
out_unregister_udplite_proto:
diff --git a/net/ipv6/datagram.c b/net/ipv6/datagram.c
index 4b56cbbc789..197e6f4a2b7 100644
--- a/net/ipv6/datagram.c
+++ b/net/ipv6/datagram.c
@@ -879,3 +879,30 @@ exit_f:
return err;
}
EXPORT_SYMBOL_GPL(ip6_datagram_send_ctl);
+
+/*
+ * Emit one /proc-style line describing the IPv6 datagram socket @sp.
+ *
+ * @seq:    seq_file being filled
+ * @sp:     socket to describe
+ * @srcp:   value printed in the local port column
+ * @destp:  value printed in the remote port column
+ * @bucket: value printed in the leading slot column
+ *
+ * The local address comes from np->rcv_saddr and the remote one from
+ * np->daddr.  The timer/retransmit/timeout columns are printed as zero.
+ */
+void ip6_dgram_sock_seq_show(struct seq_file *seq, struct sock *sp,
+			     __u16 srcp, __u16 destp, int bucket)
+{
+	const struct ipv6_pinfo *np = inet6_sk(sp);
+	const __be32 *local = np->rcv_saddr.s6_addr32;
+	const __be32 *remote = np->daddr.s6_addr32;
+
+	seq_printf(seq,
+		   "%5d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
+		   "%02X %08X:%08X %02X:%08lX %08X %5d %8d %lu %d %pK %d\n",
+		   bucket,
+		   local[0], local[1], local[2], local[3], srcp,
+		   remote[0], remote[1], remote[2], remote[3], destp,
+		   sp->sk_state,
+		   sk_wmem_alloc_get(sp),
+		   sk_rmem_alloc_get(sp),
+		   0, 0L, 0,
+		   from_kuid_munged(seq_user_ns(seq), sock_i_uid(sp)),
+		   0,
+		   sock_i_ino(sp),
+		   atomic_read(&sp->sk_refcnt), sp,
+		   atomic_read(&sp->sk_drops));
+}
diff --git a/net/ipv6/exthdrs_core.c b/net/ipv6/exthdrs_core.c
index c5e83fae4df..140748debc4 100644
--- a/net/ipv6/exthdrs_core.c
+++ b/net/ipv6/exthdrs_core.c
@@ -115,7 +115,7 @@ EXPORT_SYMBOL(ipv6_skip_exthdr);
int ipv6_find_tlv(struct sk_buff *skb, int offset, int type)
{
const unsigned char *nh = skb_network_header(skb);
- int packet_len = skb->tail - skb->network_header;
+ int packet_len = skb_tail_pointer(skb) - skb_network_header(skb);
struct ipv6_opt_hdr *hdr;
int len;
diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c
index b4ff0a42b8c..4b4890bbe16 100644
--- a/net/ipv6/icmp.c
+++ b/net/ipv6/icmp.c
@@ -57,6 +57,7 @@
#include <net/ipv6.h>
#include <net/ip6_checksum.h>
+#include <net/ping.h>
#include <net/protocol.h>
#include <net/raw.h>
#include <net/rawv6.h>
@@ -84,12 +85,18 @@ static inline struct sock *icmpv6_sk(struct net *net)
static void icmpv6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
u8 type, u8 code, int offset, __be32 info)
{
+ /* icmpv6_notify checks 8 bytes can be pulled, icmp6hdr is 8 bytes */
+ struct icmp6hdr *icmp6 = (struct icmp6hdr *) (skb->data + offset);
struct net *net = dev_net(skb->dev);
if (type == ICMPV6_PKT_TOOBIG)
ip6_update_pmtu(skb, net, info, 0, 0);
else if (type == NDISC_REDIRECT)
ip6_redirect(skb, net, 0, 0);
+
+ if (!(type & ICMPV6_INFOMSG_MASK))
+ if (icmp6->icmp6_type == ICMPV6_ECHO_REQUEST)
+ ping_err(skb, offset, info);
}
static int icmpv6_rcv(struct sk_buff *skb);
@@ -224,7 +231,8 @@ static bool opt_unrec(struct sk_buff *skb, __u32 offset)
return (*op & 0xC0) == 0x80;
}
-static int icmpv6_push_pending_frames(struct sock *sk, struct flowi6 *fl6, struct icmp6hdr *thdr, int len)
+int icmpv6_push_pending_frames(struct sock *sk, struct flowi6 *fl6,
+ struct icmp6hdr *thdr, int len)
{
struct sk_buff *skb;
struct icmp6hdr *icmp6h;
@@ -307,8 +315,8 @@ static void mip6_addr_swap(struct sk_buff *skb)
static inline void mip6_addr_swap(struct sk_buff *skb) {}
#endif
-static struct dst_entry *icmpv6_route_lookup(struct net *net, struct sk_buff *skb,
- struct sock *sk, struct flowi6 *fl6)
+struct dst_entry *icmpv6_route_lookup(struct net *net, struct sk_buff *skb,
+ struct sock *sk, struct flowi6 *fl6)
{
struct dst_entry *dst, *dst2;
struct flowi6 fl2;
@@ -391,7 +399,7 @@ static void icmp6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info)
int err = 0;
if ((u8 *)hdr < skb->head ||
- (skb->network_header + sizeof(*hdr)) > skb->tail)
+ (skb_network_header(skb) + sizeof(*hdr)) > skb_tail_pointer(skb))
return;
/*
@@ -697,7 +705,8 @@ static int icmpv6_rcv(struct sk_buff *skb)
skb->csum = ~csum_unfold(csum_ipv6_magic(saddr, daddr, skb->len,
IPPROTO_ICMPV6, 0));
if (__skb_checksum_complete(skb)) {
- LIMIT_NETDEBUG(KERN_DEBUG "ICMPv6 checksum failed [%pI6 > %pI6]\n",
+ LIMIT_NETDEBUG(KERN_DEBUG
+ "ICMPv6 checksum failed [%pI6c > %pI6c]\n",
saddr, daddr);
goto csum_error;
}
@@ -718,7 +727,7 @@ static int icmpv6_rcv(struct sk_buff *skb)
break;
case ICMPV6_ECHO_REPLY:
- /* we couldn't care less */
+ ping_rcv(skb);
break;
case ICMPV6_PKT_TOOBIG:
diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c
index d3ddd840035..ecd60733e5e 100644
--- a/net/ipv6/ip6_gre.c
+++ b/net/ipv6/ip6_gre.c
@@ -1081,6 +1081,7 @@ static int ip6gre_tunnel_ioctl(struct net_device *dev,
}
if (t == NULL)
t = netdev_priv(dev);
+ memset(&p, 0, sizeof(p));
ip6gre_tnl_parm_to_user(&p, &t->parms);
if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof(p)))
err = -EFAULT;
@@ -1128,6 +1129,7 @@ static int ip6gre_tunnel_ioctl(struct net_device *dev,
if (t) {
err = 0;
+ memset(&p, 0, sizeof(p));
ip6gre_tnl_parm_to_user(&p, &t->parms);
if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof(p)))
err = -EFAULT;
diff --git a/net/ipv6/ip6_offload.c b/net/ipv6/ip6_offload.c
index 71b766ee821..a263b990ee1 100644
--- a/net/ipv6/ip6_offload.c
+++ b/net/ipv6/ip6_offload.c
@@ -98,6 +98,7 @@ static struct sk_buff *ipv6_gso_segment(struct sk_buff *skb,
SKB_GSO_TCP_ECN |
SKB_GSO_GRE |
SKB_GSO_UDP_TUNNEL |
+ SKB_GSO_MPLS |
SKB_GSO_TCPV6 |
0)))
goto out;
diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
index d2eedf19233..dae1949019d 100644
--- a/net/ipv6/ip6_output.c
+++ b/net/ipv6/ip6_output.c
@@ -1147,7 +1147,7 @@ int ip6_append_data(struct sock *sk, int getfrag(void *from, char *to,
if (WARN_ON(np->cork.opt))
return -EINVAL;
- np->cork.opt = kmalloc(opt->tot_len, sk->sk_allocation);
+ np->cork.opt = kzalloc(opt->tot_len, sk->sk_allocation);
if (unlikely(np->cork.opt == NULL))
return -ENOBUFS;
diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c
index 241fb8ad9fc..583e8d435f9 100644
--- a/net/ipv6/ip6mr.c
+++ b/net/ipv6/ip6mr.c
@@ -1319,7 +1319,7 @@ static int ip6mr_mfc_delete(struct mr6_table *mrt, struct mf6cctl *mfc,
static int ip6mr_device_event(struct notifier_block *this,
unsigned long event, void *ptr)
{
- struct net_device *dev = ptr;
+ struct net_device *dev = netdev_notifier_info_to_dev(ptr);
struct net *net = dev_net(dev);
struct mr6_table *mrt;
struct mif_device *v;
diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c
index bfa6cc36ef2..72c8bfe06bb 100644
--- a/net/ipv6/mcast.c
+++ b/net/ipv6/mcast.c
@@ -1409,8 +1409,9 @@ static void mld_sendpack(struct sk_buff *skb)
idev = __in6_dev_get(skb->dev);
IP6_UPD_PO_STATS(net, idev, IPSTATS_MIB_OUT, skb->len);
- payload_len = (skb->tail - skb->network_header) - sizeof(*pip6);
- mldlen = skb->tail - skb->transport_header;
+ payload_len = (skb_tail_pointer(skb) - skb_network_header(skb)) -
+ sizeof(*pip6);
+ mldlen = skb_tail_pointer(skb) - skb_transport_header(skb);
pip6->payload_len = htons(payload_len);
pmr->mld2r_cksum = csum_ipv6_magic(&pip6->saddr, &pip6->daddr, mldlen,
diff --git a/net/ipv6/mip6.c b/net/ipv6/mip6.c
index 0f9bdc5ee9f..9ac01dc9402 100644
--- a/net/ipv6/mip6.c
+++ b/net/ipv6/mip6.c
@@ -268,7 +268,8 @@ static int mip6_destopt_offset(struct xfrm_state *x, struct sk_buff *skb,
struct ipv6_opt_hdr *exthdr =
(struct ipv6_opt_hdr *)(ipv6_hdr(skb) + 1);
const unsigned char *nh = skb_network_header(skb);
- unsigned int packet_len = skb->tail - skb->network_header;
+ unsigned int packet_len = skb_tail_pointer(skb) -
+ skb_network_header(skb);
int found_rhdr = 0;
*nexthdr = &ipv6_hdr(skb)->nexthdr;
@@ -404,7 +405,8 @@ static int mip6_rthdr_offset(struct xfrm_state *x, struct sk_buff *skb,
struct ipv6_opt_hdr *exthdr =
(struct ipv6_opt_hdr *)(ipv6_hdr(skb) + 1);
const unsigned char *nh = skb_network_header(skb);
- unsigned int packet_len = skb->tail - skb->network_header;
+ unsigned int packet_len = skb_tail_pointer(skb) -
+ skb_network_header(skb);
int found_rhdr = 0;
*nexthdr = &ipv6_hdr(skb)->nexthdr;
diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c
index 2712ab22a17..781dd3c9968 100644
--- a/net/ipv6/ndisc.c
+++ b/net/ipv6/ndisc.c
@@ -693,7 +693,7 @@ static void ndisc_recv_ns(struct sk_buff *skb)
const struct in6_addr *saddr = &ipv6_hdr(skb)->saddr;
const struct in6_addr *daddr = &ipv6_hdr(skb)->daddr;
u8 *lladdr = NULL;
- u32 ndoptlen = skb->tail - (skb->transport_header +
+ u32 ndoptlen = skb_tail_pointer(skb) - (skb_transport_header(skb) +
offsetof(struct nd_msg, opt));
struct ndisc_options ndopts;
struct net_device *dev = skb->dev;
@@ -853,7 +853,7 @@ static void ndisc_recv_na(struct sk_buff *skb)
const struct in6_addr *saddr = &ipv6_hdr(skb)->saddr;
const struct in6_addr *daddr = &ipv6_hdr(skb)->daddr;
u8 *lladdr = NULL;
- u32 ndoptlen = skb->tail - (skb->transport_header +
+ u32 ndoptlen = skb_tail_pointer(skb) - (skb_transport_header(skb) +
offsetof(struct nd_msg, opt));
struct ndisc_options ndopts;
struct net_device *dev = skb->dev;
@@ -1069,7 +1069,8 @@ static void ndisc_router_discovery(struct sk_buff *skb)
__u8 * opt = (__u8 *)(ra_msg + 1);
- optlen = (skb->tail - skb->transport_header) - sizeof(struct ra_msg);
+ optlen = (skb_tail_pointer(skb) - skb_transport_header(skb)) -
+ sizeof(struct ra_msg);
if (!(ipv6_addr_type(&ipv6_hdr(skb)->saddr) & IPV6_ADDR_LINKLOCAL)) {
ND_PRINTK(2, warn, "RA: source address is not link-local\n");
@@ -1346,7 +1347,7 @@ static void ndisc_redirect_rcv(struct sk_buff *skb)
u8 *hdr;
struct ndisc_options ndopts;
struct rd_msg *msg = (struct rd_msg *)skb_transport_header(skb);
- u32 ndoptlen = skb->tail - (skb->transport_header +
+ u32 ndoptlen = skb_tail_pointer(skb) - (skb_transport_header(skb) +
offsetof(struct rd_msg, opt));
#ifdef CONFIG_IPV6_NDISC_NODETYPE
@@ -1568,7 +1569,7 @@ int ndisc_rcv(struct sk_buff *skb)
static int ndisc_netdev_event(struct notifier_block *this, unsigned long event, void *ptr)
{
- struct net_device *dev = ptr;
+ struct net_device *dev = netdev_notifier_info_to_dev(ptr);
struct net *net = dev_net(dev);
struct inet6_dev *idev;
diff --git a/net/ipv6/netfilter.c b/net/ipv6/netfilter.c
index 72836f40b73..95f3f1da0d7 100644
--- a/net/ipv6/netfilter.c
+++ b/net/ipv6/netfilter.c
@@ -10,6 +10,7 @@
#include <linux/netfilter.h>
#include <linux/netfilter_ipv6.h>
#include <linux/export.h>
+#include <net/addrconf.h>
#include <net/dst.h>
#include <net/ipv6.h>
#include <net/ip6_route.h>
@@ -186,6 +187,10 @@ static __sum16 nf_ip6_checksum_partial(struct sk_buff *skb, unsigned int hook,
return csum;
};
+/*
+ * IPv6 helpers exposed to netfilter.  The table is published through the
+ * nf_ipv6_ops pointer by ipv6_netfilter_init() and withdrawn again by
+ * ipv6_netfilter_fini(); only the address check is provided.
+ */
+static const struct nf_ipv6_ops ipv6ops = {
+ .chk_addr = ipv6_chk_addr,
+};
+
static const struct nf_afinfo nf_ip6_afinfo = {
.family = AF_INET6,
.checksum = nf_ip6_checksum,
@@ -198,6 +203,7 @@ static const struct nf_afinfo nf_ip6_afinfo = {
int __init ipv6_netfilter_init(void)
{
+ RCU_INIT_POINTER(nf_ipv6_ops, &ipv6ops);
return nf_register_afinfo(&nf_ip6_afinfo);
}
@@ -206,5 +212,6 @@ int __init ipv6_netfilter_init(void)
*/
void ipv6_netfilter_fini(void)
{
+ RCU_INIT_POINTER(nf_ipv6_ops, NULL);
nf_unregister_afinfo(&nf_ip6_afinfo);
}
diff --git a/net/ipv6/netfilter/ip6t_MASQUERADE.c b/net/ipv6/netfilter/ip6t_MASQUERADE.c
index 60e9053bab0..47bff610751 100644
--- a/net/ipv6/netfilter/ip6t_MASQUERADE.c
+++ b/net/ipv6/netfilter/ip6t_MASQUERADE.c
@@ -71,7 +71,7 @@ static int device_cmp(struct nf_conn *ct, void *ifindex)
static int masq_device_event(struct notifier_block *this,
unsigned long event, void *ptr)
{
- const struct net_device *dev = ptr;
+ const struct net_device *dev = netdev_notifier_info_to_dev(ptr);
struct net *net = dev_net(dev);
if (event == NETDEV_DOWN)
@@ -89,8 +89,10 @@ static int masq_inet_event(struct notifier_block *this,
unsigned long event, void *ptr)
{
struct inet6_ifaddr *ifa = ptr;
+ struct netdev_notifier_info info;
- return masq_device_event(this, event, ifa->idev->dev);
+ netdev_notifier_info_init(&info, ifa->idev->dev);
+ return masq_device_event(this, event, &info);
}
static struct notifier_block masq_inet_notifier = {
diff --git a/net/ipv6/output_core.c b/net/ipv6/output_core.c
index c2e73e647e4..ab92a3673fb 100644
--- a/net/ipv6/output_core.c
+++ b/net/ipv6/output_core.c
@@ -40,7 +40,8 @@ int ip6_find_1stfragopt(struct sk_buff *skb, u8 **nexthdr)
u16 offset = sizeof(struct ipv6hdr);
struct ipv6_opt_hdr *exthdr =
(struct ipv6_opt_hdr *)(ipv6_hdr(skb) + 1);
- unsigned int packet_len = skb->tail - skb->network_header;
+ unsigned int packet_len = skb_tail_pointer(skb) -
+ skb_network_header(skb);
int found_rhdr = 0;
*nexthdr = &ipv6_hdr(skb)->nexthdr;
diff --git a/net/ipv6/ping.c b/net/ipv6/ping.c
new file mode 100644
index 00000000000..a4311038591
--- /dev/null
+++ b/net/ipv6/ping.c
@@ -0,0 +1,272 @@
+/*
+ * INET An implementation of the TCP/IP protocol suite for the LINUX
+ * operating system. INET is implemented using the BSD Socket
+ * interface as the means of communication with the user level.
+ *
+ * "Ping" sockets
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ * Based on ipv4/ping.c code.
+ *
+ * Authors: Lorenzo Colitti (IPv6 support)
+ * Vasiliy Kulikov / Openwall (IPv4 implementation, for Linux 2.6),
+ * Pavel Kankovsky (IPv4 implementation, for Linux 2.4.32)
+ *
+ */
+
+#include <net/addrconf.h>
+#include <net/ipv6.h>
+#include <net/ip6_route.h>
+#include <net/protocol.h>
+#include <net/udp.h>
+#include <net/transp_v6.h>
+#include <net/ping.h>
+
+/*
+ * Protocol operations for "PINGv6" sockets.
+ *
+ * The send path is the IPv6-specific ping_v6_sendmsg() defined in this
+ * file.  connect and the socket-option handlers reuse the generic IPv6
+ * datagram helpers and disconnect reuses the UDP one.  The remaining
+ * ping_* handlers are defined outside this file (presumably the common
+ * ping code declared in <net/ping.h> -- confirm).
+ */
+struct proto pingv6_prot = {
+ .name = "PINGv6",
+ .owner = THIS_MODULE,
+ .init = ping_init_sock,
+ .close = ping_close,
+ .connect = ip6_datagram_connect,
+ .disconnect = udp_disconnect,
+ .setsockopt = ipv6_setsockopt,
+ .getsockopt = ipv6_getsockopt,
+ .sendmsg = ping_v6_sendmsg,
+ .recvmsg = ping_recvmsg,
+ .bind = ping_bind,
+ .backlog_rcv = ping_queue_rcv_skb,
+ .hash = ping_hash,
+ .unhash = ping_unhash,
+ .get_port = ping_get_port,
+ /* socket objects are sized as a struct raw6_sock */
+ .obj_size = sizeof(struct raw6_sock),
+};
+EXPORT_SYMBOL_GPL(pingv6_prot);
+
+/*
+ * Socket-switch entry that maps SOCK_DGRAM sockets with protocol
+ * IPPROTO_ICMPV6 onto pingv6_prot.  Registered by pingv6_init() and
+ * unregistered by pingv6_exit().
+ */
+static struct inet_protosw pingv6_protosw = {
+ .type = SOCK_DGRAM,
+ .protocol = IPPROTO_ICMPV6,
+ .prot = &pingv6_prot,
+ .ops = &inet6_dgram_ops,
+ .no_check = UDP_CSUM_DEFAULT,
+ .flags = INET_PROTOSW_REUSE,
+};
+
+
+/*
+ * Compatibility glue so we can support IPv6 when it's compiled as a module.
+ *
+ * These stubs are what pingv6_exit() stores into pingv6_ops; pingv6_init()
+ * replaces them with the real IPv6 implementations.  The int-returning
+ * stubs report -EAFNOSUPPORT, except dummy_ipv6_chk_addr() which returns 0,
+ * and dummy_ipv6_icmp_error() does nothing.
+ */
+int dummy_ipv6_recv_error(struct sock *sk, struct msghdr *msg, int len)
+{
+ return -EAFNOSUPPORT;
+}
+int dummy_ip6_datagram_recv_ctl(struct sock *sk, struct msghdr *msg,
+ struct sk_buff *skb)
+{
+ return -EAFNOSUPPORT;
+}
+int dummy_icmpv6_err_convert(u8 type, u8 code, int *err)
+{
+ return -EAFNOSUPPORT;
+}
+/* Intentionally empty: there is nothing to report without IPv6. */
+void dummy_ipv6_icmp_error(struct sock *sk, struct sk_buff *skb, int err,
+ __be16 port, u32 info, u8 *payload) {}
+/* Always returns 0 regardless of the address queried. */
+int dummy_ipv6_chk_addr(struct net *net, const struct in6_addr *addr,
+ const struct net_device *dev, int strict)
+{
+ return 0;
+}
+
+/*
+ * ping_v6_sendmsg - send an ICMPv6 message from a ping socket.
+ * @iocb: not used by this function
+ * @sk:   sending socket
+ * @msg:  user message; msg_name, when present, must be an AF_INET6
+ *        struct sockaddr_in6 naming the destination, and its
+ *        sin6_scope_id selects the outgoing interface
+ * @len:  number of bytes to send, as handed to ping_common_sendmsg()
+ *        and ip6_append_data()
+ *
+ * Without msg_name the socket must be connected (TCP_ESTABLISHED) and the
+ * connected peer address is used.  v4-mapped destinations are rejected, and
+ * a destination that needs a scope id must come with one, either in
+ * sin6_scope_id or through the device the socket is bound to.
+ *
+ * Returns @len on success or a negative errno.
+ */
+int ping_v6_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
+		    size_t len)
+{
+	struct inet_sock *inet = inet_sk(sk);
+	struct ipv6_pinfo *np = inet6_sk(sk);
+	struct icmp6hdr user_icmph;
+	int addr_type;
+	struct in6_addr *daddr;
+	int iif = 0;
+	struct flowi6 fl6;
+	int err;
+	int hlimit;
+	struct dst_entry *dst;
+	struct rt6_info *rt;
+	struct pingfakehdr pfh;
+
+	pr_debug("ping_v6_sendmsg(sk=%p,sk->num=%u)\n", inet, inet->inet_num);
+
+	err = ping_common_sendmsg(AF_INET6, msg, len, &user_icmph,
+				  sizeof(user_icmph));
+	if (err)
+		return err;
+
+	if (msg->msg_name) {
+		struct sockaddr_in6 *u = (struct sockaddr_in6 *) msg->msg_name;
+
+		if (msg->msg_namelen < sizeof(struct sockaddr_in6) ||
+		    u->sin6_family != AF_INET6)
+			return -EINVAL;
+
+		/* An explicit scope id must agree with the bound device. */
+		if (sk->sk_bound_dev_if &&
+		    sk->sk_bound_dev_if != u->sin6_scope_id)
+			return -EINVAL;
+
+		daddr = &(u->sin6_addr);
+		iif = u->sin6_scope_id;
+	} else {
+		if (sk->sk_state != TCP_ESTABLISHED)
+			return -EDESTADDRREQ;
+		daddr = &np->daddr;
+	}
+
+	if (!iif)
+		iif = sk->sk_bound_dev_if;
+
+	addr_type = ipv6_addr_type(daddr);
+	if (__ipv6_addr_needs_scope_id(addr_type) && !iif)
+		return -EINVAL;
+	if (addr_type & IPV6_ADDR_MAPPED)
+		return -EINVAL;
+
+	/* TODO: use ip6_datagram_send_ctl to get options from cmsg */
+
+	memset(&fl6, 0, sizeof(fl6));
+
+	fl6.flowi6_proto = IPPROTO_ICMPV6;
+	fl6.saddr = np->saddr;
+	fl6.daddr = *daddr;
+	/*
+	 * Route through the interface resolved above.  Without this the
+	 * scope id / bound device is checked but never reaches the lookup.
+	 */
+	fl6.flowi6_oif = iif;
+	fl6.fl6_icmp_type = user_icmph.icmp6_type;
+	fl6.fl6_icmp_code = user_icmph.icmp6_code;
+	security_sk_classify_flow(sk, flowi6_to_flowi(&fl6));
+
+	/* No interface chosen yet: fall back to the per-socket defaults. */
+	if (!fl6.flowi6_oif && ipv6_addr_is_multicast(&fl6.daddr))
+		fl6.flowi6_oif = np->mcast_oif;
+	else if (!fl6.flowi6_oif)
+		fl6.flowi6_oif = np->ucast_oif;
+
+	dst = ip6_sk_dst_lookup_flow(sk, &fl6, daddr, 1);
+	if (IS_ERR(dst))
+		return PTR_ERR(dst);
+	rt = (struct rt6_info *) dst;
+
+	/* Build the ICMPv6 echo header; the id is the socket's local "port". */
+	pfh.icmph.type = user_icmph.icmp6_type;
+	pfh.icmph.code = user_icmph.icmp6_code;
+	pfh.icmph.checksum = 0;
+	pfh.icmph.un.echo.id = inet->inet_sport;
+	pfh.icmph.un.echo.sequence = user_icmph.icmp6_sequence;
+	pfh.iov = msg->msg_iov;
+	pfh.wcheck = 0;
+	pfh.family = AF_INET6;
+
+	if (ipv6_addr_is_multicast(&fl6.daddr))
+		hlimit = np->mcast_hops;
+	else
+		hlimit = np->hop_limit;
+	/* A negative socket setting means "use the route's hop limit". */
+	if (hlimit < 0)
+		hlimit = ip6_dst_hoplimit(dst);
+
+	err = ip6_append_data(sk, ping_getfrag, &pfh, len,
+			      0, hlimit,
+			      np->tclass, NULL, &fl6, rt,
+			      MSG_DONTWAIT, np->dontfrag);
+
+	if (err) {
+		ICMP6_INC_STATS_BH(sock_net(sk), rt->rt6i_idev,
+				   ICMP6_MIB_OUTERRORS);
+		ip6_flush_pending_frames(sk);
+	} else {
+		err = icmpv6_push_pending_frames(sk, &fl6,
+						 (struct icmp6hdr *) &pfh.icmph,
+						 len);
+	}
+
+	/* Drop the reference obtained from the route lookup on every path. */
+	dst_release(dst);
+
+	if (err)
+		return err;
+
+	/* sendmsg reports the number of bytes accepted, not 0. */
+	return len;
+}
+
+#ifdef CONFIG_PROC_FS
+/* seq_file .start hook: delegate to ping_seq_start() for AF_INET6. */
+static void *ping_v6_seq_start(struct seq_file *seq, loff_t *pos)
+{
+ return ping_seq_start(seq, pos, AF_INET6);
+}
+
+/*
+ * seq_file .show hook for IPv6 ping sockets: print the column header for
+ * the start token, otherwise one line for the socket in @v using its
+ * source/destination "ports" in host byte order.  Always returns 0.
+ */
+int ping_v6_seq_show(struct seq_file *seq, void *v)
+{
+	struct ping_iter_state *state;
+	struct inet_sock *inet;
+
+	if (v == SEQ_START_TOKEN) {
+		seq_puts(seq, IPV6_SEQ_DGRAM_HEADER);
+		return 0;
+	}
+
+	state = seq->private;
+	inet = inet_sk(v);
+	ip6_dgram_sock_seq_show(seq, v, ntohs(inet->inet_sport),
+				ntohs(inet->inet_dport), state->bucket);
+	return 0;
+}
+
+/*
+ * seq_file description for IPv6 ping sockets, registered per network
+ * namespace through ping_proc_register().  Only .start and .show are
+ * IPv6-specific; .next and .stop use the shared ping_seq_* helpers.
+ * "icmp6" is presumably the name of the resulting proc entry -- confirm
+ * against ping_proc_register().
+ */
+static struct ping_seq_afinfo ping_v6_seq_afinfo = {
+ .name = "icmp6",
+ .family = AF_INET6,
+ .seq_fops = &ping_seq_fops,
+ .seq_ops = {
+ .start = ping_v6_seq_start,
+ .show = ping_v6_seq_show,
+ .next = ping_seq_next,
+ .stop = ping_seq_stop,
+ },
+};
+
+/* pernet .init hook: register the IPv6 ping seq_file info for @net. */
+static int __net_init ping_v6_proc_init_net(struct net *net)
+{
+ return ping_proc_register(net, &ping_v6_seq_afinfo);
+}
+
+/*
+ * pernet .exit hook: remove the IPv6 ping seq_file info from @net.
+ *
+ * This runs at namespace teardown, so it carries the exit annotation
+ * rather than the init one, and it does not "return" a value from a
+ * void function.
+ */
+static void __net_exit ping_v6_proc_exit_net(struct net *net)
+{
+	ping_proc_unregister(net, &ping_v6_seq_afinfo);
+}
+
+/*
+ * Per-network-namespace hooks for the proc interface; registered by
+ * pingv6_init() and unregistered by pingv6_exit() when CONFIG_PROC_FS is
+ * enabled.
+ */
+static struct pernet_operations ping_v6_net_ops = {
+ .init = ping_v6_proc_init_net,
+ .exit = ping_v6_proc_exit_net,
+};
+#endif
+
+/*
+ * Bring up IPv6 ping sockets: register the proc hooks (CONFIG_PROC_FS
+ * only), point pingv6_ops at the real IPv6 helpers, then register the
+ * protosw entry.  Returns 0 on success or the error from whichever
+ * registration failed.
+ *
+ * NOTE(review): a failure of inet6_register_protosw() is returned without
+ * undoing the pernet registration or the pingv6_ops assignments here; the
+ * inet6_init() error path shown in this patch calls pingv6_exit() instead.
+ */
+int __init pingv6_init(void)
+{
+#ifdef CONFIG_PROC_FS
+ int ret = register_pernet_subsys(&ping_v6_net_ops);
+ if (ret)
+ return ret;
+#endif
+ /* Replace the dummy_* stubs with the real IPv6 implementations. */
+ pingv6_ops.ipv6_recv_error = ipv6_recv_error;
+ pingv6_ops.ip6_datagram_recv_ctl = ip6_datagram_recv_ctl;
+ pingv6_ops.icmpv6_err_convert = icmpv6_err_convert;
+ pingv6_ops.ipv6_icmp_error = ipv6_icmp_error;
+ pingv6_ops.ipv6_chk_addr = ipv6_chk_addr;
+ return inet6_register_protosw(&pingv6_protosw);
+}
+
+/* Undo pingv6_init(): point pingv6_ops back at the dummy_* stubs, then drop
+ * the pernet (proc) registration and the protosw entry.
+ *
+ * It's not possible to unload the ipv6 module, so this is not reached via
+ * module exit; it is however called from the pingv6_fail error label in
+ * inet6_init().
+ * NOTE(review): that label is also the target when pingv6_init() itself
+ * fails, in which case some of the unregister calls below act on things
+ * that were never registered -- confirm this is safe.
+ */
+void pingv6_exit(void)
+{
+ pingv6_ops.ipv6_recv_error = dummy_ipv6_recv_error;
+ pingv6_ops.ip6_datagram_recv_ctl = dummy_ip6_datagram_recv_ctl;
+ pingv6_ops.icmpv6_err_convert = dummy_icmpv6_err_convert;
+ pingv6_ops.ipv6_icmp_error = dummy_ipv6_icmp_error;
+ pingv6_ops.ipv6_chk_addr = dummy_ipv6_chk_addr;
+#ifdef CONFIG_PROC_FS
+ unregister_pernet_subsys(&ping_v6_net_ops);
+#endif
+ inet6_unregister_protosw(&pingv6_protosw);
+}
diff --git a/net/ipv6/proc.c b/net/ipv6/proc.c
index f3c1ff4357f..51c3285b5d9 100644
--- a/net/ipv6/proc.c
+++ b/net/ipv6/proc.c
@@ -90,7 +90,7 @@ static const struct snmp_mib snmp6_ipstats_list[] = {
SNMP_MIB_ITEM("Ip6OutMcastOctets", IPSTATS_MIB_OUTMCASTOCTETS),
SNMP_MIB_ITEM("Ip6InBcastOctets", IPSTATS_MIB_INBCASTOCTETS),
SNMP_MIB_ITEM("Ip6OutBcastOctets", IPSTATS_MIB_OUTBCASTOCTETS),
- SNMP_MIB_ITEM("InCsumErrors", IPSTATS_MIB_CSUMERRORS),
+ /* IPSTATS_MIB_CSUMERRORS is not relevant in IPv6 (no checksum) */
SNMP_MIB_SENTINEL
};
diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c
index eedff8ccded..c45f7a5c36e 100644
--- a/net/ipv6/raw.c
+++ b/net/ipv6/raw.c
@@ -1132,7 +1132,8 @@ static int rawv6_ioctl(struct sock *sk, int cmd, unsigned long arg)
spin_lock_bh(&sk->sk_receive_queue.lock);
skb = skb_peek(&sk->sk_receive_queue);
if (skb != NULL)
- amount = skb->tail - skb->transport_header;
+ amount = skb_tail_pointer(skb) -
+ skb_transport_header(skb);
spin_unlock_bh(&sk->sk_receive_queue.lock);
return put_user(amount, (int __user *)arg);
}
@@ -1226,45 +1227,16 @@ struct proto rawv6_prot = {
};
#ifdef CONFIG_PROC_FS
-static void raw6_sock_seq_show(struct seq_file *seq, struct sock *sp, int i)
-{
- struct ipv6_pinfo *np = inet6_sk(sp);
- const struct in6_addr *dest, *src;
- __u16 destp, srcp;
-
- dest = &np->daddr;
- src = &np->rcv_saddr;
- destp = 0;
- srcp = inet_sk(sp)->inet_num;
- seq_printf(seq,
- "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
- "%02X %08X:%08X %02X:%08lX %08X %5d %8d %lu %d %pK %d\n",
- i,
- src->s6_addr32[0], src->s6_addr32[1],
- src->s6_addr32[2], src->s6_addr32[3], srcp,
- dest->s6_addr32[0], dest->s6_addr32[1],
- dest->s6_addr32[2], dest->s6_addr32[3], destp,
- sp->sk_state,
- sk_wmem_alloc_get(sp),
- sk_rmem_alloc_get(sp),
- 0, 0L, 0,
- from_kuid_munged(seq_user_ns(seq), sock_i_uid(sp)),
- 0,
- sock_i_ino(sp),
- atomic_read(&sp->sk_refcnt), sp, atomic_read(&sp->sk_drops));
-}
-
static int raw6_seq_show(struct seq_file *seq, void *v)
{
- if (v == SEQ_START_TOKEN)
- seq_printf(seq,
- " sl "
- "local_address "
- "remote_address "
- "st tx_queue rx_queue tr tm->when retrnsmt"
- " uid timeout inode ref pointer drops\n");
- else
- raw6_sock_seq_show(seq, v, raw_seq_private(seq)->bucket);
+ if (v == SEQ_START_TOKEN) {
+ seq_puts(seq, IPV6_SEQ_DGRAM_HEADER);
+ } else {
+ struct sock *sp = v;
+ __u16 srcp = inet_sk(sp)->inet_num;
+ ip6_dgram_sock_seq_show(seq, v, srcp, 0,
+ raw_seq_private(seq)->bucket);
+ }
return 0;
}
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index ad0aa6b0b86..2b874185ebb 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -1649,7 +1649,7 @@ static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_bu
int optlen, on_link;
u8 *lladdr;
- optlen = skb->tail - skb->transport_header;
+ optlen = skb_tail_pointer(skb) - skb_transport_header(skb);
optlen -= sizeof(*msg);
if (optlen < 0) {
@@ -2681,9 +2681,9 @@ errout:
}
static int ip6_route_dev_notify(struct notifier_block *this,
- unsigned long event, void *data)
+ unsigned long event, void *ptr)
{
- struct net_device *dev = (struct net_device *)data;
+ struct net_device *dev = netdev_notifier_info_to_dev(ptr);
struct net *net = dev_net(dev);
if (event == NETDEV_REGISTER && (dev->flags & IFF_LOOPBACK)) {
diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c
index 335363478bb..6b9c1f128ea 100644
--- a/net/ipv6/sit.c
+++ b/net/ipv6/sit.c
@@ -577,6 +577,10 @@ static int ipip6_rcv(struct sk_buff *skb)
if (tunnel != NULL) {
struct pcpu_tstats *tstats;
+ if (tunnel->parms.iph.protocol != IPPROTO_IPV6 &&
+ tunnel->parms.iph.protocol != 0)
+ goto out;
+
secpath_reset(skb);
skb->mac_header = skb->network_header;
skb_reset_network_header(skb);
@@ -629,6 +633,35 @@ out:
return 0;
}
+/*
+ * Tunnel packet info handed to ip_tunnel_rcv() by ipip_rcv(); only the
+ * inner protocol (IPv4) is filled in.
+ */
+static const struct tnl_ptk_info tpi = {
+ /* no tunnel info required for ipip. */
+ .proto = htons(ETH_P_IP),
+};
+
+/*
+ * Receive handler for IPv4-in-IPv4 packets arriving on a sit tunnel.
+ *
+ * Returns 1 when no tunnel matches the outer addresses (the packet is left
+ * untouched), 0 after dropping a packet the tunnel does not accept or that
+ * fails the xfrm policy check, and otherwise the result of
+ * ip_tunnel_rcv().
+ */
+static int ipip_rcv(struct sk_buff *skb)
+{
+	const struct iphdr *iph = ip_hdr(skb);
+	struct ip_tunnel *tunnel;
+	u8 proto;
+
+	tunnel = ipip6_tunnel_lookup(dev_net(skb->dev), skb->dev,
+				     iph->saddr, iph->daddr);
+	if (tunnel == NULL)
+		return 1;
+
+	/* The tunnel must be configured for IPIP or for "any" (0). */
+	proto = tunnel->parms.iph.protocol;
+	if ((proto != IPPROTO_IPIP && proto != 0) ||
+	    !xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) {
+		kfree_skb(skb);
+		return 0;
+	}
+
+	return ip_tunnel_rcv(tunnel, skb, &tpi, log_ecn_error);
+}
+
/*
* If the IPv6 address comes from 6rd / 6to4 (RFC 3056) addr space this function
* stores the embedded IPv4 address in v4dst and returns true.
@@ -877,6 +910,43 @@ tx_error:
return NETDEV_TX_OK;
}
+/*
+ * Transmit an IPv4 packet through the tunnel as IPPROTO_IPIP, using the
+ * tunnel's configured outer header as the template.  Always reports
+ * NETDEV_TX_OK.
+ */
+static netdev_tx_t ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
+{
+	struct ip_tunnel *tunnel = netdev_priv(dev);
+
+	/* First encapsulation: record the inner header offsets. */
+	if (likely(!skb->encapsulation)) {
+		skb_reset_inner_headers(skb);
+		skb->encapsulation = 1;
+	}
+
+	ip_tunnel_xmit(skb, dev, &tunnel->parms.iph, IPPROTO_IPIP);
+	return NETDEV_TX_OK;
+}
+
+/*
+ * ndo_start_xmit entry point for sit devices: dispatch on the packet's
+ * protocol.  IPv4 goes to ipip_tunnel_xmit(), IPv6 to ipip6_tunnel_xmit();
+ * anything else is counted as a tx error and freed.  Always reports
+ * NETDEV_TX_OK.
+ */
+static netdev_tx_t sit_tunnel_xmit(struct sk_buff *skb,
+				   struct net_device *dev)
+{
+	const __be16 proto = skb->protocol;
+
+	if (proto == htons(ETH_P_IP)) {
+		ipip_tunnel_xmit(skb, dev);
+	} else if (proto == htons(ETH_P_IPV6)) {
+		ipip6_tunnel_xmit(skb, dev);
+	} else {
+		dev->stats.tx_errors++;
+		dev_kfree_skb(skb);
+	}
+
+	return NETDEV_TX_OK;
+}
+
static void ipip6_tunnel_bind_dev(struct net_device *dev)
{
struct net_device *tdev = NULL;
@@ -1027,7 +1097,11 @@ ipip6_tunnel_ioctl (struct net_device *dev, struct ifreq *ifr, int cmd)
goto done;
err = -EINVAL;
- if (p.iph.version != 4 || p.iph.protocol != IPPROTO_IPV6 ||
+ if (p.iph.protocol != IPPROTO_IPV6 &&
+ p.iph.protocol != IPPROTO_IPIP &&
+ p.iph.protocol != 0)
+ goto done;
+ if (p.iph.version != 4 ||
p.iph.ihl != 5 || (p.iph.frag_off&htons(~IP_DF)))
goto done;
if (p.iph.ttl)
@@ -1164,7 +1238,7 @@ static int ipip6_tunnel_change_mtu(struct net_device *dev, int new_mtu)
static const struct net_device_ops ipip6_netdev_ops = {
.ndo_uninit = ipip6_tunnel_uninit,
- .ndo_start_xmit = ipip6_tunnel_xmit,
+ .ndo_start_xmit = sit_tunnel_xmit,
.ndo_do_ioctl = ipip6_tunnel_ioctl,
.ndo_change_mtu = ipip6_tunnel_change_mtu,
.ndo_get_stats64 = ip_tunnel_get_stats64,
@@ -1232,6 +1306,22 @@ static int __net_init ipip6_fb_tunnel_init(struct net_device *dev)
return 0;
}
+/*
+ * rtnl_link_ops .validate hook for sit devices.
+ * @tb:   generic link attributes (not inspected)
+ * @data: tunnel-specific attributes, may be NULL
+ *
+ * Only IFLA_IPTUN_PROTO is checked: when present it must be IPPROTO_IPV6,
+ * IPPROTO_IPIP or 0.  Returns 0 if acceptable, -EINVAL otherwise.
+ */
+static int ipip6_validate(struct nlattr *tb[], struct nlattr *data[])
+{
+	u8 proto;
+
+	/*
+	 * The attribute is optional (ipip6_netlink_parms() also treats it
+	 * that way); reading it unconditionally would dereference NULL.
+	 */
+	if (!data || !data[IFLA_IPTUN_PROTO])
+		return 0;
+
+	proto = nla_get_u8(data[IFLA_IPTUN_PROTO]);
+	if (proto != IPPROTO_IPV6 &&
+	    proto != IPPROTO_IPIP &&
+	    proto != 0)
+		return -EINVAL;
+
+	return 0;
+}
+
static void ipip6_netlink_parms(struct nlattr *data[],
struct ip_tunnel_parm *parms)
{
@@ -1268,6 +1358,10 @@ static void ipip6_netlink_parms(struct nlattr *data[],
if (data[IFLA_IPTUN_FLAGS])
parms->i_flags = nla_get_be16(data[IFLA_IPTUN_FLAGS]);
+
+ if (data[IFLA_IPTUN_PROTO])
+ parms->iph.protocol = nla_get_u8(data[IFLA_IPTUN_PROTO]);
+
}
#ifdef CONFIG_IPV6_SIT_6RD
@@ -1391,6 +1485,8 @@ static size_t ipip6_get_size(const struct net_device *dev)
nla_total_size(1) +
/* IFLA_IPTUN_FLAGS */
nla_total_size(2) +
+ /* IFLA_IPTUN_PROTO */
+ nla_total_size(1) +
#ifdef CONFIG_IPV6_SIT_6RD
/* IFLA_IPTUN_6RD_PREFIX */
nla_total_size(sizeof(struct in6_addr)) +
@@ -1416,6 +1512,7 @@ static int ipip6_fill_info(struct sk_buff *skb, const struct net_device *dev)
nla_put_u8(skb, IFLA_IPTUN_TOS, parm->iph.tos) ||
nla_put_u8(skb, IFLA_IPTUN_PMTUDISC,
!!(parm->iph.frag_off & htons(IP_DF))) ||
+ nla_put_u8(skb, IFLA_IPTUN_PROTO, parm->iph.protocol) ||
nla_put_be16(skb, IFLA_IPTUN_FLAGS, parm->i_flags))
goto nla_put_failure;
@@ -1445,6 +1542,7 @@ static const struct nla_policy ipip6_policy[IFLA_IPTUN_MAX + 1] = {
[IFLA_IPTUN_TOS] = { .type = NLA_U8 },
[IFLA_IPTUN_PMTUDISC] = { .type = NLA_U8 },
[IFLA_IPTUN_FLAGS] = { .type = NLA_U16 },
+ [IFLA_IPTUN_PROTO] = { .type = NLA_U8 },
#ifdef CONFIG_IPV6_SIT_6RD
[IFLA_IPTUN_6RD_PREFIX] = { .len = sizeof(struct in6_addr) },
[IFLA_IPTUN_6RD_RELAY_PREFIX] = { .type = NLA_U32 },
@@ -1459,6 +1557,7 @@ static struct rtnl_link_ops sit_link_ops __read_mostly = {
.policy = ipip6_policy,
.priv_size = sizeof(struct ip_tunnel),
.setup = ipip6_tunnel_setup,
+ .validate = ipip6_validate,
.newlink = ipip6_newlink,
.changelink = ipip6_changelink,
.get_size = ipip6_get_size,
@@ -1471,6 +1570,12 @@ static struct xfrm_tunnel sit_handler __read_mostly = {
.priority = 1,
};
+/*
+ * xfrm4 tunnel handler for IPv4-in-IPv4 traffic on sit devices.  It is
+ * registered for AF_INET in sit_init() and deregistered in sit_cleanup(),
+ * and shares ipip6_err with the IPv6-in-IPv4 handler for error reports.
+ */
+static struct xfrm_tunnel ipip_handler __read_mostly = {
+ .handler = ipip_rcv,
+ .err_handler = ipip6_err,
+ .priority = 2,
+};
+
static void __net_exit sit_destroy_tunnels(struct sit_net *sitn, struct list_head *head)
{
int prio;
@@ -1553,6 +1658,7 @@ static void __exit sit_cleanup(void)
{
rtnl_link_unregister(&sit_link_ops);
xfrm4_tunnel_deregister(&sit_handler, AF_INET6);
+ xfrm4_tunnel_deregister(&ipip_handler, AF_INET);
unregister_pernet_device(&sit_net_ops);
rcu_barrier(); /* Wait for completion of call_rcu()'s */
@@ -1569,9 +1675,14 @@ static int __init sit_init(void)
return err;
err = xfrm4_tunnel_register(&sit_handler, AF_INET6);
if (err < 0) {
- pr_info("%s: can't add protocol\n", __func__);
+ pr_info("%s: can't register ip6ip4\n", __func__);
goto xfrm_tunnel_failed;
}
+ err = xfrm4_tunnel_register(&ipip_handler, AF_INET);
+ if (err < 0) {
+ pr_info("%s: can't register ip4ip4\n", __func__);
+ goto xfrm_tunnel4_failed;
+ }
err = rtnl_link_register(&sit_link_ops);
if (err < 0)
goto rtnl_link_failed;
@@ -1580,6 +1691,8 @@ out:
return err;
rtnl_link_failed:
+ xfrm4_tunnel_deregister(&ipip_handler, AF_INET);
+xfrm_tunnel4_failed:
xfrm4_tunnel_deregister(&sit_handler, AF_INET6);
xfrm_tunnel_failed:
unregister_pernet_device(&sit_net_ops);
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index 71167069b39..5cffa5c3e6b 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -63,6 +63,7 @@
#include <net/inet_common.h>
#include <net/secure_seq.h>
#include <net/tcp_memcontrol.h>
+#include <net/ll_poll.h>
#include <asm/uaccess.h>
@@ -1498,6 +1499,7 @@ process:
if (sk_filter(sk, skb))
goto discard_and_relse;
+ sk_mark_ll(sk, skb);
skb->dev = NULL;
bh_lock_sock_nested(sk);
@@ -1890,6 +1892,17 @@ void tcp6_proc_exit(struct net *net)
}
#endif
+static void tcp_v6_clear_sk(struct sock *sk, int size)
+{
+ struct inet_sock *inet = inet_sk(sk);
+
+ /* we do not want to clear pinet6 field, because of RCU lookups.
+ * The socket is therefore handled in two parts that skip over the
+ * pinet6 pointer: the bytes in front of pinet6 are passed to
+ * sk_prot_clear_nulls(), and the bytes behind it are zeroed by the
+ * memset() below.
+ */
+ sk_prot_clear_nulls(sk, offsetof(struct inet_sock, pinet6));
+
+ size -= offsetof(struct inet_sock, pinet6) + sizeof(inet->pinet6); /* what is left after pinet6 */
+ memset(&inet->pinet6 + 1, 0, size); /* starts one pointer past pinet6 */
+}
+
struct proto tcpv6_prot = {
.name = "TCPv6",
.owner = THIS_MODULE,
@@ -1933,6 +1946,7 @@ struct proto tcpv6_prot = {
#ifdef CONFIG_MEMCG_KMEM
.proto_cgroup = tcp_proto_cgroup,
#endif
+ .clear_sk = tcp_v6_clear_sk,
};
static const struct inet6_protocol tcpv6_protocol = {
diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
index d4defdd4493..f77e34c5a0e 100644
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -46,6 +46,7 @@
#include <net/ip6_checksum.h>
#include <net/xfrm.h>
#include <net/inet6_hashtables.h>
+#include <net/ll_poll.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
@@ -841,7 +842,10 @@ int __udp6_lib_rcv(struct sk_buff *skb, struct udp_table *udptable,
*/
sk = __udp6_lib_lookup_skb(skb, uh->source, uh->dest, udptable);
if (sk != NULL) {
- int ret = udpv6_queue_rcv_skb(sk, skb);
+ int ret;
+
+ sk_mark_ll(sk, skb);
+ ret = udpv6_queue_rcv_skb(sk, skb);
sock_put(sk);
/* a return value > 0 means to resubmit the input, but
@@ -1359,48 +1363,17 @@ static const struct inet6_protocol udpv6_protocol = {
/* ------------------------------------------------------------------------ */
#ifdef CONFIG_PROC_FS
-
-static void udp6_sock_seq_show(struct seq_file *seq, struct sock *sp, int bucket)
-{
- struct inet_sock *inet = inet_sk(sp);
- struct ipv6_pinfo *np = inet6_sk(sp);
- const struct in6_addr *dest, *src;
- __u16 destp, srcp;
-
- dest = &np->daddr;
- src = &np->rcv_saddr;
- destp = ntohs(inet->inet_dport);
- srcp = ntohs(inet->inet_sport);
- seq_printf(seq,
- "%5d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
- "%02X %08X:%08X %02X:%08lX %08X %5d %8d %lu %d %pK %d\n",
- bucket,
- src->s6_addr32[0], src->s6_addr32[1],
- src->s6_addr32[2], src->s6_addr32[3], srcp,
- dest->s6_addr32[0], dest->s6_addr32[1],
- dest->s6_addr32[2], dest->s6_addr32[3], destp,
- sp->sk_state,
- sk_wmem_alloc_get(sp),
- sk_rmem_alloc_get(sp),
- 0, 0L, 0,
- from_kuid_munged(seq_user_ns(seq), sock_i_uid(sp)),
- 0,
- sock_i_ino(sp),
- atomic_read(&sp->sk_refcnt), sp,
- atomic_read(&sp->sk_drops));
-}
-
int udp6_seq_show(struct seq_file *seq, void *v)
{
- if (v == SEQ_START_TOKEN)
- seq_printf(seq,
- " sl "
- "local_address "
- "remote_address "
- "st tx_queue rx_queue tr tm->when retrnsmt"
- " uid timeout inode ref pointer drops\n");
- else
- udp6_sock_seq_show(seq, v, ((struct udp_iter_state *)seq->private)->bucket);
+ if (v == SEQ_START_TOKEN) {
+ seq_puts(seq, IPV6_SEQ_DGRAM_HEADER);
+ } else {
+ int bucket = ((struct udp_iter_state *)seq->private)->bucket;
+ struct inet_sock *inet = inet_sk(v);
+ __u16 srcp = ntohs(inet->inet_sport);
+ __u16 destp = ntohs(inet->inet_dport);
+ ip6_dgram_sock_seq_show(seq, v, srcp, destp, bucket);
+ }
return 0;
}
@@ -1432,6 +1405,17 @@ void udp6_proc_exit(struct net *net) {
}
#endif /* CONFIG_PROC_FS */
+void udp_v6_clear_sk(struct sock *sk, int size)
+{
+ struct inet_sock *inet = inet_sk(sk);
+
+ /* we do not want to clear pinet6 field, because of RCU lookups.
+ * The socket is therefore handled in two parts that skip over the
+ * pinet6 pointer: the bytes in front of pinet6 are passed to
+ * sk_prot_clear_portaddr_nulls(), and the bytes behind it are
+ * zeroed by the memset() below.
+ */
+ sk_prot_clear_portaddr_nulls(sk, offsetof(struct inet_sock, pinet6));
+
+ size -= offsetof(struct inet_sock, pinet6) + sizeof(inet->pinet6); /* what is left after pinet6 */
+ memset(&inet->pinet6 + 1, 0, size); /* starts one pointer past pinet6 */
+}
+
/* ------------------------------------------------------------------------ */
struct proto udpv6_prot = {
@@ -1462,7 +1446,7 @@ struct proto udpv6_prot = {
.compat_setsockopt = compat_udpv6_setsockopt,
.compat_getsockopt = compat_udpv6_getsockopt,
#endif
- .clear_sk = sk_prot_clear_portaddr_nulls,
+ .clear_sk = udp_v6_clear_sk,
};
static struct inet_protosw udpv6_protosw = {
diff --git a/net/ipv6/udp_impl.h b/net/ipv6/udp_impl.h
index d7571046bfc..4691ed50a92 100644
--- a/net/ipv6/udp_impl.h
+++ b/net/ipv6/udp_impl.h
@@ -31,6 +31,8 @@ extern int udpv6_recvmsg(struct kiocb *iocb, struct sock *sk,
extern int udpv6_queue_rcv_skb(struct sock * sk, struct sk_buff *skb);
extern void udpv6_destroy_sock(struct sock *sk);
+extern void udp_v6_clear_sk(struct sock *sk, int size);
+
#ifdef CONFIG_PROC_FS
extern int udp6_seq_show(struct seq_file *seq, void *v);
#endif
diff --git a/net/ipv6/udp_offload.c b/net/ipv6/udp_offload.c
index 3bb3a891a42..5d1b8d7ac99 100644
--- a/net/ipv6/udp_offload.c
+++ b/net/ipv6/udp_offload.c
@@ -46,11 +46,12 @@ static struct sk_buff *udp6_ufo_fragment(struct sk_buff *skb,
unsigned int mss;
unsigned int unfrag_ip6hlen, unfrag_len;
struct frag_hdr *fptr;
- u8 *mac_start, *prevhdr;
+ u8 *packet_start, *prevhdr;
u8 nexthdr;
u8 frag_hdr_sz = sizeof(struct frag_hdr);
int offset;
__wsum csum;
+ int tnl_hlen;
mss = skb_shinfo(skb)->gso_size;
if (unlikely(skb->len <= mss))
@@ -63,7 +64,8 @@ static struct sk_buff *udp6_ufo_fragment(struct sk_buff *skb,
if (unlikely(type & ~(SKB_GSO_UDP |
SKB_GSO_DODGY |
SKB_GSO_UDP_TUNNEL |
- SKB_GSO_GRE) ||
+ SKB_GSO_GRE |
+ SKB_GSO_MPLS) ||
!(type & (SKB_GSO_UDP))))
goto out;
@@ -83,9 +85,11 @@ static struct sk_buff *udp6_ufo_fragment(struct sk_buff *skb,
skb->ip_summed = CHECKSUM_NONE;
/* Check if there is enough headroom to insert fragment header. */
- if ((skb_mac_header(skb) < skb->head + frag_hdr_sz) &&
- pskb_expand_head(skb, frag_hdr_sz, 0, GFP_ATOMIC))
- goto out;
+ tnl_hlen = skb_tnl_header_len(skb);
+ if (skb_headroom(skb) < (tnl_hlen + frag_hdr_sz)) {
+ if (gso_pskb_expand_head(skb, tnl_hlen + frag_hdr_sz))
+ goto out;
+ }
/* Find the unfragmentable header and shift it left by frag_hdr_sz
* bytes to insert fragment header.
@@ -93,11 +97,12 @@ static struct sk_buff *udp6_ufo_fragment(struct sk_buff *skb,
unfrag_ip6hlen = ip6_find_1stfragopt(skb, &prevhdr);
nexthdr = *prevhdr;
*prevhdr = NEXTHDR_FRAGMENT;
- unfrag_len = skb_network_header(skb) - skb_mac_header(skb) +
- unfrag_ip6hlen;
- mac_start = skb_mac_header(skb);
- memmove(mac_start-frag_hdr_sz, mac_start, unfrag_len);
+ unfrag_len = (skb_network_header(skb) - skb_mac_header(skb)) +
+ unfrag_ip6hlen + tnl_hlen;
+ packet_start = (u8 *) skb->head + SKB_GSO_CB(skb)->mac_offset;
+ memmove(packet_start-frag_hdr_sz, packet_start, unfrag_len);
+ SKB_GSO_CB(skb)->mac_offset -= frag_hdr_sz;
skb->mac_header -= frag_hdr_sz;
skb->network_header -= frag_hdr_sz;
diff --git a/net/ipv6/udplite.c b/net/ipv6/udplite.c
index 1d08e21d9f6..dfcc4be4689 100644
--- a/net/ipv6/udplite.c
+++ b/net/ipv6/udplite.c
@@ -56,7 +56,7 @@ struct proto udplitev6_prot = {
.compat_setsockopt = compat_udpv6_setsockopt,
.compat_getsockopt = compat_udpv6_getsockopt,
#endif
- .clear_sk = sk_prot_clear_portaddr_nulls,
+ .clear_sk = udp_v6_clear_sk,
};
static struct inet_protosw udplite6_protosw = {
diff --git a/net/ipv6/xfrm6_policy.c b/net/ipv6/xfrm6_policy.c
index 4ef7bdb6544..23ed03d786c 100644
--- a/net/ipv6/xfrm6_policy.c
+++ b/net/ipv6/xfrm6_policy.c
@@ -103,8 +103,10 @@ static int xfrm6_fill_dst(struct xfrm_dst *xdst, struct net_device *dev,
dev_hold(dev);
xdst->u.rt6.rt6i_idev = in6_dev_get(dev);
- if (!xdst->u.rt6.rt6i_idev)
+ if (!xdst->u.rt6.rt6i_idev) {
+ dev_put(dev);
return -ENODEV;
+ }
rt6_transfer_peer(&xdst->u.rt6, rt);
diff --git a/net/ipx/af_ipx.c b/net/ipx/af_ipx.c
index f547a47d381..7a1e0fc1bd4 100644
--- a/net/ipx/af_ipx.c
+++ b/net/ipx/af_ipx.c
@@ -330,7 +330,7 @@ static __inline__ void __ipxitf_put(struct ipx_interface *intrfc)
static int ipxitf_device_event(struct notifier_block *notifier,
unsigned long event, void *ptr)
{
- struct net_device *dev = ptr;
+ struct net_device *dev = netdev_notifier_info_to_dev(ptr);
struct ipx_interface *i, *tmp;
if (!net_eq(dev_net(dev), &init_net))
diff --git a/net/irda/irlap_frame.c b/net/irda/irlap_frame.c
index 8c004161a84..9ea0c933b9f 100644
--- a/net/irda/irlap_frame.c
+++ b/net/irda/irlap_frame.c
@@ -544,7 +544,7 @@ static void irlap_recv_discovery_xid_cmd(struct irlap_cb *self,
/*
* We now have some discovery info to deliver!
*/
- discovery = kmalloc(sizeof(discovery_t), GFP_ATOMIC);
+ discovery = kzalloc(sizeof(discovery_t), GFP_ATOMIC);
if (!discovery) {
IRDA_WARNING("%s: unable to malloc!\n", __func__);
return;
diff --git a/net/iucv/af_iucv.c b/net/iucv/af_iucv.c
index ae691651b72..168aff5e60d 100644
--- a/net/iucv/af_iucv.c
+++ b/net/iucv/af_iucv.c
@@ -2293,7 +2293,7 @@ out_unlock:
static int afiucv_netdev_event(struct notifier_block *this,
unsigned long event, void *ptr)
{
- struct net_device *event_dev = (struct net_device *)ptr;
+ struct net_device *event_dev = netdev_notifier_info_to_dev(ptr);
struct sock *sk;
struct iucv_sock *iucv;
diff --git a/net/key/af_key.c b/net/key/af_key.c
index 5b1e5af2571..c5fbd758968 100644
--- a/net/key/af_key.c
+++ b/net/key/af_key.c
@@ -2366,6 +2366,8 @@ static int pfkey_spddelete(struct sock *sk, struct sk_buff *skb, const struct sa
out:
xfrm_pol_put(xp);
+ if (err == 0)
+ xfrm_garbage_collect(net);
return err;
}
@@ -2615,6 +2617,8 @@ static int pfkey_spdget(struct sock *sk, struct sk_buff *skb, const struct sadb_
out:
xfrm_pol_put(xp);
+ if (delete && err == 0)
+ xfrm_garbage_collect(net);
return err;
}
diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c
index 7cabaf261fe..cc117591f67 100644
--- a/net/mac80211/iface.c
+++ b/net/mac80211/iface.c
@@ -159,10 +159,11 @@ static int ieee80211_change_mtu(struct net_device *dev, int new_mtu)
return 0;
}
-static int ieee80211_verify_mac(struct ieee80211_local *local, u8 *addr,
+static int ieee80211_verify_mac(struct ieee80211_sub_if_data *sdata, u8 *addr,
bool check_dup)
{
- struct ieee80211_sub_if_data *sdata;
+ struct ieee80211_local *local = sdata->local;
+ struct ieee80211_sub_if_data *iter;
u64 new, mask, tmp;
u8 *m;
int ret = 0;
@@ -184,12 +185,15 @@ static int ieee80211_verify_mac(struct ieee80211_local *local, u8 *addr,
return ret;
mutex_lock(&local->iflist_mtx);
- list_for_each_entry(sdata, &local->interfaces, list) {
- if (sdata->vif.type == NL80211_IFTYPE_MONITOR &&
- !(sdata->u.mntr_flags & MONITOR_FLAG_ACTIVE))
+ list_for_each_entry(iter, &local->interfaces, list) {
+ if (iter == sdata)
continue;
- m = sdata->vif.addr;
+ if (iter->vif.type == NL80211_IFTYPE_MONITOR &&
+ !(iter->u.mntr_flags & MONITOR_FLAG_ACTIVE))
+ continue;
+
+ m = iter->vif.addr;
tmp = ((u64)m[0] << 5*8) | ((u64)m[1] << 4*8) |
((u64)m[2] << 3*8) | ((u64)m[3] << 2*8) |
((u64)m[4] << 1*8) | ((u64)m[5] << 0*8);
@@ -218,7 +222,7 @@ static int ieee80211_change_mac(struct net_device *dev, void *addr)
!(sdata->u.mntr_flags & MONITOR_FLAG_ACTIVE))
check_dup = false;
- ret = ieee80211_verify_mac(sdata->local, sa->sa_data, check_dup);
+ ret = ieee80211_verify_mac(sdata, sa->sa_data, check_dup);
if (ret)
return ret;
@@ -1503,7 +1507,17 @@ static void ieee80211_assign_perm_addr(struct ieee80211_local *local,
break;
}
+ /*
+ * Pick address of existing interface in case user changed
+ * MAC address manually, default to perm_addr.
+ */
m = local->hw.wiphy->perm_addr;
+ list_for_each_entry(sdata, &local->interfaces, list) {
+ if (sdata->vif.type == NL80211_IFTYPE_MONITOR)
+ continue;
+ m = sdata->vif.addr;
+ break;
+ }
start = ((u64)m[0] << 5*8) | ((u64)m[1] << 4*8) |
((u64)m[2] << 3*8) | ((u64)m[3] << 2*8) |
((u64)m[4] << 1*8) | ((u64)m[5] << 0*8);
@@ -1750,10 +1764,9 @@ void ieee80211_remove_interfaces(struct ieee80211_local *local)
}
static int netdev_notify(struct notifier_block *nb,
- unsigned long state,
- void *ndev)
+ unsigned long state, void *ptr)
{
- struct net_device *dev = ndev;
+ struct net_device *dev = netdev_notifier_info_to_dev(ptr);
struct ieee80211_sub_if_data *sdata;
if (state != NETDEV_CHANGENAME)
diff --git a/net/mpls/Kconfig b/net/mpls/Kconfig
new file mode 100644
index 00000000000..37421db8896
--- /dev/null
+++ b/net/mpls/Kconfig
@@ -0,0 +1,9 @@
+#
+# MPLS configuration
+#
+config NET_MPLS_GSO
+ tristate "MPLS: GSO support"
+ help
+ This is a helper module to allow segmentation of non-MPLS GSO packets
+ that have had MPLS stack entries pushed onto them and thus
+ become MPLS GSO packets.
diff --git a/net/mpls/Makefile b/net/mpls/Makefile
new file mode 100644
index 00000000000..0a3c171be53
--- /dev/null
+++ b/net/mpls/Makefile
@@ -0,0 +1,4 @@
+#
+# Makefile for MPLS.
+#
+obj-y += mpls_gso.o
diff --git a/net/mpls/mpls_gso.c b/net/mpls/mpls_gso.c
new file mode 100644
index 00000000000..1bec1219ab8
--- /dev/null
+++ b/net/mpls/mpls_gso.c
@@ -0,0 +1,108 @@
+/*
+ * MPLS GSO Support
+ *
+ * Authors: Simon Horman (horms@verge.net.au)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ * Based on: GSO portions of net/ipv4/gre.c
+ */
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#include <linux/err.h>
+#include <linux/module.h>
+#include <linux/netdev_features.h>
+#include <linux/netdevice.h>
+#include <linux/skbuff.h>
+
+static struct sk_buff *mpls_gso_segment(struct sk_buff *skb,
+ netdev_features_t features)
+{
+ struct sk_buff *segs = ERR_PTR(-EINVAL); /* returned as-is when the gso_type check below fails */
+ netdev_features_t mpls_features;
+ __be16 mpls_protocol;
+
+ if (unlikely(skb_shinfo(skb)->gso_type & /* reject any gso_type bit outside the set listed here */
+ ~(SKB_GSO_TCPV4 |
+ SKB_GSO_TCPV6 |
+ SKB_GSO_UDP |
+ SKB_GSO_DODGY |
+ SKB_GSO_TCP_ECN |
+ SKB_GSO_GRE |
+ SKB_GSO_MPLS)))
+ goto out;
+
+ /* Setup inner SKB: remember the outer protocol so it can be
+ * restored later, then switch skb->protocol to the inner one.
+ */
+ mpls_protocol = skb->protocol;
+ skb->protocol = skb->inner_protocol;
+
+ /* Push back the mac header that skb_mac_gso_segment() has pulled.
+ * It will be re-pulled by the call to skb_mac_gso_segment() below
+ */
+ __skb_push(skb, skb->mac_len);
+
+ /* Segment inner packet. The feature mask is derived from the
+ * device's mpls_features and netif_skb_features(skb); the 'features'
+ * argument of this function is not referenced anywhere in its body.
+ */
+ mpls_features = skb->dev->mpls_features & netif_skb_features(skb);
+ segs = skb_mac_gso_segment(skb, mpls_features);
+
+
+ /* Restore outer protocol. */
+ skb->protocol = mpls_protocol;
+
+ /* Re-pull the mac header that the call to skb_mac_gso_segment()
+ * above pulled. It will be re-pushed after returning
+ * skb_mac_gso_segment(), an indirect caller of this function.
+ *
+ * NOTE(review): the text above says "re-pull", yet the statement
+ * below calls __skb_push() with (skb->data - skb_mac_header(skb)).
+ * Confirm whether a pull was intended here.
+ */
+ __skb_push(skb, skb->data - skb_mac_header(skb));
+
+out:
+ return segs;
+}
+
+static int mpls_gso_send_check(struct sk_buff *skb)
+{
+ return 0; /* no checks are performed on skb: unconditionally returns 0 */
+}
+
+static struct packet_offload mpls_mc_offload = {
+ .type = cpu_to_be16(ETH_P_MPLS_MC), /* differs from mpls_uc_offload only in this type value */
+ .callbacks = {
+ .gso_send_check = mpls_gso_send_check,
+ .gso_segment = mpls_gso_segment,
+ },
+};
+
+static struct packet_offload mpls_uc_offload = {
+ .type = cpu_to_be16(ETH_P_MPLS_UC), /* differs from mpls_mc_offload only in this type value */
+ .callbacks = {
+ .gso_send_check = mpls_gso_send_check,
+ .gso_segment = mpls_gso_segment,
+ },
+};
+
+static int __init mpls_gso_init(void)
+{
+ pr_info("MPLS GSO support\n");
+
+ dev_add_offload(&mpls_uc_offload); /* both offloads are removed again in mpls_gso_exit() */
+ dev_add_offload(&mpls_mc_offload);
+
+ return 0; /* this function has no failure path */
+}
+
+static void __exit mpls_gso_exit(void)
+{
+ dev_remove_offload(&mpls_uc_offload); /* counterpart of the dev_add_offload() calls in mpls_gso_init() */
+ dev_remove_offload(&mpls_mc_offload);
+}
+
+module_init(mpls_gso_init);
+module_exit(mpls_gso_exit);
+
+MODULE_DESCRIPTION("MPLS GSO support");
+MODULE_AUTHOR("Simon Horman (horms@verge.net.au)");
+MODULE_LICENSE("GPL");
diff --git a/net/netfilter/core.c b/net/netfilter/core.c
index 07c865a31a3..2217363ab42 100644
--- a/net/netfilter/core.c
+++ b/net/netfilter/core.c
@@ -30,6 +30,8 @@ static DEFINE_MUTEX(afinfo_mutex);
const struct nf_afinfo __rcu *nf_afinfo[NFPROTO_NUMPROTO] __read_mostly;
EXPORT_SYMBOL(nf_afinfo);
+const struct nf_ipv6_ops __rcu *nf_ipv6_ops __read_mostly;
+EXPORT_SYMBOL_GPL(nf_ipv6_ops);
int nf_register_afinfo(const struct nf_afinfo *afinfo)
{
@@ -302,17 +304,26 @@ static struct pernet_operations netfilter_net_ops = {
.exit = netfilter_net_exit,
};
-void __init netfilter_init(void)
+int __init netfilter_init(void)
{
- int i, h;
+ int i, h, ret;
+
for (i = 0; i < ARRAY_SIZE(nf_hooks); i++) {
for (h = 0; h < NF_MAX_HOOKS; h++)
INIT_LIST_HEAD(&nf_hooks[i][h]);
}
- if (register_pernet_subsys(&netfilter_net_ops) < 0)
- panic("cannot create netfilter proc entry");
+ ret = register_pernet_subsys(&netfilter_net_ops);
+ if (ret < 0)
+ goto err;
+
+ ret = netfilter_log_init();
+ if (ret < 0)
+ goto err_pernet;
- if (netfilter_log_init() < 0)
- panic("cannot initialize nf_log");
+ return 0;
+err_pernet:
+ unregister_pernet_subsys(&netfilter_net_ops);
+err:
+ return ret;
}
diff --git a/net/netfilter/ipvs/ip_vs_conn.c b/net/netfilter/ipvs/ip_vs_conn.c
index a083bda322b..c8c52a98590 100644
--- a/net/netfilter/ipvs/ip_vs_conn.c
+++ b/net/netfilter/ipvs/ip_vs_conn.c
@@ -975,8 +975,7 @@ static void *ip_vs_conn_array(struct seq_file *seq, loff_t pos)
return cp;
}
}
- rcu_read_unlock();
- rcu_read_lock();
+ cond_resched_rcu();
}
return NULL;
@@ -1015,8 +1014,7 @@ static void *ip_vs_conn_seq_next(struct seq_file *seq, void *v, loff_t *pos)
iter->l = &ip_vs_conn_tab[idx];
return cp;
}
- rcu_read_unlock();
- rcu_read_lock();
+ cond_resched_rcu();
}
iter->l = NULL;
return NULL;
@@ -1206,17 +1204,13 @@ void ip_vs_random_dropentry(struct net *net)
int idx;
struct ip_vs_conn *cp, *cp_c;
+ rcu_read_lock();
/*
* Randomly scan 1/32 of the whole table every second
*/
for (idx = 0; idx < (ip_vs_conn_tab_size>>5); idx++) {
unsigned int hash = net_random() & ip_vs_conn_tab_mask;
- /*
- * Lock is actually needed in this loop.
- */
- rcu_read_lock();
-
hlist_for_each_entry_rcu(cp, &ip_vs_conn_tab[hash], c_list) {
if (cp->flags & IP_VS_CONN_F_TEMPLATE)
/* connection template */
@@ -1252,8 +1246,9 @@ void ip_vs_random_dropentry(struct net *net)
__ip_vs_conn_put(cp);
}
}
- rcu_read_unlock();
+ cond_resched_rcu();
}
+ rcu_read_unlock();
}
@@ -1267,11 +1262,8 @@ static void ip_vs_conn_flush(struct net *net)
struct netns_ipvs *ipvs = net_ipvs(net);
flush_again:
+ rcu_read_lock();
for (idx = 0; idx < ip_vs_conn_tab_size; idx++) {
- /*
- * Lock is actually needed in this loop.
- */
- rcu_read_lock();
hlist_for_each_entry_rcu(cp, &ip_vs_conn_tab[idx], c_list) {
if (!ip_vs_conn_net_eq(cp, net))
@@ -1286,8 +1278,9 @@ flush_again:
__ip_vs_conn_put(cp);
}
}
- rcu_read_unlock();
+ cond_resched_rcu();
}
+ rcu_read_unlock();
/* the counter may be not NULL, because maybe some conn entries
are run by slow timer handler or unhashed but still referred */
diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c
index 085b5880ab0..05565d2b3a6 100644
--- a/net/netfilter/ipvs/ip_vs_core.c
+++ b/net/netfilter/ipvs/ip_vs_core.c
@@ -1001,6 +1001,32 @@ static inline int is_tcp_reset(const struct sk_buff *skb, int nh_len)
return th->rst;
}
+static inline bool is_new_conn(const struct sk_buff *skb,
+ struct ip_vs_iphdr *iph)
+{
+ switch (iph->protocol) { /* only TCP and SCTP can yield true */
+ case IPPROTO_TCP: {
+ struct tcphdr _tcph, *th;
+
+ th = skb_header_pointer(skb, iph->len, sizeof(_tcph), &_tcph); /* TCP header is looked up at offset iph->len */
+ if (th == NULL)
+ return false; /* no header obtained: treated as not new */
+ return th->syn; /* result is the value of the syn bit */
+ }
+ case IPPROTO_SCTP: {
+ sctp_chunkhdr_t *sch, schunk;
+
+ sch = skb_header_pointer(skb, iph->len + sizeof(sctp_sctphdr_t), /* chunk header sits sizeof(sctp_sctphdr_t) past iph->len */
+ sizeof(schunk), &schunk);
+ if (sch == NULL)
+ return false; /* no chunk header obtained: treated as not new */
+ return sch->type == SCTP_CID_INIT; /* true only for an INIT chunk */
+ }
+ default:
+ return false; /* every other protocol is reported as not new */
+ }
+}
+
/* Handle response packets: rewrite addresses and send away...
*/
static unsigned int
@@ -1612,6 +1638,15 @@ ip_vs_in(unsigned int hooknum, struct sk_buff *skb, int af)
* Check if the packet belongs to an existing connection entry
*/
cp = pp->conn_in_get(af, skb, &iph, 0);
+
+ if (unlikely(sysctl_expire_nodest_conn(ipvs)) && cp && cp->dest &&
+ unlikely(!atomic_read(&cp->dest->weight)) && !iph.fragoffs &&
+ is_new_conn(skb, &iph)) {
+ ip_vs_conn_expire_now(cp);
+ __ip_vs_conn_put(cp);
+ cp = NULL;
+ }
+
if (unlikely(!cp) && !iph.fragoffs) {
/* No (second) fragments need to enter here, as nf_defrag_ipv6
* replayed fragment zero will already have created the cp
diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c
index 5b142fb1648..df05c1c276f 100644
--- a/net/netfilter/ipvs/ip_vs_ctl.c
+++ b/net/netfilter/ipvs/ip_vs_ctl.c
@@ -1487,9 +1487,9 @@ ip_vs_forget_dev(struct ip_vs_dest *dest, struct net_device *dev)
* Currently only NETDEV_DOWN is handled to release refs to cached dsts
*/
static int ip_vs_dst_event(struct notifier_block *this, unsigned long event,
- void *ptr)
+ void *ptr)
{
- struct net_device *dev = ptr;
+ struct net_device *dev = netdev_notifier_info_to_dev(ptr);
struct net *net = dev_net(dev);
struct netns_ipvs *ipvs = net_ipvs(net);
struct ip_vs_service *svc;
@@ -1716,9 +1716,9 @@ static struct ctl_table vs_vars[] = {
},
{
.procname = "sync_qlen_max",
- .maxlen = sizeof(int),
+ .maxlen = sizeof(unsigned long),
.mode = 0644,
- .proc_handler = proc_dointvec,
+ .proc_handler = proc_doulongvec_minmax,
},
{
.procname = "sync_sock_size",
diff --git a/net/netfilter/ipvs/ip_vs_sh.c b/net/netfilter/ipvs/ip_vs_sh.c
index 0df269d7c99..a65edfe4b16 100644
--- a/net/netfilter/ipvs/ip_vs_sh.c
+++ b/net/netfilter/ipvs/ip_vs_sh.c
@@ -67,8 +67,8 @@ struct ip_vs_sh_bucket {
#define IP_VS_SH_TAB_MASK (IP_VS_SH_TAB_SIZE - 1)
struct ip_vs_sh_state {
- struct ip_vs_sh_bucket buckets[IP_VS_SH_TAB_SIZE];
struct rcu_head rcu_head;
+ struct ip_vs_sh_bucket buckets[IP_VS_SH_TAB_SIZE];
};
/*
diff --git a/net/netfilter/nf_conntrack_ftp.c b/net/netfilter/nf_conntrack_ftp.c
index 6b217074237..b8a0924064e 100644
--- a/net/netfilter/nf_conntrack_ftp.c
+++ b/net/netfilter/nf_conntrack_ftp.c
@@ -55,10 +55,14 @@ unsigned int (*nf_nat_ftp_hook)(struct sk_buff *skb,
struct nf_conntrack_expect *exp);
EXPORT_SYMBOL_GPL(nf_nat_ftp_hook);
-static int try_rfc959(const char *, size_t, struct nf_conntrack_man *, char);
-static int try_eprt(const char *, size_t, struct nf_conntrack_man *, char);
+static int try_rfc959(const char *, size_t, struct nf_conntrack_man *,
+ char, unsigned int *);
+static int try_rfc1123(const char *, size_t, struct nf_conntrack_man *,
+ char, unsigned int *);
+static int try_eprt(const char *, size_t, struct nf_conntrack_man *,
+ char, unsigned int *);
static int try_epsv_response(const char *, size_t, struct nf_conntrack_man *,
- char);
+ char, unsigned int *);
static struct ftp_search {
const char *pattern;
@@ -66,7 +70,7 @@ static struct ftp_search {
char skip;
char term;
enum nf_ct_ftp_type ftptype;
- int (*getnum)(const char *, size_t, struct nf_conntrack_man *, char);
+ int (*getnum)(const char *, size_t, struct nf_conntrack_man *, char, unsigned int *);
} search[IP_CT_DIR_MAX][2] = {
[IP_CT_DIR_ORIGINAL] = {
{
@@ -90,10 +94,8 @@ static struct ftp_search {
{
.pattern = "227 ",
.plen = sizeof("227 ") - 1,
- .skip = '(',
- .term = ')',
.ftptype = NF_CT_FTP_PASV,
- .getnum = try_rfc959,
+ .getnum = try_rfc1123,
},
{
.pattern = "229 ",
@@ -132,8 +134,9 @@ static int try_number(const char *data, size_t dlen, u_int32_t array[],
i++;
else {
/* Unexpected character; true if it's the
- terminator and we're finished. */
- if (*data == term && i == array_size - 1)
+ terminator (or we don't care about one)
+ and we're finished. */
+ if ((*data == term || !term) && i == array_size - 1)
return len;
pr_debug("Char %u (got %u nums) `%u' unexpected\n",
@@ -148,7 +151,8 @@ static int try_number(const char *data, size_t dlen, u_int32_t array[],
/* Returns 0, or length of numbers: 192,168,1,1,5,6 */
static int try_rfc959(const char *data, size_t dlen,
- struct nf_conntrack_man *cmd, char term)
+ struct nf_conntrack_man *cmd, char term,
+ unsigned int *offset)
{
int length;
u_int32_t array[6];
@@ -163,6 +167,33 @@ static int try_rfc959(const char *data, size_t dlen,
return length;
}
+/*
+ * From RFC 1123:
+ * The format of the 227 reply to a PASV command is not
+ * well standardized. In particular, an FTP client cannot
+ * assume that the parentheses shown on page 40 of RFC-959
+ * will be present (and in fact, Figure 3 on page 43 omits
+ * them). Therefore, a User-FTP program that interprets
+ * the PASV reply must scan the reply for the first digit
+ * of the host and port numbers.
+ *
+ * Accordingly, this helper scans data[0 .. dlen) for the first
+ * decimal digit. If there is none it returns 0. Otherwise it adds
+ * the number of bytes skipped to *offset and returns whatever
+ * try_rfc959() yields for the remainder of the buffer, which is
+ * called with a terminator of 0. The 'term' argument received here
+ * is not used.
+ */
+static int try_rfc1123(const char *data, size_t dlen,
+ struct nf_conntrack_man *cmd, char term,
+ unsigned int *offset)
+{
+ int i;
+ for (i = 0; i < dlen; i++) /* find the index of the first digit */
+ if (isdigit(data[i]))
+ break;
+
+ if (i == dlen) /* loop ran to the end: no digit present */
+ return 0;
+
+ *offset += i; /* account for the bytes skipped before the digit */
+
+ return try_rfc959(data + i, dlen - i, cmd, 0, offset);
+}
+
/* Grab port: number up to delimiter */
static int get_port(const char *data, int start, size_t dlen, char delim,
__be16 *port)
@@ -191,7 +222,7 @@ static int get_port(const char *data, int start, size_t dlen, char delim,
/* Returns 0, or length of numbers: |1|132.235.1.2|6275| or |2|3ffe::1|6275| */
static int try_eprt(const char *data, size_t dlen, struct nf_conntrack_man *cmd,
- char term)
+ char term, unsigned int *offset)
{
char delim;
int length;
@@ -239,7 +270,8 @@ static int try_eprt(const char *data, size_t dlen, struct nf_conntrack_man *cmd,
/* Returns 0, or length of numbers: |||6446| */
static int try_epsv_response(const char *data, size_t dlen,
- struct nf_conntrack_man *cmd, char term)
+ struct nf_conntrack_man *cmd, char term,
+ unsigned int *offset)
{
char delim;
@@ -261,9 +293,10 @@ static int find_pattern(const char *data, size_t dlen,
unsigned int *numlen,
struct nf_conntrack_man *cmd,
int (*getnum)(const char *, size_t,
- struct nf_conntrack_man *, char))
+ struct nf_conntrack_man *, char,
+ unsigned int *))
{
- size_t i;
+ size_t i = plen;
pr_debug("find_pattern `%s': dlen = %Zu\n", pattern, dlen);
if (dlen == 0)
@@ -293,16 +326,18 @@ static int find_pattern(const char *data, size_t dlen,
pr_debug("Pattern matches!\n");
/* Now we've found the constant string, try to skip
to the 'skip' character */
- for (i = plen; data[i] != skip; i++)
- if (i == dlen - 1) return -1;
+ if (skip) {
+ for (i = plen; data[i] != skip; i++)
+ if (i == dlen - 1) return -1;
- /* Skip over the last character */
- i++;
+ /* Skip over the last character */
+ i++;
+ }
pr_debug("Skipped up to `%c'!\n", skip);
*numoff = i;
- *numlen = getnum(data + i, dlen - i, cmd, term);
+ *numlen = getnum(data + i, dlen - i, cmd, term, numoff);
if (!*numlen)
return -1;
diff --git a/net/netfilter/nf_log.c b/net/netfilter/nf_log.c
index 388656d5a9e..4b60a87b759 100644
--- a/net/netfilter/nf_log.c
+++ b/net/netfilter/nf_log.c
@@ -148,7 +148,7 @@ void nf_log_packet(struct net *net,
va_start(args, fmt);
vsnprintf(prefix, sizeof(prefix), fmt, args);
va_end(args);
- logger->logfn(pf, hooknum, skb, in, out, loginfo, prefix);
+ logger->logfn(net, pf, hooknum, skb, in, out, loginfo, prefix);
}
rcu_read_unlock();
}
@@ -368,17 +368,18 @@ static int __net_init nf_log_net_init(struct net *net)
return 0;
out_sysctl:
- /* For init_net: errors will trigger panic, don't unroll on error. */
- if (!net_eq(net, &init_net))
- remove_proc_entry("nf_log", net->nf.proc_netfilter);
-
+#ifdef CONFIG_PROC_FS
+ remove_proc_entry("nf_log", net->nf.proc_netfilter);
+#endif
return ret;
}
static void __net_exit nf_log_net_exit(struct net *net)
{
netfilter_log_sysctl_exit(net);
+#ifdef CONFIG_PROC_FS
remove_proc_entry("nf_log", net->nf.proc_netfilter);
+#endif
}
static struct pernet_operations nf_log_net_ops = {
diff --git a/net/netfilter/nf_nat_helper.c b/net/netfilter/nf_nat_helper.c
index 5fea563afe3..85e20a91908 100644
--- a/net/netfilter/nf_nat_helper.c
+++ b/net/netfilter/nf_nat_helper.c
@@ -104,7 +104,7 @@ static void mangle_contents(struct sk_buff *skb,
/* move post-replacement */
memmove(data + match_offset + rep_len,
data + match_offset + match_len,
- skb->tail - (skb->network_header + dataoff +
+ skb_tail_pointer(skb) - (skb_network_header(skb) + dataoff +
match_offset + match_len));
/* insert data from buffer */
diff --git a/net/netfilter/nfnetlink_log.c b/net/netfilter/nfnetlink_log.c
index faf1e9300d8..962e9792e31 100644
--- a/net/netfilter/nfnetlink_log.c
+++ b/net/netfilter/nfnetlink_log.c
@@ -602,7 +602,8 @@ static struct nf_loginfo default_loginfo = {
/* log handler for internal netfilter logging api */
void
-nfulnl_log_packet(u_int8_t pf,
+nfulnl_log_packet(struct net *net,
+ u_int8_t pf,
unsigned int hooknum,
const struct sk_buff *skb,
const struct net_device *in,
@@ -615,7 +616,6 @@ nfulnl_log_packet(u_int8_t pf,
const struct nf_loginfo *li;
unsigned int qthreshold;
unsigned int plen;
- struct net *net = dev_net(in ? in : out);
struct nfnl_log_net *log = nfnl_log_pernet(net);
if (li_user && li_user->type == NF_LOG_TYPE_ULOG)
@@ -1045,7 +1045,9 @@ static int __net_init nfnl_log_net_init(struct net *net)
static void __net_exit nfnl_log_net_exit(struct net *net)
{
+#ifdef CONFIG_PROC_FS
remove_proc_entry("nfnetlink_log", net->nf.proc_netfilter);
+#endif
}
static struct pernet_operations nfnl_log_net_ops = {
diff --git a/net/netfilter/nfnetlink_queue_core.c b/net/netfilter/nfnetlink_queue_core.c
index 2e0e835baf7..c011543bff5 100644
--- a/net/netfilter/nfnetlink_queue_core.c
+++ b/net/netfilter/nfnetlink_queue_core.c
@@ -41,6 +41,14 @@
#define NFQNL_QMAX_DEFAULT 1024
+/* We're using struct nlattr which has 16bit nla_len. Note that nla_len
+ * includes the header length. Thus, the maximum packet length that we
+ * support is 65531 bytes. We send truncated packets if the specified length
+ * is larger than that. Userspace can check for presence of NFQA_CAP_LEN
+ * attribute to detect truncation.
+ */
+#define NFQNL_MAX_COPY_RANGE (0xffff - NLA_HDRLEN)
+
struct nfqnl_instance {
struct hlist_node hlist; /* global list of queues */
struct rcu_head rcu;
@@ -122,7 +130,7 @@ instance_create(struct nfnl_queue_net *q, u_int16_t queue_num,
inst->queue_num = queue_num;
inst->peer_portid = portid;
inst->queue_maxlen = NFQNL_QMAX_DEFAULT;
- inst->copy_range = 0xffff;
+ inst->copy_range = NFQNL_MAX_COPY_RANGE;
inst->copy_mode = NFQNL_COPY_NONE;
spin_lock_init(&inst->lock);
INIT_LIST_HEAD(&inst->queue_list);
@@ -333,10 +341,9 @@ nfqnl_build_packet_message(struct nfqnl_instance *queue,
return NULL;
data_len = ACCESS_ONCE(queue->copy_range);
- if (data_len == 0 || data_len > entskb->len)
+ if (data_len > entskb->len)
data_len = entskb->len;
-
if (!entskb->head_frag ||
skb_headlen(entskb) < L1_CACHE_BYTES ||
skb_shinfo(entskb)->nr_frags >= MAX_SKB_FRAGS)
@@ -465,7 +472,8 @@ nfqnl_build_packet_message(struct nfqnl_instance *queue,
if (ct && nfqnl_ct_put(skb, ct, ctinfo) < 0)
goto nla_put_failure;
- if (cap_len > 0 && nla_put_be32(skb, NFQA_CAP_LEN, htonl(cap_len)))
+ if (cap_len > data_len &&
+ nla_put_be32(skb, NFQA_CAP_LEN, htonl(cap_len)))
goto nla_put_failure;
if (nfqnl_put_packet_info(skb, entskb))
@@ -509,10 +517,6 @@ __nfqnl_enqueue_packet(struct net *net, struct nfqnl_instance *queue,
}
spin_lock_bh(&queue->lock);
- if (!queue->peer_portid) {
- err = -EINVAL;
- goto err_out_free_nskb;
- }
if (queue->queue_total >= queue->queue_maxlen) {
if (queue->flags & NFQA_CFG_F_FAIL_OPEN) {
failopen = 1;
@@ -731,13 +735,8 @@ nfqnl_set_mode(struct nfqnl_instance *queue,
case NFQNL_COPY_PACKET:
queue->copy_mode = mode;
- /* We're using struct nlattr which has 16bit nla_len. Note that
- * nla_len includes the header length. Thus, the maximum packet
- * length that we support is 65531 bytes. We send truncated
- * packets if the specified length is larger than that.
- */
- if (range > 0xffff - NLA_HDRLEN)
- queue->copy_range = 0xffff - NLA_HDRLEN;
+ if (range == 0 || range > NFQNL_MAX_COPY_RANGE)
+ queue->copy_range = NFQNL_MAX_COPY_RANGE;
else
queue->copy_range = range;
break;
@@ -800,7 +799,7 @@ static int
nfqnl_rcv_dev_event(struct notifier_block *this,
unsigned long event, void *ptr)
{
- struct net_device *dev = ptr;
+ struct net_device *dev = netdev_notifier_info_to_dev(ptr);
/* Drop any packets associated with the downed device */
if (event == NETDEV_DOWN)
@@ -1285,7 +1284,9 @@ static int __net_init nfnl_queue_net_init(struct net *net)
static void __net_exit nfnl_queue_net_exit(struct net *net)
{
+#ifdef CONFIG_PROC_FS
remove_proc_entry("nfnetlink_queue", net->nf.proc_netfilter);
+#endif
}
static struct pernet_operations nfnl_queue_net_ops = {
diff --git a/net/netfilter/xt_CT.c b/net/netfilter/xt_CT.c
index a60261cb0e8..da35ac06a97 100644
--- a/net/netfilter/xt_CT.c
+++ b/net/netfilter/xt_CT.c
@@ -26,6 +26,9 @@ static inline int xt_ct_target(struct sk_buff *skb, struct nf_conn *ct)
if (skb->nfct != NULL)
return XT_CONTINUE;
+ /* special case the untracked ct : we want the percpu object */
+ if (!ct)
+ ct = nf_ct_untracked_get();
atomic_inc(&ct->ct_general.use);
skb->nfct = &ct->ct_general;
skb->nfctinfo = IP_CT_NEW;
@@ -186,8 +189,7 @@ static int xt_ct_tg_check(const struct xt_tgchk_param *par,
int ret = -EOPNOTSUPP;
if (info->flags & XT_CT_NOTRACK) {
- ct = nf_ct_untracked_get();
- atomic_inc(&ct->ct_general.use);
+ ct = NULL;
goto out;
}
@@ -311,7 +313,7 @@ static void xt_ct_tg_destroy(const struct xt_tgdtor_param *par,
struct nf_conn *ct = info->ct;
struct nf_conn_help *help;
- if (!nf_ct_is_untracked(ct)) {
+ if (ct && !nf_ct_is_untracked(ct)) {
help = nfct_help(ct);
if (help)
module_put(help->helper->me);
@@ -319,8 +321,8 @@ static void xt_ct_tg_destroy(const struct xt_tgdtor_param *par,
nf_ct_l3proto_module_put(par->family);
xt_ct_destroy_timeout(ct);
+ nf_ct_put(info->ct);
}
- nf_ct_put(info->ct);
}
static void xt_ct_tg_destroy_v0(const struct xt_tgdtor_param *par)
diff --git a/net/netfilter/xt_LOG.c b/net/netfilter/xt_LOG.c
index fe573f6c9e9..5ab24843370 100644
--- a/net/netfilter/xt_LOG.c
+++ b/net/netfilter/xt_LOG.c
@@ -466,7 +466,8 @@ log_packet_common(struct sbuff *m,
static void
-ipt_log_packet(u_int8_t pf,
+ipt_log_packet(struct net *net,
+ u_int8_t pf,
unsigned int hooknum,
const struct sk_buff *skb,
const struct net_device *in,
@@ -475,7 +476,6 @@ ipt_log_packet(u_int8_t pf,
const char *prefix)
{
struct sbuff *m;
- struct net *net = dev_net(in ? in : out);
/* FIXME: Disabled from containers until syslog ns is supported */
if (!net_eq(net, &init_net))
@@ -737,7 +737,7 @@ static void dump_ipv6_packet(struct sbuff *m,
dump_sk_uid_gid(m, skb->sk);
/* Max length: 16 "MARK=0xFFFFFFFF " */
- if (!recurse && skb->mark)
+ if (recurse && skb->mark)
sb_add(m, "MARK=0x%x ", skb->mark);
}
@@ -797,7 +797,8 @@ fallback:
}
static void
-ip6t_log_packet(u_int8_t pf,
+ip6t_log_packet(struct net *net,
+ u_int8_t pf,
unsigned int hooknum,
const struct sk_buff *skb,
const struct net_device *in,
@@ -806,7 +807,6 @@ ip6t_log_packet(u_int8_t pf,
const char *prefix)
{
struct sbuff *m;
- struct net *net = dev_net(in ? in : out);
/* FIXME: Disabled from containers until syslog ns is supported */
if (!net_eq(net, &init_net))
@@ -833,17 +833,18 @@ log_tg(struct sk_buff *skb, const struct xt_action_param *par)
{
const struct xt_log_info *loginfo = par->targinfo;
struct nf_loginfo li;
+ struct net *net = dev_net(par->in ? par->in : par->out);
li.type = NF_LOG_TYPE_LOG;
li.u.log.level = loginfo->level;
li.u.log.logflags = loginfo->logflags;
if (par->family == NFPROTO_IPV4)
- ipt_log_packet(NFPROTO_IPV4, par->hooknum, skb, par->in,
+ ipt_log_packet(net, NFPROTO_IPV4, par->hooknum, skb, par->in,
par->out, &li, loginfo->prefix);
#if IS_ENABLED(CONFIG_IP6_NF_IPTABLES)
else if (par->family == NFPROTO_IPV6)
- ip6t_log_packet(NFPROTO_IPV6, par->hooknum, skb, par->in,
+ ip6t_log_packet(net, NFPROTO_IPV6, par->hooknum, skb, par->in,
par->out, &li, loginfo->prefix);
#endif
else
diff --git a/net/netfilter/xt_NFLOG.c b/net/netfilter/xt_NFLOG.c
index a17dd0f589b..fb7497c928a 100644
--- a/net/netfilter/xt_NFLOG.c
+++ b/net/netfilter/xt_NFLOG.c
@@ -26,13 +26,14 @@ nflog_tg(struct sk_buff *skb, const struct xt_action_param *par)
{
const struct xt_nflog_info *info = par->targinfo;
struct nf_loginfo li;
+ struct net *net = dev_net(par->in ? par->in : par->out);
li.type = NF_LOG_TYPE_ULOG;
li.u.ulog.copy_len = info->len;
li.u.ulog.group = info->group;
li.u.ulog.qthreshold = info->threshold;
- nfulnl_log_packet(par->family, par->hooknum, skb, par->in,
+ nfulnl_log_packet(net, par->family, par->hooknum, skb, par->in,
par->out, &li, info->prefix);
return XT_CONTINUE;
}
diff --git a/net/netfilter/xt_TCPOPTSTRIP.c b/net/netfilter/xt_TCPOPTSTRIP.c
index 25fd1c4e1ee..1eb1a44bfd3 100644
--- a/net/netfilter/xt_TCPOPTSTRIP.c
+++ b/net/netfilter/xt_TCPOPTSTRIP.c
@@ -30,17 +30,28 @@ static inline unsigned int optlen(const u_int8_t *opt, unsigned int offset)
static unsigned int
tcpoptstrip_mangle_packet(struct sk_buff *skb,
- const struct xt_tcpoptstrip_target_info *info,
+ const struct xt_action_param *par,
unsigned int tcphoff, unsigned int minlen)
{
+ const struct xt_tcpoptstrip_target_info *info = par->targinfo;
unsigned int optl, i, j;
struct tcphdr *tcph;
u_int16_t n, o;
u_int8_t *opt;
+ int len;
+
+ /* This is a fragment, no TCP header is available */
+ if (par->fragoff != 0)
+ return XT_CONTINUE;
if (!skb_make_writable(skb, skb->len))
return NF_DROP;
+ len = skb->len - tcphoff;
+ if (len < (int)sizeof(struct tcphdr) ||
+ tcp_hdr(skb)->doff * 4 > len)
+ return NF_DROP;
+
tcph = (struct tcphdr *)(skb_network_header(skb) + tcphoff);
opt = (u_int8_t *)tcph;
@@ -76,7 +87,7 @@ tcpoptstrip_mangle_packet(struct sk_buff *skb,
static unsigned int
tcpoptstrip_tg4(struct sk_buff *skb, const struct xt_action_param *par)
{
- return tcpoptstrip_mangle_packet(skb, par->targinfo, ip_hdrlen(skb),
+ return tcpoptstrip_mangle_packet(skb, par, ip_hdrlen(skb),
sizeof(struct iphdr) + sizeof(struct tcphdr));
}
@@ -94,7 +105,7 @@ tcpoptstrip_tg6(struct sk_buff *skb, const struct xt_action_param *par)
if (tcphoff < 0)
return NF_DROP;
- return tcpoptstrip_mangle_packet(skb, par->targinfo, tcphoff,
+ return tcpoptstrip_mangle_packet(skb, par, tcphoff,
sizeof(*ipv6h) + sizeof(struct tcphdr));
}
#endif
diff --git a/net/netfilter/xt_TEE.c b/net/netfilter/xt_TEE.c
index bd93e51d30a..292934d2348 100644
--- a/net/netfilter/xt_TEE.c
+++ b/net/netfilter/xt_TEE.c
@@ -200,7 +200,7 @@ tee_tg6(struct sk_buff *skb, const struct xt_action_param *par)
static int tee_netdev_event(struct notifier_block *this, unsigned long event,
void *ptr)
{
- struct net_device *dev = ptr;
+ struct net_device *dev = netdev_notifier_info_to_dev(ptr);
struct xt_tee_priv *priv;
priv = container_of(this, struct xt_tee_priv, notifier);
diff --git a/net/netfilter/xt_addrtype.c b/net/netfilter/xt_addrtype.c
index 49c5ff7f6dd..68ff29f6086 100644
--- a/net/netfilter/xt_addrtype.c
+++ b/net/netfilter/xt_addrtype.c
@@ -22,6 +22,7 @@
#include <net/ip6_fib.h>
#endif
+#include <linux/netfilter_ipv6.h>
#include <linux/netfilter/xt_addrtype.h>
#include <linux/netfilter/x_tables.h>
@@ -33,12 +34,12 @@ MODULE_ALIAS("ip6t_addrtype");
#if IS_ENABLED(CONFIG_IP6_NF_IPTABLES)
static u32 match_lookup_rt6(struct net *net, const struct net_device *dev,
- const struct in6_addr *addr)
+ const struct in6_addr *addr, u16 mask)
{
const struct nf_afinfo *afinfo;
struct flowi6 flow;
struct rt6_info *rt;
- u32 ret;
+ u32 ret = 0;
int route_err;
memset(&flow, 0, sizeof(flow));
@@ -49,12 +50,19 @@ static u32 match_lookup_rt6(struct net *net, const struct net_device *dev,
rcu_read_lock();
afinfo = nf_get_afinfo(NFPROTO_IPV6);
- if (afinfo != NULL)
+ if (afinfo != NULL) {
+ const struct nf_ipv6_ops *v6ops;
+
+ if (dev && (mask & XT_ADDRTYPE_LOCAL)) {
+ v6ops = nf_get_ipv6_ops();
+ if (v6ops && v6ops->chk_addr(net, addr, dev, true))
+ ret = XT_ADDRTYPE_LOCAL;
+ }
route_err = afinfo->route(net, (struct dst_entry **)&rt,
- flowi6_to_flowi(&flow), !!dev);
- else
+ flowi6_to_flowi(&flow), false);
+ } else {
route_err = 1;
-
+ }
rcu_read_unlock();
if (route_err)
@@ -62,15 +70,12 @@ static u32 match_lookup_rt6(struct net *net, const struct net_device *dev,
if (rt->rt6i_flags & RTF_REJECT)
ret = XT_ADDRTYPE_UNREACHABLE;
- else
- ret = 0;
- if (rt->rt6i_flags & RTF_LOCAL)
+ if (dev == NULL && rt->rt6i_flags & RTF_LOCAL)
ret |= XT_ADDRTYPE_LOCAL;
if (rt->rt6i_flags & RTF_ANYCAST)
ret |= XT_ADDRTYPE_ANYCAST;
-
dst_release(&rt->dst);
return ret;
}
@@ -90,7 +95,7 @@ static bool match_type6(struct net *net, const struct net_device *dev,
if ((XT_ADDRTYPE_LOCAL | XT_ADDRTYPE_ANYCAST |
XT_ADDRTYPE_UNREACHABLE) & mask)
- return !!(mask & match_lookup_rt6(net, dev, addr));
+ return !!(mask & match_lookup_rt6(net, dev, addr, mask));
return true;
}
diff --git a/net/netfilter/xt_rateest.c b/net/netfilter/xt_rateest.c
index ed0db15ab00..7720b036d76 100644
--- a/net/netfilter/xt_rateest.c
+++ b/net/netfilter/xt_rateest.c
@@ -18,7 +18,7 @@ static bool
xt_rateest_mt(const struct sk_buff *skb, struct xt_action_param *par)
{
const struct xt_rateest_match_info *info = par->matchinfo;
- struct gnet_stats_rate_est *r;
+ struct gnet_stats_rate_est64 *r;
u_int32_t bps1, bps2, pps1, pps2;
bool ret = true;
diff --git a/net/netfilter/xt_socket.c b/net/netfilter/xt_socket.c
index 63b2bdb59e9..02704245710 100644
--- a/net/netfilter/xt_socket.c
+++ b/net/netfilter/xt_socket.c
@@ -107,7 +107,7 @@ socket_match(const struct sk_buff *skb, struct xt_action_param *par,
{
const struct iphdr *iph = ip_hdr(skb);
struct udphdr _hdr, *hp = NULL;
- struct sock *sk;
+ struct sock *sk = skb->sk;
__be32 uninitialized_var(daddr), uninitialized_var(saddr);
__be16 uninitialized_var(dport), uninitialized_var(sport);
u8 uninitialized_var(protocol);
@@ -155,9 +155,11 @@ socket_match(const struct sk_buff *skb, struct xt_action_param *par,
}
#endif
- sk = nf_tproxy_get_sock_v4(dev_net(skb->dev), protocol,
- saddr, daddr, sport, dport, par->in, NFT_LOOKUP_ANY);
- if (sk != NULL) {
+ if (!sk)
+ sk = nf_tproxy_get_sock_v4(dev_net(skb->dev), protocol,
+ saddr, daddr, sport, dport,
+ par->in, NFT_LOOKUP_ANY);
+ if (sk) {
bool wildcard;
bool transparent = true;
@@ -173,7 +175,8 @@ socket_match(const struct sk_buff *skb, struct xt_action_param *par,
(sk->sk_state == TCP_TIME_WAIT &&
inet_twsk(sk)->tw_transparent));
- xt_socket_put_sk(sk);
+ if (sk != skb->sk)
+ xt_socket_put_sk(sk);
if (wildcard || !transparent)
sk = NULL;
@@ -260,7 +263,7 @@ socket_mt6_v1(const struct sk_buff *skb, struct xt_action_param *par)
{
struct ipv6hdr *iph = ipv6_hdr(skb);
struct udphdr _hdr, *hp = NULL;
- struct sock *sk;
+ struct sock *sk = skb->sk;
struct in6_addr *daddr = NULL, *saddr = NULL;
__be16 uninitialized_var(dport), uninitialized_var(sport);
int thoff = 0, uninitialized_var(tproto);
@@ -291,9 +294,11 @@ socket_mt6_v1(const struct sk_buff *skb, struct xt_action_param *par)
return false;
}
- sk = nf_tproxy_get_sock_v6(dev_net(skb->dev), tproto,
- saddr, daddr, sport, dport, par->in, NFT_LOOKUP_ANY);
- if (sk != NULL) {
+ if (!sk)
+ sk = nf_tproxy_get_sock_v6(dev_net(skb->dev), tproto,
+ saddr, daddr, sport, dport,
+ par->in, NFT_LOOKUP_ANY);
+ if (sk) {
bool wildcard;
bool transparent = true;
@@ -309,7 +314,8 @@ socket_mt6_v1(const struct sk_buff *skb, struct xt_action_param *par)
(sk->sk_state == TCP_TIME_WAIT &&
inet_twsk(sk)->tw_transparent));
- xt_socket_put_sk(sk);
+ if (sk != skb->sk)
+ xt_socket_put_sk(sk);
if (wildcard || !transparent)
sk = NULL;
diff --git a/net/netlabel/netlabel_domainhash.c b/net/netlabel/netlabel_domainhash.c
index d8d42433755..6bb1d42f0fa 100644
--- a/net/netlabel/netlabel_domainhash.c
+++ b/net/netlabel/netlabel_domainhash.c
@@ -245,6 +245,71 @@ static void netlbl_domhsh_audit_add(struct netlbl_dom_map *entry,
}
}
+/**
+ * netlbl_domhsh_validate - Validate a new domain mapping entry
+ * @entry: the entry to validate
+ *
+ * This function validates the new domain mapping entry to ensure that it is
+ * a valid entry. Returns zero on success, negative values on failure.
+ *
+ */
+static int netlbl_domhsh_validate(const struct netlbl_dom_map *entry)
+{
+ struct netlbl_af4list *iter4;
+ struct netlbl_domaddr4_map *map4;
+#if IS_ENABLED(CONFIG_IPV6)
+ struct netlbl_af6list *iter6;
+ struct netlbl_domaddr6_map *map6;
+#endif /* IPv6 */
+
+ if (entry == NULL)
+ return -EINVAL;
+
+ switch (entry->type) {
+ case NETLBL_NLTYPE_UNLABELED:
+ if (entry->type_def.cipsov4 != NULL ||
+ entry->type_def.addrsel != NULL)
+ return -EINVAL;
+ break;
+ case NETLBL_NLTYPE_CIPSOV4:
+ if (entry->type_def.cipsov4 == NULL)
+ return -EINVAL;
+ break;
+ case NETLBL_NLTYPE_ADDRSELECT:
+ netlbl_af4list_foreach(iter4, &entry->type_def.addrsel->list4) {
+ map4 = netlbl_domhsh_addr4_entry(iter4);
+ switch (map4->type) {
+ case NETLBL_NLTYPE_UNLABELED:
+ if (map4->type_def.cipsov4 != NULL)
+ return -EINVAL;
+ break;
+ case NETLBL_NLTYPE_CIPSOV4:
+ if (map4->type_def.cipsov4 == NULL)
+ return -EINVAL;
+ break;
+ default:
+ return -EINVAL;
+ }
+ }
+#if IS_ENABLED(CONFIG_IPV6)
+ netlbl_af6list_foreach(iter6, &entry->type_def.addrsel->list6) {
+ map6 = netlbl_domhsh_addr6_entry(iter6);
+ switch (map6->type) {
+ case NETLBL_NLTYPE_UNLABELED:
+ break;
+ default:
+ return -EINVAL;
+ }
+ }
+#endif /* IPv6 */
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
/*
* Domain Hash Table Functions
*/
@@ -311,6 +376,10 @@ int netlbl_domhsh_add(struct netlbl_dom_map *entry,
struct netlbl_af6list *tmp6;
#endif /* IPv6 */
+ ret_val = netlbl_domhsh_validate(entry);
+ if (ret_val != 0)
+ return ret_val;
+
/* XXX - we can remove this RCU read lock as the spinlock protects the
* entire function, but before we do we need to fixup the
* netlbl_af[4,6]list RCU functions to do "the right thing" with
diff --git a/net/netlabel/netlabel_unlabeled.c b/net/netlabel/netlabel_unlabeled.c
index 8a6c6ea466d..af3531926ee 100644
--- a/net/netlabel/netlabel_unlabeled.c
+++ b/net/netlabel/netlabel_unlabeled.c
@@ -708,7 +708,7 @@ unlhsh_remove_return:
* netlbl_unlhsh_netdev_handler - Network device notification handler
* @this: notifier block
* @event: the event
- * @ptr: the network device (cast to void)
+ * @ptr: the netdevice notifier info (cast to void)
*
* Description:
* Handle network device events, although at present all we care about is a
@@ -717,10 +717,9 @@ unlhsh_remove_return:
*
*/
static int netlbl_unlhsh_netdev_handler(struct notifier_block *this,
- unsigned long event,
- void *ptr)
+ unsigned long event, void *ptr)
{
- struct net_device *dev = ptr;
+ struct net_device *dev = netdev_notifier_info_to_dev(ptr);
struct netlbl_unlhsh_iface *iface = NULL;
if (!net_eq(dev_net(dev), &init_net))
diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c
index 12ac6b47a35..9b6b115e008 100644
--- a/net/netlink/af_netlink.c
+++ b/net/netlink/af_netlink.c
@@ -747,9 +747,13 @@ static void netlink_skb_destructor(struct sk_buff *skb)
atomic_dec(&ring->pending);
sock_put(sk);
- skb->data = NULL;
+ skb->head = NULL;
}
#endif
+ if (is_vmalloc_addr(skb->head)) {
+ vfree(skb->head);
+ skb->head = NULL;
+ }
if (skb->sk != NULL)
sock_rfree(skb);
}
@@ -854,16 +858,23 @@ netlink_unlock_table(void)
wake_up(&nl_table_wait);
}
+static bool netlink_compare(struct net *net, struct sock *sk)
+{
+ return net_eq(sock_net(sk), net);
+}
+
static struct sock *netlink_lookup(struct net *net, int protocol, u32 portid)
{
- struct nl_portid_hash *hash = &nl_table[protocol].hash;
+ struct netlink_table *table = &nl_table[protocol];
+ struct nl_portid_hash *hash = &table->hash;
struct hlist_head *head;
struct sock *sk;
read_lock(&nl_table_lock);
head = nl_portid_hashfn(hash, portid);
sk_for_each(sk, head) {
- if (net_eq(sock_net(sk), net) && (nlk_sk(sk)->portid == portid)) {
+ if (table->compare(net, sk) &&
+ (nlk_sk(sk)->portid == portid)) {
sock_hold(sk);
goto found;
}
@@ -976,7 +987,8 @@ netlink_update_listeners(struct sock *sk)
static int netlink_insert(struct sock *sk, struct net *net, u32 portid)
{
- struct nl_portid_hash *hash = &nl_table[sk->sk_protocol].hash;
+ struct netlink_table *table = &nl_table[sk->sk_protocol];
+ struct nl_portid_hash *hash = &table->hash;
struct hlist_head *head;
int err = -EADDRINUSE;
struct sock *osk;
@@ -986,7 +998,8 @@ static int netlink_insert(struct sock *sk, struct net *net, u32 portid)
head = nl_portid_hashfn(hash, portid);
len = 0;
sk_for_each(osk, head) {
- if (net_eq(sock_net(osk), net) && (nlk_sk(osk)->portid == portid))
+ if (table->compare(net, osk) &&
+ (nlk_sk(osk)->portid == portid))
break;
len++;
}
@@ -1161,6 +1174,7 @@ static int netlink_release(struct socket *sock)
kfree_rcu(old, rcu);
nl_table[sk->sk_protocol].module = NULL;
nl_table[sk->sk_protocol].bind = NULL;
+ nl_table[sk->sk_protocol].compare = NULL;
nl_table[sk->sk_protocol].flags = 0;
nl_table[sk->sk_protocol].registered = 0;
}
@@ -1183,7 +1197,8 @@ static int netlink_autobind(struct socket *sock)
{
struct sock *sk = sock->sk;
struct net *net = sock_net(sk);
- struct nl_portid_hash *hash = &nl_table[sk->sk_protocol].hash;
+ struct netlink_table *table = &nl_table[sk->sk_protocol];
+ struct nl_portid_hash *hash = &table->hash;
struct hlist_head *head;
struct sock *osk;
s32 portid = task_tgid_vnr(current);
@@ -1195,7 +1210,7 @@ retry:
netlink_table_grab();
head = nl_portid_hashfn(hash, portid);
sk_for_each(osk, head) {
- if (!net_eq(sock_net(osk), net))
+ if (!table->compare(net, osk))
continue;
if (nlk_sk(osk)->portid == portid) {
/* Bind collision, search negative portid values. */
@@ -1420,6 +1435,35 @@ struct sock *netlink_getsockbyfilp(struct file *filp)
return sock;
}
+static struct sk_buff *netlink_alloc_large_skb(unsigned int size)
+{
+ struct sk_buff *skb;
+ void *data;
+
+ if (size <= NLMSG_GOODSIZE)
+ return alloc_skb(size, GFP_KERNEL);
+
+ skb = alloc_skb_head(GFP_KERNEL);
+ if (skb == NULL)
+ return NULL;
+
+ data = vmalloc(size);
+ if (data == NULL)
+ goto err;
+
+ skb->head = data;
+ skb->data = data;
+ skb_reset_tail_pointer(skb);
+ skb->end = skb->tail + size;
+ skb->len = 0;
+ skb->destructor = netlink_skb_destructor;
+
+ return skb;
+err:
+ kfree_skb(skb);
+ return NULL;
+}
+
/*
* Attach a skb to a netlink socket.
* The caller must hold a reference to the destination socket. On error, the
@@ -1510,7 +1554,7 @@ static struct sk_buff *netlink_trim(struct sk_buff *skb, gfp_t allocation)
return skb;
delta = skb->end - skb->tail;
- if (delta * 2 < skb->truesize)
+ if (is_vmalloc_addr(skb->head) || delta * 2 < skb->truesize)
return skb;
if (skb_shared(skb)) {
@@ -2096,7 +2140,7 @@ static int netlink_sendmsg(struct kiocb *kiocb, struct socket *sock,
if (len > sk->sk_sndbuf - 32)
goto out;
err = -ENOBUFS;
- skb = alloc_skb(len, GFP_KERNEL);
+ skb = netlink_alloc_large_skb(len);
if (skb == NULL)
goto out;
@@ -2282,9 +2326,12 @@ __netlink_kernel_create(struct net *net, int unit, struct module *module,
rcu_assign_pointer(nl_table[unit].listeners, listeners);
nl_table[unit].cb_mutex = cb_mutex;
nl_table[unit].module = module;
+ nl_table[unit].compare = netlink_compare;
if (cfg) {
nl_table[unit].bind = cfg->bind;
nl_table[unit].flags = cfg->flags;
+ if (cfg->compare)
+ nl_table[unit].compare = cfg->compare;
}
nl_table[unit].registered = 1;
} else {
@@ -2707,6 +2754,7 @@ static void *netlink_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
struct sock *s;
struct nl_seq_iter *iter;
+ struct net *net;
int i, j;
++*pos;
@@ -2714,11 +2762,12 @@ static void *netlink_seq_next(struct seq_file *seq, void *v, loff_t *pos)
if (v == SEQ_START_TOKEN)
return netlink_seq_socket_idx(seq, 0);
+ net = seq_file_net(seq);
iter = seq->private;
s = v;
do {
s = sk_next(s);
- } while (s && sock_net(s) != seq_file_net(seq));
+ } while (s && !nl_table[s->sk_protocol].compare(net, s));
if (s)
return s;
@@ -2730,7 +2779,8 @@ static void *netlink_seq_next(struct seq_file *seq, void *v, loff_t *pos)
for (; j <= hash->mask; j++) {
s = sk_head(&hash->table[j]);
- while (s && sock_net(s) != seq_file_net(seq))
+
+ while (s && !nl_table[s->sk_protocol].compare(net, s))
s = sk_next(s);
if (s) {
iter->link = i;
diff --git a/net/netlink/af_netlink.h b/net/netlink/af_netlink.h
index ed8522265f4..eaa88d187cd 100644
--- a/net/netlink/af_netlink.h
+++ b/net/netlink/af_netlink.h
@@ -73,6 +73,7 @@ struct netlink_table {
struct mutex *cb_mutex;
struct module *module;
void (*bind)(int group);
+ bool (*compare)(struct net *net, struct sock *sock);
int registered;
};
diff --git a/net/netrom/af_netrom.c b/net/netrom/af_netrom.c
index ec0c80fde69..698814bfa7a 100644
--- a/net/netrom/af_netrom.c
+++ b/net/netrom/af_netrom.c
@@ -117,7 +117,7 @@ static void nr_kill_by_device(struct net_device *dev)
*/
static int nr_device_event(struct notifier_block *this, unsigned long event, void *ptr)
{
- struct net_device *dev = (struct net_device *)ptr;
+ struct net_device *dev = netdev_notifier_info_to_dev(ptr);
if (!net_eq(dev_net(dev), &init_net))
return NOTIFY_DONE;
diff --git a/net/nfc/Makefile b/net/nfc/Makefile
index fb799deaed4..a76f4533cb6 100644
--- a/net/nfc/Makefile
+++ b/net/nfc/Makefile
@@ -5,7 +5,6 @@
obj-$(CONFIG_NFC) += nfc.o
obj-$(CONFIG_NFC_NCI) += nci/
obj-$(CONFIG_NFC_HCI) += hci/
-#obj-$(CONFIG_NFC_LLCP) += llcp/
nfc-objs := core.o netlink.o af_nfc.o rawsock.o llcp_core.o llcp_commands.o \
llcp_sock.o
diff --git a/net/openvswitch/dp_notify.c b/net/openvswitch/dp_notify.c
index ef4feec6cd8..c3235675f35 100644
--- a/net/openvswitch/dp_notify.c
+++ b/net/openvswitch/dp_notify.c
@@ -78,7 +78,7 @@ static int dp_device_event(struct notifier_block *unused, unsigned long event,
void *ptr)
{
struct ovs_net *ovs_net;
- struct net_device *dev = ptr;
+ struct net_device *dev = netdev_notifier_info_to_dev(ptr);
struct vport *vport = NULL;
if (!ovs_is_internal_dev(dev))
diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
index 8ec1bca7f85..79fe63246b2 100644
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -3331,10 +3331,11 @@ static int packet_getsockopt(struct socket *sock, int level, int optname,
}
-static int packet_notifier(struct notifier_block *this, unsigned long msg, void *data)
+static int packet_notifier(struct notifier_block *this,
+ unsigned long msg, void *ptr)
{
struct sock *sk;
- struct net_device *dev = data;
+ struct net_device *dev = netdev_notifier_info_to_dev(ptr);
struct net *net = dev_net(dev);
rcu_read_lock();
diff --git a/net/phonet/pn_dev.c b/net/phonet/pn_dev.c
index 45a7df6575d..56a6146ac94 100644
--- a/net/phonet/pn_dev.c
+++ b/net/phonet/pn_dev.c
@@ -292,9 +292,9 @@ static void phonet_route_autodel(struct net_device *dev)
/* notify Phonet of device events */
static int phonet_device_notify(struct notifier_block *me, unsigned long what,
- void *arg)
+ void *ptr)
{
- struct net_device *dev = arg;
+ struct net_device *dev = netdev_notifier_info_to_dev(ptr);
switch (what) {
case NETDEV_REGISTER:
diff --git a/net/rose/af_rose.c b/net/rose/af_rose.c
index 9c834745159..e98fcfbe600 100644
--- a/net/rose/af_rose.c
+++ b/net/rose/af_rose.c
@@ -202,10 +202,10 @@ static void rose_kill_by_device(struct net_device *dev)
/*
* Handle device status changes.
*/
-static int rose_device_event(struct notifier_block *this, unsigned long event,
- void *ptr)
+static int rose_device_event(struct notifier_block *this,
+ unsigned long event, void *ptr)
{
- struct net_device *dev = (struct net_device *)ptr;
+ struct net_device *dev = netdev_notifier_info_to_dev(ptr);
if (!net_eq(dev_net(dev), &init_net))
return NOTIFY_DONE;
diff --git a/net/sched/act_mirred.c b/net/sched/act_mirred.c
index 5d676edc22a..977c10e0631 100644
--- a/net/sched/act_mirred.c
+++ b/net/sched/act_mirred.c
@@ -243,7 +243,7 @@ nla_put_failure:
static int mirred_device_event(struct notifier_block *unused,
unsigned long event, void *ptr)
{
- struct net_device *dev = ptr;
+ struct net_device *dev = netdev_notifier_info_to_dev(ptr);
struct tcf_mirred *m;
if (event == NETDEV_UNREGISTER)
diff --git a/net/sched/act_police.c b/net/sched/act_police.c
index 823463adbd2..189e3c5b3d0 100644
--- a/net/sched/act_police.c
+++ b/net/sched/act_police.c
@@ -231,14 +231,14 @@ override:
}
if (R_tab) {
police->rate_present = true;
- psched_ratecfg_precompute(&police->rate, R_tab->rate.rate);
+ psched_ratecfg_precompute(&police->rate, &R_tab->rate);
qdisc_put_rtab(R_tab);
} else {
police->rate_present = false;
}
if (P_tab) {
police->peak_present = true;
- psched_ratecfg_precompute(&police->peak, P_tab->rate.rate);
+ psched_ratecfg_precompute(&police->peak, &P_tab->rate);
qdisc_put_rtab(P_tab);
} else {
police->peak_present = false;
@@ -376,9 +376,9 @@ tcf_act_police_dump(struct sk_buff *skb, struct tc_action *a, int bind, int ref)
};
if (police->rate_present)
- opt.rate.rate = psched_ratecfg_getrate(&police->rate);
+ psched_ratecfg_getrate(&opt.rate, &police->rate);
if (police->peak_present)
- opt.peakrate.rate = psched_ratecfg_getrate(&police->peak);
+ psched_ratecfg_getrate(&opt.peakrate, &police->peak);
if (nla_put(skb, TCA_POLICE_TBF, sizeof(opt), &opt))
goto nla_put_failure;
if (police->tcfp_result &&
diff --git a/net/sched/sch_cbq.c b/net/sched/sch_cbq.c
index 1bc210ffcba..71a56886255 100644
--- a/net/sched/sch_cbq.c
+++ b/net/sched/sch_cbq.c
@@ -130,7 +130,7 @@ struct cbq_class {
psched_time_t penalized;
struct gnet_stats_basic_packed bstats;
struct gnet_stats_queue qstats;
- struct gnet_stats_rate_est rate_est;
+ struct gnet_stats_rate_est64 rate_est;
struct tc_cbq_xstats xstats;
struct tcf_proto *filter_list;
diff --git a/net/sched/sch_drr.c b/net/sched/sch_drr.c
index 759b308d1a8..8302717ea30 100644
--- a/net/sched/sch_drr.c
+++ b/net/sched/sch_drr.c
@@ -25,7 +25,7 @@ struct drr_class {
struct gnet_stats_basic_packed bstats;
struct gnet_stats_queue qstats;
- struct gnet_stats_rate_est rate_est;
+ struct gnet_stats_rate_est64 rate_est;
struct list_head alist;
struct Qdisc *qdisc;
diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c
index eac7e0ee23c..20224086cc2 100644
--- a/net/sched/sch_generic.c
+++ b/net/sched/sch_generic.c
@@ -898,14 +898,16 @@ void dev_shutdown(struct net_device *dev)
WARN_ON(timer_pending(&dev->watchdog_timer));
}
-void psched_ratecfg_precompute(struct psched_ratecfg *r, u32 rate)
+void psched_ratecfg_precompute(struct psched_ratecfg *r,
+ const struct tc_ratespec *conf)
{
u64 factor;
u64 mult;
int shift;
- r->rate_bps = (u64)rate << 3;
- r->shift = 0;
+ memset(r, 0, sizeof(*r));
+ r->overhead = conf->overhead;
+ r->rate_bps = (u64)conf->rate << 3;
r->mult = 1;
/*
* Calibrate mult, shift so that token counting is accurate
diff --git a/net/sched/sch_hfsc.c b/net/sched/sch_hfsc.c
index 9facea03fae..c4075610502 100644
--- a/net/sched/sch_hfsc.c
+++ b/net/sched/sch_hfsc.c
@@ -114,7 +114,7 @@ struct hfsc_class {
struct gnet_stats_basic_packed bstats;
struct gnet_stats_queue qstats;
- struct gnet_stats_rate_est rate_est;
+ struct gnet_stats_rate_est64 rate_est;
unsigned int level; /* class level in hierarchy */
struct tcf_proto *filter_list; /* filter list */
unsigned int filter_cnt; /* filter count */
diff --git a/net/sched/sch_htb.c b/net/sched/sch_htb.c
index 79b1876b6cd..162fb800754 100644
--- a/net/sched/sch_htb.c
+++ b/net/sched/sch_htb.c
@@ -78,7 +78,7 @@ struct htb_class {
/* general class parameters */
struct gnet_stats_basic_packed bstats;
struct gnet_stats_queue qstats;
- struct gnet_stats_rate_est rate_est;
+ struct gnet_stats_rate_est64 rate_est;
struct tc_htb_xstats xstats; /* our special stats */
int refcnt; /* usage count of this class */
@@ -109,7 +109,7 @@ struct htb_class {
} un;
struct rb_node node[TC_HTB_NUMPRIO]; /* node for self or feed tree */
struct rb_node pq_node; /* node for event queue */
- psched_time_t pq_key;
+ s64 pq_key;
int prio_activity; /* for which prios are we active */
enum htb_cmode cmode; /* current mode of the class */
@@ -121,10 +121,10 @@ struct htb_class {
/* token bucket parameters */
struct psched_ratecfg rate;
struct psched_ratecfg ceil;
- s64 buffer, cbuffer; /* token bucket depth/rate */
- psched_tdiff_t mbuffer; /* max wait time */
- s64 tokens, ctokens; /* current number of tokens */
- psched_time_t t_c; /* checkpoint time */
+ s64 buffer, cbuffer; /* token bucket depth/rate */
+ s64 mbuffer; /* max wait time */
+ s64 tokens, ctokens; /* current number of tokens */
+ s64 t_c; /* checkpoint time */
};
struct htb_sched {
@@ -141,15 +141,15 @@ struct htb_sched {
struct rb_root wait_pq[TC_HTB_MAXDEPTH];
/* time of nearest event per level (row) */
- psched_time_t near_ev_cache[TC_HTB_MAXDEPTH];
+ s64 near_ev_cache[TC_HTB_MAXDEPTH];
int defcls; /* class where unclassified flows go to */
/* filters for qdisc itself */
struct tcf_proto *filter_list;
- int rate2quantum; /* quant = rate / rate2quantum */
- psched_time_t now; /* cached dequeue time */
+ int rate2quantum; /* quant = rate / rate2quantum */
+ s64 now; /* cached dequeue time */
struct qdisc_watchdog watchdog;
/* non shaped skbs; let them go directly thru */
@@ -664,8 +664,8 @@ static void htb_charge_class(struct htb_sched *q, struct htb_class *cl,
* next pending event (0 for no event in pq, q->now for too many events).
* Note: Applied are events whose have cl->pq_key <= q->now.
*/
-static psched_time_t htb_do_events(struct htb_sched *q, int level,
- unsigned long start)
+static s64 htb_do_events(struct htb_sched *q, int level,
+ unsigned long start)
{
/* don't run for longer than 2 jiffies; 2 is used instead of
* 1 to simplify things when jiffy is going to be incremented
@@ -857,7 +857,7 @@ static struct sk_buff *htb_dequeue(struct Qdisc *sch)
struct sk_buff *skb;
struct htb_sched *q = qdisc_priv(sch);
int level;
- psched_time_t next_event;
+ s64 next_event;
unsigned long start_at;
/* try to dequeue direct packets as high prio (!) to minimize cpu work */
@@ -880,7 +880,7 @@ ok:
for (level = 0; level < TC_HTB_MAXDEPTH; level++) {
/* common case optimization - skip event handler quickly */
int m;
- psched_time_t event;
+ s64 event;
if (q->now >= q->near_ev_cache[level]) {
event = htb_do_events(q, level, start_at);
@@ -1090,9 +1090,9 @@ static int htb_dump_class(struct Qdisc *sch, unsigned long arg,
memset(&opt, 0, sizeof(opt));
- opt.rate.rate = psched_ratecfg_getrate(&cl->rate);
+ psched_ratecfg_getrate(&opt.rate, &cl->rate);
opt.buffer = PSCHED_NS2TICKS(cl->buffer);
- opt.ceil.rate = psched_ratecfg_getrate(&cl->ceil);
+ psched_ratecfg_getrate(&opt.ceil, &cl->ceil);
opt.cbuffer = PSCHED_NS2TICKS(cl->cbuffer);
opt.quantum = cl->quantum;
opt.prio = cl->prio;
@@ -1117,8 +1117,8 @@ htb_dump_class_stats(struct Qdisc *sch, unsigned long arg, struct gnet_dump *d)
if (!cl->level && cl->un.leaf.q)
cl->qstats.qlen = cl->un.leaf.q->q.qlen;
- cl->xstats.tokens = cl->tokens;
- cl->xstats.ctokens = cl->ctokens;
+ cl->xstats.tokens = PSCHED_NS2TICKS(cl->tokens);
+ cl->xstats.ctokens = PSCHED_NS2TICKS(cl->ctokens);
if (gnet_stats_copy_basic(d, &cl->bstats) < 0 ||
gnet_stats_copy_rate_est(d, NULL, &cl->rate_est) < 0 ||
@@ -1200,7 +1200,7 @@ static void htb_parent_to_leaf(struct htb_sched *q, struct htb_class *cl,
parent->un.leaf.q = new_q ? new_q : &noop_qdisc;
parent->tokens = parent->buffer;
parent->ctokens = parent->cbuffer;
- parent->t_c = psched_get_time();
+ parent->t_c = ktime_to_ns(ktime_get());
parent->cmode = HTB_CAN_SEND;
}
@@ -1417,8 +1417,8 @@ static int htb_change_class(struct Qdisc *sch, u32 classid,
/* set class to be in HTB_CAN_SEND state */
cl->tokens = PSCHED_TICKS2NS(hopt->buffer);
cl->ctokens = PSCHED_TICKS2NS(hopt->cbuffer);
- cl->mbuffer = 60 * PSCHED_TICKS_PER_SEC; /* 1min */
- cl->t_c = psched_get_time();
+ cl->mbuffer = 60ULL * NSEC_PER_SEC; /* 1min */
+ cl->t_c = ktime_to_ns(ktime_get());
cl->cmode = HTB_CAN_SEND;
/* attach to the hash list and parent's family */
@@ -1459,8 +1459,8 @@ static int htb_change_class(struct Qdisc *sch, u32 classid,
cl->prio = TC_HTB_NUMPRIO - 1;
}
- psched_ratecfg_precompute(&cl->rate, hopt->rate.rate);
- psched_ratecfg_precompute(&cl->ceil, hopt->ceil.rate);
+ psched_ratecfg_precompute(&cl->rate, &hopt->rate);
+ psched_ratecfg_precompute(&cl->ceil, &hopt->ceil);
cl->buffer = PSCHED_TICKS2NS(hopt->buffer);
cl->cbuffer = PSCHED_TICKS2NS(hopt->buffer);
diff --git a/net/sched/sch_qfq.c b/net/sched/sch_qfq.c
index d51852bba01..7c195d972bf 100644
--- a/net/sched/sch_qfq.c
+++ b/net/sched/sch_qfq.c
@@ -138,7 +138,7 @@ struct qfq_class {
struct gnet_stats_basic_packed bstats;
struct gnet_stats_queue qstats;
- struct gnet_stats_rate_est rate_est;
+ struct gnet_stats_rate_est64 rate_est;
struct Qdisc *qdisc;
struct list_head alist; /* Link for active-classes list. */
struct qfq_aggregate *agg; /* Parent aggregate. */
diff --git a/net/sched/sch_tbf.c b/net/sched/sch_tbf.c
index c8388f3c342..1aaf1b6e51a 100644
--- a/net/sched/sch_tbf.c
+++ b/net/sched/sch_tbf.c
@@ -116,14 +116,57 @@ struct tbf_sched_data {
struct qdisc_watchdog watchdog; /* Watchdog timer */
};
+
+/* GSO packet is too big, segment it so that tbf can transmit
+ * each segment in time.
+ *
+ * @skb: the GSO packet that exceeded q->max_size
+ * @sch: the tbf qdisc the packet was submitted to
+ *
+ * The packet is split with skb_gso_segment() (GSO feature bits masked
+ * off) and every resulting segment that fits in q->max_size is handed to
+ * the inner qdisc. Segments that are still too big go through
+ * qdisc_reshape_fail() individually.
+ *
+ * Returns NET_XMIT_SUCCESS if at least one segment was accepted,
+ * NET_XMIT_DROP otherwise; when segmentation itself fails, the result of
+ * qdisc_reshape_fail() on the original packet is returned.
+ */
+static int tbf_segment(struct sk_buff *skb, struct Qdisc *sch)
+{
+	struct tbf_sched_data *q = qdisc_priv(sch);
+	struct sk_buff *segs, *nskb;
+	netdev_features_t features = netif_skb_features(skb);
+	int ret, nb;
+
+	segs = skb_gso_segment(skb, features & ~NETIF_F_GSO_MASK);
+
+	if (IS_ERR_OR_NULL(segs))
+		return qdisc_reshape_fail(skb, sch);
+
+	nb = 0;
+	while (segs) {
+		/* Detach the current segment from the list before passing it on. */
+		nskb = segs->next;
+		segs->next = NULL;
+		if (likely(segs->len <= q->max_size)) {
+			qdisc_skb_cb(segs)->pkt_len = segs->len;
+			ret = qdisc_enqueue(segs, q->qdisc);
+		} else {
+			/* Dispose of the oversized *segment*, not the original
+			 * skb: the original is released exactly once by
+			 * consume_skb() below, and the segment must not be
+			 * left dangling.
+			 * NOTE(review): if qdisc_reshape_fail() already bumps
+			 * sch->qstats.drops, the check below counts this drop
+			 * a second time - confirm.
+			 */
+			ret = qdisc_reshape_fail(segs, sch);
+		}
+		if (ret != NET_XMIT_SUCCESS) {
+			if (net_xmit_drop_count(ret))
+				sch->qstats.drops++;
+		} else {
+			nb++;
+		}
+		segs = nskb;
+	}
+	sch->q.qlen += nb;
+	/* One packet went in but nb came out; a negative "decrease" of
+	 * 1 - nb is passed on for the extra nb - 1 packets.
+	 * NOTE(review): presumably this adjusts ancestor qlen counters that
+	 * will only account for a single enqueue - confirm.
+	 */
+	if (nb > 1)
+		qdisc_tree_decrease_qlen(sch, 1 - nb);
+	consume_skb(skb);
+	return nb > 0 ? NET_XMIT_SUCCESS : NET_XMIT_DROP;
+}
+
static int tbf_enqueue(struct sk_buff *skb, struct Qdisc *sch)
{
struct tbf_sched_data *q = qdisc_priv(sch);
int ret;
- if (qdisc_pkt_len(skb) > q->max_size)
+ if (qdisc_pkt_len(skb) > q->max_size) {
+ if (skb_is_gso(skb))
+ return tbf_segment(skb, sch);
return qdisc_reshape_fail(skb, sch);
-
+ }
ret = qdisc_enqueue(skb, q->qdisc);
if (ret != NET_XMIT_SUCCESS) {
if (net_xmit_drop_count(ret))
@@ -298,9 +341,9 @@ static int tbf_change(struct Qdisc *sch, struct nlattr *opt)
q->tokens = q->buffer;
q->ptokens = q->mtu;
- psched_ratecfg_precompute(&q->rate, rtab->rate.rate);
+ psched_ratecfg_precompute(&q->rate, &rtab->rate);
if (ptab) {
- psched_ratecfg_precompute(&q->peak, ptab->rate.rate);
+ psched_ratecfg_precompute(&q->peak, &ptab->rate);
q->peak_present = true;
} else {
q->peak_present = false;
@@ -350,9 +393,9 @@ static int tbf_dump(struct Qdisc *sch, struct sk_buff *skb)
goto nla_put_failure;
opt.limit = q->limit;
- opt.rate.rate = psched_ratecfg_getrate(&q->rate);
+ psched_ratecfg_getrate(&opt.rate, &q->rate);
if (q->peak_present)
- opt.peakrate.rate = psched_ratecfg_getrate(&q->peak);
+ psched_ratecfg_getrate(&opt.peakrate, &q->peak);
else
memset(&opt.peakrate, 0, sizeof(opt.peakrate));
opt.mtu = PSCHED_NS2TICKS(q->mtu);
diff --git a/net/sctp/input.c b/net/sctp/input.c
index 4b2c83146aa..6533d81a638 100644
--- a/net/sctp/input.c
+++ b/net/sctp/input.c
@@ -589,7 +589,7 @@ void sctp_v4_err(struct sk_buff *skb, __u32 info)
struct sctp_association *asoc = NULL;
struct sctp_transport *transport;
struct inet_sock *inet;
- sk_buff_data_t saveip, savesctp;
+ __u16 saveip, savesctp;
int err;
struct net *net = dev_net(skb->dev);
diff --git a/net/sctp/ipv6.c b/net/sctp/ipv6.c
index 391a245d520..8ee553b499c 100644
--- a/net/sctp/ipv6.c
+++ b/net/sctp/ipv6.c
@@ -153,7 +153,7 @@ SCTP_STATIC void sctp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
struct sctp_association *asoc;
struct sctp_transport *transport;
struct ipv6_pinfo *np;
- sk_buff_data_t saveip, savesctp;
+ __be16 saveip, savesctp;
int err;
struct net *net = dev_net(skb->dev);
diff --git a/net/socket.c b/net/socket.c
index 6b94633ca61..21fd29f63ed 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -104,6 +104,12 @@
#include <linux/route.h>
#include <linux/sockios.h>
#include <linux/atalk.h>
+#include <net/ll_poll.h>
+
+#ifdef CONFIG_NET_LL_RX_POLL
+unsigned long sysctl_net_ll_poll __read_mostly;
+EXPORT_SYMBOL_GPL(sysctl_net_ll_poll);
+#endif
static int sock_no_open(struct inode *irrelevant, struct file *dontcare);
static ssize_t sock_aio_read(struct kiocb *iocb, const struct iovec *iov,
@@ -1956,7 +1962,7 @@ struct used_address {
unsigned int name_len;
};
-static int __sys_sendmsg(struct socket *sock, struct msghdr __user *msg,
+static int ___sys_sendmsg(struct socket *sock, struct msghdr __user *msg,
struct msghdr *msg_sys, unsigned int flags,
struct used_address *used_address)
{
@@ -2071,22 +2077,30 @@ out:
* BSD sendmsg interface
*/
-SYSCALL_DEFINE3(sendmsg, int, fd, struct msghdr __user *, msg, unsigned int, flags)
+long __sys_sendmsg(int fd, struct msghdr __user *msg, unsigned flags)
{
int fput_needed, err;
struct msghdr msg_sys;
- struct socket *sock = sockfd_lookup_light(fd, &err, &fput_needed);
+ struct socket *sock;
+ sock = sockfd_lookup_light(fd, &err, &fput_needed);
if (!sock)
goto out;
- err = __sys_sendmsg(sock, msg, &msg_sys, flags, NULL);
+ err = ___sys_sendmsg(sock, msg, &msg_sys, flags, NULL);
fput_light(sock->file, fput_needed);
out:
return err;
}
+/*
+ * sendmsg syscall entry: a request carrying MSG_CMSG_COMPAT in flags is
+ * refused with -EINVAL; anything else is forwarded to __sys_sendmsg().
+ */
+SYSCALL_DEFINE3(sendmsg, int, fd, struct msghdr __user *, msg, unsigned int, flags)
+{
+	if (!(flags & MSG_CMSG_COMPAT))
+		return __sys_sendmsg(fd, msg, flags);
+
+	return -EINVAL;
+}
+
/*
* Linux sendmmsg interface
*/
@@ -2117,15 +2131,16 @@ int __sys_sendmmsg(int fd, struct mmsghdr __user *mmsg, unsigned int vlen,
while (datagrams < vlen) {
if (MSG_CMSG_COMPAT & flags) {
- err = __sys_sendmsg(sock, (struct msghdr __user *)compat_entry,
- &msg_sys, flags, &used_address);
+ err = ___sys_sendmsg(sock, (struct msghdr __user *)compat_entry,
+ &msg_sys, flags, &used_address);
if (err < 0)
break;
err = __put_user(err, &compat_entry->msg_len);
++compat_entry;
} else {
- err = __sys_sendmsg(sock, (struct msghdr __user *)entry,
- &msg_sys, flags, &used_address);
+ err = ___sys_sendmsg(sock,
+ (struct msghdr __user *)entry,
+ &msg_sys, flags, &used_address);
if (err < 0)
break;
err = put_user(err, &entry->msg_len);
@@ -2149,10 +2164,12 @@ int __sys_sendmmsg(int fd, struct mmsghdr __user *mmsg, unsigned int vlen,
SYSCALL_DEFINE4(sendmmsg, int, fd, struct mmsghdr __user *, mmsg,
unsigned int, vlen, unsigned int, flags)
{
+ if (flags & MSG_CMSG_COMPAT)
+ return -EINVAL;
return __sys_sendmmsg(fd, mmsg, vlen, flags);
}
-static int __sys_recvmsg(struct socket *sock, struct msghdr __user *msg,
+static int ___sys_recvmsg(struct socket *sock, struct msghdr __user *msg,
struct msghdr *msg_sys, unsigned int flags, int nosec)
{
struct compat_msghdr __user *msg_compat =
@@ -2244,23 +2261,31 @@ out:
* BSD recvmsg interface
*/
-SYSCALL_DEFINE3(recvmsg, int, fd, struct msghdr __user *, msg,
- unsigned int, flags)
+long __sys_recvmsg(int fd, struct msghdr __user *msg, unsigned flags)
{
int fput_needed, err;
struct msghdr msg_sys;
- struct socket *sock = sockfd_lookup_light(fd, &err, &fput_needed);
+ struct socket *sock;
+ sock = sockfd_lookup_light(fd, &err, &fput_needed);
if (!sock)
goto out;
- err = __sys_recvmsg(sock, msg, &msg_sys, flags, 0);
+ err = ___sys_recvmsg(sock, msg, &msg_sys, flags, 0);
fput_light(sock->file, fput_needed);
out:
return err;
}
+/*
+ * recvmsg syscall entry: a request carrying MSG_CMSG_COMPAT in flags is
+ * refused with -EINVAL; anything else is forwarded to __sys_recvmsg().
+ */
+SYSCALL_DEFINE3(recvmsg, int, fd, struct msghdr __user *, msg,
+		unsigned int, flags)
+{
+	if (!(flags & MSG_CMSG_COMPAT))
+		return __sys_recvmsg(fd, msg, flags);
+
+	return -EINVAL;
+}
+
/*
* Linux recvmmsg interface
*/
@@ -2298,17 +2323,18 @@ int __sys_recvmmsg(int fd, struct mmsghdr __user *mmsg, unsigned int vlen,
* No need to ask LSM for more than the first datagram.
*/
if (MSG_CMSG_COMPAT & flags) {
- err = __sys_recvmsg(sock, (struct msghdr __user *)compat_entry,
- &msg_sys, flags & ~MSG_WAITFORONE,
- datagrams);
+ err = ___sys_recvmsg(sock, (struct msghdr __user *)compat_entry,
+ &msg_sys, flags & ~MSG_WAITFORONE,
+ datagrams);
if (err < 0)
break;
err = __put_user(err, &compat_entry->msg_len);
++compat_entry;
} else {
- err = __sys_recvmsg(sock, (struct msghdr __user *)entry,
- &msg_sys, flags & ~MSG_WAITFORONE,
- datagrams);
+ err = ___sys_recvmsg(sock,
+ (struct msghdr __user *)entry,
+ &msg_sys, flags & ~MSG_WAITFORONE,
+ datagrams);
if (err < 0)
break;
err = put_user(err, &entry->msg_len);
@@ -2375,6 +2401,9 @@ SYSCALL_DEFINE5(recvmmsg, int, fd, struct mmsghdr __user *, mmsg,
int datagrams;
struct timespec timeout_sys;
+ if (flags & MSG_CMSG_COMPAT)
+ return -EINVAL;
+
if (!timeout)
return __sys_recvmmsg(fd, mmsg, vlen, flags, NULL);
@@ -2612,7 +2641,9 @@ static int __init sock_init(void)
*/
#ifdef CONFIG_NETFILTER
- netfilter_init();
+ err = netfilter_init();
+ if (err)
+ goto out;
#endif
#ifdef CONFIG_NETWORK_PHY_TIMESTAMPING
diff --git a/net/sunrpc/auth_gss/auth_gss.c b/net/sunrpc/auth_gss/auth_gss.c
index 7da6b457f66..fc2f78d6a9b 100644
--- a/net/sunrpc/auth_gss/auth_gss.c
+++ b/net/sunrpc/auth_gss/auth_gss.c
@@ -52,6 +52,8 @@
#include <linux/sunrpc/gss_api.h>
#include <asm/uaccess.h>
+#include "../netns.h"
+
static const struct rpc_authops authgss_ops;
static const struct rpc_credops gss_credops;
@@ -85,8 +87,6 @@ struct gss_auth {
};
/* pipe_version >= 0 if and only if someone has a pipe open. */
-static int pipe_version = -1;
-static atomic_t pipe_users = ATOMIC_INIT(0);
static DEFINE_SPINLOCK(pipe_version_lock);
static struct rpc_wait_queue pipe_version_rpc_waitqueue;
static DECLARE_WAIT_QUEUE_HEAD(pipe_version_waitqueue);
@@ -266,24 +266,27 @@ struct gss_upcall_msg {
char databuf[UPCALL_BUF_LEN];
};
-static int get_pipe_version(void)
+static int get_pipe_version(struct net *net)
{
+ struct sunrpc_net *sn = net_generic(net, sunrpc_net_id);
int ret;
spin_lock(&pipe_version_lock);
- if (pipe_version >= 0) {
- atomic_inc(&pipe_users);
- ret = pipe_version;
+ if (sn->pipe_version >= 0) {
+ atomic_inc(&sn->pipe_users);
+ ret = sn->pipe_version;
} else
ret = -EAGAIN;
spin_unlock(&pipe_version_lock);
return ret;
}
-static void put_pipe_version(void)
+static void put_pipe_version(struct net *net)
{
- if (atomic_dec_and_lock(&pipe_users, &pipe_version_lock)) {
- pipe_version = -1;
+ struct sunrpc_net *sn = net_generic(net, sunrpc_net_id);
+
+ if (atomic_dec_and_lock(&sn->pipe_users, &pipe_version_lock)) {
+ sn->pipe_version = -1;
spin_unlock(&pipe_version_lock);
}
}
@@ -291,9 +294,10 @@ static void put_pipe_version(void)
static void
gss_release_msg(struct gss_upcall_msg *gss_msg)
{
+ struct net *net = rpc_net_ns(gss_msg->auth->client);
if (!atomic_dec_and_test(&gss_msg->count))
return;
- put_pipe_version();
+ put_pipe_version(net);
BUG_ON(!list_empty(&gss_msg->list));
if (gss_msg->ctx != NULL)
gss_put_ctx(gss_msg->ctx);
@@ -439,7 +443,10 @@ static void gss_encode_msg(struct gss_upcall_msg *gss_msg,
struct rpc_clnt *clnt,
const char *service_name)
{
- if (pipe_version == 0)
+ struct net *net = rpc_net_ns(clnt);
+ struct sunrpc_net *sn = net_generic(net, sunrpc_net_id);
+
+ if (sn->pipe_version == 0)
gss_encode_v0_msg(gss_msg);
else /* pipe_version == 1 */
gss_encode_v1_msg(gss_msg, clnt, service_name);
@@ -455,7 +462,7 @@ gss_alloc_msg(struct gss_auth *gss_auth, struct rpc_clnt *clnt,
gss_msg = kzalloc(sizeof(*gss_msg), GFP_NOFS);
if (gss_msg == NULL)
return ERR_PTR(-ENOMEM);
- vers = get_pipe_version();
+ vers = get_pipe_version(rpc_net_ns(clnt));
if (vers < 0) {
kfree(gss_msg);
return ERR_PTR(vers);
@@ -559,24 +566,34 @@ out:
static inline int
gss_create_upcall(struct gss_auth *gss_auth, struct gss_cred *gss_cred)
{
+ struct net *net = rpc_net_ns(gss_auth->client);
+ struct sunrpc_net *sn = net_generic(net, sunrpc_net_id);
struct rpc_pipe *pipe;
struct rpc_cred *cred = &gss_cred->gc_base;
struct gss_upcall_msg *gss_msg;
+ unsigned long timeout;
DEFINE_WAIT(wait);
- int err = 0;
+ int err;
dprintk("RPC: %s for uid %u\n",
__func__, from_kuid(&init_user_ns, cred->cr_uid));
retry:
+ err = 0;
+ /* Default timeout is 15s unless we know that gssd is not running */
+ timeout = 15 * HZ;
+ if (!sn->gssd_running)
+ timeout = HZ >> 2;
gss_msg = gss_setup_upcall(gss_auth->client, gss_auth, cred);
if (PTR_ERR(gss_msg) == -EAGAIN) {
err = wait_event_interruptible_timeout(pipe_version_waitqueue,
- pipe_version >= 0, 15*HZ);
- if (pipe_version < 0) {
+ sn->pipe_version >= 0, timeout);
+ if (sn->pipe_version < 0) {
+ if (err == 0)
+ sn->gssd_running = 0;
warn_gssd();
err = -EACCES;
}
- if (err)
+ if (err < 0)
goto out;
goto retry;
}
@@ -707,20 +724,22 @@ out:
static int gss_pipe_open(struct inode *inode, int new_version)
{
+ struct net *net = inode->i_sb->s_fs_info;
+ struct sunrpc_net *sn = net_generic(net, sunrpc_net_id);
int ret = 0;
spin_lock(&pipe_version_lock);
- if (pipe_version < 0) {
+ if (sn->pipe_version < 0) {
/* First open of any gss pipe determines the version: */
- pipe_version = new_version;
+ sn->pipe_version = new_version;
rpc_wake_up(&pipe_version_rpc_waitqueue);
wake_up(&pipe_version_waitqueue);
- } else if (pipe_version != new_version) {
+ } else if (sn->pipe_version != new_version) {
/* Trying to open a pipe of a different version */
ret = -EBUSY;
goto out;
}
- atomic_inc(&pipe_users);
+ atomic_inc(&sn->pipe_users);
out:
spin_unlock(&pipe_version_lock);
return ret;
@@ -740,6 +759,7 @@ static int gss_pipe_open_v1(struct inode *inode)
static void
gss_pipe_release(struct inode *inode)
{
+ struct net *net = inode->i_sb->s_fs_info;
struct rpc_pipe *pipe = RPC_I(inode)->pipe;
struct gss_upcall_msg *gss_msg;
@@ -758,7 +778,7 @@ restart:
}
spin_unlock(&pipe->lock);
- put_pipe_version();
+ put_pipe_version(net);
}
static void
diff --git a/net/sunrpc/auth_gss/svcauth_gss.c b/net/sunrpc/auth_gss/svcauth_gss.c
index 871c73c9216..29b4ba93ab3 100644
--- a/net/sunrpc/auth_gss/svcauth_gss.c
+++ b/net/sunrpc/auth_gss/svcauth_gss.c
@@ -1287,7 +1287,7 @@ static bool use_gss_proxy(struct net *net)
#ifdef CONFIG_PROC_FS
-static bool set_gss_proxy(struct net *net, int type)
+static int set_gss_proxy(struct net *net, int type)
{
struct sunrpc_net *sn = net_generic(net, sunrpc_net_id);
int ret = 0;
@@ -1317,10 +1317,12 @@ static inline bool gssp_ready(struct sunrpc_net *sn)
return false;
}
-static int wait_for_gss_proxy(struct net *net)
+static int wait_for_gss_proxy(struct net *net, struct file *file)
{
struct sunrpc_net *sn = net_generic(net, sunrpc_net_id);
+ if (file->f_flags & O_NONBLOCK && !gssp_ready(sn))
+ return -EAGAIN;
return wait_event_interruptible(sn->gssp_wq, gssp_ready(sn));
}
@@ -1362,7 +1364,7 @@ static ssize_t read_gssp(struct file *file, char __user *buf,
size_t len;
int ret;
- ret = wait_for_gss_proxy(net);
+ ret = wait_for_gss_proxy(net, file);
if (ret)
return ret;
diff --git a/net/sunrpc/netns.h b/net/sunrpc/netns.h
index 7111a4c9113..74d948f5d5a 100644
--- a/net/sunrpc/netns.h
+++ b/net/sunrpc/netns.h
@@ -28,7 +28,11 @@ struct sunrpc_net {
wait_queue_head_t gssp_wq;
struct rpc_clnt *gssp_clnt;
int use_gss_proxy;
+ int pipe_version;
+ atomic_t pipe_users;
struct proc_dir_entry *use_gssp_proc;
+
+ unsigned int gssd_running;
};
extern int sunrpc_net_id;
diff --git a/net/sunrpc/rpc_pipe.c b/net/sunrpc/rpc_pipe.c
index a9129f8d707..e7ce4b3eb0b 100644
--- a/net/sunrpc/rpc_pipe.c
+++ b/net/sunrpc/rpc_pipe.c
@@ -216,11 +216,14 @@ rpc_destroy_inode(struct inode *inode)
static int
rpc_pipe_open(struct inode *inode, struct file *filp)
{
+ struct net *net = inode->i_sb->s_fs_info;
+ struct sunrpc_net *sn = net_generic(net, sunrpc_net_id);
struct rpc_pipe *pipe;
int first_open;
int res = -ENXIO;
mutex_lock(&inode->i_mutex);
+ sn->gssd_running = 1;
pipe = RPC_I(inode)->pipe;
if (pipe == NULL)
goto out;
@@ -1069,6 +1072,8 @@ void rpc_pipefs_init_net(struct net *net)
struct sunrpc_net *sn = net_generic(net, sunrpc_net_id);
mutex_init(&sn->pipefs_sb_lock);
+ sn->gssd_running = 1;
+ sn->pipe_version = -1;
}
/*
diff --git a/net/sunrpc/sched.c b/net/sunrpc/sched.c
index f8529fc8e54..5356b120dbf 100644
--- a/net/sunrpc/sched.c
+++ b/net/sunrpc/sched.c
@@ -324,11 +324,17 @@ EXPORT_SYMBOL_GPL(__rpc_wait_for_completion_task);
* Note: If the task is ASYNC, and is being made runnable after sitting on an
* rpc_wait_queue, this must be called with the queue spinlock held to protect
* the wait queue operation.
+ * Note the ordering of rpc_test_and_set_running() and rpc_clear_queued(),
+ * which is needed to ensure that __rpc_execute() doesn't loop (due to the
+ * lockless RPC_IS_QUEUED() test) before we've had a chance to test
+ * the RPC_TASK_RUNNING flag.
*/
static void rpc_make_runnable(struct rpc_task *task)
{
+ bool need_wakeup = !rpc_test_and_set_running(task);
+
rpc_clear_queued(task);
- if (rpc_test_and_set_running(task))
+ if (!need_wakeup)
return;
if (RPC_IS_ASYNC(task)) {
INIT_WORK(&task->u.tk_work, rpc_async_schedule);
diff --git a/net/sunrpc/svcauth_unix.c b/net/sunrpc/svcauth_unix.c
index c3f9e1ef7f5..06bdf5a1082 100644
--- a/net/sunrpc/svcauth_unix.c
+++ b/net/sunrpc/svcauth_unix.c
@@ -810,11 +810,15 @@ svcauth_unix_accept(struct svc_rqst *rqstp, __be32 *authp)
goto badcred;
argv->iov_base = (void*)((__be32*)argv->iov_base + slen); /* skip machname */
argv->iov_len -= slen*4;
-
+ /*
+ * Note: we skip uid_valid()/gid_valid() checks here for
+ * backwards compatibility with clients that use -1 id's.
+ * Instead, -1 uid or gid is later mapped to the
+ * (export-specific) anonymous id by nfsd_setuser.
+ * Supplementary gid's will be left alone.
+ */
cred->cr_uid = make_kuid(&init_user_ns, svc_getnl(argv)); /* uid */
cred->cr_gid = make_kgid(&init_user_ns, svc_getnl(argv)); /* gid */
- if (!uid_valid(cred->cr_uid) || !gid_valid(cred->cr_gid))
- goto badcred;
slen = svc_getnl(argv); /* gids length */
if (slen > 16 || (len -= (slen + 2)*4) < 0)
goto badcred;
@@ -823,8 +827,6 @@ svcauth_unix_accept(struct svc_rqst *rqstp, __be32 *authp)
return SVC_CLOSE;
for (i = 0; i < slen; i++) {
kgid_t kgid = make_kgid(&init_user_ns, svc_getnl(argv));
- if (!gid_valid(kgid))
- goto badcred;
GROUP_AT(cred->cr_group_info, i) = kgid;
}
if (svc_getu32(argv) != htonl(RPC_AUTH_NULL) || svc_getu32(argv) != 0) {
diff --git a/net/tipc/eth_media.c b/net/tipc/eth_media.c
index 120a676a336..fc60bea6316 100644
--- a/net/tipc/eth_media.c
+++ b/net/tipc/eth_media.c
@@ -251,9 +251,9 @@ static void disable_bearer(struct tipc_bearer *tb_ptr)
* specified device.
*/
static int recv_notification(struct notifier_block *nb, unsigned long evt,
- void *dv)
+ void *ptr)
{
- struct net_device *dev = (struct net_device *)dv;
+ struct net_device *dev = netdev_notifier_info_to_dev(ptr);
struct eth_bearer *eb_ptr = &eth_bearers[0];
struct eth_bearer *stop = &eth_bearers[MAX_ETH_BEARERS];
diff --git a/net/tipc/ib_media.c b/net/tipc/ib_media.c
index 2a2864c25e1..baa9df4327d 100644
--- a/net/tipc/ib_media.c
+++ b/net/tipc/ib_media.c
@@ -244,9 +244,9 @@ static void disable_bearer(struct tipc_bearer *tb_ptr)
* specified device.
*/
static int recv_notification(struct notifier_block *nb, unsigned long evt,
- void *dv)
+ void *ptr)
{
- struct net_device *dev = (struct net_device *)dv;
+ struct net_device *dev = netdev_notifier_info_to_dev(ptr);
struct ib_bearer *ib_ptr = &ib_bearers[0];
struct ib_bearer *stop = &ib_bearers[MAX_IB_BEARERS];
diff --git a/net/wireless/core.c b/net/wireless/core.c
index 41cec1776f4..e4df7749022 100644
--- a/net/wireless/core.c
+++ b/net/wireless/core.c
@@ -775,10 +775,9 @@ void cfg80211_leave(struct cfg80211_registered_device *rdev,
}
static int cfg80211_netdev_notifier_call(struct notifier_block *nb,
- unsigned long state,
- void *ndev)
+ unsigned long state, void *ptr)
{
- struct net_device *dev = ndev;
+ struct net_device *dev = netdev_notifier_info_to_dev(ptr);
struct wireless_dev *wdev = dev->ieee80211_ptr;
struct cfg80211_registered_device *rdev;
int ret;
diff --git a/net/x25/af_x25.c b/net/x25/af_x25.c
index 37ca9694aab..1d964e23853 100644
--- a/net/x25/af_x25.c
+++ b/net/x25/af_x25.c
@@ -224,7 +224,7 @@ static void x25_kill_by_device(struct net_device *dev)
static int x25_device_event(struct notifier_block *this, unsigned long event,
void *ptr)
{
- struct net_device *dev = ptr;
+ struct net_device *dev = netdev_notifier_info_to_dev(ptr);
struct x25_neigh *nb;
if (!net_eq(dev_net(dev), &init_net))
diff --git a/net/xfrm/xfrm_output.c b/net/xfrm/xfrm_output.c
index bcfda8921b5..eb4a8428864 100644
--- a/net/xfrm/xfrm_output.c
+++ b/net/xfrm/xfrm_output.c
@@ -64,6 +64,7 @@ static int xfrm_output_one(struct sk_buff *skb, int err)
if (unlikely(x->km.state != XFRM_STATE_VALID)) {
XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTSTATEINVALID);
+ err = -EINVAL;
goto error;
}
@@ -88,7 +89,7 @@ static int xfrm_output_one(struct sk_buff *skb, int err)
err = x->type->output(x, skb);
if (err == -EINPROGRESS)
- goto out_exit;
+ goto out;
resume:
if (err) {
@@ -106,15 +107,14 @@ resume:
x = dst->xfrm;
} while (x && !(x->outer_mode->flags & XFRM_MODE_FLAG_TUNNEL));
- err = 0;
+ return 0;
-out_exit:
- return err;
error:
spin_unlock_bh(&x->lock);
error_nolock:
kfree_skb(skb);
- goto out_exit;
+out:
+ return err;
}
int xfrm_output_resume(struct sk_buff *skb, int err)
diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c
index 23cea0f7433..e52cab3591d 100644
--- a/net/xfrm/xfrm_policy.c
+++ b/net/xfrm/xfrm_policy.c
@@ -2557,11 +2557,12 @@ static void __xfrm_garbage_collect(struct net *net)
}
}
-static void xfrm_garbage_collect(struct net *net)
+void xfrm_garbage_collect(struct net *net)
{
flow_cache_flush();
__xfrm_garbage_collect(net);
}
+EXPORT_SYMBOL(xfrm_garbage_collect);
static void xfrm_garbage_collect_deferred(struct net *net)
{
@@ -2784,7 +2785,7 @@ static void __net_init xfrm_dst_ops_init(struct net *net)
static int xfrm_dev_event(struct notifier_block *this, unsigned long event, void *ptr)
{
- struct net_device *dev = ptr;
+ struct net_device *dev = netdev_notifier_info_to_dev(ptr);
switch (event) {
case NETDEV_DOWN:
diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c
index aa778748c56..3f565e495ac 100644
--- a/net/xfrm/xfrm_user.c
+++ b/net/xfrm/xfrm_user.c
@@ -1681,6 +1681,8 @@ static int xfrm_get_policy(struct sk_buff *skb, struct nlmsghdr *nlh,
out:
xfrm_pol_put(xp);
+ if (delete && err == 0)
+ xfrm_garbage_collect(net);
return err;
}