summaryrefslogtreecommitdiffstats
path: root/net
diff options
context:
space:
mode:
Diffstat (limited to 'net')
-rw-r--r--net/8021q/vlan.c10
-rw-r--r--net/8021q/vlan.h22
-rw-r--r--net/8021q/vlan_core.c4
-rw-r--r--net/8021q/vlan_dev.c197
-rw-r--r--net/8021q/vlan_netlink.c20
-rw-r--r--net/8021q/vlanproc.c5
-rw-r--r--net/atm/br2684.c2
-rw-r--r--net/atm/clip.c3
-rw-r--r--net/atm/lec.c3
-rw-r--r--net/ax25/af_ax25.c2
-rw-r--r--net/bluetooth/Makefile2
-rw-r--r--net/bridge/br.c4
-rw-r--r--net/bridge/br_fdb.c15
-rw-r--r--net/bridge/br_forward.c4
-rw-r--r--net/bridge/br_if.c7
-rw-r--r--net/bridge/br_input.c10
-rw-r--r--net/bridge/br_multicast.c78
-rw-r--r--net/bridge/br_netfilter.c22
-rw-r--r--net/bridge/br_netlink.c10
-rw-r--r--net/bridge/br_notify.c6
-rw-r--r--net/bridge/br_private.h21
-rw-r--r--net/bridge/br_stp_bpdu.c8
-rw-r--r--net/bridge/netfilter/ebtable_broute.c3
-rw-r--r--net/bridge/netfilter/ebtables.c11
-rw-r--r--net/caif/Makefile8
-rw-r--r--net/caif/caif_config_util.c13
-rw-r--r--net/caif/caif_dev.c2
-rw-r--r--net/caif/caif_socket.c45
-rw-r--r--net/caif/cfcnfg.c17
-rw-r--r--net/caif/cfctrl.c3
-rw-r--r--net/caif/cfdbgl.c14
-rw-r--r--net/caif/cfrfml.c2
-rw-r--r--net/can/Makefile6
-rw-r--r--net/can/bcm.c2
-rw-r--r--net/ceph/Makefile2
-rw-r--r--net/core/dev.c79
-rw-r--r--net/core/dst.c1
-rw-r--r--net/core/filter.c373
-rw-r--r--net/core/net-sysfs.c16
-rw-r--r--net/core/pktgen.c43
-rw-r--r--net/core/rtnetlink.c156
-rw-r--r--net/core/sock.c14
-rw-r--r--net/core/timestamping.c2
-rw-r--r--net/dccp/ackvec.c616
-rw-r--r--net/dccp/ackvec.h151
-rw-r--r--net/dccp/ccids/ccid2.c143
-rw-r--r--net/dccp/ccids/ccid2.h2
-rw-r--r--net/dccp/dccp.h11
-rw-r--r--net/dccp/input.c33
-rw-r--r--net/dccp/ipv4.c13
-rw-r--r--net/dccp/options.c100
-rw-r--r--net/dccp/output.c15
-rw-r--r--net/decnet/af_decnet.c4
-rw-r--r--net/decnet/dn_dev.c100
-rw-r--r--net/decnet/dn_fib.c6
-rw-r--r--net/decnet/dn_neigh.c2
-rw-r--r--net/decnet/dn_route.c94
-rw-r--r--net/decnet/dn_rules.c2
-rw-r--r--net/decnet/sysctl_net_decnet.c4
-rw-r--r--net/dns_resolver/Makefile2
-rw-r--r--net/econet/Makefile2
-rw-r--r--net/ipv4/af_inet.c18
-rw-r--r--net/ipv4/arp.c12
-rw-r--r--net/ipv4/devinet.c75
-rw-r--r--net/ipv4/fib_frontend.c28
-rw-r--r--net/ipv4/fib_lookup.h5
-rw-r--r--net/ipv4/fib_semantics.c8
-rw-r--r--net/ipv4/icmp.c35
-rw-r--r--net/ipv4/igmp.c286
-rw-r--r--net/ipv4/inet_connection_sock.c15
-rw-r--r--net/ipv4/inet_diag.c27
-rw-r--r--net/ipv4/ip_gre.c43
-rw-r--r--net/ipv4/ip_output.c25
-rw-r--r--net/ipv4/ipip.c20
-rw-r--r--net/ipv4/ipmr.c20
-rw-r--r--net/ipv4/netfilter.c8
-rw-r--r--net/ipv4/netfilter/Makefile6
-rw-r--r--net/ipv4/netfilter/arp_tables.c1
-rw-r--r--net/ipv4/netfilter/ip_tables.c1
-rw-r--r--net/ipv4/netfilter/nf_nat_core.c40
-rw-r--r--net/ipv4/proc.c8
-rw-r--r--net/ipv4/raw.c7
-rw-r--r--net/ipv4/route.c100
-rw-r--r--net/ipv4/syncookies.c15
-rw-r--r--net/ipv4/sysctl_net_ipv4.c5
-rw-r--r--net/ipv4/tcp.c22
-rw-r--r--net/ipv4/tcp_input.c11
-rw-r--r--net/ipv4/tcp_ipv4.c8
-rw-r--r--net/ipv4/tcp_output.c5
-rw-r--r--net/ipv4/tcp_probe.c4
-rw-r--r--net/ipv4/udp.c20
-rw-r--r--net/ipv4/xfrm4_policy.c47
-rw-r--r--net/ipv6/addrconf.c166
-rw-r--r--net/ipv6/ip6mr.c4
-rw-r--r--net/ipv6/mcast.c2
-rw-r--r--net/ipv6/netfilter.c6
-rw-r--r--net/ipv6/netfilter/Makefile4
-rw-r--r--net/ipv6/netfilter/ip6_tables.c1
-rw-r--r--net/ipv6/netfilter/nf_conntrack_reasm.c2
-rw-r--r--net/ipv6/reassembly.c2
-rw-r--r--net/ipv6/route.c32
-rw-r--r--net/ipv6/sit.c14
-rw-r--r--net/ipv6/udp.c4
-rw-r--r--net/irda/ircomm/Makefile4
-rw-r--r--net/irda/irlan/Makefile2
-rw-r--r--net/irda/irnet/Makefile2
-rw-r--r--net/irda/irttp.c30
-rw-r--r--net/l2tp/l2tp_debugfs.c2
-rw-r--r--net/l2tp/l2tp_ip.c12
-rw-r--r--net/lapb/Makefile2
-rw-r--r--net/netfilter/core.c6
-rw-r--r--net/netfilter/ipvs/Kconfig1
-rw-r--r--net/netfilter/ipvs/ip_vs_ctl.c6
-rw-r--r--net/netfilter/ipvs/ip_vs_xmit.c42
-rw-r--r--net/netfilter/nf_conntrack_core.c3
-rw-r--r--net/netfilter/nf_conntrack_proto.c6
-rw-r--r--net/netfilter/xt_TEE.c12
-rw-r--r--net/packet/af_packet.c94
-rw-r--r--net/phonet/Makefile4
-rw-r--r--net/rds/Makefile8
-rw-r--r--net/rds/loop.c4
-rw-r--r--net/rds/message.c4
-rw-r--r--net/rds/rdma.c2
-rw-r--r--net/rds/tcp.c6
-rw-r--r--net/rxrpc/Makefile4
-rw-r--r--net/rxrpc/ar-peer.c10
-rw-r--r--net/sched/cls_basic.c4
-rw-r--r--net/sched/cls_cgroup.c2
-rw-r--r--net/sched/em_text.c3
-rw-r--r--net/sctp/protocol.c2
-rw-r--r--net/sctp/socket.c4
-rw-r--r--net/sctp/sysctl.c4
-rw-r--r--net/socket.c11
-rw-r--r--net/sunrpc/auth_gss/Makefile4
-rw-r--r--net/tipc/socket.c1
-rw-r--r--net/unix/af_unix.c34
-rw-r--r--net/wanrouter/Makefile2
-rw-r--r--net/x25/af_x25.c39
-rw-r--r--net/x25/x25_facilities.c20
-rw-r--r--net/x25/x25_in.c2
140 files changed, 2146 insertions, 1981 deletions
diff --git a/net/8021q/vlan.c b/net/8021q/vlan.c
index 52077ca2207..dc1071327d8 100644
--- a/net/8021q/vlan.c
+++ b/net/8021q/vlan.c
@@ -272,13 +272,11 @@ static int register_vlan_device(struct net_device *real_dev, u16 vlan_id)
snprintf(name, IFNAMSIZ, "vlan%.4i", vlan_id);
}
- new_dev = alloc_netdev_mq(sizeof(struct vlan_dev_info), name,
- vlan_setup, real_dev->num_tx_queues);
+ new_dev = alloc_netdev(sizeof(struct vlan_dev_info), name, vlan_setup);
if (new_dev == NULL)
return -ENOBUFS;
- netif_copy_real_num_queues(new_dev, real_dev);
dev_net_set(new_dev, net);
/* need 4 bytes for extra VLAN header info,
* hope the underlying device can handle it.
@@ -334,6 +332,12 @@ static void vlan_transfer_features(struct net_device *dev,
vlandev->features &= ~dev->vlan_features;
vlandev->features |= dev->features & dev->vlan_features;
vlandev->gso_max_size = dev->gso_max_size;
+
+ if (dev->features & NETIF_F_HW_VLAN_TX)
+ vlandev->hard_header_len = dev->hard_header_len;
+ else
+ vlandev->hard_header_len = dev->hard_header_len + VLAN_HLEN;
+
#if defined(CONFIG_FCOE) || defined(CONFIG_FCOE_MODULE)
vlandev->fcoe_ddp_xid = dev->fcoe_ddp_xid;
#endif
diff --git a/net/8021q/vlan.h b/net/8021q/vlan.h
index db01b3181fd..5687c9b95f3 100644
--- a/net/8021q/vlan.h
+++ b/net/8021q/vlan.h
@@ -19,19 +19,25 @@ struct vlan_priority_tci_mapping {
/**
- * struct vlan_rx_stats - VLAN percpu rx stats
+ * struct vlan_pcpu_stats - VLAN percpu rx/tx stats
* @rx_packets: number of received packets
* @rx_bytes: number of received bytes
* @rx_multicast: number of received multicast packets
+ * @tx_packets: number of transmitted packets
+ * @tx_bytes: number of transmitted bytes
* @syncp: synchronization point for 64bit counters
- * @rx_errors: number of errors
+ * @rx_errors: number of rx errors
+ * @tx_dropped: number of tx drops
*/
-struct vlan_rx_stats {
+struct vlan_pcpu_stats {
u64 rx_packets;
u64 rx_bytes;
u64 rx_multicast;
+ u64 tx_packets;
+ u64 tx_bytes;
struct u64_stats_sync syncp;
- unsigned long rx_errors;
+ u32 rx_errors;
+ u32 tx_dropped;
};
/**
@@ -45,9 +51,7 @@ struct vlan_rx_stats {
* @real_dev: underlying netdevice
* @real_dev_addr: address of underlying netdevice
* @dent: proc dir entry
- * @cnt_inc_headroom_on_tx: statistic - number of skb expansions on TX
- * @cnt_encap_on_xmit: statistic - number of skb encapsulations on TX
- * @vlan_rx_stats: ptr to percpu rx stats
+ * @vlan_pcpu_stats: ptr to percpu rx/tx stats
*/
struct vlan_dev_info {
unsigned int nr_ingress_mappings;
@@ -62,9 +66,7 @@ struct vlan_dev_info {
unsigned char real_dev_addr[ETH_ALEN];
struct proc_dir_entry *dent;
- unsigned long cnt_inc_headroom_on_tx;
- unsigned long cnt_encap_on_xmit;
- struct vlan_rx_stats __percpu *vlan_rx_stats;
+ struct vlan_pcpu_stats __percpu *vlan_pcpu_stats;
};
static inline struct vlan_dev_info *vlan_dev_info(const struct net_device *dev)
diff --git a/net/8021q/vlan_core.c b/net/8021q/vlan_core.c
index 69b2f79800a..ce8e3ab3e7a 100644
--- a/net/8021q/vlan_core.c
+++ b/net/8021q/vlan_core.c
@@ -9,7 +9,7 @@ bool vlan_hwaccel_do_receive(struct sk_buff **skbp)
struct sk_buff *skb = *skbp;
u16 vlan_id = skb->vlan_tci & VLAN_VID_MASK;
struct net_device *vlan_dev;
- struct vlan_rx_stats *rx_stats;
+ struct vlan_pcpu_stats *rx_stats;
vlan_dev = vlan_find_dev(skb->dev, vlan_id);
if (!vlan_dev) {
@@ -26,7 +26,7 @@ bool vlan_hwaccel_do_receive(struct sk_buff **skbp)
skb->priority = vlan_get_ingress_priority(vlan_dev, skb->vlan_tci);
skb->vlan_tci = 0;
- rx_stats = this_cpu_ptr(vlan_dev_info(vlan_dev)->vlan_rx_stats);
+ rx_stats = this_cpu_ptr(vlan_dev_info(vlan_dev)->vlan_pcpu_stats);
u64_stats_update_begin(&rx_stats->syncp);
rx_stats->rx_packets++;
diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c
index 14e3d1fa07a..be737539f34 100644
--- a/net/8021q/vlan_dev.c
+++ b/net/8021q/vlan_dev.c
@@ -141,7 +141,7 @@ int vlan_skb_recv(struct sk_buff *skb, struct net_device *dev,
struct packet_type *ptype, struct net_device *orig_dev)
{
struct vlan_hdr *vhdr;
- struct vlan_rx_stats *rx_stats;
+ struct vlan_pcpu_stats *rx_stats;
struct net_device *vlan_dev;
u16 vlan_id;
u16 vlan_tci;
@@ -177,7 +177,7 @@ int vlan_skb_recv(struct sk_buff *skb, struct net_device *dev,
} else {
skb->dev = vlan_dev;
- rx_stats = this_cpu_ptr(vlan_dev_info(skb->dev)->vlan_rx_stats);
+ rx_stats = this_cpu_ptr(vlan_dev_info(skb->dev)->vlan_pcpu_stats);
u64_stats_update_begin(&rx_stats->syncp);
rx_stats->rx_packets++;
@@ -274,9 +274,6 @@ static int vlan_dev_hard_header(struct sk_buff *skb, struct net_device *dev,
u16 vlan_tci = 0;
int rc;
- if (WARN_ON(skb_headroom(skb) < dev->hard_header_len))
- return -ENOSPC;
-
if (!(vlan_dev_info(dev)->flags & VLAN_FLAG_REORDER_HDR)) {
vhdr = (struct vlan_hdr *) skb_push(skb, VLAN_HLEN);
@@ -313,8 +310,6 @@ static int vlan_dev_hard_header(struct sk_buff *skb, struct net_device *dev,
static netdev_tx_t vlan_dev_hard_start_xmit(struct sk_buff *skb,
struct net_device *dev)
{
- int i = skb_get_queue_mapping(skb);
- struct netdev_queue *txq = netdev_get_tx_queue(dev, i);
struct vlan_ethhdr *veth = (struct vlan_ethhdr *)(skb->data);
unsigned int len;
int ret;
@@ -326,71 +321,31 @@ static netdev_tx_t vlan_dev_hard_start_xmit(struct sk_buff *skb,
*/
if (veth->h_vlan_proto != htons(ETH_P_8021Q) ||
vlan_dev_info(dev)->flags & VLAN_FLAG_REORDER_HDR) {
- unsigned int orig_headroom = skb_headroom(skb);
u16 vlan_tci;
-
- vlan_dev_info(dev)->cnt_encap_on_xmit++;
-
vlan_tci = vlan_dev_info(dev)->vlan_id;
vlan_tci |= vlan_dev_get_egress_qos_mask(dev, skb);
- skb = __vlan_put_tag(skb, vlan_tci);
- if (!skb) {
- txq->tx_dropped++;
- return NETDEV_TX_OK;
- }
-
- if (orig_headroom < VLAN_HLEN)
- vlan_dev_info(dev)->cnt_inc_headroom_on_tx++;
+ skb = __vlan_hwaccel_put_tag(skb, vlan_tci);
}
-
skb_set_dev(skb, vlan_dev_info(dev)->real_dev);
len = skb->len;
ret = dev_queue_xmit(skb);
if (likely(ret == NET_XMIT_SUCCESS || ret == NET_XMIT_CN)) {
- txq->tx_packets++;
- txq->tx_bytes += len;
- } else
- txq->tx_dropped++;
+ struct vlan_pcpu_stats *stats;
- return ret;
-}
-
-static netdev_tx_t vlan_dev_hwaccel_hard_start_xmit(struct sk_buff *skb,
- struct net_device *dev)
-{
- int i = skb_get_queue_mapping(skb);
- struct netdev_queue *txq = netdev_get_tx_queue(dev, i);
- u16 vlan_tci;
- unsigned int len;
- int ret;
-
- vlan_tci = vlan_dev_info(dev)->vlan_id;
- vlan_tci |= vlan_dev_get_egress_qos_mask(dev, skb);
- skb = __vlan_hwaccel_put_tag(skb, vlan_tci);
-
- skb->dev = vlan_dev_info(dev)->real_dev;
- len = skb->len;
- ret = dev_queue_xmit(skb);
-
- if (likely(ret == NET_XMIT_SUCCESS || ret == NET_XMIT_CN)) {
- txq->tx_packets++;
- txq->tx_bytes += len;
- } else
- txq->tx_dropped++;
+ stats = this_cpu_ptr(vlan_dev_info(dev)->vlan_pcpu_stats);
+ u64_stats_update_begin(&stats->syncp);
+ stats->tx_packets++;
+ stats->tx_bytes += len;
+ u64_stats_update_end(&stats->syncp);
+ } else {
+ this_cpu_inc(vlan_dev_info(dev)->vlan_pcpu_stats->tx_dropped);
+ }
return ret;
}
-static u16 vlan_dev_select_queue(struct net_device *dev, struct sk_buff *skb)
-{
- struct net_device *rdev = vlan_dev_info(dev)->real_dev;
- const struct net_device_ops *ops = rdev->netdev_ops;
-
- return ops->ndo_select_queue(rdev, skb);
-}
-
static int vlan_dev_change_mtu(struct net_device *dev, int new_mtu)
{
/* TODO: gotta make sure the underlying layer can handle it,
@@ -719,8 +674,7 @@ static const struct header_ops vlan_header_ops = {
.parse = eth_header_parse,
};
-static const struct net_device_ops vlan_netdev_ops, vlan_netdev_accel_ops,
- vlan_netdev_ops_sq, vlan_netdev_accel_ops_sq;
+static const struct net_device_ops vlan_netdev_ops;
static int vlan_dev_init(struct net_device *dev)
{
@@ -738,6 +692,7 @@ static int vlan_dev_init(struct net_device *dev)
(1<<__LINK_STATE_PRESENT);
dev->features |= real_dev->features & real_dev->vlan_features;
+ dev->features |= NETIF_F_LLTX;
dev->gso_max_size = real_dev->gso_max_size;
/* ipv6 shared card related stuff */
@@ -755,26 +710,20 @@ static int vlan_dev_init(struct net_device *dev)
if (real_dev->features & NETIF_F_HW_VLAN_TX) {
dev->header_ops = real_dev->header_ops;
dev->hard_header_len = real_dev->hard_header_len;
- if (real_dev->netdev_ops->ndo_select_queue)
- dev->netdev_ops = &vlan_netdev_accel_ops_sq;
- else
- dev->netdev_ops = &vlan_netdev_accel_ops;
} else {
dev->header_ops = &vlan_header_ops;
dev->hard_header_len = real_dev->hard_header_len + VLAN_HLEN;
- if (real_dev->netdev_ops->ndo_select_queue)
- dev->netdev_ops = &vlan_netdev_ops_sq;
- else
- dev->netdev_ops = &vlan_netdev_ops;
}
+ dev->netdev_ops = &vlan_netdev_ops;
+
if (is_vlan_dev(real_dev))
subclass = 1;
vlan_dev_set_lockdep_class(dev, subclass);
- vlan_dev_info(dev)->vlan_rx_stats = alloc_percpu(struct vlan_rx_stats);
- if (!vlan_dev_info(dev)->vlan_rx_stats)
+ vlan_dev_info(dev)->vlan_pcpu_stats = alloc_percpu(struct vlan_pcpu_stats);
+ if (!vlan_dev_info(dev)->vlan_pcpu_stats)
return -ENOMEM;
return 0;
@@ -786,8 +735,8 @@ static void vlan_dev_uninit(struct net_device *dev)
struct vlan_dev_info *vlan = vlan_dev_info(dev);
int i;
- free_percpu(vlan->vlan_rx_stats);
- vlan->vlan_rx_stats = NULL;
+ free_percpu(vlan->vlan_pcpu_stats);
+ vlan->vlan_pcpu_stats = NULL;
for (i = 0; i < ARRAY_SIZE(vlan->egress_priority_map); i++) {
while ((pm = vlan->egress_priority_map[i]) != NULL) {
vlan->egress_priority_map[i] = pm->next;
@@ -825,33 +774,37 @@ static u32 vlan_ethtool_get_flags(struct net_device *dev)
static struct rtnl_link_stats64 *vlan_dev_get_stats64(struct net_device *dev, struct rtnl_link_stats64 *stats)
{
- dev_txq_stats_fold(dev, stats);
- if (vlan_dev_info(dev)->vlan_rx_stats) {
- struct vlan_rx_stats *p, accum = {0};
+ if (vlan_dev_info(dev)->vlan_pcpu_stats) {
+ struct vlan_pcpu_stats *p;
+ u32 rx_errors = 0, tx_dropped = 0;
int i;
for_each_possible_cpu(i) {
- u64 rxpackets, rxbytes, rxmulticast;
+ u64 rxpackets, rxbytes, rxmulticast, txpackets, txbytes;
unsigned int start;
- p = per_cpu_ptr(vlan_dev_info(dev)->vlan_rx_stats, i);
+ p = per_cpu_ptr(vlan_dev_info(dev)->vlan_pcpu_stats, i);
do {
start = u64_stats_fetch_begin_bh(&p->syncp);
rxpackets = p->rx_packets;
rxbytes = p->rx_bytes;
rxmulticast = p->rx_multicast;
+ txpackets = p->tx_packets;
+ txbytes = p->tx_bytes;
} while (u64_stats_fetch_retry_bh(&p->syncp, start));
- accum.rx_packets += rxpackets;
- accum.rx_bytes += rxbytes;
- accum.rx_multicast += rxmulticast;
- /* rx_errors is ulong, not protected by syncp */
- accum.rx_errors += p->rx_errors;
+
+ stats->rx_packets += rxpackets;
+ stats->rx_bytes += rxbytes;
+ stats->multicast += rxmulticast;
+ stats->tx_packets += txpackets;
+ stats->tx_bytes += txbytes;
+ /* rx_errors & tx_dropped are u32 */
+ rx_errors += p->rx_errors;
+ tx_dropped += p->tx_dropped;
}
- stats->rx_packets = accum.rx_packets;
- stats->rx_bytes = accum.rx_bytes;
- stats->rx_errors = accum.rx_errors;
- stats->multicast = accum.rx_multicast;
+ stats->rx_errors = rx_errors;
+ stats->tx_dropped = tx_dropped;
}
return stats;
}
@@ -908,80 +861,6 @@ static const struct net_device_ops vlan_netdev_ops = {
#endif
};
-static const struct net_device_ops vlan_netdev_accel_ops = {
- .ndo_change_mtu = vlan_dev_change_mtu,
- .ndo_init = vlan_dev_init,
- .ndo_uninit = vlan_dev_uninit,
- .ndo_open = vlan_dev_open,
- .ndo_stop = vlan_dev_stop,
- .ndo_start_xmit = vlan_dev_hwaccel_hard_start_xmit,
- .ndo_validate_addr = eth_validate_addr,
- .ndo_set_mac_address = vlan_dev_set_mac_address,
- .ndo_set_rx_mode = vlan_dev_set_rx_mode,
- .ndo_set_multicast_list = vlan_dev_set_rx_mode,
- .ndo_change_rx_flags = vlan_dev_change_rx_flags,
- .ndo_do_ioctl = vlan_dev_ioctl,
- .ndo_neigh_setup = vlan_dev_neigh_setup,
- .ndo_get_stats64 = vlan_dev_get_stats64,
-#if defined(CONFIG_FCOE) || defined(CONFIG_FCOE_MODULE)
- .ndo_fcoe_ddp_setup = vlan_dev_fcoe_ddp_setup,
- .ndo_fcoe_ddp_done = vlan_dev_fcoe_ddp_done,
- .ndo_fcoe_enable = vlan_dev_fcoe_enable,
- .ndo_fcoe_disable = vlan_dev_fcoe_disable,
- .ndo_fcoe_get_wwn = vlan_dev_fcoe_get_wwn,
-#endif
-};
-
-static const struct net_device_ops vlan_netdev_ops_sq = {
- .ndo_select_queue = vlan_dev_select_queue,
- .ndo_change_mtu = vlan_dev_change_mtu,
- .ndo_init = vlan_dev_init,
- .ndo_uninit = vlan_dev_uninit,
- .ndo_open = vlan_dev_open,
- .ndo_stop = vlan_dev_stop,
- .ndo_start_xmit = vlan_dev_hard_start_xmit,
- .ndo_validate_addr = eth_validate_addr,
- .ndo_set_mac_address = vlan_dev_set_mac_address,
- .ndo_set_rx_mode = vlan_dev_set_rx_mode,
- .ndo_set_multicast_list = vlan_dev_set_rx_mode,
- .ndo_change_rx_flags = vlan_dev_change_rx_flags,
- .ndo_do_ioctl = vlan_dev_ioctl,
- .ndo_neigh_setup = vlan_dev_neigh_setup,
- .ndo_get_stats64 = vlan_dev_get_stats64,
-#if defined(CONFIG_FCOE) || defined(CONFIG_FCOE_MODULE)
- .ndo_fcoe_ddp_setup = vlan_dev_fcoe_ddp_setup,
- .ndo_fcoe_ddp_done = vlan_dev_fcoe_ddp_done,
- .ndo_fcoe_enable = vlan_dev_fcoe_enable,
- .ndo_fcoe_disable = vlan_dev_fcoe_disable,
- .ndo_fcoe_get_wwn = vlan_dev_fcoe_get_wwn,
-#endif
-};
-
-static const struct net_device_ops vlan_netdev_accel_ops_sq = {
- .ndo_select_queue = vlan_dev_select_queue,
- .ndo_change_mtu = vlan_dev_change_mtu,
- .ndo_init = vlan_dev_init,
- .ndo_uninit = vlan_dev_uninit,
- .ndo_open = vlan_dev_open,
- .ndo_stop = vlan_dev_stop,
- .ndo_start_xmit = vlan_dev_hwaccel_hard_start_xmit,
- .ndo_validate_addr = eth_validate_addr,
- .ndo_set_mac_address = vlan_dev_set_mac_address,
- .ndo_set_rx_mode = vlan_dev_set_rx_mode,
- .ndo_set_multicast_list = vlan_dev_set_rx_mode,
- .ndo_change_rx_flags = vlan_dev_change_rx_flags,
- .ndo_do_ioctl = vlan_dev_ioctl,
- .ndo_neigh_setup = vlan_dev_neigh_setup,
- .ndo_get_stats64 = vlan_dev_get_stats64,
-#if defined(CONFIG_FCOE) || defined(CONFIG_FCOE_MODULE)
- .ndo_fcoe_ddp_setup = vlan_dev_fcoe_ddp_setup,
- .ndo_fcoe_ddp_done = vlan_dev_fcoe_ddp_done,
- .ndo_fcoe_enable = vlan_dev_fcoe_enable,
- .ndo_fcoe_disable = vlan_dev_fcoe_disable,
- .ndo_fcoe_get_wwn = vlan_dev_fcoe_get_wwn,
-#endif
-};
-
void vlan_setup(struct net_device *dev)
{
ether_setup(dev);
diff --git a/net/8021q/vlan_netlink.c b/net/8021q/vlan_netlink.c
index ddc105734af..be9a5c19a77 100644
--- a/net/8021q/vlan_netlink.c
+++ b/net/8021q/vlan_netlink.c
@@ -101,25 +101,6 @@ static int vlan_changelink(struct net_device *dev,
return 0;
}
-static int vlan_get_tx_queues(struct net *net,
- struct nlattr *tb[],
- unsigned int *num_tx_queues,
- unsigned int *real_num_tx_queues)
-{
- struct net_device *real_dev;
-
- if (!tb[IFLA_LINK])
- return -EINVAL;
-
- real_dev = __dev_get_by_index(net, nla_get_u32(tb[IFLA_LINK]));
- if (!real_dev)
- return -ENODEV;
-
- *num_tx_queues = real_dev->num_tx_queues;
- *real_num_tx_queues = real_dev->real_num_tx_queues;
- return 0;
-}
-
static int vlan_newlink(struct net *src_net, struct net_device *dev,
struct nlattr *tb[], struct nlattr *data[])
{
@@ -237,7 +218,6 @@ struct rtnl_link_ops vlan_link_ops __read_mostly = {
.maxtype = IFLA_VLAN_MAX,
.policy = vlan_policy,
.priv_size = sizeof(struct vlan_dev_info),
- .get_tx_queues = vlan_get_tx_queues,
.setup = vlan_setup,
.validate = vlan_validate,
.newlink = vlan_newlink,
diff --git a/net/8021q/vlanproc.c b/net/8021q/vlanproc.c
index 80e280f5668..d1314cf18ad 100644
--- a/net/8021q/vlanproc.c
+++ b/net/8021q/vlanproc.c
@@ -280,7 +280,6 @@ static int vlandev_seq_show(struct seq_file *seq, void *offset)
const struct vlan_dev_info *dev_info = vlan_dev_info(vlandev);
struct rtnl_link_stats64 temp;
const struct rtnl_link_stats64 *stats;
- static const char fmt[] = "%30s %12lu\n";
static const char fmt64[] = "%30s %12llu\n";
int i;
@@ -299,10 +298,6 @@ static int vlandev_seq_show(struct seq_file *seq, void *offset)
seq_puts(seq, "\n");
seq_printf(seq, fmt64, "total frames transmitted", stats->tx_packets);
seq_printf(seq, fmt64, "total bytes transmitted", stats->tx_bytes);
- seq_printf(seq, fmt, "total headroom inc",
- dev_info->cnt_inc_headroom_on_tx);
- seq_printf(seq, fmt, "total encap on xmit",
- dev_info->cnt_encap_on_xmit);
seq_printf(seq, "Device: %s", dev_info->real_dev->name);
/* now show all PRIORITY mappings relating to this VLAN */
seq_printf(seq, "\nINGRESS priority mappings: "
diff --git a/net/atm/br2684.c b/net/atm/br2684.c
index ad2b232a205..fce2eae8d47 100644
--- a/net/atm/br2684.c
+++ b/net/atm/br2684.c
@@ -97,7 +97,7 @@ static LIST_HEAD(br2684_devs);
static inline struct br2684_dev *BRPRIV(const struct net_device *net_dev)
{
- return (struct br2684_dev *)netdev_priv(net_dev);
+ return netdev_priv(net_dev);
}
static inline struct net_device *list_entry_brdev(const struct list_head *le)
diff --git a/net/atm/clip.c b/net/atm/clip.c
index ff956d1115b..d257da50fcf 100644
--- a/net/atm/clip.c
+++ b/net/atm/clip.c
@@ -502,7 +502,8 @@ static int clip_setentry(struct atm_vcc *vcc, __be32 ip)
struct atmarp_entry *entry;
int error;
struct clip_vcc *clip_vcc;
- struct flowi fl = { .nl_u = { .ip4_u = { .daddr = ip, .tos = 1}} };
+ struct flowi fl = { .fl4_dst = ip,
+ .fl4_tos = 1 };
struct rtable *rt;
if (vcc->push != clip_push) {
diff --git a/net/atm/lec.c b/net/atm/lec.c
index 181d70c73d7..179e04bc99d 100644
--- a/net/atm/lec.c
+++ b/net/atm/lec.c
@@ -816,8 +816,7 @@ static int lec_mcast_attach(struct atm_vcc *vcc, int arg)
if (arg < 0 || arg >= MAX_LEC_ITF || !dev_lec[arg])
return -EINVAL;
vcc->proto_data = dev_lec[arg];
- return lec_mcast_make((struct lec_priv *)netdev_priv(dev_lec[arg]),
- vcc);
+ return lec_mcast_make(netdev_priv(dev_lec[arg]), vcc);
}
/* Initialize device. */
diff --git a/net/ax25/af_ax25.c b/net/ax25/af_ax25.c
index 26eaebf4aaa..bb86d293239 100644
--- a/net/ax25/af_ax25.c
+++ b/net/ax25/af_ax25.c
@@ -1392,6 +1392,7 @@ static int ax25_getname(struct socket *sock, struct sockaddr *uaddr,
ax25_cb *ax25;
int err = 0;
+ memset(fsa, 0, sizeof(*fsa));
lock_sock(sk);
ax25 = ax25_sk(sk);
@@ -1403,7 +1404,6 @@ static int ax25_getname(struct socket *sock, struct sockaddr *uaddr,
fsa->fsa_ax25.sax25_family = AF_AX25;
fsa->fsa_ax25.sax25_call = ax25->dest_addr;
- fsa->fsa_ax25.sax25_ndigis = 0;
if (ax25->digipeat != NULL) {
ndigi = ax25->digipeat->ndigi;
diff --git a/net/bluetooth/Makefile b/net/bluetooth/Makefile
index d1e433f7d67..7ca1f46a471 100644
--- a/net/bluetooth/Makefile
+++ b/net/bluetooth/Makefile
@@ -10,4 +10,4 @@ obj-$(CONFIG_BT_BNEP) += bnep/
obj-$(CONFIG_BT_CMTP) += cmtp/
obj-$(CONFIG_BT_HIDP) += hidp/
-bluetooth-objs := af_bluetooth.o hci_core.o hci_conn.o hci_event.o hci_sock.o hci_sysfs.o lib.o
+bluetooth-y := af_bluetooth.o hci_core.o hci_conn.o hci_event.o hci_sock.o hci_sysfs.o lib.o
diff --git a/net/bridge/br.c b/net/bridge/br.c
index c8436fa3134..84bbb82599b 100644
--- a/net/bridge/br.c
+++ b/net/bridge/br.c
@@ -22,8 +22,6 @@
#include "br_private.h"
-int (*br_should_route_hook)(struct sk_buff *skb);
-
static const struct stp_proto br_stp_proto = {
.rcv = br_stp_rcv,
};
@@ -102,8 +100,6 @@ static void __exit br_deinit(void)
br_fdb_fini();
}
-EXPORT_SYMBOL(br_should_route_hook);
-
module_init(br_init)
module_exit(br_deinit)
MODULE_LICENSE("GPL");
diff --git a/net/bridge/br_fdb.c b/net/bridge/br_fdb.c
index 90512ccfd3e..2872393b293 100644
--- a/net/bridge/br_fdb.c
+++ b/net/bridge/br_fdb.c
@@ -238,15 +238,18 @@ struct net_bridge_fdb_entry *__br_fdb_get(struct net_bridge *br,
int br_fdb_test_addr(struct net_device *dev, unsigned char *addr)
{
struct net_bridge_fdb_entry *fdb;
+ struct net_bridge_port *port;
int ret;
- if (!br_port_exists(dev))
- return 0;
-
rcu_read_lock();
- fdb = __br_fdb_get(br_port_get_rcu(dev)->br, addr);
- ret = fdb && fdb->dst->dev != dev &&
- fdb->dst->state == BR_STATE_FORWARDING;
+ port = br_port_get_rcu(dev);
+ if (!port)
+ ret = 0;
+ else {
+ fdb = __br_fdb_get(port->br, addr);
+ ret = fdb && fdb->dst->dev != dev &&
+ fdb->dst->state == BR_STATE_FORWARDING;
+ }
rcu_read_unlock();
return ret;
diff --git a/net/bridge/br_forward.c b/net/bridge/br_forward.c
index cbfe87f0f34..2bd11ec6d16 100644
--- a/net/bridge/br_forward.c
+++ b/net/bridge/br_forward.c
@@ -223,7 +223,7 @@ static void br_multicast_flood(struct net_bridge_mdb_entry *mdst,
struct net_bridge_port_group *p;
struct hlist_node *rp;
- rp = rcu_dereference(br->router_list.first);
+ rp = rcu_dereference(hlist_first_rcu(&br->router_list));
p = mdst ? rcu_dereference(mdst->ports) : NULL;
while (p || rp) {
struct net_bridge_port *port, *lport, *rport;
@@ -242,7 +242,7 @@ static void br_multicast_flood(struct net_bridge_mdb_entry *mdst,
if ((unsigned long)lport >= (unsigned long)port)
p = rcu_dereference(p->next);
if ((unsigned long)rport >= (unsigned long)port)
- rp = rcu_dereference(rp->next);
+ rp = rcu_dereference(hlist_next_rcu(rp));
}
if (!prev)
diff --git a/net/bridge/br_if.c b/net/bridge/br_if.c
index 89ad25a7620..d9d1e2bac1d 100644
--- a/net/bridge/br_if.c
+++ b/net/bridge/br_if.c
@@ -475,11 +475,8 @@ int br_del_if(struct net_bridge *br, struct net_device *dev)
{
struct net_bridge_port *p;
- if (!br_port_exists(dev))
- return -EINVAL;
-
- p = br_port_get(dev);
- if (p->br != br)
+ p = br_port_get_rtnl(dev);
+ if (!p || p->br != br)
return -EINVAL;
del_nbp(p);
diff --git a/net/bridge/br_input.c b/net/bridge/br_input.c
index 25207a1f182..6f6d8e1b776 100644
--- a/net/bridge/br_input.c
+++ b/net/bridge/br_input.c
@@ -21,6 +21,10 @@
/* Bridge group multicast address 802.1d (pg 51). */
const u8 br_group_address[ETH_ALEN] = { 0x01, 0x80, 0xc2, 0x00, 0x00, 0x00 };
+/* Hook for brouter */
+br_should_route_hook_t __rcu *br_should_route_hook __read_mostly;
+EXPORT_SYMBOL(br_should_route_hook);
+
static int br_pass_frame_up(struct sk_buff *skb)
{
struct net_device *indev, *brdev = BR_INPUT_SKB_CB(skb)->brdev;
@@ -139,7 +143,7 @@ struct sk_buff *br_handle_frame(struct sk_buff *skb)
{
struct net_bridge_port *p;
const unsigned char *dest = eth_hdr(skb)->h_dest;
- int (*rhook)(struct sk_buff *skb);
+ br_should_route_hook_t *rhook;
if (unlikely(skb->pkt_type == PACKET_LOOPBACK))
return skb;
@@ -173,8 +177,8 @@ forward:
switch (p->state) {
case BR_STATE_FORWARDING:
rhook = rcu_dereference(br_should_route_hook);
- if (rhook != NULL) {
- if (rhook(skb))
+ if (rhook) {
+ if ((*rhook)(skb))
return skb;
dest = eth_hdr(skb)->h_dest;
}
diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c
index eb5b256ffc8..326e599f83f 100644
--- a/net/bridge/br_multicast.c
+++ b/net/bridge/br_multicast.c
@@ -33,6 +33,9 @@
#include "br_private.h"
+#define mlock_dereference(X, br) \
+ rcu_dereference_protected(X, lockdep_is_held(&br->multicast_lock))
+
#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
static inline int ipv6_is_local_multicast(const struct in6_addr *addr)
{
@@ -135,7 +138,7 @@ static struct net_bridge_mdb_entry *br_mdb_ip6_get(
struct net_bridge_mdb_entry *br_mdb_get(struct net_bridge *br,
struct sk_buff *skb)
{
- struct net_bridge_mdb_htable *mdb = br->mdb;
+ struct net_bridge_mdb_htable *mdb = rcu_dereference(br->mdb);
struct br_ip ip;
if (br->multicast_disabled)
@@ -235,7 +238,8 @@ static void br_multicast_group_expired(unsigned long data)
if (mp->ports)
goto out;
- mdb = br->mdb;
+ mdb = mlock_dereference(br->mdb, br);
+
hlist_del_rcu(&mp->hlist[mdb->ver]);
mdb->size--;
@@ -249,16 +253,20 @@ out:
static void br_multicast_del_pg(struct net_bridge *br,
struct net_bridge_port_group *pg)
{
- struct net_bridge_mdb_htable *mdb = br->mdb;
+ struct net_bridge_mdb_htable *mdb;
struct net_bridge_mdb_entry *mp;
struct net_bridge_port_group *p;
- struct net_bridge_port_group **pp;
+ struct net_bridge_port_group __rcu **pp;
+
+ mdb = mlock_dereference(br->mdb, br);
mp = br_mdb_ip_get(mdb, &pg->addr);
if (WARN_ON(!mp))
return;
- for (pp = &mp->ports; (p = *pp); pp = &p->next) {
+ for (pp = &mp->ports;
+ (p = mlock_dereference(*pp, br)) != NULL;
+ pp = &p->next) {
if (p != pg)
continue;
@@ -294,10 +302,10 @@ out:
spin_unlock(&br->multicast_lock);
}
-static int br_mdb_rehash(struct net_bridge_mdb_htable **mdbp, int max,
+static int br_mdb_rehash(struct net_bridge_mdb_htable __rcu **mdbp, int max,
int elasticity)
{
- struct net_bridge_mdb_htable *old = *mdbp;
+ struct net_bridge_mdb_htable *old = rcu_dereference_protected(*mdbp, 1);
struct net_bridge_mdb_htable *mdb;
int err;
@@ -569,7 +577,7 @@ static struct net_bridge_mdb_entry *br_multicast_get_group(
struct net_bridge *br, struct net_bridge_port *port,
struct br_ip *group, int hash)
{
- struct net_bridge_mdb_htable *mdb = br->mdb;
+ struct net_bridge_mdb_htable *mdb;
struct net_bridge_mdb_entry *mp;
struct hlist_node *p;
unsigned count = 0;
@@ -577,6 +585,7 @@ static struct net_bridge_mdb_entry *br_multicast_get_group(
int elasticity;
int err;
+ mdb = rcu_dereference_protected(br->mdb, 1);
hlist_for_each_entry(mp, p, &mdb->mhash[hash], hlist[mdb->ver]) {
count++;
if (unlikely(br_ip_equal(group, &mp->addr)))
@@ -642,10 +651,11 @@ static struct net_bridge_mdb_entry *br_multicast_new_group(
struct net_bridge *br, struct net_bridge_port *port,
struct br_ip *group)
{
- struct net_bridge_mdb_htable *mdb = br->mdb;
+ struct net_bridge_mdb_htable *mdb;
struct net_bridge_mdb_entry *mp;
int hash;
+ mdb = rcu_dereference_protected(br->mdb, 1);
if (!mdb) {
if (br_mdb_rehash(&br->mdb, BR_HASH_SIZE, 0))
return NULL;
@@ -660,7 +670,7 @@ static struct net_bridge_mdb_entry *br_multicast_new_group(
case -EAGAIN:
rehash:
- mdb = br->mdb;
+ mdb = rcu_dereference_protected(br->mdb, 1);
hash = br_ip_hash(mdb, group);
break;
@@ -692,7 +702,7 @@ static int br_multicast_add_group(struct net_bridge *br,
{
struct net_bridge_mdb_entry *mp;
struct net_bridge_port_group *p;
- struct net_bridge_port_group **pp;
+ struct net_bridge_port_group __rcu **pp;
unsigned long now = jiffies;
int err;
@@ -712,7 +722,9 @@ static int br_multicast_add_group(struct net_bridge *br,
goto out;
}
- for (pp = &mp->ports; (p = *pp); pp = &p->next) {
+ for (pp = &mp->ports;
+ (p = mlock_dereference(*pp, br)) != NULL;
+ pp = &p->next) {
if (p->port == port)
goto found;
if ((unsigned long)p->port < (unsigned long)port)
@@ -1106,7 +1118,7 @@ static int br_ip4_multicast_query(struct net_bridge *br,
struct net_bridge_mdb_entry *mp;
struct igmpv3_query *ih3;
struct net_bridge_port_group *p;
- struct net_bridge_port_group **pp;
+ struct net_bridge_port_group __rcu **pp;
unsigned long max_delay;
unsigned long now = jiffies;
__be32 group;
@@ -1145,7 +1157,7 @@ static int br_ip4_multicast_query(struct net_bridge *br,
if (!group)
goto out;
- mp = br_mdb_ip4_get(br->mdb, group);
+ mp = br_mdb_ip4_get(mlock_dereference(br->mdb, br), group);
if (!mp)
goto out;
@@ -1157,7 +1169,9 @@ static int br_ip4_multicast_query(struct net_bridge *br,
try_to_del_timer_sync(&mp->timer) >= 0))
mod_timer(&mp->timer, now + max_delay);
- for (pp = &mp->ports; (p = *pp); pp = &p->next) {
+ for (pp = &mp->ports;
+ (p = mlock_dereference(*pp, br)) != NULL;
+ pp = &p->next) {
if (timer_pending(&p->timer) ?
time_after(p->timer.expires, now + max_delay) :
try_to_del_timer_sync(&p->timer) >= 0)
@@ -1178,7 +1192,8 @@ static int br_ip6_multicast_query(struct net_bridge *br,
struct mld_msg *mld = (struct mld_msg *) icmp6_hdr(skb);
struct net_bridge_mdb_entry *mp;
struct mld2_query *mld2q;
- struct net_bridge_port_group *p, **pp;
+ struct net_bridge_port_group *p;
+ struct net_bridge_port_group __rcu **pp;
unsigned long max_delay;
unsigned long now = jiffies;
struct in6_addr *group = NULL;
@@ -1214,7 +1229,7 @@ static int br_ip6_multicast_query(struct net_bridge *br,
if (!group)
goto out;
- mp = br_mdb_ip6_get(br->mdb, group);
+ mp = br_mdb_ip6_get(mlock_dereference(br->mdb, br), group);
if (!mp)
goto out;
@@ -1225,7 +1240,9 @@ static int br_ip6_multicast_query(struct net_bridge *br,
try_to_del_timer_sync(&mp->timer) >= 0))
mod_timer(&mp->timer, now + max_delay);
- for (pp = &mp->ports; (p = *pp); pp = &p->next) {
+ for (pp = &mp->ports;
+ (p = mlock_dereference(*pp, br)) != NULL;
+ pp = &p->next) {
if (timer_pending(&p->timer) ?
time_after(p->timer.expires, now + max_delay) :
try_to_del_timer_sync(&p->timer) >= 0)
@@ -1254,7 +1271,7 @@ static void br_multicast_leave_group(struct net_bridge *br,
timer_pending(&br->multicast_querier_timer))
goto out;
- mdb = br->mdb;
+ mdb = mlock_dereference(br->mdb, br);
mp = br_mdb_ip_get(mdb, group);
if (!mp)
goto out;
@@ -1277,7 +1294,9 @@ static void br_multicast_leave_group(struct net_bridge *br,
goto out;
}
- for (p = mp->ports; p; p = p->next) {
+ for (p = mlock_dereference(mp->ports, br);
+ p != NULL;
+ p = mlock_dereference(p->next, br)) {
if (p->port != port)
continue;
@@ -1625,7 +1644,7 @@ void br_multicast_stop(struct net_bridge *br)
del_timer_sync(&br->multicast_query_timer);
spin_lock_bh(&br->multicast_lock);
- mdb = br->mdb;
+ mdb = mlock_dereference(br->mdb, br);
if (!mdb)
goto out;
@@ -1729,6 +1748,7 @@ int br_multicast_toggle(struct net_bridge *br, unsigned long val)
{
struct net_bridge_port *port;
int err = 0;
+ struct net_bridge_mdb_htable *mdb;
spin_lock(&br->multicast_lock);
if (br->multicast_disabled == !val)
@@ -1741,15 +1761,16 @@ int br_multicast_toggle(struct net_bridge *br, unsigned long val)
if (!netif_running(br->dev))
goto unlock;
- if (br->mdb) {
- if (br->mdb->old) {
+ mdb = mlock_dereference(br->mdb, br);
+ if (mdb) {
+ if (mdb->old) {
err = -EEXIST;
rollback:
br->multicast_disabled = !!val;
goto unlock;
}
- err = br_mdb_rehash(&br->mdb, br->mdb->max,
+ err = br_mdb_rehash(&br->mdb, mdb->max,
br->hash_elasticity);
if (err)
goto rollback;
@@ -1774,6 +1795,7 @@ int br_multicast_set_hash_max(struct net_bridge *br, unsigned long val)
{
int err = -ENOENT;
u32 old;
+ struct net_bridge_mdb_htable *mdb;
spin_lock(&br->multicast_lock);
if (!netif_running(br->dev))
@@ -1782,7 +1804,9 @@ int br_multicast_set_hash_max(struct net_bridge *br, unsigned long val)
err = -EINVAL;
if (!is_power_of_2(val))
goto unlock;
- if (br->mdb && val < br->mdb->size)
+
+ mdb = mlock_dereference(br->mdb, br);
+ if (mdb && val < mdb->size)
goto unlock;
err = 0;
@@ -1790,8 +1814,8 @@ int br_multicast_set_hash_max(struct net_bridge *br, unsigned long val)
old = br->hash_max;
br->hash_max = val;
- if (br->mdb) {
- if (br->mdb->old) {
+ if (mdb) {
+ if (mdb->old) {
err = -EEXIST;
rollback:
br->hash_max = old;
diff --git a/net/bridge/br_netfilter.c b/net/bridge/br_netfilter.c
index 865fd7634b6..6e139209391 100644
--- a/net/bridge/br_netfilter.c
+++ b/net/bridge/br_netfilter.c
@@ -131,17 +131,18 @@ void br_netfilter_rtable_init(struct net_bridge *br)
static inline struct rtable *bridge_parent_rtable(const struct net_device *dev)
{
- if (!br_port_exists(dev))
- return NULL;
- return &br_port_get_rcu(dev)->br->fake_rtable;
+ struct net_bridge_port *port;
+
+ port = br_port_get_rcu(dev);
+ return port ? &port->br->fake_rtable : NULL;
}
static inline struct net_device *bridge_parent(const struct net_device *dev)
{
- if (!br_port_exists(dev))
- return NULL;
+ struct net_bridge_port *port;
- return br_port_get_rcu(dev)->br->dev;
+ port = br_port_get_rcu(dev);
+ return port ? port->br->dev : NULL;
}
static inline struct nf_bridge_info *nf_bridge_alloc(struct sk_buff *skb)
@@ -412,13 +413,8 @@ static int br_nf_pre_routing_finish(struct sk_buff *skb)
if (dnat_took_place(skb)) {
if ((err = ip_route_input(skb, iph->daddr, iph->saddr, iph->tos, dev))) {
struct flowi fl = {
- .nl_u = {
- .ip4_u = {
- .daddr = iph->daddr,
- .saddr = 0,
- .tos = RT_TOS(iph->tos) },
- },
- .proto = 0,
+ .fl4_dst = iph->daddr,
+ .fl4_tos = RT_TOS(iph->tos),
};
struct in_device *in_dev = __in_dev_get_rcu(dev);
diff --git a/net/bridge/br_netlink.c b/net/bridge/br_netlink.c
index 4a6a378c84e..f8bf4c7f842 100644
--- a/net/bridge/br_netlink.c
+++ b/net/bridge/br_netlink.c
@@ -119,11 +119,13 @@ static int br_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb)
idx = 0;
for_each_netdev(net, dev) {
+ struct net_bridge_port *port = br_port_get_rtnl(dev);
+
/* not a bridge port */
- if (!br_port_exists(dev) || idx < cb->args[0])
+ if (!port || idx < cb->args[0])
goto skip;
- if (br_fill_ifinfo(skb, br_port_get(dev),
+ if (br_fill_ifinfo(skb, port,
NETLINK_CB(cb->skb).pid,
cb->nlh->nlmsg_seq, RTM_NEWLINK,
NLM_F_MULTI) < 0)
@@ -169,9 +171,9 @@ static int br_rtm_setlink(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
if (!dev)
return -ENODEV;
- if (!br_port_exists(dev))
+ p = br_port_get_rtnl(dev);
+ if (!p)
return -EINVAL;
- p = br_port_get(dev);
/* if kernel STP is running, don't allow changes */
if (p->br->stp_enabled == BR_KERNEL_STP)
diff --git a/net/bridge/br_notify.c b/net/bridge/br_notify.c
index 404d4e14c6a..7d337c9b608 100644
--- a/net/bridge/br_notify.c
+++ b/net/bridge/br_notify.c
@@ -32,15 +32,15 @@ struct notifier_block br_device_notifier = {
static int br_device_event(struct notifier_block *unused, unsigned long event, void *ptr)
{
struct net_device *dev = ptr;
- struct net_bridge_port *p = br_port_get(dev);
+ struct net_bridge_port *p;
struct net_bridge *br;
int err;
/* not a port of a bridge */
- if (!br_port_exists(dev))
+ p = br_port_get_rtnl(dev);
+ if (!p)
return NOTIFY_DONE;
- p = br_port_get(dev);
br = p->br;
switch (event) {
diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h
index 75c90edaf7d..84aac7734bf 100644
--- a/net/bridge/br_private.h
+++ b/net/bridge/br_private.h
@@ -72,7 +72,7 @@ struct net_bridge_fdb_entry
struct net_bridge_port_group {
struct net_bridge_port *port;
- struct net_bridge_port_group *next;
+ struct net_bridge_port_group __rcu *next;
struct hlist_node mglist;
struct rcu_head rcu;
struct timer_list timer;
@@ -86,7 +86,7 @@ struct net_bridge_mdb_entry
struct hlist_node hlist[2];
struct hlist_node mglist;
struct net_bridge *br;
- struct net_bridge_port_group *ports;
+ struct net_bridge_port_group __rcu *ports;
struct rcu_head rcu;
struct timer_list timer;
struct timer_list query_timer;
@@ -151,11 +151,20 @@ struct net_bridge_port
#endif
};
-#define br_port_get_rcu(dev) \
- ((struct net_bridge_port *) rcu_dereference(dev->rx_handler_data))
-#define br_port_get(dev) ((struct net_bridge_port *) dev->rx_handler_data)
#define br_port_exists(dev) (dev->priv_flags & IFF_BRIDGE_PORT)
+static inline struct net_bridge_port *br_port_get_rcu(const struct net_device *dev)
+{
+ struct net_bridge_port *port = rcu_dereference(dev->rx_handler_data);
+ return br_port_exists(dev) ? port : NULL;
+}
+
+static inline struct net_bridge_port *br_port_get_rtnl(struct net_device *dev)
+{
+ return br_port_exists(dev) ?
+ rtnl_dereference(dev->rx_handler_data) : NULL;
+}
+
struct br_cpu_netstats {
u64 rx_packets;
u64 rx_bytes;
@@ -227,7 +236,7 @@ struct net_bridge
unsigned long multicast_startup_query_interval;
spinlock_t multicast_lock;
- struct net_bridge_mdb_htable *mdb;
+ struct net_bridge_mdb_htable __rcu *mdb;
struct hlist_head router_list;
struct hlist_head mglist;
diff --git a/net/bridge/br_stp_bpdu.c b/net/bridge/br_stp_bpdu.c
index 35cf27087b5..3d9a55d3822 100644
--- a/net/bridge/br_stp_bpdu.c
+++ b/net/bridge/br_stp_bpdu.c
@@ -141,10 +141,6 @@ void br_stp_rcv(const struct stp_proto *proto, struct sk_buff *skb,
struct net_bridge *br;
const unsigned char *buf;
- if (!br_port_exists(dev))
- goto err;
- p = br_port_get_rcu(dev);
-
if (!pskb_may_pull(skb, 4))
goto err;
@@ -153,6 +149,10 @@ void br_stp_rcv(const struct stp_proto *proto, struct sk_buff *skb,
if (buf[0] != 0 || buf[1] != 0 || buf[2] != 0)
goto err;
+ p = br_port_get_rcu(dev);
+ if (!p)
+ goto err;
+
br = p->br;
spin_lock(&br->lock);
diff --git a/net/bridge/netfilter/ebtable_broute.c b/net/bridge/netfilter/ebtable_broute.c
index ae3f106c390..1bcaf36ad61 100644
--- a/net/bridge/netfilter/ebtable_broute.c
+++ b/net/bridge/netfilter/ebtable_broute.c
@@ -87,7 +87,8 @@ static int __init ebtable_broute_init(void)
if (ret < 0)
return ret;
/* see br_input.c */
- rcu_assign_pointer(br_should_route_hook, ebt_broute);
+ rcu_assign_pointer(br_should_route_hook,
+ (br_should_route_hook_t *)ebt_broute);
return 0;
}
diff --git a/net/bridge/netfilter/ebtables.c b/net/bridge/netfilter/ebtables.c
index a1dcf83f0d5..cbc9f395ab1 100644
--- a/net/bridge/netfilter/ebtables.c
+++ b/net/bridge/netfilter/ebtables.c
@@ -128,6 +128,7 @@ ebt_basic_match(const struct ebt_entry *e, const struct sk_buff *skb,
const struct net_device *in, const struct net_device *out)
{
const struct ethhdr *h = eth_hdr(skb);
+ const struct net_bridge_port *p;
__be16 ethproto;
int verdict, i;
@@ -148,13 +149,11 @@ ebt_basic_match(const struct ebt_entry *e, const struct sk_buff *skb,
if (FWINV2(ebt_dev_check(e->out, out), EBT_IOUT))
return 1;
/* rcu_read_lock()ed by nf_hook_slow */
- if (in && br_port_exists(in) &&
- FWINV2(ebt_dev_check(e->logical_in, br_port_get_rcu(in)->br->dev),
- EBT_ILOGICALIN))
+ if (in && (p = br_port_get_rcu(in)) != NULL &&
+ FWINV2(ebt_dev_check(e->logical_in, p->br->dev), EBT_ILOGICALIN))
return 1;
- if (out && br_port_exists(out) &&
- FWINV2(ebt_dev_check(e->logical_out, br_port_get_rcu(out)->br->dev),
- EBT_ILOGICALOUT))
+ if (out && (p = br_port_get_rcu(out)) != NULL &&
+ FWINV2(ebt_dev_check(e->logical_out, p->br->dev), EBT_ILOGICALOUT))
return 1;
if (e->bitmask & EBT_SOURCEMAC) {
diff --git a/net/caif/Makefile b/net/caif/Makefile
index f87481fb0e6..9d38e406e4a 100644
--- a/net/caif/Makefile
+++ b/net/caif/Makefile
@@ -1,8 +1,6 @@
-ifeq ($(CONFIG_CAIF_DEBUG),y)
-EXTRA_CFLAGS += -DDEBUG
-endif
+ccflags-$(CONFIG_CAIF_DEBUG) := -DDEBUG
-caif-objs := caif_dev.o \
+caif-y := caif_dev.o \
cfcnfg.o cfmuxl.o cfctrl.o \
cffrml.o cfveil.o cfdbgl.o\
cfserl.o cfdgml.o \
@@ -13,4 +11,4 @@ obj-$(CONFIG_CAIF) += caif.o
obj-$(CONFIG_CAIF_NETDEV) += chnl_net.o
obj-$(CONFIG_CAIF) += caif_socket.o
-export-objs := caif.o
+export-y := caif.o
diff --git a/net/caif/caif_config_util.c b/net/caif/caif_config_util.c
index 76ae68303d3..d522d8c1703 100644
--- a/net/caif/caif_config_util.c
+++ b/net/caif/caif_config_util.c
@@ -16,11 +16,18 @@ int connect_req_to_link_param(struct cfcnfg *cnfg,
{
struct dev_info *dev_info;
enum cfcnfg_phy_preference pref;
+ int res;
+
memset(l, 0, sizeof(*l));
- l->priority = s->priority;
+ /* In caif protocol low value is high priority */
+ l->priority = CAIF_PRIO_MAX - s->priority + 1;
- if (s->link_name[0] != '\0')
- l->phyid = cfcnfg_get_named(cnfg, s->link_name);
+ if (s->ifindex != 0){
+ res = cfcnfg_get_id_from_ifi(cnfg, s->ifindex);
+ if (res < 0)
+ return res;
+ l->phyid = res;
+ }
else {
switch (s->link_selector) {
case CAIF_LINK_HIGH_BANDW:
diff --git a/net/caif/caif_dev.c b/net/caif/caif_dev.c
index b99369a055d..a42a408306e 100644
--- a/net/caif/caif_dev.c
+++ b/net/caif/caif_dev.c
@@ -307,6 +307,8 @@ static int caif_device_notify(struct notifier_block *me, unsigned long what,
case NETDEV_UNREGISTER:
caifd = caif_get(dev);
+ if (caifd == NULL)
+ break;
netdev_info(dev, "unregister\n");
atomic_set(&caifd->state, what);
caif_device_destroy(dev);
diff --git a/net/caif/caif_socket.c b/net/caif/caif_socket.c
index 2eca2dd0000..1bf0cf50379 100644
--- a/net/caif/caif_socket.c
+++ b/net/caif/caif_socket.c
@@ -716,8 +716,7 @@ static int setsockopt(struct socket *sock,
{
struct sock *sk = sock->sk;
struct caifsock *cf_sk = container_of(sk, struct caifsock, sk);
- int prio, linksel;
- struct ifreq ifreq;
+ int linksel;
if (cf_sk->sk.sk_socket->state != SS_UNCONNECTED)
return -ENOPROTOOPT;
@@ -735,33 +734,6 @@ static int setsockopt(struct socket *sock,
release_sock(&cf_sk->sk);
return 0;
- case SO_PRIORITY:
- if (lvl != SOL_SOCKET)
- goto bad_sol;
- if (ol < sizeof(int))
- return -EINVAL;
- if (copy_from_user(&prio, ov, sizeof(int)))
- return -EINVAL;
- lock_sock(&(cf_sk->sk));
- cf_sk->conn_req.priority = prio;
- release_sock(&cf_sk->sk);
- return 0;
-
- case SO_BINDTODEVICE:
- if (lvl != SOL_SOCKET)
- goto bad_sol;
- if (ol < sizeof(struct ifreq))
- return -EINVAL;
- if (copy_from_user(&ifreq, ov, sizeof(ifreq)))
- return -EFAULT;
- lock_sock(&(cf_sk->sk));
- strncpy(cf_sk->conn_req.link_name, ifreq.ifr_name,
- sizeof(cf_sk->conn_req.link_name));
- cf_sk->conn_req.link_name
- [sizeof(cf_sk->conn_req.link_name)-1] = 0;
- release_sock(&cf_sk->sk);
- return 0;
-
case CAIFSO_REQ_PARAM:
if (lvl != SOL_CAIF)
goto bad_sol;
@@ -880,6 +852,18 @@ static int caif_connect(struct socket *sock, struct sockaddr *uaddr,
sock->state = SS_CONNECTING;
sk->sk_state = CAIF_CONNECTING;
+ /* Check priority value coming from socket */
+ /* if priority value is out of range it will be adjusted */
+ if (cf_sk->sk.sk_priority > CAIF_PRIO_MAX)
+ cf_sk->conn_req.priority = CAIF_PRIO_MAX;
+ else if (cf_sk->sk.sk_priority < CAIF_PRIO_MIN)
+ cf_sk->conn_req.priority = CAIF_PRIO_MIN;
+ else
+ cf_sk->conn_req.priority = cf_sk->sk.sk_priority;
+
+ /*ifindex = id of the interface.*/
+ cf_sk->conn_req.ifindex = cf_sk->sk.sk_bound_dev_if;
+
dbfs_atomic_inc(&cnt.num_connect_req);
cf_sk->layer.receive = caif_sktrecv_cb;
err = caif_connect_client(&cf_sk->conn_req,
@@ -905,6 +889,7 @@ static int caif_connect(struct socket *sock, struct sockaddr *uaddr,
cf_sk->maxframe = mtu - (headroom + tailroom);
if (cf_sk->maxframe < 1) {
pr_warn("CAIF Interface MTU too small (%d)\n", dev->mtu);
+ err = -ENODEV;
goto out;
}
@@ -1142,7 +1127,7 @@ static int caif_create(struct net *net, struct socket *sock, int protocol,
set_rx_flow_on(cf_sk);
/* Set default options on configuration */
- cf_sk->conn_req.priority = CAIF_PRIO_NORMAL;
+ cf_sk->sk.sk_priority= CAIF_PRIO_NORMAL;
cf_sk->conn_req.link_selector = CAIF_LINK_LOW_LATENCY;
cf_sk->conn_req.protocol = protocol;
/* Increase the number of sockets created. */
diff --git a/net/caif/cfcnfg.c b/net/caif/cfcnfg.c
index 41adafd1891..21ede141018 100644
--- a/net/caif/cfcnfg.c
+++ b/net/caif/cfcnfg.c
@@ -173,18 +173,15 @@ static struct cfcnfg_phyinfo *cfcnfg_get_phyinfo(struct cfcnfg *cnfg,
return NULL;
}
-int cfcnfg_get_named(struct cfcnfg *cnfg, char *name)
+
+int cfcnfg_get_id_from_ifi(struct cfcnfg *cnfg, int ifi)
{
int i;
-
- /* Try to match with specified name */
- for (i = 0; i < MAX_PHY_LAYERS; i++) {
- if (cnfg->phy_layers[i].frm_layer != NULL
- && strcmp(cnfg->phy_layers[i].phy_layer->name,
- name) == 0)
- return cnfg->phy_layers[i].frm_layer->id;
- }
- return 0;
+ for (i = 0; i < MAX_PHY_LAYERS; i++)
+ if (cnfg->phy_layers[i].frm_layer != NULL &&
+ cnfg->phy_layers[i].ifindex == ifi)
+ return i;
+ return -ENODEV;
}
int cfcnfg_disconn_adapt_layer(struct cfcnfg *cnfg, struct cflayer *adap_layer)
diff --git a/net/caif/cfctrl.c b/net/caif/cfctrl.c
index 08f267a109a..3cd8f978e30 100644
--- a/net/caif/cfctrl.c
+++ b/net/caif/cfctrl.c
@@ -361,11 +361,10 @@ void cfctrl_cancel_req(struct cflayer *layr, struct cflayer *adap_layer)
struct cfctrl_request_info *p, *tmp;
struct cfctrl *ctrl = container_obj(layr);
spin_lock(&ctrl->info_list_lock);
- pr_warn("enter\n");
list_for_each_entry_safe(p, tmp, &ctrl->list, list) {
if (p->client_layer == adap_layer) {
- pr_warn("cancel req :%d\n", p->sequence_no);
+ pr_debug("cancel req :%d\n", p->sequence_no);
list_del(&p->list);
kfree(p);
}
diff --git a/net/caif/cfdbgl.c b/net/caif/cfdbgl.c
index 496fda9ac66..11a2af4c162 100644
--- a/net/caif/cfdbgl.c
+++ b/net/caif/cfdbgl.c
@@ -12,6 +12,8 @@
#include <net/caif/cfsrvl.h>
#include <net/caif/cfpkt.h>
+#define container_obj(layr) ((struct cfsrvl *) layr)
+
static int cfdbgl_receive(struct cflayer *layr, struct cfpkt *pkt);
static int cfdbgl_transmit(struct cflayer *layr, struct cfpkt *pkt);
@@ -38,5 +40,17 @@ static int cfdbgl_receive(struct cflayer *layr, struct cfpkt *pkt)
static int cfdbgl_transmit(struct cflayer *layr, struct cfpkt *pkt)
{
+ struct cfsrvl *service = container_obj(layr);
+ struct caif_payload_info *info;
+ int ret;
+
+ if (!cfsrvl_ready(service, &ret))
+ return ret;
+
+ /* Add info for MUX-layer to route the packet out */
+ info = cfpkt_info(pkt);
+ info->channel_id = service->layer.id;
+ info->dev_info = &service->dev_info;
+
return layr->dn->transmit(layr->dn, pkt);
}
diff --git a/net/caif/cfrfml.c b/net/caif/cfrfml.c
index bde8481e8d2..e2fb5fa7579 100644
--- a/net/caif/cfrfml.c
+++ b/net/caif/cfrfml.c
@@ -193,7 +193,7 @@ out:
static int cfrfml_transmit_segment(struct cfrfml *rfml, struct cfpkt *pkt)
{
- caif_assert(cfpkt_getlen(pkt) >= rfml->fragment_size);
+ caif_assert(cfpkt_getlen(pkt) < rfml->fragment_size);
/* Add info for MUX-layer to route the packet out. */
cfpkt_info(pkt)->channel_id = rfml->serv.layer.id;
diff --git a/net/can/Makefile b/net/can/Makefile
index 9cd3c4b3abd..2d3894b3274 100644
--- a/net/can/Makefile
+++ b/net/can/Makefile
@@ -3,10 +3,10 @@
#
obj-$(CONFIG_CAN) += can.o
-can-objs := af_can.o proc.o
+can-y := af_can.o proc.o
obj-$(CONFIG_CAN_RAW) += can-raw.o
-can-raw-objs := raw.o
+can-raw-y := raw.o
obj-$(CONFIG_CAN_BCM) += can-bcm.o
-can-bcm-objs := bcm.o
+can-bcm-y := bcm.o
diff --git a/net/can/bcm.c b/net/can/bcm.c
index 08ffe9e4be2..6faa8256e10 100644
--- a/net/can/bcm.c
+++ b/net/can/bcm.c
@@ -125,7 +125,7 @@ struct bcm_sock {
struct list_head tx_ops;
unsigned long dropped_usr_msgs;
struct proc_dir_entry *bcm_proc_read;
- char procname [9]; /* pointer printed in ASCII with \0 */
+ char procname [20]; /* pointer printed in ASCII with \0 */
};
static inline struct bcm_sock *bcm_sk(const struct sock *sk)
diff --git a/net/ceph/Makefile b/net/ceph/Makefile
index aab1cabb803..153bdec4083 100644
--- a/net/ceph/Makefile
+++ b/net/ceph/Makefile
@@ -6,7 +6,7 @@ ifneq ($(KERNELRELEASE),)
obj-$(CONFIG_CEPH_LIB) += libceph.o
-libceph-objs := ceph_common.o messenger.o msgpool.o buffer.o pagelist.o \
+libceph-y := ceph_common.o messenger.o msgpool.o buffer.o pagelist.o \
mon_client.o \
osd_client.o osdmap.o crush/crush.o crush/mapper.o crush/hash.o \
debugfs.o \
diff --git a/net/core/dev.c b/net/core/dev.c
index 35dfb831848..381b8e28016 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -1794,16 +1794,18 @@ struct sk_buff *skb_gso_segment(struct sk_buff *skb, int features)
struct sk_buff *segs = ERR_PTR(-EPROTONOSUPPORT);
struct packet_type *ptype;
__be16 type = skb->protocol;
+ int vlan_depth = ETH_HLEN;
int err;
- if (type == htons(ETH_P_8021Q)) {
- struct vlan_ethhdr *veh;
+ while (type == htons(ETH_P_8021Q)) {
+ struct vlan_hdr *vh;
- if (unlikely(!pskb_may_pull(skb, VLAN_ETH_HLEN)))
+ if (unlikely(!pskb_may_pull(skb, vlan_depth + VLAN_HLEN)))
return ERR_PTR(-EINVAL);
- veh = (struct vlan_ethhdr *)skb->data;
- type = veh->h_vlan_encapsulated_proto;
+ vh = (struct vlan_hdr *)(skb->data + vlan_depth);
+ type = vh->h_vlan_encapsulated_proto;
+ vlan_depth += VLAN_HLEN;
}
skb_reset_mac_header(skb);
@@ -1817,8 +1819,7 @@ struct sk_buff *skb_gso_segment(struct sk_buff *skb, int features)
if (dev && dev->ethtool_ops && dev->ethtool_ops->get_drvinfo)
dev->ethtool_ops->get_drvinfo(dev, &info);
- WARN(1, "%s: caps=(0x%lx, 0x%lx) len=%d data_len=%d "
- "ip_summed=%d",
+ WARN(1, "%s: caps=(0x%lx, 0x%lx) len=%d data_len=%d ip_summed=%d\n",
info.driver, dev ? dev->features : 0L,
skb->sk ? skb->sk->sk_route_caps : 0L,
skb->len, skb->data_len, skb->ip_summed);
@@ -1967,6 +1968,23 @@ static inline void skb_orphan_try(struct sk_buff *skb)
}
}
+int netif_get_vlan_features(struct sk_buff *skb, struct net_device *dev)
+{
+ __be16 protocol = skb->protocol;
+
+ if (protocol == htons(ETH_P_8021Q)) {
+ struct vlan_ethhdr *veh = (struct vlan_ethhdr *)skb->data;
+ protocol = veh->h_vlan_encapsulated_proto;
+ } else if (!skb->vlan_tci)
+ return dev->features;
+
+ if (protocol != htons(ETH_P_8021Q))
+ return dev->features & dev->vlan_features;
+ else
+ return 0;
+}
+EXPORT_SYMBOL(netif_get_vlan_features);
+
/*
* Returns true if either:
* 1. skb has frag_list and the device doesn't support FRAGLIST, or
@@ -1977,15 +1995,20 @@ static inline void skb_orphan_try(struct sk_buff *skb)
static inline int skb_needs_linearize(struct sk_buff *skb,
struct net_device *dev)
{
- int features = dev->features;
+ if (skb_is_nonlinear(skb)) {
+ int features = dev->features;
- if (skb->protocol == htons(ETH_P_8021Q) || vlan_tx_tag_present(skb))
- features &= dev->vlan_features;
+ if (vlan_tx_tag_present(skb))
+ features &= dev->vlan_features;
- return skb_is_nonlinear(skb) &&
- ((skb_has_frag_list(skb) && !(features & NETIF_F_FRAGLIST)) ||
- (skb_shinfo(skb)->nr_frags && (!(features & NETIF_F_SG) ||
- illegal_highdma(dev, skb))));
+ return (skb_has_frag_list(skb) &&
+ !(features & NETIF_F_FRAGLIST)) ||
+ (skb_shinfo(skb)->nr_frags &&
+ (!(features & NETIF_F_SG) ||
+ illegal_highdma(dev, skb)));
+ }
+
+ return 0;
}
int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev,
@@ -2131,7 +2154,7 @@ static struct netdev_queue *dev_pick_tx(struct net_device *dev,
} else {
struct sock *sk = skb->sk;
queue_index = sk_tx_queue_get(sk);
- if (queue_index < 0) {
+ if (queue_index < 0 || queue_index >= dev->real_num_tx_queues) {
queue_index = 0;
if (dev->real_num_tx_queues > 1)
@@ -5029,12 +5052,8 @@ static int netif_alloc_rx_queues(struct net_device *dev)
}
dev->_rx = rx;
- /*
- * Set a pointer to first element in the array which holds the
- * reference count.
- */
for (i = 0; i < count; i++)
- rx[i].first = rx;
+ rx[i].dev = dev;
#endif
return 0;
}
@@ -5110,14 +5129,6 @@ int register_netdevice(struct net_device *dev)
dev->iflink = -1;
- ret = netif_alloc_rx_queues(dev);
- if (ret)
- goto out;
-
- ret = netif_alloc_netdev_queues(dev);
- if (ret)
- goto out;
-
netdev_init_queues(dev);
/* Init, if this function is available */
@@ -5577,10 +5588,14 @@ struct net_device *alloc_netdev_mq(int sizeof_priv, const char *name,
dev->num_tx_queues = queue_count;
dev->real_num_tx_queues = queue_count;
+ if (netif_alloc_netdev_queues(dev))
+ goto free_pcpu;
#ifdef CONFIG_RPS
dev->num_rx_queues = queue_count;
dev->real_num_rx_queues = queue_count;
+ if (netif_alloc_rx_queues(dev))
+ goto free_pcpu;
#endif
dev->gso_max_size = GSO_MAX_SIZE;
@@ -5597,6 +5612,11 @@ struct net_device *alloc_netdev_mq(int sizeof_priv, const char *name,
free_pcpu:
free_percpu(dev->pcpu_refcnt);
+ kfree(dev->_tx);
+#ifdef CONFIG_RPS
+ kfree(dev->_rx);
+#endif
+
free_p:
kfree(p);
return NULL;
@@ -5618,6 +5638,9 @@ void free_netdev(struct net_device *dev)
release_net(dev_net(dev));
kfree(dev->_tx);
+#ifdef CONFIG_RPS
+ kfree(dev->_rx);
+#endif
kfree(rcu_dereference_raw(dev->ingress_queue));
diff --git a/net/core/dst.c b/net/core/dst.c
index 8abe628b79f..b99c7c7ffce 100644
--- a/net/core/dst.c
+++ b/net/core/dst.c
@@ -370,6 +370,7 @@ static int dst_dev_event(struct notifier_block *this, unsigned long event,
static struct notifier_block dst_dev_notifier = {
.notifier_call = dst_dev_event,
+ .priority = -10, /* must be called after other network notifiers */
};
void __init dst_init(void)
diff --git a/net/core/filter.c b/net/core/filter.c
index 7beaec36b54..a44d27f9f0f 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -37,6 +37,55 @@
#include <asm/uaccess.h>
#include <asm/unaligned.h>
#include <linux/filter.h>
+#include <linux/reciprocal_div.h>
+
+enum {
+ BPF_S_RET_K = 1,
+ BPF_S_RET_A,
+ BPF_S_ALU_ADD_K,
+ BPF_S_ALU_ADD_X,
+ BPF_S_ALU_SUB_K,
+ BPF_S_ALU_SUB_X,
+ BPF_S_ALU_MUL_K,
+ BPF_S_ALU_MUL_X,
+ BPF_S_ALU_DIV_X,
+ BPF_S_ALU_AND_K,
+ BPF_S_ALU_AND_X,
+ BPF_S_ALU_OR_K,
+ BPF_S_ALU_OR_X,
+ BPF_S_ALU_LSH_K,
+ BPF_S_ALU_LSH_X,
+ BPF_S_ALU_RSH_K,
+ BPF_S_ALU_RSH_X,
+ BPF_S_ALU_NEG,
+ BPF_S_LD_W_ABS,
+ BPF_S_LD_H_ABS,
+ BPF_S_LD_B_ABS,
+ BPF_S_LD_W_LEN,
+ BPF_S_LD_W_IND,
+ BPF_S_LD_H_IND,
+ BPF_S_LD_B_IND,
+ BPF_S_LD_IMM,
+ BPF_S_LDX_W_LEN,
+ BPF_S_LDX_B_MSH,
+ BPF_S_LDX_IMM,
+ BPF_S_MISC_TAX,
+ BPF_S_MISC_TXA,
+ BPF_S_ALU_DIV_K,
+ BPF_S_LD_MEM,
+ BPF_S_LDX_MEM,
+ BPF_S_ST,
+ BPF_S_STX,
+ BPF_S_JMP_JA,
+ BPF_S_JMP_JEQ_K,
+ BPF_S_JMP_JEQ_X,
+ BPF_S_JMP_JGE_K,
+ BPF_S_JMP_JGE_X,
+ BPF_S_JMP_JGT_K,
+ BPF_S_JMP_JGT_X,
+ BPF_S_JMP_JSET_K,
+ BPF_S_JMP_JSET_X,
+};
/* No hurry in this branch */
static void *__load_pointer(struct sk_buff *skb, int k)
@@ -89,7 +138,7 @@ int sk_filter(struct sock *sk, struct sk_buff *skb)
rcu_read_lock_bh();
filter = rcu_dereference_bh(sk->sk_filter);
if (filter) {
- unsigned int pkt_len = sk_run_filter(skb, filter->insns, filter->len);
+ unsigned int pkt_len = sk_run_filter(skb, filter->insns);
err = pkt_len ? pskb_trim(skb, pkt_len) : -EPERM;
}
@@ -103,48 +152,53 @@ EXPORT_SYMBOL(sk_filter);
* sk_run_filter - run a filter on a socket
* @skb: buffer to run the filter on
* @filter: filter to apply
- * @flen: length of filter
*
* Decode and apply filter instructions to the skb->data.
- * Return length to keep, 0 for none. skb is the data we are
- * filtering, filter is the array of filter instructions, and
- * len is the number of filter blocks in the array.
+ * Return length to keep, 0 for none. @skb is the data we are
+ * filtering, @filter is the array of filter instructions.
+ * Because all jumps are guaranteed to be before the last instruction,
+ * and the last instruction is guaranteed to be a RET, we don't need to check
+ * flen. (We used to pass the length of the filter to this function.)
*/
-unsigned int sk_run_filter(struct sk_buff *skb, struct sock_filter *filter, int flen)
+unsigned int sk_run_filter(struct sk_buff *skb, const struct sock_filter *fentry)
{
- struct sock_filter *fentry; /* We walk down these */
void *ptr;
u32 A = 0; /* Accumulator */
u32 X = 0; /* Index Register */
u32 mem[BPF_MEMWORDS]; /* Scratch Memory Store */
+ unsigned long memvalid = 0;
u32 tmp;
int k;
- int pc;
+ BUILD_BUG_ON(BPF_MEMWORDS > BITS_PER_LONG);
/*
* Process array of filter instructions.
*/
- for (pc = 0; pc < flen; pc++) {
- fentry = &filter[pc];
+ for (;; fentry++) {
+#if defined(CONFIG_X86_32)
+#define K (fentry->k)
+#else
+ const u32 K = fentry->k;
+#endif
switch (fentry->code) {
case BPF_S_ALU_ADD_X:
A += X;
continue;
case BPF_S_ALU_ADD_K:
- A += fentry->k;
+ A += K;
continue;
case BPF_S_ALU_SUB_X:
A -= X;
continue;
case BPF_S_ALU_SUB_K:
- A -= fentry->k;
+ A -= K;
continue;
case BPF_S_ALU_MUL_X:
A *= X;
continue;
case BPF_S_ALU_MUL_K:
- A *= fentry->k;
+ A *= K;
continue;
case BPF_S_ALU_DIV_X:
if (X == 0)
@@ -152,64 +206,64 @@ unsigned int sk_run_filter(struct sk_buff *skb, struct sock_filter *filter, int
A /= X;
continue;
case BPF_S_ALU_DIV_K:
- A /= fentry->k;
+ A = reciprocal_divide(A, K);
continue;
case BPF_S_ALU_AND_X:
A &= X;
continue;
case BPF_S_ALU_AND_K:
- A &= fentry->k;
+ A &= K;
continue;
case BPF_S_ALU_OR_X:
A |= X;
continue;
case BPF_S_ALU_OR_K:
- A |= fentry->k;
+ A |= K;
continue;
case BPF_S_ALU_LSH_X:
A <<= X;
continue;
case BPF_S_ALU_LSH_K:
- A <<= fentry->k;
+ A <<= K;
continue;
case BPF_S_ALU_RSH_X:
A >>= X;
continue;
case BPF_S_ALU_RSH_K:
- A >>= fentry->k;
+ A >>= K;
continue;
case BPF_S_ALU_NEG:
A = -A;
continue;
case BPF_S_JMP_JA:
- pc += fentry->k;
+ fentry += K;
continue;
case BPF_S_JMP_JGT_K:
- pc += (A > fentry->k) ? fentry->jt : fentry->jf;
+ fentry += (A > K) ? fentry->jt : fentry->jf;
continue;
case BPF_S_JMP_JGE_K:
- pc += (A >= fentry->k) ? fentry->jt : fentry->jf;
+ fentry += (A >= K) ? fentry->jt : fentry->jf;
continue;
case BPF_S_JMP_JEQ_K:
- pc += (A == fentry->k) ? fentry->jt : fentry->jf;
+ fentry += (A == K) ? fentry->jt : fentry->jf;
continue;
case BPF_S_JMP_JSET_K:
- pc += (A & fentry->k) ? fentry->jt : fentry->jf;
+ fentry += (A & K) ? fentry->jt : fentry->jf;
continue;
case BPF_S_JMP_JGT_X:
- pc += (A > X) ? fentry->jt : fentry->jf;
+ fentry += (A > X) ? fentry->jt : fentry->jf;
continue;
case BPF_S_JMP_JGE_X:
- pc += (A >= X) ? fentry->jt : fentry->jf;
+ fentry += (A >= X) ? fentry->jt : fentry->jf;
continue;
case BPF_S_JMP_JEQ_X:
- pc += (A == X) ? fentry->jt : fentry->jf;
+ fentry += (A == X) ? fentry->jt : fentry->jf;
continue;
case BPF_S_JMP_JSET_X:
- pc += (A & X) ? fentry->jt : fentry->jf;
+ fentry += (A & X) ? fentry->jt : fentry->jf;
continue;
case BPF_S_LD_W_ABS:
- k = fentry->k;
+ k = K;
load_w:
ptr = load_pointer(skb, k, 4, &tmp);
if (ptr != NULL) {
@@ -218,7 +272,7 @@ load_w:
}
break;
case BPF_S_LD_H_ABS:
- k = fentry->k;
+ k = K;
load_h:
ptr = load_pointer(skb, k, 2, &tmp);
if (ptr != NULL) {
@@ -227,7 +281,7 @@ load_h:
}
break;
case BPF_S_LD_B_ABS:
- k = fentry->k;
+ k = K;
load_b:
ptr = load_pointer(skb, k, 1, &tmp);
if (ptr != NULL) {
@@ -242,32 +296,34 @@ load_b:
X = skb->len;
continue;
case BPF_S_LD_W_IND:
- k = X + fentry->k;
+ k = X + K;
goto load_w;
case BPF_S_LD_H_IND:
- k = X + fentry->k;
+ k = X + K;
goto load_h;
case BPF_S_LD_B_IND:
- k = X + fentry->k;
+ k = X + K;
goto load_b;
case BPF_S_LDX_B_MSH:
- ptr = load_pointer(skb, fentry->k, 1, &tmp);
+ ptr = load_pointer(skb, K, 1, &tmp);
if (ptr != NULL) {
X = (*(u8 *)ptr & 0xf) << 2;
continue;
}
return 0;
case BPF_S_LD_IMM:
- A = fentry->k;
+ A = K;
continue;
case BPF_S_LDX_IMM:
- X = fentry->k;
+ X = K;
continue;
case BPF_S_LD_MEM:
- A = mem[fentry->k];
+ A = (memvalid & (1UL << K)) ?
+ mem[K] : 0;
continue;
case BPF_S_LDX_MEM:
- X = mem[fentry->k];
+ X = (memvalid & (1UL << K)) ?
+ mem[K] : 0;
continue;
case BPF_S_MISC_TAX:
X = A;
@@ -276,14 +332,16 @@ load_b:
A = X;
continue;
case BPF_S_RET_K:
- return fentry->k;
+ return K;
case BPF_S_RET_A:
return A;
case BPF_S_ST:
- mem[fentry->k] = A;
+ memvalid |= 1UL << K;
+ mem[K] = A;
continue;
case BPF_S_STX:
- mem[fentry->k] = X;
+ memvalid |= 1UL << K;
+ mem[K] = X;
continue;
default:
WARN_ON(1);
@@ -377,7 +435,57 @@ EXPORT_SYMBOL(sk_run_filter);
*/
int sk_chk_filter(struct sock_filter *filter, int flen)
{
- struct sock_filter *ftest;
+ /*
+ * Valid instructions are initialized to non-0.
+ * Invalid instructions are initialized to 0.
+ */
+ static const u8 codes[] = {
+ [BPF_ALU|BPF_ADD|BPF_K] = BPF_S_ALU_ADD_K,
+ [BPF_ALU|BPF_ADD|BPF_X] = BPF_S_ALU_ADD_X,
+ [BPF_ALU|BPF_SUB|BPF_K] = BPF_S_ALU_SUB_K,
+ [BPF_ALU|BPF_SUB|BPF_X] = BPF_S_ALU_SUB_X,
+ [BPF_ALU|BPF_MUL|BPF_K] = BPF_S_ALU_MUL_K,
+ [BPF_ALU|BPF_MUL|BPF_X] = BPF_S_ALU_MUL_X,
+ [BPF_ALU|BPF_DIV|BPF_X] = BPF_S_ALU_DIV_X,
+ [BPF_ALU|BPF_AND|BPF_K] = BPF_S_ALU_AND_K,
+ [BPF_ALU|BPF_AND|BPF_X] = BPF_S_ALU_AND_X,
+ [BPF_ALU|BPF_OR|BPF_K] = BPF_S_ALU_OR_K,
+ [BPF_ALU|BPF_OR|BPF_X] = BPF_S_ALU_OR_X,
+ [BPF_ALU|BPF_LSH|BPF_K] = BPF_S_ALU_LSH_K,
+ [BPF_ALU|BPF_LSH|BPF_X] = BPF_S_ALU_LSH_X,
+ [BPF_ALU|BPF_RSH|BPF_K] = BPF_S_ALU_RSH_K,
+ [BPF_ALU|BPF_RSH|BPF_X] = BPF_S_ALU_RSH_X,
+ [BPF_ALU|BPF_NEG] = BPF_S_ALU_NEG,
+ [BPF_LD|BPF_W|BPF_ABS] = BPF_S_LD_W_ABS,
+ [BPF_LD|BPF_H|BPF_ABS] = BPF_S_LD_H_ABS,
+ [BPF_LD|BPF_B|BPF_ABS] = BPF_S_LD_B_ABS,
+ [BPF_LD|BPF_W|BPF_LEN] = BPF_S_LD_W_LEN,
+ [BPF_LD|BPF_W|BPF_IND] = BPF_S_LD_W_IND,
+ [BPF_LD|BPF_H|BPF_IND] = BPF_S_LD_H_IND,
+ [BPF_LD|BPF_B|BPF_IND] = BPF_S_LD_B_IND,
+ [BPF_LD|BPF_IMM] = BPF_S_LD_IMM,
+ [BPF_LDX|BPF_W|BPF_LEN] = BPF_S_LDX_W_LEN,
+ [BPF_LDX|BPF_B|BPF_MSH] = BPF_S_LDX_B_MSH,
+ [BPF_LDX|BPF_IMM] = BPF_S_LDX_IMM,
+ [BPF_MISC|BPF_TAX] = BPF_S_MISC_TAX,
+ [BPF_MISC|BPF_TXA] = BPF_S_MISC_TXA,
+ [BPF_RET|BPF_K] = BPF_S_RET_K,
+ [BPF_RET|BPF_A] = BPF_S_RET_A,
+ [BPF_ALU|BPF_DIV|BPF_K] = BPF_S_ALU_DIV_K,
+ [BPF_LD|BPF_MEM] = BPF_S_LD_MEM,
+ [BPF_LDX|BPF_MEM] = BPF_S_LDX_MEM,
+ [BPF_ST] = BPF_S_ST,
+ [BPF_STX] = BPF_S_STX,
+ [BPF_JMP|BPF_JA] = BPF_S_JMP_JA,
+ [BPF_JMP|BPF_JEQ|BPF_K] = BPF_S_JMP_JEQ_K,
+ [BPF_JMP|BPF_JEQ|BPF_X] = BPF_S_JMP_JEQ_X,
+ [BPF_JMP|BPF_JGE|BPF_K] = BPF_S_JMP_JGE_K,
+ [BPF_JMP|BPF_JGE|BPF_X] = BPF_S_JMP_JGE_X,
+ [BPF_JMP|BPF_JGT|BPF_K] = BPF_S_JMP_JGT_K,
+ [BPF_JMP|BPF_JGT|BPF_X] = BPF_S_JMP_JGT_X,
+ [BPF_JMP|BPF_JSET|BPF_K] = BPF_S_JMP_JSET_K,
+ [BPF_JMP|BPF_JSET|BPF_X] = BPF_S_JMP_JSET_X,
+ };
int pc;
if (flen == 0 || flen > BPF_MAXINSNS)
@@ -385,136 +493,31 @@ int sk_chk_filter(struct sock_filter *filter, int flen)
/* check the filter code now */
for (pc = 0; pc < flen; pc++) {
- ftest = &filter[pc];
-
- /* Only allow valid instructions */
- switch (ftest->code) {
- case BPF_ALU|BPF_ADD|BPF_K:
- ftest->code = BPF_S_ALU_ADD_K;
- break;
- case BPF_ALU|BPF_ADD|BPF_X:
- ftest->code = BPF_S_ALU_ADD_X;
- break;
- case BPF_ALU|BPF_SUB|BPF_K:
- ftest->code = BPF_S_ALU_SUB_K;
- break;
- case BPF_ALU|BPF_SUB|BPF_X:
- ftest->code = BPF_S_ALU_SUB_X;
- break;
- case BPF_ALU|BPF_MUL|BPF_K:
- ftest->code = BPF_S_ALU_MUL_K;
- break;
- case BPF_ALU|BPF_MUL|BPF_X:
- ftest->code = BPF_S_ALU_MUL_X;
- break;
- case BPF_ALU|BPF_DIV|BPF_X:
- ftest->code = BPF_S_ALU_DIV_X;
- break;
- case BPF_ALU|BPF_AND|BPF_K:
- ftest->code = BPF_S_ALU_AND_K;
- break;
- case BPF_ALU|BPF_AND|BPF_X:
- ftest->code = BPF_S_ALU_AND_X;
- break;
- case BPF_ALU|BPF_OR|BPF_K:
- ftest->code = BPF_S_ALU_OR_K;
- break;
- case BPF_ALU|BPF_OR|BPF_X:
- ftest->code = BPF_S_ALU_OR_X;
- break;
- case BPF_ALU|BPF_LSH|BPF_K:
- ftest->code = BPF_S_ALU_LSH_K;
- break;
- case BPF_ALU|BPF_LSH|BPF_X:
- ftest->code = BPF_S_ALU_LSH_X;
- break;
- case BPF_ALU|BPF_RSH|BPF_K:
- ftest->code = BPF_S_ALU_RSH_K;
- break;
- case BPF_ALU|BPF_RSH|BPF_X:
- ftest->code = BPF_S_ALU_RSH_X;
- break;
- case BPF_ALU|BPF_NEG:
- ftest->code = BPF_S_ALU_NEG;
- break;
- case BPF_LD|BPF_W|BPF_ABS:
- ftest->code = BPF_S_LD_W_ABS;
- break;
- case BPF_LD|BPF_H|BPF_ABS:
- ftest->code = BPF_S_LD_H_ABS;
- break;
- case BPF_LD|BPF_B|BPF_ABS:
- ftest->code = BPF_S_LD_B_ABS;
- break;
- case BPF_LD|BPF_W|BPF_LEN:
- ftest->code = BPF_S_LD_W_LEN;
- break;
- case BPF_LD|BPF_W|BPF_IND:
- ftest->code = BPF_S_LD_W_IND;
- break;
- case BPF_LD|BPF_H|BPF_IND:
- ftest->code = BPF_S_LD_H_IND;
- break;
- case BPF_LD|BPF_B|BPF_IND:
- ftest->code = BPF_S_LD_B_IND;
- break;
- case BPF_LD|BPF_IMM:
- ftest->code = BPF_S_LD_IMM;
- break;
- case BPF_LDX|BPF_W|BPF_LEN:
- ftest->code = BPF_S_LDX_W_LEN;
- break;
- case BPF_LDX|BPF_B|BPF_MSH:
- ftest->code = BPF_S_LDX_B_MSH;
- break;
- case BPF_LDX|BPF_IMM:
- ftest->code = BPF_S_LDX_IMM;
- break;
- case BPF_MISC|BPF_TAX:
- ftest->code = BPF_S_MISC_TAX;
- break;
- case BPF_MISC|BPF_TXA:
- ftest->code = BPF_S_MISC_TXA;
- break;
- case BPF_RET|BPF_K:
- ftest->code = BPF_S_RET_K;
- break;
- case BPF_RET|BPF_A:
- ftest->code = BPF_S_RET_A;
- break;
+ struct sock_filter *ftest = &filter[pc];
+ u16 code = ftest->code;
+ if (code >= ARRAY_SIZE(codes))
+ return -EINVAL;
+ code = codes[code];
+ if (!code)
+ return -EINVAL;
/* Some instructions need special checks */
-
+ switch (code) {
+ case BPF_S_ALU_DIV_K:
/* check for division by zero */
- case BPF_ALU|BPF_DIV|BPF_K:
if (ftest->k == 0)
return -EINVAL;
- ftest->code = BPF_S_ALU_DIV_K;
- break;
-
- /* check for invalid memory addresses */
- case BPF_LD|BPF_MEM:
- if (ftest->k >= BPF_MEMWORDS)
- return -EINVAL;
- ftest->code = BPF_S_LD_MEM;
- break;
- case BPF_LDX|BPF_MEM:
- if (ftest->k >= BPF_MEMWORDS)
- return -EINVAL;
- ftest->code = BPF_S_LDX_MEM;
- break;
- case BPF_ST:
- if (ftest->k >= BPF_MEMWORDS)
- return -EINVAL;
- ftest->code = BPF_S_ST;
+ ftest->k = reciprocal_value(ftest->k);
break;
- case BPF_STX:
+ case BPF_S_LD_MEM:
+ case BPF_S_LDX_MEM:
+ case BPF_S_ST:
+ case BPF_S_STX:
+ /* check for invalid memory addresses */
if (ftest->k >= BPF_MEMWORDS)
return -EINVAL;
- ftest->code = BPF_S_STX;
break;
-
- case BPF_JMP|BPF_JA:
+ case BPF_S_JMP_JA:
/*
* Note, the large ftest->k might cause loops.
* Compare this with conditional jumps below,
@@ -522,40 +525,7 @@ int sk_chk_filter(struct sock_filter *filter, int flen)
*/
if (ftest->k >= (unsigned)(flen-pc-1))
return -EINVAL;
- ftest->code = BPF_S_JMP_JA;
- break;
-
- case BPF_JMP|BPF_JEQ|BPF_K:
- ftest->code = BPF_S_JMP_JEQ_K;
break;
- case BPF_JMP|BPF_JEQ|BPF_X:
- ftest->code = BPF_S_JMP_JEQ_X;
- break;
- case BPF_JMP|BPF_JGE|BPF_K:
- ftest->code = BPF_S_JMP_JGE_K;
- break;
- case BPF_JMP|BPF_JGE|BPF_X:
- ftest->code = BPF_S_JMP_JGE_X;
- break;
- case BPF_JMP|BPF_JGT|BPF_K:
- ftest->code = BPF_S_JMP_JGT_K;
- break;
- case BPF_JMP|BPF_JGT|BPF_X:
- ftest->code = BPF_S_JMP_JGT_X;
- break;
- case BPF_JMP|BPF_JSET|BPF_K:
- ftest->code = BPF_S_JMP_JSET_K;
- break;
- case BPF_JMP|BPF_JSET|BPF_X:
- ftest->code = BPF_S_JMP_JSET_X;
- break;
-
- default:
- return -EINVAL;
- }
-
- /* for conditionals both must be safe */
- switch (ftest->code) {
case BPF_S_JMP_JEQ_K:
case BPF_S_JMP_JEQ_X:
case BPF_S_JMP_JGE_K:
@@ -564,10 +534,13 @@ int sk_chk_filter(struct sock_filter *filter, int flen)
case BPF_S_JMP_JGT_X:
case BPF_S_JMP_JSET_X:
case BPF_S_JMP_JSET_K:
+ /* for conditionals both must be safe */
if (pc + ftest->jt + 1 >= flen ||
pc + ftest->jf + 1 >= flen)
return -EINVAL;
+ break;
}
+ ftest->code = code;
}
/* last instruction must be a RET code */
@@ -575,15 +548,13 @@ int sk_chk_filter(struct sock_filter *filter, int flen)
case BPF_S_RET_K:
case BPF_S_RET_A:
return 0;
- break;
- default:
- return -EINVAL;
- }
+ }
+ return -EINVAL;
}
EXPORT_SYMBOL(sk_chk_filter);
/**
- * sk_filter_rcu_release: Release a socket filter by rcu_head
+ * sk_filter_rcu_release - Release a socket filter by rcu_head
* @rcu: rcu_head that contains the sk_filter to free
*/
static void sk_filter_rcu_release(struct rcu_head *rcu)
diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c
index a5ff5a89f37..7abeb7ceaa4 100644
--- a/net/core/net-sysfs.c
+++ b/net/core/net-sysfs.c
@@ -706,21 +706,24 @@ static struct attribute *rx_queue_default_attrs[] = {
static void rx_queue_release(struct kobject *kobj)
{
struct netdev_rx_queue *queue = to_rx_queue(kobj);
- struct netdev_rx_queue *first = queue->first;
struct rps_map *map;
struct rps_dev_flow_table *flow_table;
map = rcu_dereference_raw(queue->rps_map);
- if (map)
+ if (map) {
+ RCU_INIT_POINTER(queue->rps_map, NULL);
call_rcu(&map->rcu, rps_map_release);
+ }
flow_table = rcu_dereference_raw(queue->rps_flow_table);
- if (flow_table)
+ if (flow_table) {
+ RCU_INIT_POINTER(queue->rps_flow_table, NULL);
call_rcu(&flow_table->rcu, rps_dev_flow_table_release);
+ }
- if (atomic_dec_and_test(&first->count))
- kfree(first);
+ memset(kobj, 0, sizeof(*kobj));
+ dev_put(queue->dev);
}
static struct kobj_type rx_queue_ktype = {
@@ -732,7 +735,6 @@ static struct kobj_type rx_queue_ktype = {
static int rx_queue_add_kobject(struct net_device *net, int index)
{
struct netdev_rx_queue *queue = net->_rx + index;
- struct netdev_rx_queue *first = queue->first;
struct kobject *kobj = &queue->kobj;
int error = 0;
@@ -745,7 +747,7 @@ static int rx_queue_add_kobject(struct net_device *net, int index)
}
kobject_uevent(kobj, KOBJ_ADD);
- atomic_inc(&first->count);
+ dev_hold(queue->dev);
return error;
}
diff --git a/net/core/pktgen.c b/net/core/pktgen.c
index fbce4b05a53..2e57830cbeb 100644
--- a/net/core/pktgen.c
+++ b/net/core/pktgen.c
@@ -378,6 +378,7 @@ struct pktgen_dev {
u16 queue_map_min;
u16 queue_map_max;
+ __u32 skb_priority; /* skb priority field */
int node; /* Memory node */
#ifdef CONFIG_XFRM
@@ -394,6 +395,8 @@ struct pktgen_hdr {
__be32 tv_usec;
};
+static bool pktgen_exiting __read_mostly;
+
struct pktgen_thread {
spinlock_t if_lock; /* for list of devices */
struct list_head if_list; /* All device here */
@@ -547,6 +550,10 @@ static int pktgen_if_show(struct seq_file *seq, void *v)
pkt_dev->queue_map_min,
pkt_dev->queue_map_max);
+ if (pkt_dev->skb_priority)
+ seq_printf(seq, " skb_priority: %u\n",
+ pkt_dev->skb_priority);
+
if (pkt_dev->flags & F_IPV6) {
char b1[128], b2[128], b3[128];
fmt_ip6(b1, pkt_dev->in6_saddr.s6_addr);
@@ -887,7 +894,7 @@ static ssize_t pktgen_if_write(struct file *file,
i += len;
if (debug) {
- size_t copy = min(count, 1023);
+ size_t copy = min_t(size_t, count, 1023);
char tb[copy + 1];
if (copy_from_user(tb, user_buffer, copy))
return -EFAULT;
@@ -1711,6 +1718,18 @@ static ssize_t pktgen_if_write(struct file *file,
return count;
}
+ if (!strcmp(name, "skb_priority")) {
+ len = num_arg(&user_buffer[i], 9, &value);
+ if (len < 0)
+ return len;
+
+ i += len;
+ pkt_dev->skb_priority = value;
+ sprintf(pg_result, "OK: skb_priority=%i",
+ pkt_dev->skb_priority);
+ return count;
+ }
+
sprintf(pkt_dev->result, "No such parameter \"%s\"", name);
return -EINVAL;
}
@@ -2612,8 +2631,8 @@ static struct sk_buff *fill_packet_ipv4(struct net_device *odev,
/* Update any of the values, used when we're incrementing various
* fields.
*/
- queue_map = pkt_dev->cur_queue_map;
mod_cur_headers(pkt_dev);
+ queue_map = pkt_dev->cur_queue_map;
datalen = (odev->hard_header_len + 16) & ~0xf;
@@ -2671,6 +2690,8 @@ static struct sk_buff *fill_packet_ipv4(struct net_device *odev,
skb->transport_header = skb->network_header + sizeof(struct iphdr);
skb_put(skb, sizeof(struct iphdr) + sizeof(struct udphdr));
skb_set_queue_mapping(skb, queue_map);
+ skb->priority = pkt_dev->skb_priority;
+
iph = ip_hdr(skb);
udph = udp_hdr(skb);
@@ -2976,8 +2997,8 @@ static struct sk_buff *fill_packet_ipv6(struct net_device *odev,
/* Update any of the values, used when we're incrementing various
* fields.
*/
- queue_map = pkt_dev->cur_queue_map;
mod_cur_headers(pkt_dev);
+ queue_map = pkt_dev->cur_queue_map;
skb = __netdev_alloc_skb(odev,
pkt_dev->cur_pkt_size + 64
@@ -3016,6 +3037,7 @@ static struct sk_buff *fill_packet_ipv6(struct net_device *odev,
skb->transport_header = skb->network_header + sizeof(struct ipv6hdr);
skb_put(skb, sizeof(struct ipv6hdr) + sizeof(struct udphdr));
skb_set_queue_mapping(skb, queue_map);
+ skb->priority = pkt_dev->skb_priority;
iph = ipv6_hdr(skb);
udph = udp_hdr(skb);
@@ -3431,11 +3453,6 @@ static void pktgen_rem_thread(struct pktgen_thread *t)
remove_proc_entry(t->tsk->comm, pg_proc_dir);
- mutex_lock(&pktgen_thread_lock);
-
- list_del(&t->th_list);
-
- mutex_unlock(&pktgen_thread_lock);
}
static void pktgen_resched(struct pktgen_dev *pkt_dev)
@@ -3582,6 +3599,8 @@ static int pktgen_thread_worker(void *arg)
pkt_dev = next_to_run(t);
if (unlikely(!pkt_dev && t->control == 0)) {
+ if (pktgen_exiting)
+ break;
wait_event_interruptible_timeout(t->queue,
t->control != 0,
HZ/10);
@@ -3634,6 +3653,13 @@ static int pktgen_thread_worker(void *arg)
pr_debug("%s removing thread\n", t->tsk->comm);
pktgen_rem_thread(t);
+ /* Wait for kthread_stop */
+ while (!kthread_should_stop()) {
+ set_current_state(TASK_INTERRUPTIBLE);
+ schedule();
+ }
+ __set_current_state(TASK_RUNNING);
+
return 0;
}
@@ -3908,6 +3934,7 @@ static void __exit pg_cleanup(void)
struct list_head *q, *n;
/* Stop all interfaces & threads */
+ pktgen_exiting = true;
list_for_each_safe(q, n, &pktgen_threads) {
t = list_entry(q, struct pktgen_thread, th_list);
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index 8121268ddbd..bf69e5871b1 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -347,16 +347,106 @@ static size_t rtnl_link_get_size(const struct net_device *dev)
if (!ops)
return 0;
- size = nlmsg_total_size(sizeof(struct nlattr)) + /* IFLA_LINKINFO */
- nlmsg_total_size(strlen(ops->kind) + 1); /* IFLA_INFO_KIND */
+ size = nla_total_size(sizeof(struct nlattr)) + /* IFLA_LINKINFO */
+ nla_total_size(strlen(ops->kind) + 1); /* IFLA_INFO_KIND */
if (ops->get_size)
/* IFLA_INFO_DATA + nested data */
- size += nlmsg_total_size(sizeof(struct nlattr)) +
+ size += nla_total_size(sizeof(struct nlattr)) +
ops->get_size(dev);
if (ops->get_xstats_size)
- size += ops->get_xstats_size(dev); /* IFLA_INFO_XSTATS */
+ /* IFLA_INFO_XSTATS */
+ size += nla_total_size(ops->get_xstats_size(dev));
+
+ return size;
+}
+
+static LIST_HEAD(rtnl_af_ops);
+
+static const struct rtnl_af_ops *rtnl_af_lookup(const int family)
+{
+ const struct rtnl_af_ops *ops;
+
+ list_for_each_entry(ops, &rtnl_af_ops, list) {
+ if (ops->family == family)
+ return ops;
+ }
+
+ return NULL;
+}
+
+/**
+ * __rtnl_af_register - Register rtnl_af_ops with rtnetlink.
+ * @ops: struct rtnl_af_ops * to register
+ *
+ * The caller must hold the rtnl_mutex.
+ *
+ * Returns 0 on success or a negative error code.
+ */
+int __rtnl_af_register(struct rtnl_af_ops *ops)
+{
+ list_add_tail(&ops->list, &rtnl_af_ops);
+ return 0;
+}
+EXPORT_SYMBOL_GPL(__rtnl_af_register);
+
+/**
+ * rtnl_af_register - Register rtnl_af_ops with rtnetlink.
+ * @ops: struct rtnl_af_ops * to register
+ *
+ * Returns 0 on success or a negative error code.
+ */
+int rtnl_af_register(struct rtnl_af_ops *ops)
+{
+ int err;
+
+ rtnl_lock();
+ err = __rtnl_af_register(ops);
+ rtnl_unlock();
+ return err;
+}
+EXPORT_SYMBOL_GPL(rtnl_af_register);
+
+/**
+ * __rtnl_af_unregister - Unregister rtnl_af_ops from rtnetlink.
+ * @ops: struct rtnl_af_ops * to unregister
+ *
+ * The caller must hold the rtnl_mutex.
+ */
+void __rtnl_af_unregister(struct rtnl_af_ops *ops)
+{
+ list_del(&ops->list);
+}
+EXPORT_SYMBOL_GPL(__rtnl_af_unregister);
+
+/**
+ * rtnl_af_unregister - Unregister rtnl_af_ops from rtnetlink.
+ * @ops: struct rtnl_af_ops * to unregister
+ */
+void rtnl_af_unregister(struct rtnl_af_ops *ops)
+{
+ rtnl_lock();
+ __rtnl_af_unregister(ops);
+ rtnl_unlock();
+}
+EXPORT_SYMBOL_GPL(rtnl_af_unregister);
+
+static size_t rtnl_link_get_af_size(const struct net_device *dev)
+{
+ struct rtnl_af_ops *af_ops;
+ size_t size;
+
+ /* IFLA_AF_SPEC */
+ size = nla_total_size(sizeof(struct nlattr));
+
+ list_for_each_entry(af_ops, &rtnl_af_ops, list) {
+ if (af_ops->get_link_af_size) {
+ /* AF_* + nested data */
+ size += nla_total_size(sizeof(struct nlattr)) +
+ af_ops->get_link_af_size(dev);
+ }
+ }
return size;
}
@@ -670,7 +760,8 @@ static noinline size_t if_nlmsg_size(const struct net_device *dev)
+ nla_total_size(4) /* IFLA_NUM_VF */
+ rtnl_vfinfo_size(dev) /* IFLA_VFINFO_LIST */
+ rtnl_port_size(dev) /* IFLA_VF_PORTS + IFLA_PORT_SELF */
- + rtnl_link_get_size(dev); /* IFLA_LINKINFO */
+ + rtnl_link_get_size(dev) /* IFLA_LINKINFO */
+ + rtnl_link_get_af_size(dev); /* IFLA_AF_SPEC */
}
static int rtnl_vf_ports_fill(struct sk_buff *skb, struct net_device *dev)
@@ -756,7 +847,8 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev,
struct nlmsghdr *nlh;
struct rtnl_link_stats64 temp;
const struct rtnl_link_stats64 *stats;
- struct nlattr *attr;
+ struct nlattr *attr, *af_spec;
+ struct rtnl_af_ops *af_ops;
nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ifm), flags);
if (nlh == NULL)
@@ -865,6 +957,36 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev,
goto nla_put_failure;
}
+ if (!(af_spec = nla_nest_start(skb, IFLA_AF_SPEC)))
+ goto nla_put_failure;
+
+ list_for_each_entry(af_ops, &rtnl_af_ops, list) {
+ if (af_ops->fill_link_af) {
+ struct nlattr *af;
+ int err;
+
+ if (!(af = nla_nest_start(skb, af_ops->family)))
+ goto nla_put_failure;
+
+ err = af_ops->fill_link_af(skb, dev);
+
+ /*
+ * The callback may return -ENODATA to indicate that there
+ * was no data to be dumped. This is not an error, it
+ * means we should trim the attribute header and
+ * continue.
+ */
+ if (err == -ENODATA)
+ nla_nest_cancel(skb, af);
+ else if (err < 0)
+ goto nla_put_failure;
+
+ nla_nest_end(skb, af);
+ }
+ }
+
+ nla_nest_end(skb, af_spec);
+
return nlmsg_end(skb, nlh);
nla_put_failure:
@@ -923,6 +1045,7 @@ const struct nla_policy ifla_policy[IFLA_MAX+1] = {
[IFLA_VFINFO_LIST] = {. type = NLA_NESTED },
[IFLA_VF_PORTS] = { .type = NLA_NESTED },
[IFLA_PORT_SELF] = { .type = NLA_NESTED },
+ [IFLA_AF_SPEC] = { .type = NLA_NESTED },
};
EXPORT_SYMBOL(ifla_policy);
@@ -1224,6 +1347,27 @@ static int do_setlink(struct net_device *dev, struct ifinfomsg *ifm,
goto errout;
modified = 1;
}
+
+ if (tb[IFLA_AF_SPEC]) {
+ struct nlattr *af;
+ int rem;
+
+ nla_for_each_nested(af, tb[IFLA_AF_SPEC], rem) {
+ const struct rtnl_af_ops *af_ops;
+
+ if (!(af_ops = rtnl_af_lookup(nla_type(af))))
+ continue;
+
+ if (!af_ops->parse_link_af)
+ continue;
+
+ err = af_ops->parse_link_af(dev, af);
+ if (err < 0)
+ goto errout;
+
+ modified = 1;
+ }
+ }
err = 0;
errout:
diff --git a/net/core/sock.c b/net/core/sock.c
index 3eed5424e65..fb608011146 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -1653,10 +1653,10 @@ int __sk_mem_schedule(struct sock *sk, int size, int kind)
{
struct proto *prot = sk->sk_prot;
int amt = sk_mem_pages(size);
- int allocated;
+ long allocated;
sk->sk_forward_alloc += amt * SK_MEM_QUANTUM;
- allocated = atomic_add_return(amt, prot->memory_allocated);
+ allocated = atomic_long_add_return(amt, prot->memory_allocated);
/* Under limit. */
if (allocated <= prot->sysctl_mem[0]) {
@@ -1714,7 +1714,7 @@ suppress_allocation:
/* Alas. Undo changes. */
sk->sk_forward_alloc -= amt * SK_MEM_QUANTUM;
- atomic_sub(amt, prot->memory_allocated);
+ atomic_long_sub(amt, prot->memory_allocated);
return 0;
}
EXPORT_SYMBOL(__sk_mem_schedule);
@@ -1727,12 +1727,12 @@ void __sk_mem_reclaim(struct sock *sk)
{
struct proto *prot = sk->sk_prot;
- atomic_sub(sk->sk_forward_alloc >> SK_MEM_QUANTUM_SHIFT,
+ atomic_long_sub(sk->sk_forward_alloc >> SK_MEM_QUANTUM_SHIFT,
prot->memory_allocated);
sk->sk_forward_alloc &= SK_MEM_QUANTUM - 1;
if (prot->memory_pressure && *prot->memory_pressure &&
- (atomic_read(prot->memory_allocated) < prot->sysctl_mem[0]))
+ (atomic_long_read(prot->memory_allocated) < prot->sysctl_mem[0]))
*prot->memory_pressure = 0;
}
EXPORT_SYMBOL(__sk_mem_reclaim);
@@ -2452,12 +2452,12 @@ static char proto_method_implemented(const void *method)
static void proto_seq_printf(struct seq_file *seq, struct proto *proto)
{
- seq_printf(seq, "%-9s %4u %6d %6d %-3s %6u %-3s %-10s "
+ seq_printf(seq, "%-9s %4u %6d %6ld %-3s %6u %-3s %-10s "
"%2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c\n",
proto->name,
proto->obj_size,
sock_prot_inuse_get(seq_file_net(seq), proto),
- proto->memory_allocated != NULL ? atomic_read(proto->memory_allocated) : -1,
+ proto->memory_allocated != NULL ? atomic_long_read(proto->memory_allocated) : -1L,
proto->memory_pressure != NULL ? *proto->memory_pressure ? "yes" : "no" : "NI",
proto->max_header,
proto->slab == NULL ? "no" : "yes",
diff --git a/net/core/timestamping.c b/net/core/timestamping.c
index 0ae6c22da85..dac7ed687f6 100644
--- a/net/core/timestamping.c
+++ b/net/core/timestamping.c
@@ -31,7 +31,7 @@ static unsigned int classify(struct sk_buff *skb)
if (likely(skb->dev &&
skb->dev->phydev &&
skb->dev->phydev->drv))
- return sk_run_filter(skb, ptp_filter, ARRAY_SIZE(ptp_filter));
+ return sk_run_filter(skb, ptp_filter);
else
return PTP_CLASS_NONE;
}
diff --git a/net/dccp/ackvec.c b/net/dccp/ackvec.c
index 92a6fcb40d7..25b7a8d1ad5 100644
--- a/net/dccp/ackvec.c
+++ b/net/dccp/ackvec.c
@@ -1,444 +1,375 @@
/*
* net/dccp/ackvec.c
*
- * An implementation of the DCCP protocol
+ * An implementation of Ack Vectors for the DCCP protocol
+ * Copyright (c) 2007 University of Aberdeen, Scotland, UK
* Copyright (c) 2005 Arnaldo Carvalho de Melo <acme@ghostprotocols.net>
*
* This program is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License as published by the
* Free Software Foundation; version 2 of the License;
*/
-
-#include "ackvec.h"
#include "dccp.h"
-
-#include <linux/init.h>
-#include <linux/errno.h>
#include <linux/kernel.h>
-#include <linux/skbuff.h>
#include <linux/slab.h>
-#include <net/sock.h>
-
static struct kmem_cache *dccp_ackvec_slab;
static struct kmem_cache *dccp_ackvec_record_slab;
-static struct dccp_ackvec_record *dccp_ackvec_record_new(void)
+struct dccp_ackvec *dccp_ackvec_alloc(const gfp_t priority)
{
- struct dccp_ackvec_record *avr =
- kmem_cache_alloc(dccp_ackvec_record_slab, GFP_ATOMIC);
+ struct dccp_ackvec *av = kmem_cache_zalloc(dccp_ackvec_slab, priority);
- if (avr != NULL)
- INIT_LIST_HEAD(&avr->avr_node);
-
- return avr;
+ if (av != NULL) {
+ av->av_buf_head = av->av_buf_tail = DCCPAV_MAX_ACKVEC_LEN - 1;
+ INIT_LIST_HEAD(&av->av_records);
+ }
+ return av;
}
-static void dccp_ackvec_record_delete(struct dccp_ackvec_record *avr)
+static void dccp_ackvec_purge_records(struct dccp_ackvec *av)
{
- if (unlikely(avr == NULL))
- return;
- /* Check if deleting a linked record */
- WARN_ON(!list_empty(&avr->avr_node));
- kmem_cache_free(dccp_ackvec_record_slab, avr);
+ struct dccp_ackvec_record *cur, *next;
+
+ list_for_each_entry_safe(cur, next, &av->av_records, avr_node)
+ kmem_cache_free(dccp_ackvec_record_slab, cur);
+ INIT_LIST_HEAD(&av->av_records);
}
-static void dccp_ackvec_insert_avr(struct dccp_ackvec *av,
- struct dccp_ackvec_record *avr)
+void dccp_ackvec_free(struct dccp_ackvec *av)
{
- /*
- * AVRs are sorted by seqno. Since we are sending them in order, we
- * just add the AVR at the head of the list.
- * -sorbo.
- */
- if (!list_empty(&av->av_records)) {
- const struct dccp_ackvec_record *head =
- list_entry(av->av_records.next,
- struct dccp_ackvec_record,
- avr_node);
- BUG_ON(before48(avr->avr_ack_seqno, head->avr_ack_seqno));
+ if (likely(av != NULL)) {
+ dccp_ackvec_purge_records(av);
+ kmem_cache_free(dccp_ackvec_slab, av);
}
-
- list_add(&avr->avr_node, &av->av_records);
}
-int dccp_insert_option_ackvec(struct sock *sk, struct sk_buff *skb)
+/**
+ * dccp_ackvec_update_records - Record information about sent Ack Vectors
+ * @av: Ack Vector records to update
+ * @seqno: Sequence number of the packet carrying the Ack Vector just sent
+ * @nonce_sum: The sum of all buffer nonces contained in the Ack Vector
+ */
+int dccp_ackvec_update_records(struct dccp_ackvec *av, u64 seqno, u8 nonce_sum)
{
- struct dccp_sock *dp = dccp_sk(sk);
- struct dccp_ackvec *av = dp->dccps_hc_rx_ackvec;
- /* Figure out how many options do we need to represent the ackvec */
- const u8 nr_opts = DIV_ROUND_UP(av->av_vec_len, DCCP_SINGLE_OPT_MAXLEN);
- u16 len = av->av_vec_len + 2 * nr_opts, i;
- u32 elapsed_time;
- const unsigned char *tail, *from;
- unsigned char *to;
struct dccp_ackvec_record *avr;
- suseconds_t delta;
-
- if (DCCP_SKB_CB(skb)->dccpd_opt_len + len > DCCP_MAX_OPT_LEN)
- return -1;
-
- delta = ktime_us_delta(ktime_get_real(), av->av_time);
- elapsed_time = delta / 10;
- if (elapsed_time != 0 &&
- dccp_insert_option_elapsed_time(skb, elapsed_time))
- return -1;
-
- avr = dccp_ackvec_record_new();
+ avr = kmem_cache_alloc(dccp_ackvec_record_slab, GFP_ATOMIC);
if (avr == NULL)
- return -1;
-
- DCCP_SKB_CB(skb)->dccpd_opt_len += len;
-
- to = skb_push(skb, len);
- len = av->av_vec_len;
- from = av->av_buf + av->av_buf_head;
- tail = av->av_buf + DCCP_MAX_ACKVEC_LEN;
-
- for (i = 0; i < nr_opts; ++i) {
- int copylen = len;
-
- if (len > DCCP_SINGLE_OPT_MAXLEN)
- copylen = DCCP_SINGLE_OPT_MAXLEN;
-
- *to++ = DCCPO_ACK_VECTOR_0;
- *to++ = copylen + 2;
-
- /* Check if buf_head wraps */
- if (from + copylen > tail) {
- const u16 tailsize = tail - from;
-
- memcpy(to, from, tailsize);
- to += tailsize;
- len -= tailsize;
- copylen -= tailsize;
- from = av->av_buf;
- }
-
- memcpy(to, from, copylen);
- from += copylen;
- to += copylen;
- len -= copylen;
- }
+ return -ENOBUFS;
+ avr->avr_ack_seqno = seqno;
+ avr->avr_ack_ptr = av->av_buf_head;
+ avr->avr_ack_ackno = av->av_buf_ackno;
+ avr->avr_ack_nonce = nonce_sum;
+ avr->avr_ack_runlen = dccp_ackvec_runlen(av->av_buf + av->av_buf_head);
/*
- * From RFC 4340, A.2:
- *
- * For each acknowledgement it sends, the HC-Receiver will add an
- * acknowledgement record. ack_seqno will equal the HC-Receiver
- * sequence number it used for the ack packet; ack_ptr will equal
- * buf_head; ack_ackno will equal buf_ackno; and ack_nonce will
- * equal buf_nonce.
+ * When the buffer overflows, we keep no more than one record. This is
+ * the simplest way of disambiguating sender-Acks dating from before the
+ * overflow from sender-Acks which refer to after the overflow; a simple
+ * solution is preferable here since we are handling an exception.
*/
- avr->avr_ack_seqno = DCCP_SKB_CB(skb)->dccpd_seq;
- avr->avr_ack_ptr = av->av_buf_head;
- avr->avr_ack_ackno = av->av_buf_ackno;
- avr->avr_ack_nonce = av->av_buf_nonce;
- avr->avr_sent_len = av->av_vec_len;
-
- dccp_ackvec_insert_avr(av, avr);
+ if (av->av_overflow)
+ dccp_ackvec_purge_records(av);
+ /*
+ * Since GSS is incremented for each packet, the list is automatically
+ * arranged in descending order of @ack_seqno.
+ */
+ list_add(&avr->avr_node, &av->av_records);
- dccp_pr_debug("%s ACK Vector 0, len=%d, ack_seqno=%llu, "
- "ack_ackno=%llu\n",
- dccp_role(sk), avr->avr_sent_len,
+ dccp_pr_debug("Added Vector, ack_seqno=%llu, ack_ackno=%llu (rl=%u)\n",
(unsigned long long)avr->avr_ack_seqno,
- (unsigned long long)avr->avr_ack_ackno);
+ (unsigned long long)avr->avr_ack_ackno,
+ avr->avr_ack_runlen);
return 0;
}
-struct dccp_ackvec *dccp_ackvec_alloc(const gfp_t priority)
+static struct dccp_ackvec_record *dccp_ackvec_lookup(struct list_head *av_list,
+ const u64 ackno)
{
- struct dccp_ackvec *av = kmem_cache_alloc(dccp_ackvec_slab, priority);
-
- if (av != NULL) {
- av->av_buf_head = DCCP_MAX_ACKVEC_LEN - 1;
- av->av_buf_ackno = UINT48_MAX + 1;
- av->av_buf_nonce = 0;
- av->av_time = ktime_set(0, 0);
- av->av_vec_len = 0;
- INIT_LIST_HEAD(&av->av_records);
+ struct dccp_ackvec_record *avr;
+ /*
+ * Exploit that records are inserted in descending order of sequence
+ * number, start with the oldest record first. If @ackno is `before'
+ * the earliest ack_seqno, the packet is too old to be considered.
+ */
+ list_for_each_entry_reverse(avr, av_list, avr_node) {
+ if (avr->avr_ack_seqno == ackno)
+ return avr;
+ if (before48(ackno, avr->avr_ack_seqno))
+ break;
}
-
- return av;
+ return NULL;
}
-void dccp_ackvec_free(struct dccp_ackvec *av)
+/*
+ * Buffer index and length computation using modulo-buffersize arithmetic.
+ * Note that, as pointers move from right to left, head is `before' tail.
+ */
+static inline u16 __ackvec_idx_add(const u16 a, const u16 b)
{
- if (unlikely(av == NULL))
- return;
-
- if (!list_empty(&av->av_records)) {
- struct dccp_ackvec_record *avr, *next;
-
- list_for_each_entry_safe(avr, next, &av->av_records, avr_node) {
- list_del_init(&avr->avr_node);
- dccp_ackvec_record_delete(avr);
- }
- }
-
- kmem_cache_free(dccp_ackvec_slab, av);
+ return (a + b) % DCCPAV_MAX_ACKVEC_LEN;
}
-static inline u8 dccp_ackvec_state(const struct dccp_ackvec *av,
- const u32 index)
+static inline u16 __ackvec_idx_sub(const u16 a, const u16 b)
{
- return av->av_buf[index] & DCCP_ACKVEC_STATE_MASK;
+ return __ackvec_idx_add(a, DCCPAV_MAX_ACKVEC_LEN - b);
}
-static inline u8 dccp_ackvec_len(const struct dccp_ackvec *av,
- const u32 index)
+u16 dccp_ackvec_buflen(const struct dccp_ackvec *av)
{
- return av->av_buf[index] & DCCP_ACKVEC_LEN_MASK;
+ if (unlikely(av->av_overflow))
+ return DCCPAV_MAX_ACKVEC_LEN;
+ return __ackvec_idx_sub(av->av_buf_tail, av->av_buf_head);
}
-/*
- * If several packets are missing, the HC-Receiver may prefer to enter multiple
- * bytes with run length 0, rather than a single byte with a larger run length;
- * this simplifies table updates if one of the missing packets arrives.
+/**
+ * dccp_ackvec_update_old - Update previous state as per RFC 4340, 11.4.1
+ * @av: non-empty buffer to update
+ * @distance: negative or zero distance of @seqno from buf_ackno downward
+ * @seqno: the (old) sequence number whose record is to be updated
+ * @state: state in which packet carrying @seqno was received
*/
-static inline int dccp_ackvec_set_buf_head_state(struct dccp_ackvec *av,
- const unsigned int packets,
- const unsigned char state)
+static void dccp_ackvec_update_old(struct dccp_ackvec *av, s64 distance,
+ u64 seqno, enum dccp_ackvec_states state)
{
- long gap;
- long new_head;
+ u16 ptr = av->av_buf_head;
- if (av->av_vec_len + packets > DCCP_MAX_ACKVEC_LEN)
- return -ENOBUFS;
+ BUG_ON(distance > 0);
+ if (unlikely(dccp_ackvec_is_empty(av)))
+ return;
- gap = packets - 1;
- new_head = av->av_buf_head - packets;
+ do {
+ u8 runlen = dccp_ackvec_runlen(av->av_buf + ptr);
- if (new_head < 0) {
- if (gap > 0) {
- memset(av->av_buf, DCCP_ACKVEC_STATE_NOT_RECEIVED,
- gap + new_head + 1);
- gap = -new_head;
+ if (distance + runlen >= 0) {
+ /*
+ * Only update the state if packet has not been received
+ * yet. This is OK as per the second table in RFC 4340,
+ * 11.4.1; i.e. here we are using the following table:
+ * RECEIVED
+ * 0 1 3
+ * S +---+---+---+
+ * T 0 | 0 | 0 | 0 |
+ * O +---+---+---+
+ * R 1 | 1 | 1 | 1 |
+ * E +---+---+---+
+ * D 3 | 0 | 1 | 3 |
+ * +---+---+---+
+ * The "Not Received" state was set by reserve_seats().
+ */
+ if (av->av_buf[ptr] == DCCPAV_NOT_RECEIVED)
+ av->av_buf[ptr] = state;
+ else
+ dccp_pr_debug("Not changing %llu state to %u\n",
+ (unsigned long long)seqno, state);
+ break;
}
- new_head += DCCP_MAX_ACKVEC_LEN;
- }
- av->av_buf_head = new_head;
+ distance += runlen + 1;
+ ptr = __ackvec_idx_add(ptr, 1);
- if (gap > 0)
- memset(av->av_buf + av->av_buf_head + 1,
- DCCP_ACKVEC_STATE_NOT_RECEIVED, gap);
+ } while (ptr != av->av_buf_tail);
+}
- av->av_buf[av->av_buf_head] = state;
- av->av_vec_len += packets;
- return 0;
+/* Mark @num entries after buf_head as "Not yet received". */
+static void dccp_ackvec_reserve_seats(struct dccp_ackvec *av, u16 num)
+{
+ u16 start = __ackvec_idx_add(av->av_buf_head, 1),
+ len = DCCPAV_MAX_ACKVEC_LEN - start;
+
+ /* check for buffer wrap-around */
+ if (num > len) {
+ memset(av->av_buf + start, DCCPAV_NOT_RECEIVED, len);
+ start = 0;
+ num -= len;
+ }
+ if (num)
+ memset(av->av_buf + start, DCCPAV_NOT_RECEIVED, num);
}
-/*
- * Implements the RFC 4340, Appendix A
+/**
+ * dccp_ackvec_add_new - Record one or more new entries in Ack Vector buffer
+ * @av: container of buffer to update (can be empty or non-empty)
+ * @num_packets: number of packets to register (must be >= 1)
+ * @seqno: sequence number of the first packet in @num_packets
+ * @state: state in which packet carrying @seqno was received
*/
-int dccp_ackvec_add(struct dccp_ackvec *av, const struct sock *sk,
- const u64 ackno, const u8 state)
+static void dccp_ackvec_add_new(struct dccp_ackvec *av, u32 num_packets,
+ u64 seqno, enum dccp_ackvec_states state)
{
- /*
- * Check at the right places if the buffer is full, if it is, tell the
- * caller to start dropping packets till the HC-Sender acks our ACK
- * vectors, when we will free up space in av_buf.
- *
- * We may well decide to do buffer compression, etc, but for now lets
- * just drop.
- *
- * From Appendix A.1.1 (`New Packets'):
- *
- * Of course, the circular buffer may overflow, either when the
- * HC-Sender is sending data at a very high rate, when the
- * HC-Receiver's acknowledgements are not reaching the HC-Sender,
- * or when the HC-Sender is forgetting to acknowledge those acks
- * (so the HC-Receiver is unable to clean up old state). In this
- * case, the HC-Receiver should either compress the buffer (by
- * increasing run lengths when possible), transfer its state to
- * a larger buffer, or, as a last resort, drop all received
- * packets, without processing them whatsoever, until its buffer
- * shrinks again.
- */
+ u32 num_cells = num_packets;
- /* See if this is the first ackno being inserted */
- if (av->av_vec_len == 0) {
- av->av_buf[av->av_buf_head] = state;
- av->av_vec_len = 1;
- } else if (after48(ackno, av->av_buf_ackno)) {
- const u64 delta = dccp_delta_seqno(av->av_buf_ackno, ackno);
+ if (num_packets > DCCPAV_BURST_THRESH) {
+ u32 lost_packets = num_packets - 1;
+ DCCP_WARN("Warning: large burst loss (%u)\n", lost_packets);
/*
- * Look if the state of this packet is the same as the
- * previous ackno and if so if we can bump the head len.
+ * We received 1 packet and have a loss of size "num_packets-1"
+ * which we squeeze into num_cells-1 rather than reserving an
+ * entire byte for each lost packet.
+ * The reason is that the vector grows in O(burst_length); when
+ * it grows too large there will be no room left for the payload.
+ * This is a trade-off: if a few packets out of the burst show
+ * up later, their state will not be changed; it is simply too
+ * costly to reshuffle/reallocate/copy the buffer each time.
+ * Should such problems persist, we will need to switch to a
+ * different underlying data structure.
*/
- if (delta == 1 &&
- dccp_ackvec_state(av, av->av_buf_head) == state &&
- dccp_ackvec_len(av, av->av_buf_head) < DCCP_ACKVEC_LEN_MASK)
- av->av_buf[av->av_buf_head]++;
- else if (dccp_ackvec_set_buf_head_state(av, delta, state))
- return -ENOBUFS;
- } else {
- /*
- * A.1.2. Old Packets
- *
- * When a packet with Sequence Number S <= buf_ackno
- * arrives, the HC-Receiver will scan the table for
- * the byte corresponding to S. (Indexing structures
- * could reduce the complexity of this scan.)
- */
- u64 delta = dccp_delta_seqno(ackno, av->av_buf_ackno);
- u32 index = av->av_buf_head;
+ for (num_packets = num_cells = 1; lost_packets; ++num_cells) {
+ u8 len = min(lost_packets, (u32)DCCPAV_MAX_RUNLEN);
- while (1) {
- const u8 len = dccp_ackvec_len(av, index);
- const u8 av_state = dccp_ackvec_state(av, index);
- /*
- * valid packets not yet in av_buf have a reserved
- * entry, with a len equal to 0.
- */
- if (av_state == DCCP_ACKVEC_STATE_NOT_RECEIVED &&
- len == 0 && delta == 0) { /* Found our
- reserved seat! */
- dccp_pr_debug("Found %llu reserved seat!\n",
- (unsigned long long)ackno);
- av->av_buf[index] = state;
- goto out;
- }
- /* len == 0 means one packet */
- if (delta < len + 1)
- goto out_duplicate;
-
- delta -= len + 1;
- if (++index == DCCP_MAX_ACKVEC_LEN)
- index = 0;
+ av->av_buf_head = __ackvec_idx_sub(av->av_buf_head, 1);
+ av->av_buf[av->av_buf_head] = DCCPAV_NOT_RECEIVED | len;
+
+ lost_packets -= len;
}
}
- av->av_buf_ackno = ackno;
- av->av_time = ktime_get_real();
-out:
- return 0;
+ if (num_cells + dccp_ackvec_buflen(av) >= DCCPAV_MAX_ACKVEC_LEN) {
+ DCCP_CRIT("Ack Vector buffer overflow: dropping old entries\n");
+ av->av_overflow = true;
+ }
+
+ av->av_buf_head = __ackvec_idx_sub(av->av_buf_head, num_packets);
+ if (av->av_overflow)
+ av->av_buf_tail = av->av_buf_head;
-out_duplicate:
- /* Duplicate packet */
- dccp_pr_debug("Received a dup or already considered lost "
- "packet: %llu\n", (unsigned long long)ackno);
- return -EILSEQ;
+ av->av_buf[av->av_buf_head] = state;
+ av->av_buf_ackno = seqno;
+
+ if (num_packets > 1)
+ dccp_ackvec_reserve_seats(av, num_packets - 1);
}
-static void dccp_ackvec_throw_record(struct dccp_ackvec *av,
- struct dccp_ackvec_record *avr)
+/**
+ * dccp_ackvec_input - Register incoming packet in the buffer
+ */
+void dccp_ackvec_input(struct dccp_ackvec *av, struct sk_buff *skb)
{
- struct dccp_ackvec_record *next;
+ u64 seqno = DCCP_SKB_CB(skb)->dccpd_seq;
+ enum dccp_ackvec_states state = DCCPAV_RECEIVED;
- /* sort out vector length */
- if (av->av_buf_head <= avr->avr_ack_ptr)
- av->av_vec_len = avr->avr_ack_ptr - av->av_buf_head;
- else
- av->av_vec_len = DCCP_MAX_ACKVEC_LEN - 1 -
- av->av_buf_head + avr->avr_ack_ptr;
+ if (dccp_ackvec_is_empty(av)) {
+ dccp_ackvec_add_new(av, 1, seqno, state);
+ av->av_tail_ackno = seqno;
- /* free records */
- list_for_each_entry_safe_from(avr, next, &av->av_records, avr_node) {
- list_del_init(&avr->avr_node);
- dccp_ackvec_record_delete(avr);
- }
-}
+ } else {
+ s64 num_packets = dccp_delta_seqno(av->av_buf_ackno, seqno);
+ u8 *current_head = av->av_buf + av->av_buf_head;
-void dccp_ackvec_check_rcv_ackno(struct dccp_ackvec *av, struct sock *sk,
- const u64 ackno)
-{
- struct dccp_ackvec_record *avr;
+ if (num_packets == 1 &&
+ dccp_ackvec_state(current_head) == state &&
+ dccp_ackvec_runlen(current_head) < DCCPAV_MAX_RUNLEN) {
- /*
- * If we traverse backwards, it should be faster when we have large
- * windows. We will be receiving ACKs for stuff we sent a while back
- * -sorbo.
- */
- list_for_each_entry_reverse(avr, &av->av_records, avr_node) {
- if (ackno == avr->avr_ack_seqno) {
- dccp_pr_debug("%s ACK packet 0, len=%d, ack_seqno=%llu, "
- "ack_ackno=%llu, ACKED!\n",
- dccp_role(sk), 1,
- (unsigned long long)avr->avr_ack_seqno,
- (unsigned long long)avr->avr_ack_ackno);
- dccp_ackvec_throw_record(av, avr);
- break;
- } else if (avr->avr_ack_seqno > ackno)
- break; /* old news */
+ *current_head += 1;
+ av->av_buf_ackno = seqno;
+
+ } else if (num_packets > 0) {
+ dccp_ackvec_add_new(av, num_packets, seqno, state);
+ } else {
+ dccp_ackvec_update_old(av, num_packets, seqno, state);
+ }
}
}
-static void dccp_ackvec_check_rcv_ackvector(struct dccp_ackvec *av,
- struct sock *sk, u64 *ackno,
- const unsigned char len,
- const unsigned char *vector)
+/**
+ * dccp_ackvec_clear_state - Perform house-keeping / garbage-collection
+ * This routine is called when the peer acknowledges the receipt of Ack Vectors
+ * up to and including @ackno. While based on section A.3 of RFC 4340, here
+ * are additional precautions to prevent corrupted buffer state. In particular,
+ * we use tail_ackno to identify outdated records; it always marks the earliest
+ * packet of group (2) in 11.4.2.
+ */
+void dccp_ackvec_clear_state(struct dccp_ackvec *av, const u64 ackno)
{
- unsigned char i;
- struct dccp_ackvec_record *avr;
+ struct dccp_ackvec_record *avr, *next;
+ u8 runlen_now, eff_runlen;
+ s64 delta;
- /* Check if we actually sent an ACK vector */
- if (list_empty(&av->av_records))
+ avr = dccp_ackvec_lookup(&av->av_records, ackno);
+ if (avr == NULL)
return;
+ /*
+ * Deal with outdated acknowledgments: this arises when e.g. there are
+ * several old records and the acks from the peer come in slowly. In
+ * that case we may still have records that pre-date tail_ackno.
+ */
+ delta = dccp_delta_seqno(av->av_tail_ackno, avr->avr_ack_ackno);
+ if (delta < 0)
+ goto free_records;
+ /*
+ * Deal with overlapping Ack Vectors: don't subtract more than the
+ * number of packets between tail_ackno and ack_ackno.
+ */
+ eff_runlen = delta < avr->avr_ack_runlen ? delta : avr->avr_ack_runlen;
- i = len;
+ runlen_now = dccp_ackvec_runlen(av->av_buf + avr->avr_ack_ptr);
/*
- * XXX
- * I think it might be more efficient to work backwards. See comment on
- * rcv_ackno. -sorbo.
+ * The run length of Ack Vector cells does not decrease over time. If
+ * the run length is the same as at the time the Ack Vector was sent, we
+ * free the ack_ptr cell. That cell can however not be freed if the run
+ * length has increased: in this case we need to move the tail pointer
+ * backwards (towards higher indices), to its next-oldest neighbour.
*/
- avr = list_entry(av->av_records.next, struct dccp_ackvec_record, avr_node);
- while (i--) {
- const u8 rl = *vector & DCCP_ACKVEC_LEN_MASK;
- u64 ackno_end_rl;
+ if (runlen_now > eff_runlen) {
- dccp_set_seqno(&ackno_end_rl, *ackno - rl);
+ av->av_buf[avr->avr_ack_ptr] -= eff_runlen + 1;
+ av->av_buf_tail = __ackvec_idx_add(avr->avr_ack_ptr, 1);
+ /* This move may not have cleared the overflow flag. */
+ if (av->av_overflow)
+ av->av_overflow = (av->av_buf_head == av->av_buf_tail);
+ } else {
+ av->av_buf_tail = avr->avr_ack_ptr;
/*
- * If our AVR sequence number is greater than the ack, go
- * forward in the AVR list until it is not so.
+ * We have made sure that avr points to a valid cell within the
+ * buffer. This cell is either older than head, or equals head
+ * (empty buffer): in both cases we no longer have any overflow.
*/
- list_for_each_entry_from(avr, &av->av_records, avr_node) {
- if (!after48(avr->avr_ack_seqno, *ackno))
- goto found;
- }
- /* End of the av_records list, not found, exit */
- break;
-found:
- if (between48(avr->avr_ack_seqno, ackno_end_rl, *ackno)) {
- const u8 state = *vector & DCCP_ACKVEC_STATE_MASK;
- if (state != DCCP_ACKVEC_STATE_NOT_RECEIVED) {
- dccp_pr_debug("%s ACK vector 0, len=%d, "
- "ack_seqno=%llu, ack_ackno=%llu, "
- "ACKED!\n",
- dccp_role(sk), len,
- (unsigned long long)
- avr->avr_ack_seqno,
- (unsigned long long)
- avr->avr_ack_ackno);
- dccp_ackvec_throw_record(av, avr);
- break;
- }
- /*
- * If it wasn't received, continue scanning... we might
- * find another one.
- */
- }
+ av->av_overflow = 0;
+ }
- dccp_set_seqno(ackno, ackno_end_rl - 1);
- ++vector;
+ /*
+ * The peer has acknowledged up to and including ack_ackno. Hence the
+ * first packet in group (2) of 11.4.2 is the successor of ack_ackno.
+ */
+ av->av_tail_ackno = ADD48(avr->avr_ack_ackno, 1);
+
+free_records:
+ list_for_each_entry_safe_from(avr, next, &av->av_records, avr_node) {
+ list_del(&avr->avr_node);
+ kmem_cache_free(dccp_ackvec_record_slab, avr);
}
}
-int dccp_ackvec_parse(struct sock *sk, const struct sk_buff *skb,
- u64 *ackno, const u8 opt, const u8 *value, const u8 len)
+/*
+ * Routines to keep track of Ack Vectors received in an skb
+ */
+int dccp_ackvec_parsed_add(struct list_head *head, u8 *vec, u8 len, u8 nonce)
{
- if (len > DCCP_SINGLE_OPT_MAXLEN)
- return -1;
+ struct dccp_ackvec_parsed *new = kmalloc(sizeof(*new), GFP_ATOMIC);
+
+ if (new == NULL)
+ return -ENOBUFS;
+ new->vec = vec;
+ new->len = len;
+ new->nonce = nonce;
- /* dccp_ackvector_print(DCCP_SKB_CB(skb)->dccpd_ack_seq, value, len); */
- dccp_ackvec_check_rcv_ackvector(dccp_sk(sk)->dccps_hc_rx_ackvec, sk,
- ackno, len, value);
+ list_add_tail(&new->node, head);
return 0;
}
+EXPORT_SYMBOL_GPL(dccp_ackvec_parsed_add);
+
+void dccp_ackvec_parsed_cleanup(struct list_head *parsed_chunks)
+{
+ struct dccp_ackvec_parsed *cur, *next;
+
+ list_for_each_entry_safe(cur, next, parsed_chunks, node)
+ kfree(cur);
+ INIT_LIST_HEAD(parsed_chunks);
+}
+EXPORT_SYMBOL_GPL(dccp_ackvec_parsed_cleanup);
int __init dccp_ackvec_init(void)
{
@@ -448,10 +379,9 @@ int __init dccp_ackvec_init(void)
if (dccp_ackvec_slab == NULL)
goto out_err;
- dccp_ackvec_record_slab =
- kmem_cache_create("dccp_ackvec_record",
- sizeof(struct dccp_ackvec_record),
- 0, SLAB_HWCACHE_ALIGN, NULL);
+ dccp_ackvec_record_slab = kmem_cache_create("dccp_ackvec_record",
+ sizeof(struct dccp_ackvec_record),
+ 0, SLAB_HWCACHE_ALIGN, NULL);
if (dccp_ackvec_record_slab == NULL)
goto out_destroy_slab;
diff --git a/net/dccp/ackvec.h b/net/dccp/ackvec.h
index 7ea557b7c6b..e2ab0627a5f 100644
--- a/net/dccp/ackvec.h
+++ b/net/dccp/ackvec.h
@@ -3,9 +3,9 @@
/*
* net/dccp/ackvec.h
*
- * An implementation of the DCCP protocol
+ * An implementation of Ack Vectors for the DCCP protocol
+ * Copyright (c) 2007 University of Aberdeen, Scotland, UK
* Copyright (c) 2005 Arnaldo Carvalho de Melo <acme@mandriva.com>
- *
* This program is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
@@ -13,99 +13,124 @@
#include <linux/dccp.h>
#include <linux/compiler.h>
-#include <linux/ktime.h>
#include <linux/list.h>
#include <linux/types.h>
-/* We can spread an ack vector across multiple options */
-#define DCCP_MAX_ACKVEC_LEN (DCCP_SINGLE_OPT_MAXLEN * 2)
+/*
+ * Ack Vector buffer space is static, in multiples of %DCCP_SINGLE_OPT_MAXLEN,
+ * the maximum size of a single Ack Vector. Setting %DCCPAV_NUM_ACKVECS to 1
+ * will be sufficient for most cases of low Ack Ratios, using a value of 2 gives
+ * more headroom if Ack Ratio is higher or when the sender acknowledges slowly.
+ * The maximum value is bounded by the u16 types for indices and functions.
+ */
+#define DCCPAV_NUM_ACKVECS 2
+#define DCCPAV_MAX_ACKVEC_LEN (DCCP_SINGLE_OPT_MAXLEN * DCCPAV_NUM_ACKVECS)
/* Estimated minimum average Ack Vector length - used for updating MPS */
#define DCCPAV_MIN_OPTLEN 16
-#define DCCP_ACKVEC_STATE_RECEIVED 0
-#define DCCP_ACKVEC_STATE_ECN_MARKED (1 << 6)
-#define DCCP_ACKVEC_STATE_NOT_RECEIVED (3 << 6)
+/* Threshold for coping with large bursts of losses */
+#define DCCPAV_BURST_THRESH (DCCPAV_MAX_ACKVEC_LEN / 8)
-#define DCCP_ACKVEC_STATE_MASK 0xC0 /* 11000000 */
-#define DCCP_ACKVEC_LEN_MASK 0x3F /* 00111111 */
+enum dccp_ackvec_states {
+ DCCPAV_RECEIVED = 0x00,
+ DCCPAV_ECN_MARKED = 0x40,
+ DCCPAV_RESERVED = 0x80,
+ DCCPAV_NOT_RECEIVED = 0xC0
+};
+#define DCCPAV_MAX_RUNLEN 0x3F
-/** struct dccp_ackvec - ack vector
- *
- * This data structure is the one defined in RFC 4340, Appendix A.
- *
- * @av_buf_head - circular buffer head
- * @av_buf_tail - circular buffer tail
- * @av_buf_ackno - ack # of the most recent packet acknowledgeable in the
- * buffer (i.e. %av_buf_head)
- * @av_buf_nonce - the one-bit sum of the ECN Nonces on all packets acked
- * by the buffer with State 0
- *
- * Additionally, the HC-Receiver must keep some information about the
- * Ack Vectors it has recently sent. For each packet sent carrying an
- * Ack Vector, it remembers four variables:
+static inline u8 dccp_ackvec_runlen(const u8 *cell)
+{
+ return *cell & DCCPAV_MAX_RUNLEN;
+}
+
+static inline u8 dccp_ackvec_state(const u8 *cell)
+{
+ return *cell & ~DCCPAV_MAX_RUNLEN;
+}
+
+/** struct dccp_ackvec - Ack Vector main data structure
*
- * @av_records - list of dccp_ackvec_record
- * @av_ack_nonce - the one-bit sum of the ECN Nonces for all State 0.
+ * This implements a fixed-size circular buffer within an array and is largely
+ * based on Appendix A of RFC 4340.
*
- * @av_time - the time in usecs
- * @av_buf - circular buffer of acknowledgeable packets
+ * @av_buf: circular buffer storage area
+ * @av_buf_head: head index; begin of live portion in @av_buf
+ * @av_buf_tail: tail index; first index _after_ the live portion in @av_buf
+ * @av_buf_ackno: highest seqno of acknowledgeable packet recorded in @av_buf
+ * @av_tail_ackno: lowest seqno of acknowledgeable packet recorded in @av_buf
+ * @av_buf_nonce: ECN nonce sums, each covering subsequent segments of up to
+ * %DCCP_SINGLE_OPT_MAXLEN cells in the live portion of @av_buf
+ * @av_overflow: if 1 then buf_head == buf_tail indicates buffer wraparound
+ * @av_records: list of %dccp_ackvec_record (Ack Vectors sent previously)
*/
struct dccp_ackvec {
- u64 av_buf_ackno;
- struct list_head av_records;
- ktime_t av_time;
+ u8 av_buf[DCCPAV_MAX_ACKVEC_LEN];
u16 av_buf_head;
- u16 av_vec_len;
- u8 av_buf_nonce;
- u8 av_ack_nonce;
- u8 av_buf[DCCP_MAX_ACKVEC_LEN];
+ u16 av_buf_tail;
+ u64 av_buf_ackno:48;
+ u64 av_tail_ackno:48;
+ bool av_buf_nonce[DCCPAV_NUM_ACKVECS];
+ u8 av_overflow:1;
+ struct list_head av_records;
};
-/** struct dccp_ackvec_record - ack vector record
+/** struct dccp_ackvec_record - Records information about sent Ack Vectors
*
- * ACK vector record as defined in Appendix A of spec.
+ * These list entries define the additional information which the HC-Receiver
+ * keeps about recently-sent Ack Vectors; again refer to RFC 4340, Appendix A.
*
- * The list is sorted by avr_ack_seqno
+ * @avr_node: the list node in @av_records
+ * @avr_ack_seqno: sequence number of the packet the Ack Vector was sent on
+ * @avr_ack_ackno: the Ack number that this record/Ack Vector refers to
+ * @avr_ack_ptr: pointer into @av_buf where this record starts
+ * @avr_ack_runlen: run length of @avr_ack_ptr at the time of sending
+ * @avr_ack_nonce: the sum of @av_buf_nonce's at the time this record was sent
*
- * @avr_node - node in av_records
- * @avr_ack_seqno - sequence number of the packet this record was sent on
- * @avr_ack_ackno - sequence number being acknowledged
- * @avr_ack_ptr - pointer into av_buf where this record starts
- * @avr_ack_nonce - av_ack_nonce at the time this record was sent
- * @avr_sent_len - lenght of the record in av_buf
+ * The list as a whole is sorted in descending order by @avr_ack_seqno.
*/
struct dccp_ackvec_record {
struct list_head avr_node;
- u64 avr_ack_seqno;
- u64 avr_ack_ackno;
+ u64 avr_ack_seqno:48;
+ u64 avr_ack_ackno:48;
u16 avr_ack_ptr;
- u16 avr_sent_len;
- u8 avr_ack_nonce;
+ u8 avr_ack_runlen;
+ u8 avr_ack_nonce:1;
};
-struct sock;
-struct sk_buff;
-
extern int dccp_ackvec_init(void);
extern void dccp_ackvec_exit(void);
extern struct dccp_ackvec *dccp_ackvec_alloc(const gfp_t priority);
extern void dccp_ackvec_free(struct dccp_ackvec *av);
-extern int dccp_ackvec_add(struct dccp_ackvec *av, const struct sock *sk,
- const u64 ackno, const u8 state);
-
-extern void dccp_ackvec_check_rcv_ackno(struct dccp_ackvec *av,
- struct sock *sk, const u64 ackno);
-extern int dccp_ackvec_parse(struct sock *sk, const struct sk_buff *skb,
- u64 *ackno, const u8 opt,
- const u8 *value, const u8 len);
+extern void dccp_ackvec_input(struct dccp_ackvec *av, struct sk_buff *skb);
+extern int dccp_ackvec_update_records(struct dccp_ackvec *av, u64 seq, u8 sum);
+extern void dccp_ackvec_clear_state(struct dccp_ackvec *av, const u64 ackno);
+extern u16 dccp_ackvec_buflen(const struct dccp_ackvec *av);
-extern int dccp_insert_option_ackvec(struct sock *sk, struct sk_buff *skb);
-
-static inline int dccp_ackvec_pending(const struct dccp_ackvec *av)
+static inline bool dccp_ackvec_is_empty(const struct dccp_ackvec *av)
{
- return av->av_vec_len;
+ return av->av_overflow == 0 && av->av_buf_head == av->av_buf_tail;
}
+
+/**
+ * struct dccp_ackvec_parsed - Record offsets of Ack Vectors in skb
+ * @vec: start of vector (offset into skb)
+ * @len: length of @vec
+ * @nonce: whether @vec had an ECN nonce of 0 or 1
+ * @node: FIFO - arranged in descending order of ack_ackno
+ * This structure is used by CCIDs to access Ack Vectors in a received skb.
+ */
+struct dccp_ackvec_parsed {
+ u8 *vec,
+ len,
+ nonce:1;
+ struct list_head node;
+};
+
+extern int dccp_ackvec_parsed_add(struct list_head *head,
+ u8 *vec, u8 len, u8 nonce);
+extern void dccp_ackvec_parsed_cleanup(struct list_head *parsed_chunks);
#endif /* _ACKVEC_H */
diff --git a/net/dccp/ccids/ccid2.c b/net/dccp/ccids/ccid2.c
index 6576eae9e77..e96d5e81003 100644
--- a/net/dccp/ccids/ccid2.c
+++ b/net/dccp/ccids/ccid2.c
@@ -246,68 +246,6 @@ static void ccid2_hc_tx_packet_sent(struct sock *sk, unsigned int len)
#endif
}
-/* XXX Lame code duplication!
- * returns -1 if none was found.
- * else returns the next offset to use in the function call.
- */
-static int ccid2_ackvector(struct sock *sk, struct sk_buff *skb, int offset,
- unsigned char **vec, unsigned char *veclen)
-{
- const struct dccp_hdr *dh = dccp_hdr(skb);
- unsigned char *options = (unsigned char *)dh + dccp_hdr_len(skb);
- unsigned char *opt_ptr;
- const unsigned char *opt_end = (unsigned char *)dh +
- (dh->dccph_doff * 4);
- unsigned char opt, len;
- unsigned char *value;
-
- BUG_ON(offset < 0);
- options += offset;
- opt_ptr = options;
- if (opt_ptr >= opt_end)
- return -1;
-
- while (opt_ptr != opt_end) {
- opt = *opt_ptr++;
- len = 0;
- value = NULL;
-
- /* Check if this isn't a single byte option */
- if (opt > DCCPO_MAX_RESERVED) {
- if (opt_ptr == opt_end)
- goto out_invalid_option;
-
- len = *opt_ptr++;
- if (len < 3)
- goto out_invalid_option;
- /*
- * Remove the type and len fields, leaving
- * just the value size
- */
- len -= 2;
- value = opt_ptr;
- opt_ptr += len;
-
- if (opt_ptr > opt_end)
- goto out_invalid_option;
- }
-
- switch (opt) {
- case DCCPO_ACK_VECTOR_0:
- case DCCPO_ACK_VECTOR_1:
- *vec = value;
- *veclen = len;
- return offset + (opt_ptr - options);
- }
- }
-
- return -1;
-
-out_invalid_option:
- DCCP_BUG("Invalid option - this should not happen (previous parsing)!");
- return -1;
-}
-
/**
* ccid2_rtt_estimator - Sample RTT and compute RTO using RFC2988 algorithm
* This code is almost identical with TCP's tcp_rtt_estimator(), since
@@ -432,16 +370,28 @@ static void ccid2_congestion_event(struct sock *sk, struct ccid2_seq *seqp)
ccid2_change_l_ack_ratio(sk, hc->tx_cwnd);
}
+static int ccid2_hc_tx_parse_options(struct sock *sk, u8 packet_type,
+ u8 option, u8 *optval, u8 optlen)
+{
+ struct ccid2_hc_tx_sock *hc = ccid2_hc_tx_sk(sk);
+
+ switch (option) {
+ case DCCPO_ACK_VECTOR_0:
+ case DCCPO_ACK_VECTOR_1:
+ return dccp_ackvec_parsed_add(&hc->tx_av_chunks, optval, optlen,
+ option - DCCPO_ACK_VECTOR_0);
+ }
+ return 0;
+}
+
static void ccid2_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb)
{
struct dccp_sock *dp = dccp_sk(sk);
struct ccid2_hc_tx_sock *hc = ccid2_hc_tx_sk(sk);
const bool sender_was_blocked = ccid2_cwnd_network_limited(hc);
+ struct dccp_ackvec_parsed *avp;
u64 ackno, seqno;
struct ccid2_seq *seqp;
- unsigned char *vector;
- unsigned char veclen;
- int offset = 0;
int done = 0;
unsigned int maxincr = 0;
@@ -475,17 +425,12 @@ static void ccid2_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb)
}
/* check forward path congestion */
- /* still didn't send out new data packets */
- if (hc->tx_seqh == hc->tx_seqt)
+ if (dccp_packet_without_ack(skb))
return;
- switch (DCCP_SKB_CB(skb)->dccpd_type) {
- case DCCP_PKT_ACK:
- case DCCP_PKT_DATAACK:
- break;
- default:
- return;
- }
+ /* still didn't send out new data packets */
+ if (hc->tx_seqh == hc->tx_seqt)
+ goto done;
ackno = DCCP_SKB_CB(skb)->dccpd_ack_seq;
if (after48(ackno, hc->tx_high_ack))
@@ -509,16 +454,16 @@ static void ccid2_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb)
maxincr = DIV_ROUND_UP(dp->dccps_l_ack_ratio, 2);
/* go through all ack vectors */
- while ((offset = ccid2_ackvector(sk, skb, offset,
- &vector, &veclen)) != -1) {
+ list_for_each_entry(avp, &hc->tx_av_chunks, node) {
/* go through this ack vector */
- while (veclen--) {
- const u8 rl = *vector & DCCP_ACKVEC_LEN_MASK;
- u64 ackno_end_rl = SUB48(ackno, rl);
+ for (; avp->len--; avp->vec++) {
+ u64 ackno_end_rl = SUB48(ackno,
+ dccp_ackvec_runlen(avp->vec));
- ccid2_pr_debug("ackvec start:%llu end:%llu\n",
+ ccid2_pr_debug("ackvec %llu |%u,%u|\n",
(unsigned long long)ackno,
- (unsigned long long)ackno_end_rl);
+ dccp_ackvec_state(avp->vec) >> 6,
+ dccp_ackvec_runlen(avp->vec));
/* if the seqno we are analyzing is larger than the
* current ackno, then move towards the tail of our
* seqnos.
@@ -537,17 +482,15 @@ static void ccid2_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb)
* run length
*/
while (between48(seqp->ccid2s_seq,ackno_end_rl,ackno)) {
- const u8 state = *vector &
- DCCP_ACKVEC_STATE_MASK;
+ const u8 state = dccp_ackvec_state(avp->vec);
/* new packet received or marked */
- if (state != DCCP_ACKVEC_STATE_NOT_RECEIVED &&
+ if (state != DCCPAV_NOT_RECEIVED &&
!seqp->ccid2s_acked) {
- if (state ==
- DCCP_ACKVEC_STATE_ECN_MARKED) {
+ if (state == DCCPAV_ECN_MARKED)
ccid2_congestion_event(sk,
seqp);
- } else
+ else
ccid2_new_ack(sk, seqp,
&maxincr);
@@ -566,7 +509,6 @@ static void ccid2_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb)
break;
ackno = SUB48(ackno_end_rl, 1);
- vector++;
}
if (done)
break;
@@ -634,10 +576,11 @@ static void ccid2_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb)
sk_stop_timer(sk, &hc->tx_rtotimer);
else
sk_reset_timer(sk, &hc->tx_rtotimer, jiffies + hc->tx_rto);
-
+done:
/* check if incoming Acks allow pending packets to be sent */
if (sender_was_blocked && !ccid2_cwnd_network_limited(hc))
tasklet_schedule(&dccp_sk(sk)->dccps_xmitlet);
+ dccp_ackvec_parsed_cleanup(&hc->tx_av_chunks);
}
static int ccid2_hc_tx_init(struct ccid *ccid, struct sock *sk)
@@ -666,6 +609,7 @@ static int ccid2_hc_tx_init(struct ccid *ccid, struct sock *sk)
hc->tx_last_cong = ccid2_time_stamp;
setup_timer(&hc->tx_rtotimer, ccid2_hc_tx_rto_expire,
(unsigned long)sk);
+ INIT_LIST_HEAD(&hc->tx_av_chunks);
return 0;
}
@@ -699,16 +643,17 @@ static void ccid2_hc_rx_packet_recv(struct sock *sk, struct sk_buff *skb)
}
struct ccid_operations ccid2_ops = {
- .ccid_id = DCCPC_CCID2,
- .ccid_name = "TCP-like",
- .ccid_hc_tx_obj_size = sizeof(struct ccid2_hc_tx_sock),
- .ccid_hc_tx_init = ccid2_hc_tx_init,
- .ccid_hc_tx_exit = ccid2_hc_tx_exit,
- .ccid_hc_tx_send_packet = ccid2_hc_tx_send_packet,
- .ccid_hc_tx_packet_sent = ccid2_hc_tx_packet_sent,
- .ccid_hc_tx_packet_recv = ccid2_hc_tx_packet_recv,
- .ccid_hc_rx_obj_size = sizeof(struct ccid2_hc_rx_sock),
- .ccid_hc_rx_packet_recv = ccid2_hc_rx_packet_recv,
+ .ccid_id = DCCPC_CCID2,
+ .ccid_name = "TCP-like",
+ .ccid_hc_tx_obj_size = sizeof(struct ccid2_hc_tx_sock),
+ .ccid_hc_tx_init = ccid2_hc_tx_init,
+ .ccid_hc_tx_exit = ccid2_hc_tx_exit,
+ .ccid_hc_tx_send_packet = ccid2_hc_tx_send_packet,
+ .ccid_hc_tx_packet_sent = ccid2_hc_tx_packet_sent,
+ .ccid_hc_tx_parse_options = ccid2_hc_tx_parse_options,
+ .ccid_hc_tx_packet_recv = ccid2_hc_tx_packet_recv,
+ .ccid_hc_rx_obj_size = sizeof(struct ccid2_hc_rx_sock),
+ .ccid_hc_rx_packet_recv = ccid2_hc_rx_packet_recv,
};
#ifdef CONFIG_IP_DCCP_CCID2_DEBUG
diff --git a/net/dccp/ccids/ccid2.h b/net/dccp/ccids/ccid2.h
index 25cb6b216ed..e9985dafc2c 100644
--- a/net/dccp/ccids/ccid2.h
+++ b/net/dccp/ccids/ccid2.h
@@ -55,6 +55,7 @@ struct ccid2_seq {
* @tx_rtt_seq: to decay RTTVAR at most once per flight
* @tx_rpseq: last consecutive seqno
* @tx_rpdupack: dupacks since rpseq
+ * @tx_av_chunks: list of Ack Vectors received on current skb
*/
struct ccid2_hc_tx_sock {
u32 tx_cwnd;
@@ -79,6 +80,7 @@ struct ccid2_hc_tx_sock {
int tx_rpdupack;
u32 tx_last_cong;
u64 tx_high_ack;
+ struct list_head tx_av_chunks;
};
static inline bool ccid2_cwnd_network_limited(struct ccid2_hc_tx_sock *hc)
diff --git a/net/dccp/dccp.h b/net/dccp/dccp.h
index a8ed459508b..19fafd59746 100644
--- a/net/dccp/dccp.h
+++ b/net/dccp/dccp.h
@@ -457,12 +457,15 @@ static inline void dccp_update_gss(struct sock *sk, u64 seq)
dp->dccps_awh = dp->dccps_gss;
}
+static inline int dccp_ackvec_pending(const struct sock *sk)
+{
+ return dccp_sk(sk)->dccps_hc_rx_ackvec != NULL &&
+ !dccp_ackvec_is_empty(dccp_sk(sk)->dccps_hc_rx_ackvec);
+}
+
static inline int dccp_ack_pending(const struct sock *sk)
{
- const struct dccp_sock *dp = dccp_sk(sk);
- return (dp->dccps_hc_rx_ackvec != NULL &&
- dccp_ackvec_pending(dp->dccps_hc_rx_ackvec)) ||
- inet_csk_ack_scheduled(sk);
+ return dccp_ackvec_pending(sk) || inet_csk_ack_scheduled(sk);
}
extern int dccp_feat_finalise_settings(struct dccp_sock *dp);
diff --git a/net/dccp/input.c b/net/dccp/input.c
index 265985370fa..7d230d14ce2 100644
--- a/net/dccp/input.c
+++ b/net/dccp/input.c
@@ -160,13 +160,15 @@ static void dccp_rcv_reset(struct sock *sk, struct sk_buff *skb)
dccp_time_wait(sk, DCCP_TIME_WAIT, 0);
}
-static void dccp_event_ack_recv(struct sock *sk, struct sk_buff *skb)
+static void dccp_handle_ackvec_processing(struct sock *sk, struct sk_buff *skb)
{
- struct dccp_sock *dp = dccp_sk(sk);
+ struct dccp_ackvec *av = dccp_sk(sk)->dccps_hc_rx_ackvec;
- if (dp->dccps_hc_rx_ackvec != NULL)
- dccp_ackvec_check_rcv_ackno(dp->dccps_hc_rx_ackvec, sk,
- DCCP_SKB_CB(skb)->dccpd_ack_seq);
+ if (av == NULL)
+ return;
+ if (DCCP_SKB_CB(skb)->dccpd_ack_seq != DCCP_PKT_WITHOUT_ACK_SEQ)
+ dccp_ackvec_clear_state(av, DCCP_SKB_CB(skb)->dccpd_ack_seq);
+ dccp_ackvec_input(av, skb);
}
static void dccp_deliver_input_to_ccids(struct sock *sk, struct sk_buff *skb)
@@ -365,22 +367,13 @@ discard:
int dccp_rcv_established(struct sock *sk, struct sk_buff *skb,
const struct dccp_hdr *dh, const unsigned len)
{
- struct dccp_sock *dp = dccp_sk(sk);
-
if (dccp_check_seqno(sk, skb))
goto discard;
if (dccp_parse_options(sk, NULL, skb))
return 1;
- if (DCCP_SKB_CB(skb)->dccpd_ack_seq != DCCP_PKT_WITHOUT_ACK_SEQ)
- dccp_event_ack_recv(sk, skb);
-
- if (dp->dccps_hc_rx_ackvec != NULL &&
- dccp_ackvec_add(dp->dccps_hc_rx_ackvec, sk,
- DCCP_SKB_CB(skb)->dccpd_seq,
- DCCP_ACKVEC_STATE_RECEIVED))
- goto discard;
+ dccp_handle_ackvec_processing(sk, skb);
dccp_deliver_input_to_ccids(sk, skb);
return __dccp_rcv_established(sk, skb, dh, len);
@@ -632,15 +625,7 @@ int dccp_rcv_state_process(struct sock *sk, struct sk_buff *skb,
if (dccp_parse_options(sk, NULL, skb))
return 1;
- if (dcb->dccpd_ack_seq != DCCP_PKT_WITHOUT_ACK_SEQ)
- dccp_event_ack_recv(sk, skb);
-
- if (dp->dccps_hc_rx_ackvec != NULL &&
- dccp_ackvec_add(dp->dccps_hc_rx_ackvec, sk,
- DCCP_SKB_CB(skb)->dccpd_seq,
- DCCP_ACKVEC_STATE_RECEIVED))
- goto discard;
-
+ dccp_handle_ackvec_processing(sk, skb);
dccp_deliver_input_to_ccids(sk, skb);
}
diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c
index 3f69ea11482..45a434f9416 100644
--- a/net/dccp/ipv4.c
+++ b/net/dccp/ipv4.c
@@ -462,15 +462,12 @@ static struct dst_entry* dccp_v4_route_skb(struct net *net, struct sock *sk,
{
struct rtable *rt;
struct flowi fl = { .oif = skb_rtable(skb)->rt_iif,
- .nl_u = { .ip4_u =
- { .daddr = ip_hdr(skb)->saddr,
- .saddr = ip_hdr(skb)->daddr,
- .tos = RT_CONN_FLAGS(sk) } },
+ .fl4_dst = ip_hdr(skb)->saddr,
+ .fl4_src = ip_hdr(skb)->daddr,
+ .fl4_tos = RT_CONN_FLAGS(sk),
.proto = sk->sk_protocol,
- .uli_u = { .ports =
- { .sport = dccp_hdr(skb)->dccph_dport,
- .dport = dccp_hdr(skb)->dccph_sport }
- }
+ .fl_ip_sport = dccp_hdr(skb)->dccph_dport,
+ .fl_ip_dport = dccp_hdr(skb)->dccph_sport
};
security_skb_classify_flow(skb, &fl);
diff --git a/net/dccp/options.c b/net/dccp/options.c
index cd306181300..f06ffcfc8d7 100644
--- a/net/dccp/options.c
+++ b/net/dccp/options.c
@@ -54,7 +54,6 @@ int dccp_parse_options(struct sock *sk, struct dccp_request_sock *dreq,
struct dccp_sock *dp = dccp_sk(sk);
const struct dccp_hdr *dh = dccp_hdr(skb);
const u8 pkt_type = DCCP_SKB_CB(skb)->dccpd_type;
- u64 ackno = DCCP_SKB_CB(skb)->dccpd_ack_seq;
unsigned char *options = (unsigned char *)dh + dccp_hdr_len(skb);
unsigned char *opt_ptr = options;
const unsigned char *opt_end = (unsigned char *)dh +
@@ -129,14 +128,6 @@ int dccp_parse_options(struct sock *sk, struct dccp_request_sock *dreq,
if (rc)
goto out_featneg_failed;
break;
- case DCCPO_ACK_VECTOR_0:
- case DCCPO_ACK_VECTOR_1:
- if (dccp_packet_without_ack(skb)) /* RFC 4340, 11.4 */
- break;
- if (dp->dccps_hc_rx_ackvec != NULL &&
- dccp_ackvec_parse(sk, skb, &ackno, opt, value, len))
- goto out_invalid_option;
- break;
case DCCPO_TIMESTAMP:
if (len != 4)
goto out_invalid_option;
@@ -226,6 +217,16 @@ int dccp_parse_options(struct sock *sk, struct dccp_request_sock *dreq,
pkt_type, opt, value, len))
goto out_invalid_option;
break;
+ case DCCPO_ACK_VECTOR_0:
+ case DCCPO_ACK_VECTOR_1:
+ if (dccp_packet_without_ack(skb)) /* RFC 4340, 11.4 */
+ break;
+ /*
+ * Ack vectors are processed by the TX CCID if it is
+ * interested. The RX CCID need not parse Ack Vectors,
+ * since it is only interested in clearing old state.
+ * Fall through.
+ */
case DCCPO_MIN_TX_CCID_SPECIFIC ... DCCPO_MAX_TX_CCID_SPECIFIC:
if (ccid_hc_tx_parse_options(dp->dccps_hc_tx_ccid, sk,
pkt_type, opt, value, len))
@@ -340,6 +341,7 @@ static inline int dccp_elapsed_time_len(const u32 elapsed_time)
return elapsed_time == 0 ? 0 : elapsed_time <= 0xFFFF ? 2 : 4;
}
+/* FIXME: This function is currently not used anywhere */
int dccp_insert_option_elapsed_time(struct sk_buff *skb, u32 elapsed_time)
{
const int elapsed_time_len = dccp_elapsed_time_len(elapsed_time);
@@ -424,6 +426,83 @@ static int dccp_insert_option_timestamp_echo(struct dccp_sock *dp,
return 0;
}
+static int dccp_insert_option_ackvec(struct sock *sk, struct sk_buff *skb)
+{
+ struct dccp_sock *dp = dccp_sk(sk);
+ struct dccp_ackvec *av = dp->dccps_hc_rx_ackvec;
+ struct dccp_skb_cb *dcb = DCCP_SKB_CB(skb);
+ const u16 buflen = dccp_ackvec_buflen(av);
+ /* Figure out how many options we need to represent the ackvec */
+ const u8 nr_opts = DIV_ROUND_UP(buflen, DCCP_SINGLE_OPT_MAXLEN);
+ u16 len = buflen + 2 * nr_opts;
+ u8 i, nonce = 0;
+ const unsigned char *tail, *from;
+ unsigned char *to;
+
+ if (dcb->dccpd_opt_len + len > DCCP_MAX_OPT_LEN) {
+ DCCP_WARN("Lacking space for %u bytes on %s packet\n", len,
+ dccp_packet_name(dcb->dccpd_type));
+ return -1;
+ }
+ /*
+ * Since Ack Vectors are variable-length, we can not always predict
+ * their size. To catch exception cases where the space is running out
+ * on the skb, a separate Sync is scheduled to carry the Ack Vector.
+ */
+ if (len > DCCPAV_MIN_OPTLEN &&
+ len + dcb->dccpd_opt_len + skb->len > dp->dccps_mss_cache) {
+ DCCP_WARN("No space left for Ack Vector (%u) on skb (%u+%u), "
+ "MPS=%u ==> reduce payload size?\n", len, skb->len,
+ dcb->dccpd_opt_len, dp->dccps_mss_cache);
+ dp->dccps_sync_scheduled = 1;
+ return 0;
+ }
+ dcb->dccpd_opt_len += len;
+
+ to = skb_push(skb, len);
+ len = buflen;
+ from = av->av_buf + av->av_buf_head;
+ tail = av->av_buf + DCCPAV_MAX_ACKVEC_LEN;
+
+ for (i = 0; i < nr_opts; ++i) {
+ int copylen = len;
+
+ if (len > DCCP_SINGLE_OPT_MAXLEN)
+ copylen = DCCP_SINGLE_OPT_MAXLEN;
+
+ /*
+ * RFC 4340, 12.2: Encode the Nonce Echo for this Ack Vector via
+ * its type; ack_nonce is the sum of all individual buf_nonce's.
+ */
+ nonce ^= av->av_buf_nonce[i];
+
+ *to++ = DCCPO_ACK_VECTOR_0 + av->av_buf_nonce[i];
+ *to++ = copylen + 2;
+
+ /* Check if buf_head wraps */
+ if (from + copylen > tail) {
+ const u16 tailsize = tail - from;
+
+ memcpy(to, from, tailsize);
+ to += tailsize;
+ len -= tailsize;
+ copylen -= tailsize;
+ from = av->av_buf;
+ }
+
+ memcpy(to, from, copylen);
+ from += copylen;
+ to += copylen;
+ len -= copylen;
+ }
+ /*
+ * Each sent Ack Vector is recorded in the list, as per A.2 of RFC 4340.
+ */
+ if (dccp_ackvec_update_records(av, dcb->dccpd_seq, nonce))
+ return -ENOBUFS;
+ return 0;
+}
+
/**
* dccp_insert_option_mandatory - Mandatory option (5.8.2)
* Note that since we are using skb_push, this function needs to be called
@@ -519,8 +598,7 @@ int dccp_insert_options(struct sock *sk, struct sk_buff *skb)
if (dccp_insert_option_timestamp(skb))
return -1;
- } else if (dp->dccps_hc_rx_ackvec != NULL &&
- dccp_ackvec_pending(dp->dccps_hc_rx_ackvec) &&
+ } else if (dccp_ackvec_pending(sk) &&
dccp_insert_option_ackvec(sk, skb)) {
return -1;
}
diff --git a/net/dccp/output.c b/net/dccp/output.c
index 45b91853f5a..d96dd9d362a 100644
--- a/net/dccp/output.c
+++ b/net/dccp/output.c
@@ -283,6 +283,15 @@ static void dccp_xmit_packet(struct sock *sk)
* any local drop will eventually be reported via receiver feedback.
*/
ccid_hc_tx_packet_sent(dp->dccps_hc_tx_ccid, sk, len);
+
+ /*
+ * If the CCID needs to transfer additional header options out-of-band
+ * (e.g. Ack Vectors or feature-negotiation options), it activates this
+ * flag to schedule a Sync. The Sync will automatically incorporate all
+ * currently pending header options, thus clearing the backlog.
+ */
+ if (dp->dccps_sync_scheduled)
+ dccp_send_sync(sk, dp->dccps_gsr, DCCP_PKT_SYNC);
}
/**
@@ -636,6 +645,12 @@ void dccp_send_sync(struct sock *sk, const u64 ackno,
DCCP_SKB_CB(skb)->dccpd_type = pkt_type;
DCCP_SKB_CB(skb)->dccpd_ack_seq = ackno;
+ /*
+ * Clear the flag in case the Sync was scheduled for out-of-band data,
+ * such as carrying a long Ack Vector.
+ */
+ dccp_sk(sk)->dccps_sync_scheduled = 0;
+
dccp_transmit_skb(sk, skb);
}
diff --git a/net/decnet/af_decnet.c b/net/decnet/af_decnet.c
index d6b93d19790..9ecef9968c3 100644
--- a/net/decnet/af_decnet.c
+++ b/net/decnet/af_decnet.c
@@ -155,7 +155,7 @@ static const struct proto_ops dn_proto_ops;
static DEFINE_RWLOCK(dn_hash_lock);
static struct hlist_head dn_sk_hash[DN_SK_HASH_SIZE];
static struct hlist_head dn_wild_sk;
-static atomic_t decnet_memory_allocated;
+static atomic_long_t decnet_memory_allocated;
static int __dn_setsockopt(struct socket *sock, int level, int optname, char __user *optval, unsigned int optlen, int flags);
static int __dn_getsockopt(struct socket *sock, int level, int optname, char __user *optval, int __user *optlen, int flags);
@@ -1848,7 +1848,7 @@ unsigned dn_mss_from_pmtu(struct net_device *dev, int mtu)
{
unsigned mss = 230 - DN_MAX_NSP_DATA_HEADER;
if (dev) {
- struct dn_dev *dn_db = dev->dn_ptr;
+ struct dn_dev *dn_db = rcu_dereference_raw(dev->dn_ptr);
mtu -= LL_RESERVED_SPACE(dev);
if (dn_db->use_long)
mtu -= 21;
diff --git a/net/decnet/dn_dev.c b/net/decnet/dn_dev.c
index 4c409b46aa3..0ba15633c41 100644
--- a/net/decnet/dn_dev.c
+++ b/net/decnet/dn_dev.c
@@ -267,7 +267,7 @@ static int dn_forwarding_proc(ctl_table *table, int write,
if (table->extra1 == NULL)
return -EINVAL;
- dn_db = dev->dn_ptr;
+ dn_db = rcu_dereference_raw(dev->dn_ptr);
old = dn_db->parms.forwarding;
err = proc_dointvec(table, write, buffer, lenp, ppos);
@@ -332,14 +332,19 @@ static struct dn_ifaddr *dn_dev_alloc_ifa(void)
return ifa;
}
-static __inline__ void dn_dev_free_ifa(struct dn_ifaddr *ifa)
+/*
+ * RCU callback: recover the dn_ifaddr that embeds @head as its "rcu"
+ * member and kfree() it.
+ */
+static void dn_dev_free_ifa_rcu(struct rcu_head *head)
{
- kfree(ifa);
+ kfree(container_of(head, struct dn_ifaddr, rcu));
}
-static void dn_dev_del_ifa(struct dn_dev *dn_db, struct dn_ifaddr **ifap, int destroy)
+/*
+ * Release @ifa through call_rcu() instead of an immediate kfree(); the
+ * memory is freed later by dn_dev_free_ifa_rcu().
+ */
+static void dn_dev_free_ifa(struct dn_ifaddr *ifa)
{
- struct dn_ifaddr *ifa1 = *ifap;
+ call_rcu(&ifa->rcu, dn_dev_free_ifa_rcu);
+}
+
+static void dn_dev_del_ifa(struct dn_dev *dn_db, struct dn_ifaddr __rcu **ifap, int destroy)
+{
+ struct dn_ifaddr *ifa1 = rtnl_dereference(*ifap);
unsigned char mac_addr[6];
struct net_device *dev = dn_db->dev;
@@ -373,7 +378,9 @@ static int dn_dev_insert_ifa(struct dn_dev *dn_db, struct dn_ifaddr *ifa)
ASSERT_RTNL();
/* Check for duplicates */
- for(ifa1 = dn_db->ifa_list; ifa1; ifa1 = ifa1->ifa_next) {
+ for (ifa1 = rtnl_dereference(dn_db->ifa_list);
+ ifa1 != NULL;
+ ifa1 = rtnl_dereference(ifa1->ifa_next)) {
if (ifa1->ifa_local == ifa->ifa_local)
return -EEXIST;
}
@@ -386,7 +393,7 @@ static int dn_dev_insert_ifa(struct dn_dev *dn_db, struct dn_ifaddr *ifa)
}
ifa->ifa_next = dn_db->ifa_list;
- dn_db->ifa_list = ifa;
+ rcu_assign_pointer(dn_db->ifa_list, ifa);
dn_ifaddr_notify(RTM_NEWADDR, ifa);
blocking_notifier_call_chain(&dnaddr_chain, NETDEV_UP, ifa);
@@ -396,7 +403,7 @@ static int dn_dev_insert_ifa(struct dn_dev *dn_db, struct dn_ifaddr *ifa)
static int dn_dev_set_ifa(struct net_device *dev, struct dn_ifaddr *ifa)
{
- struct dn_dev *dn_db = dev->dn_ptr;
+ struct dn_dev *dn_db = rtnl_dereference(dev->dn_ptr);
int rv;
if (dn_db == NULL) {
@@ -425,7 +432,8 @@ int dn_dev_ioctl(unsigned int cmd, void __user *arg)
struct sockaddr_dn *sdn = (struct sockaddr_dn *)&ifr->ifr_addr;
struct dn_dev *dn_db;
struct net_device *dev;
- struct dn_ifaddr *ifa = NULL, **ifap = NULL;
+ struct dn_ifaddr *ifa = NULL;
+ struct dn_ifaddr __rcu **ifap = NULL;
int ret = 0;
if (copy_from_user(ifr, arg, DN_IFREQ_SIZE))
@@ -454,8 +462,10 @@ int dn_dev_ioctl(unsigned int cmd, void __user *arg)
goto done;
}
- if ((dn_db = dev->dn_ptr) != NULL) {
- for (ifap = &dn_db->ifa_list; (ifa=*ifap) != NULL; ifap = &ifa->ifa_next)
+ if ((dn_db = rtnl_dereference(dev->dn_ptr)) != NULL) {
+ for (ifap = &dn_db->ifa_list;
+ (ifa = rtnl_dereference(*ifap)) != NULL;
+ ifap = &ifa->ifa_next)
if (strcmp(ifr->ifr_name, ifa->ifa_label) == 0)
break;
}
@@ -558,7 +568,7 @@ static struct dn_dev *dn_dev_by_index(int ifindex)
dev = __dev_get_by_index(&init_net, ifindex);
if (dev)
- dn_dev = dev->dn_ptr;
+ dn_dev = rtnl_dereference(dev->dn_ptr);
return dn_dev;
}
@@ -576,7 +586,8 @@ static int dn_nl_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
struct nlattr *tb[IFA_MAX+1];
struct dn_dev *dn_db;
struct ifaddrmsg *ifm;
- struct dn_ifaddr *ifa, **ifap;
+ struct dn_ifaddr *ifa;
+ struct dn_ifaddr __rcu **ifap;
int err = -EINVAL;
if (!net_eq(net, &init_net))
@@ -592,7 +603,9 @@ static int dn_nl_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
goto errout;
err = -EADDRNOTAVAIL;
- for (ifap = &dn_db->ifa_list; (ifa = *ifap); ifap = &ifa->ifa_next) {
+ for (ifap = &dn_db->ifa_list;
+ (ifa = rtnl_dereference(*ifap)) != NULL;
+ ifap = &ifa->ifa_next) {
if (tb[IFA_LOCAL] &&
nla_memcmp(tb[IFA_LOCAL], &ifa->ifa_local, 2))
continue;
@@ -632,7 +645,7 @@ static int dn_nl_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
if ((dev = __dev_get_by_index(&init_net, ifm->ifa_index)) == NULL)
return -ENODEV;
- if ((dn_db = dev->dn_ptr) == NULL) {
+ if ((dn_db = rtnl_dereference(dev->dn_ptr)) == NULL) {
dn_db = dn_dev_create(dev, &err);
if (!dn_db)
return err;
@@ -748,11 +761,11 @@ static int dn_nl_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb)
skip_naddr = 0;
}
- if ((dn_db = dev->dn_ptr) == NULL)
+ if ((dn_db = rtnl_dereference(dev->dn_ptr)) == NULL)
goto cont;
- for (ifa = dn_db->ifa_list, dn_idx = 0; ifa;
- ifa = ifa->ifa_next, dn_idx++) {
+ for (ifa = rtnl_dereference(dn_db->ifa_list), dn_idx = 0; ifa;
+ ifa = rtnl_dereference(ifa->ifa_next), dn_idx++) {
if (dn_idx < skip_naddr)
continue;
@@ -773,21 +786,22 @@ done:
+/*
+ * dn_dev_get_first - copy the local address of the first entry on @dev's
+ * ifa_list into *@addr.
+ *
+ * Returns 0 when an address was stored, -ENODEV when @dev has no dn_ptr or
+ * its ifa_list is empty.  With this change dn_ptr and ifa_list are read via
+ * rcu_dereference() inside rcu_read_lock()/rcu_read_unlock(); rtnl_lock()
+ * is no longer taken.
+ */
static int dn_dev_get_first(struct net_device *dev, __le16 *addr)
{
- struct dn_dev *dn_db = (struct dn_dev *)dev->dn_ptr;
+ struct dn_dev *dn_db;
struct dn_ifaddr *ifa;
int rv = -ENODEV;
+ rcu_read_lock();
+ dn_db = rcu_dereference(dev->dn_ptr);
if (dn_db == NULL)
goto out;
- rtnl_lock();
- ifa = dn_db->ifa_list;
+ ifa = rcu_dereference(dn_db->ifa_list);
if (ifa != NULL) {
*addr = ifa->ifa_local;
rv = 0;
}
- rtnl_unlock();
out:
+ /* every path reaching "out" has taken rcu_read_lock() above */
+ rcu_read_unlock();
return rv;
}
@@ -823,7 +837,7 @@ static void dn_send_endnode_hello(struct net_device *dev, struct dn_ifaddr *ifa)
struct endnode_hello_message *msg;
struct sk_buff *skb = NULL;
__le16 *pktlen;
- struct dn_dev *dn_db = (struct dn_dev *)dev->dn_ptr;
+ struct dn_dev *dn_db = rcu_dereference_raw(dev->dn_ptr);
if ((skb = dn_alloc_skb(NULL, sizeof(*msg), GFP_ATOMIC)) == NULL)
return;
@@ -889,7 +903,7 @@ static int dn_am_i_a_router(struct dn_neigh *dn, struct dn_dev *dn_db, struct dn
static void dn_send_router_hello(struct net_device *dev, struct dn_ifaddr *ifa)
{
int n;
- struct dn_dev *dn_db = dev->dn_ptr;
+ struct dn_dev *dn_db = rcu_dereference_raw(dev->dn_ptr);
struct dn_neigh *dn = (struct dn_neigh *)dn_db->router;
struct sk_buff *skb;
size_t size;
@@ -960,7 +974,7 @@ static void dn_send_router_hello(struct net_device *dev, struct dn_ifaddr *ifa)
static void dn_send_brd_hello(struct net_device *dev, struct dn_ifaddr *ifa)
{
- struct dn_dev *dn_db = (struct dn_dev *)dev->dn_ptr;
+ struct dn_dev *dn_db = rcu_dereference_raw(dev->dn_ptr);
if (dn_db->parms.forwarding == 0)
dn_send_endnode_hello(dev, ifa);
@@ -998,7 +1012,7 @@ static void dn_send_ptp_hello(struct net_device *dev, struct dn_ifaddr *ifa)
static int dn_eth_up(struct net_device *dev)
{
- struct dn_dev *dn_db = dev->dn_ptr;
+ struct dn_dev *dn_db = rcu_dereference_raw(dev->dn_ptr);
if (dn_db->parms.forwarding == 0)
dev_mc_add(dev, dn_rt_all_end_mcast);
@@ -1012,7 +1026,7 @@ static int dn_eth_up(struct net_device *dev)
static void dn_eth_down(struct net_device *dev)
{
- struct dn_dev *dn_db = dev->dn_ptr;
+ struct dn_dev *dn_db = rcu_dereference_raw(dev->dn_ptr);
if (dn_db->parms.forwarding == 0)
dev_mc_del(dev, dn_rt_all_end_mcast);
@@ -1025,12 +1039,16 @@ static void dn_dev_set_timer(struct net_device *dev);
static void dn_dev_timer_func(unsigned long arg)
{
struct net_device *dev = (struct net_device *)arg;
- struct dn_dev *dn_db = dev->dn_ptr;
+ struct dn_dev *dn_db;
struct dn_ifaddr *ifa;
+ rcu_read_lock();
+ dn_db = rcu_dereference(dev->dn_ptr);
if (dn_db->t3 <= dn_db->parms.t2) {
if (dn_db->parms.timer3) {
- for(ifa = dn_db->ifa_list; ifa; ifa = ifa->ifa_next) {
+ for (ifa = rcu_dereference(dn_db->ifa_list);
+ ifa;
+ ifa = rcu_dereference(ifa->ifa_next)) {
if (!(ifa->ifa_flags & IFA_F_SECONDARY))
dn_db->parms.timer3(dev, ifa);
}
@@ -1039,13 +1057,13 @@ static void dn_dev_timer_func(unsigned long arg)
} else {
dn_db->t3 -= dn_db->parms.t2;
}
-
+ rcu_read_unlock();
dn_dev_set_timer(dev);
}
static void dn_dev_set_timer(struct net_device *dev)
{
- struct dn_dev *dn_db = dev->dn_ptr;
+ struct dn_dev *dn_db = rcu_dereference_raw(dev->dn_ptr);
if (dn_db->parms.t2 > dn_db->parms.t3)
dn_db->parms.t2 = dn_db->parms.t3;
@@ -1077,8 +1095,8 @@ static struct dn_dev *dn_dev_create(struct net_device *dev, int *err)
return NULL;
memcpy(&dn_db->parms, p, sizeof(struct dn_dev_parms));
- smp_wmb();
- dev->dn_ptr = dn_db;
+
+ rcu_assign_pointer(dev->dn_ptr, dn_db);
dn_db->dev = dev;
init_timer(&dn_db->timer);
@@ -1086,7 +1104,7 @@ static struct dn_dev *dn_dev_create(struct net_device *dev, int *err)
dn_db->neigh_parms = neigh_parms_alloc(dev, &dn_neigh_table);
if (!dn_db->neigh_parms) {
- dev->dn_ptr = NULL;
+ rcu_assign_pointer(dev->dn_ptr, NULL);
kfree(dn_db);
return NULL;
}
@@ -1125,7 +1143,7 @@ void dn_dev_up(struct net_device *dev)
struct dn_ifaddr *ifa;
__le16 addr = decnet_address;
int maybe_default = 0;
- struct dn_dev *dn_db = (struct dn_dev *)dev->dn_ptr;
+ struct dn_dev *dn_db = rtnl_dereference(dev->dn_ptr);
if ((dev->type != ARPHRD_ETHER) && (dev->type != ARPHRD_LOOPBACK))
return;
@@ -1176,7 +1194,7 @@ void dn_dev_up(struct net_device *dev)
static void dn_dev_delete(struct net_device *dev)
{
- struct dn_dev *dn_db = dev->dn_ptr;
+ struct dn_dev *dn_db = rtnl_dereference(dev->dn_ptr);
if (dn_db == NULL)
return;
@@ -1204,13 +1222,13 @@ static void dn_dev_delete(struct net_device *dev)
void dn_dev_down(struct net_device *dev)
{
- struct dn_dev *dn_db = dev->dn_ptr;
+ struct dn_dev *dn_db = rtnl_dereference(dev->dn_ptr);
struct dn_ifaddr *ifa;
if (dn_db == NULL)
return;
- while((ifa = dn_db->ifa_list) != NULL) {
+ while ((ifa = rtnl_dereference(dn_db->ifa_list)) != NULL) {
dn_dev_del_ifa(dn_db, &dn_db->ifa_list, 0);
dn_dev_free_ifa(ifa);
}
@@ -1270,7 +1288,7 @@ static inline int is_dn_dev(struct net_device *dev)
}
static void *dn_dev_seq_start(struct seq_file *seq, loff_t *pos)
- __acquires(rcu)
+ __acquires(RCU)
{
int i;
struct net_device *dev;
@@ -1313,7 +1331,7 @@ static void *dn_dev_seq_next(struct seq_file *seq, void *v, loff_t *pos)
}
+/*
+ * Drops the RCU read lock; pairs with dn_dev_seq_start(), which carries the
+ * matching __acquires(RCU) annotation.
+ */
static void dn_dev_seq_stop(struct seq_file *seq, void *v)
- __releases(rcu)
+ __releases(RCU)
{
rcu_read_unlock();
}
@@ -1340,7 +1358,7 @@ static int dn_dev_seq_show(struct seq_file *seq, void *v)
struct net_device *dev = v;
char peer_buf[DN_ASCBUF_LEN];
char router_buf[DN_ASCBUF_LEN];
- struct dn_dev *dn_db = dev->dn_ptr;
+ struct dn_dev *dn_db = rcu_dereference(dev->dn_ptr);
seq_printf(seq, "%-8s %1s %04u %04u %04lu %04lu"
" %04hu %03d %02x %-10s %-7s %-7s\n",
diff --git a/net/decnet/dn_fib.c b/net/decnet/dn_fib.c
index 4ab96c15166..0ef0a81bcd7 100644
--- a/net/decnet/dn_fib.c
+++ b/net/decnet/dn_fib.c
@@ -610,10 +610,12 @@ static void dn_fib_del_ifaddr(struct dn_ifaddr *ifa)
/* Scan device list */
rcu_read_lock();
for_each_netdev_rcu(&init_net, dev) {
- dn_db = dev->dn_ptr;
+ dn_db = rcu_dereference(dev->dn_ptr);
if (dn_db == NULL)
continue;
- for(ifa2 = dn_db->ifa_list; ifa2; ifa2 = ifa2->ifa_next) {
+ for (ifa2 = rcu_dereference(dn_db->ifa_list);
+ ifa2 != NULL;
+ ifa2 = rcu_dereference(ifa2->ifa_next)) {
if (ifa2->ifa_local == ifa->ifa_local) {
found_it = 1;
break;
diff --git a/net/decnet/dn_neigh.c b/net/decnet/dn_neigh.c
index a085dbcf5c7..602dade7e9a 100644
--- a/net/decnet/dn_neigh.c
+++ b/net/decnet/dn_neigh.c
@@ -391,7 +391,7 @@ int dn_neigh_router_hello(struct sk_buff *skb)
write_lock(&neigh->lock);
neigh->used = jiffies;
- dn_db = (struct dn_dev *)neigh->dev->dn_ptr;
+ dn_db = rcu_dereference(neigh->dev->dn_ptr);
if (!(neigh->nud_state & NUD_PERMANENT)) {
neigh->updated = jiffies;
diff --git a/net/decnet/dn_route.c b/net/decnet/dn_route.c
index df0f3e54ff8..8280e43c886 100644
--- a/net/decnet/dn_route.c
+++ b/net/decnet/dn_route.c
@@ -93,7 +93,7 @@
+/*
+ * One bucket of dn_rt_hash_table: @chain heads a list of dn_route entries
+ * (the pointer is now annotated __rcu), @lock is the per-bucket spinlock.
+ */
struct dn_rt_hash_bucket
{
- struct dn_route *chain;
+ struct dn_route __rcu *chain;
spinlock_t lock;
};
@@ -157,15 +157,17 @@ static inline void dnrt_drop(struct dn_route *rt)
static void dn_dst_check_expire(unsigned long dummy)
{
int i;
- struct dn_route *rt, **rtp;
+ struct dn_route *rt;
+ struct dn_route __rcu **rtp;
unsigned long now = jiffies;
unsigned long expire = 120 * HZ;
- for(i = 0; i <= dn_rt_hash_mask; i++) {
+ for (i = 0; i <= dn_rt_hash_mask; i++) {
rtp = &dn_rt_hash_table[i].chain;
spin_lock(&dn_rt_hash_table[i].lock);
- while((rt=*rtp) != NULL) {
+ while ((rt = rcu_dereference_protected(*rtp,
+ lockdep_is_held(&dn_rt_hash_table[i].lock))) != NULL) {
if (atomic_read(&rt->dst.__refcnt) ||
(now - rt->dst.lastuse) < expire) {
rtp = &rt->dst.dn_next;
@@ -186,17 +188,19 @@ static void dn_dst_check_expire(unsigned long dummy)
static int dn_dst_gc(struct dst_ops *ops)
{
- struct dn_route *rt, **rtp;
+ struct dn_route *rt;
+ struct dn_route __rcu **rtp;
int i;
unsigned long now = jiffies;
unsigned long expire = 10 * HZ;
- for(i = 0; i <= dn_rt_hash_mask; i++) {
+ for (i = 0; i <= dn_rt_hash_mask; i++) {
spin_lock_bh(&dn_rt_hash_table[i].lock);
rtp = &dn_rt_hash_table[i].chain;
- while((rt=*rtp) != NULL) {
+ while ((rt = rcu_dereference_protected(*rtp,
+ lockdep_is_held(&dn_rt_hash_table[i].lock))) != NULL) {
if (atomic_read(&rt->dst.__refcnt) ||
(now - rt->dst.lastuse) < expire) {
rtp = &rt->dst.dn_next;
@@ -227,7 +231,7 @@ static void dn_dst_update_pmtu(struct dst_entry *dst, u32 mtu)
{
u32 min_mtu = 230;
struct dn_dev *dn = dst->neighbour ?
- (struct dn_dev *)dst->neighbour->dev->dn_ptr : NULL;
+ rcu_dereference_raw(dst->neighbour->dev->dn_ptr) : NULL;
if (dn && dn->use_long == 0)
min_mtu -= 6;
@@ -267,23 +271,25 @@ static void dn_dst_link_failure(struct sk_buff *skb)
+/*
+ * compare_keys - test two flow keys for equality.
+ *
+ * XORs the destination, source, mark, scope, oif and iif fields pairwise and
+ * ORs the results, so the single "== 0" test is true only when all six
+ * fields match.  Returns 1 on a match, 0 otherwise.
+ *
+ * The fld_dst/fld_src/fld_scope names replace the spelled-out nl_u.dn_u.*
+ * member accesses; presumably they are shorthand for the same members -
+ * confirm against the struct flowi definition.
+ */
static inline int compare_keys(struct flowi *fl1, struct flowi *fl2)
{
- return ((fl1->nl_u.dn_u.daddr ^ fl2->nl_u.dn_u.daddr) |
- (fl1->nl_u.dn_u.saddr ^ fl2->nl_u.dn_u.saddr) |
+ return ((fl1->fld_dst ^ fl2->fld_dst) |
+ (fl1->fld_src ^ fl2->fld_src) |
(fl1->mark ^ fl2->mark) |
- (fl1->nl_u.dn_u.scope ^ fl2->nl_u.dn_u.scope) |
+ (fl1->fld_scope ^ fl2->fld_scope) |
(fl1->oif ^ fl2->oif) |
(fl1->iif ^ fl2->iif)) == 0;
}
static int dn_insert_route(struct dn_route *rt, unsigned hash, struct dn_route **rp)
{
- struct dn_route *rth, **rthp;
+ struct dn_route *rth;
+ struct dn_route __rcu **rthp;
unsigned long now = jiffies;
rthp = &dn_rt_hash_table[hash].chain;
spin_lock_bh(&dn_rt_hash_table[hash].lock);
- while((rth = *rthp) != NULL) {
+ while ((rth = rcu_dereference_protected(*rthp,
+ lockdep_is_held(&dn_rt_hash_table[hash].lock))) != NULL) {
if (compare_keys(&rth->fl, &rt->fl)) {
/* Put it first */
*rthp = rth->dst.dn_next;
@@ -315,15 +321,15 @@ static void dn_run_flush(unsigned long dummy)
int i;
struct dn_route *rt, *next;
- for(i = 0; i < dn_rt_hash_mask; i++) {
+ for (i = 0; i < dn_rt_hash_mask; i++) {
spin_lock_bh(&dn_rt_hash_table[i].lock);
- if ((rt = xchg(&dn_rt_hash_table[i].chain, NULL)) == NULL)
+ if ((rt = xchg((struct dn_route **)&dn_rt_hash_table[i].chain, NULL)) == NULL)
goto nothing_to_declare;
- for(; rt; rt=next) {
- next = rt->dst.dn_next;
- rt->dst.dn_next = NULL;
+ for(; rt; rt = next) {
+ next = rcu_dereference_raw(rt->dst.dn_next);
+ RCU_INIT_POINTER(rt->dst.dn_next, NULL);
dst_free((struct dst_entry *)rt);
}
@@ -458,15 +464,16 @@ static int dn_return_long(struct sk_buff *skb)
*/
static int dn_route_rx_packet(struct sk_buff *skb)
{
- struct dn_skb_cb *cb = DN_SKB_CB(skb);
+ struct dn_skb_cb *cb;
int err;
if ((err = dn_route_input(skb)) == 0)
return dst_input(skb);
+ cb = DN_SKB_CB(skb);
if (decnet_debug_level & 4) {
char *devname = skb->dev ? skb->dev->name : "???";
- struct dn_skb_cb *cb = DN_SKB_CB(skb);
+
printk(KERN_DEBUG
"DECnet: dn_route_rx_packet: rt_flags=0x%02x dev=%s len=%d src=0x%04hx dst=0x%04hx err=%d type=%d\n",
(int)cb->rt_flags, devname, skb->len,
@@ -573,7 +580,7 @@ int dn_route_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type
struct dn_skb_cb *cb;
unsigned char flags = 0;
__u16 len = le16_to_cpu(*(__le16 *)skb->data);
- struct dn_dev *dn = (struct dn_dev *)dev->dn_ptr;
+ struct dn_dev *dn = rcu_dereference(dev->dn_ptr);
unsigned char padlen = 0;
if (!net_eq(dev_net(dev), &init_net))
@@ -728,7 +735,7 @@ static int dn_forward(struct sk_buff *skb)
{
struct dn_skb_cb *cb = DN_SKB_CB(skb);
struct dst_entry *dst = skb_dst(skb);
- struct dn_dev *dn_db = dst->dev->dn_ptr;
+ struct dn_dev *dn_db = rcu_dereference(dst->dev->dn_ptr);
struct dn_route *rt;
struct neighbour *neigh = dst->neighbour;
int header_len;
@@ -835,13 +842,16 @@ static inline int dn_match_addr(__le16 addr1, __le16 addr2)
static __le16 dnet_select_source(const struct net_device *dev, __le16 daddr, int scope)
{
__le16 saddr = 0;
- struct dn_dev *dn_db = dev->dn_ptr;
+ struct dn_dev *dn_db;
struct dn_ifaddr *ifa;
int best_match = 0;
int ret;
- read_lock(&dev_base_lock);
- for(ifa = dn_db->ifa_list; ifa; ifa = ifa->ifa_next) {
+ rcu_read_lock();
+ dn_db = rcu_dereference(dev->dn_ptr);
+ for (ifa = rcu_dereference(dn_db->ifa_list);
+ ifa != NULL;
+ ifa = rcu_dereference(ifa->ifa_next)) {
if (ifa->ifa_scope > scope)
continue;
if (!daddr) {
@@ -854,7 +864,7 @@ static __le16 dnet_select_source(const struct net_device *dev, __le16 daddr, int
if (best_match == 0)
saddr = ifa->ifa_local;
}
- read_unlock(&dev_base_lock);
+ rcu_read_unlock();
return saddr;
}
@@ -872,11 +882,9 @@ static inline __le16 dn_fib_rules_map_destination(__le16 daddr, struct dn_fib_re
static int dn_route_output_slow(struct dst_entry **pprt, const struct flowi *oldflp, int try_hard)
{
- struct flowi fl = { .nl_u = { .dn_u =
- { .daddr = oldflp->fld_dst,
- .saddr = oldflp->fld_src,
- .scope = RT_SCOPE_UNIVERSE,
- } },
+ struct flowi fl = { .fld_dst = oldflp->fld_dst,
+ .fld_src = oldflp->fld_src,
+ .fld_scope = RT_SCOPE_UNIVERSE,
.mark = oldflp->mark,
.iif = init_net.loopback_dev->ifindex,
.oif = oldflp->oif };
@@ -1020,7 +1028,7 @@ source_ok:
err = -ENODEV;
if (dev_out == NULL)
goto out;
- dn_db = dev_out->dn_ptr;
+ dn_db = rcu_dereference_raw(dev_out->dn_ptr);
/* Possible improvement - check all devices for local addr */
if (dn_dev_islocal(dev_out, fl.fld_dst)) {
dev_put(dev_out);
@@ -1171,7 +1179,7 @@ static int __dn_route_output_key(struct dst_entry **pprt, const struct flowi *fl
if ((flp->fld_dst == rt->fl.fld_dst) &&
(flp->fld_src == rt->fl.fld_src) &&
(flp->mark == rt->fl.mark) &&
- (rt->fl.iif == 0) &&
+ dn_is_output_route(rt) &&
(rt->fl.oif == flp->oif)) {
dst_use(&rt->dst, jiffies);
rcu_read_unlock_bh();
@@ -1220,11 +1228,9 @@ static int dn_route_input_slow(struct sk_buff *skb)
int flags = 0;
__le16 gateway = 0;
__le16 local_src = 0;
- struct flowi fl = { .nl_u = { .dn_u =
- { .daddr = cb->dst,
- .saddr = cb->src,
- .scope = RT_SCOPE_UNIVERSE,
- } },
+ struct flowi fl = { .fld_dst = cb->dst,
+ .fld_src = cb->src,
+ .fld_scope = RT_SCOPE_UNIVERSE,
.mark = skb->mark,
.iif = skb->dev->ifindex };
struct dn_fib_res res = { .fi = NULL, .type = RTN_UNREACHABLE };
@@ -1233,7 +1239,7 @@ static int dn_route_input_slow(struct sk_buff *skb)
dev_hold(in_dev);
- if ((dn_db = in_dev->dn_ptr) == NULL)
+ if ((dn_db = rcu_dereference(in_dev->dn_ptr)) == NULL)
goto out;
/* Zero source addresses are not allowed */
@@ -1502,7 +1508,7 @@ static int dn_rt_fill_info(struct sk_buff *skb, u32 pid, u32 seq,
if (rtnl_put_cacheinfo(skb, &rt->dst, 0, 0, 0, expires,
rt->dst.error) < 0)
goto rtattr_failure;
- if (rt->fl.iif)
+ if (dn_is_input_route(rt))
RTA_PUT(skb, RTA_IIF, sizeof(int), &rt->fl.iif);
nlh->nlmsg_len = skb_tail_pointer(skb) - b;
@@ -1677,15 +1683,15 @@ static struct dn_route *dn_rt_cache_get_next(struct seq_file *seq, struct dn_rou
{
struct dn_rt_cache_iter_state *s = seq->private;
- rt = rt->dst.dn_next;
- while(!rt) {
+ rt = rcu_dereference_bh(rt->dst.dn_next);
+ while (!rt) {
rcu_read_unlock_bh();
if (--s->bucket < 0)
break;
rcu_read_lock_bh();
- rt = dn_rt_hash_table[s->bucket].chain;
+ rt = rcu_dereference_bh(dn_rt_hash_table[s->bucket].chain);
}
- return rcu_dereference_bh(rt);
+ return rt;
}
static void *dn_rt_cache_seq_start(struct seq_file *seq, loff_t *pos)
diff --git a/net/decnet/dn_rules.c b/net/decnet/dn_rules.c
index 48fdf10be7a..6eb91df3c55 100644
--- a/net/decnet/dn_rules.c
+++ b/net/decnet/dn_rules.c
@@ -175,7 +175,7 @@ static int dn_fib_rule_compare(struct fib_rule *rule, struct fib_rule_hdr *frh,
unsigned dnet_addr_type(__le16 addr)
{
- struct flowi fl = { .nl_u = { .dn_u = { .daddr = addr } } };
+ struct flowi fl = { .fld_dst = addr };
struct dn_fib_res res;
unsigned ret = RTN_UNICAST;
struct dn_fib_table *tb = dn_fib_get_table(RT_TABLE_LOCAL, 0);
diff --git a/net/decnet/sysctl_net_decnet.c b/net/decnet/sysctl_net_decnet.c
index be3eb8e2328..28f8b5e5f73 100644
--- a/net/decnet/sysctl_net_decnet.c
+++ b/net/decnet/sysctl_net_decnet.c
@@ -38,7 +38,7 @@ int decnet_log_martians = 1;
int decnet_no_fc_max_cwnd = NSP_MIN_WINDOW;
/* Reasonable defaults, I hope, based on tcp's defaults */
-int sysctl_decnet_mem[3] = { 768 << 3, 1024 << 3, 1536 << 3 };
+long sysctl_decnet_mem[3] = { 768 << 3, 1024 << 3, 1536 << 3 };
int sysctl_decnet_wmem[3] = { 4 * 1024, 16 * 1024, 128 * 1024 };
int sysctl_decnet_rmem[3] = { 4 * 1024, 87380, 87380 * 2 };
@@ -324,7 +324,7 @@ static ctl_table dn_table[] = {
.data = &sysctl_decnet_mem,
.maxlen = sizeof(sysctl_decnet_mem),
.mode = 0644,
- .proc_handler = proc_dointvec,
+ .proc_handler = proc_doulongvec_minmax
},
{
.procname = "decnet_rmem",
diff --git a/net/dns_resolver/Makefile b/net/dns_resolver/Makefile
index c0ef4e71dc4..d5c13c2eb36 100644
--- a/net/dns_resolver/Makefile
+++ b/net/dns_resolver/Makefile
@@ -4,4 +4,4 @@
obj-$(CONFIG_DNS_RESOLVER) += dns_resolver.o
-dns_resolver-objs := dns_key.o dns_query.o
+dns_resolver-y := dns_key.o dns_query.o
diff --git a/net/econet/Makefile b/net/econet/Makefile
index 39f0a77abdb..05fae8be2fe 100644
--- a/net/econet/Makefile
+++ b/net/econet/Makefile
@@ -4,4 +4,4 @@
obj-$(CONFIG_ECONET) += econet.o
-econet-objs := af_econet.o
+econet-y := af_econet.o
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index f581f77d109..f2b61107df6 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -1148,21 +1148,13 @@ int inet_sk_rebuild_header(struct sock *sk)
struct flowi fl = {
.oif = sk->sk_bound_dev_if,
.mark = sk->sk_mark,
- .nl_u = {
- .ip4_u = {
- .daddr = daddr,
- .saddr = inet->inet_saddr,
- .tos = RT_CONN_FLAGS(sk),
- },
- },
+ .fl4_dst = daddr,
+ .fl4_src = inet->inet_saddr,
+ .fl4_tos = RT_CONN_FLAGS(sk),
.proto = sk->sk_protocol,
.flags = inet_sk_flowi_flags(sk),
- .uli_u = {
- .ports = {
- .sport = inet->inet_sport,
- .dport = inet->inet_dport,
- },
- },
+ .fl_ip_sport = inet->inet_sport,
+ .fl_ip_dport = inet->inet_dport,
};
security_sk_classify_flow(sk, &fl);
diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c
index d8e540c5b07..7833f17b648 100644
--- a/net/ipv4/arp.c
+++ b/net/ipv4/arp.c
@@ -433,8 +433,8 @@ static int arp_ignore(struct in_device *in_dev, __be32 sip, __be32 tip)
static int arp_filter(__be32 sip, __be32 tip, struct net_device *dev)
{
- struct flowi fl = { .nl_u = { .ip4_u = { .daddr = sip,
- .saddr = tip } } };
+ struct flowi fl = { .fl4_dst = sip,
+ .fl4_src = tip };
struct rtable *rt;
int flag = 0;
/*unsigned long now; */
@@ -1061,8 +1061,8 @@ static int arp_req_set(struct net *net, struct arpreq *r,
if (r->arp_flags & ATF_PERM)
r->arp_flags |= ATF_COM;
if (dev == NULL) {
- struct flowi fl = { .nl_u.ip4_u = { .daddr = ip,
- .tos = RTO_ONLINK } };
+ struct flowi fl = { .fl4_dst = ip,
+ .fl4_tos = RTO_ONLINK };
struct rtable *rt;
err = ip_route_output_key(net, &rt, &fl);
if (err != 0)
@@ -1169,8 +1169,8 @@ static int arp_req_delete(struct net *net, struct arpreq *r,
ip = ((struct sockaddr_in *)&r->arp_pa)->sin_addr.s_addr;
if (dev == NULL) {
- struct flowi fl = { .nl_u.ip4_u = { .daddr = ip,
- .tos = RTO_ONLINK } };
+ struct flowi fl = { .fl4_dst = ip,
+ .fl4_tos = RTO_ONLINK };
struct rtable *rt;
err = ip_route_output_key(net, &rt, &fl);
if (err != 0)
diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c
index dc94b0316b7..71afc26c2df 100644
--- a/net/ipv4/devinet.c
+++ b/net/ipv4/devinet.c
@@ -1256,6 +1256,72 @@ errout:
rtnl_set_sk_err(net, RTNLGRP_IPV4_IFADDR, err);
}
+/*
+ * inet_get_link_af_size - attribute space needed for @dev's IPv4 link data.
+ *
+ * Returns 0 when __in_dev_get_rcu() finds no in_device for @dev, otherwise
+ * nla_total_size() of an IPV4_DEVCONF_MAX * 4 byte payload, the same
+ * payload size that inet_fill_link_af() reserves for IFLA_INET_CONF.
+ */
+static size_t inet_get_link_af_size(const struct net_device *dev)
+{
+ struct in_device *in_dev = __in_dev_get_rcu(dev);
+
+ if (!in_dev)
+ return 0;
+
+ return nla_total_size(IPV4_DEVCONF_MAX * 4); /* IFLA_INET_CONF */
+}
+
+/*
+ * inet_fill_link_af - dump @dev's IPv4 devconf values into @skb.
+ *
+ * Reserves one IFLA_INET_CONF attribute of IPV4_DEVCONF_MAX * 4 bytes and
+ * stores in_dev->cnf.data[0 .. IPV4_DEVCONF_MAX - 1] into it as u32 values.
+ *
+ * Returns 0 on success, -ENODATA if @dev has no in_device, -EMSGSIZE if
+ * nla_reserve() fails.
+ */
+static int inet_fill_link_af(struct sk_buff *skb, const struct net_device *dev)
+{
+ struct in_device *in_dev = __in_dev_get_rcu(dev);
+ struct nlattr *nla;
+ int i;
+
+ if (!in_dev)
+ return -ENODATA;
+
+ nla = nla_reserve(skb, IFLA_INET_CONF, IPV4_DEVCONF_MAX * 4);
+ if (nla == NULL)
+ return -EMSGSIZE;
+
+ for (i = 0; i < IPV4_DEVCONF_MAX; i++)
+ ((u32 *) nla_data(nla))[i] = in_dev->cnf.data[i];
+
+ return 0;
+}
+
+/*
+ * Policy handed to nla_parse_nested() by inet_parse_link_af():
+ * IFLA_INET_CONF must itself be a nested attribute.
+ */
+static const struct nla_policy inet_af_policy[IFLA_INET_MAX+1] = {
+ [IFLA_INET_CONF] = { .type = NLA_NESTED },
+};
+
+/*
+ * inet_parse_link_af - apply IPv4 devconf settings carried in @nla to @dev.
+ *
+ * @nla is parsed as a nested attribute against inet_af_policy.  The entries
+ * nested inside IFLA_INET_CONF are walked twice: the first walk only
+ * validates, the second calls ipv4_devconf_set(), so no value is changed
+ * when any entry is rejected.  Each entry's attribute type is used as the
+ * devconf id and its u32 payload as the new value.
+ *
+ * Returns 0 on success, -EOPNOTSUPP if @dev has no in_device, the error
+ * from nla_parse_nested(), or -EINVAL for an entry shorter than 4 bytes or
+ * whose type is outside 1..IPV4_DEVCONF_MAX.
+ */
+static int inet_parse_link_af(struct net_device *dev, const struct nlattr *nla)
+{
+ struct in_device *in_dev = __in_dev_get_rcu(dev);
+ struct nlattr *a, *tb[IFLA_INET_MAX+1];
+ int err, rem;
+
+ if (!in_dev)
+ return -EOPNOTSUPP;
+
+ err = nla_parse_nested(tb, IFLA_INET_MAX, nla, inet_af_policy);
+ if (err < 0)
+ return err;
+
+ /* pass 1: validate every entry before touching in_dev */
+ if (tb[IFLA_INET_CONF]) {
+ nla_for_each_nested(a, tb[IFLA_INET_CONF], rem) {
+ int cfgid = nla_type(a);
+
+ if (nla_len(a) < 4)
+ return -EINVAL;
+
+ if (cfgid <= 0 || cfgid > IPV4_DEVCONF_MAX)
+ return -EINVAL;
+ }
+ }
+
+ /* pass 2: all entries passed validation, apply them */
+ if (tb[IFLA_INET_CONF]) {
+ nla_for_each_nested(a, tb[IFLA_INET_CONF], rem)
+ ipv4_devconf_set(in_dev, nla_type(a), nla_get_u32(a));
+ }
+
+ return 0;
+}
+
#ifdef CONFIG_SYSCTL
static void devinet_copy_dflt_conf(struct net *net, int i)
@@ -1619,6 +1685,13 @@ static __net_initdata struct pernet_operations devinet_ops = {
.exit = devinet_exit_net,
};
+/*
+ * AF_INET link-attribute callbacks (size, fill, parse); registered with
+ * rtnl_af_register() from devinet_init().
+ */
+static struct rtnl_af_ops inet_af_ops = {
+ .family = AF_INET,
+ .fill_link_af = inet_fill_link_af,
+ .get_link_af_size = inet_get_link_af_size,
+ .parse_link_af = inet_parse_link_af,
+};
+
void __init devinet_init(void)
{
register_pernet_subsys(&devinet_ops);
@@ -1626,6 +1699,8 @@ void __init devinet_init(void)
register_gifconf(PF_INET, inet_gifconf);
register_netdevice_notifier(&ip_netdev_notifier);
+ rtnl_af_register(&inet_af_ops);
+
rtnl_register(PF_INET, RTM_NEWADDR, inet_rtm_newaddr, NULL);
rtnl_register(PF_INET, RTM_DELADDR, inet_rtm_deladdr, NULL);
rtnl_register(PF_INET, RTM_GETADDR, NULL, inet_dump_ifaddr);
diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c
index eb6f69a8f27..d3a1112b9d9 100644
--- a/net/ipv4/fib_frontend.c
+++ b/net/ipv4/fib_frontend.c
@@ -158,11 +158,7 @@ static void fib_flush(struct net *net)
struct net_device *__ip_dev_find(struct net *net, __be32 addr, bool devref)
{
struct flowi fl = {
- .nl_u = {
- .ip4_u = {
- .daddr = addr
- }
- },
+ .fl4_dst = addr,
.flags = FLOWI_FLAG_MATCH_ANY_IIF
};
struct fib_result res = { 0 };
@@ -193,7 +189,7 @@ static inline unsigned __inet_dev_addr_type(struct net *net,
const struct net_device *dev,
__be32 addr)
{
- struct flowi fl = { .nl_u = { .ip4_u = { .daddr = addr } } };
+ struct flowi fl = { .fl4_dst = addr };
struct fib_result res;
unsigned ret = RTN_BROADCAST;
struct fib_table *local_table;
@@ -247,13 +243,9 @@ int fib_validate_source(__be32 src, __be32 dst, u8 tos, int oif,
{
struct in_device *in_dev;
struct flowi fl = {
- .nl_u = {
- .ip4_u = {
- .daddr = src,
- .saddr = dst,
- .tos = tos
- }
- },
+ .fl4_dst = src,
+ .fl4_src = dst,
+ .fl4_tos = tos,
.mark = mark,
.iif = oif
};
@@ -853,13 +845,9 @@ static void nl_fib_lookup(struct fib_result_nl *frn, struct fib_table *tb)
struct fib_result res;
struct flowi fl = {
.mark = frn->fl_mark,
- .nl_u = {
- .ip4_u = {
- .daddr = frn->fl_addr,
- .tos = frn->fl_tos,
- .scope = frn->fl_scope
- }
- }
+ .fl4_dst = frn->fl_addr,
+ .fl4_tos = frn->fl_tos,
+ .fl4_scope = frn->fl_scope,
};
#ifdef CONFIG_IP_MULTIPLE_TABLES
diff --git a/net/ipv4/fib_lookup.h b/net/ipv4/fib_lookup.h
index a29edf2219c..c079cc0ec65 100644
--- a/net/ipv4/fib_lookup.h
+++ b/net/ipv4/fib_lookup.h
@@ -47,11 +47,8 @@ extern int fib_detect_death(struct fib_info *fi, int order,
+/*
+ * fib_result_assign - point @res at @fi.
+ *
+ * With this change it is a plain pointer store: the previous res->fi is no
+ * longer released with fib_info_put() and no fib_clntref reference is taken
+ * on @fi.
+ */
static inline void fib_result_assign(struct fib_result *res,
struct fib_info *fi)
{
- if (res->fi != NULL)
- fib_info_put(res->fi);
+ /* we used to play games with refcounts, but we now use RCU */
res->fi = fi;
- if (fi != NULL)
- atomic_inc(&fi->fib_clntref);
}
#endif /* _FIB_LOOKUP_H */
diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c
index 3e0da3ef611..12d3dc3df1b 100644
--- a/net/ipv4/fib_semantics.c
+++ b/net/ipv4/fib_semantics.c
@@ -563,12 +563,8 @@ static int fib_check_nh(struct fib_config *cfg, struct fib_info *fi,
rcu_read_lock();
{
struct flowi fl = {
- .nl_u = {
- .ip4_u = {
- .daddr = nh->nh_gw,
- .scope = cfg->fc_scope + 1,
- },
- },
+ .fl4_dst = nh->nh_gw,
+ .fl4_scope = cfg->fc_scope + 1,
.oif = nh->nh_oif,
};
diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c
index 96bc7f9475a..4aa1b7f01ea 100644
--- a/net/ipv4/icmp.c
+++ b/net/ipv4/icmp.c
@@ -386,10 +386,9 @@ static void icmp_reply(struct icmp_bxm *icmp_param, struct sk_buff *skb)
daddr = icmp_param->replyopts.faddr;
}
{
- struct flowi fl = { .nl_u = { .ip4_u =
- { .daddr = daddr,
- .saddr = rt->rt_spec_dst,
- .tos = RT_TOS(ip_hdr(skb)->tos) } },
+ struct flowi fl = { .fl4_dst= daddr,
+ .fl4_src = rt->rt_spec_dst,
+ .fl4_tos = RT_TOS(ip_hdr(skb)->tos),
.proto = IPPROTO_ICMP };
security_skb_classify_flow(skb, &fl);
if (ip_route_output_key(net, &rt, &fl))
@@ -506,8 +505,8 @@ void icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info)
struct net_device *dev = NULL;
rcu_read_lock();
- if (rt->fl.iif &&
- net->ipv4.sysctl_icmp_errors_use_inbound_ifaddr)
+ if (rt_is_input_route(rt) &&
+ net->ipv4.sysctl_icmp_errors_use_inbound_ifaddr)
dev = dev_get_by_index_rcu(net, rt->fl.iif);
if (dev)
@@ -542,22 +541,13 @@ void icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info)
{
struct flowi fl = {
- .nl_u = {
- .ip4_u = {
- .daddr = icmp_param.replyopts.srr ?
- icmp_param.replyopts.faddr :
- iph->saddr,
- .saddr = saddr,
- .tos = RT_TOS(tos)
- }
- },
+ .fl4_dst = icmp_param.replyopts.srr ?
+ icmp_param.replyopts.faddr : iph->saddr,
+ .fl4_src = saddr,
+ .fl4_tos = RT_TOS(tos),
.proto = IPPROTO_ICMP,
- .uli_u = {
- .icmpt = {
- .type = type,
- .code = code
- }
- }
+ .fl_icmp_type = type,
+ .fl_icmp_code = code,
};
int err;
struct rtable *rt2;
@@ -569,6 +559,9 @@ void icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info)
/* No need to clone since we're just using its address. */
rt2 = rt;
+ if (!fl.nl_u.ip4_u.saddr)
+ fl.nl_u.ip4_u.saddr = rt->rt_src;
+
err = xfrm_lookup(net, (struct dst_entry **)&rt, &fl, NULL, 0);
switch (err) {
case 0:
diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c
index c8877c6c721..e0e77e297de 100644
--- a/net/ipv4/igmp.c
+++ b/net/ipv4/igmp.c
@@ -149,21 +149,37 @@ static void ip_mc_clear_src(struct ip_mc_list *pmc);
static int ip_mc_add_src(struct in_device *in_dev, __be32 *pmca, int sfmode,
int sfcount, __be32 *psfsrc, int delta);
+
+static void ip_mc_list_reclaim(struct rcu_head *head)
+{
+ kfree(container_of(head, struct ip_mc_list, rcu));
+}
+
static void ip_ma_put(struct ip_mc_list *im)
{
if (atomic_dec_and_test(&im->refcnt)) {
in_dev_put(im->interface);
- kfree(im);
+ call_rcu(&im->rcu, ip_mc_list_reclaim);
}
}
+#define for_each_pmc_rcu(in_dev, pmc) \
+ for (pmc = rcu_dereference(in_dev->mc_list); \
+ pmc != NULL; \
+ pmc = rcu_dereference(pmc->next_rcu))
+
+#define for_each_pmc_rtnl(in_dev, pmc) \
+ for (pmc = rtnl_dereference(in_dev->mc_list); \
+ pmc != NULL; \
+ pmc = rtnl_dereference(pmc->next_rcu))
+
#ifdef CONFIG_IP_MULTICAST
/*
* Timer management
*/
-static __inline__ void igmp_stop_timer(struct ip_mc_list *im)
+static void igmp_stop_timer(struct ip_mc_list *im)
{
spin_lock_bh(&im->lock);
if (del_timer(&im->timer))
@@ -284,6 +300,8 @@ igmp_scount(struct ip_mc_list *pmc, int type, int gdeleted, int sdeleted)
return scount;
}
+#define igmp_skb_size(skb) (*(unsigned int *)((skb)->cb))
+
static struct sk_buff *igmpv3_newpack(struct net_device *dev, int size)
{
struct sk_buff *skb;
@@ -292,14 +310,20 @@ static struct sk_buff *igmpv3_newpack(struct net_device *dev, int size)
struct igmpv3_report *pig;
struct net *net = dev_net(dev);
- skb = alloc_skb(size + LL_ALLOCATED_SPACE(dev), GFP_ATOMIC);
- if (skb == NULL)
- return NULL;
+ while (1) {
+ skb = alloc_skb(size + LL_ALLOCATED_SPACE(dev),
+ GFP_ATOMIC | __GFP_NOWARN);
+ if (skb)
+ break;
+ size >>= 1;
+ if (size < 256)
+ return NULL;
+ }
+ igmp_skb_size(skb) = size;
{
struct flowi fl = { .oif = dev->ifindex,
- .nl_u = { .ip4_u = {
- .daddr = IGMPV3_ALL_MCR } },
+ .fl4_dst = IGMPV3_ALL_MCR,
.proto = IPPROTO_IGMP };
if (ip_route_output_key(net, &rt, &fl)) {
kfree_skb(skb);
@@ -384,7 +408,7 @@ static struct sk_buff *add_grhead(struct sk_buff *skb, struct ip_mc_list *pmc,
return skb;
}
-#define AVAILABLE(skb) ((skb) ? ((skb)->dev ? (skb)->dev->mtu - (skb)->len : \
+#define AVAILABLE(skb) ((skb) ? ((skb)->dev ? igmp_skb_size(skb) - (skb)->len : \
skb_tailroom(skb)) : 0)
static struct sk_buff *add_grec(struct sk_buff *skb, struct ip_mc_list *pmc,
@@ -502,8 +526,8 @@ static int igmpv3_send_report(struct in_device *in_dev, struct ip_mc_list *pmc)
int type;
if (!pmc) {
- read_lock(&in_dev->mc_list_lock);
- for (pmc=in_dev->mc_list; pmc; pmc=pmc->next) {
+ rcu_read_lock();
+ for_each_pmc_rcu(in_dev, pmc) {
if (pmc->multiaddr == IGMP_ALL_HOSTS)
continue;
spin_lock_bh(&pmc->lock);
@@ -514,7 +538,7 @@ static int igmpv3_send_report(struct in_device *in_dev, struct ip_mc_list *pmc)
skb = add_grec(skb, pmc, type, 0, 0);
spin_unlock_bh(&pmc->lock);
}
- read_unlock(&in_dev->mc_list_lock);
+ rcu_read_unlock();
} else {
spin_lock_bh(&pmc->lock);
if (pmc->sfcount[MCAST_EXCLUDE])
@@ -556,7 +580,7 @@ static void igmpv3_send_cr(struct in_device *in_dev)
struct sk_buff *skb = NULL;
int type, dtype;
- read_lock(&in_dev->mc_list_lock);
+ rcu_read_lock();
spin_lock_bh(&in_dev->mc_tomb_lock);
/* deleted MCA's */
@@ -593,7 +617,7 @@ static void igmpv3_send_cr(struct in_device *in_dev)
spin_unlock_bh(&in_dev->mc_tomb_lock);
/* change recs */
- for (pmc=in_dev->mc_list; pmc; pmc=pmc->next) {
+ for_each_pmc_rcu(in_dev, pmc) {
spin_lock_bh(&pmc->lock);
if (pmc->sfcount[MCAST_EXCLUDE]) {
type = IGMPV3_BLOCK_OLD_SOURCES;
@@ -616,7 +640,7 @@ static void igmpv3_send_cr(struct in_device *in_dev)
}
spin_unlock_bh(&pmc->lock);
}
- read_unlock(&in_dev->mc_list_lock);
+ rcu_read_unlock();
if (!skb)
return;
@@ -644,7 +668,7 @@ static int igmp_send_report(struct in_device *in_dev, struct ip_mc_list *pmc,
{
struct flowi fl = { .oif = dev->ifindex,
- .nl_u = { .ip4_u = { .daddr = dst } },
+ .fl4_dst = dst,
.proto = IPPROTO_IGMP };
if (ip_route_output_key(net, &rt, &fl))
return -1;
@@ -813,14 +837,14 @@ static void igmp_heard_report(struct in_device *in_dev, __be32 group)
if (group == IGMP_ALL_HOSTS)
return;
- read_lock(&in_dev->mc_list_lock);
- for (im=in_dev->mc_list; im!=NULL; im=im->next) {
+ rcu_read_lock();
+ for_each_pmc_rcu(in_dev, im) {
if (im->multiaddr == group) {
igmp_stop_timer(im);
break;
}
}
- read_unlock(&in_dev->mc_list_lock);
+ rcu_read_unlock();
}
static void igmp_heard_query(struct in_device *in_dev, struct sk_buff *skb,
@@ -906,8 +930,8 @@ static void igmp_heard_query(struct in_device *in_dev, struct sk_buff *skb,
* - Use the igmp->igmp_code field as the maximum
* delay possible
*/
- read_lock(&in_dev->mc_list_lock);
- for (im=in_dev->mc_list; im!=NULL; im=im->next) {
+ rcu_read_lock();
+ for_each_pmc_rcu(in_dev, im) {
int changed;
if (group && group != im->multiaddr)
@@ -925,7 +949,7 @@ static void igmp_heard_query(struct in_device *in_dev, struct sk_buff *skb,
if (changed)
igmp_mod_timer(im, max_delay);
}
- read_unlock(&in_dev->mc_list_lock);
+ rcu_read_unlock();
}
/* called in rcu_read_lock() section */
@@ -961,7 +985,7 @@ int igmp_rcv(struct sk_buff *skb)
case IGMP_HOST_MEMBERSHIP_REPORT:
case IGMPV2_HOST_MEMBERSHIP_REPORT:
/* Is it our report looped back? */
- if (skb_rtable(skb)->fl.iif == 0)
+ if (rt_is_output_route(skb_rtable(skb)))
break;
/* don't rely on MC router hearing unicast reports */
if (skb->pkt_type == PACKET_MULTICAST ||
@@ -1110,8 +1134,8 @@ static void igmpv3_clear_delrec(struct in_device *in_dev)
kfree(pmc);
}
/* clear dead sources, too */
- read_lock(&in_dev->mc_list_lock);
- for (pmc=in_dev->mc_list; pmc; pmc=pmc->next) {
+ rcu_read_lock();
+ for_each_pmc_rcu(in_dev, pmc) {
struct ip_sf_list *psf, *psf_next;
spin_lock_bh(&pmc->lock);
@@ -1123,7 +1147,7 @@ static void igmpv3_clear_delrec(struct in_device *in_dev)
kfree(psf);
}
}
- read_unlock(&in_dev->mc_list_lock);
+ rcu_read_unlock();
}
#endif
@@ -1209,7 +1233,7 @@ void ip_mc_inc_group(struct in_device *in_dev, __be32 addr)
ASSERT_RTNL();
- for (im=in_dev->mc_list; im; im=im->next) {
+ for_each_pmc_rtnl(in_dev, im) {
if (im->multiaddr == addr) {
im->users++;
ip_mc_add_src(in_dev, &addr, MCAST_EXCLUDE, 0, NULL, 0);
@@ -1217,7 +1241,7 @@ void ip_mc_inc_group(struct in_device *in_dev, __be32 addr)
}
}
- im = kmalloc(sizeof(*im), GFP_KERNEL);
+ im = kzalloc(sizeof(*im), GFP_KERNEL);
if (!im)
goto out;
@@ -1227,26 +1251,18 @@ void ip_mc_inc_group(struct in_device *in_dev, __be32 addr)
im->multiaddr = addr;
/* initial mode is (EX, empty) */
im->sfmode = MCAST_EXCLUDE;
- im->sfcount[MCAST_INCLUDE] = 0;
im->sfcount[MCAST_EXCLUDE] = 1;
- im->sources = NULL;
- im->tomb = NULL;
- im->crcount = 0;
atomic_set(&im->refcnt, 1);
spin_lock_init(&im->lock);
#ifdef CONFIG_IP_MULTICAST
- im->tm_running = 0;
setup_timer(&im->timer, &igmp_timer_expire, (unsigned long)im);
im->unsolicit_count = IGMP_Unsolicited_Report_Count;
- im->reporter = 0;
- im->gsquery = 0;
#endif
- im->loaded = 0;
- write_lock_bh(&in_dev->mc_list_lock);
- im->next = in_dev->mc_list;
- in_dev->mc_list = im;
+
+ im->next_rcu = in_dev->mc_list;
in_dev->mc_count++;
- write_unlock_bh(&in_dev->mc_list_lock);
+ rcu_assign_pointer(in_dev->mc_list, im);
+
#ifdef CONFIG_IP_MULTICAST
igmpv3_del_delrec(in_dev, im->multiaddr);
#endif
@@ -1260,26 +1276,32 @@ EXPORT_SYMBOL(ip_mc_inc_group);
/*
* Resend IGMP JOIN report; used for bonding.
+ * Called with rcu_read_lock()
*/
-void ip_mc_rejoin_group(struct ip_mc_list *im)
+void ip_mc_rejoin_groups(struct in_device *in_dev)
{
#ifdef CONFIG_IP_MULTICAST
- struct in_device *in_dev = im->interface;
+ struct ip_mc_list *im;
+ int type;
- if (im->multiaddr == IGMP_ALL_HOSTS)
- return;
+ for_each_pmc_rcu(in_dev, im) {
+ if (im->multiaddr == IGMP_ALL_HOSTS)
+ continue;
- /* a failover is happening and switches
- * must be notified immediately */
- if (IGMP_V1_SEEN(in_dev))
- igmp_send_report(in_dev, im, IGMP_HOST_MEMBERSHIP_REPORT);
- else if (IGMP_V2_SEEN(in_dev))
- igmp_send_report(in_dev, im, IGMPV2_HOST_MEMBERSHIP_REPORT);
- else
- igmp_send_report(in_dev, im, IGMPV3_HOST_MEMBERSHIP_REPORT);
+ /* a failover is happening and switches
+ * must be notified immediately
+ */
+ if (IGMP_V1_SEEN(in_dev))
+ type = IGMP_HOST_MEMBERSHIP_REPORT;
+ else if (IGMP_V2_SEEN(in_dev))
+ type = IGMPV2_HOST_MEMBERSHIP_REPORT;
+ else
+ type = IGMPV3_HOST_MEMBERSHIP_REPORT;
+ igmp_send_report(in_dev, im, type);
+ }
#endif
}
-EXPORT_SYMBOL(ip_mc_rejoin_group);
+EXPORT_SYMBOL(ip_mc_rejoin_groups);
/*
* A socket has left a multicast group on device dev
@@ -1287,17 +1309,18 @@ EXPORT_SYMBOL(ip_mc_rejoin_group);
void ip_mc_dec_group(struct in_device *in_dev, __be32 addr)
{
- struct ip_mc_list *i, **ip;
+ struct ip_mc_list *i;
+ struct ip_mc_list __rcu **ip;
ASSERT_RTNL();
- for (ip=&in_dev->mc_list; (i=*ip)!=NULL; ip=&i->next) {
+ for (ip = &in_dev->mc_list;
+ (i = rtnl_dereference(*ip)) != NULL;
+ ip = &i->next_rcu) {
if (i->multiaddr == addr) {
if (--i->users == 0) {
- write_lock_bh(&in_dev->mc_list_lock);
- *ip = i->next;
+ *ip = i->next_rcu;
in_dev->mc_count--;
- write_unlock_bh(&in_dev->mc_list_lock);
igmp_group_dropped(i);
if (!in_dev->dead)
@@ -1316,34 +1339,34 @@ EXPORT_SYMBOL(ip_mc_dec_group);
void ip_mc_unmap(struct in_device *in_dev)
{
- struct ip_mc_list *i;
+ struct ip_mc_list *pmc;
ASSERT_RTNL();
- for (i = in_dev->mc_list; i; i = i->next)
- igmp_group_dropped(i);
+ for_each_pmc_rtnl(in_dev, pmc)
+ igmp_group_dropped(pmc);
}
void ip_mc_remap(struct in_device *in_dev)
{
- struct ip_mc_list *i;
+ struct ip_mc_list *pmc;
ASSERT_RTNL();
- for (i = in_dev->mc_list; i; i = i->next)
- igmp_group_added(i);
+ for_each_pmc_rtnl(in_dev, pmc)
+ igmp_group_added(pmc);
}
/* Device going down */
void ip_mc_down(struct in_device *in_dev)
{
- struct ip_mc_list *i;
+ struct ip_mc_list *pmc;
ASSERT_RTNL();
- for (i=in_dev->mc_list; i; i=i->next)
- igmp_group_dropped(i);
+ for_each_pmc_rtnl(in_dev, pmc)
+ igmp_group_dropped(pmc);
#ifdef CONFIG_IP_MULTICAST
in_dev->mr_ifc_count = 0;
@@ -1374,7 +1397,6 @@ void ip_mc_init_dev(struct in_device *in_dev)
in_dev->mr_qrv = IGMP_Unsolicited_Report_Count;
#endif
- rwlock_init(&in_dev->mc_list_lock);
spin_lock_init(&in_dev->mc_tomb_lock);
}
@@ -1382,14 +1404,14 @@ void ip_mc_init_dev(struct in_device *in_dev)
void ip_mc_up(struct in_device *in_dev)
{
- struct ip_mc_list *i;
+ struct ip_mc_list *pmc;
ASSERT_RTNL();
ip_mc_inc_group(in_dev, IGMP_ALL_HOSTS);
- for (i=in_dev->mc_list; i; i=i->next)
- igmp_group_added(i);
+ for_each_pmc_rtnl(in_dev, pmc)
+ igmp_group_added(pmc);
}
/*
@@ -1405,24 +1427,19 @@ void ip_mc_destroy_dev(struct in_device *in_dev)
/* Deactivate timers */
ip_mc_down(in_dev);
- write_lock_bh(&in_dev->mc_list_lock);
- while ((i = in_dev->mc_list) != NULL) {
- in_dev->mc_list = i->next;
+ while ((i = rtnl_dereference(in_dev->mc_list)) != NULL) {
+ in_dev->mc_list = i->next_rcu;
in_dev->mc_count--;
- write_unlock_bh(&in_dev->mc_list_lock);
+
igmp_group_dropped(i);
ip_ma_put(i);
-
- write_lock_bh(&in_dev->mc_list_lock);
}
- write_unlock_bh(&in_dev->mc_list_lock);
}
/* RTNL is locked */
static struct in_device *ip_mc_find_dev(struct net *net, struct ip_mreqn *imr)
{
- struct flowi fl = { .nl_u = { .ip4_u =
- { .daddr = imr->imr_multiaddr.s_addr } } };
+ struct flowi fl = { .fl4_dst = imr->imr_multiaddr.s_addr };
struct rtable *rt;
struct net_device *dev = NULL;
struct in_device *idev = NULL;
@@ -1513,18 +1530,18 @@ static int ip_mc_del_src(struct in_device *in_dev, __be32 *pmca, int sfmode,
if (!in_dev)
return -ENODEV;
- read_lock(&in_dev->mc_list_lock);
- for (pmc=in_dev->mc_list; pmc; pmc=pmc->next) {
+ rcu_read_lock();
+ for_each_pmc_rcu(in_dev, pmc) {
if (*pmca == pmc->multiaddr)
break;
}
if (!pmc) {
/* MCA not found?? bug */
- read_unlock(&in_dev->mc_list_lock);
+ rcu_read_unlock();
return -ESRCH;
}
spin_lock_bh(&pmc->lock);
- read_unlock(&in_dev->mc_list_lock);
+ rcu_read_unlock();
#ifdef CONFIG_IP_MULTICAST
sf_markstate(pmc);
#endif
@@ -1685,18 +1702,18 @@ static int ip_mc_add_src(struct in_device *in_dev, __be32 *pmca, int sfmode,
if (!in_dev)
return -ENODEV;
- read_lock(&in_dev->mc_list_lock);
- for (pmc=in_dev->mc_list; pmc; pmc=pmc->next) {
+ rcu_read_lock();
+ for_each_pmc_rcu(in_dev, pmc) {
if (*pmca == pmc->multiaddr)
break;
}
if (!pmc) {
/* MCA not found?? bug */
- read_unlock(&in_dev->mc_list_lock);
+ rcu_read_unlock();
return -ESRCH;
}
spin_lock_bh(&pmc->lock);
- read_unlock(&in_dev->mc_list_lock);
+ rcu_read_unlock();
#ifdef CONFIG_IP_MULTICAST
sf_markstate(pmc);
@@ -1793,7 +1810,7 @@ int ip_mc_join_group(struct sock *sk , struct ip_mreqn *imr)
err = -EADDRINUSE;
ifindex = imr->imr_ifindex;
- for (i = inet->mc_list; i; i = i->next) {
+ for_each_pmc_rtnl(inet, i) {
if (i->multi.imr_multiaddr.s_addr == addr &&
i->multi.imr_ifindex == ifindex)
goto done;
@@ -1807,7 +1824,7 @@ int ip_mc_join_group(struct sock *sk , struct ip_mreqn *imr)
goto done;
memcpy(&iml->multi, imr, sizeof(*imr));
- iml->next = inet->mc_list;
+ iml->next_rcu = inet->mc_list;
iml->sflist = NULL;
iml->sfmode = MCAST_EXCLUDE;
rcu_assign_pointer(inet->mc_list, iml);
@@ -1821,17 +1838,14 @@ EXPORT_SYMBOL(ip_mc_join_group);
static void ip_sf_socklist_reclaim(struct rcu_head *rp)
{
- struct ip_sf_socklist *psf;
-
- psf = container_of(rp, struct ip_sf_socklist, rcu);
+ kfree(container_of(rp, struct ip_sf_socklist, rcu));
/* sk_omem_alloc should have been decreased by the caller*/
- kfree(psf);
}
static int ip_mc_leave_src(struct sock *sk, struct ip_mc_socklist *iml,
struct in_device *in_dev)
{
- struct ip_sf_socklist *psf = iml->sflist;
+ struct ip_sf_socklist *psf = rtnl_dereference(iml->sflist);
int err;
if (psf == NULL) {
@@ -1851,11 +1865,8 @@ static int ip_mc_leave_src(struct sock *sk, struct ip_mc_socklist *iml,
static void ip_mc_socklist_reclaim(struct rcu_head *rp)
{
- struct ip_mc_socklist *iml;
-
- iml = container_of(rp, struct ip_mc_socklist, rcu);
+ kfree(container_of(rp, struct ip_mc_socklist, rcu));
/* sk_omem_alloc should have been decreased by the caller*/
- kfree(iml);
}
@@ -1866,7 +1877,8 @@ static void ip_mc_socklist_reclaim(struct rcu_head *rp)
int ip_mc_leave_group(struct sock *sk, struct ip_mreqn *imr)
{
struct inet_sock *inet = inet_sk(sk);
- struct ip_mc_socklist *iml, **imlp;
+ struct ip_mc_socklist *iml;
+ struct ip_mc_socklist __rcu **imlp;
struct in_device *in_dev;
struct net *net = sock_net(sk);
__be32 group = imr->imr_multiaddr.s_addr;
@@ -1876,7 +1888,9 @@ int ip_mc_leave_group(struct sock *sk, struct ip_mreqn *imr)
rtnl_lock();
in_dev = ip_mc_find_dev(net, imr);
ifindex = imr->imr_ifindex;
- for (imlp = &inet->mc_list; (iml = *imlp) != NULL; imlp = &iml->next) {
+ for (imlp = &inet->mc_list;
+ (iml = rtnl_dereference(*imlp)) != NULL;
+ imlp = &iml->next_rcu) {
if (iml->multi.imr_multiaddr.s_addr != group)
continue;
if (ifindex) {
@@ -1888,7 +1902,7 @@ int ip_mc_leave_group(struct sock *sk, struct ip_mreqn *imr)
(void) ip_mc_leave_src(sk, iml, in_dev);
- rcu_assign_pointer(*imlp, iml->next);
+ *imlp = iml->next_rcu;
if (in_dev)
ip_mc_dec_group(in_dev, group);
@@ -1934,7 +1948,7 @@ int ip_mc_source(int add, int omode, struct sock *sk, struct
}
err = -EADDRNOTAVAIL;
- for (pmc=inet->mc_list; pmc; pmc=pmc->next) {
+ for_each_pmc_rtnl(inet, pmc) {
if ((pmc->multi.imr_multiaddr.s_addr ==
imr.imr_multiaddr.s_addr) &&
(pmc->multi.imr_ifindex == imr.imr_ifindex))
@@ -1958,7 +1972,7 @@ int ip_mc_source(int add, int omode, struct sock *sk, struct
pmc->sfmode = omode;
}
- psl = pmc->sflist;
+ psl = rtnl_dereference(pmc->sflist);
if (!add) {
if (!psl)
goto done; /* err = -EADDRNOTAVAIL */
@@ -2077,7 +2091,7 @@ int ip_mc_msfilter(struct sock *sk, struct ip_msfilter *msf, int ifindex)
goto done;
}
- for (pmc=inet->mc_list; pmc; pmc=pmc->next) {
+ for_each_pmc_rtnl(inet, pmc) {
if (pmc->multi.imr_multiaddr.s_addr == msf->imsf_multiaddr &&
pmc->multi.imr_ifindex == imr.imr_ifindex)
break;
@@ -2107,7 +2121,7 @@ int ip_mc_msfilter(struct sock *sk, struct ip_msfilter *msf, int ifindex)
(void) ip_mc_add_src(in_dev, &msf->imsf_multiaddr,
msf->imsf_fmode, 0, NULL, 0);
}
- psl = pmc->sflist;
+ psl = rtnl_dereference(pmc->sflist);
if (psl) {
(void) ip_mc_del_src(in_dev, &msf->imsf_multiaddr, pmc->sfmode,
psl->sl_count, psl->sl_addr, 0);
@@ -2155,7 +2169,7 @@ int ip_mc_msfget(struct sock *sk, struct ip_msfilter *msf,
}
err = -EADDRNOTAVAIL;
- for (pmc=inet->mc_list; pmc; pmc=pmc->next) {
+ for_each_pmc_rtnl(inet, pmc) {
if (pmc->multi.imr_multiaddr.s_addr == msf->imsf_multiaddr &&
pmc->multi.imr_ifindex == imr.imr_ifindex)
break;
@@ -2163,7 +2177,7 @@ int ip_mc_msfget(struct sock *sk, struct ip_msfilter *msf,
if (!pmc) /* must have a prior join */
goto done;
msf->imsf_fmode = pmc->sfmode;
- psl = pmc->sflist;
+ psl = rtnl_dereference(pmc->sflist);
rtnl_unlock();
if (!psl) {
len = 0;
@@ -2208,7 +2222,7 @@ int ip_mc_gsfget(struct sock *sk, struct group_filter *gsf,
err = -EADDRNOTAVAIL;
- for (pmc=inet->mc_list; pmc; pmc=pmc->next) {
+ for_each_pmc_rtnl(inet, pmc) {
if (pmc->multi.imr_multiaddr.s_addr == addr &&
pmc->multi.imr_ifindex == gsf->gf_interface)
break;
@@ -2216,7 +2230,7 @@ int ip_mc_gsfget(struct sock *sk, struct group_filter *gsf,
if (!pmc) /* must have a prior join */
goto done;
gsf->gf_fmode = pmc->sfmode;
- psl = pmc->sflist;
+ psl = rtnl_dereference(pmc->sflist);
rtnl_unlock();
count = psl ? psl->sl_count : 0;
copycount = count < gsf->gf_numsrc ? count : gsf->gf_numsrc;
@@ -2257,7 +2271,7 @@ int ip_mc_sf_allow(struct sock *sk, __be32 loc_addr, __be32 rmt_addr, int dif)
goto out;
rcu_read_lock();
- for (pmc=rcu_dereference(inet->mc_list); pmc; pmc=rcu_dereference(pmc->next)) {
+ for_each_pmc_rcu(inet, pmc) {
if (pmc->multi.imr_multiaddr.s_addr == loc_addr &&
pmc->multi.imr_ifindex == dif)
break;
@@ -2265,7 +2279,7 @@ int ip_mc_sf_allow(struct sock *sk, __be32 loc_addr, __be32 rmt_addr, int dif)
ret = inet->mc_all;
if (!pmc)
goto unlock;
- psl = pmc->sflist;
+ psl = rcu_dereference(pmc->sflist);
ret = (pmc->sfmode == MCAST_EXCLUDE);
if (!psl)
goto unlock;
@@ -2300,16 +2314,14 @@ void ip_mc_drop_socket(struct sock *sk)
return;
rtnl_lock();
- while ((iml = inet->mc_list) != NULL) {
+ while ((iml = rtnl_dereference(inet->mc_list)) != NULL) {
struct in_device *in_dev;
- rcu_assign_pointer(inet->mc_list, iml->next);
+ inet->mc_list = iml->next_rcu;
in_dev = inetdev_by_index(net, iml->multi.imr_ifindex);
(void) ip_mc_leave_src(sk, iml, in_dev);
- if (in_dev != NULL) {
+ if (in_dev != NULL)
ip_mc_dec_group(in_dev, iml->multi.imr_multiaddr.s_addr);
- in_dev_put(in_dev);
- }
/* decrease mem now to avoid the memleak warning */
atomic_sub(sizeof(*iml), &sk->sk_omem_alloc);
call_rcu(&iml->rcu, ip_mc_socklist_reclaim);
@@ -2323,8 +2335,8 @@ int ip_check_mc(struct in_device *in_dev, __be32 mc_addr, __be32 src_addr, u16 p
struct ip_sf_list *psf;
int rv = 0;
- read_lock(&in_dev->mc_list_lock);
- for (im=in_dev->mc_list; im; im=im->next) {
+ rcu_read_lock();
+ for_each_pmc_rcu(in_dev, im) {
if (im->multiaddr == mc_addr)
break;
}
@@ -2345,7 +2357,7 @@ int ip_check_mc(struct in_device *in_dev, __be32 mc_addr, __be32 src_addr, u16 p
} else
rv = 1; /* unspecified source; tentatively allow */
}
- read_unlock(&in_dev->mc_list_lock);
+ rcu_read_unlock();
return rv;
}
@@ -2371,13 +2383,11 @@ static inline struct ip_mc_list *igmp_mc_get_first(struct seq_file *seq)
in_dev = __in_dev_get_rcu(state->dev);
if (!in_dev)
continue;
- read_lock(&in_dev->mc_list_lock);
- im = in_dev->mc_list;
+ im = rcu_dereference(in_dev->mc_list);
if (im) {
state->in_dev = in_dev;
break;
}
- read_unlock(&in_dev->mc_list_lock);
}
return im;
}
@@ -2385,11 +2395,9 @@ static inline struct ip_mc_list *igmp_mc_get_first(struct seq_file *seq)
static struct ip_mc_list *igmp_mc_get_next(struct seq_file *seq, struct ip_mc_list *im)
{
struct igmp_mc_iter_state *state = igmp_mc_seq_private(seq);
- im = im->next;
- while (!im) {
- if (likely(state->in_dev != NULL))
- read_unlock(&state->in_dev->mc_list_lock);
+ im = rcu_dereference(im->next_rcu);
+ while (!im) {
state->dev = next_net_device_rcu(state->dev);
if (!state->dev) {
state->in_dev = NULL;
@@ -2398,8 +2406,7 @@ static struct ip_mc_list *igmp_mc_get_next(struct seq_file *seq, struct ip_mc_li
state->in_dev = __in_dev_get_rcu(state->dev);
if (!state->in_dev)
continue;
- read_lock(&state->in_dev->mc_list_lock);
- im = state->in_dev->mc_list;
+ im = rcu_dereference(state->in_dev->mc_list);
}
return im;
}
@@ -2435,10 +2442,8 @@ static void igmp_mc_seq_stop(struct seq_file *seq, void *v)
__releases(rcu)
{
struct igmp_mc_iter_state *state = igmp_mc_seq_private(seq);
- if (likely(state->in_dev != NULL)) {
- read_unlock(&state->in_dev->mc_list_lock);
- state->in_dev = NULL;
- }
+
+ state->in_dev = NULL;
state->dev = NULL;
rcu_read_unlock();
}
@@ -2460,7 +2465,7 @@ static int igmp_mc_seq_show(struct seq_file *seq, void *v)
querier = "NONE";
#endif
- if (state->in_dev->mc_list == im) {
+ if (rcu_dereference(state->in_dev->mc_list) == im) {
seq_printf(seq, "%d\t%-10s: %5d %7s\n",
state->dev->ifindex, state->dev->name, state->in_dev->mc_count, querier);
}
@@ -2519,8 +2524,7 @@ static inline struct ip_sf_list *igmp_mcf_get_first(struct seq_file *seq)
idev = __in_dev_get_rcu(state->dev);
if (unlikely(idev == NULL))
continue;
- read_lock(&idev->mc_list_lock);
- im = idev->mc_list;
+ im = rcu_dereference(idev->mc_list);
if (likely(im != NULL)) {
spin_lock_bh(&im->lock);
psf = im->sources;
@@ -2531,7 +2535,6 @@ static inline struct ip_sf_list *igmp_mcf_get_first(struct seq_file *seq)
}
spin_unlock_bh(&im->lock);
}
- read_unlock(&idev->mc_list_lock);
}
return psf;
}
@@ -2545,9 +2548,6 @@ static struct ip_sf_list *igmp_mcf_get_next(struct seq_file *seq, struct ip_sf_l
spin_unlock_bh(&state->im->lock);
state->im = state->im->next;
while (!state->im) {
- if (likely(state->idev != NULL))
- read_unlock(&state->idev->mc_list_lock);
-
state->dev = next_net_device_rcu(state->dev);
if (!state->dev) {
state->idev = NULL;
@@ -2556,8 +2556,7 @@ static struct ip_sf_list *igmp_mcf_get_next(struct seq_file *seq, struct ip_sf_l
state->idev = __in_dev_get_rcu(state->dev);
if (!state->idev)
continue;
- read_lock(&state->idev->mc_list_lock);
- state->im = state->idev->mc_list;
+ state->im = rcu_dereference(state->idev->mc_list);
}
if (!state->im)
break;
@@ -2603,10 +2602,7 @@ static void igmp_mcf_seq_stop(struct seq_file *seq, void *v)
spin_unlock_bh(&state->im->lock);
state->im = NULL;
}
- if (likely(state->idev != NULL)) {
- read_unlock(&state->idev->mc_list_lock);
- state->idev = NULL;
- }
+ state->idev = NULL;
state->dev = NULL;
rcu_read_unlock();
}
diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c
index 7174370b119..06f5f8f482f 100644
--- a/net/ipv4/inet_connection_sock.c
+++ b/net/ipv4/inet_connection_sock.c
@@ -358,17 +358,14 @@ struct dst_entry *inet_csk_route_req(struct sock *sk,
struct ip_options *opt = inet_rsk(req)->opt;
struct flowi fl = { .oif = sk->sk_bound_dev_if,
.mark = sk->sk_mark,
- .nl_u = { .ip4_u =
- { .daddr = ((opt && opt->srr) ?
- opt->faddr :
- ireq->rmt_addr),
- .saddr = ireq->loc_addr,
- .tos = RT_CONN_FLAGS(sk) } },
+ .fl4_dst = ((opt && opt->srr) ?
+ opt->faddr : ireq->rmt_addr),
+ .fl4_src = ireq->loc_addr,
+ .fl4_tos = RT_CONN_FLAGS(sk),
.proto = sk->sk_protocol,
.flags = inet_sk_flowi_flags(sk),
- .uli_u = { .ports =
- { .sport = inet_sk(sk)->inet_sport,
- .dport = ireq->rmt_port } } };
+ .fl_ip_sport = inet_sk(sk)->inet_sport,
+ .fl_ip_dport = ireq->rmt_port };
struct net *net = sock_net(sk);
security_req_classify_flow(req, &fl);
diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c
index ba804266584..2ada17129fc 100644
--- a/net/ipv4/inet_diag.c
+++ b/net/ipv4/inet_diag.c
@@ -490,9 +490,11 @@ static int inet_csk_diag_dump(struct sock *sk,
{
struct inet_diag_req *r = NLMSG_DATA(cb->nlh);
- if (cb->nlh->nlmsg_len > 4 + NLMSG_SPACE(sizeof(*r))) {
+ if (nlmsg_attrlen(cb->nlh, sizeof(*r))) {
struct inet_diag_entry entry;
- struct rtattr *bc = (struct rtattr *)(r + 1);
+ const struct nlattr *bc = nlmsg_find_attr(cb->nlh,
+ sizeof(*r),
+ INET_DIAG_REQ_BYTECODE);
struct inet_sock *inet = inet_sk(sk);
entry.family = sk->sk_family;
@@ -512,7 +514,7 @@ static int inet_csk_diag_dump(struct sock *sk,
entry.dport = ntohs(inet->inet_dport);
entry.userlocks = sk->sk_userlocks;
- if (!inet_diag_bc_run(RTA_DATA(bc), RTA_PAYLOAD(bc), &entry))
+ if (!inet_diag_bc_run(nla_data(bc), nla_len(bc), &entry))
return 0;
}
@@ -527,9 +529,11 @@ static int inet_twsk_diag_dump(struct inet_timewait_sock *tw,
{
struct inet_diag_req *r = NLMSG_DATA(cb->nlh);
- if (cb->nlh->nlmsg_len > 4 + NLMSG_SPACE(sizeof(*r))) {
+ if (nlmsg_attrlen(cb->nlh, sizeof(*r))) {
struct inet_diag_entry entry;
- struct rtattr *bc = (struct rtattr *)(r + 1);
+ const struct nlattr *bc = nlmsg_find_attr(cb->nlh,
+ sizeof(*r),
+ INET_DIAG_REQ_BYTECODE);
entry.family = tw->tw_family;
#if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE)
@@ -548,7 +552,7 @@ static int inet_twsk_diag_dump(struct inet_timewait_sock *tw,
entry.dport = ntohs(tw->tw_dport);
entry.userlocks = 0;
- if (!inet_diag_bc_run(RTA_DATA(bc), RTA_PAYLOAD(bc), &entry))
+ if (!inet_diag_bc_run(nla_data(bc), nla_len(bc), &entry))
return 0;
}
@@ -618,7 +622,7 @@ static int inet_diag_dump_reqs(struct sk_buff *skb, struct sock *sk,
struct inet_diag_req *r = NLMSG_DATA(cb->nlh);
struct inet_connection_sock *icsk = inet_csk(sk);
struct listen_sock *lopt;
- struct rtattr *bc = NULL;
+ const struct nlattr *bc = NULL;
struct inet_sock *inet = inet_sk(sk);
int j, s_j;
int reqnum, s_reqnum;
@@ -638,8 +642,9 @@ static int inet_diag_dump_reqs(struct sk_buff *skb, struct sock *sk,
if (!lopt || !lopt->qlen)
goto out;
- if (cb->nlh->nlmsg_len > 4 + NLMSG_SPACE(sizeof(*r))) {
- bc = (struct rtattr *)(r + 1);
+ if (nlmsg_attrlen(cb->nlh, sizeof(*r))) {
+ bc = nlmsg_find_attr(cb->nlh, sizeof(*r),
+ INET_DIAG_REQ_BYTECODE);
entry.sport = inet->inet_num;
entry.userlocks = sk->sk_userlocks;
}
@@ -672,8 +677,8 @@ static int inet_diag_dump_reqs(struct sk_buff *skb, struct sock *sk,
&ireq->rmt_addr;
entry.dport = ntohs(ireq->rmt_port);
- if (!inet_diag_bc_run(RTA_DATA(bc),
- RTA_PAYLOAD(bc), &entry))
+ if (!inet_diag_bc_run(nla_data(bc),
+ nla_len(bc), &entry))
continue;
}
diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c
index 70ff77f02ee..897210adaa7 100644
--- a/net/ipv4/ip_gre.c
+++ b/net/ipv4/ip_gre.c
@@ -634,7 +634,7 @@ static int ipgre_rcv(struct sk_buff *skb)
#ifdef CONFIG_NET_IPGRE_BROADCAST
if (ipv4_is_multicast(iph->daddr)) {
/* Looped back packet, drop it! */
- if (skb_rtable(skb)->fl.iif == 0)
+ if (rt_is_output_route(skb_rtable(skb)))
goto drop;
tunnel->dev->stats.multicast++;
skb->pkt_type = PACKET_BROADCAST;
@@ -772,16 +772,11 @@ static netdev_tx_t ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev
{
struct flowi fl = {
.oif = tunnel->parms.link,
- .nl_u = {
- .ip4_u = {
- .daddr = dst,
- .saddr = tiph->saddr,
- .tos = RT_TOS(tos)
- }
- },
- .proto = IPPROTO_GRE
- }
-;
+ .fl4_dst = dst,
+ .fl4_src = tiph->saddr,
+ .fl4_tos = RT_TOS(tos),
+ .fl_gre_key = tunnel->parms.o_key
+ };
if (ip_route_output_key(dev_net(dev), &rt, &fl)) {
dev->stats.tx_carrier_errors++;
goto tx_error;
@@ -951,14 +946,11 @@ static int ipgre_tunnel_bind_dev(struct net_device *dev)
if (iph->daddr) {
struct flowi fl = {
.oif = tunnel->parms.link,
- .nl_u = {
- .ip4_u = {
- .daddr = iph->daddr,
- .saddr = iph->saddr,
- .tos = RT_TOS(iph->tos)
- }
- },
- .proto = IPPROTO_GRE
+ .fl4_dst = iph->daddr,
+ .fl4_src = iph->saddr,
+ .fl4_tos = RT_TOS(iph->tos),
+ .proto = IPPROTO_GRE,
+ .fl_gre_key = tunnel->parms.o_key
};
struct rtable *rt;
@@ -1216,14 +1208,11 @@ static int ipgre_open(struct net_device *dev)
if (ipv4_is_multicast(t->parms.iph.daddr)) {
struct flowi fl = {
.oif = t->parms.link,
- .nl_u = {
- .ip4_u = {
- .daddr = t->parms.iph.daddr,
- .saddr = t->parms.iph.saddr,
- .tos = RT_TOS(t->parms.iph.tos)
- }
- },
- .proto = IPPROTO_GRE
+ .fl4_dst = t->parms.iph.daddr,
+ .fl4_src = t->parms.iph.saddr,
+ .fl4_tos = RT_TOS(t->parms.iph.tos),
+ .proto = IPPROTO_GRE,
+ .fl_gre_key = t->parms.o_key
};
struct rtable *rt;
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index 439d2a34ee4..5090c7ff525 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -341,15 +341,13 @@ int ip_queue_xmit(struct sk_buff *skb)
{
struct flowi fl = { .oif = sk->sk_bound_dev_if,
.mark = sk->sk_mark,
- .nl_u = { .ip4_u =
- { .daddr = daddr,
- .saddr = inet->inet_saddr,
- .tos = RT_CONN_FLAGS(sk) } },
+ .fl4_dst = daddr,
+ .fl4_src = inet->inet_saddr,
+ .fl4_tos = RT_CONN_FLAGS(sk),
.proto = sk->sk_protocol,
.flags = inet_sk_flowi_flags(sk),
- .uli_u = { .ports =
- { .sport = inet->inet_sport,
- .dport = inet->inet_dport } } };
+ .fl_ip_sport = inet->inet_sport,
+ .fl_ip_dport = inet->inet_dport };
/* If this fails, retransmit mechanism of transport layer will
* keep trying until route appears or the connection times
@@ -1404,14 +1402,11 @@ void ip_send_reply(struct sock *sk, struct sk_buff *skb, struct ip_reply_arg *ar
{
struct flowi fl = { .oif = arg->bound_dev_if,
- .nl_u = { .ip4_u =
- { .daddr = daddr,
- .saddr = rt->rt_spec_dst,
- .tos = RT_TOS(ip_hdr(skb)->tos) } },
- /* Not quite clean, but right. */
- .uli_u = { .ports =
- { .sport = tcp_hdr(skb)->dest,
- .dport = tcp_hdr(skb)->source } },
+ .fl4_dst = daddr,
+ .fl4_src = rt->rt_spec_dst,
+ .fl4_tos = RT_TOS(ip_hdr(skb)->tos),
+ .fl_ip_sport = tcp_hdr(skb)->dest,
+ .fl_ip_dport = tcp_hdr(skb)->source,
.proto = sk->sk_protocol,
.flags = ip_reply_arg_flowi_flags(arg) };
security_skb_classify_flow(skb, &fl);
diff --git a/net/ipv4/ipip.c b/net/ipv4/ipip.c
index cd300aaee78..e70ad581398 100644
--- a/net/ipv4/ipip.c
+++ b/net/ipv4/ipip.c
@@ -463,13 +463,9 @@ static netdev_tx_t ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
{
struct flowi fl = {
.oif = tunnel->parms.link,
- .nl_u = {
- .ip4_u = {
- .daddr = dst,
- .saddr = tiph->saddr,
- .tos = RT_TOS(tos)
- }
- },
+ .fl4_dst = dst,
+ .fl4_src= tiph->saddr,
+ .fl4_tos = RT_TOS(tos),
.proto = IPPROTO_IPIP
};
@@ -589,13 +585,9 @@ static void ipip_tunnel_bind_dev(struct net_device *dev)
if (iph->daddr) {
struct flowi fl = {
.oif = tunnel->parms.link,
- .nl_u = {
- .ip4_u = {
- .daddr = iph->daddr,
- .saddr = iph->saddr,
- .tos = RT_TOS(iph->tos)
- }
- },
+ .fl4_dst = iph->daddr,
+ .fl4_src = iph->saddr,
+ .fl4_tos = RT_TOS(iph->tos),
.proto = IPPROTO_IPIP
};
struct rtable *rt;
diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c
index 86dd5691af4..3f3a9afd73e 100644
--- a/net/ipv4/ipmr.c
+++ b/net/ipv4/ipmr.c
@@ -1537,13 +1537,9 @@ static void ipmr_queue_xmit(struct net *net, struct mr_table *mrt,
if (vif->flags & VIFF_TUNNEL) {
struct flowi fl = {
.oif = vif->link,
- .nl_u = {
- .ip4_u = {
- .daddr = vif->remote,
- .saddr = vif->local,
- .tos = RT_TOS(iph->tos)
- }
- },
+ .fl4_dst = vif->remote,
+ .fl4_src = vif->local,
+ .fl4_tos = RT_TOS(iph->tos),
.proto = IPPROTO_IPIP
};
@@ -1553,12 +1549,8 @@ static void ipmr_queue_xmit(struct net *net, struct mr_table *mrt,
} else {
struct flowi fl = {
.oif = vif->link,
- .nl_u = {
- .ip4_u = {
- .daddr = iph->daddr,
- .tos = RT_TOS(iph->tos)
- }
- },
+ .fl4_dst = iph->daddr,
+ .fl4_tos = RT_TOS(iph->tos),
.proto = IPPROTO_IPIP
};
@@ -1654,7 +1646,7 @@ static int ip_mr_forward(struct net *net, struct mr_table *mrt,
if (mrt->vif_table[vif].dev != skb->dev) {
int true_vifi;
- if (skb_rtable(skb)->fl.iif == 0) {
+ if (rt_is_output_route(skb_rtable(skb))) {
/* It is our own packet, looped back.
* Very complicated situation...
*
diff --git a/net/ipv4/netfilter.c b/net/ipv4/netfilter.c
index d88a46c54fd..994a1f29ebb 100644
--- a/net/ipv4/netfilter.c
+++ b/net/ipv4/netfilter.c
@@ -31,10 +31,10 @@ int ip_route_me_harder(struct sk_buff *skb, unsigned addr_type)
* packets with foreign saddr to appear on the NF_INET_LOCAL_OUT hook.
*/
if (addr_type == RTN_LOCAL) {
- fl.nl_u.ip4_u.daddr = iph->daddr;
+ fl.fl4_dst = iph->daddr;
if (type == RTN_LOCAL)
- fl.nl_u.ip4_u.saddr = iph->saddr;
- fl.nl_u.ip4_u.tos = RT_TOS(iph->tos);
+ fl.fl4_src = iph->saddr;
+ fl.fl4_tos = RT_TOS(iph->tos);
fl.oif = skb->sk ? skb->sk->sk_bound_dev_if : 0;
fl.mark = skb->mark;
fl.flags = skb->sk ? inet_sk_flowi_flags(skb->sk) : 0;
@@ -47,7 +47,7 @@ int ip_route_me_harder(struct sk_buff *skb, unsigned addr_type)
} else {
/* non-local src, find valid iif to satisfy
* rp-filter when calling ip_route_input. */
- fl.nl_u.ip4_u.daddr = iph->saddr;
+ fl.fl4_dst = iph->saddr;
if (ip_route_output_key(net, &rt, &fl) != 0)
return -1;
diff --git a/net/ipv4/netfilter/Makefile b/net/ipv4/netfilter/Makefile
index 48111594ee9..19eb59d0103 100644
--- a/net/ipv4/netfilter/Makefile
+++ b/net/ipv4/netfilter/Makefile
@@ -3,15 +3,15 @@
#
# objects for l3 independent conntrack
-nf_conntrack_ipv4-objs := nf_conntrack_l3proto_ipv4.o nf_conntrack_proto_icmp.o
+nf_conntrack_ipv4-y := nf_conntrack_l3proto_ipv4.o nf_conntrack_proto_icmp.o
ifeq ($(CONFIG_NF_CONNTRACK_PROC_COMPAT),y)
ifeq ($(CONFIG_PROC_FS),y)
nf_conntrack_ipv4-objs += nf_conntrack_l3proto_ipv4_compat.o
endif
endif
-nf_nat-objs := nf_nat_core.o nf_nat_helper.o nf_nat_proto_unknown.o nf_nat_proto_common.o nf_nat_proto_tcp.o nf_nat_proto_udp.o nf_nat_proto_icmp.o
-iptable_nat-objs := nf_nat_rule.o nf_nat_standalone.o
+nf_nat-y := nf_nat_core.o nf_nat_helper.o nf_nat_proto_unknown.o nf_nat_proto_common.o nf_nat_proto_tcp.o nf_nat_proto_udp.o nf_nat_proto_icmp.o
+iptable_nat-y := nf_nat_rule.o nf_nat_standalone.o
# connection tracking
obj-$(CONFIG_NF_CONNTRACK_IPV4) += nf_conntrack_ipv4.o
diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c
index 3cad2591ace..3fac340a28d 100644
--- a/net/ipv4/netfilter/arp_tables.c
+++ b/net/ipv4/netfilter/arp_tables.c
@@ -927,6 +927,7 @@ static int get_info(struct net *net, void __user *user,
private = &tmp;
}
#endif
+ memset(&info, 0, sizeof(info));
info.valid_hooks = t->valid_hooks;
memcpy(info.hook_entry, private->hook_entry,
sizeof(info.hook_entry));
diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c
index d31b007a6d8..a846d633b3b 100644
--- a/net/ipv4/netfilter/ip_tables.c
+++ b/net/ipv4/netfilter/ip_tables.c
@@ -1124,6 +1124,7 @@ static int get_info(struct net *net, void __user *user,
private = &tmp;
}
#endif
+ memset(&info, 0, sizeof(info));
info.valid_hooks = t->valid_hooks;
memcpy(info.hook_entry, private->hook_entry,
sizeof(info.hook_entry));
diff --git a/net/ipv4/netfilter/nf_nat_core.c b/net/ipv4/netfilter/nf_nat_core.c
index 295c97431e4..c04787ce1a7 100644
--- a/net/ipv4/netfilter/nf_nat_core.c
+++ b/net/ipv4/netfilter/nf_nat_core.c
@@ -47,26 +47,6 @@ __nf_nat_proto_find(u_int8_t protonum)
return rcu_dereference(nf_nat_protos[protonum]);
}
-static const struct nf_nat_protocol *
-nf_nat_proto_find_get(u_int8_t protonum)
-{
- const struct nf_nat_protocol *p;
-
- rcu_read_lock();
- p = __nf_nat_proto_find(protonum);
- if (!try_module_get(p->me))
- p = &nf_nat_unknown_protocol;
- rcu_read_unlock();
-
- return p;
-}
-
-static void
-nf_nat_proto_put(const struct nf_nat_protocol *p)
-{
- module_put(p->me);
-}
-
/* We keep an extra hash for each conntrack, for fast searching. */
static inline unsigned int
hash_by_src(const struct net *net, u16 zone,
@@ -588,6 +568,26 @@ static struct nf_ct_ext_type nat_extend __read_mostly = {
#include <linux/netfilter/nfnetlink.h>
#include <linux/netfilter/nfnetlink_conntrack.h>
+static const struct nf_nat_protocol *
+nf_nat_proto_find_get(u_int8_t protonum)
+{
+ const struct nf_nat_protocol *p;
+
+ rcu_read_lock();
+ p = __nf_nat_proto_find(protonum);
+ if (!try_module_get(p->me))
+ p = &nf_nat_unknown_protocol;
+ rcu_read_unlock();
+
+ return p;
+}
+
+static void
+nf_nat_proto_put(const struct nf_nat_protocol *p)
+{
+ module_put(p->me);
+}
+
static const struct nla_policy protonat_nla_policy[CTA_PROTONAT_MAX+1] = {
[CTA_PROTONAT_PORT_MIN] = { .type = NLA_U16 },
[CTA_PROTONAT_PORT_MAX] = { .type = NLA_U16 },
diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c
index 4ae1f203f7c..1b48eb1ed45 100644
--- a/net/ipv4/proc.c
+++ b/net/ipv4/proc.c
@@ -59,13 +59,13 @@ static int sockstat_seq_show(struct seq_file *seq, void *v)
local_bh_enable();
socket_seq_show(seq);
- seq_printf(seq, "TCP: inuse %d orphan %d tw %d alloc %d mem %d\n",
+ seq_printf(seq, "TCP: inuse %d orphan %d tw %d alloc %d mem %ld\n",
sock_prot_inuse_get(net, &tcp_prot), orphans,
tcp_death_row.tw_count, sockets,
- atomic_read(&tcp_memory_allocated));
- seq_printf(seq, "UDP: inuse %d mem %d\n",
+ atomic_long_read(&tcp_memory_allocated));
+ seq_printf(seq, "UDP: inuse %d mem %ld\n",
sock_prot_inuse_get(net, &udp_prot),
- atomic_read(&udp_memory_allocated));
+ atomic_long_read(&udp_memory_allocated));
seq_printf(seq, "UDPLITE: inuse %d\n",
sock_prot_inuse_get(net, &udplite_prot));
seq_printf(seq, "RAW: inuse %d\n",
diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c
index 1f85ef28989..a3d5ab786e8 100644
--- a/net/ipv4/raw.c
+++ b/net/ipv4/raw.c
@@ -549,10 +549,9 @@ static int raw_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
{
struct flowi fl = { .oif = ipc.oif,
.mark = sk->sk_mark,
- .nl_u = { .ip4_u =
- { .daddr = daddr,
- .saddr = saddr,
- .tos = tos } },
+ .fl4_dst = daddr,
+ .fl4_src = saddr,
+ .fl4_tos = tos,
.proto = inet->hdrincl ? IPPROTO_RAW :
sk->sk_protocol,
};
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index 987bf9adb31..ec2333fb637 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -140,13 +140,15 @@ static unsigned long expires_ljiffies;
static struct dst_entry *ipv4_dst_check(struct dst_entry *dst, u32 cookie);
static void ipv4_dst_destroy(struct dst_entry *dst);
-static void ipv4_dst_ifdown(struct dst_entry *dst,
- struct net_device *dev, int how);
static struct dst_entry *ipv4_negative_advice(struct dst_entry *dst);
static void ipv4_link_failure(struct sk_buff *skb);
static void ip_rt_update_pmtu(struct dst_entry *dst, u32 mtu);
static int rt_garbage_collect(struct dst_ops *ops);
+static void ipv4_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
+ int how)
+{
+}
static struct dst_ops ipv4_dst_ops = {
.family = AF_INET,
@@ -621,7 +623,7 @@ static inline int rt_fast_clean(struct rtable *rth)
/* Kill broadcast/multicast entries very aggresively, if they
collide in hash table with more useful entries */
return (rth->rt_flags & (RTCF_BROADCAST | RTCF_MULTICAST)) &&
- rth->fl.iif && rth->dst.rt_next;
+ rt_is_input_route(rth) && rth->dst.rt_next;
}
static inline int rt_valuable(struct rtable *rth)
@@ -666,7 +668,7 @@ static inline u32 rt_score(struct rtable *rt)
if (rt_valuable(rt))
score |= (1<<31);
- if (!rt->fl.iif ||
+ if (rt_is_output_route(rt) ||
!(rt->rt_flags & (RTCF_BROADCAST|RTCF_MULTICAST|RTCF_LOCAL)))
score |= (1<<30);
@@ -682,17 +684,17 @@ static inline bool rt_caching(const struct net *net)
static inline bool compare_hash_inputs(const struct flowi *fl1,
const struct flowi *fl2)
{
- return ((((__force u32)fl1->nl_u.ip4_u.daddr ^ (__force u32)fl2->nl_u.ip4_u.daddr) |
- ((__force u32)fl1->nl_u.ip4_u.saddr ^ (__force u32)fl2->nl_u.ip4_u.saddr) |
+ return ((((__force u32)fl1->fl4_dst ^ (__force u32)fl2->fl4_dst) |
+ ((__force u32)fl1->fl4_src ^ (__force u32)fl2->fl4_src) |
(fl1->iif ^ fl2->iif)) == 0);
}
static inline int compare_keys(struct flowi *fl1, struct flowi *fl2)
{
- return (((__force u32)fl1->nl_u.ip4_u.daddr ^ (__force u32)fl2->nl_u.ip4_u.daddr) |
- ((__force u32)fl1->nl_u.ip4_u.saddr ^ (__force u32)fl2->nl_u.ip4_u.saddr) |
+ return (((__force u32)fl1->fl4_dst ^ (__force u32)fl2->fl4_dst) |
+ ((__force u32)fl1->fl4_src ^ (__force u32)fl2->fl4_src) |
(fl1->mark ^ fl2->mark) |
- (*(u16 *)&fl1->nl_u.ip4_u.tos ^ *(u16 *)&fl2->nl_u.ip4_u.tos) |
+ (*(u16 *)&fl1->fl4_tos ^ *(u16 *)&fl2->fl4_tos) |
(fl1->oif ^ fl2->oif) |
(fl1->iif ^ fl2->iif)) == 0;
}
@@ -1124,7 +1126,7 @@ restart:
*/
rt->dst.flags |= DST_NOCACHE;
- if (rt->rt_type == RTN_UNICAST || rt->fl.iif == 0) {
+ if (rt->rt_type == RTN_UNICAST || rt_is_output_route(rt)) {
int err = arp_bind_neighbour(&rt->dst);
if (err) {
if (net_ratelimit())
@@ -1222,7 +1224,7 @@ restart:
/* Try to bind route to arp only if it is output
route or unicast forwarding path.
*/
- if (rt->rt_type == RTN_UNICAST || rt->fl.iif == 0) {
+ if (rt->rt_type == RTN_UNICAST || rt_is_output_route(rt)) {
int err = arp_bind_neighbour(&rt->dst);
if (err) {
spin_unlock_bh(rt_hash_lock_addr(hash));
@@ -1404,7 +1406,7 @@ void ip_rt_redirect(__be32 old_gw, __be32 daddr, __be32 new_gw,
if (rth->fl.fl4_dst != daddr ||
rth->fl.fl4_src != skeys[i] ||
rth->fl.oif != ikeys[k] ||
- rth->fl.iif != 0 ||
+ rt_is_input_route(rth) ||
rt_is_expired(rth) ||
!net_eq(dev_net(rth->dst.dev), net)) {
rthp = &rth->dst.rt_next;
@@ -1433,8 +1435,6 @@ void ip_rt_redirect(__be32 old_gw, __be32 daddr, __be32 new_gw,
rt->dst.child = NULL;
if (rt->dst.dev)
dev_hold(rt->dst.dev);
- if (rt->idev)
- in_dev_hold(rt->idev);
rt->dst.obsolete = -1;
rt->dst.lastuse = jiffies;
rt->dst.path = &rt->dst;
@@ -1666,7 +1666,7 @@ unsigned short ip_rt_frag_needed(struct net *net, struct iphdr *iph,
rth->rt_dst != daddr ||
rth->rt_src != iph->saddr ||
rth->fl.oif != ikeys[k] ||
- rth->fl.iif != 0 ||
+ rt_is_input_route(rth) ||
dst_metric_locked(&rth->dst, RTAX_MTU) ||
!net_eq(dev_net(rth->dst.dev), net) ||
rt_is_expired(rth))
@@ -1728,33 +1728,13 @@ static void ipv4_dst_destroy(struct dst_entry *dst)
{
struct rtable *rt = (struct rtable *) dst;
struct inet_peer *peer = rt->peer;
- struct in_device *idev = rt->idev;
if (peer) {
rt->peer = NULL;
inet_putpeer(peer);
}
-
- if (idev) {
- rt->idev = NULL;
- in_dev_put(idev);
- }
}
-static void ipv4_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
- int how)
-{
- struct rtable *rt = (struct rtable *) dst;
- struct in_device *idev = rt->idev;
- if (dev != dev_net(dev)->loopback_dev && idev && idev->dev == dev) {
- struct in_device *loopback_idev =
- in_dev_get(dev_net(dev)->loopback_dev);
- if (loopback_idev) {
- rt->idev = loopback_idev;
- in_dev_put(idev);
- }
- }
-}
static void ipv4_link_failure(struct sk_buff *skb)
{
@@ -1790,7 +1770,7 @@ void ip_rt_get_source(u8 *addr, struct rtable *rt)
__be32 src;
struct fib_result res;
- if (rt->fl.iif == 0)
+ if (rt_is_output_route(rt))
src = rt->rt_src;
else {
rcu_read_lock();
@@ -1910,7 +1890,6 @@ static int ip_route_input_mc(struct sk_buff *skb, __be32 daddr, __be32 saddr,
rth->fl.iif = dev->ifindex;
rth->dst.dev = init_net.loopback_dev;
dev_hold(rth->dst.dev);
- rth->idev = in_dev_get(rth->dst.dev);
rth->fl.oif = 0;
rth->rt_gateway = daddr;
rth->rt_spec_dst= spec_dst;
@@ -2050,7 +2029,6 @@ static int __mkroute_input(struct sk_buff *skb,
rth->fl.iif = in_dev->dev->ifindex;
rth->dst.dev = (out_dev)->dev;
dev_hold(rth->dst.dev);
- rth->idev = in_dev_get(rth->dst.dev);
rth->fl.oif = 0;
rth->rt_spec_dst= spec_dst;
@@ -2111,12 +2089,10 @@ static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr,
{
struct fib_result res;
struct in_device *in_dev = __in_dev_get_rcu(dev);
- struct flowi fl = { .nl_u = { .ip4_u =
- { .daddr = daddr,
- .saddr = saddr,
- .tos = tos,
- .scope = RT_SCOPE_UNIVERSE,
- } },
+ struct flowi fl = { .fl4_dst = daddr,
+ .fl4_src = saddr,
+ .fl4_tos = tos,
+ .fl4_scope = RT_SCOPE_UNIVERSE,
.mark = skb->mark,
.iif = dev->ifindex };
unsigned flags = 0;
@@ -2231,7 +2207,6 @@ local_input:
rth->fl.iif = dev->ifindex;
rth->dst.dev = net->loopback_dev;
dev_hold(rth->dst.dev);
- rth->idev = in_dev_get(rth->dst.dev);
rth->rt_gateway = daddr;
rth->rt_spec_dst= spec_dst;
rth->dst.input= ip_local_deliver;
@@ -2417,9 +2392,6 @@ static int __mkroute_output(struct rtable **result,
if (!rth)
return -ENOBUFS;
- in_dev_hold(in_dev);
- rth->idev = in_dev;
-
atomic_set(&rth->dst.__refcnt, 1);
rth->dst.flags= DST_HOST;
if (IN_DEV_CONF_GET(in_dev, NOXFRM))
@@ -2506,14 +2478,11 @@ static int ip_route_output_slow(struct net *net, struct rtable **rp,
const struct flowi *oldflp)
{
u32 tos = RT_FL_TOS(oldflp);
- struct flowi fl = { .nl_u = { .ip4_u =
- { .daddr = oldflp->fl4_dst,
- .saddr = oldflp->fl4_src,
- .tos = tos & IPTOS_RT_MASK,
- .scope = ((tos & RTO_ONLINK) ?
- RT_SCOPE_LINK :
- RT_SCOPE_UNIVERSE),
- } },
+ struct flowi fl = { .fl4_dst = oldflp->fl4_dst,
+ .fl4_src = oldflp->fl4_src,
+ .fl4_tos = tos & IPTOS_RT_MASK,
+ .fl4_scope = ((tos & RTO_ONLINK) ?
+ RT_SCOPE_LINK : RT_SCOPE_UNIVERSE),
.mark = oldflp->mark,
.iif = net->loopback_dev->ifindex,
.oif = oldflp->oif };
@@ -2695,7 +2664,7 @@ int __ip_route_output_key(struct net *net, struct rtable **rp,
rth = rcu_dereference_bh(rth->dst.rt_next)) {
if (rth->fl.fl4_dst == flp->fl4_dst &&
rth->fl.fl4_src == flp->fl4_src &&
- rth->fl.iif == 0 &&
+ rt_is_output_route(rth) &&
rth->fl.oif == flp->oif &&
rth->fl.mark == flp->mark &&
!((rth->fl.fl4_tos ^ flp->fl4_tos) &
@@ -2759,9 +2728,6 @@ static int ipv4_dst_blackhole(struct net *net, struct rtable **rp, struct flowi
rt->fl = ort->fl;
- rt->idev = ort->idev;
- if (rt->idev)
- in_dev_hold(rt->idev);
rt->rt_genid = rt_genid(net);
rt->rt_flags = ort->rt_flags;
rt->rt_type = ort->rt_type;
@@ -2853,7 +2819,7 @@ static int rt_fill_info(struct net *net,
if (rt->dst.tclassid)
NLA_PUT_U32(skb, RTA_FLOW, rt->dst.tclassid);
#endif
- if (rt->fl.iif)
+ if (rt_is_input_route(rt))
NLA_PUT_BE32(skb, RTA_PREFSRC, rt->rt_spec_dst);
else if (rt->rt_src != rt->fl.fl4_src)
NLA_PUT_BE32(skb, RTA_PREFSRC, rt->rt_src);
@@ -2878,7 +2844,7 @@ static int rt_fill_info(struct net *net,
}
}
- if (rt->fl.iif) {
+ if (rt_is_input_route(rt)) {
#ifdef CONFIG_IP_MROUTE
__be32 dst = rt->rt_dst;
@@ -2973,13 +2939,9 @@ static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void
err = -rt->dst.error;
} else {
struct flowi fl = {
- .nl_u = {
- .ip4_u = {
- .daddr = dst,
- .saddr = src,
- .tos = rtm->rtm_tos,
- },
- },
+ .fl4_dst = dst,
+ .fl4_src = src,
+ .fl4_tos = rtm->rtm_tos,
.oif = tb[RTA_OIF] ? nla_get_u32(tb[RTA_OIF]) : 0,
.mark = mark,
};
diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c
index 650cace2180..47519205a01 100644
--- a/net/ipv4/syncookies.c
+++ b/net/ipv4/syncookies.c
@@ -346,17 +346,14 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb,
*/
{
struct flowi fl = { .mark = sk->sk_mark,
- .nl_u = { .ip4_u =
- { .daddr = ((opt && opt->srr) ?
- opt->faddr :
- ireq->rmt_addr),
- .saddr = ireq->loc_addr,
- .tos = RT_CONN_FLAGS(sk) } },
+ .fl4_dst = ((opt && opt->srr) ?
+ opt->faddr : ireq->rmt_addr),
+ .fl4_src = ireq->loc_addr,
+ .fl4_tos = RT_CONN_FLAGS(sk),
.proto = IPPROTO_TCP,
.flags = inet_sk_flowi_flags(sk),
- .uli_u = { .ports =
- { .sport = th->dest,
- .dport = th->source } } };
+ .fl_ip_sport = th->dest,
+ .fl_ip_dport = th->source };
security_req_classify_flow(req, &fl);
if (ip_route_output_key(sock_net(sk), &rt, &fl)) {
reqsk_free(req);
diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c
index d96c1da4b17..e91911d7aae 100644
--- a/net/ipv4/sysctl_net_ipv4.c
+++ b/net/ipv4/sysctl_net_ipv4.c
@@ -398,7 +398,7 @@ static struct ctl_table ipv4_table[] = {
.data = &sysctl_tcp_mem,
.maxlen = sizeof(sysctl_tcp_mem),
.mode = 0644,
- .proc_handler = proc_dointvec
+ .proc_handler = proc_doulongvec_minmax
},
{
.procname = "tcp_wmem",
@@ -602,8 +602,7 @@ static struct ctl_table ipv4_table[] = {
.data = &sysctl_udp_mem,
.maxlen = sizeof(sysctl_udp_mem),
.mode = 0644,
- .proc_handler = proc_dointvec_minmax,
- .extra1 = &zero
+ .proc_handler = proc_doulongvec_minmax,
},
{
.procname = "udp_rmem_min",
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 1664a0590bb..2bb46d55f40 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -282,7 +282,7 @@ int sysctl_tcp_fin_timeout __read_mostly = TCP_FIN_TIMEOUT;
struct percpu_counter tcp_orphan_count;
EXPORT_SYMBOL_GPL(tcp_orphan_count);
-int sysctl_tcp_mem[3] __read_mostly;
+long sysctl_tcp_mem[3] __read_mostly;
int sysctl_tcp_wmem[3] __read_mostly;
int sysctl_tcp_rmem[3] __read_mostly;
@@ -290,7 +290,7 @@ EXPORT_SYMBOL(sysctl_tcp_mem);
EXPORT_SYMBOL(sysctl_tcp_rmem);
EXPORT_SYMBOL(sysctl_tcp_wmem);
-atomic_t tcp_memory_allocated; /* Current allocated memory. */
+atomic_long_t tcp_memory_allocated; /* Current allocated memory. */
EXPORT_SYMBOL(tcp_memory_allocated);
/*
@@ -1193,7 +1193,7 @@ void tcp_cleanup_rbuf(struct sock *sk, int copied)
struct sk_buff *skb = skb_peek(&sk->sk_receive_queue);
WARN(skb && !before(tp->copied_seq, TCP_SKB_CB(skb)->end_seq),
- KERN_INFO "cleanup rbuf bug: copied %X seq %X rcvnxt %X\n",
+ "cleanup rbuf bug: copied %X seq %X rcvnxt %X\n",
tp->copied_seq, TCP_SKB_CB(skb)->end_seq, tp->rcv_nxt);
#endif
@@ -1477,10 +1477,9 @@ int tcp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
* shouldn't happen.
*/
if (WARN(before(*seq, TCP_SKB_CB(skb)->seq),
- KERN_INFO "recvmsg bug: copied %X "
- "seq %X rcvnxt %X fl %X\n", *seq,
- TCP_SKB_CB(skb)->seq, tp->rcv_nxt,
- flags))
+ "recvmsg bug: copied %X seq %X rcvnxt %X fl %X\n",
+ *seq, TCP_SKB_CB(skb)->seq, tp->rcv_nxt,
+ flags))
break;
offset = *seq - TCP_SKB_CB(skb)->seq;
@@ -1490,10 +1489,9 @@ int tcp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
goto found_ok_skb;
if (tcp_hdr(skb)->fin)
goto found_fin_ok;
- WARN(!(flags & MSG_PEEK), KERN_INFO "recvmsg bug 2: "
- "copied %X seq %X rcvnxt %X fl %X\n",
- *seq, TCP_SKB_CB(skb)->seq,
- tp->rcv_nxt, flags);
+ WARN(!(flags & MSG_PEEK),
+ "recvmsg bug 2: copied %X seq %X rcvnxt %X fl %X\n",
+ *seq, TCP_SKB_CB(skb)->seq, tp->rcv_nxt, flags);
}
/* Well, if we have backlog, try to process it now yet. */
@@ -2246,7 +2244,7 @@ static int do_tcp_setsockopt(struct sock *sk, int level,
/* Values greater than interface MTU won't take effect. However
* at the point when this call is done we typically don't yet
* know which interface is going to be used */
- if (val < 8 || val > MAX_TCP_WINDOW) {
+ if (val < 64 || val > MAX_TCP_WINDOW) {
err = -EINVAL;
break;
}
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 3357f69e353..6d8ab1c4efc 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -259,8 +259,11 @@ static void tcp_fixup_sndbuf(struct sock *sk)
int sndmem = tcp_sk(sk)->rx_opt.mss_clamp + MAX_TCP_HEADER + 16 +
sizeof(struct sk_buff);
- if (sk->sk_sndbuf < 3 * sndmem)
- sk->sk_sndbuf = min(3 * sndmem, sysctl_tcp_wmem[2]);
+ if (sk->sk_sndbuf < 3 * sndmem) {
+ sk->sk_sndbuf = 3 * sndmem;
+ if (sk->sk_sndbuf > sysctl_tcp_wmem[2])
+ sk->sk_sndbuf = sysctl_tcp_wmem[2];
+ }
}
/* 2. Tuning advertised window (window_clamp, rcv_ssthresh)
@@ -396,7 +399,7 @@ static void tcp_clamp_window(struct sock *sk)
if (sk->sk_rcvbuf < sysctl_tcp_rmem[2] &&
!(sk->sk_userlocks & SOCK_RCVBUF_LOCK) &&
!tcp_memory_pressure &&
- atomic_read(&tcp_memory_allocated) < sysctl_tcp_mem[0]) {
+ atomic_long_read(&tcp_memory_allocated) < sysctl_tcp_mem[0]) {
sk->sk_rcvbuf = min(atomic_read(&sk->sk_rmem_alloc),
sysctl_tcp_rmem[2]);
}
@@ -4861,7 +4864,7 @@ static int tcp_should_expand_sndbuf(struct sock *sk)
return 0;
/* If we are under soft global TCP memory pressure, do not expand. */
- if (atomic_read(&tcp_memory_allocated) >= sysctl_tcp_mem[0])
+ if (atomic_long_read(&tcp_memory_allocated) >= sysctl_tcp_mem[0])
return 0;
/* If we filled the congestion window, do not expand. */
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 8f8527d4168..69ccbc1dde9 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -415,6 +415,9 @@ void tcp_v4_err(struct sk_buff *icmp_skb, u32 info)
!icsk->icsk_backoff)
break;
+ if (sock_owned_by_user(sk))
+ break;
+
icsk->icsk_backoff--;
inet_csk(sk)->icsk_rto = __tcp_set_rto(tp) <<
icsk->icsk_backoff;
@@ -429,11 +432,6 @@ void tcp_v4_err(struct sk_buff *icmp_skb, u32 info)
if (remaining) {
inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS,
remaining, TCP_RTO_MAX);
- } else if (sock_owned_by_user(sk)) {
- /* RTO revert clocked out retransmission,
- * but socket is locked. Will defer. */
- inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS,
- HZ/20, TCP_RTO_MAX);
} else {
/* RTO revert clocked out retransmission.
* Will retransmit now */
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 05b1ecf3676..bb8f547fc7d 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -2592,6 +2592,7 @@ int tcp_connect(struct sock *sk)
{
struct tcp_sock *tp = tcp_sk(sk);
struct sk_buff *buff;
+ int err;
tcp_connect_init(sk);
@@ -2614,7 +2615,9 @@ int tcp_connect(struct sock *sk)
sk->sk_wmem_queued += buff->truesize;
sk_mem_charge(sk, buff->truesize);
tp->packets_out += tcp_skb_pcount(buff);
- tcp_transmit_skb(sk, buff, 1, sk->sk_allocation);
+ err = tcp_transmit_skb(sk, buff, 1, sk->sk_allocation);
+ if (err == -ECONNREFUSED)
+ return err;
/* We change tp->snd_nxt after the tcp_transmit_skb() call
* in order to make this packet get counted in tcpOutSegs.
diff --git a/net/ipv4/tcp_probe.c b/net/ipv4/tcp_probe.c
index 6211e211417..85ee7eb7e38 100644
--- a/net/ipv4/tcp_probe.c
+++ b/net/ipv4/tcp_probe.c
@@ -154,7 +154,7 @@ static int tcpprobe_sprint(char *tbuf, int n)
struct timespec tv
= ktime_to_timespec(ktime_sub(p->tstamp, tcp_probe.start));
- return snprintf(tbuf, n,
+ return scnprintf(tbuf, n,
"%lu.%09lu %pI4:%u %pI4:%u %d %#x %#x %u %u %u %u\n",
(unsigned long) tv.tv_sec,
(unsigned long) tv.tv_nsec,
@@ -174,7 +174,7 @@ static ssize_t tcpprobe_read(struct file *file, char __user *buf,
return -EINVAL;
while (cnt < len) {
- char tbuf[128];
+ char tbuf[164];
int width;
/* Wait for data in buffer */
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index 28cb2d733a3..b37181da487 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -110,7 +110,7 @@
struct udp_table udp_table __read_mostly;
EXPORT_SYMBOL(udp_table);
-int sysctl_udp_mem[3] __read_mostly;
+long sysctl_udp_mem[3] __read_mostly;
EXPORT_SYMBOL(sysctl_udp_mem);
int sysctl_udp_rmem_min __read_mostly;
@@ -119,7 +119,7 @@ EXPORT_SYMBOL(sysctl_udp_rmem_min);
int sysctl_udp_wmem_min __read_mostly;
EXPORT_SYMBOL(sysctl_udp_wmem_min);
-atomic_t udp_memory_allocated;
+atomic_long_t udp_memory_allocated;
EXPORT_SYMBOL(udp_memory_allocated);
#define MAX_UDP_PORTS 65536
@@ -430,7 +430,7 @@ begin:
if (result) {
exact_match:
- if (unlikely(!atomic_inc_not_zero(&result->sk_refcnt)))
+ if (unlikely(!atomic_inc_not_zero_hint(&result->sk_refcnt, 2)))
result = NULL;
else if (unlikely(compute_score2(result, net, saddr, sport,
daddr, hnum, dif) < badness)) {
@@ -500,7 +500,7 @@ begin:
goto begin;
if (result) {
- if (unlikely(!atomic_inc_not_zero(&result->sk_refcnt)))
+ if (unlikely(!atomic_inc_not_zero_hint(&result->sk_refcnt, 2)))
result = NULL;
else if (unlikely(compute_score(result, net, saddr, hnum, sport,
daddr, dport, dif) < badness)) {
@@ -890,15 +890,13 @@ int udp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
if (rt == NULL) {
struct flowi fl = { .oif = ipc.oif,
.mark = sk->sk_mark,
- .nl_u = { .ip4_u =
- { .daddr = faddr,
- .saddr = saddr,
- .tos = tos } },
+ .fl4_dst = faddr,
+ .fl4_src = saddr,
+ .fl4_tos = tos,
.proto = sk->sk_protocol,
.flags = inet_sk_flowi_flags(sk),
- .uli_u = { .ports =
- { .sport = inet->inet_sport,
- .dport = dport } } };
+ .fl_ip_sport = inet->inet_sport,
+ .fl_ip_dport = dport };
struct net *net = sock_net(sk);
security_sk_classify_flow(sk, &fl);
diff --git a/net/ipv4/xfrm4_policy.c b/net/ipv4/xfrm4_policy.c
index 4464f3bff6a..b057d40adde 100644
--- a/net/ipv4/xfrm4_policy.c
+++ b/net/ipv4/xfrm4_policy.c
@@ -11,6 +11,7 @@
#include <linux/err.h>
#include <linux/kernel.h>
#include <linux/inetdevice.h>
+#include <linux/if_tunnel.h>
#include <net/dst.h>
#include <net/xfrm.h>
#include <net/ip.h>
@@ -22,12 +23,8 @@ static struct dst_entry *xfrm4_dst_lookup(struct net *net, int tos,
xfrm_address_t *daddr)
{
struct flowi fl = {
- .nl_u = {
- .ip4_u = {
- .tos = tos,
- .daddr = daddr->a4,
- },
- },
+ .fl4_dst = daddr->a4,
+ .fl4_tos = tos,
};
struct dst_entry *dst;
struct rtable *rt;
@@ -80,10 +77,6 @@ static int xfrm4_fill_dst(struct xfrm_dst *xdst, struct net_device *dev,
xdst->u.dst.dev = dev;
dev_hold(dev);
- xdst->u.rt.idev = in_dev_get(dev);
- if (!xdst->u.rt.idev)
- return -ENODEV;
-
xdst->u.rt.peer = rt->peer;
if (rt->peer)
atomic_inc(&rt->peer->refcnt);
@@ -158,6 +151,20 @@ _decode_session4(struct sk_buff *skb, struct flowi *fl, int reverse)
fl->fl_ipsec_spi = htonl(ntohs(ipcomp_hdr[1]));
}
break;
+
+ case IPPROTO_GRE:
+ if (pskb_may_pull(skb, xprth + 12 - skb->data)) {
+ __be16 *greflags = (__be16 *)xprth;
+ __be32 *gre_hdr = (__be32 *)xprth;
+
+ if (greflags[0] & GRE_KEY) {
+ if (greflags[0] & GRE_CSUM)
+ gre_hdr++;
+ fl->fl_gre_key = gre_hdr[1];
+ }
+ }
+ break;
+
default:
fl->fl_ipsec_spi = 0;
break;
@@ -189,8 +196,6 @@ static void xfrm4_dst_destroy(struct dst_entry *dst)
{
struct xfrm_dst *xdst = (struct xfrm_dst *)dst;
- if (likely(xdst->u.rt.idev))
- in_dev_put(xdst->u.rt.idev);
if (likely(xdst->u.rt.peer))
inet_putpeer(xdst->u.rt.peer);
xfrm_dst_destroy(xdst);
@@ -199,27 +204,9 @@ static void xfrm4_dst_destroy(struct dst_entry *dst)
static void xfrm4_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
int unregister)
{
- struct xfrm_dst *xdst;
-
if (!unregister)
return;
- xdst = (struct xfrm_dst *)dst;
- if (xdst->u.rt.idev->dev == dev) {
- struct in_device *loopback_idev =
- in_dev_get(dev_net(dev)->loopback_dev);
- BUG_ON(!loopback_idev);
-
- do {
- in_dev_put(xdst->u.rt.idev);
- xdst->u.rt.idev = loopback_idev;
- in_dev_hold(loopback_idev);
- xdst = (struct xfrm_dst *)xdst->u.dst.child;
- } while (xdst->u.dst.xfrm);
-
- __in_dev_put(loopback_idev);
- }
-
xfrm_dst_ifdown(dst, dev);
}
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index e048ec62d10..4cf760598c2 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -98,7 +98,11 @@
#endif
#define INFINITY_LIFE_TIME 0xFFFFFFFF
-#define TIME_DELTA(a, b) ((unsigned long)((long)(a) - (long)(b)))
+
+static inline u32 cstamp_delta(unsigned long cstamp)
+{
+ return (cstamp - INITIAL_JIFFIES) * 100UL / HZ;
+}
#define ADDRCONF_TIMER_FUZZ_MINUS (HZ > 50 ? HZ/50 : 1)
#define ADDRCONF_TIMER_FUZZ (HZ / 4)
@@ -2740,10 +2744,6 @@ static int addrconf_ifdown(struct net_device *dev, int how)
/* Flag it for later restoration when link comes up */
ifa->flags |= IFA_F_TENTATIVE;
ifa->state = INET6_IFADDR_STATE_DAD;
-
- write_unlock_bh(&idev->lock);
-
- in6_ifa_hold(ifa);
} else {
list_del(&ifa->if_list);
@@ -2758,19 +2758,15 @@ static int addrconf_ifdown(struct net_device *dev, int how)
ifa->state = INET6_IFADDR_STATE_DEAD;
spin_unlock_bh(&ifa->state_lock);
- if (state == INET6_IFADDR_STATE_DEAD)
- goto put_ifa;
- }
-
- __ipv6_ifa_notify(RTM_DELADDR, ifa);
- if (ifa->state == INET6_IFADDR_STATE_DEAD)
- atomic_notifier_call_chain(&inet6addr_chain,
- NETDEV_DOWN, ifa);
-
-put_ifa:
- in6_ifa_put(ifa);
+ if (state != INET6_IFADDR_STATE_DEAD) {
+ __ipv6_ifa_notify(RTM_DELADDR, ifa);
+ atomic_notifier_call_chain(&inet6addr_chain,
+ NETDEV_DOWN, ifa);
+ }
- write_lock_bh(&idev->lock);
+ in6_ifa_put(ifa);
+ write_lock_bh(&idev->lock);
+ }
}
list_splice(&keep_list, &idev->addr_list);
@@ -3452,10 +3448,8 @@ static int put_cacheinfo(struct sk_buff *skb, unsigned long cstamp,
{
struct ifa_cacheinfo ci;
- ci.cstamp = (u32)(TIME_DELTA(cstamp, INITIAL_JIFFIES) / HZ * 100
- + TIME_DELTA(cstamp, INITIAL_JIFFIES) % HZ * 100 / HZ);
- ci.tstamp = (u32)(TIME_DELTA(tstamp, INITIAL_JIFFIES) / HZ * 100
- + TIME_DELTA(tstamp, INITIAL_JIFFIES) % HZ * 100 / HZ);
+ ci.cstamp = cstamp_delta(cstamp);
+ ci.tstamp = cstamp_delta(tstamp);
ci.ifa_prefered = preferred;
ci.ifa_valid = valid;
@@ -3806,8 +3800,10 @@ static inline void ipv6_store_devconf(struct ipv6_devconf *cnf,
array[DEVCONF_AUTOCONF] = cnf->autoconf;
array[DEVCONF_DAD_TRANSMITS] = cnf->dad_transmits;
array[DEVCONF_RTR_SOLICITS] = cnf->rtr_solicits;
- array[DEVCONF_RTR_SOLICIT_INTERVAL] = cnf->rtr_solicit_interval;
- array[DEVCONF_RTR_SOLICIT_DELAY] = cnf->rtr_solicit_delay;
+ array[DEVCONF_RTR_SOLICIT_INTERVAL] =
+ jiffies_to_msecs(cnf->rtr_solicit_interval);
+ array[DEVCONF_RTR_SOLICIT_DELAY] =
+ jiffies_to_msecs(cnf->rtr_solicit_delay);
array[DEVCONF_FORCE_MLD_VERSION] = cnf->force_mld_version;
#ifdef CONFIG_IPV6_PRIVACY
array[DEVCONF_USE_TEMPADDR] = cnf->use_tempaddr;
@@ -3821,7 +3817,8 @@ static inline void ipv6_store_devconf(struct ipv6_devconf *cnf,
array[DEVCONF_ACCEPT_RA_PINFO] = cnf->accept_ra_pinfo;
#ifdef CONFIG_IPV6_ROUTER_PREF
array[DEVCONF_ACCEPT_RA_RTR_PREF] = cnf->accept_ra_rtr_pref;
- array[DEVCONF_RTR_PROBE_INTERVAL] = cnf->rtr_probe_interval;
+ array[DEVCONF_RTR_PROBE_INTERVAL] =
+ jiffies_to_msecs(cnf->rtr_probe_interval);
#ifdef CONFIG_IPV6_ROUTE_INFO
array[DEVCONF_ACCEPT_RA_RT_INFO_MAX_PLEN] = cnf->accept_ra_rt_info_max_plen;
#endif
@@ -3839,6 +3836,15 @@ static inline void ipv6_store_devconf(struct ipv6_devconf *cnf,
array[DEVCONF_FORCE_TLLAO] = cnf->force_tllao;
}
+static inline size_t inet6_ifla6_size(void)
+{
+ return nla_total_size(4) /* IFLA_INET6_FLAGS */
+ + nla_total_size(sizeof(struct ifla_cacheinfo))
+ + nla_total_size(DEVCONF_MAX * 4) /* IFLA_INET6_CONF */
+ + nla_total_size(IPSTATS_MIB_MAX * 8) /* IFLA_INET6_STATS */
+ + nla_total_size(ICMP6_MIB_MAX * 8); /* IFLA_INET6_ICMP6STATS */
+}
+
static inline size_t inet6_if_nlmsg_size(void)
{
return NLMSG_ALIGN(sizeof(struct ifinfomsg))
@@ -3846,13 +3852,7 @@ static inline size_t inet6_if_nlmsg_size(void)
+ nla_total_size(MAX_ADDR_LEN) /* IFLA_ADDRESS */
+ nla_total_size(4) /* IFLA_MTU */
+ nla_total_size(4) /* IFLA_LINK */
- + nla_total_size( /* IFLA_PROTINFO */
- nla_total_size(4) /* IFLA_INET6_FLAGS */
- + nla_total_size(sizeof(struct ifla_cacheinfo))
- + nla_total_size(DEVCONF_MAX * 4) /* IFLA_INET6_CONF */
- + nla_total_size(IPSTATS_MIB_MAX * 8) /* IFLA_INET6_STATS */
- + nla_total_size(ICMP6_MIB_MAX * 8) /* IFLA_INET6_ICMP6STATS */
- );
+ + nla_total_size(inet6_ifla6_size()); /* IFLA_PROTINFO */
}
static inline void __snmp6_fill_stats(u64 *stats, void __percpu **mib,
@@ -3899,15 +3899,75 @@ static void snmp6_fill_stats(u64 *stats, struct inet6_dev *idev, int attrtype,
}
}
+static int inet6_fill_ifla6_attrs(struct sk_buff *skb, struct inet6_dev *idev)
+{
+ struct nlattr *nla;
+ struct ifla_cacheinfo ci;
+
+ NLA_PUT_U32(skb, IFLA_INET6_FLAGS, idev->if_flags);
+
+ ci.max_reasm_len = IPV6_MAXPLEN;
+ ci.tstamp = cstamp_delta(idev->tstamp);
+ ci.reachable_time = jiffies_to_msecs(idev->nd_parms->reachable_time);
+ ci.retrans_time = jiffies_to_msecs(idev->nd_parms->retrans_time);
+ NLA_PUT(skb, IFLA_INET6_CACHEINFO, sizeof(ci), &ci);
+
+ nla = nla_reserve(skb, IFLA_INET6_CONF, DEVCONF_MAX * sizeof(s32));
+ if (nla == NULL)
+ goto nla_put_failure;
+ ipv6_store_devconf(&idev->cnf, nla_data(nla), nla_len(nla));
+
+ /* XXX - MC not implemented */
+
+ nla = nla_reserve(skb, IFLA_INET6_STATS, IPSTATS_MIB_MAX * sizeof(u64));
+ if (nla == NULL)
+ goto nla_put_failure;
+ snmp6_fill_stats(nla_data(nla), idev, IFLA_INET6_STATS, nla_len(nla));
+
+ nla = nla_reserve(skb, IFLA_INET6_ICMP6STATS, ICMP6_MIB_MAX * sizeof(u64));
+ if (nla == NULL)
+ goto nla_put_failure;
+ snmp6_fill_stats(nla_data(nla), idev, IFLA_INET6_ICMP6STATS, nla_len(nla));
+
+ return 0;
+
+nla_put_failure:
+ return -EMSGSIZE;
+}
+
+static size_t inet6_get_link_af_size(const struct net_device *dev)
+{
+ if (!__in6_dev_get(dev))
+ return 0;
+
+ return inet6_ifla6_size();
+}
+
+static int inet6_fill_link_af(struct sk_buff *skb, const struct net_device *dev)
+{
+ struct inet6_dev *idev = __in6_dev_get(dev);
+
+ if (!idev)
+ return -ENODATA;
+
+ if (inet6_fill_ifla6_attrs(skb, idev) < 0)
+ return -EMSGSIZE;
+
+ return 0;
+}
+
+static int inet6_parse_link_af(struct net_device *dev, const struct nlattr *nla)
+{
+ return -EOPNOTSUPP;
+}
+
static int inet6_fill_ifinfo(struct sk_buff *skb, struct inet6_dev *idev,
u32 pid, u32 seq, int event, unsigned int flags)
{
struct net_device *dev = idev->dev;
- struct nlattr *nla;
struct ifinfomsg *hdr;
struct nlmsghdr *nlh;
void *protoinfo;
- struct ifla_cacheinfo ci;
nlh = nlmsg_put(skb, pid, seq, event, sizeof(*hdr), flags);
if (nlh == NULL)
@@ -3934,31 +3994,8 @@ static int inet6_fill_ifinfo(struct sk_buff *skb, struct inet6_dev *idev,
if (protoinfo == NULL)
goto nla_put_failure;
- NLA_PUT_U32(skb, IFLA_INET6_FLAGS, idev->if_flags);
-
- ci.max_reasm_len = IPV6_MAXPLEN;
- ci.tstamp = (__u32)(TIME_DELTA(idev->tstamp, INITIAL_JIFFIES) / HZ * 100
- + TIME_DELTA(idev->tstamp, INITIAL_JIFFIES) % HZ * 100 / HZ);
- ci.reachable_time = idev->nd_parms->reachable_time;
- ci.retrans_time = idev->nd_parms->retrans_time;
- NLA_PUT(skb, IFLA_INET6_CACHEINFO, sizeof(ci), &ci);
-
- nla = nla_reserve(skb, IFLA_INET6_CONF, DEVCONF_MAX * sizeof(s32));
- if (nla == NULL)
- goto nla_put_failure;
- ipv6_store_devconf(&idev->cnf, nla_data(nla), nla_len(nla));
-
- /* XXX - MC not implemented */
-
- nla = nla_reserve(skb, IFLA_INET6_STATS, IPSTATS_MIB_MAX * sizeof(u64));
- if (nla == NULL)
+ if (inet6_fill_ifla6_attrs(skb, idev) < 0)
goto nla_put_failure;
- snmp6_fill_stats(nla_data(nla), idev, IFLA_INET6_STATS, nla_len(nla));
-
- nla = nla_reserve(skb, IFLA_INET6_ICMP6STATS, ICMP6_MIB_MAX * sizeof(u64));
- if (nla == NULL)
- goto nla_put_failure;
- snmp6_fill_stats(nla_data(nla), idev, IFLA_INET6_ICMP6STATS, nla_len(nla));
nla_nest_end(skb, protoinfo);
return nlmsg_end(skb, nlh);
@@ -4629,6 +4666,13 @@ int unregister_inet6addr_notifier(struct notifier_block *nb)
}
EXPORT_SYMBOL(unregister_inet6addr_notifier);
+/*
+ * rtnl_af_ops table for AF_INET6, tying the fill/size/parse callbacks
+ * above together. It is handed to rtnl_af_register() in addrconf_init()
+ * and to __rtnl_af_unregister() in addrconf_cleanup().
+ */
+static struct rtnl_af_ops inet6_ops = {
+ .family = AF_INET6,
+ .fill_link_af = inet6_fill_link_af,
+ .get_link_af_size = inet6_get_link_af_size,
+ .parse_link_af = inet6_parse_link_af,
+};
+
/*
* Init / cleanup code
*/
@@ -4680,6 +4724,10 @@ int __init addrconf_init(void)
addrconf_verify(0);
+ err = rtnl_af_register(&inet6_ops);
+ if (err < 0)
+ goto errout_af;
+
err = __rtnl_register(PF_INET6, RTM_GETLINK, NULL, inet6_dump_ifinfo);
if (err < 0)
goto errout;
@@ -4695,6 +4743,8 @@ int __init addrconf_init(void)
return 0;
errout:
+ rtnl_af_unregister(&inet6_ops);
+errout_af:
unregister_netdevice_notifier(&ipv6_dev_notf);
errlo:
unregister_pernet_subsys(&addrconf_ops);
@@ -4715,6 +4765,8 @@ void addrconf_cleanup(void)
rtnl_lock();
+ __rtnl_af_unregister(&inet6_ops);
+
/* clean dev list */
for_each_netdev(&init_net, dev) {
if (__in6_dev_get(dev) == NULL)
diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c
index 6f32ffce702..9fab274019c 100644
--- a/net/ipv6/ip6mr.c
+++ b/net/ipv6/ip6mr.c
@@ -1843,9 +1843,7 @@ static int ip6mr_forward2(struct net *net, struct mr6_table *mrt,
fl = (struct flowi) {
.oif = vif->link,
- .nl_u = { .ip6_u =
- { .daddr = ipv6h->daddr, }
- }
+ .fl6_dst = ipv6h->daddr,
};
dst = ip6_route_output(net, NULL, &fl);
diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c
index d1444b95ad7..9c5074528a7 100644
--- a/net/ipv6/mcast.c
+++ b/net/ipv6/mcast.c
@@ -257,7 +257,7 @@ static struct inet6_dev *ip6_mc_find_dev_rcu(struct net *net,
return NULL;
idev = __in6_dev_get(dev);
if (!idev)
- return NULL;;
+ return NULL;
read_lock_bh(&idev->lock);
if (idev->dead) {
read_unlock_bh(&idev->lock);
diff --git a/net/ipv6/netfilter.c b/net/ipv6/netfilter.c
index 7155b2451d7..35915e8617f 100644
--- a/net/ipv6/netfilter.c
+++ b/net/ipv6/netfilter.c
@@ -18,10 +18,8 @@ int ip6_route_me_harder(struct sk_buff *skb)
struct flowi fl = {
.oif = skb->sk ? skb->sk->sk_bound_dev_if : 0,
.mark = skb->mark,
- .nl_u =
- { .ip6_u =
- { .daddr = iph->daddr,
- .saddr = iph->saddr, } },
+ .fl6_dst = iph->daddr,
+ .fl6_src = iph->saddr,
};
dst = ip6_route_output(net, skb->sk, &fl);
diff --git a/net/ipv6/netfilter/Makefile b/net/ipv6/netfilter/Makefile
index 0a432c9b079..abfee91ce81 100644
--- a/net/ipv6/netfilter/Makefile
+++ b/net/ipv6/netfilter/Makefile
@@ -11,13 +11,13 @@ obj-$(CONFIG_IP6_NF_RAW) += ip6table_raw.o
obj-$(CONFIG_IP6_NF_SECURITY) += ip6table_security.o
# objects for l3 independent conntrack
-nf_conntrack_ipv6-objs := nf_conntrack_l3proto_ipv6.o nf_conntrack_proto_icmpv6.o
+nf_conntrack_ipv6-y := nf_conntrack_l3proto_ipv6.o nf_conntrack_proto_icmpv6.o
# l3 independent conntrack
obj-$(CONFIG_NF_CONNTRACK_IPV6) += nf_conntrack_ipv6.o nf_defrag_ipv6.o
# defrag
-nf_defrag_ipv6-objs := nf_defrag_ipv6_hooks.o nf_conntrack_reasm.o
+nf_defrag_ipv6-y := nf_defrag_ipv6_hooks.o nf_conntrack_reasm.o
obj-$(CONFIG_NF_DEFRAG_IPV6) += nf_defrag_ipv6.o
# matches
diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c
index 51df035897e..455582384ec 100644
--- a/net/ipv6/netfilter/ip6_tables.c
+++ b/net/ipv6/netfilter/ip6_tables.c
@@ -1137,6 +1137,7 @@ static int get_info(struct net *net, void __user *user,
private = &tmp;
}
#endif
+ memset(&info, 0, sizeof(info));
info.valid_hooks = t->valid_hooks;
memcpy(info.hook_entry, private->hook_entry,
sizeof(info.hook_entry));
diff --git a/net/ipv6/netfilter/nf_conntrack_reasm.c b/net/ipv6/netfilter/nf_conntrack_reasm.c
index 3a3f129a44c..79d43aa8fa8 100644
--- a/net/ipv6/netfilter/nf_conntrack_reasm.c
+++ b/net/ipv6/netfilter/nf_conntrack_reasm.c
@@ -286,7 +286,7 @@ found:
/* Check for overlap with preceding fragment. */
if (prev &&
- (NFCT_FRAG6_CB(prev)->offset + prev->len) - offset > 0)
+ (NFCT_FRAG6_CB(prev)->offset + prev->len) > offset)
goto discard_fq;
/* Look for overlap with succeeding segment. */
diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c
index c7ba3149633..0f276645375 100644
--- a/net/ipv6/reassembly.c
+++ b/net/ipv6/reassembly.c
@@ -349,7 +349,7 @@ found:
/* Check for overlap with preceding fragment. */
if (prev &&
- (FRAG6_CB(prev)->offset + prev->len) - offset > 0)
+ (FRAG6_CB(prev)->offset + prev->len) > offset)
goto discard_fq;
/* Look for overlap with succeeding segment. */
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index 25661f968f3..c346ccf66ae 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -558,11 +558,7 @@ struct rt6_info *rt6_lookup(struct net *net, const struct in6_addr *daddr,
{
struct flowi fl = {
.oif = oif,
- .nl_u = {
- .ip6_u = {
- .daddr = *daddr,
- },
- },
+ .fl6_dst = *daddr,
};
struct dst_entry *dst;
int flags = strict ? RT6_LOOKUP_F_IFACE : 0;
@@ -778,13 +774,9 @@ void ip6_route_input(struct sk_buff *skb)
int flags = RT6_LOOKUP_F_HAS_SADDR;
struct flowi fl = {
.iif = skb->dev->ifindex,
- .nl_u = {
- .ip6_u = {
- .daddr = iph->daddr,
- .saddr = iph->saddr,
- .flowlabel = (* (__be32 *) iph)&IPV6_FLOWINFO_MASK,
- },
- },
+ .fl6_dst = iph->daddr,
+ .fl6_src = iph->saddr,
+ .fl6_flowlabel = (* (__be32 *) iph)&IPV6_FLOWINFO_MASK,
.mark = skb->mark,
.proto = iph->nexthdr,
};
@@ -1463,12 +1455,8 @@ static struct rt6_info *ip6_route_redirect(struct in6_addr *dest,
struct ip6rd_flowi rdfl = {
.fl = {
.oif = dev->ifindex,
- .nl_u = {
- .ip6_u = {
- .daddr = *dest,
- .saddr = *src,
- },
- },
+ .fl6_dst = *dest,
+ .fl6_src = *src,
},
};
@@ -1945,8 +1933,12 @@ struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
struct rt6_info *rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops);
struct neighbour *neigh;
- if (rt == NULL)
+ if (rt == NULL) {
+ if (net_ratelimit())
+ pr_warning("IPv6: Maximum number of routes reached,"
+ " consider increasing route/max_size.\n");
return ERR_PTR(-ENOMEM);
+ }
dev_hold(net->loopback_dev);
in6_dev_hold(idev);
@@ -2741,6 +2733,7 @@ static void __net_exit ip6_route_net_exit(struct net *net)
kfree(net->ipv6.ip6_prohibit_entry);
kfree(net->ipv6.ip6_blk_hole_entry);
#endif
+ dst_entries_destroy(&net->ipv6.ip6_dst_ops);
}
static struct pernet_operations ip6_route_net_ops = {
@@ -2832,5 +2825,6 @@ void ip6_route_cleanup(void)
xfrm6_fini();
fib6_gc_cleanup();
unregister_pernet_subsys(&ip6_route_net_ops);
+ dst_entries_destroy(&ip6_dst_blackhole_ops);
kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
}
diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c
index d6bfaec3bbb..6e48a80d0f2 100644
--- a/net/ipv6/sit.c
+++ b/net/ipv6/sit.c
@@ -730,10 +730,9 @@ static netdev_tx_t ipip6_tunnel_xmit(struct sk_buff *skb,
}
{
- struct flowi fl = { .nl_u = { .ip4_u =
- { .daddr = dst,
- .saddr = tiph->saddr,
- .tos = RT_TOS(tos) } },
+ struct flowi fl = { .fl4_dst = dst,
+ .fl4_src = tiph->saddr,
+ .fl4_tos = RT_TOS(tos),
.oif = tunnel->parms.link,
.proto = IPPROTO_IPV6 };
if (ip_route_output_key(dev_net(dev), &rt, &fl)) {
@@ -855,10 +854,9 @@ static void ipip6_tunnel_bind_dev(struct net_device *dev)
iph = &tunnel->parms.iph;
if (iph->daddr) {
- struct flowi fl = { .nl_u = { .ip4_u =
- { .daddr = iph->daddr,
- .saddr = iph->saddr,
- .tos = RT_TOS(iph->tos) } },
+ struct flowi fl = { .fl4_dst = iph->daddr,
+ .fl4_src = iph->saddr,
+ .fl4_tos = RT_TOS(iph->tos),
.oif = tunnel->parms.link,
.proto = IPPROTO_IPV6 };
struct rtable *rt;
diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
index 91def93bec8..b541a4e009f 100644
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -227,7 +227,7 @@ begin:
if (result) {
exact_match:
- if (unlikely(!atomic_inc_not_zero(&result->sk_refcnt)))
+ if (unlikely(!atomic_inc_not_zero_hint(&result->sk_refcnt, 2)))
result = NULL;
else if (unlikely(compute_score2(result, net, saddr, sport,
daddr, hnum, dif) < badness)) {
@@ -294,7 +294,7 @@ begin:
goto begin;
if (result) {
- if (unlikely(!atomic_inc_not_zero(&result->sk_refcnt)))
+ if (unlikely(!atomic_inc_not_zero_hint(&result->sk_refcnt, 2)))
result = NULL;
else if (unlikely(compute_score(result, net, hnum, saddr, sport,
daddr, dport, dif) < badness)) {
diff --git a/net/irda/ircomm/Makefile b/net/irda/ircomm/Makefile
index 48689458c08..ab23b5ba7e3 100644
--- a/net/irda/ircomm/Makefile
+++ b/net/irda/ircomm/Makefile
@@ -4,5 +4,5 @@
obj-$(CONFIG_IRCOMM) += ircomm.o ircomm-tty.o
-ircomm-objs := ircomm_core.o ircomm_event.o ircomm_lmp.o ircomm_ttp.o
-ircomm-tty-objs := ircomm_tty.o ircomm_tty_attach.o ircomm_tty_ioctl.o ircomm_param.o
+ircomm-y := ircomm_core.o ircomm_event.o ircomm_lmp.o ircomm_ttp.o
+ircomm-tty-y := ircomm_tty.o ircomm_tty_attach.o ircomm_tty_ioctl.o ircomm_param.o
diff --git a/net/irda/irlan/Makefile b/net/irda/irlan/Makefile
index 77549bc8641..94eefbc8e6b 100644
--- a/net/irda/irlan/Makefile
+++ b/net/irda/irlan/Makefile
@@ -4,4 +4,4 @@
obj-$(CONFIG_IRLAN) += irlan.o
-irlan-objs := irlan_common.o irlan_eth.o irlan_event.o irlan_client.o irlan_provider.o irlan_filter.o irlan_provider_event.o irlan_client_event.o
+irlan-y := irlan_common.o irlan_eth.o irlan_event.o irlan_client.o irlan_provider.o irlan_filter.o irlan_provider_event.o irlan_client_event.o
diff --git a/net/irda/irnet/Makefile b/net/irda/irnet/Makefile
index b3ee01e0def..61c365c8a2a 100644
--- a/net/irda/irnet/Makefile
+++ b/net/irda/irnet/Makefile
@@ -4,4 +4,4 @@
obj-$(CONFIG_IRNET) += irnet.o
-irnet-objs := irnet_ppp.o irnet_irda.o
+irnet-y := irnet_ppp.o irnet_irda.o
diff --git a/net/irda/irttp.c b/net/irda/irttp.c
index 285761e77d9..f6054f9ccbe 100644
--- a/net/irda/irttp.c
+++ b/net/irda/irttp.c
@@ -550,22 +550,30 @@ EXPORT_SYMBOL(irttp_close_tsap);
*/
int irttp_udata_request(struct tsap_cb *self, struct sk_buff *skb)
{
+ int ret;
+
IRDA_ASSERT(self != NULL, return -1;);
IRDA_ASSERT(self->magic == TTP_TSAP_MAGIC, return -1;);
IRDA_ASSERT(skb != NULL, return -1;);
IRDA_DEBUG(4, "%s()\n", __func__);
+ /* Take shortcut on zero byte packets */
+ if (skb->len == 0) {
+ ret = 0;
+ goto err;
+ }
+
/* Check that nothing bad happens */
- if ((skb->len == 0) || (!self->connected)) {
- IRDA_DEBUG(1, "%s(), No data, or not connected\n",
- __func__);
+ if (!self->connected) {
+ IRDA_WARNING("%s(), Not connected\n", __func__);
+ ret = -ENOTCONN;
goto err;
}
if (skb->len > self->max_seg_size) {
- IRDA_DEBUG(1, "%s(), UData is too large for IrLAP!\n",
- __func__);
+ IRDA_ERROR("%s(), UData is too large for IrLAP!\n", __func__);
+ ret = -EMSGSIZE;
goto err;
}
@@ -576,7 +584,7 @@ int irttp_udata_request(struct tsap_cb *self, struct sk_buff *skb)
err:
dev_kfree_skb(skb);
- return -1;
+ return ret;
}
EXPORT_SYMBOL(irttp_udata_request);
@@ -599,9 +607,15 @@ int irttp_data_request(struct tsap_cb *self, struct sk_buff *skb)
IRDA_DEBUG(2, "%s() : queue len = %d\n", __func__,
skb_queue_len(&self->tx_queue));
+ /* Take shortcut on zero byte packets */
+ if (skb->len == 0) {
+ ret = 0;
+ goto err;
+ }
+
/* Check that nothing bad happens */
- if ((skb->len == 0) || (!self->connected)) {
- IRDA_WARNING("%s: No data, or not connected\n", __func__);
+ if (!self->connected) {
+ IRDA_WARNING("%s: Not connected\n", __func__);
ret = -ENOTCONN;
goto err;
}
diff --git a/net/l2tp/l2tp_debugfs.c b/net/l2tp/l2tp_debugfs.c
index 104ec3b283d..b8dbae82fab 100644
--- a/net/l2tp/l2tp_debugfs.c
+++ b/net/l2tp/l2tp_debugfs.c
@@ -249,7 +249,7 @@ static int l2tp_dfs_seq_open(struct inode *inode, struct file *file)
struct seq_file *seq;
int rc = -ENOMEM;
- pd = kzalloc(GFP_KERNEL, sizeof(*pd));
+ pd = kzalloc(sizeof(*pd), GFP_KERNEL);
if (pd == NULL)
goto out;
diff --git a/net/l2tp/l2tp_ip.c b/net/l2tp/l2tp_ip.c
index 0bf6a59545a..04635e88e8e 100644
--- a/net/l2tp/l2tp_ip.c
+++ b/net/l2tp/l2tp_ip.c
@@ -476,15 +476,13 @@ static int l2tp_ip_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *m
{
struct flowi fl = { .oif = sk->sk_bound_dev_if,
- .nl_u = { .ip4_u = {
- .daddr = daddr,
- .saddr = inet->inet_saddr,
- .tos = RT_CONN_FLAGS(sk) } },
+ .fl4_dst = daddr,
+ .fl4_src = inet->inet_saddr,
+ .fl4_tos = RT_CONN_FLAGS(sk),
.proto = sk->sk_protocol,
.flags = inet_sk_flowi_flags(sk),
- .uli_u = { .ports = {
- .sport = inet->inet_sport,
- .dport = inet->inet_dport } } };
+ .fl_ip_sport = inet->inet_sport,
+ .fl_ip_dport = inet->inet_dport };
/* If this fails, retransmit mechanism of transport layer will
* keep trying until route appears or the connection times
diff --git a/net/lapb/Makefile b/net/lapb/Makefile
index 53f7c90db16..fff797dfc88 100644
--- a/net/lapb/Makefile
+++ b/net/lapb/Makefile
@@ -4,4 +4,4 @@
obj-$(CONFIG_LAPB) += lapb.o
-lapb-objs := lapb_in.o lapb_out.o lapb_subr.o lapb_timer.o lapb_iface.o
+lapb-y := lapb_in.o lapb_out.o lapb_subr.o lapb_timer.o lapb_iface.o
diff --git a/net/netfilter/core.c b/net/netfilter/core.c
index 85dabb86be6..32fcbe290c0 100644
--- a/net/netfilter/core.c
+++ b/net/netfilter/core.c
@@ -173,9 +173,11 @@ next_hook:
outdev, &elem, okfn, hook_thresh);
if (verdict == NF_ACCEPT || verdict == NF_STOP) {
ret = 1;
- } else if (verdict == NF_DROP) {
+ } else if ((verdict & NF_VERDICT_MASK) == NF_DROP) {
kfree_skb(skb);
- ret = -EPERM;
+ ret = -(verdict >> NF_VERDICT_BITS);
+ if (ret == 0)
+ ret = -EPERM;
} else if ((verdict & NF_VERDICT_MASK) == NF_QUEUE) {
if (!nf_queue(skb, elem, pf, hook, indev, outdev, okfn,
verdict >> NF_VERDICT_BITS))
diff --git a/net/netfilter/ipvs/Kconfig b/net/netfilter/ipvs/Kconfig
index a22dac22705..70bd1d0774c 100644
--- a/net/netfilter/ipvs/Kconfig
+++ b/net/netfilter/ipvs/Kconfig
@@ -4,6 +4,7 @@
menuconfig IP_VS
tristate "IP virtual server support"
depends on NET && INET && NETFILTER
+ depends on (NF_CONNTRACK || NF_CONNTRACK=n)
---help---
IP Virtual Server support will let you build a high-performance
virtual server based on cluster of two or more real servers. This
diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c
index 5f5daa30b0a..c6f29363922 100644
--- a/net/netfilter/ipvs/ip_vs_ctl.c
+++ b/net/netfilter/ipvs/ip_vs_ctl.c
@@ -110,10 +110,8 @@ static int __ip_vs_addr_is_local_v6(const struct in6_addr *addr)
struct rt6_info *rt;
struct flowi fl = {
.oif = 0,
- .nl_u = {
- .ip6_u = {
- .daddr = *addr,
- .saddr = { .s6_addr32 = {0, 0, 0, 0} }, } },
+ .fl6_dst = *addr,
+ .fl6_src = { .s6_addr32 = {0, 0, 0, 0} },
};
rt = (struct rt6_info *)ip6_route_output(&init_net, NULL, &fl);
diff --git a/net/netfilter/ipvs/ip_vs_xmit.c b/net/netfilter/ipvs/ip_vs_xmit.c
index de04ea39cde..5325a3fbe4a 100644
--- a/net/netfilter/ipvs/ip_vs_xmit.c
+++ b/net/netfilter/ipvs/ip_vs_xmit.c
@@ -96,12 +96,8 @@ __ip_vs_get_out_rt(struct sk_buff *skb, struct ip_vs_dest *dest,
if (!(rt = (struct rtable *)
__ip_vs_dst_check(dest, rtos))) {
struct flowi fl = {
- .oif = 0,
- .nl_u = {
- .ip4_u = {
- .daddr = dest->addr.ip,
- .saddr = 0,
- .tos = rtos, } },
+ .fl4_dst = dest->addr.ip,
+ .fl4_tos = rtos,
};
if (ip_route_output_key(net, &rt, &fl)) {
@@ -118,12 +114,8 @@ __ip_vs_get_out_rt(struct sk_buff *skb, struct ip_vs_dest *dest,
spin_unlock(&dest->dst_lock);
} else {
struct flowi fl = {
- .oif = 0,
- .nl_u = {
- .ip4_u = {
- .daddr = daddr,
- .saddr = 0,
- .tos = rtos, } },
+ .fl4_dst = daddr,
+ .fl4_tos = rtos,
};
if (ip_route_output_key(net, &rt, &fl)) {
@@ -169,7 +161,7 @@ __ip_vs_reroute_locally(struct sk_buff *skb)
struct net *net = dev_net(dev);
struct iphdr *iph = ip_hdr(skb);
- if (rt->fl.iif) {
+ if (rt_is_input_route(rt)) {
unsigned long orefdst = skb->_skb_refdst;
if (ip_route_input(skb, iph->daddr, iph->saddr,
@@ -178,14 +170,9 @@ __ip_vs_reroute_locally(struct sk_buff *skb)
refdst_drop(orefdst);
} else {
struct flowi fl = {
- .oif = 0,
- .nl_u = {
- .ip4_u = {
- .daddr = iph->daddr,
- .saddr = iph->saddr,
- .tos = RT_TOS(iph->tos),
- }
- },
+ .fl4_dst = iph->daddr,
+ .fl4_src = iph->saddr,
+ .fl4_tos = RT_TOS(iph->tos),
.mark = skb->mark,
};
struct rtable *rt;
@@ -216,12 +203,7 @@ __ip_vs_route_output_v6(struct net *net, struct in6_addr *daddr,
{
struct dst_entry *dst;
struct flowi fl = {
- .oif = 0,
- .nl_u = {
- .ip6_u = {
- .daddr = *daddr,
- },
- },
+ .fl6_dst = *daddr,
};
dst = ip6_route_output(net, NULL, &fl);
@@ -552,7 +534,8 @@ ip_vs_nat_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
#endif
/* From world but DNAT to loopback address? */
- if (local && ipv4_is_loopback(rt->rt_dst) && skb_rtable(skb)->fl.iif) {
+ if (local && ipv4_is_loopback(rt->rt_dst) &&
+ rt_is_input_route(skb_rtable(skb))) {
IP_VS_DBG_RL_PKT(1, AF_INET, pp, skb, 0, "ip_vs_nat_xmit(): "
"stopping DNAT to loopback address");
goto tx_error_put;
@@ -1165,7 +1148,8 @@ ip_vs_icmp_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
#endif
/* From world but DNAT to loopback address? */
- if (local && ipv4_is_loopback(rt->rt_dst) && skb_rtable(skb)->fl.iif) {
+ if (local && ipv4_is_loopback(rt->rt_dst) &&
+ rt_is_input_route(skb_rtable(skb))) {
IP_VS_DBG(1, "%s(): "
"stopping DNAT to loopback %pI4\n",
__func__, &cp->daddr.ip);
diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
index 1eacf8d9966..27a5ea6b6a0 100644
--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c
@@ -1312,7 +1312,8 @@ void *nf_ct_alloc_hashtable(unsigned int *sizep, int *vmalloced, int nulls)
if (!hash) {
*vmalloced = 1;
printk(KERN_WARNING "nf_conntrack: falling back to vmalloc.\n");
- hash = __vmalloc(sz, GFP_KERNEL | __GFP_ZERO, PAGE_KERNEL);
+ hash = __vmalloc(sz, GFP_KERNEL | __GFP_HIGHMEM | __GFP_ZERO,
+ PAGE_KERNEL);
}
if (hash && nulls)
diff --git a/net/netfilter/nf_conntrack_proto.c b/net/netfilter/nf_conntrack_proto.c
index ed6d9295802..dc7bb74110d 100644
--- a/net/netfilter/nf_conntrack_proto.c
+++ b/net/netfilter/nf_conntrack_proto.c
@@ -292,6 +292,12 @@ int nf_conntrack_l4proto_register(struct nf_conntrack_l4proto *l4proto)
for (i = 0; i < MAX_NF_CT_PROTO; i++)
proto_array[i] = &nf_conntrack_l4proto_generic;
+
+ /* Before making proto_array visible to lockless readers,
+ * we must make sure its content is committed to memory.
+ */
+ smp_wmb();
+
nf_ct_protos[l4proto->l3proto] = proto_array;
} else if (nf_ct_protos[l4proto->l3proto][l4proto->l4proto] !=
&nf_conntrack_l4proto_generic) {
diff --git a/net/netfilter/xt_TEE.c b/net/netfilter/xt_TEE.c
index 22a2d421e7e..5128a6c4cb2 100644
--- a/net/netfilter/xt_TEE.c
+++ b/net/netfilter/xt_TEE.c
@@ -70,9 +70,9 @@ tee_tg_route4(struct sk_buff *skb, const struct xt_tee_tginfo *info)
return false;
fl.oif = info->priv->oif;
}
- fl.nl_u.ip4_u.daddr = info->gw.ip;
- fl.nl_u.ip4_u.tos = RT_TOS(iph->tos);
- fl.nl_u.ip4_u.scope = RT_SCOPE_UNIVERSE;
+ fl.fl4_dst = info->gw.ip;
+ fl.fl4_tos = RT_TOS(iph->tos);
+ fl.fl4_scope = RT_SCOPE_UNIVERSE;
if (ip_route_output_key(net, &rt, &fl) != 0)
return false;
@@ -150,9 +150,9 @@ tee_tg_route6(struct sk_buff *skb, const struct xt_tee_tginfo *info)
return false;
fl.oif = info->priv->oif;
}
- fl.nl_u.ip6_u.daddr = info->gw.in6;
- fl.nl_u.ip6_u.flowlabel = ((iph->flow_lbl[0] & 0xF) << 16) |
- (iph->flow_lbl[1] << 8) | iph->flow_lbl[2];
+ fl.fl6_dst = info->gw.in6;
+ fl.fl6_flowlabel = ((iph->flow_lbl[0] & 0xF) << 16) |
+ (iph->flow_lbl[1] << 8) | iph->flow_lbl[2];
dst = ip6_route_output(net, NULL, &fl);
if (dst == NULL)
return false;
diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
index 3616f27b9d4..422705d62b5 100644
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -61,6 +61,7 @@
#include <linux/kernel.h>
#include <linux/kmod.h>
#include <linux/slab.h>
+#include <linux/vmalloc.h>
#include <net/net_namespace.h>
#include <net/ip.h>
#include <net/protocol.h>
@@ -163,8 +164,14 @@ struct packet_mreq_max {
static int packet_set_ring(struct sock *sk, struct tpacket_req *req,
int closing, int tx_ring);
+/* Flag bit in pgv.flags: the block's buffer came from vzalloc(), not the page allocator */
+#define PGV_FROM_VMALLOC 1
+/* One ring-buffer block: its memory plus how that memory was obtained */
+struct pgv {
+ char *buffer; /* start of the block's memory */
+ unsigned char flags; /* PGV_FROM_VMALLOC selects vfree() over free_pages() on release */
+};
+
struct packet_ring_buffer {
- char **pg_vec;
+ struct pgv *pg_vec;
unsigned int head;
unsigned int frames_per_block;
unsigned int frame_size;
@@ -283,7 +290,8 @@ static void *packet_lookup_frame(struct packet_sock *po,
pg_vec_pos = position / rb->frames_per_block;
frame_offset = position % rb->frames_per_block;
- h.raw = rb->pg_vec[pg_vec_pos] + (frame_offset * rb->frame_size);
+ h.raw = rb->pg_vec[pg_vec_pos].buffer +
+ (frame_offset * rb->frame_size);
if (status != __packet_get_status(po, h.raw))
return NULL;
@@ -511,7 +519,7 @@ static inline unsigned int run_filter(struct sk_buff *skb, struct sock *sk,
rcu_read_lock_bh();
filter = rcu_dereference_bh(sk->sk_filter);
if (filter != NULL)
- res = sk_run_filter(skb, filter->insns, filter->len);
+ res = sk_run_filter(skb, filter->insns);
rcu_read_unlock_bh();
return res;
@@ -1610,9 +1618,11 @@ static int packet_recvmsg(struct kiocb *iocb, struct socket *sock,
err = -EINVAL;
vnet_hdr_len = sizeof(vnet_hdr);
- if ((len -= vnet_hdr_len) < 0)
+ if (len < vnet_hdr_len)
goto out_free;
+ len -= vnet_hdr_len;
+
if (skb_is_gso(skb)) {
struct skb_shared_info *sinfo = skb_shinfo(skb);
@@ -1719,7 +1729,7 @@ static int packet_getname_spkt(struct socket *sock, struct sockaddr *uaddr,
rcu_read_lock();
dev = dev_get_by_index_rcu(sock_net(sk), pkt_sk(sk)->ifindex);
if (dev)
- strlcpy(uaddr->sa_data, dev->name, 15);
+ strncpy(uaddr->sa_data, dev->name, 14);
else
memset(uaddr->sa_data, 0, 14);
rcu_read_unlock();
@@ -1742,6 +1752,7 @@ static int packet_getname(struct socket *sock, struct sockaddr *uaddr,
sll->sll_family = AF_PACKET;
sll->sll_ifindex = po->ifindex;
sll->sll_protocol = po->num;
+ sll->sll_pkttype = 0;
rcu_read_lock();
dev = dev_get_by_index_rcu(sock_net(sk), po->ifindex);
if (dev) {
@@ -2322,37 +2333,74 @@ static const struct vm_operations_struct packet_mmap_ops = {
.close = packet_mm_close,
};
-static void free_pg_vec(char **pg_vec, unsigned int order, unsigned int len)
+/*
+ * Release the first @len blocks of @pg_vec and then the vector itself.
+ * Each non-NULL buffer is freed with vfree() when PGV_FROM_VMALLOC is set
+ * in its flags and with free_pages(..., @order) otherwise. The trailing
+ * kfree() means callers must not free @pg_vec again.
+ */
+static void free_pg_vec(struct pgv *pg_vec, unsigned int order,
+ unsigned int len)
{
int i;
for (i = 0; i < len; i++) {
- if (likely(pg_vec[i]))
- free_pages((unsigned long) pg_vec[i], order);
+ if (likely(pg_vec[i].buffer)) {
+ /* The release routine has to match the allocator that produced the block */
+ if (pg_vec[i].flags & PGV_FROM_VMALLOC)
+ vfree(pg_vec[i].buffer);
+ else
+ free_pages((unsigned long)pg_vec[i].buffer,
+ order);
+ pg_vec[i].buffer = NULL;
+ }
}
kfree(pg_vec);
}
-static inline char *alloc_one_pg_vec_page(unsigned long order)
+/*
+ * Allocate one zeroed block of (1 << @order) pages, trying three sources
+ * in turn: __get_free_pages() with __GFP_NORETRY, then vzalloc(), then
+ * __get_free_pages() again without __GFP_NORETRY.
+ *
+ * PGV_FROM_VMALLOC is set in *@flags before the vzalloc() attempt and
+ * *@flags is reset to 0 if that attempt fails, so on return the flag is
+ * only set when the returned buffer came from vzalloc().
+ *
+ * Returns the buffer, or NULL when all three attempts fail.
+ */
+static inline char *alloc_one_pg_vec_page(unsigned long order,
+ unsigned char *flags)
{
- gfp_t gfp_flags = GFP_KERNEL | __GFP_COMP | __GFP_ZERO | __GFP_NOWARN;
+ char *buffer = NULL;
+ gfp_t gfp_flags = GFP_KERNEL | __GFP_COMP |
+ __GFP_ZERO | __GFP_NOWARN | __GFP_NORETRY;
+
+ buffer = (char *) __get_free_pages(gfp_flags, order);
+
+ if (buffer)
+ return buffer;
+
+ /*
+ * __get_free_pages failed, fall back to vmalloc
+ */
+ *flags |= PGV_FROM_VMALLOC;
+ buffer = vzalloc((1 << order) * PAGE_SIZE);
+
+ if (buffer)
+ return buffer;
+
+ /*
+ * vmalloc failed, lets dig into swap here
+ */
+ *flags = 0;
+ gfp_flags &= ~__GFP_NORETRY;
+ buffer = (char *)__get_free_pages(gfp_flags, order);
+ if (buffer)
+ return buffer;
- return (char *) __get_free_pages(gfp_flags, order);
+ /*
+ * complete and utter failure
+ */
+ return NULL;
}
-static char **alloc_pg_vec(struct tpacket_req *req, int order)
+/*
+ * Allocate a zeroed vector of req->tp_block_nr struct pgv entries and one
+ * block of (1 << @order) pages for each entry.
+ *
+ * If any block allocation fails, control goes to out_free_pgvec, where
+ * free_pg_vec() releases every block obtained so far together with the
+ * vector itself.
+ */
+static struct pgv *alloc_pg_vec(struct tpacket_req *req, int order)
{
unsigned int block_nr = req->tp_block_nr;
- char **pg_vec;
+ struct pgv *pg_vec;
int i;
- pg_vec = kzalloc(block_nr * sizeof(char *), GFP_KERNEL);
+ pg_vec = kcalloc(block_nr, sizeof(struct pgv), GFP_KERNEL);
if (unlikely(!pg_vec))
goto out;
for (i = 0; i < block_nr; i++) {
- pg_vec[i] = alloc_one_pg_vec_page(order);
- if (unlikely(!pg_vec[i]))
+ pg_vec[i].buffer = alloc_one_pg_vec_page(order,
+ &pg_vec[i].flags);
+ if (unlikely(!pg_vec[i].buffer))
goto out_free_pgvec;
}
@@ -2361,6 +2409,7 @@ out:
out_free_pgvec:
free_pg_vec(pg_vec, order, block_nr);
+ /* free_pg_vec() already kfree()s pg_vec; a second kfree() here would be a double free */
pg_vec = NULL;
goto out;
}
@@ -2368,7 +2417,7 @@ out_free_pgvec:
static int packet_set_ring(struct sock *sk, struct tpacket_req *req,
int closing, int tx_ring)
{
- char **pg_vec = NULL;
+ struct pgv *pg_vec = NULL;
struct packet_sock *po = pkt_sk(sk);
int was_running, order = 0;
struct packet_ring_buffer *rb;
@@ -2530,15 +2579,22 @@ static int packet_mmap(struct file *file, struct socket *sock,
continue;
for (i = 0; i < rb->pg_vec_len; i++) {
- struct page *page = virt_to_page(rb->pg_vec[i]);
+ struct page *page;
+ void *kaddr = rb->pg_vec[i].buffer;
int pg_num;
for (pg_num = 0; pg_num < rb->pg_vec_pages;
- pg_num++, page++) {
+ pg_num++) {
+ if (rb->pg_vec[i].flags & PGV_FROM_VMALLOC)
+ page = vmalloc_to_page(kaddr);
+ else
+ page = virt_to_page(kaddr);
+
err = vm_insert_page(vma, start, page);
if (unlikely(err))
goto out;
start += PAGE_SIZE;
+ kaddr += PAGE_SIZE;
}
}
}
diff --git a/net/phonet/Makefile b/net/phonet/Makefile
index d62bbba649b..e10b1b182ce 100644
--- a/net/phonet/Makefile
+++ b/net/phonet/Makefile
@@ -1,6 +1,6 @@
obj-$(CONFIG_PHONET) += phonet.o pn_pep.o
-phonet-objs := \
+phonet-y := \
pn_dev.o \
pn_netlink.o \
socket.o \
@@ -8,4 +8,4 @@ phonet-objs := \
sysctl.o \
af_phonet.o
-pn_pep-objs := pep.o pep-gprs.o
+pn_pep-y := pep.o pep-gprs.o
diff --git a/net/rds/Makefile b/net/rds/Makefile
index b46eca10968..56d3f6023ce 100644
--- a/net/rds/Makefile
+++ b/net/rds/Makefile
@@ -4,7 +4,7 @@ rds-y := af_rds.o bind.o cong.o connection.o info.o message.o \
loop.o page.o rdma.o
obj-$(CONFIG_RDS_RDMA) += rds_rdma.o
-rds_rdma-objs := rdma_transport.o \
+rds_rdma-y := rdma_transport.o \
ib.o ib_cm.o ib_recv.o ib_ring.o ib_send.o ib_stats.o \
ib_sysctl.o ib_rdma.o \
iw.o iw_cm.o iw_recv.o iw_ring.o iw_send.o iw_stats.o \
@@ -12,10 +12,8 @@ rds_rdma-objs := rdma_transport.o \
obj-$(CONFIG_RDS_TCP) += rds_tcp.o
-rds_tcp-objs := tcp.o tcp_connect.o tcp_listen.o tcp_recv.o \
+rds_tcp-y := tcp.o tcp_connect.o tcp_listen.o tcp_recv.o \
tcp_send.o tcp_stats.o
-ifeq ($(CONFIG_RDS_DEBUG), y)
-EXTRA_CFLAGS += -DDEBUG
-endif
+ccflags-$(CONFIG_RDS_DEBUG) := -DDEBUG
diff --git a/net/rds/loop.c b/net/rds/loop.c
index c390156b426..aeec1d483b1 100644
--- a/net/rds/loop.c
+++ b/net/rds/loop.c
@@ -134,8 +134,12 @@ static int rds_loop_conn_alloc(struct rds_connection *conn, gfp_t gfp)
+/*
+ * Free the loopback connection passed as @arg: unlink it from the list it
+ * sits on via loop_node while holding loop_conns_lock (interrupt state
+ * saved and restored), then kfree() it.
+ */
static void rds_loop_conn_free(void *arg)
{
struct rds_loop_connection *lc = arg;
+ unsigned long flags;
+
rdsdebug("lc %p\n", lc);
+ spin_lock_irqsave(&loop_conns_lock, flags);
list_del(&lc->loop_node);
+ spin_unlock_irqrestore(&loop_conns_lock, flags);
kfree(lc);
}
diff --git a/net/rds/message.c b/net/rds/message.c
index 848cff45183..1fd3d29023d 100644
--- a/net/rds/message.c
+++ b/net/rds/message.c
@@ -249,8 +249,10 @@ struct rds_message *rds_message_map_pages(unsigned long *page_addrs, unsigned in
rm->m_inc.i_hdr.h_len = cpu_to_be32(total_len);
rm->data.op_nents = ceil(total_len, PAGE_SIZE);
rm->data.op_sg = rds_message_alloc_sgs(rm, num_sgs);
- if (!rm->data.op_sg)
+ if (!rm->data.op_sg) {
+ rds_message_put(rm);
return ERR_PTR(-ENOMEM);
+ }
for (i = 0; i < rm->data.op_nents; ++i) {
sg_set_page(&rm->data.op_sg[i],
diff --git a/net/rds/rdma.c b/net/rds/rdma.c
index 8920f2a8332..4e37c1cbe8b 100644
--- a/net/rds/rdma.c
+++ b/net/rds/rdma.c
@@ -567,7 +567,7 @@ int rds_cmsg_rdma_args(struct rds_sock *rs, struct rds_message *rm,
goto out;
}
- if (args->nr_local > (u64)UINT_MAX) {
+ if (args->nr_local > UIO_MAXIOV) {
ret = -EMSGSIZE;
goto out;
}
diff --git a/net/rds/tcp.c b/net/rds/tcp.c
index 08a8c6cf2d1..8e0a32001c9 100644
--- a/net/rds/tcp.c
+++ b/net/rds/tcp.c
@@ -221,7 +221,13 @@ static int rds_tcp_conn_alloc(struct rds_connection *conn, gfp_t gfp)
+/*
+ * Free the TCP connection passed as @arg: unlink its t_tcp_node while
+ * holding rds_tcp_conn_lock (interrupt state saved and restored), then
+ * return the object to rds_tcp_conn_slab.
+ */
static void rds_tcp_conn_free(void *arg)
{
struct rds_tcp_connection *tc = arg;
+ unsigned long flags;
rdsdebug("freeing tc %p\n", tc);
+
+ spin_lock_irqsave(&rds_tcp_conn_lock, flags);
+ list_del(&tc->t_tcp_node);
+ spin_unlock_irqrestore(&rds_tcp_conn_lock, flags);
+
kmem_cache_free(rds_tcp_conn_slab, tc);
}
diff --git a/net/rxrpc/Makefile b/net/rxrpc/Makefile
index c46867c61c9..d1c3429b69e 100644
--- a/net/rxrpc/Makefile
+++ b/net/rxrpc/Makefile
@@ -2,7 +2,7 @@
# Makefile for Linux kernel RxRPC
#
-af-rxrpc-objs := \
+af-rxrpc-y := \
af_rxrpc.o \
ar-accept.o \
ar-ack.o \
@@ -21,7 +21,7 @@ af-rxrpc-objs := \
ar-transport.o
ifeq ($(CONFIG_PROC_FS),y)
-af-rxrpc-objs += ar-proc.o
+af-rxrpc-y += ar-proc.o
endif
obj-$(CONFIG_AF_RXRPC) += af-rxrpc.o
diff --git a/net/rxrpc/ar-peer.c b/net/rxrpc/ar-peer.c
index 9f1729bd60d..a53fb25a64e 100644
--- a/net/rxrpc/ar-peer.c
+++ b/net/rxrpc/ar-peer.c
@@ -47,12 +47,12 @@ static void rxrpc_assess_MTU_size(struct rxrpc_peer *peer)
case AF_INET:
fl.oif = 0;
fl.proto = IPPROTO_UDP,
- fl.nl_u.ip4_u.saddr = 0;
- fl.nl_u.ip4_u.daddr = peer->srx.transport.sin.sin_addr.s_addr;
- fl.nl_u.ip4_u.tos = 0;
+ fl.fl4_dst = peer->srx.transport.sin.sin_addr.s_addr;
+ fl.fl4_src = 0;
+ fl.fl4_tos = 0;
/* assume AFS.CM talking to AFS.FS */
- fl.uli_u.ports.sport = htons(7001);
- fl.uli_u.ports.dport = htons(7000);
+ fl.fl_ip_sport = htons(7001);
+ fl.fl_ip_dport = htons(7000);
break;
default:
BUG();
diff --git a/net/sched/cls_basic.c b/net/sched/cls_basic.c
index efd4f95fd05..f23d9155b1e 100644
--- a/net/sched/cls_basic.c
+++ b/net/sched/cls_basic.c
@@ -268,6 +268,10 @@ static int basic_dump(struct tcf_proto *tp, unsigned long fh,
goto nla_put_failure;
nla_nest_end(skb, nest);
+
+ if (tcf_exts_dump_stats(skb, &f->exts, &basic_ext_map) < 0)
+ goto nla_put_failure;
+
return skb->len;
nla_put_failure:
diff --git a/net/sched/cls_cgroup.c b/net/sched/cls_cgroup.c
index 37dff78e9cb..d49c40fb7e0 100644
--- a/net/sched/cls_cgroup.c
+++ b/net/sched/cls_cgroup.c
@@ -34,8 +34,6 @@ struct cgroup_subsys net_cls_subsys = {
.populate = cgrp_populate,
#ifdef CONFIG_NET_CLS_CGROUP
.subsys_id = net_cls_subsys_id,
-#else
-#define net_cls_subsys_id net_cls_subsys.subsys_id
#endif
.module = THIS_MODULE,
};
diff --git a/net/sched/em_text.c b/net/sched/em_text.c
index 76325325741..ea8f566e720 100644
--- a/net/sched/em_text.c
+++ b/net/sched/em_text.c
@@ -103,7 +103,8 @@ retry:
+/*
+ * Destroy the textsearch configuration attached to ematch @m. The call is
+ * skipped when EM_TEXT_PRIV(m) or its config pointer is NULL. @tp is not
+ * used.
+ */
static void em_text_destroy(struct tcf_proto *tp, struct tcf_ematch *m)
{
- textsearch_destroy(EM_TEXT_PRIV(m)->config);
+ if (EM_TEXT_PRIV(m) && EM_TEXT_PRIV(m)->config)
+ textsearch_destroy(EM_TEXT_PRIV(m)->config);
}
static int em_text_dump(struct sk_buff *skb, struct tcf_ematch *m)
diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c
index 1ef29c74d85..e58f9476f29 100644
--- a/net/sctp/protocol.c
+++ b/net/sctp/protocol.c
@@ -92,7 +92,7 @@ static struct sctp_af *sctp_af_v6_specific;
struct kmem_cache *sctp_chunk_cachep __read_mostly;
struct kmem_cache *sctp_bucket_cachep __read_mostly;
-int sysctl_sctp_mem[3];
+long sysctl_sctp_mem[3];
int sysctl_sctp_rmem[3];
int sysctl_sctp_wmem[3];
diff --git a/net/sctp/socket.c b/net/sctp/socket.c
index e34ca9cc116..6bd554323a3 100644
--- a/net/sctp/socket.c
+++ b/net/sctp/socket.c
@@ -111,12 +111,12 @@ static void sctp_sock_migrate(struct sock *, struct sock *,
static char *sctp_hmac_alg = SCTP_COOKIE_HMAC_ALG;
extern struct kmem_cache *sctp_bucket_cachep;
-extern int sysctl_sctp_mem[3];
+extern long sysctl_sctp_mem[3];
extern int sysctl_sctp_rmem[3];
extern int sysctl_sctp_wmem[3];
static int sctp_memory_pressure;
-static atomic_t sctp_memory_allocated;
+static atomic_long_t sctp_memory_allocated;
struct percpu_counter sctp_sockets_allocated;
static void sctp_enter_memory_pressure(struct sock *sk)
diff --git a/net/sctp/sysctl.c b/net/sctp/sysctl.c
index 832590bbe0c..50cb57f0919 100644
--- a/net/sctp/sysctl.c
+++ b/net/sctp/sysctl.c
@@ -54,7 +54,7 @@ static int sack_timer_max = 500;
static int addr_scope_max = 3; /* check sctp_scope_policy_t in include/net/sctp/constants.h for max entries */
static int rwnd_scale_max = 16;
-extern int sysctl_sctp_mem[3];
+extern long sysctl_sctp_mem[3];
extern int sysctl_sctp_rmem[3];
extern int sysctl_sctp_wmem[3];
@@ -203,7 +203,7 @@ static ctl_table sctp_table[] = {
.data = &sysctl_sctp_mem,
.maxlen = sizeof(sysctl_sctp_mem),
.mode = 0644,
- .proc_handler = proc_dointvec,
+ .proc_handler = proc_doulongvec_minmax
},
{
.procname = "sctp_rmem",
diff --git a/net/socket.c b/net/socket.c
index 3ca2fd9e372..c898df76e92 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -156,7 +156,7 @@ static const struct file_operations socket_file_ops = {
*/
static DEFINE_SPINLOCK(net_family_lock);
-static const struct net_proto_family *net_families[NPROTO] __read_mostly;
+static const struct net_proto_family __rcu *net_families[NPROTO] __read_mostly;
/*
* Statistics counters of the socket lists
@@ -1200,7 +1200,7 @@ int __sock_create(struct net *net, int family, int type, int protocol,
* requested real, full-featured networking support upon configuration.
* Otherwise module support will break!
*/
- if (net_families[family] == NULL)
+ if (rcu_access_pointer(net_families[family]) == NULL)
request_module("net-pf-%d", family);
#endif
@@ -2332,10 +2332,11 @@ int sock_register(const struct net_proto_family *ops)
}
spin_lock(&net_family_lock);
- if (net_families[ops->family])
+ if (rcu_dereference_protected(net_families[ops->family],
+ lockdep_is_held(&net_family_lock)))
err = -EEXIST;
else {
- net_families[ops->family] = ops;
+ rcu_assign_pointer(net_families[ops->family], ops);
err = 0;
}
spin_unlock(&net_family_lock);
@@ -2363,7 +2364,7 @@ void sock_unregister(int family)
BUG_ON(family < 0 || family >= NPROTO);
spin_lock(&net_family_lock);
- net_families[family] = NULL;
+ rcu_assign_pointer(net_families[family], NULL);
spin_unlock(&net_family_lock);
synchronize_rcu();
diff --git a/net/sunrpc/auth_gss/Makefile b/net/sunrpc/auth_gss/Makefile
index 7350d86a32e..9e4cb59ef9f 100644
--- a/net/sunrpc/auth_gss/Makefile
+++ b/net/sunrpc/auth_gss/Makefile
@@ -4,10 +4,10 @@
obj-$(CONFIG_SUNRPC_GSS) += auth_rpcgss.o
-auth_rpcgss-objs := auth_gss.o gss_generic_token.o \
+auth_rpcgss-y := auth_gss.o gss_generic_token.o \
gss_mech_switch.o svcauth_gss.o
obj-$(CONFIG_RPCSEC_GSS_KRB5) += rpcsec_gss_krb5.o
-rpcsec_gss_krb5-objs := gss_krb5_mech.o gss_krb5_seal.o gss_krb5_unseal.o \
+rpcsec_gss_krb5-y := gss_krb5_mech.o gss_krb5_seal.o gss_krb5_unseal.o \
gss_krb5_seqnum.o gss_krb5_wrap.o gss_krb5_crypto.o gss_krb5_keys.o
diff --git a/net/tipc/socket.c b/net/tipc/socket.c
index 33217fc3d69..e9f0d500448 100644
--- a/net/tipc/socket.c
+++ b/net/tipc/socket.c
@@ -396,6 +396,7 @@ static int get_name(struct socket *sock, struct sockaddr *uaddr,
struct sockaddr_tipc *addr = (struct sockaddr_tipc *)uaddr;
struct tipc_sock *tsock = tipc_sk(sock->sk);
+ memset(addr, 0, sizeof(*addr));
if (peer) {
if ((sock->state != SS_CONNECTED) &&
((peer != 2) || (sock->state != SS_DISCONNECTING)))
diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
index 3c95304a081..7ff31c60186 100644
--- a/net/unix/af_unix.c
+++ b/net/unix/af_unix.c
@@ -316,7 +316,8 @@ static void unix_write_space(struct sock *sk)
if (unix_writable(sk)) {
wq = rcu_dereference(sk->sk_wq);
if (wq_has_sleeper(wq))
- wake_up_interruptible_sync(&wq->wait);
+ wake_up_interruptible_sync_poll(&wq->wait,
+ POLLOUT | POLLWRNORM | POLLWRBAND);
sk_wake_async(sk, SOCK_WAKE_SPACE, POLL_OUT);
}
rcu_read_unlock();
@@ -1710,7 +1711,8 @@ static int unix_dgram_recvmsg(struct kiocb *iocb, struct socket *sock,
goto out_unlock;
}
- wake_up_interruptible_sync(&u->peer_wait);
+ wake_up_interruptible_sync_poll(&u->peer_wait,
+ POLLOUT | POLLWRNORM | POLLWRBAND);
if (msg->msg_name)
unix_copy_addr(msg, skb->sk);
@@ -2072,13 +2074,12 @@ static unsigned int unix_dgram_poll(struct file *file, struct socket *sock,
if (sk->sk_err || !skb_queue_empty(&sk->sk_error_queue))
mask |= POLLERR;
if (sk->sk_shutdown & RCV_SHUTDOWN)
- mask |= POLLRDHUP;
+ mask |= POLLRDHUP | POLLIN | POLLRDNORM;
if (sk->sk_shutdown == SHUTDOWN_MASK)
mask |= POLLHUP;
/* readable? */
- if (!skb_queue_empty(&sk->sk_receive_queue) ||
- (sk->sk_shutdown & RCV_SHUTDOWN))
+ if (!skb_queue_empty(&sk->sk_receive_queue))
mask |= POLLIN | POLLRDNORM;
/* Connection-based need to check for termination and startup */
@@ -2090,20 +2091,19 @@ static unsigned int unix_dgram_poll(struct file *file, struct socket *sock,
return mask;
}
- /* writable? */
- writable = unix_writable(sk);
- if (writable) {
- other = unix_peer_get(sk);
- if (other) {
- if (unix_peer(other) != sk) {
- sock_poll_wait(file, &unix_sk(other)->peer_wait,
- wait);
- if (unix_recvq_full(other))
- writable = 0;
- }
+ /* No write status requested, avoid expensive OUT tests. */
+ if (wait && !(wait->key & (POLLWRBAND | POLLWRNORM | POLLOUT)))
+ return mask;
- sock_put(other);
+ writable = unix_writable(sk);
+ other = unix_peer_get(sk);
+ if (other) {
+ if (unix_peer(other) != sk) {
+ sock_poll_wait(file, &unix_sk(other)->peer_wait, wait);
+ if (unix_recvq_full(other))
+ writable = 0;
}
+ sock_put(other);
}
if (writable)
diff --git a/net/wanrouter/Makefile b/net/wanrouter/Makefile
index 9f188ab3dcd..4da14bc4807 100644
--- a/net/wanrouter/Makefile
+++ b/net/wanrouter/Makefile
@@ -4,4 +4,4 @@
obj-$(CONFIG_WAN_ROUTER) += wanrouter.o
-wanrouter-objs := wanproc.o wanmain.o
+wanrouter-y := wanproc.o wanmain.o
diff --git a/net/x25/af_x25.c b/net/x25/af_x25.c
index f7af98dff40..2351aceb296 100644
--- a/net/x25/af_x25.c
+++ b/net/x25/af_x25.c
@@ -1357,11 +1357,11 @@ static int x25_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
void __user *argp = (void __user *)arg;
int rc;
- lock_kernel();
switch (cmd) {
case TIOCOUTQ: {
- int amount = sk->sk_sndbuf - sk_wmem_alloc_get(sk);
+ int amount;
+ amount = sk->sk_sndbuf - sk_wmem_alloc_get(sk);
if (amount < 0)
amount = 0;
rc = put_user(amount, (unsigned int __user *)argp);
@@ -1375,8 +1375,10 @@ static int x25_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
* These two are safe on a single CPU system as
* only user tasks fiddle here
*/
+ lock_sock(sk);
if ((skb = skb_peek(&sk->sk_receive_queue)) != NULL)
amount = skb->len;
+ release_sock(sk);
rc = put_user(amount, (unsigned int __user *)argp);
break;
}
@@ -1413,24 +1415,31 @@ static int x25_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
rc = x25_route_ioctl(cmd, argp);
break;
case SIOCX25GSUBSCRIP:
+ lock_kernel();
rc = x25_subscr_ioctl(cmd, argp);
+ unlock_kernel();
break;
case SIOCX25SSUBSCRIP:
rc = -EPERM;
if (!capable(CAP_NET_ADMIN))
break;
+ lock_kernel();
rc = x25_subscr_ioctl(cmd, argp);
+ unlock_kernel();
break;
case SIOCX25GFACILITIES: {
struct x25_facilities fac = x25->facilities;
+ lock_kernel();
rc = copy_to_user(argp, &fac,
sizeof(fac)) ? -EFAULT : 0;
+ unlock_kernel();
break;
}
case SIOCX25SFACILITIES: {
struct x25_facilities facilities;
rc = -EFAULT;
+ lock_kernel();
if (copy_from_user(&facilities, argp,
sizeof(facilities)))
break;
@@ -1466,12 +1475,15 @@ static int x25_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
break;
x25->facilities = facilities;
rc = 0;
+ unlock_kernel();
break;
}
case SIOCX25GDTEFACILITIES: {
+ lock_kernel();
rc = copy_to_user(argp, &x25->dte_facilities,
sizeof(x25->dte_facilities));
+ unlock_kernel();
if (rc)
rc = -EFAULT;
break;
@@ -1480,6 +1492,7 @@ static int x25_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
case SIOCX25SDTEFACILITIES: {
struct x25_dte_facilities dtefacs;
rc = -EFAULT;
+ lock_kernel();
if (copy_from_user(&dtefacs, argp, sizeof(dtefacs)))
break;
rc = -EINVAL;
@@ -1496,13 +1509,16 @@ static int x25_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
break;
x25->dte_facilities = dtefacs;
rc = 0;
+ unlock_kernel();
break;
}
case SIOCX25GCALLUSERDATA: {
struct x25_calluserdata cud = x25->calluserdata;
+ lock_kernel();
rc = copy_to_user(argp, &cud,
sizeof(cud)) ? -EFAULT : 0;
+ unlock_kernel();
break;
}
@@ -1510,6 +1526,7 @@ static int x25_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
struct x25_calluserdata calluserdata;
rc = -EFAULT;
+ lock_kernel();
if (copy_from_user(&calluserdata, argp,
sizeof(calluserdata)))
break;
@@ -1517,24 +1534,29 @@ static int x25_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
if (calluserdata.cudlength > X25_MAX_CUD_LEN)
break;
x25->calluserdata = calluserdata;
+ unlock_kernel();
rc = 0;
break;
}
case SIOCX25GCAUSEDIAG: {
struct x25_causediag causediag;
+ lock_kernel();
causediag = x25->causediag;
rc = copy_to_user(argp, &causediag,
sizeof(causediag)) ? -EFAULT : 0;
+ unlock_kernel();
break;
}
case SIOCX25SCAUSEDIAG: {
struct x25_causediag causediag;
rc = -EFAULT;
+ lock_kernel();
if (copy_from_user(&causediag, argp, sizeof(causediag)))
break;
x25->causediag = causediag;
+ unlock_kernel();
rc = 0;
break;
@@ -1543,6 +1565,7 @@ static int x25_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
case SIOCX25SCUDMATCHLEN: {
struct x25_subaddr sub_addr;
rc = -EINVAL;
+ lock_kernel();
if(sk->sk_state != TCP_CLOSE)
break;
rc = -EFAULT;
@@ -1553,21 +1576,25 @@ static int x25_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
if(sub_addr.cudmatchlength > X25_MAX_CUD_LEN)
break;
x25->cudmatchlength = sub_addr.cudmatchlength;
+ unlock_kernel();
rc = 0;
break;
}
case SIOCX25CALLACCPTAPPRV: {
rc = -EINVAL;
+ lock_kernel();
if (sk->sk_state != TCP_CLOSE)
break;
clear_bit(X25_ACCPT_APPRV_FLAG, &x25->flags);
+ unlock_kernel();
rc = 0;
break;
}
case SIOCX25SENDCALLACCPT: {
rc = -EINVAL;
+ lock_kernel();
if (sk->sk_state != TCP_ESTABLISHED)
break;
/* must call accptapprv above */
@@ -1575,6 +1602,7 @@ static int x25_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
break;
x25_write_internal(sk, X25_CALL_ACCEPTED);
x25->state = X25_STATE_3;
+ unlock_kernel();
rc = 0;
break;
}
@@ -1583,7 +1611,6 @@ static int x25_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
rc = -ENOIOCTLCMD;
break;
}
- unlock_kernel();
return rc;
}
@@ -1654,19 +1681,15 @@ static int compat_x25_ioctl(struct socket *sock, unsigned int cmd,
break;
case SIOCGSTAMP:
rc = -EINVAL;
- lock_kernel();
if (sk)
rc = compat_sock_get_timestamp(sk,
(struct timeval __user*)argp);
- unlock_kernel();
break;
case SIOCGSTAMPNS:
rc = -EINVAL;
- lock_kernel();
if (sk)
rc = compat_sock_get_timestampns(sk,
(struct timespec __user*)argp);
- unlock_kernel();
break;
case SIOCGIFADDR:
case SIOCSIFADDR:
@@ -1685,9 +1708,7 @@ static int compat_x25_ioctl(struct socket *sock, unsigned int cmd,
rc = -EPERM;
if (!capable(CAP_NET_ADMIN))
break;
- lock_kernel();
rc = x25_route_ioctl(cmd, argp);
- unlock_kernel();
break;
case SIOCX25GSUBSCRIP:
lock_kernel();
diff --git a/net/x25/x25_facilities.c b/net/x25/x25_facilities.c
index 771bab00754..55187c8f642 100644
--- a/net/x25/x25_facilities.c
+++ b/net/x25/x25_facilities.c
@@ -61,6 +61,8 @@ int x25_parse_facilities(struct sk_buff *skb, struct x25_facilities *facilities,
while (len > 0) {
switch (*p & X25_FAC_CLASS_MASK) {
case X25_FAC_CLASS_A:
+ if (len < 2)
+ return 0;
switch (*p) {
case X25_FAC_REVERSE:
if((p[1] & 0x81) == 0x81) {
@@ -104,6 +106,8 @@ int x25_parse_facilities(struct sk_buff *skb, struct x25_facilities *facilities,
len -= 2;
break;
case X25_FAC_CLASS_B:
+ if (len < 3)
+ return 0;
switch (*p) {
case X25_FAC_PACKET_SIZE:
facilities->pacsize_in = p[1];
@@ -125,6 +129,8 @@ int x25_parse_facilities(struct sk_buff *skb, struct x25_facilities *facilities,
len -= 3;
break;
case X25_FAC_CLASS_C:
+ if (len < 4)
+ return 0;
printk(KERN_DEBUG "X.25: unknown facility %02X, "
"values %02X, %02X, %02X\n",
p[0], p[1], p[2], p[3]);
@@ -132,26 +138,26 @@ int x25_parse_facilities(struct sk_buff *skb, struct x25_facilities *facilities,
len -= 4;
break;
case X25_FAC_CLASS_D:
+ if (len < p[1] + 2)
+ return 0;
switch (*p) {
case X25_FAC_CALLING_AE:
- if (p[1] > X25_MAX_DTE_FACIL_LEN)
- break;
+ if (p[1] > X25_MAX_DTE_FACIL_LEN || p[1] <= 1)
+ return 0;
dte_facs->calling_len = p[2];
memcpy(dte_facs->calling_ae, &p[3], p[1] - 1);
*vc_fac_mask |= X25_MASK_CALLING_AE;
break;
case X25_FAC_CALLED_AE:
- if (p[1] > X25_MAX_DTE_FACIL_LEN)
- break;
+ if (p[1] > X25_MAX_DTE_FACIL_LEN || p[1] <= 1)
+ return 0;
dte_facs->called_len = p[2];
memcpy(dte_facs->called_ae, &p[3], p[1] - 1);
*vc_fac_mask |= X25_MASK_CALLED_AE;
break;
default:
printk(KERN_DEBUG "X.25: unknown facility %02X,"
- "length %d, values %02X, %02X, "
- "%02X, %02X\n",
- p[0], p[1], p[2], p[3], p[4], p[5]);
+ "length %d\n", p[0], p[1]);
break;
}
len -= p[1] + 2;
diff --git a/net/x25/x25_in.c b/net/x25/x25_in.c
index 63178961efa..f729f022be6 100644
--- a/net/x25/x25_in.c
+++ b/net/x25/x25_in.c
@@ -119,6 +119,8 @@ static int x25_state1_machine(struct sock *sk, struct sk_buff *skb, int frametyp
&x25->vc_facil_mask);
if (len > 0)
skb_pull(skb, len);
+ else
+ return -1;
/*
* Copy any Call User Data.
*/