Diffstat (limited to 'net')
-rw-r--r-- net/802/fddi.c | 7
-rw-r--r-- net/802/hippi.c | 12
-rw-r--r-- net/802/psnap.c | 4
-rw-r--r-- net/802/tr.c | 9
-rw-r--r-- net/8021q/vlan.c | 9
-rw-r--r-- net/8021q/vlan_dev.c | 14
-rw-r--r-- net/8021q/vlanproc.c | 36
-rw-r--r-- net/Kconfig | 19
-rw-r--r-- net/Makefile | 3
-rw-r--r-- net/appletalk/aarp.c | 14
-rw-r--r-- net/appletalk/ddp.c | 21
-rw-r--r-- net/atm/br2684.c | 8
-rw-r--r-- net/atm/clip.c | 4
-rw-r--r-- net/atm/ioctl.c | 3
-rw-r--r-- net/atm/lec.c | 15
-rw-r--r-- net/atm/mpc.c | 17
-rw-r--r-- net/atm/signaling.c | 2
-rw-r--r-- net/ax25/af_ax25.c | 113
-rw-r--r-- net/ax25/ax25_ds_subr.c | 2
-rw-r--r-- net/ax25/ax25_in.c | 24
-rw-r--r-- net/ax25/ax25_ip.c | 4
-rw-r--r-- net/ax25/ax25_out.c | 12
-rw-r--r-- net/ax25/ax25_subr.c | 4
-rw-r--r-- net/bluetooth/af_bluetooth.c | 2
-rw-r--r-- net/bluetooth/bnep/core.c | 16
-rw-r--r-- net/bluetooth/cmtp/core.c | 4
-rw-r--r-- net/bluetooth/hci_conn.c | 36
-rw-r--r-- net/bluetooth/hci_core.c | 35
-rw-r--r-- net/bluetooth/hci_event.c | 8
-rw-r--r-- net/bluetooth/hci_sock.c | 2
-rw-r--r-- net/bluetooth/l2cap.c | 76
-rw-r--r-- net/bluetooth/rfcomm/core.c | 6
-rw-r--r-- net/bluetooth/sco.c | 2
-rw-r--r-- net/bridge/br.c | 12
-rw-r--r-- net/bridge/br_device.c | 22
-rw-r--r-- net/bridge/br_fdb.c | 42
-rw-r--r-- net/bridge/br_forward.c | 2
-rw-r--r-- net/bridge/br_if.c | 10
-rw-r--r-- net/bridge/br_input.c | 51
-rw-r--r-- net/bridge/br_ioctl.c | 9
-rw-r--r-- net/bridge/br_netfilter.c | 182
-rw-r--r-- net/bridge/br_netlink.c | 27
-rw-r--r-- net/bridge/br_notify.c | 13
-rw-r--r-- net/bridge/br_private.h | 23
-rw-r--r-- net/bridge/br_stp.c | 10
-rw-r--r-- net/bridge/br_stp_bpdu.c | 19
-rw-r--r-- net/bridge/br_stp_if.c | 59
-rw-r--r-- net/bridge/br_sysfs_br.c | 20
-rw-r--r-- net/bridge/br_sysfs_if.c | 8
-rw-r--r-- net/bridge/netfilter/ebt_arp.c | 48
-rw-r--r-- net/bridge/netfilter/ebt_log.c | 12
-rw-r--r-- net/bridge/netfilter/ebt_ulog.c | 12
-rw-r--r-- net/compat.c | 79
-rw-r--r-- net/core/Makefile | 1
-rw-r--r-- net/core/datagram.c | 10
-rw-r--r-- net/core/dev.c | 460
-rw-r--r-- net/core/dev_mcast.c | 7
-rw-r--r-- net/core/ethtool.c | 4
-rw-r--r-- net/core/fib_rules.c | 161
-rw-r--r-- net/core/filter.c | 6
-rw-r--r-- net/core/gen_stats.c | 4
-rw-r--r-- net/core/link_watch.c | 2
-rw-r--r-- net/core/neighbour.c | 34
-rw-r--r-- net/core/net-sysfs.c | 31
-rw-r--r-- net/core/netpoll.c | 23
-rw-r--r-- net/core/pktgen.c | 299
-rw-r--r-- net/core/rtnetlink.c | 305
-rw-r--r-- net/core/skbuff.c | 373
-rw-r--r-- net/core/sock.c | 775
-rw-r--r-- net/core/sysctl_net_core.c | 8
-rw-r--r-- net/core/utils.c | 6
-rw-r--r-- net/dccp/ackvec.c | 2
-rw-r--r-- net/dccp/ccids/ccid3.c | 322
-rw-r--r-- net/dccp/ccids/ccid3.h | 10
-rw-r--r-- net/dccp/ccids/lib/loss_interval.c | 2
-rw-r--r-- net/dccp/dccp.h | 75
-rw-r--r-- net/dccp/input.c | 54
-rw-r--r-- net/dccp/ipv4.c | 43
-rw-r--r-- net/dccp/ipv6.c | 40
-rw-r--r-- net/dccp/minisocks.c | 2
-rw-r--r-- net/dccp/options.c | 18
-rw-r--r-- net/dccp/output.c | 3
-rw-r--r-- net/dccp/probe.c | 17
-rw-r--r-- net/decnet/af_decnet.c | 12
-rw-r--r-- net/decnet/dn_dev.c | 116
-rw-r--r-- net/decnet/dn_fib.c | 10
-rw-r--r-- net/decnet/dn_neigh.c | 6
-rw-r--r-- net/decnet/dn_nsp_in.c | 7
-rw-r--r-- net/decnet/dn_nsp_out.c | 8
-rw-r--r-- net/decnet/dn_route.c | 42
-rw-r--r-- net/decnet/dn_rules.c | 6
-rw-r--r-- net/decnet/dn_table.c | 11
-rw-r--r-- net/decnet/netfilter/dn_rtmsg.c | 8
-rw-r--r-- net/econet/af_econet.c | 15
-rw-r--r-- net/ethernet/eth.c | 5
-rw-r--r-- net/ieee80211/Kconfig | 3
-rw-r--r-- net/ieee80211/ieee80211_crypt.c | 2
-rw-r--r-- net/ieee80211/ieee80211_crypt_ccmp.c | 4
-rw-r--r-- net/ieee80211/ieee80211_crypt_tkip.c | 6
-rw-r--r-- net/ieee80211/ieee80211_crypt_wep.c | 4
-rw-r--r-- net/ieee80211/ieee80211_module.c | 5
-rw-r--r-- net/ieee80211/ieee80211_rx.c | 25
-rw-r--r-- net/ieee80211/ieee80211_tx.c | 12
-rw-r--r-- net/ieee80211/ieee80211_wx.c | 4
-rw-r--r-- net/ipv4/Kconfig | 27
-rw-r--r-- net/ipv4/Makefile | 2
-rw-r--r-- net/ipv4/af_inet.c | 148
-rw-r--r-- net/ipv4/ah4.c | 14
-rw-r--r-- net/ipv4/arp.c | 16
-rw-r--r-- net/ipv4/cipso_ipv4.c | 45
-rw-r--r-- net/ipv4/devinet.c | 54
-rw-r--r-- net/ipv4/esp4.c | 59
-rw-r--r-- net/ipv4/fib_frontend.c | 21
-rw-r--r-- net/ipv4/fib_hash.c | 2
-rw-r--r-- net/ipv4/fib_rules.c | 11
-rw-r--r-- net/ipv4/fib_semantics.c | 2
-rw-r--r-- net/ipv4/fib_trie.c | 51
-rw-r--r-- net/ipv4/icmp.c | 31
-rw-r--r-- net/ipv4/igmp.c | 58
-rw-r--r-- net/ipv4/inet_diag.c | 90
-rw-r--r-- net/ipv4/inetpeer.c | 38
-rw-r--r-- net/ipv4/ip_forward.c | 14
-rw-r--r-- net/ipv4/ip_fragment.c | 47
-rw-r--r-- net/ipv4/ip_gre.c | 63
-rw-r--r-- net/ipv4/ip_input.c | 38
-rw-r--r-- net/ipv4/ip_options.c | 26
-rw-r--r-- net/ipv4/ip_output.c | 129
-rw-r--r-- net/ipv4/ip_sockglue.c | 1169
-rw-r--r-- net/ipv4/ipcomp.c | 58
-rw-r--r-- net/ipv4/ipconfig.c | 21
-rw-r--r-- net/ipv4/ipip.c | 60
-rw-r--r-- net/ipv4/ipmr.c | 418
-rw-r--r-- net/ipv4/ipvs/ip_vs_app.c | 14
-rw-r--r-- net/ipv4/ipvs/ip_vs_core.c | 56
-rw-r--r-- net/ipv4/ipvs/ip_vs_dh.c | 2
-rw-r--r-- net/ipv4/ipvs/ip_vs_ftp.c | 8
-rw-r--r-- net/ipv4/ipvs/ip_vs_lblc.c | 2
-rw-r--r-- net/ipv4/ipvs/ip_vs_lblcr.c | 2
-rw-r--r-- net/ipv4/ipvs/ip_vs_proto_ah.c | 16
-rw-r--r-- net/ipv4/ipvs/ip_vs_proto_tcp.c | 24
-rw-r--r-- net/ipv4/ipvs/ip_vs_proto_udp.c | 26
-rw-r--r-- net/ipv4/ipvs/ip_vs_sh.c | 2
-rw-r--r-- net/ipv4/ipvs/ip_vs_xmit.c | 44
-rw-r--r-- net/ipv4/multipath_drr.c | 2
-rw-r--r-- net/ipv4/netfilter.c | 8
-rw-r--r-- net/ipv4/netfilter/Kconfig | 267
-rw-r--r-- net/ipv4/netfilter/Makefile | 45
-rw-r--r-- net/ipv4/netfilter/arp_tables.c | 4
-rw-r--r-- net/ipv4/netfilter/arpt_mangle.c | 12
-rw-r--r-- net/ipv4/netfilter/ip_conntrack_amanda.c | 229
-rw-r--r-- net/ipv4/netfilter/ip_conntrack_core.c | 1550
-rw-r--r-- net/ipv4/netfilter/ip_conntrack_ftp.c | 520
-rw-r--r-- net/ipv4/netfilter/ip_conntrack_helper_h323.c | 1841
-rw-r--r-- net/ipv4/netfilter/ip_conntrack_helper_pptp.c | 684
-rw-r--r-- net/ipv4/netfilter/ip_conntrack_irc.c | 314
-rw-r--r-- net/ipv4/netfilter/ip_conntrack_netbios_ns.c | 143
-rw-r--r-- net/ipv4/netfilter/ip_conntrack_netlink.c | 1577
-rw-r--r-- net/ipv4/netfilter/ip_conntrack_proto_generic.c | 74
-rw-r--r-- net/ipv4/netfilter/ip_conntrack_proto_gre.c | 328
-rw-r--r-- net/ipv4/netfilter/ip_conntrack_proto_icmp.c | 315
-rw-r--r-- net/ipv4/netfilter/ip_conntrack_proto_sctp.c | 659
-rw-r--r-- net/ipv4/netfilter/ip_conntrack_proto_tcp.c | 1164
-rw-r--r-- net/ipv4/netfilter/ip_conntrack_proto_udp.c | 148
-rw-r--r-- net/ipv4/netfilter/ip_conntrack_sip.c | 520
-rw-r--r-- net/ipv4/netfilter/ip_conntrack_standalone.c | 962
-rw-r--r-- net/ipv4/netfilter/ip_conntrack_tftp.c | 161
-rw-r--r-- net/ipv4/netfilter/ip_nat_amanda.c | 85
-rw-r--r-- net/ipv4/netfilter/ip_nat_core.c | 634
-rw-r--r-- net/ipv4/netfilter/ip_nat_ftp.c | 180
-rw-r--r-- net/ipv4/netfilter/ip_nat_helper.c | 436
-rw-r--r-- net/ipv4/netfilter/ip_nat_helper_h323.c | 611
-rw-r--r-- net/ipv4/netfilter/ip_nat_helper_pptp.c | 350
-rw-r--r-- net/ipv4/netfilter/ip_nat_irc.c | 122
-rw-r--r-- net/ipv4/netfilter/ip_nat_proto_gre.c | 174
-rw-r--r-- net/ipv4/netfilter/ip_nat_proto_icmp.c | 87
-rw-r--r-- net/ipv4/netfilter/ip_nat_proto_tcp.c | 154
-rw-r--r-- net/ipv4/netfilter/ip_nat_proto_udp.c | 144
-rw-r--r-- net/ipv4/netfilter/ip_nat_proto_unknown.c | 55
-rw-r--r-- net/ipv4/netfilter/ip_nat_rule.c | 314
-rw-r--r-- net/ipv4/netfilter/ip_nat_sip.c | 282
-rw-r--r-- net/ipv4/netfilter/ip_nat_snmp_basic.c | 1333
-rw-r--r-- net/ipv4/netfilter/ip_nat_standalone.c | 388
-rw-r--r-- net/ipv4/netfilter/ip_nat_tftp.c | 70
-rw-r--r-- net/ipv4/netfilter/ip_queue.c | 28
-rw-r--r-- net/ipv4/netfilter/ip_tables.c | 12
-rw-r--r-- net/ipv4/netfilter/ipt_CLUSTERIP.c | 24
-rw-r--r-- net/ipv4/netfilter/ipt_ECN.c | 15
-rw-r--r-- net/ipv4/netfilter/ipt_LOG.c | 16
-rw-r--r-- net/ipv4/netfilter/ipt_MASQUERADE.c | 57
-rw-r--r-- net/ipv4/netfilter/ipt_NETMAP.c | 26
-rw-r--r-- net/ipv4/netfilter/ipt_REDIRECT.c | 24
-rw-r--r-- net/ipv4/netfilter/ipt_REJECT.c | 45
-rw-r--r-- net/ipv4/netfilter/ipt_SAME.c | 40
-rw-r--r-- net/ipv4/netfilter/ipt_TOS.c | 4
-rw-r--r-- net/ipv4/netfilter/ipt_TTL.c | 2
-rw-r--r-- net/ipv4/netfilter/ipt_ULOG.c | 77
-rw-r--r-- net/ipv4/netfilter/ipt_addrtype.c | 2
-rw-r--r-- net/ipv4/netfilter/ipt_ecn.c | 10
-rw-r--r-- net/ipv4/netfilter/ipt_iprange.c | 2
-rw-r--r-- net/ipv4/netfilter/ipt_recent.c | 6
-rw-r--r-- net/ipv4/netfilter/ipt_tos.c | 2
-rw-r--r-- net/ipv4/netfilter/ipt_ttl.c | 11
-rw-r--r-- net/ipv4/netfilter/iptable_filter.c | 3
-rw-r--r-- net/ipv4/netfilter/iptable_mangle.c | 30
-rw-r--r-- net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c | 27
-rw-r--r-- net/ipv4/netfilter/nf_conntrack_proto_icmp.c | 11
-rw-r--r-- net/ipv4/netfilter/nf_nat_core.c | 14
-rw-r--r-- net/ipv4/netfilter/nf_nat_h323.c | 14
-rw-r--r-- net/ipv4/netfilter/nf_nat_helper.c | 76
-rw-r--r-- net/ipv4/netfilter/nf_nat_pptp.c | 2
-rw-r--r-- net/ipv4/netfilter/nf_nat_proto_gre.c | 20
-rw-r--r-- net/ipv4/netfilter/nf_nat_rule.c | 6
-rw-r--r-- net/ipv4/netfilter/nf_nat_sip.c | 37
-rw-r--r-- net/ipv4/netfilter/nf_nat_snmp_basic.c | 8
-rw-r--r-- net/ipv4/netfilter/nf_nat_standalone.c | 18
-rw-r--r-- net/ipv4/proc.c | 41
-rw-r--r-- net/ipv4/protocol.c | 2
-rw-r--r-- net/ipv4/raw.c | 18
-rw-r--r-- net/ipv4/route.c | 29
-rw-r--r-- net/ipv4/syncookies.c | 40
-rw-r--r-- net/ipv4/sysctl_net_ipv4.c | 16
-rw-r--r-- net/ipv4/tcp.c | 148
-rw-r--r-- net/ipv4/tcp_bic.c | 2
-rw-r--r-- net/ipv4/tcp_cong.c | 45
-rw-r--r-- net/ipv4/tcp_cubic.c | 81
-rw-r--r-- net/ipv4/tcp_highspeed.c | 24
-rw-r--r-- net/ipv4/tcp_htcp.c | 2
-rw-r--r-- net/ipv4/tcp_hybla.c | 2
-rw-r--r-- net/ipv4/tcp_illinois.c | 356
-rw-r--r-- net/ipv4/tcp_input.c | 646
-rw-r--r-- net/ipv4/tcp_ipv4.c | 143
-rw-r--r-- net/ipv4/tcp_lp.c | 8
-rw-r--r-- net/ipv4/tcp_minisocks.c | 29
-rw-r--r-- net/ipv4/tcp_output.c | 206
-rw-r--r-- net/ipv4/tcp_probe.c | 68
-rw-r--r-- net/ipv4/tcp_timer.c | 10
-rw-r--r-- net/ipv4/tcp_vegas.c | 57
-rw-r--r-- net/ipv4/tcp_vegas.h | 24
-rw-r--r-- net/ipv4/tcp_veno.c | 10
-rw-r--r-- net/ipv4/tcp_westwood.c | 21
-rw-r--r-- net/ipv4/tcp_yeah.c | 268
-rw-r--r-- net/ipv4/udp.c | 438
-rw-r--r-- net/ipv4/udplite.c | 2
-rw-r--r-- net/ipv4/xfrm4_input.c | 23
-rw-r--r-- net/ipv4/xfrm4_mode_beet.c | 37
-rw-r--r-- net/ipv4/xfrm4_mode_transport.c | 28
-rw-r--r-- net/ipv4/xfrm4_mode_tunnel.c | 31
-rw-r--r-- net/ipv4/xfrm4_output.c | 3
-rw-r--r-- net/ipv4/xfrm4_policy.c | 8
-rw-r--r-- net/ipv4/xfrm4_tunnel.c | 3
-rw-r--r-- net/ipv6/Kconfig | 10
-rw-r--r-- net/ipv6/Makefile | 5
-rw-r--r-- net/ipv6/addrconf.c | 274
-rw-r--r-- net/ipv6/af_inet6.c | 88
-rw-r--r-- net/ipv6/ah6.c | 34
-rw-r--r-- net/ipv6/anycast.c | 17
-rw-r--r-- net/ipv6/datagram.c | 63
-rw-r--r-- net/ipv6/esp6.c | 52
-rw-r--r-- net/ipv6/exthdrs.c | 118
-rw-r--r-- net/ipv6/fib6_rules.c | 39
-rw-r--r-- net/ipv6/icmp.c | 48
-rw-r--r-- net/ipv6/ip6_fib.c | 4
-rw-r--r-- net/ipv6/ip6_input.c | 18
-rw-r--r-- net/ipv6/ip6_output.c | 187
-rw-r--r-- net/ipv6/ip6_tunnel.c | 643
-rw-r--r-- net/ipv6/ipcomp6.c | 16
-rw-r--r-- net/ipv6/ipv6_sockglue.c | 46
-rw-r--r-- net/ipv6/ipv6_syms.c | 36
-rw-r--r-- net/ipv6/mcast.c | 61
-rw-r--r-- net/ipv6/mip6.c | 62
-rw-r--r-- net/ipv6/ndisc.c | 435
-rw-r--r-- net/ipv6/netfilter.c | 8
-rw-r--r-- net/ipv6/netfilter/ip6_queue.c | 28
-rw-r--r-- net/ipv6/netfilter/ip6_tables.c | 17
-rw-r--r-- net/ipv6/netfilter/ip6t_HL.c | 2
-rw-r--r-- net/ipv6/netfilter/ip6t_LOG.c | 21
-rw-r--r-- net/ipv6/netfilter/ip6t_REJECT.c | 11
-rw-r--r-- net/ipv6/netfilter/ip6t_eui64.c | 8
-rw-r--r-- net/ipv6/netfilter/ip6t_hl.c | 2
-rw-r--r-- net/ipv6/netfilter/ip6t_ipv6header.c | 2
-rw-r--r-- net/ipv6/netfilter/ip6table_filter.c | 2
-rw-r--r-- net/ipv6/netfilter/ip6table_mangle.c | 18
-rw-r--r-- net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c | 30
-rw-r--r-- net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c | 7
-rw-r--r-- net/ipv6/netfilter/nf_conntrack_reasm.c | 59
-rw-r--r-- net/ipv6/proc.c | 62
-rw-r--r-- net/ipv6/protocol.c | 4
-rw-r--r-- net/ipv6/raw.c | 52
-rw-r--r-- net/ipv6/reassembly.c | 62
-rw-r--r-- net/ipv6/route.c | 24
-rw-r--r-- net/ipv6/sit.c | 58
-rw-r--r-- net/ipv6/tcp_ipv6.c | 120
-rw-r--r-- net/ipv6/udp.c | 123
-rw-r--r-- net/ipv6/udplite.c | 2
-rw-r--r-- net/ipv6/xfrm6_input.c | 18
-rw-r--r-- net/ipv6/xfrm6_mode_beet.c | 27
-rw-r--r-- net/ipv6/xfrm6_mode_ro.c | 7
-rw-r--r-- net/ipv6/xfrm6_mode_transport.c | 20
-rw-r--r-- net/ipv6/xfrm6_mode_tunnel.c | 36
-rw-r--r-- net/ipv6/xfrm6_output.c | 6
-rw-r--r-- net/ipv6/xfrm6_policy.c | 25
-rw-r--r-- net/ipv6/xfrm6_tunnel.c | 4
-rw-r--r-- net/ipx/af_ipx.c | 8
-rw-r--r-- net/ipx/ipx_route.c | 4
-rw-r--r-- net/irda/af_irda.c | 136
-rw-r--r-- net/irda/ircomm/ircomm_param.c | 4
-rw-r--r-- net/irda/irda_device.c | 21
-rw-r--r-- net/irda/irlan/irlan_common.c | 2
-rw-r--r-- net/irda/irlan/irlan_eth.c | 3
-rw-r--r-- net/irda/irlap_event.c | 2
-rw-r--r-- net/irda/irlap_frame.c | 18
-rw-r--r-- net/irda/irqueue.c | 9
-rw-r--r-- net/irda/irttp.c | 10
-rw-r--r-- net/irda/parameters.c | 8
-rw-r--r-- net/irda/qos.c | 14
-rw-r--r-- net/irda/wrapper.c | 5
-rw-r--r-- net/iucv/af_iucv.c | 195
-rw-r--r-- net/iucv/iucv.c | 256
-rw-r--r-- net/key/af_key.c | 4
-rw-r--r-- net/llc/llc_core.c | 10
-rw-r--r-- net/llc/llc_input.c | 2
-rw-r--r-- net/llc/llc_output.c | 8
-rw-r--r-- net/llc/llc_sap.c | 5
-rw-r--r-- net/netfilter/Kconfig | 63
-rw-r--r-- net/netfilter/core.c | 21
-rw-r--r-- net/netfilter/nf_conntrack_core.c | 58
-rw-r--r-- net/netfilter/nf_conntrack_ecache.c | 23
-rw-r--r-- net/netfilter/nf_conntrack_expect.c | 4
-rw-r--r-- net/netfilter/nf_conntrack_ftp.c | 6
-rw-r--r-- net/netfilter/nf_conntrack_netbios_ns.c | 2
-rw-r--r-- net/netfilter/nf_conntrack_netlink.c | 66
-rw-r--r-- net/netfilter/nf_conntrack_proto.c | 144
-rw-r--r-- net/netfilter/nf_conntrack_proto_generic.c | 5
-rw-r--r-- net/netfilter/nf_conntrack_proto_sctp.c | 9
-rw-r--r-- net/netfilter/nf_conntrack_proto_tcp.c | 88
-rw-r--r-- net/netfilter/nf_conntrack_proto_udp.c | 5
-rw-r--r-- net/netfilter/nf_conntrack_standalone.c | 11
-rw-r--r-- net/netfilter/nfnetlink.c | 197
-rw-r--r-- net/netfilter/nfnetlink_log.c | 108
-rw-r--r-- net/netfilter/nfnetlink_queue.c | 20
-rw-r--r-- net/netfilter/x_tables.c | 26
-rw-r--r-- net/netfilter/xt_CONNMARK.c | 32
-rw-r--r-- net/netfilter/xt_CONNSECMARK.c | 18
-rw-r--r-- net/netfilter/xt_DSCP.c | 10
-rw-r--r-- net/netfilter/xt_NOTRACK.c | 4
-rw-r--r-- net/netfilter/xt_TCPMSS.c | 12
-rw-r--r-- net/netfilter/xt_connbytes.c | 35
-rw-r--r-- net/netfilter/xt_connmark.c | 17
-rw-r--r-- net/netfilter/xt_conntrack.c | 110
-rw-r--r-- net/netfilter/xt_dscp.c | 6
-rw-r--r-- net/netfilter/xt_hashlimit.c | 14
-rw-r--r-- net/netfilter/xt_helper.c | 60
-rw-r--r-- net/netfilter/xt_length.c | 5
-rw-r--r-- net/netfilter/xt_limit.c | 7
-rw-r--r-- net/netfilter/xt_mac.c | 4
-rw-r--r-- net/netfilter/xt_pkttype.c | 2
-rw-r--r-- net/netfilter/xt_realm.c | 2
-rw-r--r-- net/netfilter/xt_state.c | 4
-rw-r--r-- net/netlabel/netlabel_kapi.c | 3
-rw-r--r-- net/netlink/af_netlink.c | 122
-rw-r--r-- net/netlink/attr.c | 5
-rw-r--r-- net/netlink/genetlink.c | 66
-rw-r--r-- net/netrom/af_netrom.c | 115
-rw-r--r-- net/netrom/nr_dev.c | 4
-rw-r--r-- net/netrom/nr_in.c | 6
-rw-r--r-- net/netrom/nr_loopback.c | 4
-rw-r--r-- net/netrom/nr_out.c | 8
-rw-r--r-- net/netrom/nr_route.c | 5
-rw-r--r-- net/netrom/nr_subr.c | 4
-rw-r--r-- net/packet/af_packet.c | 94
-rw-r--r-- net/rose/af_rose.c | 70
-rw-r--r-- net/rose/rose_loopback.c | 2
-rw-r--r-- net/rose/rose_route.c | 10
-rw-r--r-- net/rxrpc/Kconfig | 43
-rw-r--r-- net/rxrpc/Makefile | 40
-rw-r--r-- net/rxrpc/af_rxrpc.c | 879
-rw-r--r-- net/rxrpc/ar-accept.c | 504
-rw-r--r-- net/rxrpc/ar-ack.c | 1306
-rw-r--r-- net/rxrpc/ar-call.c | 804
-rw-r--r-- net/rxrpc/ar-connection.c | 911
-rw-r--r-- net/rxrpc/ar-connevent.c | 403
-rw-r--r-- net/rxrpc/ar-error.c | 255
-rw-r--r-- net/rxrpc/ar-input.c | 797
-rw-r--r-- net/rxrpc/ar-internal.h | 808
-rw-r--r-- net/rxrpc/ar-key.c | 334
-rw-r--r-- net/rxrpc/ar-local.c | 309
-rw-r--r-- net/rxrpc/ar-output.c | 734
-rw-r--r-- net/rxrpc/ar-peer.c | 316
-rw-r--r-- net/rxrpc/ar-proc.c | 247
-rw-r--r-- net/rxrpc/ar-recvmsg.c | 437
-rw-r--r-- net/rxrpc/ar-security.c | 258
-rw-r--r-- net/rxrpc/ar-skbuff.c | 132
-rw-r--r-- net/rxrpc/ar-transport.c | 276
-rw-r--r-- net/rxrpc/call.c | 2277
-rw-r--r-- net/rxrpc/connection.c | 777
-rw-r--r-- net/rxrpc/internal.h | 106
-rw-r--r-- net/rxrpc/krxiod.c | 262
-rw-r--r-- net/rxrpc/krxsecd.c | 270
-rw-r--r-- net/rxrpc/krxtimod.c | 204
-rw-r--r-- net/rxrpc/main.c | 180
-rw-r--r-- net/rxrpc/peer.c | 398
-rw-r--r-- net/rxrpc/proc.c | 617
-rw-r--r-- net/rxrpc/rxkad.c | 1154
-rw-r--r-- net/rxrpc/rxrpc_syms.c | 34
-rw-r--r-- net/rxrpc/sysctl.c | 121
-rw-r--r-- net/rxrpc/transport.c | 846
-rw-r--r-- net/sched/Kconfig | 56
-rw-r--r-- net/sched/act_api.c | 81
-rw-r--r-- net/sched/act_gact.c | 5
-rw-r--r-- net/sched/act_ipt.c | 5
-rw-r--r-- net/sched/act_mirred.c | 5
-rw-r--r-- net/sched/act_pedit.c | 7
-rw-r--r-- net/sched/act_police.c | 34
-rw-r--r-- net/sched/act_simple.c | 5
-rw-r--r-- net/sched/cls_api.c | 36
-rw-r--r-- net/sched/cls_basic.c | 7
-rw-r--r-- net/sched/cls_fw.c | 7
-rw-r--r-- net/sched/cls_route.c | 11
-rw-r--r-- net/sched/cls_rsvp.c | 1
-rw-r--r-- net/sched/cls_rsvp.h | 12
-rw-r--r-- net/sched/cls_rsvp6.c | 1
-rw-r--r-- net/sched/cls_tcindex.c | 9
-rw-r--r-- net/sched/cls_u32.c | 13
-rw-r--r-- net/sched/em_u32.c | 2
-rw-r--r-- net/sched/ematch.c | 17
-rw-r--r-- net/sched/sch_api.c | 234
-rw-r--r-- net/sched/sch_atm.c | 28
-rw-r--r-- net/sched/sch_cbq.c | 207
-rw-r--r-- net/sched/sch_dsmark.c | 22
-rw-r--r-- net/sched/sch_generic.c | 35
-rw-r--r-- net/sched/sch_hfsc.c | 109
-rw-r--r-- net/sched/sch_htb.c | 130
-rw-r--r-- net/sched/sch_ingress.c | 27
-rw-r--r-- net/sched/sch_netem.c | 108
-rw-r--r-- net/sched/sch_prio.c | 14
-rw-r--r-- net/sched/sch_sfq.c | 9
-rw-r--r-- net/sched/sch_tbf.c | 47
-rw-r--r-- net/sched/sch_teql.c | 2
-rw-r--r-- net/sctp/associola.c | 43
-rw-r--r-- net/sctp/debug.c | 5
-rw-r--r-- net/sctp/input.c | 51
-rw-r--r-- net/sctp/inqueue.c | 8
-rw-r--r-- net/sctp/ipv6.c | 85
-rw-r--r-- net/sctp/output.c | 2
-rw-r--r-- net/sctp/outqueue.c | 12
-rw-r--r-- net/sctp/protocol.c | 101
-rw-r--r-- net/sctp/sm_make_chunk.c | 27
-rw-r--r-- net/sctp/sm_sideeffect.c | 51
-rw-r--r-- net/sctp/sm_statefuns.c | 57
-rw-r--r-- net/sctp/sm_statetable.c | 2
-rw-r--r-- net/sctp/socket.c | 403
-rw-r--r-- net/sctp/transport.c | 2
-rw-r--r-- net/sctp/ulpevent.c | 49
-rw-r--r-- net/sctp/ulpqueue.c | 173
-rw-r--r-- net/socket.c | 33
-rw-r--r-- net/sunrpc/Makefile | 2
-rw-r--r-- net/sunrpc/auth_gss/gss_spkm3_seal.c | 13
-rw-r--r-- net/sunrpc/cache.c | 10
-rw-r--r-- net/sunrpc/clnt.c | 69
-rw-r--r-- net/sunrpc/pmap_clnt.c | 383
-rw-r--r-- net/sunrpc/rpcb_clnt.c | 625
-rw-r--r-- net/sunrpc/sched.c | 65
-rw-r--r-- net/sunrpc/socklib.c | 2
-rw-r--r-- net/sunrpc/svc.c | 2
-rw-r--r-- net/sunrpc/svcsock.c | 10
-rw-r--r-- net/sunrpc/xprt.c | 4
-rw-r--r-- net/sunrpc/xprtsock.c | 4
-rw-r--r-- net/tipc/config.c | 2
-rw-r--r-- net/tipc/eth_media.c | 20
-rw-r--r-- net/tipc/link.c | 48
-rw-r--r-- net/tipc/msg.h | 18
-rw-r--r-- net/tipc/netlink.c | 2
-rw-r--r-- net/tipc/port.c | 8
-rw-r--r-- net/tipc/socket.c | 2
-rw-r--r-- net/unix/af_unix.c | 2
-rw-r--r-- net/wanrouter/wanmain.c | 6
-rw-r--r-- net/wireless/Kconfig | 16
-rw-r--r-- net/wireless/Makefile | 4
-rw-r--r-- net/wireless/core.c | 224
-rw-r--r-- net/wireless/core.h | 49
-rw-r--r-- net/wireless/sysfs.c | 80
-rw-r--r-- net/wireless/sysfs.h | 9
-rw-r--r-- net/wireless/wext.c (renamed from net/core/wireless.c) | 1122
-rw-r--r-- net/x25/af_x25.c | 22
-rw-r--r-- net/x25/x25_dev.c | 4
-rw-r--r-- net/x25/x25_in.c | 14
-rw-r--r-- net/x25/x25_out.c | 6
-rw-r--r-- net/xfrm/xfrm_algo.c | 169
-rw-r--r-- net/xfrm/xfrm_input.c | 6
-rw-r--r-- net/xfrm/xfrm_policy.c | 85
-rw-r--r-- net/xfrm/xfrm_state.c | 60
-rw-r--r-- net/xfrm/xfrm_user.c | 233
492 files changed, 23260 insertions, 33133 deletions
diff --git a/net/802/fddi.c b/net/802/fddi.c
index ace6386384b..91dde41b548 100644
--- a/net/802/fddi.c
+++ b/net/802/fddi.c
@@ -100,7 +100,7 @@ static int fddi_rebuild_header(struct sk_buff *skb)
struct fddihdr *fddi = (struct fddihdr *)skb->data;
#ifdef CONFIG_INET
- if (fddi->hdr.llc_snap.ethertype == __constant_htons(ETH_P_IP))
+ if (fddi->hdr.llc_snap.ethertype == htons(ETH_P_IP))
/* Try to get ARP to resolve the header and fill destination address */
return arp_find(fddi->daddr, skb);
else
@@ -130,12 +130,13 @@ __be16 fddi_type_trans(struct sk_buff *skb, struct net_device *dev)
* to start of packet data. Assume 802.2 SNAP frames for now.
*/
- skb->mac.raw = skb->data; /* point to frame control (FC) */
+ skb->dev = dev;
+ skb_reset_mac_header(skb); /* point to frame control (FC) */
if(fddi->hdr.llc_8022_1.dsap==0xe0)
{
skb_pull(skb, FDDI_K_8022_HLEN-3);
- type = __constant_htons(ETH_P_802_2);
+ type = htons(ETH_P_802_2);
}
else
{
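
The two conversions in this hunk recur throughout the diff: skb->mac.raw is no longer aimed at skb->data by hand but through the skb_reset_mac_header()/skb_mac_header() accessors, and __constant_htons() gives way to plain htons(), which the kernel already folds at compile time for constant arguments. A minimal receive-path sketch of the idiom; fddi_example_type_trans() is an invented name, not part of this patch:

#include <linux/skbuff.h>
#include <linux/netdevice.h>
#include <linux/if_ether.h>
#include <linux/if_fddi.h>

static __be16 fddi_example_type_trans(struct sk_buff *skb,
                                      struct net_device *dev)
{
        struct fddihdr *fddi;

        skb->dev = dev;
        skb_reset_mac_header(skb);      /* mac header = current skb->data */
        fddi = (struct fddihdr *)skb_mac_header(skb);

        /* htons() of a constant is resolved at compile time */
        if (fddi->hdr.llc_snap.ethertype == htons(ETH_P_IP))
                return htons(ETH_P_IP);

        return htons(ETH_P_802_2);
}
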
diff --git a/net/802/hippi.c b/net/802/hippi.c
index 578f2a3d692..87ffc12b689 100644
--- a/net/802/hippi.c
+++ b/net/802/hippi.c
@@ -60,7 +60,7 @@ static int hippi_header(struct sk_buff *skb, struct net_device *dev,
* Due to the stupidity of the little endian byte-order we
* have to set the fp field this way.
*/
- hip->fp.fixed = __constant_htonl(0x04800018);
+ hip->fp.fixed = htonl(0x04800018);
hip->fp.d2_size = htonl(len + 8);
hip->le.fc = 0;
hip->le.double_wide = 0; /* only HIPPI 800 for the time being */
@@ -104,7 +104,7 @@ static int hippi_rebuild_header(struct sk_buff *skb)
* Only IP is currently supported
*/
- if(hip->snap.ethertype != __constant_htons(ETH_P_IP))
+ if(hip->snap.ethertype != htons(ETH_P_IP))
{
printk(KERN_DEBUG "%s: unable to resolve type %X addresses.\n",skb->dev->name,ntohs(hip->snap.ethertype));
return 0;
@@ -126,14 +126,14 @@ __be16 hippi_type_trans(struct sk_buff *skb, struct net_device *dev)
{
struct hippi_hdr *hip;
- hip = (struct hippi_hdr *) skb->data;
-
/*
* This is actually wrong ... question is if we really should
* set the raw address here.
*/
- skb->mac.raw = skb->data;
- skb_pull(skb, HIPPI_HLEN);
+ skb->dev = dev;
+ skb_reset_mac_header(skb);
+ hip = (struct hippi_hdr *)skb_mac_header(skb);
+ skb_pull(skb, HIPPI_HLEN);
/*
* No fancy promisc stuff here now.
diff --git a/net/802/psnap.c b/net/802/psnap.c
index 6e7c2120b83..04ee43e7538 100644
--- a/net/802/psnap.c
+++ b/net/802/psnap.c
@@ -56,10 +56,10 @@ static int snap_rcv(struct sk_buff *skb, struct net_device *dev,
};
rcu_read_lock();
- proto = find_snap_client(skb->h.raw);
+ proto = find_snap_client(skb_transport_header(skb));
if (proto) {
/* Pass the frame on. */
- skb->h.raw += 5;
+ skb->transport_header += 5;
skb_pull_rcsum(skb, 5);
rc = proto->rcvfunc(skb, dev, &snap_packet_type, orig_dev);
} else {
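
Here the raw pointer arithmetic on skb->h.raw becomes accessor-based. A sketch of the correspondence under the helpers this series introduces; snap_example_pull() is illustrative only:

#include <linux/skbuff.h>

/* Old field access              New accessor
 * skb->h.raw                 -> skb_transport_header(skb)
 * skb->nh.raw                -> skb_network_header(skb)
 * skb->mac.raw               -> skb_mac_header(skb)
 * skb->h.raw = skb->data     -> skb_reset_transport_header(skb)
 * skb->h.raw = skb->data + n -> skb_set_transport_header(skb, n)
 */
static void snap_example_pull(struct sk_buff *skb)
{
        /* advance the transport header past the 5-byte SNAP header,
         * keeping checksum state consistent, as snap_rcv() does */
        skb->transport_header += 5;
        skb_pull_rcsum(skb, 5);
}
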
diff --git a/net/802/tr.c b/net/802/tr.c
index 96bd14452c5..0ba1946211c 100644
--- a/net/802/tr.c
+++ b/net/802/tr.c
@@ -189,11 +189,13 @@ static int tr_rebuild_header(struct sk_buff *skb)
__be16 tr_type_trans(struct sk_buff *skb, struct net_device *dev)
{
- struct trh_hdr *trh=(struct trh_hdr *)skb->data;
+ struct trh_hdr *trh;
struct trllc *trllc;
unsigned riflen=0;
- skb->mac.raw = skb->data;
+ skb->dev = dev;
+ skb_reset_mac_header(skb);
+ trh = tr_hdr(skb);
if(trh->saddr[0] & TR_RII)
riflen = (ntohs(trh->rcf) & TR_RCF_LEN_MASK) >> 8;
@@ -552,7 +554,8 @@ static int rif_seq_show(struct seq_file *seq, void *v)
if(j==1) {
segment=ntohs(entry->rseg[j-1])>>4;
seq_printf(seq," %03X",segment);
- };
+ }
+
segment=ntohs(entry->rseg[j])>>4;
brdgnmb=ntohs(entry->rseg[j-1])&0x00f;
seq_printf(seq,"-%01X-%03X",brdgnmb,segment);
diff --git a/net/8021q/vlan.c b/net/8021q/vlan.c
index eb1c71ed7df..bd93c45778d 100644
--- a/net/8021q/vlan.c
+++ b/net/8021q/vlan.c
@@ -117,8 +117,7 @@ static void __exit vlan_cleanup_devices(void)
struct net_device *dev, *nxt;
rtnl_lock();
- for (dev = dev_base; dev; dev = nxt) {
- nxt = dev->next;
+ for_each_netdev_safe(dev, nxt) {
if (dev->priv_flags & IFF_802_1Q_VLAN) {
unregister_vlan_dev(VLAN_DEV_INFO(dev)->real_dev,
VLAN_DEV_INFO(dev)->vlan_id);
@@ -470,7 +469,7 @@ static struct net_device *register_vlan_device(const char *eth_IF_name,
*/
default:
snprintf(name, IFNAMSIZ, "vlan%.4i", VLAN_ID);
- };
+ }
new_dev = alloc_netdev(sizeof(struct vlan_dev_info), name,
vlan_setup);
@@ -685,7 +684,7 @@ static int vlan_device_event(struct notifier_block *unused, unsigned long event,
break;
}
break;
- };
+ }
out:
return NOTIFY_DONE;
@@ -819,7 +818,7 @@ static int vlan_ioctl_handler(void __user *arg)
printk(VLAN_DBG "%s: Unknown VLAN CMD: %x \n",
__FUNCTION__, args.cmd);
return -EINVAL;
- };
+ }
out:
return err;
}
diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c
index b6e0eea1e39..ec46084f44b 100644
--- a/net/8021q/vlan_dev.c
+++ b/net/8021q/vlan_dev.c
@@ -66,7 +66,7 @@ int vlan_dev_rebuild_header(struct sk_buff *skb)
memcpy(veth->h_source, dev->dev_addr, ETH_ALEN);
break;
- };
+ }
return 0;
}
@@ -83,7 +83,7 @@ static inline struct sk_buff *vlan_check_reorder_header(struct sk_buff *skb)
/* Lifted from Gleb's VLAN code... */
memmove(skb->data - ETH_HLEN,
skb->data - VLAN_ETH_HLEN, 12);
- skb->mac.raw += VLAN_HLEN;
+ skb->mac_header += VLAN_HLEN;
}
}
@@ -219,7 +219,7 @@ int vlan_skb_recv(struct sk_buff *skb, struct net_device *dev,
break;
default:
break;
- };
+ }
/* Was a VLAN packet, grab the encapsulated protocol, which the layer
* three protocols care about.
@@ -258,7 +258,7 @@ int vlan_skb_recv(struct sk_buff *skb, struct net_device *dev,
* won't work for fault tolerant netware but does for the rest.
*/
if (*(unsigned short *)rawp == 0xFFFF) {
- skb->protocol = __constant_htons(ETH_P_802_3);
+ skb->protocol = htons(ETH_P_802_3);
/* place it back on the queue to be handled by true layer 3 protocols.
*/
@@ -281,7 +281,7 @@ int vlan_skb_recv(struct sk_buff *skb, struct net_device *dev,
/*
* Real 802.2 LLC
*/
- skb->protocol = __constant_htons(ETH_P_802_2);
+ skb->protocol = htons(ETH_P_802_2);
/* place it back on the queue to be handled by upper layer protocols.
*/
@@ -382,7 +382,7 @@ int vlan_dev_hard_header(struct sk_buff *skb, struct net_device *dev,
}
skb->protocol = htons(ETH_P_8021Q);
- skb->nh.raw = skb->data;
+ skb_reset_network_header(skb);
}
/* Before delegating work to the lower layer, enter our MAC-address */
@@ -448,7 +448,7 @@ int vlan_dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev)
* OTHER THINGS LIKE FDDI/TokenRing/802.3 SNAPs...
*/
- if (veth->h_vlan_proto != __constant_htons(ETH_P_8021Q)) {
+ if (veth->h_vlan_proto != htons(ETH_P_8021Q)) {
int orig_headroom = skb_headroom(skb);
unsigned short veth_TCI;
diff --git a/net/8021q/vlanproc.c b/net/8021q/vlanproc.c
index 5e24f72602a..d216a64421c 100644
--- a/net/8021q/vlanproc.c
+++ b/net/8021q/vlanproc.c
@@ -237,13 +237,9 @@ int vlan_proc_rem_dev(struct net_device *vlandev)
* The following few functions build the content of /proc/net/vlan/config
*/
-/* starting at dev, find a VLAN device */
-static struct net_device *vlan_skip(struct net_device *dev)
+static inline int is_vlan_dev(struct net_device *dev)
{
- while (dev && !(dev->priv_flags & IFF_802_1Q_VLAN))
- dev = dev->next;
-
- return dev;
+ return dev->priv_flags & IFF_802_1Q_VLAN;
}
/* start read of /proc/net/vlan/config */
@@ -257,19 +253,35 @@ static void *vlan_seq_start(struct seq_file *seq, loff_t *pos)
if (*pos == 0)
return SEQ_START_TOKEN;
- for (dev = vlan_skip(dev_base); dev && i < *pos;
- dev = vlan_skip(dev->next), ++i);
+ for_each_netdev(dev) {
+ if (!is_vlan_dev(dev))
+ continue;
+
+ if (i++ == *pos)
+ return dev;
+ }
- return (i == *pos) ? dev : NULL;
+ return NULL;
}
static void *vlan_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
+ struct net_device *dev;
+
++*pos;
- return vlan_skip((v == SEQ_START_TOKEN)
- ? dev_base
- : ((struct net_device *)v)->next);
+ dev = (struct net_device *)v;
+ if (v == SEQ_START_TOKEN)
+ dev = net_device_entry(&dev_base_head);
+
+ for_each_netdev_continue(dev) {
+ if (!is_vlan_dev(dev))
+ continue;
+
+ return dev;
+ }
+
+ return NULL;
}
static void vlan_seq_stop(struct seq_file *seq, void *v)
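
The open-coded dev_base walks above are replaced with the new netdev iterators (for_each_netdev(), for_each_netdev_continue(), for_each_netdev_safe()). A sketch of a simple walk with them; count_vlan_devices() is an invented example, and note the 2.6.22-era single-argument form, before network namespaces added a struct net * parameter:

#include <linux/netdevice.h>
#include <linux/rtnetlink.h>
#include <linux/if.h>

static int count_vlan_devices(void)
{
        struct net_device *dev;
        int n = 0;

        rtnl_lock();                    /* keep the device list stable */
        for_each_netdev(dev)
                if (dev->priv_flags & IFF_802_1Q_VLAN)
                        n++;
        rtnl_unlock();

        return n;
}
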
diff --git a/net/Kconfig b/net/Kconfig
index 915657832d9..2fc8e77b1e6 100644
--- a/net/Kconfig
+++ b/net/Kconfig
@@ -27,13 +27,6 @@ if NET
menu "Networking options"
-config NETDEBUG
- bool "Network packet debugging"
- help
- You can say Y here if you want to get additional messages useful in
- debugging bad packets, but can overwhelm logs under denial of service
- attacks.
-
source "net/packet/Kconfig"
source "net/unix/Kconfig"
source "net/xfrm/Kconfig"
@@ -219,14 +212,18 @@ endmenu
source "net/ax25/Kconfig"
source "net/irda/Kconfig"
source "net/bluetooth/Kconfig"
-source "net/ieee80211/Kconfig"
-
-config WIRELESS_EXT
- bool
+source "net/rxrpc/Kconfig"
config FIB_RULES
bool
+menu "Wireless"
+
+source "net/wireless/Kconfig"
+source "net/ieee80211/Kconfig"
+
+endmenu
+
endif # if NET
endmenu # Networking
diff --git a/net/Makefile b/net/Makefile
index 4854ac50631..6b74d4118c5 100644
--- a/net/Makefile
+++ b/net/Makefile
@@ -38,6 +38,7 @@ obj-$(CONFIG_IRDA) += irda/
obj-$(CONFIG_BT) += bluetooth/
obj-$(CONFIG_SUNRPC) += sunrpc/
obj-$(CONFIG_RXRPC) += rxrpc/
+obj-$(CONFIG_AF_RXRPC) += rxrpc/
obj-$(CONFIG_ATM) += atm/
obj-$(CONFIG_DECNET) += decnet/
obj-$(CONFIG_ECONET) += econet/
@@ -52,3 +53,5 @@ obj-$(CONFIG_IUCV) += iucv/
ifeq ($(CONFIG_NET),y)
obj-$(CONFIG_SYSCTL) += sysctl_net.o
endif
+
+obj-y += wireless/
diff --git a/net/appletalk/aarp.c b/net/appletalk/aarp.c
index d89d62f3702..5ef6a238bdb 100644
--- a/net/appletalk/aarp.c
+++ b/net/appletalk/aarp.c
@@ -118,7 +118,9 @@ static void __aarp_send_query(struct aarp_entry *a)
/* Set up the buffer */
skb_reserve(skb, dev->hard_header_len + aarp_dl->header_length);
- skb->nh.raw = skb->h.raw = skb_put(skb, sizeof(*eah));
+ skb_reset_network_header(skb);
+ skb_reset_transport_header(skb);
+ skb_put(skb, sizeof(*eah));
skb->protocol = htons(ETH_P_ATALK);
skb->dev = dev;
eah = aarp_hdr(skb);
@@ -163,7 +165,9 @@ static void aarp_send_reply(struct net_device *dev, struct atalk_addr *us,
/* Set up the buffer */
skb_reserve(skb, dev->hard_header_len + aarp_dl->header_length);
- skb->nh.raw = skb->h.raw = skb_put(skb, sizeof(*eah));
+ skb_reset_network_header(skb);
+ skb_reset_transport_header(skb);
+ skb_put(skb, sizeof(*eah));
skb->protocol = htons(ETH_P_ATALK);
skb->dev = dev;
eah = aarp_hdr(skb);
@@ -212,7 +216,9 @@ static void aarp_send_probe(struct net_device *dev, struct atalk_addr *us)
/* Set up the buffer */
skb_reserve(skb, dev->hard_header_len + aarp_dl->header_length);
- skb->nh.raw = skb->h.raw = skb_put(skb, sizeof(*eah));
+ skb_reset_network_header(skb);
+ skb_reset_transport_header(skb);
+ skb_put(skb, sizeof(*eah));
skb->protocol = htons(ETH_P_ATALK);
skb->dev = dev;
eah = aarp_hdr(skb);
@@ -539,7 +545,7 @@ int aarp_send_ddp(struct net_device *dev, struct sk_buff *skb,
int hash;
struct aarp_entry *a;
- skb->nh.raw = skb->data;
+ skb_reset_network_header(skb);
/* Check for LocalTalk first */
if (dev->type == ARPHRD_LOCALTLK) {
diff --git a/net/appletalk/ddp.c b/net/appletalk/ddp.c
index c8b7dc2c325..f6a92a0b7aa 100644
--- a/net/appletalk/ddp.c
+++ b/net/appletalk/ddp.c
@@ -1275,7 +1275,7 @@ static int handle_ip_over_ddp(struct sk_buff *skb)
skb->protocol = htons(ETH_P_IP);
skb_pull(skb, 13);
skb->dev = dev;
- skb->h.raw = skb->data;
+ skb_reset_transport_header(skb);
stats = dev->priv;
stats->rx_packets++;
@@ -1383,10 +1383,10 @@ free_it:
* @pt - packet type
*
* Receive a packet (in skb) from device dev. This has come from the SNAP
- * decoder, and on entry skb->h.raw is the DDP header, skb->len is the DDP
- * header, skb->len is the DDP length. The physical headers have been
- * extracted. PPP should probably pass frames marked as for this layer.
- * [ie ARPHRD_ETHERTALK]
+ * decoder, and on entry skb->transport_header is the DDP header, skb->len
+ * is the DDP header, skb->len is the DDP length. The physical headers
+ * have been extracted. PPP should probably pass frames marked as for this
+ * layer. [ie ARPHRD_ETHERTALK]
*/
static int atalk_rcv(struct sk_buff *skb, struct net_device *dev,
struct packet_type *pt, struct net_device *orig_dev)
@@ -1484,7 +1484,7 @@ static int ltalk_rcv(struct sk_buff *skb, struct net_device *dev,
struct packet_type *pt, struct net_device *orig_dev)
{
/* Expand any short form frames */
- if (skb->mac.raw[2] == 1) {
+ if (skb_mac_header(skb)[2] == 1) {
struct ddpehdr *ddp;
/* Find our address */
struct atalk_addr *ap = atalk_find_dev_addr(dev);
@@ -1510,8 +1510,8 @@ static int ltalk_rcv(struct sk_buff *skb, struct net_device *dev,
* we write the network numbers !
*/
- ddp->deh_dnode = skb->mac.raw[0]; /* From physical header */
- ddp->deh_snode = skb->mac.raw[1]; /* From physical header */
+ ddp->deh_dnode = skb_mac_header(skb)[0]; /* From physical header */
+ ddp->deh_snode = skb_mac_header(skb)[1]; /* From physical header */
ddp->deh_dnet = ap->s_net; /* Network number */
ddp->deh_snet = ap->s_net;
@@ -1522,7 +1522,7 @@ static int ltalk_rcv(struct sk_buff *skb, struct net_device *dev,
/* Non routable, so force a drop if we slip up later */
ddp->deh_len_hops = htons(skb->len + (DDP_MAXHOPS << 10));
}
- skb->h.raw = skb->data;
+ skb_reset_transport_header(skb);
return atalk_rcv(skb, dev, pt, orig_dev);
freeit:
@@ -1771,6 +1771,9 @@ static int atalk_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
case SIOCGSTAMP:
rc = sock_get_timestamp(sk, argp);
break;
+ case SIOCGSTAMPNS:
+ rc = sock_get_timestampns(sk, argp);
+ break;
/* Routing */
case SIOCADDRT:
case SIOCDELRT:
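
SIOCGSTAMPNS handling like the hunk above is added to most protocol ioctl handlers in this diff; the difference from SIOCGSTAMP is only the returned type. A sketch of the dispatch, with example_stamp_ioctl() invented for illustration:

#include <linux/errno.h>
#include <linux/sockios.h>
#include <net/sock.h>

static int example_stamp_ioctl(struct sock *sk, unsigned int cmd,
                               void __user *argp)
{
        switch (cmd) {
        case SIOCGSTAMP:        /* last-packet time as struct timeval */
                return sock_get_timestamp(sk, argp);
        case SIOCGSTAMPNS:      /* same, as struct timespec (ns resolution) */
                return sock_get_timestampns(sk, argp);
        }

        return -ENOIOCTLCMD;
}
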
diff --git a/net/atm/br2684.c b/net/atm/br2684.c
index ec4ebd3299e..0e9f00c5c89 100644
--- a/net/atm/br2684.c
+++ b/net/atm/br2684.c
@@ -173,7 +173,7 @@ static int br2684_xmit_vcc(struct sk_buff *skb, struct br2684_dev *brdev,
}
skb_push(skb, minheadroom);
if (brvcc->encaps == e_llc)
- memcpy(skb->data, llc_oui_pid_pad, 10);
+ skb_copy_to_linear_data(skb, llc_oui_pid_pad, 10);
else
memset(skb->data, 0, 2);
#endif /* FASTER_VERSION */
@@ -375,11 +375,11 @@ packet_fails_filter(__be16 type, struct br2684_vcc *brvcc, struct sk_buff *skb)
{
if (brvcc->filter.netmask == 0)
return 0; /* no filter in place */
- if (type == __constant_htons(ETH_P_IP) &&
+ if (type == htons(ETH_P_IP) &&
(((struct iphdr *) (skb->data))->daddr & brvcc->filter.
netmask) == brvcc->filter.prefix)
return 0;
- if (type == __constant_htons(ETH_P_ARP))
+ if (type == htons(ETH_P_ARP))
return 0;
/* TODO: we should probably filter ARPs too.. don't want to have
* them returning values that don't make sense, or is that ok?
@@ -458,7 +458,7 @@ static void br2684_push(struct atm_vcc *atmvcc, struct sk_buff *skb)
/* FIXME: tcpdump shows that pointer to mac header is 2 bytes earlier,
than should be. What else should I set? */
skb_pull(skb, plen);
- skb->mac.raw = ((char *) (skb->data)) - ETH_HLEN;
+ skb_set_mac_header(skb, -ETH_HLEN);
skb->pkt_type = PACKET_HOST;
#ifdef CONFIG_BR2684_FAST_TRANS
skb->protocol = ((u16 *) skb->data)[-1];
diff --git a/net/atm/clip.c b/net/atm/clip.c
index 8c382581608..876b77f1474 100644
--- a/net/atm/clip.c
+++ b/net/atm/clip.c
@@ -213,7 +213,7 @@ static void clip_push(struct atm_vcc *vcc, struct sk_buff *skb)
return;
}
ATM_SKB(skb)->vcc = vcc;
- skb->mac.raw = skb->data;
+ skb_reset_mac_header(skb);
if (!clip_vcc->encap
|| skb->len < RFC1483LLC_LEN
|| memcmp(skb->data, llc_oui, sizeof (llc_oui)))
@@ -702,7 +702,7 @@ static struct atm_dev atmarpd_dev = {
.ops = &atmarpd_dev_ops,
.type = "arpd",
.number = 999,
- .lock = SPIN_LOCK_UNLOCKED
+ .lock = __SPIN_LOCK_UNLOCKED(atmarpd_dev.lock)
};
diff --git a/net/atm/ioctl.c b/net/atm/ioctl.c
index 8ccee4591f6..7afd8e7754f 100644
--- a/net/atm/ioctl.c
+++ b/net/atm/ioctl.c
@@ -82,6 +82,9 @@ int vcc_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
case SIOCGSTAMP: /* borrowed from IP */
error = sock_get_timestamp(sk, argp);
goto done;
+ case SIOCGSTAMPNS: /* borrowed from IP */
+ error = sock_get_timestampns(sk, argp);
+ goto done;
case ATM_SETSC:
printk(KERN_WARNING "ATM_SETSC is obsolete\n");
error = 0;
diff --git a/net/atm/lec.c b/net/atm/lec.c
index 3d804d61f65..4dc5f2b8c43 100644
--- a/net/atm/lec.c
+++ b/net/atm/lec.c
@@ -283,8 +283,8 @@ static int lec_start_xmit(struct sk_buff *skb, struct net_device *dev)
}
DPRINTK("skbuff head:%lx data:%lx tail:%lx end:%lx\n",
- (long)skb->head, (long)skb->data, (long)skb->tail,
- (long)skb->end);
+ (long)skb->head, (long)skb->data, (long)skb_tail_pointer(skb),
+ (long)skb_end_pointer(skb));
#if defined(CONFIG_BRIDGE) || defined(CONFIG_BRIDGE_MODULE)
if (memcmp(skb->data, bridge_ula_lec, sizeof(bridge_ula_lec)) == 0)
lec_handle_bridge(skb, dev);
@@ -576,8 +576,8 @@ static int lec_atm_send(struct atm_vcc *vcc, struct sk_buff *skb)
break;
}
skb2->len = sizeof(struct atmlec_msg);
- memcpy(skb2->data, mesg,
- sizeof(struct atmlec_msg));
+ skb_copy_to_linear_data(skb2, mesg,
+ sizeof(*mesg));
atm_force_charge(priv->lecd, skb2->truesize);
sk = sk_atm(priv->lecd);
skb_queue_tail(&sk->sk_receive_queue, skb2);
@@ -630,7 +630,7 @@ static struct atm_dev lecatm_dev = {
.ops = &lecdev_ops,
.type = "lec",
.number = 999, /* dummy device number */
- .lock = SPIN_LOCK_UNLOCKED
+ .lock = __SPIN_LOCK_UNLOCKED(lecatm_dev.lock)
};
/*
@@ -825,7 +825,6 @@ static void lec_push(struct atm_vcc *vcc, struct sk_buff *skb)
if (!hlist_empty(&priv->lec_arp_empty_ones)) {
lec_arp_check_empties(priv, vcc, skb);
}
- skb->dev = dev;
skb_pull(skb, 2); /* skip lec_id */
#ifdef CONFIG_TR
if (priv->is_trdev)
@@ -1338,7 +1337,7 @@ static int lane2_resolve(struct net_device *dev, u8 *dst_mac, int force,
if (skb == NULL)
return -1;
skb->len = *sizeoftlvs;
- memcpy(skb->data, *tlvs, *sizeoftlvs);
+ skb_copy_to_linear_data(skb, *tlvs, *sizeoftlvs);
retval = send_to_lecd(priv, l_arp_xmt, dst_mac, NULL, skb);
}
return retval;
@@ -1372,7 +1371,7 @@ static int lane2_associate_req(struct net_device *dev, u8 *lan_dst,
if (skb == NULL)
return 0;
skb->len = sizeoftlvs;
- memcpy(skb->data, tlvs, sizeoftlvs);
+ skb_copy_to_linear_data(skb, tlvs, sizeoftlvs);
retval = send_to_lecd(priv, l_associate_req, NULL, NULL, skb);
if (retval != 0)
printk("lec.c: lane2_associate_req() failed\n");
diff --git a/net/atm/mpc.c b/net/atm/mpc.c
index cb3c004ff02..7c85aa551d5 100644
--- a/net/atm/mpc.c
+++ b/net/atm/mpc.c
@@ -504,11 +504,13 @@ static int send_via_shortcut(struct sk_buff *skb, struct mpoa_client *mpc)
tagged_llc_snap_hdr.tag = entry->ctrl_info.tag;
skb_pull(skb, ETH_HLEN); /* get rid of Eth header */
skb_push(skb, sizeof(tagged_llc_snap_hdr)); /* add LLC/SNAP header */
- memcpy(skb->data, &tagged_llc_snap_hdr, sizeof(tagged_llc_snap_hdr));
+ skb_copy_to_linear_data(skb, &tagged_llc_snap_hdr,
+ sizeof(tagged_llc_snap_hdr));
} else {
skb_pull(skb, ETH_HLEN); /* get rid of Eth header */
skb_push(skb, sizeof(struct llc_snap_hdr)); /* add LLC/SNAP header + tag */
- memcpy(skb->data, &llc_snap_mpoa_data, sizeof(struct llc_snap_hdr));
+ skb_copy_to_linear_data(skb, &llc_snap_mpoa_data,
+ sizeof(struct llc_snap_hdr));
}
atomic_add(skb->truesize, &sk_atm(entry->shortcut)->sk_wmem_alloc);
@@ -711,11 +713,12 @@ static void mpc_push(struct atm_vcc *vcc, struct sk_buff *skb)
return;
}
skb_push(new_skb, eg->ctrl_info.DH_length); /* add MAC header */
- memcpy(new_skb->data, eg->ctrl_info.DLL_header, eg->ctrl_info.DH_length);
+ skb_copy_to_linear_data(new_skb, eg->ctrl_info.DLL_header,
+ eg->ctrl_info.DH_length);
new_skb->protocol = eth_type_trans(new_skb, dev);
- new_skb->nh.raw = new_skb->data;
+ skb_reset_network_header(new_skb);
- eg->latest_ip_addr = new_skb->nh.iph->saddr;
+ eg->latest_ip_addr = ip_hdr(new_skb)->saddr;
eg->packets_rcvd++;
mpc->eg_ops->put(eg);
@@ -734,7 +737,7 @@ static struct atm_dev mpc_dev = {
.ops = &mpc_ops,
.type = "mpc",
.number = 42,
- .lock = SPIN_LOCK_UNLOCKED
+ .lock = __SPIN_LOCK_UNLOCKED(mpc_dev.lock)
/* members not explicitly initialised will be 0 */
};
@@ -936,7 +939,7 @@ int msg_to_mpoad(struct k_message *mesg, struct mpoa_client *mpc)
if (skb == NULL)
return -ENOMEM;
skb_put(skb, sizeof(struct k_message));
- memcpy(skb->data, mesg, sizeof(struct k_message));
+ skb_copy_to_linear_data(skb, mesg, sizeof(*mesg));
atm_force_charge(mpc->mpoad_vcc, skb->truesize);
sk = sk_atm(mpc->mpoad_vcc);
diff --git a/net/atm/signaling.c b/net/atm/signaling.c
index 31d98b57e1d..d14baaf1f4c 100644
--- a/net/atm/signaling.c
+++ b/net/atm/signaling.c
@@ -256,7 +256,7 @@ static struct atm_dev sigd_dev = {
.ops = &sigd_dev_ops,
.type = "sig",
.number = 999,
- .lock = SPIN_LOCK_UNLOCKED
+ .lock = __SPIN_LOCK_UNLOCKED(sigd_dev.lock)
};
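
All three static ATM devices (clip, lec, mpc, and sigd above) get the same initializer fix: SPIN_LOCK_UNLOCKED hands every user an identical lockdep class, whereas __SPIN_LOCK_UNLOCKED(name) keys the class to the named lock. A minimal sketch with an invented structure:

#include <linux/spinlock.h>

static struct example_dev {
        spinlock_t lock;
        int users;
} example_dev = {
        /* per-lock lockdep class, keyed to example_dev.lock */
        .lock = __SPIN_LOCK_UNLOCKED(example_dev.lock),
};
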
diff --git a/net/ax25/af_ax25.c b/net/ax25/af_ax25.c
index 1c07c6a50eb..6ded95272a5 100644
--- a/net/ax25/af_ax25.c
+++ b/net/ax25/af_ax25.c
@@ -1127,22 +1127,22 @@ static int __must_check ax25_connect(struct socket *sock,
switch (sk->sk_state) {
case TCP_SYN_SENT: /* still trying */
err = -EINPROGRESS;
- goto out;
+ goto out_release;
case TCP_ESTABLISHED: /* connection established */
sock->state = SS_CONNECTED;
- goto out;
+ goto out_release;
case TCP_CLOSE: /* connection refused */
sock->state = SS_UNCONNECTED;
err = -ECONNREFUSED;
- goto out;
+ goto out_release;
}
}
if (sk->sk_state == TCP_ESTABLISHED && sk->sk_type == SOCK_SEQPACKET) {
err = -EISCONN; /* No reconnect on a seqpacket socket */
- goto out;
+ goto out_release;
}
sk->sk_state = TCP_CLOSE;
@@ -1159,12 +1159,12 @@ static int __must_check ax25_connect(struct socket *sock,
/* Valid number of digipeaters ? */
if (fsa->fsa_ax25.sax25_ndigis < 1 || fsa->fsa_ax25.sax25_ndigis > AX25_MAX_DIGIS) {
err = -EINVAL;
- goto out;
+ goto out_release;
}
if ((digi = kmalloc(sizeof(ax25_digi), GFP_KERNEL)) == NULL) {
err = -ENOBUFS;
- goto out;
+ goto out_release;
}
digi->ndigi = fsa->fsa_ax25.sax25_ndigis;
@@ -1194,7 +1194,7 @@ static int __must_check ax25_connect(struct socket *sock,
current->comm);
if ((err = ax25_rt_autobind(ax25, &fsa->fsa_ax25.sax25_call)) < 0) {
kfree(digi);
- goto out;
+ goto out_release;
}
ax25_fillin_cb(ax25, ax25->ax25_dev);
@@ -1203,7 +1203,7 @@ static int __must_check ax25_connect(struct socket *sock,
if (ax25->ax25_dev == NULL) {
kfree(digi);
err = -EHOSTUNREACH;
- goto out;
+ goto out_release;
}
}
@@ -1213,7 +1213,7 @@ static int __must_check ax25_connect(struct socket *sock,
kfree(digi);
err = -EADDRINUSE; /* Already such a connection */
ax25_cb_put(ax25t);
- goto out;
+ goto out_release;
}
ax25->dest_addr = fsa->fsa_ax25.sax25_call;
@@ -1223,7 +1223,7 @@ static int __must_check ax25_connect(struct socket *sock,
if (sk->sk_type != SOCK_SEQPACKET) {
sock->state = SS_CONNECTED;
sk->sk_state = TCP_ESTABLISHED;
- goto out;
+ goto out_release;
}
/* Move to connecting socket, ax.25 lapb WAIT_UA.. */
@@ -1255,55 +1255,53 @@ static int __must_check ax25_connect(struct socket *sock,
/* Now the loop */
if (sk->sk_state != TCP_ESTABLISHED && (flags & O_NONBLOCK)) {
err = -EINPROGRESS;
- goto out;
+ goto out_release;
}
if (sk->sk_state == TCP_SYN_SENT) {
- struct task_struct *tsk = current;
- DECLARE_WAITQUEUE(wait, tsk);
+ DEFINE_WAIT(wait);
- add_wait_queue(sk->sk_sleep, &wait);
for (;;) {
+ prepare_to_wait(sk->sk_sleep, &wait,
+ TASK_INTERRUPTIBLE);
if (sk->sk_state != TCP_SYN_SENT)
break;
- set_current_state(TASK_INTERRUPTIBLE);
- release_sock(sk);
- if (!signal_pending(tsk)) {
+ if (!signal_pending(current)) {
+ release_sock(sk);
schedule();
lock_sock(sk);
continue;
}
- current->state = TASK_RUNNING;
- remove_wait_queue(sk->sk_sleep, &wait);
- return -ERESTARTSYS;
+ err = -ERESTARTSYS;
+ break;
}
- current->state = TASK_RUNNING;
- remove_wait_queue(sk->sk_sleep, &wait);
+ finish_wait(sk->sk_sleep, &wait);
+
+ if (err)
+ goto out_release;
}
if (sk->sk_state != TCP_ESTABLISHED) {
/* Not in ABM, not in WAIT_UA -> failed */
sock->state = SS_UNCONNECTED;
err = sock_error(sk); /* Always set at this point */
- goto out;
+ goto out_release;
}
sock->state = SS_CONNECTED;
- err=0;
-out:
+ err = 0;
+out_release:
release_sock(sk);
return err;
}
-
static int ax25_accept(struct socket *sock, struct socket *newsock, int flags)
{
- struct task_struct *tsk = current;
- DECLARE_WAITQUEUE(wait, tsk);
struct sk_buff *skb;
struct sock *newsk;
+ DEFINE_WAIT(wait);
struct sock *sk;
int err = 0;
@@ -1328,30 +1326,29 @@ static int ax25_accept(struct socket *sock, struct socket *newsock, int flags)
* The read queue this time is holding sockets ready to use
* hooked into the SABM we saved
*/
- add_wait_queue(sk->sk_sleep, &wait);
for (;;) {
+ prepare_to_wait(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE);
skb = skb_dequeue(&sk->sk_receive_queue);
if (skb)
break;
- release_sock(sk);
- current->state = TASK_INTERRUPTIBLE;
if (flags & O_NONBLOCK) {
- current->state = TASK_RUNNING;
- remove_wait_queue(sk->sk_sleep, &wait);
- return -EWOULDBLOCK;
+ err = -EWOULDBLOCK;
+ break;
}
- if (!signal_pending(tsk)) {
+ if (!signal_pending(current)) {
+ release_sock(sk);
schedule();
lock_sock(sk);
continue;
}
- current->state = TASK_RUNNING;
- remove_wait_queue(sk->sk_sleep, &wait);
- return -ERESTARTSYS;
+ err = -ERESTARTSYS;
+ break;
}
- current->state = TASK_RUNNING;
- remove_wait_queue(sk->sk_sleep, &wait);
+ finish_wait(sk->sk_sleep, &wait);
+
+ if (err)
+ goto out;
newsk = skb->sk;
newsk->sk_socket = newsock;
@@ -1425,7 +1422,6 @@ static int ax25_sendmsg(struct kiocb *iocb, struct socket *sock,
struct sockaddr_ax25 sax;
struct sk_buff *skb;
ax25_digi dtmp, *dp;
- unsigned char *asmptr;
ax25_cb *ax25;
size_t size;
int lv, err, addr_len = msg->msg_namelen;
@@ -1548,13 +1544,11 @@ static int ax25_sendmsg(struct kiocb *iocb, struct socket *sock,
goto out;
}
- skb->nh.raw = skb->data;
+ skb_reset_network_header(skb);
/* Add the PID if one is not supplied by the user in the skb */
- if (!ax25->pidincl) {
- asmptr = skb_push(skb, 1);
- *asmptr = sk->sk_protocol;
- }
+ if (!ax25->pidincl)
+ *skb_push(skb, 1) = sk->sk_protocol;
SOCK_DEBUG(sk, "AX.25: Transmitting buffer\n");
@@ -1573,7 +1567,7 @@ static int ax25_sendmsg(struct kiocb *iocb, struct socket *sock,
goto out;
}
- asmptr = skb_push(skb, 1 + ax25_addr_size(dp));
+ skb_push(skb, 1 + ax25_addr_size(dp));
SOCK_DEBUG(sk, "Building AX.25 Header (dp=%p).\n", dp);
@@ -1581,17 +1575,17 @@ static int ax25_sendmsg(struct kiocb *iocb, struct socket *sock,
SOCK_DEBUG(sk, "Num digipeaters=%d\n", dp->ndigi);
/* Build an AX.25 header */
- asmptr += (lv = ax25_addr_build(asmptr, &ax25->source_addr,
- &sax.sax25_call, dp,
- AX25_COMMAND, AX25_MODULUS));
+ lv = ax25_addr_build(skb->data, &ax25->source_addr, &sax.sax25_call,
+ dp, AX25_COMMAND, AX25_MODULUS);
SOCK_DEBUG(sk, "Built header (%d bytes)\n",lv);
- skb->h.raw = asmptr;
+ skb_set_transport_header(skb, lv);
- SOCK_DEBUG(sk, "base=%p pos=%p\n", skb->data, asmptr);
+ SOCK_DEBUG(sk, "base=%p pos=%p\n",
+ skb->data, skb_transport_header(skb));
- *asmptr = AX25_UI;
+ *skb_transport_header(skb) = AX25_UI;
/* Datagram frames go straight out of the door as UI */
ax25_queue_xmit(skb, ax25->ax25_dev->dev);
@@ -1631,8 +1625,8 @@ static int ax25_recvmsg(struct kiocb *iocb, struct socket *sock,
if (!ax25_sk(sk)->pidincl)
skb_pull(skb, 1); /* Remove PID */
- skb->h.raw = skb->data;
- copied = skb->len;
+ skb_reset_transport_header(skb);
+ copied = skb->len;
if (copied > size) {
copied = size;
@@ -1645,9 +1639,10 @@ static int ax25_recvmsg(struct kiocb *iocb, struct socket *sock,
struct sockaddr_ax25 *sax = (struct sockaddr_ax25 *)msg->msg_name;
ax25_digi digi;
ax25_address src;
+ const unsigned char *mac = skb_mac_header(skb);
- ax25_addr_parse(skb->mac.raw+1, skb->data-skb->mac.raw-1, &src, NULL, &digi, NULL, NULL);
-
+ ax25_addr_parse(mac + 1, skb->data - mac - 1, &src, NULL,
+ &digi, NULL, NULL);
sax->sax25_family = AF_AX25;
/* We set this correctly, even though we may not let the
application know the digi calls further down (because it
@@ -1711,6 +1706,10 @@ static int ax25_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
res = sock_get_timestamp(sk, argp);
break;
+ case SIOCGSTAMPNS:
+ res = sock_get_timestampns(sk, argp);
+ break;
+
case SIOCAX25ADDUID: /* Add a uid to the uid/call map table */
case SIOCAX25DELUID: /* Delete a uid from the uid/call map table */
case SIOCAX25GETUID: {
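
The ax25_connect()/ax25_accept() rewrites above both follow the standard prepare_to_wait() idiom, which sets the task state before re-testing the condition so a wakeup between the test and schedule() is not lost, and funnels every exit through one finish_wait()/goto path. A condensed sketch; wait_for_established() and its connected() callback are invented:

#include <linux/errno.h>
#include <linux/wait.h>
#include <linux/sched.h>
#include <net/sock.h>

static int wait_for_established(struct sock *sk,
                                int (*connected)(const struct sock *))
{
        DEFINE_WAIT(wait);
        int err = 0;

        for (;;) {
                prepare_to_wait(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE);
                if (connected(sk))
                        break;
                if (signal_pending(current)) {
                        err = -ERESTARTSYS;
                        break;
                }
                release_sock(sk);       /* sleep without the socket lock */
                schedule();
                lock_sock(sk);
        }
        finish_wait(sk->sk_sleep, &wait);

        return err;
}
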
diff --git a/net/ax25/ax25_ds_subr.c b/net/ax25/ax25_ds_subr.c
index 9569dd3fa46..a49773ff2b9 100644
--- a/net/ax25/ax25_ds_subr.c
+++ b/net/ax25/ax25_ds_subr.c
@@ -136,7 +136,7 @@ static void ax25_kiss_cmd(ax25_dev *ax25_dev, unsigned char cmd, unsigned char p
if ((skb = alloc_skb(2, GFP_ATOMIC)) == NULL)
return;
- skb->nh.raw = skb->data;
+ skb_reset_network_header(skb);
p = skb_put(skb, 2);
*p++ = cmd;
diff --git a/net/ax25/ax25_in.c b/net/ax25/ax25_in.c
index 4a6b26becad..0ddaff0df21 100644
--- a/net/ax25/ax25_in.c
+++ b/net/ax25/ax25_in.c
@@ -61,12 +61,14 @@ static int ax25_rx_fragment(ax25_cb *ax25, struct sk_buff *skb)
skb_reserve(skbn, AX25_MAX_HEADER_LEN);
skbn->dev = ax25->ax25_dev->dev;
- skbn->h.raw = skbn->data;
- skbn->nh.raw = skbn->data;
+ skb_reset_network_header(skbn);
+ skb_reset_transport_header(skbn);
/* Copy data from the fragments */
while ((skbo = skb_dequeue(&ax25->frag_queue)) != NULL) {
- memcpy(skb_put(skbn, skbo->len), skbo->data, skbo->len);
+ skb_copy_from_linear_data(skbo,
+ skb_put(skbn, skbo->len),
+ skbo->len);
kfree_skb(skbo);
}
@@ -122,8 +124,8 @@ int ax25_rx_iframe(ax25_cb *ax25, struct sk_buff *skb)
}
skb_pull(skb, 1); /* Remove PID */
- skb->mac.raw = skb->nh.raw;
- skb->nh.raw = skb->data;
+ skb_reset_mac_header(skb);
+ skb_reset_network_header(skb);
skb->dev = ax25->ax25_dev->dev;
skb->pkt_type = PACKET_HOST;
skb->protocol = htons(ETH_P_IP);
@@ -196,7 +198,7 @@ static int ax25_rcv(struct sk_buff *skb, struct net_device *dev,
* Process the AX.25/LAPB frame.
*/
- skb->h.raw = skb->data;
+ skb_reset_transport_header(skb);
if ((ax25_dev = ax25_dev_ax25dev(dev)) == NULL) {
kfree_skb(skb);
@@ -233,7 +235,7 @@ static int ax25_rcv(struct sk_buff *skb, struct net_device *dev,
/* UI frame - bypass LAPB processing */
if ((*skb->data & ~0x10) == AX25_UI && dp.lastrepeat + 1 == dp.ndigi) {
- skb->h.raw = skb->data + 2; /* skip control and pid */
+ skb_set_transport_header(skb, 2); /* skip control and pid */
ax25_send_to_raw(&dest, skb, skb->data[1]);
@@ -246,8 +248,8 @@ static int ax25_rcv(struct sk_buff *skb, struct net_device *dev,
switch (skb->data[1]) {
case AX25_P_IP:
skb_pull(skb,2); /* drop PID/CTRL */
- skb->h.raw = skb->data;
- skb->nh.raw = skb->data;
+ skb_reset_transport_header(skb);
+ skb_reset_network_header(skb);
skb->dev = dev;
skb->pkt_type = PACKET_HOST;
skb->protocol = htons(ETH_P_IP);
@@ -256,8 +258,8 @@ static int ax25_rcv(struct sk_buff *skb, struct net_device *dev,
case AX25_P_ARP:
skb_pull(skb,2);
- skb->h.raw = skb->data;
- skb->nh.raw = skb->data;
+ skb_reset_transport_header(skb);
+ skb_reset_network_header(skb);
skb->dev = dev;
skb->pkt_type = PACKET_HOST;
skb->protocol = htons(ETH_P_ARP);
diff --git a/net/ax25/ax25_ip.c b/net/ax25/ax25_ip.c
index 7f818bbcd1c..930e4918037 100644
--- a/net/ax25/ax25_ip.c
+++ b/net/ax25/ax25_ip.c
@@ -121,7 +121,7 @@ int ax25_rebuild_header(struct sk_buff *skb)
digipeat = route->digipeat;
dev = route->dev;
ip_mode = route->ip_mode;
- };
+ }
if (dev == NULL)
dev = skb->dev;
@@ -171,7 +171,7 @@ int ax25_rebuild_header(struct sk_buff *skb)
src_c = *(ax25_address *)(bp + 8);
skb_pull(ourskb, AX25_HEADER_LEN - 1); /* Keep PID */
- ourskb->nh.raw = ourskb->data;
+ skb_reset_network_header(ourskb);
ax25=ax25_send_frame(
ourskb,
diff --git a/net/ax25/ax25_out.c b/net/ax25/ax25_out.c
index 223835092b7..92b517af726 100644
--- a/net/ax25/ax25_out.c
+++ b/net/ax25/ax25_out.c
@@ -148,8 +148,9 @@ void ax25_output(ax25_cb *ax25, int paclen, struct sk_buff *skb)
if (ka9qfrag == 1) {
skb_reserve(skbn, frontlen + 2);
- skbn->nh.raw = skbn->data + (skb->nh.raw - skb->data);
- memcpy(skb_put(skbn, len), skb->data, len);
+ skb_set_network_header(skbn,
+ skb_network_offset(skb));
+ skb_copy_from_linear_data(skb, skb_put(skbn, len), len);
p = skb_push(skbn, 2);
*p++ = AX25_P_SEGMENT;
@@ -161,8 +162,9 @@ void ax25_output(ax25_cb *ax25, int paclen, struct sk_buff *skb)
}
} else {
skb_reserve(skbn, frontlen + 1);
- skbn->nh.raw = skbn->data + (skb->nh.raw - skb->data);
- memcpy(skb_put(skbn, len), skb->data, len);
+ skb_set_network_header(skbn,
+ skb_network_offset(skb));
+ skb_copy_from_linear_data(skb, skb_put(skbn, len), len);
p = skb_push(skbn, 1);
*p = AX25_P_TEXT;
}
@@ -205,7 +207,7 @@ static void ax25_send_iframe(ax25_cb *ax25, struct sk_buff *skb, int poll_bit)
if (skb == NULL)
return;
- skb->nh.raw = skb->data;
+ skb_reset_network_header(skb);
if (ax25->modulus == AX25_MODULUS) {
frame = skb_push(skb, 1);
diff --git a/net/ax25/ax25_subr.c b/net/ax25/ax25_subr.c
index b6c577e3c91..5fe9b2a6697 100644
--- a/net/ax25/ax25_subr.c
+++ b/net/ax25/ax25_subr.c
@@ -162,7 +162,7 @@ void ax25_send_control(ax25_cb *ax25, int frametype, int poll_bit, int type)
skb_reserve(skb, ax25->ax25_dev->dev->hard_header_len);
- skb->nh.raw = skb->data;
+ skb_reset_network_header(skb);
/* Assume a response - address structure for DTE */
if (ax25->modulus == AX25_MODULUS) {
@@ -205,7 +205,7 @@ void ax25_return_dm(struct net_device *dev, ax25_address *src, ax25_address *des
return; /* Next SABM will get DM'd */
skb_reserve(skb, dev->hard_header_len);
- skb->nh.raw = skb->data;
+ skb_reset_network_header(skb);
ax25_digi_invert(digi, &retdigi);
diff --git a/net/bluetooth/af_bluetooth.c b/net/bluetooth/af_bluetooth.c
index c7228cfc621..d942b946ba0 100644
--- a/net/bluetooth/af_bluetooth.c
+++ b/net/bluetooth/af_bluetooth.c
@@ -221,7 +221,7 @@ int bt_sock_recvmsg(struct kiocb *iocb, struct socket *sock,
copied = len;
}
- skb->h.raw = skb->data;
+ skb_reset_transport_header(skb);
err = skb_copy_datagram_iovec(skb, 0, msg->msg_iov, copied);
skb_free_datagram(sk, skb);
diff --git a/net/bluetooth/bnep/core.c b/net/bluetooth/bnep/core.c
index b85d1492c35..ab2db55982c 100644
--- a/net/bluetooth/bnep/core.c
+++ b/net/bluetooth/bnep/core.c
@@ -326,7 +326,7 @@ static inline int bnep_rx_frame(struct bnep_session *s, struct sk_buff *skb)
return 0;
}
- skb->mac.raw = skb->data;
+ skb_reset_mac_header(skb);
/* Verify and pull out header */
if (!skb_pull(skb, __bnep_rx_hlen[type & BNEP_TYPE_MASK]))
@@ -364,26 +364,28 @@ static inline int bnep_rx_frame(struct bnep_session *s, struct sk_buff *skb)
case BNEP_COMPRESSED_SRC_ONLY:
memcpy(__skb_put(nskb, ETH_ALEN), s->eh.h_dest, ETH_ALEN);
- memcpy(__skb_put(nskb, ETH_ALEN), skb->mac.raw, ETH_ALEN);
+ memcpy(__skb_put(nskb, ETH_ALEN), skb_mac_header(skb), ETH_ALEN);
put_unaligned(s->eh.h_proto, (__be16 *) __skb_put(nskb, 2));
break;
case BNEP_COMPRESSED_DST_ONLY:
- memcpy(__skb_put(nskb, ETH_ALEN), skb->mac.raw, ETH_ALEN);
- memcpy(__skb_put(nskb, ETH_ALEN + 2), s->eh.h_source, ETH_ALEN + 2);
+ memcpy(__skb_put(nskb, ETH_ALEN), skb_mac_header(skb),
+ ETH_ALEN);
+ memcpy(__skb_put(nskb, ETH_ALEN + 2), s->eh.h_source,
+ ETH_ALEN + 2);
break;
case BNEP_GENERAL:
- memcpy(__skb_put(nskb, ETH_ALEN * 2), skb->mac.raw, ETH_ALEN * 2);
+ memcpy(__skb_put(nskb, ETH_ALEN * 2), skb_mac_header(skb),
+ ETH_ALEN * 2);
put_unaligned(s->eh.h_proto, (__be16 *) __skb_put(nskb, 2));
break;
}
- memcpy(__skb_put(nskb, skb->len), skb->data, skb->len);
+ skb_copy_from_linear_data(skb, __skb_put(nskb, skb->len), skb->len);
kfree_skb(skb);
s->stats.rx_packets++;
- nskb->dev = dev;
nskb->ip_summed = CHECKSUM_NONE;
nskb->protocol = eth_type_trans(nskb, dev);
netif_rx_ni(nskb);
diff --git a/net/bluetooth/cmtp/core.c b/net/bluetooth/cmtp/core.c
index 3933608a929..66bef1ccee2 100644
--- a/net/bluetooth/cmtp/core.c
+++ b/net/bluetooth/cmtp/core.c
@@ -124,7 +124,7 @@ static inline void cmtp_add_msgpart(struct cmtp_session *session, int id, const
}
if (skb && (skb->len > 0))
- memcpy(skb_put(nskb, skb->len), skb->data, skb->len);
+ skb_copy_from_linear_data(skb, skb_put(nskb, skb->len), skb->len);
memcpy(skb_put(nskb, count), buf, count);
@@ -256,7 +256,7 @@ static void cmtp_process_transmit(struct cmtp_session *session)
hdr[2] = size >> 8;
}
- memcpy(skb_put(nskb, size), skb->data, size);
+ skb_copy_from_linear_data(skb, skb_put(nskb, size), size);
skb_pull(skb, size);
if (skb->len > 0) {
diff --git a/net/bluetooth/hci_conn.c b/net/bluetooth/hci_conn.c
index f3403fdb59f..63980bd6b5f 100644
--- a/net/bluetooth/hci_conn.c
+++ b/net/bluetooth/hci_conn.c
@@ -72,11 +72,11 @@ void hci_acl_connect(struct hci_conn *conn)
inquiry_entry_age(ie) <= INQUIRY_ENTRY_AGE_MAX) {
cp.pscan_rep_mode = ie->data.pscan_rep_mode;
cp.pscan_mode = ie->data.pscan_mode;
- cp.clock_offset = ie->data.clock_offset | __cpu_to_le16(0x8000);
+ cp.clock_offset = ie->data.clock_offset | cpu_to_le16(0x8000);
memcpy(conn->dev_class, ie->data.dev_class, 3);
}
- cp.pkt_type = __cpu_to_le16(hdev->pkt_type & ACL_PTYPE_MASK);
+ cp.pkt_type = cpu_to_le16(hdev->pkt_type & ACL_PTYPE_MASK);
if (lmp_rswitch_capable(hdev) && !(hdev->link_mode & HCI_LM_MASTER))
cp.role_switch = 0x01;
else
@@ -107,7 +107,7 @@ void hci_acl_disconn(struct hci_conn *conn, __u8 reason)
conn->state = BT_DISCONN;
- cp.handle = __cpu_to_le16(conn->handle);
+ cp.handle = cpu_to_le16(conn->handle);
cp.reason = reason;
hci_send_cmd(conn->hdev, OGF_LINK_CTL,
OCF_DISCONNECT, sizeof(cp), &cp);
@@ -123,8 +123,8 @@ void hci_add_sco(struct hci_conn *conn, __u16 handle)
conn->state = BT_CONNECT;
conn->out = 1;
- cp.pkt_type = __cpu_to_le16(hdev->pkt_type & SCO_PTYPE_MASK);
- cp.handle = __cpu_to_le16(handle);
+ cp.pkt_type = cpu_to_le16(hdev->pkt_type & SCO_PTYPE_MASK);
+ cp.handle = cpu_to_le16(handle);
hci_send_cmd(hdev, OGF_LINK_CTL, OCF_ADD_SCO, sizeof(cp), &cp);
}
@@ -348,7 +348,7 @@ int hci_conn_auth(struct hci_conn *conn)
if (!test_and_set_bit(HCI_CONN_AUTH_PEND, &conn->pend)) {
struct hci_cp_auth_requested cp;
- cp.handle = __cpu_to_le16(conn->handle);
+ cp.handle = cpu_to_le16(conn->handle);
hci_send_cmd(conn->hdev, OGF_LINK_CTL, OCF_AUTH_REQUESTED, sizeof(cp), &cp);
}
return 0;
@@ -368,7 +368,7 @@ int hci_conn_encrypt(struct hci_conn *conn)
if (hci_conn_auth(conn)) {
struct hci_cp_set_conn_encrypt cp;
- cp.handle = __cpu_to_le16(conn->handle);
+ cp.handle = cpu_to_le16(conn->handle);
cp.encrypt = 1;
hci_send_cmd(conn->hdev, OGF_LINK_CTL, OCF_SET_CONN_ENCRYPT, sizeof(cp), &cp);
}
@@ -383,7 +383,7 @@ int hci_conn_change_link_key(struct hci_conn *conn)
if (!test_and_set_bit(HCI_CONN_AUTH_PEND, &conn->pend)) {
struct hci_cp_change_conn_link_key cp;
- cp.handle = __cpu_to_le16(conn->handle);
+ cp.handle = cpu_to_le16(conn->handle);
hci_send_cmd(conn->hdev, OGF_LINK_CTL, OCF_CHANGE_CONN_LINK_KEY, sizeof(cp), &cp);
}
return 0;
@@ -423,7 +423,7 @@ void hci_conn_enter_active_mode(struct hci_conn *conn)
if (!test_and_set_bit(HCI_CONN_MODE_CHANGE_PEND, &conn->pend)) {
struct hci_cp_exit_sniff_mode cp;
- cp.handle = __cpu_to_le16(conn->handle);
+ cp.handle = cpu_to_le16(conn->handle);
hci_send_cmd(hdev, OGF_LINK_POLICY,
OCF_EXIT_SNIFF_MODE, sizeof(cp), &cp);
}
@@ -452,21 +452,21 @@ void hci_conn_enter_sniff_mode(struct hci_conn *conn)
if (lmp_sniffsubr_capable(hdev) && lmp_sniffsubr_capable(conn)) {
struct hci_cp_sniff_subrate cp;
- cp.handle = __cpu_to_le16(conn->handle);
- cp.max_latency = __constant_cpu_to_le16(0);
- cp.min_remote_timeout = __constant_cpu_to_le16(0);
- cp.min_local_timeout = __constant_cpu_to_le16(0);
+ cp.handle = cpu_to_le16(conn->handle);
+ cp.max_latency = cpu_to_le16(0);
+ cp.min_remote_timeout = cpu_to_le16(0);
+ cp.min_local_timeout = cpu_to_le16(0);
hci_send_cmd(hdev, OGF_LINK_POLICY,
OCF_SNIFF_SUBRATE, sizeof(cp), &cp);
}
if (!test_and_set_bit(HCI_CONN_MODE_CHANGE_PEND, &conn->pend)) {
struct hci_cp_sniff_mode cp;
- cp.handle = __cpu_to_le16(conn->handle);
- cp.max_interval = __cpu_to_le16(hdev->sniff_max_interval);
- cp.min_interval = __cpu_to_le16(hdev->sniff_min_interval);
- cp.attempt = __constant_cpu_to_le16(4);
- cp.timeout = __constant_cpu_to_le16(1);
+ cp.handle = cpu_to_le16(conn->handle);
+ cp.max_interval = cpu_to_le16(hdev->sniff_max_interval);
+ cp.min_interval = cpu_to_le16(hdev->sniff_min_interval);
+ cp.attempt = cpu_to_le16(4);
+ cp.timeout = cpu_to_le16(1);
hci_send_cmd(hdev, OGF_LINK_POLICY,
OCF_SNIFF_MODE, sizeof(cp), &cp);
}
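
Every __cpu_to_le16() in hci_conn.c becomes the plain cpu_to_le16(); the double-underscore variants bypass the type-checked wrappers, and the __constant_ forms are unnecessary since the plain macro folds constants at compile time anyway. A standalone sketch of what the conversion must produce (identity on little-endian hosts, a byte swap on big-endian ones):

#include <stdint.h>
#include <string.h>

/* Store the value in little-endian byte order, then read it back in
 * host order: a no-op on LE machines, a swap on BE ones. */
static uint16_t sketch_cpu_to_le16(uint16_t v)
{
	unsigned char le[2] = { v & 0xff, v >> 8 };
	uint16_t out;

	memcpy(&out, le, sizeof(out));
	return out;
}
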
diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c
index 4917919d86a..aa4b56a8c3e 100644
--- a/net/bluetooth/hci_core.c
+++ b/net/bluetooth/hci_core.c
@@ -149,7 +149,7 @@ static int __hci_request(struct hci_dev *hdev, void (*req)(struct hci_dev *hdev,
default:
err = -ETIMEDOUT;
break;
- };
+ }
hdev->req_status = hdev->req_result = 0;
@@ -216,10 +216,10 @@ static void hci_init_req(struct hci_dev *hdev, unsigned long opt)
/* Host buffer size */
{
struct hci_cp_host_buffer_size cp;
- cp.acl_mtu = __cpu_to_le16(HCI_MAX_ACL_SIZE);
+ cp.acl_mtu = cpu_to_le16(HCI_MAX_ACL_SIZE);
cp.sco_mtu = HCI_MAX_SCO_SIZE;
- cp.acl_max_pkt = __cpu_to_le16(0xffff);
- cp.sco_max_pkt = __cpu_to_le16(0xffff);
+ cp.acl_max_pkt = cpu_to_le16(0xffff);
+ cp.sco_max_pkt = cpu_to_le16(0xffff);
hci_send_cmd(hdev, OGF_HOST_CTL, OCF_HOST_BUFFER_SIZE, sizeof(cp), &cp);
}
#endif
@@ -240,11 +240,11 @@ static void hci_init_req(struct hci_dev *hdev, unsigned long opt)
}
/* Page timeout ~20 secs */
- param = __cpu_to_le16(0x8000);
+ param = cpu_to_le16(0x8000);
hci_send_cmd(hdev, OGF_HOST_CTL, OCF_WRITE_PG_TIMEOUT, 2, &param);
/* Connection accept timeout ~20 secs */
- param = __cpu_to_le16(0x7d00);
+ param = cpu_to_le16(0x7d00);
hci_send_cmd(hdev, OGF_HOST_CTL, OCF_WRITE_CA_TIMEOUT, 2, &param);
}
@@ -1034,7 +1034,7 @@ int hci_send_cmd(struct hci_dev *hdev, __u16 ogf, __u16 ocf, __u32 plen, void *p
}
hdr = (struct hci_command_hdr *) skb_put(skb, HCI_COMMAND_HDR_SIZE);
- hdr->opcode = __cpu_to_le16(hci_opcode_pack(ogf, ocf));
+ hdr->opcode = cpu_to_le16(hci_opcode_pack(ogf, ocf));
hdr->plen = plen;
if (plen)
@@ -1060,7 +1060,7 @@ void *hci_sent_cmd_data(struct hci_dev *hdev, __u16 ogf, __u16 ocf)
hdr = (void *) hdev->sent_cmd->data;
- if (hdr->opcode != __cpu_to_le16(hci_opcode_pack(ogf, ocf)))
+ if (hdr->opcode != cpu_to_le16(hci_opcode_pack(ogf, ocf)))
return NULL;
BT_DBG("%s ogf 0x%x ocf 0x%x", hdev->name, ogf, ocf);
@@ -1074,11 +1074,11 @@ static void hci_add_acl_hdr(struct sk_buff *skb, __u16 handle, __u16 flags)
struct hci_acl_hdr *hdr;
int len = skb->len;
- hdr = (struct hci_acl_hdr *) skb_push(skb, HCI_ACL_HDR_SIZE);
- hdr->handle = __cpu_to_le16(hci_handle_pack(handle, flags));
- hdr->dlen = __cpu_to_le16(len);
-
- skb->h.raw = (void *) hdr;
+ skb_push(skb, HCI_ACL_HDR_SIZE);
+ skb_reset_transport_header(skb);
+ hdr = (struct hci_acl_hdr *)skb_transport_header(skb);
+ hdr->handle = cpu_to_le16(hci_handle_pack(handle, flags));
+ hdr->dlen = cpu_to_le16(len);
}
int hci_send_acl(struct hci_conn *conn, struct sk_buff *skb, __u16 flags)
@@ -1140,11 +1140,12 @@ int hci_send_sco(struct hci_conn *conn, struct sk_buff *skb)
return -EINVAL;
}
- hdr.handle = __cpu_to_le16(conn->handle);
+ hdr.handle = cpu_to_le16(conn->handle);
hdr.dlen = skb->len;
- skb->h.raw = skb_push(skb, HCI_SCO_HDR_SIZE);
- memcpy(skb->h.raw, &hdr, HCI_SCO_HDR_SIZE);
+ skb_push(skb, HCI_SCO_HDR_SIZE);
+ skb_reset_transport_header(skb);
+ memcpy(skb_transport_header(skb), &hdr, HCI_SCO_HDR_SIZE);
skb->dev = (void *) hdev;
bt_cb(skb)->pkt_type = HCI_SCODATA_PKT;
@@ -1387,7 +1388,7 @@ static void hci_rx_task(unsigned long arg)
case HCI_SCODATA_PKT:
kfree_skb(skb);
continue;
- };
+ }
}
/* Process frame */
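
The ACL and SCO send paths above stop writing through skb->h.raw and instead push the header, call skb_reset_transport_header() to record the new packet front, and get the pointer back via skb_transport_header(). The same three steps on a flat buffer, as a sketch (the real skb records the header position relative to skb->head so it survives buffer reallocation; the caller must guarantee headroom):

#include <string.h>

struct pkt_sketch {
	unsigned char *data;		/* current packet start */
	unsigned char *transport;	/* recorded transport header */
};

static unsigned char *push_hdr(struct pkt_sketch *p, const void *hdr,
			       unsigned int hlen)
{
	p->data -= hlen;		/* skb_push() */
	p->transport = p->data;		/* skb_reset_transport_header() */
	memcpy(p->transport, hdr, hlen);
	return p->transport;		/* skb_transport_header() */
}
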
diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c
index 936d3fc479c..447ba713122 100644
--- a/net/bluetooth/hci_event.c
+++ b/net/bluetooth/hci_event.c
@@ -783,7 +783,7 @@ static inline void hci_conn_complete_evt(struct hci_dev *hdev, struct sk_buff *s
if (conn->type == ACL_LINK && hdev->link_policy) {
struct hci_cp_write_link_policy cp;
cp.handle = ev->handle;
- cp.policy = __cpu_to_le16(hdev->link_policy);
+ cp.policy = cpu_to_le16(hdev->link_policy);
hci_send_cmd(hdev, OGF_LINK_POLICY,
OCF_WRITE_LINK_POLICY, sizeof(cp), &cp);
}
@@ -793,8 +793,8 @@ static inline void hci_conn_complete_evt(struct hci_dev *hdev, struct sk_buff *s
struct hci_cp_change_conn_ptype cp;
cp.handle = ev->handle;
cp.pkt_type = (conn->type == ACL_LINK) ?
- __cpu_to_le16(hdev->pkt_type & ACL_PTYPE_MASK):
- __cpu_to_le16(hdev->pkt_type & SCO_PTYPE_MASK);
+ cpu_to_le16(hdev->pkt_type & ACL_PTYPE_MASK):
+ cpu_to_le16(hdev->pkt_type & SCO_PTYPE_MASK);
hci_send_cmd(hdev, OGF_LINK_CTL,
OCF_CHANGE_CONN_PTYPE, sizeof(cp), &cp);
@@ -970,7 +970,7 @@ static inline void hci_auth_complete_evt(struct hci_dev *hdev, struct sk_buff *s
if (test_bit(HCI_CONN_ENCRYPT_PEND, &conn->pend)) {
if (!ev->status) {
struct hci_cp_set_conn_encrypt cp;
- cp.handle = __cpu_to_le16(conn->handle);
+ cp.handle = cpu_to_le16(conn->handle);
cp.encrypt = 1;
hci_send_cmd(conn->hdev, OGF_LINK_CTL,
OCF_SET_CONN_ENCRYPT, sizeof(cp), &cp);
diff --git a/net/bluetooth/hci_sock.c b/net/bluetooth/hci_sock.c
index 71f5cfbbebb..832b5f44be5 100644
--- a/net/bluetooth/hci_sock.c
+++ b/net/bluetooth/hci_sock.c
@@ -375,7 +375,7 @@ static int hci_sock_recvmsg(struct kiocb *iocb, struct socket *sock,
copied = len;
}
- skb->h.raw = skb->data;
+ skb_reset_transport_header(skb);
err = skb_copy_datagram_iovec(skb, 0, msg->msg_iov, copied);
hci_sock_cmsg(sk, msg, skb);
diff --git a/net/bluetooth/l2cap.c b/net/bluetooth/l2cap.c
index e83ee82440d..a5867879b61 100644
--- a/net/bluetooth/l2cap.c
+++ b/net/bluetooth/l2cap.c
@@ -459,8 +459,8 @@ static void __l2cap_sock_close(struct sock *sk, int reason)
sk->sk_state = BT_DISCONN;
l2cap_sock_set_timer(sk, sk->sk_sndtimeo);
- req.dcid = __cpu_to_le16(l2cap_pi(sk)->dcid);
- req.scid = __cpu_to_le16(l2cap_pi(sk)->scid);
+ req.dcid = cpu_to_le16(l2cap_pi(sk)->dcid);
+ req.scid = cpu_to_le16(l2cap_pi(sk)->scid);
l2cap_send_cmd(conn, l2cap_get_ident(conn),
L2CAP_DISCONN_REQ, sizeof(req), &req);
} else {
@@ -652,7 +652,7 @@ static int l2cap_do_connect(struct sock *sk)
if (sk->sk_type == SOCK_SEQPACKET) {
struct l2cap_conn_req req;
l2cap_pi(sk)->ident = l2cap_get_ident(conn);
- req.scid = __cpu_to_le16(l2cap_pi(sk)->scid);
+ req.scid = cpu_to_le16(l2cap_pi(sk)->scid);
req.psm = l2cap_pi(sk)->psm;
l2cap_send_cmd(conn, l2cap_pi(sk)->ident,
L2CAP_CONN_REQ, sizeof(req), &req);
@@ -868,8 +868,8 @@ static inline int l2cap_do_send(struct sock *sk, struct msghdr *msg, int len)
/* Create L2CAP header */
lh = (struct l2cap_hdr *) skb_put(skb, L2CAP_HDR_SIZE);
- lh->cid = __cpu_to_le16(l2cap_pi(sk)->dcid);
- lh->len = __cpu_to_le16(len + (hlen - L2CAP_HDR_SIZE));
+ lh->cid = cpu_to_le16(l2cap_pi(sk)->dcid);
+ lh->len = cpu_to_le16(len + (hlen - L2CAP_HDR_SIZE));
if (sk->sk_type == SOCK_DGRAM)
put_unaligned(l2cap_pi(sk)->psm, (u16 *) skb_put(skb, 2));
@@ -1096,7 +1096,7 @@ static void l2cap_conn_ready(struct l2cap_conn *conn)
} else if (sk->sk_state == BT_CONNECT) {
struct l2cap_conn_req req;
l2cap_pi(sk)->ident = l2cap_get_ident(conn);
- req.scid = __cpu_to_le16(l2cap_pi(sk)->scid);
+ req.scid = cpu_to_le16(l2cap_pi(sk)->scid);
req.psm = l2cap_pi(sk)->psm;
l2cap_send_cmd(conn, l2cap_pi(sk)->ident, L2CAP_CONN_REQ, sizeof(req), &req);
}
@@ -1192,13 +1192,13 @@ static struct sk_buff *l2cap_build_cmd(struct l2cap_conn *conn,
return NULL;
lh = (struct l2cap_hdr *) skb_put(skb, L2CAP_HDR_SIZE);
- lh->len = __cpu_to_le16(L2CAP_CMD_HDR_SIZE + dlen);
- lh->cid = __cpu_to_le16(0x0001);
+ lh->len = cpu_to_le16(L2CAP_CMD_HDR_SIZE + dlen);
+ lh->cid = cpu_to_le16(0x0001);
cmd = (struct l2cap_cmd_hdr *) skb_put(skb, L2CAP_CMD_HDR_SIZE);
cmd->code = code;
cmd->ident = ident;
- cmd->len = __cpu_to_le16(dlen);
+ cmd->len = cpu_to_le16(dlen);
if (dlen) {
count -= L2CAP_HDR_SIZE + L2CAP_CMD_HDR_SIZE;
@@ -1316,11 +1316,11 @@ static void l2cap_add_conf_opt(void **ptr, u8 type, u8 len, unsigned long val)
break;
case 2:
- *((u16 *) opt->val) = __cpu_to_le16(val);
+ *((u16 *) opt->val) = cpu_to_le16(val);
break;
case 4:
- *((u32 *) opt->val) = __cpu_to_le32(val);
+ *((u32 *) opt->val) = cpu_to_le32(val);
break;
default:
@@ -1346,8 +1346,8 @@ static int l2cap_build_conf_req(struct sock *sk, void *data)
//if (flush_to != L2CAP_DEFAULT_FLUSH_TO)
// l2cap_add_conf_opt(&ptr, L2CAP_CONF_FLUSH_TO, 2, pi->flush_to);
- req->dcid = __cpu_to_le16(pi->dcid);
- req->flags = __cpu_to_le16(0);
+ req->dcid = cpu_to_le16(pi->dcid);
+ req->flags = cpu_to_le16(0);
return ptr - data;
}
@@ -1383,9 +1383,9 @@ static int l2cap_build_conf_rsp(struct sock *sk, void *data, int *result)
else
flags = 0x0001;
- rsp->scid = __cpu_to_le16(l2cap_pi(sk)->dcid);
- rsp->result = __cpu_to_le16(result ? *result : 0);
- rsp->flags = __cpu_to_le16(flags);
+ rsp->scid = cpu_to_le16(l2cap_pi(sk)->dcid);
+ rsp->result = cpu_to_le16(result ? *result : 0);
+ rsp->flags = cpu_to_le16(flags);
return ptr - data;
}
@@ -1470,10 +1470,10 @@ response:
bh_unlock_sock(parent);
sendresp:
- rsp.scid = __cpu_to_le16(scid);
- rsp.dcid = __cpu_to_le16(dcid);
- rsp.result = __cpu_to_le16(result);
- rsp.status = __cpu_to_le16(status);
+ rsp.scid = cpu_to_le16(scid);
+ rsp.dcid = cpu_to_le16(dcid);
+ rsp.result = cpu_to_le16(result);
+ rsp.status = cpu_to_le16(status);
l2cap_send_cmd(conn, cmd->ident, L2CAP_CONN_RSP, sizeof(rsp), &rsp);
return 0;
}
@@ -1613,8 +1613,8 @@ static inline int l2cap_config_rsp(struct l2cap_conn *conn, struct l2cap_cmd_hdr
l2cap_sock_set_timer(sk, HZ * 5);
{
struct l2cap_disconn_req req;
- req.dcid = __cpu_to_le16(l2cap_pi(sk)->dcid);
- req.scid = __cpu_to_le16(l2cap_pi(sk)->scid);
+ req.dcid = cpu_to_le16(l2cap_pi(sk)->dcid);
+ req.scid = cpu_to_le16(l2cap_pi(sk)->scid);
l2cap_send_cmd(conn, l2cap_get_ident(conn),
L2CAP_DISCONN_REQ, sizeof(req), &req);
}
@@ -1652,8 +1652,8 @@ static inline int l2cap_disconnect_req(struct l2cap_conn *conn, struct l2cap_cmd
if (!(sk = l2cap_get_chan_by_scid(&conn->chan_list, dcid)))
return 0;
- rsp.dcid = __cpu_to_le16(l2cap_pi(sk)->scid);
- rsp.scid = __cpu_to_le16(l2cap_pi(sk)->dcid);
+ rsp.dcid = cpu_to_le16(l2cap_pi(sk)->scid);
+ rsp.scid = cpu_to_le16(l2cap_pi(sk)->dcid);
l2cap_send_cmd(conn, cmd->ident, L2CAP_DISCONN_RSP, sizeof(rsp), &rsp);
sk->sk_shutdown = SHUTDOWN_MASK;
@@ -1696,8 +1696,8 @@ static inline int l2cap_information_req(struct l2cap_conn *conn, struct l2cap_cm
BT_DBG("type 0x%4.4x", type);
- rsp.type = __cpu_to_le16(type);
- rsp.result = __cpu_to_le16(L2CAP_IR_NOTSUPP);
+ rsp.type = cpu_to_le16(type);
+ rsp.result = cpu_to_le16(L2CAP_IR_NOTSUPP);
l2cap_send_cmd(conn, cmd->ident, L2CAP_INFO_RSP, sizeof(rsp), &rsp);
return 0;
@@ -1794,7 +1794,7 @@ static inline void l2cap_sig_channel(struct l2cap_conn *conn, struct sk_buff *sk
BT_DBG("error %d", err);
/* FIXME: Map err to a valid reason */
- rej.reason = __cpu_to_le16(0);
+ rej.reason = cpu_to_le16(0);
l2cap_send_cmd(conn, cmd.ident, L2CAP_COMMAND_REJ, sizeof(rej), &rej);
}
@@ -1993,10 +1993,10 @@ static int l2cap_auth_cfm(struct hci_conn *hcon, u8 status)
result = L2CAP_CR_SEC_BLOCK;
}
- rsp.scid = __cpu_to_le16(l2cap_pi(sk)->dcid);
- rsp.dcid = __cpu_to_le16(l2cap_pi(sk)->scid);
- rsp.result = __cpu_to_le16(result);
- rsp.status = __cpu_to_le16(0);
+ rsp.scid = cpu_to_le16(l2cap_pi(sk)->dcid);
+ rsp.dcid = cpu_to_le16(l2cap_pi(sk)->scid);
+ rsp.result = cpu_to_le16(result);
+ rsp.status = cpu_to_le16(0);
l2cap_send_cmd(conn, l2cap_pi(sk)->ident,
L2CAP_CONN_RSP, sizeof(rsp), &rsp);
@@ -2041,10 +2041,10 @@ static int l2cap_encrypt_cfm(struct hci_conn *hcon, u8 status)
result = L2CAP_CR_SEC_BLOCK;
}
- rsp.scid = __cpu_to_le16(l2cap_pi(sk)->dcid);
- rsp.dcid = __cpu_to_le16(l2cap_pi(sk)->scid);
- rsp.result = __cpu_to_le16(result);
- rsp.status = __cpu_to_le16(0);
+ rsp.scid = cpu_to_le16(l2cap_pi(sk)->dcid);
+ rsp.dcid = cpu_to_le16(l2cap_pi(sk)->scid);
+ rsp.result = cpu_to_le16(result);
+ rsp.status = cpu_to_le16(0);
l2cap_send_cmd(conn, l2cap_pi(sk)->ident,
L2CAP_CONN_RSP, sizeof(rsp), &rsp);
@@ -2107,7 +2107,8 @@ static int l2cap_recv_acldata(struct hci_conn *hcon, struct sk_buff *skb, u16 fl
if (!(conn->rx_skb = bt_skb_alloc(len, GFP_ATOMIC)))
goto drop;
- memcpy(skb_put(conn->rx_skb, skb->len), skb->data, skb->len);
+ skb_copy_from_linear_data(skb, skb_put(conn->rx_skb, skb->len),
+ skb->len);
conn->rx_len = len - skb->len;
} else {
BT_DBG("Cont: frag len %d (expecting %d)", skb->len, conn->rx_len);
@@ -2128,7 +2129,8 @@ static int l2cap_recv_acldata(struct hci_conn *hcon, struct sk_buff *skb, u16 fl
goto drop;
}
- memcpy(skb_put(conn->rx_skb, skb->len), skb->data, skb->len);
+ skb_copy_from_linear_data(skb, skb_put(conn->rx_skb, skb->len),
+ skb->len);
conn->rx_len -= skb->len;
if (!conn->rx_len) {
diff --git a/net/bluetooth/rfcomm/core.c b/net/bluetooth/rfcomm/core.c
index 94f45736056..fe7df90eb70 100644
--- a/net/bluetooth/rfcomm/core.c
+++ b/net/bluetooth/rfcomm/core.c
@@ -1567,7 +1567,7 @@ static int rfcomm_recv_frame(struct rfcomm_session *s, struct sk_buff *skb)
/* Trim FCS */
skb->len--; skb->tail--;
- fcs = *(u8 *) skb->tail;
+ fcs = *(u8 *)skb_tail_pointer(skb);
if (__check_fcs(skb->data, type, fcs)) {
BT_ERR("bad checksum in packet");
@@ -1851,18 +1851,18 @@ static void rfcomm_worker(void)
BT_DBG("");
while (!atomic_read(&terminate)) {
+ set_current_state(TASK_INTERRUPTIBLE);
if (!test_bit(RFCOMM_SCHED_WAKEUP, &rfcomm_event)) {
/* No pending events. Let's sleep.
* Incoming connections and data will wake us up. */
- set_current_state(TASK_INTERRUPTIBLE);
schedule();
}
+ set_current_state(TASK_RUNNING);
/* Process stuff */
clear_bit(RFCOMM_SCHED_WAKEUP, &rfcomm_event);
rfcomm_process_sessions();
}
- set_current_state(TASK_RUNNING);
return;
}
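
Moving set_current_state(TASK_INTERRUPTIBLE) above the event-bit test closes a lost-wakeup window: if the state were set only after the test, a wakeup arriving between the test and the state change would leave the worker asleep with an event already pending. The same discipline in userspace is to test the predicate only once you are already registered for the wakeup, here under the mutex guarding a condition variable (an analogue, not the kernel API):

#include <pthread.h>
#include <stdbool.h>

static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t wake = PTHREAD_COND_INITIALIZER;
static bool event_pending;

static void worker_wait_for_event(void)
{
	pthread_mutex_lock(&lock);
	while (!event_pending)		/* tested while able to be woken */
		pthread_cond_wait(&wake, &lock);
	event_pending = false;
	pthread_mutex_unlock(&lock);
}

static void post_event(void)
{
	pthread_mutex_lock(&lock);
	event_pending = true;
	pthread_cond_signal(&wake);
	pthread_mutex_unlock(&lock);
}
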
diff --git a/net/bluetooth/sco.c b/net/bluetooth/sco.c
index ae439144095..3f5163e725e 100644
--- a/net/bluetooth/sco.c
+++ b/net/bluetooth/sco.c
@@ -393,7 +393,7 @@ static void sco_sock_close(struct sock *sk)
default:
sock_set_flag(sk, SOCK_ZAPPED);
break;
- };
+ }
release_sock(sk);
diff --git a/net/bridge/br.c b/net/bridge/br.c
index 2994387999a..848b8fa8bed 100644
--- a/net/bridge/br.c
+++ b/net/bridge/br.c
@@ -37,7 +37,9 @@ static int __init br_init(void)
return -EADDRINUSE;
}
- br_fdb_init();
+ err = br_fdb_init();
+ if (err)
+ goto err_out1;
err = br_netfilter_init();
if (err)
@@ -47,7 +49,10 @@ static int __init br_init(void)
if (err)
goto err_out2;
- br_netlink_init();
+ err = br_netlink_init();
+ if (err)
+ goto err_out3;
+
brioctl_set(br_ioctl_deviceless_stub);
br_handle_frame_hook = br_handle_frame;
@@ -55,7 +60,8 @@ static int __init br_init(void)
br_fdb_put_hook = br_fdb_put;
return 0;
-
+err_out3:
+ unregister_netdevice_notifier(&br_device_notifier);
err_out2:
br_netfilter_fini();
err_out1:
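
br_init() now checks the return values of br_fdb_init() and br_netlink_init() and unwinds in reverse order on failure, the usual goto-ladder shape for module init. A generic sketch with stand-in init functions, not the bridge symbols themselves:

static int init_fdb(void)     { return 0; }	/* stand-ins */
static int init_filter(void)  { return 0; }
static int init_netlink(void) { return 0; }
static void exit_filter(void) { }
static void exit_fdb(void)    { }

static int module_init_sketch(void)
{
	int err;

	err = init_fdb();
	if (err)
		return err;		/* nothing to undo yet */
	err = init_filter();
	if (err)
		goto err_fdb;
	err = init_netlink();
	if (err)
		goto err_filter;
	return 0;

err_filter:
	exit_filter();			/* undo in reverse order */
err_fdb:
	exit_fdb();
	return err;
}
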
diff --git a/net/bridge/br_device.c b/net/bridge/br_device.c
index 905a39c33a1..5e1892d8d87 100644
--- a/net/bridge/br_device.c
+++ b/net/bridge/br_device.c
@@ -37,7 +37,7 @@ int br_dev_xmit(struct sk_buff *skb, struct net_device *dev)
br->statistics.tx_packets++;
br->statistics.tx_bytes += skb->len;
- skb->mac.raw = skb->data;
+ skb_reset_mac_header(skb);
skb_pull(skb, ETH_HLEN);
if (dest[0] & 1)
@@ -83,27 +83,21 @@ static int br_change_mtu(struct net_device *dev, int new_mtu)
return 0;
}
-/* Allow setting mac address of pseudo-bridge to be same as
- * any of the bound interfaces
- */
+/* Allow setting mac address to any valid ethernet address. */
static int br_set_mac_address(struct net_device *dev, void *p)
{
struct net_bridge *br = netdev_priv(dev);
struct sockaddr *addr = p;
- struct net_bridge_port *port;
- int err = -EADDRNOTAVAIL;
+
+ if (!is_valid_ether_addr(addr->sa_data))
+ return -EINVAL;
spin_lock_bh(&br->lock);
- list_for_each_entry(port, &br->port_list, list) {
- if (!compare_ether_addr(port->dev->dev_addr, addr->sa_data)) {
- br_stp_change_bridge_id(br, addr->sa_data);
- err = 0;
- break;
- }
- }
+ memcpy(dev->dev_addr, addr->sa_data, ETH_ALEN);
+ br_stp_change_bridge_id(br, addr->sa_data);
spin_unlock_bh(&br->lock);
- return err;
+ return 0;
}
static void br_getinfo(struct net_device *dev, struct ethtool_drvinfo *info)
diff --git a/net/bridge/br_fdb.c b/net/bridge/br_fdb.c
index 8d566c13cc7..91b017016d5 100644
--- a/net/bridge/br_fdb.c
+++ b/net/bridge/br_fdb.c
@@ -20,19 +20,28 @@
#include <linux/netdevice.h>
#include <linux/etherdevice.h>
#include <linux/jhash.h>
+#include <linux/random.h>
#include <asm/atomic.h>
+#include <asm/unaligned.h>
#include "br_private.h"
static struct kmem_cache *br_fdb_cache __read_mostly;
static int fdb_insert(struct net_bridge *br, struct net_bridge_port *source,
const unsigned char *addr);
-void __init br_fdb_init(void)
+static u32 fdb_salt __read_mostly;
+
+int __init br_fdb_init(void)
{
br_fdb_cache = kmem_cache_create("bridge_fdb_cache",
sizeof(struct net_bridge_fdb_entry),
0,
SLAB_HWCACHE_ALIGN, NULL, NULL);
+ if (!br_fdb_cache)
+ return -ENOMEM;
+
+ get_random_bytes(&fdb_salt, sizeof(fdb_salt));
+ return 0;
}
void __exit br_fdb_fini(void)
@@ -44,24 +53,26 @@ void __exit br_fdb_fini(void)
/* if topology_changing then use forward_delay (default 15 sec)
* otherwise keep longer (default 5 minutes)
*/
-static __inline__ unsigned long hold_time(const struct net_bridge *br)
+static inline unsigned long hold_time(const struct net_bridge *br)
{
return br->topology_change ? br->forward_delay : br->ageing_time;
}
-static __inline__ int has_expired(const struct net_bridge *br,
+static inline int has_expired(const struct net_bridge *br,
const struct net_bridge_fdb_entry *fdb)
{
return !fdb->is_static
&& time_before_eq(fdb->ageing_timer + hold_time(br), jiffies);
}
-static __inline__ int br_mac_hash(const unsigned char *mac)
+static inline int br_mac_hash(const unsigned char *mac)
{
- return jhash(mac, ETH_ALEN, 0) & (BR_HASH_SIZE - 1);
+	/* use 1 byte of OUI and 3 bytes of NIC */
+ u32 key = get_unaligned((u32 *)(mac + 2));
+ return jhash_1word(key, fdb_salt) & (BR_HASH_SIZE - 1);
}
-static __inline__ void fdb_delete(struct net_bridge_fdb_entry *f)
+static inline void fdb_delete(struct net_bridge_fdb_entry *f)
{
hlist_del_rcu(&f->hlist);
br_fdb_put(f);
@@ -128,7 +139,26 @@ void br_fdb_cleanup(unsigned long _data)
mod_timer(&br->gc_timer, jiffies + HZ/10);
}
+/* Completely flush all dynamic entries in the forwarding database. */
+void br_fdb_flush(struct net_bridge *br)
+{
+ int i;
+ spin_lock_bh(&br->hash_lock);
+ for (i = 0; i < BR_HASH_SIZE; i++) {
+ struct net_bridge_fdb_entry *f;
+ struct hlist_node *h, *n;
+ hlist_for_each_entry_safe(f, h, n, &br->hash[i], hlist) {
+ if (!f->is_static)
+ fdb_delete(f);
+ }
+ }
+ spin_unlock_bh(&br->hash_lock);
+}
+
+/* Flush all entries referring to a specific port.
+ * If do_all is set, also flush static entries.
+ */
void br_fdb_delete_by_port(struct net_bridge *br,
const struct net_bridge_port *p,
int do_all)
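
The FDB hash above switches from hashing all six MAC bytes with a fixed seed to hashing one 32-bit word, the last OUI byte plus the three NIC bytes where almost all of the entropy lives, keyed with a boot-time random salt so remote hosts cannot craft addresses that all collide in one bucket. A sketch of the scheme; hash32() below is a stand-in for the kernel's jhash_1word():

#include <stdint.h>
#include <string.h>

static uint32_t hash32(uint32_t key, uint32_t salt)
{
	key ^= salt;
	key *= 0x9e3779b1u;		/* any decent integer mixer */
	return key ^ (key >> 16);
}

static unsigned int mac_hash_sketch(const unsigned char *mac,
				    uint32_t salt, unsigned int buckets)
{
	uint32_t key;

	memcpy(&key, mac + 2, sizeof(key));	/* unaligned-safe load */
	return hash32(key, salt) & (buckets - 1);	/* buckets is a power of 2 */
}
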
diff --git a/net/bridge/br_forward.c b/net/bridge/br_forward.c
index 3e45c1a1aa9..ada7f495445 100644
--- a/net/bridge/br_forward.c
+++ b/net/bridge/br_forward.c
@@ -71,7 +71,7 @@ static void __br_forward(const struct net_bridge_port *to, struct sk_buff *skb)
indev = skb->dev;
skb->dev = to->dev;
- skb->ip_summed = CHECKSUM_NONE;
+ skb_forward_csum(skb);
NF_HOOK(PF_BRIDGE, NF_BR_FORWARD, skb, indev, skb->dev,
br_forward_finish);
diff --git a/net/bridge/br_if.c b/net/bridge/br_if.c
index f3a2e29be40..849deaf1410 100644
--- a/net/bridge/br_if.c
+++ b/net/bridge/br_if.c
@@ -152,6 +152,8 @@ static void del_nbp(struct net_bridge_port *p)
br_stp_disable_port(p);
spin_unlock_bh(&br->lock);
+ br_ifinfo_notify(RTM_DELLINK, p);
+
br_fdb_delete_by_port(br, p, 1);
list_del_rcu(&p->list);
@@ -203,7 +205,7 @@ static struct net_device *new_bridge_dev(const char *name)
memcpy(br->group_addr, br_group_address, ETH_ALEN);
br->feature_mask = dev->features;
- br->stp_enabled = 0;
+ br->stp_enabled = BR_NO_STP;
br->designated_root = br->bridge_id;
br->root_path_cost = 0;
br->root_port = 0;
@@ -434,6 +436,8 @@ int br_add_if(struct net_bridge *br, struct net_device *dev)
br_stp_enable_port(p);
spin_unlock_bh(&br->lock);
+ br_ifinfo_notify(RTM_NEWLINK, p);
+
dev_set_mtu(br->dev, br_min_mtu(br));
kobject_uevent(&p->kobj, KOBJ_ADD);
@@ -471,11 +475,9 @@ void __exit br_cleanup_bridges(void)
struct net_device *dev, *nxt;
rtnl_lock();
- for (dev = dev_base; dev; dev = nxt) {
- nxt = dev->next;
+ for_each_netdev_safe(dev, nxt)
if (dev->priv_flags & IFF_EBRIDGE)
del_br(dev->priv);
- }
rtnl_unlock();
}
diff --git a/net/bridge/br_input.c b/net/bridge/br_input.c
index 35b94f9a1ac..420bbb9955e 100644
--- a/net/bridge/br_input.c
+++ b/net/bridge/br_input.c
@@ -112,46 +112,59 @@ static int br_handle_local_finish(struct sk_buff *skb)
*/
static inline int is_link_local(const unsigned char *dest)
{
- return memcmp(dest, br_group_address, 5) == 0 && (dest[5] & 0xf0) == 0;
+ const u16 *a = (const u16 *) dest;
+	static const u16 *const b = (const u16 *const) br_group_address;
+ static const u16 m = __constant_cpu_to_be16(0xfff0);
+
+ return ((a[0] ^ b[0]) | (a[1] ^ b[1]) | ((a[2] ^ b[2]) & m)) == 0;
}
/*
* Called via br_handle_frame_hook.
- * Return 0 if *pskb should be processed furthur
- * 1 if *pskb is handled
+ * Return NULL if skb is handled
* note: already called with rcu_read_lock (preempt_disabled)
*/
-int br_handle_frame(struct net_bridge_port *p, struct sk_buff **pskb)
+struct sk_buff *br_handle_frame(struct net_bridge_port *p, struct sk_buff *skb)
{
- struct sk_buff *skb = *pskb;
const unsigned char *dest = eth_hdr(skb)->h_dest;
if (!is_valid_ether_addr(eth_hdr(skb)->h_source))
- goto err;
+ goto drop;
if (unlikely(is_link_local(dest))) {
- skb->pkt_type = PACKET_HOST;
- return NF_HOOK(PF_BRIDGE, NF_BR_LOCAL_IN, skb, skb->dev,
- NULL, br_handle_local_finish) != 0;
+ /* Pause frames shouldn't be passed up by driver anyway */
+ if (skb->protocol == htons(ETH_P_PAUSE))
+ goto drop;
+
+ /* Process STP BPDU's through normal netif_receive_skb() path */
+ if (p->br->stp_enabled != BR_NO_STP) {
+ if (NF_HOOK(PF_BRIDGE, NF_BR_LOCAL_IN, skb, skb->dev,
+ NULL, br_handle_local_finish))
+ return NULL;
+ else
+ return skb;
+ }
}
- if (p->state == BR_STATE_FORWARDING || p->state == BR_STATE_LEARNING) {
+ switch (p->state) {
+ case BR_STATE_FORWARDING:
+
if (br_should_route_hook) {
- if (br_should_route_hook(pskb))
- return 0;
- skb = *pskb;
+ if (br_should_route_hook(&skb))
+ return skb;
dest = eth_hdr(skb)->h_dest;
}
-
+ /* fall through */
+ case BR_STATE_LEARNING:
if (!compare_ether_addr(p->br->dev->dev_addr, dest))
skb->pkt_type = PACKET_HOST;
NF_HOOK(PF_BRIDGE, NF_BR_PRE_ROUTING, skb, skb->dev, NULL,
br_handle_frame_finish);
- return 1;
+ break;
+ default:
+drop:
+ kfree_skb(skb);
}
-
-err:
- kfree_skb(skb);
- return 1;
+ return NULL;
}
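
The rewritten is_link_local() compares the destination against the group address as three 16-bit words, masking the last word with big-endian 0xfff0 so only the high nibble of byte 5 must match, which is exactly what the old memcmp plus nibble test checked. A byte-wise reference version the optimized form must agree with, assuming br_group_address is the 802.1D group address 01:80:C2:00:00:00:

#include <string.h>

/* 01:80:C2:00:00:00 through 01:80:C2:00:00:0F is the link-local block */
static const unsigned char group_addr[6] = { 0x01, 0x80, 0xc2, 0, 0, 0 };

static int is_link_local_ref(const unsigned char *dest)
{
	return memcmp(dest, group_addr, 5) == 0 && (dest[5] & 0xf0) == 0;
}
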
diff --git a/net/bridge/br_ioctl.c b/net/bridge/br_ioctl.c
index 147015fe5c7..bb15e9e259b 100644
--- a/net/bridge/br_ioctl.c
+++ b/net/bridge/br_ioctl.c
@@ -27,7 +27,9 @@ static int get_bridge_ifindices(int *indices, int num)
struct net_device *dev;
int i = 0;
- for (dev = dev_base; dev && i < num; dev = dev->next) {
+ for_each_netdev(dev) {
+ if (i >= num)
+ break;
if (dev->priv_flags & IFF_EBRIDGE)
indices[i++] = dev->ifindex;
}
@@ -137,7 +139,8 @@ static int old_dev_ioctl(struct net_device *dev, struct ifreq *rq, int cmd)
b.topology_change = br->topology_change;
b.topology_change_detected = br->topology_change_detected;
b.root_port = br->root_port;
- b.stp_enabled = br->stp_enabled;
+
+ b.stp_enabled = (br->stp_enabled != BR_NO_STP);
b.ageing_time = jiffies_to_clock_t(br->ageing_time);
b.hello_timer_value = br_timer_value(&br->hello_timer);
b.tcn_timer_value = br_timer_value(&br->tcn_timer);
@@ -251,7 +254,7 @@ static int old_dev_ioctl(struct net_device *dev, struct ifreq *rq, int cmd)
if (!capable(CAP_NET_ADMIN))
return -EPERM;
- br->stp_enabled = args[1]?1:0;
+ br_stp_set_enabled(br, args[1]);
return 0;
case BRCTL_SET_BRIDGE_PRIORITY:
diff --git a/net/bridge/br_netfilter.c b/net/bridge/br_netfilter.c
index 5439a3c46c3..fa779874b9d 100644
--- a/net/bridge/br_netfilter.c
+++ b/net/bridge/br_netfilter.c
@@ -29,6 +29,8 @@
#include <linux/if_arp.h>
#include <linux/if_ether.h>
#include <linux/if_vlan.h>
+#include <linux/if_pppox.h>
+#include <linux/ppp_defs.h>
#include <linux/netfilter_bridge.h>
#include <linux/netfilter_ipv4.h>
#include <linux/netfilter_ipv6.h>
@@ -48,8 +50,8 @@
#define skb_origaddr(skb) (((struct bridge_skb_cb *) \
(skb->nf_bridge->data))->daddr.ipv4)
-#define store_orig_dstaddr(skb) (skb_origaddr(skb) = (skb)->nh.iph->daddr)
-#define dnat_took_place(skb) (skb_origaddr(skb) != (skb)->nh.iph->daddr)
+#define store_orig_dstaddr(skb) (skb_origaddr(skb) = ip_hdr(skb)->daddr)
+#define dnat_took_place(skb) (skb_origaddr(skb) != ip_hdr(skb)->daddr)
#ifdef CONFIG_SYSCTL
static struct ctl_table_header *brnf_sysctl_header;
@@ -57,8 +59,10 @@ static int brnf_call_iptables __read_mostly = 1;
static int brnf_call_ip6tables __read_mostly = 1;
static int brnf_call_arptables __read_mostly = 1;
static int brnf_filter_vlan_tagged __read_mostly = 1;
+static int brnf_filter_pppoe_tagged __read_mostly = 1;
#else
#define brnf_filter_vlan_tagged 1
+#define brnf_filter_pppoe_tagged 1
#endif
static inline __be16 vlan_proto(const struct sk_buff *skb)
@@ -81,6 +85,22 @@ static inline __be16 vlan_proto(const struct sk_buff *skb)
vlan_proto(skb) == htons(ETH_P_ARP) && \
brnf_filter_vlan_tagged)
+static inline __be16 pppoe_proto(const struct sk_buff *skb)
+{
+ return *((__be16 *)(skb_mac_header(skb) + ETH_HLEN +
+ sizeof(struct pppoe_hdr)));
+}
+
+#define IS_PPPOE_IP(skb) \
+ (skb->protocol == htons(ETH_P_PPP_SES) && \
+ pppoe_proto(skb) == htons(PPP_IP) && \
+ brnf_filter_pppoe_tagged)
+
+#define IS_PPPOE_IPV6(skb) \
+ (skb->protocol == htons(ETH_P_PPP_SES) && \
+ pppoe_proto(skb) == htons(PPP_IPV6) && \
+ brnf_filter_pppoe_tagged)
+
/* We need these fake structures to make netfilter happy --
* lots of places assume that skb->dst != NULL, which isn't
* all that unreasonable.
@@ -122,14 +142,36 @@ static inline struct nf_bridge_info *nf_bridge_alloc(struct sk_buff *skb)
return skb->nf_bridge;
}
-static inline void nf_bridge_save_header(struct sk_buff *skb)
+static inline void nf_bridge_push_encap_header(struct sk_buff *skb)
+{
+ unsigned int len = nf_bridge_encap_header_len(skb);
+
+ skb_push(skb, len);
+ skb->network_header -= len;
+}
+
+static inline void nf_bridge_pull_encap_header(struct sk_buff *skb)
{
- int header_size = ETH_HLEN;
+ unsigned int len = nf_bridge_encap_header_len(skb);
- if (skb->protocol == htons(ETH_P_8021Q))
- header_size += VLAN_HLEN;
+ skb_pull(skb, len);
+ skb->network_header += len;
+}
+
+static inline void nf_bridge_pull_encap_header_rcsum(struct sk_buff *skb)
+{
+ unsigned int len = nf_bridge_encap_header_len(skb);
+
+ skb_pull_rcsum(skb, len);
+ skb->network_header += len;
+}
- memcpy(skb->nf_bridge->data, skb->data - header_size, header_size);
+static inline void nf_bridge_save_header(struct sk_buff *skb)
+{
+ int header_size = ETH_HLEN + nf_bridge_encap_header_len(skb);
+
+ skb_copy_from_linear_data_offset(skb, -header_size,
+ skb->nf_bridge->data, header_size);
}
/*
@@ -139,19 +181,15 @@ static inline void nf_bridge_save_header(struct sk_buff *skb)
int nf_bridge_copy_header(struct sk_buff *skb)
{
int err;
- int header_size = ETH_HLEN;
-
- if (skb->protocol == htons(ETH_P_8021Q))
- header_size += VLAN_HLEN;
+ int header_size = ETH_HLEN + nf_bridge_encap_header_len(skb);
err = skb_cow(skb, header_size);
if (err)
return err;
- memcpy(skb->data - header_size, skb->nf_bridge->data, header_size);
-
- if (skb->protocol == htons(ETH_P_8021Q))
- __skb_push(skb, VLAN_HLEN);
+ skb_copy_to_linear_data_offset(skb, -header_size,
+ skb->nf_bridge->data, header_size);
+ __skb_push(skb, nf_bridge_encap_header_len(skb));
return 0;
}
@@ -172,10 +210,7 @@ static int br_nf_pre_routing_finish_ipv6(struct sk_buff *skb)
dst_hold(skb->dst);
skb->dev = nf_bridge->physindev;
- if (skb->protocol == htons(ETH_P_8021Q)) {
- skb_push(skb, VLAN_HLEN);
- skb->nh.raw -= VLAN_HLEN;
- }
+ nf_bridge_push_encap_header(skb);
NF_HOOK_THRESH(PF_BRIDGE, NF_BR_PRE_ROUTING, skb, skb->dev, NULL,
br_handle_frame_finish, 1);
@@ -253,10 +288,7 @@ static int br_nf_pre_routing_finish_bridge(struct sk_buff *skb)
if (!skb->dev)
kfree_skb(skb);
else {
- if (skb->protocol == htons(ETH_P_8021Q)) {
- skb_pull(skb, VLAN_HLEN);
- skb->nh.raw += VLAN_HLEN;
- }
+ nf_bridge_pull_encap_header(skb);
skb->dst->output(skb);
}
return 0;
@@ -265,7 +297,7 @@ static int br_nf_pre_routing_finish_bridge(struct sk_buff *skb)
static int br_nf_pre_routing_finish(struct sk_buff *skb)
{
struct net_device *dev = skb->dev;
- struct iphdr *iph = skb->nh.iph;
+ struct iphdr *iph = ip_hdr(skb);
struct nf_bridge_info *nf_bridge = skb->nf_bridge;
int err;
@@ -322,11 +354,7 @@ bridged_dnat:
* bridged frame */
nf_bridge->mask |= BRNF_BRIDGED_DNAT;
skb->dev = nf_bridge->physindev;
- if (skb->protocol ==
- htons(ETH_P_8021Q)) {
- skb_push(skb, VLAN_HLEN);
- skb->nh.raw -= VLAN_HLEN;
- }
+ nf_bridge_push_encap_header(skb);
NF_HOOK_THRESH(PF_BRIDGE, NF_BR_PRE_ROUTING,
skb, skb->dev, NULL,
br_nf_pre_routing_finish_bridge,
@@ -342,10 +370,7 @@ bridged_dnat:
}
skb->dev = nf_bridge->physindev;
- if (skb->protocol == htons(ETH_P_8021Q)) {
- skb_push(skb, VLAN_HLEN);
- skb->nh.raw -= VLAN_HLEN;
- }
+ nf_bridge_push_encap_header(skb);
NF_HOOK_THRESH(PF_BRIDGE, NF_BR_PRE_ROUTING, skb, skb->dev, NULL,
br_handle_frame_finish, 1);
@@ -372,9 +397,10 @@ static struct net_device *setup_pre_routing(struct sk_buff *skb)
/* We only check the length. A bridge shouldn't do any hop-by-hop stuff anyway */
static int check_hbh_len(struct sk_buff *skb)
{
- unsigned char *raw = (u8 *) (skb->nh.ipv6h + 1);
+ unsigned char *raw = (u8 *)(ipv6_hdr(skb) + 1);
u32 pkt_len;
- int off = raw - skb->nh.raw;
+ const unsigned char *nh = skb_network_header(skb);
+ int off = raw - nh;
int len = (raw[1] + 1) << 3;
if ((raw + len) - skb->data > skb_headlen(skb))
@@ -384,9 +410,9 @@ static int check_hbh_len(struct sk_buff *skb)
len -= 2;
while (len > 0) {
- int optlen = skb->nh.raw[off + 1] + 2;
+ int optlen = nh[off + 1] + 2;
- switch (skb->nh.raw[off]) {
+ switch (nh[off]) {
case IPV6_TLV_PAD0:
optlen = 1;
break;
@@ -395,17 +421,18 @@ static int check_hbh_len(struct sk_buff *skb)
break;
case IPV6_TLV_JUMBO:
- if (skb->nh.raw[off + 1] != 4 || (off & 3) != 2)
+ if (nh[off + 1] != 4 || (off & 3) != 2)
goto bad;
- pkt_len = ntohl(*(__be32 *) (skb->nh.raw + off + 2));
+ pkt_len = ntohl(*(__be32 *) (nh + off + 2));
if (pkt_len <= IPV6_MAXPLEN ||
- skb->nh.ipv6h->payload_len)
+ ipv6_hdr(skb)->payload_len)
goto bad;
if (pkt_len > skb->len - sizeof(struct ipv6hdr))
goto bad;
if (pskb_trim_rcsum(skb,
pkt_len + sizeof(struct ipv6hdr)))
goto bad;
+ nh = skb_network_header(skb);
break;
default:
if (optlen > len)
@@ -439,7 +466,7 @@ static unsigned int br_nf_pre_routing_ipv6(unsigned int hook,
if (!pskb_may_pull(skb, sizeof(struct ipv6hdr)))
goto inhdr_error;
- hdr = skb->nh.ipv6h;
+ hdr = ipv6_hdr(skb);
if (hdr->version != 6)
goto inhdr_error;
@@ -485,18 +512,15 @@ static unsigned int br_nf_pre_routing(unsigned int hook, struct sk_buff **pskb,
__u32 len;
struct sk_buff *skb = *pskb;
- if (skb->protocol == htons(ETH_P_IPV6) || IS_VLAN_IPV6(skb)) {
+ if (skb->protocol == htons(ETH_P_IPV6) || IS_VLAN_IPV6(skb) ||
+ IS_PPPOE_IPV6(skb)) {
#ifdef CONFIG_SYSCTL
if (!brnf_call_ip6tables)
return NF_ACCEPT;
#endif
if ((skb = skb_share_check(*pskb, GFP_ATOMIC)) == NULL)
goto out;
-
- if (skb->protocol == htons(ETH_P_8021Q)) {
- skb_pull_rcsum(skb, VLAN_HLEN);
- skb->nh.raw += VLAN_HLEN;
- }
+ nf_bridge_pull_encap_header_rcsum(skb);
return br_nf_pre_routing_ipv6(hook, skb, in, out, okfn);
}
#ifdef CONFIG_SYSCTL
@@ -504,28 +528,25 @@ static unsigned int br_nf_pre_routing(unsigned int hook, struct sk_buff **pskb,
return NF_ACCEPT;
#endif
- if (skb->protocol != htons(ETH_P_IP) && !IS_VLAN_IP(skb))
+ if (skb->protocol != htons(ETH_P_IP) && !IS_VLAN_IP(skb) &&
+ !IS_PPPOE_IP(skb))
return NF_ACCEPT;
if ((skb = skb_share_check(*pskb, GFP_ATOMIC)) == NULL)
goto out;
-
- if (skb->protocol == htons(ETH_P_8021Q)) {
- skb_pull_rcsum(skb, VLAN_HLEN);
- skb->nh.raw += VLAN_HLEN;
- }
+ nf_bridge_pull_encap_header_rcsum(skb);
if (!pskb_may_pull(skb, sizeof(struct iphdr)))
goto inhdr_error;
- iph = skb->nh.iph;
+ iph = ip_hdr(skb);
if (iph->ihl < 5 || iph->version != 4)
goto inhdr_error;
if (!pskb_may_pull(skb, 4 * iph->ihl))
goto inhdr_error;
- iph = skb->nh.iph;
+ iph = ip_hdr(skb);
if (ip_fast_csum((__u8 *) iph, iph->ihl) != 0)
goto inhdr_error;
@@ -591,10 +612,7 @@ static int br_nf_forward_finish(struct sk_buff *skb)
} else {
in = *((struct net_device **)(skb->cb));
}
- if (skb->protocol == htons(ETH_P_8021Q)) {
- skb_push(skb, VLAN_HLEN);
- skb->nh.raw -= VLAN_HLEN;
- }
+ nf_bridge_push_encap_header(skb);
NF_HOOK_THRESH(PF_BRIDGE, NF_BR_FORWARD, skb, in,
skb->dev, br_forward_finish, 1);
return 0;
@@ -622,15 +640,13 @@ static unsigned int br_nf_forward_ip(unsigned int hook, struct sk_buff **pskb,
if (!parent)
return NF_DROP;
- if (skb->protocol == htons(ETH_P_IP) || IS_VLAN_IP(skb))
+ if (skb->protocol == htons(ETH_P_IP) || IS_VLAN_IP(skb) ||
+ IS_PPPOE_IP(skb))
pf = PF_INET;
else
pf = PF_INET6;
- if (skb->protocol == htons(ETH_P_8021Q)) {
- skb_pull(*pskb, VLAN_HLEN);
- (*pskb)->nh.raw += VLAN_HLEN;
- }
+ nf_bridge_pull_encap_header(*pskb);
nf_bridge = skb->nf_bridge;
if (skb->pkt_type == PACKET_OTHERHOST) {
@@ -664,15 +680,12 @@ static unsigned int br_nf_forward_arp(unsigned int hook, struct sk_buff **pskb,
if (skb->protocol != htons(ETH_P_ARP)) {
if (!IS_VLAN_ARP(skb))
return NF_ACCEPT;
- skb_pull(*pskb, VLAN_HLEN);
- (*pskb)->nh.raw += VLAN_HLEN;
+ nf_bridge_pull_encap_header(*pskb);
}
- if (skb->nh.arph->ar_pln != 4) {
- if (IS_VLAN_ARP(skb)) {
- skb_push(*pskb, VLAN_HLEN);
- (*pskb)->nh.raw -= VLAN_HLEN;
- }
+ if (arp_hdr(skb)->ar_pln != 4) {
+ if (IS_VLAN_ARP(skb))
+ nf_bridge_push_encap_header(*pskb);
return NF_ACCEPT;
}
*d = (struct net_device *)in;
@@ -719,10 +732,7 @@ static unsigned int br_nf_local_out(unsigned int hook, struct sk_buff **pskb,
skb->pkt_type = PACKET_OTHERHOST;
nf_bridge->mask ^= BRNF_PKT_TYPE;
}
- if (skb->protocol == htons(ETH_P_8021Q)) {
- skb_push(skb, VLAN_HLEN);
- skb->nh.raw -= VLAN_HLEN;
- }
+ nf_bridge_push_encap_header(skb);
NF_HOOK(PF_BRIDGE, NF_BR_FORWARD, skb, realindev, skb->dev,
br_forward_finish);
@@ -753,7 +763,8 @@ static unsigned int br_nf_post_routing(unsigned int hook, struct sk_buff **pskb,
#ifdef CONFIG_NETFILTER_DEBUG
/* Be very paranoid. This probably won't happen anymore, but let's
* keep the check just to be sure... */
- if (skb->mac.raw < skb->head || skb->mac.raw + ETH_HLEN > skb->data) {
+ if (skb_mac_header(skb) < skb->head ||
+ skb_mac_header(skb) + ETH_HLEN > skb->data) {
printk(KERN_CRIT "br_netfilter: Argh!! br_nf_post_routing: "
"bad mac.raw pointer.\n");
goto print_error;
@@ -766,7 +777,8 @@ static unsigned int br_nf_post_routing(unsigned int hook, struct sk_buff **pskb,
if (!realoutdev)
return NF_DROP;
- if (skb->protocol == htons(ETH_P_IP) || IS_VLAN_IP(skb))
+ if (skb->protocol == htons(ETH_P_IP) || IS_VLAN_IP(skb) ||
+ IS_PPPOE_IP(skb))
pf = PF_INET;
else
pf = PF_INET6;
@@ -785,11 +797,7 @@ static unsigned int br_nf_post_routing(unsigned int hook, struct sk_buff **pskb,
nf_bridge->mask |= BRNF_PKT_TYPE;
}
- if (skb->protocol == htons(ETH_P_8021Q)) {
- skb_pull(skb, VLAN_HLEN);
- skb->nh.raw += VLAN_HLEN;
- }
-
+ nf_bridge_pull_encap_header(skb);
nf_bridge_save_header(skb);
#if defined(CONFIG_VLAN_8021Q) || defined(CONFIG_VLAN_8021Q_MODULE)
@@ -808,7 +816,7 @@ print_error:
if (realoutdev)
printk("[%s]", realoutdev->name);
}
- printk(" head:%p, raw:%p, data:%p\n", skb->head, skb->mac.raw,
+ printk(" head:%p, raw:%p, data:%p\n", skb->head, skb_mac_header(skb),
skb->data);
dump_stack();
return NF_ACCEPT;
@@ -925,6 +933,14 @@ static ctl_table brnf_table[] = {
.mode = 0644,
.proc_handler = &brnf_sysctl_call_tables,
},
+ {
+ .ctl_name = NET_BRIDGE_NF_FILTER_PPPOE_TAGGED,
+ .procname = "bridge-nf-filter-pppoe-tagged",
+ .data = &brnf_filter_pppoe_tagged,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = &brnf_sysctl_call_tables,
+ },
{ .ctl_name = 0 }
};
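
All of the open-coded VLAN_HLEN push/pull pairs in br_netfilter.c collapse into nf_bridge_{push,pull}_encap_header(), which adjust skb->data and skb->network_header by one shared encapsulation length that now covers PPPoE as well as 802.1Q. A sketch of what nf_bridge_encap_header_len() must return, inferred from these hunks rather than copied from the kernel header:

#include <stdint.h>
#include <arpa/inet.h>		/* htons() in userspace */

static unsigned int encap_header_len_sketch(uint16_t proto)
{
	if (proto == htons(0x8100))	/* ETH_P_8021Q: VLAN tag */
		return 4;		/* VLAN_HLEN */
	if (proto == htons(0x8864))	/* ETH_P_PPP_SES: PPPoE session */
		return 8;		/* 6-byte pppoe_hdr + 2-byte PPP proto */
	return 0;
}
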
diff --git a/net/bridge/br_netlink.c b/net/bridge/br_netlink.c
index 7d68b24b565..0fcf6f07306 100644
--- a/net/bridge/br_netlink.c
+++ b/net/bridge/br_netlink.c
@@ -11,8 +11,7 @@
*/
#include <linux/kernel.h>
-#include <linux/rtnetlink.h>
-#include <net/netlink.h>
+#include <net/rtnetlink.h>
#include "br_private.h"
static inline size_t br_nlmsg_size(void)
@@ -110,8 +109,8 @@ static int br_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb)
struct net_device *dev;
int idx;
- read_lock(&dev_base_lock);
- for (dev = dev_base, idx = 0; dev; dev = dev->next) {
+ idx = 0;
+ for_each_netdev(dev) {
/* not a bridge port */
if (dev->br_port == NULL || idx < cb->args[0])
goto skip;
@@ -123,7 +122,6 @@ static int br_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb)
skip:
++idx;
}
- read_unlock(&dev_base_lock);
cb->args[0] = idx;
@@ -166,7 +164,7 @@ static int br_rtm_setlink(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
return -EINVAL;
/* if kernel STP is running, don't allow changes */
- if (p->br->stp_enabled)
+ if (p->br->stp_enabled == BR_KERNEL_STP)
return -EBUSY;
if (!netif_running(dev) ||
@@ -179,18 +177,19 @@ static int br_rtm_setlink(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
}
-static struct rtnetlink_link bridge_rtnetlink_table[RTM_NR_MSGTYPES] = {
- [RTM_GETLINK - RTM_BASE] = { .dumpit = br_dump_ifinfo, },
- [RTM_SETLINK - RTM_BASE] = { .doit = br_rtm_setlink, },
-};
-
-void __init br_netlink_init(void)
+int __init br_netlink_init(void)
{
- rtnetlink_links[PF_BRIDGE] = bridge_rtnetlink_table;
+ if (__rtnl_register(PF_BRIDGE, RTM_GETLINK, NULL, br_dump_ifinfo))
+ return -ENOBUFS;
+
+ /* Only the first call to __rtnl_register can fail */
+ __rtnl_register(PF_BRIDGE, RTM_SETLINK, br_rtm_setlink, NULL);
+
+ return 0;
}
void __exit br_netlink_fini(void)
{
- rtnetlink_links[PF_BRIDGE] = NULL;
+ rtnl_unregister_all(PF_BRIDGE);
}
diff --git a/net/bridge/br_notify.c b/net/bridge/br_notify.c
index 37357ed2149..c8451d3a070 100644
--- a/net/bridge/br_notify.c
+++ b/net/bridge/br_notify.c
@@ -50,7 +50,6 @@ static int br_device_event(struct notifier_block *unused, unsigned long event, v
case NETDEV_CHANGEADDR:
spin_lock_bh(&br->lock);
br_fdb_changeaddr(p, dev->dev_addr);
- br_ifinfo_notify(RTM_NEWLINK, p);
br_stp_recalculate_bridge_id(br);
spin_unlock_bh(&br->lock);
break;
@@ -74,10 +73,11 @@ static int br_device_event(struct notifier_block *unused, unsigned long event, v
break;
case NETDEV_UP:
- spin_lock_bh(&br->lock);
- if (netif_carrier_ok(dev) && (br->dev->flags & IFF_UP))
+ if (netif_carrier_ok(dev) && (br->dev->flags & IFF_UP)) {
+ spin_lock_bh(&br->lock);
br_stp_enable_port(p);
- spin_unlock_bh(&br->lock);
+ spin_unlock_bh(&br->lock);
+ }
break;
case NETDEV_UNREGISTER:
@@ -85,5 +85,10 @@ static int br_device_event(struct notifier_block *unused, unsigned long event, v
break;
}
+ /* Events that may cause spanning tree to refresh */
+ if (event == NETDEV_CHANGEADDR || event == NETDEV_UP ||
+ event == NETDEV_CHANGE || event == NETDEV_DOWN)
+ br_ifinfo_notify(RTM_NEWLINK, p);
+
return NOTIFY_DONE;
}
diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h
index cc3f1c99261..21bf3a9a03f 100644
--- a/net/bridge/br_private.h
+++ b/net/bridge/br_private.h
@@ -26,7 +26,10 @@
#define BR_PORT_BITS 10
#define BR_MAX_PORTS (1<<BR_PORT_BITS)
-#define BR_VERSION "2.2"
+#define BR_VERSION "2.3"
+
+/* Path to usermode spanning tree program */
+#define BR_STP_PROG "/sbin/bridge-stp"
typedef struct bridge_id bridge_id;
typedef struct mac_addr mac_addr;
@@ -107,7 +110,13 @@ struct net_bridge
u8 group_addr[ETH_ALEN];
u16 root_port;
- unsigned char stp_enabled;
+
+ enum {
+ BR_NO_STP, /* no spanning tree */
+ BR_KERNEL_STP, /* old STP in kernel */
+ BR_USER_STP, /* new RSTP in userspace */
+ } stp_enabled;
+
unsigned char topology_change;
unsigned char topology_change_detected;
@@ -127,14 +136,14 @@ static inline int br_is_root_bridge(const struct net_bridge *br)
return !memcmp(&br->bridge_id, &br->designated_root, 8);
}
-
/* br_device.c */
extern void br_dev_setup(struct net_device *dev);
extern int br_dev_xmit(struct sk_buff *skb, struct net_device *dev);
/* br_fdb.c */
-extern void br_fdb_init(void);
+extern int br_fdb_init(void);
extern void br_fdb_fini(void);
+extern void br_fdb_flush(struct net_bridge *br);
extern void br_fdb_changeaddr(struct net_bridge_port *p,
const unsigned char *newaddr);
extern void br_fdb_cleanup(unsigned long arg);
@@ -182,7 +191,8 @@ extern void br_features_recompute(struct net_bridge *br);
/* br_input.c */
extern int br_handle_frame_finish(struct sk_buff *skb);
-extern int br_handle_frame(struct net_bridge_port *p, struct sk_buff **pskb);
+extern struct sk_buff *br_handle_frame(struct net_bridge_port *p,
+ struct sk_buff *skb);
/* br_ioctl.c */
extern int br_dev_ioctl(struct net_device *dev, struct ifreq *rq, int cmd);
@@ -207,6 +217,7 @@ extern void br_become_designated_port(struct net_bridge_port *p);
/* br_stp_if.c */
extern void br_stp_enable_bridge(struct net_bridge *br);
extern void br_stp_disable_bridge(struct net_bridge *br);
+extern void br_stp_set_enabled(struct net_bridge *br, unsigned long val);
extern void br_stp_enable_port(struct net_bridge_port *p);
extern void br_stp_disable_port(struct net_bridge_port *p);
extern void br_stp_recalculate_bridge_id(struct net_bridge *br);
@@ -235,7 +246,7 @@ extern void (*br_fdb_put_hook)(struct net_bridge_fdb_entry *ent);
/* br_netlink.c */
-extern void br_netlink_init(void);
+extern int br_netlink_init(void);
extern void br_netlink_fini(void);
extern void br_ifinfo_notify(int event, struct net_bridge_port *port);
diff --git a/net/bridge/br_stp.c b/net/bridge/br_stp.c
index f9ff4d57b0d..ebb0861e9bd 100644
--- a/net/bridge/br_stp.c
+++ b/net/bridge/br_stp.c
@@ -370,11 +370,11 @@ static void br_make_blocking(struct net_bridge_port *p)
static void br_make_forwarding(struct net_bridge_port *p)
{
if (p->state == BR_STATE_BLOCKING) {
- if (p->br->stp_enabled) {
+ if (p->br->stp_enabled == BR_KERNEL_STP)
p->state = BR_STATE_LISTENING;
- } else {
+ else
p->state = BR_STATE_LEARNING;
- }
+
br_log_state(p);
mod_timer(&p->forward_delay_timer, jiffies + p->br->forward_delay);
	}
}
@@ -384,6 +384,10 @@ void br_port_state_selection(struct net_bridge *br)
{
struct net_bridge_port *p;
+ /* Don't change port states if userspace is handling STP */
+ if (br->stp_enabled == BR_USER_STP)
+ return;
+
list_for_each_entry(p, &br->port_list, list) {
if (p->state != BR_STATE_DISABLED) {
if (p->port_no == br->root_port) {
diff --git a/net/bridge/br_stp_bpdu.c b/net/bridge/br_stp_bpdu.c
index b9fb0dc4ab1..60112bce669 100644
--- a/net/bridge/br_stp_bpdu.c
+++ b/net/bridge/br_stp_bpdu.c
@@ -33,9 +33,6 @@ static void br_send_bpdu(struct net_bridge_port *p,
{
struct sk_buff *skb;
- if (!p->br->stp_enabled)
- return;
-
skb = dev_alloc_skb(length+LLC_RESERVE);
if (!skb)
return;
@@ -75,6 +72,9 @@ void br_send_config_bpdu(struct net_bridge_port *p, struct br_config_bpdu *bpdu)
{
unsigned char buf[35];
+ if (p->br->stp_enabled != BR_KERNEL_STP)
+ return;
+
buf[0] = 0;
buf[1] = 0;
buf[2] = 0;
@@ -117,6 +117,9 @@ void br_send_tcn_bpdu(struct net_bridge_port *p)
{
unsigned char buf[4];
+ if (p->br->stp_enabled != BR_KERNEL_STP)
+ return;
+
buf[0] = 0;
buf[1] = 0;
buf[2] = 0;
@@ -157,9 +160,13 @@ int br_stp_rcv(struct sk_buff *skb, struct net_device *dev,
br = p->br;
spin_lock(&br->lock);
- if (p->state == BR_STATE_DISABLED
- || !br->stp_enabled
- || !(br->dev->flags & IFF_UP))
+ if (br->stp_enabled != BR_KERNEL_STP)
+ goto out;
+
+ if (!(br->dev->flags & IFF_UP))
+ goto out;
+
+ if (p->state == BR_STATE_DISABLED)
goto out;
if (compare_ether_addr(dest, br->group_addr) != 0)
diff --git a/net/bridge/br_stp_if.c b/net/bridge/br_stp_if.c
index a285897a2fb..3e246b37020 100644
--- a/net/bridge/br_stp_if.c
+++ b/net/bridge/br_stp_if.c
@@ -87,7 +87,6 @@ void br_stp_disable_bridge(struct net_bridge *br)
void br_stp_enable_port(struct net_bridge_port *p)
{
br_init_port(p);
- br_ifinfo_notify(RTM_NEWLINK, p);
br_port_state_selection(p->br);
}
@@ -101,8 +100,6 @@ void br_stp_disable_port(struct net_bridge_port *p)
printk(KERN_INFO "%s: port %i(%s) entering %s state\n",
br->dev->name, p->port_no, p->dev->name, "disabled");
- br_ifinfo_notify(RTM_DELLINK, p);
-
wasroot = br_is_root_bridge(br);
br_become_designated_port(p);
p->state = BR_STATE_DISABLED;
@@ -123,6 +120,62 @@ void br_stp_disable_port(struct net_bridge_port *p)
br_become_root_bridge(br);
}
+static void br_stp_start(struct net_bridge *br)
+{
+ int r;
+ char *argv[] = { BR_STP_PROG, br->dev->name, "start", NULL };
+ char *envp[] = { NULL };
+
+ r = call_usermodehelper(BR_STP_PROG, argv, envp, 1);
+ if (r == 0) {
+ br->stp_enabled = BR_USER_STP;
+ printk(KERN_INFO "%s: userspace STP started\n", br->dev->name);
+ } else {
+ br->stp_enabled = BR_KERNEL_STP;
+ printk(KERN_INFO "%s: starting userspace STP failed, "
+ "staring kernel STP\n", br->dev->name);
+
+ /* To start timers on any ports left in blocking */
+ spin_lock_bh(&br->lock);
+ br_port_state_selection(br);
+ spin_unlock_bh(&br->lock);
+ }
+}
+
+static void br_stp_stop(struct net_bridge *br)
+{
+ int r;
+ char *argv[] = { BR_STP_PROG, br->dev->name, "stop", NULL };
+ char *envp[] = { NULL };
+
+ if (br->stp_enabled == BR_USER_STP) {
+ r = call_usermodehelper(BR_STP_PROG, argv, envp, 1);
+ printk(KERN_INFO "%s: userspace STP stopped, return code %d\n",
+ br->dev->name, r);
+
+ /* To start timers on any ports left in blocking */
+ spin_lock_bh(&br->lock);
+ br_port_state_selection(br);
+ spin_unlock_bh(&br->lock);
+ }
+
+ br->stp_enabled = BR_NO_STP;
+}
+
+void br_stp_set_enabled(struct net_bridge *br, unsigned long val)
+{
+ ASSERT_RTNL();
+
+ if (val) {
+ if (br->stp_enabled == BR_NO_STP)
+ br_stp_start(br);
+ } else {
+ if (br->stp_enabled != BR_NO_STP)
+ br_stp_stop(br);
+ }
+}
+
/* called under bridge lock */
void br_stp_change_bridge_id(struct net_bridge *br, const unsigned char *addr)
{
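
br_stp_set_enabled() now delegates to /sbin/bridge-stp and falls back to kernel STP only when the helper fails; call_usermodehelper() with wait set blocks until the program exits and returns its status. A rough userspace analogue of the start path, with system() standing in for the kernel helper:

#include <stdio.h>
#include <stdlib.h>

static int start_user_stp(const char *brname)
{
	char cmd[128];

	snprintf(cmd, sizeof(cmd), "/sbin/bridge-stp %s start", brname);
	return system(cmd);	/* 0 when the helper ran and exited 0 */
}
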
diff --git a/net/bridge/br_sysfs_br.c b/net/bridge/br_sysfs_br.c
index 01a22ad0cc7..33c6c4a7c68 100644
--- a/net/bridge/br_sysfs_br.c
+++ b/net/bridge/br_sysfs_br.c
@@ -149,7 +149,11 @@ static ssize_t show_stp_state(struct device *d,
static void set_stp_state(struct net_bridge *br, unsigned long val)
{
- br->stp_enabled = val;
+ rtnl_lock();
+ spin_unlock_bh(&br->lock);
+ br_stp_set_enabled(br, val);
+ spin_lock_bh(&br->lock);
+ rtnl_unlock();
}
static ssize_t store_stp_state(struct device *d,
@@ -309,6 +313,19 @@ static ssize_t store_group_addr(struct device *d,
static DEVICE_ATTR(group_addr, S_IRUGO | S_IWUSR,
show_group_addr, store_group_addr);
+static ssize_t store_flush(struct device *d,
+ struct device_attribute *attr,
+ const char *buf, size_t len)
+{
+ struct net_bridge *br = to_bridge(d);
+
+ if (!capable(CAP_NET_ADMIN))
+ return -EPERM;
+
+ br_fdb_flush(br);
+ return len;
+}
+static DEVICE_ATTR(flush, S_IWUSR, NULL, store_flush);
static struct attribute *bridge_attrs[] = {
&dev_attr_forward_delay.attr,
@@ -328,6 +345,7 @@ static struct attribute *bridge_attrs[] = {
&dev_attr_topology_change_timer.attr,
&dev_attr_gc_timer.attr,
&dev_attr_group_addr.attr,
+ &dev_attr_flush.attr,
NULL
};
diff --git a/net/bridge/br_sysfs_if.c b/net/bridge/br_sysfs_if.c
index 0bc2aef8f9f..2da22927d8d 100644
--- a/net/bridge/br_sysfs_if.c
+++ b/net/bridge/br_sysfs_if.c
@@ -137,6 +137,13 @@ static ssize_t show_hold_timer(struct net_bridge_port *p,
}
static BRPORT_ATTR(hold_timer, S_IRUGO, show_hold_timer, NULL);
+static ssize_t store_flush(struct net_bridge_port *p, unsigned long v)
+{
+	br_fdb_delete_by_port(p->br, p, 0);	/* don't delete local entry */
+ return 0;
+}
+static BRPORT_ATTR(flush, S_IWUSR, NULL, store_flush);
+
static struct brport_attribute *brport_attrs[] = {
&brport_attr_path_cost,
&brport_attr_priority,
@@ -152,6 +159,7 @@ static struct brport_attribute *brport_attrs[] = {
&brport_attr_message_age_timer,
&brport_attr_forward_delay_timer,
&brport_attr_hold_timer,
+ &brport_attr_flush,
NULL
};
diff --git a/net/bridge/netfilter/ebt_arp.c b/net/bridge/netfilter/ebt_arp.c
index 9c599800a90..1a46952a56d 100644
--- a/net/bridge/netfilter/ebt_arp.c
+++ b/net/bridge/netfilter/ebt_arp.c
@@ -35,40 +35,36 @@ static int ebt_filter_arp(const struct sk_buff *skb, const struct net_device *in
return EBT_NOMATCH;
if (info->bitmask & (EBT_ARP_SRC_IP | EBT_ARP_DST_IP)) {
- __be32 _addr, *ap;
+ __be32 saddr, daddr, *sap, *dap;
- /* IPv4 addresses are always 4 bytes */
- if (ah->ar_pln != sizeof(__be32))
+ if (ah->ar_pln != sizeof(__be32) || ah->ar_pro != htons(ETH_P_IP))
+ return EBT_NOMATCH;
+ sap = skb_header_pointer(skb, sizeof(struct arphdr) +
+ ah->ar_hln, sizeof(saddr),
+ &saddr);
+ if (sap == NULL)
+ return EBT_NOMATCH;
+ dap = skb_header_pointer(skb, sizeof(struct arphdr) +
+ 2*ah->ar_hln+sizeof(saddr),
+ sizeof(daddr), &daddr);
+ if (dap == NULL)
+ return EBT_NOMATCH;
+ if (info->bitmask & EBT_ARP_SRC_IP &&
+ FWINV(info->saddr != (*sap & info->smsk), EBT_ARP_SRC_IP))
+ return EBT_NOMATCH;
+ if (info->bitmask & EBT_ARP_DST_IP &&
+ FWINV(info->daddr != (*dap & info->dmsk), EBT_ARP_DST_IP))
+ return EBT_NOMATCH;
+ if (info->bitmask & EBT_ARP_GRAT &&
+ FWINV(*dap != *sap, EBT_ARP_GRAT))
return EBT_NOMATCH;
- if (info->bitmask & EBT_ARP_SRC_IP) {
- ap = skb_header_pointer(skb, sizeof(struct arphdr) +
- ah->ar_hln, sizeof(_addr),
- &_addr);
- if (ap == NULL)
- return EBT_NOMATCH;
- if (FWINV(info->saddr != (*ap & info->smsk),
- EBT_ARP_SRC_IP))
- return EBT_NOMATCH;
- }
-
- if (info->bitmask & EBT_ARP_DST_IP) {
- ap = skb_header_pointer(skb, sizeof(struct arphdr) +
- 2*ah->ar_hln+sizeof(__be32),
- sizeof(_addr), &_addr);
- if (ap == NULL)
- return EBT_NOMATCH;
- if (FWINV(info->daddr != (*ap & info->dmsk),
- EBT_ARP_DST_IP))
- return EBT_NOMATCH;
- }
}
if (info->bitmask & (EBT_ARP_SRC_MAC | EBT_ARP_DST_MAC)) {
unsigned char _mac[ETH_ALEN], *mp;
uint8_t verdict, i;
- /* MAC addresses are 6 bytes */
- if (ah->ar_hln != ETH_ALEN)
+ if (ah->ar_hln != ETH_ALEN || ah->ar_hrd != htons(ARPHRD_ETHER))
return EBT_NOMATCH;
if (info->bitmask & EBT_ARP_SRC_MAC) {
mp = skb_header_pointer(skb, sizeof(struct arphdr),
diff --git a/net/bridge/netfilter/ebt_log.c b/net/bridge/netfilter/ebt_log.c
index 45712aec6a0..031bfa4a51f 100644
--- a/net/bridge/netfilter/ebt_log.c
+++ b/net/bridge/netfilter/ebt_log.c
@@ -196,14 +196,10 @@ static int __init ebt_log_init(void)
ret = ebt_register_watcher(&log);
if (ret < 0)
return ret;
- if (nf_log_register(PF_BRIDGE, &ebt_log_logger) < 0) {
- printk(KERN_WARNING "ebt_log: not logging via system console "
- "since somebody else already registered for PF_INET\n");
- /* we cannot make module load fail here, since otherwise
- * ebtables userspace would abort */
- }
-
- return 0;
+ ret = nf_log_register(PF_BRIDGE, &ebt_log_logger);
+ if (ret < 0 && ret != -EEXIST)
+ ebt_unregister_watcher(&log);
+ return ret;
}
static void __exit ebt_log_fini(void)
diff --git a/net/bridge/netfilter/ebt_ulog.c b/net/bridge/netfilter/ebt_ulog.c
index 8e15cc47f6c..9411db62591 100644
--- a/net/bridge/netfilter/ebt_ulog.c
+++ b/net/bridge/netfilter/ebt_ulog.c
@@ -130,6 +130,7 @@ static void ebt_ulog_packet(unsigned int hooknr, const struct sk_buff *skb,
unsigned int group = uloginfo->nlgroup;
ebt_ulog_buff_t *ub = &ulog_buffers[group];
spinlock_t *lock = &ub->lock;
+ ktime_t kt;
if ((uloginfo->cprange == 0) ||
(uloginfo->cprange > skb->len + ETH_HLEN))
@@ -164,9 +165,10 @@ static void ebt_ulog_packet(unsigned int hooknr, const struct sk_buff *skb,
/* Fill in the ulog data */
pm->version = EBT_ULOG_VERSION;
- do_gettimeofday(&pm->stamp);
+ kt = ktime_get_real();
+ pm->stamp = ktime_to_timeval(kt);
if (ub->qlen == 1)
- skb_set_timestamp(ub->skb, &pm->stamp);
+ ub->skb->tstamp = kt;
pm->data_len = copy_len;
pm->mark = skb->mark;
pm->hook = hooknr;
@@ -295,14 +297,12 @@ static int __init ebt_ulog_init(void)
/* initialize ulog_buffers */
for (i = 0; i < EBT_ULOG_MAXNLGROUPS; i++) {
- init_timer(&ulog_buffers[i].timer);
- ulog_buffers[i].timer.function = ulog_timer;
- ulog_buffers[i].timer.data = i;
+ setup_timer(&ulog_buffers[i].timer, ulog_timer, i);
spin_lock_init(&ulog_buffers[i].lock);
}
ebtulognl = netlink_kernel_create(NETLINK_NFLOG, EBT_ULOG_MAXNLGROUPS,
- NULL, THIS_MODULE);
+ NULL, NULL, THIS_MODULE);
if (!ebtulognl)
ret = -ENOMEM;
else if ((ret = ebt_register_watcher(&ulog)))
diff --git a/net/compat.c b/net/compat.c
index 1f32866d09b..9a0f5f2b90c 100644
--- a/net/compat.c
+++ b/net/compat.c
@@ -34,11 +34,11 @@ static inline int iov_from_user_compat_to_kern(struct iovec *kiov,
{
int tot_len = 0;
- while(niov > 0) {
+ while (niov > 0) {
compat_uptr_t buf;
compat_size_t len;
- if(get_user(len, &uiov32->iov_len) ||
+ if (get_user(len, &uiov32->iov_len) ||
get_user(buf, &uiov32->iov_base)) {
tot_len = -EFAULT;
break;
@@ -78,12 +78,12 @@ int verify_compat_iovec(struct msghdr *kern_msg, struct iovec *kern_iov,
{
int tot_len;
- if(kern_msg->msg_namelen) {
- if(mode==VERIFY_READ) {
+ if (kern_msg->msg_namelen) {
+ if (mode==VERIFY_READ) {
int err = move_addr_to_kernel(kern_msg->msg_name,
kern_msg->msg_namelen,
kern_address);
- if(err < 0)
+ if (err < 0)
return err;
}
kern_msg->msg_name = kern_address;
@@ -93,7 +93,7 @@ int verify_compat_iovec(struct msghdr *kern_msg, struct iovec *kern_iov,
tot_len = iov_from_user_compat_to_kern(kern_iov,
(struct compat_iovec __user *)kern_msg->msg_iov,
kern_msg->msg_iovlen);
- if(tot_len >= 0)
+ if (tot_len >= 0)
kern_msg->msg_iov = kern_iov;
return tot_len;
@@ -146,8 +146,8 @@ int cmsghdr_from_user_compat_to_kern(struct msghdr *kmsg, struct sock *sk,
kcmlen = 0;
kcmsg_base = kcmsg = (struct cmsghdr *)stackbuf;
ucmsg = CMSG_COMPAT_FIRSTHDR(kmsg);
- while(ucmsg != NULL) {
- if(get_user(ucmlen, &ucmsg->cmsg_len))
+ while (ucmsg != NULL) {
+ if (get_user(ucmlen, &ucmsg->cmsg_len))
return -EFAULT;
/* Catch bogons. */
@@ -160,7 +160,7 @@ int cmsghdr_from_user_compat_to_kern(struct msghdr *kmsg, struct sock *sk,
kcmlen += tmp;
ucmsg = cmsg_compat_nxthdr(kmsg, ucmsg, ucmlen);
}
- if(kcmlen == 0)
+ if (kcmlen == 0)
return -EINVAL;
/* The kcmlen holds the 64-bit version of the control length.
@@ -176,7 +176,7 @@ int cmsghdr_from_user_compat_to_kern(struct msghdr *kmsg, struct sock *sk,
/* Now copy them over neatly. */
memset(kcmsg, 0, kcmlen);
ucmsg = CMSG_COMPAT_FIRSTHDR(kmsg);
- while(ucmsg != NULL) {
+ while (ucmsg != NULL) {
if (__get_user(ucmlen, &ucmsg->cmsg_len))
goto Efault;
if (!CMSG_COMPAT_OK(ucmlen, ucmsg, kmsg))
@@ -215,11 +215,12 @@ Efault:
int put_cmsg_compat(struct msghdr *kmsg, int level, int type, int len, void *data)
{
struct compat_timeval ctv;
+ struct compat_timespec cts;
struct compat_cmsghdr __user *cm = (struct compat_cmsghdr __user *) kmsg->msg_control;
struct compat_cmsghdr cmhdr;
int cmlen;
- if(cm == NULL || kmsg->msg_controllen < sizeof(*cm)) {
+ if (cm == NULL || kmsg->msg_controllen < sizeof(*cm)) {
kmsg->msg_flags |= MSG_CTRUNC;
return 0; /* XXX: return error? check spec. */
}
@@ -229,11 +230,18 @@ int put_cmsg_compat(struct msghdr *kmsg, int level, int type, int len, void *dat
ctv.tv_sec = tv->tv_sec;
ctv.tv_usec = tv->tv_usec;
data = &ctv;
- len = sizeof(struct compat_timeval);
+ len = sizeof(ctv);
+ }
+ if (level == SOL_SOCKET && type == SO_TIMESTAMPNS) {
+ struct timespec *ts = (struct timespec *)data;
+ cts.tv_sec = ts->tv_sec;
+ cts.tv_nsec = ts->tv_nsec;
+ data = &cts;
+ len = sizeof(cts);
}
cmlen = CMSG_COMPAT_LEN(len);
- if(kmsg->msg_controllen < cmlen) {
+ if (kmsg->msg_controllen < cmlen) {
kmsg->msg_flags |= MSG_CTRUNC;
cmlen = kmsg->msg_controllen;
}
@@ -241,9 +249,9 @@ int put_cmsg_compat(struct msghdr *kmsg, int level, int type, int len, void *dat
cmhdr.cmsg_type = type;
cmhdr.cmsg_len = cmlen;
- if(copy_to_user(cm, &cmhdr, sizeof cmhdr))
+ if (copy_to_user(cm, &cmhdr, sizeof cmhdr))
return -EFAULT;
- if(copy_to_user(CMSG_COMPAT_DATA(cm), data, cmlen - sizeof(struct compat_cmsghdr)))
+ if (copy_to_user(CMSG_COMPAT_DATA(cm), data, cmlen - sizeof(struct compat_cmsghdr)))
return -EFAULT;
cmlen = CMSG_COMPAT_SPACE(len);
kmsg->msg_control += cmlen;
@@ -545,20 +553,49 @@ int compat_sock_get_timestamp(struct sock *sk, struct timeval __user *userstamp)
struct compat_timeval __user *ctv =
(struct compat_timeval __user*) userstamp;
int err = -ENOENT;
+ struct timeval tv;
if (!sock_flag(sk, SOCK_TIMESTAMP))
sock_enable_timestamp(sk);
- if (sk->sk_stamp.tv_sec == -1)
+ tv = ktime_to_timeval(sk->sk_stamp);
+ if (tv.tv_sec == -1)
return err;
- if (sk->sk_stamp.tv_sec == 0)
- do_gettimeofday(&sk->sk_stamp);
- if (put_user(sk->sk_stamp.tv_sec, &ctv->tv_sec) ||
- put_user(sk->sk_stamp.tv_usec, &ctv->tv_usec))
+ if (tv.tv_sec == 0) {
+ sk->sk_stamp = ktime_get_real();
+ tv = ktime_to_timeval(sk->sk_stamp);
+ }
+ err = 0;
+ if (put_user(tv.tv_sec, &ctv->tv_sec) ||
+ put_user(tv.tv_usec, &ctv->tv_usec))
err = -EFAULT;
return err;
}
EXPORT_SYMBOL(compat_sock_get_timestamp);
+int compat_sock_get_timestampns(struct sock *sk, struct timespec __user *userstamp)
+{
+ struct compat_timespec __user *ctv =
+ (struct compat_timespec __user*) userstamp;
+ int err = -ENOENT;
+ struct timespec ts;
+
+ if (!sock_flag(sk, SOCK_TIMESTAMP))
+ sock_enable_timestamp(sk);
+ ts = ktime_to_timespec(sk->sk_stamp);
+ if (ts.tv_sec == -1)
+ return err;
+ if (ts.tv_sec == 0) {
+ sk->sk_stamp = ktime_get_real();
+ ts = ktime_to_timespec(sk->sk_stamp);
+ }
+ err = 0;
+ if (put_user(ts.tv_sec, &ctv->tv_sec) ||
+ put_user(ts.tv_nsec, &ctv->tv_nsec))
+ err = -EFAULT;
+ return err;
+}
+EXPORT_SYMBOL(compat_sock_get_timestampns);
+
asmlinkage long compat_sys_getsockopt(int fd, int level, int optname,
char __user *optval, int __user *optlen)
{
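
From user space the new option pairs with SCM_TIMESTAMPNS control messages. A hedged sketch of a receiver using it (assumes headers that define SO_TIMESTAMPNS/SCM_TIMESTAMPNS, i.e. a 2.6.22-or-later ABI):

    #include <stdio.h>
    #include <string.h>
    #include <sys/socket.h>
    #include <sys/uio.h>
    #include <time.h>

    /* Receive one datagram on fd and print its nanosecond RX timestamp. */
    static void recv_with_ns_stamp(int fd)
    {
            char data[2048], ctrl[CMSG_SPACE(sizeof(struct timespec))];
            struct iovec iov = { .iov_base = data, .iov_len = sizeof(data) };
            struct msghdr msg = {
                    .msg_iov = &iov, .msg_iovlen = 1,
                    .msg_control = ctrl, .msg_controllen = sizeof(ctrl),
            };
            struct cmsghdr *cm;
            int on = 1;

            setsockopt(fd, SOL_SOCKET, SO_TIMESTAMPNS, &on, sizeof(on));
            if (recvmsg(fd, &msg, 0) < 0)
                    return;
            for (cm = CMSG_FIRSTHDR(&msg); cm; cm = CMSG_NXTHDR(&msg, cm)) {
                    if (cm->cmsg_level == SOL_SOCKET &&
                        cm->cmsg_type == SCM_TIMESTAMPNS) {
                            struct timespec ts;
                            memcpy(&ts, CMSG_DATA(cm), sizeof(ts));
                            printf("rx at %ld.%09ld\n",
                                   (long)ts.tv_sec, ts.tv_nsec);
                    }
            }
    }
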
@@ -617,7 +654,7 @@ asmlinkage long compat_sys_socketcall(int call, u32 __user *args)
a0 = a[0];
a1 = a[1];
- switch(call) {
+ switch (call) {
case SYS_SOCKET:
ret = sys_socket(a0, a1, a[2]);
break;
diff --git a/net/core/Makefile b/net/core/Makefile
index 73272d506e9..4751613e1b5 100644
--- a/net/core/Makefile
+++ b/net/core/Makefile
@@ -13,7 +13,6 @@ obj-y += dev.o ethtool.o dev_mcast.o dst.o netevent.o \
obj-$(CONFIG_XFRM) += flow.o
obj-$(CONFIG_SYSFS) += net-sysfs.o
obj-$(CONFIG_NET_PKTGEN) += pktgen.o
-obj-$(CONFIG_WIRELESS_EXT) += wireless.o
obj-$(CONFIG_NETPOLL) += netpoll.o
obj-$(CONFIG_NET_DMA) += user_dma.o
obj-$(CONFIG_FIB_RULES) += fib_rules.o
diff --git a/net/core/datagram.c b/net/core/datagram.c
index 186212b5b7d..cb056f47612 100644
--- a/net/core/datagram.c
+++ b/net/core/datagram.c
@@ -411,11 +411,11 @@ fault:
return -EFAULT;
}
-__sum16 __skb_checksum_complete(struct sk_buff *skb)
+__sum16 __skb_checksum_complete_head(struct sk_buff *skb, int len)
{
__sum16 sum;
- sum = csum_fold(skb_checksum(skb, 0, skb->len, skb->csum));
+ sum = csum_fold(skb_checksum(skb, 0, len, skb->csum));
if (likely(!sum)) {
if (unlikely(skb->ip_summed == CHECKSUM_COMPLETE))
netdev_rx_csum_fault(skb->dev);
@@ -423,6 +423,12 @@ __sum16 __skb_checksum_complete(struct sk_buff *skb)
}
return sum;
}
+EXPORT_SYMBOL(__skb_checksum_complete_head);
+
+__sum16 __skb_checksum_complete(struct sk_buff *skb)
+{
+ return __skb_checksum_complete_head(skb, skb->len);
+}
EXPORT_SYMBOL(__skb_checksum_complete);
/**
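
Splitting out __skb_checksum_complete_head() lets callers verify only the first len bytes instead of the whole packet; the obvious user is partial-coverage UDP-Lite. A sketch of such a caller (check_coverage is a hypothetical name):

    /* Returns 0 if the covered region checksums correctly. */
    static inline int check_coverage(struct sk_buff *skb, unsigned int cov)
    {
            if (skb->ip_summed == CHECKSUM_UNNECESSARY)
                    return 0;
            /* a non-zero folded sum means corruption in the first cov bytes */
            return __skb_checksum_complete_head(skb, cov) ? -1 : 0;
    }
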
diff --git a/net/core/dev.c b/net/core/dev.c
index 4dc93cc4d5b..f27d4ab181e 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -109,7 +109,7 @@
#include <linux/netpoll.h>
#include <linux/rcupdate.h>
#include <linux/delay.h>
-#include <linux/wireless.h>
+#include <net/wext.h>
#include <net/iw_handler.h>
#include <asm/current.h>
#include <linux/audit.h>
@@ -146,8 +146,8 @@
*/
static DEFINE_SPINLOCK(ptype_lock);
-static struct list_head ptype_base[16]; /* 16 way hashed list */
-static struct list_head ptype_all; /* Taps */
+static struct list_head ptype_base[16] __read_mostly; /* 16 way hashed list */
+static struct list_head ptype_all __read_mostly; /* Taps */
#ifdef CONFIG_NET_DMA
static struct dma_client *net_dma_client;
@@ -156,13 +156,13 @@ static spinlock_t net_dma_event_lock;
#endif
/*
- * The @dev_base list is protected by @dev_base_lock and the rtnl
+ * The @dev_base_head list is protected by @dev_base_lock and the rtnl
* semaphore.
*
* Pure readers hold dev_base_lock for reading.
*
* Writers must hold the rtnl semaphore while they loop through the
- * dev_base list, and hold dev_base_lock for writing when they do the
+ * dev_base_head list, and hold dev_base_lock for writing when they do the
* actual updates. This allows pure readers to access the list even
* while a writer is preparing to update it.
*
@@ -174,11 +174,10 @@ static spinlock_t net_dma_event_lock;
* unregister_netdevice(), which must be called with the rtnl
* semaphore held.
*/
-struct net_device *dev_base;
-static struct net_device **dev_tail = &dev_base;
+LIST_HEAD(dev_base_head);
DEFINE_RWLOCK(dev_base_lock);
-EXPORT_SYMBOL(dev_base);
+EXPORT_SYMBOL(dev_base_head);
EXPORT_SYMBOL(dev_base_lock);
#define NETDEV_HASHBITS 8
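
With dev_base converted from a hand-maintained singly linked list to a list_head, iteration goes through helpers rather than ->next chasing. Presumably they are thin wrappers in <linux/netdevice.h>, along these lines:

    #define for_each_netdev(d) \
            list_for_each_entry(d, &dev_base_head, dev_list)

    static inline struct net_device *first_net_device(void)
    {
            return list_empty(&dev_base_head) ? NULL :
                    list_entry(dev_base_head.next, struct net_device, dev_list);
    }

    static inline struct net_device *next_net_device(struct net_device *dev)
    {
            struct list_head *lh = dev->dev_list.next;

            return lh == &dev_base_head ? NULL :
                    list_entry(lh, struct net_device, dev_list);
    }

The locking rules are unchanged: pure readers take dev_base_lock (or the RTNL), writers take both.
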
@@ -226,12 +225,6 @@ extern void netdev_unregister_sysfs(struct net_device *);
*******************************************************************************/
/*
- * For efficiency
- */
-
-static int netdev_nit;
-
-/*
* Add a protocol ID to the list. Now that the input handler is
* smarter we can dispense with all the messy stuff that used to be
* here.
@@ -265,10 +258,9 @@ void dev_add_pack(struct packet_type *pt)
int hash;
spin_lock_bh(&ptype_lock);
- if (pt->type == htons(ETH_P_ALL)) {
- netdev_nit++;
+ if (pt->type == htons(ETH_P_ALL))
list_add_rcu(&pt->list, &ptype_all);
- } else {
+ else {
hash = ntohs(pt->type) & 15;
list_add_rcu(&pt->list, &ptype_base[hash]);
}
@@ -295,10 +287,9 @@ void __dev_remove_pack(struct packet_type *pt)
spin_lock_bh(&ptype_lock);
- if (pt->type == htons(ETH_P_ALL)) {
- netdev_nit--;
+ if (pt->type == htons(ETH_P_ALL))
head = &ptype_all;
- } else
+ else
head = &ptype_base[ntohs(pt->type) & 15];
list_for_each_entry(pt1, head, list) {
@@ -575,26 +566,38 @@ struct net_device *dev_getbyhwaddr(unsigned short type, char *ha)
ASSERT_RTNL();
- for (dev = dev_base; dev; dev = dev->next)
+ for_each_netdev(dev)
if (dev->type == type &&
!memcmp(dev->dev_addr, ha, dev->addr_len))
- break;
- return dev;
+ return dev;
+
+ return NULL;
}
EXPORT_SYMBOL(dev_getbyhwaddr);
+struct net_device *__dev_getfirstbyhwtype(unsigned short type)
+{
+ struct net_device *dev;
+
+ ASSERT_RTNL();
+ for_each_netdev(dev)
+ if (dev->type == type)
+ return dev;
+
+ return NULL;
+}
+
+EXPORT_SYMBOL(__dev_getfirstbyhwtype);
+
struct net_device *dev_getfirstbyhwtype(unsigned short type)
{
struct net_device *dev;
rtnl_lock();
- for (dev = dev_base; dev; dev = dev->next) {
- if (dev->type == type) {
- dev_hold(dev);
- break;
- }
- }
+ dev = __dev_getfirstbyhwtype(type);
+ if (dev)
+ dev_hold(dev);
rtnl_unlock();
return dev;
}
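
The lookup is split so that callers already holding the RTNL can use the lockless __dev_getfirstbyhwtype() without paying for a reference, while the wrapper keeps the old take-a-reference semantics. A sketch of both contracts:

    static void lookup_example(void)
    {
            struct net_device *dev;

            /* Under the RTNL: no reference taken; the pointer is only
             * stable while the lock is held. */
            rtnl_lock();
            dev = __dev_getfirstbyhwtype(ARPHRD_ETHER);
            if (dev)
                    printk(KERN_INFO "first ether: %s\n", dev->name);
            rtnl_unlock();

            /* Without the RTNL: the wrapper grabs a reference that the
             * caller must drop with dev_put(). */
            dev = dev_getfirstbyhwtype(ARPHRD_ETHER);
            if (dev) {
                    printk(KERN_INFO "first ether: %s\n", dev->name);
                    dev_put(dev);
            }
    }
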
@@ -614,17 +617,19 @@ EXPORT_SYMBOL(dev_getfirstbyhwtype);
struct net_device * dev_get_by_flags(unsigned short if_flags, unsigned short mask)
{
- struct net_device *dev;
+ struct net_device *dev, *ret;
+ ret = NULL;
read_lock(&dev_base_lock);
- for (dev = dev_base; dev != NULL; dev = dev->next) {
+ for_each_netdev(dev) {
if (((dev->flags ^ if_flags) & mask) == 0) {
dev_hold(dev);
+ ret = dev;
break;
}
}
read_unlock(&dev_base_lock);
- return dev;
+ return ret;
}
/**
@@ -690,7 +695,7 @@ int dev_alloc_name(struct net_device *dev, const char *name)
if (!inuse)
return -ENOMEM;
- for (d = dev_base; d; d = d->next) {
+ for_each_netdev(d) {
if (!sscanf(d->name, name, &i))
continue;
if (i < 0 || i >= max_netdevices)
@@ -817,7 +822,6 @@ static int default_rebuild_header(struct sk_buff *skb)
return 1;
}
-
/**
* dev_open - prepare an interface for use.
* @dev: device to open
@@ -973,7 +977,7 @@ int register_netdevice_notifier(struct notifier_block *nb)
rtnl_lock();
err = raw_notifier_chain_register(&netdev_chain, nb);
if (!err) {
- for (dev = dev_base; dev; dev = dev->next) {
+ for_each_netdev(dev) {
nb->notifier_call(nb, NETDEV_REGISTER, dev);
if (dev->flags & IFF_UP)
@@ -1031,23 +1035,12 @@ void net_disable_timestamp(void)
atomic_dec(&netstamp_needed);
}
-void __net_timestamp(struct sk_buff *skb)
-{
- struct timeval tv;
-
- do_gettimeofday(&tv);
- skb_set_timestamp(skb, &tv);
-}
-EXPORT_SYMBOL(__net_timestamp);
-
static inline void net_timestamp(struct sk_buff *skb)
{
if (atomic_read(&netstamp_needed))
__net_timestamp(skb);
- else {
- skb->tstamp.off_sec = 0;
- skb->tstamp.off_usec = 0;
- }
+ else
+ skb->tstamp.tv64 = 0;
}
/*
@@ -1077,18 +1070,18 @@ static void dev_queue_xmit_nit(struct sk_buff *skb, struct net_device *dev)
set by sender, so that the second statement is
just protection against buggy protocols.
*/
- skb2->mac.raw = skb2->data;
+ skb_reset_mac_header(skb2);
- if (skb2->nh.raw < skb2->data ||
- skb2->nh.raw > skb2->tail) {
+ if (skb_network_header(skb2) < skb2->data ||
+ skb2->network_header > skb2->tail) {
if (net_ratelimit())
printk(KERN_CRIT "protocol %04x is "
"buggy, dev %s\n",
skb2->protocol, dev->name);
- skb2->nh.raw = skb2->data;
+ skb_reset_network_header(skb2);
}
- skb2->h.raw = skb2->nh.raw;
+ skb2->transport_header = skb2->network_header;
skb2->pkt_type = PACKET_OUTGOING;
ptype->func(skb2, skb->dev, ptype, skb->dev);
}
@@ -1167,7 +1160,7 @@ EXPORT_SYMBOL(netif_device_attach);
int skb_checksum_help(struct sk_buff *skb)
{
__wsum csum;
- int ret = 0, offset = skb->h.raw - skb->data;
+ int ret = 0, offset;
if (skb->ip_summed == CHECKSUM_COMPLETE)
goto out_set_summed;
@@ -1183,15 +1176,16 @@ int skb_checksum_help(struct sk_buff *skb)
goto out;
}
+ offset = skb->csum_start - skb_headroom(skb);
BUG_ON(offset > (int)skb->len);
csum = skb_checksum(skb, offset, skb->len-offset, 0);
- offset = skb->tail - skb->h.raw;
+ offset = skb_headlen(skb) - offset;
BUG_ON(offset <= 0);
BUG_ON(skb->csum_offset + 2 > offset);
- *(__sum16*)(skb->h.raw + skb->csum_offset) = csum_fold(csum);
-
+ *(__sum16 *)(skb->head + skb->csum_start + skb->csum_offset) =
+ csum_fold(csum);
out_set_summed:
skb->ip_summed = CHECKSUM_NONE;
out:
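
After the header-pointer unions are retired, a CHECKSUM_PARTIAL packet describes its checksum purely as offsets from skb->head: summing starts csum_start bytes in, and the folded result lands csum_offset bytes past that (offsetof(struct tcphdr, check) for TCP, for instance). The store location computed above is, in isolation:

    /* Sketch of the pointer arithmetic, not the real helper. */
    static inline __sum16 *csum_store_location(const struct sk_buff *skb)
    {
            /* csum_start is relative to skb->head; csum_offset is
             * relative to csum_start. */
            return (__sum16 *)(skb->head + skb->csum_start +
                               skb->csum_offset);
    }
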
@@ -1217,11 +1211,11 @@ struct sk_buff *skb_gso_segment(struct sk_buff *skb, int features)
BUG_ON(skb_shinfo(skb)->frag_list);
- skb->mac.raw = skb->data;
- skb->mac_len = skb->nh.raw - skb->data;
+ skb_reset_mac_header(skb);
+ skb->mac_len = skb->network_header - skb->mac_header;
__skb_pull(skb, skb->mac_len);
- if (unlikely(skb->ip_summed != CHECKSUM_PARTIAL)) {
+ if (WARN_ON(skb->ip_summed != CHECKSUM_PARTIAL)) {
if (skb_header_cloned(skb) &&
(err = pskb_expand_head(skb, 0, 0, GFP_ATOMIC)))
return ERR_PTR(err);
@@ -1235,7 +1229,8 @@ struct sk_buff *skb_gso_segment(struct sk_buff *skb, int features)
segs = ERR_PTR(err);
if (err || skb_gso_ok(skb, features))
break;
- __skb_push(skb, skb->data - skb->nh.raw);
+ __skb_push(skb, (skb->data -
+ skb_network_header(skb)));
}
segs = ptype->gso_segment(skb, features);
break;
@@ -1243,7 +1238,7 @@ struct sk_buff *skb_gso_segment(struct sk_buff *skb, int features)
}
rcu_read_unlock();
- __skb_push(skb, skb->data - skb->mac.raw);
+ __skb_push(skb, skb->data - skb_mac_header(skb));
return segs;
}
@@ -1340,7 +1335,7 @@ static int dev_gso_segment(struct sk_buff *skb)
int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev)
{
if (likely(!skb->next)) {
- if (netdev_nit)
+ if (!list_empty(&ptype_all))
dev_queue_xmit_nit(skb, dev);
if (netif_needs_gso(dev, skb)) {
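
The netdev_nit counter is gone; whether any taps exist is now answered directly by list_empty(&ptype_all). A tap is simply a packet_type registered for ETH_P_ALL, e.g. (a minimal sketch; each tap receives its own clone and must free it):

    static int my_tap_rcv(struct sk_buff *skb, struct net_device *dev,
                          struct packet_type *pt,
                          struct net_device *orig_dev)
    {
            printk(KERN_DEBUG "tap: %s len=%u\n", dev->name, skb->len);
            kfree_skb(skb);
            return 0;
    }

    static struct packet_type my_tap __read_mostly = {
            .type = __constant_htons(ETH_P_ALL),    /* lands on ptype_all */
            .func = my_tap_rcv,
    };

    /* dev_add_pack(&my_tap) to attach, dev_remove_pack(&my_tap) to detach. */
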
@@ -1442,12 +1437,16 @@ int dev_queue_xmit(struct sk_buff *skb)
/* If packet is not checksummed and device does not support
* checksumming for this protocol, complete checksumming here.
*/
- if (skb->ip_summed == CHECKSUM_PARTIAL &&
- (!(dev->features & NETIF_F_GEN_CSUM) &&
- (!(dev->features & NETIF_F_IP_CSUM) ||
- skb->protocol != htons(ETH_P_IP))))
- if (skb_checksum_help(skb))
- goto out_kfree_skb;
+ if (skb->ip_summed == CHECKSUM_PARTIAL) {
+ skb_set_transport_header(skb, skb->csum_start -
+ skb_headroom(skb));
+
+ if (!(dev->features & NETIF_F_GEN_CSUM) &&
+ (!(dev->features & NETIF_F_IP_CSUM) ||
+ skb->protocol != htons(ETH_P_IP)))
+ if (skb_checksum_help(skb))
+ goto out_kfree_skb;
+ }
gso:
spin_lock_prefetch(&dev->queue_lock);
@@ -1543,9 +1542,9 @@ out:
Receiver routines
=======================================================================*/
-int netdev_max_backlog = 1000;
-int netdev_budget = 300;
-int weight_p = 64; /* old backlog weight */
+int netdev_max_backlog __read_mostly = 1000;
+int netdev_budget __read_mostly = 300;
+int weight_p __read_mostly = 64; /* old backlog weight */
DEFINE_PER_CPU(struct netif_rx_stats, netdev_rx_stat) = { 0, };
@@ -1577,7 +1576,7 @@ int netif_rx(struct sk_buff *skb)
if (netpoll_rx(skb))
return NET_RX_DROP;
- if (!skb->tstamp.off_sec)
+ if (!skb->tstamp.tv64)
net_timestamp(skb);
/*
@@ -1684,40 +1683,46 @@ static void net_tx_action(struct softirq_action *h)
}
}
-static __inline__ int deliver_skb(struct sk_buff *skb,
- struct packet_type *pt_prev,
- struct net_device *orig_dev)
+static inline int deliver_skb(struct sk_buff *skb,
+ struct packet_type *pt_prev,
+ struct net_device *orig_dev)
{
atomic_inc(&skb->users);
return pt_prev->func(skb, skb->dev, pt_prev, orig_dev);
}
#if defined(CONFIG_BRIDGE) || defined (CONFIG_BRIDGE_MODULE)
-int (*br_handle_frame_hook)(struct net_bridge_port *p, struct sk_buff **pskb);
+/* These hooks are defined here for ATM */
struct net_bridge;
struct net_bridge_fdb_entry *(*br_fdb_get_hook)(struct net_bridge *br,
unsigned char *addr);
-void (*br_fdb_put_hook)(struct net_bridge_fdb_entry *ent);
+void (*br_fdb_put_hook)(struct net_bridge_fdb_entry *ent) __read_mostly;
-static __inline__ int handle_bridge(struct sk_buff **pskb,
- struct packet_type **pt_prev, int *ret,
- struct net_device *orig_dev)
+/*
+ * If the bridge module is loaded, call the bridging hook.
+ * Returns NULL if the packet was consumed.
+ */
+struct sk_buff *(*br_handle_frame_hook)(struct net_bridge_port *p,
+ struct sk_buff *skb) __read_mostly;
+static inline struct sk_buff *handle_bridge(struct sk_buff *skb,
+ struct packet_type **pt_prev, int *ret,
+ struct net_device *orig_dev)
{
struct net_bridge_port *port;
- if ((*pskb)->pkt_type == PACKET_LOOPBACK ||
- (port = rcu_dereference((*pskb)->dev->br_port)) == NULL)
- return 0;
+ if (skb->pkt_type == PACKET_LOOPBACK ||
+ (port = rcu_dereference(skb->dev->br_port)) == NULL)
+ return skb;
if (*pt_prev) {
- *ret = deliver_skb(*pskb, *pt_prev, orig_dev);
+ *ret = deliver_skb(skb, *pt_prev, orig_dev);
*pt_prev = NULL;
}
- return br_handle_frame_hook(port, pskb);
+ return br_handle_frame_hook(port, skb);
}
#else
-#define handle_bridge(skb, pt_prev, ret, orig_dev) (0)
+#define handle_bridge(skb, pt_prev, ret, orig_dev) (skb)
#endif
#ifdef CONFIG_NET_CLS_ACT
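
The bridge hook's calling convention changes from int-plus-skb-by-reference to take-and-return: the hook hands the skb back if it is not interested, or returns NULL once it has consumed (forwarded, queued or freed) it. The shape of a conforming hook, schematically (should_bridge is a hypothetical predicate, not the bridge code):

    static struct sk_buff *example_frame_hook(struct net_bridge_port *p,
                                              struct sk_buff *skb)
    {
            if (!should_bridge(p, skb))
                    return skb;     /* not ours: continue normal delivery */

            kfree_skb(skb);         /* "consume" it in this sketch */
            return NULL;            /* tells the caller to stop */
    }
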
@@ -1747,10 +1752,10 @@ static int ing_filter(struct sk_buff *skb)
skb->tc_verd = SET_TC_AT(skb->tc_verd,AT_INGRESS);
- spin_lock(&dev->queue_lock);
+ spin_lock(&dev->ingress_lock);
if ((q = dev->qdisc_ingress) != NULL)
result = q->enqueue(skb, q);
- spin_unlock(&dev->queue_lock);
+ spin_unlock(&dev->ingress_lock);
}
@@ -1769,7 +1774,7 @@ int netif_receive_skb(struct sk_buff *skb)
if (skb->dev->poll && netpoll_rx(skb))
return NET_RX_DROP;
- if (!skb->tstamp.off_sec)
+ if (!skb->tstamp.tv64)
net_timestamp(skb);
if (!skb->iif)
@@ -1782,8 +1787,9 @@ int netif_receive_skb(struct sk_buff *skb)
__get_cpu_var(netdev_rx_stat).total++;
- skb->h.raw = skb->nh.raw = skb->data;
- skb->mac_len = skb->nh.raw - skb->mac.raw;
+ skb_reset_network_header(skb);
+ skb_reset_transport_header(skb);
+ skb->mac_len = skb->network_header - skb->mac_header;
pt_prev = NULL;
@@ -1823,7 +1829,8 @@ int netif_receive_skb(struct sk_buff *skb)
ncls:
#endif
- if (handle_bridge(&skb, &pt_prev, &ret, orig_dev))
+ skb = handle_bridge(skb, &pt_prev, &ret, orig_dev);
+ if (!skb)
goto out;
type = skb->protocol;
@@ -2044,7 +2051,7 @@ static int dev_ifconf(char __user *arg)
*/
total = 0;
- for (dev = dev_base; dev; dev = dev->next) {
+ for_each_netdev(dev) {
for (i = 0; i < NPROTO; i++) {
if (gifconf_list[i]) {
int done;
@@ -2076,26 +2083,28 @@ static int dev_ifconf(char __user *arg)
* This is invoked by the /proc filesystem handler to display a device
* in detail.
*/
-static __inline__ struct net_device *dev_get_idx(loff_t pos)
+void *dev_seq_start(struct seq_file *seq, loff_t *pos)
{
+ loff_t off;
struct net_device *dev;
- loff_t i;
- for (i = 0, dev = dev_base; dev && i < pos; ++i, dev = dev->next);
+ read_lock(&dev_base_lock);
+ if (!*pos)
+ return SEQ_START_TOKEN;
- return i == pos ? dev : NULL;
-}
+ off = 1;
+ for_each_netdev(dev)
+ if (off++ == *pos)
+ return dev;
-void *dev_seq_start(struct seq_file *seq, loff_t *pos)
-{
- read_lock(&dev_base_lock);
- return *pos ? dev_get_idx(*pos - 1) : SEQ_START_TOKEN;
+ return NULL;
}
void *dev_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
++*pos;
- return v == SEQ_START_TOKEN ? dev_base : ((struct net_device *)v)->next;
+ return v == SEQ_START_TOKEN ?
+ first_net_device() : next_net_device((struct net_device *)v);
}
void dev_seq_stop(struct seq_file *seq, void *v)
@@ -2105,28 +2114,25 @@ void dev_seq_stop(struct seq_file *seq, void *v)
static void dev_seq_printf_stats(struct seq_file *seq, struct net_device *dev)
{
- if (dev->get_stats) {
- struct net_device_stats *stats = dev->get_stats(dev);
-
- seq_printf(seq, "%6s:%8lu %7lu %4lu %4lu %4lu %5lu %10lu %9lu "
- "%8lu %7lu %4lu %4lu %4lu %5lu %7lu %10lu\n",
- dev->name, stats->rx_bytes, stats->rx_packets,
- stats->rx_errors,
- stats->rx_dropped + stats->rx_missed_errors,
- stats->rx_fifo_errors,
- stats->rx_length_errors + stats->rx_over_errors +
- stats->rx_crc_errors + stats->rx_frame_errors,
- stats->rx_compressed, stats->multicast,
- stats->tx_bytes, stats->tx_packets,
- stats->tx_errors, stats->tx_dropped,
- stats->tx_fifo_errors, stats->collisions,
- stats->tx_carrier_errors +
- stats->tx_aborted_errors +
- stats->tx_window_errors +
- stats->tx_heartbeat_errors,
- stats->tx_compressed);
- } else
- seq_printf(seq, "%6s: No statistics available.\n", dev->name);
+ struct net_device_stats *stats = dev->get_stats(dev);
+
+ seq_printf(seq, "%6s:%8lu %7lu %4lu %4lu %4lu %5lu %10lu %9lu "
+ "%8lu %7lu %4lu %4lu %4lu %5lu %7lu %10lu\n",
+ dev->name, stats->rx_bytes, stats->rx_packets,
+ stats->rx_errors,
+ stats->rx_dropped + stats->rx_missed_errors,
+ stats->rx_fifo_errors,
+ stats->rx_length_errors + stats->rx_over_errors +
+ stats->rx_crc_errors + stats->rx_frame_errors,
+ stats->rx_compressed, stats->multicast,
+ stats->tx_bytes, stats->tx_packets,
+ stats->tx_errors, stats->tx_dropped,
+ stats->tx_fifo_errors, stats->collisions,
+ stats->tx_carrier_errors +
+ stats->tx_aborted_errors +
+ stats->tx_window_errors +
+ stats->tx_heartbeat_errors,
+ stats->tx_compressed);
}
/*
@@ -2185,7 +2191,7 @@ static int softnet_seq_show(struct seq_file *seq, void *v)
return 0;
}
-static struct seq_operations dev_seq_ops = {
+static const struct seq_operations dev_seq_ops = {
.start = dev_seq_start,
.next = dev_seq_next,
.stop = dev_seq_stop,
@@ -2205,7 +2211,7 @@ static const struct file_operations dev_seq_fops = {
.release = seq_release,
};
-static struct seq_operations softnet_seq_ops = {
+static const struct seq_operations softnet_seq_ops = {
.start = softnet_seq_start,
.next = softnet_seq_next,
.stop = softnet_seq_stop,
@@ -2225,12 +2231,135 @@ static const struct file_operations softnet_seq_fops = {
.release = seq_release,
};
-#ifdef CONFIG_WIRELESS_EXT
-extern int wireless_proc_init(void);
-#else
-#define wireless_proc_init() 0
+static void *ptype_get_idx(loff_t pos)
+{
+ struct packet_type *pt = NULL;
+ loff_t i = 0;
+ int t;
+
+ list_for_each_entry_rcu(pt, &ptype_all, list) {
+ if (i == pos)
+ return pt;
+ ++i;
+ }
+
+ for (t = 0; t < 16; t++) {
+ list_for_each_entry_rcu(pt, &ptype_base[t], list) {
+ if (i == pos)
+ return pt;
+ ++i;
+ }
+ }
+ return NULL;
+}
+
+static void *ptype_seq_start(struct seq_file *seq, loff_t *pos)
+{
+ rcu_read_lock();
+ return *pos ? ptype_get_idx(*pos - 1) : SEQ_START_TOKEN;
+}
+
+static void *ptype_seq_next(struct seq_file *seq, void *v, loff_t *pos)
+{
+ struct packet_type *pt;
+ struct list_head *nxt;
+ int hash;
+
+ ++*pos;
+ if (v == SEQ_START_TOKEN)
+ return ptype_get_idx(0);
+
+ pt = v;
+ nxt = pt->list.next;
+ if (pt->type == htons(ETH_P_ALL)) {
+ if (nxt != &ptype_all)
+ goto found;
+ hash = 0;
+ nxt = ptype_base[0].next;
+ } else
+ hash = ntohs(pt->type) & 15;
+
+ while (nxt == &ptype_base[hash]) {
+ if (++hash >= 16)
+ return NULL;
+ nxt = ptype_base[hash].next;
+ }
+found:
+ return list_entry(nxt, struct packet_type, list);
+}
+
+static void ptype_seq_stop(struct seq_file *seq, void *v)
+{
+ rcu_read_unlock();
+}
+
+static void ptype_seq_decode(struct seq_file *seq, void *sym)
+{
+#ifdef CONFIG_KALLSYMS
+ unsigned long offset = 0, symsize;
+ const char *symname;
+ char *modname;
+ char namebuf[128];
+
+ symname = kallsyms_lookup((unsigned long)sym, &symsize, &offset,
+ &modname, namebuf);
+
+ if (symname) {
+ char *delim = ":";
+
+ if (!modname)
+ modname = delim = "";
+ seq_printf(seq, "%s%s%s%s+0x%lx", delim, modname, delim,
+ symname, offset);
+ return;
+ }
#endif
+ seq_printf(seq, "[%p]", sym);
+}
+
+static int ptype_seq_show(struct seq_file *seq, void *v)
+{
+ struct packet_type *pt = v;
+
+ if (v == SEQ_START_TOKEN)
+ seq_puts(seq, "Type Device Function\n");
+ else {
+ if (pt->type == htons(ETH_P_ALL))
+ seq_puts(seq, "ALL ");
+ else
+ seq_printf(seq, "%04x", ntohs(pt->type));
+
+ seq_printf(seq, " %-8s ",
+ pt->dev ? pt->dev->name : "");
+ ptype_seq_decode(seq, pt->func);
+ seq_putc(seq, '\n');
+ }
+
+ return 0;
+}
+
+static const struct seq_operations ptype_seq_ops = {
+ .start = ptype_seq_start,
+ .next = ptype_seq_next,
+ .stop = ptype_seq_stop,
+ .show = ptype_seq_show,
+};
+
+static int ptype_seq_open(struct inode *inode, struct file *file)
+{
+ return seq_open(file, &ptype_seq_ops);
+}
+
+static const struct file_operations ptype_seq_fops = {
+ .owner = THIS_MODULE,
+ .open = ptype_seq_open,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = seq_release,
+};
+
+
static int __init dev_proc_init(void)
{
int rc = -ENOMEM;
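
The net effect is a new read-only /proc/net/ptype file listing every registered protocol handler and tap, one per line, resolved through kallsyms where available. Reading it needs nothing special:

    #include <stdio.h>

    /* Dump the table produced by ptype_seq_show() above. */
    int main(void)
    {
            char line[256];
            FILE *f = fopen("/proc/net/ptype", "r");

            if (!f)
                    return 1;
            while (fgets(line, sizeof(line), f))
                    fputs(line, stdout);
            return fclose(f) ? 1 : 0;
    }
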
@@ -2239,13 +2368,18 @@ static int __init dev_proc_init(void)
goto out;
if (!proc_net_fops_create("softnet_stat", S_IRUGO, &softnet_seq_fops))
goto out_dev;
- if (wireless_proc_init())
+ if (!proc_net_fops_create("ptype", S_IRUGO, &ptype_seq_fops))
+ goto out_dev2;
+
+ if (wext_proc_init())
goto out_softnet;
rc = 0;
out:
return rc;
out_softnet:
proc_net_remove("softnet_stat");
+out_dev2:
+ proc_net_remove("ptype");
out_dev:
proc_net_remove("dev");
goto out;
@@ -2795,29 +2929,9 @@ int dev_ioctl(unsigned int cmd, void __user *arg)
ret = -EFAULT;
return ret;
}
-#ifdef CONFIG_WIRELESS_EXT
/* Take care of Wireless Extensions */
- if (cmd >= SIOCIWFIRST && cmd <= SIOCIWLAST) {
- /* If command is `set a parameter', or
- * `get the encoding parameters', check if
- * the user has the right to do it */
- if (IW_IS_SET(cmd) || cmd == SIOCGIWENCODE
- || cmd == SIOCGIWENCODEEXT) {
- if (!capable(CAP_NET_ADMIN))
- return -EPERM;
- }
- dev_load(ifr.ifr_name);
- rtnl_lock();
- /* Follow me in net/core/wireless.c */
- ret = wireless_process_ioctl(&ifr, cmd);
- rtnl_unlock();
- if (IW_IS_GET(cmd) &&
- copy_to_user(arg, &ifr,
- sizeof(struct ifreq)))
- ret = -EFAULT;
- return ret;
- }
-#endif /* CONFIG_WIRELESS_EXT */
+ if (cmd >= SIOCIWFIRST && cmd <= SIOCIWLAST)
+ return wext_handle_ioctl(&ifr, cmd, arg);
return -EINVAL;
}
}
@@ -2847,7 +2961,7 @@ static int dev_boot_phase = 1;
static DEFINE_SPINLOCK(net_todo_list_lock);
static struct list_head net_todo_list = LIST_HEAD_INIT(net_todo_list);
-static inline void net_set_todo(struct net_device *dev)
+static void net_set_todo(struct net_device *dev)
{
spin_lock(&net_todo_list_lock);
list_add_tail(&dev->todo_list, &net_todo_list);
@@ -2888,9 +3002,7 @@ int register_netdevice(struct net_device *dev)
spin_lock_init(&dev->queue_lock);
spin_lock_init(&dev->_xmit_lock);
dev->xmit_lock_owner = -1;
-#ifdef CONFIG_NET_CLS_ACT
spin_lock_init(&dev->ingress_lock);
-#endif
dev->iflink = -1;
@@ -2974,11 +3086,9 @@ int register_netdevice(struct net_device *dev)
set_bit(__LINK_STATE_PRESENT, &dev->state);
- dev->next = NULL;
dev_init_scheduler(dev);
write_lock_bh(&dev_base_lock);
- *dev_tail = dev;
- dev_tail = &dev->next;
+ list_add_tail(&dev->dev_list, &dev_base_head);
hlist_add_head(&dev->name_hlist, head);
hlist_add_head(&dev->index_hlist, dev_index_hash(dev->ifindex));
dev_hold(dev);
@@ -3002,7 +3112,7 @@ out:
* chain. 0 is returned on success. A negative errno code is returned
* on a failure to set up the device, or if the name is a duplicate.
*
- * This is a wrapper around register_netdev that takes the rtnl semaphore
+ * This is a wrapper around register_netdevice that takes the rtnl semaphore
* and expands the device name if you passed a format string to
* alloc_netdev.
*/
@@ -3157,6 +3267,11 @@ out:
mutex_unlock(&net_todo_run_mutex);
}
+static struct net_device_stats *internal_stats(struct net_device *dev)
+{
+ return &dev->stats;
+}
+
/**
* alloc_netdev - allocate network device
* @sizeof_priv: size of private data to allocate space for
@@ -3192,6 +3307,7 @@ struct net_device *alloc_netdev(int sizeof_priv, const char *name,
if (sizeof_priv)
dev->priv = netdev_priv(dev);
+ dev->get_stats = internal_stats;
setup(dev);
strcpy(dev->name, name);
return dev;
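
Because alloc_netdev() now installs internal_stats by default, dev->get_stats is never NULL, which is what lets dev_seq_printf_stats() above drop its fallback branch. Drivers content with the counters embedded in struct net_device can just bump dev->stats and provide no method of their own; roughly:

    /* Sketch: a transmit routine using the embedded counters. */
    static int example_xmit(struct sk_buff *skb, struct net_device *dev)
    {
            dev->stats.tx_packets++;
            dev->stats.tx_bytes += skb->len;
            dev_kfree_skb(skb);     /* pretend the hardware sent it */
            return 0;
    }
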
@@ -3246,8 +3362,6 @@ void synchronize_net(void)
void unregister_netdevice(struct net_device *dev)
{
- struct net_device *d, **dp;
-
BUG_ON(dev_boot_phase);
ASSERT_RTNL();
@@ -3267,19 +3381,11 @@ void unregister_netdevice(struct net_device *dev)
dev_close(dev);
/* And unlink it from device chain. */
- for (dp = &dev_base; (d = *dp) != NULL; dp = &d->next) {
- if (d == dev) {
- write_lock_bh(&dev_base_lock);
- hlist_del(&dev->name_hlist);
- hlist_del(&dev->index_hlist);
- if (dev_tail == &dev->next)
- dev_tail = dp;
- *dp = d->next;
- write_unlock_bh(&dev_base_lock);
- break;
- }
- }
- BUG_ON(!d);
+ write_lock_bh(&dev_base_lock);
+ list_del(&dev->dev_list);
+ hlist_del(&dev->name_hlist);
+ hlist_del(&dev->index_hlist);
+ write_unlock_bh(&dev_base_lock);
dev->reg_state = NETREG_UNREGISTERING;
diff --git a/net/core/dev_mcast.c b/net/core/dev_mcast.c
index 56b310c0c86..5a54053386c 100644
--- a/net/core/dev_mcast.c
+++ b/net/core/dev_mcast.c
@@ -223,7 +223,7 @@ static void *dev_mc_seq_start(struct seq_file *seq, loff_t *pos)
loff_t off = 0;
read_lock(&dev_base_lock);
- for (dev = dev_base; dev; dev = dev->next) {
+ for_each_netdev(dev) {
if (off++ == *pos)
return dev;
}
@@ -232,9 +232,8 @@ static void *dev_mc_seq_start(struct seq_file *seq, loff_t *pos)
static void *dev_mc_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
- struct net_device *dev = v;
++*pos;
- return dev->next;
+ return next_net_device((struct net_device *)v);
}
static void dev_mc_seq_stop(struct seq_file *seq, void *v)
@@ -264,7 +263,7 @@ static int dev_mc_seq_show(struct seq_file *seq, void *v)
return 0;
}
-static struct seq_operations dev_mc_seq_ops = {
+static const struct seq_operations dev_mc_seq_ops = {
.start = dev_mc_seq_start,
.next = dev_mc_seq_next,
.stop = dev_mc_seq_stop,
diff --git a/net/core/ethtool.c b/net/core/ethtool.c
index 6168edd137d..8d5e5a09b57 100644
--- a/net/core/ethtool.c
+++ b/net/core/ethtool.c
@@ -836,7 +836,7 @@ int dev_ethtool(struct ifreq *ifr)
return -EPERM;
}
- if(dev->ethtool_ops->begin)
+ if (dev->ethtool_ops->begin)
if ((rc = dev->ethtool_ops->begin(dev)) < 0)
return rc;
@@ -952,7 +952,7 @@ int dev_ethtool(struct ifreq *ifr)
rc = -EOPNOTSUPP;
}
- if(dev->ethtool_ops->complete)
+ if (dev->ethtool_ops->complete)
dev->ethtool_ops->complete(dev);
if (old_features != dev->features)
diff --git a/net/core/fib_rules.c b/net/core/fib_rules.c
index 7174ced75ef..8c5474e1668 100644
--- a/net/core/fib_rules.c
+++ b/net/core/fib_rules.c
@@ -44,6 +44,12 @@ static void rules_ops_put(struct fib_rules_ops *ops)
module_put(ops->owner);
}
+static void flush_route_cache(struct fib_rules_ops *ops)
+{
+ if (ops->flush_cache)
+ ops->flush_cache();
+}
+
int fib_rules_register(struct fib_rules_ops *ops)
{
int err = -EEXIST;
@@ -132,10 +138,25 @@ int fib_rules_lookup(struct fib_rules_ops *ops, struct flowi *fl,
rcu_read_lock();
list_for_each_entry_rcu(rule, ops->rules_list, list) {
+jumped:
if (!fib_rule_match(rule, ops, fl, flags))
continue;
- err = ops->action(rule, fl, flags, arg);
+ if (rule->action == FR_ACT_GOTO) {
+ struct fib_rule *target;
+
+ target = rcu_dereference(rule->ctarget);
+ if (target == NULL) {
+ continue;
+ } else {
+ rule = target;
+ goto jumped;
+ }
+ } else if (rule->action == FR_ACT_NOP)
+ continue;
+ else
+ err = ops->action(rule, fl, flags, arg);
+
if (err != -EAGAIN) {
fib_rule_get(rule);
arg->rule = rule;
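
fib_rules_lookup() now handles two generic actions itself: FR_ACT_NOP skips the rule and FR_ACT_GOTO restarts matching at the pre-resolved target via the jumped label; everything else still reaches ops->action(). Termination relies on an invariant enforced at insertion time, sketched as:

    /* rules_list is sorted by ->pref and fib_nl_newrule() rejects any
     * goto whose target preference is not strictly greater, so every
     * jump moves forward in the list and the walk is loop-free. */
    static int goto_is_well_formed(const struct fib_rule *rule)
    {
            if (rule->action != FR_ACT_GOTO)
                    return 1;
            return rule->target > rule->pref;
    }
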
@@ -174,13 +195,13 @@ errout:
return err;
}
-int fib_nl_newrule(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
+static int fib_nl_newrule(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
{
struct fib_rule_hdr *frh = nlmsg_data(nlh);
struct fib_rules_ops *ops = NULL;
struct fib_rule *rule, *r, *last = NULL;
struct nlattr *tb[FRA_MAX+1];
- int err = -EINVAL;
+ int err = -EINVAL, unresolved = 0;
if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*frh)))
goto errout;
@@ -237,6 +258,28 @@ int fib_nl_newrule(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
if (!rule->pref && ops->default_pref)
rule->pref = ops->default_pref();
+ err = -EINVAL;
+ if (tb[FRA_GOTO]) {
+ if (rule->action != FR_ACT_GOTO)
+ goto errout_free;
+
+ rule->target = nla_get_u32(tb[FRA_GOTO]);
+ /* Backward jumps are prohibited to avoid endless loops */
+ if (rule->target <= rule->pref)
+ goto errout_free;
+
+ list_for_each_entry(r, ops->rules_list, list) {
+ if (r->pref == rule->target) {
+ rule->ctarget = r;
+ break;
+ }
+ }
+
+ if (rule->ctarget == NULL)
+ unresolved = 1;
+ } else if (rule->action == FR_ACT_GOTO)
+ goto errout_free;
+
err = ops->configure(rule, skb, nlh, frh, tb);
if (err < 0)
goto errout_free;
@@ -249,12 +292,35 @@ int fib_nl_newrule(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
fib_rule_get(rule);
+ if (ops->unresolved_rules) {
+ /*
+ * There are unresolved goto rules in the list; check whether
+ * any of them point to this new rule.
+ */
+ list_for_each_entry(r, ops->rules_list, list) {
+ if (r->action == FR_ACT_GOTO &&
+ r->target == rule->pref) {
+ BUG_ON(r->ctarget != NULL);
+ rcu_assign_pointer(r->ctarget, rule);
+ if (--ops->unresolved_rules == 0)
+ break;
+ }
+ }
+ }
+
+ if (rule->action == FR_ACT_GOTO)
+ ops->nr_goto_rules++;
+
+ if (unresolved)
+ ops->unresolved_rules++;
+
if (last)
list_add_rcu(&rule->list, &last->list);
else
list_add_rcu(&rule->list, ops->rules_list);
notify_rule_change(RTM_NEWRULE, rule, ops, nlh, NETLINK_CB(skb).pid);
+ flush_route_cache(ops);
rules_ops_put(ops);
return 0;
@@ -265,11 +331,11 @@ errout:
return err;
}
-int fib_nl_delrule(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
+static int fib_nl_delrule(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
{
struct fib_rule_hdr *frh = nlmsg_data(nlh);
struct fib_rules_ops *ops = NULL;
- struct fib_rule *rule;
+ struct fib_rule *rule, *tmp;
struct nlattr *tb[FRA_MAX+1];
int err = -EINVAL;
@@ -322,10 +388,30 @@ int fib_nl_delrule(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
}
list_del_rcu(&rule->list);
+
+ if (rule->action == FR_ACT_GOTO)
+ ops->nr_goto_rules--;
+
+ /*
+ * Check if this rule is the target of any goto rule. If so,
+ * disable those rules. As this operation can be very
+ * expensive, it is only performed if goto rules have
+ * actually been added.
+ */
+ if (ops->nr_goto_rules > 0) {
+ list_for_each_entry(tmp, ops->rules_list, list) {
+ if (tmp->ctarget == rule) {
+ rcu_assign_pointer(tmp->ctarget, NULL);
+ ops->unresolved_rules++;
+ }
+ }
+ }
+
synchronize_rcu();
notify_rule_change(RTM_DELRULE, rule, ops, nlh,
NETLINK_CB(skb).pid);
fib_rule_put(rule);
+ flush_route_cache(ops);
rules_ops_put(ops);
return 0;
}
@@ -371,9 +457,16 @@ static int fib_nl_fill_rule(struct sk_buff *skb, struct fib_rule *rule,
frh->action = rule->action;
frh->flags = rule->flags;
- if (rule->ifname[0])
+ if (rule->action == FR_ACT_GOTO && rule->ctarget == NULL)
+ frh->flags |= FIB_RULE_UNRESOLVED;
+
+ if (rule->ifname[0]) {
NLA_PUT_STRING(skb, FRA_IFNAME, rule->ifname);
+ if (rule->ifindex == -1)
+ frh->flags |= FIB_RULE_DEV_DETACHED;
+ }
+
if (rule->pref)
NLA_PUT_U32(skb, FRA_PRIORITY, rule->pref);
@@ -383,6 +476,9 @@ static int fib_nl_fill_rule(struct sk_buff *skb, struct fib_rule *rule,
if (rule->mark_mask || rule->mark)
NLA_PUT_U32(skb, FRA_FWMASK, rule->mark_mask);
+ if (rule->target)
+ NLA_PUT_U32(skb, FRA_GOTO, rule->target);
+
if (ops->fill(rule, skb, nlh, frh) < 0)
goto nla_put_failure;
@@ -393,19 +489,14 @@ nla_put_failure:
return -EMSGSIZE;
}
-int fib_rules_dump(struct sk_buff *skb, struct netlink_callback *cb, int family)
+static int dump_rules(struct sk_buff *skb, struct netlink_callback *cb,
+ struct fib_rules_ops *ops)
{
int idx = 0;
struct fib_rule *rule;
- struct fib_rules_ops *ops;
-
- ops = lookup_rules_ops(family);
- if (ops == NULL)
- return -EAFNOSUPPORT;
- rcu_read_lock();
- list_for_each_entry_rcu(rule, ops->rules_list, list) {
- if (idx < cb->args[0])
+ list_for_each_entry(rule, ops->rules_list, list) {
+ if (idx < cb->args[1])
goto skip;
if (fib_nl_fill_rule(skb, rule, NETLINK_CB(cb->skb).pid,
@@ -415,14 +506,44 @@ int fib_rules_dump(struct sk_buff *skb, struct netlink_callback *cb, int family)
skip:
idx++;
}
- rcu_read_unlock();
- cb->args[0] = idx;
+ cb->args[1] = idx;
rules_ops_put(ops);
return skb->len;
}
-EXPORT_SYMBOL_GPL(fib_rules_dump);
+static int fib_nl_dumprule(struct sk_buff *skb, struct netlink_callback *cb)
+{
+ struct fib_rules_ops *ops;
+ int idx = 0, family;
+
+ family = rtnl_msg_family(cb->nlh);
+ if (family != AF_UNSPEC) {
+ /* Protocol specific dump request */
+ ops = lookup_rules_ops(family);
+ if (ops == NULL)
+ return -EAFNOSUPPORT;
+
+ return dump_rules(skb, cb, ops);
+ }
+
+ rcu_read_lock();
+ list_for_each_entry_rcu(ops, &rules_ops, list) {
+ if (idx < cb->args[0] || !try_module_get(ops->owner))
+ goto skip;
+
+ if (dump_rules(skb, cb, ops) < 0)
+ break;
+
+ cb->args[1] = 0;
+ skip:
+ idx++;
+ }
+ rcu_read_unlock();
+ cb->args[0] = idx;
+
+ return skb->len;
+}
static void notify_rule_change(int event, struct fib_rule *rule,
struct fib_rules_ops *ops, struct nlmsghdr *nlh,
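
fib_nl_dumprule() is a two-level netlink dump: cb->args[0] tracks which fib_rules_ops (family) the previous skb stopped at, cb->args[1] the rule index inside it, and the inner cursor is reset whenever a family completes. The same resume pattern works for any nested table; schematically (struct outer, outer_list and dump_inner are hypothetical):

    static int dump_nested(struct sk_buff *skb, struct netlink_callback *cb)
    {
            struct outer *o;
            int idx = 0;

            list_for_each_entry(o, &outer_list, list) {
                    if (idx < cb->args[0])
                            goto skip;
                    if (dump_inner(skb, cb, o) < 0)  /* consumes args[1] */
                            break;                   /* skb full: resume here */
                    cb->args[1] = 0;                 /* next outer starts fresh */
    skip:
                    idx++;
            }
            cb->args[0] = idx;
            return skb->len;
    }
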
@@ -501,6 +622,10 @@ static struct notifier_block fib_rules_notifier = {
static int __init fib_rules_init(void)
{
+ rtnl_register(PF_UNSPEC, RTM_NEWRULE, fib_nl_newrule, NULL);
+ rtnl_register(PF_UNSPEC, RTM_DELRULE, fib_nl_delrule, NULL);
+ rtnl_register(PF_UNSPEC, RTM_GETRULE, NULL, fib_nl_dumprule);
+
return register_netdevice_notifier(&fib_rules_notifier);
}
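
Registering through rtnl_register() replaces entries in the static rtnetlink dispatch table, which is why fib_nl_newrule()/fib_nl_delrule() and the dump callback can become static and lose their exports (neigh_init() further down does the same for the neighbour handlers). The pattern for any subsystem, where RTM_*FOO and the foo_* handlers are placeholders with the usual doit/dumpit signatures:

    static int __init foo_rtnl_init(void)
    {
            /* doit handlers take (skb, nlh, arg); dumpit handlers take
             * (skb, netlink_callback); pass NULL for the one not used. */
            rtnl_register(PF_UNSPEC, RTM_NEWFOO, foo_new, NULL);
            rtnl_register(PF_UNSPEC, RTM_DELFOO, foo_del, NULL);
            rtnl_register(PF_UNSPEC, RTM_GETFOO, NULL, foo_dump);
            return 0;
    }
    subsys_initcall(foo_rtnl_init);
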
diff --git a/net/core/filter.c b/net/core/filter.c
index 8d185a089c5..bd903aaf7aa 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -42,11 +42,11 @@ static void *__load_pointer(struct sk_buff *skb, int k)
u8 *ptr = NULL;
if (k >= SKF_NET_OFF)
- ptr = skb->nh.raw + k - SKF_NET_OFF;
+ ptr = skb_network_header(skb) + k - SKF_NET_OFF;
else if (k >= SKF_LL_OFF)
- ptr = skb->mac.raw + k - SKF_LL_OFF;
+ ptr = skb_mac_header(skb) + k - SKF_LL_OFF;
- if (ptr >= skb->head && ptr < skb->tail)
+ if (ptr >= skb->head && ptr < skb_tail_pointer(skb))
return ptr;
return NULL;
}
diff --git a/net/core/gen_stats.c b/net/core/gen_stats.c
index 259473d0559..bcc25591d8a 100644
--- a/net/core/gen_stats.c
+++ b/net/core/gen_stats.c
@@ -61,7 +61,7 @@ gnet_stats_start_copy_compat(struct sk_buff *skb, int type, int tc_stats_type,
spin_lock_bh(lock);
d->lock = lock;
if (type)
- d->tail = (struct rtattr *) skb->tail;
+ d->tail = (struct rtattr *)skb_tail_pointer(skb);
d->skb = skb;
d->compat_tc_stats = tc_stats_type;
d->compat_xstats = xstats_type;
@@ -212,7 +212,7 @@ int
gnet_stats_finish_copy(struct gnet_dump *d)
{
if (d->tail)
- d->tail->rta_len = d->skb->tail - (u8 *) d->tail;
+ d->tail->rta_len = skb_tail_pointer(d->skb) - (u8 *)d->tail;
if (d->compat_tc_stats)
if (gnet_stats_copy(d, d->compat_tc_stats, &d->tc_stats,
diff --git a/net/core/link_watch.c b/net/core/link_watch.c
index 8b45c9d3b24..e3c26a9ccad 100644
--- a/net/core/link_watch.c
+++ b/net/core/link_watch.c
@@ -79,7 +79,7 @@ static void rfc2863_policy(struct net_device *dev)
case IF_LINK_MODE_DEFAULT:
default:
break;
- };
+ }
dev->operstate = operstate;
diff --git a/net/core/neighbour.c b/net/core/neighbour.c
index 841e3f32cab..6f3bb73053c 100644
--- a/net/core/neighbour.c
+++ b/net/core/neighbour.c
@@ -1125,7 +1125,7 @@ int neigh_compat_output(struct sk_buff *skb)
{
struct net_device *dev = skb->dev;
- __skb_pull(skb, skb->nh.raw - skb->data);
+ __skb_pull(skb, skb_network_offset(skb));
if (dev->hard_header &&
dev->hard_header(skb, dev, ntohs(skb->protocol), NULL, NULL,
@@ -1147,7 +1147,7 @@ int neigh_resolve_output(struct sk_buff *skb)
if (!dst || !(neigh = dst->neighbour))
goto discard;
- __skb_pull(skb, skb->nh.raw - skb->data);
+ __skb_pull(skb, skb_network_offset(skb));
if (!neigh_event_send(neigh, skb)) {
int err;
@@ -1190,7 +1190,7 @@ int neigh_connected_output(struct sk_buff *skb)
struct neighbour *neigh = dst->neighbour;
struct net_device *dev = neigh->dev;
- __skb_pull(skb, skb->nh.raw - skb->data);
+ __skb_pull(skb, skb_network_offset(skb));
read_lock_bh(&neigh->lock);
err = dev->hard_header(skb, dev, ntohs(skb->protocol),
@@ -1441,7 +1441,7 @@ int neigh_table_clear(struct neigh_table *tbl)
return 0;
}
-int neigh_delete(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
+static int neigh_delete(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
{
struct ndmsg *ndm;
struct nlattr *dst_attr;
@@ -1506,7 +1506,7 @@ out:
return err;
}
-int neigh_add(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
+static int neigh_add(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
{
struct ndmsg *ndm;
struct nlattr *tb[NDA_MAX+1];
@@ -1786,7 +1786,7 @@ static struct nla_policy nl_ntbl_parm_policy[NDTPA_MAX+1] __read_mostly = {
[NDTPA_LOCKTIME] = { .type = NLA_U64 },
};
-int neightbl_set(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
+static int neightbl_set(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
{
struct neigh_table *tbl;
struct ndtmsg *ndtmsg;
@@ -1910,7 +1910,7 @@ errout:
return err;
}
-int neightbl_dump_info(struct sk_buff *skb, struct netlink_callback *cb)
+static int neightbl_dump_info(struct sk_buff *skb, struct netlink_callback *cb)
{
int family, tidx, nidx = 0;
int tbl_skip = cb->args[0];
@@ -2034,7 +2034,7 @@ out:
return rc;
}
-int neigh_dump_info(struct sk_buff *skb, struct netlink_callback *cb)
+static int neigh_dump_info(struct sk_buff *skb, struct netlink_callback *cb)
{
struct neigh_table *tbl;
int t, family, s_t;
@@ -2393,7 +2393,7 @@ static int neigh_stat_seq_show(struct seq_file *seq, void *v)
return 0;
}
-static struct seq_operations neigh_stat_seq_ops = {
+static const struct seq_operations neigh_stat_seq_ops = {
.start = neigh_stat_seq_start,
.next = neigh_stat_seq_next,
.stop = neigh_stat_seq_stop,
@@ -2746,14 +2746,26 @@ void neigh_sysctl_unregister(struct neigh_parms *p)
#endif /* CONFIG_SYSCTL */
+static int __init neigh_init(void)
+{
+ rtnl_register(PF_UNSPEC, RTM_NEWNEIGH, neigh_add, NULL);
+ rtnl_register(PF_UNSPEC, RTM_DELNEIGH, neigh_delete, NULL);
+ rtnl_register(PF_UNSPEC, RTM_GETNEIGH, NULL, neigh_dump_info);
+
+ rtnl_register(PF_UNSPEC, RTM_GETNEIGHTBL, NULL, neightbl_dump_info);
+ rtnl_register(PF_UNSPEC, RTM_SETNEIGHTBL, neightbl_set, NULL);
+
+ return 0;
+}
+
+subsys_initcall(neigh_init);
+
EXPORT_SYMBOL(__neigh_event_send);
EXPORT_SYMBOL(neigh_changeaddr);
EXPORT_SYMBOL(neigh_compat_output);
EXPORT_SYMBOL(neigh_connected_output);
EXPORT_SYMBOL(neigh_create);
-EXPORT_SYMBOL(neigh_delete);
EXPORT_SYMBOL(neigh_destroy);
-EXPORT_SYMBOL(neigh_dump_info);
EXPORT_SYMBOL(neigh_event_ns);
EXPORT_SYMBOL(neigh_ifdown);
EXPORT_SYMBOL(neigh_lookup);
diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c
index 4cbb1290a6a..b21307b15b8 100644
--- a/net/core/net-sysfs.c
+++ b/net/core/net-sysfs.c
@@ -352,8 +352,8 @@ static ssize_t wireless_show(struct device *d, char *buf,
read_lock(&dev_base_lock);
if (dev_isalive(dev)) {
- if(dev->wireless_handlers &&
- dev->wireless_handlers->get_wireless_stats)
+ if (dev->wireless_handlers &&
+ dev->wireless_handlers->get_wireless_stats)
iw = dev->wireless_handlers->get_wireless_stats(dev);
if (iw != NULL)
ret = (*format)(iw, buf);
@@ -412,20 +412,25 @@ static int netdev_uevent(struct device *d, char **envp,
int num_envp, char *buf, int size)
{
struct net_device *dev = to_net_dev(d);
- int i = 0;
- int n;
+ int retval, len = 0, i = 0;
/* pass interface to uevent. */
- envp[i++] = buf;
- n = snprintf(buf, size, "INTERFACE=%s", dev->name) + 1;
- buf += n;
- size -= n;
-
- if ((size <= 0) || (i >= num_envp))
- return -ENOMEM;
-
+ retval = add_uevent_var(envp, num_envp, &i,
+ buf, size, &len,
+ "INTERFACE=%s", dev->name);
+ if (retval)
+ goto exit;
+
+ /* pass ifindex to uevent.
+ * ifindex is useful as it won't change (interface name may change)
+ * and is what RtNetlink uses natively. */
+ retval = add_uevent_var(envp, num_envp, &i,
+ buf, size, &len,
+ "IFINDEX=%d", dev->ifindex);
+
+exit:
envp[i] = NULL;
- return 0;
+ return retval;
}
#endif
diff --git a/net/core/netpoll.c b/net/core/netpoll.c
index 4581ece48bb..b316435b0e2 100644
--- a/net/core/netpoll.c
+++ b/net/core/netpoll.c
@@ -86,7 +86,7 @@ static __sum16 checksum_udp(struct sk_buff *skb, struct udphdr *uh,
{
__wsum psum;
- if (uh->check == 0 || skb->ip_summed == CHECKSUM_UNNECESSARY)
+ if (uh->check == 0 || skb_csum_unnecessary(skb))
return 0;
psum = csum_tcpudp_nofold(saddr, daddr, ulen, IPPROTO_UDP, 0);
@@ -293,10 +293,12 @@ void netpoll_send_udp(struct netpoll *np, const char *msg, int len)
if (!skb)
return;
- memcpy(skb->data, msg, len);
+ skb_copy_to_linear_data(skb, msg, len);
skb->len += len;
- skb->h.uh = udph = (struct udphdr *) skb_push(skb, sizeof(*udph));
+ skb_push(skb, sizeof(*udph));
+ skb_reset_transport_header(skb);
+ udph = udp_hdr(skb);
udph->source = htons(np->local_port);
udph->dest = htons(np->remote_port);
udph->len = htons(udp_len);
@@ -308,7 +310,9 @@ void netpoll_send_udp(struct netpoll *np, const char *msg, int len)
if (udph->check == 0)
udph->check = CSUM_MANGLED_0;
- skb->nh.iph = iph = (struct iphdr *)skb_push(skb, sizeof(*iph));
+ skb_push(skb, sizeof(*iph));
+ skb_reset_network_header(skb);
+ iph = ip_hdr(skb);
/* iph->version = 4; iph->ihl = 5; */
put_unaligned(0x45, (unsigned char *)iph);
@@ -324,7 +328,7 @@ void netpoll_send_udp(struct netpoll *np, const char *msg, int len)
iph->check = ip_fast_csum((unsigned char *)iph, iph->ihl);
eth = (struct ethhdr *) skb_push(skb, ETH_HLEN);
- skb->mac.raw = skb->data;
+ skb_reset_mac_header(skb);
skb->protocol = eth->h_proto = htons(ETH_P_IP);
memcpy(eth->h_source, np->local_mac, 6);
memcpy(eth->h_dest, np->remote_mac, 6);
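
netpoll_send_udp() builds the frame back to front: the payload is copied into the linear area, then each skb_push() prepends a header and the matching skb_reset_*_header() records its offset, since the reset helpers snapshot skb->data at call time. The order, extracted into a sketch (assumes the skb was allocated with headroom for all three headers):

    static void build_udp_frame(struct sk_buff *skb,
                                const void *payload, int len)
    {
            skb_copy_to_linear_data(skb, payload, len);  /* UDP payload */
            skb->len += len;

            skb_push(skb, sizeof(struct udphdr));   /* prepend UDP...   */
            skb_reset_transport_header(skb);        /* ...and mark it   */

            skb_push(skb, sizeof(struct iphdr));    /* prepend IP...    */
            skb_reset_network_header(skb);          /* ...and mark it   */

            skb_push(skb, ETH_HLEN);                /* prepend Ethernet */
            skb_reset_mac_header(skb);
    }
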
@@ -359,8 +363,9 @@ static void arp_reply(struct sk_buff *skb)
(2 * sizeof(u32)))))
return;
- skb->h.raw = skb->nh.raw = skb->data;
- arp = skb->nh.arph;
+ skb_reset_network_header(skb);
+ skb_reset_transport_header(skb);
+ arp = arp_hdr(skb);
if ((arp->ar_hrd != htons(ARPHRD_ETHER) &&
arp->ar_hrd != htons(ARPHRD_IEEE802)) ||
@@ -389,7 +394,7 @@ static void arp_reply(struct sk_buff *skb)
if (!send_skb)
return;
- send_skb->nh.raw = send_skb->data;
+ skb_reset_network_header(send_skb);
arp = (struct arphdr *) skb_put(send_skb, size);
send_skb->dev = skb->dev;
send_skb->protocol = htons(ETH_P_ARP);
@@ -443,7 +448,7 @@ int __netpoll_rx(struct sk_buff *skb)
goto out;
/* check if netpoll clients need ARP */
- if (skb->protocol == __constant_htons(ETH_P_ARP) &&
+ if (skb->protocol == htons(ETH_P_ARP) &&
atomic_read(&trapped)) {
skb_queue_tail(&npi->arp_tx, skb);
return 1;
diff --git a/net/core/pktgen.c b/net/core/pktgen.c
index 4b01496dc33..b92a322872a 100644
--- a/net/core/pktgen.c
+++ b/net/core/pktgen.c
@@ -164,14 +164,11 @@
#define VERSION "pktgen v2.68: Packet Generator for packet performance testing.\n"
-/* #define PG_DEBUG(a) a */
-#define PG_DEBUG(a)
-
/* The buckets are exponential in 'width' */
#define LAT_BUCKETS_MAX 32
#define IP_NAME_SZ 32
#define MAX_MPLS_LABELS 16 /* This is the max label stack depth */
-#define MPLS_STACK_BOTTOM __constant_htonl(0x00000100)
+#define MPLS_STACK_BOTTOM htonl(0x00000100)
/* Device flag bits */
#define F_IPSRC_RND (1<<0) /* IP-Src Random */
@@ -214,15 +211,11 @@ struct flow_state {
};
struct pktgen_dev {
-
/*
* Try to keep frequent/infrequent used vars. separated.
*/
-
- char ifname[IFNAMSIZ];
- char result[512];
-
- struct pktgen_thread *pg_thread; /* the owner */
+ struct proc_dir_entry *entry; /* proc file */
+ struct pktgen_thread *pg_thread;/* the owner */
struct list_head list; /* Used for chaining in the thread's run-queue */
int running; /* if this changes to false, the test will stop */
@@ -349,6 +342,8 @@ struct pktgen_dev {
unsigned cflows; /* Concurrent flows (config) */
unsigned lflow; /* Flow length (config) */
unsigned nflows; /* accumulated flows (stats) */
+
+ char result[512];
};
struct pktgen_hdr {
@@ -468,17 +463,6 @@ static inline __u64 pg_div64(__u64 n, __u64 base)
return tmp;
}
-static inline u32 pktgen_random(void)
-{
-#if 0
- __u32 n;
- get_random_bytes(&n, 4);
- return n;
-#else
- return net_random();
-#endif
-}
-
static inline __u64 getCurMs(void)
{
struct timeval tv;
@@ -512,7 +496,7 @@ static void pktgen_stop_all_threads_ifs(void);
static int pktgen_stop_device(struct pktgen_dev *pkt_dev);
static void pktgen_stop(struct pktgen_thread *t);
static void pktgen_clear_counters(struct pktgen_dev *pkt_dev);
-static int pktgen_mark_device(const char *ifname);
+
static unsigned int scan_ip6(const char *s, char ip[16]);
static unsigned int fmt_ip6(char *s, const char ip[16]);
@@ -606,7 +590,7 @@ static int pktgen_if_show(struct seq_file *seq, void *v)
" frags: %d delay: %u clone_skb: %d ifname: %s\n",
pkt_dev->nfrags,
1000 * pkt_dev->delay_us + pkt_dev->delay_ns,
- pkt_dev->clone_skb, pkt_dev->ifname);
+ pkt_dev->clone_skb, pkt_dev->odev->name);
seq_printf(seq, " flows: %u flowlen: %u\n", pkt_dev->cflows,
pkt_dev->lflow);
@@ -661,7 +645,7 @@ static int pktgen_if_show(struct seq_file *seq, void *v)
if (pkt_dev->nr_labels) {
unsigned i;
seq_printf(seq, " mpls: ");
- for(i = 0; i < pkt_dev->nr_labels; i++)
+ for (i = 0; i < pkt_dev->nr_labels; i++)
seq_printf(seq, "%08x%s", ntohl(pkt_dev->labels[i]),
i == pkt_dev->nr_labels-1 ? "\n" : ", ");
}
@@ -766,7 +750,7 @@ static int hex32_arg(const char __user *user_buffer, unsigned long maxlen, __u32
int i = 0;
*num = 0;
- for(; i < maxlen; i++) {
+ for (; i < maxlen; i++) {
char c;
*num <<= 4;
if (get_user(c, &user_buffer[i]))
@@ -802,7 +786,7 @@ static int count_trail_chars(const char __user * user_buffer,
break;
default:
goto done;
- };
+ }
}
done:
return i;
@@ -845,7 +829,7 @@ static int strn_len(const char __user * user_buffer, unsigned int maxlen)
break;
default:
break;
- };
+ }
}
done_str:
return i;
@@ -874,7 +858,7 @@ static ssize_t get_labels(const char __user *buffer, struct pktgen_dev *pkt_dev)
n++;
if (n >= MAX_MPLS_LABELS)
return -E2BIG;
- } while(c == ',');
+ } while (c == ',');
pkt_dev->nr_labels = n;
return i;
@@ -1503,7 +1487,7 @@ static ssize_t pktgen_if_write(struct file *file,
if (len < 0) { return len; }
i += len;
offset = sprintf(pg_result, "OK: mpls=");
- for(n = 0; n < pkt_dev->nr_labels; n++)
+ for (n = 0; n < pkt_dev->nr_labels; n++)
offset += sprintf(pg_result + offset,
"%08x%s", ntohl(pkt_dev->labels[n]),
n == pkt_dev->nr_labels-1 ? "" : ",");
@@ -1697,13 +1681,13 @@ static int pktgen_thread_show(struct seq_file *seq, void *v)
if_lock(t);
list_for_each_entry(pkt_dev, &t->if_list, list)
if (pkt_dev->running)
- seq_printf(seq, "%s ", pkt_dev->ifname);
+ seq_printf(seq, "%s ", pkt_dev->odev->name);
seq_printf(seq, "\nStopped: ");
list_for_each_entry(pkt_dev, &t->if_list, list)
if (!pkt_dev->running)
- seq_printf(seq, "%s ", pkt_dev->ifname);
+ seq_printf(seq, "%s ", pkt_dev->odev->name);
if (t->result[0])
seq_printf(seq, "\nResult: %s\n", t->result);
@@ -1849,16 +1833,14 @@ static struct pktgen_dev *__pktgen_NN_threads(const char *ifname, int remove)
/*
* mark a device for removal
*/
-static int pktgen_mark_device(const char *ifname)
+static void pktgen_mark_device(const char *ifname)
{
struct pktgen_dev *pkt_dev = NULL;
const int max_tries = 10, msec_per_try = 125;
int i = 0;
- int ret = 0;
mutex_lock(&pktgen_thread_lock);
- PG_DEBUG(printk("pktgen: pktgen_mark_device marking %s for removal\n",
- ifname));
+ pr_debug("pktgen: pktgen_mark_device marking %s for removal\n", ifname);
while (1) {
@@ -1867,8 +1849,8 @@ static int pktgen_mark_device(const char *ifname)
break; /* success */
mutex_unlock(&pktgen_thread_lock);
- PG_DEBUG(printk("pktgen: pktgen_mark_device waiting for %s "
- "to disappear....\n", ifname));
+ pr_debug("pktgen: pktgen_mark_device waiting for %s "
+ "to disappear....\n", ifname);
schedule_timeout_interruptible(msecs_to_jiffies(msec_per_try));
mutex_lock(&pktgen_thread_lock);
@@ -1876,79 +1858,91 @@ static int pktgen_mark_device(const char *ifname)
printk("pktgen_mark_device: timed out after waiting "
"%d msec for device %s to be removed\n",
msec_per_try * i, ifname);
- ret = 1;
break;
}
}
mutex_unlock(&pktgen_thread_lock);
+}
- return ret;
+static void pktgen_change_name(struct net_device *dev)
+{
+ struct pktgen_thread *t;
+
+ list_for_each_entry(t, &pktgen_threads, th_list) {
+ struct pktgen_dev *pkt_dev;
+
+ list_for_each_entry(pkt_dev, &t->if_list, list) {
+ if (pkt_dev->odev != dev)
+ continue;
+
+ remove_proc_entry(pkt_dev->entry->name, pg_proc_dir);
+
+ pkt_dev->entry = create_proc_entry(dev->name, 0600,
+ pg_proc_dir);
+ if (!pkt_dev->entry)
+ printk(KERN_ERR "pktgen: can't move proc "
+ " entry for '%s'\n", dev->name);
+ break;
+ }
+ }
}
static int pktgen_device_event(struct notifier_block *unused,
unsigned long event, void *ptr)
{
- struct net_device *dev = (struct net_device *)(ptr);
+ struct net_device *dev = ptr;
/* It is OK that we do not hold the group lock right now,
* as we run under the RTNL lock.
*/
switch (event) {
- case NETDEV_CHANGEADDR:
- case NETDEV_GOING_DOWN:
- case NETDEV_DOWN:
- case NETDEV_UP:
- /* Ignore for now */
+ case NETDEV_CHANGENAME:
+ pktgen_change_name(dev);
break;
case NETDEV_UNREGISTER:
pktgen_mark_device(dev->name);
break;
- };
+ }
return NOTIFY_DONE;
}
/* Associate pktgen_dev with a device. */
-static struct net_device *pktgen_setup_dev(struct pktgen_dev *pkt_dev)
+static int pktgen_setup_dev(struct pktgen_dev *pkt_dev, const char *ifname)
{
struct net_device *odev;
+ int err;
/* Clean old setups */
-
if (pkt_dev->odev) {
dev_put(pkt_dev->odev);
pkt_dev->odev = NULL;
}
- odev = dev_get_by_name(pkt_dev->ifname);
-
+ odev = dev_get_by_name(ifname);
if (!odev) {
- printk("pktgen: no such netdevice: \"%s\"\n", pkt_dev->ifname);
- goto out;
+ printk("pktgen: no such netdevice: \"%s\"\n", ifname);
+ return -ENODEV;
}
+
if (odev->type != ARPHRD_ETHER) {
- printk("pktgen: not an ethernet device: \"%s\"\n",
- pkt_dev->ifname);
- goto out_put;
- }
- if (!netif_running(odev)) {
- printk("pktgen: device is down: \"%s\"\n", pkt_dev->ifname);
- goto out_put;
+ printk("pktgen: not an ethernet device: \"%s\"\n", ifname);
+ err = -EINVAL;
+ } else if (!netif_running(odev)) {
+ printk("pktgen: device is down: \"%s\"\n", ifname);
+ err = -ENETDOWN;
+ } else {
+ pkt_dev->odev = odev;
+ return 0;
}
- pkt_dev->odev = odev;
- return pkt_dev->odev;
-
-out_put:
dev_put(odev);
-out:
- return NULL;
-
+ return err;
}
/* Read pkt_dev from the interface and set up internal pktgen_dev
@@ -1956,10 +1950,6 @@ out:
*/
static void pktgen_setup_inject(struct pktgen_dev *pkt_dev)
{
- /* Try once more, just in case it works now. */
- if (!pkt_dev->odev)
- pktgen_setup_dev(pkt_dev);
-
if (!pkt_dev->odev) {
printk("pktgen: ERROR: pkt_dev->odev == NULL in setup_inject.\n");
sprintf(pkt_dev->result,
@@ -2096,7 +2086,7 @@ static void mod_cur_headers(struct pktgen_dev *pkt_dev)
int flow = 0;
if (pkt_dev->cflows) {
- flow = pktgen_random() % pkt_dev->cflows;
+ flow = random32() % pkt_dev->cflows;
if (pkt_dev->flows[flow].count > pkt_dev->lflow)
pkt_dev->flows[flow].count = 0;
@@ -2108,7 +2098,7 @@ static void mod_cur_headers(struct pktgen_dev *pkt_dev)
__u32 tmp;
if (pkt_dev->flags & F_MACSRC_RND)
- mc = pktgen_random() % (pkt_dev->src_mac_count);
+ mc = random32() % pkt_dev->src_mac_count;
else {
mc = pkt_dev->cur_src_mac_offset++;
if (pkt_dev->cur_src_mac_offset >
@@ -2134,7 +2124,7 @@ static void mod_cur_headers(struct pktgen_dev *pkt_dev)
__u32 tmp;
if (pkt_dev->flags & F_MACDST_RND)
- mc = pktgen_random() % (pkt_dev->dst_mac_count);
+ mc = random32() % pkt_dev->dst_mac_count;
else {
mc = pkt_dev->cur_dst_mac_offset++;
@@ -2158,27 +2148,26 @@ static void mod_cur_headers(struct pktgen_dev *pkt_dev)
if (pkt_dev->flags & F_MPLS_RND) {
unsigned i;
- for(i = 0; i < pkt_dev->nr_labels; i++)
+ for (i = 0; i < pkt_dev->nr_labels; i++)
if (pkt_dev->labels[i] & MPLS_STACK_BOTTOM)
pkt_dev->labels[i] = MPLS_STACK_BOTTOM |
- ((__force __be32)pktgen_random() &
+ ((__force __be32)random32() &
htonl(0x000fffff));
}
if ((pkt_dev->flags & F_VID_RND) && (pkt_dev->vlan_id != 0xffff)) {
- pkt_dev->vlan_id = pktgen_random() % 4096;
+ pkt_dev->vlan_id = random32() & (4096-1);
}
if ((pkt_dev->flags & F_SVID_RND) && (pkt_dev->svlan_id != 0xffff)) {
- pkt_dev->svlan_id = pktgen_random() % 4096;
+ pkt_dev->svlan_id = random32() & (4096 - 1);
}
if (pkt_dev->udp_src_min < pkt_dev->udp_src_max) {
if (pkt_dev->flags & F_UDPSRC_RND)
- pkt_dev->cur_udp_src =
- ((pktgen_random() %
- (pkt_dev->udp_src_max - pkt_dev->udp_src_min)) +
- pkt_dev->udp_src_min);
+ pkt_dev->cur_udp_src = random32() %
+ (pkt_dev->udp_src_max - pkt_dev->udp_src_min)
+ + pkt_dev->udp_src_min;
else {
pkt_dev->cur_udp_src++;
@@ -2189,10 +2178,9 @@ static void mod_cur_headers(struct pktgen_dev *pkt_dev)
if (pkt_dev->udp_dst_min < pkt_dev->udp_dst_max) {
if (pkt_dev->flags & F_UDPDST_RND) {
- pkt_dev->cur_udp_dst =
- ((pktgen_random() %
- (pkt_dev->udp_dst_max - pkt_dev->udp_dst_min)) +
- pkt_dev->udp_dst_min);
+ pkt_dev->cur_udp_dst = random32() %
+ (pkt_dev->udp_dst_max - pkt_dev->udp_dst_min)
+ + pkt_dev->udp_dst_min;
} else {
pkt_dev->cur_udp_dst++;
if (pkt_dev->cur_udp_dst >= pkt_dev->udp_dst_max)
@@ -2207,7 +2195,7 @@ static void mod_cur_headers(struct pktgen_dev *pkt_dev)
saddr_max))) {
__u32 t;
if (pkt_dev->flags & F_IPSRC_RND)
- t = ((pktgen_random() % (imx - imn)) + imn);
+ t = random32() % (imx - imn) + imn;
else {
t = ntohl(pkt_dev->cur_saddr);
t++;
@@ -2228,14 +2216,13 @@ static void mod_cur_headers(struct pktgen_dev *pkt_dev)
__be32 s;
if (pkt_dev->flags & F_IPDST_RND) {
- t = pktgen_random() % (imx - imn) + imn;
+ t = random32() % (imx - imn) + imn;
s = htonl(t);
while (LOOPBACK(s) || MULTICAST(s)
|| BADCLASS(s) || ZERONET(s)
|| LOCAL_MCAST(s)) {
- t = (pktgen_random() %
- (imx - imn)) + imn;
+ t = random32() % (imx - imn) + imn;
s = htonl(t);
}
pkt_dev->cur_daddr = s;
@@ -2267,7 +2254,7 @@ static void mod_cur_headers(struct pktgen_dev *pkt_dev)
for (i = 0; i < 4; i++) {
pkt_dev->cur_in6_daddr.s6_addr32[i] =
- (((__force __be32)pktgen_random() |
+ (((__force __be32)random32() |
pkt_dev->min_in6_daddr.s6_addr32[i]) &
pkt_dev->max_in6_daddr.s6_addr32[i]);
}
@@ -2277,9 +2264,9 @@ static void mod_cur_headers(struct pktgen_dev *pkt_dev)
if (pkt_dev->min_pkt_size < pkt_dev->max_pkt_size) {
__u32 t;
if (pkt_dev->flags & F_TXSIZE_RND) {
- t = ((pktgen_random() %
- (pkt_dev->max_pkt_size - pkt_dev->min_pkt_size))
- + pkt_dev->min_pkt_size);
+ t = random32() %
+ (pkt_dev->max_pkt_size - pkt_dev->min_pkt_size)
+ + pkt_dev->min_pkt_size;
} else {
t = pkt_dev->cur_pkt_size + 1;
if (t > pkt_dev->max_pkt_size)
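
pktgen's private pktgen_random() wrapper disappears in favour of the library random32(). Two reduction idioms appear above: the 4096-entry VLAN space is a power of two, so random32() & (4096 - 1) is exact and branch-free, while arbitrary spans use modulo, which carries a slight bias that a traffic generator can live with:

    /* Reducing a 32-bit draw to a range (sketch). */
    static inline u32 rnd_pow2(u32 size)            /* size == 1 << k */
    {
            return random32() & (size - 1);
    }

    static inline u32 rnd_range(u32 min, u32 max)   /* [min, max) */
    {
            return random32() % (max - min) + min;  /* slight modulo bias */
    }
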
@@ -2294,7 +2281,7 @@ static void mod_cur_headers(struct pktgen_dev *pkt_dev)
static void mpls_push(__be32 *mpls, struct pktgen_dev *pkt_dev)
{
unsigned i;
- for(i = 0; i < pkt_dev->nr_labels; i++) {
+ for (i = 0; i < pkt_dev->nr_labels; i++) {
*mpls++ = pkt_dev->labels[i] & ~MPLS_STACK_BOTTOM;
}
mpls--;
@@ -2316,7 +2303,7 @@ static struct sk_buff *fill_packet_ipv4(struct net_device *odev,
int datalen, iplen;
struct iphdr *iph;
struct pktgen_hdr *pgh = NULL;
- __be16 protocol = __constant_htons(ETH_P_IP);
+ __be16 protocol = htons(ETH_P_IP);
__be32 *mpls;
__be16 *vlan_tci = NULL; /* Encapsulates priority and VLAN ID */
__be16 *vlan_encapsulated_proto = NULL; /* packet type ID field (or len) for VLAN tag */
@@ -2325,10 +2312,10 @@ static struct sk_buff *fill_packet_ipv4(struct net_device *odev,
if (pkt_dev->nr_labels)
- protocol = __constant_htons(ETH_P_MPLS_UC);
+ protocol = htons(ETH_P_MPLS_UC);
if (pkt_dev->vlan_id != 0xffff)
- protocol = __constant_htons(ETH_P_8021Q);
+ protocol = htons(ETH_P_8021Q);
/* Update any of the values, used when we're incrementing various
* fields.
@@ -2354,24 +2341,28 @@ static struct sk_buff *fill_packet_ipv4(struct net_device *odev,
mpls_push(mpls, pkt_dev);
if (pkt_dev->vlan_id != 0xffff) {
- if(pkt_dev->svlan_id != 0xffff) {
+ if (pkt_dev->svlan_id != 0xffff) {
svlan_tci = (__be16 *)skb_put(skb, sizeof(__be16));
*svlan_tci = build_tci(pkt_dev->svlan_id,
pkt_dev->svlan_cfi,
pkt_dev->svlan_p);
svlan_encapsulated_proto = (__be16 *)skb_put(skb, sizeof(__be16));
- *svlan_encapsulated_proto = __constant_htons(ETH_P_8021Q);
+ *svlan_encapsulated_proto = htons(ETH_P_8021Q);
}
vlan_tci = (__be16 *)skb_put(skb, sizeof(__be16));
*vlan_tci = build_tci(pkt_dev->vlan_id,
pkt_dev->vlan_cfi,
pkt_dev->vlan_p);
vlan_encapsulated_proto = (__be16 *)skb_put(skb, sizeof(__be16));
- *vlan_encapsulated_proto = __constant_htons(ETH_P_IP);
+ *vlan_encapsulated_proto = htons(ETH_P_IP);
}
- iph = (struct iphdr *)skb_put(skb, sizeof(struct iphdr));
- udph = (struct udphdr *)skb_put(skb, sizeof(struct udphdr));
+ skb->network_header = skb->tail;
+ skb->transport_header = skb->network_header + sizeof(struct iphdr);
+ skb_put(skb, sizeof(struct iphdr) + sizeof(struct udphdr));
+
+ iph = ip_hdr(skb);
+ udph = udp_hdr(skb);
memcpy(eth, pkt_dev->hh, 12);
*(__be16 *) & eth[12] = protocol;
@@ -2400,12 +2391,11 @@ static struct sk_buff *fill_packet_ipv4(struct net_device *odev,
iph->check = 0;
iph->check = ip_fast_csum((void *)iph, iph->ihl);
skb->protocol = protocol;
- skb->mac.raw = ((u8 *) iph) - 14 - pkt_dev->nr_labels*sizeof(u32) -
- VLAN_TAG_SIZE(pkt_dev) - SVLAN_TAG_SIZE(pkt_dev);
+ skb->mac_header = (skb->network_header - ETH_HLEN -
+ pkt_dev->nr_labels * sizeof(u32) -
+ VLAN_TAG_SIZE(pkt_dev) - SVLAN_TAG_SIZE(pkt_dev));
skb->dev = odev;
skb->pkt_type = PACKET_HOST;
- skb->nh.iph = iph;
- skb->h.uh = udph;
if (pkt_dev->nfrags <= 0)
pgh = (struct pktgen_hdr *)skb_put(skb, datalen);
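
The hunk above replaces direct pointer stores (skb->nh.iph, skb->h.uh) with header offsets recorded before a single skb_put() reserves the space. A sketch of how the accessors then resolve, assuming an offset-based build (NET_SKBUFF_DATA_USES_OFFSET); the example_ names are hypothetical stand-ins for the real ip_hdr()/udp_hdr():

static inline struct iphdr *example_ip_hdr(const struct sk_buff *skb)
{
	/* network_header counts bytes from skb->head on offset builds */
	return (struct iphdr *)(skb->head + skb->network_header);
}

static inline struct udphdr *example_udp_hdr(const struct sk_buff *skb)
{
	return (struct udphdr *)(skb->head + skb->transport_header);
}
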
@@ -2654,7 +2644,7 @@ static struct sk_buff *fill_packet_ipv6(struct net_device *odev,
int datalen;
struct ipv6hdr *iph;
struct pktgen_hdr *pgh = NULL;
- __be16 protocol = __constant_htons(ETH_P_IPV6);
+ __be16 protocol = htons(ETH_P_IPV6);
__be32 *mpls;
__be16 *vlan_tci = NULL; /* Encapsulates priority and VLAN ID */
__be16 *vlan_encapsulated_proto = NULL; /* packet type ID field (or len) for VLAN tag */
@@ -2662,10 +2652,10 @@ static struct sk_buff *fill_packet_ipv6(struct net_device *odev,
__be16 *svlan_encapsulated_proto = NULL; /* packet type ID field (or len) for SVLAN tag */
if (pkt_dev->nr_labels)
- protocol = __constant_htons(ETH_P_MPLS_UC);
+ protocol = htons(ETH_P_MPLS_UC);
if (pkt_dev->vlan_id != 0xffff)
- protocol = __constant_htons(ETH_P_8021Q);
+ protocol = htons(ETH_P_8021Q);
/* Update any of the values used when we're incrementing various
* fields.
@@ -2690,24 +2680,28 @@ static struct sk_buff *fill_packet_ipv6(struct net_device *odev,
mpls_push(mpls, pkt_dev);
if (pkt_dev->vlan_id != 0xffff) {
- if(pkt_dev->svlan_id != 0xffff) {
+ if (pkt_dev->svlan_id != 0xffff) {
svlan_tci = (__be16 *)skb_put(skb, sizeof(__be16));
*svlan_tci = build_tci(pkt_dev->svlan_id,
pkt_dev->svlan_cfi,
pkt_dev->svlan_p);
svlan_encapsulated_proto = (__be16 *)skb_put(skb, sizeof(__be16));
- *svlan_encapsulated_proto = __constant_htons(ETH_P_8021Q);
+ *svlan_encapsulated_proto = htons(ETH_P_8021Q);
}
vlan_tci = (__be16 *)skb_put(skb, sizeof(__be16));
*vlan_tci = build_tci(pkt_dev->vlan_id,
pkt_dev->vlan_cfi,
pkt_dev->vlan_p);
vlan_encapsulated_proto = (__be16 *)skb_put(skb, sizeof(__be16));
- *vlan_encapsulated_proto = __constant_htons(ETH_P_IPV6);
+ *vlan_encapsulated_proto = htons(ETH_P_IPV6);
}
- iph = (struct ipv6hdr *)skb_put(skb, sizeof(struct ipv6hdr));
- udph = (struct udphdr *)skb_put(skb, sizeof(struct udphdr));
+ skb->network_header = skb->tail;
+ skb->transport_header = skb->network_header + sizeof(struct ipv6hdr);
+ skb_put(skb, sizeof(struct ipv6hdr) + sizeof(struct udphdr));
+
+ iph = ipv6_hdr(skb);
+ udph = udp_hdr(skb);
memcpy(eth, pkt_dev->hh, 12);
*(__be16 *) & eth[12] = protocol;
@@ -2729,7 +2723,7 @@ static struct sk_buff *fill_packet_ipv6(struct net_device *odev,
udph->len = htons(datalen + sizeof(struct udphdr));
udph->check = 0; /* No checksum */
- *(__be32 *) iph = __constant_htonl(0x60000000); /* Version + flow */
+ *(__be32 *) iph = htonl(0x60000000); /* Version + flow */
if (pkt_dev->traffic_class) {
/* Version + traffic class + flow (0) */
@@ -2744,13 +2738,12 @@ static struct sk_buff *fill_packet_ipv6(struct net_device *odev,
ipv6_addr_copy(&iph->daddr, &pkt_dev->cur_in6_daddr);
ipv6_addr_copy(&iph->saddr, &pkt_dev->cur_in6_saddr);
- skb->mac.raw = ((u8 *) iph) - 14 - pkt_dev->nr_labels*sizeof(u32) -
- VLAN_TAG_SIZE(pkt_dev) - SVLAN_TAG_SIZE(pkt_dev);
+ skb->mac_header = (skb->network_header - ETH_HLEN -
+ pkt_dev->nr_labels * sizeof(u32) -
+ VLAN_TAG_SIZE(pkt_dev) - SVLAN_TAG_SIZE(pkt_dev));
skb->protocol = protocol;
skb->dev = odev;
skb->pkt_type = PACKET_HOST;
- skb->nh.ipv6h = iph;
- skb->h.uh = udph;
if (pkt_dev->nfrags <= 0)
pgh = (struct pktgen_hdr *)skb_put(skb, datalen);
@@ -2848,7 +2841,7 @@ static void pktgen_run(struct pktgen_thread *t)
struct pktgen_dev *pkt_dev;
int started = 0;
- PG_DEBUG(printk("pktgen: entering pktgen_run. %p\n", t));
+ pr_debug("pktgen: entering pktgen_run. %p\n", t);
if_lock(t);
list_for_each_entry(pkt_dev, &t->if_list, list) {
@@ -2880,7 +2873,7 @@ static void pktgen_stop_all_threads_ifs(void)
{
struct pktgen_thread *t;
- PG_DEBUG(printk("pktgen: entering pktgen_stop_all_threads_ifs.\n"));
+ pr_debug("pktgen: entering pktgen_stop_all_threads_ifs.\n");
mutex_lock(&pktgen_thread_lock);
@@ -2948,7 +2941,7 @@ static void pktgen_run_all_threads(void)
{
struct pktgen_thread *t;
- PG_DEBUG(printk("pktgen: entering pktgen_run_all_threads.\n"));
+ pr_debug("pktgen: entering pktgen_run_all_threads.\n");
mutex_lock(&pktgen_thread_lock);
@@ -3006,7 +2999,7 @@ static int pktgen_stop_device(struct pktgen_dev *pkt_dev)
if (!pkt_dev->running) {
printk("pktgen: interface: %s is already stopped\n",
- pkt_dev->ifname);
+ pkt_dev->odev->name);
return -EINVAL;
}
@@ -3040,7 +3033,7 @@ static void pktgen_stop(struct pktgen_thread *t)
{
struct pktgen_dev *pkt_dev;
- PG_DEBUG(printk("pktgen: entering pktgen_stop\n"));
+ pr_debug("pktgen: entering pktgen_stop\n");
if_lock(t);
@@ -3064,7 +3057,7 @@ static void pktgen_rem_one_if(struct pktgen_thread *t)
struct list_head *q, *n;
struct pktgen_dev *cur;
- PG_DEBUG(printk("pktgen: entering pktgen_rem_one_if\n"));
+ pr_debug("pktgen: entering pktgen_rem_one_if\n");
if_lock(t);
@@ -3093,7 +3086,7 @@ static void pktgen_rem_all_ifs(struct pktgen_thread *t)
/* Remove all devices, free mem */
- PG_DEBUG(printk("pktgen: entering pktgen_rem_all_ifs\n"));
+ pr_debug("pktgen: entering pktgen_rem_all_ifs\n");
if_lock(t);
list_for_each_safe(q, n, &t->if_list) {
@@ -3276,7 +3269,7 @@ static int pktgen_thread_worker(void *arg)
t->pid = current->pid;
- PG_DEBUG(printk("pktgen: starting pktgen/%d: pid=%d\n", cpu, current->pid));
+ pr_debug("pktgen: starting pktgen/%d: pid=%d\n", cpu, current->pid);
max_before_softirq = t->max_before_softirq;
@@ -3339,13 +3332,13 @@ static int pktgen_thread_worker(void *arg)
set_current_state(TASK_INTERRUPTIBLE);
}
- PG_DEBUG(printk("pktgen: %s stopping all device\n", t->tsk->comm));
+	pr_debug("pktgen: %s stopping all devices\n", t->tsk->comm);
pktgen_stop(t);
- PG_DEBUG(printk("pktgen: %s removing all device\n", t->tsk->comm));
+	pr_debug("pktgen: %s removing all devices\n", t->tsk->comm);
pktgen_rem_all_ifs(t);
- PG_DEBUG(printk("pktgen: %s removing thread.\n", t->tsk->comm));
+ pr_debug("pktgen: %s removing thread.\n", t->tsk->comm);
pktgen_rem_thread(t);
return 0;
@@ -3358,13 +3351,13 @@ static struct pktgen_dev *pktgen_find_dev(struct pktgen_thread *t,
if_lock(t);
list_for_each_entry(p, &t->if_list, list)
- if (strncmp(p->ifname, ifname, IFNAMSIZ) == 0) {
+ if (strncmp(p->odev->name, ifname, IFNAMSIZ) == 0) {
pkt_dev = p;
break;
}
if_unlock(t);
- PG_DEBUG(printk("pktgen: find_dev(%s) returning %p\n", ifname, pkt_dev));
+ pr_debug("pktgen: find_dev(%s) returning %p\n", ifname, pkt_dev);
return pkt_dev;
}
@@ -3399,7 +3392,7 @@ out:
static int pktgen_add_device(struct pktgen_thread *t, const char *ifname)
{
struct pktgen_dev *pkt_dev;
- struct proc_dir_entry *pe;
+ int err;
/* We don't allow a device to be on several threads */
@@ -3441,29 +3434,28 @@ static int pktgen_add_device(struct pktgen_thread *t, const char *ifname)
pkt_dev->svlan_cfi = 0;
pkt_dev->svlan_id = 0xffff;
- strncpy(pkt_dev->ifname, ifname, IFNAMSIZ);
+ err = pktgen_setup_dev(pkt_dev, ifname);
+ if (err)
+ goto out1;
- if (!pktgen_setup_dev(pkt_dev)) {
- printk("pktgen: ERROR: pktgen_setup_dev failed.\n");
- if (pkt_dev->flows)
- vfree(pkt_dev->flows);
- kfree(pkt_dev);
- return -ENODEV;
- }
-
- pe = create_proc_entry(ifname, 0600, pg_proc_dir);
- if (!pe) {
+ pkt_dev->entry = create_proc_entry(ifname, 0600, pg_proc_dir);
+ if (!pkt_dev->entry) {
printk("pktgen: cannot create %s/%s procfs entry.\n",
PG_PROC_DIR, ifname);
- if (pkt_dev->flows)
- vfree(pkt_dev->flows);
- kfree(pkt_dev);
- return -EINVAL;
+ err = -EINVAL;
+ goto out2;
}
- pe->proc_fops = &pktgen_if_fops;
- pe->data = pkt_dev;
+ pkt_dev->entry->proc_fops = &pktgen_if_fops;
+ pkt_dev->entry->data = pkt_dev;
return add_dev_to_thread(t, pkt_dev);
+out2:
+ dev_put(pkt_dev->odev);
+out1:
+ if (pkt_dev->flows)
+ vfree(pkt_dev->flows);
+ kfree(pkt_dev);
+ return err;
}
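
The reworked error path is the standard kernel goto-unwind: each label releases exactly what was acquired before the failure point. Schematically (a sketch with hypothetical helpers acquire_a/acquire_b/release_a/free_state, not the pktgen code itself):

static int setup_sketch(void)
{
	int err;

	err = acquire_a();		/* pktgen_setup_dev(): takes a dev ref */
	if (err)
		goto out1;
	err = acquire_b();		/* create_proc_entry() step */
	if (err)
		goto out2;
	return 0;
out2:
	release_a();			/* dev_put(pkt_dev->odev) */
out1:
	free_state();			/* vfree(flows); kfree(pkt_dev) */
	return err;
}
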
static int __init pktgen_create_thread(int cpu)
@@ -3533,7 +3525,7 @@ static int pktgen_remove_device(struct pktgen_thread *t,
struct pktgen_dev *pkt_dev)
{
- PG_DEBUG(printk("pktgen: remove_device pkt_dev=%p\n", pkt_dev));
+ pr_debug("pktgen: remove_device pkt_dev=%p\n", pkt_dev);
if (pkt_dev->running) {
printk("pktgen:WARNING: trying to remove a running interface, stopping it now.\n");
@@ -3551,9 +3543,8 @@ static int pktgen_remove_device(struct pktgen_thread *t,
_rem_dev_from_if_list(t, pkt_dev);
- /* Clean up proc file system */
-
- remove_proc_entry(pkt_dev->ifname, pg_proc_dir);
+ if (pkt_dev->entry)
+ remove_proc_entry(pkt_dev->entry->name, pg_proc_dir);
if (pkt_dev->flows)
vfree(pkt_dev->flows);
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index 33ea8eac7fe..8c971a2efe2 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -50,11 +50,13 @@
#include <net/sock.h>
#include <net/pkt_sched.h>
#include <net/fib_rules.h>
-#include <net/netlink.h>
-#ifdef CONFIG_NET_WIRELESS_RTNETLINK
-#include <linux/wireless.h>
-#include <net/iw_handler.h>
-#endif /* CONFIG_NET_WIRELESS_RTNETLINK */
+#include <net/rtnetlink.h>
+
+struct rtnl_link {
+ rtnl_doit_func doit;
+ rtnl_dumpit_func dumpit;
+};
static DEFINE_MUTEX(rtnl_mutex);
static struct sock *rtnl;
@@ -95,7 +97,151 @@ int rtattr_parse(struct rtattr *tb[], int maxattr, struct rtattr *rta, int len)
return 0;
}
-struct rtnetlink_link * rtnetlink_links[NPROTO];
+static struct rtnl_link *rtnl_msg_handlers[NPROTO];
+
+static inline int rtm_msgindex(int msgtype)
+{
+ int msgindex = msgtype - RTM_BASE;
+
+ /*
+ * msgindex < 0 implies someone tried to register a netlink
+ * control code. msgindex >= RTM_NR_MSGTYPES may indicate that
+ * the message type has not been added to linux/rtnetlink.h
+ */
+ BUG_ON(msgindex < 0 || msgindex >= RTM_NR_MSGTYPES);
+
+ return msgindex;
+}
+
+static rtnl_doit_func rtnl_get_doit(int protocol, int msgindex)
+{
+ struct rtnl_link *tab;
+
+ tab = rtnl_msg_handlers[protocol];
+ if (tab == NULL || tab[msgindex].doit == NULL)
+ tab = rtnl_msg_handlers[PF_UNSPEC];
+
+ return tab ? tab[msgindex].doit : NULL;
+}
+
+static rtnl_dumpit_func rtnl_get_dumpit(int protocol, int msgindex)
+{
+ struct rtnl_link *tab;
+
+ tab = rtnl_msg_handlers[protocol];
+ if (tab == NULL || tab[msgindex].dumpit == NULL)
+ tab = rtnl_msg_handlers[PF_UNSPEC];
+
+ return tab ? tab[msgindex].dumpit : NULL;
+}
+
+/**
+ * __rtnl_register - Register a rtnetlink message type
+ * @protocol: Protocol family or PF_UNSPEC
+ * @msgtype: rtnetlink message type
+ * @doit: Function pointer called for each request message
+ * @dumpit: Function pointer called for each dump request (NLM_F_DUMP) message
+ *
+ * Registers the specified function pointers (at least one of them has
+ * to be non-NULL) to be called whenever a request message for the
+ * specified protocol family and message type is received.
+ *
+ * The special protocol family PF_UNSPEC may be used to define fallback
+ * function pointers for the case when no entry for the specific protocol
+ * family exists.
+ *
+ * Returns 0 on success or a negative error code.
+ */
+int __rtnl_register(int protocol, int msgtype,
+ rtnl_doit_func doit, rtnl_dumpit_func dumpit)
+{
+ struct rtnl_link *tab;
+ int msgindex;
+
+ BUG_ON(protocol < 0 || protocol >= NPROTO);
+ msgindex = rtm_msgindex(msgtype);
+
+ tab = rtnl_msg_handlers[protocol];
+ if (tab == NULL) {
+ tab = kcalloc(RTM_NR_MSGTYPES, sizeof(*tab), GFP_KERNEL);
+ if (tab == NULL)
+ return -ENOBUFS;
+
+ rtnl_msg_handlers[protocol] = tab;
+ }
+
+ if (doit)
+ tab[msgindex].doit = doit;
+
+ if (dumpit)
+ tab[msgindex].dumpit = dumpit;
+
+ return 0;
+}
+
+EXPORT_SYMBOL_GPL(__rtnl_register);
+
+/**
+ * rtnl_register - Register a rtnetlink message type
+ *
+ * Identical to __rtnl_register() but panics on failure. This is useful
+ * as failure of this function is very unlikely; it can only happen due
+ * to lack of memory when allocating the chain to store all message
+ * handlers for a protocol. Meant for use in init functions where lack
+ * of memory implies no sense in continuing.
+ */
+void rtnl_register(int protocol, int msgtype,
+ rtnl_doit_func doit, rtnl_dumpit_func dumpit)
+{
+ if (__rtnl_register(protocol, msgtype, doit, dumpit) < 0)
+ panic("Unable to register rtnetlink message handler, "
+ "protocol = %d, message type = %d\n",
+ protocol, msgtype);
+}
+
+EXPORT_SYMBOL_GPL(rtnl_register);
+
+/**
+ * rtnl_unregister - Unregister a rtnetlink message type
+ * @protocol: Protocol family or PF_UNSPEC
+ * @msgtype: rtnetlink message type
+ *
+ * Returns 0 on success or a negative error code.
+ */
+int rtnl_unregister(int protocol, int msgtype)
+{
+ int msgindex;
+
+ BUG_ON(protocol < 0 || protocol >= NPROTO);
+ msgindex = rtm_msgindex(msgtype);
+
+ if (rtnl_msg_handlers[protocol] == NULL)
+ return -ENOENT;
+
+ rtnl_msg_handlers[protocol][msgindex].doit = NULL;
+ rtnl_msg_handlers[protocol][msgindex].dumpit = NULL;
+
+ return 0;
+}
+
+EXPORT_SYMBOL_GPL(rtnl_unregister);
+
+/**
+ * rtnl_unregister_all - Unregister all rtnetlink message types of a protocol
+ * @protocol: Protocol family or PF_UNSPEC
+ *
+ * Identical to calling rtnl_unregister() for all registered message types
+ * of a certain protocol family.
+ */
+void rtnl_unregister_all(int protocol)
+{
+ BUG_ON(protocol < 0 || protocol >= NPROTO);
+
+ kfree(rtnl_msg_handlers[protocol]);
+ rtnl_msg_handlers[protocol] = NULL;
+}
+
+EXPORT_SYMBOL_GPL(rtnl_unregister_all);
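
Together these four functions form the whole replacement for the old rtnetlink_links[] table. A hedged usage sketch with a hypothetical handler name, mirroring what rtnetlink_init() does further down:

static int foo_doit(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
{
	return 0;	/* process one request message */
}

static int __init foo_init(void)
{
	/* rtnl_register() panics on allocation failure; fine in init code */
	rtnl_register(PF_UNSPEC, RTM_NEWLINK, foo_doit, NULL);
	return 0;
}

static void __exit foo_exit(void)
{
	rtnl_unregister(PF_UNSPEC, RTM_NEWLINK);
}
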
static const int rtm_min[RTM_NR_FAMILIES] =
{
@@ -249,7 +395,7 @@ static void set_operstate(struct net_device *dev, unsigned char transition)
operstate == IF_OPER_UNKNOWN)
operstate = IF_OPER_DORMANT;
break;
- };
+ }
if (dev->operstate != operstate) {
write_lock_bh(&dev_base_lock);
@@ -393,16 +539,17 @@ static int rtnl_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb)
int s_idx = cb->args[0];
struct net_device *dev;
- read_lock(&dev_base_lock);
- for (dev=dev_base, idx=0; dev; dev = dev->next, idx++) {
+ idx = 0;
+ for_each_netdev(dev) {
if (idx < s_idx)
- continue;
+ goto cont;
if (rtnl_fill_ifinfo(skb, dev, NULL, 0, RTM_NEWLINK,
NETLINK_CB(cb->skb).pid,
cb->nlh->nlmsg_seq, 0, NLM_F_MULTI) <= 0)
break;
+cont:
+ idx++;
}
- read_unlock(&dev_base_lock);
cb->args[0] = idx;
return skb->len;
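
The loop rewrite keeps netlink dump resumption working: idx must advance for every device, including skipped ones, so the skip path jumps to cont: rather than using continue before the increment. In sketch form:

/* cb->args[0] stores the index of the first device not yet dumped;
 * on re-entry, devices with idx < s_idx are counted but not emitted,
 * so each for_each_netdev() pass assigns the same stable indices. */
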
@@ -536,17 +683,6 @@ static int rtnl_setlink(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
modified = 1;
}
-#ifdef CONFIG_NET_WIRELESS_RTNETLINK
- if (tb[IFLA_WIRELESS]) {
- /* Call Wireless Extensions.
- * Various stuff checked in there... */
- err = wireless_rtnetlink_set(dev, nla_data(tb[IFLA_WIRELESS]),
- nla_len(tb[IFLA_WIRELESS]));
- if (err < 0)
- goto errout_dev;
- }
-#endif /* CONFIG_NET_WIRELESS_RTNETLINK */
-
if (tb[IFLA_BROADCAST]) {
nla_memcpy(dev->broadcast, tb[IFLA_BROADCAST], dev->addr_len);
send_addr_notify = 1;
@@ -610,22 +746,6 @@ static int rtnl_getlink(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
} else
return -EINVAL;
-
-#ifdef CONFIG_NET_WIRELESS_RTNETLINK
- if (tb[IFLA_WIRELESS]) {
- /* Call Wireless Extensions. We need to know the size before
- * we can alloc. Various stuff checked in there... */
- err = wireless_rtnetlink_get(dev, nla_data(tb[IFLA_WIRELESS]),
- nla_len(tb[IFLA_WIRELESS]),
- &iw_buf, &iw_buf_len);
- if (err < 0)
- goto errout;
-
- /* Payload is at an offset in buffer */
- iw = iw_buf + IW_EV_POINT_OFF;
- }
-#endif /* CONFIG_NET_WIRELESS_RTNETLINK */
-
nskb = nlmsg_new(if_nlmsg_size(iw_buf_len), GFP_KERNEL);
if (nskb == NULL) {
err = -ENOBUFS;
@@ -659,12 +779,12 @@ static int rtnl_dump_all(struct sk_buff *skb, struct netlink_callback *cb)
int type = cb->nlh->nlmsg_type-RTM_BASE;
if (idx < s_idx || idx == PF_PACKET)
continue;
- if (rtnetlink_links[idx] == NULL ||
- rtnetlink_links[idx][type].dumpit == NULL)
+ if (rtnl_msg_handlers[idx] == NULL ||
+ rtnl_msg_handlers[idx][type].dumpit == NULL)
continue;
if (idx > s_idx)
memset(&cb->args[0], 0, sizeof(cb->args));
- if (rtnetlink_links[idx][type].dumpit(skb, cb))
+ if (rtnl_msg_handlers[idx][type].dumpit(skb, cb))
break;
}
cb->family = idx;
@@ -700,30 +820,18 @@ static int rtattr_max;
/* Process one rtnetlink message. */
-static __inline__ int
-rtnetlink_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh, int *errp)
+static int rtnetlink_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
{
- struct rtnetlink_link *link;
- struct rtnetlink_link *link_tab;
+ rtnl_doit_func doit;
int sz_idx, kind;
int min_len;
int family;
int type;
int err;
- /* Only requests are handled by kernel now */
- if (!(nlh->nlmsg_flags&NLM_F_REQUEST))
- return 0;
-
type = nlh->nlmsg_type;
-
- /* A control message: ignore them */
- if (type < RTM_BASE)
- return 0;
-
- /* Unknown message: reply with EINVAL */
if (type > RTM_MAX)
- goto err_inval;
+ return -EOPNOTSUPP;
type -= RTM_BASE;
@@ -732,45 +840,33 @@ rtnetlink_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh, int *errp)
return 0;
family = ((struct rtgenmsg*)NLMSG_DATA(nlh))->rtgen_family;
- if (family >= NPROTO) {
- *errp = -EAFNOSUPPORT;
- return -1;
- }
-
- link_tab = rtnetlink_links[family];
- if (link_tab == NULL)
- link_tab = rtnetlink_links[PF_UNSPEC];
- link = &link_tab[type];
+ if (family >= NPROTO)
+ return -EAFNOSUPPORT;
sz_idx = type>>2;
kind = type&3;
- if (kind != 2 && security_netlink_recv(skb, CAP_NET_ADMIN)) {
- *errp = -EPERM;
- return -1;
- }
+ if (kind != 2 && security_netlink_recv(skb, CAP_NET_ADMIN))
+ return -EPERM;
if (kind == 2 && nlh->nlmsg_flags&NLM_F_DUMP) {
- if (link->dumpit == NULL)
- link = &(rtnetlink_links[PF_UNSPEC][type]);
-
- if (link->dumpit == NULL)
- goto err_inval;
+ rtnl_dumpit_func dumpit;
- if ((*errp = netlink_dump_start(rtnl, skb, nlh,
- link->dumpit, NULL)) != 0) {
- return -1;
- }
+ dumpit = rtnl_get_dumpit(family, type);
+ if (dumpit == NULL)
+ return -EOPNOTSUPP;
- netlink_queue_skip(nlh, skb);
- return -1;
+ __rtnl_unlock();
+ err = netlink_dump_start(rtnl, skb, nlh, dumpit, NULL);
+ rtnl_lock();
+ return err;
}
memset(rta_buf, 0, (rtattr_max * sizeof(struct rtattr *)));
min_len = rtm_min[sz_idx];
if (nlh->nlmsg_len < min_len)
- goto err_inval;
+ return -EINVAL;
if (nlh->nlmsg_len > min_len) {
int attrlen = nlh->nlmsg_len - NLMSG_ALIGN(min_len);
@@ -780,25 +876,18 @@ rtnetlink_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh, int *errp)
unsigned flavor = attr->rta_type;
if (flavor) {
if (flavor > rta_max[sz_idx])
- goto err_inval;
+ return -EINVAL;
rta_buf[flavor-1] = attr;
}
attr = RTA_NEXT(attr, attrlen);
}
}
- if (link->doit == NULL)
- link = &(rtnetlink_links[PF_UNSPEC][type]);
- if (link->doit == NULL)
- goto err_inval;
- err = link->doit(skb, nlh, (void *)&rta_buf[0]);
+ doit = rtnl_get_doit(family, type);
+ if (doit == NULL)
+ return -EOPNOTSUPP;
- *errp = err;
- return err;
-
-err_inval:
- *errp = -EINVAL;
- return -1;
+ return doit(skb, nlh, (void *)&rta_buf[0]);
}
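
The sz_idx/kind arithmetic above leans on the rtnetlink numbering convention; a short worked note (constants as defined in linux/rtnetlink.h of this era):

/* Message types come in groups of four relative to RTM_BASE:
 *	kind   = type & 3;	0 = NEW, 1 = DEL, 2 = GET, 3 = SET
 *	sz_idx = type >> 2;	indexes rtm_min[] / rta_max[] per group
 * e.g. RTM_GETLINK (18) - RTM_BASE (16) = 2  ->  sz_idx 0, kind 2,
 * which is why kind == 2 with NLM_F_DUMP set routes to dumpit above. */
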
static void rtnetlink_rcv(struct sock *sk, int len)
@@ -814,25 +903,6 @@ static void rtnetlink_rcv(struct sock *sk, int len)
} while (qlen);
}
-static struct rtnetlink_link link_rtnetlink_table[RTM_NR_MSGTYPES] =
-{
- [RTM_GETLINK - RTM_BASE] = { .doit = rtnl_getlink,
- .dumpit = rtnl_dump_ifinfo },
- [RTM_SETLINK - RTM_BASE] = { .doit = rtnl_setlink },
- [RTM_GETADDR - RTM_BASE] = { .dumpit = rtnl_dump_all },
- [RTM_GETROUTE - RTM_BASE] = { .dumpit = rtnl_dump_all },
- [RTM_NEWNEIGH - RTM_BASE] = { .doit = neigh_add },
- [RTM_DELNEIGH - RTM_BASE] = { .doit = neigh_delete },
- [RTM_GETNEIGH - RTM_BASE] = { .dumpit = neigh_dump_info },
-#ifdef CONFIG_FIB_RULES
- [RTM_NEWRULE - RTM_BASE] = { .doit = fib_nl_newrule },
- [RTM_DELRULE - RTM_BASE] = { .doit = fib_nl_delrule },
-#endif
- [RTM_GETRULE - RTM_BASE] = { .dumpit = rtnl_dump_all },
- [RTM_GETNEIGHTBL - RTM_BASE] = { .dumpit = neightbl_dump_info },
- [RTM_SETNEIGHTBL - RTM_BASE] = { .doit = neightbl_set },
-};
-
static int rtnetlink_event(struct notifier_block *this, unsigned long event, void *ptr)
{
struct net_device *dev = ptr;
@@ -874,19 +944,22 @@ void __init rtnetlink_init(void)
panic("rtnetlink_init: cannot allocate rta_buf\n");
rtnl = netlink_kernel_create(NETLINK_ROUTE, RTNLGRP_MAX, rtnetlink_rcv,
- THIS_MODULE);
+ &rtnl_mutex, THIS_MODULE);
if (rtnl == NULL)
panic("rtnetlink_init: cannot initialize rtnetlink\n");
netlink_set_nonroot(NETLINK_ROUTE, NL_NONROOT_RECV);
register_netdevice_notifier(&rtnetlink_dev_notifier);
- rtnetlink_links[PF_UNSPEC] = link_rtnetlink_table;
- rtnetlink_links[PF_PACKET] = link_rtnetlink_table;
+
+ rtnl_register(PF_UNSPEC, RTM_GETLINK, rtnl_getlink, rtnl_dump_ifinfo);
+ rtnl_register(PF_UNSPEC, RTM_SETLINK, rtnl_setlink, NULL);
+
+ rtnl_register(PF_UNSPEC, RTM_GETADDR, NULL, rtnl_dump_all);
+ rtnl_register(PF_UNSPEC, RTM_GETROUTE, NULL, rtnl_dump_all);
}
EXPORT_SYMBOL(__rta_fill);
EXPORT_SYMBOL(rtattr_strlcpy);
EXPORT_SYMBOL(rtattr_parse);
-EXPORT_SYMBOL(rtnetlink_links);
EXPORT_SYMBOL(rtnetlink_put_metrics);
EXPORT_SYMBOL(rtnl_lock);
EXPORT_SYMBOL(rtnl_trylock);
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 336958fbbcb..142257307fa 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -55,6 +55,7 @@
#include <linux/cache.h>
#include <linux/rtnetlink.h>
#include <linux/init.h>
+#include <linux/scatterlist.h>
#include <net/protocol.h>
#include <net/dst.h>
@@ -87,8 +88,9 @@ static struct kmem_cache *skbuff_fclone_cache __read_mostly;
void skb_over_panic(struct sk_buff *skb, int sz, void *here)
{
printk(KERN_EMERG "skb_over_panic: text:%p len:%d put:%d head:%p "
- "data:%p tail:%p end:%p dev:%s\n",
- here, skb->len, sz, skb->head, skb->data, skb->tail, skb->end,
+ "data:%p tail:%#lx end:%#lx dev:%s\n",
+ here, skb->len, sz, skb->head, skb->data,
+ (unsigned long)skb->tail, (unsigned long)skb->end,
skb->dev ? skb->dev->name : "<NULL>");
BUG();
}
@@ -105,8 +107,9 @@ void skb_over_panic(struct sk_buff *skb, int sz, void *here)
void skb_under_panic(struct sk_buff *skb, int sz, void *here)
{
printk(KERN_EMERG "skb_under_panic: text:%p len:%d put:%d head:%p "
- "data:%p tail:%p end:%p dev:%s\n",
- here, skb->len, sz, skb->head, skb->data, skb->tail, skb->end,
+ "data:%p tail:%#lx end:%#lx dev:%s\n",
+ here, skb->len, sz, skb->head, skb->data,
+ (unsigned long)skb->tail, (unsigned long)skb->end,
skb->dev ? skb->dev->name : "<NULL>");
BUG();
}
@@ -155,20 +158,22 @@ struct sk_buff *__alloc_skb(unsigned int size, gfp_t gfp_mask,
if (!skb)
goto out;
- /* Get the DATA. Size must match skb_add_mtu(). */
size = SKB_DATA_ALIGN(size);
data = kmalloc_node_track_caller(size + sizeof(struct skb_shared_info),
gfp_mask, node);
if (!data)
goto nodata;
- memset(skb, 0, offsetof(struct sk_buff, truesize));
+ /*
+ * See comment in sk_buff definition, just before the 'tail' member
+ */
+ memset(skb, 0, offsetof(struct sk_buff, tail));
skb->truesize = size + sizeof(struct sk_buff);
atomic_set(&skb->users, 1);
skb->head = data;
skb->data = data;
- skb->tail = data;
- skb->end = data + size;
+ skb_reset_tail_pointer(skb);
+ skb->end = skb->tail + size;
/* make sure we initialize shinfo sequentially */
shinfo = skb_shinfo(skb);
atomic_set(&shinfo->dataref, 1);
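
The memset-to-offsetof and skb_reset_tail_pointer() changes depend on the reworked tail/end representation. A sketch of the two build variants (skb_tail_sketch is a hypothetical name; the real helper is skb_tail_pointer()):

#ifdef NET_SKBUFF_DATA_USES_OFFSET	/* typically 64-bit: tail/end are offsets */
static inline unsigned char *skb_tail_sketch(const struct sk_buff *skb)
{
	return skb->head + skb->tail;
}
#else					/* 32-bit: tail/end remain pointers */
static inline unsigned char *skb_tail_sketch(const struct sk_buff *skb)
{
	return skb->tail;
}
#endif
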
@@ -299,7 +304,7 @@ void kfree_skbmem(struct sk_buff *skb)
if (atomic_dec_and_test(fclone_ref))
kmem_cache_free(skbuff_fclone_cache, other);
break;
- };
+ }
}
/**
@@ -321,15 +326,13 @@ void __kfree_skb(struct sk_buff *skb)
WARN_ON(in_irq());
skb->destructor(skb);
}
-#ifdef CONFIG_NETFILTER
- nf_conntrack_put(skb->nfct);
#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
+ nf_conntrack_put(skb->nfct);
nf_conntrack_put_reasm(skb->nfct_reasm);
#endif
#ifdef CONFIG_BRIDGE_NETFILTER
nf_bridge_put(skb->nf_bridge);
#endif
-#endif
/* XXX: IS this still necessary? - JHS */
#ifdef CONFIG_NET_SCHED
skb->tc_index = 0;
@@ -396,9 +399,9 @@ struct sk_buff *skb_clone(struct sk_buff *skb, gfp_t gfp_mask)
n->sk = NULL;
C(tstamp);
C(dev);
- C(h);
- C(nh);
- C(mac);
+ C(transport_header);
+ C(network_header);
+ C(mac_header);
C(dst);
dst_clone(skb->dst);
C(sp);
@@ -422,19 +425,7 @@ struct sk_buff *skb_clone(struct sk_buff *skb, gfp_t gfp_mask)
C(protocol);
n->destructor = NULL;
C(mark);
-#ifdef CONFIG_NETFILTER
- C(nfct);
- nf_conntrack_get(skb->nfct);
- C(nfctinfo);
-#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
- C(nfct_reasm);
- nf_conntrack_get_reasm(skb->nfct_reasm);
-#endif
-#ifdef CONFIG_BRIDGE_NETFILTER
- C(nf_bridge);
- nf_bridge_get(skb->nf_bridge);
-#endif
-#endif /*CONFIG_NETFILTER*/
+ __nf_copy(n, skb);
#ifdef CONFIG_NET_SCHED
C(tc_index);
#ifdef CONFIG_NET_CLS_ACT
@@ -460,11 +451,12 @@ struct sk_buff *skb_clone(struct sk_buff *skb, gfp_t gfp_mask)
static void copy_skb_header(struct sk_buff *new, const struct sk_buff *old)
{
+#ifndef NET_SKBUFF_DATA_USES_OFFSET
/*
* Shift between the two data areas in bytes
*/
unsigned long offset = new->data - old->data;
-
+#endif
new->sk = NULL;
new->dev = old->dev;
new->priority = old->priority;
@@ -473,9 +465,15 @@ static void copy_skb_header(struct sk_buff *new, const struct sk_buff *old)
#ifdef CONFIG_INET
new->sp = secpath_get(old->sp);
#endif
- new->h.raw = old->h.raw + offset;
- new->nh.raw = old->nh.raw + offset;
- new->mac.raw = old->mac.raw + offset;
+ new->transport_header = old->transport_header;
+ new->network_header = old->network_header;
+ new->mac_header = old->mac_header;
+#ifndef NET_SKBUFF_DATA_USES_OFFSET
+ /* {transport,network,mac}_header are relative to skb->head */
+ new->transport_header += offset;
+ new->network_header += offset;
+ new->mac_header += offset;
+#endif
memcpy(new->cb, old->cb, sizeof(old->cb));
new->local_df = old->local_df;
new->fclone = SKB_FCLONE_UNAVAILABLE;
@@ -483,22 +481,10 @@ static void copy_skb_header(struct sk_buff *new, const struct sk_buff *old)
new->tstamp = old->tstamp;
new->destructor = NULL;
new->mark = old->mark;
-#ifdef CONFIG_NETFILTER
- new->nfct = old->nfct;
- nf_conntrack_get(old->nfct);
- new->nfctinfo = old->nfctinfo;
-#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
- new->nfct_reasm = old->nfct_reasm;
- nf_conntrack_get_reasm(old->nfct_reasm);
-#endif
+ __nf_copy(new, old);
#if defined(CONFIG_IP_VS) || defined(CONFIG_IP_VS_MODULE)
new->ipvs_property = old->ipvs_property;
#endif
-#ifdef CONFIG_BRIDGE_NETFILTER
- new->nf_bridge = old->nf_bridge;
- nf_bridge_get(old->nf_bridge);
-#endif
-#endif
#ifdef CONFIG_NET_SCHED
#ifdef CONFIG_NET_CLS_ACT
new->tc_verd = old->tc_verd;
@@ -535,8 +521,12 @@ struct sk_buff *skb_copy(const struct sk_buff *skb, gfp_t gfp_mask)
/*
* Allocate the copy buffer
*/
- struct sk_buff *n = alloc_skb(skb->end - skb->head + skb->data_len,
- gfp_mask);
+ struct sk_buff *n;
+#ifdef NET_SKBUFF_DATA_USES_OFFSET
+ n = alloc_skb(skb->end + skb->data_len, gfp_mask);
+#else
+ n = alloc_skb(skb->end - skb->head + skb->data_len, gfp_mask);
+#endif
if (!n)
return NULL;
@@ -573,8 +563,12 @@ struct sk_buff *pskb_copy(struct sk_buff *skb, gfp_t gfp_mask)
/*
* Allocate the copy buffer
*/
- struct sk_buff *n = alloc_skb(skb->end - skb->head, gfp_mask);
-
+ struct sk_buff *n;
+#ifdef NET_SKBUFF_DATA_USES_OFFSET
+ n = alloc_skb(skb->end, gfp_mask);
+#else
+ n = alloc_skb(skb->end - skb->head, gfp_mask);
+#endif
if (!n)
goto out;
@@ -583,7 +577,7 @@ struct sk_buff *pskb_copy(struct sk_buff *skb, gfp_t gfp_mask)
/* Set the tail pointer and length */
skb_put(n, skb_headlen(skb));
/* Copy the bytes */
- memcpy(n->data, skb->data, n->len);
+ skb_copy_from_linear_data(skb, n->data, n->len);
n->csum = skb->csum;
n->ip_summed = skb->ip_summed;
@@ -632,7 +626,11 @@ int pskb_expand_head(struct sk_buff *skb, int nhead, int ntail,
{
int i;
u8 *data;
+#ifdef NET_SKBUFF_DATA_USES_OFFSET
+ int size = nhead + skb->end + ntail;
+#else
int size = nhead + (skb->end - skb->head) + ntail;
+#endif
long off;
if (skb_shared(skb))
@@ -646,8 +644,14 @@ int pskb_expand_head(struct sk_buff *skb, int nhead, int ntail,
/* Copy only real data... and, alas, header. This should be
* optimized for the cases when header is void. */
- memcpy(data + nhead, skb->head, skb->tail - skb->head);
- memcpy(data + size, skb->end, sizeof(struct skb_shared_info));
+ memcpy(data + nhead, skb->head,
+#ifdef NET_SKBUFF_DATA_USES_OFFSET
+ skb->tail);
+#else
+ skb->tail - skb->head);
+#endif
+ memcpy(data + size, skb_end_pointer(skb),
+ sizeof(struct skb_shared_info));
for (i = 0; i < skb_shinfo(skb)->nr_frags; i++)
get_page(skb_shinfo(skb)->frags[i].page);
@@ -660,12 +664,18 @@ int pskb_expand_head(struct sk_buff *skb, int nhead, int ntail,
off = (data + nhead) - skb->head;
skb->head = data;
- skb->end = data + size;
skb->data += off;
- skb->tail += off;
- skb->mac.raw += off;
- skb->h.raw += off;
- skb->nh.raw += off;
+#ifdef NET_SKBUFF_DATA_USES_OFFSET
+ skb->end = size;
+ off = nhead;
+#else
+ skb->end = skb->head + size;
+#endif
+ /* {transport,network,mac}_header and tail are relative to skb->head */
+ skb->tail += off;
+ skb->transport_header += off;
+ skb->network_header += off;
+ skb->mac_header += off;
skb->cloned = 0;
skb->nohdr = 0;
atomic_set(&skb_shinfo(skb)->dataref, 1);
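
Why off collapses to nhead on offset builds above: the header fields are measured from skb->head, and the copy placed the old data exactly nhead bytes into the new buffer, so every stored offset grows by nhead; only the pointer build needs the full pointer delta. In sketch form:

/* offset build:  new_hdr_off = old_hdr_off + nhead
 * pointer build: new_hdr_ptr = old_hdr_ptr + ((data + nhead) - old_head)
 * Either way skb_network_header(skb) lands on the same payload byte. */
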
@@ -726,7 +736,9 @@ struct sk_buff *skb_copy_expand(const struct sk_buff *skb,
*/
struct sk_buff *n = alloc_skb(newheadroom + skb->len + newtailroom,
gfp_mask);
+ int oldheadroom = skb_headroom(skb);
int head_copy_len, head_copy_off;
+ int off = 0;
if (!n)
return NULL;
@@ -736,7 +748,7 @@ struct sk_buff *skb_copy_expand(const struct sk_buff *skb,
/* Set the tail pointer and length */
skb_put(n, skb->len);
- head_copy_len = skb_headroom(skb);
+ head_copy_len = oldheadroom;
head_copy_off = 0;
if (newheadroom <= head_copy_len)
head_copy_len = newheadroom;
@@ -750,6 +762,13 @@ struct sk_buff *skb_copy_expand(const struct sk_buff *skb,
copy_skb_header(n, skb);
+#ifdef NET_SKBUFF_DATA_USES_OFFSET
+ off = newheadroom - oldheadroom;
+#endif
+ n->transport_header += off;
+ n->network_header += off;
+ n->mac_header += off;
+
return n;
}
@@ -877,7 +896,7 @@ done:
} else {
skb->len = len;
skb->data_len = 0;
- skb->tail = skb->data + len;
+ skb_set_tail_pointer(skb, len);
}
return 0;
@@ -922,7 +941,7 @@ unsigned char *__pskb_pull_tail(struct sk_buff *skb, int delta)
return NULL;
}
- if (skb_copy_bits(skb, skb_headlen(skb), skb->tail, delta))
+ if (skb_copy_bits(skb, skb_headlen(skb), skb_tail_pointer(skb), delta))
BUG();
/* Optimization: no fragments, no reasons to preestimate
@@ -1018,7 +1037,7 @@ pull_pages:
skb->tail += delta;
skb->data_len -= delta;
- return skb->tail;
+ return skb_tail_pointer(skb);
}
/* Copy some data bits from skb to kernel buffer. */
@@ -1035,7 +1054,7 @@ int skb_copy_bits(const struct sk_buff *skb, int offset, void *to, int len)
if ((copy = start - offset) > 0) {
if (copy > len)
copy = len;
- memcpy(to, skb->data + offset, copy);
+ skb_copy_from_linear_data_offset(skb, offset, to, copy);
if ((len -= copy) == 0)
return 0;
offset += copy;
@@ -1110,7 +1129,7 @@ fault:
* traversing fragment lists and such.
*/
-int skb_store_bits(const struct sk_buff *skb, int offset, void *from, int len)
+int skb_store_bits(struct sk_buff *skb, int offset, const void *from, int len)
{
int i, copy;
int start = skb_headlen(skb);
@@ -1121,7 +1140,7 @@ int skb_store_bits(const struct sk_buff *skb, int offset, void *from, int len)
if ((copy = start - offset) > 0) {
if (copy > len)
copy = len;
- memcpy(skb->data + offset, from, copy);
+ skb_copy_to_linear_data_offset(skb, offset, from, copy);
if ((len -= copy) == 0)
return 0;
offset += copy;
@@ -1348,13 +1367,13 @@ void skb_copy_and_csum_dev(const struct sk_buff *skb, u8 *to)
long csstart;
if (skb->ip_summed == CHECKSUM_PARTIAL)
- csstart = skb->h.raw - skb->data;
+ csstart = skb->csum_start - skb_headroom(skb);
else
csstart = skb_headlen(skb);
BUG_ON(csstart > skb_headlen(skb));
- memcpy(to, skb->data, csstart);
+ skb_copy_from_linear_data(skb, to, csstart);
csum = 0;
if (csstart != skb->len)
@@ -1522,27 +1541,14 @@ void skb_insert(struct sk_buff *old, struct sk_buff *newsk, struct sk_buff_head
spin_unlock_irqrestore(&list->lock, flags);
}
-#if 0
-/*
- * Tune the memory allocator for a new MTU size.
- */
-void skb_add_mtu(int mtu)
-{
- /* Must match allocation in alloc_skb */
- mtu = SKB_DATA_ALIGN(mtu) + sizeof(struct skb_shared_info);
-
- kmem_add_cache_size(mtu);
-}
-#endif
-
static inline void skb_split_inside_header(struct sk_buff *skb,
struct sk_buff* skb1,
const u32 len, const int pos)
{
int i;
- memcpy(skb_put(skb1, pos - len), skb->data + len, pos - len);
-
+ skb_copy_from_linear_data_offset(skb, len, skb_put(skb1, pos - len),
+ pos - len);
/* And move data appendix as is. */
for (i = 0; i < skb_shinfo(skb)->nr_frags; i++)
skb_shinfo(skb1)->frags[i] = skb_shinfo(skb)->frags[i];
@@ -1553,7 +1559,7 @@ static inline void skb_split_inside_header(struct sk_buff *skb,
skb1->len += skb1->data_len;
skb->data_len = 0;
skb->len = len;
- skb->tail = skb->data + len;
+ skb_set_tail_pointer(skb, len);
}
static inline void skb_split_no_header(struct sk_buff *skb,
@@ -1878,7 +1884,7 @@ struct sk_buff *skb_segment(struct sk_buff *skb, int features)
struct sk_buff *segs = NULL;
struct sk_buff *tail = NULL;
unsigned int mss = skb_shinfo(skb)->gso_size;
- unsigned int doffset = skb->data - skb->mac.raw;
+ unsigned int doffset = skb->data - skb_mac_header(skb);
unsigned int offset = doffset;
unsigned int headroom;
unsigned int len;
@@ -1928,11 +1934,12 @@ struct sk_buff *skb_segment(struct sk_buff *skb, int features)
nskb->mac_len = skb->mac_len;
skb_reserve(nskb, headroom);
- nskb->mac.raw = nskb->data;
- nskb->nh.raw = nskb->data + skb->mac_len;
- nskb->h.raw = nskb->nh.raw + (skb->h.raw - skb->nh.raw);
- memcpy(skb_put(nskb, doffset), skb->data, doffset);
-
+ skb_reset_mac_header(nskb);
+ skb_set_network_header(nskb, skb->mac_len);
+ nskb->transport_header = (nskb->network_header +
+ skb_network_header_len(skb));
+ skb_copy_from_linear_data(skb, skb_put(nskb, doffset),
+ doffset);
if (!sg) {
nskb->csum = skb_copy_and_csum_bits(skb, offset,
skb_put(nskb, len),
@@ -1945,7 +1952,8 @@ struct sk_buff *skb_segment(struct sk_buff *skb, int features)
nskb->ip_summed = CHECKSUM_PARTIAL;
nskb->csum = skb->csum;
- memcpy(skb_put(nskb, hsize), skb->data + offset, hsize);
+ skb_copy_from_linear_data_offset(skb, offset,
+ skb_put(nskb, hsize), hsize);
while (pos < offset + len) {
BUG_ON(i >= nfrags);
@@ -2005,6 +2013,190 @@ void __init skb_init(void)
NULL, NULL);
}
+/**
+ * skb_to_sgvec - Fill a scatter-gather list from a socket buffer
+ * @skb: Socket buffer containing the buffers to be mapped
+ * @sg: The scatter-gather list to map into
+ * @offset: The offset into the buffer's contents to start mapping
+ * @len: Length of buffer space to be mapped
+ *
+ * Fill the specified scatter-gather list with mappings/pointers into a
+ * region of the buffer space attached to a socket buffer.
+ */
+int
+skb_to_sgvec(struct sk_buff *skb, struct scatterlist *sg, int offset, int len)
+{
+ int start = skb_headlen(skb);
+ int i, copy = start - offset;
+ int elt = 0;
+
+ if (copy > 0) {
+ if (copy > len)
+ copy = len;
+ sg[elt].page = virt_to_page(skb->data + offset);
+ sg[elt].offset = (unsigned long)(skb->data + offset) % PAGE_SIZE;
+ sg[elt].length = copy;
+ elt++;
+ if ((len -= copy) == 0)
+ return elt;
+ offset += copy;
+ }
+
+ for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
+ int end;
+
+ BUG_TRAP(start <= offset + len);
+
+ end = start + skb_shinfo(skb)->frags[i].size;
+ if ((copy = end - offset) > 0) {
+ skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
+
+ if (copy > len)
+ copy = len;
+ sg[elt].page = frag->page;
+ sg[elt].offset = frag->page_offset+offset-start;
+ sg[elt].length = copy;
+ elt++;
+ if (!(len -= copy))
+ return elt;
+ offset += copy;
+ }
+ start = end;
+ }
+
+ if (skb_shinfo(skb)->frag_list) {
+ struct sk_buff *list = skb_shinfo(skb)->frag_list;
+
+ for (; list; list = list->next) {
+ int end;
+
+ BUG_TRAP(start <= offset + len);
+
+ end = start + list->len;
+ if ((copy = end - offset) > 0) {
+ if (copy > len)
+ copy = len;
+ elt += skb_to_sgvec(list, sg+elt, offset - start, copy);
+ if ((len -= copy) == 0)
+ return elt;
+ offset += copy;
+ }
+ start = end;
+ }
+ }
+ BUG_ON(len);
+ return elt;
+}
+
+/**
+ * skb_cow_data - Check that a socket buffer's data buffers are writable
+ * @skb: The socket buffer to check.
+ * @tailbits: Amount of trailing space to be added
+ * @trailer: Returned pointer to the skb where the @tailbits space begins
+ *
+ * Make sure that the data buffers attached to a socket buffer are
+ * writable. If they are not, private copies are made of the data buffers
+ * and the socket buffer is set to use these instead.
+ *
+ * If @tailbits is given, make sure that there is space to write @tailbits
+ * bytes of data beyond the current end of the socket buffer. @trailer will
+ * be set to point to the skb in which this space begins.
+ *
+ * The number of scatterlist elements required to completely map the
+ * COW'd and extended socket buffer will be returned.
+ */
+int skb_cow_data(struct sk_buff *skb, int tailbits, struct sk_buff **trailer)
+{
+ int copyflag;
+ int elt;
+ struct sk_buff *skb1, **skb_p;
+
+ /* If skb is cloned or its head is paged, reallocate
+ * head pulling out all the pages (pages are considered not writable
+ * at the moment even if they are anonymous).
+ */
+ if ((skb_cloned(skb) || skb_shinfo(skb)->nr_frags) &&
+ __pskb_pull_tail(skb, skb_pagelen(skb)-skb_headlen(skb)) == NULL)
+ return -ENOMEM;
+
+	/* Easy case. Most packets will go this way. */
+ if (!skb_shinfo(skb)->frag_list) {
+		/* A little bit of trouble: not enough space for the trailer.
+		 * This should not happen when the stack is tuned to generate
+		 * good frames. OK, on a miss we reallocate and reserve even more
+		 * space; 128 bytes is fair. */
+
+ if (skb_tailroom(skb) < tailbits &&
+ pskb_expand_head(skb, 0, tailbits-skb_tailroom(skb)+128, GFP_ATOMIC))
+ return -ENOMEM;
+
+ /* Voila! */
+ *trailer = skb;
+ return 1;
+ }
+
+	/* Misery. We are in trouble, going to mince the fragments... */
+
+ elt = 1;
+ skb_p = &skb_shinfo(skb)->frag_list;
+ copyflag = 0;
+
+ while ((skb1 = *skb_p) != NULL) {
+ int ntail = 0;
+
+		/* The fragment is partially pulled by someone;
+		 * this can happen on input. Copy it and everything
+		 * after it. */
+
+ if (skb_shared(skb1))
+ copyflag = 1;
+
+ /* If the skb is the last, worry about trailer. */
+
+ if (skb1->next == NULL && tailbits) {
+ if (skb_shinfo(skb1)->nr_frags ||
+ skb_shinfo(skb1)->frag_list ||
+ skb_tailroom(skb1) < tailbits)
+ ntail = tailbits + 128;
+ }
+
+ if (copyflag ||
+ skb_cloned(skb1) ||
+ ntail ||
+ skb_shinfo(skb1)->nr_frags ||
+ skb_shinfo(skb1)->frag_list) {
+ struct sk_buff *skb2;
+
+ /* Fuck, we are miserable poor guys... */
+ if (ntail == 0)
+ skb2 = skb_copy(skb1, GFP_ATOMIC);
+ else
+ skb2 = skb_copy_expand(skb1,
+ skb_headroom(skb1),
+ ntail,
+ GFP_ATOMIC);
+ if (unlikely(skb2 == NULL))
+ return -ENOMEM;
+
+ if (skb1->sk)
+ skb_set_owner_w(skb2, skb1->sk);
+
+ /* Looking around. Are we still alive?
+ * OK, link new skb, drop old one */
+
+ skb2->next = skb1->next;
+ *skb_p = skb2;
+ kfree_skb(skb1);
+ skb1 = skb2;
+ }
+ elt++;
+ *trailer = skb1;
+ skb_p = &skb1->next;
+ }
+
+ return elt;
+}
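
The two helpers are meant to be used as a pair, IPsec-style: skb_cow_data() makes the buffers writable and returns the scatterlist element count, which then sizes the array handed to skb_to_sgvec(). A hedged sketch of a caller:

static int cow_and_map_sketch(struct sk_buff *skb)
{
	struct sk_buff *trailer;
	struct scatterlist *sg;
	int nsg;

	nsg = skb_cow_data(skb, 0, &trailer);	/* COW + count elements */
	if (nsg < 0)
		return nsg;
	sg = kmalloc(nsg * sizeof(*sg), GFP_ATOMIC);
	if (!sg)
		return -ENOMEM;
	skb_to_sgvec(skb, sg, 0, skb->len);	/* map every byte into sg[] */
	/* ... hand sg/nsg to the crypto layer ... */
	kfree(sg);
	return 0;
}
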
+
EXPORT_SYMBOL(___pskb_trim);
EXPORT_SYMBOL(__kfree_skb);
EXPORT_SYMBOL(kfree_skb);
@@ -2039,3 +2231,6 @@ EXPORT_SYMBOL(skb_seq_read);
EXPORT_SYMBOL(skb_abort_seq_read);
EXPORT_SYMBOL(skb_find_text);
EXPORT_SYMBOL(skb_append_datato_frags);
+
+EXPORT_SYMBOL_GPL(skb_to_sgvec);
+EXPORT_SYMBOL_GPL(skb_cow_data);
diff --git a/net/core/sock.c b/net/core/sock.c
index 27c4f62382b..22183c2ef28 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -154,7 +154,8 @@ static const char *af_family_key_strings[AF_MAX+1] = {
"sk_lock-21" , "sk_lock-AF_SNA" , "sk_lock-AF_IRDA" ,
"sk_lock-AF_PPPOX" , "sk_lock-AF_WANPIPE" , "sk_lock-AF_LLC" ,
"sk_lock-27" , "sk_lock-28" , "sk_lock-29" ,
- "sk_lock-AF_TIPC" , "sk_lock-AF_BLUETOOTH", "sk_lock-AF_MAX"
+ "sk_lock-AF_TIPC" , "sk_lock-AF_BLUETOOTH", "sk_lock-AF_IUCV" ,
+ "sk_lock-AF_RXRPC" , "sk_lock-AF_MAX"
};
static const char *af_family_slock_key_strings[AF_MAX+1] = {
"slock-AF_UNSPEC", "slock-AF_UNIX" , "slock-AF_INET" ,
@@ -167,7 +168,8 @@ static const char *af_family_slock_key_strings[AF_MAX+1] = {
"slock-21" , "slock-AF_SNA" , "slock-AF_IRDA" ,
"slock-AF_PPPOX" , "slock-AF_WANPIPE" , "slock-AF_LLC" ,
"slock-27" , "slock-28" , "slock-29" ,
- "slock-AF_TIPC" , "slock-AF_BLUETOOTH", "slock-AF_MAX"
+ "slock-AF_TIPC" , "slock-AF_BLUETOOTH", "slock-AF_IUCV" ,
+ "slock-AF_RXRPC" , "slock-AF_MAX"
};
#endif
@@ -361,8 +363,8 @@ int sock_setsockopt(struct socket *sock, int level, int optname,
}
#endif
- if(optlen<sizeof(int))
- return(-EINVAL);
+ if (optlen < sizeof(int))
+ return -EINVAL;
if (get_user(val, (int __user *)optval))
return -EFAULT;
@@ -371,265 +373,270 @@ int sock_setsockopt(struct socket *sock, int level, int optname,
lock_sock(sk);
- switch(optname)
- {
- case SO_DEBUG:
- if(val && !capable(CAP_NET_ADMIN))
- {
- ret = -EACCES;
- }
- else if (valbool)
- sock_set_flag(sk, SOCK_DBG);
- else
- sock_reset_flag(sk, SOCK_DBG);
- break;
- case SO_REUSEADDR:
- sk->sk_reuse = valbool;
- break;
- case SO_TYPE:
- case SO_ERROR:
- ret = -ENOPROTOOPT;
- break;
- case SO_DONTROUTE:
- if (valbool)
- sock_set_flag(sk, SOCK_LOCALROUTE);
- else
- sock_reset_flag(sk, SOCK_LOCALROUTE);
- break;
- case SO_BROADCAST:
- sock_valbool_flag(sk, SOCK_BROADCAST, valbool);
- break;
- case SO_SNDBUF:
- /* Don't error on this BSD doesn't and if you think
- about it this is right. Otherwise apps have to
- play 'guess the biggest size' games. RCVBUF/SNDBUF
- are treated in BSD as hints */
-
- if (val > sysctl_wmem_max)
- val = sysctl_wmem_max;
+	switch (optname) {
+ case SO_DEBUG:
+ if (val && !capable(CAP_NET_ADMIN)) {
+ ret = -EACCES;
+		} else if (valbool)
+ sock_set_flag(sk, SOCK_DBG);
+ else
+ sock_reset_flag(sk, SOCK_DBG);
+ break;
+ case SO_REUSEADDR:
+ sk->sk_reuse = valbool;
+ break;
+ case SO_TYPE:
+ case SO_ERROR:
+ ret = -ENOPROTOOPT;
+ break;
+ case SO_DONTROUTE:
+ if (valbool)
+ sock_set_flag(sk, SOCK_LOCALROUTE);
+ else
+ sock_reset_flag(sk, SOCK_LOCALROUTE);
+ break;
+ case SO_BROADCAST:
+ sock_valbool_flag(sk, SOCK_BROADCAST, valbool);
+ break;
+ case SO_SNDBUF:
+		/* Don't error on this; BSD doesn't, and if you think
+		   about it, this is right. Otherwise apps have to
+		   play 'guess the biggest size' games. RCVBUF/SNDBUF
+		   are treated in BSD as hints */
+
+ if (val > sysctl_wmem_max)
+ val = sysctl_wmem_max;
set_sndbuf:
- sk->sk_userlocks |= SOCK_SNDBUF_LOCK;
- if ((val * 2) < SOCK_MIN_SNDBUF)
- sk->sk_sndbuf = SOCK_MIN_SNDBUF;
- else
- sk->sk_sndbuf = val * 2;
+ sk->sk_userlocks |= SOCK_SNDBUF_LOCK;
+ if ((val * 2) < SOCK_MIN_SNDBUF)
+ sk->sk_sndbuf = SOCK_MIN_SNDBUF;
+ else
+ sk->sk_sndbuf = val * 2;
- /*
- * Wake up sending tasks if we
- * upped the value.
- */
- sk->sk_write_space(sk);
- break;
+ /*
+ * Wake up sending tasks if we
+ * upped the value.
+ */
+ sk->sk_write_space(sk);
+ break;
- case SO_SNDBUFFORCE:
- if (!capable(CAP_NET_ADMIN)) {
- ret = -EPERM;
- break;
- }
- goto set_sndbuf;
+ case SO_SNDBUFFORCE:
+ if (!capable(CAP_NET_ADMIN)) {
+ ret = -EPERM;
+ break;
+ }
+ goto set_sndbuf;
- case SO_RCVBUF:
- /* Don't error on this BSD doesn't and if you think
- about it this is right. Otherwise apps have to
- play 'guess the biggest size' games. RCVBUF/SNDBUF
- are treated in BSD as hints */
+ case SO_RCVBUF:
+		/* Don't error on this; BSD doesn't, and if you think
+		   about it, this is right. Otherwise apps have to
+		   play 'guess the biggest size' games. RCVBUF/SNDBUF
+		   are treated in BSD as hints */
- if (val > sysctl_rmem_max)
- val = sysctl_rmem_max;
+ if (val > sysctl_rmem_max)
+ val = sysctl_rmem_max;
set_rcvbuf:
- sk->sk_userlocks |= SOCK_RCVBUF_LOCK;
- /*
- * We double it on the way in to account for
- * "struct sk_buff" etc. overhead. Applications
- * assume that the SO_RCVBUF setting they make will
- * allow that much actual data to be received on that
- * socket.
- *
- * Applications are unaware that "struct sk_buff" and
- * other overheads allocate from the receive buffer
- * during socket buffer allocation.
- *
- * And after considering the possible alternatives,
- * returning the value we actually used in getsockopt
- * is the most desirable behavior.
- */
- if ((val * 2) < SOCK_MIN_RCVBUF)
- sk->sk_rcvbuf = SOCK_MIN_RCVBUF;
- else
- sk->sk_rcvbuf = val * 2;
+ sk->sk_userlocks |= SOCK_RCVBUF_LOCK;
+ /*
+ * We double it on the way in to account for
+ * "struct sk_buff" etc. overhead. Applications
+ * assume that the SO_RCVBUF setting they make will
+ * allow that much actual data to be received on that
+ * socket.
+ *
+ * Applications are unaware that "struct sk_buff" and
+ * other overheads allocate from the receive buffer
+ * during socket buffer allocation.
+ *
+ * And after considering the possible alternatives,
+ * returning the value we actually used in getsockopt
+ * is the most desirable behavior.
+ */
+ if ((val * 2) < SOCK_MIN_RCVBUF)
+ sk->sk_rcvbuf = SOCK_MIN_RCVBUF;
+ else
+ sk->sk_rcvbuf = val * 2;
+ break;
+
+ case SO_RCVBUFFORCE:
+ if (!capable(CAP_NET_ADMIN)) {
+ ret = -EPERM;
break;
+ }
+ goto set_rcvbuf;
- case SO_RCVBUFFORCE:
- if (!capable(CAP_NET_ADMIN)) {
- ret = -EPERM;
- break;
- }
- goto set_rcvbuf;
-
- case SO_KEEPALIVE:
+ case SO_KEEPALIVE:
#ifdef CONFIG_INET
- if (sk->sk_protocol == IPPROTO_TCP)
- tcp_set_keepalive(sk, valbool);
+ if (sk->sk_protocol == IPPROTO_TCP)
+ tcp_set_keepalive(sk, valbool);
#endif
- sock_valbool_flag(sk, SOCK_KEEPOPEN, valbool);
- break;
-
- case SO_OOBINLINE:
- sock_valbool_flag(sk, SOCK_URGINLINE, valbool);
+ sock_valbool_flag(sk, SOCK_KEEPOPEN, valbool);
+ break;
+
+ case SO_OOBINLINE:
+ sock_valbool_flag(sk, SOCK_URGINLINE, valbool);
+ break;
+
+ case SO_NO_CHECK:
+ sk->sk_no_check = valbool;
+ break;
+
+ case SO_PRIORITY:
+ if ((val >= 0 && val <= 6) || capable(CAP_NET_ADMIN))
+ sk->sk_priority = val;
+ else
+ ret = -EPERM;
+ break;
+
+ case SO_LINGER:
+ if (optlen < sizeof(ling)) {
+ ret = -EINVAL; /* 1003.1g */
break;
-
- case SO_NO_CHECK:
- sk->sk_no_check = valbool;
- break;
-
- case SO_PRIORITY:
- if ((val >= 0 && val <= 6) || capable(CAP_NET_ADMIN))
- sk->sk_priority = val;
- else
- ret = -EPERM;
+ }
+		if (copy_from_user(&ling, optval, sizeof(ling))) {
+ ret = -EFAULT;
break;
-
- case SO_LINGER:
- if(optlen<sizeof(ling)) {
- ret = -EINVAL; /* 1003.1g */
- break;
- }
- if (copy_from_user(&ling,optval,sizeof(ling))) {
- ret = -EFAULT;
- break;
- }
- if (!ling.l_onoff)
- sock_reset_flag(sk, SOCK_LINGER);
- else {
+ }
+ if (!ling.l_onoff)
+ sock_reset_flag(sk, SOCK_LINGER);
+ else {
#if (BITS_PER_LONG == 32)
- if ((unsigned int)ling.l_linger >= MAX_SCHEDULE_TIMEOUT/HZ)
- sk->sk_lingertime = MAX_SCHEDULE_TIMEOUT;
- else
+ if ((unsigned int)ling.l_linger >= MAX_SCHEDULE_TIMEOUT/HZ)
+ sk->sk_lingertime = MAX_SCHEDULE_TIMEOUT;
+ else
#endif
- sk->sk_lingertime = (unsigned int)ling.l_linger * HZ;
- sock_set_flag(sk, SOCK_LINGER);
- }
- break;
-
- case SO_BSDCOMPAT:
- sock_warn_obsolete_bsdism("setsockopt");
- break;
-
- case SO_PASSCRED:
- if (valbool)
- set_bit(SOCK_PASSCRED, &sock->flags);
+ sk->sk_lingertime = (unsigned int)ling.l_linger * HZ;
+ sock_set_flag(sk, SOCK_LINGER);
+ }
+ break;
+
+ case SO_BSDCOMPAT:
+ sock_warn_obsolete_bsdism("setsockopt");
+ break;
+
+ case SO_PASSCRED:
+ if (valbool)
+ set_bit(SOCK_PASSCRED, &sock->flags);
+ else
+ clear_bit(SOCK_PASSCRED, &sock->flags);
+ break;
+
+ case SO_TIMESTAMP:
+ case SO_TIMESTAMPNS:
+ if (valbool) {
+ if (optname == SO_TIMESTAMP)
+ sock_reset_flag(sk, SOCK_RCVTSTAMPNS);
else
- clear_bit(SOCK_PASSCRED, &sock->flags);
- break;
+ sock_set_flag(sk, SOCK_RCVTSTAMPNS);
+ sock_set_flag(sk, SOCK_RCVTSTAMP);
+ sock_enable_timestamp(sk);
+ } else {
+ sock_reset_flag(sk, SOCK_RCVTSTAMP);
+ sock_reset_flag(sk, SOCK_RCVTSTAMPNS);
+ }
+ break;
- case SO_TIMESTAMP:
- if (valbool) {
- sock_set_flag(sk, SOCK_RCVTSTAMP);
- sock_enable_timestamp(sk);
- } else
- sock_reset_flag(sk, SOCK_RCVTSTAMP);
- break;
+ case SO_RCVLOWAT:
+ if (val < 0)
+ val = INT_MAX;
+ sk->sk_rcvlowat = val ? : 1;
+ break;
- case SO_RCVLOWAT:
- if (val < 0)
- val = INT_MAX;
- sk->sk_rcvlowat = val ? : 1;
- break;
+ case SO_RCVTIMEO:
+ ret = sock_set_timeout(&sk->sk_rcvtimeo, optval, optlen);
+ break;
- case SO_RCVTIMEO:
- ret = sock_set_timeout(&sk->sk_rcvtimeo, optval, optlen);
- break;
+ case SO_SNDTIMEO:
+ ret = sock_set_timeout(&sk->sk_sndtimeo, optval, optlen);
+ break;
- case SO_SNDTIMEO:
- ret = sock_set_timeout(&sk->sk_sndtimeo, optval, optlen);
+#ifdef CONFIG_NETDEVICES
+ case SO_BINDTODEVICE:
+ {
+ char devname[IFNAMSIZ];
+
+ /* Sorry... */
+ if (!capable(CAP_NET_RAW)) {
+ ret = -EPERM;
break;
+ }
-#ifdef CONFIG_NETDEVICES
- case SO_BINDTODEVICE:
- {
- char devname[IFNAMSIZ];
+ /* Bind this socket to a particular device like "eth0",
+ * as specified in the passed interface name. If the
+ * name is "" or the option length is zero the socket
+ * is not bound.
+ */
- /* Sorry... */
- if (!capable(CAP_NET_RAW)) {
- ret = -EPERM;
+ if (!valbool) {
+ sk->sk_bound_dev_if = 0;
+ } else {
+ if (optlen > IFNAMSIZ - 1)
+ optlen = IFNAMSIZ - 1;
+ memset(devname, 0, sizeof(devname));
+ if (copy_from_user(devname, optval, optlen)) {
+ ret = -EFAULT;
break;
}
- /* Bind this socket to a particular device like "eth0",
- * as specified in the passed interface name. If the
- * name is "" or the option length is zero the socket
- * is not bound.
- */
+ /* Remove any cached route for this socket. */
+ sk_dst_reset(sk);
- if (!valbool) {
+ if (devname[0] == '\0') {
sk->sk_bound_dev_if = 0;
} else {
- if (optlen > IFNAMSIZ - 1)
- optlen = IFNAMSIZ - 1;
- memset(devname, 0, sizeof(devname));
- if (copy_from_user(devname, optval, optlen)) {
- ret = -EFAULT;
+ struct net_device *dev = dev_get_by_name(devname);
+ if (!dev) {
+ ret = -ENODEV;
break;
}
-
- /* Remove any cached route for this socket. */
- sk_dst_reset(sk);
-
- if (devname[0] == '\0') {
- sk->sk_bound_dev_if = 0;
- } else {
- struct net_device *dev = dev_get_by_name(devname);
- if (!dev) {
- ret = -ENODEV;
- break;
- }
- sk->sk_bound_dev_if = dev->ifindex;
- dev_put(dev);
- }
+ sk->sk_bound_dev_if = dev->ifindex;
+ dev_put(dev);
}
- break;
}
+ break;
+ }
#endif
- case SO_ATTACH_FILTER:
- ret = -EINVAL;
- if (optlen == sizeof(struct sock_fprog)) {
- struct sock_fprog fprog;
+ case SO_ATTACH_FILTER:
+ ret = -EINVAL;
+ if (optlen == sizeof(struct sock_fprog)) {
+ struct sock_fprog fprog;
- ret = -EFAULT;
- if (copy_from_user(&fprog, optval, sizeof(fprog)))
- break;
-
- ret = sk_attach_filter(&fprog, sk);
- }
- break;
-
- case SO_DETACH_FILTER:
- rcu_read_lock_bh();
- filter = rcu_dereference(sk->sk_filter);
- if (filter) {
- rcu_assign_pointer(sk->sk_filter, NULL);
- sk_filter_release(sk, filter);
- rcu_read_unlock_bh();
+ ret = -EFAULT;
+ if (copy_from_user(&fprog, optval, sizeof(fprog)))
break;
- }
+
+ ret = sk_attach_filter(&fprog, sk);
+ }
+ break;
+
+ case SO_DETACH_FILTER:
+ rcu_read_lock_bh();
+ filter = rcu_dereference(sk->sk_filter);
+ if (filter) {
+ rcu_assign_pointer(sk->sk_filter, NULL);
+ sk_filter_release(sk, filter);
rcu_read_unlock_bh();
- ret = -ENONET;
break;
+ }
+ rcu_read_unlock_bh();
+ ret = -ENONET;
+ break;
- case SO_PASSSEC:
- if (valbool)
- set_bit(SOCK_PASSSEC, &sock->flags);
- else
- clear_bit(SOCK_PASSSEC, &sock->flags);
- break;
+ case SO_PASSSEC:
+ if (valbool)
+ set_bit(SOCK_PASSSEC, &sock->flags);
+ else
+ clear_bit(SOCK_PASSSEC, &sock->flags);
+ break;
/* We implement the SO_SNDLOWAT etc to
not be settable (1003.1g 5.3) */
- default:
- ret = -ENOPROTOOPT;
- break;
+ default:
+ ret = -ENOPROTOOPT;
+ break;
}
release_sock(sk);
return ret;
@@ -641,8 +648,7 @@ int sock_getsockopt(struct socket *sock, int level, int optname,
{
struct sock *sk = sock->sk;
- union
- {
+ union {
int val;
struct linger ling;
struct timeval tm;
@@ -651,148 +657,153 @@ int sock_getsockopt(struct socket *sock, int level, int optname,
unsigned int lv = sizeof(int);
int len;
- if(get_user(len,optlen))
+ if (get_user(len, optlen))
return -EFAULT;
- if(len < 0)
+ if (len < 0)
return -EINVAL;
- switch(optname)
- {
- case SO_DEBUG:
- v.val = sock_flag(sk, SOCK_DBG);
- break;
-
- case SO_DONTROUTE:
- v.val = sock_flag(sk, SOCK_LOCALROUTE);
- break;
-
- case SO_BROADCAST:
- v.val = !!sock_flag(sk, SOCK_BROADCAST);
- break;
-
- case SO_SNDBUF:
- v.val = sk->sk_sndbuf;
- break;
-
- case SO_RCVBUF:
- v.val = sk->sk_rcvbuf;
- break;
-
- case SO_REUSEADDR:
- v.val = sk->sk_reuse;
- break;
-
- case SO_KEEPALIVE:
- v.val = !!sock_flag(sk, SOCK_KEEPOPEN);
- break;
-
- case SO_TYPE:
- v.val = sk->sk_type;
- break;
-
- case SO_ERROR:
- v.val = -sock_error(sk);
- if(v.val==0)
- v.val = xchg(&sk->sk_err_soft, 0);
- break;
-
- case SO_OOBINLINE:
- v.val = !!sock_flag(sk, SOCK_URGINLINE);
- break;
-
- case SO_NO_CHECK:
- v.val = sk->sk_no_check;
- break;
-
- case SO_PRIORITY:
- v.val = sk->sk_priority;
- break;
-
- case SO_LINGER:
- lv = sizeof(v.ling);
- v.ling.l_onoff = !!sock_flag(sk, SOCK_LINGER);
- v.ling.l_linger = sk->sk_lingertime / HZ;
- break;
-
- case SO_BSDCOMPAT:
- sock_warn_obsolete_bsdism("getsockopt");
- break;
-
- case SO_TIMESTAMP:
- v.val = sock_flag(sk, SOCK_RCVTSTAMP);
- break;
+	switch (optname) {
+ case SO_DEBUG:
+ v.val = sock_flag(sk, SOCK_DBG);
+ break;
+
+ case SO_DONTROUTE:
+ v.val = sock_flag(sk, SOCK_LOCALROUTE);
+ break;
+
+ case SO_BROADCAST:
+ v.val = !!sock_flag(sk, SOCK_BROADCAST);
+ break;
+
+ case SO_SNDBUF:
+ v.val = sk->sk_sndbuf;
+ break;
+
+ case SO_RCVBUF:
+ v.val = sk->sk_rcvbuf;
+ break;
+
+ case SO_REUSEADDR:
+ v.val = sk->sk_reuse;
+ break;
+
+ case SO_KEEPALIVE:
+ v.val = !!sock_flag(sk, SOCK_KEEPOPEN);
+ break;
+
+ case SO_TYPE:
+ v.val = sk->sk_type;
+ break;
+
+ case SO_ERROR:
+ v.val = -sock_error(sk);
+		if (v.val == 0)
+ v.val = xchg(&sk->sk_err_soft, 0);
+ break;
+
+ case SO_OOBINLINE:
+ v.val = !!sock_flag(sk, SOCK_URGINLINE);
+ break;
+
+ case SO_NO_CHECK:
+ v.val = sk->sk_no_check;
+ break;
+
+ case SO_PRIORITY:
+ v.val = sk->sk_priority;
+ break;
+
+ case SO_LINGER:
+ lv = sizeof(v.ling);
+ v.ling.l_onoff = !!sock_flag(sk, SOCK_LINGER);
+ v.ling.l_linger = sk->sk_lingertime / HZ;
+ break;
+
+ case SO_BSDCOMPAT:
+ sock_warn_obsolete_bsdism("getsockopt");
+ break;
+
+ case SO_TIMESTAMP:
+ v.val = sock_flag(sk, SOCK_RCVTSTAMP) &&
+ !sock_flag(sk, SOCK_RCVTSTAMPNS);
+ break;
+
+ case SO_TIMESTAMPNS:
+ v.val = sock_flag(sk, SOCK_RCVTSTAMPNS);
+ break;
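/* Aside, not part of the patch: the flag pairing used by these two cases
 * is SOCK_RCVTSTAMP alone -> SO_TIMESTAMP (timeval cmsg), SOCK_RCVTSTAMP
 * plus SOCK_RCVTSTAMPNS -> SO_TIMESTAMPNS (timespec cmsg), and neither
 * flag -> timestamping disabled, matching the setsockopt side above. */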
+
+ case SO_RCVTIMEO:
+		lv = sizeof(struct timeval);
+ if (sk->sk_rcvtimeo == MAX_SCHEDULE_TIMEOUT) {
+ v.tm.tv_sec = 0;
+ v.tm.tv_usec = 0;
+ } else {
+ v.tm.tv_sec = sk->sk_rcvtimeo / HZ;
+ v.tm.tv_usec = ((sk->sk_rcvtimeo % HZ) * 1000000) / HZ;
+ }
+ break;
+
+ case SO_SNDTIMEO:
+		lv = sizeof(struct timeval);
+ if (sk->sk_sndtimeo == MAX_SCHEDULE_TIMEOUT) {
+ v.tm.tv_sec = 0;
+ v.tm.tv_usec = 0;
+ } else {
+ v.tm.tv_sec = sk->sk_sndtimeo / HZ;
+ v.tm.tv_usec = ((sk->sk_sndtimeo % HZ) * 1000000) / HZ;
+ }
+ break;
- case SO_RCVTIMEO:
- lv=sizeof(struct timeval);
- if (sk->sk_rcvtimeo == MAX_SCHEDULE_TIMEOUT) {
- v.tm.tv_sec = 0;
- v.tm.tv_usec = 0;
- } else {
- v.tm.tv_sec = sk->sk_rcvtimeo / HZ;
- v.tm.tv_usec = ((sk->sk_rcvtimeo % HZ) * 1000000) / HZ;
- }
- break;
+ case SO_RCVLOWAT:
+ v.val = sk->sk_rcvlowat;
+ break;
- case SO_SNDTIMEO:
- lv=sizeof(struct timeval);
- if (sk->sk_sndtimeo == MAX_SCHEDULE_TIMEOUT) {
- v.tm.tv_sec = 0;
- v.tm.tv_usec = 0;
- } else {
- v.tm.tv_sec = sk->sk_sndtimeo / HZ;
- v.tm.tv_usec = ((sk->sk_sndtimeo % HZ) * 1000000) / HZ;
- }
- break;
+ case SO_SNDLOWAT:
+		v.val = 1;
+ break;
- case SO_RCVLOWAT:
- v.val = sk->sk_rcvlowat;
- break;
+ case SO_PASSCRED:
+ v.val = test_bit(SOCK_PASSCRED, &sock->flags) ? 1 : 0;
+ break;
- case SO_SNDLOWAT:
- v.val=1;
- break;
+ case SO_PEERCRED:
+ if (len > sizeof(sk->sk_peercred))
+ len = sizeof(sk->sk_peercred);
+ if (copy_to_user(optval, &sk->sk_peercred, len))
+ return -EFAULT;
+ goto lenout;
- case SO_PASSCRED:
- v.val = test_bit(SOCK_PASSCRED, &sock->flags) ? 1 : 0;
- break;
-
- case SO_PEERCRED:
- if (len > sizeof(sk->sk_peercred))
- len = sizeof(sk->sk_peercred);
- if (copy_to_user(optval, &sk->sk_peercred, len))
- return -EFAULT;
- goto lenout;
-
- case SO_PEERNAME:
- {
- char address[128];
-
- if (sock->ops->getname(sock, (struct sockaddr *)address, &lv, 2))
- return -ENOTCONN;
- if (lv < len)
- return -EINVAL;
- if (copy_to_user(optval, address, len))
- return -EFAULT;
- goto lenout;
- }
+ case SO_PEERNAME:
+ {
+ char address[128];
+
+ if (sock->ops->getname(sock, (struct sockaddr *)address, &lv, 2))
+ return -ENOTCONN;
+ if (lv < len)
+ return -EINVAL;
+ if (copy_to_user(optval, address, len))
+ return -EFAULT;
+ goto lenout;
+ }
- /* Dubious BSD thing... Probably nobody even uses it, but
- * the UNIX standard wants it for whatever reason... -DaveM
- */
- case SO_ACCEPTCONN:
- v.val = sk->sk_state == TCP_LISTEN;
- break;
+ /* Dubious BSD thing... Probably nobody even uses it, but
+ * the UNIX standard wants it for whatever reason... -DaveM
+ */
+ case SO_ACCEPTCONN:
+ v.val = sk->sk_state == TCP_LISTEN;
+ break;
- case SO_PASSSEC:
- v.val = test_bit(SOCK_PASSSEC, &sock->flags) ? 1 : 0;
- break;
+ case SO_PASSSEC:
+ v.val = test_bit(SOCK_PASSSEC, &sock->flags) ? 1 : 0;
+ break;
- case SO_PEERSEC:
- return security_socket_getpeersec_stream(sock, optval, optlen, len);
+ case SO_PEERSEC:
+ return security_socket_getpeersec_stream(sock, optval, optlen, len);
- default:
- return(-ENOPROTOOPT);
+ default:
+ return -ENOPROTOOPT;
}
+
if (len > lv)
len = lv;
if (copy_to_user(optval, &v, len))
@@ -904,6 +915,7 @@ struct sock *sk_clone(const struct sock *sk, const gfp_t priority)
sk_node_init(&newsk->sk_node);
sock_lock_init(newsk);
bh_lock_sock(newsk);
+ newsk->sk_backlog.head = newsk->sk_backlog.tail = NULL;
atomic_set(&newsk->sk_rmem_alloc, 0);
atomic_set(&newsk->sk_wmem_alloc, 0);
@@ -923,7 +935,6 @@ struct sock *sk_clone(const struct sock *sk, const gfp_t priority)
newsk->sk_wmem_queued = 0;
newsk->sk_forward_alloc = 0;
newsk->sk_send_head = NULL;
- newsk->sk_backlog.head = newsk->sk_backlog.tail = NULL;
newsk->sk_userlocks = sk->sk_userlocks & ~SOCK_BINDPORT_LOCK;
sock_reset_flag(newsk, SOCK_DONE);
@@ -970,6 +981,21 @@ out:
EXPORT_SYMBOL_GPL(sk_clone);
+void sk_setup_caps(struct sock *sk, struct dst_entry *dst)
+{
+ __sk_dst_set(sk, dst);
+ sk->sk_route_caps = dst->dev->features;
+ if (sk->sk_route_caps & NETIF_F_GSO)
+ sk->sk_route_caps |= NETIF_F_GSO_MASK;
+ if (sk_can_gso(sk)) {
+ if (dst->header_len)
+ sk->sk_route_caps &= ~NETIF_F_GSO_MASK;
+ else
+ sk->sk_route_caps |= NETIF_F_SG | NETIF_F_HW_CSUM;
+ }
+}
+EXPORT_SYMBOL_GPL(sk_setup_caps);
+
void __init sk_init(void)
{
if (num_physpages <= 4096) {
@@ -1220,13 +1246,13 @@ static void __lock_sock(struct sock *sk)
{
DEFINE_WAIT(wait);
- for(;;) {
+ for (;;) {
prepare_to_wait_exclusive(&sk->sk_lock.wq, &wait,
TASK_UNINTERRUPTIBLE);
spin_unlock_bh(&sk->sk_lock.slock);
schedule();
spin_lock_bh(&sk->sk_lock.slock);
- if(!sock_owned_by_user(sk))
+ if (!sock_owned_by_user(sk))
break;
}
finish_wait(&sk->sk_lock.wq, &wait);
@@ -1258,7 +1284,7 @@ static void __release_sock(struct sock *sk)
} while (skb != NULL);
bh_lock_sock(sk);
- } while((skb = sk->sk_backlog.head) != NULL);
+ } while ((skb = sk->sk_backlog.head) != NULL);
}
/**
@@ -1420,7 +1446,7 @@ static void sock_def_write_space(struct sock *sk)
/* Do not wake up a writer until he can make "significant"
* progress. --DaveM
*/
- if((atomic_read(&sk->sk_wmem_alloc) << 1) <= sk->sk_sndbuf) {
+ if ((atomic_read(&sk->sk_wmem_alloc) << 1) <= sk->sk_sndbuf) {
if (sk->sk_sleep && waitqueue_active(sk->sk_sleep))
wake_up_interruptible(sk->sk_sleep);
@@ -1482,8 +1508,7 @@ void sock_init_data(struct socket *sock, struct sock *sk)
sock_set_flag(sk, SOCK_ZAPPED);
- if(sock)
- {
+ if (sock) {
sk->sk_type = sock->type;
sk->sk_sleep = &sock->wait;
sock->sk = sk;
@@ -1512,8 +1537,7 @@ void sock_init_data(struct socket *sock, struct sock *sk)
sk->sk_rcvtimeo = MAX_SCHEDULE_TIMEOUT;
sk->sk_sndtimeo = MAX_SCHEDULE_TIMEOUT;
- sk->sk_stamp.tv_sec = -1L;
- sk->sk_stamp.tv_usec = -1L;
+ sk->sk_stamp = ktime_set(-1L, -1L);
atomic_set(&sk->sk_refcnt, 1);
}
@@ -1554,17 +1578,36 @@ EXPORT_SYMBOL(release_sock);
int sock_get_timestamp(struct sock *sk, struct timeval __user *userstamp)
{
+ struct timeval tv;
if (!sock_flag(sk, SOCK_TIMESTAMP))
sock_enable_timestamp(sk);
- if (sk->sk_stamp.tv_sec == -1)
+ tv = ktime_to_timeval(sk->sk_stamp);
+ if (tv.tv_sec == -1)
return -ENOENT;
- if (sk->sk_stamp.tv_sec == 0)
- do_gettimeofday(&sk->sk_stamp);
- return copy_to_user(userstamp, &sk->sk_stamp, sizeof(struct timeval)) ?
- -EFAULT : 0;
+ if (tv.tv_sec == 0) {
+ sk->sk_stamp = ktime_get_real();
+ tv = ktime_to_timeval(sk->sk_stamp);
+ }
+ return copy_to_user(userstamp, &tv, sizeof(tv)) ? -EFAULT : 0;
}
EXPORT_SYMBOL(sock_get_timestamp);
+int sock_get_timestampns(struct sock *sk, struct timespec __user *userstamp)
+{
+ struct timespec ts;
+ if (!sock_flag(sk, SOCK_TIMESTAMP))
+ sock_enable_timestamp(sk);
+ ts = ktime_to_timespec(sk->sk_stamp);
+ if (ts.tv_sec == -1)
+ return -ENOENT;
+ if (ts.tv_sec == 0) {
+ sk->sk_stamp = ktime_get_real();
+ ts = ktime_to_timespec(sk->sk_stamp);
+ }
+ return copy_to_user(userstamp, &ts, sizeof(ts)) ? -EFAULT : 0;
+}
+EXPORT_SYMBOL(sock_get_timestampns);
+
void sock_enable_timestamp(struct sock *sk)
{
if (!sock_flag(sk, SOCK_TIMESTAMP)) {
@@ -1899,7 +1942,7 @@ static int proto_seq_show(struct seq_file *seq, void *v)
return 0;
}
-static struct seq_operations proto_seq_ops = {
+static const struct seq_operations proto_seq_ops = {
.start = proto_seq_start,
.next = proto_seq_next,
.stop = proto_seq_stop,
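
For completeness: the SO_TIMESTAMPNS/SCM_TIMESTAMPNS support introduced above is consumed from userspace via recvmsg() control messages. A minimal standalone sketch (not part of the patch; the port number and buffer sizes are arbitrary, and the SCM_TIMESTAMPNS constant requires libc headers matching a kernel with this change):

/*
 * Userspace sketch: receive one UDP datagram and print the nanosecond
 * receive timestamp delivered as a control message by the
 * SO_TIMESTAMPNS support added above.
 */
#include <stdio.h>
#include <string.h>
#include <time.h>
#include <sys/socket.h>
#include <netinet/in.h>

int main(void)
{
	int on = 1, fd = socket(AF_INET, SOCK_DGRAM, 0);
	struct sockaddr_in sin = { .sin_family = AF_INET,
				   .sin_port = htons(9999) };	/* arbitrary */
	char data[1500], ctrl[CMSG_SPACE(sizeof(struct timespec))];
	struct iovec iov = { .iov_base = data, .iov_len = sizeof(data) };
	struct msghdr msg = { .msg_iov = &iov, .msg_iovlen = 1,
			      .msg_control = ctrl,
			      .msg_controllen = sizeof(ctrl) };
	struct cmsghdr *cm;

	if (fd < 0 || bind(fd, (struct sockaddr *)&sin, sizeof(sin)) < 0)
		return 1;
	setsockopt(fd, SOL_SOCKET, SO_TIMESTAMPNS, &on, sizeof(on));

	if (recvmsg(fd, &msg, 0) < 0)
		return 1;
	for (cm = CMSG_FIRSTHDR(&msg); cm; cm = CMSG_NXTHDR(&msg, cm))
		if (cm->cmsg_level == SOL_SOCKET &&
		    cm->cmsg_type == SCM_TIMESTAMPNS) {
			struct timespec ts;

			memcpy(&ts, CMSG_DATA(cm), sizeof(ts));
			printf("rx at %ld.%09ld\n", ts.tv_sec, ts.tv_nsec);
		}
	return 0;
}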
diff --git a/net/core/sysctl_net_core.c b/net/core/sysctl_net_core.c
index 1e75b158546..b29712033dd 100644
--- a/net/core/sysctl_net_core.c
+++ b/net/core/sysctl_net_core.c
@@ -136,6 +136,14 @@ ctl_table core_table[] = {
.mode = 0644,
.proc_handler = &proc_dointvec
},
+ {
+ .ctl_name = NET_CORE_WARNINGS,
+ .procname = "warnings",
+ .data = &net_msg_warn,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = &proc_dointvec
+ },
{ .ctl_name = 0 }
};
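
The new table entry exposes net_msg_warn (added in the utils.c hunk below) as net.core.warnings. A userspace sketch of toggling it through procfs (path as registered above; error handling trimmed); equivalently: sysctl -w net.core.warnings=0.

/* Userspace sketch (not part of the patch): silence the guarded
 * networking warnings by writing 0 to the new sysctl node. */
#include <fcntl.h>
#include <unistd.h>

int main(void)
{
	int fd = open("/proc/sys/net/core/warnings", O_WRONLY);

	if (fd < 0)
		return 1;
	if (write(fd, "0\n", 2) != 2) {	/* 1 (default) = warn, 0 = silent */
		close(fd);
		return 1;
	}
	close(fd);
	return 0;
}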
diff --git a/net/core/utils.c b/net/core/utils.c
index 07236c17fab..adecfd281ae 100644
--- a/net/core/utils.c
+++ b/net/core/utils.c
@@ -30,8 +30,10 @@
#include <asm/system.h>
#include <asm/uaccess.h>
-int net_msg_cost = 5*HZ;
-int net_msg_burst = 10;
+int net_msg_cost __read_mostly = 5*HZ;
+int net_msg_burst __read_mostly = 10;
+int net_msg_warn __read_mostly = 1;
+EXPORT_SYMBOL(net_msg_warn);
/*
* All net warning printk()s should be guarded by this function.
diff --git a/net/dccp/ackvec.c b/net/dccp/ackvec.c
index a086c6312d3..01030f34617 100644
--- a/net/dccp/ackvec.c
+++ b/net/dccp/ackvec.c
@@ -157,7 +157,7 @@ struct dccp_ackvec *dccp_ackvec_alloc(const gfp_t priority)
if (av != NULL) {
av->dccpav_buf_head = DCCP_MAX_ACKVEC_LEN - 1;
- av->dccpav_buf_ackno = DCCP_MAX_SEQNO + 1;
+ av->dccpav_buf_ackno = UINT48_MAX + 1;
av->dccpav_buf_nonce = 0;
av->dccpav_time.tv_sec = 0;
av->dccpav_time.tv_usec = 0;
diff --git a/net/dccp/ccids/ccid3.c b/net/dccp/ccids/ccid3.c
index 746f79d104b..d7d9ce73724 100644
--- a/net/dccp/ccids/ccid3.c
+++ b/net/dccp/ccids/ccid3.c
@@ -33,7 +33,6 @@
* along with this program; if not, write to the Free Software
* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*/
-
#include "../ccid.h"
#include "../dccp.h"
#include "lib/packet_history.h"
@@ -52,6 +51,9 @@ static struct dccp_tx_hist *ccid3_tx_hist;
static struct dccp_rx_hist *ccid3_rx_hist;
static struct dccp_li_hist *ccid3_li_hist;
+/*
+ * Transmitter Half-Connection Routines
+ */
#ifdef CONFIG_IP_DCCP_CCID3_DEBUG
static const char *ccid3_tx_state_name(enum ccid3_hc_tx_states state)
{
@@ -80,23 +82,37 @@ static void ccid3_hc_tx_set_state(struct sock *sk,
}
/*
- * Recalculate scheduled nominal send time t_nom, inter-packet interval
- * t_ipi, and delta value. Should be called after each change to X.
+ * Compute the initial sending rate X_init according to RFC 3390:
+ * w_init = min(4 * MSS, max(2 * MSS, 4380 bytes))
+ * X_init = w_init / RTT
+ * For consistency with other parts of the code, X_init is scaled by 2^6.
*/
-static inline void ccid3_update_send_time(struct ccid3_hc_tx_sock *hctx)
+static inline u64 rfc3390_initial_rate(struct sock *sk)
{
- timeval_sub_usecs(&hctx->ccid3hctx_t_nom, hctx->ccid3hctx_t_ipi);
+ const struct dccp_sock *dp = dccp_sk(sk);
+ const __u32 w_init = min(4 * dp->dccps_mss_cache,
+ max(2 * dp->dccps_mss_cache, 4380U));
- /* Calculate new t_ipi = s / X_inst (X_inst is in 64 * bytes/second) */
- hctx->ccid3hctx_t_ipi = scaled_div(hctx->ccid3hctx_s,
- hctx->ccid3hctx_x >> 6);
+ return scaled_div(w_init << 6, ccid3_hc_tx_sk(sk)->ccid3hctx_rtt);
+}
- /* Update nominal send time with regard to the new t_ipi */
- timeval_add_usecs(&hctx->ccid3hctx_t_nom, hctx->ccid3hctx_t_ipi);
+/*
+ * Recalculate t_ipi and delta (should be called whenever X changes)
+ */
+static inline void ccid3_update_send_interval(struct ccid3_hc_tx_sock *hctx)
+{
+ /* Calculate new t_ipi = s / X_inst (X_inst is in 64 * bytes/second) */
+ hctx->ccid3hctx_t_ipi = scaled_div32(((u64)hctx->ccid3hctx_s) << 6,
+ hctx->ccid3hctx_x);
/* Calculate new delta by delta = min(t_ipi / 2, t_gran / 2) */
hctx->ccid3hctx_delta = min_t(u32, hctx->ccid3hctx_t_ipi / 2,
TFRC_OPSYS_HALF_TIME_GRAN);
+
+ ccid3_pr_debug("t_ipi=%u, delta=%u, s=%u, X=%u\n",
+ hctx->ccid3hctx_t_ipi, hctx->ccid3hctx_delta,
+ hctx->ccid3hctx_s, (unsigned)(hctx->ccid3hctx_x >> 6));
}
/*
* Update X by
@@ -112,19 +128,28 @@ static inline void ccid3_update_send_time(struct ccid3_hc_tx_sock *hctx)
* fine-grained resolution of sending rates. This requires scaling by 2^6
* throughout the code. Only X_calc is unscaled (in bytes/second).
*
- * If X has changed, we also update the scheduled send time t_now,
- * the inter-packet interval t_ipi, and the delta value.
*/
static void ccid3_hc_tx_update_x(struct sock *sk, struct timeval *now)
{
struct ccid3_hc_tx_sock *hctx = ccid3_hc_tx_sk(sk);
+ __u64 min_rate = 2 * hctx->ccid3hctx_x_recv;
const __u64 old_x = hctx->ccid3hctx_x;
+ /*
+ * Handle IDLE periods: do not reduce below RFC3390 initial sending rate
+ * when idling [RFC 4342, 5.1]. See also draft-ietf-dccp-rfc3448bis.
+ * For consistency with X and X_recv, min_rate is also scaled by 2^6.
+ */
+ if (unlikely(hctx->ccid3hctx_idle)) {
+ min_rate = rfc3390_initial_rate(sk);
+ min_rate = max(min_rate, 2 * hctx->ccid3hctx_x_recv);
+ }
+
if (hctx->ccid3hctx_p > 0) {
hctx->ccid3hctx_x = min(((__u64)hctx->ccid3hctx_x_calc) << 6,
- hctx->ccid3hctx_x_recv * 2);
+ min_rate);
hctx->ccid3hctx_x = max(hctx->ccid3hctx_x,
(((__u64)hctx->ccid3hctx_s) << 6) /
TFRC_T_MBI);
@@ -133,14 +158,21 @@ static void ccid3_hc_tx_update_x(struct sock *sk, struct timeval *now)
(suseconds_t)hctx->ccid3hctx_rtt >= 0) {
hctx->ccid3hctx_x =
- max(2 * min(hctx->ccid3hctx_x, hctx->ccid3hctx_x_recv),
+ max(min(2 * hctx->ccid3hctx_x, min_rate),
scaled_div(((__u64)hctx->ccid3hctx_s) << 6,
hctx->ccid3hctx_rtt));
hctx->ccid3hctx_t_ld = *now;
}
- if (hctx->ccid3hctx_x != old_x)
- ccid3_update_send_time(hctx);
+ if (hctx->ccid3hctx_x != old_x) {
+ ccid3_pr_debug("X_prev=%u, X_now=%u, X_calc=%u, "
+ "X_recv=%u\n", (unsigned)(old_x >> 6),
+ (unsigned)(hctx->ccid3hctx_x >> 6),
+ hctx->ccid3hctx_x_calc,
+ (unsigned)(hctx->ccid3hctx_x_recv >> 6));
+
+ ccid3_update_send_interval(hctx);
+ }
}
/*
@@ -149,17 +181,12 @@ static void ccid3_hc_tx_update_x(struct sock *sk, struct timeval *now)
*/
static inline void ccid3_hc_tx_update_s(struct ccid3_hc_tx_sock *hctx, int len)
{
- if (unlikely(len == 0))
- ccid3_pr_debug("Packet payload length is 0 - not updating\n");
- else
- hctx->ccid3hctx_s = hctx->ccid3hctx_s == 0 ? len :
- (9 * hctx->ccid3hctx_s + len) / 10;
- /*
- * Note: We could do a potential optimisation here - when `s' changes,
- * recalculate sending rate and consequently t_ipi, t_delta, and
- * t_now. This is however non-standard, and the benefits are not
- * clear, so it is currently left out.
- */
+ const u16 old_s = hctx->ccid3hctx_s;
+
+ hctx->ccid3hctx_s = old_s == 0 ? len : (9 * old_s + len) / 10;
+
+ if (hctx->ccid3hctx_s != old_s)
+ ccid3_update_send_interval(hctx);
}
/*
@@ -193,6 +220,7 @@ static void ccid3_hc_tx_no_feedback_timer(unsigned long data)
{
struct sock *sk = (struct sock *)data;
struct ccid3_hc_tx_sock *hctx = ccid3_hc_tx_sk(sk);
+ struct timeval now;
unsigned long t_nfb = USEC_PER_SEC / 5;
bh_lock_sock(sk);
@@ -205,6 +233,8 @@ static void ccid3_hc_tx_no_feedback_timer(unsigned long data)
ccid3_pr_debug("%s(%p, state=%s) - entry \n", dccp_role(sk), sk,
ccid3_tx_state_name(hctx->ccid3hctx_state));
+ hctx->ccid3hctx_idle = 1;
+
switch (hctx->ccid3hctx_state) {
case TFRC_SSTATE_NO_FBACK:
/* RFC 3448, 4.4: Halve send rate directly */
@@ -219,53 +249,37 @@ static void ccid3_hc_tx_no_feedback_timer(unsigned long data)
/* The value of R is still undefined and so we cannot recompute
* the timeout value. Keep initial value as per [RFC 4342, 5]. */
t_nfb = TFRC_INITIAL_TIMEOUT;
- ccid3_update_send_time(hctx);
+ ccid3_update_send_interval(hctx);
break;
case TFRC_SSTATE_FBACK:
/*
- * Check if IDLE since last timeout and recv rate is less than
- * 4 packets (in units of 64*bytes/sec) per RTT
+ * Modify the cached value of X_recv [RFC 3448, 4.4]
+ *
+ * If (p == 0 || X_calc > 2 * X_recv)
+ * X_recv = max(X_recv / 2, s / (2 * t_mbi));
+ * Else
+ * X_recv = X_calc / 4;
+ *
+ * Note that X_recv is scaled by 2^6 while X_calc is not
*/
- if (!hctx->ccid3hctx_idle ||
- (hctx->ccid3hctx_x_recv >= 4 *
- scaled_div(((__u64)hctx->ccid3hctx_s) << 6,
- hctx->ccid3hctx_rtt))) {
- struct timeval now;
+ BUG_ON(hctx->ccid3hctx_p && !hctx->ccid3hctx_x_calc);
- ccid3_pr_debug("%s(%p, state=%s), not idle\n",
- dccp_role(sk), sk,
- ccid3_tx_state_name(hctx->ccid3hctx_state));
+ if (hctx->ccid3hctx_p == 0 ||
+ (hctx->ccid3hctx_x_calc > (hctx->ccid3hctx_x_recv >> 5))) {
- /*
- * Modify the cached value of X_recv [RFC 3448, 4.4]
- *
- * If (p == 0 || X_calc > 2 * X_recv)
- * X_recv = max(X_recv / 2, s / (2 * t_mbi));
- * Else
- * X_recv = X_calc / 4;
- *
- * Note that X_recv is scaled by 2^6 while X_calc is not
- */
- BUG_ON(hctx->ccid3hctx_p && !hctx->ccid3hctx_x_calc);
-
- if (hctx->ccid3hctx_p == 0 ||
- (hctx->ccid3hctx_x_calc >
- (hctx->ccid3hctx_x_recv >> 5))) {
-
- hctx->ccid3hctx_x_recv =
- max(hctx->ccid3hctx_x_recv / 2,
- (((__u64)hctx->ccid3hctx_s) << 6) /
- (2 * TFRC_T_MBI));
-
- if (hctx->ccid3hctx_p == 0)
- dccp_timestamp(sk, &now);
- } else {
- hctx->ccid3hctx_x_recv = hctx->ccid3hctx_x_calc;
- hctx->ccid3hctx_x_recv <<= 4;
- }
- /* Now recalculate X [RFC 3448, 4.3, step (4)] */
- ccid3_hc_tx_update_x(sk, &now);
+ hctx->ccid3hctx_x_recv =
+ max(hctx->ccid3hctx_x_recv / 2,
+ (((__u64)hctx->ccid3hctx_s) << 6) /
+ (2 * TFRC_T_MBI));
+
+ if (hctx->ccid3hctx_p == 0)
+ dccp_timestamp(sk, &now);
+ } else {
+ hctx->ccid3hctx_x_recv = hctx->ccid3hctx_x_calc;
+ hctx->ccid3hctx_x_recv <<= 4;
}
+ /* Now recalculate X [RFC 3448, 4.3, step (4)] */
+ ccid3_hc_tx_update_x(sk, &now);
/*
* Schedule no feedback timer to expire in
* max(t_RTO, 2 * s/X) = max(t_RTO, 2 * t_ipi)
@@ -280,8 +294,6 @@ static void ccid3_hc_tx_no_feedback_timer(unsigned long data)
goto out;
}
- hctx->ccid3hctx_idle = 1;
-
restart_timer:
sk_reset_timer(sk, &hctx->ccid3hctx_no_feedback_timer,
jiffies + usecs_to_jiffies(t_nfb));
@@ -322,24 +334,35 @@ static int ccid3_hc_tx_send_packet(struct sock *sk, struct sk_buff *skb)
usecs_to_jiffies(TFRC_INITIAL_TIMEOUT)));
hctx->ccid3hctx_last_win_count = 0;
hctx->ccid3hctx_t_last_win_count = now;
- ccid3_hc_tx_set_state(sk, TFRC_SSTATE_NO_FBACK);
-
- /* Set initial sending rate X/s to 1pps (X is scaled by 2^6) */
- ccid3_hc_tx_update_s(hctx, skb->len);
- hctx->ccid3hctx_x = hctx->ccid3hctx_s;
- hctx->ccid3hctx_x <<= 6;
-
- /* First timeout, according to [RFC 3448, 4.2], is 1 second */
- hctx->ccid3hctx_t_ipi = USEC_PER_SEC;
- /* Initial delta: minimum of 0.5 sec and t_gran/2 */
- hctx->ccid3hctx_delta = TFRC_OPSYS_HALF_TIME_GRAN;
/* Set t_0 for initial packet */
hctx->ccid3hctx_t_nom = now;
+
+ hctx->ccid3hctx_s = skb->len;
+
+ /*
+ * Use initial RTT sample when available: recommended by erratum
+ * to RFC 4342. This implements the initialisation procedure of
+ * draft rfc3448bis, section 4.2. Remember, X is scaled by 2^6.
+ */
+ if (dp->dccps_syn_rtt) {
+ ccid3_pr_debug("SYN RTT = %uus\n", dp->dccps_syn_rtt);
+ hctx->ccid3hctx_rtt = dp->dccps_syn_rtt;
+ hctx->ccid3hctx_x = rfc3390_initial_rate(sk);
+ hctx->ccid3hctx_t_ld = now;
+ } else {
+ /* Sender does not have RTT sample: X = MSS/second */
+ hctx->ccid3hctx_x = dp->dccps_mss_cache;
+ hctx->ccid3hctx_x <<= 6;
+ }
+ ccid3_update_send_interval(hctx);
+
+ ccid3_hc_tx_set_state(sk, TFRC_SSTATE_NO_FBACK);
break;
case TFRC_SSTATE_NO_FBACK:
case TFRC_SSTATE_FBACK:
delay = timeval_delta(&hctx->ccid3hctx_t_nom, &now);
+ ccid3_pr_debug("delay=%ld\n", (long)delay);
/*
* Scheduling of packet transmissions [RFC 3448, 4.6]
*
@@ -361,6 +384,7 @@ static int ccid3_hc_tx_send_packet(struct sock *sk, struct sk_buff *skb)
/* prepare to send now (add options etc.) */
dp->dccps_hc_tx_insert_options = 1;
DCCP_SKB_CB(skb)->dccpd_ccval = hctx->ccid3hctx_last_win_count;
+ hctx->ccid3hctx_idle = 0;
/* set the nominal send time for the next following packet */
timeval_add_usecs(&hctx->ccid3hctx_t_nom, hctx->ccid3hctx_t_ipi);
@@ -391,7 +415,6 @@ static void ccid3_hc_tx_packet_sent(struct sock *sk, int more,
packet->dccphtx_seqno = dccp_sk(sk)->dccps_gss;
packet->dccphtx_rtt = hctx->ccid3hctx_rtt;
packet->dccphtx_sent = 1;
- hctx->ccid3hctx_idle = 0;
}
static void ccid3_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb)
@@ -402,8 +425,7 @@ static void ccid3_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb)
struct dccp_tx_hist_entry *packet;
struct timeval now;
unsigned long t_nfb;
- u32 pinv;
- suseconds_t r_sample, t_elapsed;
+ u32 pinv, r_sample;
BUG_ON(hctx == NULL);
@@ -445,18 +467,10 @@ static void ccid3_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb)
* Calculate new round trip sample as per [RFC 3448, 4.3] by
* R_sample = (now - t_recvdata) - t_elapsed
*/
- r_sample = timeval_delta(&now, &packet->dccphtx_tstamp);
- t_elapsed = dp->dccps_options_received.dccpor_elapsed_time * 10;
-
- DCCP_BUG_ON(r_sample < 0);
- if (unlikely(r_sample <= t_elapsed))
- DCCP_WARN("WARNING: r_sample=%dus <= t_elapsed=%dus\n",
- (int)r_sample, (int)t_elapsed);
- else
- r_sample -= t_elapsed;
- CCID3_RTT_SANITY_CHECK(r_sample);
+ r_sample = dccp_sample_rtt(sk, &now, &packet->dccphtx_tstamp);
- /* Update RTT estimate by
+ /*
+ * Update RTT estimate by
* If (No feedback recv)
* R = R_sample;
* Else
@@ -467,27 +481,23 @@ static void ccid3_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb)
if (hctx->ccid3hctx_state == TFRC_SSTATE_NO_FBACK) {
/*
* Larger Initial Windows [RFC 4342, sec. 5]
- * We deviate in that we use `s' instead of `MSS'.
*/
- __u64 w_init = min(4 * hctx->ccid3hctx_s,
- max(2 * hctx->ccid3hctx_s, 4380));
hctx->ccid3hctx_rtt = r_sample;
- hctx->ccid3hctx_x = scaled_div(w_init << 6, r_sample);
+ hctx->ccid3hctx_x = rfc3390_initial_rate(sk);
hctx->ccid3hctx_t_ld = now;
- ccid3_update_send_time(hctx);
+ ccid3_update_send_interval(hctx);
- ccid3_pr_debug("%s(%p), s=%u, w_init=%llu, "
- "R_sample=%dus, X=%u\n", dccp_role(sk),
+ ccid3_pr_debug("%s(%p), s=%u, MSS=%u, "
+ "R_sample=%uus, X=%u\n", dccp_role(sk),
sk, hctx->ccid3hctx_s,
- (unsigned long long)w_init,
- (int)r_sample,
+ dp->dccps_mss_cache, r_sample,
(unsigned)(hctx->ccid3hctx_x >> 6));
ccid3_hc_tx_set_state(sk, TFRC_SSTATE_FBACK);
} else {
hctx->ccid3hctx_rtt = (9 * hctx->ccid3hctx_rtt +
- (u32)r_sample) / 10;
+ r_sample) / 10;
/* Update sending rate (step 4 of [RFC 3448, 4.3]) */
if (hctx->ccid3hctx_p > 0)
@@ -497,10 +507,10 @@ static void ccid3_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb)
hctx->ccid3hctx_p);
ccid3_hc_tx_update_x(sk, &now);
- ccid3_pr_debug("%s(%p), RTT=%uus (sample=%dus), s=%u, "
+ ccid3_pr_debug("%s(%p), RTT=%uus (sample=%uus), s=%u, "
"p=%u, X_calc=%u, X_recv=%u, X=%u\n",
dccp_role(sk),
- sk, hctx->ccid3hctx_rtt, (int)r_sample,
+ sk, hctx->ccid3hctx_rtt, r_sample,
hctx->ccid3hctx_s, hctx->ccid3hctx_p,
hctx->ccid3hctx_x_calc,
(unsigned)(hctx->ccid3hctx_x_recv >> 6),
@@ -644,10 +654,50 @@ static void ccid3_hc_tx_exit(struct sock *sk)
dccp_tx_hist_purge(ccid3_tx_hist, &hctx->ccid3hctx_hist);
}
+static void ccid3_hc_tx_get_info(struct sock *sk, struct tcp_info *info)
+{
+ const struct ccid3_hc_tx_sock *hctx = ccid3_hc_tx_sk(sk);
+
+ /* Listen socks don't have a private CCID block */
+ if (sk->sk_state == DCCP_LISTEN)
+ return;
+
+ BUG_ON(hctx == NULL);
+
+ info->tcpi_rto = hctx->ccid3hctx_t_rto;
+ info->tcpi_rtt = hctx->ccid3hctx_rtt;
+}
+
+static int ccid3_hc_tx_getsockopt(struct sock *sk, const int optname, int len,
+ u32 __user *optval, int __user *optlen)
+{
+ const struct ccid3_hc_tx_sock *hctx = ccid3_hc_tx_sk(sk);
+ const void *val;
+
+ /* Listen socks don't have a private CCID block */
+ if (sk->sk_state == DCCP_LISTEN)
+ return -EINVAL;
+
+ switch (optname) {
+ case DCCP_SOCKOPT_CCID_TX_INFO:
+ if (len < sizeof(hctx->ccid3hctx_tfrc))
+ return -EINVAL;
+ len = sizeof(hctx->ccid3hctx_tfrc);
+ val = &hctx->ccid3hctx_tfrc;
+ break;
+ default:
+ return -ENOPROTOOPT;
+ }
+
+ if (put_user(len, optlen) || copy_to_user(optval, val, len))
+ return -EFAULT;
+
+ return 0;
+}
+
/*
- * RX Half Connection methods
+ * Receiver Half-Connection Routines
*/
-
#ifdef CONFIG_IP_DCCP_CCID3_DEBUG
static const char *ccid3_rx_state_name(enum ccid3_hc_rx_states state)
{
@@ -977,8 +1027,7 @@ static void ccid3_hc_rx_packet_recv(struct sock *sk, struct sk_buff *skb)
const struct dccp_options_received *opt_recv;
struct dccp_rx_hist_entry *packet;
struct timeval now;
- u32 p_prev, rtt_prev;
- suseconds_t r_sample, t_elapsed;
+ u32 p_prev, r_sample, rtt_prev;
int loss, payload_size;
BUG_ON(hcrx == NULL);
@@ -994,17 +1043,7 @@ static void ccid3_hc_rx_packet_recv(struct sock *sk, struct sk_buff *skb)
break;
rtt_prev = hcrx->ccid3hcrx_rtt;
dccp_timestamp(sk, &now);
- timeval_sub_usecs(&now, opt_recv->dccpor_timestamp_echo * 10);
- r_sample = timeval_usecs(&now);
- t_elapsed = opt_recv->dccpor_elapsed_time * 10;
-
- DCCP_BUG_ON(r_sample < 0);
- if (unlikely(r_sample <= t_elapsed))
- DCCP_WARN("r_sample=%ldus, t_elapsed=%ldus\n",
- (long)r_sample, (long)t_elapsed);
- else
- r_sample -= t_elapsed;
- CCID3_RTT_SANITY_CHECK(r_sample);
+ r_sample = dccp_sample_rtt(sk, &now, NULL);
if (hcrx->ccid3hcrx_state == TFRC_RSTATE_NO_DATA)
hcrx->ccid3hcrx_rtt = r_sample;
@@ -1132,20 +1171,6 @@ static void ccid3_hc_rx_get_info(struct sock *sk, struct tcp_info *info)
info->tcpi_rcv_rtt = hcrx->ccid3hcrx_rtt;
}
-static void ccid3_hc_tx_get_info(struct sock *sk, struct tcp_info *info)
-{
- const struct ccid3_hc_tx_sock *hctx = ccid3_hc_tx_sk(sk);
-
- /* Listen socks doesn't have a private CCID block */
- if (sk->sk_state == DCCP_LISTEN)
- return;
-
- BUG_ON(hctx == NULL);
-
- info->tcpi_rto = hctx->ccid3hctx_t_rto;
- info->tcpi_rtt = hctx->ccid3hctx_rtt;
-}
-
static int ccid3_hc_rx_getsockopt(struct sock *sk, const int optname, int len,
u32 __user *optval, int __user *optlen)
{
@@ -1173,33 +1198,6 @@ static int ccid3_hc_rx_getsockopt(struct sock *sk, const int optname, int len,
return 0;
}
-static int ccid3_hc_tx_getsockopt(struct sock *sk, const int optname, int len,
- u32 __user *optval, int __user *optlen)
-{
- const struct ccid3_hc_tx_sock *hctx = ccid3_hc_tx_sk(sk);
- const void *val;
-
- /* Listen socks doesn't have a private CCID block */
- if (sk->sk_state == DCCP_LISTEN)
- return -EINVAL;
-
- switch (optname) {
- case DCCP_SOCKOPT_CCID_TX_INFO:
- if (len < sizeof(hctx->ccid3hctx_tfrc))
- return -EINVAL;
- len = sizeof(hctx->ccid3hctx_tfrc);
- val = &hctx->ccid3hctx_tfrc;
- break;
- default:
- return -ENOPROTOOPT;
- }
-
- if (put_user(len, optlen) || copy_to_user(optval, val, len))
- return -EFAULT;
-
- return 0;
-}
-
static struct ccid_operations ccid3 = {
.ccid_id = DCCPC_CCID3,
.ccid_name = "ccid3",
diff --git a/net/dccp/ccids/ccid3.h b/net/dccp/ccids/ccid3.h
index 15776a88c09..8d31b389c19 100644
--- a/net/dccp/ccids/ccid3.h
+++ b/net/dccp/ccids/ccid3.h
@@ -51,16 +51,6 @@
/* Parameter t_mbi from [RFC 3448, 4.3]: backoff interval in seconds */
#define TFRC_T_MBI 64
-/* What we think is a reasonable upper limit on RTT values */
-#define CCID3_SANE_RTT_MAX ((suseconds_t)(4 * USEC_PER_SEC))
-
-#define CCID3_RTT_SANITY_CHECK(rtt) do { \
- if (rtt > CCID3_SANE_RTT_MAX) { \
- DCCP_CRIT("RTT (%d) too large, substituting %d", \
- (int)rtt, (int)CCID3_SANE_RTT_MAX); \
- rtt = CCID3_SANE_RTT_MAX; \
- } } while (0)
-
enum ccid3_options {
TFRC_OPT_LOSS_EVENT_RATE = 192,
TFRC_OPT_LOSS_INTERVALS = 193,
diff --git a/net/dccp/ccids/lib/loss_interval.c b/net/dccp/ccids/lib/loss_interval.c
index 0a0baef16b3..372d7e75cdd 100644
--- a/net/dccp/ccids/lib/loss_interval.c
+++ b/net/dccp/ccids/lib/loss_interval.c
@@ -91,7 +91,7 @@ u32 dccp_li_hist_calc_i_mean(struct list_head *list)
u32 w_tot = 0;
list_for_each_entry_safe(li_entry, li_next, list, dccplih_node) {
- if (li_entry->dccplih_interval != ~0) {
+ if (li_entry->dccplih_interval != ~0U) {
i_tot0 += li_entry->dccplih_interval * dccp_li_hist_w[i];
w_tot += dccp_li_hist_w[i];
if (i != 0)
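
The one-character fix above is a signedness cleanup: dccplih_interval is a u32, while plain ~0 is a signed int that only compares equal via implicit conversion. A standalone illustration (assumes the usual ABI with 32-bit int):

/* Both asserts hold, but the second relies on -1 being converted to
 * 0xFFFFFFFF at the comparison, which sign-compare checkers flag. */
#include <assert.h>
#include <stdint.h>

int main(void)
{
	uint32_t v = 0xFFFFFFFFu;

	assert(v == ~0U);	/* clean: both operands unsigned */
	assert(v == ~0);	/* works only via int -> unsigned */
	return 0;
}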
diff --git a/net/dccp/dccp.h b/net/dccp/dccp.h
index e33a9edb403..d8ad27bfe01 100644
--- a/net/dccp/dccp.h
+++ b/net/dccp/dccp.h
@@ -31,13 +31,9 @@
__stringify(cond)); \
} while (0)
-#ifdef MODULE
#define DCCP_PRINTK(enable, fmt, args...) do { if (enable) \
printk(fmt, ##args); \
} while(0)
-#else
-#define DCCP_PRINTK(enable, fmt, args...) printk(fmt, ##args)
-#endif
#define DCCP_PR_DEBUG(enable, fmt, a...) DCCP_PRINTK(enable, KERN_DEBUG \
"%s: " fmt, __FUNCTION__, ##a)
@@ -75,11 +71,15 @@ extern void dccp_time_wait(struct sock *sk, int state, int timeo);
/* RFC 1122, 4.2.3.1 initial RTO value */
#define DCCP_TIMEOUT_INIT ((unsigned)(3 * HZ))
+#define DCCP_RTO_MAX ((unsigned)(120 * HZ)) /* FIXME: using TCP value */
+
+/* bounds for sampled RTT values from packet exchanges (in usec) */
+#define DCCP_SANE_RTT_MIN 100
+#define DCCP_SANE_RTT_MAX (4 * USEC_PER_SEC)
+
/* Maximal interval between probes for local resources. */
#define DCCP_RESOURCE_PROBE_INTERVAL ((unsigned)(HZ / 2U))
-#define DCCP_RTO_MAX ((unsigned)(120 * HZ)) /* FIXME: using TCP value */
-
/* sysctl variables for DCCP */
extern int sysctl_dccp_request_retries;
extern int sysctl_dccp_retries1;
@@ -92,17 +92,43 @@ extern int sysctl_dccp_feat_send_ack_vector;
extern int sysctl_dccp_feat_send_ndp_count;
extern int sysctl_dccp_tx_qlen;
+/*
+ * 48-bit sequence number arithmetic (signed and unsigned)
+ */
+#define INT48_MIN 0x800000000000LL /* 2^47 */
+#define UINT48_MAX 0xFFFFFFFFFFFFLL /* 2^48 - 1 */
+#define COMPLEMENT48(x) (0x1000000000000LL - (x)) /* 2^48 - x */
+#define TO_SIGNED48(x) (((x) < INT48_MIN) ? (x) : -COMPLEMENT48((x)))
+#define TO_UNSIGNED48(x) (((x) >= 0) ? (x) : COMPLEMENT48(-(x)))
+#define ADD48(a, b) (((a) + (b)) & UINT48_MAX)
+#define SUB48(a, b) ADD48((a), COMPLEMENT48(b))
+
+static inline void dccp_set_seqno(u64 *seqno, u64 value)
+{
+ *seqno = value & UINT48_MAX;
+}
+
+static inline void dccp_inc_seqno(u64 *seqno)
+{
+ *seqno = ADD48(*seqno, 1);
+}
+
+/* signed mod-2^48 distance: pos. if seqno1 < seqno2, neg. if seqno1 > seqno2 */
+static inline s64 dccp_delta_seqno(const u64 seqno1, const u64 seqno2)
+{
+ u64 delta = SUB48(seqno2, seqno1);
+
+ return TO_SIGNED48(delta);
+}
+
/* is seq1 < seq2 ? */
static inline int before48(const u64 seq1, const u64 seq2)
{
- return (s64)((seq1 << 16) - (seq2 << 16)) < 0;
+ return (s64)((seq2 << 16) - (seq1 << 16)) > 0;
}
/* is seq1 > seq2 ? */
-static inline int after48(const u64 seq1, const u64 seq2)
-{
- return (s64)((seq2 << 16) - (seq1 << 16)) < 0;
-}
+#define after48(seq1, seq2) before48(seq2, seq1)
/* is seq2 <= seq1 <= seq3 ? */
static inline int between48(const u64 seq1, const u64 seq2, const u64 seq3)
@@ -118,9 +144,7 @@ static inline u64 max48(const u64 seq1, const u64 seq2)
/* is seq1 next seqno after seq2 */
static inline int follows48(const u64 seq1, const u64 seq2)
{
- int diff = (seq1 & 0xFFFF) - (seq2 & 0xFFFF);
-
- return diff==1;
+ return dccp_delta_seqno(seq2, seq1) == 1;
}
enum {
@@ -272,6 +296,8 @@ extern int dccp_v4_connect(struct sock *sk, struct sockaddr *uaddr,
extern int dccp_send_reset(struct sock *sk, enum dccp_reset_codes code);
extern void dccp_send_close(struct sock *sk, const int active);
extern int dccp_invalid_packet(struct sk_buff *skb);
+extern u32 dccp_sample_rtt(struct sock *sk, struct timeval *t_recv,
+ struct timeval *t_hist);
static inline int dccp_bad_service_code(const struct sock *sk,
const __be32 service)
@@ -313,26 +339,7 @@ static inline int dccp_packet_without_ack(const struct sk_buff *skb)
return type == DCCP_PKT_DATA || type == DCCP_PKT_REQUEST;
}
-#define DCCP_MAX_SEQNO ((((u64)1) << 48) - 1)
-#define DCCP_PKT_WITHOUT_ACK_SEQ (DCCP_MAX_SEQNO << 2)
-
-static inline void dccp_set_seqno(u64 *seqno, u64 value)
-{
- if (value > DCCP_MAX_SEQNO)
- value -= DCCP_MAX_SEQNO + 1;
- *seqno = value;
-}
-
-static inline u64 dccp_delta_seqno(u64 seqno1, u64 seqno2)
-{
- return ((seqno2 << 16) - (seqno1 << 16)) >> 16;
-}
-
-static inline void dccp_inc_seqno(u64 *seqno)
-{
- if (++*seqno > DCCP_MAX_SEQNO)
- *seqno = 0;
-}
+#define DCCP_PKT_WITHOUT_ACK_SEQ (UINT48_MAX << 2)
static inline void dccp_hdr_set_seq(struct dccp_hdr *dh, const u64 gss)
{
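
To see how the new 48-bit helpers behave at the wrap, where the old shifted-subtraction tricks were fragile, a standalone sketch (not kernel code) replaying the macros above:

/*
 * The signed distance stays +/-1 across the 2^48 wrap, where plain
 * 64-bit subtraction would yield a huge value.
 */
#include <assert.h>
#include <stdint.h>

#define INT48_MIN	0x800000000000LL		/* 2^47     */
#define UINT48_MAX	0xFFFFFFFFFFFFLL		/* 2^48 - 1 */
#define COMPLEMENT48(x)	(0x1000000000000LL - (x))	/* 2^48 - x */
#define TO_SIGNED48(x)	(((x) < INT48_MIN) ? (x) : -COMPLEMENT48((x)))
#define ADD48(a, b)	(((a) + (b)) & UINT48_MAX)
#define SUB48(a, b)	ADD48((a), COMPLEMENT48(b))

static int64_t delta_seqno(uint64_t seqno1, uint64_t seqno2)
{
	int64_t delta = (int64_t)SUB48(seqno2, seqno1);

	return TO_SIGNED48(delta);
}

int main(void)
{
	uint64_t gsr = UINT48_MAX;	/* highest valid seqno  */
	uint64_t next = ADD48(gsr, 1);	/* wraps around to zero */

	assert(next == 0);
	assert(delta_seqno(gsr, next) == 1);	/* "next follows gsr" */
	assert(delta_seqno(next, gsr) == -1);
	return 0;
}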
diff --git a/net/dccp/input.c b/net/dccp/input.c
index 78b043c458b..da6ec185ed5 100644
--- a/net/dccp/input.c
+++ b/net/dccp/input.c
@@ -86,7 +86,8 @@ static int dccp_check_seqno(struct sock *sk, struct sk_buff *skb)
dh->dccph_type == DCCP_PKT_SYNCACK) {
if (between48(DCCP_SKB_CB(skb)->dccpd_ack_seq,
dp->dccps_awl, dp->dccps_awh) &&
- !before48(DCCP_SKB_CB(skb)->dccpd_seq, dp->dccps_swl))
+ dccp_delta_seqno(dp->dccps_swl,
+ DCCP_SKB_CB(skb)->dccpd_seq) >= 0)
dccp_update_gsr(sk, DCCP_SKB_CB(skb)->dccpd_seq);
else
return -1;
@@ -203,7 +204,8 @@ static int __dccp_rcv_established(struct sock *sk, struct sk_buff *skb,
if (dp->dccps_role != DCCP_ROLE_CLIENT)
goto send_sync;
check_seq:
- if (!before48(DCCP_SKB_CB(skb)->dccpd_seq, dp->dccps_osr)) {
+ if (dccp_delta_seqno(dp->dccps_osr,
+ DCCP_SKB_CB(skb)->dccpd_seq) >= 0) {
send_sync:
dccp_send_sync(sk, DCCP_SKB_CB(skb)->dccpd_seq,
DCCP_PKT_SYNC);
@@ -298,6 +300,14 @@ static int dccp_rcv_request_sent_state_process(struct sock *sk,
if (dccp_parse_options(sk, skb))
goto out_invalid_packet;
+ /* Obtain RTT sample from SYN exchange (used by CCID 3) */
+ if (dp->dccps_options_received.dccpor_timestamp_echo) {
+ struct timeval now;
+
+ dccp_timestamp(sk, &now);
+ dp->dccps_syn_rtt = dccp_sample_rtt(sk, &now, NULL);
+ }
+
if (dccp_msk(sk)->dccpms_send_ack_vector &&
dccp_ackvec_add(dp->dccps_hc_rx_ackvec, sk,
DCCP_SKB_CB(skb)->dccpd_seq,
@@ -575,3 +585,43 @@ discard:
}
EXPORT_SYMBOL_GPL(dccp_rcv_state_process);
+
+/**
+ * dccp_sample_rtt - Sample RTT from packet exchange
+ *
+ * @sk: connected dccp_sock
+ * @t_recv: receive timestamp of packet with timestamp echo
+ * @t_hist: packet history timestamp or NULL
+ */
+u32 dccp_sample_rtt(struct sock *sk, struct timeval *t_recv,
+ struct timeval *t_hist)
+{
+ struct dccp_sock *dp = dccp_sk(sk);
+ struct dccp_options_received *or = &dp->dccps_options_received;
+ suseconds_t delta;
+
+ if (t_hist == NULL) {
+ if (!or->dccpor_timestamp_echo) {
+ DCCP_WARN("packet without timestamp echo\n");
+ return DCCP_SANE_RTT_MAX;
+ }
+ timeval_sub_usecs(t_recv, or->dccpor_timestamp_echo * 10);
+ delta = timeval_usecs(t_recv);
+ } else
+ delta = timeval_delta(t_recv, t_hist);
+
+ delta -= or->dccpor_elapsed_time * 10; /* either set or 0 */
+
+ if (unlikely(delta <= 0)) {
+ DCCP_WARN("unusable RTT sample %ld, using min\n", (long)delta);
+ return DCCP_SANE_RTT_MIN;
+ }
+ if (unlikely(delta - (suseconds_t)DCCP_SANE_RTT_MAX > 0)) {
+ DCCP_WARN("RTT sample %ld too large, using max\n", (long)delta);
+ return DCCP_SANE_RTT_MAX;
+ }
+
+ return delta;
+}
+
+EXPORT_SYMBOL_GPL(dccp_sample_rtt);
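
The sanity bounds in dccp_sample_rtt() can be checked in isolation; a standalone sketch (not kernel code) of the clamping behaviour above:

/*
 * Non-positive or oversized samples are clamped into
 * [DCCP_SANE_RTT_MIN, DCCP_SANE_RTT_MAX] microseconds, mirroring the
 * warnings-and-fallback logic in dccp_sample_rtt().
 */
#include <assert.h>
#include <stdint.h>

#define DCCP_SANE_RTT_MIN	100		/* usec */
#define DCCP_SANE_RTT_MAX	(4 * 1000000)	/* usec */

static uint32_t sane_rtt(int64_t delta_us)
{
	if (delta_us <= 0)
		return DCCP_SANE_RTT_MIN;
	if (delta_us > DCCP_SANE_RTT_MAX)
		return DCCP_SANE_RTT_MAX;
	return (uint32_t)delta_us;
}

int main(void)
{
	assert(sane_rtt(-5) == DCCP_SANE_RTT_MIN);	/* echo after rx  */
	assert(sane_rtt(25000) == 25000);		/* plausible 25 ms */
	assert(sane_rtt(10 * 1000000) == DCCP_SANE_RTT_MAX);
	return 0;
}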
diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c
index 4a83978aa66..718f2fa923a 100644
--- a/net/dccp/ipv4.c
+++ b/net/dccp/ipv4.c
@@ -207,8 +207,8 @@ static void dccp_v4_err(struct sk_buff *skb, u32 info)
(iph->ihl << 2));
struct dccp_sock *dp;
struct inet_sock *inet;
- const int type = skb->h.icmph->type;
- const int code = skb->h.icmph->code;
+ const int type = icmp_hdr(skb)->type;
+ const int code = icmp_hdr(skb)->code;
struct sock *sk;
__u64 seq;
int err;
@@ -363,8 +363,8 @@ EXPORT_SYMBOL_GPL(dccp_v4_send_check);
static inline u64 dccp_v4_init_sequence(const struct sk_buff *skb)
{
- return secure_dccp_sequence_number(skb->nh.iph->daddr,
- skb->nh.iph->saddr,
+ return secure_dccp_sequence_number(ip_hdr(skb)->daddr,
+ ip_hdr(skb)->saddr,
dccp_hdr(skb)->dccph_dport,
dccp_hdr(skb)->dccph_sport);
}
@@ -405,7 +405,7 @@ struct sock *dccp_v4_request_recv_sock(struct sock *sk, struct sk_buff *skb,
newinet->opt = ireq->opt;
ireq->opt = NULL;
newinet->mc_index = inet_iif(skb);
- newinet->mc_ttl = skb->nh.iph->ttl;
+ newinet->mc_ttl = ip_hdr(skb)->ttl;
newinet->id = jiffies;
dccp_sync_mss(newsk, dst_mtu(dst));
@@ -428,7 +428,7 @@ EXPORT_SYMBOL_GPL(dccp_v4_request_recv_sock);
static struct sock *dccp_v4_hnd_req(struct sock *sk, struct sk_buff *skb)
{
const struct dccp_hdr *dh = dccp_hdr(skb);
- const struct iphdr *iph = skb->nh.iph;
+ const struct iphdr *iph = ip_hdr(skb);
struct sock *nsk;
struct request_sock **prev;
/* Find possible connection requests. */
@@ -460,8 +460,8 @@ static struct dst_entry* dccp_v4_route_skb(struct sock *sk,
struct rtable *rt;
struct flowi fl = { .oif = ((struct rtable *)skb->dst)->rt_iif,
.nl_u = { .ip4_u =
- { .daddr = skb->nh.iph->saddr,
- .saddr = skb->nh.iph->daddr,
+ { .daddr = ip_hdr(skb)->saddr,
+ .saddr = ip_hdr(skb)->daddr,
.tos = RT_CONN_FLAGS(sk) } },
.proto = sk->sk_protocol,
.uli_u = { .ports =
@@ -513,6 +513,7 @@ static void dccp_v4_ctl_send_reset(struct sock *sk, struct sk_buff *rxskb)
{
int err;
struct dccp_hdr *rxdh = dccp_hdr(rxskb), *dh;
+ const struct iphdr *rxiph;
const int dccp_hdr_reset_len = sizeof(struct dccp_hdr) +
sizeof(struct dccp_hdr_ext) +
sizeof(struct dccp_hdr_reset);
@@ -559,13 +560,13 @@ static void dccp_v4_ctl_send_reset(struct sock *sk, struct sk_buff *rxskb)
dccp_hdr_set_ack(dccp_hdr_ack_bits(skb), DCCP_SKB_CB(rxskb)->dccpd_seq);
dccp_csum_outgoing(skb);
- dh->dccph_checksum = dccp_v4_csum_finish(skb, rxskb->nh.iph->saddr,
- rxskb->nh.iph->daddr);
+ rxiph = ip_hdr(rxskb);
+ dh->dccph_checksum = dccp_v4_csum_finish(skb, rxiph->saddr,
+ rxiph->daddr);
bh_lock_sock(dccp_v4_ctl_socket->sk);
err = ip_build_and_send_pkt(skb, dccp_v4_ctl_socket->sk,
- rxskb->nh.iph->daddr,
- rxskb->nh.iph->saddr, NULL);
+ rxiph->daddr, rxiph->saddr, NULL);
bh_unlock_sock(dccp_v4_ctl_socket->sk);
if (net_xmit_eval(err) == 0) {
@@ -640,8 +641,8 @@ int dccp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
goto drop_and_free;
ireq = inet_rsk(req);
- ireq->loc_addr = skb->nh.iph->daddr;
- ireq->rmt_addr = skb->nh.iph->saddr;
+ ireq->loc_addr = ip_hdr(skb)->daddr;
+ ireq->rmt_addr = ip_hdr(skb)->saddr;
ireq->opt = NULL;
/*
@@ -809,6 +810,7 @@ EXPORT_SYMBOL_GPL(dccp_invalid_packet);
static int dccp_v4_rcv(struct sk_buff *skb)
{
const struct dccp_hdr *dh;
+ const struct iphdr *iph;
struct sock *sk;
int min_cov;
@@ -817,8 +819,9 @@ static int dccp_v4_rcv(struct sk_buff *skb)
if (dccp_invalid_packet(skb))
goto discard_it;
+ iph = ip_hdr(skb);
/* Step 1: If header checksum is incorrect, drop packet and return */
- if (dccp_v4_csum_finish(skb, skb->nh.iph->saddr, skb->nh.iph->daddr)) {
+ if (dccp_v4_csum_finish(skb, iph->saddr, iph->daddr)) {
DCCP_WARN("dropped packet with invalid checksum\n");
goto discard_it;
}
@@ -832,8 +835,8 @@ static int dccp_v4_rcv(struct sk_buff *skb)
"src=%u.%u.%u.%u@%-5d "
"dst=%u.%u.%u.%u@%-5d seq=%llu",
dccp_packet_name(dh->dccph_type),
- NIPQUAD(skb->nh.iph->saddr), ntohs(dh->dccph_sport),
- NIPQUAD(skb->nh.iph->daddr), ntohs(dh->dccph_dport),
+ NIPQUAD(iph->saddr), ntohs(dh->dccph_sport),
+ NIPQUAD(iph->daddr), ntohs(dh->dccph_dport),
(unsigned long long) DCCP_SKB_CB(skb)->dccpd_seq);
if (dccp_packet_without_ack(skb)) {
@@ -848,10 +851,8 @@ static int dccp_v4_rcv(struct sk_buff *skb)
/* Step 2:
* Look up flow ID in table and get corresponding socket */
sk = __inet_lookup(&dccp_hashinfo,
- skb->nh.iph->saddr, dh->dccph_sport,
- skb->nh.iph->daddr, dh->dccph_dport,
- inet_iif(skb));
-
+ iph->saddr, dh->dccph_sport,
+ iph->daddr, dh->dccph_dport, inet_iif(skb));
/*
* Step 2:
* If no socket ...
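
The skb->nh.iph to ip_hdr(skb) conversion throughout this file is mechanical: the accessors are thin inline wrappers over the skb header pointers. Sketched here for reference from this kernel generation's <linux/ip.h> and <linux/icmp.h> (shapes believed accurate):

static inline struct iphdr *ip_hdr(const struct sk_buff *skb)
{
	return (struct iphdr *)skb_network_header(skb);
}

static inline struct icmphdr *icmp_hdr(const struct sk_buff *skb)
{
	return (struct icmphdr *)skb_transport_header(skb);
}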
diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c
index 7f51e8db396..64eac2515aa 100644
--- a/net/dccp/ipv6.c
+++ b/net/dccp/ipv6.c
@@ -84,8 +84,8 @@ static inline __u32 secure_dccpv6_sequence_number(__be32 *saddr, __be32 *daddr,
static inline __u32 dccp_v6_init_sequence(struct sk_buff *skb)
{
- return secure_dccpv6_sequence_number(skb->nh.ipv6h->daddr.s6_addr32,
- skb->nh.ipv6h->saddr.s6_addr32,
+ return secure_dccpv6_sequence_number(ipv6_hdr(skb)->daddr.s6_addr32,
+ ipv6_hdr(skb)->saddr.s6_addr32,
dccp_hdr(skb)->dccph_dport,
dccp_hdr(skb)->dccph_sport );
@@ -261,8 +261,8 @@ static int dccp_v6_send_response(struct sock *sk, struct request_sock *req,
if (rxopt->srcrt)
opt = ipv6_invert_rthdr(sk,
- (struct ipv6_rt_hdr *)(pktopts->nh.raw +
- rxopt->srcrt));
+ (struct ipv6_rt_hdr *)(skb_network_header(pktopts) +
+ rxopt->srcrt));
}
if (opt != NULL && opt->srcrt != NULL) {
@@ -313,6 +313,7 @@ static void dccp_v6_reqsk_destructor(struct request_sock *req)
static void dccp_v6_ctl_send_reset(struct sock *sk, struct sk_buff *rxskb)
{
struct dccp_hdr *rxdh = dccp_hdr(rxskb), *dh;
+ struct ipv6hdr *rxip6h;
const u32 dccp_hdr_reset_len = sizeof(struct dccp_hdr) +
sizeof(struct dccp_hdr_ext) +
sizeof(struct dccp_hdr_reset);
@@ -352,12 +353,13 @@ static void dccp_v6_ctl_send_reset(struct sock *sk, struct sk_buff *rxskb)
dccp_hdr_set_ack(dccp_hdr_ack_bits(skb), DCCP_SKB_CB(rxskb)->dccpd_seq);
dccp_csum_outgoing(skb);
- dh->dccph_checksum = dccp_v6_csum_finish(skb, &rxskb->nh.ipv6h->saddr,
- &rxskb->nh.ipv6h->daddr);
+ rxip6h = ipv6_hdr(rxskb);
+ dh->dccph_checksum = dccp_v6_csum_finish(skb, &rxip6h->saddr,
+ &rxip6h->daddr);
memset(&fl, 0, sizeof(fl));
- ipv6_addr_copy(&fl.fl6_dst, &rxskb->nh.ipv6h->saddr);
- ipv6_addr_copy(&fl.fl6_src, &rxskb->nh.ipv6h->daddr);
+ ipv6_addr_copy(&fl.fl6_dst, &rxip6h->saddr);
+ ipv6_addr_copy(&fl.fl6_src, &rxip6h->daddr);
fl.proto = IPPROTO_DCCP;
fl.oif = inet6_iif(rxskb);
@@ -390,7 +392,7 @@ static struct request_sock_ops dccp6_request_sock_ops = {
static struct sock *dccp_v6_hnd_req(struct sock *sk,struct sk_buff *skb)
{
const struct dccp_hdr *dh = dccp_hdr(skb);
- const struct ipv6hdr *iph = skb->nh.ipv6h;
+ const struct ipv6hdr *iph = ipv6_hdr(skb);
struct sock *nsk;
struct request_sock **prev;
/* Find possible connection requests. */
@@ -460,8 +462,8 @@ static int dccp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
goto drop_and_free;
ireq6 = inet6_rsk(req);
- ipv6_addr_copy(&ireq6->rmt_addr, &skb->nh.ipv6h->saddr);
- ipv6_addr_copy(&ireq6->loc_addr, &skb->nh.ipv6h->daddr);
+ ipv6_addr_copy(&ireq6->rmt_addr, &ipv6_hdr(skb)->saddr);
+ ipv6_addr_copy(&ireq6->loc_addr, &ipv6_hdr(skb)->daddr);
ireq6->pktopts = NULL;
if (ipv6_opt_accepted(sk, skb) ||
@@ -546,7 +548,7 @@ static struct sock *dccp_v6_request_recv_sock(struct sock *sk,
newnp->pktoptions = NULL;
newnp->opt = NULL;
newnp->mcast_oif = inet6_iif(skb);
- newnp->mcast_hops = skb->nh.ipv6h->hop_limit;
+ newnp->mcast_hops = ipv6_hdr(skb)->hop_limit;
/*
* No need to charge this sock to the relevant IPv6 refcnt debug socks count
@@ -573,8 +575,8 @@ static struct sock *dccp_v6_request_recv_sock(struct sock *sk,
if (rxopt->srcrt)
opt = ipv6_invert_rthdr(sk,
- (struct ipv6_rt_hdr *)(ireq6->pktopts->nh.raw +
- rxopt->srcrt));
+ (struct ipv6_rt_hdr *)(skb_network_header(ireq6->pktopts) +
+ rxopt->srcrt));
}
if (dst == NULL) {
@@ -653,7 +655,7 @@ static struct sock *dccp_v6_request_recv_sock(struct sock *sk,
}
newnp->opt = NULL;
newnp->mcast_oif = inet6_iif(skb);
- newnp->mcast_hops = skb->nh.ipv6h->hop_limit;
+ newnp->mcast_hops = ipv6_hdr(skb)->hop_limit;
/*
* Clone native IPv6 options from listening socket (if any)
@@ -826,8 +828,8 @@ static int dccp_v6_rcv(struct sk_buff **pskb)
goto discard_it;
/* Step 1: If header checksum is incorrect, drop packet and return. */
- if (dccp_v6_csum_finish(skb, &skb->nh.ipv6h->saddr,
- &skb->nh.ipv6h->daddr)) {
+ if (dccp_v6_csum_finish(skb, &ipv6_hdr(skb)->saddr,
+ &ipv6_hdr(skb)->daddr)) {
DCCP_WARN("dropped packet with invalid checksum\n");
goto discard_it;
}
@@ -844,9 +846,9 @@ static int dccp_v6_rcv(struct sk_buff **pskb)
/* Step 2:
* Look up flow ID in table and get corresponding socket */
- sk = __inet6_lookup(&dccp_hashinfo, &skb->nh.ipv6h->saddr,
+ sk = __inet6_lookup(&dccp_hashinfo, &ipv6_hdr(skb)->saddr,
dh->dccph_sport,
- &skb->nh.ipv6h->daddr, ntohs(dh->dccph_dport),
+ &ipv6_hdr(skb)->daddr, ntohs(dh->dccph_dport),
inet6_iif(skb));
/*
* Step 2:
diff --git a/net/dccp/minisocks.c b/net/dccp/minisocks.c
index 6d235b3013d..e18e249ac49 100644
--- a/net/dccp/minisocks.c
+++ b/net/dccp/minisocks.c
@@ -27,7 +27,7 @@
struct inet_timewait_death_row dccp_death_row = {
.sysctl_max_tw_buckets = NR_FILE * 2,
.period = DCCP_TIMEWAIT_LEN / INET_TWDR_TWKILL_SLOTS,
- .death_lock = SPIN_LOCK_UNLOCKED,
+ .death_lock = __SPIN_LOCK_UNLOCKED(dccp_death_row.death_lock),
.hashinfo = &dccp_hashinfo,
.tw_timer = TIMER_INITIALIZER(inet_twdr_hangman, 0,
(unsigned long)&dccp_death_row),
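
The initializer change above follows the lockdep-era convention: naming the lock gives each statically initialized spinlock its own lock class, rather than every SPIN_LOCK_UNLOCKED user sharing one. In general (lock name below is illustrative):

/* A named initializer gives this static lock a distinct lockdep key. */
static spinlock_t some_lock = __SPIN_LOCK_UNLOCKED(some_lock);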
diff --git a/net/dccp/options.c b/net/dccp/options.c
index ca13f773199..34d536d5f1a 100644
--- a/net/dccp/options.c
+++ b/net/dccp/options.c
@@ -29,8 +29,6 @@ int sysctl_dccp_feat_ack_ratio = DCCPF_INITIAL_ACK_RATIO;
int sysctl_dccp_feat_send_ack_vector = DCCPF_INITIAL_SEND_ACK_VECTOR;
int sysctl_dccp_feat_send_ndp_count = DCCPF_INITIAL_SEND_NDP_COUNT;
-EXPORT_SYMBOL_GPL(sysctl_dccp_feat_sequence_window);
-
void dccp_minisock_init(struct dccp_minisock *dmsk)
{
dmsk->dccpms_sequence_window = sysctl_dccp_feat_sequence_window;
@@ -174,21 +172,25 @@ int dccp_parse_options(struct sock *sk, struct sk_buff *skb)
opt_recv->dccpor_timestamp_echo = ntohl(*(__be32 *)value);
dccp_pr_debug("%s rx opt: TIMESTAMP_ECHO=%u, len=%d, "
- "ackno=%llu, ", dccp_role(sk),
+ "ackno=%llu", dccp_role(sk),
opt_recv->dccpor_timestamp_echo,
len + 2,
(unsigned long long)
DCCP_SKB_CB(skb)->dccpd_ack_seq);
- if (len == 4)
+ if (len == 4) {
+ dccp_pr_debug_cat("\n");
break;
+ }
if (len == 6)
elapsed_time = ntohs(*(__be16 *)(value + 4));
else
elapsed_time = ntohl(*(__be32 *)(value + 4));
+ dccp_pr_debug_cat(", ELAPSED_TIME=%d\n", elapsed_time);
+
/* Give precedence to the biggest ELAPSED_TIME */
if (elapsed_time > opt_recv->dccpor_elapsed_time)
opt_recv->dccpor_elapsed_time = elapsed_time;
@@ -565,6 +567,14 @@ int dccp_insert_options(struct sock *sk, struct sk_buff *skb)
dccp_insert_options_feat(sk, skb))
return -1;
+ /*
+ * Obtain RTT sample from Request/Response exchange.
+ * This is currently used in CCID 3 initialisation.
+ */
+ if (DCCP_SKB_CB(skb)->dccpd_type == DCCP_PKT_REQUEST &&
+ dccp_insert_option_timestamp(sk, skb))
+ return -1;
+
/* XXX: insert other options when appropriate */
if (DCCP_SKB_CB(skb)->dccpd_opt_len != 0) {
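
The parsing above keeps elapsed time in its wire units: per RFC 4340, Timestamp Echo and Elapsed Time are carried in units of 10 microseconds, hence the * 10 conversions at the points of use. A standalone sketch (helper name hypothetical) of the length-dependent decode: a 4-byte echo value carries no elapsed time, while 6- and 8-byte values append 16- and 32-bit elapsed times.

#include <stdint.h>
#include <arpa/inet.h>

/* value/len are the option payload and its length, as in the parser. */
static uint32_t decode_elapsed_time_usec(const uint8_t *value, int len)
{
	uint32_t elapsed;

	if (len == 4)			/* echo only, no elapsed time */
		return 0;
	if (len == 6)
		elapsed = ntohs(*(const uint16_t *)(value + 4));
	else
		elapsed = ntohl(*(const uint32_t *)(value + 4));
	return elapsed * 10;		/* 10 usec wire units -> usec */
}

int main(void)
{
	const uint8_t opt[6] = { 0, 0, 0, 0, 0x00, 0x02 };

	return decode_elapsed_time_usec(opt, 6) == 20 ? 0 : 1;
}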
diff --git a/net/dccp/output.c b/net/dccp/output.c
index aa21cc4de37..c8d843e983f 100644
--- a/net/dccp/output.c
+++ b/net/dccp/output.c
@@ -194,6 +194,7 @@ static int dccp_wait_for_ccid(struct sock *sk, struct sk_buff *skb)
rc = ccid_hc_tx_send_packet(dp->dccps_hc_tx_ccid, sk, skb);
if (rc <= 0)
break;
+ dccp_pr_debug("delayed send by %d msec\n", rc);
delay = msecs_to_jiffies(rc);
sk->sk_write_pending++;
release_sock(sk);
@@ -255,7 +256,7 @@ void dccp_write_xmit(struct sock *sk, int block)
DCCP_BUG("err=%d after ccid_hc_tx_packet_sent",
err);
} else {
- dccp_pr_debug("packet discarded\n");
+ dccp_pr_debug("packet discarded due to err=%d\n", err);
kfree_skb(skb);
}
}
diff --git a/net/dccp/probe.c b/net/dccp/probe.c
index 3b1f509f51d..1f5e3ba6206 100644
--- a/net/dccp/probe.c
+++ b/net/dccp/probe.c
@@ -90,15 +90,18 @@ static int jdccp_sendmsg(struct kiocb *iocb, struct sock *sk,
if (port == 0 || ntohs(inet->dport) == port ||
ntohs(inet->sport) == port) {
if (hctx)
- printl("%d.%d.%d.%d:%u %d.%d.%d.%d:%u %d %d %d %d %d\n",
- NIPQUAD(inet->saddr), ntohs(inet->sport),
- NIPQUAD(inet->daddr), ntohs(inet->dport), size,
- hctx->ccid3hctx_s, hctx->ccid3hctx_rtt,
- hctx->ccid3hctx_p, hctx->ccid3hctx_t_ipi);
+ printl("%d.%d.%d.%d:%u %d.%d.%d.%d:%u %d %d %d %d %u "
+ "%llu %llu %d\n",
+ NIPQUAD(inet->saddr), ntohs(inet->sport),
+ NIPQUAD(inet->daddr), ntohs(inet->dport), size,
+ hctx->ccid3hctx_s, hctx->ccid3hctx_rtt,
+ hctx->ccid3hctx_p, hctx->ccid3hctx_x_calc,
+ hctx->ccid3hctx_x_recv >> 6,
+ hctx->ccid3hctx_x >> 6, hctx->ccid3hctx_t_ipi);
else
printl("%d.%d.%d.%d:%u %d.%d.%d.%d:%u %d\n",
- NIPQUAD(inet->saddr), ntohs(inet->sport),
- NIPQUAD(inet->daddr), ntohs(inet->dport), size);
+ NIPQUAD(inet->saddr), ntohs(inet->sport),
+ NIPQUAD(inet->daddr), ntohs(inet->dport), size);
}
jprobe_return();
diff --git a/net/decnet/af_decnet.c b/net/decnet/af_decnet.c
index c6568d637e1..9fbe87c9380 100644
--- a/net/decnet/af_decnet.c
+++ b/net/decnet/af_decnet.c
@@ -721,7 +721,7 @@ static int dn_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
struct sock *sk = sock->sk;
struct dn_scp *scp = DN_SK(sk);
struct sockaddr_dn *saddr = (struct sockaddr_dn *)uaddr;
- struct net_device *dev;
+ struct net_device *dev, *ldev;
int rv;
if (addr_len != sizeof(struct sockaddr_dn))
@@ -746,14 +746,17 @@ static int dn_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
if (!(saddr->sdn_flags & SDF_WILD)) {
if (dn_ntohs(saddr->sdn_nodeaddrl)) {
read_lock(&dev_base_lock);
- for(dev = dev_base; dev; dev = dev->next) {
+ ldev = NULL;
+ for_each_netdev(dev) {
if (!dev->dn_ptr)
continue;
- if (dn_dev_islocal(dev, dn_saddr2dn(saddr)))
+ if (dn_dev_islocal(dev, dn_saddr2dn(saddr))) {
+ ldev = dev;
break;
+ }
}
read_unlock(&dev_base_lock);
- if (dev == NULL)
+ if (ldev == NULL)
return -EADDRNOTAVAIL;
}
}
@@ -2413,6 +2416,7 @@ module_init(decnet_init);
static void __exit decnet_exit(void)
{
sock_unregister(AF_DECnet);
+ rtnl_unregister_all(PF_DECnet);
dev_remove_pack(&dn_dix_packet_type);
dn_unregister_sysctl();
diff --git a/net/decnet/dn_dev.c b/net/decnet/dn_dev.c
index 060d725e294..764a56a13e3 100644
--- a/net/decnet/dn_dev.c
+++ b/net/decnet/dn_dev.c
@@ -799,10 +799,10 @@ static int dn_nl_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb)
skip_ndevs = cb->args[0];
skip_naddr = cb->args[1];
- read_lock(&dev_base_lock);
- for (dev = dev_base, idx = 0; dev; dev = dev->next, idx++) {
+ idx = 0;
+ for_each_netdev(dev) {
if (idx < skip_ndevs)
- continue;
+ goto cont;
else if (idx > skip_ndevs) {
/* Only skip over addresses for first dev dumped
* in this iteration (idx == skip_ndevs) */
@@ -810,22 +810,22 @@ static int dn_nl_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb)
}
if ((dn_db = dev->dn_ptr) == NULL)
- continue;
+ goto cont;
for (ifa = dn_db->ifa_list, dn_idx = 0; ifa;
ifa = ifa->ifa_next, dn_idx++) {
if (dn_idx < skip_naddr)
- continue;
+ goto cont;
if (dn_nl_fill_ifaddr(skb, ifa, NETLINK_CB(cb->skb).pid,
cb->nlh->nlmsg_seq, RTM_NEWADDR,
NLM_F_MULTI) < 0)
goto done;
}
+cont:
+ idx++;
}
done:
- read_unlock(&dev_base_lock);
-
cb->args[0] = idx;
cb->args[1] = dn_idx;
@@ -913,7 +913,7 @@ static void dn_send_endnode_hello(struct net_device *dev, struct dn_ifaddr *ifa)
pktlen = (__le16 *)skb_push(skb,2);
*pktlen = dn_htons(skb->len - 2);
- skb->nh.raw = skb->data;
+ skb_reset_network_header(skb);
dn_rt_finish_output(skb, dn_rt_all_rt_mcast, msg->id);
}
@@ -1005,7 +1005,7 @@ static void dn_send_router_hello(struct net_device *dev, struct dn_ifaddr *ifa)
pktlen = (__le16 *)skb_push(skb, 2);
*pktlen = dn_htons(skb->len - 2);
- skb->nh.raw = skb->data;
+ skb_reset_network_header(skb);
if (dn_am_i_a_router(dn, dn_db, ifa)) {
struct sk_buff *skb2 = skb_copy(skb, GFP_ATOMIC);
@@ -1299,7 +1299,7 @@ void dn_dev_devices_off(void)
struct net_device *dev;
rtnl_lock();
- for(dev = dev_base; dev; dev = dev->next)
+ for_each_netdev(dev)
dn_dev_down(dev);
rtnl_unlock();
@@ -1310,7 +1310,7 @@ void dn_dev_devices_on(void)
struct net_device *dev;
rtnl_lock();
- for(dev = dev_base; dev; dev = dev->next) {
+ for_each_netdev(dev) {
if (dev->flags & IFF_UP)
dn_dev_up(dev);
}
@@ -1328,62 +1328,56 @@ int unregister_dnaddr_notifier(struct notifier_block *nb)
}
#ifdef CONFIG_PROC_FS
-static inline struct net_device *dn_dev_get_next(struct seq_file *seq, struct net_device *dev)
+static inline int is_dn_dev(struct net_device *dev)
{
- do {
- dev = dev->next;
- } while(dev && !dev->dn_ptr);
-
- return dev;
+ return dev->dn_ptr != NULL;
}
-static struct net_device *dn_dev_get_idx(struct seq_file *seq, loff_t pos)
+static void *dn_dev_seq_start(struct seq_file *seq, loff_t *pos)
{
+ int i;
struct net_device *dev;
- dev = dev_base;
- if (dev && !dev->dn_ptr)
- dev = dn_dev_get_next(seq, dev);
- if (pos) {
- while(dev && (dev = dn_dev_get_next(seq, dev)))
- --pos;
- }
- return dev;
-}
+ read_lock(&dev_base_lock);
-static void *dn_dev_seq_start(struct seq_file *seq, loff_t *pos)
-{
- if (*pos) {
- struct net_device *dev;
- read_lock(&dev_base_lock);
- dev = dn_dev_get_idx(seq, *pos - 1);
- if (dev == NULL)
- read_unlock(&dev_base_lock);
- return dev;
+ if (*pos == 0)
+ return SEQ_START_TOKEN;
+
+ i = 1;
+ for_each_netdev(dev) {
+ if (!is_dn_dev(dev))
+ continue;
+
+ if (i++ == *pos)
+ return dev;
}
- return SEQ_START_TOKEN;
+
+ return NULL;
}
static void *dn_dev_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
- struct net_device *dev = v;
- loff_t one = 1;
+ struct net_device *dev;
- if (v == SEQ_START_TOKEN) {
- dev = dn_dev_seq_start(seq, &one);
- } else {
- dev = dn_dev_get_next(seq, dev);
- if (dev == NULL)
- read_unlock(&dev_base_lock);
- }
++*pos;
- return dev;
+
+ dev = (struct net_device *)v;
+ if (v == SEQ_START_TOKEN)
+ dev = net_device_entry(&dev_base_head);
+
+ for_each_netdev_continue(dev) {
+ if (!is_dn_dev(dev))
+ continue;
+
+ return dev;
+ }
+
+ return NULL;
}
static void dn_dev_seq_stop(struct seq_file *seq, void *v)
{
- if (v && v != SEQ_START_TOKEN)
- read_unlock(&dev_base_lock);
+ read_unlock(&dev_base_lock);
}
static char *dn_type2asc(char type)
@@ -1447,24 +1441,6 @@ static const struct file_operations dn_dev_seq_fops = {
#endif /* CONFIG_PROC_FS */
-static struct rtnetlink_link dnet_rtnetlink_table[RTM_NR_MSGTYPES] =
-{
- [RTM_NEWADDR - RTM_BASE] = { .doit = dn_nl_newaddr, },
- [RTM_DELADDR - RTM_BASE] = { .doit = dn_nl_deladdr, },
- [RTM_GETADDR - RTM_BASE] = { .dumpit = dn_nl_dump_ifaddr, },
-#ifdef CONFIG_DECNET_ROUTER
- [RTM_NEWROUTE - RTM_BASE] = { .doit = dn_fib_rtm_newroute, },
- [RTM_DELROUTE - RTM_BASE] = { .doit = dn_fib_rtm_delroute, },
- [RTM_GETROUTE - RTM_BASE] = { .doit = dn_cache_getroute,
- .dumpit = dn_fib_dump, },
- [RTM_GETRULE - RTM_BASE] = { .dumpit = dn_fib_dump_rules, },
-#else
- [RTM_GETROUTE - RTM_BASE] = { .doit = dn_cache_getroute,
- .dumpit = dn_cache_dump, },
-#endif
-
-};
-
static int __initdata addr[2];
module_param_array(addr, int, NULL, 0444);
MODULE_PARM_DESC(addr, "The DECnet address of this machine: area,node");
@@ -1485,7 +1461,9 @@ void __init dn_dev_init(void)
dn_dev_devices_on();
- rtnetlink_links[PF_DECnet] = dnet_rtnetlink_table;
+ rtnl_register(PF_DECnet, RTM_NEWADDR, dn_nl_newaddr, NULL);
+ rtnl_register(PF_DECnet, RTM_DELADDR, dn_nl_deladdr, NULL);
+ rtnl_register(PF_DECnet, RTM_GETADDR, NULL, dn_nl_dump_ifaddr);
proc_net_fops_create("decnet_dev", S_IRUGO, &dn_dev_seq_fops);
@@ -1500,8 +1478,6 @@ void __init dn_dev_init(void)
void __exit dn_dev_cleanup(void)
{
- rtnetlink_links[PF_DECnet] = NULL;
-
#ifdef CONFIG_SYSCTL
{
int i;
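
The procfs rework above adopts the standard seq_file iteration contract. A standalone model (not kernel code; types and names simplified) of the start semantics: position 0 returns SEQ_START_TOKEN for the header row, and position n returns the n-th device passing the filter, here "has a DECnet pointer".

#include <stdio.h>

#define SEQ_START_TOKEN ((void *)1)

struct dev { const char *name; int has_dn; struct dev *next; };

static void *dev_seq_start(struct dev *head, long pos)
{
	struct dev *d;
	long i = 1;

	if (pos == 0)
		return SEQ_START_TOKEN;
	for (d = head; d != NULL; d = d->next) {
		if (!d->has_dn)
			continue;
		if (i++ == pos)
			return d;
	}
	return NULL;
}

int main(void)
{
	struct dev lo = { "lo", 0, NULL };	/* no dn_ptr: skipped */
	struct dev eth0 = { "eth0", 1, &lo };
	struct dev *d = dev_seq_start(&eth0, 1);

	printf("%s\n", dev_seq_start(&eth0, 0) == SEQ_START_TOKEN ?
	       "header" : "?");
	printf("%s\n", d ? d->name : "(none)");	/* prints eth0 */
	return 0;
}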
diff --git a/net/decnet/dn_fib.c b/net/decnet/dn_fib.c
index 82d58a977e6..d2bc19d4795 100644
--- a/net/decnet/dn_fib.c
+++ b/net/decnet/dn_fib.c
@@ -504,7 +504,7 @@ static int dn_fib_check_attr(struct rtmsg *r, struct rtattr **rta)
return 0;
}
-int dn_fib_rtm_delroute(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
+static int dn_fib_rtm_delroute(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
{
struct dn_fib_table *tb;
struct rtattr **rta = arg;
@@ -520,7 +520,7 @@ int dn_fib_rtm_delroute(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
return -ESRCH;
}
-int dn_fib_rtm_newroute(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
+static int dn_fib_rtm_newroute(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
{
struct dn_fib_table *tb;
struct rtattr **rta = arg;
@@ -602,7 +602,7 @@ static void dn_fib_del_ifaddr(struct dn_ifaddr *ifa)
/* Scan device list */
read_lock(&dev_base_lock);
- for(dev = dev_base; dev; dev = dev->next) {
+ for_each_netdev(dev) {
dn_db = dev->dn_ptr;
if (dn_db == NULL)
continue;
@@ -748,11 +748,13 @@ void __exit dn_fib_cleanup(void)
void __init dn_fib_init(void)
{
-
dn_fib_table_init();
dn_fib_rules_init();
register_dnaddr_notifier(&dn_fib_dnaddr_notifier);
+
+ rtnl_register(PF_DECnet, RTM_NEWROUTE, dn_fib_rtm_newroute, NULL);
+ rtnl_register(PF_DECnet, RTM_DELROUTE, dn_fib_rtm_delroute, NULL);
}
diff --git a/net/decnet/dn_neigh.c b/net/decnet/dn_neigh.c
index bf701cf5a38..4bf066c416e 100644
--- a/net/decnet/dn_neigh.c
+++ b/net/decnet/dn_neigh.c
@@ -261,7 +261,7 @@ static int dn_long_output(struct sk_buff *skb)
lp->s_class = 0;
lp->pt = 0;
- skb->nh.raw = skb->data;
+ skb_reset_network_header(skb);
return NF_HOOK(PF_DECnet, NF_DN_POST_ROUTING, skb, NULL, neigh->dev, dn_neigh_output_packet);
}
@@ -300,7 +300,7 @@ static int dn_short_output(struct sk_buff *skb)
sp->srcnode = cb->src;
sp->forward = cb->hops & 0x3f;
- skb->nh.raw = skb->data;
+ skb_reset_network_header(skb);
return NF_HOOK(PF_DECnet, NF_DN_POST_ROUTING, skb, NULL, neigh->dev, dn_neigh_output_packet);
}
@@ -342,7 +342,7 @@ static int dn_phase3_output(struct sk_buff *skb)
sp->srcnode = cb->src & dn_htons(0x03ff);
sp->forward = cb->hops & 0x3f;
- skb->nh.raw = skb->data;
+ skb_reset_network_header(skb);
return NF_HOOK(PF_DECnet, NF_DN_POST_ROUTING, skb, NULL, neigh->dev, dn_neigh_output_packet);
}
diff --git a/net/decnet/dn_nsp_in.c b/net/decnet/dn_nsp_in.c
index 9d20904f6f5..4074a6e5d0d 100644
--- a/net/decnet/dn_nsp_in.c
+++ b/net/decnet/dn_nsp_in.c
@@ -362,7 +362,8 @@ static void dn_nsp_conn_conf(struct sock *sk, struct sk_buff *skb)
u16 dlen = *skb->data;
if ((dlen <= 16) && (dlen <= skb->len)) {
scp->conndata_in.opt_optl = dn_htons(dlen);
- memcpy(scp->conndata_in.opt_data, skb->data + 1, dlen);
+ skb_copy_from_linear_data_offset(skb, 1,
+ scp->conndata_in.opt_data, dlen);
}
}
dn_nsp_send_link(sk, DN_NOCHANGE, 0);
@@ -406,7 +407,7 @@ static void dn_nsp_disc_init(struct sock *sk, struct sk_buff *skb)
u16 dlen = *skb->data;
if ((dlen <= 16) && (dlen <= skb->len)) {
scp->discdata_in.opt_optl = dn_htons(dlen);
- memcpy(scp->discdata_in.opt_data, skb->data + 1, dlen);
+ skb_copy_from_linear_data_offset(skb, 1, scp->discdata_in.opt_data, dlen);
}
}
@@ -725,7 +726,7 @@ static int dn_nsp_rx_packet(struct sk_buff *skb)
if (!pskb_may_pull(skb, 2))
goto free_out;
- skb->h.raw = skb->data;
+ skb_reset_transport_header(skb);
cb->nsp_flags = *ptr++;
if (decnet_debug_level & 2)
diff --git a/net/decnet/dn_nsp_out.c b/net/decnet/dn_nsp_out.c
index 2d2cda82c7d..7404653880b 100644
--- a/net/decnet/dn_nsp_out.c
+++ b/net/decnet/dn_nsp_out.c
@@ -79,7 +79,7 @@ static void dn_nsp_send(struct sk_buff *skb)
struct dst_entry *dst;
struct flowi fl;
- skb->h.raw = skb->data;
+ skb_reset_transport_header(skb);
scp->stamp = jiffies;
dst = sk_dst_check(sk, 0);
@@ -681,8 +681,10 @@ void dn_nsp_send_conninit(struct sock *sk, unsigned char msgflg)
if (scp->peer.sdn_objnum)
type = 0;
- skb_put(skb, dn_sockaddr2username(&scp->peer, skb->tail, type));
- skb_put(skb, dn_sockaddr2username(&scp->addr, skb->tail, 2));
+ skb_put(skb, dn_sockaddr2username(&scp->peer,
+ skb_tail_pointer(skb), type));
+ skb_put(skb, dn_sockaddr2username(&scp->addr,
+ skb_tail_pointer(skb), 2));
menuver = DN_MENUVER_ACC | DN_MENUVER_USR;
if (scp->peer.sdn_flags & SDF_PROXY)
diff --git a/net/decnet/dn_route.c b/net/decnet/dn_route.c
index c1b5502f195..a8bf106b7a6 100644
--- a/net/decnet/dn_route.c
+++ b/net/decnet/dn_route.c
@@ -77,6 +77,7 @@
#include <linux/rcupdate.h>
#include <linux/times.h>
#include <asm/errno.h>
+#include <net/netlink.h>
#include <net/neighbour.h>
#include <net/dst.h>
#include <net/flow.h>
@@ -386,7 +387,7 @@ static int dn_return_short(struct sk_buff *skb)
__le16 tmp;
/* Add back headers */
- skb_push(skb, skb->data - skb->nh.raw);
+ skb_push(skb, skb->data - skb_network_header(skb));
if ((skb = skb_unshare(skb, GFP_ATOMIC)) == NULL)
return NET_RX_DROP;
@@ -425,7 +426,7 @@ static int dn_return_long(struct sk_buff *skb)
unsigned char tmp[ETH_ALEN];
/* Add back all headers */
- skb_push(skb, skb->data - skb->nh.raw);
+ skb_push(skb, skb->data - skb_network_header(skb));
if ((skb = skb_unshare(skb, GFP_ATOMIC)) == NULL)
return NET_RX_DROP;
@@ -504,7 +505,7 @@ static int dn_route_rx_long(struct sk_buff *skb)
goto drop_it;
skb_pull(skb, 20);
- skb->h.raw = skb->data;
+ skb_reset_transport_header(skb);
/* Destination info */
ptr += 2;
@@ -542,7 +543,7 @@ static int dn_route_rx_short(struct sk_buff *skb)
goto drop_it;
skb_pull(skb, 5);
- skb->h.raw = skb->data;
+ skb_reset_transport_header(skb);
cb->dst = *(__le16 *)ptr;
ptr += 2;
@@ -615,7 +616,7 @@ int dn_route_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type
flags = *skb->data;
}
- skb->nh.raw = skb->data;
+ skb_reset_network_header(skb);
/*
* Weed out future version DECnet
@@ -885,7 +886,7 @@ static int dn_route_output_slow(struct dst_entry **pprt, const struct flowi *old
.iif = loopback_dev.ifindex,
.oif = oldflp->oif };
struct dn_route *rt = NULL;
- struct net_device *dev_out = NULL;
+ struct net_device *dev_out = NULL, *dev;
struct neighbour *neigh = NULL;
unsigned hash;
unsigned flags = 0;
@@ -924,15 +925,17 @@ static int dn_route_output_slow(struct dst_entry **pprt, const struct flowi *old
goto out;
}
read_lock(&dev_base_lock);
- for(dev_out = dev_base; dev_out; dev_out = dev_out->next) {
- if (!dev_out->dn_ptr)
+ for_each_netdev(dev) {
+ if (!dev->dn_ptr)
continue;
- if (!dn_dev_islocal(dev_out, oldflp->fld_src))
+ if (!dn_dev_islocal(dev, oldflp->fld_src))
continue;
- if ((dev_out->flags & IFF_LOOPBACK) &&
+ if ((dev->flags & IFF_LOOPBACK) &&
oldflp->fld_dst &&
- !dn_dev_islocal(dev_out, oldflp->fld_dst))
+ !dn_dev_islocal(dev, oldflp->fld_dst))
continue;
+
+ dev_out = dev;
break;
}
read_unlock(&dev_base_lock);
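The dev/dev_out split above is forced by the new iterator: unlike the old `for (dev_out = dev_base; ...)` walk, a for_each_netdev() cursor is not guaranteed to be NULL when the loop falls through, so a hit has to be copied out explicitly. A minimal sketch, with match() as a hypothetical predicate:

    #include <linux/netdevice.h>

    static struct net_device *find_dev(int (*match)(struct net_device *))
    {
            struct net_device *dev_out = NULL, *dev;

            for_each_netdev(dev) {
                    if (!match(dev))
                            continue;
                    dev_out = dev;  /* remember the hit... */
                    break;          /* ...the cursor itself is unusable after the loop */
            }
            return dev_out;         /* NULL if nothing matched */
    }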
@@ -1468,7 +1471,7 @@ static int dn_rt_fill_info(struct sk_buff *skb, u32 pid, u32 seq,
struct dn_route *rt = (struct dn_route *)skb->dst;
struct rtmsg *r;
struct nlmsghdr *nlh;
- unsigned char *b = skb->tail;
+ unsigned char *b = skb_tail_pointer(skb);
long expires;
nlh = NLMSG_NEW(skb, pid, seq, event, sizeof(*r), flags);
@@ -1509,19 +1512,19 @@ static int dn_rt_fill_info(struct sk_buff *skb, u32 pid, u32 seq,
if (rt->fl.iif)
RTA_PUT(skb, RTA_IIF, sizeof(int), &rt->fl.iif);
- nlh->nlmsg_len = skb->tail - b;
+ nlh->nlmsg_len = skb_tail_pointer(skb) - b;
return skb->len;
nlmsg_failure:
rtattr_failure:
- skb_trim(skb, b - skb->data);
+ nlmsg_trim(skb, b);
return -1;
}
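dn_rt_fill_info() above follows the legacy fill-and-rollback idiom: mark the tail, append the header and attributes, and on overflow trim back to the mark so the partial message disappears. A sketch under that assumption (demo_fill and its RTA_OIF payload are hypothetical; the NLMSG_NEW and RTA_PUT macros jump to the labels on overflow):

    #include <linux/rtnetlink.h>
    #include <net/netlink.h>

    static int demo_fill(struct sk_buff *skb, u32 pid, u32 seq)
    {
            struct nlmsghdr *nlh;
            unsigned char *b = skb_tail_pointer(skb);       /* rollback mark */
            int oif = 1;

            nlh = NLMSG_NEW(skb, pid, seq, RTM_NEWROUTE, sizeof(struct rtmsg), 0);
            RTA_PUT(skb, RTA_OIF, sizeof(oif), &oif);       /* may overflow */

            nlh->nlmsg_len = skb_tail_pointer(skb) - b;     /* finalize length */
            return skb->len;

    nlmsg_failure:                          /* jumped to by NLMSG_NEW */
    rtattr_failure:                         /* jumped to by RTA_PUT */
            nlmsg_trim(skb, b);             /* undo the partial message */
            return -1;
    }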
/*
* This is called by both endnodes and routers now.
*/
-int dn_cache_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh, void *arg)
+static int dn_cache_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh, void *arg)
{
struct rtattr **rta = arg;
struct rtmsg *rtm = NLMSG_DATA(nlh);
@@ -1537,7 +1540,7 @@ int dn_cache_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh, void *arg)
skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
if (skb == NULL)
return -ENOBUFS;
- skb->mac.raw = skb->data;
+ skb_reset_mac_header(skb);
cb = DN_SKB_CB(skb);
if (rta[RTA_SRC-1])
@@ -1812,6 +1815,13 @@ void __init dn_route_init(void)
dn_dst_ops.gc_thresh = (dn_rt_hash_mask + 1);
proc_net_fops_create("decnet_cache", S_IRUGO, &dn_rt_cache_seq_fops);
+
+#ifdef CONFIG_DECNET_ROUTER
+ rtnl_register(PF_DECnet, RTM_GETROUTE, dn_cache_getroute, dn_fib_dump);
+#else
+ rtnl_register(PF_DECnet, RTM_GETROUTE, dn_cache_getroute,
+ dn_cache_dump);
+#endif
}
void __exit dn_route_cleanup(void)
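rtnl_register() replaces the old per-family rtnetlink_links tables: each (protocol, message type) pair gets a doit handler for single requests and/or a dumpit handler for table dumps, and either may be NULL, as in the PF_DECnet calls above. A hedged sketch of the registration shape, with placeholder names and types:

    #include <net/rtnetlink.h>

    static int demo_doit(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
    {
            return 0;               /* parse nlh and act on the single request */
    }

    static int demo_dumpit(struct sk_buff *skb, struct netlink_callback *cb)
    {
            return skb->len;        /* stream RTM_* records into skb */
    }

    static void __init demo_rtnl_init(void)
    {
            /* PF_UNSPEC/RTM_GETLINK are placeholders, not a real registration */
            rtnl_register(PF_UNSPEC, RTM_GETLINK, demo_doit, demo_dumpit);
    }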
diff --git a/net/decnet/dn_rules.c b/net/decnet/dn_rules.c
index 5e86dd54230..17a1932216d 100644
--- a/net/decnet/dn_rules.c
+++ b/net/decnet/dn_rules.c
@@ -31,6 +31,7 @@
#include <net/dn_fib.h>
#include <net/dn_neigh.h>
#include <net/dn_dev.h>
+#include <net/dn_route.h>
static struct fib_rules_ops dn_fib_rules_ops;
@@ -239,9 +240,9 @@ static u32 dn_fib_rule_default_pref(void)
return 0;
}
-int dn_fib_dump_rules(struct sk_buff *skb, struct netlink_callback *cb)
+static void dn_fib_rule_flush_cache(void)
{
- return fib_rules_dump(skb, cb, AF_DECnet);
+ dn_rt_cache_flush(-1);
}
static struct fib_rules_ops dn_fib_rules_ops = {
@@ -254,6 +255,7 @@ static struct fib_rules_ops dn_fib_rules_ops = {
.compare = dn_fib_rule_compare,
.fill = dn_fib_rule_fill,
.default_pref = dn_fib_rule_default_pref,
+ .flush_cache = dn_fib_rule_flush_cache,
.nlgroup = RTNLGRP_DECnet_RULE,
.policy = dn_fib_rule_policy,
.rules_list = &dn_fib_rules,
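The new .flush_cache hook lets the generic fib_rules core invalidate a family's routing cache whenever its rule set changes; IPv4 wires it up the same way later in this diff (fib4_rule_flush_cache). A minimal sketch with a hypothetical callback:

    #include <net/fib_rules.h>

    static void my_flush_cache(void)
    {
            /* drop cached lookups the changed rule set may invalidate;
             * DECnet uses dn_rt_cache_flush(-1), IPv4 rt_cache_flush(-1) */
    }

    static struct fib_rules_ops my_rules_ops = {
            /* mandatory ops omitted for brevity */
            .flush_cache    = my_flush_cache,
    };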
diff --git a/net/decnet/dn_table.c b/net/decnet/dn_table.c
index 780a141f834..d6615c9361e 100644
--- a/net/decnet/dn_table.c
+++ b/net/decnet/dn_table.c
@@ -28,6 +28,7 @@
#include <asm/uaccess.h>
#include <linux/route.h> /* RTF_xxx */
#include <net/neighbour.h>
+#include <net/netlink.h>
#include <net/dst.h>
#include <net/flow.h>
#include <net/fib_rules.h>
@@ -295,7 +296,7 @@ static int dn_fib_dump_info(struct sk_buff *skb, u32 pid, u32 seq, int event,
{
struct rtmsg *rtm;
struct nlmsghdr *nlh;
- unsigned char *b = skb->tail;
+ unsigned char *b = skb_tail_pointer(skb);
nlh = NLMSG_NEW(skb, pid, seq, event, sizeof(*rtm), flags);
rtm = NLMSG_DATA(nlh);
@@ -337,19 +338,19 @@ static int dn_fib_dump_info(struct sk_buff *skb, u32 pid, u32 seq, int event,
nhp->rtnh_ifindex = nh->nh_oif;
if (nh->nh_gw)
RTA_PUT(skb, RTA_GATEWAY, 2, &nh->nh_gw);
- nhp->rtnh_len = skb->tail - (unsigned char *)nhp;
+ nhp->rtnh_len = skb_tail_pointer(skb) - (unsigned char *)nhp;
} endfor_nexthops(fi);
mp_head->rta_type = RTA_MULTIPATH;
- mp_head->rta_len = skb->tail - (u8*)mp_head;
+ mp_head->rta_len = skb_tail_pointer(skb) - (u8 *)mp_head;
}
- nlh->nlmsg_len = skb->tail - b;
+ nlh->nlmsg_len = skb_tail_pointer(skb) - b;
return skb->len;
nlmsg_failure:
rtattr_failure:
- skb_trim(skb, b - skb->data);
+ nlmsg_trim(skb, b);
return -EMSGSIZE;
}
diff --git a/net/decnet/netfilter/dn_rtmsg.c b/net/decnet/netfilter/dn_rtmsg.c
index 0e62def05a5..696234688cf 100644
--- a/net/decnet/netfilter/dn_rtmsg.c
+++ b/net/decnet/netfilter/dn_rtmsg.c
@@ -33,7 +33,7 @@ static struct sk_buff *dnrmg_build_message(struct sk_buff *rt_skb, int *errp)
{
struct sk_buff *skb = NULL;
size_t size;
- unsigned char *old_tail;
+ sk_buff_data_t old_tail;
struct nlmsghdr *nlh;
unsigned char *ptr;
struct nf_dn_rtmsg *rtm;
@@ -48,7 +48,7 @@ static struct sk_buff *dnrmg_build_message(struct sk_buff *rt_skb, int *errp)
rtm = (struct nf_dn_rtmsg *)NLMSG_DATA(nlh);
rtm->nfdn_ifindex = rt_skb->dev->ifindex;
ptr = NFDN_RTMSG(rtm);
- memcpy(ptr, rt_skb->data, rt_skb->len);
+ skb_copy_from_linear_data(rt_skb, ptr, rt_skb->len);
nlh->nlmsg_len = skb->tail - old_tail;
return skb;
@@ -102,7 +102,7 @@ static unsigned int dnrmg_hook(unsigned int hook,
static inline void dnrmg_receive_user_skb(struct sk_buff *skb)
{
- struct nlmsghdr *nlh = (struct nlmsghdr *)skb->data;
+ struct nlmsghdr *nlh = nlmsg_hdr(skb);
if (nlh->nlmsg_len < sizeof(*nlh) || skb->len < nlh->nlmsg_len)
return;
@@ -138,7 +138,7 @@ static int __init dn_rtmsg_init(void)
int rv = 0;
dnrmg = netlink_kernel_create(NETLINK_DNRTMSG, DNRNG_NLGRP_MAX,
- dnrmg_receive_user_sk, THIS_MODULE);
+ dnrmg_receive_user_sk, NULL, THIS_MODULE);
if (dnrmg == NULL) {
printk(KERN_ERR "dn_rtmsg: Cannot create netlink socket\n");
return -ENOMEM;
diff --git a/net/econet/af_econet.c b/net/econet/af_econet.c
index bc12e36263f..b5524f32ac2 100644
--- a/net/econet/af_econet.c
+++ b/net/econet/af_econet.c
@@ -162,7 +162,7 @@ static int econet_recvmsg(struct kiocb *iocb, struct socket *sock,
err = memcpy_toiovec(msg->msg_iov, skb->data, copied);
if (err)
goto out_free;
- skb_get_timestamp(skb, &sk->sk_stamp);
+ sk->sk_stamp = skb->tstamp;
if (msg->msg_name)
memcpy(msg->msg_name, skb->cb, msg->msg_namelen);
@@ -345,7 +345,7 @@ static int econet_sendmsg(struct kiocb *iocb, struct socket *sock,
goto out_unlock;
skb_reserve(skb, LL_RESERVED_SPACE(dev));
- skb->nh.raw = skb->data;
+ skb_reset_network_header(skb);
eb = (struct ec_cb *)&skb->cb;
@@ -366,7 +366,7 @@ static int econet_sendmsg(struct kiocb *iocb, struct socket *sock,
fh->cb = cb;
fh->port = port;
if (sock->type != SOCK_DGRAM) {
- skb->tail = skb->data;
+ skb_reset_tail_pointer(skb);
skb->len = 0;
} else if (res < 0)
goto out_free;
@@ -727,6 +727,9 @@ static int econet_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg
case SIOCGSTAMP:
return sock_get_timestamp(sk, argp);
+ case SIOCGSTAMPNS:
+ return sock_get_timestampns(sk, argp);
+
case SIOCSIFADDR:
case SIOCGIFADDR:
return ec_dev_ioctl(sock, cmd, argp);
@@ -845,7 +848,7 @@ static void aun_send_response(__u32 addr, unsigned long seq, int code, int cb)
static void aun_incoming(struct sk_buff *skb, struct aunhdr *ah, size_t len)
{
- struct iphdr *ip = skb->nh.iph;
+ struct iphdr *ip = ip_hdr(skb);
unsigned char stn = ntohl(ip->saddr) & 0xff;
struct sock *sk;
struct sk_buff *newskb;
@@ -940,10 +943,10 @@ static void aun_data_available(struct sock *sk, int slen)
printk(KERN_DEBUG "AUN: recvfrom() error %d\n", -err);
}
- data = skb->h.raw + sizeof(struct udphdr);
+ data = skb_transport_header(skb) + sizeof(struct udphdr);
ah = (struct aunhdr *)data;
len = skb->len - sizeof(struct udphdr);
- ip = skb->nh.iph;
+ ip = ip_hdr(skb);
switch (ah->code)
{
diff --git a/net/ethernet/eth.c b/net/ethernet/eth.c
index 7391f55904d..0ac2524f3b6 100644
--- a/net/ethernet/eth.c
+++ b/net/ethernet/eth.c
@@ -156,7 +156,8 @@ __be16 eth_type_trans(struct sk_buff *skb, struct net_device *dev)
struct ethhdr *eth;
unsigned char *rawp;
- skb->mac.raw = skb->data;
+ skb->dev = dev;
+ skb_reset_mac_header(skb);
skb_pull(skb, ETH_HLEN);
eth = eth_hdr(skb);
@@ -228,7 +229,7 @@ int eth_header_cache(struct neighbour *neigh, struct hh_cache *hh)
eth = (struct ethhdr *)
(((u8 *) hh->hh_data) + (HH_DATA_OFF(sizeof(*eth))));
- if (type == __constant_htons(ETH_P_802_3))
+ if (type == htons(ETH_P_802_3))
return -1;
eth->h_proto = type;
diff --git a/net/ieee80211/Kconfig b/net/ieee80211/Kconfig
index 6ef766ef961..1438adedbc8 100644
--- a/net/ieee80211/Kconfig
+++ b/net/ieee80211/Kconfig
@@ -56,7 +56,8 @@ config IEEE80211_CRYPT_CCMP
config IEEE80211_CRYPT_TKIP
tristate "IEEE 802.11i TKIP encryption"
- depends on IEEE80211 && NET_RADIO
+ depends on IEEE80211
+ select WIRELESS_EXT
select CRYPTO
select CRYPTO_MICHAEL_MIC
select CRYPTO_ECB
diff --git a/net/ieee80211/ieee80211_crypt.c b/net/ieee80211/ieee80211_crypt.c
index 5ed0a98b2d7..df5592c9339 100644
--- a/net/ieee80211/ieee80211_crypt.c
+++ b/net/ieee80211/ieee80211_crypt.c
@@ -1,7 +1,7 @@
/*
* Host AP crypto routines
*
- * Copyright (c) 2002-2003, Jouni Malinen <jkmaline@cc.hut.fi>
+ * Copyright (c) 2002-2003, Jouni Malinen <j@w1.fi>
* Portions Copyright (C) 2004, Intel Corporation <jketreno@linux.intel.com>
*
* This program is free software; you can redistribute it and/or modify
diff --git a/net/ieee80211/ieee80211_crypt_ccmp.c b/net/ieee80211/ieee80211_crypt_ccmp.c
index 35aa3426c3f..b016b4104de 100644
--- a/net/ieee80211/ieee80211_crypt_ccmp.c
+++ b/net/ieee80211/ieee80211_crypt_ccmp.c
@@ -1,7 +1,7 @@
/*
* Host AP crypt: host-based CCMP encryption implementation for Host AP driver
*
- * Copyright (c) 2003-2004, Jouni Malinen <jkmaline@cc.hut.fi>
+ * Copyright (c) 2003-2004, Jouni Malinen <j@w1.fi>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
@@ -338,7 +338,7 @@ static int ieee80211_ccmp_decrypt(struct sk_buff *skb, int hdr_len, void *priv)
if (ccmp_replay_check(pn, key->rx_pn)) {
if (net_ratelimit()) {
- printk(KERN_DEBUG "CCMP: replay detected: STA=" MAC_FMT
+ IEEE80211_DEBUG_DROP("CCMP: replay detected: STA=" MAC_FMT
" previous PN %02x%02x%02x%02x%02x%02x "
"received PN %02x%02x%02x%02x%02x%02x\n",
MAC_ARG(hdr->addr2), MAC_ARG(key->rx_pn),
diff --git a/net/ieee80211/ieee80211_crypt_tkip.c b/net/ieee80211/ieee80211_crypt_tkip.c
index fc1f99a5973..5a48d8e0aec 100644
--- a/net/ieee80211/ieee80211_crypt_tkip.c
+++ b/net/ieee80211/ieee80211_crypt_tkip.c
@@ -1,7 +1,7 @@
/*
* Host AP crypt: host-based TKIP encryption implementation for Host AP driver
*
- * Copyright (c) 2003-2004, Jouni Malinen <jkmaline@cc.hut.fi>
+ * Copyright (c) 2003-2004, Jouni Malinen <j@w1.fi>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
@@ -465,7 +465,7 @@ static int ieee80211_tkip_decrypt(struct sk_buff *skb, int hdr_len, void *priv)
if (tkip_replay_check(iv32, iv16, tkey->rx_iv32, tkey->rx_iv16)) {
if (net_ratelimit()) {
- printk(KERN_DEBUG "TKIP: replay detected: STA=" MAC_FMT
+ IEEE80211_DEBUG_DROP("TKIP: replay detected: STA=" MAC_FMT
" previous TSC %08x%04x received TSC "
"%08x%04x\n", MAC_ARG(hdr->addr2),
tkey->rx_iv32, tkey->rx_iv16, iv32, iv16);
@@ -507,7 +507,7 @@ static int ieee80211_tkip_decrypt(struct sk_buff *skb, int hdr_len, void *priv)
tkey->rx_phase1_done = 0;
}
if (net_ratelimit()) {
- printk(KERN_DEBUG "TKIP: ICV error detected: STA="
+ IEEE80211_DEBUG_DROP("TKIP: ICV error detected: STA="
MAC_FMT "\n", MAC_ARG(hdr->addr2));
}
tkey->dot11RSNAStatsTKIPICVErrors++;
diff --git a/net/ieee80211/ieee80211_crypt_wep.c b/net/ieee80211/ieee80211_crypt_wep.c
index ec6d8851a06..8d182459344 100644
--- a/net/ieee80211/ieee80211_crypt_wep.c
+++ b/net/ieee80211/ieee80211_crypt_wep.c
@@ -1,7 +1,7 @@
/*
* Host AP crypt: host-based WEP encryption implementation for Host AP driver
*
- * Copyright (c) 2002-2004, Jouni Malinen <jkmaline@cc.hut.fi>
+ * Copyright (c) 2002-2004, Jouni Malinen <j@w1.fi>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
@@ -152,7 +152,7 @@ static int prism2_wep_encrypt(struct sk_buff *skb, int hdr_len, void *priv)
return -1;
/* Copy the IV into the first 3 bytes of the key */
- memcpy(key, skb->data + hdr_len, 3);
+ skb_copy_from_linear_data_offset(skb, hdr_len, key, 3);
/* Copy rest of the WEP key (the secret part) */
memcpy(key + 3, wep->key, wep->key_len);
diff --git a/net/ieee80211/ieee80211_module.c b/net/ieee80211/ieee80211_module.c
index b1c6d1f717d..7ec6610841b 100644
--- a/net/ieee80211/ieee80211_module.c
+++ b/net/ieee80211/ieee80211_module.c
@@ -5,8 +5,8 @@
Portions of this file are based on the WEP enablement code provided by the
Host AP project hostap-drivers v0.1.3
Copyright (c) 2001-2002, SSH Communications Security Corp and Jouni Malinen
- <jkmaline@cc.hut.fi>
- Copyright (c) 2002-2003, Jouni Malinen <jkmaline@cc.hut.fi>
+ <j@w1.fi>
+ Copyright (c) 2002-2003, Jouni Malinen <j@w1.fi>
This program is free software; you can redistribute it and/or modify it
under the terms of version 2 of the GNU General Public License as
@@ -229,6 +229,7 @@ void free_ieee80211(struct net_device *dev)
static int debug = 0;
u32 ieee80211_debug_level = 0;
+EXPORT_SYMBOL_GPL(ieee80211_debug_level);
static struct proc_dir_entry *ieee80211_proc = NULL;
static int show_debug_level(char *page, char **start, off_t offset,
diff --git a/net/ieee80211/ieee80211_rx.c b/net/ieee80211/ieee80211_rx.c
index 4084909f6f9..f2de2e48b02 100644
--- a/net/ieee80211/ieee80211_rx.c
+++ b/net/ieee80211/ieee80211_rx.c
@@ -3,8 +3,8 @@
* for Intersil Prism2/2.5/3 - hostap.o module, common routines
*
* Copyright (c) 2001-2002, SSH Communications Security Corp and Jouni Malinen
- * <jkmaline@cc.hut.fi>
- * Copyright (c) 2002-2003, Jouni Malinen <jkmaline@cc.hut.fi>
+ * <j@w1.fi>
+ * Copyright (c) 2002-2003, Jouni Malinen <j@w1.fi>
* Copyright (c) 2004-2005, Intel Corporation
*
* This program is free software; you can redistribute it and/or modify
@@ -42,7 +42,7 @@ static void ieee80211_monitor_rx(struct ieee80211_device *ieee,
u16 fc = le16_to_cpu(hdr->frame_ctl);
skb->dev = ieee->dev;
- skb->mac.raw = skb->data;
+ skb_reset_mac_header(skb);
skb_pull(skb, ieee80211_get_hdrlen(fc));
skb->pkt_type = PACKET_OTHERHOST;
skb->protocol = __constant_htons(ETH_P_80211_RAW);
@@ -606,12 +606,12 @@ int ieee80211_rx(struct ieee80211_device *ieee, struct sk_buff *skb,
if (frag == 0) {
/* copy first fragment (including full headers) into
* beginning of the fragment cache skb */
- memcpy(skb_put(frag_skb, flen), skb->data, flen);
+ skb_copy_from_linear_data(skb, skb_put(frag_skb, flen), flen);
} else {
/* append frame payload to the end of the fragment
* cache skb */
- memcpy(skb_put(frag_skb, flen), skb->data + hdrlen,
- flen);
+ skb_copy_from_linear_data_offset(skb, hdrlen,
+ skb_put(frag_skb, flen), flen);
}
dev_kfree_skb_any(skb);
skb = NULL;
@@ -759,8 +759,9 @@ int ieee80211_rx(struct ieee80211_device *ieee, struct sk_buff *skb,
IEEE80211_FCTL_TODS) && skb->len >= ETH_HLEN + ETH_ALEN) {
/* Non-standard frame: get addr4 from its bogus location after
* the payload */
- memcpy(skb->data + ETH_ALEN,
- skb->data + skb->len - ETH_ALEN, ETH_ALEN);
+ skb_copy_to_linear_data_offset(skb, ETH_ALEN,
+ skb->data + skb->len - ETH_ALEN,
+ ETH_ALEN);
skb_trim(skb, skb->len - ETH_ALEN);
}
#endif
@@ -789,10 +790,11 @@ int ieee80211_rx(struct ieee80211_device *ieee, struct sk_buff *skb,
if (skb2 != NULL) {
/* send to wireless media */
- skb2->protocol = __constant_htons(ETH_P_802_3);
- skb2->mac.raw = skb2->nh.raw = skb2->data;
- /* skb2->nh.raw = skb2->data + ETH_HLEN; */
skb2->dev = dev;
+ skb2->protocol = __constant_htons(ETH_P_802_3);
+ skb_reset_mac_header(skb2);
+ skb_reset_network_header(skb2);
+ /* skb2->network_header += ETH_HLEN; */
dev_queue_xmit(skb2);
}
#endif
@@ -800,7 +802,6 @@ int ieee80211_rx(struct ieee80211_device *ieee, struct sk_buff *skb,
if (skb) {
skb->protocol = eth_type_trans(skb, dev);
memset(skb->cb, 0, sizeof(skb->cb));
- skb->dev = dev;
skb->ip_summed = CHECKSUM_NONE; /* 802.11 crc not sufficient */
if (netif_rx(skb) == NET_RX_DROP) {
/* netif_rx always succeeds, but it might drop
diff --git a/net/ieee80211/ieee80211_tx.c b/net/ieee80211/ieee80211_tx.c
index 0292d6348e1..a4c3c51140a 100644
--- a/net/ieee80211/ieee80211_tx.c
+++ b/net/ieee80211/ieee80211_tx.c
@@ -225,10 +225,10 @@ static int ieee80211_classify(struct sk_buff *skb)
struct iphdr *ip;
eth = (struct ethhdr *)skb->data;
- if (eth->h_proto != __constant_htons(ETH_P_IP))
+ if (eth->h_proto != htons(ETH_P_IP))
return 0;
- ip = skb->nh.iph;
+ ip = ip_hdr(skb);
switch (ip->tos & 0xfc) {
case 0x20:
return 2;
@@ -309,8 +309,8 @@ int ieee80211_xmit(struct sk_buff *skb, struct net_device *dev)
}
/* Save source and destination addresses */
- memcpy(dest, skb->data, ETH_ALEN);
- memcpy(src, skb->data + ETH_ALEN, ETH_ALEN);
+ skb_copy_from_linear_data(skb, dest, ETH_ALEN);
+ skb_copy_from_linear_data_offset(skb, ETH_ALEN, src, ETH_ALEN);
if (host_encrypt || host_build_iv)
fc = IEEE80211_FTYPE_DATA | IEEE80211_STYPE_DATA |
@@ -363,7 +363,7 @@ int ieee80211_xmit(struct sk_buff *skb, struct net_device *dev)
snapped = 1;
ieee80211_copy_snap(skb_put(skb_new, SNAP_SIZE + sizeof(u16)),
ether_type);
- memcpy(skb_put(skb_new, skb->len), skb->data, skb->len);
+ skb_copy_from_linear_data(skb, skb_put(skb_new, skb->len), skb->len);
res = crypt->ops->encrypt_msdu(skb_new, hdr_len, crypt->priv);
if (res < 0) {
IEEE80211_ERROR("msdu encryption failed\n");
@@ -492,7 +492,7 @@ int ieee80211_xmit(struct sk_buff *skb, struct net_device *dev)
bytes -= SNAP_SIZE + sizeof(u16);
}
- memcpy(skb_put(skb_frag, bytes), skb->data, bytes);
+ skb_copy_from_linear_data(skb, skb_put(skb_frag, bytes), bytes);
/* Advance the SKB... */
skb_pull(skb, bytes);
diff --git a/net/ieee80211/ieee80211_wx.c b/net/ieee80211/ieee80211_wx.c
index 40d7a55fe03..cee5e13bc42 100644
--- a/net/ieee80211/ieee80211_wx.c
+++ b/net/ieee80211/ieee80211_wx.c
@@ -5,8 +5,8 @@
Portions of this file are based on the WEP enablement code provided by the
Host AP project hostap-drivers v0.1.3
Copyright (c) 2001-2002, SSH Communications Security Corp and Jouni Malinen
- <jkmaline@cc.hut.fi>
- Copyright (c) 2002-2003, Jouni Malinen <jkmaline@cc.hut.fi>
+ <j@w1.fi>
+ Copyright (c) 2002-2003, Jouni Malinen <j@w1.fi>
This program is free software; you can redistribute it and/or modify it
under the terms of version 2 of the GNU General Public License as
diff --git a/net/ipv4/Kconfig b/net/ipv4/Kconfig
index 9e8ef509c51..e62aee0ec4c 100644
--- a/net/ipv4/Kconfig
+++ b/net/ipv4/Kconfig
@@ -574,6 +574,33 @@ config TCP_CONG_VENO
loss packets.
See http://www.ntu.edu.sg/home5/ZHOU0022/papers/CPFu03a.pdf
+config TCP_CONG_YEAH
+ tristate "YeAH TCP"
+ depends on EXPERIMENTAL
+ default n
+ ---help---
+ YeAH-TCP is a sender-side, high-speed TCP congestion control
+ algorithm that uses a mixed loss/delay approach to compute the
+ congestion window. Its design goals are high efficiency; internal,
+ RTT, and Reno fairness; and resilience to link loss, while keeping
+ the load on network elements as low as possible.
+
+ For further details look here:
+ http://wil.cs.caltech.edu/pfldnet2007/paper/YeAH_TCP.pdf
+
+config TCP_CONG_ILLINOIS
+ tristate "TCP Illinois"
+ depends on EXPERIMENTAL
+ default n
+ ---help---
+ TCP-Illinois is a sender-side modification of TCP Reno for
+ high-speed, long-delay links. It uses the round-trip time to
+ adjust the alpha and beta parameters, achieving a higher average
+ throughput while maintaining fairness.
+
+ For further details see:
+ http://www.ews.uiuc.edu/~shaoliu/tcpillinois/index.html
+
choice
prompt "Default TCP congestion control"
default DEFAULT_CUBIC
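Both new options plug into the pluggable congestion-control framework: a module registers a tcp_congestion_ops and becomes selectable per socket or as the system default chosen here. A minimal sketch reusing the exported Reno hooks (the "demo" name is hypothetical):

    #include <linux/module.h>
    #include <net/tcp.h>

    static struct tcp_congestion_ops demo_cc __read_mostly = {
            .ssthresh       = tcp_reno_ssthresh,    /* halve cwnd on loss */
            .cong_avoid     = tcp_reno_cong_avoid,  /* AIMD window growth */
            .min_cwnd       = tcp_reno_min_cwnd,
            .owner          = THIS_MODULE,
            .name           = "demo",
    };

    static int __init demo_cc_register(void)
    {
            return tcp_register_congestion_control(&demo_cc);
    }

    static void __exit demo_cc_unregister(void)
    {
            tcp_unregister_congestion_control(&demo_cc);
    }

    module_init(demo_cc_register);
    module_exit(demo_cc_unregister);
    MODULE_LICENSE("GPL");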
diff --git a/net/ipv4/Makefile b/net/ipv4/Makefile
index 7a068626fee..4ff6c151d7f 100644
--- a/net/ipv4/Makefile
+++ b/net/ipv4/Makefile
@@ -49,6 +49,8 @@ obj-$(CONFIG_TCP_CONG_VEGAS) += tcp_vegas.o
obj-$(CONFIG_TCP_CONG_VENO) += tcp_veno.o
obj-$(CONFIG_TCP_CONG_SCALABLE) += tcp_scalable.o
obj-$(CONFIG_TCP_CONG_LP) += tcp_lp.o
+obj-$(CONFIG_TCP_CONG_YEAH) += tcp_yeah.o
+obj-$(CONFIG_TCP_CONG_ILLINOIS) += tcp_illinois.o
obj-$(CONFIG_NETLABEL) += cipso_ipv4.o
obj-$(CONFIG_XFRM) += xfrm4_policy.o xfrm4_state.o xfrm4_input.o \
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index cf358c84c44..16aae8ef555 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -87,6 +87,7 @@
#include <linux/init.h>
#include <linux/poll.h>
#include <linux/netfilter_ipv4.h>
+#include <linux/random.h>
#include <asm/uaccess.h>
#include <asm/system.h>
@@ -217,6 +218,26 @@ out:
return err;
}
+u32 inet_ehash_secret __read_mostly;
+EXPORT_SYMBOL(inet_ehash_secret);
+
+/*
+ * inet_ehash_secret must be set exactly once
+ * Instead of using a dedicated spinlock, we (ab)use inetsw_lock
+ */
+void build_ehash_secret(void)
+{
+ u32 rnd;
+ do {
+ get_random_bytes(&rnd, sizeof(rnd));
+ } while (rnd == 0);
+ spin_lock_bh(&inetsw_lock);
+ if (!inet_ehash_secret)
+ inet_ehash_secret = rnd;
+ spin_unlock_bh(&inetsw_lock);
+}
+EXPORT_SYMBOL(build_ehash_secret);
+
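The code above is a double-checked, lock-protected one-time init: keep drawing random words until a non-zero value (zero doubles as the "unset" marker), then let the first writer win under the lock so later callers are no-ops. Restated generically:

    #include <linux/types.h>
    #include <linux/random.h>
    #include <linux/spinlock.h>

    static u32 secret;                      /* 0 means "not yet chosen" */
    static DEFINE_SPINLOCK(init_lock);

    static void init_secret_once(void)
    {
            u32 rnd;

            do {                            /* 0 is reserved as the unset marker */
                    get_random_bytes(&rnd, sizeof(rnd));
            } while (rnd == 0);

            spin_lock_bh(&init_lock);
            if (!secret)                    /* first caller wins */
                    secret = rnd;
            spin_unlock_bh(&init_lock);
    }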
/*
* Create an inet socket.
*/
@@ -233,6 +254,11 @@ static int inet_create(struct socket *sock, int protocol)
int try_loading_module = 0;
int err;
+ if (sock->type != SOCK_RAW &&
+ sock->type != SOCK_DGRAM &&
+ !inet_ehash_secret)
+ build_ehash_secret();
+
sock->state = SS_UNCONNECTED;
/* Look for the requested type/protocol pair. */
@@ -755,6 +781,9 @@ int inet_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
case SIOCGSTAMP:
err = sock_get_timestamp(sk, (struct timeval __user *)arg);
break;
+ case SIOCGSTAMPNS:
+ err = sock_get_timestampns(sk, (struct timespec __user *)arg);
+ break;
case SIOCADDRT:
case SIOCDELRT:
case SIOCRTMSG:
@@ -1109,7 +1138,7 @@ static int inet_gso_send_check(struct sk_buff *skb)
if (unlikely(!pskb_may_pull(skb, sizeof(*iph))))
goto out;
- iph = skb->nh.iph;
+ iph = ip_hdr(skb);
ihl = iph->ihl * 4;
if (ihl < sizeof(*iph))
goto out;
@@ -1117,8 +1146,9 @@ static int inet_gso_send_check(struct sk_buff *skb)
if (unlikely(!pskb_may_pull(skb, ihl)))
goto out;
- skb->h.raw = __skb_pull(skb, ihl);
- iph = skb->nh.iph;
+ __skb_pull(skb, ihl);
+ skb_reset_transport_header(skb);
+ iph = ip_hdr(skb);
proto = iph->protocol & (MAX_INET_PROTOS - 1);
err = -EPROTONOSUPPORT;
@@ -1152,7 +1182,7 @@ static struct sk_buff *inet_gso_segment(struct sk_buff *skb, int features)
if (unlikely(!pskb_may_pull(skb, sizeof(*iph))))
goto out;
- iph = skb->nh.iph;
+ iph = ip_hdr(skb);
ihl = iph->ihl * 4;
if (ihl < sizeof(*iph))
goto out;
@@ -1160,8 +1190,9 @@ static struct sk_buff *inet_gso_segment(struct sk_buff *skb, int features)
if (unlikely(!pskb_may_pull(skb, ihl)))
goto out;
- skb->h.raw = __skb_pull(skb, ihl);
- iph = skb->nh.iph;
+ __skb_pull(skb, ihl);
+ skb_reset_transport_header(skb);
+ iph = ip_hdr(skb);
id = ntohs(iph->id);
proto = iph->protocol & (MAX_INET_PROTOS - 1);
segs = ERR_PTR(-EPROTONOSUPPORT);
@@ -1177,17 +1208,57 @@ static struct sk_buff *inet_gso_segment(struct sk_buff *skb, int features)
skb = segs;
do {
- iph = skb->nh.iph;
+ iph = ip_hdr(skb);
iph->id = htons(id++);
iph->tot_len = htons(skb->len - skb->mac_len);
iph->check = 0;
- iph->check = ip_fast_csum(skb->nh.raw, iph->ihl);
+ iph->check = ip_fast_csum(skb_network_header(skb), iph->ihl);
} while ((skb = skb->next));
out:
return segs;
}
+unsigned long snmp_fold_field(void *mib[], int offt)
+{
+ unsigned long res = 0;
+ int i;
+
+ for_each_possible_cpu(i) {
+ res += *(((unsigned long *) per_cpu_ptr(mib[0], i)) + offt);
+ res += *(((unsigned long *) per_cpu_ptr(mib[1], i)) + offt);
+ }
+ return res;
+}
+EXPORT_SYMBOL_GPL(snmp_fold_field);
+
+int snmp_mib_init(void *ptr[2], size_t mibsize, size_t mibalign)
+{
+ BUG_ON(ptr == NULL);
+ ptr[0] = __alloc_percpu(mibsize);
+ if (!ptr[0])
+ goto err0;
+ ptr[1] = __alloc_percpu(mibsize);
+ if (!ptr[1])
+ goto err1;
+ return 0;
+err1:
+ free_percpu(ptr[0]);
+ ptr[0] = NULL;
+err0:
+ return -ENOMEM;
+}
+EXPORT_SYMBOL_GPL(snmp_mib_init);
+
+void snmp_mib_free(void *ptr[2])
+{
+ BUG_ON(ptr == NULL);
+ free_percpu(ptr[0]);
+ free_percpu(ptr[1]);
+ ptr[0] = ptr[1] = NULL;
+}
+EXPORT_SYMBOL_GPL(snmp_mib_free);
+
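These helpers formalize the two-array per-CPU MIB scheme: half [0] is bumped from process context and half [1] from softirq context, so counters need no atomics, and a reader folds both halves across all possible CPUs. A usage sketch with a hypothetical one-counter MIB (note that the offt argument is an index in unsigned longs, not bytes):

    #include <linux/percpu.h>
    #include <linux/stddef.h>

    struct demo_mib {
            unsigned long   in_pkts;
    };

    static void *demo_statistics[2];

    static int __init demo_mib_setup(void)
    {
            return snmp_mib_init(demo_statistics, sizeof(struct demo_mib),
                                 __alignof__(struct demo_mib));
    }

    static unsigned long demo_in_pkts_total(void)
    {
            return snmp_fold_field(demo_statistics,
                                   offsetof(struct demo_mib, in_pkts) /
                                   sizeof(unsigned long));
    }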
#ifdef CONFIG_IP_MULTICAST
static struct net_protocol igmp_protocol = {
.handler = igmp_rcv,
@@ -1214,28 +1285,47 @@ static struct net_protocol icmp_protocol = {
static int __init init_ipv4_mibs(void)
{
- net_statistics[0] = alloc_percpu(struct linux_mib);
- net_statistics[1] = alloc_percpu(struct linux_mib);
- ip_statistics[0] = alloc_percpu(struct ipstats_mib);
- ip_statistics[1] = alloc_percpu(struct ipstats_mib);
- icmp_statistics[0] = alloc_percpu(struct icmp_mib);
- icmp_statistics[1] = alloc_percpu(struct icmp_mib);
- tcp_statistics[0] = alloc_percpu(struct tcp_mib);
- tcp_statistics[1] = alloc_percpu(struct tcp_mib);
- udp_statistics[0] = alloc_percpu(struct udp_mib);
- udp_statistics[1] = alloc_percpu(struct udp_mib);
- udplite_statistics[0] = alloc_percpu(struct udp_mib);
- udplite_statistics[1] = alloc_percpu(struct udp_mib);
- if (!
- (net_statistics[0] && net_statistics[1] && ip_statistics[0]
- && ip_statistics[1] && tcp_statistics[0] && tcp_statistics[1]
- && udp_statistics[0] && udp_statistics[1]
- && udplite_statistics[0] && udplite_statistics[1] ) )
- return -ENOMEM;
-
- (void) tcp_mib_init();
+ if (snmp_mib_init((void **)net_statistics,
+ sizeof(struct linux_mib),
+ __alignof__(struct linux_mib)) < 0)
+ goto err_net_mib;
+ if (snmp_mib_init((void **)ip_statistics,
+ sizeof(struct ipstats_mib),
+ __alignof__(struct ipstats_mib)) < 0)
+ goto err_ip_mib;
+ if (snmp_mib_init((void **)icmp_statistics,
+ sizeof(struct icmp_mib),
+ __alignof__(struct icmp_mib)) < 0)
+ goto err_icmp_mib;
+ if (snmp_mib_init((void **)tcp_statistics,
+ sizeof(struct tcp_mib),
+ __alignof__(struct tcp_mib)) < 0)
+ goto err_tcp_mib;
+ if (snmp_mib_init((void **)udp_statistics,
+ sizeof(struct udp_mib),
+ __alignof__(struct udp_mib)) < 0)
+ goto err_udp_mib;
+ if (snmp_mib_init((void **)udplite_statistics,
+ sizeof(struct udp_mib),
+ __alignof__(struct udp_mib)) < 0)
+ goto err_udplite_mib;
+
+ tcp_mib_init();
return 0;
+
+err_udplite_mib:
+ snmp_mib_free((void **)udp_statistics);
+err_udp_mib:
+ snmp_mib_free((void **)tcp_statistics);
+err_tcp_mib:
+ snmp_mib_free((void **)icmp_statistics);
+err_icmp_mib:
+ snmp_mib_free((void **)ip_statistics);
+err_ip_mib:
+ snmp_mib_free((void **)net_statistics);
+err_net_mib:
+ return -ENOMEM;
}
static int ipv4_proc_init(void);
@@ -1336,7 +1426,7 @@ static int __init inet_init(void)
* Initialise per-cpu ipv4 mibs
*/
- if(init_ipv4_mibs())
+ if (init_ipv4_mibs())
printk(KERN_CRIT "inet_init: Cannot init ipv4 mibs\n");
ipv4_proc_init();
diff --git a/net/ipv4/ah4.c b/net/ipv4/ah4.c
index 7194eb40b6d..6da8ff597ad 100644
--- a/net/ipv4/ah4.c
+++ b/net/ipv4/ah4.c
@@ -65,7 +65,7 @@ static int ah_output(struct xfrm_state *x, struct sk_buff *skb)
char buf[60];
} tmp_iph;
- top_iph = skb->nh.iph;
+ top_iph = ip_hdr(skb);
iph = &tmp_iph.iph;
iph->tos = top_iph->tos;
@@ -152,9 +152,9 @@ static int ah_input(struct xfrm_state *x, struct sk_buff *skb)
skb->ip_summed = CHECKSUM_NONE;
ah = (struct ip_auth_hdr*)skb->data;
- iph = skb->nh.iph;
+ iph = ip_hdr(skb);
- ihl = skb->data - skb->nh.raw;
+ ihl = skb->data - skb_network_header(skb);
memcpy(work_buf, iph, ihl);
iph->ttl = 0;
@@ -181,7 +181,9 @@ static int ah_input(struct xfrm_state *x, struct sk_buff *skb)
}
}
((struct iphdr*)work_buf)->protocol = ah->nexthdr;
- skb->h.raw = memcpy(skb->nh.raw += ah_hlen, work_buf, ihl);
+ skb->network_header += ah_hlen;
+ memcpy(skb_network_header(skb), work_buf, ihl);
+ skb->transport_header = skb->network_header;
__skb_pull(skb, ah_hlen + ihl);
return 0;
@@ -196,8 +198,8 @@ static void ah4_err(struct sk_buff *skb, u32 info)
struct ip_auth_hdr *ah = (struct ip_auth_hdr*)(skb->data+(iph->ihl<<2));
struct xfrm_state *x;
- if (skb->h.icmph->type != ICMP_DEST_UNREACH ||
- skb->h.icmph->code != ICMP_FRAG_NEEDED)
+ if (icmp_hdr(skb)->type != ICMP_DEST_UNREACH ||
+ icmp_hdr(skb)->code != ICMP_FRAG_NEEDED)
return;
x = xfrm_state_lookup((xfrm_address_t *)&iph->daddr, ah->spi, IPPROTO_AH, AF_INET);
diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c
index 1a3488a83f4..7110779a024 100644
--- a/net/ipv4/arp.c
+++ b/net/ipv4/arp.c
@@ -342,13 +342,13 @@ static void arp_solicit(struct neighbour *neigh, struct sk_buff *skb)
switch (IN_DEV_ARP_ANNOUNCE(in_dev)) {
default:
case 0: /* By default announce any local IP */
- if (skb && inet_addr_type(skb->nh.iph->saddr) == RTN_LOCAL)
- saddr = skb->nh.iph->saddr;
+ if (skb && inet_addr_type(ip_hdr(skb)->saddr) == RTN_LOCAL)
+ saddr = ip_hdr(skb)->saddr;
break;
case 1: /* Restrict announcements of saddr in same subnet */
if (!skb)
break;
- saddr = skb->nh.iph->saddr;
+ saddr = ip_hdr(skb)->saddr;
if (inet_addr_type(saddr) == RTN_LOCAL) {
/* saddr should be known to target */
if (inet_addr_onlink(in_dev, target, saddr))
@@ -578,7 +578,7 @@ struct sk_buff *arp_create(int type, int ptype, __be32 dest_ip,
return NULL;
skb_reserve(skb, LL_RESERVED_SPACE(dev));
- skb->nh.raw = skb->data;
+ skb_reset_network_header(skb);
arp = (struct arphdr *) skb_put(skb,sizeof(struct arphdr) + 2*(dev->addr_len+4));
skb->dev = dev;
skb->protocol = htons(ETH_P_ARP);
@@ -721,7 +721,7 @@ static int arp_process(struct sk_buff *skb)
if (in_dev == NULL)
goto out;
- arp = skb->nh.arph;
+ arp = arp_hdr(skb);
switch (dev_type) {
default:
@@ -937,7 +937,7 @@ static int arp_rcv(struct sk_buff *skb, struct net_device *dev,
(2 * sizeof(u32)))))
goto freeskb;
- arp = skb->nh.arph;
+ arp = arp_hdr(skb);
if (arp->ar_hln != dev->addr_len ||
dev->flags & IFF_NOARP ||
skb->pkt_type == PACKET_OTHERHOST ||
@@ -1178,7 +1178,7 @@ int arp_ioctl(unsigned int cmd, void __user *arg)
goto out;
}
- switch(cmd) {
+ switch (cmd) {
case SIOCDARP:
err = arp_req_delete(&r, dev);
break;
@@ -1360,7 +1360,7 @@ static void *arp_seq_start(struct seq_file *seq, loff_t *pos)
/* ------------------------------------------------------------------------ */
-static struct seq_operations arp_seq_ops = {
+static const struct seq_operations arp_seq_ops = {
.start = arp_seq_start,
.next = neigh_seq_next,
.stop = neigh_seq_stop,
diff --git a/net/ipv4/cipso_ipv4.c b/net/ipv4/cipso_ipv4.c
index 2ce5b693a8b..e1f18489db1 100644
--- a/net/ipv4/cipso_ipv4.c
+++ b/net/ipv4/cipso_ipv4.c
@@ -92,6 +92,33 @@ int cipso_v4_rbm_optfmt = 0;
int cipso_v4_rbm_strictvalid = 1;
/*
+ * Protocol Constants
+ */
+
+/* Maximum size of the CIPSO IP option, derived from the fact that the maximum
+ * IPv4 header size is 60 bytes and the base IPv4 header is 20 bytes long. */
+#define CIPSO_V4_OPT_LEN_MAX 40
+
+/* Length of the base CIPSO option, this includes the option type (1 byte), the
+ * option length (1 byte), and the DOI (4 bytes). */
+#define CIPSO_V4_HDR_LEN 6
+
+/* Base length of the restrictive category bitmap tag (tag #1). */
+#define CIPSO_V4_TAG_RBM_BLEN 4
+
+/* Base length of the enumerated category tag (tag #2). */
+#define CIPSO_V4_TAG_ENUM_BLEN 4
+
+/* Base length of the ranged categories bitmap tag (tag #5). */
+#define CIPSO_V4_TAG_RNG_BLEN 4
+/* The maximum number of category ranges permitted in the ranged category tag
+ * (tag #5). You may note that the IETF draft states that the maximum number
+ * of category ranges is 7, but if the low end of the last category range is
+ * zero then it is possible to fit 8 category ranges because the zero should
+ * be omitted. */
+#define CIPSO_V4_TAG_RNG_CAT_MAX 8
+
+/*
* Helper Functions
*/
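The tag-5 capacity in the comment above checks out mechanically: 40 - 6 - 4 leaves 30 bytes of category data; seven full ranges take 7 * 4 = 28 bytes, and an eighth range whose zero low end is omitted needs only 2 more. A compile-time restatement, using local copies of the constants:

    enum {
            OPT_LEN_MAX     = 40,   /* CIPSO_V4_OPT_LEN_MAX */
            HDR_LEN         = 6,    /* CIPSO_V4_HDR_LEN */
            RNG_BLEN        = 4,    /* CIPSO_V4_TAG_RNG_BLEN */
    };

    /* fails to compile if the arithmetic ever stops matching */
    typedef char rng_cat_max_is_8[(OPT_LEN_MAX - HDR_LEN - RNG_BLEN ==
                                   7 * 4 + 2) ? 1 : -1];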
@@ -1109,16 +1136,15 @@ static int cipso_v4_map_cat_rng_hton(const struct cipso_v4_doi *doi_def,
unsigned char *net_cat,
u32 net_cat_len)
{
- /* The constant '16' is not random, it is the maximum number of
- * high/low category range pairs as permitted by the CIPSO draft based
- * on a maximum IPv4 header length of 60 bytes - the BUG_ON() assertion
- * does a sanity check to make sure we don't overflow the array. */
int iter = -1;
- u16 array[16];
+ u16 array[CIPSO_V4_TAG_RNG_CAT_MAX * 2];
u32 array_cnt = 0;
u32 cat_size = 0;
- BUG_ON(net_cat_len > 30);
+ /* make sure we don't overflow the 'array[]' variable */
+ if (net_cat_len >
+ (CIPSO_V4_OPT_LEN_MAX - CIPSO_V4_HDR_LEN - CIPSO_V4_TAG_RNG_BLEN))
+ return -ENOSPC;
for (;;) {
iter = netlbl_secattr_catmap_walk(secattr->mls_cat, iter + 1);
@@ -1174,7 +1200,7 @@ static int cipso_v4_map_cat_rng_ntoh(const struct cipso_v4_doi *doi_def,
u16 cat_low;
u16 cat_high;
- for(net_iter = 0; net_iter < net_cat_len; net_iter += 4) {
+ for (net_iter = 0; net_iter < net_cat_len; net_iter += 4) {
cat_high = ntohs(*((__be16 *)&net_cat[net_iter]));
if ((net_iter + 4) <= net_cat_len)
cat_low = ntohs(*((__be16 *)&net_cat[net_iter + 2]));
@@ -1196,9 +1222,6 @@ static int cipso_v4_map_cat_rng_ntoh(const struct cipso_v4_doi *doi_def,
* Protocol Handling Functions
*/
-#define CIPSO_V4_OPT_LEN_MAX 40
-#define CIPSO_V4_HDR_LEN 6
-
/**
* cipso_v4_gentag_hdr - Generate a CIPSO option header
* @doi_def: the DOI definition
@@ -1676,7 +1699,7 @@ validate_return:
*/
void cipso_v4_error(struct sk_buff *skb, int error, u32 gateway)
{
- if (skb->nh.iph->protocol == IPPROTO_ICMP || error != -EACCES)
+ if (ip_hdr(skb)->protocol == IPPROTO_ICMP || error != -EACCES)
return;
if (gateway)
diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c
index 98a00d0edc7..7f95e6e9bee 100644
--- a/net/ipv4/devinet.c
+++ b/net/ipv4/devinet.c
@@ -48,7 +48,6 @@
#include <linux/netdevice.h>
#include <linux/etherdevice.h>
#include <linux/skbuff.h>
-#include <linux/rtnetlink.h>
#include <linux/init.h>
#include <linux/notifier.h>
#include <linux/inetdevice.h>
@@ -62,7 +61,7 @@
#include <net/ip.h>
#include <net/route.h>
#include <net/ip_fib.h>
-#include <net/netlink.h>
+#include <net/rtnetlink.h>
struct ipv4_devconf ipv4_devconf = {
.accept_redirects = 1,
@@ -633,7 +632,7 @@ int devinet_ioctl(unsigned int cmd, void __user *arg)
dev_load(ifr.ifr_name);
#endif
- switch(cmd) {
+ switch (cmd) {
case SIOCGIFADDR: /* Get interface address */
case SIOCGIFBRDADDR: /* Get the broadcast address */
case SIOCGIFDSTADDR: /* Get the destination address */
@@ -708,7 +707,7 @@ int devinet_ioctl(unsigned int cmd, void __user *arg)
if (!ifa && cmd != SIOCSIFADDR && cmd != SIOCSIFFLAGS)
goto done;
- switch(cmd) {
+ switch (cmd) {
case SIOCGIFADDR: /* Get interface address */
sin->sin_addr.s_addr = ifa->ifa_local;
goto rarok;
@@ -911,7 +910,7 @@ no_in_dev:
*/
read_lock(&dev_base_lock);
rcu_read_lock();
- for (dev = dev_base; dev; dev = dev->next) {
+ for_each_netdev(dev) {
if ((in_dev = __in_dev_get_rcu(dev)) == NULL)
continue;
@@ -990,7 +989,7 @@ __be32 inet_confirm_addr(const struct net_device *dev, __be32 dst, __be32 local,
read_lock(&dev_base_lock);
rcu_read_lock();
- for (dev = dev_base; dev; dev = dev->next) {
+ for_each_netdev(dev) {
if ((in_dev = __in_dev_get_rcu(dev))) {
addr = confirm_addr_indev(in_dev, dst, local, scope);
if (addr)
@@ -1183,34 +1182,29 @@ static int inet_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb)
int s_ip_idx, s_idx = cb->args[0];
s_ip_idx = ip_idx = cb->args[1];
- read_lock(&dev_base_lock);
- for (dev = dev_base, idx = 0; dev; dev = dev->next, idx++) {
+ idx = 0;
+ for_each_netdev(dev) {
if (idx < s_idx)
- continue;
+ goto cont;
if (idx > s_idx)
s_ip_idx = 0;
- rcu_read_lock();
- if ((in_dev = __in_dev_get_rcu(dev)) == NULL) {
- rcu_read_unlock();
- continue;
- }
+ if ((in_dev = __in_dev_get_rtnl(dev)) == NULL)
+ goto cont;
for (ifa = in_dev->ifa_list, ip_idx = 0; ifa;
ifa = ifa->ifa_next, ip_idx++) {
if (ip_idx < s_ip_idx)
- continue;
+ goto cont;
if (inet_fill_ifaddr(skb, ifa, NETLINK_CB(cb->skb).pid,
cb->nlh->nlmsg_seq,
- RTM_NEWADDR, NLM_F_MULTI) <= 0) {
- rcu_read_unlock();
+ RTM_NEWADDR, NLM_F_MULTI) <= 0)
goto done;
- }
}
- rcu_read_unlock();
+cont:
+ idx++;
}
done:
- read_unlock(&dev_base_lock);
cb->args[0] = idx;
cb->args[1] = ip_idx;
@@ -1241,19 +1235,6 @@ errout:
rtnl_set_sk_err(RTNLGRP_IPV4_IFADDR, err);
}
-static struct rtnetlink_link inet_rtnetlink_table[RTM_NR_MSGTYPES] = {
- [RTM_NEWADDR - RTM_BASE] = { .doit = inet_rtm_newaddr, },
- [RTM_DELADDR - RTM_BASE] = { .doit = inet_rtm_deladdr, },
- [RTM_GETADDR - RTM_BASE] = { .dumpit = inet_dump_ifaddr, },
- [RTM_NEWROUTE - RTM_BASE] = { .doit = inet_rtm_newroute, },
- [RTM_DELROUTE - RTM_BASE] = { .doit = inet_rtm_delroute, },
- [RTM_GETROUTE - RTM_BASE] = { .doit = inet_rtm_getroute,
- .dumpit = inet_dump_fib, },
-#ifdef CONFIG_IP_MULTIPLE_TABLES
- [RTM_GETRULE - RTM_BASE] = { .dumpit = fib4_rules_dump, },
-#endif
-};
-
#ifdef CONFIG_SYSCTL
void inet_forward_change(void)
@@ -1265,7 +1246,7 @@ void inet_forward_change(void)
ipv4_devconf_dflt.forwarding = on;
read_lock(&dev_base_lock);
- for (dev = dev_base; dev; dev = dev->next) {
+ for_each_netdev(dev) {
struct in_device *in_dev;
rcu_read_lock();
in_dev = __in_dev_get_rcu(dev);
@@ -1636,7 +1617,10 @@ void __init devinet_init(void)
{
register_gifconf(PF_INET, inet_gifconf);
register_netdevice_notifier(&ip_netdev_notifier);
- rtnetlink_links[PF_INET] = inet_rtnetlink_table;
+
+ rtnl_register(PF_INET, RTM_NEWADDR, inet_rtm_newaddr, NULL);
+ rtnl_register(PF_INET, RTM_DELADDR, inet_rtm_deladdr, NULL);
+ rtnl_register(PF_INET, RTM_GETADDR, NULL, inet_dump_ifaddr);
#ifdef CONFIG_SYSCTL
devinet_sysctl.sysctl_header =
register_sysctl_table(devinet_sysctl.devinet_root_dir);
diff --git a/net/ipv4/esp4.c b/net/ipv4/esp4.c
index 31041127eeb..47c95e8ef04 100644
--- a/net/ipv4/esp4.c
+++ b/net/ipv4/esp4.c
@@ -21,13 +21,14 @@ static int esp_output(struct xfrm_state *x, struct sk_buff *skb)
struct blkcipher_desc desc;
struct esp_data *esp;
struct sk_buff *trailer;
+ u8 *tail;
int blksize;
int clen;
int alen;
int nfrags;
/* Strip IP+ESP header. */
- __skb_pull(skb, skb->h.raw - skb->data);
+ __skb_pull(skb, skb_transport_offset(skb));
/* Now skb is pure payload to encrypt */
err = -ENOMEM;
@@ -49,19 +50,21 @@ static int esp_output(struct xfrm_state *x, struct sk_buff *skb)
goto error;
/* Fill padding... */
+ tail = skb_tail_pointer(trailer);
do {
int i;
for (i=0; i<clen-skb->len - 2; i++)
- *(u8*)(trailer->tail + i) = i+1;
+ tail[i] = i + 1;
} while (0);
- *(u8*)(trailer->tail + clen-skb->len - 2) = (clen - skb->len)-2;
+ tail[clen - skb->len - 2] = (clen - skb->len) - 2;
pskb_put(skb, trailer, clen - skb->len);
- __skb_push(skb, skb->data - skb->nh.raw);
- top_iph = skb->nh.iph;
- esph = (struct ip_esp_hdr *)(skb->nh.raw + top_iph->ihl*4);
+ __skb_push(skb, skb->data - skb_network_header(skb));
+ top_iph = ip_hdr(skb);
+ esph = (struct ip_esp_hdr *)(skb_network_header(skb) +
+ top_iph->ihl * 4);
top_iph->tot_len = htons(skb->len + alen);
- *(u8*)(trailer->tail - 1) = top_iph->protocol;
+ *(skb_tail_pointer(trailer) - 1) = top_iph->protocol;
/* this is non-NULL only with UDP Encapsulation */
if (x->encap) {
@@ -217,12 +220,12 @@ static int esp_input(struct xfrm_state *x, struct sk_buff *skb)
/* ... check padding bits here. Silly. :-) */
- iph = skb->nh.iph;
+ iph = ip_hdr(skb);
ihl = iph->ihl * 4;
if (x->encap) {
struct xfrm_encap_tmpl *encap = x->encap;
- struct udphdr *uh = (void *)(skb->nh.raw + ihl);
+ struct udphdr *uh = (void *)(skb_network_header(skb) + ihl);
/*
* 1) if the NAT-T peer's IP or port changed then
@@ -260,7 +263,8 @@ static int esp_input(struct xfrm_state *x, struct sk_buff *skb)
iph->protocol = nexthdr[1];
pskb_trim(skb, skb->len - alen - padlen - 2);
- skb->h.raw = __skb_pull(skb, sizeof(*esph) + esp->conf.ivlen) - ihl;
+ __skb_pull(skb, sizeof(*esph) + esp->conf.ivlen);
+ skb_set_transport_header(skb, -ihl);
return 0;
@@ -268,32 +272,33 @@ out:
return -EINVAL;
}
-static u32 esp4_get_max_size(struct xfrm_state *x, int mtu)
+static u32 esp4_get_mtu(struct xfrm_state *x, int mtu)
{
struct esp_data *esp = x->data;
u32 blksize = ALIGN(crypto_blkcipher_blocksize(esp->conf.tfm), 4);
- int enclen = 0;
+ u32 align = max_t(u32, blksize, esp->conf.padlen);
+ u32 rem;
+
+ mtu -= x->props.header_len + esp->auth.icv_trunc_len;
+ rem = mtu & (align - 1);
+ mtu &= ~(align - 1);
switch (x->props.mode) {
case XFRM_MODE_TUNNEL:
- mtu = ALIGN(mtu +2, blksize);
break;
default:
case XFRM_MODE_TRANSPORT:
/* The worst case */
- mtu = ALIGN(mtu + 2, 4) + blksize - 4;
+ mtu -= blksize - 4;
+ mtu += min_t(u32, blksize - 4, rem);
break;
case XFRM_MODE_BEET:
/* The worst case. */
- enclen = IPV4_BEET_PHMAXLEN;
- mtu = ALIGN(mtu + enclen + 2, blksize);
+ mtu += min_t(u32, IPV4_BEET_PHMAXLEN, rem);
break;
}
- if (esp->conf.padlen)
- mtu = ALIGN(mtu, esp->conf.padlen);
-
- return mtu + x->props.header_len + esp->auth.icv_trunc_len - enclen;
+ return mtu - 2;
}
static void esp4_err(struct sk_buff *skb, u32 info)
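The rewritten esp4_get_mtu() rounds the usable payload down to a cipher-block boundary instead of rounding a guess up as esp4_get_max_size() did. A worked example, assuming a 1500-byte link MTU, an 8-byte ESP header plus 16-byte IV (header_len 24), a 12-byte truncated ICV, and 16-byte blocks in tunnel mode:

    unsigned int demo_esp_mtu(unsigned int mtu)     /* 1500 in */
    {
            unsigned int align = 16, rem;

            mtu -= 24 + 12;                 /* 1464: strip ESP header+IV and ICV */
            rem  = mtu & (align - 1);       /* 8: leftover past a block boundary */
            mtu &= ~(align - 1);            /* 1456: whole cipher blocks only */
            return mtu - 2;                 /* 1454: reserve pad-length + next-header */
    }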
@@ -302,8 +307,8 @@ static void esp4_err(struct sk_buff *skb, u32 info)
struct ip_esp_hdr *esph = (struct ip_esp_hdr*)(skb->data+(iph->ihl<<2));
struct xfrm_state *x;
- if (skb->h.icmph->type != ICMP_DEST_UNREACH ||
- skb->h.icmph->code != ICMP_FRAG_NEEDED)
+ if (icmp_hdr(skb)->type != ICMP_DEST_UNREACH ||
+ icmp_hdr(skb)->code != ICMP_FRAG_NEEDED)
return;
x = xfrm_state_lookup((xfrm_address_t *)&iph->daddr, esph->spi, IPPROTO_ESP, AF_INET);
@@ -336,6 +341,7 @@ static int esp_init_state(struct xfrm_state *x)
{
struct esp_data *esp = NULL;
struct crypto_blkcipher *tfm;
+ u32 align;
/* null auth and encryption can have zero length keys */
if (x->aalg) {
@@ -402,6 +408,8 @@ static int esp_init_state(struct xfrm_state *x)
x->props.header_len = sizeof(struct ip_esp_hdr) + esp->conf.ivlen;
if (x->props.mode == XFRM_MODE_TUNNEL)
x->props.header_len += sizeof(struct iphdr);
+ else if (x->props.mode == XFRM_MODE_BEET)
+ x->props.header_len += IPV4_BEET_PHMAXLEN;
if (x->encap) {
struct xfrm_encap_tmpl *encap = x->encap;
@@ -417,7 +425,10 @@ static int esp_init_state(struct xfrm_state *x)
}
}
x->data = esp;
- x->props.trailer_len = esp4_get_max_size(x, 0) - x->props.header_len;
+ align = ALIGN(crypto_blkcipher_blocksize(esp->conf.tfm), 4);
+ if (esp->conf.padlen)
+ align = max_t(u32, align, esp->conf.padlen);
+ x->props.trailer_len = align + 1 + esp->auth.icv_trunc_len;
return 0;
error:
@@ -434,7 +445,7 @@ static struct xfrm_type esp_type =
.proto = IPPROTO_ESP,
.init_state = esp_init_state,
.destructor = esp_destroy,
- .get_max_size = esp4_get_max_size,
+ .get_mtu = esp4_get_mtu,
.input = esp_input,
.output = esp_output
};
diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c
index cac06c43f00..837f2957fa8 100644
--- a/net/ipv4/fib_frontend.c
+++ b/net/ipv4/fib_frontend.c
@@ -34,7 +34,6 @@
#include <linux/if_addr.h>
#include <linux/if_arp.h>
#include <linux/skbuff.h>
-#include <linux/netlink.h>
#include <linux/init.h>
#include <linux/list.h>
@@ -46,6 +45,7 @@
#include <net/icmp.h>
#include <net/arp.h>
#include <net/ip_fib.h>
+#include <net/rtnetlink.h>
#define FFprint(a...) printk(KERN_DEBUG a)
@@ -540,7 +540,7 @@ errout:
return err;
}
-int inet_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
+static int inet_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
{
struct fib_config cfg;
struct fib_table *tb;
@@ -561,7 +561,7 @@ errout:
return err;
}
-int inet_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
+static int inet_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
{
struct fib_config cfg;
struct fib_table *tb;
@@ -582,7 +582,7 @@ errout:
return err;
}
-int inet_dump_fib(struct sk_buff *skb, struct netlink_callback *cb)
+static int inet_dump_fib(struct sk_buff *skb, struct netlink_callback *cb)
{
unsigned int h, s_h;
unsigned int e = 0, s_e;
@@ -777,6 +777,10 @@ static void nl_fib_lookup(struct fib_result_nl *frn, struct fib_table *tb )
.tos = frn->fl_tos,
.scope = frn->fl_scope } } };
+#ifdef CONFIG_IP_MULTIPLE_TABLES
+ res.r = NULL;
+#endif
+
frn->err = -ENOENT;
if (tb) {
local_bh_disable();
@@ -807,7 +811,7 @@ static void nl_fib_input(struct sock *sk, int len)
if (skb == NULL)
return;
- nlh = (struct nlmsghdr *)skb->data;
+ nlh = nlmsg_hdr(skb);
if (skb->len < NLMSG_SPACE(0) || skb->len < nlh->nlmsg_len ||
nlh->nlmsg_len < NLMSG_LENGTH(sizeof(*frn))) {
kfree_skb(skb);
@@ -827,7 +831,8 @@ static void nl_fib_input(struct sock *sk, int len)
static void nl_fib_lookup_init(void)
{
- netlink_kernel_create(NETLINK_FIB_LOOKUP, 0, nl_fib_input, THIS_MODULE);
+ netlink_kernel_create(NETLINK_FIB_LOOKUP, 0, nl_fib_input, NULL,
+ THIS_MODULE);
}
static void fib_disable_ip(struct net_device *dev, int force)
@@ -925,6 +930,10 @@ void __init ip_fib_init(void)
register_netdevice_notifier(&fib_netdev_notifier);
register_inetaddr_notifier(&fib_inetaddr_notifier);
nl_fib_lookup_init();
+
+ rtnl_register(PF_INET, RTM_NEWROUTE, inet_rtm_newroute, NULL);
+ rtnl_register(PF_INET, RTM_DELROUTE, inet_rtm_delroute, NULL);
+ rtnl_register(PF_INET, RTM_GETROUTE, NULL, inet_dump_fib);
}
EXPORT_SYMBOL(inet_addr_type);
diff --git a/net/ipv4/fib_hash.c b/net/ipv4/fib_hash.c
index a4949f957ab..9cfecf1215c 100644
--- a/net/ipv4/fib_hash.c
+++ b/net/ipv4/fib_hash.c
@@ -1027,7 +1027,7 @@ out:
return 0;
}
-static struct seq_operations fib_seq_ops = {
+static const struct seq_operations fib_seq_ops = {
.start = fib_seq_start,
.next = fib_seq_next,
.stop = fib_seq_stop,
diff --git a/net/ipv4/fib_rules.c b/net/ipv4/fib_rules.c
index c660c074c76..33083ad52e9 100644
--- a/net/ipv4/fib_rules.c
+++ b/net/ipv4/fib_rules.c
@@ -274,11 +274,6 @@ nla_put_failure:
return -ENOBUFS;
}
-int fib4_rules_dump(struct sk_buff *skb, struct netlink_callback *cb)
-{
- return fib_rules_dump(skb, cb, AF_INET);
-}
-
static u32 fib4_rule_default_pref(void)
{
struct list_head *pos;
@@ -303,6 +298,11 @@ static size_t fib4_rule_nlmsg_payload(struct fib_rule *rule)
+ nla_total_size(4); /* flow */
}
+static void fib4_rule_flush_cache(void)
+{
+ rt_cache_flush(-1);
+}
+
static struct fib_rules_ops fib4_rules_ops = {
.family = AF_INET,
.rule_size = sizeof(struct fib4_rule),
@@ -314,6 +314,7 @@ static struct fib_rules_ops fib4_rules_ops = {
.fill = fib4_rule_fill,
.default_pref = fib4_rule_default_pref,
.nlmsg_payload = fib4_rule_nlmsg_payload,
+ .flush_cache = fib4_rule_flush_cache,
.nlgroup = RTNLGRP_IPV4_RULE,
.policy = fib4_rule_policy,
.rules_list = &fib4_rules,
diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c
index 3dad12ee76c..406ea7050ae 100644
--- a/net/ipv4/fib_semantics.c
+++ b/net/ipv4/fib_semantics.c
@@ -927,7 +927,7 @@ int fib_semantic_match(struct list_head *head, const struct flowi *flp,
default:
printk(KERN_DEBUG "impossible 102\n");
return -EINVAL;
- };
+ }
}
return err;
}
diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c
index 214c34732e8..9be7da7c3a8 100644
--- a/net/ipv4/fib_trie.c
+++ b/net/ipv4/fib_trie.c
@@ -50,7 +50,7 @@
* Patrick McHardy <kaber@trash.net>
*/
-#define VERSION "0.407"
+#define VERSION "0.408"
#include <asm/uaccess.h>
#include <asm/system.h>
@@ -292,8 +292,8 @@ static inline void check_tnode(const struct tnode *tn)
static int halve_threshold = 25;
static int inflate_threshold = 50;
-static int halve_threshold_root = 15;
-static int inflate_threshold_root = 25;
+static int halve_threshold_root = 8;
+static int inflate_threshold_root = 15;
static void __alias_free_mem(struct rcu_head *head)
@@ -350,11 +350,10 @@ static void __tnode_free_rcu(struct rcu_head *head)
static inline void tnode_free(struct tnode *tn)
{
- if(IS_LEAF(tn)) {
+ if (IS_LEAF(tn)) {
struct leaf *l = (struct leaf *) tn;
call_rcu_bh(&l->rcu, __leaf_free_rcu);
- }
- else
+ } else
call_rcu(&tn->rcu, __tnode_free_rcu);
}
@@ -459,6 +458,7 @@ static struct node *resize(struct trie *t, struct tnode *tn)
struct tnode *old_tn;
int inflate_threshold_use;
int halve_threshold_use;
+ int max_resize;
if (!tn)
return NULL;
@@ -553,13 +553,14 @@ static struct node *resize(struct trie *t, struct tnode *tn)
/* Keep root node larger */
- if(!tn->parent)
+ if (!tn->parent)
inflate_threshold_use = inflate_threshold_root;
else
inflate_threshold_use = inflate_threshold;
err = 0;
- while ((tn->full_children > 0 &&
+ max_resize = 10;
+ while ((tn->full_children > 0 && max_resize-- &&
50 * (tn->full_children + tnode_child_length(tn) - tn->empty_children) >=
inflate_threshold_use * tnode_child_length(tn))) {
@@ -574,6 +575,15 @@ static struct node *resize(struct trie *t, struct tnode *tn)
}
}
+ if (max_resize < 0) {
+ if (!tn->parent)
+ printk(KERN_WARNING "Fix inflate_threshold_root. Now=%d size=%d bits\n",
+ inflate_threshold_root, tn->bits);
+ else
+ printk(KERN_WARNING "Fix inflate_threshold. Now=%d size=%d bits\n",
+ inflate_threshold, tn->bits);
+ }
+
check_tnode(tn);
/*
@@ -584,13 +594,14 @@ static struct node *resize(struct trie *t, struct tnode *tn)
/* Keep root node larger */
- if(!tn->parent)
+ if (!tn->parent)
halve_threshold_use = halve_threshold_root;
else
halve_threshold_use = halve_threshold;
err = 0;
- while (tn->bits > 1 &&
+ max_resize = 10;
+ while (tn->bits > 1 && max_resize-- &&
100 * (tnode_child_length(tn) - tn->empty_children) <
halve_threshold_use * tnode_child_length(tn)) {
@@ -605,6 +616,14 @@ static struct node *resize(struct trie *t, struct tnode *tn)
}
}
+ if (max_resize < 0) {
+ if (!tn->parent)
+ printk(KERN_WARNING "Fix halve_threshold_root. Now=%d size=%d bits\n",
+ halve_threshold_root, tn->bits);
+ else
+ printk(KERN_WARNING "Fix halve_threshold. Now=%d size=%d bits\n",
+ halve_threshold, tn->bits);
+ }
/* Only one child remains */
if (tn->empty_children == tnode_child_length(tn) - 1)
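The inflate test above reads more easily pulled out: a tnode keeps doubling while its weighted occupancy stays above the threshold, and the new max_resize cap gives up after ten attempts instead of looping until convergence (hence the warnings). A restatement of the predicate with simplified names:

    static int should_inflate(int full_children, int empty_children,
                              int child_length, int threshold)
    {
            /* mirrors: 50 * (full + children - empty) >= threshold * children */
            return full_children > 0 &&
                   50 * (full_children + child_length - empty_children) >=
                   threshold * child_length;
    }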
@@ -2039,12 +2058,12 @@ static struct node *fib_trie_get_first(struct fib_trie_iter *iter,
{
struct node *n ;
- if(!t)
+ if (!t)
return NULL;
n = rcu_dereference(t->trie);
- if(!iter)
+ if (!iter)
return NULL;
if (n) {
@@ -2084,7 +2103,7 @@ static void trie_collect_stats(struct trie *t, struct trie_stat *s)
int i;
s->tnodes++;
- if(tn->bits < MAX_STAT_DEPTH)
+ if (tn->bits < MAX_STAT_DEPTH)
s->nodesizes[tn->bits]++;
for (i = 0; i < (1<<tn->bits); i++)
@@ -2250,7 +2269,7 @@ static inline const char *rtn_scope(enum rt_scope_t s)
{
static char buf[32];
- switch(s) {
+ switch (s) {
case RT_SCOPE_UNIVERSE: return "universe";
case RT_SCOPE_SITE: return "site";
case RT_SCOPE_LINK: return "link";
@@ -2340,7 +2359,7 @@ static int fib_trie_seq_show(struct seq_file *seq, void *v)
return 0;
}
-static struct seq_operations fib_trie_seq_ops = {
+static const struct seq_operations fib_trie_seq_ops = {
.start = fib_trie_seq_start,
.next = fib_trie_seq_next,
.stop = fib_trie_seq_stop,
@@ -2461,7 +2480,7 @@ static int fib_route_seq_show(struct seq_file *seq, void *v)
return 0;
}
-static struct seq_operations fib_route_seq_ops = {
+static const struct seq_operations fib_route_seq_ops = {
.start = fib_trie_seq_start,
.next = fib_trie_seq_next,
.stop = fib_trie_seq_stop,
diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c
index 4b7a0d946a0..d38cbba92a4 100644
--- a/net/ipv4/icmp.c
+++ b/net/ipv4/icmp.c
@@ -355,7 +355,7 @@ static void icmp_push_reply(struct icmp_bxm *icmp_param,
ipc, rt, MSG_DONTWAIT) < 0)
ip_flush_pending_frames(icmp_socket->sk);
else if ((skb = skb_peek(&icmp_socket->sk->sk_write_queue)) != NULL) {
- struct icmphdr *icmph = skb->h.icmph;
+ struct icmphdr *icmph = icmp_hdr(skb);
__wsum csum = 0;
struct sk_buff *skb1;
@@ -392,7 +392,7 @@ static void icmp_reply(struct icmp_bxm *icmp_param, struct sk_buff *skb)
icmp_param->data.icmph.checksum = 0;
icmp_out_count(icmp_param->data.icmph.type);
- inet->tos = skb->nh.iph->tos;
+ inet->tos = ip_hdr(skb)->tos;
daddr = ipc.addr = rt->rt_src;
ipc.opt = NULL;
if (icmp_param->replyopts.optlen) {
@@ -404,7 +404,7 @@ static void icmp_reply(struct icmp_bxm *icmp_param, struct sk_buff *skb)
struct flowi fl = { .nl_u = { .ip4_u =
{ .daddr = daddr,
.saddr = rt->rt_spec_dst,
- .tos = RT_TOS(skb->nh.iph->tos) } },
+ .tos = RT_TOS(ip_hdr(skb)->tos) } },
.proto = IPPROTO_ICMP };
security_skb_classify_flow(skb, &fl);
if (ip_route_output_key(&rt, &fl))
@@ -448,9 +448,10 @@ void icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info)
* Check this, icmp_send is called from the most obscure devices
* sometimes.
*/
- iph = skb_in->nh.iph;
+ iph = ip_hdr(skb_in);
- if ((u8 *)iph < skb_in->head || (u8 *)(iph + 1) > skb_in->tail)
+ if ((u8 *)iph < skb_in->head ||
+ (skb_in->network_header + sizeof(*iph)) > skb_in->tail)
goto out;
/*
@@ -484,7 +485,7 @@ void icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info)
u8 _inner_type, *itp;
itp = skb_header_pointer(skb_in,
- skb_in->nh.raw +
+ skb_network_header(skb_in) +
(iph->ihl << 2) +
offsetof(struct icmphdr,
type) -
@@ -536,7 +537,7 @@ void icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info)
icmp_param.data.icmph.un.gateway = info;
icmp_param.data.icmph.checksum = 0;
icmp_param.skb = skb_in;
- icmp_param.offset = skb_in->nh.raw - skb_in->data;
+ icmp_param.offset = skb_network_offset(skb_in);
icmp_out_count(icmp_param.data.icmph.type);
inet_sk(icmp_socket->sk)->tos = tos;
ipc.addr = iph->saddr;
@@ -613,7 +614,7 @@ static void icmp_unreach(struct sk_buff *skb)
if (!pskb_may_pull(skb, sizeof(struct iphdr)))
goto out_err;
- icmph = skb->h.icmph;
+ icmph = icmp_hdr(skb);
iph = (struct iphdr *)skb->data;
if (iph->ihl < 5) /* Mangled header, drop. */
@@ -676,7 +677,7 @@ static void icmp_unreach(struct sk_buff *skb)
printk(KERN_WARNING "%u.%u.%u.%u sent an invalid ICMP "
"type %u, code %u "
"error to a broadcast: %u.%u.%u.%u on %s\n",
- NIPQUAD(skb->nh.iph->saddr),
+ NIPQUAD(ip_hdr(skb)->saddr),
icmph->type, icmph->code,
NIPQUAD(iph->daddr),
skb->dev->name);
@@ -743,7 +744,7 @@ static void icmp_redirect(struct sk_buff *skb)
iph = (struct iphdr *)skb->data;
- switch (skb->h.icmph->code & 7) {
+ switch (icmp_hdr(skb)->code & 7) {
case ICMP_REDIR_NET:
case ICMP_REDIR_NETTOS:
/*
@@ -751,8 +752,8 @@ static void icmp_redirect(struct sk_buff *skb)
*/
case ICMP_REDIR_HOST:
case ICMP_REDIR_HOSTTOS:
- ip_rt_redirect(skb->nh.iph->saddr, iph->daddr,
- skb->h.icmph->un.gateway,
+ ip_rt_redirect(ip_hdr(skb)->saddr, iph->daddr,
+ icmp_hdr(skb)->un.gateway,
iph->saddr, skb->dev);
break;
}
@@ -780,7 +781,7 @@ static void icmp_echo(struct sk_buff *skb)
if (!sysctl_icmp_echo_ignore_all) {
struct icmp_bxm icmp_param;
- icmp_param.data.icmph = *skb->h.icmph;
+ icmp_param.data.icmph = *icmp_hdr(skb);
icmp_param.data.icmph.type = ICMP_ECHOREPLY;
icmp_param.skb = skb;
icmp_param.offset = 0;
@@ -816,7 +817,7 @@ static void icmp_timestamp(struct sk_buff *skb)
icmp_param.data.times[2] = icmp_param.data.times[1];
if (skb_copy_bits(skb, 0, &icmp_param.data.times[0], 4))
BUG();
- icmp_param.data.icmph = *skb->h.icmph;
+ icmp_param.data.icmph = *icmp_hdr(skb);
icmp_param.data.icmph.type = ICMP_TIMESTAMPREPLY;
icmp_param.data.icmph.code = 0;
icmp_param.skb = skb;
@@ -943,7 +944,7 @@ int icmp_rcv(struct sk_buff *skb)
if (!pskb_pull(skb, sizeof(struct icmphdr)))
goto error;
- icmph = skb->h.icmph;
+ icmph = icmp_hdr(skb);
/*
* 18 is the highest 'known' ICMP type. Anything else is a mystery
diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c
index 8cedb2a2c9d..f4dd4745310 100644
--- a/net/ipv4/igmp.c
+++ b/net/ipv4/igmp.c
@@ -314,7 +314,9 @@ static struct sk_buff *igmpv3_newpack(struct net_device *dev, int size)
skb_reserve(skb, LL_RESERVED_SPACE(dev));
- skb->nh.iph = pip =(struct iphdr *)skb_put(skb, sizeof(struct iphdr)+4);
+ skb_reset_network_header(skb);
+ pip = ip_hdr(skb);
+ skb_put(skb, sizeof(struct iphdr) + 4);
pip->version = 4;
pip->ihl = (sizeof(struct iphdr)+4)>>2;
@@ -331,8 +333,9 @@ static struct sk_buff *igmpv3_newpack(struct net_device *dev, int size)
((u8*)&pip[1])[2] = 0;
((u8*)&pip[1])[3] = 0;
- pig =(struct igmpv3_report *)skb_put(skb, sizeof(*pig));
- skb->h.igmph = (struct igmphdr *)pig;
+ skb->transport_header = skb->network_header + sizeof(struct iphdr) + 4;
+ skb_put(skb, sizeof(*pig));
+ pig = igmpv3_report_hdr(skb);
pig->type = IGMPV3_HOST_MEMBERSHIP_REPORT;
pig->resv1 = 0;
pig->csum = 0;
@@ -343,16 +346,14 @@ static struct sk_buff *igmpv3_newpack(struct net_device *dev, int size)
static int igmpv3_sendpack(struct sk_buff *skb)
{
- struct iphdr *pip = skb->nh.iph;
- struct igmphdr *pig = skb->h.igmph;
- int iplen, igmplen;
+ struct iphdr *pip = ip_hdr(skb);
+ struct igmphdr *pig = igmp_hdr(skb);
+ const int iplen = skb->tail - skb->network_header;
+ const int igmplen = skb->tail - skb->transport_header;
- iplen = skb->tail - (unsigned char *)skb->nh.iph;
pip->tot_len = htons(iplen);
ip_send_check(pip);
-
- igmplen = skb->tail - (unsigned char *)skb->h.igmph;
- pig->csum = ip_compute_csum((void *)skb->h.igmph, igmplen);
+ pig->csum = ip_compute_csum(igmp_hdr(skb), igmplen);
return NF_HOOK(PF_INET, NF_IP_LOCAL_OUT, skb, NULL, skb->dev,
dst_output);
@@ -379,7 +380,7 @@ static struct sk_buff *add_grhead(struct sk_buff *skb, struct ip_mc_list *pmc,
pgr->grec_auxwords = 0;
pgr->grec_nsrcs = 0;
pgr->grec_mca = pmc->multiaddr;
- pih = (struct igmpv3_report *)skb->h.igmph;
+ pih = igmpv3_report_hdr(skb);
pih->ngrec = htons(ntohs(pih->ngrec)+1);
*ppgr = pgr;
return skb;
@@ -412,7 +413,7 @@ static struct sk_buff *add_grec(struct sk_buff *skb, struct ip_mc_list *pmc,
if (!*psf_list)
goto empty_source;
- pih = skb ? (struct igmpv3_report *)skb->h.igmph : NULL;
+ pih = skb ? igmpv3_report_hdr(skb) : NULL;
/* EX and TO_EX get a fresh packet, if needed */
if (truncate) {
@@ -664,7 +665,9 @@ static int igmp_send_report(struct in_device *in_dev, struct ip_mc_list *pmc,
skb_reserve(skb, LL_RESERVED_SPACE(dev));
- skb->nh.iph = iph = (struct iphdr *)skb_put(skb, sizeof(struct iphdr)+4);
+ skb_reset_network_header(skb);
+ iph = ip_hdr(skb);
+ skb_put(skb, sizeof(struct iphdr) + 4);
iph->version = 4;
iph->ihl = (sizeof(struct iphdr)+4)>>2;
@@ -827,8 +830,8 @@ static void igmp_heard_report(struct in_device *in_dev, __be32 group)
static void igmp_heard_query(struct in_device *in_dev, struct sk_buff *skb,
int len)
{
- struct igmphdr *ih = skb->h.igmph;
- struct igmpv3_query *ih3 = (struct igmpv3_query *)ih;
+ struct igmphdr *ih = igmp_hdr(skb);
+ struct igmpv3_query *ih3 = igmpv3_query_hdr(skb);
struct ip_mc_list *im;
__be32 group = ih->group;
int max_delay;
@@ -861,12 +864,12 @@ static void igmp_heard_query(struct in_device *in_dev, struct sk_buff *skb,
if (!pskb_may_pull(skb, sizeof(struct igmpv3_query)))
return;
- ih3 = (struct igmpv3_query *) skb->h.raw;
+ ih3 = igmpv3_query_hdr(skb);
if (ih3->nsrcs) {
if (!pskb_may_pull(skb, sizeof(struct igmpv3_query)
+ ntohs(ih3->nsrcs)*sizeof(__be32)))
return;
- ih3 = (struct igmpv3_query *) skb->h.raw;
+ ih3 = igmpv3_query_hdr(skb);
}
max_delay = IGMPV3_MRC(ih3->code)*(HZ/IGMP_TIMER_SCALE);
@@ -943,7 +946,7 @@ int igmp_rcv(struct sk_buff *skb)
goto drop;
}
- ih = skb->h.igmph;
+ ih = igmp_hdr(skb);
switch (ih->type) {
case IGMP_HOST_MEMBERSHIP_QUERY:
igmp_heard_query(in_dev, skb, len);
@@ -2285,9 +2288,8 @@ static inline struct ip_mc_list *igmp_mc_get_first(struct seq_file *seq)
struct ip_mc_list *im = NULL;
struct igmp_mc_iter_state *state = igmp_mc_seq_private(seq);
- for (state->dev = dev_base, state->in_dev = NULL;
- state->dev;
- state->dev = state->dev->next) {
+ state->in_dev = NULL;
+ for_each_netdev(state->dev) {
struct in_device *in_dev;
in_dev = in_dev_get(state->dev);
if (!in_dev)
@@ -2313,7 +2315,7 @@ static struct ip_mc_list *igmp_mc_get_next(struct seq_file *seq, struct ip_mc_li
read_unlock(&state->in_dev->mc_list_lock);
in_dev_put(state->in_dev);
}
- state->dev = state->dev->next;
+ state->dev = next_net_device(state->dev);
if (!state->dev) {
state->in_dev = NULL;
break;
@@ -2397,7 +2399,7 @@ static int igmp_mc_seq_show(struct seq_file *seq, void *v)
return 0;
}
-static struct seq_operations igmp_mc_seq_ops = {
+static const struct seq_operations igmp_mc_seq_ops = {
.start = igmp_mc_seq_start,
.next = igmp_mc_seq_next,
.stop = igmp_mc_seq_stop,
@@ -2447,9 +2449,9 @@ static inline struct ip_sf_list *igmp_mcf_get_first(struct seq_file *seq)
struct ip_mc_list *im = NULL;
struct igmp_mcf_iter_state *state = igmp_mcf_seq_private(seq);
- for (state->dev = dev_base, state->idev = NULL, state->im = NULL;
- state->dev;
- state->dev = state->dev->next) {
+ state->idev = NULL;
+ state->im = NULL;
+ for_each_netdev(state->dev) {
struct in_device *idev;
idev = in_dev_get(state->dev);
if (unlikely(idev == NULL))
@@ -2485,7 +2487,7 @@ static struct ip_sf_list *igmp_mcf_get_next(struct seq_file *seq, struct ip_sf_l
read_unlock(&state->idev->mc_list_lock);
in_dev_put(state->idev);
}
- state->dev = state->dev->next;
+ state->dev = next_net_device(state->dev);
if (!state->dev) {
state->idev = NULL;
goto out;
@@ -2571,7 +2573,7 @@ static int igmp_mcf_seq_show(struct seq_file *seq, void *v)
return 0;
}
-static struct seq_operations igmp_mcf_seq_ops = {
+static const struct seq_operations igmp_mcf_seq_ops = {
.start = igmp_mcf_seq_start,
.next = igmp_mcf_seq_next,
.stop = igmp_mcf_seq_stop,
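
Editor's note: besides the accessor conversion, igmpv3_newpack() and igmp_send_report() above now reset the network header before dereferencing ip_hdr(), and the /proc iterators trade the open-coded dev_base walk for for_each_netdev()/next_net_device(). A minimal sketch of the new header-build order (the extra 4 bytes are assumed to be the router-alert option, as in the hunks; allocation and error handling elided):

        static struct iphdr *igmp_hdr_setup_sketch(struct sk_buff *skb,
                                                   struct net_device *dev)
        {
                skb_reserve(skb, LL_RESERVED_SPACE(dev)); /* link-layer headroom */
                skb_reset_network_header(skb);            /* nh := current data */
                skb_put(skb, sizeof(struct iphdr) + 4);   /* IP header + RA option */
                return ip_hdr(skb);                       /* now safe to fill in */
        }
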
diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c
index 5df71cd08da..dbeacd8b0f9 100644
--- a/net/ipv4/inet_diag.c
+++ b/net/ipv4/inet_diag.c
@@ -27,6 +27,7 @@
#include <net/inet_hashtables.h>
#include <net/inet_timewait_sock.h>
#include <net/inet6_hashtables.h>
+#include <net/netlink.h>
#include <linux/inet.h>
#include <linux/stddef.h>
@@ -60,7 +61,7 @@ static int inet_csk_diag_fill(struct sock *sk,
struct nlmsghdr *nlh;
void *info = NULL;
struct inet_diag_meminfo *minfo = NULL;
- unsigned char *b = skb->tail;
+ unsigned char *b = skb_tail_pointer(skb);
const struct inet_diag_handler *handler;
handler = inet_diag_table[unlh->nlmsg_type];
@@ -147,12 +148,12 @@ static int inet_csk_diag_fill(struct sock *sk,
icsk->icsk_ca_ops && icsk->icsk_ca_ops->get_info)
icsk->icsk_ca_ops->get_info(sk, ext, skb);
- nlh->nlmsg_len = skb->tail - b;
+ nlh->nlmsg_len = skb_tail_pointer(skb) - b;
return skb->len;
rtattr_failure:
nlmsg_failure:
- skb_trim(skb, b - skb->data);
+ nlmsg_trim(skb, b);
return -EMSGSIZE;
}
@@ -163,7 +164,7 @@ static int inet_twsk_diag_fill(struct inet_timewait_sock *tw,
{
long tmo;
struct inet_diag_msg *r;
- const unsigned char *previous_tail = skb->tail;
+ const unsigned char *previous_tail = skb_tail_pointer(skb);
struct nlmsghdr *nlh = NLMSG_PUT(skb, pid, seq,
unlh->nlmsg_type, sizeof(*r));
@@ -205,10 +206,10 @@ static int inet_twsk_diag_fill(struct inet_timewait_sock *tw,
&tw6->tw_v6_daddr);
}
#endif
- nlh->nlmsg_len = skb->tail - previous_tail;
+ nlh->nlmsg_len = skb_tail_pointer(skb) - previous_tail;
return skb->len;
nlmsg_failure:
- skb_trim(skb, previous_tail - skb->data);
+ nlmsg_trim(skb, previous_tail);
return -EMSGSIZE;
}
@@ -535,7 +536,7 @@ static int inet_diag_fill_req(struct sk_buff *skb, struct sock *sk,
{
const struct inet_request_sock *ireq = inet_rsk(req);
struct inet_sock *inet = inet_sk(sk);
- unsigned char *b = skb->tail;
+ unsigned char *b = skb_tail_pointer(skb);
struct inet_diag_msg *r;
struct nlmsghdr *nlh;
long tmo;
@@ -574,12 +575,12 @@ static int inet_diag_fill_req(struct sk_buff *skb, struct sock *sk,
&inet6_rsk(req)->rmt_addr);
}
#endif
- nlh->nlmsg_len = skb->tail - b;
+ nlh->nlmsg_len = skb_tail_pointer(skb) - b;
return skb->len;
nlmsg_failure:
- skb_trim(skb, b - skb->data);
+ nlmsg_trim(skb, b);
return -1;
}
@@ -805,68 +806,43 @@ done:
return skb->len;
}
-static inline int inet_diag_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
+static int inet_diag_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
{
- if (!(nlh->nlmsg_flags&NLM_F_REQUEST))
- return 0;
+ int hdrlen = sizeof(struct inet_diag_req);
- if (nlh->nlmsg_type >= INET_DIAG_GETSOCK_MAX)
- goto err_inval;
+ if (nlh->nlmsg_type >= INET_DIAG_GETSOCK_MAX ||
+ nlmsg_len(nlh) < hdrlen)
+ return -EINVAL;
if (inet_diag_table[nlh->nlmsg_type] == NULL)
return -ENOENT;
- if (NLMSG_LENGTH(sizeof(struct inet_diag_req)) > skb->len)
- goto err_inval;
-
- if (nlh->nlmsg_flags&NLM_F_DUMP) {
- if (nlh->nlmsg_len >
- (4 + NLMSG_SPACE(sizeof(struct inet_diag_req)))) {
- struct rtattr *rta = (void *)(NLMSG_DATA(nlh) +
- sizeof(struct inet_diag_req));
- if (rta->rta_type != INET_DIAG_REQ_BYTECODE ||
- rta->rta_len < 8 ||
- rta->rta_len >
- (nlh->nlmsg_len -
- NLMSG_SPACE(sizeof(struct inet_diag_req))))
- goto err_inval;
- if (inet_diag_bc_audit(RTA_DATA(rta), RTA_PAYLOAD(rta)))
- goto err_inval;
+ if (nlh->nlmsg_flags & NLM_F_DUMP) {
+ if (nlmsg_attrlen(nlh, hdrlen)) {
+ struct nlattr *attr;
+
+ attr = nlmsg_find_attr(nlh, hdrlen,
+ INET_DIAG_REQ_BYTECODE);
+ if (attr == NULL ||
+ nla_len(attr) < sizeof(struct inet_diag_bc_op) ||
+ inet_diag_bc_audit(nla_data(attr), nla_len(attr)))
+ return -EINVAL;
}
+
return netlink_dump_start(idiagnl, skb, nlh,
inet_diag_dump, NULL);
- } else
- return inet_diag_get_exact(skb, nlh);
-
-err_inval:
- return -EINVAL;
-}
-
-
-static inline void inet_diag_rcv_skb(struct sk_buff *skb)
-{
- if (skb->len >= NLMSG_SPACE(0)) {
- int err;
- struct nlmsghdr *nlh = (struct nlmsghdr *)skb->data;
-
- if (nlh->nlmsg_len < sizeof(*nlh) ||
- skb->len < nlh->nlmsg_len)
- return;
- err = inet_diag_rcv_msg(skb, nlh);
- if (err || nlh->nlmsg_flags & NLM_F_ACK)
- netlink_ack(skb, nlh, err);
}
+
+ return inet_diag_get_exact(skb, nlh);
}
static void inet_diag_rcv(struct sock *sk, int len)
{
- struct sk_buff *skb;
- unsigned int qlen = skb_queue_len(&sk->sk_receive_queue);
+ unsigned int qlen = 0;
- while (qlen-- && (skb = skb_dequeue(&sk->sk_receive_queue))) {
- inet_diag_rcv_skb(skb);
- kfree_skb(skb);
- }
+ do {
+ netlink_run_queue(sk, &qlen, &inet_diag_rcv_msg);
+ } while (qlen);
}
static DEFINE_SPINLOCK(inet_diag_register_lock);
@@ -917,7 +893,7 @@ static int __init inet_diag_init(void)
goto out;
idiagnl = netlink_kernel_create(NETLINK_INET_DIAG, 0, inet_diag_rcv,
- THIS_MODULE);
+ NULL, THIS_MODULE);
if (idiagnl == NULL)
goto out_free_table;
err = 0;
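
Editor's note: the inet_diag receive path above drops its hand-rolled rtattr arithmetic for the net/netlink.h helpers and replaces the private skb loop with netlink_run_queue(). A hedged sketch of the attribute-validation pattern (assumes <net/netlink.h> and <linux/inet_diag.h>; function name hypothetical):

        static int diag_check_bytecode_sketch(struct nlmsghdr *nlh, int hdrlen)
        {
                struct nlattr *attr;

                if (nlmsg_len(nlh) < hdrlen)      /* fixed header incomplete */
                        return -EINVAL;
                if (!nlmsg_attrlen(nlh, hdrlen))  /* no attributes at all */
                        return 0;

                attr = nlmsg_find_attr(nlh, hdrlen, INET_DIAG_REQ_BYTECODE);
                if (attr == NULL ||
                    nla_len(attr) < sizeof(struct inet_diag_bc_op))
                        return -EINVAL;
                /* nla_data(attr)/nla_len(attr) now bound the bytecode blob. */
                return 0;
        }
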
diff --git a/net/ipv4/inetpeer.c b/net/ipv4/inetpeer.c
index db3ef96bdfd..2f44e612806 100644
--- a/net/ipv4/inetpeer.c
+++ b/net/ipv4/inetpeer.c
@@ -87,10 +87,12 @@ static DEFINE_RWLOCK(peer_pool_lock);
static int peer_total;
/* Exported for sysctl_net_ipv4. */
-int inet_peer_threshold = 65536 + 128; /* start to throw entries more
+int inet_peer_threshold __read_mostly = 65536 + 128; /* start to throw entries more
* aggressively at this stage */
-int inet_peer_minttl = 120 * HZ; /* TTL under high load: 120 sec */
-int inet_peer_maxttl = 10 * 60 * HZ; /* usual time to live: 10 min */
+int inet_peer_minttl __read_mostly = 120 * HZ; /* TTL under high load: 120 sec */
+int inet_peer_maxttl __read_mostly = 10 * 60 * HZ; /* usual time to live: 10 min */
+int inet_peer_gc_mintime __read_mostly = 10 * HZ;
+int inet_peer_gc_maxtime __read_mostly = 120 * HZ;
static struct inet_peer *inet_peer_unused_head;
static struct inet_peer **inet_peer_unused_tailp = &inet_peer_unused_head;
@@ -99,9 +101,6 @@ static DEFINE_SPINLOCK(inet_peer_unused_lock);
static void peer_check_expire(unsigned long dummy);
static DEFINE_TIMER(peer_periodic_timer, peer_check_expire, 0, 0);
-/* Exported for sysctl_net_ipv4. */
-int inet_peer_gc_mintime = 10 * HZ,
- inet_peer_gc_maxtime = 120 * HZ;
/* Called from ip_output.c:ip_init */
void __init inet_initpeers(void)
@@ -151,20 +150,27 @@ static void unlink_from_unused(struct inet_peer *p)
spin_unlock_bh(&inet_peer_unused_lock);
}
-/* Called with local BH disabled and the pool lock held. */
-#define lookup(daddr) \
+/*
+ * Called with local BH disabled and the pool lock held.
+ * _stack is known to be NULL or not at compile time,
+ * so compiler will optimize the if (_stack) tests.
+ */
+#define lookup(_daddr,_stack) \
({ \
struct inet_peer *u, **v; \
- stackptr = stack; \
- *stackptr++ = &peer_root; \
+ if (_stack) { \
+ stackptr = _stack; \
+ *stackptr++ = &peer_root; \
+ } \
for (u = peer_root; u != peer_avl_empty; ) { \
- if (daddr == u->v4daddr) \
+ if (_daddr == u->v4daddr) \
break; \
- if ((__force __u32)daddr < (__force __u32)u->v4daddr) \
+ if ((__force __u32)_daddr < (__force __u32)u->v4daddr) \
v = &u->avl_left; \
else \
v = &u->avl_right; \
- *stackptr++ = v; \
+ if (_stack) \
+ *stackptr++ = v; \
u = *v; \
} \
u; \
@@ -288,7 +294,7 @@ static void unlink_from_pool(struct inet_peer *p)
if (atomic_read(&p->refcnt) == 1) {
struct inet_peer **stack[PEER_MAXDEPTH];
struct inet_peer ***stackptr, ***delp;
- if (lookup(p->v4daddr) != p)
+ if (lookup(p->v4daddr, stack) != p)
BUG();
delp = stackptr - 1; /* *delp[0] == p */
if (p->avl_left == peer_avl_empty) {
@@ -373,7 +379,7 @@ struct inet_peer *inet_getpeer(__be32 daddr, int create)
/* Look up for the address quickly. */
read_lock_bh(&peer_pool_lock);
- p = lookup(daddr);
+ p = lookup(daddr, NULL);
if (p != peer_avl_empty)
atomic_inc(&p->refcnt);
read_unlock_bh(&peer_pool_lock);
@@ -400,7 +406,7 @@ struct inet_peer *inet_getpeer(__be32 daddr, int create)
write_lock_bh(&peer_pool_lock);
/* Check if an entry has suddenly appeared. */
- p = lookup(daddr);
+ p = lookup(daddr, stack);
if (p != peer_avl_empty)
goto out_free;
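
Editor's note: the lookup() macro above gains a _stack argument so the read-side fast path in inet_getpeer() can pass a literal NULL and let the compiler discard the AVL-stack pushes. A simplified, self-contained model of that trick (a plain list stands in for the AVL tree; all names hypothetical):

        struct node { int key; struct node *next; };

        #define list_lookup(_head, _key, _trace)                        \
        ({                                                              \
                struct node *_n, **_sp = (_trace);                      \
                for (_n = (_head); _n != NULL; _n = _n->next) {         \
                        if (_trace)     /* folded out when NULL */      \
                                *_sp++ = _n;                            \
                        if (_n->key == (_key))                          \
                                break;                                  \
                }                                                       \
                _n;                                                     \
        })

        /* Read side: list_lookup(head, key, NULL) pays nothing for tracing;
         * the write side passes a real array and gets the visited nodes. */
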
diff --git a/net/ipv4/ip_forward.c b/net/ipv4/ip_forward.c
index 369e721c4ba..9cb04df0054 100644
--- a/net/ipv4/ip_forward.c
+++ b/net/ipv4/ip_forward.c
@@ -67,14 +67,14 @@ int ip_forward(struct sk_buff *skb)
if (skb->pkt_type != PACKET_HOST)
goto drop;
- skb->ip_summed = CHECKSUM_NONE;
+ skb_forward_csum(skb);
/*
* According to the RFC, we must first decrease the TTL field. If
* that reaches zero, we must reply an ICMP control message telling
* that the packet's lifetime expired.
*/
- if (skb->nh.iph->ttl <= 1)
+ if (ip_hdr(skb)->ttl <= 1)
goto too_many_hops;
if (!xfrm4_route_forward(skb))
@@ -85,10 +85,18 @@ int ip_forward(struct sk_buff *skb)
if (opt->is_strictroute && rt->rt_dst != rt->rt_gateway)
goto sr_failed;
+ if (unlikely(skb->len > dst_mtu(&rt->u.dst) &&
+ (ip_hdr(skb)->frag_off & htons(IP_DF))) && !skb->local_df) {
+ IP_INC_STATS(IPSTATS_MIB_FRAGFAILS);
+ icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED,
+ htonl(dst_mtu(&rt->u.dst)));
+ goto drop;
+ }
+
/* We are about to mangle packet. Copy it! */
if (skb_cow(skb, LL_RESERVED_SPACE(rt->u.dst.dev)+rt->u.dst.header_len))
goto drop;
- iph = skb->nh.iph;
+ iph = ip_hdr(skb);
/* Decrease ttl after skb cow done */
ip_decrease_ttl(iph);
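
Editor's note: ip_forward() above now rejects oversized DF packets with ICMP_FRAG_NEEDED before paying for skb_cow(), and calls skb_forward_csum() instead of unconditionally clearing ip_summed. The added test, restated as a predicate sketch (name hypothetical):

        static int frag_needed_sketch(struct sk_buff *skb, struct rtable *rt)
        {
                return skb->len > dst_mtu(&rt->u.dst) &&          /* over route MTU */
                       (ip_hdr(skb)->frag_off & htons(IP_DF)) &&  /* DF is set */
                       !skb->local_df;                            /* no local override */
        }
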
diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c
index b6f05538037..0231bdcb2ab 100644
--- a/net/ipv4/ip_fragment.c
+++ b/net/ipv4/ip_fragment.c
@@ -92,7 +92,7 @@ struct ipq {
spinlock_t lock;
atomic_t refcnt;
struct timer_list timer; /* when will this queue expire? */
- struct timeval stamp;
+ ktime_t stamp;
int iif;
unsigned int rid;
struct inet_peer *peer;
@@ -184,7 +184,7 @@ static __inline__ struct ipq *frag_alloc_queue(void)
{
struct ipq *qp = kmalloc(sizeof(struct ipq), GFP_ATOMIC);
- if(!qp)
+ if (!qp)
return NULL;
atomic_add(sizeof(struct ipq), &ip_frag_mem);
return qp;
@@ -321,11 +321,11 @@ static struct ipq *ip_frag_intern(struct ipq *qp_in)
* promoted read lock to write lock.
*/
hlist_for_each_entry(qp, n, &ipq_hash[hash], list) {
- if(qp->id == qp_in->id &&
- qp->saddr == qp_in->saddr &&
- qp->daddr == qp_in->daddr &&
- qp->protocol == qp_in->protocol &&
- qp->user == qp_in->user) {
+ if (qp->id == qp_in->id &&
+ qp->saddr == qp_in->saddr &&
+ qp->daddr == qp_in->daddr &&
+ qp->protocol == qp_in->protocol &&
+ qp->user == qp_in->user) {
atomic_inc(&qp->refcnt);
write_unlock(&ipfrag_lock);
qp_in->last_in |= COMPLETE;
@@ -398,11 +398,11 @@ static inline struct ipq *ip_find(struct iphdr *iph, u32 user)
read_lock(&ipfrag_lock);
hash = ipqhashfn(id, saddr, daddr, protocol);
hlist_for_each_entry(qp, n, &ipq_hash[hash], list) {
- if(qp->id == id &&
- qp->saddr == saddr &&
- qp->daddr == daddr &&
- qp->protocol == protocol &&
- qp->user == user) {
+ if (qp->id == id &&
+ qp->saddr == saddr &&
+ qp->daddr == daddr &&
+ qp->protocol == protocol &&
+ qp->user == user) {
atomic_inc(&qp->refcnt);
read_unlock(&ipfrag_lock);
return qp;
@@ -479,11 +479,11 @@ static void ip_frag_queue(struct ipq *qp, struct sk_buff *skb)
goto err;
}
- offset = ntohs(skb->nh.iph->frag_off);
+ offset = ntohs(ip_hdr(skb)->frag_off);
flags = offset & ~IP_OFFSET;
offset &= IP_OFFSET;
offset <<= 3; /* offset is in 8-byte chunks */
- ihl = skb->nh.iph->ihl * 4;
+ ihl = ip_hdrlen(skb);
/* Determine the position of this fragment. */
end = offset + skb->len - ihl;
@@ -524,7 +524,7 @@ static void ip_frag_queue(struct ipq *qp, struct sk_buff *skb)
* this fragment, right?
*/
prev = NULL;
- for(next = qp->fragments; next != NULL; next = next->next) {
+ for (next = qp->fragments; next != NULL; next = next->next) {
if (FRAG_CB(next)->offset >= offset)
break; /* bingo! */
prev = next;
@@ -592,7 +592,7 @@ static void ip_frag_queue(struct ipq *qp, struct sk_buff *skb)
if (skb->dev)
qp->iif = skb->dev->ifindex;
skb->dev = NULL;
- skb_get_timestamp(skb, &qp->stamp);
+ qp->stamp = skb->tstamp;
qp->meat += skb->len;
atomic_add(skb->truesize, &ip_frag_mem);
if (offset == 0)
@@ -624,10 +624,10 @@ static struct sk_buff *ip_frag_reasm(struct ipq *qp, struct net_device *dev)
BUG_TRAP(FRAG_CB(head)->offset == 0);
/* Allocate a new buffer for the datagram. */
- ihlen = head->nh.iph->ihl*4;
+ ihlen = ip_hdrlen(head);
len = ihlen + qp->len;
- if(len > 65535)
+ if (len > 65535)
goto out_oversize;
/* Head of list must not be cloned. */
@@ -658,7 +658,7 @@ static struct sk_buff *ip_frag_reasm(struct ipq *qp, struct net_device *dev)
}
skb_shinfo(head)->frag_list = head->next;
- skb_push(head, head->data - head->nh.raw);
+ skb_push(head, head->data - skb_network_header(head));
atomic_sub(head->truesize, &ip_frag_mem);
for (fp=head->next; fp; fp = fp->next) {
@@ -674,9 +674,9 @@ static struct sk_buff *ip_frag_reasm(struct ipq *qp, struct net_device *dev)
head->next = NULL;
head->dev = dev;
- skb_set_timestamp(head, &qp->stamp);
+ head->tstamp = qp->stamp;
- iph = head->nh.iph;
+ iph = ip_hdr(head);
iph->frag_off = 0;
iph->tot_len = htons(len);
IP_INC_STATS_BH(IPSTATS_MIB_REASMOKS);
@@ -700,7 +700,6 @@ out_fail:
/* Process an incoming IP datagram fragment. */
struct sk_buff *ip_defrag(struct sk_buff *skb, u32 user)
{
- struct iphdr *iph = skb->nh.iph;
struct ipq *qp;
struct net_device *dev;
@@ -713,7 +712,7 @@ struct sk_buff *ip_defrag(struct sk_buff *skb, u32 user)
dev = skb->dev;
/* Lookup (or create) queue header */
- if ((qp = ip_find(iph, user)) != NULL) {
+ if ((qp = ip_find(ip_hdr(skb), user)) != NULL) {
struct sk_buff *ret = NULL;
spin_lock(&qp->lock);
@@ -734,7 +733,7 @@ struct sk_buff *ip_defrag(struct sk_buff *skb, u32 user)
return NULL;
}
-void ipfrag_init(void)
+void __init ipfrag_init(void)
{
ipfrag_hash_rnd = (u32) ((num_physpages ^ (num_physpages>>7)) ^
(jiffies ^ (jiffies >> 6)));
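
Editor's note: the fragment queue above swaps its struct timeval stamp for ktime_t (a plain skb->tstamp copy, no conversion helper needed), and the open-coded iph->ihl * 4 becomes ip_hdrlen(). The accessor presumably reduces to this (illustrative, not the literal kernel definition):

        static inline unsigned int ip_hdrlen_sketch(const struct sk_buff *skb)
        {
                return ip_hdr(skb)->ihl * 4;    /* IHL counts 32-bit words */
        }
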
diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c
index 9151da64231..63282934725 100644
--- a/net/ipv4/ip_gre.c
+++ b/net/ipv4/ip_gre.c
@@ -191,11 +191,11 @@ static struct ip_tunnel * ipgre_tunnel_lookup(__be32 remote, __be32 local, __be3
return NULL;
}
-static struct ip_tunnel **ipgre_bucket(struct ip_tunnel *t)
+static struct ip_tunnel **__ipgre_bucket(struct ip_tunnel_parm *parms)
{
- __be32 remote = t->parms.iph.daddr;
- __be32 local = t->parms.iph.saddr;
- __be32 key = t->parms.i_key;
+ __be32 remote = parms->iph.daddr;
+ __be32 local = parms->iph.saddr;
+ __be32 key = parms->i_key;
unsigned h = HASH(key);
int prio = 0;
@@ -209,6 +209,11 @@ static struct ip_tunnel **ipgre_bucket(struct ip_tunnel *t)
return &tunnels[prio][h];
}
+static inline struct ip_tunnel **ipgre_bucket(struct ip_tunnel *t)
+{
+ return __ipgre_bucket(&t->parms);
+}
+
static void ipgre_tunnel_link(struct ip_tunnel *t)
{
struct ip_tunnel **tp = ipgre_bucket(t);
@@ -240,17 +245,9 @@ static struct ip_tunnel * ipgre_tunnel_locate(struct ip_tunnel_parm *parms, int
__be32 key = parms->i_key;
struct ip_tunnel *t, **tp, *nt;
struct net_device *dev;
- unsigned h = HASH(key);
- int prio = 0;
char name[IFNAMSIZ];
- if (local)
- prio |= 1;
- if (remote && !MULTICAST(remote)) {
- prio |= 2;
- h ^= HASH(remote);
- }
- for (tp = &tunnels[prio][h]; (t = *tp) != NULL; tp = &t->next) {
+ for (tp = __ipgre_bucket(parms); (t = *tp) != NULL; tp = &t->next) {
if (local == t->parms.iph.saddr && remote == t->parms.iph.daddr) {
if (key == t->parms.i_key)
return t;
@@ -320,8 +317,8 @@ static void ipgre_err(struct sk_buff *skb, u32 info)
struct iphdr *iph = (struct iphdr*)skb->data;
__be16 *p = (__be16*)(skb->data+(iph->ihl<<2));
int grehlen = (iph->ihl<<2) + 4;
- int type = skb->h.icmph->type;
- int code = skb->h.icmph->code;
+ const int type = icmp_hdr(skb)->type;
+ const int code = icmp_hdr(skb)->code;
struct ip_tunnel *t;
__be16 flags;
@@ -388,8 +385,8 @@ out:
struct iphdr *iph = (struct iphdr*)dp;
struct iphdr *eiph;
__be16 *p = (__be16*)(dp+(iph->ihl<<2));
- int type = skb->h.icmph->type;
- int code = skb->h.icmph->code;
+ const int type = icmp_hdr(skb)->type;
+ const int code = icmp_hdr(skb)->code;
int rel_type = 0;
int rel_code = 0;
__be32 rel_info = 0;
@@ -422,7 +419,7 @@ out:
default:
return;
case ICMP_PARAMETERPROB:
- n = ntohl(skb->h.icmph->un.gateway) >> 24;
+ n = ntohl(icmp_hdr(skb)->un.gateway) >> 24;
if (n < (iph->ihl<<2))
return;
@@ -442,7 +439,7 @@ out:
return;
case ICMP_FRAG_NEEDED:
/* And it is the only really necessary thing :-) */
- n = ntohs(skb->h.icmph->un.frag.mtu);
+ n = ntohs(icmp_hdr(skb)->un.frag.mtu);
if (n < grehlen+68)
return;
n -= grehlen;
@@ -474,7 +471,7 @@ out:
dst_release(skb2->dst);
skb2->dst = NULL;
skb_pull(skb2, skb->data - (u8*)eiph);
- skb2->nh.raw = skb2->data;
+ skb_reset_network_header(skb2);
/* Try to guess incoming interface */
memset(&fl, 0, sizeof(fl));
@@ -533,9 +530,9 @@ static inline void ipgre_ecn_decapsulate(struct iphdr *iph, struct sk_buff *skb)
{
if (INET_ECN_is_ce(iph->tos)) {
if (skb->protocol == htons(ETH_P_IP)) {
- IP_ECN_set_ce(skb->nh.iph);
+ IP_ECN_set_ce(ip_hdr(skb));
} else if (skb->protocol == htons(ETH_P_IPV6)) {
- IP6_ECN_set_ce(skb->nh.ipv6h);
+ IP6_ECN_set_ce(ipv6_hdr(skb));
}
}
}
@@ -565,7 +562,7 @@ static int ipgre_rcv(struct sk_buff *skb)
if (!pskb_may_pull(skb, 16))
goto drop_nolock;
- iph = skb->nh.iph;
+ iph = ip_hdr(skb);
h = skb->data;
flags = *(__be16*)h;
@@ -616,9 +613,10 @@ static int ipgre_rcv(struct sk_buff *skb)
offset += 4;
}
- skb->mac.raw = skb->nh.raw;
- skb->nh.raw = __pskb_pull(skb, offset);
- skb_postpull_rcsum(skb, skb->h.raw, offset);
+ skb_reset_mac_header(skb);
+ __pskb_pull(skb, offset);
+ skb_reset_network_header(skb);
+ skb_postpull_rcsum(skb, skb_transport_header(skb), offset);
skb->pkt_type = PACKET_HOST;
#ifdef CONFIG_NET_IPGRE_BROADCAST
if (MULTICAST(iph->daddr)) {
@@ -669,7 +667,7 @@ static int ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
{
struct ip_tunnel *tunnel = netdev_priv(dev);
struct net_device_stats *stats = &tunnel->stat;
- struct iphdr *old_iph = skb->nh.iph;
+ struct iphdr *old_iph = ip_hdr(skb);
struct iphdr *tiph;
u8 tos;
__be16 df;
@@ -720,7 +718,7 @@ static int ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
addr_type = ipv6_addr_type(addr6);
if (addr_type == IPV6_ADDR_ANY) {
- addr6 = &skb->nh.ipv6h->daddr;
+ addr6 = &ipv6_hdr(skb)->daddr;
addr_type = ipv6_addr_type(addr6);
}
@@ -824,11 +822,12 @@ static int ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
skb_set_owner_w(new_skb, skb->sk);
dev_kfree_skb(skb);
skb = new_skb;
- old_iph = skb->nh.iph;
+ old_iph = ip_hdr(skb);
}
- skb->h.raw = skb->nh.raw;
- skb->nh.raw = skb_push(skb, gre_hlen);
+ skb->transport_header = skb->network_header;
+ skb_push(skb, gre_hlen);
+ skb_reset_network_header(skb);
memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
IPCB(skb)->flags &= ~(IPSKB_XFRM_TUNNEL_SIZE | IPSKB_XFRM_TRANSFORMED |
IPSKB_REROUTED);
@@ -839,7 +838,7 @@ static int ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
* Push down and install the IPIP header.
*/
- iph = skb->nh.iph;
+ iph = ip_hdr(skb);
iph->version = 4;
iph->ihl = sizeof(struct iphdr) >> 2;
iph->frag_off = df;
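
Editor's note: the GRE receive path above replaces the manual mac.raw/nh.raw shuffling with named header resets. The decapsulation sequence, isolated as a sketch (offset is the GRE header length the caller has already computed; the rcsum fixup mirrors the hunk, where the transport header still points at the pulled GRE bytes):

        static void gre_decap_sketch(struct sk_buff *skb, int offset)
        {
                skb_reset_mac_header(skb);      /* mac hdr := current data */
                __pskb_pull(skb, offset);       /* strip the GRE header */
                skb_reset_network_header(skb);  /* nh := inner packet start */
                skb_postpull_rcsum(skb, skb_transport_header(skb), offset);
        }
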
diff --git a/net/ipv4/ip_input.c b/net/ipv4/ip_input.c
index f38e97647ac..97069399d86 100644
--- a/net/ipv4/ip_input.c
+++ b/net/ipv4/ip_input.c
@@ -158,7 +158,7 @@ DEFINE_SNMP_STAT(struct ipstats_mib, ip_statistics) __read_mostly;
int ip_call_ra_chain(struct sk_buff *skb)
{
struct ip_ra_chain *ra;
- u8 protocol = skb->nh.iph->protocol;
+ u8 protocol = ip_hdr(skb)->protocol;
struct sock *last = NULL;
read_lock(&ip_ra_lock);
@@ -171,7 +171,7 @@ int ip_call_ra_chain(struct sk_buff *skb)
if (sk && inet_sk(sk)->num == protocol &&
(!sk->sk_bound_dev_if ||
sk->sk_bound_dev_if == skb->dev->ifindex)) {
- if (skb->nh.iph->frag_off & htons(IP_MF|IP_OFFSET)) {
+ if (ip_hdr(skb)->frag_off & htons(IP_MF | IP_OFFSET)) {
skb = ip_defrag(skb, IP_DEFRAG_CALL_RA_CHAIN);
if (skb == NULL) {
read_unlock(&ip_ra_lock);
@@ -198,17 +198,15 @@ int ip_call_ra_chain(struct sk_buff *skb)
static inline int ip_local_deliver_finish(struct sk_buff *skb)
{
- int ihl = skb->nh.iph->ihl*4;
-
- __skb_pull(skb, ihl);
+ __skb_pull(skb, ip_hdrlen(skb));
/* Point into the IP datagram, just past the header. */
- skb->h.raw = skb->data;
+ skb_reset_transport_header(skb);
rcu_read_lock();
{
/* Note: See raw.c and net/raw.h, RAWV4_HTABLE_SIZE==MAX_INET_PROTOS */
- int protocol = skb->nh.iph->protocol;
+ int protocol = ip_hdr(skb)->protocol;
int hash;
struct sock *raw_sk;
struct net_protocol *ipprot;
@@ -220,7 +218,7 @@ static inline int ip_local_deliver_finish(struct sk_buff *skb)
/* If there maybe a raw socket we must check - if not we
* don't care less
*/
- if (raw_sk && !raw_v4_input(skb, skb->nh.iph, hash))
+ if (raw_sk && !raw_v4_input(skb, ip_hdr(skb), hash))
raw_sk = NULL;
if ((ipprot = rcu_dereference(inet_protos[hash])) != NULL) {
@@ -266,7 +264,7 @@ int ip_local_deliver(struct sk_buff *skb)
* Reassemble IP fragments.
*/
- if (skb->nh.iph->frag_off & htons(IP_MF|IP_OFFSET)) {
+ if (ip_hdr(skb)->frag_off & htons(IP_MF | IP_OFFSET)) {
skb = ip_defrag(skb, IP_DEFRAG_LOCAL_DELIVER);
if (!skb)
return 0;
@@ -294,7 +292,7 @@ static inline int ip_rcv_options(struct sk_buff *skb)
goto drop;
}
- iph = skb->nh.iph;
+ iph = ip_hdr(skb);
if (ip_options_compile(NULL, skb)) {
IP_INC_STATS_BH(IPSTATS_MIB_INHDRERRORS);
@@ -330,7 +328,8 @@ drop:
static inline int ip_rcv_finish(struct sk_buff *skb)
{
- struct iphdr *iph = skb->nh.iph;
+ const struct iphdr *iph = ip_hdr(skb);
+ struct rtable *rt;
/*
* Initialise the virtual path cache for the packet. It describes
@@ -342,6 +341,8 @@ static inline int ip_rcv_finish(struct sk_buff *skb)
if (unlikely(err)) {
if (err == -EHOSTUNREACH)
IP_INC_STATS_BH(IPSTATS_MIB_INADDRERRORS);
+ else if (err == -ENETUNREACH)
+ IP_INC_STATS_BH(IPSTATS_MIB_INNOROUTES);
goto drop;
}
}
@@ -360,6 +361,12 @@ static inline int ip_rcv_finish(struct sk_buff *skb)
if (iph->ihl > 5 && ip_rcv_options(skb))
goto drop;
+ rt = (struct rtable*)skb->dst;
+ if (rt->rt_type == RTN_MULTICAST)
+ IP_INC_STATS_BH(IPSTATS_MIB_INMCASTPKTS);
+ else if (rt->rt_type == RTN_BROADCAST)
+ IP_INC_STATS_BH(IPSTATS_MIB_INBCASTPKTS);
+
return dst_input(skb);
drop:
@@ -391,7 +398,7 @@ int ip_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt,
if (!pskb_may_pull(skb, sizeof(struct iphdr)))
goto inhdr_error;
- iph = skb->nh.iph;
+ iph = ip_hdr(skb);
/*
* RFC1122: 3.1.2.2 MUST silently discard any IP frame that fails the checksum.
@@ -410,13 +417,16 @@ int ip_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt,
if (!pskb_may_pull(skb, iph->ihl*4))
goto inhdr_error;
- iph = skb->nh.iph;
+ iph = ip_hdr(skb);
if (unlikely(ip_fast_csum((u8 *)iph, iph->ihl)))
goto inhdr_error;
len = ntohs(iph->tot_len);
- if (skb->len < len || len < (iph->ihl*4))
+ if (skb->len < len) {
+ IP_INC_STATS_BH(IPSTATS_MIB_INTRUNCATEDPKTS);
+ goto drop;
+ } else if (len < (iph->ihl*4))
goto inhdr_error;
/* Our transport medium may have padded the buffer out. Now we know it
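
Editor's note: ip_rcv() above splits the old combined length check so that a packet shorter than tot_len bumps the new INTRUNCATEDPKTS counter, while an impossible tot_len (smaller than the header itself) still counts as a header error; ip_rcv_finish() also gains multicast and broadcast input counters. The split check as a sketch (name hypothetical):

        static int ip_len_check_sketch(const struct sk_buff *skb)
        {
                const struct iphdr *iph = ip_hdr(skb);
                unsigned int len = ntohs(iph->tot_len);

                if (skb->len < len)
                        return -1;      /* wire-truncated: INTRUNCATEDPKTS */
                if (len < iph->ihl * 4)
                        return -2;      /* mangled header: INHDRERRORS */
                return 0;
        }
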
diff --git a/net/ipv4/ip_options.c b/net/ipv4/ip_options.c
index f906a80d5a8..251346828cb 100644
--- a/net/ipv4/ip_options.c
+++ b/net/ipv4/ip_options.c
@@ -40,7 +40,7 @@
void ip_options_build(struct sk_buff * skb, struct ip_options * opt,
__be32 daddr, struct rtable *rt, int is_frag)
{
- unsigned char * iph = skb->nh.raw;
+ unsigned char *iph = skb_network_header(skb);
memcpy(&(IPCB(skb)->opt), opt, sizeof(struct ip_options));
memcpy(iph+sizeof(struct iphdr), opt->__data, opt->optlen);
@@ -104,13 +104,13 @@ int ip_options_echo(struct ip_options * dopt, struct sk_buff * skb)
return 0;
}
- sptr = skb->nh.raw;
+ sptr = skb_network_header(skb);
dptr = dopt->__data;
if (skb->dst)
daddr = ((struct rtable*)skb->dst)->rt_spec_dst;
else
- daddr = skb->nh.iph->daddr;
+ daddr = ip_hdr(skb)->daddr;
if (sopt->rr) {
optlen = sptr[sopt->rr+1];
@@ -180,7 +180,8 @@ int ip_options_echo(struct ip_options * dopt, struct sk_buff * skb)
/*
* RFC1812 requires to fix illegal source routes.
*/
- if (memcmp(&skb->nh.iph->saddr, &start[soffset+3], 4) == 0)
+ if (memcmp(&ip_hdr(skb)->saddr,
+ &start[soffset + 3], 4) == 0)
doffset -= 4;
}
if (doffset > 3) {
@@ -217,7 +218,7 @@ int ip_options_echo(struct ip_options * dopt, struct sk_buff * skb)
void ip_options_fragment(struct sk_buff * skb)
{
- unsigned char * optptr = skb->nh.raw + sizeof(struct iphdr);
+ unsigned char *optptr = skb_network_header(skb) + sizeof(struct iphdr);
struct ip_options * opt = &(IPCB(skb)->opt);
int l = opt->optlen;
int optlen;
@@ -264,12 +265,13 @@ int ip_options_compile(struct ip_options * opt, struct sk_buff * skb)
if (!opt) {
opt = &(IPCB(skb)->opt);
- iph = skb->nh.raw;
+ iph = skb_network_header(skb);
opt->optlen = ((struct iphdr *)iph)->ihl*4 - sizeof(struct iphdr);
optptr = iph + sizeof(struct iphdr);
opt->is_data = 0;
} else {
- optptr = opt->is_data ? opt->__data : (unsigned char*)&(skb->nh.iph[1]);
+ optptr = opt->is_data ? opt->__data :
+ (unsigned char *)&(ip_hdr(skb)[1]);
iph = optptr - sizeof(struct iphdr);
}
@@ -563,7 +565,7 @@ void ip_forward_options(struct sk_buff *skb)
struct ip_options * opt = &(IPCB(skb)->opt);
unsigned char * optptr;
struct rtable *rt = (struct rtable*)skb->dst;
- unsigned char *raw = skb->nh.raw;
+ unsigned char *raw = skb_network_header(skb);
if (opt->rr_needaddr) {
optptr = (unsigned char *)raw + opt->rr;
@@ -587,7 +589,7 @@ void ip_forward_options(struct sk_buff *skb)
if (srrptr + 3 <= srrspace) {
opt->is_changed = 1;
ip_rt_get_source(&optptr[srrptr-1], rt);
- skb->nh.iph->daddr = rt->rt_dst;
+ ip_hdr(skb)->daddr = rt->rt_dst;
optptr[2] = srrptr+4;
} else if (net_ratelimit())
printk(KERN_CRIT "ip_forward(): Argh! Destination lost!\n");
@@ -599,7 +601,7 @@ void ip_forward_options(struct sk_buff *skb)
}
if (opt->is_changed) {
opt->is_changed = 0;
- ip_send_check(skb->nh.iph);
+ ip_send_check(ip_hdr(skb));
}
}
@@ -608,8 +610,8 @@ int ip_options_rcv_srr(struct sk_buff *skb)
struct ip_options *opt = &(IPCB(skb)->opt);
int srrspace, srrptr;
__be32 nexthop;
- struct iphdr *iph = skb->nh.iph;
- unsigned char * optptr = skb->nh.raw + opt->srr;
+ struct iphdr *iph = ip_hdr(skb);
+ unsigned char *optptr = skb_network_header(skb) + opt->srr;
struct rtable *rt = (struct rtable*)skb->dst;
struct rtable *rt2;
int err;
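
Editor's note: ip_options.c above derives every option pointer from skb_network_header() rather than nh.raw. The two expressions the hunks keep repeating, written out as sketches (names hypothetical):

        /* First option byte: directly past the fixed 20-byte IP header. */
        static unsigned char *opt_start_sketch(struct sk_buff *skb)
        {
                return skb_network_header(skb) + sizeof(struct iphdr);
        }

        /* Total option length, recovered from the IHL field. */
        static int opt_len_sketch(const struct sk_buff *skb)
        {
                return ip_hdr(skb)->ihl * 4 - sizeof(struct iphdr);
        }
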
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index d096332f6c6..d6427d91851 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -95,8 +95,8 @@ __inline__ void ip_send_check(struct iphdr *iph)
/* dev_loopback_xmit for use with netfilter. */
static int ip_dev_loopback_xmit(struct sk_buff *newskb)
{
- newskb->mac.raw = newskb->data;
- __skb_pull(newskb, newskb->nh.raw - newskb->data);
+ skb_reset_mac_header(newskb);
+ __skb_pull(newskb, skb_network_offset(newskb));
newskb->pkt_type = PACKET_LOOPBACK;
newskb->ip_summed = CHECKSUM_UNNECESSARY;
BUG_TRAP(newskb->dst);
@@ -125,11 +125,9 @@ int ip_build_and_send_pkt(struct sk_buff *skb, struct sock *sk,
struct iphdr *iph;
/* Build the IP header. */
- if (opt)
- iph=(struct iphdr *)skb_push(skb,sizeof(struct iphdr) + opt->optlen);
- else
- iph=(struct iphdr *)skb_push(skb,sizeof(struct iphdr));
-
+ skb_push(skb, sizeof(struct iphdr) + (opt ? opt->optlen : 0));
+ skb_reset_network_header(skb);
+ iph = ip_hdr(skb);
iph->version = 4;
iph->ihl = 5;
iph->tos = inet->tos;
@@ -143,7 +141,6 @@ int ip_build_and_send_pkt(struct sk_buff *skb, struct sock *sk,
iph->protocol = sk->sk_protocol;
iph->tot_len = htons(skb->len);
ip_select_ident(iph, &rt->u.dst, sk);
- skb->nh.iph = iph;
if (opt && opt->optlen) {
iph->ihl += opt->optlen>>2;
@@ -163,9 +160,15 @@ EXPORT_SYMBOL_GPL(ip_build_and_send_pkt);
static inline int ip_finish_output2(struct sk_buff *skb)
{
struct dst_entry *dst = skb->dst;
+ struct rtable *rt = (struct rtable *)dst;
struct net_device *dev = dst->dev;
int hh_len = LL_RESERVED_SPACE(dev);
+ if (rt->rt_type == RTN_MULTICAST)
+ IP_INC_STATS(IPSTATS_MIB_OUTMCASTPKTS);
+ else if (rt->rt_type == RTN_BROADCAST)
+ IP_INC_STATS(IPSTATS_MIB_OUTBCASTPKTS);
+
/* Be paranoid, rather than too clever. */
if (unlikely(skb_headroom(skb) < hh_len && dev->hard_header)) {
struct sk_buff *skb2;
@@ -192,6 +195,14 @@ static inline int ip_finish_output2(struct sk_buff *skb)
return -EINVAL;
}
+static inline int ip_skb_dst_mtu(struct sk_buff *skb)
+{
+ struct inet_sock *inet = skb->sk ? inet_sk(skb->sk) : NULL;
+
+ return (inet && inet->pmtudisc == IP_PMTUDISC_PROBE) ?
+ skb->dst->dev->mtu : dst_mtu(skb->dst);
+}
+
static inline int ip_finish_output(struct sk_buff *skb)
{
#if defined(CONFIG_NETFILTER) && defined(CONFIG_XFRM)
@@ -201,7 +212,7 @@ static inline int ip_finish_output(struct sk_buff *skb)
return dst_output(skb);
}
#endif
- if (skb->len > dst_mtu(skb->dst) && !skb_is_gso(skb))
+ if (skb->len > ip_skb_dst_mtu(skb) && !skb_is_gso(skb))
return ip_fragment(skb, ip_finish_output2);
else
return ip_finish_output2(skb);
@@ -248,7 +259,7 @@ int ip_mc_output(struct sk_buff *skb)
/* Multicasts with ttl 0 must not go beyond the host */
- if (skb->nh.iph->ttl == 0) {
+ if (ip_hdr(skb)->ttl == 0) {
kfree_skb(skb);
return 0;
}
@@ -333,7 +344,9 @@ packet_routed:
goto no_route;
/* OK, we know where to send it, allocate and build IP header. */
- iph = (struct iphdr *) skb_push(skb, sizeof(struct iphdr) + (opt ? opt->optlen : 0));
+ skb_push(skb, sizeof(struct iphdr) + (opt ? opt->optlen : 0));
+ skb_reset_network_header(skb);
+ iph = ip_hdr(skb);
*((__be16 *)iph) = htons((4 << 12) | (5 << 8) | (inet->tos & 0xff));
iph->tot_len = htons(skb->len);
if (ip_dont_fragment(sk, &rt->u.dst) && !ipfragok)
@@ -344,7 +357,6 @@ packet_routed:
iph->protocol = sk->sk_protocol;
iph->saddr = rt->rt_src;
iph->daddr = rt->rt_dst;
- skb->nh.iph = iph;
/* Transport layer set skb->h.foo itself. */
if (opt && opt->optlen) {
@@ -386,21 +398,10 @@ static void ip_copy_metadata(struct sk_buff *to, struct sk_buff *from)
#ifdef CONFIG_NET_SCHED
to->tc_index = from->tc_index;
#endif
-#ifdef CONFIG_NETFILTER
- /* Connection association is same as pre-frag packet */
- nf_conntrack_put(to->nfct);
- to->nfct = from->nfct;
- nf_conntrack_get(to->nfct);
- to->nfctinfo = from->nfctinfo;
+ nf_copy(to, from);
#if defined(CONFIG_IP_VS) || defined(CONFIG_IP_VS_MODULE)
to->ipvs_property = from->ipvs_property;
#endif
-#ifdef CONFIG_BRIDGE_NETFILTER
- nf_bridge_put(to->nf_bridge);
- to->nf_bridge = from->nf_bridge;
- nf_bridge_get(to->nf_bridge);
-#endif
-#endif
skb_copy_secmark(to, from);
}
@@ -430,12 +431,12 @@ int ip_fragment(struct sk_buff *skb, int (*output)(struct sk_buff*))
* Point into the IP datagram header.
*/
- iph = skb->nh.iph;
+ iph = ip_hdr(skb);
if (unlikely((iph->frag_off & htons(IP_DF)) && !skb->local_df)) {
IP_INC_STATS(IPSTATS_MIB_FRAGFAILS);
icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED,
- htonl(dst_mtu(&rt->u.dst)));
+ htonl(ip_skb_dst_mtu(skb)));
kfree_skb(skb);
return -EMSGSIZE;
}
@@ -502,10 +503,11 @@ int ip_fragment(struct sk_buff *skb, int (*output)(struct sk_buff*))
* before previous one went down. */
if (frag) {
frag->ip_summed = CHECKSUM_NONE;
- frag->h.raw = frag->data;
- frag->nh.raw = __skb_push(frag, hlen);
- memcpy(frag->nh.raw, iph, hlen);
- iph = frag->nh.iph;
+ skb_reset_transport_header(frag);
+ __skb_push(frag, hlen);
+ skb_reset_network_header(frag);
+ memcpy(skb_network_header(frag), iph, hlen);
+ iph = ip_hdr(frag);
iph->tot_len = htons(frag->len);
ip_copy_metadata(frag, skb);
if (offset == 0)
@@ -566,7 +568,7 @@ slow_path:
* Keep copying data until we run out.
*/
- while(left > 0) {
+ while (left > 0) {
len = left;
/* IF: it doesn't fit, use 'mtu' - the data space left */
if (len > mtu)
@@ -593,8 +595,8 @@ slow_path:
ip_copy_metadata(skb2, skb);
skb_reserve(skb2, ll_rs);
skb_put(skb2, len + hlen);
- skb2->nh.raw = skb2->data;
- skb2->h.raw = skb2->data + hlen;
+ skb_reset_network_header(skb2);
+ skb2->transport_header = skb2->network_header + hlen;
/*
* Charge the memory for the fragment to any owner
@@ -608,19 +610,19 @@ slow_path:
* Copy the packet header into the new buffer.
*/
- memcpy(skb2->nh.raw, skb->data, hlen);
+ skb_copy_from_linear_data(skb, skb_network_header(skb2), hlen);
/*
* Copy a block of the IP datagram.
*/
- if (skb_copy_bits(skb, ptr, skb2->h.raw, len))
+ if (skb_copy_bits(skb, ptr, skb_transport_header(skb2), len))
BUG();
left -= len;
/*
* Fill in the new header fields.
*/
- iph = skb2->nh.iph;
+ iph = ip_hdr(skb2);
iph->frag_off = htons((offset >> 3));
/* ANK: dirty, but effective trick. Upgrade options only if
@@ -722,10 +724,10 @@ static inline int ip_ufo_append_data(struct sock *sk,
skb_put(skb,fragheaderlen + transhdrlen);
/* initialize network header pointer */
- skb->nh.raw = skb->data;
+ skb_reset_network_header(skb);
/* initialize protocol header pointer */
- skb->h.raw = skb->data + fragheaderlen;
+ skb->transport_header = skb->network_header + fragheaderlen;
skb->ip_summed = CHECKSUM_PARTIAL;
skb->csum = 0;
@@ -799,7 +801,9 @@ int ip_append_data(struct sock *sk,
inet->cork.addr = ipc->addr;
}
dst_hold(&rt->u.dst);
- inet->cork.fragsize = mtu = dst_mtu(rt->u.dst.path);
+ inet->cork.fragsize = mtu = inet->pmtudisc == IP_PMTUDISC_PROBE ?
+ rt->u.dst.dev->mtu :
+ dst_mtu(rt->u.dst.path);
inet->cork.rt = rt;
inet->cork.length = 0;
sk->sk_sndmsg_page = NULL;
@@ -929,9 +933,10 @@ alloc_new_skb:
* Find where to start putting bytes.
*/
data = skb_put(skb, fraglen);
- skb->nh.raw = data + exthdrlen;
+ skb_set_network_header(skb, exthdrlen);
+ skb->transport_header = (skb->network_header +
+ fragheaderlen);
data += fragheaderlen;
- skb->h.raw = data + exthdrlen;
if (fraggap) {
skb->csum = skb_copy_and_csum_bits(
@@ -1100,8 +1105,6 @@ ssize_t ip_append_page(struct sock *sk, struct page *page,
}
if (len <= 0) {
struct sk_buff *skb_prev;
- char *data;
- struct iphdr *iph;
int alloclen;
skb_prev = skb;
@@ -1124,15 +1127,15 @@ ssize_t ip_append_page(struct sock *sk, struct page *page,
/*
* Find where to start putting bytes.
*/
- data = skb_put(skb, fragheaderlen + fraggap);
- skb->nh.iph = iph = (struct iphdr *)data;
- data += fragheaderlen;
- skb->h.raw = data;
-
+ skb_put(skb, fragheaderlen + fraggap);
+ skb_reset_network_header(skb);
+ skb->transport_header = (skb->network_header +
+ fragheaderlen);
if (fraggap) {
- skb->csum = skb_copy_and_csum_bits(
- skb_prev, maxfraglen,
- data, fraggap, 0);
+ skb->csum = skb_copy_and_csum_bits(skb_prev,
+ maxfraglen,
+ skb_transport_header(skb),
+ fraggap, 0);
skb_prev->csum = csum_sub(skb_prev->csum,
skb->csum);
pskb_trim_unique(skb_prev, maxfraglen);
@@ -1198,10 +1201,10 @@ int ip_push_pending_frames(struct sock *sk)
tail_skb = &(skb_shinfo(skb)->frag_list);
/* move skb->data to ip header from ext header */
- if (skb->data < skb->nh.raw)
- __skb_pull(skb, skb->nh.raw - skb->data);
+ if (skb->data < skb_network_header(skb))
+ __skb_pull(skb, skb_network_offset(skb));
while ((tmp_skb = __skb_dequeue(&sk->sk_write_queue)) != NULL) {
- __skb_pull(tmp_skb, skb->h.raw - skb->nh.raw);
+ __skb_pull(tmp_skb, skb_network_header_len(skb));
*tail_skb = tmp_skb;
tail_skb = &(tmp_skb->next);
skb->len += tmp_skb->len;
@@ -1216,13 +1219,13 @@ int ip_push_pending_frames(struct sock *sk)
* to fragment the frame generated here. No matter, what transforms
* how transforms change size of the packet, it will come out.
*/
- if (inet->pmtudisc != IP_PMTUDISC_DO)
+ if (inet->pmtudisc < IP_PMTUDISC_DO)
skb->local_df = 1;
/* DF bit is set when we want to see DF on outgoing frames.
* If local_df is set too, we still allow to fragment this frame
* locally. */
- if (inet->pmtudisc == IP_PMTUDISC_DO ||
+ if (inet->pmtudisc >= IP_PMTUDISC_DO ||
(skb->len <= dst_mtu(&rt->u.dst) &&
ip_dont_fragment(sk, &rt->u.dst)))
df = htons(IP_DF);
@@ -1352,11 +1355,11 @@ void ip_send_reply(struct sock *sk, struct sk_buff *skb, struct ip_reply_arg *ar
struct flowi fl = { .nl_u = { .ip4_u =
{ .daddr = daddr,
.saddr = rt->rt_spec_dst,
- .tos = RT_TOS(skb->nh.iph->tos) } },
+ .tos = RT_TOS(ip_hdr(skb)->tos) } },
/* Not quite clean, but right. */
.uli_u = { .ports =
- { .sport = skb->h.th->dest,
- .dport = skb->h.th->source } },
+ { .sport = tcp_hdr(skb)->dest,
+ .dport = tcp_hdr(skb)->source } },
.proto = sk->sk_protocol };
security_skb_classify_flow(skb, &fl);
if (ip_route_output_key(&rt, &fl))
@@ -1370,14 +1373,16 @@ void ip_send_reply(struct sock *sk, struct sk_buff *skb, struct ip_reply_arg *ar
with locally disabled BH and that sk cannot be already spinlocked.
*/
bh_lock_sock(sk);
- inet->tos = skb->nh.iph->tos;
+ inet->tos = ip_hdr(skb)->tos;
sk->sk_priority = skb->priority;
- sk->sk_protocol = skb->nh.iph->protocol;
+ sk->sk_protocol = ip_hdr(skb)->protocol;
ip_append_data(sk, ip_reply_glue_bits, arg->iov->iov_base, len, 0,
&ipc, rt, MSG_DONTWAIT);
if ((skb = skb_peek(&sk->sk_write_queue)) != NULL) {
if (arg->csumoffset >= 0)
- *((__sum16 *)skb->h.raw + arg->csumoffset) = csum_fold(csum_add(skb->csum, arg->csum));
+ *((__sum16 *)skb_transport_header(skb) +
+ arg->csumoffset) = csum_fold(csum_add(skb->csum,
+ arg->csum));
skb->ip_summed = CHECKSUM_NONE;
ip_push_pending_frames(sk);
}
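
Editor's note: ip_output.c above adds ip_skb_dst_mtu() and relaxes the pmtudisc tests from equality to ordering, so the new IP_PMTUDISC_PROBE mode (numerically above IP_PMTUDISC_DO) sets DF like DO while sizing packets against the device MTU instead of the path MTU; ip_copy_metadata() also collapses its conntrack/bridge copying into nf_copy(). The two ordered tests as sketches (names hypothetical):

        static int wants_df_sketch(const struct inet_sock *inet)
        {
                return inet->pmtudisc >= IP_PMTUDISC_DO;  /* DO or PROBE */
        }

        static int allows_local_frag_sketch(const struct inet_sock *inet)
        {
                return inet->pmtudisc < IP_PMTUDISC_DO;   /* DONT or WANT */
        }
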
diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c
index 23048d9f358..4d544573f48 100644
--- a/net/ipv4/ip_sockglue.c
+++ b/net/ipv4/ip_sockglue.c
@@ -59,7 +59,7 @@ static void ip_cmsg_recv_pktinfo(struct msghdr *msg, struct sk_buff *skb)
struct in_pktinfo info;
struct rtable *rt = (struct rtable *)skb->dst;
- info.ipi_addr.s_addr = skb->nh.iph->daddr;
+ info.ipi_addr.s_addr = ip_hdr(skb)->daddr;
if (rt) {
info.ipi_ifindex = rt->rt_iif;
info.ipi_spec_dst.s_addr = rt->rt_spec_dst;
@@ -73,13 +73,13 @@ static void ip_cmsg_recv_pktinfo(struct msghdr *msg, struct sk_buff *skb)
static void ip_cmsg_recv_ttl(struct msghdr *msg, struct sk_buff *skb)
{
- int ttl = skb->nh.iph->ttl;
+ int ttl = ip_hdr(skb)->ttl;
put_cmsg(msg, SOL_IP, IP_TTL, sizeof(int), &ttl);
}
static void ip_cmsg_recv_tos(struct msghdr *msg, struct sk_buff *skb)
{
- put_cmsg(msg, SOL_IP, IP_TOS, 1, &skb->nh.iph->tos);
+ put_cmsg(msg, SOL_IP, IP_TOS, 1, &ip_hdr(skb)->tos);
}
static void ip_cmsg_recv_opts(struct msghdr *msg, struct sk_buff *skb)
@@ -87,7 +87,8 @@ static void ip_cmsg_recv_opts(struct msghdr *msg, struct sk_buff *skb)
if (IPCB(skb)->opt.optlen == 0)
return;
- put_cmsg(msg, SOL_IP, IP_RECVOPTS, IPCB(skb)->opt.optlen, skb->nh.iph+1);
+ put_cmsg(msg, SOL_IP, IP_RECVOPTS, IPCB(skb)->opt.optlen,
+ ip_hdr(skb) + 1);
}
@@ -268,18 +269,21 @@ void ip_icmp_error(struct sock *sk, struct sk_buff *skb, int err,
serr = SKB_EXT_ERR(skb);
serr->ee.ee_errno = err;
serr->ee.ee_origin = SO_EE_ORIGIN_ICMP;
- serr->ee.ee_type = skb->h.icmph->type;
- serr->ee.ee_code = skb->h.icmph->code;
+ serr->ee.ee_type = icmp_hdr(skb)->type;
+ serr->ee.ee_code = icmp_hdr(skb)->code;
serr->ee.ee_pad = 0;
serr->ee.ee_info = info;
serr->ee.ee_data = 0;
- serr->addr_offset = (u8*)&(((struct iphdr*)(skb->h.icmph+1))->daddr) - skb->nh.raw;
+ serr->addr_offset = (u8 *)&(((struct iphdr *)(icmp_hdr(skb) + 1))->daddr) -
+ skb_network_header(skb);
serr->port = port;
- skb->h.raw = payload;
- if (!skb_pull(skb, payload - skb->data) ||
- sock_queue_err_skb(sk, skb))
- kfree_skb(skb);
+ if (skb_pull(skb, payload - skb->data) != NULL) {
+ skb_reset_transport_header(skb);
+ if (sock_queue_err_skb(sk, skb) == 0)
+ return;
+ }
+ kfree_skb(skb);
}
void ip_local_error(struct sock *sk, int err, __be32 daddr, __be16 port, u32 info)
@@ -296,8 +300,9 @@ void ip_local_error(struct sock *sk, int err, __be32 daddr, __be16 port, u32 inf
if (!skb)
return;
- iph = (struct iphdr*)skb_put(skb, sizeof(struct iphdr));
- skb->nh.iph = iph;
+ skb_put(skb, sizeof(struct iphdr));
+ skb_reset_network_header(skb);
+ iph = ip_hdr(skb);
iph->daddr = daddr;
serr = SKB_EXT_ERR(skb);
@@ -308,11 +313,11 @@ void ip_local_error(struct sock *sk, int err, __be32 daddr, __be16 port, u32 inf
serr->ee.ee_pad = 0;
serr->ee.ee_info = info;
serr->ee.ee_data = 0;
- serr->addr_offset = (u8*)&iph->daddr - skb->nh.raw;
+ serr->addr_offset = (u8 *)&iph->daddr - skb_network_header(skb);
serr->port = port;
- skb->h.raw = skb->tail;
- __skb_pull(skb, skb->tail - skb->data);
+ __skb_pull(skb, skb_tail_pointer(skb) - skb->data);
+ skb_reset_transport_header(skb);
if (sock_queue_err_skb(sk, skb))
kfree_skb(skb);
@@ -354,7 +359,8 @@ int ip_recv_error(struct sock *sk, struct msghdr *msg, int len)
sin = (struct sockaddr_in *)msg->msg_name;
if (sin) {
sin->sin_family = AF_INET;
- sin->sin_addr.s_addr = *(__be32*)(skb->nh.raw + serr->addr_offset);
+ sin->sin_addr.s_addr = *(__be32 *)(skb_network_header(skb) +
+ serr->addr_offset);
sin->sin_port = serr->port;
memset(&sin->sin_zero, 0, sizeof(sin->sin_zero));
}
@@ -366,7 +372,7 @@ int ip_recv_error(struct sock *sk, struct msghdr *msg, int len)
struct inet_sock *inet = inet_sk(sk);
sin->sin_family = AF_INET;
- sin->sin_addr.s_addr = skb->nh.iph->saddr;
+ sin->sin_addr.s_addr = ip_hdr(skb)->saddr;
sin->sin_port = 0;
memset(&sin->sin_zero, 0, sizeof(sin->sin_zero));
if (inet->cmsg_flags)
@@ -403,20 +409,20 @@ out:
*/
static int do_ip_setsockopt(struct sock *sk, int level,
- int optname, char __user *optval, int optlen)
+ int optname, char __user *optval, int optlen)
{
struct inet_sock *inet = inet_sk(sk);
int val=0,err;
if (((1<<optname) & ((1<<IP_PKTINFO) | (1<<IP_RECVTTL) |
- (1<<IP_RECVOPTS) | (1<<IP_RECVTOS) |
- (1<<IP_RETOPTS) | (1<<IP_TOS) |
- (1<<IP_TTL) | (1<<IP_HDRINCL) |
- (1<<IP_MTU_DISCOVER) | (1<<IP_RECVERR) |
- (1<<IP_ROUTER_ALERT) | (1<<IP_FREEBIND) |
- (1<<IP_PASSSEC))) ||
- optname == IP_MULTICAST_TTL ||
- optname == IP_MULTICAST_LOOP) {
+ (1<<IP_RECVOPTS) | (1<<IP_RECVTOS) |
+ (1<<IP_RETOPTS) | (1<<IP_TOS) |
+ (1<<IP_TTL) | (1<<IP_HDRINCL) |
+ (1<<IP_MTU_DISCOVER) | (1<<IP_RECVERR) |
+ (1<<IP_ROUTER_ALERT) | (1<<IP_FREEBIND) |
+ (1<<IP_PASSSEC))) ||
+ optname == IP_MULTICAST_TTL ||
+ optname == IP_MULTICAST_LOOP) {
if (optlen >= sizeof(int)) {
if (get_user(val, (int __user *) optval))
return -EFAULT;
@@ -440,444 +446,444 @@ static int do_ip_setsockopt(struct sock *sk, int level,
lock_sock(sk);
switch (optname) {
- case IP_OPTIONS:
- {
- struct ip_options * opt = NULL;
- if (optlen > 40 || optlen < 0)
- goto e_inval;
- err = ip_options_get_from_user(&opt, optval, optlen);
- if (err)
- break;
- if (inet->is_icsk) {
- struct inet_connection_sock *icsk = inet_csk(sk);
+ case IP_OPTIONS:
+ {
+ struct ip_options * opt = NULL;
+ if (optlen > 40 || optlen < 0)
+ goto e_inval;
+ err = ip_options_get_from_user(&opt, optval, optlen);
+ if (err)
+ break;
+ if (inet->is_icsk) {
+ struct inet_connection_sock *icsk = inet_csk(sk);
#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
- if (sk->sk_family == PF_INET ||
- (!((1 << sk->sk_state) &
- (TCPF_LISTEN | TCPF_CLOSE)) &&
- inet->daddr != LOOPBACK4_IPV6)) {
+ if (sk->sk_family == PF_INET ||
+ (!((1 << sk->sk_state) &
+ (TCPF_LISTEN | TCPF_CLOSE)) &&
+ inet->daddr != LOOPBACK4_IPV6)) {
#endif
- if (inet->opt)
- icsk->icsk_ext_hdr_len -= inet->opt->optlen;
- if (opt)
- icsk->icsk_ext_hdr_len += opt->optlen;
- icsk->icsk_sync_mss(sk, icsk->icsk_pmtu_cookie);
+ if (inet->opt)
+ icsk->icsk_ext_hdr_len -= inet->opt->optlen;
+ if (opt)
+ icsk->icsk_ext_hdr_len += opt->optlen;
+ icsk->icsk_sync_mss(sk, icsk->icsk_pmtu_cookie);
#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
- }
-#endif
}
- opt = xchg(&inet->opt, opt);
- kfree(opt);
- break;
+#endif
}
- case IP_PKTINFO:
- if (val)
- inet->cmsg_flags |= IP_CMSG_PKTINFO;
- else
- inet->cmsg_flags &= ~IP_CMSG_PKTINFO;
- break;
- case IP_RECVTTL:
- if (val)
- inet->cmsg_flags |= IP_CMSG_TTL;
- else
- inet->cmsg_flags &= ~IP_CMSG_TTL;
- break;
- case IP_RECVTOS:
- if (val)
- inet->cmsg_flags |= IP_CMSG_TOS;
- else
- inet->cmsg_flags &= ~IP_CMSG_TOS;
- break;
- case IP_RECVOPTS:
- if (val)
- inet->cmsg_flags |= IP_CMSG_RECVOPTS;
- else
- inet->cmsg_flags &= ~IP_CMSG_RECVOPTS;
- break;
- case IP_RETOPTS:
- if (val)
- inet->cmsg_flags |= IP_CMSG_RETOPTS;
- else
- inet->cmsg_flags &= ~IP_CMSG_RETOPTS;
+ opt = xchg(&inet->opt, opt);
+ kfree(opt);
+ break;
+ }
+ case IP_PKTINFO:
+ if (val)
+ inet->cmsg_flags |= IP_CMSG_PKTINFO;
+ else
+ inet->cmsg_flags &= ~IP_CMSG_PKTINFO;
+ break;
+ case IP_RECVTTL:
+ if (val)
+ inet->cmsg_flags |= IP_CMSG_TTL;
+ else
+ inet->cmsg_flags &= ~IP_CMSG_TTL;
+ break;
+ case IP_RECVTOS:
+ if (val)
+ inet->cmsg_flags |= IP_CMSG_TOS;
+ else
+ inet->cmsg_flags &= ~IP_CMSG_TOS;
+ break;
+ case IP_RECVOPTS:
+ if (val)
+ inet->cmsg_flags |= IP_CMSG_RECVOPTS;
+ else
+ inet->cmsg_flags &= ~IP_CMSG_RECVOPTS;
+ break;
+ case IP_RETOPTS:
+ if (val)
+ inet->cmsg_flags |= IP_CMSG_RETOPTS;
+ else
+ inet->cmsg_flags &= ~IP_CMSG_RETOPTS;
+ break;
+ case IP_PASSSEC:
+ if (val)
+ inet->cmsg_flags |= IP_CMSG_PASSSEC;
+ else
+ inet->cmsg_flags &= ~IP_CMSG_PASSSEC;
+ break;
+ case IP_TOS: /* This sets both TOS and Precedence */
+ if (sk->sk_type == SOCK_STREAM) {
+ val &= ~3;
+ val |= inet->tos & 3;
+ }
+ if (IPTOS_PREC(val) >= IPTOS_PREC_CRITIC_ECP &&
+ !capable(CAP_NET_ADMIN)) {
+ err = -EPERM;
break;
- case IP_PASSSEC:
- if (val)
- inet->cmsg_flags |= IP_CMSG_PASSSEC;
- else
- inet->cmsg_flags &= ~IP_CMSG_PASSSEC;
+ }
+ if (inet->tos != val) {
+ inet->tos = val;
+ sk->sk_priority = rt_tos2priority(val);
+ sk_dst_reset(sk);
+ }
+ break;
+ case IP_TTL:
+ if (optlen<1)
+ goto e_inval;
+ if (val != -1 && (val < 1 || val>255))
+ goto e_inval;
+ inet->uc_ttl = val;
+ break;
+ case IP_HDRINCL:
+ if (sk->sk_type != SOCK_RAW) {
+ err = -ENOPROTOOPT;
break;
- case IP_TOS: /* This sets both TOS and Precedence */
- if (sk->sk_type == SOCK_STREAM) {
- val &= ~3;
- val |= inet->tos & 3;
- }
- if (IPTOS_PREC(val) >= IPTOS_PREC_CRITIC_ECP &&
- !capable(CAP_NET_ADMIN)) {
- err = -EPERM;
+ }
+ inet->hdrincl = val ? 1 : 0;
+ break;
+ case IP_MTU_DISCOVER:
+ if (val<0 || val>3)
+ goto e_inval;
+ inet->pmtudisc = val;
+ break;
+ case IP_RECVERR:
+ inet->recverr = !!val;
+ if (!val)
+ skb_queue_purge(&sk->sk_error_queue);
+ break;
+ case IP_MULTICAST_TTL:
+ if (sk->sk_type == SOCK_STREAM)
+ goto e_inval;
+ if (optlen<1)
+ goto e_inval;
+ if (val==-1)
+ val = 1;
+ if (val < 0 || val > 255)
+ goto e_inval;
+ inet->mc_ttl = val;
+ break;
+ case IP_MULTICAST_LOOP:
+ if (optlen<1)
+ goto e_inval;
+ inet->mc_loop = !!val;
+ break;
+ case IP_MULTICAST_IF:
+ {
+ struct ip_mreqn mreq;
+ struct net_device *dev = NULL;
+
+ if (sk->sk_type == SOCK_STREAM)
+ goto e_inval;
+ /*
+ * Check the arguments are allowable
+ */
+
+ err = -EFAULT;
+ if (optlen >= sizeof(struct ip_mreqn)) {
+ if (copy_from_user(&mreq,optval,sizeof(mreq)))
break;
- }
- if (inet->tos != val) {
- inet->tos = val;
- sk->sk_priority = rt_tos2priority(val);
- sk_dst_reset(sk);
- }
- break;
- case IP_TTL:
- if (optlen<1)
- goto e_inval;
- if (val != -1 && (val < 1 || val>255))
- goto e_inval;
- inet->uc_ttl = val;
- break;
- case IP_HDRINCL:
- if (sk->sk_type != SOCK_RAW) {
- err = -ENOPROTOOPT;
+ } else {
+ memset(&mreq, 0, sizeof(mreq));
+ if (optlen >= sizeof(struct in_addr) &&
+ copy_from_user(&mreq.imr_address,optval,sizeof(struct in_addr)))
+ break;
+ }
+
+ if (!mreq.imr_ifindex) {
+ if (mreq.imr_address.s_addr == INADDR_ANY) {
+ inet->mc_index = 0;
+ inet->mc_addr = 0;
+ err = 0;
break;
}
- inet->hdrincl = val ? 1 : 0;
- break;
- case IP_MTU_DISCOVER:
- if (val<0 || val>2)
- goto e_inval;
- inet->pmtudisc = val;
- break;
- case IP_RECVERR:
- inet->recverr = !!val;
- if (!val)
- skb_queue_purge(&sk->sk_error_queue);
- break;
- case IP_MULTICAST_TTL:
- if (sk->sk_type == SOCK_STREAM)
- goto e_inval;
- if (optlen<1)
- goto e_inval;
- if (val==-1)
- val = 1;
- if (val < 0 || val > 255)
- goto e_inval;
- inet->mc_ttl = val;
- break;
- case IP_MULTICAST_LOOP:
- if (optlen<1)
- goto e_inval;
- inet->mc_loop = !!val;
- break;
- case IP_MULTICAST_IF:
- {
- struct ip_mreqn mreq;
- struct net_device *dev = NULL;
+ dev = ip_dev_find(mreq.imr_address.s_addr);
+ if (dev) {
+ mreq.imr_ifindex = dev->ifindex;
+ dev_put(dev);
+ }
+ } else
+ dev = __dev_get_by_index(mreq.imr_ifindex);
- if (sk->sk_type == SOCK_STREAM)
- goto e_inval;
- /*
- * Check the arguments are allowable
- */
- err = -EFAULT;
- if (optlen >= sizeof(struct ip_mreqn)) {
- if (copy_from_user(&mreq,optval,sizeof(mreq)))
- break;
- } else {
- memset(&mreq, 0, sizeof(mreq));
- if (optlen >= sizeof(struct in_addr) &&
- copy_from_user(&mreq.imr_address,optval,sizeof(struct in_addr)))
- break;
- }
+ err = -EADDRNOTAVAIL;
+ if (!dev)
+ break;
- if (!mreq.imr_ifindex) {
- if (mreq.imr_address.s_addr == INADDR_ANY) {
- inet->mc_index = 0;
- inet->mc_addr = 0;
- err = 0;
- break;
- }
- dev = ip_dev_find(mreq.imr_address.s_addr);
- if (dev) {
- mreq.imr_ifindex = dev->ifindex;
- dev_put(dev);
- }
- } else
- dev = __dev_get_by_index(mreq.imr_ifindex);
+ err = -EINVAL;
+ if (sk->sk_bound_dev_if &&
+ mreq.imr_ifindex != sk->sk_bound_dev_if)
+ break;
+ inet->mc_index = mreq.imr_ifindex;
+ inet->mc_addr = mreq.imr_address.s_addr;
+ err = 0;
+ break;
+ }
- err = -EADDRNOTAVAIL;
- if (!dev)
- break;
+ case IP_ADD_MEMBERSHIP:
+ case IP_DROP_MEMBERSHIP:
+ {
+ struct ip_mreqn mreq;
- err = -EINVAL;
- if (sk->sk_bound_dev_if &&
- mreq.imr_ifindex != sk->sk_bound_dev_if)
+ if (optlen < sizeof(struct ip_mreq))
+ goto e_inval;
+ err = -EFAULT;
+ if (optlen >= sizeof(struct ip_mreqn)) {
+ if (copy_from_user(&mreq,optval,sizeof(mreq)))
break;
+ } else {
+ memset(&mreq, 0, sizeof(mreq));
+ if (copy_from_user(&mreq,optval,sizeof(struct ip_mreq)))
+ break;
+ }
- inet->mc_index = mreq.imr_ifindex;
- inet->mc_addr = mreq.imr_address.s_addr;
- err = 0;
+ if (optname == IP_ADD_MEMBERSHIP)
+ err = ip_mc_join_group(sk, &mreq);
+ else
+ err = ip_mc_leave_group(sk, &mreq);
+ break;
+ }
+ case IP_MSFILTER:
+ {
+ extern int sysctl_igmp_max_msf;
+ struct ip_msfilter *msf;
+
+ if (optlen < IP_MSFILTER_SIZE(0))
+ goto e_inval;
+ if (optlen > sysctl_optmem_max) {
+ err = -ENOBUFS;
break;
}
+ msf = kmalloc(optlen, GFP_KERNEL);
+ if (msf == 0) {
+ err = -ENOBUFS;
+ break;
+ }
+ err = -EFAULT;
+ if (copy_from_user(msf, optval, optlen)) {
+ kfree(msf);
+ break;
+ }
+ /* numsrc >= (1G-4) overflow in 32 bits */
+ if (msf->imsf_numsrc >= 0x3ffffffcU ||
+ msf->imsf_numsrc > sysctl_igmp_max_msf) {
+ kfree(msf);
+ err = -ENOBUFS;
+ break;
+ }
+ if (IP_MSFILTER_SIZE(msf->imsf_numsrc) > optlen) {
+ kfree(msf);
+ err = -EINVAL;
+ break;
+ }
+ err = ip_mc_msfilter(sk, msf, 0);
+ kfree(msf);
+ break;
+ }
+ case IP_BLOCK_SOURCE:
+ case IP_UNBLOCK_SOURCE:
+ case IP_ADD_SOURCE_MEMBERSHIP:
+ case IP_DROP_SOURCE_MEMBERSHIP:
+ {
+ struct ip_mreq_source mreqs;
+ int omode, add;
- case IP_ADD_MEMBERSHIP:
- case IP_DROP_MEMBERSHIP:
- {
- struct ip_mreqn mreq;
-
- if (optlen < sizeof(struct ip_mreq))
- goto e_inval;
+ if (optlen != sizeof(struct ip_mreq_source))
+ goto e_inval;
+ if (copy_from_user(&mreqs, optval, sizeof(mreqs))) {
err = -EFAULT;
- if (optlen >= sizeof(struct ip_mreqn)) {
- if(copy_from_user(&mreq,optval,sizeof(mreq)))
- break;
- } else {
- memset(&mreq, 0, sizeof(mreq));
- if (copy_from_user(&mreq,optval,sizeof(struct ip_mreq)))
- break;
- }
-
- if (optname == IP_ADD_MEMBERSHIP)
- err = ip_mc_join_group(sk, &mreq);
- else
- err = ip_mc_leave_group(sk, &mreq);
break;
}
- case IP_MSFILTER:
- {
- extern int sysctl_igmp_max_msf;
- struct ip_msfilter *msf;
+ if (optname == IP_BLOCK_SOURCE) {
+ omode = MCAST_EXCLUDE;
+ add = 1;
+ } else if (optname == IP_UNBLOCK_SOURCE) {
+ omode = MCAST_EXCLUDE;
+ add = 0;
+ } else if (optname == IP_ADD_SOURCE_MEMBERSHIP) {
+ struct ip_mreqn mreq;
- if (optlen < IP_MSFILTER_SIZE(0))
- goto e_inval;
- if (optlen > sysctl_optmem_max) {
- err = -ENOBUFS;
- break;
- }
- msf = kmalloc(optlen, GFP_KERNEL);
- if (msf == 0) {
- err = -ENOBUFS;
+ mreq.imr_multiaddr.s_addr = mreqs.imr_multiaddr;
+ mreq.imr_address.s_addr = mreqs.imr_interface;
+ mreq.imr_ifindex = 0;
+ err = ip_mc_join_group(sk, &mreq);
+ if (err && err != -EADDRINUSE)
break;
- }
+ omode = MCAST_INCLUDE;
+ add = 1;
+ } else /* IP_DROP_SOURCE_MEMBERSHIP */ {
+ omode = MCAST_INCLUDE;
+ add = 0;
+ }
+ err = ip_mc_source(add, omode, sk, &mreqs, 0);
+ break;
+ }
+ case MCAST_JOIN_GROUP:
+ case MCAST_LEAVE_GROUP:
+ {
+ struct group_req greq;
+ struct sockaddr_in *psin;
+ struct ip_mreqn mreq;
+
+ if (optlen < sizeof(struct group_req))
+ goto e_inval;
+ err = -EFAULT;
+ if (copy_from_user(&greq, optval, sizeof(greq)))
+ break;
+ psin = (struct sockaddr_in *)&greq.gr_group;
+ if (psin->sin_family != AF_INET)
+ goto e_inval;
+ memset(&mreq, 0, sizeof(mreq));
+ mreq.imr_multiaddr = psin->sin_addr;
+ mreq.imr_ifindex = greq.gr_interface;
+
+ if (optname == MCAST_JOIN_GROUP)
+ err = ip_mc_join_group(sk, &mreq);
+ else
+ err = ip_mc_leave_group(sk, &mreq);
+ break;
+ }
+ case MCAST_JOIN_SOURCE_GROUP:
+ case MCAST_LEAVE_SOURCE_GROUP:
+ case MCAST_BLOCK_SOURCE:
+ case MCAST_UNBLOCK_SOURCE:
+ {
+ struct group_source_req greqs;
+ struct ip_mreq_source mreqs;
+ struct sockaddr_in *psin;
+ int omode, add;
+
+ if (optlen != sizeof(struct group_source_req))
+ goto e_inval;
+ if (copy_from_user(&greqs, optval, sizeof(greqs))) {
err = -EFAULT;
- if (copy_from_user(msf, optval, optlen)) {
- kfree(msf);
- break;
- }
- /* numsrc >= (1G-4) overflow in 32 bits */
- if (msf->imsf_numsrc >= 0x3ffffffcU ||
- msf->imsf_numsrc > sysctl_igmp_max_msf) {
- kfree(msf);
- err = -ENOBUFS;
- break;
- }
- if (IP_MSFILTER_SIZE(msf->imsf_numsrc) > optlen) {
- kfree(msf);
- err = -EINVAL;
- break;
- }
- err = ip_mc_msfilter(sk, msf, 0);
- kfree(msf);
break;
}
- case IP_BLOCK_SOURCE:
- case IP_UNBLOCK_SOURCE:
- case IP_ADD_SOURCE_MEMBERSHIP:
- case IP_DROP_SOURCE_MEMBERSHIP:
- {
- struct ip_mreq_source mreqs;
- int omode, add;
-
- if (optlen != sizeof(struct ip_mreq_source))
- goto e_inval;
- if (copy_from_user(&mreqs, optval, sizeof(mreqs))) {
- err = -EFAULT;
- break;
- }
- if (optname == IP_BLOCK_SOURCE) {
- omode = MCAST_EXCLUDE;
- add = 1;
- } else if (optname == IP_UNBLOCK_SOURCE) {
- omode = MCAST_EXCLUDE;
- add = 0;
- } else if (optname == IP_ADD_SOURCE_MEMBERSHIP) {
- struct ip_mreqn mreq;
-
- mreq.imr_multiaddr.s_addr = mreqs.imr_multiaddr;
- mreq.imr_address.s_addr = mreqs.imr_interface;
- mreq.imr_ifindex = 0;
- err = ip_mc_join_group(sk, &mreq);
- if (err && err != -EADDRINUSE)
- break;
- omode = MCAST_INCLUDE;
- add = 1;
- } else /* IP_DROP_SOURCE_MEMBERSHIP */ {
- omode = MCAST_INCLUDE;
- add = 0;
- }
- err = ip_mc_source(add, omode, sk, &mreqs, 0);
+ if (greqs.gsr_group.ss_family != AF_INET ||
+ greqs.gsr_source.ss_family != AF_INET) {
+ err = -EADDRNOTAVAIL;
break;
}
- case MCAST_JOIN_GROUP:
- case MCAST_LEAVE_GROUP:
- {
- struct group_req greq;
- struct sockaddr_in *psin;
+ psin = (struct sockaddr_in *)&greqs.gsr_group;
+ mreqs.imr_multiaddr = psin->sin_addr.s_addr;
+ psin = (struct sockaddr_in *)&greqs.gsr_source;
+ mreqs.imr_sourceaddr = psin->sin_addr.s_addr;
+ mreqs.imr_interface = 0; /* use index for mc_source */
+
+ if (optname == MCAST_BLOCK_SOURCE) {
+ omode = MCAST_EXCLUDE;
+ add = 1;
+ } else if (optname == MCAST_UNBLOCK_SOURCE) {
+ omode = MCAST_EXCLUDE;
+ add = 0;
+ } else if (optname == MCAST_JOIN_SOURCE_GROUP) {
struct ip_mreqn mreq;
- if (optlen < sizeof(struct group_req))
- goto e_inval;
- err = -EFAULT;
- if(copy_from_user(&greq, optval, sizeof(greq)))
- break;
- psin = (struct sockaddr_in *)&greq.gr_group;
- if (psin->sin_family != AF_INET)
- goto e_inval;
- memset(&mreq, 0, sizeof(mreq));
+ psin = (struct sockaddr_in *)&greqs.gsr_group;
mreq.imr_multiaddr = psin->sin_addr;
- mreq.imr_ifindex = greq.gr_interface;
-
- if (optname == MCAST_JOIN_GROUP)
- err = ip_mc_join_group(sk, &mreq);
- else
- err = ip_mc_leave_group(sk, &mreq);
+ mreq.imr_address.s_addr = 0;
+ mreq.imr_ifindex = greqs.gsr_interface;
+ err = ip_mc_join_group(sk, &mreq);
+ if (err && err != -EADDRINUSE)
+ break;
+ greqs.gsr_interface = mreq.imr_ifindex;
+ omode = MCAST_INCLUDE;
+ add = 1;
+ } else /* MCAST_LEAVE_SOURCE_GROUP */ {
+ omode = MCAST_INCLUDE;
+ add = 0;
+ }
+ err = ip_mc_source(add, omode, sk, &mreqs,
+ greqs.gsr_interface);
+ break;
+ }
+ case MCAST_MSFILTER:
+ {
+ extern int sysctl_igmp_max_msf;
+ struct sockaddr_in *psin;
+ struct ip_msfilter *msf = NULL;
+ struct group_filter *gsf = NULL;
+ int msize, i, ifindex;
+
+ if (optlen < GROUP_FILTER_SIZE(0))
+ goto e_inval;
+ if (optlen > sysctl_optmem_max) {
+ err = -ENOBUFS;
break;
}
- case MCAST_JOIN_SOURCE_GROUP:
- case MCAST_LEAVE_SOURCE_GROUP:
- case MCAST_BLOCK_SOURCE:
- case MCAST_UNBLOCK_SOURCE:
- {
- struct group_source_req greqs;
- struct ip_mreq_source mreqs;
- struct sockaddr_in *psin;
- int omode, add;
-
- if (optlen != sizeof(struct group_source_req))
- goto e_inval;
- if (copy_from_user(&greqs, optval, sizeof(greqs))) {
- err = -EFAULT;
- break;
- }
- if (greqs.gsr_group.ss_family != AF_INET ||
- greqs.gsr_source.ss_family != AF_INET) {
- err = -EADDRNOTAVAIL;
- break;
- }
- psin = (struct sockaddr_in *)&greqs.gsr_group;
- mreqs.imr_multiaddr = psin->sin_addr.s_addr;
- psin = (struct sockaddr_in *)&greqs.gsr_source;
- mreqs.imr_sourceaddr = psin->sin_addr.s_addr;
- mreqs.imr_interface = 0; /* use index for mc_source */
-
- if (optname == MCAST_BLOCK_SOURCE) {
- omode = MCAST_EXCLUDE;
- add = 1;
- } else if (optname == MCAST_UNBLOCK_SOURCE) {
- omode = MCAST_EXCLUDE;
- add = 0;
- } else if (optname == MCAST_JOIN_SOURCE_GROUP) {
- struct ip_mreqn mreq;
-
- psin = (struct sockaddr_in *)&greqs.gsr_group;
- mreq.imr_multiaddr = psin->sin_addr;
- mreq.imr_address.s_addr = 0;
- mreq.imr_ifindex = greqs.gsr_interface;
- err = ip_mc_join_group(sk, &mreq);
- if (err && err != -EADDRINUSE)
- break;
- greqs.gsr_interface = mreq.imr_ifindex;
- omode = MCAST_INCLUDE;
- add = 1;
- } else /* MCAST_LEAVE_SOURCE_GROUP */ {
- omode = MCAST_INCLUDE;
- add = 0;
- }
- err = ip_mc_source(add, omode, sk, &mreqs,
- greqs.gsr_interface);
+ gsf = kmalloc(optlen,GFP_KERNEL);
+ if (gsf == 0) {
+ err = -ENOBUFS;
break;
}
- case MCAST_MSFILTER:
- {
- extern int sysctl_igmp_max_msf;
- struct sockaddr_in *psin;
- struct ip_msfilter *msf = NULL;
- struct group_filter *gsf = NULL;
- int msize, i, ifindex;
-
- if (optlen < GROUP_FILTER_SIZE(0))
- goto e_inval;
- if (optlen > sysctl_optmem_max) {
- err = -ENOBUFS;
- break;
- }
- gsf = kmalloc(optlen,GFP_KERNEL);
- if (gsf == 0) {
- err = -ENOBUFS;
- break;
- }
- err = -EFAULT;
- if (copy_from_user(gsf, optval, optlen)) {
- goto mc_msf_out;
- }
- /* numsrc >= (4G-140)/128 overflow in 32 bits */
- if (gsf->gf_numsrc >= 0x1ffffff ||
- gsf->gf_numsrc > sysctl_igmp_max_msf) {
- err = -ENOBUFS;
- goto mc_msf_out;
- }
- if (GROUP_FILTER_SIZE(gsf->gf_numsrc) > optlen) {
- err = -EINVAL;
- goto mc_msf_out;
- }
- msize = IP_MSFILTER_SIZE(gsf->gf_numsrc);
- msf = kmalloc(msize,GFP_KERNEL);
- if (msf == 0) {
- err = -ENOBUFS;
- goto mc_msf_out;
- }
- ifindex = gsf->gf_interface;
- psin = (struct sockaddr_in *)&gsf->gf_group;
- if (psin->sin_family != AF_INET) {
- err = -EADDRNOTAVAIL;
- goto mc_msf_out;
- }
- msf->imsf_multiaddr = psin->sin_addr.s_addr;
- msf->imsf_interface = 0;
- msf->imsf_fmode = gsf->gf_fmode;
- msf->imsf_numsrc = gsf->gf_numsrc;
+ err = -EFAULT;
+ if (copy_from_user(gsf, optval, optlen)) {
+ goto mc_msf_out;
+ }
+ /* numsrc >= (4G-140)/128 overflow in 32 bits */
+ if (gsf->gf_numsrc >= 0x1ffffff ||
+ gsf->gf_numsrc > sysctl_igmp_max_msf) {
+ err = -ENOBUFS;
+ goto mc_msf_out;
+ }
+ if (GROUP_FILTER_SIZE(gsf->gf_numsrc) > optlen) {
+ err = -EINVAL;
+ goto mc_msf_out;
+ }
+ msize = IP_MSFILTER_SIZE(gsf->gf_numsrc);
+ msf = kmalloc(msize,GFP_KERNEL);
+ if (msf == 0) {
+ err = -ENOBUFS;
+ goto mc_msf_out;
+ }
+ ifindex = gsf->gf_interface;
+ psin = (struct sockaddr_in *)&gsf->gf_group;
+ if (psin->sin_family != AF_INET) {
err = -EADDRNOTAVAIL;
- for (i=0; i<gsf->gf_numsrc; ++i) {
- psin = (struct sockaddr_in *)&gsf->gf_slist[i];
-
- if (psin->sin_family != AF_INET)
- goto mc_msf_out;
- msf->imsf_slist[i] = psin->sin_addr.s_addr;
- }
- kfree(gsf);
- gsf = NULL;
-
- err = ip_mc_msfilter(sk, msf, ifindex);
-mc_msf_out:
- kfree(msf);
- kfree(gsf);
- break;
+ goto mc_msf_out;
}
- case IP_ROUTER_ALERT:
- err = ip_ra_control(sk, val ? 1 : 0, NULL);
- break;
-
- case IP_FREEBIND:
- if (optlen<1)
- goto e_inval;
- inet->freebind = !!val;
- break;
+ msf->imsf_multiaddr = psin->sin_addr.s_addr;
+ msf->imsf_interface = 0;
+ msf->imsf_fmode = gsf->gf_fmode;
+ msf->imsf_numsrc = gsf->gf_numsrc;
+ err = -EADDRNOTAVAIL;
+ for (i=0; i<gsf->gf_numsrc; ++i) {
+ psin = (struct sockaddr_in *)&gsf->gf_slist[i];
- case IP_IPSEC_POLICY:
- case IP_XFRM_POLICY:
- err = -EPERM;
- if (!capable(CAP_NET_ADMIN))
- break;
- err = xfrm_user_policy(sk, optname, optval, optlen);
+ if (psin->sin_family != AF_INET)
+ goto mc_msf_out;
+ msf->imsf_slist[i] = psin->sin_addr.s_addr;
+ }
+ kfree(gsf);
+ gsf = NULL;
+
+ err = ip_mc_msfilter(sk, msf, ifindex);
+ mc_msf_out:
+ kfree(msf);
+ kfree(gsf);
+ break;
+ }
+ case IP_ROUTER_ALERT:
+ err = ip_ra_control(sk, val ? 1 : 0, NULL);
+ break;
+
+ case IP_FREEBIND:
+ if (optlen<1)
+ goto e_inval;
+ inet->freebind = !!val;
+ break;
+
+ case IP_IPSEC_POLICY:
+ case IP_XFRM_POLICY:
+ err = -EPERM;
+ if (!capable(CAP_NET_ADMIN))
break;
+ err = xfrm_user_policy(sk, optname, optval, optlen);
+ break;
- default:
- err = -ENOPROTOOPT;
- break;
+ default:
+ err = -ENOPROTOOPT;
+ break;
}
release_sock(sk);
return err;
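
The IP_ADD_MEMBERSHIP case above accepts either the short struct ip_mreq or the longer struct ip_mreqn, as the two copy_from_user() branches show. A minimal userspace sketch of that join path; the group address and zero ifindex are illustrative only:

#include <arpa/inet.h>
#include <netinet/in.h>
#include <stdio.h>
#include <string.h>
#include <sys/socket.h>
#include <unistd.h>

int main(void)
{
	int fd = socket(AF_INET, SOCK_DGRAM, 0);
	struct ip_mreqn mreq;	/* glibc exposes this in <netinet/in.h> */

	if (fd < 0)
		return 1;
	memset(&mreq, 0, sizeof(mreq));
	/* 224.0.0.251 is just an example group; ifindex 0 lets the kernel pick */
	inet_pton(AF_INET, "224.0.0.251", &mreq.imr_multiaddr);
	mreq.imr_ifindex = 0;
	if (setsockopt(fd, IPPROTO_IP, IP_ADD_MEMBERSHIP,
		       &mreq, sizeof(mreq)) < 0)
		perror("IP_ADD_MEMBERSHIP");
	close(fd);
	return 0;
}

Passing sizeof(struct ip_mreq) instead would take the memset-then-short-copy branch; both end up in ip_mc_join_group().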
@@ -948,214 +954,213 @@ EXPORT_SYMBOL(compat_ip_setsockopt);
*/
static int do_ip_getsockopt(struct sock *sk, int level, int optname,
- char __user *optval, int __user *optlen)
+ char __user *optval, int __user *optlen)
{
struct inet_sock *inet = inet_sk(sk);
int val;
int len;
- if(level!=SOL_IP)
+ if (level != SOL_IP)
return -EOPNOTSUPP;
#ifdef CONFIG_IP_MROUTE
- if(optname>=MRT_BASE && optname <=MRT_BASE+10)
- {
+ if (optname >= MRT_BASE && optname <= MRT_BASE+10) {
return ip_mroute_getsockopt(sk,optname,optval,optlen);
}
#endif
- if(get_user(len,optlen))
+ if (get_user(len,optlen))
return -EFAULT;
- if(len < 0)
+ if (len < 0)
return -EINVAL;
lock_sock(sk);
- switch(optname) {
- case IP_OPTIONS:
- {
- unsigned char optbuf[sizeof(struct ip_options)+40];
- struct ip_options * opt = (struct ip_options*)optbuf;
- opt->optlen = 0;
- if (inet->opt)
- memcpy(optbuf, inet->opt,
- sizeof(struct ip_options)+
- inet->opt->optlen);
- release_sock(sk);
-
- if (opt->optlen == 0)
- return put_user(0, optlen);
-
- ip_options_undo(opt);
-
- len = min_t(unsigned int, len, opt->optlen);
- if(put_user(len, optlen))
- return -EFAULT;
- if(copy_to_user(optval, opt->__data, len))
- return -EFAULT;
- return 0;
- }
- case IP_PKTINFO:
- val = (inet->cmsg_flags & IP_CMSG_PKTINFO) != 0;
- break;
- case IP_RECVTTL:
- val = (inet->cmsg_flags & IP_CMSG_TTL) != 0;
- break;
- case IP_RECVTOS:
- val = (inet->cmsg_flags & IP_CMSG_TOS) != 0;
- break;
- case IP_RECVOPTS:
- val = (inet->cmsg_flags & IP_CMSG_RECVOPTS) != 0;
- break;
- case IP_RETOPTS:
- val = (inet->cmsg_flags & IP_CMSG_RETOPTS) != 0;
- break;
- case IP_PASSSEC:
- val = (inet->cmsg_flags & IP_CMSG_PASSSEC) != 0;
- break;
- case IP_TOS:
- val = inet->tos;
- break;
- case IP_TTL:
- val = (inet->uc_ttl == -1 ?
- sysctl_ip_default_ttl :
- inet->uc_ttl);
- break;
- case IP_HDRINCL:
- val = inet->hdrincl;
- break;
- case IP_MTU_DISCOVER:
- val = inet->pmtudisc;
- break;
- case IP_MTU:
- {
- struct dst_entry *dst;
- val = 0;
- dst = sk_dst_get(sk);
- if (dst) {
- val = dst_mtu(dst);
- dst_release(dst);
- }
- if (!val) {
- release_sock(sk);
- return -ENOTCONN;
- }
- break;
+ switch (optname) {
+ case IP_OPTIONS:
+ {
+ unsigned char optbuf[sizeof(struct ip_options)+40];
+ struct ip_options * opt = (struct ip_options*)optbuf;
+ opt->optlen = 0;
+ if (inet->opt)
+ memcpy(optbuf, inet->opt,
+ sizeof(struct ip_options)+
+ inet->opt->optlen);
+ release_sock(sk);
+
+ if (opt->optlen == 0)
+ return put_user(0, optlen);
+
+ ip_options_undo(opt);
+
+ len = min_t(unsigned int, len, opt->optlen);
+ if (put_user(len, optlen))
+ return -EFAULT;
+ if (copy_to_user(optval, opt->__data, len))
+ return -EFAULT;
+ return 0;
+ }
+ case IP_PKTINFO:
+ val = (inet->cmsg_flags & IP_CMSG_PKTINFO) != 0;
+ break;
+ case IP_RECVTTL:
+ val = (inet->cmsg_flags & IP_CMSG_TTL) != 0;
+ break;
+ case IP_RECVTOS:
+ val = (inet->cmsg_flags & IP_CMSG_TOS) != 0;
+ break;
+ case IP_RECVOPTS:
+ val = (inet->cmsg_flags & IP_CMSG_RECVOPTS) != 0;
+ break;
+ case IP_RETOPTS:
+ val = (inet->cmsg_flags & IP_CMSG_RETOPTS) != 0;
+ break;
+ case IP_PASSSEC:
+ val = (inet->cmsg_flags & IP_CMSG_PASSSEC) != 0;
+ break;
+ case IP_TOS:
+ val = inet->tos;
+ break;
+ case IP_TTL:
+ val = (inet->uc_ttl == -1 ?
+ sysctl_ip_default_ttl :
+ inet->uc_ttl);
+ break;
+ case IP_HDRINCL:
+ val = inet->hdrincl;
+ break;
+ case IP_MTU_DISCOVER:
+ val = inet->pmtudisc;
+ break;
+ case IP_MTU:
+ {
+ struct dst_entry *dst;
+ val = 0;
+ dst = sk_dst_get(sk);
+ if (dst) {
+ val = dst_mtu(dst);
+ dst_release(dst);
}
- case IP_RECVERR:
- val = inet->recverr;
- break;
- case IP_MULTICAST_TTL:
- val = inet->mc_ttl;
- break;
- case IP_MULTICAST_LOOP:
- val = inet->mc_loop;
- break;
- case IP_MULTICAST_IF:
- {
- struct in_addr addr;
- len = min_t(unsigned int, len, sizeof(struct in_addr));
- addr.s_addr = inet->mc_addr;
+ if (!val) {
release_sock(sk);
-
- if(put_user(len, optlen))
- return -EFAULT;
- if(copy_to_user(optval, &addr, len))
- return -EFAULT;
- return 0;
+ return -ENOTCONN;
}
- case IP_MSFILTER:
- {
- struct ip_msfilter msf;
- int err;
+ break;
+ }
+ case IP_RECVERR:
+ val = inet->recverr;
+ break;
+ case IP_MULTICAST_TTL:
+ val = inet->mc_ttl;
+ break;
+ case IP_MULTICAST_LOOP:
+ val = inet->mc_loop;
+ break;
+ case IP_MULTICAST_IF:
+ {
+ struct in_addr addr;
+ len = min_t(unsigned int, len, sizeof(struct in_addr));
+ addr.s_addr = inet->mc_addr;
+ release_sock(sk);
- if (len < IP_MSFILTER_SIZE(0)) {
- release_sock(sk);
- return -EINVAL;
- }
- if (copy_from_user(&msf, optval, IP_MSFILTER_SIZE(0))) {
- release_sock(sk);
- return -EFAULT;
- }
- err = ip_mc_msfget(sk, &msf,
- (struct ip_msfilter __user *)optval, optlen);
+ if (put_user(len, optlen))
+ return -EFAULT;
+ if (copy_to_user(optval, &addr, len))
+ return -EFAULT;
+ return 0;
+ }
+ case IP_MSFILTER:
+ {
+ struct ip_msfilter msf;
+ int err;
+
+ if (len < IP_MSFILTER_SIZE(0)) {
release_sock(sk);
- return err;
+ return -EINVAL;
}
- case MCAST_MSFILTER:
- {
- struct group_filter gsf;
- int err;
-
- if (len < GROUP_FILTER_SIZE(0)) {
- release_sock(sk);
- return -EINVAL;
- }
- if (copy_from_user(&gsf, optval, GROUP_FILTER_SIZE(0))) {
- release_sock(sk);
- return -EFAULT;
- }
- err = ip_mc_gsfget(sk, &gsf,
- (struct group_filter __user *)optval, optlen);
+ if (copy_from_user(&msf, optval, IP_MSFILTER_SIZE(0))) {
release_sock(sk);
- return err;
+ return -EFAULT;
}
- case IP_PKTOPTIONS:
- {
- struct msghdr msg;
+ err = ip_mc_msfget(sk, &msf,
+ (struct ip_msfilter __user *)optval, optlen);
+ release_sock(sk);
+ return err;
+ }
+ case MCAST_MSFILTER:
+ {
+ struct group_filter gsf;
+ int err;
+ if (len < GROUP_FILTER_SIZE(0)) {
release_sock(sk);
+ return -EINVAL;
+ }
+ if (copy_from_user(&gsf, optval, GROUP_FILTER_SIZE(0))) {
+ release_sock(sk);
+ return -EFAULT;
+ }
+ err = ip_mc_gsfget(sk, &gsf,
+ (struct group_filter __user *)optval, optlen);
+ release_sock(sk);
+ return err;
+ }
+ case IP_PKTOPTIONS:
+ {
+ struct msghdr msg;
+
+ release_sock(sk);
- if (sk->sk_type != SOCK_STREAM)
- return -ENOPROTOOPT;
+ if (sk->sk_type != SOCK_STREAM)
+ return -ENOPROTOOPT;
- msg.msg_control = optval;
- msg.msg_controllen = len;
- msg.msg_flags = 0;
+ msg.msg_control = optval;
+ msg.msg_controllen = len;
+ msg.msg_flags = 0;
- if (inet->cmsg_flags & IP_CMSG_PKTINFO) {
- struct in_pktinfo info;
+ if (inet->cmsg_flags & IP_CMSG_PKTINFO) {
+ struct in_pktinfo info;
- info.ipi_addr.s_addr = inet->rcv_saddr;
- info.ipi_spec_dst.s_addr = inet->rcv_saddr;
- info.ipi_ifindex = inet->mc_index;
- put_cmsg(&msg, SOL_IP, IP_PKTINFO, sizeof(info), &info);
- }
- if (inet->cmsg_flags & IP_CMSG_TTL) {
- int hlim = inet->mc_ttl;
- put_cmsg(&msg, SOL_IP, IP_TTL, sizeof(hlim), &hlim);
- }
- len -= msg.msg_controllen;
- return put_user(len, optlen);
+ info.ipi_addr.s_addr = inet->rcv_saddr;
+ info.ipi_spec_dst.s_addr = inet->rcv_saddr;
+ info.ipi_ifindex = inet->mc_index;
+ put_cmsg(&msg, SOL_IP, IP_PKTINFO, sizeof(info), &info);
}
- case IP_FREEBIND:
- val = inet->freebind;
- break;
- default:
- release_sock(sk);
- return -ENOPROTOOPT;
+ if (inet->cmsg_flags & IP_CMSG_TTL) {
+ int hlim = inet->mc_ttl;
+ put_cmsg(&msg, SOL_IP, IP_TTL, sizeof(hlim), &hlim);
+ }
+ len -= msg.msg_controllen;
+ return put_user(len, optlen);
+ }
+ case IP_FREEBIND:
+ val = inet->freebind;
+ break;
+ default:
+ release_sock(sk);
+ return -ENOPROTOOPT;
}
release_sock(sk);
if (len < sizeof(int) && len > 0 && val>=0 && val<255) {
unsigned char ucval = (unsigned char)val;
len = 1;
- if(put_user(len, optlen))
+ if (put_user(len, optlen))
return -EFAULT;
- if(copy_to_user(optval,&ucval,1))
+ if (copy_to_user(optval,&ucval,1))
return -EFAULT;
} else {
len = min_t(unsigned int, sizeof(int), len);
- if(put_user(len, optlen))
+ if (put_user(len, optlen))
return -EFAULT;
- if(copy_to_user(optval,&val,len))
+ if (copy_to_user(optval,&val,len))
return -EFAULT;
}
return 0;
}
int ip_getsockopt(struct sock *sk, int level,
- int optname, char __user *optval, int __user *optlen)
+ int optname, char __user *optval, int __user *optlen)
{
int err;
@@ -1169,7 +1174,7 @@ int ip_getsockopt(struct sock *sk, int level,
) {
int len;
- if(get_user(len,optlen))
+ if (get_user(len,optlen))
return -EFAULT;
lock_sock(sk);
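
The IP_MTU case in the getsockopt switch above deliberately fails with -ENOTCONN when the socket has no cached route. A small userspace sketch that exercises exactly that branch, with the UDP socket left unconnected on purpose:

#include <netinet/in.h>
#include <stdio.h>
#include <sys/socket.h>
#include <unistd.h>

int main(void)
{
	int fd = socket(AF_INET, SOCK_DGRAM, 0);
	int mtu = 0;
	socklen_t len = sizeof(mtu);

	if (fd < 0)
		return 1;
	if (getsockopt(fd, IPPROTO_IP, IP_MTU, &mtu, &len) < 0)
		perror("IP_MTU");	/* expect ENOTCONN: no cached route yet */
	close(fd);
	return 0;
}

After a connect() the same call returns the path MTU of the cached dst.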
diff --git a/net/ipv4/ipcomp.c b/net/ipv4/ipcomp.c
index aa704b88f01..ab86137c71d 100644
--- a/net/ipv4/ipcomp.c
+++ b/net/ipv4/ipcomp.c
@@ -43,21 +43,15 @@ static LIST_HEAD(ipcomp_tfms_list);
static int ipcomp_decompress(struct xfrm_state *x, struct sk_buff *skb)
{
- int err, plen, dlen;
struct ipcomp_data *ipcd = x->data;
- u8 *start, *scratch;
- struct crypto_comp *tfm;
- int cpu;
-
- plen = skb->len;
- dlen = IPCOMP_SCRATCH_SIZE;
- start = skb->data;
+ const int plen = skb->len;
+ int dlen = IPCOMP_SCRATCH_SIZE;
+ const u8 *start = skb->data;
+ const int cpu = get_cpu();
+ u8 *scratch = *per_cpu_ptr(ipcomp_scratches, cpu);
+ struct crypto_comp *tfm = *per_cpu_ptr(ipcd->tfms, cpu);
+ int err = crypto_comp_decompress(tfm, start, plen, scratch, &dlen);
- cpu = get_cpu();
- scratch = *per_cpu_ptr(ipcomp_scratches, cpu);
- tfm = *per_cpu_ptr(ipcd->tfms, cpu);
-
- err = crypto_comp_decompress(tfm, start, plen, scratch, &dlen);
if (err)
goto out;
@@ -72,7 +66,7 @@ static int ipcomp_decompress(struct xfrm_state *x, struct sk_buff *skb)
skb->truesize += dlen - plen;
__skb_put(skb, dlen - plen);
- memcpy(skb->data, scratch, dlen);
+ skb_copy_to_linear_data(skb, scratch, dlen);
out:
put_cpu();
return err;
@@ -90,10 +84,10 @@ static int ipcomp_input(struct xfrm_state *x, struct sk_buff *skb)
skb->ip_summed = CHECKSUM_NONE;
/* Remove ipcomp header and decompress original payload */
- iph = skb->nh.iph;
+ iph = ip_hdr(skb);
ipch = (void *)skb->data;
iph->protocol = ipch->nexthdr;
- skb->h.raw = skb->nh.raw + sizeof(*ipch);
+ skb->transport_header = skb->network_header + sizeof(*ipch);
__skb_pull(skb, sizeof(*ipch));
err = ipcomp_decompress(x, skb);
@@ -103,23 +97,16 @@ out:
static int ipcomp_compress(struct xfrm_state *x, struct sk_buff *skb)
{
- int err, plen, dlen, ihlen;
- struct iphdr *iph = skb->nh.iph;
struct ipcomp_data *ipcd = x->data;
- u8 *start, *scratch;
- struct crypto_comp *tfm;
- int cpu;
+ const int ihlen = ip_hdrlen(skb);
+ const int plen = skb->len - ihlen;
+ int dlen = IPCOMP_SCRATCH_SIZE;
+ u8 *start = skb->data + ihlen;
+ const int cpu = get_cpu();
+ u8 *scratch = *per_cpu_ptr(ipcomp_scratches, cpu);
+ struct crypto_comp *tfm = *per_cpu_ptr(ipcd->tfms, cpu);
+ int err = crypto_comp_compress(tfm, start, plen, scratch, &dlen);
- ihlen = iph->ihl * 4;
- plen = skb->len - ihlen;
- dlen = IPCOMP_SCRATCH_SIZE;
- start = skb->data + ihlen;
-
- cpu = get_cpu();
- scratch = *per_cpu_ptr(ipcomp_scratches, cpu);
- tfm = *per_cpu_ptr(ipcd->tfms, cpu);
-
- err = crypto_comp_compress(tfm, start, plen, scratch, &dlen);
if (err)
goto out;
@@ -142,12 +129,11 @@ out:
static int ipcomp_output(struct xfrm_state *x, struct sk_buff *skb)
{
int err;
- struct iphdr *iph;
struct ip_comp_hdr *ipch;
struct ipcomp_data *ipcd = x->data;
int hdr_len = 0;
+ struct iphdr *iph = ip_hdr(skb);
- iph = skb->nh.iph;
iph->tot_len = htons(skb->len);
hdr_len = iph->ihl * 4;
if ((skb->len - hdr_len) < ipcd->threshold) {
@@ -159,7 +145,7 @@ static int ipcomp_output(struct xfrm_state *x, struct sk_buff *skb)
goto out_ok;
err = ipcomp_compress(x, skb);
- iph = skb->nh.iph;
+ iph = ip_hdr(skb);
if (err) {
goto out_ok;
@@ -188,8 +174,8 @@ static void ipcomp4_err(struct sk_buff *skb, u32 info)
struct ip_comp_hdr *ipch = (struct ip_comp_hdr *)(skb->data+(iph->ihl<<2));
struct xfrm_state *x;
- if (skb->h.icmph->type != ICMP_DEST_UNREACH ||
- skb->h.icmph->code != ICMP_FRAG_NEEDED)
+ if (icmp_hdr(skb)->type != ICMP_DEST_UNREACH ||
+ icmp_hdr(skb)->code != ICMP_FRAG_NEEDED)
return;
spi = htonl(ntohs(ipch->cpi));
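
The ipcomp_decompress() rewrite above keeps the same shape: decompress into a fixed scratch buffer with an in/out length, then copy the result back into the skb. A userspace sketch of that pattern, with zlib standing in for the kernel's crypto_comp (link with -lz; the 65400 size mirrors IPCOMP_SCRATCH_SIZE):

#include <stdio.h>
#include <string.h>
#include <zlib.h>

#define SCRATCH_SIZE 65400	/* mirrors IPCOMP_SCRATCH_SIZE */

static unsigned char scratch[SCRATCH_SIZE];

int main(void)
{
	const char *msg = "payload payload payload payload payload";
	unsigned char packed[128];
	uLongf plen = sizeof(packed);
	uLongf dlen = SCRATCH_SIZE;	/* in/out, like crypto_comp_decompress()'s dlen */

	if (compress(packed, &plen, (const Bytef *)msg, strlen(msg) + 1) != Z_OK)
		return 1;
	if (uncompress(scratch, &dlen, packed, plen) != Z_OK)
		return 1;
	printf("%lu -> %lu bytes: %s\n",
	       (unsigned long)plen, (unsigned long)dlen, scratch);
	return 0;
}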
diff --git a/net/ipv4/ipconfig.c b/net/ipv4/ipconfig.c
index cf49de1a498..342ca8d8945 100644
--- a/net/ipv4/ipconfig.c
+++ b/net/ipv4/ipconfig.c
@@ -192,7 +192,7 @@ static int __init ic_open_devs(void)
if (dev_change_flags(&loopback_dev, loopback_dev.flags | IFF_UP) < 0)
printk(KERN_ERR "IP-Config: Failed to open %s\n", loopback_dev.name);
- for (dev = dev_base; dev; dev = dev->next) {
+ for_each_netdev(dev) {
if (dev == &loopback_dev)
continue;
if (user_dev_name[0] ? !strcmp(dev->name, user_dev_name) :
@@ -432,7 +432,7 @@ ic_rarp_recv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt
goto drop;
/* Basic sanity checks can be done without the lock. */
- rarp = (struct arphdr *)skb->h.raw;
+ rarp = (struct arphdr *)skb_transport_header(skb);
/* If this test doesn't pass, it's not IP, or we should
* ignore it anyway.
@@ -455,7 +455,7 @@ ic_rarp_recv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt
goto drop;
/* OK, it is all there and looks valid, process... */
- rarp = (struct arphdr *)skb->h.raw;
+ rarp = (struct arphdr *)skb_transport_header(skb);
rarp_ptr = (unsigned char *) (rarp + 1);
/* One reply at a time, please. */
@@ -702,7 +702,8 @@ static void __init ic_bootp_send_if(struct ic_device *d, unsigned long jiffies_d
memset(b, 0, sizeof(struct bootp_pkt));
/* Construct IP header */
- skb->nh.iph = h = &b->iph;
+ skb_reset_network_header(skb);
+ h = ip_hdr(skb);
h->version = 4;
h->ihl = 5;
h->tot_len = htons(sizeof(struct bootp_pkt));
@@ -782,7 +783,7 @@ static void __init ic_do_bootp_ext(u8 *ext)
u8 *c;
printk("DHCP/BOOTP: Got extension %d:",*ext);
- for(c=ext+2; c<ext+2+ext[1]; c++)
+ for (c=ext+2; c<ext+2+ext[1]; c++)
printk(" %02x", *c);
printk("\n");
#endif
@@ -845,7 +846,7 @@ static int __init ic_bootp_recv(struct sk_buff *skb, struct net_device *dev, str
sizeof(struct udphdr)))
goto drop;
- b = (struct bootp_pkt *) skb->nh.iph;
+ b = (struct bootp_pkt *)skb_network_header(skb);
h = &b->iph;
if (h->ihl != 5 || h->version != 4 || h->protocol != IPPROTO_UDP)
@@ -883,7 +884,7 @@ static int __init ic_bootp_recv(struct sk_buff *skb, struct net_device *dev, str
if (!pskb_may_pull(skb, skb->len))
goto drop;
- b = (struct bootp_pkt *) skb->nh.iph;
+ b = (struct bootp_pkt *)skb_network_header(skb);
h = &b->iph;
/* One reply at a time, please. */
@@ -938,7 +939,7 @@ static int __init ic_bootp_recv(struct sk_buff *skb, struct net_device *dev, str
if (opt[1] >= 4)
memcpy(&server_id, opt + 2, 4);
break;
- };
+ }
}
#ifdef IPCONFIG_DEBUG
@@ -983,7 +984,7 @@ static int __init ic_bootp_recv(struct sk_buff *skb, struct net_device *dev, str
ic_myaddr = NONE;
ic_servaddr = NONE;
goto drop_unlock;
- };
+ }
ic_dhcp_msgtype = mt;
@@ -1094,7 +1095,7 @@ static int __init ic_dynamic(void)
retries = CONF_SEND_RETRIES;
get_random_bytes(&timeout, sizeof(timeout));
timeout = CONF_BASE_TIMEOUT + (timeout % (unsigned) CONF_TIMEOUT_RANDOM);
- for(;;) {
+ for (;;) {
#ifdef IPCONFIG_BOOTP
if (do_bootp && (d->able & IC_BOOTP))
ic_bootp_send_if(d, jiffies - start_jiffies);
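
The for_each_netdev() conversion near the top of the ipconfig.c hunks hides the open-coded dev_base walk behind an iterator macro. A generic sketch of that idiom over a toy list; the names here are illustrative, not the kernel's:

#include <stdio.h>

struct node {
	struct node *next;
	const char *name;
};

#define for_each_node(pos, head) \
	for ((pos) = (head); (pos) != NULL; (pos) = (pos)->next)

int main(void)
{
	struct node c = { NULL, "eth1" };
	struct node b = { &c, "eth0" };
	struct node a = { &b, "lo" };
	struct node *pos;

	for_each_node(pos, &a)
		printf("%s\n", pos->name);
	return 0;
}

Hiding the head pointer behind the macro is what later lets the list representation change without touching every caller.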
diff --git a/net/ipv4/ipip.c b/net/ipv4/ipip.c
index 3ec5ce0f549..ebd2f2d532f 100644
--- a/net/ipv4/ipip.c
+++ b/net/ipv4/ipip.c
@@ -157,10 +157,10 @@ static struct ip_tunnel * ipip_tunnel_lookup(__be32 remote, __be32 local)
return NULL;
}
-static struct ip_tunnel **ipip_bucket(struct ip_tunnel *t)
+static struct ip_tunnel **__ipip_bucket(struct ip_tunnel_parm *parms)
{
- __be32 remote = t->parms.iph.daddr;
- __be32 local = t->parms.iph.saddr;
+ __be32 remote = parms->iph.daddr;
+ __be32 local = parms->iph.saddr;
unsigned h = 0;
int prio = 0;
@@ -175,6 +175,10 @@ static struct ip_tunnel **ipip_bucket(struct ip_tunnel *t)
return &tunnels[prio][h];
}
+static inline struct ip_tunnel **ipip_bucket(struct ip_tunnel *t)
+{
+ return __ipip_bucket(&t->parms);
+}
static void ipip_tunnel_unlink(struct ip_tunnel *t)
{
@@ -206,19 +210,9 @@ static struct ip_tunnel * ipip_tunnel_locate(struct ip_tunnel_parm *parms, int c
__be32 local = parms->iph.saddr;
struct ip_tunnel *t, **tp, *nt;
struct net_device *dev;
- unsigned h = 0;
- int prio = 0;
char name[IFNAMSIZ];
- if (remote) {
- prio |= 2;
- h ^= HASH(remote);
- }
- if (local) {
- prio |= 1;
- h ^= HASH(local);
- }
- for (tp = &tunnels[prio][h]; (t = *tp) != NULL; tp = &t->next) {
+ for (tp = __ipip_bucket(parms); (t = *tp) != NULL; tp = &t->next) {
if (local == t->parms.iph.saddr && remote == t->parms.iph.daddr)
return t;
}
@@ -280,8 +274,8 @@ static int ipip_err(struct sk_buff *skb, u32 info)
ICMP in the real Internet is absolutely infeasible.
*/
struct iphdr *iph = (struct iphdr*)skb->data;
- int type = skb->h.icmph->type;
- int code = skb->h.icmph->code;
+ const int type = icmp_hdr(skb)->type;
+ const int code = icmp_hdr(skb)->code;
struct ip_tunnel *t;
int err;
@@ -336,8 +330,8 @@ out:
struct iphdr *iph = (struct iphdr*)dp;
int hlen = iph->ihl<<2;
struct iphdr *eiph;
- int type = skb->h.icmph->type;
- int code = skb->h.icmph->code;
+ const int type = icmp_hdr(skb)->type;
+ const int code = icmp_hdr(skb)->code;
int rel_type = 0;
int rel_code = 0;
__be32 rel_info = 0;
@@ -354,7 +348,7 @@ out:
default:
return 0;
case ICMP_PARAMETERPROB:
- n = ntohl(skb->h.icmph->un.gateway) >> 24;
+ n = ntohl(icmp_hdr(skb)->un.gateway) >> 24;
if (n < hlen)
return 0;
@@ -373,7 +367,7 @@ out:
return 0;
case ICMP_FRAG_NEEDED:
/* And it is the only really necessary thing :-) */
- n = ntohs(skb->h.icmph->un.frag.mtu);
+ n = ntohs(icmp_hdr(skb)->un.frag.mtu);
if (n < hlen+68)
return 0;
n -= hlen;
@@ -405,7 +399,7 @@ out:
dst_release(skb2->dst);
skb2->dst = NULL;
skb_pull(skb2, skb->data - (u8*)eiph);
- skb2->nh.raw = skb2->data;
+ skb_reset_network_header(skb2);
/* Try to guess incoming interface */
memset(&fl, 0, sizeof(fl));
@@ -461,9 +455,10 @@ out:
#endif
}
-static inline void ipip_ecn_decapsulate(struct iphdr *outer_iph, struct sk_buff *skb)
+static inline void ipip_ecn_decapsulate(const struct iphdr *outer_iph,
+ struct sk_buff *skb)
{
- struct iphdr *inner_iph = skb->nh.iph;
+ struct iphdr *inner_iph = ip_hdr(skb);
if (INET_ECN_is_ce(outer_iph->tos))
IP_ECN_set_ce(inner_iph);
@@ -471,10 +466,8 @@ static inline void ipip_ecn_decapsulate(struct iphdr *outer_iph, struct sk_buff
static int ipip_rcv(struct sk_buff *skb)
{
- struct iphdr *iph;
struct ip_tunnel *tunnel;
-
- iph = skb->nh.iph;
+ const struct iphdr *iph = ip_hdr(skb);
read_lock(&ipip_lock);
if ((tunnel = ipip_tunnel_lookup(iph->saddr, iph->daddr)) != NULL) {
@@ -486,8 +479,8 @@ static int ipip_rcv(struct sk_buff *skb)
secpath_reset(skb);
- skb->mac.raw = skb->nh.raw;
- skb->nh.raw = skb->data;
+ skb->mac_header = skb->network_header;
+ skb_reset_network_header(skb);
skb->protocol = htons(ETH_P_IP);
skb->pkt_type = PACKET_HOST;
@@ -521,7 +514,7 @@ static int ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
__be16 df = tiph->frag_off;
struct rtable *rt; /* Route to the other host */
struct net_device *tdev; /* Device to other host */
- struct iphdr *old_iph = skb->nh.iph;
+ struct iphdr *old_iph = ip_hdr(skb);
struct iphdr *iph; /* Our new IP header */
int max_headroom; /* The extra header space needed */
__be32 dst = tiph->daddr;
@@ -615,11 +608,12 @@ static int ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
skb_set_owner_w(new_skb, skb->sk);
dev_kfree_skb(skb);
skb = new_skb;
- old_iph = skb->nh.iph;
+ old_iph = ip_hdr(skb);
}
- skb->h.raw = skb->nh.raw;
- skb->nh.raw = skb_push(skb, sizeof(struct iphdr));
+ skb->transport_header = skb->network_header;
+ skb_push(skb, sizeof(struct iphdr));
+ skb_reset_network_header(skb);
memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
IPCB(skb)->flags &= ~(IPSKB_XFRM_TUNNEL_SIZE | IPSKB_XFRM_TRANSFORMED |
IPSKB_REROUTED);
@@ -630,7 +624,7 @@ static int ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
* Push down and install the IPIP header.
*/
- iph = skb->nh.iph;
+ iph = ip_hdr(skb);
iph->version = 4;
iph->ihl = sizeof(struct iphdr)>>2;
iph->frag_off = df;
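
The __ipip_bucket() factoring above lets ipip_tunnel_locate() reuse the same bucket computation instead of open-coding it. A standalone sketch of that selection logic, assuming the 4-bit fold used by ipip.c's HASH(); the flattened array index stands in for &tunnels[prio][h]:

#include <stdint.h>
#include <stdio.h>

#define HASH_SIZE 16
#define HASH(a) (((a) ^ ((a) >> 4)) & 0xF)

static unsigned int tunnel_bucket(uint32_t remote, uint32_t local)
{
	unsigned int h = 0, prio = 0;

	if (remote) {
		prio |= 2;
		h ^= HASH(remote);
	}
	if (local) {
		prio |= 1;
		h ^= HASH(local);
	}
	return prio * HASH_SIZE + h;	/* stands in for &tunnels[prio][h] */
}

int main(void)
{
	printf("bucket=%u\n", tunnel_bucket(0x0a000001u, 0x0a000002u));
	return 0;
}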
diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c
index 601e3df6925..0ebae413ae8 100644
--- a/net/ipv4/ipmr.c
+++ b/net/ipv4/ipmr.c
@@ -62,6 +62,7 @@
#include <linux/netfilter_ipv4.h>
#include <net/ipip.h>
#include <net/checksum.h>
+#include <net/netlink.h>
#if defined(CONFIG_IP_PIMSM_V1) || defined(CONFIG_IP_PIMSM_V2)
#define CONFIG_IP_PIMSM 1
@@ -302,8 +303,8 @@ static void ipmr_destroy_unres(struct mfc_cache *c)
atomic_dec(&cache_resolve_queue_len);
- while((skb=skb_dequeue(&c->mfc_un.unres.unresolved))) {
- if (skb->nh.iph->version == 0) {
+ while ((skb=skb_dequeue(&c->mfc_un.unres.unresolved))) {
+ if (ip_hdr(skb)->version == 0) {
struct nlmsghdr *nlh = (struct nlmsghdr *)skb_pull(skb, sizeof(struct iphdr));
nlh->nlmsg_type = NLMSG_ERROR;
nlh->nlmsg_len = NLMSG_LENGTH(sizeof(struct nlmsgerr));
@@ -479,7 +480,7 @@ static struct mfc_cache *ipmr_cache_find(__be32 origin, __be32 mcastgrp)
static struct mfc_cache *ipmr_cache_alloc(void)
{
struct mfc_cache *c=kmem_cache_zalloc(mrt_cachep, GFP_KERNEL);
- if(c==NULL)
+ if (c==NULL)
return NULL;
c->mfc_un.res.minvif = MAXVIFS;
return c;
@@ -488,7 +489,7 @@ static struct mfc_cache *ipmr_cache_alloc(void)
static struct mfc_cache *ipmr_cache_alloc_unres(void)
{
struct mfc_cache *c=kmem_cache_zalloc(mrt_cachep, GFP_ATOMIC);
- if(c==NULL)
+ if (c==NULL)
return NULL;
skb_queue_head_init(&c->mfc_un.unres.unresolved);
c->mfc_un.unres.expires = jiffies + 10*HZ;
@@ -508,12 +509,13 @@ static void ipmr_cache_resolve(struct mfc_cache *uc, struct mfc_cache *c)
* Play the pending entries through our router
*/
- while((skb=__skb_dequeue(&uc->mfc_un.unres.unresolved))) {
- if (skb->nh.iph->version == 0) {
+ while ((skb=__skb_dequeue(&uc->mfc_un.unres.unresolved))) {
+ if (ip_hdr(skb)->version == 0) {
struct nlmsghdr *nlh = (struct nlmsghdr *)skb_pull(skb, sizeof(struct iphdr));
if (ipmr_fill_mroute(skb, c, NLMSG_DATA(nlh)) > 0) {
- nlh->nlmsg_len = skb->tail - (u8*)nlh;
+ nlh->nlmsg_len = (skb_tail_pointer(skb) -
+ (u8 *)nlh);
} else {
nlh->nlmsg_type = NLMSG_ERROR;
nlh->nlmsg_len = NLMSG_LENGTH(sizeof(struct nlmsgerr));
@@ -539,7 +541,7 @@ static void ipmr_cache_resolve(struct mfc_cache *uc, struct mfc_cache *c)
static int ipmr_cache_report(struct sk_buff *pkt, vifi_t vifi, int assert)
{
struct sk_buff *skb;
- int ihl = pkt->nh.iph->ihl<<2;
+ const int ihl = ip_hdrlen(pkt);
struct igmphdr *igmp;
struct igmpmsg *msg;
int ret;
@@ -551,7 +553,7 @@ static int ipmr_cache_report(struct sk_buff *pkt, vifi_t vifi, int assert)
#endif
skb = alloc_skb(128, GFP_ATOMIC);
- if(!skb)
+ if (!skb)
return -ENOBUFS;
#ifdef CONFIG_IP_PIMSM
@@ -561,14 +563,17 @@ static int ipmr_cache_report(struct sk_buff *pkt, vifi_t vifi, int assert)
And all this only to mangle msg->im_msgtype and
to set msg->im_mbz to "mbz" :-)
*/
- msg = (struct igmpmsg*)skb_push(skb, sizeof(struct iphdr));
- skb->nh.raw = skb->h.raw = (u8*)msg;
- memcpy(msg, pkt->nh.raw, sizeof(struct iphdr));
+ skb_push(skb, sizeof(struct iphdr));
+ skb_reset_network_header(skb);
+ skb_reset_transport_header(skb);
+ msg = (struct igmpmsg *)skb_network_header(skb);
+ memcpy(msg, skb_network_header(pkt), sizeof(struct iphdr));
msg->im_msgtype = IGMPMSG_WHOLEPKT;
msg->im_mbz = 0;
msg->im_vif = reg_vif_num;
- skb->nh.iph->ihl = sizeof(struct iphdr) >> 2;
- skb->nh.iph->tot_len = htons(ntohs(pkt->nh.iph->tot_len) + sizeof(struct iphdr));
+ ip_hdr(skb)->ihl = sizeof(struct iphdr) >> 2;
+ ip_hdr(skb)->tot_len = htons(ntohs(ip_hdr(pkt)->tot_len) +
+ sizeof(struct iphdr));
} else
#endif
{
@@ -577,10 +582,11 @@ static int ipmr_cache_report(struct sk_buff *pkt, vifi_t vifi, int assert)
* Copy the IP header
*/
- skb->nh.iph = (struct iphdr *)skb_put(skb, ihl);
- memcpy(skb->data,pkt->data,ihl);
- skb->nh.iph->protocol = 0; /* Flag to the kernel this is a route add */
- msg = (struct igmpmsg*)skb->nh.iph;
+ skb->network_header = skb->tail;
+ skb_put(skb, ihl);
+ skb_copy_to_linear_data(skb, pkt->data, ihl);
+ ip_hdr(skb)->protocol = 0; /* Flag to the kernel this is a route add */
+ msg = (struct igmpmsg *)skb_network_header(skb);
msg->im_vif = vifi;
skb->dst = dst_clone(pkt->dst);
@@ -592,8 +598,8 @@ static int ipmr_cache_report(struct sk_buff *pkt, vifi_t vifi, int assert)
igmp->type =
msg->im_msgtype = assert;
igmp->code = 0;
- skb->nh.iph->tot_len=htons(skb->len); /* Fix the length */
- skb->h.raw = skb->nh.raw;
+ ip_hdr(skb)->tot_len = htons(skb->len); /* Fix the length */
+ skb->transport_header = skb->network_header;
}
if (mroute_socket == NULL) {
@@ -622,11 +628,12 @@ ipmr_cache_unresolved(vifi_t vifi, struct sk_buff *skb)
{
int err;
struct mfc_cache *c;
+ const struct iphdr *iph = ip_hdr(skb);
spin_lock_bh(&mfc_unres_lock);
for (c=mfc_unres_queue; c; c=c->next) {
- if (c->mfc_mcastgrp == skb->nh.iph->daddr &&
- c->mfc_origin == skb->nh.iph->saddr)
+ if (c->mfc_mcastgrp == iph->daddr &&
+ c->mfc_origin == iph->saddr)
break;
}
@@ -646,9 +653,9 @@ ipmr_cache_unresolved(vifi_t vifi, struct sk_buff *skb)
/*
* Fill in the new cache entry
*/
- c->mfc_parent=-1;
- c->mfc_origin=skb->nh.iph->saddr;
- c->mfc_mcastgrp=skb->nh.iph->daddr;
+ c->mfc_parent = -1;
+ c->mfc_origin = iph->saddr;
+ c->mfc_mcastgrp = iph->daddr;
/*
* Reflect first query at mrouted.
@@ -734,7 +741,7 @@ static int ipmr_mfc_add(struct mfcctl *mfc, int mrtsock)
return 0;
}
- if(!MULTICAST(mfc->mfcc_mcastgrp.s_addr))
+ if (!MULTICAST(mfc->mfcc_mcastgrp.s_addr))
return -EINVAL;
c=ipmr_cache_alloc();
@@ -788,7 +795,7 @@ static void mroute_clean_tables(struct sock *sk)
/*
* Shut down all active vif entries
*/
- for(i=0; i<maxvif; i++) {
+ for (i=0; i<maxvif; i++) {
if (!(vif_table[i].flags&VIFF_STATIC))
vif_delete(i);
}
@@ -858,119 +865,117 @@ int ip_mroute_setsockopt(struct sock *sk,int optname,char __user *optval,int opt
struct vifctl vif;
struct mfcctl mfc;
- if(optname!=MRT_INIT)
- {
- if(sk!=mroute_socket && !capable(CAP_NET_ADMIN))
+ if (optname != MRT_INIT) {
+ if (sk != mroute_socket && !capable(CAP_NET_ADMIN))
return -EACCES;
}
- switch(optname)
- {
- case MRT_INIT:
- if (sk->sk_type != SOCK_RAW ||
- inet_sk(sk)->num != IPPROTO_IGMP)
- return -EOPNOTSUPP;
- if(optlen!=sizeof(int))
- return -ENOPROTOOPT;
-
- rtnl_lock();
- if (mroute_socket) {
- rtnl_unlock();
- return -EADDRINUSE;
- }
-
- ret = ip_ra_control(sk, 1, mrtsock_destruct);
- if (ret == 0) {
- write_lock_bh(&mrt_lock);
- mroute_socket=sk;
- write_unlock_bh(&mrt_lock);
+ switch (optname) {
+ case MRT_INIT:
+ if (sk->sk_type != SOCK_RAW ||
+ inet_sk(sk)->num != IPPROTO_IGMP)
+ return -EOPNOTSUPP;
+ if (optlen!=sizeof(int))
+ return -ENOPROTOOPT;
- ipv4_devconf.mc_forwarding++;
- }
+ rtnl_lock();
+ if (mroute_socket) {
rtnl_unlock();
- return ret;
- case MRT_DONE:
- if (sk!=mroute_socket)
- return -EACCES;
- return ip_ra_control(sk, 0, NULL);
- case MRT_ADD_VIF:
- case MRT_DEL_VIF:
- if(optlen!=sizeof(vif))
- return -EINVAL;
- if (copy_from_user(&vif,optval,sizeof(vif)))
- return -EFAULT;
- if(vif.vifc_vifi >= MAXVIFS)
- return -ENFILE;
- rtnl_lock();
- if (optname==MRT_ADD_VIF) {
- ret = vif_add(&vif, sk==mroute_socket);
- } else {
- ret = vif_delete(vif.vifc_vifi);
- }
- rtnl_unlock();
- return ret;
+ return -EADDRINUSE;
+ }
+
+ ret = ip_ra_control(sk, 1, mrtsock_destruct);
+ if (ret == 0) {
+ write_lock_bh(&mrt_lock);
+ mroute_socket=sk;
+ write_unlock_bh(&mrt_lock);
+
+ ipv4_devconf.mc_forwarding++;
+ }
+ rtnl_unlock();
+ return ret;
+ case MRT_DONE:
+ if (sk!=mroute_socket)
+ return -EACCES;
+ return ip_ra_control(sk, 0, NULL);
+ case MRT_ADD_VIF:
+ case MRT_DEL_VIF:
+ if (optlen!=sizeof(vif))
+ return -EINVAL;
+ if (copy_from_user(&vif,optval,sizeof(vif)))
+ return -EFAULT;
+ if (vif.vifc_vifi >= MAXVIFS)
+ return -ENFILE;
+ rtnl_lock();
+ if (optname==MRT_ADD_VIF) {
+ ret = vif_add(&vif, sk==mroute_socket);
+ } else {
+ ret = vif_delete(vif.vifc_vifi);
+ }
+ rtnl_unlock();
+ return ret;
/*
* Manipulate the forwarding caches. These live
* in a sort of kernel/user symbiosis.
*/
- case MRT_ADD_MFC:
- case MRT_DEL_MFC:
- if(optlen!=sizeof(mfc))
- return -EINVAL;
- if (copy_from_user(&mfc,optval, sizeof(mfc)))
- return -EFAULT;
- rtnl_lock();
- if (optname==MRT_DEL_MFC)
- ret = ipmr_mfc_delete(&mfc);
- else
- ret = ipmr_mfc_add(&mfc, sk==mroute_socket);
- rtnl_unlock();
- return ret;
+ case MRT_ADD_MFC:
+ case MRT_DEL_MFC:
+ if (optlen!=sizeof(mfc))
+ return -EINVAL;
+ if (copy_from_user(&mfc,optval, sizeof(mfc)))
+ return -EFAULT;
+ rtnl_lock();
+ if (optname==MRT_DEL_MFC)
+ ret = ipmr_mfc_delete(&mfc);
+ else
+ ret = ipmr_mfc_add(&mfc, sk==mroute_socket);
+ rtnl_unlock();
+ return ret;
/*
* Control PIM assert.
*/
- case MRT_ASSERT:
- {
- int v;
- if(get_user(v,(int __user *)optval))
- return -EFAULT;
- mroute_do_assert=(v)?1:0;
- return 0;
- }
+ case MRT_ASSERT:
+ {
+ int v;
+ if (get_user(v,(int __user *)optval))
+ return -EFAULT;
+ mroute_do_assert=(v)?1:0;
+ return 0;
+ }
#ifdef CONFIG_IP_PIMSM
- case MRT_PIM:
- {
- int v, ret;
- if(get_user(v,(int __user *)optval))
- return -EFAULT;
- v = (v)?1:0;
- rtnl_lock();
- ret = 0;
- if (v != mroute_do_pim) {
- mroute_do_pim = v;
- mroute_do_assert = v;
+ case MRT_PIM:
+ {
+ int v, ret;
+ if (get_user(v,(int __user *)optval))
+ return -EFAULT;
+ v = (v)?1:0;
+ rtnl_lock();
+ ret = 0;
+ if (v != mroute_do_pim) {
+ mroute_do_pim = v;
+ mroute_do_assert = v;
#ifdef CONFIG_IP_PIMSM_V2
- if (mroute_do_pim)
- ret = inet_add_protocol(&pim_protocol,
- IPPROTO_PIM);
- else
- ret = inet_del_protocol(&pim_protocol,
- IPPROTO_PIM);
- if (ret < 0)
- ret = -EAGAIN;
+ if (mroute_do_pim)
+ ret = inet_add_protocol(&pim_protocol,
+ IPPROTO_PIM);
+ else
+ ret = inet_del_protocol(&pim_protocol,
+ IPPROTO_PIM);
+ if (ret < 0)
+ ret = -EAGAIN;
#endif
- }
- rtnl_unlock();
- return ret;
}
+ rtnl_unlock();
+ return ret;
+ }
#endif
- /*
- * Spurious command, or MRT_VERSION which you cannot
- * set.
- */
- default:
- return -ENOPROTOOPT;
+ /*
+ * Spurious command, or MRT_VERSION which you cannot
+ * set.
+ */
+ default:
+ return -ENOPROTOOPT;
}
}
@@ -983,7 +988,7 @@ int ip_mroute_getsockopt(struct sock *sk,int optname,char __user *optval,int __u
int olr;
int val;
- if(optname!=MRT_VERSION &&
+ if (optname!=MRT_VERSION &&
#ifdef CONFIG_IP_PIMSM
optname!=MRT_PIM &&
#endif
@@ -997,17 +1002,17 @@ int ip_mroute_getsockopt(struct sock *sk,int optname,char __user *optval,int __u
if (olr < 0)
return -EINVAL;
- if(put_user(olr,optlen))
+ if (put_user(olr,optlen))
return -EFAULT;
- if(optname==MRT_VERSION)
+ if (optname==MRT_VERSION)
val=0x0305;
#ifdef CONFIG_IP_PIMSM
- else if(optname==MRT_PIM)
+ else if (optname==MRT_PIM)
val=mroute_do_pim;
#endif
else
val=mroute_do_assert;
- if(copy_to_user(optval,&val,olr))
+ if (copy_to_user(optval,&val,olr))
return -EFAULT;
return 0;
}
@@ -1023,48 +1028,47 @@ int ipmr_ioctl(struct sock *sk, int cmd, void __user *arg)
struct vif_device *vif;
struct mfc_cache *c;
- switch(cmd)
- {
- case SIOCGETVIFCNT:
- if (copy_from_user(&vr,arg,sizeof(vr)))
- return -EFAULT;
- if(vr.vifi>=maxvif)
- return -EINVAL;
- read_lock(&mrt_lock);
- vif=&vif_table[vr.vifi];
- if(VIF_EXISTS(vr.vifi)) {
- vr.icount=vif->pkt_in;
- vr.ocount=vif->pkt_out;
- vr.ibytes=vif->bytes_in;
- vr.obytes=vif->bytes_out;
- read_unlock(&mrt_lock);
-
- if (copy_to_user(arg,&vr,sizeof(vr)))
- return -EFAULT;
- return 0;
- }
+ switch (cmd) {
+ case SIOCGETVIFCNT:
+ if (copy_from_user(&vr,arg,sizeof(vr)))
+ return -EFAULT;
+ if (vr.vifi>=maxvif)
+ return -EINVAL;
+ read_lock(&mrt_lock);
+ vif=&vif_table[vr.vifi];
+ if (VIF_EXISTS(vr.vifi)) {
+ vr.icount=vif->pkt_in;
+ vr.ocount=vif->pkt_out;
+ vr.ibytes=vif->bytes_in;
+ vr.obytes=vif->bytes_out;
read_unlock(&mrt_lock);
- return -EADDRNOTAVAIL;
- case SIOCGETSGCNT:
- if (copy_from_user(&sr,arg,sizeof(sr)))
- return -EFAULT;
- read_lock(&mrt_lock);
- c = ipmr_cache_find(sr.src.s_addr, sr.grp.s_addr);
- if (c) {
- sr.pktcnt = c->mfc_un.res.pkt;
- sr.bytecnt = c->mfc_un.res.bytes;
- sr.wrong_if = c->mfc_un.res.wrong_if;
- read_unlock(&mrt_lock);
-
- if (copy_to_user(arg,&sr,sizeof(sr)))
- return -EFAULT;
- return 0;
- }
+ if (copy_to_user(arg,&vr,sizeof(vr)))
+ return -EFAULT;
+ return 0;
+ }
+ read_unlock(&mrt_lock);
+ return -EADDRNOTAVAIL;
+ case SIOCGETSGCNT:
+ if (copy_from_user(&sr,arg,sizeof(sr)))
+ return -EFAULT;
+
+ read_lock(&mrt_lock);
+ c = ipmr_cache_find(sr.src.s_addr, sr.grp.s_addr);
+ if (c) {
+ sr.pktcnt = c->mfc_un.res.pkt;
+ sr.bytecnt = c->mfc_un.res.bytes;
+ sr.wrong_if = c->mfc_un.res.wrong_if;
read_unlock(&mrt_lock);
- return -EADDRNOTAVAIL;
- default:
- return -ENOIOCTLCMD;
+
+ if (copy_to_user(arg,&sr,sizeof(sr)))
+ return -EFAULT;
+ return 0;
+ }
+ read_unlock(&mrt_lock);
+ return -EADDRNOTAVAIL;
+ default:
+ return -ENOIOCTLCMD;
}
}
@@ -1076,7 +1080,7 @@ static int ipmr_device_event(struct notifier_block *this, unsigned long event, v
if (event != NETDEV_UNREGISTER)
return NOTIFY_DONE;
v=&vif_table[0];
- for(ct=0;ct<maxvif;ct++,v++) {
+ for (ct=0;ct<maxvif;ct++,v++) {
if (v->dev==ptr)
vif_delete(ct);
}
@@ -1096,11 +1100,17 @@ static struct notifier_block ip_mr_notifier={
static void ip_encap(struct sk_buff *skb, __be32 saddr, __be32 daddr)
{
- struct iphdr *iph = (struct iphdr *)skb_push(skb,sizeof(struct iphdr));
+ struct iphdr *iph;
+ struct iphdr *old_iph = ip_hdr(skb);
+
+ skb_push(skb, sizeof(struct iphdr));
+ skb->transport_header = skb->network_header;
+ skb_reset_network_header(skb);
+ iph = ip_hdr(skb);
iph->version = 4;
- iph->tos = skb->nh.iph->tos;
- iph->ttl = skb->nh.iph->ttl;
+ iph->tos = old_iph->tos;
+ iph->ttl = old_iph->ttl;
iph->frag_off = 0;
iph->daddr = daddr;
iph->saddr = saddr;
@@ -1110,8 +1120,6 @@ static void ip_encap(struct sk_buff *skb, __be32 saddr, __be32 daddr)
ip_select_ident(iph, skb->dst, NULL);
ip_send_check(iph);
- skb->h.ipiph = skb->nh.iph;
- skb->nh.iph = iph;
memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
nf_reset(skb);
}
@@ -1134,7 +1142,7 @@ static inline int ipmr_forward_finish(struct sk_buff *skb)
static void ipmr_queue_xmit(struct sk_buff *skb, struct mfc_cache *c, int vifi)
{
- struct iphdr *iph = skb->nh.iph;
+ const struct iphdr *iph = ip_hdr(skb);
struct vif_device *vif = &vif_table[vifi];
struct net_device *dev;
struct rtable *rt;
@@ -1200,8 +1208,7 @@ static void ipmr_queue_xmit(struct sk_buff *skb, struct mfc_cache *c, int vifi)
dst_release(skb->dst);
skb->dst = &rt->u.dst;
- iph = skb->nh.iph;
- ip_decrease_ttl(iph);
+ ip_decrease_ttl(ip_hdr(skb));
/* FIXME: forward and output firewalls used to be called here.
* What do we do with netfilter? -- RR */
@@ -1301,7 +1308,7 @@ static int ip_mr_forward(struct sk_buff *skb, struct mfc_cache *cache, int local
* Forward the frame
*/
for (ct = cache->mfc_un.res.maxvif-1; ct >= cache->mfc_un.res.minvif; ct--) {
- if (skb->nh.iph->ttl > cache->mfc_un.res.ttls[ct]) {
+ if (ip_hdr(skb)->ttl > cache->mfc_un.res.ttls[ct]) {
if (psend != -1) {
struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
if (skb2)
@@ -1347,7 +1354,7 @@ int ip_mr_input(struct sk_buff *skb)
if (IPCB(skb)->opt.router_alert) {
if (ip_call_ra_chain(skb))
return 0;
- } else if (skb->nh.iph->protocol == IPPROTO_IGMP){
+ } else if (ip_hdr(skb)->protocol == IPPROTO_IGMP){
/* IGMPv1 (and broken IGMPv2 implementations sort of
Cisco IOS <= 11.2(8)) do not put router alert
option to IGMP packets destined to routable
@@ -1366,7 +1373,7 @@ int ip_mr_input(struct sk_buff *skb)
}
read_lock(&mrt_lock);
- cache = ipmr_cache_find(skb->nh.iph->saddr, skb->nh.iph->daddr);
+ cache = ipmr_cache_find(ip_hdr(skb)->saddr, ip_hdr(skb)->daddr);
/*
* No usable cache entry
@@ -1426,14 +1433,15 @@ int pim_rcv_v1(struct sk_buff * skb)
if (!pskb_may_pull(skb, sizeof(*pim) + sizeof(*encap)))
goto drop;
- pim = (struct igmphdr*)skb->h.raw;
+ pim = igmp_hdr(skb);
if (!mroute_do_pim ||
skb->len < sizeof(*pim) + sizeof(*encap) ||
pim->group != PIM_V1_VERSION || pim->code != PIM_V1_REGISTER)
goto drop;
- encap = (struct iphdr*)(skb->h.raw + sizeof(struct igmphdr));
+ encap = (struct iphdr *)(skb_transport_header(skb) +
+ sizeof(struct igmphdr));
/*
Check that:
	   a. packet is really destined to a multicast group
@@ -1455,9 +1463,9 @@ int pim_rcv_v1(struct sk_buff * skb)
if (reg_dev == NULL)
goto drop;
- skb->mac.raw = skb->nh.raw;
+ skb->mac_header = skb->network_header;
skb_pull(skb, (u8*)encap - skb->data);
- skb->nh.iph = (struct iphdr *)skb->data;
+ skb_reset_network_header(skb);
skb->dev = reg_dev;
skb->protocol = htons(ETH_P_IP);
skb->ip_summed = 0;
@@ -1486,7 +1494,7 @@ static int pim_rcv(struct sk_buff * skb)
if (!pskb_may_pull(skb, sizeof(*pim) + sizeof(*encap)))
goto drop;
- pim = (struct pimreghdr*)skb->h.raw;
+ pim = (struct pimreghdr *)skb_transport_header(skb);
if (pim->type != ((PIM_VERSION<<4)|(PIM_REGISTER)) ||
(pim->flags&PIM_NULL_REGISTER) ||
(ip_compute_csum((void *)pim, sizeof(*pim)) != 0 &&
@@ -1494,7 +1502,8 @@ static int pim_rcv(struct sk_buff * skb)
goto drop;
/* check if the inner packet is destined to mcast group */
- encap = (struct iphdr*)(skb->h.raw + sizeof(struct pimreghdr));
+ encap = (struct iphdr *)(skb_transport_header(skb) +
+ sizeof(struct pimreghdr));
if (!MULTICAST(encap->daddr) ||
encap->tot_len == 0 ||
ntohs(encap->tot_len) + sizeof(*pim) > skb->len)
@@ -1510,9 +1519,9 @@ static int pim_rcv(struct sk_buff * skb)
if (reg_dev == NULL)
goto drop;
- skb->mac.raw = skb->nh.raw;
+ skb->mac_header = skb->network_header;
skb_pull(skb, (u8*)encap - skb->data);
- skb->nh.iph = (struct iphdr *)skb->data;
+ skb_reset_network_header(skb);
skb->dev = reg_dev;
skb->protocol = htons(ETH_P_IP);
skb->ip_summed = 0;
@@ -1537,7 +1546,7 @@ ipmr_fill_mroute(struct sk_buff *skb, struct mfc_cache *c, struct rtmsg *rtm)
int ct;
struct rtnexthop *nhp;
struct net_device *dev = vif_table[c->mfc_parent].dev;
- u8 *b = skb->tail;
+ u8 *b = skb_tail_pointer(skb);
struct rtattr *mp_head;
if (dev)
@@ -1557,12 +1566,12 @@ ipmr_fill_mroute(struct sk_buff *skb, struct mfc_cache *c, struct rtmsg *rtm)
}
}
mp_head->rta_type = RTA_MULTIPATH;
- mp_head->rta_len = skb->tail - (u8*)mp_head;
+ mp_head->rta_len = skb_tail_pointer(skb) - (u8 *)mp_head;
rtm->rtm_type = RTN_MULTICAST;
return 1;
rtattr_failure:
- skb_trim(skb, b - skb->data);
+ nlmsg_trim(skb, b);
return -EMSGSIZE;
}
@@ -1577,6 +1586,7 @@ int ipmr_get_route(struct sk_buff *skb, struct rtmsg *rtm, int nowait)
if (cache==NULL) {
struct sk_buff *skb2;
+ struct iphdr *iph;
struct net_device *dev;
int vif;
@@ -1596,11 +1606,13 @@ int ipmr_get_route(struct sk_buff *skb, struct rtmsg *rtm, int nowait)
return -ENOMEM;
}
- skb2->nh.raw = skb_push(skb2, sizeof(struct iphdr));
- skb2->nh.iph->ihl = sizeof(struct iphdr)>>2;
- skb2->nh.iph->saddr = rt->rt_src;
- skb2->nh.iph->daddr = rt->rt_dst;
- skb2->nh.iph->version = 0;
+ skb_push(skb2, sizeof(struct iphdr));
+ skb_reset_network_header(skb2);
+ iph = ip_hdr(skb2);
+ iph->ihl = sizeof(struct iphdr) >> 2;
+ iph->saddr = rt->rt_src;
+ iph->daddr = rt->rt_dst;
+ iph->version = 0;
err = ipmr_cache_unresolved(vif, skb2);
read_unlock(&mrt_lock);
return err;
@@ -1625,7 +1637,7 @@ static struct vif_device *ipmr_vif_seq_idx(struct ipmr_vif_iter *iter,
loff_t pos)
{
for (iter->ct = 0; iter->ct < maxvif; ++iter->ct) {
- if(!VIF_EXISTS(iter->ct))
+ if (!VIF_EXISTS(iter->ct))
continue;
if (pos-- == 0)
return &vif_table[iter->ct];
@@ -1649,7 +1661,7 @@ static void *ipmr_vif_seq_next(struct seq_file *seq, void *v, loff_t *pos)
return ipmr_vif_seq_idx(iter, 0);
while (++iter->ct < maxvif) {
- if(!VIF_EXISTS(iter->ct))
+ if (!VIF_EXISTS(iter->ct))
continue;
return &vif_table[iter->ct];
}
@@ -1680,7 +1692,7 @@ static int ipmr_vif_seq_show(struct seq_file *seq, void *v)
return 0;
}
-static struct seq_operations ipmr_vif_seq_ops = {
+static const struct seq_operations ipmr_vif_seq_ops = {
.start = ipmr_vif_seq_start,
.next = ipmr_vif_seq_next,
.stop = ipmr_vif_seq_stop,
@@ -1732,14 +1744,14 @@ static struct mfc_cache *ipmr_mfc_seq_idx(struct ipmr_mfc_iter *it, loff_t pos)
it->cache = mfc_cache_array;
read_lock(&mrt_lock);
for (it->ct = 0; it->ct < MFC_LINES; it->ct++)
- for(mfc = mfc_cache_array[it->ct]; mfc; mfc = mfc->next)
+ for (mfc = mfc_cache_array[it->ct]; mfc; mfc = mfc->next)
if (pos-- == 0)
return mfc;
read_unlock(&mrt_lock);
it->cache = &mfc_unres_queue;
spin_lock_bh(&mfc_unres_lock);
- for(mfc = mfc_unres_queue; mfc; mfc = mfc->next)
+ for (mfc = mfc_unres_queue; mfc; mfc = mfc->next)
if (pos-- == 0)
return mfc;
spin_unlock_bh(&mfc_unres_lock);
@@ -1829,9 +1841,9 @@ static int ipmr_mfc_seq_show(struct seq_file *seq, void *v)
mfc->mfc_un.res.wrong_if);
if (it->cache != &mfc_unres_queue) {
- for(n = mfc->mfc_un.res.minvif;
- n < mfc->mfc_un.res.maxvif; n++ ) {
- if(VIF_EXISTS(n)
+ for (n = mfc->mfc_un.res.minvif;
+ n < mfc->mfc_un.res.maxvif; n++ ) {
+ if (VIF_EXISTS(n)
&& mfc->mfc_un.res.ttls[n] < 255)
seq_printf(seq,
" %2d:%-3d",
@@ -1843,7 +1855,7 @@ static int ipmr_mfc_seq_show(struct seq_file *seq, void *v)
return 0;
}
-static struct seq_operations ipmr_mfc_seq_ops = {
+static const struct seq_operations ipmr_mfc_seq_ops = {
.start = ipmr_mfc_seq_start,
.next = ipmr_mfc_seq_next,
.stop = ipmr_mfc_seq_stop,
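
The reindented MRT_INIT case keeps its semantics: only a raw IGMP socket may become the mroute socket, and only one at a time. A userspace sketch of that handshake; it needs CAP_NET_ADMIN, and the fallback define assumes MRT_INIT's value from <linux/mroute.h> (MRT_BASE, 200):

#include <netinet/in.h>
#include <stdio.h>
#include <sys/socket.h>
#include <unistd.h>

#ifndef MRT_INIT
#define MRT_INIT 200	/* MRT_BASE in <linux/mroute.h>; assumed unchanged */
#endif

int main(void)
{
	int fd = socket(AF_INET, SOCK_RAW, IPPROTO_IGMP);
	int one = 1;

	if (fd < 0) {
		perror("socket (SOCK_RAW needs CAP_NET_ADMIN)");
		return 1;
	}
	if (setsockopt(fd, IPPROTO_IP, MRT_INIT, &one, sizeof(one)) < 0)
		perror("MRT_INIT");	/* EADDRINUSE if a routing daemon owns it */
	close(fd);
	return 0;
}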
diff --git a/net/ipv4/ipvs/ip_vs_app.c b/net/ipv4/ipvs/ip_vs_app.c
index 22e104c6a49..15ad5dd2d98 100644
--- a/net/ipv4/ipvs/ip_vs_app.c
+++ b/net/ipv4/ipvs/ip_vs_app.c
@@ -331,14 +331,14 @@ static inline int app_tcp_pkt_out(struct ip_vs_conn *cp, struct sk_buff **pskb,
struct ip_vs_app *app)
{
int diff;
- unsigned int tcp_offset = (*pskb)->nh.iph->ihl*4;
+ const unsigned int tcp_offset = ip_hdrlen(*pskb);
struct tcphdr *th;
__u32 seq;
if (!ip_vs_make_skb_writable(pskb, tcp_offset + sizeof(*th)))
return 0;
- th = (struct tcphdr *)((*pskb)->nh.raw + tcp_offset);
+ th = (struct tcphdr *)(skb_network_header(*pskb) + tcp_offset);
/*
* Remember seq number in case this pkt gets resized
@@ -406,14 +406,14 @@ static inline int app_tcp_pkt_in(struct ip_vs_conn *cp, struct sk_buff **pskb,
struct ip_vs_app *app)
{
int diff;
- unsigned int tcp_offset = (*pskb)->nh.iph->ihl*4;
+ const unsigned int tcp_offset = ip_hdrlen(*pskb);
struct tcphdr *th;
__u32 seq;
if (!ip_vs_make_skb_writable(pskb, tcp_offset + sizeof(*th)))
return 0;
- th = (struct tcphdr *)((*pskb)->nh.raw + tcp_offset);
+ th = (struct tcphdr *)(skb_network_header(*pskb) + tcp_offset);
/*
* Remember seq number in case this pkt gets resized
@@ -577,7 +577,6 @@ static const struct file_operations ip_vs_app_fops = {
int ip_vs_skb_replace(struct sk_buff *skb, gfp_t pri,
char *o_buf, int o_len, char *n_buf, int n_len)
{
- struct iphdr *iph;
int diff;
int o_offset;
int o_left;
@@ -603,12 +602,11 @@ int ip_vs_skb_replace(struct sk_buff *skb, gfp_t pri,
skb_put(skb, diff);
memmove(skb->data + o_offset + n_len,
skb->data + o_offset + o_len, o_left);
- memcpy(skb->data + o_offset, n_buf, n_len);
+ skb_copy_to_linear_data_offset(skb, o_offset, n_buf, n_len);
}
/* must update the iph total length here */
- iph = skb->nh.iph;
- iph->tot_len = htons(skb->len);
+ ip_hdr(skb)->tot_len = htons(skb->len);
LeaveFunction(9);
return 0;
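
Throughout IPVS, possibly-fragmented headers are read through skb_header_pointer() into a stack copy (see is_tcp_reset() in the ip_vs_core.c hunks below). A flat-buffer sketch of that bounded-copy idiom; the function name and flat buffer are assumptions, since a real skb may be nonlinear:

#include <stddef.h>
#include <string.h>

/* Copies len bytes at offset into buf and returns buf, or NULL if the
 * packet is too short; the real skb_header_pointer() also walks
 * fragments, which a flat buffer cannot show. */
static const void *header_pointer(const unsigned char *pkt, size_t pkt_len,
				  size_t offset, size_t len, void *buf)
{
	if (offset + len > pkt_len)
		return NULL;
	memcpy(buf, pkt + offset, len);
	return buf;
}

int main(void)
{
	unsigned char pkt[40] = { 0 };
	unsigned char th[20];

	/* read a 20-byte "TCP header" at offset 20, as is_tcp_reset() does */
	return header_pointer(pkt, sizeof(pkt), 20, sizeof(th), th) ? 0 : 1;
}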
diff --git a/net/ipv4/ipvs/ip_vs_core.c b/net/ipv4/ipvs/ip_vs_core.c
index 24d7b66eb6d..f005a2f929f 100644
--- a/net/ipv4/ipvs/ip_vs_core.c
+++ b/net/ipv4/ipvs/ip_vs_core.c
@@ -212,7 +212,7 @@ ip_vs_sched_persist(struct ip_vs_service *svc,
__be16 ports[2])
{
struct ip_vs_conn *cp = NULL;
- struct iphdr *iph = skb->nh.iph;
+ struct iphdr *iph = ip_hdr(skb);
struct ip_vs_dest *dest;
struct ip_vs_conn *ct;
__be16 dport; /* destination port to forward */
@@ -381,7 +381,7 @@ struct ip_vs_conn *
ip_vs_schedule(struct ip_vs_service *svc, const struct sk_buff *skb)
{
struct ip_vs_conn *cp = NULL;
- struct iphdr *iph = skb->nh.iph;
+ struct iphdr *iph = ip_hdr(skb);
struct ip_vs_dest *dest;
__be16 _ports[2], *pptr;
@@ -447,7 +447,7 @@ int ip_vs_leave(struct ip_vs_service *svc, struct sk_buff *skb,
struct ip_vs_protocol *pp)
{
__be16 _ports[2], *pptr;
- struct iphdr *iph = skb->nh.iph;
+ struct iphdr *iph = ip_hdr(skb);
pptr = skb_header_pointer(skb, iph->ihl*4,
sizeof(_ports), _ports);
@@ -546,7 +546,7 @@ ip_vs_gather_frags(struct sk_buff *skb, u_int32_t user)
{
skb = ip_defrag(skb, user);
if (skb)
- ip_send_check(skb->nh.iph);
+ ip_send_check(ip_hdr(skb));
return skb;
}
@@ -557,9 +557,10 @@ ip_vs_gather_frags(struct sk_buff *skb, u_int32_t user)
void ip_vs_nat_icmp(struct sk_buff *skb, struct ip_vs_protocol *pp,
struct ip_vs_conn *cp, int inout)
{
- struct iphdr *iph = skb->nh.iph;
+ struct iphdr *iph = ip_hdr(skb);
unsigned int icmp_offset = iph->ihl*4;
- struct icmphdr *icmph = (struct icmphdr *)(skb->nh.raw + icmp_offset);
+ struct icmphdr *icmph = (struct icmphdr *)(skb_network_header(skb) +
+ icmp_offset);
struct iphdr *ciph = (struct iphdr *)(icmph + 1);
if (inout) {
@@ -617,14 +618,14 @@ static int ip_vs_out_icmp(struct sk_buff **pskb, int *related)
*related = 1;
/* reassemble IP fragments */
- if (skb->nh.iph->frag_off & __constant_htons(IP_MF|IP_OFFSET)) {
+ if (ip_hdr(skb)->frag_off & htons(IP_MF | IP_OFFSET)) {
skb = ip_vs_gather_frags(skb, IP_DEFRAG_VS_OUT);
if (!skb)
return NF_STOLEN;
*pskb = skb;
}
- iph = skb->nh.iph;
+ iph = ip_hdr(skb);
offset = ihl = iph->ihl * 4;
ic = skb_header_pointer(skb, offset, sizeof(_icmph), &_icmph);
if (ic == NULL)
@@ -659,7 +660,7 @@ static int ip_vs_out_icmp(struct sk_buff **pskb, int *related)
return NF_ACCEPT;
/* Is the embedded protocol header present? */
- if (unlikely(cih->frag_off & __constant_htons(IP_OFFSET) &&
+ if (unlikely(cih->frag_off & htons(IP_OFFSET) &&
pp->dont_defrag))
return NF_ACCEPT;
@@ -680,8 +681,7 @@ static int ip_vs_out_icmp(struct sk_buff **pskb, int *related)
}
/* Ensure the checksum is correct */
- if (skb->ip_summed != CHECKSUM_UNNECESSARY &&
- ip_vs_checksum_complete(skb, ihl)) {
+ if (!skb_csum_unnecessary(skb) && ip_vs_checksum_complete(skb, ihl)) {
/* Failed checksum! */
IP_VS_DBG(1, "Forward ICMP: failed checksum from %d.%d.%d.%d!\n",
NIPQUAD(iph->saddr));
@@ -712,8 +712,7 @@ static inline int is_tcp_reset(const struct sk_buff *skb)
{
struct tcphdr _tcph, *th;
- th = skb_header_pointer(skb, skb->nh.iph->ihl * 4,
- sizeof(_tcph), &_tcph);
+ th = skb_header_pointer(skb, ip_hdrlen(skb), sizeof(_tcph), &_tcph);
if (th == NULL)
return 0;
return th->rst;
@@ -740,14 +739,14 @@ ip_vs_out(unsigned int hooknum, struct sk_buff **pskb,
if (skb->ipvs_property)
return NF_ACCEPT;
- iph = skb->nh.iph;
+ iph = ip_hdr(skb);
if (unlikely(iph->protocol == IPPROTO_ICMP)) {
int related, verdict = ip_vs_out_icmp(pskb, &related);
if (related)
return verdict;
skb = *pskb;
- iph = skb->nh.iph;
+ iph = ip_hdr(skb);
}
pp = ip_vs_proto_get(iph->protocol);
@@ -755,12 +754,12 @@ ip_vs_out(unsigned int hooknum, struct sk_buff **pskb,
return NF_ACCEPT;
/* reassemble IP fragments */
- if (unlikely(iph->frag_off & __constant_htons(IP_MF|IP_OFFSET) &&
+ if (unlikely(iph->frag_off & htons(IP_MF|IP_OFFSET) &&
!pp->dont_defrag)) {
skb = ip_vs_gather_frags(skb, IP_DEFRAG_VS_OUT);
if (!skb)
return NF_STOLEN;
- iph = skb->nh.iph;
+ iph = ip_hdr(skb);
*pskb = skb;
}
@@ -810,8 +809,8 @@ ip_vs_out(unsigned int hooknum, struct sk_buff **pskb,
if (pp->snat_handler && !pp->snat_handler(pskb, pp, cp))
goto drop;
skb = *pskb;
- skb->nh.iph->saddr = cp->vaddr;
- ip_send_check(skb->nh.iph);
+ ip_hdr(skb)->saddr = cp->vaddr;
+ ip_send_check(ip_hdr(skb));
/* For policy routing, packets originating from this
* machine itself may be routed differently to packets
@@ -861,7 +860,7 @@ ip_vs_in_icmp(struct sk_buff **pskb, int *related, unsigned int hooknum)
*related = 1;
/* reassemble IP fragments */
- if (skb->nh.iph->frag_off & __constant_htons(IP_MF|IP_OFFSET)) {
+ if (ip_hdr(skb)->frag_off & htons(IP_MF | IP_OFFSET)) {
skb = ip_vs_gather_frags(skb,
hooknum == NF_IP_LOCAL_IN ?
IP_DEFRAG_VS_IN : IP_DEFRAG_VS_FWD);
@@ -870,7 +869,7 @@ ip_vs_in_icmp(struct sk_buff **pskb, int *related, unsigned int hooknum)
*pskb = skb;
}
- iph = skb->nh.iph;
+ iph = ip_hdr(skb);
offset = ihl = iph->ihl * 4;
ic = skb_header_pointer(skb, offset, sizeof(_icmph), &_icmph);
if (ic == NULL)
@@ -905,7 +904,7 @@ ip_vs_in_icmp(struct sk_buff **pskb, int *related, unsigned int hooknum)
return NF_ACCEPT;
/* Is the embedded protocol header present? */
- if (unlikely(cih->frag_off & __constant_htons(IP_OFFSET) &&
+ if (unlikely(cih->frag_off & htons(IP_OFFSET) &&
pp->dont_defrag))
return NF_ACCEPT;
@@ -921,8 +920,7 @@ ip_vs_in_icmp(struct sk_buff **pskb, int *related, unsigned int hooknum)
verdict = NF_DROP;
/* Ensure the checksum is correct */
- if (skb->ip_summed != CHECKSUM_UNNECESSARY &&
- ip_vs_checksum_complete(skb, ihl)) {
+ if (!skb_csum_unnecessary(skb) && ip_vs_checksum_complete(skb, ihl)) {
/* Failed checksum! */
IP_VS_DBG(1, "Incoming ICMP: failed checksum from %d.%d.%d.%d!\n",
NIPQUAD(iph->saddr));
@@ -966,19 +964,19 @@ ip_vs_in(unsigned int hooknum, struct sk_buff **pskb,
|| skb->dev == &loopback_dev || skb->sk)) {
IP_VS_DBG(12, "packet type=%d proto=%d daddr=%d.%d.%d.%d ignored\n",
skb->pkt_type,
- skb->nh.iph->protocol,
- NIPQUAD(skb->nh.iph->daddr));
+ ip_hdr(skb)->protocol,
+ NIPQUAD(ip_hdr(skb)->daddr));
return NF_ACCEPT;
}
- iph = skb->nh.iph;
+ iph = ip_hdr(skb);
if (unlikely(iph->protocol == IPPROTO_ICMP)) {
int related, verdict = ip_vs_in_icmp(pskb, &related, hooknum);
if (related)
return verdict;
skb = *pskb;
- iph = skb->nh.iph;
+ iph = ip_hdr(skb);
}
/* Protocol supported? */
@@ -1064,7 +1062,7 @@ ip_vs_forward_icmp(unsigned int hooknum, struct sk_buff **pskb,
{
int r;
- if ((*pskb)->nh.iph->protocol != IPPROTO_ICMP)
+ if (ip_hdr(*pskb)->protocol != IPPROTO_ICMP)
return NF_ACCEPT;
return ip_vs_in_icmp(pskb, &r, hooknum);
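The ip_vs_core.c hunks above follow two conversions that recur throughout
this patch: direct skb->nh.iph loads become the ip_hdr() accessor (with
ip_hdrlen() for the header length in bytes), and the open-coded
skb->ip_summed != CHECKSUM_UNNECESSARY test becomes skb_csum_unnecessary().
A minimal sketch of the new idiom (illustration only, not part of the
patch; the function name is invented):

        #include <linux/in.h>
        #include <linux/ip.h>
        #include <linux/skbuff.h>
        #include <net/ip.h>

        static int example_needs_csum_check(const struct sk_buff *skb)
        {
                const struct iphdr *iph = ip_hdr(skb);  /* was: skb->nh.iph */
                unsigned int ihl = ip_hdrlen(skb);      /* was: iph->ihl * 4 */

                /* skb_csum_unnecessary() wraps the CHECKSUM_UNNECESSARY test,
                 * so callers stop poking skb->ip_summed directly. When this
                 * returns true, the caller verifies skb->len - ihl bytes. */
                return iph->protocol == IPPROTO_ICMP &&
                       !skb_csum_unnecessary(skb);
        }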
diff --git a/net/ipv4/ipvs/ip_vs_dh.c b/net/ipv4/ipvs/ip_vs_dh.c
index 502111fba87..dcf5d46aaa5 100644
--- a/net/ipv4/ipvs/ip_vs_dh.c
+++ b/net/ipv4/ipvs/ip_vs_dh.c
@@ -204,7 +204,7 @@ ip_vs_dh_schedule(struct ip_vs_service *svc, const struct sk_buff *skb)
{
struct ip_vs_dest *dest;
struct ip_vs_dh_bucket *tbl;
- struct iphdr *iph = skb->nh.iph;
+ struct iphdr *iph = ip_hdr(skb);
IP_VS_DBG(6, "ip_vs_dh_schedule(): Scheduling...\n");
diff --git a/net/ipv4/ipvs/ip_vs_ftp.c b/net/ipv4/ipvs/ip_vs_ftp.c
index 847c47af040..344ddbbdc75 100644
--- a/net/ipv4/ipvs/ip_vs_ftp.c
+++ b/net/ipv4/ipvs/ip_vs_ftp.c
@@ -159,10 +159,10 @@ static int ip_vs_ftp_out(struct ip_vs_app *app, struct ip_vs_conn *cp,
return 0;
if (cp->app_data == &ip_vs_ftp_pasv) {
- iph = (*pskb)->nh.iph;
+ iph = ip_hdr(*pskb);
th = (struct tcphdr *)&(((char *)iph)[iph->ihl*4]);
data = (char *)th + (th->doff << 2);
- data_limit = (*pskb)->tail;
+ data_limit = skb_tail_pointer(*pskb);
if (ip_vs_ftp_get_addrport(data, data_limit,
SERVER_STRING,
@@ -262,14 +262,14 @@ static int ip_vs_ftp_in(struct ip_vs_app *app, struct ip_vs_conn *cp,
/*
* Detecting whether it is passive
*/
- iph = (*pskb)->nh.iph;
+ iph = ip_hdr(*pskb);
th = (struct tcphdr *)&(((char *)iph)[iph->ihl*4]);
/* Since there may be OPTIONS in the TCP packet and the HLEN is
the length of the header in 32-bit multiples, it is accurate
to calculate data address by th+HLEN*4 */
data = data_start = (char *)th + (th->doff << 2);
- data_limit = (*pskb)->tail;
+ data_limit = skb_tail_pointer(*pskb);
while (data <= data_limit - 6) {
if (strnicmp(data, "PASV\r\n", 6) == 0) {
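In the FTP helper the parse limit now comes from skb_tail_pointer()
rather than from skb->tail directly; skb->tail has become an
sk_buff_data_t (an offset rather than a pointer on 64-bit builds, as the
old_transport_header variable in the ip_vs_xmit.c hunk below also
shows), so the accessor is the only portable way to reach the end of
the linear data. A hedged sketch of the bounded scan (names invented
for illustration):

        #include <linux/skbuff.h>
        #include <linux/string.h>

        static int example_find_pasv(const struct sk_buff *skb, char *data)
        {
                char *limit = (char *)skb_tail_pointer(skb); /* was: skb->tail */

                /* Never read past the end of the linear buffer. The real
                 * helper uses strnicmp() for a case-insensitive match. */
                while (data <= limit - 6) {
                        if (memcmp(data, "PASV\r\n", 6) == 0)
                                return 1;
                        data++;
                }
                return 0;
        }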
diff --git a/net/ipv4/ipvs/ip_vs_lblc.c b/net/ipv4/ipvs/ip_vs_lblc.c
index c801273cb88..052f4ed5917 100644
--- a/net/ipv4/ipvs/ip_vs_lblc.c
+++ b/net/ipv4/ipvs/ip_vs_lblc.c
@@ -521,7 +521,7 @@ ip_vs_lblc_schedule(struct ip_vs_service *svc, const struct sk_buff *skb)
struct ip_vs_dest *dest;
struct ip_vs_lblc_table *tbl;
struct ip_vs_lblc_entry *en;
- struct iphdr *iph = skb->nh.iph;
+ struct iphdr *iph = ip_hdr(skb);
IP_VS_DBG(6, "ip_vs_lblc_schedule(): Scheduling...\n");
diff --git a/net/ipv4/ipvs/ip_vs_lblcr.c b/net/ipv4/ipvs/ip_vs_lblcr.c
index 23f9b9e73c8..6225acac7a3 100644
--- a/net/ipv4/ipvs/ip_vs_lblcr.c
+++ b/net/ipv4/ipvs/ip_vs_lblcr.c
@@ -775,7 +775,7 @@ ip_vs_lblcr_schedule(struct ip_vs_service *svc, const struct sk_buff *skb)
struct ip_vs_dest *dest;
struct ip_vs_lblcr_table *tbl;
struct ip_vs_lblcr_entry *en;
- struct iphdr *iph = skb->nh.iph;
+ struct iphdr *iph = ip_hdr(skb);
IP_VS_DBG(6, "ip_vs_lblcr_schedule(): Scheduling...\n");
diff --git a/net/ipv4/ipvs/ip_vs_proto_ah.c b/net/ipv4/ipvs/ip_vs_proto_ah.c
index 8b0505b0931..a842676e1c6 100644
--- a/net/ipv4/ipvs/ip_vs_proto_ah.c
+++ b/net/ipv4/ipvs/ip_vs_proto_ah.c
@@ -52,15 +52,15 @@ ah_conn_in_get(const struct sk_buff *skb,
if (likely(!inverse)) {
cp = ip_vs_conn_in_get(IPPROTO_UDP,
iph->saddr,
- __constant_htons(PORT_ISAKMP),
+ htons(PORT_ISAKMP),
iph->daddr,
- __constant_htons(PORT_ISAKMP));
+ htons(PORT_ISAKMP));
} else {
cp = ip_vs_conn_in_get(IPPROTO_UDP,
iph->daddr,
- __constant_htons(PORT_ISAKMP),
+ htons(PORT_ISAKMP),
iph->saddr,
- __constant_htons(PORT_ISAKMP));
+ htons(PORT_ISAKMP));
}
if (!cp) {
@@ -89,15 +89,15 @@ ah_conn_out_get(const struct sk_buff *skb, struct ip_vs_protocol *pp,
if (likely(!inverse)) {
cp = ip_vs_conn_out_get(IPPROTO_UDP,
iph->saddr,
- __constant_htons(PORT_ISAKMP),
+ htons(PORT_ISAKMP),
iph->daddr,
- __constant_htons(PORT_ISAKMP));
+ htons(PORT_ISAKMP));
} else {
cp = ip_vs_conn_out_get(IPPROTO_UDP,
iph->daddr,
- __constant_htons(PORT_ISAKMP),
+ htons(PORT_ISAKMP),
iph->saddr,
- __constant_htons(PORT_ISAKMP));
+ htons(PORT_ISAKMP));
}
if (!cp) {
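The __constant_htons() -> htons() conversions here and elsewhere in the
patch are purely cosmetic: htons() detects constant arguments itself
(via __builtin_constant_p) and folds them at compile time, so the
explicit __constant_ spelling buys nothing. Illustration only
(PORT_ISAKMP is the UDP port this file matches on, 500 for ISAKMP/IKE):

        #include <asm/byteorder.h>
        #include <linux/types.h>

        #define PORT_ISAKMP     500

        static int example_is_isakmp(__be16 port)
        {
                /* Identical object code to __constant_htons(); the plain
                 * form is now the preferred spelling. */
                return port == htons(PORT_ISAKMP);
        }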
diff --git a/net/ipv4/ipvs/ip_vs_proto_tcp.c b/net/ipv4/ipvs/ip_vs_proto_tcp.c
index 16a9ebee2fe..e65577a7700 100644
--- a/net/ipv4/ipvs/ip_vs_proto_tcp.c
+++ b/net/ipv4/ipvs/ip_vs_proto_tcp.c
@@ -76,16 +76,15 @@ tcp_conn_schedule(struct sk_buff *skb,
struct ip_vs_service *svc;
struct tcphdr _tcph, *th;
- th = skb_header_pointer(skb, skb->nh.iph->ihl*4,
- sizeof(_tcph), &_tcph);
+ th = skb_header_pointer(skb, ip_hdrlen(skb), sizeof(_tcph), &_tcph);
if (th == NULL) {
*verdict = NF_DROP;
return 0;
}
if (th->syn &&
- (svc = ip_vs_service_get(skb->mark, skb->nh.iph->protocol,
- skb->nh.iph->daddr, th->dest))) {
+ (svc = ip_vs_service_get(skb->mark, ip_hdr(skb)->protocol,
+ ip_hdr(skb)->daddr, th->dest))) {
if (ip_vs_todrop()) {
/*
* It seems that we are very loaded.
@@ -127,7 +126,7 @@ tcp_snat_handler(struct sk_buff **pskb,
struct ip_vs_protocol *pp, struct ip_vs_conn *cp)
{
struct tcphdr *tcph;
- unsigned int tcphoff = (*pskb)->nh.iph->ihl * 4;
+ const unsigned int tcphoff = ip_hdrlen(*pskb);
/* csum_check requires unshared skb */
if (!ip_vs_make_skb_writable(pskb, tcphoff+sizeof(*tcph)))
@@ -143,7 +142,7 @@ tcp_snat_handler(struct sk_buff **pskb,
return 0;
}
- tcph = (void *)(*pskb)->nh.iph + tcphoff;
+ tcph = (void *)ip_hdr(*pskb) + tcphoff;
tcph->source = cp->vport;
/* Adjust TCP checksums */
@@ -175,7 +174,7 @@ tcp_dnat_handler(struct sk_buff **pskb,
struct ip_vs_protocol *pp, struct ip_vs_conn *cp)
{
struct tcphdr *tcph;
- unsigned int tcphoff = (*pskb)->nh.iph->ihl * 4;
+ const unsigned int tcphoff = ip_hdrlen(*pskb);
/* csum_check requires unshared skb */
if (!ip_vs_make_skb_writable(pskb, tcphoff+sizeof(*tcph)))
@@ -194,7 +193,7 @@ tcp_dnat_handler(struct sk_buff **pskb,
return 0;
}
- tcph = (void *)(*pskb)->nh.iph + tcphoff;
+ tcph = (void *)ip_hdr(*pskb) + tcphoff;
tcph->dest = cp->dport;
/*
@@ -224,15 +223,15 @@ tcp_dnat_handler(struct sk_buff **pskb,
static int
tcp_csum_check(struct sk_buff *skb, struct ip_vs_protocol *pp)
{
- unsigned int tcphoff = skb->nh.iph->ihl*4;
+ const unsigned int tcphoff = ip_hdrlen(skb);
switch (skb->ip_summed) {
case CHECKSUM_NONE:
skb->csum = skb_checksum(skb, tcphoff, skb->len - tcphoff, 0);
case CHECKSUM_COMPLETE:
- if (csum_tcpudp_magic(skb->nh.iph->saddr, skb->nh.iph->daddr,
+ if (csum_tcpudp_magic(ip_hdr(skb)->saddr, ip_hdr(skb)->daddr,
skb->len - tcphoff,
- skb->nh.iph->protocol, skb->csum)) {
+ ip_hdr(skb)->protocol, skb->csum)) {
IP_VS_DBG_RL_PKT(0, pp, skb, 0,
"Failed checksum for");
return 0;
@@ -467,8 +466,7 @@ tcp_state_transition(struct ip_vs_conn *cp, int direction,
{
struct tcphdr _tcph, *th;
- th = skb_header_pointer(skb, skb->nh.iph->ihl*4,
- sizeof(_tcph), &_tcph);
+ th = skb_header_pointer(skb, ip_hdrlen(skb), sizeof(_tcph), &_tcph);
if (th == NULL)
return 0;
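Several hunks above shrink the two-line skb_header_pointer(skb,
skb->nh.iph->ihl*4, ...) calls to one line via ip_hdrlen(). The call
itself is the standard safe way to read a header that may be paged or
truncated: it returns a pointer into the skb when the bytes are linear,
copies them into the caller's buffer otherwise, and returns NULL on a
short packet. is_tcp_reset() earlier in this patch is the canonical
shape; restated on its own:

        #include <linux/skbuff.h>
        #include <linux/tcp.h>
        #include <net/ip.h>

        static int example_is_tcp_reset(const struct sk_buff *skb)
        {
                struct tcphdr _tcph, *th;

                th = skb_header_pointer(skb, ip_hdrlen(skb),
                                        sizeof(_tcph), &_tcph);
                if (th == NULL)         /* header absent or truncated */
                        return 0;
                return th->rst;         /* th may point at _tcph, not the skb */
        }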
diff --git a/net/ipv4/ipvs/ip_vs_proto_udp.c b/net/ipv4/ipvs/ip_vs_proto_udp.c
index 03f0a414cfa..8ee5fe6a101 100644
--- a/net/ipv4/ipvs/ip_vs_proto_udp.c
+++ b/net/ipv4/ipvs/ip_vs_proto_udp.c
@@ -22,7 +22,7 @@
#include <linux/udp.h>
#include <net/ip_vs.h>
-
+#include <net/ip.h>
static struct ip_vs_conn *
udp_conn_in_get(const struct sk_buff *skb, struct ip_vs_protocol *pp,
@@ -56,7 +56,7 @@ udp_conn_out_get(const struct sk_buff *skb, struct ip_vs_protocol *pp,
struct ip_vs_conn *cp;
__be16 _ports[2], *pptr;
- pptr = skb_header_pointer(skb, skb->nh.iph->ihl*4,
+ pptr = skb_header_pointer(skb, ip_hdrlen(skb),
sizeof(_ports), _ports);
if (pptr == NULL)
return NULL;
@@ -82,15 +82,15 @@ udp_conn_schedule(struct sk_buff *skb, struct ip_vs_protocol *pp,
struct ip_vs_service *svc;
struct udphdr _udph, *uh;
- uh = skb_header_pointer(skb, skb->nh.iph->ihl*4,
+ uh = skb_header_pointer(skb, ip_hdrlen(skb),
sizeof(_udph), &_udph);
if (uh == NULL) {
*verdict = NF_DROP;
return 0;
}
- if ((svc = ip_vs_service_get(skb->mark, skb->nh.iph->protocol,
- skb->nh.iph->daddr, uh->dest))) {
+ if ((svc = ip_vs_service_get(skb->mark, ip_hdr(skb)->protocol,
+ ip_hdr(skb)->daddr, uh->dest))) {
if (ip_vs_todrop()) {
/*
* It seems that we are very loaded.
@@ -133,7 +133,7 @@ udp_snat_handler(struct sk_buff **pskb,
struct ip_vs_protocol *pp, struct ip_vs_conn *cp)
{
struct udphdr *udph;
- unsigned int udphoff = (*pskb)->nh.iph->ihl * 4;
+ const unsigned int udphoff = ip_hdrlen(*pskb);
/* csum_check requires unshared skb */
if (!ip_vs_make_skb_writable(pskb, udphoff+sizeof(*udph)))
@@ -151,7 +151,7 @@ udp_snat_handler(struct sk_buff **pskb,
return 0;
}
- udph = (void *)(*pskb)->nh.iph + udphoff;
+ udph = (void *)ip_hdr(*pskb) + udphoff;
udph->source = cp->vport;
/*
@@ -187,7 +187,7 @@ udp_dnat_handler(struct sk_buff **pskb,
struct ip_vs_protocol *pp, struct ip_vs_conn *cp)
{
struct udphdr *udph;
- unsigned int udphoff = (*pskb)->nh.iph->ihl * 4;
+ unsigned int udphoff = ip_hdrlen(*pskb);
/* csum_check requires unshared skb */
if (!ip_vs_make_skb_writable(pskb, udphoff+sizeof(*udph)))
@@ -206,7 +206,7 @@ udp_dnat_handler(struct sk_buff **pskb,
return 0;
}
- udph = (void *)(*pskb)->nh.iph + udphoff;
+ udph = (void *)ip_hdr(*pskb) + udphoff;
udph->dest = cp->dport;
/*
@@ -239,7 +239,7 @@ static int
udp_csum_check(struct sk_buff *skb, struct ip_vs_protocol *pp)
{
struct udphdr _udph, *uh;
- unsigned int udphoff = skb->nh.iph->ihl*4;
+ const unsigned int udphoff = ip_hdrlen(skb);
uh = skb_header_pointer(skb, udphoff, sizeof(_udph), &_udph);
if (uh == NULL)
@@ -251,10 +251,10 @@ udp_csum_check(struct sk_buff *skb, struct ip_vs_protocol *pp)
skb->csum = skb_checksum(skb, udphoff,
skb->len - udphoff, 0);
case CHECKSUM_COMPLETE:
- if (csum_tcpudp_magic(skb->nh.iph->saddr,
- skb->nh.iph->daddr,
+ if (csum_tcpudp_magic(ip_hdr(skb)->saddr,
+ ip_hdr(skb)->daddr,
skb->len - udphoff,
- skb->nh.iph->protocol,
+ ip_hdr(skb)->protocol,
skb->csum)) {
IP_VS_DBG_RL_PKT(0, pp, skb, 0,
"Failed checksum for");
diff --git a/net/ipv4/ipvs/ip_vs_sh.c b/net/ipv4/ipvs/ip_vs_sh.c
index 338668f88fe..1b25b00ef1e 100644
--- a/net/ipv4/ipvs/ip_vs_sh.c
+++ b/net/ipv4/ipvs/ip_vs_sh.c
@@ -201,7 +201,7 @@ ip_vs_sh_schedule(struct ip_vs_service *svc, const struct sk_buff *skb)
{
struct ip_vs_dest *dest;
struct ip_vs_sh_bucket *tbl;
- struct iphdr *iph = skb->nh.iph;
+ struct iphdr *iph = ip_hdr(skb);
IP_VS_DBG(6, "ip_vs_sh_schedule(): Scheduling...\n");
diff --git a/net/ipv4/ipvs/ip_vs_xmit.c b/net/ipv4/ipvs/ip_vs_xmit.c
index e1f77bd7c9a..900ce29db38 100644
--- a/net/ipv4/ipvs/ip_vs_xmit.c
+++ b/net/ipv4/ipvs/ip_vs_xmit.c
@@ -156,7 +156,7 @@ ip_vs_bypass_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
struct ip_vs_protocol *pp)
{
struct rtable *rt; /* Route to the other host */
- struct iphdr *iph = skb->nh.iph;
+ struct iphdr *iph = ip_hdr(skb);
u8 tos = iph->tos;
int mtu;
struct flowi fl = {
@@ -178,7 +178,7 @@ ip_vs_bypass_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
/* MTU checking */
mtu = dst_mtu(&rt->u.dst);
- if ((skb->len > mtu) && (iph->frag_off&__constant_htons(IP_DF))) {
+ if ((skb->len > mtu) && (iph->frag_off & htons(IP_DF))) {
ip_rt_put(rt);
icmp_send(skb, ICMP_DEST_UNREACH,ICMP_FRAG_NEEDED, htonl(mtu));
IP_VS_DBG_RL("ip_vs_bypass_xmit(): frag needed\n");
@@ -193,7 +193,7 @@ ip_vs_bypass_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
ip_rt_put(rt);
return NF_STOLEN;
}
- ip_send_check(skb->nh.iph);
+ ip_send_check(ip_hdr(skb));
/* drop old route */
dst_release(skb->dst);
@@ -226,7 +226,7 @@ ip_vs_nat_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
{
struct rtable *rt; /* Route to the other host */
int mtu;
- struct iphdr *iph = skb->nh.iph;
+ struct iphdr *iph = ip_hdr(skb);
EnterFunction(10);
@@ -245,7 +245,7 @@ ip_vs_nat_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
/* MTU checking */
mtu = dst_mtu(&rt->u.dst);
- if ((skb->len > mtu) && (iph->frag_off&__constant_htons(IP_DF))) {
+ if ((skb->len > mtu) && (iph->frag_off & htons(IP_DF))) {
ip_rt_put(rt);
icmp_send(skb, ICMP_DEST_UNREACH,ICMP_FRAG_NEEDED, htonl(mtu));
IP_VS_DBG_RL_PKT(0, pp, skb, 0, "ip_vs_nat_xmit(): frag needed for");
@@ -266,8 +266,8 @@ ip_vs_nat_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
/* mangle the packet */
if (pp->dnat_handler && !pp->dnat_handler(&skb, pp, cp))
goto tx_error;
- skb->nh.iph->daddr = cp->daddr;
- ip_send_check(skb->nh.iph);
+ ip_hdr(skb)->daddr = cp->daddr;
+ ip_send_check(ip_hdr(skb));
IP_VS_DBG_PKT(10, pp, skb, 0, "After DNAT");
@@ -320,19 +320,20 @@ ip_vs_tunnel_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
{
struct rtable *rt; /* Route to the other host */
struct net_device *tdev; /* Device to other host */
- struct iphdr *old_iph = skb->nh.iph;
+ struct iphdr *old_iph = ip_hdr(skb);
u8 tos = old_iph->tos;
__be16 df = old_iph->frag_off;
+ sk_buff_data_t old_transport_header = skb->transport_header;
struct iphdr *iph; /* Our new IP header */
int max_headroom; /* The extra header space needed */
int mtu;
EnterFunction(10);
- if (skb->protocol != __constant_htons(ETH_P_IP)) {
+ if (skb->protocol != htons(ETH_P_IP)) {
IP_VS_DBG_RL("ip_vs_tunnel_xmit(): protocol error, "
"ETH_P_IP: %d, skb protocol: %d\n",
- __constant_htons(ETH_P_IP), skb->protocol);
+ htons(ETH_P_IP), skb->protocol);
goto tx_error;
}
@@ -350,9 +351,9 @@ ip_vs_tunnel_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
if (skb->dst)
skb->dst->ops->update_pmtu(skb->dst, mtu);
- df |= (old_iph->frag_off&__constant_htons(IP_DF));
+ df |= (old_iph->frag_off & htons(IP_DF));
- if ((old_iph->frag_off&__constant_htons(IP_DF))
+ if ((old_iph->frag_off & htons(IP_DF))
&& mtu < ntohs(old_iph->tot_len)) {
icmp_send(skb, ICMP_DEST_UNREACH,ICMP_FRAG_NEEDED, htonl(mtu));
ip_rt_put(rt);
@@ -377,15 +378,16 @@ ip_vs_tunnel_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
}
kfree_skb(skb);
skb = new_skb;
- old_iph = skb->nh.iph;
+ old_iph = ip_hdr(skb);
}
- skb->h.raw = (void *) old_iph;
+ skb->transport_header = old_transport_header;
/* fix old IP header checksum */
ip_send_check(old_iph);
- skb->nh.raw = skb_push(skb, sizeof(struct iphdr));
+ skb_push(skb, sizeof(struct iphdr));
+ skb_reset_network_header(skb);
memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
/* drop old route */
@@ -395,7 +397,7 @@ ip_vs_tunnel_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
/*
* Push down and install the IPIP header.
*/
- iph = skb->nh.iph;
+ iph = ip_hdr(skb);
iph->version = 4;
iph->ihl = sizeof(struct iphdr)>>2;
iph->frag_off = df;
@@ -435,7 +437,7 @@ ip_vs_dr_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
struct ip_vs_protocol *pp)
{
struct rtable *rt; /* Route to the other host */
- struct iphdr *iph = skb->nh.iph;
+ struct iphdr *iph = ip_hdr(skb);
int mtu;
EnterFunction(10);
@@ -445,7 +447,7 @@ ip_vs_dr_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
/* MTU checking */
mtu = dst_mtu(&rt->u.dst);
- if ((iph->frag_off&__constant_htons(IP_DF)) && skb->len > mtu) {
+ if ((iph->frag_off & htons(IP_DF)) && skb->len > mtu) {
icmp_send(skb, ICMP_DEST_UNREACH,ICMP_FRAG_NEEDED, htonl(mtu));
ip_rt_put(rt);
IP_VS_DBG_RL("ip_vs_dr_xmit(): frag needed\n");
@@ -460,7 +462,7 @@ ip_vs_dr_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
ip_rt_put(rt);
return NF_STOLEN;
}
- ip_send_check(skb->nh.iph);
+ ip_send_check(ip_hdr(skb));
/* drop old route */
dst_release(skb->dst);
@@ -514,12 +516,12 @@ ip_vs_icmp_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
* mangle and send the packet here (only for VS/NAT)
*/
- if (!(rt = __ip_vs_get_out_rt(cp, RT_TOS(skb->nh.iph->tos))))
+ if (!(rt = __ip_vs_get_out_rt(cp, RT_TOS(ip_hdr(skb)->tos))))
goto tx_error_icmp;
/* MTU checking */
mtu = dst_mtu(&rt->u.dst);
- if ((skb->len > mtu) && (skb->nh.iph->frag_off&__constant_htons(IP_DF))) {
+ if ((skb->len > mtu) && (ip_hdr(skb)->frag_off & htons(IP_DF))) {
ip_rt_put(rt);
icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu));
IP_VS_DBG_RL("ip_vs_in_icmp(): frag needed\n");
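The tunnel-transmit conversion is the most invasive hunk in this file:
rather than assigning skb->h.raw and skb->nh.raw by hand, the patch
saves the old transport-header offset, pushes room for the outer
header, and calls skb_reset_network_header() so both offsets stay
consistent with the layered-header API. A sketch of the encapsulation
sequence, assuming (as the real code guarantees just above via
skb_realloc_headroom()) that enough headroom already exists:

        #include <linux/in.h>
        #include <linux/ip.h>
        #include <linux/skbuff.h>
        #include <net/ip.h>

        static void example_ipip_encap(struct sk_buff *skb)
        {
                struct iphdr *outer;

                /* The inner header is about to become payload; fix its
                 * checksum first. */
                ip_send_check(ip_hdr(skb));

                skb_push(skb, sizeof(struct iphdr)); /* room for outer hdr */
                skb_reset_network_header(skb);       /* nh -> new start */

                outer = ip_hdr(skb);
                outer->version  = 4;
                outer->ihl      = sizeof(struct iphdr) >> 2;
                outer->protocol = IPPROTO_IPIP;
                /* ... tos, tot_len, ttl, frag_off, saddr/daddr, csum ... */
        }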
diff --git a/net/ipv4/multipath_drr.c b/net/ipv4/multipath_drr.c
index 574c735836f..b03c5ca2c82 100644
--- a/net/ipv4/multipath_drr.c
+++ b/net/ipv4/multipath_drr.c
@@ -100,7 +100,7 @@ static int drr_dev_event(struct notifier_block *this,
spin_unlock_bh(&state_lock);
break;
- };
+ }
return NOTIFY_DONE;
}
diff --git a/net/ipv4/netfilter.c b/net/ipv4/netfilter.c
index 6069a11514f..b44192924f9 100644
--- a/net/ipv4/netfilter.c
+++ b/net/ipv4/netfilter.c
@@ -10,7 +10,7 @@
/* route_me_harder function, used by iptable_nat, iptable_mangle + ip_queue */
int ip_route_me_harder(struct sk_buff **pskb, unsigned addr_type)
{
- struct iphdr *iph = (*pskb)->nh.iph;
+ const struct iphdr *iph = ip_hdr(*pskb);
struct rtable *rt;
struct flowi fl = {};
struct dst_entry *odst;
@@ -142,7 +142,7 @@ static void nf_ip_saveroute(const struct sk_buff *skb, struct nf_info *info)
struct ip_rt_info *rt_info = nf_info_reroute(info);
if (info->hook == NF_IP_LOCAL_OUT) {
- const struct iphdr *iph = skb->nh.iph;
+ const struct iphdr *iph = ip_hdr(skb);
rt_info->tos = iph->tos;
rt_info->daddr = iph->daddr;
@@ -155,7 +155,7 @@ static int nf_ip_reroute(struct sk_buff **pskb, const struct nf_info *info)
const struct ip_rt_info *rt_info = nf_info_reroute(info);
if (info->hook == NF_IP_LOCAL_OUT) {
- struct iphdr *iph = (*pskb)->nh.iph;
+ const struct iphdr *iph = ip_hdr(*pskb);
if (!(iph->tos == rt_info->tos
&& iph->daddr == rt_info->daddr
@@ -168,7 +168,7 @@ static int nf_ip_reroute(struct sk_buff **pskb, const struct nf_info *info)
__sum16 nf_ip_checksum(struct sk_buff *skb, unsigned int hook,
unsigned int dataoff, u_int8_t protocol)
{
- struct iphdr *iph = skb->nh.iph;
+ const struct iphdr *iph = ip_hdr(skb);
__sum16 csum = 0;
switch (skb->ip_summed) {
diff --git a/net/ipv4/netfilter/Kconfig b/net/ipv4/netfilter/Kconfig
index 601808c796e..46509fae9fd 100644
--- a/net/ipv4/netfilter/Kconfig
+++ b/net/ipv4/netfilter/Kconfig
@@ -30,188 +30,6 @@ config NF_CONNTRACK_PROC_COMPAT
If unsure, say Y.
-# connection tracking, helpers and protocols
-config IP_NF_CT_ACCT
- bool "Connection tracking flow accounting"
- depends on IP_NF_CONNTRACK
- help
- If this option is enabled, the connection tracking code will
- keep per-flow packet and byte counters.
-
- Those counters can be used for flow-based accounting or the
- `connbytes' match.
-
- If unsure, say `N'.
-
-config IP_NF_CONNTRACK_MARK
- bool 'Connection mark tracking support'
- depends on IP_NF_CONNTRACK
- help
- This option enables support for connection marks, used by the
- `CONNMARK' target and `connmark' match. Similar to the mark value
- of packets, but this mark value is kept in the conntrack session
- instead of the individual packets.
-
-config IP_NF_CONNTRACK_SECMARK
- bool 'Connection tracking security mark support'
- depends on IP_NF_CONNTRACK && NETWORK_SECMARK
- help
- This option enables security markings to be applied to
- connections. Typically they are copied to connections from
- packets using the CONNSECMARK target and copied back from
- connections to packets with the same target, with the packets
- being originally labeled via SECMARK.
-
- If unsure, say 'N'.
-
-config IP_NF_CONNTRACK_EVENTS
- bool "Connection tracking events (EXPERIMENTAL)"
- depends on EXPERIMENTAL && IP_NF_CONNTRACK
- help
- If this option is enabled, the connection tracking code will
- provide a notifier chain that can be used by other kernel code
- to get notified about changes in the connection tracking state.
-
- If unsure, say `N'.

-
-config IP_NF_CONNTRACK_NETLINK
- tristate 'Connection tracking netlink interface (EXPERIMENTAL)'
- depends on EXPERIMENTAL && IP_NF_CONNTRACK && NETFILTER_NETLINK
- depends on IP_NF_CONNTRACK!=y || NETFILTER_NETLINK!=m
- depends on IP_NF_NAT=n || IP_NF_NAT
- help
- This option enables support for a netlink-based userspace interface
-
-
-config IP_NF_CT_PROTO_SCTP
- tristate 'SCTP protocol connection tracking support (EXPERIMENTAL)'
- depends on IP_NF_CONNTRACK && EXPERIMENTAL
- help
- With this option enabled, the connection tracking code will
- be able to do state tracking on SCTP connections.
-
- If you want to compile it as a module, say M here and read
- <file:Documentation/modules.txt>. If unsure, say `N'.
-
-config IP_NF_FTP
- tristate "FTP protocol support"
- depends on IP_NF_CONNTRACK
- help
- Tracking FTP connections is problematic: special helpers are
- required for tracking them, and doing masquerading and other forms
- of Network Address Translation on them.
-
- To compile it as a module, choose M here. If unsure, say Y.
-
-config IP_NF_IRC
- tristate "IRC protocol support"
- depends on IP_NF_CONNTRACK
- ---help---
- There is a commonly-used extension to IRC called
- Direct Client-to-Client Protocol (DCC). This enables users to send
- files to each other, and also chat to each other without the need
- of a server. DCC Sending is used anywhere you send files over IRC,
- and DCC Chat is most commonly used by Eggdrop bots. If you are
- using NAT, this extension will enable you to send files and initiate
- chats. Note that you do NOT need this extension to get files or
- have others initiate chats, or everything else in IRC.
-
- To compile it as a module, choose M here. If unsure, say Y.
-
-config IP_NF_NETBIOS_NS
- tristate "NetBIOS name service protocol support (EXPERIMENTAL)"
- depends on IP_NF_CONNTRACK && EXPERIMENTAL
- help
- NetBIOS name service requests are sent as broadcast messages from an
- unprivileged port and responded to with unicast messages to the
- same port. This makes them hard to firewall properly because connection
- tracking doesn't deal with broadcasts. This helper tracks locally
- originating NetBIOS name service requests and the corresponding
- responses. It relies on correct IP address configuration, specifically
- netmask and broadcast address. When properly configured, the output
- of "ip address show" should look similar to this:
-
- $ ip -4 address show eth0
- 4: eth0: <BROADCAST,MULTICAST,UP> mtu 1500 qdisc pfifo_fast qlen 1000
- inet 172.16.2.252/24 brd 172.16.2.255 scope global eth0
-
- To compile it as a module, choose M here. If unsure, say N.
-
-config IP_NF_TFTP
- tristate "TFTP protocol support"
- depends on IP_NF_CONNTRACK
- help
- TFTP connection tracking helper, this is required depending
- on how restrictive your ruleset is.
- If you are using a tftp client behind -j SNAT or -j MASQUERADING
- you will need this.
-
- To compile it as a module, choose M here. If unsure, say Y.
-
-config IP_NF_AMANDA
- tristate "Amanda backup protocol support"
- depends on IP_NF_CONNTRACK
- select TEXTSEARCH
- select TEXTSEARCH_KMP
- help
- If you are running the Amanda backup package <http://www.amanda.org/>
- on this machine or machines that will be MASQUERADED through this
- machine, then you may want to enable this feature. This allows the
- connection tracking and natting code to allow the sub-channels that
- Amanda requires for communication of the backup data, messages and
- index.
-
- To compile it as a module, choose M here. If unsure, say Y.
-
-config IP_NF_PPTP
- tristate 'PPTP protocol support'
- depends on IP_NF_CONNTRACK
- help
- This module adds support for PPTP (Point to Point Tunnelling
- Protocol, RFC2637) connection tracking and NAT.
-
- If you are running PPTP sessions over a stateful firewall or NAT
- box, you may want to enable this feature.
-
- Please note that not all PPTP modes of operation are supported yet.
- For more info, read top of the file
- net/ipv4/netfilter/ip_conntrack_pptp.c
-
- If you want to compile it as a module, say M here and read
- Documentation/modules.txt. If unsure, say `N'.
-
-config IP_NF_H323
- tristate 'H.323 protocol support (EXPERIMENTAL)'
- depends on IP_NF_CONNTRACK && EXPERIMENTAL
- help
- H.323 is a VoIP signalling protocol from ITU-T. As one of the most
- important VoIP protocols, it is widely used by voice hardware and
- software including voice gateways, IP phones, Netmeeting, OpenPhone,
- Gnomemeeting, etc.
-
- With this module you can support H.323 on a connection tracking/NAT
- firewall.
-
- This module supports RAS, Fast Start, H.245 Tunnelling, Call
- Forwarding, RTP/RTCP and T.120 based audio, video, fax, chat,
- whiteboard, file transfer, etc. For more information, please
- visit http://nath323.sourceforge.net/.
-
- If you want to compile it as a module, say 'M' here and read
- Documentation/modules.txt. If unsure, say 'N'.
-
-config IP_NF_SIP
- tristate "SIP protocol support (EXPERIMENTAL)"
- depends on IP_NF_CONNTRACK && EXPERIMENTAL
- help
- SIP is an application-layer control protocol that can establish,
- modify, and terminate multimedia sessions (conferences) such as
- Internet telephony calls. With the ip_conntrack_sip and
- the ip_nat_sip modules you can support the protocol on a connection
- tracking/NATing firewall.
-
- To compile it as a module, choose M here. If unsure, say Y.
-
config IP_NF_QUEUE
tristate "IP Userspace queueing via NETLINK (OBSOLETE)"
help
@@ -361,17 +179,6 @@ config IP_NF_TARGET_ULOG
To compile it as a module, choose M here. If unsure, say N.
-# NAT + specific targets: ip_conntrack
-config IP_NF_NAT
- tristate "Full NAT"
- depends on IP_NF_IPTABLES && IP_NF_CONNTRACK
- help
- The Full NAT option allows masquerading, port forwarding and other
- forms of full Network Address Port Translation. It is controlled by
- the `nat' table in iptables: see the man page for iptables(8).
-
- To compile it as a module, choose M here. If unsure, say N.
-
# NAT + specific targets: nf_conntrack
config NF_NAT
tristate "Full NAT"
@@ -383,11 +190,6 @@ config NF_NAT
To compile it as a module, choose M here. If unsure, say N.
-config IP_NF_NAT_NEEDED
- bool
- depends on IP_NF_NAT
- default y
-
config NF_NAT_NEEDED
bool
depends on NF_NAT
@@ -395,7 +197,7 @@ config NF_NAT_NEEDED
config IP_NF_TARGET_MASQUERADE
tristate "MASQUERADE target support"
- depends on (NF_NAT || IP_NF_NAT)
+ depends on NF_NAT
help
Masquerading is a special case of NAT: all outgoing connections are
changed to seem to come from a particular interface's address, and
@@ -407,7 +209,7 @@ config IP_NF_TARGET_MASQUERADE
config IP_NF_TARGET_REDIRECT
tristate "REDIRECT target support"
- depends on (NF_NAT || IP_NF_NAT)
+ depends on NF_NAT
help
REDIRECT is a special case of NAT: all incoming connections are
mapped onto the incoming interface's address, causing the packets to
@@ -418,7 +220,7 @@ config IP_NF_TARGET_REDIRECT
config IP_NF_TARGET_NETMAP
tristate "NETMAP target support"
- depends on (NF_NAT || IP_NF_NAT)
+ depends on NF_NAT
help
NETMAP is an implementation of static 1:1 NAT mapping of network
addresses. It maps the network address part, while keeping the host
@@ -429,28 +231,13 @@ config IP_NF_TARGET_NETMAP
config IP_NF_TARGET_SAME
tristate "SAME target support"
- depends on (NF_NAT || IP_NF_NAT)
+ depends on NF_NAT
help
This option adds a `SAME' target, which works like the standard SNAT
target, but attempts to give clients the same IP for all connections.
To compile it as a module, choose M here. If unsure, say N.
-config IP_NF_NAT_SNMP_BASIC
- tristate "Basic SNMP-ALG support (EXPERIMENTAL)"
- depends on EXPERIMENTAL && IP_NF_NAT
- ---help---
-
- This module implements an Application Layer Gateway (ALG) for
- SNMP payloads. In conjunction with NAT, it allows a network
- management system to access multiple private networks with
- conflicting addresses. It works by modifying IP addresses
- inside SNMP payloads to match IP-layer NAT mapping.
-
- This is the "basic" form of SNMP-ALG, as described in RFC 2962
-
- To compile it as a module, choose M here. If unsure, say N.
-
config NF_NAT_SNMP_BASIC
tristate "Basic SNMP-ALG support (EXPERIMENTAL)"
depends on EXPERIMENTAL && NF_NAT
@@ -477,78 +264,37 @@ config NF_NAT_PROTO_GRE
tristate
depends on NF_NAT && NF_CT_PROTO_GRE
-config IP_NF_NAT_FTP
- tristate
- depends on IP_NF_IPTABLES && IP_NF_CONNTRACK && IP_NF_NAT
- default IP_NF_NAT && IP_NF_FTP
-
config NF_NAT_FTP
tristate
depends on IP_NF_IPTABLES && NF_CONNTRACK && NF_NAT
default NF_NAT && NF_CONNTRACK_FTP
-config IP_NF_NAT_IRC
- tristate
- depends on IP_NF_IPTABLES!=n && IP_NF_CONNTRACK!=n && IP_NF_NAT!=n
- default IP_NF_NAT if IP_NF_IRC=y
- default m if IP_NF_IRC=m
-
config NF_NAT_IRC
tristate
depends on IP_NF_IPTABLES && NF_CONNTRACK && NF_NAT
default NF_NAT && NF_CONNTRACK_IRC
-config IP_NF_NAT_TFTP
- tristate
- depends on IP_NF_IPTABLES!=n && IP_NF_CONNTRACK!=n && IP_NF_NAT!=n
- default IP_NF_NAT if IP_NF_TFTP=y
- default m if IP_NF_TFTP=m
-
config NF_NAT_TFTP
tristate
depends on IP_NF_IPTABLES && NF_CONNTRACK && NF_NAT
default NF_NAT && NF_CONNTRACK_TFTP
-config IP_NF_NAT_AMANDA
- tristate
- depends on IP_NF_IPTABLES!=n && IP_NF_CONNTRACK!=n && IP_NF_NAT!=n
- default IP_NF_NAT if IP_NF_AMANDA=y
- default m if IP_NF_AMANDA=m
-
config NF_NAT_AMANDA
tristate
depends on IP_NF_IPTABLES && NF_CONNTRACK && NF_NAT
default NF_NAT && NF_CONNTRACK_AMANDA
-config IP_NF_NAT_PPTP
- tristate
- depends on IP_NF_NAT!=n && IP_NF_PPTP!=n
- default IP_NF_NAT if IP_NF_PPTP=y
- default m if IP_NF_PPTP=m
-
config NF_NAT_PPTP
tristate
depends on IP_NF_IPTABLES && NF_CONNTRACK && NF_NAT
default NF_NAT && NF_CONNTRACK_PPTP
select NF_NAT_PROTO_GRE
-config IP_NF_NAT_H323
- tristate
- depends on IP_NF_IPTABLES!=n && IP_NF_CONNTRACK!=n && IP_NF_NAT!=n
- default IP_NF_NAT if IP_NF_H323=y
- default m if IP_NF_H323=m
-
config NF_NAT_H323
tristate
depends on IP_NF_IPTABLES && NF_CONNTRACK && NF_NAT
default NF_NAT && NF_CONNTRACK_H323
-config IP_NF_NAT_SIP
- tristate
- depends on IP_NF_IPTABLES!=n && IP_NF_CONNTRACK!=n && IP_NF_NAT!=n
- default IP_NF_NAT if IP_NF_SIP=y
- default m if IP_NF_SIP=m
-
config NF_NAT_SIP
tristate
depends on IP_NF_IPTABLES && NF_CONNTRACK && NF_NAT
@@ -606,9 +352,8 @@ config IP_NF_TARGET_TTL
config IP_NF_TARGET_CLUSTERIP
tristate "CLUSTERIP target support (EXPERIMENTAL)"
depends on IP_NF_MANGLE && EXPERIMENTAL
- depends on IP_NF_CONNTRACK || NF_CONNTRACK_IPV4
- select IP_NF_CONNTRACK_MARK if IP_NF_CONNTRACK
- select NF_CONNTRACK_MARK if NF_CONNTRACK_IPV4
+ depends on NF_CONNTRACK_IPV4
+ select NF_CONNTRACK_MARK
help
The CLUSTERIP target allows you to build load-balancing clusters of
network servers without having a dedicated load-balancing
diff --git a/net/ipv4/netfilter/Makefile b/net/ipv4/netfilter/Makefile
index 6625ec68180..409d273f6f8 100644
--- a/net/ipv4/netfilter/Makefile
+++ b/net/ipv4/netfilter/Makefile
@@ -2,8 +2,6 @@
# Makefile for the netfilter modules on top of IPv4.
#
-# objects for the standalone - connection tracking / NAT
-ip_conntrack-objs := ip_conntrack_standalone.o ip_conntrack_core.o ip_conntrack_proto_generic.o ip_conntrack_proto_tcp.o ip_conntrack_proto_udp.o ip_conntrack_proto_icmp.o
# objects for l3 independent conntrack
nf_conntrack_ipv4-objs := nf_conntrack_l3proto_ipv4.o nf_conntrack_proto_icmp.o
ifeq ($(CONFIG_NF_CONNTRACK_PROC_COMPAT),y)
@@ -12,53 +10,14 @@ nf_conntrack_ipv4-objs += nf_conntrack_l3proto_ipv4_compat.o
endif
endif
-ip_nat-objs := ip_nat_core.o ip_nat_helper.o ip_nat_proto_unknown.o ip_nat_proto_tcp.o ip_nat_proto_udp.o ip_nat_proto_icmp.o
-nf_nat-objs := nf_nat_core.o nf_nat_helper.o nf_nat_proto_unknown.o nf_nat_proto_tcp.o nf_nat_proto_udp.o nf_nat_proto_icmp.o
-ifneq ($(CONFIG_NF_NAT),)
+nf_nat-objs := nf_nat_core.o nf_nat_helper.o nf_nat_proto_unknown.o nf_nat_proto_tcp.o nf_nat_proto_udp.o nf_nat_proto_icmp.o
iptable_nat-objs := nf_nat_rule.o nf_nat_standalone.o
-else
-iptable_nat-objs := ip_nat_rule.o ip_nat_standalone.o
-endif
-
-ip_conntrack_pptp-objs := ip_conntrack_helper_pptp.o ip_conntrack_proto_gre.o
-ip_nat_pptp-objs := ip_nat_helper_pptp.o ip_nat_proto_gre.o
-
-ip_conntrack_h323-objs := ip_conntrack_helper_h323.o ../../netfilter/nf_conntrack_h323_asn1.o
-ip_nat_h323-objs := ip_nat_helper_h323.o
# connection tracking
-obj-$(CONFIG_IP_NF_CONNTRACK) += ip_conntrack.o
obj-$(CONFIG_NF_CONNTRACK_IPV4) += nf_conntrack_ipv4.o
-obj-$(CONFIG_IP_NF_NAT) += ip_nat.o
obj-$(CONFIG_NF_NAT) += nf_nat.o
-# conntrack netlink interface
-obj-$(CONFIG_IP_NF_CONNTRACK_NETLINK) += ip_conntrack_netlink.o
-
-
-# SCTP protocol connection tracking
-obj-$(CONFIG_IP_NF_CT_PROTO_SCTP) += ip_conntrack_proto_sctp.o
-
-# connection tracking helpers
-obj-$(CONFIG_IP_NF_H323) += ip_conntrack_h323.o
-obj-$(CONFIG_IP_NF_PPTP) += ip_conntrack_pptp.o
-obj-$(CONFIG_IP_NF_AMANDA) += ip_conntrack_amanda.o
-obj-$(CONFIG_IP_NF_TFTP) += ip_conntrack_tftp.o
-obj-$(CONFIG_IP_NF_FTP) += ip_conntrack_ftp.o
-obj-$(CONFIG_IP_NF_IRC) += ip_conntrack_irc.o
-obj-$(CONFIG_IP_NF_SIP) += ip_conntrack_sip.o
-obj-$(CONFIG_IP_NF_NETBIOS_NS) += ip_conntrack_netbios_ns.o
-
-# NAT helpers (ip_conntrack)
-obj-$(CONFIG_IP_NF_NAT_H323) += ip_nat_h323.o
-obj-$(CONFIG_IP_NF_NAT_PPTP) += ip_nat_pptp.o
-obj-$(CONFIG_IP_NF_NAT_AMANDA) += ip_nat_amanda.o
-obj-$(CONFIG_IP_NF_NAT_TFTP) += ip_nat_tftp.o
-obj-$(CONFIG_IP_NF_NAT_FTP) += ip_nat_ftp.o
-obj-$(CONFIG_IP_NF_NAT_IRC) += ip_nat_irc.o
-obj-$(CONFIG_IP_NF_NAT_SIP) += ip_nat_sip.o
-
# NAT helpers (nf_conntrack)
obj-$(CONFIG_NF_NAT_AMANDA) += nf_nat_amanda.o
obj-$(CONFIG_NF_NAT_FTP) += nf_nat_ftp.o
@@ -78,7 +37,6 @@ obj-$(CONFIG_IP_NF_IPTABLES) += ip_tables.o
# the three instances of ip_tables
obj-$(CONFIG_IP_NF_FILTER) += iptable_filter.o
obj-$(CONFIG_IP_NF_MANGLE) += iptable_mangle.o
-obj-$(CONFIG_IP_NF_NAT) += iptable_nat.o
obj-$(CONFIG_NF_NAT) += iptable_nat.o
obj-$(CONFIG_IP_NF_RAW) += iptable_raw.o
@@ -100,7 +58,6 @@ obj-$(CONFIG_IP_NF_TARGET_MASQUERADE) += ipt_MASQUERADE.o
obj-$(CONFIG_IP_NF_TARGET_REDIRECT) += ipt_REDIRECT.o
obj-$(CONFIG_IP_NF_TARGET_NETMAP) += ipt_NETMAP.o
obj-$(CONFIG_IP_NF_TARGET_SAME) += ipt_SAME.o
-obj-$(CONFIG_IP_NF_NAT_SNMP_BASIC) += ip_nat_snmp_basic.o
obj-$(CONFIG_IP_NF_TARGET_LOG) += ipt_LOG.o
obj-$(CONFIG_IP_NF_TARGET_ULOG) += ipt_ULOG.o
obj-$(CONFIG_IP_NF_TARGET_CLUSTERIP) += ipt_CLUSTERIP.o
diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c
index 57b0221f9e2..cae41215e3c 100644
--- a/net/ipv4/netfilter/arp_tables.c
+++ b/net/ipv4/netfilter/arp_tables.c
@@ -245,7 +245,7 @@ unsigned int arpt_do_table(struct sk_buff **pskb,
e = get_entry(table_base, private->hook_entry[hook]);
back = get_entry(table_base, private->underflow[hook]);
- arp = (*pskb)->nh.arph;
+ arp = arp_hdr(*pskb);
do {
if (arp_packet_match(arp, (*pskb)->dev, indev, outdev, &e->arp)) {
struct arpt_entry_target *t;
@@ -297,7 +297,7 @@ unsigned int arpt_do_table(struct sk_buff **pskb,
t->data);
/* Target might have changed stuff. */
- arp = (*pskb)->nh.arph;
+ arp = arp_hdr(*pskb);
if (verdict == ARPT_CONTINUE)
e = (void *)e + e->next_offset;
diff --git a/net/ipv4/netfilter/arpt_mangle.c b/net/ipv4/netfilter/arpt_mangle.c
index 709db4d3f48..6298d404e7c 100644
--- a/net/ipv4/netfilter/arpt_mangle.c
+++ b/net/ipv4/netfilter/arpt_mangle.c
@@ -30,35 +30,35 @@ target(struct sk_buff **pskb,
*pskb = nskb;
}
- arp = (*pskb)->nh.arph;
- arpptr = (*pskb)->nh.raw + sizeof(*arp);
+ arp = arp_hdr(*pskb);
+ arpptr = skb_network_header(*pskb) + sizeof(*arp);
pln = arp->ar_pln;
hln = arp->ar_hln;
/* We assume that pln and hln were checked in the match */
if (mangle->flags & ARPT_MANGLE_SDEV) {
if (ARPT_DEV_ADDR_LEN_MAX < hln ||
- (arpptr + hln > (**pskb).tail))
+ (arpptr + hln > skb_tail_pointer(*pskb)))
return NF_DROP;
memcpy(arpptr, mangle->src_devaddr, hln);
}
arpptr += hln;
if (mangle->flags & ARPT_MANGLE_SIP) {
if (ARPT_MANGLE_ADDR_LEN_MAX < pln ||
- (arpptr + pln > (**pskb).tail))
+ (arpptr + pln > skb_tail_pointer(*pskb)))
return NF_DROP;
memcpy(arpptr, &mangle->u_s.src_ip, pln);
}
arpptr += pln;
if (mangle->flags & ARPT_MANGLE_TDEV) {
if (ARPT_DEV_ADDR_LEN_MAX < hln ||
- (arpptr + hln > (**pskb).tail))
+ (arpptr + hln > skb_tail_pointer(*pskb)))
return NF_DROP;
memcpy(arpptr, mangle->tgt_devaddr, hln);
}
arpptr += hln;
if (mangle->flags & ARPT_MANGLE_TIP) {
if (ARPT_MANGLE_ADDR_LEN_MAX < pln ||
- (arpptr + pln > (**pskb).tail))
+ (arpptr + pln > skb_tail_pointer(*pskb)))
return NF_DROP;
memcpy(arpptr, &mangle->u_t.tgt_ip, pln);
}
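arpt_mangle shows the same conversion on the ARP side: arp_hdr()
replaces skb->nh.arph, skb_network_header() replaces skb->nh.raw, and
every memcpy() is bounds-checked against skb_tail_pointer() first. One
such guarded write, restated (function name invented; -EINVAL stands in
for the NF_DROP verdict the real target returns):

        #include <linux/errno.h>
        #include <linux/if_arp.h>
        #include <linux/skbuff.h>
        #include <linux/string.h>

        static int example_set_sender_hwaddr(struct sk_buff *skb,
                                             const unsigned char *addr)
        {
                struct arphdr *arp = arp_hdr(skb);      /* was: skb->nh.arph */
                unsigned char *p = skb_network_header(skb) + sizeof(*arp);

                /* Refuse to write past the end of the linear data. */
                if (p + arp->ar_hln > skb_tail_pointer(skb))
                        return -EINVAL;
                memcpy(p, addr, arp->ar_hln);
                return 0;
        }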
diff --git a/net/ipv4/netfilter/ip_conntrack_amanda.c b/net/ipv4/netfilter/ip_conntrack_amanda.c
deleted file mode 100644
index 4f561f52c83..00000000000
--- a/net/ipv4/netfilter/ip_conntrack_amanda.c
+++ /dev/null
@@ -1,229 +0,0 @@
-/* Amanda extension for IP connection tracking, Version 0.2
- * (C) 2002 by Brian J. Murrell <netfilter@interlinx.bc.ca>
- * based on HW's ip_conntrack_irc.c as well as other modules
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
- *
- * Module load syntax:
- * insmod ip_conntrack_amanda.o [master_timeout=n]
- *
- * Where master_timeout is the timeout (in seconds) of the master
- * connection (port 10080). This defaults to 5 minutes but if
- * your clients take longer than 5 minutes to do their work
- * before getting back to the Amanda server, you can increase
- * this value.
- *
- */
-#include <linux/kernel.h>
-#include <linux/module.h>
-#include <linux/moduleparam.h>
-#include <linux/textsearch.h>
-#include <linux/skbuff.h>
-#include <linux/in.h>
-#include <linux/ip.h>
-#include <linux/udp.h>
-
-#include <linux/netfilter.h>
-#include <linux/netfilter_ipv4/ip_conntrack_helper.h>
-#include <linux/netfilter_ipv4/ip_conntrack_amanda.h>
-
-static unsigned int master_timeout = 300;
-static char *ts_algo = "kmp";
-
-MODULE_AUTHOR("Brian J. Murrell <netfilter@interlinx.bc.ca>");
-MODULE_DESCRIPTION("Amanda connection tracking module");
-MODULE_LICENSE("GPL");
-module_param(master_timeout, uint, 0600);
-MODULE_PARM_DESC(master_timeout, "timeout for the master connection");
-module_param(ts_algo, charp, 0400);
-MODULE_PARM_DESC(ts_algo, "textsearch algorithm to use (default kmp)");
-
-unsigned int (*ip_nat_amanda_hook)(struct sk_buff **pskb,
- enum ip_conntrack_info ctinfo,
- unsigned int matchoff,
- unsigned int matchlen,
- struct ip_conntrack_expect *exp);
-EXPORT_SYMBOL_GPL(ip_nat_amanda_hook);
-
-enum amanda_strings {
- SEARCH_CONNECT,
- SEARCH_NEWLINE,
- SEARCH_DATA,
- SEARCH_MESG,
- SEARCH_INDEX,
-};
-
-static struct {
- char *string;
- size_t len;
- struct ts_config *ts;
-} search[] = {
- [SEARCH_CONNECT] = {
- .string = "CONNECT ",
- .len = 8,
- },
- [SEARCH_NEWLINE] = {
- .string = "\n",
- .len = 1,
- },
- [SEARCH_DATA] = {
- .string = "DATA ",
- .len = 5,
- },
- [SEARCH_MESG] = {
- .string = "MESG ",
- .len = 5,
- },
- [SEARCH_INDEX] = {
- .string = "INDEX ",
- .len = 6,
- },
-};
-
-static int help(struct sk_buff **pskb,
- struct ip_conntrack *ct, enum ip_conntrack_info ctinfo)
-{
- struct ts_state ts;
- struct ip_conntrack_expect *exp;
- unsigned int dataoff, start, stop, off, i;
- char pbuf[sizeof("65535")], *tmp;
- u_int16_t port, len;
- int ret = NF_ACCEPT;
- typeof(ip_nat_amanda_hook) ip_nat_amanda;
-
- /* Only look at packets from the Amanda server */
- if (CTINFO2DIR(ctinfo) == IP_CT_DIR_ORIGINAL)
- return NF_ACCEPT;
-
- /* increase the UDP timeout of the master connection as replies from
- * Amanda clients to the server can be quite delayed */
- ip_ct_refresh(ct, *pskb, master_timeout * HZ);
-
- /* No data? */
- dataoff = (*pskb)->nh.iph->ihl*4 + sizeof(struct udphdr);
- if (dataoff >= (*pskb)->len) {
- if (net_ratelimit())
- printk("amanda_help: skblen = %u\n", (*pskb)->len);
- return NF_ACCEPT;
- }
-
- memset(&ts, 0, sizeof(ts));
- start = skb_find_text(*pskb, dataoff, (*pskb)->len,
- search[SEARCH_CONNECT].ts, &ts);
- if (start == UINT_MAX)
- goto out;
- start += dataoff + search[SEARCH_CONNECT].len;
-
- memset(&ts, 0, sizeof(ts));
- stop = skb_find_text(*pskb, start, (*pskb)->len,
- search[SEARCH_NEWLINE].ts, &ts);
- if (stop == UINT_MAX)
- goto out;
- stop += start;
-
- for (i = SEARCH_DATA; i <= SEARCH_INDEX; i++) {
- memset(&ts, 0, sizeof(ts));
- off = skb_find_text(*pskb, start, stop, search[i].ts, &ts);
- if (off == UINT_MAX)
- continue;
- off += start + search[i].len;
-
- len = min_t(unsigned int, sizeof(pbuf) - 1, stop - off);
- if (skb_copy_bits(*pskb, off, pbuf, len))
- break;
- pbuf[len] = '\0';
-
- port = simple_strtoul(pbuf, &tmp, 10);
- len = tmp - pbuf;
- if (port == 0 || len > 5)
- break;
-
- exp = ip_conntrack_expect_alloc(ct);
- if (exp == NULL) {
- ret = NF_DROP;
- goto out;
- }
-
- exp->expectfn = NULL;
- exp->flags = 0;
-
- exp->tuple.src.ip = ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.ip;
- exp->tuple.src.u.tcp.port = 0;
- exp->tuple.dst.ip = ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.ip;
- exp->tuple.dst.protonum = IPPROTO_TCP;
- exp->tuple.dst.u.tcp.port = htons(port);
-
- exp->mask.src.ip = htonl(0xFFFFFFFF);
- exp->mask.src.u.tcp.port = 0;
- exp->mask.dst.ip = htonl(0xFFFFFFFF);
- exp->mask.dst.protonum = 0xFF;
- exp->mask.dst.u.tcp.port = htons(0xFFFF);
-
- /* RCU read locked by nf_hook_slow */
- ip_nat_amanda = rcu_dereference(ip_nat_amanda_hook);
- if (ip_nat_amanda)
- ret = ip_nat_amanda(pskb, ctinfo, off - dataoff,
- len, exp);
- else if (ip_conntrack_expect_related(exp) != 0)
- ret = NF_DROP;
- ip_conntrack_expect_put(exp);
- }
-
-out:
- return ret;
-}
-
-static struct ip_conntrack_helper amanda_helper = {
- .max_expected = 3,
- .timeout = 180,
- .me = THIS_MODULE,
- .help = help,
- .name = "amanda",
-
- .tuple = { .src = { .u = { .udp = {.port = __constant_htons(10080) } } },
- .dst = { .protonum = IPPROTO_UDP },
- },
- .mask = { .src = { .u = { 0xFFFF } },
- .dst = { .protonum = 0xFF },
- },
-};
-
-static void __exit ip_conntrack_amanda_fini(void)
-{
- int i;
-
- ip_conntrack_helper_unregister(&amanda_helper);
- for (i = 0; i < ARRAY_SIZE(search); i++)
- textsearch_destroy(search[i].ts);
-}
-
-static int __init ip_conntrack_amanda_init(void)
-{
- int ret, i;
-
- ret = -ENOMEM;
- for (i = 0; i < ARRAY_SIZE(search); i++) {
- search[i].ts = textsearch_prepare(ts_algo, search[i].string,
- search[i].len,
- GFP_KERNEL, TS_AUTOLOAD);
- if (search[i].ts == NULL)
- goto err;
- }
- ret = ip_conntrack_helper_register(&amanda_helper);
- if (ret < 0)
- goto err;
- return 0;
-
-err:
- for (; i >= 0; i--) {
- if (search[i].ts)
- textsearch_destroy(search[i].ts);
- }
- return ret;
-}
-
-module_init(ip_conntrack_amanda_init);
-module_exit(ip_conntrack_amanda_fini);
diff --git a/net/ipv4/netfilter/ip_conntrack_core.c b/net/ipv4/netfilter/ip_conntrack_core.c
deleted file mode 100644
index 23b99ae2cc3..00000000000
--- a/net/ipv4/netfilter/ip_conntrack_core.c
+++ /dev/null
@@ -1,1550 +0,0 @@
-/* Connection state tracking for netfilter. This is separated from,
- but required by, the NAT layer; it can also be used by an iptables
- extension. */
-
-/* (C) 1999-2001 Paul `Rusty' Russell
- * (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
- * 23 Apr 2001: Harald Welte <laforge@gnumonks.org>
- * - new API and handling of conntrack/nat helpers
- * - now capable of multiple expectations for one master
- * 16 Jul 2002: Harald Welte <laforge@gnumonks.org>
- * - add usage/reference counts to ip_conntrack_expect
- * - export ip_conntrack[_expect]_{find_get,put} functions
- * */
-
-#include <linux/types.h>
-#include <linux/icmp.h>
-#include <linux/ip.h>
-#include <linux/netfilter.h>
-#include <linux/netfilter_ipv4.h>
-#include <linux/module.h>
-#include <linux/skbuff.h>
-#include <linux/proc_fs.h>
-#include <linux/vmalloc.h>
-#include <net/checksum.h>
-#include <net/ip.h>
-#include <linux/stddef.h>
-#include <linux/sysctl.h>
-#include <linux/slab.h>
-#include <linux/random.h>
-#include <linux/jhash.h>
-#include <linux/err.h>
-#include <linux/percpu.h>
-#include <linux/moduleparam.h>
-#include <linux/notifier.h>
-
-/* ip_conntrack_lock protects the main hash table, protocol/helper/expected
- registrations, conntrack timers*/
-#include <linux/netfilter_ipv4/ip_conntrack.h>
-#include <linux/netfilter_ipv4/ip_conntrack_protocol.h>
-#include <linux/netfilter_ipv4/ip_conntrack_helper.h>
-#include <linux/netfilter_ipv4/ip_conntrack_core.h>
-
-#define IP_CONNTRACK_VERSION "2.4"
-
-#if 0
-#define DEBUGP printk
-#else
-#define DEBUGP(format, args...)
-#endif
-
-DEFINE_RWLOCK(ip_conntrack_lock);
-
-/* ip_conntrack_standalone needs this */
-atomic_t ip_conntrack_count = ATOMIC_INIT(0);
-
-void (*ip_conntrack_destroyed)(struct ip_conntrack *conntrack) = NULL;
-LIST_HEAD(ip_conntrack_expect_list);
-struct ip_conntrack_protocol *ip_ct_protos[MAX_IP_CT_PROTO] __read_mostly;
-static LIST_HEAD(helpers);
-unsigned int ip_conntrack_htable_size __read_mostly = 0;
-int ip_conntrack_max __read_mostly;
-struct list_head *ip_conntrack_hash __read_mostly;
-static struct kmem_cache *ip_conntrack_cachep __read_mostly;
-static struct kmem_cache *ip_conntrack_expect_cachep __read_mostly;
-struct ip_conntrack ip_conntrack_untracked;
-unsigned int ip_ct_log_invalid __read_mostly;
-static LIST_HEAD(unconfirmed);
-static int ip_conntrack_vmalloc __read_mostly;
-
-static unsigned int ip_conntrack_next_id;
-static unsigned int ip_conntrack_expect_next_id;
-#ifdef CONFIG_IP_NF_CONNTRACK_EVENTS
-ATOMIC_NOTIFIER_HEAD(ip_conntrack_chain);
-ATOMIC_NOTIFIER_HEAD(ip_conntrack_expect_chain);
-
-DEFINE_PER_CPU(struct ip_conntrack_ecache, ip_conntrack_ecache);
-
-/* deliver cached events and clear cache entry - must be called with locally
- * disabled softirqs */
-static inline void
-__ip_ct_deliver_cached_events(struct ip_conntrack_ecache *ecache)
-{
- DEBUGP("ecache: delivering events for %p\n", ecache->ct);
- if (is_confirmed(ecache->ct) && !is_dying(ecache->ct) && ecache->events)
- atomic_notifier_call_chain(&ip_conntrack_chain, ecache->events,
- ecache->ct);
- ecache->events = 0;
- ip_conntrack_put(ecache->ct);
- ecache->ct = NULL;
-}
-
-/* Deliver all cached events for a particular conntrack. This is called
- * by code prior to async packet handling or freeing the skb */
-void ip_ct_deliver_cached_events(const struct ip_conntrack *ct)
-{
- struct ip_conntrack_ecache *ecache;
-
- local_bh_disable();
- ecache = &__get_cpu_var(ip_conntrack_ecache);
- if (ecache->ct == ct)
- __ip_ct_deliver_cached_events(ecache);
- local_bh_enable();
-}
-
-void __ip_ct_event_cache_init(struct ip_conntrack *ct)
-{
- struct ip_conntrack_ecache *ecache;
-
- /* take care of delivering potentially old events */
- ecache = &__get_cpu_var(ip_conntrack_ecache);
- BUG_ON(ecache->ct == ct);
- if (ecache->ct)
- __ip_ct_deliver_cached_events(ecache);
- /* initialize for this conntrack/packet */
- ecache->ct = ct;
- nf_conntrack_get(&ct->ct_general);
-}
-
-/* flush the event cache - touches other CPU's data and must not be called while
- * packets are still passing through the code */
-static void ip_ct_event_cache_flush(void)
-{
- struct ip_conntrack_ecache *ecache;
- int cpu;
-
- for_each_possible_cpu(cpu) {
- ecache = &per_cpu(ip_conntrack_ecache, cpu);
- if (ecache->ct)
- ip_conntrack_put(ecache->ct);
- }
-}
-#else
-static inline void ip_ct_event_cache_flush(void) {}
-#endif /* CONFIG_IP_NF_CONNTRACK_EVENTS */
-
-DEFINE_PER_CPU(struct ip_conntrack_stat, ip_conntrack_stat);
-
-static int ip_conntrack_hash_rnd_initted;
-static unsigned int ip_conntrack_hash_rnd;
-
-static u_int32_t __hash_conntrack(const struct ip_conntrack_tuple *tuple,
- unsigned int size, unsigned int rnd)
-{
- return (jhash_3words((__force u32)tuple->src.ip,
- ((__force u32)tuple->dst.ip ^ tuple->dst.protonum),
- (tuple->src.u.all | (tuple->dst.u.all << 16)),
- rnd) % size);
-}
-
-static u_int32_t
-hash_conntrack(const struct ip_conntrack_tuple *tuple)
-{
- return __hash_conntrack(tuple, ip_conntrack_htable_size,
- ip_conntrack_hash_rnd);
-}
-
-int
-ip_ct_get_tuple(const struct iphdr *iph,
- const struct sk_buff *skb,
- unsigned int dataoff,
- struct ip_conntrack_tuple *tuple,
- const struct ip_conntrack_protocol *protocol)
-{
- /* Never happen */
- if (iph->frag_off & htons(IP_OFFSET)) {
- printk("ip_conntrack_core: Frag of proto %u.\n",
- iph->protocol);
- return 0;
- }
-
- tuple->src.ip = iph->saddr;
- tuple->dst.ip = iph->daddr;
- tuple->dst.protonum = iph->protocol;
- tuple->dst.dir = IP_CT_DIR_ORIGINAL;
-
- return protocol->pkt_to_tuple(skb, dataoff, tuple);
-}
-
-int
-ip_ct_invert_tuple(struct ip_conntrack_tuple *inverse,
- const struct ip_conntrack_tuple *orig,
- const struct ip_conntrack_protocol *protocol)
-{
- inverse->src.ip = orig->dst.ip;
- inverse->dst.ip = orig->src.ip;
- inverse->dst.protonum = orig->dst.protonum;
- inverse->dst.dir = !orig->dst.dir;
-
- return protocol->invert_tuple(inverse, orig);
-}
-
-
-/* ip_conntrack_expect helper functions */
-void ip_ct_unlink_expect(struct ip_conntrack_expect *exp)
-{
- IP_NF_ASSERT(!timer_pending(&exp->timeout));
- list_del(&exp->list);
- CONNTRACK_STAT_INC(expect_delete);
- exp->master->expecting--;
- ip_conntrack_expect_put(exp);
-}
-
-static void expectation_timed_out(unsigned long ul_expect)
-{
- struct ip_conntrack_expect *exp = (void *)ul_expect;
-
- write_lock_bh(&ip_conntrack_lock);
- ip_ct_unlink_expect(exp);
- write_unlock_bh(&ip_conntrack_lock);
- ip_conntrack_expect_put(exp);
-}
-
-struct ip_conntrack_expect *
-__ip_conntrack_expect_find(const struct ip_conntrack_tuple *tuple)
-{
- struct ip_conntrack_expect *i;
-
- list_for_each_entry(i, &ip_conntrack_expect_list, list) {
- if (ip_ct_tuple_mask_cmp(tuple, &i->tuple, &i->mask))
- return i;
- }
- return NULL;
-}
-
-/* Just find an expectation corresponding to a tuple. */
-struct ip_conntrack_expect *
-ip_conntrack_expect_find_get(const struct ip_conntrack_tuple *tuple)
-{
- struct ip_conntrack_expect *i;
-
- read_lock_bh(&ip_conntrack_lock);
- i = __ip_conntrack_expect_find(tuple);
- if (i)
- atomic_inc(&i->use);
- read_unlock_bh(&ip_conntrack_lock);
-
- return i;
-}
-
-/* If an expectation for this connection is found, it gets deleted from
- * the global list, then returned. */
-static struct ip_conntrack_expect *
-find_expectation(const struct ip_conntrack_tuple *tuple)
-{
- struct ip_conntrack_expect *i;
-
- list_for_each_entry(i, &ip_conntrack_expect_list, list) {
- /* If master is not in hash table yet (ie. packet hasn't left
- this machine yet), how can other end know about expected?
- Hence these are not the droids you are looking for (if
- master ct never got confirmed, we'd hold a reference to it
- and weird things would happen to future packets). */
- if (ip_ct_tuple_mask_cmp(tuple, &i->tuple, &i->mask)
- && is_confirmed(i->master)) {
- if (i->flags & IP_CT_EXPECT_PERMANENT) {
- atomic_inc(&i->use);
- return i;
- } else if (del_timer(&i->timeout)) {
- ip_ct_unlink_expect(i);
- return i;
- }
- }
- }
- return NULL;
-}
-
-/* delete all expectations for this conntrack */
-void ip_ct_remove_expectations(struct ip_conntrack *ct)
-{
- struct ip_conntrack_expect *i, *tmp;
-
- /* Optimization: most connection never expect any others. */
- if (ct->expecting == 0)
- return;
-
- list_for_each_entry_safe(i, tmp, &ip_conntrack_expect_list, list) {
- if (i->master == ct && del_timer(&i->timeout)) {
- ip_ct_unlink_expect(i);
- ip_conntrack_expect_put(i);
- }
- }
-}
-
-static void
-clean_from_lists(struct ip_conntrack *ct)
-{
- DEBUGP("clean_from_lists(%p)\n", ct);
- list_del(&ct->tuplehash[IP_CT_DIR_ORIGINAL].list);
- list_del(&ct->tuplehash[IP_CT_DIR_REPLY].list);
-
- /* Destroy all pending expectations */
- ip_ct_remove_expectations(ct);
-}
-
-static void
-destroy_conntrack(struct nf_conntrack *nfct)
-{
- struct ip_conntrack *ct = (struct ip_conntrack *)nfct;
- struct ip_conntrack_protocol *proto;
- struct ip_conntrack_helper *helper;
- typeof(ip_conntrack_destroyed) destroyed;
-
- DEBUGP("destroy_conntrack(%p)\n", ct);
- IP_NF_ASSERT(atomic_read(&nfct->use) == 0);
- IP_NF_ASSERT(!timer_pending(&ct->timeout));
-
- ip_conntrack_event(IPCT_DESTROY, ct);
- set_bit(IPS_DYING_BIT, &ct->status);
-
- helper = ct->helper;
- if (helper && helper->destroy)
- helper->destroy(ct);
-
- /* To make sure we don't get any weird locking issues here:
- * destroy_conntrack() MUST NOT be called with a write lock
- * to ip_conntrack_lock!!! -HW */
- rcu_read_lock();
- proto = __ip_conntrack_proto_find(ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.protonum);
- if (proto && proto->destroy)
- proto->destroy(ct);
-
- destroyed = rcu_dereference(ip_conntrack_destroyed);
- if (destroyed)
- destroyed(ct);
-
- rcu_read_unlock();
-
- write_lock_bh(&ip_conntrack_lock);
- /* Expectations will have been removed in clean_from_lists,
- * except TFTP can create an expectation on the first packet,
- * before connection is in the list, so we need to clean here,
- * too. */
- ip_ct_remove_expectations(ct);
-
- /* We overload first tuple to link into unconfirmed list. */
- if (!is_confirmed(ct)) {
- BUG_ON(list_empty(&ct->tuplehash[IP_CT_DIR_ORIGINAL].list));
- list_del(&ct->tuplehash[IP_CT_DIR_ORIGINAL].list);
- }
-
- CONNTRACK_STAT_INC(delete);
- write_unlock_bh(&ip_conntrack_lock);
-
- if (ct->master)
- ip_conntrack_put(ct->master);
-
- DEBUGP("destroy_conntrack: returning ct=%p to slab\n", ct);
- ip_conntrack_free(ct);
-}
-
-static void death_by_timeout(unsigned long ul_conntrack)
-{
- struct ip_conntrack *ct = (void *)ul_conntrack;
-
- write_lock_bh(&ip_conntrack_lock);
- /* Inside lock so preempt is disabled on module removal path.
- * Otherwise we can get spurious warnings. */
- CONNTRACK_STAT_INC(delete_list);
- clean_from_lists(ct);
- write_unlock_bh(&ip_conntrack_lock);
- ip_conntrack_put(ct);
-}
-
-struct ip_conntrack_tuple_hash *
-__ip_conntrack_find(const struct ip_conntrack_tuple *tuple,
- const struct ip_conntrack *ignored_conntrack)
-{
- struct ip_conntrack_tuple_hash *h;
- unsigned int hash = hash_conntrack(tuple);
-
- list_for_each_entry(h, &ip_conntrack_hash[hash], list) {
- if (tuplehash_to_ctrack(h) != ignored_conntrack &&
- ip_ct_tuple_equal(tuple, &h->tuple)) {
- CONNTRACK_STAT_INC(found);
- return h;
- }
- CONNTRACK_STAT_INC(searched);
- }
-
- return NULL;
-}
-
-/* Find a connection corresponding to a tuple. */
-struct ip_conntrack_tuple_hash *
-ip_conntrack_find_get(const struct ip_conntrack_tuple *tuple,
- const struct ip_conntrack *ignored_conntrack)
-{
- struct ip_conntrack_tuple_hash *h;
-
- read_lock_bh(&ip_conntrack_lock);
- h = __ip_conntrack_find(tuple, ignored_conntrack);
- if (h)
- atomic_inc(&tuplehash_to_ctrack(h)->ct_general.use);
- read_unlock_bh(&ip_conntrack_lock);
-
- return h;
-}
-
-static void __ip_conntrack_hash_insert(struct ip_conntrack *ct,
- unsigned int hash,
- unsigned int repl_hash)
-{
- ct->id = ++ip_conntrack_next_id;
- list_add(&ct->tuplehash[IP_CT_DIR_ORIGINAL].list,
- &ip_conntrack_hash[hash]);
- list_add(&ct->tuplehash[IP_CT_DIR_REPLY].list,
- &ip_conntrack_hash[repl_hash]);
-}
-
-void ip_conntrack_hash_insert(struct ip_conntrack *ct)
-{
- unsigned int hash, repl_hash;
-
- hash = hash_conntrack(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple);
- repl_hash = hash_conntrack(&ct->tuplehash[IP_CT_DIR_REPLY].tuple);
-
- write_lock_bh(&ip_conntrack_lock);
- __ip_conntrack_hash_insert(ct, hash, repl_hash);
- write_unlock_bh(&ip_conntrack_lock);
-}
-
-/* Confirm a connection given skb; places it in hash table */
-int
-__ip_conntrack_confirm(struct sk_buff **pskb)
-{
- unsigned int hash, repl_hash;
- struct ip_conntrack_tuple_hash *h;
- struct ip_conntrack *ct;
- enum ip_conntrack_info ctinfo;
-
- ct = ip_conntrack_get(*pskb, &ctinfo);
-
- /* ipt_REJECT uses ip_conntrack_attach to attach related
- ICMP/TCP RST packets in other direction. Actual packet
- which created connection will be IP_CT_NEW or for an
- expected connection, IP_CT_RELATED. */
- if (CTINFO2DIR(ctinfo) != IP_CT_DIR_ORIGINAL)
- return NF_ACCEPT;
-
- hash = hash_conntrack(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple);
- repl_hash = hash_conntrack(&ct->tuplehash[IP_CT_DIR_REPLY].tuple);
-
- /* We're not in hash table, and we refuse to set up related
- connections for unconfirmed conns. But packet copies and
- REJECT will give spurious warnings here. */
- /* IP_NF_ASSERT(atomic_read(&ct->ct_general.use) == 1); */
-
- /* No external references means no one else could have
- confirmed us. */
- IP_NF_ASSERT(!is_confirmed(ct));
- DEBUGP("Confirming conntrack %p\n", ct);
-
- write_lock_bh(&ip_conntrack_lock);
-
- /* See if there's one in the list already, including reverse:
- NAT could have grabbed it without realizing, since we're
- not in the hash. If there is, we lost race. */
- list_for_each_entry(h, &ip_conntrack_hash[hash], list)
- if (ip_ct_tuple_equal(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple,
- &h->tuple))
- goto out;
- list_for_each_entry(h, &ip_conntrack_hash[repl_hash], list)
- if (ip_ct_tuple_equal(&ct->tuplehash[IP_CT_DIR_REPLY].tuple,
- &h->tuple))
- goto out;
-
- /* Remove from unconfirmed list */
- list_del(&ct->tuplehash[IP_CT_DIR_ORIGINAL].list);
-
- __ip_conntrack_hash_insert(ct, hash, repl_hash);
- /* Timer relative to confirmation time, not original
- setting time, otherwise we'd get timer wrap in
- weird delay cases. */
- ct->timeout.expires += jiffies;
- add_timer(&ct->timeout);
- atomic_inc(&ct->ct_general.use);
- set_bit(IPS_CONFIRMED_BIT, &ct->status);
- CONNTRACK_STAT_INC(insert);
- write_unlock_bh(&ip_conntrack_lock);
- if (ct->helper)
- ip_conntrack_event_cache(IPCT_HELPER, *pskb);
-#ifdef CONFIG_IP_NF_NAT_NEEDED
- if (test_bit(IPS_SRC_NAT_DONE_BIT, &ct->status) ||
- test_bit(IPS_DST_NAT_DONE_BIT, &ct->status))
- ip_conntrack_event_cache(IPCT_NATINFO, *pskb);
-#endif
- ip_conntrack_event_cache(master_ct(ct) ?
- IPCT_RELATED : IPCT_NEW, *pskb);
-
- return NF_ACCEPT;
-
-out:
- CONNTRACK_STAT_INC(insert_failed);
- write_unlock_bh(&ip_conntrack_lock);
- return NF_DROP;
-}
-
-/* Returns true if a connection corresponds to the tuple (required
- for NAT). */
-int
-ip_conntrack_tuple_taken(const struct ip_conntrack_tuple *tuple,
- const struct ip_conntrack *ignored_conntrack)
-{
- struct ip_conntrack_tuple_hash *h;
-
- read_lock_bh(&ip_conntrack_lock);
- h = __ip_conntrack_find(tuple, ignored_conntrack);
- read_unlock_bh(&ip_conntrack_lock);
-
- return h != NULL;
-}
-
-/* There's a small race here where we may free a just-assured
- connection. Too bad: we're in trouble anyway. */
-static int early_drop(struct list_head *chain)
-{
- /* Traverse backwards: gives us oldest, which is roughly LRU */
- struct ip_conntrack_tuple_hash *h;
- struct ip_conntrack *ct = NULL, *tmp;
- int dropped = 0;
-
- read_lock_bh(&ip_conntrack_lock);
- list_for_each_entry_reverse(h, chain, list) {
- tmp = tuplehash_to_ctrack(h);
- if (!test_bit(IPS_ASSURED_BIT, &tmp->status)) {
- ct = tmp;
- atomic_inc(&ct->ct_general.use);
- break;
- }
- }
- read_unlock_bh(&ip_conntrack_lock);
-
- if (!ct)
- return dropped;
-
- if (del_timer(&ct->timeout)) {
- death_by_timeout((unsigned long)ct);
- dropped = 1;
- CONNTRACK_STAT_INC_ATOMIC(early_drop);
- }
- ip_conntrack_put(ct);
- return dropped;
-}
-
-static struct ip_conntrack_helper *
-__ip_conntrack_helper_find( const struct ip_conntrack_tuple *tuple)
-{
- struct ip_conntrack_helper *h;
-
- list_for_each_entry(h, &helpers, list) {
- if (ip_ct_tuple_mask_cmp(tuple, &h->tuple, &h->mask))
- return h;
- }
- return NULL;
-}
-
-struct ip_conntrack_helper *
-ip_conntrack_helper_find_get( const struct ip_conntrack_tuple *tuple)
-{
- struct ip_conntrack_helper *helper;
-
- /* need ip_conntrack_lock to ensure that the helper exists until
- * try_module_get() is called */
- read_lock_bh(&ip_conntrack_lock);
-
- helper = __ip_conntrack_helper_find(tuple);
- if (helper) {
- /* need to increase the module usage count to ensure the helper
- * will not go away while the caller is e.g. busy putting a
- * conntrack that uses the helper into the hash */
- if (!try_module_get(helper->me))
- helper = NULL;
- }
-
- read_unlock_bh(&ip_conntrack_lock);
-
- return helper;
-}
-
-void ip_conntrack_helper_put(struct ip_conntrack_helper *helper)
-{
- module_put(helper->me);
-}
-
-struct ip_conntrack_protocol *
-__ip_conntrack_proto_find(u_int8_t protocol)
-{
- return ip_ct_protos[protocol];
-}
-
-/* this is guaranteed to always return a valid protocol helper, since
- * it falls back to generic_protocol */
-struct ip_conntrack_protocol *
-ip_conntrack_proto_find_get(u_int8_t protocol)
-{
- struct ip_conntrack_protocol *p;
-
- rcu_read_lock();
- p = __ip_conntrack_proto_find(protocol);
- if (p) {
- if (!try_module_get(p->me))
- p = &ip_conntrack_generic_protocol;
- }
- rcu_read_unlock();
-
- return p;
-}
-
-void ip_conntrack_proto_put(struct ip_conntrack_protocol *p)
-{
- module_put(p->me);
-}
-
-struct ip_conntrack *ip_conntrack_alloc(struct ip_conntrack_tuple *orig,
- struct ip_conntrack_tuple *repl)
-{
- struct ip_conntrack *conntrack;
-
- if (!ip_conntrack_hash_rnd_initted) {
- get_random_bytes(&ip_conntrack_hash_rnd, 4);
- ip_conntrack_hash_rnd_initted = 1;
- }
-
- /* We don't want any race condition at early drop stage */
- atomic_inc(&ip_conntrack_count);
-
- if (ip_conntrack_max
- && atomic_read(&ip_conntrack_count) > ip_conntrack_max) {
- unsigned int hash = hash_conntrack(orig);
- /* Try dropping from this hash chain. */
- if (!early_drop(&ip_conntrack_hash[hash])) {
- atomic_dec(&ip_conntrack_count);
- if (net_ratelimit())
- printk(KERN_WARNING
- "ip_conntrack: table full, dropping"
- " packet.\n");
- return ERR_PTR(-ENOMEM);
- }
- }
-
- conntrack = kmem_cache_zalloc(ip_conntrack_cachep, GFP_ATOMIC);
- if (!conntrack) {
- DEBUGP("Can't allocate conntrack.\n");
- atomic_dec(&ip_conntrack_count);
- return ERR_PTR(-ENOMEM);
- }
-
- atomic_set(&conntrack->ct_general.use, 1);
- conntrack->ct_general.destroy = destroy_conntrack;
- conntrack->tuplehash[IP_CT_DIR_ORIGINAL].tuple = *orig;
- conntrack->tuplehash[IP_CT_DIR_REPLY].tuple = *repl;
- /* Don't set timer yet: wait for confirmation */
- init_timer(&conntrack->timeout);
- conntrack->timeout.data = (unsigned long)conntrack;
- conntrack->timeout.function = death_by_timeout;
-
- return conntrack;
-}
-
-void
-ip_conntrack_free(struct ip_conntrack *conntrack)
-{
- atomic_dec(&ip_conntrack_count);
- kmem_cache_free(ip_conntrack_cachep, conntrack);
-}
-
-/* Allocate a new conntrack: returns ERR_PTR(-ENOMEM) if allocation
- * failed due to stress; a NULL return means the packet really is
- * unclassifiable */
-static struct ip_conntrack_tuple_hash *
-init_conntrack(struct ip_conntrack_tuple *tuple,
- struct ip_conntrack_protocol *protocol,
- struct sk_buff *skb)
-{
- struct ip_conntrack *conntrack;
- struct ip_conntrack_tuple repl_tuple;
- struct ip_conntrack_expect *exp;
-
- if (!ip_ct_invert_tuple(&repl_tuple, tuple, protocol)) {
- DEBUGP("Can't invert tuple.\n");
- return NULL;
- }
-
- conntrack = ip_conntrack_alloc(tuple, &repl_tuple);
- if (conntrack == NULL || IS_ERR(conntrack))
- return (struct ip_conntrack_tuple_hash *)conntrack;
-
- if (!protocol->new(conntrack, skb)) {
- ip_conntrack_free(conntrack);
- return NULL;
- }
-
- write_lock_bh(&ip_conntrack_lock);
- exp = find_expectation(tuple);
-
- if (exp) {
- DEBUGP("conntrack: expectation arrives ct=%p exp=%p\n",
- conntrack, exp);
- /* Welcome, Mr. Bond. We've been expecting you... */
- __set_bit(IPS_EXPECTED_BIT, &conntrack->status);
- conntrack->master = exp->master;
-#ifdef CONFIG_IP_NF_CONNTRACK_MARK
- conntrack->mark = exp->master->mark;
-#endif
-#if defined(CONFIG_IP_NF_TARGET_MASQUERADE) || \
- defined(CONFIG_IP_NF_TARGET_MASQUERADE_MODULE)
- /* this is ugly, but there is no other place to put it */
- conntrack->nat.masq_index = exp->master->nat.masq_index;
-#endif
-#ifdef CONFIG_IP_NF_CONNTRACK_SECMARK
- conntrack->secmark = exp->master->secmark;
-#endif
- nf_conntrack_get(&conntrack->master->ct_general);
- CONNTRACK_STAT_INC(expect_new);
- } else {
- conntrack->helper = __ip_conntrack_helper_find(&repl_tuple);
-
- CONNTRACK_STAT_INC(new);
- }
-
- /* Overload tuple linked list to put us in unconfirmed list. */
- list_add(&conntrack->tuplehash[IP_CT_DIR_ORIGINAL].list, &unconfirmed);
-
- write_unlock_bh(&ip_conntrack_lock);
-
- if (exp) {
- if (exp->expectfn)
- exp->expectfn(conntrack, exp);
- ip_conntrack_expect_put(exp);
- }
-
- return &conntrack->tuplehash[IP_CT_DIR_ORIGINAL];
-}
-
-/* On success, returns conntrack ptr, sets skb->nfct and ctinfo */
-static inline struct ip_conntrack *
-resolve_normal_ct(struct sk_buff *skb,
- struct ip_conntrack_protocol *proto,
- int *set_reply,
- unsigned int hooknum,
- enum ip_conntrack_info *ctinfo)
-{
- struct ip_conntrack_tuple tuple;
- struct ip_conntrack_tuple_hash *h;
- struct ip_conntrack *ct;
-
- IP_NF_ASSERT((skb->nh.iph->frag_off & htons(IP_OFFSET)) == 0);
-
- if (!ip_ct_get_tuple(skb->nh.iph, skb, skb->nh.iph->ihl*4,
- &tuple,proto))
- return NULL;
-
- /* look for tuple match */
- h = ip_conntrack_find_get(&tuple, NULL);
- if (!h) {
- h = init_conntrack(&tuple, proto, skb);
- if (!h)
- return NULL;
- if (IS_ERR(h))
- return (void *)h;
- }
- ct = tuplehash_to_ctrack(h);
-
- /* It exists; we have (non-exclusive) reference. */
- if (DIRECTION(h) == IP_CT_DIR_REPLY) {
- *ctinfo = IP_CT_ESTABLISHED + IP_CT_IS_REPLY;
- /* Please set reply bit if this packet OK */
- *set_reply = 1;
- } else {
- /* Once we've had two way comms, always ESTABLISHED. */
- if (test_bit(IPS_SEEN_REPLY_BIT, &ct->status)) {
- DEBUGP("ip_conntrack_in: normal packet for %p\n",
- ct);
- *ctinfo = IP_CT_ESTABLISHED;
- } else if (test_bit(IPS_EXPECTED_BIT, &ct->status)) {
- DEBUGP("ip_conntrack_in: related packet for %p\n",
- ct);
- *ctinfo = IP_CT_RELATED;
- } else {
- DEBUGP("ip_conntrack_in: new packet for %p\n",
- ct);
- *ctinfo = IP_CT_NEW;
- }
- *set_reply = 0;
- }
- skb->nfct = &ct->ct_general;
- skb->nfctinfo = *ctinfo;
- return ct;
-}
-
-/* Netfilter hook itself. */
-unsigned int ip_conntrack_in(unsigned int hooknum,
- struct sk_buff **pskb,
- const struct net_device *in,
- const struct net_device *out,
- int (*okfn)(struct sk_buff *))
-{
- struct ip_conntrack *ct;
- enum ip_conntrack_info ctinfo;
- struct ip_conntrack_protocol *proto;
- int set_reply = 0;
- int ret;
-
- /* Previously seen (loopback or untracked)? Ignore. */
- if ((*pskb)->nfct) {
- CONNTRACK_STAT_INC_ATOMIC(ignore);
- return NF_ACCEPT;
- }
-
- /* Should never happen: fragments are reassembled before conntrack runs */
- if ((*pskb)->nh.iph->frag_off & htons(IP_OFFSET)) {
- if (net_ratelimit()) {
- printk(KERN_ERR "ip_conntrack_in: Frag of proto %u (hook=%u)\n",
- (*pskb)->nh.iph->protocol, hooknum);
- }
- return NF_DROP;
- }
-
-/* Doesn't cover locally-generated broadcast, so not worth it. */
-#if 0
- /* Ignore broadcast: no `connection'. */
- if ((*pskb)->pkt_type == PACKET_BROADCAST) {
- printk("Broadcast packet!\n");
- return NF_ACCEPT;
- } else if (((*pskb)->nh.iph->daddr & htonl(0x000000FF))
- == htonl(0x000000FF)) {
- printk("Should bcast: %u.%u.%u.%u->%u.%u.%u.%u (sk=%p, ptype=%u)\n",
- NIPQUAD((*pskb)->nh.iph->saddr),
- NIPQUAD((*pskb)->nh.iph->daddr),
- (*pskb)->sk, (*pskb)->pkt_type);
- }
-#endif
-
- /* rcu_read_lock()ed by nf_hook_slow */
- proto = __ip_conntrack_proto_find((*pskb)->nh.iph->protocol);
-
- /* It may be a special packet: error, unclean...
- * The inverse of the return code tells the netfilter
- * core what to do with the packet. */
- if (proto->error != NULL
- && (ret = proto->error(*pskb, &ctinfo, hooknum)) <= 0) {
- CONNTRACK_STAT_INC_ATOMIC(error);
- CONNTRACK_STAT_INC_ATOMIC(invalid);
- return -ret;
- }
-
- if (!(ct = resolve_normal_ct(*pskb, proto,&set_reply,hooknum,&ctinfo))) {
- /* Not valid part of a connection */
- CONNTRACK_STAT_INC_ATOMIC(invalid);
- return NF_ACCEPT;
- }
-
- if (IS_ERR(ct)) {
- /* Too stressed to deal. */
- CONNTRACK_STAT_INC_ATOMIC(drop);
- return NF_DROP;
- }
-
- IP_NF_ASSERT((*pskb)->nfct);
-
- ret = proto->packet(ct, *pskb, ctinfo);
- if (ret < 0) {
- /* Invalid: inverse of the return code tells
- * the netfilter core what to do*/
- nf_conntrack_put((*pskb)->nfct);
- (*pskb)->nfct = NULL;
- CONNTRACK_STAT_INC_ATOMIC(invalid);
- return -ret;
- }
-
- if (set_reply && !test_and_set_bit(IPS_SEEN_REPLY_BIT, &ct->status))
- ip_conntrack_event_cache(IPCT_STATUS, *pskb);
-
- return ret;
-}
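
For orientation: ip_conntrack_in() was registered from ip_conntrack_standalone.c, not from this file. A minimal sketch of how a hook with this signature was attached under the netfilter API of this era (the ops values below are illustrative assumptions, not a copy of the removed file):

	static struct nf_hook_ops ip_conntrack_in_ops = {
		.hook		= ip_conntrack_in,
		.owner		= THIS_MODULE,
		.pf		= PF_INET,
		.hooknum	= NF_IP_PRE_ROUTING,
		/* after defragmentation (NF_IP_PRI_CONNTRACK_DEFRAG) */
		.priority	= NF_IP_PRI_CONNTRACK,
	};

	ret = nf_register_hook(&ip_conntrack_in_ops);	/* in module init */
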
-
-int invert_tuplepr(struct ip_conntrack_tuple *inverse,
- const struct ip_conntrack_tuple *orig)
-{
- struct ip_conntrack_protocol *proto;
- int ret;
-
- rcu_read_lock();
- proto = __ip_conntrack_proto_find(orig->dst.protonum);
- ret = ip_ct_invert_tuple(inverse, orig, proto);
- rcu_read_unlock();
-
- return ret;
-}
-
-/* Would two expected things clash? */
-static inline int expect_clash(const struct ip_conntrack_expect *a,
- const struct ip_conntrack_expect *b)
-{
- /* Part covered by intersection of masks must be unequal,
- otherwise they clash */
- struct ip_conntrack_tuple intersect_mask
- = { { a->mask.src.ip & b->mask.src.ip,
- { a->mask.src.u.all & b->mask.src.u.all } },
- { a->mask.dst.ip & b->mask.dst.ip,
- { a->mask.dst.u.all & b->mask.dst.u.all },
- a->mask.dst.protonum & b->mask.dst.protonum } };
-
- return ip_ct_tuple_mask_cmp(&a->tuple, &b->tuple, &intersect_mask);
-}
-
-static inline int expect_matches(const struct ip_conntrack_expect *a,
- const struct ip_conntrack_expect *b)
-{
- return a->master == b->master
- && ip_ct_tuple_equal(&a->tuple, &b->tuple)
- && ip_ct_tuple_equal(&a->mask, &b->mask);
-}
-
-/* Generally a bad idea to call this: could have matched already. */
-void ip_conntrack_unexpect_related(struct ip_conntrack_expect *exp)
-{
- struct ip_conntrack_expect *i;
-
- write_lock_bh(&ip_conntrack_lock);
- /* choose the oldest expectation to evict */
- list_for_each_entry_reverse(i, &ip_conntrack_expect_list, list) {
- if (expect_matches(i, exp) && del_timer(&i->timeout)) {
- ip_ct_unlink_expect(i);
- write_unlock_bh(&ip_conntrack_lock);
- ip_conntrack_expect_put(i);
- return;
- }
- }
- write_unlock_bh(&ip_conntrack_lock);
-}
-
-/* We don't increase the master conntrack refcount for non-fulfilled
- * conntracks. During the conntrack destruction, the expectations are
- * always killed before the conntrack itself */
-struct ip_conntrack_expect *ip_conntrack_expect_alloc(struct ip_conntrack *me)
-{
- struct ip_conntrack_expect *new;
-
- new = kmem_cache_alloc(ip_conntrack_expect_cachep, GFP_ATOMIC);
- if (!new) {
- DEBUGP("expect_related: OOM allocating expect\n");
- return NULL;
- }
- new->master = me;
- atomic_set(&new->use, 1);
- return new;
-}
-
-void ip_conntrack_expect_put(struct ip_conntrack_expect *exp)
-{
- if (atomic_dec_and_test(&exp->use))
- kmem_cache_free(ip_conntrack_expect_cachep, exp);
-}
-
-static void ip_conntrack_expect_insert(struct ip_conntrack_expect *exp)
-{
- atomic_inc(&exp->use);
- exp->master->expecting++;
- list_add(&exp->list, &ip_conntrack_expect_list);
-
- init_timer(&exp->timeout);
- exp->timeout.data = (unsigned long)exp;
- exp->timeout.function = expectation_timed_out;
- exp->timeout.expires = jiffies + exp->master->helper->timeout * HZ;
- add_timer(&exp->timeout);
-
- exp->id = ++ip_conntrack_expect_next_id;
- atomic_inc(&exp->use);
- CONNTRACK_STAT_INC(expect_create);
-}
-
-/* Race with expectations being used means we could have none to find; OK. */
-static void evict_oldest_expect(struct ip_conntrack *master)
-{
- struct ip_conntrack_expect *i;
-
- list_for_each_entry_reverse(i, &ip_conntrack_expect_list, list) {
- if (i->master == master) {
- if (del_timer(&i->timeout)) {
- ip_ct_unlink_expect(i);
- ip_conntrack_expect_put(i);
- }
- break;
- }
- }
-}
-
-static inline int refresh_timer(struct ip_conntrack_expect *i)
-{
- if (!del_timer(&i->timeout))
- return 0;
-
- i->timeout.expires = jiffies + i->master->helper->timeout*HZ;
- add_timer(&i->timeout);
- return 1;
-}
-
-int ip_conntrack_expect_related(struct ip_conntrack_expect *expect)
-{
- struct ip_conntrack_expect *i;
- int ret;
-
- DEBUGP("ip_conntrack_expect_related %p\n", related_to);
- DEBUGP("tuple: "); DUMP_TUPLE(&expect->tuple);
- DEBUGP("mask: "); DUMP_TUPLE(&expect->mask);
-
- write_lock_bh(&ip_conntrack_lock);
- list_for_each_entry(i, &ip_conntrack_expect_list, list) {
- if (expect_matches(i, expect)) {
- /* Refresh timer: if it's dying, ignore.. */
- if (refresh_timer(i)) {
- ret = 0;
- goto out;
- }
- } else if (expect_clash(i, expect)) {
- ret = -EBUSY;
- goto out;
- }
- }
-
- /* Will be over limit? */
- if (expect->master->helper->max_expected &&
- expect->master->expecting >= expect->master->helper->max_expected)
- evict_oldest_expect(expect->master);
-
- ip_conntrack_expect_insert(expect);
- ip_conntrack_expect_event(IPEXP_NEW, expect);
- ret = 0;
-out:
- write_unlock_bh(&ip_conntrack_lock);
- return ret;
-}
-
-/* Alter reply tuple (maybe alter helper). This is for NAT, and is
- implicitly racy: see __ip_conntrack_confirm */
-void ip_conntrack_alter_reply(struct ip_conntrack *conntrack,
- const struct ip_conntrack_tuple *newreply)
-{
- write_lock_bh(&ip_conntrack_lock);
- /* Should be unconfirmed, so not in hash table yet */
- IP_NF_ASSERT(!is_confirmed(conntrack));
-
- DEBUGP("Altering reply tuple of %p to ", conntrack);
- DUMP_TUPLE(newreply);
-
- conntrack->tuplehash[IP_CT_DIR_REPLY].tuple = *newreply;
- if (!conntrack->master && conntrack->expecting == 0)
- conntrack->helper = __ip_conntrack_helper_find(newreply);
- write_unlock_bh(&ip_conntrack_lock);
-}
-
-int ip_conntrack_helper_register(struct ip_conntrack_helper *me)
-{
- BUG_ON(me->timeout == 0);
- write_lock_bh(&ip_conntrack_lock);
- list_add(&me->list, &helpers);
- write_unlock_bh(&ip_conntrack_lock);
-
- return 0;
-}
-
-struct ip_conntrack_helper *
-__ip_conntrack_helper_find_byname(const char *name)
-{
- struct ip_conntrack_helper *h;
-
- list_for_each_entry(h, &helpers, list) {
- if (!strcmp(h->name, name))
- return h;
- }
-
- return NULL;
-}
-
-static inline void unhelp(struct ip_conntrack_tuple_hash *i,
- const struct ip_conntrack_helper *me)
-{
- if (tuplehash_to_ctrack(i)->helper == me) {
- ip_conntrack_event(IPCT_HELPER, tuplehash_to_ctrack(i));
- tuplehash_to_ctrack(i)->helper = NULL;
- }
-}
-
-void ip_conntrack_helper_unregister(struct ip_conntrack_helper *me)
-{
- unsigned int i;
- struct ip_conntrack_tuple_hash *h;
- struct ip_conntrack_expect *exp, *tmp;
-
- /* Need write lock here, to delete helper. */
- write_lock_bh(&ip_conntrack_lock);
- list_del(&me->list);
-
- /* Get rid of expectations */
- list_for_each_entry_safe(exp, tmp, &ip_conntrack_expect_list, list) {
- if (exp->master->helper == me && del_timer(&exp->timeout)) {
- ip_ct_unlink_expect(exp);
- ip_conntrack_expect_put(exp);
- }
- }
- /* Get rid of expecteds, set helpers to NULL. */
- list_for_each_entry(h, &unconfirmed, list)
- unhelp(h, me);
- for (i = 0; i < ip_conntrack_htable_size; i++) {
- list_for_each_entry(h, &ip_conntrack_hash[i], list)
- unhelp(h, me);
- }
- write_unlock_bh(&ip_conntrack_lock);
-
- /* Someone could still be looking at the helper in a bh. */
- synchronize_net();
-}
-
-/* Refresh conntrack for this many jiffies and do accounting if do_acct is 1 */
-void __ip_ct_refresh_acct(struct ip_conntrack *ct,
- enum ip_conntrack_info ctinfo,
- const struct sk_buff *skb,
- unsigned long extra_jiffies,
- int do_acct)
-{
- int event = 0;
-
- IP_NF_ASSERT(ct->timeout.data == (unsigned long)ct);
- IP_NF_ASSERT(skb);
-
- write_lock_bh(&ip_conntrack_lock);
-
- /* Only update if this is not a fixed timeout */
- if (test_bit(IPS_FIXED_TIMEOUT_BIT, &ct->status)) {
- write_unlock_bh(&ip_conntrack_lock);
- return;
- }
-
- /* If not in hash table, timer will not be active yet */
- if (!is_confirmed(ct)) {
- ct->timeout.expires = extra_jiffies;
- event = IPCT_REFRESH;
- } else {
- /* Need del_timer for race avoidance (may already be dying). */
- if (del_timer(&ct->timeout)) {
- ct->timeout.expires = jiffies + extra_jiffies;
- add_timer(&ct->timeout);
- event = IPCT_REFRESH;
- }
- }
-
-#ifdef CONFIG_IP_NF_CT_ACCT
- if (do_acct) {
- ct->counters[CTINFO2DIR(ctinfo)].packets++;
- ct->counters[CTINFO2DIR(ctinfo)].bytes +=
- ntohs(skb->nh.iph->tot_len);
- if ((ct->counters[CTINFO2DIR(ctinfo)].packets & 0x80000000)
- || (ct->counters[CTINFO2DIR(ctinfo)].bytes & 0x80000000))
- event |= IPCT_COUNTER_FILLING;
- }
-#endif
-
- write_unlock_bh(&ip_conntrack_lock);
-
- /* must be unlocked when calling event cache */
- if (event)
- ip_conntrack_event_cache(event, skb);
-}
-
-#if defined(CONFIG_IP_NF_CONNTRACK_NETLINK) || \
- defined(CONFIG_IP_NF_CONNTRACK_NETLINK_MODULE)
-/* Generic function for tcp/udp/sctp/dccp and the like. This needs to be
- * in ip_conntrack_core, since we don't want the protocols to autoload
- * or depend on ctnetlink */
-int ip_ct_port_tuple_to_nfattr(struct sk_buff *skb,
- const struct ip_conntrack_tuple *tuple)
-{
- NFA_PUT(skb, CTA_PROTO_SRC_PORT, sizeof(__be16),
- &tuple->src.u.tcp.port);
- NFA_PUT(skb, CTA_PROTO_DST_PORT, sizeof(__be16),
- &tuple->dst.u.tcp.port);
- return 0;
-
-nfattr_failure:
- return -1;
-}
-
-int ip_ct_port_nfattr_to_tuple(struct nfattr *tb[],
- struct ip_conntrack_tuple *t)
-{
- if (!tb[CTA_PROTO_SRC_PORT-1] || !tb[CTA_PROTO_DST_PORT-1])
- return -EINVAL;
-
- t->src.u.tcp.port =
- *(__be16 *)NFA_DATA(tb[CTA_PROTO_SRC_PORT-1]);
- t->dst.u.tcp.port =
- *(__be16 *)NFA_DATA(tb[CTA_PROTO_DST_PORT-1]);
-
- return 0;
-}
-#endif
-
-/* Returns new sk_buff, or NULL */
-struct sk_buff *
-ip_ct_gather_frags(struct sk_buff *skb, u_int32_t user)
-{
- skb_orphan(skb);
-
- local_bh_disable();
- skb = ip_defrag(skb, user);
- local_bh_enable();
-
- if (skb)
- ip_send_check(skb->nh.iph);
- return skb;
-}
-
-/* Used by ipt_REJECT. */
-static void ip_conntrack_attach(struct sk_buff *nskb, struct sk_buff *skb)
-{
- struct ip_conntrack *ct;
- enum ip_conntrack_info ctinfo;
-
- /* This ICMP is in reverse direction to the packet which caused it */
- ct = ip_conntrack_get(skb, &ctinfo);
-
- if (CTINFO2DIR(ctinfo) == IP_CT_DIR_ORIGINAL)
- ctinfo = IP_CT_RELATED + IP_CT_IS_REPLY;
- else
- ctinfo = IP_CT_RELATED;
-
- /* Attach to new skbuff, and increment count */
- nskb->nfct = &ct->ct_general;
- nskb->nfctinfo = ctinfo;
- nf_conntrack_get(nskb->nfct);
-}
-
-/* Bring out ya dead! */
-static struct ip_conntrack *
-get_next_corpse(int (*iter)(struct ip_conntrack *i, void *data),
- void *data, unsigned int *bucket)
-{
- struct ip_conntrack_tuple_hash *h;
- struct ip_conntrack *ct;
-
- write_lock_bh(&ip_conntrack_lock);
- for (; *bucket < ip_conntrack_htable_size; (*bucket)++) {
- list_for_each_entry(h, &ip_conntrack_hash[*bucket], list) {
- ct = tuplehash_to_ctrack(h);
- if (iter(ct, data))
- goto found;
- }
- }
- list_for_each_entry(h, &unconfirmed, list) {
- ct = tuplehash_to_ctrack(h);
- if (iter(ct, data))
- set_bit(IPS_DYING_BIT, &ct->status);
- }
- write_unlock_bh(&ip_conntrack_lock);
- return NULL;
-
-found:
- atomic_inc(&ct->ct_general.use);
- write_unlock_bh(&ip_conntrack_lock);
- return ct;
-}
-
-void
-ip_ct_iterate_cleanup(int (*iter)(struct ip_conntrack *i, void *), void *data)
-{
- struct ip_conntrack *ct;
- unsigned int bucket = 0;
-
- while ((ct = get_next_corpse(iter, data, &bucket)) != NULL) {
- /* Time to push up daisies... */
- if (del_timer(&ct->timeout))
- death_by_timeout((unsigned long)ct);
- /* ... else the timer will get him soon. */
-
- ip_conntrack_put(ct);
- }
-}
-
-/* Fast function for those who don't want to parse /proc (and I don't
- blame them). */
-/* Reversing the socket's dst/src point of view gives us the reply
- mapping. */
-static int
-getorigdst(struct sock *sk, int optval, void __user *user, int *len)
-{
- struct inet_sock *inet = inet_sk(sk);
- struct ip_conntrack_tuple_hash *h;
- struct ip_conntrack_tuple tuple;
-
- IP_CT_TUPLE_U_BLANK(&tuple);
- tuple.src.ip = inet->rcv_saddr;
- tuple.src.u.tcp.port = inet->sport;
- tuple.dst.ip = inet->daddr;
- tuple.dst.u.tcp.port = inet->dport;
- tuple.dst.protonum = IPPROTO_TCP;
-
- /* We only do TCP at the moment: is there a better way? */
- if (strcmp(sk->sk_prot->name, "TCP")) {
- DEBUGP("SO_ORIGINAL_DST: Not a TCP socket\n");
- return -ENOPROTOOPT;
- }
-
- if ((unsigned int) *len < sizeof(struct sockaddr_in)) {
- DEBUGP("SO_ORIGINAL_DST: len %u not %u\n",
- *len, sizeof(struct sockaddr_in));
- return -EINVAL;
- }
-
- h = ip_conntrack_find_get(&tuple, NULL);
- if (h) {
- struct sockaddr_in sin;
- struct ip_conntrack *ct = tuplehash_to_ctrack(h);
-
- sin.sin_family = AF_INET;
- sin.sin_port = ct->tuplehash[IP_CT_DIR_ORIGINAL]
- .tuple.dst.u.tcp.port;
- sin.sin_addr.s_addr = ct->tuplehash[IP_CT_DIR_ORIGINAL]
- .tuple.dst.ip;
- memset(sin.sin_zero, 0, sizeof(sin.sin_zero));
-
- DEBUGP("SO_ORIGINAL_DST: %u.%u.%u.%u %u\n",
- NIPQUAD(sin.sin_addr.s_addr), ntohs(sin.sin_port));
- ip_conntrack_put(ct);
- if (copy_to_user(user, &sin, sizeof(sin)) != 0)
- return -EFAULT;
- else
- return 0;
- }
- DEBUGP("SO_ORIGINAL_DST: Can't find %u.%u.%u.%u/%u-%u.%u.%u.%u/%u.\n",
- NIPQUAD(tuple.src.ip), ntohs(tuple.src.u.tcp.port),
- NIPQUAD(tuple.dst.ip), ntohs(tuple.dst.u.tcp.port));
- return -ENOENT;
-}
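
getorigdst() above backs the SO_ORIGINAL_DST getsockopt(), which transparent proxies use to recover the pre-REDIRECT/DNAT destination of an accepted TCP connection. A minimal userspace sketch (assuming the usual Linux headers; SO_ORIGINAL_DST comes from <linux/netfilter_ipv4.h>):

	#include <netinet/in.h>
	#include <sys/socket.h>
	#include <linux/netfilter_ipv4.h>	/* SO_ORIGINAL_DST */

	static int original_dst(int fd, struct sockaddr_in *dst)
	{
		socklen_t len = sizeof(*dst);

		/* Serviced by getorigdst(): looks up the conntrack entry
		 * for the socket's TCP 4-tuple and copies back the
		 * ORIGINAL-direction destination address and port. */
		return getsockopt(fd, SOL_IP, SO_ORIGINAL_DST, dst, &len);
	}
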
-
-static struct nf_sockopt_ops so_getorigdst = {
- .pf = PF_INET,
- .get_optmin = SO_ORIGINAL_DST,
- .get_optmax = SO_ORIGINAL_DST+1,
- .get = &getorigdst,
-};
-
-static int kill_all(struct ip_conntrack *i, void *data)
-{
- return 1;
-}
-
-void ip_conntrack_flush(void)
-{
- ip_ct_iterate_cleanup(kill_all, NULL);
-}
-
-static void free_conntrack_hash(struct list_head *hash, int vmalloced,int size)
-{
- if (vmalloced)
- vfree(hash);
- else
- free_pages((unsigned long)hash,
- get_order(sizeof(struct list_head) * size));
-}
-
-/* Mishearing the voices in his head, our hero wonders how he's
- supposed to kill the mall. */
-void ip_conntrack_cleanup(void)
-{
- rcu_assign_pointer(ip_ct_attach, NULL);
-
- /* This makes sure all current packets have passed through
- the netfilter framework. Roll on, two-stage module
- delete... */
- synchronize_net();
-
- ip_ct_event_cache_flush();
- i_see_dead_people:
- ip_conntrack_flush();
- if (atomic_read(&ip_conntrack_count) != 0) {
- schedule();
- goto i_see_dead_people;
- }
- /* wait until all references to ip_conntrack_untracked are dropped */
- while (atomic_read(&ip_conntrack_untracked.ct_general.use) > 1)
- schedule();
-
- kmem_cache_destroy(ip_conntrack_cachep);
- kmem_cache_destroy(ip_conntrack_expect_cachep);
- free_conntrack_hash(ip_conntrack_hash, ip_conntrack_vmalloc,
- ip_conntrack_htable_size);
- nf_unregister_sockopt(&so_getorigdst);
-}
-
-static struct list_head *alloc_hashtable(int size, int *vmalloced)
-{
- struct list_head *hash;
- unsigned int i;
-
- *vmalloced = 0;
- hash = (void*)__get_free_pages(GFP_KERNEL,
- get_order(sizeof(struct list_head)
- * size));
- if (!hash) {
- *vmalloced = 1;
- printk(KERN_WARNING"ip_conntrack: falling back to vmalloc.\n");
- hash = vmalloc(sizeof(struct list_head) * size);
- }
-
- if (hash)
- for (i = 0; i < size; i++)
- INIT_LIST_HEAD(&hash[i]);
-
- return hash;
-}
-
-static int set_hashsize(const char *val, struct kernel_param *kp)
-{
- int i, bucket, hashsize, vmalloced;
- int old_vmalloced, old_size;
- int rnd;
- struct list_head *hash, *old_hash;
- struct ip_conntrack_tuple_hash *h;
-
- /* On boot, we can set this without any fancy locking. */
- if (!ip_conntrack_htable_size)
- return param_set_int(val, kp);
-
- hashsize = simple_strtol(val, NULL, 0);
- if (!hashsize)
- return -EINVAL;
-
- hash = alloc_hashtable(hashsize, &vmalloced);
- if (!hash)
- return -ENOMEM;
-
- /* We have to rehash into the new table anyway, so we can also
- * use a new random seed */
- get_random_bytes(&rnd, 4);
-
- write_lock_bh(&ip_conntrack_lock);
- for (i = 0; i < ip_conntrack_htable_size; i++) {
- while (!list_empty(&ip_conntrack_hash[i])) {
- h = list_entry(ip_conntrack_hash[i].next,
- struct ip_conntrack_tuple_hash, list);
- list_del(&h->list);
- bucket = __hash_conntrack(&h->tuple, hashsize, rnd);
- list_add_tail(&h->list, &hash[bucket]);
- }
- }
- old_size = ip_conntrack_htable_size;
- old_vmalloced = ip_conntrack_vmalloc;
- old_hash = ip_conntrack_hash;
-
- ip_conntrack_htable_size = hashsize;
- ip_conntrack_vmalloc = vmalloced;
- ip_conntrack_hash = hash;
- ip_conntrack_hash_rnd = rnd;
- write_unlock_bh(&ip_conntrack_lock);
-
- free_conntrack_hash(old_hash, old_vmalloced, old_size);
- return 0;
-}
-
-module_param_call(hashsize, set_hashsize, param_get_uint,
- &ip_conntrack_htable_size, 0600);
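
Usage note: with permissions 0600 the parameter is writable at runtime, so (assuming the standard module sysfs layout) the table can be resized with e.g. 'echo 16384 > /sys/module/ip_conntrack/parameters/hashsize', which invokes set_hashsize() above to allocate a new table, rehash every entry under the write lock, and free the old one; at load time the same knob is passed as 'modprobe ip_conntrack hashsize=16384'.
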
-
-int __init ip_conntrack_init(void)
-{
- unsigned int i;
- int ret;
-
- /* Idea from tcp.c: use 1/16384 of memory. On i386, a 32MB
- * machine has 256 buckets; >= 1GB machines have 8192 buckets. */
- if (!ip_conntrack_htable_size) {
- ip_conntrack_htable_size
- = (((num_physpages << PAGE_SHIFT) / 16384)
- / sizeof(struct list_head));
- if (num_physpages > (1024 * 1024 * 1024 / PAGE_SIZE))
- ip_conntrack_htable_size = 8192;
- if (ip_conntrack_htable_size < 16)
- ip_conntrack_htable_size = 16;
- }
- ip_conntrack_max = 8 * ip_conntrack_htable_size;
-
- printk("ip_conntrack version %s (%u buckets, %d max)"
- " - %Zd bytes per conntrack\n", IP_CONNTRACK_VERSION,
- ip_conntrack_htable_size, ip_conntrack_max,
- sizeof(struct ip_conntrack));
-
- ret = nf_register_sockopt(&so_getorigdst);
- if (ret != 0) {
- printk(KERN_ERR "Unable to register netfilter socket option\n");
- return ret;
- }
-
- ip_conntrack_hash = alloc_hashtable(ip_conntrack_htable_size,
- &ip_conntrack_vmalloc);
- if (!ip_conntrack_hash) {
- printk(KERN_ERR "Unable to create ip_conntrack_hash\n");
- goto err_unreg_sockopt;
- }
-
- ip_conntrack_cachep = kmem_cache_create("ip_conntrack",
- sizeof(struct ip_conntrack), 0,
- 0, NULL, NULL);
- if (!ip_conntrack_cachep) {
- printk(KERN_ERR "Unable to create ip_conntrack slab cache\n");
- goto err_free_hash;
- }
-
- ip_conntrack_expect_cachep = kmem_cache_create("ip_conntrack_expect",
- sizeof(struct ip_conntrack_expect),
- 0, 0, NULL, NULL);
- if (!ip_conntrack_expect_cachep) {
- printk(KERN_ERR "Unable to create ip_expect slab cache\n");
- goto err_free_conntrack_slab;
- }
-
- /* Don't NEED lock here, but good form anyway. */
- write_lock_bh(&ip_conntrack_lock);
- for (i = 0; i < MAX_IP_CT_PROTO; i++)
- rcu_assign_pointer(ip_ct_protos[i], &ip_conntrack_generic_protocol);
- /* Sew in builtin protocols. */
- rcu_assign_pointer(ip_ct_protos[IPPROTO_TCP], &ip_conntrack_protocol_tcp);
- rcu_assign_pointer(ip_ct_protos[IPPROTO_UDP], &ip_conntrack_protocol_udp);
- rcu_assign_pointer(ip_ct_protos[IPPROTO_ICMP], &ip_conntrack_protocol_icmp);
- write_unlock_bh(&ip_conntrack_lock);
-
- /* For use by ipt_REJECT */
- rcu_assign_pointer(ip_ct_attach, ip_conntrack_attach);
-
- /* Set up fake conntrack:
- - to never be deleted, not in any hashes */
- atomic_set(&ip_conntrack_untracked.ct_general.use, 1);
- /* - and make it look like a confirmed connection */
- set_bit(IPS_CONFIRMED_BIT, &ip_conntrack_untracked.status);
-
- return ret;
-
-err_free_conntrack_slab:
- kmem_cache_destroy(ip_conntrack_cachep);
-err_free_hash:
- free_conntrack_hash(ip_conntrack_hash, ip_conntrack_vmalloc,
- ip_conntrack_htable_size);
-err_unreg_sockopt:
- nf_unregister_sockopt(&so_getorigdst);
-
- return -ENOMEM;
-}
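
Worked example of the sizing heuristic in ip_conntrack_init(): on an i386 machine with 32 MB of RAM and an 8-byte struct list_head, (33554432 / 16384) / 8 = 256 hash buckets, and ip_conntrack_max = 8 * 256 = 2048 tracked connections, matching the comment above; a >= 1 GB machine is clamped to 8192 buckets and thus 65536 connections.
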
diff --git a/net/ipv4/netfilter/ip_conntrack_ftp.c b/net/ipv4/netfilter/ip_conntrack_ftp.c
deleted file mode 100644
index 1faa68ab943..00000000000
--- a/net/ipv4/netfilter/ip_conntrack_ftp.c
+++ /dev/null
@@ -1,520 +0,0 @@
-/* FTP extension for IP connection tracking. */
-
-/* (C) 1999-2001 Paul `Rusty' Russell
- * (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
-
-#include <linux/module.h>
-#include <linux/netfilter.h>
-#include <linux/ip.h>
-#include <linux/ctype.h>
-#include <net/checksum.h>
-#include <net/tcp.h>
-
-#include <linux/netfilter_ipv4/ip_conntrack_helper.h>
-#include <linux/netfilter_ipv4/ip_conntrack_ftp.h>
-#include <linux/moduleparam.h>
-
-MODULE_LICENSE("GPL");
-MODULE_AUTHOR("Rusty Russell <rusty@rustcorp.com.au>");
-MODULE_DESCRIPTION("ftp connection tracking helper");
-
-/* This is slow, but it's simple. --RR */
-static char *ftp_buffer;
-static DEFINE_SPINLOCK(ip_ftp_lock);
-
-#define MAX_PORTS 8
-static unsigned short ports[MAX_PORTS];
-static int ports_c;
-module_param_array(ports, ushort, &ports_c, 0400);
-
-static int loose;
-module_param(loose, bool, 0600);
-
-unsigned int (*ip_nat_ftp_hook)(struct sk_buff **pskb,
- enum ip_conntrack_info ctinfo,
- enum ip_ct_ftp_type type,
- unsigned int matchoff,
- unsigned int matchlen,
- struct ip_conntrack_expect *exp,
- u32 *seq);
-EXPORT_SYMBOL_GPL(ip_nat_ftp_hook);
-
-#if 0
-#define DEBUGP printk
-#else
-#define DEBUGP(format, args...)
-#endif
-
-static int try_rfc959(const char *, size_t, u_int32_t [], char);
-static int try_eprt(const char *, size_t, u_int32_t [], char);
-static int try_epsv_response(const char *, size_t, u_int32_t [], char);
-
-static const struct ftp_search {
- const char *pattern;
- size_t plen;
- char skip;
- char term;
- enum ip_ct_ftp_type ftptype;
- int (*getnum)(const char *, size_t, u_int32_t[], char);
-} search[IP_CT_DIR_MAX][2] = {
- [IP_CT_DIR_ORIGINAL] = {
- {
- .pattern = "PORT",
- .plen = sizeof("PORT") - 1,
- .skip = ' ',
- .term = '\r',
- .ftptype = IP_CT_FTP_PORT,
- .getnum = try_rfc959,
- },
- {
- .pattern = "EPRT",
- .plen = sizeof("EPRT") - 1,
- .skip = ' ',
- .term = '\r',
- .ftptype = IP_CT_FTP_EPRT,
- .getnum = try_eprt,
- },
- },
- [IP_CT_DIR_REPLY] = {
- {
- .pattern = "227 ",
- .plen = sizeof("227 ") - 1,
- .skip = '(',
- .term = ')',
- .ftptype = IP_CT_FTP_PASV,
- .getnum = try_rfc959,
- },
- {
- .pattern = "229 ",
- .plen = sizeof("229 ") - 1,
- .skip = '(',
- .term = ')',
- .ftptype = IP_CT_FTP_EPSV,
- .getnum = try_epsv_response,
- },
- },
-};
-
-static int try_number(const char *data, size_t dlen, u_int32_t array[],
- int array_size, char sep, char term)
-{
- u_int32_t i, len;
-
- memset(array, 0, sizeof(array[0])*array_size);
-
- /* Keep data pointing at next char. */
- for (i = 0, len = 0; len < dlen && i < array_size; len++, data++) {
- if (*data >= '0' && *data <= '9') {
- array[i] = array[i]*10 + *data - '0';
- }
- else if (*data == sep)
- i++;
- else {
- /* Unexpected character; true if it's the
- terminator and we're finished. */
- if (*data == term && i == array_size - 1)
- return len;
-
- DEBUGP("Char %u (got %u nums) `%u' unexpected\n",
- len, i, *data);
- return 0;
- }
- }
- DEBUGP("Failed to fill %u numbers separated by %c\n", array_size, sep);
-
- return 0;
-}
-
-/* Returns 0, or length of numbers: 192,168,1,1,5,6 */
-static int try_rfc959(const char *data, size_t dlen, u_int32_t array[6],
- char term)
-{
- return try_number(data, dlen, array, 6, ',', term);
-}
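
Worked example: a passive-mode reply '227 Entering Passive Mode (192,168,1,1,5,6)' fills array[] = {192, 168, 1, 1, 5, 6}, so the data connection is expected at 192.168.1.1, TCP port 5 * 256 + 6 = 1286.
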
-
-/* Grab port: number up to delimiter */
-static int get_port(const char *data, int start, size_t dlen, char delim,
- u_int32_t array[2])
-{
- u_int16_t port = 0;
- int i;
-
- for (i = start; i < dlen; i++) {
- /* Finished? */
- if (data[i] == delim) {
- if (port == 0)
- break;
- array[0] = port >> 8;
- array[1] = port;
- return i + 1;
- }
- else if (data[i] >= '0' && data[i] <= '9')
- port = port*10 + data[i] - '0';
- else /* Some other crap */
- break;
- }
- return 0;
-}
-
-/* Returns 0, or length of numbers: |1|132.235.1.2|6275| */
-static int try_eprt(const char *data, size_t dlen, u_int32_t array[6],
- char term)
-{
- char delim;
- int length;
-
- /* First character is delimiter, then "1" for IPv4, then
- delimiter again. */
- if (dlen <= 3) return 0;
- delim = data[0];
- if (isdigit(delim) || delim < 33 || delim > 126
- || data[1] != '1' || data[2] != delim)
- return 0;
-
- DEBUGP("EPRT: Got |1|!\n");
- /* Now we have IP address. */
- length = try_number(data + 3, dlen - 3, array, 4, '.', delim);
- if (length == 0)
- return 0;
-
- DEBUGP("EPRT: Got IP address!\n");
- /* Start offset includes initial "|1|", and trailing delimiter */
- return get_port(data, 3 + length + 1, dlen, delim, array+4);
-}
-
-/* Returns 0, or length of numbers: |||6446| */
-static int try_epsv_response(const char *data, size_t dlen, u_int32_t array[6],
- char term)
-{
- char delim;
-
- /* Three delimiters. */
- if (dlen <= 3) return 0;
- delim = data[0];
- if (isdigit(delim) || delim < 33 || delim > 126
- || data[1] != delim || data[2] != delim)
- return 0;
-
- return get_port(data, 3, dlen, delim, array+4);
-}
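
Worked examples: for 'EPRT |1|132.235.1.2|6275|' the delimiter is '|' and the '1' selects IPv4; try_number() fills array[0..3] = {132, 235, 1, 2} and get_port() stores the port 6275 as array[4] = 24, array[5] = 131 (24 * 256 + 131 = 6275). An EPSV response '229 ... (|||6446|)' carries only the port, so the address stays at the value help() pre-loads from the control connection's source IP.
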
-
-/* Return 1 for match, 0 for no match (accept), -1 for partial match. */
-static int find_pattern(const char *data, size_t dlen,
- const char *pattern, size_t plen,
- char skip, char term,
- unsigned int *numoff,
- unsigned int *numlen,
- u_int32_t array[6],
- int (*getnum)(const char *, size_t, u_int32_t[], char))
-{
- size_t i;
-
- DEBUGP("find_pattern `%s': dlen = %u\n", pattern, dlen);
- if (dlen == 0)
- return 0;
-
- if (dlen <= plen) {
- /* Short packet: try for partial? */
- if (strnicmp(data, pattern, dlen) == 0)
- return -1;
- else return 0;
- }
-
- if (strnicmp(data, pattern, plen) != 0) {
-#if 0
- size_t i;
-
- DEBUGP("ftp: string mismatch\n");
- for (i = 0; i < plen; i++) {
- DEBUGP("ftp:char %u `%c'(%u) vs `%c'(%u)\n",
- i, data[i], data[i],
- pattern[i], pattern[i]);
- }
-#endif
- return 0;
- }
-
- DEBUGP("Pattern matches!\n");
- /* Now we've found the constant string, try to skip
- to the 'skip' character */
- for (i = plen; data[i] != skip; i++)
- if (i == dlen - 1) return -1;
-
- /* Skip over the last character */
- i++;
-
- DEBUGP("Skipped up to `%c'!\n", skip);
-
- *numoff = i;
- *numlen = getnum(data + i, dlen - i, array, term);
- if (!*numlen)
- return -1;
-
- DEBUGP("Match succeeded!\n");
- return 1;
-}
-
-/* Look up to see if we're just after a \n. */
-static int find_nl_seq(u32 seq, const struct ip_ct_ftp_master *info, int dir)
-{
- unsigned int i;
-
- for (i = 0; i < info->seq_aft_nl_num[dir]; i++)
- if (info->seq_aft_nl[dir][i] == seq)
- return 1;
- return 0;
-}
-
-/* We don't update if it's older than what we have. */
-static void update_nl_seq(u32 nl_seq, struct ip_ct_ftp_master *info, int dir,
- struct sk_buff *skb)
-{
- unsigned int i, oldest = NUM_SEQ_TO_REMEMBER;
-
- /* Look for oldest: if we find exact match, we're done. */
- for (i = 0; i < info->seq_aft_nl_num[dir]; i++) {
- if (info->seq_aft_nl[dir][i] == nl_seq)
- return;
-
- if (oldest == NUM_SEQ_TO_REMEMBER
- || before(info->seq_aft_nl[dir][i],
- info->seq_aft_nl[dir][oldest]))
- oldest = i;
- }
-
- if (info->seq_aft_nl_num[dir] < NUM_SEQ_TO_REMEMBER) {
- info->seq_aft_nl[dir][info->seq_aft_nl_num[dir]++] = nl_seq;
- ip_conntrack_event_cache(IPCT_HELPINFO_VOLATILE, skb);
- } else if (oldest != NUM_SEQ_TO_REMEMBER) {
- info->seq_aft_nl[dir][oldest] = nl_seq;
- ip_conntrack_event_cache(IPCT_HELPINFO_VOLATILE, skb);
- }
-}
-
-static int help(struct sk_buff **pskb,
- struct ip_conntrack *ct,
- enum ip_conntrack_info ctinfo)
-{
- unsigned int dataoff, datalen;
- struct tcphdr _tcph, *th;
- char *fb_ptr;
- int ret;
- u32 seq, array[6] = { 0 };
- int dir = CTINFO2DIR(ctinfo);
- unsigned int matchlen, matchoff;
- struct ip_ct_ftp_master *ct_ftp_info = &ct->help.ct_ftp_info;
- struct ip_conntrack_expect *exp;
- unsigned int i;
- int found = 0, ends_in_nl;
- typeof(ip_nat_ftp_hook) ip_nat_ftp;
-
- /* Until there's been traffic both ways, don't look in packets. */
- if (ctinfo != IP_CT_ESTABLISHED
- && ctinfo != IP_CT_ESTABLISHED+IP_CT_IS_REPLY) {
- DEBUGP("ftp: Conntrackinfo = %u\n", ctinfo);
- return NF_ACCEPT;
- }
-
- th = skb_header_pointer(*pskb, (*pskb)->nh.iph->ihl*4,
- sizeof(_tcph), &_tcph);
- if (th == NULL)
- return NF_ACCEPT;
-
- dataoff = (*pskb)->nh.iph->ihl*4 + th->doff*4;
- /* No data? */
- if (dataoff >= (*pskb)->len) {
- DEBUGP("ftp: pskblen = %u\n", (*pskb)->len);
- return NF_ACCEPT;
- }
- datalen = (*pskb)->len - dataoff;
-
- spin_lock_bh(&ip_ftp_lock);
- fb_ptr = skb_header_pointer(*pskb, dataoff,
- (*pskb)->len - dataoff, ftp_buffer);
- BUG_ON(fb_ptr == NULL);
-
- ends_in_nl = (fb_ptr[datalen - 1] == '\n');
- seq = ntohl(th->seq) + datalen;
-
- /* Look up to see if we're just after a \n. */
- if (!find_nl_seq(ntohl(th->seq), ct_ftp_info, dir)) {
- /* The command is not directly after a newline we remembered:
- ignore it, but still update the newline bookkeeping below. */
- DEBUGP("ip_conntrack_ftp_help: seq %u not directly after "
- "a remembered newline\n", ntohl(th->seq));
- ret = NF_ACCEPT;
- goto out_update_nl;
- }
-
- /* Initialize IP array to expected address (it's not mentioned
- in EPSV responses) */
- array[0] = (ntohl(ct->tuplehash[dir].tuple.src.ip) >> 24) & 0xFF;
- array[1] = (ntohl(ct->tuplehash[dir].tuple.src.ip) >> 16) & 0xFF;
- array[2] = (ntohl(ct->tuplehash[dir].tuple.src.ip) >> 8) & 0xFF;
- array[3] = ntohl(ct->tuplehash[dir].tuple.src.ip) & 0xFF;
-
- for (i = 0; i < ARRAY_SIZE(search[dir]); i++) {
- found = find_pattern(fb_ptr, (*pskb)->len - dataoff,
- search[dir][i].pattern,
- search[dir][i].plen,
- search[dir][i].skip,
- search[dir][i].term,
- &matchoff, &matchlen,
- array,
- search[dir][i].getnum);
- if (found) break;
- }
- if (found == -1) {
- /* We don't usually drop packets. After all, this is
- connection tracking, not packet filtering.
- However, it is necessary for accurate tracking in
- this case. */
- if (net_ratelimit())
- printk("conntrack_ftp: partial %s %u+%u\n",
- search[dir][i].pattern,
- ntohl(th->seq), datalen);
- ret = NF_DROP;
- goto out;
- } else if (found == 0) { /* No match */
- ret = NF_ACCEPT;
- goto out_update_nl;
- }
-
- DEBUGP("conntrack_ftp: match `%s' (%u bytes at %u)\n",
- fb_ptr + matchoff, matchlen, ntohl(th->seq) + matchoff);
-
- /* Allocate expectation which will be inserted */
- exp = ip_conntrack_expect_alloc(ct);
- if (exp == NULL) {
- ret = NF_DROP;
- goto out;
- }
-
- /* We refer to the reverse direction ("!dir") tuples here,
- * because we're expecting something in the other direction.
- * Doesn't matter unless NAT is happening. */
- exp->tuple.dst.ip = ct->tuplehash[!dir].tuple.dst.ip;
-
- if (htonl((array[0] << 24) | (array[1] << 16) | (array[2] << 8) | array[3])
- != ct->tuplehash[dir].tuple.src.ip) {
- /* Enrico Scholz's passive FTP to partially RNAT'd ftp
- server: it really wants us to connect to a
- different IP address. Simply don't record it for
- NAT. */
- DEBUGP("conntrack_ftp: NOT RECORDING: %u,%u,%u,%u != %u.%u.%u.%u\n",
- array[0], array[1], array[2], array[3],
- NIPQUAD(ct->tuplehash[dir].tuple.src.ip));
-
- /* Thanks to Cristiano Lincoln Mattos
- <lincoln@cesar.org.br> for reporting this potential
- problem (DMZ machines opening holes to internal
- networks, or the packet filter itself). */
- if (!loose) {
- ret = NF_ACCEPT;
- goto out_put_expect;
- }
- exp->tuple.dst.ip = htonl((array[0] << 24) | (array[1] << 16)
- | (array[2] << 8) | array[3]);
- }
-
- exp->tuple.src.ip = ct->tuplehash[!dir].tuple.src.ip;
- exp->tuple.dst.u.tcp.port = htons(array[4] << 8 | array[5]);
- exp->tuple.src.u.tcp.port = 0; /* Don't care. */
- exp->tuple.dst.protonum = IPPROTO_TCP;
- exp->mask = ((struct ip_conntrack_tuple)
- { { htonl(0xFFFFFFFF), { 0 } },
- { htonl(0xFFFFFFFF), { .tcp = { htons(0xFFFF) } }, 0xFF }});
-
- exp->expectfn = NULL;
- exp->flags = 0;
-
- /* Now, NAT might want to mangle the packet, and register the
- * (possibly changed) expectation itself. */
- ip_nat_ftp = rcu_dereference(ip_nat_ftp_hook);
- if (ip_nat_ftp)
- ret = ip_nat_ftp(pskb, ctinfo, search[dir][i].ftptype,
- matchoff, matchlen, exp, &seq);
- else {
- /* Can't expect this? Best to drop packet now. */
- if (ip_conntrack_expect_related(exp) != 0)
- ret = NF_DROP;
- else
- ret = NF_ACCEPT;
- }
-
-out_put_expect:
- ip_conntrack_expect_put(exp);
-
-out_update_nl:
- /* Now if this ends in \n, update ftp info. Seq may have been
- * adjusted by NAT code. */
- if (ends_in_nl)
- update_nl_seq(seq, ct_ftp_info,dir, *pskb);
- out:
- spin_unlock_bh(&ip_ftp_lock);
- return ret;
-}
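
Worked example of the expectation built above (addresses assumed for illustration): if client 10.0.0.1 has a control connection to server 192.0.2.5:21 and the server's PASV reply announces 192,0,2,5,31,144, then dir == IP_CT_DIR_REPLY and tuplehash[!dir] is the ORIGINAL direction, so the expectation becomes src 10.0.0.1 (source port wildcarded by the mask), dst 192.0.2.5:8080 (31 * 256 + 144 = 8080), protocol TCP; max_expected = 1 below caps it at one pending data connection per control connection.
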
-
-static struct ip_conntrack_helper ftp[MAX_PORTS];
-static char ftp_names[MAX_PORTS][sizeof("ftp-65535")];
-
-/* Not __exit: called from init() */
-static void ip_conntrack_ftp_fini(void)
-{
- int i;
- for (i = 0; i < ports_c; i++) {
- DEBUGP("ip_ct_ftp: unregistering helper for port %d\n",
- ports[i]);
- ip_conntrack_helper_unregister(&ftp[i]);
- }
-
- kfree(ftp_buffer);
-}
-
-static int __init ip_conntrack_ftp_init(void)
-{
- int i, ret;
- char *tmpname;
-
- ftp_buffer = kmalloc(65536, GFP_KERNEL);
- if (!ftp_buffer)
- return -ENOMEM;
-
- if (ports_c == 0)
- ports[ports_c++] = FTP_PORT;
-
- for (i = 0; i < ports_c; i++) {
- ftp[i].tuple.src.u.tcp.port = htons(ports[i]);
- ftp[i].tuple.dst.protonum = IPPROTO_TCP;
- ftp[i].mask.src.u.tcp.port = htons(0xFFFF);
- ftp[i].mask.dst.protonum = 0xFF;
- ftp[i].max_expected = 1;
- ftp[i].timeout = 5 * 60; /* 5 minutes */
- ftp[i].me = THIS_MODULE;
- ftp[i].help = help;
-
- tmpname = &ftp_names[i][0];
- if (ports[i] == FTP_PORT)
- sprintf(tmpname, "ftp");
- else
- sprintf(tmpname, "ftp-%d", ports[i]);
- ftp[i].name = tmpname;
-
- DEBUGP("ip_ct_ftp: registering helper for port %d\n",
- ports[i]);
- ret = ip_conntrack_helper_register(&ftp[i]);
-
- if (ret) {
- ip_conntrack_ftp_fini();
- return ret;
- }
- }
- return 0;
-}
-
-module_init(ip_conntrack_ftp_init);
-module_exit(ip_conntrack_ftp_fini);
diff --git a/net/ipv4/netfilter/ip_conntrack_helper_h323.c b/net/ipv4/netfilter/ip_conntrack_helper_h323.c
deleted file mode 100644
index 53eb365ccc7..00000000000
--- a/net/ipv4/netfilter/ip_conntrack_helper_h323.c
+++ /dev/null
@@ -1,1841 +0,0 @@
-/*
- * H.323 connection tracking helper
- *
- * Copyright (c) 2006 Jing Min Zhao <zhaojingmin@users.sourceforge.net>
- *
- * This source code is licensed under General Public License version 2.
- *
- * Based on the 'brute force' H.323 connection tracking module by
- * Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>
- *
- * For more information, please see http://nath323.sourceforge.net/
- */
-
-#include <linux/module.h>
-#include <linux/netfilter.h>
-#include <linux/ip.h>
-#include <net/tcp.h>
-#include <linux/netfilter_ipv4/ip_conntrack.h>
-#include <linux/netfilter_ipv4/ip_conntrack_core.h>
-#include <linux/netfilter_ipv4/ip_conntrack_helper.h>
-#include <linux/netfilter_ipv4/ip_conntrack_tuple.h>
-#include <linux/netfilter_ipv4/ip_conntrack_h323.h>
-#include <linux/moduleparam.h>
-#include <linux/ctype.h>
-#include <linux/inet.h>
-
-#if 0
-#define DEBUGP printk
-#else
-#define DEBUGP(format, args...)
-#endif
-
-/* Parameters */
-static unsigned int default_rrq_ttl = 300;
-module_param(default_rrq_ttl, uint, 0600);
-MODULE_PARM_DESC(default_rrq_ttl, "use this TTL if it's missing in RRQ");
-
-static int gkrouted_only = 1;
-module_param(gkrouted_only, int, 0600);
-MODULE_PARM_DESC(gkrouted_only, "only accept calls from gatekeeper");
-
-static int callforward_filter = 1;
-module_param(callforward_filter, bool, 0600);
-MODULE_PARM_DESC(callforward_filter, "only create call forwarding expectations "
- "if both endpoints are on different sides "
- "(determined by routing information)");
-
-/* Hooks for NAT */
-int (*set_h245_addr_hook) (struct sk_buff ** pskb,
- unsigned char **data, int dataoff,
- H245_TransportAddress * addr,
- __be32 ip, u_int16_t port);
-int (*set_h225_addr_hook) (struct sk_buff ** pskb,
- unsigned char **data, int dataoff,
- TransportAddress * addr,
- __be32 ip, u_int16_t port);
-int (*set_sig_addr_hook) (struct sk_buff ** pskb,
- struct ip_conntrack * ct,
- enum ip_conntrack_info ctinfo,
- unsigned char **data,
- TransportAddress * addr, int count);
-int (*set_ras_addr_hook) (struct sk_buff ** pskb,
- struct ip_conntrack * ct,
- enum ip_conntrack_info ctinfo,
- unsigned char **data,
- TransportAddress * addr, int count);
-int (*nat_rtp_rtcp_hook) (struct sk_buff ** pskb,
- struct ip_conntrack * ct,
- enum ip_conntrack_info ctinfo,
- unsigned char **data, int dataoff,
- H245_TransportAddress * addr,
- u_int16_t port, u_int16_t rtp_port,
- struct ip_conntrack_expect * rtp_exp,
- struct ip_conntrack_expect * rtcp_exp);
-int (*nat_t120_hook) (struct sk_buff ** pskb,
- struct ip_conntrack * ct,
- enum ip_conntrack_info ctinfo,
- unsigned char **data, int dataoff,
- H245_TransportAddress * addr, u_int16_t port,
- struct ip_conntrack_expect * exp);
-int (*nat_h245_hook) (struct sk_buff ** pskb,
- struct ip_conntrack * ct,
- enum ip_conntrack_info ctinfo,
- unsigned char **data, int dataoff,
- TransportAddress * addr, u_int16_t port,
- struct ip_conntrack_expect * exp);
-int (*nat_callforwarding_hook) (struct sk_buff ** pskb,
- struct ip_conntrack * ct,
- enum ip_conntrack_info ctinfo,
- unsigned char **data, int dataoff,
- TransportAddress * addr, u_int16_t port,
- struct ip_conntrack_expect * exp);
-int (*nat_q931_hook) (struct sk_buff ** pskb,
- struct ip_conntrack * ct,
- enum ip_conntrack_info ctinfo,
- unsigned char **data, TransportAddress * addr, int idx,
- u_int16_t port, struct ip_conntrack_expect * exp);
-
-
-static DEFINE_SPINLOCK(ip_h323_lock);
-static char *h323_buffer;
-
-/****************************************************************************/
-static int get_tpkt_data(struct sk_buff **pskb, struct ip_conntrack *ct,
- enum ip_conntrack_info ctinfo,
- unsigned char **data, int *datalen, int *dataoff)
-{
- struct ip_ct_h323_master *info = &ct->help.ct_h323_info;
- int dir = CTINFO2DIR(ctinfo);
- struct tcphdr _tcph, *th;
- int tcpdatalen;
- int tcpdataoff;
- unsigned char *tpkt;
- int tpktlen;
- int tpktoff;
-
- /* Get TCP header */
- th = skb_header_pointer(*pskb, (*pskb)->nh.iph->ihl * 4,
- sizeof(_tcph), &_tcph);
- if (th == NULL)
- return 0;
-
- /* Get TCP data offset */
- tcpdataoff = (*pskb)->nh.iph->ihl * 4 + th->doff * 4;
-
- /* Get TCP data length */
- tcpdatalen = (*pskb)->len - tcpdataoff;
- if (tcpdatalen <= 0) /* No TCP data */
- goto clear_out;
-
- if (*data == NULL) { /* first TPKT */
- /* Get first TPKT pointer */
- tpkt = skb_header_pointer(*pskb, tcpdataoff, tcpdatalen,
- h323_buffer);
- BUG_ON(tpkt == NULL);
-
- /* Validate TPKT identifier */
- if (tcpdatalen < 4 || tpkt[0] != 0x03 || tpkt[1] != 0) {
- /* Netmeeting sends TPKT header and data separately */
- if (info->tpkt_len[dir] > 0) {
- DEBUGP("ip_ct_h323: previous packet "
- "indicated separate TPKT data of %hu "
- "bytes\n", info->tpkt_len[dir]);
- if (info->tpkt_len[dir] <= tcpdatalen) {
- /* Yes, there was a TPKT header
- * received */
- *data = tpkt;
- *datalen = info->tpkt_len[dir];
- *dataoff = 0;
- goto out;
- }
-
- /* Fragmented TPKT */
- if (net_ratelimit())
- printk("ip_ct_h323: "
- "fragmented TPKT\n");
- goto clear_out;
- }
-
- /* It is not even a TPKT */
- return 0;
- }
- tpktoff = 0;
- } else { /* Next TPKT */
- tpktoff = *dataoff + *datalen;
- tcpdatalen -= tpktoff;
- if (tcpdatalen <= 4) /* No more TPKT */
- goto clear_out;
- tpkt = *data + *datalen;
-
- /* Validate TPKT identifier */
- if (tpkt[0] != 0x03 || tpkt[1] != 0)
- goto clear_out;
- }
-
- /* Validate TPKT length */
- tpktlen = tpkt[2] * 256 + tpkt[3];
- if (tpktlen < 4)
- goto clear_out;
- if (tpktlen > tcpdatalen) {
- if (tcpdatalen == 4) { /* Separate TPKT header */
- /* Netmeeting sends TPKT header and data separately */
- DEBUGP("ip_ct_h323: separate TPKT header indicates "
- "there will be TPKT data of %hu bytes\n",
- tpktlen - 4);
- info->tpkt_len[dir] = tpktlen - 4;
- return 0;
- }
-
- if (net_ratelimit())
- printk("ip_ct_h323: incomplete TPKT (fragmented?)\n");
- goto clear_out;
- }
-
- /* This is the encapsulated data */
- *data = tpkt + 4;
- *datalen = tpktlen - 4;
- *dataoff = tpktoff + 4;
-
- out:
- /* Clear TPKT length */
- info->tpkt_len[dir] = 0;
- return 1;
-
- clear_out:
- info->tpkt_len[dir] = 0;
- return 0;
-}
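
For reference, the framing parsed here is RFC 1006 TPKT: each Q.931/H.245 PDU is preceded by a 4-byte header of 0x03, 0x00, then a 16-bit big-endian length that includes the header itself. For example, a TPKT beginning 03 00 00 64 announces 0x64 = 100 bytes in total; the code computes tpktlen = tpkt[2] * 256 + tpkt[3] = 100 and hands back *data = tpkt + 4 with *datalen = 96.
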
-
-/****************************************************************************/
-static int get_h245_addr(unsigned char *data, H245_TransportAddress * addr,
- __be32 * ip, u_int16_t * port)
-{
- unsigned char *p;
-
- if (addr->choice != eH245_TransportAddress_unicastAddress ||
- addr->unicastAddress.choice != eUnicastAddress_iPAddress)
- return 0;
-
- p = data + addr->unicastAddress.iPAddress.network;
- *ip = htonl((p[0] << 24) | (p[1] << 16) | (p[2] << 8) | (p[3]));
- *port = (p[4] << 8) | (p[5]);
-
- return 1;
-}
-
-/****************************************************************************/
-static int expect_rtp_rtcp(struct sk_buff **pskb, struct ip_conntrack *ct,
- enum ip_conntrack_info ctinfo,
- unsigned char **data, int dataoff,
- H245_TransportAddress * addr)
-{
- int dir = CTINFO2DIR(ctinfo);
- int ret = 0;
- __be32 ip;
- u_int16_t port;
- u_int16_t rtp_port;
- struct ip_conntrack_expect *rtp_exp;
- struct ip_conntrack_expect *rtcp_exp;
- typeof(nat_rtp_rtcp_hook) nat_rtp_rtcp;
-
- /* Read RTP or RTCP address */
- if (!get_h245_addr(*data, addr, &ip, &port) ||
- ip != ct->tuplehash[dir].tuple.src.ip || port == 0)
- return 0;
-
- /* RTP port is even */
- rtp_port = port & (~1);
-
- /* Create expect for RTP */
- if ((rtp_exp = ip_conntrack_expect_alloc(ct)) == NULL)
- return -1;
- rtp_exp->tuple.src.ip = ct->tuplehash[!dir].tuple.src.ip;
- rtp_exp->tuple.src.u.udp.port = 0;
- rtp_exp->tuple.dst.ip = ct->tuplehash[!dir].tuple.dst.ip;
- rtp_exp->tuple.dst.u.udp.port = htons(rtp_port);
- rtp_exp->tuple.dst.protonum = IPPROTO_UDP;
- rtp_exp->mask.src.ip = htonl(0xFFFFFFFF);
- rtp_exp->mask.src.u.udp.port = 0;
- rtp_exp->mask.dst.ip = htonl(0xFFFFFFFF);
- rtp_exp->mask.dst.u.udp.port = htons(0xFFFF);
- rtp_exp->mask.dst.protonum = 0xFF;
- rtp_exp->flags = 0;
-
- /* Create expect for RTCP */
- if ((rtcp_exp = ip_conntrack_expect_alloc(ct)) == NULL) {
- ip_conntrack_expect_put(rtp_exp);
- return -1;
- }
- rtcp_exp->tuple.src.ip = ct->tuplehash[!dir].tuple.src.ip;
- rtcp_exp->tuple.src.u.udp.port = 0;
- rtcp_exp->tuple.dst.ip = ct->tuplehash[!dir].tuple.dst.ip;
- rtcp_exp->tuple.dst.u.udp.port = htons(rtp_port + 1);
- rtcp_exp->tuple.dst.protonum = IPPROTO_UDP;
- rtcp_exp->mask.src.ip = htonl(0xFFFFFFFF);
- rtcp_exp->mask.src.u.udp.port = 0;
- rtcp_exp->mask.dst.ip = htonl(0xFFFFFFFF);
- rtcp_exp->mask.dst.u.udp.port = htons(0xFFFF);
- rtcp_exp->mask.dst.protonum = 0xFF;
- rtcp_exp->flags = 0;
-
- if (ct->tuplehash[dir].tuple.src.ip !=
- ct->tuplehash[!dir].tuple.dst.ip &&
- (nat_rtp_rtcp = rcu_dereference(nat_rtp_rtcp_hook))) {
- /* NAT needed */
- ret = nat_rtp_rtcp(pskb, ct, ctinfo, data, dataoff,
- addr, port, rtp_port, rtp_exp, rtcp_exp);
- } else { /* Conntrack only */
- rtp_exp->expectfn = NULL;
- rtcp_exp->expectfn = NULL;
-
- if (ip_conntrack_expect_related(rtp_exp) == 0) {
- if (ip_conntrack_expect_related(rtcp_exp) == 0) {
- DEBUGP("ip_ct_h323: expect RTP "
- "%u.%u.%u.%u:%hu->%u.%u.%u.%u:%hu\n",
- NIPQUAD(rtp_exp->tuple.src.ip),
- ntohs(rtp_exp->tuple.src.u.udp.port),
- NIPQUAD(rtp_exp->tuple.dst.ip),
- ntohs(rtp_exp->tuple.dst.u.udp.port));
- DEBUGP("ip_ct_h323: expect RTCP "
- "%u.%u.%u.%u:%hu->%u.%u.%u.%u:%hu\n",
- NIPQUAD(rtcp_exp->tuple.src.ip),
- ntohs(rtcp_exp->tuple.src.u.udp.port),
- NIPQUAD(rtcp_exp->tuple.dst.ip),
- ntohs(rtcp_exp->tuple.dst.u.udp.port));
- } else {
- ip_conntrack_unexpect_related(rtp_exp);
- ret = -1;
- }
- } else
- ret = -1;
- }
-
- ip_conntrack_expect_put(rtp_exp);
- ip_conntrack_expect_put(rtcp_exp);
-
- return ret;
-}
-
-/****************************************************************************/
-static int expect_t120(struct sk_buff **pskb,
- struct ip_conntrack *ct,
- enum ip_conntrack_info ctinfo,
- unsigned char **data, int dataoff,
- H245_TransportAddress * addr)
-{
- int dir = CTINFO2DIR(ctinfo);
- int ret = 0;
- __be32 ip;
- u_int16_t port;
- struct ip_conntrack_expect *exp = NULL;
- typeof(nat_t120_hook) nat_t120;
-
- /* Read T.120 address */
- if (!get_h245_addr(*data, addr, &ip, &port) ||
- ip != ct->tuplehash[dir].tuple.src.ip || port == 0)
- return 0;
-
- /* Create expect for T.120 connections */
- if ((exp = ip_conntrack_expect_alloc(ct)) == NULL)
- return -1;
- exp->tuple.src.ip = ct->tuplehash[!dir].tuple.src.ip;
- exp->tuple.src.u.tcp.port = 0;
- exp->tuple.dst.ip = ct->tuplehash[!dir].tuple.dst.ip;
- exp->tuple.dst.u.tcp.port = htons(port);
- exp->tuple.dst.protonum = IPPROTO_TCP;
- exp->mask.src.ip = htonl(0xFFFFFFFF);
- exp->mask.src.u.tcp.port = 0;
- exp->mask.dst.ip = htonl(0xFFFFFFFF);
- exp->mask.dst.u.tcp.port = htons(0xFFFF);
- exp->mask.dst.protonum = 0xFF;
- exp->flags = IP_CT_EXPECT_PERMANENT; /* Accept multiple channels */
-
- if (ct->tuplehash[dir].tuple.src.ip !=
- ct->tuplehash[!dir].tuple.dst.ip &&
- (nat_t120 = rcu_dereference(nat_t120_hook))) {
- /* NAT needed */
- ret = nat_t120(pskb, ct, ctinfo, data, dataoff, addr,
- port, exp);
- } else { /* Conntrack only */
- exp->expectfn = NULL;
- if (ip_conntrack_expect_related(exp) == 0) {
- DEBUGP("ip_ct_h323: expect T.120 "
- "%u.%u.%u.%u:%hu->%u.%u.%u.%u:%hu\n",
- NIPQUAD(exp->tuple.src.ip),
- ntohs(exp->tuple.src.u.tcp.port),
- NIPQUAD(exp->tuple.dst.ip),
- ntohs(exp->tuple.dst.u.tcp.port));
- } else
- ret = -1;
- }
-
- ip_conntrack_expect_put(exp);
-
- return ret;
-}
-
-/****************************************************************************/
-static int process_h245_channel(struct sk_buff **pskb,
- struct ip_conntrack *ct,
- enum ip_conntrack_info ctinfo,
- unsigned char **data, int dataoff,
- H2250LogicalChannelParameters * channel)
-{
- int ret;
-
- if (channel->options & eH2250LogicalChannelParameters_mediaChannel) {
- /* RTP */
- ret = expect_rtp_rtcp(pskb, ct, ctinfo, data, dataoff,
- &channel->mediaChannel);
- if (ret < 0)
- return -1;
- }
-
- if (channel->
- options & eH2250LogicalChannelParameters_mediaControlChannel) {
- /* RTCP */
- ret = expect_rtp_rtcp(pskb, ct, ctinfo, data, dataoff,
- &channel->mediaControlChannel);
- if (ret < 0)
- return -1;
- }
-
- return 0;
-}
-
-/****************************************************************************/
-static int process_olc(struct sk_buff **pskb, struct ip_conntrack *ct,
- enum ip_conntrack_info ctinfo,
- unsigned char **data, int dataoff,
- OpenLogicalChannel * olc)
-{
- int ret;
-
- DEBUGP("ip_ct_h323: OpenLogicalChannel\n");
-
- if (olc->forwardLogicalChannelParameters.multiplexParameters.choice ==
- eOpenLogicalChannel_forwardLogicalChannelParameters_multiplexParameters_h2250LogicalChannelParameters)
- {
- ret = process_h245_channel(pskb, ct, ctinfo, data, dataoff,
- &olc->
- forwardLogicalChannelParameters.
- multiplexParameters.
- h2250LogicalChannelParameters);
- if (ret < 0)
- return -1;
- }
-
- if ((olc->options &
- eOpenLogicalChannel_reverseLogicalChannelParameters) &&
- (olc->reverseLogicalChannelParameters.options &
- eOpenLogicalChannel_reverseLogicalChannelParameters_multiplexParameters)
- && (olc->reverseLogicalChannelParameters.multiplexParameters.
- choice ==
- eOpenLogicalChannel_reverseLogicalChannelParameters_multiplexParameters_h2250LogicalChannelParameters))
- {
- ret =
- process_h245_channel(pskb, ct, ctinfo, data, dataoff,
- &olc->
- reverseLogicalChannelParameters.
- multiplexParameters.
- h2250LogicalChannelParameters);
- if (ret < 0)
- return -1;
- }
-
- if ((olc->options & eOpenLogicalChannel_separateStack) &&
- olc->forwardLogicalChannelParameters.dataType.choice ==
- eDataType_data &&
- olc->forwardLogicalChannelParameters.dataType.data.application.
- choice == eDataApplicationCapability_application_t120 &&
- olc->forwardLogicalChannelParameters.dataType.data.application.
- t120.choice == eDataProtocolCapability_separateLANStack &&
- olc->separateStack.networkAddress.choice ==
- eNetworkAccessParameters_networkAddress_localAreaAddress) {
- ret = expect_t120(pskb, ct, ctinfo, data, dataoff,
- &olc->separateStack.networkAddress.
- localAreaAddress);
- if (ret < 0)
- return -1;
- }
-
- return 0;
-}
-
-/****************************************************************************/
-static int process_olca(struct sk_buff **pskb, struct ip_conntrack *ct,
- enum ip_conntrack_info ctinfo,
- unsigned char **data, int dataoff,
- OpenLogicalChannelAck * olca)
-{
- H2250LogicalChannelAckParameters *ack;
- int ret;
-
- DEBUGP("ip_ct_h323: OpenLogicalChannelAck\n");
-
- if ((olca->options &
- eOpenLogicalChannelAck_reverseLogicalChannelParameters) &&
- (olca->reverseLogicalChannelParameters.options &
- eOpenLogicalChannelAck_reverseLogicalChannelParameters_multiplexParameters)
- && (olca->reverseLogicalChannelParameters.multiplexParameters.
- choice ==
- eOpenLogicalChannelAck_reverseLogicalChannelParameters_multiplexParameters_h2250LogicalChannelParameters))
- {
- ret = process_h245_channel(pskb, ct, ctinfo, data, dataoff,
- &olca->
- reverseLogicalChannelParameters.
- multiplexParameters.
- h2250LogicalChannelParameters);
- if (ret < 0)
- return -1;
- }
-
- if ((olca->options &
- eOpenLogicalChannelAck_forwardMultiplexAckParameters) &&
- (olca->forwardMultiplexAckParameters.choice ==
- eOpenLogicalChannelAck_forwardMultiplexAckParameters_h2250LogicalChannelAckParameters))
- {
- ack = &olca->forwardMultiplexAckParameters.
- h2250LogicalChannelAckParameters;
- if (ack->options &
- eH2250LogicalChannelAckParameters_mediaChannel) {
- /* RTP */
- ret = expect_rtp_rtcp(pskb, ct, ctinfo, data, dataoff,
- &ack->mediaChannel);
- if (ret < 0)
- return -1;
- }
-
- if (ack->options &
- eH2250LogicalChannelAckParameters_mediaControlChannel) {
- /* RTCP */
- ret = expect_rtp_rtcp(pskb, ct, ctinfo, data, dataoff,
- &ack->mediaControlChannel);
- if (ret < 0)
- return -1;
- }
- }
-
- return 0;
-}
-
-/****************************************************************************/
-static int process_h245(struct sk_buff **pskb, struct ip_conntrack *ct,
- enum ip_conntrack_info ctinfo,
- unsigned char **data, int dataoff,
- MultimediaSystemControlMessage * mscm)
-{
- switch (mscm->choice) {
- case eMultimediaSystemControlMessage_request:
- if (mscm->request.choice ==
- eRequestMessage_openLogicalChannel) {
- return process_olc(pskb, ct, ctinfo, data, dataoff,
- &mscm->request.openLogicalChannel);
- }
- DEBUGP("ip_ct_h323: H.245 Request %d\n",
- mscm->request.choice);
- break;
- case eMultimediaSystemControlMessage_response:
- if (mscm->response.choice ==
- eResponseMessage_openLogicalChannelAck) {
- return process_olca(pskb, ct, ctinfo, data, dataoff,
- &mscm->response.
- openLogicalChannelAck);
- }
- DEBUGP("ip_ct_h323: H.245 Response %d\n",
- mscm->response.choice);
- break;
- default:
- DEBUGP("ip_ct_h323: H.245 signal %d\n", mscm->choice);
- break;
- }
-
- return 0;
-}
-
-/****************************************************************************/
-static int h245_help(struct sk_buff **pskb, struct ip_conntrack *ct,
- enum ip_conntrack_info ctinfo)
-{
- static MultimediaSystemControlMessage mscm;
- unsigned char *data = NULL;
- int datalen;
- int dataoff;
- int ret;
-
- /* Until there's been traffic both ways, don't look in packets. */
- if (ctinfo != IP_CT_ESTABLISHED
- && ctinfo != IP_CT_ESTABLISHED + IP_CT_IS_REPLY) {
- return NF_ACCEPT;
- }
- DEBUGP("ip_ct_h245: skblen = %u\n", (*pskb)->len);
-
- spin_lock_bh(&ip_h323_lock);
-
- /* Process each TPKT */
- while (get_tpkt_data(pskb, ct, ctinfo, &data, &datalen, &dataoff)) {
- DEBUGP("ip_ct_h245: TPKT %u.%u.%u.%u->%u.%u.%u.%u, len=%d\n",
- NIPQUAD((*pskb)->nh.iph->saddr),
- NIPQUAD((*pskb)->nh.iph->daddr), datalen);
-
- /* Decode H.245 signal */
- ret = DecodeMultimediaSystemControlMessage(data, datalen,
- &mscm);
- if (ret < 0) {
- if (net_ratelimit())
- printk("ip_ct_h245: decoding error: %s\n",
- ret == H323_ERROR_BOUND ?
- "out of bound" : "out of range");
- /* We don't drop when decoding error */
- break;
- }
-
- /* Process H.245 signal */
- if (process_h245(pskb, ct, ctinfo, &data, dataoff, &mscm) < 0)
- goto drop;
- }
-
- spin_unlock_bh(&ip_h323_lock);
- return NF_ACCEPT;
-
- drop:
- spin_unlock_bh(&ip_h323_lock);
- if (net_ratelimit())
- printk("ip_ct_h245: packet dropped\n");
- return NF_DROP;
-}
-
-/****************************************************************************/
-static struct ip_conntrack_helper ip_conntrack_helper_h245 = {
- .name = "H.245",
- .me = THIS_MODULE,
- .max_expected = H323_RTP_CHANNEL_MAX * 4 + 2, /* T.120 */
- .timeout = 240,
- .tuple = {.dst = {.protonum = IPPROTO_TCP}},
- .mask = {.src = {.u = {0xFFFF}},
- .dst = {.protonum = 0xFF}},
- .help = h245_help
-};
-
-/****************************************************************************/
-void ip_conntrack_h245_expect(struct ip_conntrack *new,
- struct ip_conntrack_expect *this)
-{
- write_lock_bh(&ip_conntrack_lock);
- new->helper = &ip_conntrack_helper_h245;
- write_unlock_bh(&ip_conntrack_lock);
-}
-
-/****************************************************************************/
-int get_h225_addr(unsigned char *data, TransportAddress * addr,
- __be32 * ip, u_int16_t * port)
-{
- unsigned char *p;
-
- if (addr->choice != eTransportAddress_ipAddress)
- return 0;
-
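- /* The ipAddress choice encodes a 4-byte IPv4 address followed by
-  * a 2-byte port, both big-endian; the port is returned in host
-  * order, the address in network order. */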
- p = data + addr->ipAddress.ip;
- *ip = htonl((p[0] << 24) | (p[1] << 16) | (p[2] << 8) | (p[3]));
- *port = (p[4] << 8) | (p[5]);
-
- return 1;
-}
-
-/****************************************************************************/
-static int expect_h245(struct sk_buff **pskb, struct ip_conntrack *ct,
- enum ip_conntrack_info ctinfo,
- unsigned char **data, int dataoff,
- TransportAddress * addr)
-{
- int dir = CTINFO2DIR(ctinfo);
- int ret = 0;
- __be32 ip;
- u_int16_t port;
- struct ip_conntrack_expect *exp = NULL;
- typeof(nat_h245_hook) nat_h245;
-
- /* Read h245Address */
- if (!get_h225_addr(*data, addr, &ip, &port) ||
- ip != ct->tuplehash[dir].tuple.src.ip || port == 0)
- return 0;
-
- /* Create expect for h245 connection */
- if ((exp = ip_conntrack_expect_alloc(ct)) == NULL)
- return -1;
- exp->tuple.src.ip = ct->tuplehash[!dir].tuple.src.ip;
- exp->tuple.src.u.tcp.port = 0;
- exp->tuple.dst.ip = ct->tuplehash[!dir].tuple.dst.ip;
- exp->tuple.dst.u.tcp.port = htons(port);
- exp->tuple.dst.protonum = IPPROTO_TCP;
- exp->mask.src.ip = htonl(0xFFFFFFFF);
- exp->mask.src.u.tcp.port = 0;
- exp->mask.dst.ip = htonl(0xFFFFFFFF);
- exp->mask.dst.u.tcp.port = htons(0xFFFF);
- exp->mask.dst.protonum = 0xFF;
- exp->flags = 0;
-
- if (ct->tuplehash[dir].tuple.src.ip !=
- ct->tuplehash[!dir].tuple.dst.ip &&
- (nat_h245 = rcu_dereference(nat_h245_hook))) {
- /* NAT needed */
- ret = nat_h245(pskb, ct, ctinfo, data, dataoff, addr,
- port, exp);
- } else { /* Conntrack only */
- exp->expectfn = ip_conntrack_h245_expect;
-
- if (ip_conntrack_expect_related(exp) == 0) {
- DEBUGP("ip_ct_q931: expect H.245 "
- "%u.%u.%u.%u:%hu->%u.%u.%u.%u:%hu\n",
- NIPQUAD(exp->tuple.src.ip),
- ntohs(exp->tuple.src.u.tcp.port),
- NIPQUAD(exp->tuple.dst.ip),
- ntohs(exp->tuple.dst.u.tcp.port));
- } else
- ret = -1;
- }
-
- ip_conntrack_expect_put(exp);
-
- return ret;
-}
-
-/* Forward declaration */
-void ip_conntrack_q931_expect(struct ip_conntrack *new,
- struct ip_conntrack_expect *this);
-
-/****************************************************************************/
-static int expect_callforwarding(struct sk_buff **pskb,
- struct ip_conntrack *ct,
- enum ip_conntrack_info ctinfo,
- unsigned char **data, int dataoff,
- TransportAddress * addr)
-{
- int dir = CTINFO2DIR(ctinfo);
- int ret = 0;
- __be32 ip;
- u_int16_t port;
- struct ip_conntrack_expect *exp = NULL;
- typeof(nat_callforwarding_hook) nat_callforwarding;
-
- /* Read alternativeAddress */
- if (!get_h225_addr(*data, addr, &ip, &port) || port == 0)
- return 0;
-
- /* If the calling party is on the same side as the forward-to party,
- * we don't need to track the second call */
- if (callforward_filter) {
- struct rtable *rt1, *rt2;
- struct flowi fl1 = {
- .fl4_dst = ip,
- };
- struct flowi fl2 = {
- .fl4_dst = ct->tuplehash[!dir].tuple.src.ip,
- };
-
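- /* Route to both the forward-to address and the other party; if
-  * the two routes share gateway and device, the second call leg
-  * never crosses this host and need not be tracked. */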
- if (ip_route_output_key(&rt1, &fl1) == 0) {
- if (ip_route_output_key(&rt2, &fl2) == 0) {
- if (rt1->rt_gateway == rt2->rt_gateway &&
- rt1->u.dst.dev == rt2->u.dst.dev)
- ret = 1;
- dst_release(&rt2->u.dst);
- }
- dst_release(&rt1->u.dst);
- }
- if (ret) {
- DEBUGP("ip_ct_q931: Call Forwarding not tracked\n");
- return 0;
- }
- }
-
- /* Create expect for the second call leg */
- if ((exp = ip_conntrack_expect_alloc(ct)) == NULL)
- return -1;
- exp->tuple.src.ip = ct->tuplehash[!dir].tuple.src.ip;
- exp->tuple.src.u.tcp.port = 0;
- exp->tuple.dst.ip = ip;
- exp->tuple.dst.u.tcp.port = htons(port);
- exp->tuple.dst.protonum = IPPROTO_TCP;
- exp->mask.src.ip = htonl(0xFFFFFFFF);
- exp->mask.src.u.tcp.port = 0;
- exp->mask.dst.ip = htonl(0xFFFFFFFF);
- exp->mask.dst.u.tcp.port = htons(0xFFFF);
- exp->mask.dst.protonum = 0xFF;
- exp->flags = 0;
-
- if (ct->tuplehash[dir].tuple.src.ip !=
- ct->tuplehash[!dir].tuple.dst.ip &&
- (nat_callforwarding = rcu_dereference(nat_callforwarding_hook))) {
- /* Need NAT */
- ret = nat_callforwarding(pskb, ct, ctinfo, data, dataoff,
- addr, port, exp);
- } else { /* Conntrack only */
- exp->expectfn = ip_conntrack_q931_expect;
-
- if (ip_conntrack_expect_related(exp) == 0) {
- DEBUGP("ip_ct_q931: expect Call Forwarding "
- "%u.%u.%u.%u:%hu->%u.%u.%u.%u:%hu\n",
- NIPQUAD(exp->tuple.src.ip),
- ntohs(exp->tuple.src.u.tcp.port),
- NIPQUAD(exp->tuple.dst.ip),
- ntohs(exp->tuple.dst.u.tcp.port));
- } else
- ret = -1;
- }
-
- ip_conntrack_expect_put(exp);
-
- return ret;
-}
-
-/****************************************************************************/
-static int process_setup(struct sk_buff **pskb, struct ip_conntrack *ct,
- enum ip_conntrack_info ctinfo,
- unsigned char **data, int dataoff,
- Setup_UUIE * setup)
-{
- int dir = CTINFO2DIR(ctinfo);
- int ret;
- int i;
- __be32 ip;
- u_int16_t port;
- typeof(set_h225_addr_hook) set_h225_addr;
-
- DEBUGP("ip_ct_q931: Setup\n");
-
- if (setup->options & eSetup_UUIE_h245Address) {
- ret = expect_h245(pskb, ct, ctinfo, data, dataoff,
- &setup->h245Address);
- if (ret < 0)
- return -1;
- }
-
- set_h225_addr = rcu_dereference(set_h225_addr_hook);
-
- if ((setup->options & eSetup_UUIE_destCallSignalAddress) &&
- (set_h225_addr) &&
- get_h225_addr(*data, &setup->destCallSignalAddress, &ip, &port) &&
- ip != ct->tuplehash[!dir].tuple.src.ip) {
- DEBUGP("ip_ct_q931: set destCallSignalAddress "
- "%u.%u.%u.%u:%hu->%u.%u.%u.%u:%hu\n",
- NIPQUAD(ip), port,
- NIPQUAD(ct->tuplehash[!dir].tuple.src.ip),
- ntohs(ct->tuplehash[!dir].tuple.src.u.tcp.port));
- ret = set_h225_addr(pskb, data, dataoff,
- &setup->destCallSignalAddress,
- ct->tuplehash[!dir].tuple.src.ip,
- ntohs(ct->tuplehash[!dir].tuple.src.
- u.tcp.port));
- if (ret < 0)
- return -1;
- }
-
- if ((setup->options & eSetup_UUIE_sourceCallSignalAddress) &&
- (set_h225_addr) &&
- get_h225_addr(*data, &setup->sourceCallSignalAddress, &ip, &port)
- && ip != ct->tuplehash[!dir].tuple.dst.ip) {
- DEBUGP("ip_ct_q931: set sourceCallSignalAddress "
- "%u.%u.%u.%u:%hu->%u.%u.%u.%u:%hu\n",
- NIPQUAD(ip), port,
- NIPQUAD(ct->tuplehash[!dir].tuple.dst.ip),
- ntohs(ct->tuplehash[!dir].tuple.dst.u.tcp.port));
- ret = set_h225_addr(pskb, data, dataoff,
- &setup->sourceCallSignalAddress,
- ct->tuplehash[!dir].tuple.dst.ip,
- ntohs(ct->tuplehash[!dir].tuple.dst.
- u.tcp.port));
- if (ret < 0)
- return -1;
- }
-
- if (setup->options & eSetup_UUIE_fastStart) {
- for (i = 0; i < setup->fastStart.count; i++) {
- ret = process_olc(pskb, ct, ctinfo, data, dataoff,
- &setup->fastStart.item[i]);
- if (ret < 0)
- return -1;
- }
- }
-
- return 0;
-}
-
-/****************************************************************************/
-static int process_callproceeding(struct sk_buff **pskb,
- struct ip_conntrack *ct,
- enum ip_conntrack_info ctinfo,
- unsigned char **data, int dataoff,
- CallProceeding_UUIE * callproc)
-{
- int ret;
- int i;
-
- DEBUGP("ip_ct_q931: CallProceeding\n");
-
- if (callproc->options & eCallProceeding_UUIE_h245Address) {
- ret = expect_h245(pskb, ct, ctinfo, data, dataoff,
- &callproc->h245Address);
- if (ret < 0)
- return -1;
- }
-
- if (callproc->options & eCallProceeding_UUIE_fastStart) {
- for (i = 0; i < callproc->fastStart.count; i++) {
- ret = process_olc(pskb, ct, ctinfo, data, dataoff,
- &callproc->fastStart.item[i]);
- if (ret < 0)
- return -1;
- }
- }
-
- return 0;
-}
-
-/****************************************************************************/
-static int process_connect(struct sk_buff **pskb, struct ip_conntrack *ct,
- enum ip_conntrack_info ctinfo,
- unsigned char **data, int dataoff,
- Connect_UUIE * connect)
-{
- int ret;
- int i;
-
- DEBUGP("ip_ct_q931: Connect\n");
-
- if (connect->options & eConnect_UUIE_h245Address) {
- ret = expect_h245(pskb, ct, ctinfo, data, dataoff,
- &connect->h245Address);
- if (ret < 0)
- return -1;
- }
-
- if (connect->options & eConnect_UUIE_fastStart) {
- for (i = 0; i < connect->fastStart.count; i++) {
- ret = process_olc(pskb, ct, ctinfo, data, dataoff,
- &connect->fastStart.item[i]);
- if (ret < 0)
- return -1;
- }
- }
-
- return 0;
-}
-
-/****************************************************************************/
-static int process_alerting(struct sk_buff **pskb, struct ip_conntrack *ct,
- enum ip_conntrack_info ctinfo,
- unsigned char **data, int dataoff,
- Alerting_UUIE * alert)
-{
- int ret;
- int i;
-
- DEBUGP("ip_ct_q931: Alerting\n");
-
- if (alert->options & eAlerting_UUIE_h245Address) {
- ret = expect_h245(pskb, ct, ctinfo, data, dataoff,
- &alert->h245Address);
- if (ret < 0)
- return -1;
- }
-
- if (alert->options & eAlerting_UUIE_fastStart) {
- for (i = 0; i < alert->fastStart.count; i++) {
- ret = process_olc(pskb, ct, ctinfo, data, dataoff,
- &alert->fastStart.item[i]);
- if (ret < 0)
- return -1;
- }
- }
-
- return 0;
-}
-
-/****************************************************************************/
-static int process_information(struct sk_buff **pskb,
- struct ip_conntrack *ct,
- enum ip_conntrack_info ctinfo,
- unsigned char **data, int dataoff,
- Information_UUIE * info)
-{
- int ret;
- int i;
-
- DEBUGP("ip_ct_q931: Information\n");
-
- if (info->options & eInformation_UUIE_fastStart) {
- for (i = 0; i < info->fastStart.count; i++) {
- ret = process_olc(pskb, ct, ctinfo, data, dataoff,
- &info->fastStart.item[i]);
- if (ret < 0)
- return -1;
- }
- }
-
- return 0;
-}
-
-/****************************************************************************/
-static int process_facility(struct sk_buff **pskb, struct ip_conntrack *ct,
- enum ip_conntrack_info ctinfo,
- unsigned char **data, int dataoff,
- Facility_UUIE * facility)
-{
- int ret;
- int i;
-
- DEBUGP("ip_ct_q931: Facility\n");
-
- if (facility->reason.choice == eFacilityReason_callForwarded) {
- if (facility->options & eFacility_UUIE_alternativeAddress)
- return expect_callforwarding(pskb, ct, ctinfo, data,
- dataoff,
- &facility->
- alternativeAddress);
- return 0;
- }
-
- if (facility->options & eFacility_UUIE_h245Address) {
- ret = expect_h245(pskb, ct, ctinfo, data, dataoff,
- &facility->h245Address);
- if (ret < 0)
- return -1;
- }
-
- if (facility->options & eFacility_UUIE_fastStart) {
- for (i = 0; i < facility->fastStart.count; i++) {
- ret = process_olc(pskb, ct, ctinfo, data, dataoff,
- &facility->fastStart.item[i]);
- if (ret < 0)
- return -1;
- }
- }
-
- return 0;
-}
-
-/****************************************************************************/
-static int process_progress(struct sk_buff **pskb, struct ip_conntrack *ct,
- enum ip_conntrack_info ctinfo,
- unsigned char **data, int dataoff,
- Progress_UUIE * progress)
-{
- int ret;
- int i;
-
- DEBUGP("ip_ct_q931: Progress\n");
-
- if (progress->options & eProgress_UUIE_h245Address) {
- ret = expect_h245(pskb, ct, ctinfo, data, dataoff,
- &progress->h245Address);
- if (ret < 0)
- return -1;
- }
-
- if (progress->options & eProgress_UUIE_fastStart) {
- for (i = 0; i < progress->fastStart.count; i++) {
- ret = process_olc(pskb, ct, ctinfo, data, dataoff,
- &progress->fastStart.item[i]);
- if (ret < 0)
- return -1;
- }
- }
-
- return 0;
-}
-
-/****************************************************************************/
-static int process_q931(struct sk_buff **pskb, struct ip_conntrack *ct,
- enum ip_conntrack_info ctinfo,
- unsigned char **data, int dataoff, Q931 * q931)
-{
- H323_UU_PDU *pdu = &q931->UUIE.h323_uu_pdu;
- int i;
- int ret = 0;
-
- switch (pdu->h323_message_body.choice) {
- case eH323_UU_PDU_h323_message_body_setup:
- ret = process_setup(pskb, ct, ctinfo, data, dataoff,
- &pdu->h323_message_body.setup);
- break;
- case eH323_UU_PDU_h323_message_body_callProceeding:
- ret = process_callproceeding(pskb, ct, ctinfo, data, dataoff,
- &pdu->h323_message_body.
- callProceeding);
- break;
- case eH323_UU_PDU_h323_message_body_connect:
- ret = process_connect(pskb, ct, ctinfo, data, dataoff,
- &pdu->h323_message_body.connect);
- break;
- case eH323_UU_PDU_h323_message_body_alerting:
- ret = process_alerting(pskb, ct, ctinfo, data, dataoff,
- &pdu->h323_message_body.alerting);
- break;
- case eH323_UU_PDU_h323_message_body_information:
- ret = process_information(pskb, ct, ctinfo, data, dataoff,
- &pdu->h323_message_body.
- information);
- break;
- case eH323_UU_PDU_h323_message_body_facility:
- ret = process_facility(pskb, ct, ctinfo, data, dataoff,
- &pdu->h323_message_body.facility);
- break;
- case eH323_UU_PDU_h323_message_body_progress:
- ret = process_progress(pskb, ct, ctinfo, data, dataoff,
- &pdu->h323_message_body.progress);
- break;
- default:
- DEBUGP("ip_ct_q931: Q.931 signal %d\n",
- pdu->h323_message_body.choice);
- break;
- }
-
- if (ret < 0)
- return -1;
-
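- /* Q.931 messages may also tunnel H.245 control PDUs. */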
- if (pdu->options & eH323_UU_PDU_h245Control) {
- for (i = 0; i < pdu->h245Control.count; i++) {
- ret = process_h245(pskb, ct, ctinfo, data, dataoff,
- &pdu->h245Control.item[i]);
- if (ret < 0)
- return -1;
- }
- }
-
- return 0;
-}
-
-/****************************************************************************/
-static int q931_help(struct sk_buff **pskb, struct ip_conntrack *ct,
- enum ip_conntrack_info ctinfo)
-{
- static Q931 q931;
- unsigned char *data = NULL;
- int datalen;
- int dataoff;
- int ret;
-
- /* Until there's been traffic both ways, don't look in packets. */
- if (ctinfo != IP_CT_ESTABLISHED
- && ctinfo != IP_CT_ESTABLISHED + IP_CT_IS_REPLY) {
- return NF_ACCEPT;
- }
- DEBUGP("ip_ct_q931: skblen = %u\n", (*pskb)->len);
-
- spin_lock_bh(&ip_h323_lock);
-
- /* Process each TPKT */
- while (get_tpkt_data(pskb, ct, ctinfo, &data, &datalen, &dataoff)) {
- DEBUGP("ip_ct_q931: TPKT %u.%u.%u.%u->%u.%u.%u.%u, len=%d\n",
- NIPQUAD((*pskb)->nh.iph->saddr),
- NIPQUAD((*pskb)->nh.iph->daddr), datalen);
-
- /* Decode Q.931 signal */
- ret = DecodeQ931(data, datalen, &q931);
- if (ret < 0) {
- if (net_ratelimit())
- printk("ip_ct_q931: decoding error: %s\n",
- ret == H323_ERROR_BOUND ?
- "out of bound" : "out of range");
- /* We don't drop when decoding error */
- break;
- }
-
- /* Process Q.931 signal */
- if (process_q931(pskb, ct, ctinfo, &data, dataoff, &q931) < 0)
- goto drop;
- }
-
- spin_unlock_bh(&ip_h323_lock);
- return NF_ACCEPT;
-
- drop:
- spin_unlock_bh(&ip_h323_lock);
- if (net_ratelimit())
- printk("ip_ct_q931: packet dropped\n");
- return NF_DROP;
-}
-
-/****************************************************************************/
-static struct ip_conntrack_helper ip_conntrack_helper_q931 = {
- .name = "Q.931",
- .me = THIS_MODULE,
- .max_expected = H323_RTP_CHANNEL_MAX * 4 + 4, /* T.120 and H.245 */
- .timeout = 240,
- .tuple = {.src = {.u = {.tcp = {.port = __constant_htons(Q931_PORT)}}},
- .dst = {.protonum = IPPROTO_TCP}},
- .mask = {.src = {.u = {0xFFFF}},
- .dst = {.protonum = 0xFF}},
- .help = q931_help
-};
-
-/****************************************************************************/
-void ip_conntrack_q931_expect(struct ip_conntrack *new,
- struct ip_conntrack_expect *this)
-{
- write_lock_bh(&ip_conntrack_lock);
- new->helper = &ip_conntrack_helper_q931;
- write_unlock_bh(&ip_conntrack_lock);
-}
-
-/****************************************************************************/
-static unsigned char *get_udp_data(struct sk_buff **pskb, int *datalen)
-{
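- /* Locate the UDP payload: skip the IP header (ihl * 4 bytes) and
-  * the UDP header, then map the payload into h323_buffer (callers
-  * hold ip_h323_lock). */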
- struct udphdr _uh, *uh;
- int dataoff;
-
- uh = skb_header_pointer(*pskb, (*pskb)->nh.iph->ihl * 4, sizeof(_uh),
- &_uh);
- if (uh == NULL)
- return NULL;
- dataoff = (*pskb)->nh.iph->ihl * 4 + sizeof(_uh);
- if (dataoff >= (*pskb)->len)
- return NULL;
- *datalen = (*pskb)->len - dataoff;
- return skb_header_pointer(*pskb, dataoff, *datalen, h323_buffer);
-}
-
-/****************************************************************************/
-static struct ip_conntrack_expect *find_expect(struct ip_conntrack *ct,
- __be32 ip, u_int16_t port)
-{
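- /* Look up an expectation for ip:port with a wildcarded source;
-  * only return it if it was created by this master conntrack. */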
- struct ip_conntrack_expect *exp;
- struct ip_conntrack_tuple tuple;
-
- tuple.src.ip = 0;
- tuple.src.u.tcp.port = 0;
- tuple.dst.ip = ip;
- tuple.dst.u.tcp.port = htons(port);
- tuple.dst.protonum = IPPROTO_TCP;
-
- exp = __ip_conntrack_expect_find(&tuple);
- if (exp && exp->master == ct)
- return exp;
- return NULL;
-}
-
-/****************************************************************************/
-static int set_expect_timeout(struct ip_conntrack_expect *exp,
- unsigned timeout)
-{
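- /* del_timer() failing means the expectation has already timed out
-  * and is being torn down, so it must not be re-armed. */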
- if (!exp || !del_timer(&exp->timeout))
- return 0;
-
- exp->timeout.expires = jiffies + timeout * HZ;
- add_timer(&exp->timeout);
-
- return 1;
-}
-
-/****************************************************************************/
-static int expect_q931(struct sk_buff **pskb, struct ip_conntrack *ct,
- enum ip_conntrack_info ctinfo,
- unsigned char **data,
- TransportAddress * addr, int count)
-{
- struct ip_ct_h323_master *info = &ct->help.ct_h323_info;
- int dir = CTINFO2DIR(ctinfo);
- int ret = 0;
- int i;
- __be32 ip;
- u_int16_t port;
- struct ip_conntrack_expect *exp;
- typeof(nat_q931_hook) nat_q931;
-
- /* Look for the first related address */
- for (i = 0; i < count; i++) {
- if (get_h225_addr(*data, &addr[i], &ip, &port) &&
- ip == ct->tuplehash[dir].tuple.src.ip && port != 0)
- break;
- }
-
- if (i >= count) /* Not found */
- return 0;
-
- /* Create expect for Q.931 */
- if ((exp = ip_conntrack_expect_alloc(ct)) == NULL)
- return -1;
- exp->tuple.src.ip = gkrouted_only ? /* only accept calls from GK? */
- ct->tuplehash[!dir].tuple.src.ip : 0;
- exp->tuple.src.u.tcp.port = 0;
- exp->tuple.dst.ip = ct->tuplehash[!dir].tuple.dst.ip;
- exp->tuple.dst.u.tcp.port = htons(port);
- exp->tuple.dst.protonum = IPPROTO_TCP;
- exp->mask.src.ip = gkrouted_only ? htonl(0xFFFFFFFF) : 0;
- exp->mask.src.u.tcp.port = 0;
- exp->mask.dst.ip = htonl(0xFFFFFFFF);
- exp->mask.dst.u.tcp.port = htons(0xFFFF);
- exp->mask.dst.protonum = 0xFF;
- exp->flags = IP_CT_EXPECT_PERMANENT; /* Accept multiple calls */
-
- nat_q931 = rcu_dereference(nat_q931_hook);
- if (nat_q931) { /* Need NAT */
- ret = nat_q931(pskb, ct, ctinfo, data, addr, i, port, exp);
- } else { /* Conntrack only */
- exp->expectfn = ip_conntrack_q931_expect;
-
- if (ip_conntrack_expect_related(exp) == 0) {
- DEBUGP("ip_ct_ras: expect Q.931 "
- "%u.%u.%u.%u:%hu->%u.%u.%u.%u:%hu\n",
- NIPQUAD(exp->tuple.src.ip),
- ntohs(exp->tuple.src.u.tcp.port),
- NIPQUAD(exp->tuple.dst.ip),
- ntohs(exp->tuple.dst.u.tcp.port));
-
- /* Save port for looking up expect in processing RCF */
- info->sig_port[dir] = port;
- } else
- ret = -1;
- }
-
- ip_conntrack_expect_put(exp);
-
- return ret;
-}
-
-/****************************************************************************/
-static int process_grq(struct sk_buff **pskb, struct ip_conntrack *ct,
- enum ip_conntrack_info ctinfo,
- unsigned char **data, GatekeeperRequest * grq)
-{
- typeof(set_ras_addr_hook) set_ras_addr;
-
- DEBUGP("ip_ct_ras: GRQ\n");
-
- set_ras_addr = rcu_dereference(set_ras_addr_hook);
- if (set_ras_addr) /* NATed */
- return set_ras_addr(pskb, ct, ctinfo, data,
- &grq->rasAddress, 1);
- return 0;
-}
-
-/* Declare before using */
-static void ip_conntrack_ras_expect(struct ip_conntrack *new,
- struct ip_conntrack_expect *this);
-
-/****************************************************************************/
-static int process_gcf(struct sk_buff **pskb, struct ip_conntrack *ct,
- enum ip_conntrack_info ctinfo,
- unsigned char **data, GatekeeperConfirm * gcf)
-{
- int dir = CTINFO2DIR(ctinfo);
- int ret = 0;
- __be32 ip;
- u_int16_t port;
- struct ip_conntrack_expect *exp;
-
- DEBUGP("ip_ct_ras: GCF\n");
-
- if (!get_h225_addr(*data, &gcf->rasAddress, &ip, &port))
- return 0;
-
- /* Registration port is the same as discovery port */
- if (ip == ct->tuplehash[dir].tuple.src.ip &&
- port == ntohs(ct->tuplehash[dir].tuple.src.u.udp.port))
- return 0;
-
- /* Avoid RAS expectation loops. A GCF is never expected. */
- if (test_bit(IPS_EXPECTED_BIT, &ct->status))
- return 0;
-
- /* Need new expect */
- if ((exp = ip_conntrack_expect_alloc(ct)) == NULL)
- return -1;
- exp->tuple.src.ip = ct->tuplehash[!dir].tuple.src.ip;
- exp->tuple.src.u.tcp.port = 0;
- exp->tuple.dst.ip = ip;
- exp->tuple.dst.u.tcp.port = htons(port);
- exp->tuple.dst.protonum = IPPROTO_UDP;
- exp->mask.src.ip = htonl(0xFFFFFFFF);
- exp->mask.src.u.tcp.port = 0;
- exp->mask.dst.ip = htonl(0xFFFFFFFF);
- exp->mask.dst.u.tcp.port = htons(0xFFFF);
- exp->mask.dst.protonum = 0xFF;
- exp->flags = 0;
- exp->expectfn = ip_conntrack_ras_expect;
- if (ip_conntrack_expect_related(exp) == 0) {
- DEBUGP("ip_ct_ras: expect RAS "
- "%u.%u.%u.%u:%hu->%u.%u.%u.%u:%hu\n",
- NIPQUAD(exp->tuple.src.ip),
- ntohs(exp->tuple.src.u.tcp.port),
- NIPQUAD(exp->tuple.dst.ip),
- ntohs(exp->tuple.dst.u.tcp.port));
- } else
- ret = -1;
-
- ip_conntrack_expect_put(exp);
-
- return ret;
-}
-
-/****************************************************************************/
-static int process_rrq(struct sk_buff **pskb, struct ip_conntrack *ct,
- enum ip_conntrack_info ctinfo,
- unsigned char **data, RegistrationRequest * rrq)
-{
- struct ip_ct_h323_master *info = &ct->help.ct_h323_info;
- int ret;
- typeof(set_ras_addr_hook) set_ras_addr;
-
- DEBUGP("ip_ct_ras: RRQ\n");
-
- ret = expect_q931(pskb, ct, ctinfo, data,
- rrq->callSignalAddress.item,
- rrq->callSignalAddress.count);
- if (ret < 0)
- return -1;
-
- set_ras_addr = rcu_dereference(set_ras_addr_hook);
- if (set_ras_addr) {
- ret = set_ras_addr(pskb, ct, ctinfo, data,
- rrq->rasAddress.item,
- rrq->rasAddress.count);
- if (ret < 0)
- return -1;
- }
-
- if (rrq->options & eRegistrationRequest_timeToLive) {
- DEBUGP("ip_ct_ras: RRQ TTL = %u seconds\n", rrq->timeToLive);
- info->timeout = rrq->timeToLive;
- } else
- info->timeout = default_rrq_ttl;
-
- return 0;
-}
-
-/****************************************************************************/
-static int process_rcf(struct sk_buff **pskb, struct ip_conntrack *ct,
- enum ip_conntrack_info ctinfo,
- unsigned char **data, RegistrationConfirm * rcf)
-{
- struct ip_ct_h323_master *info = &ct->help.ct_h323_info;
- int dir = CTINFO2DIR(ctinfo);
- int ret;
- struct ip_conntrack_expect *exp;
- typeof(set_sig_addr_hook) set_sig_addr;
-
- DEBUGP("ip_ct_ras: RCF\n");
-
- set_sig_addr = rcu_dereference(set_sig_addr_hook);
- if (set_sig_addr) {
- ret = set_sig_addr(pskb, ct, ctinfo, data,
- rcf->callSignalAddress.item,
- rcf->callSignalAddress.count);
- if (ret < 0)
- return -1;
- }
-
- if (rcf->options & eRegistrationConfirm_timeToLive) {
- DEBUGP("ip_ct_ras: RCF TTL = %u seconds\n", rcf->timeToLive);
- info->timeout = rcf->timeToLive;
- }
-
- if (info->timeout > 0) {
- DEBUGP
- ("ip_ct_ras: set RAS connection timeout to %u seconds\n",
- info->timeout);
- ip_ct_refresh(ct, *pskb, info->timeout * HZ);
-
- /* Set expect timeout */
- read_lock_bh(&ip_conntrack_lock);
- exp = find_expect(ct, ct->tuplehash[dir].tuple.dst.ip,
- info->sig_port[!dir]);
- if (exp) {
- DEBUGP("ip_ct_ras: set Q.931 expect "
- "(%u.%u.%u.%u:%hu->%u.%u.%u.%u:%hu) "
- "timeout to %u seconds\n",
- NIPQUAD(exp->tuple.src.ip),
- ntohs(exp->tuple.src.u.tcp.port),
- NIPQUAD(exp->tuple.dst.ip),
- ntohs(exp->tuple.dst.u.tcp.port),
- info->timeout);
- set_expect_timeout(exp, info->timeout);
- }
- read_unlock_bh(&ip_conntrack_lock);
- }
-
- return 0;
-}
-
-/****************************************************************************/
-static int process_urq(struct sk_buff **pskb, struct ip_conntrack *ct,
- enum ip_conntrack_info ctinfo,
- unsigned char **data, UnregistrationRequest * urq)
-{
- struct ip_ct_h323_master *info = &ct->help.ct_h323_info;
- int dir = CTINFO2DIR(ctinfo);
- int ret;
- typeof(set_sig_addr_hook) set_sig_addr;
-
- DEBUGP("ip_ct_ras: URQ\n");
-
- set_sig_addr = rcu_dereference(set_sig_addr_hook);
- if (set_sig_addr) {
- ret = set_sig_addr(pskb, ct, ctinfo, data,
- urq->callSignalAddress.item,
- urq->callSignalAddress.count);
- if (ret < 0)
- return -1;
- }
-
- /* Clear old expect */
- ip_ct_remove_expectations(ct);
- info->sig_port[dir] = 0;
- info->sig_port[!dir] = 0;
-
- /* Give it 30 seconds for UCF or URJ */
- ip_ct_refresh(ct, *pskb, 30 * HZ);
-
- return 0;
-}
-
-/****************************************************************************/
-static int process_arq(struct sk_buff **pskb, struct ip_conntrack *ct,
- enum ip_conntrack_info ctinfo,
- unsigned char **data, AdmissionRequest * arq)
-{
- struct ip_ct_h323_master *info = &ct->help.ct_h323_info;
- int dir = CTINFO2DIR(ctinfo);
- __be32 ip;
- u_int16_t port;
- typeof(set_h225_addr_hook) set_h225_addr;
-
- DEBUGP("ip_ct_ras: ARQ\n");
-
- set_h225_addr = rcu_dereference(set_h225_addr_hook);
- if ((arq->options & eAdmissionRequest_destCallSignalAddress) &&
- get_h225_addr(*data, &arq->destCallSignalAddress, &ip, &port) &&
- ip == ct->tuplehash[dir].tuple.src.ip &&
- port == info->sig_port[dir] && set_h225_addr) {
- /* Answering ARQ */
- return set_h225_addr(pskb, data, 0,
- &arq->destCallSignalAddress,
- ct->tuplehash[!dir].tuple.dst.ip,
- info->sig_port[!dir]);
- }
-
- if ((arq->options & eAdmissionRequest_srcCallSignalAddress) &&
- get_h225_addr(*data, &arq->srcCallSignalAddress, &ip, &port) &&
- ip == ct->tuplehash[dir].tuple.src.ip && set_h225_addr) {
- /* Calling ARQ */
- return set_h225_addr(pskb, data, 0,
- &arq->srcCallSignalAddress,
- ct->tuplehash[!dir].tuple.dst.ip,
- port);
- }
-
- return 0;
-}
-
-/****************************************************************************/
-static int process_acf(struct sk_buff **pskb, struct ip_conntrack *ct,
- enum ip_conntrack_info ctinfo,
- unsigned char **data, AdmissionConfirm * acf)
-{
- int dir = CTINFO2DIR(ctinfo);
- int ret = 0;
- __be32 ip;
- u_int16_t port;
- struct ip_conntrack_expect *exp;
- typeof(set_sig_addr_hook) set_sig_addr;
-
- DEBUGP("ip_ct_ras: ACF\n");
-
- if (!get_h225_addr(*data, &acf->destCallSignalAddress, &ip, &port))
- return 0;
-
- if (ip == ct->tuplehash[dir].tuple.dst.ip) { /* Answering ACF */
- set_sig_addr = rcu_dereference(set_sig_addr_hook);
- if (set_sig_addr)
- return set_sig_addr(pskb, ct, ctinfo, data,
- &acf->destCallSignalAddress, 1);
- return 0;
- }
-
- /* Need new expect */
- if ((exp = ip_conntrack_expect_alloc(ct)) == NULL)
- return -1;
- exp->tuple.src.ip = ct->tuplehash[!dir].tuple.src.ip;
- exp->tuple.src.u.tcp.port = 0;
- exp->tuple.dst.ip = ip;
- exp->tuple.dst.u.tcp.port = htons(port);
- exp->tuple.dst.protonum = IPPROTO_TCP;
- exp->mask.src.ip = htonl(0xFFFFFFFF);
- exp->mask.src.u.tcp.port = 0;
- exp->mask.dst.ip = htonl(0xFFFFFFFF);
- exp->mask.dst.u.tcp.port = htons(0xFFFF);
- exp->mask.dst.protonum = 0xFF;
- exp->flags = IP_CT_EXPECT_PERMANENT;
- exp->expectfn = ip_conntrack_q931_expect;
-
- if (ip_conntrack_expect_related(exp) == 0) {
- DEBUGP("ip_ct_ras: expect Q.931 "
- "%u.%u.%u.%u:%hu->%u.%u.%u.%u:%hu\n",
- NIPQUAD(exp->tuple.src.ip),
- ntohs(exp->tuple.src.u.tcp.port),
- NIPQUAD(exp->tuple.dst.ip),
- ntohs(exp->tuple.dst.u.tcp.port));
- } else
- ret = -1;
-
- ip_conntrack_expect_put(exp);
-
- return ret;
-}
-
-/****************************************************************************/
-static int process_lrq(struct sk_buff **pskb, struct ip_conntrack *ct,
- enum ip_conntrack_info ctinfo,
- unsigned char **data, LocationRequest * lrq)
-{
- typeof(set_ras_addr_hook) set_ras_addr;
-
- DEBUGP("ip_ct_ras: LRQ\n");
-
- set_ras_addr = rcu_dereference(set_ras_addr_hook);
- if (set_ras_addr)
- return set_ras_addr(pskb, ct, ctinfo, data,
- &lrq->replyAddress, 1);
- return 0;
-}
-
-/****************************************************************************/
-static int process_lcf(struct sk_buff **pskb, struct ip_conntrack *ct,
- enum ip_conntrack_info ctinfo,
- unsigned char **data, LocationConfirm * lcf)
-{
- int dir = CTINFO2DIR(ctinfo);
- int ret = 0;
- __be32 ip;
- u_int16_t port;
- struct ip_conntrack_expect *exp = NULL;
-
- DEBUGP("ip_ct_ras: LCF\n");
-
- if (!get_h225_addr(*data, &lcf->callSignalAddress, &ip, &port))
- return 0;
-
- /* Need new expect for call signal */
- if ((exp = ip_conntrack_expect_alloc(ct)) == NULL)
- return -1;
- exp->tuple.src.ip = ct->tuplehash[!dir].tuple.src.ip;
- exp->tuple.src.u.tcp.port = 0;
- exp->tuple.dst.ip = ip;
- exp->tuple.dst.u.tcp.port = htons(port);
- exp->tuple.dst.protonum = IPPROTO_TCP;
- exp->mask.src.ip = htonl(0xFFFFFFFF);
- exp->mask.src.u.tcp.port = 0;
- exp->mask.dst.ip = htonl(0xFFFFFFFF);
- exp->mask.dst.u.tcp.port = htons(0xFFFF);
- exp->mask.dst.protonum = 0xFF;
- exp->flags = IP_CT_EXPECT_PERMANENT;
- exp->expectfn = ip_conntrack_q931_expect;
-
- if (ip_conntrack_expect_related(exp) == 0) {
- DEBUGP("ip_ct_ras: expect Q.931 "
- "%u.%u.%u.%u:%hu->%u.%u.%u.%u:%hu\n",
- NIPQUAD(exp->tuple.src.ip),
- ntohs(exp->tuple.src.u.tcp.port),
- NIPQUAD(exp->tuple.dst.ip),
- ntohs(exp->tuple.dst.u.tcp.port));
- } else
- ret = -1;
-
- ip_conntrack_expect_put(exp);
-
- /* Ignore rasAddress */
-
- return ret;
-}
-
-/****************************************************************************/
-static int process_irr(struct sk_buff **pskb, struct ip_conntrack *ct,
- enum ip_conntrack_info ctinfo,
- unsigned char **data, InfoRequestResponse * irr)
-{
- int ret;
- typeof(set_ras_addr_hook) set_ras_addr;
- typeof(set_sig_addr_hook) set_sig_addr;
-
- DEBUGP("ip_ct_ras: IRR\n");
-
- set_ras_addr = rcu_dereference(set_ras_addr_hook);
- if (set_ras_addr) {
- ret = set_ras_addr(pskb, ct, ctinfo, data,
- &irr->rasAddress, 1);
- if (ret < 0)
- return -1;
- }
-
- set_sig_addr = rcu_dereference(set_sig_addr_hook);
- if (set_sig_addr) {
- ret = set_sig_addr(pskb, ct, ctinfo, data,
- irr->callSignalAddress.item,
- irr->callSignalAddress.count);
- if (ret < 0)
- return -1;
- }
-
- return 0;
-}
-
-/****************************************************************************/
-static int process_ras(struct sk_buff **pskb, struct ip_conntrack *ct,
- enum ip_conntrack_info ctinfo,
- unsigned char **data, RasMessage * ras)
-{
- switch (ras->choice) {
- case eRasMessage_gatekeeperRequest:
- return process_grq(pskb, ct, ctinfo, data,
- &ras->gatekeeperRequest);
- case eRasMessage_gatekeeperConfirm:
- return process_gcf(pskb, ct, ctinfo, data,
- &ras->gatekeeperConfirm);
- case eRasMessage_registrationRequest:
- return process_rrq(pskb, ct, ctinfo, data,
- &ras->registrationRequest);
- case eRasMessage_registrationConfirm:
- return process_rcf(pskb, ct, ctinfo, data,
- &ras->registrationConfirm);
- case eRasMessage_unregistrationRequest:
- return process_urq(pskb, ct, ctinfo, data,
- &ras->unregistrationRequest);
- case eRasMessage_admissionRequest:
- return process_arq(pskb, ct, ctinfo, data,
- &ras->admissionRequest);
- case eRasMessage_admissionConfirm:
- return process_acf(pskb, ct, ctinfo, data,
- &ras->admissionConfirm);
- case eRasMessage_locationRequest:
- return process_lrq(pskb, ct, ctinfo, data,
- &ras->locationRequest);
- case eRasMessage_locationConfirm:
- return process_lcf(pskb, ct, ctinfo, data,
- &ras->locationConfirm);
- case eRasMessage_infoRequestResponse:
- return process_irr(pskb, ct, ctinfo, data,
- &ras->infoRequestResponse);
- default:
- DEBUGP("ip_ct_ras: RAS message %d\n", ras->choice);
- break;
- }
-
- return 0;
-}
-
-/****************************************************************************/
-static int ras_help(struct sk_buff **pskb, struct ip_conntrack *ct,
- enum ip_conntrack_info ctinfo)
-{
- static RasMessage ras;
- unsigned char *data;
- int datalen = 0;
- int ret;
-
- DEBUGP("ip_ct_ras: skblen = %u\n", (*pskb)->len);
-
- spin_lock_bh(&ip_h323_lock);
-
- /* Get UDP data */
- data = get_udp_data(pskb, &datalen);
- if (data == NULL)
- goto accept;
- DEBUGP("ip_ct_ras: RAS message %u.%u.%u.%u->%u.%u.%u.%u, len=%d\n",
- NIPQUAD((*pskb)->nh.iph->saddr),
- NIPQUAD((*pskb)->nh.iph->daddr), datalen);
-
- /* Decode RAS message */
- ret = DecodeRasMessage(data, datalen, &ras);
- if (ret < 0) {
- if (net_ratelimit())
- printk("ip_ct_ras: decoding error: %s\n",
- ret == H323_ERROR_BOUND ?
- "out of bound" : "out of range");
- goto accept;
- }
-
- /* Process RAS message */
- if (process_ras(pskb, ct, ctinfo, &data, &ras) < 0)
- goto drop;
-
- accept:
- spin_unlock_bh(&ip_h323_lock);
- return NF_ACCEPT;
-
- drop:
- spin_unlock_bh(&ip_h323_lock);
- if (net_ratelimit())
- printk("ip_ct_ras: packet dropped\n");
- return NF_DROP;
-}
-
-/****************************************************************************/
-static struct ip_conntrack_helper ip_conntrack_helper_ras = {
- .name = "RAS",
- .me = THIS_MODULE,
- .max_expected = 32,
- .timeout = 240,
- .tuple = {.src = {.u = {.tcp = {.port = __constant_htons(RAS_PORT)}}},
- .dst = {.protonum = IPPROTO_UDP}},
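- /* The 0xFFFE source-port mask is intended to wildcard the low
-  * port bit so both UDP 1719 (RAS) and 1718 (gatekeeper
-  * discovery) match. */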
- .mask = {.src = {.u = {0xFFFE}},
- .dst = {.protonum = 0xFF}},
- .help = ras_help,
-};
-
-/****************************************************************************/
-static void ip_conntrack_ras_expect(struct ip_conntrack *new,
- struct ip_conntrack_expect *this)
-{
- write_lock_bh(&ip_conntrack_lock);
- new->helper = &ip_conntrack_helper_ras;
- write_unlock_bh(&ip_conntrack_lock);
-}
-
-/****************************************************************************/
-/* Not __exit - called from init() */
-static void fini(void)
-{
- ip_conntrack_helper_unregister(&ip_conntrack_helper_ras);
- ip_conntrack_helper_unregister(&ip_conntrack_helper_q931);
- kfree(h323_buffer);
- DEBUGP("ip_ct_h323: fini\n");
-}
-
-/****************************************************************************/
-static int __init init(void)
-{
- int ret;
-
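- /* Single scratch buffer for TPKT/RAS payloads, large enough for
-  * any TPKT (16-bit length field); serialized by ip_h323_lock. */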
- h323_buffer = kmalloc(65536, GFP_KERNEL);
- if (!h323_buffer)
- return -ENOMEM;
- if ((ret = ip_conntrack_helper_register(&ip_conntrack_helper_q931)) ||
- (ret = ip_conntrack_helper_register(&ip_conntrack_helper_ras))) {
- fini();
- return ret;
- }
- DEBUGP("ip_ct_h323: init success\n");
- return 0;
-}
-
-/****************************************************************************/
-module_init(init);
-module_exit(fini);
-
-EXPORT_SYMBOL_GPL(get_h225_addr);
-EXPORT_SYMBOL_GPL(ip_conntrack_h245_expect);
-EXPORT_SYMBOL_GPL(ip_conntrack_q931_expect);
-EXPORT_SYMBOL_GPL(set_h245_addr_hook);
-EXPORT_SYMBOL_GPL(set_h225_addr_hook);
-EXPORT_SYMBOL_GPL(set_sig_addr_hook);
-EXPORT_SYMBOL_GPL(set_ras_addr_hook);
-EXPORT_SYMBOL_GPL(nat_rtp_rtcp_hook);
-EXPORT_SYMBOL_GPL(nat_t120_hook);
-EXPORT_SYMBOL_GPL(nat_h245_hook);
-EXPORT_SYMBOL_GPL(nat_callforwarding_hook);
-EXPORT_SYMBOL_GPL(nat_q931_hook);
-
-MODULE_AUTHOR("Jing Min Zhao <zhaojingmin@users.sourceforge.net>");
-MODULE_DESCRIPTION("H.323 connection tracking helper");
-MODULE_LICENSE("GPL");
diff --git a/net/ipv4/netfilter/ip_conntrack_helper_pptp.c b/net/ipv4/netfilter/ip_conntrack_helper_pptp.c
deleted file mode 100644
index 2b760c5cf70..00000000000
--- a/net/ipv4/netfilter/ip_conntrack_helper_pptp.c
+++ /dev/null
@@ -1,684 +0,0 @@
-/*
- * ip_conntrack_pptp.c - Version 3.1
- *
- * Connection tracking support for PPTP (Point to Point Tunneling Protocol).
- * PPTP is a protocol for creating virtual private networks.
- * It is a specification defined by Microsoft and some vendors
- * working with Microsoft. PPTP is built on top of a modified
- * version of the Internet Generic Routing Encapsulation Protocol.
- * GRE is defined in RFC 1701 and RFC 1702. Documentation of
- * PPTP can be found in RFC 2637.
- *
- * (C) 2000-2005 by Harald Welte <laforge@gnumonks.org>
- *
- * Development of this code funded by Astaro AG (http://www.astaro.com/)
- *
- * Limitations:
- * - We blindly assume that control connections are always
- * established in the PNS->PAC direction. This is a violation
- * of RFC 2637
- * - We can only support a single call within each session
- *
- * TODO:
- * - testing of incoming PPTP calls
- *
- * Changes:
- * 2002-02-05 - Version 1.3
- * - Call ip_conntrack_unexpect_related() from
- * pptp_destroy_siblings() to destroy expectations in case
- * CALL_DISCONNECT_NOTIFY or tcp fin packet was seen
- * (Philip Craig <philipc@snapgear.com>)
- * - Add Version information at module loadtime
- * 2002-02-10 - Version 1.6
- * - move to C99 style initializers
- * - remove second expectation if first arrives
- * 2004-10-22 - Version 2.0
- * - merge Mandrake's 2.6.x port with recent 2.6.x API changes
- * - fix lots of linear skb assumptions from Mandrake's port
- * 2005-06-10 - Version 2.1
- * - use ip_conntrack_expect_free() instead of kfree() on the
- * expect's (which are from the slab for quite some time)
- * 2005-06-10 - Version 3.0
- * - port helper to post-2.6.11 API changes,
- * funded by Oxcoda NetBox Blue (http://www.netboxblue.com/)
- * 2005-07-30 - Version 3.1
- * - port helper to 2.6.13 API changes
- *
- */
-
-#include <linux/module.h>
-#include <linux/netfilter.h>
-#include <linux/ip.h>
-#include <net/checksum.h>
-#include <net/tcp.h>
-
-#include <linux/netfilter_ipv4/ip_conntrack.h>
-#include <linux/netfilter_ipv4/ip_conntrack_core.h>
-#include <linux/netfilter_ipv4/ip_conntrack_helper.h>
-#include <linux/netfilter_ipv4/ip_conntrack_proto_gre.h>
-#include <linux/netfilter_ipv4/ip_conntrack_pptp.h>
-
-#define IP_CT_PPTP_VERSION "3.1"
-
-MODULE_LICENSE("GPL");
-MODULE_AUTHOR("Harald Welte <laforge@gnumonks.org>");
-MODULE_DESCRIPTION("Netfilter connection tracking helper module for PPTP");
-
-static DEFINE_SPINLOCK(ip_pptp_lock);
-
-int
-(*ip_nat_pptp_hook_outbound)(struct sk_buff **pskb,
- struct ip_conntrack *ct,
- enum ip_conntrack_info ctinfo,
- struct PptpControlHeader *ctlh,
- union pptp_ctrl_union *pptpReq);
-
-int
-(*ip_nat_pptp_hook_inbound)(struct sk_buff **pskb,
- struct ip_conntrack *ct,
- enum ip_conntrack_info ctinfo,
- struct PptpControlHeader *ctlh,
- union pptp_ctrl_union *pptpReq);
-
-void
-(*ip_nat_pptp_hook_exp_gre)(struct ip_conntrack_expect *expect_orig,
- struct ip_conntrack_expect *expect_reply);
-
-void
-(*ip_nat_pptp_hook_expectfn)(struct ip_conntrack *ct,
- struct ip_conntrack_expect *exp);
-
-#if 0
-/* PptpControlMessageType names */
-const char *pptp_msg_name[] = {
- "UNKNOWN_MESSAGE",
- "START_SESSION_REQUEST",
- "START_SESSION_REPLY",
- "STOP_SESSION_REQUEST",
- "STOP_SESSION_REPLY",
- "ECHO_REQUEST",
- "ECHO_REPLY",
- "OUT_CALL_REQUEST",
- "OUT_CALL_REPLY",
- "IN_CALL_REQUEST",
- "IN_CALL_REPLY",
- "IN_CALL_CONNECT",
- "CALL_CLEAR_REQUEST",
- "CALL_DISCONNECT_NOTIFY",
- "WAN_ERROR_NOTIFY",
- "SET_LINK_INFO"
-};
-EXPORT_SYMBOL(pptp_msg_name);
-#define DEBUGP(format, args...) printk(KERN_DEBUG "%s:%s: " format, __FILE__, __FUNCTION__, ## args)
-#else
-#define DEBUGP(format, args...)
-#endif
-
-#define SECS *HZ
-#define MINS * 60 SECS
-#define HOURS * 60 MINS
-
-#define PPTP_GRE_TIMEOUT (10 MINS)
-#define PPTP_GRE_STREAM_TIMEOUT (5 HOURS)
-
-static void pptp_expectfn(struct ip_conntrack *ct,
- struct ip_conntrack_expect *exp)
-{
- typeof(ip_nat_pptp_hook_expectfn) ip_nat_pptp_expectfn;
-
- DEBUGP("increasing timeouts\n");
-
- /* increase timeout of GRE data channel conntrack entry */
- ct->proto.gre.timeout = PPTP_GRE_TIMEOUT;
- ct->proto.gre.stream_timeout = PPTP_GRE_STREAM_TIMEOUT;
-
- /* Can you see how rusty this code is, compared with the pre-2.6.11
- * one? That's what happened to my shiny newnat of 2002 ;( -HW */
-
- rcu_read_lock();
- ip_nat_pptp_expectfn = rcu_dereference(ip_nat_pptp_hook_expectfn);
- if (!ip_nat_pptp_expectfn) {
- struct ip_conntrack_tuple inv_t;
- struct ip_conntrack_expect *exp_other;
-
- /* obviously this tuple inversion only works until you do NAT */
- invert_tuplepr(&inv_t, &exp->tuple);
- DEBUGP("trying to unexpect other dir: ");
- DUMP_TUPLE(&inv_t);
-
- exp_other = ip_conntrack_expect_find_get(&inv_t);
- if (exp_other) {
- /* delete other expectation. */
- DEBUGP("found\n");
- ip_conntrack_unexpect_related(exp_other);
- ip_conntrack_expect_put(exp_other);
- } else {
- DEBUGP("not found\n");
- }
- } else {
- /* we need more than simple inversion */
- ip_nat_pptp_expectfn(ct, exp);
- }
- rcu_read_unlock();
-}
-
-static int destroy_sibling_or_exp(const struct ip_conntrack_tuple *t)
-{
- struct ip_conntrack_tuple_hash *h;
- struct ip_conntrack_expect *exp;
-
- DEBUGP("trying to timeout ct or exp for tuple ");
- DUMP_TUPLE(t);
-
- h = ip_conntrack_find_get(t, NULL);
- if (h) {
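- /* A live GRE conntrack exists: zero its timeouts and fire the
-  * timer by hand so it is destroyed immediately. */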
- struct ip_conntrack *sibling = tuplehash_to_ctrack(h);
- DEBUGP("setting timeout of conntrack %p to 0\n", sibling);
- sibling->proto.gre.timeout = 0;
- sibling->proto.gre.stream_timeout = 0;
- if (del_timer(&sibling->timeout))
- sibling->timeout.function((unsigned long)sibling);
- ip_conntrack_put(sibling);
- return 1;
- } else {
- exp = ip_conntrack_expect_find_get(t);
- if (exp) {
- DEBUGP("unexpect_related of expect %p\n", exp);
- ip_conntrack_unexpect_related(exp);
- ip_conntrack_expect_put(exp);
- return 1;
- }
- }
-
- return 0;
-}
-
-
-/* timeout GRE data connections */
-static void pptp_destroy_siblings(struct ip_conntrack *ct)
-{
- struct ip_conntrack_tuple t;
-
- ip_ct_gre_keymap_destroy(ct);
- /* Since ct->sibling_list has literally rusted away in 2.6.11,
- * we now need another way to find out about our sibling
- * conntracks and expects... -HW */
-
- /* try original (pns->pac) tuple */
- memcpy(&t, &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple, sizeof(t));
- t.dst.protonum = IPPROTO_GRE;
- t.src.u.gre.key = ct->help.ct_pptp_info.pns_call_id;
- t.dst.u.gre.key = ct->help.ct_pptp_info.pac_call_id;
-
- if (!destroy_sibling_or_exp(&t))
- DEBUGP("failed to timeout original pns->pac ct/exp\n");
-
- /* try reply (pac->pns) tuple */
- memcpy(&t, &ct->tuplehash[IP_CT_DIR_REPLY].tuple, sizeof(t));
- t.dst.protonum = IPPROTO_GRE;
- t.src.u.gre.key = ct->help.ct_pptp_info.pac_call_id;
- t.dst.u.gre.key = ct->help.ct_pptp_info.pns_call_id;
-
- if (!destroy_sibling_or_exp(&t))
- DEBUGP("failed to timeout reply pac->pns ct/exp\n");
-}
-
-/* expect GRE connections (PNS->PAC and PAC->PNS direction) */
-static inline int
-exp_gre(struct ip_conntrack *ct,
- __be16 callid,
- __be16 peer_callid)
-{
- struct ip_conntrack_expect *exp_orig, *exp_reply;
- int ret = 1;
- typeof(ip_nat_pptp_hook_exp_gre) ip_nat_pptp_exp_gre;
-
- exp_orig = ip_conntrack_expect_alloc(ct);
- if (exp_orig == NULL)
- goto out;
-
- exp_reply = ip_conntrack_expect_alloc(ct);
- if (exp_reply == NULL)
- goto out_put_orig;
-
- /* original direction, PNS->PAC */
- exp_orig->tuple.src.ip = ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.ip;
- exp_orig->tuple.src.u.gre.key = peer_callid;
- exp_orig->tuple.dst.ip = ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.ip;
- exp_orig->tuple.dst.u.gre.key = callid;
- exp_orig->tuple.dst.protonum = IPPROTO_GRE;
-
- exp_orig->mask.src.ip = htonl(0xffffffff);
- exp_orig->mask.src.u.all = 0;
- exp_orig->mask.dst.u.gre.key = htons(0xffff);
- exp_orig->mask.dst.ip = htonl(0xffffffff);
- exp_orig->mask.dst.protonum = 0xff;
-
- exp_orig->master = ct;
- exp_orig->expectfn = pptp_expectfn;
- exp_orig->flags = 0;
-
- /* both expectations are identical apart from tuple */
- memcpy(exp_reply, exp_orig, sizeof(*exp_reply));
-
- /* reply direction, PAC->PNS */
- exp_reply->tuple.src.ip = ct->tuplehash[IP_CT_DIR_REPLY].tuple.src.ip;
- exp_reply->tuple.src.u.gre.key = callid;
- exp_reply->tuple.dst.ip = ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.ip;
- exp_reply->tuple.dst.u.gre.key = peer_callid;
- exp_reply->tuple.dst.protonum = IPPROTO_GRE;
-
- ip_nat_pptp_exp_gre = rcu_dereference(ip_nat_pptp_hook_exp_gre);
- if (ip_nat_pptp_exp_gre)
- ip_nat_pptp_exp_gre(exp_orig, exp_reply);
- if (ip_conntrack_expect_related(exp_orig) != 0)
- goto out_put_both;
- if (ip_conntrack_expect_related(exp_reply) != 0)
- goto out_unexpect_orig;
-
- /* Add GRE keymap entries */
- if (ip_ct_gre_keymap_add(ct, &exp_orig->tuple, 0) != 0)
- goto out_unexpect_both;
- if (ip_ct_gre_keymap_add(ct, &exp_reply->tuple, 1) != 0) {
- ip_ct_gre_keymap_destroy(ct);
- goto out_unexpect_both;
- }
- ret = 0;
-
-out_put_both:
- ip_conntrack_expect_put(exp_reply);
-out_put_orig:
- ip_conntrack_expect_put(exp_orig);
-out:
- return ret;
-
-out_unexpect_both:
- ip_conntrack_unexpect_related(exp_reply);
-out_unexpect_orig:
- ip_conntrack_unexpect_related(exp_orig);
- goto out_put_both;
-}
-
-static inline int
-pptp_inbound_pkt(struct sk_buff **pskb,
- struct PptpControlHeader *ctlh,
- union pptp_ctrl_union *pptpReq,
- unsigned int reqlen,
- struct ip_conntrack *ct,
- enum ip_conntrack_info ctinfo)
-{
- struct ip_ct_pptp_master *info = &ct->help.ct_pptp_info;
- u_int16_t msg;
- __be16 cid = 0, pcid = 0;
- typeof(ip_nat_pptp_hook_inbound) ip_nat_pptp_inbound;
-
- msg = ntohs(ctlh->messageType);
- DEBUGP("inbound control message %s\n", pptp_msg_name[msg]);
-
- switch (msg) {
- case PPTP_START_SESSION_REPLY:
- /* server confirms new control session */
- if (info->sstate < PPTP_SESSION_REQUESTED)
- goto invalid;
- if (pptpReq->srep.resultCode == PPTP_START_OK)
- info->sstate = PPTP_SESSION_CONFIRMED;
- else
- info->sstate = PPTP_SESSION_ERROR;
- break;
-
- case PPTP_STOP_SESSION_REPLY:
- /* server confirms end of control session */
- if (info->sstate > PPTP_SESSION_STOPREQ)
- goto invalid;
- if (pptpReq->strep.resultCode == PPTP_STOP_OK)
- info->sstate = PPTP_SESSION_NONE;
- else
- info->sstate = PPTP_SESSION_ERROR;
- break;
-
- case PPTP_OUT_CALL_REPLY:
- /* server accepted call, we now expect GRE frames */
- if (info->sstate != PPTP_SESSION_CONFIRMED)
- goto invalid;
- if (info->cstate != PPTP_CALL_OUT_REQ &&
- info->cstate != PPTP_CALL_OUT_CONF)
- goto invalid;
-
- cid = pptpReq->ocack.callID;
- pcid = pptpReq->ocack.peersCallID;
- if (info->pns_call_id != pcid)
- goto invalid;
- DEBUGP("%s, CID=%X, PCID=%X\n", pptp_msg_name[msg],
- ntohs(cid), ntohs(pcid));
-
- if (pptpReq->ocack.resultCode == PPTP_OUTCALL_CONNECT) {
- info->cstate = PPTP_CALL_OUT_CONF;
- info->pac_call_id = cid;
- exp_gre(ct, cid, pcid);
- } else
- info->cstate = PPTP_CALL_NONE;
- break;
-
- case PPTP_IN_CALL_REQUEST:
- /* server tells us about an incoming call request */
- if (info->sstate != PPTP_SESSION_CONFIRMED)
- goto invalid;
-
- cid = pptpReq->icreq.callID;
- DEBUGP("%s, CID=%X\n", pptp_msg_name[msg], ntohs(cid));
- info->cstate = PPTP_CALL_IN_REQ;
- info->pac_call_id = cid;
- break;
-
- case PPTP_IN_CALL_CONNECT:
- /* server tells us an incoming call was established */
- if (info->sstate != PPTP_SESSION_CONFIRMED)
- goto invalid;
- if (info->cstate != PPTP_CALL_IN_REP &&
- info->cstate != PPTP_CALL_IN_CONF)
- goto invalid;
-
- pcid = pptpReq->iccon.peersCallID;
- cid = info->pac_call_id;
-
- if (info->pns_call_id != pcid)
- goto invalid;
-
- DEBUGP("%s, PCID=%X\n", pptp_msg_name[msg], ntohs(pcid));
- info->cstate = PPTP_CALL_IN_CONF;
-
- /* we expect a GRE connection from PAC to PNS */
- exp_gre(ct, cid, pcid);
- break;
-
- case PPTP_CALL_DISCONNECT_NOTIFY:
- /* server confirms disconnect */
- cid = pptpReq->disc.callID;
- DEBUGP("%s, CID=%X\n", pptp_msg_name[msg], ntohs(cid));
- info->cstate = PPTP_CALL_NONE;
-
- /* untrack this call id, unexpect GRE packets */
- pptp_destroy_siblings(ct);
- break;
-
- case PPTP_WAN_ERROR_NOTIFY:
- case PPTP_ECHO_REQUEST:
- case PPTP_ECHO_REPLY:
- /* I don't have to explain these ;) */
- break;
- default:
- goto invalid;
- }
-
- ip_nat_pptp_inbound = rcu_dereference(ip_nat_pptp_hook_inbound);
- if (ip_nat_pptp_inbound)
- return ip_nat_pptp_inbound(pskb, ct, ctinfo, ctlh, pptpReq);
- return NF_ACCEPT;
-
-invalid:
- DEBUGP("invalid %s: type=%d cid=%u pcid=%u "
- "cstate=%d sstate=%d pns_cid=%u pac_cid=%u\n",
- msg <= PPTP_MSG_MAX ? pptp_msg_name[msg] : pptp_msg_name[0],
- msg, ntohs(cid), ntohs(pcid), info->cstate, info->sstate,
- ntohs(info->pns_call_id), ntohs(info->pac_call_id));
- return NF_ACCEPT;
-}
-
-static inline int
-pptp_outbound_pkt(struct sk_buff **pskb,
- struct PptpControlHeader *ctlh,
- union pptp_ctrl_union *pptpReq,
- unsigned int reqlen,
- struct ip_conntrack *ct,
- enum ip_conntrack_info ctinfo)
-{
- struct ip_ct_pptp_master *info = &ct->help.ct_pptp_info;
- u_int16_t msg;
- __be16 cid = 0, pcid = 0;
- typeof(ip_nat_pptp_hook_outbound) ip_nat_pptp_outbound;
-
- msg = ntohs(ctlh->messageType);
- DEBUGP("outbound control message %s\n", pptp_msg_name[msg]);
-
- switch (msg) {
- case PPTP_START_SESSION_REQUEST:
- /* client requests a new control session */
- if (info->sstate != PPTP_SESSION_NONE)
- goto invalid;
- info->sstate = PPTP_SESSION_REQUESTED;
- break;
- case PPTP_STOP_SESSION_REQUEST:
- /* client requests end of control session */
- info->sstate = PPTP_SESSION_STOPREQ;
- break;
-
- case PPTP_OUT_CALL_REQUEST:
- /* client initiating connection to server */
- if (info->sstate != PPTP_SESSION_CONFIRMED)
- goto invalid;
- info->cstate = PPTP_CALL_OUT_REQ;
- /* track PNS call id */
- cid = pptpReq->ocreq.callID;
- DEBUGP("%s, CID=%X\n", pptp_msg_name[msg], ntohs(cid));
- info->pns_call_id = cid;
- break;
- case PPTP_IN_CALL_REPLY:
- /* client answers incoming call */
- if (info->cstate != PPTP_CALL_IN_REQ &&
- info->cstate != PPTP_CALL_IN_REP)
- goto invalid;
-
- cid = pptpReq->icack.callID;
- pcid = pptpReq->icack.peersCallID;
- if (info->pac_call_id != pcid)
- goto invalid;
- DEBUGP("%s, CID=%X PCID=%X\n", pptp_msg_name[msg],
- ntohs(cid), ntohs(pcid));
-
- if (pptpReq->icack.resultCode == PPTP_INCALL_ACCEPT) {
- /* part two of the three-way handshake */
- info->cstate = PPTP_CALL_IN_REP;
- info->pns_call_id = cid;
- } else
- info->cstate = PPTP_CALL_NONE;
- break;
-
- case PPTP_CALL_CLEAR_REQUEST:
- /* client requests hangup of call */
- if (info->sstate != PPTP_SESSION_CONFIRMED)
- goto invalid;
- /* FUTURE: iterate over all calls and check if
- * call ID is valid. We don't do this without newnat,
- * because we only know about the last call */
- info->cstate = PPTP_CALL_CLEAR_REQ;
- break;
- case PPTP_SET_LINK_INFO:
- case PPTP_ECHO_REQUEST:
- case PPTP_ECHO_REPLY:
- /* I don't have to explain these ;) */
- break;
- default:
- goto invalid;
- }
-
- ip_nat_pptp_outbound = rcu_dereference(ip_nat_pptp_hook_outbound);
- if (ip_nat_pptp_outbound)
- return ip_nat_pptp_outbound(pskb, ct, ctinfo, ctlh, pptpReq);
- return NF_ACCEPT;
-
-invalid:
- DEBUGP("invalid %s: type=%d cid=%u pcid=%u "
- "cstate=%d sstate=%d pns_cid=%u pac_cid=%u\n",
- msg <= PPTP_MSG_MAX ? pptp_msg_name[msg] : pptp_msg_name[0],
- msg, ntohs(cid), ntohs(pcid), info->cstate, info->sstate,
- ntohs(info->pns_call_id), ntohs(info->pac_call_id));
- return NF_ACCEPT;
-}
-
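-/* Minimum payload length for each control message type, indexed by
- * PptpControlMessageType; shorter messages are ignored as truncated. */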
-static const unsigned int pptp_msg_size[] = {
- [PPTP_START_SESSION_REQUEST] = sizeof(struct PptpStartSessionRequest),
- [PPTP_START_SESSION_REPLY] = sizeof(struct PptpStartSessionReply),
- [PPTP_STOP_SESSION_REQUEST] = sizeof(struct PptpStopSessionRequest),
- [PPTP_STOP_SESSION_REPLY] = sizeof(struct PptpStopSessionReply),
- [PPTP_OUT_CALL_REQUEST] = sizeof(struct PptpOutCallRequest),
- [PPTP_OUT_CALL_REPLY] = sizeof(struct PptpOutCallReply),
- [PPTP_IN_CALL_REQUEST] = sizeof(struct PptpInCallRequest),
- [PPTP_IN_CALL_REPLY] = sizeof(struct PptpInCallReply),
- [PPTP_IN_CALL_CONNECT] = sizeof(struct PptpInCallConnected),
- [PPTP_CALL_CLEAR_REQUEST] = sizeof(struct PptpClearCallRequest),
- [PPTP_CALL_DISCONNECT_NOTIFY] = sizeof(struct PptpCallDisconnectNotify),
- [PPTP_WAN_ERROR_NOTIFY] = sizeof(struct PptpWanErrorNotify),
- [PPTP_SET_LINK_INFO] = sizeof(struct PptpSetLinkInfo),
-};
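The table above gives, for each control message type, the minimum payload the helper is willing to parse; conntrack_pptp_help below returns NF_ACCEPT without parsing whenever the TCP payload is shorter than the table entry for its type. A stand-alone sketch of that table-driven check follows; the message types and sizes are invented for illustration, not the kernel's definitions.

/*
 * Hypothetical sketch of the length validation above: a message type
 * indexes a table of minimum payload sizes, and anything unknown or
 * shorter than its entry is skipped rather than parsed.
 */
#include <stdio.h>
#include <stddef.h>

#define MSG_MAX 3
static const size_t min_size[MSG_MAX + 1] = {
        [1] = 8,        /* e.g. a start-session request payload */
        [2] = 4,        /* e.g. a stop-session request payload */
        [3] = 16,       /* e.g. an outgoing-call request payload */
};

static int payload_ok(unsigned int msg, size_t len)
{
        if (msg == 0 || msg > MSG_MAX)
                return 0;               /* unknown type: don't parse */
        return len >= min_size[msg];    /* truncated: don't parse */
}

int main(void)
{
        printf("%d\n", payload_ok(3, 20));      /* 1: long enough  */
        printf("%d\n", payload_ok(3, 10));      /* 0: truncated    */
        printf("%d\n", payload_ok(9, 64));      /* 0: unknown type */
        return 0;
}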
-
-/* track caller id inside control connection, call expect_related */
-static int
-conntrack_pptp_help(struct sk_buff **pskb,
- struct ip_conntrack *ct, enum ip_conntrack_info ctinfo)
-{
- int dir = CTINFO2DIR(ctinfo);
- struct ip_ct_pptp_master *info = &ct->help.ct_pptp_info;
- struct tcphdr _tcph, *tcph;
- struct pptp_pkt_hdr _pptph, *pptph;
- struct PptpControlHeader _ctlh, *ctlh;
- union pptp_ctrl_union _pptpReq, *pptpReq;
- unsigned int tcplen = (*pskb)->len - (*pskb)->nh.iph->ihl * 4;
- unsigned int datalen, reqlen, nexthdr_off;
- int oldsstate, oldcstate;
- int ret;
- u_int16_t msg;
-
- /* don't do any tracking before tcp handshake complete */
- if (ctinfo != IP_CT_ESTABLISHED
- && ctinfo != IP_CT_ESTABLISHED+IP_CT_IS_REPLY) {
- DEBUGP("ctinfo = %u, skipping\n", ctinfo);
- return NF_ACCEPT;
- }
-
- nexthdr_off = (*pskb)->nh.iph->ihl*4;
- tcph = skb_header_pointer(*pskb, nexthdr_off, sizeof(_tcph), &_tcph);
- BUG_ON(!tcph);
- nexthdr_off += tcph->doff * 4;
- datalen = tcplen - tcph->doff * 4;
-
- pptph = skb_header_pointer(*pskb, nexthdr_off, sizeof(_pptph), &_pptph);
- if (!pptph) {
- DEBUGP("no full PPTP header, can't track\n");
- return NF_ACCEPT;
- }
- nexthdr_off += sizeof(_pptph);
- datalen -= sizeof(_pptph);
-
- /* if it's not a control message we can't do anything with it */
- if (ntohs(pptph->packetType) != PPTP_PACKET_CONTROL ||
- ntohl(pptph->magicCookie) != PPTP_MAGIC_COOKIE) {
- DEBUGP("not a control packet\n");
- return NF_ACCEPT;
- }
-
- ctlh = skb_header_pointer(*pskb, nexthdr_off, sizeof(_ctlh), &_ctlh);
- if (!ctlh)
- return NF_ACCEPT;
- nexthdr_off += sizeof(_ctlh);
- datalen -= sizeof(_ctlh);
-
- reqlen = datalen;
- msg = ntohs(ctlh->messageType);
- if (msg > 0 && msg <= PPTP_MSG_MAX && reqlen < pptp_msg_size[msg])
- return NF_ACCEPT;
- if (reqlen > sizeof(*pptpReq))
- reqlen = sizeof(*pptpReq);
-
- pptpReq = skb_header_pointer(*pskb, nexthdr_off, reqlen, &_pptpReq);
- if (!pptpReq)
- return NF_ACCEPT;
-
- oldsstate = info->sstate;
- oldcstate = info->cstate;
-
- spin_lock_bh(&ip_pptp_lock);
-
- /* FIXME: We just blindly assume that the control connection is always
- * established from PNS->PAC. However, the RFC makes no guarantee */
- if (dir == IP_CT_DIR_ORIGINAL)
- /* client -> server (PNS -> PAC) */
- ret = pptp_outbound_pkt(pskb, ctlh, pptpReq, reqlen, ct,
- ctinfo);
- else
- /* server -> client (PAC -> PNS) */
- ret = pptp_inbound_pkt(pskb, ctlh, pptpReq, reqlen, ct,
- ctinfo);
- DEBUGP("sstate: %d->%d, cstate: %d->%d\n",
- oldsstate, info->sstate, oldcstate, info->cstate);
- spin_unlock_bh(&ip_pptp_lock);
-
- return ret;
-}
-
-/* control protocol helper */
-static struct ip_conntrack_helper pptp = {
- .list = { NULL, NULL },
- .name = "pptp",
- .me = THIS_MODULE,
- .max_expected = 2,
- .timeout = 5 * 60,
- .tuple = { .src = { .ip = 0,
- .u = { .tcp = { .port =
- __constant_htons(PPTP_CONTROL_PORT) } }
- },
- .dst = { .ip = 0,
- .u = { .all = 0 },
- .protonum = IPPROTO_TCP
- }
- },
- .mask = { .src = { .ip = 0,
- .u = { .tcp = { .port = __constant_htons(0xffff) } }
- },
- .dst = { .ip = 0,
- .u = { .all = 0 },
- .protonum = 0xff
- }
- },
- .help = conntrack_pptp_help,
- .destroy = pptp_destroy_siblings,
-};
-
-extern void ip_ct_proto_gre_fini(void);
-extern int __init ip_ct_proto_gre_init(void);
-
-/* ip_conntrack_pptp initialization */
-static int __init ip_conntrack_helper_pptp_init(void)
-{
- int retcode;
-
- retcode = ip_ct_proto_gre_init();
- if (retcode < 0)
- return retcode;
-
- DEBUGP(" registering helper\n");
- if ((retcode = ip_conntrack_helper_register(&pptp))) {
- printk(KERN_ERR "Unable to register conntrack application "
- "helper for pptp: %d\n", retcode);
- ip_ct_proto_gre_fini();
- return retcode;
- }
-
- printk("ip_conntrack_pptp version %s loaded\n", IP_CT_PPTP_VERSION);
- return 0;
-}
-
-static void __exit ip_conntrack_helper_pptp_fini(void)
-{
- ip_conntrack_helper_unregister(&pptp);
- ip_ct_proto_gre_fini();
- printk("ip_conntrack_pptp version %s unloaded\n", IP_CT_PPTP_VERSION);
-}
-
-module_init(ip_conntrack_helper_pptp_init);
-module_exit(ip_conntrack_helper_pptp_fini);
-
-EXPORT_SYMBOL(ip_nat_pptp_hook_outbound);
-EXPORT_SYMBOL(ip_nat_pptp_hook_inbound);
-EXPORT_SYMBOL(ip_nat_pptp_hook_exp_gre);
-EXPORT_SYMBOL(ip_nat_pptp_hook_expectfn);
diff --git a/net/ipv4/netfilter/ip_conntrack_irc.c b/net/ipv4/netfilter/ip_conntrack_irc.c
deleted file mode 100644
index 053e591f407..00000000000
--- a/net/ipv4/netfilter/ip_conntrack_irc.c
+++ /dev/null
@@ -1,314 +0,0 @@
-/* IRC extension for IP connection tracking, Version 1.21
- * (C) 2000-2002 by Harald Welte <laforge@gnumonks.org>
- * based on RR's ip_conntrack_ftp.c
- *
- * ip_conntrack_irc.c,v 1.21 2002/02/05 14:49:26 laforge Exp
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
- **
- * Module load syntax:
- * insmod ip_conntrack_irc.o ports=port1,port2,...port<MAX_PORTS>
- * max_dcc_channels=n dcc_timeout=secs
- *
- * Please give the ports of all IRC servers you wish to connect to.
- * If you don't specify ports, the default will be port 6667.
- * With max_dcc_channels you can define the maximum number of pending
- * (not yet answered) DCC channels per IRC session (default 8).
- * With dcc_timeout you can specify how long the system waits for
- * an expected DCC channel (default 300 seconds).
- *
- */
-
-#include <linux/module.h>
-#include <linux/netfilter.h>
-#include <linux/ip.h>
-#include <net/checksum.h>
-#include <net/tcp.h>
-
-#include <linux/netfilter_ipv4/ip_conntrack_helper.h>
-#include <linux/netfilter_ipv4/ip_conntrack_irc.h>
-#include <linux/moduleparam.h>
-
-#define MAX_PORTS 8
-static unsigned short ports[MAX_PORTS];
-static int ports_c;
-static unsigned int max_dcc_channels = 8;
-static unsigned int dcc_timeout = 300;
-/* This is slow, but it's simple. --RR */
-static char *irc_buffer;
-static DEFINE_SPINLOCK(irc_buffer_lock);
-
-unsigned int (*ip_nat_irc_hook)(struct sk_buff **pskb,
- enum ip_conntrack_info ctinfo,
- unsigned int matchoff,
- unsigned int matchlen,
- struct ip_conntrack_expect *exp);
-EXPORT_SYMBOL_GPL(ip_nat_irc_hook);
-
-MODULE_AUTHOR("Harald Welte <laforge@netfilter.org>");
-MODULE_DESCRIPTION("IRC (DCC) connection tracking helper");
-MODULE_LICENSE("GPL");
-module_param_array(ports, ushort, &ports_c, 0400);
-MODULE_PARM_DESC(ports, "port numbers of IRC servers");
-module_param(max_dcc_channels, uint, 0400);
-MODULE_PARM_DESC(max_dcc_channels, "max number of expected DCC channels per IRC session");
-module_param(dcc_timeout, uint, 0400);
-MODULE_PARM_DESC(dcc_timeout, "timeout for unestablished DCC channels");
-
-static const char *dccprotos[] = { "SEND ", "CHAT ", "MOVE ", "TSEND ", "SCHAT " };
-#define MINMATCHLEN 5
-
-#if 0
-#define DEBUGP(format, args...) printk(KERN_DEBUG "%s:%s:" format, \
- __FILE__, __FUNCTION__ , ## args)
-#else
-#define DEBUGP(format, args...)
-#endif
-
-static int parse_dcc(char *data, char *data_end, u_int32_t *ip,
- u_int16_t *port, char **ad_beg_p, char **ad_end_p)
-/* tries to get the ip_addr and port out of a dcc command
- return value: -1 on failure, 0 on success
- data pointer to first byte of DCC command data
- data_end pointer to last byte of dcc command data
- ip returns parsed ip of dcc command
- port returns parsed port of dcc command
- ad_beg_p returns pointer to first byte of addr data
- ad_end_p returns pointer to last byte of addr data */
-{
-
- /* at least 12: "AAAAAAAA P\1\n" */
- while (*data++ != ' ')
- if (data > data_end - 12)
- return -1;
-
- *ad_beg_p = data;
- *ip = simple_strtoul(data, &data, 10);
-
- /* skip blanks between ip and port */
- while (*data == ' ') {
- if (data >= data_end)
- return -1;
- data++;
- }
-
- *port = simple_strtoul(data, &data, 10);
- *ad_end_p = data;
-
- return 0;
-}
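For illustration, here is a stand-alone harness for the same parsing idea as parse_dcc above, using strtoul in place of the kernel's simple_strtoul. The sample DCC SEND line and its decimal address are made up; real DCC lines carry the address as a decimal 32-bit integer exactly like this.

/*
 * User-space sketch of DCC argument parsing: skip the subcommand and
 * file name tokens, then read the decimal ip and port.
 */
#include <stdio.h>
#include <stdlib.h>

int main(void)
{
        char buf[] = "SEND somefile 3232235777 2000"; /* 192.168.1.1:2000 */
        char *p = buf;
        unsigned long ip, port;

        while (*p && *p != ' ')         /* skip the subcommand */
                p++;
        while (*p == ' ')
                p++;
        while (*p && *p != ' ')         /* skip the file name */
                p++;

        ip = strtoul(p, &p, 10);        /* strtoul skips the blanks */
        port = strtoul(p, &p, 10);

        printf("ip=%lu.%lu.%lu.%lu port=%lu\n",
               (ip >> 24) & 0xff, (ip >> 16) & 0xff,
               (ip >> 8) & 0xff, ip & 0xff, port);
        return 0;
}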
-
-static int help(struct sk_buff **pskb,
- struct ip_conntrack *ct, enum ip_conntrack_info ctinfo)
-{
- unsigned int dataoff;
- struct tcphdr _tcph, *th;
- char *data, *data_limit, *ib_ptr;
- int dir = CTINFO2DIR(ctinfo);
- struct ip_conntrack_expect *exp;
- u32 seq;
- u_int32_t dcc_ip;
- u_int16_t dcc_port;
- int i, ret = NF_ACCEPT;
- char *addr_beg_p, *addr_end_p;
- typeof(ip_nat_irc_hook) ip_nat_irc;
-
- DEBUGP("entered\n");
-
- /* If packet is coming from IRC server */
- if (dir == IP_CT_DIR_REPLY)
- return NF_ACCEPT;
-
- /* Until there's been traffic both ways, don't look in packets. */
- if (ctinfo != IP_CT_ESTABLISHED
- && ctinfo != IP_CT_ESTABLISHED + IP_CT_IS_REPLY) {
- DEBUGP("Conntrackinfo = %u\n", ctinfo);
- return NF_ACCEPT;
- }
-
- /* Not a full tcp header? */
- th = skb_header_pointer(*pskb, (*pskb)->nh.iph->ihl*4,
- sizeof(_tcph), &_tcph);
- if (th == NULL)
- return NF_ACCEPT;
-
- /* No data? */
- dataoff = (*pskb)->nh.iph->ihl*4 + th->doff*4;
- if (dataoff >= (*pskb)->len)
- return NF_ACCEPT;
-
- spin_lock_bh(&irc_buffer_lock);
- ib_ptr = skb_header_pointer(*pskb, dataoff,
- (*pskb)->len - dataoff, irc_buffer);
- BUG_ON(ib_ptr == NULL);
-
- data = ib_ptr;
- data_limit = ib_ptr + (*pskb)->len - dataoff;
-
- /* strlen("\1DCC SENT t AAAAAAAA P\1\n")=24
- * 5+MINMATCHLEN+strlen("t AAAAAAAA P\1\n")=14 */
- while (data < (data_limit - (19 + MINMATCHLEN))) {
- if (memcmp(data, "\1DCC ", 5)) {
- data++;
- continue;
- }
-
- data += 5;
- /* we have at least (19+MINMATCHLEN)-5 bytes valid data left */
-
- DEBUGP("DCC found in master %u.%u.%u.%u:%u %u.%u.%u.%u:%u...\n",
- NIPQUAD(iph->saddr), ntohs(th->source),
- NIPQUAD(iph->daddr), ntohs(th->dest));
-
- for (i = 0; i < ARRAY_SIZE(dccprotos); i++) {
- if (memcmp(data, dccprotos[i], strlen(dccprotos[i]))) {
- /* no match */
- continue;
- }
-
- DEBUGP("DCC %s detected\n", dccprotos[i]);
- data += strlen(dccprotos[i]);
- /* we have at least
- * (19+MINMATCHLEN)-5-dccprotos[i].matchlen bytes valid
- * data left (== 14/13 bytes) */
- if (parse_dcc((char *)data, data_limit, &dcc_ip,
- &dcc_port, &addr_beg_p, &addr_end_p)) {
- /* unable to parse */
- DEBUGP("unable to parse dcc command\n");
- continue;
- }
- DEBUGP("DCC bound ip/port: %u.%u.%u.%u:%u\n",
- HIPQUAD(dcc_ip), dcc_port);
-
- /* dcc_ip can be the internal OR external (NAT'ed) IP
- * Tiago Sousa <mirage@kaotik.org> */
- if (ct->tuplehash[dir].tuple.src.ip != htonl(dcc_ip)
- && ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.ip != htonl(dcc_ip)) {
- if (net_ratelimit())
- printk(KERN_WARNING
- "Forged DCC command from "
- "%u.%u.%u.%u: %u.%u.%u.%u:%u\n",
- NIPQUAD(ct->tuplehash[dir].tuple.src.ip),
- HIPQUAD(dcc_ip), dcc_port);
-
- continue;
- }
-
- exp = ip_conntrack_expect_alloc(ct);
- if (exp == NULL) {
- ret = NF_DROP;
- goto out;
- }
-
- /* save position of address in dcc string,
- * necessary for NAT */
- DEBUGP("tcph->seq = %u\n", th->seq);
- seq = ntohl(th->seq) + (addr_beg_p - ib_ptr);
-
- /* We refer to the reverse direction ("!dir")
- * tuples here, because we're expecting
- * something in the other direction.
- * Doesn't matter unless NAT is happening. */
- exp->tuple = ((struct ip_conntrack_tuple)
- { { 0, { 0 } },
- { ct->tuplehash[!dir].tuple.dst.ip,
- { .tcp = { htons(dcc_port) } },
- IPPROTO_TCP }});
- exp->mask = ((struct ip_conntrack_tuple)
- { { 0, { 0 } },
- { htonl(0xFFFFFFFF),
- { .tcp = { htons(0xFFFF) } }, 0xFF }});
- exp->expectfn = NULL;
- exp->flags = 0;
- ip_nat_irc = rcu_dereference(ip_nat_irc_hook);
- if (ip_nat_irc)
- ret = ip_nat_irc(pskb, ctinfo,
- addr_beg_p - ib_ptr,
- addr_end_p - addr_beg_p,
- exp);
- else if (ip_conntrack_expect_related(exp) != 0)
- ret = NF_DROP;
- ip_conntrack_expect_put(exp);
- goto out;
- } /* for .. NUM_DCCPROTO */
- } /* while data < ... */
-
- out:
- spin_unlock_bh(&irc_buffer_lock);
- return ret;
-}
-
-static struct ip_conntrack_helper irc_helpers[MAX_PORTS];
-static char irc_names[MAX_PORTS][sizeof("irc-65535")];
-
-static void ip_conntrack_irc_fini(void);
-
-static int __init ip_conntrack_irc_init(void)
-{
- int i, ret;
- struct ip_conntrack_helper *hlpr;
- char *tmpname;
-
- if (max_dcc_channels < 1) {
- printk("ip_conntrack_irc: max_dcc_channels must be a positive integer\n");
- return -EBUSY;
- }
-
- irc_buffer = kmalloc(65536, GFP_KERNEL);
- if (!irc_buffer)
- return -ENOMEM;
-
- /* If no port given, default to standard irc port */
- if (ports_c == 0)
- ports[ports_c++] = IRC_PORT;
-
- for (i = 0; i < ports_c; i++) {
- hlpr = &irc_helpers[i];
- hlpr->tuple.src.u.tcp.port = htons(ports[i]);
- hlpr->tuple.dst.protonum = IPPROTO_TCP;
- hlpr->mask.src.u.tcp.port = htons(0xFFFF);
- hlpr->mask.dst.protonum = 0xFF;
- hlpr->max_expected = max_dcc_channels;
- hlpr->timeout = dcc_timeout;
- hlpr->me = THIS_MODULE;
- hlpr->help = help;
-
- tmpname = &irc_names[i][0];
- if (ports[i] == IRC_PORT)
- sprintf(tmpname, "irc");
- else
- sprintf(tmpname, "irc-%d", i);
- hlpr->name = tmpname;
-
- DEBUGP("port #%d: %d\n", i, ports[i]);
-
- ret = ip_conntrack_helper_register(hlpr);
-
- if (ret) {
- printk("ip_conntrack_irc: ERROR registering port %d\n",
- ports[i]);
- ip_conntrack_irc_fini();
- return -EBUSY;
- }
- }
- return 0;
-}
-
-/* This function is intentionally _NOT_ defined as __exit, because
- * it is needed by the init function */
-static void ip_conntrack_irc_fini(void)
-{
- int i;
- for (i = 0; i < ports_c; i++) {
- DEBUGP("unregistering port %d\n",
- ports[i]);
- ip_conntrack_helper_unregister(&irc_helpers[i]);
- }
- kfree(irc_buffer);
-}
-
-module_init(ip_conntrack_irc_init);
-module_exit(ip_conntrack_irc_fini);
diff --git a/net/ipv4/netfilter/ip_conntrack_netbios_ns.c b/net/ipv4/netfilter/ip_conntrack_netbios_ns.c
deleted file mode 100644
index cc6dd49c9da..00000000000
--- a/net/ipv4/netfilter/ip_conntrack_netbios_ns.c
+++ /dev/null
@@ -1,143 +0,0 @@
-/*
- * NetBIOS name service broadcast connection tracking helper
- *
- * (c) 2005 Patrick McHardy <kaber@trash.net>
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
- */
-/*
- * This helper tracks locally originating NetBIOS name service
- * requests by issuing permanent expectations (valid until
- * timing out) matching all reply connections from the
- * destination network. The only NetBIOS specific thing is
- * actually the port number.
- */
-#include <linux/kernel.h>
-#include <linux/module.h>
-#include <linux/init.h>
-#include <linux/skbuff.h>
-#include <linux/netdevice.h>
-#include <linux/inetdevice.h>
-#include <linux/if_addr.h>
-#include <linux/in.h>
-#include <linux/ip.h>
-#include <net/route.h>
-
-#include <linux/netfilter.h>
-#include <linux/netfilter_ipv4.h>
-#include <linux/netfilter_ipv4/ip_conntrack.h>
-#include <linux/netfilter_ipv4/ip_conntrack_helper.h>
-
-#define NMBD_PORT 137
-
-MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>");
-MODULE_DESCRIPTION("NetBIOS name service broadcast connection tracking helper");
-MODULE_LICENSE("GPL");
-
-static unsigned int timeout = 3;
-module_param(timeout, uint, 0400);
-MODULE_PARM_DESC(timeout, "timeout for master connection/replies in seconds");
-
-static int help(struct sk_buff **pskb,
- struct ip_conntrack *ct, enum ip_conntrack_info ctinfo)
-{
- struct ip_conntrack_expect *exp;
- struct iphdr *iph = (*pskb)->nh.iph;
- struct rtable *rt = (struct rtable *)(*pskb)->dst;
- struct in_device *in_dev;
- __be32 mask = 0;
-
- /* we're only interested in locally generated packets */
- if ((*pskb)->sk == NULL)
- goto out;
- if (rt == NULL || !(rt->rt_flags & RTCF_BROADCAST))
- goto out;
- if (CTINFO2DIR(ctinfo) != IP_CT_DIR_ORIGINAL)
- goto out;
-
- rcu_read_lock();
- in_dev = __in_dev_get_rcu(rt->u.dst.dev);
- if (in_dev != NULL) {
- for_primary_ifa(in_dev) {
- if (ifa->ifa_broadcast == iph->daddr) {
- mask = ifa->ifa_mask;
- break;
- }
- } endfor_ifa(in_dev);
- }
- rcu_read_unlock();
-
- if (mask == 0)
- goto out;
-
- exp = ip_conntrack_expect_alloc(ct);
- if (exp == NULL)
- goto out;
-
- exp->tuple = ct->tuplehash[IP_CT_DIR_REPLY].tuple;
- exp->tuple.src.u.udp.port = htons(NMBD_PORT);
-
- exp->mask.src.ip = mask;
- exp->mask.src.u.udp.port = htons(0xFFFF);
- exp->mask.dst.ip = htonl(0xFFFFFFFF);
- exp->mask.dst.u.udp.port = htons(0xFFFF);
- exp->mask.dst.protonum = 0xFF;
-
- exp->expectfn = NULL;
- exp->flags = IP_CT_EXPECT_PERMANENT;
-
- ip_conntrack_expect_related(exp);
- ip_conntrack_expect_put(exp);
-
- ip_ct_refresh(ct, *pskb, timeout * HZ);
-out:
- return NF_ACCEPT;
-}
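The mask captured from ifa->ifa_mask above becomes the expectation's source mask, so the expectation matches a reply from any host inside the broadcast's network rather than from a single peer. A minimal stand-alone illustration of that masked comparison, with made-up addresses:

/*
 * Sketch of netmask-wide matching: a source address matches when it
 * falls inside the broadcast's network under the interface mask.
 */
#include <stdio.h>
#include <stdint.h>
#include <arpa/inet.h>

static int matches(uint32_t src, uint32_t net, uint32_t mask)
{
        return (src & mask) == (net & mask);
}

int main(void)
{
        uint32_t mask = htonl(0xffffff00);      /* 255.255.255.0 */
        uint32_t net  = htonl(0xc0a80100);      /* 192.168.1.0   */

        printf("%d\n", matches(htonl(0xc0a80117), net, mask)); /* 1: 192.168.1.23 */
        printf("%d\n", matches(htonl(0x0a000001), net, mask)); /* 0: 10.0.0.1     */
        return 0;
}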
-
-static struct ip_conntrack_helper helper = {
- .name = "netbios-ns",
- .tuple = {
- .src = {
- .u = {
- .udp = {
- .port = __constant_htons(NMBD_PORT),
- }
- }
- },
- .dst = {
- .protonum = IPPROTO_UDP,
- },
- },
- .mask = {
- .src = {
- .u = {
- .udp = {
- .port = __constant_htons(0xFFFF),
- }
- }
- },
- .dst = {
- .protonum = 0xFF,
- },
- },
- .max_expected = 1,
- .me = THIS_MODULE,
- .help = help,
-};
-
-static int __init ip_conntrack_netbios_ns_init(void)
-{
- helper.timeout = timeout;
- return ip_conntrack_helper_register(&helper);
-}
-
-static void __exit ip_conntrack_netbios_ns_fini(void)
-{
- ip_conntrack_helper_unregister(&helper);
-}
-
-module_init(ip_conntrack_netbios_ns_init);
-module_exit(ip_conntrack_netbios_ns_fini);
diff --git a/net/ipv4/netfilter/ip_conntrack_netlink.c b/net/ipv4/netfilter/ip_conntrack_netlink.c
deleted file mode 100644
index 9228b76ccd9..00000000000
--- a/net/ipv4/netfilter/ip_conntrack_netlink.c
+++ /dev/null
@@ -1,1577 +0,0 @@
-/* Connection tracking via netlink socket. Allows for user space
- * protocol helpers and general trouble making from userspace.
- *
- * (C) 2001 by Jay Schulist <jschlst@samba.org>
- * (C) 2002-2005 by Harald Welte <laforge@gnumonks.org>
- * (C) 2003 by Patrick Mchardy <kaber@trash.net>
- * (C) 2005-2006 by Pablo Neira Ayuso <pablo@eurodev.net>
- *
- * I've reworked this stuff to use attributes instead of conntrack
- * structures. 5.44 am. I need more tea. --pablo 05/07/11.
- *
- * Initial connection tracking via netlink development funded and
- * generally made possible by Network Robots, Inc. (www.networkrobots.com)
- *
- * Further development of this code funded by Astaro AG (http://www.astaro.com)
- *
- * This software may be used and distributed according to the terms
- * of the GNU General Public License, incorporated herein by reference.
- */
-
-#include <linux/init.h>
-#include <linux/module.h>
-#include <linux/kernel.h>
-#include <linux/types.h>
-#include <linux/timer.h>
-#include <linux/skbuff.h>
-#include <linux/errno.h>
-#include <linux/netlink.h>
-#include <linux/spinlock.h>
-#include <linux/interrupt.h>
-#include <linux/notifier.h>
-
-#include <linux/netfilter.h>
-#include <linux/netfilter_ipv4/ip_conntrack.h>
-#include <linux/netfilter_ipv4/ip_conntrack_core.h>
-#include <linux/netfilter_ipv4/ip_conntrack_helper.h>
-#include <linux/netfilter_ipv4/ip_conntrack_protocol.h>
-#include <linux/netfilter_ipv4/ip_nat_protocol.h>
-
-#include <linux/netfilter/nfnetlink.h>
-#include <linux/netfilter/nfnetlink_conntrack.h>
-
-MODULE_LICENSE("GPL");
-
-static char __initdata version[] = "0.90";
-
-static inline int
-ctnetlink_dump_tuples_proto(struct sk_buff *skb,
- const struct ip_conntrack_tuple *tuple,
- struct ip_conntrack_protocol *proto)
-{
- int ret = 0;
- struct nfattr *nest_parms = NFA_NEST(skb, CTA_TUPLE_PROTO);
-
- NFA_PUT(skb, CTA_PROTO_NUM, sizeof(u_int8_t), &tuple->dst.protonum);
-
- if (likely(proto->tuple_to_nfattr))
- ret = proto->tuple_to_nfattr(skb, tuple);
-
- NFA_NEST_END(skb, nest_parms);
-
- return ret;
-
-nfattr_failure:
- return -1;
-}
-
-static inline int
-ctnetlink_dump_tuples_ip(struct sk_buff *skb,
- const struct ip_conntrack_tuple *tuple)
-{
- struct nfattr *nest_parms = NFA_NEST(skb, CTA_TUPLE_IP);
-
- NFA_PUT(skb, CTA_IP_V4_SRC, sizeof(__be32), &tuple->src.ip);
- NFA_PUT(skb, CTA_IP_V4_DST, sizeof(__be32), &tuple->dst.ip);
-
- NFA_NEST_END(skb, nest_parms);
-
- return 0;
-
-nfattr_failure:
- return -1;
-}
-
-static inline int
-ctnetlink_dump_tuples(struct sk_buff *skb,
- const struct ip_conntrack_tuple *tuple)
-{
- int ret;
- struct ip_conntrack_protocol *proto;
-
- ret = ctnetlink_dump_tuples_ip(skb, tuple);
- if (unlikely(ret < 0))
- return ret;
-
- proto = ip_conntrack_proto_find_get(tuple->dst.protonum);
- ret = ctnetlink_dump_tuples_proto(skb, tuple, proto);
- ip_conntrack_proto_put(proto);
-
- return ret;
-}
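The NFA_NEST/NFA_PUT/NFA_NEST_END pattern used throughout these dump functions reserves a nest header, appends attributes behind it, and patches the header's length once the nest is complete. A simplified user-space sketch of that idiom follows; it omits the alignment padding real netlink attributes use, and the attribute types are invented.

/*
 * Sketch of nested type/length/value encoding: begin a nest with an
 * unfinished header, append attributes, then fix up the nest length.
 */
#include <stdio.h>
#include <string.h>
#include <stdint.h>

struct tlv { uint16_t len, type; };

static unsigned char buf[256];
static size_t used;

static struct tlv *nest_begin(uint16_t type)
{
        struct tlv *hdr = (struct tlv *)(buf + used);

        hdr->type = type;
        used += sizeof(*hdr);           /* length patched in nest_end() */
        return hdr;
}

static void put_attr(uint16_t type, const void *data, uint16_t dlen)
{
        struct tlv *hdr = (struct tlv *)(buf + used);

        hdr->type = type;
        hdr->len = sizeof(*hdr) + dlen;
        memcpy(hdr + 1, data, dlen);    /* payload follows the header */
        used += hdr->len;
}

static void nest_end(struct tlv *hdr)
{
        hdr->len = (buf + used) - (unsigned char *)hdr;
}

int main(void)
{
        uint32_t src = 0x01020304, dst = 0x05060708;
        struct tlv *nest = nest_begin(1);       /* e.g. a "tuple ip" nest */

        put_attr(2, &src, sizeof(src));         /* e.g. v4 source      */
        put_attr(3, &dst, sizeof(dst));         /* e.g. v4 destination */
        nest_end(nest);

        printf("nest length: %u bytes\n", (unsigned)nest->len);
        return 0;
}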
-
-static inline int
-ctnetlink_dump_status(struct sk_buff *skb, const struct ip_conntrack *ct)
-{
- __be32 status = htonl((u_int32_t) ct->status);
- NFA_PUT(skb, CTA_STATUS, sizeof(status), &status);
- return 0;
-
-nfattr_failure:
- return -1;
-}
-
-static inline int
-ctnetlink_dump_timeout(struct sk_buff *skb, const struct ip_conntrack *ct)
-{
- long timeout_l = ct->timeout.expires - jiffies;
- __be32 timeout;
-
- if (timeout_l < 0)
- timeout = 0;
- else
- timeout = htonl(timeout_l / HZ);
-
- NFA_PUT(skb, CTA_TIMEOUT, sizeof(timeout), &timeout);
- return 0;
-
-nfattr_failure:
- return -1;
-}
-
-static inline int
-ctnetlink_dump_protoinfo(struct sk_buff *skb, const struct ip_conntrack *ct)
-{
- struct ip_conntrack_protocol *proto = ip_conntrack_proto_find_get(ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.protonum);
-
- struct nfattr *nest_proto;
- int ret;
-
- if (!proto->to_nfattr) {
- ip_conntrack_proto_put(proto);
- return 0;
- }
-
- nest_proto = NFA_NEST(skb, CTA_PROTOINFO);
-
- ret = proto->to_nfattr(skb, nest_proto, ct);
-
- ip_conntrack_proto_put(proto);
-
- NFA_NEST_END(skb, nest_proto);
-
- return ret;
-
-nfattr_failure:
- ip_conntrack_proto_put(proto);
- return -1;
-}
-
-static inline int
-ctnetlink_dump_helpinfo(struct sk_buff *skb, const struct ip_conntrack *ct)
-{
- struct nfattr *nest_helper;
-
- if (!ct->helper)
- return 0;
-
- nest_helper = NFA_NEST(skb, CTA_HELP);
- NFA_PUT(skb, CTA_HELP_NAME, strlen(ct->helper->name), ct->helper->name);
-
- if (ct->helper->to_nfattr)
- ct->helper->to_nfattr(skb, ct);
-
- NFA_NEST_END(skb, nest_helper);
-
- return 0;
-
-nfattr_failure:
- return -1;
-}
-
-#ifdef CONFIG_IP_NF_CT_ACCT
-static inline int
-ctnetlink_dump_counters(struct sk_buff *skb, const struct ip_conntrack *ct,
- enum ip_conntrack_dir dir)
-{
- enum ctattr_type type = dir ? CTA_COUNTERS_REPLY: CTA_COUNTERS_ORIG;
- struct nfattr *nest_count = NFA_NEST(skb, type);
- __be32 tmp;
-
- tmp = htonl(ct->counters[dir].packets);
- NFA_PUT(skb, CTA_COUNTERS32_PACKETS, sizeof(__be32), &tmp);
-
- tmp = htonl(ct->counters[dir].bytes);
- NFA_PUT(skb, CTA_COUNTERS32_BYTES, sizeof(__be32), &tmp);
-
- NFA_NEST_END(skb, nest_count);
-
- return 0;
-
-nfattr_failure:
- return -1;
-}
-#else
-#define ctnetlink_dump_counters(a, b, c) (0)
-#endif
-
-#ifdef CONFIG_IP_NF_CONNTRACK_MARK
-static inline int
-ctnetlink_dump_mark(struct sk_buff *skb, const struct ip_conntrack *ct)
-{
- __be32 mark = htonl(ct->mark);
-
- NFA_PUT(skb, CTA_MARK, sizeof(__be32), &mark);
- return 0;
-
-nfattr_failure:
- return -1;
-}
-#else
-#define ctnetlink_dump_mark(a, b) (0)
-#endif
-
-static inline int
-ctnetlink_dump_id(struct sk_buff *skb, const struct ip_conntrack *ct)
-{
- __be32 id = htonl(ct->id);
- NFA_PUT(skb, CTA_ID, sizeof(__be32), &id);
- return 0;
-
-nfattr_failure:
- return -1;
-}
-
-static inline int
-ctnetlink_dump_use(struct sk_buff *skb, const struct ip_conntrack *ct)
-{
- __be32 use = htonl(atomic_read(&ct->ct_general.use));
-
- NFA_PUT(skb, CTA_USE, sizeof(__be32), &use);
- return 0;
-
-nfattr_failure:
- return -1;
-}
-
-#define tuple(ct, dir) (&(ct)->tuplehash[dir].tuple)
-
-static int
-ctnetlink_fill_info(struct sk_buff *skb, u32 pid, u32 seq,
- int event, int nowait,
- const struct ip_conntrack *ct)
-{
- struct nlmsghdr *nlh;
- struct nfgenmsg *nfmsg;
- struct nfattr *nest_parms;
- unsigned char *b;
-
- b = skb->tail;
-
- event |= NFNL_SUBSYS_CTNETLINK << 8;
- nlh = NLMSG_PUT(skb, pid, seq, event, sizeof(struct nfgenmsg));
- nfmsg = NLMSG_DATA(nlh);
-
- nlh->nlmsg_flags = (nowait && pid) ? NLM_F_MULTI : 0;
- nfmsg->nfgen_family = AF_INET;
- nfmsg->version = NFNETLINK_V0;
- nfmsg->res_id = 0;
-
- nest_parms = NFA_NEST(skb, CTA_TUPLE_ORIG);
- if (ctnetlink_dump_tuples(skb, tuple(ct, IP_CT_DIR_ORIGINAL)) < 0)
- goto nfattr_failure;
- NFA_NEST_END(skb, nest_parms);
-
- nest_parms = NFA_NEST(skb, CTA_TUPLE_REPLY);
- if (ctnetlink_dump_tuples(skb, tuple(ct, IP_CT_DIR_REPLY)) < 0)
- goto nfattr_failure;
- NFA_NEST_END(skb, nest_parms);
-
- if (ctnetlink_dump_status(skb, ct) < 0 ||
- ctnetlink_dump_timeout(skb, ct) < 0 ||
- ctnetlink_dump_counters(skb, ct, IP_CT_DIR_ORIGINAL) < 0 ||
- ctnetlink_dump_counters(skb, ct, IP_CT_DIR_REPLY) < 0 ||
- ctnetlink_dump_protoinfo(skb, ct) < 0 ||
- ctnetlink_dump_helpinfo(skb, ct) < 0 ||
- ctnetlink_dump_mark(skb, ct) < 0 ||
- ctnetlink_dump_id(skb, ct) < 0 ||
- ctnetlink_dump_use(skb, ct) < 0)
- goto nfattr_failure;
-
- nlh->nlmsg_len = skb->tail - b;
- return skb->len;
-
-nlmsg_failure:
-nfattr_failure:
- skb_trim(skb, b - skb->data);
- return -1;
-}
-
-#ifdef CONFIG_IP_NF_CONNTRACK_EVENTS
-static int ctnetlink_conntrack_event(struct notifier_block *this,
- unsigned long events, void *ptr)
-{
- struct nlmsghdr *nlh;
- struct nfgenmsg *nfmsg;
- struct nfattr *nest_parms;
- struct ip_conntrack *ct = (struct ip_conntrack *)ptr;
- struct sk_buff *skb;
- unsigned int type;
- unsigned char *b;
- unsigned int flags = 0, group;
-
- /* ignore our fake conntrack entry */
- if (ct == &ip_conntrack_untracked)
- return NOTIFY_DONE;
-
- if (events & IPCT_DESTROY) {
- type = IPCTNL_MSG_CT_DELETE;
- group = NFNLGRP_CONNTRACK_DESTROY;
- } else if (events & (IPCT_NEW | IPCT_RELATED)) {
- type = IPCTNL_MSG_CT_NEW;
- flags = NLM_F_CREATE|NLM_F_EXCL;
- group = NFNLGRP_CONNTRACK_NEW;
- } else if (events & (IPCT_STATUS | IPCT_PROTOINFO)) {
- type = IPCTNL_MSG_CT_NEW;
- group = NFNLGRP_CONNTRACK_UPDATE;
- } else
- return NOTIFY_DONE;
-
- if (!nfnetlink_has_listeners(group))
- return NOTIFY_DONE;
-
- skb = alloc_skb(NLMSG_GOODSIZE, GFP_ATOMIC);
- if (!skb)
- return NOTIFY_DONE;
-
- b = skb->tail;
-
- type |= NFNL_SUBSYS_CTNETLINK << 8;
- nlh = NLMSG_PUT(skb, 0, 0, type, sizeof(struct nfgenmsg));
- nfmsg = NLMSG_DATA(nlh);
-
- nlh->nlmsg_flags = flags;
- nfmsg->nfgen_family = AF_INET;
- nfmsg->version = NFNETLINK_V0;
- nfmsg->res_id = 0;
-
- nest_parms = NFA_NEST(skb, CTA_TUPLE_ORIG);
- if (ctnetlink_dump_tuples(skb, tuple(ct, IP_CT_DIR_ORIGINAL)) < 0)
- goto nfattr_failure;
- NFA_NEST_END(skb, nest_parms);
-
- nest_parms = NFA_NEST(skb, CTA_TUPLE_REPLY);
- if (ctnetlink_dump_tuples(skb, tuple(ct, IP_CT_DIR_REPLY)) < 0)
- goto nfattr_failure;
- NFA_NEST_END(skb, nest_parms);
-
- if (events & IPCT_DESTROY) {
- if (ctnetlink_dump_counters(skb, ct, IP_CT_DIR_ORIGINAL) < 0 ||
- ctnetlink_dump_counters(skb, ct, IP_CT_DIR_REPLY) < 0)
- goto nfattr_failure;
- } else {
- if (ctnetlink_dump_status(skb, ct) < 0)
- goto nfattr_failure;
-
- if (ctnetlink_dump_timeout(skb, ct) < 0)
- goto nfattr_failure;
-
- if (events & IPCT_PROTOINFO
- && ctnetlink_dump_protoinfo(skb, ct) < 0)
- goto nfattr_failure;
-
- if ((events & IPCT_HELPER || ct->helper)
- && ctnetlink_dump_helpinfo(skb, ct) < 0)
- goto nfattr_failure;
-
-#ifdef CONFIG_IP_NF_CONNTRACK_MARK
- if ((events & IPCT_MARK || ct->mark)
- && ctnetlink_dump_mark(skb, ct) < 0)
- goto nfattr_failure;
-#endif
-
- if (events & IPCT_COUNTER_FILLING &&
- (ctnetlink_dump_counters(skb, ct, IP_CT_DIR_ORIGINAL) < 0 ||
- ctnetlink_dump_counters(skb, ct, IP_CT_DIR_REPLY) < 0))
- goto nfattr_failure;
- }
-
- nlh->nlmsg_len = skb->tail - b;
- nfnetlink_send(skb, 0, group, 0);
- return NOTIFY_DONE;
-
-nlmsg_failure:
-nfattr_failure:
- kfree_skb(skb);
- return NOTIFY_DONE;
-}
-#endif /* CONFIG_IP_NF_CONNTRACK_EVENTS */
-
-static int ctnetlink_done(struct netlink_callback *cb)
-{
- if (cb->args[1])
- ip_conntrack_put((struct ip_conntrack *)cb->args[1]);
- return 0;
-}
-
-static int
-ctnetlink_dump_table(struct sk_buff *skb, struct netlink_callback *cb)
-{
- struct ip_conntrack *ct, *last;
- struct ip_conntrack_tuple_hash *h;
- struct list_head *i;
-
- read_lock_bh(&ip_conntrack_lock);
- last = (struct ip_conntrack *)cb->args[1];
- for (; cb->args[0] < ip_conntrack_htable_size; cb->args[0]++) {
-restart:
- list_for_each_prev(i, &ip_conntrack_hash[cb->args[0]]) {
- h = (struct ip_conntrack_tuple_hash *) i;
- if (DIRECTION(h) != IP_CT_DIR_ORIGINAL)
- continue;
- ct = tuplehash_to_ctrack(h);
- if (cb->args[1]) {
- if (ct != last)
- continue;
- cb->args[1] = 0;
- }
- if (ctnetlink_fill_info(skb, NETLINK_CB(cb->skb).pid,
- cb->nlh->nlmsg_seq,
- IPCTNL_MSG_CT_NEW,
- 1, ct) < 0) {
- nf_conntrack_get(&ct->ct_general);
- cb->args[1] = (unsigned long)ct;
- goto out;
- }
-#ifdef CONFIG_IP_NF_CT_ACCT
- if (NFNL_MSG_TYPE(cb->nlh->nlmsg_type) ==
- IPCTNL_MSG_CT_GET_CTRZERO)
- memset(&ct->counters, 0, sizeof(ct->counters));
-#endif
- }
- if (cb->args[1]) {
- cb->args[1] = 0;
- goto restart;
- }
- }
-out:
- read_unlock_bh(&ip_conntrack_lock);
- if (last)
- ip_conntrack_put(last);
-
- return skb->len;
-}
-
-static const size_t cta_min_ip[CTA_IP_MAX] = {
- [CTA_IP_V4_SRC-1] = sizeof(__be32),
- [CTA_IP_V4_DST-1] = sizeof(__be32),
-};
-
-static inline int
-ctnetlink_parse_tuple_ip(struct nfattr *attr, struct ip_conntrack_tuple *tuple)
-{
- struct nfattr *tb[CTA_IP_MAX];
-
- nfattr_parse_nested(tb, CTA_IP_MAX, attr);
-
- if (nfattr_bad_size(tb, CTA_IP_MAX, cta_min_ip))
- return -EINVAL;
-
- if (!tb[CTA_IP_V4_SRC-1])
- return -EINVAL;
- tuple->src.ip = *(__be32 *)NFA_DATA(tb[CTA_IP_V4_SRC-1]);
-
- if (!tb[CTA_IP_V4_DST-1])
- return -EINVAL;
- tuple->dst.ip = *(__be32 *)NFA_DATA(tb[CTA_IP_V4_DST-1]);
-
- return 0;
-}
-
-static const size_t cta_min_proto[CTA_PROTO_MAX] = {
- [CTA_PROTO_NUM-1] = sizeof(u_int8_t),
- [CTA_PROTO_SRC_PORT-1] = sizeof(u_int16_t),
- [CTA_PROTO_DST_PORT-1] = sizeof(u_int16_t),
- [CTA_PROTO_ICMP_TYPE-1] = sizeof(u_int8_t),
- [CTA_PROTO_ICMP_CODE-1] = sizeof(u_int8_t),
- [CTA_PROTO_ICMP_ID-1] = sizeof(u_int16_t),
-};
-
-static inline int
-ctnetlink_parse_tuple_proto(struct nfattr *attr,
- struct ip_conntrack_tuple *tuple)
-{
- struct nfattr *tb[CTA_PROTO_MAX];
- struct ip_conntrack_protocol *proto;
- int ret = 0;
-
- nfattr_parse_nested(tb, CTA_PROTO_MAX, attr);
-
- if (nfattr_bad_size(tb, CTA_PROTO_MAX, cta_min_proto))
- return -EINVAL;
-
- if (!tb[CTA_PROTO_NUM-1])
- return -EINVAL;
- tuple->dst.protonum = *(u_int8_t *)NFA_DATA(tb[CTA_PROTO_NUM-1]);
-
- proto = ip_conntrack_proto_find_get(tuple->dst.protonum);
-
- if (likely(proto->nfattr_to_tuple))
- ret = proto->nfattr_to_tuple(tb, tuple);
-
- ip_conntrack_proto_put(proto);
-
- return ret;
-}
-
-static inline int
-ctnetlink_parse_tuple(struct nfattr *cda[], struct ip_conntrack_tuple *tuple,
- enum ctattr_tuple type)
-{
- struct nfattr *tb[CTA_TUPLE_MAX];
- int err;
-
- memset(tuple, 0, sizeof(*tuple));
-
- nfattr_parse_nested(tb, CTA_TUPLE_MAX, cda[type-1]);
-
- if (!tb[CTA_TUPLE_IP-1])
- return -EINVAL;
-
- err = ctnetlink_parse_tuple_ip(tb[CTA_TUPLE_IP-1], tuple);
- if (err < 0)
- return err;
-
- if (!tb[CTA_TUPLE_PROTO-1])
- return -EINVAL;
-
- err = ctnetlink_parse_tuple_proto(tb[CTA_TUPLE_PROTO-1], tuple);
- if (err < 0)
- return err;
-
- /* orig and expect tuples get DIR_ORIGINAL */
- if (type == CTA_TUPLE_REPLY)
- tuple->dst.dir = IP_CT_DIR_REPLY;
- else
- tuple->dst.dir = IP_CT_DIR_ORIGINAL;
-
- return 0;
-}
-
-#ifdef CONFIG_IP_NF_NAT_NEEDED
-static const size_t cta_min_protonat[CTA_PROTONAT_MAX] = {
- [CTA_PROTONAT_PORT_MIN-1] = sizeof(u_int16_t),
- [CTA_PROTONAT_PORT_MAX-1] = sizeof(u_int16_t),
-};
-
-static int ctnetlink_parse_nat_proto(struct nfattr *attr,
- const struct ip_conntrack *ct,
- struct ip_nat_range *range)
-{
- struct nfattr *tb[CTA_PROTONAT_MAX];
- struct ip_nat_protocol *npt;
-
- nfattr_parse_nested(tb, CTA_PROTONAT_MAX, attr);
-
- if (nfattr_bad_size(tb, CTA_PROTONAT_MAX, cta_min_protonat))
- return -EINVAL;
-
- npt = ip_nat_proto_find_get(ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.protonum);
-
- if (!npt->nfattr_to_range) {
- ip_nat_proto_put(npt);
- return 0;
- }
-
- /* nfattr_to_range returns 1 if it parsed, 0 if not, neg. on error */
- if (npt->nfattr_to_range(tb, range) > 0)
- range->flags |= IP_NAT_RANGE_PROTO_SPECIFIED;
-
- ip_nat_proto_put(npt);
-
- return 0;
-}
-
-static const size_t cta_min_nat[CTA_NAT_MAX] = {
- [CTA_NAT_MINIP-1] = sizeof(__be32),
- [CTA_NAT_MAXIP-1] = sizeof(__be32),
-};
-
-static inline int
-ctnetlink_parse_nat(struct nfattr *nat,
- const struct ip_conntrack *ct, struct ip_nat_range *range)
-{
- struct nfattr *tb[CTA_NAT_MAX];
- int err;
-
- memset(range, 0, sizeof(*range));
-
- nfattr_parse_nested(tb, CTA_NAT_MAX, nat);
-
- if (nfattr_bad_size(tb, CTA_NAT_MAX, cta_min_nat))
- return -EINVAL;
-
- if (tb[CTA_NAT_MINIP-1])
- range->min_ip = *(__be32 *)NFA_DATA(tb[CTA_NAT_MINIP-1]);
-
- if (!tb[CTA_NAT_MAXIP-1])
- range->max_ip = range->min_ip;
- else
- range->max_ip = *(__be32 *)NFA_DATA(tb[CTA_NAT_MAXIP-1]);
-
- if (range->min_ip)
- range->flags |= IP_NAT_RANGE_MAP_IPS;
-
- if (!tb[CTA_NAT_PROTO-1])
- return 0;
-
- err = ctnetlink_parse_nat_proto(tb[CTA_NAT_PROTO-1], ct, range);
- if (err < 0)
- return err;
-
- return 0;
-}
-#endif
-
-static inline int
-ctnetlink_parse_help(struct nfattr *attr, char **helper_name)
-{
- struct nfattr *tb[CTA_HELP_MAX];
-
- nfattr_parse_nested(tb, CTA_HELP_MAX, attr);
-
- if (!tb[CTA_HELP_NAME-1])
- return -EINVAL;
-
- *helper_name = NFA_DATA(tb[CTA_HELP_NAME-1]);
-
- return 0;
-}
-
-static const size_t cta_min[CTA_MAX] = {
- [CTA_STATUS-1] = sizeof(__be32),
- [CTA_TIMEOUT-1] = sizeof(__be32),
- [CTA_MARK-1] = sizeof(__be32),
- [CTA_USE-1] = sizeof(__be32),
- [CTA_ID-1] = sizeof(__be32)
-};
-
-static int
-ctnetlink_del_conntrack(struct sock *ctnl, struct sk_buff *skb,
- struct nlmsghdr *nlh, struct nfattr *cda[], int *errp)
-{
- struct ip_conntrack_tuple_hash *h;
- struct ip_conntrack_tuple tuple;
- struct ip_conntrack *ct;
- int err = 0;
-
- if (nfattr_bad_size(cda, CTA_MAX, cta_min))
- return -EINVAL;
-
- if (cda[CTA_TUPLE_ORIG-1])
- err = ctnetlink_parse_tuple(cda, &tuple, CTA_TUPLE_ORIG);
- else if (cda[CTA_TUPLE_REPLY-1])
- err = ctnetlink_parse_tuple(cda, &tuple, CTA_TUPLE_REPLY);
- else {
- /* Flush the whole table */
- ip_conntrack_flush();
- return 0;
- }
-
- if (err < 0)
- return err;
-
- h = ip_conntrack_find_get(&tuple, NULL);
- if (!h)
- return -ENOENT;
-
- ct = tuplehash_to_ctrack(h);
-
- if (cda[CTA_ID-1]) {
- u_int32_t id = ntohl(*(__be32 *)NFA_DATA(cda[CTA_ID-1]));
- if (ct->id != id) {
- ip_conntrack_put(ct);
- return -ENOENT;
- }
- }
- if (del_timer(&ct->timeout))
- ct->timeout.function((unsigned long)ct);
-
- ip_conntrack_put(ct);
-
- return 0;
-}
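The del_timer()/timeout.function() pair above is the usual idiom for forcing a conntrack to expire immediately: if del_timer() cancelled a still-pending timer, the caller owns the expiry and may run the handler synchronously instead of waiting for the timeout. A stand-alone sketch of the pattern, with plain user-space stand-ins rather than the kernel timer API:

/*
 * "Expire it now" idiom: only the party that successfully cancels the
 * pending timer gets to invoke the expiry handler.
 */
#include <stdio.h>

struct timer {
        int pending;
        void (*function)(unsigned long);
        unsigned long data;
};

static int del_timer(struct timer *t)
{
        int was_pending = t->pending;

        t->pending = 0;
        return was_pending;             /* nonzero: we cancelled it */
}

static void death_by_timeout(unsigned long data)
{
        printf("connection %lu destroyed\n", data);
}

int main(void)
{
        struct timer t = { 1, death_by_timeout, 42 };

        if (del_timer(&t))              /* we cancelled it: run it now */
                t.function(t.data);
        return 0;
}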
-
-static int
-ctnetlink_get_conntrack(struct sock *ctnl, struct sk_buff *skb,
- struct nlmsghdr *nlh, struct nfattr *cda[], int *errp)
-{
- struct ip_conntrack_tuple_hash *h;
- struct ip_conntrack_tuple tuple;
- struct ip_conntrack *ct;
- struct sk_buff *skb2 = NULL;
- int err = 0;
-
- if (nlh->nlmsg_flags & NLM_F_DUMP) {
- struct nfgenmsg *msg = NLMSG_DATA(nlh);
- u32 rlen;
-
- if (msg->nfgen_family != AF_INET)
- return -EAFNOSUPPORT;
-
-#ifndef CONFIG_IP_NF_CT_ACCT
- if (NFNL_MSG_TYPE(nlh->nlmsg_type) == IPCTNL_MSG_CT_GET_CTRZERO)
- return -ENOTSUPP;
-#endif
- if ((*errp = netlink_dump_start(ctnl, skb, nlh,
- ctnetlink_dump_table,
- ctnetlink_done)) != 0)
- return -EINVAL;
-
- rlen = NLMSG_ALIGN(nlh->nlmsg_len);
- if (rlen > skb->len)
- rlen = skb->len;
- skb_pull(skb, rlen);
- return 0;
- }
-
- if (nfattr_bad_size(cda, CTA_MAX, cta_min))
- return -EINVAL;
-
- if (cda[CTA_TUPLE_ORIG-1])
- err = ctnetlink_parse_tuple(cda, &tuple, CTA_TUPLE_ORIG);
- else if (cda[CTA_TUPLE_REPLY-1])
- err = ctnetlink_parse_tuple(cda, &tuple, CTA_TUPLE_REPLY);
- else
- return -EINVAL;
-
- if (err < 0)
- return err;
-
- h = ip_conntrack_find_get(&tuple, NULL);
- if (!h)
- return -ENOENT;
-
- ct = tuplehash_to_ctrack(h);
-
- err = -ENOMEM;
- skb2 = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
- if (!skb2) {
- ip_conntrack_put(ct);
- return -ENOMEM;
- }
-
- err = ctnetlink_fill_info(skb2, NETLINK_CB(skb).pid, nlh->nlmsg_seq,
- IPCTNL_MSG_CT_NEW, 1, ct);
- ip_conntrack_put(ct);
- if (err <= 0)
- goto free;
-
- err = netlink_unicast(ctnl, skb2, NETLINK_CB(skb).pid, MSG_DONTWAIT);
- if (err < 0)
- goto out;
-
- return 0;
-
-free:
- kfree_skb(skb2);
-out:
- return err;
-}
-
-static inline int
-ctnetlink_change_status(struct ip_conntrack *ct, struct nfattr *cda[])
-{
- unsigned long d;
- unsigned status = ntohl(*(__be32 *)NFA_DATA(cda[CTA_STATUS-1]));
- d = ct->status ^ status;
-
- if (d & (IPS_EXPECTED|IPS_CONFIRMED|IPS_DYING))
- /* unchangeable */
- return -EINVAL;
-
- if (d & IPS_SEEN_REPLY && !(status & IPS_SEEN_REPLY))
- /* SEEN_REPLY bit can only be set */
- return -EINVAL;
-
- if (d & IPS_ASSURED && !(status & IPS_ASSURED))
- /* ASSURED bit can only be set */
- return -EINVAL;
-
- if (cda[CTA_NAT_SRC-1] || cda[CTA_NAT_DST-1]) {
-#ifndef CONFIG_IP_NF_NAT_NEEDED
- return -EINVAL;
-#else
- struct ip_nat_range range;
-
- if (cda[CTA_NAT_DST-1]) {
- if (ctnetlink_parse_nat(cda[CTA_NAT_DST-1], ct,
- &range) < 0)
- return -EINVAL;
- if (ip_nat_initialized(ct,
- HOOK2MANIP(NF_IP_PRE_ROUTING)))
- return -EEXIST;
- ip_nat_setup_info(ct, &range, NF_IP_PRE_ROUTING);
- }
- if (cda[CTA_NAT_SRC-1]) {
- if (ctnetlink_parse_nat(cda[CTA_NAT_SRC-1], ct,
- &range) < 0)
- return -EINVAL;
- if (ip_nat_initialized(ct,
- HOOK2MANIP(NF_IP_POST_ROUTING)))
- return -EEXIST;
- ip_nat_setup_info(ct, &range, NF_IP_POST_ROUTING);
- }
-#endif
- }
-
- /* Be careful here, modifying NAT bits can screw up things,
- * so don't let users modify them directly if they don't pass
- * ip_nat_range. */
- ct->status |= status & ~(IPS_NAT_DONE_MASK | IPS_NAT_MASK);
- return 0;
-}
-
-static inline int
-ctnetlink_change_helper(struct ip_conntrack *ct, struct nfattr *cda[])
-{
- struct ip_conntrack_helper *helper;
- char *helpname;
- int err;
-
- /* don't change helper of sibling connections */
- if (ct->master)
- return -EINVAL;
-
- err = ctnetlink_parse_help(cda[CTA_HELP-1], &helpname);
- if (err < 0)
- return err;
-
- helper = __ip_conntrack_helper_find_byname(helpname);
- if (!helper) {
- if (!strcmp(helpname, ""))
- helper = NULL;
- else
- return -EINVAL;
- }
-
- if (ct->helper) {
- if (!helper) {
- /* we had a helper before ... */
- ip_ct_remove_expectations(ct);
- ct->helper = NULL;
- } else {
- /* need to zero data of old helper */
- memset(&ct->help, 0, sizeof(ct->help));
- }
- }
-
- ct->helper = helper;
-
- return 0;
-}
-
-static inline int
-ctnetlink_change_timeout(struct ip_conntrack *ct, struct nfattr *cda[])
-{
- u_int32_t timeout = ntohl(*(__be32 *)NFA_DATA(cda[CTA_TIMEOUT-1]));
-
- if (!del_timer(&ct->timeout))
- return -ETIME;
-
- ct->timeout.expires = jiffies + timeout * HZ;
- add_timer(&ct->timeout);
-
- return 0;
-}
-
-static inline int
-ctnetlink_change_protoinfo(struct ip_conntrack *ct, struct nfattr *cda[])
-{
- struct nfattr *tb[CTA_PROTOINFO_MAX], *attr = cda[CTA_PROTOINFO-1];
- struct ip_conntrack_protocol *proto;
- u_int16_t npt = ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.protonum;
- int err = 0;
-
- nfattr_parse_nested(tb, CTA_PROTOINFO_MAX, attr);
-
- proto = ip_conntrack_proto_find_get(npt);
-
- if (proto->from_nfattr)
- err = proto->from_nfattr(tb, ct);
- ip_conntrack_proto_put(proto);
-
- return err;
-}
-
-static int
-ctnetlink_change_conntrack(struct ip_conntrack *ct, struct nfattr *cda[])
-{
- int err;
-
- if (cda[CTA_HELP-1]) {
- err = ctnetlink_change_helper(ct, cda);
- if (err < 0)
- return err;
- }
-
- if (cda[CTA_TIMEOUT-1]) {
- err = ctnetlink_change_timeout(ct, cda);
- if (err < 0)
- return err;
- }
-
- if (cda[CTA_STATUS-1]) {
- err = ctnetlink_change_status(ct, cda);
- if (err < 0)
- return err;
- }
-
- if (cda[CTA_PROTOINFO-1]) {
- err = ctnetlink_change_protoinfo(ct, cda);
- if (err < 0)
- return err;
- }
-
-#if defined(CONFIG_IP_NF_CONNTRACK_MARK)
- if (cda[CTA_MARK-1])
- ct->mark = ntohl(*(__be32 *)NFA_DATA(cda[CTA_MARK-1]));
-#endif
-
- return 0;
-}
-
-static int
-ctnetlink_create_conntrack(struct nfattr *cda[],
- struct ip_conntrack_tuple *otuple,
- struct ip_conntrack_tuple *rtuple)
-{
- struct ip_conntrack *ct;
- int err = -EINVAL;
-
- ct = ip_conntrack_alloc(otuple, rtuple);
- if (ct == NULL || IS_ERR(ct))
- return -ENOMEM;
-
- if (!cda[CTA_TIMEOUT-1])
- goto err;
- ct->timeout.expires = ntohl(*(__be32 *)NFA_DATA(cda[CTA_TIMEOUT-1]));
-
- ct->timeout.expires = jiffies + ct->timeout.expires * HZ;
- ct->status |= IPS_CONFIRMED;
-
- if (cda[CTA_STATUS-1]) {
- err = ctnetlink_change_status(ct, cda);
- if (err < 0)
- goto err;
- }
-
- if (cda[CTA_PROTOINFO-1]) {
- err = ctnetlink_change_protoinfo(ct, cda);
- if (err < 0)
- goto err;
- }
-
-#if defined(CONFIG_IP_NF_CONNTRACK_MARK)
- if (cda[CTA_MARK-1])
- ct->mark = ntohl(*(__be32 *)NFA_DATA(cda[CTA_MARK-1]));
-#endif
-
- ct->helper = ip_conntrack_helper_find_get(rtuple);
-
- add_timer(&ct->timeout);
- ip_conntrack_hash_insert(ct);
-
- if (ct->helper)
- ip_conntrack_helper_put(ct->helper);
-
- return 0;
-
-err:
- ip_conntrack_free(ct);
- return err;
-}
-
-static int
-ctnetlink_new_conntrack(struct sock *ctnl, struct sk_buff *skb,
- struct nlmsghdr *nlh, struct nfattr *cda[], int *errp)
-{
- struct ip_conntrack_tuple otuple, rtuple;
- struct ip_conntrack_tuple_hash *h = NULL;
- int err = 0;
-
- if (nfattr_bad_size(cda, CTA_MAX, cta_min))
- return -EINVAL;
-
- if (cda[CTA_TUPLE_ORIG-1]) {
- err = ctnetlink_parse_tuple(cda, &otuple, CTA_TUPLE_ORIG);
- if (err < 0)
- return err;
- }
-
- if (cda[CTA_TUPLE_REPLY-1]) {
- err = ctnetlink_parse_tuple(cda, &rtuple, CTA_TUPLE_REPLY);
- if (err < 0)
- return err;
- }
-
- write_lock_bh(&ip_conntrack_lock);
- if (cda[CTA_TUPLE_ORIG-1])
- h = __ip_conntrack_find(&otuple, NULL);
- else if (cda[CTA_TUPLE_REPLY-1])
- h = __ip_conntrack_find(&rtuple, NULL);
-
- if (h == NULL) {
- write_unlock_bh(&ip_conntrack_lock);
- err = -ENOENT;
- if (nlh->nlmsg_flags & NLM_F_CREATE)
- err = ctnetlink_create_conntrack(cda, &otuple, &rtuple);
- return err;
- }
- /* implicit 'else' */
-
- /* we only allow nat config for new conntracks */
- if (cda[CTA_NAT_SRC-1] || cda[CTA_NAT_DST-1]) {
- err = -EINVAL;
- goto out_unlock;
- }
-
- /* We manipulate the conntrack inside the global conntrack table lock,
- * so there's no need to increase the refcount */
- err = -EEXIST;
- if (!(nlh->nlmsg_flags & NLM_F_EXCL))
- err = ctnetlink_change_conntrack(tuplehash_to_ctrack(h), cda);
-
-out_unlock:
- write_unlock_bh(&ip_conntrack_lock);
- return err;
-}
-
-/***********************************************************************
- * EXPECT
- ***********************************************************************/
-
-static inline int
-ctnetlink_exp_dump_tuple(struct sk_buff *skb,
- const struct ip_conntrack_tuple *tuple,
- enum ctattr_expect type)
-{
- struct nfattr *nest_parms = NFA_NEST(skb, type);
-
- if (ctnetlink_dump_tuples(skb, tuple) < 0)
- goto nfattr_failure;
-
- NFA_NEST_END(skb, nest_parms);
-
- return 0;
-
-nfattr_failure:
- return -1;
-}
-
-static inline int
-ctnetlink_exp_dump_mask(struct sk_buff *skb,
- const struct ip_conntrack_tuple *tuple,
- const struct ip_conntrack_tuple *mask)
-{
- int ret;
- struct ip_conntrack_protocol *proto;
- struct nfattr *nest_parms = NFA_NEST(skb, CTA_EXPECT_MASK);
-
- ret = ctnetlink_dump_tuples_ip(skb, mask);
- if (unlikely(ret < 0))
- goto nfattr_failure;
-
- proto = ip_conntrack_proto_find_get(tuple->dst.protonum);
- ret = ctnetlink_dump_tuples_proto(skb, mask, proto);
- ip_conntrack_proto_put(proto);
- if (unlikely(ret < 0))
- goto nfattr_failure;
-
- NFA_NEST_END(skb, nest_parms);
-
- return 0;
-
-nfattr_failure:
- return -1;
-}
-
-static inline int
-ctnetlink_exp_dump_expect(struct sk_buff *skb,
- const struct ip_conntrack_expect *exp)
-{
- struct ip_conntrack *master = exp->master;
- __be32 timeout = htonl((exp->timeout.expires - jiffies) / HZ);
- __be32 id = htonl(exp->id);
-
- if (ctnetlink_exp_dump_tuple(skb, &exp->tuple, CTA_EXPECT_TUPLE) < 0)
- goto nfattr_failure;
- if (ctnetlink_exp_dump_mask(skb, &exp->tuple, &exp->mask) < 0)
- goto nfattr_failure;
- if (ctnetlink_exp_dump_tuple(skb,
- &master->tuplehash[IP_CT_DIR_ORIGINAL].tuple,
- CTA_EXPECT_MASTER) < 0)
- goto nfattr_failure;
-
- NFA_PUT(skb, CTA_EXPECT_TIMEOUT, sizeof(__be32), &timeout);
- NFA_PUT(skb, CTA_EXPECT_ID, sizeof(__be32), &id);
-
- return 0;
-
-nfattr_failure:
- return -1;
-}
-
-static int
-ctnetlink_exp_fill_info(struct sk_buff *skb, u32 pid, u32 seq,
- int event,
- int nowait,
- const struct ip_conntrack_expect *exp)
-{
- struct nlmsghdr *nlh;
- struct nfgenmsg *nfmsg;
- unsigned char *b;
-
- b = skb->tail;
-
- event |= NFNL_SUBSYS_CTNETLINK_EXP << 8;
- nlh = NLMSG_PUT(skb, pid, seq, event, sizeof(struct nfgenmsg));
- nfmsg = NLMSG_DATA(nlh);
-
- nlh->nlmsg_flags = (nowait && pid) ? NLM_F_MULTI : 0;
- nfmsg->nfgen_family = AF_INET;
- nfmsg->version = NFNETLINK_V0;
- nfmsg->res_id = 0;
-
- if (ctnetlink_exp_dump_expect(skb, exp) < 0)
- goto nfattr_failure;
-
- nlh->nlmsg_len = skb->tail - b;
- return skb->len;
-
-nlmsg_failure:
-nfattr_failure:
- skb_trim(skb, b - skb->data);
- return -1;
-}
-
-#ifdef CONFIG_IP_NF_CONNTRACK_EVENTS
-static int ctnetlink_expect_event(struct notifier_block *this,
- unsigned long events, void *ptr)
-{
- struct nlmsghdr *nlh;
- struct nfgenmsg *nfmsg;
- struct ip_conntrack_expect *exp = (struct ip_conntrack_expect *)ptr;
- struct sk_buff *skb;
- unsigned int type;
- unsigned char *b;
- int flags = 0;
-
- if (events & IPEXP_NEW) {
- type = IPCTNL_MSG_EXP_NEW;
- flags = NLM_F_CREATE|NLM_F_EXCL;
- } else
- return NOTIFY_DONE;
-
- if (!nfnetlink_has_listeners(NFNLGRP_CONNTRACK_EXP_NEW))
- return NOTIFY_DONE;
-
- skb = alloc_skb(NLMSG_GOODSIZE, GFP_ATOMIC);
- if (!skb)
- return NOTIFY_DONE;
-
- b = skb->tail;
-
- type |= NFNL_SUBSYS_CTNETLINK_EXP << 8;
- nlh = NLMSG_PUT(skb, 0, 0, type, sizeof(struct nfgenmsg));
- nfmsg = NLMSG_DATA(nlh);
-
- nlh->nlmsg_flags = flags;
- nfmsg->nfgen_family = AF_INET;
- nfmsg->version = NFNETLINK_V0;
- nfmsg->res_id = 0;
-
- if (ctnetlink_exp_dump_expect(skb, exp) < 0)
- goto nfattr_failure;
-
- nlh->nlmsg_len = skb->tail - b;
- nfnetlink_send(skb, 0, NFNLGRP_CONNTRACK_EXP_NEW, 0);
- return NOTIFY_DONE;
-
-nlmsg_failure:
-nfattr_failure:
- kfree_skb(skb);
- return NOTIFY_DONE;
-}
-#endif
-
-static int
-ctnetlink_exp_dump_table(struct sk_buff *skb, struct netlink_callback *cb)
-{
- struct ip_conntrack_expect *exp = NULL;
- struct list_head *i;
- u_int32_t *id = (u_int32_t *) &cb->args[0];
-
- read_lock_bh(&ip_conntrack_lock);
- list_for_each_prev(i, &ip_conntrack_expect_list) {
- exp = (struct ip_conntrack_expect *) i;
- if (exp->id <= *id)
- continue;
- if (ctnetlink_exp_fill_info(skb, NETLINK_CB(cb->skb).pid,
- cb->nlh->nlmsg_seq,
- IPCTNL_MSG_EXP_NEW,
- 1, exp) < 0)
- goto out;
- *id = exp->id;
- }
-out:
- read_unlock_bh(&ip_conntrack_lock);
-
- return skb->len;
-}
-
-static const size_t cta_min_exp[CTA_EXPECT_MAX] = {
- [CTA_EXPECT_TIMEOUT-1] = sizeof(__be32),
- [CTA_EXPECT_ID-1] = sizeof(__be32)
-};
-
-static int
-ctnetlink_get_expect(struct sock *ctnl, struct sk_buff *skb,
- struct nlmsghdr *nlh, struct nfattr *cda[], int *errp)
-{
- struct ip_conntrack_tuple tuple;
- struct ip_conntrack_expect *exp;
- struct sk_buff *skb2;
- int err = 0;
-
- if (nfattr_bad_size(cda, CTA_EXPECT_MAX, cta_min_exp))
- return -EINVAL;
-
- if (nlh->nlmsg_flags & NLM_F_DUMP) {
- struct nfgenmsg *msg = NLMSG_DATA(nlh);
- u32 rlen;
-
- if (msg->nfgen_family != AF_INET)
- return -EAFNOSUPPORT;
-
- if ((*errp = netlink_dump_start(ctnl, skb, nlh,
- ctnetlink_exp_dump_table,
- ctnetlink_done)) != 0)
- return -EINVAL;
- rlen = NLMSG_ALIGN(nlh->nlmsg_len);
- if (rlen > skb->len)
- rlen = skb->len;
- skb_pull(skb, rlen);
- return 0;
- }
-
- if (cda[CTA_EXPECT_MASTER-1])
- err = ctnetlink_parse_tuple(cda, &tuple, CTA_EXPECT_MASTER);
- else
- return -EINVAL;
-
- if (err < 0)
- return err;
-
- exp = ip_conntrack_expect_find_get(&tuple);
- if (!exp)
- return -ENOENT;
-
- if (cda[CTA_EXPECT_ID-1]) {
- __be32 id = *(__be32 *)NFA_DATA(cda[CTA_EXPECT_ID-1]);
- if (exp->id != ntohl(id)) {
- ip_conntrack_expect_put(exp);
- return -ENOENT;
- }
- }
-
- err = -ENOMEM;
- skb2 = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
- if (!skb2)
- goto out;
-
- err = ctnetlink_exp_fill_info(skb2, NETLINK_CB(skb).pid,
- nlh->nlmsg_seq, IPCTNL_MSG_EXP_NEW,
- 1, exp);
- if (err <= 0)
- goto free;
-
- ip_conntrack_expect_put(exp);
-
- return netlink_unicast(ctnl, skb2, NETLINK_CB(skb).pid, MSG_DONTWAIT);
-
-free:
- kfree_skb(skb2);
-out:
- ip_conntrack_expect_put(exp);
- return err;
-}
-
-static int
-ctnetlink_del_expect(struct sock *ctnl, struct sk_buff *skb,
- struct nlmsghdr *nlh, struct nfattr *cda[], int *errp)
-{
- struct ip_conntrack_expect *exp, *tmp;
- struct ip_conntrack_tuple tuple;
- struct ip_conntrack_helper *h;
- int err;
-
- if (nfattr_bad_size(cda, CTA_EXPECT_MAX, cta_min_exp))
- return -EINVAL;
-
- if (cda[CTA_EXPECT_TUPLE-1]) {
- /* delete a single expect by tuple */
- err = ctnetlink_parse_tuple(cda, &tuple, CTA_EXPECT_TUPLE);
- if (err < 0)
- return err;
-
- /* bump usage count to 2 */
- exp = ip_conntrack_expect_find_get(&tuple);
- if (!exp)
- return -ENOENT;
-
- if (cda[CTA_EXPECT_ID-1]) {
- __be32 id =
- *(__be32 *)NFA_DATA(cda[CTA_EXPECT_ID-1]);
- if (exp->id != ntohl(id)) {
- ip_conntrack_expect_put(exp);
- return -ENOENT;
- }
- }
-
- /* after list removal, usage count == 1 */
- ip_conntrack_unexpect_related(exp);
- /* have to put what we 'get' above.
- * after this line usage count == 0 */
- ip_conntrack_expect_put(exp);
- } else if (cda[CTA_EXPECT_HELP_NAME-1]) {
- char *name = NFA_DATA(cda[CTA_EXPECT_HELP_NAME-1]);
-
- /* delete all expectations for this helper */
- write_lock_bh(&ip_conntrack_lock);
- h = __ip_conntrack_helper_find_byname(name);
- if (!h) {
- write_unlock_bh(&ip_conntrack_lock);
- return -EINVAL;
- }
- list_for_each_entry_safe(exp, tmp, &ip_conntrack_expect_list,
- list) {
- if (exp->master->helper == h
- && del_timer(&exp->timeout)) {
- ip_ct_unlink_expect(exp);
- ip_conntrack_expect_put(exp);
- }
- }
- write_unlock_bh(&ip_conntrack_lock);
- } else {
- /* This basically means we have to flush everything */
- write_lock_bh(&ip_conntrack_lock);
- list_for_each_entry_safe(exp, tmp, &ip_conntrack_expect_list,
- list) {
- if (del_timer(&exp->timeout)) {
- ip_ct_unlink_expect(exp);
- ip_conntrack_expect_put(exp);
- }
- }
- write_unlock_bh(&ip_conntrack_lock);
- }
-
- return 0;
-}
-
-static int
-ctnetlink_change_expect(struct ip_conntrack_expect *x, struct nfattr *cda[])
-{
- return -EOPNOTSUPP;
-}
-
-static int
-ctnetlink_create_expect(struct nfattr *cda[])
-{
- struct ip_conntrack_tuple tuple, mask, master_tuple;
- struct ip_conntrack_tuple_hash *h = NULL;
- struct ip_conntrack_expect *exp;
- struct ip_conntrack *ct;
- int err = 0;
-
- /* caller guarantees that those three CTA_EXPECT_* exist */
- err = ctnetlink_parse_tuple(cda, &tuple, CTA_EXPECT_TUPLE);
- if (err < 0)
- return err;
- err = ctnetlink_parse_tuple(cda, &mask, CTA_EXPECT_MASK);
- if (err < 0)
- return err;
- err = ctnetlink_parse_tuple(cda, &master_tuple, CTA_EXPECT_MASTER);
- if (err < 0)
- return err;
-
- /* Look for master conntrack of this expectation */
- h = ip_conntrack_find_get(&master_tuple, NULL);
- if (!h)
- return -ENOENT;
- ct = tuplehash_to_ctrack(h);
-
- if (!ct->helper) {
- /* this conntrack has no helper, abort */
- err = -EINVAL;
- goto out;
- }
-
- exp = ip_conntrack_expect_alloc(ct);
- if (!exp) {
- err = -ENOMEM;
- goto out;
- }
-
- exp->expectfn = NULL;
- exp->flags = 0;
- exp->master = ct;
- memcpy(&exp->tuple, &tuple, sizeof(struct ip_conntrack_tuple));
- memcpy(&exp->mask, &mask, sizeof(struct ip_conntrack_tuple));
-
- err = ip_conntrack_expect_related(exp);
- ip_conntrack_expect_put(exp);
-
-out:
- ip_conntrack_put(tuplehash_to_ctrack(h));
- return err;
-}
-
-static int
-ctnetlink_new_expect(struct sock *ctnl, struct sk_buff *skb,
- struct nlmsghdr *nlh, struct nfattr *cda[], int *errp)
-{
- struct ip_conntrack_tuple tuple;
- struct ip_conntrack_expect *exp;
- int err = 0;
-
- if (nfattr_bad_size(cda, CTA_EXPECT_MAX, cta_min_exp))
- return -EINVAL;
-
- if (!cda[CTA_EXPECT_TUPLE-1]
- || !cda[CTA_EXPECT_MASK-1]
- || !cda[CTA_EXPECT_MASTER-1])
- return -EINVAL;
-
- err = ctnetlink_parse_tuple(cda, &tuple, CTA_EXPECT_TUPLE);
- if (err < 0)
- return err;
-
- write_lock_bh(&ip_conntrack_lock);
- exp = __ip_conntrack_expect_find(&tuple);
-
- if (!exp) {
- write_unlock_bh(&ip_conntrack_lock);
- err = -ENOENT;
- if (nlh->nlmsg_flags & NLM_F_CREATE)
- err = ctnetlink_create_expect(cda);
- return err;
- }
-
- err = -EEXIST;
- if (!(nlh->nlmsg_flags & NLM_F_EXCL))
- err = ctnetlink_change_expect(exp, cda);
- write_unlock_bh(&ip_conntrack_lock);
-
- return err;
-}
-
-#ifdef CONFIG_IP_NF_CONNTRACK_EVENTS
-static struct notifier_block ctnl_notifier = {
- .notifier_call = ctnetlink_conntrack_event,
-};
-
-static struct notifier_block ctnl_notifier_exp = {
- .notifier_call = ctnetlink_expect_event,
-};
-#endif
-
-static struct nfnl_callback ctnl_cb[IPCTNL_MSG_MAX] = {
- [IPCTNL_MSG_CT_NEW] = { .call = ctnetlink_new_conntrack,
- .attr_count = CTA_MAX, },
- [IPCTNL_MSG_CT_GET] = { .call = ctnetlink_get_conntrack,
- .attr_count = CTA_MAX, },
- [IPCTNL_MSG_CT_DELETE] = { .call = ctnetlink_del_conntrack,
- .attr_count = CTA_MAX, },
- [IPCTNL_MSG_CT_GET_CTRZERO] = { .call = ctnetlink_get_conntrack,
- .attr_count = CTA_MAX, },
-};
-
-static struct nfnl_callback ctnl_exp_cb[IPCTNL_MSG_EXP_MAX] = {
- [IPCTNL_MSG_EXP_GET] = { .call = ctnetlink_get_expect,
- .attr_count = CTA_EXPECT_MAX, },
- [IPCTNL_MSG_EXP_NEW] = { .call = ctnetlink_new_expect,
- .attr_count = CTA_EXPECT_MAX, },
- [IPCTNL_MSG_EXP_DELETE] = { .call = ctnetlink_del_expect,
- .attr_count = CTA_EXPECT_MAX, },
-};
-
-static struct nfnetlink_subsystem ctnl_subsys = {
- .name = "conntrack",
- .subsys_id = NFNL_SUBSYS_CTNETLINK,
- .cb_count = IPCTNL_MSG_MAX,
- .cb = ctnl_cb,
-};
-
-static struct nfnetlink_subsystem ctnl_exp_subsys = {
- .name = "conntrack_expect",
- .subsys_id = NFNL_SUBSYS_CTNETLINK_EXP,
- .cb_count = IPCTNL_MSG_EXP_MAX,
- .cb = ctnl_exp_cb,
-};
-
-MODULE_ALIAS_NFNL_SUBSYS(NFNL_SUBSYS_CTNETLINK);
-MODULE_ALIAS_NFNL_SUBSYS(NFNL_SUBSYS_CTNETLINK_EXP);
-
-static int __init ctnetlink_init(void)
-{
- int ret;
-
- printk("ctnetlink v%s: registering with nfnetlink.\n", version);
- ret = nfnetlink_subsys_register(&ctnl_subsys);
- if (ret < 0) {
- printk("ctnetlink_init: cannot register with nfnetlink.\n");
- goto err_out;
- }
-
- ret = nfnetlink_subsys_register(&ctnl_exp_subsys);
- if (ret < 0) {
- printk("ctnetlink_init: cannot register exp with nfnetlink.\n");
- goto err_unreg_subsys;
- }
-
-#ifdef CONFIG_IP_NF_CONNTRACK_EVENTS
- ret = ip_conntrack_register_notifier(&ctnl_notifier);
- if (ret < 0) {
- printk("ctnetlink_init: cannot register notifier.\n");
- goto err_unreg_exp_subsys;
- }
-
- ret = ip_conntrack_expect_register_notifier(&ctnl_notifier_exp);
- if (ret < 0) {
- printk("ctnetlink_init: cannot expect register notifier.\n");
- goto err_unreg_notifier;
- }
-#endif
-
- return 0;
-
-#ifdef CONFIG_IP_NF_CONNTRACK_EVENTS
-err_unreg_notifier:
- ip_conntrack_unregister_notifier(&ctnl_notifier);
-err_unreg_exp_subsys:
- nfnetlink_subsys_unregister(&ctnl_exp_subsys);
-#endif
-err_unreg_subsys:
- nfnetlink_subsys_unregister(&ctnl_subsys);
-err_out:
- return ret;
-}
-
-static void __exit ctnetlink_exit(void)
-{
- printk("ctnetlink: unregistering from nfnetlink.\n");
-
-#ifdef CONFIG_IP_NF_CONNTRACK_EVENTS
- ip_conntrack_expect_unregister_notifier(&ctnl_notifier_exp);
- ip_conntrack_unregister_notifier(&ctnl_notifier);
-#endif
-
- nfnetlink_subsys_unregister(&ctnl_exp_subsys);
- nfnetlink_subsys_unregister(&ctnl_subsys);
- return;
-}
-
-module_init(ctnetlink_init);
-module_exit(ctnetlink_exit);
diff --git a/net/ipv4/netfilter/ip_conntrack_proto_generic.c b/net/ipv4/netfilter/ip_conntrack_proto_generic.c
deleted file mode 100644
index 88af82e9865..00000000000
--- a/net/ipv4/netfilter/ip_conntrack_proto_generic.c
+++ /dev/null
@@ -1,74 +0,0 @@
-/* (C) 1999-2001 Paul `Rusty' Russell
- * (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
-
-#include <linux/types.h>
-#include <linux/timer.h>
-#include <linux/netfilter.h>
-#include <linux/netfilter_ipv4/ip_conntrack_protocol.h>
-
-unsigned int ip_ct_generic_timeout __read_mostly = 600*HZ;
-
-static int generic_pkt_to_tuple(const struct sk_buff *skb,
- unsigned int dataoff,
- struct ip_conntrack_tuple *tuple)
-{
- tuple->src.u.all = 0;
- tuple->dst.u.all = 0;
-
- return 1;
-}
-
-static int generic_invert_tuple(struct ip_conntrack_tuple *tuple,
- const struct ip_conntrack_tuple *orig)
-{
- tuple->src.u.all = 0;
- tuple->dst.u.all = 0;
-
- return 1;
-}
-
-/* Print out the per-protocol part of the tuple. */
-static int generic_print_tuple(struct seq_file *s,
- const struct ip_conntrack_tuple *tuple)
-{
- return 0;
-}
-
-/* Print out the private part of the conntrack. */
-static int generic_print_conntrack(struct seq_file *s,
- const struct ip_conntrack *state)
-{
- return 0;
-}
-
-/* Returns verdict for packet, or -1 for invalid. */
-static int packet(struct ip_conntrack *conntrack,
- const struct sk_buff *skb,
- enum ip_conntrack_info ctinfo)
-{
- ip_ct_refresh_acct(conntrack, ctinfo, skb, ip_ct_generic_timeout);
- return NF_ACCEPT;
-}
-
-/* Called when a new connection for this protocol found. */
-static int new(struct ip_conntrack *conntrack, const struct sk_buff *skb)
-{
- return 1;
-}
-
-struct ip_conntrack_protocol ip_conntrack_generic_protocol =
-{
- .proto = 0,
- .name = "unknown",
- .pkt_to_tuple = generic_pkt_to_tuple,
- .invert_tuple = generic_invert_tuple,
- .print_tuple = generic_print_tuple,
- .print_conntrack = generic_print_conntrack,
- .packet = packet,
- .new = new,
-};
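This file exists to give the protocol ops table a fallback: struct ip_conntrack_protocol is a bundle of function pointers, and the proto-0 "unknown" entry handles anything without a specific tracker. A simplified userspace sketch of that dispatch, with made-up types rather than the kernel structures:

#include <stdio.h>

struct proto_ops {
	unsigned int proto;
	const char *name;
	int (*packet)(void);
};

static int generic_handler(void) { puts("generic handler"); return 1; }
static int icmp_handler(void)    { puts("icmp handler");    return 1; }

static const struct proto_ops generic = { 0, "unknown", generic_handler };
static const struct proto_ops icmp    = { 1, "icmp",    icmp_handler };

/* fall back to the generic entry when nothing specific matches */
static const struct proto_ops *find_proto(unsigned int proto)
{
	return proto == icmp.proto ? &icmp : &generic;
}

int main(void)
{
	find_proto(1)->packet();	/* dispatches to the icmp entry */
	find_proto(47)->packet();	/* no match: generic fallback */
	return 0;
}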
diff --git a/net/ipv4/netfilter/ip_conntrack_proto_gre.c b/net/ipv4/netfilter/ip_conntrack_proto_gre.c
deleted file mode 100644
index ac1c49ef36a..00000000000
--- a/net/ipv4/netfilter/ip_conntrack_proto_gre.c
+++ /dev/null
@@ -1,328 +0,0 @@
-/*
- * ip_conntrack_proto_gre.c - Version 3.0
- *
- * Connection tracking protocol helper module for GRE.
- *
- * GRE is a generic encapsulation protocol, which is generally not very
- * well suited for NAT, as it has no protocol-specific fields such as
- * port numbers.
- *
- * It has an optional key field, which may help us distinguish two
- * connections between the same two hosts.
- *
- * GRE is defined in RFC 1701 and RFC 1702, as well as RFC 2784
- *
- * PPTP is built on top of a modified version of GRE, and has a mandatory
- * field called "CallID", which serves the same purpose as the key field
- * in plain GRE.
- *
- * Documentation about PPTP can be found in RFC 2637
- *
- * (C) 2000-2005 by Harald Welte <laforge@gnumonks.org>
- *
- * Development of this code funded by Astaro AG (http://www.astaro.com/)
- *
- */
-
-#include <linux/module.h>
-#include <linux/types.h>
-#include <linux/timer.h>
-#include <linux/netfilter.h>
-#include <linux/ip.h>
-#include <linux/in.h>
-#include <linux/list.h>
-#include <linux/seq_file.h>
-#include <linux/interrupt.h>
-
-static DEFINE_RWLOCK(ip_ct_gre_lock);
-
-#include <linux/netfilter_ipv4/ip_conntrack_protocol.h>
-#include <linux/netfilter_ipv4/ip_conntrack_helper.h>
-#include <linux/netfilter_ipv4/ip_conntrack_core.h>
-
-#include <linux/netfilter_ipv4/ip_conntrack_proto_gre.h>
-#include <linux/netfilter_ipv4/ip_conntrack_pptp.h>
-
-MODULE_LICENSE("GPL");
-MODULE_AUTHOR("Harald Welte <laforge@gnumonks.org>");
-MODULE_DESCRIPTION("netfilter connection tracking protocol helper for GRE");
-
-/* shamelessly stolen from ip_conntrack_proto_udp.c */
-#define GRE_TIMEOUT (30*HZ)
-#define GRE_STREAM_TIMEOUT (180*HZ)
-
-#if 0
-#define DEBUGP(format, args...) printk(KERN_DEBUG "%s:%s: " format, __FILE__, __FUNCTION__, ## args)
-#define DUMP_TUPLE_GRE(x) printk("%u.%u.%u.%u:0x%x -> %u.%u.%u.%u:0x%x\n", \
- NIPQUAD((x)->src.ip), ntohs((x)->src.u.gre.key), \
- NIPQUAD((x)->dst.ip), ntohs((x)->dst.u.gre.key))
-#else
-#define DEBUGP(x, args...)
-#define DUMP_TUPLE_GRE(x)
-#endif
-
-/* GRE KEYMAP HANDLING FUNCTIONS */
-static LIST_HEAD(gre_keymap_list);
-
-static inline int gre_key_cmpfn(const struct ip_ct_gre_keymap *km,
- const struct ip_conntrack_tuple *t)
-{
- return ((km->tuple.src.ip == t->src.ip) &&
- (km->tuple.dst.ip == t->dst.ip) &&
- (km->tuple.dst.protonum == t->dst.protonum) &&
- (km->tuple.dst.u.all == t->dst.u.all));
-}
-
-/* look up the source key for a given tuple */
-static __be16 gre_keymap_lookup(struct ip_conntrack_tuple *t)
-{
- struct ip_ct_gre_keymap *km;
- __be16 key = 0;
-
- read_lock_bh(&ip_ct_gre_lock);
- list_for_each_entry(km, &gre_keymap_list, list) {
- if (gre_key_cmpfn(km, t)) {
- key = km->tuple.src.u.gre.key;
- break;
- }
- }
- read_unlock_bh(&ip_ct_gre_lock);
-
-	DEBUGP("lookup src key 0x%x for ", key);
- DUMP_TUPLE_GRE(t);
-
- return key;
-}
-
-/* add a single keymap entry, associate with specified master ct */
-int
-ip_ct_gre_keymap_add(struct ip_conntrack *ct,
- struct ip_conntrack_tuple *t, int reply)
-{
- struct ip_ct_gre_keymap **exist_km, *km;
-
- if (!ct->helper || strcmp(ct->helper->name, "pptp")) {
- DEBUGP("refusing to add GRE keymap to non-pptp session\n");
- return -1;
- }
-
- if (!reply)
- exist_km = &ct->help.ct_pptp_info.keymap_orig;
- else
- exist_km = &ct->help.ct_pptp_info.keymap_reply;
-
- if (*exist_km) {
- /* check whether it's a retransmission */
- list_for_each_entry(km, &gre_keymap_list, list) {
- if (gre_key_cmpfn(km, t) && km == *exist_km)
- return 0;
- }
- DEBUGP("trying to override keymap_%s for ct %p\n",
- reply? "reply":"orig", ct);
- return -EEXIST;
- }
-
- km = kmalloc(sizeof(*km), GFP_ATOMIC);
- if (!km)
- return -ENOMEM;
-
- memcpy(&km->tuple, t, sizeof(*t));
- *exist_km = km;
-
- DEBUGP("adding new entry %p: ", km);
- DUMP_TUPLE_GRE(&km->tuple);
-
- write_lock_bh(&ip_ct_gre_lock);
- list_add_tail(&km->list, &gre_keymap_list);
- write_unlock_bh(&ip_ct_gre_lock);
-
- return 0;
-}
-
-/* destroy the keymap entries associated with specified master ct */
-void ip_ct_gre_keymap_destroy(struct ip_conntrack *ct)
-{
- DEBUGP("entering for ct %p\n", ct);
-
- if (!ct->helper || strcmp(ct->helper->name, "pptp")) {
-		DEBUGP("refusing to destroy GRE keymap for non-pptp session\n");
- return;
- }
-
- write_lock_bh(&ip_ct_gre_lock);
- if (ct->help.ct_pptp_info.keymap_orig) {
- DEBUGP("removing %p from list\n",
- ct->help.ct_pptp_info.keymap_orig);
- list_del(&ct->help.ct_pptp_info.keymap_orig->list);
- kfree(ct->help.ct_pptp_info.keymap_orig);
- ct->help.ct_pptp_info.keymap_orig = NULL;
- }
- if (ct->help.ct_pptp_info.keymap_reply) {
- DEBUGP("removing %p from list\n",
- ct->help.ct_pptp_info.keymap_reply);
- list_del(&ct->help.ct_pptp_info.keymap_reply->list);
- kfree(ct->help.ct_pptp_info.keymap_reply);
- ct->help.ct_pptp_info.keymap_reply = NULL;
- }
- write_unlock_bh(&ip_ct_gre_lock);
-}
-
-
-/* PUBLIC CONNTRACK PROTO HELPER FUNCTIONS */
-
-/* invert gre part of tuple */
-static int gre_invert_tuple(struct ip_conntrack_tuple *tuple,
- const struct ip_conntrack_tuple *orig)
-{
- tuple->dst.u.gre.key = orig->src.u.gre.key;
- tuple->src.u.gre.key = orig->dst.u.gre.key;
-
- return 1;
-}
-
-/* gre hdr info to tuple */
-static int gre_pkt_to_tuple(const struct sk_buff *skb,
- unsigned int dataoff,
- struct ip_conntrack_tuple *tuple)
-{
- struct gre_hdr_pptp _pgrehdr, *pgrehdr;
- __be16 srckey;
- struct gre_hdr _grehdr, *grehdr;
-
- /* first only delinearize old RFC1701 GRE header */
- grehdr = skb_header_pointer(skb, dataoff, sizeof(_grehdr), &_grehdr);
- if (!grehdr || grehdr->version != GRE_VERSION_PPTP) {
- /* try to behave like "ip_conntrack_proto_generic" */
- tuple->src.u.all = 0;
- tuple->dst.u.all = 0;
- return 1;
- }
-
- /* PPTP header is variable length, only need up to the call_id field */
- pgrehdr = skb_header_pointer(skb, dataoff, 8, &_pgrehdr);
- if (!pgrehdr)
- return 1;
-
- if (ntohs(grehdr->protocol) != GRE_PROTOCOL_PPTP) {
- DEBUGP("GRE_VERSION_PPTP but unknown proto\n");
- return 0;
- }
-
- tuple->dst.u.gre.key = pgrehdr->call_id;
- srckey = gre_keymap_lookup(tuple);
- tuple->src.u.gre.key = srckey;
-
- return 1;
-}
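As the header comment explains, plain GRE has no port numbers, so gre_pkt_to_tuple() presses the PPTP call-id into the tuple slot that ports would normally fill. A rough userspace illustration of a tuple keyed that way; the types are simplified stand-ins, in host byte order:

#include <stdint.h>
#include <stdio.h>

struct gre_tuple {
	uint32_t src_ip, dst_ip;
	uint16_t src_key, dst_key;	/* PPTP call-ids play the "port" role */
};

static int gre_tuple_equal(const struct gre_tuple *a,
			   const struct gre_tuple *b)
{
	return a->src_ip == b->src_ip && a->dst_ip == b->dst_ip &&
	       a->src_key == b->src_key && a->dst_key == b->dst_key;
}

/* inverting swaps the two call-ids, as gre_invert_tuple() above does */
static void gre_tuple_invert(struct gre_tuple *out,
			     const struct gre_tuple *in)
{
	out->src_ip  = in->dst_ip;
	out->dst_ip  = in->src_ip;
	out->src_key = in->dst_key;
	out->dst_key = in->src_key;
}

int main(void)
{
	struct gre_tuple t = { 0x0a000001, 0x0a000002, 1234, 5678 }, r;

	gre_tuple_invert(&r, &t);
	printf("reply dst_key = %u (was src_key)\n", r.dst_key);
	printf("tuple equals its inverse: %d\n", gre_tuple_equal(&t, &r));
	return 0;
}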
-
-/* print gre part of tuple */
-static int gre_print_tuple(struct seq_file *s,
- const struct ip_conntrack_tuple *tuple)
-{
- return seq_printf(s, "srckey=0x%x dstkey=0x%x ",
- ntohs(tuple->src.u.gre.key),
- ntohs(tuple->dst.u.gre.key));
-}
-
-/* print private data for conntrack */
-static int gre_print_conntrack(struct seq_file *s,
- const struct ip_conntrack *ct)
-{
- return seq_printf(s, "timeout=%u, stream_timeout=%u ",
- (ct->proto.gre.timeout / HZ),
- (ct->proto.gre.stream_timeout / HZ));
-}
-
-/* Returns verdict for packet, and may modify conntrack */
-static int gre_packet(struct ip_conntrack *ct,
- const struct sk_buff *skb,
- enum ip_conntrack_info conntrackinfo)
-{
- /* If we've seen traffic both ways, this is a GRE connection.
- * Extend timeout. */
- if (ct->status & IPS_SEEN_REPLY) {
- ip_ct_refresh_acct(ct, conntrackinfo, skb,
- ct->proto.gre.stream_timeout);
- /* Also, more likely to be important, and not a probe. */
- set_bit(IPS_ASSURED_BIT, &ct->status);
- ip_conntrack_event_cache(IPCT_STATUS, skb);
- } else
- ip_ct_refresh_acct(ct, conntrackinfo, skb,
- ct->proto.gre.timeout);
-
- return NF_ACCEPT;
-}
-
-/* Called when a new connection for this protocol found. */
-static int gre_new(struct ip_conntrack *ct,
- const struct sk_buff *skb)
-{
- DEBUGP(": ");
- DUMP_TUPLE_GRE(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple);
-
-	/* initialize to sane values. Ideally a conntrack helper
-	 * (e.g. in the case of pptp) will increase them */
- ct->proto.gre.stream_timeout = GRE_STREAM_TIMEOUT;
- ct->proto.gre.timeout = GRE_TIMEOUT;
-
- return 1;
-}
-
-/* Called when a conntrack entry has already been removed from the hashes
- * and is about to be deleted from memory */
-static void gre_destroy(struct ip_conntrack *ct)
-{
- struct ip_conntrack *master = ct->master;
- DEBUGP(" entering\n");
-
- if (!master)
- DEBUGP("no master !?!\n");
- else
- ip_ct_gre_keymap_destroy(master);
-}
-
-/* protocol helper struct */
-static struct ip_conntrack_protocol gre = {
- .proto = IPPROTO_GRE,
- .name = "gre",
- .pkt_to_tuple = gre_pkt_to_tuple,
- .invert_tuple = gre_invert_tuple,
- .print_tuple = gre_print_tuple,
- .print_conntrack = gre_print_conntrack,
- .packet = gre_packet,
- .new = gre_new,
- .destroy = gre_destroy,
- .me = THIS_MODULE,
-#if defined(CONFIG_IP_NF_CONNTRACK_NETLINK) || \
- defined(CONFIG_IP_NF_CONNTRACK_NETLINK_MODULE)
- .tuple_to_nfattr = ip_ct_port_tuple_to_nfattr,
- .nfattr_to_tuple = ip_ct_port_nfattr_to_tuple,
-#endif
-};
-
-/* ip_conntrack_proto_gre initialization */
-int __init ip_ct_proto_gre_init(void)
-{
- return ip_conntrack_protocol_register(&gre);
-}
-
-/* This cannot be __exit, as it is invoked from ip_conntrack_helper_pptp.c's
- * init() code on errors.
- */
-void ip_ct_proto_gre_fini(void)
-{
- struct list_head *pos, *n;
-
- /* delete all keymap entries */
- write_lock_bh(&ip_ct_gre_lock);
- list_for_each_safe(pos, n, &gre_keymap_list) {
- DEBUGP("deleting keymap %p at module unload time\n", pos);
- list_del(pos);
- kfree(pos);
- }
- write_unlock_bh(&ip_ct_gre_lock);
-
- ip_conntrack_protocol_unregister(&gre);
-}
-
-EXPORT_SYMBOL(ip_ct_gre_keymap_add);
-EXPORT_SYMBOL(ip_ct_gre_keymap_destroy);
diff --git a/net/ipv4/netfilter/ip_conntrack_proto_icmp.c b/net/ipv4/netfilter/ip_conntrack_proto_icmp.c
deleted file mode 100644
index ad70c81a21e..00000000000
--- a/net/ipv4/netfilter/ip_conntrack_proto_icmp.c
+++ /dev/null
@@ -1,315 +0,0 @@
-/* (C) 1999-2001 Paul `Rusty' Russell
- * (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
-
-#include <linux/types.h>
-#include <linux/timer.h>
-#include <linux/netfilter.h>
-#include <linux/in.h>
-#include <linux/icmp.h>
-#include <linux/seq_file.h>
-#include <linux/skbuff.h>
-#include <net/ip.h>
-#include <net/checksum.h>
-#include <linux/netfilter_ipv4.h>
-#include <linux/netfilter_ipv4/ip_conntrack.h>
-#include <linux/netfilter_ipv4/ip_conntrack_core.h>
-#include <linux/netfilter_ipv4/ip_conntrack_protocol.h>
-
-unsigned int ip_ct_icmp_timeout __read_mostly = 30*HZ;
-
-#if 0
-#define DEBUGP printk
-#else
-#define DEBUGP(format, args...)
-#endif
-
-static int icmp_pkt_to_tuple(const struct sk_buff *skb,
- unsigned int dataoff,
- struct ip_conntrack_tuple *tuple)
-{
- struct icmphdr _hdr, *hp;
-
- hp = skb_header_pointer(skb, dataoff, sizeof(_hdr), &_hdr);
- if (hp == NULL)
- return 0;
-
- tuple->dst.u.icmp.type = hp->type;
- tuple->src.u.icmp.id = hp->un.echo.id;
- tuple->dst.u.icmp.code = hp->code;
-
- return 1;
-}
-
-/* Add 1; spaces filled with 0. */
-static const u_int8_t invmap[] = {
- [ICMP_ECHO] = ICMP_ECHOREPLY + 1,
- [ICMP_ECHOREPLY] = ICMP_ECHO + 1,
- [ICMP_TIMESTAMP] = ICMP_TIMESTAMPREPLY + 1,
- [ICMP_TIMESTAMPREPLY] = ICMP_TIMESTAMP + 1,
- [ICMP_INFO_REQUEST] = ICMP_INFO_REPLY + 1,
- [ICMP_INFO_REPLY] = ICMP_INFO_REQUEST + 1,
- [ICMP_ADDRESS] = ICMP_ADDRESSREPLY + 1,
- [ICMP_ADDRESSREPLY] = ICMP_ADDRESS + 1
-};
-
-static int icmp_invert_tuple(struct ip_conntrack_tuple *tuple,
- const struct ip_conntrack_tuple *orig)
-{
- if (orig->dst.u.icmp.type >= sizeof(invmap)
- || !invmap[orig->dst.u.icmp.type])
- return 0;
-
- tuple->src.u.icmp.id = orig->src.u.icmp.id;
- tuple->dst.u.icmp.type = invmap[orig->dst.u.icmp.type] - 1;
- tuple->dst.u.icmp.code = orig->dst.u.icmp.code;
- return 1;
-}
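"Add 1; spaces filled with 0" describes a sentinel trick: invmap stores each inverse ICMP type plus one, so a zero entry means "no inverse" and real lookups subtract one after the check. A standalone demonstration, keeping only the echo pair:

#include <stdio.h>

#define ICMP_ECHOREPLY	0
#define ICMP_ECHO	8

/* entries hold inverse+1; unset slots stay 0 = "not invertible" */
static const unsigned char invmap[] = {
	[ICMP_ECHO]      = ICMP_ECHOREPLY + 1,
	[ICMP_ECHOREPLY] = ICMP_ECHO + 1,
};

static int invert_type(unsigned int type)
{
	if (type >= sizeof(invmap) || !invmap[type])
		return -1;		/* no inverse defined */
	return invmap[type] - 1;
}

int main(void)
{
	printf("inverse of ECHO (8): %d\n", invert_type(ICMP_ECHO));
	printf("inverse of type 3:   %d\n", invert_type(3));
	return 0;
}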
-
-/* Print out the per-protocol part of the tuple. */
-static int icmp_print_tuple(struct seq_file *s,
- const struct ip_conntrack_tuple *tuple)
-{
- return seq_printf(s, "type=%u code=%u id=%u ",
- tuple->dst.u.icmp.type,
- tuple->dst.u.icmp.code,
- ntohs(tuple->src.u.icmp.id));
-}
-
-/* Print out the private part of the conntrack. */
-static int icmp_print_conntrack(struct seq_file *s,
- const struct ip_conntrack *conntrack)
-{
- return 0;
-}
-
-/* Returns verdict for packet, or -1 for invalid. */
-static int icmp_packet(struct ip_conntrack *ct,
- const struct sk_buff *skb,
- enum ip_conntrack_info ctinfo)
-{
- /* Try to delete connection immediately after all replies:
- won't actually vanish as we still have skb, and del_timer
- means this will only run once even if count hits zero twice
- (theoretically possible with SMP) */
- if (CTINFO2DIR(ctinfo) == IP_CT_DIR_REPLY) {
- if (atomic_dec_and_test(&ct->proto.icmp.count)
- && del_timer(&ct->timeout))
- ct->timeout.function((unsigned long)ct);
- } else {
- atomic_inc(&ct->proto.icmp.count);
- ip_conntrack_event_cache(IPCT_PROTOINFO_VOLATILE, skb);
- ip_ct_refresh_acct(ct, ctinfo, skb, ip_ct_icmp_timeout);
- }
-
- return NF_ACCEPT;
-}
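The comment in icmp_packet() leans on del_timer() reporting whether the timer was still pending, so teardown runs exactly once even if two replies hit zero concurrently. A userspace sketch of the same fire-once idea with C11 atomics; the one-shot flag is a stand-in for the del_timer() test, not how the kernel implements it:

#include <stdatomic.h>
#include <stdio.h>

static atomic_int replies = 2;
static atomic_flag torn_down = ATOMIC_FLAG_INIT;

static void handle_reply(void)
{
	/* fetch_sub returns the old value; old == 1 means we hit zero */
	if (atomic_fetch_sub(&replies, 1) == 1 &&
	    !atomic_flag_test_and_set(&torn_down))
		puts("teardown runs exactly once");
}

int main(void)
{
	handle_reply();
	handle_reply();		/* count reaches zero here */
	handle_reply();		/* late extra reply: nothing fires */
	return 0;
}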
-
-/* Called when a new connection for this protocol found. */
-static int icmp_new(struct ip_conntrack *conntrack,
- const struct sk_buff *skb)
-{
- static const u_int8_t valid_new[] = {
- [ICMP_ECHO] = 1,
- [ICMP_TIMESTAMP] = 1,
- [ICMP_INFO_REQUEST] = 1,
- [ICMP_ADDRESS] = 1
- };
-
- if (conntrack->tuplehash[0].tuple.dst.u.icmp.type >= sizeof(valid_new)
- || !valid_new[conntrack->tuplehash[0].tuple.dst.u.icmp.type]) {
- /* Can't create a new ICMP `conn' with this. */
- DEBUGP("icmp: can't create new conn with type %u\n",
- conntrack->tuplehash[0].tuple.dst.u.icmp.type);
- DUMP_TUPLE(&conntrack->tuplehash[0].tuple);
- return 0;
- }
- atomic_set(&conntrack->proto.icmp.count, 0);
- return 1;
-}
-
-static int
-icmp_error_message(struct sk_buff *skb,
- enum ip_conntrack_info *ctinfo,
- unsigned int hooknum)
-{
- struct ip_conntrack_tuple innertuple, origtuple;
- struct {
- struct icmphdr icmp;
- struct iphdr ip;
- } _in, *inside;
- struct ip_conntrack_protocol *innerproto;
- struct ip_conntrack_tuple_hash *h;
- int dataoff;
-
- IP_NF_ASSERT(skb->nfct == NULL);
-
- /* Not enough header? */
- inside = skb_header_pointer(skb, skb->nh.iph->ihl*4, sizeof(_in), &_in);
- if (inside == NULL)
- return -NF_ACCEPT;
-
-	/* Ignore ICMPs containing fragments (shouldn't happen) */
- if (inside->ip.frag_off & htons(IP_OFFSET)) {
- DEBUGP("icmp_error_track: fragment of proto %u\n",
- inside->ip.protocol);
- return -NF_ACCEPT;
- }
-
- innerproto = ip_conntrack_proto_find_get(inside->ip.protocol);
- dataoff = skb->nh.iph->ihl*4 + sizeof(inside->icmp) + inside->ip.ihl*4;
- /* Are they talking about one of our connections? */
- if (!ip_ct_get_tuple(&inside->ip, skb, dataoff, &origtuple, innerproto)) {
- DEBUGP("icmp_error: ! get_tuple p=%u", inside->ip.protocol);
- ip_conntrack_proto_put(innerproto);
- return -NF_ACCEPT;
- }
-
- /* Ordinarily, we'd expect the inverted tupleproto, but it's
- been preserved inside the ICMP. */
- if (!ip_ct_invert_tuple(&innertuple, &origtuple, innerproto)) {
- DEBUGP("icmp_error_track: Can't invert tuple\n");
- ip_conntrack_proto_put(innerproto);
- return -NF_ACCEPT;
- }
- ip_conntrack_proto_put(innerproto);
-
- *ctinfo = IP_CT_RELATED;
-
- h = ip_conntrack_find_get(&innertuple, NULL);
- if (!h) {
- /* Locally generated ICMPs will match inverted if they
- haven't been SNAT'ed yet */
- /* FIXME: NAT code has to handle half-done double NAT --RR */
- if (hooknum == NF_IP_LOCAL_OUT)
- h = ip_conntrack_find_get(&origtuple, NULL);
-
- if (!h) {
- DEBUGP("icmp_error_track: no match\n");
- return -NF_ACCEPT;
- }
- /* Reverse direction from that found */
- if (DIRECTION(h) != IP_CT_DIR_REPLY)
- *ctinfo += IP_CT_IS_REPLY;
- } else {
- if (DIRECTION(h) == IP_CT_DIR_REPLY)
- *ctinfo += IP_CT_IS_REPLY;
- }
-
- /* Update skb to refer to this connection */
- skb->nfct = &tuplehash_to_ctrack(h)->ct_general;
- skb->nfctinfo = *ctinfo;
- return -NF_ACCEPT;
-}
-
-/* Small and modified version of icmp_rcv */
-static int
-icmp_error(struct sk_buff *skb, enum ip_conntrack_info *ctinfo,
- unsigned int hooknum)
-{
- struct icmphdr _ih, *icmph;
-
- /* Not enough header? */
- icmph = skb_header_pointer(skb, skb->nh.iph->ihl*4, sizeof(_ih), &_ih);
- if (icmph == NULL) {
- if (LOG_INVALID(IPPROTO_ICMP))
- nf_log_packet(PF_INET, 0, skb, NULL, NULL, NULL,
- "ip_ct_icmp: short packet ");
- return -NF_ACCEPT;
- }
-
- /* See ip_conntrack_proto_tcp.c */
- if (ip_conntrack_checksum && hooknum == NF_IP_PRE_ROUTING &&
- nf_ip_checksum(skb, hooknum, skb->nh.iph->ihl * 4, 0)) {
- if (LOG_INVALID(IPPROTO_ICMP))
- nf_log_packet(PF_INET, 0, skb, NULL, NULL, NULL,
- "ip_ct_icmp: bad ICMP checksum ");
- return -NF_ACCEPT;
- }
-
- /*
- * 18 is the highest 'known' ICMP type. Anything else is a mystery
- *
-	 *	RFC 1122: 3.2.2  Unknown ICMP message types MUST be silently
- * discarded.
- */
- if (icmph->type > NR_ICMP_TYPES) {
- if (LOG_INVALID(IPPROTO_ICMP))
- nf_log_packet(PF_INET, 0, skb, NULL, NULL, NULL,
- "ip_ct_icmp: invalid ICMP type ");
- return -NF_ACCEPT;
- }
-
- /* Need to track icmp error message? */
- if (icmph->type != ICMP_DEST_UNREACH
- && icmph->type != ICMP_SOURCE_QUENCH
- && icmph->type != ICMP_TIME_EXCEEDED
- && icmph->type != ICMP_PARAMETERPROB
- && icmph->type != ICMP_REDIRECT)
- return NF_ACCEPT;
-
- return icmp_error_message(skb, ctinfo, hooknum);
-}
-
-#if defined(CONFIG_IP_NF_CONNTRACK_NETLINK) || \
- defined(CONFIG_IP_NF_CONNTRACK_NETLINK_MODULE)
-static int icmp_tuple_to_nfattr(struct sk_buff *skb,
- const struct ip_conntrack_tuple *t)
-{
- NFA_PUT(skb, CTA_PROTO_ICMP_ID, sizeof(__be16),
- &t->src.u.icmp.id);
- NFA_PUT(skb, CTA_PROTO_ICMP_TYPE, sizeof(u_int8_t),
- &t->dst.u.icmp.type);
- NFA_PUT(skb, CTA_PROTO_ICMP_CODE, sizeof(u_int8_t),
- &t->dst.u.icmp.code);
-
- return 0;
-
-nfattr_failure:
- return -1;
-}
-
-static int icmp_nfattr_to_tuple(struct nfattr *tb[],
- struct ip_conntrack_tuple *tuple)
-{
- if (!tb[CTA_PROTO_ICMP_TYPE-1]
- || !tb[CTA_PROTO_ICMP_CODE-1]
- || !tb[CTA_PROTO_ICMP_ID-1])
- return -EINVAL;
-
- tuple->dst.u.icmp.type =
- *(u_int8_t *)NFA_DATA(tb[CTA_PROTO_ICMP_TYPE-1]);
- tuple->dst.u.icmp.code =
- *(u_int8_t *)NFA_DATA(tb[CTA_PROTO_ICMP_CODE-1]);
- tuple->src.u.icmp.id =
- *(__be16 *)NFA_DATA(tb[CTA_PROTO_ICMP_ID-1]);
-
- if (tuple->dst.u.icmp.type >= sizeof(invmap)
- || !invmap[tuple->dst.u.icmp.type])
- return -EINVAL;
-
- return 0;
-}
-#endif
-
-struct ip_conntrack_protocol ip_conntrack_protocol_icmp =
-{
- .proto = IPPROTO_ICMP,
- .name = "icmp",
- .pkt_to_tuple = icmp_pkt_to_tuple,
- .invert_tuple = icmp_invert_tuple,
- .print_tuple = icmp_print_tuple,
- .print_conntrack = icmp_print_conntrack,
- .packet = icmp_packet,
- .new = icmp_new,
- .error = icmp_error,
-#if defined(CONFIG_IP_NF_CONNTRACK_NETLINK) || \
- defined(CONFIG_IP_NF_CONNTRACK_NETLINK_MODULE)
- .tuple_to_nfattr = icmp_tuple_to_nfattr,
- .nfattr_to_tuple = icmp_nfattr_to_tuple,
-#endif
-};
diff --git a/net/ipv4/netfilter/ip_conntrack_proto_sctp.c b/net/ipv4/netfilter/ip_conntrack_proto_sctp.c
deleted file mode 100644
index e6942992b2f..00000000000
--- a/net/ipv4/netfilter/ip_conntrack_proto_sctp.c
+++ /dev/null
@@ -1,659 +0,0 @@
-/*
- * Connection tracking protocol helper module for SCTP.
- *
- * SCTP is defined in RFC 2960. References to various sections in this code
- * are to this RFC.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
-
-/*
- * Added support for proc manipulation of timeouts.
- */
-
-#include <linux/types.h>
-#include <linux/timer.h>
-#include <linux/interrupt.h>
-#include <linux/netfilter.h>
-#include <linux/module.h>
-#include <linux/in.h>
-#include <linux/ip.h>
-#include <linux/sctp.h>
-#include <linux/string.h>
-#include <linux/seq_file.h>
-
-#include <linux/netfilter_ipv4/ip_conntrack.h>
-#include <linux/netfilter_ipv4/ip_conntrack_protocol.h>
-
-#if 0
-#define DEBUGP(format, ...) printk(format, ## __VA_ARGS__)
-#else
-#define DEBUGP(format, args...)
-#endif
-
-/* Protects conntrack->proto.sctp */
-static DEFINE_RWLOCK(sctp_lock);
-
-/* FIXME: Examine ipfilter's timeouts and conntrack transitions more
- closely. They're more complex. --RR
-
- And so for me for SCTP :D -Kiran */
-
-static const char *sctp_conntrack_names[] = {
- "NONE",
- "CLOSED",
- "COOKIE_WAIT",
- "COOKIE_ECHOED",
- "ESTABLISHED",
- "SHUTDOWN_SENT",
- "SHUTDOWN_RECD",
- "SHUTDOWN_ACK_SENT",
-};
-
-#define SECS * HZ
-#define MINS * 60 SECS
-#define HOURS * 60 MINS
-#define DAYS * 24 HOURS
-
-static unsigned int ip_ct_sctp_timeout_closed __read_mostly = 10 SECS;
-static unsigned int ip_ct_sctp_timeout_cookie_wait __read_mostly = 3 SECS;
-static unsigned int ip_ct_sctp_timeout_cookie_echoed __read_mostly = 3 SECS;
-static unsigned int ip_ct_sctp_timeout_established __read_mostly = 5 DAYS;
-static unsigned int ip_ct_sctp_timeout_shutdown_sent __read_mostly = 300 SECS / 1000;
-static unsigned int ip_ct_sctp_timeout_shutdown_recd __read_mostly = 300 SECS / 1000;
-static unsigned int ip_ct_sctp_timeout_shutdown_ack_sent __read_mostly = 3 SECS;
-
-static const unsigned int * sctp_timeouts[]
-= { NULL, /* SCTP_CONNTRACK_NONE */
- &ip_ct_sctp_timeout_closed, /* SCTP_CONNTRACK_CLOSED */
- &ip_ct_sctp_timeout_cookie_wait, /* SCTP_CONNTRACK_COOKIE_WAIT */
- &ip_ct_sctp_timeout_cookie_echoed, /* SCTP_CONNTRACK_COOKIE_ECHOED */
- &ip_ct_sctp_timeout_established, /* SCTP_CONNTRACK_ESTABLISHED */
- &ip_ct_sctp_timeout_shutdown_sent, /* SCTP_CONNTRACK_SHUTDOWN_SENT */
- &ip_ct_sctp_timeout_shutdown_recd, /* SCTP_CONNTRACK_SHUTDOWN_RECD */
- &ip_ct_sctp_timeout_shutdown_ack_sent /* SCTP_CONNTRACK_SHUTDOWN_ACK_SENT */
- };
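The SECS/MINS/HOURS/DAYS macros above keep the timeout table readable: a literal like 3 SECS expands to 3 * HZ, and the larger units chain the multiplications. A tiny demo of the expansion; HZ = 100 is an assumed value for illustration:

#include <stdio.h>

#define HZ 100
#define SECS  * HZ
#define MINS  * 60 SECS
#define HOURS * 60 MINS
#define DAYS  * 24 HOURS

int main(void)
{
	printf("3 SECS = %d jiffies\n", 3 SECS);	/* 3 * 100 */
	printf("5 DAYS = %d jiffies\n", 5 DAYS);	/* 5*24*60*60*100 */
	return 0;
}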
-
-#define sNO SCTP_CONNTRACK_NONE
-#define sCL SCTP_CONNTRACK_CLOSED
-#define sCW SCTP_CONNTRACK_COOKIE_WAIT
-#define sCE SCTP_CONNTRACK_COOKIE_ECHOED
-#define sES SCTP_CONNTRACK_ESTABLISHED
-#define sSS SCTP_CONNTRACK_SHUTDOWN_SENT
-#define sSR SCTP_CONNTRACK_SHUTDOWN_RECD
-#define sSA SCTP_CONNTRACK_SHUTDOWN_ACK_SENT
-#define sIV SCTP_CONNTRACK_MAX
-
-/*
- These are the descriptions of the states:
-
-NOTE: These state names are tantalizingly similar to the states of an
-SCTP endpoint. But the interpretation of the states is a little different,
-considering that these are the states of the connection and not of an end
-point. Please note the subtleties. -Kiran
-
-NONE - Nothing so far.
-COOKIE WAIT - We have seen an INIT chunk in the original direction, or an
-	      INIT_ACK chunk in the reply direction.
-COOKIE ECHOED - We have seen a COOKIE_ECHO chunk in the original direction.
-ESTABLISHED - We have seen a COOKIE_ACK in the reply direction.
-SHUTDOWN_SENT - We have seen a SHUTDOWN chunk in the original direction.
-SHUTDOWN_RECD - We have seen a SHUTDOWN chunk in the reply direction.
-SHUTDOWN_ACK_SENT - We have seen a SHUTDOWN_ACK chunk in the direction opposite
- to that of the SHUTDOWN chunk.
-CLOSED - We have seen a SHUTDOWN_COMPLETE chunk in the direction of
- the SHUTDOWN chunk. Connection is closed.
-*/
-
-/* TODO
- - I have assumed that the first INIT is in the original direction.
-   This messes things up when an INIT comes in the reply direction
-   in the CLOSED state.
- - Check the error type in the reply dir before transitioning from
-   cookie echoed to closed.
- - Sec 5.2.4 of RFC 2960
- - Multi Homing support.
-*/
-
-/* SCTP conntrack state transitions */
-static const enum sctp_conntrack sctp_conntracks[2][9][SCTP_CONNTRACK_MAX] = {
- {
-/* ORIGINAL */
-/* sNO, sCL, sCW, sCE, sES, sSS, sSR, sSA */
-/* init */ {sCW, sCW, sCW, sCE, sES, sSS, sSR, sSA},
-/* init_ack */ {sCL, sCL, sCW, sCE, sES, sSS, sSR, sSA},
-/* abort */ {sCL, sCL, sCL, sCL, sCL, sCL, sCL, sCL},
-/* shutdown */ {sCL, sCL, sCW, sCE, sSS, sSS, sSR, sSA},
-/* shutdown_ack */ {sSA, sCL, sCW, sCE, sES, sSA, sSA, sSA},
-/* error        */ {sCL, sCL, sCW, sCE, sES, sSS, sSR, sSA},/* Can't have stale cookie */
-/* cookie_echo */ {sCL, sCL, sCE, sCE, sES, sSS, sSR, sSA},/* 5.2.4 - Big TODO */
-/* cookie_ack   */ {sCL, sCL, sCW, sCE, sES, sSS, sSR, sSA},/* Can't come in orig dir */
-/* shutdown_comp*/ {sCL, sCL, sCW, sCE, sES, sSS, sSR, sCL}
- },
- {
-/* REPLY */
-/* sNO, sCL, sCW, sCE, sES, sSS, sSR, sSA */
-/* init */ {sIV, sCL, sCW, sCE, sES, sSS, sSR, sSA},/* INIT in sCL Big TODO */
-/* init_ack */ {sIV, sCL, sCW, sCE, sES, sSS, sSR, sSA},
-/* abort */ {sIV, sCL, sCL, sCL, sCL, sCL, sCL, sCL},
-/* shutdown */ {sIV, sCL, sCW, sCE, sSR, sSS, sSR, sSA},
-/* shutdown_ack */ {sIV, sCL, sCW, sCE, sES, sSA, sSA, sSA},
-/* error */ {sIV, sCL, sCW, sCL, sES, sSS, sSR, sSA},
-/* cookie_echo  */ {sIV, sCL, sCW, sCE, sES, sSS, sSR, sSA},/* Can't come in reply dir */
-/* cookie_ack */ {sIV, sCL, sCW, sES, sES, sSS, sSR, sSA},
-/* shutdown_comp*/ {sIV, sCL, sCW, sCE, sES, sSS, sSR, sCL}
- }
-};
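The table is indexed as sctp_conntracks[direction][chunk row][current state]: an INIT (row 0) in the original direction while in NONE yields COOKIE_WAIT, which is exactly what new_state() computes below. A cut-down replica of the indexing, keeping only two chunk rows:

#include <stdio.h>

enum { sNO, sCL, sCW, sCE, sES, sSS, sSR, sSA, sMAX };

static const char *names[] = {
	"NONE", "CLOSED", "COOKIE_WAIT", "COOKIE_ECHOED",
	"ESTABLISHED", "SHUTDOWN_SENT", "SHUTDOWN_RECD",
	"SHUTDOWN_ACK_SENT",
};

/* [dir][chunk][state]; only the init and abort rows are shown */
static const int transitions[2][2][sMAX] = {
	{ /* ORIGINAL */
		{ sCW, sCW, sCW, sCE, sES, sSS, sSR, sSA },	/* init */
		{ sCL, sCL, sCL, sCL, sCL, sCL, sCL, sCL },	/* abort */
	},
	{ /* REPLY */
		{ sMAX, sCL, sCW, sCE, sES, sSS, sSR, sSA },	/* init */
		{ sMAX, sCL, sCL, sCL, sCL, sCL, sCL, sCL },	/* abort */
	},
};

int main(void)
{
	int next = transitions[0][0][sNO];	/* INIT, orig dir, NONE */
	printf("NONE + INIT (orig) -> %s\n", names[next]);
	return 0;
}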
-
-static int sctp_pkt_to_tuple(const struct sk_buff *skb,
- unsigned int dataoff,
- struct ip_conntrack_tuple *tuple)
-{
- sctp_sctphdr_t _hdr, *hp;
-
- DEBUGP(__FUNCTION__);
- DEBUGP("\n");
-
- /* Actually only need first 8 bytes. */
- hp = skb_header_pointer(skb, dataoff, 8, &_hdr);
- if (hp == NULL)
- return 0;
-
- tuple->src.u.sctp.port = hp->source;
- tuple->dst.u.sctp.port = hp->dest;
- return 1;
-}
-
-static int sctp_invert_tuple(struct ip_conntrack_tuple *tuple,
- const struct ip_conntrack_tuple *orig)
-{
- DEBUGP(__FUNCTION__);
- DEBUGP("\n");
-
- tuple->src.u.sctp.port = orig->dst.u.sctp.port;
- tuple->dst.u.sctp.port = orig->src.u.sctp.port;
- return 1;
-}
-
-/* Print out the per-protocol part of the tuple. */
-static int sctp_print_tuple(struct seq_file *s,
- const struct ip_conntrack_tuple *tuple)
-{
- DEBUGP(__FUNCTION__);
- DEBUGP("\n");
-
- return seq_printf(s, "sport=%hu dport=%hu ",
- ntohs(tuple->src.u.sctp.port),
- ntohs(tuple->dst.u.sctp.port));
-}
-
-/* Print out the private part of the conntrack. */
-static int sctp_print_conntrack(struct seq_file *s,
- const struct ip_conntrack *conntrack)
-{
- enum sctp_conntrack state;
-
- DEBUGP(__FUNCTION__);
- DEBUGP("\n");
-
- read_lock_bh(&sctp_lock);
- state = conntrack->proto.sctp.state;
- read_unlock_bh(&sctp_lock);
-
- return seq_printf(s, "%s ", sctp_conntrack_names[state]);
-}
-
-#define for_each_sctp_chunk(skb, sch, _sch, offset, count) \
-for (offset = skb->nh.iph->ihl * 4 + sizeof(sctp_sctphdr_t), count = 0; \
- offset < skb->len && \
- (sch = skb_header_pointer(skb, offset, sizeof(_sch), &_sch)); \
- offset += (ntohs(sch->length) + 3) & ~3, count++)
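The walker advances by each chunk's length rounded up to a 4-byte boundary with (len + 3) & ~3, because SCTP pads every chunk to 32-bit alignment. The round-up in isolation:

#include <stdio.h>

static unsigned int pad4(unsigned int len)
{
	return (len + 3) & ~3u;		/* next multiple of 4 */
}

int main(void)
{
	printf("pad4(17) = %u\n", pad4(17));	/* 20 */
	printf("pad4(20) = %u\n", pad4(20));	/* already aligned: 20 */
	return 0;
}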
-
-/* Some validity checks to make sure the chunks are fine */
-static int do_basic_checks(struct ip_conntrack *conntrack,
- const struct sk_buff *skb,
- char *map)
-{
- u_int32_t offset, count;
- sctp_chunkhdr_t _sch, *sch;
- int flag;
-
- DEBUGP(__FUNCTION__);
- DEBUGP("\n");
-
- flag = 0;
-
- for_each_sctp_chunk (skb, sch, _sch, offset, count) {
- DEBUGP("Chunk Num: %d Type: %d\n", count, sch->type);
-
- if (sch->type == SCTP_CID_INIT
- || sch->type == SCTP_CID_INIT_ACK
- || sch->type == SCTP_CID_SHUTDOWN_COMPLETE) {
- flag = 1;
- }
-
- /*
- * Cookie Ack/Echo chunks not the first OR
- * Init / Init Ack / Shutdown compl chunks not the only chunks
- * OR zero-length.
- */
- if (((sch->type == SCTP_CID_COOKIE_ACK
- || sch->type == SCTP_CID_COOKIE_ECHO
- || flag)
- && count !=0) || !sch->length) {
- DEBUGP("Basic checks failed\n");
- return 1;
- }
-
- if (map) {
- set_bit(sch->type, (void *)map);
- }
- }
-
- DEBUGP("Basic checks passed\n");
- return count == 0;
-}
-
-static int new_state(enum ip_conntrack_dir dir,
- enum sctp_conntrack cur_state,
- int chunk_type)
-{
- int i;
-
- DEBUGP(__FUNCTION__);
- DEBUGP("\n");
-
- DEBUGP("Chunk type: %d\n", chunk_type);
-
- switch (chunk_type) {
- case SCTP_CID_INIT:
- DEBUGP("SCTP_CID_INIT\n");
- i = 0; break;
- case SCTP_CID_INIT_ACK:
- DEBUGP("SCTP_CID_INIT_ACK\n");
- i = 1; break;
- case SCTP_CID_ABORT:
- DEBUGP("SCTP_CID_ABORT\n");
- i = 2; break;
- case SCTP_CID_SHUTDOWN:
- DEBUGP("SCTP_CID_SHUTDOWN\n");
- i = 3; break;
- case SCTP_CID_SHUTDOWN_ACK:
- DEBUGP("SCTP_CID_SHUTDOWN_ACK\n");
- i = 4; break;
- case SCTP_CID_ERROR:
- DEBUGP("SCTP_CID_ERROR\n");
- i = 5; break;
- case SCTP_CID_COOKIE_ECHO:
- DEBUGP("SCTP_CID_COOKIE_ECHO\n");
- i = 6; break;
- case SCTP_CID_COOKIE_ACK:
- DEBUGP("SCTP_CID_COOKIE_ACK\n");
- i = 7; break;
- case SCTP_CID_SHUTDOWN_COMPLETE:
- DEBUGP("SCTP_CID_SHUTDOWN_COMPLETE\n");
- i = 8; break;
- default:
-		/* Other chunks, like DATA, SACK, HEARTBEAT and
-		   its ACK, do not cause a change in state */
- DEBUGP("Unknown chunk type, Will stay in %s\n",
- sctp_conntrack_names[cur_state]);
- return cur_state;
- }
-
- DEBUGP("dir: %d cur_state: %s chunk_type: %d new_state: %s\n",
- dir, sctp_conntrack_names[cur_state], chunk_type,
- sctp_conntrack_names[sctp_conntracks[dir][i][cur_state]]);
-
- return sctp_conntracks[dir][i][cur_state];
-}
-
-/* Returns verdict for packet, or -1 for invalid. */
-static int sctp_packet(struct ip_conntrack *conntrack,
- const struct sk_buff *skb,
- enum ip_conntrack_info ctinfo)
-{
- enum sctp_conntrack newconntrack, oldsctpstate;
- struct iphdr *iph = skb->nh.iph;
- sctp_sctphdr_t _sctph, *sh;
- sctp_chunkhdr_t _sch, *sch;
- u_int32_t offset, count;
- char map[256 / sizeof (char)] = {0};
-
- DEBUGP(__FUNCTION__);
- DEBUGP("\n");
-
- sh = skb_header_pointer(skb, iph->ihl * 4, sizeof(_sctph), &_sctph);
- if (sh == NULL)
- return -1;
-
- if (do_basic_checks(conntrack, skb, map) != 0)
- return -1;
-
- /* Check the verification tag (Sec 8.5) */
- if (!test_bit(SCTP_CID_INIT, (void *)map)
- && !test_bit(SCTP_CID_SHUTDOWN_COMPLETE, (void *)map)
- && !test_bit(SCTP_CID_COOKIE_ECHO, (void *)map)
- && !test_bit(SCTP_CID_ABORT, (void *)map)
- && !test_bit(SCTP_CID_SHUTDOWN_ACK, (void *)map)
- && (sh->vtag != conntrack->proto.sctp.vtag[CTINFO2DIR(ctinfo)])) {
- DEBUGP("Verification tag check failed\n");
- return -1;
- }
-
- oldsctpstate = newconntrack = SCTP_CONNTRACK_MAX;
- for_each_sctp_chunk (skb, sch, _sch, offset, count) {
- write_lock_bh(&sctp_lock);
-
- /* Special cases of Verification tag check (Sec 8.5.1) */
- if (sch->type == SCTP_CID_INIT) {
- /* Sec 8.5.1 (A) */
- if (sh->vtag != 0) {
- write_unlock_bh(&sctp_lock);
- return -1;
- }
- } else if (sch->type == SCTP_CID_ABORT) {
- /* Sec 8.5.1 (B) */
- if (!(sh->vtag == conntrack->proto.sctp.vtag[CTINFO2DIR(ctinfo)])
- && !(sh->vtag == conntrack->proto.sctp.vtag
- [1 - CTINFO2DIR(ctinfo)])) {
- write_unlock_bh(&sctp_lock);
- return -1;
- }
- } else if (sch->type == SCTP_CID_SHUTDOWN_COMPLETE) {
- /* Sec 8.5.1 (C) */
- if (!(sh->vtag == conntrack->proto.sctp.vtag[CTINFO2DIR(ctinfo)])
- && !(sh->vtag == conntrack->proto.sctp.vtag
- [1 - CTINFO2DIR(ctinfo)]
- && (sch->flags & 1))) {
- write_unlock_bh(&sctp_lock);
- return -1;
- }
- } else if (sch->type == SCTP_CID_COOKIE_ECHO) {
- /* Sec 8.5.1 (D) */
- if (!(sh->vtag == conntrack->proto.sctp.vtag[CTINFO2DIR(ctinfo)])) {
- write_unlock_bh(&sctp_lock);
- return -1;
- }
- }
-
- oldsctpstate = conntrack->proto.sctp.state;
- newconntrack = new_state(CTINFO2DIR(ctinfo), oldsctpstate, sch->type);
-
- /* Invalid */
- if (newconntrack == SCTP_CONNTRACK_MAX) {
- DEBUGP("ip_conntrack_sctp: Invalid dir=%i ctype=%u conntrack=%u\n",
- CTINFO2DIR(ctinfo), sch->type, oldsctpstate);
- write_unlock_bh(&sctp_lock);
- return -1;
- }
-
- /* If it is an INIT or an INIT ACK note down the vtag */
- if (sch->type == SCTP_CID_INIT
- || sch->type == SCTP_CID_INIT_ACK) {
- sctp_inithdr_t _inithdr, *ih;
-
- ih = skb_header_pointer(skb, offset + sizeof(sctp_chunkhdr_t),
- sizeof(_inithdr), &_inithdr);
- if (ih == NULL) {
- write_unlock_bh(&sctp_lock);
- return -1;
- }
- DEBUGP("Setting vtag %x for dir %d\n",
- ih->init_tag, !CTINFO2DIR(ctinfo));
- conntrack->proto.sctp.vtag[!CTINFO2DIR(ctinfo)] = ih->init_tag;
- }
-
- conntrack->proto.sctp.state = newconntrack;
- if (oldsctpstate != newconntrack)
- ip_conntrack_event_cache(IPCT_PROTOINFO, skb);
- write_unlock_bh(&sctp_lock);
- }
-
- ip_ct_refresh_acct(conntrack, ctinfo, skb, *sctp_timeouts[newconntrack]);
-
- if (oldsctpstate == SCTP_CONNTRACK_COOKIE_ECHOED
- && CTINFO2DIR(ctinfo) == IP_CT_DIR_REPLY
- && newconntrack == SCTP_CONNTRACK_ESTABLISHED) {
- DEBUGP("Setting assured bit\n");
- set_bit(IPS_ASSURED_BIT, &conntrack->status);
- ip_conntrack_event_cache(IPCT_STATUS, skb);
- }
-
- return NF_ACCEPT;
-}
-
-/* Called when a new connection for this protocol found. */
-static int sctp_new(struct ip_conntrack *conntrack,
- const struct sk_buff *skb)
-{
- enum sctp_conntrack newconntrack;
- struct iphdr *iph = skb->nh.iph;
- sctp_sctphdr_t _sctph, *sh;
- sctp_chunkhdr_t _sch, *sch;
- u_int32_t offset, count;
- char map[256 / sizeof (char)] = {0};
-
- DEBUGP(__FUNCTION__);
- DEBUGP("\n");
-
- sh = skb_header_pointer(skb, iph->ihl * 4, sizeof(_sctph), &_sctph);
- if (sh == NULL)
- return 0;
-
- if (do_basic_checks(conntrack, skb, map) != 0)
- return 0;
-
- /* If an OOTB packet has any of these chunks discard (Sec 8.4) */
- if ((test_bit (SCTP_CID_ABORT, (void *)map))
- || (test_bit (SCTP_CID_SHUTDOWN_COMPLETE, (void *)map))
- || (test_bit (SCTP_CID_COOKIE_ACK, (void *)map))) {
- return 0;
- }
-
- newconntrack = SCTP_CONNTRACK_MAX;
- for_each_sctp_chunk (skb, sch, _sch, offset, count) {
-		/* Don't need lock here: this conntrack is not in circulation yet */
- newconntrack = new_state (IP_CT_DIR_ORIGINAL,
- SCTP_CONNTRACK_NONE, sch->type);
-
- /* Invalid: delete conntrack */
- if (newconntrack == SCTP_CONNTRACK_MAX) {
-			DEBUGP("ip_conntrack_sctp: invalid new conntrack, deleting.\n");
- return 0;
- }
-
- /* Copy the vtag into the state info */
- if (sch->type == SCTP_CID_INIT) {
- if (sh->vtag == 0) {
- sctp_inithdr_t _inithdr, *ih;
-
- ih = skb_header_pointer(skb, offset + sizeof(sctp_chunkhdr_t),
- sizeof(_inithdr), &_inithdr);
- if (ih == NULL)
- return 0;
-
- DEBUGP("Setting vtag %x for new conn\n",
- ih->init_tag);
-
- conntrack->proto.sctp.vtag[IP_CT_DIR_REPLY] =
- ih->init_tag;
- } else {
- /* Sec 8.5.1 (A) */
- return 0;
- }
- }
-		/* If it is a shutdown ack OOTB packet, we expect a return
-		   shutdown complete, otherwise an ABORT (Sec 8.4 (5) and (8)) */
- else {
- DEBUGP("Setting vtag %x for new conn OOTB\n",
- sh->vtag);
- conntrack->proto.sctp.vtag[IP_CT_DIR_REPLY] = sh->vtag;
- }
-
- conntrack->proto.sctp.state = newconntrack;
- }
-
- return 1;
-}
-
-static struct ip_conntrack_protocol ip_conntrack_protocol_sctp = {
- .proto = IPPROTO_SCTP,
- .name = "sctp",
- .pkt_to_tuple = sctp_pkt_to_tuple,
- .invert_tuple = sctp_invert_tuple,
- .print_tuple = sctp_print_tuple,
- .print_conntrack = sctp_print_conntrack,
- .packet = sctp_packet,
- .new = sctp_new,
- .destroy = NULL,
- .me = THIS_MODULE,
-#if defined(CONFIG_IP_NF_CONNTRACK_NETLINK) || \
- defined(CONFIG_IP_NF_CONNTRACK_NETLINK_MODULE)
- .tuple_to_nfattr = ip_ct_port_tuple_to_nfattr,
- .nfattr_to_tuple = ip_ct_port_nfattr_to_tuple,
-#endif
-};
-
-#ifdef CONFIG_SYSCTL
-static ctl_table ip_ct_sysctl_table[] = {
- {
- .ctl_name = NET_IPV4_NF_CONNTRACK_SCTP_TIMEOUT_CLOSED,
- .procname = "ip_conntrack_sctp_timeout_closed",
- .data = &ip_ct_sctp_timeout_closed,
- .maxlen = sizeof(unsigned int),
- .mode = 0644,
- .proc_handler = &proc_dointvec_jiffies,
- },
- {
- .ctl_name = NET_IPV4_NF_CONNTRACK_SCTP_TIMEOUT_COOKIE_WAIT,
- .procname = "ip_conntrack_sctp_timeout_cookie_wait",
- .data = &ip_ct_sctp_timeout_cookie_wait,
- .maxlen = sizeof(unsigned int),
- .mode = 0644,
- .proc_handler = &proc_dointvec_jiffies,
- },
- {
- .ctl_name = NET_IPV4_NF_CONNTRACK_SCTP_TIMEOUT_COOKIE_ECHOED,
- .procname = "ip_conntrack_sctp_timeout_cookie_echoed",
- .data = &ip_ct_sctp_timeout_cookie_echoed,
- .maxlen = sizeof(unsigned int),
- .mode = 0644,
- .proc_handler = &proc_dointvec_jiffies,
- },
- {
- .ctl_name = NET_IPV4_NF_CONNTRACK_SCTP_TIMEOUT_ESTABLISHED,
- .procname = "ip_conntrack_sctp_timeout_established",
- .data = &ip_ct_sctp_timeout_established,
- .maxlen = sizeof(unsigned int),
- .mode = 0644,
- .proc_handler = &proc_dointvec_jiffies,
- },
- {
- .ctl_name = NET_IPV4_NF_CONNTRACK_SCTP_TIMEOUT_SHUTDOWN_SENT,
- .procname = "ip_conntrack_sctp_timeout_shutdown_sent",
- .data = &ip_ct_sctp_timeout_shutdown_sent,
- .maxlen = sizeof(unsigned int),
- .mode = 0644,
- .proc_handler = &proc_dointvec_jiffies,
- },
- {
- .ctl_name = NET_IPV4_NF_CONNTRACK_SCTP_TIMEOUT_SHUTDOWN_RECD,
- .procname = "ip_conntrack_sctp_timeout_shutdown_recd",
- .data = &ip_ct_sctp_timeout_shutdown_recd,
- .maxlen = sizeof(unsigned int),
- .mode = 0644,
- .proc_handler = &proc_dointvec_jiffies,
- },
- {
- .ctl_name = NET_IPV4_NF_CONNTRACK_SCTP_TIMEOUT_SHUTDOWN_ACK_SENT,
- .procname = "ip_conntrack_sctp_timeout_shutdown_ack_sent",
- .data = &ip_ct_sctp_timeout_shutdown_ack_sent,
- .maxlen = sizeof(unsigned int),
- .mode = 0644,
- .proc_handler = &proc_dointvec_jiffies,
- },
- { .ctl_name = 0 }
-};
-
-static ctl_table ip_ct_netfilter_table[] = {
- {
- .ctl_name = NET_IPV4_NETFILTER,
- .procname = "netfilter",
- .mode = 0555,
- .child = ip_ct_sysctl_table,
- },
- { .ctl_name = 0 }
-};
-
-static ctl_table ip_ct_ipv4_table[] = {
- {
- .ctl_name = NET_IPV4,
- .procname = "ipv4",
- .mode = 0555,
- .child = ip_ct_netfilter_table,
- },
- { .ctl_name = 0 }
-};
-
-static ctl_table ip_ct_net_table[] = {
- {
- .ctl_name = CTL_NET,
- .procname = "net",
- .mode = 0555,
- .child = ip_ct_ipv4_table,
- },
- { .ctl_name = 0 }
-};
-
-static struct ctl_table_header *ip_ct_sysctl_header;
-#endif
-
-static int __init ip_conntrack_proto_sctp_init(void)
-{
- int ret;
-
- ret = ip_conntrack_protocol_register(&ip_conntrack_protocol_sctp);
- if (ret) {
- printk("ip_conntrack_proto_sctp: protocol register failed\n");
- goto out;
- }
-
-#ifdef CONFIG_SYSCTL
- ip_ct_sysctl_header = register_sysctl_table(ip_ct_net_table);
- if (ip_ct_sysctl_header == NULL) {
- ret = -ENOMEM;
- printk("ip_conntrack_proto_sctp: can't register to sysctl.\n");
- goto cleanup;
- }
-#endif
-
- return ret;
-
-#ifdef CONFIG_SYSCTL
- cleanup:
- ip_conntrack_protocol_unregister(&ip_conntrack_protocol_sctp);
-#endif
- out:
- DEBUGP("SCTP conntrack module loading %s\n",
- ret ? "failed": "succeeded");
- return ret;
-}
-
-static void __exit ip_conntrack_proto_sctp_fini(void)
-{
- ip_conntrack_protocol_unregister(&ip_conntrack_protocol_sctp);
-#ifdef CONFIG_SYSCTL
- unregister_sysctl_table(ip_ct_sysctl_header);
-#endif
- DEBUGP("SCTP conntrack module unloaded\n");
-}
-
-module_init(ip_conntrack_proto_sctp_init);
-module_exit(ip_conntrack_proto_sctp_fini);
-
-MODULE_LICENSE("GPL");
-MODULE_AUTHOR("Kiran Kumar Immidi");
-MODULE_DESCRIPTION("Netfilter connection tracking protocol helper for SCTP");
diff --git a/net/ipv4/netfilter/ip_conntrack_proto_tcp.c b/net/ipv4/netfilter/ip_conntrack_proto_tcp.c
deleted file mode 100644
index 0a72eab1462..00000000000
--- a/net/ipv4/netfilter/ip_conntrack_proto_tcp.c
+++ /dev/null
@@ -1,1164 +0,0 @@
-/* (C) 1999-2001 Paul `Rusty' Russell
- * (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
- * Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>:
- * - Real stateful connection tracking
- * - Modified state transitions table
- * - Window scaling support added
- * - SACK support added
- *
- * Willy Tarreau:
- * - State table bugfixes
- * - More robust state changes
- * - Tuning timer parameters
- *
- * version 2.2
- */
-
-#include <linux/types.h>
-#include <linux/timer.h>
-#include <linux/netfilter.h>
-#include <linux/module.h>
-#include <linux/in.h>
-#include <linux/ip.h>
-#include <linux/tcp.h>
-#include <linux/spinlock.h>
-
-#include <net/tcp.h>
-
-#include <linux/netfilter_ipv4.h>
-#include <linux/netfilter_ipv4/ip_conntrack.h>
-#include <linux/netfilter_ipv4/ip_conntrack_protocol.h>
-
-#if 0
-#define DEBUGP printk
-#define DEBUGP_VARS
-#else
-#define DEBUGP(format, args...)
-#endif
-
-/* Protects conntrack->proto.tcp */
-static DEFINE_RWLOCK(tcp_lock);
-
-/* "Be conservative in what you do,
- be liberal in what you accept from others."
- If it's non-zero, we mark only out of window RST segments as INVALID. */
-int ip_ct_tcp_be_liberal __read_mostly = 0;
-
-/* If it is set to zero, we disable picking up already established
- connections. */
-int ip_ct_tcp_loose __read_mostly = 1;
-
-/* Max number of the retransmitted packets without receiving an (acceptable)
- ACK from the destination. If this number is reached, a shorter timer
- will be started. */
-int ip_ct_tcp_max_retrans __read_mostly = 3;
-
- /* FIXME: Examine ipfilter's timeouts and conntrack transitions more
- closely. They're more complex. --RR */
-
-static const char *tcp_conntrack_names[] = {
- "NONE",
- "SYN_SENT",
- "SYN_RECV",
- "ESTABLISHED",
- "FIN_WAIT",
- "CLOSE_WAIT",
- "LAST_ACK",
- "TIME_WAIT",
- "CLOSE",
- "LISTEN"
-};
-
-#define SECS * HZ
-#define MINS * 60 SECS
-#define HOURS * 60 MINS
-#define DAYS * 24 HOURS
-
-unsigned int ip_ct_tcp_timeout_syn_sent __read_mostly = 2 MINS;
-unsigned int ip_ct_tcp_timeout_syn_recv __read_mostly = 60 SECS;
-unsigned int ip_ct_tcp_timeout_established __read_mostly = 5 DAYS;
-unsigned int ip_ct_tcp_timeout_fin_wait __read_mostly = 2 MINS;
-unsigned int ip_ct_tcp_timeout_close_wait __read_mostly = 60 SECS;
-unsigned int ip_ct_tcp_timeout_last_ack __read_mostly = 30 SECS;
-unsigned int ip_ct_tcp_timeout_time_wait __read_mostly = 2 MINS;
-unsigned int ip_ct_tcp_timeout_close __read_mostly = 10 SECS;
-
-/* RFC1122 says the R2 limit should be at least 100 seconds.
- Linux uses 15 packets as limit, which corresponds
- to ~13-30min depending on RTO. */
-unsigned int ip_ct_tcp_timeout_max_retrans __read_mostly = 5 MINS;
-
-static const unsigned int * tcp_timeouts[]
-= { NULL, /* TCP_CONNTRACK_NONE */
- &ip_ct_tcp_timeout_syn_sent, /* TCP_CONNTRACK_SYN_SENT, */
- &ip_ct_tcp_timeout_syn_recv, /* TCP_CONNTRACK_SYN_RECV, */
- &ip_ct_tcp_timeout_established, /* TCP_CONNTRACK_ESTABLISHED, */
- &ip_ct_tcp_timeout_fin_wait, /* TCP_CONNTRACK_FIN_WAIT, */
- &ip_ct_tcp_timeout_close_wait, /* TCP_CONNTRACK_CLOSE_WAIT, */
- &ip_ct_tcp_timeout_last_ack, /* TCP_CONNTRACK_LAST_ACK, */
- &ip_ct_tcp_timeout_time_wait, /* TCP_CONNTRACK_TIME_WAIT, */
- &ip_ct_tcp_timeout_close, /* TCP_CONNTRACK_CLOSE, */
- NULL, /* TCP_CONNTRACK_LISTEN */
- };
-
-#define sNO TCP_CONNTRACK_NONE
-#define sSS TCP_CONNTRACK_SYN_SENT
-#define sSR TCP_CONNTRACK_SYN_RECV
-#define sES TCP_CONNTRACK_ESTABLISHED
-#define sFW TCP_CONNTRACK_FIN_WAIT
-#define sCW TCP_CONNTRACK_CLOSE_WAIT
-#define sLA TCP_CONNTRACK_LAST_ACK
-#define sTW TCP_CONNTRACK_TIME_WAIT
-#define sCL TCP_CONNTRACK_CLOSE
-#define sLI TCP_CONNTRACK_LISTEN
-#define sIV TCP_CONNTRACK_MAX
-#define sIG TCP_CONNTRACK_IGNORE
-
-/* What TCP flags are set from RST/SYN/FIN/ACK. */
-enum tcp_bit_set {
- TCP_SYN_SET,
- TCP_SYNACK_SET,
- TCP_FIN_SET,
- TCP_ACK_SET,
- TCP_RST_SET,
- TCP_NONE_SET,
-};
-
-/*
- * The TCP state transition table needs a few words...
- *
- * We are the man in the middle. All the packets go through us
- * but might get lost in transit to the destination.
- * It is assumed that the destinations can't receive segments
- * we haven't seen.
- *
- * The checked segment is in window, but our windows are *not*
- * equivalent with the ones of the sender/receiver. We always
- * try to guess the state of the current sender.
- *
- * The meaning of the states are:
- *
- * NONE: initial state
- * SYN_SENT: SYN-only packet seen
- * SYN_RECV: SYN-ACK packet seen
- * ESTABLISHED: ACK packet seen
- * FIN_WAIT: FIN packet seen
- * CLOSE_WAIT: ACK seen (after FIN)
- * LAST_ACK: FIN seen (after FIN)
- * TIME_WAIT: last ACK seen
- * CLOSE: closed connection
- *
- * LISTEN state is not used.
- *
- * Packets marked as IGNORED (sIG):
- *	they may be either valid or invalid, and the receiver
- *	may send back a connection-closing RST or a SYN/ACK.
- *
- * Packets marked as INVALID (sIV):
- *	they are invalid, or we do not support the request
- *	(simultaneous open)
- */
-static const enum tcp_conntrack tcp_conntracks[2][6][TCP_CONNTRACK_MAX] = {
- {
-/* ORIGINAL */
-/* sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sLI */
-/*syn*/ { sSS, sSS, sIG, sIG, sIG, sIG, sIG, sSS, sSS, sIV },
-/*
- * sNO -> sSS Initialize a new connection
- * sSS -> sSS Retransmitted SYN
- * sSR -> sIG Late retransmitted SYN?
- * sES -> sIG Error: SYNs in window outside the SYN_SENT state
- * are errors. Receiver will reply with RST
- * and close the connection.
- * Or we are not in sync and hold a dead connection.
- * sFW -> sIG
- * sCW -> sIG
- * sLA -> sIG
- * sTW -> sSS Reopened connection (RFC 1122).
- * sCL -> sSS
- */
-/* sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sLI */
-/*synack*/ { sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV },
-/*
- * A SYN/ACK from the client is always invalid:
- * - either it tries to set up a simultaneous open, which is
- * not supported;
- * - or the firewall has just been inserted between the two hosts
- * during the session set-up. The SYN will be retransmitted
- * by the true client (or it'll time out).
- */
-/* sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sLI */
-/*fin*/ { sIV, sIV, sFW, sFW, sLA, sLA, sLA, sTW, sCL, sIV },
-/*
- * sNO -> sIV Too late and no reason to do anything...
- *	sSS -> sIV	Client might not send FIN in this state:
- * we enforce waiting for a SYN/ACK reply first.
- * sSR -> sFW Close started.
- * sES -> sFW
- * sFW -> sLA FIN seen in both directions, waiting for
- * the last ACK.
- *			Might be a retransmitted FIN as well...
- * sCW -> sLA
- * sLA -> sLA Retransmitted FIN. Remain in the same state.
- * sTW -> sTW
- * sCL -> sCL
- */
-/* sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sLI */
-/*ack*/ { sES, sIV, sES, sES, sCW, sCW, sTW, sTW, sCL, sIV },
-/*
- * sNO -> sES Assumed.
- * sSS -> sIV ACK is invalid: we haven't seen a SYN/ACK yet.
- * sSR -> sES Established state is reached.
- * sES -> sES :-)
- * sFW -> sCW Normal close request answered by ACK.
- * sCW -> sCW
- * sLA -> sTW Last ACK detected.
- * sTW -> sTW Retransmitted last ACK. Remain in the same state.
- * sCL -> sCL
- */
-/* sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sLI */
-/*rst*/ { sIV, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sIV },
-/*none*/ { sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV }
- },
- {
-/* REPLY */
-/* sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sLI */
-/*syn*/ { sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV },
-/*
- * sNO -> sIV Never reached.
- * sSS -> sIV Simultaneous open, not supported
- * sSR -> sIV Simultaneous open, not supported.
- * sES -> sIV Server may not initiate a connection.
- * sFW -> sIV
- * sCW -> sIV
- * sLA -> sIV
- * sTW -> sIV Reopened connection, but server may not do it.
- * sCL -> sIV
- */
-/* sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sLI */
-/*synack*/ { sIV, sSR, sSR, sIG, sIG, sIG, sIG, sIG, sIG, sIV },
-/*
- * sSS -> sSR Standard open.
- * sSR -> sSR Retransmitted SYN/ACK.
- * sES -> sIG Late retransmitted SYN/ACK?
- * sFW -> sIG Might be SYN/ACK answering ignored SYN
- * sCW -> sIG
- * sLA -> sIG
- * sTW -> sIG
- * sCL -> sIG
- */
-/* sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sLI */
-/*fin*/ { sIV, sIV, sFW, sFW, sLA, sLA, sLA, sTW, sCL, sIV },
-/*
- * sSS -> sIV Server might not send FIN in this state.
- * sSR -> sFW Close started.
- * sES -> sFW
- * sFW -> sLA FIN seen in both directions.
- * sCW -> sLA
- * sLA -> sLA Retransmitted FIN.
- * sTW -> sTW
- * sCL -> sCL
- */
-/* sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sLI */
-/*ack*/ { sIV, sIG, sSR, sES, sCW, sCW, sTW, sTW, sCL, sIV },
-/*
- * sSS -> sIG Might be a half-open connection.
- * sSR -> sSR Might answer late resent SYN.
- * sES -> sES :-)
- * sFW -> sCW Normal close request answered by ACK.
- * sCW -> sCW
- * sLA -> sTW Last ACK detected.
- * sTW -> sTW Retransmitted last ACK.
- * sCL -> sCL
- */
-/* sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sLI */
-/*rst*/ { sIV, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sIV },
-/*none*/ { sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV }
- }
-};
-
-static int tcp_pkt_to_tuple(const struct sk_buff *skb,
- unsigned int dataoff,
- struct ip_conntrack_tuple *tuple)
-{
- struct tcphdr _hdr, *hp;
-
- /* Actually only need first 8 bytes. */
- hp = skb_header_pointer(skb, dataoff, 8, &_hdr);
- if (hp == NULL)
- return 0;
-
- tuple->src.u.tcp.port = hp->source;
- tuple->dst.u.tcp.port = hp->dest;
-
- return 1;
-}
-
-static int tcp_invert_tuple(struct ip_conntrack_tuple *tuple,
- const struct ip_conntrack_tuple *orig)
-{
- tuple->src.u.tcp.port = orig->dst.u.tcp.port;
- tuple->dst.u.tcp.port = orig->src.u.tcp.port;
- return 1;
-}
-
-/* Print out the per-protocol part of the tuple. */
-static int tcp_print_tuple(struct seq_file *s,
- const struct ip_conntrack_tuple *tuple)
-{
- return seq_printf(s, "sport=%hu dport=%hu ",
- ntohs(tuple->src.u.tcp.port),
- ntohs(tuple->dst.u.tcp.port));
-}
-
-/* Print out the private part of the conntrack. */
-static int tcp_print_conntrack(struct seq_file *s,
- const struct ip_conntrack *conntrack)
-{
- enum tcp_conntrack state;
-
- read_lock_bh(&tcp_lock);
- state = conntrack->proto.tcp.state;
- read_unlock_bh(&tcp_lock);
-
- return seq_printf(s, "%s ", tcp_conntrack_names[state]);
-}
-
-#if defined(CONFIG_IP_NF_CONNTRACK_NETLINK) || \
- defined(CONFIG_IP_NF_CONNTRACK_NETLINK_MODULE)
-static int tcp_to_nfattr(struct sk_buff *skb, struct nfattr *nfa,
- const struct ip_conntrack *ct)
-{
- struct nfattr *nest_parms;
-
- read_lock_bh(&tcp_lock);
- nest_parms = NFA_NEST(skb, CTA_PROTOINFO_TCP);
- NFA_PUT(skb, CTA_PROTOINFO_TCP_STATE, sizeof(u_int8_t),
- &ct->proto.tcp.state);
- read_unlock_bh(&tcp_lock);
-
- NFA_NEST_END(skb, nest_parms);
-
- return 0;
-
-nfattr_failure:
- read_unlock_bh(&tcp_lock);
- return -1;
-}
-
-static const size_t cta_min_tcp[CTA_PROTOINFO_TCP_MAX] = {
- [CTA_PROTOINFO_TCP_STATE-1] = sizeof(u_int8_t),
-};
-
-static int nfattr_to_tcp(struct nfattr *cda[], struct ip_conntrack *ct)
-{
- struct nfattr *attr = cda[CTA_PROTOINFO_TCP-1];
- struct nfattr *tb[CTA_PROTOINFO_TCP_MAX];
-
-	/* an update may not contain anything about the private
-	 * protocol info; in that case skip the parsing */
- if (!attr)
- return 0;
-
- nfattr_parse_nested(tb, CTA_PROTOINFO_TCP_MAX, attr);
-
- if (nfattr_bad_size(tb, CTA_PROTOINFO_TCP_MAX, cta_min_tcp))
- return -EINVAL;
-
- if (!tb[CTA_PROTOINFO_TCP_STATE-1])
- return -EINVAL;
-
- write_lock_bh(&tcp_lock);
- ct->proto.tcp.state =
- *(u_int8_t *)NFA_DATA(tb[CTA_PROTOINFO_TCP_STATE-1]);
- write_unlock_bh(&tcp_lock);
-
- return 0;
-}
-#endif
-
-static unsigned int get_conntrack_index(const struct tcphdr *tcph)
-{
- if (tcph->rst) return TCP_RST_SET;
- else if (tcph->syn) return (tcph->ack ? TCP_SYNACK_SET : TCP_SYN_SET);
- else if (tcph->fin) return TCP_FIN_SET;
- else if (tcph->ack) return TCP_ACK_SET;
- else return TCP_NONE_SET;
-}
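get_conntrack_index() collapses the header flags into one of six table rows, with RST outranking everything and SYN+ACK getting its own row. The same priority mapping as a standalone sketch; the flags struct is a simplified stand-in for struct tcphdr:

#include <stdio.h>

struct flags { int syn, ack, fin, rst; };

enum { SYN_SET, SYNACK_SET, FIN_SET, ACK_SET, RST_SET, NONE_SET };

static int conntrack_index(const struct flags *f)
{
	if (f->rst) return RST_SET;	/* RST wins, even with ACK set */
	if (f->syn) return f->ack ? SYNACK_SET : SYN_SET;
	if (f->fin) return FIN_SET;
	if (f->ack) return ACK_SET;
	return NONE_SET;
}

int main(void)
{
	struct flags synack  = { .syn = 1, .ack = 1 };
	struct flags rst_ack = { .rst = 1, .ack = 1 };

	printf("SYN+ACK -> row %d, RST+ACK -> row %d\n",
	       conntrack_index(&synack), conntrack_index(&rst_ack));
	return 0;
}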
-
-/* TCP connection tracking based on 'Real Stateful TCP Packet Filtering
- in IP Filter' by Guido van Rooij.
-
- http://www.nluug.nl/events/sane2000/papers.html
- http://www.iae.nl/users/guido/papers/tcp_filtering.ps.gz
-
- The boundaries and the conditions are changed according to RFC793:
- the packet must intersect the window (i.e. segments may be
- after the right or before the left edge) and thus receivers may ACK
- segments after the right edge of the window.
-
- td_maxend = max(sack + max(win,1)) seen in reply packets
- td_maxwin = max(max(win, 1)) + (sack - ack) seen in sent packets
- td_maxwin += seq + len - sender.td_maxend
- if seq + len > sender.td_maxend
- td_end = max(seq + len) seen in sent packets
-
- I. Upper bound for valid data: seq <= sender.td_maxend
- II. Lower bound for valid data: seq + len >= sender.td_end - receiver.td_maxwin
- III. Upper bound for valid ack: sack <= receiver.td_end
- IV. Lower bound for valid ack: ack >= receiver.td_end - MAXACKWINDOW
-
- where sack is the highest right edge of sack block found in the packet.
-
-   The upper bound limit for a valid ack is not ignored -
-   we don't have to deal with fragments.
-*/
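The four bounds are checked with wrap-safe sequence arithmetic: before()/after() compare 32-bit sequence numbers through a signed difference, so the tests stay correct across wraparound. A worked sketch of conditions I-IV with made-up numbers:

#include <stdint.h>
#include <stdio.h>

static int before(uint32_t a, uint32_t b) { return (int32_t)(a - b) < 0; }
static int after(uint32_t a, uint32_t b)  { return before(b, a); }

int main(void)
{
	uint32_t seq = 1000, len = 100, end = seq + len;
	uint32_t ack = 500, sack = 500;
	uint32_t s_end = 900, s_maxend = 2000;	/* sender state */
	uint32_t r_end = 600, r_maxwin = 1000;	/* receiver state */
	uint32_t maxackwin = 66000;		/* MAXACKWINCONST */

	printf("I   seq <= sender.td_maxend:           %d\n",
	       before(seq, s_maxend + 1));
	printf("II  end >= s_end - receiver.td_maxwin: %d\n",
	       after(end, s_end - r_maxwin - 1));	/* wraps, still right */
	printf("III sack <= receiver.td_end:           %d\n",
	       before(sack, r_end + 1));
	printf("IV  ack >= r_end - MAXACKWINDOW:       %d\n",
	       after(ack, r_end - maxackwin));
	return 0;
}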
-
-static inline __u32 segment_seq_plus_len(__u32 seq,
- size_t len,
- struct iphdr *iph,
- struct tcphdr *tcph)
-{
- return (seq + len - (iph->ihl + tcph->doff)*4
- + (tcph->syn ? 1 : 0) + (tcph->fin ? 1 : 0));
-}
-
-/* Fixme: what about big packets? */
-#define MAXACKWINCONST 66000
-#define MAXACKWINDOW(sender) \
- ((sender)->td_maxwin > MAXACKWINCONST ? (sender)->td_maxwin \
- : MAXACKWINCONST)
-
-/*
- * Simplified tcp_parse_options routine from tcp_input.c
- */
-static void tcp_options(const struct sk_buff *skb,
- struct iphdr *iph,
- struct tcphdr *tcph,
- struct ip_ct_tcp_state *state)
-{
- unsigned char buff[(15 * 4) - sizeof(struct tcphdr)];
- unsigned char *ptr;
- int length = (tcph->doff*4) - sizeof(struct tcphdr);
-
- if (!length)
- return;
-
- ptr = skb_header_pointer(skb,
- (iph->ihl * 4) + sizeof(struct tcphdr),
- length, buff);
- BUG_ON(ptr == NULL);
-
- state->td_scale =
- state->flags = 0;
-
- while (length > 0) {
- int opcode=*ptr++;
- int opsize;
-
- switch (opcode) {
- case TCPOPT_EOL:
- return;
- case TCPOPT_NOP: /* Ref: RFC 793 section 3.1 */
- length--;
- continue;
- default:
- opsize=*ptr++;
- if (opsize < 2) /* "silly options" */
- return;
- if (opsize > length)
- break; /* don't parse partial options */
-
- if (opcode == TCPOPT_SACK_PERM
- && opsize == TCPOLEN_SACK_PERM)
- state->flags |= IP_CT_TCP_FLAG_SACK_PERM;
- else if (opcode == TCPOPT_WINDOW
- && opsize == TCPOLEN_WINDOW) {
- state->td_scale = *(u_int8_t *)ptr;
-
- if (state->td_scale > 14) {
- /* See RFC1323 */
- state->td_scale = 14;
- }
- state->flags |=
- IP_CT_TCP_FLAG_WINDOW_SCALE;
- }
- ptr += opsize - 2;
- length -= opsize;
- }
- }
-}
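tcp_options() clamps an advertised window scale above 14 back to 14, as RFC 1323 requires; the effective window is then win << scale. The clamp in isolation:

#include <stdio.h>

static unsigned int effective_window(unsigned int win, unsigned int scale)
{
	if (scale > 14)		/* RFC 1323 caps the shift at 14 */
		scale = 14;
	return win << scale;
}

int main(void)
{
	printf("win=65535 scale=7  -> %u\n", effective_window(65535, 7));
	printf("win=65535 scale=20 -> %u\n", effective_window(65535, 20));
	return 0;
}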
-
-static void tcp_sack(const struct sk_buff *skb,
- struct iphdr *iph,
- struct tcphdr *tcph,
- __u32 *sack)
-{
- unsigned char buff[(15 * 4) - sizeof(struct tcphdr)];
- unsigned char *ptr;
- int length = (tcph->doff*4) - sizeof(struct tcphdr);
- __u32 tmp;
-
- if (!length)
- return;
-
- ptr = skb_header_pointer(skb,
- (iph->ihl * 4) + sizeof(struct tcphdr),
- length, buff);
- BUG_ON(ptr == NULL);
-
- /* Fast path for timestamp-only option */
- if (length == TCPOLEN_TSTAMP_ALIGNED*4
- && *(__be32 *)ptr ==
- __constant_htonl((TCPOPT_NOP << 24)
- | (TCPOPT_NOP << 16)
- | (TCPOPT_TIMESTAMP << 8)
- | TCPOLEN_TIMESTAMP))
- return;
-
- while (length > 0) {
- int opcode=*ptr++;
- int opsize, i;
-
- switch (opcode) {
- case TCPOPT_EOL:
- return;
- case TCPOPT_NOP: /* Ref: RFC 793 section 3.1 */
- length--;
- continue;
- default:
- opsize=*ptr++;
- if (opsize < 2) /* "silly options" */
- return;
- if (opsize > length)
- break; /* don't parse partial options */
-
- if (opcode == TCPOPT_SACK
- && opsize >= (TCPOLEN_SACK_BASE
- + TCPOLEN_SACK_PERBLOCK)
- && !((opsize - TCPOLEN_SACK_BASE)
- % TCPOLEN_SACK_PERBLOCK)) {
- for (i = 0;
- i < (opsize - TCPOLEN_SACK_BASE);
- i += TCPOLEN_SACK_PERBLOCK) {
- tmp = ntohl(*((__be32 *)(ptr+i)+1));
-
- if (after(tmp, *sack))
- *sack = tmp;
- }
- return;
- }
- ptr += opsize - 2;
- length -= opsize;
- }
- }
-}
-
-static int tcp_in_window(struct ip_ct_tcp *state,
- enum ip_conntrack_dir dir,
- unsigned int index,
- const struct sk_buff *skb,
- struct iphdr *iph,
- struct tcphdr *tcph)
-{
- struct ip_ct_tcp_state *sender = &state->seen[dir];
- struct ip_ct_tcp_state *receiver = &state->seen[!dir];
- __u32 seq, ack, sack, end, win, swin;
- int res;
-
- /*
- * Get the required data from the packet.
- */
- seq = ntohl(tcph->seq);
- ack = sack = ntohl(tcph->ack_seq);
- win = ntohs(tcph->window);
- end = segment_seq_plus_len(seq, skb->len, iph, tcph);
-
- if (receiver->flags & IP_CT_TCP_FLAG_SACK_PERM)
- tcp_sack(skb, iph, tcph, &sack);
-
- DEBUGP("tcp_in_window: START\n");
- DEBUGP("tcp_in_window: src=%u.%u.%u.%u:%hu dst=%u.%u.%u.%u:%hu "
- "seq=%u ack=%u sack=%u win=%u end=%u\n",
- NIPQUAD(iph->saddr), ntohs(tcph->source),
- NIPQUAD(iph->daddr), ntohs(tcph->dest),
- seq, ack, sack, win, end);
- DEBUGP("tcp_in_window: sender end=%u maxend=%u maxwin=%u scale=%i "
- "receiver end=%u maxend=%u maxwin=%u scale=%i\n",
- sender->td_end, sender->td_maxend, sender->td_maxwin,
- sender->td_scale,
- receiver->td_end, receiver->td_maxend, receiver->td_maxwin,
- receiver->td_scale);
-
- if (sender->td_end == 0) {
- /*
- * Initialize sender data.
- */
- if (tcph->syn && tcph->ack) {
- /*
- * Outgoing SYN-ACK in reply to a SYN.
- */
- sender->td_end =
- sender->td_maxend = end;
- sender->td_maxwin = (win == 0 ? 1 : win);
-
- tcp_options(skb, iph, tcph, sender);
- /*
- * RFC 1323:
- * Both sides must send the Window Scale option
- * to enable window scaling in either direction.
- */
- if (!(sender->flags & IP_CT_TCP_FLAG_WINDOW_SCALE
- && receiver->flags & IP_CT_TCP_FLAG_WINDOW_SCALE))
- sender->td_scale =
- receiver->td_scale = 0;
- } else {
- /*
- * We are in the middle of a connection,
- * its history is lost for us.
- * Let's try to use the data from the packet.
- */
- sender->td_end = end;
- sender->td_maxwin = (win == 0 ? 1 : win);
- sender->td_maxend = end + sender->td_maxwin;
- }
- } else if (((state->state == TCP_CONNTRACK_SYN_SENT
- && dir == IP_CT_DIR_ORIGINAL)
- || (state->state == TCP_CONNTRACK_SYN_RECV
- && dir == IP_CT_DIR_REPLY))
- && after(end, sender->td_end)) {
- /*
- * RFC 793: "if a TCP is reinitialized ... then it need
- * not wait at all; it must only be sure to use sequence
- * numbers larger than those recently used."
- */
- sender->td_end =
- sender->td_maxend = end;
- sender->td_maxwin = (win == 0 ? 1 : win);
-
- tcp_options(skb, iph, tcph, sender);
- }
-
- if (!(tcph->ack)) {
- /*
- * If there is no ACK, just pretend it was set and OK.
- */
- ack = sack = receiver->td_end;
- } else if (((tcp_flag_word(tcph) & (TCP_FLAG_ACK|TCP_FLAG_RST)) ==
- (TCP_FLAG_ACK|TCP_FLAG_RST))
- && (ack == 0)) {
- /*
-	 * Broken TCP stacks that set ACK in RST packets
-	 * with a zero ack value.
- */
- ack = sack = receiver->td_end;
- }
-
- if (seq == end
- && (!tcph->rst
- || (seq == 0 && state->state == TCP_CONNTRACK_SYN_SENT)))
- /*
-		 * Packet contains no data: we assume it is valid
-		 * and check the ack value only.
-		 * However, RST segments are always validated by their
-		 * SEQ number, except when seq == 0 (reset sent answering
-		 * a SYN).
- */
- seq = end = sender->td_end;
-
- DEBUGP("tcp_in_window: src=%u.%u.%u.%u:%hu dst=%u.%u.%u.%u:%hu "
- "seq=%u ack=%u sack =%u win=%u end=%u\n",
- NIPQUAD(iph->saddr), ntohs(tcph->source),
- NIPQUAD(iph->daddr), ntohs(tcph->dest),
- seq, ack, sack, win, end);
- DEBUGP("tcp_in_window: sender end=%u maxend=%u maxwin=%u scale=%i "
- "receiver end=%u maxend=%u maxwin=%u scale=%i\n",
- sender->td_end, sender->td_maxend, sender->td_maxwin,
- sender->td_scale,
- receiver->td_end, receiver->td_maxend, receiver->td_maxwin,
- receiver->td_scale);
-
- DEBUGP("tcp_in_window: I=%i II=%i III=%i IV=%i\n",
- before(seq, sender->td_maxend + 1),
- after(end, sender->td_end - receiver->td_maxwin - 1),
- before(sack, receiver->td_end + 1),
- after(ack, receiver->td_end - MAXACKWINDOW(sender)));
-
- if (before(seq, sender->td_maxend + 1) &&
- after(end, sender->td_end - receiver->td_maxwin - 1) &&
- before(sack, receiver->td_end + 1) &&
- after(ack, receiver->td_end - MAXACKWINDOW(sender))) {
- /*
- * Take into account window scaling (RFC 1323).
- */
- if (!tcph->syn)
- win <<= sender->td_scale;
-
- /*
- * Update sender data.
- */
- swin = win + (sack - ack);
- if (sender->td_maxwin < swin)
- sender->td_maxwin = swin;
- if (after(end, sender->td_end))
- sender->td_end = end;
- /*
- * Update receiver data.
- */
- if (after(end, sender->td_maxend))
- receiver->td_maxwin += end - sender->td_maxend;
- if (after(sack + win, receiver->td_maxend - 1)) {
- receiver->td_maxend = sack + win;
- if (win == 0)
- receiver->td_maxend++;
- }
-
- /*
- * Check retransmissions.
- */
- if (index == TCP_ACK_SET) {
- if (state->last_dir == dir
- && state->last_seq == seq
- && state->last_ack == ack
- && state->last_end == end
- && state->last_win == win)
- state->retrans++;
- else {
- state->last_dir = dir;
- state->last_seq = seq;
- state->last_ack = ack;
- state->last_end = end;
- state->last_win = win;
- state->retrans = 0;
- }
- }
- res = 1;
- } else {
- res = 0;
- if (sender->flags & IP_CT_TCP_FLAG_BE_LIBERAL ||
- ip_ct_tcp_be_liberal)
- res = 1;
- if (!res && LOG_INVALID(IPPROTO_TCP))
- nf_log_packet(PF_INET, 0, skb, NULL, NULL, NULL,
- "ip_ct_tcp: %s ",
- before(seq, sender->td_maxend + 1) ?
- after(end, sender->td_end - receiver->td_maxwin - 1) ?
- before(sack, receiver->td_end + 1) ?
- after(ack, receiver->td_end - MAXACKWINDOW(sender)) ? "BUG"
- : "ACK is under the lower bound (possible overly delayed ACK)"
- : "ACK is over the upper bound (ACKed data not seen yet)"
- : "SEQ is under the lower bound (already ACKed data retransmitted)"
- : "SEQ is over the upper bound (over the window of the receiver)");
- }
-
- DEBUGP("tcp_in_window: res=%i sender end=%u maxend=%u maxwin=%u "
- "receiver end=%u maxend=%u maxwin=%u\n",
- res, sender->td_end, sender->td_maxend, sender->td_maxwin,
- receiver->td_end, receiver->td_maxend, receiver->td_maxwin);
-
- return res;
-}
-
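The four predicates above bound a segment from both sides: its start must not be beyond the sender's advertised right edge (I), its end must not be older than the receiver's window allows (II), and the (S)ACK values must fall in the corresponding window of the receiver (III, IV). Restated with illustrative numbers in a standalone sketch (before()/after() are the usual 32-bit serial-number comparisons, reimplemented here so the example builds on its own):

/* Standalone restatement of the four window checks above. */
#include <stdint.h>
#include <stdio.h>

static int before(uint32_t a, uint32_t b) { return (int32_t)(a - b) < 0; }
static int after(uint32_t a, uint32_t b)  { return before(b, a); }

int main(void)
{
        /* Illustrative tracker state for one direction pair */
        uint32_t s_end = 2000, s_maxend = 3000;   /* sender */
        uint32_t r_end = 500,  r_maxwin = 1000;   /* receiver */
        uint32_t maxackwin = 66000;               /* MAXACKWINCONST */

        /* Segment under test */
        uint32_t seq = 2000, end = 2100, ack = 400, sack = 400;

        int ok = before(seq, s_maxend + 1)            /* I   */
              && after(end, s_end - r_maxwin - 1)     /* II  */
              && before(sack, r_end + 1)              /* III */
              && after(ack, r_end - maxackwin);       /* IV  */

        printf("in window: %d\n", ok);  /* 1: all four bounds hold */
        return 0;
}
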
-#ifdef CONFIG_IP_NF_NAT_NEEDED
-/* Update sender->td_end after NAT successfully mangled the packet */
-void ip_conntrack_tcp_update(struct sk_buff *skb,
- struct ip_conntrack *conntrack,
- enum ip_conntrack_dir dir)
-{
- struct iphdr *iph = skb->nh.iph;
- struct tcphdr *tcph = (void *)skb->nh.iph + skb->nh.iph->ihl*4;
- __u32 end;
-#ifdef DEBUGP_VARS
- struct ip_ct_tcp_state *sender = &conntrack->proto.tcp.seen[dir];
- struct ip_ct_tcp_state *receiver = &conntrack->proto.tcp.seen[!dir];
-#endif
-
- end = segment_seq_plus_len(ntohl(tcph->seq), skb->len, iph, tcph);
-
- write_lock_bh(&tcp_lock);
- /*
-	 * We only have to worry about the ack in the reply packet...
- */
- if (after(end, conntrack->proto.tcp.seen[dir].td_end))
- conntrack->proto.tcp.seen[dir].td_end = end;
- conntrack->proto.tcp.last_end = end;
- write_unlock_bh(&tcp_lock);
- DEBUGP("tcp_update: sender end=%u maxend=%u maxwin=%u scale=%i "
- "receiver end=%u maxend=%u maxwin=%u scale=%i\n",
- sender->td_end, sender->td_maxend, sender->td_maxwin,
- sender->td_scale,
- receiver->td_end, receiver->td_maxend, receiver->td_maxwin,
- receiver->td_scale);
-}
-
-#endif
-
-#define TH_FIN 0x01
-#define TH_SYN 0x02
-#define TH_RST 0x04
-#define TH_PUSH 0x08
-#define TH_ACK 0x10
-#define TH_URG 0x20
-#define TH_ECE 0x40
-#define TH_CWR 0x80
-
-/* table of valid flag combinations - ECE and CWR are always valid */
-static const u8 tcp_valid_flags[(TH_FIN|TH_SYN|TH_RST|TH_PUSH|TH_ACK|TH_URG) + 1] =
-{
- [TH_SYN] = 1,
- [TH_SYN|TH_PUSH] = 1,
- [TH_SYN|TH_URG] = 1,
- [TH_SYN|TH_PUSH|TH_URG] = 1,
- [TH_SYN|TH_ACK] = 1,
- [TH_SYN|TH_ACK|TH_PUSH] = 1,
- [TH_RST] = 1,
- [TH_RST|TH_ACK] = 1,
- [TH_RST|TH_ACK|TH_PUSH] = 1,
- [TH_FIN|TH_ACK] = 1,
- [TH_ACK] = 1,
- [TH_ACK|TH_PUSH] = 1,
- [TH_ACK|TH_URG] = 1,
- [TH_ACK|TH_URG|TH_PUSH] = 1,
- [TH_FIN|TH_ACK|TH_PUSH] = 1,
- [TH_FIN|TH_ACK|TH_URG] = 1,
- [TH_FIN|TH_ACK|TH_URG|TH_PUSH] = 1,
-};
-
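How the table is consulted: the flag byte sits at offset 13 of the TCP header, ECE and CWR are masked off (they are valid in any combination), and the remaining six bits index the array. A short sketch of the lookup, with a reduced validity table for illustration:

/* Sketch of the flag-table lookup above: mask off ECE/CWR and
 * index the validity array with the remaining six flag bits. */
#include <stdint.h>
#include <stdio.h>

#define TH_FIN 0x01
#define TH_SYN 0x02
#define TH_RST 0x04
#define TH_ACK 0x10
#define TH_ECE 0x40
#define TH_CWR 0x80

static int flag_byte_valid(uint8_t flags, const uint8_t *valid)
{
        return valid[flags & ~(TH_ECE | TH_CWR)];
}

int main(void)
{
        uint8_t valid[64] = { [TH_SYN] = 1, [TH_SYN | TH_ACK] = 1,
                              [TH_ACK] = 1, [TH_RST | TH_ACK] = 1 };

        /* SYN with ECN bits set is still valid once ECE/CWR are masked */
        printf("%d\n", flag_byte_valid(TH_SYN | TH_ECE | TH_CWR, valid)); /* 1 */
        /* SYN+FIN is never a legal combination */
        printf("%d\n", flag_byte_valid(TH_SYN | TH_FIN, valid));          /* 0 */
        return 0;
}
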
-/* Protect conntrack against broken packets. Code taken from ipt_unclean.c. */
-static int tcp_error(struct sk_buff *skb,
- enum ip_conntrack_info *ctinfo,
- unsigned int hooknum)
-{
- struct iphdr *iph = skb->nh.iph;
- struct tcphdr _tcph, *th;
- unsigned int tcplen = skb->len - iph->ihl * 4;
- u_int8_t tcpflags;
-
-	/* Smaller than the minimal TCP header? */
- th = skb_header_pointer(skb, iph->ihl * 4,
- sizeof(_tcph), &_tcph);
- if (th == NULL) {
- if (LOG_INVALID(IPPROTO_TCP))
- nf_log_packet(PF_INET, 0, skb, NULL, NULL, NULL,
- "ip_ct_tcp: short packet ");
- return -NF_ACCEPT;
- }
-
- /* Not whole TCP header or malformed packet */
- if (th->doff*4 < sizeof(struct tcphdr) || tcplen < th->doff*4) {
- if (LOG_INVALID(IPPROTO_TCP))
- nf_log_packet(PF_INET, 0, skb, NULL, NULL, NULL,
- "ip_ct_tcp: truncated/malformed packet ");
- return -NF_ACCEPT;
- }
-
- /* Checksum invalid? Ignore.
- * We skip checking packets on the outgoing path
-	 * because the checksum is assumed to be correct.
- */
- /* FIXME: Source route IP option packets --RR */
- if (ip_conntrack_checksum && hooknum == NF_IP_PRE_ROUTING &&
- nf_ip_checksum(skb, hooknum, iph->ihl * 4, IPPROTO_TCP)) {
- if (LOG_INVALID(IPPROTO_TCP))
- nf_log_packet(PF_INET, 0, skb, NULL, NULL, NULL,
- "ip_ct_tcp: bad TCP checksum ");
- return -NF_ACCEPT;
- }
-
- /* Check TCP flags. */
- tcpflags = (((u_int8_t *)th)[13] & ~(TH_ECE|TH_CWR));
- if (!tcp_valid_flags[tcpflags]) {
- if (LOG_INVALID(IPPROTO_TCP))
- nf_log_packet(PF_INET, 0, skb, NULL, NULL, NULL,
- "ip_ct_tcp: invalid TCP flag combination ");
- return -NF_ACCEPT;
- }
-
- return NF_ACCEPT;
-}
-
-/* Returns verdict for packet, or -1 for invalid. */
-static int tcp_packet(struct ip_conntrack *conntrack,
- const struct sk_buff *skb,
- enum ip_conntrack_info ctinfo)
-{
- enum tcp_conntrack new_state, old_state;
- enum ip_conntrack_dir dir;
- struct iphdr *iph = skb->nh.iph;
- struct tcphdr *th, _tcph;
- unsigned long timeout;
- unsigned int index;
-
- th = skb_header_pointer(skb, iph->ihl * 4,
- sizeof(_tcph), &_tcph);
- BUG_ON(th == NULL);
-
- write_lock_bh(&tcp_lock);
- old_state = conntrack->proto.tcp.state;
- dir = CTINFO2DIR(ctinfo);
- index = get_conntrack_index(th);
- new_state = tcp_conntracks[dir][index][old_state];
-
- switch (new_state) {
- case TCP_CONNTRACK_IGNORE:
- /* Ignored packets:
- *
- * a) SYN in ORIGINAL
- * b) SYN/ACK in REPLY
- * c) ACK in reply direction after initial SYN in original.
- */
- if (index == TCP_SYNACK_SET
- && conntrack->proto.tcp.last_index == TCP_SYN_SET
- && conntrack->proto.tcp.last_dir != dir
- && ntohl(th->ack_seq) ==
- conntrack->proto.tcp.last_end) {
- /* This SYN/ACK acknowledges a SYN that we earlier
- * ignored as invalid. This means that the client and
- * the server are both in sync, while the firewall is
- * not. We kill this session and block the SYN/ACK so
- * that the client cannot but retransmit its SYN and
- * thus initiate a clean new session.
- */
- write_unlock_bh(&tcp_lock);
- if (LOG_INVALID(IPPROTO_TCP))
- nf_log_packet(PF_INET, 0, skb, NULL, NULL,
- NULL, "ip_ct_tcp: "
- "killing out of sync session ");
- if (del_timer(&conntrack->timeout))
- conntrack->timeout.function((unsigned long)
- conntrack);
- return -NF_DROP;
- }
- conntrack->proto.tcp.last_index = index;
- conntrack->proto.tcp.last_dir = dir;
- conntrack->proto.tcp.last_seq = ntohl(th->seq);
- conntrack->proto.tcp.last_end =
- segment_seq_plus_len(ntohl(th->seq), skb->len, iph, th);
-
- write_unlock_bh(&tcp_lock);
- if (LOG_INVALID(IPPROTO_TCP))
- nf_log_packet(PF_INET, 0, skb, NULL, NULL, NULL,
- "ip_ct_tcp: invalid packet ignored ");
- return NF_ACCEPT;
- case TCP_CONNTRACK_MAX:
- /* Invalid packet */
- DEBUGP("ip_ct_tcp: Invalid dir=%i index=%u ostate=%u\n",
- dir, get_conntrack_index(th),
- old_state);
- write_unlock_bh(&tcp_lock);
- if (LOG_INVALID(IPPROTO_TCP))
- nf_log_packet(PF_INET, 0, skb, NULL, NULL, NULL,
- "ip_ct_tcp: invalid state ");
- return -NF_ACCEPT;
- case TCP_CONNTRACK_SYN_SENT:
- if (old_state < TCP_CONNTRACK_TIME_WAIT)
- break;
- if ((conntrack->proto.tcp.seen[dir].flags &
- IP_CT_TCP_FLAG_CLOSE_INIT)
- || after(ntohl(th->seq),
- conntrack->proto.tcp.seen[dir].td_end)) {
- /* Attempt to reopen a closed connection.
- * Delete this connection and look up again. */
- write_unlock_bh(&tcp_lock);
- if (del_timer(&conntrack->timeout))
- conntrack->timeout.function((unsigned long)
- conntrack);
- return -NF_REPEAT;
- } else {
- write_unlock_bh(&tcp_lock);
- if (LOG_INVALID(IPPROTO_TCP))
- nf_log_packet(PF_INET, 0, skb, NULL, NULL,
- NULL, "ip_ct_tcp: invalid SYN");
- return -NF_ACCEPT;
- }
- case TCP_CONNTRACK_CLOSE:
- if (index == TCP_RST_SET
- && ((test_bit(IPS_SEEN_REPLY_BIT, &conntrack->status)
- && conntrack->proto.tcp.last_index == TCP_SYN_SET)
- || (!test_bit(IPS_ASSURED_BIT, &conntrack->status)
- && conntrack->proto.tcp.last_index == TCP_ACK_SET))
- && ntohl(th->ack_seq) == conntrack->proto.tcp.last_end) {
- /* RST sent to invalid SYN or ACK we had let through
- * at a) and c) above:
- *
- * a) SYN was in window then
- * c) we hold a half-open connection.
- *
- * Delete our connection entry.
-			 * We skip window checking, because the packet
-			 * might ACK segments we ignored. */
- goto in_window;
- }
- /* Just fall through */
- default:
- /* Keep compilers happy. */
- break;
- }
-
- if (!tcp_in_window(&conntrack->proto.tcp, dir, index,
- skb, iph, th)) {
- write_unlock_bh(&tcp_lock);
- return -NF_ACCEPT;
- }
- in_window:
- /* From now on we have got in-window packets */
- conntrack->proto.tcp.last_index = index;
-
- DEBUGP("tcp_conntracks: src=%u.%u.%u.%u:%hu dst=%u.%u.%u.%u:%hu "
- "syn=%i ack=%i fin=%i rst=%i old=%i new=%i\n",
- NIPQUAD(iph->saddr), ntohs(th->source),
- NIPQUAD(iph->daddr), ntohs(th->dest),
- (th->syn ? 1 : 0), (th->ack ? 1 : 0),
- (th->fin ? 1 : 0), (th->rst ? 1 : 0),
- old_state, new_state);
-
- conntrack->proto.tcp.state = new_state;
- if (old_state != new_state
- && (new_state == TCP_CONNTRACK_FIN_WAIT
- || new_state == TCP_CONNTRACK_CLOSE))
- conntrack->proto.tcp.seen[dir].flags |= IP_CT_TCP_FLAG_CLOSE_INIT;
- timeout = conntrack->proto.tcp.retrans >= ip_ct_tcp_max_retrans
- && *tcp_timeouts[new_state] > ip_ct_tcp_timeout_max_retrans
- ? ip_ct_tcp_timeout_max_retrans : *tcp_timeouts[new_state];
- write_unlock_bh(&tcp_lock);
-
- ip_conntrack_event_cache(IPCT_PROTOINFO_VOLATILE, skb);
- if (new_state != old_state)
- ip_conntrack_event_cache(IPCT_PROTOINFO, skb);
-
- if (!test_bit(IPS_SEEN_REPLY_BIT, &conntrack->status)) {
- /* If only reply is a RST, we can consider ourselves not to
- have an established connection: this is a fairly common
- problem case, so we can delete the conntrack
- immediately. --RR */
- if (th->rst) {
- if (del_timer(&conntrack->timeout))
- conntrack->timeout.function((unsigned long)
- conntrack);
- return NF_ACCEPT;
- }
- } else if (!test_bit(IPS_ASSURED_BIT, &conntrack->status)
- && (old_state == TCP_CONNTRACK_SYN_RECV
- || old_state == TCP_CONNTRACK_ESTABLISHED)
- && new_state == TCP_CONNTRACK_ESTABLISHED) {
-		/* Set ASSURED if we see a valid ack in ESTABLISHED
-		   after SYN_RECV, or a valid answer for a picked-up
-		   connection. */
- set_bit(IPS_ASSURED_BIT, &conntrack->status);
- ip_conntrack_event_cache(IPCT_STATUS, skb);
- }
- ip_ct_refresh_acct(conntrack, ctinfo, skb, timeout);
-
- return NF_ACCEPT;
-}
-
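The dense conditional near the end of tcp_packet() caps the per-state timeout once the retransmission counter exceeds the configured maximum, so a dead peer cannot pin a conntrack entry for the full ESTABLISHED lifetime. The selection spelled out in a small sketch (values illustrative; the real ones are the per-state sysctls):

/* The timeout selection at the end of tcp_packet(), spelled out. */
#include <stdio.h>

static unsigned long pick_timeout(unsigned retrans, unsigned max_retrans,
                                  unsigned long state_timeout,
                                  unsigned long retrans_timeout)
{
        /* After too many retransmissions, never wait longer than
         * the (shorter) max-retrans timeout. */
        if (retrans >= max_retrans && state_timeout > retrans_timeout)
                return retrans_timeout;
        return state_timeout;
}

int main(void)
{
        /* ESTABLISHED would normally wait 5 days; with the peer
         * apparently gone, fall back to 5 minutes. */
        printf("%lu\n", pick_timeout(3, 3, 5UL * 24 * 3600, 300)); /* 300 */
        return 0;
}
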
-/* Called when a new connection for this protocol found. */
-static int tcp_new(struct ip_conntrack *conntrack,
- const struct sk_buff *skb)
-{
- enum tcp_conntrack new_state;
- struct iphdr *iph = skb->nh.iph;
- struct tcphdr *th, _tcph;
-#ifdef DEBUGP_VARS
- struct ip_ct_tcp_state *sender = &conntrack->proto.tcp.seen[0];
- struct ip_ct_tcp_state *receiver = &conntrack->proto.tcp.seen[1];
-#endif
-
- th = skb_header_pointer(skb, iph->ihl * 4,
- sizeof(_tcph), &_tcph);
- BUG_ON(th == NULL);
-
- /* Don't need lock here: this conntrack not in circulation yet */
- new_state
- = tcp_conntracks[0][get_conntrack_index(th)]
- [TCP_CONNTRACK_NONE];
-
- /* Invalid: delete conntrack */
- if (new_state >= TCP_CONNTRACK_MAX) {
- DEBUGP("ip_ct_tcp: invalid new deleting.\n");
- return 0;
- }
-
- if (new_state == TCP_CONNTRACK_SYN_SENT) {
- /* SYN packet */
- conntrack->proto.tcp.seen[0].td_end =
- segment_seq_plus_len(ntohl(th->seq), skb->len,
- iph, th);
- conntrack->proto.tcp.seen[0].td_maxwin = ntohs(th->window);
- if (conntrack->proto.tcp.seen[0].td_maxwin == 0)
- conntrack->proto.tcp.seen[0].td_maxwin = 1;
- conntrack->proto.tcp.seen[0].td_maxend =
- conntrack->proto.tcp.seen[0].td_end;
-
- tcp_options(skb, iph, th, &conntrack->proto.tcp.seen[0]);
- conntrack->proto.tcp.seen[1].flags = 0;
- } else if (ip_ct_tcp_loose == 0) {
- /* Don't try to pick up connections. */
- return 0;
- } else {
- /*
- * We are in the middle of a connection,
- * its history is lost for us.
- * Let's try to use the data from the packet.
- */
- conntrack->proto.tcp.seen[0].td_end =
- segment_seq_plus_len(ntohl(th->seq), skb->len,
- iph, th);
- conntrack->proto.tcp.seen[0].td_maxwin = ntohs(th->window);
- if (conntrack->proto.tcp.seen[0].td_maxwin == 0)
- conntrack->proto.tcp.seen[0].td_maxwin = 1;
- conntrack->proto.tcp.seen[0].td_maxend =
- conntrack->proto.tcp.seen[0].td_end +
- conntrack->proto.tcp.seen[0].td_maxwin;
- conntrack->proto.tcp.seen[0].td_scale = 0;
-
- /* We assume SACK and liberal window checking to handle
- * window scaling */
- conntrack->proto.tcp.seen[0].flags =
- conntrack->proto.tcp.seen[1].flags = IP_CT_TCP_FLAG_SACK_PERM |
- IP_CT_TCP_FLAG_BE_LIBERAL;
- }
-
- conntrack->proto.tcp.seen[1].td_end = 0;
- conntrack->proto.tcp.seen[1].td_maxend = 0;
- conntrack->proto.tcp.seen[1].td_maxwin = 1;
- conntrack->proto.tcp.seen[1].td_scale = 0;
-
- /* tcp_packet will set them */
- conntrack->proto.tcp.state = TCP_CONNTRACK_NONE;
- conntrack->proto.tcp.last_index = TCP_NONE_SET;
-
- DEBUGP("tcp_new: sender end=%u maxend=%u maxwin=%u scale=%i "
- "receiver end=%u maxend=%u maxwin=%u scale=%i\n",
- sender->td_end, sender->td_maxend, sender->td_maxwin,
- sender->td_scale,
- receiver->td_end, receiver->td_maxend, receiver->td_maxwin,
- receiver->td_scale);
- return 1;
-}
-
-struct ip_conntrack_protocol ip_conntrack_protocol_tcp =
-{
- .proto = IPPROTO_TCP,
- .name = "tcp",
- .pkt_to_tuple = tcp_pkt_to_tuple,
- .invert_tuple = tcp_invert_tuple,
- .print_tuple = tcp_print_tuple,
- .print_conntrack = tcp_print_conntrack,
- .packet = tcp_packet,
- .new = tcp_new,
- .error = tcp_error,
-#if defined(CONFIG_IP_NF_CONNTRACK_NETLINK) || \
- defined(CONFIG_IP_NF_CONNTRACK_NETLINK_MODULE)
- .to_nfattr = tcp_to_nfattr,
- .from_nfattr = nfattr_to_tcp,
- .tuple_to_nfattr = ip_ct_port_tuple_to_nfattr,
- .nfattr_to_tuple = ip_ct_port_nfattr_to_tuple,
-#endif
-};
diff --git a/net/ipv4/netfilter/ip_conntrack_proto_udp.c b/net/ipv4/netfilter/ip_conntrack_proto_udp.c
deleted file mode 100644
index 14c30c646c7..00000000000
--- a/net/ipv4/netfilter/ip_conntrack_proto_udp.c
+++ /dev/null
@@ -1,148 +0,0 @@
-/* (C) 1999-2001 Paul `Rusty' Russell
- * (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
-
-#include <linux/types.h>
-#include <linux/timer.h>
-#include <linux/netfilter.h>
-#include <linux/in.h>
-#include <linux/ip.h>
-#include <linux/udp.h>
-#include <linux/seq_file.h>
-#include <net/checksum.h>
-#include <linux/netfilter_ipv4.h>
-#include <linux/netfilter_ipv4/ip_conntrack_protocol.h>
-
-unsigned int ip_ct_udp_timeout __read_mostly = 30*HZ;
-unsigned int ip_ct_udp_timeout_stream __read_mostly = 180*HZ;
-
-static int udp_pkt_to_tuple(const struct sk_buff *skb,
- unsigned int dataoff,
- struct ip_conntrack_tuple *tuple)
-{
- struct udphdr _hdr, *hp;
-
- /* Actually only need first 8 bytes. */
- hp = skb_header_pointer(skb, dataoff, sizeof(_hdr), &_hdr);
- if (hp == NULL)
- return 0;
-
- tuple->src.u.udp.port = hp->source;
- tuple->dst.u.udp.port = hp->dest;
-
- return 1;
-}
-
-static int udp_invert_tuple(struct ip_conntrack_tuple *tuple,
- const struct ip_conntrack_tuple *orig)
-{
- tuple->src.u.udp.port = orig->dst.u.udp.port;
- tuple->dst.u.udp.port = orig->src.u.udp.port;
- return 1;
-}
-
-/* Print out the per-protocol part of the tuple. */
-static int udp_print_tuple(struct seq_file *s,
- const struct ip_conntrack_tuple *tuple)
-{
- return seq_printf(s, "sport=%hu dport=%hu ",
- ntohs(tuple->src.u.udp.port),
- ntohs(tuple->dst.u.udp.port));
-}
-
-/* Print out the private part of the conntrack. */
-static int udp_print_conntrack(struct seq_file *s,
- const struct ip_conntrack *conntrack)
-{
- return 0;
-}
-
-/* Returns verdict for packet, and may modify conntrack type */
-static int udp_packet(struct ip_conntrack *conntrack,
- const struct sk_buff *skb,
- enum ip_conntrack_info ctinfo)
-{
- /* If we've seen traffic both ways, this is some kind of UDP
- stream. Extend timeout. */
- if (test_bit(IPS_SEEN_REPLY_BIT, &conntrack->status)) {
- ip_ct_refresh_acct(conntrack, ctinfo, skb,
- ip_ct_udp_timeout_stream);
- /* Also, more likely to be important, and not a probe */
- if (!test_and_set_bit(IPS_ASSURED_BIT, &conntrack->status))
- ip_conntrack_event_cache(IPCT_STATUS, skb);
- } else
- ip_ct_refresh_acct(conntrack, ctinfo, skb, ip_ct_udp_timeout);
-
- return NF_ACCEPT;
-}
-
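The two-timeout policy above in one sketch: an unreplied flow gets the short timeout (it may be just a probe), while a flow seen in both directions gets the long stream timeout and the ASSURED bit that protects it from early eviction. A standalone restatement:

/* Sketch of the UDP timeout policy above. */
#include <stdio.h>

struct flow { int seen_reply; int assured; };

static unsigned refresh(struct flow *f, unsigned short_to, unsigned stream_to)
{
        if (f->seen_reply) {
                f->assured = 1;         /* bidirectional: keep it around */
                return stream_to;
        }
        return short_to;                /* possibly just a probe */
}

int main(void)
{
        struct flow f = { .seen_reply = 1 };
        printf("timeout=%us assured=%d\n", refresh(&f, 30, 180), f.assured);
        return 0;
}
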
-/* Called when a new connection for this protocol found. */
-static int udp_new(struct ip_conntrack *conntrack, const struct sk_buff *skb)
-{
- return 1;
-}
-
-static int udp_error(struct sk_buff *skb, enum ip_conntrack_info *ctinfo,
- unsigned int hooknum)
-{
- struct iphdr *iph = skb->nh.iph;
- unsigned int udplen = skb->len - iph->ihl * 4;
- struct udphdr _hdr, *hdr;
-
- /* Header is too small? */
- hdr = skb_header_pointer(skb, iph->ihl*4, sizeof(_hdr), &_hdr);
- if (hdr == NULL) {
- if (LOG_INVALID(IPPROTO_UDP))
- nf_log_packet(PF_INET, 0, skb, NULL, NULL, NULL,
- "ip_ct_udp: short packet ");
- return -NF_ACCEPT;
- }
-
- /* Truncated/malformed packets */
- if (ntohs(hdr->len) > udplen || ntohs(hdr->len) < sizeof(*hdr)) {
- if (LOG_INVALID(IPPROTO_UDP))
- nf_log_packet(PF_INET, 0, skb, NULL, NULL, NULL,
- "ip_ct_udp: truncated/malformed packet ");
- return -NF_ACCEPT;
- }
-
- /* Packet with no checksum */
- if (!hdr->check)
- return NF_ACCEPT;
-
- /* Checksum invalid? Ignore.
- * We skip checking packets on the outgoing path
- * because the checksum is assumed to be correct.
- * FIXME: Source route IP option packets --RR */
- if (ip_conntrack_checksum && hooknum == NF_IP_PRE_ROUTING &&
- nf_ip_checksum(skb, hooknum, iph->ihl * 4, IPPROTO_UDP)) {
- if (LOG_INVALID(IPPROTO_UDP))
- nf_log_packet(PF_INET, 0, skb, NULL, NULL, NULL,
- "ip_ct_udp: bad UDP checksum ");
- return -NF_ACCEPT;
- }
-
- return NF_ACCEPT;
-}
-
-struct ip_conntrack_protocol ip_conntrack_protocol_udp =
-{
- .proto = IPPROTO_UDP,
- .name = "udp",
- .pkt_to_tuple = udp_pkt_to_tuple,
- .invert_tuple = udp_invert_tuple,
- .print_tuple = udp_print_tuple,
- .print_conntrack = udp_print_conntrack,
- .packet = udp_packet,
- .new = udp_new,
- .error = udp_error,
-#if defined(CONFIG_IP_NF_CONNTRACK_NETLINK) || \
- defined(CONFIG_IP_NF_CONNTRACK_NETLINK_MODULE)
- .tuple_to_nfattr = ip_ct_port_tuple_to_nfattr,
- .nfattr_to_tuple = ip_ct_port_nfattr_to_tuple,
-#endif
-};
diff --git a/net/ipv4/netfilter/ip_conntrack_sip.c b/net/ipv4/netfilter/ip_conntrack_sip.c
deleted file mode 100644
index c59a962c1f6..00000000000
--- a/net/ipv4/netfilter/ip_conntrack_sip.c
+++ /dev/null
@@ -1,520 +0,0 @@
-/* SIP extension for IP connection tracking.
- *
- * (C) 2005 by Christian Hentschel <chentschel@arnet.com.ar>
- * based on RR's ip_conntrack_ftp.c and other modules.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
-
-#include <linux/module.h>
-#include <linux/ctype.h>
-#include <linux/skbuff.h>
-#include <linux/in.h>
-#include <linux/ip.h>
-#include <linux/udp.h>
-
-#include <linux/netfilter.h>
-#include <linux/netfilter_ipv4.h>
-#include <linux/netfilter_ipv4/ip_conntrack_helper.h>
-#include <linux/netfilter_ipv4/ip_conntrack_sip.h>
-
-#if 0
-#define DEBUGP printk
-#else
-#define DEBUGP(format, args...)
-#endif
-
-MODULE_LICENSE("GPL");
-MODULE_AUTHOR("Christian Hentschel <chentschel@arnet.com.ar>");
-MODULE_DESCRIPTION("SIP connection tracking helper");
-
-#define MAX_PORTS 8
-static unsigned short ports[MAX_PORTS];
-static int ports_c;
-module_param_array(ports, ushort, &ports_c, 0400);
-MODULE_PARM_DESC(ports, "port numbers of sip servers");
-
-static unsigned int sip_timeout = SIP_TIMEOUT;
-module_param(sip_timeout, uint, 0600);
-MODULE_PARM_DESC(sip_timeout, "timeout for the master SIP session");
-
-unsigned int (*ip_nat_sip_hook)(struct sk_buff **pskb,
- enum ip_conntrack_info ctinfo,
- struct ip_conntrack *ct,
- const char **dptr);
-EXPORT_SYMBOL_GPL(ip_nat_sip_hook);
-
-unsigned int (*ip_nat_sdp_hook)(struct sk_buff **pskb,
- enum ip_conntrack_info ctinfo,
- struct ip_conntrack_expect *exp,
- const char *dptr);
-EXPORT_SYMBOL_GPL(ip_nat_sdp_hook);
-
-static int digits_len(const char *dptr, const char *limit, int *shift);
-static int epaddr_len(const char *dptr, const char *limit, int *shift);
-static int skp_digits_len(const char *dptr, const char *limit, int *shift);
-static int skp_epaddr_len(const char *dptr, const char *limit, int *shift);
-
-struct sip_header_nfo {
- const char *lname;
- const char *sname;
- const char *ln_str;
- size_t lnlen;
- size_t snlen;
- size_t ln_strlen;
- int case_sensitive;
- int (*match_len)(const char *, const char *, int *);
-};
-
-static struct sip_header_nfo ct_sip_hdrs[] = {
- [POS_REG_REQ_URI] = { /* SIP REGISTER request URI */
- .lname = "sip:",
- .lnlen = sizeof("sip:") - 1,
- .ln_str = ":",
- .ln_strlen = sizeof(":") - 1,
- .match_len = epaddr_len
- },
- [POS_REQ_URI] = { /* SIP request URI */
- .lname = "sip:",
- .lnlen = sizeof("sip:") - 1,
- .ln_str = "@",
- .ln_strlen = sizeof("@") - 1,
- .match_len = epaddr_len
- },
- [POS_FROM] = { /* SIP From header */
- .lname = "From:",
- .lnlen = sizeof("From:") - 1,
- .sname = "\r\nf:",
- .snlen = sizeof("\r\nf:") - 1,
- .ln_str = "sip:",
- .ln_strlen = sizeof("sip:") - 1,
- .match_len = skp_epaddr_len,
- },
- [POS_TO] = { /* SIP To header */
- .lname = "To:",
- .lnlen = sizeof("To:") - 1,
- .sname = "\r\nt:",
- .snlen = sizeof("\r\nt:") - 1,
- .ln_str = "sip:",
- .ln_strlen = sizeof("sip:") - 1,
- .match_len = skp_epaddr_len,
- },
- [POS_VIA] = { /* SIP Via header */
- .lname = "Via:",
- .lnlen = sizeof("Via:") - 1,
- .sname = "\r\nv:",
- .snlen = sizeof("\r\nv:") - 1, /* rfc3261 "\r\n" */
- .ln_str = "UDP ",
- .ln_strlen = sizeof("UDP ") - 1,
- .match_len = epaddr_len,
- },
- [POS_CONTACT] = { /* SIP Contact header */
- .lname = "Contact:",
- .lnlen = sizeof("Contact:") - 1,
- .sname = "\r\nm:",
- .snlen = sizeof("\r\nm:") - 1,
- .ln_str = "sip:",
- .ln_strlen = sizeof("sip:") - 1,
- .match_len = skp_epaddr_len
- },
- [POS_CONTENT] = { /* SIP Content length header */
- .lname = "Content-Length:",
- .lnlen = sizeof("Content-Length:") - 1,
- .sname = "\r\nl:",
- .snlen = sizeof("\r\nl:") - 1,
- .ln_str = ":",
- .ln_strlen = sizeof(":") - 1,
- .match_len = skp_digits_len
- },
- [POS_MEDIA] = { /* SDP media info */
- .case_sensitive = 1,
- .lname = "\nm=",
- .lnlen = sizeof("\nm=") - 1,
- .sname = "\rm=",
- .snlen = sizeof("\rm=") - 1,
- .ln_str = "audio ",
- .ln_strlen = sizeof("audio ") - 1,
- .match_len = digits_len
- },
- [POS_OWNER] = { /* SDP owner address*/
- .case_sensitive = 1,
- .lname = "\no=",
- .lnlen = sizeof("\no=") - 1,
- .sname = "\ro=",
- .snlen = sizeof("\ro=") - 1,
- .ln_str = "IN IP4 ",
- .ln_strlen = sizeof("IN IP4 ") - 1,
- .match_len = epaddr_len
- },
- [POS_CONNECTION] = { /* SDP connection info */
- .case_sensitive = 1,
- .lname = "\nc=",
- .lnlen = sizeof("\nc=") - 1,
- .sname = "\rc=",
- .snlen = sizeof("\rc=") - 1,
- .ln_str = "IN IP4 ",
- .ln_strlen = sizeof("IN IP4 ") - 1,
- .match_len = epaddr_len
- },
- [POS_SDP_HEADER] = { /* SDP version header */
- .case_sensitive = 1,
- .lname = "\nv=",
- .lnlen = sizeof("\nv=") - 1,
- .sname = "\rv=",
- .snlen = sizeof("\rv=") - 1,
- .ln_str = "=",
- .ln_strlen = sizeof("=") - 1,
- .match_len = digits_len
- }
-};
-
-/* Get line length until the first CR or LF is seen. */
-int ct_sip_lnlen(const char *line, const char *limit)
-{
- const char *k = line;
-
- while ((line <= limit) && (*line == '\r' || *line == '\n'))
- line++;
-
- while (line <= limit) {
- if (*line == '\r' || *line == '\n')
- break;
- line++;
- }
- return line - k;
-}
-EXPORT_SYMBOL_GPL(ct_sip_lnlen);
-
-/* Linear string search; case sensitivity controlled by the caller. */
-const char *ct_sip_search(const char *needle, const char *haystack,
- size_t needle_len, size_t haystack_len,
- int case_sensitive)
-{
- const char *limit = haystack + (haystack_len - needle_len);
-
- while (haystack <= limit) {
- if (case_sensitive) {
- if (strncmp(haystack, needle, needle_len) == 0)
- return haystack;
- } else {
- if (strnicmp(haystack, needle, needle_len) == 0)
- return haystack;
- }
- haystack++;
- }
- return NULL;
-}
-EXPORT_SYMBOL_GPL(ct_sip_search);
-
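A compact illustration of ct_sip_lnlen(): SIP payloads are not NUL-terminated, so every scan is bounded by an explicit limit pointer, and header matching (ct_sip_search above) is then confined to one line at a time. A standalone reimplementation for the example:

/* Standalone illustration of the line-length helper above. */
#include <stdio.h>
#include <string.h>

static int lnlen(const char *line, const char *limit)
{
        const char *k = line;

        while (line <= limit && (*line == '\r' || *line == '\n'))
                line++;                 /* skip leading line breaks */
        while (line <= limit && *line != '\r' && *line != '\n')
                line++;                 /* consume up to the next break */
        return line - k;
}

int main(void)
{
        const char *msg = "Contact: <sip:alice@10.0.0.1:5060>\r\nCSeq: 1\r\n";
        const char *limit = msg + strlen(msg) - 1;

        /* 34 bytes: everything before the first CR */
        printf("first line is %d bytes\n", lnlen(msg, limit));
        return 0;
}
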
-static int digits_len(const char *dptr, const char *limit, int *shift)
-{
- int len = 0;
- while (dptr <= limit && isdigit(*dptr)) {
- dptr++;
- len++;
- }
- return len;
-}
-
-/* Get digits length, skipping blank spaces. */
-static int skp_digits_len(const char *dptr, const char *limit, int *shift)
-{
- for (; dptr <= limit && *dptr == ' '; dptr++)
- (*shift)++;
-
- return digits_len(dptr, limit, shift);
-}
-
-/* Simple ipaddr parser. */
-static int parse_ipaddr(const char *cp, const char **endp,
- __be32 *ipaddr, const char *limit)
-{
- unsigned long int val;
- int i, digit = 0;
-
- for (i = 0, *ipaddr = 0; cp <= limit && i < 4; i++) {
- digit = 0;
- if (!isdigit(*cp))
- break;
-
- val = simple_strtoul(cp, (char **)&cp, 10);
- if (val > 0xFF)
- return -1;
-
- ((u_int8_t *)ipaddr)[i] = val;
- digit = 1;
-
- if (*cp != '.')
- break;
- cp++;
- }
- if (!digit)
- return -1;
-
- if (endp)
- *endp = cp;
-
- return 0;
-}
-
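The parser above accepts a dotted quad embedded in arbitrary text and reports where it stopped, which is what the callers below use to step past the address to a port number. A userspace analogue using strtoul (simple_strtoul is the kernel's equivalent):

/* Userspace analogue of parse_ipaddr() above: consume up to four
 * dot-separated octets, recording where parsing stopped. */
#include <ctype.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

static int parse_ipaddr(const char *cp, const char **endp, uint8_t ip[4])
{
        int i, digit = 0;

        for (i = 0; i < 4; i++) {
                char *next;
                unsigned long val;

                if (!isdigit((unsigned char)*cp))
                        break;
                val = strtoul(cp, &next, 10);
                if (val > 0xFF)
                        return -1;      /* octet out of range */
                ip[i] = val;
                digit = 1;
                cp = next;
                if (*cp != '.')
                        break;          /* end of the dotted quad */
                cp++;
        }
        if (!digit)
                return -1;
        if (endp)
                *endp = cp;
        return 0;
}

int main(void)
{
        uint8_t ip[4] = { 0 };
        const char *end;

        if (parse_ipaddr("10.0.0.1:5060 more", &end, ip) == 0)
                printf("%u.%u.%u.%u then \"%s\"\n",
                       ip[0], ip[1], ip[2], ip[3], end);
        return 0;       /* prints: 10.0.0.1 then ":5060 more" */
}
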
-/* Skip the IP address. Returns its length. */
-static int epaddr_len(const char *dptr, const char *limit, int *shift)
-{
- const char *aux = dptr;
- __be32 ip;
-
- if (parse_ipaddr(dptr, &dptr, &ip, limit) < 0) {
- DEBUGP("ip: %s parse failed.!\n", dptr);
- return 0;
- }
-
- /* Port number */
- if (*dptr == ':') {
- dptr++;
- dptr += digits_len(dptr, limit, shift);
- }
- return dptr - aux;
-}
-
-/* Get address length, skipping user info. */
-static int skp_epaddr_len(const char *dptr, const char *limit, int *shift)
-{
- int s = *shift;
-
- /* Search for @, but stop at the end of the line.
- * We are inside a sip: URI, so we don't need to worry about
- * continuation lines. */
- while (dptr <= limit &&
- *dptr != '@' && *dptr != '\r' && *dptr != '\n') {
- (*shift)++;
- dptr++;
- }
-
- if (dptr <= limit && *dptr == '@') {
- dptr++;
- (*shift)++;
- } else
- *shift = s;
-
- return epaddr_len(dptr, limit, shift);
-}
-
-/* Returns 0 if not found, -1 error parsing. */
-int ct_sip_get_info(const char *dptr, size_t dlen,
- unsigned int *matchoff,
- unsigned int *matchlen,
- enum sip_header_pos pos)
-{
- struct sip_header_nfo *hnfo = &ct_sip_hdrs[pos];
- const char *limit, *aux, *k = dptr;
- int shift = 0;
-
- limit = dptr + (dlen - hnfo->lnlen);
-
- while (dptr <= limit) {
- if ((strncmp(dptr, hnfo->lname, hnfo->lnlen) != 0) &&
- (hnfo->sname == NULL ||
- strncmp(dptr, hnfo->sname, hnfo->snlen) != 0)) {
- dptr++;
- continue;
- }
- aux = ct_sip_search(hnfo->ln_str, dptr, hnfo->ln_strlen,
- ct_sip_lnlen(dptr, limit),
- hnfo->case_sensitive);
- if (!aux) {
- DEBUGP("'%s' not found in '%s'.\n", hnfo->ln_str,
- hnfo->lname);
- return -1;
- }
- aux += hnfo->ln_strlen;
-
- *matchlen = hnfo->match_len(aux, limit, &shift);
- if (!*matchlen)
- return -1;
-
- *matchoff = (aux - k) + shift;
-
- DEBUGP("%s match succeeded! - len: %u\n", hnfo->lname,
- *matchlen);
- return 1;
- }
- DEBUGP("%s header not found.\n", hnfo->lname);
- return 0;
-}
-EXPORT_SYMBOL_GPL(ct_sip_get_info);
-
-static int set_expected_rtp(struct sk_buff **pskb,
- struct ip_conntrack *ct,
- enum ip_conntrack_info ctinfo,
- __be32 ipaddr, u_int16_t port,
- const char *dptr)
-{
- struct ip_conntrack_expect *exp;
- enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo);
- int ret;
- typeof(ip_nat_sdp_hook) ip_nat_sdp;
-
- exp = ip_conntrack_expect_alloc(ct);
- if (exp == NULL)
- return NF_DROP;
-
- exp->tuple.src.ip = ct->tuplehash[!dir].tuple.src.ip;
- exp->tuple.src.u.udp.port = 0;
- exp->tuple.dst.ip = ipaddr;
- exp->tuple.dst.u.udp.port = htons(port);
- exp->tuple.dst.protonum = IPPROTO_UDP;
-
- exp->mask.src.ip = htonl(0xFFFFFFFF);
- exp->mask.src.u.udp.port = 0;
- exp->mask.dst.ip = htonl(0xFFFFFFFF);
- exp->mask.dst.u.udp.port = htons(0xFFFF);
- exp->mask.dst.protonum = 0xFF;
-
- exp->expectfn = NULL;
- exp->flags = 0;
-
- ip_nat_sdp = rcu_dereference(ip_nat_sdp_hook);
- if (ip_nat_sdp)
- ret = ip_nat_sdp(pskb, ctinfo, exp, dptr);
- else {
- if (ip_conntrack_expect_related(exp) != 0)
- ret = NF_DROP;
- else
- ret = NF_ACCEPT;
- }
- ip_conntrack_expect_put(exp);
-
- return ret;
-}
-
-static int sip_help(struct sk_buff **pskb,
- struct ip_conntrack *ct,
- enum ip_conntrack_info ctinfo)
-{
- unsigned int dataoff, datalen;
- const char *dptr;
- int ret = NF_ACCEPT;
- int matchoff, matchlen;
- __be32 ipaddr;
- u_int16_t port;
- typeof(ip_nat_sip_hook) ip_nat_sip;
-
-	/* No data? */
- dataoff = (*pskb)->nh.iph->ihl*4 + sizeof(struct udphdr);
- if (dataoff >= (*pskb)->len) {
- DEBUGP("skb->len = %u\n", (*pskb)->len);
- return NF_ACCEPT;
- }
-
- ip_ct_refresh(ct, *pskb, sip_timeout * HZ);
-
- if (!skb_is_nonlinear(*pskb))
- dptr = (*pskb)->data + dataoff;
- else {
- DEBUGP("Copy of skbuff not supported yet.\n");
- goto out;
- }
-
- ip_nat_sip = rcu_dereference(ip_nat_sip_hook);
- if (ip_nat_sip) {
- if (!ip_nat_sip(pskb, ctinfo, ct, &dptr)) {
- ret = NF_DROP;
- goto out;
- }
- }
-
-	/* After this point NAT could have mangled the skb, so
-	   we need to recalculate the payload length. */
- datalen = (*pskb)->len - dataoff;
-
- if (datalen < (sizeof("SIP/2.0 200") - 1))
- goto out;
-
- /* RTP info only in some SDP pkts */
- if (memcmp(dptr, "INVITE", sizeof("INVITE") - 1) != 0 &&
- memcmp(dptr, "SIP/2.0 200", sizeof("SIP/2.0 200") - 1) != 0) {
- goto out;
- }
- /* Get ip and port address from SDP packet. */
- if (ct_sip_get_info(dptr, datalen, &matchoff, &matchlen,
- POS_CONNECTION) > 0) {
-
- /* We'll drop only if there are parse problems. */
- if (parse_ipaddr(dptr + matchoff, NULL, &ipaddr,
- dptr + datalen) < 0) {
- ret = NF_DROP;
- goto out;
- }
- if (ct_sip_get_info(dptr, datalen, &matchoff, &matchlen,
- POS_MEDIA) > 0) {
-
- port = simple_strtoul(dptr + matchoff, NULL, 10);
- if (port < 1024) {
- ret = NF_DROP;
- goto out;
- }
- ret = set_expected_rtp(pskb, ct, ctinfo,
- ipaddr, port, dptr);
- }
- }
-out:
- return ret;
-}
-
-static struct ip_conntrack_helper sip[MAX_PORTS];
-static char sip_names[MAX_PORTS][10];
-
-static void fini(void)
-{
- int i;
- for (i = 0; i < ports_c; i++) {
- DEBUGP("unregistering helper for port %d\n", ports[i]);
- ip_conntrack_helper_unregister(&sip[i]);
- }
-}
-
-static int __init init(void)
-{
- int i, ret;
- char *tmpname;
-
- if (ports_c == 0)
- ports[ports_c++] = SIP_PORT;
-
- for (i = 0; i < ports_c; i++) {
- /* Create helper structure */
- memset(&sip[i], 0, sizeof(struct ip_conntrack_helper));
-
- sip[i].tuple.dst.protonum = IPPROTO_UDP;
- sip[i].tuple.src.u.udp.port = htons(ports[i]);
- sip[i].mask.src.u.udp.port = htons(0xFFFF);
- sip[i].mask.dst.protonum = 0xFF;
- sip[i].max_expected = 2;
- sip[i].timeout = 3 * 60; /* 3 minutes */
- sip[i].me = THIS_MODULE;
- sip[i].help = sip_help;
-
- tmpname = &sip_names[i][0];
- if (ports[i] == SIP_PORT)
- sprintf(tmpname, "sip");
- else
- sprintf(tmpname, "sip-%d", i);
- sip[i].name = tmpname;
-
- DEBUGP("port #%d: %d\n", i, ports[i]);
-
- ret = ip_conntrack_helper_register(&sip[i]);
- if (ret) {
- printk("ERROR registering helper for port %d\n",
- ports[i]);
- fini();
- return ret;
- }
- }
- return 0;
-}
-
-module_init(init);
-module_exit(fini);
diff --git a/net/ipv4/netfilter/ip_conntrack_standalone.c b/net/ipv4/netfilter/ip_conntrack_standalone.c
deleted file mode 100644
index 56b2f7546d1..00000000000
--- a/net/ipv4/netfilter/ip_conntrack_standalone.c
+++ /dev/null
@@ -1,962 +0,0 @@
-/* This file contains all the functions required for the standalone
- ip_conntrack module.
-
- These are not required by the compatibility layer.
-*/
-
-/* (C) 1999-2001 Paul `Rusty' Russell
- * (C) 2002-2005 Netfilter Core Team <coreteam@netfilter.org>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
-
-#include <linux/types.h>
-#include <linux/ip.h>
-#include <linux/netfilter.h>
-#include <linux/netfilter_ipv4.h>
-#include <linux/module.h>
-#include <linux/skbuff.h>
-#include <linux/proc_fs.h>
-#include <linux/seq_file.h>
-#include <linux/percpu.h>
-#ifdef CONFIG_SYSCTL
-#include <linux/sysctl.h>
-#endif
-#include <net/checksum.h>
-#include <net/ip.h>
-#include <net/route.h>
-
-#include <linux/netfilter_ipv4/ip_conntrack.h>
-#include <linux/netfilter_ipv4/ip_conntrack_protocol.h>
-#include <linux/netfilter_ipv4/ip_conntrack_core.h>
-#include <linux/netfilter_ipv4/ip_conntrack_helper.h>
-
-#if 0
-#define DEBUGP printk
-#else
-#define DEBUGP(format, args...)
-#endif
-
-MODULE_LICENSE("GPL");
-
-extern atomic_t ip_conntrack_count;
-DECLARE_PER_CPU(struct ip_conntrack_stat, ip_conntrack_stat);
-
-static int kill_proto(struct ip_conntrack *i, void *data)
-{
- return (i->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.protonum ==
- *((u_int8_t *) data));
-}
-
-#ifdef CONFIG_PROC_FS
-static int
-print_tuple(struct seq_file *s, const struct ip_conntrack_tuple *tuple,
- struct ip_conntrack_protocol *proto)
-{
- seq_printf(s, "src=%u.%u.%u.%u dst=%u.%u.%u.%u ",
- NIPQUAD(tuple->src.ip), NIPQUAD(tuple->dst.ip));
- return proto->print_tuple(s, tuple);
-}
-
-#ifdef CONFIG_IP_NF_CT_ACCT
-static unsigned int
-seq_print_counters(struct seq_file *s,
- const struct ip_conntrack_counter *counter)
-{
- return seq_printf(s, "packets=%llu bytes=%llu ",
- (unsigned long long)counter->packets,
- (unsigned long long)counter->bytes);
-}
-#else
-#define seq_print_counters(x, y) 0
-#endif
-
-struct ct_iter_state {
- unsigned int bucket;
-};
-
-static struct list_head *ct_get_first(struct seq_file *seq)
-{
- struct ct_iter_state *st = seq->private;
-
- for (st->bucket = 0;
- st->bucket < ip_conntrack_htable_size;
- st->bucket++) {
- if (!list_empty(&ip_conntrack_hash[st->bucket]))
- return ip_conntrack_hash[st->bucket].next;
- }
- return NULL;
-}
-
-static struct list_head *ct_get_next(struct seq_file *seq, struct list_head *head)
-{
- struct ct_iter_state *st = seq->private;
-
- head = head->next;
- while (head == &ip_conntrack_hash[st->bucket]) {
- if (++st->bucket >= ip_conntrack_htable_size)
- return NULL;
- head = ip_conntrack_hash[st->bucket].next;
- }
- return head;
-}
-
-static struct list_head *ct_get_idx(struct seq_file *seq, loff_t pos)
-{
- struct list_head *head = ct_get_first(seq);
-
- if (head)
- while (pos && (head = ct_get_next(seq, head)))
- pos--;
- return pos ? NULL : head;
-}
-
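The two iterators above implement the standard seq_file pattern for walking a hash table of lists: start from the first non-empty bucket, and on each step fall through to the next bucket whenever the current list is exhausted. The same shape in a standalone sketch (simplified to NULL-terminated singly linked lists instead of the kernel's circular list_head):

/* Standalone sketch of the bucket-walk pattern used by
 * ct_get_first()/ct_get_next() above. */
#include <stddef.h>
#include <stdio.h>

struct node { int val; struct node *next; };

#define NBUCKETS 3

static struct node *get_first(struct node *tbl[], unsigned *bucket)
{
        for (*bucket = 0; *bucket < NBUCKETS; (*bucket)++)
                if (tbl[*bucket])
                        return tbl[*bucket];
        return NULL;
}

static struct node *get_next(struct node *tbl[], unsigned *bucket,
                             struct node *n)
{
        n = n->next;
        while (!n) {                    /* current bucket exhausted */
                if (++*bucket >= NBUCKETS)
                        return NULL;
                n = tbl[*bucket];
        }
        return n;
}

int main(void)
{
        struct node c = { 3, NULL }, b = { 2, NULL }, a = { 1, &b };
        struct node *tbl[NBUCKETS] = { NULL, &a, &c };
        unsigned bucket;

        for (struct node *n = get_first(tbl, &bucket); n;
             n = get_next(tbl, &bucket, n))
                printf("%d ", n->val);  /* 1 2 3 */
        printf("\n");
        return 0;
}
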
-static void *ct_seq_start(struct seq_file *seq, loff_t *pos)
-{
- read_lock_bh(&ip_conntrack_lock);
- return ct_get_idx(seq, *pos);
-}
-
-static void *ct_seq_next(struct seq_file *s, void *v, loff_t *pos)
-{
- (*pos)++;
- return ct_get_next(s, v);
-}
-
-static void ct_seq_stop(struct seq_file *s, void *v)
-{
- read_unlock_bh(&ip_conntrack_lock);
-}
-
-static int ct_seq_show(struct seq_file *s, void *v)
-{
- const struct ip_conntrack_tuple_hash *hash = v;
- const struct ip_conntrack *conntrack = tuplehash_to_ctrack(hash);
- struct ip_conntrack_protocol *proto;
-
- IP_NF_ASSERT(conntrack);
-
- /* we only want to print DIR_ORIGINAL */
- if (DIRECTION(hash))
- return 0;
-
- proto = __ip_conntrack_proto_find(conntrack->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.protonum);
- IP_NF_ASSERT(proto);
-
- if (seq_printf(s, "%-8s %u %ld ",
- proto->name,
- conntrack->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.protonum,
- timer_pending(&conntrack->timeout)
- ? (long)(conntrack->timeout.expires - jiffies)/HZ
- : 0) != 0)
- return -ENOSPC;
-
- if (proto->print_conntrack(s, conntrack))
- return -ENOSPC;
-
- if (print_tuple(s, &conntrack->tuplehash[IP_CT_DIR_ORIGINAL].tuple,
- proto))
- return -ENOSPC;
-
- if (seq_print_counters(s, &conntrack->counters[IP_CT_DIR_ORIGINAL]))
- return -ENOSPC;
-
- if (!(test_bit(IPS_SEEN_REPLY_BIT, &conntrack->status)))
- if (seq_printf(s, "[UNREPLIED] "))
- return -ENOSPC;
-
- if (print_tuple(s, &conntrack->tuplehash[IP_CT_DIR_REPLY].tuple,
- proto))
- return -ENOSPC;
-
- if (seq_print_counters(s, &conntrack->counters[IP_CT_DIR_REPLY]))
- return -ENOSPC;
-
- if (test_bit(IPS_ASSURED_BIT, &conntrack->status))
- if (seq_printf(s, "[ASSURED] "))
- return -ENOSPC;
-
-#if defined(CONFIG_IP_NF_CONNTRACK_MARK)
- if (seq_printf(s, "mark=%u ", conntrack->mark))
- return -ENOSPC;
-#endif
-
-#ifdef CONFIG_IP_NF_CONNTRACK_SECMARK
- if (seq_printf(s, "secmark=%u ", conntrack->secmark))
- return -ENOSPC;
-#endif
-
- if (seq_printf(s, "use=%u\n", atomic_read(&conntrack->ct_general.use)))
- return -ENOSPC;
-
- return 0;
-}
-
-static struct seq_operations ct_seq_ops = {
- .start = ct_seq_start,
- .next = ct_seq_next,
- .stop = ct_seq_stop,
- .show = ct_seq_show
-};
-
-static int ct_open(struct inode *inode, struct file *file)
-{
- struct seq_file *seq;
- struct ct_iter_state *st;
- int ret;
-
- st = kmalloc(sizeof(struct ct_iter_state), GFP_KERNEL);
- if (st == NULL)
- return -ENOMEM;
- ret = seq_open(file, &ct_seq_ops);
- if (ret)
- goto out_free;
- seq = file->private_data;
- seq->private = st;
- memset(st, 0, sizeof(struct ct_iter_state));
- return ret;
-out_free:
- kfree(st);
- return ret;
-}
-
-static const struct file_operations ct_file_ops = {
- .owner = THIS_MODULE,
- .open = ct_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = seq_release_private,
-};
-
-/* expects */
-static void *exp_seq_start(struct seq_file *s, loff_t *pos)
-{
- struct list_head *e = &ip_conntrack_expect_list;
- loff_t i;
-
-	/* The seq_file API calls stop even if start fails,
-	 * so we must take the lock here since stop drops it */
- read_lock_bh(&ip_conntrack_lock);
-
- if (list_empty(e))
- return NULL;
-
- for (i = 0; i <= *pos; i++) {
- e = e->next;
- if (e == &ip_conntrack_expect_list)
- return NULL;
- }
- return e;
-}
-
-static void *exp_seq_next(struct seq_file *s, void *v, loff_t *pos)
-{
- struct list_head *e = v;
-
- ++*pos;
- e = e->next;
-
- if (e == &ip_conntrack_expect_list)
- return NULL;
-
- return e;
-}
-
-static void exp_seq_stop(struct seq_file *s, void *v)
-{
- read_unlock_bh(&ip_conntrack_lock);
-}
-
-static int exp_seq_show(struct seq_file *s, void *v)
-{
- struct ip_conntrack_expect *expect = v;
-
- if (expect->timeout.function)
- seq_printf(s, "%ld ", timer_pending(&expect->timeout)
- ? (long)(expect->timeout.expires - jiffies)/HZ : 0);
- else
- seq_printf(s, "- ");
-
- seq_printf(s, "proto=%u ", expect->tuple.dst.protonum);
-
- print_tuple(s, &expect->tuple,
- __ip_conntrack_proto_find(expect->tuple.dst.protonum));
- return seq_putc(s, '\n');
-}
-
-static struct seq_operations exp_seq_ops = {
- .start = exp_seq_start,
- .next = exp_seq_next,
- .stop = exp_seq_stop,
- .show = exp_seq_show
-};
-
-static int exp_open(struct inode *inode, struct file *file)
-{
- return seq_open(file, &exp_seq_ops);
-}
-
-static const struct file_operations exp_file_ops = {
- .owner = THIS_MODULE,
- .open = exp_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = seq_release
-};
-
-static void *ct_cpu_seq_start(struct seq_file *seq, loff_t *pos)
-{
- int cpu;
-
- if (*pos == 0)
- return SEQ_START_TOKEN;
-
- for (cpu = *pos-1; cpu < NR_CPUS; ++cpu) {
- if (!cpu_possible(cpu))
- continue;
- *pos = cpu+1;
- return &per_cpu(ip_conntrack_stat, cpu);
- }
-
- return NULL;
-}
-
-static void *ct_cpu_seq_next(struct seq_file *seq, void *v, loff_t *pos)
-{
- int cpu;
-
- for (cpu = *pos; cpu < NR_CPUS; ++cpu) {
- if (!cpu_possible(cpu))
- continue;
- *pos = cpu+1;
- return &per_cpu(ip_conntrack_stat, cpu);
- }
-
- return NULL;
-}
-
-static void ct_cpu_seq_stop(struct seq_file *seq, void *v)
-{
-}
-
-static int ct_cpu_seq_show(struct seq_file *seq, void *v)
-{
- unsigned int nr_conntracks = atomic_read(&ip_conntrack_count);
- struct ip_conntrack_stat *st = v;
-
- if (v == SEQ_START_TOKEN) {
- seq_printf(seq, "entries searched found new invalid ignore delete delete_list insert insert_failed drop early_drop icmp_error expect_new expect_create expect_delete\n");
- return 0;
- }
-
- seq_printf(seq, "%08x %08x %08x %08x %08x %08x %08x %08x "
- "%08x %08x %08x %08x %08x %08x %08x %08x \n",
- nr_conntracks,
- st->searched,
- st->found,
- st->new,
- st->invalid,
- st->ignore,
- st->delete,
- st->delete_list,
- st->insert,
- st->insert_failed,
- st->drop,
- st->early_drop,
- st->error,
-
- st->expect_new,
- st->expect_create,
- st->expect_delete
- );
- return 0;
-}
-
-static struct seq_operations ct_cpu_seq_ops = {
- .start = ct_cpu_seq_start,
- .next = ct_cpu_seq_next,
- .stop = ct_cpu_seq_stop,
- .show = ct_cpu_seq_show,
-};
-
-static int ct_cpu_seq_open(struct inode *inode, struct file *file)
-{
- return seq_open(file, &ct_cpu_seq_ops);
-}
-
-static const struct file_operations ct_cpu_seq_fops = {
- .owner = THIS_MODULE,
- .open = ct_cpu_seq_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = seq_release_private,
-};
-#endif
-
-static unsigned int ip_confirm(unsigned int hooknum,
- struct sk_buff **pskb,
- const struct net_device *in,
- const struct net_device *out,
- int (*okfn)(struct sk_buff *))
-{
- /* We've seen it coming out the other side: confirm it */
- return ip_conntrack_confirm(pskb);
-}
-
-static unsigned int ip_conntrack_help(unsigned int hooknum,
- struct sk_buff **pskb,
- const struct net_device *in,
- const struct net_device *out,
- int (*okfn)(struct sk_buff *))
-{
- struct ip_conntrack *ct;
- enum ip_conntrack_info ctinfo;
-
- /* This is where we call the helper: as the packet goes out. */
- ct = ip_conntrack_get(*pskb, &ctinfo);
- if (ct && ct->helper && ctinfo != IP_CT_RELATED + IP_CT_IS_REPLY) {
- unsigned int ret;
- ret = ct->helper->help(pskb, ct, ctinfo);
- if (ret != NF_ACCEPT)
- return ret;
- }
- return NF_ACCEPT;
-}
-
-static unsigned int ip_conntrack_defrag(unsigned int hooknum,
- struct sk_buff **pskb,
- const struct net_device *in,
- const struct net_device *out,
- int (*okfn)(struct sk_buff *))
-{
-#if !defined(CONFIG_IP_NF_NAT) && !defined(CONFIG_IP_NF_NAT_MODULE)
- /* Previously seen (loopback)? Ignore. Do this before
- fragment check. */
- if ((*pskb)->nfct)
- return NF_ACCEPT;
-#endif
-
- /* Gather fragments. */
- if ((*pskb)->nh.iph->frag_off & htons(IP_MF|IP_OFFSET)) {
- *pskb = ip_ct_gather_frags(*pskb,
- hooknum == NF_IP_PRE_ROUTING ?
- IP_DEFRAG_CONNTRACK_IN :
- IP_DEFRAG_CONNTRACK_OUT);
- if (!*pskb)
- return NF_STOLEN;
- }
- return NF_ACCEPT;
-}
-
-static unsigned int ip_conntrack_local(unsigned int hooknum,
- struct sk_buff **pskb,
- const struct net_device *in,
- const struct net_device *out,
- int (*okfn)(struct sk_buff *))
-{
- /* root is playing with raw sockets. */
- if ((*pskb)->len < sizeof(struct iphdr)
- || (*pskb)->nh.iph->ihl * 4 < sizeof(struct iphdr)) {
- if (net_ratelimit())
- printk("ipt_hook: happy cracking.\n");
- return NF_ACCEPT;
- }
- return ip_conntrack_in(hooknum, pskb, in, out, okfn);
-}
-
-/* Connection tracking may drop packets, but never alters them, so
- make it the first hook. */
-static struct nf_hook_ops ip_conntrack_ops[] = {
- {
- .hook = ip_conntrack_defrag,
- .owner = THIS_MODULE,
- .pf = PF_INET,
- .hooknum = NF_IP_PRE_ROUTING,
- .priority = NF_IP_PRI_CONNTRACK_DEFRAG,
- },
- {
- .hook = ip_conntrack_in,
- .owner = THIS_MODULE,
- .pf = PF_INET,
- .hooknum = NF_IP_PRE_ROUTING,
- .priority = NF_IP_PRI_CONNTRACK,
- },
- {
- .hook = ip_conntrack_defrag,
- .owner = THIS_MODULE,
- .pf = PF_INET,
- .hooknum = NF_IP_LOCAL_OUT,
- .priority = NF_IP_PRI_CONNTRACK_DEFRAG,
- },
- {
- .hook = ip_conntrack_local,
- .owner = THIS_MODULE,
- .pf = PF_INET,
- .hooknum = NF_IP_LOCAL_OUT,
- .priority = NF_IP_PRI_CONNTRACK,
- },
- {
- .hook = ip_conntrack_help,
- .owner = THIS_MODULE,
- .pf = PF_INET,
- .hooknum = NF_IP_POST_ROUTING,
- .priority = NF_IP_PRI_CONNTRACK_HELPER,
- },
- {
- .hook = ip_conntrack_help,
- .owner = THIS_MODULE,
- .pf = PF_INET,
- .hooknum = NF_IP_LOCAL_IN,
- .priority = NF_IP_PRI_CONNTRACK_HELPER,
- },
- {
- .hook = ip_confirm,
- .owner = THIS_MODULE,
- .pf = PF_INET,
- .hooknum = NF_IP_POST_ROUTING,
- .priority = NF_IP_PRI_CONNTRACK_CONFIRM,
- },
- {
- .hook = ip_confirm,
- .owner = THIS_MODULE,
- .pf = PF_INET,
- .hooknum = NF_IP_LOCAL_IN,
- .priority = NF_IP_PRI_CONNTRACK_CONFIRM,
- },
-};
-
-/* Sysctl support */
-
-int ip_conntrack_checksum __read_mostly = 1;
-
-#ifdef CONFIG_SYSCTL
-
-/* From ip_conntrack_core.c */
-extern int ip_conntrack_max;
-extern unsigned int ip_conntrack_htable_size;
-
-/* From ip_conntrack_proto_tcp.c */
-extern unsigned int ip_ct_tcp_timeout_syn_sent;
-extern unsigned int ip_ct_tcp_timeout_syn_recv;
-extern unsigned int ip_ct_tcp_timeout_established;
-extern unsigned int ip_ct_tcp_timeout_fin_wait;
-extern unsigned int ip_ct_tcp_timeout_close_wait;
-extern unsigned int ip_ct_tcp_timeout_last_ack;
-extern unsigned int ip_ct_tcp_timeout_time_wait;
-extern unsigned int ip_ct_tcp_timeout_close;
-extern unsigned int ip_ct_tcp_timeout_max_retrans;
-extern int ip_ct_tcp_loose;
-extern int ip_ct_tcp_be_liberal;
-extern int ip_ct_tcp_max_retrans;
-
-/* From ip_conntrack_proto_udp.c */
-extern unsigned int ip_ct_udp_timeout;
-extern unsigned int ip_ct_udp_timeout_stream;
-
-/* From ip_conntrack_proto_icmp.c */
-extern unsigned int ip_ct_icmp_timeout;
-
-/* From ip_conntrack_proto_generic.c */
-extern unsigned int ip_ct_generic_timeout;
-
-/* Log invalid packets of a given protocol */
-static int log_invalid_proto_min = 0;
-static int log_invalid_proto_max = 255;
-
-static struct ctl_table_header *ip_ct_sysctl_header;
-
-static ctl_table ip_ct_sysctl_table[] = {
- {
- .ctl_name = NET_IPV4_NF_CONNTRACK_MAX,
- .procname = "ip_conntrack_max",
- .data = &ip_conntrack_max,
- .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = &proc_dointvec,
- },
- {
- .ctl_name = NET_IPV4_NF_CONNTRACK_COUNT,
- .procname = "ip_conntrack_count",
- .data = &ip_conntrack_count,
- .maxlen = sizeof(int),
- .mode = 0444,
- .proc_handler = &proc_dointvec,
- },
- {
- .ctl_name = NET_IPV4_NF_CONNTRACK_BUCKETS,
- .procname = "ip_conntrack_buckets",
- .data = &ip_conntrack_htable_size,
- .maxlen = sizeof(unsigned int),
- .mode = 0444,
- .proc_handler = &proc_dointvec,
- },
- {
- .ctl_name = NET_IPV4_NF_CONNTRACK_CHECKSUM,
- .procname = "ip_conntrack_checksum",
- .data = &ip_conntrack_checksum,
- .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = &proc_dointvec,
- },
- {
- .ctl_name = NET_IPV4_NF_CONNTRACK_TCP_TIMEOUT_SYN_SENT,
- .procname = "ip_conntrack_tcp_timeout_syn_sent",
- .data = &ip_ct_tcp_timeout_syn_sent,
- .maxlen = sizeof(unsigned int),
- .mode = 0644,
- .proc_handler = &proc_dointvec_jiffies,
- },
- {
- .ctl_name = NET_IPV4_NF_CONNTRACK_TCP_TIMEOUT_SYN_RECV,
- .procname = "ip_conntrack_tcp_timeout_syn_recv",
- .data = &ip_ct_tcp_timeout_syn_recv,
- .maxlen = sizeof(unsigned int),
- .mode = 0644,
- .proc_handler = &proc_dointvec_jiffies,
- },
- {
- .ctl_name = NET_IPV4_NF_CONNTRACK_TCP_TIMEOUT_ESTABLISHED,
- .procname = "ip_conntrack_tcp_timeout_established",
- .data = &ip_ct_tcp_timeout_established,
- .maxlen = sizeof(unsigned int),
- .mode = 0644,
- .proc_handler = &proc_dointvec_jiffies,
- },
- {
- .ctl_name = NET_IPV4_NF_CONNTRACK_TCP_TIMEOUT_FIN_WAIT,
- .procname = "ip_conntrack_tcp_timeout_fin_wait",
- .data = &ip_ct_tcp_timeout_fin_wait,
- .maxlen = sizeof(unsigned int),
- .mode = 0644,
- .proc_handler = &proc_dointvec_jiffies,
- },
- {
- .ctl_name = NET_IPV4_NF_CONNTRACK_TCP_TIMEOUT_CLOSE_WAIT,
- .procname = "ip_conntrack_tcp_timeout_close_wait",
- .data = &ip_ct_tcp_timeout_close_wait,
- .maxlen = sizeof(unsigned int),
- .mode = 0644,
- .proc_handler = &proc_dointvec_jiffies,
- },
- {
- .ctl_name = NET_IPV4_NF_CONNTRACK_TCP_TIMEOUT_LAST_ACK,
- .procname = "ip_conntrack_tcp_timeout_last_ack",
- .data = &ip_ct_tcp_timeout_last_ack,
- .maxlen = sizeof(unsigned int),
- .mode = 0644,
- .proc_handler = &proc_dointvec_jiffies,
- },
- {
- .ctl_name = NET_IPV4_NF_CONNTRACK_TCP_TIMEOUT_TIME_WAIT,
- .procname = "ip_conntrack_tcp_timeout_time_wait",
- .data = &ip_ct_tcp_timeout_time_wait,
- .maxlen = sizeof(unsigned int),
- .mode = 0644,
- .proc_handler = &proc_dointvec_jiffies,
- },
- {
- .ctl_name = NET_IPV4_NF_CONNTRACK_TCP_TIMEOUT_CLOSE,
- .procname = "ip_conntrack_tcp_timeout_close",
- .data = &ip_ct_tcp_timeout_close,
- .maxlen = sizeof(unsigned int),
- .mode = 0644,
- .proc_handler = &proc_dointvec_jiffies,
- },
- {
- .ctl_name = NET_IPV4_NF_CONNTRACK_UDP_TIMEOUT,
- .procname = "ip_conntrack_udp_timeout",
- .data = &ip_ct_udp_timeout,
- .maxlen = sizeof(unsigned int),
- .mode = 0644,
- .proc_handler = &proc_dointvec_jiffies,
- },
- {
- .ctl_name = NET_IPV4_NF_CONNTRACK_UDP_TIMEOUT_STREAM,
- .procname = "ip_conntrack_udp_timeout_stream",
- .data = &ip_ct_udp_timeout_stream,
- .maxlen = sizeof(unsigned int),
- .mode = 0644,
- .proc_handler = &proc_dointvec_jiffies,
- },
- {
- .ctl_name = NET_IPV4_NF_CONNTRACK_ICMP_TIMEOUT,
- .procname = "ip_conntrack_icmp_timeout",
- .data = &ip_ct_icmp_timeout,
- .maxlen = sizeof(unsigned int),
- .mode = 0644,
- .proc_handler = &proc_dointvec_jiffies,
- },
- {
- .ctl_name = NET_IPV4_NF_CONNTRACK_GENERIC_TIMEOUT,
- .procname = "ip_conntrack_generic_timeout",
- .data = &ip_ct_generic_timeout,
- .maxlen = sizeof(unsigned int),
- .mode = 0644,
- .proc_handler = &proc_dointvec_jiffies,
- },
- {
- .ctl_name = NET_IPV4_NF_CONNTRACK_LOG_INVALID,
- .procname = "ip_conntrack_log_invalid",
- .data = &ip_ct_log_invalid,
- .maxlen = sizeof(unsigned int),
- .mode = 0644,
- .proc_handler = &proc_dointvec_minmax,
- .strategy = &sysctl_intvec,
- .extra1 = &log_invalid_proto_min,
- .extra2 = &log_invalid_proto_max,
- },
- {
- .ctl_name = NET_IPV4_NF_CONNTRACK_TCP_TIMEOUT_MAX_RETRANS,
- .procname = "ip_conntrack_tcp_timeout_max_retrans",
- .data = &ip_ct_tcp_timeout_max_retrans,
- .maxlen = sizeof(unsigned int),
- .mode = 0644,
- .proc_handler = &proc_dointvec_jiffies,
- },
- {
- .ctl_name = NET_IPV4_NF_CONNTRACK_TCP_LOOSE,
- .procname = "ip_conntrack_tcp_loose",
- .data = &ip_ct_tcp_loose,
- .maxlen = sizeof(unsigned int),
- .mode = 0644,
- .proc_handler = &proc_dointvec,
- },
- {
- .ctl_name = NET_IPV4_NF_CONNTRACK_TCP_BE_LIBERAL,
- .procname = "ip_conntrack_tcp_be_liberal",
- .data = &ip_ct_tcp_be_liberal,
- .maxlen = sizeof(unsigned int),
- .mode = 0644,
- .proc_handler = &proc_dointvec,
- },
- {
- .ctl_name = NET_IPV4_NF_CONNTRACK_TCP_MAX_RETRANS,
- .procname = "ip_conntrack_tcp_max_retrans",
- .data = &ip_ct_tcp_max_retrans,
- .maxlen = sizeof(unsigned int),
- .mode = 0644,
- .proc_handler = &proc_dointvec,
- },
- { .ctl_name = 0 }
-};
-
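Every entry in the table above surfaces as a file under /proc/sys/, with the path implied by the net -> ipv4 -> netfilter table nesting registered below. A minimal userspace reader for one of them (a sketch; it assumes the module is loaded so the file exists):

/* Minimal userspace reader for one of the sysctls above. */
#include <stdio.h>

int main(void)
{
        FILE *f = fopen("/proc/sys/net/ipv4/netfilter/ip_conntrack_max", "r");
        int max;

        if (f && fscanf(f, "%d", &max) == 1)
                printf("ip_conntrack_max = %d\n", max);
        if (f)
                fclose(f);
        return 0;
}
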
-#define NET_IP_CONNTRACK_MAX 2089
-
-static ctl_table ip_ct_netfilter_table[] = {
- {
- .ctl_name = NET_IPV4_NETFILTER,
- .procname = "netfilter",
- .mode = 0555,
- .child = ip_ct_sysctl_table,
- },
- {
- .ctl_name = NET_IP_CONNTRACK_MAX,
- .procname = "ip_conntrack_max",
- .data = &ip_conntrack_max,
- .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = &proc_dointvec
- },
- { .ctl_name = 0 }
-};
-
-static ctl_table ip_ct_ipv4_table[] = {
- {
- .ctl_name = NET_IPV4,
- .procname = "ipv4",
- .mode = 0555,
- .child = ip_ct_netfilter_table,
- },
- { .ctl_name = 0 }
-};
-
-static ctl_table ip_ct_net_table[] = {
- {
- .ctl_name = CTL_NET,
- .procname = "net",
- .mode = 0555,
- .child = ip_ct_ipv4_table,
- },
- { .ctl_name = 0 }
-};
-
-EXPORT_SYMBOL(ip_ct_log_invalid);
-#endif /* CONFIG_SYSCTL */
-
-/* FIXME: Allow NULL functions and sub in pointers to generic for
- them. --RR */
-int ip_conntrack_protocol_register(struct ip_conntrack_protocol *proto)
-{
- int ret = 0;
-
- write_lock_bh(&ip_conntrack_lock);
- if (ip_ct_protos[proto->proto] != &ip_conntrack_generic_protocol) {
- ret = -EBUSY;
- goto out;
- }
- rcu_assign_pointer(ip_ct_protos[proto->proto], proto);
- out:
- write_unlock_bh(&ip_conntrack_lock);
- return ret;
-}
-
-void ip_conntrack_protocol_unregister(struct ip_conntrack_protocol *proto)
-{
- write_lock_bh(&ip_conntrack_lock);
- rcu_assign_pointer(ip_ct_protos[proto->proto],
- &ip_conntrack_generic_protocol);
- write_unlock_bh(&ip_conntrack_lock);
- synchronize_rcu();
-
- /* Remove all contrack entries for this protocol */
- ip_ct_iterate_cleanup(kill_proto, &proto->proto);
-}
-
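The registration pair above is a slot table guarded by a lock: a protocol may claim its slot only while the generic handler occupies it, and unregistering restores the generic handler before flushing that protocol's entries. The slot discipline in miniature (a standalone sketch, without the locking and RCU of the kernel version):

/* Miniature of the slot discipline used by the register/unregister
 * pair above. */
#include <errno.h>
#include <stdio.h>

struct proto { unsigned char num; const char *name; };

static struct proto generic = { 0, "generic" };
static struct proto *slots[256];

static int proto_register(struct proto *p)
{
        if (slots[p->num] != &generic)
                return -EBUSY;          /* someone else got there first */
        slots[p->num] = p;
        return 0;
}

static void proto_unregister(struct proto *p)
{
        slots[p->num] = &generic;       /* fall back to generic handling */
}

int main(void)
{
        struct proto tcp = { 6, "tcp" }, dup = { 6, "tcp2" };

        for (int i = 0; i < 256; i++)
                slots[i] = &generic;

        printf("%d %d\n", proto_register(&tcp), proto_register(&dup));
        proto_unregister(&tcp);         /* prints: 0 -16 (EBUSY) */
        return 0;
}
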
-static int __init ip_conntrack_standalone_init(void)
-{
-#ifdef CONFIG_PROC_FS
- struct proc_dir_entry *proc, *proc_exp, *proc_stat;
-#endif
- int ret = 0;
-
- ret = ip_conntrack_init();
- if (ret < 0)
- return ret;
-
-#ifdef CONFIG_PROC_FS
- ret = -ENOMEM;
- proc = proc_net_fops_create("ip_conntrack", 0440, &ct_file_ops);
- if (!proc) goto cleanup_init;
-
- proc_exp = proc_net_fops_create("ip_conntrack_expect", 0440,
- &exp_file_ops);
- if (!proc_exp) goto cleanup_proc;
-
- proc_stat = create_proc_entry("ip_conntrack", S_IRUGO, proc_net_stat);
- if (!proc_stat)
- goto cleanup_proc_exp;
-
- proc_stat->proc_fops = &ct_cpu_seq_fops;
- proc_stat->owner = THIS_MODULE;
-#endif
-
- ret = nf_register_hooks(ip_conntrack_ops, ARRAY_SIZE(ip_conntrack_ops));
- if (ret < 0) {
- printk("ip_conntrack: can't register hooks.\n");
- goto cleanup_proc_stat;
- }
-#ifdef CONFIG_SYSCTL
- ip_ct_sysctl_header = register_sysctl_table(ip_ct_net_table);
- if (ip_ct_sysctl_header == NULL) {
- printk("ip_conntrack: can't register to sysctl.\n");
- ret = -ENOMEM;
- goto cleanup_hooks;
- }
-#endif
- return ret;
-
-#ifdef CONFIG_SYSCTL
- cleanup_hooks:
- nf_unregister_hooks(ip_conntrack_ops, ARRAY_SIZE(ip_conntrack_ops));
-#endif
- cleanup_proc_stat:
-#ifdef CONFIG_PROC_FS
- remove_proc_entry("ip_conntrack", proc_net_stat);
- cleanup_proc_exp:
- proc_net_remove("ip_conntrack_expect");
- cleanup_proc:
- proc_net_remove("ip_conntrack");
- cleanup_init:
-#endif /* CONFIG_PROC_FS */
- ip_conntrack_cleanup();
- return ret;
-}
-
-static void __exit ip_conntrack_standalone_fini(void)
-{
- synchronize_net();
-#ifdef CONFIG_SYSCTL
- unregister_sysctl_table(ip_ct_sysctl_header);
-#endif
- nf_unregister_hooks(ip_conntrack_ops, ARRAY_SIZE(ip_conntrack_ops));
-#ifdef CONFIG_PROC_FS
- remove_proc_entry("ip_conntrack", proc_net_stat);
- proc_net_remove("ip_conntrack_expect");
- proc_net_remove("ip_conntrack");
-#endif /* CONFIG_PROC_FS */
- ip_conntrack_cleanup();
-}
-
-module_init(ip_conntrack_standalone_init);
-module_exit(ip_conntrack_standalone_fini);
-
-/* Some modules need us, but don't depend directly on any symbol.
- They should call this. */
-void need_conntrack(void)
-{
-}
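
The empty body is the point: modules that require conntrack to be loaded but call none of its other symbols can reference this one to create the module dependency. A hedged sketch; the surrounding module is illustrative:

static int __init my_nat_helper_init(void)
{
	need_conntrack();	/* pulls in and pins ip_conntrack */
	/* ... register hooks or helpers here ... */
	return 0;
}
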
-
-#ifdef CONFIG_IP_NF_CONNTRACK_EVENTS
-EXPORT_SYMBOL_GPL(ip_conntrack_chain);
-EXPORT_SYMBOL_GPL(ip_conntrack_expect_chain);
-EXPORT_SYMBOL_GPL(ip_conntrack_register_notifier);
-EXPORT_SYMBOL_GPL(ip_conntrack_unregister_notifier);
-EXPORT_SYMBOL_GPL(__ip_ct_event_cache_init);
-EXPORT_PER_CPU_SYMBOL_GPL(ip_conntrack_ecache);
-#endif
-EXPORT_SYMBOL(ip_conntrack_protocol_register);
-EXPORT_SYMBOL(ip_conntrack_protocol_unregister);
-EXPORT_SYMBOL(ip_ct_get_tuple);
-EXPORT_SYMBOL(invert_tuplepr);
-EXPORT_SYMBOL(ip_conntrack_alter_reply);
-EXPORT_SYMBOL(ip_conntrack_destroyed);
-EXPORT_SYMBOL(need_conntrack);
-EXPORT_SYMBOL(ip_conntrack_helper_register);
-EXPORT_SYMBOL(ip_conntrack_helper_unregister);
-EXPORT_SYMBOL(ip_ct_iterate_cleanup);
-EXPORT_SYMBOL(__ip_ct_refresh_acct);
-
-EXPORT_SYMBOL(ip_conntrack_expect_alloc);
-EXPORT_SYMBOL(ip_conntrack_expect_put);
-EXPORT_SYMBOL_GPL(__ip_conntrack_expect_find);
-EXPORT_SYMBOL_GPL(ip_conntrack_expect_find_get);
-EXPORT_SYMBOL(ip_conntrack_expect_related);
-EXPORT_SYMBOL(ip_conntrack_unexpect_related);
-EXPORT_SYMBOL_GPL(ip_conntrack_expect_list);
-EXPORT_SYMBOL_GPL(ip_ct_unlink_expect);
-
-EXPORT_SYMBOL(ip_conntrack_tuple_taken);
-EXPORT_SYMBOL(ip_ct_gather_frags);
-EXPORT_SYMBOL(ip_conntrack_htable_size);
-EXPORT_SYMBOL(ip_conntrack_lock);
-EXPORT_SYMBOL(ip_conntrack_hash);
-EXPORT_SYMBOL(ip_conntrack_untracked);
-EXPORT_SYMBOL_GPL(ip_conntrack_find_get);
-#ifdef CONFIG_IP_NF_NAT_NEEDED
-EXPORT_SYMBOL(ip_conntrack_tcp_update);
-#endif
-
-EXPORT_SYMBOL_GPL(ip_conntrack_flush);
-EXPORT_SYMBOL_GPL(__ip_conntrack_find);
-
-EXPORT_SYMBOL_GPL(ip_conntrack_alloc);
-EXPORT_SYMBOL_GPL(ip_conntrack_free);
-EXPORT_SYMBOL_GPL(ip_conntrack_hash_insert);
-
-EXPORT_SYMBOL_GPL(ip_ct_remove_expectations);
-
-EXPORT_SYMBOL_GPL(ip_conntrack_helper_find_get);
-EXPORT_SYMBOL_GPL(ip_conntrack_helper_put);
-EXPORT_SYMBOL_GPL(__ip_conntrack_helper_find_byname);
-
-EXPORT_SYMBOL_GPL(ip_conntrack_proto_find_get);
-EXPORT_SYMBOL_GPL(ip_conntrack_proto_put);
-EXPORT_SYMBOL_GPL(__ip_conntrack_proto_find);
-EXPORT_SYMBOL_GPL(ip_conntrack_checksum);
-#if defined(CONFIG_IP_NF_CONNTRACK_NETLINK) || \
- defined(CONFIG_IP_NF_CONNTRACK_NETLINK_MODULE)
-EXPORT_SYMBOL_GPL(ip_ct_port_tuple_to_nfattr);
-EXPORT_SYMBOL_GPL(ip_ct_port_nfattr_to_tuple);
-#endif
diff --git a/net/ipv4/netfilter/ip_conntrack_tftp.c b/net/ipv4/netfilter/ip_conntrack_tftp.c
deleted file mode 100644
index 76e175e7a97..00000000000
--- a/net/ipv4/netfilter/ip_conntrack_tftp.c
+++ /dev/null
@@ -1,161 +0,0 @@
-/* (C) 2001-2002 Magnus Boden <mb@ozaba.mine.nu>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
- * Version: 0.0.7
- *
- * Thu 21 Mar 2002 Harald Welte <laforge@gnumonks.org>
- * - port to newnat API
- *
- */
-
-#include <linux/module.h>
-#include <linux/ip.h>
-#include <linux/udp.h>
-
-#include <linux/netfilter.h>
-#include <linux/netfilter_ipv4/ip_tables.h>
-#include <linux/netfilter_ipv4/ip_conntrack_helper.h>
-#include <linux/netfilter_ipv4/ip_conntrack_tftp.h>
-#include <linux/moduleparam.h>
-
-MODULE_AUTHOR("Magnus Boden <mb@ozaba.mine.nu>");
-MODULE_DESCRIPTION("tftp connection tracking helper");
-MODULE_LICENSE("GPL");
-
-#define MAX_PORTS 8
-static unsigned short ports[MAX_PORTS];
-static int ports_c;
-module_param_array(ports, ushort, &ports_c, 0400);
-MODULE_PARM_DESC(ports, "port numbers of tftp servers");
-
-#if 0
-#define DEBUGP(format, args...) printk("%s:%s:" format, \
- __FILE__, __FUNCTION__ , ## args)
-#else
-#define DEBUGP(format, args...)
-#endif
-
-unsigned int (*ip_nat_tftp_hook)(struct sk_buff **pskb,
- enum ip_conntrack_info ctinfo,
- struct ip_conntrack_expect *exp);
-EXPORT_SYMBOL_GPL(ip_nat_tftp_hook);
-
-static int tftp_help(struct sk_buff **pskb,
- struct ip_conntrack *ct,
- enum ip_conntrack_info ctinfo)
-{
- struct tftphdr _tftph, *tfh;
- struct ip_conntrack_expect *exp;
- unsigned int ret = NF_ACCEPT;
- typeof(ip_nat_tftp_hook) ip_nat_tftp;
-
- tfh = skb_header_pointer(*pskb,
- (*pskb)->nh.iph->ihl*4+sizeof(struct udphdr),
- sizeof(_tftph), &_tftph);
- if (tfh == NULL)
- return NF_ACCEPT;
-
- switch (ntohs(tfh->opcode)) {
- /* RRQ and WRQ work the same way */
- case TFTP_OPCODE_READ:
- case TFTP_OPCODE_WRITE:
- DEBUGP("");
- DUMP_TUPLE(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple);
- DUMP_TUPLE(&ct->tuplehash[IP_CT_DIR_REPLY].tuple);
-
- exp = ip_conntrack_expect_alloc(ct);
- if (exp == NULL)
- return NF_DROP;
-
- exp->tuple = ct->tuplehash[IP_CT_DIR_REPLY].tuple;
- exp->mask.src.ip = htonl(0xffffffff);
- exp->mask.src.u.udp.port = 0;
- exp->mask.dst.ip = htonl(0xffffffff);
- exp->mask.dst.u.udp.port = htons(0xffff);
- exp->mask.dst.protonum = 0xff;
- exp->expectfn = NULL;
- exp->flags = 0;
-
- DEBUGP("expect: ");
- DUMP_TUPLE(&exp->tuple);
- DUMP_TUPLE(&exp->mask);
- ip_nat_tftp = rcu_dereference(ip_nat_tftp_hook);
- if (ip_nat_tftp)
- ret = ip_nat_tftp(pskb, ctinfo, exp);
- else if (ip_conntrack_expect_related(exp) != 0)
- ret = NF_DROP;
- ip_conntrack_expect_put(exp);
- break;
- case TFTP_OPCODE_DATA:
- case TFTP_OPCODE_ACK:
- DEBUGP("Data/ACK opcode\n");
- break;
- case TFTP_OPCODE_ERROR:
- DEBUGP("Error opcode\n");
- break;
- default:
- DEBUGP("Unknown opcode\n");
- }
- return NF_ACCEPT;
-}
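
The mask set up in tftp_help() is what makes the data transfer match: the expectation is the reply tuple with the server's source port wildcarded (mask 0), so the server may answer the RRQ/WRQ from any ephemeral port. A hedged userspace model of that masked comparison, much simplified from the kernel's tuple matching; the addresses and ports are illustrative:

#include <stdint.h>
#include <stdio.h>

struct tuple { uint32_t src_ip, dst_ip; uint16_t src_port, dst_port; };

/* Compare every field under the mask; a zero mask field is a wildcard. */
static int mask_match(const struct tuple *pkt, const struct tuple *exp,
		      const struct tuple *mask)
{
	return !((pkt->src_ip ^ exp->src_ip) & mask->src_ip) &&
	       !((pkt->dst_ip ^ exp->dst_ip) & mask->dst_ip) &&
	       !((pkt->src_port ^ exp->src_port) & mask->src_port) &&
	       !((pkt->dst_port ^ exp->dst_port) & mask->dst_port);
}

int main(void)
{
	/* Client 10.0.0.2:1024 -> server 10.0.0.1:69 (RRQ); the expectation
	 * is the reply tuple with the server's source port wildcarded. */
	struct tuple exp  = { 0x0a000001, 0x0a000002, 69, 1024 };
	struct tuple mask = { 0xffffffff, 0xffffffff, 0, 0xffff };
	struct tuple data = { 0x0a000001, 0x0a000002, 3456, 1024 };

	printf("data transfer matches: %d\n", mask_match(&data, &exp, &mask));
	return 0;
}
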
-
-static struct ip_conntrack_helper tftp[MAX_PORTS];
-static char tftp_names[MAX_PORTS][sizeof("tftp-65535")];
-
-static void ip_conntrack_tftp_fini(void)
-{
- int i;
-
- for (i = 0; i < ports_c; i++) {
- DEBUGP("unregistering helper for port %d\n",
- ports[i]);
- ip_conntrack_helper_unregister(&tftp[i]);
- }
-}
-
-static int __init ip_conntrack_tftp_init(void)
-{
- int i, ret;
- char *tmpname;
-
- if (ports_c == 0)
- ports[ports_c++] = TFTP_PORT;
-
- for (i = 0; i < ports_c; i++) {
- /* Create helper structure */
- memset(&tftp[i], 0, sizeof(struct ip_conntrack_helper));
-
- tftp[i].tuple.dst.protonum = IPPROTO_UDP;
- tftp[i].tuple.src.u.udp.port = htons(ports[i]);
- tftp[i].mask.dst.protonum = 0xFF;
- tftp[i].mask.src.u.udp.port = htons(0xFFFF);
- tftp[i].max_expected = 1;
- tftp[i].timeout = 5 * 60; /* 5 minutes */
- tftp[i].me = THIS_MODULE;
- tftp[i].help = tftp_help;
-
- tmpname = &tftp_names[i][0];
- if (ports[i] == TFTP_PORT)
- sprintf(tmpname, "tftp");
- else
- sprintf(tmpname, "tftp-%d", i);
- tftp[i].name = tmpname;
-
- DEBUGP("port #%d: %d\n", i, ports[i]);
-
- ret = ip_conntrack_helper_register(&tftp[i]);
- if (ret) {
- printk("ERROR registering helper for port %d\n",
- ports[i]);
- ip_conntrack_tftp_fini();
- return ret;
- }
- }
- return 0;
-}
-
-module_init(ip_conntrack_tftp_init);
-module_exit(ip_conntrack_tftp_fini);
diff --git a/net/ipv4/netfilter/ip_nat_amanda.c b/net/ipv4/netfilter/ip_nat_amanda.c
deleted file mode 100644
index 85df1a9aed3..00000000000
--- a/net/ipv4/netfilter/ip_nat_amanda.c
+++ /dev/null
@@ -1,85 +0,0 @@
-/* Amanda extension for TCP NAT alteration.
- * (C) 2002 by Brian J. Murrell <netfilter@interlinx.bc.ca>
- * based on a copy of HW's ip_nat_irc.c as well as other modules
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
- *
- * Module load syntax:
- * insmod ip_nat_amanda.o
- */
-
-#include <linux/kernel.h>
-#include <linux/module.h>
-#include <linux/netfilter.h>
-#include <linux/skbuff.h>
-#include <linux/ip.h>
-#include <linux/udp.h>
-#include <net/tcp.h>
-#include <net/udp.h>
-
-#include <linux/netfilter_ipv4.h>
-#include <linux/netfilter_ipv4/ip_nat.h>
-#include <linux/netfilter_ipv4/ip_nat_helper.h>
-#include <linux/netfilter_ipv4/ip_conntrack_helper.h>
-#include <linux/netfilter_ipv4/ip_conntrack_amanda.h>
-
-
-MODULE_AUTHOR("Brian J. Murrell <netfilter@interlinx.bc.ca>");
-MODULE_DESCRIPTION("Amanda NAT helper");
-MODULE_LICENSE("GPL");
-
-static unsigned int help(struct sk_buff **pskb,
- enum ip_conntrack_info ctinfo,
- unsigned int matchoff,
- unsigned int matchlen,
- struct ip_conntrack_expect *exp)
-{
- char buffer[sizeof("65535")];
- u_int16_t port;
- unsigned int ret;
-
- /* Connection comes from client. */
- exp->saved_proto.tcp.port = exp->tuple.dst.u.tcp.port;
- exp->dir = IP_CT_DIR_ORIGINAL;
-
- /* When the expected packet arrives, we need to NAT it the same as
- * this one (i.e. same IP: it will be TCP while the master is UDP). */
- exp->expectfn = ip_nat_follow_master;
-
- /* Try to get same port: if not, try to change it. */
- for (port = ntohs(exp->saved_proto.tcp.port); port != 0; port++) {
- exp->tuple.dst.u.tcp.port = htons(port);
- if (ip_conntrack_expect_related(exp) == 0)
- break;
- }
-
- if (port == 0)
- return NF_DROP;
-
- sprintf(buffer, "%u", port);
- ret = ip_nat_mangle_udp_packet(pskb, exp->master, ctinfo,
- matchoff, matchlen,
- buffer, strlen(buffer));
- if (ret != NF_ACCEPT)
- ip_conntrack_unexpect_related(exp);
- return ret;
-}
-
-static void __exit ip_nat_amanda_fini(void)
-{
- rcu_assign_pointer(ip_nat_amanda_hook, NULL);
- synchronize_rcu();
-}
-
-static int __init ip_nat_amanda_init(void)
-{
- BUG_ON(rcu_dereference(ip_nat_amanda_hook));
- rcu_assign_pointer(ip_nat_amanda_hook, help);
- return 0;
-}
-
-module_init(ip_nat_amanda_init);
-module_exit(ip_nat_amanda_fini);
diff --git a/net/ipv4/netfilter/ip_nat_core.c b/net/ipv4/netfilter/ip_nat_core.c
deleted file mode 100644
index 40737fdbe9a..00000000000
--- a/net/ipv4/netfilter/ip_nat_core.c
+++ /dev/null
@@ -1,634 +0,0 @@
-/* NAT for netfilter; shared with compatibility layer. */
-
-/* (C) 1999-2001 Paul `Rusty' Russell
- * (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
-
-#include <linux/module.h>
-#include <linux/types.h>
-#include <linux/timer.h>
-#include <linux/skbuff.h>
-#include <linux/netfilter_ipv4.h>
-#include <linux/vmalloc.h>
-#include <net/checksum.h>
-#include <net/icmp.h>
-#include <net/ip.h>
-#include <net/tcp.h> /* For tcp_prot in getorigdst */
-#include <linux/icmp.h>
-#include <linux/udp.h>
-#include <linux/jhash.h>
-
-#include <linux/netfilter_ipv4/ip_conntrack.h>
-#include <linux/netfilter_ipv4/ip_conntrack_core.h>
-#include <linux/netfilter_ipv4/ip_conntrack_protocol.h>
-#include <linux/netfilter_ipv4/ip_nat.h>
-#include <linux/netfilter_ipv4/ip_nat_protocol.h>
-#include <linux/netfilter_ipv4/ip_nat_core.h>
-#include <linux/netfilter_ipv4/ip_nat_helper.h>
-#include <linux/netfilter_ipv4/ip_conntrack_helper.h>
-
-#if 0
-#define DEBUGP printk
-#else
-#define DEBUGP(format, args...)
-#endif
-
-DEFINE_RWLOCK(ip_nat_lock);
-
-/* Calculated at init based on memory size */
-static unsigned int ip_nat_htable_size;
-
-static struct list_head *bysource;
-
-#define MAX_IP_NAT_PROTO 256
-static struct ip_nat_protocol *ip_nat_protos[MAX_IP_NAT_PROTO];
-
-static inline struct ip_nat_protocol *
-__ip_nat_proto_find(u_int8_t protonum)
-{
- return rcu_dereference(ip_nat_protos[protonum]);
-}
-
-struct ip_nat_protocol *
-ip_nat_proto_find_get(u_int8_t protonum)
-{
- struct ip_nat_protocol *p;
-
- rcu_read_lock();
- p = __ip_nat_proto_find(protonum);
- if (!try_module_get(p->me))
- p = &ip_nat_unknown_protocol;
- rcu_read_unlock();
-
- return p;
-}
-EXPORT_SYMBOL_GPL(ip_nat_proto_find_get);
-
-void
-ip_nat_proto_put(struct ip_nat_protocol *p)
-{
- module_put(p->me);
-}
-EXPORT_SYMBOL_GPL(ip_nat_proto_put);
-
-/* We keep an extra hash for each conntrack, for fast searching. */
-static inline unsigned int
-hash_by_src(const struct ip_conntrack_tuple *tuple)
-{
- /* Original src, to ensure we map it consistently if possible. */
- return jhash_3words((__force u32)tuple->src.ip, tuple->src.u.all,
- tuple->dst.protonum, 0) % ip_nat_htable_size;
-}
-
- /* No one is using the conntrack by the time this is called. */
-static void ip_nat_cleanup_conntrack(struct ip_conntrack *conn)
-{
- if (!(conn->status & IPS_NAT_DONE_MASK))
- return;
-
- write_lock_bh(&ip_nat_lock);
- list_del(&conn->nat.info.bysource);
- write_unlock_bh(&ip_nat_lock);
-}
-
-/* Is this tuple already taken? (not by us) */
-int
-ip_nat_used_tuple(const struct ip_conntrack_tuple *tuple,
- const struct ip_conntrack *ignored_conntrack)
-{
- /* Conntrack tracking doesn't keep track of outgoing tuples; only
- incoming ones. NAT means they don't have a fixed mapping,
- so we invert the tuple and look for the incoming reply.
-
- We could keep a separate hash if this proves too slow. */
- struct ip_conntrack_tuple reply;
-
- invert_tuplepr(&reply, tuple);
- return ip_conntrack_tuple_taken(&reply, ignored_conntrack);
-}
-EXPORT_SYMBOL(ip_nat_used_tuple);
-
- /* If we source-map this tuple so the reply looks like reply_tuple,
- * will that meet the constraints of the range? */
-static int
-in_range(const struct ip_conntrack_tuple *tuple,
- const struct ip_nat_range *range)
-{
- struct ip_nat_protocol *proto;
- int ret = 0;
-
- /* If we are supposed to map IPs, then we must be in the
- range specified, otherwise let this drag us onto a new src IP. */
- if (range->flags & IP_NAT_RANGE_MAP_IPS) {
- if (ntohl(tuple->src.ip) < ntohl(range->min_ip)
- || ntohl(tuple->src.ip) > ntohl(range->max_ip))
- return 0;
- }
-
- rcu_read_lock();
- proto = __ip_nat_proto_find(tuple->dst.protonum);
- if (!(range->flags & IP_NAT_RANGE_PROTO_SPECIFIED)
- || proto->in_range(tuple, IP_NAT_MANIP_SRC,
- &range->min, &range->max))
- ret = 1;
- rcu_read_unlock();
-
- return ret;
-}
-
-static inline int
-same_src(const struct ip_conntrack *ct,
- const struct ip_conntrack_tuple *tuple)
-{
- return (ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.protonum
- == tuple->dst.protonum
- && ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.ip
- == tuple->src.ip
- && ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.u.all
- == tuple->src.u.all);
-}
-
-/* Only called for SRC manip */
-static int
-find_appropriate_src(const struct ip_conntrack_tuple *tuple,
- struct ip_conntrack_tuple *result,
- const struct ip_nat_range *range)
-{
- unsigned int h = hash_by_src(tuple);
- struct ip_conntrack *ct;
-
- read_lock_bh(&ip_nat_lock);
- list_for_each_entry(ct, &bysource[h], nat.info.bysource) {
- if (same_src(ct, tuple)) {
- /* Copy source part from reply tuple. */
- invert_tuplepr(result,
- &ct->tuplehash[IP_CT_DIR_REPLY].tuple);
- result->dst = tuple->dst;
-
- if (in_range(result, range)) {
- read_unlock_bh(&ip_nat_lock);
- return 1;
- }
- }
- }
- read_unlock_bh(&ip_nat_lock);
- return 0;
-}
-
-/* For [FUTURE] fragmentation handling, we want the least-used
- src-ip/dst-ip/proto triple. Fairness doesn't come into it. Thus
- if the range specifies 1.2.3.4 ports 10000-10005 and 1.2.3.5 ports
- 1-65535, we don't do pro-rata allocation based on ports; we choose
- the IP with the lowest src-ip/dst-ip/proto usage.
-*/
-static void
-find_best_ips_proto(struct ip_conntrack_tuple *tuple,
- const struct ip_nat_range *range,
- const struct ip_conntrack *conntrack,
- enum ip_nat_manip_type maniptype)
-{
- __be32 *var_ipp;
- /* Host order */
- u_int32_t minip, maxip, j;
-
- /* No IP mapping? Do nothing. */
- if (!(range->flags & IP_NAT_RANGE_MAP_IPS))
- return;
-
- if (maniptype == IP_NAT_MANIP_SRC)
- var_ipp = &tuple->src.ip;
- else
- var_ipp = &tuple->dst.ip;
-
- /* Fast path: only one choice. */
- if (range->min_ip == range->max_ip) {
- *var_ipp = range->min_ip;
- return;
- }
-
- /* Hashing source and destination IPs gives a fairly even
- * spread in practice (if there are a small number of IPs
- * involved, there usually aren't that many connections
- * anyway). The consistency means that servers see the same
- * client coming from the same IP (some Internet Banking sites
- * like this), even across reboots. */
- minip = ntohl(range->min_ip);
- maxip = ntohl(range->max_ip);
- j = jhash_2words((__force u32)tuple->src.ip, (__force u32)tuple->dst.ip, 0);
- *var_ipp = htonl(minip + j % (maxip - minip + 1));
-}
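
So the address choice is pure arithmetic: hash the (saddr, daddr) pair, reduce it modulo the range size, and offset from the range start; no per-connection state is consulted, which is what makes the mapping stable across flows and reboots. A hedged userspace sketch of that arithmetic, with mix32() as a toy stand-in for the kernel's jhash_2words() and host byte order throughout:

#include <stdint.h>
#include <stdio.h>

static uint32_t mix32(uint32_t a, uint32_t b)
{
	uint32_t h = a * 2654435761u ^ b * 2246822519u;
	return h ^ (h >> 16);
}

/* Map (src, dst) deterministically into [minip, maxip]. */
static uint32_t pick_nat_ip(uint32_t src, uint32_t dst,
			    uint32_t minip, uint32_t maxip)
{
	if (minip == maxip)		/* fast path: only one choice */
		return minip;
	return minip + mix32(src, dst) % (maxip - minip + 1);
}

int main(void)
{
	/* The same (src, dst) pair always yields the same NAT address. */
	uint32_t ip1 = pick_nat_ip(0x0a000002, 0x08080808,
				   0xc0a80101, 0xc0a80104);
	uint32_t ip2 = pick_nat_ip(0x0a000002, 0x08080808,
				   0xc0a80101, 0xc0a80104);
	printf("stable mapping: %d\n", ip1 == ip2);
	return 0;
}
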
-
-/* Manipulate the tuple into the range given. For NF_IP_POST_ROUTING,
- * we change the source to map into the range. For NF_IP_PRE_ROUTING
- * and NF_IP_LOCAL_OUT, we change the destination to map into the
- * range. It might not be possible to get a unique tuple, but we try.
- * At worst (or if we race), we will end up with a final duplicate in
- * __ip_conntrack_confirm and drop the packet. */
-static void
-get_unique_tuple(struct ip_conntrack_tuple *tuple,
- const struct ip_conntrack_tuple *orig_tuple,
- const struct ip_nat_range *range,
- struct ip_conntrack *conntrack,
- enum ip_nat_manip_type maniptype)
-{
- struct ip_nat_protocol *proto;
-
- /* 1) If this srcip/proto/src-proto-part is currently mapped,
- and that same mapping gives a unique tuple within the given
- range, use that.
-
- This is only required for source (ie. NAT/masq) mappings.
- So far, we don't do local source mappings, so multiple
- manips are not an issue. */
- if (maniptype == IP_NAT_MANIP_SRC) {
- if (find_appropriate_src(orig_tuple, tuple, range)) {
- DEBUGP("get_unique_tuple: Found current src map\n");
- if (!(range->flags & IP_NAT_RANGE_PROTO_RANDOM))
- if (!ip_nat_used_tuple(tuple, conntrack))
- return;
- }
- }
-
- /* 2) Select the least-used IP/proto combination in the given
- range. */
- *tuple = *orig_tuple;
- find_best_ips_proto(tuple, range, conntrack, maniptype);
-
- /* 3) The per-protocol part of the manip is made to map into
- the range to make a unique tuple. */
-
- rcu_read_lock();
- proto = __ip_nat_proto_find(orig_tuple->dst.protonum);
-
- /* Change protocol info to have some randomization */
- if (range->flags & IP_NAT_RANGE_PROTO_RANDOM) {
- proto->unique_tuple(tuple, range, maniptype, conntrack);
- goto out;
- }
-
- /* Only bother mapping if it's not already in range and unique */
- if ((!(range->flags & IP_NAT_RANGE_PROTO_SPECIFIED)
- || proto->in_range(tuple, maniptype, &range->min, &range->max))
- && !ip_nat_used_tuple(tuple, conntrack))
- goto out;
-
- /* Last chance: get the protocol to try to obtain a unique tuple. */
- proto->unique_tuple(tuple, range, maniptype, conntrack);
-out:
- rcu_read_unlock();
-}
-
-unsigned int
-ip_nat_setup_info(struct ip_conntrack *conntrack,
- const struct ip_nat_range *range,
- unsigned int hooknum)
-{
- struct ip_conntrack_tuple curr_tuple, new_tuple;
- struct ip_nat_info *info = &conntrack->nat.info;
- int have_to_hash = !(conntrack->status & IPS_NAT_DONE_MASK);
- enum ip_nat_manip_type maniptype = HOOK2MANIP(hooknum);
-
- IP_NF_ASSERT(hooknum == NF_IP_PRE_ROUTING
- || hooknum == NF_IP_POST_ROUTING
- || hooknum == NF_IP_LOCAL_IN
- || hooknum == NF_IP_LOCAL_OUT);
- BUG_ON(ip_nat_initialized(conntrack, maniptype));
-
- /* What we've got will look like inverse of reply. Normally
- this is what is in the conntrack, except for prior
- manipulations (future optimization: if num_manips == 0,
- orig_tp =
- conntrack->tuplehash[IP_CT_DIR_ORIGINAL].tuple) */
- invert_tuplepr(&curr_tuple,
- &conntrack->tuplehash[IP_CT_DIR_REPLY].tuple);
-
- get_unique_tuple(&new_tuple, &curr_tuple, range, conntrack, maniptype);
-
- if (!ip_ct_tuple_equal(&new_tuple, &curr_tuple)) {
- struct ip_conntrack_tuple reply;
-
- /* Alter conntrack table so it will recognize replies. */
- invert_tuplepr(&reply, &new_tuple);
- ip_conntrack_alter_reply(conntrack, &reply);
-
- /* Non-atomic: we own this at the moment. */
- if (maniptype == IP_NAT_MANIP_SRC)
- conntrack->status |= IPS_SRC_NAT;
- else
- conntrack->status |= IPS_DST_NAT;
- }
-
- /* Place in source hash if this is the first time. */
- if (have_to_hash) {
- unsigned int srchash
- = hash_by_src(&conntrack->tuplehash[IP_CT_DIR_ORIGINAL]
- .tuple);
- write_lock_bh(&ip_nat_lock);
- list_add(&info->bysource, &bysource[srchash]);
- write_unlock_bh(&ip_nat_lock);
- }
-
- /* It's done. */
- if (maniptype == IP_NAT_MANIP_DST)
- set_bit(IPS_DST_NAT_DONE_BIT, &conntrack->status);
- else
- set_bit(IPS_SRC_NAT_DONE_BIT, &conntrack->status);
-
- return NF_ACCEPT;
-}
-EXPORT_SYMBOL(ip_nat_setup_info);
-
-/* Returns true if succeeded. */
-static int
-manip_pkt(u_int16_t proto,
- struct sk_buff **pskb,
- unsigned int iphdroff,
- const struct ip_conntrack_tuple *target,
- enum ip_nat_manip_type maniptype)
-{
- struct iphdr *iph;
- struct ip_nat_protocol *p;
-
- if (!skb_make_writable(pskb, iphdroff + sizeof(*iph)))
- return 0;
-
- iph = (void *)(*pskb)->data + iphdroff;
-
- /* Manipulate the protocol part. */
-
- /* rcu_read_lock()ed by nf_hook_slow */
- p = __ip_nat_proto_find(proto);
- if (!p->manip_pkt(pskb, iphdroff, target, maniptype))
- return 0;
-
- iph = (void *)(*pskb)->data + iphdroff;
-
- if (maniptype == IP_NAT_MANIP_SRC) {
- nf_csum_replace4(&iph->check, iph->saddr, target->src.ip);
- iph->saddr = target->src.ip;
- } else {
- nf_csum_replace4(&iph->check, iph->daddr, target->dst.ip);
- iph->daddr = target->dst.ip;
- }
- return 1;
-}
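
nf_csum_replace4() avoids re-summing the header: it applies the RFC 1624 incremental update HC' = ~(~HC + ~m + m'), folding the old and new 32-bit field values into the existing checksum as 16-bit words. A hedged userspace sketch of that arithmetic, checked against a full recomputation; the toy header values are illustrative:

#include <stdint.h>
#include <stdio.h>

/* 16-bit one's-complement add with end-around carry. */
static uint32_t oc_add(uint32_t sum, uint32_t x)
{
	sum += x;
	return (sum & 0xffff) + (sum >> 16);
}

/* RFC 1624: fold the old field out and the new field in. */
static uint16_t csum_replace4(uint16_t check, uint32_t from, uint32_t to)
{
	uint32_t sum = (uint16_t)~check;

	sum = oc_add(sum, (uint16_t)~(from >> 16));
	sum = oc_add(sum, (uint16_t)~(from & 0xffff));
	sum = oc_add(sum, to >> 16);
	sum = oc_add(sum, to & 0xffff);
	sum = oc_add(sum, 0);		/* fold any remaining carry */
	return (uint16_t)~sum;
}

/* Full one's-complement checksum, for verification only. */
static uint16_t csum_full(const uint16_t *w, int n)
{
	uint32_t sum = 0;
	while (n--)
		sum = oc_add(sum, *w++);
	return (uint16_t)~sum;
}

int main(void)
{
	uint16_t hdr[4] = { 0x4500, 0x0054, 0xc0a8, 0x0101 }; /* toy header */
	uint16_t check = csum_full(hdr, 4);

	/* Rewrite the last 32-bit field (an address) and update in place. */
	uint16_t inc = csum_replace4(check, 0xc0a80101, 0x0a000001);
	hdr[2] = 0x0a00; hdr[3] = 0x0001;
	printf("incremental == full: %d\n", inc == csum_full(hdr, 4));
	return 0;
}
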
-
-/* Do packet manipulations according to ip_nat_setup_info. */
-unsigned int ip_nat_packet(struct ip_conntrack *ct,
- enum ip_conntrack_info ctinfo,
- unsigned int hooknum,
- struct sk_buff **pskb)
-{
- enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo);
- unsigned long statusbit;
- enum ip_nat_manip_type mtype = HOOK2MANIP(hooknum);
-
- if (mtype == IP_NAT_MANIP_SRC)
- statusbit = IPS_SRC_NAT;
- else
- statusbit = IPS_DST_NAT;
-
- /* Invert if this is reply dir. */
- if (dir == IP_CT_DIR_REPLY)
- statusbit ^= IPS_NAT_MASK;
-
- /* Non-atomic: these bits don't change. */
- if (ct->status & statusbit) {
- struct ip_conntrack_tuple target;
-
- /* We are aiming to look like inverse of other direction. */
- invert_tuplepr(&target, &ct->tuplehash[!dir].tuple);
-
- if (!manip_pkt(target.dst.protonum, pskb, 0, &target, mtype))
- return NF_DROP;
- }
- return NF_ACCEPT;
-}
-EXPORT_SYMBOL_GPL(ip_nat_packet);
-
-/* Dir is direction ICMP is coming from (opposite to packet it contains) */
-int ip_nat_icmp_reply_translation(struct ip_conntrack *ct,
- enum ip_conntrack_info ctinfo,
- unsigned int hooknum,
- struct sk_buff **pskb)
-{
- struct {
- struct icmphdr icmp;
- struct iphdr ip;
- } *inside;
- struct ip_conntrack_protocol *proto;
- struct ip_conntrack_tuple inner, target;
- int hdrlen = (*pskb)->nh.iph->ihl * 4;
- enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo);
- unsigned long statusbit;
- enum ip_nat_manip_type manip = HOOK2MANIP(hooknum);
-
- if (!skb_make_writable(pskb, hdrlen + sizeof(*inside)))
- return 0;
-
- inside = (void *)(*pskb)->data + (*pskb)->nh.iph->ihl*4;
-
- /* We're actually going to mangle it beyond trivial checksum
- adjustment, so make sure the current checksum is correct. */
- if (nf_ip_checksum(*pskb, hooknum, hdrlen, 0))
- return 0;
-
- /* Must be RELATED */
- IP_NF_ASSERT((*pskb)->nfctinfo == IP_CT_RELATED ||
- (*pskb)->nfctinfo == IP_CT_RELATED+IP_CT_IS_REPLY);
-
- /* Redirects on non-null nats must be dropped, else they'll
- start talking to each other without our translation, and be
- confused... --RR */
- if (inside->icmp.type == ICMP_REDIRECT) {
- /* If NAT isn't finished, assume it and drop. */
- if ((ct->status & IPS_NAT_DONE_MASK) != IPS_NAT_DONE_MASK)
- return 0;
-
- if (ct->status & IPS_NAT_MASK)
- return 0;
- }
-
- DEBUGP("icmp_reply_translation: translating error %p manp %u dir %s\n",
- *pskb, manip, dir == IP_CT_DIR_ORIGINAL ? "ORIG" : "REPLY");
-
- /* rcu_read_lock()ed by nf_hook_slow */
- proto = __ip_conntrack_proto_find(inside->ip.protocol);
- if (!ip_ct_get_tuple(&inside->ip, *pskb, (*pskb)->nh.iph->ihl*4 +
- sizeof(struct icmphdr) + inside->ip.ihl*4,
- &inner, proto))
- return 0;
-
- /* Change inner back to look like incoming packet. We do the
- opposite manip on this hook from the normal one, because it might not
- pass all hooks (locally-generated ICMP). Consider incoming
- packet: PREROUTING (DST manip), routing produces ICMP, goes
- through POSTROUTING (which must correct the DST manip). */
- if (!manip_pkt(inside->ip.protocol, pskb,
- (*pskb)->nh.iph->ihl*4
- + sizeof(inside->icmp),
- &ct->tuplehash[!dir].tuple,
- !manip))
- return 0;
-
- if ((*pskb)->ip_summed != CHECKSUM_PARTIAL) {
- /* Reloading "inside" here since manip_pkt inner. */
- inside = (void *)(*pskb)->data + (*pskb)->nh.iph->ihl*4;
- inside->icmp.checksum = 0;
- inside->icmp.checksum = csum_fold(skb_checksum(*pskb, hdrlen,
- (*pskb)->len - hdrlen,
- 0));
- }
-
- /* Change the outer header to look like the reply to an incoming
- * packet (proto 0 means don't invert the per-proto part). */
- if (manip == IP_NAT_MANIP_SRC)
- statusbit = IPS_SRC_NAT;
- else
- statusbit = IPS_DST_NAT;
-
- /* Invert if this is reply dir. */
- if (dir == IP_CT_DIR_REPLY)
- statusbit ^= IPS_NAT_MASK;
-
- if (ct->status & statusbit) {
- invert_tuplepr(&target, &ct->tuplehash[!dir].tuple);
- if (!manip_pkt(0, pskb, 0, &target, manip))
- return 0;
- }
-
- return 1;
-}
-EXPORT_SYMBOL_GPL(ip_nat_icmp_reply_translation);
-
-/* Protocol registration. */
-int ip_nat_protocol_register(struct ip_nat_protocol *proto)
-{
- int ret = 0;
-
- write_lock_bh(&ip_nat_lock);
- if (ip_nat_protos[proto->protonum] != &ip_nat_unknown_protocol) {
- ret = -EBUSY;
- goto out;
- }
- rcu_assign_pointer(ip_nat_protos[proto->protonum], proto);
- out:
- write_unlock_bh(&ip_nat_lock);
- return ret;
-}
-EXPORT_SYMBOL(ip_nat_protocol_register);
-
- /* No one stores the protocol anywhere; simply delete it. */
-void ip_nat_protocol_unregister(struct ip_nat_protocol *proto)
-{
- write_lock_bh(&ip_nat_lock);
- rcu_assign_pointer(ip_nat_protos[proto->protonum],
- &ip_nat_unknown_protocol);
- write_unlock_bh(&ip_nat_lock);
- synchronize_rcu();
-}
-EXPORT_SYMBOL(ip_nat_protocol_unregister);
-
-#if defined(CONFIG_IP_NF_CONNTRACK_NETLINK) || \
- defined(CONFIG_IP_NF_CONNTRACK_NETLINK_MODULE)
-int
-ip_nat_port_range_to_nfattr(struct sk_buff *skb,
- const struct ip_nat_range *range)
-{
- NFA_PUT(skb, CTA_PROTONAT_PORT_MIN, sizeof(__be16),
- &range->min.tcp.port);
- NFA_PUT(skb, CTA_PROTONAT_PORT_MAX, sizeof(__be16),
- &range->max.tcp.port);
-
- return 0;
-
-nfattr_failure:
- return -1;
-}
-
-int
-ip_nat_port_nfattr_to_range(struct nfattr *tb[], struct ip_nat_range *range)
-{
- int ret = 0;
-
- /* we have to return whether we actually parsed something or not */
-
- if (tb[CTA_PROTONAT_PORT_MIN-1]) {
- ret = 1;
- range->min.tcp.port =
- *(__be16 *)NFA_DATA(tb[CTA_PROTONAT_PORT_MIN-1]);
- }
-
- if (!tb[CTA_PROTONAT_PORT_MAX-1]) {
- if (ret)
- range->max.tcp.port = range->min.tcp.port;
- } else {
- ret = 1;
- range->max.tcp.port =
- *(__be16 *)NFA_DATA(tb[CTA_PROTONAT_PORT_MAX-1]);
- }
-
- return ret;
-}
-EXPORT_SYMBOL_GPL(ip_nat_port_nfattr_to_range);
-EXPORT_SYMBOL_GPL(ip_nat_port_range_to_nfattr);
-#endif
-
-static int __init ip_nat_init(void)
-{
- size_t i;
-
- /* Leave them the same for the moment. */
- ip_nat_htable_size = ip_conntrack_htable_size;
-
- /* One vmalloc for the bysource hash table */
- bysource = vmalloc(sizeof(struct list_head) * ip_nat_htable_size);
- if (!bysource)
- return -ENOMEM;
-
- /* Sew in builtin protocols. */
- write_lock_bh(&ip_nat_lock);
- for (i = 0; i < MAX_IP_NAT_PROTO; i++)
- rcu_assign_pointer(ip_nat_protos[i], &ip_nat_unknown_protocol);
- rcu_assign_pointer(ip_nat_protos[IPPROTO_TCP], &ip_nat_protocol_tcp);
- rcu_assign_pointer(ip_nat_protos[IPPROTO_UDP], &ip_nat_protocol_udp);
- rcu_assign_pointer(ip_nat_protos[IPPROTO_ICMP], &ip_nat_protocol_icmp);
- write_unlock_bh(&ip_nat_lock);
-
- for (i = 0; i < ip_nat_htable_size; i++)
- INIT_LIST_HEAD(&bysource[i]);
-
- /* FIXME: Man, this is a hack. <SIGH> */
- IP_NF_ASSERT(rcu_dereference(ip_conntrack_destroyed) == NULL);
- rcu_assign_pointer(ip_conntrack_destroyed, ip_nat_cleanup_conntrack);
-
- /* Initialize fake conntrack so that NAT will skip it */
- ip_conntrack_untracked.status |= IPS_NAT_DONE_MASK;
- return 0;
-}
-
-/* Clear NAT section of all conntracks, in case we're loaded again. */
-static int clean_nat(struct ip_conntrack *i, void *data)
-{
- memset(&i->nat, 0, sizeof(i->nat));
- i->status &= ~(IPS_NAT_MASK | IPS_NAT_DONE_MASK | IPS_SEQ_ADJUST);
- return 0;
-}
-
-static void __exit ip_nat_cleanup(void)
-{
- ip_ct_iterate_cleanup(&clean_nat, NULL);
- rcu_assign_pointer(ip_conntrack_destroyed, NULL);
- synchronize_rcu();
- vfree(bysource);
-}
-
-MODULE_LICENSE("GPL");
-
-module_init(ip_nat_init);
-module_exit(ip_nat_cleanup);
diff --git a/net/ipv4/netfilter/ip_nat_ftp.c b/net/ipv4/netfilter/ip_nat_ftp.c
deleted file mode 100644
index 32e01d8dffc..00000000000
--- a/net/ipv4/netfilter/ip_nat_ftp.c
+++ /dev/null
@@ -1,180 +0,0 @@
-/* FTP extension for TCP NAT alteration. */
-
-/* (C) 1999-2001 Paul `Rusty' Russell
- * (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
-
-#include <linux/module.h>
-#include <linux/netfilter_ipv4.h>
-#include <linux/ip.h>
-#include <linux/tcp.h>
-#include <linux/moduleparam.h>
-#include <net/tcp.h>
-#include <linux/netfilter_ipv4/ip_nat.h>
-#include <linux/netfilter_ipv4/ip_nat_helper.h>
-#include <linux/netfilter_ipv4/ip_nat_rule.h>
-#include <linux/netfilter_ipv4/ip_conntrack_ftp.h>
-#include <linux/netfilter_ipv4/ip_conntrack_helper.h>
-
-MODULE_LICENSE("GPL");
-MODULE_AUTHOR("Rusty Russell <rusty@rustcorp.com.au>");
-MODULE_DESCRIPTION("ftp NAT helper");
-
-#if 0
-#define DEBUGP printk
-#else
-#define DEBUGP(format, args...)
-#endif
-
-/* FIXME: Time out? --RR */
-
-static int
-mangle_rfc959_packet(struct sk_buff **pskb,
- __be32 newip,
- u_int16_t port,
- unsigned int matchoff,
- unsigned int matchlen,
- struct ip_conntrack *ct,
- enum ip_conntrack_info ctinfo,
- u32 *seq)
-{
- char buffer[sizeof("nnn,nnn,nnn,nnn,nnn,nnn")];
-
- sprintf(buffer, "%u,%u,%u,%u,%u,%u",
- NIPQUAD(newip), port>>8, port&0xFF);
-
- DEBUGP("calling ip_nat_mangle_tcp_packet\n");
-
- *seq += strlen(buffer) - matchlen;
- return ip_nat_mangle_tcp_packet(pskb, ct, ctinfo, matchoff,
- matchlen, buffer, strlen(buffer));
-}
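
The "%u,%u,%u,%u,%u,%u" format above is the RFC 959 PORT/PASV argument: four address octets followed by the port split into its high and low bytes (port>>8 and port&0xFF). A hedged worked example: 192.168.1.2 port 6275 encodes as 192,168,1,2,24,131, since 6275 = 24 * 256 + 131.

#include <stdio.h>

int main(void)
{
	unsigned char ip[4] = { 192, 168, 1, 2 };
	unsigned int port = 6275;
	char buffer[sizeof("nnn,nnn,nnn,nnn,nnn,nnn")];

	snprintf(buffer, sizeof(buffer), "%u,%u,%u,%u,%u,%u",
		 ip[0], ip[1], ip[2], ip[3], port >> 8, port & 0xFF);
	printf("PORT %s\n", buffer);	/* PORT 192,168,1,2,24,131 */
	return 0;
}
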
-
-/* |1|132.235.1.2|6275| */
-static int
-mangle_eprt_packet(struct sk_buff **pskb,
- __be32 newip,
- u_int16_t port,
- unsigned int matchoff,
- unsigned int matchlen,
- struct ip_conntrack *ct,
- enum ip_conntrack_info ctinfo,
- u32 *seq)
-{
- char buffer[sizeof("|1|255.255.255.255|65535|")];
-
- sprintf(buffer, "|1|%u.%u.%u.%u|%u|", NIPQUAD(newip), port);
-
- DEBUGP("calling ip_nat_mangle_tcp_packet\n");
-
- *seq += strlen(buffer) - matchlen;
- return ip_nat_mangle_tcp_packet(pskb, ct, ctinfo, matchoff,
- matchlen, buffer, strlen(buffer));
-}
-
-/* |1|132.235.1.2|6275| */
-static int
-mangle_epsv_packet(struct sk_buff **pskb,
- __be32 newip,
- u_int16_t port,
- unsigned int matchoff,
- unsigned int matchlen,
- struct ip_conntrack *ct,
- enum ip_conntrack_info ctinfo,
- u32 *seq)
-{
- char buffer[sizeof("|||65535|")];
-
- sprintf(buffer, "|||%u|", port);
-
- DEBUGP("calling ip_nat_mangle_tcp_packet\n");
-
- *seq += strlen(buffer) - matchlen;
- return ip_nat_mangle_tcp_packet(pskb, ct, ctinfo, matchoff,
- matchlen, buffer, strlen(buffer));
-}
-
-static int (*mangle[])(struct sk_buff **, __be32, u_int16_t,
- unsigned int,
- unsigned int,
- struct ip_conntrack *,
- enum ip_conntrack_info,
- u32 *seq)
-= { [IP_CT_FTP_PORT] = mangle_rfc959_packet,
- [IP_CT_FTP_PASV] = mangle_rfc959_packet,
- [IP_CT_FTP_EPRT] = mangle_eprt_packet,
- [IP_CT_FTP_EPSV] = mangle_epsv_packet
-};
-
-/* So, this packet has hit the connection tracking matching code.
- Mangle it, and change the expectation to match the new version. */
-static unsigned int ip_nat_ftp(struct sk_buff **pskb,
- enum ip_conntrack_info ctinfo,
- enum ip_ct_ftp_type type,
- unsigned int matchoff,
- unsigned int matchlen,
- struct ip_conntrack_expect *exp,
- u32 *seq)
-{
- __be32 newip;
- u_int16_t port;
- int dir = CTINFO2DIR(ctinfo);
- struct ip_conntrack *ct = exp->master;
-
- DEBUGP("FTP_NAT: type %i, off %u len %u\n", type, matchoff, matchlen);
-
- /* Connection will come from wherever this packet goes, hence !dir */
- newip = ct->tuplehash[!dir].tuple.dst.ip;
- exp->saved_proto.tcp.port = exp->tuple.dst.u.tcp.port;
- exp->dir = !dir;
-
- /* When the expected packet arrives, we need to NAT it the same
- * as this one. */
- exp->expectfn = ip_nat_follow_master;
-
- /* Try to get same port: if not, try to change it. */
- for (port = ntohs(exp->saved_proto.tcp.port); port != 0; port++) {
- exp->tuple.dst.u.tcp.port = htons(port);
- if (ip_conntrack_expect_related(exp) == 0)
- break;
- }
-
- if (port == 0)
- return NF_DROP;
-
- if (!mangle[type](pskb, newip, port, matchoff, matchlen, ct, ctinfo,
- seq)) {
- ip_conntrack_unexpect_related(exp);
- return NF_DROP;
- }
- return NF_ACCEPT;
-}
-
-static void __exit ip_nat_ftp_fini(void)
-{
- rcu_assign_pointer(ip_nat_ftp_hook, NULL);
- synchronize_rcu();
-}
-
-static int __init ip_nat_ftp_init(void)
-{
- BUG_ON(rcu_dereference(ip_nat_ftp_hook));
- rcu_assign_pointer(ip_nat_ftp_hook, ip_nat_ftp);
- return 0;
-}
-
-/* Prior to 2.6.11, we had a ports param. No longer, but don't break users. */
-static int warn_set(const char *val, struct kernel_param *kp)
-{
- printk(KERN_INFO KBUILD_MODNAME
- ": kernel >= 2.6.10 only uses 'ports' for conntrack modules\n");
- return 0;
-}
-module_param_call(ports, warn_set, NULL, NULL, 0);
-
-module_init(ip_nat_ftp_init);
-module_exit(ip_nat_ftp_fini);
diff --git a/net/ipv4/netfilter/ip_nat_helper.c b/net/ipv4/netfilter/ip_nat_helper.c
deleted file mode 100644
index dc778cfef58..00000000000
--- a/net/ipv4/netfilter/ip_nat_helper.c
+++ /dev/null
@@ -1,436 +0,0 @@
-/* ip_nat_helper.c - generic support functions for NAT helpers
- *
- * (C) 2000-2002 Harald Welte <laforge@netfilter.org>
- * (C) 2003-2004 Netfilter Core Team <coreteam@netfilter.org>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
- * 14 Jan 2002 Harald Welte <laforge@gnumonks.org>:
- * - add support for SACK adjustment
- * 14 Mar 2002 Harald Welte <laforge@gnumonks.org>:
- * - merge SACK support into newnat API
- * 16 Aug 2002 Brian J. Murrell <netfilter@interlinx.bc.ca>:
- * - make ip_nat_resize_packet more generic (TCP and UDP)
- * - add ip_nat_mangle_udp_packet
- */
-#include <linux/module.h>
-#include <linux/kmod.h>
-#include <linux/types.h>
-#include <linux/timer.h>
-#include <linux/skbuff.h>
-#include <linux/netfilter_ipv4.h>
-#include <net/checksum.h>
-#include <net/icmp.h>
-#include <net/ip.h>
-#include <net/tcp.h>
-#include <net/udp.h>
-
-#include <linux/netfilter_ipv4/ip_conntrack.h>
-#include <linux/netfilter_ipv4/ip_conntrack_helper.h>
-#include <linux/netfilter_ipv4/ip_nat.h>
-#include <linux/netfilter_ipv4/ip_nat_protocol.h>
-#include <linux/netfilter_ipv4/ip_nat_core.h>
-#include <linux/netfilter_ipv4/ip_nat_helper.h>
-
-#if 0
-#define DEBUGP printk
-#define DUMP_OFFSET(x) printk("offset_before=%d, offset_after=%d, correction_pos=%u\n", x->offset_before, x->offset_after, x->correction_pos);
-#else
-#define DEBUGP(format, args...)
-#define DUMP_OFFSET(x)
-#endif
-
-static DEFINE_SPINLOCK(ip_nat_seqofs_lock);
-
-/* Setup TCP sequence correction given this change at this sequence */
-static inline void
-adjust_tcp_sequence(u32 seq,
- int sizediff,
- struct ip_conntrack *ct,
- enum ip_conntrack_info ctinfo)
-{
- int dir;
- struct ip_nat_seq *this_way, *other_way;
-
- DEBUGP("ip_nat_resize_packet: old_size = %u, new_size = %u\n",
- (*skb)->len, new_size);
-
- dir = CTINFO2DIR(ctinfo);
-
- this_way = &ct->nat.info.seq[dir];
- other_way = &ct->nat.info.seq[!dir];
-
- DEBUGP("ip_nat_resize_packet: Seq_offset before: ");
- DUMP_OFFSET(this_way);
-
- spin_lock_bh(&ip_nat_seqofs_lock);
-
- /* SYN adjust. If it's uninitialized, or this is after last
- * correction, record it: we don't handle more than one
- * adjustment in the window, but do deal with common case of a
- * retransmit */
- if (this_way->offset_before == this_way->offset_after
- || before(this_way->correction_pos, seq)) {
- this_way->correction_pos = seq;
- this_way->offset_before = this_way->offset_after;
- this_way->offset_after += sizediff;
- }
- spin_unlock_bh(&ip_nat_seqofs_lock);
-
- DEBUGP("ip_nat_resize_packet: Seq_offset after: ");
- DUMP_OFFSET(this_way);
-}
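
The (correction_pos, offset_before, offset_after) triple recorded above is applied later in ip_nat_seq_adjust(): sequence numbers strictly after the mangle point get the new offset, everything at or before it (including a retransmit of the mangled segment itself) gets the old one. A hedged worked example with illustrative numbers, modelling a mangle at sequence 1000 that grew the payload by 4 bytes:

#include <stdint.h>
#include <stdio.h>

struct nat_seq {
	uint32_t correction_pos;
	int32_t offset_before, offset_after;
};

/* Serial-number arithmetic: is seq1 strictly after seq2? */
static int after(uint32_t seq1, uint32_t seq2)
{
	return (int32_t)(seq1 - seq2) > 0;
}

static uint32_t adjust(uint32_t seq, const struct nat_seq *s)
{
	return seq + (after(seq, s->correction_pos) ? s->offset_after
						    : s->offset_before);
}

int main(void)
{
	struct nat_seq s = { .correction_pos = 1000,
			     .offset_before = 0, .offset_after = 4 };

	printf("%u -> %u\n", 900u, adjust(900, &s));	/* 900 -> 900 */
	printf("%u -> %u\n", 1500u, adjust(1500, &s));	/* 1500 -> 1504 */
	return 0;
}
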
-
-/* Frobs data inside this packet, which is linear. */
-static void mangle_contents(struct sk_buff *skb,
- unsigned int dataoff,
- unsigned int match_offset,
- unsigned int match_len,
- const char *rep_buffer,
- unsigned int rep_len)
-{
- unsigned char *data;
-
- BUG_ON(skb_is_nonlinear(skb));
- data = (unsigned char *)skb->nh.iph + dataoff;
-
- /* move post-replacement */
- memmove(data + match_offset + rep_len,
- data + match_offset + match_len,
- skb->tail - (data + match_offset + match_len));
-
- /* insert data from buffer */
- memcpy(data + match_offset, rep_buffer, rep_len);
-
- /* update skb info */
- if (rep_len > match_len) {
- DEBUGP("ip_nat_mangle_packet: Extending packet by "
- "%u from %u bytes\n", rep_len - match_len,
- skb->len);
- skb_put(skb, rep_len - match_len);
- } else {
- DEBUGP("ip_nat_mangle_packet: Shrinking packet from "
- "%u from %u bytes\n", match_len - rep_len,
- skb->len);
- __skb_trim(skb, skb->len + rep_len - match_len);
- }
-
- /* fix IP hdr checksum information */
- skb->nh.iph->tot_len = htons(skb->len);
- ip_send_check(skb->nh.iph);
-}
-
-/* Unusual, but possible case. */
-static int enlarge_skb(struct sk_buff **pskb, unsigned int extra)
-{
- struct sk_buff *nskb;
-
- if ((*pskb)->len + extra > 65535)
- return 0;
-
- nskb = skb_copy_expand(*pskb, skb_headroom(*pskb), extra, GFP_ATOMIC);
- if (!nskb)
- return 0;
-
- /* Transfer socket to new skb. */
- if ((*pskb)->sk)
- skb_set_owner_w(nskb, (*pskb)->sk);
- kfree_skb(*pskb);
- *pskb = nskb;
- return 1;
-}
-
-/* Generic function for mangling variable-length address changes inside
- * NATed TCP connections (like the PORT XXX,XXX,XXX,XXX,XXX,XXX
- * command in FTP).
- *
- * Takes care of all the nasty sequence number changes, checksumming,
- * skb enlargement, and so on.
- */
-int
-ip_nat_mangle_tcp_packet(struct sk_buff **pskb,
- struct ip_conntrack *ct,
- enum ip_conntrack_info ctinfo,
- unsigned int match_offset,
- unsigned int match_len,
- const char *rep_buffer,
- unsigned int rep_len)
-{
- struct iphdr *iph;
- struct tcphdr *tcph;
- int oldlen, datalen;
-
- if (!skb_make_writable(pskb, (*pskb)->len))
- return 0;
-
- if (rep_len > match_len
- && rep_len - match_len > skb_tailroom(*pskb)
- && !enlarge_skb(pskb, rep_len - match_len))
- return 0;
-
- SKB_LINEAR_ASSERT(*pskb);
-
- iph = (*pskb)->nh.iph;
- tcph = (void *)iph + iph->ihl*4;
-
- oldlen = (*pskb)->len - iph->ihl*4;
- mangle_contents(*pskb, iph->ihl*4 + tcph->doff*4,
- match_offset, match_len, rep_buffer, rep_len);
-
- datalen = (*pskb)->len - iph->ihl*4;
- if ((*pskb)->ip_summed != CHECKSUM_PARTIAL) {
- tcph->check = 0;
- tcph->check = tcp_v4_check(datalen,
- iph->saddr, iph->daddr,
- csum_partial((char *)tcph,
- datalen, 0));
- } else
- nf_proto_csum_replace2(&tcph->check, *pskb,
- htons(oldlen), htons(datalen), 1);
-
- if (rep_len != match_len) {
- set_bit(IPS_SEQ_ADJUST_BIT, &ct->status);
- adjust_tcp_sequence(ntohl(tcph->seq),
- (int)rep_len - (int)match_len,
- ct, ctinfo);
- /* Tell TCP window tracking about seq change */
- ip_conntrack_tcp_update(*pskb, ct, CTINFO2DIR(ctinfo));
- }
- return 1;
-}
-EXPORT_SYMBOL(ip_nat_mangle_tcp_packet);
-
-/* Generic function for mangling variable-length address changes inside
- * NATed UDP connections (like the CONNECT DATA XXXXX MESG XXXXX INDEX XXXXX
- * command in the Amanda protocol)
- *
- * Takes care of all the nasty sequence number changes, checksumming,
- * skb enlargement, and so on.
- *
- * XXX - This function could be merged with ip_nat_mangle_tcp_packet which
- * should be fairly easy to do.
- */
-int
-ip_nat_mangle_udp_packet(struct sk_buff **pskb,
- struct ip_conntrack *ct,
- enum ip_conntrack_info ctinfo,
- unsigned int match_offset,
- unsigned int match_len,
- const char *rep_buffer,
- unsigned int rep_len)
-{
- struct iphdr *iph;
- struct udphdr *udph;
- int datalen, oldlen;
-
- /* UDP helpers might accidentally mangle the wrong packet */
- iph = (*pskb)->nh.iph;
- if ((*pskb)->len < iph->ihl*4 + sizeof(*udph) +
- match_offset + match_len)
- return 0;
-
- if (!skb_make_writable(pskb, (*pskb)->len))
- return 0;
-
- if (rep_len > match_len
- && rep_len - match_len > skb_tailroom(*pskb)
- && !enlarge_skb(pskb, rep_len - match_len))
- return 0;
-
- iph = (*pskb)->nh.iph;
- udph = (void *)iph + iph->ihl*4;
-
- oldlen = (*pskb)->len - iph->ihl*4;
- mangle_contents(*pskb, iph->ihl*4 + sizeof(*udph),
- match_offset, match_len, rep_buffer, rep_len);
-
- /* update the length of the UDP packet */
- datalen = (*pskb)->len - iph->ihl*4;
- udph->len = htons(datalen);
-
- /* Only fix the UDP checksum if one was previously calculated;
- * a zero checksum means the sender didn't compute one (RFC 768). */
- if (!udph->check && (*pskb)->ip_summed != CHECKSUM_PARTIAL)
- return 1;
-
- if ((*pskb)->ip_summed != CHECKSUM_PARTIAL) {
- udph->check = 0;
- udph->check = csum_tcpudp_magic(iph->saddr, iph->daddr,
- datalen, IPPROTO_UDP,
- csum_partial((char *)udph,
- datalen, 0));
- if (!udph->check)
- udph->check = CSUM_MANGLED_0;
- } else
- nf_proto_csum_replace2(&udph->check, *pskb,
- htons(oldlen), htons(datalen), 1);
- return 1;
-}
-EXPORT_SYMBOL(ip_nat_mangle_udp_packet);
-
-/* Adjust one found SACK option including checksum correction */
-static void
-sack_adjust(struct sk_buff *skb,
- struct tcphdr *tcph,
- unsigned int sackoff,
- unsigned int sackend,
- struct ip_nat_seq *natseq)
-{
- while (sackoff < sackend) {
- struct tcp_sack_block_wire *sack;
- __be32 new_start_seq, new_end_seq;
-
- sack = (void *)skb->data + sackoff;
- if (after(ntohl(sack->start_seq) - natseq->offset_before,
- natseq->correction_pos))
- new_start_seq = htonl(ntohl(sack->start_seq)
- - natseq->offset_after);
- else
- new_start_seq = htonl(ntohl(sack->start_seq)
- - natseq->offset_before);
-
- if (after(ntohl(sack->end_seq) - natseq->offset_before,
- natseq->correction_pos))
- new_end_seq = htonl(ntohl(sack->end_seq)
- - natseq->offset_after);
- else
- new_end_seq = htonl(ntohl(sack->end_seq)
- - natseq->offset_before);
-
- DEBUGP("sack_adjust: start_seq: %d->%d, end_seq: %d->%d\n",
- ntohl(sack->start_seq), new_start_seq,
- ntohl(sack->end_seq), new_end_seq);
-
- nf_proto_csum_replace4(&tcph->check, skb,
- sack->start_seq, new_start_seq, 0);
- nf_proto_csum_replace4(&tcph->check, skb,
- sack->end_seq, new_end_seq, 0);
- sack->start_seq = new_start_seq;
- sack->end_seq = new_end_seq;
- sackoff += sizeof(*sack);
- }
-}
-
-/* TCP SACK sequence number adjustment */
-static inline unsigned int
-ip_nat_sack_adjust(struct sk_buff **pskb,
- struct tcphdr *tcph,
- struct ip_conntrack *ct,
- enum ip_conntrack_info ctinfo)
-{
- unsigned int dir, optoff, optend;
-
- optoff = (*pskb)->nh.iph->ihl*4 + sizeof(struct tcphdr);
- optend = (*pskb)->nh.iph->ihl*4 + tcph->doff*4;
-
- if (!skb_make_writable(pskb, optend))
- return 0;
-
- dir = CTINFO2DIR(ctinfo);
-
- while (optoff < optend) {
- /* Usually: option, length. */
- unsigned char *op = (*pskb)->data + optoff;
-
- switch (op[0]) {
- case TCPOPT_EOL:
- return 1;
- case TCPOPT_NOP:
- optoff++;
- continue;
- default:
- /* no partial options */
- if (optoff + 1 == optend
- || optoff + op[1] > optend
- || op[1] < 2)
- return 0;
- if (op[0] == TCPOPT_SACK
- && op[1] >= 2+TCPOLEN_SACK_PERBLOCK
- && ((op[1] - 2) % TCPOLEN_SACK_PERBLOCK) == 0)
- sack_adjust(*pskb, tcph, optoff+2,
- optoff+op[1],
- &ct->nat.info.seq[!dir]);
- optoff += op[1];
- }
- }
- return 1;
-}
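
The loop above is the standard TCP option walk: EOL terminates the list, NOP is a single byte, and every other option is kind, length, data, with the bounds checks rejecting truncated options. A hedged userspace sketch of the same walk; the option bytes in main() are illustrative:

#include <stdint.h>
#include <stdio.h>

static void walk_options(const uint8_t *opt, unsigned int len)
{
	unsigned int off = 0;

	while (off < len) {
		if (opt[off] == 0)		/* TCPOPT_EOL */
			return;
		if (opt[off] == 1) {		/* TCPOPT_NOP */
			off++;
			continue;
		}
		if (off + 1 >= len || opt[off + 1] < 2 ||
		    off + opt[off + 1] > len)	/* no partial options */
			return;
		printf("kind %u, len %u\n", opt[off], opt[off + 1]);
		off += opt[off + 1];
	}
}

int main(void)
{
	/* NOP, NOP, then a SACK option (kind 5, len 10, one block). */
	uint8_t opts[] = { 1, 1, 5, 10, 0, 0, 0, 1, 0, 0, 0, 2 };

	walk_options(opts, sizeof(opts));
	return 0;
}
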
-
-/* TCP sequence number adjustment. Returns 1 on success, 0 on failure */
-int
-ip_nat_seq_adjust(struct sk_buff **pskb,
- struct ip_conntrack *ct,
- enum ip_conntrack_info ctinfo)
-{
- struct tcphdr *tcph;
- int dir;
- __be32 newseq, newack;
- struct ip_nat_seq *this_way, *other_way;
-
- dir = CTINFO2DIR(ctinfo);
-
- this_way = &ct->nat.info.seq[dir];
- other_way = &ct->nat.info.seq[!dir];
-
- if (!skb_make_writable(pskb, (*pskb)->nh.iph->ihl*4+sizeof(*tcph)))
- return 0;
-
- tcph = (void *)(*pskb)->data + (*pskb)->nh.iph->ihl*4;
- if (after(ntohl(tcph->seq), this_way->correction_pos))
- newseq = htonl(ntohl(tcph->seq) + this_way->offset_after);
- else
- newseq = htonl(ntohl(tcph->seq) + this_way->offset_before);
-
- if (after(ntohl(tcph->ack_seq) - other_way->offset_before,
- other_way->correction_pos))
- newack = htonl(ntohl(tcph->ack_seq) - other_way->offset_after);
- else
- newack = htonl(ntohl(tcph->ack_seq) - other_way->offset_before);
-
- nf_proto_csum_replace4(&tcph->check, *pskb, tcph->seq, newseq, 0);
- nf_proto_csum_replace4(&tcph->check, *pskb, tcph->ack_seq, newack, 0);
-
- DEBUGP("Adjusting sequence number from %u->%u, ack from %u->%u\n",
- ntohl(tcph->seq), ntohl(newseq), ntohl(tcph->ack_seq),
- ntohl(newack));
-
- tcph->seq = newseq;
- tcph->ack_seq = newack;
-
- if (!ip_nat_sack_adjust(pskb, tcph, ct, ctinfo))
- return 0;
-
- ip_conntrack_tcp_update(*pskb, ct, dir);
-
- return 1;
-}
-EXPORT_SYMBOL(ip_nat_seq_adjust);
-
- /* Set up NAT on this expected conntrack so it follows its master.
- * If we fail to get a free NAT slot, we'll get dropped on confirm. */
-void ip_nat_follow_master(struct ip_conntrack *ct,
- struct ip_conntrack_expect *exp)
-{
- struct ip_nat_range range;
-
- /* This must be a fresh one. */
- BUG_ON(ct->status & IPS_NAT_DONE_MASK);
-
- /* Change src to where master sends to */
- range.flags = IP_NAT_RANGE_MAP_IPS;
- range.min_ip = range.max_ip
- = ct->master->tuplehash[!exp->dir].tuple.dst.ip;
- /* hook doesn't matter, but it has to do source manip */
- ip_nat_setup_info(ct, &range, NF_IP_POST_ROUTING);
-
- /* For DST manip, map port here to where it's expected. */
- range.flags = (IP_NAT_RANGE_MAP_IPS | IP_NAT_RANGE_PROTO_SPECIFIED);
- range.min = range.max = exp->saved_proto;
- range.min_ip = range.max_ip
- = ct->master->tuplehash[!exp->dir].tuple.src.ip;
- /* hook doesn't matter, but it has to do destination manip */
- ip_nat_setup_info(ct, &range, NF_IP_PRE_ROUTING);
-}
-EXPORT_SYMBOL(ip_nat_follow_master);
diff --git a/net/ipv4/netfilter/ip_nat_helper_h323.c b/net/ipv4/netfilter/ip_nat_helper_h323.c
deleted file mode 100644
index bdc99ef6159..00000000000
--- a/net/ipv4/netfilter/ip_nat_helper_h323.c
+++ /dev/null
@@ -1,611 +0,0 @@
-/*
- * H.323 extension for NAT alteration.
- *
- * Copyright (c) 2006 Jing Min Zhao <zhaojingmin@users.sourceforge.net>
- *
- * This source code is licensed under General Public License version 2.
- *
- * Based on the 'brute force' H.323 NAT module by
- * Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>
- */
-
-#include <linux/module.h>
-#include <linux/netfilter_ipv4.h>
-#include <linux/netfilter.h>
-#include <linux/ip.h>
-#include <linux/tcp.h>
-#include <linux/moduleparam.h>
-#include <net/tcp.h>
-#include <linux/netfilter_ipv4/ip_nat.h>
-#include <linux/netfilter_ipv4/ip_nat_helper.h>
-#include <linux/netfilter_ipv4/ip_nat_rule.h>
-#include <linux/netfilter_ipv4/ip_conntrack_tuple.h>
-#include <linux/netfilter_ipv4/ip_conntrack_h323.h>
-#include <linux/netfilter_ipv4/ip_conntrack_helper.h>
-
-#if 0
-#define DEBUGP printk
-#else
-#define DEBUGP(format, args...)
-#endif
-
-/****************************************************************************/
-static int set_addr(struct sk_buff **pskb,
- unsigned char **data, int dataoff,
- unsigned int addroff, __be32 ip, u_int16_t port)
-{
- enum ip_conntrack_info ctinfo;
- struct ip_conntrack *ct = ip_conntrack_get(*pskb, &ctinfo);
- struct {
- __be32 ip;
- __be16 port;
- } __attribute__ ((__packed__)) buf;
- struct tcphdr _tcph, *th;
-
- buf.ip = ip;
- buf.port = htons(port);
- addroff += dataoff;
-
- if ((*pskb)->nh.iph->protocol == IPPROTO_TCP) {
- if (!ip_nat_mangle_tcp_packet(pskb, ct, ctinfo,
- addroff, sizeof(buf),
- (char *) &buf, sizeof(buf))) {
- if (net_ratelimit())
- printk("ip_nat_h323: ip_nat_mangle_tcp_packet"
- " error\n");
- return -1;
- }
-
- /* Relocate data pointer */
- th = skb_header_pointer(*pskb, (*pskb)->nh.iph->ihl * 4,
- sizeof(_tcph), &_tcph);
- if (th == NULL)
- return -1;
- *data = (*pskb)->data + (*pskb)->nh.iph->ihl * 4 +
- th->doff * 4 + dataoff;
- } else {
- if (!ip_nat_mangle_udp_packet(pskb, ct, ctinfo,
- addroff, sizeof(buf),
- (char *) &buf, sizeof(buf))) {
- if (net_ratelimit())
- printk("ip_nat_h323: ip_nat_mangle_udp_packet"
- " error\n");
- return -1;
- }
- /* ip_nat_mangle_udp_packet uses skb_make_writable() to copy
- * or pull everything in a linear buffer, so we can safely
- * use the skb pointers now */
- *data = (*pskb)->data + (*pskb)->nh.iph->ihl * 4 +
- sizeof(struct udphdr);
- }
-
- return 0;
-}
-
-/****************************************************************************/
-static int set_h225_addr(struct sk_buff **pskb,
- unsigned char **data, int dataoff,
- TransportAddress * addr,
- __be32 ip, u_int16_t port)
-{
- return set_addr(pskb, data, dataoff, addr->ipAddress.ip, ip, port);
-}
-
-/****************************************************************************/
-static int set_h245_addr(struct sk_buff **pskb,
- unsigned char **data, int dataoff,
- H245_TransportAddress * addr,
- __be32 ip, u_int16_t port)
-{
- return set_addr(pskb, data, dataoff,
- addr->unicastAddress.iPAddress.network, ip, port);
-}
-
-/****************************************************************************/
-static int set_sig_addr(struct sk_buff **pskb, struct ip_conntrack *ct,
- enum ip_conntrack_info ctinfo,
- unsigned char **data,
- TransportAddress * addr, int count)
-{
- struct ip_ct_h323_master *info = &ct->help.ct_h323_info;
- int dir = CTINFO2DIR(ctinfo);
- int i;
- __be32 ip;
- u_int16_t port;
-
- for (i = 0; i < count; i++) {
- if (get_h225_addr(*data, &addr[i], &ip, &port)) {
- if (ip == ct->tuplehash[dir].tuple.src.ip &&
- port == info->sig_port[dir]) {
- /* GW->GK */
-
- /* Fix for Gnomemeeting */
- if (i > 0 &&
- get_h225_addr(*data, &addr[0],
- &ip, &port) &&
- (ntohl(ip) & 0xff000000) == 0x7f000000)
- i = 0;
-
- DEBUGP("ip_nat_ras: set signal address "
- "%u.%u.%u.%u:%hu->%u.%u.%u.%u:%hu\n",
- NIPQUAD(ip), port,
- NIPQUAD(ct->tuplehash[!dir].tuple.dst.ip),
- info->sig_port[!dir]);
- return set_h225_addr(pskb, data, 0, &addr[i],
- ct->tuplehash[!dir].
- tuple.dst.ip,
- info->sig_port[!dir]);
- } else if (ip == ct->tuplehash[dir].tuple.dst.ip &&
- port == info->sig_port[dir]) {
- /* GK->GW */
- DEBUGP("ip_nat_ras: set signal address "
- "%u.%u.%u.%u:%hu->%u.%u.%u.%u:%hu\n",
- NIPQUAD(ip), port,
- NIPQUAD(ct->tuplehash[!dir].tuple.src.ip),
- info->sig_port[!dir]);
- return set_h225_addr(pskb, data, 0, &addr[i],
- ct->tuplehash[!dir].
- tuple.src.ip,
- info->sig_port[!dir]);
- }
- }
- }
-
- return 0;
-}
-
-/****************************************************************************/
-static int set_ras_addr(struct sk_buff **pskb, struct ip_conntrack *ct,
- enum ip_conntrack_info ctinfo,
- unsigned char **data,
- TransportAddress * addr, int count)
-{
- int dir = CTINFO2DIR(ctinfo);
- int i;
- __be32 ip;
- u_int16_t port;
-
- for (i = 0; i < count; i++) {
- if (get_h225_addr(*data, &addr[i], &ip, &port) &&
- ip == ct->tuplehash[dir].tuple.src.ip &&
- port == ntohs(ct->tuplehash[dir].tuple.src.u.udp.port)) {
- DEBUGP("ip_nat_ras: set rasAddress "
- "%u.%u.%u.%u:%hu->%u.%u.%u.%u:%hu\n",
- NIPQUAD(ip), port,
- NIPQUAD(ct->tuplehash[!dir].tuple.dst.ip),
- ntohs(ct->tuplehash[!dir].tuple.dst.u.udp.
- port));
- return set_h225_addr(pskb, data, 0, &addr[i],
- ct->tuplehash[!dir].tuple.dst.ip,
- ntohs(ct->tuplehash[!dir].tuple.
- dst.u.udp.port));
- }
- }
-
- return 0;
-}
-
-/****************************************************************************/
-static int nat_rtp_rtcp(struct sk_buff **pskb, struct ip_conntrack *ct,
- enum ip_conntrack_info ctinfo,
- unsigned char **data, int dataoff,
- H245_TransportAddress * addr,
- u_int16_t port, u_int16_t rtp_port,
- struct ip_conntrack_expect *rtp_exp,
- struct ip_conntrack_expect *rtcp_exp)
-{
- struct ip_ct_h323_master *info = &ct->help.ct_h323_info;
- int dir = CTINFO2DIR(ctinfo);
- int i;
- u_int16_t nated_port;
-
- /* Set expectations for NAT */
- rtp_exp->saved_proto.udp.port = rtp_exp->tuple.dst.u.udp.port;
- rtp_exp->expectfn = ip_nat_follow_master;
- rtp_exp->dir = !dir;
- rtcp_exp->saved_proto.udp.port = rtcp_exp->tuple.dst.u.udp.port;
- rtcp_exp->expectfn = ip_nat_follow_master;
- rtcp_exp->dir = !dir;
-
- /* Lookup existing expects */
- for (i = 0; i < H323_RTP_CHANNEL_MAX; i++) {
- if (info->rtp_port[i][dir] == rtp_port) {
- /* Expected */
-
- /* Use allocated ports first. This will refresh
- * the expects */
- rtp_exp->tuple.dst.u.udp.port =
- htons(info->rtp_port[i][dir]);
- rtcp_exp->tuple.dst.u.udp.port =
- htons(info->rtp_port[i][dir] + 1);
- break;
- } else if (info->rtp_port[i][dir] == 0) {
- /* Not expected */
- break;
- }
- }
-
- /* Ran out of expectations */
- if (i >= H323_RTP_CHANNEL_MAX) {
- if (net_ratelimit())
- printk("ip_nat_h323: out of expectations\n");
- return 0;
- }
-
- /* Try to get a pair of ports. */
- for (nated_port = ntohs(rtp_exp->tuple.dst.u.udp.port);
- nated_port != 0; nated_port += 2) {
- rtp_exp->tuple.dst.u.udp.port = htons(nated_port);
- if (ip_conntrack_expect_related(rtp_exp) == 0) {
- rtcp_exp->tuple.dst.u.udp.port =
- htons(nated_port + 1);
- if (ip_conntrack_expect_related(rtcp_exp) == 0)
- break;
- ip_conntrack_unexpect_related(rtp_exp);
- }
- }
-
- if (nated_port == 0) { /* No port available */
- if (net_ratelimit())
- printk("ip_nat_h323: out of RTP ports\n");
- return 0;
- }
-
- /* Modify signal */
- if (set_h245_addr(pskb, data, dataoff, addr,
- ct->tuplehash[!dir].tuple.dst.ip,
- (port & 1) ? nated_port + 1 : nated_port) == 0) {
- /* Save ports */
- info->rtp_port[i][dir] = rtp_port;
- info->rtp_port[i][!dir] = nated_port;
- } else {
- ip_conntrack_unexpect_related(rtp_exp);
- ip_conntrack_unexpect_related(rtcp_exp);
- return -1;
- }
-
- /* Success */
- DEBUGP("ip_nat_h323: expect RTP %u.%u.%u.%u:%hu->%u.%u.%u.%u:%hu\n",
- NIPQUAD(rtp_exp->tuple.src.ip),
- ntohs(rtp_exp->tuple.src.u.udp.port),
- NIPQUAD(rtp_exp->tuple.dst.ip),
- ntohs(rtp_exp->tuple.dst.u.udp.port));
- DEBUGP("ip_nat_h323: expect RTCP %u.%u.%u.%u:%hu->%u.%u.%u.%u:%hu\n",
- NIPQUAD(rtcp_exp->tuple.src.ip),
- ntohs(rtcp_exp->tuple.src.u.udp.port),
- NIPQUAD(rtcp_exp->tuple.dst.ip),
- ntohs(rtcp_exp->tuple.dst.u.udp.port));
-
- return 0;
-}
-
-/****************************************************************************/
-static int nat_t120(struct sk_buff **pskb, struct ip_conntrack *ct,
- enum ip_conntrack_info ctinfo,
- unsigned char **data, int dataoff,
- H245_TransportAddress * addr, u_int16_t port,
- struct ip_conntrack_expect *exp)
-{
- int dir = CTINFO2DIR(ctinfo);
- u_int16_t nated_port = port;
-
- /* Set expectations for NAT */
- exp->saved_proto.tcp.port = exp->tuple.dst.u.tcp.port;
- exp->expectfn = ip_nat_follow_master;
- exp->dir = !dir;
-
- /* Try to get same port: if not, try to change it. */
- for (; nated_port != 0; nated_port++) {
- exp->tuple.dst.u.tcp.port = htons(nated_port);
- if (ip_conntrack_expect_related(exp) == 0)
- break;
- }
-
- if (nated_port == 0) { /* No port available */
- if (net_ratelimit())
- printk("ip_nat_h323: out of TCP ports\n");
- return 0;
- }
-
- /* Modify signal */
- if (set_h245_addr(pskb, data, dataoff, addr,
- ct->tuplehash[!dir].tuple.dst.ip, nated_port) < 0) {
- ip_conntrack_unexpect_related(exp);
- return -1;
- }
-
- DEBUGP("ip_nat_h323: expect T.120 %u.%u.%u.%u:%hu->%u.%u.%u.%u:%hu\n",
- NIPQUAD(exp->tuple.src.ip), ntohs(exp->tuple.src.u.tcp.port),
- NIPQUAD(exp->tuple.dst.ip), ntohs(exp->tuple.dst.u.tcp.port));
-
- return 0;
-}
-
-/****************************************************************************
- * This conntrack expect function replaces ip_conntrack_h245_expect()
- * which was set by ip_conntrack_helper_h323.c. It calls both
- * ip_nat_follow_master() and ip_conntrack_h245_expect()
- ****************************************************************************/
-static void ip_nat_h245_expect(struct ip_conntrack *new,
- struct ip_conntrack_expect *this)
-{
- ip_nat_follow_master(new, this);
- ip_conntrack_h245_expect(new, this);
-}
-
-/****************************************************************************/
-static int nat_h245(struct sk_buff **pskb, struct ip_conntrack *ct,
- enum ip_conntrack_info ctinfo,
- unsigned char **data, int dataoff,
- TransportAddress * addr, u_int16_t port,
- struct ip_conntrack_expect *exp)
-{
- struct ip_ct_h323_master *info = &ct->help.ct_h323_info;
- int dir = CTINFO2DIR(ctinfo);
- u_int16_t nated_port = port;
-
- /* Set expectations for NAT */
- exp->saved_proto.tcp.port = exp->tuple.dst.u.tcp.port;
- exp->expectfn = ip_nat_h245_expect;
- exp->dir = !dir;
-
- /* Check existing expects */
- if (info->sig_port[dir] == port)
- nated_port = info->sig_port[!dir];
-
- /* Try to get same port: if not, try to change it. */
- for (; nated_port != 0; nated_port++) {
- exp->tuple.dst.u.tcp.port = htons(nated_port);
- if (ip_conntrack_expect_related(exp) == 0)
- break;
- }
-
- if (nated_port == 0) { /* No port available */
- if (net_ratelimit())
- printk("ip_nat_q931: out of TCP ports\n");
- return 0;
- }
-
- /* Modify signal */
- if (set_h225_addr(pskb, data, dataoff, addr,
- ct->tuplehash[!dir].tuple.dst.ip,
- nated_port) == 0) {
- /* Save ports */
- info->sig_port[dir] = port;
- info->sig_port[!dir] = nated_port;
- } else {
- ip_conntrack_unexpect_related(exp);
- return -1;
- }
-
- DEBUGP("ip_nat_q931: expect H.245 %u.%u.%u.%u:%hu->%u.%u.%u.%u:%hu\n",
- NIPQUAD(exp->tuple.src.ip), ntohs(exp->tuple.src.u.tcp.port),
- NIPQUAD(exp->tuple.dst.ip), ntohs(exp->tuple.dst.u.tcp.port));
-
- return 0;
-}
-
-/****************************************************************************
- * This conntrack expect function replaces ip_conntrack_q931_expect()
- * which was set by ip_conntrack_helper_h323.c.
- ****************************************************************************/
-static void ip_nat_q931_expect(struct ip_conntrack *new,
- struct ip_conntrack_expect *this)
-{
- struct ip_nat_range range;
-
- if (this->tuple.src.ip != 0) { /* Only accept calls from GK */
- ip_nat_follow_master(new, this);
- goto out;
- }
-
- /* This must be a fresh one. */
- BUG_ON(new->status & IPS_NAT_DONE_MASK);
-
- /* Change src to where master sends to */
- range.flags = IP_NAT_RANGE_MAP_IPS;
- range.min_ip = range.max_ip = new->tuplehash[!this->dir].tuple.src.ip;
-
- /* hook doesn't matter, but it has to do source manip */
- ip_nat_setup_info(new, &range, NF_IP_POST_ROUTING);
-
- /* For DST manip, map port here to where it's expected. */
- range.flags = (IP_NAT_RANGE_MAP_IPS | IP_NAT_RANGE_PROTO_SPECIFIED);
- range.min = range.max = this->saved_proto;
- range.min_ip = range.max_ip =
- new->master->tuplehash[!this->dir].tuple.src.ip;
-
- /* hook doesn't matter, but it has to do destination manip */
- ip_nat_setup_info(new, &range, NF_IP_PRE_ROUTING);
-
- out:
- ip_conntrack_q931_expect(new, this);
-}
-
-/****************************************************************************/
-static int nat_q931(struct sk_buff **pskb, struct ip_conntrack *ct,
- enum ip_conntrack_info ctinfo,
- unsigned char **data, TransportAddress * addr, int idx,
- u_int16_t port, struct ip_conntrack_expect *exp)
-{
- struct ip_ct_h323_master *info = &ct->help.ct_h323_info;
- int dir = CTINFO2DIR(ctinfo);
- u_int16_t nated_port = port;
- __be32 ip;
-
- /* Set expectations for NAT */
- exp->saved_proto.tcp.port = exp->tuple.dst.u.tcp.port;
- exp->expectfn = ip_nat_q931_expect;
- exp->dir = !dir;
-
- /* Check existing expects */
- if (info->sig_port[dir] == port)
- nated_port = info->sig_port[!dir];
-
- /* Try to get same port: if not, try to change it. */
- for (; nated_port != 0; nated_port++) {
- exp->tuple.dst.u.tcp.port = htons(nated_port);
- if (ip_conntrack_expect_related(exp) == 0)
- break;
- }
-
- if (nated_port == 0) { /* No port available */
- if (net_ratelimit())
- printk("ip_nat_ras: out of TCP ports\n");
- return 0;
- }
-
- /* Modify signal */
- if (set_h225_addr(pskb, data, 0, &addr[idx],
- ct->tuplehash[!dir].tuple.dst.ip,
- nated_port) == 0) {
- /* Save ports */
- info->sig_port[dir] = port;
- info->sig_port[!dir] = nated_port;
-
- /* Fix for Gnomemeeting */
- if (idx > 0 &&
- get_h225_addr(*data, &addr[0], &ip, &port) &&
- (ntohl(ip) & 0xff000000) == 0x7f000000) {
- set_h225_addr_hook(pskb, data, 0, &addr[0],
- ct->tuplehash[!dir].tuple.dst.ip,
- info->sig_port[!dir]);
- }
- } else {
- ip_conntrack_unexpect_related(exp);
- return -1;
- }
-
- /* Success */
- DEBUGP("ip_nat_ras: expect Q.931 %u.%u.%u.%u:%hu->%u.%u.%u.%u:%hu\n",
- NIPQUAD(exp->tuple.src.ip), ntohs(exp->tuple.src.u.tcp.port),
- NIPQUAD(exp->tuple.dst.ip), ntohs(exp->tuple.dst.u.tcp.port));
-
- return 0;
-}
-
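
The "Fix for Gnomemeeting" above triggers only when the endpoint advertised a loopback address in its signalling, detected with a plain mask test for 127.0.0.0/8 on the host-order address. A small standalone sketch of just that test:

#include <stdint.h>
#include <stdio.h>
#include <arpa/inet.h>

/* True if a network-byte-order IPv4 address falls in 127.0.0.0/8,
 * mirroring the (ntohl(ip) & 0xff000000) == 0x7f000000 test above. */
static int is_loopback(uint32_t be_ip)
{
    return (ntohl(be_ip) & 0xff000000) == 0x7f000000;
}

int main(void)
{
    printf("%d\n", is_loopback(inet_addr("127.0.0.1")));  /* 1 */
    printf("%d\n", is_loopback(inet_addr("10.0.0.1")));   /* 0 */
    return 0;
}
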
-/****************************************************************************/
-static void ip_nat_callforwarding_expect(struct ip_conntrack *new,
- struct ip_conntrack_expect *this)
-{
- struct ip_nat_range range;
-
- /* This must be a fresh one. */
- BUG_ON(new->status & IPS_NAT_DONE_MASK);
-
- /* Change src to where master sends to */
- range.flags = IP_NAT_RANGE_MAP_IPS;
- range.min_ip = range.max_ip = new->tuplehash[!this->dir].tuple.src.ip;
-
- /* hook doesn't matter, but it has to do source manip */
- ip_nat_setup_info(new, &range, NF_IP_POST_ROUTING);
-
- /* For DST manip, map port here to where it's expected. */
- range.flags = (IP_NAT_RANGE_MAP_IPS | IP_NAT_RANGE_PROTO_SPECIFIED);
- range.min = range.max = this->saved_proto;
- range.min_ip = range.max_ip = this->saved_ip;
-
- /* hook doesn't matter, but it has to do destination manip */
- ip_nat_setup_info(new, &range, NF_IP_PRE_ROUTING);
-
- ip_conntrack_q931_expect(new, this);
-}
-
-/****************************************************************************/
-static int nat_callforwarding(struct sk_buff **pskb, struct ip_conntrack *ct,
- enum ip_conntrack_info ctinfo,
- unsigned char **data, int dataoff,
- TransportAddress * addr, u_int16_t port,
- struct ip_conntrack_expect *exp)
-{
- int dir = CTINFO2DIR(ctinfo);
- u_int16_t nated_port;
-
- /* Set expectations for NAT */
- exp->saved_ip = exp->tuple.dst.ip;
- exp->tuple.dst.ip = ct->tuplehash[!dir].tuple.dst.ip;
- exp->saved_proto.tcp.port = exp->tuple.dst.u.tcp.port;
- exp->expectfn = ip_nat_callforwarding_expect;
- exp->dir = !dir;
-
- /* Try to get same port: if not, try to change it. */
- for (nated_port = port; nated_port != 0; nated_port++) {
- exp->tuple.dst.u.tcp.port = htons(nated_port);
- if (ip_conntrack_expect_related(exp) == 0)
- break;
- }
-
- if (nated_port == 0) { /* No port available */
- if (net_ratelimit())
- printk("ip_nat_q931: out of TCP ports\n");
- return 0;
- }
-
- /* Modify signal */
- if (set_h225_addr(pskb, data, dataoff, addr,
- ct->tuplehash[!dir].tuple.dst.ip,
- nated_port) != 0) {
- ip_conntrack_unexpect_related(exp);
- return -1;
- }
-
- /* Success */
- DEBUGP("ip_nat_q931: expect Call Forwarding "
- "%u.%u.%u.%u:%hu->%u.%u.%u.%u:%hu\n",
- NIPQUAD(exp->tuple.src.ip), ntohs(exp->tuple.src.u.tcp.port),
- NIPQUAD(exp->tuple.dst.ip), ntohs(exp->tuple.dst.u.tcp.port));
-
- return 0;
-}
-
-/****************************************************************************/
-static int __init init(void)
-{
- BUG_ON(rcu_dereference(set_h245_addr_hook) != NULL);
- BUG_ON(rcu_dereference(set_h225_addr_hook) != NULL);
- BUG_ON(rcu_dereference(set_sig_addr_hook) != NULL);
- BUG_ON(rcu_dereference(set_ras_addr_hook) != NULL);
- BUG_ON(rcu_dereference(nat_rtp_rtcp_hook) != NULL);
- BUG_ON(rcu_dereference(nat_t120_hook) != NULL);
- BUG_ON(rcu_dereference(nat_h245_hook) != NULL);
- BUG_ON(rcu_dereference(nat_callforwarding_hook) != NULL);
- BUG_ON(rcu_dereference(nat_q931_hook) != NULL);
-
- rcu_assign_pointer(set_h245_addr_hook, set_h245_addr);
- rcu_assign_pointer(set_h225_addr_hook, set_h225_addr);
- rcu_assign_pointer(set_sig_addr_hook, set_sig_addr);
- rcu_assign_pointer(set_ras_addr_hook, set_ras_addr);
- rcu_assign_pointer(nat_rtp_rtcp_hook, nat_rtp_rtcp);
- rcu_assign_pointer(nat_t120_hook, nat_t120);
- rcu_assign_pointer(nat_h245_hook, nat_h245);
- rcu_assign_pointer(nat_callforwarding_hook, nat_callforwarding);
- rcu_assign_pointer(nat_q931_hook, nat_q931);
-
- DEBUGP("ip_nat_h323: init success\n");
- return 0;
-}
-
-/****************************************************************************/
-static void __exit fini(void)
-{
- rcu_assign_pointer(set_h245_addr_hook, NULL);
- rcu_assign_pointer(set_h225_addr_hook, NULL);
- rcu_assign_pointer(set_sig_addr_hook, NULL);
- rcu_assign_pointer(set_ras_addr_hook, NULL);
- rcu_assign_pointer(nat_rtp_rtcp_hook, NULL);
- rcu_assign_pointer(nat_t120_hook, NULL);
- rcu_assign_pointer(nat_h245_hook, NULL);
- rcu_assign_pointer(nat_callforwarding_hook, NULL);
- rcu_assign_pointer(nat_q931_hook, NULL);
- synchronize_rcu();
-}
-
-/****************************************************************************/
-module_init(init);
-module_exit(fini);
-
-MODULE_AUTHOR("Jing Min Zhao <zhaojingmin@users.sourceforge.net>");
-MODULE_DESCRIPTION("H.323 NAT helper");
-MODULE_LICENSE("GPL");
diff --git a/net/ipv4/netfilter/ip_nat_helper_pptp.c b/net/ipv4/netfilter/ip_nat_helper_pptp.c
deleted file mode 100644
index 24ce4a5023d..00000000000
--- a/net/ipv4/netfilter/ip_nat_helper_pptp.c
+++ /dev/null
@@ -1,350 +0,0 @@
-/*
- * ip_nat_pptp.c - Version 3.0
- *
- * NAT support for PPTP (Point to Point Tunneling Protocol).
- * PPTP is a protocol for creating virtual private networks.
- * It is a specification defined by Microsoft and some vendors
- * working with Microsoft. PPTP is built on top of a modified
- * version of the Internet Generic Routing Encapsulation Protocol.
- * GRE is defined in RFC 1701 and RFC 1702. Documentation of
- * PPTP can be found in RFC 2637.
- *
- * (C) 2000-2005 by Harald Welte <laforge@gnumonks.org>
- *
- * Development of this code funded by Astaro AG (http://www.astaro.com/)
- *
- * TODO: - NAT to a unique tuple, not to TCP source port
- * (needs netfilter tuple reservation)
- *
- * Changes:
- * 2002-02-10 - Version 1.3
- * - Use ip_nat_mangle_tcp_packet() because of cloned skb's
- * in local connections (Philip Craig <philipc@snapgear.com>)
- * - add checks for magicCookie and pptp version
- * - make argument list of pptp_{out,in}bound_packet() shorter
- * - move to C99 style initializers
- * - print version number at module loadtime
- * 2003-09-22 - Version 1.5
- * - use SNATed tcp sourceport as callid, since we get called before
- * TCP header is mangled (Philip Craig <philipc@snapgear.com>)
- * 2004-10-22 - Version 2.0
- * - kernel 2.6.x version
- * 2005-06-10 - Version 3.0
- * - kernel >= 2.6.11 version,
- * funded by Oxcoda NetBox Blue (http://www.netboxblue.com/)
- *
- */
-
-#include <linux/module.h>
-#include <linux/ip.h>
-#include <linux/tcp.h>
-#include <net/tcp.h>
-
-#include <linux/netfilter_ipv4/ip_nat.h>
-#include <linux/netfilter_ipv4/ip_nat_rule.h>
-#include <linux/netfilter_ipv4/ip_nat_helper.h>
-#include <linux/netfilter_ipv4/ip_nat_pptp.h>
-#include <linux/netfilter_ipv4/ip_conntrack_core.h>
-#include <linux/netfilter_ipv4/ip_conntrack_helper.h>
-#include <linux/netfilter_ipv4/ip_conntrack_proto_gre.h>
-#include <linux/netfilter_ipv4/ip_conntrack_pptp.h>
-
-#define IP_NAT_PPTP_VERSION "3.0"
-
-#define REQ_CID(req, off) (*(__be16 *)((char *)(req) + (off)))
-
-MODULE_LICENSE("GPL");
-MODULE_AUTHOR("Harald Welte <laforge@gnumonks.org>");
-MODULE_DESCRIPTION("Netfilter NAT helper module for PPTP");
-
-
-#if 0
-extern const char *pptp_msg_name[];
-#define DEBUGP(format, args...) printk(KERN_DEBUG "%s:%s: " format, __FILE__, \
- __FUNCTION__, ## args)
-#else
-#define DEBUGP(format, args...)
-#endif
-
-static void pptp_nat_expected(struct ip_conntrack *ct,
- struct ip_conntrack_expect *exp)
-{
- struct ip_conntrack *master = ct->master;
- struct ip_conntrack_expect *other_exp;
- struct ip_conntrack_tuple t;
- struct ip_ct_pptp_master *ct_pptp_info;
- struct ip_nat_pptp *nat_pptp_info;
- struct ip_nat_range range;
-
- ct_pptp_info = &master->help.ct_pptp_info;
- nat_pptp_info = &master->nat.help.nat_pptp_info;
-
- /* And here goes the grand finale of corrosion... */
-
- if (exp->dir == IP_CT_DIR_ORIGINAL) {
- DEBUGP("we are PNS->PAC\n");
- /* therefore, build tuple for PAC->PNS */
- t.src.ip = master->tuplehash[IP_CT_DIR_REPLY].tuple.src.ip;
- t.src.u.gre.key = master->help.ct_pptp_info.pac_call_id;
- t.dst.ip = master->tuplehash[IP_CT_DIR_REPLY].tuple.dst.ip;
- t.dst.u.gre.key = master->help.ct_pptp_info.pns_call_id;
- t.dst.protonum = IPPROTO_GRE;
- } else {
- DEBUGP("we are PAC->PNS\n");
- /* build tuple for PNS->PAC */
- t.src.ip = master->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.ip;
- t.src.u.gre.key = master->nat.help.nat_pptp_info.pns_call_id;
- t.dst.ip = master->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.ip;
- t.dst.u.gre.key = master->nat.help.nat_pptp_info.pac_call_id;
- t.dst.protonum = IPPROTO_GRE;
- }
-
- DEBUGP("trying to unexpect other dir: ");
- DUMP_TUPLE(&t);
- other_exp = ip_conntrack_expect_find_get(&t);
- if (other_exp) {
- ip_conntrack_unexpect_related(other_exp);
- ip_conntrack_expect_put(other_exp);
- DEBUGP("success\n");
- } else {
- DEBUGP("not found!\n");
- }
-
- /* This must be a fresh one. */
- BUG_ON(ct->status & IPS_NAT_DONE_MASK);
-
- /* Change src to where master sends to */
- range.flags = IP_NAT_RANGE_MAP_IPS;
- range.min_ip = range.max_ip
- = ct->master->tuplehash[!exp->dir].tuple.dst.ip;
- if (exp->dir == IP_CT_DIR_ORIGINAL) {
- range.flags |= IP_NAT_RANGE_PROTO_SPECIFIED;
- range.min = range.max = exp->saved_proto;
- }
- /* hook doesn't matter, but it has to do source manip */
- ip_nat_setup_info(ct, &range, NF_IP_POST_ROUTING);
-
- /* For DST manip, map port here to where it's expected. */
- range.flags = IP_NAT_RANGE_MAP_IPS;
- range.min_ip = range.max_ip
- = ct->master->tuplehash[!exp->dir].tuple.src.ip;
- if (exp->dir == IP_CT_DIR_REPLY) {
- range.flags |= IP_NAT_RANGE_PROTO_SPECIFIED;
- range.min = range.max = exp->saved_proto;
- }
- /* hook doesn't matter, but it has to do destination manip */
- ip_nat_setup_info(ct, &range, NF_IP_PRE_ROUTING);
-}
-
-/* outbound packets == from PNS to PAC */
-static int
-pptp_outbound_pkt(struct sk_buff **pskb,
- struct ip_conntrack *ct,
- enum ip_conntrack_info ctinfo,
- struct PptpControlHeader *ctlh,
- union pptp_ctrl_union *pptpReq)
-
-{
- struct ip_ct_pptp_master *ct_pptp_info = &ct->help.ct_pptp_info;
- struct ip_nat_pptp *nat_pptp_info = &ct->nat.help.nat_pptp_info;
- u_int16_t msg;
- __be16 new_callid;
- unsigned int cid_off;
-
- new_callid = ct_pptp_info->pns_call_id;
-
- switch (msg = ntohs(ctlh->messageType)) {
- case PPTP_OUT_CALL_REQUEST:
- cid_off = offsetof(union pptp_ctrl_union, ocreq.callID);
- /* FIXME: ideally we would want to reserve a call ID
- * here. The current netfilter NAT core is not able to
- * do this :( For now we use the TCP source port, which
- * breaks multiple calls within one control session. */
-
- /* save original call ID in nat_info */
- nat_pptp_info->pns_call_id = ct_pptp_info->pns_call_id;
-
- /* don't use tcph->source since we are at a DSTmanip
- * hook (e.g. PREROUTING) and pkt is not mangled yet */
- new_callid = ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.u.tcp.port;
-
- /* save new call ID in ct info */
- ct_pptp_info->pns_call_id = new_callid;
- break;
- case PPTP_IN_CALL_REPLY:
- cid_off = offsetof(union pptp_ctrl_union, icack.callID);
- break;
- case PPTP_CALL_CLEAR_REQUEST:
- cid_off = offsetof(union pptp_ctrl_union, clrreq.callID);
- break;
- default:
- DEBUGP("unknown outbound packet 0x%04x:%s\n", msg,
- (msg <= PPTP_MSG_MAX)?
- pptp_msg_name[msg]:pptp_msg_name[0]);
- /* fall through */
-
- case PPTP_SET_LINK_INFO:
- /* only need to NAT in case PAC is behind NAT box */
- case PPTP_START_SESSION_REQUEST:
- case PPTP_START_SESSION_REPLY:
- case PPTP_STOP_SESSION_REQUEST:
- case PPTP_STOP_SESSION_REPLY:
- case PPTP_ECHO_REQUEST:
- case PPTP_ECHO_REPLY:
- /* no need to alter packet */
- return NF_ACCEPT;
- }
-
- /* only OUT_CALL_REQUEST, IN_CALL_REPLY, CALL_CLEAR_REQUEST pass
- * down to here */
- DEBUGP("altering call id from 0x%04x to 0x%04x\n",
- ntohs(REQ_CID(pptpReq, cid_off)), ntohs(new_callid));
-
- /* mangle packet */
- if (ip_nat_mangle_tcp_packet(pskb, ct, ctinfo,
- cid_off + sizeof(struct pptp_pkt_hdr) +
- sizeof(struct PptpControlHeader),
- sizeof(new_callid), (char *)&new_callid,
- sizeof(new_callid)) == 0)
- return NF_DROP;
-
- return NF_ACCEPT;
-}
-
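
pptp_outbound_pkt() locates the call ID purely by arithmetic: offsetof() yields the field's position inside the control-message union, and the fixed packet and control headers are added when the payload is mangled. A self-contained sketch of the same technique follows; the struct layout here is illustrative, not the real PPTP wire format.

#include <stddef.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>
#include <arpa/inet.h>

struct out_call_request { uint16_t call_id; uint16_t serial; };
struct in_call_reply    { uint16_t call_id; uint16_t peer_id; };

union ctrl_union {                    /* toy analogue of pptp_ctrl_union */
    struct out_call_request ocreq;
    struct in_call_reply    icack;
};

/* Overwrite a 16-bit big-endian field at a computed offset, the way
 * ip_nat_mangle_tcp_packet() is pointed at cid_off above. */
static void patch_be16(uint8_t *payload, size_t off, uint16_t new_val)
{
    uint16_t be = htons(new_val);
    memcpy(payload + off, &be, sizeof(be));
}

int main(void)
{
    uint8_t pkt[sizeof(union ctrl_union)] = { 0 };
    size_t cid_off = offsetof(union ctrl_union, ocreq.call_id);
    uint16_t got;

    patch_be16(pkt, cid_off, 0xbeef);
    memcpy(&got, pkt + cid_off, sizeof(got));
    printf("call id now 0x%04x\n", ntohs(got));   /* 0xbeef */
    return 0;
}
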
-static void
-pptp_exp_gre(struct ip_conntrack_expect *expect_orig,
- struct ip_conntrack_expect *expect_reply)
-{
- struct ip_conntrack *ct = expect_orig->master;
- struct ip_ct_pptp_master *ct_pptp_info = &ct->help.ct_pptp_info;
- struct ip_nat_pptp *nat_pptp_info = &ct->nat.help.nat_pptp_info;
-
- /* save original PAC call ID in nat_info */
- nat_pptp_info->pac_call_id = ct_pptp_info->pac_call_id;
-
- /* alter expectation for PNS->PAC direction */
- expect_orig->saved_proto.gre.key = ct_pptp_info->pns_call_id;
- expect_orig->tuple.src.u.gre.key = nat_pptp_info->pns_call_id;
- expect_orig->tuple.dst.u.gre.key = ct_pptp_info->pac_call_id;
- expect_orig->dir = IP_CT_DIR_ORIGINAL;
-
- /* alter expectation for PAC->PNS direction */
- expect_reply->saved_proto.gre.key = nat_pptp_info->pns_call_id;
- expect_reply->tuple.src.u.gre.key = nat_pptp_info->pac_call_id;
- expect_reply->tuple.dst.u.gre.key = ct_pptp_info->pns_call_id;
- expect_reply->dir = IP_CT_DIR_REPLY;
-}
-
-/* inbound packets == from PAC to PNS */
-static int
-pptp_inbound_pkt(struct sk_buff **pskb,
- struct ip_conntrack *ct,
- enum ip_conntrack_info ctinfo,
- struct PptpControlHeader *ctlh,
- union pptp_ctrl_union *pptpReq)
-{
- struct ip_nat_pptp *nat_pptp_info = &ct->nat.help.nat_pptp_info;
- u_int16_t msg;
- __be16 new_pcid;
- unsigned int pcid_off;
-
- new_pcid = nat_pptp_info->pns_call_id;
-
- switch (msg = ntohs(ctlh->messageType)) {
- case PPTP_OUT_CALL_REPLY:
- pcid_off = offsetof(union pptp_ctrl_union, ocack.peersCallID);
- break;
- case PPTP_IN_CALL_CONNECT:
- pcid_off = offsetof(union pptp_ctrl_union, iccon.peersCallID);
- break;
- case PPTP_IN_CALL_REQUEST:
- /* only need to nat in case PAC is behind NAT box */
- return NF_ACCEPT;
- case PPTP_WAN_ERROR_NOTIFY:
- pcid_off = offsetof(union pptp_ctrl_union, wanerr.peersCallID);
- break;
- case PPTP_CALL_DISCONNECT_NOTIFY:
- pcid_off = offsetof(union pptp_ctrl_union, disc.callID);
- break;
- case PPTP_SET_LINK_INFO:
- pcid_off = offsetof(union pptp_ctrl_union, setlink.peersCallID);
- break;
-
- default:
- DEBUGP("unknown inbound packet %s\n", (msg <= PPTP_MSG_MAX)?
- pptp_msg_name[msg]:pptp_msg_name[0]);
- /* fall through */
-
- case PPTP_START_SESSION_REQUEST:
- case PPTP_START_SESSION_REPLY:
- case PPTP_STOP_SESSION_REQUEST:
- case PPTP_STOP_SESSION_REPLY:
- case PPTP_ECHO_REQUEST:
- case PPTP_ECHO_REPLY:
- /* no need to alter packet */
- return NF_ACCEPT;
- }
-
- /* only OUT_CALL_REPLY, IN_CALL_CONNECT, IN_CALL_REQUEST,
- * WAN_ERROR_NOTIFY, CALL_DISCONNECT_NOTIFY pass down here */
-
- /* mangle packet */
- DEBUGP("altering peer call id from 0x%04x to 0x%04x\n",
- ntohs(REQ_CID(pptpReq, pcid_off)), ntohs(new_pcid));
-
- if (ip_nat_mangle_tcp_packet(pskb, ct, ctinfo,
- pcid_off + sizeof(struct pptp_pkt_hdr) +
- sizeof(struct PptpControlHeader),
- sizeof(new_pcid), (char *)&new_pcid,
- sizeof(new_pcid)) == 0)
- return NF_DROP;
- return NF_ACCEPT;
-}
-
-
-extern int __init ip_nat_proto_gre_init(void);
-extern void __exit ip_nat_proto_gre_fini(void);
-
-static int __init ip_nat_helper_pptp_init(void)
-{
- int ret;
-
- DEBUGP("%s: registering NAT helper\n", __FILE__);
-
- ret = ip_nat_proto_gre_init();
- if (ret < 0)
- return ret;
-
- BUG_ON(rcu_dereference(ip_nat_pptp_hook_outbound));
- rcu_assign_pointer(ip_nat_pptp_hook_outbound, pptp_outbound_pkt);
-
- BUG_ON(rcu_dereference(ip_nat_pptp_hook_inbound));
- rcu_assign_pointer(ip_nat_pptp_hook_inbound, pptp_inbound_pkt);
-
- BUG_ON(rcu_dereference(ip_nat_pptp_hook_exp_gre));
- rcu_assign_pointer(ip_nat_pptp_hook_exp_gre, pptp_exp_gre);
-
- BUG_ON(rcu_dereference(ip_nat_pptp_hook_expectfn));
- rcu_assign_pointer(ip_nat_pptp_hook_expectfn, pptp_nat_expected);
-
- printk("ip_nat_pptp version %s loaded\n", IP_NAT_PPTP_VERSION);
- return 0;
-}
-
-static void __exit ip_nat_helper_pptp_fini(void)
-{
- DEBUGP("cleanup_module\n" );
-
- rcu_assign_pointer(ip_nat_pptp_hook_expectfn, NULL);
- rcu_assign_pointer(ip_nat_pptp_hook_exp_gre, NULL);
- rcu_assign_pointer(ip_nat_pptp_hook_inbound, NULL);
- rcu_assign_pointer(ip_nat_pptp_hook_outbound, NULL);
- synchronize_rcu();
-
- ip_nat_proto_gre_fini();
-
- printk("ip_nat_pptp version %s unloaded\n", IP_NAT_PPTP_VERSION);
-}
-
-module_init(ip_nat_helper_pptp_init);
-module_exit(ip_nat_helper_pptp_fini);
diff --git a/net/ipv4/netfilter/ip_nat_irc.c b/net/ipv4/netfilter/ip_nat_irc.c
deleted file mode 100644
index cfaeea38314..00000000000
--- a/net/ipv4/netfilter/ip_nat_irc.c
+++ /dev/null
@@ -1,122 +0,0 @@
-/* IRC extension for TCP NAT alteration.
- * (C) 2000-2001 by Harald Welte <laforge@gnumonks.org>
- * (C) 2004 Rusty Russell <rusty@rustcorp.com.au> IBM Corporation
- * based on a copy of RR's ip_nat_ftp.c
- *
- * ip_nat_irc.c,v 1.16 2001/12/06 07:42:10 laforge Exp
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
- */
-
-#include <linux/module.h>
-#include <linux/netfilter_ipv4.h>
-#include <linux/ip.h>
-#include <linux/tcp.h>
-#include <linux/kernel.h>
-#include <net/tcp.h>
-#include <linux/netfilter_ipv4/ip_nat.h>
-#include <linux/netfilter_ipv4/ip_nat_helper.h>
-#include <linux/netfilter_ipv4/ip_nat_rule.h>
-#include <linux/netfilter_ipv4/ip_conntrack_irc.h>
-#include <linux/netfilter_ipv4/ip_conntrack_helper.h>
-#include <linux/moduleparam.h>
-
-#if 0
-#define DEBUGP printk
-#else
-#define DEBUGP(format, args...)
-#endif
-
-MODULE_AUTHOR("Harald Welte <laforge@gnumonks.org>");
-MODULE_DESCRIPTION("IRC (DCC) NAT helper");
-MODULE_LICENSE("GPL");
-
-static unsigned int help(struct sk_buff **pskb,
- enum ip_conntrack_info ctinfo,
- unsigned int matchoff,
- unsigned int matchlen,
- struct ip_conntrack_expect *exp)
-{
- u_int16_t port;
- unsigned int ret;
-
- /* "4294967295 65535 " */
- char buffer[18];
-
- DEBUGP("IRC_NAT: match at offset %u, len %u\n",
- matchoff, matchlen);
-
- /* Reply comes from server. */
- exp->saved_proto.tcp.port = exp->tuple.dst.u.tcp.port;
- exp->dir = IP_CT_DIR_REPLY;
-
- /* When you see the packet, we need to NAT it the same as
- * this one. */
- exp->expectfn = ip_nat_follow_master;
-
- /* Try to get same port: if not, try to change it. */
- for (port = ntohs(exp->saved_proto.tcp.port); port != 0; port++) {
- exp->tuple.dst.u.tcp.port = htons(port);
- if (ip_conntrack_expect_related(exp) == 0)
- break;
- }
-
- if (port == 0)
- return NF_DROP;
-
- /* strlen("\1DCC CHAT chat AAAAAAAA P\1\n")=27
- * strlen("\1DCC SCHAT chat AAAAAAAA P\1\n")=28
- * strlen("\1DCC SEND F AAAAAAAA P S\1\n")=26
- * strlen("\1DCC MOVE F AAAAAAAA P S\1\n")=26
- * strlen("\1DCC TSEND F AAAAAAAA P S\1\n")=27
- * AAAAAAAAA: bound addr (1.0.0.0==16777216, min 8 digits,
- * 255.255.255.255==4294967295, 10 digits)
- * P: bound port (min 1 d, max 5d (65535))
- * F: filename (min 1 d )
- * S: size (min 1 d )
- * 0x01, \n: terminators
- */
-
- /* AAA = "us", i.e. where the server normally talks to. */
- sprintf(buffer, "%u %u",
- ntohl(exp->master->tuplehash[IP_CT_DIR_REPLY].tuple.dst.ip),
- port);
- DEBUGP("ip_nat_irc: Inserting '%s' == %u.%u.%u.%u, port %u\n",
- buffer, NIPQUAD(exp->tuple.src.ip), port);
-
- ret = ip_nat_mangle_tcp_packet(pskb, exp->master, ctinfo,
- matchoff, matchlen, buffer,
- strlen(buffer));
- if (ret != NF_ACCEPT)
- ip_conntrack_unexpect_related(exp);
- return ret;
-}
-
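
DCC carries the address as the decimal value of the 32-bit IP and the port as decimal ASCII, so help() above only needs to format the NATed values with a plain "%u %u" before mangling the payload. A sketch of that formatting step:

#include <stdint.h>
#include <stdio.h>
#include <arpa/inet.h>

int main(void)
{
    uint32_t ip = inet_addr("192.168.0.1");    /* network byte order */
    unsigned port = 4000;
    char buffer[sizeof("4294967295 65535")];   /* worst case + NUL */
    int n;

    /* DCC wants the address as one decimal number, host byte order. */
    n = snprintf(buffer, sizeof(buffer), "%u %u",
                 (unsigned)ntohl(ip), port);
    printf("'%s' (%d bytes)\n", buffer, n);    /* '3232235521 4000' */
    return 0;
}
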
-static void __exit ip_nat_irc_fini(void)
-{
- rcu_assign_pointer(ip_nat_irc_hook, NULL);
- synchronize_rcu();
-}
-
-static int __init ip_nat_irc_init(void)
-{
- BUG_ON(rcu_dereference(ip_nat_irc_hook));
- rcu_assign_pointer(ip_nat_irc_hook, help);
- return 0;
-}
-
-/* Prior to 2.6.11, we had a ports param. No longer, but don't break users. */
-static int warn_set(const char *val, struct kernel_param *kp)
-{
- printk(KERN_INFO KBUILD_MODNAME
- ": kernel >= 2.6.10 only uses 'ports' for conntrack modules\n");
- return 0;
-}
-module_param_call(ports, warn_set, NULL, NULL, 0);
-
-module_init(ip_nat_irc_init);
-module_exit(ip_nat_irc_fini);
diff --git a/net/ipv4/netfilter/ip_nat_proto_gre.c b/net/ipv4/netfilter/ip_nat_proto_gre.c
deleted file mode 100644
index 95810202d84..00000000000
--- a/net/ipv4/netfilter/ip_nat_proto_gre.c
+++ /dev/null
@@ -1,174 +0,0 @@
-/*
- * ip_nat_proto_gre.c - Version 2.0
- *
- * NAT protocol helper module for GRE.
- *
- * GRE is a generic encapsulation protocol, which is generally not very
- * well suited for NAT, as it has no protocol-specific part such as
- * port numbers.
- *
- * It has an optional key field, which may help us distinguish two
- * connections between the same two hosts.
- *
- * GRE is defined in RFC 1701 and RFC 1702, as well as RFC 2784
- *
- * PPTP is built on top of a modified version of GRE, and has a mandatory
- * field called "CallID", which serves us for the same purpose as the key
- * field in plain GRE.
- *
- * Documentation about PPTP can be found in RFC 2637
- *
- * (C) 2000-2005 by Harald Welte <laforge@gnumonks.org>
- *
- * Development of this code funded by Astaro AG (http://www.astaro.com/)
- *
- */
-
-#include <linux/module.h>
-#include <linux/ip.h>
-#include <linux/netfilter_ipv4/ip_nat.h>
-#include <linux/netfilter_ipv4/ip_nat_rule.h>
-#include <linux/netfilter_ipv4/ip_nat_protocol.h>
-#include <linux/netfilter_ipv4/ip_conntrack_proto_gre.h>
-
-MODULE_LICENSE("GPL");
-MODULE_AUTHOR("Harald Welte <laforge@gnumonks.org>");
-MODULE_DESCRIPTION("Netfilter NAT protocol helper module for GRE");
-
-#if 0
-#define DEBUGP(format, args...) printk(KERN_DEBUG "%s:%s: " format, __FILE__, \
- __FUNCTION__, ## args)
-#else
-#define DEBUGP(x, args...)
-#endif
-
-/* is key in given range between min and max */
-static int
-gre_in_range(const struct ip_conntrack_tuple *tuple,
- enum ip_nat_manip_type maniptype,
- const union ip_conntrack_manip_proto *min,
- const union ip_conntrack_manip_proto *max)
-{
- __be16 key;
-
- if (maniptype == IP_NAT_MANIP_SRC)
- key = tuple->src.u.gre.key;
- else
- key = tuple->dst.u.gre.key;
-
- return ntohs(key) >= ntohs(min->gre.key)
- && ntohs(key) <= ntohs(max->gre.key);
-}
-
-/* generate unique tuple ... */
-static int
-gre_unique_tuple(struct ip_conntrack_tuple *tuple,
- const struct ip_nat_range *range,
- enum ip_nat_manip_type maniptype,
- const struct ip_conntrack *conntrack)
-{
- static u_int16_t key;
- __be16 *keyptr;
- unsigned int min, i, range_size;
-
- if (maniptype == IP_NAT_MANIP_SRC)
- keyptr = &tuple->src.u.gre.key;
- else
- keyptr = &tuple->dst.u.gre.key;
-
- if (!(range->flags & IP_NAT_RANGE_PROTO_SPECIFIED)) {
- DEBUGP("%p: NATing GRE PPTP\n", conntrack);
- min = 1;
- range_size = 0xffff;
- } else {
- min = ntohs(range->min.gre.key);
- range_size = ntohs(range->max.gre.key) - min + 1;
- }
-
- DEBUGP("min = %u, range_size = %u\n", min, range_size);
-
- for (i = 0; i < range_size; i++, key++) {
- *keyptr = htons(min + key % range_size);
- if (!ip_nat_used_tuple(tuple, conntrack))
- return 1;
- }
-
- DEBUGP("%p: no NAT mapping\n", conntrack);
-
- return 0;
-}
-
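
gre_unique_tuple() keeps a static counter across calls so successive connections start probing at different points of the [min, max] key range, with the modulo keeping every probe inside the range. The loop reduced to a userspace sketch, where in_use() is a hypothetical stand-in for ip_nat_used_tuple():

#include <stdint.h>
#include <stdio.h>

static int in_use(uint16_t key) { return key < 1003; }  /* toy predicate */

/* Pick a free key in [min, max]; returns 1 and stores it, or 0 once the
 * whole range is exhausted -- the shape of gre_unique_tuple() above. */
static int pick_key(uint16_t min, uint16_t max, uint16_t *out)
{
    static uint16_t counter;                 /* rolls across calls */
    unsigned int range = max - min + 1, i;

    for (i = 0; i < range; i++, counter++) {
        uint16_t key = min + counter % range;
        if (!in_use(key)) {
            *out = key;
            return 1;
        }
    }
    return 0;
}

int main(void)
{
    uint16_t k;

    if (pick_key(1000, 1010, &k))
        printf("key %u\n", k);               /* 1003: first free key */
    return 0;
}
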
-/* manipulate a GRE packet according to maniptype */
-static int
-gre_manip_pkt(struct sk_buff **pskb,
- unsigned int iphdroff,
- const struct ip_conntrack_tuple *tuple,
- enum ip_nat_manip_type maniptype)
-{
- struct gre_hdr *greh;
- struct gre_hdr_pptp *pgreh;
- struct iphdr *iph = (struct iphdr *)((*pskb)->data + iphdroff);
- unsigned int hdroff = iphdroff + iph->ihl*4;
-
- /* pgreh includes two optional 32bit fields which are not required
- * to be there. That's where the magic '8' comes from */
- if (!skb_make_writable(pskb, hdroff + sizeof(*pgreh)-8))
- return 0;
-
- greh = (void *)(*pskb)->data + hdroff;
- pgreh = (struct gre_hdr_pptp *) greh;
-
- /* we only have destination manip of a packet, since 'source key'
- * is not present in the packet itself */
- if (maniptype == IP_NAT_MANIP_DST) {
- /* key manipulation is always dest */
- switch (greh->version) {
- case 0:
- if (!greh->key) {
- DEBUGP("can't nat GRE w/o key\n");
- break;
- }
- if (greh->csum) {
- /* FIXME: Never tested this code... */
- nf_proto_csum_replace4(gre_csum(greh), *pskb,
- *(gre_key(greh)),
- tuple->dst.u.gre.key, 0);
- }
- *(gre_key(greh)) = tuple->dst.u.gre.key;
- break;
- case GRE_VERSION_PPTP:
- DEBUGP("call_id -> 0x%04x\n",
- ntohs(tuple->dst.u.gre.key));
- pgreh->call_id = tuple->dst.u.gre.key;
- break;
- default:
- DEBUGP("can't nat unknown GRE version\n");
- return 0;
- }
- }
- return 1;
-}
-
-/* nat helper struct */
-static struct ip_nat_protocol gre = {
- .name = "GRE",
- .protonum = IPPROTO_GRE,
- .manip_pkt = gre_manip_pkt,
- .in_range = gre_in_range,
- .unique_tuple = gre_unique_tuple,
-#if defined(CONFIG_IP_NF_CONNTRACK_NETLINK) || \
- defined(CONFIG_IP_NF_CONNTRACK_NETLINK_MODULE)
- .range_to_nfattr = ip_nat_port_range_to_nfattr,
- .nfattr_to_range = ip_nat_port_nfattr_to_range,
-#endif
-};
-
-int __init ip_nat_proto_gre_init(void)
-{
- return ip_nat_protocol_register(&gre);
-}
-
-void __exit ip_nat_proto_gre_fini(void)
-{
- ip_nat_protocol_unregister(&gre);
-}
diff --git a/net/ipv4/netfilter/ip_nat_proto_icmp.c b/net/ipv4/netfilter/ip_nat_proto_icmp.c
deleted file mode 100644
index 22a528ae038..00000000000
--- a/net/ipv4/netfilter/ip_nat_proto_icmp.c
+++ /dev/null
@@ -1,87 +0,0 @@
-/* (C) 1999-2001 Paul `Rusty' Russell
- * (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
-
-#include <linux/types.h>
-#include <linux/init.h>
-#include <linux/netfilter.h>
-#include <linux/ip.h>
-#include <linux/icmp.h>
-#include <linux/if.h>
-
-#include <linux/netfilter_ipv4/ip_nat.h>
-#include <linux/netfilter_ipv4/ip_nat_core.h>
-#include <linux/netfilter_ipv4/ip_nat_rule.h>
-#include <linux/netfilter_ipv4/ip_nat_protocol.h>
-
-static int
-icmp_in_range(const struct ip_conntrack_tuple *tuple,
- enum ip_nat_manip_type maniptype,
- const union ip_conntrack_manip_proto *min,
- const union ip_conntrack_manip_proto *max)
-{
- return ntohs(tuple->src.u.icmp.id) >= ntohs(min->icmp.id) &&
- ntohs(tuple->src.u.icmp.id) <= ntohs(max->icmp.id);
-}
-
-static int
-icmp_unique_tuple(struct ip_conntrack_tuple *tuple,
- const struct ip_nat_range *range,
- enum ip_nat_manip_type maniptype,
- const struct ip_conntrack *conntrack)
-{
- static u_int16_t id;
- unsigned int range_size;
- unsigned int i;
-
- range_size = ntohs(range->max.icmp.id) - ntohs(range->min.icmp.id) + 1;
- /* If no range specified... */
- if (!(range->flags & IP_NAT_RANGE_PROTO_SPECIFIED))
- range_size = 0xFFFF;
-
- for (i = 0; i < range_size; i++, id++) {
- tuple->src.u.icmp.id = htons(ntohs(range->min.icmp.id) +
- (id % range_size));
- if (!ip_nat_used_tuple(tuple, conntrack))
- return 1;
- }
- return 0;
-}
-
-static int
-icmp_manip_pkt(struct sk_buff **pskb,
- unsigned int iphdroff,
- const struct ip_conntrack_tuple *tuple,
- enum ip_nat_manip_type maniptype)
-{
- struct iphdr *iph = (struct iphdr *)((*pskb)->data + iphdroff);
- struct icmphdr *hdr;
- unsigned int hdroff = iphdroff + iph->ihl*4;
-
- if (!skb_make_writable(pskb, hdroff + sizeof(*hdr)))
- return 0;
-
- hdr = (struct icmphdr *)((*pskb)->data + hdroff);
- nf_proto_csum_replace2(&hdr->checksum, *pskb,
- hdr->un.echo.id, tuple->src.u.icmp.id, 0);
- hdr->un.echo.id = tuple->src.u.icmp.id;
- return 1;
-}
-
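
nf_proto_csum_replace2(), used above for the echo ID, avoids re-summing the whole packet: per RFC 1624, when one 16-bit field changes from m to m' the checksum becomes HC' = ~(~HC + ~m + m'). A standalone sketch that checks the incremental result against a full recompute:

#include <stdint.h>
#include <stdio.h>

/* Ones'-complement sum of n 16-bit words, folded and inverted. */
static uint16_t csum16(const uint16_t *w, int n)
{
    uint32_t s = 0;

    while (n--)
        s += *w++;
    s = (s & 0xffff) + (s >> 16);
    s = (s & 0xffff) + (s >> 16);
    return ~s;
}

/* RFC 1624 eqn. 3: HC' = ~(~HC + ~m + m'). */
static uint16_t csum_replace2(uint16_t check, uint16_t old, uint16_t new_)
{
    uint32_t s = (uint16_t)~check;
    s += (uint16_t)~old;                      /* ~m  */
    s += new_;                                /* m'  */
    s = (s & 0xffff) + (s >> 16);             /* fold carries */
    s = (s & 0xffff) + (s >> 16);
    return ~s;
}

int main(void)
{
    uint16_t words[3] = { 0x1234, 0xabcd, 0x0042 };
    uint16_t hc = csum16(words, 3);

    words[2] = 0x1337;                        /* mangle one field */
    printf("incremental 0x%04x, full 0x%04x\n",
           csum_replace2(hc, 0x0042, 0x1337), csum16(words, 3));
    return 0;                                 /* both values match */
}
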
-struct ip_nat_protocol ip_nat_protocol_icmp = {
- .name = "ICMP",
- .protonum = IPPROTO_ICMP,
- .me = THIS_MODULE,
- .manip_pkt = icmp_manip_pkt,
- .in_range = icmp_in_range,
- .unique_tuple = icmp_unique_tuple,
-#if defined(CONFIG_IP_NF_CONNTRACK_NETLINK) || \
- defined(CONFIG_IP_NF_CONNTRACK_NETLINK_MODULE)
- .range_to_nfattr = ip_nat_port_range_to_nfattr,
- .nfattr_to_range = ip_nat_port_nfattr_to_range,
-#endif
-};
diff --git a/net/ipv4/netfilter/ip_nat_proto_tcp.c b/net/ipv4/netfilter/ip_nat_proto_tcp.c
deleted file mode 100644
index 14ff24f53a7..00000000000
--- a/net/ipv4/netfilter/ip_nat_proto_tcp.c
+++ /dev/null
@@ -1,154 +0,0 @@
-/* (C) 1999-2001 Paul `Rusty' Russell
- * (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
-
-#include <linux/types.h>
-#include <linux/init.h>
-#include <linux/random.h>
-#include <linux/netfilter.h>
-#include <linux/ip.h>
-#include <linux/tcp.h>
-#include <linux/if.h>
-#include <linux/netfilter/nfnetlink_conntrack.h>
-#include <linux/netfilter_ipv4/ip_nat.h>
-#include <linux/netfilter_ipv4/ip_nat_rule.h>
-#include <linux/netfilter_ipv4/ip_nat_protocol.h>
-#include <linux/netfilter_ipv4/ip_nat_core.h>
-
-static int
-tcp_in_range(const struct ip_conntrack_tuple *tuple,
- enum ip_nat_manip_type maniptype,
- const union ip_conntrack_manip_proto *min,
- const union ip_conntrack_manip_proto *max)
-{
- __be16 port;
-
- if (maniptype == IP_NAT_MANIP_SRC)
- port = tuple->src.u.tcp.port;
- else
- port = tuple->dst.u.tcp.port;
-
- return ntohs(port) >= ntohs(min->tcp.port)
- && ntohs(port) <= ntohs(max->tcp.port);
-}
-
-static int
-tcp_unique_tuple(struct ip_conntrack_tuple *tuple,
- const struct ip_nat_range *range,
- enum ip_nat_manip_type maniptype,
- const struct ip_conntrack *conntrack)
-{
- static u_int16_t port;
- __be16 *portptr;
- unsigned int range_size, min, i;
-
- if (maniptype == IP_NAT_MANIP_SRC)
- portptr = &tuple->src.u.tcp.port;
- else
- portptr = &tuple->dst.u.tcp.port;
-
- /* If no range specified... */
- if (!(range->flags & IP_NAT_RANGE_PROTO_SPECIFIED)) {
- /* If it's dst rewrite, can't change port */
- if (maniptype == IP_NAT_MANIP_DST)
- return 0;
-
- /* Map privileged onto privileged. */
- if (ntohs(*portptr) < 1024) {
- /* Loose convention: >> 512 is credential passing */
- if (ntohs(*portptr)<512) {
- min = 1;
- range_size = 511 - min + 1;
- } else {
- min = 600;
- range_size = 1023 - min + 1;
- }
- } else {
- min = 1024;
- range_size = 65535 - 1024 + 1;
- }
- } else {
- min = ntohs(range->min.tcp.port);
- range_size = ntohs(range->max.tcp.port) - min + 1;
- }
-
- /* Start from random port to avoid prediction */
- if (range->flags & IP_NAT_RANGE_PROTO_RANDOM)
- port = net_random();
-
- for (i = 0; i < range_size; i++, port++) {
- *portptr = htons(min + port % range_size);
- if (!ip_nat_used_tuple(tuple, conntrack)) {
- return 1;
- }
- }
- return 0;
-}
-
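
With no explicit range configured, tcp_unique_tuple() keeps the replacement source port in the same class as the original: below 512 maps into 1..511, 512..1023 into 600..1023, and anything else into 1024..65535, preserving the old privileged-port conventions some daemons check. The class selection in isolation:

#include <stdint.h>
#include <stdio.h>

/* Compute the default search window for a replacement source port,
 * mirroring the unspecified-range branch of tcp_unique_tuple(). */
static void default_range(uint16_t orig, unsigned *min, unsigned *size)
{
    if (orig < 1024) {
        if (orig < 512) {            /* rsh-style credential ports */
            *min = 1;
            *size = 511 - 1 + 1;
        } else {
            *min = 600;
            *size = 1023 - 600 + 1;
        }
    } else {
        *min = 1024;
        *size = 65535 - 1024 + 1;
    }
}

int main(void)
{
    unsigned min, size;

    default_range(513, &min, &size);
    printf("port 513 maps into [%u, %u]\n", min, min + size - 1);
    return 0;                        /* prints [600, 1023] */
}
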
-static int
-tcp_manip_pkt(struct sk_buff **pskb,
- unsigned int iphdroff,
- const struct ip_conntrack_tuple *tuple,
- enum ip_nat_manip_type maniptype)
-{
- struct iphdr *iph = (struct iphdr *)((*pskb)->data + iphdroff);
- struct tcphdr *hdr;
- unsigned int hdroff = iphdroff + iph->ihl*4;
- __be32 oldip, newip;
- __be16 *portptr, newport, oldport;
- int hdrsize = 8; /* TCP connection tracking guarantees this much */
-
- /* this could be an inner header returned in an ICMP packet; in
- such cases we cannot update the checksum field since it is
- outside the 8 bytes of transport layer headers we are
- guaranteed */
- if ((*pskb)->len >= hdroff + sizeof(struct tcphdr))
- hdrsize = sizeof(struct tcphdr);
-
- if (!skb_make_writable(pskb, hdroff + hdrsize))
- return 0;
-
- iph = (struct iphdr *)((*pskb)->data + iphdroff);
- hdr = (struct tcphdr *)((*pskb)->data + hdroff);
-
- if (maniptype == IP_NAT_MANIP_SRC) {
- /* Get rid of src ip and src pt */
- oldip = iph->saddr;
- newip = tuple->src.ip;
- newport = tuple->src.u.tcp.port;
- portptr = &hdr->source;
- } else {
- /* Get rid of dst ip and dst pt */
- oldip = iph->daddr;
- newip = tuple->dst.ip;
- newport = tuple->dst.u.tcp.port;
- portptr = &hdr->dest;
- }
-
- oldport = *portptr;
- *portptr = newport;
-
- if (hdrsize < sizeof(*hdr))
- return 1;
-
- nf_proto_csum_replace4(&hdr->check, *pskb, oldip, newip, 1);
- nf_proto_csum_replace2(&hdr->check, *pskb, oldport, newport, 0);
- return 1;
-}
-
-struct ip_nat_protocol ip_nat_protocol_tcp = {
- .name = "TCP",
- .protonum = IPPROTO_TCP,
- .me = THIS_MODULE,
- .manip_pkt = tcp_manip_pkt,
- .in_range = tcp_in_range,
- .unique_tuple = tcp_unique_tuple,
-#if defined(CONFIG_IP_NF_CONNTRACK_NETLINK) || \
- defined(CONFIG_IP_NF_CONNTRACK_NETLINK_MODULE)
- .range_to_nfattr = ip_nat_port_range_to_nfattr,
- .nfattr_to_range = ip_nat_port_nfattr_to_range,
-#endif
-};
diff --git a/net/ipv4/netfilter/ip_nat_proto_udp.c b/net/ipv4/netfilter/ip_nat_proto_udp.c
deleted file mode 100644
index dfd52167289..00000000000
--- a/net/ipv4/netfilter/ip_nat_proto_udp.c
+++ /dev/null
@@ -1,144 +0,0 @@
-/* (C) 1999-2001 Paul `Rusty' Russell
- * (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
-
-#include <linux/types.h>
-#include <linux/init.h>
-#include <linux/random.h>
-#include <linux/netfilter.h>
-#include <linux/ip.h>
-#include <linux/udp.h>
-#include <linux/if.h>
-
-#include <linux/netfilter_ipv4/ip_nat.h>
-#include <linux/netfilter_ipv4/ip_nat_core.h>
-#include <linux/netfilter_ipv4/ip_nat_rule.h>
-#include <linux/netfilter_ipv4/ip_nat_protocol.h>
-
-static int
-udp_in_range(const struct ip_conntrack_tuple *tuple,
- enum ip_nat_manip_type maniptype,
- const union ip_conntrack_manip_proto *min,
- const union ip_conntrack_manip_proto *max)
-{
- __be16 port;
-
- if (maniptype == IP_NAT_MANIP_SRC)
- port = tuple->src.u.udp.port;
- else
- port = tuple->dst.u.udp.port;
-
- return ntohs(port) >= ntohs(min->udp.port)
- && ntohs(port) <= ntohs(max->udp.port);
-}
-
-static int
-udp_unique_tuple(struct ip_conntrack_tuple *tuple,
- const struct ip_nat_range *range,
- enum ip_nat_manip_type maniptype,
- const struct ip_conntrack *conntrack)
-{
- static u_int16_t port;
- __be16 *portptr;
- unsigned int range_size, min, i;
-
- if (maniptype == IP_NAT_MANIP_SRC)
- portptr = &tuple->src.u.udp.port;
- else
- portptr = &tuple->dst.u.udp.port;
-
- /* If no range specified... */
- if (!(range->flags & IP_NAT_RANGE_PROTO_SPECIFIED)) {
- /* If it's dst rewrite, can't change port */
- if (maniptype == IP_NAT_MANIP_DST)
- return 0;
-
- if (ntohs(*portptr) < 1024) {
- /* Loose convention: >> 512 is credential passing */
- if (ntohs(*portptr)<512) {
- min = 1;
- range_size = 511 - min + 1;
- } else {
- min = 600;
- range_size = 1023 - min + 1;
- }
- } else {
- min = 1024;
- range_size = 65535 - 1024 + 1;
- }
- } else {
- min = ntohs(range->min.udp.port);
- range_size = ntohs(range->max.udp.port) - min + 1;
- }
-
- /* Start from random port to avoid prediction */
- if (range->flags & IP_NAT_RANGE_PROTO_RANDOM)
- port = net_random();
-
- for (i = 0; i < range_size; i++, port++) {
- *portptr = htons(min + port % range_size);
- if (!ip_nat_used_tuple(tuple, conntrack))
- return 1;
- }
- return 0;
-}
-
-static int
-udp_manip_pkt(struct sk_buff **pskb,
- unsigned int iphdroff,
- const struct ip_conntrack_tuple *tuple,
- enum ip_nat_manip_type maniptype)
-{
- struct iphdr *iph = (struct iphdr *)((*pskb)->data + iphdroff);
- struct udphdr *hdr;
- unsigned int hdroff = iphdroff + iph->ihl*4;
- __be32 oldip, newip;
- __be16 *portptr, newport;
-
- if (!skb_make_writable(pskb, hdroff + sizeof(*hdr)))
- return 0;
-
- iph = (struct iphdr *)((*pskb)->data + iphdroff);
- hdr = (struct udphdr *)((*pskb)->data + hdroff);
-
- if (maniptype == IP_NAT_MANIP_SRC) {
- /* Get rid of src ip and src pt */
- oldip = iph->saddr;
- newip = tuple->src.ip;
- newport = tuple->src.u.udp.port;
- portptr = &hdr->source;
- } else {
- /* Get rid of dst ip and dst pt */
- oldip = iph->daddr;
- newip = tuple->dst.ip;
- newport = tuple->dst.u.udp.port;
- portptr = &hdr->dest;
- }
-
- if (hdr->check || (*pskb)->ip_summed == CHECKSUM_PARTIAL) {
- nf_proto_csum_replace4(&hdr->check, *pskb, oldip, newip, 1);
- nf_proto_csum_replace2(&hdr->check, *pskb, *portptr, newport, 0);
- if (!hdr->check)
- hdr->check = CSUM_MANGLED_0;
- }
- *portptr = newport;
- return 1;
-}
-
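
The checksum branch above encodes a UDP quirk: a checksum of zero on the wire means "not computed", so datagrams sent without one are left alone, and a mangled checksum that works out to zero must be transmitted as 0xFFFF (the kernel's CSUM_MANGLED_0). A sketch of that wrapper around the RFC 1624 update shown in the ICMP note earlier:

#include <stdint.h>
#include <stdio.h>

/* RFC 1624 one-field update (see the ICMP sketch earlier). */
static uint16_t csum_replace2(uint16_t check, uint16_t old, uint16_t new_)
{
    uint32_t s = (uint16_t)~check;
    s += (uint16_t)~old;
    s += new_;
    s = (s & 0xffff) + (s >> 16);
    s = (s & 0xffff) + (s >> 16);
    return ~s;
}

/* UDP wrapper: 0 means "no checksum", and a mangled result of 0 must
 * go out as 0xFFFF so it isn't mistaken for "no checksum". */
static void udp_csum_update(uint16_t *check, uint16_t oldp, uint16_t newp)
{
    if (*check == 0)
        return;                      /* sender didn't checksum: skip */
    *check = csum_replace2(*check, oldp, newp);
    if (*check == 0)
        *check = 0xffff;
}

int main(void)
{
    uint16_t none = 0, some = 0x41bc;

    udp_csum_update(&none, 1234, 5678);
    udp_csum_update(&some, 1234, 5678);
    printf("0x%04x 0x%04x\n", none, some);   /* none stays 0x0000 */
    return 0;
}
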
-struct ip_nat_protocol ip_nat_protocol_udp = {
- .name = "UDP",
- .protonum = IPPROTO_UDP,
- .me = THIS_MODULE,
- .manip_pkt = udp_manip_pkt,
- .in_range = udp_in_range,
- .unique_tuple = udp_unique_tuple,
-#if defined(CONFIG_IP_NF_CONNTRACK_NETLINK) || \
- defined(CONFIG_IP_NF_CONNTRACK_NETLINK_MODULE)
- .range_to_nfattr = ip_nat_port_range_to_nfattr,
- .nfattr_to_range = ip_nat_port_nfattr_to_range,
-#endif
-};
diff --git a/net/ipv4/netfilter/ip_nat_proto_unknown.c b/net/ipv4/netfilter/ip_nat_proto_unknown.c
deleted file mode 100644
index 3bf04951724..00000000000
--- a/net/ipv4/netfilter/ip_nat_proto_unknown.c
+++ /dev/null
@@ -1,55 +0,0 @@
-/* The "unknown" protocol. This is what is used for protocols we
- * don't understand. It's returned by ip_ct_find_proto().
- */
-
-/* (C) 1999-2001 Paul `Rusty' Russell
- * (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
-
-#include <linux/types.h>
-#include <linux/init.h>
-#include <linux/netfilter.h>
-#include <linux/if.h>
-
-#include <linux/netfilter_ipv4/ip_nat.h>
-#include <linux/netfilter_ipv4/ip_nat_rule.h>
-#include <linux/netfilter_ipv4/ip_nat_protocol.h>
-
-static int unknown_in_range(const struct ip_conntrack_tuple *tuple,
- enum ip_nat_manip_type manip_type,
- const union ip_conntrack_manip_proto *min,
- const union ip_conntrack_manip_proto *max)
-{
- return 1;
-}
-
-static int unknown_unique_tuple(struct ip_conntrack_tuple *tuple,
- const struct ip_nat_range *range,
- enum ip_nat_manip_type maniptype,
- const struct ip_conntrack *conntrack)
-{
- /* Sorry: we can't help you; if it's not unique, we can't frob
- anything. */
- return 0;
-}
-
-static int
-unknown_manip_pkt(struct sk_buff **pskb,
- unsigned int iphdroff,
- const struct ip_conntrack_tuple *tuple,
- enum ip_nat_manip_type maniptype)
-{
- return 1;
-}
-
-struct ip_nat_protocol ip_nat_unknown_protocol = {
- .name = "unknown",
- /* .me isn't set: getting a ref to this cannot fail. */
- .manip_pkt = unknown_manip_pkt,
- .in_range = unknown_in_range,
- .unique_tuple = unknown_unique_tuple,
-};
diff --git a/net/ipv4/netfilter/ip_nat_rule.c b/net/ipv4/netfilter/ip_nat_rule.c
deleted file mode 100644
index 080eb1d9220..00000000000
--- a/net/ipv4/netfilter/ip_nat_rule.c
+++ /dev/null
@@ -1,314 +0,0 @@
-/* (C) 1999-2001 Paul `Rusty' Russell
- * (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
-
-/* Everything about the rules for NAT. */
-#include <linux/types.h>
-#include <linux/ip.h>
-#include <linux/netfilter.h>
-#include <linux/netfilter_ipv4.h>
-#include <linux/module.h>
-#include <linux/kmod.h>
-#include <linux/skbuff.h>
-#include <linux/proc_fs.h>
-#include <net/checksum.h>
-#include <net/route.h>
-#include <linux/bitops.h>
-
-#include <linux/netfilter_ipv4/ip_tables.h>
-#include <linux/netfilter_ipv4/ip_nat.h>
-#include <linux/netfilter_ipv4/ip_nat_core.h>
-#include <linux/netfilter_ipv4/ip_nat_rule.h>
-
-#if 0
-#define DEBUGP printk
-#else
-#define DEBUGP(format, args...)
-#endif
-
-#define NAT_VALID_HOOKS ((1<<NF_IP_PRE_ROUTING) | (1<<NF_IP_POST_ROUTING) | (1<<NF_IP_LOCAL_OUT))
-
-static struct
-{
- struct ipt_replace repl;
- struct ipt_standard entries[3];
- struct ipt_error term;
-} nat_initial_table __initdata
-= { { "nat", NAT_VALID_HOOKS, 4,
- sizeof(struct ipt_standard) * 3 + sizeof(struct ipt_error),
- { [NF_IP_PRE_ROUTING] = 0,
- [NF_IP_POST_ROUTING] = sizeof(struct ipt_standard),
- [NF_IP_LOCAL_OUT] = sizeof(struct ipt_standard) * 2 },
- { [NF_IP_PRE_ROUTING] = 0,
- [NF_IP_POST_ROUTING] = sizeof(struct ipt_standard),
- [NF_IP_LOCAL_OUT] = sizeof(struct ipt_standard) * 2 },
- 0, NULL, { } },
- {
- /* PRE_ROUTING */
- { { { { 0 }, { 0 }, { 0 }, { 0 }, "", "", { 0 }, { 0 }, 0, 0, 0 },
- 0,
- sizeof(struct ipt_entry),
- sizeof(struct ipt_standard),
- 0, { 0, 0 }, { } },
- { { { { IPT_ALIGN(sizeof(struct ipt_standard_target)), "" } }, { } },
- -NF_ACCEPT - 1 } },
- /* POST_ROUTING */
- { { { { 0 }, { 0 }, { 0 }, { 0 }, "", "", { 0 }, { 0 }, 0, 0, 0 },
- 0,
- sizeof(struct ipt_entry),
- sizeof(struct ipt_standard),
- 0, { 0, 0 }, { } },
- { { { { IPT_ALIGN(sizeof(struct ipt_standard_target)), "" } }, { } },
- -NF_ACCEPT - 1 } },
- /* LOCAL_OUT */
- { { { { 0 }, { 0 }, { 0 }, { 0 }, "", "", { 0 }, { 0 }, 0, 0, 0 },
- 0,
- sizeof(struct ipt_entry),
- sizeof(struct ipt_standard),
- 0, { 0, 0 }, { } },
- { { { { IPT_ALIGN(sizeof(struct ipt_standard_target)), "" } }, { } },
- -NF_ACCEPT - 1 } }
- },
- /* ERROR */
- { { { { 0 }, { 0 }, { 0 }, { 0 }, "", "", { 0 }, { 0 }, 0, 0, 0 },
- 0,
- sizeof(struct ipt_entry),
- sizeof(struct ipt_error),
- 0, { 0, 0 }, { } },
- { { { { IPT_ALIGN(sizeof(struct ipt_error_target)), IPT_ERROR_TARGET } },
- { } },
- "ERROR"
- }
- }
-};
-
-static struct xt_table nat_table = {
- .name = "nat",
- .valid_hooks = NAT_VALID_HOOKS,
- .lock = RW_LOCK_UNLOCKED,
- .me = THIS_MODULE,
- .af = AF_INET,
-};
-
-/* Source NAT */
-static unsigned int ipt_snat_target(struct sk_buff **pskb,
- const struct net_device *in,
- const struct net_device *out,
- unsigned int hooknum,
- const struct xt_target *target,
- const void *targinfo)
-{
- struct ip_conntrack *ct;
- enum ip_conntrack_info ctinfo;
- const struct ip_nat_multi_range_compat *mr = targinfo;
-
- IP_NF_ASSERT(hooknum == NF_IP_POST_ROUTING);
-
- ct = ip_conntrack_get(*pskb, &ctinfo);
-
- /* Connection must be valid and new. */
- IP_NF_ASSERT(ct && (ctinfo == IP_CT_NEW || ctinfo == IP_CT_RELATED
- || ctinfo == IP_CT_RELATED + IP_CT_IS_REPLY));
- IP_NF_ASSERT(out);
-
- return ip_nat_setup_info(ct, &mr->range[0], hooknum);
-}
-
-/* Before 2.6.11 we did implicit source NAT if required. Warn about change. */
-static void warn_if_extra_mangle(__be32 dstip, __be32 srcip)
-{
- static int warned = 0;
- struct flowi fl = { .nl_u = { .ip4_u = { .daddr = dstip } } };
- struct rtable *rt;
-
- if (ip_route_output_key(&rt, &fl) != 0)
- return;
-
- if (rt->rt_src != srcip && !warned) {
- printk("NAT: implicit source local NAT no longer supported\n");
- printk("NAT: packet src %u.%u.%u.%u -> dst %u.%u.%u.%u\n",
- NIPQUAD(srcip), NIPQUAD(dstip));
- warned = 1;
- }
- ip_rt_put(rt);
-}
-
-static unsigned int ipt_dnat_target(struct sk_buff **pskb,
- const struct net_device *in,
- const struct net_device *out,
- unsigned int hooknum,
- const struct xt_target *target,
- const void *targinfo)
-{
- struct ip_conntrack *ct;
- enum ip_conntrack_info ctinfo;
- const struct ip_nat_multi_range_compat *mr = targinfo;
-
- IP_NF_ASSERT(hooknum == NF_IP_PRE_ROUTING
- || hooknum == NF_IP_LOCAL_OUT);
-
- ct = ip_conntrack_get(*pskb, &ctinfo);
-
- /* Connection must be valid and new. */
- IP_NF_ASSERT(ct && (ctinfo == IP_CT_NEW || ctinfo == IP_CT_RELATED));
-
- if (hooknum == NF_IP_LOCAL_OUT
- && mr->range[0].flags & IP_NAT_RANGE_MAP_IPS)
- warn_if_extra_mangle((*pskb)->nh.iph->daddr,
- mr->range[0].min_ip);
-
- return ip_nat_setup_info(ct, &mr->range[0], hooknum);
-}
-
-static int ipt_snat_checkentry(const char *tablename,
- const void *entry,
- const struct xt_target *target,
- void *targinfo,
- unsigned int hook_mask)
-{
- struct ip_nat_multi_range_compat *mr = targinfo;
-
- /* Must be a valid range */
- if (mr->rangesize != 1) {
- printk("SNAT: multiple ranges no longer supported\n");
- return 0;
- }
- return 1;
-}
-
-static int ipt_dnat_checkentry(const char *tablename,
- const void *entry,
- const struct xt_target *target,
- void *targinfo,
- unsigned int hook_mask)
-{
- struct ip_nat_multi_range_compat *mr = targinfo;
-
- /* Must be a valid range */
- if (mr->rangesize != 1) {
- printk("DNAT: multiple ranges no longer supported\n");
- return 0;
- }
- if (mr->range[0].flags & IP_NAT_RANGE_PROTO_RANDOM) {
- printk("DNAT: port randomization not supported\n");
- return 0;
- }
- return 1;
-}
-
-inline unsigned int
-alloc_null_binding(struct ip_conntrack *conntrack,
- struct ip_nat_info *info,
- unsigned int hooknum)
-{
- /* Force range to this IP; let proto decide mapping for
- per-proto parts (hence not IP_NAT_RANGE_PROTO_SPECIFIED).
- Use reply in case it's already been mangled (eg local packet).
- */
- __be32 ip
- = (HOOK2MANIP(hooknum) == IP_NAT_MANIP_SRC
- ? conntrack->tuplehash[IP_CT_DIR_REPLY].tuple.dst.ip
- : conntrack->tuplehash[IP_CT_DIR_REPLY].tuple.src.ip);
- struct ip_nat_range range
- = { IP_NAT_RANGE_MAP_IPS, ip, ip, { 0 }, { 0 } };
-
- DEBUGP("Allocating NULL binding for %p (%u.%u.%u.%u)\n", conntrack,
- NIPQUAD(ip));
- return ip_nat_setup_info(conntrack, &range, hooknum);
-}
-
-unsigned int
-alloc_null_binding_confirmed(struct ip_conntrack *conntrack,
- struct ip_nat_info *info,
- unsigned int hooknum)
-{
- __be32 ip
- = (HOOK2MANIP(hooknum) == IP_NAT_MANIP_SRC
- ? conntrack->tuplehash[IP_CT_DIR_REPLY].tuple.dst.ip
- : conntrack->tuplehash[IP_CT_DIR_REPLY].tuple.src.ip);
- u_int16_t all
- = (HOOK2MANIP(hooknum) == IP_NAT_MANIP_SRC
- ? conntrack->tuplehash[IP_CT_DIR_REPLY].tuple.dst.u.all
- : conntrack->tuplehash[IP_CT_DIR_REPLY].tuple.src.u.all);
- struct ip_nat_range range
- = { IP_NAT_RANGE_MAP_IPS, ip, ip, { all }, { all } };
-
- DEBUGP("Allocating NULL binding for confirmed %p (%u.%u.%u.%u)\n",
- conntrack, NIPQUAD(ip));
- return ip_nat_setup_info(conntrack, &range, hooknum);
-}
-
-int ip_nat_rule_find(struct sk_buff **pskb,
- unsigned int hooknum,
- const struct net_device *in,
- const struct net_device *out,
- struct ip_conntrack *ct,
- struct ip_nat_info *info)
-{
- int ret;
-
- ret = ipt_do_table(pskb, hooknum, in, out, &nat_table);
-
- if (ret == NF_ACCEPT) {
- if (!ip_nat_initialized(ct, HOOK2MANIP(hooknum)))
- /* NULL mapping */
- ret = alloc_null_binding(ct, info, hooknum);
- }
- return ret;
-}
-
-static struct xt_target ipt_snat_reg = {
- .name = "SNAT",
- .family = AF_INET,
- .target = ipt_snat_target,
- .targetsize = sizeof(struct ip_nat_multi_range_compat),
- .table = "nat",
- .hooks = 1 << NF_IP_POST_ROUTING,
- .checkentry = ipt_snat_checkentry,
-};
-
-static struct xt_target ipt_dnat_reg = {
- .name = "DNAT",
- .family = AF_INET,
- .target = ipt_dnat_target,
- .targetsize = sizeof(struct ip_nat_multi_range_compat),
- .table = "nat",
- .hooks = (1 << NF_IP_PRE_ROUTING) | (1 << NF_IP_LOCAL_OUT),
- .checkentry = ipt_dnat_checkentry,
-};
-
-int __init ip_nat_rule_init(void)
-{
- int ret;
-
- ret = ipt_register_table(&nat_table, &nat_initial_table.repl);
- if (ret != 0)
- return ret;
- ret = xt_register_target(&ipt_snat_reg);
- if (ret != 0)
- goto unregister_table;
-
- ret = xt_register_target(&ipt_dnat_reg);
- if (ret != 0)
- goto unregister_snat;
-
- return ret;
-
- unregister_snat:
- xt_unregister_target(&ipt_snat_reg);
- unregister_table:
- xt_unregister_table(&nat_table);
-
- return ret;
-}
-
-void ip_nat_rule_cleanup(void)
-{
- xt_unregister_target(&ipt_dnat_reg);
- xt_unregister_target(&ipt_snat_reg);
- ipt_unregister_table(&nat_table);
-}
diff --git a/net/ipv4/netfilter/ip_nat_sip.c b/net/ipv4/netfilter/ip_nat_sip.c
deleted file mode 100644
index 325c5a9dc2e..00000000000
--- a/net/ipv4/netfilter/ip_nat_sip.c
+++ /dev/null
@@ -1,282 +0,0 @@
-/* SIP extension for UDP NAT alteration.
- *
- * (C) 2005 by Christian Hentschel <chentschel@arnet.com.ar>
- * based on RR's ip_nat_ftp.c and other modules.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
-
-#include <linux/module.h>
-#include <linux/skbuff.h>
-#include <linux/ip.h>
-#include <linux/udp.h>
-
-#include <linux/netfilter_ipv4.h>
-#include <linux/netfilter_ipv4/ip_nat.h>
-#include <linux/netfilter_ipv4/ip_nat_helper.h>
-#include <linux/netfilter_ipv4/ip_conntrack_helper.h>
-#include <linux/netfilter_ipv4/ip_conntrack_sip.h>
-
-MODULE_LICENSE("GPL");
-MODULE_AUTHOR("Christian Hentschel <chentschel@arnet.com.ar>");
-MODULE_DESCRIPTION("SIP NAT helper");
-
-#if 0
-#define DEBUGP printk
-#else
-#define DEBUGP(format, args...)
-#endif
-
-struct addr_map {
- struct {
- char src[sizeof("nnn.nnn.nnn.nnn:nnnnn")];
- char dst[sizeof("nnn.nnn.nnn.nnn:nnnnn")];
- unsigned int srclen, srciplen;
- unsigned int dstlen, dstiplen;
- } addr[IP_CT_DIR_MAX];
-};
-
-static void addr_map_init(struct ip_conntrack *ct, struct addr_map *map)
-{
- struct ip_conntrack_tuple *t;
- enum ip_conntrack_dir dir;
- unsigned int n;
-
- for (dir = 0; dir < IP_CT_DIR_MAX; dir++) {
- t = &ct->tuplehash[dir].tuple;
-
- n = sprintf(map->addr[dir].src, "%u.%u.%u.%u",
- NIPQUAD(t->src.ip));
- map->addr[dir].srciplen = n;
- n += sprintf(map->addr[dir].src + n, ":%u",
- ntohs(t->src.u.udp.port));
- map->addr[dir].srclen = n;
-
- n = sprintf(map->addr[dir].dst, "%u.%u.%u.%u",
- NIPQUAD(t->dst.ip));
- map->addr[dir].dstiplen = n;
- n += sprintf(map->addr[dir].dst + n, ":%u",
- ntohs(t->dst.u.udp.port));
- map->addr[dir].dstlen = n;
- }
-}
-
-static int map_sip_addr(struct sk_buff **pskb, enum ip_conntrack_info ctinfo,
- struct ip_conntrack *ct, const char **dptr, size_t dlen,
- enum sip_header_pos pos, struct addr_map *map)
-{
- enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo);
- unsigned int matchlen, matchoff, addrlen;
- char *addr;
-
- if (ct_sip_get_info(*dptr, dlen, &matchoff, &matchlen, pos) <= 0)
- return 1;
-
- if ((matchlen == map->addr[dir].srciplen ||
- matchlen == map->addr[dir].srclen) &&
- memcmp(*dptr + matchoff, map->addr[dir].src, matchlen) == 0) {
- addr = map->addr[!dir].dst;
- addrlen = map->addr[!dir].dstlen;
- } else if ((matchlen == map->addr[dir].dstiplen ||
- matchlen == map->addr[dir].dstlen) &&
- memcmp(*dptr + matchoff, map->addr[dir].dst, matchlen) == 0) {
- addr = map->addr[!dir].src;
- addrlen = map->addr[!dir].srclen;
- } else
- return 1;
-
- if (!ip_nat_mangle_udp_packet(pskb, ct, ctinfo,
- matchoff, matchlen, addr, addrlen))
- return 0;
- *dptr = (*pskb)->data + (*pskb)->nh.iph->ihl*4 + sizeof(struct udphdr);
- return 1;
-
-}
-
-static unsigned int ip_nat_sip(struct sk_buff **pskb,
- enum ip_conntrack_info ctinfo,
- struct ip_conntrack *ct,
- const char **dptr)
-{
- enum sip_header_pos pos;
- struct addr_map map;
- int dataoff, datalen;
-
- dataoff = (*pskb)->nh.iph->ihl*4 + sizeof(struct udphdr);
- datalen = (*pskb)->len - dataoff;
- if (datalen < sizeof("SIP/2.0") - 1)
- return NF_DROP;
-
- addr_map_init(ct, &map);
-
- /* Basic rules: requests and responses. */
- if (strncmp(*dptr, "SIP/2.0", sizeof("SIP/2.0") - 1) != 0) {
- /* 10.2: Constructing the REGISTER Request:
- *
- * The "userinfo" and "@" components of the SIP URI MUST NOT
- * be present.
- */
- if (datalen >= sizeof("REGISTER") - 1 &&
- strncmp(*dptr, "REGISTER", sizeof("REGISTER") - 1) == 0)
- pos = POS_REG_REQ_URI;
- else
- pos = POS_REQ_URI;
-
- if (!map_sip_addr(pskb, ctinfo, ct, dptr, datalen, pos, &map))
- return NF_DROP;
- }
-
- if (!map_sip_addr(pskb, ctinfo, ct, dptr, datalen, POS_FROM, &map) ||
- !map_sip_addr(pskb, ctinfo, ct, dptr, datalen, POS_TO, &map) ||
- !map_sip_addr(pskb, ctinfo, ct, dptr, datalen, POS_VIA, &map) ||
- !map_sip_addr(pskb, ctinfo, ct, dptr, datalen, POS_CONTACT, &map))
- return NF_DROP;
- return NF_ACCEPT;
-}
-
-static unsigned int mangle_sip_packet(struct sk_buff **pskb,
- enum ip_conntrack_info ctinfo,
- struct ip_conntrack *ct,
- const char **dptr, size_t dlen,
- char *buffer, int bufflen,
- enum sip_header_pos pos)
-{
- unsigned int matchlen, matchoff;
-
- if (ct_sip_get_info(*dptr, dlen, &matchoff, &matchlen, pos) <= 0)
- return 0;
-
- if (!ip_nat_mangle_udp_packet(pskb, ct, ctinfo,
- matchoff, matchlen, buffer, bufflen))
- return 0;
-
- /* We need to reload this. Thanks Patrick. */
- *dptr = (*pskb)->data + (*pskb)->nh.iph->ihl*4 + sizeof(struct udphdr);
- return 1;
-}
-
-static int mangle_content_len(struct sk_buff **pskb,
- enum ip_conntrack_info ctinfo,
- struct ip_conntrack *ct,
- const char *dptr)
-{
- unsigned int dataoff, matchoff, matchlen;
- char buffer[sizeof("65536")];
- int bufflen;
-
- dataoff = (*pskb)->nh.iph->ihl*4 + sizeof(struct udphdr);
-
-	/* Get actual SDP length */
- if (ct_sip_get_info(dptr, (*pskb)->len - dataoff, &matchoff,
- &matchlen, POS_SDP_HEADER) > 0) {
-
-		/* since ct_sip_get_info() gives us a pointer just past 'v=',
-		   we need to add 2 bytes to this count. */
- int c_len = (*pskb)->len - dataoff - matchoff + 2;
-
-		/* Now, update SDP length */
- if (ct_sip_get_info(dptr, (*pskb)->len - dataoff, &matchoff,
- &matchlen, POS_CONTENT) > 0) {
-
- bufflen = sprintf(buffer, "%u", c_len);
-
- return ip_nat_mangle_udp_packet(pskb, ct, ctinfo,
- matchoff, matchlen,
- buffer, bufflen);
- }
- }
- return 0;
-}
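/*
 * Editor's sketch of the Content-Length arithmetic in
 * mangle_content_len() above (illustrative offsets only): the SDP body
 * is measured from the match just past "v=", so two bytes are added
 * back to count "v=" itself.
 */
#include <stdio.h>

int main(void)
{
	unsigned int payload_len = 180;	/* SIP message payload length */
	unsigned int matchoff = 120;	/* offset just past "v=" */
	unsigned int c_len = payload_len - matchoff + 2;

	printf("Content-Length: %u\n", c_len);	/* prints 62 */
	return 0;
}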
-
-static unsigned int mangle_sdp(struct sk_buff **pskb,
- enum ip_conntrack_info ctinfo,
- struct ip_conntrack *ct,
- __be32 newip, u_int16_t port,
- const char *dptr)
-{
- char buffer[sizeof("nnn.nnn.nnn.nnn")];
- unsigned int dataoff, bufflen;
-
- dataoff = (*pskb)->nh.iph->ihl*4 + sizeof(struct udphdr);
-
- /* Mangle owner and contact info. */
- bufflen = sprintf(buffer, "%u.%u.%u.%u", NIPQUAD(newip));
- if (!mangle_sip_packet(pskb, ctinfo, ct, &dptr, (*pskb)->len - dataoff,
- buffer, bufflen, POS_OWNER))
- return 0;
-
- if (!mangle_sip_packet(pskb, ctinfo, ct, &dptr, (*pskb)->len - dataoff,
- buffer, bufflen, POS_CONNECTION))
- return 0;
-
- /* Mangle media port. */
- bufflen = sprintf(buffer, "%u", port);
- if (!mangle_sip_packet(pskb, ctinfo, ct, &dptr, (*pskb)->len - dataoff,
- buffer, bufflen, POS_MEDIA))
- return 0;
-
- return mangle_content_len(pskb, ctinfo, ct, dptr);
-}
-
-/* So, this packet has hit the connection tracking matching code.
- Mangle it, and change the expectation to match the new version. */
-static unsigned int ip_nat_sdp(struct sk_buff **pskb,
- enum ip_conntrack_info ctinfo,
- struct ip_conntrack_expect *exp,
- const char *dptr)
-{
- struct ip_conntrack *ct = exp->master;
- enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo);
- __be32 newip;
- u_int16_t port;
-
- DEBUGP("ip_nat_sdp():\n");
-
- /* Connection will come from reply */
- newip = ct->tuplehash[!dir].tuple.dst.ip;
-
- exp->tuple.dst.ip = newip;
- exp->saved_proto.udp.port = exp->tuple.dst.u.udp.port;
- exp->dir = !dir;
-
-	/* When you see the packet, we need to NAT it the same as
-	   this one. */
- exp->expectfn = ip_nat_follow_master;
-
- /* Try to get same port: if not, try to change it. */
- for (port = ntohs(exp->saved_proto.udp.port); port != 0; port++) {
- exp->tuple.dst.u.udp.port = htons(port);
- if (ip_conntrack_expect_related(exp) == 0)
- break;
- }
-
- if (port == 0)
- return NF_DROP;
-
- if (!mangle_sdp(pskb, ctinfo, ct, newip, port, dptr)) {
- ip_conntrack_unexpect_related(exp);
- return NF_DROP;
- }
- return NF_ACCEPT;
-}
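/*
 * Editor's sketch of the port-search loop in ip_nat_sdp() above: start
 * at the saved port and walk upward until registration succeeds; the
 * unsigned 16-bit counter wrapping around to 0 is what terminates the
 * search once the whole range is exhausted. try_register() is a
 * hypothetical stand-in for ip_conntrack_expect_related().
 */
#include <stdint.h>
#include <stdio.h>

static int try_register(uint16_t port)
{
	return port >= 1030 ? 0 : -1;	/* pretend ports below 1030 are taken */
}

int main(void)
{
	uint16_t port;

	for (port = 1024; port != 0; port++)
		if (try_register(port) == 0)
			break;

	if (port == 0)
		printf("no free port\n");	/* u16 wrapped around */
	else
		printf("got port %u\n", port);	/* prints 1030 */
	return 0;
}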
-
-static void __exit fini(void)
-{
- rcu_assign_pointer(ip_nat_sip_hook, NULL);
- rcu_assign_pointer(ip_nat_sdp_hook, NULL);
- synchronize_rcu();
-}
-
-static int __init init(void)
-{
- BUG_ON(rcu_dereference(ip_nat_sip_hook));
- BUG_ON(rcu_dereference(ip_nat_sdp_hook));
- rcu_assign_pointer(ip_nat_sip_hook, ip_nat_sip);
- rcu_assign_pointer(ip_nat_sdp_hook, ip_nat_sdp);
- return 0;
-}
-
-module_init(init);
-module_exit(fini);
diff --git a/net/ipv4/netfilter/ip_nat_snmp_basic.c b/net/ipv4/netfilter/ip_nat_snmp_basic.c
deleted file mode 100644
index e41d0efae51..00000000000
--- a/net/ipv4/netfilter/ip_nat_snmp_basic.c
+++ /dev/null
@@ -1,1333 +0,0 @@
-/*
- * ip_nat_snmp_basic.c
- *
- * Basic SNMP Application Layer Gateway
- *
- * This IP NAT module is intended for use with SNMP network
- * discovery and monitoring applications where target networks use
- * conflicting private address realms.
- *
- * Static NAT is used to remap the networks from the view of the network
- * management system at the IP layer, and this module remaps some application
- * layer addresses to match.
- *
- * The simplest form of ALG is performed, where only tagged IP addresses
- * are modified. The module does not need to be MIB aware and only scans
- * messages at the ASN.1/BER level.
- *
- * Currently, only SNMPv1 and SNMPv2 are supported.
- *
- * More information on ALG and associated issues can be found in
- * RFC 2962
- *
- * The ASN.1/BER parsing code is derived from the gxsnmp package by Gregory
- * McLean & Jochen Friedrich, stripped down for use in the kernel.
- *
- * Copyright (c) 2000 RP Internet (www.rpi.net.au).
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
- *
- * Author: James Morris <jmorris@intercode.com.au>
- *
- * Updates:
- * 2000-08-06: Convert to new helper API (Harald Welte).
- *
- */
-#include <linux/in.h>
-#include <linux/module.h>
-#include <linux/types.h>
-#include <linux/kernel.h>
-#include <linux/moduleparam.h>
-#include <linux/netfilter_ipv4.h>
-#include <linux/netfilter_ipv4/ip_nat.h>
-#include <linux/netfilter_ipv4/ip_conntrack_helper.h>
-#include <linux/netfilter_ipv4/ip_nat_helper.h>
-#include <linux/ip.h>
-#include <linux/udp.h>
-#include <net/checksum.h>
-#include <net/udp.h>
-#include <asm/uaccess.h>
-
-MODULE_LICENSE("GPL");
-MODULE_AUTHOR("James Morris <jmorris@intercode.com.au>");
-MODULE_DESCRIPTION("Basic SNMP Application Layer Gateway");
-
-#define SNMP_PORT 161
-#define SNMP_TRAP_PORT 162
-#define NOCT1(n) (*(u8 *)n)
-
-static int debug;
-static DEFINE_SPINLOCK(snmp_lock);
-
-/*
- * Application layer address mapping mimics the NAT mapping, but
- * only for the first octet in this case (a more flexible system
- * can be implemented if needed).
- */
-struct oct1_map
-{
- u_int8_t from;
- u_int8_t to;
-};
-
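/*
 * Editor's sketch (standalone, illustrative values) of the one-octet
 * remapping the structure above describes: only the first octet of an
 * embedded IPv4 address is rewritten, mirroring a /8 static NAT.
 */
#include <stdint.h>
#include <stdio.h>

struct oct1_map_demo { uint8_t from, to; };

static void remap_first_octet(uint8_t addr[4],
			      const struct oct1_map_demo *map)
{
	if (addr[0] == map->from)
		addr[0] = map->to;
}

int main(void)
{
	uint8_t a[4] = { 10, 1, 2, 3 };
	struct oct1_map_demo m = { 10, 192 };

	remap_first_octet(a, &m);
	printf("%u.%u.%u.%u\n", a[0], a[1], a[2], a[3]);	/* 192.1.2.3 */
	return 0;
}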
-
-/*****************************************************************************
- *
- * Basic ASN.1 decoding routines (gxsnmp author Dirk Wisse)
- *
- *****************************************************************************/
-
-/* Class */
-#define ASN1_UNI 0 /* Universal */
-#define ASN1_APL 1 /* Application */
-#define ASN1_CTX 2 /* Context */
-#define ASN1_PRV 3 /* Private */
-
-/* Tag */
-#define ASN1_EOC 0 /* End Of Contents */
-#define ASN1_BOL 1 /* Boolean */
-#define ASN1_INT 2 /* Integer */
-#define ASN1_BTS 3 /* Bit String */
-#define ASN1_OTS 4 /* Octet String */
-#define ASN1_NUL 5 /* Null */
-#define ASN1_OJI 6 /* Object Identifier */
-#define ASN1_OJD 7 /* Object Description */
-#define ASN1_EXT 8 /* External */
-#define ASN1_SEQ 16 /* Sequence */
-#define ASN1_SET 17 /* Set */
-#define ASN1_NUMSTR 18 /* Numerical String */
-#define ASN1_PRNSTR 19 /* Printable String */
-#define ASN1_TEXSTR 20 /* Teletext String */
-#define ASN1_VIDSTR 21 /* Video String */
-#define ASN1_IA5STR 22 /* IA5 String */
-#define ASN1_UNITIM 23 /* Universal Time */
-#define ASN1_GENTIM 24 /* General Time */
-#define ASN1_GRASTR 25 /* Graphical String */
-#define ASN1_VISSTR 26 /* Visible String */
-#define ASN1_GENSTR 27 /* General String */
-
-/* Primitive / Constructed methods*/
-#define ASN1_PRI 0 /* Primitive */
-#define ASN1_CON 1 /* Constructed */
-
-/*
- * Error codes.
- */
-#define ASN1_ERR_NOERROR 0
-#define ASN1_ERR_DEC_EMPTY 2
-#define ASN1_ERR_DEC_EOC_MISMATCH 3
-#define ASN1_ERR_DEC_LENGTH_MISMATCH 4
-#define ASN1_ERR_DEC_BADVALUE 5
-
-/*
- * ASN.1 context.
- */
-struct asn1_ctx
-{
- int error; /* Error condition */
- unsigned char *pointer; /* Octet just to be decoded */
- unsigned char *begin; /* First octet */
- unsigned char *end; /* Octet after last octet */
-};
-
-/*
- * Octet string (not null terminated)
- */
-struct asn1_octstr
-{
- unsigned char *data;
- unsigned int len;
-};
-
-static void asn1_open(struct asn1_ctx *ctx,
- unsigned char *buf,
- unsigned int len)
-{
- ctx->begin = buf;
- ctx->end = buf + len;
- ctx->pointer = buf;
- ctx->error = ASN1_ERR_NOERROR;
-}
-
-static unsigned char asn1_octet_decode(struct asn1_ctx *ctx, unsigned char *ch)
-{
- if (ctx->pointer >= ctx->end) {
- ctx->error = ASN1_ERR_DEC_EMPTY;
- return 0;
- }
- *ch = *(ctx->pointer)++;
- return 1;
-}
-
-static unsigned char asn1_tag_decode(struct asn1_ctx *ctx, unsigned int *tag)
-{
- unsigned char ch;
-
- *tag = 0;
-
- do
- {
- if (!asn1_octet_decode(ctx, &ch))
- return 0;
- *tag <<= 7;
- *tag |= ch & 0x7F;
- } while ((ch & 0x80) == 0x80);
- return 1;
-}
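/*
 * Editor's sketch of the base-128 decoding used by asn1_tag_decode()
 * above (and by asn1_subid_decode() further down): seven value bits
 * per octet, with bit 7 set on every octet except the last. Standalone
 * and illustrative only.
 */
#include <stddef.h>
#include <stdint.h>
#include <stdio.h>

static int decode_base128(const uint8_t *buf, size_t len,
			  unsigned long *out)
{
	unsigned long v = 0;
	size_t i;

	for (i = 0; i < len; i++) {
		v = (v << 7) | (buf[i] & 0x7f);
		if (!(buf[i] & 0x80)) {
			*out = v;
			return (int)(i + 1);	/* octets consumed */
		}
	}
	return 0;	/* ran out of input */
}

int main(void)
{
	const uint8_t enc[] = { 0x81, 0x48 };
	unsigned long v;

	if (decode_base128(enc, sizeof(enc), &v))
		printf("%lu\n", v);	/* 0x81 0x48 decodes to 200 */
	return 0;
}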
-
-static unsigned char asn1_id_decode(struct asn1_ctx *ctx,
- unsigned int *cls,
- unsigned int *con,
- unsigned int *tag)
-{
- unsigned char ch;
-
- if (!asn1_octet_decode(ctx, &ch))
- return 0;
-
- *cls = (ch & 0xC0) >> 6;
- *con = (ch & 0x20) >> 5;
- *tag = (ch & 0x1F);
-
- if (*tag == 0x1F) {
- if (!asn1_tag_decode(ctx, tag))
- return 0;
- }
- return 1;
-}
-
-static unsigned char asn1_length_decode(struct asn1_ctx *ctx,
- unsigned int *def,
- unsigned int *len)
-{
- unsigned char ch, cnt;
-
- if (!asn1_octet_decode(ctx, &ch))
- return 0;
-
- if (ch == 0x80)
- *def = 0;
- else {
- *def = 1;
-
- if (ch < 0x80)
- *len = ch;
- else {
- cnt = (unsigned char) (ch & 0x7F);
- *len = 0;
-
- while (cnt > 0) {
- if (!asn1_octet_decode(ctx, &ch))
- return 0;
- *len <<= 8;
- *len |= ch;
- cnt--;
- }
- }
- }
- return 1;
-}
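/*
 * Editor's sketch of the three BER length forms handled above: short
 * (one octet below 0x80), long (0x80|n followed by n length octets),
 * and indefinite (a bare 0x80, terminated later by an end-of-contents
 * pair). Standalone and illustrative only.
 */
#include <stddef.h>
#include <stdint.h>
#include <stdio.h>

/* Returns octets consumed, 0 on error; *def = 0 means indefinite
 * length, in which case *len is left unset. */
static int ber_length(const uint8_t *p, size_t avail,
		      unsigned int *def, unsigned int *len)
{
	unsigned int i, n;

	if (avail < 1)
		return 0;
	if (p[0] == 0x80) {		/* indefinite form */
		*def = 0;
		return 1;
	}
	*def = 1;
	if (p[0] < 0x80) {		/* short form */
		*len = p[0];
		return 1;
	}
	n = p[0] & 0x7f;		/* long form: n length octets */
	if (avail < 1 + (size_t)n)
		return 0;
	for (*len = 0, i = 1; i <= n; i++)
		*len = (*len << 8) | p[i];
	return 1 + n;
}

int main(void)
{
	const uint8_t longform[] = { 0x82, 0x01, 0x2c };
	unsigned int def, len;

	if (ber_length(longform, sizeof(longform), &def, &len))
		printf("definite=%u len=%u\n", def, len);	/* len=300 */
	return 0;
}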
-
-static unsigned char asn1_header_decode(struct asn1_ctx *ctx,
- unsigned char **eoc,
- unsigned int *cls,
- unsigned int *con,
- unsigned int *tag)
-{
- unsigned int def, len;
-
- if (!asn1_id_decode(ctx, cls, con, tag))
- return 0;
-
- def = len = 0;
- if (!asn1_length_decode(ctx, &def, &len))
- return 0;
-
- if (def)
- *eoc = ctx->pointer + len;
- else
- *eoc = NULL;
- return 1;
-}
-
-static unsigned char asn1_eoc_decode(struct asn1_ctx *ctx, unsigned char *eoc)
-{
- unsigned char ch;
-
- if (eoc == 0) {
- if (!asn1_octet_decode(ctx, &ch))
- return 0;
-
- if (ch != 0x00) {
- ctx->error = ASN1_ERR_DEC_EOC_MISMATCH;
- return 0;
- }
-
- if (!asn1_octet_decode(ctx, &ch))
- return 0;
-
- if (ch != 0x00) {
- ctx->error = ASN1_ERR_DEC_EOC_MISMATCH;
- return 0;
- }
- return 1;
- } else {
- if (ctx->pointer != eoc) {
- ctx->error = ASN1_ERR_DEC_LENGTH_MISMATCH;
- return 0;
- }
- return 1;
- }
-}
-
-static unsigned char asn1_null_decode(struct asn1_ctx *ctx, unsigned char *eoc)
-{
- ctx->pointer = eoc;
- return 1;
-}
-
-static unsigned char asn1_long_decode(struct asn1_ctx *ctx,
- unsigned char *eoc,
- long *integer)
-{
- unsigned char ch;
- unsigned int len;
-
- if (!asn1_octet_decode(ctx, &ch))
- return 0;
-
- *integer = (signed char) ch;
- len = 1;
-
- while (ctx->pointer < eoc) {
- if (++len > sizeof (long)) {
- ctx->error = ASN1_ERR_DEC_BADVALUE;
- return 0;
- }
-
- if (!asn1_octet_decode(ctx, &ch))
- return 0;
-
- *integer <<= 8;
- *integer |= ch;
- }
- return 1;
-}
-
-static unsigned char asn1_uint_decode(struct asn1_ctx *ctx,
- unsigned char *eoc,
- unsigned int *integer)
-{
- unsigned char ch;
- unsigned int len;
-
- if (!asn1_octet_decode(ctx, &ch))
- return 0;
-
- *integer = ch;
- if (ch == 0) len = 0;
- else len = 1;
-
- while (ctx->pointer < eoc) {
- if (++len > sizeof (unsigned int)) {
- ctx->error = ASN1_ERR_DEC_BADVALUE;
- return 0;
- }
-
- if (!asn1_octet_decode(ctx, &ch))
- return 0;
-
- *integer <<= 8;
- *integer |= ch;
- }
- return 1;
-}
-
-static unsigned char asn1_ulong_decode(struct asn1_ctx *ctx,
- unsigned char *eoc,
- unsigned long *integer)
-{
- unsigned char ch;
- unsigned int len;
-
- if (!asn1_octet_decode(ctx, &ch))
- return 0;
-
- *integer = ch;
- if (ch == 0) len = 0;
- else len = 1;
-
- while (ctx->pointer < eoc) {
- if (++len > sizeof (unsigned long)) {
- ctx->error = ASN1_ERR_DEC_BADVALUE;
- return 0;
- }
-
- if (!asn1_octet_decode(ctx, &ch))
- return 0;
-
- *integer <<= 8;
- *integer |= ch;
- }
- return 1;
-}
-
-static unsigned char asn1_octets_decode(struct asn1_ctx *ctx,
- unsigned char *eoc,
- unsigned char **octets,
- unsigned int *len)
-{
- unsigned char *ptr;
-
- *len = 0;
-
- *octets = kmalloc(eoc - ctx->pointer, GFP_ATOMIC);
- if (*octets == NULL) {
- if (net_ratelimit())
- printk("OOM in bsalg (%d)\n", __LINE__);
- return 0;
- }
-
- ptr = *octets;
- while (ctx->pointer < eoc) {
- if (!asn1_octet_decode(ctx, (unsigned char *)ptr++)) {
- kfree(*octets);
- *octets = NULL;
- return 0;
- }
- (*len)++;
- }
- return 1;
-}
-
-static unsigned char asn1_subid_decode(struct asn1_ctx *ctx,
- unsigned long *subid)
-{
- unsigned char ch;
-
- *subid = 0;
-
- do {
- if (!asn1_octet_decode(ctx, &ch))
- return 0;
-
- *subid <<= 7;
- *subid |= ch & 0x7F;
- } while ((ch & 0x80) == 0x80);
- return 1;
-}
-
-static unsigned char asn1_oid_decode(struct asn1_ctx *ctx,
- unsigned char *eoc,
- unsigned long **oid,
- unsigned int *len)
-{
- unsigned long subid;
- unsigned int size;
- unsigned long *optr;
-
- size = eoc - ctx->pointer + 1;
- *oid = kmalloc(size * sizeof(unsigned long), GFP_ATOMIC);
- if (*oid == NULL) {
- if (net_ratelimit())
- printk("OOM in bsalg (%d)\n", __LINE__);
- return 0;
- }
-
- optr = *oid;
-
- if (!asn1_subid_decode(ctx, &subid)) {
- kfree(*oid);
- *oid = NULL;
- return 0;
- }
-
- if (subid < 40) {
- optr [0] = 0;
- optr [1] = subid;
- } else if (subid < 80) {
- optr [0] = 1;
- optr [1] = subid - 40;
- } else {
- optr [0] = 2;
- optr [1] = subid - 80;
- }
-
- *len = 2;
- optr += 2;
-
- while (ctx->pointer < eoc) {
- if (++(*len) > size) {
- ctx->error = ASN1_ERR_DEC_BADVALUE;
- kfree(*oid);
- *oid = NULL;
- return 0;
- }
-
- if (!asn1_subid_decode(ctx, optr++)) {
- kfree(*oid);
- *oid = NULL;
- return 0;
- }
- }
- return 1;
-}
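/*
 * Editor's sketch of the OID first-subidentifier rule applied above:
 * BER packs the first two arcs into one subidentifier as 40*X + Y,
 * where X can only be 0 or 1 when the subid is below 80, and is 2
 * otherwise. Standalone and illustrative only.
 */
#include <stdio.h>

static void split_first_subid(unsigned long subid,
			      unsigned long *x, unsigned long *y)
{
	if (subid < 40) {
		*x = 0; *y = subid;
	} else if (subid < 80) {
		*x = 1; *y = subid - 40;
	} else {
		*x = 2; *y = subid - 80;
	}
}

int main(void)
{
	unsigned long x, y;

	split_first_subid(43, &x, &y);
	printf("%lu.%lu\n", x, y);	/* prints 1.3 (iso.org) */
	return 0;
}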
-
-/*****************************************************************************
- *
- * SNMP decoding routines (gxsnmp author Dirk Wisse)
- *
- *****************************************************************************/
-
-/* SNMP Versions */
-#define SNMP_V1 0
-#define SNMP_V2C 1
-#define SNMP_V2 2
-#define SNMP_V3 3
-
-/* Default Sizes */
-#define SNMP_SIZE_COMM 256
-#define SNMP_SIZE_OBJECTID 128
-#define SNMP_SIZE_BUFCHR 256
-#define SNMP_SIZE_BUFINT 128
-#define SNMP_SIZE_SMALLOBJECTID 16
-
-/* Requests */
-#define SNMP_PDU_GET 0
-#define SNMP_PDU_NEXT 1
-#define SNMP_PDU_RESPONSE 2
-#define SNMP_PDU_SET 3
-#define SNMP_PDU_TRAP1 4
-#define SNMP_PDU_BULK 5
-#define SNMP_PDU_INFORM 6
-#define SNMP_PDU_TRAP2 7
-
-/* Errors */
-#define SNMP_NOERROR 0
-#define SNMP_TOOBIG 1
-#define SNMP_NOSUCHNAME 2
-#define SNMP_BADVALUE 3
-#define SNMP_READONLY 4
-#define SNMP_GENERROR 5
-#define SNMP_NOACCESS 6
-#define SNMP_WRONGTYPE 7
-#define SNMP_WRONGLENGTH 8
-#define SNMP_WRONGENCODING 9
-#define SNMP_WRONGVALUE 10
-#define SNMP_NOCREATION 11
-#define SNMP_INCONSISTENTVALUE 12
-#define SNMP_RESOURCEUNAVAILABLE 13
-#define SNMP_COMMITFAILED 14
-#define SNMP_UNDOFAILED 15
-#define SNMP_AUTHORIZATIONERROR 16
-#define SNMP_NOTWRITABLE 17
-#define SNMP_INCONSISTENTNAME 18
-
-/* General SNMP V1 Traps */
-#define SNMP_TRAP_COLDSTART 0
-#define SNMP_TRAP_WARMSTART 1
-#define SNMP_TRAP_LINKDOWN 2
-#define SNMP_TRAP_LINKUP 3
-#define SNMP_TRAP_AUTFAILURE 4
-#define SNMP_TRAP_EQPNEIGHBORLOSS 5
-#define SNMP_TRAP_ENTSPECIFIC 6
-
-/* SNMPv1 Types */
-#define SNMP_NULL 0
-#define SNMP_INTEGER 1 /* l */
-#define SNMP_OCTETSTR 2 /* c */
-#define SNMP_DISPLAYSTR 2 /* c */
-#define SNMP_OBJECTID 3 /* ul */
-#define SNMP_IPADDR 4 /* uc */
-#define SNMP_COUNTER 5 /* ul */
-#define SNMP_GAUGE 6 /* ul */
-#define SNMP_TIMETICKS 7 /* ul */
-#define SNMP_OPAQUE 8 /* c */
-
-/* Additional SNMPv2 Types */
-#define SNMP_UINTEGER 5 /* ul */
-#define SNMP_BITSTR 9 /* uc */
-#define SNMP_NSAP 10 /* uc */
-#define SNMP_COUNTER64 11 /* ul */
-#define SNMP_NOSUCHOBJECT 12
-#define SNMP_NOSUCHINSTANCE 13
-#define SNMP_ENDOFMIBVIEW 14
-
-union snmp_syntax
-{
- unsigned char uc[0]; /* 8 bit unsigned */
- char c[0]; /* 8 bit signed */
- unsigned long ul[0]; /* 32 bit unsigned */
- long l[0]; /* 32 bit signed */
-};
-
-struct snmp_object
-{
- unsigned long *id;
- unsigned int id_len;
- unsigned short type;
- unsigned int syntax_len;
- union snmp_syntax syntax;
-};
-
-struct snmp_request
-{
- unsigned long id;
- unsigned int error_status;
- unsigned int error_index;
-};
-
-struct snmp_v1_trap
-{
- unsigned long *id;
- unsigned int id_len;
- unsigned long ip_address; /* pointer */
- unsigned int general;
- unsigned int specific;
- unsigned long time;
-};
-
-/* SNMP types */
-#define SNMP_IPA 0
-#define SNMP_CNT 1
-#define SNMP_GGE 2
-#define SNMP_TIT 3
-#define SNMP_OPQ 4
-#define SNMP_C64 6
-
-/* SNMP errors */
-#define SERR_NSO 0
-#define SERR_NSI 1
-#define SERR_EOM 2
-
-static inline void mangle_address(unsigned char *begin,
- unsigned char *addr,
- const struct oct1_map *map,
- __sum16 *check);
-struct snmp_cnv
-{
- unsigned int class;
- unsigned int tag;
- int syntax;
-};
-
-static struct snmp_cnv snmp_conv [] =
-{
- {ASN1_UNI, ASN1_NUL, SNMP_NULL},
- {ASN1_UNI, ASN1_INT, SNMP_INTEGER},
- {ASN1_UNI, ASN1_OTS, SNMP_OCTETSTR},
- {ASN1_UNI, ASN1_OTS, SNMP_DISPLAYSTR},
- {ASN1_UNI, ASN1_OJI, SNMP_OBJECTID},
- {ASN1_APL, SNMP_IPA, SNMP_IPADDR},
- {ASN1_APL, SNMP_CNT, SNMP_COUNTER}, /* Counter32 */
- {ASN1_APL, SNMP_GGE, SNMP_GAUGE}, /* Gauge32 == Unsigned32 */
- {ASN1_APL, SNMP_TIT, SNMP_TIMETICKS},
- {ASN1_APL, SNMP_OPQ, SNMP_OPAQUE},
-
- /* SNMPv2 data types and errors */
- {ASN1_UNI, ASN1_BTS, SNMP_BITSTR},
- {ASN1_APL, SNMP_C64, SNMP_COUNTER64},
- {ASN1_CTX, SERR_NSO, SNMP_NOSUCHOBJECT},
- {ASN1_CTX, SERR_NSI, SNMP_NOSUCHINSTANCE},
- {ASN1_CTX, SERR_EOM, SNMP_ENDOFMIBVIEW},
- {0, 0, -1}
-};
-
-static unsigned char snmp_tag_cls2syntax(unsigned int tag,
- unsigned int cls,
- unsigned short *syntax)
-{
- struct snmp_cnv *cnv;
-
- cnv = snmp_conv;
-
- while (cnv->syntax != -1) {
- if (cnv->tag == tag && cnv->class == cls) {
- *syntax = cnv->syntax;
- return 1;
- }
- cnv++;
- }
- return 0;
-}
-
-static unsigned char snmp_object_decode(struct asn1_ctx *ctx,
- struct snmp_object **obj)
-{
- unsigned int cls, con, tag, len, idlen;
- unsigned short type;
- unsigned char *eoc, *end, *p;
- unsigned long *lp, *id;
- unsigned long ul;
- long l;
-
- *obj = NULL;
- id = NULL;
-
- if (!asn1_header_decode(ctx, &eoc, &cls, &con, &tag))
- return 0;
-
- if (cls != ASN1_UNI || con != ASN1_CON || tag != ASN1_SEQ)
- return 0;
-
- if (!asn1_header_decode(ctx, &end, &cls, &con, &tag))
- return 0;
-
- if (cls != ASN1_UNI || con != ASN1_PRI || tag != ASN1_OJI)
- return 0;
-
- if (!asn1_oid_decode(ctx, end, &id, &idlen))
- return 0;
-
- if (!asn1_header_decode(ctx, &end, &cls, &con, &tag)) {
- kfree(id);
- return 0;
- }
-
- if (con != ASN1_PRI) {
- kfree(id);
- return 0;
- }
-
- type = 0;
- if (!snmp_tag_cls2syntax(tag, cls, &type)) {
- kfree(id);
- return 0;
- }
-
- l = 0;
- switch (type) {
- case SNMP_INTEGER:
- len = sizeof(long);
- if (!asn1_long_decode(ctx, end, &l)) {
- kfree(id);
- return 0;
- }
- *obj = kmalloc(sizeof(struct snmp_object) + len,
- GFP_ATOMIC);
- if (*obj == NULL) {
- kfree(id);
- if (net_ratelimit())
- printk("OOM in bsalg (%d)\n", __LINE__);
- return 0;
- }
- (*obj)->syntax.l[0] = l;
- break;
- case SNMP_OCTETSTR:
- case SNMP_OPAQUE:
- if (!asn1_octets_decode(ctx, end, &p, &len)) {
- kfree(id);
- return 0;
- }
- *obj = kmalloc(sizeof(struct snmp_object) + len,
- GFP_ATOMIC);
- if (*obj == NULL) {
- kfree(id);
- if (net_ratelimit())
- printk("OOM in bsalg (%d)\n", __LINE__);
- return 0;
- }
- memcpy((*obj)->syntax.c, p, len);
- kfree(p);
- break;
- case SNMP_NULL:
- case SNMP_NOSUCHOBJECT:
- case SNMP_NOSUCHINSTANCE:
- case SNMP_ENDOFMIBVIEW:
- len = 0;
- *obj = kmalloc(sizeof(struct snmp_object), GFP_ATOMIC);
- if (*obj == NULL) {
- kfree(id);
- if (net_ratelimit())
- printk("OOM in bsalg (%d)\n", __LINE__);
- return 0;
- }
- if (!asn1_null_decode(ctx, end)) {
- kfree(id);
- kfree(*obj);
- *obj = NULL;
- return 0;
- }
- break;
- case SNMP_OBJECTID:
- if (!asn1_oid_decode(ctx, end, (unsigned long **)&lp, &len)) {
- kfree(id);
- return 0;
- }
- len *= sizeof(unsigned long);
- *obj = kmalloc(sizeof(struct snmp_object) + len, GFP_ATOMIC);
- if (*obj == NULL) {
- kfree(lp);
- kfree(id);
- if (net_ratelimit())
- printk("OOM in bsalg (%d)\n", __LINE__);
- return 0;
- }
- memcpy((*obj)->syntax.ul, lp, len);
- kfree(lp);
- break;
- case SNMP_IPADDR:
- if (!asn1_octets_decode(ctx, end, &p, &len)) {
- kfree(id);
- return 0;
- }
- if (len != 4) {
- kfree(p);
- kfree(id);
- return 0;
- }
- *obj = kmalloc(sizeof(struct snmp_object) + len, GFP_ATOMIC);
- if (*obj == NULL) {
- kfree(p);
- kfree(id);
- if (net_ratelimit())
- printk("OOM in bsalg (%d)\n", __LINE__);
- return 0;
- }
- memcpy((*obj)->syntax.uc, p, len);
- kfree(p);
- break;
- case SNMP_COUNTER:
- case SNMP_GAUGE:
- case SNMP_TIMETICKS:
- len = sizeof(unsigned long);
- if (!asn1_ulong_decode(ctx, end, &ul)) {
- kfree(id);
- return 0;
- }
- *obj = kmalloc(sizeof(struct snmp_object) + len, GFP_ATOMIC);
- if (*obj == NULL) {
- kfree(id);
- if (net_ratelimit())
- printk("OOM in bsalg (%d)\n", __LINE__);
- return 0;
- }
- (*obj)->syntax.ul[0] = ul;
- break;
- default:
- kfree(id);
- return 0;
- }
-
- (*obj)->syntax_len = len;
- (*obj)->type = type;
- (*obj)->id = id;
- (*obj)->id_len = idlen;
-
- if (!asn1_eoc_decode(ctx, eoc)) {
- kfree(id);
- kfree(*obj);
- *obj = NULL;
- return 0;
- }
- return 1;
-}
-
-static unsigned char snmp_request_decode(struct asn1_ctx *ctx,
- struct snmp_request *request)
-{
- unsigned int cls, con, tag;
- unsigned char *end;
-
- if (!asn1_header_decode(ctx, &end, &cls, &con, &tag))
- return 0;
-
- if (cls != ASN1_UNI || con != ASN1_PRI || tag != ASN1_INT)
- return 0;
-
- if (!asn1_ulong_decode(ctx, end, &request->id))
- return 0;
-
- if (!asn1_header_decode(ctx, &end, &cls, &con, &tag))
- return 0;
-
- if (cls != ASN1_UNI || con != ASN1_PRI || tag != ASN1_INT)
- return 0;
-
- if (!asn1_uint_decode(ctx, end, &request->error_status))
- return 0;
-
- if (!asn1_header_decode(ctx, &end, &cls, &con, &tag))
- return 0;
-
- if (cls != ASN1_UNI || con != ASN1_PRI || tag != ASN1_INT)
- return 0;
-
- if (!asn1_uint_decode(ctx, end, &request->error_index))
- return 0;
-
- return 1;
-}
-
-/*
- * Fast checksum update for a possibly oddly-aligned UDP byte, from the
- * code example in the incremental-update draft (cf. RFC 1624).
- */
-static void fast_csum(__sum16 *csum,
- const unsigned char *optr,
- const unsigned char *nptr,
- int offset)
-{
- unsigned char s[4];
-
- if (offset & 1) {
- s[0] = s[2] = 0;
- s[1] = ~*optr;
- s[3] = *nptr;
- } else {
- s[1] = s[3] = 0;
- s[0] = ~*optr;
- s[2] = *nptr;
- }
-
- *csum = csum_fold(csum_partial(s, 4, ~csum_unfold(*csum)));
-}
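/*
 * Editor's sketch (standalone userspace, illustrative values): the
 * incremental-update rule HC' = ~(~HC + ~m + m') from RFC 1624, with
 * the changed byte padded into the high or low half of a 16-bit word
 * by offset parity. fold32() and csum_update_byte() are hypothetical
 * helpers, not a line-for-line port of fast_csum() above.
 */
#include <stdint.h>
#include <stdio.h>

static uint16_t fold32(uint32_t sum)
{
	while (sum >> 16)
		sum = (sum & 0xffff) + (sum >> 16);
	return (uint16_t)sum;
}

static uint16_t csum_update_byte(uint16_t check, uint8_t oldb,
				 uint8_t newb, int offset)
{
	uint16_t m, mp;

	if (offset & 1) {	/* odd offset: low byte of the word */
		m  = oldb;
		mp = newb;
	} else {		/* even offset: high byte of the word */
		m  = (uint16_t)(oldb << 8);
		mp = (uint16_t)(newb << 8);
	}
	return (uint16_t)~fold32((uint32_t)(uint16_t)~check +
				 (uint16_t)~m + mp);
}

int main(void)
{
	/* replace byte 0x0a with 0xc0 at offset 2; old checksum 0x1234 */
	printf("%#06x\n", csum_update_byte(0x1234, 0x0a, 0xc0, 2));
	return 0;
}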
-
-/*
- * Mangle IP address.
- * - begin points to the start of the SNMP message
- * - addr points to the start of the address
- */
-static inline void mangle_address(unsigned char *begin,
- unsigned char *addr,
- const struct oct1_map *map,
- __sum16 *check)
-{
- if (map->from == NOCT1(addr)) {
- u_int32_t old;
-
- if (debug)
- memcpy(&old, (unsigned char *)addr, sizeof(old));
-
- *addr = map->to;
-
- /* Update UDP checksum if being used */
- if (*check) {
- fast_csum(check,
- &map->from, &map->to, addr - begin);
- }
-
- if (debug)
- printk(KERN_DEBUG "bsalg: mapped %u.%u.%u.%u to "
- "%u.%u.%u.%u\n", NIPQUAD(old), NIPQUAD(*addr));
- }
-}
-
-static unsigned char snmp_trap_decode(struct asn1_ctx *ctx,
- struct snmp_v1_trap *trap,
- const struct oct1_map *map,
- __sum16 *check)
-{
- unsigned int cls, con, tag, len;
- unsigned char *end;
-
- if (!asn1_header_decode(ctx, &end, &cls, &con, &tag))
- return 0;
-
- if (cls != ASN1_UNI || con != ASN1_PRI || tag != ASN1_OJI)
- return 0;
-
- if (!asn1_oid_decode(ctx, end, &trap->id, &trap->id_len))
- return 0;
-
- if (!asn1_header_decode(ctx, &end, &cls, &con, &tag))
- goto err_id_free;
-
- if (!((cls == ASN1_APL && con == ASN1_PRI && tag == SNMP_IPA) ||
- (cls == ASN1_UNI && con == ASN1_PRI && tag == ASN1_OTS)))
- goto err_id_free;
-
- if (!asn1_octets_decode(ctx, end, (unsigned char **)&trap->ip_address, &len))
- goto err_id_free;
-
- /* IPv4 only */
- if (len != 4)
- goto err_addr_free;
-
- mangle_address(ctx->begin, ctx->pointer - 4, map, check);
-
- if (!asn1_header_decode(ctx, &end, &cls, &con, &tag))
- goto err_addr_free;
-
- if (cls != ASN1_UNI || con != ASN1_PRI || tag != ASN1_INT)
- goto err_addr_free;
-
- if (!asn1_uint_decode(ctx, end, &trap->general))
- goto err_addr_free;
-
- if (!asn1_header_decode(ctx, &end, &cls, &con, &tag))
- goto err_addr_free;
-
- if (cls != ASN1_UNI || con != ASN1_PRI || tag != ASN1_INT)
- goto err_addr_free;
-
- if (!asn1_uint_decode(ctx, end, &trap->specific))
- goto err_addr_free;
-
- if (!asn1_header_decode(ctx, &end, &cls, &con, &tag))
- goto err_addr_free;
-
- if (!((cls == ASN1_APL && con == ASN1_PRI && tag == SNMP_TIT) ||
- (cls == ASN1_UNI && con == ASN1_PRI && tag == ASN1_INT)))
- goto err_addr_free;
-
- if (!asn1_ulong_decode(ctx, end, &trap->time))
- goto err_addr_free;
-
- return 1;
-
-err_addr_free:
- kfree((unsigned long *)trap->ip_address);
-
-err_id_free:
- kfree(trap->id);
-
- return 0;
-}
-
-/*****************************************************************************
- *
- * Misc. routines
- *
- *****************************************************************************/
-
-static void hex_dump(unsigned char *buf, size_t len)
-{
- size_t i;
-
- for (i = 0; i < len; i++) {
- if (i && !(i % 16))
- printk("\n");
- printk("%02x ", *(buf + i));
- }
- printk("\n");
-}
-
-/*
- * Parse and mangle SNMP message according to mapping.
- * (And this is just the 'basic' method.)
- */
-static int snmp_parse_mangle(unsigned char *msg,
- u_int16_t len,
- const struct oct1_map *map,
- __sum16 *check)
-{
- unsigned char *eoc, *end;
- unsigned int cls, con, tag, vers, pdutype;
- struct asn1_ctx ctx;
- struct asn1_octstr comm;
- struct snmp_object **obj;
-
- if (debug > 1)
- hex_dump(msg, len);
-
- asn1_open(&ctx, msg, len);
-
- /*
- * Start of SNMP message.
- */
- if (!asn1_header_decode(&ctx, &eoc, &cls, &con, &tag))
- return 0;
- if (cls != ASN1_UNI || con != ASN1_CON || tag != ASN1_SEQ)
- return 0;
-
- /*
- * Version 1 or 2 handled.
- */
- if (!asn1_header_decode(&ctx, &end, &cls, &con, &tag))
- return 0;
- if (cls != ASN1_UNI || con != ASN1_PRI || tag != ASN1_INT)
- return 0;
- if (!asn1_uint_decode (&ctx, end, &vers))
- return 0;
- if (debug > 1)
- printk(KERN_DEBUG "bsalg: snmp version: %u\n", vers + 1);
- if (vers > 1)
- return 1;
-
- /*
- * Community.
- */
- if (!asn1_header_decode (&ctx, &end, &cls, &con, &tag))
- return 0;
- if (cls != ASN1_UNI || con != ASN1_PRI || tag != ASN1_OTS)
- return 0;
- if (!asn1_octets_decode(&ctx, end, &comm.data, &comm.len))
- return 0;
- if (debug > 1) {
- unsigned int i;
-
- printk(KERN_DEBUG "bsalg: community: ");
- for (i = 0; i < comm.len; i++)
- printk("%c", comm.data[i]);
- printk("\n");
- }
- kfree(comm.data);
-
- /*
- * PDU type
- */
- if (!asn1_header_decode(&ctx, &eoc, &cls, &con, &pdutype))
- return 0;
- if (cls != ASN1_CTX || con != ASN1_CON)
- return 0;
- if (debug > 1) {
- unsigned char *pdus[] = {
- [SNMP_PDU_GET] = "get",
- [SNMP_PDU_NEXT] = "get-next",
- [SNMP_PDU_RESPONSE] = "response",
- [SNMP_PDU_SET] = "set",
- [SNMP_PDU_TRAP1] = "trapv1",
- [SNMP_PDU_BULK] = "bulk",
- [SNMP_PDU_INFORM] = "inform",
- [SNMP_PDU_TRAP2] = "trapv2"
- };
-
- if (pdutype > SNMP_PDU_TRAP2)
- printk(KERN_DEBUG "bsalg: bad pdu type %u\n", pdutype);
- else
- printk(KERN_DEBUG "bsalg: pdu: %s\n", pdus[pdutype]);
- }
- if (pdutype != SNMP_PDU_RESPONSE &&
- pdutype != SNMP_PDU_TRAP1 && pdutype != SNMP_PDU_TRAP2)
- return 1;
-
- /*
- * Request header or v1 trap
- */
- if (pdutype == SNMP_PDU_TRAP1) {
- struct snmp_v1_trap trap;
- unsigned char ret = snmp_trap_decode(&ctx, &trap, map, check);
-
- if (ret) {
- kfree(trap.id);
- kfree((unsigned long *)trap.ip_address);
- } else
- return ret;
-
- } else {
- struct snmp_request req;
-
- if (!snmp_request_decode(&ctx, &req))
- return 0;
-
- if (debug > 1)
- printk(KERN_DEBUG "bsalg: request: id=0x%lx error_status=%u "
- "error_index=%u\n", req.id, req.error_status,
- req.error_index);
- }
-
- /*
- * Loop through objects, look for IP addresses to mangle.
- */
- if (!asn1_header_decode(&ctx, &eoc, &cls, &con, &tag))
- return 0;
-
- if (cls != ASN1_UNI || con != ASN1_CON || tag != ASN1_SEQ)
- return 0;
-
- obj = kmalloc(sizeof(struct snmp_object), GFP_ATOMIC);
- if (obj == NULL) {
- if (net_ratelimit())
- printk(KERN_WARNING "OOM in bsalg(%d)\n", __LINE__);
- return 0;
- }
-
- while (!asn1_eoc_decode(&ctx, eoc)) {
- unsigned int i;
-
- if (!snmp_object_decode(&ctx, obj)) {
- if (*obj) {
- kfree((*obj)->id);
- kfree(*obj);
- }
- kfree(obj);
- return 0;
- }
-
- if (debug > 1) {
- printk(KERN_DEBUG "bsalg: object: ");
- for (i = 0; i < (*obj)->id_len; i++) {
- if (i > 0)
- printk(".");
- printk("%lu", (*obj)->id[i]);
- }
- printk(": type=%u\n", (*obj)->type);
-
- }
-
- if ((*obj)->type == SNMP_IPADDR)
- mangle_address(ctx.begin, ctx.pointer - 4 , map, check);
-
- kfree((*obj)->id);
- kfree(*obj);
- }
- kfree(obj);
-
- if (!asn1_eoc_decode(&ctx, eoc))
- return 0;
-
- return 1;
-}
-
-/*****************************************************************************
- *
- * NAT routines.
- *
- *****************************************************************************/
-
-/*
- * SNMP translation routine.
- */
-static int snmp_translate(struct ip_conntrack *ct,
- enum ip_conntrack_info ctinfo,
- struct sk_buff **pskb)
-{
- struct iphdr *iph = (*pskb)->nh.iph;
- struct udphdr *udph = (struct udphdr *)((__be32 *)iph + iph->ihl);
- u_int16_t udplen = ntohs(udph->len);
- u_int16_t paylen = udplen - sizeof(struct udphdr);
- int dir = CTINFO2DIR(ctinfo);
- struct oct1_map map;
-
- /*
-	 * Determine the mapping for application layer addresses based
- * on NAT manipulations for the packet.
- */
- if (dir == IP_CT_DIR_ORIGINAL) {
- /* SNAT traps */
- map.from = NOCT1(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.ip);
- map.to = NOCT1(&ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.ip);
- } else {
- /* DNAT replies */
- map.from = NOCT1(&ct->tuplehash[IP_CT_DIR_REPLY].tuple.src.ip);
- map.to = NOCT1(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.ip);
- }
-
- if (map.from == map.to)
- return NF_ACCEPT;
-
- if (!snmp_parse_mangle((unsigned char *)udph + sizeof(struct udphdr),
- paylen, &map, &udph->check)) {
- if (net_ratelimit())
- printk(KERN_WARNING "bsalg: parser failed\n");
- return NF_DROP;
- }
- return NF_ACCEPT;
-}
-
-/* We don't actually set up expectations, just adjust internal IP
- * addresses if this is being NATted */
-static int help(struct sk_buff **pskb,
- struct ip_conntrack *ct,
- enum ip_conntrack_info ctinfo)
-{
- int dir = CTINFO2DIR(ctinfo);
- unsigned int ret;
- struct iphdr *iph = (*pskb)->nh.iph;
- struct udphdr *udph = (struct udphdr *)((u_int32_t *)iph + iph->ihl);
-
- /* SNMP replies and originating SNMP traps get mangled */
- if (udph->source == htons(SNMP_PORT) && dir != IP_CT_DIR_REPLY)
- return NF_ACCEPT;
- if (udph->dest == htons(SNMP_TRAP_PORT) && dir != IP_CT_DIR_ORIGINAL)
- return NF_ACCEPT;
-
- /* No NAT? */
- if (!(ct->status & IPS_NAT_MASK))
- return NF_ACCEPT;
-
- /*
- * Make sure the packet length is ok. So far, we were only guaranteed
- * to have a valid length IP header plus 8 bytes, which means we have
- * enough room for a UDP header. Just verify the UDP length field so we
- * can mess around with the payload.
- */
- if (ntohs(udph->len) != (*pskb)->len - (iph->ihl << 2)) {
- if (net_ratelimit())
- printk(KERN_WARNING "SNMP: dropping malformed packet "
- "src=%u.%u.%u.%u dst=%u.%u.%u.%u\n",
- NIPQUAD(iph->saddr), NIPQUAD(iph->daddr));
- return NF_DROP;
- }
-
- if (!skb_make_writable(pskb, (*pskb)->len))
- return NF_DROP;
-
- spin_lock_bh(&snmp_lock);
- ret = snmp_translate(ct, ctinfo, pskb);
- spin_unlock_bh(&snmp_lock);
- return ret;
-}
-
-static struct ip_conntrack_helper snmp_helper = {
- .max_expected = 0,
- .timeout = 180,
- .me = THIS_MODULE,
- .help = help,
- .name = "snmp",
-
- .tuple = {.src = {.u = {.udp = {.port = __constant_htons(SNMP_PORT)}}},
- .dst = {.protonum = IPPROTO_UDP},
- },
- .mask = {.src = {.u = {0xFFFF}},
- .dst = {.protonum = 0xFF},
- },
-};
-
-static struct ip_conntrack_helper snmp_trap_helper = {
- .max_expected = 0,
- .timeout = 180,
- .me = THIS_MODULE,
- .help = help,
- .name = "snmp_trap",
-
- .tuple = {.src = {.u = {.udp = {.port = __constant_htons(SNMP_TRAP_PORT)}}},
- .dst = {.protonum = IPPROTO_UDP},
- },
- .mask = {.src = {.u = {0xFFFF}},
- .dst = {.protonum = 0xFF},
- },
-};
-
-/*****************************************************************************
- *
- * Module stuff.
- *
- *****************************************************************************/
-
-static int __init ip_nat_snmp_basic_init(void)
-{
- int ret = 0;
-
- ret = ip_conntrack_helper_register(&snmp_helper);
- if (ret < 0)
- return ret;
- ret = ip_conntrack_helper_register(&snmp_trap_helper);
- if (ret < 0) {
- ip_conntrack_helper_unregister(&snmp_helper);
- return ret;
- }
- return ret;
-}
-
-static void __exit ip_nat_snmp_basic_fini(void)
-{
- ip_conntrack_helper_unregister(&snmp_helper);
- ip_conntrack_helper_unregister(&snmp_trap_helper);
-}
-
-module_init(ip_nat_snmp_basic_init);
-module_exit(ip_nat_snmp_basic_fini);
-
-module_param(debug, int, 0600);
diff --git a/net/ipv4/netfilter/ip_nat_standalone.c b/net/ipv4/netfilter/ip_nat_standalone.c
deleted file mode 100644
index 6bcfdf6dfcc..00000000000
--- a/net/ipv4/netfilter/ip_nat_standalone.c
+++ /dev/null
@@ -1,388 +0,0 @@
-/* This file contains all the functions required for the standalone
- ip_nat module.
-
- These are not required by the compatibility layer.
-*/
-
-/* (C) 1999-2001 Paul `Rusty' Russell
- * (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
-
-/*
- * 23 Apr 2001: Harald Welte <laforge@gnumonks.org>
- * - new API and handling of conntrack/nat helpers
- * - now capable of multiple expectations for one master
- * */
-
-#include <linux/types.h>
-#include <linux/icmp.h>
-#include <linux/ip.h>
-#include <linux/netfilter.h>
-#include <linux/netfilter_ipv4.h>
-#include <linux/module.h>
-#include <linux/skbuff.h>
-#include <linux/proc_fs.h>
-#include <net/ip.h>
-#include <net/checksum.h>
-#include <linux/spinlock.h>
-
-#include <linux/netfilter_ipv4/ip_nat.h>
-#include <linux/netfilter_ipv4/ip_nat_rule.h>
-#include <linux/netfilter_ipv4/ip_nat_protocol.h>
-#include <linux/netfilter_ipv4/ip_nat_core.h>
-#include <linux/netfilter_ipv4/ip_nat_helper.h>
-#include <linux/netfilter_ipv4/ip_tables.h>
-#include <linux/netfilter_ipv4/ip_conntrack_core.h>
-
-#if 0
-#define DEBUGP printk
-#else
-#define DEBUGP(format, args...)
-#endif
-
-#ifdef CONFIG_XFRM
-static void nat_decode_session(struct sk_buff *skb, struct flowi *fl)
-{
- struct ip_conntrack *ct;
- struct ip_conntrack_tuple *t;
- enum ip_conntrack_info ctinfo;
- enum ip_conntrack_dir dir;
- unsigned long statusbit;
-
- ct = ip_conntrack_get(skb, &ctinfo);
- if (ct == NULL)
- return;
- dir = CTINFO2DIR(ctinfo);
- t = &ct->tuplehash[dir].tuple;
-
- if (dir == IP_CT_DIR_ORIGINAL)
- statusbit = IPS_DST_NAT;
- else
- statusbit = IPS_SRC_NAT;
-
- if (ct->status & statusbit) {
- fl->fl4_dst = t->dst.ip;
- if (t->dst.protonum == IPPROTO_TCP ||
- t->dst.protonum == IPPROTO_UDP)
- fl->fl_ip_dport = t->dst.u.tcp.port;
- }
-
- statusbit ^= IPS_NAT_MASK;
-
- if (ct->status & statusbit) {
- fl->fl4_src = t->src.ip;
- if (t->dst.protonum == IPPROTO_TCP ||
- t->dst.protonum == IPPROTO_UDP)
- fl->fl_ip_sport = t->src.u.tcp.port;
- }
-}
-#endif
-
-static unsigned int
-ip_nat_fn(unsigned int hooknum,
- struct sk_buff **pskb,
- const struct net_device *in,
- const struct net_device *out,
- int (*okfn)(struct sk_buff *))
-{
- struct ip_conntrack *ct;
- enum ip_conntrack_info ctinfo;
- struct ip_nat_info *info;
- /* maniptype == SRC for postrouting. */
- enum ip_nat_manip_type maniptype = HOOK2MANIP(hooknum);
-
- /* We never see fragments: conntrack defrags on pre-routing
- and local-out, and ip_nat_out protects post-routing. */
- IP_NF_ASSERT(!((*pskb)->nh.iph->frag_off
- & htons(IP_MF|IP_OFFSET)));
-
- ct = ip_conntrack_get(*pskb, &ctinfo);
- /* Can't track? It's not due to stress, or conntrack would
-	   have dropped it. Hence it's the user's responsibility to
- packet filter it out, or implement conntrack/NAT for that
- protocol. 8) --RR */
- if (!ct) {
- /* Exception: ICMP redirect to new connection (not in
- hash table yet). We must not let this through, in
- case we're doing NAT to the same network. */
- if ((*pskb)->nh.iph->protocol == IPPROTO_ICMP) {
- struct icmphdr _hdr, *hp;
-
- hp = skb_header_pointer(*pskb,
- (*pskb)->nh.iph->ihl*4,
- sizeof(_hdr), &_hdr);
- if (hp != NULL &&
- hp->type == ICMP_REDIRECT)
- return NF_DROP;
- }
- return NF_ACCEPT;
- }
-
- /* Don't try to NAT if this packet is not conntracked */
- if (ct == &ip_conntrack_untracked)
- return NF_ACCEPT;
-
- switch (ctinfo) {
- case IP_CT_RELATED:
- case IP_CT_RELATED+IP_CT_IS_REPLY:
- if ((*pskb)->nh.iph->protocol == IPPROTO_ICMP) {
- if (!ip_nat_icmp_reply_translation(ct, ctinfo,
- hooknum, pskb))
- return NF_DROP;
- else
- return NF_ACCEPT;
- }
- /* Fall thru... (Only ICMPs can be IP_CT_IS_REPLY) */
- case IP_CT_NEW:
- info = &ct->nat.info;
-
- /* Seen it before? This can happen for loopback, retrans,
-		   or local packets. */
- if (!ip_nat_initialized(ct, maniptype)) {
- unsigned int ret;
-
- if (unlikely(is_confirmed(ct)))
- /* NAT module was loaded late */
- ret = alloc_null_binding_confirmed(ct, info,
- hooknum);
- else if (hooknum == NF_IP_LOCAL_IN)
- /* LOCAL_IN hook doesn't have a chain! */
- ret = alloc_null_binding(ct, info, hooknum);
- else
- ret = ip_nat_rule_find(pskb, hooknum,
- in, out, ct,
- info);
-
- if (ret != NF_ACCEPT) {
- return ret;
- }
- } else
- DEBUGP("Already setup manip %s for ct %p\n",
- maniptype == IP_NAT_MANIP_SRC ? "SRC" : "DST",
- ct);
- break;
-
- default:
- /* ESTABLISHED */
- IP_NF_ASSERT(ctinfo == IP_CT_ESTABLISHED
- || ctinfo == (IP_CT_ESTABLISHED+IP_CT_IS_REPLY));
- info = &ct->nat.info;
- }
-
- IP_NF_ASSERT(info);
- return ip_nat_packet(ct, ctinfo, hooknum, pskb);
-}
-
-static unsigned int
-ip_nat_in(unsigned int hooknum,
- struct sk_buff **pskb,
- const struct net_device *in,
- const struct net_device *out,
- int (*okfn)(struct sk_buff *))
-{
- unsigned int ret;
- __be32 daddr = (*pskb)->nh.iph->daddr;
-
- ret = ip_nat_fn(hooknum, pskb, in, out, okfn);
- if (ret != NF_DROP && ret != NF_STOLEN
- && daddr != (*pskb)->nh.iph->daddr) {
- dst_release((*pskb)->dst);
- (*pskb)->dst = NULL;
- }
- return ret;
-}
-
-static unsigned int
-ip_nat_out(unsigned int hooknum,
- struct sk_buff **pskb,
- const struct net_device *in,
- const struct net_device *out,
- int (*okfn)(struct sk_buff *))
-{
-#ifdef CONFIG_XFRM
- struct ip_conntrack *ct;
- enum ip_conntrack_info ctinfo;
-#endif
- unsigned int ret;
-
- /* root is playing with raw sockets. */
- if ((*pskb)->len < sizeof(struct iphdr)
- || (*pskb)->nh.iph->ihl * 4 < sizeof(struct iphdr))
- return NF_ACCEPT;
-
- ret = ip_nat_fn(hooknum, pskb, in, out, okfn);
-#ifdef CONFIG_XFRM
- if (ret != NF_DROP && ret != NF_STOLEN
- && (ct = ip_conntrack_get(*pskb, &ctinfo)) != NULL) {
- enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo);
-
- if (ct->tuplehash[dir].tuple.src.ip !=
- ct->tuplehash[!dir].tuple.dst.ip
- || ct->tuplehash[dir].tuple.src.u.all !=
- ct->tuplehash[!dir].tuple.dst.u.all
- )
- return ip_xfrm_me_harder(pskb) == 0 ? ret : NF_DROP;
- }
-#endif
- return ret;
-}
-
-static unsigned int
-ip_nat_local_fn(unsigned int hooknum,
- struct sk_buff **pskb,
- const struct net_device *in,
- const struct net_device *out,
- int (*okfn)(struct sk_buff *))
-{
- struct ip_conntrack *ct;
- enum ip_conntrack_info ctinfo;
- unsigned int ret;
-
- /* root is playing with raw sockets. */
- if ((*pskb)->len < sizeof(struct iphdr)
- || (*pskb)->nh.iph->ihl * 4 < sizeof(struct iphdr))
- return NF_ACCEPT;
-
- ret = ip_nat_fn(hooknum, pskb, in, out, okfn);
- if (ret != NF_DROP && ret != NF_STOLEN
- && (ct = ip_conntrack_get(*pskb, &ctinfo)) != NULL) {
- enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo);
-
- if (ct->tuplehash[dir].tuple.dst.ip !=
- ct->tuplehash[!dir].tuple.src.ip) {
- if (ip_route_me_harder(pskb, RTN_UNSPEC))
- ret = NF_DROP;
- }
-#ifdef CONFIG_XFRM
- else if (ct->tuplehash[dir].tuple.dst.u.all !=
- ct->tuplehash[!dir].tuple.src.u.all)
- if (ip_xfrm_me_harder(pskb))
- ret = NF_DROP;
-#endif
-
- }
- return ret;
-}
-
-static unsigned int
-ip_nat_adjust(unsigned int hooknum,
- struct sk_buff **pskb,
- const struct net_device *in,
- const struct net_device *out,
- int (*okfn)(struct sk_buff *))
-{
- struct ip_conntrack *ct;
- enum ip_conntrack_info ctinfo;
-
- ct = ip_conntrack_get(*pskb, &ctinfo);
- if (ct && test_bit(IPS_SEQ_ADJUST_BIT, &ct->status)) {
- DEBUGP("ip_nat_standalone: adjusting sequence number\n");
- if (!ip_nat_seq_adjust(pskb, ct, ctinfo))
- return NF_DROP;
- }
- return NF_ACCEPT;
-}
-
-/* We must be after connection tracking and before packet filtering. */
-
-static struct nf_hook_ops ip_nat_ops[] = {
- /* Before packet filtering, change destination */
- {
- .hook = ip_nat_in,
- .owner = THIS_MODULE,
- .pf = PF_INET,
- .hooknum = NF_IP_PRE_ROUTING,
- .priority = NF_IP_PRI_NAT_DST,
- },
- /* After packet filtering, change source */
- {
- .hook = ip_nat_out,
- .owner = THIS_MODULE,
- .pf = PF_INET,
- .hooknum = NF_IP_POST_ROUTING,
- .priority = NF_IP_PRI_NAT_SRC,
- },
- /* After conntrack, adjust sequence number */
- {
- .hook = ip_nat_adjust,
- .owner = THIS_MODULE,
- .pf = PF_INET,
- .hooknum = NF_IP_POST_ROUTING,
- .priority = NF_IP_PRI_NAT_SEQ_ADJUST,
- },
- /* Before packet filtering, change destination */
- {
- .hook = ip_nat_local_fn,
- .owner = THIS_MODULE,
- .pf = PF_INET,
- .hooknum = NF_IP_LOCAL_OUT,
- .priority = NF_IP_PRI_NAT_DST,
- },
- /* After packet filtering, change source */
- {
- .hook = ip_nat_fn,
- .owner = THIS_MODULE,
- .pf = PF_INET,
- .hooknum = NF_IP_LOCAL_IN,
- .priority = NF_IP_PRI_NAT_SRC,
- },
- /* After conntrack, adjust sequence number */
- {
- .hook = ip_nat_adjust,
- .owner = THIS_MODULE,
- .pf = PF_INET,
- .hooknum = NF_IP_LOCAL_IN,
- .priority = NF_IP_PRI_NAT_SEQ_ADJUST,
- },
-};
-
-static int __init ip_nat_standalone_init(void)
-{
- int ret = 0;
-
- need_conntrack();
-
-#ifdef CONFIG_XFRM
- BUG_ON(ip_nat_decode_session != NULL);
- ip_nat_decode_session = nat_decode_session;
-#endif
- ret = ip_nat_rule_init();
- if (ret < 0) {
- printk("ip_nat_init: can't setup rules.\n");
- goto cleanup_decode_session;
- }
- ret = nf_register_hooks(ip_nat_ops, ARRAY_SIZE(ip_nat_ops));
- if (ret < 0) {
- printk("ip_nat_init: can't register hooks.\n");
- goto cleanup_rule_init;
- }
- return ret;
-
- cleanup_rule_init:
- ip_nat_rule_cleanup();
- cleanup_decode_session:
-#ifdef CONFIG_XFRM
- ip_nat_decode_session = NULL;
- synchronize_net();
-#endif
- return ret;
-}
-
-static void __exit ip_nat_standalone_fini(void)
-{
- nf_unregister_hooks(ip_nat_ops, ARRAY_SIZE(ip_nat_ops));
- ip_nat_rule_cleanup();
-#ifdef CONFIG_XFRM
- ip_nat_decode_session = NULL;
- synchronize_net();
-#endif
-}
-
-module_init(ip_nat_standalone_init);
-module_exit(ip_nat_standalone_fini);
-
-MODULE_LICENSE("GPL");
diff --git a/net/ipv4/netfilter/ip_nat_tftp.c b/net/ipv4/netfilter/ip_nat_tftp.c
deleted file mode 100644
index 604793536fc..00000000000
--- a/net/ipv4/netfilter/ip_nat_tftp.c
+++ /dev/null
@@ -1,70 +0,0 @@
-/* (C) 2001-2002 Magnus Boden <mb@ozaba.mine.nu>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
- * Version: 0.0.7
- *
- * Thu 21 Mar 2002 Harald Welte <laforge@gnumonks.org>
- * - Port to newnat API
- *
- * This module currently supports DNAT:
- * iptables -t nat -A PREROUTING -d x.x.x.x -j DNAT --to-dest x.x.x.y
- *
- * and SNAT:
- * iptables -t nat -A POSTROUTING { -j MASQUERADE , -j SNAT --to-source x.x.x.x }
- *
- * It has not been tested with
- * -j SNAT --to-source x.x.x.x-x.x.x.y since I only have one external IP.
- * If you do test this, please let me know whether it works.
- *
- */
-
-#include <linux/module.h>
-#include <linux/netfilter_ipv4.h>
-#include <linux/ip.h>
-#include <linux/udp.h>
-
-#include <linux/netfilter.h>
-#include <linux/netfilter_ipv4/ip_tables.h>
-#include <linux/netfilter_ipv4/ip_conntrack_helper.h>
-#include <linux/netfilter_ipv4/ip_conntrack_tftp.h>
-#include <linux/netfilter_ipv4/ip_nat_helper.h>
-#include <linux/netfilter_ipv4/ip_nat_rule.h>
-#include <linux/moduleparam.h>
-
-MODULE_AUTHOR("Magnus Boden <mb@ozaba.mine.nu>");
-MODULE_DESCRIPTION("tftp NAT helper");
-MODULE_LICENSE("GPL");
-
-static unsigned int help(struct sk_buff **pskb,
- enum ip_conntrack_info ctinfo,
- struct ip_conntrack_expect *exp)
-{
- struct ip_conntrack *ct = exp->master;
-
- exp->saved_proto.udp.port
- = ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.u.udp.port;
- exp->dir = IP_CT_DIR_REPLY;
- exp->expectfn = ip_nat_follow_master;
- if (ip_conntrack_expect_related(exp) != 0)
- return NF_DROP;
- return NF_ACCEPT;
-}
-
-static void __exit ip_nat_tftp_fini(void)
-{
- rcu_assign_pointer(ip_nat_tftp_hook, NULL);
- synchronize_rcu();
-}
-
-static int __init ip_nat_tftp_init(void)
-{
- BUG_ON(rcu_dereference(ip_nat_tftp_hook));
- rcu_assign_pointer(ip_nat_tftp_hook, help);
- return 0;
-}
-
-module_init(ip_nat_tftp_init);
-module_exit(ip_nat_tftp_fini);
diff --git a/net/ipv4/netfilter/ip_queue.c b/net/ipv4/netfilter/ip_queue.c
index a14798a850d..702d94db19b 100644
--- a/net/ipv4/netfilter/ip_queue.c
+++ b/net/ipv4/netfilter/ip_queue.c
@@ -8,18 +8,6 @@
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
- *
- * 2000-03-27: Simplified code (thanks to Andi Kleen for clues).
- * 2000-05-20: Fixed notifier problems (following Miguel Freitas' report).
- * 2000-06-19: Fixed so nfmark is copied to metadata (reported by Sebastian
- * Zander).
- * 2000-08-01: Added Nick Williams' MAC support.
- * 2002-06-25: Code cleanup.
- * 2005-01-10: Added /proc counter for dropped packets; fixed so
- * packets aren't delivered to user space if they're going
- * to be dropped.
- * 2005-05-26: local_bh_{disable,enable} around nf_reinject (Harald Welte)
- *
*/
#include <linux/module.h>
#include <linux/skbuff.h>
@@ -191,12 +179,13 @@ ipq_flush(int verdict)
static struct sk_buff *
ipq_build_packet_message(struct ipq_queue_entry *entry, int *errp)
{
- unsigned char *old_tail;
+ sk_buff_data_t old_tail;
size_t size = 0;
size_t data_len = 0;
struct sk_buff *skb;
struct ipq_packet_msg *pmsg;
struct nlmsghdr *nlh;
+ struct timeval tv;
read_lock_bh(&queue_lock);
@@ -234,15 +223,16 @@ ipq_build_packet_message(struct ipq_queue_entry *entry, int *errp)
if (!skb)
goto nlmsg_failure;
- old_tail= skb->tail;
+ old_tail = skb->tail;
nlh = NLMSG_PUT(skb, 0, 0, IPQM_PACKET, size - sizeof(*nlh));
pmsg = NLMSG_DATA(nlh);
memset(pmsg, 0, sizeof(*pmsg));
pmsg->packet_id = (unsigned long )entry;
pmsg->data_len = data_len;
- pmsg->timestamp_sec = entry->skb->tstamp.off_sec;
- pmsg->timestamp_usec = entry->skb->tstamp.off_usec;
+ tv = ktime_to_timeval(entry->skb->tstamp);
+ pmsg->timestamp_sec = tv.tv_sec;
+ pmsg->timestamp_usec = tv.tv_usec;
pmsg->mark = entry->skb->mark;
pmsg->hook = entry->info->hook;
pmsg->hw_protocol = entry->skb->protocol;
@@ -378,7 +368,7 @@ ipq_mangle_ipv4(ipq_verdict_msg_t *v, struct ipq_queue_entry *e)
}
if (!skb_make_writable(&e->skb, v->data_len))
return -ENOMEM;
- memcpy(e->skb->data, v->payload, v->data_len);
+ skb_copy_to_linear_data(e->skb, v->payload, v->data_len);
e->skb->ip_summed = CHECKSUM_NONE;
return 0;
@@ -495,7 +485,7 @@ ipq_rcv_skb(struct sk_buff *skb)
if (skblen < sizeof(*nlh))
return;
- nlh = (struct nlmsghdr *)skb->data;
+ nlh = nlmsg_hdr(skb);
nlmsglen = nlh->nlmsg_len;
if (nlmsglen < sizeof(*nlh) || skblen < nlmsglen)
return;
@@ -678,7 +668,7 @@ static int __init ip_queue_init(void)
netlink_register_notifier(&ipq_nl_notifier);
ipqnl = netlink_kernel_create(NETLINK_FIREWALL, 0, ipq_rcv_sk,
- THIS_MODULE);
+ NULL, THIS_MODULE);
if (ipqnl == NULL) {
printk(KERN_ERR "ip_queue: failed to create netlink socket\n");
goto cleanup_netlink_notifier;
diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c
index 50cc4b92e28..e3f83bf160d 100644
--- a/net/ipv4/netfilter/ip_tables.c
+++ b/net/ipv4/netfilter/ip_tables.c
@@ -7,12 +7,6 @@
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
- *
- * 19 Jan 2002 Harald Welte <laforge@gnumonks.org>
- * - increase module usage count as soon as we have rules inside
- * a table
- * 08 Oct 2005 Harald Welte <lafore@netfilter.org>
- * - Generalize into "x_tables" layer and "{ip,ip6,arp}_tables"
*/
#include <linux/cache.h>
#include <linux/capability.h>
@@ -198,7 +192,7 @@ int do_match(struct ipt_entry_match *m,
{
/* Stop iteration if it doesn't match */
if (!m->u.kernel.match->match(skb, in, out, m->u.kernel.match, m->data,
- offset, skb->nh.iph->ihl*4, hotdrop))
+ offset, ip_hdrlen(skb), hotdrop))
return 1;
else
return 0;
@@ -231,7 +225,7 @@ ipt_do_table(struct sk_buff **pskb,
struct xt_table_info *private;
/* Initialization */
- ip = (*pskb)->nh.iph;
+ ip = ip_hdr(*pskb);
datalen = (*pskb)->len - ip->ihl * 4;
indev = in ? in->name : nulldevname;
outdev = out ? out->name : nulldevname;
@@ -320,7 +314,7 @@ ipt_do_table(struct sk_buff **pskb,
= 0x57acc001;
#endif
/* Target might have changed stuff. */
- ip = (*pskb)->nh.iph;
+ ip = ip_hdr(*pskb);
datalen = (*pskb)->len - ip->ihl * 4;
if (verdict == IPT_CONTINUE)
diff --git a/net/ipv4/netfilter/ipt_CLUSTERIP.c b/net/ipv4/netfilter/ipt_CLUSTERIP.c
index 42b08029e86..40e27342139 100644
--- a/net/ipv4/netfilter/ipt_CLUSTERIP.c
+++ b/net/ipv4/netfilter/ipt_CLUSTERIP.c
@@ -21,15 +21,12 @@
#include <linux/if_arp.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
-
-#include <net/checksum.h>
-
#include <linux/netfilter_arp.h>
-
#include <linux/netfilter/x_tables.h>
#include <linux/netfilter_ipv4/ip_tables.h>
#include <linux/netfilter_ipv4/ipt_CLUSTERIP.h>
-#include <net/netfilter/nf_conntrack_compat.h>
+#include <net/netfilter/nf_conntrack.h>
+#include <net/checksum.h>
#define CLUSTERIP_VERSION "0.8"
@@ -240,7 +237,7 @@ clusterip_del_node(struct clusterip_config *c, u_int16_t nodenum)
static inline u_int32_t
clusterip_hashfn(struct sk_buff *skb, struct clusterip_config *config)
{
- struct iphdr *iph = skb->nh.iph;
+ struct iphdr *iph = ip_hdr(skb);
unsigned long hashval;
u_int16_t sport, dport;
u_int16_t *ports;
@@ -310,15 +307,16 @@ target(struct sk_buff **pskb,
const void *targinfo)
{
const struct ipt_clusterip_tgt_info *cipinfo = targinfo;
+ struct nf_conn *ct;
enum ip_conntrack_info ctinfo;
- u_int32_t *mark, hash;
+ u_int32_t hash;
/* don't need to clusterip_config_get() here, since refcount
* is only decremented by destroy() - and ip_tables guarantees
* that the ->target() function isn't called after ->destroy() */
- mark = nf_ct_get_mark((*pskb), &ctinfo);
- if (mark == NULL) {
+ ct = nf_ct_get(*pskb, &ctinfo);
+ if (ct == NULL) {
printk(KERN_ERR "CLUSTERIP: no conntrack!\n");
/* FIXME: need to drop invalid ones, since replies
* to outgoing connections of other nodes will be
@@ -328,7 +326,7 @@ target(struct sk_buff **pskb,
/* special case: ICMP error handling. conntrack distinguishes between
* error messages (RELATED) and information requests (see below) */
- if ((*pskb)->nh.iph->protocol == IPPROTO_ICMP
+ if (ip_hdr(*pskb)->protocol == IPPROTO_ICMP
&& (ctinfo == IP_CT_RELATED
|| ctinfo == IP_CT_RELATED+IP_CT_IS_REPLY))
return XT_CONTINUE;
@@ -341,7 +339,7 @@ target(struct sk_buff **pskb,
switch (ctinfo) {
case IP_CT_NEW:
- *mark = hash;
+ ct->mark = hash;
break;
case IP_CT_RELATED:
case IP_CT_RELATED+IP_CT_IS_REPLY:
@@ -358,7 +356,7 @@ target(struct sk_buff **pskb,
#ifdef DEBUG_CLUSTERP
DUMP_TUPLE(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple);
#endif
- DEBUGP("hash=%u ct_hash=%u ", hash, *mark);
+ DEBUGP("hash=%u ct_hash=%u ", hash, ct->mark);
if (!clusterip_responsible(cipinfo->config, hash)) {
DEBUGP("not responsible\n");
return NF_DROP;
@@ -521,7 +519,7 @@ arp_mangle(unsigned int hook,
const struct net_device *out,
int (*okfn)(struct sk_buff *))
{
- struct arphdr *arp = (*pskb)->nh.arph;
+ struct arphdr *arp = arp_hdr(*pskb);
struct arp_payload *payload;
struct clusterip_config *c;
diff --git a/net/ipv4/netfilter/ipt_ECN.c b/net/ipv4/netfilter/ipt_ECN.c
index 4f565633631..918ca92e534 100644
--- a/net/ipv4/netfilter/ipt_ECN.c
+++ b/net/ipv4/netfilter/ipt_ECN.c
@@ -5,14 +5,13 @@
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
- *
- * ipt_ECN.c,v 1.5 2002/08/18 19:36:51 laforge Exp
*/
#include <linux/in.h>
#include <linux/module.h>
#include <linux/skbuff.h>
#include <linux/ip.h>
+#include <net/ip.h>
#include <linux/tcp.h>
#include <net/checksum.h>
@@ -29,13 +28,13 @@ MODULE_DESCRIPTION("iptables ECN modification module");
static inline int
set_ect_ip(struct sk_buff **pskb, const struct ipt_ECN_info *einfo)
{
- struct iphdr *iph = (*pskb)->nh.iph;
+ struct iphdr *iph = ip_hdr(*pskb);
if ((iph->tos & IPT_ECN_IP_MASK) != (einfo->ip_ect & IPT_ECN_IP_MASK)) {
__u8 oldtos;
if (!skb_make_writable(pskb, sizeof(struct iphdr)))
return 0;
- iph = (*pskb)->nh.iph;
+ iph = ip_hdr(*pskb);
oldtos = iph->tos;
iph->tos &= ~IPT_ECN_IP_MASK;
iph->tos |= (einfo->ip_ect & IPT_ECN_IP_MASK);
@@ -52,7 +51,7 @@ set_ect_tcp(struct sk_buff **pskb, const struct ipt_ECN_info *einfo)
__be16 oldval;
 	/* Not enough header? */
- tcph = skb_header_pointer(*pskb, (*pskb)->nh.iph->ihl*4,
+ tcph = skb_header_pointer(*pskb, ip_hdrlen(*pskb),
sizeof(_tcph), &_tcph);
if (!tcph)
return 0;
@@ -63,9 +62,9 @@ set_ect_tcp(struct sk_buff **pskb, const struct ipt_ECN_info *einfo)
tcph->cwr == einfo->proto.tcp.cwr)))
return 1;
- if (!skb_make_writable(pskb, (*pskb)->nh.iph->ihl*4+sizeof(*tcph)))
+ if (!skb_make_writable(pskb, ip_hdrlen(*pskb) + sizeof(*tcph)))
return 0;
- tcph = (void *)(*pskb)->nh.iph + (*pskb)->nh.iph->ihl*4;
+ tcph = (void *)ip_hdr(*pskb) + ip_hdrlen(*pskb);
oldval = ((__be16 *)tcph)[6];
if (einfo->operation & IPT_ECN_OP_SET_ECE)
@@ -93,7 +92,7 @@ target(struct sk_buff **pskb,
return NF_DROP;
if (einfo->operation & (IPT_ECN_OP_SET_ECE | IPT_ECN_OP_SET_CWR)
- && (*pskb)->nh.iph->protocol == IPPROTO_TCP)
+ && ip_hdr(*pskb)->protocol == IPPROTO_TCP)
if (!set_ect_tcp(pskb, einfo))
return NF_DROP;
diff --git a/net/ipv4/netfilter/ipt_LOG.c b/net/ipv4/netfilter/ipt_LOG.c
index d9c37fd9422..a42c5cd968b 100644
--- a/net/ipv4/netfilter/ipt_LOG.c
+++ b/net/ipv4/netfilter/ipt_LOG.c
@@ -399,9 +399,9 @@ ipt_log_packet(unsigned int pf,
/* MAC logging for input chain only. */
printk("MAC=");
if (skb->dev && skb->dev->hard_header_len
- && skb->mac.raw != (void*)skb->nh.iph) {
+ && skb->mac_header != skb->network_header) {
int i;
- unsigned char *p = skb->mac.raw;
+ const unsigned char *p = skb_mac_header(skb);
for (i = 0; i < skb->dev->hard_header_len; i++,p++)
printk("%02x%c", *p,
i==skb->dev->hard_header_len - 1
@@ -477,14 +477,10 @@ static int __init ipt_log_init(void)
ret = xt_register_target(&ipt_log_reg);
if (ret < 0)
return ret;
- if (nf_log_register(PF_INET, &ipt_log_logger) < 0) {
- printk(KERN_WARNING "ipt_LOG: not logging via system console "
- "since somebody else already registered for PF_INET\n");
- /* we cannot make module load fail here, since otherwise
- * iptables userspace would abort */
- }
-
- return 0;
+ ret = nf_log_register(PF_INET, &ipt_log_logger);
+ if (ret < 0 && ret != -EEXIST)
+ xt_unregister_target(&ipt_log_reg);
+ return ret;
}
static void __exit ipt_log_fini(void)
diff --git a/net/ipv4/netfilter/ipt_MASQUERADE.c b/net/ipv4/netfilter/ipt_MASQUERADE.c
index b5955f3a3f8..d4f2d777533 100644
--- a/net/ipv4/netfilter/ipt_MASQUERADE.c
+++ b/net/ipv4/netfilter/ipt_MASQUERADE.c
@@ -19,12 +19,8 @@
#include <net/ip.h>
#include <net/checksum.h>
#include <net/route.h>
-#include <linux/netfilter_ipv4.h>
-#ifdef CONFIG_NF_NAT_NEEDED
#include <net/netfilter/nf_nat_rule.h>
-#else
-#include <linux/netfilter_ipv4/ip_nat_rule.h>
-#endif
+#include <linux/netfilter_ipv4.h>
#include <linux/netfilter/x_tables.h>
MODULE_LICENSE("GPL");
@@ -48,7 +44,7 @@ masquerade_check(const char *tablename,
void *targinfo,
unsigned int hook_mask)
{
- const struct ip_nat_multi_range_compat *mr = targinfo;
+ const struct nf_nat_multi_range_compat *mr = targinfo;
if (mr->range[0].flags & IP_NAT_RANGE_MAP_IPS) {
DEBUGP("masquerade_check: bad MAP_IPS.\n");
@@ -69,33 +65,26 @@ masquerade_target(struct sk_buff **pskb,
const struct xt_target *target,
const void *targinfo)
{
-#ifdef CONFIG_NF_NAT_NEEDED
+ struct nf_conn *ct;
struct nf_conn_nat *nat;
-#endif
- struct ip_conntrack *ct;
enum ip_conntrack_info ctinfo;
- struct ip_nat_range newrange;
- const struct ip_nat_multi_range_compat *mr;
+ struct nf_nat_range newrange;
+ const struct nf_nat_multi_range_compat *mr;
struct rtable *rt;
__be32 newsrc;
- IP_NF_ASSERT(hooknum == NF_IP_POST_ROUTING);
+ NF_CT_ASSERT(hooknum == NF_IP_POST_ROUTING);
- ct = ip_conntrack_get(*pskb, &ctinfo);
-#ifdef CONFIG_NF_NAT_NEEDED
+ ct = nf_ct_get(*pskb, &ctinfo);
nat = nfct_nat(ct);
-#endif
- IP_NF_ASSERT(ct && (ctinfo == IP_CT_NEW || ctinfo == IP_CT_RELATED
+
+ NF_CT_ASSERT(ct && (ctinfo == IP_CT_NEW || ctinfo == IP_CT_RELATED
|| ctinfo == IP_CT_RELATED + IP_CT_IS_REPLY));
/* Source address is 0.0.0.0 - locally generated packet that is
* probably not supposed to be masqueraded.
*/
-#ifdef CONFIG_NF_NAT_NEEDED
if (ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.u3.ip == 0)
-#else
- if (ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.ip == 0)
-#endif
return NF_ACCEPT;
mr = targinfo;
@@ -107,40 +96,30 @@ masquerade_target(struct sk_buff **pskb,
}
write_lock_bh(&masq_lock);
-#ifdef CONFIG_NF_NAT_NEEDED
nat->masq_index = out->ifindex;
-#else
- ct->nat.masq_index = out->ifindex;
-#endif
write_unlock_bh(&masq_lock);
/* Transfer from original range. */
- newrange = ((struct ip_nat_range)
+ newrange = ((struct nf_nat_range)
{ mr->range[0].flags | IP_NAT_RANGE_MAP_IPS,
newsrc, newsrc,
mr->range[0].min, mr->range[0].max });
/* Hand modified range to generic setup. */
- return ip_nat_setup_info(ct, &newrange, hooknum);
+ return nf_nat_setup_info(ct, &newrange, hooknum);
}
static inline int
-device_cmp(struct ip_conntrack *i, void *ifindex)
+device_cmp(struct nf_conn *i, void *ifindex)
{
- int ret;
-#ifdef CONFIG_NF_NAT_NEEDED
struct nf_conn_nat *nat = nfct_nat(i);
+ int ret;
if (!nat)
return 0;
-#endif
read_lock_bh(&masq_lock);
-#ifdef CONFIG_NF_NAT_NEEDED
ret = (nat->masq_index == (int)(long)ifindex);
-#else
- ret = (i->nat.masq_index == (int)(long)ifindex);
-#endif
read_unlock_bh(&masq_lock);
return ret;
@@ -156,9 +135,9 @@ static int masq_device_event(struct notifier_block *this,
/* Device was downed. Search entire table for
conntracks which were associated with that device,
and forget them. */
- IP_NF_ASSERT(dev->ifindex != 0);
+ NF_CT_ASSERT(dev->ifindex != 0);
- ip_ct_iterate_cleanup(device_cmp, (void *)(long)dev->ifindex);
+ nf_ct_iterate_cleanup(device_cmp, (void *)(long)dev->ifindex);
}
return NOTIFY_DONE;
@@ -174,9 +153,9 @@ static int masq_inet_event(struct notifier_block *this,
/* IP address was deleted. Search entire table for
conntracks which were associated with that device,
and forget them. */
- IP_NF_ASSERT(dev->ifindex != 0);
+ NF_CT_ASSERT(dev->ifindex != 0);
- ip_ct_iterate_cleanup(device_cmp, (void *)(long)dev->ifindex);
+ nf_ct_iterate_cleanup(device_cmp, (void *)(long)dev->ifindex);
}
return NOTIFY_DONE;
@@ -194,7 +173,7 @@ static struct xt_target masquerade = {
.name = "MASQUERADE",
.family = AF_INET,
.target = masquerade_target,
- .targetsize = sizeof(struct ip_nat_multi_range_compat),
+ .targetsize = sizeof(struct nf_nat_multi_range_compat),
.table = "nat",
.hooks = 1 << NF_IP_POST_ROUTING,
.checkentry = masquerade_check,
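
All of the NAT targets in this series build an nf_nat_range and hand it to nf_nat_setup_info(); a minimal sketch of that step, assuming the 2.6.21-era field layout (flags, min_ip/max_ip, and min/max proto values), with a hypothetical helper name mirroring the compound literals used above:

	/* Map the source to a single address, keeping the rule's
	 * original port range. */
	static unsigned int snat_to_addr(struct nf_conn *ct,
					 const struct nf_nat_multi_range_compat *mr,
					 __be32 newsrc, unsigned int hooknum)
	{
		struct nf_nat_range newrange = {
			.flags	= mr->range[0].flags | IP_NAT_RANGE_MAP_IPS,
			.min_ip	= newsrc,
			.max_ip	= newsrc,
			.min	= mr->range[0].min,
			.max	= mr->range[0].max,
		};

		return nf_nat_setup_info(ct, &newrange, hooknum);
	}
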
diff --git a/net/ipv4/netfilter/ipt_NETMAP.c b/net/ipv4/netfilter/ipt_NETMAP.c
index fd7aaa347cd..068c69bce30 100644
--- a/net/ipv4/netfilter/ipt_NETMAP.c
+++ b/net/ipv4/netfilter/ipt_NETMAP.c
@@ -16,11 +16,7 @@
#include <linux/netfilter.h>
#include <linux/netfilter_ipv4.h>
#include <linux/netfilter/x_tables.h>
-#ifdef CONFIG_NF_NAT_NEEDED
#include <net/netfilter/nf_nat_rule.h>
-#else
-#include <linux/netfilter_ipv4/ip_nat_rule.h>
-#endif
#define MODULENAME "NETMAP"
MODULE_LICENSE("GPL");
@@ -40,7 +36,7 @@ check(const char *tablename,
void *targinfo,
unsigned int hook_mask)
{
- const struct ip_nat_multi_range_compat *mr = targinfo;
+ const struct nf_nat_multi_range_compat *mr = targinfo;
if (!(mr->range[0].flags & IP_NAT_RANGE_MAP_IPS)) {
DEBUGP(MODULENAME":check: bad MAP_IPS.\n");
@@ -61,39 +57,39 @@ target(struct sk_buff **pskb,
const struct xt_target *target,
const void *targinfo)
{
- struct ip_conntrack *ct;
+ struct nf_conn *ct;
enum ip_conntrack_info ctinfo;
__be32 new_ip, netmask;
- const struct ip_nat_multi_range_compat *mr = targinfo;
- struct ip_nat_range newrange;
+ const struct nf_nat_multi_range_compat *mr = targinfo;
+ struct nf_nat_range newrange;
- IP_NF_ASSERT(hooknum == NF_IP_PRE_ROUTING
+ NF_CT_ASSERT(hooknum == NF_IP_PRE_ROUTING
|| hooknum == NF_IP_POST_ROUTING
|| hooknum == NF_IP_LOCAL_OUT);
- ct = ip_conntrack_get(*pskb, &ctinfo);
+ ct = nf_ct_get(*pskb, &ctinfo);
netmask = ~(mr->range[0].min_ip ^ mr->range[0].max_ip);
if (hooknum == NF_IP_PRE_ROUTING || hooknum == NF_IP_LOCAL_OUT)
- new_ip = (*pskb)->nh.iph->daddr & ~netmask;
+ new_ip = ip_hdr(*pskb)->daddr & ~netmask;
else
- new_ip = (*pskb)->nh.iph->saddr & ~netmask;
+ new_ip = ip_hdr(*pskb)->saddr & ~netmask;
new_ip |= mr->range[0].min_ip & netmask;
- newrange = ((struct ip_nat_range)
+ newrange = ((struct nf_nat_range)
{ mr->range[0].flags | IP_NAT_RANGE_MAP_IPS,
new_ip, new_ip,
mr->range[0].min, mr->range[0].max });
/* Hand modified range to generic setup. */
- return ip_nat_setup_info(ct, &newrange, hooknum);
+ return nf_nat_setup_info(ct, &newrange, hooknum);
}
static struct xt_target target_module = {
.name = MODULENAME,
.family = AF_INET,
.target = target,
- .targetsize = sizeof(struct ip_nat_multi_range_compat),
+ .targetsize = sizeof(struct nf_nat_multi_range_compat),
.table = "nat",
.hooks = (1 << NF_IP_PRE_ROUTING) | (1 << NF_IP_POST_ROUTING) |
(1 << NF_IP_LOCAL_OUT),
diff --git a/net/ipv4/netfilter/ipt_REDIRECT.c b/net/ipv4/netfilter/ipt_REDIRECT.c
index c2b6b80670f..68cc76a198e 100644
--- a/net/ipv4/netfilter/ipt_REDIRECT.c
+++ b/net/ipv4/netfilter/ipt_REDIRECT.c
@@ -19,11 +19,7 @@
#include <net/checksum.h>
#include <linux/netfilter_ipv4.h>
#include <linux/netfilter/x_tables.h>
-#ifdef CONFIG_NF_NAT_NEEDED
#include <net/netfilter/nf_nat_rule.h>
-#else
-#include <linux/netfilter_ipv4/ip_nat_rule.h>
-#endif
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Netfilter Core Team <coreteam@netfilter.org>");
@@ -43,7 +39,7 @@ redirect_check(const char *tablename,
void *targinfo,
unsigned int hook_mask)
{
- const struct ip_nat_multi_range_compat *mr = targinfo;
+ const struct nf_nat_multi_range_compat *mr = targinfo;
if (mr->range[0].flags & IP_NAT_RANGE_MAP_IPS) {
DEBUGP("redirect_check: bad MAP_IPS.\n");
@@ -64,17 +60,17 @@ redirect_target(struct sk_buff **pskb,
const struct xt_target *target,
const void *targinfo)
{
- struct ip_conntrack *ct;
+ struct nf_conn *ct;
enum ip_conntrack_info ctinfo;
__be32 newdst;
- const struct ip_nat_multi_range_compat *mr = targinfo;
- struct ip_nat_range newrange;
+ const struct nf_nat_multi_range_compat *mr = targinfo;
+ struct nf_nat_range newrange;
- IP_NF_ASSERT(hooknum == NF_IP_PRE_ROUTING
+ NF_CT_ASSERT(hooknum == NF_IP_PRE_ROUTING
|| hooknum == NF_IP_LOCAL_OUT);
- ct = ip_conntrack_get(*pskb, &ctinfo);
- IP_NF_ASSERT(ct && (ctinfo == IP_CT_NEW || ctinfo == IP_CT_RELATED));
+ ct = nf_ct_get(*pskb, &ctinfo);
+ NF_CT_ASSERT(ct && (ctinfo == IP_CT_NEW || ctinfo == IP_CT_RELATED));
/* Local packets: make them go to loopback */
if (hooknum == NF_IP_LOCAL_OUT)
@@ -96,20 +92,20 @@ redirect_target(struct sk_buff **pskb,
}
/* Transfer from original range. */
- newrange = ((struct ip_nat_range)
+ newrange = ((struct nf_nat_range)
{ mr->range[0].flags | IP_NAT_RANGE_MAP_IPS,
newdst, newdst,
mr->range[0].min, mr->range[0].max });
/* Hand modified range to generic setup. */
- return ip_nat_setup_info(ct, &newrange, hooknum);
+ return nf_nat_setup_info(ct, &newrange, hooknum);
}
static struct xt_target redirect_reg = {
.name = "REDIRECT",
.family = AF_INET,
.target = redirect_target,
- .targetsize = sizeof(struct ip_nat_multi_range_compat),
+ .targetsize = sizeof(struct nf_nat_multi_range_compat),
.table = "nat",
.hooks = (1 << NF_IP_PRE_ROUTING) | (1 << NF_IP_LOCAL_OUT),
.checkentry = redirect_check,
diff --git a/net/ipv4/netfilter/ipt_REJECT.c b/net/ipv4/netfilter/ipt_REJECT.c
index 80f739e2182..9041e0741f6 100644
--- a/net/ipv4/netfilter/ipt_REJECT.c
+++ b/net/ipv4/netfilter/ipt_REJECT.c
@@ -1,7 +1,5 @@
/*
* This is a module which is used for rejecting packets.
- * Added support for customized reject packets (Jozsef Kadlecsik).
- * Added support for ICMP type-3-code-13 (Maciej Soltysiak). [RFC 1812]
*/
/* (C) 1999-2001 Paul `Rusty' Russell
@@ -43,7 +41,7 @@ MODULE_DESCRIPTION("iptables REJECT target module");
static void send_reset(struct sk_buff *oldskb, int hook)
{
struct sk_buff *nskb;
- struct iphdr *iph = oldskb->nh.iph;
+ struct iphdr *niph;
struct tcphdr _otcph, *oth, *tcph;
__be16 tmp_port;
__be32 tmp_addr;
@@ -51,10 +49,10 @@ static void send_reset(struct sk_buff *oldskb, int hook)
unsigned int addr_type;
/* IP header checks: fragment. */
- if (oldskb->nh.iph->frag_off & htons(IP_OFFSET))
+ if (ip_hdr(oldskb)->frag_off & htons(IP_OFFSET))
return;
- oth = skb_header_pointer(oldskb, oldskb->nh.iph->ihl * 4,
+ oth = skb_header_pointer(oldskb, ip_hdrlen(oldskb),
sizeof(_otcph), &_otcph);
if (oth == NULL)
return;
@@ -64,7 +62,7 @@ static void send_reset(struct sk_buff *oldskb, int hook)
return;
/* Check checksum */
- if (nf_ip_checksum(oldskb, hook, iph->ihl * 4, IPPROTO_TCP))
+ if (nf_ip_checksum(oldskb, hook, ip_hdrlen(oldskb), IPPROTO_TCP))
return;
/* We need a linear, writeable skb. We also need to expand
@@ -84,20 +82,21 @@ static void send_reset(struct sk_buff *oldskb, int hook)
skb_shinfo(nskb)->gso_segs = 0;
skb_shinfo(nskb)->gso_type = 0;
- tcph = (struct tcphdr *)((u_int32_t*)nskb->nh.iph + nskb->nh.iph->ihl);
+ tcph = (struct tcphdr *)(skb_network_header(nskb) + ip_hdrlen(nskb));
/* Swap source and dest */
- tmp_addr = nskb->nh.iph->saddr;
- nskb->nh.iph->saddr = nskb->nh.iph->daddr;
- nskb->nh.iph->daddr = tmp_addr;
+ niph = ip_hdr(nskb);
+ tmp_addr = niph->saddr;
+ niph->saddr = niph->daddr;
+ niph->daddr = tmp_addr;
tmp_port = tcph->source;
tcph->source = tcph->dest;
tcph->dest = tmp_port;
/* Truncate to length (no data) */
tcph->doff = sizeof(struct tcphdr)/4;
- skb_trim(nskb, nskb->nh.iph->ihl*4 + sizeof(struct tcphdr));
- nskb->nh.iph->tot_len = htons(nskb->len);
+ skb_trim(nskb, ip_hdrlen(nskb) + sizeof(struct tcphdr));
+ niph->tot_len = htons(nskb->len);
if (tcph->ack) {
needs_ack = 0;
@@ -105,9 +104,9 @@ static void send_reset(struct sk_buff *oldskb, int hook)
tcph->ack_seq = 0;
} else {
needs_ack = 1;
- tcph->ack_seq = htonl(ntohl(oth->seq) + oth->syn + oth->fin
- + oldskb->len - oldskb->nh.iph->ihl*4
- - (oth->doff<<2));
+ tcph->ack_seq = htonl(ntohl(oth->seq) + oth->syn + oth->fin +
+ oldskb->len - ip_hdrlen(oldskb) -
+ (oth->doff << 2));
tcph->seq = 0;
}
@@ -122,14 +121,13 @@ static void send_reset(struct sk_buff *oldskb, int hook)
/* Adjust TCP checksum */
tcph->check = 0;
tcph->check = tcp_v4_check(sizeof(struct tcphdr),
- nskb->nh.iph->saddr,
- nskb->nh.iph->daddr,
+ niph->saddr, niph->daddr,
csum_partial((char *)tcph,
sizeof(struct tcphdr), 0));
/* Set DF, id = 0 */
- nskb->nh.iph->frag_off = htons(IP_DF);
- nskb->nh.iph->id = 0;
+ niph->frag_off = htons(IP_DF);
+ niph->id = 0;
addr_type = RTN_UNSPEC;
if (hook != NF_IP_FORWARD
@@ -145,12 +143,11 @@ static void send_reset(struct sk_buff *oldskb, int hook)
nskb->ip_summed = CHECKSUM_NONE;
/* Adjust IP TTL */
- nskb->nh.iph->ttl = dst_metric(nskb->dst, RTAX_HOPLIMIT);
+ niph->ttl = dst_metric(nskb->dst, RTAX_HOPLIMIT);
/* Adjust IP checksum */
- nskb->nh.iph->check = 0;
- nskb->nh.iph->check = ip_fast_csum((unsigned char *)nskb->nh.iph,
- nskb->nh.iph->ihl);
+ niph->check = 0;
+ niph->check = ip_fast_csum(skb_network_header(nskb), niph->ihl);
/* "Never happens" */
if (nskb->len > dst_mtu(nskb->dst))
@@ -182,7 +179,7 @@ static unsigned int reject(struct sk_buff **pskb,
/* Our naive response construction doesn't deal with IP
options, and probably shouldn't try. */
- if ((*pskb)->nh.iph->ihl<<2 != sizeof(struct iphdr))
+ if (ip_hdrlen(*pskb) != sizeof(struct iphdr))
return NF_DROP;
/* WARNING: This code causes reentry within iptables.
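
The ack_seq computation in send_reset() is worth spelling out: a RST that acknowledges must cover the full sequence-space footprint of the offending segment, i.e. its payload plus one unit each for SYN and FIN. As a sketch, restating the hunk above with a hypothetical helper:

	static __be32 rst_ack_seq(const struct sk_buff *oldskb,
				  const struct tcphdr *oth)
	{
		/* Payload = total length minus IP and TCP header lengths. */
		unsigned int payload = oldskb->len - ip_hdrlen(oldskb) -
				       (oth->doff << 2);

		return htonl(ntohl(oth->seq) + oth->syn + oth->fin + payload);
	}
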
diff --git a/net/ipv4/netfilter/ipt_SAME.c b/net/ipv4/netfilter/ipt_SAME.c
index bd4404e5c68..511e5ff8493 100644
--- a/net/ipv4/netfilter/ipt_SAME.c
+++ b/net/ipv4/netfilter/ipt_SAME.c
@@ -7,21 +7,6 @@
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
- *
- * 010320 Martin Josefsson <gandalf@wlug.westbo.se>
- * * copied ipt_BALANCE.c to ipt_SAME.c and changed a few things.
- * 010728 Martin Josefsson <gandalf@wlug.westbo.se>
- * * added --nodst to not include destination-ip in new source
- * calculations.
- * * added some more sanity-checks.
- * 010729 Martin Josefsson <gandalf@wlug.westbo.se>
- * * fixed a buggy if-statement in same_check(), should have
- * used ntohl() but didn't.
- * * added support for multiple ranges. IPT_SAME_MAX_RANGE is
- * defined in linux/include/linux/netfilter_ipv4/ipt_SAME.h
- * and is currently set to 10.
- * * added support for 1-address range, nice to have now that
- * we have multiple ranges.
*/
#include <linux/types.h>
#include <linux/ip.h>
@@ -35,11 +20,7 @@
#include <net/checksum.h>
#include <linux/netfilter_ipv4.h>
#include <linux/netfilter/x_tables.h>
-#ifdef CONFIG_NF_NAT_NEEDED
#include <net/netfilter/nf_nat_rule.h>
-#else
-#include <linux/netfilter_ipv4/ip_nat_rule.h>
-#endif
#include <linux/netfilter_ipv4/ipt_SAME.h>
MODULE_LICENSE("GPL");
@@ -138,17 +119,17 @@ same_target(struct sk_buff **pskb,
const struct xt_target *target,
const void *targinfo)
{
- struct ip_conntrack *ct;
+ struct nf_conn *ct;
enum ip_conntrack_info ctinfo;
u_int32_t tmpip, aindex;
__be32 new_ip;
const struct ipt_same_info *same = targinfo;
- struct ip_nat_range newrange;
- const struct ip_conntrack_tuple *t;
+ struct nf_nat_range newrange;
+ const struct nf_conntrack_tuple *t;
- IP_NF_ASSERT(hooknum == NF_IP_PRE_ROUTING ||
+ NF_CT_ASSERT(hooknum == NF_IP_PRE_ROUTING ||
hooknum == NF_IP_POST_ROUTING);
- ct = ip_conntrack_get(*pskb, &ctinfo);
+ ct = nf_ct_get(*pskb, &ctinfo);
t = &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple;
@@ -157,17 +138,10 @@ same_target(struct sk_buff **pskb,
Here we calculate the index in same->iparray which
holds the ipaddress we should use */
-#ifdef CONFIG_NF_NAT_NEEDED
tmpip = ntohl(t->src.u3.ip);
if (!(same->info & IPT_SAME_NODST))
tmpip += ntohl(t->dst.u3.ip);
-#else
- tmpip = ntohl(t->src.ip);
-
- if (!(same->info & IPT_SAME_NODST))
- tmpip += ntohl(t->dst.ip);
-#endif
aindex = tmpip % same->ipnum;
new_ip = htonl(same->iparray[aindex]);
@@ -178,13 +152,13 @@ same_target(struct sk_buff **pskb,
NIPQUAD(new_ip));
/* Transfer from original range. */
- newrange = ((struct ip_nat_range)
+ newrange = ((struct nf_nat_range)
{ same->range[0].flags, new_ip, new_ip,
/* FIXME: Use ports from correct range! */
same->range[0].min, same->range[0].max });
/* Hand modified range to generic setup. */
- return ip_nat_setup_info(ct, &newrange, hooknum);
+ return nf_nat_setup_info(ct, &newrange, hooknum);
}
static struct xt_target same_reg = {
diff --git a/net/ipv4/netfilter/ipt_TOS.c b/net/ipv4/netfilter/ipt_TOS.c
index cedf9f7d9d6..0ad02f24983 100644
--- a/net/ipv4/netfilter/ipt_TOS.c
+++ b/net/ipv4/netfilter/ipt_TOS.c
@@ -29,13 +29,13 @@ target(struct sk_buff **pskb,
const void *targinfo)
{
const struct ipt_tos_target_info *tosinfo = targinfo;
- struct iphdr *iph = (*pskb)->nh.iph;
+ struct iphdr *iph = ip_hdr(*pskb);
if ((iph->tos & IPTOS_TOS_MASK) != tosinfo->tos) {
__u8 oldtos;
if (!skb_make_writable(pskb, sizeof(struct iphdr)))
return NF_DROP;
- iph = (*pskb)->nh.iph;
+ iph = ip_hdr(*pskb);
oldtos = iph->tos;
iph->tos = (iph->tos & IPTOS_PREC_MASK) | tosinfo->tos;
nf_csum_replace2(&iph->check, htons(oldtos), htons(iph->tos));
diff --git a/net/ipv4/netfilter/ipt_TTL.c b/net/ipv4/netfilter/ipt_TTL.c
index 64be31c22ba..a991ec7bd4e 100644
--- a/net/ipv4/netfilter/ipt_TTL.c
+++ b/net/ipv4/netfilter/ipt_TTL.c
@@ -32,7 +32,7 @@ ipt_ttl_target(struct sk_buff **pskb,
if (!skb_make_writable(pskb, (*pskb)->len))
return NF_DROP;
- iph = (*pskb)->nh.iph;
+ iph = ip_hdr(*pskb);
switch (info->mode) {
case IPT_TTL_SET:
diff --git a/net/ipv4/netfilter/ipt_ULOG.c b/net/ipv4/netfilter/ipt_ULOG.c
index 9acc018766f..23b607b33b3 100644
--- a/net/ipv4/netfilter/ipt_ULOG.c
+++ b/net/ipv4/netfilter/ipt_ULOG.c
@@ -2,20 +2,6 @@
* netfilter module for userspace packet logging daemons
*
* (C) 2000-2004 by Harald Welte <laforge@netfilter.org>
- *
- * 2000/09/22 ulog-cprange feature added
- * 2001/01/04 in-kernel queue as proposed by Sebastian Zander
- * <zander@fokus.gmd.de>
- * 2001/01/30 per-rule nlgroup conflicts with global queue.
- * nlgroup now global (sysctl)
- * 2001/04/19 ulog-queue reworked, now fixed buffer size specified at
- * module loadtime -HW
- * 2002/07/07 remove broken nflog_rcv() function -HW
- * 2002/08/29 fix shifted/unshifted nlgroup bug -HW
- * 2002/10/30 fix uninitialized mac_len field - <Anders K. Pedersen>
- * 2004/10/25 fix erroneous calculation of 'len' parameter to NLMSG_PUT
- * resulting in bogus 'error during NLMSG_PUT' messages.
- *
* (C) 1999-2001 Paul `Rusty' Russell
* (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org>
*
@@ -42,8 +28,6 @@
* flushtimeout:
* Specify after how many hundredths of a second the queue should be
* flushed, even if it is not yet full.
- *
- * ipt_ULOG.c,v 1.22 2002/10/30 09:07:31 laforge Exp
*/
#include <linux/module.h>
@@ -187,6 +171,7 @@ static void ipt_ulog_packet(unsigned int hooknum,
ulog_packet_msg_t *pm;
size_t size, copy_len;
struct nlmsghdr *nlh;
+ struct timeval tv;
/* ffs == find first bit set, necessary because userspace
* is already shifting the group number, but we need it unshifted.
@@ -232,13 +217,14 @@ static void ipt_ulog_packet(unsigned int hooknum,
pm = NLMSG_DATA(nlh);
/* We might not have a timestamp, get one */
- if (skb->tstamp.off_sec == 0)
+ if (skb->tstamp.tv64 == 0)
__net_timestamp((struct sk_buff *)skb);
/* copy hook, prefix, timestamp, payload, etc. */
pm->data_len = copy_len;
- put_unaligned(skb->tstamp.off_sec, &pm->timestamp_sec);
- put_unaligned(skb->tstamp.off_usec, &pm->timestamp_usec);
+ tv = ktime_to_timeval(skb->tstamp);
+ put_unaligned(tv.tv_sec, &pm->timestamp_sec);
+ put_unaligned(tv.tv_usec, &pm->timestamp_usec);
put_unaligned(skb->mark, &pm->mark);
pm->hook = hooknum;
if (prefix != NULL)
@@ -249,9 +235,9 @@ static void ipt_ulog_packet(unsigned int hooknum,
*(pm->prefix) = '\0';
if (in && in->hard_header_len > 0
- && skb->mac.raw != (void *) skb->nh.iph
+ && skb->mac_header != skb->network_header
&& in->hard_header_len <= ULOG_MAC_LEN) {
- memcpy(pm->mac, skb->mac.raw, in->hard_header_len);
+ memcpy(pm->mac, skb_mac_header(skb), in->hard_header_len);
pm->mac_len = in->hard_header_len;
} else
pm->mac_len = 0;
@@ -363,12 +349,52 @@ static int ipt_ulog_checkentry(const char *tablename,
return 1;
}
+#ifdef CONFIG_COMPAT
+struct compat_ipt_ulog_info {
+ compat_uint_t nl_group;
+ compat_size_t copy_range;
+ compat_size_t qthreshold;
+ char prefix[ULOG_PREFIX_LEN];
+};
+
+static void compat_from_user(void *dst, void *src)
+{
+ struct compat_ipt_ulog_info *cl = src;
+ struct ipt_ulog_info l = {
+ .nl_group = cl->nl_group,
+ .copy_range = cl->copy_range,
+ .qthreshold = cl->qthreshold,
+ };
+
+ memcpy(l.prefix, cl->prefix, sizeof(l.prefix));
+ memcpy(dst, &l, sizeof(l));
+}
+
+static int compat_to_user(void __user *dst, void *src)
+{
+ struct ipt_ulog_info *l = src;
+ struct compat_ipt_ulog_info cl = {
+ .nl_group = l->nl_group,
+ .copy_range = l->copy_range,
+ .qthreshold = l->qthreshold,
+ };
+
+ memcpy(cl.prefix, l->prefix, sizeof(cl.prefix));
+ return copy_to_user(dst, &cl, sizeof(cl)) ? -EFAULT : 0;
+}
+#endif /* CONFIG_COMPAT */
+
static struct xt_target ipt_ulog_reg = {
.name = "ULOG",
.family = AF_INET,
.target = ipt_ulog_target,
.targetsize = sizeof(struct ipt_ulog_info),
.checkentry = ipt_ulog_checkentry,
+#ifdef CONFIG_COMPAT
+ .compatsize = sizeof(struct compat_ipt_ulog_info),
+ .compat_from_user = compat_from_user,
+ .compat_to_user = compat_to_user,
+#endif
.me = THIS_MODULE,
};
@@ -390,14 +416,11 @@ static int __init ipt_ulog_init(void)
}
/* initialize ulog_buffers */
- for (i = 0; i < ULOG_MAXNLGROUPS; i++) {
- init_timer(&ulog_buffers[i].timer);
- ulog_buffers[i].timer.function = ulog_timer;
- ulog_buffers[i].timer.data = i;
- }
+ for (i = 0; i < ULOG_MAXNLGROUPS; i++)
+ setup_timer(&ulog_buffers[i].timer, ulog_timer, i);
nflognl = netlink_kernel_create(NETLINK_NFLOG, ULOG_MAXNLGROUPS, NULL,
- THIS_MODULE);
+ NULL, THIS_MODULE);
if (!nflognl)
return -ENOMEM;
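
setup_timer() collapses the open-coded three-step initialisation deleted above; roughly (the real helper is in include/linux/timer.h, this is a sketch of the equivalence):

	static inline void setup_timer_sketch(struct timer_list *timer,
					      void (*function)(unsigned long),
					      unsigned long data)
	{
		timer->function = function;
		timer->data = data;
		init_timer(timer);
	}
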
diff --git a/net/ipv4/netfilter/ipt_addrtype.c b/net/ipv4/netfilter/ipt_addrtype.c
index cfa0472617f..a652a145155 100644
--- a/net/ipv4/netfilter/ipt_addrtype.c
+++ b/net/ipv4/netfilter/ipt_addrtype.c
@@ -33,7 +33,7 @@ static int match(const struct sk_buff *skb,
int offset, unsigned int protoff, int *hotdrop)
{
const struct ipt_addrtype_info *info = matchinfo;
- const struct iphdr *iph = skb->nh.iph;
+ const struct iphdr *iph = ip_hdr(skb);
int ret = 1;
if (info->source)
diff --git a/net/ipv4/netfilter/ipt_ecn.c b/net/ipv4/netfilter/ipt_ecn.c
index 37508b2cfea..26218122f86 100644
--- a/net/ipv4/netfilter/ipt_ecn.c
+++ b/net/ipv4/netfilter/ipt_ecn.c
@@ -1,7 +1,5 @@
/* IP tables module for matching the value of the IPv4 and TCP ECN bits
*
- * ipt_ecn.c,v 1.3 2002/05/29 15:09:00 laforge Exp
- *
* (C) 2002 by Harald Welte <laforge@gnumonks.org>
*
* This program is free software; you can redistribute it and/or modify
@@ -11,6 +9,7 @@
#include <linux/in.h>
#include <linux/ip.h>
+#include <net/ip.h>
#include <linux/module.h>
#include <linux/skbuff.h>
#include <linux/tcp.h>
@@ -26,7 +25,7 @@ MODULE_LICENSE("GPL");
static inline int match_ip(const struct sk_buff *skb,
const struct ipt_ecn_info *einfo)
{
- return ((skb->nh.iph->tos&IPT_ECN_IP_MASK) == einfo->ip_ect);
+ return (ip_hdr(skb)->tos & IPT_ECN_IP_MASK) == einfo->ip_ect;
}
static inline int match_tcp(const struct sk_buff *skb,
@@ -38,8 +37,7 @@ static inline int match_tcp(const struct sk_buff *skb,
/* In practice, TCP match does this, so can't fail. But let's
* be good citizens.
*/
- th = skb_header_pointer(skb, skb->nh.iph->ihl * 4,
- sizeof(_tcph), &_tcph);
+ th = skb_header_pointer(skb, ip_hdrlen(skb), sizeof(_tcph), &_tcph);
if (th == NULL) {
*hotdrop = 0;
return 0;
@@ -80,7 +78,7 @@ static int match(const struct sk_buff *skb,
return 0;
if (info->operation & (IPT_ECN_OP_MATCH_ECE|IPT_ECN_OP_MATCH_CWR)) {
- if (skb->nh.iph->protocol != IPPROTO_TCP)
+ if (ip_hdr(skb)->protocol != IPPROTO_TCP)
return 0;
if (!match_tcp(skb, info, hotdrop))
return 0;
diff --git a/net/ipv4/netfilter/ipt_iprange.c b/net/ipv4/netfilter/ipt_iprange.c
index bc5d5e6091e..33af9e94088 100644
--- a/net/ipv4/netfilter/ipt_iprange.c
+++ b/net/ipv4/netfilter/ipt_iprange.c
@@ -32,7 +32,7 @@ match(const struct sk_buff *skb,
int offset, unsigned int protoff, int *hotdrop)
{
const struct ipt_iprange_info *info = matchinfo;
- const struct iphdr *iph = skb->nh.iph;
+ const struct iphdr *iph = ip_hdr(skb);
if (info->flags & IPRANGE_SRC) {
if (((ntohl(iph->saddr) < ntohl(info->src.min_ip))
diff --git a/net/ipv4/netfilter/ipt_recent.c b/net/ipv4/netfilter/ipt_recent.c
index aecb9c48e15..15a9e8bbb7c 100644
--- a/net/ipv4/netfilter/ipt_recent.c
+++ b/net/ipv4/netfilter/ipt_recent.c
@@ -183,11 +183,11 @@ ipt_recent_match(const struct sk_buff *skb,
int ret = info->invert;
if (info->side == IPT_RECENT_DEST)
- addr = skb->nh.iph->daddr;
+ addr = ip_hdr(skb)->daddr;
else
- addr = skb->nh.iph->saddr;
+ addr = ip_hdr(skb)->saddr;
- ttl = skb->nh.iph->ttl;
+ ttl = ip_hdr(skb)->ttl;
/* use TTL as seen before forwarding */
if (out && !skb->sk)
ttl++;
diff --git a/net/ipv4/netfilter/ipt_tos.c b/net/ipv4/netfilter/ipt_tos.c
index 5d33b51d49d..d314844af12 100644
--- a/net/ipv4/netfilter/ipt_tos.c
+++ b/net/ipv4/netfilter/ipt_tos.c
@@ -30,7 +30,7 @@ match(const struct sk_buff *skb,
{
const struct ipt_tos_info *info = matchinfo;
- return (skb->nh.iph->tos == info->tos) ^ info->invert;
+ return (ip_hdr(skb)->tos == info->tos) ^ info->invert;
}
static struct xt_match tos_match = {
diff --git a/net/ipv4/netfilter/ipt_ttl.c b/net/ipv4/netfilter/ipt_ttl.c
index 1eca9f40037..ab02d9e3139 100644
--- a/net/ipv4/netfilter/ipt_ttl.c
+++ b/net/ipv4/netfilter/ipt_ttl.c
@@ -1,7 +1,5 @@
/* IP tables module for matching the value of the TTL
*
- * ipt_ttl.c,v 1.5 2000/11/13 11:16:08 laforge Exp
- *
* (C) 2000,2001 by Harald Welte <laforge@netfilter.org>
*
* This program is free software; you can redistribute it and/or modify
@@ -26,19 +24,20 @@ static int match(const struct sk_buff *skb,
int offset, unsigned int protoff, int *hotdrop)
{
const struct ipt_ttl_info *info = matchinfo;
+ const u8 ttl = ip_hdr(skb)->ttl;
switch (info->mode) {
case IPT_TTL_EQ:
- return (skb->nh.iph->ttl == info->ttl);
+ return (ttl == info->ttl);
break;
case IPT_TTL_NE:
- return (!(skb->nh.iph->ttl == info->ttl));
+ return (!(ttl == info->ttl));
break;
case IPT_TTL_LT:
- return (skb->nh.iph->ttl < info->ttl);
+ return (ttl < info->ttl);
break;
case IPT_TTL_GT:
- return (skb->nh.iph->ttl > info->ttl);
+ return (ttl > info->ttl);
break;
default:
printk(KERN_WARNING "ipt_ttl: unknown mode %d\n",
diff --git a/net/ipv4/netfilter/iptable_filter.c b/net/ipv4/netfilter/iptable_filter.c
index d1d61e97b97..42728909eba 100644
--- a/net/ipv4/netfilter/iptable_filter.c
+++ b/net/ipv4/netfilter/iptable_filter.c
@@ -13,6 +13,7 @@
#include <linux/module.h>
#include <linux/moduleparam.h>
#include <linux/netfilter_ipv4/ip_tables.h>
+#include <net/ip.h>
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Netfilter Core Team <coreteam@netfilter.org>");
@@ -102,7 +103,7 @@ ipt_local_out_hook(unsigned int hook,
{
/* root is playing with raw sockets. */
if ((*pskb)->len < sizeof(struct iphdr)
- || (*pskb)->nh.iph->ihl * 4 < sizeof(struct iphdr)) {
+ || ip_hdrlen(*pskb) < sizeof(struct iphdr)) {
if (net_ratelimit())
printk("ipt_hook: happy cracking.\n");
return NF_ACCEPT;
diff --git a/net/ipv4/netfilter/iptable_mangle.c b/net/ipv4/netfilter/iptable_mangle.c
index 98b66ef0c71..9278802f274 100644
--- a/net/ipv4/netfilter/iptable_mangle.c
+++ b/net/ipv4/netfilter/iptable_mangle.c
@@ -7,8 +7,6 @@
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
- *
- * Extended to all five netfilter hooks by Brad Chapman & Harald Welte
*/
#include <linux/module.h>
#include <linux/netfilter_ipv4/ip_tables.h>
@@ -17,6 +15,7 @@
#include <net/sock.h>
#include <net/route.h>
#include <linux/ip.h>
+#include <net/ip.h>
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Netfilter Core Team <coreteam@netfilter.org>");
@@ -130,13 +129,14 @@ ipt_local_hook(unsigned int hook,
int (*okfn)(struct sk_buff *))
{
unsigned int ret;
+ const struct iphdr *iph;
u_int8_t tos;
__be32 saddr, daddr;
u_int32_t mark;
/* root is playing with raw sockets. */
if ((*pskb)->len < sizeof(struct iphdr)
- || (*pskb)->nh.iph->ihl * 4 < sizeof(struct iphdr)) {
+ || ip_hdrlen(*pskb) < sizeof(struct iphdr)) {
if (net_ratelimit())
printk("ipt_hook: happy cracking.\n");
return NF_ACCEPT;
@@ -144,19 +144,23 @@ ipt_local_hook(unsigned int hook,
/* Save things which could affect route */
mark = (*pskb)->mark;
- saddr = (*pskb)->nh.iph->saddr;
- daddr = (*pskb)->nh.iph->daddr;
- tos = (*pskb)->nh.iph->tos;
+ iph = ip_hdr(*pskb);
+ saddr = iph->saddr;
+ daddr = iph->daddr;
+ tos = iph->tos;
ret = ipt_do_table(pskb, hook, in, out, &packet_mangler);
/* Reroute for ANY change. */
- if (ret != NF_DROP && ret != NF_STOLEN && ret != NF_QUEUE
- && ((*pskb)->nh.iph->saddr != saddr
- || (*pskb)->nh.iph->daddr != daddr
- || (*pskb)->mark != mark
- || (*pskb)->nh.iph->tos != tos))
- if (ip_route_me_harder(pskb, RTN_UNSPEC))
- ret = NF_DROP;
+ if (ret != NF_DROP && ret != NF_STOLEN && ret != NF_QUEUE) {
+ iph = ip_hdr(*pskb);
+
+ if (iph->saddr != saddr ||
+ iph->daddr != daddr ||
+ (*pskb)->mark != mark ||
+ iph->tos != tos)
+ if (ip_route_me_harder(pskb, RTN_UNSPEC))
+ ret = NF_DROP;
+ }
return ret;
}
diff --git a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
index 8f3e92d20df..0654eaae70c 100644
--- a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
+++ b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
@@ -4,14 +4,6 @@
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
- *
- * 16 Dec 2003: Yasuyuki Kozakai @USAGI <yasuyuki.kozakai@toshiba.co.jp>
- * - move L3 protocol dependent part to this file.
- * 23 Mar 2004: Yasuyuki Kozakai @USAGI <yasuyuki.kozakai@toshiba.co.jp>
- * - add get_features() to support various size of conntrack
- * structures.
- *
- * Derived from net/ipv4/netfilter/ip_conntrack_standalone.c
*/
#include <linux/types.h>
@@ -87,7 +79,7 @@ nf_ct_ipv4_gather_frags(struct sk_buff *skb, u_int32_t user)
local_bh_enable();
if (skb)
- ip_send_check(skb->nh.iph);
+ ip_send_check(ip_hdr(skb));
return skb;
}
@@ -97,16 +89,16 @@ ipv4_prepare(struct sk_buff **pskb, unsigned int hooknum, unsigned int *dataoff,
u_int8_t *protonum)
{
/* Never happens */
- if ((*pskb)->nh.iph->frag_off & htons(IP_OFFSET)) {
+ if (ip_hdr(*pskb)->frag_off & htons(IP_OFFSET)) {
if (net_ratelimit()) {
printk(KERN_ERR "ipv4_prepare: Frag of proto %u (hook=%u)\n",
- (*pskb)->nh.iph->protocol, hooknum);
+ ip_hdr(*pskb)->protocol, hooknum);
}
return -NF_DROP;
}
- *dataoff = (*pskb)->nh.raw - (*pskb)->data + (*pskb)->nh.iph->ihl*4;
- *protonum = (*pskb)->nh.iph->protocol;
+ *dataoff = skb_network_offset(*pskb) + ip_hdrlen(*pskb);
+ *protonum = ip_hdr(*pskb)->protocol;
return NF_ACCEPT;
}
@@ -152,9 +144,8 @@ static unsigned int ipv4_conntrack_help(unsigned int hooknum,
return NF_ACCEPT;
return help->helper->help(pskb,
- (*pskb)->nh.raw - (*pskb)->data
- + (*pskb)->nh.iph->ihl*4,
- ct, ctinfo);
+ skb_network_offset(*pskb) + ip_hdrlen(*pskb),
+ ct, ctinfo);
}
static unsigned int ipv4_conntrack_defrag(unsigned int hooknum,
@@ -171,7 +162,7 @@ static unsigned int ipv4_conntrack_defrag(unsigned int hooknum,
#endif
/* Gather fragments. */
- if ((*pskb)->nh.iph->frag_off & htons(IP_MF|IP_OFFSET)) {
+ if (ip_hdr(*pskb)->frag_off & htons(IP_MF | IP_OFFSET)) {
*pskb = nf_ct_ipv4_gather_frags(*pskb,
hooknum == NF_IP_PRE_ROUTING ?
IP_DEFRAG_CONNTRACK_IN :
@@ -199,7 +190,7 @@ static unsigned int ipv4_conntrack_local(unsigned int hooknum,
{
/* root is playing with raw sockets. */
if ((*pskb)->len < sizeof(struct iphdr)
- || (*pskb)->nh.iph->ihl * 4 < sizeof(struct iphdr)) {
+ || ip_hdrlen(*pskb) < sizeof(struct iphdr)) {
if (net_ratelimit())
printk("ipt_hook: happy cracking.\n");
return NF_ACCEPT;
diff --git a/net/ipv4/netfilter/nf_conntrack_proto_icmp.c b/net/ipv4/netfilter/nf_conntrack_proto_icmp.c
index 5fd1e5363c1..f4fc657c198 100644
--- a/net/ipv4/netfilter/nf_conntrack_proto_icmp.c
+++ b/net/ipv4/netfilter/nf_conntrack_proto_icmp.c
@@ -4,11 +4,6 @@
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
- *
- * 16 Dec 2003: Yasuyuki Kozakai @USAGI <yasuyuki.kozakai@toshiba.co.jp>
- * - enable working with Layer 3 protocol independent connection tracking.
- *
- * Derived from net/ipv4/netfilter/ip_conntrack_proto_icmp.c
*/
#include <linux/types.h>
@@ -158,7 +153,7 @@ icmp_error_message(struct sk_buff *skb,
NF_CT_ASSERT(skb->nfct == NULL);
/* Not enough header? */
- inside = skb_header_pointer(skb, skb->nh.iph->ihl*4, sizeof(_in), &_in);
+ inside = skb_header_pointer(skb, ip_hdrlen(skb), sizeof(_in), &_in);
if (inside == NULL)
return -NF_ACCEPT;
@@ -172,7 +167,7 @@ icmp_error_message(struct sk_buff *skb,
/* rcu_read_lock()ed by nf_hook_slow */
innerproto = __nf_ct_l4proto_find(PF_INET, inside->ip.protocol);
- dataoff = skb->nh.iph->ihl*4 + sizeof(inside->icmp);
+ dataoff = ip_hdrlen(skb) + sizeof(inside->icmp);
/* Are they talking about one of our connections? */
if (!nf_ct_get_tuple(skb, dataoff, dataoff + inside->ip.ihl*4, PF_INET,
inside->ip.protocol, &origtuple,
@@ -227,7 +222,7 @@ icmp_error(struct sk_buff *skb, unsigned int dataoff,
struct icmphdr _ih, *icmph;
/* Not enough header? */
- icmph = skb_header_pointer(skb, skb->nh.iph->ihl*4, sizeof(_ih), &_ih);
+ icmph = skb_header_pointer(skb, ip_hdrlen(skb), sizeof(_ih), &_ih);
if (icmph == NULL) {
if (LOG_INVALID(IPPROTO_ICMP))
nf_log_packet(PF_INET, 0, skb, NULL, NULL, NULL,
diff --git a/net/ipv4/netfilter/nf_nat_core.c b/net/ipv4/netfilter/nf_nat_core.c
index 452e9d32668..ea02f00d2da 100644
--- a/net/ipv4/netfilter/nf_nat_core.c
+++ b/net/ipv4/netfilter/nf_nat_core.c
@@ -431,7 +431,7 @@ int nf_nat_icmp_reply_translation(struct nf_conn *ct,
} *inside;
struct nf_conntrack_l4proto *l4proto;
struct nf_conntrack_tuple inner, target;
- int hdrlen = (*pskb)->nh.iph->ihl * 4;
+ int hdrlen = ip_hdrlen(*pskb);
enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo);
unsigned long statusbit;
enum nf_nat_manip_type manip = HOOK2MANIP(hooknum);
@@ -439,7 +439,7 @@ int nf_nat_icmp_reply_translation(struct nf_conn *ct,
if (!skb_make_writable(pskb, hdrlen + sizeof(*inside)))
return 0;
- inside = (void *)(*pskb)->data + (*pskb)->nh.iph->ihl*4;
+ inside = (void *)(*pskb)->data + ip_hdrlen(*pskb);
/* We're actually going to mangle it beyond trivial checksum
adjustment, so make sure the current checksum is correct. */
@@ -469,9 +469,9 @@ int nf_nat_icmp_reply_translation(struct nf_conn *ct,
l4proto = __nf_ct_l4proto_find(PF_INET, inside->ip.protocol);
if (!nf_ct_get_tuple(*pskb,
- (*pskb)->nh.iph->ihl*4 + sizeof(struct icmphdr),
- (*pskb)->nh.iph->ihl*4 +
- sizeof(struct icmphdr) + inside->ip.ihl*4,
+ ip_hdrlen(*pskb) + sizeof(struct icmphdr),
+ (ip_hdrlen(*pskb) +
+ sizeof(struct icmphdr) + inside->ip.ihl * 4),
(u_int16_t)AF_INET,
inside->ip.protocol,
&inner, l3proto, l4proto))
@@ -483,14 +483,14 @@ int nf_nat_icmp_reply_translation(struct nf_conn *ct,
packet: PREROUTING (DST manip), routing produces ICMP, goes
through POSTROUTING (which must correct the DST manip). */
if (!manip_pkt(inside->ip.protocol, pskb,
- (*pskb)->nh.iph->ihl*4 + sizeof(inside->icmp),
+ ip_hdrlen(*pskb) + sizeof(inside->icmp),
&ct->tuplehash[!dir].tuple,
!manip))
return 0;
if ((*pskb)->ip_summed != CHECKSUM_PARTIAL) {
/* Reloading "inside" here since manip_pkt inner. */
- inside = (void *)(*pskb)->data + (*pskb)->nh.iph->ihl*4;
+ inside = (void *)(*pskb)->data + ip_hdrlen(*pskb);
inside->icmp.checksum = 0;
inside->icmp.checksum =
csum_fold(skb_checksum(*pskb, hdrlen,
diff --git a/net/ipv4/netfilter/nf_nat_h323.c b/net/ipv4/netfilter/nf_nat_h323.c
index 9cbf3f9be13..fcebc968d37 100644
--- a/net/ipv4/netfilter/nf_nat_h323.c
+++ b/net/ipv4/netfilter/nf_nat_h323.c
@@ -33,7 +33,7 @@ static int set_addr(struct sk_buff **pskb,
unsigned int addroff, __be32 ip, __be16 port)
{
enum ip_conntrack_info ctinfo;
- struct nf_conn *ct = ip_conntrack_get(*pskb, &ctinfo);
+ struct nf_conn *ct = nf_ct_get(*pskb, &ctinfo);
struct {
__be32 ip;
__be16 port;
@@ -44,7 +44,7 @@ static int set_addr(struct sk_buff **pskb,
buf.port = port;
addroff += dataoff;
- if ((*pskb)->nh.iph->protocol == IPPROTO_TCP) {
+ if (ip_hdr(*pskb)->protocol == IPPROTO_TCP) {
if (!nf_nat_mangle_tcp_packet(pskb, ct, ctinfo,
addroff, sizeof(buf),
(char *) &buf, sizeof(buf))) {
@@ -55,11 +55,11 @@ static int set_addr(struct sk_buff **pskb,
}
/* Relocate data pointer */
- th = skb_header_pointer(*pskb, (*pskb)->nh.iph->ihl * 4,
+ th = skb_header_pointer(*pskb, ip_hdrlen(*pskb),
sizeof(_tcph), &_tcph);
if (th == NULL)
return -1;
- *data = (*pskb)->data + (*pskb)->nh.iph->ihl * 4 +
+ *data = (*pskb)->data + ip_hdrlen(*pskb) +
th->doff * 4 + dataoff;
} else {
if (!nf_nat_mangle_udp_packet(pskb, ct, ctinfo,
@@ -73,8 +73,8 @@ static int set_addr(struct sk_buff **pskb,
/* nf_nat_mangle_udp_packet uses skb_make_writable() to copy
* or pull everything in a linear buffer, so we can safely
* use the skb pointers now */
- *data = (*pskb)->data + (*pskb)->nh.iph->ihl * 4 +
- sizeof(struct udphdr);
+ *data = ((*pskb)->data + ip_hdrlen(*pskb) +
+ sizeof(struct udphdr));
}
return 0;
@@ -383,7 +383,7 @@ static int nat_h245(struct sk_buff **pskb, struct nf_conn *ct,
static void ip_nat_q931_expect(struct nf_conn *new,
struct nf_conntrack_expect *this)
{
- struct ip_nat_range range;
+ struct nf_nat_range range;
if (this->tuple.src.u3.ip != 0) { /* Only accept calls from GK */
nf_nat_follow_master(new, this);
diff --git a/net/ipv4/netfilter/nf_nat_helper.c b/net/ipv4/netfilter/nf_nat_helper.c
index 49a90c39ffc..15b6e5ce3a0 100644
--- a/net/ipv4/netfilter/nf_nat_helper.c
+++ b/net/ipv4/netfilter/nf_nat_helper.c
@@ -87,12 +87,13 @@ static void mangle_contents(struct sk_buff *skb,
unsigned char *data;
BUG_ON(skb_is_nonlinear(skb));
- data = (unsigned char *)skb->nh.iph + dataoff;
+ data = skb_network_header(skb) + dataoff;
/* move post-replacement */
memmove(data + match_offset + rep_len,
data + match_offset + match_len,
- skb->tail - (data + match_offset + match_len));
+ skb->tail - (skb->network_header + dataoff +
+ match_offset + match_len));
/* insert data from buffer */
memcpy(data + match_offset, rep_buffer, rep_len);
@@ -111,8 +112,8 @@ static void mangle_contents(struct sk_buff *skb,
}
/* fix IP hdr checksum information */
- skb->nh.iph->tot_len = htons(skb->len);
- ip_send_check(skb->nh.iph);
+ ip_hdr(skb)->tot_len = htons(skb->len);
+ ip_send_check(ip_hdr(skb));
}
/* Unusual, but possible case. */
@@ -152,6 +153,7 @@ nf_nat_mangle_tcp_packet(struct sk_buff **pskb,
const char *rep_buffer,
unsigned int rep_len)
{
+ struct rtable *rt = (struct rtable *)(*pskb)->dst;
struct iphdr *iph;
struct tcphdr *tcph;
int oldlen, datalen;
@@ -166,7 +168,7 @@ nf_nat_mangle_tcp_packet(struct sk_buff **pskb,
SKB_LINEAR_ASSERT(*pskb);
- iph = (*pskb)->nh.iph;
+ iph = ip_hdr(*pskb);
tcph = (void *)iph + iph->ihl*4;
oldlen = (*pskb)->len - iph->ihl*4;
@@ -175,11 +177,22 @@ nf_nat_mangle_tcp_packet(struct sk_buff **pskb,
datalen = (*pskb)->len - iph->ihl*4;
if ((*pskb)->ip_summed != CHECKSUM_PARTIAL) {
- tcph->check = 0;
- tcph->check = tcp_v4_check(datalen,
- iph->saddr, iph->daddr,
- csum_partial((char *)tcph,
- datalen, 0));
+ if (!(rt->rt_flags & RTCF_LOCAL) &&
+ (*pskb)->dev->features & NETIF_F_ALL_CSUM) {
+ (*pskb)->ip_summed = CHECKSUM_PARTIAL;
+ (*pskb)->csum_start = skb_headroom(*pskb) +
+ skb_network_offset(*pskb) +
+ iph->ihl * 4;
+ (*pskb)->csum_offset = offsetof(struct tcphdr, check);
+ tcph->check = ~tcp_v4_check(datalen,
+ iph->saddr, iph->daddr, 0);
+ } else {
+ tcph->check = 0;
+ tcph->check = tcp_v4_check(datalen,
+ iph->saddr, iph->daddr,
+ csum_partial((char *)tcph,
+ datalen, 0));
+ }
} else
nf_proto_csum_replace2(&tcph->check, *pskb,
htons(oldlen), htons(datalen), 1);
@@ -190,7 +203,7 @@ nf_nat_mangle_tcp_packet(struct sk_buff **pskb,
(int)rep_len - (int)match_len,
ct, ctinfo);
/* Tell TCP window tracking about seq change */
- nf_conntrack_tcp_update(*pskb, (*pskb)->nh.iph->ihl*4,
+ nf_conntrack_tcp_update(*pskb, ip_hdrlen(*pskb),
ct, CTINFO2DIR(ctinfo));
}
return 1;
@@ -216,12 +229,13 @@ nf_nat_mangle_udp_packet(struct sk_buff **pskb,
const char *rep_buffer,
unsigned int rep_len)
{
+ struct rtable *rt = (struct rtable *)(*pskb)->dst;
struct iphdr *iph;
struct udphdr *udph;
int datalen, oldlen;
/* UDP helpers might accidentally mangle the wrong packet */
- iph = (*pskb)->nh.iph;
+ iph = ip_hdr(*pskb);
if ((*pskb)->len < iph->ihl*4 + sizeof(*udph) +
match_offset + match_len)
return 0;
@@ -234,7 +248,7 @@ nf_nat_mangle_udp_packet(struct sk_buff **pskb,
!enlarge_skb(pskb, rep_len - match_len))
return 0;
- iph = (*pskb)->nh.iph;
+ iph = ip_hdr(*pskb);
udph = (void *)iph + iph->ihl*4;
oldlen = (*pskb)->len - iph->ihl*4;
@@ -250,13 +264,25 @@ nf_nat_mangle_udp_packet(struct sk_buff **pskb,
return 1;
if ((*pskb)->ip_summed != CHECKSUM_PARTIAL) {
- udph->check = 0;
- udph->check = csum_tcpudp_magic(iph->saddr, iph->daddr,
- datalen, IPPROTO_UDP,
- csum_partial((char *)udph,
- datalen, 0));
- if (!udph->check)
- udph->check = CSUM_MANGLED_0;
+ if (!(rt->rt_flags & RTCF_LOCAL) &&
+ (*pskb)->dev->features & NETIF_F_ALL_CSUM) {
+ (*pskb)->ip_summed = CHECKSUM_PARTIAL;
+ (*pskb)->csum_start = skb_headroom(*pskb) +
+ skb_network_offset(*pskb) +
+ iph->ihl * 4;
+ (*pskb)->csum_offset = offsetof(struct udphdr, check);
+ udph->check = ~csum_tcpudp_magic(iph->saddr, iph->daddr,
+ datalen, IPPROTO_UDP,
+ 0);
+ } else {
+ udph->check = 0;
+ udph->check = csum_tcpudp_magic(iph->saddr, iph->daddr,
+ datalen, IPPROTO_UDP,
+ csum_partial((char *)udph,
+ datalen, 0));
+ if (!udph->check)
+ udph->check = CSUM_MANGLED_0;
+ }
} else
nf_proto_csum_replace2(&udph->check, *pskb,
htons(oldlen), htons(datalen), 1);
@@ -318,8 +344,8 @@ nf_nat_sack_adjust(struct sk_buff **pskb,
unsigned int dir, optoff, optend;
struct nf_conn_nat *nat = nfct_nat(ct);
- optoff = (*pskb)->nh.iph->ihl*4 + sizeof(struct tcphdr);
- optend = (*pskb)->nh.iph->ihl*4 + tcph->doff*4;
+ optoff = ip_hdrlen(*pskb) + sizeof(struct tcphdr);
+ optend = ip_hdrlen(*pskb) + tcph->doff * 4;
if (!skb_make_writable(pskb, optend))
return 0;
@@ -371,10 +397,10 @@ nf_nat_seq_adjust(struct sk_buff **pskb,
this_way = &nat->info.seq[dir];
other_way = &nat->info.seq[!dir];
- if (!skb_make_writable(pskb, (*pskb)->nh.iph->ihl*4+sizeof(*tcph)))
+ if (!skb_make_writable(pskb, ip_hdrlen(*pskb) + sizeof(*tcph)))
return 0;
- tcph = (void *)(*pskb)->data + (*pskb)->nh.iph->ihl*4;
+ tcph = (void *)(*pskb)->data + ip_hdrlen(*pskb);
if (after(ntohl(tcph->seq), this_way->correction_pos))
newseq = htonl(ntohl(tcph->seq) + this_way->offset_after);
else
@@ -399,7 +425,7 @@ nf_nat_seq_adjust(struct sk_buff **pskb,
if (!nf_nat_sack_adjust(pskb, tcph, ct, ctinfo))
return 0;
- nf_conntrack_tcp_update(*pskb, (*pskb)->nh.iph->ihl*4, ct, dir);
+ nf_conntrack_tcp_update(*pskb, ip_hdrlen(*pskb), ct, dir);
return 1;
}
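
The new CHECKSUM_PARTIAL branches in nf_nat_mangle_tcp_packet() and nf_nat_mangle_udp_packet() defer the payload sum to hardware: csum_start/csum_offset point the NIC at the transport checksum field, which is pre-seeded with the complemented pseudo-header sum. A condensed sketch of the TCP case, with can_offload standing in for the RTCF_LOCAL/NETIF_F_ALL_CSUM test above:

	static void fix_tcp_csum(struct sk_buff *skb, struct iphdr *iph,
				 struct tcphdr *tcph, int datalen,
				 bool can_offload)
	{
		if (can_offload) {
			skb->ip_summed = CHECKSUM_PARTIAL;
			/* Offset of the TCP header from skb->head ... */
			skb->csum_start = skb_headroom(skb) +
					  skb_network_offset(skb) + iph->ihl * 4;
			/* ... and of the checksum field within it. */
			skb->csum_offset = offsetof(struct tcphdr, check);
			tcph->check = ~tcp_v4_check(datalen, iph->saddr,
						    iph->daddr, 0);
		} else {
			tcph->check = 0;
			tcph->check = tcp_v4_check(datalen, iph->saddr, iph->daddr,
						   csum_partial((char *)tcph,
								datalen, 0));
		}
	}
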
diff --git a/net/ipv4/netfilter/nf_nat_pptp.c b/net/ipv4/netfilter/nf_nat_pptp.c
index 7ba341c22ea..a66888749ce 100644
--- a/net/ipv4/netfilter/nf_nat_pptp.c
+++ b/net/ipv4/netfilter/nf_nat_pptp.c
@@ -53,7 +53,7 @@ static void pptp_nat_expected(struct nf_conn *ct,
struct nf_conntrack_tuple t;
struct nf_ct_pptp_master *ct_pptp_info;
struct nf_nat_pptp *nat_pptp_info;
- struct ip_nat_range range;
+ struct nf_nat_range range;
ct_pptp_info = &nfct_help(master)->help.ct_pptp_info;
nat_pptp_info = &nfct_nat(master)->help.nat_pptp_info;
diff --git a/net/ipv4/netfilter/nf_nat_proto_gre.c b/net/ipv4/netfilter/nf_nat_proto_gre.c
index e5a34c17d92..c3908bc5a70 100644
--- a/net/ipv4/netfilter/nf_nat_proto_gre.c
+++ b/net/ipv4/netfilter/nf_nat_proto_gre.c
@@ -72,6 +72,11 @@ gre_unique_tuple(struct nf_conntrack_tuple *tuple,
__be16 *keyptr;
unsigned int min, i, range_size;
+ /* If there is no master conntrack, we are not PPTP;
+ do not change tuples */
+ if (!conntrack->master)
+ return 0;
+
if (maniptype == IP_NAT_MANIP_SRC)
keyptr = &tuple->src.u.gre.key;
else
@@ -122,18 +127,9 @@ gre_manip_pkt(struct sk_buff **pskb, unsigned int iphdroff,
if (maniptype != IP_NAT_MANIP_DST)
return 1;
switch (greh->version) {
- case 0:
- if (!greh->key) {
- DEBUGP("can't nat GRE w/o key\n");
- break;
- }
- if (greh->csum) {
- /* FIXME: Never tested this code... */
- nf_proto_csum_replace4(gre_csum(greh), *pskb,
- *(gre_key(greh)),
- tuple->dst.u.gre.key, 0);
- }
- *(gre_key(greh)) = tuple->dst.u.gre.key;
+ case GRE_VERSION_1701:
+ /* We do not currently NAT any GREv0 packets.
+ * Try to behave like "nf_nat_proto_unknown" */
break;
case GRE_VERSION_PPTP:
DEBUGP("call_id -> 0x%04x\n", ntohs(tuple->dst.u.gre.key));
diff --git a/net/ipv4/netfilter/nf_nat_rule.c b/net/ipv4/netfilter/nf_nat_rule.c
index 147a4370cf0..2534f718ab9 100644
--- a/net/ipv4/netfilter/nf_nat_rule.c
+++ b/net/ipv4/netfilter/nf_nat_rule.c
@@ -191,7 +191,7 @@ static unsigned int ipt_dnat_target(struct sk_buff **pskb,
if (hooknum == NF_IP_LOCAL_OUT &&
mr->range[0].flags & IP_NAT_RANGE_MAP_IPS)
- warn_if_extra_mangle((*pskb)->nh.iph->daddr,
+ warn_if_extra_mangle(ip_hdr(*pskb)->daddr,
mr->range[0].min_ip);
return nf_nat_setup_info(ct, &mr->range[0], hooknum);
@@ -226,10 +226,6 @@ static int ipt_dnat_checkentry(const char *tablename,
printk("DNAT: multiple ranges no longer supported\n");
return 0;
}
- if (mr->range[0].flags & IP_NAT_RANGE_PROTO_RANDOM) {
- printk("DNAT: port randomization not supported\n");
- return 0;
- }
return 1;
}
diff --git a/net/ipv4/netfilter/nf_nat_sip.c b/net/ipv4/netfilter/nf_nat_sip.c
index b12cd7c314c..fac97cf51ae 100644
--- a/net/ipv4/netfilter/nf_nat_sip.c
+++ b/net/ipv4/netfilter/nf_nat_sip.c
@@ -11,6 +11,7 @@
#include <linux/module.h>
#include <linux/skbuff.h>
#include <linux/ip.h>
+#include <net/ip.h>
#include <linux/udp.h>
#include <net/netfilter/nf_nat.h>
@@ -92,7 +93,7 @@ static int map_sip_addr(struct sk_buff **pskb, enum ip_conntrack_info ctinfo,
if (!nf_nat_mangle_udp_packet(pskb, ct, ctinfo,
matchoff, matchlen, addr, addrlen))
return 0;
- *dptr = (*pskb)->data + (*pskb)->nh.iph->ihl*4 + sizeof(struct udphdr);
+ *dptr = (*pskb)->data + ip_hdrlen(*pskb) + sizeof(struct udphdr);
return 1;
}
@@ -106,7 +107,7 @@ static unsigned int ip_nat_sip(struct sk_buff **pskb,
struct addr_map map;
int dataoff, datalen;
- dataoff = (*pskb)->nh.iph->ihl*4 + sizeof(struct udphdr);
+ dataoff = ip_hdrlen(*pskb) + sizeof(struct udphdr);
datalen = (*pskb)->len - dataoff;
if (datalen < sizeof("SIP/2.0") - 1)
return NF_DROP;
@@ -155,7 +156,7 @@ static unsigned int mangle_sip_packet(struct sk_buff **pskb,
return 0;
/* We need to reload this. Thanks Patrick. */
- *dptr = (*pskb)->data + (*pskb)->nh.iph->ihl*4 + sizeof(struct udphdr);
+ *dptr = (*pskb)->data + ip_hdrlen(*pskb) + sizeof(struct udphdr);
return 1;
}
@@ -168,7 +169,7 @@ static int mangle_content_len(struct sk_buff **pskb,
char buffer[sizeof("65536")];
int bufflen;
- dataoff = (*pskb)->nh.iph->ihl*4 + sizeof(struct udphdr);
+ dataoff = ip_hdrlen(*pskb) + sizeof(struct udphdr);
/* Get actual SDP length */
if (ct_sip_get_info(ct, dptr, (*pskb)->len - dataoff, &matchoff,
@@ -200,7 +201,7 @@ static unsigned int mangle_sdp(struct sk_buff **pskb,
char buffer[sizeof("nnn.nnn.nnn.nnn")];
unsigned int dataoff, bufflen;
- dataoff = (*pskb)->nh.iph->ihl*4 + sizeof(struct udphdr);
+ dataoff = ip_hdrlen(*pskb) + sizeof(struct udphdr);
/* Mangle owner and contact info. */
bufflen = sprintf(buffer, "%u.%u.%u.%u", NIPQUAD(newip));
@@ -221,6 +222,29 @@ static unsigned int mangle_sdp(struct sk_buff **pskb,
return mangle_content_len(pskb, ctinfo, ct, dptr);
}
+static void ip_nat_sdp_expect(struct nf_conn *ct,
+ struct nf_conntrack_expect *exp)
+{
+ struct nf_nat_range range;
+
+ /* This must be a fresh one. */
+ BUG_ON(ct->status & IPS_NAT_DONE_MASK);
+
+ /* Change src to where the master sends to */
+ range.flags = IP_NAT_RANGE_MAP_IPS;
+ range.min_ip = range.max_ip
+ = ct->master->tuplehash[!exp->dir].tuple.dst.u3.ip;
+ /* hook doesn't matter, but it has to do source manip */
+ nf_nat_setup_info(ct, &range, NF_IP_POST_ROUTING);
+
+ /* For DST manip, map port here to where it's expected. */
+ range.flags = (IP_NAT_RANGE_MAP_IPS | IP_NAT_RANGE_PROTO_SPECIFIED);
+ range.min = range.max = exp->saved_proto;
+ range.min_ip = range.max_ip = exp->saved_ip;
+ /* hook doesn't matter, but it has to do destination manip */
+ nf_nat_setup_info(ct, &range, NF_IP_PRE_ROUTING);
+}
+
/* So, this packet has hit the connection tracking matching code.
Mangle it, and change the expectation to match the new version. */
static unsigned int ip_nat_sdp(struct sk_buff **pskb,
@@ -238,13 +262,14 @@ static unsigned int ip_nat_sdp(struct sk_buff **pskb,
/* Connection will come from reply */
newip = ct->tuplehash[!dir].tuple.dst.u3.ip;
+ exp->saved_ip = exp->tuple.dst.u3.ip;
exp->tuple.dst.u3.ip = newip;
exp->saved_proto.udp.port = exp->tuple.dst.u.udp.port;
exp->dir = !dir;
/* When you see the packet, we need to NAT it the same as
this one. */
- exp->expectfn = nf_nat_follow_master;
+ exp->expectfn = ip_nat_sdp_expect;
/* Try to get same port: if not, try to change it. */
for (port = ntohs(exp->saved_proto.udp.port); port != 0; port++) {
diff --git a/net/ipv4/netfilter/nf_nat_snmp_basic.c b/net/ipv4/netfilter/nf_nat_snmp_basic.c
index ce5c4939a6e..6e88505d616 100644
--- a/net/ipv4/netfilter/nf_nat_snmp_basic.c
+++ b/net/ipv4/netfilter/nf_nat_snmp_basic.c
@@ -38,10 +38,6 @@
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*
* Author: James Morris <jmorris@intercode.com.au>
- *
- * Updates:
- * 2000-08-06: Convert to new helper API (Harald Welte).
- *
*/
#include <linux/module.h>
#include <linux/moduleparam.h>
@@ -1194,7 +1190,7 @@ static int snmp_translate(struct nf_conn *ct,
enum ip_conntrack_info ctinfo,
struct sk_buff **pskb)
{
- struct iphdr *iph = (*pskb)->nh.iph;
+ struct iphdr *iph = ip_hdr(*pskb);
struct udphdr *udph = (struct udphdr *)((__be32 *)iph + iph->ihl);
u_int16_t udplen = ntohs(udph->len);
u_int16_t paylen = udplen - sizeof(struct udphdr);
@@ -1235,7 +1231,7 @@ static int help(struct sk_buff **pskb, unsigned int protoff,
{
int dir = CTINFO2DIR(ctinfo);
unsigned int ret;
- struct iphdr *iph = (*pskb)->nh.iph;
+ struct iphdr *iph = ip_hdr(*pskb);
struct udphdr *udph = (struct udphdr *)((u_int32_t *)iph + iph->ihl);
/* SNMP replies and originating SNMP traps get mangled */
diff --git a/net/ipv4/netfilter/nf_nat_standalone.c b/net/ipv4/netfilter/nf_nat_standalone.c
index 15aa3db8cb3..64bbed2ba78 100644
--- a/net/ipv4/netfilter/nf_nat_standalone.c
+++ b/net/ipv4/netfilter/nf_nat_standalone.c
@@ -86,8 +86,7 @@ nf_nat_fn(unsigned int hooknum,
/* We never see fragments: conntrack defrags on pre-routing
and local-out, and nf_nat_out protects post-routing. */
- NF_CT_ASSERT(!((*pskb)->nh.iph->frag_off
- & htons(IP_MF|IP_OFFSET)));
+ NF_CT_ASSERT(!(ip_hdr(*pskb)->frag_off & htons(IP_MF | IP_OFFSET)));
ct = nf_ct_get(*pskb, &ctinfo);
/* Can't track? It's not due to stress, or conntrack would
@@ -98,11 +97,10 @@ nf_nat_fn(unsigned int hooknum,
/* Exception: ICMP redirect to new connection (not in
hash table yet). We must not let this through, in
case we're doing NAT to the same network. */
- if ((*pskb)->nh.iph->protocol == IPPROTO_ICMP) {
+ if (ip_hdr(*pskb)->protocol == IPPROTO_ICMP) {
struct icmphdr _hdr, *hp;
- hp = skb_header_pointer(*pskb,
- (*pskb)->nh.iph->ihl*4,
+ hp = skb_header_pointer(*pskb, ip_hdrlen(*pskb),
sizeof(_hdr), &_hdr);
if (hp != NULL &&
hp->type == ICMP_REDIRECT)
@@ -122,7 +120,7 @@ nf_nat_fn(unsigned int hooknum,
switch (ctinfo) {
case IP_CT_RELATED:
case IP_CT_RELATED+IP_CT_IS_REPLY:
- if ((*pskb)->nh.iph->protocol == IPPROTO_ICMP) {
+ if (ip_hdr(*pskb)->protocol == IPPROTO_ICMP) {
if (!nf_nat_icmp_reply_translation(ct, ctinfo,
hooknum, pskb))
return NF_DROP;
@@ -177,11 +175,11 @@ nf_nat_in(unsigned int hooknum,
int (*okfn)(struct sk_buff *))
{
unsigned int ret;
- __be32 daddr = (*pskb)->nh.iph->daddr;
+ __be32 daddr = ip_hdr(*pskb)->daddr;
ret = nf_nat_fn(hooknum, pskb, in, out, okfn);
if (ret != NF_DROP && ret != NF_STOLEN &&
- daddr != (*pskb)->nh.iph->daddr) {
+ daddr != ip_hdr(*pskb)->daddr) {
dst_release((*pskb)->dst);
(*pskb)->dst = NULL;
}
@@ -203,7 +201,7 @@ nf_nat_out(unsigned int hooknum,
/* root is playing with raw sockets. */
if ((*pskb)->len < sizeof(struct iphdr) ||
- (*pskb)->nh.iph->ihl * 4 < sizeof(struct iphdr))
+ ip_hdrlen(*pskb) < sizeof(struct iphdr))
return NF_ACCEPT;
ret = nf_nat_fn(hooknum, pskb, in, out, okfn);
@@ -236,7 +234,7 @@ nf_nat_local_fn(unsigned int hooknum,
/* root is playing with raw sockets. */
if ((*pskb)->len < sizeof(struct iphdr) ||
- (*pskb)->nh.iph->ihl * 4 < sizeof(struct iphdr))
+ ip_hdrlen(*pskb) < sizeof(struct iphdr))
return NF_ACCEPT;
ret = nf_nat_fn(hooknum, pskb, in, out, okfn);
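
nf_nat_in() above also shows why the destination address is sampled before the NAT step: if PREROUTING DNAT rewrote daddr, any route already attached to the skb was chosen for the old destination and must be dropped so it is looked up again. As a sketch, with do_nat() as a hypothetical stand-in for nf_nat_fn():

	static unsigned int nat_in_sketch(struct sk_buff *skb)
	{
		__be32 daddr = ip_hdr(skb)->daddr;	/* pre-NAT destination */
		unsigned int ret = do_nat(skb);

		if (ret != NF_DROP && ret != NF_STOLEN &&
		    daddr != ip_hdr(skb)->daddr) {
			dst_release(skb->dst);	/* route for the old daddr */
			skb->dst = NULL;	/* force a fresh lookup */
		}
		return ret;
	}
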
diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c
index ae68a691e8c..37ab5802ca0 100644
--- a/net/ipv4/proc.c
+++ b/net/ipv4/proc.c
@@ -87,19 +87,6 @@ static const struct file_operations sockstat_seq_fops = {
.release = single_release,
};
-static unsigned long
-fold_field(void *mib[], int offt)
-{
- unsigned long res = 0;
- int i;
-
- for_each_possible_cpu(i) {
- res += *(((unsigned long *) per_cpu_ptr(mib[0], i)) + offt);
- res += *(((unsigned long *) per_cpu_ptr(mib[1], i)) + offt);
- }
- return res;
-}
-
/* snmp items */
static const struct snmp_mib snmp4_ipstats_list[] = {
SNMP_MIB_ITEM("InReceives", IPSTATS_MIB_INRECEIVES),
@@ -266,8 +253,8 @@ static int snmp_seq_show(struct seq_file *seq, void *v)
for (i = 0; snmp4_ipstats_list[i].name != NULL; i++)
seq_printf(seq, " %lu",
- fold_field((void **) ip_statistics,
- snmp4_ipstats_list[i].entry));
+ snmp_fold_field((void **)ip_statistics,
+ snmp4_ipstats_list[i].entry));
seq_puts(seq, "\nIcmp:");
for (i = 0; snmp4_icmp_list[i].name != NULL; i++)
@@ -276,8 +263,8 @@ static int snmp_seq_show(struct seq_file *seq, void *v)
seq_puts(seq, "\nIcmp:");
for (i = 0; snmp4_icmp_list[i].name != NULL; i++)
seq_printf(seq, " %lu",
- fold_field((void **) icmp_statistics,
- snmp4_icmp_list[i].entry));
+ snmp_fold_field((void **)icmp_statistics,
+ snmp4_icmp_list[i].entry));
seq_puts(seq, "\nTcp:");
for (i = 0; snmp4_tcp_list[i].name != NULL; i++)
@@ -288,12 +275,12 @@ static int snmp_seq_show(struct seq_file *seq, void *v)
/* MaxConn field is signed, RFC 2012 */
if (snmp4_tcp_list[i].entry == TCP_MIB_MAXCONN)
seq_printf(seq, " %ld",
- fold_field((void **) tcp_statistics,
- snmp4_tcp_list[i].entry));
+ snmp_fold_field((void **)tcp_statistics,
+ snmp4_tcp_list[i].entry));
else
seq_printf(seq, " %lu",
- fold_field((void **) tcp_statistics,
- snmp4_tcp_list[i].entry));
+ snmp_fold_field((void **)tcp_statistics,
+ snmp4_tcp_list[i].entry));
}
seq_puts(seq, "\nUdp:");
@@ -303,8 +290,8 @@ static int snmp_seq_show(struct seq_file *seq, void *v)
seq_puts(seq, "\nUdp:");
for (i = 0; snmp4_udp_list[i].name != NULL; i++)
seq_printf(seq, " %lu",
- fold_field((void **) udp_statistics,
- snmp4_udp_list[i].entry));
+ snmp_fold_field((void **)udp_statistics,
+ snmp4_udp_list[i].entry));
/* the UDP and UDP-Lite MIBs are the same */
seq_puts(seq, "\nUdpLite:");
@@ -314,8 +301,8 @@ static int snmp_seq_show(struct seq_file *seq, void *v)
seq_puts(seq, "\nUdpLite:");
for (i = 0; snmp4_udp_list[i].name != NULL; i++)
seq_printf(seq, " %lu",
- fold_field((void **) udplite_statistics,
- snmp4_udp_list[i].entry) );
+ snmp_fold_field((void **)udplite_statistics,
+ snmp4_udp_list[i].entry));
seq_putc(seq, '\n');
return 0;
@@ -348,8 +335,8 @@ static int netstat_seq_show(struct seq_file *seq, void *v)
seq_puts(seq, "\nTcpExt:");
for (i = 0; snmp4_net_list[i].name != NULL; i++)
seq_printf(seq, " %lu",
- fold_field((void **) net_statistics,
- snmp4_net_list[i].entry));
+ snmp_fold_field((void **)net_statistics,
+ snmp4_net_list[i].entry));
seq_putc(seq, '\n');
return 0;
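
The deleted local fold_field() duplicated the shared snmp_fold_field() helper, which is why every caller could simply be switched over. A sketch of the fold it performs, assuming it matches the removed copy (each MIB keeps two per-cpu halves, one bumped from BH context and one from user context, both summed per counter):

	static unsigned long snmp_fold_field_sketch(void *mib[], int offt)
	{
		unsigned long res = 0;
		int i;

		for_each_possible_cpu(i) {
			res += *(((unsigned long *)per_cpu_ptr(mib[0], i)) + offt);
			res += *(((unsigned long *)per_cpu_ptr(mib[1], i)) + offt);
		}
		return res;
	}
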
diff --git a/net/ipv4/protocol.c b/net/ipv4/protocol.c
index da70fef82c9..971ab9356e5 100644
--- a/net/ipv4/protocol.c
+++ b/net/ipv4/protocol.c
@@ -45,7 +45,7 @@
#include <net/ipip.h>
#include <linux/igmp.h>
-struct net_protocol *inet_protos[MAX_INET_PROTOS];
+struct net_protocol *inet_protos[MAX_INET_PROTOS] ____cacheline_aligned_in_smp;
static DEFINE_SPINLOCK(inet_proto_lock);
/*
diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c
index 87e9c161810..24d7c9f3191 100644
--- a/net/ipv4/raw.c
+++ b/net/ipv4/raw.c
@@ -132,7 +132,7 @@ static __inline__ int icmp_filter(struct sock *sk, struct sk_buff *skb)
if (!pskb_may_pull(skb, sizeof(struct icmphdr)))
return 1;
- type = skb->h.icmph->type;
+ type = icmp_hdr(skb)->type;
if (type < 32) {
__u32 data = raw_sk(sk)->filter.data;
@@ -184,8 +184,8 @@ out:
void raw_err (struct sock *sk, struct sk_buff *skb, u32 info)
{
struct inet_sock *inet = inet_sk(sk);
- int type = skb->h.icmph->type;
- int code = skb->h.icmph->code;
+ const int type = icmp_hdr(skb)->type;
+ const int code = icmp_hdr(skb)->code;
int err = 0;
int harderr = 0;
@@ -256,7 +256,7 @@ int raw_rcv(struct sock *sk, struct sk_buff *skb)
}
nf_reset(skb);
- skb_push(skb, skb->data - skb->nh.raw);
+ skb_push(skb, skb->data - skb_network_header(skb));
raw_rcv_skb(sk, skb);
return 0;
@@ -291,11 +291,13 @@ static int raw_send_hdrinc(struct sock *sk, void *from, size_t length,
skb->priority = sk->sk_priority;
skb->dst = dst_clone(&rt->u.dst);
- skb->nh.iph = iph = (struct iphdr *)skb_put(skb, length);
+ skb_reset_network_header(skb);
+ iph = ip_hdr(skb);
+ skb_put(skb, length);
skb->ip_summed = CHECKSUM_NONE;
- skb->h.raw = skb->nh.raw;
+ skb->transport_header = skb->network_header;
err = memcpy_fromiovecend((void *)iph, from, 0, length);
if (err)
goto error_fault;
@@ -613,7 +615,7 @@ static int raw_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
/* Copy the address. */
if (sin) {
sin->sin_family = AF_INET;
- sin->sin_addr.s_addr = skb->nh.iph->saddr;
+ sin->sin_addr.s_addr = ip_hdr(skb)->saddr;
sin->sin_port = 0;
memset(&sin->sin_zero, 0, sizeof(sin->sin_zero));
}
@@ -887,7 +889,7 @@ static int raw_seq_show(struct seq_file *seq, void *v)
return 0;
}
-static struct seq_operations raw_seq_ops = {
+static const struct seq_operations raw_seq_ops = {
.start = raw_seq_start,
.next = raw_seq_next,
.stop = raw_seq_stop,
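
The raw.c conversions all follow one pattern, repeated throughout this series: direct pokes at the old skb->nh and skb->h unions become typed accessors (ip_hdr(), icmp_hdr(), skb_network_header(), skb_reset_network_header()). A toy userspace model of the accessor idea; the struct layout and field names here are simplifying assumptions, not the real sk_buff:

#include <stdio.h>
#include <stdint.h>

struct iphdr_model { uint8_t protocol; uint32_t saddr, daddr; };

struct skb_model {
	unsigned char *data;
	unsigned char *network_header; /* set by skb_reset_network_header() */
};

static void skb_reset_network_header(struct skb_model *skb)
{
	skb->network_header = skb->data;
}

static unsigned char *skb_network_header(const struct skb_model *skb)
{
	return skb->network_header;
}

static struct iphdr_model *ip_hdr(const struct skb_model *skb)
{
	/* Callers never cast raw header pointers themselves any more. */
	return (struct iphdr_model *)skb_network_header(skb);
}

int main(void)
{
	unsigned char buf[64] = { 0 };
	struct skb_model skb = { .data = buf };

	skb_reset_network_header(&skb);
	ip_hdr(&skb)->protocol = 1; /* IPPROTO_ICMP */
	printf("proto=%u\n", ip_hdr(&skb)->protocol);
	return 0;
}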
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index 37e0d4d5cf9..cb76e3c725a 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -82,7 +82,6 @@
#include <linux/proc_fs.h>
#include <linux/init.h>
#include <linux/skbuff.h>
-#include <linux/rtnetlink.h>
#include <linux/inetdevice.h>
#include <linux/igmp.h>
#include <linux/pkt_sched.h>
@@ -104,6 +103,7 @@
#include <net/xfrm.h>
#include <net/ip_mp_alg.h>
#include <net/netevent.h>
+#include <net/rtnetlink.h>
#ifdef CONFIG_SYSCTL
#include <linux/sysctl.h>
#endif
@@ -364,7 +364,7 @@ static int rt_cache_seq_show(struct seq_file *seq, void *v)
return 0;
}
-static struct seq_operations rt_cache_seq_ops = {
+static const struct seq_operations rt_cache_seq_ops = {
.start = rt_cache_seq_start,
.next = rt_cache_seq_next,
.stop = rt_cache_seq_stop,
@@ -470,7 +470,7 @@ static int rt_cpu_seq_show(struct seq_file *seq, void *v)
return 0;
}
-static struct seq_operations rt_cpu_seq_ops = {
+static const struct seq_operations rt_cpu_seq_ops = {
.start = rt_cpu_seq_start,
.next = rt_cpu_seq_next,
.stop = rt_cpu_seq_stop,
@@ -1519,7 +1519,7 @@ static void ipv4_link_failure(struct sk_buff *skb)
static int ip_rt_bug(struct sk_buff *skb)
{
printk(KERN_DEBUG "ip_rt_bug: %u.%u.%u.%u -> %u.%u.%u.%u, %s\n",
- NIPQUAD(skb->nh.iph->saddr), NIPQUAD(skb->nh.iph->daddr),
+ NIPQUAD(ip_hdr(skb)->saddr), NIPQUAD(ip_hdr(skb)->daddr),
skb->dev ? skb->dev->name : "?");
kfree_skb(skb);
return 0;
@@ -1698,9 +1698,9 @@ static void ip_handle_martian_source(struct net_device *dev,
printk(KERN_WARNING "martian source %u.%u.%u.%u from "
"%u.%u.%u.%u, on dev %s\n",
NIPQUAD(daddr), NIPQUAD(saddr), dev->name);
- if (dev->hard_header_len && skb->mac.raw) {
+ if (dev->hard_header_len && skb_mac_header_was_set(skb)) {
int i;
- unsigned char *p = skb->mac.raw;
+ const unsigned char *p = skb_mac_header(skb);
printk(KERN_WARNING "ll header: ");
for (i = 0; i < dev->hard_header_len; i++, p++) {
printk("%02x", *p);
@@ -2134,7 +2134,7 @@ int ip_route_input(struct sk_buff *skb, __be32 daddr, __be32 saddr,
rcu_read_lock();
if ((in_dev = __in_dev_get_rcu(dev)) != NULL) {
int our = ip_check_mc(in_dev, daddr, saddr,
- skb->nh.iph->protocol);
+ ip_hdr(skb)->protocol);
if (our
#ifdef CONFIG_IP_MROUTE
|| (!LOCAL_MCAST(daddr) && IN_DEV_MFORWARD(in_dev))
@@ -2396,7 +2396,7 @@ static int ip_route_output_slow(struct rtable **rp, const struct flowi *oldflp)
/* It is equivalent to inet_addr_type(saddr) == RTN_LOCAL */
dev_out = ip_dev_find(oldflp->fl4_src);
- if (dev_out == NULL)
+ if ((dev_out == NULL) && !(sysctl_ip_nonlocal_bind))
goto out;
/* I removed check for oif == dev_out->oif here.
@@ -2407,7 +2407,7 @@ static int ip_route_output_slow(struct rtable **rp, const struct flowi *oldflp)
of another iface. --ANK
*/
- if (oldflp->oif == 0
+ if (dev_out && oldflp->oif == 0
&& (MULTICAST(oldflp->fl4_dst) || oldflp->fl4_dst == htonl(0xFFFFFFFF))) {
/* Special hack: user can direct multicasts
and limited broadcast via necessary interface
@@ -2683,7 +2683,7 @@ static int rt_fill_info(struct sk_buff *skb, u32 pid, u32 seq, int event,
id = rt->peer->ip_id_count;
if (rt->peer->tcp_ts_stamp) {
ts = rt->peer->tcp_ts;
- tsage = xtime.tv_sec - rt->peer->tcp_ts_stamp;
+ tsage = get_seconds() - rt->peer->tcp_ts_stamp;
}
}
@@ -2721,7 +2721,7 @@ nla_put_failure:
return -EMSGSIZE;
}
-int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void *arg)
+static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void *arg)
{
struct rtmsg *rtm;
struct nlattr *tb[RTA_MAX+1];
@@ -2747,10 +2747,11 @@ int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void *arg)
/* Reserve room for dummy headers; this skb can pass
through a good chunk of the routing engine.
*/
- skb->mac.raw = skb->nh.raw = skb->data;
+ skb_reset_mac_header(skb);
+ skb_reset_network_header(skb);
/* Bugfix: need to give ip_route_input enough of an IP header to not gag. */
- skb->nh.iph->protocol = IPPROTO_ICMP;
+ ip_hdr(skb)->protocol = IPPROTO_ICMP;
skb_reserve(skb, MAX_HEADER + sizeof(struct iphdr));
src = tb[RTA_SRC] ? nla_get_be32(tb[RTA_SRC]) : 0;
@@ -3193,6 +3194,8 @@ int __init ip_rt_init(void)
xfrm_init();
xfrm4_init();
#endif
+ rtnl_register(PF_INET, RTM_GETROUTE, inet_rtm_getroute, NULL);
+
return rc;
}
diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c
index 33016cc90f0..2da1be0589a 100644
--- a/net/ipv4/syncookies.c
+++ b/net/ipv4/syncookies.c
@@ -125,10 +125,11 @@ static __u16 const msstab[] = {
__u32 cookie_v4_init_sequence(struct sock *sk, struct sk_buff *skb, __u16 *mssp)
{
struct tcp_sock *tp = tcp_sk(sk);
+ const struct iphdr *iph = ip_hdr(skb);
+ const struct tcphdr *th = tcp_hdr(skb);
int mssind;
const __u16 mss = *mssp;
-
tp->last_synq_overflow = jiffies;
/* XXX sort msstab[] by probability? Binary search? */
@@ -138,9 +139,8 @@ __u32 cookie_v4_init_sequence(struct sock *sk, struct sk_buff *skb, __u16 *mssp)
NET_INC_STATS_BH(LINUX_MIB_SYNCOOKIESSENT);
- return secure_tcp_syn_cookie(skb->nh.iph->saddr, skb->nh.iph->daddr,
- skb->h.th->source, skb->h.th->dest,
- ntohl(skb->h.th->seq),
+ return secure_tcp_syn_cookie(iph->saddr, iph->daddr,
+ th->source, th->dest, ntohl(th->seq),
jiffies / (HZ * 60), mssind);
}
@@ -157,14 +157,13 @@ __u32 cookie_v4_init_sequence(struct sock *sk, struct sk_buff *skb, __u16 *mssp)
*/
static inline int cookie_check(struct sk_buff *skb, __u32 cookie)
{
- __u32 seq;
- __u32 mssind;
-
- seq = ntohl(skb->h.th->seq)-1;
- mssind = check_tcp_syn_cookie(cookie,
- skb->nh.iph->saddr, skb->nh.iph->daddr,
- skb->h.th->source, skb->h.th->dest,
- seq, jiffies / (HZ * 60), COUNTER_TRIES);
+ const struct iphdr *iph = ip_hdr(skb);
+ const struct tcphdr *th = tcp_hdr(skb);
+ __u32 seq = ntohl(th->seq) - 1;
+ __u32 mssind = check_tcp_syn_cookie(cookie, iph->saddr, iph->daddr,
+ th->source, th->dest, seq,
+ jiffies / (HZ * 60),
+ COUNTER_TRIES);
return mssind < NUM_MSS ? msstab[mssind] + 1 : 0;
}
@@ -191,14 +190,15 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb,
struct inet_request_sock *ireq;
struct tcp_request_sock *treq;
struct tcp_sock *tp = tcp_sk(sk);
- __u32 cookie = ntohl(skb->h.th->ack_seq) - 1;
+ const struct tcphdr *th = tcp_hdr(skb);
+ __u32 cookie = ntohl(th->ack_seq) - 1;
struct sock *ret = sk;
struct request_sock *req;
int mss;
struct rtable *rt;
__u8 rcv_wscale;
- if (!sysctl_tcp_syncookies || !skb->h.th->ack)
+ if (!sysctl_tcp_syncookies || !th->ack)
goto out;
if (time_after(jiffies, tp->last_synq_overflow + TCP_TIMEOUT_INIT) ||
@@ -220,12 +220,12 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb,
}
ireq = inet_rsk(req);
treq = tcp_rsk(req);
- treq->rcv_isn = ntohl(skb->h.th->seq) - 1;
+ treq->rcv_isn = ntohl(th->seq) - 1;
treq->snt_isn = cookie;
req->mss = mss;
- ireq->rmt_port = skb->h.th->source;
- ireq->loc_addr = skb->nh.iph->daddr;
- ireq->rmt_addr = skb->nh.iph->saddr;
+ ireq->rmt_port = th->source;
+ ireq->loc_addr = ip_hdr(skb)->daddr;
+ ireq->rmt_addr = ip_hdr(skb)->saddr;
ireq->opt = NULL;
/* We threw the options of the initial SYN away, so we hope
@@ -261,8 +261,8 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb,
.tos = RT_CONN_FLAGS(sk) } },
.proto = IPPROTO_TCP,
.uli_u = { .ports =
- { .sport = skb->h.th->dest,
- .dport = skb->h.th->source } } };
+ { .sport = th->dest,
+ .dport = th->source } } };
security_req_classify_flow(req, &fl);
if (ip_route_output_key(&rt, &fl)) {
reqsk_free(req);
diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c
index 0aa304711a9..6817d6485df 100644
--- a/net/ipv4/sysctl_net_ipv4.c
+++ b/net/ipv4/sysctl_net_ipv4.c
@@ -647,6 +647,14 @@ ctl_table ipv4_table[] = {
.proc_handler = &proc_dointvec
},
{
+ .ctl_name = NET_TCP_FRTO_RESPONSE,
+ .procname = "tcp_frto_response",
+ .data = &sysctl_tcp_frto_response,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = &proc_dointvec
+ },
+ {
.ctl_name = NET_TCP_LOW_LATENCY,
.procname = "tcp_low_latency",
.data = &sysctl_tcp_low_latency,
@@ -803,6 +811,14 @@ ctl_table ipv4_table[] = {
.proc_handler = &proc_allowed_congestion_control,
.strategy = &strategy_allowed_congestion_control,
},
+ {
+ .ctl_name = NET_TCP_MAX_SSTHRESH,
+ .procname = "tcp_max_ssthresh",
+ .data = &sysctl_tcp_max_ssthresh,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = &proc_dointvec,
+ },
{ .ctl_name = 0 }
};
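
Both new knobs appear under /proc/sys/net/ipv4/ once this lands: writing, say, 100 to tcp_max_ssthresh enables RFC 3742 limited slow start above a 100-segment cwnd (0, the default, keeps it disabled), while tcp_frto_response selects how cwnd and ssthresh are restored after F-RTO declares a timeout spurious; the value semantics are defined by the F-RTO changes to tcp_input.c below, not by this table.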
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 3834b10b511..8b124eafbb9 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -297,7 +297,7 @@ EXPORT_SYMBOL(tcp_sockets_allocated);
* All the sk_stream_mem_schedule() is of this nature: accounting
* is strict, actions are advisory and have some latency.
*/
-int tcp_memory_pressure;
+int tcp_memory_pressure __read_mostly;
EXPORT_SYMBOL(tcp_memory_pressure);
@@ -425,7 +425,7 @@ int tcp_ioctl(struct sock *sk, int cmd, unsigned long arg)
/* Subtract 1, if FIN is in queue. */
if (answ && !skb_queue_empty(&sk->sk_receive_queue))
answ -=
- ((struct sk_buff *)sk->sk_receive_queue.prev)->h.th->fin;
+ tcp_hdr((struct sk_buff *)sk->sk_receive_queue.prev)->fin;
} else
answ = tp->urg_seq - tp->copied_seq;
release_sock(sk);
@@ -444,7 +444,7 @@ int tcp_ioctl(struct sock *sk, int cmd, unsigned long arg)
break;
default:
return -ENOIOCTLCMD;
- };
+ }
return put_user(answ, (int __user *)arg);
}
@@ -460,9 +460,9 @@ static inline int forced_push(struct tcp_sock *tp)
return after(tp->write_seq, tp->pushed_seq + (tp->max_window >> 1));
}
-static inline void skb_entail(struct sock *sk, struct tcp_sock *tp,
- struct sk_buff *skb)
+static inline void skb_entail(struct sock *sk, struct sk_buff *skb)
{
+ struct tcp_sock *tp = tcp_sk(sk);
struct tcp_skb_cb *tcb = TCP_SKB_CB(skb);
skb->csum = 0;
@@ -470,10 +470,8 @@ static inline void skb_entail(struct sock *sk, struct tcp_sock *tp,
tcb->flags = TCPCB_FLAG_ACK;
tcb->sacked = 0;
skb_header_release(skb);
- __skb_queue_tail(&sk->sk_write_queue, skb);
+ tcp_add_write_queue_tail(sk, skb);
sk_charge_skb(sk, skb);
- if (!sk->sk_send_head)
- sk->sk_send_head = skb;
if (tp->nonagle & TCP_NAGLE_PUSH)
tp->nonagle &= ~TCP_NAGLE_PUSH;
}
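
skb_entail() now goes through tcp_add_write_queue_tail() instead of queueing the skb and maintaining sk->sk_send_head by hand. Inferred from the call sites in this patch (a sketch, not the helper's actual definition), the helper appends to the write queue and remembers the first unsent skb; a userspace model:

#include <stdio.h>
#include <stddef.h>

struct skb { struct skb *next; int len; };

struct sock_model {
	struct skb *queue_head, *queue_tail;
	struct skb *send_head; /* first not-yet-sent skb, NULL if none */
};

static void tcp_add_write_queue_tail(struct sock_model *sk, struct skb *skb)
{
	skb->next = NULL;
	if (sk->queue_tail)
		sk->queue_tail->next = skb;
	else
		sk->queue_head = skb;
	sk->queue_tail = skb;

	/* Remember where transmission must start. */
	if (!sk->send_head)
		sk->send_head = skb;
}

static struct skb *tcp_send_head(struct sock_model *sk)
{
	return sk->send_head;
}

int main(void)
{
	struct sock_model sk = { NULL, NULL, NULL };
	struct skb a = { NULL, 100 }, b = { NULL, 200 };

	tcp_add_write_queue_tail(&sk, &a);
	tcp_add_write_queue_tail(&sk, &b);
	printf("send head len=%d\n", tcp_send_head(&sk)->len); /* prints 100 */
	return 0;
}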
@@ -488,15 +486,17 @@ static inline void tcp_mark_urg(struct tcp_sock *tp, int flags,
}
}
-static inline void tcp_push(struct sock *sk, struct tcp_sock *tp, int flags,
- int mss_now, int nonagle)
+static inline void tcp_push(struct sock *sk, int flags, int mss_now,
+ int nonagle)
{
- if (sk->sk_send_head) {
- struct sk_buff *skb = sk->sk_write_queue.prev;
+ struct tcp_sock *tp = tcp_sk(sk);
+
+ if (tcp_send_head(sk)) {
+ struct sk_buff *skb = tcp_write_queue_tail(sk);
if (!(flags & MSG_MORE) || forced_push(tp))
tcp_mark_push(tp, skb);
tcp_mark_urg(tp, flags, skb);
- __tcp_push_pending_frames(sk, tp, mss_now,
+ __tcp_push_pending_frames(sk, mss_now,
(flags & MSG_MORE) ? TCP_NAGLE_CORK : nonagle);
}
}
@@ -526,13 +526,13 @@ static ssize_t do_tcp_sendpages(struct sock *sk, struct page **pages, int poffse
goto do_error;
while (psize > 0) {
- struct sk_buff *skb = sk->sk_write_queue.prev;
+ struct sk_buff *skb = tcp_write_queue_tail(sk);
struct page *page = pages[poffset / PAGE_SIZE];
int copy, i, can_coalesce;
int offset = poffset % PAGE_SIZE;
int size = min_t(size_t, psize, PAGE_SIZE - offset);
- if (!sk->sk_send_head || (copy = size_goal - skb->len) <= 0) {
+ if (!tcp_send_head(sk) || (copy = size_goal - skb->len) <= 0) {
new_segment:
if (!sk_stream_memory_free(sk))
goto wait_for_sndbuf;
@@ -542,7 +542,7 @@ new_segment:
if (!skb)
goto wait_for_memory;
- skb_entail(sk, tp, skb);
+ skb_entail(sk, skb);
copy = size_goal;
}
@@ -588,8 +588,8 @@ new_segment:
if (forced_push(tp)) {
tcp_mark_push(tp, skb);
- __tcp_push_pending_frames(sk, tp, mss_now, TCP_NAGLE_PUSH);
- } else if (skb == sk->sk_send_head)
+ __tcp_push_pending_frames(sk, mss_now, TCP_NAGLE_PUSH);
+ } else if (skb == tcp_send_head(sk))
tcp_push_one(sk, mss_now);
continue;
@@ -597,7 +597,7 @@ wait_for_sndbuf:
set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
wait_for_memory:
if (copied)
- tcp_push(sk, tp, flags & ~MSG_MORE, mss_now, TCP_NAGLE_PUSH);
+ tcp_push(sk, flags & ~MSG_MORE, mss_now, TCP_NAGLE_PUSH);
if ((err = sk_stream_wait_memory(sk, &timeo)) != 0)
goto do_error;
@@ -608,7 +608,7 @@ wait_for_memory:
out:
if (copied)
- tcp_push(sk, tp, flags, mss_now, tp->nonagle);
+ tcp_push(sk, flags, mss_now, tp->nonagle);
return copied;
do_error:
@@ -639,8 +639,9 @@ ssize_t tcp_sendpage(struct socket *sock, struct page *page, int offset,
#define TCP_PAGE(sk) (sk->sk_sndmsg_page)
#define TCP_OFF(sk) (sk->sk_sndmsg_off)
-static inline int select_size(struct sock *sk, struct tcp_sock *tp)
+static inline int select_size(struct sock *sk)
{
+ struct tcp_sock *tp = tcp_sk(sk);
int tmp = tp->mss_cache;
if (sk->sk_route_caps & NETIF_F_SG) {
@@ -704,9 +705,9 @@ int tcp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
while (seglen > 0) {
int copy;
- skb = sk->sk_write_queue.prev;
+ skb = tcp_write_queue_tail(sk);
- if (!sk->sk_send_head ||
+ if (!tcp_send_head(sk) ||
(copy = size_goal - skb->len) <= 0) {
new_segment:
@@ -716,7 +717,7 @@ new_segment:
if (!sk_stream_memory_free(sk))
goto wait_for_sndbuf;
- skb = sk_stream_alloc_pskb(sk, select_size(sk, tp),
+ skb = sk_stream_alloc_pskb(sk, select_size(sk),
0, sk->sk_allocation);
if (!skb)
goto wait_for_memory;
@@ -727,7 +728,7 @@ new_segment:
if (sk->sk_route_caps & NETIF_F_ALL_CSUM)
skb->ip_summed = CHECKSUM_PARTIAL;
- skb_entail(sk, tp, skb);
+ skb_entail(sk, skb);
copy = size_goal;
}
@@ -832,8 +833,8 @@ new_segment:
if (forced_push(tp)) {
tcp_mark_push(tp, skb);
- __tcp_push_pending_frames(sk, tp, mss_now, TCP_NAGLE_PUSH);
- } else if (skb == sk->sk_send_head)
+ __tcp_push_pending_frames(sk, mss_now, TCP_NAGLE_PUSH);
+ } else if (skb == tcp_send_head(sk))
tcp_push_one(sk, mss_now);
continue;
@@ -841,7 +842,7 @@ wait_for_sndbuf:
set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
wait_for_memory:
if (copied)
- tcp_push(sk, tp, flags & ~MSG_MORE, mss_now, TCP_NAGLE_PUSH);
+ tcp_push(sk, flags & ~MSG_MORE, mss_now, TCP_NAGLE_PUSH);
if ((err = sk_stream_wait_memory(sk, &timeo)) != 0)
goto do_error;
@@ -853,16 +854,18 @@ wait_for_memory:
out:
if (copied)
- tcp_push(sk, tp, flags, mss_now, tp->nonagle);
+ tcp_push(sk, flags, mss_now, tp->nonagle);
TCP_CHECK_TIMER(sk);
release_sock(sk);
return copied;
do_fault:
if (!skb->len) {
- if (sk->sk_send_head == skb)
- sk->sk_send_head = NULL;
- __skb_unlink(skb, &sk->sk_write_queue);
+ tcp_unlink_write_queue(skb, sk);
+ /* It is the one place in all of TCP, except connection
+ * reset, where we can be unlinking the send_head.
+ */
+ tcp_check_send_head(sk, skb);
sk_stream_free_skb(sk, skb);
}
@@ -1016,9 +1019,9 @@ static inline struct sk_buff *tcp_recv_skb(struct sock *sk, u32 seq, u32 *off)
skb_queue_walk(&sk->sk_receive_queue, skb) {
offset = seq - TCP_SKB_CB(skb)->seq;
- if (skb->h.th->syn)
+ if (tcp_hdr(skb)->syn)
offset--;
- if (offset < skb->len || skb->h.th->fin) {
+ if (offset < skb->len || tcp_hdr(skb)->fin) {
*off = offset;
return skb;
}
@@ -1070,7 +1073,7 @@ int tcp_read_sock(struct sock *sk, read_descriptor_t *desc,
if (offset != skb->len)
break;
}
- if (skb->h.th->fin) {
+ if (tcp_hdr(skb)->fin) {
sk_eat_skb(sk, skb, 0);
++seq;
break;
@@ -1174,11 +1177,11 @@ int tcp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
break;
}
offset = *seq - TCP_SKB_CB(skb)->seq;
- if (skb->h.th->syn)
+ if (tcp_hdr(skb)->syn)
offset--;
if (offset < skb->len)
goto found_ok_skb;
- if (skb->h.th->fin)
+ if (tcp_hdr(skb)->fin)
goto found_fin_ok;
BUG_TRAP(flags & MSG_PEEK);
skb = skb->next;
@@ -1389,12 +1392,12 @@ do_prequeue:
skip_copy:
if (tp->urg_data && after(tp->copied_seq, tp->urg_seq)) {
tp->urg_data = 0;
- tcp_fast_path_check(sk, tp);
+ tcp_fast_path_check(sk);
}
if (used + offset < skb->len)
continue;
- if (skb->h.th->fin)
+ if (tcp_hdr(skb)->fin)
goto found_fin_ok;
if (!(flags & MSG_PEEK)) {
sk_eat_skb(sk, skb, copied_early);
@@ -1563,21 +1566,19 @@ void tcp_close(struct sock *sk, long timeout)
*/
while ((skb = __skb_dequeue(&sk->sk_receive_queue)) != NULL) {
u32 len = TCP_SKB_CB(skb)->end_seq - TCP_SKB_CB(skb)->seq -
- skb->h.th->fin;
+ tcp_hdr(skb)->fin;
data_was_unread += len;
__kfree_skb(skb);
}
sk_stream_mem_reclaim(sk);
- /* As outlined in draft-ietf-tcpimpl-prob-03.txt, section
- * 3.10, we send a RST here because data was lost. To
- * witness the awful effects of the old behavior of always
- * doing a FIN, run an older 2.1.x kernel or 2.0.x, start
- * a bulk GET in an FTP client, suspend the process, wait
- * for the client to advertise a zero window, then kill -9
- * the FTP client, wheee... Note: timeout is always zero
- * in such a case.
+ /* As outlined in RFC 2525, section 2.17, we send a RST here because
+ * data was lost. To witness the awful effects of the old behavior of
+ * always doing a FIN, run an older 2.1.x kernel or 2.0.x, start a bulk
+ * GET in an FTP client, suspend the process, wait for the client to
+ * advertise a zero window, then kill -9 the FTP client, wheee...
+ * Note: timeout is always zero in such a case.
*/
if (data_was_unread) {
/* Unread data was tossed, zap the connection. */
@@ -1732,7 +1733,7 @@ int tcp_disconnect(struct sock *sk, int flags)
tcp_clear_xmit_timers(sk);
__skb_queue_purge(&sk->sk_receive_queue);
- sk_stream_writequeue_purge(sk);
+ tcp_write_queue_purge(sk);
__skb_queue_purge(&tp->out_of_order_queue);
#ifdef CONFIG_NET_DMA
__skb_queue_purge(&sk->sk_async_wait_queue);
@@ -1758,9 +1759,8 @@ int tcp_disconnect(struct sock *sk, int flags)
tcp_set_ca_state(sk, TCP_CA_Open);
tcp_clear_retrans(tp);
inet_csk_delack_init(sk);
- sk->sk_send_head = NULL;
- tp->rx_opt.saw_tstamp = 0;
- tcp_sack_reset(&tp->rx_opt);
+ tcp_init_send_head(sk);
+ memset(&tp->rx_opt, 0, sizeof(tp->rx_opt));
__sk_dst_reset(sk);
BUG_TRAP(!inet->num || icsk->icsk_bind_hash);
@@ -1830,7 +1830,7 @@ static int do_tcp_setsockopt(struct sock *sk, int level,
* for currently queued segments.
*/
tp->nonagle |= TCP_NAGLE_OFF|TCP_NAGLE_PUSH;
- tcp_push_pending_frames(sk, tp);
+ tcp_push_pending_frames(sk);
} else {
tp->nonagle &= ~TCP_NAGLE_OFF;
}
@@ -1854,7 +1854,7 @@ static int do_tcp_setsockopt(struct sock *sk, int level,
tp->nonagle &= ~TCP_NAGLE_CORK;
if (tp->nonagle&TCP_NAGLE_OFF)
tp->nonagle |= TCP_NAGLE_PUSH;
- tcp_push_pending_frames(sk, tp);
+ tcp_push_pending_frames(sk);
}
break;
@@ -1954,7 +1954,8 @@ static int do_tcp_setsockopt(struct sock *sk, int level,
default:
err = -ENOPROTOOPT;
break;
- };
+ }
+
release_sock(sk);
return err;
}
@@ -2124,7 +2125,7 @@ static int do_tcp_getsockopt(struct sock *sk, int level,
return 0;
default:
return -ENOPROTOOPT;
- };
+ }
if (put_user(len, optlen))
return -EFAULT;
@@ -2170,7 +2171,7 @@ struct sk_buff *tcp_tso_segment(struct sk_buff *skb, int features)
if (!pskb_may_pull(skb, sizeof(*th)))
goto out;
- th = skb->h.th;
+ th = tcp_hdr(skb);
thlen = th->doff * 4;
if (thlen < sizeof(*th))
goto out;
@@ -2210,7 +2211,7 @@ struct sk_buff *tcp_tso_segment(struct sk_buff *skb, int features)
delta = htonl(oldlen + (thlen + len));
skb = segs;
- th = skb->h.th;
+ th = tcp_hdr(skb);
seq = ntohl(th->seq);
do {
@@ -2219,23 +2220,25 @@ struct sk_buff *tcp_tso_segment(struct sk_buff *skb, int features)
th->check = ~csum_fold((__force __wsum)((__force u32)th->check +
(__force u32)delta));
if (skb->ip_summed != CHECKSUM_PARTIAL)
- th->check = csum_fold(csum_partial(skb->h.raw, thlen,
- skb->csum));
+ th->check =
+ csum_fold(csum_partial(skb_transport_header(skb),
+ thlen, skb->csum));
seq += len;
skb = skb->next;
- th = skb->h.th;
+ th = tcp_hdr(skb);
th->seq = htonl(seq);
th->cwr = 0;
} while (skb->next);
- delta = htonl(oldlen + (skb->tail - skb->h.raw) + skb->data_len);
+ delta = htonl(oldlen + (skb->tail - skb->transport_header) +
+ skb->data_len);
th->check = ~csum_fold((__force __wsum)((__force u32)th->check +
(__force u32)delta));
if (skb->ip_summed != CHECKSUM_PARTIAL)
- th->check = csum_fold(csum_partial(skb->h.raw, thlen,
- skb->csum));
+ th->check = csum_fold(csum_partial(skb_transport_header(skb),
+ thlen, skb->csum));
out:
return segs;
@@ -2372,6 +2375,23 @@ void __tcp_put_md5sig_pool(void)
EXPORT_SYMBOL(__tcp_put_md5sig_pool);
#endif
+void tcp_done(struct sock *sk)
+{
+ if (sk->sk_state == TCP_SYN_SENT || sk->sk_state == TCP_SYN_RECV)
+ TCP_INC_STATS_BH(TCP_MIB_ATTEMPTFAILS);
+
+ tcp_set_state(sk, TCP_CLOSE);
+ tcp_clear_xmit_timers(sk);
+
+ sk->sk_shutdown = SHUTDOWN_MASK;
+
+ if (!sock_flag(sk, SOCK_DEAD))
+ sk->sk_state_change(sk);
+ else
+ inet_csk_destroy_sock(sk);
+}
+EXPORT_SYMBOL_GPL(tcp_done);
+
extern void __skb_cb_too_small_for_tcp(int, int);
extern struct tcp_congestion_ops tcp_reno;
diff --git a/net/ipv4/tcp_bic.c b/net/ipv4/tcp_bic.c
index 5730333cd0a..281c9f91325 100644
--- a/net/ipv4/tcp_bic.c
+++ b/net/ipv4/tcp_bic.c
@@ -206,7 +206,7 @@ static void bictcp_state(struct sock *sk, u8 new_state)
/* Track delayed acknowledgment ratio using sliding window
* ratio = (15*ratio + sample) / 16
*/
-static void bictcp_acked(struct sock *sk, u32 cnt)
+static void bictcp_acked(struct sock *sk, u32 cnt, ktime_t last)
{
const struct inet_connection_sock *icsk = inet_csk(sk);
diff --git a/net/ipv4/tcp_cong.c b/net/ipv4/tcp_cong.c
index 34ae3f13483..86b26539e54 100644
--- a/net/ipv4/tcp_cong.c
+++ b/net/ipv4/tcp_cong.c
@@ -12,6 +12,8 @@
#include <linux/list.h>
#include <net/tcp.h>
+int sysctl_tcp_max_ssthresh = 0;
+
static DEFINE_SPINLOCK(tcp_cong_list_lock);
static LIST_HEAD(tcp_cong_list);
@@ -124,7 +126,7 @@ int tcp_set_default_congestion_control(const char *name)
#endif
if (ca) {
- ca->non_restricted = 1; /* default is always allowed */
+ ca->flags |= TCP_CONG_NON_RESTRICTED; /* default is always allowed */
list_move(&ca->list, &tcp_cong_list);
ret = 0;
}
@@ -179,7 +181,7 @@ void tcp_get_allowed_congestion_control(char *buf, size_t maxlen)
*buf = '\0';
rcu_read_lock();
list_for_each_entry_rcu(ca, &tcp_cong_list, list) {
- if (!ca->non_restricted)
+ if (!(ca->flags & TCP_CONG_NON_RESTRICTED))
continue;
offs += snprintf(buf + offs, maxlen - offs,
"%s%s",
@@ -210,16 +212,16 @@ int tcp_set_allowed_congestion_control(char *val)
}
}
- /* pass 2 clear */
+ /* pass 2 clear old values */
list_for_each_entry_rcu(ca, &tcp_cong_list, list)
- ca->non_restricted = 0;
+ ca->flags &= ~TCP_CONG_NON_RESTRICTED;
/* pass 3 mark as allowed */
while ((name = strsep(&val, " ")) && *name) {
ca = tcp_ca_find(name);
WARN_ON(!ca);
if (ca)
- ca->non_restricted = 1;
+ ca->flags |= TCP_CONG_NON_RESTRICTED;
}
out:
spin_unlock(&tcp_cong_list_lock);
@@ -254,7 +256,7 @@ int tcp_set_congestion_control(struct sock *sk, const char *name)
if (!ca)
err = -ENOENT;
- else if (!(ca->non_restricted || capable(CAP_NET_ADMIN)))
+ else if (!((ca->flags & TCP_CONG_NON_RESTRICTED) || capable(CAP_NET_ADMIN)))
err = -EPERM;
else if (!try_module_get(ca->owner))
@@ -274,10 +276,13 @@ int tcp_set_congestion_control(struct sock *sk, const char *name)
/*
- * Linear increase during slow start
+ * Slow start (exponential increase) with
+ * RFC3742 Limited Slow Start (fast linear increase) support.
*/
void tcp_slow_start(struct tcp_sock *tp)
{
+ int cnt = 0;
+
if (sysctl_tcp_abc) {
/* RFC3465: Slow Start
* TCP sender SHOULD increase cwnd by the number of
@@ -286,17 +291,25 @@ void tcp_slow_start(struct tcp_sock *tp)
*/
if (tp->bytes_acked < tp->mss_cache)
return;
-
- /* We MAY increase by 2 if discovered delayed ack */
- if (sysctl_tcp_abc > 1 && tp->bytes_acked >= 2*tp->mss_cache) {
- if (tp->snd_cwnd < tp->snd_cwnd_clamp)
- tp->snd_cwnd++;
- }
}
+
+ if (sysctl_tcp_max_ssthresh > 0 &&
+ tp->snd_cwnd > sysctl_tcp_max_ssthresh)
+ cnt += sysctl_tcp_max_ssthresh>>1;
+ else
+ cnt += tp->snd_cwnd;
+
+ /* RFC3465: We MAY increase by 2 if discovered delayed ack */
+ if (sysctl_tcp_abc > 1 && tp->bytes_acked >= 2*tp->mss_cache)
+ cnt <<= 1;
tp->bytes_acked = 0;
- if (tp->snd_cwnd < tp->snd_cwnd_clamp)
- tp->snd_cwnd++;
+ tp->snd_cwnd_cnt += cnt;
+ while (tp->snd_cwnd_cnt >= tp->snd_cwnd) {
+ tp->snd_cwnd_cnt -= tp->snd_cwnd;
+ if (tp->snd_cwnd < tp->snd_cwnd_clamp)
+ tp->snd_cwnd++;
+ }
}
EXPORT_SYMBOL_GPL(tcp_slow_start);
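
Called once per ACK, the rewritten tcp_slow_start() accumulates cnt into snd_cwnd_cnt and converts each full window's worth into a one-segment cwnd increase: below tcp_max_ssthresh the increment is cwnd itself (one segment per ACK, i.e. doubling per RTT), above it the increment is capped at tcp_max_ssthresh/2 per ACK (about max_ssthresh/2 segments per RTT, the RFC 3742 limited regime). A userspace rerun of just that arithmetic, with the ABC branch dropped and made-up initial values:

#include <stdio.h>

static unsigned int snd_cwnd = 16, snd_cwnd_cnt, snd_cwnd_clamp = 1000;
static int max_ssthresh = 100; /* 0 disables limited slow start */

/* One call models one arriving ACK. */
static void slow_start_once(void)
{
	unsigned int cnt;

	if (max_ssthresh > 0 && snd_cwnd > (unsigned int)max_ssthresh)
		cnt = max_ssthresh >> 1; /* limited: linear growth */
	else
		cnt = snd_cwnd;          /* classic: doubling per RTT */

	snd_cwnd_cnt += cnt;
	while (snd_cwnd_cnt >= snd_cwnd) {
		snd_cwnd_cnt -= snd_cwnd;
		if (snd_cwnd < snd_cwnd_clamp)
			snd_cwnd++;
	}
}

int main(void)
{
	int ack;

	for (ack = 0; ack < 6; ack++) {
		slow_start_once();
		printf("ack %d: cwnd=%u\n", ack, snd_cwnd);
	}
	return 0;
}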
@@ -358,8 +371,8 @@ u32 tcp_reno_min_cwnd(const struct sock *sk)
EXPORT_SYMBOL_GPL(tcp_reno_min_cwnd);
struct tcp_congestion_ops tcp_reno = {
+ .flags = TCP_CONG_NON_RESTRICTED,
.name = "reno",
- .non_restricted = 1,
.owner = THIS_MODULE,
.ssthresh = tcp_reno_ssthresh,
.cong_avoid = tcp_reno_cong_avoid,
diff --git a/net/ipv4/tcp_cubic.c b/net/ipv4/tcp_cubic.c
index 9a582fb4ef9..14224487b16 100644
--- a/net/ipv4/tcp_cubic.c
+++ b/net/ipv4/tcp_cubic.c
@@ -1,5 +1,5 @@
/*
- * TCP CUBIC: Binary Increase Congestion control for TCP v2.0
+ * TCP CUBIC: Binary Increase Congestion control for TCP v2.1
*
* This is from the implementation of CUBIC TCP in
* Injong Rhee, Lisong Xu.
@@ -51,8 +51,6 @@ MODULE_PARM_DESC(bic_scale, "scale (scaled by 1024) value for bic function (bic_
module_param(tcp_friendliness, int, 0644);
MODULE_PARM_DESC(tcp_friendliness, "turn on/off tcp friendliness");
-#include <asm/div64.h>
-
/* BIC TCP Parameters */
struct bictcp {
u32 cnt; /* increase cwnd by 1 after ACKs */
@@ -93,50 +91,51 @@ static void bictcp_init(struct sock *sk)
tcp_sk(sk)->snd_ssthresh = initial_ssthresh;
}
-/* 64bit divisor, dividend and result. dynamic precision */
-static inline u_int64_t div64_64(u_int64_t dividend, u_int64_t divisor)
-{
- u_int32_t d = divisor;
-
- if (divisor > 0xffffffffULL) {
- unsigned int shift = fls(divisor >> 32);
-
- d = divisor >> shift;
- dividend >>= shift;
- }
-
- /* avoid 64 bit division if possible */
- if (dividend >> 32)
- do_div(dividend, d);
- else
- dividend = (uint32_t) dividend / d;
-
- return dividend;
-}
-
-/*
- * calculate the cubic root of x using Newton-Raphson
+/* calculate the cubic root of x using a table lookup followed by one
+ * Newton-Raphson iteration.
+ * Avg err ~= 0.195%
*/
static u32 cubic_root(u64 a)
{
- u32 x, x1;
-
- /* Initial estimate is based on:
- * cbrt(x) = exp(log(x) / 3)
+ u32 x, b, shift;
+ /*
+ * cbrt(x) MSB values for x MSB values in [0..63].
+ * Precomputed then refined by hand - Willy Tarreau
+ *
+ * For x in [0..63],
+ * v = cbrt(x << 18) - 1
+ * cbrt(x) = (v[x] + 10) >> 6
*/
- x = 1u << (fls64(a)/3);
+ static const u8 v[] = {
+ /* 0x00 */ 0, 54, 54, 54, 118, 118, 118, 118,
+ /* 0x08 */ 123, 129, 134, 138, 143, 147, 151, 156,
+ /* 0x10 */ 157, 161, 164, 168, 170, 173, 176, 179,
+ /* 0x18 */ 181, 185, 187, 190, 192, 194, 197, 199,
+ /* 0x20 */ 200, 202, 204, 206, 209, 211, 213, 215,
+ /* 0x28 */ 217, 219, 221, 222, 224, 225, 227, 229,
+ /* 0x30 */ 231, 232, 234, 236, 237, 239, 240, 242,
+ /* 0x38 */ 244, 245, 246, 248, 250, 251, 252, 254,
+ };
+
+ b = fls64(a);
+ if (b < 7) {
+ /* a in [0..63] */
+ return ((u32)v[(u32)a] + 35) >> 6;
+ }
+
+ b = ((b * 84) >> 8) - 1;
+ shift = (a >> (b * 3));
+
+ x = ((u32)(((u32)v[shift] + 10) << b)) >> 6;
/*
- * Iteration based on:
+ * Newton-Raphson iteration
* x_{k+1} = ( 2 * x_k + a / x_k^2 ) / 3
*/
- do {
- x1 = x;
- x = (2 * x + (uint32_t) div64_64(a, x*x)) / 3;
- } while (abs(x1 - x) > 1);
-
+ x = (2 * x + (u32)div64_64(a, (u64)x * (u64)(x - 1)));
+ x = ((x * 341) >> 10);
return x;
}
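
A quick way to sanity-check the table-plus-one-Newton-step root is to lift it into userspace and compare against libm's cbrt(). The v[] table and shift constants below are copied from the hunk; fls64() and div64_64() are replaced by portable stand-ins, so this is a model of the code, not the kernel build (link with -lm):

#include <stdio.h>
#include <stdint.h>
#include <math.h>

static int fls64_model(uint64_t x) /* stand-in for the kernel's fls64() */
{
	int b = 0;

	while (x) { b++; x >>= 1; }
	return b;
}

static uint32_t cubic_root_model(uint64_t a)
{
	static const uint8_t v[] = {
		  0,  54,  54,  54, 118, 118, 118, 118,
		123, 129, 134, 138, 143, 147, 151, 156,
		157, 161, 164, 168, 170, 173, 176, 179,
		181, 185, 187, 190, 192, 194, 197, 199,
		200, 202, 204, 206, 209, 211, 213, 215,
		217, 219, 221, 222, 224, 225, 227, 229,
		231, 232, 234, 236, 237, 239, 240, 242,
		244, 245, 246, 248, 250, 251, 252, 254,
	};
	uint32_t x, b, shift;

	b = fls64_model(a);
	if (b < 7)
		return ((uint32_t)v[(uint32_t)a] + 35) >> 6;

	b = ((b * 84) >> 8) - 1;
	shift = (uint32_t)(a >> (b * 3));
	x = ((uint32_t)(((uint32_t)v[shift] + 10) << b)) >> 6;

	/* one Newton-Raphson step, as in the patch */
	x = 2 * x + (uint32_t)(a / ((uint64_t)x * (uint64_t)(x - 1)));
	x = (x * 341) >> 10;
	return x;
}

int main(void)
{
	uint64_t a;

	for (a = 100; a <= 100000000ULL; a *= 100)
		printf("cubic_root(%llu)=%u cbrt=%.2f\n",
		       (unsigned long long)a, cubic_root_model(a),
		       cbrt((double)a));
	return 0;
}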
@@ -215,7 +214,9 @@ static inline void bictcp_update(struct bictcp *ca, u32 cwnd)
if (ca->delay_min > 0) {
/* max increment = Smax * rtt / 0.1 */
min_cnt = (cwnd * HZ * 8)/(10 * max_increment * ca->delay_min);
- if (ca->cnt < min_cnt)
+
+ /* use concave growth when the target is above the origin */
+ if (ca->cnt < min_cnt && t >= ca->bic_K)
ca->cnt = min_cnt;
}
@@ -333,7 +334,7 @@ static void bictcp_state(struct sock *sk, u8 new_state)
/* Track delayed acknowledgment ratio using sliding window
* ratio = (15*ratio + sample) / 16
*/
-static void bictcp_acked(struct sock *sk, u32 cnt)
+static void bictcp_acked(struct sock *sk, u32 cnt, ktime_t last)
{
const struct inet_connection_sock *icsk = inet_csk(sk);
@@ -401,4 +402,4 @@ module_exit(cubictcp_unregister);
MODULE_AUTHOR("Sangtae Ha, Stephen Hemminger");
MODULE_LICENSE("GPL");
MODULE_DESCRIPTION("CUBIC TCP");
-MODULE_VERSION("2.0");
+MODULE_VERSION("2.1");
diff --git a/net/ipv4/tcp_highspeed.c b/net/ipv4/tcp_highspeed.c
index a291097fcc0..43d624e5043 100644
--- a/net/ipv4/tcp_highspeed.c
+++ b/net/ipv4/tcp_highspeed.c
@@ -97,10 +97,6 @@ struct hstcp {
u32 ai;
};
-static int max_ssthresh = 100;
-module_param(max_ssthresh, int, 0644);
-MODULE_PARM_DESC(max_ssthresh, "limited slow start threshold (RFC3742)");
-
static void hstcp_init(struct sock *sk)
{
struct tcp_sock *tp = tcp_sk(sk);
@@ -122,23 +118,9 @@ static void hstcp_cong_avoid(struct sock *sk, u32 adk, u32 rtt,
if (!tcp_is_cwnd_limited(sk, in_flight))
return;
- if (tp->snd_cwnd <= tp->snd_ssthresh) {
- /* RFC3742: limited slow start
- * the window is increased by 1/K MSS for each arriving ACK,
- * for K = int(cwnd/(0.5 max_ssthresh))
- */
- if (max_ssthresh > 0 && tp->snd_cwnd > max_ssthresh) {
- u32 k = max(tp->snd_cwnd / (max_ssthresh >> 1), 1U);
- if (++tp->snd_cwnd_cnt >= k) {
- if (tp->snd_cwnd < tp->snd_cwnd_clamp)
- tp->snd_cwnd++;
- tp->snd_cwnd_cnt = 0;
- }
- } else {
- if (tp->snd_cwnd < tp->snd_cwnd_clamp)
- tp->snd_cwnd++;
- }
- } else {
+ if (tp->snd_cwnd <= tp->snd_ssthresh)
+ tcp_slow_start(tp);
+ else {
/* Update AIMD parameters.
*
* We want to guarantee that:
diff --git a/net/ipv4/tcp_htcp.c b/net/ipv4/tcp_htcp.c
index 1020eb48d8d..4ba4a7ae0a8 100644
--- a/net/ipv4/tcp_htcp.c
+++ b/net/ipv4/tcp_htcp.c
@@ -98,7 +98,7 @@ static inline void measure_rtt(struct sock *sk)
}
}
-static void measure_achieved_throughput(struct sock *sk, u32 pkts_acked)
+static void measure_achieved_throughput(struct sock *sk, u32 pkts_acked, ktime_t last)
{
const struct inet_connection_sock *icsk = inet_csk(sk);
const struct tcp_sock *tp = tcp_sk(sk);
diff --git a/net/ipv4/tcp_hybla.c b/net/ipv4/tcp_hybla.c
index 59e691d26f6..e5be3511722 100644
--- a/net/ipv4/tcp_hybla.c
+++ b/net/ipv4/tcp_hybla.c
@@ -144,7 +144,7 @@ static void hybla_cong_avoid(struct sock *sk, u32 ack, u32 rtt,
ca->snd_cwnd_cents += odd;
/* check when fractions goes >=128 and increase cwnd by 1. */
- while(ca->snd_cwnd_cents >= 128) {
+ while (ca->snd_cwnd_cents >= 128) {
tp->snd_cwnd++;
ca->snd_cwnd_cents -= 128;
tp->snd_cwnd_cnt = 0;
diff --git a/net/ipv4/tcp_illinois.c b/net/ipv4/tcp_illinois.c
new file mode 100644
index 00000000000..4adc47c5535
--- /dev/null
+++ b/net/ipv4/tcp_illinois.c
@@ -0,0 +1,356 @@
+/*
+ * TCP Illinois congestion control.
+ * Home page:
+ * http://www.ews.uiuc.edu/~shaoliu/tcpillinois/index.html
+ *
+ * The algorithm is described in:
+ * "TCP-Illinois: A Loss and Delay-Based Congestion Control Algorithm
+ * for High-Speed Networks"
+ * http://www.ews.uiuc.edu/~shaoliu/papersandslides/liubassri06perf.pdf
+ *
+ * Implemented from description in paper and ns-2 simulation.
+ * Copyright (C) 2007 Stephen Hemminger <shemminger@linux-foundation.org>
+ */
+
+#include <linux/module.h>
+#include <linux/skbuff.h>
+#include <linux/inet_diag.h>
+#include <asm/div64.h>
+#include <net/tcp.h>
+
+#define ALPHA_SHIFT 7
+#define ALPHA_SCALE (1u<<ALPHA_SHIFT)
+#define ALPHA_MIN ((3*ALPHA_SCALE)/10) /* ~0.3 */
+#define ALPHA_MAX (10*ALPHA_SCALE) /* 10.0 */
+#define ALPHA_BASE ALPHA_SCALE /* 1.0 */
+#define U32_MAX ((u32)~0U)
+#define RTT_MAX (U32_MAX / ALPHA_MAX) /* 3.3 secs */
+
+#define BETA_SHIFT 6
+#define BETA_SCALE (1u<<BETA_SHIFT)
+#define BETA_MIN (BETA_SCALE/8) /* 0.125 */
+#define BETA_MAX (BETA_SCALE/2) /* 0.5 */
+#define BETA_BASE BETA_MAX
+
+static int win_thresh __read_mostly = 15;
+module_param(win_thresh, int, 0);
+MODULE_PARM_DESC(win_thresh, "Window threshold for starting adaptive sizing");
+
+static int theta __read_mostly = 5;
+module_param(theta, int, 0);
+MODULE_PARM_DESC(theta, "# of fast RTTs before full growth");
+
+/* TCP Illinois Parameters */
+struct illinois {
+ u64 sum_rtt; /* sum of rtt's measured within last rtt */
+ u16 cnt_rtt; /* # of rtts measured within last rtt */
+ u32 base_rtt; /* min of all rtt in usec */
+ u32 max_rtt; /* max of all rtt in usec */
+ u32 end_seq; /* right edge of current RTT */
+ u32 alpha; /* Additive increase */
+ u32 beta; /* Multiplicative decrease */
+ u16 acked; /* # packets acked by current ACK */
+ u8 rtt_above; /* average rtt has gone above threshold */
+ u8 rtt_low; /* # of rtt measurements below threshold */
+};
+
+static void rtt_reset(struct sock *sk)
+{
+ struct tcp_sock *tp = tcp_sk(sk);
+ struct illinois *ca = inet_csk_ca(sk);
+
+ ca->end_seq = tp->snd_nxt;
+ ca->cnt_rtt = 0;
+ ca->sum_rtt = 0;
+
+ /* TODO: age max_rtt? */
+}
+
+static void tcp_illinois_init(struct sock *sk)
+{
+ struct illinois *ca = inet_csk_ca(sk);
+
+ ca->alpha = ALPHA_MAX;
+ ca->beta = BETA_BASE;
+ ca->base_rtt = 0x7fffffff;
+ ca->max_rtt = 0;
+
+ ca->acked = 0;
+ ca->rtt_low = 0;
+ ca->rtt_above = 0;
+
+ rtt_reset(sk);
+}
+
+/* Measure RTT for each ack. */
+static void tcp_illinois_acked(struct sock *sk, u32 pkts_acked, ktime_t last)
+{
+ struct illinois *ca = inet_csk_ca(sk);
+ u32 rtt;
+
+ ca->acked = pkts_acked;
+
+ rtt = ktime_to_us(net_timedelta(last));
+
+ /* ignore bogus values, this prevents wraparound in alpha math */
+ if (rtt > RTT_MAX)
+ rtt = RTT_MAX;
+
+ /* keep track of minimum RTT seen so far */
+ if (ca->base_rtt > rtt)
+ ca->base_rtt = rtt;
+
+ /* and max */
+ if (ca->max_rtt < rtt)
+ ca->max_rtt = rtt;
+
+ ++ca->cnt_rtt;
+ ca->sum_rtt += rtt;
+}
+
+/* Maximum queuing delay */
+static inline u32 max_delay(const struct illinois *ca)
+{
+ return ca->max_rtt - ca->base_rtt;
+}
+
+/* Average queuing delay */
+static inline u32 avg_delay(const struct illinois *ca)
+{
+ u64 t = ca->sum_rtt;
+
+ do_div(t, ca->cnt_rtt);
+ return t - ca->base_rtt;
+}
+
+/*
+ * Compute value of alpha used for additive increase.
+ * If the window is small then use 1.0, equivalent to Reno.
+ *
+ * For larger windows, adjust based on average delay.
+ * A. If average delay is at minimum (we are uncongested),
+ * then use large alpha (10.0) to increase faster.
+ * B. If average delay is at maximum (getting congested)
+ * then use small alpha (0.3)
+ *
+ * The result is a convex window growth curve.
+ */
+static u32 alpha(struct illinois *ca, u32 da, u32 dm)
+{
+ u32 d1 = dm / 100; /* Low threshold */
+
+ if (da <= d1) {
+ /* If never got out of low delay zone, then use max */
+ if (!ca->rtt_above)
+ return ALPHA_MAX;
+
+ /* Wait for theta good RTTs before allowing alpha to go to alpha max.
+ * This prevents one good RTT from causing a sudden window increase.
+ */
+ if (++ca->rtt_low < theta)
+ return ca->alpha;
+
+ ca->rtt_low = 0;
+ ca->rtt_above = 0;
+ return ALPHA_MAX;
+ }
+
+ ca->rtt_above = 1;
+
+ /*
+ * Based on:
+ *
+ * k1 = (dm - d1) * amin * amax / (amax - amin)
+ *
+ * k2 = (dm - d1) * amin / (amax - amin) - d1
+ *
+ * alpha = k1 / (k2 + da)
+ */
+
+ dm -= d1;
+ da -= d1;
+ return (dm * ALPHA_MAX) /
+ (dm + (da * (ALPHA_MAX - ALPHA_MIN)) / ALPHA_MIN);
+}
+
+/*
+ * Beta used for multiplicative decrease.
+ * For small window sizes it returns the same value as Reno (0.5).
+ *
+ * If delay is small (10% of max) then beta = 1/8
+ * If delay is up to 80% of max then beta = 1/2
+ * In between, beta is a linear function of the delay.
+ */
+static u32 beta(u32 da, u32 dm)
+{
+ u32 d2, d3;
+
+ d2 = dm / 10;
+ if (da <= d2)
+ return BETA_MIN;
+
+ d3 = (8 * dm) / 10;
+ if (da >= d3 || d3 <= d2)
+ return BETA_MAX;
+
+ /*
+ * Based on:
+ *
+ * k3 = (bmin * d3 - bmax * d2) / (d3 - d2)
+ *
+ * k4 = (bmax - bmin) / (d3 - d2)
+ *
+ * b = k3 + k4 * da
+ */
+ return (BETA_MIN * d3 - BETA_MAX * d2 + (BETA_MAX - BETA_MIN) * da)
+ / (d3 - d2);
+}
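
Plugging the module's constants into those two curves makes their shape concrete: alpha decays from 10.0 toward roughly 0.3 as the average queueing delay rises, while beta climbs from 1/8 to 1/2. A userspace replay of the curves with the rtt_above/rtt_low gating stripped out; the delay inputs are arbitrary test values in microseconds:

#include <stdio.h>
#include <stdint.h>

#define ALPHA_SHIFT 7
#define ALPHA_SCALE (1u << ALPHA_SHIFT)
#define ALPHA_MIN   ((3 * ALPHA_SCALE) / 10)
#define ALPHA_MAX   (10 * ALPHA_SCALE)
#define BETA_SHIFT  6
#define BETA_SCALE  (1u << BETA_SHIFT)
#define BETA_MIN    (BETA_SCALE / 8)
#define BETA_MAX    (BETA_SCALE / 2)

static uint32_t alpha_model(uint32_t da, uint32_t dm)
{
	uint32_t d1 = dm / 100;

	if (da <= d1)
		return ALPHA_MAX; /* gating by rtt_above/rtt_low omitted */
	dm -= d1;
	da -= d1;
	return (dm * ALPHA_MAX) /
	       (dm + (da * (ALPHA_MAX - ALPHA_MIN)) / ALPHA_MIN);
}

static uint32_t beta_model(uint32_t da, uint32_t dm)
{
	uint32_t d2 = dm / 10, d3 = (8 * dm) / 10;

	if (da <= d2)
		return BETA_MIN;
	if (da >= d3 || d3 <= d2)
		return BETA_MAX;
	return (BETA_MIN * d3 - BETA_MAX * d2 + (BETA_MAX - BETA_MIN) * da) /
	       (d3 - d2);
}

int main(void)
{
	uint32_t dm = 100000; /* 100 ms max queueing delay */
	uint32_t da;

	for (da = 1000; da <= dm; da *= 10)
		printf("da=%6u alpha=%.2f beta=%.3f\n", da,
		       alpha_model(da, dm) / (double)ALPHA_SCALE,
		       beta_model(da, dm) / (double)BETA_SCALE);
	return 0;
}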
+
+/* Update alpha and beta values once per RTT */
+static void update_params(struct sock *sk)
+{
+ struct tcp_sock *tp = tcp_sk(sk);
+ struct illinois *ca = inet_csk_ca(sk);
+
+ if (tp->snd_cwnd < win_thresh) {
+ ca->alpha = ALPHA_BASE;
+ ca->beta = BETA_BASE;
+ } else if (ca->cnt_rtt > 0) {
+ u32 dm = max_delay(ca);
+ u32 da = avg_delay(ca);
+
+ ca->alpha = alpha(ca, da, dm);
+ ca->beta = beta(da, dm);
+ }
+
+ rtt_reset(sk);
+}
+
+/*
+ * In case of loss, reset to default values
+ */
+static void tcp_illinois_state(struct sock *sk, u8 new_state)
+{
+ struct illinois *ca = inet_csk_ca(sk);
+
+ if (new_state == TCP_CA_Loss) {
+ ca->alpha = ALPHA_BASE;
+ ca->beta = BETA_BASE;
+ ca->rtt_low = 0;
+ ca->rtt_above = 0;
+ rtt_reset(sk);
+ }
+}
+
+/*
+ * Increase window in response to successful acknowledgment.
+ */
+static void tcp_illinois_cong_avoid(struct sock *sk, u32 ack, u32 rtt,
+ u32 in_flight, int flag)
+{
+ struct tcp_sock *tp = tcp_sk(sk);
+ struct illinois *ca = inet_csk_ca(sk);
+
+ if (after(ack, ca->end_seq))
+ update_params(sk);
+
+ /* RFC2861 only increase cwnd if fully utilized */
+ if (!tcp_is_cwnd_limited(sk, in_flight))
+ return;
+
+ /* In slow start */
+ if (tp->snd_cwnd <= tp->snd_ssthresh)
+ tcp_slow_start(tp);
+
+ else {
+ u32 delta;
+
+ /* snd_cwnd_cnt is # of packets since last cwnd increment */
+ tp->snd_cwnd_cnt += ca->acked;
+ ca->acked = 1;
+
+ /* This is a close approximation of:
+ * tp->snd_cwnd += alpha/tp->snd_cwnd
+ */
+ delta = (tp->snd_cwnd_cnt * ca->alpha) >> ALPHA_SHIFT;
+ if (delta >= tp->snd_cwnd) {
+ tp->snd_cwnd = min(tp->snd_cwnd + delta / tp->snd_cwnd,
+ (u32) tp->snd_cwnd_clamp);
+ tp->snd_cwnd_cnt = 0;
+ }
+ }
+}
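
The snd_cwnd_cnt bookkeeping above makes the window grow by roughly alpha segments per RTT. Worked through with assumed numbers: at cwnd = 100 and alpha = 10.0 (1280 at ALPHA_SCALE = 128), delta = (snd_cwnd_cnt * alpha) >> 7 first reaches cwnd after about ten ACKs, each such trigger adds delta / cwnd = 1 segment and resets the counter, and at about one hundred ACKs per RTT that is ten increments, i.e. alpha, per round trip.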
+
+static u32 tcp_illinois_ssthresh(struct sock *sk)
+{
+ struct tcp_sock *tp = tcp_sk(sk);
+ struct illinois *ca = inet_csk_ca(sk);
+
+ /* Multiplicative decrease */
+ return max((tp->snd_cwnd * ca->beta) >> BETA_SHIFT, 2U);
+}
+
+
+/* Extract info for Tcp socket info provided via netlink. */
+static void tcp_illinois_info(struct sock *sk, u32 ext,
+ struct sk_buff *skb)
+{
+ const struct illinois *ca = inet_csk_ca(sk);
+
+ if (ext & (1 << (INET_DIAG_VEGASINFO - 1))) {
+ struct tcpvegas_info info = {
+ .tcpv_enabled = 1,
+ .tcpv_rttcnt = ca->cnt_rtt,
+ .tcpv_minrtt = ca->base_rtt,
+ };
+ u64 t = ca->sum_rtt;
+
+ do_div(t, ca->cnt_rtt);
+ info.tcpv_rtt = t;
+
+ nla_put(skb, INET_DIAG_VEGASINFO, sizeof(info), &info);
+ }
+}
+
+static struct tcp_congestion_ops tcp_illinois = {
+ .flags = TCP_CONG_RTT_STAMP,
+ .init = tcp_illinois_init,
+ .ssthresh = tcp_illinois_ssthresh,
+ .min_cwnd = tcp_reno_min_cwnd,
+ .cong_avoid = tcp_illinois_cong_avoid,
+ .set_state = tcp_illinois_state,
+ .get_info = tcp_illinois_info,
+ .pkts_acked = tcp_illinois_acked,
+
+ .owner = THIS_MODULE,
+ .name = "illinois",
+};
+
+static int __init tcp_illinois_register(void)
+{
+ BUILD_BUG_ON(sizeof(struct illinois) > ICSK_CA_PRIV_SIZE);
+ return tcp_register_congestion_control(&tcp_illinois);
+}
+
+static void __exit tcp_illinois_unregister(void)
+{
+ tcp_unregister_congestion_control(&tcp_illinois);
+}
+
+module_init(tcp_illinois_register);
+module_exit(tcp_illinois_unregister);
+
+MODULE_AUTHOR("Stephen Hemminger, Shao Liu");
+MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("TCP Illinois");
+MODULE_VERSION("1.0");
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 1a14191687a..7641b2761a1 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -86,6 +86,7 @@ int sysctl_tcp_stdurg __read_mostly;
int sysctl_tcp_rfc1337 __read_mostly;
int sysctl_tcp_max_orphans __read_mostly = NR_FILE;
int sysctl_tcp_frto __read_mostly;
+int sysctl_tcp_frto_response __read_mostly;
int sysctl_tcp_nometrics_save __read_mostly;
int sysctl_tcp_moderate_rcvbuf __read_mostly = 1;
@@ -100,6 +101,7 @@ int sysctl_tcp_abc __read_mostly;
#define FLAG_ECE 0x40 /* ECE in this ACK */
#define FLAG_DATA_LOST 0x80 /* SACK detected data lossage. */
#define FLAG_SLOWPATH 0x100 /* Do not skip RFC checks for window update.*/
+#define FLAG_ONLY_ORIG_SACKED 0x200 /* SACKs only non-rexmit sent before RTO */
#define FLAG_ACKED (FLAG_DATA_ACKED|FLAG_SYN_ACKED)
#define FLAG_NOT_DUP (FLAG_DATA|FLAG_WIN_UPDATE|FLAG_ACKED)
@@ -110,6 +112,8 @@ int sysctl_tcp_abc __read_mostly;
#define IsFack(tp) ((tp)->rx_opt.sack_ok & 2)
#define IsDSack(tp) ((tp)->rx_opt.sack_ok & 4)
+#define IsSackFrto() (sysctl_tcp_frto == 0x2)
+
#define TCP_REMNANT (TCP_FLAG_FIN|TCP_FLAG_URG|TCP_FLAG_SYN|TCP_FLAG_PSH)
/* Adapt the MSS value used to make delayed ack decision to the
@@ -136,7 +140,7 @@ static void tcp_measure_rcv_mss(struct sock *sk,
*
* "len" is invariant segment length, including TCP header.
*/
- len += skb->data - skb->h.raw;
+ len += skb->data - skb_transport_header(skb);
if (len >= TCP_MIN_RCVMSS + sizeof(struct tcphdr) ||
/* If PSH is not set, packet should be
* full sized, provided peer TCP is not badly broken.
@@ -144,7 +148,7 @@ static void tcp_measure_rcv_mss(struct sock *sk,
* to handle super-low mtu links fairly.
*/
(len >= TCP_MIN_MSS + sizeof(struct tcphdr) &&
- !(tcp_flag_word(skb->h.th)&TCP_REMNANT))) {
+ !(tcp_flag_word(tcp_hdr(skb)) & TCP_REMNANT))) {
/* Subtract also invariant (if peer is RFC compliant),
* tcp header plus fixed timestamp option length.
* Resulting "len" is MSS free of SACK jitter.
@@ -231,9 +235,9 @@ static void tcp_fixup_sndbuf(struct sock *sk)
*/
/* Slow part of check#2. */
-static int __tcp_grow_window(const struct sock *sk, struct tcp_sock *tp,
- const struct sk_buff *skb)
+static int __tcp_grow_window(const struct sock *sk, const struct sk_buff *skb)
{
+ struct tcp_sock *tp = tcp_sk(sk);
/* Optimize this! */
int truesize = tcp_win_from_space(skb->truesize)/2;
int window = tcp_win_from_space(sysctl_tcp_rmem[2])/2;
@@ -248,9 +252,11 @@ static int __tcp_grow_window(const struct sock *sk, struct tcp_sock *tp,
return 0;
}
-static void tcp_grow_window(struct sock *sk, struct tcp_sock *tp,
+static void tcp_grow_window(struct sock *sk,
struct sk_buff *skb)
{
+ struct tcp_sock *tp = tcp_sk(sk);
+
/* Check #1 */
if (tp->rcv_ssthresh < tp->window_clamp &&
(int)tp->rcv_ssthresh < tcp_space(sk) &&
@@ -263,7 +269,7 @@ static void tcp_grow_window(struct sock *sk, struct tcp_sock *tp,
if (tcp_win_from_space(skb->truesize) <= skb->len)
incr = 2*tp->advmss;
else
- incr = __tcp_grow_window(sk, tp, skb);
+ incr = __tcp_grow_window(sk, skb);
if (incr) {
tp->rcv_ssthresh = min(tp->rcv_ssthresh + incr, tp->window_clamp);
@@ -326,8 +332,9 @@ static void tcp_init_buffer_space(struct sock *sk)
}
/* 5. Recalculate window clamp after socket hit its memory bounds. */
-static void tcp_clamp_window(struct sock *sk, struct tcp_sock *tp)
+static void tcp_clamp_window(struct sock *sk)
{
+ struct tcp_sock *tp = tcp_sk(sk);
struct inet_connection_sock *icsk = inet_csk(sk);
icsk->icsk_ack.quick = 0;
@@ -499,8 +506,9 @@ new_measure:
* each ACK we send, he increments snd_cwnd and transmits more of his
* queue. -DaveM
*/
-static void tcp_event_data_recv(struct sock *sk, struct tcp_sock *tp, struct sk_buff *skb)
+static void tcp_event_data_recv(struct sock *sk, struct sk_buff *skb)
{
+ struct tcp_sock *tp = tcp_sk(sk);
struct inet_connection_sock *icsk = inet_csk(sk);
u32 now;
@@ -541,7 +549,7 @@ static void tcp_event_data_recv(struct sock *sk, struct tcp_sock *tp, struct sk_
TCP_ECN_check_ce(tp, skb);
if (skb->len >= 128)
- tcp_grow_window(sk, tp, skb);
+ tcp_grow_window(sk, skb);
}
/* Called to compute a smoothed rtt estimate. The data fed to this
@@ -574,7 +582,7 @@ static void tcp_rtt_estimator(struct sock *sk, const __u32 mrtt)
* does not matter how to _calculate_ it. Seems, it was trap
* that VJ failed to avoid. 8)
*/
- if(m == 0)
+ if (m == 0)
m = 1;
if (tp->srtt != 0) {
m -= (tp->srtt >> 3); /* m is now error in rtt est */
@@ -759,15 +767,17 @@ __u32 tcp_init_cwnd(struct tcp_sock *tp, struct dst_entry *dst)
}
/* Set slow start threshold and cwnd not falling to slow start */
-void tcp_enter_cwr(struct sock *sk)
+void tcp_enter_cwr(struct sock *sk, const int set_ssthresh)
{
struct tcp_sock *tp = tcp_sk(sk);
+ const struct inet_connection_sock *icsk = inet_csk(sk);
tp->prior_ssthresh = 0;
tp->bytes_acked = 0;
- if (inet_csk(sk)->icsk_ca_state < TCP_CA_CWR) {
+ if (icsk->icsk_ca_state < TCP_CA_CWR) {
tp->undo_marker = 0;
- tp->snd_ssthresh = inet_csk(sk)->icsk_ca_ops->ssthresh(sk);
+ if (set_ssthresh)
+ tp->snd_ssthresh = icsk->icsk_ca_ops->ssthresh(sk);
tp->snd_cwnd = min(tp->snd_cwnd,
tcp_packets_in_flight(tp) + 1U);
tp->snd_cwnd_cnt = 0;
@@ -934,7 +944,8 @@ tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb, u32 prior_snd_
{
const struct inet_connection_sock *icsk = inet_csk(sk);
struct tcp_sock *tp = tcp_sk(sk);
- unsigned char *ptr = ack_skb->h.raw + TCP_SKB_CB(ack_skb)->sacked;
+ unsigned char *ptr = (skb_transport_header(ack_skb) +
+ TCP_SKB_CB(ack_skb)->sacked);
struct tcp_sack_block_wire *sp = (struct tcp_sack_block_wire *)(ptr+2);
struct sk_buff *cached_skb;
int num_sacks = (ptr[1] - TCPOLEN_SACK_BASE)>>3;
@@ -1038,7 +1049,7 @@ tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb, u32 prior_snd_
cached_skb = tp->fastpath_skb_hint;
cached_fack_count = tp->fastpath_cnt_hint;
if (!cached_skb) {
- cached_skb = sk->sk_write_queue.next;
+ cached_skb = tcp_write_queue_head(sk);
cached_fack_count = 0;
}
@@ -1055,10 +1066,13 @@ tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb, u32 prior_snd_
if (after(end_seq, tp->high_seq))
flag |= FLAG_DATA_LOST;
- sk_stream_for_retrans_queue_from(skb, sk) {
+ tcp_for_write_queue_from(skb, sk) {
int in_sack, pcount;
u8 sacked;
+ if (skb == tcp_send_head(sk))
+ break;
+
cached_skb = skb;
cached_fack_count = fack_count;
if (i == first_sack_index) {
@@ -1159,6 +1173,18 @@ tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb, u32 prior_snd_
/* clear lost hint */
tp->retransmit_skb_hint = NULL;
}
+ /* SACK enhanced F-RTO detection.
+ * Set flag if and only if non-rexmitted
+ * segments below frto_highmark are
+ * SACKed (RFC4138; Appendix B).
+ * Clearing is correct due to the in-order walk
+ */
+ if (after(end_seq, tp->frto_highmark)) {
+ flag &= ~FLAG_ONLY_ORIG_SACKED;
+ } else {
+ if (!(sacked & TCPCB_RETRANS))
+ flag |= FLAG_ONLY_ORIG_SACKED;
+ }
}
TCP_SKB_CB(skb)->sacked |= TCPCB_SACKED_ACKED;
@@ -1195,7 +1221,9 @@ tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb, u32 prior_snd_
if (lost_retrans && icsk->icsk_ca_state == TCP_CA_Recovery) {
struct sk_buff *skb;
- sk_stream_for_retrans_queue(skb, sk) {
+ tcp_for_write_queue(skb, sk) {
+ if (skb == tcp_send_head(sk))
+ break;
if (after(TCP_SKB_CB(skb)->seq, lost_retrans))
break;
if (!after(TCP_SKB_CB(skb)->end_seq, tp->snd_una))
@@ -1224,7 +1252,8 @@ tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb, u32 prior_snd_
tp->left_out = tp->sacked_out + tp->lost_out;
- if ((reord < tp->fackets_out) && icsk->icsk_ca_state != TCP_CA_Loss)
+ if ((reord < tp->fackets_out) && icsk->icsk_ca_state != TCP_CA_Loss &&
+ (!tp->frto_highmark || after(tp->snd_una, tp->frto_highmark)))
tcp_update_reordering(sk, ((tp->fackets_out + 1) - reord), 0);
#if FASTRETRANS_DEBUG > 0
@@ -1236,9 +1265,49 @@ tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb, u32 prior_snd_
return flag;
}
-/* RTO occurred, but do not yet enter loss state. Instead, transmit two new
- * segments to see from the next ACKs whether any data was really missing.
- * If the RTO was spurious, new ACKs should arrive.
+/* F-RTO can only be used if TCP has never retransmitted anything other than
+ * head (SACK enhanced variant from Appendix B of RFC4138 is more robust here)
+ */
+int tcp_use_frto(struct sock *sk)
+{
+ const struct tcp_sock *tp = tcp_sk(sk);
+ struct sk_buff *skb;
+
+ if (!sysctl_tcp_frto)
+ return 0;
+
+ if (IsSackFrto())
+ return 1;
+
+ /* Avoid expensive walking of rexmit queue if possible */
+ if (tp->retrans_out > 1)
+ return 0;
+
+ skb = tcp_write_queue_head(sk);
+ skb = tcp_write_queue_next(sk, skb); /* Skips head */
+ tcp_for_write_queue_from(skb, sk) {
+ if (skb == tcp_send_head(sk))
+ break;
+ if (TCP_SKB_CB(skb)->sacked&TCPCB_RETRANS)
+ return 0;
+ /* Short-circuit when first non-SACKed skb has been checked */
+ if (!(TCP_SKB_CB(skb)->sacked&TCPCB_SACKED_ACKED))
+ break;
+ }
+ return 1;
+}
+
+/* RTO occurred, but do not yet enter Loss state. Instead, defer RTO
+ * recovery a bit and use heuristics in tcp_process_frto() to detect if
+ * the RTO was spurious. Only clear SACKED_RETRANS of the head here to
+ * keep retrans_out counting accurate (with SACK F-RTO, other than head
+ * may still have that bit set); TCPCB_LOST and remaining SACKED_RETRANS
+ * bits are handled if the Loss state is really to be entered (in
+ * tcp_enter_frto_loss).
+ *
+ * Do like tcp_enter_loss() would; when RTO expires the second time it
+ * does:
+ * "Reduce ssthresh if it has not yet been made inside this window."
*/
void tcp_enter_frto(struct sock *sk)
{
@@ -1246,39 +1315,69 @@ void tcp_enter_frto(struct sock *sk)
struct tcp_sock *tp = tcp_sk(sk);
struct sk_buff *skb;
- tp->frto_counter = 1;
-
- if (icsk->icsk_ca_state <= TCP_CA_Disorder ||
+ if ((!tp->frto_counter && icsk->icsk_ca_state <= TCP_CA_Disorder) ||
tp->snd_una == tp->high_seq ||
- (icsk->icsk_ca_state == TCP_CA_Loss && !icsk->icsk_retransmits)) {
+ ((icsk->icsk_ca_state == TCP_CA_Loss || tp->frto_counter) &&
+ !icsk->icsk_retransmits)) {
tp->prior_ssthresh = tcp_current_ssthresh(sk);
- tp->snd_ssthresh = icsk->icsk_ca_ops->ssthresh(sk);
+ /* Our state is too optimistic in ssthresh() call because cwnd
+ * is not reduced until tcp_enter_frto_loss() when previous FRTO
+ * recovery has not yet completed. Pattern would be this: RTO,
+ * Cumulative ACK, RTO (2xRTO for the same segment does not end
+ * up here twice).
+ * RFC4138 should be more specific on what to do, even though
+ * RTO is quite unlikely to occur after the first Cumulative ACK
+ * due to back-off and complexity of triggering events ...
+ */
+ if (tp->frto_counter) {
+ u32 stored_cwnd;
+ stored_cwnd = tp->snd_cwnd;
+ tp->snd_cwnd = 2;
+ tp->snd_ssthresh = icsk->icsk_ca_ops->ssthresh(sk);
+ tp->snd_cwnd = stored_cwnd;
+ } else {
+ tp->snd_ssthresh = icsk->icsk_ca_ops->ssthresh(sk);
+ }
+ /* ... in theory, cong.control module could do "any tricks" in
+ * ssthresh(), which means that ca_state, lost bits and lost_out
+ * counter would have to be faked before the call occurs. We
+ * consider that too expensive, unlikely and hacky, so modules
+ * using these in ssthresh() must deal with these incompatibility
+ * issues if they receive CA_EVENT_FRTO and frto_counter != 0
+ */
tcp_ca_event(sk, CA_EVENT_FRTO);
}
- /* Have to clear retransmission markers here to keep the bookkeeping
- * in shape, even though we are not yet in Loss state.
- * If something was really lost, it is eventually caught up
- * in tcp_enter_frto_loss.
- */
- tp->retrans_out = 0;
tp->undo_marker = tp->snd_una;
tp->undo_retrans = 0;
- sk_stream_for_retrans_queue(skb, sk) {
- TCP_SKB_CB(skb)->sacked &= ~TCPCB_RETRANS;
+ skb = tcp_write_queue_head(sk);
+ if (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_RETRANS) {
+ TCP_SKB_CB(skb)->sacked &= ~TCPCB_SACKED_RETRANS;
+ tp->retrans_out -= tcp_skb_pcount(skb);
}
tcp_sync_left_out(tp);
- tcp_set_ca_state(sk, TCP_CA_Open);
- tp->frto_highmark = tp->snd_nxt;
+ /* Earlier loss recovery underway (see RFC4138; Appendix B).
+ * The last condition is necessary at least in tp->frto_counter case.
+ */
+ if (IsSackFrto() && (tp->frto_counter ||
+ ((1 << icsk->icsk_ca_state) & (TCPF_CA_Recovery|TCPF_CA_Loss))) &&
+ after(tp->high_seq, tp->snd_una)) {
+ tp->frto_highmark = tp->high_seq;
+ } else {
+ tp->frto_highmark = tp->snd_nxt;
+ }
+ tcp_set_ca_state(sk, TCP_CA_Disorder);
+ tp->high_seq = tp->snd_nxt;
+ tp->frto_counter = 1;
}
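
Paraphrasing the RFC 4138 algorithm the rest of this series implements (the detection half lives in tcp_process_frto(), outside these hunks): after the RTO fires and the head segment is retransmitted, tcp_enter_frto() merely defers recovery and sets frto_counter; if the first ACK after the RTO advances snd_una, the sender transmits new, previously unsent data instead of going back-N, and if the following ACK also advances snd_una over segments that were never retransmitted, the timeout is declared spurious and sysctl_tcp_frto_response picks how cwnd and ssthresh are restored. Duplicate ACKs instead drop into tcp_enter_frto_loss() and conventional loss recovery.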
/* Enter Loss state after F-RTO was applied. Dupack arrived after RTO,
* which indicates that we should follow the traditional RTO recovery,
* i.e. mark everything lost and do go-back-N retransmission.
*/
-static void tcp_enter_frto_loss(struct sock *sk)
+static void tcp_enter_frto_loss(struct sock *sk, int allowed_segments, int flag)
{
struct tcp_sock *tp = tcp_sk(sk);
struct sk_buff *skb;
@@ -1287,10 +1386,23 @@ static void tcp_enter_frto_loss(struct sock *sk)
tp->sacked_out = 0;
tp->lost_out = 0;
tp->fackets_out = 0;
+ tp->retrans_out = 0;
- sk_stream_for_retrans_queue(skb, sk) {
+ tcp_for_write_queue(skb, sk) {
+ if (skb == tcp_send_head(sk))
+ break;
cnt += tcp_skb_pcount(skb);
- TCP_SKB_CB(skb)->sacked &= ~TCPCB_LOST;
+ /*
+ * Count the retransmission made on RTO correctly (only when
+ * we were waiting for the first ACK and did not get it)...
+ */
+ if ((tp->frto_counter == 1) && !(flag&FLAG_DATA_ACKED)) {
+ tp->retrans_out += tcp_skb_pcount(skb);
+ /* ...enter this if branch just for the first segment */
+ flag |= FLAG_DATA_ACKED;
+ } else {
+ TCP_SKB_CB(skb)->sacked &= ~(TCPCB_LOST|TCPCB_SACKED_RETRANS);
+ }
if (!(TCP_SKB_CB(skb)->sacked&TCPCB_SACKED_ACKED)) {
/* Do not mark those segments lost that were
@@ -1308,7 +1420,7 @@ static void tcp_enter_frto_loss(struct sock *sk)
}
tcp_sync_left_out(tp);
- tp->snd_cwnd = tp->frto_counter + tcp_packets_in_flight(tp)+1;
+ tp->snd_cwnd = tcp_packets_in_flight(tp) + allowed_segments;
tp->snd_cwnd_cnt = 0;
tp->snd_cwnd_stamp = tcp_time_stamp;
tp->undo_marker = 0;
@@ -1366,7 +1478,9 @@ void tcp_enter_loss(struct sock *sk, int how)
if (!how)
tp->undo_marker = tp->snd_una;
- sk_stream_for_retrans_queue(skb, sk) {
+ tcp_for_write_queue(skb, sk) {
+ if (skb == tcp_send_head(sk))
+ break;
cnt += tcp_skb_pcount(skb);
if (TCP_SKB_CB(skb)->sacked&TCPCB_RETRANS)
tp->undo_marker = 0;
@@ -1401,14 +1515,14 @@ static int tcp_check_sack_reneging(struct sock *sk)
* receiver _host_ is heavily congested (or buggy).
* Do processing similar to RTO timeout.
*/
- if ((skb = skb_peek(&sk->sk_write_queue)) != NULL &&
+ if ((skb = tcp_write_queue_head(sk)) != NULL &&
(TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED)) {
struct inet_connection_sock *icsk = inet_csk(sk);
NET_INC_STATS_BH(LINUX_MIB_TCPSACKRENEGING);
tcp_enter_loss(sk, 1);
icsk->icsk_retransmits++;
- tcp_retransmit_skb(sk, skb_peek(&sk->sk_write_queue));
+ tcp_retransmit_skb(sk, tcp_write_queue_head(sk));
inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS,
icsk->icsk_rto, TCP_RTO_MAX);
return 1;
@@ -1426,10 +1540,12 @@ static inline int tcp_skb_timedout(struct sock *sk, struct sk_buff *skb)
return (tcp_time_stamp - TCP_SKB_CB(skb)->when > inet_csk(sk)->icsk_rto);
}
-static inline int tcp_head_timedout(struct sock *sk, struct tcp_sock *tp)
+static inline int tcp_head_timedout(struct sock *sk)
{
+ struct tcp_sock *tp = tcp_sk(sk);
+
return tp->packets_out &&
- tcp_skb_timedout(sk, skb_peek(&sk->sk_write_queue));
+ tcp_skb_timedout(sk, tcp_write_queue_head(sk));
}
/* Linux NewReno/SACK/FACK/ECN state machine.
@@ -1525,10 +1641,15 @@ static inline int tcp_head_timedout(struct sock *sk, struct tcp_sock *tp)
* Main question: may we further continue forward transmission
* with the same cwnd?
*/
-static int tcp_time_to_recover(struct sock *sk, struct tcp_sock *tp)
+static int tcp_time_to_recover(struct sock *sk)
{
+ struct tcp_sock *tp = tcp_sk(sk);
__u32 packets_out;
+ /* Do not perform any recovery during FRTO algorithm */
+ if (tp->frto_counter)
+ return 0;
+
/* Trick#1: The loss is proven. */
if (tp->lost_out)
return 1;
@@ -1540,7 +1661,7 @@ static int tcp_time_to_recover(struct sock *sk, struct tcp_sock *tp)
/* Trick#3 : when we use RFC2988 timer restart, fast
* retransmit can be triggered by timeout of queue head.
*/
- if (tcp_head_timedout(sk, tp))
+ if (tcp_head_timedout(sk))
return 1;
/* Trick#4: It is still not OK... But will it be useful to delay
@@ -1549,7 +1670,7 @@ static int tcp_time_to_recover(struct sock *sk, struct tcp_sock *tp)
packets_out = tp->packets_out;
if (packets_out <= tp->reordering &&
tp->sacked_out >= max_t(__u32, packets_out/2, sysctl_tcp_reordering) &&
- !tcp_may_send_now(sk, tp)) {
+ !tcp_may_send_now(sk)) {
/* We have nothing to send. This connection is limited
* either by receiver window or by application.
*/
@@ -1589,8 +1710,10 @@ static void tcp_add_reno_sack(struct sock *sk)
/* Account for ACK, ACKing some data in Reno Recovery phase. */
-static void tcp_remove_reno_sacks(struct sock *sk, struct tcp_sock *tp, int acked)
+static void tcp_remove_reno_sacks(struct sock *sk, int acked)
{
+ struct tcp_sock *tp = tcp_sk(sk);
+
if (acked > 0) {
/* One ACK acked hole. The rest eat duplicate ACKs. */
if (acked-1 >= tp->sacked_out)
@@ -1609,9 +1732,10 @@ static inline void tcp_reset_reno_sack(struct tcp_sock *tp)
}
/* Mark head of queue up as lost. */
-static void tcp_mark_head_lost(struct sock *sk, struct tcp_sock *tp,
+static void tcp_mark_head_lost(struct sock *sk,
int packets, u32 high_seq)
{
+ struct tcp_sock *tp = tcp_sk(sk);
struct sk_buff *skb;
int cnt;
@@ -1620,11 +1744,13 @@ static void tcp_mark_head_lost(struct sock *sk, struct tcp_sock *tp,
skb = tp->lost_skb_hint;
cnt = tp->lost_cnt_hint;
} else {
- skb = sk->sk_write_queue.next;
+ skb = tcp_write_queue_head(sk);
cnt = 0;
}
- sk_stream_for_retrans_queue_from(skb, sk) {
+ tcp_for_write_queue_from(skb, sk) {
+ if (skb == tcp_send_head(sk))
+ break;
/* TODO: do this better */
/* this is not the most efficient way to do this... */
tp->lost_skb_hint = skb;
@@ -1638,12 +1764,11 @@ static void tcp_mark_head_lost(struct sock *sk, struct tcp_sock *tp,
/* clear xmit_retransmit_queue hints
* if this is beyond hint */
- if(tp->retransmit_skb_hint != NULL &&
- before(TCP_SKB_CB(skb)->seq,
- TCP_SKB_CB(tp->retransmit_skb_hint)->seq)) {
-
+ if (tp->retransmit_skb_hint != NULL &&
+ before(TCP_SKB_CB(skb)->seq,
+ TCP_SKB_CB(tp->retransmit_skb_hint)->seq))
tp->retransmit_skb_hint = NULL;
- }
+
}
}
tcp_sync_left_out(tp);
@@ -1651,15 +1776,17 @@ static void tcp_mark_head_lost(struct sock *sk, struct tcp_sock *tp,
/* Account newly detected lost packet(s) */
-static void tcp_update_scoreboard(struct sock *sk, struct tcp_sock *tp)
+static void tcp_update_scoreboard(struct sock *sk)
{
+ struct tcp_sock *tp = tcp_sk(sk);
+
if (IsFack(tp)) {
int lost = tp->fackets_out - tp->reordering;
if (lost <= 0)
lost = 1;
- tcp_mark_head_lost(sk, tp, lost, tp->high_seq);
+ tcp_mark_head_lost(sk, lost, tp->high_seq);
} else {
- tcp_mark_head_lost(sk, tp, 1, tp->high_seq);
+ tcp_mark_head_lost(sk, 1, tp->high_seq);
}
/* New heuristics: it is possible only after we switched
@@ -1667,13 +1794,15 @@ static void tcp_update_scoreboard(struct sock *sk, struct tcp_sock *tp)
* Hence, we can detect timed out packets during fast
* retransmit without falling to slow start.
*/
- if (!IsReno(tp) && tcp_head_timedout(sk, tp)) {
+ if (!IsReno(tp) && tcp_head_timedout(sk)) {
struct sk_buff *skb;
skb = tp->scoreboard_skb_hint ? tp->scoreboard_skb_hint
- : sk->sk_write_queue.next;
+ : tcp_write_queue_head(sk);
- sk_stream_for_retrans_queue_from(skb, sk) {
+ tcp_for_write_queue_from(skb, sk) {
+ if (skb == tcp_send_head(sk))
+ break;
if (!tcp_skb_timedout(sk, skb))
break;
@@ -1745,9 +1874,11 @@ static inline int tcp_packet_delayed(struct tcp_sock *tp)
/* Undo procedures. */
#if FASTRETRANS_DEBUG > 1
-static void DBGUNDO(struct sock *sk, struct tcp_sock *tp, const char *msg)
+static void DBGUNDO(struct sock *sk, const char *msg)
{
+ struct tcp_sock *tp = tcp_sk(sk);
struct inet_sock *inet = inet_sk(sk);
+
printk(KERN_DEBUG "Undo %s %u.%u.%u.%u/%u c%u l%u ss%u/%u p%u\n",
msg,
NIPQUAD(inet->daddr), ntohs(inet->dport),
@@ -1793,13 +1924,15 @@ static inline int tcp_may_undo(struct tcp_sock *tp)
}
/* People celebrate: "We love our President!" */
-static int tcp_try_undo_recovery(struct sock *sk, struct tcp_sock *tp)
+static int tcp_try_undo_recovery(struct sock *sk)
{
+ struct tcp_sock *tp = tcp_sk(sk);
+
if (tcp_may_undo(tp)) {
/* Happy end! We did not retransmit anything
* or our original transmission succeeded.
*/
- DBGUNDO(sk, tp, inet_csk(sk)->icsk_ca_state == TCP_CA_Loss ? "loss" : "retrans");
+ DBGUNDO(sk, inet_csk(sk)->icsk_ca_state == TCP_CA_Loss ? "loss" : "retrans");
tcp_undo_cwr(sk, 1);
if (inet_csk(sk)->icsk_ca_state == TCP_CA_Loss)
NET_INC_STATS_BH(LINUX_MIB_TCPLOSSUNDO);
@@ -1819,10 +1952,12 @@ static int tcp_try_undo_recovery(struct sock *sk, struct tcp_sock *tp)
}
/* Try to undo cwnd reduction, because D-SACKs acked all retransmitted data */
-static void tcp_try_undo_dsack(struct sock *sk, struct tcp_sock *tp)
+static void tcp_try_undo_dsack(struct sock *sk)
{
+ struct tcp_sock *tp = tcp_sk(sk);
+
if (tp->undo_marker && !tp->undo_retrans) {
- DBGUNDO(sk, tp, "D-SACK");
+ DBGUNDO(sk, "D-SACK");
tcp_undo_cwr(sk, 1);
tp->undo_marker = 0;
NET_INC_STATS_BH(LINUX_MIB_TCPDSACKUNDO);
@@ -1831,9 +1966,9 @@ static void tcp_try_undo_dsack(struct sock *sk, struct tcp_sock *tp)
/* Undo during fast recovery after partial ACK. */
-static int tcp_try_undo_partial(struct sock *sk, struct tcp_sock *tp,
- int acked)
+static int tcp_try_undo_partial(struct sock *sk, int acked)
{
+ struct tcp_sock *tp = tcp_sk(sk);
/* Partial ACK arrived. Force Hoe's retransmit. */
int failed = IsReno(tp) || tp->fackets_out>tp->reordering;
@@ -1846,7 +1981,7 @@ static int tcp_try_undo_partial(struct sock *sk, struct tcp_sock *tp,
tcp_update_reordering(sk, tcp_fackets_out(tp) + acked, 1);
- DBGUNDO(sk, tp, "Hoe");
+ DBGUNDO(sk, "Hoe");
tcp_undo_cwr(sk, 0);
NET_INC_STATS_BH(LINUX_MIB_TCPPARTIALUNDO);
@@ -1860,17 +1995,21 @@ static int tcp_try_undo_partial(struct sock *sk, struct tcp_sock *tp,
}
/* Undo during loss recovery after partial ACK. */
-static int tcp_try_undo_loss(struct sock *sk, struct tcp_sock *tp)
+static int tcp_try_undo_loss(struct sock *sk)
{
+ struct tcp_sock *tp = tcp_sk(sk);
+
if (tcp_may_undo(tp)) {
struct sk_buff *skb;
- sk_stream_for_retrans_queue(skb, sk) {
+ tcp_for_write_queue(skb, sk) {
+ if (skb == tcp_send_head(sk))
+ break;
TCP_SKB_CB(skb)->sacked &= ~TCPCB_LOST;
}
clear_all_retrans_hints(tp);
- DBGUNDO(sk, tp, "partial loss");
+ DBGUNDO(sk, "partial loss");
tp->lost_out = 0;
tp->left_out = tp->sacked_out;
tcp_undo_cwr(sk, 1);
@@ -1892,15 +2031,17 @@ static inline void tcp_complete_cwr(struct sock *sk)
tcp_ca_event(sk, CA_EVENT_COMPLETE_CWR);
}
-static void tcp_try_to_open(struct sock *sk, struct tcp_sock *tp, int flag)
+static void tcp_try_to_open(struct sock *sk, int flag)
{
+ struct tcp_sock *tp = tcp_sk(sk);
+
tp->left_out = tp->sacked_out;
if (tp->retrans_out == 0)
tp->retrans_stamp = 0;
if (flag&FLAG_ECE)
- tcp_enter_cwr(sk);
+ tcp_enter_cwr(sk, 1);
if (inet_csk(sk)->icsk_ca_state != TCP_CA_CWR) {
int state = TCP_CA_Open;
@@ -1987,7 +2128,7 @@ tcp_fastretrans_alert(struct sock *sk, u32 prior_snd_una,
before(tp->snd_una, tp->high_seq) &&
icsk->icsk_ca_state != TCP_CA_Open &&
tp->fackets_out > tp->reordering) {
- tcp_mark_head_lost(sk, tp, tp->fackets_out-tp->reordering, tp->high_seq);
+ tcp_mark_head_lost(sk, tp->fackets_out-tp->reordering, tp->high_seq);
NET_INC_STATS_BH(LINUX_MIB_TCPLOSS);
}
@@ -1997,14 +2138,13 @@ tcp_fastretrans_alert(struct sock *sk, u32 prior_snd_una,
/* E. Check state exit conditions. State can be terminated
* when high_seq is ACKed. */
if (icsk->icsk_ca_state == TCP_CA_Open) {
- if (!sysctl_tcp_frto)
- BUG_TRAP(tp->retrans_out == 0);
+ BUG_TRAP(tp->retrans_out == 0);
tp->retrans_stamp = 0;
} else if (!before(tp->snd_una, tp->high_seq)) {
switch (icsk->icsk_ca_state) {
case TCP_CA_Loss:
icsk->icsk_retransmits = 0;
- if (tcp_try_undo_recovery(sk, tp))
+ if (tcp_try_undo_recovery(sk))
return;
break;
@@ -2018,7 +2158,7 @@ tcp_fastretrans_alert(struct sock *sk, u32 prior_snd_una,
break;
case TCP_CA_Disorder:
- tcp_try_undo_dsack(sk, tp);
+ tcp_try_undo_dsack(sk);
if (!tp->undo_marker ||
/* For SACK case do not Open to allow to undo
* catching for all duplicate ACKs. */
@@ -2031,7 +2171,7 @@ tcp_fastretrans_alert(struct sock *sk, u32 prior_snd_una,
case TCP_CA_Recovery:
if (IsReno(tp))
tcp_reset_reno_sack(tp);
- if (tcp_try_undo_recovery(sk, tp))
+ if (tcp_try_undo_recovery(sk))
return;
tcp_complete_cwr(sk);
break;
@@ -2047,14 +2187,14 @@ tcp_fastretrans_alert(struct sock *sk, u32 prior_snd_una,
} else {
int acked = prior_packets - tp->packets_out;
if (IsReno(tp))
- tcp_remove_reno_sacks(sk, tp, acked);
- is_dupack = tcp_try_undo_partial(sk, tp, acked);
+ tcp_remove_reno_sacks(sk, acked);
+ is_dupack = tcp_try_undo_partial(sk, acked);
}
break;
case TCP_CA_Loss:
if (flag&FLAG_DATA_ACKED)
icsk->icsk_retransmits = 0;
- if (!tcp_try_undo_loss(sk, tp)) {
+ if (!tcp_try_undo_loss(sk)) {
tcp_moderate_cwnd(tp);
tcp_xmit_retransmit_queue(sk);
return;
@@ -2071,10 +2211,10 @@ tcp_fastretrans_alert(struct sock *sk, u32 prior_snd_una,
}
if (icsk->icsk_ca_state == TCP_CA_Disorder)
- tcp_try_undo_dsack(sk, tp);
+ tcp_try_undo_dsack(sk);
- if (!tcp_time_to_recover(sk, tp)) {
- tcp_try_to_open(sk, tp, flag);
+ if (!tcp_time_to_recover(sk)) {
+ tcp_try_to_open(sk, flag);
return;
}
@@ -2113,8 +2253,8 @@ tcp_fastretrans_alert(struct sock *sk, u32 prior_snd_una,
tcp_set_ca_state(sk, TCP_CA_Recovery);
}
- if (is_dupack || tcp_head_timedout(sk, tp))
- tcp_update_scoreboard(sk, tp);
+ if (is_dupack || tcp_head_timedout(sk))
+ tcp_update_scoreboard(sk);
tcp_cwnd_down(sk);
tcp_xmit_retransmit_queue(sk);
}
@@ -2190,8 +2330,10 @@ static void tcp_cong_avoid(struct sock *sk, u32 ack, u32 rtt,
* RFC2988 recommends to restart timer to now+rto.
*/
-static void tcp_ack_packets_out(struct sock *sk, struct tcp_sock *tp)
+static void tcp_ack_packets_out(struct sock *sk)
{
+ struct tcp_sock *tp = tcp_sk(sk);
+
if (!tp->packets_out) {
inet_csk_clear_xmit_timer(sk, ICSK_TIME_RETRANS);
} else {
@@ -2255,14 +2397,6 @@ static int tcp_tso_acked(struct sock *sk, struct sk_buff *skb,
return acked;
}
-static u32 tcp_usrtt(struct timeval *tv)
-{
- struct timeval now;
-
- do_gettimeofday(&now);
- return (now.tv_sec - tv->tv_sec) * 1000000 + (now.tv_usec - tv->tv_usec);
-}
-
/* Remove acknowledged frames from the retransmission queue. */
static int tcp_clean_rtx_queue(struct sock *sk, __s32 *seq_rtt_p)
{
@@ -2273,12 +2407,10 @@ static int tcp_clean_rtx_queue(struct sock *sk, __s32 *seq_rtt_p)
int acked = 0;
__s32 seq_rtt = -1;
u32 pkts_acked = 0;
- void (*rtt_sample)(struct sock *sk, u32 usrtt)
- = icsk->icsk_ca_ops->rtt_sample;
- struct timeval tv = { .tv_sec = 0, .tv_usec = 0 };
+ ktime_t last_ackt = ktime_set(0,0);
- while ((skb = skb_peek(&sk->sk_write_queue)) &&
- skb != sk->sk_send_head) {
+ while ((skb = tcp_write_queue_head(sk)) &&
+ skb != tcp_send_head(sk)) {
struct tcp_skb_cb *scb = TCP_SKB_CB(skb);
__u8 sacked = scb->sacked;
@@ -2318,13 +2450,13 @@ static int tcp_clean_rtx_queue(struct sock *sk, __s32 *seq_rtt_p)
if (sacked) {
if (sacked & TCPCB_RETRANS) {
- if(sacked & TCPCB_SACKED_RETRANS)
+ if (sacked & TCPCB_SACKED_RETRANS)
tp->retrans_out -= tcp_skb_pcount(skb);
acked |= FLAG_RETRANS_DATA_ACKED;
seq_rtt = -1;
} else if (seq_rtt < 0) {
seq_rtt = now - scb->when;
- skb_get_timestamp(skb, &tv);
+ last_ackt = skb->tstamp;
}
if (sacked & TCPCB_SACKED_ACKED)
tp->sacked_out -= tcp_skb_pcount(skb);
@@ -2337,23 +2469,24 @@ static int tcp_clean_rtx_queue(struct sock *sk, __s32 *seq_rtt_p)
}
} else if (seq_rtt < 0) {
seq_rtt = now - scb->when;
- skb_get_timestamp(skb, &tv);
+ last_ackt = skb->tstamp;
}
tcp_dec_pcount_approx(&tp->fackets_out, skb);
tcp_packets_out_dec(tp, skb);
- __skb_unlink(skb, &sk->sk_write_queue);
+ tcp_unlink_write_queue(skb, sk);
sk_stream_free_skb(sk, skb);
clear_all_retrans_hints(tp);
}
if (acked&FLAG_ACKED) {
+ const struct tcp_congestion_ops *ca_ops
+ = inet_csk(sk)->icsk_ca_ops;
+
tcp_ack_update_rtt(sk, acked, seq_rtt);
- tcp_ack_packets_out(sk, tp);
- if (rtt_sample && !(acked & FLAG_RETRANS_DATA_ACKED))
- (*rtt_sample)(sk, tcp_usrtt(&tv));
+ tcp_ack_packets_out(sk);
- if (icsk->icsk_ca_ops->pkts_acked)
- icsk->icsk_ca_ops->pkts_acked(sk, pkts_acked);
+ if (ca_ops->pkts_acked)
+ ca_ops->pkts_acked(sk, pkts_acked, last_ackt);
}
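With the rtt_sample callback removed, a congestion module that still wants a
per-ACK RTT derives it inside pkts_acked from the ktime_t it now receives, as
tcp_lp does further down. A minimal sketch of such a hook; lp_state and
update_rtt are placeholders for module-private state, not kernel symbols:

	/* Illustrative pkts_acked hook: turns the transmit timestamp of the
	 * last ACKed skb into a microsecond RTT sample. */
	static void sample_pkts_acked(struct sock *sk, u32 num_acked,
				      ktime_t last)
	{
		struct lp_state *ca = inet_csk_ca(sk);	/* hypothetical ca state */
		s64 rtt_us = ktime_to_us(net_timedelta(last));

		if (rtt_us > 0)
			update_rtt(ca, (u32)rtt_us);	/* hypothetical helper */
	}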
#if FASTRETRANS_DEBUG > 0
@@ -2390,7 +2523,7 @@ static void tcp_ack_probe(struct sock *sk)
/* Was it a usable window open? */
- if (!after(TCP_SKB_CB(sk->sk_send_head)->end_seq,
+ if (!after(TCP_SKB_CB(tcp_send_head(sk))->end_seq,
tp->snd_una + tp->snd_wnd)) {
icsk->icsk_backoff = 0;
inet_csk_clear_xmit_timer(sk, ICSK_TIME_PROBE0);
@@ -2433,13 +2566,14 @@ static inline int tcp_may_update_window(const struct tcp_sock *tp, const u32 ack
* Window update algorithm, described in RFC793/RFC1122 (used in linux-2.2
* and in FreeBSD. NetBSD's one is even worse.) is wrong.
*/
-static int tcp_ack_update_window(struct sock *sk, struct tcp_sock *tp,
- struct sk_buff *skb, u32 ack, u32 ack_seq)
+static int tcp_ack_update_window(struct sock *sk, struct sk_buff *skb, u32 ack,
+ u32 ack_seq)
{
+ struct tcp_sock *tp = tcp_sk(sk);
int flag = 0;
- u32 nwin = ntohs(skb->h.th->window);
+ u32 nwin = ntohs(tcp_hdr(skb)->window);
- if (likely(!skb->h.th->syn))
+ if (likely(!tcp_hdr(skb)->syn))
nwin <<= tp->rx_opt.snd_wscale;
if (tcp_may_update_window(tp, ack, ack_seq, nwin)) {
@@ -2453,7 +2587,7 @@ static int tcp_ack_update_window(struct sock *sk, struct tcp_sock *tp,
* fast path is recovered for sending TCP.
*/
tp->pred_flags = 0;
- tcp_fast_path_check(sk, tp);
+ tcp_fast_path_check(sk);
if (nwin > tp->max_window) {
tp->max_window = nwin;
@@ -2467,39 +2601,139 @@ static int tcp_ack_update_window(struct sock *sk, struct tcp_sock *tp,
return flag;
}
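As a quick illustration of the window-scaling arithmetic above (the shift
applies to every segment except the SYN itself, which always carries an
unscaled window), a self-contained helper; the function name is ours:

	#include <stdint.h>

	/* Effective advertised window, per the logic above. */
	static uint32_t effective_window(uint16_t raw_win, int is_syn,
					 uint8_t snd_wscale)
	{
		uint32_t nwin = raw_win;

		if (!is_syn)
			nwin <<= snd_wscale;
		return nwin;
	}

With raw_win = 0x4000 and snd_wscale = 7 this yields 2 MiB of usable window.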
-static void tcp_process_frto(struct sock *sk, u32 prior_snd_una)
+/* A very conservative spurious RTO response algorithm: reduce cwnd and
+ * continue in congestion avoidance.
+ */
+static void tcp_conservative_spur_to_response(struct tcp_sock *tp)
+{
+ tp->snd_cwnd = min(tp->snd_cwnd, tp->snd_ssthresh);
+ tp->snd_cwnd_cnt = 0;
+ tcp_moderate_cwnd(tp);
+}
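For a numeric feel of the conservative response, a tiny model (ours, not
kernel code): with snd_cwnd 20 and snd_ssthresh 10, the spurious-RTO path
resumes congestion avoidance at 10 segments instead of the slow-start restart
a genuine timeout would force.

	/* Model of the cwnd clamp above; tcp_moderate_cwnd() may shrink it
	 * further based on packets in flight. */
	static unsigned int conservative_cwnd(unsigned int snd_cwnd,
					      unsigned int snd_ssthresh)
	{
		return snd_cwnd < snd_ssthresh ? snd_cwnd : snd_ssthresh;
	}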
+
+/* A conservative spurious RTO response algorithm: reduce cwnd using
+ * rate halving and continue in congestion avoidance.
+ */
+static void tcp_ratehalving_spur_to_response(struct sock *sk)
+{
+ tcp_enter_cwr(sk, 0);
+}
+
+static void tcp_undo_spur_to_response(struct sock *sk, int flag)
+{
+ if (flag&FLAG_ECE)
+ tcp_ratehalving_spur_to_response(sk);
+ else
+ tcp_undo_cwr(sk, 1);
+}
+
+/* F-RTO spurious RTO detection algorithm (RFC4138)
+ *
+ * F-RTO operates during the two new ACKs following an RTO (well, almost; see
+ * inline comments). State (the ACK number) is kept in frto_counter. When an
+ * ACK advances the window (but not to or beyond the highest sequence sent
+ * before the RTO):
+ * On First ACK, send two new segments out.
+ * On Second ACK, RTO was likely spurious. Perform the spurious-RTO
+ * response (the response algorithm is not part of the F-RTO
+ * detection algorithm given in RFC4138 but can be selected
+ * separately).
+ * Otherwise (basically on duplicate ACK), RTO was (likely) caused by a loss
+ * and TCP falls back to conventional RTO recovery. F-RTO allows overriding
+ * of Nagle; this is done using frto_counter states 2 and 3: when a new data
+ * segment of any size is sent during F-RTO, state 2 is upgraded to 3.
+ *
+ * Rationale: if the RTO was spurious, new ACKs should arrive from the
+ * original window even after we transmit two new data segments.
+ *
+ * SACK version:
+ * in the first step, wait until the first cumulative ACK arrives, then
+ * move to the second step. In the second step, the next ACK decides.
+ *
+ * F-RTO is implemented (mainly) in four functions:
+ * - tcp_use_frto() is used to determine if TCP can use F-RTO
+ * - tcp_enter_frto() prepares TCP state on RTO if F-RTO is used; it is
+ * called when tcp_use_frto() has shown the green light
+ * - tcp_process_frto() handles incoming ACKs during the F-RTO algorithm
+ * - tcp_enter_frto_loss() is called if there is not enough evidence
+ * to prove that the RTO is indeed spurious. It transfers control
+ * from F-RTO to conventional RTO recovery
+ */
+static int tcp_process_frto(struct sock *sk, u32 prior_snd_una, int flag)
{
struct tcp_sock *tp = tcp_sk(sk);
tcp_sync_left_out(tp);
- if (tp->snd_una == prior_snd_una ||
- !before(tp->snd_una, tp->frto_highmark)) {
- /* RTO was caused by loss, start retransmitting in
- * go-back-N slow start
+ /* Duplicate the behavior from Loss state (fastretrans_alert) */
+ if (flag&FLAG_DATA_ACKED)
+ inet_csk(sk)->icsk_retransmits = 0;
+
+ if (!before(tp->snd_una, tp->frto_highmark)) {
+ tcp_enter_frto_loss(sk, (tp->frto_counter == 1 ? 2 : 3), flag);
+ return 1;
+ }
+
+ if (!IsSackFrto() || IsReno(tp)) {
+ /* RFC4138 shortcoming in step 2; should also have case c):
+ * the ACK is neither a duplicate nor advances the window, e.g.,
+ * opposite-direction data, window update
*/
- tcp_enter_frto_loss(sk);
- return;
+ if ((tp->snd_una == prior_snd_una) && (flag&FLAG_NOT_DUP) &&
+ !(flag&FLAG_FORWARD_PROGRESS))
+ return 1;
+
+ if (!(flag&FLAG_DATA_ACKED)) {
+ tcp_enter_frto_loss(sk, (tp->frto_counter == 1 ? 0 : 3),
+ flag);
+ return 1;
+ }
+ } else {
+ if (!(flag&FLAG_DATA_ACKED) && (tp->frto_counter == 1)) {
+ /* Prevent sending of new data. */
+ tp->snd_cwnd = min(tp->snd_cwnd,
+ tcp_packets_in_flight(tp));
+ return 1;
+ }
+
+ if ((tp->frto_counter >= 2) &&
+ (!(flag&FLAG_FORWARD_PROGRESS) ||
+ ((flag&FLAG_DATA_SACKED) && !(flag&FLAG_ONLY_ORIG_SACKED)))) {
+ /* RFC4138 shortcoming (see comment above) */
+ if (!(flag&FLAG_FORWARD_PROGRESS) && (flag&FLAG_NOT_DUP))
+ return 1;
+
+ tcp_enter_frto_loss(sk, 3, flag);
+ return 1;
+ }
}
if (tp->frto_counter == 1) {
- /* First ACK after RTO advances the window: allow two new
- * segments out.
- */
+ /* Sending of the next skb must be allowed, or F-RTO cannot be used */
+ if (!tcp_send_head(sk) ||
+ after(TCP_SKB_CB(tcp_send_head(sk))->end_seq,
+ tp->snd_una + tp->snd_wnd)) {
+ tcp_enter_frto_loss(sk, (tp->frto_counter == 1 ? 2 : 3),
+ flag);
+ return 1;
+ }
+
tp->snd_cwnd = tcp_packets_in_flight(tp) + 2;
+ tp->frto_counter = 2;
+ return 1;
} else {
- /* Also the second ACK after RTO advances the window.
- * The RTO was likely spurious. Reduce cwnd and continue
- * in congestion avoidance
- */
- tp->snd_cwnd = min(tp->snd_cwnd, tp->snd_ssthresh);
- tcp_moderate_cwnd(tp);
+ switch (sysctl_tcp_frto_response) {
+ case 2:
+ tcp_undo_spur_to_response(sk, flag);
+ break;
+ case 1:
+ tcp_conservative_spur_to_response(tp);
+ break;
+ default:
+ tcp_ratehalving_spur_to_response(sk);
+ break;
+ }
+ tp->frto_counter = 0;
}
-
- /* F-RTO affects on two new ACKs following RTO.
- * At latest on third ACK the TCP behavior is back to normal.
- */
- tp->frto_counter = (tp->frto_counter + 1) % 3;
+ return 0;
}
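The control flow above compresses to a small state machine on frto_counter.
The following standalone model (simplified to the non-SACK case; names and
the two boolean flags are ours, not the kernel's) shows how two
window-advancing ACKs lead to the spurious verdict while anything else falls
back to loss recovery:

	#include <stdio.h>
	#include <stdbool.h>

	enum frto_verdict { FRTO_PENDING, FRTO_SPURIOUS, FRTO_LOSS };

	/* One ACK fed to the model: data_acked = the ACK advanced snd_una,
	 * past_highmark = it reached or passed frto_highmark. */
	static enum frto_verdict frto_ack(int *counter, bool data_acked,
					  bool past_highmark)
	{
		if (past_highmark || !data_acked)
			return FRTO_LOSS;	/* fall back: tcp_enter_frto_loss() */
		if (*counter == 1) {
			*counter = 2;		/* first new ACK: two new segments */
			return FRTO_PENDING;
		}
		return FRTO_SPURIOUS;		/* second new ACK: undo the RTO */
	}

	int main(void)
	{
		int counter = 1;

		if (frto_ack(&counter, true, false) == FRTO_PENDING &&
		    frto_ack(&counter, true, false) == FRTO_SPURIOUS)
			printf("RTO judged spurious after two new ACKs\n");
		return 0;
	}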
/* This routine deals with incoming acks, but not outgoing ones. */
@@ -2513,6 +2747,7 @@ static int tcp_ack(struct sock *sk, struct sk_buff *skb, int flag)
u32 prior_in_flight;
s32 seq_rtt;
int prior_packets;
+ int frto_cwnd = 0;
/* If the ack is newer than sent or older than previous acks
* then we can probably ignore it.
@@ -2549,12 +2784,12 @@ static int tcp_ack(struct sock *sk, struct sk_buff *skb, int flag)
else
NET_INC_STATS_BH(LINUX_MIB_TCPPUREACKS);
- flag |= tcp_ack_update_window(sk, tp, skb, ack, ack_seq);
+ flag |= tcp_ack_update_window(sk, skb, ack, ack_seq);
if (TCP_SKB_CB(skb)->sacked)
flag |= tcp_sacktag_write_queue(sk, skb, prior_snd_una);
- if (TCP_ECN_rcv_ecn_echo(tp, skb->h.th))
+ if (TCP_ECN_rcv_ecn_echo(tp, tcp_hdr(skb)))
flag |= FLAG_ECE;
tcp_ca_event(sk, CA_EVENT_SLOW_ACK);
@@ -2575,15 +2810,16 @@ static int tcp_ack(struct sock *sk, struct sk_buff *skb, int flag)
flag |= tcp_clean_rtx_queue(sk, &seq_rtt);
if (tp->frto_counter)
- tcp_process_frto(sk, prior_snd_una);
+ frto_cwnd = tcp_process_frto(sk, prior_snd_una, flag);
if (tcp_ack_is_dubious(sk, flag)) {
/* Advance CWND, if state allows this. */
- if ((flag & FLAG_DATA_ACKED) && tcp_may_raise_cwnd(sk, flag))
+ if ((flag & FLAG_DATA_ACKED) && !frto_cwnd &&
+ tcp_may_raise_cwnd(sk, flag))
tcp_cong_avoid(sk, ack, seq_rtt, prior_in_flight, 0);
tcp_fastretrans_alert(sk, prior_snd_una, prior_packets, flag);
} else {
- if ((flag & FLAG_DATA_ACKED))
+ if ((flag & FLAG_DATA_ACKED) && !frto_cwnd)
tcp_cong_avoid(sk, ack, seq_rtt, prior_in_flight, 1);
}
@@ -2599,7 +2835,7 @@ no_queue:
* being used to time the probes, and is probably far higher than
* it needs to be for normal retransmission.
*/
- if (sk->sk_send_head)
+ if (tcp_send_head(sk))
tcp_ack_probe(sk);
return 1;
@@ -2620,13 +2856,13 @@ uninteresting_ack:
void tcp_parse_options(struct sk_buff *skb, struct tcp_options_received *opt_rx, int estab)
{
unsigned char *ptr;
- struct tcphdr *th = skb->h.th;
+ struct tcphdr *th = tcp_hdr(skb);
int length=(th->doff*4)-sizeof(struct tcphdr);
ptr = (unsigned char *)(th + 1);
opt_rx->saw_tstamp = 0;
- while(length>0) {
+ while (length > 0) {
int opcode=*ptr++;
int opsize;
@@ -2642,9 +2878,9 @@ void tcp_parse_options(struct sk_buff *skb, struct tcp_options_received *opt_rx,
return;
if (opsize > length)
return; /* don't parse partial options */
- switch(opcode) {
+ switch (opcode) {
case TCPOPT_MSS:
- if(opsize==TCPOLEN_MSS && th->syn && !estab) {
+ if (opsize==TCPOLEN_MSS && th->syn && !estab) {
u16 in_mss = ntohs(get_unaligned((__be16 *)ptr));
if (in_mss) {
if (opt_rx->user_mss && opt_rx->user_mss < in_mss)
@@ -2654,12 +2890,12 @@ void tcp_parse_options(struct sk_buff *skb, struct tcp_options_received *opt_rx,
}
break;
case TCPOPT_WINDOW:
- if(opsize==TCPOLEN_WINDOW && th->syn && !estab)
+ if (opsize==TCPOLEN_WINDOW && th->syn && !estab)
if (sysctl_tcp_window_scaling) {
__u8 snd_wscale = *(__u8 *) ptr;
opt_rx->wscale_ok = 1;
if (snd_wscale > 14) {
- if(net_ratelimit())
+ if (net_ratelimit())
printk(KERN_INFO "tcp_parse_options: Illegal window "
"scaling value %d >14 received.\n",
snd_wscale);
@@ -2669,7 +2905,7 @@ void tcp_parse_options(struct sk_buff *skb, struct tcp_options_received *opt_rx,
}
break;
case TCPOPT_TIMESTAMP:
- if(opsize==TCPOLEN_TIMESTAMP) {
+ if (opsize==TCPOLEN_TIMESTAMP) {
if ((estab && opt_rx->tstamp_ok) ||
(!estab && sysctl_tcp_timestamps)) {
opt_rx->saw_tstamp = 1;
@@ -2679,7 +2915,7 @@ void tcp_parse_options(struct sk_buff *skb, struct tcp_options_received *opt_rx,
}
break;
case TCPOPT_SACK_PERM:
- if(opsize==TCPOLEN_SACK_PERM && th->syn && !estab) {
+ if (opsize==TCPOLEN_SACK_PERM && th->syn && !estab) {
if (sysctl_tcp_sack) {
opt_rx->sack_ok = 1;
tcp_sack_reset(opt_rx);
@@ -2688,7 +2924,7 @@ void tcp_parse_options(struct sk_buff *skb, struct tcp_options_received *opt_rx,
break;
case TCPOPT_SACK:
- if((opsize >= (TCPOLEN_SACK_BASE + TCPOLEN_SACK_PERBLOCK)) &&
+ if ((opsize >= (TCPOLEN_SACK_BASE + TCPOLEN_SACK_PERBLOCK)) &&
!((opsize - TCPOLEN_SACK_BASE) % TCPOLEN_SACK_PERBLOCK) &&
opt_rx->sack_ok) {
TCP_SKB_CB(skb)->sacked = (ptr - 2) - (unsigned char *)th;
@@ -2701,10 +2937,11 @@ void tcp_parse_options(struct sk_buff *skb, struct tcp_options_received *opt_rx,
*/
break;
#endif
- };
+ }
+
ptr+=opsize-2;
length-=opsize;
- };
+ }
}
}
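The kind/length walk above generalizes to any TCP option scan. A
self-contained userspace version of the same loop shape, with the same
guards against truncated options (function name is ours):

	#include <stddef.h>
	#include <stdint.h>

	/* Walk TCP options exactly as the loop above does: kind 0 ends the
	 * list, kind 1 is a one-byte NOP, everything else is kind/len/data.
	 * Returns a pointer to the option with the requested kind, or NULL. */
	static const uint8_t *find_tcp_option(const uint8_t *opt, size_t len,
					      uint8_t kind)
	{
		while (len > 0) {
			uint8_t opcode = *opt;

			if (opcode == 0)		/* TCPOPT_EOL */
				return NULL;
			if (opcode == 1) {		/* TCPOPT_NOP */
				opt++;
				len--;
				continue;
			}
			if (len < 2 || opt[1] < 2 || opt[1] > len)
				return NULL;		/* malformed/partial */
			if (opcode == kind)
				return opt;
			len -= opt[1];
			opt += opt[1];
		}
		return NULL;
	}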
@@ -2737,7 +2974,7 @@ static int tcp_fast_parse_options(struct sk_buff *skb, struct tcphdr *th,
static inline void tcp_store_ts_recent(struct tcp_sock *tp)
{
tp->rx_opt.ts_recent = tp->rx_opt.rcv_tsval;
- tp->rx_opt.ts_recent_stamp = xtime.tv_sec;
+ tp->rx_opt.ts_recent_stamp = get_seconds();
}
static inline void tcp_replace_ts_recent(struct tcp_sock *tp, u32 seq)
@@ -2750,8 +2987,8 @@ static inline void tcp_replace_ts_recent(struct tcp_sock *tp, u32 seq)
* Not only, also it occurs for expired timestamps.
*/
- if((s32)(tp->rx_opt.rcv_tsval - tp->rx_opt.ts_recent) >= 0 ||
- xtime.tv_sec >= tp->rx_opt.ts_recent_stamp + TCP_PAWS_24DAYS)
+ if ((s32)(tp->rx_opt.rcv_tsval - tp->rx_opt.ts_recent) >= 0 ||
+ get_seconds() >= tp->rx_opt.ts_recent_stamp + TCP_PAWS_24DAYS)
tcp_store_ts_recent(tp);
}
}
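The signed-difference test above (paired with get_seconds() for the
wall-clock side) relies on serial-number arithmetic: casting the 32-bit
difference to s32 keeps the comparison correct even after tsval wraps past
2^32. A standalone demonstration:

	#include <assert.h>
	#include <stdint.h>

	/* RFC1323-style comparison of 32-bit timestamps on a circle. */
	static int ts_newer_or_equal(uint32_t a, uint32_t b)
	{
		return (int32_t)(a - b) >= 0;
	}

	int main(void)
	{
		assert(ts_newer_or_equal(5, 3));
		assert(ts_newer_or_equal(0x00000002u, 0xfffffffeu)); /* wrapped */
		assert(!ts_newer_or_equal(0xfffffffeu, 0x00000002u));
		return 0;
	}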
@@ -2782,7 +3019,7 @@ static inline void tcp_replace_ts_recent(struct tcp_sock *tp, u32 seq)
static int tcp_disordered_ack(const struct sock *sk, const struct sk_buff *skb)
{
struct tcp_sock *tp = tcp_sk(sk);
- struct tcphdr *th = skb->h.th;
+ struct tcphdr *th = tcp_hdr(skb);
u32 seq = TCP_SKB_CB(skb)->seq;
u32 ack = TCP_SKB_CB(skb)->ack_seq;
@@ -2803,7 +3040,7 @@ static inline int tcp_paws_discard(const struct sock *sk, const struct sk_buff *
{
const struct tcp_sock *tp = tcp_sk(sk);
return ((s32)(tp->rx_opt.ts_recent - tp->rx_opt.rcv_tsval) > TCP_PAWS_WINDOW &&
- xtime.tv_sec < tp->rx_opt.ts_recent_stamp + TCP_PAWS_24DAYS &&
+ get_seconds() < tp->rx_opt.ts_recent_stamp + TCP_PAWS_24DAYS &&
!tcp_disordered_ack(sk, skb));
}
@@ -2910,7 +3147,7 @@ static void tcp_fin(struct sk_buff *skb, struct sock *sk, struct tcphdr *th)
printk(KERN_ERR "%s: Impossible, sk->sk_state=%d\n",
__FUNCTION__, sk->sk_state);
break;
- };
+ }
/* It _is_ possible, that we have something out-of-order _after_ FIN.
* Probably, we should reset in this case. For now drop them.
@@ -3009,7 +3246,7 @@ static void tcp_sack_maybe_coalesce(struct tcp_sock *tp)
*/
tp->rx_opt.num_sacks--;
tp->rx_opt.eff_sacks = min(tp->rx_opt.num_sacks + tp->rx_opt.dsack, 4 - tp->rx_opt.tstamp_ok);
- for(i=this_sack; i < tp->rx_opt.num_sacks; i++)
+ for (i=this_sack; i < tp->rx_opt.num_sacks; i++)
sp[i] = sp[i+1];
continue;
}
@@ -3062,7 +3299,7 @@ static void tcp_sack_new_ofo_skb(struct sock *sk, u32 seq, u32 end_seq)
tp->rx_opt.num_sacks--;
sp--;
}
- for(; this_sack > 0; this_sack--, sp--)
+ for (; this_sack > 0; this_sack--, sp--)
*sp = *(sp-1);
new_sack:
@@ -3088,7 +3325,7 @@ static void tcp_sack_remove(struct tcp_sock *tp)
return;
}
- for(this_sack = 0; this_sack < num_sacks; ) {
+ for (this_sack = 0; this_sack < num_sacks; ) {
/* Check if the start of the sack is covered by RCV.NXT. */
if (!before(tp->rcv_nxt, sp->start_seq)) {
int i;
@@ -3144,8 +3381,8 @@ static void tcp_ofo_queue(struct sock *sk)
__skb_unlink(skb, &tp->out_of_order_queue);
__skb_queue_tail(&sk->sk_receive_queue, skb);
tp->rcv_nxt = TCP_SKB_CB(skb)->end_seq;
- if(skb->h.th->fin)
- tcp_fin(skb, sk, skb->h.th);
+ if (tcp_hdr(skb)->fin)
+ tcp_fin(skb, sk, tcp_hdr(skb));
}
}
@@ -3153,7 +3390,7 @@ static int tcp_prune_queue(struct sock *sk);
static void tcp_data_queue(struct sock *sk, struct sk_buff *skb)
{
- struct tcphdr *th = skb->h.th;
+ struct tcphdr *th = tcp_hdr(skb);
struct tcp_sock *tp = tcp_sk(sk);
int eaten = -1;
@@ -3210,9 +3447,9 @@ queue_and_out:
__skb_queue_tail(&sk->sk_receive_queue, skb);
}
tp->rcv_nxt = TCP_SKB_CB(skb)->end_seq;
- if(skb->len)
- tcp_event_data_recv(sk, tp, skb);
- if(th->fin)
+ if (skb->len)
+ tcp_event_data_recv(sk, skb);
+ if (th->fin)
tcp_fin(skb, sk, th);
if (!skb_queue_empty(&tp->out_of_order_queue)) {
@@ -3228,7 +3465,7 @@ queue_and_out:
if (tp->rx_opt.num_sacks)
tcp_sack_remove(tp);
- tcp_fast_path_check(sk, tp);
+ tcp_fast_path_check(sk);
if (eaten > 0)
__kfree_skb(skb);
@@ -3392,7 +3629,7 @@ tcp_collapse(struct sock *sk, struct sk_buff_head *list,
* - bloated or contains data before "start" or
* overlaps to the next one.
*/
- if (!skb->h.th->syn && !skb->h.th->fin &&
+ if (!tcp_hdr(skb)->syn && !tcp_hdr(skb)->fin &&
(tcp_win_from_space(skb->truesize) > skb->len ||
before(TCP_SKB_CB(skb)->seq, start) ||
(skb->next != tail &&
@@ -3403,7 +3640,7 @@ tcp_collapse(struct sock *sk, struct sk_buff_head *list,
start = TCP_SKB_CB(skb)->end_seq;
skb = skb->next;
}
- if (skb == tail || skb->h.th->syn || skb->h.th->fin)
+ if (skb == tail || tcp_hdr(skb)->syn || tcp_hdr(skb)->fin)
return;
while (before(start, end)) {
@@ -3419,11 +3656,14 @@ tcp_collapse(struct sock *sk, struct sk_buff_head *list,
nskb = alloc_skb(copy+header, GFP_ATOMIC);
if (!nskb)
return;
+
+ skb_set_mac_header(nskb, skb_mac_header(skb) - skb->head);
+ skb_set_network_header(nskb, (skb_network_header(skb) -
+ skb->head));
+ skb_set_transport_header(nskb, (skb_transport_header(skb) -
+ skb->head));
skb_reserve(nskb, header);
memcpy(nskb->head, skb->head, header);
- nskb->nh.raw = nskb->head + (skb->nh.raw-skb->head);
- nskb->h.raw = nskb->head + (skb->h.raw-skb->head);
- nskb->mac.raw = nskb->head + (skb->mac.raw-skb->head);
memcpy(nskb->cb, skb->cb, sizeof(skb->cb));
TCP_SKB_CB(nskb)->seq = TCP_SKB_CB(nskb)->end_seq = start;
__skb_insert(nskb, skb->prev, skb, list);
@@ -3449,7 +3689,9 @@ tcp_collapse(struct sock *sk, struct sk_buff_head *list,
__kfree_skb(skb);
NET_INC_STATS_BH(LINUX_MIB_TCPRCVCOLLAPSED);
skb = next;
- if (skb == tail || skb->h.th->syn || skb->h.th->fin)
+ if (skb == tail ||
+ tcp_hdr(skb)->syn ||
+ tcp_hdr(skb)->fin)
return;
}
}
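The three skb_set_*_header() calls express each header position as an offset
from skb->head, which is what makes copying them to nskb a pure offset
replay. A toy model of that idea (types and names ours, not the skb layout):

	struct hdr_offsets {
		unsigned char *head;
		unsigned int mac, network, transport;	/* offsets from head */
	};

	/* Offsets survive a copy into a new buffer unchanged... */
	static void clone_offsets(struct hdr_offsets *dst,
				  const struct hdr_offsets *src)
	{
		dst->mac = src->mac;
		dst->network = src->network;
		dst->transport = src->transport;
	}

	/* ...and resolving one is a single addition, as tcp_hdr() does. */
	static unsigned char *transport_ptr(const struct hdr_offsets *h)
	{
		return h->head + h->transport;
	}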
@@ -3514,7 +3756,7 @@ static int tcp_prune_queue(struct sock *sk)
NET_INC_STATS_BH(LINUX_MIB_PRUNECALLED);
if (atomic_read(&sk->sk_rmem_alloc) >= sk->sk_rcvbuf)
- tcp_clamp_window(sk, tp);
+ tcp_clamp_window(sk);
else if (tcp_memory_pressure)
tp->rcv_ssthresh = min(tp->rcv_ssthresh, 4U * tp->advmss);
@@ -3583,8 +3825,10 @@ void tcp_cwnd_application_limited(struct sock *sk)
tp->snd_cwnd_stamp = tcp_time_stamp;
}
-static int tcp_should_expand_sndbuf(struct sock *sk, struct tcp_sock *tp)
+static int tcp_should_expand_sndbuf(struct sock *sk)
{
+ struct tcp_sock *tp = tcp_sk(sk);
+
/* If the user specified a specific send buffer setting, do
* not modify it.
*/
@@ -3616,7 +3860,7 @@ static void tcp_new_space(struct sock *sk)
{
struct tcp_sock *tp = tcp_sk(sk);
- if (tcp_should_expand_sndbuf(sk, tp)) {
+ if (tcp_should_expand_sndbuf(sk)) {
int sndmem = max_t(u32, tp->rx_opt.mss_clamp, tp->mss_cache) +
MAX_TCP_HEADER + 16 + sizeof(struct sk_buff),
demanded = max_t(unsigned int, tp->snd_cwnd,
@@ -3640,9 +3884,9 @@ static void tcp_check_space(struct sock *sk)
}
}
-static inline void tcp_data_snd_check(struct sock *sk, struct tcp_sock *tp)
+static inline void tcp_data_snd_check(struct sock *sk)
{
- tcp_push_pending_frames(sk, tp);
+ tcp_push_pending_frames(sk);
tcp_check_space(sk);
}
@@ -3790,7 +4034,7 @@ static int tcp_copy_to_iovec(struct sock *sk, struct sk_buff *skb, int hlen)
int err;
local_bh_enable();
- if (skb->ip_summed==CHECKSUM_UNNECESSARY)
+ if (skb_csum_unnecessary(skb))
err = skb_copy_datagram_iovec(skb, hlen, tp->ucopy.iov, chunk);
else
err = skb_copy_and_csum_datagram_iovec(skb, hlen,
@@ -3822,7 +4066,7 @@ static __sum16 __tcp_checksum_complete_user(struct sock *sk, struct sk_buff *skb
static inline int tcp_checksum_complete_user(struct sock *sk, struct sk_buff *skb)
{
- return skb->ip_summed != CHECKSUM_UNNECESSARY &&
+ return !skb_csum_unnecessary(skb) &&
__tcp_checksum_complete_user(sk, skb);
}
@@ -3840,7 +4084,7 @@ static int tcp_dma_try_early_copy(struct sock *sk, struct sk_buff *skb, int hlen
if (!tp->ucopy.dma_chan && tp->ucopy.pinned_list)
tp->ucopy.dma_chan = get_softnet_dma();
- if (tp->ucopy.dma_chan && skb->ip_summed == CHECKSUM_UNNECESSARY) {
+ if (tp->ucopy.dma_chan && skb_csum_unnecessary(skb)) {
dma_cookie = dma_skb_copy_datagram_iovec(tp->ucopy.dma_chan,
skb, hlen, tp->ucopy.iov, chunk, tp->ucopy.pinned_list);
@@ -3856,7 +4100,7 @@ static int tcp_dma_try_early_copy(struct sock *sk, struct sk_buff *skb, int hlen
tcp_rcv_space_adjust(sk);
if ((tp->ucopy.len == 0) ||
- (tcp_flag_word(skb->h.th) & TCP_FLAG_PSH) ||
+ (tcp_flag_word(tcp_hdr(skb)) & TCP_FLAG_PSH) ||
(atomic_read(&sk->sk_rmem_alloc) > (sk->sk_rcvbuf >> 1))) {
tp->ucopy.wakeup = 1;
sk->sk_data_ready(sk, 0);
@@ -3976,7 +4220,7 @@ int tcp_rcv_established(struct sock *sk, struct sk_buff *skb,
*/
tcp_ack(sk, skb, 0);
__kfree_skb(skb);
- tcp_data_snd_check(sk, tp);
+ tcp_data_snd_check(sk);
return 0;
} else { /* Header too small */
TCP_INC_STATS_BH(TCP_MIB_INERRS);
@@ -4047,12 +4291,12 @@ int tcp_rcv_established(struct sock *sk, struct sk_buff *skb,
tp->rcv_nxt = TCP_SKB_CB(skb)->end_seq;
}
- tcp_event_data_recv(sk, tp, skb);
+ tcp_event_data_recv(sk, skb);
if (TCP_SKB_CB(skb)->ack_seq != tp->snd_una) {
/* Well, only one small jumplet in fast path... */
tcp_ack(sk, skb, FLAG_DATA);
- tcp_data_snd_check(sk, tp);
+ tcp_data_snd_check(sk);
if (!inet_csk_ack_scheduled(sk))
goto no_ack;
}
@@ -4109,7 +4353,7 @@ slow_path:
goto discard;
}
- if(th->rst) {
+ if (th->rst) {
tcp_reset(sk);
goto discard;
}
@@ -4124,7 +4368,7 @@ slow_path:
}
step5:
- if(th->ack)
+ if (th->ack)
tcp_ack(sk, skb, FLAG_SLOWPATH);
tcp_rcv_rtt_measure_ts(sk, skb);
@@ -4135,7 +4379,7 @@ step5:
/* step 7: process the segment text */
tcp_data_queue(sk, skb);
- tcp_data_snd_check(sk, tp);
+ tcp_data_snd_check(sk);
tcp_ack_snd_check(sk);
return 0;
@@ -4412,13 +4656,13 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb,
goto discard;
case TCP_LISTEN:
- if(th->ack)
+ if (th->ack)
return 1;
- if(th->rst)
+ if (th->rst)
goto discard;
- if(th->syn) {
+ if (th->syn) {
if (icsk->icsk_af_ops->conn_request(sk, skb) < 0)
return 1;
@@ -4452,7 +4696,7 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb,
/* Do step6 onward by hand. */
tcp_urg(sk, skb, th);
__kfree_skb(skb);
- tcp_data_snd_check(sk, tp);
+ tcp_data_snd_check(sk);
return 0;
}
@@ -4474,7 +4718,7 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb,
}
/* step 2: check RST bit */
- if(th->rst) {
+ if (th->rst) {
tcp_reset(sk);
goto discard;
}
@@ -4497,7 +4741,7 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb,
if (th->ack) {
int acceptable = tcp_ack(sk, skb, FLAG_SLOWPATH);
- switch(sk->sk_state) {
+ switch (sk->sk_state) {
case TCP_SYN_RECV:
if (acceptable) {
tp->copied_seq = tp->rcv_nxt;
@@ -4644,7 +4888,7 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb,
/* tcp_data could move socket to TIME-WAIT */
if (sk->sk_state != TCP_CLOSE) {
- tcp_data_snd_check(sk, tp);
+ tcp_data_snd_check(sk);
tcp_ack_snd_check(sk);
}
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 0ba74bbe7d3..5a3e7f839fc 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -88,7 +88,7 @@ int sysctl_tcp_low_latency __read_mostly;
#define ICMP_MIN_LENGTH 8
/* Socket used for sending RSTs */
-static struct socket *tcp_socket;
+static struct socket *tcp_socket __read_mostly;
void tcp_v4_send_check(struct sock *sk, int len, struct sk_buff *skb);
@@ -125,10 +125,10 @@ void tcp_unhash(struct sock *sk)
static inline __u32 tcp_v4_init_sequence(struct sk_buff *skb)
{
- return secure_tcp_sequence_number(skb->nh.iph->daddr,
- skb->nh.iph->saddr,
- skb->h.th->dest,
- skb->h.th->source);
+ return secure_tcp_sequence_number(ip_hdr(skb)->daddr,
+ ip_hdr(skb)->saddr,
+ tcp_hdr(skb)->dest,
+ tcp_hdr(skb)->source);
}
int tcp_twsk_unique(struct sock *sk, struct sock *sktw, void *twp)
@@ -149,7 +149,7 @@ int tcp_twsk_unique(struct sock *sk, struct sock *sktw, void *twp)
*/
if (tcptw->tw_ts_recent_stamp &&
(twp == NULL || (sysctl_tcp_tw_reuse &&
- xtime.tv_sec - tcptw->tw_ts_recent_stamp > 1))) {
+ get_seconds() - tcptw->tw_ts_recent_stamp > 1))) {
tp->write_seq = tcptw->tw_snd_nxt + 65535 + 2;
if (tp->write_seq == 0)
tp->write_seq = 1;
@@ -224,7 +224,7 @@ int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
* when trying new connection.
*/
if (peer != NULL &&
- peer->tcp_ts_stamp + TCP_PAWS_MSL >= xtime.tv_sec) {
+ peer->tcp_ts_stamp + TCP_PAWS_MSL >= get_seconds()) {
tp->rx_opt.ts_recent_stamp = peer->tcp_ts_stamp;
tp->rx_opt.ts_recent = peer->tcp_ts;
}
@@ -354,8 +354,8 @@ void tcp_v4_err(struct sk_buff *skb, u32 info)
struct tcphdr *th = (struct tcphdr *)(skb->data + (iph->ihl << 2));
struct tcp_sock *tp;
struct inet_sock *inet;
- int type = skb->h.icmph->type;
- int code = skb->h.icmph->code;
+ const int type = icmp_hdr(skb)->type;
+ const int code = icmp_hdr(skb)->code;
struct sock *sk;
__u32 seq;
int err;
@@ -499,11 +499,12 @@ out:
void tcp_v4_send_check(struct sock *sk, int len, struct sk_buff *skb)
{
struct inet_sock *inet = inet_sk(sk);
- struct tcphdr *th = skb->h.th;
+ struct tcphdr *th = tcp_hdr(skb);
if (skb->ip_summed == CHECKSUM_PARTIAL) {
th->check = ~tcp_v4_check(len, inet->saddr,
inet->daddr, 0);
+ skb->csum_start = skb_transport_header(skb) - skb->head;
skb->csum_offset = offsetof(struct tcphdr, check);
} else {
th->check = tcp_v4_check(len, inet->saddr, inet->daddr,
@@ -515,17 +516,18 @@ void tcp_v4_send_check(struct sock *sk, int len, struct sk_buff *skb)
int tcp_v4_gso_send_check(struct sk_buff *skb)
{
- struct iphdr *iph;
+ const struct iphdr *iph;
struct tcphdr *th;
if (!pskb_may_pull(skb, sizeof(*th)))
return -EINVAL;
- iph = skb->nh.iph;
- th = skb->h.th;
+ iph = ip_hdr(skb);
+ th = tcp_hdr(skb);
th->check = 0;
th->check = ~tcp_v4_check(skb->len, iph->saddr, iph->daddr, 0);
+ skb->csum_start = skb_transport_header(skb) - skb->head;
skb->csum_offset = offsetof(struct tcphdr, check);
skb->ip_summed = CHECKSUM_PARTIAL;
return 0;
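csum_start records, as an offset from skb->head, where partial checksumming
begins; csum_offset then locates the checksum field within that region. A
sketch of the arithmetic a driver performs for CHECKSUM_PARTIAL (the helper
name is ours, not a kernel API):

	/* Where the NIC (or software fallback) must store the folded
	 * checksum for a CHECKSUM_PARTIAL skb. */
	static inline __sum16 *partial_csum_dest(const struct sk_buff *skb)
	{
		return (__sum16 *)(skb->head + skb->csum_start +
				   skb->csum_offset);
	}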
@@ -546,7 +548,7 @@ int tcp_v4_gso_send_check(struct sk_buff *skb)
static void tcp_v4_send_reset(struct sock *sk, struct sk_buff *skb)
{
- struct tcphdr *th = skb->h.th;
+ struct tcphdr *th = tcp_hdr(skb);
struct {
struct tcphdr th;
#ifdef CONFIG_TCP_MD5SIG
@@ -585,7 +587,7 @@ static void tcp_v4_send_reset(struct sock *sk, struct sk_buff *skb)
arg.iov[0].iov_len = sizeof(rep.th);
#ifdef CONFIG_TCP_MD5SIG
- key = sk ? tcp_v4_md5_do_lookup(sk, skb->nh.iph->daddr) : NULL;
+ key = sk ? tcp_v4_md5_do_lookup(sk, ip_hdr(skb)->daddr) : NULL;
if (key) {
rep.opt[0] = htonl((TCPOPT_NOP << 24) |
(TCPOPT_NOP << 16) |
@@ -597,14 +599,14 @@ static void tcp_v4_send_reset(struct sock *sk, struct sk_buff *skb)
tcp_v4_do_calc_md5_hash((__u8 *)&rep.opt[1],
key,
- skb->nh.iph->daddr,
- skb->nh.iph->saddr,
+ ip_hdr(skb)->daddr,
+ ip_hdr(skb)->saddr,
&rep.th, IPPROTO_TCP,
arg.iov[0].iov_len);
}
#endif
- arg.csum = csum_tcpudp_nofold(skb->nh.iph->daddr,
- skb->nh.iph->saddr, /* XXX */
+ arg.csum = csum_tcpudp_nofold(ip_hdr(skb)->daddr,
+ ip_hdr(skb)->saddr, /* XXX */
sizeof(struct tcphdr), IPPROTO_TCP, 0);
arg.csumoffset = offsetof(struct tcphdr, check) / 2;
@@ -622,7 +624,7 @@ static void tcp_v4_send_ack(struct tcp_timewait_sock *twsk,
struct sk_buff *skb, u32 seq, u32 ack,
u32 win, u32 ts)
{
- struct tcphdr *th = skb->h.th;
+ struct tcphdr *th = tcp_hdr(skb);
struct {
struct tcphdr th;
__be32 opt[(TCPOLEN_TSTAMP_ALIGNED >> 2)
@@ -670,7 +672,7 @@ static void tcp_v4_send_ack(struct tcp_timewait_sock *twsk,
* skb->sk) holds true, but we program defensively.
*/
if (!twsk && skb->sk) {
- key = tcp_v4_md5_do_lookup(skb->sk, skb->nh.iph->daddr);
+ key = tcp_v4_md5_do_lookup(skb->sk, ip_hdr(skb)->daddr);
} else if (twsk && twsk->tw_md5_keylen) {
tw_key.key = twsk->tw_md5_key;
tw_key.keylen = twsk->tw_md5_keylen;
@@ -690,14 +692,14 @@ static void tcp_v4_send_ack(struct tcp_timewait_sock *twsk,
tcp_v4_do_calc_md5_hash((__u8 *)&rep.opt[offset],
key,
- skb->nh.iph->daddr,
- skb->nh.iph->saddr,
+ ip_hdr(skb)->daddr,
+ ip_hdr(skb)->saddr,
&rep.th, IPPROTO_TCP,
arg.iov[0].iov_len);
}
#endif
- arg.csum = csum_tcpudp_nofold(skb->nh.iph->daddr,
- skb->nh.iph->saddr, /* XXX */
+ arg.csum = csum_tcpudp_nofold(ip_hdr(skb)->daddr,
+ ip_hdr(skb)->saddr, /* XXX */
arg.iov[0].iov_len, IPPROTO_TCP, 0);
arg.csumoffset = offsetof(struct tcphdr, check) / 2;
@@ -745,7 +747,7 @@ static int tcp_v4_send_synack(struct sock *sk, struct request_sock *req,
skb = tcp_make_synack(sk, dst, req);
if (skb) {
- struct tcphdr *th = skb->h.th;
+ struct tcphdr *th = tcp_hdr(skb);
th->check = tcp_v4_check(skb->len,
ireq->loc_addr,
@@ -781,7 +783,7 @@ static void syn_flood_warning(struct sk_buff *skb)
warntime = jiffies;
printk(KERN_INFO
"possible SYN flooding on port %d. Sending cookies.\n",
- ntohs(skb->h.th->dest));
+ ntohs(tcp_hdr(skb)->dest));
}
}
#endif
@@ -1133,8 +1135,8 @@ static int tcp_v4_inbound_md5_hash(struct sock *sk, struct sk_buff *skb)
*/
__u8 *hash_location = NULL;
struct tcp_md5sig_key *hash_expected;
- struct iphdr *iph = skb->nh.iph;
- struct tcphdr *th = skb->h.th;
+ const struct iphdr *iph = ip_hdr(skb);
+ struct tcphdr *th = tcp_hdr(skb);
int length = (th->doff << 2) - sizeof(struct tcphdr);
int genhash;
unsigned char *ptr;
@@ -1251,8 +1253,8 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
struct inet_request_sock *ireq;
struct tcp_options_received tmp_opt;
struct request_sock *req;
- __be32 saddr = skb->nh.iph->saddr;
- __be32 daddr = skb->nh.iph->daddr;
+ __be32 saddr = ip_hdr(skb)->saddr;
+ __be32 daddr = ip_hdr(skb)->daddr;
__u32 isn = TCP_SKB_CB(skb)->when;
struct dst_entry *dst = NULL;
#ifdef CONFIG_SYN_COOKIES
@@ -1327,7 +1329,7 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
ireq->rmt_addr = saddr;
ireq->opt = tcp_v4_save_options(sk, skb);
if (!want_cookie)
- TCP_ECN_create_request(req, skb->h.th);
+ TCP_ECN_create_request(req, tcp_hdr(skb));
if (want_cookie) {
#ifdef CONFIG_SYN_COOKIES
@@ -1351,7 +1353,7 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
(dst = inet_csk_route_req(sk, req)) != NULL &&
(peer = rt_get_peer((struct rtable *)dst)) != NULL &&
peer->v4daddr == saddr) {
- if (xtime.tv_sec < peer->tcp_ts_stamp + TCP_PAWS_MSL &&
+ if (get_seconds() < peer->tcp_ts_stamp + TCP_PAWS_MSL &&
(s32)(peer->tcp_ts - req->ts_recent) >
TCP_PAWS_WINDOW) {
NET_INC_STATS_BH(LINUX_MIB_PAWSPASSIVEREJECTED);
@@ -1375,7 +1377,7 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
LIMIT_NETDEBUG(KERN_DEBUG "TCP: drop open "
"request from %u.%u.%u.%u/%u\n",
NIPQUAD(saddr),
- ntohs(skb->h.th->source));
+ ntohs(tcp_hdr(skb)->source));
dst_release(dst);
goto drop_and_free;
}
@@ -1439,7 +1441,7 @@ struct sock *tcp_v4_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
newinet->opt = ireq->opt;
ireq->opt = NULL;
newinet->mc_index = inet_iif(skb);
- newinet->mc_ttl = skb->nh.iph->ttl;
+ newinet->mc_ttl = ip_hdr(skb)->ttl;
inet_csk(newsk)->icsk_ext_hdr_len = 0;
if (newinet->opt)
inet_csk(newsk)->icsk_ext_hdr_len = newinet->opt->optlen;
@@ -1481,8 +1483,8 @@ exit:
static struct sock *tcp_v4_hnd_req(struct sock *sk, struct sk_buff *skb)
{
- struct tcphdr *th = skb->h.th;
- struct iphdr *iph = skb->nh.iph;
+ struct tcphdr *th = tcp_hdr(skb);
+ const struct iphdr *iph = ip_hdr(skb);
struct sock *nsk;
struct request_sock **prev;
/* Find possible connection requests. */
@@ -1491,9 +1493,8 @@ static struct sock *tcp_v4_hnd_req(struct sock *sk, struct sk_buff *skb)
if (req)
return tcp_check_req(sk, skb, req, prev);
- nsk = inet_lookup_established(&tcp_hashinfo, skb->nh.iph->saddr,
- th->source, skb->nh.iph->daddr,
- th->dest, inet_iif(skb));
+ nsk = inet_lookup_established(&tcp_hashinfo, iph->saddr, th->source,
+ iph->daddr, th->dest, inet_iif(skb));
if (nsk) {
if (nsk->sk_state != TCP_TIME_WAIT) {
@@ -1513,15 +1514,17 @@ static struct sock *tcp_v4_hnd_req(struct sock *sk, struct sk_buff *skb)
static __sum16 tcp_v4_checksum_init(struct sk_buff *skb)
{
+ const struct iphdr *iph = ip_hdr(skb);
+
if (skb->ip_summed == CHECKSUM_COMPLETE) {
- if (!tcp_v4_check(skb->len, skb->nh.iph->saddr,
- skb->nh.iph->daddr, skb->csum)) {
+ if (!tcp_v4_check(skb->len, iph->saddr,
+ iph->daddr, skb->csum)) {
skb->ip_summed = CHECKSUM_UNNECESSARY;
return 0;
}
}
- skb->csum = csum_tcpudp_nofold(skb->nh.iph->saddr, skb->nh.iph->daddr,
+ skb->csum = csum_tcpudp_nofold(iph->saddr, iph->daddr,
skb->len, IPPROTO_TCP, 0);
if (skb->len <= 76) {
@@ -1555,7 +1558,7 @@ int tcp_v4_do_rcv(struct sock *sk, struct sk_buff *skb)
if (sk->sk_state == TCP_ESTABLISHED) { /* Fast path */
TCP_CHECK_TIMER(sk);
- if (tcp_rcv_established(sk, skb, skb->h.th, skb->len)) {
+ if (tcp_rcv_established(sk, skb, tcp_hdr(skb), skb->len)) {
rsk = sk;
goto reset;
}
@@ -1563,7 +1566,7 @@ int tcp_v4_do_rcv(struct sock *sk, struct sk_buff *skb)
return 0;
}
- if (skb->len < (skb->h.th->doff << 2) || tcp_checksum_complete(skb))
+ if (skb->len < tcp_hdrlen(skb) || tcp_checksum_complete(skb))
goto csum_err;
if (sk->sk_state == TCP_LISTEN) {
@@ -1581,7 +1584,7 @@ int tcp_v4_do_rcv(struct sock *sk, struct sk_buff *skb)
}
TCP_CHECK_TIMER(sk);
- if (tcp_rcv_state_process(sk, skb, skb->h.th, skb->len)) {
+ if (tcp_rcv_state_process(sk, skb, tcp_hdr(skb), skb->len)) {
rsk = sk;
goto reset;
}
@@ -1610,6 +1613,7 @@ csum_err:
int tcp_v4_rcv(struct sk_buff *skb)
{
+ const struct iphdr *iph;
struct tcphdr *th;
struct sock *sk;
int ret;
@@ -1623,7 +1627,7 @@ int tcp_v4_rcv(struct sk_buff *skb)
if (!pskb_may_pull(skb, sizeof(struct tcphdr)))
goto discard_it;
- th = skb->h.th;
+ th = tcp_hdr(skb);
if (th->doff < sizeof(struct tcphdr) / 4)
goto bad_packet;
@@ -1634,23 +1638,21 @@ int tcp_v4_rcv(struct sk_buff *skb)
* Packet length and doff are validated by header prediction,
* provided case of th->doff==0 is eliminated.
* So, we defer the checks. */
- if ((skb->ip_summed != CHECKSUM_UNNECESSARY &&
- tcp_v4_checksum_init(skb)))
+ if (!skb_csum_unnecessary(skb) && tcp_v4_checksum_init(skb))
goto bad_packet;
- th = skb->h.th;
+ th = tcp_hdr(skb);
+ iph = ip_hdr(skb);
TCP_SKB_CB(skb)->seq = ntohl(th->seq);
TCP_SKB_CB(skb)->end_seq = (TCP_SKB_CB(skb)->seq + th->syn + th->fin +
skb->len - th->doff * 4);
TCP_SKB_CB(skb)->ack_seq = ntohl(th->ack_seq);
TCP_SKB_CB(skb)->when = 0;
- TCP_SKB_CB(skb)->flags = skb->nh.iph->tos;
+ TCP_SKB_CB(skb)->flags = iph->tos;
TCP_SKB_CB(skb)->sacked = 0;
- sk = __inet_lookup(&tcp_hashinfo, skb->nh.iph->saddr, th->source,
- skb->nh.iph->daddr, th->dest,
- inet_iif(skb));
-
+ sk = __inet_lookup(&tcp_hashinfo, iph->saddr, th->source,
+ iph->daddr, th->dest, inet_iif(skb));
if (!sk)
goto no_tcp_socket;
@@ -1724,8 +1726,7 @@ do_time_wait:
switch (tcp_timewait_state_process(inet_twsk(sk), skb, th)) {
case TCP_TW_SYN: {
struct sock *sk2 = inet_lookup_listener(&tcp_hashinfo,
- skb->nh.iph->daddr,
- th->dest,
+ iph->daddr, th->dest,
inet_iif(skb));
if (sk2) {
inet_twsk_deschedule(inet_twsk(sk), &tcp_death_row);
@@ -1770,7 +1771,7 @@ int tcp_v4_remember_stamp(struct sock *sk)
if (peer) {
if ((s32)(peer->tcp_ts - tp->rx_opt.ts_recent) <= 0 ||
- (peer->tcp_ts_stamp + TCP_PAWS_MSL < xtime.tv_sec &&
+ (peer->tcp_ts_stamp + TCP_PAWS_MSL < get_seconds() &&
peer->tcp_ts_stamp <= tp->rx_opt.ts_recent_stamp)) {
peer->tcp_ts_stamp = tp->rx_opt.ts_recent_stamp;
peer->tcp_ts = tp->rx_opt.ts_recent;
@@ -1791,7 +1792,7 @@ int tcp_v4_tw_remember_stamp(struct inet_timewait_sock *tw)
const struct tcp_timewait_sock *tcptw = tcp_twsk((struct sock *)tw);
if ((s32)(peer->tcp_ts - tcptw->tw_ts_recent) <= 0 ||
- (peer->tcp_ts_stamp + TCP_PAWS_MSL < xtime.tv_sec &&
+ (peer->tcp_ts_stamp + TCP_PAWS_MSL < get_seconds() &&
peer->tcp_ts_stamp <= tcptw->tw_ts_recent_stamp)) {
peer->tcp_ts_stamp = tcptw->tw_ts_recent_stamp;
peer->tcp_ts = tcptw->tw_ts_recent;
@@ -1890,7 +1891,7 @@ int tcp_v4_destroy_sock(struct sock *sk)
tcp_cleanup_congestion_control(sk);
/* Cleanup up the write buffer. */
- sk_stream_writequeue_purge(sk);
+ tcp_write_queue_purge(sk);
/* Cleans up our, hopefully empty, out_of_order_queue. */
__skb_queue_purge(&tp->out_of_order_queue);
@@ -2293,13 +2294,13 @@ static void get_openreq4(struct sock *sk, struct request_sock *req,
req);
}
-static void get_tcp4_sock(struct sock *sp, char *tmpbuf, int i)
+static void get_tcp4_sock(struct sock *sk, char *tmpbuf, int i)
{
int timer_active;
unsigned long timer_expires;
- struct tcp_sock *tp = tcp_sk(sp);
- const struct inet_connection_sock *icsk = inet_csk(sp);
- struct inet_sock *inet = inet_sk(sp);
+ struct tcp_sock *tp = tcp_sk(sk);
+ const struct inet_connection_sock *icsk = inet_csk(sk);
+ struct inet_sock *inet = inet_sk(sk);
__be32 dest = inet->daddr;
__be32 src = inet->rcv_saddr;
__u16 destp = ntohs(inet->dport);
@@ -2311,9 +2312,9 @@ static void get_tcp4_sock(struct sock *sp, char *tmpbuf, int i)
} else if (icsk->icsk_pending == ICSK_TIME_PROBE0) {
timer_active = 4;
timer_expires = icsk->icsk_timeout;
- } else if (timer_pending(&sp->sk_timer)) {
+ } else if (timer_pending(&sk->sk_timer)) {
timer_active = 2;
- timer_expires = sp->sk_timer.expires;
+ timer_expires = sk->sk_timer.expires;
} else {
timer_active = 0;
timer_expires = jiffies;
@@ -2321,17 +2322,17 @@ static void get_tcp4_sock(struct sock *sp, char *tmpbuf, int i)
sprintf(tmpbuf, "%4d: %08X:%04X %08X:%04X %02X %08X:%08X %02X:%08lX "
"%08X %5d %8d %lu %d %p %u %u %u %u %d",
- i, src, srcp, dest, destp, sp->sk_state,
+ i, src, srcp, dest, destp, sk->sk_state,
tp->write_seq - tp->snd_una,
- sp->sk_state == TCP_LISTEN ? sp->sk_ack_backlog :
+ sk->sk_state == TCP_LISTEN ? sk->sk_ack_backlog :
(tp->rcv_nxt - tp->copied_seq),
timer_active,
jiffies_to_clock_t(timer_expires - jiffies),
icsk->icsk_retransmits,
- sock_i_uid(sp),
+ sock_i_uid(sk),
icsk->icsk_probes_out,
- sock_i_ino(sp),
- atomic_read(&sp->sk_refcnt), sp,
+ sock_i_ino(sk),
+ atomic_read(&sk->sk_refcnt), sk,
icsk->icsk_rto,
icsk->icsk_ack.ato,
(icsk->icsk_ack.quick << 1) | icsk->icsk_ack.pingpong,
diff --git a/net/ipv4/tcp_lp.c b/net/ipv4/tcp_lp.c
index f0ebaf0e21c..43294ad9f63 100644
--- a/net/ipv4/tcp_lp.c
+++ b/net/ipv4/tcp_lp.c
@@ -218,7 +218,7 @@ static u32 tcp_lp_owd_calculator(struct sock *sk)
* 3. calc smoothed OWD (SOWD).
* Most ideas come from the original TCP-LP implementation.
*/
-static void tcp_lp_rtt_sample(struct sock *sk, u32 usrtt)
+static void tcp_lp_rtt_sample(struct sock *sk, u32 rtt)
{
struct lp *lp = inet_csk_ca(sk);
s64 mowd = tcp_lp_owd_calculator(sk);
@@ -261,11 +261,13 @@ static void tcp_lp_rtt_sample(struct sock *sk, u32 usrtt)
* newReno in increase case.
* We work it out by following the idea from TCP-LP's paper directly
*/
-static void tcp_lp_pkts_acked(struct sock *sk, u32 num_acked)
+static void tcp_lp_pkts_acked(struct sock *sk, u32 num_acked, ktime_t last)
{
struct tcp_sock *tp = tcp_sk(sk);
struct lp *lp = inet_csk_ca(sk);
+ tcp_lp_rtt_sample(sk, ktime_to_us(net_timedelta(last)));
+
/* calc inference */
if (tcp_time_stamp > tp->rx_opt.rcv_tsecr)
lp->inference = 3 * (tcp_time_stamp - tp->rx_opt.rcv_tsecr);
@@ -312,11 +314,11 @@ static void tcp_lp_pkts_acked(struct sock *sk, u32 num_acked)
}
static struct tcp_congestion_ops tcp_lp = {
+ .flags = TCP_CONG_RTT_STAMP,
.init = tcp_lp_init,
.ssthresh = tcp_reno_ssthresh,
.cong_avoid = tcp_lp_cong_avoid,
.min_cwnd = tcp_reno_min_cwnd,
- .rtt_sample = tcp_lp_rtt_sample,
.pkts_acked = tcp_lp_pkts_acked,
.owner = THIS_MODULE,
diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c
index 6b5c64f3c92..a12b08fca5a 100644
--- a/net/ipv4/tcp_minisocks.c
+++ b/net/ipv4/tcp_minisocks.c
@@ -149,7 +149,7 @@ kill_with_rst:
tw->tw_substate = TCP_TIME_WAIT;
tcptw->tw_rcv_nxt = TCP_SKB_CB(skb)->end_seq;
if (tmp_opt.saw_tstamp) {
- tcptw->tw_ts_recent_stamp = xtime.tv_sec;
+ tcptw->tw_ts_recent_stamp = get_seconds();
tcptw->tw_ts_recent = tmp_opt.rcv_tsval;
}
@@ -208,7 +208,7 @@ kill:
if (tmp_opt.saw_tstamp) {
tcptw->tw_ts_recent = tmp_opt.rcv_tsval;
- tcptw->tw_ts_recent_stamp = xtime.tv_sec;
+ tcptw->tw_ts_recent_stamp = get_seconds();
}
inet_twsk_put(tw);
@@ -246,7 +246,7 @@ kill:
if (paws_reject)
NET_INC_STATS_BH(LINUX_MIB_PAWSESTABREJECTED);
- if(!th->rst) {
+ if (!th->rst) {
/* In this case we must reset the TIMEWAIT timer.
*
* If it is ACKless SYN it may be both old duplicate
@@ -324,7 +324,7 @@ void tcp_time_wait(struct sock *sk, int state, int timeo)
if (tcp_alloc_md5sig_pool() == NULL)
BUG();
}
- } while(0);
+ } while (0);
#endif
/* Linkage updates. */
@@ -387,8 +387,8 @@ struct sock *tcp_create_openreq_child(struct sock *sk, struct request_sock *req,
/* Now setup tcp_sock */
newtp = tcp_sk(newsk);
newtp->pred_flags = 0;
- newtp->rcv_nxt = treq->rcv_isn + 1;
- newtp->snd_nxt = newtp->snd_una = newtp->snd_sml = treq->snt_isn + 1;
+ newtp->rcv_wup = newtp->copied_seq = newtp->rcv_nxt = treq->rcv_isn + 1;
+ newtp->snd_sml = newtp->snd_una = newtp->snd_nxt = treq->snt_isn + 1;
tcp_prequeue_init(newtp);
@@ -422,10 +422,8 @@ struct sock *tcp_create_openreq_child(struct sock *sk, struct request_sock *req,
tcp_set_ca_state(newsk, TCP_CA_Open);
tcp_init_xmit_timers(newsk);
skb_queue_head_init(&newtp->out_of_order_queue);
- newtp->rcv_wup = treq->rcv_isn + 1;
newtp->write_seq = treq->snt_isn + 1;
newtp->pushed_seq = newtp->write_seq;
- newtp->copied_seq = treq->rcv_isn + 1;
newtp->rx_opt.saw_tstamp = 0;
@@ -440,7 +438,7 @@ struct sock *tcp_create_openreq_child(struct sock *sk, struct request_sock *req,
keepalive_time_when(newtp));
newtp->rx_opt.tstamp_ok = ireq->tstamp_ok;
- if((newtp->rx_opt.sack_ok = ireq->sack_ok) != 0) {
+ if ((newtp->rx_opt.sack_ok = ireq->sack_ok) != 0) {
if (sysctl_tcp_fack)
newtp->rx_opt.sack_ok |= 2;
}
@@ -455,12 +453,13 @@ struct sock *tcp_create_openreq_child(struct sock *sk, struct request_sock *req,
newtp->rx_opt.snd_wscale = newtp->rx_opt.rcv_wscale = 0;
newtp->window_clamp = min(newtp->window_clamp, 65535U);
}
- newtp->snd_wnd = ntohs(skb->h.th->window) << newtp->rx_opt.snd_wscale;
+ newtp->snd_wnd = (ntohs(tcp_hdr(skb)->window) <<
+ newtp->rx_opt.snd_wscale);
newtp->max_window = newtp->snd_wnd;
if (newtp->rx_opt.tstamp_ok) {
newtp->rx_opt.ts_recent = req->ts_recent;
- newtp->rx_opt.ts_recent_stamp = xtime.tv_sec;
+ newtp->rx_opt.ts_recent_stamp = get_seconds();
newtp->tcp_header_len = sizeof(struct tcphdr) + TCPOLEN_TSTAMP_ALIGNED;
} else {
newtp->rx_opt.ts_recent_stamp = 0;
@@ -490,7 +489,7 @@ struct sock *tcp_check_req(struct sock *sk,struct sk_buff *skb,
struct request_sock *req,
struct request_sock **prev)
{
- struct tcphdr *th = skb->h.th;
+ const struct tcphdr *th = tcp_hdr(skb);
__be32 flg = tcp_flag_word(th) & (TCP_FLAG_RST|TCP_FLAG_SYN|TCP_FLAG_ACK);
int paws_reject = 0;
struct tcp_options_received tmp_opt;
@@ -506,7 +505,7 @@ struct sock *tcp_check_req(struct sock *sk,struct sk_buff *skb,
* it can be estimated (approximately)
* from another data.
*/
- tmp_opt.ts_recent_stamp = xtime.tv_sec - ((TCP_TIMEOUT_INIT/HZ)<<req->retrans);
+ tmp_opt.ts_recent_stamp = get_seconds() - ((TCP_TIMEOUT_INIT/HZ)<<req->retrans);
paws_reject = tcp_paws_check(&tmp_opt, th->rst);
}
}
@@ -712,8 +711,8 @@ int tcp_child_process(struct sock *parent, struct sock *child,
int state = child->sk_state;
if (!sock_owned_by_user(child)) {
- ret = tcp_rcv_state_process(child, skb, skb->h.th, skb->len);
-
+ ret = tcp_rcv_state_process(child, skb, tcp_hdr(skb),
+ skb->len);
/* Wakeup parent, send SIGIO */
if (state == TCP_SYN_RECV && child->sk_state != state)
parent->sk_data_ready(parent, 0);
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 3c24881f2a6..0faacf9c419 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -62,14 +62,13 @@ int sysctl_tcp_base_mss __read_mostly = 512;
/* By default, RFC2861 behavior. */
int sysctl_tcp_slow_start_after_idle __read_mostly = 1;
-static void update_send_head(struct sock *sk, struct tcp_sock *tp,
- struct sk_buff *skb)
+static void update_send_head(struct sock *sk, struct sk_buff *skb)
{
- sk->sk_send_head = skb->next;
- if (sk->sk_send_head == (struct sk_buff *)&sk->sk_write_queue)
- sk->sk_send_head = NULL;
+ struct tcp_sock *tp = tcp_sk(sk);
+
+ tcp_advance_send_head(sk, skb);
tp->snd_nxt = TCP_SKB_CB(skb)->end_seq;
- tcp_packets_out_inc(sk, tp, skb);
+ tcp_packets_out_inc(sk, skb);
}
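The removed open-coded advance is what tcp_advance_send_head() now hides; its
behavior on the circular write queue can be modeled in a few lines (struct
names ours, not the kernel's):

	struct skq { struct skq *next, *prev; };

	struct wq {
		struct skq anchor;	/* list head, like sk_write_queue */
		struct skq *send_head;	/* next skb to transmit, or NULL */
	};

	/* After skb is transmitted the send head moves to its successor,
	 * becoming NULL when the walk returns to the list anchor, i.e.
	 * everything queued has been sent once. */
	static void advance_send_head(struct wq *q, struct skq *skb)
	{
		q->send_head = skb->next;
		if (q->send_head == &q->anchor)
			q->send_head = NULL;
	}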
/* SND.NXT, if window was not shrunk.
@@ -78,8 +77,10 @@ static void update_send_head(struct sock *sk, struct tcp_sock *tp,
* Anything in between SND.UNA...SND.UNA+SND.WND also can be already
* invalid. OK, let's make this for now:
*/
-static inline __u32 tcp_acceptable_seq(struct sock *sk, struct tcp_sock *tp)
+static inline __u32 tcp_acceptable_seq(struct sock *sk)
{
+ struct tcp_sock *tp = tcp_sk(sk);
+
if (!before(tp->snd_una+tp->snd_wnd, tp->snd_nxt))
return tp->snd_nxt;
else
@@ -238,7 +239,7 @@ static u16 tcp_select_window(struct sock *sk)
u32 new_win = __tcp_select_window(sk);
/* Never shrink the offered window */
- if(new_win < cur_win) {
+ if (new_win < cur_win) {
/* Danger Will Robinson!
* Don't update rcv_wup/rcv_wnd here or else
* we will not be able to advertise a zero
@@ -289,10 +290,12 @@ static void tcp_build_and_update_options(__be32 *ptr, struct tcp_sock *tp,
(TCPOPT_SACK << 8) |
(TCPOLEN_SACK_BASE + (tp->rx_opt.eff_sacks *
TCPOLEN_SACK_PERBLOCK)));
- for(this_sack = 0; this_sack < tp->rx_opt.eff_sacks; this_sack++) {
+
+ for (this_sack = 0; this_sack < tp->rx_opt.eff_sacks; this_sack++) {
*ptr++ = htonl(sp[this_sack].start_seq);
*ptr++ = htonl(sp[this_sack].end_seq);
}
+
if (tp->rx_opt.dsack) {
tp->rx_opt.dsack = 0;
tp->rx_opt.eff_sacks--;
@@ -337,7 +340,7 @@ static void tcp_syn_build_options(__be32 *ptr, int mss, int ts, int sack,
*/
*ptr++ = htonl((TCPOPT_MSS << 24) | (TCPOLEN_MSS << 16) | mss);
if (ts) {
- if(sack)
+ if (sack)
*ptr++ = htonl((TCPOPT_SACK_PERM << 24) |
(TCPOLEN_SACK_PERM << 16) |
(TCPOPT_TIMESTAMP << 8) |
@@ -349,7 +352,7 @@ static void tcp_syn_build_options(__be32 *ptr, int mss, int ts, int sack,
TCPOLEN_TIMESTAMP);
*ptr++ = htonl(tstamp); /* TSVAL */
*ptr++ = htonl(ts_recent); /* TSECR */
- } else if(sack)
+ } else if (sack)
*ptr++ = htonl((TCPOPT_NOP << 24) |
(TCPOPT_NOP << 16) |
(TCPOPT_SACK_PERM << 8) |
@@ -406,7 +409,7 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it,
/* If congestion control is doing timestamping, we must
* take such a timestamp before we potentially clone/copy.
*/
- if (icsk->icsk_ca_ops->rtt_sample)
+ if (icsk->icsk_ca_ops->flags & TCP_CONG_RTT_STAMP)
__net_timestamp(skb);
if (likely(clone_it)) {
@@ -430,7 +433,7 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it,
sysctl_flags = 0;
if (unlikely(tcb->flags & TCPCB_FLAG_SYN)) {
tcp_header_size = sizeof(struct tcphdr) + TCPOLEN_MSS;
- if(sysctl_tcp_timestamps) {
+ if (sysctl_tcp_timestamps) {
tcp_header_size += TCPOLEN_TSTAMP_ALIGNED;
sysctl_flags |= SYSCTL_FLAG_TSTAMPS;
}
@@ -465,11 +468,12 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it,
tcp_header_size += TCPOLEN_MD5SIG_ALIGNED;
#endif
- th = (struct tcphdr *) skb_push(skb, tcp_header_size);
- skb->h.th = th;
+ skb_push(skb, tcp_header_size);
+ skb_reset_transport_header(skb);
skb_set_owner_w(skb, sk);
/* Build TCP header and checksum it. */
+ th = tcp_hdr(skb);
th->source = inet->sport;
th->dest = inet->dport;
th->seq = htonl(tcb->seq);
@@ -515,7 +519,7 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it,
md5 ? &md5_hash_location :
#endif
NULL);
- TCP_ECN_send(sk, tp, skb, tcp_header_size);
+ TCP_ECN_send(sk, skb, tcp_header_size);
}
#ifdef CONFIG_TCP_MD5SIG
@@ -524,7 +528,7 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it,
tp->af_specific->calc_md5_hash(md5_hash_location,
md5,
sk, NULL, NULL,
- skb->h.th,
+ tcp_hdr(skb),
sk->sk_protocol,
skb->len);
}
@@ -545,7 +549,7 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it,
if (likely(err <= 0))
return err;
- tcp_enter_cwr(sk);
+ tcp_enter_cwr(sk, 1);
return net_xmit_eval(err);
@@ -567,12 +571,8 @@ static void tcp_queue_skb(struct sock *sk, struct sk_buff *skb)
/* Advance write_seq and place onto the write_queue. */
tp->write_seq = TCP_SKB_CB(skb)->end_seq;
skb_header_release(skb);
- __skb_queue_tail(&sk->sk_write_queue, skb);
+ tcp_add_write_queue_tail(sk, skb);
sk_charge_skb(sk, skb);
-
- /* Queue it, remembering where we must start sending. */
- if (sk->sk_send_head == NULL)
- sk->sk_send_head = skb;
}
static void tcp_set_skb_tso_segs(struct sock *sk, struct sk_buff *skb, unsigned int mss_now)
@@ -705,7 +705,7 @@ int tcp_fragment(struct sock *sk, struct sk_buff *skb, u32 len, unsigned int mss
/* Link BUFF into the send queue. */
skb_header_release(buff);
- __skb_append(skb, buff, &sk->sk_write_queue);
+ tcp_insert_write_queue_after(skb, buff, sk);
return 0;
}
@@ -736,7 +736,7 @@ static void __pskb_trim_head(struct sk_buff *skb, int len)
}
skb_shinfo(skb)->nr_frags = k;
- skb->tail = skb->data;
+ skb_reset_tail_pointer(skb);
skb->data_len -= len;
skb->len = skb->data_len;
}
@@ -930,8 +930,9 @@ unsigned int tcp_current_mss(struct sock *sk, int large_allowed)
/* Congestion window validation. (RFC2861) */
-static void tcp_cwnd_validate(struct sock *sk, struct tcp_sock *tp)
+static void tcp_cwnd_validate(struct sock *sk)
{
+ struct tcp_sock *tp = tcp_sk(sk);
__u32 packets_out = tp->packets_out;
if (packets_out >= tp->snd_cwnd) {
@@ -1034,8 +1035,10 @@ static inline int tcp_nagle_test(struct tcp_sock *tp, struct sk_buff *skb,
if (nonagle & TCP_NAGLE_PUSH)
return 1;
- /* Don't use the nagle rule for urgent data (or for the final FIN). */
- if (tp->urg_mode ||
+ /* Don't use the nagle rule for urgent data (or for the final FIN).
+ * Nagle can be ignored during F-RTO too (see RFC4138).
+ */
+ if (tp->urg_mode || (tp->frto_counter == 2) ||
(TCP_SKB_CB(skb)->flags & TCPCB_FLAG_FIN))
return 1;
@@ -1056,7 +1059,7 @@ static inline int tcp_snd_wnd_test(struct tcp_sock *tp, struct sk_buff *skb, uns
return !after(end_seq, tp->snd_una + tp->snd_wnd);
}
-/* This checks if the data bearing packet SKB (usually sk->sk_send_head)
+/* This checks if the data bearing packet SKB (usually tcp_send_head(sk))
* should be put on the wire right now. If so, it returns the number of
* packets allowed by the congestion window.
*/
@@ -1079,15 +1082,10 @@ static unsigned int tcp_snd_test(struct sock *sk, struct sk_buff *skb,
return cwnd_quota;
}
-static inline int tcp_skb_is_last(const struct sock *sk,
- const struct sk_buff *skb)
+int tcp_may_send_now(struct sock *sk)
{
- return skb->next == (struct sk_buff *)&sk->sk_write_queue;
-}
-
-int tcp_may_send_now(struct sock *sk, struct tcp_sock *tp)
-{
- struct sk_buff *skb = sk->sk_send_head;
+ struct tcp_sock *tp = tcp_sk(sk);
+ struct sk_buff *skb = tcp_send_head(sk);
return (skb &&
tcp_snd_test(sk, skb, tcp_current_mss(sk, 1),
@@ -1143,7 +1141,7 @@ static int tso_fragment(struct sock *sk, struct sk_buff *skb, unsigned int len,
/* Link BUFF into the send queue. */
skb_header_release(buff);
- __skb_append(skb, buff, &sk->sk_write_queue);
+ tcp_insert_write_queue_after(skb, buff, sk);
return 0;
}
@@ -1153,8 +1151,9 @@ static int tso_fragment(struct sock *sk, struct sk_buff *skb, unsigned int len,
*
* This algorithm is from John Heffner.
*/
-static int tcp_tso_should_defer(struct sock *sk, struct tcp_sock *tp, struct sk_buff *skb)
+static int tcp_tso_should_defer(struct sock *sk, struct sk_buff *skb)
{
+ struct tcp_sock *tp = tcp_sk(sk);
const struct inet_connection_sock *icsk = inet_csk(sk);
u32 send_win, cong_win, limit, in_flight;
@@ -1249,10 +1248,10 @@ static int tcp_mtu_probe(struct sock *sk)
/* Have enough data in the send queue to probe? */
len = 0;
- if ((skb = sk->sk_send_head) == NULL)
+ if ((skb = tcp_send_head(sk)) == NULL)
return -1;
while ((len += skb->len) < probe_size && !tcp_skb_is_last(sk, skb))
- skb = skb->next;
+ skb = tcp_write_queue_next(sk, skb);
if (len < probe_size)
return -1;
@@ -1279,9 +1278,9 @@ static int tcp_mtu_probe(struct sock *sk)
return -1;
sk_charge_skb(sk, nskb);
- skb = sk->sk_send_head;
- __skb_insert(nskb, skb->prev, skb, &sk->sk_write_queue);
- sk->sk_send_head = nskb;
+ skb = tcp_send_head(sk);
+ tcp_insert_write_queue_before(nskb, skb, sk);
+ tcp_advance_send_head(sk, skb);
TCP_SKB_CB(nskb)->seq = TCP_SKB_CB(skb)->seq;
TCP_SKB_CB(nskb)->end_seq = TCP_SKB_CB(skb)->seq + probe_size;
@@ -1292,7 +1291,7 @@ static int tcp_mtu_probe(struct sock *sk)
len = 0;
while (len < probe_size) {
- next = skb->next;
+ next = tcp_write_queue_next(sk, skb);
copy = min_t(int, skb->len, probe_size - len);
if (nskb->ip_summed)
@@ -1305,7 +1304,7 @@ static int tcp_mtu_probe(struct sock *sk)
/* We've eaten all the data from this skb.
* Throw it away. */
TCP_SKB_CB(nskb)->flags |= TCP_SKB_CB(skb)->flags;
- __skb_unlink(skb, &sk->sk_write_queue);
+ tcp_unlink_write_queue(skb, sk);
sk_stream_free_skb(sk, skb);
} else {
TCP_SKB_CB(nskb)->flags |= TCP_SKB_CB(skb)->flags &
@@ -1333,7 +1332,7 @@ static int tcp_mtu_probe(struct sock *sk)
/* Decrement cwnd here because we are sending
* effectively two packets. */
tp->snd_cwnd--;
- update_send_head(sk, tp, nskb);
+ update_send_head(sk, nskb);
icsk->icsk_mtup.probe_size = tcp_mss_to_mtu(sk, nskb->len);
tp->mtu_probe.probe_seq_start = TCP_SKB_CB(nskb)->seq;
@@ -1377,7 +1376,7 @@ static int tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle)
sent_pkts = 1;
}
- while ((skb = sk->sk_send_head)) {
+ while ((skb = tcp_send_head(sk))) {
unsigned int limit;
tso_segs = tcp_init_tso_segs(sk, skb, mss_now);
@@ -1396,7 +1395,7 @@ static int tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle)
nonagle : TCP_NAGLE_PUSH))))
break;
} else {
- if (tcp_tso_should_defer(sk, tp, skb))
+ if (tcp_tso_should_defer(sk, skb))
break;
}
@@ -1425,31 +1424,31 @@ static int tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle)
/* Advance the send_head. This one is sent out.
* This call will increment packets_out.
*/
- update_send_head(sk, tp, skb);
+ update_send_head(sk, skb);
tcp_minshall_update(tp, mss_now, skb);
sent_pkts++;
}
if (likely(sent_pkts)) {
- tcp_cwnd_validate(sk, tp);
+ tcp_cwnd_validate(sk);
return 0;
}
- return !tp->packets_out && sk->sk_send_head;
+ return !tp->packets_out && tcp_send_head(sk);
}
/* Push out any pending frames which were held back due to
* TCP_CORK or attempt at coalescing tiny packets.
* The socket must be locked by the caller.
*/
-void __tcp_push_pending_frames(struct sock *sk, struct tcp_sock *tp,
- unsigned int cur_mss, int nonagle)
+void __tcp_push_pending_frames(struct sock *sk, unsigned int cur_mss,
+ int nonagle)
{
- struct sk_buff *skb = sk->sk_send_head;
+ struct sk_buff *skb = tcp_send_head(sk);
if (skb) {
if (tcp_write_xmit(sk, cur_mss, nonagle))
- tcp_check_probe_timer(sk, tp);
+ tcp_check_probe_timer(sk);
}
}
@@ -1459,7 +1458,7 @@ void __tcp_push_pending_frames(struct sock *sk, struct tcp_sock *tp,
void tcp_push_one(struct sock *sk, unsigned int mss_now)
{
struct tcp_sock *tp = tcp_sk(sk);
- struct sk_buff *skb = sk->sk_send_head;
+ struct sk_buff *skb = tcp_send_head(sk);
unsigned int tso_segs, cwnd_quota;
BUG_ON(!skb || skb->len < mss_now);
@@ -1493,8 +1492,8 @@ void tcp_push_one(struct sock *sk, unsigned int mss_now)
TCP_SKB_CB(skb)->when = tcp_time_stamp;
if (likely(!tcp_transmit_skb(sk, skb, 1, sk->sk_allocation))) {
- update_send_head(sk, tp, skb);
- tcp_cwnd_validate(sk, tp);
+ update_send_head(sk, skb);
+ tcp_cwnd_validate(sk);
return;
}
}
@@ -1620,7 +1619,7 @@ u32 __tcp_select_window(struct sock *sk)
static void tcp_retrans_try_collapse(struct sock *sk, struct sk_buff *skb, int mss_now)
{
struct tcp_sock *tp = tcp_sk(sk);
- struct sk_buff *next_skb = skb->next;
+ struct sk_buff *next_skb = tcp_write_queue_next(sk, skb);
/* The first test we must make is that neither of these two
* SKB's are still referenced by someone else.
@@ -1630,7 +1629,7 @@ static void tcp_retrans_try_collapse(struct sock *sk, struct sk_buff *skb, int m
u16 flags = TCP_SKB_CB(skb)->flags;
/* Also punt if next skb has been SACK'd. */
- if(TCP_SKB_CB(next_skb)->sacked & TCPCB_SACKED_ACKED)
+ if (TCP_SKB_CB(next_skb)->sacked & TCPCB_SACKED_ACKED)
return;
/* Next skb is out of window. */
@@ -1652,9 +1651,11 @@ static void tcp_retrans_try_collapse(struct sock *sk, struct sk_buff *skb, int m
clear_all_retrans_hints(tp);
/* Ok. We will be able to collapse the packet. */
- __skb_unlink(next_skb, &sk->sk_write_queue);
+ tcp_unlink_write_queue(next_skb, sk);
- memcpy(skb_put(skb, next_skb_size), next_skb->data, next_skb_size);
+ skb_copy_from_linear_data(next_skb,
+ skb_put(skb, next_skb_size),
+ next_skb_size);
if (next_skb->ip_summed == CHECKSUM_PARTIAL)
skb->ip_summed = CHECKSUM_PARTIAL;
@@ -1706,7 +1707,9 @@ void tcp_simple_retransmit(struct sock *sk)
unsigned int mss = tcp_current_mss(sk, 0);
int lost = 0;
- sk_stream_for_retrans_queue(skb, sk) {
+ tcp_for_write_queue(skb, sk) {
+ if (skb == tcp_send_head(sk))
+ break;
if (skb->len > mss &&
!(TCP_SKB_CB(skb)->sacked&TCPCB_SACKED_ACKED)) {
if (TCP_SKB_CB(skb)->sacked&TCPCB_SACKED_RETRANS) {
@@ -1788,13 +1791,13 @@ int tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb)
}
/* Collapse two adjacent packets if worthwhile and we can. */
- if(!(TCP_SKB_CB(skb)->flags & TCPCB_FLAG_SYN) &&
- (skb->len < (cur_mss >> 1)) &&
- (skb->next != sk->sk_send_head) &&
- (skb->next != (struct sk_buff *)&sk->sk_write_queue) &&
- (skb_shinfo(skb)->nr_frags == 0 && skb_shinfo(skb->next)->nr_frags == 0) &&
- (tcp_skb_pcount(skb) == 1 && tcp_skb_pcount(skb->next) == 1) &&
- (sysctl_tcp_retrans_collapse != 0))
+ if (!(TCP_SKB_CB(skb)->flags & TCPCB_FLAG_SYN) &&
+ (skb->len < (cur_mss >> 1)) &&
+ (tcp_write_queue_next(sk, skb) != tcp_send_head(sk)) &&
+ (!tcp_skb_is_last(sk, skb)) &&
+ (skb_shinfo(skb)->nr_frags == 0 && skb_shinfo(tcp_write_queue_next(sk, skb))->nr_frags == 0) &&
+ (tcp_skb_pcount(skb) == 1 && tcp_skb_pcount(tcp_write_queue_next(sk, skb)) == 1) &&
+ (sysctl_tcp_retrans_collapse != 0))
tcp_retrans_try_collapse(sk, skb, cur_mss);
if (inet_csk(sk)->icsk_af_ops->rebuild_header(sk))
@@ -1804,9 +1807,9 @@ int tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb)
* retransmit when old data is attached. So strip it off
* since it is cheap to do so and saves bytes on the network.
*/
- if(skb->len > 0 &&
- (TCP_SKB_CB(skb)->flags & TCPCB_FLAG_FIN) &&
- tp->snd_una == (TCP_SKB_CB(skb)->end_seq - 1)) {
+ if (skb->len > 0 &&
+ (TCP_SKB_CB(skb)->flags & TCPCB_FLAG_FIN) &&
+ tp->snd_una == (TCP_SKB_CB(skb)->end_seq - 1)) {
if (!pskb_trim(skb, 0)) {
TCP_SKB_CB(skb)->seq = TCP_SKB_CB(skb)->end_seq - 1;
skb_shinfo(skb)->gso_segs = 1;
@@ -1872,15 +1875,17 @@ void tcp_xmit_retransmit_queue(struct sock *sk)
skb = tp->retransmit_skb_hint;
packet_cnt = tp->retransmit_cnt_hint;
}else{
- skb = sk->sk_write_queue.next;
+ skb = tcp_write_queue_head(sk);
packet_cnt = 0;
}
/* First pass: retransmit lost packets. */
if (tp->lost_out) {
- sk_stream_for_retrans_queue_from(skb, sk) {
+ tcp_for_write_queue_from(skb, sk) {
__u8 sacked = TCP_SKB_CB(skb)->sacked;
+ if (skb == tcp_send_head(sk))
+ break;
/* we could do better than to assign each time */
tp->retransmit_skb_hint = skb;
tp->retransmit_cnt_hint = packet_cnt;
@@ -1906,8 +1911,7 @@ void tcp_xmit_retransmit_queue(struct sock *sk)
else
NET_INC_STATS_BH(LINUX_MIB_TCPSLOWSTARTRETRANS);
- if (skb ==
- skb_peek(&sk->sk_write_queue))
+ if (skb == tcp_write_queue_head(sk))
inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS,
inet_csk(sk)->icsk_rto,
TCP_RTO_MAX);
@@ -1937,18 +1941,20 @@ void tcp_xmit_retransmit_queue(struct sock *sk)
* segments to send.
*/
- if (tcp_may_send_now(sk, tp))
+ if (tcp_may_send_now(sk))
return;
if (tp->forward_skb_hint) {
skb = tp->forward_skb_hint;
packet_cnt = tp->forward_cnt_hint;
} else{
- skb = sk->sk_write_queue.next;
+ skb = tcp_write_queue_head(sk);
packet_cnt = 0;
}
- sk_stream_for_retrans_queue_from(skb, sk) {
+ tcp_for_write_queue_from(skb, sk) {
+ if (skb == tcp_send_head(sk))
+ break;
tp->forward_cnt_hint = packet_cnt;
tp->forward_skb_hint = skb;
@@ -1973,7 +1979,7 @@ void tcp_xmit_retransmit_queue(struct sock *sk)
break;
}
- if (skb == skb_peek(&sk->sk_write_queue))
+ if (skb == tcp_write_queue_head(sk))
inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS,
inet_csk(sk)->icsk_rto,
TCP_RTO_MAX);
@@ -1989,7 +1995,7 @@ void tcp_xmit_retransmit_queue(struct sock *sk)
void tcp_send_fin(struct sock *sk)
{
struct tcp_sock *tp = tcp_sk(sk);
- struct sk_buff *skb = skb_peek_tail(&sk->sk_write_queue);
+ struct sk_buff *skb = tcp_write_queue_tail(sk);
int mss_now;
/* Optimization, tack on the FIN if we have a queue of
@@ -1998,7 +2004,7 @@ void tcp_send_fin(struct sock *sk)
*/
mss_now = tcp_current_mss(sk, 1);
- if (sk->sk_send_head != NULL) {
+ if (tcp_send_head(sk) != NULL) {
TCP_SKB_CB(skb)->flags |= TCPCB_FLAG_FIN;
TCP_SKB_CB(skb)->end_seq++;
tp->write_seq++;
@@ -2025,17 +2031,16 @@ void tcp_send_fin(struct sock *sk)
TCP_SKB_CB(skb)->end_seq = TCP_SKB_CB(skb)->seq + 1;
tcp_queue_skb(sk, skb);
}
- __tcp_push_pending_frames(sk, tp, mss_now, TCP_NAGLE_OFF);
+ __tcp_push_pending_frames(sk, mss_now, TCP_NAGLE_OFF);
}
/* We get here when a process closes a file descriptor (either due to
* an explicit close() or as a byproduct of exit()'ing) and there
* was unread data in the receive queue. This behavior is recommended
- * by draft-ietf-tcpimpl-prob-03.txt section 3.10. -DaveM
+ * by RFC 2525, section 2.17. -DaveM
*/
void tcp_send_active_reset(struct sock *sk, gfp_t priority)
{
- struct tcp_sock *tp = tcp_sk(sk);
struct sk_buff *skb;
/* NOTE: No TCP options attached and we never retransmit this. */
@@ -2055,7 +2060,7 @@ void tcp_send_active_reset(struct sock *sk, gfp_t priority)
skb_shinfo(skb)->gso_type = 0;
/* Send it off. */
- TCP_SKB_CB(skb)->seq = tcp_acceptable_seq(sk, tp);
+ TCP_SKB_CB(skb)->seq = tcp_acceptable_seq(sk);
TCP_SKB_CB(skb)->end_seq = TCP_SKB_CB(skb)->seq;
TCP_SKB_CB(skb)->when = tcp_time_stamp;
if (tcp_transmit_skb(sk, skb, 0, priority))
@@ -2071,7 +2076,7 @@ int tcp_send_synack(struct sock *sk)
{
struct sk_buff* skb;
- skb = skb_peek(&sk->sk_write_queue);
+ skb = tcp_write_queue_head(sk);
if (skb == NULL || !(TCP_SKB_CB(skb)->flags&TCPCB_FLAG_SYN)) {
printk(KERN_DEBUG "tcp_send_synack: wrong queue state\n");
return -EFAULT;
@@ -2081,9 +2086,9 @@ int tcp_send_synack(struct sock *sk)
struct sk_buff *nskb = skb_copy(skb, GFP_ATOMIC);
if (nskb == NULL)
return -ENOMEM;
- __skb_unlink(skb, &sk->sk_write_queue);
+ tcp_unlink_write_queue(skb, sk);
skb_header_release(nskb);
- __skb_queue_head(&sk->sk_write_queue, nskb);
+ __tcp_add_write_queue_head(sk, nskb);
sk_stream_free_skb(sk, skb);
sk_charge_skb(sk, nskb);
skb = nskb;
@@ -2133,8 +2138,10 @@ struct sk_buff * tcp_make_synack(struct sock *sk, struct dst_entry *dst,
if (md5)
tcp_header_size += TCPOLEN_MD5SIG_ALIGNED;
#endif
- skb->h.th = th = (struct tcphdr *) skb_push(skb, tcp_header_size);
+ skb_push(skb, tcp_header_size);
+ skb_reset_transport_header(skb);
+ th = tcp_hdr(skb);
memset(th, 0, sizeof(struct tcphdr));
th->syn = 1;
th->ack = 1;
@@ -2188,7 +2195,7 @@ struct sk_buff * tcp_make_synack(struct sock *sk, struct dst_entry *dst,
tp->af_specific->calc_md5_hash(md5_hash_location,
md5,
NULL, dst, req,
- skb->h.th, sk->sk_protocol,
+ tcp_hdr(skb), sk->sk_protocol,
skb->len);
}
#endif
@@ -2271,7 +2278,7 @@ int tcp_connect(struct sock *sk)
skb_reserve(buff, MAX_TCP_HEADER);
TCP_SKB_CB(buff)->flags = TCPCB_FLAG_SYN;
- TCP_ECN_send_syn(sk, tp, buff);
+ TCP_ECN_send_syn(sk, buff);
TCP_SKB_CB(buff)->sacked = 0;
skb_shinfo(buff)->gso_segs = 1;
skb_shinfo(buff)->gso_size = 0;
@@ -2285,7 +2292,7 @@ int tcp_connect(struct sock *sk)
TCP_SKB_CB(buff)->when = tcp_time_stamp;
tp->retrans_stamp = TCP_SKB_CB(buff)->when;
skb_header_release(buff);
- __skb_queue_tail(&sk->sk_write_queue, buff);
+ __tcp_add_write_queue_tail(sk, buff);
sk_charge_skb(sk, buff);
tp->packets_out += tcp_skb_pcount(buff);
tcp_transmit_skb(sk, buff, 1, GFP_KERNEL);
@@ -2363,7 +2370,6 @@ void tcp_send_ack(struct sock *sk)
{
/* If we have been reset, we may not send again. */
if (sk->sk_state != TCP_CLOSE) {
- struct tcp_sock *tp = tcp_sk(sk);
struct sk_buff *buff;
/* We are not putting this on the write queue, so
@@ -2389,7 +2395,7 @@ void tcp_send_ack(struct sock *sk)
skb_shinfo(buff)->gso_type = 0;
/* Send it off, this clears delayed acks for us. */
- TCP_SKB_CB(buff)->seq = TCP_SKB_CB(buff)->end_seq = tcp_acceptable_seq(sk, tp);
+ TCP_SKB_CB(buff)->seq = TCP_SKB_CB(buff)->end_seq = tcp_acceptable_seq(sk);
TCP_SKB_CB(buff)->when = tcp_time_stamp;
tcp_transmit_skb(sk, buff, 0, GFP_ATOMIC);
}
@@ -2441,7 +2447,7 @@ int tcp_write_wakeup(struct sock *sk)
struct tcp_sock *tp = tcp_sk(sk);
struct sk_buff *skb;
- if ((skb = sk->sk_send_head) != NULL &&
+ if ((skb = tcp_send_head(sk)) != NULL &&
before(TCP_SKB_CB(skb)->seq, tp->snd_una+tp->snd_wnd)) {
int err;
unsigned int mss = tcp_current_mss(sk, 0);
@@ -2467,7 +2473,7 @@ int tcp_write_wakeup(struct sock *sk)
TCP_SKB_CB(skb)->when = tcp_time_stamp;
err = tcp_transmit_skb(sk, skb, 1, GFP_ATOMIC);
if (!err) {
- update_send_head(sk, tp, skb);
+ update_send_head(sk, skb);
}
return err;
} else {
@@ -2491,7 +2497,7 @@ void tcp_send_probe0(struct sock *sk)
err = tcp_write_wakeup(sk);
- if (tp->packets_out || !sk->sk_send_head) {
+ if (tp->packets_out || !tcp_send_head(sk)) {
/* Cancel probe timer, if it is not required. */
icsk->icsk_probes_out = 0;
icsk->icsk_backoff = 0;
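The tcp_output.c conversion above no longer touches sk->sk_send_head or sk->sk_write_queue directly; every access goes through the new write-queue accessors. A minimal sketch of those helpers, assuming the include/net/tcp.h introduced alongside this diff (exact bodies may differ):

	static inline struct sk_buff *tcp_send_head(struct sock *sk)
	{
		return sk->sk_send_head;
	}

	static inline struct sk_buff *tcp_write_queue_next(struct sock *sk,
							   struct sk_buff *skb)
	{
		return skb->next;
	}

	static inline void tcp_advance_send_head(struct sock *sk,
						 struct sk_buff *skb)
	{
		/* the queue head structure itself marks the end of the list */
		sk->sk_send_head = tcp_write_queue_next(sk, skb);
		if (sk->sk_send_head == (struct sk_buff *)&sk->sk_write_queue)
			sk->sk_send_head = NULL;
	}

	static inline void tcp_add_write_queue_tail(struct sock *sk,
						    struct sk_buff *skb)
	{
		__skb_queue_tail(&sk->sk_write_queue, skb);

		/* remember where sending must resume, as tcp_queue_skb()
		 * used to do by hand */
		if (sk->sk_send_head == NULL)
			sk->sk_send_head = skb;
	}

Funneling every queue operation through one interface is what lets later work change the queue's representation without revisiting each call site.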
diff --git a/net/ipv4/tcp_probe.c b/net/ipv4/tcp_probe.c
index 61f406f2729..3938d5dbdf2 100644
--- a/net/ipv4/tcp_probe.c
+++ b/net/ipv4/tcp_probe.c
@@ -26,6 +26,8 @@
#include <linux/proc_fs.h>
#include <linux/module.h>
#include <linux/kfifo.h>
+#include <linux/ktime.h>
+#include <linux/time.h>
#include <linux/vmalloc.h>
#include <net/tcp.h>
@@ -34,43 +36,45 @@ MODULE_AUTHOR("Stephen Hemminger <shemminger@linux-foundation.org>");
MODULE_DESCRIPTION("TCP cwnd snooper");
MODULE_LICENSE("GPL");
-static int port = 0;
+static int port __read_mostly = 0;
MODULE_PARM_DESC(port, "Port to match (0=all)");
module_param(port, int, 0);
-static int bufsize = 64*1024;
+static int bufsize __read_mostly = 64*1024;
MODULE_PARM_DESC(bufsize, "Log buffer size (default 64k)");
module_param(bufsize, int, 0);
+static int full __read_mostly;
+MODULE_PARM_DESC(full, "Full log (1=every ack packet received, 0=only cwnd changes)");
+module_param(full, int, 0);
+
static const char procname[] = "tcpprobe";
struct {
- struct kfifo *fifo;
- spinlock_t lock;
+ struct kfifo *fifo;
+ spinlock_t lock;
wait_queue_head_t wait;
- struct timeval tstart;
+ ktime_t start;
+ u32 lastcwnd;
} tcpw;
+/*
+ * Print to log with timestamps.
+ * FIXME: causes an extra copy
+ */
static void printl(const char *fmt, ...)
{
va_list args;
int len;
- struct timeval now;
+ struct timespec tv;
char tbuf[256];
va_start(args, fmt);
- do_gettimeofday(&now);
+ /* want monotonic time since start of tcp_probe */
+ tv = ktime_to_timespec(ktime_sub(ktime_get(), tcpw.start));
- now.tv_sec -= tcpw.tstart.tv_sec;
- now.tv_usec -= tcpw.tstart.tv_usec;
- if (now.tv_usec < 0) {
- --now.tv_sec;
- now.tv_usec += 1000000;
- }
-
- len = sprintf(tbuf, "%lu.%06lu ",
- (unsigned long) now.tv_sec,
- (unsigned long) now.tv_usec);
+ len = sprintf(tbuf, "%lu.%09lu ",
+ (unsigned long) tv.tv_sec, (unsigned long) tv.tv_nsec);
len += vscnprintf(tbuf+len, sizeof(tbuf)-len, fmt, args);
va_end(args);
@@ -78,38 +82,44 @@ static void printl(const char *fmt, ...)
wake_up(&tcpw.wait);
}
-static int jtcp_sendmsg(struct kiocb *iocb, struct sock *sk,
- struct msghdr *msg, size_t size)
+/*
+ * Hook inserted to be called before each receive packet.
+ * Note: arguments must match tcp_rcv_established()!
+ */
+static int jtcp_rcv_established(struct sock *sk, struct sk_buff *skb,
+ struct tcphdr *th, unsigned len)
{
const struct tcp_sock *tp = tcp_sk(sk);
const struct inet_sock *inet = inet_sk(sk);
- if (port == 0 || ntohs(inet->dport) == port ||
- ntohs(inet->sport) == port) {
+ /* Only update if port matches */
+ if ((port == 0 || ntohs(inet->dport) == port || ntohs(inet->sport) == port)
+ && (full || tp->snd_cwnd != tcpw.lastcwnd)) {
printl("%d.%d.%d.%d:%u %d.%d.%d.%d:%u %d %#x %#x %u %u %u\n",
NIPQUAD(inet->saddr), ntohs(inet->sport),
NIPQUAD(inet->daddr), ntohs(inet->dport),
- size, tp->snd_nxt, tp->snd_una,
+ skb->len, tp->snd_nxt, tp->snd_una,
tp->snd_cwnd, tcp_current_ssthresh(sk),
- tp->snd_wnd);
+ tp->snd_wnd, tp->srtt >> 3);
+ tcpw.lastcwnd = tp->snd_cwnd;
}
jprobe_return();
return 0;
}
-static struct jprobe tcp_send_probe = {
+static struct jprobe tcp_probe = {
.kp = {
- .symbol_name = "tcp_sendmsg",
+ .symbol_name = "tcp_rcv_established",
},
- .entry = JPROBE_ENTRY(jtcp_sendmsg),
+ .entry = JPROBE_ENTRY(jtcp_rcv_established),
};
static int tcpprobe_open(struct inode * inode, struct file * file)
{
kfifo_reset(tcpw.fifo);
- do_gettimeofday(&tcpw.tstart);
+ tcpw.start = ktime_get();
return 0;
}
@@ -162,7 +172,7 @@ static __init int tcpprobe_init(void)
if (!proc_net_fops_create(procname, S_IRUSR, &tcpprobe_fops))
goto err0;
- ret = register_jprobe(&tcp_send_probe);
+ ret = register_jprobe(&tcp_probe);
if (ret)
goto err1;
@@ -180,7 +190,7 @@ static __exit void tcpprobe_exit(void)
{
kfifo_free(tcpw.fifo);
proc_net_remove(procname);
- unregister_jprobe(&tcp_send_probe);
+ unregister_jprobe(&tcp_probe);
}
module_exit(tcpprobe_exit);
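The switch from tcp_sendmsg to tcp_rcv_established also illustrates the jprobe contract the module depends on: the handler's signature must match the probed function exactly, and it must leave through jprobe_return() rather than an ordinary return. A stripped-down sketch of the pattern (handler body illustrative only):

	#include <linux/kprobes.h>
	#include <linux/module.h>
	#include <net/tcp.h>

	static int jtcp_hook(struct sock *sk, struct sk_buff *skb,
			     struct tcphdr *th, unsigned len)
	{
		/* executes with the probed call's live arguments */
		jprobe_return();
		return 0;	/* never reached */
	}

	static struct jprobe hook_probe = {
		.kp = {
			.symbol_name = "tcp_rcv_established",
		},
		.entry = JPROBE_ENTRY(jtcp_hook),
	};

	/* pair register_jprobe(&hook_probe) in module init with
	 * unregister_jprobe(&hook_probe) in module exit, as above */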
diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c
index a9243cfc1be..2ca97b20929 100644
--- a/net/ipv4/tcp_timer.c
+++ b/net/ipv4/tcp_timer.c
@@ -233,7 +233,7 @@ static void tcp_probe_timer(struct sock *sk)
struct tcp_sock *tp = tcp_sk(sk);
int max_probes;
- if (tp->packets_out || !sk->sk_send_head) {
+ if (tp->packets_out || !tcp_send_head(sk)) {
icsk->icsk_probes_out = 0;
return;
}
@@ -284,7 +284,7 @@ static void tcp_retransmit_timer(struct sock *sk)
if (!tp->packets_out)
goto out;
- BUG_TRAP(!skb_queue_empty(&sk->sk_write_queue));
+ BUG_TRAP(!tcp_write_queue_empty(sk));
if (!tp->snd_wnd && !sock_flag(sk, SOCK_DEAD) &&
!((1 << sk->sk_state) & (TCPF_SYN_SENT | TCPF_SYN_RECV))) {
@@ -306,7 +306,7 @@ static void tcp_retransmit_timer(struct sock *sk)
goto out;
}
tcp_enter_loss(sk, 0);
- tcp_retransmit_skb(sk, skb_peek(&sk->sk_write_queue));
+ tcp_retransmit_skb(sk, tcp_write_queue_head(sk));
__sk_dst_reset(sk);
goto out_reset_timer;
}
@@ -341,7 +341,7 @@ static void tcp_retransmit_timer(struct sock *sk)
tcp_enter_loss(sk, 0);
}
- if (tcp_retransmit_skb(sk, skb_peek(&sk->sk_write_queue)) > 0) {
+ if (tcp_retransmit_skb(sk, tcp_write_queue_head(sk)) > 0) {
/* Retransmission failed because of local congestion,
* do not backoff.
*/
@@ -482,7 +482,7 @@ static void tcp_keepalive_timer (unsigned long data)
elapsed = keepalive_time_when(tp);
/* It is alive without keepalive 8) */
- if (tp->packets_out || sk->sk_send_head)
+ if (tp->packets_out || tcp_send_head(sk))
goto resched;
elapsed = tcp_time_stamp - tp->rcv_tstamp;
diff --git a/net/ipv4/tcp_vegas.c b/net/ipv4/tcp_vegas.c
index 5c484dceb96..73e19cf7df2 100644
--- a/net/ipv4/tcp_vegas.c
+++ b/net/ipv4/tcp_vegas.c
@@ -38,6 +38,8 @@
#include <net/tcp.h>
+#include "tcp_vegas.h"
+
/* Default values of the Vegas variables, in fixed-point representation
* with V_PARAM_SHIFT bits to the right of the binary point.
*/
@@ -54,17 +56,6 @@ module_param(gamma, int, 0644);
MODULE_PARM_DESC(gamma, "limit on increase (scale by 2)");
-/* Vegas variables */
-struct vegas {
- u32 beg_snd_nxt; /* right edge during last RTT */
- u32 beg_snd_una; /* left edge during last RTT */
- u32 beg_snd_cwnd; /* saves the size of the cwnd */
- u8 doing_vegas_now;/* if true, do vegas for this RTT */
- u16 cntRTT; /* # of RTTs measured within last RTT */
- u32 minRTT; /* min of RTTs measured within last RTT (in usec) */
- u32 baseRTT; /* the min of all Vegas RTT measurements seen (in usec) */
-};
-
/* There are several situations when we must "re-start" Vegas:
*
* o when a connection is established
@@ -81,7 +72,7 @@ struct vegas {
* Instead we must wait until the completion of an RTT during
* which we actually receive ACKs.
*/
-static inline void vegas_enable(struct sock *sk)
+static void vegas_enable(struct sock *sk)
{
const struct tcp_sock *tp = tcp_sk(sk);
struct vegas *vegas = inet_csk_ca(sk);
@@ -104,13 +95,14 @@ static inline void vegas_disable(struct sock *sk)
vegas->doing_vegas_now = 0;
}
-static void tcp_vegas_init(struct sock *sk)
+void tcp_vegas_init(struct sock *sk)
{
struct vegas *vegas = inet_csk_ca(sk);
vegas->baseRTT = 0x7fffffff;
vegas_enable(sk);
}
+EXPORT_SYMBOL_GPL(tcp_vegas_init);
/* Do RTT sampling needed for Vegas.
* Basically we:
@@ -120,10 +112,13 @@ static void tcp_vegas_init(struct sock *sk)
* o min-filter RTT samples from a much longer window (forever for now)
* to find the propagation delay (baseRTT)
*/
-static void tcp_vegas_rtt_calc(struct sock *sk, u32 usrtt)
+void tcp_vegas_pkts_acked(struct sock *sk, u32 cnt, ktime_t last)
{
struct vegas *vegas = inet_csk_ca(sk);
- u32 vrtt = usrtt + 1; /* Never allow zero rtt or baseRTT */
+ u32 vrtt;
+
+ /* Never allow zero rtt or baseRTT */
+ vrtt = ktime_to_us(net_timedelta(last)) + 1;
/* Filter to find propagation delay: */
if (vrtt < vegas->baseRTT)
@@ -135,8 +130,9 @@ static void tcp_vegas_rtt_calc(struct sock *sk, u32 usrtt)
vegas->minRTT = min(vegas->minRTT, vrtt);
vegas->cntRTT++;
}
+EXPORT_SYMBOL_GPL(tcp_vegas_pkts_acked);
-static void tcp_vegas_state(struct sock *sk, u8 ca_state)
+void tcp_vegas_state(struct sock *sk, u8 ca_state)
{
if (ca_state == TCP_CA_Open)
@@ -144,6 +140,7 @@ static void tcp_vegas_state(struct sock *sk, u8 ca_state)
else
vegas_disable(sk);
}
+EXPORT_SYMBOL_GPL(tcp_vegas_state);
/*
* If the connection is idle and we are restarting,
@@ -154,12 +151,13 @@ static void tcp_vegas_state(struct sock *sk, u8 ca_state)
* packets, _then_ we can make Vegas calculations
* again.
*/
-static void tcp_vegas_cwnd_event(struct sock *sk, enum tcp_ca_event event)
+void tcp_vegas_cwnd_event(struct sock *sk, enum tcp_ca_event event)
{
if (event == CA_EVENT_CWND_RESTART ||
event == CA_EVENT_TX_START)
tcp_vegas_init(sk);
}
+EXPORT_SYMBOL_GPL(tcp_vegas_cwnd_event);
static void tcp_vegas_cong_avoid(struct sock *sk, u32 ack,
u32 seq_rtt, u32 in_flight, int flag)
@@ -336,30 +334,29 @@ static void tcp_vegas_cong_avoid(struct sock *sk, u32 ack,
}
/* Extract info for Tcp socket info provided via netlink. */
-static void tcp_vegas_get_info(struct sock *sk, u32 ext,
- struct sk_buff *skb)
+void tcp_vegas_get_info(struct sock *sk, u32 ext, struct sk_buff *skb)
{
const struct vegas *ca = inet_csk_ca(sk);
if (ext & (1 << (INET_DIAG_VEGASINFO - 1))) {
- struct tcpvegas_info *info;
-
- info = RTA_DATA(__RTA_PUT(skb, INET_DIAG_VEGASINFO,
- sizeof(*info)));
-
- info->tcpv_enabled = ca->doing_vegas_now;
- info->tcpv_rttcnt = ca->cntRTT;
- info->tcpv_rtt = ca->baseRTT;
- info->tcpv_minrtt = ca->minRTT;
- rtattr_failure: ;
+ struct tcpvegas_info info = {
+ .tcpv_enabled = ca->doing_vegas_now,
+ .tcpv_rttcnt = ca->cntRTT,
+ .tcpv_rtt = ca->baseRTT,
+ .tcpv_minrtt = ca->minRTT,
+ };
+
+ nla_put(skb, INET_DIAG_VEGASINFO, sizeof(info), &info);
}
}
+EXPORT_SYMBOL_GPL(tcp_vegas_get_info);
static struct tcp_congestion_ops tcp_vegas = {
+ .flags = TCP_CONG_RTT_STAMP,
.init = tcp_vegas_init,
.ssthresh = tcp_reno_ssthresh,
.cong_avoid = tcp_vegas_cong_avoid,
.min_cwnd = tcp_reno_min_cwnd,
- .rtt_sample = tcp_vegas_rtt_calc,
+ .pkts_acked = tcp_vegas_pkts_acked,
.set_state = tcp_vegas_state,
.cwnd_event = tcp_vegas_cwnd_event,
.get_info = tcp_vegas_get_info,
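What makes the rtt_sample to pkts_acked move possible is the TCP_CONG_RTT_STAMP flag now set in the ops: tcp_transmit_skb() (see the tcp_output.c hunk earlier) timestamps each outgoing skb when the flag is present, and the ACK path hands that send time to pkts_acked() as 'last'. A sketch of the sampling step, assuming net_timedelta() subtracts its argument from the current wall-clock ktime as in this kernel generation:

	static void sample_rtt(struct sock *sk, u32 cnt, ktime_t last)
	{
		/* 'last' is the __net_timestamp() taken at transmit time,
		 * so the delta to now is the measured round trip */
		u32 vrtt = ktime_to_us(net_timedelta(last)) + 1; /* never 0 */

		/* Vegas then min-filters: over the whole connection for the
		 * propagation delay (baseRTT), over the last RTT for the
		 * congestion signal (minRTT); see tcp_vegas_pkts_acked() */
		vegas_update_filters(sk, vrtt);	/* hypothetical helper */
	}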
diff --git a/net/ipv4/tcp_vegas.h b/net/ipv4/tcp_vegas.h
new file mode 100644
index 00000000000..502fa818363
--- /dev/null
+++ b/net/ipv4/tcp_vegas.h
@@ -0,0 +1,24 @@
+/*
+ * TCP Vegas congestion control interface
+ */
+#ifndef __TCP_VEGAS_H
+#define __TCP_VEGAS_H 1
+
+/* Vegas variables */
+struct vegas {
+ u32 beg_snd_nxt; /* right edge during last RTT */
+ u32 beg_snd_una; /* left edge during last RTT */
+ u32 beg_snd_cwnd; /* saves the size of the cwnd */
+ u8 doing_vegas_now;/* if true, do vegas for this RTT */
+ u16 cntRTT; /* # of RTTs measured within last RTT */
+ u32 minRTT; /* min of RTTs measured within last RTT (in usec) */
+ u32 baseRTT; /* the min of all Vegas RTT measurements seen (in usec) */
+};
+
+extern void tcp_vegas_init(struct sock *sk);
+extern void tcp_vegas_state(struct sock *sk, u8 ca_state);
+extern void tcp_vegas_pkts_acked(struct sock *sk, u32 cnt, ktime_t last);
+extern void tcp_vegas_cwnd_event(struct sock *sk, enum tcp_ca_event event);
+extern void tcp_vegas_get_info(struct sock *sk, u32 ext, struct sk_buff *skb);
+
+#endif /* __TCP_VEGAS_H */
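Exporting the hooks and moving struct vegas into this header exists so that other congestion modules can embed the Vegas state and delegate to it; tcp_yeah.c below is the first user. The pattern, with illustrative names for everything except struct vegas:

	struct my_ca {
		struct vegas vegas;	/* must be first: both views share
					 * the inet_csk_ca(sk) scratch area */
		u32 my_private_state;
	};

	static void my_ca_init(struct sock *sk)
	{
		struct my_ca *ca = inet_csk_ca(sk);

		tcp_vegas_init(sk);	/* initializes the embedded part */
		ca->my_private_state = 0;
	}

ICSK_CA_PRIV_SIZE still bounds the combined structure, which is what the BUG_ON() in tcp_yeah_register() further down asserts.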
diff --git a/net/ipv4/tcp_veno.c b/net/ipv4/tcp_veno.c
index ce57bf302f6..9edb340f2f9 100644
--- a/net/ipv4/tcp_veno.c
+++ b/net/ipv4/tcp_veno.c
@@ -69,10 +69,13 @@ static void tcp_veno_init(struct sock *sk)
}
/* Do rtt sampling needed for Veno. */
-static void tcp_veno_rtt_calc(struct sock *sk, u32 usrtt)
+static void tcp_veno_pkts_acked(struct sock *sk, u32 cnt, ktime_t last)
{
struct veno *veno = inet_csk_ca(sk);
- u32 vrtt = usrtt + 1; /* Never allow zero rtt or basertt */
+ u32 vrtt;
+
+ /* Never allow zero rtt or baseRTT */
+ vrtt = ktime_to_us(net_timedelta(last)) + 1;
/* Filter to find propagation delay: */
if (vrtt < veno->basertt)
@@ -199,10 +202,11 @@ static u32 tcp_veno_ssthresh(struct sock *sk)
}
static struct tcp_congestion_ops tcp_veno = {
+ .flags = TCP_CONG_RTT_STAMP,
.init = tcp_veno_init,
.ssthresh = tcp_veno_ssthresh,
.cong_avoid = tcp_veno_cong_avoid,
- .rtt_sample = tcp_veno_rtt_calc,
+ .pkts_acked = tcp_veno_pkts_acked,
.set_state = tcp_veno_state,
.cwnd_event = tcp_veno_cwnd_event,
diff --git a/net/ipv4/tcp_westwood.c b/net/ipv4/tcp_westwood.c
index 4e1b61032a9..e61e09dd513 100644
--- a/net/ipv4/tcp_westwood.c
+++ b/net/ipv4/tcp_westwood.c
@@ -100,7 +100,7 @@ static void westwood_filter(struct westwood *w, u32 delta)
* Called after processing a group of packets,
* but all Westwood needs is the last sample of srtt.
*/
-static void tcp_westwood_pkts_acked(struct sock *sk, u32 cnt)
+static void tcp_westwood_pkts_acked(struct sock *sk, u32 cnt, ktime_t last)
{
struct westwood *w = inet_csk_ca(sk);
if (cnt > 0)
@@ -226,7 +226,7 @@ static void tcp_westwood_event(struct sock *sk, enum tcp_ca_event event)
struct tcp_sock *tp = tcp_sk(sk);
struct westwood *w = inet_csk_ca(sk);
- switch(event) {
+ switch (event) {
case CA_EVENT_FAST_ACK:
westwood_fast_bw(sk);
break;
@@ -260,16 +260,13 @@ static void tcp_westwood_info(struct sock *sk, u32 ext,
{
const struct westwood *ca = inet_csk_ca(sk);
if (ext & (1 << (INET_DIAG_VEGASINFO - 1))) {
- struct rtattr *rta;
- struct tcpvegas_info *info;
-
- rta = __RTA_PUT(skb, INET_DIAG_VEGASINFO, sizeof(*info));
- info = RTA_DATA(rta);
- info->tcpv_enabled = 1;
- info->tcpv_rttcnt = 0;
- info->tcpv_rtt = jiffies_to_usecs(ca->rtt);
- info->tcpv_minrtt = jiffies_to_usecs(ca->rtt_min);
- rtattr_failure: ;
+ struct tcpvegas_info info = {
+ .tcpv_enabled = 1,
+ .tcpv_rtt = jiffies_to_usecs(ca->rtt),
+ .tcpv_minrtt = jiffies_to_usecs(ca->rtt_min),
+ };
+
+ nla_put(skb, INET_DIAG_VEGASINFO, sizeof(info), &info);
}
}
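One behavioural nuance of the get_info conversions in tcp_vegas.c and here: the old __RTA_PUT macro bailed out through the hidden rtattr_failure label on overflow, while nla_put() reports the same condition via its return value, which the converted code ignores; that is acceptable for a best-effort diag dump. A sketch of the checked form, in case a caller does care:

	struct tcpvegas_info info = {
		.tcpv_enabled = 1,	/* fields as computed above */
	};

	if (nla_put(skb, INET_DIAG_VEGASINFO, sizeof(info), &info) < 0) {
		/* skb tailroom exhausted; the dump is simply truncated */
		return;
	}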
diff --git a/net/ipv4/tcp_yeah.c b/net/ipv4/tcp_yeah.c
new file mode 100644
index 00000000000..545ed237ab5
--- /dev/null
+++ b/net/ipv4/tcp_yeah.c
@@ -0,0 +1,268 @@
+/*
+ *
+ * YeAH TCP
+ *
+ * For further details look at:
+ * http://wil.cs.caltech.edu/pfldnet2007/paper/YeAH_TCP.pdf
+ *
+ */
+#include <linux/mm.h>
+#include <linux/module.h>
+#include <linux/skbuff.h>
+#include <linux/inet_diag.h>
+
+#include <net/tcp.h>
+
+#include "tcp_vegas.h"
+
+#define TCP_YEAH_ALPHA 80 //lin number of packets queued at the bottleneck
+#define TCP_YEAH_GAMMA 1 //lin fraction of queue to be removed per rtt
+#define TCP_YEAH_DELTA 3 //log minimum fraction of cwnd to be removed on loss
+#define TCP_YEAH_EPSILON 1 //log maximum fraction to be removed on early decongestion
+#define TCP_YEAH_PHY 8 //lin maximum delta from base
+#define TCP_YEAH_RHO 16 //lin minimum number of consecutive RTTs to consider competition on loss
+#define TCP_YEAH_ZETA 50 //lin minimum number of state switches to reset reno_count
+
+#define TCP_SCALABLE_AI_CNT 100U
+
+/* YeAH variables */
+struct yeah {
+ struct vegas vegas; /* must be first */
+
+ /* YeAH */
+ u32 lastQ;
+ u32 doing_reno_now;
+
+ u32 reno_count;
+ u32 fast_count;
+
+ u32 pkts_acked;
+};
+
+static void tcp_yeah_init(struct sock *sk)
+{
+ struct tcp_sock *tp = tcp_sk(sk);
+ struct yeah *yeah = inet_csk_ca(sk);
+
+ tcp_vegas_init(sk);
+
+ yeah->doing_reno_now = 0;
+ yeah->lastQ = 0;
+
+ yeah->reno_count = 2;
+
+ /* Ensure the MD arithmetic works. This is somewhat pedantic,
+ * since I don't think we will see a cwnd this large. :) */
+ tp->snd_cwnd_clamp = min_t(u32, tp->snd_cwnd_clamp, 0xffffffff/128);
+
+}
+
+
+static void tcp_yeah_pkts_acked(struct sock *sk, u32 pkts_acked, ktime_t last)
+{
+ const struct inet_connection_sock *icsk = inet_csk(sk);
+ struct yeah *yeah = inet_csk_ca(sk);
+
+ if (icsk->icsk_ca_state == TCP_CA_Open)
+ yeah->pkts_acked = pkts_acked;
+
+ tcp_vegas_pkts_acked(sk, pkts_acked, last);
+}
+
+static void tcp_yeah_cong_avoid(struct sock *sk, u32 ack,
+ u32 seq_rtt, u32 in_flight, int flag)
+{
+ struct tcp_sock *tp = tcp_sk(sk);
+ struct yeah *yeah = inet_csk_ca(sk);
+
+ if (!tcp_is_cwnd_limited(sk, in_flight))
+ return;
+
+ if (tp->snd_cwnd <= tp->snd_ssthresh)
+ tcp_slow_start(tp);
+
+ else if (!yeah->doing_reno_now) {
+ /* Scalable */
+
+ tp->snd_cwnd_cnt+=yeah->pkts_acked;
+ if (tp->snd_cwnd_cnt > min(tp->snd_cwnd, TCP_SCALABLE_AI_CNT)){
+ if (tp->snd_cwnd < tp->snd_cwnd_clamp)
+ tp->snd_cwnd++;
+ tp->snd_cwnd_cnt = 0;
+ }
+
+ yeah->pkts_acked = 1;
+
+ } else {
+ /* Reno */
+
+ if (tp->snd_cwnd_cnt < tp->snd_cwnd)
+ tp->snd_cwnd_cnt++;
+
+ if (tp->snd_cwnd_cnt >= tp->snd_cwnd) {
+ tp->snd_cwnd++;
+ tp->snd_cwnd_cnt = 0;
+ }
+ }
+
+ /* The key players are v_vegas.beg_snd_una and v_beg_snd_nxt.
+ *
+ * These are so named because they represent the approximate values
+ * of snd_una and snd_nxt at the beginning of the current RTT. More
+ * precisely, they represent the amount of data sent during the RTT.
+ * At the end of the RTT, when we receive an ACK for v_beg_snd_nxt,
+ * we will calculate that (v_beg_snd_nxt - v_vegas.beg_snd_una) outstanding
+ * bytes of data have been ACKed during the course of the RTT, giving
+ * an "actual" rate of:
+ *
+ * (v_beg_snd_nxt - v_vegas.beg_snd_una) / (rtt duration)
+ *
+ * Unfortunately, v_vegas.beg_snd_una is not exactly equal to snd_una,
+ * because delayed ACKs can cover more than one segment, so they
+ * don't line up nicely with the boundaries of RTTs.
+ *
+ * Another unfortunate fact of life is that delayed ACKs delay the
+ * advance of the left edge of our send window, so that the number
+ * of bytes we send in an RTT is often less than our cwnd will allow.
+ * So we keep track of our cwnd separately, in v_beg_snd_cwnd.
+ */
+
+ if (after(ack, yeah->vegas.beg_snd_nxt)) {
+
+ /* We do the Vegas calculations only if we got enough RTT
+ * samples that we can be reasonably sure that we got
+ * at least one RTT sample that wasn't from a delayed ACK.
+ * If we only had 2 samples total,
+ * then that means we're getting only 1 ACK per RTT, which
+ * means they're almost certainly delayed ACKs.
+ * If we have 3 samples, we should be OK.
+ */
+
+ if (yeah->vegas.cntRTT > 2) {
+ u32 rtt, queue;
+ u64 bw;
+
+ /* We have enough RTT samples, so, using the Vegas
+ * algorithm, we determine if we should increase or
+ * decrease cwnd, and by how much.
+ */
+
+ /* Pluck out the RTT we are using for the Vegas
+ * calculations. This is the min RTT seen during the
+ * last RTT. Taking the min filters out the effects
+ * of delayed ACKs, at the cost of noticing congestion
+ * a bit later.
+ */
+ rtt = yeah->vegas.minRTT;
+
+ /* Compute excess number of packets above bandwidth
+ * Avoid doing full 64 bit divide.
+ */
+ bw = tp->snd_cwnd;
+ bw *= rtt - yeah->vegas.baseRTT;
+ do_div(bw, rtt);
+ queue = bw;
+
+ if (queue > TCP_YEAH_ALPHA ||
+ rtt - yeah->vegas.baseRTT > (yeah->vegas.baseRTT / TCP_YEAH_PHY)) {
+ if (queue > TCP_YEAH_ALPHA
+ && tp->snd_cwnd > yeah->reno_count) {
+ u32 reduction = min(queue / TCP_YEAH_GAMMA ,
+ tp->snd_cwnd >> TCP_YEAH_EPSILON);
+
+ tp->snd_cwnd -= reduction;
+
+ tp->snd_cwnd = max(tp->snd_cwnd,
+ yeah->reno_count);
+
+ tp->snd_ssthresh = tp->snd_cwnd;
+ }
+
+ if (yeah->reno_count <= 2)
+ yeah->reno_count = max(tp->snd_cwnd>>1, 2U);
+ else
+ yeah->reno_count++;
+
+ yeah->doing_reno_now = min(yeah->doing_reno_now + 1,
+ 0xffffffU);
+ } else {
+ yeah->fast_count++;
+
+ if (yeah->fast_count > TCP_YEAH_ZETA) {
+ yeah->reno_count = 2;
+ yeah->fast_count = 0;
+ }
+
+ yeah->doing_reno_now = 0;
+ }
+
+ yeah->lastQ = queue;
+
+ }
+
+ /* Save the extent of the current window so we can use this
+ * at the end of the next RTT.
+ */
+ yeah->vegas.beg_snd_una = yeah->vegas.beg_snd_nxt;
+ yeah->vegas.beg_snd_nxt = tp->snd_nxt;
+ yeah->vegas.beg_snd_cwnd = tp->snd_cwnd;
+
+ /* Wipe the slate clean for the next RTT. */
+ yeah->vegas.cntRTT = 0;
+ yeah->vegas.minRTT = 0x7fffffff;
+ }
+}
+
+static u32 tcp_yeah_ssthresh(struct sock *sk) {
+ const struct tcp_sock *tp = tcp_sk(sk);
+ struct yeah *yeah = inet_csk_ca(sk);
+ u32 reduction;
+
+ if (yeah->doing_reno_now < TCP_YEAH_RHO) {
+ reduction = yeah->lastQ;
+
+ reduction = min( reduction, max(tp->snd_cwnd>>1, 2U) );
+
+ reduction = max( reduction, tp->snd_cwnd >> TCP_YEAH_DELTA);
+ } else
+ reduction = max(tp->snd_cwnd>>1,2U);
+
+ yeah->fast_count = 0;
+ yeah->reno_count = max(yeah->reno_count>>1, 2U);
+
+ return tp->snd_cwnd - reduction;
+}
+
+static struct tcp_congestion_ops tcp_yeah = {
+ .flags = TCP_CONG_RTT_STAMP,
+ .init = tcp_yeah_init,
+ .ssthresh = tcp_yeah_ssthresh,
+ .cong_avoid = tcp_yeah_cong_avoid,
+ .min_cwnd = tcp_reno_min_cwnd,
+ .set_state = tcp_vegas_state,
+ .cwnd_event = tcp_vegas_cwnd_event,
+ .get_info = tcp_vegas_get_info,
+ .pkts_acked = tcp_yeah_pkts_acked,
+
+ .owner = THIS_MODULE,
+ .name = "yeah",
+};
+
+static int __init tcp_yeah_register(void)
+{
+ BUG_ON(sizeof(struct yeah) > ICSK_CA_PRIV_SIZE);
+ tcp_register_congestion_control(&tcp_yeah);
+ return 0;
+}
+
+static void __exit tcp_yeah_unregister(void)
+{
+ tcp_unregister_congestion_control(&tcp_yeah);
+}
+
+module_init(tcp_yeah_register);
+module_exit(tcp_yeah_unregister);
+
+MODULE_AUTHOR("Angelo P. Castellani");
+MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("YeAH TCP");
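The heart of tcp_yeah_cong_avoid() is the Vegas-style backlog estimate queue = cwnd * (rtt - baseRTT) / rtt, computed above with do_div() to avoid a full 64-bit divide. A worked example with illustrative numbers:

	cwnd    = 100 packets
	rtt     = 110 ms	(min RTT measured during the last RTT)
	baseRTT = 100 ms	(long-term propagation-delay estimate)

	queue   = 100 * (110 - 100) / 110  ~=  9 packets

Nine packets estimated at the bottleneck is well under TCP_YEAH_ALPHA (80), and the 10 ms of extra delay is under baseRTT / TCP_YEAH_PHY = 12.5 ms, so the connection stays in the aggressive Scalable branch. Had either bound been exceeded, cwnd would be cut by queue / TCP_YEAH_GAMMA packets (capped at cwnd >> TCP_YEAH_EPSILON) and doing_reno_now would start counting toward Reno-compatible behaviour.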
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index fc620a7c1db..113e0c4c8a9 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -114,14 +114,33 @@ DEFINE_RWLOCK(udp_hash_lock);
static int udp_port_rover;
-static inline int __udp_lib_lport_inuse(__u16 num, struct hlist_head udptable[])
+/*
+ * Note about this hash function:
+ * Typical use is probably daddr = 0; only dport is going to vary the hash
+ */
+static inline unsigned int hash_port_and_addr(__u16 port, __be32 addr)
+{
+ addr ^= addr >> 16;
+ addr ^= addr >> 8;
+ return port ^ addr;
+}
+
+static inline int __udp_lib_port_inuse(unsigned int hash, int port,
+ __be32 daddr, struct hlist_head udptable[])
{
struct sock *sk;
struct hlist_node *node;
+ struct inet_sock *inet;
- sk_for_each(sk, node, &udptable[num & (UDP_HTABLE_SIZE - 1)])
- if (sk->sk_hash == num)
+ sk_for_each(sk, node, &udptable[hash & (UDP_HTABLE_SIZE - 1)]) {
+ if (sk->sk_hash != hash)
+ continue;
+ inet = inet_sk(sk);
+ if (inet->num != port)
+ continue;
+ if (inet->rcv_saddr == daddr)
return 1;
+ }
return 0;
}
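The invariant behind all the UDP changes that follow: a socket bound to a specific local address sits in the chain for hash_port_and_addr(port, addr), a wildcard socket in the chain for hash_port_and_addr(port, 0), so every lookup and every bind-conflict check probes at most two chains. A compressed sketch of the receive-side lookup built on this (scan_chain() is a hypothetical stand-in for the scoring loop inlined in __udp4_lib_lookup() below; it updates *best_score and returns a socket only when it improves on it):

	static struct sock *two_pass_lookup(int hport, __be32 daddr)
	{
		unsigned int hash = hash_port_and_addr(hport, daddr);
		unsigned int hashwild = hash_port_and_addr(hport, 0);
		struct sock *best = NULL, *sk;
		int best_score = -1;

		/* pass 1: sockets bound to daddr itself */
		best = scan_chain(hash, hport, daddr, &best_score);

		/* pass 2: wildcard-bound sockets -- skipped when daddr == 0
		 * already hashed there, or pass 1 found a perfect
		 * (score 9) match */
		if (best_score < 9 && hash != hashwild) {
			sk = scan_chain(hashwild, hport, daddr, &best_score);
			if (sk)
				best = sk;
		}
		return best;
	}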
@@ -142,6 +161,7 @@ int __udp_lib_get_port(struct sock *sk, unsigned short snum,
struct hlist_node *node;
struct hlist_head *head;
struct sock *sk2;
+ unsigned int hash;
int error = 1;
write_lock_bh(&udp_hash_lock);
@@ -156,7 +176,9 @@ int __udp_lib_get_port(struct sock *sk, unsigned short snum,
for (i = 0; i < UDP_HTABLE_SIZE; i++, result++) {
int size;
- head = &udptable[result & (UDP_HTABLE_SIZE - 1)];
+ hash = hash_port_and_addr(result,
+ inet_sk(sk)->rcv_saddr);
+ head = &udptable[hash & (UDP_HTABLE_SIZE - 1)];
if (hlist_empty(head)) {
if (result > sysctl_local_port_range[1])
result = sysctl_local_port_range[0] +
@@ -175,12 +197,23 @@ int __udp_lib_get_port(struct sock *sk, unsigned short snum,
;
}
result = best;
- for(i = 0; i < (1 << 16) / UDP_HTABLE_SIZE; i++, result += UDP_HTABLE_SIZE) {
+ for (i = 0; i < (1 << 16) / UDP_HTABLE_SIZE;
+ i++, result += UDP_HTABLE_SIZE) {
if (result > sysctl_local_port_range[1])
result = sysctl_local_port_range[0]
+ ((result - sysctl_local_port_range[0]) &
(UDP_HTABLE_SIZE - 1));
- if (! __udp_lib_lport_inuse(result, udptable))
+ hash = hash_port_and_addr(result, 0);
+ if (__udp_lib_port_inuse(hash, result,
+ 0, udptable))
+ continue;
+ if (!inet_sk(sk)->rcv_saddr)
+ break;
+
+ hash = hash_port_and_addr(result,
+ inet_sk(sk)->rcv_saddr);
+ if (! __udp_lib_port_inuse(hash, result,
+ inet_sk(sk)->rcv_saddr, udptable))
break;
}
if (i >= (1 << 16) / UDP_HTABLE_SIZE)
@@ -188,21 +221,41 @@ int __udp_lib_get_port(struct sock *sk, unsigned short snum,
gotit:
*port_rover = snum = result;
} else {
- head = &udptable[snum & (UDP_HTABLE_SIZE - 1)];
+ hash = hash_port_and_addr(snum, 0);
+ head = &udptable[hash & (UDP_HTABLE_SIZE - 1)];
sk_for_each(sk2, node, head)
- if (sk2->sk_hash == snum &&
- sk2 != sk &&
- (!sk2->sk_reuse || !sk->sk_reuse) &&
- (!sk2->sk_bound_dev_if || !sk->sk_bound_dev_if
- || sk2->sk_bound_dev_if == sk->sk_bound_dev_if) &&
- (*saddr_comp)(sk, sk2) )
+ if (sk2->sk_hash == hash &&
+ sk2 != sk &&
+ inet_sk(sk2)->num == snum &&
+ (!sk2->sk_reuse || !sk->sk_reuse) &&
+ (!sk2->sk_bound_dev_if || !sk->sk_bound_dev_if ||
+ sk2->sk_bound_dev_if == sk->sk_bound_dev_if) &&
+ (*saddr_comp)(sk, sk2))
goto fail;
+
+ if (inet_sk(sk)->rcv_saddr) {
+ hash = hash_port_and_addr(snum,
+ inet_sk(sk)->rcv_saddr);
+ head = &udptable[hash & (UDP_HTABLE_SIZE - 1)];
+
+ sk_for_each(sk2, node, head)
+ if (sk2->sk_hash == hash &&
+ sk2 != sk &&
+ inet_sk(sk2)->num == snum &&
+ (!sk2->sk_reuse || !sk->sk_reuse) &&
+ (!sk2->sk_bound_dev_if ||
+ !sk->sk_bound_dev_if ||
+ sk2->sk_bound_dev_if ==
+ sk->sk_bound_dev_if) &&
+ (*saddr_comp)(sk, sk2))
+ goto fail;
+ }
}
inet_sk(sk)->num = snum;
- sk->sk_hash = snum;
+ sk->sk_hash = hash;
if (sk_unhashed(sk)) {
- head = &udptable[snum & (UDP_HTABLE_SIZE - 1)];
+ head = &udptable[hash & (UDP_HTABLE_SIZE - 1)];
sk_add_node(sk, head);
sock_prot_inc_use(sk->sk_prot);
}
@@ -212,13 +265,13 @@ fail:
return error;
}
-__inline__ int udp_get_port(struct sock *sk, unsigned short snum,
+int udp_get_port(struct sock *sk, unsigned short snum,
int (*scmp)(const struct sock *, const struct sock *))
{
return __udp_lib_get_port(sk, snum, udp_hash, &udp_port_rover, scmp);
}
-inline int ipv4_rcv_saddr_equal(const struct sock *sk1, const struct sock *sk2)
+int ipv4_rcv_saddr_equal(const struct sock *sk1, const struct sock *sk2)
{
struct inet_sock *inet1 = inet_sk(sk1), *inet2 = inet_sk(sk2);
@@ -241,63 +294,77 @@ static struct sock *__udp4_lib_lookup(__be32 saddr, __be16 sport,
{
struct sock *sk, *result = NULL;
struct hlist_node *node;
- unsigned short hnum = ntohs(dport);
- int badness = -1;
+ unsigned int hash, hashwild;
+ int score, best = -1, hport = ntohs(dport);
+
+ hash = hash_port_and_addr(hport, daddr);
+ hashwild = hash_port_and_addr(hport, 0);
read_lock(&udp_hash_lock);
- sk_for_each(sk, node, &udptable[hnum & (UDP_HTABLE_SIZE - 1)]) {
+
+lookup:
+
+ sk_for_each(sk, node, &udptable[hash & (UDP_HTABLE_SIZE - 1)]) {
struct inet_sock *inet = inet_sk(sk);
- if (sk->sk_hash == hnum && !ipv6_only_sock(sk)) {
- int score = (sk->sk_family == PF_INET ? 1 : 0);
- if (inet->rcv_saddr) {
- if (inet->rcv_saddr != daddr)
- continue;
- score+=2;
- }
- if (inet->daddr) {
- if (inet->daddr != saddr)
- continue;
- score+=2;
- }
- if (inet->dport) {
- if (inet->dport != sport)
- continue;
- score+=2;
- }
- if (sk->sk_bound_dev_if) {
- if (sk->sk_bound_dev_if != dif)
- continue;
- score+=2;
- }
- if(score == 9) {
- result = sk;
- break;
- } else if(score > badness) {
- result = sk;
- badness = score;
- }
+ if (sk->sk_hash != hash || ipv6_only_sock(sk) ||
+ inet->num != hport)
+ continue;
+
+ score = (sk->sk_family == PF_INET ? 1 : 0);
+ if (inet->rcv_saddr) {
+ if (inet->rcv_saddr != daddr)
+ continue;
+ score+=2;
+ }
+ if (inet->daddr) {
+ if (inet->daddr != saddr)
+ continue;
+ score+=2;
}
+ if (inet->dport) {
+ if (inet->dport != sport)
+ continue;
+ score+=2;
+ }
+ if (sk->sk_bound_dev_if) {
+ if (sk->sk_bound_dev_if != dif)
+ continue;
+ score+=2;
+ }
+ if (score == 9) {
+ result = sk;
+ goto found;
+ } else if (score > best) {
+ result = sk;
+ best = score;
+ }
+ }
+
+ if (hash != hashwild) {
+ hash = hashwild;
+ goto lookup;
}
+found:
if (result)
sock_hold(result);
read_unlock(&udp_hash_lock);
return result;
}
-static inline struct sock *udp_v4_mcast_next(struct sock *sk,
- __be16 loc_port, __be32 loc_addr,
+static inline struct sock *udp_v4_mcast_next(struct sock *sk, unsigned int hnum,
+ int hport, __be32 loc_addr,
__be16 rmt_port, __be32 rmt_addr,
int dif)
{
struct hlist_node *node;
struct sock *s = sk;
- unsigned short hnum = ntohs(loc_port);
sk_for_each_from(s, node) {
struct inet_sock *inet = inet_sk(s);
if (s->sk_hash != hnum ||
+ inet->num != hport ||
(inet->daddr && inet->daddr != rmt_addr) ||
(inet->dport != rmt_port && inet->dport) ||
(inet->rcv_saddr && inet->rcv_saddr != loc_addr) ||
@@ -329,8 +396,8 @@ void __udp4_lib_err(struct sk_buff *skb, u32 info, struct hlist_head udptable[])
struct inet_sock *inet;
struct iphdr *iph = (struct iphdr*)skb->data;
struct udphdr *uh = (struct udphdr*)(skb->data+(iph->ihl<<2));
- int type = skb->h.icmph->type;
- int code = skb->h.icmph->code;
+ const int type = icmp_hdr(skb)->type;
+ const int code = icmp_hdr(skb)->code;
struct sock *sk;
int harderr;
int err;
@@ -390,7 +457,7 @@ out:
sock_put(sk);
}
-__inline__ void udp_err(struct sk_buff *skb, u32 info)
+void udp_err(struct sk_buff *skb, u32 info)
{
return __udp4_lib_err(skb, info, udp_hash);
}
@@ -419,13 +486,14 @@ static void udp4_hwcsum_outgoing(struct sock *sk, struct sk_buff *skb,
__be32 src, __be32 dst, int len )
{
unsigned int offset;
- struct udphdr *uh = skb->h.uh;
+ struct udphdr *uh = udp_hdr(skb);
__wsum csum = 0;
if (skb_queue_len(&sk->sk_write_queue) == 1) {
/*
* Only one fragment on the socket.
*/
+ skb->csum_start = skb_transport_header(skb) - skb->head;
skb->csum_offset = offsetof(struct udphdr, check);
uh->check = ~csum_tcpudp_magic(src, dst, len, IPPROTO_UDP, 0);
} else {
@@ -434,7 +502,7 @@ static void udp4_hwcsum_outgoing(struct sock *sk, struct sk_buff *skb,
* fragments on the socket so that all csums of sk_buffs
* should be together
*/
- offset = skb->h.raw - skb->data;
+ offset = skb_transport_offset(skb);
skb->csum = skb_checksum(skb, offset, skb->len - offset, 0);
skb->ip_summed = CHECKSUM_NONE;
@@ -469,7 +537,7 @@ static int udp_push_pending_frames(struct sock *sk)
/*
* Create a UDP header
*/
- uh = skb->h.uh;
+ uh = udp_hdr(skb);
uh->source = fl->fl_ip_sport;
uh->dest = fl->fl_ip_dport;
uh->len = htons(up->len);
@@ -765,38 +833,38 @@ out:
int udp_ioctl(struct sock *sk, int cmd, unsigned long arg)
{
- switch(cmd)
+ switch (cmd) {
+ case SIOCOUTQ:
{
- case SIOCOUTQ:
- {
- int amount = atomic_read(&sk->sk_wmem_alloc);
- return put_user(amount, (int __user *)arg);
- }
+ int amount = atomic_read(&sk->sk_wmem_alloc);
+ return put_user(amount, (int __user *)arg);
+ }
- case SIOCINQ:
- {
- struct sk_buff *skb;
- unsigned long amount;
-
- amount = 0;
- spin_lock_bh(&sk->sk_receive_queue.lock);
- skb = skb_peek(&sk->sk_receive_queue);
- if (skb != NULL) {
- /*
- * We will only return the amount
- * of this packet since that is all
- * that will be read.
- */
- amount = skb->len - sizeof(struct udphdr);
- }
- spin_unlock_bh(&sk->sk_receive_queue.lock);
- return put_user(amount, (int __user *)arg);
+ case SIOCINQ:
+ {
+ struct sk_buff *skb;
+ unsigned long amount;
+
+ amount = 0;
+ spin_lock_bh(&sk->sk_receive_queue.lock);
+ skb = skb_peek(&sk->sk_receive_queue);
+ if (skb != NULL) {
+ /*
+ * We will only return the amount
+ * of this packet since that is all
+ * that will be read.
+ */
+ amount = skb->len - sizeof(struct udphdr);
}
+ spin_unlock_bh(&sk->sk_receive_queue.lock);
+ return put_user(amount, (int __user *)arg);
+ }
- default:
- return -ENOIOCTLCMD;
+ default:
+ return -ENOIOCTLCMD;
}
- return(0);
+
+ return 0;
}
/*
@@ -810,7 +878,9 @@ int udp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
struct inet_sock *inet = inet_sk(sk);
struct sockaddr_in *sin = (struct sockaddr_in *)msg->msg_name;
struct sk_buff *skb;
- int copied, err, copy_only, is_udplite = IS_UDPLITE(sk);
+ unsigned int ulen, copied;
+ int err;
+ int is_udplite = IS_UDPLITE(sk);
/*
* Check any passed addresses
@@ -826,28 +896,25 @@ try_again:
if (!skb)
goto out;
- copied = skb->len - sizeof(struct udphdr);
- if (copied > len) {
- copied = len;
+ ulen = skb->len - sizeof(struct udphdr);
+ copied = len;
+ if (copied > ulen)
+ copied = ulen;
+ else if (copied < ulen)
msg->msg_flags |= MSG_TRUNC;
- }
/*
- * Decide whether to checksum and/or copy data.
- *
- * UDP: checksum may have been computed in HW,
- * (re-)compute it if message is truncated.
- * UDP-Lite: always needs to checksum, no HW support.
+ * If checksum is needed at all, try to do it while copying the
+ * data. If the data is truncated, or if we only want a partial
+ * coverage checksum (UDP-Lite), do it before the copy.
*/
- copy_only = (skb->ip_summed==CHECKSUM_UNNECESSARY);
- if (is_udplite || (!copy_only && msg->msg_flags&MSG_TRUNC)) {
- if (__udp_lib_checksum_complete(skb))
+ if (copied < ulen || UDP_SKB_CB(skb)->partial_cov) {
+ if (udp_lib_checksum_complete(skb))
goto csum_copy_err;
- copy_only = 1;
}
- if (copy_only)
+ if (skb_csum_unnecessary(skb))
err = skb_copy_datagram_iovec(skb, sizeof(struct udphdr),
msg->msg_iov, copied );
else {
@@ -866,8 +933,8 @@ try_again:
if (sin)
{
sin->sin_family = AF_INET;
- sin->sin_port = skb->h.uh->source;
- sin->sin_addr.s_addr = skb->nh.iph->saddr;
+ sin->sin_port = udp_hdr(skb)->source;
+ sin->sin_addr.s_addr = ip_hdr(skb)->saddr;
memset(sin->sin_zero, 0, sizeof(sin->sin_zero));
}
if (inet->cmsg_flags)
@@ -875,7 +942,7 @@ try_again:
err = copied;
if (flags & MSG_TRUNC)
- err = skb->len - sizeof(struct udphdr);
+ err = ulen;
out_free:
skb_free_datagram(sk, skb);
@@ -949,7 +1016,7 @@ static int udp_encap_rcv(struct sock * sk, struct sk_buff *skb)
return 1;
/* Now we can get the pointers */
- uh = skb->h.uh;
+ uh = udp_hdr(skb);
udpdata = (__u8 *)uh + sizeof(struct udphdr);
udpdata32 = (__be32 *)udpdata;
@@ -959,7 +1026,7 @@ static int udp_encap_rcv(struct sock * sk, struct sk_buff *skb)
/* Check if this is a keepalive packet. If so, eat it. */
if (len == 1 && udpdata[0] == 0xff) {
return 0;
- } else if (len > sizeof(struct ip_esp_hdr) && udpdata32[0] != 0 ) {
+ } else if (len > sizeof(struct ip_esp_hdr) && udpdata32[0] != 0) {
/* ESP Packet without Non-ESP header */
len = sizeof(struct udphdr);
} else
@@ -990,7 +1057,7 @@ static int udp_encap_rcv(struct sock * sk, struct sk_buff *skb)
return 0;
/* Now we can update and verify the packet length... */
- iph = skb->nh.iph;
+ iph = ip_hdr(skb);
iphlen = iph->ihl << 2;
iph->tot_len = htons(ntohs(iph->tot_len) - len);
if (skb->len < iphlen + len) {
@@ -1002,7 +1069,8 @@ static int udp_encap_rcv(struct sock * sk, struct sk_buff *skb)
* transport header to point to ESP. Keep UDP on the stack
* for later.
*/
- skb->h.raw = skb_pull(skb, len);
+ __skb_pull(skb, len);
+ skb_reset_transport_header(skb);
/* modify the protocol (it's ESP!) */
iph->protocol = IPPROTO_ESP;
@@ -1095,10 +1163,9 @@ int udp_queue_rcv_skb(struct sock * sk, struct sk_buff *skb)
}
}
- if (sk->sk_filter && skb->ip_summed != CHECKSUM_UNNECESSARY) {
- if (__udp_lib_checksum_complete(skb))
+ if (sk->sk_filter) {
+ if (udp_lib_checksum_complete(skb))
goto drop;
- skb->ip_summed = CHECKSUM_UNNECESSARY;
}
if ((rc = sock_queue_rcv_skb(sk,skb)) < 0) {
@@ -1128,33 +1195,49 @@ static int __udp4_lib_mcast_deliver(struct sk_buff *skb,
__be32 saddr, __be32 daddr,
struct hlist_head udptable[])
{
- struct sock *sk;
+ struct sock *sk, *skw, *sknext;
int dif;
+ int hport = ntohs(uh->dest);
+ unsigned int hash = hash_port_and_addr(hport, daddr);
+ unsigned int hashwild = hash_port_and_addr(hport, 0);
- read_lock(&udp_hash_lock);
- sk = sk_head(&udptable[ntohs(uh->dest) & (UDP_HTABLE_SIZE - 1)]);
dif = skb->dev->ifindex;
- sk = udp_v4_mcast_next(sk, uh->dest, daddr, uh->source, saddr, dif);
- if (sk) {
- struct sock *sknext = NULL;
+ read_lock(&udp_hash_lock);
+
+ sk = sk_head(&udptable[hash & (UDP_HTABLE_SIZE - 1)]);
+ skw = sk_head(&udptable[hashwild & (UDP_HTABLE_SIZE - 1)]);
+
+ sk = udp_v4_mcast_next(sk, hash, hport, daddr, uh->source, saddr, dif);
+ if (!sk) {
+ hash = hashwild;
+ sk = udp_v4_mcast_next(skw, hash, hport, daddr, uh->source,
+ saddr, dif);
+ }
+ if (sk) {
do {
struct sk_buff *skb1 = skb;
-
- sknext = udp_v4_mcast_next(sk_next(sk), uh->dest, daddr,
- uh->source, saddr, dif);
- if(sknext)
+ sknext = udp_v4_mcast_next(sk_next(sk), hash, hport,
+ daddr, uh->source, saddr, dif);
+ if (!sknext && hash != hashwild) {
+ hash = hashwild;
+ sknext = udp_v4_mcast_next(skw, hash, hport,
+ daddr, uh->source, saddr, dif);
+ }
+ if (sknext)
skb1 = skb_clone(skb, GFP_ATOMIC);
- if(skb1) {
+ if (skb1) {
int ret = udp_queue_rcv_skb(sk, skb1);
if (ret > 0)
- /* we should probably re-process instead
- * of dropping packets here. */
+ /*
+ * we should probably re-process
+ * instead of dropping packets here.
+ */
kfree_skb(skb1);
}
sk = sknext;
- } while(sknext);
+ } while (sknext);
} else
kfree_skb(skb);
read_unlock(&udp_hash_lock);
@@ -1166,25 +1249,37 @@ static int __udp4_lib_mcast_deliver(struct sk_buff *skb,
* Otherwise, csum completion requires checksumming packet body,
* including udp header and folding it to skb->csum.
*/
-static inline void udp4_csum_init(struct sk_buff *skb, struct udphdr *uh)
+static inline int udp4_csum_init(struct sk_buff *skb, struct udphdr *uh,
+ int proto)
{
+ const struct iphdr *iph;
+ int err;
+
+ UDP_SKB_CB(skb)->partial_cov = 0;
+ UDP_SKB_CB(skb)->cscov = skb->len;
+
+ if (proto == IPPROTO_UDPLITE) {
+ err = udplite_checksum_init(skb, uh);
+ if (err)
+ return err;
+ }
+
+ iph = ip_hdr(skb);
if (uh->check == 0) {
skb->ip_summed = CHECKSUM_UNNECESSARY;
} else if (skb->ip_summed == CHECKSUM_COMPLETE) {
- if (!csum_tcpudp_magic(skb->nh.iph->saddr, skb->nh.iph->daddr,
- skb->len, IPPROTO_UDP, skb->csum ))
+ if (!csum_tcpudp_magic(iph->saddr, iph->daddr, skb->len,
+ proto, skb->csum))
skb->ip_summed = CHECKSUM_UNNECESSARY;
}
- if (skb->ip_summed != CHECKSUM_UNNECESSARY)
- skb->csum = csum_tcpudp_nofold(skb->nh.iph->saddr,
- skb->nh.iph->daddr,
- skb->len, IPPROTO_UDP, 0);
+ if (!skb_csum_unnecessary(skb))
+ skb->csum = csum_tcpudp_nofold(iph->saddr, iph->daddr,
+ skb->len, proto, 0);
/* Probably, we should checksum udp header (it should be in cache
* in any case) and data in tiny packets (< rx copybreak).
*/
- /* UDP = UDP-Lite with a non-partial checksum coverage */
- UDP_SKB_CB(skb)->partial_cov = 0;
+ return 0;
}
/*
@@ -1192,14 +1287,14 @@ static inline void udp4_csum_init(struct sk_buff *skb, struct udphdr *uh)
*/
int __udp4_lib_rcv(struct sk_buff *skb, struct hlist_head udptable[],
- int is_udplite)
+ int proto)
{
struct sock *sk;
- struct udphdr *uh = skb->h.uh;
+ struct udphdr *uh = udp_hdr(skb);
unsigned short ulen;
struct rtable *rt = (struct rtable*)skb->dst;
- __be32 saddr = skb->nh.iph->saddr;
- __be32 daddr = skb->nh.iph->daddr;
+ __be32 saddr = ip_hdr(skb)->saddr;
+ __be32 daddr = ip_hdr(skb)->daddr;
/*
* Validate the packet.
@@ -1211,24 +1306,21 @@ int __udp4_lib_rcv(struct sk_buff *skb, struct hlist_head udptable[],
if (ulen > skb->len)
goto short_packet;
- if(! is_udplite ) { /* UDP validates ulen. */
-
+ if (proto == IPPROTO_UDP) {
+ /* UDP validates ulen. */
if (ulen < sizeof(*uh) || pskb_trim_rcsum(skb, ulen))
goto short_packet;
- uh = skb->h.uh;
-
- udp4_csum_init(skb, uh);
-
- } else { /* UDP-Lite validates cscov. */
- if (udplite4_csum_init(skb, uh))
- goto csum_error;
+ uh = udp_hdr(skb);
}
- if(rt->rt_flags & (RTCF_BROADCAST|RTCF_MULTICAST))
+ if (udp4_csum_init(skb, uh, proto))
+ goto csum_error;
+
+ if (rt->rt_flags & (RTCF_BROADCAST|RTCF_MULTICAST))
return __udp4_lib_mcast_deliver(skb, uh, saddr, daddr, udptable);
sk = __udp4_lib_lookup(saddr, uh->source, daddr, uh->dest,
- skb->dev->ifindex, udptable );
+ skb->dev->ifindex, udptable);
if (sk != NULL) {
int ret = udp_queue_rcv_skb(sk, skb);
@@ -1250,7 +1342,7 @@ int __udp4_lib_rcv(struct sk_buff *skb, struct hlist_head udptable[],
if (udp_lib_checksum_complete(skb))
goto csum_error;
- UDP_INC_STATS_BH(UDP_MIB_NOPORTS, is_udplite);
+ UDP_INC_STATS_BH(UDP_MIB_NOPORTS, proto == IPPROTO_UDPLITE);
icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PORT_UNREACH, 0);
/*
@@ -1258,11 +1350,11 @@ int __udp4_lib_rcv(struct sk_buff *skb, struct hlist_head udptable[],
* don't wanna listen. Ignore it.
*/
kfree_skb(skb);
- return(0);
+ return 0;
short_packet:
LIMIT_NETDEBUG(KERN_DEBUG "UDP%s: short packet: From %u.%u.%u.%u:%u %d/%d to %u.%u.%u.%u:%u\n",
- is_udplite? "-Lite" : "",
+ proto == IPPROTO_UDPLITE ? "-Lite" : "",
NIPQUAD(saddr),
ntohs(uh->source),
ulen,
@@ -1277,21 +1369,21 @@ csum_error:
* the network is concerned, anyway) as per 4.1.3.4 (MUST).
*/
LIMIT_NETDEBUG(KERN_DEBUG "UDP%s: bad checksum. From %d.%d.%d.%d:%d to %d.%d.%d.%d:%d ulen %d\n",
- is_udplite? "-Lite" : "",
+ proto == IPPROTO_UDPLITE ? "-Lite" : "",
NIPQUAD(saddr),
ntohs(uh->source),
NIPQUAD(daddr),
ntohs(uh->dest),
ulen);
drop:
- UDP_INC_STATS_BH(UDP_MIB_INERRORS, is_udplite);
+ UDP_INC_STATS_BH(UDP_MIB_INERRORS, proto == IPPROTO_UDPLITE);
kfree_skb(skb);
- return(0);
+ return 0;
}
-__inline__ int udp_rcv(struct sk_buff *skb)
+int udp_rcv(struct sk_buff *skb)
{
- return __udp4_lib_rcv(skb, udp_hash, 0);
+ return __udp4_lib_rcv(skb, udp_hash, IPPROTO_UDP);
}
int udp_destroy_sock(struct sock *sk)
@@ -1313,13 +1405,13 @@ int udp_lib_setsockopt(struct sock *sk, int level, int optname,
int val;
int err = 0;
- if(optlen<sizeof(int))
+ if (optlen < sizeof(int))
return -EINVAL;
if (get_user(val, (int __user *)optval))
return -EFAULT;
- switch(optname) {
+ switch (optname) {
case UDP_CORK:
if (val != 0) {
up->corkflag = 1;
@@ -1373,7 +1465,7 @@ int udp_lib_setsockopt(struct sock *sk, int level, int optname,
default:
err = -ENOPROTOOPT;
break;
- };
+ }
return err;
}
@@ -1404,15 +1496,15 @@ int udp_lib_getsockopt(struct sock *sk, int level, int optname,
struct udp_sock *up = udp_sk(sk);
int val, len;
- if(get_user(len,optlen))
+ if (get_user(len, optlen))
return -EFAULT;
len = min_t(unsigned int, len, sizeof(int));
- if(len < 0)
+ if (len < 0)
return -EINVAL;
- switch(optname) {
+ switch (optname) {
case UDP_CORK:
val = up->corkflag;
break;
@@ -1433,11 +1525,11 @@ int udp_lib_getsockopt(struct sock *sk, int level, int optname,
default:
return -ENOPROTOOPT;
- };
+ }
- if(put_user(len, optlen))
+ if (put_user(len, optlen))
return -EFAULT;
- if(copy_to_user(optval, &val,len))
+ if (copy_to_user(optval, &val, len))
return -EFAULT;
return 0;
}
@@ -1486,15 +1578,11 @@ unsigned int udp_poll(struct file *file, struct socket *sock, poll_table *wait)
struct sk_buff *skb;
spin_lock_bh(&rcvq->lock);
- while ((skb = skb_peek(rcvq)) != NULL) {
- if (udp_lib_checksum_complete(skb)) {
- UDP_INC_STATS_BH(UDP_MIB_INERRORS, is_lite);
- __skb_unlink(skb, rcvq);
- kfree_skb(skb);
- } else {
- skb->ip_summed = CHECKSUM_UNNECESSARY;
- break;
- }
+ while ((skb = skb_peek(rcvq)) != NULL &&
+ udp_lib_checksum_complete(skb)) {
+ UDP_INC_STATS_BH(UDP_MIB_INERRORS, is_lite);
+ __skb_unlink(skb, rcvq);
+ kfree_skb(skb);
}
spin_unlock_bh(&rcvq->lock);
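
The udp_poll() hunk above collapses the peek-and-drop loop: datagrams at the head of the receive queue whose deferred checksum fails are unlinked and counted as errors until a good head (or an empty queue) remains. A small stand-in model of that loop:

/* Stand-in model of the rewritten loop: drop queued datagrams with a
 * failed deferred checksum until a good head (or empty queue) remains. */
#include <stdio.h>

struct dgram { int bad_csum; struct dgram *next; };

static struct dgram *drop_bad_heads(struct dgram *q, int *in_errors)
{
	/* while ((skb = skb_peek(rcvq)) && udp_lib_checksum_complete(skb)) */
	while (q && q->bad_csum) {
		(*in_errors)++;       /* UDP_MIB_INERRORS */
		q = q->next;          /* __skb_unlink() + kfree_skb() */
	}
	return q;                     /* first readable datagram, if any */
}

int main(void)
{
	struct dgram good = { 0, NULL }, bad2 = { 1, &good },
		     bad1 = { 1, &bad2 };
	int errs = 0;
	struct dgram *head = drop_bad_heads(&bad1, &errs);

	printf("dropped %d, readable head: %s\n", errs, head ? "yes" : "no");
	return 0;
}
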
@@ -1573,7 +1661,7 @@ static struct sock *udp_get_idx(struct seq_file *seq, loff_t pos)
struct sock *sk = udp_get_first(seq);
if (sk)
- while(pos && (sk = udp_get_next(seq, sk)) != NULL)
+ while (pos && (sk = udp_get_next(seq, sk)) != NULL)
--pos;
return pos ? NULL : sk;
}
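
udp4_csum_init() above merges the UDP and UDP-Lite setup into one routine keyed on proto: a zero checksum is legal only for plain UDP, a CHECKSUM_COMPLETE packet can be verified against the pseudo-header immediately, and anything else keeps a seeded pseudo-header sum for later full verification. A sketch of that decision tree — the enum, struct and return codes are illustrative stand-ins, not the kernel's types:

/* Sketch of the udp4_csum_init() decision tree with stand-in types. */
#include <stdio.h>

enum summed { CHECKSUM_NONE, CHECKSUM_COMPLETE, CHECKSUM_UNNECESSARY };

struct pkt {
	enum summed ip_summed;
	unsigned short uh_check;    /* checksum field of the UDP header */
	int is_udplite;
};

/* returns 0 to proceed, -1 to model the csum_error path */
static int csum_init(struct pkt *p, int pseudo_hdr_sum_ok)
{
	if (p->is_udplite) {
		if (p->uh_check == 0)
			return -1;  /* zero csum is invalid for UDP-Lite */
	} else if (p->uh_check == 0) {
		/* plain UDP: zero means the sender sent no checksum */
		p->ip_summed = CHECKSUM_UNNECESSARY;
		return 0;
	}
	if (p->ip_summed == CHECKSUM_COMPLETE && pseudo_hdr_sum_ok)
		/* hardware-summed payload verified right now */
		p->ip_summed = CHECKSUM_UNNECESSARY;
	/* otherwise a pseudo-header sum is seeded and the packet is
	 * checksummed in full later (on copy, or from udp_poll) */
	return 0;
}

int main(void)
{
	struct pkt p = { CHECKSUM_NONE, 0, 0 };

	printf("plain UDP, zero csum -> %d, summed=%d\n",
	       csum_init(&p, 0), (int)p.ip_summed);
	return 0;
}
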
diff --git a/net/ipv4/udplite.c b/net/ipv4/udplite.c
index b28fe1edf98..f34fd686a8f 100644
--- a/net/ipv4/udplite.c
+++ b/net/ipv4/udplite.c
@@ -31,7 +31,7 @@ static int udplite_v4_get_port(struct sock *sk, unsigned short snum)
static int udplite_rcv(struct sk_buff *skb)
{
- return __udp4_lib_rcv(skb, udplite_hash, 1);
+ return __udp4_lib_rcv(skb, udplite_hash, IPPROTO_UDPLITE);
}
static void udplite_err(struct sk_buff *skb, u32 info)
diff --git a/net/ipv4/xfrm4_input.c b/net/ipv4/xfrm4_input.c
index 78e80deb7e8..5ceca951d73 100644
--- a/net/ipv4/xfrm4_input.c
+++ b/net/ipv4/xfrm4_input.c
@@ -28,7 +28,7 @@ static int xfrm4_parse_spi(struct sk_buff *skb, u8 nexthdr, __be32 *spi, __be32
switch (nexthdr) {
case IPPROTO_IPIP:
case IPPROTO_IPV6:
- *spi = skb->nh.iph->saddr;
+ *spi = ip_hdr(skb)->saddr;
*seq = 0;
return 0;
}
@@ -39,9 +39,9 @@ static int xfrm4_parse_spi(struct sk_buff *skb, u8 nexthdr, __be32 *spi, __be32
#ifdef CONFIG_NETFILTER
static inline int xfrm4_rcv_encap_finish(struct sk_buff *skb)
{
- struct iphdr *iph = skb->nh.iph;
-
if (skb->dst == NULL) {
+ const struct iphdr *iph = ip_hdr(skb);
+
if (ip_route_input(skb, iph->daddr, iph->saddr, iph->tos,
skb->dev))
goto drop;
@@ -55,18 +55,18 @@ drop:
int xfrm4_rcv_encap(struct sk_buff *skb, __u16 encap_type)
{
- int err;
__be32 spi, seq;
struct xfrm_state *xfrm_vec[XFRM_MAX_DEPTH];
struct xfrm_state *x;
int xfrm_nr = 0;
int decaps = 0;
+ int err = xfrm4_parse_spi(skb, ip_hdr(skb)->protocol, &spi, &seq);
- if ((err = xfrm4_parse_spi(skb, skb->nh.iph->protocol, &spi, &seq)) != 0)
+ if (err != 0)
goto drop;
do {
- struct iphdr *iph = skb->nh.iph;
+ const struct iphdr *iph = ip_hdr(skb);
if (xfrm_nr == XFRM_MAX_DEPTH)
goto drop;
@@ -113,7 +113,8 @@ int xfrm4_rcv_encap(struct sk_buff *skb, __u16 encap_type)
break;
}
- if ((err = xfrm_parse_spi(skb, skb->nh.iph->protocol, &spi, &seq)) < 0)
+ err = xfrm_parse_spi(skb, ip_hdr(skb)->protocol, &spi, &seq);
+ if (err < 0)
goto drop;
} while (!err);
@@ -146,15 +147,15 @@ int xfrm4_rcv_encap(struct sk_buff *skb, __u16 encap_type)
return 0;
} else {
#ifdef CONFIG_NETFILTER
- __skb_push(skb, skb->data - skb->nh.raw);
- skb->nh.iph->tot_len = htons(skb->len);
- ip_send_check(skb->nh.iph);
+ __skb_push(skb, skb->data - skb_network_header(skb));
+ ip_hdr(skb)->tot_len = htons(skb->len);
+ ip_send_check(ip_hdr(skb));
NF_HOOK(PF_INET, NF_IP_PRE_ROUTING, skb, skb->dev, NULL,
xfrm4_rcv_encap_finish);
return 0;
#else
- return -skb->nh.iph->protocol;
+ return -ip_hdr(skb)->protocol;
#endif
}
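
The substitutions running through these files — skb->nh.iph becomes ip_hdr(skb), skb->nh.raw becomes skb_network_header(skb) — trade cached pointers for accessors computed from stored offsets, which stay valid if the head buffer is reallocated. A minimal model of the idiom; the struct is a toy, not the kernel's sk_buff:

/* Minimal model of offset-based header accessors (toy struct). */
#include <stdio.h>

struct toy_iphdr { unsigned char version_ihl, tos; unsigned short tot_len; };

struct toy_skb {
	unsigned char *head;        /* may move when the skb is expanded */
	unsigned long network_off;  /* the offset survives the move */
};

static unsigned char *network_header(const struct toy_skb *skb)
{
	return skb->head + skb->network_off;
}

static struct toy_iphdr *toy_ip_hdr(const struct toy_skb *skb)
{
	return (struct toy_iphdr *)network_header(skb);
}

int main(void)
{
	unsigned char storage[64] = { 0x45 };   /* IPv4, ihl = 5 */
	struct toy_skb skb = { storage, 0 };

	printf("version/ihl: 0x%02x\n", toy_ip_hdr(&skb)->version_ihl);
	/* after a pskb_expand_head()-style reallocation only 'head'
	 * changes; every derived header pointer stays correct */
	return 0;
}
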
diff --git a/net/ipv4/xfrm4_mode_beet.c b/net/ipv4/xfrm4_mode_beet.c
index d419e15d980..a73e710740c 100644
--- a/net/ipv4/xfrm4_mode_beet.c
+++ b/net/ipv4/xfrm4_mode_beet.c
@@ -29,20 +29,21 @@
*/
static int xfrm4_beet_output(struct xfrm_state *x, struct sk_buff *skb)
{
- struct iphdr *iph, *top_iph = NULL;
+ struct iphdr *iph, *top_iph;
int hdrlen, optlen;
- iph = skb->nh.iph;
- skb->h.ipiph = iph;
+ iph = ip_hdr(skb);
+ skb->transport_header = skb->network_header;
hdrlen = 0;
optlen = iph->ihl * 4 - sizeof(*iph);
if (unlikely(optlen))
hdrlen += IPV4_BEET_PHMAXLEN - (optlen & 4);
- skb->nh.raw = skb_push(skb, x->props.header_len + hdrlen);
- top_iph = skb->nh.iph;
- skb->h.raw += sizeof(*iph) - hdrlen;
+ skb_push(skb, x->props.header_len - IPV4_BEET_PHMAXLEN + hdrlen);
+ skb_reset_network_header(skb);
+ top_iph = ip_hdr(skb);
+ skb->transport_header += sizeof(*iph) - hdrlen;
memmove(top_iph, iph, sizeof(*iph));
if (unlikely(optlen)) {
@@ -50,7 +51,7 @@ static int xfrm4_beet_output(struct xfrm_state *x, struct sk_buff *skb)
BUG_ON(optlen < 0);
- ph = (struct ip_beet_phdr *)skb->h.raw;
+ ph = (struct ip_beet_phdr *)skb_transport_header(skb);
ph->padlen = 4 - (optlen & 4);
ph->hdrlen = optlen / 8;
ph->nexthdr = top_iph->protocol;
@@ -69,20 +70,18 @@ static int xfrm4_beet_output(struct xfrm_state *x, struct sk_buff *skb)
static int xfrm4_beet_input(struct xfrm_state *x, struct sk_buff *skb)
{
- struct iphdr *iph = skb->nh.iph;
+ struct iphdr *iph = ip_hdr(skb);
int phlen = 0;
int optlen = 0;
- __u8 ph_nexthdr = 0, protocol = 0;
+ u8 ph_nexthdr = 0;
int err = -EINVAL;
- protocol = iph->protocol;
-
if (unlikely(iph->protocol == IPPROTO_BEETPH)) {
struct ip_beet_phdr *ph;
if (!pskb_may_pull(skb, sizeof(*ph)))
goto out;
- ph = (struct ip_beet_phdr *)(skb->h.ipiph + 1);
+ ph = (struct ip_beet_phdr *)(ipip_hdr(skb) + 1);
phlen = sizeof(*ph) + ph->padlen;
optlen = ph->hdrlen * 8 + (IPV4_BEET_PHMAXLEN - phlen);
@@ -96,22 +95,20 @@ static int xfrm4_beet_input(struct xfrm_state *x, struct sk_buff *skb)
ph_nexthdr = ph->nexthdr;
}
- skb->nh.raw = skb->data + (phlen - sizeof(*iph));
- memmove(skb->nh.raw, iph, sizeof(*iph));
- skb->h.raw = skb->data + (phlen + optlen);
- skb->data = skb->h.raw;
+ skb_set_network_header(skb, phlen - sizeof(*iph));
+ memmove(skb_network_header(skb), iph, sizeof(*iph));
+ skb_set_transport_header(skb, phlen + optlen);
+ skb->data = skb_transport_header(skb);
- iph = skb->nh.iph;
+ iph = ip_hdr(skb);
iph->ihl = (sizeof(*iph) + optlen) / 4;
iph->tot_len = htons(skb->len + iph->ihl * 4);
iph->daddr = x->sel.daddr.a4;
iph->saddr = x->sel.saddr.a4;
if (ph_nexthdr)
iph->protocol = ph_nexthdr;
- else
- iph->protocol = protocol;
iph->check = 0;
- iph->check = ip_fast_csum(skb->nh.raw, iph->ihl);
+ iph->check = ip_fast_csum(skb_network_header(skb), iph->ihl);
err = 0;
out:
return err;
diff --git a/net/ipv4/xfrm4_mode_transport.c b/net/ipv4/xfrm4_mode_transport.c
index 92676b7e403..601047161ea 100644
--- a/net/ipv4/xfrm4_mode_transport.c
+++ b/net/ipv4/xfrm4_mode_transport.c
@@ -23,16 +23,13 @@
*/
static int xfrm4_transport_output(struct xfrm_state *x, struct sk_buff *skb)
{
- struct iphdr *iph;
- int ihl;
+ struct iphdr *iph = ip_hdr(skb);
+ int ihl = iph->ihl * 4;
- iph = skb->nh.iph;
- skb->h.ipiph = iph;
-
- ihl = iph->ihl * 4;
- skb->h.raw += ihl;
-
- skb->nh.raw = memmove(skb_push(skb, x->props.header_len), iph, ihl);
+ skb->transport_header = skb->network_header + ihl;
+ skb_push(skb, x->props.header_len);
+ skb_reset_network_header(skb);
+ memmove(skb_network_header(skb), iph, ihl);
return 0;
}
@@ -46,12 +43,15 @@ static int xfrm4_transport_output(struct xfrm_state *x, struct sk_buff *skb)
*/
static int xfrm4_transport_input(struct xfrm_state *x, struct sk_buff *skb)
{
- int ihl = skb->data - skb->h.raw;
+ int ihl = skb->data - skb_transport_header(skb);
- if (skb->h.raw != skb->nh.raw)
- skb->nh.raw = memmove(skb->h.raw, skb->nh.raw, ihl);
- skb->nh.iph->tot_len = htons(skb->len + ihl);
- skb->h.raw = skb->data;
+ if (skb->transport_header != skb->network_header) {
+ memmove(skb_transport_header(skb),
+ skb_network_header(skb), ihl);
+ skb->network_header = skb->transport_header;
+ }
+ ip_hdr(skb)->tot_len = htons(skb->len + ihl);
+ skb_reset_transport_header(skb);
return 0;
}
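
xfrm4_transport_output() above opens room for the IPsec header by pushing header_len bytes, resetting the network header to the new front, and sliding the original IP header down with memmove, leaving the gap between IP header and payload for ESP/AH. A toy buffer version of the shuffle, with offsets and sizes assumed purely for illustration:

/* Toy buffer version of the transport-mode shuffle: assumed 20-byte IP
 * header at offset 16 and an 8-byte IPsec header, for illustration. */
#include <stdio.h>
#include <string.h>

int main(void)
{
	unsigned char buf[64] = "................IIIIIIIIIIIIIIIIIIIIpayload";
	int ihl = 20, hdr_len = 8;              /* x->props.header_len */
	unsigned char *net = buf + 16;          /* old network header */
	unsigned char *new_net = net - hdr_len; /* skb_push + reset */

	/* transport header = network header + ihl; the IP header slides
	 * down, leaving hdr_len bytes between it and the payload */
	memmove(new_net, net, ihl);
	printf("IPsec slot at offset %ld, payload intact: %s\n",
	       (long)(new_net + ihl - buf), buf + 36);
	return 0;
}
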
diff --git a/net/ipv4/xfrm4_mode_tunnel.c b/net/ipv4/xfrm4_mode_tunnel.c
index ceb4376f572..a2f2e6a5ec5 100644
--- a/net/ipv4/xfrm4_mode_tunnel.c
+++ b/net/ipv4/xfrm4_mode_tunnel.c
@@ -16,8 +16,8 @@
static inline void ipip_ecn_decapsulate(struct sk_buff *skb)
{
- struct iphdr *outer_iph = skb->nh.iph;
- struct iphdr *inner_iph = skb->h.ipiph;
+ struct iphdr *outer_iph = ip_hdr(skb);
+ struct iphdr *inner_iph = ipip_hdr(skb);
if (INET_ECN_is_ce(outer_iph->tos))
IP_ECN_set_ce(inner_iph);
@@ -26,7 +26,7 @@ static inline void ipip_ecn_decapsulate(struct sk_buff *skb)
static inline void ipip6_ecn_decapsulate(struct iphdr *iph, struct sk_buff *skb)
{
if (INET_ECN_is_ce(iph->tos))
- IP6_ECN_set_ce(skb->nh.ipv6h);
+ IP6_ECN_set_ce(ipv6_hdr(skb));
}
/* Add encapsulation header.
@@ -46,11 +46,12 @@ static int xfrm4_tunnel_output(struct xfrm_state *x, struct sk_buff *skb)
struct iphdr *iph, *top_iph;
int flags;
- iph = skb->nh.iph;
- skb->h.ipiph = iph;
+ iph = ip_hdr(skb);
+ skb->transport_header = skb->network_header;
- skb->nh.raw = skb_push(skb, x->props.header_len);
- top_iph = skb->nh.iph;
+ skb_push(skb, x->props.header_len);
+ skb_reset_network_header(skb);
+ top_iph = ip_hdr(skb);
top_iph->ihl = 5;
top_iph->version = 4;
@@ -90,10 +91,11 @@ static int xfrm4_tunnel_output(struct xfrm_state *x, struct sk_buff *skb)
static int xfrm4_tunnel_input(struct xfrm_state *x, struct sk_buff *skb)
{
- struct iphdr *iph = skb->nh.iph;
+ struct iphdr *iph = ip_hdr(skb);
+ const unsigned char *old_mac;
int err = -EINVAL;
- switch(iph->protocol){
+ switch (iph->protocol) {
case IPPROTO_IPIP:
break;
#if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE)
@@ -111,10 +113,10 @@ static int xfrm4_tunnel_input(struct xfrm_state *x, struct sk_buff *skb)
(err = pskb_expand_head(skb, 0, 0, GFP_ATOMIC)))
goto out;
- iph = skb->nh.iph;
+ iph = ip_hdr(skb);
if (iph->protocol == IPPROTO_IPIP) {
if (x->props.flags & XFRM_STATE_DECAP_DSCP)
- ipv4_copy_dscp(iph, skb->h.ipiph);
+ ipv4_copy_dscp(iph, ipip_hdr(skb));
if (!(x->props.flags & XFRM_STATE_NOECN))
ipip_ecn_decapsulate(skb);
}
@@ -125,9 +127,10 @@ static int xfrm4_tunnel_input(struct xfrm_state *x, struct sk_buff *skb)
skb->protocol = htons(ETH_P_IPV6);
}
#endif
- skb->mac.raw = memmove(skb->data - skb->mac_len,
- skb->mac.raw, skb->mac_len);
- skb->nh.raw = skb->data;
+ old_mac = skb_mac_header(skb);
+ skb_set_mac_header(skb, -skb->mac_len);
+ memmove(skb_mac_header(skb), old_mac, skb->mac_len);
+ skb_reset_network_header(skb);
err = 0;
out:
diff --git a/net/ipv4/xfrm4_output.c b/net/ipv4/xfrm4_output.c
index 038ca160fe2..44ef208a75c 100644
--- a/net/ipv4/xfrm4_output.c
+++ b/net/ipv4/xfrm4_output.c
@@ -22,14 +22,13 @@ static int xfrm4_tunnel_check_size(struct sk_buff *skb)
{
int mtu, ret = 0;
struct dst_entry *dst;
- struct iphdr *iph = skb->nh.iph;
if (IPCB(skb)->flags & IPSKB_XFRM_TUNNEL_SIZE)
goto out;
IPCB(skb)->flags |= IPSKB_XFRM_TUNNEL_SIZE;
- if (!(iph->frag_off & htons(IP_DF)) || skb->local_df)
+ if (!(ip_hdr(skb)->frag_off & htons(IP_DF)) || skb->local_df)
goto out;
dst = skb->dst;
diff --git a/net/ipv4/xfrm4_policy.c b/net/ipv4/xfrm4_policy.c
index 5d51a2af34c..4ff8ed30024 100644
--- a/net/ipv4/xfrm4_policy.c
+++ b/net/ipv4/xfrm4_policy.c
@@ -119,7 +119,7 @@ __xfrm4_bundle_create(struct xfrm_policy *policy, struct xfrm_state **xfrm, int
if (xfrm[i]->props.mode == XFRM_MODE_TUNNEL) {
unsigned short encap_family = xfrm[i]->props.family;
- switch(encap_family) {
+ switch (encap_family) {
case AF_INET:
fl_tunnel.fl4_dst = xfrm[i]->id.daddr.a4;
fl_tunnel.fl4_src = xfrm[i]->props.saddr.a4;
@@ -209,8 +209,8 @@ error:
static void
_decode_session4(struct sk_buff *skb, struct flowi *fl)
{
- struct iphdr *iph = skb->nh.iph;
- u8 *xprth = skb->nh.raw + iph->ihl*4;
+ struct iphdr *iph = ip_hdr(skb);
+ u8 *xprth = skb_network_header(skb) + iph->ihl * 4;
memset(fl, 0, sizeof(struct flowi));
if (!(iph->frag_off & htons(IP_MF | IP_OFFSET))) {
@@ -263,7 +263,7 @@ _decode_session4(struct sk_buff *skb, struct flowi *fl)
default:
fl->fl_ipsec_spi = 0;
break;
- };
+ }
}
fl->proto = iph->protocol;
fl->fl4_dst = iph->daddr;
diff --git a/net/ipv4/xfrm4_tunnel.c b/net/ipv4/xfrm4_tunnel.c
index 3eef06454da..56851030455 100644
--- a/net/ipv4/xfrm4_tunnel.c
+++ b/net/ipv4/xfrm4_tunnel.c
@@ -12,9 +12,8 @@
static int ipip_output(struct xfrm_state *x, struct sk_buff *skb)
{
- struct iphdr *iph;
+ struct iphdr *iph = ip_hdr(skb);
- iph = skb->nh.iph;
iph->tot_len = htons(skb->len);
ip_send_check(iph);
diff --git a/net/ipv6/Kconfig b/net/ipv6/Kconfig
index 79682efb14b..8e5d54f23b4 100644
--- a/net/ipv6/Kconfig
+++ b/net/ipv6/Kconfig
@@ -57,6 +57,16 @@ config IPV6_ROUTE_INFO
If unsure, say N.
+config IPV6_OPTIMISTIC_DAD
+ bool "IPv6: Enable RFC 4429 Optimistic DAD (EXPERIMENTAL)"
+ depends on IPV6 && EXPERIMENTAL
+ ---help---
+ This is experimental support for optimistic Duplicate
+ Address Detection. It allows for autoconfigured addresses
+ to be used more quickly.
+
+ If unsure, say N.
+
config INET6_AH
tristate "IPv6: AH transformation"
depends on IPV6
diff --git a/net/ipv6/Makefile b/net/ipv6/Makefile
index d460017bb35..bb33309044c 100644
--- a/net/ipv6/Makefile
+++ b/net/ipv6/Makefile
@@ -7,14 +7,15 @@ obj-$(CONFIG_IPV6) += ipv6.o
ipv6-objs := af_inet6.o anycast.o ip6_output.o ip6_input.o addrconf.o \
route.o ip6_fib.o ipv6_sockglue.o ndisc.o udp.o udplite.o \
raw.o protocol.o icmp.o mcast.o reassembly.o tcp_ipv6.o \
- exthdrs.o sysctl_net_ipv6.o datagram.o proc.o \
- ip6_flowlabel.o ipv6_syms.o inet6_connection_sock.o
+ exthdrs.o sysctl_net_ipv6.o datagram.o \
+ ip6_flowlabel.o inet6_connection_sock.o
ipv6-$(CONFIG_XFRM) += xfrm6_policy.o xfrm6_state.o xfrm6_input.o \
xfrm6_output.o
ipv6-$(CONFIG_NETFILTER) += netfilter.o
ipv6-$(CONFIG_IPV6_MULTIPLE_TABLES) += fib6_rules.o
ipv6-$(CONFIG_IPV6_MIP6) += mip6.o
+ipv6-$(CONFIG_PROC_FS) += proc.o
ipv6-objs += $(ipv6-y)
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index 452a82ce479..d02685c6bc6 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -81,6 +81,7 @@
#endif
#include <asm/uaccess.h>
+#include <asm/unaligned.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
@@ -208,9 +209,7 @@ static struct ipv6_devconf ipv6_devconf_dflt __read_mostly = {
};
/* IPv6 Wildcard Address and Loopback Address defined by RFC2553 */
-#if 0
const struct in6_addr in6addr_any = IN6ADDR_ANY_INIT;
-#endif
const struct in6_addr in6addr_loopback = IN6ADDR_LOOPBACK_INIT;
static void addrconf_del_timer(struct inet6_ifaddr *ifp)
@@ -246,6 +245,37 @@ static void addrconf_mod_timer(struct inet6_ifaddr *ifp,
add_timer(&ifp->timer);
}
+static int snmp6_alloc_dev(struct inet6_dev *idev)
+{
+ int err = -ENOMEM;
+
+ if (!idev || !idev->dev)
+ return -EINVAL;
+
+ if (snmp_mib_init((void **)idev->stats.ipv6,
+ sizeof(struct ipstats_mib),
+ __alignof__(struct ipstats_mib)) < 0)
+ goto err_ip;
+ if (snmp_mib_init((void **)idev->stats.icmpv6,
+ sizeof(struct icmpv6_mib),
+ __alignof__(struct icmpv6_mib)) < 0)
+ goto err_icmp;
+
+ return 0;
+
+err_icmp:
+ snmp_mib_free((void **)idev->stats.ipv6);
+err_ip:
+ return err;
+}
+
+static int snmp6_free_dev(struct inet6_dev *idev)
+{
+ snmp_mib_free((void **)idev->stats.icmpv6);
+ snmp_mib_free((void **)idev->stats.ipv6);
+ return 0;
+}
+
/* Nobody refers to this device, we may destroy it. */
static void in6_dev_finish_destroy_rcu(struct rcu_head *head)
@@ -271,6 +301,8 @@ void in6_dev_finish_destroy(struct inet6_dev *idev)
call_rcu(&idev->rcu, in6_dev_finish_destroy_rcu);
}
+EXPORT_SYMBOL(in6_dev_finish_destroy);
+
static struct inet6_dev * ipv6_add_dev(struct net_device *dev)
{
struct inet6_dev *ndev;
@@ -417,7 +449,7 @@ static void addrconf_forward_change(void)
struct inet6_dev *idev;
read_lock(&dev_base_lock);
- for (dev=dev_base; dev; dev=dev->next) {
+ for_each_netdev(dev) {
rcu_read_lock();
idev = __in6_dev_get(dev);
if (idev) {
@@ -528,6 +560,16 @@ ipv6_add_addr(struct inet6_dev *idev, const struct in6_addr *addr, int pfxlen,
ifa->rt = rt;
+ /*
+ * part one of RFC 4429, section 3.3
+ * We should not configure an address as
+ * optimistic if we do not yet know the link
+ * layer address of our nexthop router
+ */
+
+ if (rt->rt6i_nexthop == NULL)
+ ifa->flags &= ~IFA_F_OPTIMISTIC;
+
ifa->idev = idev;
in6_dev_hold(idev);
/* For caller */
@@ -704,6 +746,7 @@ static int ipv6_create_tempaddr(struct inet6_ifaddr *ifp, struct inet6_ifaddr *i
int tmp_plen;
int ret = 0;
int max_addresses;
+ u32 addr_flags;
write_lock(&idev->lock);
if (ift) {
@@ -761,10 +804,17 @@ retry:
spin_unlock_bh(&ifp->lock);
write_unlock(&idev->lock);
+
+ addr_flags = IFA_F_TEMPORARY;
+ /* set in addrconf_prefix_rcv() */
+ if (ifp->flags & IFA_F_OPTIMISTIC)
+ addr_flags |= IFA_F_OPTIMISTIC;
+
ift = !max_addresses ||
ipv6_count_addresses(idev) < max_addresses ?
ipv6_add_addr(idev, &addr, tmp_plen,
- ipv6_addr_type(&addr)&IPV6_ADDR_SCOPE_MASK, IFA_F_TEMPORARY) : NULL;
+ ipv6_addr_type(&addr)&IPV6_ADDR_SCOPE_MASK,
+ addr_flags) : NULL;
if (!ift || IS_ERR(ift)) {
in6_ifa_put(ifp);
in6_dev_put(idev);
@@ -861,7 +911,7 @@ int ipv6_dev_get_saddr(struct net_device *daddr_dev,
read_lock(&dev_base_lock);
rcu_read_lock();
- for (dev = dev_base; dev; dev=dev->next) {
+ for_each_netdev(dev) {
struct inet6_dev *idev;
struct inet6_ifaddr *ifa;
@@ -896,13 +946,14 @@ int ipv6_dev_get_saddr(struct net_device *daddr_dev,
* - Tentative Address (RFC2462 section 5.4)
* - A tentative address is not considered
* "assigned to an interface" in the traditional
- * sense.
+ * sense, unless it is also flagged as optimistic.
* - Candidate Source Address (section 4)
* - In any case, anycast addresses, multicast
* addresses, and the unspecified address MUST
* NOT be included in a candidate set.
*/
- if (ifa->flags & IFA_F_TENTATIVE)
+ if ((ifa->flags & IFA_F_TENTATIVE) &&
+ (!(ifa->flags & IFA_F_OPTIMISTIC)))
continue;
if (unlikely(score.addr_type == IPV6_ADDR_ANY ||
score.addr_type & IPV6_ADDR_MULTICAST)) {
@@ -961,15 +1012,17 @@ int ipv6_dev_get_saddr(struct net_device *daddr_dev,
}
}
- /* Rule 3: Avoid deprecated address */
+ /* Rule 3: Avoid deprecated and optimistic addresses */
if (hiscore.rule < 3) {
if (ipv6_saddr_preferred(hiscore.addr_type) ||
- !(ifa_result->flags & IFA_F_DEPRECATED))
+ (((ifa_result->flags &
+ (IFA_F_DEPRECATED|IFA_F_OPTIMISTIC)) == 0)))
hiscore.attrs |= IPV6_SADDR_SCORE_PREFERRED;
hiscore.rule++;
}
if (ipv6_saddr_preferred(score.addr_type) ||
- !(ifa->flags & IFA_F_DEPRECATED)) {
+ (((ifa->flags &
+ (IFA_F_DEPRECATED|IFA_F_OPTIMISTIC)) == 0))) {
score.attrs |= IPV6_SADDR_SCORE_PREFERRED;
if (!(hiscore.attrs & IPV6_SADDR_SCORE_PREFERRED)) {
score.rule = 3;
@@ -1107,8 +1160,10 @@ int ipv6_get_saddr(struct dst_entry *dst,
return ipv6_dev_get_saddr(dst ? ip6_dst_idev(dst)->dev : NULL, daddr, saddr);
}
+EXPORT_SYMBOL(ipv6_get_saddr);
-int ipv6_get_lladdr(struct net_device *dev, struct in6_addr *addr)
+int ipv6_get_lladdr(struct net_device *dev, struct in6_addr *addr,
+ unsigned char banned_flags)
{
struct inet6_dev *idev;
int err = -EADDRNOTAVAIL;
@@ -1119,7 +1174,7 @@ int ipv6_get_lladdr(struct net_device *dev, struct in6_addr *addr)
read_lock_bh(&idev->lock);
for (ifp=idev->addr_list; ifp; ifp=ifp->if_next) {
- if (ifp->scope == IFA_LINK && !(ifp->flags&IFA_F_TENTATIVE)) {
+ if (ifp->scope == IFA_LINK && !(ifp->flags & banned_flags)) {
ipv6_addr_copy(addr, &ifp->addr);
err = 0;
break;
@@ -1161,6 +1216,8 @@ int ipv6_chk_addr(struct in6_addr *addr, struct net_device *dev, int strict)
return ifp != NULL;
}
+EXPORT_SYMBOL(ipv6_chk_addr);
+
static
int ipv6_chk_same_addr(const struct in6_addr *addr, struct net_device *dev)
{
@@ -1669,6 +1726,13 @@ ok:
if (ifp == NULL && valid_lft) {
int max_addresses = in6_dev->cnf.max_addresses;
+ u32 addr_flags = 0;
+
+#ifdef CONFIG_IPV6_OPTIMISTIC_DAD
+ if (in6_dev->cnf.optimistic_dad &&
+ !ipv6_devconf.forwarding)
+ addr_flags = IFA_F_OPTIMISTIC;
+#endif
 /* Do not allow creating too many autoconfigured
 * addresses; this would be too easy a way to crash the kernel.
@@ -1676,7 +1740,8 @@ ok:
if (!max_addresses ||
ipv6_count_addresses(in6_dev) < max_addresses)
ifp = ipv6_add_addr(in6_dev, &addr, pinfo->prefix_len,
- addr_type&IPV6_ADDR_SCOPE_MASK, 0);
+ addr_type&IPV6_ADDR_SCOPE_MASK,
+ addr_flags);
if (!ifp || IS_ERR(ifp)) {
in6_dev_put(in6_dev);
@@ -1884,6 +1949,11 @@ static int inet6_addr_add(int ifindex, struct in6_addr *pfx, int plen,
addrconf_prefix_route(&ifp->addr, ifp->prefix_len, dev,
jiffies_to_clock_t(valid_lft * HZ), flags);
+ /*
+ * Note that section 3.1 of RFC 4429 indicates
+ * that the Optimistic flag should not be set for
+ * manually configured addresses
+ */
addrconf_dad_start(ifp, 0);
in6_ifa_put(ifp);
addrconf_verify(0);
@@ -1994,7 +2064,7 @@ static void sit_add_v4_addrs(struct inet6_dev *idev)
return;
}
- for (dev = dev_base; dev != NULL; dev = dev->next) {
+ for_each_netdev(dev) {
struct in_device * in_dev = __in_dev_get_rtnl(dev);
if (in_dev && (dev->flags & IFF_UP)) {
struct in_ifaddr * ifa;
@@ -2060,8 +2130,16 @@ static void init_loopback(struct net_device *dev)
static void addrconf_add_linklocal(struct inet6_dev *idev, struct in6_addr *addr)
{
struct inet6_ifaddr * ifp;
+ u32 addr_flags = IFA_F_PERMANENT;
+
+#ifdef CONFIG_IPV6_OPTIMISTIC_DAD
+ if (idev->cnf.optimistic_dad &&
+ !ipv6_devconf.forwarding)
+ addr_flags |= IFA_F_OPTIMISTIC;
+#endif
+
- ifp = ipv6_add_addr(idev, addr, 64, IFA_LINK, IFA_F_PERMANENT);
+ ifp = ipv6_add_addr(idev, addr, 64, IFA_LINK, addr_flags);
if (!IS_ERR(ifp)) {
addrconf_prefix_route(&ifp->addr, ifp->prefix_len, idev->dev, 0, 0);
addrconf_dad_start(ifp, 0);
@@ -2129,7 +2207,7 @@ ipv6_inherit_linklocal(struct inet6_dev *idev, struct net_device *link_dev)
{
struct in6_addr lladdr;
- if (!ipv6_get_lladdr(link_dev, &lladdr)) {
+ if (!ipv6_get_lladdr(link_dev, &lladdr, IFA_F_TENTATIVE)) {
addrconf_add_linklocal(idev, &lladdr);
return 0;
}
@@ -2147,7 +2225,7 @@ static void ip6_tnl_add_linklocal(struct inet6_dev *idev)
return;
}
/* then try to inherit it from any device */
- for (link_dev = dev_base; link_dev; link_dev = link_dev->next) {
+ for_each_netdev(link_dev) {
if (!ipv6_inherit_linklocal(idev, link_dev))
return;
}
@@ -2240,7 +2318,7 @@ static int addrconf_notify(struct notifier_block *this, unsigned long event,
default:
addrconf_dev_config(dev);
break;
- };
+ }
if (idev) {
if (run_pending)
addrconf_dad_run(idev);
@@ -2281,8 +2359,9 @@ static int addrconf_notify(struct notifier_block *this, unsigned long event,
break;
case NETDEV_CHANGENAME:
-#ifdef CONFIG_SYSCTL
if (idev) {
+ snmp6_unregister_dev(idev);
+#ifdef CONFIG_SYSCTL
addrconf_sysctl_unregister(&idev->cnf);
neigh_sysctl_unregister(idev->nd_parms);
neigh_sysctl_register(dev, idev->nd_parms,
@@ -2290,10 +2369,11 @@ static int addrconf_notify(struct notifier_block *this, unsigned long event,
&ndisc_ifinfo_sysctl_change,
NULL);
addrconf_sysctl_register(idev, &idev->cnf);
- }
#endif
+ snmp6_register_dev(idev);
+ }
break;
- };
+ }
return NOTIFY_OK;
}
@@ -2474,7 +2554,11 @@ static void addrconf_dad_kick(struct inet6_ifaddr *ifp)
unsigned long rand_num;
struct inet6_dev *idev = ifp->idev;
- rand_num = net_random() % (idev->cnf.rtr_solicit_delay ? : 1);
+ if (ifp->flags & IFA_F_OPTIMISTIC)
+ rand_num = 0;
+ else
+ rand_num = net_random() % (idev->cnf.rtr_solicit_delay ? : 1);
+
ifp->probes = idev->cnf.dad_transmits;
addrconf_mod_timer(ifp, AC_DAD, rand_num);
}
@@ -2496,7 +2580,7 @@ static void addrconf_dad_start(struct inet6_ifaddr *ifp, u32 flags)
if (dev->flags&(IFF_NOARP|IFF_LOOPBACK) ||
!(ifp->flags&IFA_F_TENTATIVE) ||
ifp->flags & IFA_F_NODAD) {
- ifp->flags &= ~IFA_F_TENTATIVE;
+ ifp->flags &= ~(IFA_F_TENTATIVE|IFA_F_OPTIMISTIC);
spin_unlock_bh(&ifp->lock);
read_unlock_bh(&idev->lock);
@@ -2516,6 +2600,14 @@ static void addrconf_dad_start(struct inet6_ifaddr *ifp, u32 flags)
addrconf_dad_stop(ifp);
return;
}
+
+ /*
+ * Optimistic nodes can start receiving
+ * frames right away
+ */
+ if (ifp->flags & IFA_F_OPTIMISTIC)
+ ip6_ins_rt(ifp->rt);
+
addrconf_dad_kick(ifp);
spin_unlock_bh(&ifp->lock);
out:
@@ -2540,7 +2632,7 @@ static void addrconf_dad_timer(unsigned long data)
* DAD was successful
*/
- ifp->flags &= ~IFA_F_TENTATIVE;
+ ifp->flags &= ~(IFA_F_TENTATIVE|IFA_F_OPTIMISTIC);
spin_unlock_bh(&ifp->lock);
read_unlock_bh(&idev->lock);
@@ -3164,16 +3256,16 @@ static int inet6_dump_addr(struct sk_buff *skb, struct netlink_callback *cb,
s_idx = cb->args[0];
s_ip_idx = ip_idx = cb->args[1];
- read_lock(&dev_base_lock);
- for (dev = dev_base, idx = 0; dev; dev = dev->next, idx++) {
+ idx = 0;
+ for_each_netdev(dev) {
if (idx < s_idx)
- continue;
+ goto cont;
if (idx > s_idx)
s_ip_idx = 0;
ip_idx = 0;
if ((idev = in6_dev_get(dev)) == NULL)
- continue;
+ goto cont;
read_lock_bh(&idev->lock);
switch (type) {
case UNICAST_ADDR:
@@ -3220,13 +3312,14 @@ static int inet6_dump_addr(struct sk_buff *skb, struct netlink_callback *cb,
}
read_unlock_bh(&idev->lock);
in6_dev_put(idev);
+cont:
+ idx++;
}
done:
if (err <= 0) {
read_unlock_bh(&idev->lock);
in6_dev_put(idev);
}
- read_unlock(&dev_base_lock);
cb->args[0] = idx;
cb->args[1] = ip_idx;
return skb->len;
@@ -3359,6 +3452,9 @@ static inline void ipv6_store_devconf(struct ipv6_devconf *cnf,
#endif
array[DEVCONF_PROXY_NDP] = cnf->proxy_ndp;
array[DEVCONF_ACCEPT_SOURCE_ROUTE] = cnf->accept_source_route;
+#ifdef CONFIG_IPV6_OPTIMISTIC_DAD
+ array[DEVCONF_OPTIMISTIC_DAD] = cnf->optimistic_dad;
+#endif
}
static inline size_t inet6_if_nlmsg_size(void)
@@ -3372,14 +3468,44 @@ static inline size_t inet6_if_nlmsg_size(void)
nla_total_size(4) /* IFLA_INET6_FLAGS */
+ nla_total_size(sizeof(struct ifla_cacheinfo))
+ nla_total_size(DEVCONF_MAX * 4) /* IFLA_INET6_CONF */
+ + nla_total_size(IPSTATS_MIB_MAX * 8) /* IFLA_INET6_STATS */
+ + nla_total_size(ICMP6_MIB_MAX * 8) /* IFLA_INET6_ICMP6STATS */
);
}
+static inline void __snmp6_fill_stats(u64 *stats, void **mib, int items,
+ int bytes)
+{
+ int i;
+ int pad = bytes - sizeof(u64) * items;
+ BUG_ON(pad < 0);
+
+ /* Use put_unaligned() because stats may not be aligned for u64. */
+ put_unaligned(items, &stats[0]);
+ for (i = 1; i < items; i++)
+ put_unaligned(snmp_fold_field(mib, i), &stats[i]);
+
+ memset(&stats[items], 0, pad);
+}
+
+static void snmp6_fill_stats(u64 *stats, struct inet6_dev *idev, int attrtype,
+ int bytes)
+{
+ switch (attrtype) {
+ case IFLA_INET6_STATS:
+ __snmp6_fill_stats(stats, (void **)idev->stats.ipv6, IPSTATS_MIB_MAX, bytes);
+ break;
+ case IFLA_INET6_ICMP6STATS:
+ __snmp6_fill_stats(stats, (void **)idev->stats.icmpv6, ICMP6_MIB_MAX, bytes);
+ break;
+ }
+}
+
static int inet6_fill_ifinfo(struct sk_buff *skb, struct inet6_dev *idev,
u32 pid, u32 seq, int event, unsigned int flags)
{
struct net_device *dev = idev->dev;
- struct nlattr *conf;
+ struct nlattr *nla;
struct ifinfomsg *hdr;
struct nlmsghdr *nlh;
void *protoinfo;
@@ -3419,12 +3545,22 @@ static int inet6_fill_ifinfo(struct sk_buff *skb, struct inet6_dev *idev,
ci.retrans_time = idev->nd_parms->retrans_time;
NLA_PUT(skb, IFLA_INET6_CACHEINFO, sizeof(ci), &ci);
- conf = nla_reserve(skb, IFLA_INET6_CONF, DEVCONF_MAX * sizeof(s32));
- if (conf == NULL)
+ nla = nla_reserve(skb, IFLA_INET6_CONF, DEVCONF_MAX * sizeof(s32));
+ if (nla == NULL)
goto nla_put_failure;
- ipv6_store_devconf(&idev->cnf, nla_data(conf), nla_len(conf));
+ ipv6_store_devconf(&idev->cnf, nla_data(nla), nla_len(nla));
- /* XXX - Statistics/MC not implemented */
+ /* XXX - MC not implemented */
+
+ nla = nla_reserve(skb, IFLA_INET6_STATS, IPSTATS_MIB_MAX * sizeof(u64));
+ if (nla == NULL)
+ goto nla_put_failure;
+ snmp6_fill_stats(nla_data(nla), idev, IFLA_INET6_STATS, nla_len(nla));
+
+ nla = nla_reserve(skb, IFLA_INET6_ICMP6STATS, ICMP6_MIB_MAX * sizeof(u64));
+ if (nla == NULL)
+ goto nla_put_failure;
+ snmp6_fill_stats(nla_data(nla), idev, IFLA_INET6_ICMP6STATS, nla_len(nla));
nla_nest_end(skb, protoinfo);
return nlmsg_end(skb, nlh);
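
__snmp6_fill_stats() above writes each u64 counter with put_unaligned() because the netlink attribute payload carrying them need not be 8-byte aligned. A userspace rendering of the same pattern, using memcpy as the portable unaligned store; the names are stand-ins:

/* Userspace rendering of the unaligned stats fill: slot 0 carries the
 * item count, the rest the counters, trailing pad zeroed. */
#include <stdint.h>
#include <stdio.h>
#include <string.h>

static void put_unaligned_u64(uint64_t v, void *p)
{
	memcpy(p, &v, sizeof(v));         /* legal at any alignment */
}

static void fill_stats(unsigned char *out, const uint64_t *mib,
		       int items, int bytes)
{
	int i, pad = bytes - (int)sizeof(uint64_t) * items;

	put_unaligned_u64((uint64_t)items, out);
	for (i = 1; i < items; i++)
		put_unaligned_u64(mib[i], out + i * sizeof(uint64_t));
	memset(out + (size_t)items * sizeof(uint64_t), 0, pad);
}

int main(void)
{
	uint64_t mib[4] = { 0, 11, 22, 33 };
	unsigned char payload[1 + 4 * sizeof(uint64_t)];

	/* payload + 1 is deliberately misaligned for u64 stores */
	fill_stats(payload + 1, mib, 4, (int)sizeof(payload) - 1);
	printf("first counter: %llu\n", (unsigned long long)mib[1]);
	return 0;
}
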
@@ -3442,16 +3578,19 @@ static int inet6_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb)
struct inet6_dev *idev;
read_lock(&dev_base_lock);
- for (dev=dev_base, idx=0; dev; dev = dev->next, idx++) {
+ idx = 0;
+ for_each_netdev(dev) {
if (idx < s_idx)
- continue;
+ goto cont;
if ((idev = in6_dev_get(dev)) == NULL)
- continue;
+ goto cont;
err = inet6_fill_ifinfo(skb, idev, NETLINK_CB(cb->skb).pid,
cb->nlh->nlmsg_seq, RTM_NEWLINK, NLM_F_MULTI);
in6_dev_put(idev);
if (err <= 0)
break;
+cont:
+ idx++;
}
read_unlock(&dev_base_lock);
cb->args[0] = idx;
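
Both dump loops above switch to the goto cont pattern so that idx counts every device, including skipped ones; cb->args[0] then lets the next dump pass resume exactly where the filled skb forced this one to stop. A compact model of that resumable iteration, with budget playing the role of the skb filling up and all names as stand-ins:

/* Model of the resumable dump pattern: skip the first s_idx entries,
 * stop when the "skb" (budget) fills, and report how far we got. */
#include <stdio.h>

static int dump_pass(int s_idx, int budget, int total, int *resume_idx)
{
	int idx = 0, emitted = 0, id;

	for (id = 0; id < total; id++) {
		if (idx < s_idx)
			goto cont;        /* already sent in an earlier pass */
		if (emitted == budget)
			break;            /* buffer full; retry this entry */
		printf("  entry %d\n", id);
		emitted++;
cont:
		idx++;
	}
	*resume_idx = idx;                /* mirrors cb->args[0] = idx */
	return emitted;
}

int main(void)
{
	int resume = 0;

	while (dump_pass(resume, 2, 5, &resume))
		printf("pass done, resume at %d\n", resume);
	return 0;
}
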
@@ -3550,30 +3689,20 @@ errout:
rtnl_set_sk_err(RTNLGRP_IPV6_PREFIX, err);
}
-static struct rtnetlink_link inet6_rtnetlink_table[RTM_NR_MSGTYPES] = {
- [RTM_GETLINK - RTM_BASE] = { .dumpit = inet6_dump_ifinfo, },
- [RTM_NEWADDR - RTM_BASE] = { .doit = inet6_rtm_newaddr, },
- [RTM_DELADDR - RTM_BASE] = { .doit = inet6_rtm_deladdr, },
- [RTM_GETADDR - RTM_BASE] = { .doit = inet6_rtm_getaddr,
- .dumpit = inet6_dump_ifaddr, },
- [RTM_GETMULTICAST - RTM_BASE] = { .dumpit = inet6_dump_ifmcaddr, },
- [RTM_GETANYCAST - RTM_BASE] = { .dumpit = inet6_dump_ifacaddr, },
- [RTM_NEWROUTE - RTM_BASE] = { .doit = inet6_rtm_newroute, },
- [RTM_DELROUTE - RTM_BASE] = { .doit = inet6_rtm_delroute, },
- [RTM_GETROUTE - RTM_BASE] = { .doit = inet6_rtm_getroute,
- .dumpit = inet6_dump_fib, },
-#ifdef CONFIG_IPV6_MULTIPLE_TABLES
- [RTM_GETRULE - RTM_BASE] = { .dumpit = fib6_rules_dump, },
-#endif
-};
-
static void __ipv6_ifa_notify(int event, struct inet6_ifaddr *ifp)
{
inet6_ifa_notify(event ? : RTM_NEWADDR, ifp);
switch (event) {
case RTM_NEWADDR:
- ip6_ins_rt(ifp->rt);
+ /*
+ * If the address was optimistic,
+ * we inserted the route at the start of
+ * our DAD process, so we don't need
+ * to do it again
+ */
+ if (!(ifp->rt->rt6i_node))
+ ip6_ins_rt(ifp->rt);
if (ifp->idev->cnf.forwarding)
addrconf_join_anycast(ifp);
break;
@@ -3894,6 +4023,17 @@ static struct addrconf_sysctl_table
.mode = 0644,
.proc_handler = &proc_dointvec,
},
+#ifdef CONFIG_IPV6_OPTIMISTIC_DAD
+ {
+ .ctl_name = CTL_UNNUMBERED,
+ .procname = "optimistic_dad",
+ .data = &ipv6_devconf.optimistic_dad,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = &proc_dointvec,
+
+ },
+#endif
{
.ctl_name = 0, /* sentinel */
}
@@ -4021,11 +4161,15 @@ int register_inet6addr_notifier(struct notifier_block *nb)
return atomic_notifier_chain_register(&inet6addr_chain, nb);
}
+EXPORT_SYMBOL(register_inet6addr_notifier);
+
int unregister_inet6addr_notifier(struct notifier_block *nb)
{
return atomic_notifier_chain_unregister(&inet6addr_chain,nb);
}
+EXPORT_SYMBOL(unregister_inet6addr_notifier);
+
/*
* Init / cleanup code
*/
@@ -4064,7 +4208,18 @@ int __init addrconf_init(void)
register_netdevice_notifier(&ipv6_dev_notf);
addrconf_verify(0);
- rtnetlink_links[PF_INET6] = inet6_rtnetlink_table;
+
+ err = __rtnl_register(PF_INET6, RTM_GETLINK, NULL, inet6_dump_ifinfo);
+ if (err < 0)
+ goto errout;
+
+ /* Only the first call to __rtnl_register can fail */
+ __rtnl_register(PF_INET6, RTM_NEWADDR, inet6_rtm_newaddr, NULL);
+ __rtnl_register(PF_INET6, RTM_DELADDR, inet6_rtm_deladdr, NULL);
+ __rtnl_register(PF_INET6, RTM_GETADDR, inet6_rtm_getaddr, inet6_dump_ifaddr);
+ __rtnl_register(PF_INET6, RTM_GETMULTICAST, NULL, inet6_dump_ifmcaddr);
+ __rtnl_register(PF_INET6, RTM_GETANYCAST, NULL, inet6_dump_ifacaddr);
+
#ifdef CONFIG_SYSCTL
addrconf_sysctl.sysctl_header =
register_sysctl_table(addrconf_sysctl.addrconf_root_dir);
@@ -4072,6 +4227,10 @@ int __init addrconf_init(void)
#endif
return 0;
+errout:
+ unregister_netdevice_notifier(&ipv6_dev_notf);
+
+ return err;
}
void __exit addrconf_cleanup(void)
@@ -4083,7 +4242,6 @@ void __exit addrconf_cleanup(void)
unregister_netdevice_notifier(&ipv6_dev_notf);
- rtnetlink_links[PF_INET6] = NULL;
#ifdef CONFIG_SYSCTL
addrconf_sysctl_unregister(&ipv6_devconf_dflt);
addrconf_sysctl_unregister(&ipv6_devconf);
@@ -4095,7 +4253,7 @@ void __exit addrconf_cleanup(void)
* clean dev list.
*/
- for (dev=dev_base; dev; dev=dev->next) {
+ for_each_netdev(dev) {
if ((idev = __in6_dev_get(dev)) == NULL)
continue;
addrconf_ifdown(dev, 1);
diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c
index 5cac14a5c77..18cb928c8d9 100644
--- a/net/ipv6/af_inet6.c
+++ b/net/ipv6/af_inet6.c
@@ -98,6 +98,11 @@ static int inet6_create(struct socket *sock, int protocol)
int try_loading_module = 0;
int err;
+ if (sock->type != SOCK_RAW &&
+ sock->type != SOCK_DGRAM &&
+ !inet_ehash_secret)
+ build_ehash_secret();
+
/* Look for the requested type/protocol pair. */
answer = NULL;
lookup_protocol:
@@ -349,6 +354,8 @@ out:
return err;
}
+EXPORT_SYMBOL(inet6_bind);
+
int inet6_release(struct socket *sock)
{
struct sock *sk = sock->sk;
@@ -365,6 +372,8 @@ int inet6_release(struct socket *sock)
return inet_release(sock);
}
+EXPORT_SYMBOL(inet6_release);
+
int inet6_destroy_sock(struct sock *sk)
{
struct ipv6_pinfo *np = inet6_sk(sk);
@@ -428,6 +437,8 @@ int inet6_getname(struct socket *sock, struct sockaddr *uaddr,
return(0);
}
+EXPORT_SYMBOL(inet6_getname);
+
int inet6_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
{
struct sock *sk = sock->sk;
@@ -437,6 +448,9 @@ int inet6_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
case SIOCGSTAMP:
return sock_get_timestamp(sk, (struct timeval __user *)arg);
+ case SIOCGSTAMPNS:
+ return sock_get_timestampns(sk, (struct timespec __user *)arg);
+
case SIOCADDRT:
case SIOCDELRT:
@@ -457,6 +471,8 @@ int inet6_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
return(0);
}
+EXPORT_SYMBOL(inet6_ioctl);
+
const struct proto_ops inet6_stream_ops = {
.family = PF_INET6,
.owner = THIS_MODULE,
@@ -603,6 +619,8 @@ out_illegal:
goto out;
}
+EXPORT_SYMBOL(inet6_register_protosw);
+
void
inet6_unregister_protosw(struct inet_protosw *p)
{
@@ -619,6 +637,8 @@ inet6_unregister_protosw(struct inet_protosw *p)
}
}
+EXPORT_SYMBOL(inet6_unregister_protosw);
+
int inet6_sk_rebuild_header(struct sock *sk)
{
int err;
@@ -678,7 +698,8 @@ int ipv6_opt_accepted(struct sock *sk, struct sk_buff *skb)
if (np->rxopt.all) {
if ((opt->hop && (np->rxopt.bits.hopopts ||
np->rxopt.bits.ohopopts)) ||
- ((IPV6_FLOWINFO_MASK & *(__be32*)skb->nh.raw) &&
+ ((IPV6_FLOWINFO_MASK &
+ *(__be32 *)skb_network_header(skb)) &&
np->rxopt.bits.rxflow) ||
(opt->srcrt && (np->rxopt.bits.srcrt ||
np->rxopt.bits.osrcrt)) ||
@@ -691,61 +712,28 @@ int ipv6_opt_accepted(struct sock *sk, struct sk_buff *skb)
EXPORT_SYMBOL_GPL(ipv6_opt_accepted);
-int
-snmp6_mib_init(void *ptr[2], size_t mibsize, size_t mibalign)
-{
- if (ptr == NULL)
- return -EINVAL;
-
- ptr[0] = __alloc_percpu(mibsize);
- if (!ptr[0])
- goto err0;
-
- ptr[1] = __alloc_percpu(mibsize);
- if (!ptr[1])
- goto err1;
-
- return 0;
-
-err1:
- free_percpu(ptr[0]);
- ptr[0] = NULL;
-err0:
- return -ENOMEM;
-}
-
-void
-snmp6_mib_free(void *ptr[2])
-{
- if (ptr == NULL)
- return;
- free_percpu(ptr[0]);
- free_percpu(ptr[1]);
- ptr[0] = ptr[1] = NULL;
-}
-
static int __init init_ipv6_mibs(void)
{
- if (snmp6_mib_init((void **)ipv6_statistics, sizeof (struct ipstats_mib),
- __alignof__(struct ipstats_mib)) < 0)
+ if (snmp_mib_init((void **)ipv6_statistics, sizeof (struct ipstats_mib),
+ __alignof__(struct ipstats_mib)) < 0)
goto err_ip_mib;
- if (snmp6_mib_init((void **)icmpv6_statistics, sizeof (struct icmpv6_mib),
- __alignof__(struct icmpv6_mib)) < 0)
+ if (snmp_mib_init((void **)icmpv6_statistics, sizeof (struct icmpv6_mib),
+ __alignof__(struct icmpv6_mib)) < 0)
goto err_icmp_mib;
- if (snmp6_mib_init((void **)udp_stats_in6, sizeof (struct udp_mib),
- __alignof__(struct udp_mib)) < 0)
+ if (snmp_mib_init((void **)udp_stats_in6, sizeof (struct udp_mib),
+ __alignof__(struct udp_mib)) < 0)
goto err_udp_mib;
- if (snmp6_mib_init((void **)udplite_stats_in6, sizeof (struct udp_mib),
- __alignof__(struct udp_mib)) < 0)
+ if (snmp_mib_init((void **)udplite_stats_in6, sizeof (struct udp_mib),
+ __alignof__(struct udp_mib)) < 0)
goto err_udplite_mib;
return 0;
err_udplite_mib:
- snmp6_mib_free((void **)udp_stats_in6);
+ snmp_mib_free((void **)udp_stats_in6);
err_udp_mib:
- snmp6_mib_free((void **)icmpv6_statistics);
+ snmp_mib_free((void **)icmpv6_statistics);
err_icmp_mib:
- snmp6_mib_free((void **)ipv6_statistics);
+ snmp_mib_free((void **)ipv6_statistics);
err_ip_mib:
return -ENOMEM;
@@ -753,10 +741,10 @@ err_ip_mib:
static void cleanup_ipv6_mibs(void)
{
- snmp6_mib_free((void **)ipv6_statistics);
- snmp6_mib_free((void **)icmpv6_statistics);
- snmp6_mib_free((void **)udp_stats_in6);
- snmp6_mib_free((void **)udplite_stats_in6);
+ snmp_mib_free((void **)ipv6_statistics);
+ snmp_mib_free((void **)icmpv6_statistics);
+ snmp_mib_free((void **)udp_stats_in6);
+ snmp_mib_free((void **)udplite_stats_in6);
}
static int __init inet6_init(void)
@@ -929,6 +917,8 @@ static void __exit inet6_exit(void)
{
/* First of all disallow new sockets creation. */
sock_unregister(PF_INET6);
+ /* Disallow any further netlink messages */
+ rtnl_unregister_all(PF_INET6);
/* Cleanup code parts. */
ipv6_packet_cleanup();
diff --git a/net/ipv6/ah6.c b/net/ipv6/ah6.c
index dc68b7269c3..b696c840120 100644
--- a/net/ipv6/ah6.c
+++ b/net/ipv6/ah6.c
@@ -238,8 +238,8 @@ static int ah6_output(struct xfrm_state *x, struct sk_buff *skb)
top_iph = (struct ipv6hdr *)skb->data;
top_iph->payload_len = htons(skb->len - sizeof(*top_iph));
- nexthdr = *skb->nh.raw;
- *skb->nh.raw = IPPROTO_AH;
+ nexthdr = *skb_network_header(skb);
+ *skb_network_header(skb) = IPPROTO_AH;
/* When there are no extension headers, we only need to save the first
* 8 bytes of the base IP header.
@@ -247,7 +247,7 @@ static int ah6_output(struct xfrm_state *x, struct sk_buff *skb)
memcpy(tmp_base, top_iph, sizeof(tmp_base));
tmp_ext = NULL;
- extlen = skb->h.raw - (unsigned char *)(top_iph + 1);
+ extlen = skb_transport_offset(skb) + sizeof(struct ipv6hdr);
if (extlen) {
extlen += sizeof(*tmp_ext);
tmp_ext = kmalloc(extlen, GFP_ATOMIC);
@@ -268,7 +268,7 @@ static int ah6_output(struct xfrm_state *x, struct sk_buff *skb)
goto error_free_iph;
}
- ah = (struct ip_auth_hdr *)skb->h.raw;
+ ah = (struct ip_auth_hdr *)skb_transport_header(skb);
ah->nexthdr = nexthdr;
top_iph->priority = 0;
@@ -316,8 +316,8 @@ static int ah6_input(struct xfrm_state *x, struct sk_buff *skb)
*
* To erase AH:
* Keeping copy of cleared headers. After AH processing,
- * Moving the pointer of skb->nh.raw by using skb_pull as long as AH
- * header length. Then copy back the copy as long as hdr_len
+ * move the skb->network_header pointer forward by the AH header
+ * length (via skb_pull), then copy the saved hdr_len bytes back.
* If destination header following AH exists, copy it into after [Ext2].
*
* |<>|[IPv6][Ext1][Ext2][Dest][Payload]
@@ -325,6 +325,7 @@ static int ah6_input(struct xfrm_state *x, struct sk_buff *skb)
*/
struct ipv6_auth_hdr *ah;
+ struct ipv6hdr *ip6h;
struct ah_data *ahp;
unsigned char *tmp_hdr = NULL;
u16 hdr_len;
@@ -341,7 +342,7 @@ static int ah6_input(struct xfrm_state *x, struct sk_buff *skb)
pskb_expand_head(skb, 0, 0, GFP_ATOMIC))
goto out;
- hdr_len = skb->data - skb->nh.raw;
+ hdr_len = skb->data - skb_network_header(skb);
ah = (struct ipv6_auth_hdr*)skb->data;
ahp = x->data;
nexthdr = ah->nexthdr;
@@ -354,16 +355,17 @@ static int ah6_input(struct xfrm_state *x, struct sk_buff *skb)
if (!pskb_may_pull(skb, ah_hlen))
goto out;
- tmp_hdr = kmemdup(skb->nh.raw, hdr_len, GFP_ATOMIC);
+ tmp_hdr = kmemdup(skb_network_header(skb), hdr_len, GFP_ATOMIC);
if (!tmp_hdr)
goto out;
- if (ipv6_clear_mutable_options(skb->nh.ipv6h, hdr_len, XFRM_POLICY_IN))
+ ip6h = ipv6_hdr(skb);
+ if (ipv6_clear_mutable_options(ip6h, hdr_len, XFRM_POLICY_IN))
goto free_out;
- skb->nh.ipv6h->priority = 0;
- skb->nh.ipv6h->flow_lbl[0] = 0;
- skb->nh.ipv6h->flow_lbl[1] = 0;
- skb->nh.ipv6h->flow_lbl[2] = 0;
- skb->nh.ipv6h->hop_limit = 0;
+ ip6h->priority = 0;
+ ip6h->flow_lbl[0] = 0;
+ ip6h->flow_lbl[1] = 0;
+ ip6h->flow_lbl[2] = 0;
+ ip6h->hop_limit = 0;
{
u8 auth_data[MAX_AH_AUTH_LEN];
@@ -382,7 +384,9 @@ static int ah6_input(struct xfrm_state *x, struct sk_buff *skb)
}
}
- skb->h.raw = memcpy(skb->nh.raw += ah_hlen, tmp_hdr, hdr_len);
+ skb->network_header += ah_hlen;
+ memcpy(skb_network_header(skb), tmp_hdr, hdr_len);
+ skb->transport_header = skb->network_header;
__skb_pull(skb, ah_hlen + hdr_len);
kfree(tmp_hdr);
diff --git a/net/ipv6/anycast.c b/net/ipv6/anycast.c
index 09117d63256..9b81264eb78 100644
--- a/net/ipv6/anycast.c
+++ b/net/ipv6/anycast.c
@@ -423,14 +423,18 @@ static int ipv6_chk_acast_dev(struct net_device *dev, struct in6_addr *addr)
*/
int ipv6_chk_acast_addr(struct net_device *dev, struct in6_addr *addr)
{
+ int found = 0;
+
if (dev)
return ipv6_chk_acast_dev(dev, addr);
read_lock(&dev_base_lock);
- for (dev=dev_base; dev; dev=dev->next)
- if (ipv6_chk_acast_dev(dev, addr))
+ for_each_netdev(dev)
+ if (ipv6_chk_acast_dev(dev, addr)) {
+ found = 1;
break;
+ }
read_unlock(&dev_base_lock);
- return dev != 0;
+ return found;
}
@@ -447,9 +451,8 @@ static inline struct ifacaddr6 *ac6_get_first(struct seq_file *seq)
struct ifacaddr6 *im = NULL;
struct ac6_iter_state *state = ac6_seq_private(seq);
- for (state->dev = dev_base, state->idev = NULL;
- state->dev;
- state->dev = state->dev->next) {
+ state->idev = NULL;
+ for_each_netdev(state->dev) {
struct inet6_dev *idev;
idev = in6_dev_get(state->dev);
if (!idev)
@@ -476,7 +479,7 @@ static struct ifacaddr6 *ac6_get_next(struct seq_file *seq, struct ifacaddr6 *im
read_unlock_bh(&state->idev->lock);
in6_dev_put(state->idev);
}
- state->dev = state->dev->next;
+ state->dev = next_net_device(state->dev);
if (!state->dev) {
state->idev = NULL;
break;
diff --git a/net/ipv6/datagram.c b/net/ipv6/datagram.c
index 3b4e8dcf4c8..403eee66b9c 100644
--- a/net/ipv6/datagram.c
+++ b/net/ipv6/datagram.c
@@ -209,7 +209,7 @@ void ipv6_icmp_error(struct sock *sk, struct sk_buff *skb, int err,
__be16 port, u32 info, u8 *payload)
{
struct ipv6_pinfo *np = inet6_sk(sk);
- struct icmp6hdr *icmph = (struct icmp6hdr *)skb->h.raw;
+ struct icmp6hdr *icmph = icmp6_hdr(skb);
struct sock_exterr_skb *serr;
if (!np->recverr)
@@ -227,11 +227,12 @@ void ipv6_icmp_error(struct sock *sk, struct sk_buff *skb, int err,
serr->ee.ee_pad = 0;
serr->ee.ee_info = info;
serr->ee.ee_data = 0;
- serr->addr_offset = (u8*)&(((struct ipv6hdr*)(icmph+1))->daddr) - skb->nh.raw;
+ serr->addr_offset = (u8 *)&(((struct ipv6hdr *)(icmph + 1))->daddr) -
+ skb_network_header(skb);
serr->port = port;
- skb->h.raw = payload;
__skb_pull(skb, payload - skb->data);
+ skb_reset_transport_header(skb);
if (sock_queue_err_skb(sk, skb))
kfree_skb(skb);
@@ -251,8 +252,9 @@ void ipv6_local_error(struct sock *sk, int err, struct flowi *fl, u32 info)
if (!skb)
return;
- iph = (struct ipv6hdr*)skb_put(skb, sizeof(struct ipv6hdr));
- skb->nh.ipv6h = iph;
+ skb_put(skb, sizeof(struct ipv6hdr));
+ skb_reset_network_header(skb);
+ iph = ipv6_hdr(skb);
ipv6_addr_copy(&iph->daddr, &fl->fl6_dst);
serr = SKB_EXT_ERR(skb);
@@ -263,11 +265,11 @@ void ipv6_local_error(struct sock *sk, int err, struct flowi *fl, u32 info)
serr->ee.ee_pad = 0;
serr->ee.ee_info = info;
serr->ee.ee_data = 0;
- serr->addr_offset = (u8*)&iph->daddr - skb->nh.raw;
+ serr->addr_offset = (u8 *)&iph->daddr - skb_network_header(skb);
serr->port = fl->fl_ip_dport;
- skb->h.raw = skb->tail;
- __skb_pull(skb, skb->tail - skb->data);
+ __skb_pull(skb, skb_tail_pointer(skb) - skb->data);
+ skb_reset_transport_header(skb);
if (sock_queue_err_skb(sk, skb))
kfree_skb(skb);
@@ -309,21 +311,24 @@ int ipv6_recv_error(struct sock *sk, struct msghdr *msg, int len)
sin = (struct sockaddr_in6 *)msg->msg_name;
if (sin) {
+ const unsigned char *nh = skb_network_header(skb);
sin->sin6_family = AF_INET6;
sin->sin6_flowinfo = 0;
sin->sin6_port = serr->port;
sin->sin6_scope_id = 0;
if (serr->ee.ee_origin == SO_EE_ORIGIN_ICMP6) {
ipv6_addr_copy(&sin->sin6_addr,
- (struct in6_addr *)(skb->nh.raw + serr->addr_offset));
+ (struct in6_addr *)(nh + serr->addr_offset));
if (np->sndflow)
- sin->sin6_flowinfo = *(__be32*)(skb->nh.raw + serr->addr_offset - 24) & IPV6_FLOWINFO_MASK;
+ sin->sin6_flowinfo =
+ (*(__be32 *)(nh + serr->addr_offset - 24) &
+ IPV6_FLOWINFO_MASK);
if (ipv6_addr_type(&sin->sin6_addr) & IPV6_ADDR_LINKLOCAL)
sin->sin6_scope_id = IP6CB(skb)->iif;
} else {
ipv6_addr_set(&sin->sin6_addr, 0, 0,
htonl(0xffff),
- *(__be32*)(skb->nh.raw + serr->addr_offset));
+ *(__be32 *)(nh + serr->addr_offset));
}
}
@@ -335,7 +340,7 @@ int ipv6_recv_error(struct sock *sk, struct msghdr *msg, int len)
sin->sin6_flowinfo = 0;
sin->sin6_scope_id = 0;
if (serr->ee.ee_origin == SO_EE_ORIGIN_ICMP6) {
- ipv6_addr_copy(&sin->sin6_addr, &skb->nh.ipv6h->saddr);
+ ipv6_addr_copy(&sin->sin6_addr, &ipv6_hdr(skb)->saddr);
if (np->rxopt.all)
datagram_recv_ctl(sk, msg, skb);
if (ipv6_addr_type(&sin->sin6_addr) & IPV6_ADDR_LINKLOCAL)
@@ -344,8 +349,7 @@ int ipv6_recv_error(struct sock *sk, struct msghdr *msg, int len)
struct inet_sock *inet = inet_sk(sk);
ipv6_addr_set(&sin->sin6_addr, 0, 0,
- htonl(0xffff),
- skb->nh.iph->saddr);
+ htonl(0xffff), ip_hdr(skb)->saddr);
if (inet->cmsg_flags)
ip_cmsg_recv(msg, skb);
}
@@ -381,33 +385,34 @@ int datagram_recv_ctl(struct sock *sk, struct msghdr *msg, struct sk_buff *skb)
{
struct ipv6_pinfo *np = inet6_sk(sk);
struct inet6_skb_parm *opt = IP6CB(skb);
+ unsigned char *nh = skb_network_header(skb);
if (np->rxopt.bits.rxinfo) {
struct in6_pktinfo src_info;
src_info.ipi6_ifindex = opt->iif;
- ipv6_addr_copy(&src_info.ipi6_addr, &skb->nh.ipv6h->daddr);
+ ipv6_addr_copy(&src_info.ipi6_addr, &ipv6_hdr(skb)->daddr);
put_cmsg(msg, SOL_IPV6, IPV6_PKTINFO, sizeof(src_info), &src_info);
}
if (np->rxopt.bits.rxhlim) {
- int hlim = skb->nh.ipv6h->hop_limit;
+ int hlim = ipv6_hdr(skb)->hop_limit;
put_cmsg(msg, SOL_IPV6, IPV6_HOPLIMIT, sizeof(hlim), &hlim);
}
if (np->rxopt.bits.rxtclass) {
- int tclass = (ntohl(*(__be32 *)skb->nh.ipv6h) >> 20) & 0xff;
+ int tclass = (ntohl(*(__be32 *)ipv6_hdr(skb)) >> 20) & 0xff;
put_cmsg(msg, SOL_IPV6, IPV6_TCLASS, sizeof(tclass), &tclass);
}
- if (np->rxopt.bits.rxflow && (*(__be32*)skb->nh.raw & IPV6_FLOWINFO_MASK)) {
- __be32 flowinfo = *(__be32*)skb->nh.raw & IPV6_FLOWINFO_MASK;
+ if (np->rxopt.bits.rxflow && (*(__be32 *)nh & IPV6_FLOWINFO_MASK)) {
+ __be32 flowinfo = *(__be32 *)nh & IPV6_FLOWINFO_MASK;
put_cmsg(msg, SOL_IPV6, IPV6_FLOWINFO, sizeof(flowinfo), &flowinfo);
}
/* HbH is allowed only once */
if (np->rxopt.bits.hopopts && opt->hop) {
- u8 *ptr = skb->nh.raw + opt->hop;
+ u8 *ptr = nh + opt->hop;
put_cmsg(msg, SOL_IPV6, IPV6_HOPOPTS, (ptr[1]+1)<<3, ptr);
}
@@ -423,11 +428,11 @@ int datagram_recv_ctl(struct sock *sk, struct msghdr *msg, struct sk_buff *skb)
* IPV6_RECVDSTOPTS is more generic. --yoshfuji
*/
unsigned int off = sizeof(struct ipv6hdr);
- u8 nexthdr = skb->nh.ipv6h->nexthdr;
+ u8 nexthdr = ipv6_hdr(skb)->nexthdr;
while (off <= opt->lastopt) {
unsigned len;
- u8 *ptr = skb->nh.raw + off;
+ u8 *ptr = nh + off;
switch(nexthdr) {
case IPPROTO_DSTOPTS:
@@ -461,27 +466,27 @@ int datagram_recv_ctl(struct sock *sk, struct msghdr *msg, struct sk_buff *skb)
struct in6_pktinfo src_info;
src_info.ipi6_ifindex = opt->iif;
- ipv6_addr_copy(&src_info.ipi6_addr, &skb->nh.ipv6h->daddr);
+ ipv6_addr_copy(&src_info.ipi6_addr, &ipv6_hdr(skb)->daddr);
put_cmsg(msg, SOL_IPV6, IPV6_2292PKTINFO, sizeof(src_info), &src_info);
}
if (np->rxopt.bits.rxohlim) {
- int hlim = skb->nh.ipv6h->hop_limit;
+ int hlim = ipv6_hdr(skb)->hop_limit;
put_cmsg(msg, SOL_IPV6, IPV6_2292HOPLIMIT, sizeof(hlim), &hlim);
}
if (np->rxopt.bits.ohopopts && opt->hop) {
- u8 *ptr = skb->nh.raw + opt->hop;
+ u8 *ptr = nh + opt->hop;
put_cmsg(msg, SOL_IPV6, IPV6_2292HOPOPTS, (ptr[1]+1)<<3, ptr);
}
if (np->rxopt.bits.odstopts && opt->dst0) {
- u8 *ptr = skb->nh.raw + opt->dst0;
+ u8 *ptr = nh + opt->dst0;
put_cmsg(msg, SOL_IPV6, IPV6_2292DSTOPTS, (ptr[1]+1)<<3, ptr);
}
if (np->rxopt.bits.osrcrt && opt->srcrt) {
- struct ipv6_rt_hdr *rthdr = (struct ipv6_rt_hdr *)(skb->nh.raw + opt->srcrt);
+ struct ipv6_rt_hdr *rthdr = (struct ipv6_rt_hdr *)(nh + opt->srcrt);
put_cmsg(msg, SOL_IPV6, IPV6_2292RTHDR, (rthdr->hdrlen+1) << 3, rthdr);
}
if (np->rxopt.bits.odstopts && opt->dst1) {
- u8 *ptr = skb->nh.raw + opt->dst1;
+ u8 *ptr = nh + opt->dst1;
put_cmsg(msg, SOL_IPV6, IPV6_2292DSTOPTS, (ptr[1]+1)<<3, ptr);
}
return 0;
@@ -718,7 +723,7 @@ int datagram_send_ctl(struct msghdr *msg, struct flowi *fl,
cmsg->cmsg_type);
err = -EINVAL;
break;
- };
+ }
}
exit_f:
diff --git a/net/ipv6/esp6.c b/net/ipv6/esp6.c
index 363e63ffecc..7107bb7e2e6 100644
--- a/net/ipv6/esp6.c
+++ b/net/ipv6/esp6.c
@@ -42,21 +42,19 @@
static int esp6_output(struct xfrm_state *x, struct sk_buff *skb)
{
int err;
- int hdr_len;
struct ipv6hdr *top_iph;
struct ipv6_esp_hdr *esph;
struct crypto_blkcipher *tfm;
struct blkcipher_desc desc;
- struct esp_data *esp;
struct sk_buff *trailer;
int blksize;
int clen;
int alen;
int nfrags;
-
- esp = x->data;
- hdr_len = skb->h.raw - skb->data +
- sizeof(*esph) + esp->conf.ivlen;
+ u8 *tail;
+ struct esp_data *esp = x->data;
+ int hdr_len = (skb_transport_offset(skb) +
+ sizeof(*esph) + esp->conf.ivlen);
/* Strip IP+ESP header. */
__skb_pull(skb, hdr_len);
@@ -81,19 +79,20 @@ static int esp6_output(struct xfrm_state *x, struct sk_buff *skb)
}
/* Fill padding... */
+ tail = skb_tail_pointer(trailer);
do {
int i;
for (i=0; i<clen-skb->len - 2; i++)
- *(u8*)(trailer->tail + i) = i+1;
+ tail[i] = i + 1;
} while (0);
- *(u8*)(trailer->tail + clen-skb->len - 2) = (clen - skb->len)-2;
+ tail[clen-skb->len - 2] = (clen - skb->len) - 2;
pskb_put(skb, trailer, clen - skb->len);
top_iph = (struct ipv6hdr *)__skb_push(skb, hdr_len);
- esph = (struct ipv6_esp_hdr *)skb->h.raw;
+ esph = (struct ipv6_esp_hdr *)skb_transport_header(skb);
top_iph->payload_len = htons(skb->len + alen - sizeof(*top_iph));
- *(u8*)(trailer->tail - 1) = *skb->nh.raw;
- *skb->nh.raw = IPPROTO_ESP;
+ *(skb_tail_pointer(trailer) - 1) = *skb_network_header(skb);
+ *skb_network_header(skb) = IPPROTO_ESP;
esph->spi = x->id.spi;
esph->seq_no = htonl(++x->replay.oseq);
@@ -150,8 +149,7 @@ static int esp6_input(struct xfrm_state *x, struct sk_buff *skb)
int blksize = ALIGN(crypto_blkcipher_blocksize(tfm), 4);
int alen = esp->auth.icv_trunc_len;
int elen = skb->len - sizeof(struct ipv6_esp_hdr) - esp->conf.ivlen - alen;
-
- int hdr_len = skb->h.raw - skb->nh.raw;
+ int hdr_len = skb_network_header_len(skb);
int nfrags;
int ret = 0;
@@ -191,7 +189,7 @@ static int esp6_input(struct xfrm_state *x, struct sk_buff *skb)
skb->ip_summed = CHECKSUM_NONE;
esph = (struct ipv6_esp_hdr*)skb->data;
- iph = skb->nh.ipv6h;
+ iph = ipv6_hdr(skb);
/* Get ivec. This can be wrong, check against another impls. */
if (esp->conf.ivlen)
@@ -231,28 +229,30 @@ static int esp6_input(struct xfrm_state *x, struct sk_buff *skb)
ret = nexthdr[1];
}
- skb->h.raw = __skb_pull(skb, sizeof(*esph) + esp->conf.ivlen) - hdr_len;
-
+ __skb_pull(skb, sizeof(*esph) + esp->conf.ivlen);
+ skb_set_transport_header(skb, -hdr_len);
out:
return ret;
}
-static u32 esp6_get_max_size(struct xfrm_state *x, int mtu)
+static u32 esp6_get_mtu(struct xfrm_state *x, int mtu)
{
struct esp_data *esp = x->data;
u32 blksize = ALIGN(crypto_blkcipher_blocksize(esp->conf.tfm), 4);
+ u32 align = max_t(u32, blksize, esp->conf.padlen);
+ u32 rem;
+
+ mtu -= x->props.header_len + esp->auth.icv_trunc_len;
+ rem = mtu & (align - 1);
+ mtu &= ~(align - 1);
- if (x->props.mode == XFRM_MODE_TUNNEL) {
- mtu = ALIGN(mtu + 2, blksize);
- } else {
- /* The worst case. */
+ if (x->props.mode != XFRM_MODE_TUNNEL) {
u32 padsize = ((blksize - 1) & 7) + 1;
- mtu = ALIGN(mtu + 2, padsize) + blksize - padsize;
+ mtu -= blksize - padsize;
+ mtu += min_t(u32, blksize - padsize, rem);
}
- if (esp->conf.padlen)
- mtu = ALIGN(mtu, esp->conf.padlen);
- return mtu + x->props.header_len + esp->auth.icv_trunc_len;
+ return mtu - 2;
}
static void esp6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
@@ -382,7 +382,7 @@ static struct xfrm_type esp6_type =
.proto = IPPROTO_ESP,
.init_state = esp6_init_state,
.destructor = esp6_destroy,
- .get_max_size = esp6_get_max_size,
+ .get_mtu = esp6_get_mtu,
.input = esp6_input,
.output = esp6_output,
.hdr_offset = xfrm6_find_1stfragopt,
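
esp6_get_mtu() above inverts the old get_max_size() question: given a link MTU, how large may the plaintext payload be? It subtracts the ESP header and ICV, rounds down to the cipher block size (or pad length, whichever is larger), and reserves the 2-byte pad-length/next-header trailer. A worked example with assumed SA parameters; align must be a power of two for the mask to be valid, as it is for real cipher block sizes:

/* Worked example of the esp6_get_mtu() arithmetic (assumed SA params). */
#include <stdio.h>
#include <stdint.h>

static uint32_t esp_mtu(uint32_t mtu, uint32_t header_len, uint32_t icv_len,
			uint32_t blksize, uint32_t padlen, int tunnel_mode)
{
	uint32_t align = blksize > padlen ? blksize : padlen;
	uint32_t rem;

	mtu -= header_len + icv_len;      /* ESP header + IV, then ICV */
	rem = mtu & (align - 1);
	mtu &= ~(align - 1);              /* round down to a block edge */

	if (!tunnel_mode) {
		uint32_t padsize = ((blksize - 1) & 7) + 1;
		uint32_t back = blksize - padsize;

		mtu -= back;              /* worst-case extra padding */
		mtu += rem < back ? rem : back;
	}
	return mtu - 2;                   /* pad-length + next-header */
}

int main(void)
{
	/* 1500-byte link, 48 header bytes, 12-byte ICV, 16-byte blocks */
	printf("payload ceiling = %u\n", esp_mtu(1500, 48, 12, 16, 0, 1));
	return 0;
}

With those numbers, tunnel mode leaves 1440 block-aligned bytes after overhead and a 1438-byte payload ceiling once the trailer is reserved.
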
diff --git a/net/ipv6/exthdrs.c b/net/ipv6/exthdrs.c
index fb39604c3d0..6d8e4ac7bda 100644
--- a/net/ipv6/exthdrs.c
+++ b/net/ipv6/exthdrs.c
@@ -50,13 +50,14 @@
int ipv6_find_tlv(struct sk_buff *skb, int offset, int type)
{
- int packet_len = skb->tail - skb->nh.raw;
+ const unsigned char *nh = skb_network_header(skb);
+ int packet_len = skb->tail - skb->network_header;
struct ipv6_opt_hdr *hdr;
int len;
if (offset + 2 > packet_len)
goto bad;
- hdr = (struct ipv6_opt_hdr*)(skb->nh.raw + offset);
+ hdr = (struct ipv6_opt_hdr *)(nh + offset);
len = ((hdr->hdrlen + 1) << 3);
if (offset + len > packet_len)
@@ -66,7 +67,7 @@ int ipv6_find_tlv(struct sk_buff *skb, int offset, int type)
len -= 2;
while (len > 0) {
- int opttype = skb->nh.raw[offset];
+ int opttype = nh[offset];
int optlen;
if (opttype == type)
@@ -77,7 +78,7 @@ int ipv6_find_tlv(struct sk_buff *skb, int offset, int type)
optlen = 1;
break;
default:
- optlen = skb->nh.raw[offset + 1] + 2;
+ optlen = nh[offset + 1] + 2;
if (optlen > len)
goto bad;
break;
@@ -113,7 +114,7 @@ static int ip6_tlvopt_unknown(struct sk_buff **skbp, int optoff)
{
struct sk_buff *skb = *skbp;
- switch ((skb->nh.raw[optoff] & 0xC0) >> 6) {
+ switch ((skb_network_header(skb)[optoff] & 0xC0) >> 6) {
case 0: /* ignore */
return 1;
@@ -124,12 +125,12 @@ static int ip6_tlvopt_unknown(struct sk_buff **skbp, int optoff)
/* Actually, it is redundant check. icmp_send
will recheck in any case.
*/
- if (ipv6_addr_is_multicast(&skb->nh.ipv6h->daddr))
+ if (ipv6_addr_is_multicast(&ipv6_hdr(skb)->daddr))
break;
case 2: /* send ICMP PARM PROB regardless and drop packet */
icmpv6_param_prob(skb, ICMPV6_UNK_OPTION, optoff);
return 0;
- };
+ }
kfree_skb(skb);
return 0;
@@ -141,19 +142,20 @@ static int ip6_parse_tlv(struct tlvtype_proc *procs, struct sk_buff **skbp)
{
struct sk_buff *skb = *skbp;
struct tlvtype_proc *curr;
- int off = skb->h.raw - skb->nh.raw;
- int len = ((skb->h.raw[1]+1)<<3);
+ const unsigned char *nh = skb_network_header(skb);
+ int off = skb_network_header_len(skb);
+ int len = (skb_transport_header(skb)[1] + 1) << 3;
- if ((skb->h.raw + len) - skb->data > skb_headlen(skb))
+ if (skb_transport_offset(skb) + len > skb_headlen(skb))
goto bad;
off += 2;
len -= 2;
while (len > 0) {
- int optlen = skb->nh.raw[off+1]+2;
+ int optlen = nh[off + 1] + 2;
- switch (skb->nh.raw[off]) {
+ switch (nh[off]) {
case IPV6_TLV_PAD0:
optlen = 1;
break;
@@ -165,7 +167,7 @@ static int ip6_parse_tlv(struct tlvtype_proc *procs, struct sk_buff **skbp)
if (optlen > len)
goto bad;
for (curr=procs; curr->type >= 0; curr++) {
- if (curr->type == skb->nh.raw[off]) {
+ if (curr->type == nh[off]) {
/* type specific length/alignment
checks will be performed in the
func(). */
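
For readers unfamiliar with the option encoding that ip6_parse_tlv() walks: options are type/length/value triples, except that type 0 (Pad1) is a single byte with no length octet. A standalone sketch of the same walk (hypothetical helper, not kernel code):

static int walk_tlv_options(const unsigned char *opts, int len)
{
	int off = 0;

	while (off < len) {
		int optlen;

		if (opts[off] == 0) {		/* IPV6_TLV_PAD0: one byte */
			optlen = 1;
		} else {
			if (len - off < 2)
				return -1;	/* no room for length octet */
			optlen = opts[off + 1] + 2;
		}
		if (optlen > len - off)
			return -1;		/* option overruns the area */
		/* a real parser dispatches on opts[off] here */
		off += optlen;
	}
	return 0;
}
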
@@ -200,7 +202,7 @@ static int ipv6_dest_hao(struct sk_buff **skbp, int optoff)
struct sk_buff *skb = *skbp;
struct ipv6_destopt_hao *hao;
struct inet6_skb_parm *opt = IP6CB(skb);
- struct ipv6hdr *ipv6h = (struct ipv6hdr *)skb->nh.raw;
+ struct ipv6hdr *ipv6h = ipv6_hdr(skb);
struct in6_addr tmp_addr;
int ret;
@@ -211,7 +213,7 @@ static int ipv6_dest_hao(struct sk_buff **skbp, int optoff)
opt->dsthao = opt->dst1;
opt->dst1 = 0;
- hao = (struct ipv6_destopt_hao *)(skb->nh.raw + optoff);
+ hao = (struct ipv6_destopt_hao *)(skb_network_header(skb) + optoff);
if (hao->length != 16) {
LIMIT_NETDEBUG(
@@ -244,8 +246,9 @@ static int ipv6_dest_hao(struct sk_buff **skbp, int optoff)
/* update all variables used below to point into the copied skbuff */
*skbp = skb = skb2;
- hao = (struct ipv6_destopt_hao *)(skb2->nh.raw + optoff);
- ipv6h = (struct ipv6hdr *)skb2->nh.raw;
+ hao = (struct ipv6_destopt_hao *)(skb_network_header(skb2) +
+ optoff);
+ ipv6h = ipv6_hdr(skb2);
}
if (skb->ip_summed == CHECKSUM_COMPLETE)
@@ -255,7 +258,7 @@ static int ipv6_dest_hao(struct sk_buff **skbp, int optoff)
ipv6_addr_copy(&ipv6h->saddr, &hao->addr);
ipv6_addr_copy(&hao->addr, &tmp_addr);
- if (skb->tstamp.off_sec == 0)
+ if (skb->tstamp.tv64 == 0)
__net_timestamp(skb);
return 1;
@@ -285,16 +288,16 @@ static int ipv6_destopt_rcv(struct sk_buff **skbp)
#endif
struct dst_entry *dst;
- if (!pskb_may_pull(skb, (skb->h.raw-skb->data)+8) ||
- !pskb_may_pull(skb, (skb->h.raw-skb->data)+((skb->h.raw[1]+1)<<3))) {
+ if (!pskb_may_pull(skb, skb_transport_offset(skb) + 8) ||
+ !pskb_may_pull(skb, (skb_transport_offset(skb) +
+ ((skb_transport_header(skb)[1] + 1) << 3)))) {
IP6_INC_STATS_BH(ip6_dst_idev(skb->dst),
IPSTATS_MIB_INHDRERRORS);
kfree_skb(skb);
return -1;
}
- opt->lastopt = skb->h.raw - skb->nh.raw;
- opt->dst1 = skb->h.raw - skb->nh.raw;
+ opt->lastopt = opt->dst1 = skb_network_header_len(skb);
#ifdef CONFIG_IPV6_MIP6
dstbuf = opt->dst1;
#endif
@@ -303,7 +306,7 @@ static int ipv6_destopt_rcv(struct sk_buff **skbp)
if (ip6_parse_tlv(tlvprocdestopt_lst, skbp)) {
dst_release(dst);
skb = *skbp;
- skb->h.raw += ((skb->h.raw[1]+1)<<3);
+ skb->transport_header += (skb_transport_header(skb)[1] + 1) << 3;
opt = IP6CB(skb);
#ifdef CONFIG_IPV6_MIP6
opt->nhoff = dstbuf;
@@ -384,18 +387,20 @@ static int ipv6_rthdr_rcv(struct sk_buff **skbp)
in6_dev_put(idev);
- if (!pskb_may_pull(skb, (skb->h.raw-skb->data)+8) ||
- !pskb_may_pull(skb, (skb->h.raw-skb->data)+((skb->h.raw[1]+1)<<3))) {
+ if (!pskb_may_pull(skb, skb_transport_offset(skb) + 8) ||
+ !pskb_may_pull(skb, (skb_transport_offset(skb) +
+ ((skb_transport_header(skb)[1] + 1) << 3)))) {
IP6_INC_STATS_BH(ip6_dst_idev(skb->dst),
IPSTATS_MIB_INHDRERRORS);
kfree_skb(skb);
return -1;
}
- hdr = (struct ipv6_rt_hdr *) skb->h.raw;
+ hdr = (struct ipv6_rt_hdr *)skb_transport_header(skb);
switch (hdr->type) {
#ifdef CONFIG_IPV6_MIP6
+ case IPV6_SRCRT_TYPE_2:
break;
#endif
case IPV6_SRCRT_TYPE_0:
@@ -406,11 +411,12 @@ static int ipv6_rthdr_rcv(struct sk_buff **skbp)
default:
IP6_INC_STATS_BH(ip6_dst_idev(skb->dst),
IPSTATS_MIB_INHDRERRORS);
- icmpv6_param_prob(skb, ICMPV6_HDR_FIELD, (&hdr->type) - skb->nh.raw);
+ icmpv6_param_prob(skb, ICMPV6_HDR_FIELD,
+ (&hdr->type) - skb_network_header(skb));
return -1;
}
- if (ipv6_addr_is_multicast(&skb->nh.ipv6h->daddr) ||
+ if (ipv6_addr_is_multicast(&ipv6_hdr(skb)->daddr) ||
skb->pkt_type != PACKET_HOST) {
IP6_INC_STATS_BH(ip6_dst_idev(skb->dst),
IPSTATS_MIB_INADDRERRORS);
@@ -438,12 +444,11 @@ looped_back:
break;
}
- opt->lastopt = skb->h.raw - skb->nh.raw;
- opt->srcrt = skb->h.raw - skb->nh.raw;
- skb->h.raw += (hdr->hdrlen + 1) << 3;
+ opt->lastopt = opt->srcrt = skb_network_header_len(skb);
+ skb->transport_header += (hdr->hdrlen + 1) << 3;
opt->dst0 = opt->dst1;
opt->dst1 = 0;
- opt->nhoff = (&hdr->nexthdr) - skb->nh.raw;
+ opt->nhoff = (&hdr->nexthdr) - skb_network_header(skb);
return 1;
}
@@ -452,7 +457,9 @@ looped_back:
if (hdr->hdrlen & 0x01) {
IP6_INC_STATS_BH(ip6_dst_idev(skb->dst),
IPSTATS_MIB_INHDRERRORS);
- icmpv6_param_prob(skb, ICMPV6_HDR_FIELD, (&hdr->hdrlen) - skb->nh.raw);
+ icmpv6_param_prob(skb, ICMPV6_HDR_FIELD,
+ ((&hdr->hdrlen) -
+ skb_network_header(skb)));
return -1;
}
break;
@@ -479,7 +486,9 @@ looped_back:
if (hdr->segments_left > n) {
IP6_INC_STATS_BH(ip6_dst_idev(skb->dst),
IPSTATS_MIB_INHDRERRORS);
- icmpv6_param_prob(skb, ICMPV6_HDR_FIELD, (&hdr->segments_left) - skb->nh.raw);
+ icmpv6_param_prob(skb, ICMPV6_HDR_FIELD,
+ ((&hdr->segments_left) -
+ skb_network_header(skb)));
return -1;
}
@@ -498,7 +507,7 @@ looped_back:
kfree_skb(skb);
*skbp = skb = skb2;
opt = IP6CB(skb2);
- hdr = (struct ipv6_rt_hdr *) skb2->h.raw;
+ hdr = (struct ipv6_rt_hdr *)skb_transport_header(skb2);
}
if (skb->ip_summed == CHECKSUM_COMPLETE)
@@ -514,7 +523,7 @@ looped_back:
#ifdef CONFIG_IPV6_MIP6
case IPV6_SRCRT_TYPE_2:
if (xfrm6_input_addr(skb, (xfrm_address_t *)addr,
- (xfrm_address_t *)&skb->nh.ipv6h->saddr,
+ (xfrm_address_t *)&ipv6_hdr(skb)->saddr,
IPPROTO_ROUTING) < 0) {
IP6_INC_STATS_BH(ip6_dst_idev(skb->dst),
IPSTATS_MIB_INADDRERRORS);
@@ -541,19 +550,19 @@ looped_back:
}
ipv6_addr_copy(&daddr, addr);
- ipv6_addr_copy(addr, &skb->nh.ipv6h->daddr);
- ipv6_addr_copy(&skb->nh.ipv6h->daddr, &daddr);
+ ipv6_addr_copy(addr, &ipv6_hdr(skb)->daddr);
+ ipv6_addr_copy(&ipv6_hdr(skb)->daddr, &daddr);
dst_release(xchg(&skb->dst, NULL));
ip6_route_input(skb);
if (skb->dst->error) {
- skb_push(skb, skb->data - skb->nh.raw);
+ skb_push(skb, skb->data - skb_network_header(skb));
dst_input(skb);
return -1;
}
if (skb->dst->dev->flags&IFF_LOOPBACK) {
- if (skb->nh.ipv6h->hop_limit <= 1) {
+ if (ipv6_hdr(skb)->hop_limit <= 1) {
IP6_INC_STATS_BH(ip6_dst_idev(skb->dst),
IPSTATS_MIB_INHDRERRORS);
icmpv6_send(skb, ICMPV6_TIME_EXCEED, ICMPV6_EXC_HOPLIMIT,
@@ -561,11 +570,11 @@ looped_back:
kfree_skb(skb);
return -1;
}
- skb->nh.ipv6h->hop_limit--;
+ ipv6_hdr(skb)->hop_limit--;
goto looped_back;
}
- skb_push(skb, skb->data - skb->nh.raw);
+ skb_push(skb, skb->data - skb_network_header(skb));
dst_input(skb);
return -1;
}
@@ -656,13 +665,14 @@ EXPORT_SYMBOL_GPL(ipv6_invert_rthdr);
static int ipv6_hop_ra(struct sk_buff **skbp, int optoff)
{
struct sk_buff *skb = *skbp;
+ const unsigned char *nh = skb_network_header(skb);
- if (skb->nh.raw[optoff+1] == 2) {
+ if (nh[optoff + 1] == 2) {
IP6CB(skb)->ra = optoff;
return 1;
}
LIMIT_NETDEBUG(KERN_DEBUG "ipv6_hop_ra: wrong RA length %d\n",
- skb->nh.raw[optoff+1]);
+ nh[optoff + 1]);
kfree_skb(skb);
return 0;
}
@@ -672,23 +682,24 @@ static int ipv6_hop_ra(struct sk_buff **skbp, int optoff)
static int ipv6_hop_jumbo(struct sk_buff **skbp, int optoff)
{
struct sk_buff *skb = *skbp;
+ const unsigned char *nh = skb_network_header(skb);
u32 pkt_len;
- if (skb->nh.raw[optoff+1] != 4 || (optoff&3) != 2) {
+ if (nh[optoff + 1] != 4 || (optoff & 3) != 2) {
LIMIT_NETDEBUG(KERN_DEBUG "ipv6_hop_jumbo: wrong jumbo opt length/alignment %d\n",
- skb->nh.raw[optoff+1]);
+ nh[optoff + 1]);
IP6_INC_STATS_BH(ip6_dst_idev(skb->dst),
IPSTATS_MIB_INHDRERRORS);
goto drop;
}
- pkt_len = ntohl(*(__be32*)(skb->nh.raw+optoff+2));
+ pkt_len = ntohl(*(__be32 *)(nh + optoff + 2));
if (pkt_len <= IPV6_MAXPLEN) {
IP6_INC_STATS_BH(ip6_dst_idev(skb->dst), IPSTATS_MIB_INHDRERRORS);
icmpv6_param_prob(skb, ICMPV6_HDR_FIELD, optoff+2);
return 0;
}
- if (skb->nh.ipv6h->payload_len) {
+ if (ipv6_hdr(skb)->payload_len) {
IP6_INC_STATS_BH(ip6_dst_idev(skb->dst), IPSTATS_MIB_INHDRERRORS);
icmpv6_param_prob(skb, ICMPV6_HDR_FIELD, optoff);
return 0;
@@ -727,13 +738,14 @@ int ipv6_parse_hopopts(struct sk_buff **skbp)
struct inet6_skb_parm *opt = IP6CB(skb);
/*
- * skb->nh.raw is equal to skb->data, and
- * skb->h.raw - skb->nh.raw is always equal to
+ * skb_network_header(skb) is equal to skb->data, and
+ * skb_network_header_len(skb) is always equal to
* sizeof(struct ipv6hdr) by definition of
* hop-by-hop options.
*/
if (!pskb_may_pull(skb, sizeof(struct ipv6hdr) + 8) ||
- !pskb_may_pull(skb, sizeof(struct ipv6hdr) + ((skb->h.raw[1] + 1) << 3))) {
+ !pskb_may_pull(skb, (sizeof(struct ipv6hdr) +
+ ((skb_transport_header(skb)[1] + 1) << 3)))) {
kfree_skb(skb);
return -1;
}
@@ -741,7 +753,7 @@ int ipv6_parse_hopopts(struct sk_buff **skbp)
opt->hop = sizeof(struct ipv6hdr);
if (ip6_parse_tlv(tlvprochopopt_lst, skbp)) {
skb = *skbp;
- skb->h.raw += (skb->h.raw[1]+1)<<3;
+ skb->transport_header += (skb_transport_header(skb)[1] + 1) << 3;
opt = IP6CB(skb);
opt->nhoff = sizeof(struct ipv6hdr);
return 1;
@@ -810,6 +822,8 @@ void ipv6_push_nfrag_opts(struct sk_buff *skb, struct ipv6_txoptions *opt,
ipv6_push_exthdr(skb, proto, NEXTHDR_HOP, opt->hopopt);
}
+EXPORT_SYMBOL(ipv6_push_nfrag_opts);
+
void ipv6_push_frag_opts(struct sk_buff *skb, struct ipv6_txoptions *opt, u8 *proto)
{
if (opt->dst1opt)
diff --git a/net/ipv6/fib6_rules.c b/net/ipv6/fib6_rules.c
index ea3035b4e3e..fc3882c9060 100644
--- a/net/ipv6/fib6_rules.c
+++ b/net/ipv6/fib6_rules.c
@@ -17,6 +17,7 @@
#include <net/fib_rules.h>
#include <net/ipv6.h>
+#include <net/addrconf.h>
#include <net/ip6_route.h>
#include <net/netlink.h>
@@ -95,8 +96,27 @@ static int fib6_rule_action(struct fib_rule *rule, struct flowi *flp,
if (table)
rt = lookup(table, flp, flags);
- if (rt != &ip6_null_entry)
+ if (rt != &ip6_null_entry) {
+ struct fib6_rule *r = (struct fib6_rule *)rule;
+
+ /*
+ * If we need to find a source address for this traffic,
+ * we check whether the result satisfies the rule's source prefix.
+ */
+ if ((rule->flags & FIB_RULE_FIND_SADDR) &&
+ r->src.plen && !(flags & RT6_LOOKUP_F_HAS_SADDR)) {
+ struct in6_addr saddr;
+ if (ipv6_get_saddr(&rt->u.dst, &flp->fl6_dst,
+ &saddr))
+ goto again;
+ if (!ipv6_prefix_equal(&saddr, &r->src.addr,
+ r->src.plen))
+ goto again;
+ ipv6_addr_copy(&flp->fl6_src, &saddr);
+ }
goto out;
+ }
+again:
dst_release(&rt->u.dst);
rt = NULL;
goto out;
@@ -117,9 +137,17 @@ static int fib6_rule_match(struct fib_rule *rule, struct flowi *fl, int flags)
!ipv6_prefix_equal(&fl->fl6_dst, &r->dst.addr, r->dst.plen))
return 0;
+ /*
+ * If FIB_RULE_FIND_SADDR is set and we do not have a
+ * source address for the traffic, we defer the
+ * source-address check to the rule action.
+ */
if (r->src.plen) {
- if (!(flags & RT6_LOOKUP_F_HAS_SADDR) ||
- !ipv6_prefix_equal(&fl->fl6_src, &r->src.addr, r->src.plen))
+ if (flags & RT6_LOOKUP_F_HAS_SADDR) {
+ if (!ipv6_prefix_equal(&fl->fl6_src, &r->src.addr,
+ r->src.plen))
+ return 0;
+ } else if (!(r->common.flags & FIB_RULE_FIND_SADDR))
return 0;
}
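
The net effect of the two hunks in this file: a rule with a source prefix no longer rejects flows that lack a source address outright. fib6_rule_match() provisionally accepts them when FIB_RULE_FIND_SADDR is set, and fib6_rule_action() verifies the source it selects, falling through to the next rule on mismatch. A condensed model of the two phases (all types and helpers hypothetical):

struct rule_model {
	int src_plen;		/* non-zero if the rule has a source prefix */
	int find_saddr;		/* FIB_RULE_FIND_SADDR analogue */
};

/* phase 1 -- fib6_rule_match(): may pass without a source address */
static int rule_match_model(const struct rule_model *r, int have_saddr,
			    int saddr_in_prefix)
{
	if (!r->src_plen)
		return 1;
	if (have_saddr)
		return saddr_in_prefix;
	return r->find_saddr;	/* defer: phase 2 will verify */
}

/* phase 2 -- fib6_rule_action(): pick a source and re-verify it;
 * on mismatch the lookup retries the next rule ("goto again") */
static int rule_action_model(const struct rule_model *r, int have_saddr,
			     int picked_saddr_in_prefix)
{
	if (r->find_saddr && r->src_plen && !have_saddr)
		return picked_saddr_in_prefix;
	return 1;
}
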
@@ -216,11 +244,6 @@ nla_put_failure:
return -ENOBUFS;
}
-int fib6_rules_dump(struct sk_buff *skb, struct netlink_callback *cb)
-{
- return fib_rules_dump(skb, cb, AF_INET6);
-}
-
static u32 fib6_rule_default_pref(void)
{
return 0x3FFF;
diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c
index edfe98bf64c..e9bcce9e7bd 100644
--- a/net/ipv6/icmp.c
+++ b/net/ipv6/icmp.c
@@ -68,6 +68,7 @@
#include <asm/system.h>
DEFINE_SNMP_STAT(struct icmpv6_mib, icmpv6_statistics) __read_mostly;
+EXPORT_SYMBOL(icmpv6_statistics);
/*
* The ICMP socket(s). This is the most convenient way to flow control
@@ -128,9 +129,9 @@ void icmpv6_param_prob(struct sk_buff *skb, int code, int pos)
static int is_ineligible(struct sk_buff *skb)
{
- int ptr = (u8*)(skb->nh.ipv6h+1) - skb->data;
+ int ptr = (u8 *)(ipv6_hdr(skb) + 1) - skb->data;
int len = skb->len - ptr;
- __u8 nexthdr = skb->nh.ipv6h->nexthdr;
+ __u8 nexthdr = ipv6_hdr(skb)->nexthdr;
if (len < 0)
return 1;
@@ -205,7 +206,7 @@ static __inline__ int opt_unrec(struct sk_buff *skb, __u32 offset)
{
u8 _optval, *op;
- offset += skb->nh.raw - skb->data;
+ offset += skb_network_offset(skb);
op = skb_header_pointer(skb, offset, sizeof(_optval), &_optval);
if (op == NULL)
return 1;
@@ -221,7 +222,7 @@ static int icmpv6_push_pending_frames(struct sock *sk, struct flowi *fl, struct
if ((skb = skb_peek(&sk->sk_write_queue)) == NULL)
goto out;
- icmp6h = (struct icmp6hdr*) skb->h.raw;
+ icmp6h = icmp6_hdr(skb);
memcpy(icmp6h, thdr, sizeof(struct icmp6hdr));
icmp6h->icmp6_cksum = 0;
@@ -274,7 +275,7 @@ static int icmpv6_getfrag(void *from, char *to, int offset, int len, int odd, st
#ifdef CONFIG_IPV6_MIP6
static void mip6_addr_swap(struct sk_buff *skb)
{
- struct ipv6hdr *iph = skb->nh.ipv6h;
+ struct ipv6hdr *iph = ipv6_hdr(skb);
struct inet6_skb_parm *opt = IP6CB(skb);
struct ipv6_destopt_hao *hao;
struct in6_addr tmp;
@@ -283,7 +284,8 @@ static void mip6_addr_swap(struct sk_buff *skb)
if (opt->dsthao) {
off = ipv6_find_tlv(skb, opt->dsthao, IPV6_TLV_HAO);
if (likely(off >= 0)) {
- hao = (struct ipv6_destopt_hao *)(skb->nh.raw + off);
+ hao = (struct ipv6_destopt_hao *)
+ (skb_network_header(skb) + off);
ipv6_addr_copy(&tmp, &iph->saddr);
ipv6_addr_copy(&iph->saddr, &hao->addr);
ipv6_addr_copy(&hao->addr, &tmp);
@@ -301,7 +303,7 @@ void icmpv6_send(struct sk_buff *skb, int type, int code, __u32 info,
struct net_device *dev)
{
struct inet6_dev *idev = NULL;
- struct ipv6hdr *hdr = skb->nh.ipv6h;
+ struct ipv6hdr *hdr = ipv6_hdr(skb);
struct sock *sk;
struct ipv6_pinfo *np;
struct in6_addr *saddr = NULL;
@@ -315,7 +317,8 @@ void icmpv6_send(struct sk_buff *skb, int type, int code, __u32 info,
int hlimit, tclass;
int err = 0;
- if ((u8*)hdr < skb->head || (u8*)(hdr+1) > skb->tail)
+ if ((u8 *)hdr < skb->head ||
+ (skb->network_header + sizeof(*hdr)) > skb->tail)
return;
/*
@@ -430,7 +433,7 @@ void icmpv6_send(struct sk_buff *skb, int type, int code, __u32 info,
tclass = 0;
msg.skb = skb;
- msg.offset = skb->nh.raw - skb->data;
+ msg.offset = skb_network_offset(skb);
msg.type = type;
len = skb->len - msg.offset;
@@ -466,13 +469,15 @@ out:
icmpv6_xmit_unlock();
}
+EXPORT_SYMBOL(icmpv6_send);
+
static void icmpv6_echo_reply(struct sk_buff *skb)
{
struct sock *sk;
struct inet6_dev *idev;
struct ipv6_pinfo *np;
struct in6_addr *saddr = NULL;
- struct icmp6hdr *icmph = (struct icmp6hdr *) skb->h.raw;
+ struct icmp6hdr *icmph = icmp6_hdr(skb);
struct icmp6hdr tmp_hdr;
struct flowi fl;
struct icmpv6_msg msg;
@@ -481,7 +486,7 @@ static void icmpv6_echo_reply(struct sk_buff *skb)
int hlimit;
int tclass;
- saddr = &skb->nh.ipv6h->daddr;
+ saddr = &ipv6_hdr(skb)->daddr;
if (!ipv6_unicast_destination(skb))
saddr = NULL;
@@ -491,7 +496,7 @@ static void icmpv6_echo_reply(struct sk_buff *skb)
memset(&fl, 0, sizeof(fl));
fl.proto = IPPROTO_ICMPV6;
- ipv6_addr_copy(&fl.fl6_dst, &skb->nh.ipv6h->saddr);
+ ipv6_addr_copy(&fl.fl6_dst, &ipv6_hdr(skb)->saddr);
if (saddr)
ipv6_addr_copy(&fl.fl6_src, saddr);
fl.oif = skb->dev->ifindex;
@@ -579,8 +584,8 @@ static void icmpv6_notify(struct sk_buff *skb, int type, int code, __be32 info)
if (!pskb_may_pull(skb, inner_offset+8))
return;
- saddr = &skb->nh.ipv6h->saddr;
- daddr = &skb->nh.ipv6h->daddr;
+ saddr = &ipv6_hdr(skb)->saddr;
+ daddr = &ipv6_hdr(skb)->daddr;
/* BUGGG_FUTURE: we should try to parse exthdrs in this packet.
Without this we will not able f.e. to make source routed
@@ -624,8 +629,8 @@ static int icmpv6_rcv(struct sk_buff **pskb)
ICMP6_INC_STATS_BH(idev, ICMP6_MIB_INMSGS);
- saddr = &skb->nh.ipv6h->saddr;
- daddr = &skb->nh.ipv6h->daddr;
+ saddr = &ipv6_hdr(skb)->saddr;
+ daddr = &ipv6_hdr(skb)->daddr;
/* Perform checksum. */
switch (skb->ip_summed) {
@@ -647,7 +652,7 @@ static int icmpv6_rcv(struct sk_buff **pskb)
if (!pskb_pull(skb, sizeof(struct icmp6hdr)))
goto discard_it;
- hdr = (struct icmp6hdr *) skb->h.raw;
+ hdr = icmp6_hdr(skb);
type = hdr->icmp6_type;
@@ -673,7 +678,7 @@ static int icmpv6_rcv(struct sk_buff **pskb)
*/
if (!pskb_may_pull(skb, sizeof(struct ipv6hdr)))
goto discard_it;
- hdr = (struct icmp6hdr *) skb->h.raw;
+ hdr = icmp6_hdr(skb);
orig_hdr = (struct ipv6hdr *) (hdr + 1);
rt6_pmtu_discovery(&orig_hdr->daddr, &orig_hdr->saddr, dev,
ntohl(hdr->icmp6_mtu));
@@ -727,7 +732,8 @@ static int icmpv6_rcv(struct sk_buff **pskb)
*/
icmpv6_notify(skb, type, hdr->icmp6_code, hdr->icmp6_mtu);
- };
+ }
+
kfree_skb(skb);
return 0;
@@ -860,11 +866,13 @@ int icmpv6_err_convert(int type, int code, int *err)
case ICMPV6_TIME_EXCEED:
*err = EHOSTUNREACH;
break;
- };
+ }
return fatal;
}
+EXPORT_SYMBOL(icmpv6_err_convert);
+
#ifdef CONFIG_SYSCTL
ctl_table ipv6_icmp_table[] = {
{
diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c
index 268f476ef3d..ca08ee88d07 100644
--- a/net/ipv6/ip6_fib.c
+++ b/net/ipv6/ip6_fib.c
@@ -359,7 +359,7 @@ end:
return res;
}
-int inet6_dump_fib(struct sk_buff *skb, struct netlink_callback *cb)
+static int inet6_dump_fib(struct sk_buff *skb, struct netlink_callback *cb)
{
unsigned int h, s_h;
unsigned int e = 0, s_e;
@@ -1486,6 +1486,8 @@ void __init fib6_init(void)
NULL, NULL);
fib6_tables_init();
+
+ __rtnl_register(PF_INET6, RTM_GETROUTE, NULL, inet6_dump_fib);
}
void fib6_gc_cleanup(void)
diff --git a/net/ipv6/ip6_input.c b/net/ipv6/ip6_input.c
index 61e7a6c8141..be0ee8a34f9 100644
--- a/net/ipv6/ip6_input.c
+++ b/net/ipv6/ip6_input.c
@@ -96,12 +96,12 @@ int ipv6_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt
if (unlikely(!pskb_may_pull(skb, sizeof(*hdr))))
goto err;
- hdr = skb->nh.ipv6h;
+ hdr = ipv6_hdr(skb);
if (hdr->version != 6)
goto err;
- skb->h.raw = (u8 *)(hdr + 1);
+ skb->transport_header = skb->network_header + sizeof(*hdr);
IP6CB(skb)->nhoff = offsetof(struct ipv6hdr, nexthdr);
pkt_len = ntohs(hdr->payload_len);
@@ -116,7 +116,7 @@ int ipv6_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt
IP6_INC_STATS_BH(idev, IPSTATS_MIB_INHDRERRORS);
goto drop;
}
- hdr = skb->nh.ipv6h;
+ hdr = ipv6_hdr(skb);
}
if (hdr->nexthdr == NEXTHDR_HOP) {
@@ -160,10 +160,10 @@ static inline int ip6_input_finish(struct sk_buff *skb)
rcu_read_lock();
resubmit:
idev = ip6_dst_idev(skb->dst);
- if (!pskb_pull(skb, skb->h.raw - skb->data))
+ if (!pskb_pull(skb, skb_transport_offset(skb)))
goto discard;
nhoff = IP6CB(skb)->nhoff;
- nexthdr = skb->nh.raw[nhoff];
+ nexthdr = skb_network_header(skb)[nhoff];
raw_sk = sk_head(&raw_v6_htable[nexthdr & (MAX_INET_PROTOS - 1)]);
if (raw_sk && !ipv6_raw_deliver(skb, nexthdr))
@@ -181,9 +181,9 @@ resubmit:
indefinitely. */
nf_reset(skb);
- skb_postpull_rcsum(skb, skb->nh.raw,
- skb->h.raw - skb->nh.raw);
- hdr = skb->nh.ipv6h;
+ skb_postpull_rcsum(skb, skb_network_header(skb),
+ skb_network_header_len(skb));
+ hdr = ipv6_hdr(skb);
if (ipv6_addr_is_multicast(&hdr->daddr) &&
!ipv6_chk_mcast_addr(skb->dev, &hdr->daddr,
&hdr->saddr) &&
@@ -234,7 +234,7 @@ int ip6_mc_input(struct sk_buff *skb)
IP6_INC_STATS_BH(ip6_dst_idev(skb->dst), IPSTATS_MIB_INMCASTPKTS);
- hdr = skb->nh.ipv6h;
+ hdr = ipv6_hdr(skb);
deliver = likely(!(skb->dev->flags & (IFF_PROMISC|IFF_ALLMULTI))) ||
ipv6_chk_mcast_addr(skb->dev, &hdr->daddr, NULL);
diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
index 305516921aa..f508171bab7 100644
--- a/net/ipv6/ip6_output.c
+++ b/net/ipv6/ip6_output.c
@@ -88,8 +88,8 @@ static inline int ip6_output_finish(struct sk_buff *skb)
/* dev_loopback_xmit for use with netfilter. */
static int ip6_dev_loopback_xmit(struct sk_buff *newskb)
{
- newskb->mac.raw = newskb->data;
- __skb_pull(newskb, newskb->nh.raw - newskb->data);
+ skb_reset_mac_header(newskb);
+ __skb_pull(newskb, skb_network_offset(newskb));
newskb->pkt_type = PACKET_LOOPBACK;
newskb->ip_summed = CHECKSUM_UNNECESSARY;
BUG_TRAP(newskb->dst);
@@ -107,13 +107,13 @@ static int ip6_output2(struct sk_buff *skb)
skb->protocol = htons(ETH_P_IPV6);
skb->dev = dev;
- if (ipv6_addr_is_multicast(&skb->nh.ipv6h->daddr)) {
+ if (ipv6_addr_is_multicast(&ipv6_hdr(skb)->daddr)) {
struct ipv6_pinfo* np = skb->sk ? inet6_sk(skb->sk) : NULL;
struct inet6_dev *idev = ip6_dst_idev(skb->dst);
if (!(dev->flags & IFF_LOOPBACK) && (!np || np->mc_loop) &&
- ipv6_chk_mcast_addr(dev, &skb->nh.ipv6h->daddr,
- &skb->nh.ipv6h->saddr)) {
+ ipv6_chk_mcast_addr(dev, &ipv6_hdr(skb)->daddr,
+ &ipv6_hdr(skb)->saddr)) {
struct sk_buff *newskb = skb_clone(skb, GFP_ATOMIC);
/* Do not check for IFF_ALLMULTI; multicast routing
@@ -124,7 +124,7 @@ static int ip6_output2(struct sk_buff *skb)
newskb->dev,
ip6_dev_loopback_xmit);
- if (skb->nh.ipv6h->hop_limit == 0) {
+ if (ipv6_hdr(skb)->hop_limit == 0) {
IP6_INC_STATS(idev, IPSTATS_MIB_OUTDISCARDS);
kfree_skb(skb);
return 0;
@@ -137,9 +137,17 @@ static int ip6_output2(struct sk_buff *skb)
return NF_HOOK(PF_INET6, NF_IP6_POST_ROUTING, skb,NULL, skb->dev,ip6_output_finish);
}
+static inline int ip6_skb_dst_mtu(struct sk_buff *skb)
+{
+ struct ipv6_pinfo *np = skb->sk ? inet6_sk(skb->sk) : NULL;
+
+ return (np && np->pmtudisc == IPV6_PMTUDISC_PROBE) ?
+ skb->dst->dev->mtu : dst_mtu(skb->dst);
+}
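
ip6_skb_dst_mtu() is what gives the new IPV6_PMTUDISC_PROBE mode its meaning: for probing sockets the device MTU is used instead of the cached path MTU. From userspace the mode is selected per socket; a sketch, with the constant guarded since installed headers may predate this tree:

#include <sys/socket.h>
#include <netinet/in.h>

#ifndef IPV6_PMTUDISC_PROBE
#define IPV6_PMTUDISC_PROBE 3		/* value assumed from this tree */
#endif

/* Ask the stack to send at the device MTU and leave path-MTU
 * probing to the application. */
static int enable_pmtu_probe(int sock)
{
	int val = IPV6_PMTUDISC_PROBE;

	return setsockopt(sock, IPPROTO_IPV6, IPV6_MTU_DISCOVER,
			  &val, sizeof(val));
}
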
+
int ip6_output(struct sk_buff *skb)
{
- if ((skb->len > dst_mtu(skb->dst) && !skb_is_gso(skb)) ||
+ if ((skb->len > ip6_skb_dst_mtu(skb) && !skb_is_gso(skb)) ||
dst_allfrag(skb->dst))
return ip6_fragment(skb, ip6_output2);
else
@@ -191,7 +199,9 @@ int ip6_xmit(struct sock *sk, struct sk_buff *skb, struct flowi *fl,
ipv6_push_nfrag_opts(skb, opt, &proto, &first_hop);
}
- hdr = skb->nh.ipv6h = (struct ipv6hdr*)skb_push(skb, sizeof(struct ipv6hdr));
+ skb_push(skb, sizeof(struct ipv6hdr));
+ skb_reset_network_header(skb);
+ hdr = ipv6_hdr(skb);
/*
* Fill in the IPv6 header
@@ -239,6 +249,8 @@ int ip6_xmit(struct sock *sk, struct sk_buff *skb, struct flowi *fl,
return -EMSGSIZE;
}
+EXPORT_SYMBOL(ip6_xmit);
+
/*
* To avoid extra problems ND packets are sent through this
* routine. It's code duplication but I really want to avoid
@@ -259,8 +271,9 @@ int ip6_nd_hdr(struct sock *sk, struct sk_buff *skb, struct net_device *dev,
totlen = len + sizeof(struct ipv6hdr);
- hdr = (struct ipv6hdr *) skb_put(skb, sizeof(struct ipv6hdr));
- skb->nh.ipv6h = hdr;
+ skb_reset_network_header(skb);
+ skb_put(skb, sizeof(struct ipv6hdr));
+ hdr = ipv6_hdr(skb);
*(__be32*)hdr = htonl(0x60000000);
@@ -305,7 +318,7 @@ static int ip6_call_ra_chain(struct sk_buff *skb, int sel)
static int ip6_forward_proxy_check(struct sk_buff *skb)
{
- struct ipv6hdr *hdr = skb->nh.ipv6h;
+ struct ipv6hdr *hdr = ipv6_hdr(skb);
u8 nexthdr = hdr->nexthdr;
int offset;
@@ -319,10 +332,11 @@ static int ip6_forward_proxy_check(struct sk_buff *skb)
if (nexthdr == IPPROTO_ICMPV6) {
struct icmp6hdr *icmp6;
- if (!pskb_may_pull(skb, skb->nh.raw + offset + 1 - skb->data))
+ if (!pskb_may_pull(skb, (skb_network_header(skb) +
+ offset + 1 - skb->data)))
return 0;
- icmp6 = (struct icmp6hdr *)(skb->nh.raw + offset);
+ icmp6 = (struct icmp6hdr *)(skb_network_header(skb) + offset);
switch (icmp6->icmp6_type) {
case NDISC_ROUTER_SOLICITATION:
@@ -361,7 +375,7 @@ static inline int ip6_forward_finish(struct sk_buff *skb)
int ip6_forward(struct sk_buff *skb)
{
struct dst_entry *dst = skb->dst;
- struct ipv6hdr *hdr = skb->nh.ipv6h;
+ struct ipv6hdr *hdr = ipv6_hdr(skb);
struct inet6_skb_parm *opt = IP6CB(skb);
if (ipv6_devconf.forwarding == 0)
@@ -372,7 +386,7 @@ int ip6_forward(struct sk_buff *skb)
goto drop;
}
- skb->ip_summed = CHECKSUM_NONE;
+ skb_forward_csum(skb);
/*
* We DO NOT make any processing on
@@ -388,7 +402,7 @@ int ip6_forward(struct sk_buff *skb)
* that different fragments will go along one path. --ANK
*/
if (opt->ra) {
- u8 *ptr = skb->nh.raw + opt->ra;
+ u8 *ptr = skb_network_header(skb) + opt->ra;
if (ip6_call_ra_chain(skb, (ptr[2]<<8) + ptr[3]))
return 0;
}
@@ -470,7 +484,7 @@ int ip6_forward(struct sk_buff *skb)
goto drop;
}
- hdr = skb->nh.ipv6h;
+ hdr = ipv6_hdr(skb);
/* Mangling hops number delayed to point after skb COW */
@@ -499,33 +513,18 @@ static void ip6_copy_metadata(struct sk_buff *to, struct sk_buff *from)
#ifdef CONFIG_NET_SCHED
to->tc_index = from->tc_index;
#endif
-#ifdef CONFIG_NETFILTER
- /* Connection association is same as pre-frag packet */
- nf_conntrack_put(to->nfct);
- to->nfct = from->nfct;
- nf_conntrack_get(to->nfct);
- to->nfctinfo = from->nfctinfo;
-#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
- nf_conntrack_put_reasm(to->nfct_reasm);
- to->nfct_reasm = from->nfct_reasm;
- nf_conntrack_get_reasm(to->nfct_reasm);
-#endif
-#ifdef CONFIG_BRIDGE_NETFILTER
- nf_bridge_put(to->nf_bridge);
- to->nf_bridge = from->nf_bridge;
- nf_bridge_get(to->nf_bridge);
-#endif
-#endif
+ nf_copy(to, from);
skb_copy_secmark(to, from);
}
int ip6_find_1stfragopt(struct sk_buff *skb, u8 **nexthdr)
{
u16 offset = sizeof(struct ipv6hdr);
- struct ipv6_opt_hdr *exthdr = (struct ipv6_opt_hdr*)(skb->nh.ipv6h + 1);
- unsigned int packet_len = skb->tail - skb->nh.raw;
+ struct ipv6_opt_hdr *exthdr =
+ (struct ipv6_opt_hdr *)(ipv6_hdr(skb) + 1);
+ unsigned int packet_len = skb->tail - skb->network_header;
int found_rhdr = 0;
- *nexthdr = &skb->nh.ipv6h->nexthdr;
+ *nexthdr = &ipv6_hdr(skb)->nexthdr;
while (offset + 1 <= packet_len) {
@@ -550,7 +549,8 @@ int ip6_find_1stfragopt(struct sk_buff *skb, u8 **nexthdr)
offset += ipv6_optlen(exthdr);
*nexthdr = &exthdr->nexthdr;
- exthdr = (struct ipv6_opt_hdr*)(skb->nh.raw + offset);
+ exthdr = (struct ipv6_opt_hdr *)(skb_network_header(skb) +
+ offset);
}
return offset;
@@ -574,7 +574,20 @@ static int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *))
hlen = ip6_find_1stfragopt(skb, &prevhdr);
nexthdr = *prevhdr;
- mtu = dst_mtu(&rt->u.dst);
+ mtu = ip6_skb_dst_mtu(skb);
+
+ /* We must not fragment if the socket is set to force MTU discovery
+ * or if the skb is not generated by a local socket. (This last
+ * check should be redundant, but it's free.)
+ */
+ if (!np || np->pmtudisc >= IPV6_PMTUDISC_DO) {
+ skb->dev = skb->dst->dev;
+ icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu, skb->dev);
+ IP6_INC_STATS(ip6_dst_idev(skb->dst), IPSTATS_MIB_FRAGFAILS);
+ kfree_skb(skb);
+ return -EMSGSIZE;
+ }
+
if (np && np->frag_size < mtu) {
if (np->frag_size)
mtu = np->frag_size;
@@ -616,7 +629,7 @@ static int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *))
/* BUILD HEADER */
*prevhdr = NEXTHDR_FRAGMENT;
- tmp_hdr = kmemdup(skb->nh.raw, hlen, GFP_ATOMIC);
+ tmp_hdr = kmemdup(skb_network_header(skb), hlen, GFP_ATOMIC);
if (!tmp_hdr) {
IP6_INC_STATS(ip6_dst_idev(skb->dst), IPSTATS_MIB_FRAGFAILS);
return -ENOMEM;
@@ -624,8 +637,9 @@ static int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *))
__skb_pull(skb, hlen);
fh = (struct frag_hdr*)__skb_push(skb, sizeof(struct frag_hdr));
- skb->nh.raw = __skb_push(skb, hlen);
- memcpy(skb->nh.raw, tmp_hdr, hlen);
+ __skb_push(skb, hlen);
+ skb_reset_network_header(skb);
+ memcpy(skb_network_header(skb), tmp_hdr, hlen);
ipv6_select_ident(skb, fh);
fh->nexthdr = nexthdr;
@@ -636,7 +650,8 @@ static int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *))
first_len = skb_pagelen(skb);
skb->data_len = first_len - skb_headlen(skb);
skb->len = first_len;
- skb->nh.ipv6h->payload_len = htons(first_len - sizeof(struct ipv6hdr));
+ ipv6_hdr(skb)->payload_len = htons(first_len -
+ sizeof(struct ipv6hdr));
dst_hold(&rt->u.dst);
@@ -645,10 +660,12 @@ static int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *))
* before previous one went down. */
if (frag) {
frag->ip_summed = CHECKSUM_NONE;
- frag->h.raw = frag->data;
+ skb_reset_transport_header(frag);
fh = (struct frag_hdr*)__skb_push(frag, sizeof(struct frag_hdr));
- frag->nh.raw = __skb_push(frag, hlen);
- memcpy(frag->nh.raw, tmp_hdr, hlen);
+ __skb_push(frag, hlen);
+ skb_reset_network_header(frag);
+ memcpy(skb_network_header(frag), tmp_hdr,
+ hlen);
offset += skb->len - hlen - sizeof(struct frag_hdr);
fh->nexthdr = nexthdr;
fh->reserved = 0;
@@ -656,7 +673,9 @@ static int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *))
if (frag->next != NULL)
fh->frag_off |= htons(IP6_MF);
fh->identification = frag_id;
- frag->nh.ipv6h->payload_len = htons(frag->len - sizeof(struct ipv6hdr));
+ ipv6_hdr(frag)->payload_len =
+ htons(frag->len -
+ sizeof(struct ipv6hdr));
ip6_copy_metadata(frag, skb);
}
@@ -733,9 +752,10 @@ slow_path:
ip6_copy_metadata(frag, skb);
skb_reserve(frag, LL_RESERVED_SPACE(rt->u.dst.dev));
skb_put(frag, len + hlen + sizeof(struct frag_hdr));
- frag->nh.raw = frag->data;
- fh = (struct frag_hdr*)(frag->data + hlen);
- frag->h.raw = frag->data + hlen + sizeof(struct frag_hdr);
+ skb_reset_network_header(frag);
+ fh = (struct frag_hdr *)(skb_network_header(frag) + hlen);
+ frag->transport_header = (frag->network_header + hlen +
+ sizeof(struct frag_hdr));
/*
* Charge the memory for the fragment to any owner
@@ -747,7 +767,7 @@ slow_path:
/*
* Copy the packet header into the new buffer.
*/
- memcpy(frag->nh.raw, skb->data, hlen);
+ skb_copy_from_linear_data(skb, skb_network_header(frag), hlen);
/*
* Build fragment header.
@@ -763,14 +783,15 @@ slow_path:
/*
* Copy a block of the IP datagram.
*/
- if (skb_copy_bits(skb, ptr, frag->h.raw, len))
+ if (skb_copy_bits(skb, ptr, skb_transport_header(frag), len))
BUG();
left -= len;
fh->frag_off = htons(offset);
if (left > 0)
fh->frag_off |= htons(IP6_MF);
- frag->nh.ipv6h->payload_len = htons(frag->len - sizeof(struct ipv6hdr));
+ ipv6_hdr(frag)->payload_len = htons(frag->len -
+ sizeof(struct ipv6hdr));
ptr += len;
offset += len;
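
For concreteness, the slow-path sizing can be reproduced in isolation: with a bare 40-byte IPv6 header as the unfragmentable part and the 8-byte fragment header, a 1500-byte link carries (1500 - 40 - 8) & ~7 = 1448 payload bytes per full fragment, and fragment offsets stay multiples of 8. A sketch with hypothetical sizes:

#include <stdio.h>

int main(void)
{
	unsigned int mtu = 1500, hlen = 40;	/* unfragmentable part */
	unsigned int fh = 8;			/* sizeof(struct frag_hdr) */
	unsigned int maxfrag = (mtu - hlen - fh) & ~7u;	/* 1448 */
	unsigned int payload = 3000, ptr = 0;

	while (ptr < payload) {
		unsigned int len = payload - ptr;

		if (len > maxfrag)
			len = maxfrag;
		printf("frag_off=%u len=%u MF=%u\n",
		       ptr, len, ptr + len < payload);
		ptr += len;
	}
	return 0;
}
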
@@ -861,6 +882,41 @@ static int ip6_dst_lookup_tail(struct sock *sk,
goto out_err_release;
}
+#ifdef CONFIG_IPV6_OPTIMISTIC_DAD
+ /*
+ * If the dst entry we've looked up has a neighbour
+ * entry in the INCOMPLETE state and the source
+ * address from the flow is marked OPTIMISTIC, we
+ * release the dst entry we found and use the dst
+ * entry of the nexthop router instead.
+ */
+ if (!((*dst)->neighbour->nud_state & NUD_VALID)) {
+ struct inet6_ifaddr *ifp;
+ struct flowi fl_gw;
+ int redirect;
+
+ ifp = ipv6_get_ifaddr(&fl->fl6_src, (*dst)->dev, 1);
+
+ redirect = (ifp && ifp->flags & IFA_F_OPTIMISTIC);
+ if (ifp)
+ in6_ifa_put(ifp);
+
+ if (redirect) {
+ /*
+ * We need to get the dst entry for the
+ * default router instead
+ */
+ dst_release(*dst);
+ memcpy(&fl_gw, fl, sizeof(struct flowi));
+ memset(&fl_gw.fl6_dst, 0, sizeof(struct in6_addr));
+ *dst = ip6_route_output(sk, &fl_gw);
+ if ((err = (*dst)->error))
+ goto out_err_release;
+ }
+ }
+#endif
+
return 0;
out_err_release:
@@ -939,10 +995,10 @@ static inline int ip6_ufo_append_data(struct sock *sk,
skb_put(skb,fragheaderlen + transhdrlen);
/* initialize network header pointer */
- skb->nh.raw = skb->data;
+ skb_reset_network_header(skb);
/* initialize protocol header pointer */
- skb->h.raw = skb->data + fragheaderlen;
+ skb->transport_header = skb->network_header + fragheaderlen;
skb->ip_summed = CHECKSUM_PARTIAL;
skb->csum = 0;
@@ -1015,7 +1071,8 @@ int ip6_append_data(struct sock *sk, int getfrag(void *from, char *to,
inet->cork.fl = *fl;
np->cork.hop_limit = hlimit;
np->cork.tclass = tclass;
- mtu = dst_mtu(rt->u.dst.path);
+ mtu = np->pmtudisc == IPV6_PMTUDISC_PROBE ?
+ rt->u.dst.dev->mtu : dst_mtu(rt->u.dst.path);
if (np->frag_size < mtu) {
if (np->frag_size)
mtu = np->frag_size;
@@ -1162,10 +1219,10 @@ alloc_new_skb:
* Find where to start putting bytes
*/
data = skb_put(skb, fraglen);
- skb->nh.raw = data + exthdrlen;
+ skb_set_network_header(skb, exthdrlen);
data += fragheaderlen;
- skb->h.raw = data + exthdrlen;
-
+ skb->transport_header = (skb->network_header +
+ fragheaderlen);
if (fraggap) {
skb->csum = skb_copy_and_csum_bits(
skb_prev, maxfraglen,
@@ -1288,10 +1345,10 @@ int ip6_push_pending_frames(struct sock *sk)
tail_skb = &(skb_shinfo(skb)->frag_list);
/* move skb->data to ip header from ext header */
- if (skb->data < skb->nh.raw)
- __skb_pull(skb, skb->nh.raw - skb->data);
+ if (skb->data < skb_network_header(skb))
+ __skb_pull(skb, skb_network_offset(skb));
while ((tmp_skb = __skb_dequeue(&sk->sk_write_queue)) != NULL) {
- __skb_pull(tmp_skb, skb->h.raw - skb->nh.raw);
+ __skb_pull(tmp_skb, skb_network_header_len(skb));
*tail_skb = tmp_skb;
tail_skb = &(tmp_skb->next);
skb->len += tmp_skb->len;
@@ -1303,13 +1360,15 @@ int ip6_push_pending_frames(struct sock *sk)
}
ipv6_addr_copy(final_dst, &fl->fl6_dst);
- __skb_pull(skb, skb->h.raw - skb->nh.raw);
+ __skb_pull(skb, skb_network_header_len(skb));
if (opt && opt->opt_flen)
ipv6_push_frag_opts(skb, opt, &proto);
if (opt && opt->opt_nflen)
ipv6_push_nfrag_opts(skb, opt, &proto, &final_dst);
- skb->nh.ipv6h = hdr = (struct ipv6hdr*) skb_push(skb, sizeof(struct ipv6hdr));
+ skb_push(skb, sizeof(struct ipv6hdr));
+ skb_reset_network_header(skb);
+ hdr = ipv6_hdr(skb);
*(__be32*)hdr = fl->fl6_flowlabel |
htonl(0x60000000 | ((int)np->cork.tclass << 20));
diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c
index 08d944223ec..a0902fbdb4e 100644
--- a/net/ipv6/ip6_tunnel.c
+++ b/net/ipv6/ip6_tunnel.c
@@ -1,14 +1,15 @@
/*
- * IPv6 over IPv6 tunnel device
+ * IPv6 tunneling device
* Linux INET6 implementation
*
* Authors:
* Ville Nuorvala <vnuorval@tcs.hut.fi>
+ * Yasuyuki Kozakai <kozakai@linux-ipv6.org>
*
* $Id$
*
* Based on:
- * linux/net/ipv6/sit.c
+ * linux/net/ipv6/sit.c and linux/net/ipv4/ipip.c
*
* RFC 2473
*
@@ -24,6 +25,7 @@
#include <linux/errno.h>
#include <linux/types.h>
#include <linux/sockios.h>
+#include <linux/icmp.h>
#include <linux/if.h>
#include <linux/in.h>
#include <linux/ip.h>
@@ -41,6 +43,7 @@
#include <asm/uaccess.h>
#include <asm/atomic.h>
+#include <net/icmp.h>
#include <net/ip.h>
#include <net/ipv6.h>
#include <net/ip6_route.h>
@@ -51,7 +54,7 @@
#include <net/inet_ecn.h>
MODULE_AUTHOR("Ville Nuorvala");
-MODULE_DESCRIPTION("IPv6-in-IPv6 tunnel");
+MODULE_DESCRIPTION("IPv6 tunneling device");
MODULE_LICENSE("GPL");
#define IPV6_TLV_TEL_DST_SIZE 8
@@ -63,6 +66,7 @@ MODULE_LICENSE("GPL");
#endif
#define IPV6_TCLASS_MASK (IPV6_FLOWINFO_MASK & ~IPV6_FLOWLABEL_MASK)
+#define IPV6_TCLASS_SHIFT 20
#define HASH_SIZE 32
@@ -70,12 +74,12 @@ MODULE_LICENSE("GPL");
(addr)->s6_addr32[2] ^ (addr)->s6_addr32[3]) & \
(HASH_SIZE - 1))
-static int ip6ip6_fb_tnl_dev_init(struct net_device *dev);
-static int ip6ip6_tnl_dev_init(struct net_device *dev);
-static void ip6ip6_tnl_dev_setup(struct net_device *dev);
+static int ip6_fb_tnl_dev_init(struct net_device *dev);
+static int ip6_tnl_dev_init(struct net_device *dev);
+static void ip6_tnl_dev_setup(struct net_device *dev);
/* the IPv6 tunnel fallback device */
-static struct net_device *ip6ip6_fb_tnl_dev;
+static struct net_device *ip6_fb_tnl_dev;
/* lists for storing tunnels in use */
@@ -84,7 +88,7 @@ static struct ip6_tnl *tnls_wc[1];
static struct ip6_tnl **tnls[2] = { tnls_wc, tnls_r_l };
/* lock for the tunnel lists */
-static DEFINE_RWLOCK(ip6ip6_lock);
+static DEFINE_RWLOCK(ip6_tnl_lock);
static inline struct dst_entry *ip6_tnl_dst_check(struct ip6_tnl *t)
{
@@ -115,7 +119,7 @@ static inline void ip6_tnl_dst_store(struct ip6_tnl *t, struct dst_entry *dst)
}
/**
- * ip6ip6_tnl_lookup - fetch tunnel matching the end-point addresses
+ * ip6_tnl_lookup - fetch tunnel matching the end-point addresses
* @remote: the address of the tunnel exit-point
* @local: the address of the tunnel entry-point
*
@@ -126,7 +130,7 @@ static inline void ip6_tnl_dst_store(struct ip6_tnl *t, struct dst_entry *dst)
**/
static struct ip6_tnl *
-ip6ip6_tnl_lookup(struct in6_addr *remote, struct in6_addr *local)
+ip6_tnl_lookup(struct in6_addr *remote, struct in6_addr *local)
{
unsigned h0 = HASH(remote);
unsigned h1 = HASH(local);
@@ -145,18 +149,18 @@ ip6ip6_tnl_lookup(struct in6_addr *remote, struct in6_addr *local)
}
/**
- * ip6ip6_bucket - get head of list matching given tunnel parameters
+ * ip6_tnl_bucket - get head of list matching given tunnel parameters
* @p: parameters containing tunnel end-points
*
* Description:
- * ip6ip6_bucket() returns the head of the list matching the
+ * ip6_tnl_bucket() returns the head of the list matching the
* &struct in6_addr entries laddr and raddr in @p.
*
* Return: head of IPv6 tunnel list
**/
static struct ip6_tnl **
-ip6ip6_bucket(struct ip6_tnl_parm *p)
+ip6_tnl_bucket(struct ip6_tnl_parm *p)
{
struct in6_addr *remote = &p->raddr;
struct in6_addr *local = &p->laddr;
@@ -171,36 +175,36 @@ ip6ip6_bucket(struct ip6_tnl_parm *p)
}
/**
- * ip6ip6_tnl_link - add tunnel to hash table
+ * ip6_tnl_link - add tunnel to hash table
* @t: tunnel to be added
**/
static void
-ip6ip6_tnl_link(struct ip6_tnl *t)
+ip6_tnl_link(struct ip6_tnl *t)
{
- struct ip6_tnl **tp = ip6ip6_bucket(&t->parms);
+ struct ip6_tnl **tp = ip6_tnl_bucket(&t->parms);
t->next = *tp;
- write_lock_bh(&ip6ip6_lock);
+ write_lock_bh(&ip6_tnl_lock);
*tp = t;
- write_unlock_bh(&ip6ip6_lock);
+ write_unlock_bh(&ip6_tnl_lock);
}
/**
- * ip6ip6_tnl_unlink - remove tunnel from hash table
+ * ip6_tnl_unlink - remove tunnel from hash table
* @t: tunnel to be removed
**/
static void
-ip6ip6_tnl_unlink(struct ip6_tnl *t)
+ip6_tnl_unlink(struct ip6_tnl *t)
{
struct ip6_tnl **tp;
- for (tp = ip6ip6_bucket(&t->parms); *tp; tp = &(*tp)->next) {
+ for (tp = ip6_tnl_bucket(&t->parms); *tp; tp = &(*tp)->next) {
if (t == *tp) {
- write_lock_bh(&ip6ip6_lock);
+ write_lock_bh(&ip6_tnl_lock);
*tp = t->next;
- write_unlock_bh(&ip6ip6_lock);
+ write_unlock_bh(&ip6_tnl_lock);
break;
}
}
@@ -237,12 +241,12 @@ static struct ip6_tnl *ip6_tnl_create(struct ip6_tnl_parm *p)
if (i == IP6_TNL_MAX)
goto failed;
}
- dev = alloc_netdev(sizeof (*t), name, ip6ip6_tnl_dev_setup);
+ dev = alloc_netdev(sizeof (*t), name, ip6_tnl_dev_setup);
if (dev == NULL)
goto failed;
t = netdev_priv(dev);
- dev->init = ip6ip6_tnl_dev_init;
+ dev->init = ip6_tnl_dev_init;
t->parms = *p;
if ((err = register_netdevice(dev)) < 0) {
@@ -250,19 +254,19 @@ static struct ip6_tnl *ip6_tnl_create(struct ip6_tnl_parm *p)
goto failed;
}
dev_hold(dev);
- ip6ip6_tnl_link(t);
+ ip6_tnl_link(t);
return t;
failed:
return NULL;
}
/**
- * ip6ip6_tnl_locate - find or create tunnel matching given parameters
+ * ip6_tnl_locate - find or create tunnel matching given parameters
* @p: tunnel parameters
* @create: != 0 if a new tunnel may be created when no match is found
*
* Description:
- * ip6ip6_tnl_locate() first tries to locate an existing tunnel
+ * ip6_tnl_locate() first tries to locate an existing tunnel
* based on @p. If this is unsuccessful but @create is set, a new
* tunnel device is created and registered for use.
*
@@ -270,13 +274,13 @@ failed:
* matching tunnel or NULL
**/
-static struct ip6_tnl *ip6ip6_tnl_locate(struct ip6_tnl_parm *p, int create)
+static struct ip6_tnl *ip6_tnl_locate(struct ip6_tnl_parm *p, int create)
{
struct in6_addr *remote = &p->raddr;
struct in6_addr *local = &p->laddr;
struct ip6_tnl *t;
- for (t = *ip6ip6_bucket(p); t; t = t->next) {
+ for (t = *ip6_tnl_bucket(p); t; t = t->next) {
if (ipv6_addr_equal(local, &t->parms.laddr) &&
ipv6_addr_equal(remote, &t->parms.raddr))
return t;
@@ -287,24 +291,24 @@ static struct ip6_tnl *ip6ip6_tnl_locate(struct ip6_tnl_parm *p, int create)
}
/**
- * ip6ip6_tnl_dev_uninit - tunnel device uninitializer
+ * ip6_tnl_dev_uninit - tunnel device uninitializer
* @dev: the device to be destroyed
*
* Description:
- * ip6ip6_tnl_dev_uninit() removes tunnel from its list
+ * ip6_tnl_dev_uninit() removes tunnel from its list
**/
static void
-ip6ip6_tnl_dev_uninit(struct net_device *dev)
+ip6_tnl_dev_uninit(struct net_device *dev)
{
struct ip6_tnl *t = netdev_priv(dev);
- if (dev == ip6ip6_fb_tnl_dev) {
- write_lock_bh(&ip6ip6_lock);
+ if (dev == ip6_fb_tnl_dev) {
+ write_lock_bh(&ip6_tnl_lock);
tnls_wc[0] = NULL;
- write_unlock_bh(&ip6ip6_lock);
+ write_unlock_bh(&ip6_tnl_lock);
} else {
- ip6ip6_tnl_unlink(t);
+ ip6_tnl_unlink(t);
}
ip6_tnl_dst_reset(t);
dev_put(dev);
@@ -372,16 +376,16 @@ parse_tlv_tnl_enc_lim(struct sk_buff *skb, __u8 * raw)
}
/**
- * ip6ip6_err - tunnel error handler
+ * ip6_tnl_err - tunnel error handler
*
* Description:
- * ip6ip6_err() should handle errors in the tunnel according
+ * ip6_tnl_err() should handle errors in the tunnel according
* to the specifications in RFC 2473.
**/
static int
-ip6ip6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
- int type, int code, int offset, __be32 info)
+ip6_tnl_err(struct sk_buff *skb, __u8 ipproto, struct inet6_skb_parm *opt,
+ int *type, int *code, int *msg, __be32 *info, int offset)
{
struct ipv6hdr *ipv6h = (struct ipv6hdr *) skb->data;
struct ip6_tnl *t;
@@ -396,13 +400,16 @@ ip6ip6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
in trouble since we might need the source address for further
processing of the error. */
- read_lock(&ip6ip6_lock);
- if ((t = ip6ip6_tnl_lookup(&ipv6h->daddr, &ipv6h->saddr)) == NULL)
+ read_lock(&ip6_tnl_lock);
+ if ((t = ip6_tnl_lookup(&ipv6h->daddr, &ipv6h->saddr)) == NULL)
+ goto out;
+
+ if (t->parms.proto != ipproto && t->parms.proto != 0)
goto out;
err = 0;
- switch (type) {
+ switch (*type) {
__u32 teli;
struct ipv6_tlv_tnl_enc_lim *tel;
__u32 mtu;
@@ -414,7 +421,7 @@ ip6ip6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
rel_msg = 1;
break;
case ICMPV6_TIME_EXCEED:
- if (code == ICMPV6_EXC_HOPLIMIT) {
+ if ((*code) == ICMPV6_EXC_HOPLIMIT) {
if (net_ratelimit())
printk(KERN_WARNING
"%s: Too small hop limit or "
@@ -425,10 +432,10 @@ ip6ip6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
break;
case ICMPV6_PARAMPROB:
teli = 0;
- if (code == ICMPV6_HDR_FIELD)
+ if ((*code) == ICMPV6_HDR_FIELD)
teli = parse_tlv_tnl_enc_lim(skb, skb->data);
- if (teli && teli == ntohl(info) - 2) {
+ if (teli && teli == ntohl(*info) - 2) {
tel = (struct ipv6_tlv_tnl_enc_lim *) &skb->data[teli];
if (tel->encap_limit == 0) {
if (net_ratelimit())
@@ -445,7 +452,7 @@ ip6ip6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
}
break;
case ICMPV6_PKT_TOOBIG:
- mtu = ntohl(info) - offset;
+ mtu = ntohl(*info) - offset;
if (mtu < IPV6_MIN_MTU)
mtu = IPV6_MIN_MTU;
t->dev->mtu = mtu;
@@ -458,20 +465,144 @@ ip6ip6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
}
break;
}
- if (rel_msg && pskb_may_pull(skb, offset + sizeof (*ipv6h))) {
+
+ *type = rel_type;
+ *code = rel_code;
+ *info = rel_info;
+ *msg = rel_msg;
+
+out:
+ read_unlock(&ip6_tnl_lock);
+ return err;
+}
+
+static int
+ip4ip6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
+ int type, int code, int offset, __u32 info)
+{
+ int rel_msg = 0;
+ int rel_type = type;
+ int rel_code = code;
+ __u32 rel_info = info;
+ int err;
+ struct sk_buff *skb2;
+ struct iphdr *eiph;
+ struct flowi fl;
+ struct rtable *rt;
+
+ err = ip6_tnl_err(skb, IPPROTO_IPIP, opt, &rel_type, &rel_code,
+ &rel_msg, &rel_info, offset);
+ if (err < 0)
+ return err;
+
+ if (rel_msg == 0)
+ return 0;
+
+ switch (rel_type) {
+ case ICMPV6_DEST_UNREACH:
+ if (rel_code != ICMPV6_ADDR_UNREACH)
+ return 0;
+ rel_type = ICMP_DEST_UNREACH;
+ rel_code = ICMP_HOST_UNREACH;
+ break;
+ case ICMPV6_PKT_TOOBIG:
+ if (rel_code != 0)
+ return 0;
+ rel_type = ICMP_DEST_UNREACH;
+ rel_code = ICMP_FRAG_NEEDED;
+ break;
+ default:
+ return 0;
+ }
+
+ if (!pskb_may_pull(skb, offset + sizeof(struct iphdr)))
+ return 0;
+
+ skb2 = skb_clone(skb, GFP_ATOMIC);
+ if (!skb2)
+ return 0;
+
+ dst_release(skb2->dst);
+ skb2->dst = NULL;
+ skb_pull(skb2, offset);
+ skb_reset_network_header(skb2);
+ eiph = ip_hdr(skb2);
+
+ /* Try to guess incoming interface */
+ memset(&fl, 0, sizeof(fl));
+ fl.fl4_dst = eiph->saddr;
+ fl.fl4_tos = RT_TOS(eiph->tos);
+ fl.proto = IPPROTO_IPIP;
+ if (ip_route_output_key(&rt, &fl))
+ goto out;
+
+ skb2->dev = rt->u.dst.dev;
+
+ /* route "incoming" packet */
+ if (rt->rt_flags & RTCF_LOCAL) {
+ ip_rt_put(rt);
+ rt = NULL;
+ fl.fl4_dst = eiph->daddr;
+ fl.fl4_src = eiph->saddr;
+ fl.fl4_tos = eiph->tos;
+ if (ip_route_output_key(&rt, &fl) ||
+ rt->u.dst.dev->type != ARPHRD_TUNNEL) {
+ ip_rt_put(rt);
+ goto out;
+ }
+ } else {
+ ip_rt_put(rt);
+ if (ip_route_input(skb2, eiph->daddr, eiph->saddr, eiph->tos,
+ skb2->dev) ||
+ skb2->dst->dev->type != ARPHRD_TUNNEL)
+ goto out;
+ }
+
+ /* change mtu on this route */
+ if (rel_type == ICMP_DEST_UNREACH && rel_code == ICMP_FRAG_NEEDED) {
+ if (rel_info > dst_mtu(skb2->dst))
+ goto out;
+
+ skb2->dst->ops->update_pmtu(skb2->dst, rel_info);
+ rel_info = htonl(rel_info);
+ }
+
+ icmp_send(skb2, rel_type, rel_code, rel_info);
+
+out:
+ kfree_skb(skb2);
+ return 0;
+}
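
ip4ip6_err() above relays a tunnel-level ICMPv6 error back to the IPv4 sender; only two error classes translate, everything else is swallowed. The mapping, restated as a standalone helper (a sketch, not kernel code):

#include <linux/icmp.h>
#include <linux/icmpv6.h>

/* Returns non-zero if the ICMPv6 error is relayable as ICMPv4. */
static int icmp6_to_icmp4(int type6, int code6, int *type4, int *code4)
{
	switch (type6) {
	case ICMPV6_DEST_UNREACH:
		if (code6 != ICMPV6_ADDR_UNREACH)
			return 0;
		*type4 = ICMP_DEST_UNREACH;
		*code4 = ICMP_HOST_UNREACH;
		return 1;
	case ICMPV6_PKT_TOOBIG:
		if (code6 != 0)
			return 0;		/* PKT_TOOBIG has code 0 */
		*type4 = ICMP_DEST_UNREACH;
		*code4 = ICMP_FRAG_NEEDED;
		return 1;
	default:
		return 0;			/* everything else is dropped */
	}
}
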
+
+static int
+ip6ip6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
+ int type, int code, int offset, __u32 info)
+{
+ int rel_msg = 0;
+ int rel_type = type;
+ int rel_code = code;
+ __u32 rel_info = info;
+ int err;
+
+ err = ip6_tnl_err(skb, IPPROTO_IPV6, opt, &rel_type, &rel_code,
+ &rel_msg, &rel_info, offset);
+ if (err < 0)
+ return err;
+
+ if (rel_msg && pskb_may_pull(skb, offset + sizeof(struct ipv6hdr))) {
struct rt6_info *rt;
struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
if (!skb2)
- goto out;
+ return 0;
dst_release(skb2->dst);
skb2->dst = NULL;
skb_pull(skb2, offset);
- skb2->nh.raw = skb2->data;
+ skb_reset_network_header(skb2);
/* Try to guess incoming interface */
- rt = rt6_lookup(&skb2->nh.ipv6h->saddr, NULL, 0, 0);
+ rt = rt6_lookup(&ipv6_hdr(skb2)->saddr, NULL, 0, 0);
if (rt && rt->rt6i_dev)
skb2->dev = rt->rt6i_dev;
@@ -483,19 +614,34 @@ ip6ip6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
kfree_skb(skb2);
}
-out:
- read_unlock(&ip6ip6_lock);
- return err;
+
+ return 0;
}
-static inline void ip6ip6_ecn_decapsulate(struct ipv6hdr *outer_iph,
- struct sk_buff *skb)
+static void ip4ip6_dscp_ecn_decapsulate(struct ip6_tnl *t,
+ struct ipv6hdr *ipv6h,
+ struct sk_buff *skb)
{
- struct ipv6hdr *inner_iph = skb->nh.ipv6h;
+ __u8 dsfield = ipv6_get_dsfield(ipv6h) & ~INET_ECN_MASK;
- if (INET_ECN_is_ce(ipv6_get_dsfield(outer_iph)))
- IP6_ECN_set_ce(inner_iph);
+ if (t->parms.flags & IP6_TNL_F_RCV_DSCP_COPY)
+ ipv4_change_dsfield(ip_hdr(skb), INET_ECN_MASK, dsfield);
+
+ if (INET_ECN_is_ce(dsfield))
+ IP_ECN_set_ce(ip_hdr(skb));
+}
+
+static void ip6ip6_dscp_ecn_decapsulate(struct ip6_tnl *t,
+ struct ipv6hdr *ipv6h,
+ struct sk_buff *skb)
+{
+ if (t->parms.flags & IP6_TNL_F_RCV_DSCP_COPY)
+ ipv6_copy_dscp(ipv6h, ipv6_hdr(skb));
+
+ if (INET_ECN_is_ce(ipv6_get_dsfield(ipv6h)))
+ IP6_ECN_set_ce(ipv6_hdr(skb));
}
+
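
Both decapsulators apply the same DS-field policy: the DSCP bits are copied inward only when IP6_TNL_F_RCV_DSCP_COPY is set, and a congestion mark propagates only onto an ECN-capable inner packet. A host-order model of the inner DS byte (illustrative only; the kernel helpers operate on the real headers):

#define ECN_MASK	0x03
#define ECN_CE		0x03	/* Congestion Experienced */

static unsigned char inner_ds_after_decap(unsigned char outer,
					  unsigned char inner,
					  int rcv_dscp_copy)
{
	if (rcv_dscp_copy)	/* IP6_TNL_F_RCV_DSCP_COPY */
		inner = (outer & ~ECN_MASK) | (inner & ECN_MASK);

	/* set CE only if the inner packet is ECN-capable (ECT != 00) */
	if ((outer & ECN_MASK) == ECN_CE && (inner & ECN_MASK) != 0)
		inner |= ECN_CE;

	return inner;
}
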
static inline int ip6_tnl_rcv_ctl(struct ip6_tnl *t)
{
struct ip6_tnl_parm *p = &t->parms;
@@ -519,53 +665,61 @@ static inline int ip6_tnl_rcv_ctl(struct ip6_tnl *t)
}
/**
- * ip6ip6_rcv - decapsulate IPv6 packet and retransmit it locally
+ * ip6_tnl_rcv - decapsulate IPv6 packet and retransmit it locally
* @skb: received socket buffer
+ * @protocol: ethernet protocol ID
+ * @dscp_ecn_decapsulate: the function to decapsulate DSCP code and ECN
*
* Return: 0
**/
-static int
-ip6ip6_rcv(struct sk_buff *skb)
+static int ip6_tnl_rcv(struct sk_buff *skb, __u16 protocol,
+ __u8 ipproto,
+ void (*dscp_ecn_decapsulate)(struct ip6_tnl *t,
+ struct ipv6hdr *ipv6h,
+ struct sk_buff *skb))
{
- struct ipv6hdr *ipv6h;
struct ip6_tnl *t;
+ struct ipv6hdr *ipv6h = ipv6_hdr(skb);
- ipv6h = skb->nh.ipv6h;
+ read_lock(&ip6_tnl_lock);
- read_lock(&ip6ip6_lock);
+ if ((t = ip6_tnl_lookup(&ipv6h->saddr, &ipv6h->daddr)) != NULL) {
+ if (t->parms.proto != ipproto && t->parms.proto != 0) {
+ read_unlock(&ip6_tnl_lock);
+ goto discard;
+ }
- if ((t = ip6ip6_tnl_lookup(&ipv6h->saddr, &ipv6h->daddr)) != NULL) {
if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb)) {
- read_unlock(&ip6ip6_lock);
+ read_unlock(&ip6_tnl_lock);
goto discard;
}
if (!ip6_tnl_rcv_ctl(t)) {
t->stat.rx_dropped++;
- read_unlock(&ip6ip6_lock);
+ read_unlock(&ip6_tnl_lock);
goto discard;
}
secpath_reset(skb);
- skb->mac.raw = skb->nh.raw;
- skb->nh.raw = skb->data;
- skb->protocol = htons(ETH_P_IPV6);
+ skb->mac_header = skb->network_header;
+ skb_reset_network_header(skb);
+ skb->protocol = htons(protocol);
skb->pkt_type = PACKET_HOST;
memset(skb->cb, 0, sizeof(struct inet6_skb_parm));
skb->dev = t->dev;
dst_release(skb->dst);
skb->dst = NULL;
nf_reset(skb);
- if (t->parms.flags & IP6_TNL_F_RCV_DSCP_COPY)
- ipv6_copy_dscp(ipv6h, skb->nh.ipv6h);
- ip6ip6_ecn_decapsulate(ipv6h, skb);
+
+ dscp_ecn_decapsulate(t, ipv6h, skb);
+
t->stat.rx_packets++;
t->stat.rx_bytes += skb->len;
netif_rx(skb);
- read_unlock(&ip6ip6_lock);
+ read_unlock(&ip6_tnl_lock);
return 0;
}
- read_unlock(&ip6ip6_lock);
+ read_unlock(&ip6_tnl_lock);
return 1;
discard:
@@ -573,6 +727,18 @@ discard:
return 0;
}
+static int ip4ip6_rcv(struct sk_buff *skb)
+{
+ return ip6_tnl_rcv(skb, ETH_P_IP, IPPROTO_IPIP,
+ ip4ip6_dscp_ecn_decapsulate);
+}
+
+static int ip6ip6_rcv(struct sk_buff *skb)
+{
+ return ip6_tnl_rcv(skb, ETH_P_IPV6, IPPROTO_IPV6,
+ ip6ip6_dscp_ecn_decapsulate);
+}
+
struct ipv6_tel_txoption {
struct ipv6_txoptions ops;
__u8 dst_opt[8];
@@ -593,7 +759,7 @@ static void init_tel_txopt(struct ipv6_tel_txoption *opt, __u8 encap_limit)
}
/**
- * ip6ip6_tnl_addr_conflict - compare packet addresses to tunnel's own
+ * ip6_tnl_addr_conflict - compare packet addresses to tunnel's own
* @t: the outgoing tunnel device
* @hdr: IPv6 header from the incoming packet
*
@@ -607,7 +773,7 @@ static void init_tel_txopt(struct ipv6_tel_txoption *opt, __u8 encap_limit)
**/
static inline int
-ip6ip6_tnl_addr_conflict(struct ip6_tnl *t, struct ipv6hdr *hdr)
+ip6_tnl_addr_conflict(struct ip6_tnl *t, struct ipv6hdr *hdr)
{
return ipv6_addr_equal(&t->parms.raddr, &hdr->saddr);
}
@@ -641,72 +807,49 @@ static inline int ip6_tnl_xmit_ctl(struct ip6_tnl *t)
return ret;
}
/**
- * ip6ip6_tnl_xmit - encapsulate packet and send
+ * ip6_tnl_xmit2 - encapsulate packet and send
* @skb: the outgoing socket buffer
* @dev: the outgoing tunnel device
+ * @dsfield: dscp code for outer header
+ * @fl: flow of tunneled packet
+ * @encap_limit: encapsulation limit
+ * @pmtu: where to store the path MTU if the packet is too big
*
* Description:
* Build new header and do some sanity checks on the packet before sending
* it.
*
* Return:
- * 0
+ * 0 on success
+ * -1 on failure
+ * %-EMSGSIZE if the packet is too big; the MTU is returned via @pmtu
**/
-static int
-ip6ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev)
+static int ip6_tnl_xmit2(struct sk_buff *skb,
+ struct net_device *dev,
+ __u8 dsfield,
+ struct flowi *fl,
+ int encap_limit,
+ __u32 *pmtu)
{
struct ip6_tnl *t = netdev_priv(dev);
struct net_device_stats *stats = &t->stat;
- struct ipv6hdr *ipv6h = skb->nh.ipv6h;
- int encap_limit = -1;
+ struct ipv6hdr *ipv6h = ipv6_hdr(skb);
struct ipv6_tel_txoption opt;
- __u16 offset;
- struct flowi fl;
struct dst_entry *dst;
struct net_device *tdev;
int mtu;
int max_headroom = sizeof(struct ipv6hdr);
u8 proto;
- int err;
+ int err = -1;
int pkt_len;
- int dsfield;
-
- if (t->recursion++) {
- stats->collisions++;
- goto tx_err;
- }
- if (skb->protocol != htons(ETH_P_IPV6) ||
- !ip6_tnl_xmit_ctl(t) || ip6ip6_tnl_addr_conflict(t, ipv6h))
- goto tx_err;
-
- if ((offset = parse_tlv_tnl_enc_lim(skb, skb->nh.raw)) > 0) {
- struct ipv6_tlv_tnl_enc_lim *tel;
- tel = (struct ipv6_tlv_tnl_enc_lim *) &skb->nh.raw[offset];
- if (tel->encap_limit == 0) {
- icmpv6_send(skb, ICMPV6_PARAMPROB,
- ICMPV6_HDR_FIELD, offset + 2, skb->dev);
- goto tx_err;
- }
- encap_limit = tel->encap_limit - 1;
- } else if (!(t->parms.flags & IP6_TNL_F_IGN_ENCAP_LIMIT))
- encap_limit = t->parms.encap_limit;
-
- memcpy(&fl, &t->fl, sizeof (fl));
- proto = fl.proto;
-
- dsfield = ipv6_get_dsfield(ipv6h);
- if ((t->parms.flags & IP6_TNL_F_USE_ORIG_TCLASS))
- fl.fl6_flowlabel |= (*(__be32 *) ipv6h & IPV6_TCLASS_MASK);
- if ((t->parms.flags & IP6_TNL_F_USE_ORIG_FLOWLABEL))
- fl.fl6_flowlabel |= (*(__be32 *) ipv6h & IPV6_FLOWLABEL_MASK);
if ((dst = ip6_tnl_dst_check(t)) != NULL)
dst_hold(dst);
else {
- dst = ip6_route_output(NULL, &fl);
+ dst = ip6_route_output(NULL, fl);
- if (dst->error || xfrm_lookup(&dst, &fl, NULL, 0) < 0)
+ if (dst->error || xfrm_lookup(&dst, fl, NULL, 0) < 0)
goto tx_err_link_failure;
}
@@ -730,7 +873,8 @@ ip6ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev)
if (skb->dst)
skb->dst->ops->update_pmtu(skb->dst, mtu);
if (skb->len > mtu) {
- icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu, dev);
+ *pmtu = mtu;
+ err = -EMSGSIZE;
goto tx_err_dst_release;
}
@@ -754,22 +898,24 @@ ip6ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev)
dst_release(skb->dst);
skb->dst = dst_clone(dst);
- skb->h.raw = skb->nh.raw;
+ skb->transport_header = skb->network_header;
+ proto = fl->proto;
if (encap_limit >= 0) {
init_tel_txopt(&opt, encap_limit);
ipv6_push_nfrag_opts(skb, &opt.ops, &proto, NULL);
}
- skb->nh.raw = skb_push(skb, sizeof(struct ipv6hdr));
- ipv6h = skb->nh.ipv6h;
- *(__be32*)ipv6h = fl.fl6_flowlabel | htonl(0x60000000);
+ skb_push(skb, sizeof(struct ipv6hdr));
+ skb_reset_network_header(skb);
+ ipv6h = ipv6_hdr(skb);
+ *(__be32*)ipv6h = fl->fl6_flowlabel | htonl(0x60000000);
dsfield = INET_ECN_encapsulate(0, dsfield);
ipv6_change_dsfield(ipv6h, ~INET_ECN_MASK, dsfield);
ipv6h->payload_len = htons(skb->len - sizeof(struct ipv6hdr));
ipv6h->hop_limit = t->parms.hop_limit;
ipv6h->nexthdr = proto;
- ipv6_addr_copy(&ipv6h->saddr, &fl.fl6_src);
- ipv6_addr_copy(&ipv6h->daddr, &fl.fl6_dst);
+ ipv6_addr_copy(&ipv6h->saddr, &fl->fl6_src);
+ ipv6_addr_copy(&ipv6h->daddr, &fl->fl6_dst);
nf_reset(skb);
pkt_len = skb->len;
err = NF_HOOK(PF_INET6, NF_IP6_LOCAL_OUT, skb, NULL,
@@ -783,13 +929,131 @@ ip6ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev)
stats->tx_aborted_errors++;
}
ip6_tnl_dst_store(t, dst);
- t->recursion--;
return 0;
tx_err_link_failure:
stats->tx_carrier_errors++;
dst_link_failure(skb);
tx_err_dst_release:
dst_release(dst);
+ return err;
+}
+
+static inline int
+ip4ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev)
+{
+ struct ip6_tnl *t = netdev_priv(dev);
+ struct iphdr *iph = ip_hdr(skb);
+ int encap_limit = -1;
+ struct flowi fl;
+ __u8 dsfield;
+ __u32 mtu;
+ int err;
+
+ if ((t->parms.proto != IPPROTO_IPIP && t->parms.proto != 0) ||
+ !ip6_tnl_xmit_ctl(t))
+ return -1;
+
+ if (!(t->parms.flags & IP6_TNL_F_IGN_ENCAP_LIMIT))
+ encap_limit = t->parms.encap_limit;
+
+ memcpy(&fl, &t->fl, sizeof (fl));
+ fl.proto = IPPROTO_IPIP;
+
+ dsfield = ipv4_get_dsfield(iph);
+
+ if ((t->parms.flags & IP6_TNL_F_USE_ORIG_TCLASS))
+ fl.fl6_flowlabel |= ntohl(((__u32)iph->tos << IPV6_TCLASS_SHIFT)
+ & IPV6_TCLASS_MASK);
+
+ err = ip6_tnl_xmit2(skb, dev, dsfield, &fl, encap_limit, &mtu);
+ if (err != 0) {
+ /* XXX: send ICMP error even if DF is not set. */
+ if (err == -EMSGSIZE)
+ icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED,
+ htonl(mtu));
+ return -1;
+ }
+
+ return 0;
+}
+
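
ip4ip6_tnl_xmit() above folds the IPv4 TOS byte into the IPv6 traffic class: the flowinfo word is laid out as version (4 bits), traffic class (8 bits), flow label (20 bits), so shifting by IPV6_TCLASS_SHIFT = 20 lands the byte in the right field. A host-order check with a hypothetical TOS of 0xb8 (DSCP EF):

#include <stdio.h>

int main(void)
{
	unsigned int tos = 0xb8;	/* hypothetical TOS byte */
	unsigned int flowinfo;

	/* host-order equivalents of IPV6_TCLASS_SHIFT/IPV6_TCLASS_MASK;
	 * the kernel additionally byte-swaps into the on-wire word */
	flowinfo = (tos << 20) & 0x0ff00000;

	printf("traffic-class bits: 0x%08x\n", flowinfo);	/* 0x0b800000 */
	return 0;
}
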
+static inline int
+ip6ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev)
+{
+ struct ip6_tnl *t = netdev_priv(dev);
+ struct ipv6hdr *ipv6h = ipv6_hdr(skb);
+ int encap_limit = -1;
+ __u16 offset;
+ struct flowi fl;
+ __u8 dsfield;
+ __u32 mtu;
+ int err;
+
+ if ((t->parms.proto != IPPROTO_IPV6 && t->parms.proto != 0) ||
+ !ip6_tnl_xmit_ctl(t) || ip6_tnl_addr_conflict(t, ipv6h))
+ return -1;
+
+ offset = parse_tlv_tnl_enc_lim(skb, skb_network_header(skb));
+ if (offset > 0) {
+ struct ipv6_tlv_tnl_enc_lim *tel;
+ tel = (struct ipv6_tlv_tnl_enc_lim *)&skb_network_header(skb)[offset];
+ if (tel->encap_limit == 0) {
+ icmpv6_send(skb, ICMPV6_PARAMPROB,
+ ICMPV6_HDR_FIELD, offset + 2, skb->dev);
+ return -1;
+ }
+ encap_limit = tel->encap_limit - 1;
+ } else if (!(t->parms.flags & IP6_TNL_F_IGN_ENCAP_LIMIT))
+ encap_limit = t->parms.encap_limit;
+
+ memcpy(&fl, &t->fl, sizeof (fl));
+ fl.proto = IPPROTO_IPV6;
+
+ dsfield = ipv6_get_dsfield(ipv6h);
+ if ((t->parms.flags & IP6_TNL_F_USE_ORIG_TCLASS))
+ fl.fl6_flowlabel |= (*(__be32 *) ipv6h & IPV6_TCLASS_MASK);
+ if ((t->parms.flags & IP6_TNL_F_USE_ORIG_FLOWLABEL))
+ fl.fl6_flowlabel |= (*(__be32 *) ipv6h & IPV6_FLOWLABEL_MASK);
+
+ err = ip6_tnl_xmit2(skb, dev, dsfield, &fl, encap_limit, &mtu);
+ if (err != 0) {
+ if (err == -EMSGSIZE)
+ icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu, dev);
+ return -1;
+ }
+
+ return 0;
+}
+
+static int
+ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev)
+{
+ struct ip6_tnl *t = netdev_priv(dev);
+ struct net_device_stats *stats = &t->stat;
+ int ret;
+
+ if (t->recursion++) {
+ t->stat.collisions++;
+ goto tx_err;
+ }
+
+ switch (skb->protocol) {
+ case __constant_htons(ETH_P_IP):
+ ret = ip4ip6_tnl_xmit(skb, dev);
+ break;
+ case __constant_htons(ETH_P_IPV6):
+ ret = ip6ip6_tnl_xmit(skb, dev);
+ break;
+ default:
+ goto tx_err;
+ }
+
+ if (ret < 0)
+ goto tx_err;
+
+ t->recursion--;
+ return 0;
+
tx_err:
stats->tx_errors++;
stats->tx_dropped++;
@@ -817,7 +1081,7 @@ static void ip6_tnl_set_cap(struct ip6_tnl *t)
}
}
-static void ip6ip6_tnl_link_config(struct ip6_tnl *t)
+static void ip6_tnl_link_config(struct ip6_tnl *t)
{
struct net_device *dev = t->dev;
struct ip6_tnl_parm *p = &t->parms;
@@ -870,17 +1134,17 @@ static void ip6ip6_tnl_link_config(struct ip6_tnl *t)
}
/**
- * ip6ip6_tnl_change - update the tunnel parameters
+ * ip6_tnl_change - update the tunnel parameters
* @t: tunnel to be changed
* @p: tunnel configuration parameters
* @active: != 0 if tunnel is ready for use
*
* Description:
- * ip6ip6_tnl_change() updates the tunnel parameters
+ * ip6_tnl_change() updates the tunnel parameters
**/
static int
-ip6ip6_tnl_change(struct ip6_tnl *t, struct ip6_tnl_parm *p)
+ip6_tnl_change(struct ip6_tnl *t, struct ip6_tnl_parm *p)
{
ipv6_addr_copy(&t->parms.laddr, &p->laddr);
ipv6_addr_copy(&t->parms.raddr, &p->raddr);
@@ -889,19 +1153,20 @@ ip6ip6_tnl_change(struct ip6_tnl *t, struct ip6_tnl_parm *p)
t->parms.encap_limit = p->encap_limit;
t->parms.flowinfo = p->flowinfo;
t->parms.link = p->link;
+ t->parms.proto = p->proto;
ip6_tnl_dst_reset(t);
- ip6ip6_tnl_link_config(t);
+ ip6_tnl_link_config(t);
return 0;
}
/**
- * ip6ip6_tnl_ioctl - configure ipv6 tunnels from userspace
+ * ip6_tnl_ioctl - configure ipv6 tunnels from userspace
* @dev: virtual device associated with tunnel
* @ifr: parameters passed from userspace
* @cmd: command to be performed
*
* Description:
- * ip6ip6_tnl_ioctl() is used for managing IPv6 tunnels
+ * ip6_tnl_ioctl() is used for managing IPv6 tunnels
* from userspace.
*
* The possible commands are the following:
@@ -923,7 +1188,7 @@ ip6ip6_tnl_change(struct ip6_tnl *t, struct ip6_tnl_parm *p)
**/
static int
-ip6ip6_tnl_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
+ip6_tnl_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
{
int err = 0;
struct ip6_tnl_parm p;
@@ -931,12 +1196,12 @@ ip6ip6_tnl_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
switch (cmd) {
case SIOCGETTUNNEL:
- if (dev == ip6ip6_fb_tnl_dev) {
+ if (dev == ip6_fb_tnl_dev) {
if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof (p))) {
err = -EFAULT;
break;
}
- t = ip6ip6_tnl_locate(&p, 0);
+ t = ip6_tnl_locate(&p, 0);
}
if (t == NULL)
t = netdev_priv(dev);
@@ -954,10 +1219,11 @@ ip6ip6_tnl_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof (p)))
break;
err = -EINVAL;
- if (p.proto != IPPROTO_IPV6)
+ if (p.proto != IPPROTO_IPV6 && p.proto != IPPROTO_IPIP &&
+ p.proto != 0)
break;
- t = ip6ip6_tnl_locate(&p, cmd == SIOCADDTUNNEL);
- if (dev != ip6ip6_fb_tnl_dev && cmd == SIOCCHGTUNNEL) {
+ t = ip6_tnl_locate(&p, cmd == SIOCADDTUNNEL);
+ if (dev != ip6_fb_tnl_dev && cmd == SIOCCHGTUNNEL) {
if (t != NULL) {
if (t->dev != dev) {
err = -EEXIST;
@@ -966,9 +1232,9 @@ ip6ip6_tnl_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
} else
t = netdev_priv(dev);
- ip6ip6_tnl_unlink(t);
- err = ip6ip6_tnl_change(t, &p);
- ip6ip6_tnl_link(t);
+ ip6_tnl_unlink(t);
+ err = ip6_tnl_change(t, &p);
+ ip6_tnl_link(t);
netdev_state_change(dev);
}
if (t) {
@@ -984,15 +1250,15 @@ ip6ip6_tnl_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
if (!capable(CAP_NET_ADMIN))
break;
- if (dev == ip6ip6_fb_tnl_dev) {
+ if (dev == ip6_fb_tnl_dev) {
err = -EFAULT;
if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof (p)))
break;
err = -ENOENT;
- if ((t = ip6ip6_tnl_locate(&p, 0)) == NULL)
+ if ((t = ip6_tnl_locate(&p, 0)) == NULL)
break;
err = -EPERM;
- if (t->dev == ip6ip6_fb_tnl_dev)
+ if (t->dev == ip6_fb_tnl_dev)
break;
dev = t->dev;
}
@@ -1006,20 +1272,20 @@ ip6ip6_tnl_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
}
/**
- * ip6ip6_tnl_get_stats - return the stats for tunnel device
+ * ip6_tnl_get_stats - return the stats for tunnel device
* @dev: virtual device associated with tunnel
*
* Return: stats for device
**/
static struct net_device_stats *
-ip6ip6_tnl_get_stats(struct net_device *dev)
+ip6_tnl_get_stats(struct net_device *dev)
{
return &(((struct ip6_tnl *)netdev_priv(dev))->stat);
}
/**
- * ip6ip6_tnl_change_mtu - change mtu manually for tunnel device
+ * ip6_tnl_change_mtu - change mtu manually for tunnel device
* @dev: virtual device associated with tunnel
* @new_mtu: the new mtu
*
@@ -1029,7 +1295,7 @@ ip6ip6_tnl_get_stats(struct net_device *dev)
**/
static int
-ip6ip6_tnl_change_mtu(struct net_device *dev, int new_mtu)
+ip6_tnl_change_mtu(struct net_device *dev, int new_mtu)
{
if (new_mtu < IPV6_MIN_MTU) {
return -EINVAL;
@@ -1039,22 +1305,22 @@ ip6ip6_tnl_change_mtu(struct net_device *dev, int new_mtu)
}
/**
- * ip6ip6_tnl_dev_setup - setup virtual tunnel device
+ * ip6_tnl_dev_setup - setup virtual tunnel device
* @dev: virtual device associated with tunnel
*
* Description:
* Initialize function pointers and device parameters
**/
-static void ip6ip6_tnl_dev_setup(struct net_device *dev)
+static void ip6_tnl_dev_setup(struct net_device *dev)
{
SET_MODULE_OWNER(dev);
- dev->uninit = ip6ip6_tnl_dev_uninit;
+ dev->uninit = ip6_tnl_dev_uninit;
dev->destructor = free_netdev;
- dev->hard_start_xmit = ip6ip6_tnl_xmit;
- dev->get_stats = ip6ip6_tnl_get_stats;
- dev->do_ioctl = ip6ip6_tnl_ioctl;
- dev->change_mtu = ip6ip6_tnl_change_mtu;
+ dev->hard_start_xmit = ip6_tnl_xmit;
+ dev->get_stats = ip6_tnl_get_stats;
+ dev->do_ioctl = ip6_tnl_ioctl;
+ dev->change_mtu = ip6_tnl_change_mtu;
dev->type = ARPHRD_TUNNEL6;
dev->hard_header_len = LL_MAX_HEADER + sizeof (struct ipv6hdr);
@@ -1065,50 +1331,56 @@ static void ip6ip6_tnl_dev_setup(struct net_device *dev)
/**
- * ip6ip6_tnl_dev_init_gen - general initializer for all tunnel devices
+ * ip6_tnl_dev_init_gen - general initializer for all tunnel devices
* @dev: virtual device associated with tunnel
**/
static inline void
-ip6ip6_tnl_dev_init_gen(struct net_device *dev)
+ip6_tnl_dev_init_gen(struct net_device *dev)
{
struct ip6_tnl *t = netdev_priv(dev);
- t->fl.proto = IPPROTO_IPV6;
t->dev = dev;
strcpy(t->parms.name, dev->name);
}
/**
- * ip6ip6_tnl_dev_init - initializer for all non fallback tunnel devices
+ * ip6_tnl_dev_init - initializer for all non fallback tunnel devices
* @dev: virtual device associated with tunnel
**/
static int
-ip6ip6_tnl_dev_init(struct net_device *dev)
+ip6_tnl_dev_init(struct net_device *dev)
{
struct ip6_tnl *t = netdev_priv(dev);
- ip6ip6_tnl_dev_init_gen(dev);
- ip6ip6_tnl_link_config(t);
+ ip6_tnl_dev_init_gen(dev);
+ ip6_tnl_link_config(t);
return 0;
}
/**
- * ip6ip6_fb_tnl_dev_init - initializer for fallback tunnel device
+ * ip6_fb_tnl_dev_init - initializer for fallback tunnel device
* @dev: fallback device
*
* Return: 0
**/
static int
-ip6ip6_fb_tnl_dev_init(struct net_device *dev)
+ip6_fb_tnl_dev_init(struct net_device *dev)
{
struct ip6_tnl *t = netdev_priv(dev);
- ip6ip6_tnl_dev_init_gen(dev);
+ ip6_tnl_dev_init_gen(dev);
+ t->parms.proto = IPPROTO_IPV6;
dev_hold(dev);
tnls_wc[0] = t;
return 0;
}
+static struct xfrm6_tunnel ip4ip6_handler = {
+ .handler = ip4ip6_rcv,
+ .err_handler = ip4ip6_err,
+ .priority = 1,
+};
+
static struct xfrm6_tunnel ip6ip6_handler = {
.handler = ip6ip6_rcv,
.err_handler = ip6ip6_err,
@@ -1125,30 +1397,40 @@ static int __init ip6_tunnel_init(void)
{
int err;
+ if (xfrm6_tunnel_register(&ip4ip6_handler, AF_INET)) {
+ printk(KERN_ERR "ip6_tunnel init: can't register ip4ip6\n");
+ err = -EAGAIN;
+ goto out;
+ }
+
if (xfrm6_tunnel_register(&ip6ip6_handler, AF_INET6)) {
- printk(KERN_ERR "ip6ip6 init: can't register tunnel\n");
- return -EAGAIN;
+ printk(KERN_ERR "ip6_tunnel init: can't register ip6ip6\n");
+ err = -EAGAIN;
+ goto unreg_ip4ip6;
}
- ip6ip6_fb_tnl_dev = alloc_netdev(sizeof(struct ip6_tnl), "ip6tnl0",
- ip6ip6_tnl_dev_setup);
+ ip6_fb_tnl_dev = alloc_netdev(sizeof(struct ip6_tnl), "ip6tnl0",
+ ip6_tnl_dev_setup);
- if (!ip6ip6_fb_tnl_dev) {
+ if (!ip6_fb_tnl_dev) {
err = -ENOMEM;
goto fail;
}
- ip6ip6_fb_tnl_dev->init = ip6ip6_fb_tnl_dev_init;
+ ip6_fb_tnl_dev->init = ip6_fb_tnl_dev_init;
- if ((err = register_netdev(ip6ip6_fb_tnl_dev))) {
- free_netdev(ip6ip6_fb_tnl_dev);
+ if ((err = register_netdev(ip6_fb_tnl_dev))) {
+ free_netdev(ip6_fb_tnl_dev);
goto fail;
}
return 0;
fail:
xfrm6_tunnel_deregister(&ip6ip6_handler, AF_INET6);
+unreg_ip4ip6:
+ xfrm6_tunnel_deregister(&ip4ip6_handler, AF_INET);
+out:
return err;
}
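
A hedged userspace sketch of exercising the new ioctl path: with this patch, SIOCADDTUNNEL on the ip6tnl0 fallback device also accepts p.proto = IPPROTO_IPIP for IPv4-over-IPv6 tunnels. Tunnel name and addresses below are placeholders, and the call needs CAP_NET_ADMIN:

    #include <string.h>
    #include <sys/ioctl.h>
    #include <sys/socket.h>
    #include <netinet/in.h>
    #include <arpa/inet.h>
    #include <net/if.h>
    #include <linux/if_tunnel.h>    /* SIOCADDTUNNEL */
    #include <linux/ip6_tunnel.h>   /* struct ip6_tnl_parm */

    static int add_ip4ip6_tunnel(void)
    {
            struct ip6_tnl_parm p;
            struct ifreq ifr;
            int fd = socket(AF_INET6, SOCK_DGRAM, 0);

            if (fd < 0)
                    return -1;
            memset(&p, 0, sizeof(p));
            strncpy(p.name, "mytnl1", IFNAMSIZ - 1);
            p.proto = IPPROTO_IPIP;                 /* IPv4 payload */
            p.hop_limit = 64;
            inet_pton(AF_INET6, "2001:db8::1", &p.laddr);
            inet_pton(AF_INET6, "2001:db8::2", &p.raddr);

            memset(&ifr, 0, sizeof(ifr));
            strncpy(ifr.ifr_name, "ip6tnl0", IFNAMSIZ - 1); /* fallback dev */
            ifr.ifr_data = (char *)&p;
            return ioctl(fd, SIOCADDTUNNEL, &ifr);
    }
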
-static void __exit ip6ip6_destroy_tunnels(void)
+static void __exit ip6_tnl_destroy_tunnels(void)
{
int h;
struct ip6_tnl *t;
@@ -1168,11 +1450,14 @@ static void __exit ip6ip6_destroy_tunnels(void)
static void __exit ip6_tunnel_cleanup(void)
{
+ if (xfrm6_tunnel_deregister(&ip4ip6_handler, AF_INET))
+ printk(KERN_INFO "ip6_tunnel close: can't deregister ip4ip6\n");
+
if (xfrm6_tunnel_deregister(&ip6ip6_handler, AF_INET6))
- printk(KERN_INFO "ip6ip6 close: can't deregister tunnel\n");
+ printk(KERN_INFO "ip6_tunnel close: can't deregister ip6ip6\n");
rtnl_lock();
- ip6ip6_destroy_tunnels();
+ ip6_tnl_destroy_tunnels();
rtnl_unlock();
}
diff --git a/net/ipv6/ipcomp6.c b/net/ipv6/ipcomp6.c
index 5724ba9f75d..1ee50b5782e 100644
--- a/net/ipv6/ipcomp6.c
+++ b/net/ipv6/ipcomp6.c
@@ -79,9 +79,9 @@ static int ipcomp6_input(struct xfrm_state *x, struct sk_buff *skb)
skb->ip_summed = CHECKSUM_NONE;
/* Remove ipcomp header and decompress original payload */
- iph = skb->nh.ipv6h;
+ iph = ipv6_hdr(skb);
ipch = (void *)skb->data;
- skb->h.raw = skb->nh.raw + sizeof(*ipch);
+ skb->transport_header = skb->network_header + sizeof(*ipch);
__skb_pull(skb, sizeof(*ipch));
/* decompression */
@@ -111,7 +111,7 @@ static int ipcomp6_input(struct xfrm_state *x, struct sk_buff *skb)
skb->truesize += dlen - plen;
__skb_put(skb, dlen - plen);
- memcpy(skb->data, scratch, dlen);
+ skb_copy_to_linear_data(skb, scratch, dlen);
err = ipch->nexthdr;
out_put_cpu:
@@ -124,15 +124,13 @@ static int ipcomp6_output(struct xfrm_state *x, struct sk_buff *skb)
{
int err;
struct ipv6hdr *top_iph;
- int hdr_len;
struct ipv6_comp_hdr *ipch;
struct ipcomp_data *ipcd = x->data;
int plen, dlen;
u8 *start, *scratch;
struct crypto_comp *tfm;
int cpu;
-
- hdr_len = skb->h.raw - skb->data;
+ int hdr_len = skb_transport_offset(skb);
/* check whether datagram len is larger than threshold */
if ((skb->len - hdr_len) < ipcd->threshold) {
@@ -145,7 +143,7 @@ static int ipcomp6_output(struct xfrm_state *x, struct sk_buff *skb)
/* compression */
plen = skb->len - hdr_len;
dlen = IPCOMP_SCRATCH_SIZE;
- start = skb->h.raw;
+ start = skb_transport_header(skb);
cpu = get_cpu();
scratch = *per_cpu_ptr(ipcomp6_scratches, cpu);
@@ -166,10 +164,10 @@ static int ipcomp6_output(struct xfrm_state *x, struct sk_buff *skb)
top_iph->payload_len = htons(skb->len - sizeof(struct ipv6hdr));
ipch = (struct ipv6_comp_hdr *)start;
- ipch->nexthdr = *skb->nh.raw;
+ ipch->nexthdr = *skb_network_header(skb);
ipch->flags = 0;
ipch->cpi = htons((u16 )ntohl(x->id.spi));
- *skb->nh.raw = IPPROTO_COMP;
+ *skb_network_header(skb) = IPPROTO_COMP;
out_ok:
return 0;
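
The ipcomp6 hunks above are instances of the mechanical sk_buff accessor conversion this series applies everywhere. Assuming the 2.6.22-era layout, where the h/nh/mac unions became offsets plus accessors, the correspondences are (sketch):

    /* old (union-based)               new (offset + accessor)
     * skb->nh.ipv6h              ->   ipv6_hdr(skb)
     * skb->nh.raw                ->   skb_network_header(skb)
     * skb->h.raw                 ->   skb_transport_header(skb)
     * skb->h.raw - skb->data     ->   skb_transport_offset(skb)
     * skb->h.raw = skb->data     ->   skb_reset_transport_header(skb)
     * memcpy(skb->data, p, n)    ->   skb_copy_to_linear_data(skb, p, n)
     */
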
diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c
index f5f9582a8d3..aa3d07c52a8 100644
--- a/net/ipv6/ipv6_sockglue.c
+++ b/net/ipv6/ipv6_sockglue.c
@@ -101,14 +101,14 @@ static int ipv6_gso_send_check(struct sk_buff *skb)
if (unlikely(!pskb_may_pull(skb, sizeof(*ipv6h))))
goto out;
- ipv6h = skb->nh.ipv6h;
+ ipv6h = ipv6_hdr(skb);
__skb_pull(skb, sizeof(*ipv6h));
err = -EPROTONOSUPPORT;
rcu_read_lock();
ops = ipv6_gso_pull_exthdrs(skb, ipv6h->nexthdr);
if (likely(ops && ops->gso_send_check)) {
- skb->h.raw = skb->data;
+ skb_reset_transport_header(skb);
err = ops->gso_send_check(skb);
}
rcu_read_unlock();
@@ -137,14 +137,14 @@ static struct sk_buff *ipv6_gso_segment(struct sk_buff *skb, int features)
if (unlikely(!pskb_may_pull(skb, sizeof(*ipv6h))))
goto out;
- ipv6h = skb->nh.ipv6h;
+ ipv6h = ipv6_hdr(skb);
__skb_pull(skb, sizeof(*ipv6h));
segs = ERR_PTR(-EPROTONOSUPPORT);
rcu_read_lock();
ops = ipv6_gso_pull_exthdrs(skb, ipv6h->nexthdr);
if (likely(ops && ops->gso_segment)) {
- skb->h.raw = skb->data;
+ skb_reset_transport_header(skb);
segs = ops->gso_segment(skb, features);
}
rcu_read_unlock();
@@ -153,7 +153,7 @@ static struct sk_buff *ipv6_gso_segment(struct sk_buff *skb, int features)
goto out;
for (skb = segs; skb; skb = skb->next) {
- ipv6h = skb->nh.ipv6h;
+ ipv6h = ipv6_hdr(skb);
ipv6h->payload_len = htons(skb->len - skb->mac_len -
sizeof(*ipv6h));
}
@@ -694,7 +694,7 @@ done:
retv = ip6_ra_control(sk, val, NULL);
break;
case IPV6_MTU_DISCOVER:
- if (val<0 || val>2)
+ if (val<0 || val>3)
goto e_inval;
np->pmtudisc = val;
retv = 0;
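
The widened bound admits the then-new IPV6_PMTUDISC_PROBE mode (value 3). A userspace sketch, with a fallback define since older libc headers may lack the symbol:

    #include <netinet/in.h>
    #include <sys/socket.h>

    #ifndef IPV6_PMTUDISC_PROBE
    #define IPV6_PMTUDISC_PROBE 3
    #endif

    static int enable_pmtu_probe(int fd)
    {
            int val = IPV6_PMTUDISC_PROBE;

            /* probe PMTU but ignore the destination cache entry */
            return setsockopt(fd, IPPROTO_IPV6, IPV6_MTU_DISCOVER,
                              &val, sizeof(val));
    }
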
@@ -761,6 +761,7 @@ int ipv6_setsockopt(struct sock *sk, int level, int optname,
return err;
}
+EXPORT_SYMBOL(ipv6_setsockopt);
#ifdef CONFIG_COMPAT
int compat_ipv6_setsockopt(struct sock *sk, int level, int optname,
@@ -796,18 +797,37 @@ EXPORT_SYMBOL(compat_ipv6_setsockopt);
#endif
static int ipv6_getsockopt_sticky(struct sock *sk, struct ipv6_txoptions *opt,
- char __user *optval, int len)
+ int optname, char __user *optval, int len)
{
struct ipv6_opt_hdr *hdr;
- if (!opt || !opt->hopopt)
+ if (!opt)
+ return 0;
+
+ switch (optname) {
+ case IPV6_HOPOPTS:
+ hdr = opt->hopopt;
+ break;
+ case IPV6_RTHDRDSTOPTS:
+ hdr = opt->dst0opt;
+ break;
+ case IPV6_RTHDR:
+ hdr = (struct ipv6_opt_hdr *)opt->srcrt;
+ break;
+ case IPV6_DSTOPTS:
+ hdr = opt->dst1opt;
+ break;
+ default:
+ return -EINVAL; /* should not happen */
+ }
+
+ if (!hdr)
return 0;
- hdr = opt->hopopt;
len = min_t(unsigned int, len, ipv6_optlen(hdr));
- if (copy_to_user(optval, hdr, ipv6_optlen(hdr)))
+ if (copy_to_user(optval, hdr, len))
return -EFAULT;
- return len;
+ return ipv6_optlen(hdr);
}
static int do_ipv6_getsockopt(struct sock *sk, int level, int optname,
@@ -945,7 +965,7 @@ static int do_ipv6_getsockopt(struct sock *sk, int level, int optname,
lock_sock(sk);
len = ipv6_getsockopt_sticky(sk, np->opt,
- optval, len);
+ optname, optval, len);
release_sock(sk);
return put_user(len, optlen);
}
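
Under the fixed semantics, the kernel copies at most the caller's buffer length but reports the full option length, so userspace can probe and retry. A minimal sketch, assuming an IPv6 socket with sticky hop-by-hop options already set:

    #include <netinet/in.h>
    #include <sys/socket.h>
    #include <stdlib.h>

    static void *read_hopopts(int fd, socklen_t *lenp)
    {
            void *buf;

            *lenp = 0;      /* probe: copies nothing, reports full length */
            if (getsockopt(fd, IPPROTO_IPV6, IPV6_HOPOPTS, NULL, lenp) < 0 ||
                *lenp == 0)
                    return NULL;
            buf = malloc(*lenp);
            if (buf && getsockopt(fd, IPPROTO_IPV6, IPV6_HOPOPTS,
                                  buf, lenp) < 0) {
                    free(buf);
                    buf = NULL;
            }
            return buf;
    }
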
@@ -1066,6 +1086,8 @@ int ipv6_getsockopt(struct sock *sk, int level, int optname,
return err;
}
+EXPORT_SYMBOL(ipv6_getsockopt);
+
#ifdef CONFIG_COMPAT
int compat_ipv6_getsockopt(struct sock *sk, int level, int optname,
char __user *optval, int __user *optlen)
diff --git a/net/ipv6/ipv6_syms.c b/net/ipv6/ipv6_syms.c
deleted file mode 100644
index e12e3d4fcce..00000000000
--- a/net/ipv6/ipv6_syms.c
+++ /dev/null
@@ -1,36 +0,0 @@
-
-#include <linux/module.h>
-#include <net/protocol.h>
-#include <net/ipv6.h>
-#include <net/addrconf.h>
-#include <net/ip6_route.h>
-#include <net/xfrm.h>
-
-EXPORT_SYMBOL(icmpv6_send);
-EXPORT_SYMBOL(icmpv6_statistics);
-EXPORT_SYMBOL(icmpv6_err_convert);
-EXPORT_SYMBOL(ndisc_mc_map);
-EXPORT_SYMBOL(register_inet6addr_notifier);
-EXPORT_SYMBOL(unregister_inet6addr_notifier);
-EXPORT_SYMBOL(ip6_route_output);
-EXPORT_SYMBOL(ipv6_setsockopt);
-EXPORT_SYMBOL(ipv6_getsockopt);
-EXPORT_SYMBOL(inet6_register_protosw);
-EXPORT_SYMBOL(inet6_unregister_protosw);
-EXPORT_SYMBOL(inet6_add_protocol);
-EXPORT_SYMBOL(inet6_del_protocol);
-EXPORT_SYMBOL(ip6_xmit);
-EXPORT_SYMBOL(inet6_release);
-EXPORT_SYMBOL(inet6_bind);
-EXPORT_SYMBOL(inet6_getname);
-EXPORT_SYMBOL(inet6_ioctl);
-EXPORT_SYMBOL(ipv6_get_saddr);
-EXPORT_SYMBOL(ipv6_chk_addr);
-EXPORT_SYMBOL(in6_dev_finish_destroy);
-#ifdef CONFIG_XFRM
-EXPORT_SYMBOL(xfrm6_rcv);
-EXPORT_SYMBOL(xfrm6_input_addr);
-EXPORT_SYMBOL(xfrm6_find_1stfragopt);
-#endif
-EXPORT_SYMBOL(rt6_lookup);
-EXPORT_SYMBOL(ipv6_push_nfrag_opts);
diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c
index a8d6625ec78..3e308fb41b4 100644
--- a/net/ipv6/mcast.c
+++ b/net/ipv6/mcast.c
@@ -988,7 +988,7 @@ int ipv6_is_mld(struct sk_buff *skb, int nexthdr)
if (!pskb_may_pull(skb, sizeof(struct icmp6hdr)))
return 0;
- pic = (struct icmp6hdr *)skb->h.raw;
+ pic = icmp6_hdr(skb);
switch (pic->icmp6_type) {
case ICMPV6_MGM_QUERY:
@@ -1167,11 +1167,11 @@ int igmp6_event_query(struct sk_buff *skb)
return -EINVAL;
/* compute payload length excluding extension headers */
- len = ntohs(skb->nh.ipv6h->payload_len) + sizeof(struct ipv6hdr);
- len -= (char *)skb->h.raw - (char *)skb->nh.ipv6h;
+ len = ntohs(ipv6_hdr(skb)->payload_len) + sizeof(struct ipv6hdr);
+ len -= skb_network_header_len(skb);
/* Drop queries with not link local source */
- if (!(ipv6_addr_type(&skb->nh.ipv6h->saddr)&IPV6_ADDR_LINKLOCAL))
+ if (!(ipv6_addr_type(&ipv6_hdr(skb)->saddr) & IPV6_ADDR_LINKLOCAL))
return -EINVAL;
idev = in6_dev_get(skb->dev);
@@ -1179,7 +1179,7 @@ int igmp6_event_query(struct sk_buff *skb)
if (idev == NULL)
return 0;
- hdr = (struct icmp6hdr *) skb->h.raw;
+ hdr = icmp6_hdr(skb);
group = (struct in6_addr *) (hdr + 1);
group_type = ipv6_addr_type(group);
@@ -1212,7 +1212,7 @@ int igmp6_event_query(struct sk_buff *skb)
in6_dev_put(idev);
return -EINVAL;
}
- mlh2 = (struct mld2_query *) skb->h.raw;
+ mlh2 = (struct mld2_query *)skb_transport_header(skb);
max_delay = (MLDV2_MRC(ntohs(mlh2->mrc))*HZ)/1000;
if (!max_delay)
max_delay = 1;
@@ -1235,7 +1235,7 @@ int igmp6_event_query(struct sk_buff *skb)
in6_dev_put(idev);
return -EINVAL;
}
- mlh2 = (struct mld2_query *) skb->h.raw;
+ mlh2 = (struct mld2_query *)skb_transport_header(skb);
mark = 1;
}
} else {
@@ -1300,10 +1300,10 @@ int igmp6_event_report(struct sk_buff *skb)
if (!pskb_may_pull(skb, sizeof(struct in6_addr)))
return -EINVAL;
- hdr = (struct icmp6hdr*) skb->h.raw;
+ hdr = icmp6_hdr(skb);
/* Drop reports with not link local source */
- addr_type = ipv6_addr_type(&skb->nh.ipv6h->saddr);
+ addr_type = ipv6_addr_type(&ipv6_hdr(skb)->saddr);
if (addr_type != IPV6_ADDR_ANY &&
!(addr_type&IPV6_ADDR_LINKLOCAL))
return -EINVAL;
@@ -1411,7 +1411,7 @@ static struct sk_buff *mld_newpack(struct net_device *dev, int size)
skb_reserve(skb, LL_RESERVED_SPACE(dev));
- if (ipv6_get_lladdr(dev, &addr_buf)) {
+ if (ipv6_get_lladdr(dev, &addr_buf, IFA_F_TENTATIVE)) {
/* <draft-ietf-magma-mld-source-05.txt>:
* use unspecified address as the source address
* when a valid link-local address is not available.
@@ -1423,8 +1423,9 @@ static struct sk_buff *mld_newpack(struct net_device *dev, int size)
memcpy(skb_put(skb, sizeof(ra)), ra, sizeof(ra));
- pmr =(struct mld2_report *)skb_put(skb, sizeof(*pmr));
- skb->h.raw = (unsigned char *)pmr;
+ skb_set_transport_header(skb, skb_tail_pointer(skb) - skb->data);
+ skb_put(skb, sizeof(*pmr));
+ pmr = (struct mld2_report *)skb_transport_header(skb);
pmr->type = ICMPV6_MLD2_REPORT;
pmr->resv1 = 0;
pmr->csum = 0;
@@ -1441,7 +1442,7 @@ static inline int mld_dev_queue_xmit2(struct sk_buff *skb)
unsigned char ha[MAX_ADDR_LEN];
int err;
- ndisc_mc_map(&skb->nh.ipv6h->daddr, ha, dev, 1);
+ ndisc_mc_map(&ipv6_hdr(skb)->daddr, ha, dev, 1);
err = dev->hard_header(skb, dev, ETH_P_IPV6, ha, NULL, skb->len);
if (err < 0) {
kfree_skb(skb);
@@ -1459,20 +1460,21 @@ static inline int mld_dev_queue_xmit(struct sk_buff *skb)
static void mld_sendpack(struct sk_buff *skb)
{
- struct ipv6hdr *pip6 = skb->nh.ipv6h;
- struct mld2_report *pmr = (struct mld2_report *)skb->h.raw;
+ struct ipv6hdr *pip6 = ipv6_hdr(skb);
+ struct mld2_report *pmr =
+ (struct mld2_report *)skb_transport_header(skb);
int payload_len, mldlen;
struct inet6_dev *idev = in6_dev_get(skb->dev);
int err;
IP6_INC_STATS(idev, IPSTATS_MIB_OUTREQUESTS);
- payload_len = skb->tail - (unsigned char *)skb->nh.ipv6h -
- sizeof(struct ipv6hdr);
- mldlen = skb->tail - skb->h.raw;
+ payload_len = (skb->tail - skb->network_header) - sizeof(*pip6);
+ mldlen = skb->tail - skb->transport_header;
pip6->payload_len = htons(payload_len);
pmr->csum = csum_ipv6_magic(&pip6->saddr, &pip6->daddr, mldlen,
- IPPROTO_ICMPV6, csum_partial(skb->h.raw, mldlen, 0));
+ IPPROTO_ICMPV6, csum_partial(skb_transport_header(skb),
+ mldlen, 0));
err = NF_HOOK(PF_INET6, NF_IP6_LOCAL_OUT, skb, NULL, skb->dev,
mld_dev_queue_xmit);
if (!err) {
@@ -1506,7 +1508,7 @@ static struct sk_buff *add_grhead(struct sk_buff *skb, struct ifmcaddr6 *pmc,
pgr->grec_auxwords = 0;
pgr->grec_nsrcs = 0;
pgr->grec_mca = pmc->mca_addr; /* structure copy */
- pmr = (struct mld2_report *)skb->h.raw;
+ pmr = (struct mld2_report *)skb_transport_header(skb);
pmr->ngrec = htons(ntohs(pmr->ngrec)+1);
*ppgr = pgr;
return skb;
@@ -1539,7 +1541,7 @@ static struct sk_buff *add_grec(struct sk_buff *skb, struct ifmcaddr6 *pmc,
if (!*psf_list)
goto empty_source;
- pmr = skb ? (struct mld2_report *)skb->h.raw : NULL;
+ pmr = skb ? (struct mld2_report *)skb_transport_header(skb) : NULL;
/* EX and TO_EX get a fresh packet, if needed */
if (truncate) {
@@ -1791,7 +1793,7 @@ static void igmp6_send(struct in6_addr *addr, struct net_device *dev, int type)
skb_reserve(skb, LL_RESERVED_SPACE(dev));
- if (ipv6_get_lladdr(dev, &addr_buf)) {
+ if (ipv6_get_lladdr(dev, &addr_buf, IFA_F_TENTATIVE)) {
/* <draft-ietf-magma-mld-source-05.txt>:
* use unspecified address as the source address
* when a valid link-local address is not available.
@@ -2329,9 +2331,8 @@ static inline struct ifmcaddr6 *igmp6_mc_get_first(struct seq_file *seq)
struct ifmcaddr6 *im = NULL;
struct igmp6_mc_iter_state *state = igmp6_mc_seq_private(seq);
- for (state->dev = dev_base, state->idev = NULL;
- state->dev;
- state->dev = state->dev->next) {
+ state->idev = NULL;
+ for_each_netdev(state->dev) {
struct inet6_dev *idev;
idev = in6_dev_get(state->dev);
if (!idev)
@@ -2358,7 +2359,7 @@ static struct ifmcaddr6 *igmp6_mc_get_next(struct seq_file *seq, struct ifmcaddr
read_unlock_bh(&state->idev->lock);
in6_dev_put(state->idev);
}
- state->dev = state->dev->next;
+ state->dev = next_net_device(state->dev);
if (!state->dev) {
state->idev = NULL;
break;
@@ -2473,9 +2474,9 @@ static inline struct ip6_sf_list *igmp6_mcf_get_first(struct seq_file *seq)
struct ifmcaddr6 *im = NULL;
struct igmp6_mcf_iter_state *state = igmp6_mcf_seq_private(seq);
- for (state->dev = dev_base, state->idev = NULL, state->im = NULL;
- state->dev;
- state->dev = state->dev->next) {
+ state->idev = NULL;
+ state->im = NULL;
+ for_each_netdev(state->dev) {
struct inet6_dev *idev;
idev = in6_dev_get(state->dev);
if (unlikely(idev == NULL))
@@ -2511,7 +2512,7 @@ static struct ip6_sf_list *igmp6_mcf_get_next(struct seq_file *seq, struct ip6_s
read_unlock_bh(&state->idev->lock);
in6_dev_put(state->idev);
}
- state->dev = state->dev->next;
+ state->dev = next_net_device(state->dev);
if (!state->dev) {
state->idev = NULL;
goto out;
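
The seq_file walkers above switch from open-coded dev_base chasing to the iterator helpers. Illustrative only, in the simple singly-linked form the helpers had when introduced (both the representation and, in later kernels, the arguments changed afterwards):

    #define for_each_netdev(d)      for ((d) = dev_base; (d); (d) = (d)->next)

    static inline struct net_device *next_net_device(struct net_device *dev)
    {
            return dev->next;
    }
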
diff --git a/net/ipv6/mip6.c b/net/ipv6/mip6.c
index 0afcabdd8ed..13b7160fb89 100644
--- a/net/ipv6/mip6.c
+++ b/net/ipv6/mip6.c
@@ -90,23 +90,26 @@ int mip6_mh_filter(struct sock *sk, struct sk_buff *skb)
{
struct ip6_mh *mh;
- if (!pskb_may_pull(skb, (skb->h.raw - skb->data) + 8) ||
- !pskb_may_pull(skb, (skb->h.raw - skb->data) + ((skb->h.raw[1] + 1) << 3)))
+ if (!pskb_may_pull(skb, (skb_transport_offset(skb)) + 8) ||
+ !pskb_may_pull(skb, (skb_transport_offset(skb) +
+ ((skb_transport_header(skb)[1] + 1) << 3))))
return -1;
- mh = (struct ip6_mh *)skb->h.raw;
+ mh = (struct ip6_mh *)skb_transport_header(skb);
if (mh->ip6mh_hdrlen < mip6_mh_len(mh->ip6mh_type)) {
LIMIT_NETDEBUG(KERN_DEBUG "mip6: MH message too short: %d vs >=%d\n",
mh->ip6mh_hdrlen, mip6_mh_len(mh->ip6mh_type));
- mip6_param_prob(skb, 0, (&mh->ip6mh_hdrlen) - skb->nh.raw);
+ mip6_param_prob(skb, 0, ((&mh->ip6mh_hdrlen) -
+ skb_network_header(skb)));
return -1;
}
if (mh->ip6mh_proto != IPPROTO_NONE) {
LIMIT_NETDEBUG(KERN_DEBUG "mip6: MH invalid payload proto = %d\n",
mh->ip6mh_proto);
- mip6_param_prob(skb, 0, (&mh->ip6mh_proto) - skb->nh.raw);
+ mip6_param_prob(skb, 0, ((&mh->ip6mh_proto) -
+ skb_network_header(skb)));
return -1;
}
@@ -122,12 +125,12 @@ struct mip6_report_rate_limiter {
};
static struct mip6_report_rate_limiter mip6_report_rl = {
- .lock = SPIN_LOCK_UNLOCKED
+ .lock = __SPIN_LOCK_UNLOCKED(mip6_report_rl.lock)
};
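
The initializer swap above is the standard lockdep fix: SPIN_LOCK_UNLOCKED gives every such lock the same shared lock class, while __SPIN_LOCK_UNLOCKED(name) keys the class to the named variable, keeping lockdep reports meaningful. A generic before/after sketch:

    /* before: one lock class shared by every user (confuses lockdep) */
    static struct mip6_report_rate_limiter rl = {
            .lock = SPIN_LOCK_UNLOCKED
    };

    /* after: the lock class is keyed to this specific lock */
    static struct mip6_report_rate_limiter rl = {
            .lock = __SPIN_LOCK_UNLOCKED(rl.lock)
    };
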
static int mip6_destopt_input(struct xfrm_state *x, struct sk_buff *skb)
{
- struct ipv6hdr *iph = skb->nh.ipv6h;
+ struct ipv6hdr *iph = ipv6_hdr(skb);
struct ipv6_destopt_hdr *destopt = (struct ipv6_destopt_hdr *)skb->data;
if (!ipv6_addr_equal(&iph->saddr, (struct in6_addr *)x->coaddr) &&
@@ -152,10 +155,10 @@ static int mip6_destopt_output(struct xfrm_state *x, struct sk_buff *skb)
iph = (struct ipv6hdr *)skb->data;
iph->payload_len = htons(skb->len - sizeof(*iph));
- nexthdr = *skb->nh.raw;
- *skb->nh.raw = IPPROTO_DSTOPTS;
+ nexthdr = *skb_network_header(skb);
+ *skb_network_header(skb) = IPPROTO_DSTOPTS;
- dstopt = (struct ipv6_destopt_hdr *)skb->h.raw;
+ dstopt = (struct ipv6_destopt_hdr *)skb_transport_header(skb);
dstopt->nexthdr = nexthdr;
hao = mip6_padn((char *)(dstopt + 1),
@@ -215,21 +218,22 @@ static int mip6_destopt_reject(struct xfrm_state *x, struct sk_buff *skb, struct
if (likely(opt->dsthao)) {
offset = ipv6_find_tlv(skb, opt->dsthao, IPV6_TLV_HAO);
if (likely(offset >= 0))
- hao = (struct ipv6_destopt_hao *)(skb->nh.raw + offset);
+ hao = (struct ipv6_destopt_hao *)
+ (skb_network_header(skb) + offset);
}
skb_get_timestamp(skb, &stamp);
- if (!mip6_report_rl_allow(&stamp, &skb->nh.ipv6h->daddr,
- hao ? &hao->addr : &skb->nh.ipv6h->saddr,
+ if (!mip6_report_rl_allow(&stamp, &ipv6_hdr(skb)->daddr,
+ hao ? &hao->addr : &ipv6_hdr(skb)->saddr,
opt->iif))
goto out;
memset(&sel, 0, sizeof(sel));
- memcpy(&sel.daddr, (xfrm_address_t *)&skb->nh.ipv6h->daddr,
+ memcpy(&sel.daddr, (xfrm_address_t *)&ipv6_hdr(skb)->daddr,
sizeof(sel.daddr));
sel.prefixlen_d = 128;
- memcpy(&sel.saddr, (xfrm_address_t *)&skb->nh.ipv6h->saddr,
+ memcpy(&sel.saddr, (xfrm_address_t *)&ipv6_hdr(skb)->saddr,
sizeof(sel.saddr));
sel.prefixlen_s = 128;
sel.family = AF_INET6;
@@ -253,11 +257,13 @@ static int mip6_destopt_offset(struct xfrm_state *x, struct sk_buff *skb,
u8 **nexthdr)
{
u16 offset = sizeof(struct ipv6hdr);
- struct ipv6_opt_hdr *exthdr = (struct ipv6_opt_hdr*)(skb->nh.ipv6h + 1);
- unsigned int packet_len = skb->tail - skb->nh.raw;
+ struct ipv6_opt_hdr *exthdr =
+ (struct ipv6_opt_hdr *)(ipv6_hdr(skb) + 1);
+ const unsigned char *nh = skb_network_header(skb);
+ unsigned int packet_len = skb->tail - skb->network_header;
int found_rhdr = 0;
- *nexthdr = &skb->nh.ipv6h->nexthdr;
+ *nexthdr = &ipv6_hdr(skb)->nexthdr;
while (offset + 1 <= packet_len) {
@@ -288,7 +294,7 @@ static int mip6_destopt_offset(struct xfrm_state *x, struct sk_buff *skb,
offset += ipv6_optlen(exthdr);
*nexthdr = &exthdr->nexthdr;
- exthdr = (struct ipv6_opt_hdr*)(skb->nh.raw + offset);
+ exthdr = (struct ipv6_opt_hdr *)(nh + offset);
}
return offset;
@@ -361,10 +367,10 @@ static int mip6_rthdr_output(struct xfrm_state *x, struct sk_buff *skb)
iph = (struct ipv6hdr *)skb->data;
iph->payload_len = htons(skb->len - sizeof(*iph));
- nexthdr = *skb->nh.raw;
- *skb->nh.raw = IPPROTO_ROUTING;
+ nexthdr = *skb_network_header(skb);
+ *skb_network_header(skb) = IPPROTO_ROUTING;
- rt2 = (struct rt2_hdr *)skb->h.raw;
+ rt2 = (struct rt2_hdr *)skb_transport_header(skb);
rt2->rt_hdr.nexthdr = nexthdr;
rt2->rt_hdr.hdrlen = (x->props.header_len >> 3) - 1;
rt2->rt_hdr.type = IPV6_SRCRT_TYPE_2;
@@ -383,11 +389,13 @@ static int mip6_rthdr_offset(struct xfrm_state *x, struct sk_buff *skb,
u8 **nexthdr)
{
u16 offset = sizeof(struct ipv6hdr);
- struct ipv6_opt_hdr *exthdr = (struct ipv6_opt_hdr*)(skb->nh.ipv6h + 1);
- unsigned int packet_len = skb->tail - skb->nh.raw;
+ struct ipv6_opt_hdr *exthdr =
+ (struct ipv6_opt_hdr *)(ipv6_hdr(skb) + 1);
+ const unsigned char *nh = skb_network_header(skb);
+ unsigned int packet_len = skb->tail - skb->network_header;
int found_rhdr = 0;
- *nexthdr = &skb->nh.ipv6h->nexthdr;
+ *nexthdr = &ipv6_hdr(skb)->nexthdr;
while (offset + 1 <= packet_len) {
@@ -397,7 +405,7 @@ static int mip6_rthdr_offset(struct xfrm_state *x, struct sk_buff *skb,
case NEXTHDR_ROUTING:
if (offset + 3 <= packet_len) {
struct ipv6_rt_hdr *rt;
- rt = (struct ipv6_rt_hdr *)(skb->nh.raw + offset);
+ rt = (struct ipv6_rt_hdr *)(nh + offset);
if (rt->type != 0)
return offset;
}
@@ -417,7 +425,7 @@ static int mip6_rthdr_offset(struct xfrm_state *x, struct sk_buff *skb,
offset += ipv6_optlen(exthdr);
*nexthdr = &exthdr->nexthdr;
- exthdr = (struct ipv6_opt_hdr*)(skb->nh.raw + offset);
+ exthdr = (struct ipv6_opt_hdr *)(nh + offset);
}
return offset;
diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c
index 121f31c283f..d8b36451bad 100644
--- a/net/ipv6/ndisc.c
+++ b/net/ipv6/ndisc.c
@@ -319,6 +319,8 @@ int ndisc_mc_map(struct in6_addr *addr, char *buf, struct net_device *dev, int d
return -EINVAL;
}
+EXPORT_SYMBOL(ndisc_mc_map);
+
static u32 ndisc_hash(const void *pkey, const struct net_device *dev)
{
const u32 *p32 = pkey;
@@ -425,36 +427,23 @@ static inline void ndisc_flow_init(struct flowi *fl, u8 type,
security_sk_classify_flow(ndisc_socket->sk, fl);
}
-static void ndisc_send_na(struct net_device *dev, struct neighbour *neigh,
- struct in6_addr *daddr, struct in6_addr *solicited_addr,
- int router, int solicited, int override, int inc_opt)
+static void __ndisc_send(struct net_device *dev,
+ struct neighbour *neigh,
+ struct in6_addr *daddr, struct in6_addr *saddr,
+ struct icmp6hdr *icmp6h, struct in6_addr *target,
+ int llinfo, int icmp6_mib_outnd)
{
- struct in6_addr tmpaddr;
- struct inet6_ifaddr *ifp;
- struct inet6_dev *idev;
struct flowi fl;
- struct dst_entry* dst;
+ struct dst_entry *dst;
struct sock *sk = ndisc_socket->sk;
- struct in6_addr *src_addr;
- struct nd_msg *msg;
- int len;
struct sk_buff *skb;
+ struct icmp6hdr *hdr;
+ struct inet6_dev *idev;
+ int len;
int err;
+ u8 *opt;
- len = sizeof(struct icmp6hdr) + sizeof(struct in6_addr);
-
- /* for anycast or proxy, solicited_addr != src_addr */
- ifp = ipv6_get_ifaddr(solicited_addr, dev, 1);
- if (ifp) {
- src_addr = solicited_addr;
- in6_ifa_put(ifp);
- } else {
- if (ipv6_dev_get_saddr(dev, daddr, &tmpaddr))
- return;
- src_addr = &tmpaddr;
- }
-
- ndisc_flow_init(&fl, NDISC_NEIGHBOUR_ADVERTISEMENT, src_addr, daddr,
+ ndisc_flow_init(&fl, icmp6h->icmp6_type, saddr, daddr,
dev->ifindex);
dst = ndisc_dst_alloc(dev, neigh, daddr, ip6_output);
@@ -465,60 +454,57 @@ static void ndisc_send_na(struct net_device *dev, struct neighbour *neigh,
if (err < 0)
return;
- if (inc_opt) {
- if (dev->addr_len)
- len += ndisc_opt_addr_space(dev);
- else
- inc_opt = 0;
- }
+ if (!dev->addr_len)
+ llinfo = 0;
+
+ len = sizeof(struct icmp6hdr) + (target ? sizeof(*target) : 0);
+ if (llinfo)
+ len += ndisc_opt_addr_space(dev);
skb = sock_alloc_send_skb(sk,
(MAX_HEADER + sizeof(struct ipv6hdr) +
len + LL_RESERVED_SPACE(dev)),
1, &err);
-
- if (skb == NULL) {
+ if (!skb) {
ND_PRINTK0(KERN_ERR
- "ICMPv6 NA: %s() failed to allocate an skb.\n",
+ "ICMPv6 ND: %s() failed to allocate an skb.\n",
__FUNCTION__);
dst_release(dst);
return;
}
skb_reserve(skb, LL_RESERVED_SPACE(dev));
- ip6_nd_hdr(sk, skb, dev, src_addr, daddr, IPPROTO_ICMPV6, len);
-
- msg = (struct nd_msg *)skb_put(skb, len);
- skb->h.raw = (unsigned char*)msg;
+ ip6_nd_hdr(sk, skb, dev, saddr, daddr, IPPROTO_ICMPV6, len);
- msg->icmph.icmp6_type = NDISC_NEIGHBOUR_ADVERTISEMENT;
- msg->icmph.icmp6_code = 0;
- msg->icmph.icmp6_cksum = 0;
+ skb->transport_header = skb->tail;
+ skb_put(skb, len);
- msg->icmph.icmp6_unused = 0;
- msg->icmph.icmp6_router = router;
- msg->icmph.icmp6_solicited = solicited;
- msg->icmph.icmp6_override = override;
+ hdr = (struct icmp6hdr *)skb_transport_header(skb);
+ memcpy(hdr, icmp6h, sizeof(*hdr));
- /* Set the target address. */
- ipv6_addr_copy(&msg->target, solicited_addr);
+ opt = skb_transport_header(skb) + sizeof(struct icmp6hdr);
+ if (target) {
+ ipv6_addr_copy((struct in6_addr *)opt, target);
+ opt += sizeof(*target);
+ }
- if (inc_opt)
- ndisc_fill_addr_option(msg->opt, ND_OPT_TARGET_LL_ADDR, dev->dev_addr,
+ if (llinfo)
+ ndisc_fill_addr_option(opt, llinfo, dev->dev_addr,
dev->addr_len, dev->type);
- /* checksum */
- msg->icmph.icmp6_cksum = csum_ipv6_magic(src_addr, daddr, len,
- IPPROTO_ICMPV6,
- csum_partial((__u8 *) msg,
- len, 0));
+ hdr->icmp6_cksum = csum_ipv6_magic(saddr, daddr, len,
+ IPPROTO_ICMPV6,
+ csum_partial((__u8 *) hdr,
+ len, 0));
skb->dst = dst;
+
idev = in6_dev_get(dst->dev);
IP6_INC_STATS(idev, IPSTATS_MIB_OUTREQUESTS);
+
err = NF_HOOK(PF_INET6, NF_IP6_LOCAL_OUT, skb, NULL, dst->dev, dst_output);
if (!err) {
- ICMP6_INC_STATS(idev, ICMP6_MIB_OUTNEIGHBORADVERTISEMENTS);
+ ICMP6_INC_STATS(idev, icmp6_mib_outnd);
ICMP6_INC_STATS(idev, ICMP6_MIB_OUTMSGS);
}
@@ -526,165 +512,95 @@ static void ndisc_send_na(struct net_device *dev, struct neighbour *neigh,
in6_dev_put(idev);
}
+static void ndisc_send_na(struct net_device *dev, struct neighbour *neigh,
+ struct in6_addr *daddr, struct in6_addr *solicited_addr,
+ int router, int solicited, int override, int inc_opt)
+{
+ struct in6_addr tmpaddr;
+ struct inet6_ifaddr *ifp;
+ struct in6_addr *src_addr;
+ struct icmp6hdr icmp6h = {
+ .icmp6_type = NDISC_NEIGHBOUR_ADVERTISEMENT,
+ };
+
+ /* for anycast or proxy, solicited_addr != src_addr */
+ ifp = ipv6_get_ifaddr(solicited_addr, dev, 1);
+ if (ifp) {
+ src_addr = solicited_addr;
+ if (ifp->flags & IFA_F_OPTIMISTIC)
+ override = 0;
+ in6_ifa_put(ifp);
+ } else {
+ if (ipv6_dev_get_saddr(dev, daddr, &tmpaddr))
+ return;
+ src_addr = &tmpaddr;
+ }
+
+ icmp6h.icmp6_router = router;
+ icmp6h.icmp6_solicited = solicited;
+ icmp6h.icmp6_override = override;
+
+ __ndisc_send(dev, neigh, daddr, src_addr,
+ &icmp6h, solicited_addr,
+ inc_opt ? ND_OPT_TARGET_LL_ADDR : 0,
+ ICMP6_MIB_OUTNEIGHBORADVERTISEMENTS);
+}
+
void ndisc_send_ns(struct net_device *dev, struct neighbour *neigh,
struct in6_addr *solicit,
struct in6_addr *daddr, struct in6_addr *saddr)
{
- struct flowi fl;
- struct dst_entry* dst;
- struct inet6_dev *idev;
- struct sock *sk = ndisc_socket->sk;
- struct sk_buff *skb;
- struct nd_msg *msg;
struct in6_addr addr_buf;
- int len;
- int err;
- int send_llinfo;
+ struct icmp6hdr icmp6h = {
+ .icmp6_type = NDISC_NEIGHBOUR_SOLICITATION,
+ };
if (saddr == NULL) {
- if (ipv6_get_lladdr(dev, &addr_buf))
+ if (ipv6_get_lladdr(dev, &addr_buf,
+ (IFA_F_TENTATIVE|IFA_F_OPTIMISTIC)))
return;
saddr = &addr_buf;
}
- ndisc_flow_init(&fl, NDISC_NEIGHBOUR_SOLICITATION, saddr, daddr,
- dev->ifindex);
-
- dst = ndisc_dst_alloc(dev, neigh, daddr, ip6_output);
- if (!dst)
- return;
-
- err = xfrm_lookup(&dst, &fl, NULL, 0);
- if (err < 0)
- return;
-
- len = sizeof(struct icmp6hdr) + sizeof(struct in6_addr);
- send_llinfo = dev->addr_len && !ipv6_addr_any(saddr);
- if (send_llinfo)
- len += ndisc_opt_addr_space(dev);
-
- skb = sock_alloc_send_skb(sk,
- (MAX_HEADER + sizeof(struct ipv6hdr) +
- len + LL_RESERVED_SPACE(dev)),
- 1, &err);
- if (skb == NULL) {
- ND_PRINTK0(KERN_ERR
- "ICMPv6 NA: %s() failed to allocate an skb.\n",
- __FUNCTION__);
- dst_release(dst);
- return;
- }
-
- skb_reserve(skb, LL_RESERVED_SPACE(dev));
- ip6_nd_hdr(sk, skb, dev, saddr, daddr, IPPROTO_ICMPV6, len);
-
- msg = (struct nd_msg *)skb_put(skb, len);
- skb->h.raw = (unsigned char*)msg;
- msg->icmph.icmp6_type = NDISC_NEIGHBOUR_SOLICITATION;
- msg->icmph.icmp6_code = 0;
- msg->icmph.icmp6_cksum = 0;
- msg->icmph.icmp6_unused = 0;
-
- /* Set the target address. */
- ipv6_addr_copy(&msg->target, solicit);
-
- if (send_llinfo)
- ndisc_fill_addr_option(msg->opt, ND_OPT_SOURCE_LL_ADDR, dev->dev_addr,
- dev->addr_len, dev->type);
-
- /* checksum */
- msg->icmph.icmp6_cksum = csum_ipv6_magic(&skb->nh.ipv6h->saddr,
- daddr, len,
- IPPROTO_ICMPV6,
- csum_partial((__u8 *) msg,
- len, 0));
- /* send it! */
- skb->dst = dst;
- idev = in6_dev_get(dst->dev);
- IP6_INC_STATS(idev, IPSTATS_MIB_OUTREQUESTS);
- err = NF_HOOK(PF_INET6, NF_IP6_LOCAL_OUT, skb, NULL, dst->dev, dst_output);
- if (!err) {
- ICMP6_INC_STATS(idev, ICMP6_MIB_OUTNEIGHBORSOLICITS);
- ICMP6_INC_STATS(idev, ICMP6_MIB_OUTMSGS);
- }
-
- if (likely(idev != NULL))
- in6_dev_put(idev);
+ __ndisc_send(dev, neigh, daddr, saddr,
+ &icmp6h, solicit,
+ !ipv6_addr_any(saddr) ? ND_OPT_SOURCE_LL_ADDR : 0,
+ ICMP6_MIB_OUTNEIGHBORSOLICITS);
}
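
These call sites assume the extended ipv6_get_lladdr() signature from the optimistic-DAD series: the third argument is a mask of address flags the caller refuses, so any link-local address carrying a banned flag is skipped. Sketch of the calling convention:

    struct in6_addr addr_buf;

    /* refuse tentative and (when not probing optimistically)
     * optimistic link-local addresses as the source */
    if (ipv6_get_lladdr(dev, &addr_buf,
                        IFA_F_TENTATIVE | IFA_F_OPTIMISTIC))
            return;         /* no acceptable link-local address on dev */
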
void ndisc_send_rs(struct net_device *dev, struct in6_addr *saddr,
struct in6_addr *daddr)
{
- struct flowi fl;
- struct dst_entry* dst;
- struct inet6_dev *idev;
- struct sock *sk = ndisc_socket->sk;
- struct sk_buff *skb;
- struct icmp6hdr *hdr;
- __u8 * opt;
- int len;
- int err;
-
- ndisc_flow_init(&fl, NDISC_ROUTER_SOLICITATION, saddr, daddr,
- dev->ifindex);
-
- dst = ndisc_dst_alloc(dev, NULL, daddr, ip6_output);
- if (!dst)
- return;
-
- err = xfrm_lookup(&dst, &fl, NULL, 0);
- if (err < 0)
- return;
-
- len = sizeof(struct icmp6hdr);
- if (dev->addr_len)
- len += ndisc_opt_addr_space(dev);
-
- skb = sock_alloc_send_skb(sk,
- (MAX_HEADER + sizeof(struct ipv6hdr) +
- len + LL_RESERVED_SPACE(dev)),
- 1, &err);
- if (skb == NULL) {
- ND_PRINTK0(KERN_ERR
- "ICMPv6 RS: %s() failed to allocate an skb.\n",
- __FUNCTION__);
- dst_release(dst);
- return;
- }
-
- skb_reserve(skb, LL_RESERVED_SPACE(dev));
- ip6_nd_hdr(sk, skb, dev, saddr, daddr, IPPROTO_ICMPV6, len);
-
- hdr = (struct icmp6hdr *)skb_put(skb, len);
- skb->h.raw = (unsigned char*)hdr;
- hdr->icmp6_type = NDISC_ROUTER_SOLICITATION;
- hdr->icmp6_code = 0;
- hdr->icmp6_cksum = 0;
- hdr->icmp6_unused = 0;
-
- opt = (u8*) (hdr + 1);
-
- if (dev->addr_len)
- ndisc_fill_addr_option(opt, ND_OPT_SOURCE_LL_ADDR, dev->dev_addr,
- dev->addr_len, dev->type);
-
- /* checksum */
- hdr->icmp6_cksum = csum_ipv6_magic(&skb->nh.ipv6h->saddr, daddr, len,
- IPPROTO_ICMPV6,
- csum_partial((__u8 *) hdr, len, 0));
+ struct icmp6hdr icmp6h = {
+ .icmp6_type = NDISC_ROUTER_SOLICITATION,
+ };
+ int send_sllao = dev->addr_len;
- /* send it! */
- skb->dst = dst;
- idev = in6_dev_get(dst->dev);
- IP6_INC_STATS(idev, IPSTATS_MIB_OUTREQUESTS);
- err = NF_HOOK(PF_INET6, NF_IP6_LOCAL_OUT, skb, NULL, dst->dev, dst_output);
- if (!err) {
- ICMP6_INC_STATS(idev, ICMP6_MIB_OUTROUTERSOLICITS);
- ICMP6_INC_STATS(idev, ICMP6_MIB_OUTMSGS);
+#ifdef CONFIG_IPV6_OPTIMISTIC_DAD
+ /*
+ * According to section 2.2 of RFC 4429, we must not
+ * send router solicitations with an SLLAO from
+ * optimistic addresses, but we may send the solicitation
+ * if we don't include the SLLAO. So here we check
+ * whether our address is optimistic, and if so, we
+ * suppress the inclusion of the SLLAO.
+ */
+ if (send_sllao) {
+ struct inet6_ifaddr *ifp = ipv6_get_ifaddr(saddr, dev, 1);
+ if (ifp) {
+ if (ifp->flags & IFA_F_OPTIMISTIC) {
+ send_sllao = 0;
+ }
+ in6_ifa_put(ifp);
+ } else {
+ send_sllao = 0;
+ }
}
-
- if (likely(idev != NULL))
- in6_dev_put(idev);
+#endif
+ __ndisc_send(dev, NULL, daddr, saddr,
+ &icmp6h, NULL,
+ send_sllao ? ND_OPT_SOURCE_LL_ADDR : 0,
+ ICMP6_MIB_OUTROUTERSOLICITS);
}
@@ -708,8 +624,8 @@ static void ndisc_solicit(struct neighbour *neigh, struct sk_buff *skb)
struct in6_addr *target = (struct in6_addr *)&neigh->primary_key;
int probes = atomic_read(&neigh->probes);
- if (skb && ipv6_chk_addr(&skb->nh.ipv6h->saddr, dev, 1))
- saddr = &skb->nh.ipv6h->saddr;
+ if (skb && ipv6_chk_addr(&ipv6_hdr(skb)->saddr, dev, 1))
+ saddr = &ipv6_hdr(skb)->saddr;
if ((probes -= neigh->parms->ucast_probes) < 0) {
if (!(neigh->nud_state & NUD_VALID)) {
@@ -732,11 +648,12 @@ static void ndisc_solicit(struct neighbour *neigh, struct sk_buff *skb)
static void ndisc_recv_ns(struct sk_buff *skb)
{
- struct nd_msg *msg = (struct nd_msg *)skb->h.raw;
- struct in6_addr *saddr = &skb->nh.ipv6h->saddr;
- struct in6_addr *daddr = &skb->nh.ipv6h->daddr;
+ struct nd_msg *msg = (struct nd_msg *)skb_transport_header(skb);
+ struct in6_addr *saddr = &ipv6_hdr(skb)->saddr;
+ struct in6_addr *daddr = &ipv6_hdr(skb)->daddr;
u8 *lladdr = NULL;
- u32 ndoptlen = skb->tail - msg->opt;
+ u32 ndoptlen = skb->tail - (skb->transport_header +
+ offsetof(struct nd_msg, opt));
struct ndisc_options ndopts;
struct net_device *dev = skb->dev;
struct inet6_ifaddr *ifp;
@@ -796,28 +713,40 @@ static void ndisc_recv_ns(struct sk_buff *skb)
inc = ipv6_addr_is_multicast(daddr);
if ((ifp = ipv6_get_ifaddr(&msg->target, dev, 1)) != NULL) {
- if (ifp->flags & IFA_F_TENTATIVE) {
- /* Address is tentative. If the source
- is unspecified address, it is someone
- does DAD, otherwise we ignore solicitations
- until DAD timer expires.
- */
- if (!dad)
+
+ if (ifp->flags & (IFA_F_TENTATIVE|IFA_F_OPTIMISTIC)) {
+ if (dad) {
+ if (dev->type == ARPHRD_IEEE802_TR) {
+ const unsigned char *sadr;
+ sadr = skb_mac_header(skb);
+ if (((sadr[8] ^ dev->dev_addr[0]) & 0x7f) == 0 &&
+ sadr[9] == dev->dev_addr[1] &&
+ sadr[10] == dev->dev_addr[2] &&
+ sadr[11] == dev->dev_addr[3] &&
+ sadr[12] == dev->dev_addr[4] &&
+ sadr[13] == dev->dev_addr[5]) {
+ /* looped-back to us */
+ goto out;
+ }
+ }
+
+ /*
+ * We are colliding with another node
+ * that is performing DAD, so fail
+ * our own DAD process.
+ */
+ addrconf_dad_failure(ifp);
goto out;
- if (dev->type == ARPHRD_IEEE802_TR) {
- unsigned char *sadr = skb->mac.raw;
- if (((sadr[8] ^ dev->dev_addr[0]) & 0x7f) == 0 &&
- sadr[9] == dev->dev_addr[1] &&
- sadr[10] == dev->dev_addr[2] &&
- sadr[11] == dev->dev_addr[3] &&
- sadr[12] == dev->dev_addr[4] &&
- sadr[13] == dev->dev_addr[5]) {
- /* looped-back to us */
+ } else {
+ /*
+ * This is not a DAD solicitation.
+ * If we are an optimistic node,
+ * we should respond.
+ * Otherwise, we should ignore it.
+ */
+ if (!(ifp->flags & IFA_F_OPTIMISTIC))
goto out;
- }
}
- addrconf_dad_failure(ifp);
- return;
}
idev = ifp->idev;
@@ -898,11 +827,12 @@ out:
static void ndisc_recv_na(struct sk_buff *skb)
{
- struct nd_msg *msg = (struct nd_msg *)skb->h.raw;
- struct in6_addr *saddr = &skb->nh.ipv6h->saddr;
- struct in6_addr *daddr = &skb->nh.ipv6h->daddr;
+ struct nd_msg *msg = (struct nd_msg *)skb_transport_header(skb);
+ struct in6_addr *saddr = &ipv6_hdr(skb)->saddr;
+ struct in6_addr *daddr = &ipv6_hdr(skb)->daddr;
u8 *lladdr = NULL;
- u32 ndoptlen = skb->tail - msg->opt;
+ u32 ndoptlen = skb->tail - (skb->transport_header +
+ offsetof(struct nd_msg, opt));
struct ndisc_options ndopts;
struct net_device *dev = skb->dev;
struct inet6_ifaddr *ifp;
@@ -1000,11 +930,11 @@ out:
static void ndisc_recv_rs(struct sk_buff *skb)
{
- struct rs_msg *rs_msg = (struct rs_msg *) skb->h.raw;
+ struct rs_msg *rs_msg = (struct rs_msg *)skb_transport_header(skb);
unsigned long ndoptlen = skb->len - sizeof(*rs_msg);
struct neighbour *neigh;
struct inet6_dev *idev;
- struct in6_addr *saddr = &skb->nh.ipv6h->saddr;
+ struct in6_addr *saddr = &ipv6_hdr(skb)->saddr;
struct ndisc_options ndopts;
u8 *lladdr = NULL;
@@ -1057,7 +987,7 @@ out:
static void ndisc_router_discovery(struct sk_buff *skb)
{
- struct ra_msg *ra_msg = (struct ra_msg *) skb->h.raw;
+ struct ra_msg *ra_msg = (struct ra_msg *)skb_transport_header(skb);
struct neighbour *neigh = NULL;
struct inet6_dev *in6_dev;
struct rt6_info *rt = NULL;
@@ -1068,9 +998,9 @@ static void ndisc_router_discovery(struct sk_buff *skb)
__u8 * opt = (__u8 *)(ra_msg + 1);
- optlen = (skb->tail - skb->h.raw) - sizeof(struct ra_msg);
+ optlen = (skb->tail - skb->transport_header) - sizeof(struct ra_msg);
- if (!(ipv6_addr_type(&skb->nh.ipv6h->saddr) & IPV6_ADDR_LINKLOCAL)) {
+ if (!(ipv6_addr_type(&ipv6_hdr(skb)->saddr) & IPV6_ADDR_LINKLOCAL)) {
ND_PRINTK2(KERN_WARNING
"ICMPv6 RA: source address is not link-local.\n");
return;
@@ -1136,7 +1066,7 @@ static void ndisc_router_discovery(struct sk_buff *skb)
pref = ICMPV6_ROUTER_PREF_MEDIUM;
#endif
- rt = rt6_get_dflt_router(&skb->nh.ipv6h->saddr, skb->dev);
+ rt = rt6_get_dflt_router(&ipv6_hdr(skb)->saddr, skb->dev);
if (rt)
neigh = rt->rt6i_nexthop;
@@ -1151,7 +1081,7 @@ static void ndisc_router_discovery(struct sk_buff *skb)
ND_PRINTK3(KERN_DEBUG
"ICMPv6 RA: adding default router.\n");
- rt = rt6_add_dflt_router(&skb->nh.ipv6h->saddr, skb->dev, pref);
+ rt = rt6_add_dflt_router(&ipv6_hdr(skb)->saddr, skb->dev, pref);
if (rt == NULL) {
ND_PRINTK0(KERN_ERR
"ICMPv6 RA: %s() failed to add default route.\n",
@@ -1223,7 +1153,7 @@ skip_defrtr:
*/
if (!neigh)
- neigh = __neigh_lookup(&nd_tbl, &skb->nh.ipv6h->saddr,
+ neigh = __neigh_lookup(&nd_tbl, &ipv6_hdr(skb)->saddr,
skb->dev, 1);
if (neigh) {
u8 *lladdr = NULL;
@@ -1252,7 +1182,7 @@ skip_defrtr:
if (((struct route_info *)p)->prefix_len > in6_dev->cnf.accept_ra_rt_info_max_plen)
continue;
rt6_route_rcv(skb->dev, (u8*)p, (p->nd_opt_len) << 3,
- &skb->nh.ipv6h->saddr);
+ &ipv6_hdr(skb)->saddr);
}
}
#endif
@@ -1311,13 +1241,13 @@ static void ndisc_redirect_rcv(struct sk_buff *skb)
int optlen;
u8 *lladdr = NULL;
- if (!(ipv6_addr_type(&skb->nh.ipv6h->saddr) & IPV6_ADDR_LINKLOCAL)) {
+ if (!(ipv6_addr_type(&ipv6_hdr(skb)->saddr) & IPV6_ADDR_LINKLOCAL)) {
ND_PRINTK2(KERN_WARNING
"ICMPv6 Redirect: source address is not link-local.\n");
return;
}
- optlen = skb->tail - skb->h.raw;
+ optlen = skb->tail - skb->transport_header;
optlen -= sizeof(struct icmp6hdr) + 2 * sizeof(struct in6_addr);
if (optlen < 0) {
@@ -1326,7 +1256,7 @@ static void ndisc_redirect_rcv(struct sk_buff *skb)
return;
}
- icmph = (struct icmp6hdr *) skb->h.raw;
+ icmph = icmp6_hdr(skb);
target = (struct in6_addr *) (icmph + 1);
dest = target + 1;
@@ -1376,8 +1306,8 @@ static void ndisc_redirect_rcv(struct sk_buff *skb)
neigh = __neigh_lookup(&nd_tbl, target, skb->dev, 1);
if (neigh) {
- rt6_redirect(dest, &skb->nh.ipv6h->daddr,
- &skb->nh.ipv6h->saddr, neigh, lladdr,
+ rt6_redirect(dest, &ipv6_hdr(skb)->daddr,
+ &ipv6_hdr(skb)->saddr, neigh, lladdr,
on_link);
neigh_release(neigh);
}
@@ -1406,21 +1336,21 @@ void ndisc_send_redirect(struct sk_buff *skb, struct neighbour *neigh,
dev = skb->dev;
- if (ipv6_get_lladdr(dev, &saddr_buf)) {
+ if (ipv6_get_lladdr(dev, &saddr_buf, IFA_F_TENTATIVE)) {
ND_PRINTK2(KERN_WARNING
"ICMPv6 Redirect: no link-local address on %s\n",
dev->name);
return;
}
- if (!ipv6_addr_equal(&skb->nh.ipv6h->daddr, target) &&
+ if (!ipv6_addr_equal(&ipv6_hdr(skb)->daddr, target) &&
!(ipv6_addr_type(target) & IPV6_ADDR_LINKLOCAL)) {
ND_PRINTK2(KERN_WARNING
"ICMPv6 Redirect: target address is not link-local.\n");
return;
}
- ndisc_flow_init(&fl, NDISC_REDIRECT, &saddr_buf, &skb->nh.ipv6h->saddr,
+ ndisc_flow_init(&fl, NDISC_REDIRECT, &saddr_buf, &ipv6_hdr(skb)->saddr,
dev->ifindex);
dst = ip6_route_output(NULL, &fl);
@@ -1475,11 +1405,12 @@ void ndisc_send_redirect(struct sk_buff *skb, struct neighbour *neigh,
hlen = 0;
skb_reserve(buff, LL_RESERVED_SPACE(dev));
- ip6_nd_hdr(sk, buff, dev, &saddr_buf, &skb->nh.ipv6h->saddr,
+ ip6_nd_hdr(sk, buff, dev, &saddr_buf, &ipv6_hdr(skb)->saddr,
IPPROTO_ICMPV6, len);
- icmph = (struct icmp6hdr *)skb_put(buff, len);
- buff->h.raw = (unsigned char*)icmph;
+ skb_set_transport_header(buff, skb_tail_pointer(buff) - buff->data);
+ skb_put(buff, len);
+ icmph = icmp6_hdr(buff);
memset(icmph, 0, sizeof(struct icmp6hdr));
icmph->icmp6_type = NDISC_REDIRECT;
@@ -1491,7 +1422,7 @@ void ndisc_send_redirect(struct sk_buff *skb, struct neighbour *neigh,
addrp = (struct in6_addr *)(icmph + 1);
ipv6_addr_copy(addrp, target);
addrp++;
- ipv6_addr_copy(addrp, &skb->nh.ipv6h->daddr);
+ ipv6_addr_copy(addrp, &ipv6_hdr(skb)->daddr);
opt = (u8*) (addrp + 1);
@@ -1512,9 +1443,9 @@ void ndisc_send_redirect(struct sk_buff *skb, struct neighbour *neigh,
*(opt++) = (rd_len >> 3);
opt += 6;
- memcpy(opt, skb->nh.ipv6h, rd_len - 8);
+ memcpy(opt, ipv6_hdr(skb), rd_len - 8);
- icmph->icmp6_cksum = csum_ipv6_magic(&saddr_buf, &skb->nh.ipv6h->saddr,
+ icmph->icmp6_cksum = csum_ipv6_magic(&saddr_buf, &ipv6_hdr(skb)->saddr,
len, IPPROTO_ICMPV6,
csum_partial((u8 *) icmph, len, 0));
@@ -1544,14 +1475,14 @@ int ndisc_rcv(struct sk_buff *skb)
if (!pskb_may_pull(skb, skb->len))
return 0;
- msg = (struct nd_msg *) skb->h.raw;
+ msg = (struct nd_msg *)skb_transport_header(skb);
- __skb_push(skb, skb->data-skb->h.raw);
+ __skb_push(skb, skb->data - skb_transport_header(skb));
- if (skb->nh.ipv6h->hop_limit != 255) {
+ if (ipv6_hdr(skb)->hop_limit != 255) {
ND_PRINTK2(KERN_WARNING
"ICMPv6 NDISC: invalid hop-limit: %d\n",
- skb->nh.ipv6h->hop_limit);
+ ipv6_hdr(skb)->hop_limit);
return 0;
}
@@ -1584,7 +1515,7 @@ int ndisc_rcv(struct sk_buff *skb)
case NDISC_REDIRECT:
ndisc_redirect_rcv(skb);
break;
- };
+ }
return 0;
}
diff --git a/net/ipv6/netfilter.c b/net/ipv6/netfilter.c
index 1c405dd30c6..38b14961391 100644
--- a/net/ipv6/netfilter.c
+++ b/net/ipv6/netfilter.c
@@ -11,7 +11,7 @@
int ip6_route_me_harder(struct sk_buff *skb)
{
- struct ipv6hdr *iph = skb->nh.ipv6h;
+ struct ipv6hdr *iph = ipv6_hdr(skb);
struct dst_entry *dst;
struct flowi fl = {
.oif = skb->sk ? skb->sk->sk_bound_dev_if : 0,
@@ -61,7 +61,7 @@ static void nf_ip6_saveroute(const struct sk_buff *skb, struct nf_info *info)
struct ip6_rt_info *rt_info = nf_info_reroute(info);
if (info->hook == NF_IP6_LOCAL_OUT) {
- struct ipv6hdr *iph = skb->nh.ipv6h;
+ struct ipv6hdr *iph = ipv6_hdr(skb);
rt_info->daddr = iph->daddr;
rt_info->saddr = iph->saddr;
@@ -73,7 +73,7 @@ static int nf_ip6_reroute(struct sk_buff **pskb, const struct nf_info *info)
struct ip6_rt_info *rt_info = nf_info_reroute(info);
if (info->hook == NF_IP6_LOCAL_OUT) {
- struct ipv6hdr *iph = (*pskb)->nh.ipv6h;
+ struct ipv6hdr *iph = ipv6_hdr(*pskb);
if (!ipv6_addr_equal(&iph->daddr, &rt_info->daddr) ||
!ipv6_addr_equal(&iph->saddr, &rt_info->saddr))
return ip6_route_me_harder(*pskb);
@@ -84,7 +84,7 @@ static int nf_ip6_reroute(struct sk_buff **pskb, const struct nf_info *info)
__sum16 nf_ip6_checksum(struct sk_buff *skb, unsigned int hook,
unsigned int dataoff, u_int8_t protocol)
{
- struct ipv6hdr *ip6h = skb->nh.ipv6h;
+ struct ipv6hdr *ip6h = ipv6_hdr(skb);
__sum16 csum = 0;
switch (skb->ip_summed) {
diff --git a/net/ipv6/netfilter/ip6_queue.c b/net/ipv6/netfilter/ip6_queue.c
index fdb30a5916e..0004db38af6 100644
--- a/net/ipv6/netfilter/ip6_queue.c
+++ b/net/ipv6/netfilter/ip6_queue.c
@@ -11,18 +11,6 @@
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
- *
- * 2001-11-06: First try. Working with ip_queue.c for IPv4 and trying
- * to adapt it to IPv6
- * HEAVILY based in ipqueue.c by James Morris. It's just
- * a little modified version of it, so he's nearly the
- * real coder of this.
- * Few changes needed, mainly the hard_routing code and
- * the netlink socket protocol (we're NETLINK_IP6_FW).
- * 2002-06-25: Code cleanup. [JM: ported cleanup over from ip_queue.c]
- * 2005-02-04: Added /proc counter for dropped packets; fixed so
- * packets aren't delivered to user space if they're going
- * to be dropped.
*/
#include <linux/module.h>
#include <linux/skbuff.h>
@@ -189,12 +177,13 @@ ipq_flush(int verdict)
static struct sk_buff *
ipq_build_packet_message(struct ipq_queue_entry *entry, int *errp)
{
- unsigned char *old_tail;
+ sk_buff_data_t old_tail;
size_t size = 0;
size_t data_len = 0;
struct sk_buff *skb;
struct ipq_packet_msg *pmsg;
struct nlmsghdr *nlh;
+ struct timeval tv;
read_lock_bh(&queue_lock);
@@ -232,15 +221,16 @@ ipq_build_packet_message(struct ipq_queue_entry *entry, int *errp)
if (!skb)
goto nlmsg_failure;
- old_tail= skb->tail;
+ old_tail = skb->tail;
nlh = NLMSG_PUT(skb, 0, 0, IPQM_PACKET, size - sizeof(*nlh));
pmsg = NLMSG_DATA(nlh);
memset(pmsg, 0, sizeof(*pmsg));
pmsg->packet_id = (unsigned long )entry;
pmsg->data_len = data_len;
- pmsg->timestamp_sec = entry->skb->tstamp.off_sec;
- pmsg->timestamp_usec = entry->skb->tstamp.off_usec;
+ tv = ktime_to_timeval(entry->skb->tstamp);
+ pmsg->timestamp_sec = tv.tv_sec;
+ pmsg->timestamp_usec = tv.tv_usec;
pmsg->mark = entry->skb->mark;
pmsg->hook = entry->info->hook;
pmsg->hw_protocol = entry->skb->protocol;
@@ -376,7 +366,7 @@ ipq_mangle_ipv6(ipq_verdict_msg_t *v, struct ipq_queue_entry *e)
}
if (!skb_make_writable(&e->skb, v->data_len))
return -ENOMEM;
- memcpy(e->skb->data, v->payload, v->data_len);
+ skb_copy_to_linear_data(e->skb, v->payload, v->data_len);
e->skb->ip_summed = CHECKSUM_NONE;
return 0;
@@ -485,7 +475,7 @@ ipq_rcv_skb(struct sk_buff *skb)
if (skblen < sizeof(*nlh))
return;
- nlh = (struct nlmsghdr *)skb->data;
+ nlh = nlmsg_hdr(skb);
nlmsglen = nlh->nlmsg_len;
if (nlmsglen < sizeof(*nlh) || skblen < nlmsglen)
return;
@@ -667,7 +657,7 @@ static int __init ip6_queue_init(void)
struct proc_dir_entry *proc;
netlink_register_notifier(&ipq_nl_notifier);
- ipqnl = netlink_kernel_create(NETLINK_IP6_FW, 0, ipq_rcv_sk,
+ ipqnl = netlink_kernel_create(NETLINK_IP6_FW, 0, ipq_rcv_sk, NULL,
THIS_MODULE);
if (ipqnl == NULL) {
printk(KERN_ERR "ip6_queue: failed to create netlink socket\n");
diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c
index 7c512e13f95..9aa62402668 100644
--- a/net/ipv6/netfilter/ip6_tables.c
+++ b/net/ipv6/netfilter/ip6_tables.c
@@ -7,15 +7,6 @@
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
- *
- * 19 Jan 2002 Harald Welte <laforge@gnumonks.org>
- * - increase module usage count as soon as we have rules inside
- * a table
- * 06 Jun 2002 Andras Kis-Szabo <kisza@sch.bme.hu>
- * - new extension header parser code
- * 15 Oct 2005 Harald Welte <laforge@netfilter.org>
- * - Unification of {ip,ip6}_tables into x_tables
- * - Removed tcp and udp code, since it's not ipv6 specific
*/
#include <linux/capability.h>
@@ -115,7 +106,7 @@ ip6_packet_match(const struct sk_buff *skb,
{
size_t i;
unsigned long ret;
- const struct ipv6hdr *ipv6 = skb->nh.ipv6h;
+ const struct ipv6hdr *ipv6 = ipv6_hdr(skb);
#define FWINV(bool,invflg) ((bool) ^ !!(ip6info->invflags & invflg))
@@ -301,7 +292,7 @@ ip6t_do_table(struct sk_buff **pskb,
goto no_match;
ADD_COUNTER(e->counters,
- ntohs((*pskb)->nh.ipv6h->payload_len)
+ ntohs(ipv6_hdr(*pskb)->payload_len)
+ IPV6_HDR_LEN,
1);
@@ -1448,8 +1439,8 @@ static void __exit ip6_tables_fini(void)
int ipv6_find_hdr(const struct sk_buff *skb, unsigned int *offset,
int target, unsigned short *fragoff)
{
- unsigned int start = (u8*)(skb->nh.ipv6h + 1) - skb->data;
- u8 nexthdr = skb->nh.ipv6h->nexthdr;
+ unsigned int start = skb_network_offset(skb) + sizeof(struct ipv6hdr);
+ u8 nexthdr = ipv6_hdr(skb)->nexthdr;
unsigned int len = skb->len - start;
if (fragoff)
diff --git a/net/ipv6/netfilter/ip6t_HL.c b/net/ipv6/netfilter/ip6t_HL.c
index ccbab66277e..4115a576ba2 100644
--- a/net/ipv6/netfilter/ip6t_HL.c
+++ b/net/ipv6/netfilter/ip6t_HL.c
@@ -32,7 +32,7 @@ static unsigned int ip6t_hl_target(struct sk_buff **pskb,
if (!skb_make_writable(pskb, (*pskb)->len))
return NF_DROP;
- ip6h = (*pskb)->nh.ipv6h;
+ ip6h = ipv6_hdr(*pskb);
switch (info->mode) {
case IP6T_HL_SET:
diff --git a/net/ipv6/netfilter/ip6t_LOG.c b/net/ipv6/netfilter/ip6t_LOG.c
index afaa039d0b7..5bb9cd34935 100644
--- a/net/ipv6/netfilter/ip6t_LOG.c
+++ b/net/ipv6/netfilter/ip6t_LOG.c
@@ -396,8 +396,8 @@ ip6t_log_packet(unsigned int pf,
/* MAC logging for input chain only. */
printk("MAC=");
if (skb->dev && (len = skb->dev->hard_header_len) &&
- skb->mac.raw != skb->nh.raw) {
- unsigned char *p = skb->mac.raw;
+ skb->mac_header != skb->network_header) {
+ const unsigned char *p = skb_mac_header(skb);
int i;
if (skb->dev->type == ARPHRD_SIT &&
@@ -412,7 +412,8 @@ ip6t_log_packet(unsigned int pf,
printk(" ");
if (skb->dev->type == ARPHRD_SIT) {
- struct iphdr *iph = (struct iphdr *)skb->mac.raw;
+ const struct iphdr *iph =
+ (struct iphdr *)skb_mac_header(skb);
printk("TUNNEL=%u.%u.%u.%u->%u.%u.%u.%u ",
NIPQUAD(iph->saddr),
NIPQUAD(iph->daddr));
@@ -421,7 +422,7 @@ ip6t_log_packet(unsigned int pf,
printk(" ");
}
- dump_packet(loginfo, skb, (u8*)skb->nh.ipv6h - skb->data, 1);
+ dump_packet(loginfo, skb, skb_network_offset(skb), 1);
printk("\n");
spin_unlock_bh(&log_lock);
}
@@ -489,14 +490,10 @@ static int __init ip6t_log_init(void)
ret = xt_register_target(&ip6t_log_reg);
if (ret < 0)
return ret;
- if (nf_log_register(PF_INET6, &ip6t_logger) < 0) {
- printk(KERN_WARNING "ip6t_LOG: not logging via system console "
- "since somebody else already registered for PF_INET6\n");
- /* we cannot make module load fail here, since otherwise
- * ip6tables userspace would abort */
- }
-
- return 0;
+ ret = nf_log_register(PF_INET6, &ip6t_logger);
+ if (ret < 0 && ret != -EEXIST)
+ xt_unregister_target(&ip6t_log_reg);
+ return ret;
}
static void __exit ip6t_log_fini(void)
diff --git a/net/ipv6/netfilter/ip6t_REJECT.c b/net/ipv6/netfilter/ip6t_REJECT.c
index 6abee94c929..cb3d2415a06 100644
--- a/net/ipv6/netfilter/ip6t_REJECT.c
+++ b/net/ipv6/netfilter/ip6t_REJECT.c
@@ -47,7 +47,7 @@ static void send_reset(struct sk_buff *oldskb)
struct tcphdr otcph, *tcph;
unsigned int otcplen, hh_len;
int tcphoff, needs_ack;
- struct ipv6hdr *oip6h = oldskb->nh.ipv6h, *ip6h;
+ struct ipv6hdr *oip6h = ipv6_hdr(oldskb), *ip6h;
struct dst_entry *dst = NULL;
u8 proto;
struct flowi fl;
@@ -120,8 +120,9 @@ static void send_reset(struct sk_buff *oldskb)
skb_reserve(nskb, hh_len + dst->header_len);
- ip6h = nskb->nh.ipv6h = (struct ipv6hdr *)
- skb_put(nskb, sizeof(struct ipv6hdr));
+ skb_put(nskb, sizeof(struct ipv6hdr));
+ skb_reset_network_header(nskb);
+ ip6h = ipv6_hdr(nskb);
ip6h->version = 6;
ip6h->hop_limit = dst_metric(dst, RTAX_HOPLIMIT);
ip6h->nexthdr = IPPROTO_TCP;
@@ -155,8 +156,8 @@ static void send_reset(struct sk_buff *oldskb)
tcph->check = 0;
/* Adjust TCP checksum */
- tcph->check = csum_ipv6_magic(&nskb->nh.ipv6h->saddr,
- &nskb->nh.ipv6h->daddr,
+ tcph->check = csum_ipv6_magic(&ipv6_hdr(nskb)->saddr,
+ &ipv6_hdr(nskb)->daddr,
sizeof(struct tcphdr), IPPROTO_TCP,
csum_partial((char *)tcph,
sizeof(struct tcphdr), 0));
diff --git a/net/ipv6/netfilter/ip6t_eui64.c b/net/ipv6/netfilter/ip6t_eui64.c
index 967bed71d4a..0f3dd932f0a 100644
--- a/net/ipv6/netfilter/ip6t_eui64.c
+++ b/net/ipv6/netfilter/ip6t_eui64.c
@@ -32,8 +32,8 @@ match(const struct sk_buff *skb,
unsigned char eui64[8];
int i = 0;
- if (!(skb->mac.raw >= skb->head &&
- (skb->mac.raw + ETH_HLEN) <= skb->data) &&
+ if (!(skb_mac_header(skb) >= skb->head &&
+ (skb_mac_header(skb) + ETH_HLEN) <= skb->data) &&
offset != 0) {
*hotdrop = 1;
return 0;
@@ -42,7 +42,7 @@ match(const struct sk_buff *skb,
memset(eui64, 0, sizeof(eui64));
if (eth_hdr(skb)->h_proto == htons(ETH_P_IPV6)) {
- if (skb->nh.ipv6h->version == 0x6) {
+ if (ipv6_hdr(skb)->version == 0x6) {
memcpy(eui64, eth_hdr(skb)->h_source, 3);
memcpy(eui64 + 5, eth_hdr(skb)->h_source + 3, 3);
eui64[3] = 0xff;
@@ -50,7 +50,7 @@ match(const struct sk_buff *skb,
eui64[0] |= 0x02;
i = 0;
- while ((skb->nh.ipv6h->saddr.s6_addr[8+i] == eui64[i])
+ while ((ipv6_hdr(skb)->saddr.s6_addr[8 + i] == eui64[i])
&& (i < 8))
i++;
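
For reference, the match compares the interface identifier in the low 64 bits of the source address against a modified EUI-64 derived from the Ethernet source MAC. A self-contained sketch of that derivation, with the 0x02 universal/local bit set exactly as the code above sets it (e.g. 00:11:22:33:44:55 becomes 02:11:22:ff:fe:33:44:55):

#include <linux/string.h>

static void mac_to_eui64_sketch(const unsigned char mac[6],
				unsigned char eui64[8])
{
	memcpy(eui64, mac, 3);		/* OUI: first three octets */
	memcpy(eui64 + 5, mac + 3, 3);	/* NIC-specific: last three */
	eui64[3] = 0xff;		/* fixed ff:fe filler in the */
	eui64[4] = 0xfe;		/*   middle of the identifier */
	eui64[0] |= 0x02;		/* universal/local bit, as above */
}
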
diff --git a/net/ipv6/netfilter/ip6t_hl.c b/net/ipv6/netfilter/ip6t_hl.c
index 37c8a4d4ed7..d606c0e6d6f 100644
--- a/net/ipv6/netfilter/ip6t_hl.c
+++ b/net/ipv6/netfilter/ip6t_hl.c
@@ -25,7 +25,7 @@ static int match(const struct sk_buff *skb,
int offset, unsigned int protoff, int *hotdrop)
{
const struct ip6t_hl_info *info = matchinfo;
- const struct ipv6hdr *ip6h = skb->nh.ipv6h;
+ const struct ipv6hdr *ip6h = ipv6_hdr(skb);
switch (info->mode) {
case IP6T_HL_EQ:
diff --git a/net/ipv6/netfilter/ip6t_ipv6header.c b/net/ipv6/netfilter/ip6t_ipv6header.c
index 700a11d25de..fd6a0869099 100644
--- a/net/ipv6/netfilter/ip6t_ipv6header.c
+++ b/net/ipv6/netfilter/ip6t_ipv6header.c
@@ -45,7 +45,7 @@ ipv6header_match(const struct sk_buff *skb,
/* Make sure this isn't an evil packet */
/* type of the 1st exthdr */
- nexthdr = skb->nh.ipv6h->nexthdr;
+ nexthdr = ipv6_hdr(skb)->nexthdr;
/* pointer to the 1st exthdr */
ptr = sizeof(struct ipv6hdr);
/* available length */
diff --git a/net/ipv6/netfilter/ip6table_filter.c b/net/ipv6/netfilter/ip6table_filter.c
index 112a21d0c6d..76f0cf66f95 100644
--- a/net/ipv6/netfilter/ip6table_filter.c
+++ b/net/ipv6/netfilter/ip6table_filter.c
@@ -102,7 +102,7 @@ ip6t_local_out_hook(unsigned int hook,
#if 0
/* root is playing with raw sockets. */
if ((*pskb)->len < sizeof(struct iphdr)
- || (*pskb)->nh.iph->ihl * 4 < sizeof(struct iphdr)) {
+ || ip_hdrlen(*pskb) < sizeof(struct iphdr)) {
if (net_ratelimit())
printk("ip6t_hook: happy cracking.\n");
return NF_ACCEPT;
diff --git a/net/ipv6/netfilter/ip6table_mangle.c b/net/ipv6/netfilter/ip6table_mangle.c
index 0c468d35a93..a9f10e32c16 100644
--- a/net/ipv6/netfilter/ip6table_mangle.c
+++ b/net/ipv6/netfilter/ip6table_mangle.c
@@ -7,8 +7,6 @@
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
- *
- * Extended to all five netfilter hooks by Brad Chapman & Harald Welte
*/
#include <linux/module.h>
#include <linux/netfilter_ipv6/ip6_tables.h>
@@ -138,7 +136,7 @@ ip6t_local_hook(unsigned int hook,
#if 0
/* root is playing with raw sockets. */
if ((*pskb)->len < sizeof(struct iphdr)
- || (*pskb)->nh.iph->ihl * 4 < sizeof(struct iphdr)) {
+ || ip_hdrlen(*pskb) < sizeof(struct iphdr)) {
if (net_ratelimit())
printk("ip6t_hook: happy cracking.\n");
return NF_ACCEPT;
@@ -146,21 +144,21 @@ ip6t_local_hook(unsigned int hook,
#endif
/* save source/dest address, mark, hoplimit, flowlabel, priority, */
- memcpy(&saddr, &(*pskb)->nh.ipv6h->saddr, sizeof(saddr));
- memcpy(&daddr, &(*pskb)->nh.ipv6h->daddr, sizeof(daddr));
+ memcpy(&saddr, &ipv6_hdr(*pskb)->saddr, sizeof(saddr));
+ memcpy(&daddr, &ipv6_hdr(*pskb)->daddr, sizeof(daddr));
mark = (*pskb)->mark;
- hop_limit = (*pskb)->nh.ipv6h->hop_limit;
+ hop_limit = ipv6_hdr(*pskb)->hop_limit;
/* flowlabel and prio (includes version, which shouldn't change either */
- flowlabel = *((u_int32_t *) (*pskb)->nh.ipv6h);
+ flowlabel = *((u_int32_t *)ipv6_hdr(*pskb));
ret = ip6t_do_table(pskb, hook, in, out, &packet_mangler);
if (ret != NF_DROP && ret != NF_STOLEN
- && (memcmp(&(*pskb)->nh.ipv6h->saddr, &saddr, sizeof(saddr))
- || memcmp(&(*pskb)->nh.ipv6h->daddr, &daddr, sizeof(daddr))
+ && (memcmp(&ipv6_hdr(*pskb)->saddr, &saddr, sizeof(saddr))
+ || memcmp(&ipv6_hdr(*pskb)->daddr, &daddr, sizeof(daddr))
|| (*pskb)->mark != mark
- || (*pskb)->nh.ipv6h->hop_limit != hop_limit))
+ || ipv6_hdr(*pskb)->hop_limit != hop_limit))
return ip6_route_me_harder(*pskb) == 0 ? ret : NF_DROP;
return ret;
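
The rewritten hook body follows a snapshot/compare pattern: record the routing-relevant header fields, run the table, and only re-route when a rule actually rewrote one of them. A condensed sketch, with the mark, hop-limit, and flowlabel comparisons and the helper name being illustrative simplifications:

static unsigned int mangle_local_out_sketch(struct sk_buff **pskb,
					    unsigned int hook,
					    const struct net_device *in,
					    const struct net_device *out)
{
	struct in6_addr saddr = ipv6_hdr(*pskb)->saddr;	/* snapshot */
	struct in6_addr daddr = ipv6_hdr(*pskb)->daddr;
	unsigned int ret;

	ret = ip6t_do_table(pskb, hook, in, out, &packet_mangler);
	if (ret != NF_DROP && ret != NF_STOLEN &&
	    (!ipv6_addr_equal(&ipv6_hdr(*pskb)->saddr, &saddr) ||
	     !ipv6_addr_equal(&ipv6_hdr(*pskb)->daddr, &daddr)))
		return ip6_route_me_harder(*pskb) == 0 ? ret : NF_DROP;
	return ret;
}
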
diff --git a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c
index d1102455668..6d2a0820511 100644
--- a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c
+++ b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c
@@ -7,17 +7,6 @@
*
* Author:
* Yasuyuki Kozakai @USAGI <yasuyuki.kozakai@toshiba.co.jp>
- *
- * 16 Dec 2003: Yasuyuki Kozakai @USAGI <yasuyuki.kozakai@toshiba.co.jp>
- * - support Layer 3 protocol independent connection tracking.
- * Based on the original ip_conntrack code which had the following
- * copyright information:
- * (C) 1999-2001 Paul `Rusty' Russell
- * (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org>
- *
- * 23 Mar 2004: Yasuyuki Kozakai @USAGI <yasuyuki.kozakai@toshiba.co.jp>
- * - add get_features() to support various size of conntrack
- * structures.
*/
#include <linux/types.h>
@@ -138,16 +127,10 @@ static int
ipv6_prepare(struct sk_buff **pskb, unsigned int hooknum, unsigned int *dataoff,
u_int8_t *protonum)
{
- unsigned int extoff;
- unsigned char pnum;
- int protoff;
-
- extoff = (u8*)((*pskb)->nh.ipv6h + 1) - (*pskb)->data;
- pnum = (*pskb)->nh.ipv6h->nexthdr;
-
- protoff = nf_ct_ipv6_skip_exthdr(*pskb, extoff, &pnum,
- (*pskb)->len - extoff);
-
+ unsigned int extoff = (u8 *)(ipv6_hdr(*pskb) + 1) - (*pskb)->data;
+ unsigned char pnum = ipv6_hdr(*pskb)->nexthdr;
+ int protoff = nf_ct_ipv6_skip_exthdr(*pskb, extoff, &pnum,
+ (*pskb)->len - extoff);
/*
* (protoff == (*pskb)->len) mean that the packet doesn't have no data
* except of IPv6 & ext headers. but it's tracked anyway. - YK
@@ -179,9 +162,8 @@ static unsigned int ipv6_confirm(unsigned int hooknum,
struct nf_conn_help *help;
enum ip_conntrack_info ctinfo;
unsigned int ret, protoff;
- unsigned int extoff = (u8*)((*pskb)->nh.ipv6h + 1)
- - (*pskb)->data;
- unsigned char pnum = (*pskb)->nh.ipv6h->nexthdr;
+ unsigned int extoff = (u8 *)(ipv6_hdr(*pskb) + 1) - (*pskb)->data;
+ unsigned char pnum = ipv6_hdr(*pskb)->nexthdr;
/* This is where we call the helper: as the packet goes out. */
diff --git a/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c b/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c
index 075da4f287b..0be790d250f 100644
--- a/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c
+++ b/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c
@@ -7,13 +7,6 @@
*
* Author:
* Yasuyuki Kozakai @USAGI <yasuyuki.kozakai@toshiba.co.jp>
- *
- * 16 Dec 2003: Yasuyuki Kozakai @USAGI <yasuyuki.kozakai@toshiba.co.jp>
- * - ICMPv6 tracking support. Derived from the original ip_conntrack code
- * net/ipv4/netfilter/ip_conntrack_proto_icmp.c which had the following
- * copyright information:
- * (C) 1999-2001 Paul `Rusty' Russell
- * (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org>
*/
#include <linux/types.h>
diff --git a/net/ipv6/netfilter/nf_conntrack_reasm.c b/net/ipv6/netfilter/nf_conntrack_reasm.c
index 15ab1e3e8b5..347ab760823 100644
--- a/net/ipv6/netfilter/nf_conntrack_reasm.c
+++ b/net/ipv6/netfilter/nf_conntrack_reasm.c
@@ -82,7 +82,7 @@ struct nf_ct_frag6_queue
struct sk_buff *fragments;
int len;
int meat;
- struct timeval stamp;
+ ktime_t stamp;
unsigned int csum;
__u8 last_in; /* has first/last segment arrived? */
#define COMPLETE 4
@@ -353,9 +353,7 @@ nf_ct_frag6_create(unsigned int hash, __be32 id, struct in6_addr *src, str
ipv6_addr_copy(&fq->saddr, src);
ipv6_addr_copy(&fq->daddr, dst);
- init_timer(&fq->timer);
- fq->timer.function = nf_ct_frag6_expire;
- fq->timer.data = (long) fq;
+ setup_timer(&fq->timer, nf_ct_frag6_expire, (unsigned long)fq);
spin_lock_init(&fq->lock);
atomic_set(&fq->refcnt, 1);
@@ -400,19 +398,20 @@ static int nf_ct_frag6_queue(struct nf_ct_frag6_queue *fq, struct sk_buff *skb,
}
offset = ntohs(fhdr->frag_off) & ~0x7;
- end = offset + (ntohs(skb->nh.ipv6h->payload_len) -
- ((u8 *) (fhdr + 1) - (u8 *) (skb->nh.ipv6h + 1)));
+ end = offset + (ntohs(ipv6_hdr(skb)->payload_len) -
+ ((u8 *)(fhdr + 1) - (u8 *)(ipv6_hdr(skb) + 1)));
if ((unsigned int)end > IPV6_MAXPLEN) {
DEBUGP("offset is too large.\n");
return -1;
}
- if (skb->ip_summed == CHECKSUM_COMPLETE)
+ if (skb->ip_summed == CHECKSUM_COMPLETE) {
+ const unsigned char *nh = skb_network_header(skb);
skb->csum = csum_sub(skb->csum,
- csum_partial(skb->nh.raw,
- (u8*)(fhdr + 1) - skb->nh.raw,
+ csum_partial(nh, (u8 *)(fhdr + 1) - nh,
0));
+ }
/* Is this the final fragment? */
if (!(fhdr->frag_off & htons(IP6_MF))) {
@@ -542,7 +541,7 @@ static int nf_ct_frag6_queue(struct nf_ct_frag6_queue *fq, struct sk_buff *skb,
fq->fragments = skb;
skb->dev = NULL;
- skb_get_timestamp(skb, &fq->stamp);
+ fq->stamp = skb->tstamp;
fq->meat += skb->len;
atomic_add(skb->truesize, &nf_ct_frag6_mem);
@@ -583,7 +582,9 @@ nf_ct_frag6_reasm(struct nf_ct_frag6_queue *fq, struct net_device *dev)
BUG_TRAP(NFCT_FRAG6_CB(head)->offset == 0);
/* Unfragmented part is taken from the first segment. */
- payload_len = (head->data - head->nh.raw) - sizeof(struct ipv6hdr) + fq->len - sizeof(struct frag_hdr);
+ payload_len = ((head->data - skb_network_header(head)) -
+ sizeof(struct ipv6hdr) + fq->len -
+ sizeof(struct frag_hdr));
if (payload_len > IPV6_MAXPLEN) {
DEBUGP("payload len is too large.\n");
goto out_oversize;
@@ -624,15 +625,15 @@ nf_ct_frag6_reasm(struct nf_ct_frag6_queue *fq, struct net_device *dev)
/* We have to remove fragment header from datagram and to relocate
* header in order to calculate ICV correctly. */
- head->nh.raw[fq->nhoffset] = head->h.raw[0];
+ skb_network_header(head)[fq->nhoffset] = skb_transport_header(head)[0];
memmove(head->head + sizeof(struct frag_hdr), head->head,
(head->data - head->head) - sizeof(struct frag_hdr));
- head->mac.raw += sizeof(struct frag_hdr);
- head->nh.raw += sizeof(struct frag_hdr);
+ head->mac_header += sizeof(struct frag_hdr);
+ head->network_header += sizeof(struct frag_hdr);
skb_shinfo(head)->frag_list = head->next;
- head->h.raw = head->data;
- skb_push(head, head->data - head->nh.raw);
+ skb_reset_transport_header(head);
+ skb_push(head, head->data - skb_network_header(head));
atomic_sub(head->truesize, &nf_ct_frag6_mem);
for (fp=head->next; fp; fp = fp->next) {
@@ -648,12 +649,14 @@ nf_ct_frag6_reasm(struct nf_ct_frag6_queue *fq, struct net_device *dev)
head->next = NULL;
head->dev = dev;
- skb_set_timestamp(head, &fq->stamp);
- head->nh.ipv6h->payload_len = htons(payload_len);
+ head->tstamp = fq->stamp;
+ ipv6_hdr(head)->payload_len = htons(payload_len);
/* Yes, and fold redundant checksum back. 8) */
if (head->ip_summed == CHECKSUM_COMPLETE)
- head->csum = csum_partial(head->nh.raw, head->h.raw-head->nh.raw, head->csum);
+ head->csum = csum_partial(skb_network_header(head),
+ skb_network_header_len(head),
+ head->csum);
fq->fragments = NULL;
@@ -701,9 +704,10 @@ out_fail:
static int
find_prev_fhdr(struct sk_buff *skb, u8 *prevhdrp, int *prevhoff, int *fhoff)
{
- u8 nexthdr = skb->nh.ipv6h->nexthdr;
- u8 prev_nhoff = (u8 *)&skb->nh.ipv6h->nexthdr - skb->data;
- int start = (u8 *)(skb->nh.ipv6h+1) - skb->data;
+ u8 nexthdr = ipv6_hdr(skb)->nexthdr;
+ const int netoff = skb_network_offset(skb);
+ u8 prev_nhoff = netoff + offsetof(struct ipv6hdr, nexthdr);
+ int start = netoff + sizeof(struct ipv6hdr);
int len = skb->len - start;
u8 prevhdr = NEXTHDR_IPV6;
@@ -759,7 +763,7 @@ struct sk_buff *nf_ct_frag6_gather(struct sk_buff *skb)
struct sk_buff *ret_skb = NULL;
/* Jumbo payload inhibits frag. header */
- if (skb->nh.ipv6h->payload_len == 0) {
+ if (ipv6_hdr(skb)->payload_len == 0) {
DEBUGP("payload len = 0\n");
return skb;
}
@@ -780,9 +784,9 @@ struct sk_buff *nf_ct_frag6_gather(struct sk_buff *skb)
goto ret_orig;
}
- clone->h.raw = clone->data + fhoff;
- hdr = clone->nh.ipv6h;
- fhdr = (struct frag_hdr *)clone->h.raw;
+ skb_set_transport_header(clone, fhoff);
+ hdr = ipv6_hdr(clone);
+ fhdr = (struct frag_hdr *)skb_transport_header(clone);
if (!(fhdr->frag_off & htons(0xFFF9))) {
DEBUGP("Invalid fragment offset\n");
@@ -864,8 +868,7 @@ int nf_ct_frag6_init(void)
nf_ct_frag6_hash_rnd = (u32) ((num_physpages ^ (num_physpages>>7)) ^
(jiffies ^ (jiffies >> 6)));
- init_timer(&nf_ct_frag6_secret_timer);
- nf_ct_frag6_secret_timer.function = nf_ct_frag6_secret_rebuild;
+ setup_timer(&nf_ct_frag6_secret_timer, nf_ct_frag6_secret_rebuild, 0);
nf_ct_frag6_secret_timer.expires = jiffies
+ nf_ct_frag6_secret_interval;
add_timer(&nf_ct_frag6_secret_timer);
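
Both timer hunks in this file are the same mechanical conversion; setup_timer() simply bundles the three previously open-coded statements. A sketch of its effect (modulo statement order):

static inline void setup_timer_sketch(struct timer_list *timer,
				      void (*function)(unsigned long),
				      unsigned long data)
{
	timer->function = function;
	timer->data = data;
	init_timer(timer);
}
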
diff --git a/net/ipv6/proc.c b/net/ipv6/proc.c
index fa3fb509f18..920dc9cf6a8 100644
--- a/net/ipv6/proc.c
+++ b/net/ipv6/proc.c
@@ -23,12 +23,12 @@
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#include <linux/stddef.h>
+#include <net/ip.h>
#include <net/sock.h>
#include <net/tcp.h>
#include <net/transp_v6.h>
#include <net/ipv6.h>
-#ifdef CONFIG_PROC_FS
static struct proc_dir_entry *proc_net_devsnmp6;
static int fold_prot_inuse(struct proto *proto)
@@ -142,26 +142,13 @@ static struct snmp_mib snmp6_udplite6_list[] = {
SNMP_MIB_SENTINEL
};
-static unsigned long
-fold_field(void *mib[], int offt)
-{
- unsigned long res = 0;
- int i;
-
- for_each_possible_cpu(i) {
- res += *(((unsigned long *)per_cpu_ptr(mib[0], i)) + offt);
- res += *(((unsigned long *)per_cpu_ptr(mib[1], i)) + offt);
- }
- return res;
-}
-
static inline void
snmp6_seq_show_item(struct seq_file *seq, void **mib, struct snmp_mib *itemlist)
{
int i;
for (i=0; itemlist[i].name; i++)
seq_printf(seq, "%-32s\t%lu\n", itemlist[i].name,
- fold_field(mib, itemlist[i].entry));
+ snmp_fold_field(mib, itemlist[i].entry));
}
static int snmp6_seq_show(struct seq_file *seq, void *v)
@@ -236,6 +223,7 @@ int snmp6_unregister_dev(struct inet6_dev *idev)
return -EINVAL;
remove_proc_entry(idev->stats.proc_dir_entry->name,
proc_net_devsnmp6);
+ idev->stats.proc_dir_entry = NULL;
return 0;
}
@@ -271,47 +259,3 @@ void ipv6_misc_proc_exit(void)
proc_net_remove("snmp6");
}
-#else /* CONFIG_PROC_FS */
-
-
-int snmp6_register_dev(struct inet6_dev *idev)
-{
- return 0;
-}
-
-int snmp6_unregister_dev(struct inet6_dev *idev)
-{
- return 0;
-}
-#endif /* CONFIG_PROC_FS */
-
-int snmp6_alloc_dev(struct inet6_dev *idev)
-{
- int err = -ENOMEM;
-
- if (!idev || !idev->dev)
- return -EINVAL;
-
- if (snmp6_mib_init((void **)idev->stats.ipv6, sizeof(struct ipstats_mib),
- __alignof__(struct ipstats_mib)) < 0)
- goto err_ip;
- if (snmp6_mib_init((void **)idev->stats.icmpv6, sizeof(struct icmpv6_mib),
- __alignof__(struct icmpv6_mib)) < 0)
- goto err_icmp;
-
- return 0;
-
-err_icmp:
- snmp6_mib_free((void **)idev->stats.ipv6);
-err_ip:
- return err;
-}
-
-int snmp6_free_dev(struct inet6_dev *idev)
-{
- snmp6_mib_free((void **)idev->stats.icmpv6);
- snmp6_mib_free((void **)idev->stats.ipv6);
- return 0;
-}
-
-
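
The deleted fold_field() duplicated what net/ipv4 already provides as snmp_fold_field(); the folding it performs is the per-CPU summation below, copied from the removed lines, so the switch changes no semantics:

static unsigned long fold_field_sketch(void *mib[], int offt)
{
	unsigned long res = 0;
	int i;

	/* each mib is allocated twice (BH and non-BH halves); sum the
	 * requested counter slot across both halves on every CPU */
	for_each_possible_cpu(i) {
		res += *(((unsigned long *)per_cpu_ptr(mib[0], i)) + offt);
		res += *(((unsigned long *)per_cpu_ptr(mib[1], i)) + offt);
	}
	return res;
}
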
diff --git a/net/ipv6/protocol.c b/net/ipv6/protocol.c
index ef43bd57bae..f929f47b925 100644
--- a/net/ipv6/protocol.c
+++ b/net/ipv6/protocol.c
@@ -60,6 +60,8 @@ int inet6_add_protocol(struct inet6_protocol *prot, unsigned char protocol)
return ret;
}
+EXPORT_SYMBOL(inet6_add_protocol);
+
/*
* Remove a protocol from the hash tables.
*/
@@ -83,3 +85,5 @@ int inet6_del_protocol(struct inet6_protocol *prot, unsigned char protocol)
return ret;
}
+
+EXPORT_SYMBOL(inet6_del_protocol);
diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c
index 203e069e7fe..009a1047fc3 100644
--- a/net/ipv6/raw.c
+++ b/net/ipv6/raw.c
@@ -152,7 +152,7 @@ int ipv6_raw_deliver(struct sk_buff *skb, int nexthdr)
int delivered = 0;
__u8 hash;
- saddr = &skb->nh.ipv6h->saddr;
+ saddr = &ipv6_hdr(skb)->saddr;
daddr = saddr + 1;
hash = nexthdr & (MAX_INET_PROTOS - 1);
@@ -361,17 +361,18 @@ int rawv6_rcv(struct sock *sk, struct sk_buff *skb)
skb->ip_summed = CHECKSUM_UNNECESSARY;
if (skb->ip_summed == CHECKSUM_COMPLETE) {
- skb_postpull_rcsum(skb, skb->nh.raw,
- skb->h.raw - skb->nh.raw);
- if (!csum_ipv6_magic(&skb->nh.ipv6h->saddr,
- &skb->nh.ipv6h->daddr,
+ skb_postpull_rcsum(skb, skb_network_header(skb),
+ skb_network_header_len(skb));
+ if (!csum_ipv6_magic(&ipv6_hdr(skb)->saddr,
+ &ipv6_hdr(skb)->daddr,
skb->len, inet->num, skb->csum))
skb->ip_summed = CHECKSUM_UNNECESSARY;
}
- if (skb->ip_summed != CHECKSUM_UNNECESSARY)
- skb->csum = ~csum_unfold(csum_ipv6_magic(&skb->nh.ipv6h->saddr,
- &skb->nh.ipv6h->daddr,
- skb->len, inet->num, 0));
+ if (!skb_csum_unnecessary(skb))
+ skb->csum = ~csum_unfold(csum_ipv6_magic(&ipv6_hdr(skb)->saddr,
+ &ipv6_hdr(skb)->daddr,
+ skb->len,
+ inet->num, 0));
if (inet->hdrincl) {
if (skb_checksum_complete(skb)) {
@@ -420,7 +421,7 @@ static int rawv6_recvmsg(struct kiocb *iocb, struct sock *sk,
msg->msg_flags |= MSG_TRUNC;
}
- if (skb->ip_summed==CHECKSUM_UNNECESSARY) {
+ if (skb_csum_unnecessary(skb)) {
err = skb_copy_datagram_iovec(skb, 0, msg->msg_iov, copied);
} else if (msg->msg_flags&MSG_TRUNC) {
if (__skb_checksum_complete(skb))
@@ -438,7 +439,7 @@ static int rawv6_recvmsg(struct kiocb *iocb, struct sock *sk,
if (sin6) {
sin6->sin6_family = AF_INET6;
sin6->sin6_port = 0;
- ipv6_addr_copy(&sin6->sin6_addr, &skb->nh.ipv6h->saddr);
+ ipv6_addr_copy(&sin6->sin6_addr, &ipv6_hdr(skb)->saddr);
sin6->sin6_flowinfo = 0;
sin6->sin6_scope_id = 0;
if (ipv6_addr_type(&sin6->sin6_addr) & IPV6_ADDR_LINKLOCAL)
@@ -488,7 +489,8 @@ static int rawv6_push_pending_frames(struct sock *sk, struct flowi *fl,
goto out;
offset = rp->offset;
- total_len = inet_sk(sk)->cork.length - (skb->nh.raw - skb->data);
+ total_len = inet_sk(sk)->cork.length - (skb_network_header(skb) -
+ skb->data);
if (offset >= total_len - 1) {
err = -EINVAL;
ip6_flush_pending_frames(sk);
@@ -511,7 +513,7 @@ static int rawv6_push_pending_frames(struct sock *sk, struct flowi *fl,
if (csum_skb)
continue;
- len = skb->len - (skb->h.raw - skb->data);
+ len = skb->len - skb_transport_offset(skb);
if (offset >= len) {
offset -= len;
continue;
@@ -523,7 +525,7 @@ static int rawv6_push_pending_frames(struct sock *sk, struct flowi *fl,
skb = csum_skb;
}
- offset += skb->h.raw - skb->data;
+ offset += skb_transport_offset(skb);
if (skb_copy_bits(skb, offset, &csum, 2))
BUG();
@@ -575,11 +577,13 @@ static int rawv6_send_hdrinc(struct sock *sk, void *from, int length,
skb->priority = sk->sk_priority;
skb->dst = dst_clone(&rt->u.dst);
- skb->nh.ipv6h = iph = (struct ipv6hdr *)skb_put(skb, length);
+ skb_put(skb, length);
+ skb_reset_network_header(skb);
+ iph = ipv6_hdr(skb);
skb->ip_summed = CHECKSUM_NONE;
- skb->h.raw = skb->nh.raw;
+ skb->transport_header = skb->network_header;
err = memcpy_fromiovecend((void *)iph, from, 0, length);
if (err)
goto error_fault;
@@ -878,7 +882,7 @@ static int rawv6_seticmpfilter(struct sock *sk, int level, int optname,
return 0;
default:
return -ENOPROTOOPT;
- };
+ }
return 0;
}
@@ -903,7 +907,7 @@ static int rawv6_geticmpfilter(struct sock *sk, int level, int optname,
return 0;
default:
return -ENOPROTOOPT;
- };
+ }
return 0;
}
@@ -957,7 +961,8 @@ static int rawv6_setsockopt(struct sock *sk, int level, int optname,
default:
return ipv6_setsockopt(sk, level, optname, optval,
optlen);
- };
+ }
+
return do_rawv6_setsockopt(sk, level, optname, optval, optlen);
}
@@ -978,7 +983,7 @@ static int compat_rawv6_setsockopt(struct sock *sk, int level, int optname,
default:
return compat_ipv6_setsockopt(sk, level, optname,
optval, optlen);
- };
+ }
return do_rawv6_setsockopt(sk, level, optname, optval, optlen);
}
#endif
@@ -1031,7 +1036,8 @@ static int rawv6_getsockopt(struct sock *sk, int level, int optname,
default:
return ipv6_getsockopt(sk, level, optname, optval,
optlen);
- };
+ }
+
return do_rawv6_getsockopt(sk, level, optname, optval, optlen);
}
@@ -1052,7 +1058,7 @@ static int compat_rawv6_getsockopt(struct sock *sk, int level, int optname,
default:
return compat_ipv6_getsockopt(sk, level, optname,
optval, optlen);
- };
+ }
return do_rawv6_getsockopt(sk, level, optname, optval, optlen);
}
#endif
@@ -1073,7 +1079,7 @@ static int rawv6_ioctl(struct sock *sk, int cmd, unsigned long arg)
spin_lock_bh(&sk->sk_receive_queue.lock);
skb = skb_peek(&sk->sk_receive_queue);
if (skb != NULL)
- amount = skb->tail - skb->h.raw;
+ amount = skb->tail - skb->transport_header;
spin_unlock_bh(&sk->sk_receive_queue.lock);
return put_user(amount, (int __user *)arg);
}
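
rawv6_rcv()'s checksum flow above condenses to: if the hardware supplied a full checksum, verify it immediately; otherwise stash the pseudo-header seed so a later __skb_checksum_complete() walk can finish the job. A sketch with the same helpers (the skb_postpull_rcsum() adjustment is elided):

static void raw_csum_init_sketch(struct sk_buff *skb, int proto)
{
	if (skb->ip_summed == CHECKSUM_COMPLETE &&
	    !csum_ipv6_magic(&ipv6_hdr(skb)->saddr, &ipv6_hdr(skb)->daddr,
			     skb->len, proto, skb->csum))
		skb->ip_summed = CHECKSUM_UNNECESSARY;

	if (!skb_csum_unnecessary(skb))	/* seed for a later full verify */
		skb->csum = ~csum_unfold(csum_ipv6_magic(&ipv6_hdr(skb)->saddr,
							 &ipv6_hdr(skb)->daddr,
							 skb->len, proto, 0));
}
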
diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c
index 7034c54e501..de795c04e34 100644
--- a/net/ipv6/reassembly.c
+++ b/net/ipv6/reassembly.c
@@ -88,7 +88,7 @@ struct frag_queue
int len;
int meat;
int iif;
- struct timeval stamp;
+ ktime_t stamp;
unsigned int csum;
__u8 last_in; /* has first/last segment arrived? */
#define COMPLETE 4
@@ -430,19 +430,24 @@ static void ip6_frag_queue(struct frag_queue *fq, struct sk_buff *skb,
goto err;
offset = ntohs(fhdr->frag_off) & ~0x7;
- end = offset + (ntohs(skb->nh.ipv6h->payload_len) -
- ((u8 *) (fhdr + 1) - (u8 *) (skb->nh.ipv6h + 1)));
+ end = offset + (ntohs(ipv6_hdr(skb)->payload_len) -
+ ((u8 *)(fhdr + 1) - (u8 *)(ipv6_hdr(skb) + 1)));
if ((unsigned int)end > IPV6_MAXPLEN) {
IP6_INC_STATS_BH(ip6_dst_idev(skb->dst),
IPSTATS_MIB_INHDRERRORS);
- icmpv6_param_prob(skb,ICMPV6_HDR_FIELD, (u8*)&fhdr->frag_off - skb->nh.raw);
+ icmpv6_param_prob(skb, ICMPV6_HDR_FIELD,
+ ((u8 *)&fhdr->frag_off -
+ skb_network_header(skb)));
return;
}
- if (skb->ip_summed == CHECKSUM_COMPLETE)
+ if (skb->ip_summed == CHECKSUM_COMPLETE) {
+ const unsigned char *nh = skb_network_header(skb);
skb->csum = csum_sub(skb->csum,
- csum_partial(skb->nh.raw, (u8*)(fhdr+1)-skb->nh.raw, 0));
+ csum_partial(nh, (u8 *)(fhdr + 1) - nh,
+ 0));
+ }
/* Is this the final fragment? */
if (!(fhdr->frag_off & htons(IP6_MF))) {
@@ -562,7 +567,7 @@ static void ip6_frag_queue(struct frag_queue *fq, struct sk_buff *skb,
if (skb->dev)
fq->iif = skb->dev->ifindex;
skb->dev = NULL;
- skb_get_timestamp(skb, &fq->stamp);
+ fq->stamp = skb->tstamp;
fq->meat += skb->len;
atomic_add(skb->truesize, &ip6_frag_mem);
@@ -605,7 +610,9 @@ static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff **skb_in,
BUG_TRAP(FRAG6_CB(head)->offset == 0);
/* Unfragmented part is taken from the first segment. */
- payload_len = (head->data - head->nh.raw) - sizeof(struct ipv6hdr) + fq->len - sizeof(struct frag_hdr);
+ payload_len = ((head->data - skb_network_header(head)) -
+ sizeof(struct ipv6hdr) + fq->len -
+ sizeof(struct frag_hdr));
if (payload_len > IPV6_MAXPLEN)
goto out_oversize;
@@ -639,15 +646,15 @@ static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff **skb_in,
/* We have to remove fragment header from datagram and to relocate
* header in order to calculate ICV correctly. */
nhoff = fq->nhoffset;
- head->nh.raw[nhoff] = head->h.raw[0];
+ skb_network_header(head)[nhoff] = skb_transport_header(head)[0];
memmove(head->head + sizeof(struct frag_hdr), head->head,
(head->data - head->head) - sizeof(struct frag_hdr));
- head->mac.raw += sizeof(struct frag_hdr);
- head->nh.raw += sizeof(struct frag_hdr);
+ head->mac_header += sizeof(struct frag_hdr);
+ head->network_header += sizeof(struct frag_hdr);
skb_shinfo(head)->frag_list = head->next;
- head->h.raw = head->data;
- skb_push(head, head->data - head->nh.raw);
+ skb_reset_transport_header(head);
+ skb_push(head, head->data - skb_network_header(head));
atomic_sub(head->truesize, &ip6_frag_mem);
for (fp=head->next; fp; fp = fp->next) {
@@ -663,15 +670,17 @@ static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff **skb_in,
head->next = NULL;
head->dev = dev;
- skb_set_timestamp(head, &fq->stamp);
- head->nh.ipv6h->payload_len = htons(payload_len);
+ head->tstamp = fq->stamp;
+ ipv6_hdr(head)->payload_len = htons(payload_len);
IP6CB(head)->nhoff = nhoff;
*skb_in = head;
/* Yes, and fold redundant checksum back. 8) */
if (head->ip_summed == CHECKSUM_COMPLETE)
- head->csum = csum_partial(head->nh.raw, head->h.raw-head->nh.raw, head->csum);
+ head->csum = csum_partial(skb_network_header(head),
+ skb_network_header_len(head),
+ head->csum);
rcu_read_lock();
IP6_INC_STATS_BH(__in6_dev_get(dev), IPSTATS_MIB_REASMOKS);
@@ -699,33 +708,34 @@ static int ipv6_frag_rcv(struct sk_buff **skbp)
struct net_device *dev = skb->dev;
struct frag_hdr *fhdr;
struct frag_queue *fq;
- struct ipv6hdr *hdr;
-
- hdr = skb->nh.ipv6h;
+ struct ipv6hdr *hdr = ipv6_hdr(skb);
IP6_INC_STATS_BH(ip6_dst_idev(skb->dst), IPSTATS_MIB_REASMREQDS);
/* Jumbo payload inhibits frag. header */
if (hdr->payload_len==0) {
IP6_INC_STATS(ip6_dst_idev(skb->dst), IPSTATS_MIB_INHDRERRORS);
- icmpv6_param_prob(skb, ICMPV6_HDR_FIELD, skb->h.raw-skb->nh.raw);
+ icmpv6_param_prob(skb, ICMPV6_HDR_FIELD,
+ skb_network_header_len(skb));
return -1;
}
- if (!pskb_may_pull(skb, (skb->h.raw-skb->data)+sizeof(struct frag_hdr))) {
+ if (!pskb_may_pull(skb, (skb_transport_offset(skb) +
+ sizeof(struct frag_hdr)))) {
IP6_INC_STATS(ip6_dst_idev(skb->dst), IPSTATS_MIB_INHDRERRORS);
- icmpv6_param_prob(skb, ICMPV6_HDR_FIELD, skb->h.raw-skb->nh.raw);
+ icmpv6_param_prob(skb, ICMPV6_HDR_FIELD,
+ skb_network_header_len(skb));
return -1;
}
- hdr = skb->nh.ipv6h;
- fhdr = (struct frag_hdr *)skb->h.raw;
+ hdr = ipv6_hdr(skb);
+ fhdr = (struct frag_hdr *)skb_transport_header(skb);
if (!(fhdr->frag_off & htons(0xFFF9))) {
/* It is not a fragmented frame */
- skb->h.raw += sizeof(struct frag_hdr);
+ skb->transport_header += sizeof(struct frag_hdr);
IP6_INC_STATS_BH(ip6_dst_idev(skb->dst), IPSTATS_MIB_REASMOKS);
- IP6CB(skb)->nhoff = (u8*)fhdr - skb->nh.raw;
+ IP6CB(skb)->nhoff = (u8 *)fhdr - skb_network_header(skb);
return 1;
}
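
A note on the payload_len expression that both reassembly files now spell identically: it is the first segment's parsed header bytes, minus the fixed IPv6 header and the fragment header about to be stripped, plus the total fragmentable length. A sketch in the same terms, assuming head->data sits just past the fragment header at this point:

static int reasm_payload_len_sketch(const struct sk_buff *head, int frag_len)
{
	/* extension-header bytes the first fragment parsed, excluding
	 * the fixed IPv6 header and the soon-removed fragment header */
	int ext_len = (head->data - skb_network_header(head)) -
		      sizeof(struct ipv6hdr) - sizeof(struct frag_hdr);

	return ext_len + frag_len;	/* frag_len corresponds to fq->len */
}
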
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index aebb4e2d5ae..b46ad53044b 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -575,6 +575,8 @@ struct rt6_info *rt6_lookup(struct in6_addr *daddr, struct in6_addr *saddr,
return NULL;
}
+EXPORT_SYMBOL(rt6_lookup);
+
/* ip6_ins_rt is called with FREE table->tb6_lock.
It takes new route entry, the addition fails by any reason the
route is freed. In any case, if caller does not hold it, it may
@@ -724,7 +726,7 @@ out2:
void ip6_route_input(struct sk_buff *skb)
{
- struct ipv6hdr *iph = skb->nh.ipv6h;
+ struct ipv6hdr *iph = ipv6_hdr(skb);
int flags = RT6_LOOKUP_F_HAS_SADDR;
struct flowi fl = {
.iif = skb->dev->ifindex,
@@ -829,6 +831,7 @@ struct dst_entry * ip6_route_output(struct sock *sk, struct flowi *fl)
return fib6_rule_lookup(fl, flags, ip6_pol_route_output);
}
+EXPORT_SYMBOL(ip6_route_output);
/*
* Destination cache support functions
@@ -1757,7 +1760,7 @@ int ipv6_route_ioctl(unsigned int cmd, void __user *arg)
rtnl_unlock();
return err;
- };
+ }
return -EINVAL;
}
@@ -1772,7 +1775,7 @@ static inline int ip6_pkt_drop(struct sk_buff *skb, int code,
int type;
switch (ipstats_mib_noroutes) {
case IPSTATS_MIB_INNOROUTES:
- type = ipv6_addr_type(&skb->nh.ipv6h->daddr);
+ type = ipv6_addr_type(&ipv6_hdr(skb)->daddr);
if (type == IPV6_ADDR_ANY || type == IPV6_ADDR_RESERVED) {
IP6_INC_STATS(ip6_dst_idev(skb->dst), IPSTATS_MIB_INADDRERRORS);
break;
@@ -2012,7 +2015,7 @@ errout:
return err;
}
-int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
+static int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
{
struct fib6_config cfg;
int err;
@@ -2024,7 +2027,7 @@ int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
return ip6_route_del(&cfg);
}
-int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
+static int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
{
struct fib6_config cfg;
int err;
@@ -2161,7 +2164,7 @@ int rt6_dump_route(struct rt6_info *rt, void *p_arg)
prefix, NLM_F_MULTI);
}
-int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void *arg)
+static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void *arg)
{
struct nlattr *tb[RTA_MAX+1];
struct rt6_info *rt;
@@ -2215,7 +2218,7 @@ int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void *arg)
/* Reserve room for dummy headers, this skb can pass
through good chunk of routing engine.
*/
- skb->mac.raw = skb->data;
+ skb_reset_mac_header(skb);
skb_reserve(skb, MAX_HEADER + sizeof(struct ipv6hdr));
rt = (struct rt6_info*) ip6_route_output(NULL, &fl);
@@ -2486,8 +2489,9 @@ ctl_table ipv6_route_table[] = {
void __init ip6_route_init(void)
{
+#ifdef CONFIG_PROC_FS
struct proc_dir_entry *p;
-
+#endif
ip6_dst_ops.kmem_cachep =
kmem_cache_create("ip6_dst_cache", sizeof(struct rt6_info), 0,
SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL, NULL);
@@ -2505,6 +2509,10 @@ void __init ip6_route_init(void)
#ifdef CONFIG_IPV6_MULTIPLE_TABLES
fib6_rules_init();
#endif
+
+ __rtnl_register(PF_INET6, RTM_NEWROUTE, inet6_rtm_newroute, NULL);
+ __rtnl_register(PF_INET6, RTM_DELROUTE, inet6_rtm_delroute, NULL);
+ __rtnl_register(PF_INET6, RTM_GETROUTE, inet6_rtm_getroute, NULL);
}
void ip6_route_cleanup(void)
diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c
index 08d6ed3396e..1efa95a99f4 100644
--- a/net/ipv6/sit.c
+++ b/net/ipv6/sit.c
@@ -99,10 +99,10 @@ static struct ip_tunnel * ipip6_tunnel_lookup(__be32 remote, __be32 local)
return NULL;
}
-static struct ip_tunnel ** ipip6_bucket(struct ip_tunnel *t)
+static struct ip_tunnel **__ipip6_bucket(struct ip_tunnel_parm *parms)
{
- __be32 remote = t->parms.iph.daddr;
- __be32 local = t->parms.iph.saddr;
+ __be32 remote = parms->iph.daddr;
+ __be32 local = parms->iph.saddr;
unsigned h = 0;
int prio = 0;
@@ -117,6 +117,11 @@ static struct ip_tunnel ** ipip6_bucket(struct ip_tunnel *t)
return &tunnels[prio][h];
}
+static inline struct ip_tunnel **ipip6_bucket(struct ip_tunnel *t)
+{
+ return __ipip6_bucket(&t->parms);
+}
+
static void ipip6_tunnel_unlink(struct ip_tunnel *t)
{
struct ip_tunnel **tp;
@@ -147,19 +152,9 @@ static struct ip_tunnel * ipip6_tunnel_locate(struct ip_tunnel_parm *parms, int
__be32 local = parms->iph.saddr;
struct ip_tunnel *t, **tp, *nt;
struct net_device *dev;
- unsigned h = 0;
- int prio = 0;
char name[IFNAMSIZ];
- if (remote) {
- prio |= 2;
- h ^= HASH(remote);
- }
- if (local) {
- prio |= 1;
- h ^= HASH(local);
- }
- for (tp = &tunnels[prio][h]; (t = *tp) != NULL; tp = &t->next) {
+ for (tp = __ipip6_bucket(parms); (t = *tp) != NULL; tp = &t->next) {
if (local == t->parms.iph.saddr && remote == t->parms.iph.daddr)
return t;
}
@@ -224,8 +219,8 @@ static int ipip6_err(struct sk_buff *skb, u32 info)
ICMP in the real Internet is absolutely infeasible.
*/
struct iphdr *iph = (struct iphdr*)skb->data;
- int type = skb->h.icmph->type;
- int code = skb->h.icmph->code;
+ const int type = icmp_hdr(skb)->type;
+ const int code = icmp_hdr(skb)->code;
struct ip_tunnel *t;
int err;
@@ -280,8 +275,8 @@ out:
struct iphdr *iph = (struct iphdr*)dp;
int hlen = iph->ihl<<2;
struct ipv6hdr *iph6;
- int type = skb->h.icmph->type;
- int code = skb->h.icmph->code;
+ const int type = icmp_hdr(skb)->type;
+ const int code = icmp_hdr(skb)->code;
int rel_type = 0;
int rel_code = 0;
int rel_info = 0;
@@ -296,14 +291,14 @@ out:
default:
return;
case ICMP_PARAMETERPROB:
- if (skb->h.icmph->un.gateway < hlen)
+ if (icmp_hdr(skb)->un.gateway < hlen)
return;
/* So... This guy found something strange INSIDE encapsulated
packet. Well, he is fool, but what can we do ?
*/
rel_type = ICMPV6_PARAMPROB;
- rel_info = skb->h.icmph->un.gateway - hlen;
+ rel_info = icmp_hdr(skb)->un.gateway - hlen;
break;
case ICMP_DEST_UNREACH:
@@ -340,7 +335,7 @@ out:
dst_release(skb2->dst);
skb2->dst = NULL;
skb_pull(skb2, skb->data - (u8*)iph6);
- skb2->nh.raw = skb2->data;
+ skb_reset_network_header(skb2);
/* Try to guess incoming interface */
rt6i = rt6_lookup(&iph6->saddr, NULL, NULL, 0);
@@ -366,7 +361,7 @@ out:
static inline void ipip6_ecn_decapsulate(struct iphdr *iph, struct sk_buff *skb)
{
if (INET_ECN_is_ce(iph->tos))
- IP6_ECN_set_ce(skb->nh.ipv6h);
+ IP6_ECN_set_ce(ipv6_hdr(skb));
}
static int ipip6_rcv(struct sk_buff *skb)
@@ -377,13 +372,13 @@ static int ipip6_rcv(struct sk_buff *skb)
if (!pskb_may_pull(skb, sizeof(struct ipv6hdr)))
goto out;
- iph = skb->nh.iph;
+ iph = ip_hdr(skb);
read_lock(&ipip6_lock);
if ((tunnel = ipip6_tunnel_lookup(iph->saddr, iph->daddr)) != NULL) {
secpath_reset(skb);
- skb->mac.raw = skb->nh.raw;
- skb->nh.raw = skb->data;
+ skb->mac_header = skb->network_header;
+ skb_reset_network_header(skb);
IPCB(skb)->flags = 0;
skb->protocol = htons(ETH_P_IPV6);
skb->pkt_type = PACKET_HOST;
@@ -430,7 +425,7 @@ static int ipip6_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
struct ip_tunnel *tunnel = netdev_priv(dev);
struct net_device_stats *stats = &tunnel->stat;
struct iphdr *tiph = &tunnel->parms.iph;
- struct ipv6hdr *iph6 = skb->nh.ipv6h;
+ struct ipv6hdr *iph6 = ipv6_hdr(skb);
u8 tos = tunnel->parms.iph.tos;
struct rtable *rt; /* Route to the other host */
struct net_device *tdev; /* Device to other host */
@@ -468,7 +463,7 @@ static int ipip6_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
addr_type = ipv6_addr_type(addr6);
if (addr_type == IPV6_ADDR_ANY) {
- addr6 = &skb->nh.ipv6h->daddr;
+ addr6 = &ipv6_hdr(skb)->daddr;
addr_type = ipv6_addr_type(addr6);
}
@@ -550,11 +545,12 @@ static int ipip6_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
skb_set_owner_w(new_skb, skb->sk);
dev_kfree_skb(skb);
skb = new_skb;
- iph6 = skb->nh.ipv6h;
+ iph6 = ipv6_hdr(skb);
}
- skb->h.raw = skb->nh.raw;
- skb->nh.raw = skb_push(skb, sizeof(struct iphdr));
+ skb->transport_header = skb->network_header;
+ skb_push(skb, sizeof(struct iphdr));
+ skb_reset_network_header(skb);
memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
IPCB(skb)->flags = 0;
dst_release(skb->dst);
@@ -564,7 +560,7 @@ static int ipip6_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
* Push down and install the IPIP header.
*/
- iph = skb->nh.iph;
+ iph = ip_hdr(skb);
iph->version = 4;
iph->ihl = sizeof(struct iphdr)>>2;
if (mtu > IPV6_MIN_MTU)
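
The duplication removed from the locate path is the hash/priority computation; both callers now share one helper. For reference, its body, reconstructed from the lines above (HASH() and tunnels[][] are this driver's local macro and table):

static struct ip_tunnel **__ipip6_bucket_sketch(struct ip_tunnel_parm *parms)
{
	__be32 remote = parms->iph.daddr;
	__be32 local = parms->iph.saddr;
	unsigned h = 0;
	int prio = 0;	/* 0..3: which endpoints are wildcarded */

	if (remote) {
		prio |= 2;
		h ^= HASH(remote);
	}
	if (local) {
		prio |= 1;
		h ^= HASH(local);
	}
	return &tunnels[prio][h];
}
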
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index 92f99927d12..e2f25ea43b6 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -115,10 +115,10 @@ static __inline__ __sum16 tcp_v6_check(struct tcphdr *th, int len,
static __u32 tcp_v6_init_sequence(struct sk_buff *skb)
{
- return secure_tcpv6_sequence_number(skb->nh.ipv6h->daddr.s6_addr32,
- skb->nh.ipv6h->saddr.s6_addr32,
- skb->h.th->dest,
- skb->h.th->source);
+ return secure_tcpv6_sequence_number(ipv6_hdr(skb)->daddr.s6_addr32,
+ ipv6_hdr(skb)->saddr.s6_addr32,
+ tcp_hdr(skb)->dest,
+ tcp_hdr(skb)->source);
}
static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
@@ -486,7 +486,9 @@ static int tcp_v6_send_synack(struct sock *sk, struct request_sock *req,
struct sk_buff *pktopts = treq->pktopts;
struct inet6_skb_parm *rxopt = IP6CB(pktopts);
if (rxopt->srcrt)
- opt = ipv6_invert_rthdr(sk, (struct ipv6_rt_hdr*)(pktopts->nh.raw + rxopt->srcrt));
+ opt = ipv6_invert_rthdr(sk,
+ (struct ipv6_rt_hdr *)(skb_network_header(pktopts) +
+ rxopt->srcrt));
}
if (opt && opt->srcrt) {
@@ -507,7 +509,7 @@ static int tcp_v6_send_synack(struct sock *sk, struct request_sock *req,
skb = tcp_make_synack(sk, dst, req);
if (skb) {
- struct tcphdr *th = skb->h.th;
+ struct tcphdr *th = tcp_hdr(skb);
th->check = tcp_v6_check(th, skb->len,
&treq->loc_addr, &treq->rmt_addr,
@@ -835,8 +837,8 @@ static int tcp_v6_inbound_md5_hash (struct sock *sk, struct sk_buff *skb)
{
__u8 *hash_location = NULL;
struct tcp_md5sig_key *hash_expected;
- struct ipv6hdr *ip6h = skb->nh.ipv6h;
- struct tcphdr *th = skb->h.th;
+ struct ipv6hdr *ip6h = ipv6_hdr(skb);
+ struct tcphdr *th = tcp_hdr(skb);
int length = (th->doff << 2) - sizeof (*th);
int genhash;
u8 *ptr;
@@ -944,10 +946,11 @@ static struct timewait_sock_ops tcp6_timewait_sock_ops = {
static void tcp_v6_send_check(struct sock *sk, int len, struct sk_buff *skb)
{
struct ipv6_pinfo *np = inet6_sk(sk);
- struct tcphdr *th = skb->h.th;
+ struct tcphdr *th = tcp_hdr(skb);
if (skb->ip_summed == CHECKSUM_PARTIAL) {
th->check = ~csum_ipv6_magic(&np->saddr, &np->daddr, len, IPPROTO_TCP, 0);
+ skb->csum_start = skb_transport_header(skb) - skb->head;
skb->csum_offset = offsetof(struct tcphdr, check);
} else {
th->check = csum_ipv6_magic(&np->saddr, &np->daddr, len, IPPROTO_TCP,
@@ -964,12 +967,13 @@ static int tcp_v6_gso_send_check(struct sk_buff *skb)
if (!pskb_may_pull(skb, sizeof(*th)))
return -EINVAL;
- ipv6h = skb->nh.ipv6h;
- th = skb->h.th;
+ ipv6h = ipv6_hdr(skb);
+ th = tcp_hdr(skb);
th->check = 0;
th->check = ~csum_ipv6_magic(&ipv6h->saddr, &ipv6h->daddr, skb->len,
IPPROTO_TCP, 0);
+ skb->csum_start = skb_transport_header(skb) - skb->head;
skb->csum_offset = offsetof(struct tcphdr, check);
skb->ip_summed = CHECKSUM_PARTIAL;
return 0;
@@ -977,7 +981,7 @@ static int tcp_v6_gso_send_check(struct sk_buff *skb)
static void tcp_v6_send_reset(struct sock *sk, struct sk_buff *skb)
{
- struct tcphdr *th = skb->h.th, *t1;
+ struct tcphdr *th = tcp_hdr(skb), *t1;
struct sk_buff *buff;
struct flowi fl;
int tot_len = sizeof(*th);
@@ -993,7 +997,7 @@ static void tcp_v6_send_reset(struct sock *sk, struct sk_buff *skb)
#ifdef CONFIG_TCP_MD5SIG
if (sk)
- key = tcp_v6_md5_do_lookup(sk, &skb->nh.ipv6h->daddr);
+ key = tcp_v6_md5_do_lookup(sk, &ipv6_hdr(skb)->daddr);
else
key = NULL;
@@ -1037,20 +1041,18 @@ static void tcp_v6_send_reset(struct sock *sk, struct sk_buff *skb)
(TCPOPT_NOP << 16) |
(TCPOPT_MD5SIG << 8) |
TCPOLEN_MD5SIG);
- tcp_v6_do_calc_md5_hash((__u8*)&opt[1],
- key,
- &skb->nh.ipv6h->daddr,
- &skb->nh.ipv6h->saddr,
- t1, IPPROTO_TCP,
- tot_len);
+ tcp_v6_do_calc_md5_hash((__u8 *)&opt[1], key,
+ &ipv6_hdr(skb)->daddr,
+ &ipv6_hdr(skb)->saddr,
+ t1, IPPROTO_TCP, tot_len);
}
#endif
buff->csum = csum_partial((char *)t1, sizeof(*t1), 0);
memset(&fl, 0, sizeof(fl));
- ipv6_addr_copy(&fl.fl6_dst, &skb->nh.ipv6h->saddr);
- ipv6_addr_copy(&fl.fl6_src, &skb->nh.ipv6h->daddr);
+ ipv6_addr_copy(&fl.fl6_dst, &ipv6_hdr(skb)->saddr);
+ ipv6_addr_copy(&fl.fl6_src, &ipv6_hdr(skb)->daddr);
t1->check = csum_ipv6_magic(&fl.fl6_src, &fl.fl6_dst,
sizeof(*t1), IPPROTO_TCP,
@@ -1079,7 +1081,7 @@ static void tcp_v6_send_reset(struct sock *sk, struct sk_buff *skb)
static void tcp_v6_send_ack(struct tcp_timewait_sock *tw,
struct sk_buff *skb, u32 seq, u32 ack, u32 win, u32 ts)
{
- struct tcphdr *th = skb->h.th, *t1;
+ struct tcphdr *th = tcp_hdr(skb), *t1;
struct sk_buff *buff;
struct flowi fl;
int tot_len = sizeof(struct tcphdr);
@@ -1091,7 +1093,7 @@ static void tcp_v6_send_ack(struct tcp_timewait_sock *tw,
#ifdef CONFIG_TCP_MD5SIG
if (!tw && skb->sk) {
- key = tcp_v6_md5_do_lookup(skb->sk, &skb->nh.ipv6h->daddr);
+ key = tcp_v6_md5_do_lookup(skb->sk, &ipv6_hdr(skb)->daddr);
} else if (tw && tw->tw_md5_keylen) {
tw_key.key = tw->tw_md5_key;
tw_key.keylen = tw->tw_md5_keylen;
@@ -1140,20 +1142,18 @@ static void tcp_v6_send_ack(struct tcp_timewait_sock *tw,
if (key) {
*topt++ = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
(TCPOPT_MD5SIG << 8) | TCPOLEN_MD5SIG);
- tcp_v6_do_calc_md5_hash((__u8 *)topt,
- key,
- &skb->nh.ipv6h->daddr,
- &skb->nh.ipv6h->saddr,
- t1, IPPROTO_TCP,
- tot_len);
+ tcp_v6_do_calc_md5_hash((__u8 *)topt, key,
+ &ipv6_hdr(skb)->daddr,
+ &ipv6_hdr(skb)->saddr,
+ t1, IPPROTO_TCP, tot_len);
}
#endif
buff->csum = csum_partial((char *)t1, tot_len, 0);
memset(&fl, 0, sizeof(fl));
- ipv6_addr_copy(&fl.fl6_dst, &skb->nh.ipv6h->saddr);
- ipv6_addr_copy(&fl.fl6_src, &skb->nh.ipv6h->daddr);
+ ipv6_addr_copy(&fl.fl6_dst, &ipv6_hdr(skb)->saddr);
+ ipv6_addr_copy(&fl.fl6_src, &ipv6_hdr(skb)->daddr);
t1->check = csum_ipv6_magic(&fl.fl6_src, &fl.fl6_dst,
tot_len, IPPROTO_TCP,
@@ -1197,18 +1197,18 @@ static void tcp_v6_reqsk_send_ack(struct sk_buff *skb, struct request_sock *req)
static struct sock *tcp_v6_hnd_req(struct sock *sk,struct sk_buff *skb)
{
struct request_sock *req, **prev;
- const struct tcphdr *th = skb->h.th;
+ const struct tcphdr *th = tcp_hdr(skb);
struct sock *nsk;
/* Find possible connection requests. */
req = inet6_csk_search_req(sk, &prev, th->source,
- &skb->nh.ipv6h->saddr,
- &skb->nh.ipv6h->daddr, inet6_iif(skb));
+ &ipv6_hdr(skb)->saddr,
+ &ipv6_hdr(skb)->daddr, inet6_iif(skb));
if (req)
return tcp_check_req(sk, skb, req, prev);
- nsk = __inet6_lookup_established(&tcp_hashinfo, &skb->nh.ipv6h->saddr,
- th->source, &skb->nh.ipv6h->daddr,
+ nsk = __inet6_lookup_established(&tcp_hashinfo, &ipv6_hdr(skb)->saddr,
+ th->source, &ipv6_hdr(skb)->daddr,
ntohs(th->dest), inet6_iif(skb));
if (nsk) {
@@ -1275,9 +1275,9 @@ static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
tcp_openreq_init(req, &tmp_opt, skb);
treq = inet6_rsk(req);
- ipv6_addr_copy(&treq->rmt_addr, &skb->nh.ipv6h->saddr);
- ipv6_addr_copy(&treq->loc_addr, &skb->nh.ipv6h->daddr);
- TCP_ECN_create_request(req, skb->h.th);
+ ipv6_addr_copy(&treq->rmt_addr, &ipv6_hdr(skb)->saddr);
+ ipv6_addr_copy(&treq->loc_addr, &ipv6_hdr(skb)->daddr);
+ TCP_ECN_create_request(req, tcp_hdr(skb));
treq->pktopts = NULL;
if (ipv6_opt_accepted(sk, skb) ||
np->rxopt.bits.rxinfo || np->rxopt.bits.rxoinfo ||
@@ -1363,7 +1363,7 @@ static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
newnp->pktoptions = NULL;
newnp->opt = NULL;
newnp->mcast_oif = inet6_iif(skb);
- newnp->mcast_hops = skb->nh.ipv6h->hop_limit;
+ newnp->mcast_hops = ipv6_hdr(skb)->hop_limit;
/*
* No need to charge this sock to the relevant IPv6 refcnt debug socks count
@@ -1389,7 +1389,9 @@ static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
opt == NULL && treq->pktopts) {
struct inet6_skb_parm *rxopt = IP6CB(treq->pktopts);
if (rxopt->srcrt)
- opt = ipv6_invert_rthdr(sk, (struct ipv6_rt_hdr *)(treq->pktopts->nh.raw + rxopt->srcrt));
+ opt = ipv6_invert_rthdr(sk,
+ (struct ipv6_rt_hdr *)(skb_network_header(treq->pktopts) +
+ rxopt->srcrt));
}
if (dst == NULL) {
@@ -1469,7 +1471,7 @@ static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
}
newnp->opt = NULL;
newnp->mcast_oif = inet6_iif(skb);
- newnp->mcast_hops = skb->nh.ipv6h->hop_limit;
+ newnp->mcast_hops = ipv6_hdr(skb)->hop_limit;
/* Clone native IPv6 options from listening socket (if any)
@@ -1528,15 +1530,16 @@ out:
static __sum16 tcp_v6_checksum_init(struct sk_buff *skb)
{
if (skb->ip_summed == CHECKSUM_COMPLETE) {
- if (!tcp_v6_check(skb->h.th,skb->len,&skb->nh.ipv6h->saddr,
- &skb->nh.ipv6h->daddr,skb->csum)) {
+ if (!tcp_v6_check(tcp_hdr(skb), skb->len, &ipv6_hdr(skb)->saddr,
+ &ipv6_hdr(skb)->daddr, skb->csum)) {
skb->ip_summed = CHECKSUM_UNNECESSARY;
return 0;
}
}
- skb->csum = ~csum_unfold(tcp_v6_check(skb->h.th,skb->len,&skb->nh.ipv6h->saddr,
- &skb->nh.ipv6h->daddr, 0));
+ skb->csum = ~csum_unfold(tcp_v6_check(tcp_hdr(skb), skb->len,
+ &ipv6_hdr(skb)->saddr,
+ &ipv6_hdr(skb)->daddr, 0));
if (skb->len <= 76) {
return __skb_checksum_complete(skb);
@@ -1600,7 +1603,7 @@ static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb)
if (sk->sk_state == TCP_ESTABLISHED) { /* Fast path */
TCP_CHECK_TIMER(sk);
- if (tcp_rcv_established(sk, skb, skb->h.th, skb->len))
+ if (tcp_rcv_established(sk, skb, tcp_hdr(skb), skb->len))
goto reset;
TCP_CHECK_TIMER(sk);
if (opt_skb)
@@ -1608,7 +1611,7 @@ static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb)
return 0;
}
- if (skb->len < (skb->h.th->doff<<2) || tcp_checksum_complete(skb))
+ if (skb->len < tcp_hdrlen(skb) || tcp_checksum_complete(skb))
goto csum_err;
if (sk->sk_state == TCP_LISTEN) {
@@ -1631,7 +1634,7 @@ static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb)
}
TCP_CHECK_TIMER(sk);
- if (tcp_rcv_state_process(sk, skb, skb->h.th, skb->len))
+ if (tcp_rcv_state_process(sk, skb, tcp_hdr(skb), skb->len))
goto reset;
TCP_CHECK_TIMER(sk);
if (opt_skb)
@@ -1664,7 +1667,7 @@ ipv6_pktoptions:
if (np->rxopt.bits.rxinfo || np->rxopt.bits.rxoinfo)
np->mcast_oif = inet6_iif(opt_skb);
if (np->rxopt.bits.rxhlim || np->rxopt.bits.rxohlim)
- np->mcast_hops = opt_skb->nh.ipv6h->hop_limit;
+ np->mcast_hops = ipv6_hdr(opt_skb)->hop_limit;
if (ipv6_opt_accepted(sk, opt_skb)) {
skb_set_owner_r(opt_skb, sk);
opt_skb = xchg(&np->pktoptions, opt_skb);
@@ -1697,28 +1700,27 @@ static int tcp_v6_rcv(struct sk_buff **pskb)
if (!pskb_may_pull(skb, sizeof(struct tcphdr)))
goto discard_it;
- th = skb->h.th;
+ th = tcp_hdr(skb);
if (th->doff < sizeof(struct tcphdr)/4)
goto bad_packet;
if (!pskb_may_pull(skb, th->doff*4))
goto discard_it;
- if ((skb->ip_summed != CHECKSUM_UNNECESSARY &&
- tcp_v6_checksum_init(skb)))
+ if (!skb_csum_unnecessary(skb) && tcp_v6_checksum_init(skb))
goto bad_packet;
- th = skb->h.th;
+ th = tcp_hdr(skb);
TCP_SKB_CB(skb)->seq = ntohl(th->seq);
TCP_SKB_CB(skb)->end_seq = (TCP_SKB_CB(skb)->seq + th->syn + th->fin +
skb->len - th->doff*4);
TCP_SKB_CB(skb)->ack_seq = ntohl(th->ack_seq);
TCP_SKB_CB(skb)->when = 0;
- TCP_SKB_CB(skb)->flags = ipv6_get_dsfield(skb->nh.ipv6h);
+ TCP_SKB_CB(skb)->flags = ipv6_get_dsfield(ipv6_hdr(skb));
TCP_SKB_CB(skb)->sacked = 0;
- sk = __inet6_lookup(&tcp_hashinfo, &skb->nh.ipv6h->saddr, th->source,
- &skb->nh.ipv6h->daddr, ntohs(th->dest),
+ sk = __inet6_lookup(&tcp_hashinfo, &ipv6_hdr(skb)->saddr, th->source,
+ &ipv6_hdr(skb)->daddr, ntohs(th->dest),
inet6_iif(skb));
if (!sk)
@@ -1798,7 +1800,7 @@ do_time_wait:
struct sock *sk2;
sk2 = inet6_lookup_listener(&tcp_hashinfo,
- &skb->nh.ipv6h->daddr,
+ &ipv6_hdr(skb)->daddr,
ntohs(th->dest), inet6_iif(skb));
if (sk2 != NULL) {
struct inet_timewait_sock *tw = inet_twsk(sk);
@@ -1945,6 +1947,7 @@ static int tcp_v6_destroy_sock(struct sock *sk)
return inet6_destroy_sock(sk);
}
+#ifdef CONFIG_PROC_FS
/* Proc filesystem TCPv6 sock list dumping. */
static void get_openreq6(struct seq_file *seq,
struct sock *sk, struct request_sock *req, int i, int uid)
@@ -2061,7 +2064,6 @@ static void get_timewait6_sock(struct seq_file *seq,
atomic_read(&tw->tw_refcnt), tw);
}
-#ifdef CONFIG_PROC_FS
static int tcp6_seq_show(struct seq_file *seq, void *v)
{
struct tcp_iter_state *st;
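
Worth calling out among the mechanical accessor changes: the two checksum hunks also begin setting skb->csum_start, the field this series adds alongside csum_offset. A sketch of the CHECKSUM_PARTIAL hand-off as used here, where the device finishes the sum from csum_start and stores it csum_offset bytes further in:

static void tcp6_partial_csum_sketch(struct sk_buff *skb)
{
	struct tcphdr *th = tcp_hdr(skb);

	th->check = ~csum_ipv6_magic(&ipv6_hdr(skb)->saddr,
				     &ipv6_hdr(skb)->daddr,
				     skb->len, IPPROTO_TCP, 0);
	skb->csum_start = skb_transport_header(skb) - skb->head;
	skb->csum_offset = offsetof(struct tcphdr, check);
	skb->ip_summed = CHECKSUM_PARTIAL;
}
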
diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
index f590db57a7c..b083c09e3d2 100644
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -93,10 +93,10 @@ static struct sock *__udp6_lib_lookup(struct in6_addr *saddr, __be16 sport,
continue;
score++;
}
- if(score == 4) {
+ if (score == 4) {
result = sk;
break;
- } else if(score > badness) {
+ } else if (score > badness) {
result = sk;
badness = score;
}
@@ -120,8 +120,9 @@ int udpv6_recvmsg(struct kiocb *iocb, struct sock *sk,
struct ipv6_pinfo *np = inet6_sk(sk);
struct inet_sock *inet = inet_sk(sk);
struct sk_buff *skb;
- size_t copied;
- int err, copy_only, is_udplite = IS_UDPLITE(sk);
+ unsigned int ulen, copied;
+ int err;
+ int is_udplite = IS_UDPLITE(sk);
if (addr_len)
*addr_len=sizeof(struct sockaddr_in6);
@@ -134,24 +135,25 @@ try_again:
if (!skb)
goto out;
- copied = skb->len - sizeof(struct udphdr);
- if (copied > len) {
- copied = len;
+ ulen = skb->len - sizeof(struct udphdr);
+ copied = len;
+ if (copied > ulen)
+ copied = ulen;
+ else if (copied < ulen)
msg->msg_flags |= MSG_TRUNC;
- }
/*
- * Decide whether to checksum and/or copy data.
+ * If checksum is needed at all, try to do it while copying the
+ * data. If the data is truncated, or if we only want a partial
+ * coverage checksum (UDP-Lite), do it before the copy.
*/
- copy_only = (skb->ip_summed==CHECKSUM_UNNECESSARY);
- if (is_udplite || (!copy_only && msg->msg_flags&MSG_TRUNC)) {
- if (__udp_lib_checksum_complete(skb))
+ if (copied < ulen || UDP_SKB_CB(skb)->partial_cov) {
+ if (udp_lib_checksum_complete(skb))
goto csum_copy_err;
- copy_only = 1;
}
- if (copy_only)
+ if (skb_csum_unnecessary(skb))
err = skb_copy_datagram_iovec(skb, sizeof(struct udphdr),
msg->msg_iov, copied );
else {
@@ -170,15 +172,16 @@ try_again:
sin6 = (struct sockaddr_in6 *) msg->msg_name;
sin6->sin6_family = AF_INET6;
- sin6->sin6_port = skb->h.uh->source;
+ sin6->sin6_port = udp_hdr(skb)->source;
sin6->sin6_flowinfo = 0;
sin6->sin6_scope_id = 0;
if (skb->protocol == htons(ETH_P_IP))
ipv6_addr_set(&sin6->sin6_addr, 0, 0,
- htonl(0xffff), skb->nh.iph->saddr);
+ htonl(0xffff), ip_hdr(skb)->saddr);
else {
- ipv6_addr_copy(&sin6->sin6_addr, &skb->nh.ipv6h->saddr);
+ ipv6_addr_copy(&sin6->sin6_addr,
+ &ipv6_hdr(skb)->saddr);
if (ipv6_addr_type(&sin6->sin6_addr) & IPV6_ADDR_LINKLOCAL)
sin6->sin6_scope_id = IP6CB(skb)->iif;
}
@@ -194,7 +197,7 @@ try_again:
err = copied;
if (flags & MSG_TRUNC)
- err = skb->len - sizeof(struct udphdr);
+ err = ulen;
out_free:
skb_free_datagram(sk, skb);
@@ -279,8 +282,10 @@ int udpv6_queue_rcv_skb(struct sock * sk, struct sk_buff *skb)
}
}
- if (udp_lib_checksum_complete(skb))
- goto drop;
+ if (sk->sk_filter) {
+ if (udp_lib_checksum_complete(skb))
+ goto drop;
+ }
if ((rc = sock_queue_rcv_skb(sk,skb)) < 0) {
/* Note that an ENOMEM error is charged twice */
@@ -325,7 +330,7 @@ static struct sock *udp_v6_mcast_next(struct sock *sk,
if (!ipv6_addr_equal(&np->rcv_saddr, loc_addr))
continue;
}
- if(!inet6_mc_check(s, loc_addr, rmt_addr))
+ if (!inet6_mc_check(s, loc_addr, rmt_addr))
continue;
return s;
}
@@ -341,7 +346,7 @@ static int __udp6_lib_mcast_deliver(struct sk_buff *skb, struct in6_addr *saddr,
struct in6_addr *daddr, struct hlist_head udptable[])
{
struct sock *sk, *sk2;
- const struct udphdr *uh = skb->h.uh;
+ const struct udphdr *uh = udp_hdr(skb);
int dif;
read_lock(&udp_hash_lock);
@@ -366,9 +371,20 @@ out:
return 0;
}
-static inline int udp6_csum_init(struct sk_buff *skb, struct udphdr *uh)
-
+static inline int udp6_csum_init(struct sk_buff *skb, struct udphdr *uh,
+ int proto)
{
+ int err;
+
+ UDP_SKB_CB(skb)->partial_cov = 0;
+ UDP_SKB_CB(skb)->cscov = skb->len;
+
+ if (proto == IPPROTO_UDPLITE) {
+ err = udplite_checksum_init(skb, uh);
+ if (err)
+ return err;
+ }
+
if (uh->check == 0) {
/* RFC 2460 section 8.1 says that we SHOULD log
this error. Well, it is reasonable.
@@ -377,21 +393,20 @@ static inline int udp6_csum_init(struct sk_buff *skb, struct udphdr *uh)
return 1;
}
if (skb->ip_summed == CHECKSUM_COMPLETE &&
- !csum_ipv6_magic(&skb->nh.ipv6h->saddr, &skb->nh.ipv6h->daddr,
- skb->len, IPPROTO_UDP, skb->csum ))
+ !csum_ipv6_magic(&ipv6_hdr(skb)->saddr, &ipv6_hdr(skb)->daddr,
+ skb->len, proto, skb->csum))
skb->ip_summed = CHECKSUM_UNNECESSARY;
- if (skb->ip_summed != CHECKSUM_UNNECESSARY)
- skb->csum = ~csum_unfold(csum_ipv6_magic(&skb->nh.ipv6h->saddr,
- &skb->nh.ipv6h->daddr,
- skb->len, IPPROTO_UDP,
- 0));
+ if (!skb_csum_unnecessary(skb))
+ skb->csum = ~csum_unfold(csum_ipv6_magic(&ipv6_hdr(skb)->saddr,
+ &ipv6_hdr(skb)->daddr,
+ skb->len, proto, 0));
- return (UDP_SKB_CB(skb)->partial_cov = 0);
+ return 0;
}
int __udp6_lib_rcv(struct sk_buff **pskb, struct hlist_head udptable[],
- int is_udplite)
+ int proto)
{
struct sk_buff *skb = *pskb;
struct sock *sk;
@@ -403,15 +418,16 @@ int __udp6_lib_rcv(struct sk_buff **pskb, struct hlist_head udptable[],
if (!pskb_may_pull(skb, sizeof(struct udphdr)))
goto short_packet;
- saddr = &skb->nh.ipv6h->saddr;
- daddr = &skb->nh.ipv6h->daddr;
- uh = skb->h.uh;
+ saddr = &ipv6_hdr(skb)->saddr;
+ daddr = &ipv6_hdr(skb)->daddr;
+ uh = udp_hdr(skb);
ulen = ntohs(uh->len);
if (ulen > skb->len)
goto short_packet;
- if(! is_udplite ) { /* UDP validates ulen. */
+ if (proto == IPPROTO_UDP) {
+ /* UDP validates ulen. */
/* Check for jumbo payload */
if (ulen == 0)
@@ -423,19 +439,15 @@ int __udp6_lib_rcv(struct sk_buff **pskb, struct hlist_head udptable[],
if (ulen < skb->len) {
if (pskb_trim_rcsum(skb, ulen))
goto short_packet;
- saddr = &skb->nh.ipv6h->saddr;
- daddr = &skb->nh.ipv6h->daddr;
- uh = skb->h.uh;
+ saddr = &ipv6_hdr(skb)->saddr;
+ daddr = &ipv6_hdr(skb)->daddr;
+ uh = udp_hdr(skb);
}
-
- if (udp6_csum_init(skb, uh))
- goto discard;
-
- } else { /* UDP-Lite validates cscov. */
- if (udplite6_csum_init(skb, uh))
- goto discard;
}
+ if (udp6_csum_init(skb, uh, proto))
+ goto discard;
+
/*
* Multicast receive code
*/
@@ -457,33 +469,34 @@ int __udp6_lib_rcv(struct sk_buff **pskb, struct hlist_head udptable[],
if (udp_lib_checksum_complete(skb))
goto discard;
- UDP6_INC_STATS_BH(UDP_MIB_NOPORTS, is_udplite);
+ UDP6_INC_STATS_BH(UDP_MIB_NOPORTS, proto == IPPROTO_UDPLITE);
icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_PORT_UNREACH, 0, dev);
kfree_skb(skb);
- return(0);
+ return 0;
}
/* deliver */
udpv6_queue_rcv_skb(sk, skb);
sock_put(sk);
- return(0);
+ return 0;
short_packet:
LIMIT_NETDEBUG(KERN_DEBUG "UDP%sv6: short packet: %d/%u\n",
- is_udplite? "-Lite" : "", ulen, skb->len);
+ proto == IPPROTO_UDPLITE ? "-Lite" : "",
+ ulen, skb->len);
discard:
- UDP6_INC_STATS_BH(UDP_MIB_INERRORS, is_udplite);
+ UDP6_INC_STATS_BH(UDP_MIB_INERRORS, proto == IPPROTO_UDPLITE);
kfree_skb(skb);
- return(0);
+ return 0;
}
static __inline__ int udpv6_rcv(struct sk_buff **pskb)
{
- return __udp6_lib_rcv(pskb, udp_hash, 0);
+ return __udp6_lib_rcv(pskb, udp_hash, IPPROTO_UDP);
}
/*
@@ -521,7 +534,7 @@ static int udp_v6_push_pending_frames(struct sock *sk)
/*
* Create a UDP header
*/
- uh = skb->h.uh;
+ uh = udp_hdr(skb);
uh->source = fl->fl_ip_sport;
uh->dest = fl->fl_ip_dport;
uh->len = htons(up->len);
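
The UDP/UDP-Lite merge threads 'proto' through in place of a boolean, so a single checksum-init routine serves both receive paths. A condensed sketch of the shared entry, with the pseudo-header handling after the coverage check elided down to the mandatory-checksum test:

static int udp6_csum_init_sketch(struct sk_buff *skb, struct udphdr *uh,
				 int proto)
{
	UDP_SKB_CB(skb)->partial_cov = 0;
	UDP_SKB_CB(skb)->cscov = skb->len;

	if (proto == IPPROTO_UDPLITE) {
		int err = udplite_checksum_init(skb, uh);
		if (err)
			return err;
	}
	if (uh->check == 0)
		return 1;	/* a zero checksum is invalid over IPv6 */
	return 0;
}
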
diff --git a/net/ipv6/udplite.c b/net/ipv6/udplite.c
index 629f97162fb..f54016a5500 100644
--- a/net/ipv6/udplite.c
+++ b/net/ipv6/udplite.c
@@ -19,7 +19,7 @@ DEFINE_SNMP_STAT(struct udp_mib, udplite_stats_in6) __read_mostly;
static int udplitev6_rcv(struct sk_buff **pskb)
{
- return __udp6_lib_rcv(pskb, udplite_hash, 1);
+ return __udp6_lib_rcv(pskb, udplite_hash, IPPROTO_UDPLITE);
}
static void udplitev6_err(struct sk_buff *skb,
diff --git a/net/ipv6/xfrm6_input.c b/net/ipv6/xfrm6_input.c
index 31f651f9509..d7ed8aa56ec 100644
--- a/net/ipv6/xfrm6_input.c
+++ b/net/ipv6/xfrm6_input.c
@@ -28,14 +28,14 @@ int xfrm6_rcv_spi(struct sk_buff *skb, __be32 spi)
unsigned int nhoff;
nhoff = IP6CB(skb)->nhoff;
- nexthdr = skb->nh.raw[nhoff];
+ nexthdr = skb_network_header(skb)[nhoff];
seq = 0;
if (!spi && (err = xfrm_parse_spi(skb, nexthdr, &spi, &seq)) != 0)
goto drop;
do {
- struct ipv6hdr *iph = skb->nh.ipv6h;
+ struct ipv6hdr *iph = ipv6_hdr(skb);
if (xfrm_nr == XFRM_MAX_DEPTH)
goto drop;
@@ -58,7 +58,7 @@ int xfrm6_rcv_spi(struct sk_buff *skb, __be32 spi)
if (nexthdr <= 0)
goto drop_unlock;
- skb->nh.raw[nhoff] = nexthdr;
+ skb_network_header(skb)[nhoff] = nexthdr;
if (x->props.replay_window)
xfrm_replay_advance(x, seq);
@@ -112,8 +112,8 @@ int xfrm6_rcv_spi(struct sk_buff *skb, __be32 spi)
return -1;
} else {
#ifdef CONFIG_NETFILTER
- skb->nh.ipv6h->payload_len = htons(skb->len);
- __skb_push(skb, skb->data - skb->nh.raw);
+ ipv6_hdr(skb)->payload_len = htons(skb->len);
+ __skb_push(skb, skb->data - skb_network_header(skb));
NF_HOOK(PF_INET6, NF_IP6_PRE_ROUTING, skb, skb->dev, NULL,
ip6_rcv_finish);
@@ -140,19 +140,19 @@ int xfrm6_rcv(struct sk_buff **pskb)
return xfrm6_rcv_spi(*pskb, 0);
}
+EXPORT_SYMBOL(xfrm6_rcv);
+
int xfrm6_input_addr(struct sk_buff *skb, xfrm_address_t *daddr,
xfrm_address_t *saddr, u8 proto)
{
struct xfrm_state *x = NULL;
int wildcard = 0;
- struct in6_addr any;
xfrm_address_t *xany;
struct xfrm_state *xfrm_vec_one = NULL;
int nh = 0;
int i = 0;
- ipv6_addr_set(&any, 0, 0, 0, 0);
- xany = (xfrm_address_t *)&any;
+ xany = (xfrm_address_t *)&in6addr_any;
for (i = 0; i < 3; i++) {
xfrm_address_t *dst, *src;
@@ -247,3 +247,5 @@ drop:
xfrm_state_put(xfrm_vec_one);
return -1;
}
+
+EXPORT_SYMBOL(xfrm6_input_addr);
diff --git a/net/ipv6/xfrm6_mode_beet.c b/net/ipv6/xfrm6_mode_beet.c
index edcfffa9e87..2e61d6ddece 100644
--- a/net/ipv6/xfrm6_mode_beet.c
+++ b/net/ipv6/xfrm6_mode_beet.c
@@ -38,17 +38,18 @@ static int xfrm6_beet_output(struct xfrm_state *x, struct sk_buff *skb)
int hdr_len;
skb_push(skb, x->props.header_len);
- iph = skb->nh.ipv6h;
+ iph = ipv6_hdr(skb);
hdr_len = ip6_find_1stfragopt(skb, &prevhdr);
- skb->nh.raw = prevhdr - x->props.header_len;
- skb->h.raw = skb->data + hdr_len;
+ skb_set_network_header(skb,
+ (prevhdr - x->props.header_len) - skb->data);
+ skb_set_transport_header(skb, hdr_len);
memmove(skb->data, iph, hdr_len);
- skb->nh.raw = skb->data;
- top_iph = skb->nh.ipv6h;
- skb->nh.raw = &top_iph->nexthdr;
- skb->h.ipv6h = top_iph + 1;
+ skb_reset_network_header(skb);
+ top_iph = ipv6_hdr(skb);
+ skb->transport_header = skb->network_header + sizeof(struct ipv6hdr);
+ skb->network_header += offsetof(struct ipv6hdr, nexthdr);
ipv6_addr_copy(&top_iph->saddr, (struct in6_addr *)&x->props.saddr);
ipv6_addr_copy(&top_iph->daddr, (struct in6_addr *)&x->id.daddr);
@@ -59,6 +60,7 @@ static int xfrm6_beet_output(struct xfrm_state *x, struct sk_buff *skb)
static int xfrm6_beet_input(struct xfrm_state *x, struct sk_buff *skb)
{
struct ipv6hdr *ip6h;
+ const unsigned char *old_mac;
int size = sizeof(struct ipv6hdr);
int err = -EINVAL;
@@ -66,13 +68,14 @@ static int xfrm6_beet_input(struct xfrm_state *x, struct sk_buff *skb)
goto out;
skb_push(skb, size);
- memmove(skb->data, skb->nh.raw, size);
- skb->nh.raw = skb->data;
+ memmove(skb->data, skb_network_header(skb), size);
+ skb_reset_network_header(skb);
- skb->mac.raw = memmove(skb->data - skb->mac_len,
- skb->mac.raw, skb->mac_len);
+ old_mac = skb_mac_header(skb);
+ skb_set_mac_header(skb, -skb->mac_len);
+ memmove(skb_mac_header(skb), old_mac, skb->mac_len);
- ip6h = skb->nh.ipv6h;
+ ip6h = ipv6_hdr(skb);
ip6h->payload_len = htons(skb->len - size);
ipv6_addr_copy(&ip6h->daddr, (struct in6_addr *) &x->sel.daddr.a6);
ipv6_addr_copy(&ip6h->saddr, (struct in6_addr *) &x->sel.saddr.a6);
diff --git a/net/ipv6/xfrm6_mode_ro.c b/net/ipv6/xfrm6_mode_ro.c
index 6031c16d46c..6ad6d7ac6bd 100644
--- a/net/ipv6/xfrm6_mode_ro.c
+++ b/net/ipv6/xfrm6_mode_ro.c
@@ -50,11 +50,12 @@ static int xfrm6_ro_output(struct xfrm_state *x, struct sk_buff *skb)
int hdr_len;
skb_push(skb, x->props.header_len);
- iph = skb->nh.ipv6h;
+ iph = ipv6_hdr(skb);
hdr_len = x->type->hdr_offset(x, skb, &prevhdr);
- skb->nh.raw = prevhdr - x->props.header_len;
- skb->h.raw = skb->data + hdr_len;
+ skb_set_network_header(skb,
+ (prevhdr - x->props.header_len) - skb->data);
+ skb_set_transport_header(skb, hdr_len);
memmove(skb->data, iph, hdr_len);
return 0;
}
diff --git a/net/ipv6/xfrm6_mode_transport.c b/net/ipv6/xfrm6_mode_transport.c
index 3a4b39b12ba..c026bfea820 100644
--- a/net/ipv6/xfrm6_mode_transport.c
+++ b/net/ipv6/xfrm6_mode_transport.c
@@ -32,11 +32,12 @@ static int xfrm6_transport_output(struct xfrm_state *x, struct sk_buff *skb)
int hdr_len;
skb_push(skb, x->props.header_len);
- iph = skb->nh.ipv6h;
+ iph = ipv6_hdr(skb);
hdr_len = x->type->hdr_offset(x, skb, &prevhdr);
- skb->nh.raw = prevhdr - x->props.header_len;
- skb->h.raw = skb->data + hdr_len;
+ skb_set_network_header(skb,
+ (prevhdr - x->props.header_len) - skb->data);
+ skb_set_transport_header(skb, hdr_len);
memmove(skb->data, iph, hdr_len);
return 0;
}
@@ -51,13 +52,16 @@ static int xfrm6_transport_output(struct xfrm_state *x, struct sk_buff *skb)
*/
static int xfrm6_transport_input(struct xfrm_state *x, struct sk_buff *skb)
{
- int ihl = skb->data - skb->h.raw;
+ int ihl = skb->data - skb_transport_header(skb);
- if (skb->h.raw != skb->nh.raw)
- skb->nh.raw = memmove(skb->h.raw, skb->nh.raw, ihl);
- skb->nh.ipv6h->payload_len = htons(skb->len + ihl -
+ if (skb->transport_header != skb->network_header) {
+ memmove(skb_transport_header(skb),
+ skb_network_header(skb), ihl);
+ skb->network_header = skb->transport_header;
+ }
+ ipv6_hdr(skb)->payload_len = htons(skb->len + ihl -
sizeof(struct ipv6hdr));
- skb->h.raw = skb->data;
+ skb_reset_transport_header(skb);
return 0;
}
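The beet, ro and transport output hunks all rewrite the same pair of
assignments. A minimal sketch of the offset-based setters they switch to
(names are illustrative; prevhdr is assumed to point into the skb as in
the patch):

#include <linux/skbuff.h>

static void example_set_headers(struct sk_buff *skb, unsigned char *prevhdr,
                                unsigned int header_len, int hdr_len)
{
        /* was: skb->nh.raw = prevhdr - header_len;
         *      skb->h.raw  = skb->data + hdr_len;
         * the setters take offsets relative to skb->data instead: */
        skb_set_network_header(skb, (prevhdr - header_len) - skb->data);
        skb_set_transport_header(skb, hdr_len);
}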
diff --git a/net/ipv6/xfrm6_mode_tunnel.c b/net/ipv6/xfrm6_mode_tunnel.c
index 0bc866c0d83..a6c0cdf46ad 100644
--- a/net/ipv6/xfrm6_mode_tunnel.c
+++ b/net/ipv6/xfrm6_mode_tunnel.c
@@ -18,8 +18,8 @@
static inline void ipip6_ecn_decapsulate(struct sk_buff *skb)
{
- struct ipv6hdr *outer_iph = skb->nh.ipv6h;
- struct ipv6hdr *inner_iph = skb->h.ipv6h;
+ struct ipv6hdr *outer_iph = ipv6_hdr(skb);
+ struct ipv6hdr *inner_iph = ipipv6_hdr(skb);
if (INET_ECN_is_ce(ipv6_get_dsfield(outer_iph)))
IP6_ECN_set_ce(inner_iph);
@@ -27,8 +27,8 @@ static inline void ipip6_ecn_decapsulate(struct sk_buff *skb)
static inline void ip6ip_ecn_decapsulate(struct sk_buff *skb)
{
- if (INET_ECN_is_ce(ipv6_get_dsfield(skb->nh.ipv6h)))
- IP_ECN_set_ce(skb->h.ipiph);
+ if (INET_ECN_is_ce(ipv6_get_dsfield(ipv6_hdr(skb))))
+ IP_ECN_set_ce(ipip_hdr(skb));
}
/* Add encapsulation header.
@@ -51,12 +51,12 @@ static int xfrm6_tunnel_output(struct xfrm_state *x, struct sk_buff *skb)
int dsfield;
skb_push(skb, x->props.header_len);
- iph = skb->nh.ipv6h;
+ iph = ipv6_hdr(skb);
- skb->nh.raw = skb->data;
- top_iph = skb->nh.ipv6h;
- skb->nh.raw = &top_iph->nexthdr;
- skb->h.ipv6h = top_iph + 1;
+ skb_reset_network_header(skb);
+ top_iph = ipv6_hdr(skb);
+ skb->transport_header = skb->network_header + sizeof(struct ipv6hdr);
+ skb->network_header += offsetof(struct ipv6hdr, nexthdr);
top_iph->version = 6;
if (xdst->route->ops->family == AF_INET6) {
@@ -86,9 +86,11 @@ static int xfrm6_tunnel_output(struct xfrm_state *x, struct sk_buff *skb)
static int xfrm6_tunnel_input(struct xfrm_state *x, struct sk_buff *skb)
{
int err = -EINVAL;
+ const unsigned char *old_mac;
+ const unsigned char *nh = skb_network_header(skb);
- if (skb->nh.raw[IP6CB(skb)->nhoff] != IPPROTO_IPV6
- && skb->nh.raw[IP6CB(skb)->nhoff] != IPPROTO_IPIP)
+ if (nh[IP6CB(skb)->nhoff] != IPPROTO_IPV6 &&
+ nh[IP6CB(skb)->nhoff] != IPPROTO_IPIP)
goto out;
if (!pskb_may_pull(skb, sizeof(struct ipv6hdr)))
goto out;
@@ -97,9 +99,10 @@ static int xfrm6_tunnel_input(struct xfrm_state *x, struct sk_buff *skb)
(err = pskb_expand_head(skb, 0, 0, GFP_ATOMIC)))
goto out;
- if (skb->nh.raw[IP6CB(skb)->nhoff] == IPPROTO_IPV6) {
+ nh = skb_network_header(skb);
+ if (nh[IP6CB(skb)->nhoff] == IPPROTO_IPV6) {
if (x->props.flags & XFRM_STATE_DECAP_DSCP)
- ipv6_copy_dscp(skb->nh.ipv6h, skb->h.ipv6h);
+ ipv6_copy_dscp(ipv6_hdr(skb), ipipv6_hdr(skb));
if (!(x->props.flags & XFRM_STATE_NOECN))
ipip6_ecn_decapsulate(skb);
} else {
@@ -107,9 +110,10 @@ static int xfrm6_tunnel_input(struct xfrm_state *x, struct sk_buff *skb)
ip6ip_ecn_decapsulate(skb);
skb->protocol = htons(ETH_P_IP);
}
- skb->mac.raw = memmove(skb->data - skb->mac_len,
- skb->mac.raw, skb->mac_len);
- skb->nh.raw = skb->data;
+ old_mac = skb_mac_header(skb);
+ skb_set_mac_header(skb, -skb->mac_len);
+ memmove(skb_mac_header(skb), old_mac, skb->mac_len);
+ skb_reset_network_header(skb);
err = 0;
out:
diff --git a/net/ipv6/xfrm6_output.c b/net/ipv6/xfrm6_output.c
index d6d786b89d2..56364a5f676 100644
--- a/net/ipv6/xfrm6_output.c
+++ b/net/ipv6/xfrm6_output.c
@@ -23,6 +23,8 @@ int xfrm6_find_1stfragopt(struct xfrm_state *x, struct sk_buff *skb,
return ip6_find_1stfragopt(skb, prevhdr);
}
+EXPORT_SYMBOL(xfrm6_find_1stfragopt);
+
static int xfrm6_tunnel_check_size(struct sk_buff *skb)
{
int mtu, ret = 0;
@@ -76,11 +78,11 @@ static int xfrm6_output_one(struct sk_buff *skb)
x->curlft.bytes += skb->len;
x->curlft.packets++;
if (x->props.mode == XFRM_MODE_ROUTEOPTIMIZATION)
- x->lastused = (u64)xtime.tv_sec;
+ x->lastused = get_seconds();
spin_unlock_bh(&x->lock);
- skb->nh.raw = skb->data;
+ skb_reset_network_header(skb);
if (!(skb->dst = dst_pop(dst))) {
err = -EHOSTUNREACH;
diff --git a/net/ipv6/xfrm6_policy.c b/net/ipv6/xfrm6_policy.c
index d8a585bd2cb..1faa2ea80af 100644
--- a/net/ipv6/xfrm6_policy.c
+++ b/net/ipv6/xfrm6_policy.c
@@ -240,7 +240,8 @@ __xfrm6_bundle_create(struct xfrm_policy *policy, struct xfrm_state **xfrm, int
if (!afinfo) {
dst = *dst_p;
goto error;
- };
+ }
+
dst_prev->output = afinfo->output;
xfrm_state_put_afinfo(afinfo);
/* Sheit... I remember I did this right. Apparently,
@@ -270,17 +271,19 @@ error:
static inline void
_decode_session6(struct sk_buff *skb, struct flowi *fl)
{
- u16 offset = skb->h.raw - skb->nh.raw;
- struct ipv6hdr *hdr = skb->nh.ipv6h;
+ u16 offset = skb_network_header_len(skb);
+ struct ipv6hdr *hdr = ipv6_hdr(skb);
struct ipv6_opt_hdr *exthdr;
- u8 nexthdr = skb->nh.raw[IP6CB(skb)->nhoff];
+ const unsigned char *nh = skb_network_header(skb);
+ u8 nexthdr = nh[IP6CB(skb)->nhoff];
memset(fl, 0, sizeof(struct flowi));
ipv6_addr_copy(&fl->fl6_dst, &hdr->daddr);
ipv6_addr_copy(&fl->fl6_src, &hdr->saddr);
- while (pskb_may_pull(skb, skb->nh.raw + offset + 1 - skb->data)) {
- exthdr = (struct ipv6_opt_hdr*)(skb->nh.raw + offset);
+ while (pskb_may_pull(skb, nh + offset + 1 - skb->data)) {
+ nh = skb_network_header(skb);
+ exthdr = (struct ipv6_opt_hdr *)(nh + offset);
switch (nexthdr) {
case NEXTHDR_ROUTING:
@@ -288,7 +291,7 @@ _decode_session6(struct sk_buff *skb, struct flowi *fl)
case NEXTHDR_DEST:
offset += ipv6_optlen(exthdr);
nexthdr = exthdr->nexthdr;
- exthdr = (struct ipv6_opt_hdr*)(skb->nh.raw + offset);
+ exthdr = (struct ipv6_opt_hdr *)(nh + offset);
break;
case IPPROTO_UDP:
@@ -296,7 +299,7 @@ _decode_session6(struct sk_buff *skb, struct flowi *fl)
case IPPROTO_TCP:
case IPPROTO_SCTP:
case IPPROTO_DCCP:
- if (pskb_may_pull(skb, skb->nh.raw + offset + 4 - skb->data)) {
+ if (pskb_may_pull(skb, nh + offset + 4 - skb->data)) {
__be16 *ports = (__be16 *)exthdr;
fl->fl_ip_sport = ports[0];
@@ -306,7 +309,7 @@ _decode_session6(struct sk_buff *skb, struct flowi *fl)
return;
case IPPROTO_ICMPV6:
- if (pskb_may_pull(skb, skb->nh.raw + offset + 2 - skb->data)) {
+ if (pskb_may_pull(skb, nh + offset + 2 - skb->data)) {
u8 *icmp = (u8 *)exthdr;
fl->fl_icmp_type = icmp[0];
@@ -317,7 +320,7 @@ _decode_session6(struct sk_buff *skb, struct flowi *fl)
#ifdef CONFIG_IPV6_MIP6
case IPPROTO_MH:
- if (pskb_may_pull(skb, skb->nh.raw + offset + 3 - skb->data)) {
+ if (pskb_may_pull(skb, nh + offset + 3 - skb->data)) {
struct ip6_mh *mh;
mh = (struct ip6_mh *)exthdr;
@@ -335,7 +338,7 @@ _decode_session6(struct sk_buff *skb, struct flowi *fl)
fl->fl_ipsec_spi = 0;
fl->proto = nexthdr;
return;
- };
+ }
}
}
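One detail worth noting in the _decode_session6() hunks above:
pskb_may_pull() may reallocate the skb head, so the cached network-header
pointer has to be re-read after every successful call, which is why the
loop refreshes nh on each iteration. A minimal sketch of the rule
(function name is illustrative, not from the patch):

#include <linux/skbuff.h>

static int example_read_byte(struct sk_buff *skb, unsigned int offset)
{
        const unsigned char *nh = skb_network_header(skb);

        if (!pskb_may_pull(skb, nh + offset + 1 - skb->data))
                return -EINVAL;
        nh = skb_network_header(skb);   /* the head may have moved */
        return nh[offset];
}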
diff --git a/net/ipv6/xfrm6_tunnel.c b/net/ipv6/xfrm6_tunnel.c
index 93c42232aa3..5502cc948df 100644
--- a/net/ipv6/xfrm6_tunnel.c
+++ b/net/ipv6/xfrm6_tunnel.c
@@ -257,11 +257,11 @@ static int xfrm6_tunnel_input(struct xfrm_state *x, struct sk_buff *skb)
static int xfrm6_tunnel_rcv(struct sk_buff *skb)
{
- struct ipv6hdr *iph = skb->nh.ipv6h;
+ struct ipv6hdr *iph = ipv6_hdr(skb);
__be32 spi;
spi = xfrm6_tunnel_spi_lookup((xfrm_address_t *)&iph->saddr);
- return xfrm6_rcv_spi(skb, spi);
+ return xfrm6_rcv_spi(skb, spi) > 0 ? : 0;
}
static int xfrm6_tunnel_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
diff --git a/net/ipx/af_ipx.c b/net/ipx/af_ipx.c
index cac35a77f06..392f8bc9269 100644
--- a/net/ipx/af_ipx.c
+++ b/net/ipx/af_ipx.c
@@ -576,7 +576,9 @@ static struct sk_buff *ipxitf_adjust_skbuff(struct ipx_interface *intrfc,
skb2 = alloc_skb(len, GFP_ATOMIC);
if (skb2) {
skb_reserve(skb2, out_offset);
- skb2->nh.raw = skb2->h.raw = skb_put(skb2, skb->len);
+ skb_reset_network_header(skb2);
+ skb_reset_transport_header(skb2);
+ skb_put(skb2, skb->len);
memcpy(ipx_hdr(skb2), ipx_hdr(skb), skb->len);
memcpy(skb2->cb, skb->cb, sizeof(skb->cb));
}
@@ -1807,8 +1809,8 @@ static int ipx_recvmsg(struct kiocb *iocb, struct socket *sock,
copied);
if (rc)
goto out_free;
- if (skb->tstamp.off_sec)
- skb_get_timestamp(skb, &sk->sk_stamp);
+ if (skb->tstamp.tv64)
+ sk->sk_stamp = skb->tstamp;
msg->msg_namelen = sizeof(*sipx);
diff --git a/net/ipx/ipx_route.c b/net/ipx/ipx_route.c
index 8e1cad971f1..e16c1142352 100644
--- a/net/ipx/ipx_route.c
+++ b/net/ipx/ipx_route.c
@@ -203,7 +203,9 @@ int ipxrtr_route_packet(struct sock *sk, struct sockaddr_ipx *usipx,
skb->sk = sk;
/* Fill in IPX header */
- skb->h.raw = skb->nh.raw = skb_put(skb, sizeof(struct ipxhdr));
+ skb_reset_network_header(skb);
+ skb_reset_transport_header(skb);
+ skb_put(skb, sizeof(struct ipxhdr));
ipx = ipx_hdr(skb);
ipx->ipx_pktsize = htons(len + sizeof(struct ipxhdr));
IPX_SKB_CB(skb)->ipx_tctrl = 0;
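The two ipx hunks replace the combined "h.raw = nh.raw = skb_put(...)"
assignment with an explicit three-step sequence. A minimal sketch of the
pattern, assuming a freshly allocated skb so that skb->data marks where
the header will start (function name is illustrative):

#include <linux/skbuff.h>
#include <net/ipx.h>

static struct ipxhdr *example_start_ipx_header(struct sk_buff *skb)
{
        skb_reset_network_header(skb);          /* header starts at data */
        skb_reset_transport_header(skb);
        skb_put(skb, sizeof(struct ipxhdr));    /* then extend the data area */
        return ipx_hdr(skb);
}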
diff --git a/net/irda/af_irda.c b/net/irda/af_irda.c
index 0eb7d596d47..06c97c60d54 100644
--- a/net/irda/af_irda.c
+++ b/net/irda/af_irda.c
@@ -89,7 +89,6 @@ static int irda_data_indication(void *instance, void *sap, struct sk_buff *skb)
self = instance;
sk = instance;
- IRDA_ASSERT(sk != NULL, return -1;);
err = sock_queue_rcv_skb(sk, skb);
if (err) {
@@ -131,14 +130,12 @@ static void irda_disconnect_indication(void *instance, void *sap,
}
/* Prevent race conditions with irda_release() and irda_shutdown() */
+ bh_lock_sock(sk);
if (!sock_flag(sk, SOCK_DEAD) && sk->sk_state != TCP_CLOSE) {
- lock_sock(sk);
sk->sk_state = TCP_CLOSE;
- sk->sk_err = ECONNRESET;
sk->sk_shutdown |= SEND_SHUTDOWN;
sk->sk_state_change(sk);
- release_sock(sk);
/* Close our TSAP.
* If we leave it open, IrLMP put it back into the list of
@@ -158,6 +155,7 @@ static void irda_disconnect_indication(void *instance, void *sap,
self->tsap = NULL;
}
}
+ bh_unlock_sock(sk);
/* Note : once we are there, there is not much you want to do
* with the socket anymore, apart from closing it.
@@ -220,7 +218,7 @@ static void irda_connect_confirm(void *instance, void *sap,
break;
default:
self->max_data_size = irttp_get_max_seg_size(self->tsap);
- };
+ }
IRDA_DEBUG(2, "%s(), max_data_size=%d\n", __FUNCTION__,
self->max_data_size);
@@ -283,7 +281,7 @@ static void irda_connect_indication(void *instance, void *sap,
break;
default:
self->max_data_size = irttp_get_max_seg_size(self->tsap);
- };
+ }
IRDA_DEBUG(2, "%s(), max_data_size=%d\n", __FUNCTION__,
self->max_data_size);
@@ -306,8 +304,6 @@ static void irda_connect_response(struct irda_sock *self)
IRDA_DEBUG(2, "%s()\n", __FUNCTION__);
- IRDA_ASSERT(self != NULL, return;);
-
skb = alloc_skb(TTP_MAX_HEADER + TTP_SAR_HEADER,
GFP_ATOMIC);
if (skb == NULL) {
@@ -337,7 +333,7 @@ static void irda_flow_indication(void *instance, void *sap, LOCAL_FLOW flow)
self = instance;
sk = instance;
- IRDA_ASSERT(sk != NULL, return;);
+ BUG_ON(sk == NULL);
switch (flow) {
case FLOW_STOP:
@@ -449,7 +445,7 @@ static void irda_discovery_timeout(u_long priv)
IRDA_DEBUG(2, "%s()\n", __FUNCTION__);
self = (struct irda_sock *) priv;
- IRDA_ASSERT(self != NULL, return;);
+ BUG_ON(self == NULL);
/* Nothing for the caller */
self->cachelog = NULL;
@@ -546,8 +542,6 @@ static int irda_find_lsap_sel(struct irda_sock *self, char *name)
{
IRDA_DEBUG(2, "%s(%p, %s)\n", __FUNCTION__, self, name);
- IRDA_ASSERT(self != NULL, return -1;);
-
if (self->iriap) {
IRDA_WARNING("%s(): busy with a previous query\n",
__FUNCTION__);
@@ -635,8 +629,6 @@ static int irda_discover_daddr_and_lsap_sel(struct irda_sock *self, char *name)
IRDA_DEBUG(2, "%s(), name=%s\n", __FUNCTION__, name);
- IRDA_ASSERT(self != NULL, return -1;);
-
/* Ask lmp for the current discovery log
* Note : we have to use irlmp_get_discoveries(), as opposed
* to play with the cachelog directly, because while we are
@@ -784,8 +776,6 @@ static int irda_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
struct irda_sock *self = irda_sk(sk);
int err;
- IRDA_ASSERT(self != NULL, return -1;);
-
IRDA_DEBUG(2, "%s(%p)\n", __FUNCTION__, self);
if (addr_len != sizeof(struct sockaddr_irda))
@@ -841,8 +831,6 @@ static int irda_accept(struct socket *sock, struct socket *newsock, int flags)
IRDA_DEBUG(2, "%s()\n", __FUNCTION__);
- IRDA_ASSERT(self != NULL, return -1;);
-
err = irda_create(newsock, sk->sk_protocol);
if (err)
return err;
@@ -873,44 +861,28 @@ static int irda_accept(struct socket *sock, struct socket *newsock, int flags)
* calling us, the data is waiting for us ;-)
* Jean II
*/
- skb = skb_dequeue(&sk->sk_receive_queue);
- if (skb == NULL) {
- int ret = 0;
- DECLARE_WAITQUEUE(waitq, current);
+ while (1) {
+ skb = skb_dequeue(&sk->sk_receive_queue);
+ if (skb)
+ break;
/* Non blocking operation */
if (flags & O_NONBLOCK)
return -EWOULDBLOCK;
- /* The following code is a cut'n'paste of the
- * wait_event_interruptible() macro.
- * We don't us the macro because the condition has
- * side effects : we want to make sure that only one
- * skb get dequeued - Jean II */
- add_wait_queue(sk->sk_sleep, &waitq);
- for (;;) {
- set_current_state(TASK_INTERRUPTIBLE);
- skb = skb_dequeue(&sk->sk_receive_queue);
- if (skb != NULL)
- break;
- if (!signal_pending(current)) {
- schedule();
- continue;
- }
- ret = -ERESTARTSYS;
- break;
- }
- current->state = TASK_RUNNING;
- remove_wait_queue(sk->sk_sleep, &waitq);
- if(ret)
- return -ERESTARTSYS;
+ err = wait_event_interruptible(*(sk->sk_sleep),
+ skb_peek(&sk->sk_receive_queue));
+ if (err)
+ return err;
}
newsk = newsock->sk;
+ if (newsk == NULL)
+ return -EIO;
+
newsk->sk_state = TCP_ESTABLISHED;
new = irda_sk(newsk);
- IRDA_ASSERT(new != NULL, return -1;);
/* Now attach up the new socket */
new->tsap = irttp_dup(self->tsap, new);
@@ -1061,7 +1033,8 @@ static int irda_connect(struct socket *sock, struct sockaddr *uaddr,
if (sk->sk_state != TCP_ESTABLISHED) {
sock->state = SS_UNCONNECTED;
- return sock_error(sk); /* Always set at this point */
+ err = sock_error(sk);
+ return err ? err : -ECONNRESET;
}
sock->state = SS_CONNECTED;
@@ -1171,8 +1144,6 @@ static void irda_destroy_socket(struct irda_sock *self)
{
IRDA_DEBUG(2, "%s(%p)\n", __FUNCTION__, self);
- IRDA_ASSERT(self != NULL, return;);
-
/* Unregister with IrLMP */
irlmp_unregister_client(self->ckey);
irlmp_unregister_service(self->skey);
@@ -1274,7 +1245,6 @@ static int irda_sendmsg(struct kiocb *iocb, struct socket *sock,
struct sock *sk = sock->sk;
struct irda_sock *self;
struct sk_buff *skb;
- unsigned char *asmptr;
int err;
IRDA_DEBUG(4, "%s(), len=%zd\n", __FUNCTION__, len);
@@ -1292,7 +1262,6 @@ static int irda_sendmsg(struct kiocb *iocb, struct socket *sock,
return -ENOTCONN;
self = irda_sk(sk);
- IRDA_ASSERT(self != NULL, return -1;);
/* Check if IrTTP is wants us to slow down */
@@ -1317,9 +1286,9 @@ static int irda_sendmsg(struct kiocb *iocb, struct socket *sock,
return -ENOBUFS;
skb_reserve(skb, self->max_header_size + 16);
-
- asmptr = skb->h.raw = skb_put(skb, len);
- err = memcpy_fromiovec(asmptr, msg->msg_iov, len);
+ skb_reset_transport_header(skb);
+ skb_put(skb, len);
+ err = memcpy_fromiovec(skb_transport_header(skb), msg->msg_iov, len);
if (err) {
kfree_skb(skb);
return err;
@@ -1355,16 +1324,16 @@ static int irda_recvmsg_dgram(struct kiocb *iocb, struct socket *sock,
IRDA_DEBUG(4, "%s()\n", __FUNCTION__);
- IRDA_ASSERT(self != NULL, return -1;);
- IRDA_ASSERT(!sock_error(sk), return -1;);
+ if ((err = sock_error(sk)) < 0)
+ return err;
skb = skb_recv_datagram(sk, flags & ~MSG_DONTWAIT,
flags & MSG_DONTWAIT, &err);
if (!skb)
return err;
- skb->h.raw = skb->data;
- copied = skb->len;
+ skb_reset_transport_header(skb);
+ copied = skb->len;
if (copied > size) {
IRDA_DEBUG(2, "%s(), Received truncated frame (%zd < %zd)!\n",
@@ -1403,13 +1372,13 @@ static int irda_recvmsg_stream(struct kiocb *iocb, struct socket *sock,
struct irda_sock *self = irda_sk(sk);
int noblock = flags & MSG_DONTWAIT;
size_t copied = 0;
- int target = 1;
- DECLARE_WAITQUEUE(waitq, current);
+ int target, err;
+ long timeo;
IRDA_DEBUG(3, "%s()\n", __FUNCTION__);
- IRDA_ASSERT(self != NULL, return -1;);
- IRDA_ASSERT(!sock_error(sk), return -1;);
+ if ((err = sock_error(sk)) < 0)
+ return err;
if (sock->flags & __SO_ACCEPTCON)
return(-EINVAL);
@@ -1417,8 +1386,8 @@ static int irda_recvmsg_stream(struct kiocb *iocb, struct socket *sock,
if (flags & MSG_OOB)
return -EOPNOTSUPP;
- if (flags & MSG_WAITALL)
- target = size;
+ target = sock_rcvlowat(sk, flags & MSG_WAITALL, size);
+ timeo = sock_rcvtimeo(sk, noblock);
msg->msg_namelen = 0;
@@ -1426,19 +1395,14 @@ static int irda_recvmsg_stream(struct kiocb *iocb, struct socket *sock,
int chunk;
struct sk_buff *skb = skb_dequeue(&sk->sk_receive_queue);
- if (skb==NULL) {
+ if (skb == NULL) {
+ DEFINE_WAIT(wait);
int ret = 0;
if (copied >= target)
break;
- /* The following code is a cut'n'paste of the
- * wait_event_interruptible() macro.
- * We don't us the macro because the test condition
- * is messy. - Jean II */
- set_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags);
- add_wait_queue(sk->sk_sleep, &waitq);
- set_current_state(TASK_INTERRUPTIBLE);
+ prepare_to_wait_exclusive(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE);
/*
* POSIX 1003.1g mandates this order.
@@ -1451,17 +1415,17 @@ static int irda_recvmsg_stream(struct kiocb *iocb, struct socket *sock,
else if (noblock)
ret = -EAGAIN;
else if (signal_pending(current))
- ret = -ERESTARTSYS;
+ ret = sock_intr_errno(timeo);
+ else if (sk->sk_state != TCP_ESTABLISHED)
+ ret = -ENOTCONN;
else if (skb_peek(&sk->sk_receive_queue) == NULL)
/* Wait process until data arrives */
schedule();
- current->state = TASK_RUNNING;
- remove_wait_queue(sk->sk_sleep, &waitq);
- clear_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags);
+ finish_wait(sk->sk_sleep, &wait);
- if(ret)
- return(ret);
+ if (ret)
+ return ret;
if (sk->sk_shutdown & RCV_SHUTDOWN)
break;
@@ -1530,7 +1494,6 @@ static int irda_sendmsg_dgram(struct kiocb *iocb, struct socket *sock,
struct sock *sk = sock->sk;
struct irda_sock *self;
struct sk_buff *skb;
- unsigned char *asmptr;
int err;
IRDA_DEBUG(4, "%s(), len=%zd\n", __FUNCTION__, len);
@@ -1547,7 +1510,6 @@ static int irda_sendmsg_dgram(struct kiocb *iocb, struct socket *sock,
return -ENOTCONN;
self = irda_sk(sk);
- IRDA_ASSERT(self != NULL, return -1;);
/*
* Check that we don't send out too big frames. This is an unreliable
@@ -1566,10 +1528,11 @@ static int irda_sendmsg_dgram(struct kiocb *iocb, struct socket *sock,
return -ENOBUFS;
skb_reserve(skb, self->max_header_size);
+ skb_reset_transport_header(skb);
IRDA_DEBUG(4, "%s(), appending user data\n", __FUNCTION__);
- asmptr = skb->h.raw = skb_put(skb, len);
- err = memcpy_fromiovec(asmptr, msg->msg_iov, len);
+ skb_put(skb, len);
+ err = memcpy_fromiovec(skb_transport_header(skb), msg->msg_iov, len);
if (err) {
kfree_skb(skb);
return err;
@@ -1602,7 +1565,6 @@ static int irda_sendmsg_ultra(struct kiocb *iocb, struct socket *sock,
__u8 pid = 0;
int bound = 0;
struct sk_buff *skb;
- unsigned char *asmptr;
int err;
IRDA_DEBUG(4, "%s(), len=%zd\n", __FUNCTION__, len);
@@ -1616,7 +1578,6 @@ static int irda_sendmsg_ultra(struct kiocb *iocb, struct socket *sock,
}
self = irda_sk(sk);
- IRDA_ASSERT(self != NULL, return -1;);
/* Check if an address was specified with sendto. Jean II */
if (msg->msg_name) {
@@ -1662,10 +1623,11 @@ static int irda_sendmsg_ultra(struct kiocb *iocb, struct socket *sock,
return -ENOBUFS;
skb_reserve(skb, self->max_header_size);
+ skb_reset_transport_header(skb);
IRDA_DEBUG(4, "%s(), appending user data\n", __FUNCTION__);
- asmptr = skb->h.raw = skb_put(skb, len);
- err = memcpy_fromiovec(asmptr, msg->msg_iov, len);
+ skb_put(skb, len);
+ err = memcpy_fromiovec(skb_transport_header(skb), msg->msg_iov, len);
if (err) {
kfree_skb(skb);
return err;
@@ -1689,8 +1651,6 @@ static int irda_shutdown(struct socket *sock, int how)
struct sock *sk = sock->sk;
struct irda_sock *self = irda_sk(sk);
- IRDA_ASSERT(self != NULL, return -1;);
-
IRDA_DEBUG(1, "%s(%p)\n", __FUNCTION__, self);
sk->sk_state = TCP_CLOSE;
@@ -1863,8 +1823,6 @@ static int irda_setsockopt(struct socket *sock, int level, int optname,
struct ias_attrib * ias_attr; /* Attribute in IAS object */
int opt;
- IRDA_ASSERT(self != NULL, return -1;);
-
IRDA_DEBUG(2, "%s(%p)\n", __FUNCTION__, self);
if (level != SOL_IRLMP)
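The irda_accept() hunk above replaces a hand-rolled wait loop with
wait_event_interruptible(), which sleeps until the condition becomes true
or a signal arrives; the dequeue stays outside the condition so only one
skb is taken per wakeup. A minimal sketch of the same shape (names are
illustrative, not from the patch):

#include <linux/err.h>
#include <linux/wait.h>
#include <linux/skbuff.h>
#include <net/sock.h>

static struct sk_buff *example_wait_for_skb(struct sock *sk)
{
        struct sk_buff *skb;
        int err;

        for (;;) {
                skb = skb_dequeue(&sk->sk_receive_queue);
                if (skb)
                        return skb;
                /* sleeps until the queue is non-empty or a signal arrives */
                err = wait_event_interruptible(*sk->sk_sleep,
                                skb_peek(&sk->sk_receive_queue));
                if (err)
                        return ERR_PTR(err);    /* -ERESTARTSYS */
        }
}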
diff --git a/net/irda/ircomm/ircomm_param.c b/net/irda/ircomm/ircomm_param.c
index 01d7c9c7b3b..e5e4792a031 100644
--- a/net/irda/ircomm/ircomm_param.c
+++ b/net/irda/ircomm/ircomm_param.c
@@ -133,8 +133,8 @@ int ircomm_param_request(struct ircomm_tty_cb *self, __u8 pi, int flush)
* Inserting is a little bit tricky since we don't know how much
* room we will need. But this should hopefully work OK
*/
- count = irda_param_insert(self, pi, skb->tail, skb_tailroom(skb),
- &ircomm_param_info);
+ count = irda_param_insert(self, pi, skb_tail_pointer(skb),
+ skb_tailroom(skb), &ircomm_param_info);
if (count < 0) {
IRDA_WARNING("%s(), no room for parameter!\n", __FUNCTION__);
spin_unlock_irqrestore(&self->spinlock, flags);
diff --git a/net/irda/irda_device.c b/net/irda/irda_device.c
index e717801b38f..7b5def1ea63 100644
--- a/net/irda/irda_device.c
+++ b/net/irda/irda_device.c
@@ -375,7 +375,7 @@ EXPORT_SYMBOL(alloc_irdadev);
dongle_t *irda_device_dongle_init(struct net_device *dev, int type)
{
struct dongle_reg *reg;
- dongle_t *dongle = NULL;
+ dongle_t *dongle = kzalloc(sizeof(dongle_t), GFP_KERNEL);
might_sleep();
@@ -397,19 +397,14 @@ dongle_t *irda_device_dongle_init(struct net_device *dev, int type)
if (!reg || !try_module_get(reg->owner) ) {
IRDA_ERROR("IrDA: Unable to find requested dongle type %x\n",
type);
- goto out;
+ kfree(dongle);
+ dongle = NULL;
+ }
+ if (dongle) {
+ /* Bind the registration info to this particular instance */
+ dongle->issue = reg;
+ dongle->dev = dev;
}
-
- /* Allocate dongle info for this instance */
- dongle = kzalloc(sizeof(dongle_t), GFP_KERNEL);
- if (!dongle)
- goto out;
-
- /* Bind the registration info to this particular instance */
- dongle->issue = reg;
- dongle->dev = dev;
-
- out:
spin_unlock(&dongles->hb_spinlock);
return dongle;
}
diff --git a/net/irda/irlan/irlan_common.c b/net/irda/irlan/irlan_common.c
index fcf9d659962..ed69773b0f8 100644
--- a/net/irda/irlan/irlan_common.c
+++ b/net/irda/irlan/irlan_common.c
@@ -1039,7 +1039,7 @@ static int __irlan_insert_param(struct sk_buff *skb, char *param, int type,
}
/* Insert at end of sk-buffer */
- frame = skb->tail;
+ frame = skb_tail_pointer(skb);
/* Make space for data */
if (skb_tailroom(skb) < (param_len+value_len+3)) {
diff --git a/net/irda/irlan/irlan_eth.c b/net/irda/irlan/irlan_eth.c
index 672ab3f6903..c421521c0a9 100644
--- a/net/irda/irlan/irlan_eth.c
+++ b/net/irda/irlan/irlan_eth.c
@@ -234,8 +234,7 @@ int irlan_eth_receive(void *instance, void *sap, struct sk_buff *skb)
* might have been previously set by the low level IrDA network
* device driver
*/
- skb->dev = self->dev;
- skb->protocol=eth_type_trans(skb, skb->dev); /* Remove eth header */
+ skb->protocol = eth_type_trans(skb, self->dev); /* Remove eth header */
self->stats.rx_packets++;
self->stats.rx_bytes += skb->len;
diff --git a/net/irda/irlap_event.c b/net/irda/irlap_event.c
index 7b6433fe1dc..0b02073ffdf 100644
--- a/net/irda/irlap_event.c
+++ b/net/irda/irlap_event.c
@@ -590,7 +590,7 @@ static int irlap_state_query(struct irlap_cb *self, IRLAP_EVENT event,
if (!self->discovery_log) {
IRDA_WARNING("%s: discovery log is gone! "
"maybe the discovery timeout has been set"
- " to short?\n", __FUNCTION__);
+ " too short?\n", __FUNCTION__);
break;
}
hashbin_insert(self->discovery_log,
diff --git a/net/irda/irlap_frame.c b/net/irda/irlap_frame.c
index 0b04603e9c4..3c5a68e3641 100644
--- a/net/irda/irlap_frame.c
+++ b/net/irda/irlap_frame.c
@@ -93,7 +93,9 @@ void irlap_queue_xmit(struct irlap_cb *self, struct sk_buff *skb)
{
/* Some common init stuff */
skb->dev = self->netdev;
- skb->h.raw = skb->nh.raw = skb->mac.raw = skb->data;
+ skb_reset_mac_header(skb);
+ skb_reset_network_header(skb);
+ skb_reset_transport_header(skb);
skb->protocol = htons(ETH_P_IRDA);
skb->priority = TC_PRIO_BESTEFFORT;
@@ -411,7 +413,7 @@ static void irlap_recv_discovery_xid_rsp(struct irlap_cb *self,
IRDA_ASSERT(self->magic == LAP_MAGIC, return;);
if (!pskb_may_pull(skb, sizeof(struct xid_frame))) {
- IRDA_ERROR("%s: frame to short!\n", __FUNCTION__);
+ IRDA_ERROR("%s: frame too short!\n", __FUNCTION__);
return;
}
@@ -482,7 +484,7 @@ static void irlap_recv_discovery_xid_cmd(struct irlap_cb *self,
char *text;
if (!pskb_may_pull(skb, sizeof(struct xid_frame))) {
- IRDA_ERROR("%s: frame to short!\n", __FUNCTION__);
+ IRDA_ERROR("%s: frame too short!\n", __FUNCTION__);
return;
}
@@ -526,7 +528,7 @@ static void irlap_recv_discovery_xid_cmd(struct irlap_cb *self,
/* Check if things are sane at this point... */
if((discovery_info == NULL) ||
!pskb_may_pull(skb, 3)) {
- IRDA_ERROR("%s: discovery frame to short!\n",
+ IRDA_ERROR("%s: discovery frame too short!\n",
__FUNCTION__);
return;
}
@@ -1171,7 +1173,7 @@ static void irlap_recv_frmr_frame(struct irlap_cb *self, struct sk_buff *skb,
IRDA_ASSERT(info != NULL, return;);
if (!pskb_may_pull(skb, 4)) {
- IRDA_ERROR("%s: frame to short!\n", __FUNCTION__);
+ IRDA_ERROR("%s: frame too short!\n", __FUNCTION__);
return;
}
@@ -1260,7 +1262,7 @@ static void irlap_recv_test_frame(struct irlap_cb *self, struct sk_buff *skb,
IRDA_DEBUG(2, "%s()\n", __FUNCTION__);
if (!pskb_may_pull(skb, sizeof(*frame))) {
- IRDA_ERROR("%s: frame to short!\n", __FUNCTION__);
+ IRDA_ERROR("%s: frame too short!\n", __FUNCTION__);
return;
}
frame = (struct test_frame *) skb->data;
@@ -1268,7 +1270,7 @@ static void irlap_recv_test_frame(struct irlap_cb *self, struct sk_buff *skb,
/* Broadcast frames must carry saddr and daddr fields */
if (info->caddr == CBROADCAST) {
if (skb->len < sizeof(struct test_frame)) {
- IRDA_DEBUG(0, "%s() test frame to short!\n",
+ IRDA_DEBUG(0, "%s() test frame too short!\n",
__FUNCTION__);
return;
}
@@ -1334,7 +1336,7 @@ int irlap_driver_rcv(struct sk_buff *skb, struct net_device *dev,
/* Check if frame is large enough for parsing */
if (!pskb_may_pull(skb, 2)) {
- IRDA_ERROR("%s: frame to short!\n", __FUNCTION__);
+ IRDA_ERROR("%s: frame too short!\n", __FUNCTION__);
dev_kfree_skb(skb);
return -1;
}
diff --git a/net/irda/irqueue.c b/net/irda/irqueue.c
index 92662330dbc..d058b467f9e 100644
--- a/net/irda/irqueue.c
+++ b/net/irda/irqueue.c
@@ -384,6 +384,9 @@ EXPORT_SYMBOL(hashbin_new);
* for deallocating this structure if it's complex. If not the user can
* just supply kfree, which should take care of the job.
*/
+#ifdef CONFIG_LOCKDEP
+static int hashbin_lock_depth = 0;
+#endif
int hashbin_delete( hashbin_t* hashbin, FREE_FUNC free_func)
{
irda_queue_t* queue;
@@ -395,7 +398,8 @@ int hashbin_delete( hashbin_t* hashbin, FREE_FUNC free_func)
/* Synchronize */
if ( hashbin->hb_type & HB_LOCK ) {
- spin_lock_irqsave(&hashbin->hb_spinlock, flags);
+ spin_lock_irqsave_nested(&hashbin->hb_spinlock, flags,
+ hashbin_lock_depth++);
}
/*
@@ -419,6 +423,9 @@ int hashbin_delete( hashbin_t* hashbin, FREE_FUNC free_func)
/* Release lock */
if ( hashbin->hb_type & HB_LOCK) {
spin_unlock_irqrestore(&hashbin->hb_spinlock, flags);
+#ifdef CONFIG_LOCKDEP
+ hashbin_lock_depth--;
+#endif
}
/*
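hashbin_delete() can recurse through free_func into another hashbin's
delete, so a plain spin_lock_irqsave() would trip lockdep's same-class
deadlock check; the hunk above passes an increasing subclass instead. A
minimal sketch of the annotation, under the simpler assumption of a fixed
one-level nesting (lock names are illustrative):

#include <linux/spinlock.h>

static DEFINE_SPINLOCK(outer_lock);
static DEFINE_SPINLOCK(inner_lock);

static void example_nested_locking(void)
{
        unsigned long oflags, iflags;

        spin_lock_irqsave(&outer_lock, oflags);
        /* the subclass tells lockdep this nesting is intentional */
        spin_lock_irqsave_nested(&inner_lock, iflags, SINGLE_DEPTH_NESTING);
        /* ... work under both locks ... */
        spin_unlock_irqrestore(&inner_lock, iflags);
        spin_unlock_irqrestore(&outer_lock, oflags);
}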
diff --git a/net/irda/irttp.c b/net/irda/irttp.c
index da3f2bc1b6f..7069e4a5825 100644
--- a/net/irda/irttp.c
+++ b/net/irda/irttp.c
@@ -256,7 +256,7 @@ static struct sk_buff *irttp_reassemble_skb(struct tsap_cb *self)
* Copy all fragments to a new buffer
*/
while ((frag = skb_dequeue(&self->rx_fragments)) != NULL) {
- memcpy(skb->data+n, frag->data, frag->len);
+ skb_copy_to_linear_data_offset(skb, n, frag->data, frag->len);
n += frag->len;
dev_kfree_skb(frag);
@@ -314,8 +314,8 @@ static inline void irttp_fragment_skb(struct tsap_cb *self,
skb_reserve(frag, self->max_header_size);
/* Copy data from the original skb into this fragment. */
- memcpy(skb_put(frag, self->max_seg_size), skb->data,
- self->max_seg_size);
+ skb_copy_from_linear_data(skb, skb_put(frag, self->max_seg_size),
+ self->max_seg_size);
/* Insert TTP header, with the more bit set */
frame = skb_push(frag, TTP_HEADER);
@@ -551,7 +551,7 @@ int irttp_udata_request(struct tsap_cb *self, struct sk_buff *skb)
}
if (skb->len > self->max_seg_size) {
- IRDA_DEBUG(1, "%s(), UData is to large for IrLAP!\n",
+ IRDA_DEBUG(1, "%s(), UData is too large for IrLAP!\n",
__FUNCTION__);
goto err;
}
@@ -598,7 +598,7 @@ int irttp_data_request(struct tsap_cb *self, struct sk_buff *skb)
* inside an IrLAP frame
*/
if ((self->tx_max_sdu_size == 0) && (skb->len > self->max_seg_size)) {
- IRDA_ERROR("%s: SAR disabled, and data is to large for IrLAP!\n",
+ IRDA_ERROR("%s: SAR disabled, and data is too large for IrLAP!\n",
__FUNCTION__);
ret = -EMSGSIZE;
goto err;
diff --git a/net/irda/parameters.c b/net/irda/parameters.c
index 75a72d203b0..2627dad7cd8 100644
--- a/net/irda/parameters.c
+++ b/net/irda/parameters.c
@@ -160,7 +160,7 @@ static int irda_insert_integer(void *self, __u8 *buf, int len, __u8 pi,
}
/* Check if buffer is long enough for insertion */
if (len < (2+p.pl)) {
- IRDA_WARNING("%s: buffer to short for insertion!\n",
+ IRDA_WARNING("%s: buffer too short for insertion!\n",
__FUNCTION__);
return -1;
}
@@ -216,7 +216,7 @@ static int irda_extract_integer(void *self, __u8 *buf, int len, __u8 pi,
/* Check if buffer is long enough for parsing */
if (len < (2+p.pl)) {
- IRDA_WARNING("%s: buffer to short for parsing! "
+ IRDA_WARNING("%s: buffer too short for parsing! "
"Need %d bytes, but len is only %d\n",
__FUNCTION__, p.pl, len);
return -1;
@@ -304,7 +304,7 @@ static int irda_extract_string(void *self, __u8 *buf, int len, __u8 pi,
/* Check if buffer is long enough for parsing */
if (len < (2+p.pl)) {
- IRDA_WARNING("%s: buffer to short for parsing! "
+ IRDA_WARNING("%s: buffer too short for parsing! "
"Need %d bytes, but len is only %d\n",
__FUNCTION__, p.pl, len);
return -1;
@@ -343,7 +343,7 @@ static int irda_extract_octseq(void *self, __u8 *buf, int len, __u8 pi,
/* Check if buffer is long enough for parsing */
if (len < (2+p.pl)) {
- IRDA_WARNING("%s: buffer to short for parsing! "
+ IRDA_WARNING("%s: buffer too short for parsing! "
"Need %d bytes, but len is only %d\n",
__FUNCTION__, p.pl, len);
return -1;
diff --git a/net/irda/qos.c b/net/irda/qos.c
index 349012c926b..aeb18cf1dca 100644
--- a/net/irda/qos.c
+++ b/net/irda/qos.c
@@ -469,49 +469,49 @@ int irlap_insert_qos_negotiation_params(struct irlap_cb *self,
int ret;
/* Insert data rate */
- ret = irda_param_insert(self, PI_BAUD_RATE, skb->tail,
+ ret = irda_param_insert(self, PI_BAUD_RATE, skb_tail_pointer(skb),
skb_tailroom(skb), &irlap_param_info);
if (ret < 0)
return ret;
skb_put(skb, ret);
/* Insert max turnaround time */
- ret = irda_param_insert(self, PI_MAX_TURN_TIME, skb->tail,
+ ret = irda_param_insert(self, PI_MAX_TURN_TIME, skb_tail_pointer(skb),
skb_tailroom(skb), &irlap_param_info);
if (ret < 0)
return ret;
skb_put(skb, ret);
/* Insert data size */
- ret = irda_param_insert(self, PI_DATA_SIZE, skb->tail,
+ ret = irda_param_insert(self, PI_DATA_SIZE, skb_tail_pointer(skb),
skb_tailroom(skb), &irlap_param_info);
if (ret < 0)
return ret;
skb_put(skb, ret);
/* Insert window size */
- ret = irda_param_insert(self, PI_WINDOW_SIZE, skb->tail,
+ ret = irda_param_insert(self, PI_WINDOW_SIZE, skb_tail_pointer(skb),
skb_tailroom(skb), &irlap_param_info);
if (ret < 0)
return ret;
skb_put(skb, ret);
/* Insert additional BOFs */
- ret = irda_param_insert(self, PI_ADD_BOFS, skb->tail,
+ ret = irda_param_insert(self, PI_ADD_BOFS, skb_tail_pointer(skb),
skb_tailroom(skb), &irlap_param_info);
if (ret < 0)
return ret;
skb_put(skb, ret);
/* Insert minimum turnaround time */
- ret = irda_param_insert(self, PI_MIN_TURN_TIME, skb->tail,
+ ret = irda_param_insert(self, PI_MIN_TURN_TIME, skb_tail_pointer(skb),
skb_tailroom(skb), &irlap_param_info);
if (ret < 0)
return ret;
skb_put(skb, ret);
/* Insert link disconnect/threshold time */
- ret = irda_param_insert(self, PI_LINK_DISC, skb->tail,
+ ret = irda_param_insert(self, PI_LINK_DISC, skb_tail_pointer(skb),
skb_tailroom(skb), &irlap_param_info);
if (ret < 0)
return ret;
diff --git a/net/irda/wrapper.c b/net/irda/wrapper.c
index 5abfb71aae8..a7a7f191f1a 100644
--- a/net/irda/wrapper.c
+++ b/net/irda/wrapper.c
@@ -239,7 +239,8 @@ async_bump(struct net_device *dev,
if(docopy) {
/* Copy data without CRC (length already checked) */
- memcpy(newskb->data, rx_buff->data, rx_buff->len - 2);
+ skb_copy_to_linear_data(newskb, rx_buff->data,
+ rx_buff->len - 2);
/* Deliver this skb */
dataskb = newskb;
} else {
@@ -256,7 +257,7 @@ async_bump(struct net_device *dev,
/* Feed it to IrLAP layer */
dataskb->dev = dev;
- dataskb->mac.raw = dataskb->data;
+ skb_reset_mac_header(dataskb);
dataskb->protocol = htons(ETH_P_IRDA);
netif_rx(dataskb);
diff --git a/net/iucv/af_iucv.c b/net/iucv/af_iucv.c
index acc94214bde..2f1373855a8 100644
--- a/net/iucv/af_iucv.c
+++ b/net/iucv/af_iucv.c
@@ -45,7 +45,8 @@ static struct proto iucv_proto = {
static void iucv_callback_rx(struct iucv_path *, struct iucv_message *);
static void iucv_callback_txdone(struct iucv_path *, struct iucv_message *);
static void iucv_callback_connack(struct iucv_path *, u8 ipuser[16]);
-static int iucv_callback_connreq(struct iucv_path *, u8 ipvmid[8], u8 ipuser[16]);
+static int iucv_callback_connreq(struct iucv_path *, u8 ipvmid[8],
+ u8 ipuser[16]);
static void iucv_callback_connrej(struct iucv_path *, u8 ipuser[16]);
static struct iucv_sock_list iucv_sk_list = {
@@ -147,11 +148,12 @@ static void iucv_sock_close(struct sock *sk)
unsigned char user_data[16];
struct iucv_sock *iucv = iucv_sk(sk);
int err;
+ unsigned long timeo;
iucv_sock_clear_timer(sk);
lock_sock(sk);
- switch(sk->sk_state) {
+ switch (sk->sk_state) {
case IUCV_LISTEN:
iucv_sock_cleanup_listen(sk);
break;
@@ -159,6 +161,21 @@ static void iucv_sock_close(struct sock *sk)
case IUCV_CONNECTED:
case IUCV_DISCONN:
err = 0;
+
+ sk->sk_state = IUCV_CLOSING;
+ sk->sk_state_change(sk);
+
+ if (!skb_queue_empty(&iucv->send_skb_q)) {
+ if (sock_flag(sk, SOCK_LINGER) && sk->sk_lingertime)
+ timeo = sk->sk_lingertime;
+ else
+ timeo = IUCV_DISCONN_TIMEOUT;
+ err = iucv_sock_wait_state(sk, IUCV_CLOSED, 0, timeo);
+ }
+
+ sk->sk_state = IUCV_CLOSED;
+ sk->sk_state_change(sk);
+
if (iucv->path) {
low_nmcpy(user_data, iucv->src_name);
high_nmcpy(user_data, iucv->dst_name);
@@ -168,12 +185,11 @@ static void iucv_sock_close(struct sock *sk)
iucv->path = NULL;
}
- sk->sk_state = IUCV_CLOSED;
- sk->sk_state_change(sk);
sk->sk_err = ECONNRESET;
sk->sk_state_change(sk);
skb_queue_purge(&iucv->send_skb_q);
+ skb_queue_purge(&iucv->backlog_skb_q);
sock_set_flag(sk, SOCK_ZAPPED);
break;
@@ -181,7 +197,7 @@ static void iucv_sock_close(struct sock *sk)
default:
sock_set_flag(sk, SOCK_ZAPPED);
break;
- };
+ }
release_sock(sk);
iucv_sock_kill(sk);
@@ -204,6 +220,7 @@ static struct sock *iucv_sock_alloc(struct socket *sock, int proto, gfp_t prio)
sock_init_data(sock, sk);
INIT_LIST_HEAD(&iucv_sk(sk)->accept_q);
skb_queue_head_init(&iucv_sk(sk)->send_skb_q);
+ skb_queue_head_init(&iucv_sk(sk)->backlog_skb_q);
iucv_sk(sk)->send_tag = 0;
sk->sk_destruct = iucv_sock_destruct;
@@ -276,7 +293,7 @@ struct sock *iucv_accept_dequeue(struct sock *parent, struct socket *newsock)
struct iucv_sock *isk, *n;
struct sock *sk;
- list_for_each_entry_safe(isk, n, &iucv_sk(parent)->accept_q, accept_q){
+ list_for_each_entry_safe(isk, n, &iucv_sk(parent)->accept_q, accept_q) {
sk = (struct sock *) isk;
lock_sock(sk);
@@ -510,7 +527,7 @@ static int iucv_sock_accept(struct socket *sock, struct socket *newsock,
long timeo;
int err = 0;
- lock_sock(sk);
+ lock_sock_nested(sk, SINGLE_DEPTH_NESTING);
if (sk->sk_state != IUCV_LISTEN) {
err = -EBADFD;
@@ -521,7 +538,7 @@ static int iucv_sock_accept(struct socket *sock, struct socket *newsock,
/* Wait for an incoming connection */
add_wait_queue_exclusive(sk->sk_sleep, &wait);
- while (!(nsk = iucv_accept_dequeue(sk, newsock))){
+ while (!(nsk = iucv_accept_dequeue(sk, newsock))) {
set_current_state(TASK_INTERRUPTIBLE);
if (!timeo) {
err = -EAGAIN;
@@ -530,7 +547,7 @@ static int iucv_sock_accept(struct socket *sock, struct socket *newsock,
release_sock(sk);
timeo = schedule_timeout(timeo);
- lock_sock(sk);
+ lock_sock_nested(sk, SINGLE_DEPTH_NESTING);
if (sk->sk_state != IUCV_LISTEN) {
err = -EBADFD;
@@ -602,13 +619,13 @@ static int iucv_sock_sendmsg(struct kiocb *iocb, struct socket *sock,
goto out;
}
- if (sk->sk_state == IUCV_CONNECTED){
- if(!(skb = sock_alloc_send_skb(sk, len,
- msg->msg_flags & MSG_DONTWAIT,
- &err)))
- return err;
+ if (sk->sk_state == IUCV_CONNECTED) {
+ if (!(skb = sock_alloc_send_skb(sk, len,
+ msg->msg_flags & MSG_DONTWAIT,
+ &err)))
+ goto out;
- if (memcpy_fromiovec(skb_put(skb, len), msg->msg_iov, len)){
+ if (memcpy_fromiovec(skb_put(skb, len), msg->msg_iov, len)) {
err = -EFAULT;
goto fail;
}
@@ -647,10 +664,16 @@ static int iucv_sock_recvmsg(struct kiocb *iocb, struct socket *sock,
{
int noblock = flags & MSG_DONTWAIT;
struct sock *sk = sock->sk;
+ struct iucv_sock *iucv = iucv_sk(sk);
int target, copied = 0;
- struct sk_buff *skb;
+ struct sk_buff *skb, *rskb, *cskb;
int err = 0;
+ if ((sk->sk_state == IUCV_DISCONN || sk->sk_state == IUCV_SEVERED) &&
+ skb_queue_empty(&iucv->backlog_skb_q) &&
+ skb_queue_empty(&sk->sk_receive_queue))
+ return 0;
+
if (flags & (MSG_OOB))
return -EOPNOTSUPP;
@@ -665,10 +688,12 @@ static int iucv_sock_recvmsg(struct kiocb *iocb, struct socket *sock,
copied = min_t(unsigned int, skb->len, len);
- if (memcpy_toiovec(msg->msg_iov, skb->data, copied)) {
+ cskb = skb;
+ if (memcpy_toiovec(msg->msg_iov, cskb->data, copied)) {
skb_queue_head(&sk->sk_receive_queue, skb);
if (copied == 0)
return -EFAULT;
+ goto done;
}
len -= copied;
@@ -683,6 +708,18 @@ static int iucv_sock_recvmsg(struct kiocb *iocb, struct socket *sock,
}
kfree_skb(skb);
+
+ /* Queue backlog skbs */
+ rskb = skb_dequeue(&iucv_sk(sk)->backlog_skb_q);
+ while (rskb) {
+ if (sock_queue_rcv_skb(sk, rskb)) {
+ skb_queue_head(&iucv_sk(sk)->backlog_skb_q,
+ rskb);
+ break;
+ } else {
+ rskb = skb_dequeue(&iucv_sk(sk)->backlog_skb_q);
+ }
+ }
} else
skb_queue_head(&sk->sk_receive_queue, skb);
@@ -695,7 +732,7 @@ static inline unsigned int iucv_accept_poll(struct sock *parent)
struct iucv_sock *isk, *n;
struct sock *sk;
- list_for_each_entry_safe(isk, n, &iucv_sk(parent)->accept_q, accept_q){
+ list_for_each_entry_safe(isk, n, &iucv_sk(parent)->accept_q, accept_q) {
sk = (struct sock *) isk;
if (sk->sk_state == IUCV_CONNECTED)
@@ -726,12 +763,15 @@ unsigned int iucv_sock_poll(struct file *file, struct socket *sock,
mask |= POLLHUP;
if (!skb_queue_empty(&sk->sk_receive_queue) ||
- (sk->sk_shutdown & RCV_SHUTDOWN))
+ (sk->sk_shutdown & RCV_SHUTDOWN))
mask |= POLLIN | POLLRDNORM;
if (sk->sk_state == IUCV_CLOSED)
mask |= POLLHUP;
+ if (sk->sk_state == IUCV_DISCONN || sk->sk_state == IUCV_SEVERED)
+ mask |= POLLIN;
+
if (sock_writeable(sk))
mask |= POLLOUT | POLLWRNORM | POLLWRBAND;
else
@@ -754,7 +794,7 @@ static int iucv_sock_shutdown(struct socket *sock, int how)
return -EINVAL;
lock_sock(sk);
- switch(sk->sk_state) {
+ switch (sk->sk_state) {
case IUCV_CLOSED:
err = -ENOTCONN;
goto fail;
@@ -770,7 +810,7 @@ static int iucv_sock_shutdown(struct socket *sock, int how)
err = iucv_message_send(iucv->path, &txmsg, IUCV_IPRMDATA, 0,
(void *) prmmsg, 8);
if (err) {
- switch(err) {
+ switch (err) {
case 1:
err = -ENOTCONN;
break;
@@ -817,13 +857,6 @@ static int iucv_sock_release(struct socket *sock)
iucv_sk(sk)->path = NULL;
}
- if (sock_flag(sk, SOCK_LINGER) && sk->sk_lingertime){
- lock_sock(sk);
- err = iucv_sock_wait_state(sk, IUCV_CLOSED, 0,
- sk->sk_lingertime);
- release_sock(sk);
- }
-
sock_orphan(sk);
iucv_sock_kill(sk);
return err;
@@ -880,7 +913,7 @@ static int iucv_callback_connreq(struct iucv_path *path,
/* Create the new socket */
nsk = iucv_sock_alloc(NULL, SOCK_STREAM, GFP_ATOMIC);
- if (!nsk){
+ if (!nsk) {
err = iucv_path_sever(path, user_data);
goto fail;
}
@@ -903,7 +936,7 @@ static int iucv_callback_connreq(struct iucv_path *path,
path->msglim = IUCV_QUEUELEN_DEFAULT;
err = iucv_path_accept(path, &af_iucv_handler, nuser_data, nsk);
- if (err){
+ if (err) {
err = iucv_path_sever(path, user_data);
goto fail;
}
@@ -927,18 +960,53 @@ static void iucv_callback_connack(struct iucv_path *path, u8 ipuser[16])
sk->sk_state_change(sk);
}
+static int iucv_fragment_skb(struct sock *sk, struct sk_buff *skb, int len,
+ struct sk_buff_head *fragmented_skb_q)
+{
+ int dataleft, size, copied = 0;
+ struct sk_buff *nskb;
+
+ dataleft = len;
+ while (dataleft) {
+ if (dataleft >= sk->sk_rcvbuf / 4)
+ size = sk->sk_rcvbuf / 4;
+ else
+ size = dataleft;
+
+ nskb = alloc_skb(size, GFP_ATOMIC | GFP_DMA);
+ if (!nskb)
+ return -ENOMEM;
+
+ memcpy(nskb->data, skb->data + copied, size);
+ copied += size;
+ dataleft -= size;
+
+ skb_reset_transport_header(nskb);
+ skb_reset_network_header(nskb);
+ nskb->len = size;
+
+ skb_queue_tail(fragmented_skb_q, nskb);
+ }
+
+ return 0;
+}
+
static void iucv_callback_rx(struct iucv_path *path, struct iucv_message *msg)
{
struct sock *sk = path->private;
- struct sk_buff *skb;
+ struct iucv_sock *iucv = iucv_sk(sk);
+ struct sk_buff *skb, *fskb;
+ struct sk_buff_head fragmented_skb_q;
int rc;
+ skb_queue_head_init(&fragmented_skb_q);
+
if (sk->sk_shutdown & RCV_SHUTDOWN)
return;
skb = alloc_skb(msg->length, GFP_ATOMIC | GFP_DMA);
if (!skb) {
- iucv_message_reject(path, msg);
+ iucv_path_sever(path, NULL);
return;
}
@@ -952,14 +1020,39 @@ static void iucv_callback_rx(struct iucv_path *path, struct iucv_message *msg)
kfree_skb(skb);
return;
}
+ if (skb->truesize >= sk->sk_rcvbuf / 4) {
+ rc = iucv_fragment_skb(sk, skb, msg->length,
+ &fragmented_skb_q);
+ kfree_skb(skb);
+ skb = NULL;
+ if (rc) {
+ iucv_path_sever(path, NULL);
+ return;
+ }
+ } else {
+ skb_reset_transport_header(skb);
+ skb_reset_network_header(skb);
+ skb->len = msg->length;
+ }
+ }
+ /* Queue the fragmented skb */
+ fskb = skb_dequeue(&fragmented_skb_q);
+ while (fskb) {
+ if (!skb_queue_empty(&iucv->backlog_skb_q))
+ skb_queue_tail(&iucv->backlog_skb_q, fskb);
+ else if (sock_queue_rcv_skb(sk, fskb))
+ skb_queue_tail(&iucv_sk(sk)->backlog_skb_q, fskb);
+ fskb = skb_dequeue(&fragmented_skb_q);
+ }
- skb->h.raw = skb->data;
- skb->nh.raw = skb->data;
- skb->len = msg->length;
+ /* Queue the original skb if it exists (was not fragmented) */
+ if (skb) {
+ if (!skb_queue_empty(&iucv->backlog_skb_q))
+ skb_queue_tail(&iucv_sk(sk)->backlog_skb_q, skb);
+ else if (sock_queue_rcv_skb(sk, skb))
+ skb_queue_tail(&iucv_sk(sk)->backlog_skb_q, skb);
}
- if (sock_queue_rcv_skb(sk, skb))
- kfree_skb(skb);
}
static void iucv_callback_txdone(struct iucv_path *path,
@@ -971,17 +1064,27 @@ static void iucv_callback_txdone(struct iucv_path *path,
struct sk_buff *list_skb = list->next;
unsigned long flags;
- spin_lock_irqsave(&list->lock, flags);
+ if (list_skb) {
+ spin_lock_irqsave(&list->lock, flags);
+
+ do {
+ this = list_skb;
+ list_skb = list_skb->next;
+ } while (memcmp(&msg->tag, this->cb, 4) && list_skb);
+
+ spin_unlock_irqrestore(&list->lock, flags);
- do {
- this = list_skb;
- list_skb = list_skb->next;
- } while (memcmp(&msg->tag, this->cb, 4));
+ skb_unlink(this, &iucv_sk(sk)->send_skb_q);
+ kfree_skb(this);
+ }
- spin_unlock_irqrestore(&list->lock, flags);
+ if (sk->sk_state == IUCV_CLOSING) {
+ if (skb_queue_empty(&iucv_sk(sk)->send_skb_q)) {
+ sk->sk_state = IUCV_CLOSED;
+ sk->sk_state_change(sk);
+ }
+ }
- skb_unlink(this, &iucv_sk(sk)->send_skb_q);
- kfree_skb(this);
}
static void iucv_callback_connrej(struct iucv_path *path, u8 ipuser[16])
@@ -1022,7 +1125,7 @@ static struct net_proto_family iucv_sock_family_ops = {
.create = iucv_sock_create,
};
-static int afiucv_init(void)
+static int __init afiucv_init(void)
{
int err;
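The af_iucv hunks above introduce a private backlog queue: receive
callbacks try sock_queue_rcv_skb() first and park the skb on the backlog
when the socket buffer is full, and iucv_sock_recvmsg() drains the
backlog back into sk_receive_queue as space frees up. A minimal sketch of
the two halves (names are illustrative, not from the patch):

#include <linux/skbuff.h>
#include <net/sock.h>

static void example_rx(struct sock *sk, struct sk_buff_head *backlog,
                       struct sk_buff *skb)
{
        /* never bypass queued backlog skbs, or delivery gets reordered */
        if (!skb_queue_empty(backlog) || sock_queue_rcv_skb(sk, skb))
                skb_queue_tail(backlog, skb);
}

static void example_drain(struct sock *sk, struct sk_buff_head *backlog)
{
        struct sk_buff *skb;

        while ((skb = skb_dequeue(backlog)) != NULL) {
                if (sock_queue_rcv_skb(sk, skb)) {
                        /* still no room; retry on the next receive */
                        skb_queue_head(backlog, skb);
                        break;
                }
        }
}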
diff --git a/net/iucv/iucv.c b/net/iucv/iucv.c
index 1b10d576f22..fb3faf72e85 100644
--- a/net/iucv/iucv.c
+++ b/net/iucv/iucv.c
@@ -32,7 +32,6 @@
#include <linux/module.h>
#include <linux/moduleparam.h>
-
#include <linux/spinlock.h>
#include <linux/kernel.h>
#include <linux/slab.h>
@@ -69,7 +68,7 @@
#define IUCV_IPNORPY 0x10
#define IUCV_IPALL 0x80
-static int iucv_bus_match (struct device *dev, struct device_driver *drv)
+static int iucv_bus_match(struct device *dev, struct device_driver *drv)
{
return 0;
}
@@ -78,8 +77,11 @@ struct bus_type iucv_bus = {
.name = "iucv",
.match = iucv_bus_match,
};
+EXPORT_SYMBOL(iucv_bus);
struct device *iucv_root;
+EXPORT_SYMBOL(iucv_root);
+
static int iucv_available;
/* General IUCV interrupt structure */
@@ -90,20 +92,43 @@ struct iucv_irq_data {
u32 res2[8];
};
-struct iucv_work {
+struct iucv_irq_list {
struct list_head list;
struct iucv_irq_data data;
};
-static LIST_HEAD(iucv_work_queue);
-static DEFINE_SPINLOCK(iucv_work_lock);
-
static struct iucv_irq_data *iucv_irq_data;
static cpumask_t iucv_buffer_cpumask = CPU_MASK_NONE;
static cpumask_t iucv_irq_cpumask = CPU_MASK_NONE;
-static void iucv_tasklet_handler(unsigned long);
-static DECLARE_TASKLET(iucv_tasklet, iucv_tasklet_handler,0);
+/*
+ * Queue of interrupt buffers lock for delivery via the tasklet
+ * (fast but can't call smp_call_function).
+ */
+static LIST_HEAD(iucv_task_queue);
+
+/*
+ * The tasklet for fast delivery of iucv interrupts.
+ */
+static void iucv_tasklet_fn(unsigned long);
+static DECLARE_TASKLET(iucv_tasklet, iucv_tasklet_fn, 0);
+
+/*
+ * Queue of interrupt buffers for delivery via a work queue
+ * (slower but can call smp_call_function).
+ */
+static LIST_HEAD(iucv_work_queue);
+
+/*
+ * The work element to deliver path pending interrupts.
+ */
+static void iucv_work_fn(struct work_struct *work);
+static DECLARE_WORK(iucv_work, iucv_work_fn);
+
+/*
+ * Spinlock protecting task and work queue.
+ */
+static DEFINE_SPINLOCK(iucv_queue_lock);
enum iucv_command_codes {
IUCV_QUERY = 0,
@@ -147,10 +172,10 @@ static unsigned long iucv_max_pathid;
static DEFINE_SPINLOCK(iucv_table_lock);
/*
- * iucv_tasklet_cpu: contains the number of the cpu executing the tasklet.
- * Needed for iucv_path_sever called from tasklet.
+ * iucv_active_cpu: contains the number of the cpu executing the tasklet
+ * or the work handler. Needed for iucv_path_sever called from tasklet.
*/
-static int iucv_tasklet_cpu = -1;
+static int iucv_active_cpu = -1;
/*
* Mutex and wait queue for iucv_register/iucv_unregister.
@@ -382,7 +407,7 @@ static void iucv_declare_cpu(void *data)
rc = iucv_call_b2f0(IUCV_DECLARE_BUFFER, parm);
if (rc) {
char *err = "Unknown";
- switch(rc) {
+ switch (rc) {
case 0x03:
err = "Directory error";
break;
@@ -449,17 +474,19 @@ static void iucv_setmask_mp(void)
{
int cpu;
+ preempt_disable();
for_each_online_cpu(cpu)
/* Enable all cpus with a declared buffer. */
if (cpu_isset(cpu, iucv_buffer_cpumask) &&
!cpu_isset(cpu, iucv_irq_cpumask))
smp_call_function_on(iucv_allow_cpu, NULL, 0, 1, cpu);
+ preempt_enable();
}
/**
* iucv_setmask_up
*
- * Allow iucv interrupts on a single cpus.
+ * Allow iucv interrupts on a single cpu.
*/
static void iucv_setmask_up(void)
{
@@ -493,8 +520,10 @@ static int iucv_enable(void)
goto out;
/* Declare per cpu buffers. */
rc = -EIO;
+ preempt_disable();
for_each_online_cpu(cpu)
smp_call_function_on(iucv_declare_cpu, NULL, 0, 1, cpu);
+ preempt_enable();
if (cpus_empty(iucv_buffer_cpumask))
/* No cpu could declare an iucv buffer. */
goto out_path;
@@ -519,7 +548,6 @@ static void iucv_disable(void)
kfree(iucv_path_table);
}
-#ifdef CONFIG_HOTPLUG_CPU
static int __cpuinit iucv_cpu_notify(struct notifier_block *self,
unsigned long action, void *hcpu)
{
@@ -562,10 +590,9 @@ static int __cpuinit iucv_cpu_notify(struct notifier_block *self,
return NOTIFY_OK;
}
-static struct notifier_block iucv_cpu_notifier = {
+static struct notifier_block __cpuinitdata iucv_cpu_notifier = {
.notifier_call = iucv_cpu_notify,
};
-#endif
/**
* iucv_sever_pathid
@@ -586,48 +613,49 @@ static int iucv_sever_pathid(u16 pathid, u8 userdata[16])
return iucv_call_b2f0(IUCV_SEVER, parm);
}
+#ifdef CONFIG_SMP
/**
- * __iucv_cleanup_pathid
+ * __iucv_cleanup_queue
* @dummy: unused dummy argument
*
* Nop function called via smp_call_function to force work items from
* pending external iucv interrupts to the work queue.
*/
-static void __iucv_cleanup_pathid(void *dummy)
+static void __iucv_cleanup_queue(void *dummy)
{
}
+#endif
/**
- * iucv_cleanup_pathid
- * @pathid: 16 bit pathid
+ * iucv_cleanup_queue
*
* Function called after a path has been severed to find all remaining
* work items for the now stale pathid. The caller needs to hold the
* iucv_table_lock.
*/
-static void iucv_cleanup_pathid(u16 pathid)
+static void iucv_cleanup_queue(void)
{
- struct iucv_work *p, *n;
+ struct iucv_irq_list *p, *n;
/*
- * Path is severed, the pathid can be reused immediatly on
- * a iucv connect or a connection pending interrupt.
- * iucv_path_connect and connection pending interrupt will
- * wait until the iucv_table_lock is released before the
- * recycled pathid enters the system.
- * Force remaining interrupts to the work queue, then
- * scan the work queue for items of this path.
+ * When a path is severed, the pathid can be reused immediately
+ * on an iucv connect or a connection pending interrupt. Remove
+ * all entries from the task queue that refer to a stale pathid
+ * (iucv_path_table[ix] == NULL). Only then do the iucv connect
+ * or deliver the connection pending interrupt. To get all the
+ * pending interrupts force them to the work queue by calling
+ * an empty function on all cpus.
*/
- smp_call_function(__iucv_cleanup_pathid, NULL, 0, 1);
- spin_lock_irq(&iucv_work_lock);
- list_for_each_entry_safe(p, n, &iucv_work_queue, list) {
- /* Remove work items for pathid except connection pending */
- if (p->data.ippathid == pathid && p->data.iptype != 0x01) {
+ smp_call_function(__iucv_cleanup_queue, NULL, 0, 1);
+ spin_lock_irq(&iucv_queue_lock);
+ list_for_each_entry_safe(p, n, &iucv_task_queue, list) {
+ /* Remove stale work items from the task queue. */
+ if (iucv_path_table[p->data.ippathid] == NULL) {
list_del(&p->list);
kfree(p);
}
}
- spin_unlock_irq(&iucv_work_lock);
+ spin_unlock_irq(&iucv_queue_lock);
}
/**
@@ -665,6 +693,7 @@ out_mutex:
mutex_unlock(&iucv_register_mutex);
return rc;
}
+EXPORT_SYMBOL(iucv_register);
/**
* iucv_unregister
@@ -686,7 +715,6 @@ void iucv_unregister(struct iucv_handler *handler, int smp)
iucv_sever_pathid(p->pathid, NULL);
iucv_path_table[p->pathid] = NULL;
list_del(&p->list);
- iucv_cleanup_pathid(p->pathid);
iucv_path_free(p);
}
spin_unlock_bh(&iucv_table_lock);
@@ -698,6 +726,7 @@ void iucv_unregister(struct iucv_handler *handler, int smp)
iucv_setmask_mp();
mutex_unlock(&iucv_register_mutex);
}
+EXPORT_SYMBOL(iucv_unregister);
/**
* iucv_path_accept
@@ -736,6 +765,7 @@ int iucv_path_accept(struct iucv_path *path, struct iucv_handler *handler,
local_bh_enable();
return rc;
}
+EXPORT_SYMBOL(iucv_path_accept);
/**
* iucv_path_connect
@@ -759,9 +789,9 @@ int iucv_path_connect(struct iucv_path *path, struct iucv_handler *handler,
union iucv_param *parm;
int rc;
- preempt_disable();
- if (iucv_tasklet_cpu != smp_processor_id())
- spin_lock_bh(&iucv_table_lock);
+ BUG_ON(in_atomic());
+ spin_lock_bh(&iucv_table_lock);
+ iucv_cleanup_queue();
parm = percpu_ptr(iucv_param, smp_processor_id());
memset(parm, 0, sizeof(union iucv_param));
parm->ctrl.ipmsglim = path->msglim;
@@ -796,11 +826,10 @@ int iucv_path_connect(struct iucv_path *path, struct iucv_handler *handler,
rc = -EIO;
}
}
- if (iucv_tasklet_cpu != smp_processor_id())
- spin_unlock_bh(&iucv_table_lock);
- preempt_enable();
+ spin_unlock_bh(&iucv_table_lock);
return rc;
}
+EXPORT_SYMBOL(iucv_path_connect);
/**
* iucv_path_quiesce:
@@ -827,6 +856,7 @@ int iucv_path_quiesce(struct iucv_path *path, u8 userdata[16])
local_bh_enable();
return rc;
}
+EXPORT_SYMBOL(iucv_path_quiesce);
/**
* iucv_path_resume:
@@ -867,21 +897,20 @@ int iucv_path_sever(struct iucv_path *path, u8 userdata[16])
{
int rc;
-
preempt_disable();
- if (iucv_tasklet_cpu != smp_processor_id())
+ if (iucv_active_cpu != smp_processor_id())
spin_lock_bh(&iucv_table_lock);
rc = iucv_sever_pathid(path->pathid, userdata);
if (!rc) {
iucv_path_table[path->pathid] = NULL;
list_del_init(&path->list);
- iucv_cleanup_pathid(path->pathid);
}
- if (iucv_tasklet_cpu != smp_processor_id())
+ if (iucv_active_cpu != smp_processor_id())
spin_unlock_bh(&iucv_table_lock);
preempt_enable();
return rc;
}
+EXPORT_SYMBOL(iucv_path_sever);
/**
* iucv_message_purge
@@ -914,6 +943,7 @@ int iucv_message_purge(struct iucv_path *path, struct iucv_message *msg,
local_bh_enable();
return rc;
}
+EXPORT_SYMBOL(iucv_message_purge);
/**
* iucv_message_receive
@@ -984,6 +1014,7 @@ int iucv_message_receive(struct iucv_path *path, struct iucv_message *msg,
local_bh_enable();
return rc;
}
+EXPORT_SYMBOL(iucv_message_receive);
/**
* iucv_message_reject
@@ -1012,6 +1043,7 @@ int iucv_message_reject(struct iucv_path *path, struct iucv_message *msg)
local_bh_enable();
return rc;
}
+EXPORT_SYMBOL(iucv_message_reject);
/**
* iucv_message_reply
@@ -1055,6 +1087,7 @@ int iucv_message_reply(struct iucv_path *path, struct iucv_message *msg,
local_bh_enable();
return rc;
}
+EXPORT_SYMBOL(iucv_message_reply);
/**
* iucv_message_send
@@ -1103,6 +1136,7 @@ int iucv_message_send(struct iucv_path *path, struct iucv_message *msg,
local_bh_enable();
return rc;
}
+EXPORT_SYMBOL(iucv_message_send);
/**
* iucv_message_send2way
@@ -1159,6 +1193,7 @@ int iucv_message_send2way(struct iucv_path *path, struct iucv_message *msg,
local_bh_enable();
return rc;
}
+EXPORT_SYMBOL(iucv_message_send2way);
/**
* iucv_path_pending
@@ -1246,8 +1281,7 @@ static void iucv_path_complete(struct iucv_irq_data *data)
struct iucv_path_complete *ipc = (void *) data;
struct iucv_path *path = iucv_path_table[ipc->ippathid];
- BUG_ON(!path || !path->handler);
- if (path->handler->path_complete)
+ if (path && path->handler && path->handler->path_complete)
path->handler->path_complete(path, ipc->ipuser);
}
@@ -1275,14 +1309,14 @@ static void iucv_path_severed(struct iucv_irq_data *data)
struct iucv_path_severed *ips = (void *) data;
struct iucv_path *path = iucv_path_table[ips->ippathid];
- BUG_ON(!path || !path->handler);
+ if (!path || !path->handler) /* Already severed */
+ return;
if (path->handler->path_severed)
path->handler->path_severed(path, ips->ipuser);
else {
iucv_sever_pathid(path->pathid, NULL);
iucv_path_table[path->pathid] = NULL;
list_del_init(&path->list);
- iucv_cleanup_pathid(path->pathid);
iucv_path_free(path);
}
}
@@ -1311,8 +1345,7 @@ static void iucv_path_quiesced(struct iucv_irq_data *data)
struct iucv_path_quiesced *ipq = (void *) data;
struct iucv_path *path = iucv_path_table[ipq->ippathid];
- BUG_ON(!path || !path->handler);
- if (path->handler->path_quiesced)
+ if (path && path->handler && path->handler->path_quiesced)
path->handler->path_quiesced(path, ipq->ipuser);
}
@@ -1340,8 +1373,7 @@ static void iucv_path_resumed(struct iucv_irq_data *data)
struct iucv_path_resumed *ipr = (void *) data;
struct iucv_path *path = iucv_path_table[ipr->ippathid];
- BUG_ON(!path || !path->handler);
- if (path->handler->path_resumed)
+ if (path && path->handler && path->handler->path_resumed)
path->handler->path_resumed(path, ipr->ipuser);
}
@@ -1373,8 +1405,7 @@ static void iucv_message_complete(struct iucv_irq_data *data)
struct iucv_path *path = iucv_path_table[imc->ippathid];
struct iucv_message msg;
- BUG_ON(!path || !path->handler);
- if (path->handler->message_complete) {
+ if (path && path->handler && path->handler->message_complete) {
msg.flags = imc->ipflags1;
msg.id = imc->ipmsgid;
msg.audit = imc->ipaudit;
@@ -1419,8 +1450,7 @@ static void iucv_message_pending(struct iucv_irq_data *data)
struct iucv_path *path = iucv_path_table[imp->ippathid];
struct iucv_message msg;
- BUG_ON(!path || !path->handler);
- if (path->handler->message_pending) {
+ if (path && path->handler && path->handler->message_pending) {
msg.flags = imp->ipflags1;
msg.id = imp->ipmsgid;
msg.class = imp->iptrgcls;
@@ -1435,17 +1465,16 @@ static void iucv_message_pending(struct iucv_irq_data *data)
}
/**
- * iucv_tasklet_handler:
+ * iucv_tasklet_fn:
*
* This tasklet loops over the queue of irq buffers created by
* iucv_external_interrupt, calls the appropriate action handler
* and then frees the buffer.
*/
-static void iucv_tasklet_handler(unsigned long ignored)
+static void iucv_tasklet_fn(unsigned long ignored)
{
typedef void iucv_irq_fn(struct iucv_irq_data *);
static iucv_irq_fn *irq_fn[] = {
- [0x01] = iucv_path_pending,
[0x02] = iucv_path_complete,
[0x03] = iucv_path_severed,
[0x04] = iucv_path_quiesced,
@@ -1455,38 +1484,70 @@ static void iucv_tasklet_handler(unsigned long ignored)
[0x08] = iucv_message_pending,
[0x09] = iucv_message_pending,
};
- struct iucv_work *p;
+ struct list_head task_queue = LIST_HEAD_INIT(task_queue);
+ struct iucv_irq_list *p, *n;
/* Serialize tasklet, iucv_path_sever and iucv_path_connect. */
spin_lock(&iucv_table_lock);
- iucv_tasklet_cpu = smp_processor_id();
+ iucv_active_cpu = smp_processor_id();
- spin_lock_irq(&iucv_work_lock);
- while (!list_empty(&iucv_work_queue)) {
- p = list_entry(iucv_work_queue.next, struct iucv_work, list);
+ spin_lock_irq(&iucv_queue_lock);
+ list_splice_init(&iucv_task_queue, &task_queue);
+ spin_unlock_irq(&iucv_queue_lock);
+
+ list_for_each_entry_safe(p, n, &task_queue, list) {
list_del_init(&p->list);
- spin_unlock_irq(&iucv_work_lock);
irq_fn[p->data.iptype](&p->data);
kfree(p);
- spin_lock_irq(&iucv_work_lock);
}
- spin_unlock_irq(&iucv_work_lock);
- iucv_tasklet_cpu = -1;
+ iucv_active_cpu = -1;
spin_unlock(&iucv_table_lock);
}
/**
+ * iucv_work_fn:
+ *
+ * This work function loops over the queue of path pending irq blocks
+ * created by iucv_external_interrupt, calls the appropriate action
+ * handler and then frees the buffer.
+ */
+static void iucv_work_fn(struct work_struct *work)
+{
+ typedef void iucv_irq_fn(struct iucv_irq_data *);
+ struct list_head work_queue = LIST_HEAD_INIT(work_queue);
+ struct iucv_irq_list *p, *n;
+
+ /* Serialize tasklet, iucv_path_sever and iucv_path_connect. */
+ spin_lock_bh(&iucv_table_lock);
+ iucv_active_cpu = smp_processor_id();
+
+ spin_lock_irq(&iucv_queue_lock);
+ list_splice_init(&iucv_work_queue, &work_queue);
+ spin_unlock_irq(&iucv_queue_lock);
+
+ iucv_cleanup_queue();
+ list_for_each_entry_safe(p, n, &work_queue, list) {
+ list_del_init(&p->list);
+ iucv_path_pending(&p->data);
+ kfree(p);
+ }
+
+ iucv_active_cpu = -1;
+ spin_unlock_bh(&iucv_table_lock);
+}
+
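
Both drain loops in this file now use the same splice-then-process idiom: the shared queue is detached in one step under iucv_queue_lock, and the private list is then walked with the lock dropped, replacing the old per-item lock/unlock dance. A generic sketch of the pattern, with stand-in type and function names:

    #include <linux/list.h>
    #include <linux/slab.h>
    #include <linux/spinlock.h>

    /* Illustrative stand-in for struct iucv_irq_list. */
    struct demo_item {
            struct list_head list;
            /* payload, e.g. struct iucv_irq_data data; */
    };

    static void demo_drain(struct list_head *queue, spinlock_t *lock)
    {
            LIST_HEAD(local);
            struct demo_item *p, *n;

            spin_lock_irq(lock);
            list_splice_init(queue, &local);   /* detach everything at once */
            spin_unlock_irq(lock);

            list_for_each_entry_safe(p, n, &local, list) {
                    list_del_init(&p->list);
                    /* dispatch p here */
                    kfree(p);
            }
    }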
+/**
* iucv_external_interrupt
* @code: irq code
*
* Handles external interrupts coming in from CP.
- * Places the interrupt buffer on a queue and schedules iucv_tasklet_handler().
+ * Places the interrupt buffer on a queue and schedules iucv_tasklet_fn().
*/
static void iucv_external_interrupt(u16 code)
{
struct iucv_irq_data *p;
- struct iucv_work *work;
+ struct iucv_irq_list *work;
p = percpu_ptr(iucv_irq_data, smp_processor_id());
if (p->ippathid >= iucv_max_pathid) {
@@ -1500,16 +1561,23 @@ static void iucv_external_interrupt(u16 code)
printk(KERN_ERR "iucv_do_int: unknown iucv interrupt\n");
return;
}
- work = kmalloc(sizeof(struct iucv_work), GFP_ATOMIC);
+ work = kmalloc(sizeof(struct iucv_irq_list), GFP_ATOMIC);
if (!work) {
printk(KERN_WARNING "iucv_external_interrupt: out of memory\n");
return;
}
memcpy(&work->data, p, sizeof(work->data));
- spin_lock(&iucv_work_lock);
- list_add_tail(&work->list, &iucv_work_queue);
- spin_unlock(&iucv_work_lock);
- tasklet_schedule(&iucv_tasklet);
+ spin_lock(&iucv_queue_lock);
+ if (p->iptype == 0x01) {
+ /* Path pending interrupt. */
+ list_add_tail(&work->list, &iucv_work_queue);
+ schedule_work(&iucv_work);
+ } else {
+ /* The other interrupts. */
+ list_add_tail(&work->list, &iucv_task_queue);
+ tasklet_schedule(&iucv_tasklet);
+ }
+ spin_unlock(&iucv_queue_lock);
}
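
The dispatch split above relies on a pair of queues and deferred-work objects declared earlier in the file, outside this hunk. A minimal sketch of what those declarations look like (illustrative, not the verbatim source):

    #include <linux/interrupt.h>
    #include <linux/workqueue.h>
    #include <linux/spinlock.h>
    #include <linux/list.h>

    static void iucv_tasklet_fn(unsigned long ignored);
    static void iucv_work_fn(struct work_struct *work);

    /* One lock guards both queues; iucv_external_interrupt() sorts
     * type 0x01 (path pending) into iucv_work_queue and all other
     * interrupt types into iucv_task_queue. */
    static LIST_HEAD(iucv_task_queue);
    static LIST_HEAD(iucv_work_queue);
    static DEFINE_SPINLOCK(iucv_queue_lock);

    static DECLARE_TASKLET(iucv_tasklet, iucv_tasklet_fn, 0);
    static DECLARE_WORK(iucv_work, iucv_work_fn);

Pushing path-pending events to keventd lets the handler block, for instance while allocating and accepting the new path, which a tasklet must not do; the remaining interrupt types stay in softirq context.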
/**
@@ -1517,7 +1585,7 @@ static void iucv_external_interrupt(u16 code)
*
* Allocates and initializes various data structures.
*/
-static int iucv_init(void)
+static int __init iucv_init(void)
{
int rc;
@@ -1528,7 +1596,7 @@ static int iucv_init(void)
rc = iucv_query_maxconn();
if (rc)
goto out;
- rc = register_external_interrupt (0x4000, iucv_external_interrupt);
+ rc = register_external_interrupt(0x4000, iucv_external_interrupt);
if (rc)
goto out;
rc = bus_register(&iucv_bus);
@@ -1539,7 +1607,7 @@ static int iucv_init(void)
rc = PTR_ERR(iucv_root);
goto out_bus;
}
- /* Note: GFP_DMA used used to get memory below 2G */
+ /* Note: GFP_DMA used to get memory below 2G */
iucv_irq_data = percpu_alloc(sizeof(struct iucv_irq_data),
GFP_KERNEL|GFP_DMA);
if (!iucv_irq_data) {
@@ -1577,14 +1645,16 @@ out:
*
* Frees everything allocated from iucv_init.
*/
-static void iucv_exit(void)
+static void __exit iucv_exit(void)
{
- struct iucv_work *p, *n;
+ struct iucv_irq_list *p, *n;
- spin_lock_irq(&iucv_work_lock);
+ spin_lock_irq(&iucv_queue_lock);
+ list_for_each_entry_safe(p, n, &iucv_task_queue, list)
+ kfree(p);
list_for_each_entry_safe(p, n, &iucv_work_queue, list)
kfree(p);
- spin_unlock_irq(&iucv_work_lock);
+ spin_unlock_irq(&iucv_queue_lock);
unregister_hotcpu_notifier(&iucv_cpu_notifier);
percpu_free(iucv_param);
percpu_free(iucv_irq_data);
@@ -1596,24 +1666,6 @@ static void iucv_exit(void)
subsys_initcall(iucv_init);
module_exit(iucv_exit);
-/**
- * Export all public stuff
- */
-EXPORT_SYMBOL (iucv_bus);
-EXPORT_SYMBOL (iucv_root);
-EXPORT_SYMBOL (iucv_register);
-EXPORT_SYMBOL (iucv_unregister);
-EXPORT_SYMBOL (iucv_path_accept);
-EXPORT_SYMBOL (iucv_path_connect);
-EXPORT_SYMBOL (iucv_path_quiesce);
-EXPORT_SYMBOL (iucv_path_sever);
-EXPORT_SYMBOL (iucv_message_purge);
-EXPORT_SYMBOL (iucv_message_receive);
-EXPORT_SYMBOL (iucv_message_reject);
-EXPORT_SYMBOL (iucv_message_reply);
-EXPORT_SYMBOL (iucv_message_send);
-EXPORT_SYMBOL (iucv_message_send2way);
-
MODULE_AUTHOR("(C) 2001 IBM Corp. by Fritz Elfert (felfert@millenux.com)");
MODULE_DESCRIPTION("Linux for S/390 IUCV lowlevel driver");
MODULE_LICENSE("GPL");
diff --git a/net/key/af_key.c b/net/key/af_key.c
index 345019345f0..a99444142dc 100644
--- a/net/key/af_key.c
+++ b/net/key/af_key.c
@@ -379,7 +379,7 @@ static int verify_address_len(void *p)
*/
return -EINVAL;
break;
- };
+ }
return 0;
}
@@ -3667,7 +3667,7 @@ static int pfkey_recvmsg(struct kiocb *kiocb,
copied = len;
}
- skb->h.raw = skb->data;
+ skb_reset_transport_header(skb);
err = skb_copy_datagram_iovec(skb, 0, msg->msg_iov, copied);
if (err)
goto out_free;
diff --git a/net/llc/llc_core.c b/net/llc/llc_core.c
index d12413cff5b..d4b13a031fd 100644
--- a/net/llc/llc_core.c
+++ b/net/llc/llc_core.c
@@ -160,8 +160,14 @@ static struct packet_type llc_tr_packet_type = {
static int __init llc_init(void)
{
- if (dev_base->next)
- memcpy(llc_station_mac_sa, dev_base->next->dev_addr, ETH_ALEN);
+ struct net_device *dev;
+
+ dev = first_net_device();
+ if (dev != NULL)
+ dev = next_net_device(dev);
+
+ if (dev != NULL)
+ memcpy(llc_station_mac_sa, dev->dev_addr, ETH_ALEN);
else
memset(llc_station_mac_sa, 0, ETH_ALEN);
dev_add_pack(&llc_packet_type);
diff --git a/net/llc/llc_input.c b/net/llc/llc_input.c
index b3f65d1e80b..099ed8fec14 100644
--- a/net/llc/llc_input.c
+++ b/net/llc/llc_input.c
@@ -112,7 +112,7 @@ static inline int llc_fixup_skb(struct sk_buff *skb)
if (unlikely(!pskb_may_pull(skb, llc_len)))
return 0;
- skb->h.raw += llc_len;
+ skb->transport_header += llc_len;
skb_pull(skb, llc_len);
if (skb->protocol == htons(ETH_P_802_2)) {
__be16 pdulen = eth_hdr(skb)->h_proto;
diff --git a/net/llc/llc_output.c b/net/llc/llc_output.c
index f4291f349e9..754f4fedc85 100644
--- a/net/llc/llc_output.c
+++ b/net/llc/llc_output.c
@@ -41,7 +41,8 @@ int llc_mac_hdr_init(struct sk_buff *skb,
struct net_device *dev = skb->dev;
struct trh_hdr *trh;
- skb->mac.raw = skb_push(skb, sizeof(*trh));
+ skb_push(skb, sizeof(*trh));
+ skb_reset_mac_header(skb);
trh = tr_hdr(skb);
trh->ac = AC;
trh->fc = LLC_FRAME;
@@ -52,7 +53,7 @@ int llc_mac_hdr_init(struct sk_buff *skb,
if (da) {
memcpy(trh->daddr, da, dev->addr_len);
tr_source_route(skb, trh, dev);
- skb->mac.raw = skb->data;
+ skb_reset_mac_header(skb);
}
break;
}
@@ -62,7 +63,8 @@ int llc_mac_hdr_init(struct sk_buff *skb,
unsigned short len = skb->len;
struct ethhdr *eth;
- skb->mac.raw = skb_push(skb, sizeof(*eth));
+ skb_push(skb, sizeof(*eth));
+ skb_reset_mac_header(skb);
eth = eth_hdr(skb);
eth->h_proto = htons(len);
memcpy(eth->h_dest, da, ETH_ALEN);
diff --git a/net/llc/llc_sap.c b/net/llc/llc_sap.c
index 2615dc81aa3..2525165e2e8 100644
--- a/net/llc/llc_sap.c
+++ b/net/llc/llc_sap.c
@@ -36,11 +36,12 @@ struct sk_buff *llc_alloc_frame(struct sock *sk, struct net_device *dev)
struct sk_buff *skb = alloc_skb(128, GFP_ATOMIC);
if (skb) {
+ skb_reset_mac_header(skb);
skb_reserve(skb, 50);
- skb->nh.raw = skb->h.raw = skb->data;
+ skb_reset_network_header(skb);
+ skb_reset_transport_header(skb);
skb->protocol = htons(ETH_P_802_2);
skb->dev = dev;
- skb->mac.raw = skb->head;
if (sk != NULL)
skb_set_owner_w(skb, sk);
}
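
The llc changes above belong to the tree-wide conversion from raw skb->mac.raw / skb->nh.raw / skb->h.raw assignments to the header-reset helpers, which keep working once the sk_buff header members become offsets rather than pointers. A sketch of the before/after shape (demo_set_mac_header is an illustrative wrapper, not a kernel API):

    #include <linux/skbuff.h>

    /* Before this series:
     *     skb->mac.raw = skb_push(skb, hlen);
     * After it, push first and then record the position: */
    static void demo_set_mac_header(struct sk_buff *skb, unsigned int hlen)
    {
            skb_push(skb, hlen);            /* make room at the front */
            skb_reset_mac_header(skb);      /* mac header == skb->data */
    }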
diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig
index 54698af6d0a..c558f321425 100644
--- a/net/netfilter/Kconfig
+++ b/net/netfilter/Kconfig
@@ -25,6 +25,7 @@ config NETFILTER_NETLINK_LOG
and is also scheduled to replace the old syslog-based ipt_LOG
and ip6t_LOG modules.
+# Rename this to NF_CONNTRACK in 2.6.25

config NF_CONNTRACK_ENABLED
tristate "Netfilter connection tracking support"
help
@@ -39,42 +40,9 @@ config NF_CONNTRACK_ENABLED
To compile it as a module, choose M here. If unsure, say N.
-choice
- prompt "Netfilter connection tracking support"
- depends on NF_CONNTRACK_ENABLED
-
-config NF_CONNTRACK_SUPPORT
- bool "Layer 3 Independent Connection tracking"
- help
- Layer 3 independent connection tracking is experimental scheme
- which generalize ip_conntrack to support other layer 3 protocols.
-
- This is required to do Masquerading or other kinds of Network
- Address Translation (except for Fast NAT). It can also be used to
- enhance packet filtering (see `Connection state match support'
- below).
-
-config IP_NF_CONNTRACK_SUPPORT
- bool "Layer 3 Dependent Connection tracking (OBSOLETE)"
- help
- The old, Layer 3 dependent ip_conntrack subsystem of netfilter.
-
- This is required to do Masquerading or other kinds of Network
- Address Translation (except for Fast NAT). It can also be used to
- enhance packet filtering (see `Connection state match support'
- below).
-
-endchoice
-
config NF_CONNTRACK
tristate
- default m if NF_CONNTRACK_SUPPORT && NF_CONNTRACK_ENABLED=m
- default y if NF_CONNTRACK_SUPPORT && NF_CONNTRACK_ENABLED=y
-
-config IP_NF_CONNTRACK
- tristate
- default m if IP_NF_CONNTRACK_SUPPORT && NF_CONNTRACK_ENABLED=m
- default y if IP_NF_CONNTRACK_SUPPORT && NF_CONNTRACK_ENABLED=y
+ default NF_CONNTRACK_ENABLED
config NF_CT_ACCT
bool "Connection tracking flow accounting"
@@ -303,9 +271,8 @@ config NETFILTER_XT_TARGET_CONNMARK
tristate '"CONNMARK" target support'
depends on NETFILTER_XTABLES
depends on IP_NF_MANGLE || IP6_NF_MANGLE
- depends on IP_NF_CONNTRACK || NF_CONNTRACK
- select IP_NF_CONNTRACK_MARK if IP_NF_CONNTRACK
- select NF_CONNTRACK_MARK if NF_CONNTRACK
+ depends on NF_CONNTRACK
+ select NF_CONNTRACK_MARK
help
This option adds a `CONNMARK' target, which allows one to manipulate
the connection mark value. Similar to the MARK target, but
@@ -366,7 +333,7 @@ config NETFILTER_XT_TARGET_NOTRACK
tristate '"NOTRACK" target support'
depends on NETFILTER_XTABLES
depends on IP_NF_RAW || IP6_NF_RAW
- depends on IP_NF_CONNTRACK || NF_CONNTRACK
+ depends on NF_CONNTRACK
help
The NOTRACK target allows a select rule to specify
which packets *not* to enter the conntrack/NAT
@@ -387,9 +354,7 @@ config NETFILTER_XT_TARGET_SECMARK
config NETFILTER_XT_TARGET_CONNSECMARK
tristate '"CONNSECMARK" target support'
- depends on NETFILTER_XTABLES && \
- ((NF_CONNTRACK && NF_CONNTRACK_SECMARK) || \
- (IP_NF_CONNTRACK && IP_NF_CONNTRACK_SECMARK))
+ depends on NETFILTER_XTABLES && NF_CONNTRACK && NF_CONNTRACK_SECMARK
help
The CONNSECMARK target copies security markings from packets
to connections, and restores security markings from connections
@@ -437,9 +402,8 @@ config NETFILTER_XT_MATCH_COMMENT
config NETFILTER_XT_MATCH_CONNBYTES
tristate '"connbytes" per-connection counter match support'
depends on NETFILTER_XTABLES
- depends on IP_NF_CONNTRACK || NF_CONNTRACK
- select IP_NF_CT_ACCT if IP_NF_CONNTRACK
- select NF_CT_ACCT if NF_CONNTRACK
+ depends on NF_CONNTRACK
+ select NF_CT_ACCT
help
This option adds a `connbytes' match, which allows you to match the
number of bytes and/or packets for each direction within a connection.
@@ -450,9 +414,8 @@ config NETFILTER_XT_MATCH_CONNBYTES
config NETFILTER_XT_MATCH_CONNMARK
tristate '"connmark" connection mark match support'
depends on NETFILTER_XTABLES
- depends on IP_NF_CONNTRACK || NF_CONNTRACK
- select IP_NF_CONNTRACK_MARK if IP_NF_CONNTRACK
- select NF_CONNTRACK_MARK if NF_CONNTRACK
+ depends on NF_CONNTRACK
+ select NF_CONNTRACK_MARK
help
This option adds a `connmark' match, which allows you to match the
connection mark value previously set for the session by `CONNMARK'.
@@ -464,7 +427,7 @@ config NETFILTER_XT_MATCH_CONNMARK
config NETFILTER_XT_MATCH_CONNTRACK
tristate '"conntrack" connection tracking match support'
depends on NETFILTER_XTABLES
- depends on IP_NF_CONNTRACK || NF_CONNTRACK
+ depends on NF_CONNTRACK
help
This is a general conntrack match module, a superset of the state match.
@@ -508,7 +471,7 @@ config NETFILTER_XT_MATCH_ESP
config NETFILTER_XT_MATCH_HELPER
tristate '"helper" match support'
depends on NETFILTER_XTABLES
- depends on IP_NF_CONNTRACK || NF_CONNTRACK
+ depends on NF_CONNTRACK
help
Helper matching allows you to match packets in dynamic connections
tracked by a conntrack-helper, ie. ip_conntrack_ftp
@@ -632,7 +595,7 @@ config NETFILTER_XT_MATCH_SCTP
config NETFILTER_XT_MATCH_STATE
tristate '"state" match support'
depends on NETFILTER_XTABLES
- depends on IP_NF_CONNTRACK || NF_CONNTRACK
+ depends on NF_CONNTRACK
help
Connection state matching allows you to match packets based on their
relationship to a tracked connection (ie. previous packets). This
diff --git a/net/netfilter/core.c b/net/netfilter/core.c
index c3ebdbd917e..a84478ee2de 100644
--- a/net/netfilter/core.c
+++ b/net/netfilter/core.c
@@ -5,10 +5,6 @@
* way.
*
* Rusty Russell (C)2000 -- This code is GPL.
- *
- * February 2000: Modified by James Morris to have 1 queue per protocol.
- * 15-Mar-2000: Added NF_REPEAT --RR.
- * 08-May-2003: Internal logging interface added by Jozsef Kadlecsik.
*/
#include <linux/kernel.h>
#include <linux/netfilter.h>
@@ -244,6 +240,7 @@ void nf_proto_csum_replace4(__sum16 *sum, struct sk_buff *skb,
}
EXPORT_SYMBOL(nf_proto_csum_replace4);
+#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
/* This does not belong here, but locally generated errors need it if connection
tracking in use: without this, connection may not be in hash table, and hence
manufactured ICMP or RST packets will not be associated with it. */
@@ -264,6 +261,22 @@ void nf_ct_attach(struct sk_buff *new, struct sk_buff *skb)
}
EXPORT_SYMBOL(nf_ct_attach);
+void (*nf_ct_destroy)(struct nf_conntrack *);
+EXPORT_SYMBOL(nf_ct_destroy);
+
+void nf_conntrack_destroy(struct nf_conntrack *nfct)
+{
+ void (*destroy)(struct nf_conntrack *);
+
+ rcu_read_lock();
+ destroy = rcu_dereference(nf_ct_destroy);
+ BUG_ON(destroy == NULL);
+ destroy(nfct);
+ rcu_read_unlock();
+}
+EXPORT_SYMBOL(nf_conntrack_destroy);
+#endif /* CONFIG_NF_CONNTRACK */
+
#ifdef CONFIG_PROC_FS
struct proc_dir_entry *proc_net_netfilter;
EXPORT_SYMBOL(proc_net_netfilter);
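
nf_conntrack_destroy() above introduces the same RCU-protected hook indirection already used for ip_ct_attach: the conntrack module publishes a function pointer at init, clears it at cleanup, and callers dereference it under rcu_read_lock(). The generic shape of the pattern, with invented names:

    #include <linux/rcupdate.h>

    static void (*demo_hook)(void *);

    static void demo_call(void *arg)
    {
            void (*fn)(void *);

            rcu_read_lock();
            fn = rcu_dereference(demo_hook);
            if (fn)
                    fn(arg);
            rcu_read_unlock();
    }

    /* Provider side: rcu_assign_pointer(demo_hook, impl) at init;
     * rcu_assign_pointer(demo_hook, NULL) plus synchronize_rcu()
     * before impl may go away. */

The BUG_ON(destroy == NULL) in the real code is stricter than the NULL check sketched here, presumably because an skb can only hold a conntrack reference while the module that installed the pointer is still loaded.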
diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
index b3a70eb6d42..e132c8ae878 100644
--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c
@@ -9,24 +9,6 @@
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
- *
- * 23 Apr 2001: Harald Welte <laforge@gnumonks.org>
- * - new API and handling of conntrack/nat helpers
- * - now capable of multiple expectations for one master
- * 16 Jul 2002: Harald Welte <laforge@gnumonks.org>
- * - add usage/reference counts to ip_conntrack_expect
- * - export ip_conntrack[_expect]_{find_get,put} functions
- * 16 Dec 2003: Yasuyuki Kozakai @USAGI <yasuyuki.kozakai@toshiba.co.jp>
- * - generalize L3 protocol denendent part.
- * 23 Mar 2004: Yasuyuki Kozakai @USAGI <yasuyuki.kozakai@toshiba.co.jp>
- * - add support various size of conntrack structures.
- * 26 Jan 2006: Harald Welte <laforge@netfilter.org>
- * - restructure nf_conn (introduce nf_conn_help)
- * - redesign 'features' how they were originally intended
- * 26 Feb 2006: Pablo Neira Ayuso <pablo@eurodev.net>
- * - add support for L3 protocol module load on demand.
- *
- * Derived from net/ipv4/netfilter/ip_conntrack_core.c
*/
#include <linux/types.h>
@@ -128,10 +110,11 @@ static u_int32_t __hash_conntrack(const struct nf_conntrack_tuple *tuple,
unsigned int size, unsigned int rnd)
{
unsigned int a, b;
- a = jhash((void *)tuple->src.u3.all, sizeof(tuple->src.u3.all),
- ((tuple->src.l3num) << 16) | tuple->dst.protonum);
- b = jhash((void *)tuple->dst.u3.all, sizeof(tuple->dst.u3.all),
- (tuple->src.u.all << 16) | tuple->dst.u.all);
+
+ a = jhash2(tuple->src.u3.all, ARRAY_SIZE(tuple->src.u3.all),
+ (tuple->src.l3num << 16) | tuple->dst.protonum);
+ b = jhash2(tuple->dst.u3.all, ARRAY_SIZE(tuple->dst.u3.all),
+ (tuple->src.u.all << 16) | tuple->dst.u.all);
return jhash_2words(a, b, rnd) % size;
}
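
jhash2() takes a count of u32 words rather than a byte length, which suits the tuple's word-aligned u3.all[] address arrays and avoids the byte-oriented inner loop of plain jhash(). A minimal sketch of the call:

    #include <linux/jhash.h>
    #include <linux/types.h>

    /* u3.all is a u32[4], so the length argument is the word count. */
    static u32 demo_hash_addr(const u32 addr[4], u32 initval)
    {
            return jhash2(addr, 4, initval);
    }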
@@ -633,13 +616,11 @@ __nf_conntrack_alloc(const struct nf_conntrack_tuple *orig,
memset(conntrack, 0, nf_ct_cache[features].size);
conntrack->features = features;
atomic_set(&conntrack->ct_general.use, 1);
- conntrack->ct_general.destroy = destroy_conntrack;
conntrack->tuplehash[IP_CT_DIR_ORIGINAL].tuple = *orig;
conntrack->tuplehash[IP_CT_DIR_REPLY].tuple = *repl;
/* Don't set timer yet: wait for confirmation */
- init_timer(&conntrack->timeout);
- conntrack->timeout.data = (unsigned long)conntrack;
- conntrack->timeout.function = death_by_timeout;
+ setup_timer(&conntrack->timeout, death_by_timeout,
+ (unsigned long)conntrack);
read_unlock_bh(&nf_ct_cache_lock);
return conntrack;
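
setup_timer(), used here and again in the nf_conntrack_expect and nfnetlink_log hunks below, simply folds the three removed initialisation lines into one call; arming the timer stays a separate step. Sketch:

    #include <linux/timer.h>

    static void demo_timeout(unsigned long data)
    {
            /* timer callback */
    }

    static void demo_arm(struct timer_list *t, unsigned long data,
                         unsigned long expires)
    {
            /* replaces: init_timer(t); t->data = data;
             *           t->function = demo_timeout;      */
            setup_timer(t, demo_timeout, data);
            t->expires = expires;
            add_timer(t);
    }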
@@ -768,7 +749,7 @@ resolve_normal_ct(struct sk_buff *skb,
struct nf_conntrack_tuple_hash *h;
struct nf_conn *ct;
- if (!nf_ct_get_tuple(skb, (unsigned int)(skb->nh.raw - skb->data),
+ if (!nf_ct_get_tuple(skb, skb_network_offset(skb),
dataoff, l3num, protonum, &tuple, l3proto,
l4proto)) {
DEBUGP("resolve_normal_ct: Can't get tuple\n");
@@ -960,7 +941,7 @@ void __nf_ct_refresh_acct(struct nf_conn *ct,
if (do_acct) {
ct->counters[CTINFO2DIR(ctinfo)].packets++;
ct->counters[CTINFO2DIR(ctinfo)].bytes +=
- skb->len - (unsigned int)(skb->nh.raw - skb->data);
+ skb->len - skb_network_offset(skb);
if ((ct->counters[CTINFO2DIR(ctinfo)].packets & 0x80000000)
|| (ct->counters[CTINFO2DIR(ctinfo)].bytes & 0x80000000))
@@ -1140,6 +1121,8 @@ void nf_conntrack_cleanup(void)
while (atomic_read(&nf_conntrack_untracked.ct_general.use) > 1)
schedule();
+ rcu_assign_pointer(nf_ct_destroy, NULL);
+
for (i = 0; i < NF_CT_F_NUM; i++) {
if (nf_ct_cache[i].use == 0)
continue;
@@ -1152,14 +1135,7 @@ void nf_conntrack_cleanup(void)
free_conntrack_hash(nf_conntrack_hash, nf_conntrack_vmalloc,
nf_conntrack_htable_size);
- nf_conntrack_l4proto_unregister(&nf_conntrack_l4proto_generic);
-
- /* free l3proto protocol tables */
- for (i = 0; i < PF_MAX; i++)
- if (nf_ct_protos[i]) {
- kfree(nf_ct_protos[i]);
- nf_ct_protos[i] = NULL;
- }
+ nf_conntrack_proto_fini();
}
static struct list_head *alloc_hashtable(int size, int *vmalloced)
@@ -1237,7 +1213,6 @@ module_param_call(hashsize, set_hashsize, param_get_uint,
int __init nf_conntrack_init(void)
{
- unsigned int i;
int ret;
/* Idea from tcp.c: use 1/16384 of memory. On i386: 32MB
@@ -1279,18 +1254,13 @@ int __init nf_conntrack_init(void)
goto err_free_conntrack_slab;
}
- ret = nf_conntrack_l4proto_register(&nf_conntrack_l4proto_generic);
+ ret = nf_conntrack_proto_init();
if (ret < 0)
goto out_free_expect_slab;
- /* Don't NEED lock here, but good form anyway. */
- write_lock_bh(&nf_conntrack_lock);
- for (i = 0; i < AF_MAX; i++)
- nf_ct_l3protos[i] = &nf_conntrack_l3proto_generic;
- write_unlock_bh(&nf_conntrack_lock);
-
/* For use by REJECT target */
rcu_assign_pointer(ip_ct_attach, __nf_conntrack_attach);
+ rcu_assign_pointer(nf_ct_destroy, destroy_conntrack);
/* Set up fake conntrack:
- to never be deleted, not in any hashes */
diff --git a/net/netfilter/nf_conntrack_ecache.c b/net/netfilter/nf_conntrack_ecache.c
index 1a223e0c085..6bd421df2db 100644
--- a/net/netfilter/nf_conntrack_ecache.c
+++ b/net/netfilter/nf_conntrack_ecache.c
@@ -91,3 +91,26 @@ void nf_ct_event_cache_flush(void)
}
}
+int nf_conntrack_register_notifier(struct notifier_block *nb)
+{
+ return atomic_notifier_chain_register(&nf_conntrack_chain, nb);
+}
+EXPORT_SYMBOL_GPL(nf_conntrack_register_notifier);
+
+int nf_conntrack_unregister_notifier(struct notifier_block *nb)
+{
+ return atomic_notifier_chain_unregister(&nf_conntrack_chain, nb);
+}
+EXPORT_SYMBOL_GPL(nf_conntrack_unregister_notifier);
+
+int nf_conntrack_expect_register_notifier(struct notifier_block *nb)
+{
+ return atomic_notifier_chain_register(&nf_conntrack_expect_chain, nb);
+}
+EXPORT_SYMBOL_GPL(nf_conntrack_expect_register_notifier);
+
+int nf_conntrack_expect_unregister_notifier(struct notifier_block *nb)
+{
+ return atomic_notifier_chain_unregister(&nf_conntrack_expect_chain, nb);
+}
+EXPORT_SYMBOL_GPL(nf_conntrack_expect_unregister_notifier);
diff --git a/net/netfilter/nf_conntrack_expect.c b/net/netfilter/nf_conntrack_expect.c
index ce70a6fc6bd..c31af29a443 100644
--- a/net/netfilter/nf_conntrack_expect.c
+++ b/net/netfilter/nf_conntrack_expect.c
@@ -290,9 +290,7 @@ static void nf_conntrack_expect_insert(struct nf_conntrack_expect *exp)
master_help->expecting++;
list_add(&exp->list, &nf_conntrack_expect_list);
- init_timer(&exp->timeout);
- exp->timeout.data = (unsigned long)exp;
- exp->timeout.function = expectation_timed_out;
+ setup_timer(&exp->timeout, expectation_timed_out, (unsigned long)exp);
exp->timeout.expires = jiffies + master_help->helper->timeout * HZ;
add_timer(&exp->timeout);
diff --git a/net/netfilter/nf_conntrack_ftp.c b/net/netfilter/nf_conntrack_ftp.c
index 3089dfc40c8..a186799f654 100644
--- a/net/netfilter/nf_conntrack_ftp.c
+++ b/net/netfilter/nf_conntrack_ftp.c
@@ -7,12 +7,6 @@
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
- *
- * 16 Dec 2003: Yasuyuki Kozakai @USAGI <yasuyuki.kozakai@toshiba.co.jp>
- * - enable working with Layer 3 protocol independent connection tracking.
- * - track EPRT and EPSV commands with IPv6 address.
- *
- * Derived from net/ipv4/netfilter/ip_conntrack_ftp.c
*/
#include <linux/module.h>
diff --git a/net/netfilter/nf_conntrack_netbios_ns.c b/net/netfilter/nf_conntrack_netbios_ns.c
index bb26a658cc1..1093478cc00 100644
--- a/net/netfilter/nf_conntrack_netbios_ns.c
+++ b/net/netfilter/nf_conntrack_netbios_ns.c
@@ -46,7 +46,7 @@ static int help(struct sk_buff **pskb, unsigned int protoff,
struct nf_conn *ct, enum ip_conntrack_info ctinfo)
{
struct nf_conntrack_expect *exp;
- struct iphdr *iph = (*pskb)->nh.iph;
+ struct iphdr *iph = ip_hdr(*pskb);
struct rtable *rt = (struct rtable *)(*pskb)->dst;
struct in_device *in_dev;
__be32 mask = 0;
diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c
index 48f05314ebf..aa1a97ee514 100644
--- a/net/netfilter/nf_conntrack_netlink.c
+++ b/net/netfilter/nf_conntrack_netlink.c
@@ -6,9 +6,6 @@
* (C) 2003 by Patrick Mchardy <kaber@trash.net>
* (C) 2005-2006 by Pablo Neira Ayuso <pablo@eurodev.net>
*
- * I've reworked this stuff to use attributes instead of conntrack
- * structures. 5.44 am. I need more tea. --pablo 05/07/11.
- *
* Initial connection tracking via netlink development funded and
* generally made possible by Network Robots, Inc. (www.networkrobots.com)
*
@@ -16,8 +13,6 @@
*
* This software may be used and distributed according to the terms
* of the GNU General Public License, incorporated herein by reference.
- *
- * Derived from ip_conntrack_netlink.c: Port by Pablo Neira Ayuso (05/11/14)
*/
#include <linux/init.h>
@@ -33,6 +28,7 @@
#include <linux/notifier.h>
#include <linux/netfilter.h>
+#include <net/netlink.h>
#include <net/netfilter/nf_conntrack.h>
#include <net/netfilter/nf_conntrack_core.h>
#include <net/netfilter/nf_conntrack_expect.h>
@@ -268,9 +264,7 @@ ctnetlink_fill_info(struct sk_buff *skb, u32 pid, u32 seq,
struct nlmsghdr *nlh;
struct nfgenmsg *nfmsg;
struct nfattr *nest_parms;
- unsigned char *b;
-
- b = skb->tail;
+ unsigned char *b = skb_tail_pointer(skb);
event |= NFNL_SUBSYS_CTNETLINK << 8;
nlh = NLMSG_PUT(skb, pid, seq, event, sizeof(struct nfgenmsg));
@@ -303,12 +297,12 @@ ctnetlink_fill_info(struct sk_buff *skb, u32 pid, u32 seq,
ctnetlink_dump_use(skb, ct) < 0)
goto nfattr_failure;
- nlh->nlmsg_len = skb->tail - b;
+ nlh->nlmsg_len = skb_tail_pointer(skb) - b;
return skb->len;
nlmsg_failure:
nfattr_failure:
- skb_trim(skb, b - skb->data);
+ nlmsg_trim(skb, b);
return -1;
}
@@ -322,7 +316,7 @@ static int ctnetlink_conntrack_event(struct notifier_block *this,
struct nf_conn *ct = (struct nf_conn *)ptr;
struct sk_buff *skb;
unsigned int type;
- unsigned char *b;
+ sk_buff_data_t b;
unsigned int flags = 0, group;
/* ignore our fake conntrack entry */
@@ -662,7 +656,7 @@ static const size_t cta_min[CTA_MAX] = {
static int
ctnetlink_del_conntrack(struct sock *ctnl, struct sk_buff *skb,
- struct nlmsghdr *nlh, struct nfattr *cda[], int *errp)
+ struct nlmsghdr *nlh, struct nfattr *cda[])
{
struct nf_conntrack_tuple_hash *h;
struct nf_conntrack_tuple tuple;
@@ -710,7 +704,7 @@ ctnetlink_del_conntrack(struct sock *ctnl, struct sk_buff *skb,
static int
ctnetlink_get_conntrack(struct sock *ctnl, struct sk_buff *skb,
- struct nlmsghdr *nlh, struct nfattr *cda[], int *errp)
+ struct nlmsghdr *nlh, struct nfattr *cda[])
{
struct nf_conntrack_tuple_hash *h;
struct nf_conntrack_tuple tuple;
@@ -721,22 +715,12 @@ ctnetlink_get_conntrack(struct sock *ctnl, struct sk_buff *skb,
int err = 0;
if (nlh->nlmsg_flags & NLM_F_DUMP) {
- u32 rlen;
-
#ifndef CONFIG_NF_CT_ACCT
if (NFNL_MSG_TYPE(nlh->nlmsg_type) == IPCTNL_MSG_CT_GET_CTRZERO)
return -ENOTSUPP;
#endif
- if ((*errp = netlink_dump_start(ctnl, skb, nlh,
- ctnetlink_dump_table,
- ctnetlink_done)) != 0)
- return -EINVAL;
-
- rlen = NLMSG_ALIGN(nlh->nlmsg_len);
- if (rlen > skb->len)
- rlen = skb->len;
- skb_pull(skb, rlen);
- return 0;
+ return netlink_dump_start(ctnl, skb, nlh, ctnetlink_dump_table,
+ ctnetlink_done);
}
if (nfattr_bad_size(cda, CTA_MAX, cta_min))
@@ -1010,7 +994,7 @@ err:
static int
ctnetlink_new_conntrack(struct sock *ctnl, struct sk_buff *skb,
- struct nlmsghdr *nlh, struct nfattr *cda[], int *errp)
+ struct nlmsghdr *nlh, struct nfattr *cda[])
{
struct nf_conntrack_tuple otuple, rtuple;
struct nf_conntrack_tuple_hash *h = NULL;
@@ -1152,9 +1136,7 @@ ctnetlink_exp_fill_info(struct sk_buff *skb, u32 pid, u32 seq,
{
struct nlmsghdr *nlh;
struct nfgenmsg *nfmsg;
- unsigned char *b;
-
- b = skb->tail;
+ unsigned char *b = skb_tail_pointer(skb);
event |= NFNL_SUBSYS_CTNETLINK_EXP << 8;
nlh = NLMSG_PUT(skb, pid, seq, event, sizeof(struct nfgenmsg));
@@ -1168,12 +1150,12 @@ ctnetlink_exp_fill_info(struct sk_buff *skb, u32 pid, u32 seq,
if (ctnetlink_exp_dump_expect(skb, exp) < 0)
goto nfattr_failure;
- nlh->nlmsg_len = skb->tail - b;
+ nlh->nlmsg_len = skb_tail_pointer(skb) - b;
return skb->len;
nlmsg_failure:
nfattr_failure:
- skb_trim(skb, b - skb->data);
+ nlmsg_trim(skb, b);
return -1;
}
@@ -1186,7 +1168,7 @@ static int ctnetlink_expect_event(struct notifier_block *this,
struct nf_conntrack_expect *exp = (struct nf_conntrack_expect *)ptr;
struct sk_buff *skb;
unsigned int type;
- unsigned char *b;
+ sk_buff_data_t b;
int flags = 0;
if (events & IPEXP_NEW) {
@@ -1263,7 +1245,7 @@ static const size_t cta_min_exp[CTA_EXPECT_MAX] = {
static int
ctnetlink_get_expect(struct sock *ctnl, struct sk_buff *skb,
- struct nlmsghdr *nlh, struct nfattr *cda[], int *errp)
+ struct nlmsghdr *nlh, struct nfattr *cda[])
{
struct nf_conntrack_tuple tuple;
struct nf_conntrack_expect *exp;
@@ -1276,17 +1258,9 @@ ctnetlink_get_expect(struct sock *ctnl, struct sk_buff *skb,
return -EINVAL;
if (nlh->nlmsg_flags & NLM_F_DUMP) {
- u32 rlen;
-
- if ((*errp = netlink_dump_start(ctnl, skb, nlh,
- ctnetlink_exp_dump_table,
- ctnetlink_done)) != 0)
- return -EINVAL;
- rlen = NLMSG_ALIGN(nlh->nlmsg_len);
- if (rlen > skb->len)
- rlen = skb->len;
- skb_pull(skb, rlen);
- return 0;
+ return netlink_dump_start(ctnl, skb, nlh,
+ ctnetlink_exp_dump_table,
+ ctnetlink_done);
}
if (cda[CTA_EXPECT_MASTER-1])
@@ -1333,7 +1307,7 @@ out:
static int
ctnetlink_del_expect(struct sock *ctnl, struct sk_buff *skb,
- struct nlmsghdr *nlh, struct nfattr *cda[], int *errp)
+ struct nlmsghdr *nlh, struct nfattr *cda[])
{
struct nf_conntrack_expect *exp, *tmp;
struct nf_conntrack_tuple tuple;
@@ -1467,7 +1441,7 @@ out:
static int
ctnetlink_new_expect(struct sock *ctnl, struct sk_buff *skb,
- struct nlmsghdr *nlh, struct nfattr *cda[], int *errp)
+ struct nlmsghdr *nlh, struct nfattr *cda[])
{
struct nf_conntrack_tuple tuple;
struct nf_conntrack_expect *exp;
diff --git a/net/netfilter/nf_conntrack_proto.c b/net/netfilter/nf_conntrack_proto.c
index 456155f05c7..6d947068c58 100644
--- a/net/netfilter/nf_conntrack_proto.c
+++ b/net/netfilter/nf_conntrack_proto.c
@@ -28,13 +28,13 @@
#include <net/netfilter/nf_conntrack_l4proto.h>
#include <net/netfilter/nf_conntrack_core.h>
-struct nf_conntrack_l4proto **nf_ct_protos[PF_MAX] __read_mostly;
+static struct nf_conntrack_l4proto **nf_ct_protos[PF_MAX] __read_mostly;
struct nf_conntrack_l3proto *nf_ct_l3protos[AF_MAX] __read_mostly;
EXPORT_SYMBOL_GPL(nf_ct_l3protos);
-#ifdef CONFIG_SYSCTL
-static DEFINE_MUTEX(nf_ct_proto_sysctl_mutex);
+static DEFINE_MUTEX(nf_ct_proto_mutex);
+#ifdef CONFIG_SYSCTL
static int
nf_ct_register_sysctl(struct ctl_table_header **header, struct ctl_table *path,
struct ctl_table *table, unsigned int *users)
@@ -164,13 +164,11 @@ static int nf_ct_l3proto_register_sysctl(struct nf_conntrack_l3proto *l3proto)
int err = 0;
#ifdef CONFIG_SYSCTL
- mutex_lock(&nf_ct_proto_sysctl_mutex);
if (l3proto->ctl_table != NULL) {
err = nf_ct_register_sysctl(&l3proto->ctl_table_header,
l3proto->ctl_table_path,
l3proto->ctl_table, NULL);
}
- mutex_unlock(&nf_ct_proto_sysctl_mutex);
#endif
return err;
}
@@ -178,11 +176,9 @@ static int nf_ct_l3proto_register_sysctl(struct nf_conntrack_l3proto *l3proto)
static void nf_ct_l3proto_unregister_sysctl(struct nf_conntrack_l3proto *l3proto)
{
#ifdef CONFIG_SYSCTL
- mutex_lock(&nf_ct_proto_sysctl_mutex);
if (l3proto->ctl_table_header != NULL)
nf_ct_unregister_sysctl(&l3proto->ctl_table_header,
l3proto->ctl_table, NULL);
- mutex_unlock(&nf_ct_proto_sysctl_mutex);
#endif
}
@@ -190,27 +186,23 @@ int nf_conntrack_l3proto_register(struct nf_conntrack_l3proto *proto)
{
int ret = 0;
- if (proto->l3proto >= AF_MAX) {
- ret = -EBUSY;
- goto out;
- }
+ if (proto->l3proto >= AF_MAX)
+ return -EBUSY;
- write_lock_bh(&nf_conntrack_lock);
+ mutex_lock(&nf_ct_proto_mutex);
if (nf_ct_l3protos[proto->l3proto] != &nf_conntrack_l3proto_generic) {
ret = -EBUSY;
goto out_unlock;
}
- rcu_assign_pointer(nf_ct_l3protos[proto->l3proto], proto);
- write_unlock_bh(&nf_conntrack_lock);
ret = nf_ct_l3proto_register_sysctl(proto);
if (ret < 0)
- nf_conntrack_l3proto_unregister(proto);
- return ret;
+ goto out_unlock;
+
+ rcu_assign_pointer(nf_ct_l3protos[proto->l3proto], proto);
out_unlock:
- write_unlock_bh(&nf_conntrack_lock);
-out:
+ mutex_unlock(&nf_ct_proto_mutex);
return ret;
}
EXPORT_SYMBOL_GPL(nf_conntrack_l3proto_register);
@@ -219,14 +211,14 @@ void nf_conntrack_l3proto_unregister(struct nf_conntrack_l3proto *proto)
{
BUG_ON(proto->l3proto >= AF_MAX);
- write_lock_bh(&nf_conntrack_lock);
+ mutex_lock(&nf_ct_proto_mutex);
BUG_ON(nf_ct_l3protos[proto->l3proto] != proto);
rcu_assign_pointer(nf_ct_l3protos[proto->l3proto],
&nf_conntrack_l3proto_generic);
- write_unlock_bh(&nf_conntrack_lock);
- synchronize_rcu();
-
nf_ct_l3proto_unregister_sysctl(proto);
+ mutex_unlock(&nf_ct_proto_mutex);
+
+ synchronize_rcu();
	/* Remove all conntrack entries for this protocol */
nf_ct_iterate_cleanup(kill_l3proto, proto);
@@ -238,7 +230,6 @@ static int nf_ct_l4proto_register_sysctl(struct nf_conntrack_l4proto *l4proto)
int err = 0;
#ifdef CONFIG_SYSCTL
- mutex_lock(&nf_ct_proto_sysctl_mutex);
if (l4proto->ctl_table != NULL) {
err = nf_ct_register_sysctl(l4proto->ctl_table_header,
nf_net_netfilter_sysctl_path,
@@ -260,7 +251,6 @@ static int nf_ct_l4proto_register_sysctl(struct nf_conntrack_l4proto *l4proto)
}
#endif /* CONFIG_NF_CONNTRACK_PROC_COMPAT */
out:
- mutex_unlock(&nf_ct_proto_sysctl_mutex);
#endif /* CONFIG_SYSCTL */
return err;
}
@@ -268,7 +258,6 @@ out:
static void nf_ct_l4proto_unregister_sysctl(struct nf_conntrack_l4proto *l4proto)
{
#ifdef CONFIG_SYSCTL
- mutex_lock(&nf_ct_proto_sysctl_mutex);
if (l4proto->ctl_table_header != NULL &&
*l4proto->ctl_table_header != NULL)
nf_ct_unregister_sysctl(l4proto->ctl_table_header,
@@ -279,7 +268,6 @@ static void nf_ct_l4proto_unregister_sysctl(struct nf_conntrack_l4proto *l4proto
nf_ct_unregister_sysctl(&l4proto->ctl_compat_table_header,
l4proto->ctl_compat_table, NULL);
#endif /* CONFIG_NF_CONNTRACK_PROC_COMPAT */
- mutex_unlock(&nf_ct_proto_sysctl_mutex);
#endif /* CONFIG_SYSCTL */
}
@@ -289,68 +277,41 @@ int nf_conntrack_l4proto_register(struct nf_conntrack_l4proto *l4proto)
{
int ret = 0;
- if (l4proto->l3proto >= PF_MAX) {
- ret = -EBUSY;
- goto out;
- }
-
- if (l4proto == &nf_conntrack_l4proto_generic)
- return nf_ct_l4proto_register_sysctl(l4proto);
+ if (l4proto->l3proto >= PF_MAX)
+ return -EBUSY;
-retry:
- write_lock_bh(&nf_conntrack_lock);
- if (nf_ct_protos[l4proto->l3proto]) {
- if (nf_ct_protos[l4proto->l3proto][l4proto->l4proto]
- != &nf_conntrack_l4proto_generic) {
- ret = -EBUSY;
- goto out_unlock;
- }
- } else {
+ mutex_lock(&nf_ct_proto_mutex);
+ if (!nf_ct_protos[l4proto->l3proto]) {
		/* l3proto may be loaded later. */
struct nf_conntrack_l4proto **proto_array;
int i;
- write_unlock_bh(&nf_conntrack_lock);
-
- proto_array = (struct nf_conntrack_l4proto **)
- kmalloc(MAX_NF_CT_PROTO *
- sizeof(struct nf_conntrack_l4proto *),
- GFP_KERNEL);
+ proto_array = kmalloc(MAX_NF_CT_PROTO *
+ sizeof(struct nf_conntrack_l4proto *),
+ GFP_KERNEL);
if (proto_array == NULL) {
ret = -ENOMEM;
- goto out;
+ goto out_unlock;
}
+
for (i = 0; i < MAX_NF_CT_PROTO; i++)
proto_array[i] = &nf_conntrack_l4proto_generic;
-
- write_lock_bh(&nf_conntrack_lock);
- if (nf_ct_protos[l4proto->l3proto]) {
- /* bad timing, but no problem */
- write_unlock_bh(&nf_conntrack_lock);
- kfree(proto_array);
- } else {
- nf_ct_protos[l4proto->l3proto] = proto_array;
- write_unlock_bh(&nf_conntrack_lock);
- }
-
- /*
- * Just once because array is never freed until unloading
- * nf_conntrack.ko
- */
- goto retry;
+ nf_ct_protos[l4proto->l3proto] = proto_array;
+ } else if (nf_ct_protos[l4proto->l3proto][l4proto->l4proto] !=
+ &nf_conntrack_l4proto_generic) {
+ ret = -EBUSY;
+ goto out_unlock;
}
- rcu_assign_pointer(nf_ct_protos[l4proto->l3proto][l4proto->l4proto], l4proto);
- write_unlock_bh(&nf_conntrack_lock);
-
ret = nf_ct_l4proto_register_sysctl(l4proto);
if (ret < 0)
- nf_conntrack_l4proto_unregister(l4proto);
- return ret;
+ goto out_unlock;
+
+ rcu_assign_pointer(nf_ct_protos[l4proto->l3proto][l4proto->l4proto],
+ l4proto);
out_unlock:
- write_unlock_bh(&nf_conntrack_lock);
-out:
+ mutex_unlock(&nf_ct_proto_mutex);
return ret;
}
EXPORT_SYMBOL_GPL(nf_conntrack_l4proto_register);
@@ -359,21 +320,42 @@ void nf_conntrack_l4proto_unregister(struct nf_conntrack_l4proto *l4proto)
{
BUG_ON(l4proto->l3proto >= PF_MAX);
- if (l4proto == &nf_conntrack_l4proto_generic) {
- nf_ct_l4proto_unregister_sysctl(l4proto);
- return;
- }
-
- write_lock_bh(&nf_conntrack_lock);
+ mutex_lock(&nf_ct_proto_mutex);
BUG_ON(nf_ct_protos[l4proto->l3proto][l4proto->l4proto] != l4proto);
rcu_assign_pointer(nf_ct_protos[l4proto->l3proto][l4proto->l4proto],
&nf_conntrack_l4proto_generic);
- write_unlock_bh(&nf_conntrack_lock);
- synchronize_rcu();
-
nf_ct_l4proto_unregister_sysctl(l4proto);
+ mutex_unlock(&nf_ct_proto_mutex);
+
+ synchronize_rcu();
	/* Remove all conntrack entries for this protocol */
nf_ct_iterate_cleanup(kill_l4proto, l4proto);
}
EXPORT_SYMBOL_GPL(nf_conntrack_l4proto_unregister);
+
+int nf_conntrack_proto_init(void)
+{
+ unsigned int i;
+ int err;
+
+ err = nf_ct_l4proto_register_sysctl(&nf_conntrack_l4proto_generic);
+ if (err < 0)
+ return err;
+
+ for (i = 0; i < AF_MAX; i++)
+ rcu_assign_pointer(nf_ct_l3protos[i],
+ &nf_conntrack_l3proto_generic);
+ return 0;
+}
+
+void nf_conntrack_proto_fini(void)
+{
+ unsigned int i;
+
+ nf_ct_l4proto_unregister_sysctl(&nf_conntrack_l4proto_generic);
+
+ /* free l3proto protocol tables */
+ for (i = 0; i < PF_MAX; i++)
+ kfree(nf_ct_protos[i]);
+}
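
With registration now serialized by nf_ct_proto_mutex, and the sysctl step ordered before publication so the error path no longer recurses into unregister, a protocol tracker's module init reduces to a single call. A skeleton caller (demo_* names are hypothetical, and a real tracker must also supply the pkt_to_tuple/invert_tuple/packet/new callbacks omitted here):

    #include <linux/module.h>
    #include <net/netfilter/nf_conntrack_l4proto.h>

    static struct nf_conntrack_l4proto demo_l4proto = {
            .l3proto = PF_INET,
            .l4proto = 253,         /* IANA experimental protocol number */
            .name    = "demo",
    };

    static int __init demo_init(void)
    {
            /* -EBUSY if another tracker already owns this slot */
            return nf_conntrack_l4proto_register(&demo_l4proto);
    }

    static void __exit demo_exit(void)
    {
            nf_conntrack_l4proto_unregister(&demo_l4proto);
    }

    module_init(demo_init);
    module_exit(demo_exit);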
diff --git a/net/netfilter/nf_conntrack_proto_generic.c b/net/netfilter/nf_conntrack_proto_generic.c
index 7c069939695..6faf1bed722 100644
--- a/net/netfilter/nf_conntrack_proto_generic.c
+++ b/net/netfilter/nf_conntrack_proto_generic.c
@@ -4,11 +4,6 @@
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
- *
- * 16 Dec 2003: Yasuyuki Kozakai @USAGI <yasuyuki.kozakai@toshiba.co.jp>
- * - enable working with L3 protocol independent connection tracking.
- *
- * Derived from net/ipv4/netfilter/ip_conntrack_proto_generic.c
*/
#include <linux/types.h>
diff --git a/net/netfilter/nf_conntrack_proto_sctp.c b/net/netfilter/nf_conntrack_proto_sctp.c
index 3c80558716a..0d3254b974c 100644
--- a/net/netfilter/nf_conntrack_proto_sctp.c
+++ b/net/netfilter/nf_conntrack_proto_sctp.c
@@ -7,15 +7,6 @@
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
- *
- * 17 Oct 2004: Yasuyuki Kozakai @USAGI <yasuyuki.kozakai@toshiba.co.jp>
- * - enable working with L3 protocol independent connection tracking.
- *
- * Derived from net/ipv4/ip_conntrack_sctp.c
- */
-
-/*
- * Added support for proc manipulation of timeouts.
*/
#include <linux/types.h>
diff --git a/net/netfilter/nf_conntrack_proto_tcp.c b/net/netfilter/nf_conntrack_proto_tcp.c
index 153d6619993..ccdd5d231e0 100644
--- a/net/netfilter/nf_conntrack_proto_tcp.c
+++ b/net/netfilter/nf_conntrack_proto_tcp.c
@@ -4,24 +4,6 @@
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
- *
- * Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>:
- * - Real stateful connection tracking
- * - Modified state transitions table
- * - Window scaling support added
- * - SACK support added
- *
- * Willy Tarreau:
- * - State table bugfixes
- * - More robust state changes
- * - Tuning timer parameters
- *
- * 27 Oct 2004: Yasuyuki Kozakai @USAGI <yasuyuki.kozakai@toshiba.co.jp>
- * - genelized Layer 3 protocol part.
- *
- * Derived from net/ipv4/netfilter/ip_conntrack_proto_tcp.c
- *
- * version 2.2
*/
#include <linux/types.h>
@@ -470,11 +452,10 @@ static void tcp_sack(const struct sk_buff *skb, unsigned int dataoff,
/* Fast path for timestamp-only option */
if (length == TCPOLEN_TSTAMP_ALIGNED*4
- && *(__be32 *)ptr ==
- __constant_htonl((TCPOPT_NOP << 24)
- | (TCPOPT_NOP << 16)
- | (TCPOPT_TIMESTAMP << 8)
- | TCPOLEN_TIMESTAMP))
+ && *(__be32 *)ptr == htonl((TCPOPT_NOP << 24)
+ | (TCPOPT_NOP << 16)
+ | (TCPOPT_TIMESTAMP << 8)
+ | TCPOLEN_TIMESTAMP))
return;
while (length > 0) {
@@ -765,26 +746,18 @@ EXPORT_SYMBOL_GPL(nf_conntrack_tcp_update);
#define TH_ECE 0x40
#define TH_CWR 0x80
-/* table of valid flag combinations - ECE and CWR are always valid */
-static u8 tcp_valid_flags[(TH_FIN|TH_SYN|TH_RST|TH_PUSH|TH_ACK|TH_URG) + 1] =
+/* table of valid flag combinations - PUSH, ECE and CWR are always valid */
+static u8 tcp_valid_flags[(TH_FIN|TH_SYN|TH_RST|TH_ACK|TH_URG) + 1] =
{
[TH_SYN] = 1,
- [TH_SYN|TH_PUSH] = 1,
[TH_SYN|TH_URG] = 1,
- [TH_SYN|TH_PUSH|TH_URG] = 1,
[TH_SYN|TH_ACK] = 1,
- [TH_SYN|TH_ACK|TH_PUSH] = 1,
[TH_RST] = 1,
[TH_RST|TH_ACK] = 1,
- [TH_RST|TH_ACK|TH_PUSH] = 1,
[TH_FIN|TH_ACK] = 1,
+ [TH_FIN|TH_ACK|TH_URG] = 1,
[TH_ACK] = 1,
- [TH_ACK|TH_PUSH] = 1,
[TH_ACK|TH_URG] = 1,
- [TH_ACK|TH_URG|TH_PUSH] = 1,
- [TH_FIN|TH_ACK|TH_PUSH] = 1,
- [TH_FIN|TH_ACK|TH_URG] = 1,
- [TH_FIN|TH_ACK|TH_URG|TH_PUSH] = 1,
};
/* Protect conntrack against broken packets. Code taken from ipt_unclean.c. */
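
A worked example of the table lookup with the new mask, assuming the TH_* defines and tcp_valid_flags[] above: a SYN segment that also carries PSH now indexes the plain TH_SYN entry instead of needing a row of its own.

    static int demo_flags_valid(u_int8_t tcpflags)
    {
            return tcp_valid_flags[tcpflags & ~(TH_ECE | TH_CWR | TH_PUSH)];
    }
    /* demo_flags_valid(TH_SYN | TH_PUSH) hits entry [TH_SYN] -> valid */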
@@ -831,7 +804,7 @@ static int tcp_error(struct sk_buff *skb,
}
/* Check TCP flags. */
- tcpflags = (((u_int8_t *)th)[13] & ~(TH_ECE|TH_CWR));
+ tcpflags = (((u_int8_t *)th)[13] & ~(TH_ECE|TH_CWR|TH_PUSH));
if (!tcp_valid_flags[tcpflags]) {
if (LOG_INVALID(IPPROTO_TCP))
nf_log_packet(pf, 0, skb, NULL, NULL, NULL,
@@ -1110,11 +1083,26 @@ static int tcp_to_nfattr(struct sk_buff *skb, struct nfattr *nfa,
const struct nf_conn *ct)
{
struct nfattr *nest_parms;
+ struct nf_ct_tcp_flags tmp = {};
read_lock_bh(&tcp_lock);
nest_parms = NFA_NEST(skb, CTA_PROTOINFO_TCP);
NFA_PUT(skb, CTA_PROTOINFO_TCP_STATE, sizeof(u_int8_t),
&ct->proto.tcp.state);
+
+ NFA_PUT(skb, CTA_PROTOINFO_TCP_WSCALE_ORIGINAL, sizeof(u_int8_t),
+ &ct->proto.tcp.seen[0].td_scale);
+
+ NFA_PUT(skb, CTA_PROTOINFO_TCP_WSCALE_REPLY, sizeof(u_int8_t),
+ &ct->proto.tcp.seen[1].td_scale);
+
+ tmp.flags = ct->proto.tcp.seen[0].flags;
+ NFA_PUT(skb, CTA_PROTOINFO_TCP_FLAGS_ORIGINAL,
+ sizeof(struct nf_ct_tcp_flags), &tmp);
+
+ tmp.flags = ct->proto.tcp.seen[1].flags;
+ NFA_PUT(skb, CTA_PROTOINFO_TCP_FLAGS_REPLY,
+ sizeof(struct nf_ct_tcp_flags), &tmp);
read_unlock_bh(&tcp_lock);
NFA_NEST_END(skb, nest_parms);
@@ -1127,7 +1115,11 @@ nfattr_failure:
}
static const size_t cta_min_tcp[CTA_PROTOINFO_TCP_MAX] = {
- [CTA_PROTOINFO_TCP_STATE-1] = sizeof(u_int8_t),
+ [CTA_PROTOINFO_TCP_STATE-1] = sizeof(u_int8_t),
+ [CTA_PROTOINFO_TCP_WSCALE_ORIGINAL-1] = sizeof(u_int8_t),
+ [CTA_PROTOINFO_TCP_WSCALE_REPLY-1] = sizeof(u_int8_t),
+ [CTA_PROTOINFO_TCP_FLAGS_ORIGINAL-1] = sizeof(struct nf_ct_tcp_flags),
+ [CTA_PROTOINFO_TCP_FLAGS_REPLY-1] = sizeof(struct nf_ct_tcp_flags)
};
static int nfattr_to_tcp(struct nfattr *cda[], struct nf_conn *ct)
@@ -1151,6 +1143,30 @@ static int nfattr_to_tcp(struct nfattr *cda[], struct nf_conn *ct)
write_lock_bh(&tcp_lock);
ct->proto.tcp.state =
*(u_int8_t *)NFA_DATA(tb[CTA_PROTOINFO_TCP_STATE-1]);
+
+ if (tb[CTA_PROTOINFO_TCP_FLAGS_ORIGINAL-1]) {
+ struct nf_ct_tcp_flags *attr =
+ NFA_DATA(tb[CTA_PROTOINFO_TCP_FLAGS_ORIGINAL-1]);
+ ct->proto.tcp.seen[0].flags &= ~attr->mask;
+ ct->proto.tcp.seen[0].flags |= attr->flags & attr->mask;
+ }
+
+ if (tb[CTA_PROTOINFO_TCP_FLAGS_REPLY-1]) {
+ struct nf_ct_tcp_flags *attr =
+ NFA_DATA(tb[CTA_PROTOINFO_TCP_FLAGS_REPLY-1]);
+ ct->proto.tcp.seen[1].flags &= ~attr->mask;
+ ct->proto.tcp.seen[1].flags |= attr->flags & attr->mask;
+ }
+
+ if (tb[CTA_PROTOINFO_TCP_WSCALE_ORIGINAL-1] &&
+ tb[CTA_PROTOINFO_TCP_WSCALE_REPLY-1] &&
+ ct->proto.tcp.seen[0].flags & IP_CT_TCP_FLAG_WINDOW_SCALE &&
+ ct->proto.tcp.seen[1].flags & IP_CT_TCP_FLAG_WINDOW_SCALE) {
+ ct->proto.tcp.seen[0].td_scale = *(u_int8_t *)
+ NFA_DATA(tb[CTA_PROTOINFO_TCP_WSCALE_ORIGINAL-1]);
+ ct->proto.tcp.seen[1].td_scale = *(u_int8_t *)
+ NFA_DATA(tb[CTA_PROTOINFO_TCP_WSCALE_REPLY-1]);
+ }
write_unlock_bh(&tcp_lock);
return 0;
diff --git a/net/netfilter/nf_conntrack_proto_udp.c b/net/netfilter/nf_conntrack_proto_udp.c
index a5e5726ec0c..3620ecc095f 100644
--- a/net/netfilter/nf_conntrack_proto_udp.c
+++ b/net/netfilter/nf_conntrack_proto_udp.c
@@ -4,11 +4,6 @@
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
- *
- * 16 Dec 2003: Yasuyuki Kozakai @USAGI <yasuyuki.kozakai@toshiba.co.jp>
- * - enable working with Layer 3 protocol independent connection tracking.
- *
- * Derived from net/ipv4/netfilter/ip_conntrack_proto_udp.c
*/
#include <linux/types.h>
diff --git a/net/netfilter/nf_conntrack_standalone.c b/net/netfilter/nf_conntrack_standalone.c
index b8586360e51..45baeb0e30f 100644
--- a/net/netfilter/nf_conntrack_standalone.c
+++ b/net/netfilter/nf_conntrack_standalone.c
@@ -1,20 +1,9 @@
-/* This file contains all the functions required for the standalone
- nf_conntrack module.
-
- These are not required by the compatibility layer.
-*/
-
/* (C) 1999-2001 Paul `Rusty' Russell
* (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
- *
- * 16 Dec 2003: Yasuyuki Kozakai @USAGI <yasuyuki.kozakai@toshiba.co.jp>
- * - generalize L3 protocol dependent part.
- *
- * Derived from net/ipv4/netfilter/ip_conntrack_standalone.c
*/
#include <linux/types.h>
diff --git a/net/netfilter/nfnetlink.c b/net/netfilter/nfnetlink.c
index bf23e489e4c..8797e6953ef 100644
--- a/net/netfilter/nfnetlink.c
+++ b/net/netfilter/nfnetlink.c
@@ -3,7 +3,7 @@
*
* (C) 2001 by Jay Schulist <jschlst@samba.org>,
* (C) 2002-2005 by Harald Welte <laforge@gnumonks.org>
- * (C) 2005 by Pablo Neira Ayuso <pablo@eurodev.net>
+ * (C) 2005,2007 by Pablo Neira Ayuso <pablo@netfilter.org>
*
* Initial netfilter messages via netlink development funded and
* generally made possible by Network Robots, Inc. (www.networkrobots.com)
@@ -28,10 +28,9 @@
#include <asm/uaccess.h>
#include <asm/system.h>
#include <net/sock.h>
+#include <net/netlink.h>
#include <linux/init.h>
-#include <linux/spinlock.h>
-#include <linux/netfilter.h>
#include <linux/netlink.h>
#include <linux/netfilter/nfnetlink.h>
@@ -41,32 +40,34 @@ MODULE_ALIAS_NET_PF_PROTO(PF_NETLINK, NETLINK_NETFILTER);
static char __initdata nfversion[] = "0.30";
-#if 0
-#define DEBUGP(format, args...) \
- printk(KERN_DEBUG "%s(%d):%s(): " format, __FILE__, \
- __LINE__, __FUNCTION__, ## args)
-#else
-#define DEBUGP(format, args...)
-#endif
-
static struct sock *nfnl = NULL;
static struct nfnetlink_subsystem *subsys_table[NFNL_SUBSYS_COUNT];
-DECLARE_MUTEX(nfnl_sem);
+static DEFINE_MUTEX(nfnl_mutex);
-void nfnl_lock(void)
+static void nfnl_lock(void)
{
- nfnl_shlock();
+ mutex_lock(&nfnl_mutex);
}
-void nfnl_unlock(void)
+static int nfnl_trylock(void)
{
- nfnl_shunlock();
+ return !mutex_trylock(&nfnl_mutex);
}
-int nfnetlink_subsys_register(struct nfnetlink_subsystem *n)
+static void __nfnl_unlock(void)
{
- DEBUGP("registering subsystem ID %u\n", n->subsys_id);
+ mutex_unlock(&nfnl_mutex);
+}
+
+static void nfnl_unlock(void)
+{
+ mutex_unlock(&nfnl_mutex);
+ if (nfnl->sk_receive_queue.qlen)
+ nfnl->sk_data_ready(nfnl, 0);
+}
+int nfnetlink_subsys_register(struct nfnetlink_subsystem *n)
+{
nfnl_lock();
if (subsys_table[n->subsys_id]) {
nfnl_unlock();
@@ -77,24 +78,23 @@ int nfnetlink_subsys_register(struct nfnetlink_subsystem *n)
return 0;
}
+EXPORT_SYMBOL_GPL(nfnetlink_subsys_register);
int nfnetlink_subsys_unregister(struct nfnetlink_subsystem *n)
{
- DEBUGP("unregistering subsystem ID %u\n", n->subsys_id);
-
nfnl_lock();
subsys_table[n->subsys_id] = NULL;
nfnl_unlock();
return 0;
}
+EXPORT_SYMBOL_GPL(nfnetlink_subsys_unregister);
static inline struct nfnetlink_subsystem *nfnetlink_get_subsys(u_int16_t type)
{
u_int8_t subsys_id = NFNL_SUBSYS_ID(type);
- if (subsys_id >= NFNL_SUBSYS_COUNT
- || subsys_table[subsys_id] == NULL)
+ if (subsys_id >= NFNL_SUBSYS_COUNT)
return NULL;
return subsys_table[subsys_id];
@@ -105,10 +105,8 @@ nfnetlink_find_client(u_int16_t type, struct nfnetlink_subsystem *ss)
{
u_int8_t cb_id = NFNL_MSG_TYPE(type);
- if (cb_id >= ss->cb_count) {
- DEBUGP("msgtype %u >= %u, returning\n", type, ss->cb_count);
+ if (cb_id >= ss->cb_count)
return NULL;
- }
return &ss->cb[cb_id];
}
@@ -125,6 +123,7 @@ void __nfa_fill(struct sk_buff *skb, int attrtype, int attrlen,
memcpy(NFA_DATA(nfa), data, attrlen);
memset(NFA_DATA(nfa) + attrlen, 0, NFA_ALIGN(size) - size);
}
+EXPORT_SYMBOL_GPL(__nfa_fill);
void nfattr_parse(struct nfattr *tb[], int maxattr, struct nfattr *nfa, int len)
{
@@ -137,6 +136,7 @@ void nfattr_parse(struct nfattr *tb[], int maxattr, struct nfattr *nfa, int len)
nfa = NFA_NEXT(nfa, len);
}
}
+EXPORT_SYMBOL_GPL(nfattr_parse);
/**
* nfnetlink_check_attributes - check and parse nfnetlink attributes
@@ -150,37 +150,15 @@ static int
nfnetlink_check_attributes(struct nfnetlink_subsystem *subsys,
struct nlmsghdr *nlh, struct nfattr *cda[])
{
- int min_len;
- u_int16_t attr_count;
+ int min_len = NLMSG_SPACE(sizeof(struct nfgenmsg));
u_int8_t cb_id = NFNL_MSG_TYPE(nlh->nlmsg_type);
-
- if (unlikely(cb_id >= subsys->cb_count)) {
- DEBUGP("msgtype %u >= %u, returning\n",
- cb_id, subsys->cb_count);
- return -EINVAL;
- }
-
- min_len = NLMSG_SPACE(sizeof(struct nfgenmsg));
- if (unlikely(nlh->nlmsg_len < min_len))
- return -EINVAL;
-
- attr_count = subsys->cb[cb_id].attr_count;
- memset(cda, 0, sizeof(struct nfattr *) * attr_count);
+ u_int16_t attr_count = subsys->cb[cb_id].attr_count;
/* check attribute lengths. */
if (likely(nlh->nlmsg_len > min_len)) {
struct nfattr *attr = NFM_NFA(NLMSG_DATA(nlh));
int attrlen = nlh->nlmsg_len - NLMSG_ALIGN(min_len);
-
- while (NFA_OK(attr, attrlen)) {
- unsigned flavor = NFA_TYPE(attr);
- if (flavor) {
- if (flavor > attr_count)
- return -EINVAL;
- cda[flavor - 1] = attr;
- }
- attr = NFA_NEXT(attr, attrlen);
- }
+ nfattr_parse(cda, attr_count, attr, attrlen);
}
/* implicit: if nlmsg_len == min_len, we return 0, and an empty
@@ -208,62 +186,46 @@ int nfnetlink_send(struct sk_buff *skb, u32 pid, unsigned group, int echo)
return err;
}
+EXPORT_SYMBOL_GPL(nfnetlink_send);
int nfnetlink_unicast(struct sk_buff *skb, u_int32_t pid, int flags)
{
return netlink_unicast(nfnl, skb, pid, flags);
}
+EXPORT_SYMBOL_GPL(nfnetlink_unicast);
/* Process one complete nfnetlink message. */
-static int nfnetlink_rcv_msg(struct sk_buff *skb,
- struct nlmsghdr *nlh, int *errp)
+static int nfnetlink_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
{
struct nfnl_callback *nc;
struct nfnetlink_subsystem *ss;
- int type, err = 0;
-
- DEBUGP("entered; subsys=%u, msgtype=%u\n",
- NFNL_SUBSYS_ID(nlh->nlmsg_type),
- NFNL_MSG_TYPE(nlh->nlmsg_type));
-
- if (security_netlink_recv(skb, CAP_NET_ADMIN)) {
- DEBUGP("missing CAP_NET_ADMIN\n");
- *errp = -EPERM;
- return -1;
- }
+ int type, err;
- /* Only requests are handled by kernel now. */
- if (!(nlh->nlmsg_flags & NLM_F_REQUEST)) {
- DEBUGP("received non-request message\n");
- return 0;
- }
+ if (security_netlink_recv(skb, CAP_NET_ADMIN))
+ return -EPERM;
/* All the messages must at least contain nfgenmsg */
- if (nlh->nlmsg_len < NLMSG_SPACE(sizeof(struct nfgenmsg))) {
- DEBUGP("received message was too short\n");
+ if (nlh->nlmsg_len < NLMSG_SPACE(sizeof(struct nfgenmsg)))
return 0;
- }
type = nlh->nlmsg_type;
ss = nfnetlink_get_subsys(type);
if (!ss) {
#ifdef CONFIG_KMOD
- /* don't call nfnl_shunlock, since it would reenter
+ /* don't call nfnl_unlock, since it would reenter
* with further packet processing */
- up(&nfnl_sem);
+ __nfnl_unlock();
request_module("nfnetlink-subsys-%d", NFNL_SUBSYS_ID(type));
- nfnl_shlock();
+ nfnl_lock();
ss = nfnetlink_get_subsys(type);
if (!ss)
#endif
- goto err_inval;
+ return -EINVAL;
}
nc = nfnetlink_find_client(type, ss);
- if (!nc) {
- DEBUGP("unable to find client for type %d\n", type);
- goto err_inval;
- }
+ if (!nc)
+ return -EINVAL;
{
u_int16_t attr_count =
@@ -274,73 +236,21 @@ static int nfnetlink_rcv_msg(struct sk_buff *skb,
err = nfnetlink_check_attributes(ss, nlh, cda);
if (err < 0)
- goto err_inval;
-
- DEBUGP("calling handler\n");
- err = nc->call(nfnl, skb, nlh, cda, errp);
- *errp = err;
- return err;
- }
-
-err_inval:
- DEBUGP("returning -EINVAL\n");
- *errp = -EINVAL;
- return -1;
-}
-
-/* Process one packet of messages. */
-static inline int nfnetlink_rcv_skb(struct sk_buff *skb)
-{
- int err;
- struct nlmsghdr *nlh;
-
- while (skb->len >= NLMSG_SPACE(0)) {
- u32 rlen;
-
- nlh = (struct nlmsghdr *)skb->data;
- if (nlh->nlmsg_len < sizeof(struct nlmsghdr)
- || skb->len < nlh->nlmsg_len)
- return 0;
- rlen = NLMSG_ALIGN(nlh->nlmsg_len);
- if (rlen > skb->len)
- rlen = skb->len;
- if (nfnetlink_rcv_msg(skb, nlh, &err)) {
- if (!err)
- return -1;
- netlink_ack(skb, nlh, err);
- } else
- if (nlh->nlmsg_flags & NLM_F_ACK)
- netlink_ack(skb, nlh, 0);
- skb_pull(skb, rlen);
+ return err;
+ return nc->call(nfnl, skb, nlh, cda);
}
-
- return 0;
}
static void nfnetlink_rcv(struct sock *sk, int len)
{
- do {
- struct sk_buff *skb;
+ unsigned int qlen = 0;
- if (nfnl_shlock_nowait())
+ do {
+ if (nfnl_trylock())
return;
-
- while ((skb = skb_dequeue(&sk->sk_receive_queue)) != NULL) {
- if (nfnetlink_rcv_skb(skb)) {
- if (skb->len)
- skb_queue_head(&sk->sk_receive_queue,
- skb);
- else
- kfree_skb(skb);
- break;
- }
- kfree_skb(skb);
- }
-
- /* don't call nfnl_shunlock, since it would reenter
- * with further packet processing */
- up(&nfnl_sem);
- } while(nfnl && nfnl->sk_receive_queue.qlen);
+ netlink_run_queue(sk, &qlen, nfnetlink_rcv_msg);
+ __nfnl_unlock();
+ } while (qlen);
}
static void __exit nfnetlink_exit(void)
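
netlink_run_queue() now performs the skb walking, NLM_F_REQUEST filtering, length checks and error acking that the removed nfnetlink_rcv_skb() open-coded; the callback inspects one message and returns 0 or a negative errno. The minimal shape of such a receive path (demo_* names invented; signature per this kernel series):

    #include <net/netlink.h>
    #include <net/sock.h>

    static int demo_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
    {
            /* 0 = handled (core acks if NLM_F_ACK was set);
             * negative errno = core netlink_ack()s the error */
            return 0;
    }

    static void demo_rcv(struct sock *sk, int len)
    {
            unsigned int qlen = 0;

            do {
                    netlink_run_queue(sk, &qlen, demo_rcv_msg);
            } while (qlen);
    }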
@@ -355,7 +265,7 @@ static int __init nfnetlink_init(void)
printk("Netfilter messages via NETLINK v%s.\n", nfversion);
nfnl = netlink_kernel_create(NETLINK_NETFILTER, NFNLGRP_MAX,
- nfnetlink_rcv, THIS_MODULE);
+ nfnetlink_rcv, NULL, THIS_MODULE);
if (!nfnl) {
printk(KERN_ERR "cannot initialize nfnetlink!\n");
return -1;
@@ -366,10 +276,3 @@ static int __init nfnetlink_init(void)
module_init(nfnetlink_init);
module_exit(nfnetlink_exit);
-
-EXPORT_SYMBOL_GPL(nfnetlink_subsys_register);
-EXPORT_SYMBOL_GPL(nfnetlink_subsys_unregister);
-EXPORT_SYMBOL_GPL(nfnetlink_send);
-EXPORT_SYMBOL_GPL(nfnetlink_unicast);
-EXPORT_SYMBOL_GPL(nfattr_parse);
-EXPORT_SYMBOL_GPL(__nfa_fill);
diff --git a/net/netfilter/nfnetlink_log.c b/net/netfilter/nfnetlink_log.c
index 5cb30ebba0f..e32e30e7a17 100644
--- a/net/netfilter/nfnetlink_log.c
+++ b/net/netfilter/nfnetlink_log.c
@@ -10,11 +10,6 @@
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
- *
- * 2006-01-26 Harald Welte <laforge@netfilter.org>
- * - Add optional local and global sequence number to detect lost
- * events from userspace
- *
*/
#include <linux/module.h>
#include <linux/skbuff.h>
@@ -163,10 +158,7 @@ instance_create(u_int16_t group_num, int pid)
/* needs to be two, since we _put() after creation */
atomic_set(&inst->use, 2);
- init_timer(&inst->timer);
- inst->timer.function = nfulnl_timer;
- inst->timer.data = (unsigned long)inst;
- /* don't start timer yet. (re)start it with every packet */
+ setup_timer(&inst->timer, nfulnl_timer, (unsigned long)inst);
inst->peer_pid = pid;
inst->group_num = group_num;
@@ -200,20 +192,14 @@ out_unlock:
static int __nfulnl_send(struct nfulnl_instance *inst);
static void
-_instance_destroy2(struct nfulnl_instance *inst, int lock)
+__instance_destroy(struct nfulnl_instance *inst)
{
/* first pull it out of the global list */
- if (lock)
- write_lock_bh(&instances_lock);
-
UDEBUG("removing instance %p (queuenum=%u) from hash\n",
inst, inst->group_num);
hlist_del(&inst->hlist);
- if (lock)
- write_unlock_bh(&instances_lock);
-
/* then flush all pending packets from skb */
spin_lock_bh(&inst->lock);
@@ -235,15 +221,11 @@ _instance_destroy2(struct nfulnl_instance *inst, int lock)
}
static inline void
-__instance_destroy(struct nfulnl_instance *inst)
-{
- _instance_destroy2(inst, 0);
-}
-
-static inline void
instance_destroy(struct nfulnl_instance *inst)
{
- _instance_destroy2(inst, 1);
+ write_lock_bh(&instances_lock);
+ __instance_destroy(inst);
+ write_unlock_bh(&instances_lock);
}
static int
@@ -365,9 +347,6 @@ __nfulnl_send(struct nfulnl_instance *inst)
{
int status;
- if (!inst->skb)
- return 0;
-
if (inst->qlen > 1)
inst->lastnlh->nlmsg_type = NLMSG_DONE;
@@ -391,7 +370,8 @@ static void nfulnl_timer(unsigned long data)
UDEBUG("timer function called, flushing buffer\n");
spin_lock_bh(&inst->lock);
- __nfulnl_send(inst);
+ if (inst->skb)
+ __nfulnl_send(inst);
spin_unlock_bh(&inst->lock);
instance_put(inst);
}
@@ -409,15 +389,14 @@ __build_packet_message(struct nfulnl_instance *inst,
const struct nf_loginfo *li,
const char *prefix, unsigned int plen)
{
- unsigned char *old_tail;
struct nfulnl_msg_packet_hdr pmsg;
struct nlmsghdr *nlh;
struct nfgenmsg *nfmsg;
__be32 tmp_uint;
+ sk_buff_data_t old_tail = inst->skb->tail;
UDEBUG("entered\n");
- old_tail = inst->skb->tail;
nlh = NLMSG_PUT(inst->skb, 0, 0,
NFNL_SUBSYS_ULOG << 8 | NFULNL_MSG_PACKET,
sizeof(struct nfgenmsg));
@@ -509,11 +488,11 @@ __build_packet_message(struct nfulnl_instance *inst,
NFA_PUT(inst->skb, NFULA_HWADDR, sizeof(phw), &phw);
}
- if (skb->tstamp.off_sec) {
+ if (skb->tstamp.tv64) {
struct nfulnl_msg_packet_timestamp ts;
-
- ts.sec = cpu_to_be64(skb->tstamp.off_sec);
- ts.usec = cpu_to_be64(skb->tstamp.off_usec);
+ struct timeval tv = ktime_to_timeval(skb->tstamp);
+ ts.sec = cpu_to_be64(tv.tv_sec);
+ ts.usec = cpu_to_be64(tv.tv_usec);
NFA_PUT(inst->skb, NFULA_TIMESTAMP, sizeof(ts), &ts);
}
@@ -596,7 +575,6 @@ nfulnl_log_packet(unsigned int pf,
struct nfulnl_instance *inst;
const struct nf_loginfo *li;
unsigned int qthreshold;
- unsigned int nlbufsiz;
unsigned int plen;
if (li_user && li_user->type == NF_LOG_TYPE_ULOG)
@@ -606,12 +584,7 @@ nfulnl_log_packet(unsigned int pf,
inst = instance_lookup_get(li->u.ulog.group);
if (!inst)
- inst = instance_lookup_get(0);
- if (!inst) {
- PRINTR("nfnetlink_log: trying to log packet, "
- "but no instance for group %u\n", li->u.ulog.group);
return;
- }
plen = 0;
if (prefix)
@@ -667,24 +640,11 @@ nfulnl_log_packet(unsigned int pf,
break;
default:
- spin_unlock_bh(&inst->lock);
- instance_put(inst);
- return;
+ goto unlock_and_release;
}
- if (size > inst->nlbufsiz)
- nlbufsiz = size;
- else
- nlbufsiz = inst->nlbufsiz;
-
- if (!inst->skb) {
- if (!(inst->skb = nfulnl_alloc_skb(nlbufsiz, size))) {
- UDEBUG("error in nfulnl_alloc_skb(%u, %u)\n",
- inst->nlbufsiz, size);
- goto alloc_failure;
- }
- } else if (inst->qlen >= qthreshold ||
- size > skb_tailroom(inst->skb)) {
+ if (inst->qlen >= qthreshold ||
+ (inst->skb && size > skb_tailroom(inst->skb))) {
/* either the queue len is too high or we don't have
* enough room in the skb left. flush to userspace. */
UDEBUG("flushing old skb\n");
@@ -693,12 +653,12 @@ nfulnl_log_packet(unsigned int pf,
if (del_timer(&inst->timer))
instance_put(inst);
__nfulnl_send(inst);
+ }
- if (!(inst->skb = nfulnl_alloc_skb(nlbufsiz, size))) {
- UDEBUG("error in nfulnl_alloc_skb(%u, %u)\n",
- inst->nlbufsiz, size);
+ if (!inst->skb) {
+ inst->skb = nfulnl_alloc_skb(inst->nlbufsiz, size);
+ if (!inst->skb)
goto alloc_failure;
- }
}
UDEBUG("qlen %d, qthreshold %d\n", inst->qlen, qthreshold);
@@ -760,7 +720,7 @@ static struct notifier_block nfulnl_rtnl_notifier = {
static int
nfulnl_recv_unsupp(struct sock *ctnl, struct sk_buff *skb,
- struct nlmsghdr *nlh, struct nfattr *nfqa[], int *errp)
+ struct nlmsghdr *nlh, struct nfattr *nfqa[])
{
return -ENOTSUPP;
}
@@ -798,7 +758,7 @@ static const int nfula_cfg_min[NFULA_CFG_MAX] = {
static int
nfulnl_recv_config(struct sock *ctnl, struct sk_buff *skb,
- struct nlmsghdr *nlh, struct nfattr *nfula[], int *errp)
+ struct nlmsghdr *nlh, struct nfattr *nfula[])
{
struct nfgenmsg *nfmsg = NLMSG_DATA(nlh);
u_int16_t group_num = ntohs(nfmsg->res_id);
@@ -830,13 +790,13 @@ nfulnl_recv_config(struct sock *ctnl, struct sk_buff *skb,
NETLINK_CB(skb).pid);
if (!inst) {
ret = -EINVAL;
- goto out_put;
+ goto out;
}
break;
case NFULNL_CFG_CMD_UNBIND:
if (!inst) {
ret = -ENODEV;
- goto out_put;
+ goto out;
}
if (inst->peer_pid != NETLINK_CB(skb).pid) {
@@ -845,7 +805,7 @@ nfulnl_recv_config(struct sock *ctnl, struct sk_buff *skb,
}
instance_destroy(inst);
- break;
+ goto out;
case NFULNL_CFG_CMD_PF_BIND:
UDEBUG("registering log handler for pf=%u\n", pf);
ret = nf_log_register(pf, &nfulnl_logger);
@@ -869,7 +829,7 @@ nfulnl_recv_config(struct sock *ctnl, struct sk_buff *skb,
"group=%u pid=%u =>ENOENT\n",
group_num, NETLINK_CB(skb).pid);
ret = -ENOENT;
- goto out_put;
+ goto out;
}
if (inst->peer_pid != NETLINK_CB(skb).pid) {
@@ -939,10 +899,8 @@ struct iter_state {
unsigned int bucket;
};
-static struct hlist_node *get_first(struct seq_file *seq)
+static struct hlist_node *get_first(struct iter_state *st)
{
- struct iter_state *st = seq->private;
-
if (!st)
return NULL;
@@ -953,10 +911,8 @@ static struct hlist_node *get_first(struct seq_file *seq)
return NULL;
}
-static struct hlist_node *get_next(struct seq_file *seq, struct hlist_node *h)
+static struct hlist_node *get_next(struct iter_state *st, struct hlist_node *h)
{
- struct iter_state *st = seq->private;
-
h = h->next;
while (!h) {
if (++st->bucket >= INSTANCE_BUCKETS)
@@ -967,13 +923,13 @@ static struct hlist_node *get_next(struct seq_file *seq, struct hlist_node *h)
return h;
}
-static struct hlist_node *get_idx(struct seq_file *seq, loff_t pos)
+static struct hlist_node *get_idx(struct iter_state *st, loff_t pos)
{
struct hlist_node *head;
- head = get_first(seq);
+ head = get_first(st);
if (head)
- while (pos && (head = get_next(seq, head)))
+ while (pos && (head = get_next(st, head)))
pos--;
return pos ? NULL : head;
}
@@ -981,13 +937,13 @@ static struct hlist_node *get_idx(struct seq_file *seq, loff_t pos)
static void *seq_start(struct seq_file *seq, loff_t *pos)
{
read_lock_bh(&instances_lock);
- return get_idx(seq, *pos);
+ return get_idx(seq->private, *pos);
}
static void *seq_next(struct seq_file *s, void *v, loff_t *pos)
{
(*pos)++;
- return get_next(s, v);
+ return get_next(s->private, v);
}
static void seq_stop(struct seq_file *s, void *v)
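One recurring cleanup in this file is worth calling out: setup_timer() folds the init_timer()/function/data triple into a single call. The equivalence, shown as a sketch using the same names as the hunk above:

	/* before: three statements, easy to leave half-initialized */
	init_timer(&inst->timer);
	inst->timer.function = nfulnl_timer;
	inst->timer.data = (unsigned long)inst;

	/* after: one call initializes all three fields */
	setup_timer(&inst->timer, nfulnl_timer, (unsigned long)inst);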
diff --git a/net/netfilter/nfnetlink_queue.c b/net/netfilter/nfnetlink_queue.c
index d9ce4a71d0f..7a97bec6772 100644
--- a/net/netfilter/nfnetlink_queue.c
+++ b/net/netfilter/nfnetlink_queue.c
@@ -338,7 +338,7 @@ static struct sk_buff *
nfqnl_build_packet_message(struct nfqnl_instance *queue,
struct nfqnl_queue_entry *entry, int *errp)
{
- unsigned char *old_tail;
+ sk_buff_data_t old_tail;
size_t size;
size_t data_len = 0;
struct sk_buff *skb;
@@ -404,7 +404,7 @@ nfqnl_build_packet_message(struct nfqnl_instance *queue,
if (!skb)
goto nlmsg_failure;
- old_tail= skb->tail;
+ old_tail = skb->tail;
nlh = NLMSG_PUT(skb, 0, 0,
NFNL_SUBSYS_QUEUE << 8 | NFQNL_MSG_PACKET,
sizeof(struct nfgenmsg));
@@ -495,11 +495,11 @@ nfqnl_build_packet_message(struct nfqnl_instance *queue,
NFA_PUT(skb, NFQA_HWADDR, sizeof(phw), &phw);
}
- if (entskb->tstamp.off_sec) {
+ if (entskb->tstamp.tv64) {
struct nfqnl_msg_packet_timestamp ts;
-
- ts.sec = cpu_to_be64(entskb->tstamp.off_sec);
- ts.usec = cpu_to_be64(entskb->tstamp.off_usec);
+ struct timeval tv = ktime_to_timeval(entskb->tstamp);
+ ts.sec = cpu_to_be64(tv.tv_sec);
+ ts.usec = cpu_to_be64(tv.tv_usec);
NFA_PUT(skb, NFQA_TIMESTAMP, sizeof(ts), &ts);
}
@@ -648,7 +648,7 @@ nfqnl_mangle(void *data, int data_len, struct nfqnl_queue_entry *e)
}
if (!skb_make_writable(&e->skb, data_len))
return -ENOMEM;
- memcpy(e->skb->data, data, data_len);
+ skb_copy_to_linear_data(e->skb, data, data_len);
e->skb->ip_summed = CHECKSUM_NONE;
return 0;
}
@@ -783,7 +783,7 @@ static const int nfqa_verdict_min[NFQA_MAX] = {
static int
nfqnl_recv_verdict(struct sock *ctnl, struct sk_buff *skb,
- struct nlmsghdr *nlh, struct nfattr *nfqa[], int *errp)
+ struct nlmsghdr *nlh, struct nfattr *nfqa[])
{
struct nfgenmsg *nfmsg = NLMSG_DATA(nlh);
u_int16_t queue_num = ntohs(nfmsg->res_id);
@@ -848,7 +848,7 @@ err_out_put:
static int
nfqnl_recv_unsupp(struct sock *ctnl, struct sk_buff *skb,
- struct nlmsghdr *nlh, struct nfattr *nfqa[], int *errp)
+ struct nlmsghdr *nlh, struct nfattr *nfqa[])
{
return -ENOTSUPP;
}
@@ -865,7 +865,7 @@ static struct nf_queue_handler nfqh = {
static int
nfqnl_recv_config(struct sock *ctnl, struct sk_buff *skb,
- struct nlmsghdr *nlh, struct nfattr *nfqa[], int *errp)
+ struct nlmsghdr *nlh, struct nfattr *nfqa[])
{
struct nfgenmsg *nfmsg = NLMSG_DATA(nlh);
u_int16_t queue_num = ntohs(nfmsg->res_id);
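Both nfnetlink_log and nfnetlink_queue now treat skb->tstamp as a ktime_t (tested via .tv64) and convert it with ktime_to_timeval() before putting it on the wire. A hypothetical helper showing just that conversion step, with the message struct borrowed from the hunks above:

	static void example_fill_tstamp(const struct sk_buff *skb,
					struct nfqnl_msg_packet_timestamp *ts)
	{
		/* ktime_to_timeval() splits the 64-bit ktime into sec/usec */
		struct timeval tv = ktime_to_timeval(skb->tstamp);

		ts->sec  = cpu_to_be64(tv.tv_sec);
		ts->usec = cpu_to_be64(tv.tv_usec);
	}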
diff --git a/net/netfilter/x_tables.c b/net/netfilter/x_tables.c
index ec607a421a5..0eb2504b89b 100644
--- a/net/netfilter/x_tables.c
+++ b/net/netfilter/x_tables.c
@@ -56,8 +56,8 @@ enum {
};
static const char *xt_prefix[NPROTO] = {
- [AF_INET] = "ip",
- [AF_INET6] = "ip6",
+ [AF_INET] = "ip",
+ [AF_INET6] = "ip6",
[NF_ARP] = "arp",
};
@@ -651,12 +651,6 @@ void *xt_unregister_table(struct xt_table *table)
EXPORT_SYMBOL_GPL(xt_unregister_table);
#ifdef CONFIG_PROC_FS
-static char *xt_proto_prefix[NPROTO] = {
- [AF_INET] = "ip",
- [AF_INET6] = "ip6",
- [NF_ARP] = "arp",
-};
-
static struct list_head *xt_get_idx(struct list_head *list, struct seq_file *seq, loff_t pos)
{
struct list_head *head = list->next;
@@ -798,7 +792,7 @@ int xt_proto_init(int af)
#ifdef CONFIG_PROC_FS
- strlcpy(buf, xt_proto_prefix[af], sizeof(buf));
+ strlcpy(buf, xt_prefix[af], sizeof(buf));
strlcat(buf, FORMAT_TABLES, sizeof(buf));
proc = proc_net_fops_create(buf, 0440, &xt_file_ops);
if (!proc)
@@ -806,14 +800,14 @@ int xt_proto_init(int af)
proc->data = (void *) ((unsigned long) af | (TABLE << 16));
- strlcpy(buf, xt_proto_prefix[af], sizeof(buf));
+ strlcpy(buf, xt_prefix[af], sizeof(buf));
strlcat(buf, FORMAT_MATCHES, sizeof(buf));
proc = proc_net_fops_create(buf, 0440, &xt_file_ops);
if (!proc)
goto out_remove_tables;
proc->data = (void *) ((unsigned long) af | (MATCH << 16));
- strlcpy(buf, xt_proto_prefix[af], sizeof(buf));
+ strlcpy(buf, xt_prefix[af], sizeof(buf));
strlcat(buf, FORMAT_TARGETS, sizeof(buf));
proc = proc_net_fops_create(buf, 0440, &xt_file_ops);
if (!proc)
@@ -825,12 +819,12 @@ int xt_proto_init(int af)
#ifdef CONFIG_PROC_FS
out_remove_matches:
- strlcpy(buf, xt_proto_prefix[af], sizeof(buf));
+ strlcpy(buf, xt_prefix[af], sizeof(buf));
strlcat(buf, FORMAT_MATCHES, sizeof(buf));
proc_net_remove(buf);
out_remove_tables:
- strlcpy(buf, xt_proto_prefix[af], sizeof(buf));
+ strlcpy(buf, xt_prefix[af], sizeof(buf));
strlcat(buf, FORMAT_TABLES, sizeof(buf));
proc_net_remove(buf);
out:
@@ -844,15 +838,15 @@ void xt_proto_fini(int af)
#ifdef CONFIG_PROC_FS
char buf[XT_FUNCTION_MAXNAMELEN];
- strlcpy(buf, xt_proto_prefix[af], sizeof(buf));
+ strlcpy(buf, xt_prefix[af], sizeof(buf));
strlcat(buf, FORMAT_TABLES, sizeof(buf));
proc_net_remove(buf);
- strlcpy(buf, xt_proto_prefix[af], sizeof(buf));
+ strlcpy(buf, xt_prefix[af], sizeof(buf));
strlcat(buf, FORMAT_TARGETS, sizeof(buf));
proc_net_remove(buf);
- strlcpy(buf, xt_proto_prefix[af], sizeof(buf));
+ strlcpy(buf, xt_prefix[af], sizeof(buf));
strlcat(buf, FORMAT_MATCHES, sizeof(buf));
proc_net_remove(buf);
#endif /*CONFIG_PROC_FS*/
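The x_tables change simply deletes a duplicated prefix table; every proc file name is now derived from the one xt_prefix[] array. The construction pattern, as a hypothetical helper:

	static void xt_proc_name(int af, const char *suffix,
				 char *buf, size_t len)
	{
		/* e.g. xt_prefix[AF_INET] + FORMAT_TABLES */
		strlcpy(buf, xt_prefix[af], len);
		strlcat(buf, suffix, len);
	}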
diff --git a/net/netfilter/xt_CONNMARK.c b/net/netfilter/xt_CONNMARK.c
index 795c058b16a..b03ce009d0b 100644
--- a/net/netfilter/xt_CONNMARK.c
+++ b/net/netfilter/xt_CONNMARK.c
@@ -30,10 +30,7 @@ MODULE_ALIAS("ipt_CONNMARK");
#include <linux/netfilter/x_tables.h>
#include <linux/netfilter/xt_CONNMARK.h>
-#include <net/netfilter/nf_conntrack_compat.h>
-#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
#include <net/netfilter/nf_conntrack_ecache.h>
-#endif
static unsigned int
target(struct sk_buff **pskb,
@@ -44,40 +41,33 @@ target(struct sk_buff **pskb,
const void *targinfo)
{
const struct xt_connmark_target_info *markinfo = targinfo;
+ struct nf_conn *ct;
+ enum ip_conntrack_info ctinfo;
u_int32_t diff;
u_int32_t mark;
u_int32_t newmark;
- u_int32_t ctinfo;
- u_int32_t *ctmark = nf_ct_get_mark(*pskb, &ctinfo);
- if (ctmark) {
+ ct = nf_ct_get(*pskb, &ctinfo);
+ if (ct) {
switch(markinfo->mode) {
case XT_CONNMARK_SET:
- newmark = (*ctmark & ~markinfo->mask) | markinfo->mark;
- if (newmark != *ctmark) {
- *ctmark = newmark;
-#if defined(CONFIG_IP_NF_CONNTRACK) || defined(CONFIG_IP_NF_CONNTRACK_MODULE)
- ip_conntrack_event_cache(IPCT_MARK, *pskb);
-#else
+ newmark = (ct->mark & ~markinfo->mask) | markinfo->mark;
+ if (newmark != ct->mark) {
+ ct->mark = newmark;
nf_conntrack_event_cache(IPCT_MARK, *pskb);
-#endif
}
break;
case XT_CONNMARK_SAVE:
- newmark = (*ctmark & ~markinfo->mask) |
+ newmark = (ct->mark & ~markinfo->mask) |
((*pskb)->mark & markinfo->mask);
- if (*ctmark != newmark) {
- *ctmark = newmark;
-#if defined(CONFIG_IP_NF_CONNTRACK) || defined(CONFIG_IP_NF_CONNTRACK_MODULE)
- ip_conntrack_event_cache(IPCT_MARK, *pskb);
-#else
+ if (ct->mark != newmark) {
+ ct->mark = newmark;
nf_conntrack_event_cache(IPCT_MARK, *pskb);
-#endif
}
break;
case XT_CONNMARK_RESTORE:
mark = (*pskb)->mark;
- diff = (*ctmark ^ mark) & markinfo->mask;
+ diff = (ct->mark ^ mark) & markinfo->mask;
(*pskb)->mark = mark ^ diff;
break;
}
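All the compat conversions in these xt modules follow the same idiom: fetch the conntrack entry with nf_ct_get() and access its fields directly instead of going through the nf_ct_get_*() wrappers. The core of that idiom, sketched as a hypothetical mark test:

	static int example_mark_matches(const struct sk_buff *skb,
					u_int32_t mask, u_int32_t value)
	{
		enum ip_conntrack_info ctinfo;
		struct nf_conn *ct;

		ct = nf_ct_get(skb, &ctinfo);
		if (!ct)
			return 0;	/* no conntrack entry: no match */

		/* ct->mark replaces the old nf_ct_get_mark() indirection */
		return (ct->mark & mask) == value;
	}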
diff --git a/net/netfilter/xt_CONNSECMARK.c b/net/netfilter/xt_CONNSECMARK.c
index 1ab0db641f9..81c0c58bab4 100644
--- a/net/netfilter/xt_CONNSECMARK.c
+++ b/net/netfilter/xt_CONNSECMARK.c
@@ -19,7 +19,7 @@
#include <linux/skbuff.h>
#include <linux/netfilter/x_tables.h>
#include <linux/netfilter/xt_CONNSECMARK.h>
-#include <net/netfilter/nf_conntrack_compat.h>
+#include <net/netfilter/nf_conntrack.h>
#define PFX "CONNSECMARK: "
@@ -36,12 +36,12 @@ MODULE_ALIAS("ip6t_CONNSECMARK");
static void secmark_save(struct sk_buff *skb)
{
if (skb->secmark) {
- u32 *connsecmark;
+ struct nf_conn *ct;
enum ip_conntrack_info ctinfo;
- connsecmark = nf_ct_get_secmark(skb, &ctinfo);
- if (connsecmark && !*connsecmark)
- *connsecmark = skb->secmark;
+ ct = nf_ct_get(skb, &ctinfo);
+ if (ct && !ct->secmark)
+ ct->secmark = skb->secmark;
}
}
@@ -52,12 +52,12 @@ static void secmark_save(struct sk_buff *skb)
static void secmark_restore(struct sk_buff *skb)
{
if (!skb->secmark) {
- u32 *connsecmark;
+ struct nf_conn *ct;
enum ip_conntrack_info ctinfo;
- connsecmark = nf_ct_get_secmark(skb, &ctinfo);
- if (connsecmark && *connsecmark)
- skb->secmark = *connsecmark;
+ ct = nf_ct_get(skb, &ctinfo);
+ if (ct && ct->secmark)
+ skb->secmark = ct->secmark;
}
}
diff --git a/net/netfilter/xt_DSCP.c b/net/netfilter/xt_DSCP.c
index a7cc75aeb38..9f2f2201f6a 100644
--- a/net/netfilter/xt_DSCP.c
+++ b/net/netfilter/xt_DSCP.c
@@ -8,8 +8,6 @@
* published by the Free Software Foundation.
*
* See RFC2474 for a description of the DSCP field within the IP Header.
- *
- * xt_DSCP.c,v 1.8 2002/08/06 18:41:57 laforge Exp
*/
#include <linux/module.h>
@@ -35,13 +33,13 @@ static unsigned int target(struct sk_buff **pskb,
const void *targinfo)
{
const struct xt_DSCP_info *dinfo = targinfo;
- u_int8_t dscp = ipv4_get_dsfield((*pskb)->nh.iph) >> XT_DSCP_SHIFT;
+ u_int8_t dscp = ipv4_get_dsfield(ip_hdr(*pskb)) >> XT_DSCP_SHIFT;
if (dscp != dinfo->dscp) {
if (!skb_make_writable(pskb, sizeof(struct iphdr)))
return NF_DROP;
- ipv4_change_dsfield((*pskb)->nh.iph, (__u8)(~XT_DSCP_MASK),
+ ipv4_change_dsfield(ip_hdr(*pskb), (__u8)(~XT_DSCP_MASK),
dinfo->dscp << XT_DSCP_SHIFT);
}
@@ -56,13 +54,13 @@ static unsigned int target6(struct sk_buff **pskb,
const void *targinfo)
{
const struct xt_DSCP_info *dinfo = targinfo;
- u_int8_t dscp = ipv6_get_dsfield((*pskb)->nh.ipv6h) >> XT_DSCP_SHIFT;
+ u_int8_t dscp = ipv6_get_dsfield(ipv6_hdr(*pskb)) >> XT_DSCP_SHIFT;
if (dscp != dinfo->dscp) {
if (!skb_make_writable(pskb, sizeof(struct ipv6hdr)))
return NF_DROP;
- ipv6_change_dsfield((*pskb)->nh.ipv6h, (__u8)(~XT_DSCP_MASK),
+ ipv6_change_dsfield(ipv6_hdr(*pskb), (__u8)(~XT_DSCP_MASK),
dinfo->dscp << XT_DSCP_SHIFT);
}
return XT_CONTINUE;
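ip_hdr() and ipv6_hdr() are the typed accessors that replace the old skb->nh union across this series; both are thin wrappers around skb_network_header(). A sketch of the DSCP read in both address families, mirroring the targets above:

	static u_int8_t example_read_dscp(const struct sk_buff *skb, int family)
	{
		/* the accessors cast skb_network_header() appropriately */
		if (family == AF_INET)
			return ipv4_get_dsfield(ip_hdr(skb)) >> XT_DSCP_SHIFT;
		return ipv6_get_dsfield(ipv6_hdr(skb)) >> XT_DSCP_SHIFT;
	}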
diff --git a/net/netfilter/xt_NOTRACK.c b/net/netfilter/xt_NOTRACK.c
index b874a2008b2..5085fb3d1e2 100644
--- a/net/netfilter/xt_NOTRACK.c
+++ b/net/netfilter/xt_NOTRACK.c
@@ -5,7 +5,7 @@
#include <linux/skbuff.h>
#include <linux/netfilter/x_tables.h>
-#include <net/netfilter/nf_conntrack_compat.h>
+#include <net/netfilter/nf_conntrack.h>
MODULE_LICENSE("GPL");
MODULE_ALIAS("ipt_NOTRACK");
@@ -26,7 +26,7 @@ target(struct sk_buff **pskb,
If there is a real ct entry corresponding to this packet,
it'll hang around till timing out. We don't deal with it
for performance reasons. JK */
- nf_ct_untrack(*pskb);
+ (*pskb)->nfct = &nf_conntrack_untracked.ct_general;
(*pskb)->nfctinfo = IP_CT_NEW;
nf_conntrack_get((*pskb)->nfct);
diff --git a/net/netfilter/xt_TCPMSS.c b/net/netfilter/xt_TCPMSS.c
index db7e38c08de..15fe8f64951 100644
--- a/net/netfilter/xt_TCPMSS.c
+++ b/net/netfilter/xt_TCPMSS.c
@@ -54,7 +54,7 @@ tcpmss_mangle_packet(struct sk_buff **pskb,
return -1;
tcplen = (*pskb)->len - tcphoff;
- tcph = (struct tcphdr *)((*pskb)->nh.raw + tcphoff);
+ tcph = (struct tcphdr *)(skb_network_header(*pskb) + tcphoff);
	/* Since it passed flags test in tcp match, we know it is
not a fragment, and has data >= tcp header length. SYN
@@ -113,7 +113,7 @@ tcpmss_mangle_packet(struct sk_buff **pskb,
return -1;
kfree_skb(*pskb);
*pskb = newskb;
- tcph = (struct tcphdr *)((*pskb)->nh.raw + tcphoff);
+ tcph = (struct tcphdr *)(skb_network_header(*pskb) + tcphoff);
}
skb_put((*pskb), TCPOLEN_MSS);
@@ -145,7 +145,7 @@ xt_tcpmss_target4(struct sk_buff **pskb,
const struct xt_target *target,
const void *targinfo)
{
- struct iphdr *iph = (*pskb)->nh.iph;
+ struct iphdr *iph = ip_hdr(*pskb);
__be16 newlen;
int ret;
@@ -154,7 +154,7 @@ xt_tcpmss_target4(struct sk_buff **pskb,
if (ret < 0)
return NF_DROP;
if (ret > 0) {
- iph = (*pskb)->nh.iph;
+ iph = ip_hdr(*pskb);
newlen = htons(ntohs(iph->tot_len) + ret);
nf_csum_replace2(&iph->check, iph->tot_len, newlen);
iph->tot_len = newlen;
@@ -171,7 +171,7 @@ xt_tcpmss_target6(struct sk_buff **pskb,
const struct xt_target *target,
const void *targinfo)
{
- struct ipv6hdr *ipv6h = (*pskb)->nh.ipv6h;
+ struct ipv6hdr *ipv6h = ipv6_hdr(*pskb);
u8 nexthdr;
int tcphoff;
int ret;
@@ -187,7 +187,7 @@ xt_tcpmss_target6(struct sk_buff **pskb,
if (ret < 0)
return NF_DROP;
if (ret > 0) {
- ipv6h = (*pskb)->nh.ipv6h;
+ ipv6h = ipv6_hdr(*pskb);
ipv6h->payload_len = htons(ntohs(ipv6h->payload_len) + ret);
}
return XT_CONTINUE;
diff --git a/net/netfilter/xt_connbytes.c b/net/netfilter/xt_connbytes.c
index 5e32dfa2668..804afe55e14 100644
--- a/net/netfilter/xt_connbytes.c
+++ b/net/netfilter/xt_connbytes.c
@@ -1,20 +1,11 @@
/* Kernel module to match connection tracking byte counter.
* GPL (C) 2002 Martin Devera (devik@cdi.cz).
- *
- * 2004-07-20 Harald Welte <laforge@netfilter.org>
- * - reimplemented to use per-connection accounting counters
- * - add functionality to match number of packets
- * - add functionality to match average packet size
- * - add support to match directions seperately
- * 2005-10-16 Harald Welte <laforge@netfilter.org>
- * - Port to x_tables
- *
*/
#include <linux/module.h>
#include <linux/skbuff.h>
-#include <net/netfilter/nf_conntrack_compat.h>
#include <linux/netfilter/x_tables.h>
#include <linux/netfilter/xt_connbytes.h>
+#include <net/netfilter/nf_conntrack.h>
#include <asm/div64.h>
#include <asm/bitops.h>
@@ -24,22 +15,6 @@ MODULE_AUTHOR("Harald Welte <laforge@netfilter.org>");
MODULE_DESCRIPTION("iptables match for matching number of pkts/bytes per connection");
MODULE_ALIAS("ipt_connbytes");
-/* 64bit divisor, dividend and result. dynamic precision */
-static u_int64_t div64_64(u_int64_t dividend, u_int64_t divisor)
-{
- u_int32_t d = divisor;
-
- if (divisor > 0xffffffffULL) {
- unsigned int shift = fls(divisor >> 32);
-
- d = divisor >> shift;
- dividend >>= shift;
- }
-
- do_div(dividend, d);
- return dividend;
-}
-
static int
match(const struct sk_buff *skb,
const struct net_device *in,
@@ -51,13 +26,17 @@ match(const struct sk_buff *skb,
int *hotdrop)
{
const struct xt_connbytes_info *sinfo = matchinfo;
+ struct nf_conn *ct;
+ enum ip_conntrack_info ctinfo;
u_int64_t what = 0; /* initialize to make gcc happy */
u_int64_t bytes = 0;
u_int64_t pkts = 0;
const struct ip_conntrack_counter *counters;
- if (!(counters = nf_ct_get_counters(skb)))
- return 0; /* no match */
+ ct = nf_ct_get(skb, &ctinfo);
+ if (!ct)
+ return 0;
+ counters = ct->counters;
switch (sinfo->what) {
case XT_CONNBYTES_PKTS:
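xt_connbytes loses its private div64_64() copy (the helper is presumably provided generically elsewhere now) and reads the accounting counters straight off the conntrack entry. A sketch of the average-packet-size calculation the match performs, under those assumptions:

	static u_int64_t example_avg_pkt_size(const struct nf_conn *ct, int dir)
	{
		const struct ip_conntrack_counter *c = &ct->counters[dir];

		if (!c->packets)	/* guard the divide */
			return 0;
		/* div64_64() copes with a full 64-bit divisor */
		return div64_64(c->bytes, c->packets);
	}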
diff --git a/net/netfilter/xt_connmark.c b/net/netfilter/xt_connmark.c
index 36c2defff23..e1803256c79 100644
--- a/net/netfilter/xt_connmark.c
+++ b/net/netfilter/xt_connmark.c
@@ -21,16 +21,15 @@
#include <linux/module.h>
#include <linux/skbuff.h>
+#include <net/netfilter/nf_conntrack.h>
+#include <linux/netfilter/x_tables.h>
+#include <linux/netfilter/xt_connmark.h>
MODULE_AUTHOR("Henrik Nordstrom <hno@marasytems.com>");
MODULE_DESCRIPTION("IP tables connmark match module");
MODULE_LICENSE("GPL");
MODULE_ALIAS("ipt_connmark");
-#include <linux/netfilter/x_tables.h>
-#include <linux/netfilter/xt_connmark.h>
-#include <net/netfilter/nf_conntrack_compat.h>
-
static int
match(const struct sk_buff *skb,
const struct net_device *in,
@@ -42,12 +41,14 @@ match(const struct sk_buff *skb,
int *hotdrop)
{
const struct xt_connmark_info *info = matchinfo;
- u_int32_t ctinfo;
- const u_int32_t *ctmark = nf_ct_get_mark(skb, &ctinfo);
- if (!ctmark)
+ struct nf_conn *ct;
+ enum ip_conntrack_info ctinfo;
+
+ ct = nf_ct_get(skb, &ctinfo);
+ if (!ct)
return 0;
- return (((*ctmark) & info->mask) == info->mark) ^ info->invert;
+ return (((ct->mark) & info->mask) == info->mark) ^ info->invert;
}
static int
diff --git a/net/netfilter/xt_conntrack.c b/net/netfilter/xt_conntrack.c
index 2885c378288..f4ea8fe07a5 100644
--- a/net/netfilter/xt_conntrack.c
+++ b/net/netfilter/xt_conntrack.c
@@ -10,121 +10,15 @@
#include <linux/module.h>
#include <linux/skbuff.h>
-
-#if defined(CONFIG_IP_NF_CONNTRACK) || defined(CONFIG_IP_NF_CONNTRACK_MODULE)
-#include <linux/netfilter_ipv4/ip_conntrack.h>
-#include <linux/netfilter_ipv4/ip_conntrack_tuple.h>
-#else
-#include <net/netfilter/nf_conntrack.h>
-#endif
-
#include <linux/netfilter/x_tables.h>
#include <linux/netfilter/xt_conntrack.h>
-#include <net/netfilter/nf_conntrack_compat.h>
+#include <net/netfilter/nf_conntrack.h>
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Marc Boucher <marc@mbsi.ca>");
MODULE_DESCRIPTION("iptables connection tracking match module");
MODULE_ALIAS("ipt_conntrack");
-#if defined(CONFIG_IP_NF_CONNTRACK) || defined(CONFIG_IP_NF_CONNTRACK_MODULE)
-
-static int
-match(const struct sk_buff *skb,
- const struct net_device *in,
- const struct net_device *out,
- const struct xt_match *match,
- const void *matchinfo,
- int offset,
- unsigned int protoff,
- int *hotdrop)
-{
- const struct xt_conntrack_info *sinfo = matchinfo;
- struct ip_conntrack *ct;
- enum ip_conntrack_info ctinfo;
- unsigned int statebit;
-
- ct = ip_conntrack_get((struct sk_buff *)skb, &ctinfo);
-
-#define FWINV(bool, invflg) ((bool) ^ !!(sinfo->invflags & invflg))
-
- if (ct == &ip_conntrack_untracked)
- statebit = XT_CONNTRACK_STATE_UNTRACKED;
- else if (ct)
- statebit = XT_CONNTRACK_STATE_BIT(ctinfo);
- else
- statebit = XT_CONNTRACK_STATE_INVALID;
-
- if (sinfo->flags & XT_CONNTRACK_STATE) {
- if (ct) {
- if (test_bit(IPS_SRC_NAT_BIT, &ct->status))
- statebit |= XT_CONNTRACK_STATE_SNAT;
- if (test_bit(IPS_DST_NAT_BIT, &ct->status))
- statebit |= XT_CONNTRACK_STATE_DNAT;
- }
- if (FWINV((statebit & sinfo->statemask) == 0,
- XT_CONNTRACK_STATE))
- return 0;
- }
-
- if (ct == NULL) {
- if (sinfo->flags & ~XT_CONNTRACK_STATE)
- return 0;
- return 1;
- }
-
- if (sinfo->flags & XT_CONNTRACK_PROTO &&
- FWINV(ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.protonum !=
- sinfo->tuple[IP_CT_DIR_ORIGINAL].dst.protonum,
- XT_CONNTRACK_PROTO))
- return 0;
-
- if (sinfo->flags & XT_CONNTRACK_ORIGSRC &&
- FWINV((ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.ip &
- sinfo->sipmsk[IP_CT_DIR_ORIGINAL].s_addr) !=
- sinfo->tuple[IP_CT_DIR_ORIGINAL].src.ip,
- XT_CONNTRACK_ORIGSRC))
- return 0;
-
- if (sinfo->flags & XT_CONNTRACK_ORIGDST &&
- FWINV((ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.ip &
- sinfo->dipmsk[IP_CT_DIR_ORIGINAL].s_addr) !=
- sinfo->tuple[IP_CT_DIR_ORIGINAL].dst.ip,
- XT_CONNTRACK_ORIGDST))
- return 0;
-
- if (sinfo->flags & XT_CONNTRACK_REPLSRC &&
- FWINV((ct->tuplehash[IP_CT_DIR_REPLY].tuple.src.ip &
- sinfo->sipmsk[IP_CT_DIR_REPLY].s_addr) !=
- sinfo->tuple[IP_CT_DIR_REPLY].src.ip,
- XT_CONNTRACK_REPLSRC))
- return 0;
-
- if (sinfo->flags & XT_CONNTRACK_REPLDST &&
- FWINV((ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.ip &
- sinfo->dipmsk[IP_CT_DIR_REPLY].s_addr) !=
- sinfo->tuple[IP_CT_DIR_REPLY].dst.ip,
- XT_CONNTRACK_REPLDST))
- return 0;
-
- if (sinfo->flags & XT_CONNTRACK_STATUS &&
- FWINV((ct->status & sinfo->statusmask) == 0,
- XT_CONNTRACK_STATUS))
- return 0;
-
- if (sinfo->flags & XT_CONNTRACK_EXPIRES) {
- unsigned long expires = timer_pending(&ct->timeout) ?
- (ct->timeout.expires - jiffies)/HZ : 0;
-
- if (FWINV(!(expires >= sinfo->expires_min &&
- expires <= sinfo->expires_max),
- XT_CONNTRACK_EXPIRES))
- return 0;
- }
- return 1;
-}
-
-#else /* CONFIG_IP_NF_CONNTRACK */
static int
match(const struct sk_buff *skb,
const struct net_device *in,
@@ -220,8 +114,6 @@ match(const struct sk_buff *skb,
return 1;
}
-#endif /* CONFIG_NF_IP_CONNTRACK */
-
static int
checkentry(const char *tablename,
const void *ip,
diff --git a/net/netfilter/xt_dscp.c b/net/netfilter/xt_dscp.c
index 26c7f4ad102..56b247ecc28 100644
--- a/net/netfilter/xt_dscp.c
+++ b/net/netfilter/xt_dscp.c
@@ -1,7 +1,5 @@
/* IP tables module for matching the value of the IPv4/IPv6 DSCP field
*
- * xt_dscp.c,v 1.3 2002/08/05 19:00:21 laforge Exp
- *
* (C) 2002 by Harald Welte <laforge@netfilter.org>
*
* This program is free software; you can redistribute it and/or modify
@@ -34,7 +32,7 @@ static int match(const struct sk_buff *skb,
int *hotdrop)
{
const struct xt_dscp_info *info = matchinfo;
- u_int8_t dscp = ipv4_get_dsfield(skb->nh.iph) >> XT_DSCP_SHIFT;
+ u_int8_t dscp = ipv4_get_dsfield(ip_hdr(skb)) >> XT_DSCP_SHIFT;
return (dscp == info->dscp) ^ !!info->invert;
}
@@ -49,7 +47,7 @@ static int match6(const struct sk_buff *skb,
int *hotdrop)
{
const struct xt_dscp_info *info = matchinfo;
- u_int8_t dscp = ipv6_get_dsfield(skb->nh.ipv6h) >> XT_DSCP_SHIFT;
+ u_int8_t dscp = ipv6_get_dsfield(ipv6_hdr(skb)) >> XT_DSCP_SHIFT;
return (dscp == info->dscp) ^ !!info->invert;
}
diff --git a/net/netfilter/xt_hashlimit.c b/net/netfilter/xt_hashlimit.c
index 9f37d593ca3..d3043fa32eb 100644
--- a/net/netfilter/xt_hashlimit.c
+++ b/net/netfilter/xt_hashlimit.c
@@ -216,10 +216,8 @@ static int htable_create(struct xt_hashlimit_info *minfo, int family)
hinfo->pde->proc_fops = &dl_file_ops;
hinfo->pde->data = hinfo;
- init_timer(&hinfo->timer);
+ setup_timer(&hinfo->timer, htable_gc, (unsigned long )hinfo);
hinfo->timer.expires = jiffies + msecs_to_jiffies(hinfo->cfg.gc_interval);
- hinfo->timer.data = (unsigned long )hinfo;
- hinfo->timer.function = htable_gc;
add_timer(&hinfo->timer);
spin_lock_bh(&hashlimit_lock);
@@ -380,22 +378,22 @@ hashlimit_init_dst(struct xt_hashlimit_htable *hinfo, struct dsthash_dst *dst,
switch (hinfo->family) {
case AF_INET:
if (hinfo->cfg.mode & XT_HASHLIMIT_HASH_DIP)
- dst->addr.ip.dst = skb->nh.iph->daddr;
+ dst->addr.ip.dst = ip_hdr(skb)->daddr;
if (hinfo->cfg.mode & XT_HASHLIMIT_HASH_SIP)
- dst->addr.ip.src = skb->nh.iph->saddr;
+ dst->addr.ip.src = ip_hdr(skb)->saddr;
if (!(hinfo->cfg.mode &
(XT_HASHLIMIT_HASH_DPT | XT_HASHLIMIT_HASH_SPT)))
return 0;
- nexthdr = skb->nh.iph->protocol;
+ nexthdr = ip_hdr(skb)->protocol;
break;
#if defined(CONFIG_IP6_NF_IPTABLES) || defined(CONFIG_IP6_NF_IPTABLES_MODULE)
case AF_INET6:
if (hinfo->cfg.mode & XT_HASHLIMIT_HASH_DIP)
- memcpy(&dst->addr.ip6.dst, &skb->nh.ipv6h->daddr,
+ memcpy(&dst->addr.ip6.dst, &ipv6_hdr(skb)->daddr,
sizeof(dst->addr.ip6.dst));
if (hinfo->cfg.mode & XT_HASHLIMIT_HASH_SIP)
- memcpy(&dst->addr.ip6.src, &skb->nh.ipv6h->saddr,
+ memcpy(&dst->addr.ip6.src, &ipv6_hdr(skb)->saddr,
sizeof(dst->addr.ip6.src));
if (!(hinfo->cfg.mode &
diff --git a/net/netfilter/xt_helper.c b/net/netfilter/xt_helper.c
index 407d1d5da8a..c139b2f43a1 100644
--- a/net/netfilter/xt_helper.c
+++ b/net/netfilter/xt_helper.c
@@ -5,26 +5,16 @@
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
- *
- * 19 Mar 2002 Harald Welte <laforge@gnumonks.org>:
- * - Port to newnat infrastructure
*/
#include <linux/module.h>
#include <linux/skbuff.h>
#include <linux/netfilter.h>
-#if defined(CONFIG_IP_NF_CONNTRACK) || defined(CONFIG_IP_NF_CONNTRACK_MODULE)
-#include <linux/netfilter_ipv4/ip_conntrack.h>
-#include <linux/netfilter_ipv4/ip_conntrack_core.h>
-#include <linux/netfilter_ipv4/ip_conntrack_helper.h>
-#else
#include <net/netfilter/nf_conntrack.h>
#include <net/netfilter/nf_conntrack_core.h>
#include <net/netfilter/nf_conntrack_helper.h>
-#endif
#include <linux/netfilter/x_tables.h>
#include <linux/netfilter/xt_helper.h>
-#include <net/netfilter/nf_conntrack_compat.h>
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Martin Josefsson <gandalf@netfilter.org>");
@@ -38,55 +28,6 @@ MODULE_ALIAS("ip6t_helper");
#define DEBUGP(format, args...)
#endif
-#if defined(CONFIG_IP_NF_CONNTRACK) || defined(CONFIG_IP_NF_CONNTRACK_MODULE)
-static int
-match(const struct sk_buff *skb,
- const struct net_device *in,
- const struct net_device *out,
- const struct xt_match *match,
- const void *matchinfo,
- int offset,
- unsigned int protoff,
- int *hotdrop)
-{
- const struct xt_helper_info *info = matchinfo;
- struct ip_conntrack *ct;
- enum ip_conntrack_info ctinfo;
- int ret = info->invert;
-
- ct = ip_conntrack_get((struct sk_buff *)skb, &ctinfo);
- if (!ct) {
- DEBUGP("xt_helper: Eek! invalid conntrack?\n");
- return ret;
- }
-
- if (!ct->master) {
- DEBUGP("xt_helper: conntrack %p has no master\n", ct);
- return ret;
- }
-
- read_lock_bh(&ip_conntrack_lock);
- if (!ct->master->helper) {
- DEBUGP("xt_helper: master ct %p has no helper\n",
- exp->expectant);
- goto out_unlock;
- }
-
- DEBUGP("master's name = %s , info->name = %s\n",
- ct->master->helper->name, info->name);
-
- if (info->name[0] == '\0')
- ret ^= 1;
- else
- ret ^= !strncmp(ct->master->helper->name, info->name,
- strlen(ct->master->helper->name));
-out_unlock:
- read_unlock_bh(&ip_conntrack_lock);
- return ret;
-}
-
-#else /* CONFIG_IP_NF_CONNTRACK */
-
static int
match(const struct sk_buff *skb,
const struct net_device *in,
@@ -134,7 +75,6 @@ out_unlock:
read_unlock_bh(&nf_conntrack_lock);
return ret;
}
-#endif
static int check(const char *tablename,
const void *inf,
diff --git a/net/netfilter/xt_length.c b/net/netfilter/xt_length.c
index 32fb998d9ba..77288c5ada7 100644
--- a/net/netfilter/xt_length.c
+++ b/net/netfilter/xt_length.c
@@ -31,7 +31,7 @@ match(const struct sk_buff *skb,
int *hotdrop)
{
const struct xt_length_info *info = matchinfo;
- u_int16_t pktlen = ntohs(skb->nh.iph->tot_len);
+ u_int16_t pktlen = ntohs(ip_hdr(skb)->tot_len);
return (pktlen >= info->min && pktlen <= info->max) ^ info->invert;
}
@@ -47,7 +47,8 @@ match6(const struct sk_buff *skb,
int *hotdrop)
{
const struct xt_length_info *info = matchinfo;
- u_int16_t pktlen = ntohs(skb->nh.ipv6h->payload_len) + sizeof(struct ipv6hdr);
+ const u_int16_t pktlen = (ntohs(ipv6_hdr(skb)->payload_len) +
+ sizeof(struct ipv6hdr));
return (pktlen >= info->min && pktlen <= info->max) ^ info->invert;
}
diff --git a/net/netfilter/xt_limit.c b/net/netfilter/xt_limit.c
index 6fd8347c005..571a72ab89a 100644
--- a/net/netfilter/xt_limit.c
+++ b/net/netfilter/xt_limit.c
@@ -1,10 +1,3 @@
-/* Kernel module to control the rate
- *
- * 2 September 1999: Changed from the target RATE to the match
- * `limit', removed logging. Did I mention that
- * Alexey is a fucking genius?
- * Rusty Russell (rusty@rustcorp.com.au). */
-
/* (C) 1999 Jérôme de Vivie <devivie@info.enserb.u-bordeaux.fr>
* (C) 1999 Hervé Eychenne <eychenne@info.enserb.u-bordeaux.fr>
*
diff --git a/net/netfilter/xt_mac.c b/net/netfilter/xt_mac.c
index d430d90d7b2..1d3a1d98b88 100644
--- a/net/netfilter/xt_mac.c
+++ b/net/netfilter/xt_mac.c
@@ -37,8 +37,8 @@ match(const struct sk_buff *skb,
const struct xt_mac_info *info = matchinfo;
/* Is mac pointer valid? */
- return (skb->mac.raw >= skb->head
- && (skb->mac.raw + ETH_HLEN) <= skb->data
+ return (skb_mac_header(skb) >= skb->head &&
+ (skb_mac_header(skb) + ETH_HLEN) <= skb->data
/* If so, compare... */
&& ((!compare_ether_addr(eth_hdr(skb)->h_source, info->srcaddr))
^ info->invert));
diff --git a/net/netfilter/xt_pkttype.c b/net/netfilter/xt_pkttype.c
index 16e7b080428..e1409fc5c28 100644
--- a/net/netfilter/xt_pkttype.c
+++ b/net/netfilter/xt_pkttype.c
@@ -34,7 +34,7 @@ static int match(const struct sk_buff *skb,
const struct xt_pkttype_info *info = matchinfo;
if (skb->pkt_type == PACKET_LOOPBACK)
- type = (MULTICAST(skb->nh.iph->daddr)
+ type = (MULTICAST(ip_hdr(skb)->daddr)
? PACKET_MULTICAST
: PACKET_BROADCAST);
else
diff --git a/net/netfilter/xt_realm.c b/net/netfilter/xt_realm.c
index 97ffc2fbc19..c2017f8af9c 100644
--- a/net/netfilter/xt_realm.c
+++ b/net/netfilter/xt_realm.c
@@ -1,7 +1,5 @@
/* IP tables module for matching the routing realm
*
- * $Id: ipt_realm.c,v 1.3 2004/03/05 13:25:40 laforge Exp $
- *
* (C) 2003 by Sampsa Ranta <sampsa@netsonic.fi>
*
* This program is free software; you can redistribute it and/or modify
diff --git a/net/netfilter/xt_state.c b/net/netfilter/xt_state.c
index df37b912163..149294f7df7 100644
--- a/net/netfilter/xt_state.c
+++ b/net/netfilter/xt_state.c
@@ -10,7 +10,7 @@
#include <linux/module.h>
#include <linux/skbuff.h>
-#include <net/netfilter/nf_conntrack_compat.h>
+#include <net/netfilter/nf_conntrack.h>
#include <linux/netfilter/x_tables.h>
#include <linux/netfilter/xt_state.h>
@@ -36,7 +36,7 @@ match(const struct sk_buff *skb,
if (nf_ct_is_untracked(skb))
statebit = XT_STATE_UNTRACKED;
- else if (!nf_ct_get_ctinfo(skb, &ctinfo))
+ else if (!nf_ct_get(skb, &ctinfo))
statebit = XT_STATE_INVALID;
else
statebit = XT_STATE_BIT(ctinfo);
diff --git a/net/netlabel/netlabel_kapi.c b/net/netlabel/netlabel_kapi.c
index e03a3282c55..f2535e7f286 100644
--- a/net/netlabel/netlabel_kapi.c
+++ b/net/netlabel/netlabel_kapi.c
@@ -263,9 +263,6 @@ int netlbl_socket_setattr(const struct socket *sock,
int ret_val = -ENOENT;
struct netlbl_dom_map *dom_entry;
- if ((secattr->flags & NETLBL_SECATTR_DOMAIN) == 0)
- return -ENOENT;
-
rcu_read_lock();
dom_entry = netlbl_domhsh_getentry(secattr->domain);
if (dom_entry == NULL)
diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c
index c48b0f49f00..507828d7d4a 100644
--- a/net/netlink/af_netlink.c
+++ b/net/netlink/af_netlink.c
@@ -56,6 +56,7 @@
#include <linux/types.h>
#include <linux/audit.h>
#include <linux/selinux.h>
+#include <linux/mutex.h>
#include <net/sock.h>
#include <net/scm.h>
@@ -76,7 +77,8 @@ struct netlink_sock {
unsigned long state;
wait_queue_head_t wait;
struct netlink_callback *cb;
- spinlock_t cb_lock;
+ struct mutex *cb_mutex;
+ struct mutex cb_def_mutex;
void (*data_ready)(struct sock *sk, int bytes);
struct module *module;
};
@@ -108,6 +110,7 @@ struct netlink_table {
unsigned long *listeners;
unsigned int nl_nonroot;
unsigned int groups;
+ struct mutex *cb_mutex;
struct module *module;
int registered;
};
@@ -118,6 +121,7 @@ static DECLARE_WAIT_QUEUE_HEAD(nl_table_wait);
static int netlink_dump(struct sock *sk);
static void netlink_destroy_callback(struct netlink_callback *cb);
+static void netlink_queue_skip(struct nlmsghdr *nlh, struct sk_buff *skb);
static DEFINE_RWLOCK(nl_table_lock);
static atomic_t nl_table_users = ATOMIC_INIT(0);
@@ -136,6 +140,14 @@ static struct hlist_head *nl_pid_hashfn(struct nl_pid_hash *hash, u32 pid)
static void netlink_sock_destruct(struct sock *sk)
{
+ struct netlink_sock *nlk = nlk_sk(sk);
+
+ if (nlk->cb) {
+ if (nlk->cb->done)
+ nlk->cb->done(nlk->cb);
+ netlink_destroy_callback(nlk->cb);
+ }
+
skb_queue_purge(&sk->sk_receive_queue);
if (!sock_flag(sk, SOCK_DEAD)) {
@@ -144,7 +156,6 @@ static void netlink_sock_destruct(struct sock *sk)
}
BUG_TRAP(!atomic_read(&sk->sk_rmem_alloc));
BUG_TRAP(!atomic_read(&sk->sk_wmem_alloc));
- BUG_TRAP(!nlk_sk(sk)->cb);
BUG_TRAP(!nlk_sk(sk)->groups);
}
@@ -370,7 +381,8 @@ static struct proto netlink_proto = {
.obj_size = sizeof(struct netlink_sock),
};
-static int __netlink_create(struct socket *sock, int protocol)
+static int __netlink_create(struct socket *sock, struct mutex *cb_mutex,
+ int protocol)
{
struct sock *sk;
struct netlink_sock *nlk;
@@ -384,7 +396,12 @@ static int __netlink_create(struct socket *sock, int protocol)
sock_init_data(sock, sk);
nlk = nlk_sk(sk);
- spin_lock_init(&nlk->cb_lock);
+ if (cb_mutex)
+ nlk->cb_mutex = cb_mutex;
+ else {
+ nlk->cb_mutex = &nlk->cb_def_mutex;
+ mutex_init(nlk->cb_mutex);
+ }
init_waitqueue_head(&nlk->wait);
sk->sk_destruct = netlink_sock_destruct;
@@ -395,8 +412,8 @@ static int __netlink_create(struct socket *sock, int protocol)
static int netlink_create(struct socket *sock, int protocol)
{
struct module *module = NULL;
+ struct mutex *cb_mutex;
struct netlink_sock *nlk;
- unsigned int groups;
int err = 0;
sock->state = SS_UNCONNECTED;
@@ -418,10 +435,10 @@ static int netlink_create(struct socket *sock, int protocol)
if (nl_table[protocol].registered &&
try_module_get(nl_table[protocol].module))
module = nl_table[protocol].module;
- groups = nl_table[protocol].groups;
+ cb_mutex = nl_table[protocol].cb_mutex;
netlink_unlock_table();
- if ((err = __netlink_create(sock, protocol)) < 0)
+ if ((err = __netlink_create(sock, cb_mutex, protocol)) < 0)
goto out_module;
nlk = nlk_sk(sock->sk);
@@ -446,17 +463,10 @@ static int netlink_release(struct socket *sock)
sock_orphan(sk);
nlk = nlk_sk(sk);
- spin_lock(&nlk->cb_lock);
- if (nlk->cb) {
- if (nlk->cb->done)
- nlk->cb->done(nlk->cb);
- netlink_destroy_callback(nlk->cb);
- nlk->cb = NULL;
- }
- spin_unlock(&nlk->cb_lock);
-
- /* OK. Socket is unlinked, and, therefore,
- no new packets will arrive */
+ /*
+ * OK. Socket is unlinked, any packets that arrive now
+ * will be purged.
+ */
sock->sk = NULL;
wake_up_interruptible_all(&nlk->wait);
@@ -1215,7 +1225,7 @@ static int netlink_recvmsg(struct kiocb *kiocb, struct socket *sock,
copied = len;
}
- skb->h.raw = skb->data;
+ skb_reset_transport_header(skb);
err = skb_copy_datagram_iovec(skb, 0, msg->msg_iov, copied);
if (msg->msg_name) {
@@ -1235,13 +1245,14 @@ static int netlink_recvmsg(struct kiocb *kiocb, struct socket *sock,
siocb->scm = &scm;
}
siocb->scm->creds = *NETLINK_CREDS(skb);
+ if (flags & MSG_TRUNC)
+ copied = skb->len;
skb_free_datagram(sk, skb);
if (nlk->cb && atomic_read(&sk->sk_rmem_alloc) <= sk->sk_rcvbuf / 2)
netlink_dump(sk);
scm_recv(sock, msg, siocb->scm, flags);
-
out:
netlink_rcv_wake(sk);
return err ? : copied;
@@ -1265,7 +1276,7 @@ static void netlink_data_ready(struct sock *sk, int len)
struct sock *
netlink_kernel_create(int unit, unsigned int groups,
void (*input)(struct sock *sk, int len),
- struct module *module)
+ struct mutex *cb_mutex, struct module *module)
{
struct socket *sock;
struct sock *sk;
@@ -1280,7 +1291,7 @@ netlink_kernel_create(int unit, unsigned int groups,
if (sock_create_lite(PF_NETLINK, SOCK_DGRAM, unit, &sock))
return NULL;
- if (__netlink_create(sock, unit) < 0)
+ if (__netlink_create(sock, cb_mutex, unit) < 0)
goto out_sock_release;
if (groups < 32)
@@ -1304,6 +1315,7 @@ netlink_kernel_create(int unit, unsigned int groups,
netlink_table_grab();
nl_table[unit].groups = groups;
nl_table[unit].listeners = listeners;
+ nl_table[unit].cb_mutex = cb_mutex;
nl_table[unit].module = module;
nl_table[unit].registered = 1;
netlink_table_ungrab();
@@ -1346,7 +1358,7 @@ static int netlink_dump(struct sock *sk)
if (!skb)
goto errout;
- spin_lock(&nlk->cb_lock);
+ mutex_lock(nlk->cb_mutex);
cb = nlk->cb;
if (cb == NULL) {
@@ -1357,7 +1369,7 @@ static int netlink_dump(struct sock *sk)
len = cb->dump(skb, cb);
if (len > 0) {
- spin_unlock(&nlk->cb_lock);
+ mutex_unlock(nlk->cb_mutex);
skb_queue_tail(&sk->sk_receive_queue, skb);
sk->sk_data_ready(sk, len);
return 0;
@@ -1375,13 +1387,13 @@ static int netlink_dump(struct sock *sk)
if (cb->done)
cb->done(cb);
nlk->cb = NULL;
- spin_unlock(&nlk->cb_lock);
+ mutex_unlock(nlk->cb_mutex);
netlink_destroy_callback(cb);
return 0;
errout_skb:
- spin_unlock(&nlk->cb_lock);
+ mutex_unlock(nlk->cb_mutex);
kfree_skb(skb);
errout:
return err;
@@ -1412,20 +1424,25 @@ int netlink_dump_start(struct sock *ssk, struct sk_buff *skb,
return -ECONNREFUSED;
}
nlk = nlk_sk(sk);
- /* A dump or destruction is in progress... */
- spin_lock(&nlk->cb_lock);
- if (nlk->cb || sock_flag(sk, SOCK_DEAD)) {
- spin_unlock(&nlk->cb_lock);
+ /* A dump is in progress... */
+ mutex_lock(nlk->cb_mutex);
+ if (nlk->cb) {
+ mutex_unlock(nlk->cb_mutex);
netlink_destroy_callback(cb);
sock_put(sk);
return -EBUSY;
}
nlk->cb = cb;
- spin_unlock(&nlk->cb_lock);
+ mutex_unlock(nlk->cb_mutex);
netlink_dump(sk);
sock_put(sk);
- return 0;
+
+	/* We successfully started a dump; by returning -EINTR we
+	 * signal the queue management to interrupt processing of
+ * any netlink messages so userspace gets a chance to read
+ * the results. */
+ return -EINTR;
}
void netlink_ack(struct sk_buff *in_skb, struct nlmsghdr *nlh, int err)
@@ -1462,27 +1479,35 @@ void netlink_ack(struct sk_buff *in_skb, struct nlmsghdr *nlh, int err)
}
static int netlink_rcv_skb(struct sk_buff *skb, int (*cb)(struct sk_buff *,
- struct nlmsghdr *, int *))
+ struct nlmsghdr *))
{
struct nlmsghdr *nlh;
int err;
while (skb->len >= nlmsg_total_size(0)) {
- nlh = (struct nlmsghdr *) skb->data;
+ nlh = nlmsg_hdr(skb);
+ err = 0;
if (nlh->nlmsg_len < NLMSG_HDRLEN || skb->len < nlh->nlmsg_len)
return 0;
- if (cb(skb, nlh, &err) < 0) {
- /* Not an error, but we have to interrupt processing
- * here. Note: that in this case we do not pull
- * message from skb, it will be processed later.
- */
- if (err == 0)
- return -1;
+ /* Only requests are handled by the kernel */
+ if (!(nlh->nlmsg_flags & NLM_F_REQUEST))
+ goto skip;
+
+ /* Skip control messages */
+ if (nlh->nlmsg_type < NLMSG_MIN_TYPE)
+ goto skip;
+
+ err = cb(skb, nlh);
+ if (err == -EINTR) {
+ /* Not an error, but we interrupt processing */
+ netlink_queue_skip(nlh, skb);
+ return err;
+ }
+skip:
+ if (nlh->nlmsg_flags & NLM_F_ACK || err)
netlink_ack(skb, nlh, err);
- } else if (nlh->nlmsg_flags & NLM_F_ACK)
- netlink_ack(skb, nlh, 0);
netlink_queue_skip(nlh, skb);
}
@@ -1504,9 +1529,14 @@ static int netlink_rcv_skb(struct sk_buff *skb, int (*cb)(struct sk_buff *,
*
* qlen must be initialized to 0 before the initial entry, afterwards
* the function may be called repeatedly until qlen reaches 0.
+ *
+ * The callback function may return -EINTR to signal that processing
+ * of netlink messages shall be interrupted. In this case the message
+ * currently being processed will NOT be requeued onto the receive
+ * queue.
*/
void netlink_run_queue(struct sock *sk, unsigned int *qlen,
- int (*cb)(struct sk_buff *, struct nlmsghdr *, int *))
+ int (*cb)(struct sk_buff *, struct nlmsghdr *))
{
struct sk_buff *skb;
@@ -1537,7 +1567,7 @@ void netlink_run_queue(struct sock *sk, unsigned int *qlen,
* Pulls the given netlink message off the socket buffer so the next
 * call to netlink_run_queue() will not reconsider the message.
*/
-void netlink_queue_skip(struct nlmsghdr *nlh, struct sk_buff *skb)
+static void netlink_queue_skip(struct nlmsghdr *nlh, struct sk_buff *skb)
{
int msglen = NLMSG_ALIGN(nlh->nlmsg_len);
@@ -1820,12 +1850,10 @@ core_initcall(netlink_proto_init);
EXPORT_SYMBOL(netlink_ack);
EXPORT_SYMBOL(netlink_run_queue);
-EXPORT_SYMBOL(netlink_queue_skip);
EXPORT_SYMBOL(netlink_broadcast);
EXPORT_SYMBOL(netlink_dump_start);
EXPORT_SYMBOL(netlink_kernel_create);
EXPORT_SYMBOL(netlink_register_notifier);
-EXPORT_SYMBOL(netlink_set_err);
EXPORT_SYMBOL(netlink_set_nonroot);
EXPORT_SYMBOL(netlink_unicast);
EXPORT_SYMBOL(netlink_unregister_notifier);
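The new contract established here: netlink_dump_start() returns -EINTR on success, and netlink_rcv_skb() treats -EINTR from the callback as "stop walking the queue, do not requeue this message", so userspace gets a chance to drain the dump. What a message handler looks like under this contract, sketched with hypothetical names (example_sock, example_dump, example_doit):

	static int example_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
	{
		if (nlh->nlmsg_flags & NLM_F_DUMP)
			/* -EINTR here means the dump started successfully;
			 * netlink_run_queue() will stop processing for now */
			return netlink_dump_start(example_sock, skb, nlh,
						  example_dump, NULL);

		return example_doit(skb, nlh);	/* ordinary request */
	}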
diff --git a/net/netlink/attr.c b/net/netlink/attr.c
index 004139557e0..df5f820a4c3 100644
--- a/net/netlink/attr.c
+++ b/net/netlink/attr.c
@@ -67,6 +67,11 @@ static int validate_nla(struct nlattr *nla, int maxtype,
}
break;
+ case NLA_BINARY:
+ if (pt->len && attrlen > pt->len)
+ return -ERANGE;
+ break;
+
default:
if (pt->len)
minlen = pt->len;
diff --git a/net/netlink/genetlink.c b/net/netlink/genetlink.c
index c2996794eb2..6e31234a419 100644
--- a/net/netlink/genetlink.c
+++ b/net/netlink/genetlink.c
@@ -295,66 +295,46 @@ int genl_unregister_family(struct genl_family *family)
return -ENOENT;
}
-static int genl_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh,
- int *errp)
+static int genl_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
{
struct genl_ops *ops;
struct genl_family *family;
struct genl_info info;
struct genlmsghdr *hdr = nlmsg_data(nlh);
- int hdrlen, err = -EINVAL;
-
- if (!(nlh->nlmsg_flags & NLM_F_REQUEST))
- goto ignore;
-
- if (nlh->nlmsg_type < NLMSG_MIN_TYPE)
- goto ignore;
+ int hdrlen, err;
family = genl_family_find_byid(nlh->nlmsg_type);
- if (family == NULL) {
- err = -ENOENT;
- goto errout;
- }
+ if (family == NULL)
+ return -ENOENT;
hdrlen = GENL_HDRLEN + family->hdrsize;
if (nlh->nlmsg_len < nlmsg_msg_size(hdrlen))
- goto errout;
+ return -EINVAL;
ops = genl_get_cmd(hdr->cmd, family);
- if (ops == NULL) {
- err = -EOPNOTSUPP;
- goto errout;
- }
+ if (ops == NULL)
+ return -EOPNOTSUPP;
- if ((ops->flags & GENL_ADMIN_PERM) && security_netlink_recv(skb, CAP_NET_ADMIN)) {
- err = -EPERM;
- goto errout;
- }
+ if ((ops->flags & GENL_ADMIN_PERM) &&
+ security_netlink_recv(skb, CAP_NET_ADMIN))
+ return -EPERM;
if (nlh->nlmsg_flags & NLM_F_DUMP) {
- if (ops->dumpit == NULL) {
- err = -EOPNOTSUPP;
- goto errout;
- }
+ if (ops->dumpit == NULL)
+ return -EOPNOTSUPP;
- *errp = err = netlink_dump_start(genl_sock, skb, nlh,
- ops->dumpit, ops->done);
- if (err == 0)
- skb_pull(skb, min(NLMSG_ALIGN(nlh->nlmsg_len),
- skb->len));
- return -1;
+ return netlink_dump_start(genl_sock, skb, nlh,
+ ops->dumpit, ops->done);
}
- if (ops->doit == NULL) {
- err = -EOPNOTSUPP;
- goto errout;
- }
+ if (ops->doit == NULL)
+ return -EOPNOTSUPP;
if (family->attrbuf) {
err = nlmsg_parse(nlh, hdrlen, family->attrbuf, family->maxattr,
ops->policy);
if (err < 0)
- goto errout;
+ return err;
}
info.snd_seq = nlh->nlmsg_seq;
@@ -364,15 +344,7 @@ static int genl_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh,
info.userhdr = nlmsg_data(nlh) + GENL_HDRLEN;
info.attrs = family->attrbuf;
- *errp = err = ops->doit(skb, &info);
- return err;
-
-ignore:
- return 0;
-
-errout:
- *errp = err;
- return -1;
+ return ops->doit(skb, &info);
}
static void genl_rcv(struct sock *sk, int len)
@@ -586,7 +558,7 @@ static int __init genl_init(void)
netlink_set_nonroot(NETLINK_GENERIC, NL_NONROOT_RECV);
genl_sock = netlink_kernel_create(NETLINK_GENERIC, GENL_MAX_ID,
- genl_rcv, THIS_MODULE);
+ genl_rcv, NULL, THIS_MODULE);
if (genl_sock == NULL)
panic("GENL: Cannot initialize generic netlink\n");
diff --git a/net/netrom/af_netrom.c b/net/netrom/af_netrom.c
index bf9837dd95c..5d4a26c2aa0 100644
--- a/net/netrom/af_netrom.c
+++ b/net/netrom/af_netrom.c
@@ -625,42 +625,42 @@ static int nr_connect(struct socket *sock, struct sockaddr *uaddr,
ax25_address *source = NULL;
ax25_uid_assoc *user;
struct net_device *dev;
+ int err = 0;
lock_sock(sk);
if (sk->sk_state == TCP_ESTABLISHED && sock->state == SS_CONNECTING) {
sock->state = SS_CONNECTED;
- release_sock(sk);
- return 0; /* Connect completed during a ERESTARTSYS event */
+		goto out_release;	/* Connect completed during an ERESTARTSYS event */
}
if (sk->sk_state == TCP_CLOSE && sock->state == SS_CONNECTING) {
sock->state = SS_UNCONNECTED;
- release_sock(sk);
- return -ECONNREFUSED;
+ err = -ECONNREFUSED;
+ goto out_release;
}
if (sk->sk_state == TCP_ESTABLISHED) {
- release_sock(sk);
- return -EISCONN; /* No reconnect on a seqpacket socket */
+ err = -EISCONN; /* No reconnect on a seqpacket socket */
+ goto out_release;
}
sk->sk_state = TCP_CLOSE;
sock->state = SS_UNCONNECTED;
if (addr_len != sizeof(struct sockaddr_ax25) && addr_len != sizeof(struct full_sockaddr_ax25)) {
- release_sock(sk);
- return -EINVAL;
+ err = -EINVAL;
+ goto out_release;
}
if (addr->sax25_family != AF_NETROM) {
- release_sock(sk);
- return -EINVAL;
+ err = -EINVAL;
+ goto out_release;
}
if (sock_flag(sk, SOCK_ZAPPED)) { /* Must bind first - autobinding in this may or may not work */
sock_reset_flag(sk, SOCK_ZAPPED);
if ((dev = nr_dev_first()) == NULL) {
- release_sock(sk);
- return -ENETUNREACH;
+ err = -ENETUNREACH;
+ goto out_release;
}
source = (ax25_address *)dev->dev_addr;
@@ -671,8 +671,8 @@ static int nr_connect(struct socket *sock, struct sockaddr *uaddr,
} else {
if (ax25_uid_policy && !capable(CAP_NET_ADMIN)) {
dev_put(dev);
- release_sock(sk);
- return -EPERM;
+ err = -EPERM;
+ goto out_release;
}
nr->user_addr = *source;
}
@@ -707,8 +707,8 @@ static int nr_connect(struct socket *sock, struct sockaddr *uaddr,
/* Now the loop */
if (sk->sk_state != TCP_ESTABLISHED && (flags & O_NONBLOCK)) {
- release_sock(sk);
- return -EINPROGRESS;
+ err = -EINPROGRESS;
+ goto out_release;
}
/*
@@ -716,46 +716,46 @@ static int nr_connect(struct socket *sock, struct sockaddr *uaddr,
* closed.
*/
if (sk->sk_state == TCP_SYN_SENT) {
- struct task_struct *tsk = current;
- DECLARE_WAITQUEUE(wait, tsk);
+ DEFINE_WAIT(wait);
- add_wait_queue(sk->sk_sleep, &wait);
for (;;) {
- set_current_state(TASK_INTERRUPTIBLE);
+ prepare_to_wait(sk->sk_sleep, &wait,
+ TASK_INTERRUPTIBLE);
if (sk->sk_state != TCP_SYN_SENT)
break;
- release_sock(sk);
- if (!signal_pending(tsk)) {
+ if (!signal_pending(current)) {
+ release_sock(sk);
schedule();
lock_sock(sk);
continue;
}
- current->state = TASK_RUNNING;
- remove_wait_queue(sk->sk_sleep, &wait);
- return -ERESTARTSYS;
+ err = -ERESTARTSYS;
+ break;
}
- current->state = TASK_RUNNING;
- remove_wait_queue(sk->sk_sleep, &wait);
+ finish_wait(sk->sk_sleep, &wait);
+ if (err)
+ goto out_release;
}
if (sk->sk_state != TCP_ESTABLISHED) {
sock->state = SS_UNCONNECTED;
- release_sock(sk);
- return sock_error(sk); /* Always set at this point */
+ err = sock_error(sk); /* Always set at this point */
+ goto out_release;
}
sock->state = SS_CONNECTED;
+
+out_release:
release_sock(sk);
- return 0;
+ return err;
}
static int nr_accept(struct socket *sock, struct socket *newsock, int flags)
{
- struct task_struct *tsk = current;
- DECLARE_WAITQUEUE(wait, tsk);
struct sk_buff *skb;
struct sock *newsk;
+ DEFINE_WAIT(wait);
struct sock *sk;
int err = 0;
@@ -765,42 +765,40 @@ static int nr_accept(struct socket *sock, struct socket *newsock, int flags)
lock_sock(sk);
if (sk->sk_type != SOCK_SEQPACKET) {
err = -EOPNOTSUPP;
- goto out;
+ goto out_release;
}
if (sk->sk_state != TCP_LISTEN) {
err = -EINVAL;
- goto out;
+ goto out_release;
}
/*
* The write queue this time is holding sockets ready to use
* hooked into the SABM we saved
*/
- add_wait_queue(sk->sk_sleep, &wait);
for (;;) {
+ prepare_to_wait(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE);
skb = skb_dequeue(&sk->sk_receive_queue);
if (skb)
break;
- current->state = TASK_INTERRUPTIBLE;
- release_sock(sk);
if (flags & O_NONBLOCK) {
- current->state = TASK_RUNNING;
- remove_wait_queue(sk->sk_sleep, &wait);
- return -EWOULDBLOCK;
+ err = -EWOULDBLOCK;
+ break;
}
- if (!signal_pending(tsk)) {
+ if (!signal_pending(current)) {
+ release_sock(sk);
schedule();
lock_sock(sk);
continue;
}
- current->state = TASK_RUNNING;
- remove_wait_queue(sk->sk_sleep, &wait);
- return -ERESTARTSYS;
+ err = -ERESTARTSYS;
+ break;
}
- current->state = TASK_RUNNING;
- remove_wait_queue(sk->sk_sleep, &wait);
+ finish_wait(sk->sk_sleep, &wait);
+ if (err)
+ goto out_release;
newsk = skb->sk;
newsk->sk_socket = newsock;
@@ -811,8 +809,9 @@ static int nr_accept(struct socket *sock, struct socket *newsock, int flags)
sk_acceptq_removed(sk);
newsock->sk = newsk;
-out:
+out_release:
release_sock(sk);
+
return err;
}
@@ -878,7 +877,7 @@ int nr_rx_frame(struct sk_buff *skb, struct net_device *dev)
if (frametype == NR_PROTOEXT &&
circuit_index == NR_PROTO_IP && circuit_id == NR_PROTO_IP) {
skb_pull(skb, NR_NETWORK_LEN + NR_TRANSPORT_LEN);
- skb->h.raw = skb->data;
+ skb_reset_transport_header(skb);
return nr_rx_ip(skb, dev);
}
@@ -904,7 +903,7 @@ int nr_rx_frame(struct sk_buff *skb, struct net_device *dev)
}
if (sk != NULL) {
- skb->h.raw = skb->data;
+ skb_reset_transport_header(skb);
if (frametype == NR_CONNACK && skb->len == 22)
nr_sk(sk)->bpqext = 1;
@@ -1074,6 +1073,7 @@ static int nr_sendmsg(struct kiocb *iocb, struct socket *sock,
goto out;
skb_reserve(skb, size - len);
+ skb_reset_transport_header(skb);
/*
* Push down the NET/ROM header
@@ -1094,14 +1094,12 @@ static int nr_sendmsg(struct kiocb *iocb, struct socket *sock,
/*
* Put the data on the end
*/
+ skb_put(skb, len);
- skb->h.raw = skb_put(skb, len);
-
- asmptr = skb->h.raw;
SOCK_DEBUG(sk, "NET/ROM: Appending user data\n");
/* User data follows immediately after the NET/ROM transport header */
- if (memcpy_fromiovec(asmptr, msg->msg_iov, len)) {
+ if (memcpy_fromiovec(skb_transport_header(skb), msg->msg_iov, len)) {
kfree_skb(skb);
err = -EFAULT;
goto out;
@@ -1149,7 +1147,7 @@ static int nr_recvmsg(struct kiocb *iocb, struct socket *sock,
return er;
}
- skb->h.raw = skb->data;
+ skb_reset_transport_header(skb);
copied = skb->len;
if (copied > size) {
@@ -1161,7 +1159,8 @@ static int nr_recvmsg(struct kiocb *iocb, struct socket *sock,
if (sax != NULL) {
sax->sax25_family = AF_NETROM;
- memcpy(sax->sax25_call.ax25_call, skb->data + 7, AX25_ADDR_LEN);
+ skb_copy_from_linear_data_offset(skb, 7, sax->sax25_call.ax25_call,
+ AX25_ADDR_LEN);
}
msg->msg_namelen = sizeof(*sax);
@@ -1209,6 +1208,12 @@ static int nr_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
release_sock(sk);
return ret;
+ case SIOCGSTAMPNS:
+ lock_sock(sk);
+ ret = sock_get_timestampns(sk, argp);
+ release_sock(sk);
+ return ret;
+
case SIOCGIFADDR:
case SIOCSIFADDR:
case SIOCGIFDSTADDR:
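nr_connect() and nr_accept() swap the open-coded add_wait_queue()/set_current_state() loops for the DEFINE_WAIT()/prepare_to_wait()/finish_wait() idiom. The skeleton of that loop, with the wake-up condition left as a hypothetical predicate:

	DEFINE_WAIT(wait);
	int err = 0;

	for (;;) {
		prepare_to_wait(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE);
		if (condition_met(sk))		/* hypothetical predicate */
			break;
		if (signal_pending(current)) {
			err = -ERESTARTSYS;
			break;
		}
		/* drop the socket lock while sleeping, as above */
		release_sock(sk);
		schedule();
		lock_sock(sk);
	}
	finish_wait(sk->sk_sleep, &wait);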
diff --git a/net/netrom/nr_dev.c b/net/netrom/nr_dev.c
index 9a97ed6e691..c7b5d930e73 100644
--- a/net/netrom/nr_dev.c
+++ b/net/netrom/nr_dev.c
@@ -56,8 +56,8 @@ int nr_rx_ip(struct sk_buff *skb, struct net_device *dev)
/* Spoof incoming device */
skb->dev = dev;
- skb->mac.raw = skb->nh.raw;
- skb->nh.raw = skb->data;
+ skb_reset_mac_header(skb);
+ skb_reset_network_header(skb);
skb->pkt_type = PACKET_HOST;
netif_rx(skb);
diff --git a/net/netrom/nr_in.c b/net/netrom/nr_in.c
index 5560acbaaa9..68176483617 100644
--- a/net/netrom/nr_in.c
+++ b/net/netrom/nr_in.c
@@ -51,10 +51,12 @@ static int nr_queue_rx_frame(struct sock *sk, struct sk_buff *skb, int more)
if ((skbn = alloc_skb(nr->fraglen, GFP_ATOMIC)) == NULL)
return 1;
- skbn->h.raw = skbn->data;
+ skb_reset_transport_header(skbn);
while ((skbo = skb_dequeue(&nr->frag_queue)) != NULL) {
- memcpy(skb_put(skbn, skbo->len), skbo->data, skbo->len);
+ skb_copy_from_linear_data(skbo,
+ skb_put(skbn, skbo->len),
+ skbo->len);
kfree_skb(skbo);
}
diff --git a/net/netrom/nr_loopback.c b/net/netrom/nr_loopback.c
index e856ae1b360..f324d5df418 100644
--- a/net/netrom/nr_loopback.c
+++ b/net/netrom/nr_loopback.c
@@ -34,8 +34,8 @@ int nr_loopback_queue(struct sk_buff *skb)
struct sk_buff *skbn;
if ((skbn = alloc_skb(skb->len, GFP_ATOMIC)) != NULL) {
- memcpy(skb_put(skbn, skb->len), skb->data, skb->len);
- skbn->h.raw = skbn->data;
+ skb_copy_from_linear_data(skb, skb_put(skbn, skb->len), skb->len);
+ skb_reset_transport_header(skbn);
skb_queue_tail(&loopback_queue, skbn);
diff --git a/net/netrom/nr_out.c b/net/netrom/nr_out.c
index 0cbfb611465..e3e6c44e189 100644
--- a/net/netrom/nr_out.c
+++ b/net/netrom/nr_out.c
@@ -40,7 +40,7 @@ void nr_output(struct sock *sk, struct sk_buff *skb)
if (skb->len - NR_TRANSPORT_LEN > NR_MAX_PACKET_SIZE) {
/* Save a copy of the Transport Header */
- memcpy(transport, skb->data, NR_TRANSPORT_LEN);
+ skb_copy_from_linear_data(skb, transport, NR_TRANSPORT_LEN);
skb_pull(skb, NR_TRANSPORT_LEN);
frontlen = skb_headroom(skb);
@@ -54,13 +54,13 @@ void nr_output(struct sock *sk, struct sk_buff *skb)
len = (NR_MAX_PACKET_SIZE > skb->len) ? skb->len : NR_MAX_PACKET_SIZE;
/* Copy the user data */
- memcpy(skb_put(skbn, len), skb->data, len);
+ skb_copy_from_linear_data(skb, skb_put(skbn, len), len);
skb_pull(skb, len);
/* Duplicate the Transport Header */
skb_push(skbn, NR_TRANSPORT_LEN);
- memcpy(skbn->data, transport, NR_TRANSPORT_LEN);
-
+ skb_copy_to_linear_data(skbn, transport,
+ NR_TRANSPORT_LEN);
if (skb->len > 0)
skbn->data[4] |= NR_MORE_FLAG;
diff --git a/net/netrom/nr_route.c b/net/netrom/nr_route.c
index 8e6bd4e9d82..2f76e062609 100644
--- a/net/netrom/nr_route.c
+++ b/net/netrom/nr_route.c
@@ -598,7 +598,7 @@ struct net_device *nr_dev_first(void)
struct net_device *dev, *first = NULL;
read_lock(&dev_base_lock);
- for (dev = dev_base; dev != NULL; dev = dev->next) {
+ for_each_netdev(dev) {
if ((dev->flags & IFF_UP) && dev->type == ARPHRD_NETROM)
if (first == NULL || strncmp(dev->name, first->name, 3) < 0)
first = dev;
@@ -618,12 +618,13 @@ struct net_device *nr_dev_get(ax25_address *addr)
struct net_device *dev;
read_lock(&dev_base_lock);
- for (dev = dev_base; dev != NULL; dev = dev->next) {
+ for_each_netdev(dev) {
if ((dev->flags & IFF_UP) && dev->type == ARPHRD_NETROM && ax25cmp(addr, (ax25_address *)dev->dev_addr) == 0) {
dev_hold(dev);
goto out;
}
}
+ dev = NULL;
out:
read_unlock(&dev_base_lock);
return dev;
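
The added `dev = NULL;` is not cosmetic: unlike the old
`dev = dev_base; dev; dev = dev->next` walk, a list_for_each_entry()-style
for_each_netdev() leaves the iterator pointing at a bogus entry when the loop
completes without a match, so the miss path must clear it explicitly. The
shape being assumed, as a sketch:

	/* Sketch, assuming for_each_netdev() is a list_for_each_entry()
	 * style walk over the global device list: */
	struct net_device *find_up_netrom_dev(void)
	{
		struct net_device *dev;

		read_lock(&dev_base_lock);
		for_each_netdev(dev) {
			if ((dev->flags & IFF_UP) &&
			    dev->type == ARPHRD_NETROM) {
				dev_hold(dev);
				goto out;
			}
		}
		dev = NULL;	/* iterator is NOT NULL after a full pass */
	out:
		read_unlock(&dev_base_lock);
		return dev;
	}
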
diff --git a/net/netrom/nr_subr.c b/net/netrom/nr_subr.c
index 07b694d1887..04e7d0d2fd8 100644
--- a/net/netrom/nr_subr.c
+++ b/net/netrom/nr_subr.c
@@ -226,13 +226,13 @@ void __nr_transmit_reply(struct sk_buff *skb, int mine, unsigned char cmdflags)
dptr = skb_put(skbn, NR_NETWORK_LEN + NR_TRANSPORT_LEN);
- memcpy(dptr, skb->data + 7, AX25_ADDR_LEN);
+ skb_copy_from_linear_data_offset(skb, 7, dptr, AX25_ADDR_LEN);
dptr[6] &= ~AX25_CBIT;
dptr[6] &= ~AX25_EBIT;
dptr[6] |= AX25_SSSID_SPARE;
dptr += AX25_ADDR_LEN;
- memcpy(dptr, skb->data + 0, AX25_ADDR_LEN);
+ skb_copy_from_linear_data(skb, dptr, AX25_ADDR_LEN);
dptr[6] &= ~AX25_CBIT;
dptr[6] |= AX25_EBIT;
dptr[6] |= AX25_SSSID_SPARE;
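
__nr_transmit_reply() is building the reply's NET/ROM network header by
swapping the address pair of the frame being answered. The layout assumed
here (two 7-byte AX.25 callsigns back to back at the head of skb->data):

	/* incoming frame:  [0..6]  origin callsign
	 *                  [7..13] destination callsign (this node)
	 *
	 * reply:           [0..6]  origin      = old destination
	 *                  [7..13] destination = old origin
	 *
	 * Byte 6 of each copied field then has its AX.25 C/E bits patched
	 * (and the spare SSID bits set) to mark a freshly built address.
	 */
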
diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
index 28d47e8f287..02e401cd683 100644
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -114,22 +114,22 @@ On receive:
-----------
Incoming, dev->hard_header!=NULL
- mac.raw -> ll header
- data -> data
+ mac_header -> ll header
+ data -> data
Outgoing, dev->hard_header!=NULL
- mac.raw -> ll header
- data -> ll header
+ mac_header -> ll header
+ data -> ll header
Incoming, dev->hard_header==NULL
- mac.raw -> UNKNOWN position. It is very likely, that it points to ll header.
- PPP makes it, that is wrong, because introduce assymetry
- between rx and tx paths.
- data -> data
+   mac_header -> UNKNOWN position. It is very likely that it points to the
+                 ll header, as PPP does; this is wrong because it introduces
+                 asymmetry between the rx and tx paths.
+ data -> data
Outgoing, dev->hard_header==NULL
- mac.raw -> data. ll header is still not built!
- data -> data
+ mac_header -> data. ll header is still not built!
+ data -> data
In summary:
If dev->hard_header==NULL we are unlikely to restore a sensible ll header.
@@ -139,12 +139,12 @@ On transmit:
------------
dev->hard_header != NULL
- mac.raw -> ll header
- data -> ll header
+ mac_header -> ll header
+ data -> ll header
dev->hard_header == NULL (ll header is added by device, we cannot control it)
- mac.raw -> data
- data -> data
+ mac_header -> data
+ data -> data
We should set nh.raw on output to the correct position;
the packet classifier depends on it.
@@ -201,7 +201,8 @@ struct packet_sock {
struct packet_type prot_hook;
spinlock_t bind_lock;
unsigned int running:1, /* prot_hook is attached*/
- auxdata:1;
+ auxdata:1,
+ origdev:1;
int ifindex; /* bound device */
__be16 num;
#ifdef CONFIG_PACKET_MULTICAST
@@ -284,7 +285,7 @@ static int packet_rcv_spkt(struct sk_buff *skb, struct net_device *dev, struct
* Incoming packets have ll header pulled,
* push it back.
*
- * For outgoing ones skb->data == skb->mac.raw
+ * For outgoing ones skb->data == skb_mac_header(skb)
* so that this procedure is noop.
*/
@@ -303,7 +304,7 @@ static int packet_rcv_spkt(struct sk_buff *skb, struct net_device *dev, struct
spkt = &PACKET_SKB_CB(skb)->sa.pkt;
- skb_push(skb, skb->data-skb->mac.raw);
+ skb_push(skb, skb->data - skb_mac_header(skb));
/*
* The SOCK_PACKET socket receives _all_ frames.
@@ -401,14 +402,14 @@ static int packet_sendmsg_spkt(struct kiocb *iocb, struct socket *sock,
* notable one here. This should really be fixed at the driver level.
*/
skb_reserve(skb, LL_RESERVED_SPACE(dev));
- skb->nh.raw = skb->data;
+ skb_reset_network_header(skb);
/* Try to align data part correctly */
if (dev->hard_header) {
skb->data -= dev->hard_header_len;
skb->tail -= dev->hard_header_len;
if (len < dev->hard_header_len)
- skb->nh.raw = skb->data;
+ skb_reset_network_header(skb);
}
/* Returns -EFAULT on error */
@@ -488,10 +489,10 @@ static int packet_rcv(struct sk_buff *skb, struct net_device *dev, struct packet
never delivered to user.
*/
if (sk->sk_type != SOCK_DGRAM)
- skb_push(skb, skb->data - skb->mac.raw);
+ skb_push(skb, skb->data - skb_mac_header(skb));
else if (skb->pkt_type == PACKET_OUTGOING) {
/* Special case: outgoing packets have ll header at head */
- skb_pull(skb, skb->nh.raw - skb->data);
+ skb_pull(skb, skb_network_offset(skb));
}
}
@@ -528,7 +529,10 @@ static int packet_rcv(struct sk_buff *skb, struct net_device *dev, struct packet
sll->sll_hatype = dev->type;
sll->sll_protocol = skb->protocol;
sll->sll_pkttype = skb->pkt_type;
- sll->sll_ifindex = dev->ifindex;
+ if (unlikely(po->origdev) && skb->pkt_type == PACKET_HOST)
+ sll->sll_ifindex = orig_dev->ifindex;
+ else
+ sll->sll_ifindex = dev->ifindex;
sll->sll_halen = 0;
if (dev->hard_header_parse)
@@ -582,6 +586,7 @@ static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev, struct packe
unsigned long status = TP_STATUS_LOSING|TP_STATUS_USER;
unsigned short macoff, netoff;
struct sk_buff *copy_skb = NULL;
+ struct timeval tv;
if (skb->pkt_type == PACKET_LOOPBACK)
goto drop;
@@ -591,10 +596,10 @@ static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev, struct packe
if (dev->hard_header) {
if (sk->sk_type != SOCK_DGRAM)
- skb_push(skb, skb->data - skb->mac.raw);
+ skb_push(skb, skb->data - skb_mac_header(skb));
else if (skb->pkt_type == PACKET_OUTGOING) {
/* Special case: outgoing packets have ll header at head */
- skb_pull(skb, skb->nh.raw - skb->data);
+ skb_pull(skb, skb_network_offset(skb));
}
}
@@ -612,7 +617,7 @@ static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev, struct packe
if (sk->sk_type == SOCK_DGRAM) {
macoff = netoff = TPACKET_ALIGN(TPACKET_HDRLEN) + 16;
} else {
- unsigned maclen = skb->nh.raw - skb->data;
+ unsigned maclen = skb_network_offset(skb);
netoff = TPACKET_ALIGN(TPACKET_HDRLEN + (maclen < 16 ? 16 : maclen));
macoff = netoff - maclen;
}
@@ -656,12 +661,13 @@ static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev, struct packe
h->tp_snaplen = snaplen;
h->tp_mac = macoff;
h->tp_net = netoff;
- if (skb->tstamp.off_sec == 0) {
+ if (skb->tstamp.tv64 == 0) {
__net_timestamp(skb);
sock_enable_timestamp(sk);
}
- h->tp_sec = skb->tstamp.off_sec;
- h->tp_usec = skb->tstamp.off_usec;
+ tv = ktime_to_timeval(skb->tstamp);
+ h->tp_sec = tv.tv_sec;
+ h->tp_usec = tv.tv_usec;
sll = (struct sockaddr_ll*)((u8*)h + TPACKET_ALIGN(sizeof(*h)));
sll->sll_halen = 0;
@@ -671,7 +677,10 @@ static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev, struct packe
sll->sll_hatype = dev->type;
sll->sll_protocol = skb->protocol;
sll->sll_pkttype = skb->pkt_type;
- sll->sll_ifindex = dev->ifindex;
+ if (unlikely(po->origdev) && skb->pkt_type == PACKET_HOST)
+ sll->sll_ifindex = orig_dev->ifindex;
+ else
+ sll->sll_ifindex = dev->ifindex;
h->tp_status = status;
smp_mb();
@@ -766,14 +775,14 @@ static int packet_sendmsg(struct kiocb *iocb, struct socket *sock,
goto out_unlock;
skb_reserve(skb, LL_RESERVED_SPACE(dev));
- skb->nh.raw = skb->data;
+ skb_reset_network_header(skb);
if (dev->hard_header) {
int res;
err = -EINVAL;
res = dev->hard_header(skb, dev, ntohs(proto), addr, NULL, len);
if (sock->type != SOCK_DGRAM) {
- skb->tail = skb->data;
+ skb_reset_tail_pointer(skb);
skb->len = 0;
} else if (res < 0)
goto out_free;
@@ -1143,7 +1152,7 @@ static int packet_recvmsg(struct kiocb *iocb, struct socket *sock,
aux.tp_len = PACKET_SKB_CB(skb)->origlen;
aux.tp_snaplen = skb->len;
aux.tp_mac = 0;
- aux.tp_net = skb->nh.raw - skb->data;
+ aux.tp_net = skb_network_offset(skb);
put_cmsg(msg, SOL_PACKET, PACKET_AUXDATA, sizeof(aux), &aux);
}
@@ -1411,6 +1420,18 @@ packet_setsockopt(struct socket *sock, int level, int optname, char __user *optv
po->auxdata = !!val;
return 0;
}
+ case PACKET_ORIGDEV:
+ {
+ int val;
+
+ if (optlen < sizeof(val))
+ return -EINVAL;
+ if (copy_from_user(&val, optval, sizeof(val)))
+ return -EFAULT;
+
+ po->origdev = !!val;
+ return 0;
+ }
default:
return -ENOPROTOOPT;
}
@@ -1454,6 +1475,13 @@ static int packet_getsockopt(struct socket *sock, int level, int optname,
data = &val;
break;
+ case PACKET_ORIGDEV:
+ if (len > sizeof(int))
+ len = sizeof(int);
+ val = po->origdev;
+
+ data = &val;
+ break;
default:
return -ENOPROTOOPT;
}
@@ -1543,6 +1571,8 @@ static int packet_ioctl(struct socket *sock, unsigned int cmd,
}
case SIOCGSTAMP:
return sock_get_timestamp(sk, (struct timeval __user *)arg);
+ case SIOCGSTAMPNS:
+ return sock_get_timestampns(sk, (struct timespec __user *)arg);
#ifdef CONFIG_INET
case SIOCADDRT:
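
The new PACKET_ORIGDEV option is a plain integer sockopt: when set, the
sockaddr_ll delivered with each received packet carries the ifindex of the
device the frame physically arrived on (a bonding slave, say) rather than the
aggregating device. A userspace sketch, assuming PACKET_ORIGDEV is exported
via <linux/if_packet.h> in this tree:

	#include <sys/socket.h>
	#include <linux/if_packet.h>

	/* Ask for the originating device's ifindex in sll_ifindex. */
	static int enable_origdev(int fd)
	{
		int one = 1;

		return setsockopt(fd, SOL_PACKET, PACKET_ORIGDEV,
				  &one, sizeof(one));
	}
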
diff --git a/net/rose/af_rose.c b/net/rose/af_rose.c
index f92d5310847..d476c43d521 100644
--- a/net/rose/af_rose.c
+++ b/net/rose/af_rose.c
@@ -812,26 +812,26 @@ rose_try_next_neigh:
* closed.
*/
if (sk->sk_state == TCP_SYN_SENT) {
- struct task_struct *tsk = current;
- DECLARE_WAITQUEUE(wait, tsk);
+ DEFINE_WAIT(wait);
- add_wait_queue(sk->sk_sleep, &wait);
for (;;) {
- set_current_state(TASK_INTERRUPTIBLE);
+ prepare_to_wait(sk->sk_sleep, &wait,
+ TASK_INTERRUPTIBLE);
if (sk->sk_state != TCP_SYN_SENT)
break;
- release_sock(sk);
- if (!signal_pending(tsk)) {
+ if (!signal_pending(current)) {
+ release_sock(sk);
schedule();
lock_sock(sk);
continue;
}
- current->state = TASK_RUNNING;
- remove_wait_queue(sk->sk_sleep, &wait);
- return -ERESTARTSYS;
+ err = -ERESTARTSYS;
+ break;
}
- current->state = TASK_RUNNING;
- remove_wait_queue(sk->sk_sleep, &wait);
+ finish_wait(sk->sk_sleep, &wait);
+
+ if (err)
+ goto out_release;
}
if (sk->sk_state != TCP_ESTABLISHED) {
@@ -856,10 +856,9 @@ out_release:
static int rose_accept(struct socket *sock, struct socket *newsock, int flags)
{
- struct task_struct *tsk = current;
- DECLARE_WAITQUEUE(wait, tsk);
struct sk_buff *skb;
struct sock *newsk;
+ DEFINE_WAIT(wait);
struct sock *sk;
int err = 0;
@@ -869,42 +868,41 @@ static int rose_accept(struct socket *sock, struct socket *newsock, int flags)
lock_sock(sk);
if (sk->sk_type != SOCK_SEQPACKET) {
err = -EOPNOTSUPP;
- goto out;
+ goto out_release;
}
if (sk->sk_state != TCP_LISTEN) {
err = -EINVAL;
- goto out;
+ goto out_release;
}
/*
* The write queue this time is holding sockets ready to use
* hooked into the SABM we saved
*/
- add_wait_queue(sk->sk_sleep, &wait);
for (;;) {
+ prepare_to_wait(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE);
+
skb = skb_dequeue(&sk->sk_receive_queue);
if (skb)
break;
- current->state = TASK_INTERRUPTIBLE;
- release_sock(sk);
if (flags & O_NONBLOCK) {
- current->state = TASK_RUNNING;
- remove_wait_queue(sk->sk_sleep, &wait);
- return -EWOULDBLOCK;
+ err = -EWOULDBLOCK;
+ break;
}
- if (!signal_pending(tsk)) {
+ if (!signal_pending(current)) {
+ release_sock(sk);
schedule();
lock_sock(sk);
continue;
}
- current->state = TASK_RUNNING;
- remove_wait_queue(sk->sk_sleep, &wait);
- return -ERESTARTSYS;
+ err = -ERESTARTSYS;
+ break;
}
- current->state = TASK_RUNNING;
- remove_wait_queue(sk->sk_sleep, &wait);
+ finish_wait(sk->sk_sleep, &wait);
+ if (err)
+ goto out_release;
newsk = skb->sk;
newsk->sk_socket = newsock;
@@ -916,7 +914,7 @@ static int rose_accept(struct socket *sock, struct socket *newsock, int flags)
sk->sk_ack_backlog--;
newsock->sk = newsk;
-out:
+out_release:
release_sock(sk);
return err;
@@ -1105,9 +1103,10 @@ static int rose_sendmsg(struct kiocb *iocb, struct socket *sock,
*/
SOCK_DEBUG(sk, "ROSE: Appending user data\n");
- asmptr = skb->h.raw = skb_put(skb, len);
+ skb_reset_transport_header(skb);
+ skb_put(skb, len);
- err = memcpy_fromiovec(asmptr, msg->msg_iov, len);
+ err = memcpy_fromiovec(skb_transport_header(skb), msg->msg_iov, len);
if (err) {
kfree_skb(skb);
return err;
@@ -1155,7 +1154,7 @@ static int rose_sendmsg(struct kiocb *iocb, struct socket *sock,
int lg;
/* Save a copy of the Header */
- memcpy(header, skb->data, ROSE_MIN_LEN);
+ skb_copy_from_linear_data(skb, header, ROSE_MIN_LEN);
skb_pull(skb, ROSE_MIN_LEN);
frontlen = skb_headroom(skb);
@@ -1175,12 +1174,12 @@ static int rose_sendmsg(struct kiocb *iocb, struct socket *sock,
lg = (ROSE_PACLEN > skb->len) ? skb->len : ROSE_PACLEN;
/* Copy the user data */
- memcpy(skb_put(skbn, lg), skb->data, lg);
+ skb_copy_from_linear_data(skb, skb_put(skbn, lg), lg);
skb_pull(skb, lg);
/* Duplicate the Header */
skb_push(skbn, ROSE_MIN_LEN);
- memcpy(skbn->data, header, ROSE_MIN_LEN);
+ skb_copy_to_linear_data(skbn, header, ROSE_MIN_LEN);
if (skb->len > 0)
skbn->data[2] |= M_BIT;
@@ -1234,7 +1233,7 @@ static int rose_recvmsg(struct kiocb *iocb, struct socket *sock,
*asmptr = qbit;
}
- skb->h.raw = skb->data;
+ skb_reset_transport_header(skb);
copied = skb->len;
if (copied > size) {
@@ -1296,6 +1295,9 @@ static int rose_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
case SIOCGSTAMP:
return sock_get_timestamp(sk, (struct timeval __user *) argp);
+ case SIOCGSTAMPNS:
+ return sock_get_timestampns(sk, (struct timespec __user *) argp);
+
case SIOCGIFADDR:
case SIOCSIFADDR:
case SIOCGIFDSTADDR:
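
Both converted loops above follow the same prepare_to_wait()/finish_wait()
skeleton, which closes the lost-wakeup window the old open-coded
set_current_state() sequences left between testing the condition and going to
sleep, and guarantees the task state and wait-queue entry are cleaned up on
every exit path. The common shape, as a sketch (condition_met() is a
hypothetical stand-in for the real per-caller test):

	DEFINE_WAIT(wait);
	int err = 0;

	for (;;) {
		prepare_to_wait(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE);
		if (condition_met(sk))
			break;
		if (signal_pending(current)) {
			err = -ERESTARTSYS;	/* unwind via finish_wait() */
			break;
		}
		release_sock(sk);		/* sleep without the socket lock */
		schedule();
		lock_sock(sk);
	}
	finish_wait(sk->sk_sleep, &wait);
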
diff --git a/net/rose/rose_loopback.c b/net/rose/rose_loopback.c
index 3e41bd93ab9..cd01642f049 100644
--- a/net/rose/rose_loopback.c
+++ b/net/rose/rose_loopback.c
@@ -77,7 +77,7 @@ static void rose_loopback_timer(unsigned long param)
dest = (rose_address *)(skb->data + 4);
lci_o = 0xFFF - lci_i;
- skb->h.raw = skb->data;
+ skb_reset_transport_header(skb);
sk = rose_find_socket(lci_o, &rose_loopback_neigh);
if (sk) {
diff --git a/net/rose/rose_route.c b/net/rose/rose_route.c
index a1233e1b1ab..929a784a86d 100644
--- a/net/rose/rose_route.c
+++ b/net/rose/rose_route.c
@@ -596,7 +596,7 @@ struct net_device *rose_dev_first(void)
struct net_device *dev, *first = NULL;
read_lock(&dev_base_lock);
- for (dev = dev_base; dev != NULL; dev = dev->next) {
+ for_each_netdev(dev) {
if ((dev->flags & IFF_UP) && dev->type == ARPHRD_ROSE)
if (first == NULL || strncmp(dev->name, first->name, 3) < 0)
first = dev;
@@ -614,12 +614,13 @@ struct net_device *rose_dev_get(rose_address *addr)
struct net_device *dev;
read_lock(&dev_base_lock);
- for (dev = dev_base; dev != NULL; dev = dev->next) {
+ for_each_netdev(dev) {
if ((dev->flags & IFF_UP) && dev->type == ARPHRD_ROSE && rosecmp(addr, (rose_address *)dev->dev_addr) == 0) {
dev_hold(dev);
goto out;
}
}
+ dev = NULL;
out:
read_unlock(&dev_base_lock);
return dev;
@@ -630,10 +631,11 @@ static int rose_dev_exists(rose_address *addr)
struct net_device *dev;
read_lock(&dev_base_lock);
- for (dev = dev_base; dev != NULL; dev = dev->next) {
+ for_each_netdev(dev) {
if ((dev->flags & IFF_UP) && dev->type == ARPHRD_ROSE && rosecmp(addr, (rose_address *)dev->dev_addr) == 0)
goto out;
}
+ dev = NULL;
out:
read_unlock(&dev_base_lock);
return dev != NULL;
@@ -906,7 +908,7 @@ int rose_route_frame(struct sk_buff *skb, ax25_cb *ax25)
}
}
else {
- skb->h.raw = skb->data;
+ skb_reset_transport_header(skb);
res = rose_process_rx_frame(sk, skb);
goto out;
}
diff --git a/net/rxrpc/Kconfig b/net/rxrpc/Kconfig
new file mode 100644
index 00000000000..91b3d52f6f1
--- /dev/null
+++ b/net/rxrpc/Kconfig
@@ -0,0 +1,43 @@
+#
+# RxRPC session sockets
+#
+
+config AF_RXRPC
+ tristate "RxRPC session sockets"
+ depends on EXPERIMENTAL
+ select KEYS
+ help
+ Say Y or M here to include support for RxRPC session sockets (just
+ the transport part, not the presentation part: (un)marshalling is
+ left to the application).
+
+ These are used for AFS kernel filesystem and userspace utilities.
+
+	  At the moment this module only supports client operations and is
+	  incomplete.
+
+ See Documentation/networking/rxrpc.txt.
+
+
+config AF_RXRPC_DEBUG
+ bool "RxRPC dynamic debugging"
+ depends on AF_RXRPC
+ help
+ Say Y here to make runtime controllable debugging messages appear.
+
+ See Documentation/networking/rxrpc.txt.
+
+
+config RXKAD
+ tristate "RxRPC Kerberos security"
+ depends on AF_RXRPC
+ select CRYPTO
+ select CRYPTO_MANAGER
+ select CRYPTO_BLKCIPHER
+ select CRYPTO_PCBC
+ select CRYPTO_FCRYPT
+ help
+	  Provide Kerberos 4 and AFS kaserver security handling for AF_RXRPC
+ through the use of the key retention service.
+
+ See Documentation/networking/rxrpc.txt.
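
To build the new protocol as modules, the matching .config fragment would be
something like the following (RXKAD is optional; the crypto pieces listed
above are pulled in automatically via select):

	CONFIG_AF_RXRPC=m
	# CONFIG_AF_RXRPC_DEBUG is not set
	CONFIG_RXKAD=m
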
diff --git a/net/rxrpc/Makefile b/net/rxrpc/Makefile
index 6efcb6f162a..c46867c61c9 100644
--- a/net/rxrpc/Makefile
+++ b/net/rxrpc/Makefile
@@ -1,25 +1,29 @@
#
-# Makefile for Linux kernel Rx RPC
+# Makefile for Linux kernel RxRPC
#
-#CFLAGS += -finstrument-functions
-
-rxrpc-objs := \
- call.o \
- connection.o \
- krxiod.o \
- krxsecd.o \
- krxtimod.o \
- main.o \
- peer.o \
- rxrpc_syms.o \
- transport.o
+af-rxrpc-objs := \
+ af_rxrpc.o \
+ ar-accept.o \
+ ar-ack.o \
+ ar-call.o \
+ ar-connection.o \
+ ar-connevent.o \
+ ar-error.o \
+ ar-input.o \
+ ar-key.o \
+ ar-local.o \
+ ar-output.o \
+ ar-peer.o \
+ ar-recvmsg.o \
+ ar-security.o \
+ ar-skbuff.o \
+ ar-transport.o
ifeq ($(CONFIG_PROC_FS),y)
-rxrpc-objs += proc.o
-endif
-ifeq ($(CONFIG_SYSCTL),y)
-rxrpc-objs += sysctl.o
+af-rxrpc-objs += ar-proc.o
endif
-obj-$(CONFIG_RXRPC) := rxrpc.o
+obj-$(CONFIG_AF_RXRPC) += af-rxrpc.o
+
+obj-$(CONFIG_RXKAD) += rxkad.o
diff --git a/net/rxrpc/af_rxrpc.c b/net/rxrpc/af_rxrpc.c
new file mode 100644
index 00000000000..2c57df9c131
--- /dev/null
+++ b/net/rxrpc/af_rxrpc.c
@@ -0,0 +1,879 @@
+/* AF_RXRPC implementation
+ *
+ * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/module.h>
+#include <linux/net.h>
+#include <linux/skbuff.h>
+#include <linux/poll.h>
+#include <linux/proc_fs.h>
+#include <net/sock.h>
+#include <net/af_rxrpc.h>
+#include "ar-internal.h"
+
+MODULE_DESCRIPTION("RxRPC network protocol");
+MODULE_AUTHOR("Red Hat, Inc.");
+MODULE_LICENSE("GPL");
+MODULE_ALIAS_NETPROTO(PF_RXRPC);
+
+unsigned rxrpc_debug; /* = RXRPC_DEBUG_KPROTO */
+module_param_named(debug, rxrpc_debug, uint, S_IWUSR | S_IRUGO);
+MODULE_PARM_DESC(debug, "RxRPC debugging mask");
+
+static int sysctl_rxrpc_max_qlen __read_mostly = 10;
+
+static struct proto rxrpc_proto;
+static const struct proto_ops rxrpc_rpc_ops;
+
+/* local epoch for detecting local-end reset */
+__be32 rxrpc_epoch;
+
+/* current debugging ID */
+atomic_t rxrpc_debug_id;
+
+/* count of skbs currently in use */
+atomic_t rxrpc_n_skbs;
+
+struct workqueue_struct *rxrpc_workqueue;
+
+static void rxrpc_sock_destructor(struct sock *);
+
+/*
+ * see if an RxRPC socket is currently writable
+ */
+static inline int rxrpc_writable(struct sock *sk)
+{
+ return atomic_read(&sk->sk_wmem_alloc) < (size_t) sk->sk_sndbuf;
+}
+
+/*
+ * wait for write buffer space to become available
+ */
+static void rxrpc_write_space(struct sock *sk)
+{
+ _enter("%p", sk);
+ read_lock(&sk->sk_callback_lock);
+ if (rxrpc_writable(sk)) {
+ if (sk->sk_sleep && waitqueue_active(sk->sk_sleep))
+ wake_up_interruptible(sk->sk_sleep);
+ sk_wake_async(sk, 2, POLL_OUT);
+ }
+ read_unlock(&sk->sk_callback_lock);
+}
+
+/*
+ * validate an RxRPC address
+ */
+static int rxrpc_validate_address(struct rxrpc_sock *rx,
+ struct sockaddr_rxrpc *srx,
+ int len)
+{
+ if (len < sizeof(struct sockaddr_rxrpc))
+ return -EINVAL;
+
+ if (srx->srx_family != AF_RXRPC)
+ return -EAFNOSUPPORT;
+
+ if (srx->transport_type != SOCK_DGRAM)
+ return -ESOCKTNOSUPPORT;
+
+ len -= offsetof(struct sockaddr_rxrpc, transport);
+ if (srx->transport_len < sizeof(sa_family_t) ||
+ srx->transport_len > len)
+ return -EINVAL;
+
+ if (srx->transport.family != rx->proto)
+ return -EAFNOSUPPORT;
+
+ switch (srx->transport.family) {
+ case AF_INET:
+ _debug("INET: %x @ %u.%u.%u.%u",
+ ntohs(srx->transport.sin.sin_port),
+ NIPQUAD(srx->transport.sin.sin_addr));
+ if (srx->transport_len > 8)
+ memset((void *)&srx->transport + 8, 0,
+ srx->transport_len - 8);
+ break;
+
+ case AF_INET6:
+ default:
+ return -EAFNOSUPPORT;
+ }
+
+ return 0;
+}
+
+/*
+ * bind a local address to an RxRPC socket
+ */
+static int rxrpc_bind(struct socket *sock, struct sockaddr *saddr, int len)
+{
+ struct sockaddr_rxrpc *srx = (struct sockaddr_rxrpc *) saddr;
+ struct sock *sk = sock->sk;
+ struct rxrpc_local *local;
+ struct rxrpc_sock *rx = rxrpc_sk(sk), *prx;
+ __be16 service_id;
+ int ret;
+
+ _enter("%p,%p,%d", rx, saddr, len);
+
+ ret = rxrpc_validate_address(rx, srx, len);
+ if (ret < 0)
+ goto error;
+
+ lock_sock(&rx->sk);
+
+ if (rx->sk.sk_state != RXRPC_UNCONNECTED) {
+ ret = -EINVAL;
+ goto error_unlock;
+ }
+
+ memcpy(&rx->srx, srx, sizeof(rx->srx));
+
+ /* find a local transport endpoint if we don't have one already */
+ local = rxrpc_lookup_local(&rx->srx);
+ if (IS_ERR(local)) {
+ ret = PTR_ERR(local);
+ goto error_unlock;
+ }
+
+ rx->local = local;
+ if (srx->srx_service) {
+ service_id = htons(srx->srx_service);
+ write_lock_bh(&local->services_lock);
+ list_for_each_entry(prx, &local->services, listen_link) {
+ if (prx->service_id == service_id)
+ goto service_in_use;
+ }
+
+ rx->service_id = service_id;
+ list_add_tail(&rx->listen_link, &local->services);
+ write_unlock_bh(&local->services_lock);
+
+ rx->sk.sk_state = RXRPC_SERVER_BOUND;
+ } else {
+ rx->sk.sk_state = RXRPC_CLIENT_BOUND;
+ }
+
+ release_sock(&rx->sk);
+ _leave(" = 0");
+ return 0;
+
+service_in_use:
+ ret = -EADDRINUSE;
+ write_unlock_bh(&local->services_lock);
+error_unlock:
+ release_sock(&rx->sk);
+error:
+ _leave(" = %d", ret);
+ return ret;
+}
+
+/*
+ * set the number of pending calls permitted on a listening socket
+ */
+static int rxrpc_listen(struct socket *sock, int backlog)
+{
+ struct sock *sk = sock->sk;
+ struct rxrpc_sock *rx = rxrpc_sk(sk);
+ int ret;
+
+ _enter("%p,%d", rx, backlog);
+
+ lock_sock(&rx->sk);
+
+ switch (rx->sk.sk_state) {
+ case RXRPC_UNCONNECTED:
+ ret = -EADDRNOTAVAIL;
+ break;
+ case RXRPC_CLIENT_BOUND:
+ case RXRPC_CLIENT_CONNECTED:
+ default:
+ ret = -EBUSY;
+ break;
+ case RXRPC_SERVER_BOUND:
+ ASSERT(rx->local != NULL);
+ sk->sk_max_ack_backlog = backlog;
+ rx->sk.sk_state = RXRPC_SERVER_LISTENING;
+ ret = 0;
+ break;
+ }
+
+ release_sock(&rx->sk);
+ _leave(" = %d", ret);
+ return ret;
+}
+
+/*
+ * find a transport by address
+ */
+static struct rxrpc_transport *rxrpc_name_to_transport(struct socket *sock,
+ struct sockaddr *addr,
+ int addr_len, int flags,
+ gfp_t gfp)
+{
+ struct sockaddr_rxrpc *srx = (struct sockaddr_rxrpc *) addr;
+ struct rxrpc_transport *trans;
+ struct rxrpc_sock *rx = rxrpc_sk(sock->sk);
+ struct rxrpc_peer *peer;
+
+ _enter("%p,%p,%d,%d", rx, addr, addr_len, flags);
+
+ ASSERT(rx->local != NULL);
+ ASSERT(rx->sk.sk_state > RXRPC_UNCONNECTED);
+
+ if (rx->srx.transport_type != srx->transport_type)
+ return ERR_PTR(-ESOCKTNOSUPPORT);
+ if (rx->srx.transport.family != srx->transport.family)
+ return ERR_PTR(-EAFNOSUPPORT);
+
+ /* find a remote transport endpoint from the local one */
+ peer = rxrpc_get_peer(srx, gfp);
+ if (IS_ERR(peer))
+ return ERR_PTR(PTR_ERR(peer));
+
+ /* find a transport */
+ trans = rxrpc_get_transport(rx->local, peer, gfp);
+ rxrpc_put_peer(peer);
+ _leave(" = %p", trans);
+ return trans;
+}
+
+/**
+ * rxrpc_kernel_begin_call - Allow a kernel service to begin a call
+ * @sock: The socket on which to make the call
+ * @srx: The address of the peer to contact (defaults to socket setting)
+ * @key: The security context to use (defaults to socket setting)
+ * @user_call_ID: The ID to use
+ * @gfp: Allocation flags
+ *
+ * Allow a kernel service to begin a call on the nominated socket. This just
+ * sets up all the internal tracking structures and allocates connection and
+ * call IDs as appropriate. The call to be used is returned.
+ *
+ * The default socket destination address and security may be overridden by
+ * supplying @srx and @key.
+ */
+struct rxrpc_call *rxrpc_kernel_begin_call(struct socket *sock,
+ struct sockaddr_rxrpc *srx,
+ struct key *key,
+ unsigned long user_call_ID,
+ gfp_t gfp)
+{
+ struct rxrpc_conn_bundle *bundle;
+ struct rxrpc_transport *trans;
+ struct rxrpc_call *call;
+ struct rxrpc_sock *rx = rxrpc_sk(sock->sk);
+ __be16 service_id;
+
+ _enter(",,%x,%lx", key_serial(key), user_call_ID);
+
+ lock_sock(&rx->sk);
+
+ if (srx) {
+ trans = rxrpc_name_to_transport(sock, (struct sockaddr *) srx,
+ sizeof(*srx), 0, gfp);
+ if (IS_ERR(trans)) {
+ call = ERR_PTR(PTR_ERR(trans));
+ trans = NULL;
+ goto out;
+ }
+ } else {
+ trans = rx->trans;
+ if (!trans) {
+ call = ERR_PTR(-ENOTCONN);
+ goto out;
+ }
+ atomic_inc(&trans->usage);
+ }
+
+ service_id = rx->service_id;
+ if (srx)
+ service_id = htons(srx->srx_service);
+
+ if (!key)
+ key = rx->key;
+ if (key && !key->payload.data)
+ key = NULL; /* a no-security key */
+
+ bundle = rxrpc_get_bundle(rx, trans, key, service_id, gfp);
+ if (IS_ERR(bundle)) {
+ call = ERR_PTR(PTR_ERR(bundle));
+ goto out;
+ }
+
+ call = rxrpc_get_client_call(rx, trans, bundle, user_call_ID, true,
+ gfp);
+ rxrpc_put_bundle(trans, bundle);
+out:
+ rxrpc_put_transport(trans);
+ release_sock(&rx->sk);
+ _leave(" = %p", call);
+ return call;
+}
+
+EXPORT_SYMBOL(rxrpc_kernel_begin_call);
+
+/**
+ * rxrpc_kernel_end_call - Allow a kernel service to end a call it was using
+ * @call: The call to end
+ *
+ * Allow a kernel service to end a call it was using. The call must be
+ * complete before this is called (the call should be aborted if necessary).
+ */
+void rxrpc_kernel_end_call(struct rxrpc_call *call)
+{
+ _enter("%d{%d}", call->debug_id, atomic_read(&call->usage));
+ rxrpc_remove_user_ID(call->socket, call);
+ rxrpc_put_call(call);
+}
+
+EXPORT_SYMBOL(rxrpc_kernel_end_call);
+
+/**
+ * rxrpc_kernel_intercept_rx_messages - Intercept received RxRPC messages
+ * @sock: The socket to intercept received messages on
+ * @interceptor: The function to pass the messages to
+ *
+ * Allow a kernel service to intercept messages heading for the Rx queue on an
+ * RxRPC socket. They get passed to the specified function instead.
+ * @interceptor should free the socket buffers it is given. @interceptor is
+ * called with the socket receive queue spinlock held and softirqs disabled -
+ * this ensures that the messages will be delivered in the right order.
+ */
+void rxrpc_kernel_intercept_rx_messages(struct socket *sock,
+ rxrpc_interceptor_t interceptor)
+{
+ struct rxrpc_sock *rx = rxrpc_sk(sock->sk);
+
+ _enter("");
+ rx->interceptor = interceptor;
+}
+
+EXPORT_SYMBOL(rxrpc_kernel_intercept_rx_messages);
+
+/*
+ * connect an RxRPC socket
+ * - this just targets it at a specific destination; no actual connection
+ * negotiation takes place
+ */
+static int rxrpc_connect(struct socket *sock, struct sockaddr *addr,
+ int addr_len, int flags)
+{
+ struct sockaddr_rxrpc *srx = (struct sockaddr_rxrpc *) addr;
+ struct sock *sk = sock->sk;
+ struct rxrpc_transport *trans;
+ struct rxrpc_local *local;
+ struct rxrpc_sock *rx = rxrpc_sk(sk);
+ int ret;
+
+ _enter("%p,%p,%d,%d", rx, addr, addr_len, flags);
+
+ ret = rxrpc_validate_address(rx, srx, addr_len);
+ if (ret < 0) {
+ _leave(" = %d [bad addr]", ret);
+ return ret;
+ }
+
+ lock_sock(&rx->sk);
+
+ switch (rx->sk.sk_state) {
+ case RXRPC_UNCONNECTED:
+ /* find a local transport endpoint if we don't have one already */
+ ASSERTCMP(rx->local, ==, NULL);
+ rx->srx.srx_family = AF_RXRPC;
+ rx->srx.srx_service = 0;
+ rx->srx.transport_type = srx->transport_type;
+ rx->srx.transport_len = sizeof(sa_family_t);
+ rx->srx.transport.family = srx->transport.family;
+ local = rxrpc_lookup_local(&rx->srx);
+ if (IS_ERR(local)) {
+ release_sock(&rx->sk);
+ return PTR_ERR(local);
+ }
+ rx->local = local;
+ rx->sk.sk_state = RXRPC_CLIENT_BOUND;
+ case RXRPC_CLIENT_BOUND:
+ break;
+ case RXRPC_CLIENT_CONNECTED:
+ release_sock(&rx->sk);
+ return -EISCONN;
+ default:
+ release_sock(&rx->sk);
+		return -EBUSY; /* server sockets can't also connect */
+ }
+
+ trans = rxrpc_name_to_transport(sock, addr, addr_len, flags,
+ GFP_KERNEL);
+ if (IS_ERR(trans)) {
+ release_sock(&rx->sk);
+ _leave(" = %ld", PTR_ERR(trans));
+ return PTR_ERR(trans);
+ }
+
+ rx->trans = trans;
+ rx->service_id = htons(srx->srx_service);
+ rx->sk.sk_state = RXRPC_CLIENT_CONNECTED;
+
+ release_sock(&rx->sk);
+ return 0;
+}
+
+/*
+ * send a message through an RxRPC socket
+ * - in a client this does a number of things:
+ * - finds/sets up a connection for the security specified (if any)
+ * - initiates a call (ID in control data)
+ * - ends the request phase of a call (if MSG_MORE is not set)
+ * - sends a call data packet
+ * - may send an abort (abort code in control data)
+ */
+static int rxrpc_sendmsg(struct kiocb *iocb, struct socket *sock,
+ struct msghdr *m, size_t len)
+{
+ struct rxrpc_transport *trans;
+ struct rxrpc_sock *rx = rxrpc_sk(sock->sk);
+ int ret;
+
+ _enter(",{%d},,%zu", rx->sk.sk_state, len);
+
+ if (m->msg_flags & MSG_OOB)
+ return -EOPNOTSUPP;
+
+ if (m->msg_name) {
+ ret = rxrpc_validate_address(rx, m->msg_name, m->msg_namelen);
+ if (ret < 0) {
+ _leave(" = %d [bad addr]", ret);
+ return ret;
+ }
+ }
+
+ trans = NULL;
+ lock_sock(&rx->sk);
+
+ if (m->msg_name) {
+ ret = -EISCONN;
+ trans = rxrpc_name_to_transport(sock, m->msg_name,
+ m->msg_namelen, 0, GFP_KERNEL);
+ if (IS_ERR(trans)) {
+ ret = PTR_ERR(trans);
+ trans = NULL;
+ goto out;
+ }
+ } else {
+ trans = rx->trans;
+ if (trans)
+ atomic_inc(&trans->usage);
+ }
+
+ switch (rx->sk.sk_state) {
+ case RXRPC_SERVER_LISTENING:
+ if (!m->msg_name) {
+ ret = rxrpc_server_sendmsg(iocb, rx, m, len);
+ break;
+ }
+ case RXRPC_SERVER_BOUND:
+ case RXRPC_CLIENT_BOUND:
+ if (!m->msg_name) {
+ ret = -ENOTCONN;
+ break;
+ }
+ case RXRPC_CLIENT_CONNECTED:
+ ret = rxrpc_client_sendmsg(iocb, rx, trans, m, len);
+ break;
+ default:
+ ret = -ENOTCONN;
+ break;
+ }
+
+out:
+ release_sock(&rx->sk);
+ if (trans)
+ rxrpc_put_transport(trans);
+ _leave(" = %d", ret);
+ return ret;
+}
+
+/*
+ * set RxRPC socket options
+ */
+static int rxrpc_setsockopt(struct socket *sock, int level, int optname,
+ char __user *optval, int optlen)
+{
+ struct rxrpc_sock *rx = rxrpc_sk(sock->sk);
+ unsigned min_sec_level;
+ int ret;
+
+ _enter(",%d,%d,,%d", level, optname, optlen);
+
+ lock_sock(&rx->sk);
+ ret = -EOPNOTSUPP;
+
+ if (level == SOL_RXRPC) {
+ switch (optname) {
+ case RXRPC_EXCLUSIVE_CONNECTION:
+ ret = -EINVAL;
+ if (optlen != 0)
+ goto error;
+ ret = -EISCONN;
+ if (rx->sk.sk_state != RXRPC_UNCONNECTED)
+ goto error;
+ set_bit(RXRPC_SOCK_EXCLUSIVE_CONN, &rx->flags);
+ goto success;
+
+ case RXRPC_SECURITY_KEY:
+ ret = -EINVAL;
+ if (rx->key)
+ goto error;
+ ret = -EISCONN;
+ if (rx->sk.sk_state != RXRPC_UNCONNECTED)
+ goto error;
+ ret = rxrpc_request_key(rx, optval, optlen);
+ goto error;
+
+ case RXRPC_SECURITY_KEYRING:
+ ret = -EINVAL;
+ if (rx->key)
+ goto error;
+ ret = -EISCONN;
+ if (rx->sk.sk_state != RXRPC_UNCONNECTED)
+ goto error;
+ ret = rxrpc_server_keyring(rx, optval, optlen);
+ goto error;
+
+ case RXRPC_MIN_SECURITY_LEVEL:
+ ret = -EINVAL;
+ if (optlen != sizeof(unsigned))
+ goto error;
+ ret = -EISCONN;
+ if (rx->sk.sk_state != RXRPC_UNCONNECTED)
+ goto error;
+ ret = get_user(min_sec_level,
+ (unsigned __user *) optval);
+ if (ret < 0)
+ goto error;
+ ret = -EINVAL;
+ if (min_sec_level > RXRPC_SECURITY_MAX)
+ goto error;
+ rx->min_sec_level = min_sec_level;
+ goto success;
+
+ default:
+ break;
+ }
+ }
+
+success:
+ ret = 0;
+error:
+ release_sock(&rx->sk);
+ return ret;
+}
+
+/*
+ * permit an RxRPC socket to be polled
+ */
+static unsigned int rxrpc_poll(struct file *file, struct socket *sock,
+ poll_table *wait)
+{
+ unsigned int mask;
+ struct sock *sk = sock->sk;
+
+ poll_wait(file, sk->sk_sleep, wait);
+ mask = 0;
+
+ /* the socket is readable if there are any messages waiting on the Rx
+ * queue */
+ if (!skb_queue_empty(&sk->sk_receive_queue))
+ mask |= POLLIN | POLLRDNORM;
+
+	/* the socket is writable if there is space to add new data to the
+	 * socket; there is no guarantee that any particular call in progress
+	 * on the socket will have space in its Tx ACK window */
+ if (rxrpc_writable(sk))
+ mask |= POLLOUT | POLLWRNORM;
+
+ return mask;
+}
+
+/*
+ * create an RxRPC socket
+ */
+static int rxrpc_create(struct socket *sock, int protocol)
+{
+ struct rxrpc_sock *rx;
+ struct sock *sk;
+
+ _enter("%p,%d", sock, protocol);
+
+ /* we support transport protocol UDP only */
+ if (protocol != PF_INET)
+ return -EPROTONOSUPPORT;
+
+ if (sock->type != SOCK_DGRAM)
+ return -ESOCKTNOSUPPORT;
+
+ sock->ops = &rxrpc_rpc_ops;
+ sock->state = SS_UNCONNECTED;
+
+ sk = sk_alloc(PF_RXRPC, GFP_KERNEL, &rxrpc_proto, 1);
+ if (!sk)
+ return -ENOMEM;
+
+ sock_init_data(sock, sk);
+ sk->sk_state = RXRPC_UNCONNECTED;
+ sk->sk_write_space = rxrpc_write_space;
+ sk->sk_max_ack_backlog = sysctl_rxrpc_max_qlen;
+ sk->sk_destruct = rxrpc_sock_destructor;
+
+ rx = rxrpc_sk(sk);
+ rx->proto = protocol;
+ rx->calls = RB_ROOT;
+
+ INIT_LIST_HEAD(&rx->listen_link);
+ INIT_LIST_HEAD(&rx->secureq);
+ INIT_LIST_HEAD(&rx->acceptq);
+ rwlock_init(&rx->call_lock);
+ memset(&rx->srx, 0, sizeof(rx->srx));
+
+ _leave(" = 0 [%p]", rx);
+ return 0;
+}
+
+/*
+ * RxRPC socket destructor
+ */
+static void rxrpc_sock_destructor(struct sock *sk)
+{
+ _enter("%p", sk);
+
+ rxrpc_purge_queue(&sk->sk_receive_queue);
+
+ BUG_TRAP(!atomic_read(&sk->sk_wmem_alloc));
+ BUG_TRAP(sk_unhashed(sk));
+ BUG_TRAP(!sk->sk_socket);
+
+ if (!sock_flag(sk, SOCK_DEAD)) {
+ printk("Attempt to release alive rxrpc socket: %p\n", sk);
+ return;
+ }
+}
+
+/*
+ * release an RxRPC socket
+ */
+static int rxrpc_release_sock(struct sock *sk)
+{
+ struct rxrpc_sock *rx = rxrpc_sk(sk);
+
+ _enter("%p{%d,%d}", sk, sk->sk_state, atomic_read(&sk->sk_refcnt));
+
+ /* declare the socket closed for business */
+ sock_orphan(sk);
+ sk->sk_shutdown = SHUTDOWN_MASK;
+
+ spin_lock_bh(&sk->sk_receive_queue.lock);
+ sk->sk_state = RXRPC_CLOSE;
+ spin_unlock_bh(&sk->sk_receive_queue.lock);
+
+ ASSERTCMP(rx->listen_link.next, !=, LIST_POISON1);
+
+ if (!list_empty(&rx->listen_link)) {
+ write_lock_bh(&rx->local->services_lock);
+ list_del(&rx->listen_link);
+ write_unlock_bh(&rx->local->services_lock);
+ }
+
+ /* try to flush out this socket */
+ rxrpc_release_calls_on_socket(rx);
+ flush_workqueue(rxrpc_workqueue);
+ rxrpc_purge_queue(&sk->sk_receive_queue);
+
+ if (rx->conn) {
+ rxrpc_put_connection(rx->conn);
+ rx->conn = NULL;
+ }
+
+ if (rx->bundle) {
+ rxrpc_put_bundle(rx->trans, rx->bundle);
+ rx->bundle = NULL;
+ }
+ if (rx->trans) {
+ rxrpc_put_transport(rx->trans);
+ rx->trans = NULL;
+ }
+ if (rx->local) {
+ rxrpc_put_local(rx->local);
+ rx->local = NULL;
+ }
+
+ key_put(rx->key);
+ rx->key = NULL;
+ key_put(rx->securities);
+ rx->securities = NULL;
+ sock_put(sk);
+
+ _leave(" = 0");
+ return 0;
+}
+
+/*
+ * release an RxRPC BSD socket on close() or equivalent
+ */
+static int rxrpc_release(struct socket *sock)
+{
+ struct sock *sk = sock->sk;
+
+ _enter("%p{%p}", sock, sk);
+
+ if (!sk)
+ return 0;
+
+ sock->sk = NULL;
+
+ return rxrpc_release_sock(sk);
+}
+
+/*
+ * RxRPC network protocol
+ */
+static const struct proto_ops rxrpc_rpc_ops = {
+ .family = PF_UNIX,
+ .owner = THIS_MODULE,
+ .release = rxrpc_release,
+ .bind = rxrpc_bind,
+ .connect = rxrpc_connect,
+ .socketpair = sock_no_socketpair,
+ .accept = sock_no_accept,
+ .getname = sock_no_getname,
+ .poll = rxrpc_poll,
+ .ioctl = sock_no_ioctl,
+ .listen = rxrpc_listen,
+ .shutdown = sock_no_shutdown,
+ .setsockopt = rxrpc_setsockopt,
+ .getsockopt = sock_no_getsockopt,
+ .sendmsg = rxrpc_sendmsg,
+ .recvmsg = rxrpc_recvmsg,
+ .mmap = sock_no_mmap,
+ .sendpage = sock_no_sendpage,
+};
+
+static struct proto rxrpc_proto = {
+ .name = "RXRPC",
+ .owner = THIS_MODULE,
+ .obj_size = sizeof(struct rxrpc_sock),
+ .max_header = sizeof(struct rxrpc_header),
+};
+
+static struct net_proto_family rxrpc_family_ops = {
+ .family = PF_RXRPC,
+ .create = rxrpc_create,
+ .owner = THIS_MODULE,
+};
+
+/*
+ * initialise and register the RxRPC protocol
+ */
+static int __init af_rxrpc_init(void)
+{
+ struct sk_buff *dummy_skb;
+ int ret = -1;
+
+ BUILD_BUG_ON(sizeof(struct rxrpc_skb_priv) > sizeof(dummy_skb->cb));
+
+ rxrpc_epoch = htonl(xtime.tv_sec);
+
+ ret = -ENOMEM;
+ rxrpc_call_jar = kmem_cache_create(
+ "rxrpc_call_jar", sizeof(struct rxrpc_call), 0,
+ SLAB_HWCACHE_ALIGN, NULL, NULL);
+ if (!rxrpc_call_jar) {
+ printk(KERN_NOTICE "RxRPC: Failed to allocate call jar\n");
+ goto error_call_jar;
+ }
+
+ rxrpc_workqueue = create_workqueue("krxrpcd");
+ if (!rxrpc_workqueue) {
+ printk(KERN_NOTICE "RxRPC: Failed to allocate work queue\n");
+ goto error_work_queue;
+ }
+
+ ret = proto_register(&rxrpc_proto, 1);
+ if (ret < 0) {
+ printk(KERN_CRIT "RxRPC: Cannot register protocol\n");
+ goto error_proto;
+ }
+
+ ret = sock_register(&rxrpc_family_ops);
+ if (ret < 0) {
+ printk(KERN_CRIT "RxRPC: Cannot register socket family\n");
+ goto error_sock;
+ }
+
+ ret = register_key_type(&key_type_rxrpc);
+ if (ret < 0) {
+ printk(KERN_CRIT "RxRPC: Cannot register client key type\n");
+ goto error_key_type;
+ }
+
+ ret = register_key_type(&key_type_rxrpc_s);
+ if (ret < 0) {
+ printk(KERN_CRIT "RxRPC: Cannot register server key type\n");
+ goto error_key_type_s;
+ }
+
+#ifdef CONFIG_PROC_FS
+ proc_net_fops_create("rxrpc_calls", 0, &rxrpc_call_seq_fops);
+ proc_net_fops_create("rxrpc_conns", 0, &rxrpc_connection_seq_fops);
+#endif
+ return 0;
+
+error_key_type_s:
+ unregister_key_type(&key_type_rxrpc);
+error_key_type:
+ sock_unregister(PF_RXRPC);
+error_sock:
+ proto_unregister(&rxrpc_proto);
+error_proto:
+ destroy_workqueue(rxrpc_workqueue);
+error_work_queue:
+ kmem_cache_destroy(rxrpc_call_jar);
+error_call_jar:
+ return ret;
+}
+
+/*
+ * unregister the RxRPC protocol
+ */
+static void __exit af_rxrpc_exit(void)
+{
+ _enter("");
+ unregister_key_type(&key_type_rxrpc_s);
+ unregister_key_type(&key_type_rxrpc);
+ sock_unregister(PF_RXRPC);
+ proto_unregister(&rxrpc_proto);
+ rxrpc_destroy_all_calls();
+ rxrpc_destroy_all_connections();
+ rxrpc_destroy_all_transports();
+ rxrpc_destroy_all_peers();
+ rxrpc_destroy_all_locals();
+
+ ASSERTCMP(atomic_read(&rxrpc_n_skbs), ==, 0);
+
+ _debug("flush scheduled work");
+ flush_workqueue(rxrpc_workqueue);
+ proc_net_remove("rxrpc_conns");
+ proc_net_remove("rxrpc_calls");
+ destroy_workqueue(rxrpc_workqueue);
+ kmem_cache_destroy(rxrpc_call_jar);
+ _leave("");
+}
+
+module_init(af_rxrpc_init);
+module_exit(af_rxrpc_exit);
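
Taken together, the exported entry points above give an in-kernel user (the
AFS filesystem is the stated consumer) a small client-call API. A hedged
sketch of how it would be driven; MY_SERVICE, server_port and server_ip are
hypothetical, and sending/receiving on the call uses other parts of this
series not shown here:

	/* Assumes the socket came from sock_create_kern(AF_RXRPC,
	 * SOCK_DGRAM, PF_INET, &sock) and has been bound. */
	struct sockaddr_rxrpc srx = {
		.srx_family	= AF_RXRPC,
		.srx_service	= MY_SERVICE,		/* hypothetical */
		.transport_type	= SOCK_DGRAM,
		.transport_len	= sizeof(struct sockaddr_in),
	};
	struct rxrpc_call *call;

	srx.transport.sin.sin_family = AF_INET;
	srx.transport.sin.sin_port = server_port;	/* __be16 */
	srx.transport.sin.sin_addr.s_addr = server_ip;	/* __be32 */

	call = rxrpc_kernel_begin_call(sock, &srx, NULL, 0, GFP_NOFS);
	if (!IS_ERR(call)) {
		/* ... exchange request and reply on the call ... */
		rxrpc_kernel_end_call(call);
	}
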
diff --git a/net/rxrpc/ar-accept.c b/net/rxrpc/ar-accept.c
new file mode 100644
index 00000000000..92a87fde8bf
--- /dev/null
+++ b/net/rxrpc/ar-accept.c
@@ -0,0 +1,504 @@
+/* incoming call handling
+ *
+ * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/module.h>
+#include <linux/net.h>
+#include <linux/skbuff.h>
+#include <linux/errqueue.h>
+#include <linux/udp.h>
+#include <linux/in.h>
+#include <linux/in6.h>
+#include <linux/icmp.h>
+#include <net/sock.h>
+#include <net/af_rxrpc.h>
+#include <net/ip.h>
+#include "ar-internal.h"
+
+/*
+ * respond to an incoming call with a BUSY packet
+ */
+static int rxrpc_busy(struct rxrpc_local *local, struct sockaddr_rxrpc *srx,
+ struct rxrpc_header *hdr)
+{
+ struct msghdr msg;
+ struct kvec iov[1];
+ size_t len;
+ int ret;
+
+ _enter("%d,,", local->debug_id);
+
+ msg.msg_name = &srx->transport.sin;
+ msg.msg_namelen = sizeof(srx->transport.sin);
+ msg.msg_control = NULL;
+ msg.msg_controllen = 0;
+ msg.msg_flags = 0;
+
+ hdr->seq = 0;
+ hdr->type = RXRPC_PACKET_TYPE_BUSY;
+ hdr->flags = 0;
+ hdr->userStatus = 0;
+ hdr->_rsvd = 0;
+
+ iov[0].iov_base = hdr;
+ iov[0].iov_len = sizeof(*hdr);
+
+ len = iov[0].iov_len;
+
+ hdr->serial = htonl(1);
+ _proto("Tx BUSY %%%u", ntohl(hdr->serial));
+
+ ret = kernel_sendmsg(local->socket, &msg, iov, 1, len);
+ if (ret < 0) {
+ _leave(" = -EAGAIN [sendmsg failed: %d]", ret);
+ return -EAGAIN;
+ }
+
+ _leave(" = 0");
+ return 0;
+}
+
+/*
+ * accept an incoming call that needs peer, transport and/or connection setting
+ * up
+ */
+static int rxrpc_accept_incoming_call(struct rxrpc_local *local,
+ struct rxrpc_sock *rx,
+ struct sk_buff *skb,
+ struct sockaddr_rxrpc *srx)
+{
+ struct rxrpc_connection *conn;
+ struct rxrpc_transport *trans;
+ struct rxrpc_skb_priv *sp, *nsp;
+ struct rxrpc_peer *peer;
+ struct rxrpc_call *call;
+ struct sk_buff *notification;
+ int ret;
+
+ _enter("");
+
+ sp = rxrpc_skb(skb);
+
+ /* get a notification message to send to the server app */
+ notification = alloc_skb(0, GFP_NOFS);
+ rxrpc_new_skb(notification);
+ notification->mark = RXRPC_SKB_MARK_NEW_CALL;
+
+ peer = rxrpc_get_peer(srx, GFP_NOIO);
+ if (IS_ERR(peer)) {
+ _debug("no peer");
+ ret = -EBUSY;
+ goto error;
+ }
+
+ trans = rxrpc_get_transport(local, peer, GFP_NOIO);
+ rxrpc_put_peer(peer);
+ if (!trans) {
+ _debug("no trans");
+ ret = -EBUSY;
+ goto error;
+ }
+
+ conn = rxrpc_incoming_connection(trans, &sp->hdr, GFP_NOIO);
+ rxrpc_put_transport(trans);
+ if (IS_ERR(conn)) {
+ _debug("no conn");
+ ret = PTR_ERR(conn);
+ goto error;
+ }
+
+ call = rxrpc_incoming_call(rx, conn, &sp->hdr, GFP_NOIO);
+ rxrpc_put_connection(conn);
+ if (IS_ERR(call)) {
+ _debug("no call");
+ ret = PTR_ERR(call);
+ goto error;
+ }
+
+ /* attach the call to the socket */
+ read_lock_bh(&local->services_lock);
+ if (rx->sk.sk_state == RXRPC_CLOSE)
+ goto invalid_service;
+
+ write_lock(&rx->call_lock);
+ if (!test_and_set_bit(RXRPC_CALL_INIT_ACCEPT, &call->flags)) {
+ rxrpc_get_call(call);
+
+ spin_lock(&call->conn->state_lock);
+ if (sp->hdr.securityIndex > 0 &&
+ call->conn->state == RXRPC_CONN_SERVER_UNSECURED) {
+ _debug("await conn sec");
+ list_add_tail(&call->accept_link, &rx->secureq);
+ call->conn->state = RXRPC_CONN_SERVER_CHALLENGING;
+ atomic_inc(&call->conn->usage);
+ set_bit(RXRPC_CONN_CHALLENGE, &call->conn->events);
+ rxrpc_queue_conn(call->conn);
+ } else {
+ _debug("conn ready");
+ call->state = RXRPC_CALL_SERVER_ACCEPTING;
+ list_add_tail(&call->accept_link, &rx->acceptq);
+ rxrpc_get_call(call);
+ nsp = rxrpc_skb(notification);
+ nsp->call = call;
+
+ ASSERTCMP(atomic_read(&call->usage), >=, 3);
+
+ _debug("notify");
+ spin_lock(&call->lock);
+ ret = rxrpc_queue_rcv_skb(call, notification, true,
+ false);
+ spin_unlock(&call->lock);
+ notification = NULL;
+ if (ret < 0)
+ BUG();
+ }
+ spin_unlock(&call->conn->state_lock);
+
+ _debug("queued");
+ }
+ write_unlock(&rx->call_lock);
+
+ _debug("process");
+ rxrpc_fast_process_packet(call, skb);
+
+ _debug("done");
+ read_unlock_bh(&local->services_lock);
+ rxrpc_free_skb(notification);
+ rxrpc_put_call(call);
+ _leave(" = 0");
+ return 0;
+
+invalid_service:
+ _debug("invalid");
+ read_unlock_bh(&local->services_lock);
+
+ read_lock_bh(&call->state_lock);
+ if (!test_bit(RXRPC_CALL_RELEASE, &call->flags) &&
+ !test_and_set_bit(RXRPC_CALL_RELEASE, &call->events)) {
+ rxrpc_get_call(call);
+ rxrpc_queue_call(call);
+ }
+ read_unlock_bh(&call->state_lock);
+ rxrpc_put_call(call);
+ ret = -ECONNREFUSED;
+error:
+ rxrpc_free_skb(notification);
+ _leave(" = %d", ret);
+ return ret;
+}
+
+/*
+ * accept incoming calls that need peer, transport and/or connection setting up
+ * - the packets we get are all incoming client DATA packets that have seq == 1
+ */
+void rxrpc_accept_incoming_calls(struct work_struct *work)
+{
+ struct rxrpc_local *local =
+ container_of(work, struct rxrpc_local, acceptor);
+ struct rxrpc_skb_priv *sp;
+ struct sockaddr_rxrpc srx;
+ struct rxrpc_sock *rx;
+ struct sk_buff *skb;
+ __be16 service_id;
+ int ret;
+
+ _enter("%d", local->debug_id);
+
+ read_lock_bh(&rxrpc_local_lock);
+ if (atomic_read(&local->usage) > 0)
+ rxrpc_get_local(local);
+ else
+ local = NULL;
+ read_unlock_bh(&rxrpc_local_lock);
+ if (!local) {
+ _leave(" [local dead]");
+ return;
+ }
+
+process_next_packet:
+ skb = skb_dequeue(&local->accept_queue);
+ if (!skb) {
+ rxrpc_put_local(local);
+ _leave("\n");
+ return;
+ }
+
+ _net("incoming call skb %p", skb);
+
+ sp = rxrpc_skb(skb);
+
+ /* determine the remote address */
+ memset(&srx, 0, sizeof(srx));
+ srx.srx_family = AF_RXRPC;
+ srx.transport.family = local->srx.transport.family;
+ srx.transport_type = local->srx.transport_type;
+ switch (srx.transport.family) {
+ case AF_INET:
+ srx.transport_len = sizeof(struct sockaddr_in);
+ srx.transport.sin.sin_port = udp_hdr(skb)->source;
+ srx.transport.sin.sin_addr.s_addr = ip_hdr(skb)->saddr;
+ break;
+ default:
+ goto busy;
+ }
+
+ /* get the socket providing the service */
+ service_id = sp->hdr.serviceId;
+ read_lock_bh(&local->services_lock);
+ list_for_each_entry(rx, &local->services, listen_link) {
+ if (rx->service_id == service_id &&
+ rx->sk.sk_state != RXRPC_CLOSE)
+ goto found_service;
+ }
+ read_unlock_bh(&local->services_lock);
+ goto invalid_service;
+
+found_service:
+ _debug("found service %hd", ntohs(rx->service_id));
+ if (sk_acceptq_is_full(&rx->sk))
+ goto backlog_full;
+ sk_acceptq_added(&rx->sk);
+ sock_hold(&rx->sk);
+ read_unlock_bh(&local->services_lock);
+
+ ret = rxrpc_accept_incoming_call(local, rx, skb, &srx);
+ if (ret < 0)
+ sk_acceptq_removed(&rx->sk);
+ sock_put(&rx->sk);
+ switch (ret) {
+ case -ECONNRESET: /* old calls are ignored */
+ case -ECONNABORTED: /* aborted calls are reaborted or ignored */
+ case 0:
+ goto process_next_packet;
+ case -ECONNREFUSED:
+ goto invalid_service;
+ case -EBUSY:
+ goto busy;
+ case -EKEYREJECTED:
+ goto security_mismatch;
+ default:
+ BUG();
+ }
+
+backlog_full:
+ read_unlock_bh(&local->services_lock);
+busy:
+ rxrpc_busy(local, &srx, &sp->hdr);
+ rxrpc_free_skb(skb);
+ goto process_next_packet;
+
+invalid_service:
+ skb->priority = RX_INVALID_OPERATION;
+ rxrpc_reject_packet(local, skb);
+ goto process_next_packet;
+
+ /* can't change connection security type mid-flow */
+security_mismatch:
+ skb->priority = RX_PROTOCOL_ERROR;
+ rxrpc_reject_packet(local, skb);
+ goto process_next_packet;
+}
+
+/*
+ * handle acceptance of a call by userspace
+ * - assign the user call ID to the call at the front of the queue
+ */
+struct rxrpc_call *rxrpc_accept_call(struct rxrpc_sock *rx,
+ unsigned long user_call_ID)
+{
+ struct rxrpc_call *call;
+ struct rb_node *parent, **pp;
+ int ret;
+
+ _enter(",%lx", user_call_ID);
+
+ ASSERT(!irqs_disabled());
+
+ write_lock(&rx->call_lock);
+
+ ret = -ENODATA;
+ if (list_empty(&rx->acceptq))
+ goto out;
+
+ /* check the user ID isn't already in use */
+ ret = -EBADSLT;
+ pp = &rx->calls.rb_node;
+ parent = NULL;
+ while (*pp) {
+ parent = *pp;
+ call = rb_entry(parent, struct rxrpc_call, sock_node);
+
+ if (user_call_ID < call->user_call_ID)
+ pp = &(*pp)->rb_left;
+ else if (user_call_ID > call->user_call_ID)
+ pp = &(*pp)->rb_right;
+ else
+ goto out;
+ }
+
+ /* dequeue the first call and check it's still valid */
+ call = list_entry(rx->acceptq.next, struct rxrpc_call, accept_link);
+ list_del_init(&call->accept_link);
+ sk_acceptq_removed(&rx->sk);
+
+ write_lock_bh(&call->state_lock);
+ switch (call->state) {
+ case RXRPC_CALL_SERVER_ACCEPTING:
+ call->state = RXRPC_CALL_SERVER_RECV_REQUEST;
+ break;
+ case RXRPC_CALL_REMOTELY_ABORTED:
+ case RXRPC_CALL_LOCALLY_ABORTED:
+ ret = -ECONNABORTED;
+ goto out_release;
+ case RXRPC_CALL_NETWORK_ERROR:
+ ret = call->conn->error;
+ goto out_release;
+ case RXRPC_CALL_DEAD:
+ ret = -ETIME;
+ goto out_discard;
+ default:
+ BUG();
+ }
+
+ /* formalise the acceptance */
+ call->user_call_ID = user_call_ID;
+ rb_link_node(&call->sock_node, parent, pp);
+ rb_insert_color(&call->sock_node, &rx->calls);
+ if (test_and_set_bit(RXRPC_CALL_HAS_USERID, &call->flags))
+ BUG();
+ if (test_and_set_bit(RXRPC_CALL_ACCEPTED, &call->events))
+ BUG();
+ rxrpc_queue_call(call);
+
+ rxrpc_get_call(call);
+ write_unlock_bh(&call->state_lock);
+ write_unlock(&rx->call_lock);
+ _leave(" = %p{%d}", call, call->debug_id);
+ return call;
+
+ /* if the call is already dying or dead, then we leave the socket's ref
+ * on it to be released by rxrpc_dead_call_expired() as induced by
+ * rxrpc_release_call() */
+out_release:
+ _debug("release %p", call);
+ if (!test_bit(RXRPC_CALL_RELEASED, &call->flags) &&
+ !test_and_set_bit(RXRPC_CALL_RELEASE, &call->events))
+ rxrpc_queue_call(call);
+out_discard:
+ write_unlock_bh(&call->state_lock);
+ _debug("discard %p", call);
+out:
+ write_unlock(&rx->call_lock);
+ _leave(" = %d", ret);
+ return ERR_PTR(ret);
+}
+
+/*
+ * handle rejection of a call by userspace
+ * - reject the call at the front of the queue
+ */
+int rxrpc_reject_call(struct rxrpc_sock *rx)
+{
+ struct rxrpc_call *call;
+ int ret;
+
+ _enter("");
+
+ ASSERT(!irqs_disabled());
+
+ write_lock(&rx->call_lock);
+
+ ret = -ENODATA;
+ if (list_empty(&rx->acceptq))
+ goto out;
+
+ /* dequeue the first call and check it's still valid */
+ call = list_entry(rx->acceptq.next, struct rxrpc_call, accept_link);
+ list_del_init(&call->accept_link);
+ sk_acceptq_removed(&rx->sk);
+
+ write_lock_bh(&call->state_lock);
+ switch (call->state) {
+ case RXRPC_CALL_SERVER_ACCEPTING:
+ call->state = RXRPC_CALL_SERVER_BUSY;
+ if (test_and_set_bit(RXRPC_CALL_REJECT_BUSY, &call->events))
+ rxrpc_queue_call(call);
+ ret = 0;
+ goto out_release;
+ case RXRPC_CALL_REMOTELY_ABORTED:
+ case RXRPC_CALL_LOCALLY_ABORTED:
+ ret = -ECONNABORTED;
+ goto out_release;
+ case RXRPC_CALL_NETWORK_ERROR:
+ ret = call->conn->error;
+ goto out_release;
+ case RXRPC_CALL_DEAD:
+ ret = -ETIME;
+ goto out_discard;
+ default:
+ BUG();
+ }
+
+ /* if the call is already dying or dead, then we leave the socket's ref
+ * on it to be released by rxrpc_dead_call_expired() as induced by
+ * rxrpc_release_call() */
+out_release:
+ _debug("release %p", call);
+ if (!test_bit(RXRPC_CALL_RELEASED, &call->flags) &&
+ !test_and_set_bit(RXRPC_CALL_RELEASE, &call->events))
+ rxrpc_queue_call(call);
+out_discard:
+ write_unlock_bh(&call->state_lock);
+ _debug("discard %p", call);
+out:
+ write_unlock(&rx->call_lock);
+ _leave(" = %d", ret);
+ return ret;
+}
+
+/**
+ * rxrpc_kernel_accept_call - Allow a kernel service to accept an incoming call
+ * @sock: The socket on which the impending call is waiting
+ * @user_call_ID: The tag to attach to the call
+ *
+ * Allow a kernel service to accept an incoming call, assuming the incoming
+ * call is still valid.
+ */
+struct rxrpc_call *rxrpc_kernel_accept_call(struct socket *sock,
+ unsigned long user_call_ID)
+{
+ struct rxrpc_call *call;
+
+ _enter(",%lx", user_call_ID);
+ call = rxrpc_accept_call(rxrpc_sk(sock->sk), user_call_ID);
+ _leave(" = %p", call);
+ return call;
+}
+
+EXPORT_SYMBOL(rxrpc_kernel_accept_call);
+
+/**
+ * rxrpc_kernel_reject_call - Allow a kernel service to reject an incoming call
+ * @sock: The socket on which the impending call is waiting
+ *
+ * Allow a kernel service to reject an incoming call with a BUSY message,
+ * assuming the incoming call is still valid.
+ */
+int rxrpc_kernel_reject_call(struct socket *sock)
+{
+ int ret;
+
+ _enter("");
+ ret = rxrpc_reject_call(rxrpc_sk(sock->sk));
+ _leave(" = %d", ret);
+ return ret;
+}
+
+EXPORT_SYMBOL(rxrpc_kernel_reject_call);
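
On the service side, the two kernel-doc'd helpers above pair up: a kernel
service told of a new call (via the RXRPC_SKB_MARK_NEW_CALL notification
queued by rxrpc_accept_incoming_call()) either adopts it under a tag of its
own choosing or bounces it. A sketch, with my_tag hypothetical:

	/* Answer a new-call notification on a listening socket. */
	struct rxrpc_call *call;

	call = rxrpc_kernel_accept_call(sock, (unsigned long) my_tag);
	if (IS_ERR(call)) {
		/* the queued call is no longer usable; optionally clear
		 * the head of the queue, which replies with BUSY */
		rxrpc_kernel_reject_call(sock);
	}
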
diff --git a/net/rxrpc/ar-ack.c b/net/rxrpc/ar-ack.c
new file mode 100644
index 00000000000..657ee69f213
--- /dev/null
+++ b/net/rxrpc/ar-ack.c
@@ -0,0 +1,1306 @@
+/* Management of Tx window, Tx resend, ACKs and out-of-sequence reception
+ *
+ * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/module.h>
+#include <linux/circ_buf.h>
+#include <linux/net.h>
+#include <linux/skbuff.h>
+#include <linux/udp.h>
+#include <net/sock.h>
+#include <net/af_rxrpc.h>
+#include "ar-internal.h"
+
+static unsigned rxrpc_ack_defer = 1;
+
+static const char *rxrpc_acks[] = {
+ "---", "REQ", "DUP", "OOS", "WIN", "MEM", "PNG", "PNR", "DLY", "IDL",
+ "-?-"
+};
+
+static const s8 rxrpc_ack_priority[] = {
+ [0] = 0,
+ [RXRPC_ACK_DELAY] = 1,
+ [RXRPC_ACK_REQUESTED] = 2,
+ [RXRPC_ACK_IDLE] = 3,
+ [RXRPC_ACK_PING_RESPONSE] = 4,
+ [RXRPC_ACK_DUPLICATE] = 5,
+ [RXRPC_ACK_OUT_OF_SEQUENCE] = 6,
+ [RXRPC_ACK_EXCEEDS_WINDOW] = 7,
+ [RXRPC_ACK_NOSPACE] = 8,
+};
+
+/*
+ * propose an ACK be sent
+ */
+void __rxrpc_propose_ACK(struct rxrpc_call *call, uint8_t ack_reason,
+ __be32 serial, bool immediate)
+{
+ unsigned long expiry;
+ s8 prior = rxrpc_ack_priority[ack_reason];
+
+ ASSERTCMP(prior, >, 0);
+
+ _enter("{%d},%s,%%%x,%u",
+ call->debug_id, rxrpc_acks[ack_reason], ntohl(serial),
+ immediate);
+
+ if (prior < rxrpc_ack_priority[call->ackr_reason]) {
+ if (immediate)
+ goto cancel_timer;
+ return;
+ }
+
+ /* update DELAY, IDLE, REQUESTED and PING_RESPONSE ACK serial
+ * numbers */
+ if (prior == rxrpc_ack_priority[call->ackr_reason]) {
+ if (prior <= 4)
+ call->ackr_serial = serial;
+ if (immediate)
+ goto cancel_timer;
+ return;
+ }
+
+ call->ackr_reason = ack_reason;
+ call->ackr_serial = serial;
+
+ switch (ack_reason) {
+ case RXRPC_ACK_DELAY:
+ _debug("run delay timer");
+ call->ack_timer.expires = jiffies + rxrpc_ack_timeout * HZ;
+ add_timer(&call->ack_timer);
+ return;
+
+ case RXRPC_ACK_IDLE:
+ if (!immediate) {
+ _debug("run defer timer");
+ expiry = 1;
+ goto run_timer;
+ }
+ goto cancel_timer;
+
+ case RXRPC_ACK_REQUESTED:
+ if (!rxrpc_ack_defer)
+ goto cancel_timer;
+ if (!immediate || serial == cpu_to_be32(1)) {
+ _debug("run defer timer");
+ expiry = rxrpc_ack_defer;
+ goto run_timer;
+ }
+
+ default:
+ _debug("immediate ACK");
+ goto cancel_timer;
+ }
+
+run_timer:
+ expiry += jiffies;
+ if (!timer_pending(&call->ack_timer) ||
+ time_after(call->ack_timer.expires, expiry))
+ mod_timer(&call->ack_timer, expiry);
+ return;
+
+cancel_timer:
+ _debug("cancel timer %%%u", ntohl(serial));
+ try_to_del_timer_sync(&call->ack_timer);
+ read_lock_bh(&call->state_lock);
+ if (call->state <= RXRPC_CALL_COMPLETE &&
+ !test_and_set_bit(RXRPC_CALL_ACK, &call->events))
+ rxrpc_queue_call(call);
+ read_unlock_bh(&call->state_lock);
+}
+
+/*
+ * propose an ACK be sent, locking the call structure
+ */
+void rxrpc_propose_ACK(struct rxrpc_call *call, uint8_t ack_reason,
+ __be32 serial, bool immediate)
+{
+ s8 prior = rxrpc_ack_priority[ack_reason];
+
+ if (prior > rxrpc_ack_priority[call->ackr_reason]) {
+ spin_lock_bh(&call->lock);
+ __rxrpc_propose_ACK(call, ack_reason, serial, immediate);
+ spin_unlock_bh(&call->lock);
+ }
+}
+
+/*
+ * set the resend timer
+ */
+static void rxrpc_set_resend(struct rxrpc_call *call, u8 resend,
+ unsigned long resend_at)
+{
+ read_lock_bh(&call->state_lock);
+ if (call->state >= RXRPC_CALL_COMPLETE)
+ resend = 0;
+
+ if (resend & 1) {
+ _debug("SET RESEND");
+ set_bit(RXRPC_CALL_RESEND, &call->events);
+ }
+
+ if (resend & 2) {
+ _debug("MODIFY RESEND TIMER");
+ set_bit(RXRPC_CALL_RUN_RTIMER, &call->flags);
+ mod_timer(&call->resend_timer, resend_at);
+ } else {
+ _debug("KILL RESEND TIMER");
+ del_timer_sync(&call->resend_timer);
+ clear_bit(RXRPC_CALL_RESEND_TIMER, &call->events);
+ clear_bit(RXRPC_CALL_RUN_RTIMER, &call->flags);
+ }
+ read_unlock_bh(&call->state_lock);
+}
+
+/*
+ * resend packets
+ */
+static void rxrpc_resend(struct rxrpc_call *call)
+{
+ struct rxrpc_skb_priv *sp;
+ struct rxrpc_header *hdr;
+ struct sk_buff *txb;
+ unsigned long *p_txb, resend_at;
+ int loop, stop;
+ u8 resend;
+
+ _enter("{%d,%d,%d,%d},",
+ call->acks_hard, call->acks_unacked,
+ atomic_read(&call->sequence),
+ CIRC_CNT(call->acks_head, call->acks_tail, call->acks_winsz));
+
+ stop = 0;
+ resend = 0;
+ resend_at = 0;
+
+ for (loop = call->acks_tail;
+ loop != call->acks_head || stop;
+ loop = (loop + 1) & (call->acks_winsz - 1)
+ ) {
+ p_txb = call->acks_window + loop;
+ smp_read_barrier_depends();
+ if (*p_txb & 1)
+ continue;
+
+ txb = (struct sk_buff *) *p_txb;
+ sp = rxrpc_skb(txb);
+
+ if (sp->need_resend) {
+ sp->need_resend = 0;
+
+ /* each Tx packet has a new serial number */
+ sp->hdr.serial =
+ htonl(atomic_inc_return(&call->conn->serial));
+
+ hdr = (struct rxrpc_header *) txb->head;
+ hdr->serial = sp->hdr.serial;
+
+ _proto("Tx DATA %%%u { #%d }",
+ ntohl(sp->hdr.serial), ntohl(sp->hdr.seq));
+ if (rxrpc_send_packet(call->conn->trans, txb) < 0) {
+				stop = 1;
+ sp->resend_at = jiffies + 3;
+ } else {
+ sp->resend_at =
+ jiffies + rxrpc_resend_timeout * HZ;
+ }
+ }
+
+ if (time_after_eq(jiffies + 1, sp->resend_at)) {
+ sp->need_resend = 1;
+ resend |= 1;
+ } else if (resend & 2) {
+ if (time_before(sp->resend_at, resend_at))
+ resend_at = sp->resend_at;
+ } else {
+ resend_at = sp->resend_at;
+ resend |= 2;
+ }
+ }
+
+ rxrpc_set_resend(call, resend, resend_at);
+ _leave("");
+}
+
+/*
+ * handle resend timer expiry
+ */
+static void rxrpc_resend_timer(struct rxrpc_call *call)
+{
+ struct rxrpc_skb_priv *sp;
+ struct sk_buff *txb;
+ unsigned long *p_txb, resend_at;
+ int loop;
+ u8 resend;
+
+ _enter("%d,%d,%d",
+ call->acks_tail, call->acks_unacked, call->acks_head);
+
+ resend = 0;
+ resend_at = 0;
+
+ for (loop = call->acks_unacked;
+ loop != call->acks_head;
+ loop = (loop + 1) & (call->acks_winsz - 1)
+ ) {
+ p_txb = call->acks_window + loop;
+ smp_read_barrier_depends();
+ txb = (struct sk_buff *) (*p_txb & ~1);
+ sp = rxrpc_skb(txb);
+
+ ASSERT(!(*p_txb & 1));
+
+ if (sp->need_resend) {
+ ;
+ } else if (time_after_eq(jiffies + 1, sp->resend_at)) {
+ sp->need_resend = 1;
+ resend |= 1;
+ } else if (resend & 2) {
+ if (time_before(sp->resend_at, resend_at))
+ resend_at = sp->resend_at;
+ } else {
+ resend_at = sp->resend_at;
+ resend |= 2;
+ }
+ }
+
+ rxrpc_set_resend(call, resend, resend_at);
+ _leave("");
+}
+
+/*
+ * process soft ACKs of our transmitted packets
+ * - these indicate packets the peer has or has not received, but hasn't yet
+ * given to the consumer, and so can still be discarded and re-requested
+ */
+static int rxrpc_process_soft_ACKs(struct rxrpc_call *call,
+ struct rxrpc_ackpacket *ack,
+ struct sk_buff *skb)
+{
+ struct rxrpc_skb_priv *sp;
+ struct sk_buff *txb;
+ unsigned long *p_txb, resend_at;
+ int loop;
+ u8 sacks[RXRPC_MAXACKS], resend;
+
+ _enter("{%d,%d},{%d},",
+ call->acks_hard,
+ CIRC_CNT(call->acks_head, call->acks_tail, call->acks_winsz),
+ ack->nAcks);
+
+ if (skb_copy_bits(skb, 0, sacks, ack->nAcks) < 0)
+ goto protocol_error;
+
+ resend = 0;
+ resend_at = 0;
+ for (loop = 0; loop < ack->nAcks; loop++) {
+ p_txb = call->acks_window;
+ p_txb += (call->acks_tail + loop) & (call->acks_winsz - 1);
+ smp_read_barrier_depends();
+ txb = (struct sk_buff *) (*p_txb & ~1);
+ sp = rxrpc_skb(txb);
+
+ switch (sacks[loop]) {
+ case RXRPC_ACK_TYPE_ACK:
+ sp->need_resend = 0;
+ *p_txb |= 1;
+ break;
+ case RXRPC_ACK_TYPE_NACK:
+ sp->need_resend = 1;
+ *p_txb &= ~1;
+ resend = 1;
+ break;
+ default:
+ _debug("Unsupported ACK type %d", sacks[loop]);
+ goto protocol_error;
+ }
+ }
+
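+	/* make the soft-ACK flag updates above visible before advancing the
+	 * unacked pointer */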
+ smp_mb();
+ call->acks_unacked = (call->acks_tail + loop) & (call->acks_winsz - 1);
+
+ /* anything not explicitly ACK'd is implicitly NACK'd, but may just not
+ * have been received or processed yet by the far end */
+ for (loop = call->acks_unacked;
+ loop != call->acks_head;
+ loop = (loop + 1) & (call->acks_winsz - 1)
+ ) {
+ p_txb = call->acks_window + loop;
+ smp_read_barrier_depends();
+ txb = (struct sk_buff *) (*p_txb & ~1);
+ sp = rxrpc_skb(txb);
+
+ if (*p_txb & 1) {
+ /* packet must have been discarded */
+ sp->need_resend = 1;
+ *p_txb &= ~1;
+ resend |= 1;
+ } else if (sp->need_resend) {
+ ;
+ } else if (time_after_eq(jiffies + 1, sp->resend_at)) {
+ sp->need_resend = 1;
+ resend |= 1;
+ } else if (resend & 2) {
+ if (time_before(sp->resend_at, resend_at))
+ resend_at = sp->resend_at;
+ } else {
+ resend_at = sp->resend_at;
+ resend |= 2;
+ }
+ }
+
+ rxrpc_set_resend(call, resend, resend_at);
+ _leave(" = 0");
+ return 0;
+
+protocol_error:
+ _leave(" = -EPROTO");
+ return -EPROTO;
+}
+
+/*
+ * discard hard-ACK'd packets from the Tx window
+ */
+static void rxrpc_rotate_tx_window(struct rxrpc_call *call, u32 hard)
+{
+ struct rxrpc_skb_priv *sp;
+ unsigned long _skb;
+ int tail = call->acks_tail, old_tail;
+ int win = CIRC_CNT(call->acks_head, tail, call->acks_winsz);
+
+ _enter("{%u,%u},%u", call->acks_hard, win, hard);
+
+ ASSERTCMP(hard - call->acks_hard, <=, win);
+
+ while (call->acks_hard < hard) {
+ smp_read_barrier_depends();
+ _skb = call->acks_window[tail] & ~1;
+ sp = rxrpc_skb((struct sk_buff *) _skb);
+ rxrpc_free_skb((struct sk_buff *) _skb);
+ old_tail = tail;
+ tail = (tail + 1) & (call->acks_winsz - 1);
+ call->acks_tail = tail;
+ if (call->acks_unacked == old_tail)
+ call->acks_unacked = tail;
+ call->acks_hard++;
+ }
+
+ wake_up(&call->tx_waitq);
+}
+
+/*
+ * clear the Tx window in the event of a failure
+ */
+static void rxrpc_clear_tx_window(struct rxrpc_call *call)
+{
+ rxrpc_rotate_tx_window(call, atomic_read(&call->sequence));
+}
+
+/*
+ * drain the out of sequence received packet queue into the packet Rx queue
+ */
+static int rxrpc_drain_rx_oos_queue(struct rxrpc_call *call)
+{
+ struct rxrpc_skb_priv *sp;
+ struct sk_buff *skb;
+ bool terminal;
+ int ret;
+
+ _enter("{%d,%d}", call->rx_data_post, call->rx_first_oos);
+
+ spin_lock_bh(&call->lock);
+
+ ret = -ECONNRESET;
+ if (test_bit(RXRPC_CALL_RELEASED, &call->flags))
+ goto socket_unavailable;
+
+ skb = skb_dequeue(&call->rx_oos_queue);
+ if (skb) {
+ sp = rxrpc_skb(skb);
+
+ _debug("drain OOS packet %d [%d]",
+ ntohl(sp->hdr.seq), call->rx_first_oos);
+
+ if (ntohl(sp->hdr.seq) != call->rx_first_oos) {
+ skb_queue_head(&call->rx_oos_queue, skb);
+ call->rx_first_oos = ntohl(rxrpc_skb(skb)->hdr.seq);
+ _debug("requeue %p {%u}", skb, call->rx_first_oos);
+ } else {
+ skb->mark = RXRPC_SKB_MARK_DATA;
+ terminal = ((sp->hdr.flags & RXRPC_LAST_PACKET) &&
+ !(sp->hdr.flags & RXRPC_CLIENT_INITIATED));
+ ret = rxrpc_queue_rcv_skb(call, skb, true, terminal);
+ BUG_ON(ret < 0);
+ _debug("drain #%u", call->rx_data_post);
+ call->rx_data_post++;
+
+ /* find out what the next packet is */
+ skb = skb_peek(&call->rx_oos_queue);
+ if (skb)
+ call->rx_first_oos =
+ ntohl(rxrpc_skb(skb)->hdr.seq);
+ else
+ call->rx_first_oos = 0;
+ _debug("peek %p {%u}", skb, call->rx_first_oos);
+ }
+ }
+
+ ret = 0;
+socket_unavailable:
+ spin_unlock_bh(&call->lock);
+ _leave(" = %d", ret);
+ return ret;
+}
+
+/*
+ * insert an out of sequence packet into the buffer
+ */
+static void rxrpc_insert_oos_packet(struct rxrpc_call *call,
+ struct sk_buff *skb)
+{
+ struct rxrpc_skb_priv *sp, *psp;
+ struct sk_buff *p;
+ u32 seq;
+
+ sp = rxrpc_skb(skb);
+ seq = ntohl(sp->hdr.seq);
+ _enter(",,{%u}", seq);
+
+ skb->destructor = rxrpc_packet_destructor;
+ ASSERTCMP(sp->call, ==, NULL);
+ sp->call = call;
+ rxrpc_get_call(call);
+
+ /* insert into the buffer in sequence order */
+ spin_lock_bh(&call->lock);
+
+ skb_queue_walk(&call->rx_oos_queue, p) {
+ psp = rxrpc_skb(p);
+ if (ntohl(psp->hdr.seq) > seq) {
+ _debug("insert oos #%u before #%u",
+ seq, ntohl(psp->hdr.seq));
+ skb_insert(p, skb, &call->rx_oos_queue);
+ goto inserted;
+ }
+ }
+
+ _debug("append oos #%u", seq);
+ skb_queue_tail(&call->rx_oos_queue, skb);
+inserted:
+
+ /* we might now have a new front to the queue */
+ if (call->rx_first_oos == 0 || seq < call->rx_first_oos)
+ call->rx_first_oos = seq;
+
+ read_lock(&call->state_lock);
+ if (call->state < RXRPC_CALL_COMPLETE &&
+ call->rx_data_post == call->rx_first_oos) {
+ _debug("drain rx oos now");
+ set_bit(RXRPC_CALL_DRAIN_RX_OOS, &call->events);
+ }
+ read_unlock(&call->state_lock);
+
+ spin_unlock_bh(&call->lock);
+ _leave(" [stored #%u]", call->rx_first_oos);
+}
+
+/*
+ * clear the Tx window on final ACK reception
+ */
+static void rxrpc_zap_tx_window(struct rxrpc_call *call)
+{
+ struct rxrpc_skb_priv *sp;
+ struct sk_buff *skb;
+ unsigned long _skb, *acks_window;
+ uint8_t winsz = call->acks_winsz;
+ int tail;
+
+ acks_window = call->acks_window;
+ call->acks_window = NULL;
+
+ while (CIRC_CNT(call->acks_head, call->acks_tail, winsz) > 0) {
+ tail = call->acks_tail;
+ smp_read_barrier_depends();
+ _skb = acks_window[tail] & ~1;
+ smp_mb();
+ call->acks_tail = (call->acks_tail + 1) & (winsz - 1);
+
+ skb = (struct sk_buff *) _skb;
+ sp = rxrpc_skb(skb);
+ _debug("+++ clear Tx %u", ntohl(sp->hdr.seq));
+ rxrpc_free_skb(skb);
+ }
+
+ kfree(acks_window);
+}
+
+/*
+ * process the extra information that may be appended to an ACK packet
+ */
+static void rxrpc_extract_ackinfo(struct rxrpc_call *call, struct sk_buff *skb,
+ unsigned latest, int nAcks)
+{
+ struct rxrpc_ackinfo ackinfo;
+ struct rxrpc_peer *peer;
+ unsigned mtu;
+
+ if (skb_copy_bits(skb, nAcks + 3, &ackinfo, sizeof(ackinfo)) < 0) {
+ _leave(" [no ackinfo]");
+ return;
+ }
+
+ _proto("Rx ACK %%%u Info { rx=%u max=%u rwin=%u jm=%u }",
+ latest,
+ ntohl(ackinfo.rxMTU), ntohl(ackinfo.maxMTU),
+ ntohl(ackinfo.rwind), ntohl(ackinfo.jumbo_max));
+
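+	/* the usable packet size is bounded by both of the advertised MTUs */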
+ mtu = min(ntohl(ackinfo.rxMTU), ntohl(ackinfo.maxMTU));
+
+ peer = call->conn->trans->peer;
+ if (mtu < peer->maxdata) {
+ spin_lock_bh(&peer->lock);
+ peer->maxdata = mtu;
+ peer->mtu = mtu + peer->hdrsize;
+ spin_unlock_bh(&peer->lock);
+ _net("Net MTU %u (maxdata %u)", peer->mtu, peer->maxdata);
+ }
+}
+
+/*
+ * process packets in the reception queue
+ */
+static int rxrpc_process_rx_queue(struct rxrpc_call *call,
+ u32 *_abort_code)
+{
+ struct rxrpc_ackpacket ack;
+ struct rxrpc_skb_priv *sp;
+ struct sk_buff *skb;
+ bool post_ACK;
+ int latest;
+ u32 hard, tx;
+
+ _enter("");
+
+process_further:
+ skb = skb_dequeue(&call->rx_queue);
+ if (!skb)
+ return -EAGAIN;
+
+ _net("deferred skb %p", skb);
+
+ sp = rxrpc_skb(skb);
+
+ _debug("process %s [st %d]", rxrpc_pkts[sp->hdr.type], call->state);
+
+ post_ACK = false;
+
+ switch (sp->hdr.type) {
+ /* data packets that wind up here have been received out of
+ * order, need security processing or are jumbo packets */
+ case RXRPC_PACKET_TYPE_DATA:
+ _proto("OOSQ DATA %%%u { #%u }",
+ ntohl(sp->hdr.serial), ntohl(sp->hdr.seq));
+
+ /* secured packets must be verified and possibly decrypted */
+ if (rxrpc_verify_packet(call, skb, _abort_code) < 0)
+ goto protocol_error;
+
+ rxrpc_insert_oos_packet(call, skb);
+ goto process_further;
+
+ /* partial ACK to process */
+ case RXRPC_PACKET_TYPE_ACK:
+ if (skb_copy_bits(skb, 0, &ack, sizeof(ack)) < 0) {
+ _debug("extraction failure");
+ goto protocol_error;
+ }
+ if (!skb_pull(skb, sizeof(ack)))
+ BUG();
+
+ latest = ntohl(sp->hdr.serial);
+ hard = ntohl(ack.firstPacket);
+ tx = atomic_read(&call->sequence);
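+		/* ack.firstPacket is the lowest sequence number not yet
+		 * hard-ACK'd; everything below it can be dropped from the Tx
+		 * window */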
+
+ _proto("Rx ACK %%%u { m=%hu f=#%u p=#%u s=%%%u r=%s n=%u }",
+ latest,
+ ntohs(ack.maxSkew),
+ hard,
+ ntohl(ack.previousPacket),
+ ntohl(ack.serial),
+ rxrpc_acks[ack.reason],
+ ack.nAcks);
+
+ rxrpc_extract_ackinfo(call, skb, latest, ack.nAcks);
+
+ if (ack.reason == RXRPC_ACK_PING) {
+ _proto("Rx ACK %%%u PING Request", latest);
+ rxrpc_propose_ACK(call, RXRPC_ACK_PING_RESPONSE,
+ sp->hdr.serial, true);
+ }
+
+ /* discard any out-of-order or duplicate ACKs */
+ if (latest - call->acks_latest <= 0) {
+ _debug("discard ACK %d <= %d",
+ latest, call->acks_latest);
+ goto discard;
+ }
+ call->acks_latest = latest;
+
+ if (call->state != RXRPC_CALL_CLIENT_SEND_REQUEST &&
+ call->state != RXRPC_CALL_CLIENT_AWAIT_REPLY &&
+ call->state != RXRPC_CALL_SERVER_SEND_REPLY &&
+ call->state != RXRPC_CALL_SERVER_AWAIT_ACK)
+ goto discard;
+
+ _debug("Tx=%d H=%u S=%d", tx, call->acks_hard, call->state);
+
+ if (hard > 0) {
+ if (hard - 1 > tx) {
+ _debug("hard-ACK'd packet %d not transmitted"
+ " (%d top)",
+ hard - 1, tx);
+ goto protocol_error;
+ }
+
+ if ((call->state == RXRPC_CALL_CLIENT_AWAIT_REPLY ||
+ call->state == RXRPC_CALL_SERVER_AWAIT_ACK) &&
+ hard > tx)
+ goto all_acked;
+
+ smp_rmb();
+ rxrpc_rotate_tx_window(call, hard - 1);
+ }
+
+ if (ack.nAcks > 0) {
+ if (hard - 1 + ack.nAcks > tx) {
+ _debug("soft-ACK'd packet %d+%d not"
+ " transmitted (%d top)",
+ hard - 1, ack.nAcks, tx);
+ goto protocol_error;
+ }
+
+ if (rxrpc_process_soft_ACKs(call, &ack, skb) < 0)
+ goto protocol_error;
+ }
+ goto discard;
+
+ /* complete ACK to process */
+ case RXRPC_PACKET_TYPE_ACKALL:
+ goto all_acked;
+
+ /* abort and busy are handled elsewhere */
+ case RXRPC_PACKET_TYPE_BUSY:
+ case RXRPC_PACKET_TYPE_ABORT:
+ BUG();
+
+ /* connection level events - also handled elsewhere */
+ case RXRPC_PACKET_TYPE_CHALLENGE:
+ case RXRPC_PACKET_TYPE_RESPONSE:
+ case RXRPC_PACKET_TYPE_DEBUG:
+ BUG();
+ }
+
+ /* if we've had a hard ACK that covers all the packets we've sent, then
+ * that ends that phase of the operation */
+all_acked:
+ write_lock_bh(&call->state_lock);
+ _debug("ack all %d", call->state);
+
+ switch (call->state) {
+ case RXRPC_CALL_CLIENT_AWAIT_REPLY:
+ call->state = RXRPC_CALL_CLIENT_RECV_REPLY;
+ break;
+ case RXRPC_CALL_SERVER_AWAIT_ACK:
+ _debug("srv complete");
+ call->state = RXRPC_CALL_COMPLETE;
+ post_ACK = true;
+ break;
+ case RXRPC_CALL_CLIENT_SEND_REQUEST:
+ case RXRPC_CALL_SERVER_RECV_REQUEST:
+ goto protocol_error_unlock; /* can't occur yet */
+ default:
+ write_unlock_bh(&call->state_lock);
+ goto discard; /* assume packet left over from earlier phase */
+ }
+
+ write_unlock_bh(&call->state_lock);
+
+ /* if all the packets we sent are hard-ACK'd, then we can discard
+ * whatever we've got left */
+ _debug("clear Tx %d",
+ CIRC_CNT(call->acks_head, call->acks_tail, call->acks_winsz));
+
+ del_timer_sync(&call->resend_timer);
+ clear_bit(RXRPC_CALL_RUN_RTIMER, &call->flags);
+ clear_bit(RXRPC_CALL_RESEND_TIMER, &call->events);
+
+ if (call->acks_window)
+ rxrpc_zap_tx_window(call);
+
+ if (post_ACK) {
+ /* post the final ACK message for userspace to pick up */
+ _debug("post ACK");
+ skb->mark = RXRPC_SKB_MARK_FINAL_ACK;
+ sp->call = call;
+ rxrpc_get_call(call);
+ spin_lock_bh(&call->lock);
+ if (rxrpc_queue_rcv_skb(call, skb, true, true) < 0)
+ BUG();
+ spin_unlock_bh(&call->lock);
+ goto process_further;
+ }
+
+discard:
+ rxrpc_free_skb(skb);
+ goto process_further;
+
+protocol_error_unlock:
+ write_unlock_bh(&call->state_lock);
+protocol_error:
+ rxrpc_free_skb(skb);
+ _leave(" = -EPROTO");
+ return -EPROTO;
+}
+
+/*
+ * post a message to the socket Rx queue for recvmsg() to pick up
+ */
+static int rxrpc_post_message(struct rxrpc_call *call, u32 mark, u32 error,
+ bool fatal)
+{
+ struct rxrpc_skb_priv *sp;
+ struct sk_buff *skb;
+ int ret;
+
+ _enter("{%d,%lx},%u,%u,%d",
+ call->debug_id, call->flags, mark, error, fatal);
+
+ /* remove timers and things for fatal messages */
+ if (fatal) {
+ del_timer_sync(&call->resend_timer);
+ del_timer_sync(&call->ack_timer);
+ clear_bit(RXRPC_CALL_RUN_RTIMER, &call->flags);
+ }
+
+ if (mark != RXRPC_SKB_MARK_NEW_CALL &&
+ !test_bit(RXRPC_CALL_HAS_USERID, &call->flags)) {
+ _leave("[no userid]");
+ return 0;
+ }
+
+ if (!test_bit(RXRPC_CALL_TERMINAL_MSG, &call->flags)) {
+ skb = alloc_skb(0, GFP_NOFS);
+ if (!skb)
+ return -ENOMEM;
+
+ rxrpc_new_skb(skb);
+
+ skb->mark = mark;
+
+ sp = rxrpc_skb(skb);
+ memset(sp, 0, sizeof(*sp));
+ sp->error = error;
+ sp->call = call;
+ rxrpc_get_call(call);
+
+ spin_lock_bh(&call->lock);
+ ret = rxrpc_queue_rcv_skb(call, skb, true, fatal);
+ spin_unlock_bh(&call->lock);
+ if (ret < 0)
+ BUG();
+ }
+
+ return 0;
+}
+
+/*
+ * handle background processing of incoming call packets and ACK / abort
+ * generation
+ */
+void rxrpc_process_call(struct work_struct *work)
+{
+ struct rxrpc_call *call =
+ container_of(work, struct rxrpc_call, processor);
+ struct rxrpc_ackpacket ack;
+ struct rxrpc_ackinfo ackinfo;
+ struct rxrpc_header hdr;
+ struct msghdr msg;
+ struct kvec iov[5];
+ unsigned long bits;
+ __be32 data, pad;
+ size_t len;
+ int genbit, loop, nbit, ioc, ret, mtu;
+ u32 abort_code = RX_PROTOCOL_ERROR;
+ u8 *acks = NULL;
+
+ _enter("{%d,%s,%lx} [%lu]",
+ call->debug_id, rxrpc_call_states[call->state], call->events,
+ (jiffies - call->creation_jif) / (HZ / 10));
+
+ if (test_and_set_bit(RXRPC_CALL_PROC_BUSY, &call->flags)) {
+ _debug("XXXXXXXXXXXXX RUNNING ON MULTIPLE CPUS XXXXXXXXXXXXX");
+ return;
+ }
+
+ /* there's a good chance we're going to have to send a message, so set
+ * one up in advance */
+ msg.msg_name = &call->conn->trans->peer->srx.transport.sin;
+ msg.msg_namelen = sizeof(call->conn->trans->peer->srx.transport.sin);
+ msg.msg_control = NULL;
+ msg.msg_controllen = 0;
+ msg.msg_flags = 0;
+
+ hdr.epoch = call->conn->epoch;
+ hdr.cid = call->cid;
+ hdr.callNumber = call->call_id;
+ hdr.seq = 0;
+ hdr.type = RXRPC_PACKET_TYPE_ACK;
+ hdr.flags = call->conn->out_clientflag;
+ hdr.userStatus = 0;
+ hdr.securityIndex = call->conn->security_ix;
+ hdr._rsvd = 0;
+ hdr.serviceId = call->conn->service_id;
+
+ memset(iov, 0, sizeof(iov));
+ iov[0].iov_base = &hdr;
+ iov[0].iov_len = sizeof(hdr);
+
+ /* deal with events of a final nature */
+ if (test_bit(RXRPC_CALL_RELEASE, &call->events)) {
+ rxrpc_release_call(call);
+ clear_bit(RXRPC_CALL_RELEASE, &call->events);
+ }
+
+ if (test_bit(RXRPC_CALL_RCVD_ERROR, &call->events)) {
+ int error;
+
+ clear_bit(RXRPC_CALL_CONN_ABORT, &call->events);
+ clear_bit(RXRPC_CALL_REJECT_BUSY, &call->events);
+ clear_bit(RXRPC_CALL_ABORT, &call->events);
+
+ error = call->conn->trans->peer->net_error;
+ _debug("post net error %d", error);
+
+ if (rxrpc_post_message(call, RXRPC_SKB_MARK_NET_ERROR,
+ error, true) < 0)
+ goto no_mem;
+ clear_bit(RXRPC_CALL_RCVD_ERROR, &call->events);
+ goto kill_ACKs;
+ }
+
+ if (test_bit(RXRPC_CALL_CONN_ABORT, &call->events)) {
+ ASSERTCMP(call->state, >, RXRPC_CALL_COMPLETE);
+
+ clear_bit(RXRPC_CALL_REJECT_BUSY, &call->events);
+ clear_bit(RXRPC_CALL_ABORT, &call->events);
+
+ _debug("post conn abort");
+
+ if (rxrpc_post_message(call, RXRPC_SKB_MARK_LOCAL_ERROR,
+ call->conn->error, true) < 0)
+ goto no_mem;
+ clear_bit(RXRPC_CALL_CONN_ABORT, &call->events);
+ goto kill_ACKs;
+ }
+
+ if (test_bit(RXRPC_CALL_REJECT_BUSY, &call->events)) {
+ hdr.type = RXRPC_PACKET_TYPE_BUSY;
+ genbit = RXRPC_CALL_REJECT_BUSY;
+ goto send_message;
+ }
+
+ if (test_bit(RXRPC_CALL_ABORT, &call->events)) {
+ ASSERTCMP(call->state, >, RXRPC_CALL_COMPLETE);
+
+ if (rxrpc_post_message(call, RXRPC_SKB_MARK_LOCAL_ERROR,
+ ECONNABORTED, true) < 0)
+ goto no_mem;
+ hdr.type = RXRPC_PACKET_TYPE_ABORT;
+ data = htonl(call->abort_code);
+ iov[1].iov_base = &data;
+ iov[1].iov_len = sizeof(data);
+ genbit = RXRPC_CALL_ABORT;
+ goto send_message;
+ }
+
+ if (test_bit(RXRPC_CALL_ACK_FINAL, &call->events)) {
+ genbit = RXRPC_CALL_ACK_FINAL;
+
+ ack.bufferSpace = htons(8);
+ ack.maxSkew = 0;
+ ack.serial = 0;
+ ack.reason = RXRPC_ACK_IDLE;
+ ack.nAcks = 0;
+ call->ackr_reason = 0;
+
+ spin_lock_bh(&call->lock);
+ ack.serial = call->ackr_serial;
+ ack.previousPacket = call->ackr_prev_seq;
+ ack.firstPacket = htonl(call->rx_data_eaten + 1);
+ spin_unlock_bh(&call->lock);
+
+ pad = 0;
+
+ iov[1].iov_base = &ack;
+ iov[1].iov_len = sizeof(ack);
+ iov[2].iov_base = &pad;
+ iov[2].iov_len = 3;
+ iov[3].iov_base = &ackinfo;
+ iov[3].iov_len = sizeof(ackinfo);
+ goto send_ACK;
+ }
+
+ if (call->events & ((1 << RXRPC_CALL_RCVD_BUSY) |
+ (1 << RXRPC_CALL_RCVD_ABORT))
+ ) {
+ u32 mark;
+
+ if (test_bit(RXRPC_CALL_RCVD_ABORT, &call->events))
+ mark = RXRPC_SKB_MARK_REMOTE_ABORT;
+ else
+ mark = RXRPC_SKB_MARK_BUSY;
+
+ _debug("post abort/busy");
+ rxrpc_clear_tx_window(call);
+ if (rxrpc_post_message(call, mark, ECONNABORTED, true) < 0)
+ goto no_mem;
+
+ clear_bit(RXRPC_CALL_RCVD_BUSY, &call->events);
+ clear_bit(RXRPC_CALL_RCVD_ABORT, &call->events);
+ goto kill_ACKs;
+ }
+
+ if (test_and_clear_bit(RXRPC_CALL_RCVD_ACKALL, &call->events)) {
+ _debug("do implicit ackall");
+ rxrpc_clear_tx_window(call);
+ }
+
+ if (test_bit(RXRPC_CALL_LIFE_TIMER, &call->events)) {
+ write_lock_bh(&call->state_lock);
+ if (call->state <= RXRPC_CALL_COMPLETE) {
+ call->state = RXRPC_CALL_LOCALLY_ABORTED;
+ call->abort_code = RX_CALL_TIMEOUT;
+ set_bit(RXRPC_CALL_ABORT, &call->events);
+ }
+ write_unlock_bh(&call->state_lock);
+
+ _debug("post timeout");
+ if (rxrpc_post_message(call, RXRPC_SKB_MARK_LOCAL_ERROR,
+ ETIME, true) < 0)
+ goto no_mem;
+
+ clear_bit(RXRPC_CALL_LIFE_TIMER, &call->events);
+ goto kill_ACKs;
+ }
+
+ /* deal with assorted inbound messages */
+ if (!skb_queue_empty(&call->rx_queue)) {
+ switch (rxrpc_process_rx_queue(call, &abort_code)) {
+ case 0:
+ case -EAGAIN:
+ break;
+ case -ENOMEM:
+ goto no_mem;
+ case -EKEYEXPIRED:
+ case -EKEYREJECTED:
+ case -EPROTO:
+ rxrpc_abort_call(call, abort_code);
+ goto kill_ACKs;
+ }
+ }
+
+ /* handle resending */
+ if (test_and_clear_bit(RXRPC_CALL_RESEND_TIMER, &call->events))
+ rxrpc_resend_timer(call);
+ if (test_and_clear_bit(RXRPC_CALL_RESEND, &call->events))
+ rxrpc_resend(call);
+
+ /* consider sending an ordinary ACK */
+ if (test_bit(RXRPC_CALL_ACK, &call->events)) {
+ _debug("send ACK: window: %d - %d { %lx }",
+ call->rx_data_eaten, call->ackr_win_top,
+ call->ackr_window[0]);
+
+ if (call->state > RXRPC_CALL_SERVER_ACK_REQUEST &&
+ call->ackr_reason != RXRPC_ACK_PING_RESPONSE) {
+ /* ACK by sending reply DATA packet in this state */
+ clear_bit(RXRPC_CALL_ACK, &call->events);
+ goto maybe_reschedule;
+ }
+
+ genbit = RXRPC_CALL_ACK;
+
+ acks = kzalloc(call->ackr_win_top - call->rx_data_eaten,
+ GFP_NOFS);
+ if (!acks)
+ goto no_mem;
+
+		/* hdr.flags = RXRPC_SLOW_START_OK; */
+ ack.bufferSpace = htons(8);
+ ack.maxSkew = 0;
+ ack.serial = 0;
+ ack.reason = 0;
+
+ spin_lock_bh(&call->lock);
+ ack.reason = call->ackr_reason;
+ ack.serial = call->ackr_serial;
+ ack.previousPacket = call->ackr_prev_seq;
+ ack.firstPacket = htonl(call->rx_data_eaten + 1);
+
+ ack.nAcks = 0;
+ for (loop = 0; loop < RXRPC_ACKR_WINDOW_ASZ; loop++) {
+ nbit = loop * BITS_PER_LONG;
+ for (bits = call->ackr_window[loop]; bits; bits >>= 1
+ ) {
+ _debug("- l=%d n=%d b=%lx", loop, nbit, bits);
+ if (bits & 1) {
+ acks[nbit] = RXRPC_ACK_TYPE_ACK;
+ ack.nAcks = nbit + 1;
+ }
+ nbit++;
+ }
+ }
+ call->ackr_reason = 0;
+ spin_unlock_bh(&call->lock);
+
+ pad = 0;
+
+ iov[1].iov_base = &ack;
+ iov[1].iov_len = sizeof(ack);
+ iov[2].iov_base = acks;
+ iov[2].iov_len = ack.nAcks;
+ iov[3].iov_base = &pad;
+ iov[3].iov_len = 3;
+ iov[4].iov_base = &ackinfo;
+ iov[4].iov_len = sizeof(ackinfo);
+
+ switch (ack.reason) {
+ case RXRPC_ACK_REQUESTED:
+ case RXRPC_ACK_DUPLICATE:
+ case RXRPC_ACK_OUT_OF_SEQUENCE:
+ case RXRPC_ACK_EXCEEDS_WINDOW:
+ case RXRPC_ACK_NOSPACE:
+ case RXRPC_ACK_PING:
+ case RXRPC_ACK_PING_RESPONSE:
+ goto send_ACK_with_skew;
+ case RXRPC_ACK_DELAY:
+ case RXRPC_ACK_IDLE:
+ goto send_ACK;
+ }
+ }
+
+ /* handle completion of security negotiations on an incoming
+ * connection */
+ if (test_and_clear_bit(RXRPC_CALL_SECURED, &call->events)) {
+ _debug("secured");
+ spin_lock_bh(&call->lock);
+
+ if (call->state == RXRPC_CALL_SERVER_SECURING) {
+ _debug("securing");
+ write_lock(&call->conn->lock);
+ if (!test_bit(RXRPC_CALL_RELEASED, &call->flags) &&
+ !test_bit(RXRPC_CALL_RELEASE, &call->events)) {
+ _debug("not released");
+ call->state = RXRPC_CALL_SERVER_ACCEPTING;
+ list_move_tail(&call->accept_link,
+ &call->socket->acceptq);
+ }
+ write_unlock(&call->conn->lock);
+ read_lock(&call->state_lock);
+ if (call->state < RXRPC_CALL_COMPLETE)
+ set_bit(RXRPC_CALL_POST_ACCEPT, &call->events);
+ read_unlock(&call->state_lock);
+ }
+
+ spin_unlock_bh(&call->lock);
+ if (!test_bit(RXRPC_CALL_POST_ACCEPT, &call->events))
+ goto maybe_reschedule;
+ }
+
+ /* post a notification of an acceptable connection to the app */
+ if (test_bit(RXRPC_CALL_POST_ACCEPT, &call->events)) {
+ _debug("post accept");
+ if (rxrpc_post_message(call, RXRPC_SKB_MARK_NEW_CALL,
+ 0, false) < 0)
+ goto no_mem;
+ clear_bit(RXRPC_CALL_POST_ACCEPT, &call->events);
+ goto maybe_reschedule;
+ }
+
+ /* handle incoming call acceptance */
+ if (test_and_clear_bit(RXRPC_CALL_ACCEPTED, &call->events)) {
+ _debug("accepted");
+ ASSERTCMP(call->rx_data_post, ==, 0);
+ call->rx_data_post = 1;
+ read_lock_bh(&call->state_lock);
+ if (call->state < RXRPC_CALL_COMPLETE)
+ set_bit(RXRPC_CALL_DRAIN_RX_OOS, &call->events);
+ read_unlock_bh(&call->state_lock);
+ }
+
+ /* drain the out of sequence received packet queue into the packet Rx
+ * queue */
+ if (test_and_clear_bit(RXRPC_CALL_DRAIN_RX_OOS, &call->events)) {
+ while (call->rx_data_post == call->rx_first_oos)
+ if (rxrpc_drain_rx_oos_queue(call) < 0)
+ break;
+ goto maybe_reschedule;
+ }
+
+ /* other events may have been raised since we started checking */
+ goto maybe_reschedule;
+
+send_ACK_with_skew:
+ ack.maxSkew = htons(atomic_read(&call->conn->hi_serial) -
+ ntohl(ack.serial));
+send_ACK:
+ mtu = call->conn->trans->peer->if_mtu;
+ mtu -= call->conn->trans->peer->hdrsize;
+ ackinfo.maxMTU = htonl(mtu);
+ ackinfo.rwind = htonl(32);
+
+ /* permit the peer to send us jumbo packets if it wants to */
+ ackinfo.rxMTU = htonl(5692);
+ ackinfo.jumbo_max = htonl(4);
+
+ hdr.serial = htonl(atomic_inc_return(&call->conn->serial));
+ _proto("Tx ACK %%%u { m=%hu f=#%u p=#%u s=%%%u r=%s n=%u }",
+ ntohl(hdr.serial),
+ ntohs(ack.maxSkew),
+ ntohl(ack.firstPacket),
+ ntohl(ack.previousPacket),
+ ntohl(ack.serial),
+ rxrpc_acks[ack.reason],
+ ack.nAcks);
+
+ del_timer_sync(&call->ack_timer);
+ if (ack.nAcks > 0)
+ set_bit(RXRPC_CALL_TX_SOFT_ACK, &call->flags);
+ goto send_message_2;
+
+send_message:
+ _debug("send message");
+
+ hdr.serial = htonl(atomic_inc_return(&call->conn->serial));
+ _proto("Tx %s %%%u", rxrpc_pkts[hdr.type], ntohl(hdr.serial));
+send_message_2:
+
+ len = iov[0].iov_len;
+ ioc = 1;
+ if (iov[4].iov_len) {
+ ioc = 5;
+ len += iov[4].iov_len;
+ len += iov[3].iov_len;
+ len += iov[2].iov_len;
+ len += iov[1].iov_len;
+ } else if (iov[3].iov_len) {
+ ioc = 4;
+ len += iov[3].iov_len;
+ len += iov[2].iov_len;
+ len += iov[1].iov_len;
+ } else if (iov[2].iov_len) {
+ ioc = 3;
+ len += iov[2].iov_len;
+ len += iov[1].iov_len;
+ } else if (iov[1].iov_len) {
+ ioc = 2;
+ len += iov[1].iov_len;
+ }
+
+ ret = kernel_sendmsg(call->conn->trans->local->socket,
+ &msg, iov, ioc, len);
+ if (ret < 0) {
+ _debug("sendmsg failed: %d", ret);
+ read_lock_bh(&call->state_lock);
+ if (call->state < RXRPC_CALL_DEAD)
+ rxrpc_queue_call(call);
+ read_unlock_bh(&call->state_lock);
+ goto error;
+ }
+
+ switch (genbit) {
+ case RXRPC_CALL_ABORT:
+ clear_bit(genbit, &call->events);
+ clear_bit(RXRPC_CALL_RCVD_ABORT, &call->events);
+ goto kill_ACKs;
+
+ case RXRPC_CALL_ACK_FINAL:
+ write_lock_bh(&call->state_lock);
+ if (call->state == RXRPC_CALL_CLIENT_FINAL_ACK)
+ call->state = RXRPC_CALL_COMPLETE;
+ write_unlock_bh(&call->state_lock);
+ goto kill_ACKs;
+
+ default:
+ clear_bit(genbit, &call->events);
+ switch (call->state) {
+ case RXRPC_CALL_CLIENT_AWAIT_REPLY:
+ case RXRPC_CALL_CLIENT_RECV_REPLY:
+ case RXRPC_CALL_SERVER_RECV_REQUEST:
+ case RXRPC_CALL_SERVER_ACK_REQUEST:
+ _debug("start ACK timer");
+ rxrpc_propose_ACK(call, RXRPC_ACK_DELAY,
+ call->ackr_serial, false);
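+			/* fall through */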
+ default:
+ break;
+ }
+ goto maybe_reschedule;
+ }
+
+kill_ACKs:
+ del_timer_sync(&call->ack_timer);
+ if (test_and_clear_bit(RXRPC_CALL_ACK_FINAL, &call->events))
+ rxrpc_put_call(call);
+ clear_bit(RXRPC_CALL_ACK, &call->events);
+
+maybe_reschedule:
+ if (call->events || !skb_queue_empty(&call->rx_queue)) {
+ read_lock_bh(&call->state_lock);
+ if (call->state < RXRPC_CALL_DEAD)
+ rxrpc_queue_call(call);
+ read_unlock_bh(&call->state_lock);
+ }
+
+ /* don't leave aborted connections on the accept queue */
+ if (call->state >= RXRPC_CALL_COMPLETE &&
+ !list_empty(&call->accept_link)) {
+ _debug("X unlinking once-pending call %p { e=%lx f=%lx c=%x }",
+ call, call->events, call->flags,
+ ntohl(call->conn->cid));
+
+ read_lock_bh(&call->state_lock);
+ if (!test_bit(RXRPC_CALL_RELEASED, &call->flags) &&
+ !test_and_set_bit(RXRPC_CALL_RELEASE, &call->events))
+ rxrpc_queue_call(call);
+ read_unlock_bh(&call->state_lock);
+ }
+
+error:
+ clear_bit(RXRPC_CALL_PROC_BUSY, &call->flags);
+ kfree(acks);
+
+ /* because we don't want two CPUs both processing the work item for one
+ * call at the same time, we use a flag to note when it's busy; however
+ * this means there's a race between clearing the flag and setting the
+ * work pending bit and the work item being processed again */
+ if (call->events && !work_pending(&call->processor)) {
+ _debug("jumpstart %x", ntohl(call->conn->cid));
+ rxrpc_queue_call(call);
+ }
+
+ _leave("");
+ return;
+
+no_mem:
+ _debug("out of memory");
+ goto maybe_reschedule;
+}
diff --git a/net/rxrpc/ar-call.c b/net/rxrpc/ar-call.c
new file mode 100644
index 00000000000..4d92d88ff1f
--- /dev/null
+++ b/net/rxrpc/ar-call.c
@@ -0,0 +1,804 @@
+/* RxRPC individual remote procedure call handling
+ *
+ * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/module.h>
+#include <linux/circ_buf.h>
+#include <net/sock.h>
+#include <net/af_rxrpc.h>
+#include "ar-internal.h"
+
+struct kmem_cache *rxrpc_call_jar;
+LIST_HEAD(rxrpc_calls);
+DEFINE_RWLOCK(rxrpc_call_lock);
+static unsigned rxrpc_call_max_lifetime = 60;
+static unsigned rxrpc_dead_call_timeout = 2;
+
+static void rxrpc_destroy_call(struct work_struct *work);
+static void rxrpc_call_life_expired(unsigned long _call);
+static void rxrpc_dead_call_expired(unsigned long _call);
+static void rxrpc_ack_time_expired(unsigned long _call);
+static void rxrpc_resend_time_expired(unsigned long _call);
+
+/*
+ * allocate a new call
+ */
+static struct rxrpc_call *rxrpc_alloc_call(gfp_t gfp)
+{
+ struct rxrpc_call *call;
+
+ call = kmem_cache_zalloc(rxrpc_call_jar, gfp);
+ if (!call)
+ return NULL;
+
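+	/* the Tx ACK window size must be a power of two as its indices are
+	 * wrapped with a mask, e.g. (tail + 1) & (acks_winsz - 1) */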
+ call->acks_winsz = 16;
+ call->acks_window = kmalloc(call->acks_winsz * sizeof(unsigned long),
+ gfp);
+ if (!call->acks_window) {
+ kmem_cache_free(rxrpc_call_jar, call);
+ return NULL;
+ }
+
+ setup_timer(&call->lifetimer, &rxrpc_call_life_expired,
+ (unsigned long) call);
+ setup_timer(&call->deadspan, &rxrpc_dead_call_expired,
+ (unsigned long) call);
+ setup_timer(&call->ack_timer, &rxrpc_ack_time_expired,
+ (unsigned long) call);
+ setup_timer(&call->resend_timer, &rxrpc_resend_time_expired,
+ (unsigned long) call);
+ INIT_WORK(&call->destroyer, &rxrpc_destroy_call);
+ INIT_WORK(&call->processor, &rxrpc_process_call);
+ INIT_LIST_HEAD(&call->accept_link);
+ skb_queue_head_init(&call->rx_queue);
+ skb_queue_head_init(&call->rx_oos_queue);
+ init_waitqueue_head(&call->tx_waitq);
+ spin_lock_init(&call->lock);
+ rwlock_init(&call->state_lock);
+ atomic_set(&call->usage, 1);
+ call->debug_id = atomic_inc_return(&rxrpc_debug_id);
+ call->state = RXRPC_CALL_CLIENT_SEND_REQUEST;
+
+ memset(&call->sock_node, 0xed, sizeof(call->sock_node));
+
+ call->rx_data_expect = 1;
+ call->rx_data_eaten = 0;
+ call->rx_first_oos = 0;
+ call->ackr_win_top = call->rx_data_eaten + 1 + RXRPC_MAXACKS;
+ call->creation_jif = jiffies;
+ return call;
+}
+
+/*
+ * allocate a new client call and attempt to get a connection slot for it
+ */
+static struct rxrpc_call *rxrpc_alloc_client_call(
+ struct rxrpc_sock *rx,
+ struct rxrpc_transport *trans,
+ struct rxrpc_conn_bundle *bundle,
+ gfp_t gfp)
+{
+ struct rxrpc_call *call;
+ int ret;
+
+ _enter("");
+
+ ASSERT(rx != NULL);
+ ASSERT(trans != NULL);
+ ASSERT(bundle != NULL);
+
+ call = rxrpc_alloc_call(gfp);
+ if (!call)
+ return ERR_PTR(-ENOMEM);
+
+ sock_hold(&rx->sk);
+ call->socket = rx;
+ call->rx_data_post = 1;
+
+ ret = rxrpc_connect_call(rx, trans, bundle, call, gfp);
+ if (ret < 0) {
+		sock_put(&rx->sk);
+		kfree(call->acks_window);
+		kmem_cache_free(rxrpc_call_jar, call);
+ return ERR_PTR(ret);
+ }
+
+ spin_lock(&call->conn->trans->peer->lock);
+ list_add(&call->error_link, &call->conn->trans->peer->error_targets);
+ spin_unlock(&call->conn->trans->peer->lock);
+
+ call->lifetimer.expires = jiffies + rxrpc_call_max_lifetime * HZ;
+ add_timer(&call->lifetimer);
+
+ _leave(" = %p", call);
+ return call;
+}
+
+/*
+ * set up a call for the given data
+ * - called in process context with IRQs enabled
+ */
+struct rxrpc_call *rxrpc_get_client_call(struct rxrpc_sock *rx,
+ struct rxrpc_transport *trans,
+ struct rxrpc_conn_bundle *bundle,
+ unsigned long user_call_ID,
+ int create,
+ gfp_t gfp)
+{
+ struct rxrpc_call *call, *candidate;
+ struct rb_node *p, *parent, **pp;
+
+ _enter("%p,%d,%d,%lx,%d",
+ rx, trans ? trans->debug_id : -1, bundle ? bundle->debug_id : -1,
+ user_call_ID, create);
+
+ /* search the extant calls first for one that matches the specified
+ * user ID */
+ read_lock(&rx->call_lock);
+
+ p = rx->calls.rb_node;
+ while (p) {
+ call = rb_entry(p, struct rxrpc_call, sock_node);
+
+ if (user_call_ID < call->user_call_ID)
+ p = p->rb_left;
+ else if (user_call_ID > call->user_call_ID)
+ p = p->rb_right;
+ else
+ goto found_extant_call;
+ }
+
+ read_unlock(&rx->call_lock);
+
+ if (!create || !trans)
+ return ERR_PTR(-EBADSLT);
+
+ /* not yet present - create a candidate for a new record and then
+ * redo the search */
+ candidate = rxrpc_alloc_client_call(rx, trans, bundle, gfp);
+ if (IS_ERR(candidate)) {
+ _leave(" = %ld", PTR_ERR(candidate));
+ return candidate;
+ }
+
+ candidate->user_call_ID = user_call_ID;
+ __set_bit(RXRPC_CALL_HAS_USERID, &candidate->flags);
+
+ write_lock(&rx->call_lock);
+
+ pp = &rx->calls.rb_node;
+ parent = NULL;
+ while (*pp) {
+ parent = *pp;
+ call = rb_entry(parent, struct rxrpc_call, sock_node);
+
+ if (user_call_ID < call->user_call_ID)
+ pp = &(*pp)->rb_left;
+ else if (user_call_ID > call->user_call_ID)
+ pp = &(*pp)->rb_right;
+ else
+ goto found_extant_second;
+ }
+
+ /* second search also failed; add the new call */
+ call = candidate;
+ candidate = NULL;
+ rxrpc_get_call(call);
+
+ rb_link_node(&call->sock_node, parent, pp);
+ rb_insert_color(&call->sock_node, &rx->calls);
+ write_unlock(&rx->call_lock);
+
+ write_lock_bh(&rxrpc_call_lock);
+ list_add_tail(&call->link, &rxrpc_calls);
+ write_unlock_bh(&rxrpc_call_lock);
+
+ _net("CALL new %d on CONN %d", call->debug_id, call->conn->debug_id);
+
+ _leave(" = %p [new]", call);
+ return call;
+
+ /* we found the call in the list immediately */
+found_extant_call:
+ rxrpc_get_call(call);
+ read_unlock(&rx->call_lock);
+ _leave(" = %p [extant %d]", call, atomic_read(&call->usage));
+ return call;
+
+ /* we found the call on the second time through the list */
+found_extant_second:
+ rxrpc_get_call(call);
+ write_unlock(&rx->call_lock);
+ rxrpc_put_call(candidate);
+ _leave(" = %p [second %d]", call, atomic_read(&call->usage));
+ return call;
+}
+
+/*
+ * set up an incoming call
+ * - called in process context with IRQs enabled
+ */
+struct rxrpc_call *rxrpc_incoming_call(struct rxrpc_sock *rx,
+ struct rxrpc_connection *conn,
+ struct rxrpc_header *hdr,
+ gfp_t gfp)
+{
+ struct rxrpc_call *call, *candidate;
+ struct rb_node **p, *parent;
+ __be32 call_id;
+
+ _enter(",%d,,%x", conn->debug_id, gfp);
+
+ ASSERT(rx != NULL);
+
+ candidate = rxrpc_alloc_call(gfp);
+ if (!candidate)
+		return ERR_PTR(-ENOMEM);
+
+ candidate->socket = rx;
+ candidate->conn = conn;
+ candidate->cid = hdr->cid;
+ candidate->call_id = hdr->callNumber;
+ candidate->channel = ntohl(hdr->cid) & RXRPC_CHANNELMASK;
+ candidate->rx_data_post = 0;
+ candidate->state = RXRPC_CALL_SERVER_ACCEPTING;
+ if (conn->security_ix > 0)
+ candidate->state = RXRPC_CALL_SERVER_SECURING;
+
+ write_lock_bh(&conn->lock);
+
+ /* set the channel for this call */
+ call = conn->channels[candidate->channel];
+ _debug("channel[%u] is %p", candidate->channel, call);
+ if (call && call->call_id == hdr->callNumber) {
+ /* already set; must've been a duplicate packet */
+ _debug("extant call [%d]", call->state);
+ ASSERTCMP(call->conn, ==, conn);
+
+ read_lock(&call->state_lock);
+ switch (call->state) {
+ case RXRPC_CALL_LOCALLY_ABORTED:
+ if (!test_and_set_bit(RXRPC_CALL_ABORT, &call->events))
+ rxrpc_queue_call(call);
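+			/* fall through */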
+ case RXRPC_CALL_REMOTELY_ABORTED:
+ read_unlock(&call->state_lock);
+ goto aborted_call;
+ default:
+ rxrpc_get_call(call);
+ read_unlock(&call->state_lock);
+ goto extant_call;
+ }
+ }
+
+ if (call) {
+ /* it seems the channel is still in use from the previous call
+ * - ditch the old binding if its call is now complete */
+ _debug("CALL: %u { %s }",
+ call->debug_id, rxrpc_call_states[call->state]);
+
+ if (call->state >= RXRPC_CALL_COMPLETE) {
+ conn->channels[call->channel] = NULL;
+ } else {
+ write_unlock_bh(&conn->lock);
+			kfree(candidate->acks_window);
+			kmem_cache_free(rxrpc_call_jar, candidate);
+ _leave(" = -EBUSY");
+ return ERR_PTR(-EBUSY);
+ }
+ }
+
+	/* check the call number isn't a duplicate */
+ _debug("check dup");
+ call_id = hdr->callNumber;
+ p = &conn->calls.rb_node;
+ parent = NULL;
+ while (*p) {
+ parent = *p;
+ call = rb_entry(parent, struct rxrpc_call, conn_node);
+
+ if (call_id < call->call_id)
+ p = &(*p)->rb_left;
+ else if (call_id > call->call_id)
+ p = &(*p)->rb_right;
+ else
+ goto old_call;
+ }
+
+ /* make the call available */
+ _debug("new call");
+ call = candidate;
+ candidate = NULL;
+ rb_link_node(&call->conn_node, parent, p);
+ rb_insert_color(&call->conn_node, &conn->calls);
+ conn->channels[call->channel] = call;
+ sock_hold(&rx->sk);
+ atomic_inc(&conn->usage);
+ write_unlock_bh(&conn->lock);
+
+ spin_lock(&conn->trans->peer->lock);
+ list_add(&call->error_link, &conn->trans->peer->error_targets);
+ spin_unlock(&conn->trans->peer->lock);
+
+ write_lock_bh(&rxrpc_call_lock);
+ list_add_tail(&call->link, &rxrpc_calls);
+ write_unlock_bh(&rxrpc_call_lock);
+
+ _net("CALL incoming %d on CONN %d", call->debug_id, call->conn->debug_id);
+
+ call->lifetimer.expires = jiffies + rxrpc_call_max_lifetime * HZ;
+ add_timer(&call->lifetimer);
+ _leave(" = %p {%d} [new]", call, call->debug_id);
+ return call;
+
+extant_call:
+ write_unlock_bh(&conn->lock);
+	kfree(candidate->acks_window);
+	kmem_cache_free(rxrpc_call_jar, candidate);
+ _leave(" = %p {%d} [extant]", call, call ? call->debug_id : -1);
+ return call;
+
+aborted_call:
+ write_unlock_bh(&conn->lock);
+	kfree(candidate->acks_window);
+	kmem_cache_free(rxrpc_call_jar, candidate);
+ _leave(" = -ECONNABORTED");
+ return ERR_PTR(-ECONNABORTED);
+
+old_call:
+ write_unlock_bh(&conn->lock);
+	kfree(candidate->acks_window);
+	kmem_cache_free(rxrpc_call_jar, candidate);
+ _leave(" = -ECONNRESET [old]");
+ return ERR_PTR(-ECONNRESET);
+}
+
+/*
+ * find an extant server call
+ * - called in process context with IRQs enabled
+ */
+struct rxrpc_call *rxrpc_find_server_call(struct rxrpc_sock *rx,
+ unsigned long user_call_ID)
+{
+ struct rxrpc_call *call;
+ struct rb_node *p;
+
+ _enter("%p,%lx", rx, user_call_ID);
+
+ /* search the extant calls for one that matches the specified user
+ * ID */
+ read_lock(&rx->call_lock);
+
+ p = rx->calls.rb_node;
+ while (p) {
+ call = rb_entry(p, struct rxrpc_call, sock_node);
+
+ if (user_call_ID < call->user_call_ID)
+ p = p->rb_left;
+ else if (user_call_ID > call->user_call_ID)
+ p = p->rb_right;
+ else
+ goto found_extant_call;
+ }
+
+ read_unlock(&rx->call_lock);
+ _leave(" = NULL");
+ return NULL;
+
+ /* we found the call in the list immediately */
+found_extant_call:
+ rxrpc_get_call(call);
+ read_unlock(&rx->call_lock);
+ _leave(" = %p [%d]", call, atomic_read(&call->usage));
+ return call;
+}
+
+/*
+ * detach a call from a socket and set up for release
+ */
+void rxrpc_release_call(struct rxrpc_call *call)
+{
+ struct rxrpc_connection *conn = call->conn;
+ struct rxrpc_sock *rx = call->socket;
+
+ _enter("{%d,%d,%d,%d}",
+ call->debug_id, atomic_read(&call->usage),
+ atomic_read(&call->ackr_not_idle),
+ call->rx_first_oos);
+
+ spin_lock_bh(&call->lock);
+ if (test_and_set_bit(RXRPC_CALL_RELEASED, &call->flags))
+ BUG();
+ spin_unlock_bh(&call->lock);
+
+ /* dissociate from the socket
+ * - the socket's ref on the call is passed to the death timer
+ */
+ _debug("RELEASE CALL %p (%d CONN %p)", call, call->debug_id, conn);
+
+ write_lock_bh(&rx->call_lock);
+ if (!list_empty(&call->accept_link)) {
+ _debug("unlinking once-pending call %p { e=%lx f=%lx }",
+ call, call->events, call->flags);
+ ASSERT(!test_bit(RXRPC_CALL_HAS_USERID, &call->flags));
+ list_del_init(&call->accept_link);
+ sk_acceptq_removed(&rx->sk);
+ } else if (test_bit(RXRPC_CALL_HAS_USERID, &call->flags)) {
+ rb_erase(&call->sock_node, &rx->calls);
+ memset(&call->sock_node, 0xdd, sizeof(call->sock_node));
+ clear_bit(RXRPC_CALL_HAS_USERID, &call->flags);
+ }
+ write_unlock_bh(&rx->call_lock);
+
+ /* free up the channel for reuse */
+ spin_lock(&conn->trans->client_lock);
+ write_lock_bh(&conn->lock);
+ write_lock(&call->state_lock);
+
+ if (conn->channels[call->channel] == call)
+ conn->channels[call->channel] = NULL;
+
+ if (conn->out_clientflag && conn->bundle) {
+ conn->avail_calls++;
+ switch (conn->avail_calls) {
+ case 1:
+ list_move_tail(&conn->bundle_link,
+ &conn->bundle->avail_conns);
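+			/* fall through */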
+ case 2 ... RXRPC_MAXCALLS - 1:
+ ASSERT(conn->channels[0] == NULL ||
+ conn->channels[1] == NULL ||
+ conn->channels[2] == NULL ||
+ conn->channels[3] == NULL);
+ break;
+ case RXRPC_MAXCALLS:
+ list_move_tail(&conn->bundle_link,
+ &conn->bundle->unused_conns);
+ ASSERT(conn->channels[0] == NULL &&
+ conn->channels[1] == NULL &&
+ conn->channels[2] == NULL &&
+ conn->channels[3] == NULL);
+ break;
+ default:
+ printk(KERN_ERR "RxRPC: conn->avail_calls=%d\n",
+ conn->avail_calls);
+ BUG();
+ }
+ }
+
+ spin_unlock(&conn->trans->client_lock);
+
+ if (call->state < RXRPC_CALL_COMPLETE &&
+ call->state != RXRPC_CALL_CLIENT_FINAL_ACK) {
+ _debug("+++ ABORTING STATE %d +++\n", call->state);
+ call->state = RXRPC_CALL_LOCALLY_ABORTED;
+ call->abort_code = RX_CALL_DEAD;
+ set_bit(RXRPC_CALL_ABORT, &call->events);
+ rxrpc_queue_call(call);
+ }
+ write_unlock(&call->state_lock);
+ write_unlock_bh(&conn->lock);
+
+ /* clean up the Rx queue */
+ if (!skb_queue_empty(&call->rx_queue) ||
+ !skb_queue_empty(&call->rx_oos_queue)) {
+ struct rxrpc_skb_priv *sp;
+ struct sk_buff *skb;
+
+ _debug("purge Rx queues");
+
+ spin_lock_bh(&call->lock);
+ while ((skb = skb_dequeue(&call->rx_queue)) ||
+ (skb = skb_dequeue(&call->rx_oos_queue))) {
+ sp = rxrpc_skb(skb);
+ if (sp->call) {
+ ASSERTCMP(sp->call, ==, call);
+ rxrpc_put_call(call);
+ sp->call = NULL;
+ }
+ skb->destructor = NULL;
+ spin_unlock_bh(&call->lock);
+
+ _debug("- zap %s %%%u #%u",
+ rxrpc_pkts[sp->hdr.type],
+ ntohl(sp->hdr.serial),
+ ntohl(sp->hdr.seq));
+ rxrpc_free_skb(skb);
+ spin_lock_bh(&call->lock);
+ }
+ spin_unlock_bh(&call->lock);
+
+ ASSERTCMP(call->state, !=, RXRPC_CALL_COMPLETE);
+ }
+
+ del_timer_sync(&call->resend_timer);
+ del_timer_sync(&call->ack_timer);
+ del_timer_sync(&call->lifetimer);
+ call->deadspan.expires = jiffies + rxrpc_dead_call_timeout * HZ;
+ add_timer(&call->deadspan);
+
+ _leave("");
+}
+
+/*
+ * handle a dead call being ready for reaping
+ */
+static void rxrpc_dead_call_expired(unsigned long _call)
+{
+ struct rxrpc_call *call = (struct rxrpc_call *) _call;
+
+ _enter("{%d}", call->debug_id);
+
+ write_lock_bh(&call->state_lock);
+ call->state = RXRPC_CALL_DEAD;
+ write_unlock_bh(&call->state_lock);
+ rxrpc_put_call(call);
+}
+
+/*
+ * mark a call as to be released, aborting it if it's still in progress
+ * - called with softirqs disabled
+ */
+static void rxrpc_mark_call_released(struct rxrpc_call *call)
+{
+ bool sched;
+
+ write_lock(&call->state_lock);
+ if (call->state < RXRPC_CALL_DEAD) {
+ sched = false;
+ if (call->state < RXRPC_CALL_COMPLETE) {
+ _debug("abort call %p", call);
+ call->state = RXRPC_CALL_LOCALLY_ABORTED;
+ call->abort_code = RX_CALL_DEAD;
+ if (!test_and_set_bit(RXRPC_CALL_ABORT, &call->events))
+ sched = true;
+ }
+ if (!test_and_set_bit(RXRPC_CALL_RELEASE, &call->events))
+ sched = true;
+ if (sched)
+ rxrpc_queue_call(call);
+ }
+ write_unlock(&call->state_lock);
+}
+
+/*
+ * release all the calls associated with a socket
+ */
+void rxrpc_release_calls_on_socket(struct rxrpc_sock *rx)
+{
+ struct rxrpc_call *call;
+ struct rb_node *p;
+
+ _enter("%p", rx);
+
+ read_lock_bh(&rx->call_lock);
+
+ /* mark all the calls as no longer wanting incoming packets */
+ for (p = rb_first(&rx->calls); p; p = rb_next(p)) {
+ call = rb_entry(p, struct rxrpc_call, sock_node);
+ rxrpc_mark_call_released(call);
+ }
+
+ /* kill the not-yet-accepted incoming calls */
+ list_for_each_entry(call, &rx->secureq, accept_link) {
+ rxrpc_mark_call_released(call);
+ }
+
+ list_for_each_entry(call, &rx->acceptq, accept_link) {
+ rxrpc_mark_call_released(call);
+ }
+
+ read_unlock_bh(&rx->call_lock);
+ _leave("");
+}
+
+/*
+ * release a call
+ */
+void __rxrpc_put_call(struct rxrpc_call *call)
+{
+ ASSERT(call != NULL);
+
+ _enter("%p{u=%d}", call, atomic_read(&call->usage));
+
+ ASSERTCMP(atomic_read(&call->usage), >, 0);
+
+ if (atomic_dec_and_test(&call->usage)) {
+ _debug("call %d dead", call->debug_id);
+ ASSERTCMP(call->state, ==, RXRPC_CALL_DEAD);
+ rxrpc_queue_work(&call->destroyer);
+ }
+ _leave("");
+}
+
+/*
+ * clean up a call
+ */
+static void rxrpc_cleanup_call(struct rxrpc_call *call)
+{
+ _net("DESTROY CALL %d", call->debug_id);
+
+ ASSERT(call->socket);
+
+ memset(&call->sock_node, 0xcd, sizeof(call->sock_node));
+
+ del_timer_sync(&call->lifetimer);
+ del_timer_sync(&call->deadspan);
+ del_timer_sync(&call->ack_timer);
+ del_timer_sync(&call->resend_timer);
+
+ ASSERT(test_bit(RXRPC_CALL_RELEASED, &call->flags));
+ ASSERTCMP(call->events, ==, 0);
+ if (work_pending(&call->processor)) {
+ _debug("defer destroy");
+ rxrpc_queue_work(&call->destroyer);
+ return;
+ }
+
+ if (call->conn) {
+ spin_lock(&call->conn->trans->peer->lock);
+ list_del(&call->error_link);
+ spin_unlock(&call->conn->trans->peer->lock);
+
+ write_lock_bh(&call->conn->lock);
+ rb_erase(&call->conn_node, &call->conn->calls);
+ write_unlock_bh(&call->conn->lock);
+ rxrpc_put_connection(call->conn);
+ }
+
+ if (call->acks_window) {
+ _debug("kill Tx window %d",
+ CIRC_CNT(call->acks_head, call->acks_tail,
+ call->acks_winsz));
+ smp_mb();
+ while (CIRC_CNT(call->acks_head, call->acks_tail,
+ call->acks_winsz) > 0) {
+ struct rxrpc_skb_priv *sp;
+ unsigned long _skb;
+
+ _skb = call->acks_window[call->acks_tail] & ~1;
+ sp = rxrpc_skb((struct sk_buff *) _skb);
+ _debug("+++ clear Tx %u", ntohl(sp->hdr.seq));
+ rxrpc_free_skb((struct sk_buff *) _skb);
+ call->acks_tail =
+ (call->acks_tail + 1) & (call->acks_winsz - 1);
+ }
+
+ kfree(call->acks_window);
+ }
+
+ rxrpc_free_skb(call->tx_pending);
+
+ rxrpc_purge_queue(&call->rx_queue);
+ ASSERT(skb_queue_empty(&call->rx_oos_queue));
+ sock_put(&call->socket->sk);
+ kmem_cache_free(rxrpc_call_jar, call);
+}
+
+/*
+ * destroy a call
+ */
+static void rxrpc_destroy_call(struct work_struct *work)
+{
+ struct rxrpc_call *call =
+ container_of(work, struct rxrpc_call, destroyer);
+
+ _enter("%p{%d,%d,%p}",
+ call, atomic_read(&call->usage), call->channel, call->conn);
+
+ ASSERTCMP(call->state, ==, RXRPC_CALL_DEAD);
+
+ write_lock_bh(&rxrpc_call_lock);
+ list_del_init(&call->link);
+ write_unlock_bh(&rxrpc_call_lock);
+
+ rxrpc_cleanup_call(call);
+ _leave("");
+}
+
+/*
+ * preemptively destroy all the call records from a transport endpoint rather
+ * than waiting for them to time out
+ */
+void __exit rxrpc_destroy_all_calls(void)
+{
+ struct rxrpc_call *call;
+
+ _enter("");
+ write_lock_bh(&rxrpc_call_lock);
+
+ while (!list_empty(&rxrpc_calls)) {
+ call = list_entry(rxrpc_calls.next, struct rxrpc_call, link);
+ _debug("Zapping call %p", call);
+
+ list_del_init(&call->link);
+
+ switch (atomic_read(&call->usage)) {
+ case 0:
+ ASSERTCMP(call->state, ==, RXRPC_CALL_DEAD);
+ break;
+ case 1:
+ if (del_timer_sync(&call->deadspan) != 0 &&
+ call->state != RXRPC_CALL_DEAD)
+ rxrpc_dead_call_expired((unsigned long) call);
+			if (call->state == RXRPC_CALL_DEAD)
+				break;
+			/* fall through */
+ default:
+ printk(KERN_ERR "RXRPC:"
+ " Call %p still in use (%d,%d,%s,%lx,%lx)!\n",
+ call, atomic_read(&call->usage),
+ atomic_read(&call->ackr_not_idle),
+ rxrpc_call_states[call->state],
+ call->flags, call->events);
+			if (!skb_queue_empty(&call->rx_queue))
+				printk(KERN_ERR "RXRPC: Rx queue occupied\n");
+			if (!skb_queue_empty(&call->rx_oos_queue))
+				printk(KERN_ERR "RXRPC: OOS queue occupied\n");
+ break;
+ }
+
+ write_unlock_bh(&rxrpc_call_lock);
+ cond_resched();
+ write_lock_bh(&rxrpc_call_lock);
+ }
+
+ write_unlock_bh(&rxrpc_call_lock);
+ _leave("");
+}
+
+/*
+ * handle call lifetime being exceeded
+ */
+static void rxrpc_call_life_expired(unsigned long _call)
+{
+ struct rxrpc_call *call = (struct rxrpc_call *) _call;
+
+ if (call->state >= RXRPC_CALL_COMPLETE)
+ return;
+
+ _enter("{%d}", call->debug_id);
+ read_lock_bh(&call->state_lock);
+ if (call->state < RXRPC_CALL_COMPLETE) {
+ set_bit(RXRPC_CALL_LIFE_TIMER, &call->events);
+ rxrpc_queue_call(call);
+ }
+ read_unlock_bh(&call->state_lock);
+}
+
+/*
+ * handle resend timer expiry
+ */
+static void rxrpc_resend_time_expired(unsigned long _call)
+{
+ struct rxrpc_call *call = (struct rxrpc_call *) _call;
+
+ _enter("{%d}", call->debug_id);
+
+ if (call->state >= RXRPC_CALL_COMPLETE)
+ return;
+
+ read_lock_bh(&call->state_lock);
+ clear_bit(RXRPC_CALL_RUN_RTIMER, &call->flags);
+ if (call->state < RXRPC_CALL_COMPLETE &&
+ !test_and_set_bit(RXRPC_CALL_RESEND_TIMER, &call->events))
+ rxrpc_queue_call(call);
+ read_unlock_bh(&call->state_lock);
+}
+
+/*
+ * handle ACK timer expiry
+ */
+static void rxrpc_ack_time_expired(unsigned long _call)
+{
+ struct rxrpc_call *call = (struct rxrpc_call *) _call;
+
+ _enter("{%d}", call->debug_id);
+
+ if (call->state >= RXRPC_CALL_COMPLETE)
+ return;
+
+ read_lock_bh(&call->state_lock);
+ if (call->state < RXRPC_CALL_COMPLETE &&
+ !test_and_set_bit(RXRPC_CALL_ACK, &call->events))
+ rxrpc_queue_call(call);
+ read_unlock_bh(&call->state_lock);
+}
diff --git a/net/rxrpc/ar-connection.c b/net/rxrpc/ar-connection.c
new file mode 100644
index 00000000000..43cb3e051ec
--- /dev/null
+++ b/net/rxrpc/ar-connection.c
@@ -0,0 +1,911 @@
+/* RxRPC virtual connection handler
+ *
+ * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/module.h>
+#include <linux/net.h>
+#include <linux/skbuff.h>
+#include <linux/crypto.h>
+#include <net/sock.h>
+#include <net/af_rxrpc.h>
+#include "ar-internal.h"
+
+static void rxrpc_connection_reaper(struct work_struct *work);
+
+LIST_HEAD(rxrpc_connections);
+DEFINE_RWLOCK(rxrpc_connection_lock);
+static unsigned long rxrpc_connection_timeout = 10 * 60;
+static DECLARE_DELAYED_WORK(rxrpc_connection_reap, rxrpc_connection_reaper);
+
+/*
+ * allocate a new client connection bundle
+ */
+static struct rxrpc_conn_bundle *rxrpc_alloc_bundle(gfp_t gfp)
+{
+ struct rxrpc_conn_bundle *bundle;
+
+ _enter("");
+
+ bundle = kzalloc(sizeof(struct rxrpc_conn_bundle), gfp);
+ if (bundle) {
+ INIT_LIST_HEAD(&bundle->unused_conns);
+ INIT_LIST_HEAD(&bundle->avail_conns);
+ INIT_LIST_HEAD(&bundle->busy_conns);
+ init_waitqueue_head(&bundle->chanwait);
+ atomic_set(&bundle->usage, 1);
+ }
+
+ _leave(" = %p", bundle);
+ return bundle;
+}
+
+/*
+ * compare bundle parameters with what we're looking for
+ * - return -ve, 0 or +ve
+ */
+static inline
+int rxrpc_cmp_bundle(const struct rxrpc_conn_bundle *bundle,
+ struct key *key, __be16 service_id)
+{
+ return (bundle->service_id - service_id) ?:
+ ((unsigned long) bundle->key - (unsigned long) key);
+}
+
+/*
+ * get bundle of client connections that a client socket can make use of
+ */
+struct rxrpc_conn_bundle *rxrpc_get_bundle(struct rxrpc_sock *rx,
+ struct rxrpc_transport *trans,
+ struct key *key,
+ __be16 service_id,
+ gfp_t gfp)
+{
+ struct rxrpc_conn_bundle *bundle, *candidate;
+ struct rb_node *p, *parent, **pp;
+
+ _enter("%p{%x},%x,%hx,",
+ rx, key_serial(key), trans->debug_id, ntohl(service_id));
+
+ if (rx->trans == trans && rx->bundle) {
+ atomic_inc(&rx->bundle->usage);
+ return rx->bundle;
+ }
+
+ /* search the extant bundles first for one that matches the specified
+ * user ID */
+ spin_lock(&trans->client_lock);
+
+ p = trans->bundles.rb_node;
+ while (p) {
+ bundle = rb_entry(p, struct rxrpc_conn_bundle, node);
+
+ if (rxrpc_cmp_bundle(bundle, key, service_id) < 0)
+ p = p->rb_left;
+ else if (rxrpc_cmp_bundle(bundle, key, service_id) > 0)
+ p = p->rb_right;
+ else
+ goto found_extant_bundle;
+ }
+
+ spin_unlock(&trans->client_lock);
+
+ /* not yet present - create a candidate for a new record and then
+ * redo the search */
+ candidate = rxrpc_alloc_bundle(gfp);
+ if (!candidate) {
+ _leave(" = -ENOMEM");
+ return ERR_PTR(-ENOMEM);
+ }
+
+ candidate->key = key_get(key);
+ candidate->service_id = service_id;
+
+ spin_lock(&trans->client_lock);
+
+ pp = &trans->bundles.rb_node;
+ parent = NULL;
+ while (*pp) {
+ parent = *pp;
+ bundle = rb_entry(parent, struct rxrpc_conn_bundle, node);
+
+ if (rxrpc_cmp_bundle(bundle, key, service_id) < 0)
+ pp = &(*pp)->rb_left;
+ else if (rxrpc_cmp_bundle(bundle, key, service_id) > 0)
+ pp = &(*pp)->rb_right;
+ else
+ goto found_extant_second;
+ }
+
+ /* second search also failed; add the new bundle */
+ bundle = candidate;
+ candidate = NULL;
+
+ rb_link_node(&bundle->node, parent, pp);
+ rb_insert_color(&bundle->node, &trans->bundles);
+ spin_unlock(&trans->client_lock);
+ _net("BUNDLE new on trans %d", trans->debug_id);
+ if (!rx->bundle && rx->sk.sk_state == RXRPC_CLIENT_CONNECTED) {
+ atomic_inc(&bundle->usage);
+ rx->bundle = bundle;
+ }
+ _leave(" = %p [new]", bundle);
+ return bundle;
+
+ /* we found the bundle in the list immediately */
+found_extant_bundle:
+ atomic_inc(&bundle->usage);
+ spin_unlock(&trans->client_lock);
+ _net("BUNDLE old on trans %d", trans->debug_id);
+ if (!rx->bundle && rx->sk.sk_state == RXRPC_CLIENT_CONNECTED) {
+ atomic_inc(&bundle->usage);
+ rx->bundle = bundle;
+ }
+ _leave(" = %p [extant %d]", bundle, atomic_read(&bundle->usage));
+ return bundle;
+
+ /* we found the bundle on the second time through the list */
+found_extant_second:
+ atomic_inc(&bundle->usage);
+ spin_unlock(&trans->client_lock);
+ kfree(candidate);
+ _net("BUNDLE old2 on trans %d", trans->debug_id);
+ if (!rx->bundle && rx->sk.sk_state == RXRPC_CLIENT_CONNECTED) {
+ atomic_inc(&bundle->usage);
+ rx->bundle = bundle;
+ }
+ _leave(" = %p [second %d]", bundle, atomic_read(&bundle->usage));
+ return bundle;
+}
+
+/*
+ * release a bundle
+ */
+void rxrpc_put_bundle(struct rxrpc_transport *trans,
+ struct rxrpc_conn_bundle *bundle)
+{
+	_enter("%p,%p{%d}", trans, bundle, atomic_read(&bundle->usage));
+
+ if (atomic_dec_and_lock(&bundle->usage, &trans->client_lock)) {
+ _debug("Destroy bundle");
+ rb_erase(&bundle->node, &trans->bundles);
+ spin_unlock(&trans->client_lock);
+ ASSERT(list_empty(&bundle->unused_conns));
+ ASSERT(list_empty(&bundle->avail_conns));
+ ASSERT(list_empty(&bundle->busy_conns));
+ ASSERTCMP(bundle->num_conns, ==, 0);
+ key_put(bundle->key);
+ kfree(bundle);
+ }
+
+ _leave("");
+}
+
+/*
+ * allocate a new connection
+ */
+static struct rxrpc_connection *rxrpc_alloc_connection(gfp_t gfp)
+{
+ struct rxrpc_connection *conn;
+
+ _enter("");
+
+ conn = kzalloc(sizeof(struct rxrpc_connection), gfp);
+ if (conn) {
+ INIT_WORK(&conn->processor, &rxrpc_process_connection);
+ INIT_LIST_HEAD(&conn->bundle_link);
+ conn->calls = RB_ROOT;
+ skb_queue_head_init(&conn->rx_queue);
+ rwlock_init(&conn->lock);
+ spin_lock_init(&conn->state_lock);
+ atomic_set(&conn->usage, 1);
+ conn->debug_id = atomic_inc_return(&rxrpc_debug_id);
+ conn->avail_calls = RXRPC_MAXCALLS;
+ conn->size_align = 4;
+ conn->header_size = sizeof(struct rxrpc_header);
+ }
+
+	_leave(" = %p{%d}", conn, conn ? conn->debug_id : 0);
+ return conn;
+}
+
+/*
+ * assign a connection ID to a connection and add it to the transport's
+ * connection lookup tree
+ * - called with transport client lock held
+ */
+static void rxrpc_assign_connection_id(struct rxrpc_connection *conn)
+{
+ struct rxrpc_connection *xconn;
+ struct rb_node *parent, **p;
+ __be32 epoch;
+ u32 real_conn_id;
+
+ _enter("");
+
+ epoch = conn->epoch;
+
+ write_lock_bh(&conn->trans->conn_lock);
+
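+	/* connection IDs are handed out in steps of RXRPC_CID_INC, leaving
+	 * the low-order bits of the CID free to encode the channel number */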
+ conn->trans->conn_idcounter += RXRPC_CID_INC;
+ if (conn->trans->conn_idcounter < RXRPC_CID_INC)
+ conn->trans->conn_idcounter = RXRPC_CID_INC;
+ real_conn_id = conn->trans->conn_idcounter;
+
+attempt_insertion:
+ parent = NULL;
+ p = &conn->trans->client_conns.rb_node;
+
+ while (*p) {
+ parent = *p;
+ xconn = rb_entry(parent, struct rxrpc_connection, node);
+
+ if (epoch < xconn->epoch)
+ p = &(*p)->rb_left;
+ else if (epoch > xconn->epoch)
+ p = &(*p)->rb_right;
+ else if (real_conn_id < xconn->real_conn_id)
+ p = &(*p)->rb_left;
+ else if (real_conn_id > xconn->real_conn_id)
+ p = &(*p)->rb_right;
+ else
+ goto id_exists;
+ }
+
+ /* we've found a suitable hole - arrange for this connection to occupy
+ * it */
+ rb_link_node(&conn->node, parent, p);
+ rb_insert_color(&conn->node, &conn->trans->client_conns);
+
+ conn->real_conn_id = real_conn_id;
+ conn->cid = htonl(real_conn_id);
+ write_unlock_bh(&conn->trans->conn_lock);
+ _leave(" [CONNID %x CID %x]", real_conn_id, ntohl(conn->cid));
+ return;
+
+ /* we found a connection with the proposed ID - walk the tree from that
+ * point looking for the next unused ID */
+id_exists:
+ for (;;) {
+ real_conn_id += RXRPC_CID_INC;
+ if (real_conn_id < RXRPC_CID_INC) {
+ real_conn_id = RXRPC_CID_INC;
+ conn->trans->conn_idcounter = real_conn_id;
+ goto attempt_insertion;
+ }
+
+ parent = rb_next(parent);
+ if (!parent)
+ goto attempt_insertion;
+
+ xconn = rb_entry(parent, struct rxrpc_connection, node);
+ if (epoch < xconn->epoch ||
+ real_conn_id < xconn->real_conn_id)
+ goto attempt_insertion;
+ }
+}
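+
+/* Worked example (assuming RXRPC_CID_INC == 1 << RXRPC_CIDSHIFT == 4): the
+ * counter advances four at a time so that the bottom two bits of the CID
+ * stay free to encode the channel number - connection ID 0x8004 covers call
+ * CIDs 0x8004 to 0x8007.  On a collision after counter wrap, the loop above
+ * walks rb_next() from the occupied node until a free (epoch, ID) slot
+ * appears.
+ */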
+
+/*
+ * add a call to a connection's call-by-ID tree
+ */
+static void rxrpc_add_call_ID_to_conn(struct rxrpc_connection *conn,
+ struct rxrpc_call *call)
+{
+ struct rxrpc_call *xcall;
+ struct rb_node *parent, **p;
+ __be32 call_id;
+
+ write_lock_bh(&conn->lock);
+
+ call_id = call->call_id;
+ p = &conn->calls.rb_node;
+ parent = NULL;
+ while (*p) {
+ parent = *p;
+ xcall = rb_entry(parent, struct rxrpc_call, conn_node);
+
+ if (call_id < xcall->call_id)
+ p = &(*p)->rb_left;
+ else if (call_id > xcall->call_id)
+ p = &(*p)->rb_right;
+ else
+ BUG();
+ }
+
+ rb_link_node(&call->conn_node, parent, p);
+ rb_insert_color(&call->conn_node, &conn->calls);
+
+ write_unlock_bh(&conn->lock);
+}
+
+/*
+ * connect a call on an exclusive connection
+ */
+static int rxrpc_connect_exclusive(struct rxrpc_sock *rx,
+ struct rxrpc_transport *trans,
+ __be16 service_id,
+ struct rxrpc_call *call,
+ gfp_t gfp)
+{
+ struct rxrpc_connection *conn;
+ int chan, ret;
+
+ _enter("");
+
+ conn = rx->conn;
+ if (!conn) {
+ /* not yet present - create a candidate for a new connection
+ * and then redo the check */
+ conn = rxrpc_alloc_connection(gfp);
+		if (!conn) {
+			_leave(" = -ENOMEM");
+			return -ENOMEM;
+		}
+
+ conn->trans = trans;
+ conn->bundle = NULL;
+ conn->service_id = service_id;
+ conn->epoch = rxrpc_epoch;
+ conn->in_clientflag = 0;
+ conn->out_clientflag = RXRPC_CLIENT_INITIATED;
+ conn->cid = 0;
+ conn->state = RXRPC_CONN_CLIENT;
+ conn->avail_calls = RXRPC_MAXCALLS - 1;
+ conn->security_level = rx->min_sec_level;
+ conn->key = key_get(rx->key);
+
+ ret = rxrpc_init_client_conn_security(conn);
+ if (ret < 0) {
+ key_put(conn->key);
+ kfree(conn);
+ _leave(" = %d [key]", ret);
+ return ret;
+ }
+
+ write_lock_bh(&rxrpc_connection_lock);
+ list_add_tail(&conn->link, &rxrpc_connections);
+ write_unlock_bh(&rxrpc_connection_lock);
+
+ spin_lock(&trans->client_lock);
+ atomic_inc(&trans->usage);
+
+ _net("CONNECT EXCL new %d on TRANS %d",
+ conn->debug_id, conn->trans->debug_id);
+
+ rxrpc_assign_connection_id(conn);
+ rx->conn = conn;
+ }
+
+ /* we've got a connection with a free channel and we can now attach the
+ * call to it
+ * - we're holding the transport's client lock
+ * - we're holding a reference on the connection
+ */
+ for (chan = 0; chan < RXRPC_MAXCALLS; chan++)
+ if (!conn->channels[chan])
+ goto found_channel;
+ goto no_free_channels;
+
+found_channel:
+ atomic_inc(&conn->usage);
+ conn->channels[chan] = call;
+ call->conn = conn;
+ call->channel = chan;
+ call->cid = conn->cid | htonl(chan);
+ call->call_id = htonl(++conn->call_counter);
+
+ _net("CONNECT client on conn %d chan %d as call %x",
+ conn->debug_id, chan, ntohl(call->call_id));
+
+ spin_unlock(&trans->client_lock);
+
+ rxrpc_add_call_ID_to_conn(conn, call);
+ _leave(" = 0");
+ return 0;
+
+no_free_channels:
+ spin_unlock(&trans->client_lock);
+ _leave(" = -ENOSR");
+ return -ENOSR;
+}
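+
+/* An exclusive socket funnels all of its calls through this one connection:
+ * each connection has RXRPC_MAXCALLS channel slots in conn->channels[], and
+ * once all of them hold live calls, further connection attempts fail with
+ * -ENOSR until a channel is vacated.
+ */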
+
+/*
+ * find a connection for a call
+ * - called in process context with IRQs enabled
+ */
+int rxrpc_connect_call(struct rxrpc_sock *rx,
+ struct rxrpc_transport *trans,
+ struct rxrpc_conn_bundle *bundle,
+ struct rxrpc_call *call,
+ gfp_t gfp)
+{
+ struct rxrpc_connection *conn, *candidate;
+ int chan, ret;
+
+ DECLARE_WAITQUEUE(myself, current);
+
+ _enter("%p,%lx,", rx, call->user_call_ID);
+
+ if (test_bit(RXRPC_SOCK_EXCLUSIVE_CONN, &rx->flags))
+ return rxrpc_connect_exclusive(rx, trans, bundle->service_id,
+ call, gfp);
+
+ spin_lock(&trans->client_lock);
+ for (;;) {
+ /* see if the bundle has a call slot available */
+ if (!list_empty(&bundle->avail_conns)) {
+ _debug("avail");
+ conn = list_entry(bundle->avail_conns.next,
+ struct rxrpc_connection,
+ bundle_link);
+ if (--conn->avail_calls == 0)
+ list_move(&conn->bundle_link,
+ &bundle->busy_conns);
+ ASSERTCMP(conn->avail_calls, <, RXRPC_MAXCALLS);
+ ASSERT(conn->channels[0] == NULL ||
+ conn->channels[1] == NULL ||
+ conn->channels[2] == NULL ||
+ conn->channels[3] == NULL);
+ atomic_inc(&conn->usage);
+ break;
+ }
+
+ if (!list_empty(&bundle->unused_conns)) {
+ _debug("unused");
+ conn = list_entry(bundle->unused_conns.next,
+ struct rxrpc_connection,
+ bundle_link);
+ ASSERTCMP(conn->avail_calls, ==, RXRPC_MAXCALLS);
+ conn->avail_calls = RXRPC_MAXCALLS - 1;
+ ASSERT(conn->channels[0] == NULL &&
+ conn->channels[1] == NULL &&
+ conn->channels[2] == NULL &&
+ conn->channels[3] == NULL);
+ atomic_inc(&conn->usage);
+ list_move(&conn->bundle_link, &bundle->avail_conns);
+ break;
+ }
+
+ /* need to allocate a new connection */
+ _debug("get new conn [%d]", bundle->num_conns);
+
+ spin_unlock(&trans->client_lock);
+
+ if (signal_pending(current))
+ goto interrupted;
+
+ if (bundle->num_conns >= 20) {
+ _debug("too many conns");
+
+ if (!(gfp & __GFP_WAIT)) {
+ _leave(" = -EAGAIN");
+ return -EAGAIN;
+ }
+
+ add_wait_queue(&bundle->chanwait, &myself);
+ for (;;) {
+ set_current_state(TASK_INTERRUPTIBLE);
+ if (bundle->num_conns < 20 ||
+ !list_empty(&bundle->unused_conns) ||
+ !list_empty(&bundle->avail_conns))
+ break;
+ if (signal_pending(current))
+ goto interrupted_dequeue;
+ schedule();
+ }
+ remove_wait_queue(&bundle->chanwait, &myself);
+ __set_current_state(TASK_RUNNING);
+ spin_lock(&trans->client_lock);
+ continue;
+ }
+
+ /* not yet present - create a candidate for a new connection and then
+ * redo the check */
+ candidate = rxrpc_alloc_connection(gfp);
+		if (!candidate) {
+			_leave(" = -ENOMEM");
+			return -ENOMEM;
+		}
+
+ candidate->trans = trans;
+ candidate->bundle = bundle;
+ candidate->service_id = bundle->service_id;
+ candidate->epoch = rxrpc_epoch;
+ candidate->in_clientflag = 0;
+ candidate->out_clientflag = RXRPC_CLIENT_INITIATED;
+ candidate->cid = 0;
+ candidate->state = RXRPC_CONN_CLIENT;
+ candidate->avail_calls = RXRPC_MAXCALLS;
+ candidate->security_level = rx->min_sec_level;
+ candidate->key = key_get(bundle->key);
+
+ ret = rxrpc_init_client_conn_security(candidate);
+ if (ret < 0) {
+ key_put(candidate->key);
+ kfree(candidate);
+ _leave(" = %d [key]", ret);
+ return ret;
+ }
+
+ write_lock_bh(&rxrpc_connection_lock);
+ list_add_tail(&candidate->link, &rxrpc_connections);
+ write_unlock_bh(&rxrpc_connection_lock);
+
+ spin_lock(&trans->client_lock);
+
+ list_add(&candidate->bundle_link, &bundle->unused_conns);
+ bundle->num_conns++;
+ atomic_inc(&bundle->usage);
+ atomic_inc(&trans->usage);
+
+ _net("CONNECT new %d on TRANS %d",
+ candidate->debug_id, candidate->trans->debug_id);
+
+ rxrpc_assign_connection_id(candidate);
+ if (candidate->security)
+ candidate->security->prime_packet_security(candidate);
+
+ /* leave the candidate lurking in zombie mode attached to the
+ * bundle until we're ready for it */
+ rxrpc_put_connection(candidate);
+ candidate = NULL;
+ }
+
+ /* we've got a connection with a free channel and we can now attach the
+ * call to it
+ * - we're holding the transport's client lock
+ * - we're holding a reference on the connection
+ * - we're holding a reference on the bundle
+ */
+ for (chan = 0; chan < RXRPC_MAXCALLS; chan++)
+ if (!conn->channels[chan])
+ goto found_channel;
+ ASSERT(conn->channels[0] == NULL ||
+ conn->channels[1] == NULL ||
+ conn->channels[2] == NULL ||
+ conn->channels[3] == NULL);
+ BUG();
+
+found_channel:
+ conn->channels[chan] = call;
+ call->conn = conn;
+ call->channel = chan;
+ call->cid = conn->cid | htonl(chan);
+ call->call_id = htonl(++conn->call_counter);
+
+ _net("CONNECT client on conn %d chan %d as call %x",
+ conn->debug_id, chan, ntohl(call->call_id));
+
+ ASSERTCMP(conn->avail_calls, <, RXRPC_MAXCALLS);
+ spin_unlock(&trans->client_lock);
+
+ rxrpc_add_call_ID_to_conn(conn, call);
+
+ _leave(" = 0");
+ return 0;
+
+interrupted_dequeue:
+ remove_wait_queue(&bundle->chanwait, &myself);
+ __set_current_state(TASK_RUNNING);
+interrupted:
+ _leave(" = -ERESTARTSYS");
+ return -ERESTARTSYS;
+}
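+
+/* The selection order above is: (1) reuse a connection with a spare channel
+ * from the bundle's avail_conns list, (2) activate a wholly unused
+ * connection, (3) allocate a new one, capped at 20 connections per bundle -
+ * beyond the cap a blocking caller sleeps interruptibly on bundle->chanwait
+ * until a channel frees up, and a non-blocking caller gets -EAGAIN.
+ */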
+
+/*
+ * get a record of an incoming connection
+ */
+struct rxrpc_connection *
+rxrpc_incoming_connection(struct rxrpc_transport *trans,
+ struct rxrpc_header *hdr,
+ gfp_t gfp)
+{
+ struct rxrpc_connection *conn, *candidate = NULL;
+ struct rb_node *p, **pp;
+ const char *new = "old";
+ __be32 epoch;
+ u32 conn_id;
+
+ _enter("");
+
+ ASSERT(hdr->flags & RXRPC_CLIENT_INITIATED);
+
+ epoch = hdr->epoch;
+ conn_id = ntohl(hdr->cid) & RXRPC_CIDMASK;
+
+ /* search the connection list first */
+ read_lock_bh(&trans->conn_lock);
+
+ p = trans->server_conns.rb_node;
+ while (p) {
+ conn = rb_entry(p, struct rxrpc_connection, node);
+
+ _debug("maybe %x", conn->real_conn_id);
+
+ if (epoch < conn->epoch)
+ p = p->rb_left;
+ else if (epoch > conn->epoch)
+ p = p->rb_right;
+ else if (conn_id < conn->real_conn_id)
+ p = p->rb_left;
+ else if (conn_id > conn->real_conn_id)
+ p = p->rb_right;
+ else
+ goto found_extant_connection;
+ }
+ read_unlock_bh(&trans->conn_lock);
+
+ /* not yet present - create a candidate for a new record and then
+ * redo the search */
+ candidate = rxrpc_alloc_connection(gfp);
+ if (!candidate) {
+ _leave(" = -ENOMEM");
+ return ERR_PTR(-ENOMEM);
+ }
+
+ candidate->trans = trans;
+ candidate->epoch = hdr->epoch;
+ candidate->cid = hdr->cid & __constant_cpu_to_be32(RXRPC_CIDMASK);
+ candidate->service_id = hdr->serviceId;
+ candidate->security_ix = hdr->securityIndex;
+ candidate->in_clientflag = RXRPC_CLIENT_INITIATED;
+ candidate->out_clientflag = 0;
+ candidate->real_conn_id = conn_id;
+ candidate->state = RXRPC_CONN_SERVER;
+ if (candidate->service_id)
+ candidate->state = RXRPC_CONN_SERVER_UNSECURED;
+
+ write_lock_bh(&trans->conn_lock);
+
+ pp = &trans->server_conns.rb_node;
+ p = NULL;
+ while (*pp) {
+ p = *pp;
+ conn = rb_entry(p, struct rxrpc_connection, node);
+
+ if (epoch < conn->epoch)
+ pp = &(*pp)->rb_left;
+ else if (epoch > conn->epoch)
+ pp = &(*pp)->rb_right;
+ else if (conn_id < conn->real_conn_id)
+ pp = &(*pp)->rb_left;
+ else if (conn_id > conn->real_conn_id)
+ pp = &(*pp)->rb_right;
+ else
+ goto found_extant_second;
+ }
+
+ /* we can now add the new candidate to the list */
+ conn = candidate;
+ candidate = NULL;
+ rb_link_node(&conn->node, p, pp);
+ rb_insert_color(&conn->node, &trans->server_conns);
+ atomic_inc(&conn->trans->usage);
+
+ write_unlock_bh(&trans->conn_lock);
+
+ write_lock_bh(&rxrpc_connection_lock);
+ list_add_tail(&conn->link, &rxrpc_connections);
+ write_unlock_bh(&rxrpc_connection_lock);
+
+ new = "new";
+
+success:
+ _net("CONNECTION %s %d {%x}", new, conn->debug_id, conn->real_conn_id);
+
+ _leave(" = %p {u=%d}", conn, atomic_read(&conn->usage));
+ return conn;
+
+ /* we found the connection in the list immediately */
+found_extant_connection:
+ if (hdr->securityIndex != conn->security_ix) {
+ read_unlock_bh(&trans->conn_lock);
+ goto security_mismatch;
+ }
+ atomic_inc(&conn->usage);
+ read_unlock_bh(&trans->conn_lock);
+ goto success;
+
+ /* we found the connection on the second time through the list */
+found_extant_second:
+ if (hdr->securityIndex != conn->security_ix) {
+ write_unlock_bh(&trans->conn_lock);
+ goto security_mismatch;
+ }
+ atomic_inc(&conn->usage);
+ write_unlock_bh(&trans->conn_lock);
+ kfree(candidate);
+ goto success;
+
+security_mismatch:
+ kfree(candidate);
+ _leave(" = -EKEYREJECTED");
+ return ERR_PTR(-EKEYREJECTED);
+}
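+
+/* The double search above is the usual optimistic-allocation pattern: the
+ * first pass runs under the read lock only; if nothing is found, a candidate
+ * record is allocated and the search redone under the write lock in case
+ * another CPU inserted the same connection in the interim, in which case the
+ * candidate is discarded (found_extant_second).
+ */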
+
+/*
+ * find a connection based on transport and RxRPC connection ID for an incoming
+ * packet
+ */
+struct rxrpc_connection *rxrpc_find_connection(struct rxrpc_transport *trans,
+ struct rxrpc_header *hdr)
+{
+ struct rxrpc_connection *conn;
+ struct rb_node *p;
+ __be32 epoch;
+ u32 conn_id;
+
+ _enter(",{%x,%x}", ntohl(hdr->cid), hdr->flags);
+
+ read_lock_bh(&trans->conn_lock);
+
+ conn_id = ntohl(hdr->cid) & RXRPC_CIDMASK;
+ epoch = hdr->epoch;
+
+ if (hdr->flags & RXRPC_CLIENT_INITIATED)
+ p = trans->server_conns.rb_node;
+ else
+ p = trans->client_conns.rb_node;
+
+ while (p) {
+ conn = rb_entry(p, struct rxrpc_connection, node);
+
+ _debug("maybe %x", conn->real_conn_id);
+
+ if (epoch < conn->epoch)
+ p = p->rb_left;
+ else if (epoch > conn->epoch)
+ p = p->rb_right;
+ else if (conn_id < conn->real_conn_id)
+ p = p->rb_left;
+ else if (conn_id > conn->real_conn_id)
+ p = p->rb_right;
+ else
+ goto found;
+ }
+
+ read_unlock_bh(&trans->conn_lock);
+ _leave(" = NULL");
+ return NULL;
+
+found:
+ atomic_inc(&conn->usage);
+ read_unlock_bh(&trans->conn_lock);
+ _leave(" = %p", conn);
+ return conn;
+}
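+
+/* Note that the direction flag on the *received* packet selects the tree to
+ * search: a client-initiated packet must match one of our server connections
+ * and vice versa, since out_clientflag marks the connections we originated.
+ */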
+
+/*
+ * release a virtual connection
+ */
+void rxrpc_put_connection(struct rxrpc_connection *conn)
+{
+ _enter("%p{u=%d,d=%d}",
+ conn, atomic_read(&conn->usage), conn->debug_id);
+
+ ASSERTCMP(atomic_read(&conn->usage), >, 0);
+
+ conn->put_time = xtime.tv_sec;
+ if (atomic_dec_and_test(&conn->usage)) {
+ _debug("zombie");
+ rxrpc_queue_delayed_work(&rxrpc_connection_reap, 0);
+ }
+
+ _leave("");
+}
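+
+/* Dropping the last ref doesn't free the connection immediately: put_time is
+ * stamped and the reaper kicked, giving the record a grace period in which a
+ * new call can resurrect it.
+ */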
+
+/*
+ * destroy a virtual connection
+ */
+static void rxrpc_destroy_connection(struct rxrpc_connection *conn)
+{
+ _enter("%p{%d}", conn, atomic_read(&conn->usage));
+
+ ASSERTCMP(atomic_read(&conn->usage), ==, 0);
+
+ _net("DESTROY CONN %d", conn->debug_id);
+
+ if (conn->bundle)
+ rxrpc_put_bundle(conn->trans, conn->bundle);
+
+ ASSERT(RB_EMPTY_ROOT(&conn->calls));
+ rxrpc_purge_queue(&conn->rx_queue);
+
+ rxrpc_clear_conn_security(conn);
+ rxrpc_put_transport(conn->trans);
+ kfree(conn);
+ _leave("");
+}
+
+/*
+ * reap dead connections
+ */
+void rxrpc_connection_reaper(struct work_struct *work)
+{
+ struct rxrpc_connection *conn, *_p;
+ unsigned long now, earliest, reap_time;
+
+ LIST_HEAD(graveyard);
+
+ _enter("");
+
+ now = xtime.tv_sec;
+ earliest = ULONG_MAX;
+
+ write_lock_bh(&rxrpc_connection_lock);
+ list_for_each_entry_safe(conn, _p, &rxrpc_connections, link) {
+ _debug("reap CONN %d { u=%d,t=%ld }",
+ conn->debug_id, atomic_read(&conn->usage),
+ (long) now - (long) conn->put_time);
+
+ if (likely(atomic_read(&conn->usage) > 0))
+ continue;
+
+ spin_lock(&conn->trans->client_lock);
+ write_lock(&conn->trans->conn_lock);
+ reap_time = conn->put_time + rxrpc_connection_timeout;
+
+ if (atomic_read(&conn->usage) > 0) {
+ ;
+ } else if (reap_time <= now) {
+ list_move_tail(&conn->link, &graveyard);
+ if (conn->out_clientflag)
+ rb_erase(&conn->node,
+ &conn->trans->client_conns);
+ else
+ rb_erase(&conn->node,
+ &conn->trans->server_conns);
+ if (conn->bundle) {
+ list_del_init(&conn->bundle_link);
+ conn->bundle->num_conns--;
+ }
+
+ } else if (reap_time < earliest) {
+ earliest = reap_time;
+ }
+
+ write_unlock(&conn->trans->conn_lock);
+ spin_unlock(&conn->trans->client_lock);
+ }
+ write_unlock_bh(&rxrpc_connection_lock);
+
+ if (earliest != ULONG_MAX) {
+ _debug("reschedule reaper %ld", (long) earliest - now);
+ ASSERTCMP(earliest, >, now);
+ rxrpc_queue_delayed_work(&rxrpc_connection_reap,
+ (earliest - now) * HZ);
+ }
+
+ /* then destroy all those pulled out */
+ while (!list_empty(&graveyard)) {
+ conn = list_entry(graveyard.next, struct rxrpc_connection,
+ link);
+ list_del_init(&conn->link);
+
+ ASSERTCMP(atomic_read(&conn->usage), ==, 0);
+ rxrpc_destroy_connection(conn);
+ }
+
+ _leave("");
+}
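+
+/* Scheduling example (illustrative): with a connection timeout of 600s, a
+ * connection whose last ref was dropped at t=100s has reap_time 700s; a
+ * reaper pass at t=400s leaves it alone and requeues itself for
+ * (700 - 400) * HZ jiffies hence.
+ */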
+
+/*
+ * preemptively destroy all the connection records rather than waiting for them
+ * to time out
+ */
+void __exit rxrpc_destroy_all_connections(void)
+{
+ _enter("");
+
+ rxrpc_connection_timeout = 0;
+ cancel_delayed_work(&rxrpc_connection_reap);
+ rxrpc_queue_delayed_work(&rxrpc_connection_reap, 0);
+
+ _leave("");
+}
diff --git a/net/rxrpc/ar-connevent.c b/net/rxrpc/ar-connevent.c
new file mode 100644
index 00000000000..1ada43d5116
--- /dev/null
+++ b/net/rxrpc/ar-connevent.c
@@ -0,0 +1,403 @@
+/* connection-level event handling
+ *
+ * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/module.h>
+#include <linux/net.h>
+#include <linux/skbuff.h>
+#include <linux/errqueue.h>
+#include <linux/udp.h>
+#include <linux/in.h>
+#include <linux/in6.h>
+#include <linux/icmp.h>
+#include <net/sock.h>
+#include <net/af_rxrpc.h>
+#include <net/ip.h>
+#include "ar-internal.h"
+
+/*
+ * pass a connection-level abort onto all calls on that connection
+ */
+static void rxrpc_abort_calls(struct rxrpc_connection *conn, int state,
+ u32 abort_code)
+{
+ struct rxrpc_call *call;
+ struct rb_node *p;
+
+ _enter("{%d},%x", conn->debug_id, abort_code);
+
+ read_lock_bh(&conn->lock);
+
+ for (p = rb_first(&conn->calls); p; p = rb_next(p)) {
+ call = rb_entry(p, struct rxrpc_call, conn_node);
+ write_lock(&call->state_lock);
+ if (call->state <= RXRPC_CALL_COMPLETE) {
+ call->state = state;
+ call->abort_code = abort_code;
+ if (state == RXRPC_CALL_LOCALLY_ABORTED)
+ set_bit(RXRPC_CALL_CONN_ABORT, &call->events);
+ else
+ set_bit(RXRPC_CALL_RCVD_ABORT, &call->events);
+ rxrpc_queue_call(call);
+ }
+ write_unlock(&call->state_lock);
+ }
+
+ read_unlock_bh(&conn->lock);
+ _leave("");
+}
+
+/*
+ * generate a connection-level abort
+ */
+static int rxrpc_abort_connection(struct rxrpc_connection *conn,
+ u32 error, u32 abort_code)
+{
+ struct rxrpc_header hdr;
+ struct msghdr msg;
+ struct kvec iov[2];
+ __be32 word;
+ size_t len;
+ int ret;
+
+ _enter("%d,,%u,%u", conn->debug_id, error, abort_code);
+
+ /* generate a connection-level abort */
+ spin_lock_bh(&conn->state_lock);
+ if (conn->state < RXRPC_CONN_REMOTELY_ABORTED) {
+ conn->state = RXRPC_CONN_LOCALLY_ABORTED;
+ conn->error = error;
+ spin_unlock_bh(&conn->state_lock);
+ } else {
+ spin_unlock_bh(&conn->state_lock);
+ _leave(" = 0 [already dead]");
+ return 0;
+ }
+
+ rxrpc_abort_calls(conn, RXRPC_CALL_LOCALLY_ABORTED, abort_code);
+
+ msg.msg_name = &conn->trans->peer->srx.transport.sin;
+ msg.msg_namelen = sizeof(conn->trans->peer->srx.transport.sin);
+ msg.msg_control = NULL;
+ msg.msg_controllen = 0;
+ msg.msg_flags = 0;
+
+ hdr.epoch = conn->epoch;
+ hdr.cid = conn->cid;
+ hdr.callNumber = 0;
+ hdr.seq = 0;
+ hdr.type = RXRPC_PACKET_TYPE_ABORT;
+ hdr.flags = conn->out_clientflag;
+ hdr.userStatus = 0;
+ hdr.securityIndex = conn->security_ix;
+ hdr._rsvd = 0;
+ hdr.serviceId = conn->service_id;
+
+ word = htonl(abort_code);
+
+ iov[0].iov_base = &hdr;
+ iov[0].iov_len = sizeof(hdr);
+ iov[1].iov_base = &word;
+ iov[1].iov_len = sizeof(word);
+
+ len = iov[0].iov_len + iov[1].iov_len;
+
+ hdr.serial = htonl(atomic_inc_return(&conn->serial));
+ _proto("Tx CONN ABORT %%%u { %d }", ntohl(hdr.serial), abort_code);
+
+ ret = kernel_sendmsg(conn->trans->local->socket, &msg, iov, 2, len);
+ if (ret < 0) {
+ _debug("sendmsg failed: %d", ret);
+ return -EAGAIN;
+ }
+
+ _leave(" = 0");
+ return 0;
+}
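+
+/* The wire image built above is just the RxRPC packet header followed by the
+ * abort code as a single network-order 32-bit word, transmitted as one UDP
+ * datagram on the transport's local socket.
+ */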
+
+/*
+ * mark a call as being on a now-secured channel
+ * - must be called with softirqs disabled
+ */
+void rxrpc_call_is_secure(struct rxrpc_call *call)
+{
+ _enter("%p", call);
+ if (call) {
+ read_lock(&call->state_lock);
+ if (call->state < RXRPC_CALL_COMPLETE &&
+ !test_and_set_bit(RXRPC_CALL_SECURED, &call->events))
+ rxrpc_queue_call(call);
+ read_unlock(&call->state_lock);
+ }
+}
+
+/*
+ * connection-level Rx packet processor
+ */
+static int rxrpc_process_event(struct rxrpc_connection *conn,
+ struct sk_buff *skb,
+ u32 *_abort_code)
+{
+ struct rxrpc_skb_priv *sp = rxrpc_skb(skb);
+ __be32 tmp;
+ u32 serial;
+ int loop, ret;
+
+ if (conn->state >= RXRPC_CONN_REMOTELY_ABORTED)
+ return -ECONNABORTED;
+
+ serial = ntohl(sp->hdr.serial);
+
+ switch (sp->hdr.type) {
+ case RXRPC_PACKET_TYPE_ABORT:
+ if (skb_copy_bits(skb, 0, &tmp, sizeof(tmp)) < 0)
+ return -EPROTO;
+ _proto("Rx ABORT %%%u { ac=%d }", serial, ntohl(tmp));
+
+ conn->state = RXRPC_CONN_REMOTELY_ABORTED;
+ rxrpc_abort_calls(conn, RXRPC_CALL_REMOTELY_ABORTED,
+ ntohl(tmp));
+ return -ECONNABORTED;
+
+ case RXRPC_PACKET_TYPE_CHALLENGE:
+ if (conn->security)
+ return conn->security->respond_to_challenge(
+ conn, skb, _abort_code);
+ return -EPROTO;
+
+ case RXRPC_PACKET_TYPE_RESPONSE:
+ if (!conn->security)
+ return -EPROTO;
+
+ ret = conn->security->verify_response(conn, skb, _abort_code);
+ if (ret < 0)
+ return ret;
+
+ ret = conn->security->init_connection_security(conn);
+ if (ret < 0)
+ return ret;
+
+ conn->security->prime_packet_security(conn);
+ read_lock_bh(&conn->lock);
+ spin_lock(&conn->state_lock);
+
+ if (conn->state == RXRPC_CONN_SERVER_CHALLENGING) {
+ conn->state = RXRPC_CONN_SERVER;
+ for (loop = 0; loop < RXRPC_MAXCALLS; loop++)
+ rxrpc_call_is_secure(conn->channels[loop]);
+ }
+
+ spin_unlock(&conn->state_lock);
+ read_unlock_bh(&conn->lock);
+ return 0;
+
+ default:
+ return -EPROTO;
+ }
+}
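+
+/* Security negotiation in brief: the server's CHALLENGE is answered by the
+ * client's RESPONSE; once verify_response() and init_connection_security()
+ * both succeed, the connection moves from SERVER_CHALLENGING to SERVER state
+ * and every call already parked on its channels is marked secure.
+ */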
+
+/*
+ * set up security and issue a challenge
+ */
+static void rxrpc_secure_connection(struct rxrpc_connection *conn)
+{
+ u32 abort_code;
+ int ret;
+
+ _enter("{%d}", conn->debug_id);
+
+ ASSERT(conn->security_ix != 0);
+
+ if (!conn->key) {
+ _debug("set up security");
+ ret = rxrpc_init_server_conn_security(conn);
+ switch (ret) {
+ case 0:
+ break;
+ case -ENOENT:
+ abort_code = RX_CALL_DEAD;
+ goto abort;
+ default:
+ abort_code = RXKADNOAUTH;
+ goto abort;
+ }
+ }
+
+ ASSERT(conn->security != NULL);
+
+ if (conn->security->issue_challenge(conn) < 0) {
+ abort_code = RX_CALL_DEAD;
+ ret = -ENOMEM;
+ goto abort;
+ }
+
+ _leave("");
+ return;
+
+abort:
+ _debug("abort %d, %d", ret, abort_code);
+ rxrpc_abort_connection(conn, -ret, abort_code);
+ _leave(" [aborted]");
+}
+
+/*
+ * connection-level event processor
+ */
+void rxrpc_process_connection(struct work_struct *work)
+{
+ struct rxrpc_connection *conn =
+ container_of(work, struct rxrpc_connection, processor);
+ struct rxrpc_skb_priv *sp;
+ struct sk_buff *skb;
+ u32 abort_code = RX_PROTOCOL_ERROR;
+ int ret;
+
+ _enter("{%d}", conn->debug_id);
+
+ atomic_inc(&conn->usage);
+
+ if (test_and_clear_bit(RXRPC_CONN_CHALLENGE, &conn->events)) {
+ rxrpc_secure_connection(conn);
+ rxrpc_put_connection(conn);
+ }
+
+ /* go through the conn-level event packets, releasing the ref on this
+ * connection that each one has when we've finished with it */
+ while ((skb = skb_dequeue(&conn->rx_queue))) {
+ sp = rxrpc_skb(skb);
+
+ ret = rxrpc_process_event(conn, skb, &abort_code);
+ switch (ret) {
+ case -EPROTO:
+ case -EKEYEXPIRED:
+ case -EKEYREJECTED:
+ goto protocol_error;
+ case -EAGAIN:
+ goto requeue_and_leave;
+ case -ECONNABORTED:
+ default:
+ rxrpc_put_connection(conn);
+ rxrpc_free_skb(skb);
+ break;
+ }
+ }
+
+out:
+ rxrpc_put_connection(conn);
+ _leave("");
+ return;
+
+requeue_and_leave:
+ skb_queue_head(&conn->rx_queue, skb);
+ goto out;
+
+protocol_error:
+ if (rxrpc_abort_connection(conn, -ret, abort_code) < 0)
+ goto requeue_and_leave;
+ rxrpc_put_connection(conn);
+ rxrpc_free_skb(skb);
+ _leave(" [EPROTO]");
+ goto out;
+}
+
+/*
+ * put a packet up for transport-level abort
+ */
+void rxrpc_reject_packet(struct rxrpc_local *local, struct sk_buff *skb)
+{
+ CHECK_SLAB_OKAY(&local->usage);
+
+ if (!atomic_inc_not_zero(&local->usage)) {
+ printk("resurrected on reject\n");
+ BUG();
+ }
+
+ skb_queue_tail(&local->reject_queue, skb);
+ rxrpc_queue_work(&local->rejecter);
+}
+
+/*
+ * reject packets through the local endpoint
+ */
+void rxrpc_reject_packets(struct work_struct *work)
+{
+ union {
+ struct sockaddr sa;
+ struct sockaddr_in sin;
+ } sa;
+ struct rxrpc_skb_priv *sp;
+ struct rxrpc_header hdr;
+ struct rxrpc_local *local;
+ struct sk_buff *skb;
+ struct msghdr msg;
+ struct kvec iov[2];
+ size_t size;
+ __be32 code;
+
+ local = container_of(work, struct rxrpc_local, rejecter);
+ rxrpc_get_local(local);
+
+ _enter("%d", local->debug_id);
+
+ iov[0].iov_base = &hdr;
+ iov[0].iov_len = sizeof(hdr);
+ iov[1].iov_base = &code;
+ iov[1].iov_len = sizeof(code);
+ size = sizeof(hdr) + sizeof(code);
+
+ msg.msg_name = &sa;
+ msg.msg_control = NULL;
+ msg.msg_controllen = 0;
+ msg.msg_flags = 0;
+
+ memset(&sa, 0, sizeof(sa));
+ sa.sa.sa_family = local->srx.transport.family;
+ switch (sa.sa.sa_family) {
+ case AF_INET:
+ msg.msg_namelen = sizeof(sa.sin);
+ break;
+ default:
+ msg.msg_namelen = 0;
+ break;
+ }
+
+ memset(&hdr, 0, sizeof(hdr));
+ hdr.type = RXRPC_PACKET_TYPE_ABORT;
+
+ while ((skb = skb_dequeue(&local->reject_queue))) {
+ sp = rxrpc_skb(skb);
+ switch (sa.sa.sa_family) {
+ case AF_INET:
+ sa.sin.sin_port = udp_hdr(skb)->source;
+ sa.sin.sin_addr.s_addr = ip_hdr(skb)->saddr;
+ code = htonl(skb->priority);
+
+ hdr.epoch = sp->hdr.epoch;
+ hdr.cid = sp->hdr.cid;
+ hdr.callNumber = sp->hdr.callNumber;
+ hdr.serviceId = sp->hdr.serviceId;
+ hdr.flags = sp->hdr.flags;
+ hdr.flags ^= RXRPC_CLIENT_INITIATED;
+ hdr.flags &= RXRPC_CLIENT_INITIATED;
+
+ kernel_sendmsg(local->socket, &msg, iov, 2, size);
+ break;
+
+ default:
+ break;
+ }
+
+ rxrpc_free_skb(skb);
+ rxrpc_put_local(local);
+ }
+
+ rxrpc_put_local(local);
+ _leave("");
+}
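+
+/* The two flag operations above implement "reply in the opposite direction":
+ * the XOR inverts the received packet's RXRPC_CLIENT_INITIATED bit and the
+ * AND then discards all other flags, so the abort is always stamped as
+ * travelling the reverse way to the packet it rejects.
+ */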
diff --git a/net/rxrpc/ar-error.c b/net/rxrpc/ar-error.c
new file mode 100644
index 00000000000..6cb3e8890e7
--- /dev/null
+++ b/net/rxrpc/ar-error.c
@@ -0,0 +1,255 @@
+/* Error message handling (ICMP)
+ *
+ * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/module.h>
+#include <linux/net.h>
+#include <linux/skbuff.h>
+#include <linux/errqueue.h>
+#include <linux/udp.h>
+#include <linux/in.h>
+#include <linux/in6.h>
+#include <linux/icmp.h>
+#include <net/sock.h>
+#include <net/af_rxrpc.h>
+#include <net/ip.h>
+#include "ar-internal.h"
+
+/*
+ * handle an error received on the local endpoint
+ */
+void rxrpc_UDP_error_report(struct sock *sk)
+{
+ struct sock_exterr_skb *serr;
+ struct rxrpc_transport *trans;
+ struct rxrpc_local *local = sk->sk_user_data;
+ struct rxrpc_peer *peer;
+ struct sk_buff *skb;
+ __be32 addr;
+ __be16 port;
+
+ _enter("%p{%d}", sk, local->debug_id);
+
+ skb = skb_dequeue(&sk->sk_error_queue);
+ if (!skb) {
+ _leave("UDP socket errqueue empty");
+ return;
+ }
+
+ rxrpc_new_skb(skb);
+
+ serr = SKB_EXT_ERR(skb);
+ addr = *(__be32 *)(skb_network_header(skb) + serr->addr_offset);
+ port = serr->port;
+
+ _net("Rx UDP Error from "NIPQUAD_FMT":%hu",
+ NIPQUAD(addr), ntohs(port));
+ _debug("Msg l:%d d:%d", skb->len, skb->data_len);
+
+ peer = rxrpc_find_peer(local, addr, port);
+ if (IS_ERR(peer)) {
+ rxrpc_free_skb(skb);
+ _leave(" [no peer]");
+ return;
+ }
+
+ trans = rxrpc_find_transport(local, peer);
+ if (!trans) {
+ rxrpc_put_peer(peer);
+ rxrpc_free_skb(skb);
+ _leave(" [no trans]");
+ return;
+ }
+
+ if (serr->ee.ee_origin == SO_EE_ORIGIN_ICMP &&
+ serr->ee.ee_type == ICMP_DEST_UNREACH &&
+ serr->ee.ee_code == ICMP_FRAG_NEEDED
+ ) {
+ u32 mtu = serr->ee.ee_info;
+
+ _net("Rx Received ICMP Fragmentation Needed (%d)", mtu);
+
+ /* wind down the local interface MTU */
+ if (mtu > 0 && peer->if_mtu == 65535 && mtu < peer->if_mtu) {
+ peer->if_mtu = mtu;
+ _net("I/F MTU %u", mtu);
+ }
+
+ /* ip_rt_frag_needed() may have eaten the info */
+ if (mtu == 0)
+ mtu = ntohs(icmp_hdr(skb)->un.frag.mtu);
+
+		if (mtu == 0) {
+			/* they didn't give us a size, estimate one */
+			mtu = peer->if_mtu;
+			if (mtu > 1500) {
+ mtu >>= 1;
+ if (mtu < 1500)
+ mtu = 1500;
+ } else {
+ mtu -= 100;
+ if (mtu < peer->hdrsize)
+ mtu = peer->hdrsize + 4;
+ }
+ }
+
+ if (mtu < peer->mtu) {
+ spin_lock_bh(&peer->lock);
+ peer->mtu = mtu;
+ peer->maxdata = peer->mtu - peer->hdrsize;
+ spin_unlock_bh(&peer->lock);
+ _net("Net MTU %u (maxdata %u)",
+ peer->mtu, peer->maxdata);
+ }
+ }
+
+ rxrpc_put_peer(peer);
+
+ /* pass the transport ref to error_handler to release */
+ skb_queue_tail(&trans->error_queue, skb);
+ rxrpc_queue_work(&trans->error_handler);
+
+ /* reset and regenerate socket error */
+ spin_lock_bh(&sk->sk_error_queue.lock);
+ sk->sk_err = 0;
+ skb = skb_peek(&sk->sk_error_queue);
+ if (skb) {
+ sk->sk_err = SKB_EXT_ERR(skb)->ee.ee_errno;
+ spin_unlock_bh(&sk->sk_error_queue.lock);
+ sk->sk_error_report(sk);
+ } else {
+ spin_unlock_bh(&sk->sk_error_queue.lock);
+ }
+
+ _leave("");
+}
+
+/*
+ * deal with UDP error messages
+ */
+void rxrpc_UDP_error_handler(struct work_struct *work)
+{
+ struct sock_extended_err *ee;
+ struct sock_exterr_skb *serr;
+ struct rxrpc_transport *trans =
+ container_of(work, struct rxrpc_transport, error_handler);
+ struct sk_buff *skb;
+ int local, err;
+
+ _enter("");
+
+ skb = skb_dequeue(&trans->error_queue);
+ if (!skb)
+ return;
+
+ serr = SKB_EXT_ERR(skb);
+ ee = &serr->ee;
+
+ _net("Rx Error o=%d t=%d c=%d e=%d",
+ ee->ee_origin, ee->ee_type, ee->ee_code, ee->ee_errno);
+
+ err = ee->ee_errno;
+
+ switch (ee->ee_origin) {
+ case SO_EE_ORIGIN_ICMP:
+ local = 0;
+ switch (ee->ee_type) {
+ case ICMP_DEST_UNREACH:
+ switch (ee->ee_code) {
+ case ICMP_NET_UNREACH:
+ _net("Rx Received ICMP Network Unreachable");
+ err = ENETUNREACH;
+ break;
+ case ICMP_HOST_UNREACH:
+ _net("Rx Received ICMP Host Unreachable");
+ err = EHOSTUNREACH;
+ break;
+ case ICMP_PORT_UNREACH:
+ _net("Rx Received ICMP Port Unreachable");
+ err = ECONNREFUSED;
+ break;
+ case ICMP_FRAG_NEEDED:
+ _net("Rx Received ICMP Fragmentation Needed (%d)",
+ ee->ee_info);
+ err = 0; /* dealt with elsewhere */
+ break;
+ case ICMP_NET_UNKNOWN:
+ _net("Rx Received ICMP Unknown Network");
+ err = ENETUNREACH;
+ break;
+ case ICMP_HOST_UNKNOWN:
+ _net("Rx Received ICMP Unknown Host");
+ err = EHOSTUNREACH;
+ break;
+ default:
+ _net("Rx Received ICMP DestUnreach code=%u",
+ ee->ee_code);
+ break;
+ }
+ break;
+
+ case ICMP_TIME_EXCEEDED:
+ _net("Rx Received ICMP TTL Exceeded");
+ break;
+
+ default:
+ _proto("Rx Received ICMP error { type=%u code=%u }",
+ ee->ee_type, ee->ee_code);
+ break;
+ }
+ break;
+
+ case SO_EE_ORIGIN_LOCAL:
+ _proto("Rx Received local error { error=%d }",
+ ee->ee_errno);
+ local = 1;
+ break;
+
+ case SO_EE_ORIGIN_NONE:
+ case SO_EE_ORIGIN_ICMP6:
+ default:
+ _proto("Rx Received error report { orig=%u }",
+ ee->ee_origin);
+ local = 0;
+ break;
+ }
+
+ /* terminate all the affected calls if there's an unrecoverable
+ * error */
+ if (err) {
+ struct rxrpc_call *call, *_n;
+
+ _debug("ISSUE ERROR %d", err);
+
+ spin_lock_bh(&trans->peer->lock);
+ trans->peer->net_error = err;
+
+ list_for_each_entry_safe(call, _n, &trans->peer->error_targets,
+ error_link) {
+ write_lock(&call->state_lock);
+ if (call->state != RXRPC_CALL_COMPLETE &&
+ call->state < RXRPC_CALL_NETWORK_ERROR) {
+ call->state = RXRPC_CALL_NETWORK_ERROR;
+ set_bit(RXRPC_CALL_RCVD_ERROR, &call->events);
+ rxrpc_queue_call(call);
+ }
+ write_unlock(&call->state_lock);
+ list_del_init(&call->error_link);
+ }
+
+ spin_unlock_bh(&trans->peer->lock);
+ }
+
+ if (!skb_queue_empty(&trans->error_queue))
+ rxrpc_queue_work(&trans->error_handler);
+
+ rxrpc_free_skb(skb);
+ rxrpc_put_transport(trans);
+ _leave("");
+}
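+
+/* Note the positive errno convention: peer->net_error holds ENETUNREACH and
+ * friends as positive values, which are later distributed to the affected
+ * calls via their RXRPC_CALL_RCVD_ERROR events.
+ */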
diff --git a/net/rxrpc/ar-input.c b/net/rxrpc/ar-input.c
new file mode 100644
index 00000000000..91b5bbb003e
--- /dev/null
+++ b/net/rxrpc/ar-input.c
@@ -0,0 +1,797 @@
+/* RxRPC packet reception
+ *
+ * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/module.h>
+#include <linux/net.h>
+#include <linux/skbuff.h>
+#include <linux/errqueue.h>
+#include <linux/udp.h>
+#include <linux/in.h>
+#include <linux/in6.h>
+#include <linux/icmp.h>
+#include <net/sock.h>
+#include <net/af_rxrpc.h>
+#include <net/ip.h>
+#include "ar-internal.h"
+
+unsigned long rxrpc_ack_timeout = 1;
+
+const char *rxrpc_pkts[] = {
+ "?00",
+ "DATA", "ACK", "BUSY", "ABORT", "ACKALL", "CHALL", "RESP", "DEBUG",
+ "?09", "?10", "?11", "?12", "?13", "?14", "?15"
+};
+
+/*
+ * queue a packet for recvmsg to pass to userspace
+ * - the caller must hold a lock on call->lock
+ * - must not be called with interrupts disabled (sk_filter() disables BH's)
+ * - eats the packet whether successful or not
+ * - there must be just one reference to the packet, which the caller passes to
+ * this function
+ */
+int rxrpc_queue_rcv_skb(struct rxrpc_call *call, struct sk_buff *skb,
+ bool force, bool terminal)
+{
+ struct rxrpc_skb_priv *sp;
+ struct rxrpc_sock *rx = call->socket;
+ struct sock *sk;
+ int skb_len, ret;
+
+ _enter(",,%d,%d", force, terminal);
+
+ ASSERT(!irqs_disabled());
+
+ sp = rxrpc_skb(skb);
+ ASSERTCMP(sp->call, ==, call);
+
+ /* if we've already posted the terminal message for a call, then we
+ * don't post any more */
+ if (test_bit(RXRPC_CALL_TERMINAL_MSG, &call->flags)) {
+ _debug("already terminated");
+ ASSERTCMP(call->state, >=, RXRPC_CALL_COMPLETE);
+ skb->destructor = NULL;
+ sp->call = NULL;
+ rxrpc_put_call(call);
+ rxrpc_free_skb(skb);
+ return 0;
+ }
+
+ sk = &rx->sk;
+
+ if (!force) {
+ /* cast skb->rcvbuf to unsigned... It's pointless, but
+ * reduces number of warnings when compiling with -W
+ * --ANK */
+// ret = -ENOBUFS;
+// if (atomic_read(&sk->sk_rmem_alloc) + skb->truesize >=
+// (unsigned) sk->sk_rcvbuf)
+// goto out;
+
+ ret = sk_filter(sk, skb);
+ if (ret < 0)
+ goto out;
+ }
+
+ spin_lock_bh(&sk->sk_receive_queue.lock);
+ if (!test_bit(RXRPC_CALL_TERMINAL_MSG, &call->flags) &&
+ !test_bit(RXRPC_CALL_RELEASED, &call->flags) &&
+ call->socket->sk.sk_state != RXRPC_CLOSE) {
+ skb->destructor = rxrpc_packet_destructor;
+ skb->dev = NULL;
+ skb->sk = sk;
+ atomic_add(skb->truesize, &sk->sk_rmem_alloc);
+
+ if (terminal) {
+ _debug("<<<< TERMINAL MESSAGE >>>>");
+ set_bit(RXRPC_CALL_TERMINAL_MSG, &call->flags);
+ }
+
+ /* allow interception by a kernel service */
+ if (rx->interceptor) {
+ rx->interceptor(sk, call->user_call_ID, skb);
+ spin_unlock_bh(&sk->sk_receive_queue.lock);
+ } else {
+
+ /* Cache the SKB length before we tack it onto the
+ * receive queue. Once it is added it no longer
+ * belongs to us and may be freed by other threads of
+ * control pulling packets from the queue */
+ skb_len = skb->len;
+
+ _net("post skb %p", skb);
+ __skb_queue_tail(&sk->sk_receive_queue, skb);
+ spin_unlock_bh(&sk->sk_receive_queue.lock);
+
+ if (!sock_flag(sk, SOCK_DEAD))
+ sk->sk_data_ready(sk, skb_len);
+ }
+ skb = NULL;
+ } else {
+ spin_unlock_bh(&sk->sk_receive_queue.lock);
+ }
+ ret = 0;
+
+out:
+ /* release the socket buffer */
+ if (skb) {
+ skb->destructor = NULL;
+ sp->call = NULL;
+ rxrpc_put_call(call);
+ rxrpc_free_skb(skb);
+ }
+
+ _leave(" = %d", ret);
+ return ret;
+}
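+
+/* Once queued, the skb is charged against the socket's rmem allowance and
+ * released by rxrpc_packet_destructor when recvmsg consumes it; a kernel
+ * service that registered an interceptor is handed the packet directly
+ * instead of it landing on the userspace receive queue.
+ */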
+
+/*
+ * process a DATA packet, posting the packet to the appropriate queue
+ * - eats the packet if successful
+ */
+static int rxrpc_fast_process_data(struct rxrpc_call *call,
+ struct sk_buff *skb, u32 seq)
+{
+ struct rxrpc_skb_priv *sp;
+ bool terminal;
+ int ret, ackbit, ack;
+
+ _enter("{%u,%u},,{%u}", call->rx_data_post, call->rx_first_oos, seq);
+
+ sp = rxrpc_skb(skb);
+ ASSERTCMP(sp->call, ==, NULL);
+
+ spin_lock(&call->lock);
+
+ if (call->state > RXRPC_CALL_COMPLETE)
+ goto discard;
+
+ ASSERTCMP(call->rx_data_expect, >=, call->rx_data_post);
+ ASSERTCMP(call->rx_data_post, >=, call->rx_data_recv);
+ ASSERTCMP(call->rx_data_recv, >=, call->rx_data_eaten);
+
+ if (seq < call->rx_data_post) {
+ _debug("dup #%u [-%u]", seq, call->rx_data_post);
+ ack = RXRPC_ACK_DUPLICATE;
+ ret = -ENOBUFS;
+ goto discard_and_ack;
+ }
+
+ /* we may already have the packet in the out of sequence queue */
+ ackbit = seq - (call->rx_data_eaten + 1);
+ ASSERTCMP(ackbit, >=, 0);
+ if (__test_and_set_bit(ackbit, call->ackr_window)) {
+ _debug("dup oos #%u [%u,%u]",
+ seq, call->rx_data_eaten, call->rx_data_post);
+ ack = RXRPC_ACK_DUPLICATE;
+ goto discard_and_ack;
+ }
+
+ if (seq >= call->ackr_win_top) {
+ _debug("exceed #%u [%u]", seq, call->ackr_win_top);
+ __clear_bit(ackbit, call->ackr_window);
+ ack = RXRPC_ACK_EXCEEDS_WINDOW;
+ goto discard_and_ack;
+ }
+
+ if (seq == call->rx_data_expect) {
+ clear_bit(RXRPC_CALL_EXPECT_OOS, &call->flags);
+ call->rx_data_expect++;
+ } else if (seq > call->rx_data_expect) {
+ _debug("oos #%u [%u]", seq, call->rx_data_expect);
+ call->rx_data_expect = seq + 1;
+ if (test_and_set_bit(RXRPC_CALL_EXPECT_OOS, &call->flags)) {
+ ack = RXRPC_ACK_OUT_OF_SEQUENCE;
+ goto enqueue_and_ack;
+ }
+ goto enqueue_packet;
+ }
+
+ if (seq != call->rx_data_post) {
+ _debug("ahead #%u [%u]", seq, call->rx_data_post);
+ goto enqueue_packet;
+ }
+
+ if (test_bit(RXRPC_CALL_RCVD_LAST, &call->flags))
+ goto protocol_error;
+
+	/* if the packet needs security processing applied to it, it goes down
+	 * the slow path */
+ if (call->conn->security)
+ goto enqueue_packet;
+
+ sp->call = call;
+ rxrpc_get_call(call);
+ terminal = ((sp->hdr.flags & RXRPC_LAST_PACKET) &&
+ !(sp->hdr.flags & RXRPC_CLIENT_INITIATED));
+ ret = rxrpc_queue_rcv_skb(call, skb, false, terminal);
+ if (ret < 0) {
+ if (ret == -ENOMEM || ret == -ENOBUFS) {
+ __clear_bit(ackbit, call->ackr_window);
+ ack = RXRPC_ACK_NOSPACE;
+ goto discard_and_ack;
+ }
+ goto out;
+ }
+
+ skb = NULL;
+
+ _debug("post #%u", seq);
+ ASSERTCMP(call->rx_data_post, ==, seq);
+ call->rx_data_post++;
+
+ if (sp->hdr.flags & RXRPC_LAST_PACKET)
+ set_bit(RXRPC_CALL_RCVD_LAST, &call->flags);
+
+ /* if we've reached an out of sequence packet then we need to drain
+ * that queue into the socket Rx queue now */
+ if (call->rx_data_post == call->rx_first_oos) {
+ _debug("drain rx oos now");
+ read_lock(&call->state_lock);
+ if (call->state < RXRPC_CALL_COMPLETE &&
+ !test_and_set_bit(RXRPC_CALL_DRAIN_RX_OOS, &call->events))
+ rxrpc_queue_call(call);
+ read_unlock(&call->state_lock);
+ }
+
+ spin_unlock(&call->lock);
+ atomic_inc(&call->ackr_not_idle);
+ rxrpc_propose_ACK(call, RXRPC_ACK_DELAY, sp->hdr.serial, false);
+ _leave(" = 0 [posted]");
+ return 0;
+
+protocol_error:
+ ret = -EBADMSG;
+out:
+ spin_unlock(&call->lock);
+ _leave(" = %d", ret);
+ return ret;
+
+discard_and_ack:
+ _debug("discard and ACK packet %p", skb);
+ __rxrpc_propose_ACK(call, ack, sp->hdr.serial, true);
+discard:
+ spin_unlock(&call->lock);
+ rxrpc_free_skb(skb);
+ _leave(" = 0 [discarded]");
+ return 0;
+
+enqueue_and_ack:
+ __rxrpc_propose_ACK(call, ack, sp->hdr.serial, true);
+enqueue_packet:
+ _net("defer skb %p", skb);
+ spin_unlock(&call->lock);
+ skb_queue_tail(&call->rx_queue, skb);
+ atomic_inc(&call->ackr_not_idle);
+ read_lock(&call->state_lock);
+ if (call->state < RXRPC_CALL_DEAD)
+ rxrpc_queue_call(call);
+ read_unlock(&call->state_lock);
+ _leave(" = 0 [queued]");
+ return 0;
+}
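+
+/* Worked example (illustrative): with rx_data_eaten == 5, a DATA packet with
+ * seq 8 maps to ackr_window bit 2; if that bit was already set the packet is
+ * discarded with a DUPLICATE ACK, and a seq at or beyond ackr_win_top draws
+ * an EXCEEDS_WINDOW ACK instead.
+ */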
+
+/*
+ * assume an implicit ACKALL of the transmission phase of a client socket upon
+ * reception of the first reply packet
+ */
+static void rxrpc_assume_implicit_ackall(struct rxrpc_call *call, u32 serial)
+{
+ write_lock_bh(&call->state_lock);
+
+ switch (call->state) {
+ case RXRPC_CALL_CLIENT_AWAIT_REPLY:
+ call->state = RXRPC_CALL_CLIENT_RECV_REPLY;
+ call->acks_latest = serial;
+
+ _debug("implicit ACKALL %%%u", call->acks_latest);
+ set_bit(RXRPC_CALL_RCVD_ACKALL, &call->events);
+ write_unlock_bh(&call->state_lock);
+
+ if (try_to_del_timer_sync(&call->resend_timer) >= 0) {
+ clear_bit(RXRPC_CALL_RESEND_TIMER, &call->events);
+ clear_bit(RXRPC_CALL_RESEND, &call->events);
+ clear_bit(RXRPC_CALL_RUN_RTIMER, &call->flags);
+ }
+ break;
+
+ default:
+ write_unlock_bh(&call->state_lock);
+ break;
+ }
+}
+
+/*
+ * post an incoming packet to the nominated call to deal with
+ * - must get rid of the sk_buff, either by freeing it or by queuing it
+ */
+void rxrpc_fast_process_packet(struct rxrpc_call *call, struct sk_buff *skb)
+{
+ struct rxrpc_skb_priv *sp = rxrpc_skb(skb);
+ __be32 _abort_code;
+ u32 serial, hi_serial, seq, abort_code;
+
+ _enter("%p,%p", call, skb);
+
+ ASSERT(!irqs_disabled());
+
+#if 0 // INJECT RX ERROR
+ if (sp->hdr.type == RXRPC_PACKET_TYPE_DATA) {
+ static int skip = 0;
+ if (++skip == 3) {
+ printk("DROPPED 3RD PACKET!!!!!!!!!!!!!\n");
+ skip = 0;
+ goto free_packet;
+ }
+ }
+#endif
+
+ /* track the latest serial number on this connection for ACK packet
+ * information */
+ serial = ntohl(sp->hdr.serial);
+ hi_serial = atomic_read(&call->conn->hi_serial);
+ while (serial > hi_serial)
+ hi_serial = atomic_cmpxchg(&call->conn->hi_serial, hi_serial,
+ serial);
+
+ /* request ACK generation for any ACK or DATA packet that requests
+ * it */
+ if (sp->hdr.flags & RXRPC_REQUEST_ACK) {
+ _proto("ACK Requested on %%%u", serial);
+ rxrpc_propose_ACK(call, RXRPC_ACK_REQUESTED, sp->hdr.serial,
+ !(sp->hdr.flags & RXRPC_MORE_PACKETS));
+ }
+
+ switch (sp->hdr.type) {
+ case RXRPC_PACKET_TYPE_ABORT:
+ _debug("abort");
+
+ if (skb_copy_bits(skb, 0, &_abort_code,
+ sizeof(_abort_code)) < 0)
+ goto protocol_error;
+
+ abort_code = ntohl(_abort_code);
+ _proto("Rx ABORT %%%u { %x }", serial, abort_code);
+
+ write_lock_bh(&call->state_lock);
+ if (call->state < RXRPC_CALL_COMPLETE) {
+ call->state = RXRPC_CALL_REMOTELY_ABORTED;
+ call->abort_code = abort_code;
+ set_bit(RXRPC_CALL_RCVD_ABORT, &call->events);
+ rxrpc_queue_call(call);
+ }
+ goto free_packet_unlock;
+
+ case RXRPC_PACKET_TYPE_BUSY:
+ _proto("Rx BUSY %%%u", serial);
+
+ if (call->conn->out_clientflag)
+ goto protocol_error;
+
+ write_lock_bh(&call->state_lock);
+ switch (call->state) {
+ case RXRPC_CALL_CLIENT_SEND_REQUEST:
+ call->state = RXRPC_CALL_SERVER_BUSY;
+ set_bit(RXRPC_CALL_RCVD_BUSY, &call->events);
+ rxrpc_queue_call(call);
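+			/* fall through */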
+ case RXRPC_CALL_SERVER_BUSY:
+ goto free_packet_unlock;
+ default:
+ goto protocol_error_locked;
+ }
+
+ default:
+ _proto("Rx %s %%%u", rxrpc_pkts[sp->hdr.type], serial);
+ goto protocol_error;
+
+ case RXRPC_PACKET_TYPE_DATA:
+ seq = ntohl(sp->hdr.seq);
+
+ _proto("Rx DATA %%%u { #%u }", serial, seq);
+
+ if (seq == 0)
+ goto protocol_error;
+
+ call->ackr_prev_seq = sp->hdr.seq;
+
+ /* received data implicitly ACKs all of the request packets we
+ * sent when we're acting as a client */
+ if (call->state == RXRPC_CALL_CLIENT_AWAIT_REPLY)
+ rxrpc_assume_implicit_ackall(call, serial);
+
+ switch (rxrpc_fast_process_data(call, skb, seq)) {
+ case 0:
+ skb = NULL;
+ goto done;
+
+ default:
+ BUG();
+
+ /* data packet received beyond the last packet */
+ case -EBADMSG:
+ goto protocol_error;
+ }
+
+ case RXRPC_PACKET_TYPE_ACK:
+ /* ACK processing is done in process context */
+ read_lock_bh(&call->state_lock);
+ if (call->state < RXRPC_CALL_DEAD) {
+ skb_queue_tail(&call->rx_queue, skb);
+ rxrpc_queue_call(call);
+ skb = NULL;
+ }
+ read_unlock_bh(&call->state_lock);
+ goto free_packet;
+ }
+
+protocol_error:
+ _debug("protocol error");
+ write_lock_bh(&call->state_lock);
+protocol_error_locked:
+ if (call->state <= RXRPC_CALL_COMPLETE) {
+ call->state = RXRPC_CALL_LOCALLY_ABORTED;
+ call->abort_code = RX_PROTOCOL_ERROR;
+ set_bit(RXRPC_CALL_ABORT, &call->events);
+ rxrpc_queue_call(call);
+ }
+free_packet_unlock:
+ write_unlock_bh(&call->state_lock);
+free_packet:
+ rxrpc_free_skb(skb);
+done:
+ _leave("");
+}
+
+/*
+ * split up a jumbo data packet
+ */
+static void rxrpc_process_jumbo_packet(struct rxrpc_call *call,
+ struct sk_buff *jumbo)
+{
+ struct rxrpc_jumbo_header jhdr;
+ struct rxrpc_skb_priv *sp;
+ struct sk_buff *part;
+
+ _enter(",{%u,%u}", jumbo->data_len, jumbo->len);
+
+ sp = rxrpc_skb(jumbo);
+
+ do {
+ sp->hdr.flags &= ~RXRPC_JUMBO_PACKET;
+
+ /* make a clone to represent the first subpacket in what's left
+ * of the jumbo packet */
+ part = skb_clone(jumbo, GFP_ATOMIC);
+ if (!part) {
+ /* simply ditch the tail in the event of ENOMEM */
+ pskb_trim(jumbo, RXRPC_JUMBO_DATALEN);
+ break;
+ }
+ rxrpc_new_skb(part);
+
+ pskb_trim(part, RXRPC_JUMBO_DATALEN);
+
+ if (!pskb_pull(jumbo, RXRPC_JUMBO_DATALEN))
+ goto protocol_error;
+
+ if (skb_copy_bits(jumbo, 0, &jhdr, sizeof(jhdr)) < 0)
+ goto protocol_error;
+ if (!pskb_pull(jumbo, sizeof(jhdr)))
+ BUG();
+
+ sp->hdr.seq = htonl(ntohl(sp->hdr.seq) + 1);
+ sp->hdr.serial = htonl(ntohl(sp->hdr.serial) + 1);
+ sp->hdr.flags = jhdr.flags;
+ sp->hdr._rsvd = jhdr._rsvd;
+
+ _proto("Rx DATA Jumbo %%%u", ntohl(sp->hdr.serial) - 1);
+
+ rxrpc_fast_process_packet(call, part);
+ part = NULL;
+
+ } while (sp->hdr.flags & RXRPC_JUMBO_PACKET);
+
+ rxrpc_fast_process_packet(call, jumbo);
+ _leave("");
+ return;
+
+protocol_error:
+ _debug("protocol error");
+ rxrpc_free_skb(part);
+ rxrpc_free_skb(jumbo);
+ write_lock_bh(&call->state_lock);
+ if (call->state <= RXRPC_CALL_COMPLETE) {
+ call->state = RXRPC_CALL_LOCALLY_ABORTED;
+ call->abort_code = RX_PROTOCOL_ERROR;
+ set_bit(RXRPC_CALL_ABORT, &call->events);
+ rxrpc_queue_call(call);
+ }
+ write_unlock_bh(&call->state_lock);
+ _leave("");
+}
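+
+/* A jumbo packet is a train of fixed-size subpackets, each but the last
+ * followed by a struct rxrpc_jumbo_header.  The loop above clones one
+ * subpacket off the front per iteration, bumping seq and serial by one each
+ * time, until the residue no longer carries RXRPC_JUMBO_PACKET and is
+ * processed as an ordinary DATA packet.
+ */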
+
+/*
+ * post an incoming packet to the appropriate call/socket to deal with
+ * - must get rid of the sk_buff, either by freeing it or by queuing it
+ */
+static void rxrpc_post_packet_to_call(struct rxrpc_connection *conn,
+ struct sk_buff *skb)
+{
+ struct rxrpc_skb_priv *sp;
+ struct rxrpc_call *call;
+ struct rb_node *p;
+ __be32 call_id;
+
+ _enter("%p,%p", conn, skb);
+
+ read_lock_bh(&conn->lock);
+
+ sp = rxrpc_skb(skb);
+
+ /* look at extant calls by channel number first */
+ call = conn->channels[ntohl(sp->hdr.cid) & RXRPC_CHANNELMASK];
+ if (!call || call->call_id != sp->hdr.callNumber)
+ goto call_not_extant;
+
+ _debug("extant call [%d]", call->state);
+ ASSERTCMP(call->conn, ==, conn);
+
+ read_lock(&call->state_lock);
+ switch (call->state) {
+ case RXRPC_CALL_LOCALLY_ABORTED:
+ if (!test_and_set_bit(RXRPC_CALL_ABORT, &call->events))
+ rxrpc_queue_call(call);
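+		/* fall through */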
+ case RXRPC_CALL_REMOTELY_ABORTED:
+ case RXRPC_CALL_NETWORK_ERROR:
+ case RXRPC_CALL_DEAD:
+ goto free_unlock;
+ default:
+ break;
+ }
+
+ read_unlock(&call->state_lock);
+ rxrpc_get_call(call);
+ read_unlock_bh(&conn->lock);
+
+ if (sp->hdr.type == RXRPC_PACKET_TYPE_DATA &&
+ sp->hdr.flags & RXRPC_JUMBO_PACKET)
+ rxrpc_process_jumbo_packet(call, skb);
+ else
+ rxrpc_fast_process_packet(call, skb);
+
+ rxrpc_put_call(call);
+ goto done;
+
+call_not_extant:
+ /* search the completed calls in case what we're dealing with is
+ * there */
+ _debug("call not extant");
+
+ call_id = sp->hdr.callNumber;
+ p = conn->calls.rb_node;
+ while (p) {
+ call = rb_entry(p, struct rxrpc_call, conn_node);
+
+ if (call_id < call->call_id)
+ p = p->rb_left;
+ else if (call_id > call->call_id)
+ p = p->rb_right;
+ else
+ goto found_completed_call;
+ }
+
+dead_call:
+	/* it's either a really old call that we no longer remember or it's a
+	 * new incoming call */
+ read_unlock_bh(&conn->lock);
+
+ if (sp->hdr.flags & RXRPC_CLIENT_INITIATED &&
+ sp->hdr.seq == __constant_cpu_to_be32(1)) {
+ _debug("incoming call");
+ skb_queue_tail(&conn->trans->local->accept_queue, skb);
+ rxrpc_queue_work(&conn->trans->local->acceptor);
+ goto done;
+ }
+
+ _debug("dead call");
+ skb->priority = RX_CALL_DEAD;
+ rxrpc_reject_packet(conn->trans->local, skb);
+ goto done;
+
+ /* resend last packet of a completed call
+ * - client calls may have been aborted or ACK'd
+ * - server calls may have been aborted
+ */
+found_completed_call:
+ _debug("completed call");
+
+ if (atomic_read(&call->usage) == 0)
+ goto dead_call;
+
+ /* synchronise any state changes */
+ read_lock(&call->state_lock);
+ ASSERTIFCMP(call->state != RXRPC_CALL_CLIENT_FINAL_ACK,
+ call->state, >=, RXRPC_CALL_COMPLETE);
+
+ if (call->state == RXRPC_CALL_LOCALLY_ABORTED ||
+ call->state == RXRPC_CALL_REMOTELY_ABORTED ||
+ call->state == RXRPC_CALL_DEAD) {
+ read_unlock(&call->state_lock);
+ goto dead_call;
+ }
+
+ if (call->conn->in_clientflag) {
+ read_unlock(&call->state_lock);
+ goto dead_call; /* complete server call */
+ }
+
+ _debug("final ack again");
+ rxrpc_get_call(call);
+ set_bit(RXRPC_CALL_ACK_FINAL, &call->events);
+ rxrpc_queue_call(call);
+
+free_unlock:
+ read_unlock(&call->state_lock);
+ read_unlock_bh(&conn->lock);
+ rxrpc_free_skb(skb);
+done:
+ _leave("");
+}
+
+/*
+ * post connection-level events to the connection
+ * - this includes challenges, responses and some aborts
+ */
+static void rxrpc_post_packet_to_conn(struct rxrpc_connection *conn,
+ struct sk_buff *skb)
+{
+ _enter("%p,%p", conn, skb);
+
+ atomic_inc(&conn->usage);
+ skb_queue_tail(&conn->rx_queue, skb);
+ rxrpc_queue_conn(conn);
+}
+
+/*
+ * handle data received on the local endpoint
+ * - may be called in interrupt context
+ */
+void rxrpc_data_ready(struct sock *sk, int count)
+{
+ struct rxrpc_connection *conn;
+ struct rxrpc_transport *trans;
+ struct rxrpc_skb_priv *sp;
+ struct rxrpc_local *local;
+ struct rxrpc_peer *peer;
+ struct sk_buff *skb;
+ int ret;
+
+ _enter("%p, %d", sk, count);
+
+ ASSERT(!irqs_disabled());
+
+ read_lock_bh(&rxrpc_local_lock);
+ local = sk->sk_user_data;
+ if (local && atomic_read(&local->usage) > 0)
+ rxrpc_get_local(local);
+ else
+ local = NULL;
+ read_unlock_bh(&rxrpc_local_lock);
+ if (!local) {
+ _leave(" [local dead]");
+ return;
+ }
+
+ skb = skb_recv_datagram(sk, 0, 1, &ret);
+ if (!skb) {
+ rxrpc_put_local(local);
+ if (ret == -EAGAIN)
+ return;
+ _debug("UDP socket error %d", ret);
+ return;
+ }
+
+ rxrpc_new_skb(skb);
+
+ _net("recv skb %p", skb);
+
+ /* we'll probably need to checksum it (didn't call sock_recvmsg) */
+ if (skb_checksum_complete(skb)) {
+ rxrpc_free_skb(skb);
+ rxrpc_put_local(local);
+ _leave(" [CSUM failed]");
+ return;
+ }
+
+ /* the socket buffer we have is owned by UDP, with UDP's data all over
+ * it, but we really want our own */
+ skb_orphan(skb);
+ sp = rxrpc_skb(skb);
+ memset(sp, 0, sizeof(*sp));
+
+ _net("Rx UDP packet from %08x:%04hu",
+ ntohl(ip_hdr(skb)->saddr), ntohs(udp_hdr(skb)->source));
+
+ /* dig out the RxRPC connection details */
+ if (skb_copy_bits(skb, sizeof(struct udphdr), &sp->hdr,
+ sizeof(sp->hdr)) < 0)
+ goto bad_message;
+ if (!pskb_pull(skb, sizeof(struct udphdr) + sizeof(sp->hdr)))
+ BUG();
+
+ _net("Rx RxRPC %s ep=%x call=%x:%x",
+ sp->hdr.flags & RXRPC_CLIENT_INITIATED ? "ToServer" : "ToClient",
+ ntohl(sp->hdr.epoch),
+ ntohl(sp->hdr.cid),
+ ntohl(sp->hdr.callNumber));
+
+ if (sp->hdr.type == 0 || sp->hdr.type >= RXRPC_N_PACKET_TYPES) {
+ _proto("Rx Bad Packet Type %u", sp->hdr.type);
+ goto bad_message;
+ }
+
+ if (sp->hdr.type == RXRPC_PACKET_TYPE_DATA &&
+ (sp->hdr.callNumber == 0 || sp->hdr.seq == 0))
+ goto bad_message;
+
+ peer = rxrpc_find_peer(local, ip_hdr(skb)->saddr, udp_hdr(skb)->source);
+ if (IS_ERR(peer))
+ goto cant_route_call;
+
+ trans = rxrpc_find_transport(local, peer);
+ rxrpc_put_peer(peer);
+ if (!trans)
+ goto cant_route_call;
+
+ conn = rxrpc_find_connection(trans, &sp->hdr);
+ rxrpc_put_transport(trans);
+ if (!conn)
+ goto cant_route_call;
+
+ _debug("CONN %p {%d}", conn, conn->debug_id);
+
+ if (sp->hdr.callNumber == 0)
+ rxrpc_post_packet_to_conn(conn, skb);
+ else
+ rxrpc_post_packet_to_call(conn, skb);
+ rxrpc_put_connection(conn);
+ rxrpc_put_local(local);
+ return;
+
+cant_route_call:
+ _debug("can't route call");
+ if (sp->hdr.flags & RXRPC_CLIENT_INITIATED &&
+ sp->hdr.type == RXRPC_PACKET_TYPE_DATA) {
+ if (sp->hdr.seq == __constant_cpu_to_be32(1)) {
+ _debug("first packet");
+ skb_queue_tail(&local->accept_queue, skb);
+ rxrpc_queue_work(&local->acceptor);
+ rxrpc_put_local(local);
+ _leave(" [incoming]");
+ return;
+ }
+ skb->priority = RX_INVALID_OPERATION;
+ } else {
+ skb->priority = RX_CALL_DEAD;
+ }
+
+ _debug("reject");
+ rxrpc_reject_packet(local, skb);
+ rxrpc_put_local(local);
+ _leave(" [no call]");
+ return;
+
+bad_message:
+ skb->priority = RX_PROTOCOL_ERROR;
+ rxrpc_reject_packet(local, skb);
+ rxrpc_put_local(local);
+ _leave(" [badmsg]");
+}
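+
+/* Routing summary: local endpoint -> peer -> transport -> connection; a zero
+ * callNumber marks a connection-level event, anything else is aimed at a
+ * call.  The first DATA packet of an unknown client-initiated call goes to
+ * the acceptor workqueue; any other unroutable packet is bounced with a
+ * transport-level abort.
+ */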
diff --git a/net/rxrpc/ar-internal.h b/net/rxrpc/ar-internal.h
new file mode 100644
index 00000000000..58aaf892238
--- /dev/null
+++ b/net/rxrpc/ar-internal.h
@@ -0,0 +1,808 @@
+/* AF_RXRPC internal definitions
+ *
+ * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include <rxrpc/packet.h>
+
+#if 0
+#define CHECK_SLAB_OKAY(X) \
+ BUG_ON(atomic_read((X)) >> (sizeof(atomic_t) - 2) == \
+ (POISON_FREE << 8 | POISON_FREE))
+#else
+#define CHECK_SLAB_OKAY(X) do {} while (0)
+#endif
+
+#define FCRYPT_BSIZE 8
+struct rxrpc_crypt {
+ union {
+ u8 x[FCRYPT_BSIZE];
+ u32 n[2];
+ };
+} __attribute__((aligned(8)));
+
+#define rxrpc_queue_work(WS) queue_work(rxrpc_workqueue, (WS))
+#define rxrpc_queue_delayed_work(WS,D) \
+ queue_delayed_work(rxrpc_workqueue, (WS), (D))
+
+#define rxrpc_queue_call(CALL) rxrpc_queue_work(&(CALL)->processor)
+#define rxrpc_queue_conn(CONN) rxrpc_queue_work(&(CONN)->processor)
+
+/*
+ * sk_state for RxRPC sockets
+ */
+enum {
+ RXRPC_UNCONNECTED = 0,
+ RXRPC_CLIENT_BOUND, /* client local address bound */
+ RXRPC_CLIENT_CONNECTED, /* client is connected */
+ RXRPC_SERVER_BOUND, /* server local address bound */
+ RXRPC_SERVER_LISTENING, /* server listening for connections */
+ RXRPC_CLOSE, /* socket is being closed */
+};
+
+/*
+ * RxRPC socket definition
+ */
+struct rxrpc_sock {
+ /* WARNING: sk has to be the first member */
+ struct sock sk;
+ rxrpc_interceptor_t interceptor; /* kernel service Rx interceptor function */
+ struct rxrpc_local *local; /* local endpoint */
+ struct rxrpc_transport *trans; /* transport handler */
+ struct rxrpc_conn_bundle *bundle; /* virtual connection bundle */
+ struct rxrpc_connection *conn; /* exclusive virtual connection */
+ struct list_head listen_link; /* link in the local endpoint's listen list */
+ struct list_head secureq; /* calls awaiting connection security clearance */
+ struct list_head acceptq; /* calls awaiting acceptance */
+ struct key *key; /* security for this socket */
+ struct key *securities; /* list of server security descriptors */
+ struct rb_root calls; /* outstanding calls on this socket */
+ unsigned long flags;
+#define RXRPC_SOCK_EXCLUSIVE_CONN 1 /* exclusive connection for a client socket */
+ rwlock_t call_lock; /* lock for calls */
+ u32 min_sec_level; /* minimum security level */
+#define RXRPC_SECURITY_MAX RXRPC_SECURITY_ENCRYPT
+ struct sockaddr_rxrpc srx; /* local address */
+ sa_family_t proto; /* protocol created with */
+ __be16 service_id; /* service ID of local/remote service */
+};
+
+#define rxrpc_sk(__sk) container_of((__sk), struct rxrpc_sock, sk)
+
+/*
+ * RxRPC socket buffer private variables
+ * - max 48 bytes (struct sk_buff::cb)
+ */
+struct rxrpc_skb_priv {
+ struct rxrpc_call *call; /* call with which associated */
+ unsigned long resend_at; /* time in jiffies at which to resend */
+ union {
+ unsigned offset; /* offset into buffer of next read */
+ int remain; /* amount of space remaining for next write */
+ u32 error; /* network error code */
+ bool need_resend; /* T if needs resending */
+ };
+
+ struct rxrpc_header hdr; /* RxRPC packet header from this packet */
+};
+
+#define rxrpc_skb(__skb) ((struct rxrpc_skb_priv *) &(__skb)->cb)
+
+enum rxrpc_command {
+ RXRPC_CMD_SEND_DATA, /* send data message */
+ RXRPC_CMD_SEND_ABORT, /* request abort generation */
+ RXRPC_CMD_ACCEPT, /* [server] accept incoming call */
+ RXRPC_CMD_REJECT_BUSY, /* [server] reject a call as busy */
+};
+
+/*
+ * RxRPC security module interface
+ */
+struct rxrpc_security {
+ struct module *owner; /* providing module */
+ struct list_head link; /* link in master list */
+ const char *name; /* name of this service */
+ u8 security_index; /* security type provided */
+
+ /* initialise a connection's security */
+ int (*init_connection_security)(struct rxrpc_connection *);
+
+ /* prime a connection's packet security */
+ void (*prime_packet_security)(struct rxrpc_connection *);
+
+ /* impose security on a packet */
+ int (*secure_packet)(const struct rxrpc_call *,
+ struct sk_buff *,
+ size_t,
+ void *);
+
+ /* verify the security on a received packet */
+ int (*verify_packet)(const struct rxrpc_call *, struct sk_buff *,
+ u32 *);
+
+ /* issue a challenge */
+ int (*issue_challenge)(struct rxrpc_connection *);
+
+ /* respond to a challenge */
+ int (*respond_to_challenge)(struct rxrpc_connection *,
+ struct sk_buff *,
+ u32 *);
+
+ /* verify a response */
+ int (*verify_response)(struct rxrpc_connection *,
+ struct sk_buff *,
+ u32 *);
+
+ /* clear connection security */
+ void (*clear)(struct rxrpc_connection *);
+};
+
+/*
+ * RxRPC local transport endpoint definition
+ * - matched by local port, address and protocol type
+ */
+struct rxrpc_local {
+ struct socket *socket; /* my UDP socket */
+ struct work_struct destroyer; /* endpoint destroyer */
+ struct work_struct acceptor; /* incoming call processor */
+ struct work_struct rejecter; /* packet reject writer */
+ struct list_head services; /* services listening on this endpoint */
+ struct list_head link; /* link in endpoint list */
+ struct rw_semaphore defrag_sem; /* control re-enablement of IP DF bit */
+ struct sk_buff_head accept_queue; /* incoming calls awaiting acceptance */
+ struct sk_buff_head reject_queue; /* packets awaiting rejection */
+ spinlock_t lock; /* access lock */
+ rwlock_t services_lock; /* lock for services list */
+ atomic_t usage;
+ int debug_id; /* debug ID for printks */
+ volatile char error_rcvd; /* T if received ICMP error outstanding */
+ struct sockaddr_rxrpc srx; /* local address */
+};
+
+/*
+ * RxRPC remote transport endpoint definition
+ * - matched by remote port, address and protocol type
+ * - holds the connection ID counter for connections between the two endpoints
+ */
+struct rxrpc_peer {
+ struct work_struct destroyer; /* peer destroyer */
+ struct list_head link; /* link in master peer list */
+ struct list_head error_targets; /* targets for net error distribution */
+ spinlock_t lock; /* access lock */
+ atomic_t usage;
+ unsigned if_mtu; /* interface MTU for this peer */
+ unsigned mtu; /* network MTU for this peer */
+ unsigned maxdata; /* data size (MTU - hdrsize) */
+ unsigned short hdrsize; /* header size (IP + UDP + RxRPC) */
+ int debug_id; /* debug ID for printks */
+ int net_error; /* network error distributed */
+ struct sockaddr_rxrpc srx; /* remote address */
+
+ /* calculated RTT cache */
+#define RXRPC_RTT_CACHE_SIZE 32
+ suseconds_t rtt; /* current RTT estimate (in uS) */
+ unsigned rtt_point; /* next entry at which to insert */
+ unsigned rtt_usage; /* amount of cache actually used */
+ suseconds_t rtt_cache[RXRPC_RTT_CACHE_SIZE]; /* calculated RTT cache */
+};
+
+/*
+ * RxRPC point-to-point transport / connection manager definition
+ * - handles a bundle of connections between two endpoints
+ * - matched by { local, peer }
+ */
+struct rxrpc_transport {
+ struct rxrpc_local *local; /* local transport endpoint */
+ struct rxrpc_peer *peer; /* remote transport endpoint */
+ struct work_struct error_handler; /* network error distributor */
+ struct rb_root bundles; /* client connection bundles on this transport */
+ struct rb_root client_conns; /* client connections on this transport */
+ struct rb_root server_conns; /* server connections on this transport */
+ struct list_head link; /* link in master session list */
+ struct sk_buff_head error_queue; /* error packets awaiting processing */
+ time_t put_time; /* time at which to reap */
+ spinlock_t client_lock; /* client connection allocation lock */
+ rwlock_t conn_lock; /* lock for active/dead connections */
+ atomic_t usage;
+ int debug_id; /* debug ID for printks */
+ unsigned int conn_idcounter; /* connection ID counter (client) */
+};
+
+/*
+ * RxRPC client connection bundle
+ * - matched by { transport, service_id, key }
+ */
+struct rxrpc_conn_bundle {
+ struct rb_node node; /* node in transport's lookup tree */
+ struct list_head unused_conns; /* unused connections in this bundle */
+ struct list_head avail_conns; /* available connections in this bundle */
+ struct list_head busy_conns; /* busy connections in this bundle */
+ struct key *key; /* security for this bundle */
+ wait_queue_head_t chanwait; /* wait for channel to become available */
+ atomic_t usage;
+ int debug_id; /* debug ID for printks */
+ unsigned short num_conns; /* number of connections in this bundle */
+ __be16 service_id; /* service ID */
+ uint8_t security_ix; /* security type */
+};
+
+/*
+ * RxRPC connection definition
+ * - matched by { transport, service_id, conn_id, direction, key }
+ * - each connection can only handle four simultaneous calls
+ */
+struct rxrpc_connection {
+ struct rxrpc_transport *trans; /* transport session */
+ struct rxrpc_conn_bundle *bundle; /* connection bundle (client) */
+ struct work_struct processor; /* connection event processor */
+ struct rb_node node; /* node in transport's lookup tree */
+ struct list_head link; /* link in master connection list */
+ struct list_head bundle_link; /* link in bundle */
+ struct rb_root calls; /* calls on this connection */
+ struct sk_buff_head rx_queue; /* received conn-level packets */
+ struct rxrpc_call *channels[RXRPC_MAXCALLS]; /* channels (active calls) */
+ struct rxrpc_security *security; /* applied security module */
+ struct key *key; /* security for this connection (client) */
+ struct key *server_key; /* security for this service */
+ struct crypto_blkcipher *cipher; /* encryption handle */
+ struct rxrpc_crypt csum_iv; /* packet checksum base */
+ unsigned long events;
+#define RXRPC_CONN_CHALLENGE 0 /* send challenge packet */
+ time_t put_time; /* time at which to reap */
+ rwlock_t lock; /* access lock */
+ spinlock_t state_lock; /* state-change lock */
+ atomic_t usage;
+ u32 real_conn_id; /* connection ID (host-endian) */
+ enum { /* current state of connection */
+ RXRPC_CONN_UNUSED, /* - connection not yet attempted */
+ RXRPC_CONN_CLIENT, /* - client connection */
+ RXRPC_CONN_SERVER_UNSECURED, /* - server unsecured connection */
+ RXRPC_CONN_SERVER_CHALLENGING, /* - server challenging for security */
+ RXRPC_CONN_SERVER, /* - server secured connection */
+ RXRPC_CONN_REMOTELY_ABORTED, /* - conn aborted by peer */
+ RXRPC_CONN_LOCALLY_ABORTED, /* - conn aborted locally */
+ RXRPC_CONN_NETWORK_ERROR, /* - conn terminated by network error */
+ } state;
+ int error; /* error code for local abort */
+ int debug_id; /* debug ID for printks */
+ unsigned call_counter; /* call ID counter */
+ atomic_t serial; /* packet serial number counter */
+ atomic_t hi_serial; /* highest serial number received */
+ u8 avail_calls; /* number of calls available */
+ u8 size_align; /* data size alignment (for security) */
+ u8 header_size; /* rxrpc + security header size */
+ u8 security_size; /* security header size */
+ u32 security_level; /* security level negotiated */
+ u32 security_nonce; /* response re-use preventer */
+
+ /* the following are all in net order */
+ __be32 epoch; /* epoch of this connection */
+ __be32 cid; /* connection ID */
+ __be16 service_id; /* service ID */
+ u8 security_ix; /* security type */
+ u8 in_clientflag; /* RXRPC_CLIENT_INITIATED if we are server */
+ u8 out_clientflag; /* RXRPC_CLIENT_INITIATED if we are client */
+};
+
+/*
+ * RxRPC call definition
+ * - matched by { connection, call_id }
+ */
+struct rxrpc_call {
+ struct rxrpc_connection *conn; /* connection carrying call */
+ struct rxrpc_sock *socket; /* socket responsible */
+ struct timer_list lifetimer; /* lifetime remaining on call */
+ struct timer_list deadspan; /* reap timer for re-ACK'ing, etc */
+ struct timer_list ack_timer; /* ACK generation timer */
+ struct timer_list resend_timer; /* Tx resend timer */
+ struct work_struct destroyer; /* call destroyer */
+ struct work_struct processor; /* packet processor and ACK generator */
+ struct list_head link; /* link in master call list */
+ struct list_head error_link; /* link in error distribution list */
+ struct list_head accept_link; /* calls awaiting acceptance */
+ struct rb_node sock_node; /* node in socket call tree */
+ struct rb_node conn_node; /* node in connection call tree */
+ struct sk_buff_head rx_queue; /* received packets */
+ struct sk_buff_head rx_oos_queue; /* packets received out of sequence */
+ struct sk_buff *tx_pending; /* Tx socket buffer being filled */
+ wait_queue_head_t tx_waitq; /* wait for Tx window space to become available */
+ unsigned long user_call_ID; /* user-defined call ID */
+ unsigned long creation_jif; /* time of call creation */
+ unsigned long flags;
+#define RXRPC_CALL_RELEASED 0 /* call has been released - no more messages to userspace */
+#define RXRPC_CALL_TERMINAL_MSG 1 /* call has given the socket its final message */
+#define RXRPC_CALL_RCVD_LAST 2 /* all packets received */
+#define RXRPC_CALL_RUN_RTIMER 3 /* Tx resend timer started */
+#define RXRPC_CALL_TX_SOFT_ACK 4 /* sent some soft ACKs */
+#define RXRPC_CALL_PROC_BUSY 5 /* the processor is busy */
+#define RXRPC_CALL_INIT_ACCEPT 6 /* acceptance was initiated */
+#define RXRPC_CALL_HAS_USERID 7 /* has a user ID attached */
+#define RXRPC_CALL_EXPECT_OOS 8 /* expect out of sequence packets */
+ unsigned long events;
+#define RXRPC_CALL_RCVD_ACKALL 0 /* ACKALL or reply received */
+#define RXRPC_CALL_RCVD_BUSY 1 /* busy packet received */
+#define RXRPC_CALL_RCVD_ABORT 2 /* abort packet received */
+#define RXRPC_CALL_RCVD_ERROR 3 /* network error received */
+#define RXRPC_CALL_ACK_FINAL 4 /* need to generate final ACK (and release call) */
+#define RXRPC_CALL_ACK 5 /* need to generate ACK */
+#define RXRPC_CALL_REJECT_BUSY 6 /* need to generate busy message */
+#define RXRPC_CALL_ABORT 7 /* need to generate abort */
+#define RXRPC_CALL_CONN_ABORT 8 /* local connection abort generated */
+#define RXRPC_CALL_RESEND_TIMER 9 /* Tx resend timer expired */
+#define RXRPC_CALL_RESEND 10 /* Tx resend required */
+#define RXRPC_CALL_DRAIN_RX_OOS 11 /* drain the Rx out of sequence queue */
+#define RXRPC_CALL_LIFE_TIMER 12 /* call's lifetimer ran out */
+#define RXRPC_CALL_ACCEPTED 13 /* incoming call accepted by userspace app */
+#define RXRPC_CALL_SECURED 14 /* incoming call's connection is now secure */
+#define RXRPC_CALL_POST_ACCEPT 15 /* need to post an "accept?" message to the app */
+#define RXRPC_CALL_RELEASE 16 /* need to release the call's resources */
+
+ spinlock_t lock;
+ rwlock_t state_lock; /* lock for state transition */
+ atomic_t usage;
+ atomic_t sequence; /* Tx data packet sequence counter */
+ u32 abort_code; /* local/remote abort code */
+ enum { /* current state of call */
+ RXRPC_CALL_CLIENT_SEND_REQUEST, /* - client sending request phase */
+ RXRPC_CALL_CLIENT_AWAIT_REPLY, /* - client awaiting reply */
+ RXRPC_CALL_CLIENT_RECV_REPLY, /* - client receiving reply phase */
+ RXRPC_CALL_CLIENT_FINAL_ACK, /* - client sending final ACK phase */
+ RXRPC_CALL_SERVER_SECURING, /* - server securing request connection */
+ RXRPC_CALL_SERVER_ACCEPTING, /* - server accepting request */
+ RXRPC_CALL_SERVER_RECV_REQUEST, /* - server receiving request */
+ RXRPC_CALL_SERVER_ACK_REQUEST, /* - server pending ACK of request */
+ RXRPC_CALL_SERVER_SEND_REPLY, /* - server sending reply */
+ RXRPC_CALL_SERVER_AWAIT_ACK, /* - server awaiting final ACK */
+ RXRPC_CALL_COMPLETE, /* - call completed */
+ RXRPC_CALL_SERVER_BUSY, /* - call rejected by busy server */
+ RXRPC_CALL_REMOTELY_ABORTED, /* - call aborted by peer */
+ RXRPC_CALL_LOCALLY_ABORTED, /* - call aborted locally on error or close */
+ RXRPC_CALL_NETWORK_ERROR, /* - call terminated by network error */
+ RXRPC_CALL_DEAD, /* - call is dead */
+ } state;
+ int debug_id; /* debug ID for printks */
+ u8 channel; /* connection channel occupied by this call */
+
+ /* transmission-phase ACK management */
+ uint8_t acks_head; /* offset into window of first entry */
+ uint8_t acks_tail; /* offset into window of last entry */
+ uint8_t acks_winsz; /* size of un-ACK'd window */
+ uint8_t acks_unacked; /* lowest unacked packet in last ACK received */
+ int acks_latest; /* serial number of latest ACK received */
+ rxrpc_seq_t acks_hard; /* highest definitively ACK'd msg seq */
+ unsigned long *acks_window; /* sent packet window
+ * - elements are pointers with LSB set if ACK'd
+ */
+
+ /* receive-phase ACK management */
+ rxrpc_seq_t rx_data_expect; /* next data seq ID expected to be received */
+ rxrpc_seq_t rx_data_post; /* next data seq ID expected to be posted */
+ rxrpc_seq_t rx_data_recv; /* last data seq ID encountered by recvmsg */
+ rxrpc_seq_t rx_data_eaten; /* last data seq ID consumed by recvmsg */
+ rxrpc_seq_t rx_first_oos; /* first packet in rx_oos_queue (or 0) */
+ rxrpc_seq_t ackr_win_top; /* top of ACK window (rx_data_eaten is bottom) */
+ rxrpc_seq_net_t ackr_prev_seq; /* previous sequence number received */
+ uint8_t ackr_reason; /* reason to ACK */
+ __be32 ackr_serial; /* serial of packet being ACK'd */
+ atomic_t ackr_not_idle; /* number of packets in Rx queue */
+
+ /* received packet records, 1 bit per record */
+#define RXRPC_ACKR_WINDOW_ASZ DIV_ROUND_UP(RXRPC_MAXACKS, BITS_PER_LONG)
+ unsigned long ackr_window[RXRPC_ACKR_WINDOW_ASZ + 1];
+
+ /* the following should all be in net order */
+ __be32 cid; /* connection ID + channel index */
+ __be32 call_id; /* call ID on connection */
+};
+
+/*
+ * RxRPC key for Kerberos (type-2 security)
+ */
+struct rxkad_key {
+ u16 security_index; /* RxRPC header security index */
+ u16 ticket_len; /* length of ticket[] */
+ u32 expiry; /* time at which expires */
+ u32 kvno; /* key version number */
+ u8 session_key[8]; /* DES session key */
+ u8 ticket[0]; /* the encrypted ticket */
+};
+
+struct rxrpc_key_payload {
+ struct rxkad_key k;
+};
+
+/*
+ * locally abort an RxRPC call
+ */
+static inline void rxrpc_abort_call(struct rxrpc_call *call, u32 abort_code)
+{
+ write_lock_bh(&call->state_lock);
+ if (call->state < RXRPC_CALL_COMPLETE) {
+ call->abort_code = abort_code;
+ call->state = RXRPC_CALL_LOCALLY_ABORTED;
+ set_bit(RXRPC_CALL_ABORT, &call->events);
+ }
+ write_unlock_bh(&call->state_lock);
+}
+
+/*
+ * af_rxrpc.c
+ */
+extern atomic_t rxrpc_n_skbs;
+extern __be32 rxrpc_epoch;
+extern atomic_t rxrpc_debug_id;
+extern struct workqueue_struct *rxrpc_workqueue;
+
+/*
+ * ar-accept.c
+ */
+extern void rxrpc_accept_incoming_calls(struct work_struct *);
+extern struct rxrpc_call *rxrpc_accept_call(struct rxrpc_sock *,
+ unsigned long);
+extern int rxrpc_reject_call(struct rxrpc_sock *);
+
+/*
+ * ar-ack.c
+ */
+extern void __rxrpc_propose_ACK(struct rxrpc_call *, uint8_t, __be32, bool);
+extern void rxrpc_propose_ACK(struct rxrpc_call *, uint8_t, __be32, bool);
+extern void rxrpc_process_call(struct work_struct *);
+
+/*
+ * ar-call.c
+ */
+extern struct kmem_cache *rxrpc_call_jar;
+extern struct list_head rxrpc_calls;
+extern rwlock_t rxrpc_call_lock;
+
+extern struct rxrpc_call *rxrpc_get_client_call(struct rxrpc_sock *,
+ struct rxrpc_transport *,
+ struct rxrpc_conn_bundle *,
+ unsigned long, int, gfp_t);
+extern struct rxrpc_call *rxrpc_incoming_call(struct rxrpc_sock *,
+ struct rxrpc_connection *,
+ struct rxrpc_header *, gfp_t);
+extern struct rxrpc_call *rxrpc_find_server_call(struct rxrpc_sock *,
+ unsigned long);
+extern void rxrpc_release_call(struct rxrpc_call *);
+extern void rxrpc_release_calls_on_socket(struct rxrpc_sock *);
+extern void __rxrpc_put_call(struct rxrpc_call *);
+extern void __exit rxrpc_destroy_all_calls(void);
+
+/*
+ * ar-connection.c
+ */
+extern struct list_head rxrpc_connections;
+extern rwlock_t rxrpc_connection_lock;
+
+extern struct rxrpc_conn_bundle *rxrpc_get_bundle(struct rxrpc_sock *,
+ struct rxrpc_transport *,
+ struct key *,
+ __be16, gfp_t);
+extern void rxrpc_put_bundle(struct rxrpc_transport *,
+ struct rxrpc_conn_bundle *);
+extern int rxrpc_connect_call(struct rxrpc_sock *, struct rxrpc_transport *,
+ struct rxrpc_conn_bundle *, struct rxrpc_call *,
+ gfp_t);
+extern void rxrpc_put_connection(struct rxrpc_connection *);
+extern void __exit rxrpc_destroy_all_connections(void);
+extern struct rxrpc_connection *rxrpc_find_connection(struct rxrpc_transport *,
+ struct rxrpc_header *);
+extern struct rxrpc_connection *
+rxrpc_incoming_connection(struct rxrpc_transport *, struct rxrpc_header *,
+ gfp_t);
+
+/*
+ * ar-connevent.c
+ */
+extern void rxrpc_process_connection(struct work_struct *);
+extern void rxrpc_reject_packet(struct rxrpc_local *, struct sk_buff *);
+extern void rxrpc_reject_packets(struct work_struct *);
+
+/*
+ * ar-error.c
+ */
+extern void rxrpc_UDP_error_report(struct sock *);
+extern void rxrpc_UDP_error_handler(struct work_struct *);
+
+/*
+ * ar-input.c
+ */
+extern unsigned long rxrpc_ack_timeout;
+extern const char *rxrpc_pkts[];
+
+extern void rxrpc_data_ready(struct sock *, int);
+extern int rxrpc_queue_rcv_skb(struct rxrpc_call *, struct sk_buff *, bool,
+ bool);
+extern void rxrpc_fast_process_packet(struct rxrpc_call *, struct sk_buff *);
+
+/*
+ * ar-local.c
+ */
+extern rwlock_t rxrpc_local_lock;
+extern struct rxrpc_local *rxrpc_lookup_local(struct sockaddr_rxrpc *);
+extern void rxrpc_put_local(struct rxrpc_local *);
+extern void __exit rxrpc_destroy_all_locals(void);
+
+/*
+ * ar-key.c
+ */
+extern struct key_type key_type_rxrpc;
+extern struct key_type key_type_rxrpc_s;
+
+extern int rxrpc_request_key(struct rxrpc_sock *, char __user *, int);
+extern int rxrpc_server_keyring(struct rxrpc_sock *, char __user *, int);
+extern int rxrpc_get_server_data_key(struct rxrpc_connection *, const void *,
+ time_t, u32);
+
+/*
+ * ar-output.c
+ */
+extern int rxrpc_resend_timeout;
+
+extern int rxrpc_send_packet(struct rxrpc_transport *, struct sk_buff *);
+extern int rxrpc_client_sendmsg(struct kiocb *, struct rxrpc_sock *,
+ struct rxrpc_transport *, struct msghdr *,
+ size_t);
+extern int rxrpc_server_sendmsg(struct kiocb *, struct rxrpc_sock *,
+ struct msghdr *, size_t);
+
+/*
+ * ar-peer.c
+ */
+extern struct rxrpc_peer *rxrpc_get_peer(struct sockaddr_rxrpc *, gfp_t);
+extern void rxrpc_put_peer(struct rxrpc_peer *);
+extern struct rxrpc_peer *rxrpc_find_peer(struct rxrpc_local *,
+ __be32, __be16);
+extern void __exit rxrpc_destroy_all_peers(void);
+
+/*
+ * ar-proc.c
+ */
+extern const char *rxrpc_call_states[];
+extern struct file_operations rxrpc_call_seq_fops;
+extern struct file_operations rxrpc_connection_seq_fops;
+
+/*
+ * ar-recvmsg.c
+ */
+extern void rxrpc_remove_user_ID(struct rxrpc_sock *, struct rxrpc_call *);
+extern int rxrpc_recvmsg(struct kiocb *, struct socket *, struct msghdr *,
+ size_t, int);
+
+/*
+ * ar-security.c
+ */
+extern int rxrpc_register_security(struct rxrpc_security *);
+extern void rxrpc_unregister_security(struct rxrpc_security *);
+extern int rxrpc_init_client_conn_security(struct rxrpc_connection *);
+extern int rxrpc_init_server_conn_security(struct rxrpc_connection *);
+extern int rxrpc_secure_packet(const struct rxrpc_call *, struct sk_buff *,
+ size_t, void *);
+extern int rxrpc_verify_packet(const struct rxrpc_call *, struct sk_buff *,
+ u32 *);
+extern void rxrpc_clear_conn_security(struct rxrpc_connection *);
+
+/*
+ * ar-skbuff.c
+ */
+extern void rxrpc_packet_destructor(struct sk_buff *);
+
+/*
+ * ar-transport.c
+ */
+extern struct rxrpc_transport *rxrpc_get_transport(struct rxrpc_local *,
+ struct rxrpc_peer *,
+ gfp_t);
+extern void rxrpc_put_transport(struct rxrpc_transport *);
+extern void __exit rxrpc_destroy_all_transports(void);
+extern struct rxrpc_transport *rxrpc_find_transport(struct rxrpc_local *,
+ struct rxrpc_peer *);
+
+/*
+ * debug tracing
+ */
+extern unsigned rxrpc_debug;
+
+#define dbgprintk(FMT,...) \
+ printk("[%x%-6.6s] "FMT"\n", smp_processor_id(), current->comm ,##__VA_ARGS__)
+
+/* make sure we maintain the format strings, even when debugging is disabled */
+static inline __attribute__((format(printf,1,2)))
+void _dbprintk(const char *fmt, ...)
+{
+}
+
+#define kenter(FMT,...) dbgprintk("==> %s("FMT")",__FUNCTION__ ,##__VA_ARGS__)
+#define kleave(FMT,...) dbgprintk("<== %s()"FMT"",__FUNCTION__ ,##__VA_ARGS__)
+#define kdebug(FMT,...) dbgprintk(" "FMT ,##__VA_ARGS__)
+#define kproto(FMT,...) dbgprintk("### "FMT ,##__VA_ARGS__)
+#define knet(FMT,...) dbgprintk("@@@ "FMT ,##__VA_ARGS__)
+
+
+#if defined(__KDEBUG)
+#define _enter(FMT,...) kenter(FMT,##__VA_ARGS__)
+#define _leave(FMT,...) kleave(FMT,##__VA_ARGS__)
+#define _debug(FMT,...) kdebug(FMT,##__VA_ARGS__)
+#define _proto(FMT,...) kproto(FMT,##__VA_ARGS__)
+#define _net(FMT,...) knet(FMT,##__VA_ARGS__)
+
+#elif defined(CONFIG_AF_RXRPC_DEBUG)
+#define RXRPC_DEBUG_KENTER 0x01
+#define RXRPC_DEBUG_KLEAVE 0x02
+#define RXRPC_DEBUG_KDEBUG 0x04
+#define RXRPC_DEBUG_KPROTO 0x08
+#define RXRPC_DEBUG_KNET 0x10
+
+#define _enter(FMT,...) \
+do { \
+ if (unlikely(rxrpc_debug & RXRPC_DEBUG_KENTER)) \
+ kenter(FMT,##__VA_ARGS__); \
+} while (0)
+
+#define _leave(FMT,...) \
+do { \
+ if (unlikely(rxrpc_debug & RXRPC_DEBUG_KLEAVE)) \
+ kleave(FMT,##__VA_ARGS__); \
+} while (0)
+
+#define _debug(FMT,...) \
+do { \
+ if (unlikely(rxrpc_debug & RXRPC_DEBUG_KDEBUG)) \
+ kdebug(FMT,##__VA_ARGS__); \
+} while (0)
+
+#define _proto(FMT,...) \
+do { \
+ if (unlikely(rxrpc_debug & RXRPC_DEBUG_KPROTO)) \
+ kproto(FMT,##__VA_ARGS__); \
+} while (0)
+
+#define _net(FMT,...) \
+do { \
+ if (unlikely(rxrpc_debug & RXRPC_DEBUG_KNET)) \
+ knet(FMT,##__VA_ARGS__); \
+} while (0)
+
+#else
+#define _enter(FMT,...) _dbprintk("==> %s("FMT")",__FUNCTION__ ,##__VA_ARGS__)
+#define _leave(FMT,...) _dbprintk("<== %s()"FMT"",__FUNCTION__ ,##__VA_ARGS__)
+#define _debug(FMT,...) _dbprintk(" "FMT ,##__VA_ARGS__)
+#define _proto(FMT,...) _dbprintk("### "FMT ,##__VA_ARGS__)
+#define _net(FMT,...) _dbprintk("@@@ "FMT ,##__VA_ARGS__)
+#endif
+
+/*
+ * debug assertion checking
+ */
+#if 1 /* defined(__KDEBUGALL) */
+
+#define ASSERT(X) \
+do { \
+ if (unlikely(!(X))) { \
+ printk(KERN_ERR "\n"); \
+ printk(KERN_ERR "RxRPC: Assertion failed\n"); \
+ BUG(); \
+ } \
+} while(0)
+
+#define ASSERTCMP(X, OP, Y) \
+do { \
+ if (unlikely(!((X) OP (Y)))) { \
+ printk(KERN_ERR "\n"); \
+ printk(KERN_ERR "RxRPC: Assertion failed\n"); \
+ printk(KERN_ERR "%lu " #OP " %lu is false\n", \
+ (unsigned long)(X), (unsigned long)(Y)); \
+ printk(KERN_ERR "0x%lx " #OP " 0x%lx is false\n", \
+ (unsigned long)(X), (unsigned long)(Y)); \
+ BUG(); \
+ } \
+} while(0)
+
+#define ASSERTIF(C, X) \
+do { \
+ if (unlikely((C) && !(X))) { \
+ printk(KERN_ERR "\n"); \
+ printk(KERN_ERR "RxRPC: Assertion failed\n"); \
+ BUG(); \
+ } \
+} while(0)
+
+#define ASSERTIFCMP(C, X, OP, Y) \
+do { \
+ if (unlikely((C) && !((X) OP (Y)))) { \
+ printk(KERN_ERR "\n"); \
+ printk(KERN_ERR "RxRPC: Assertion failed\n"); \
+ printk(KERN_ERR "%lu " #OP " %lu is false\n", \
+ (unsigned long)(X), (unsigned long)(Y)); \
+ printk(KERN_ERR "0x%lx " #OP " 0x%lx is false\n", \
+ (unsigned long)(X), (unsigned long)(Y)); \
+ BUG(); \
+ } \
+} while(0)
+
+#else
+
+#define ASSERT(X) \
+do { \
+} while(0)
+
+#define ASSERTCMP(X, OP, Y) \
+do { \
+} while(0)
+
+#define ASSERTIF(C, X) \
+do { \
+} while(0)
+
+#define ASSERTIFCMP(C, X, OP, Y) \
+do { \
+} while(0)
+
+#endif /* __KDEBUGALL */
+
+/*
+ * socket buffer accounting / leak finding
+ */
+static inline void __rxrpc_new_skb(struct sk_buff *skb, const char *fn)
+{
+ //_net("new skb %p %s [%d]", skb, fn, atomic_read(&rxrpc_n_skbs));
+ //atomic_inc(&rxrpc_n_skbs);
+}
+
+#define rxrpc_new_skb(skb) __rxrpc_new_skb((skb), __func__)
+
+static inline void __rxrpc_kill_skb(struct sk_buff *skb, const char *fn)
+{
+ //_net("kill skb %p %s [%d]", skb, fn, atomic_read(&rxrpc_n_skbs));
+ //atomic_dec(&rxrpc_n_skbs);
+}
+
+#define rxrpc_kill_skb(skb) __rxrpc_kill_skb((skb), __func__)
+
+static inline void __rxrpc_free_skb(struct sk_buff *skb, const char *fn)
+{
+ if (skb) {
+ CHECK_SLAB_OKAY(&skb->users);
+ //_net("free skb %p %s [%d]",
+ // skb, fn, atomic_read(&rxrpc_n_skbs));
+ //atomic_dec(&rxrpc_n_skbs);
+ kfree_skb(skb);
+ }
+}
+
+#define rxrpc_free_skb(skb) __rxrpc_free_skb((skb), __func__)
+
+static inline void rxrpc_purge_queue(struct sk_buff_head *list)
+{
+ struct sk_buff *skb;
+ while ((skb = skb_dequeue((list))) != NULL)
+ rxrpc_free_skb(skb);
+}
+
+static inline void __rxrpc_get_local(struct rxrpc_local *local, const char *f)
+{
+ CHECK_SLAB_OKAY(&local->usage);
+ if (atomic_inc_return(&local->usage) == 1)
+ printk("resurrected (%s)\n", f);
+}
+
+#define rxrpc_get_local(LOCAL) __rxrpc_get_local((LOCAL), __func__)
+
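+/* the usage count may never be resurrected from zero: seeing it go 0->1 here
+ * indicates the object was already being freed
+ */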
+#define rxrpc_get_call(CALL) \
+do { \
+ CHECK_SLAB_OKAY(&(CALL)->usage); \
+ if (atomic_inc_return(&(CALL)->usage) == 1) \
+ BUG(); \
+} while(0)
+
+#define rxrpc_put_call(CALL) \
+do { \
+ __rxrpc_put_call(CALL); \
+} while(0)
diff --git a/net/rxrpc/ar-key.c b/net/rxrpc/ar-key.c
new file mode 100644
index 00000000000..7e049ff6ae6
--- /dev/null
+++ b/net/rxrpc/ar-key.c
@@ -0,0 +1,334 @@
+/* RxRPC key management
+ *
+ * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ * RxRPC keys should have a description describing their purpose, e.g.:
+ * "afs@CAMBRIDGE.REDHAT.COM"
+ */
+
+#include <linux/module.h>
+#include <linux/net.h>
+#include <linux/skbuff.h>
+#include <linux/key.h>
+#include <linux/crypto.h>
+#include <net/sock.h>
+#include <net/af_rxrpc.h>
+#include <keys/rxrpc-type.h>
+#include <keys/user-type.h>
+#include "ar-internal.h"
+
+static int rxrpc_instantiate(struct key *, const void *, size_t);
+static int rxrpc_instantiate_s(struct key *, const void *, size_t);
+static void rxrpc_destroy(struct key *);
+static void rxrpc_destroy_s(struct key *);
+static void rxrpc_describe(const struct key *, struct seq_file *);
+
+/*
+ * rxrpc defined keys take an arbitrary string as the description and an
+ * arbitrary blob of data as the payload
+ */
+struct key_type key_type_rxrpc = {
+ .name = "rxrpc",
+ .instantiate = rxrpc_instantiate,
+ .match = user_match,
+ .destroy = rxrpc_destroy,
+ .describe = rxrpc_describe,
+};
+
+EXPORT_SYMBOL(key_type_rxrpc);
+
+/*
+ * rxrpc server defined keys take "<serviceId>:<securityIndex>" as the
+ * description and an 8-byte decryption key as the payload
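+ *
+ * A hypothetical usage sketch (the service ID and keyring chosen here are
+ * assumptions, not part of this file):
+ *	add_key("rxrpc_s", "52:2", secret_key, 8, keyring);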
+ */
+struct key_type key_type_rxrpc_s = {
+ .name = "rxrpc_s",
+ .instantiate = rxrpc_instantiate_s,
+ .match = user_match,
+ .destroy = rxrpc_destroy_s,
+ .describe = rxrpc_describe,
+};
+
+/*
+ * instantiate an rxrpc defined key
+ * data should be of the form:
+ * OFFSET LEN CONTENT
+ * 0 4 key interface version number
+ * 4 2 security index (type)
+ * 6 2 ticket length
+ * 8 4 key expiry time (time_t)
+ * 12 4 kvno
+ * 16 8 session key
+ * 24 [len] ticket
+ *
+ * if no data is provided, then a no-security key is made
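+ *
+ * As an illustration only (a hypothetical userspace sketch; the key
+ * description and keyring used here are assumptions, not part of this
+ * file), a version-1, ticketless blob in the above layout might be
+ * attached with add_key(2):
+ *
+ *	struct {
+ *		u32 kver;
+ *		u16 security_index;
+ *		u16 ticket_len;
+ *		u32 expiry;
+ *		u32 kvno;
+ *		u8 session_key[8];
+ *	} blob = { .kver = 1, .security_index = 2, .ticket_len = 0 };
+ *	add_key("rxrpc", "afs@EXAMPLE.COM", &blob, sizeof(blob),
+ *		KEY_SPEC_PROCESS_KEYRING);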
+ */
+static int rxrpc_instantiate(struct key *key, const void *data, size_t datalen)
+{
+ const struct rxkad_key *tsec;
+ struct rxrpc_key_payload *upayload;
+ size_t plen;
+ u32 kver;
+ int ret;
+
+ _enter("{%x},,%zu", key_serial(key), datalen);
+
+ /* handle a no-security key */
+ if (!data && datalen == 0)
+ return 0;
+
+ /* get the key interface version number */
+ ret = -EINVAL;
+ if (datalen <= 4 || !data)
+ goto error;
+ memcpy(&kver, data, sizeof(kver));
+ data += sizeof(kver);
+ datalen -= sizeof(kver);
+
+ _debug("KEY I/F VERSION: %u", kver);
+
+ ret = -EKEYREJECTED;
+ if (kver != 1)
+ goto error;
+
+ /* deal with a version 1 key */
+ ret = -EINVAL;
+ if (datalen < sizeof(*tsec))
+ goto error;
+
+ tsec = data;
+ if (datalen != sizeof(*tsec) + tsec->ticket_len)
+ goto error;
+
+ _debug("SCIX: %u", tsec->security_index);
+ _debug("TLEN: %u", tsec->ticket_len);
+ _debug("EXPY: %x", tsec->expiry);
+ _debug("KVNO: %u", tsec->kvno);
+ _debug("SKEY: %02x%02x%02x%02x%02x%02x%02x%02x",
+ tsec->session_key[0], tsec->session_key[1],
+ tsec->session_key[2], tsec->session_key[3],
+ tsec->session_key[4], tsec->session_key[5],
+ tsec->session_key[6], tsec->session_key[7]);
+ if (tsec->ticket_len >= 8)
+ _debug("TCKT: %02x%02x%02x%02x%02x%02x%02x%02x",
+ tsec->ticket[0], tsec->ticket[1],
+ tsec->ticket[2], tsec->ticket[3],
+ tsec->ticket[4], tsec->ticket[5],
+ tsec->ticket[6], tsec->ticket[7]);
+
+ ret = -EPROTONOSUPPORT;
+ if (tsec->security_index != 2)
+ goto error;
+
+ key->type_data.x[0] = tsec->security_index;
+
+ plen = sizeof(*upayload) + tsec->ticket_len;
+ ret = key_payload_reserve(key, plen);
+ if (ret < 0)
+ goto error;
+
+ ret = -ENOMEM;
+ upayload = kmalloc(plen, GFP_KERNEL);
+ if (!upayload)
+ goto error;
+
+ /* attach the data */
+ memcpy(&upayload->k, tsec, sizeof(*tsec));
+ memcpy(&upayload->k.ticket, (void *)tsec + sizeof(*tsec),
+ tsec->ticket_len);
+ key->payload.data = upayload;
+ key->expiry = tsec->expiry;
+ ret = 0;
+
+error:
+ return ret;
+}
+
+/*
+ * instantiate a server secret key
+ * data should be a pointer to the 8-byte secret key
+ */
+static int rxrpc_instantiate_s(struct key *key, const void *data,
+ size_t datalen)
+{
+ struct crypto_blkcipher *ci;
+
+ _enter("{%x},,%zu", key_serial(key), datalen);
+
+ if (datalen != 8)
+ return -EINVAL;
+
+ memcpy(&key->type_data, data, 8);
+
+ ci = crypto_alloc_blkcipher("pcbc(des)", 0, CRYPTO_ALG_ASYNC);
+ if (IS_ERR(ci)) {
+ _leave(" = %ld", PTR_ERR(ci));
+ return PTR_ERR(ci);
+ }
+
+ if (crypto_blkcipher_setkey(ci, data, 8) < 0)
+ BUG();
+
+ key->payload.data = ci;
+ _leave(" = 0");
+ return 0;
+}
+
+/*
+ * dispose of the data dangling from the corpse of an rxrpc key
+ */
+static void rxrpc_destroy(struct key *key)
+{
+ kfree(key->payload.data);
+}
+
+/*
+ * dispose of the data dangling from the corpse of an rxrpc key
+ */
+static void rxrpc_destroy_s(struct key *key)
+{
+ if (key->payload.data) {
+ crypto_free_blkcipher(key->payload.data);
+ key->payload.data = NULL;
+ }
+}
+
+/*
+ * describe the rxrpc key
+ */
+static void rxrpc_describe(const struct key *key, struct seq_file *m)
+{
+ seq_puts(m, key->description);
+}
+
+/*
+ * grab the security key for a socket
+ */
+int rxrpc_request_key(struct rxrpc_sock *rx, char __user *optval, int optlen)
+{
+ struct key *key;
+ char *description;
+
+ _enter("");
+
+ if (optlen <= 0 || optlen > PAGE_SIZE - 1)
+ return -EINVAL;
+
+ description = kmalloc(optlen + 1, GFP_KERNEL);
+ if (!description)
+ return -ENOMEM;
+
+ if (copy_from_user(description, optval, optlen)) {
+ kfree(description);
+ return -EFAULT;
+ }
+ description[optlen] = 0;
+
+ key = request_key(&key_type_rxrpc, description, NULL);
+ if (IS_ERR(key)) {
+ kfree(description);
+ _leave(" = %ld", PTR_ERR(key));
+ return PTR_ERR(key);
+ }
+
+ rx->key = key;
+ kfree(description);
+ _leave(" = 0 [key %x]", key->serial);
+ return 0;
+}
+
+/*
+ * grab the security keyring for a server socket
+ */
+int rxrpc_server_keyring(struct rxrpc_sock *rx, char __user *optval,
+ int optlen)
+{
+ struct key *key;
+ char *description;
+
+ _enter("");
+
+ if (optlen <= 0 || optlen > PAGE_SIZE - 1)
+ return -EINVAL;
+
+ description = kmalloc(optlen + 1, GFP_KERNEL);
+ if (!description)
+ return -ENOMEM;
+
+ if (copy_from_user(description, optval, optlen)) {
+ kfree(description);
+ return -EFAULT;
+ }
+ description[optlen] = 0;
+
+ key = request_key(&key_type_keyring, description, NULL);
+ if (IS_ERR(key)) {
+ kfree(description);
+ _leave(" = %ld", PTR_ERR(key));
+ return PTR_ERR(key);
+ }
+
+ rx->securities = key;
+ kfree(description);
+ _leave(" = 0 [key %x]", key->serial);
+ return 0;
+}
+
+/*
+ * generate a server data key
+ */
+int rxrpc_get_server_data_key(struct rxrpc_connection *conn,
+ const void *session_key,
+ time_t expiry,
+ u32 kvno)
+{
+ struct key *key;
+ int ret;
+
+ struct {
+ u32 kver;
+ struct rxkad_key tsec;
+ } data;
+
+ _enter("");
+
+ key = key_alloc(&key_type_rxrpc, "x", 0, 0, current, 0,
+ KEY_ALLOC_NOT_IN_QUOTA);
+ if (IS_ERR(key)) {
+ _leave(" = -ENOMEM [alloc %ld]", PTR_ERR(key));
+ return -ENOMEM;
+ }
+
+ _debug("key %d", key_serial(key));
+
+ data.kver = 1;
+ data.tsec.security_index = 2;
+ data.tsec.ticket_len = 0;
+ data.tsec.expiry = expiry;
+ data.tsec.kvno = 0;
+
+ memcpy(&data.tsec.session_key, session_key,
+ sizeof(data.tsec.session_key));
+
+ ret = key_instantiate_and_link(key, &data, sizeof(data), NULL, NULL);
+ if (ret < 0)
+ goto error;
+
+ conn->key = key;
+ _leave(" = 0 [%d]", key_serial(key));
+ return 0;
+
+error:
+ key_revoke(key);
+ key_put(key);
+ _leave(" = -ENOMEM [ins %d]", ret);
+ return -ENOMEM;
+}
+
+EXPORT_SYMBOL(rxrpc_get_server_data_key);
diff --git a/net/rxrpc/ar-local.c b/net/rxrpc/ar-local.c
new file mode 100644
index 00000000000..fe03f71f17d
--- /dev/null
+++ b/net/rxrpc/ar-local.c
@@ -0,0 +1,309 @@
+/* AF_RXRPC local endpoint management
+ *
+ * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/module.h>
+#include <linux/net.h>
+#include <linux/skbuff.h>
+#include <net/sock.h>
+#include <net/af_rxrpc.h>
+#include "ar-internal.h"
+
+static LIST_HEAD(rxrpc_locals);
+DEFINE_RWLOCK(rxrpc_local_lock);
+static DECLARE_RWSEM(rxrpc_local_sem);
+static DECLARE_WAIT_QUEUE_HEAD(rxrpc_local_wq);
+
+static void rxrpc_destroy_local(struct work_struct *work);
+
+/*
+ * allocate a new local
+ */
+static struct rxrpc_local *rxrpc_alloc_local(struct sockaddr_rxrpc *srx)
+{
+ struct rxrpc_local *local;
+
+ local = kzalloc(sizeof(struct rxrpc_local), GFP_KERNEL);
+ if (local) {
+ INIT_WORK(&local->destroyer, &rxrpc_destroy_local);
+ INIT_WORK(&local->acceptor, &rxrpc_accept_incoming_calls);
+ INIT_WORK(&local->rejecter, &rxrpc_reject_packets);
+ INIT_LIST_HEAD(&local->services);
+ INIT_LIST_HEAD(&local->link);
+ init_rwsem(&local->defrag_sem);
+ skb_queue_head_init(&local->accept_queue);
+ skb_queue_head_init(&local->reject_queue);
+ spin_lock_init(&local->lock);
+ rwlock_init(&local->services_lock);
+ atomic_set(&local->usage, 1);
+ local->debug_id = atomic_inc_return(&rxrpc_debug_id);
+ memcpy(&local->srx, srx, sizeof(*srx));
+ }
+
+ _leave(" = %p", local);
+ return local;
+}
+
+/*
+ * create the local socket
+ * - must be called with rxrpc_local_sem writelocked
+ */
+static int rxrpc_create_local(struct rxrpc_local *local)
+{
+ struct sock *sock;
+ int ret, opt;
+
+ _enter("%p{%d}", local, local->srx.transport_type);
+
+ /* create a socket to represent the local endpoint */
+ ret = sock_create_kern(PF_INET, local->srx.transport_type, IPPROTO_UDP,
+ &local->socket);
+ if (ret < 0) {
+ _leave(" = %d [socket]", ret);
+ return ret;
+ }
+
+ /* if a local address was supplied then bind it */
+ if (local->srx.transport_len > sizeof(sa_family_t)) {
+ _debug("bind");
+ ret = kernel_bind(local->socket,
+ (struct sockaddr *) &local->srx.transport,
+ local->srx.transport_len);
+ if (ret < 0) {
+ _debug("bind failed");
+ goto error;
+ }
+ }
+
+ /* we want to receive ICMP errors */
+ opt = 1;
+ ret = kernel_setsockopt(local->socket, SOL_IP, IP_RECVERR,
+ (char *) &opt, sizeof(opt));
+ if (ret < 0) {
+ _debug("setsockopt failed");
+ goto error;
+ }
+
+ /* we want to set the don't fragment bit */
+ opt = IP_PMTUDISC_DO;
+ ret = kernel_setsockopt(local->socket, SOL_IP, IP_MTU_DISCOVER,
+ (char *) &opt, sizeof(opt));
+ if (ret < 0) {
+ _debug("setsockopt failed");
+ goto error;
+ }
+
+ write_lock_bh(&rxrpc_local_lock);
+ list_add(&local->link, &rxrpc_locals);
+ write_unlock_bh(&rxrpc_local_lock);
+
+ /* set the socket up */
+ sock = local->socket->sk;
+ sock->sk_user_data = local;
+ sock->sk_data_ready = rxrpc_data_ready;
+ sock->sk_error_report = rxrpc_UDP_error_report;
+ _leave(" = 0");
+ return 0;
+
+error:
+ local->socket->ops->shutdown(local->socket, 2);
+ local->socket->sk->sk_user_data = NULL;
+ sock_release(local->socket);
+ local->socket = NULL;
+
+ _leave(" = %d", ret);
+ return ret;
+}
+
+/*
+ * create a new local endpoint using the specified UDP address
+ */
+struct rxrpc_local *rxrpc_lookup_local(struct sockaddr_rxrpc *srx)
+{
+ struct rxrpc_local *local;
+ int ret;
+
+ _enter("{%d,%u,%u.%u.%u.%u+%hu}",
+ srx->transport_type,
+ srx->transport.family,
+ NIPQUAD(srx->transport.sin.sin_addr),
+ ntohs(srx->transport.sin.sin_port));
+
+ down_write(&rxrpc_local_sem);
+
+ /* see if we already have a suitable local endpoint */
+ read_lock_bh(&rxrpc_local_lock);
+
+ list_for_each_entry(local, &rxrpc_locals, link) {
+ _debug("CMP {%d,%u,%u.%u.%u.%u+%hu}",
+ local->srx.transport_type,
+ local->srx.transport.family,
+ NIPQUAD(local->srx.transport.sin.sin_addr),
+ ntohs(local->srx.transport.sin.sin_port));
+
+ if (local->srx.transport_type != srx->transport_type ||
+ local->srx.transport.family != srx->transport.family)
+ continue;
+
+ switch (srx->transport.family) {
+ case AF_INET:
+ if (local->srx.transport.sin.sin_port !=
+ srx->transport.sin.sin_port)
+ continue;
+ if (memcmp(&local->srx.transport.sin.sin_addr,
+ &srx->transport.sin.sin_addr,
+ sizeof(struct in_addr)) != 0)
+ continue;
+ goto found_local;
+
+ default:
+ BUG();
+ }
+ }
+
+ read_unlock_bh(&rxrpc_local_lock);
+
+ /* we didn't find one, so we need to create one */
+ local = rxrpc_alloc_local(srx);
+ if (!local) {
+ up_write(&rxrpc_local_sem);
+ return ERR_PTR(-ENOMEM);
+ }
+
+ ret = rxrpc_create_local(local);
+ if (ret < 0) {
+ up_write(&rxrpc_local_sem);
+ kfree(local);
+ _leave(" = %d", ret);
+ return ERR_PTR(ret);
+ }
+
+ up_write(&rxrpc_local_sem);
+
+ _net("LOCAL new %d {%d,%u,%u.%u.%u.%u+%hu}",
+ local->debug_id,
+ local->srx.transport_type,
+ local->srx.transport.family,
+ NIPQUAD(local->srx.transport.sin.sin_addr),
+ ntohs(local->srx.transport.sin.sin_port));
+
+ _leave(" = %p [new]", local);
+ return local;
+
+found_local:
+ rxrpc_get_local(local);
+ read_unlock_bh(&rxrpc_local_lock);
+ up_write(&rxrpc_local_sem);
+
+ _net("LOCAL old %d {%d,%u,%u.%u.%u.%u+%hu}",
+ local->debug_id,
+ local->srx.transport_type,
+ local->srx.transport.family,
+ NIPQUAD(local->srx.transport.sin.sin_addr),
+ ntohs(local->srx.transport.sin.sin_port));
+
+ _leave(" = %p [reuse]", local);
+ return local;
+}
+
+/*
+ * release a local endpoint
+ */
+void rxrpc_put_local(struct rxrpc_local *local)
+{
+ _enter("%p{u=%d}", local, atomic_read(&local->usage));
+
+ ASSERTCMP(atomic_read(&local->usage), >, 0);
+
+ /* to prevent a race, the decrement and the dequeue must be effectively
+ * atomic */
+ write_lock_bh(&rxrpc_local_lock);
+ if (unlikely(atomic_dec_and_test(&local->usage))) {
+ _debug("destroy local");
+ rxrpc_queue_work(&local->destroyer);
+ }
+ write_unlock_bh(&rxrpc_local_lock);
+ _leave("");
+}
+
+/*
+ * destroy a local endpoint
+ */
+static void rxrpc_destroy_local(struct work_struct *work)
+{
+ struct rxrpc_local *local =
+ container_of(work, struct rxrpc_local, destroyer);
+
+ _enter("%p{%d}", local, atomic_read(&local->usage));
+
+ down_write(&rxrpc_local_sem);
+
+ write_lock_bh(&rxrpc_local_lock);
+ if (atomic_read(&local->usage) > 0) {
+ write_unlock_bh(&rxrpc_local_lock);
+ up_write(&rxrpc_local_sem);
+ _leave(" [resurrected]");
+ return;
+ }
+
+ list_del(&local->link);
+ local->socket->sk->sk_user_data = NULL;
+ write_unlock_bh(&rxrpc_local_lock);
+
+ downgrade_write(&rxrpc_local_sem);
+
+ ASSERT(list_empty(&local->services));
+ ASSERT(!work_pending(&local->acceptor));
+ ASSERT(!work_pending(&local->rejecter));
+
+ /* finish cleaning up the local descriptor */
+ rxrpc_purge_queue(&local->accept_queue);
+ rxrpc_purge_queue(&local->reject_queue);
+ local->socket->ops->shutdown(local->socket, 2);
+ sock_release(local->socket);
+
+ up_read(&rxrpc_local_sem);
+
+ _net("DESTROY LOCAL %d", local->debug_id);
+ kfree(local);
+
+ if (list_empty(&rxrpc_locals))
+ wake_up_all(&rxrpc_local_wq);
+
+ _leave("");
+}
+
+/*
+ * preemptively destroy all local endpoints rather than waiting for them
+ * to be destroyed
+ */
+void __exit rxrpc_destroy_all_locals(void)
+{
+ DECLARE_WAITQUEUE(myself, current);
+
+ _enter("");
+
+ /* we simply have to wait for them to go away */
+ if (!list_empty(&rxrpc_locals)) {
+ set_current_state(TASK_UNINTERRUPTIBLE);
+ add_wait_queue(&rxrpc_local_wq, &myself);
+
+ while (!list_empty(&rxrpc_locals)) {
+ schedule();
+ set_current_state(TASK_UNINTERRUPTIBLE);
+ }
+
+ remove_wait_queue(&rxrpc_local_wq, &myself);
+ set_current_state(TASK_RUNNING);
+ }
+
+ _leave("");
+}
diff --git a/net/rxrpc/ar-output.c b/net/rxrpc/ar-output.c
new file mode 100644
index 00000000000..591c4422205
--- /dev/null
+++ b/net/rxrpc/ar-output.c
@@ -0,0 +1,734 @@
+/* RxRPC packet transmission
+ *
+ * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/net.h>
+#include <linux/skbuff.h>
+#include <linux/circ_buf.h>
+#include <net/sock.h>
+#include <net/af_rxrpc.h>
+#include "ar-internal.h"
+
+int rxrpc_resend_timeout = 4;
+
+static int rxrpc_send_data(struct kiocb *iocb,
+ struct rxrpc_sock *rx,
+ struct rxrpc_call *call,
+ struct msghdr *msg, size_t len);
+
+/*
+ * extract control messages from the sendmsg() control buffer
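+ *
+ * As a hedged illustration of what a sender might put in msg_control
+ * (hypothetical userspace sketch, not part of this file):
+ *
+ *	unsigned long id = 1;
+ *	char cbuf[CMSG_SPACE(sizeof(id))];
+ *	struct msghdr msg = { .msg_control = cbuf,
+ *			      .msg_controllen = sizeof(cbuf) };
+ *	struct cmsghdr *c = CMSG_FIRSTHDR(&msg);
+ *	c->cmsg_level = SOL_RXRPC;
+ *	c->cmsg_type = RXRPC_USER_CALL_ID;
+ *	c->cmsg_len = CMSG_LEN(sizeof(id));
+ *	memcpy(CMSG_DATA(c), &id, sizeof(id));
+ *
+ * followed by sendmsg() with the data to transmit attached.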
+ */
+static int rxrpc_sendmsg_cmsg(struct rxrpc_sock *rx, struct msghdr *msg,
+ unsigned long *user_call_ID,
+ enum rxrpc_command *command,
+ u32 *abort_code,
+ bool server)
+{
+ struct cmsghdr *cmsg;
+ int len;
+
+ *command = RXRPC_CMD_SEND_DATA;
+
+ if (msg->msg_controllen == 0)
+ return -EINVAL;
+
+ for (cmsg = CMSG_FIRSTHDR(msg); cmsg; cmsg = CMSG_NXTHDR(msg, cmsg)) {
+ if (!CMSG_OK(msg, cmsg))
+ return -EINVAL;
+
+ len = cmsg->cmsg_len - CMSG_ALIGN(sizeof(struct cmsghdr));
+ _debug("CMSG %d, %d, %d",
+ cmsg->cmsg_level, cmsg->cmsg_type, len);
+
+ if (cmsg->cmsg_level != SOL_RXRPC)
+ continue;
+
+ switch (cmsg->cmsg_type) {
+ case RXRPC_USER_CALL_ID:
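+ /* a task in 32-bit compat mode passes a 32-bit call ID */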
+ if (msg->msg_flags & MSG_CMSG_COMPAT) {
+ if (len != sizeof(u32))
+ return -EINVAL;
+ *user_call_ID = *(u32 *) CMSG_DATA(cmsg);
+ } else {
+ if (len != sizeof(unsigned long))
+ return -EINVAL;
+ *user_call_ID = *(unsigned long *)
+ CMSG_DATA(cmsg);
+ }
+ _debug("User Call ID %lx", *user_call_ID);
+ break;
+
+ case RXRPC_ABORT:
+ if (*command != RXRPC_CMD_SEND_DATA)
+ return -EINVAL;
+ *command = RXRPC_CMD_SEND_ABORT;
+ if (len != sizeof(*abort_code))
+ return -EINVAL;
+ *abort_code = *(unsigned int *) CMSG_DATA(cmsg);
+ _debug("Abort %x", *abort_code);
+ if (*abort_code == 0)
+ return -EINVAL;
+ break;
+
+ case RXRPC_ACCEPT:
+ if (*command != RXRPC_CMD_SEND_DATA)
+ return -EINVAL;
+ *command = RXRPC_CMD_ACCEPT;
+ if (len != 0)
+ return -EINVAL;
+ if (!server)
+ return -EISCONN;
+ break;
+
+ default:
+ return -EINVAL;
+ }
+ }
+
+ _leave(" = 0");
+ return 0;
+}
+
+/*
+ * abort a call, sending an ABORT packet to the peer
+ */
+static void rxrpc_send_abort(struct rxrpc_call *call, u32 abort_code)
+{
+ write_lock_bh(&call->state_lock);
+
+ if (call->state <= RXRPC_CALL_COMPLETE) {
+ call->state = RXRPC_CALL_LOCALLY_ABORTED;
+ call->abort_code = abort_code;
+ set_bit(RXRPC_CALL_ABORT, &call->events);
+ del_timer_sync(&call->resend_timer);
+ del_timer_sync(&call->ack_timer);
+ clear_bit(RXRPC_CALL_RESEND_TIMER, &call->events);
+ clear_bit(RXRPC_CALL_ACK, &call->events);
+ clear_bit(RXRPC_CALL_RUN_RTIMER, &call->flags);
+ rxrpc_queue_call(call);
+ }
+
+ write_unlock_bh(&call->state_lock);
+}
+
+/*
+ * send a message forming part of a client call through an RxRPC socket
+ * - caller holds the socket locked
+ * - the socket may be either a client socket or a server socket
+ */
+int rxrpc_client_sendmsg(struct kiocb *iocb, struct rxrpc_sock *rx,
+ struct rxrpc_transport *trans, struct msghdr *msg,
+ size_t len)
+{
+ struct rxrpc_conn_bundle *bundle;
+ enum rxrpc_command cmd;
+ struct rxrpc_call *call;
+ unsigned long user_call_ID = 0;
+ struct key *key;
+ __be16 service_id;
+ u32 abort_code = 0;
+ int ret;
+
+ _enter("");
+
+ ASSERT(trans != NULL);
+
+ ret = rxrpc_sendmsg_cmsg(rx, msg, &user_call_ID, &cmd, &abort_code,
+ false);
+ if (ret < 0)
+ return ret;
+
+ bundle = NULL;
+ if (trans) {
+ service_id = rx->service_id;
+ if (msg->msg_name) {
+ struct sockaddr_rxrpc *srx =
+ (struct sockaddr_rxrpc *) msg->msg_name;
+ service_id = htons(srx->srx_service);
+ }
+ key = rx->key;
+ if (key && !rx->key->payload.data)
+ key = NULL;
+ bundle = rxrpc_get_bundle(rx, trans, key, service_id,
+ GFP_KERNEL);
+ if (IS_ERR(bundle))
+ return PTR_ERR(bundle);
+ }
+
+ call = rxrpc_get_client_call(rx, trans, bundle, user_call_ID,
+ abort_code == 0, GFP_KERNEL);
+ if (trans)
+ rxrpc_put_bundle(trans, bundle);
+ if (IS_ERR(call)) {
+ _leave(" = %ld", PTR_ERR(call));
+ return PTR_ERR(call);
+ }
+
+ _debug("CALL %d USR %lx ST %d on CONN %p",
+ call->debug_id, call->user_call_ID, call->state, call->conn);
+
+ if (call->state >= RXRPC_CALL_COMPLETE) {
+ /* it's too late for this call */
+ ret = -ESHUTDOWN;
+ } else if (cmd == RXRPC_CMD_SEND_ABORT) {
+ rxrpc_send_abort(call, abort_code);
+ } else if (cmd != RXRPC_CMD_SEND_DATA) {
+ ret = -EINVAL;
+ } else if (call->state != RXRPC_CALL_CLIENT_SEND_REQUEST) {
+ /* request phase complete for this client call */
+ ret = -EPROTO;
+ } else {
+ ret = rxrpc_send_data(iocb, rx, call, msg, len);
+ }
+
+ rxrpc_put_call(call);
+ _leave(" = %d", ret);
+ return ret;
+}
+
+/**
+ * rxrpc_kernel_send_data - Allow a kernel service to send data on a call
+ * @call: The call to send data through
+ * @msg: The data to send
+ * @len: The amount of data to send
+ *
+ * Allow a kernel service to send data on a call. The call must be in a state
+ * appropriate to sending data. No control data should be supplied in @msg,
+ * nor should an address be supplied. MSG_MORE should be flagged if there's
+ * more data to come, otherwise this data will end the transmission phase.
+ */
+int rxrpc_kernel_send_data(struct rxrpc_call *call, struct msghdr *msg,
+ size_t len)
+{
+ int ret;
+
+ _enter("{%d,%s},", call->debug_id, rxrpc_call_states[call->state]);
+
+ ASSERTCMP(msg->msg_name, ==, NULL);
+ ASSERTCMP(msg->msg_control, ==, NULL);
+
+ lock_sock(&call->socket->sk);
+
+ _debug("CALL %d USR %lx ST %d on CONN %p",
+ call->debug_id, call->user_call_ID, call->state, call->conn);
+
+ if (call->state >= RXRPC_CALL_COMPLETE) {
+ ret = -ESHUTDOWN; /* it's too late for this call */
+ } else if (call->state != RXRPC_CALL_CLIENT_SEND_REQUEST &&
+ call->state != RXRPC_CALL_SERVER_ACK_REQUEST &&
+ call->state != RXRPC_CALL_SERVER_SEND_REPLY) {
+ ret = -EPROTO; /* the Tx phase is already complete for this call */
+ } else {
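+ /* the kernel caller's buffers are in kernel space, so lift the
+ * user-copy address limit around rxrpc_send_data() */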
+ mm_segment_t oldfs = get_fs();
+ set_fs(KERNEL_DS);
+ ret = rxrpc_send_data(NULL, call->socket, call, msg, len);
+ set_fs(oldfs);
+ }
+
+ release_sock(&call->socket->sk);
+ _leave(" = %d", ret);
+ return ret;
+}
+
+EXPORT_SYMBOL(rxrpc_kernel_send_data);
+
+/**
+ * rxrpc_kernel_abort_call - Allow a kernel service to abort a call
+ * @call: The call to be aborted
+ * @abort_code: The abort code to stick into the ABORT packet
+ *
+ * Allow a kernel service to abort a call, if it's still in an abortable state.
+ */
+void rxrpc_kernel_abort_call(struct rxrpc_call *call, u32 abort_code)
+{
+ _enter("{%d},%d", call->debug_id, abort_code);
+
+ lock_sock(&call->socket->sk);
+
+ _debug("CALL %d USR %lx ST %d on CONN %p",
+ call->debug_id, call->user_call_ID, call->state, call->conn);
+
+ if (call->state < RXRPC_CALL_COMPLETE)
+ rxrpc_send_abort(call, abort_code);
+
+ release_sock(&call->socket->sk);
+ _leave("");
+}
+
+EXPORT_SYMBOL(rxrpc_kernel_abort_call);
+
+/*
+ * send a message through a server socket
+ * - caller holds the socket locked
+ */
+int rxrpc_server_sendmsg(struct kiocb *iocb, struct rxrpc_sock *rx,
+ struct msghdr *msg, size_t len)
+{
+ enum rxrpc_command cmd;
+ struct rxrpc_call *call;
+ unsigned long user_call_ID = 0;
+ u32 abort_code = 0;
+ int ret;
+
+ _enter("");
+
+ ret = rxrpc_sendmsg_cmsg(rx, msg, &user_call_ID, &cmd, &abort_code,
+ true);
+ if (ret < 0)
+ return ret;
+
+ if (cmd == RXRPC_CMD_ACCEPT) {
+ call = rxrpc_accept_call(rx, user_call_ID);
+ if (IS_ERR(call))
+ return PTR_ERR(call);
+ rxrpc_put_call(call);
+ return 0;
+ }
+
+ call = rxrpc_find_server_call(rx, user_call_ID);
+ if (!call)
+ return -EBADSLT;
+ if (call->state >= RXRPC_CALL_COMPLETE) {
+ ret = -ESHUTDOWN;
+ goto out;
+ }
+
+ switch (cmd) {
+ case RXRPC_CMD_SEND_DATA:
+ if (call->state != RXRPC_CALL_CLIENT_SEND_REQUEST &&
+ call->state != RXRPC_CALL_SERVER_ACK_REQUEST &&
+ call->state != RXRPC_CALL_SERVER_SEND_REPLY) {
+ /* Tx phase not yet begun for this call */
+ ret = -EPROTO;
+ break;
+ }
+
+ ret = rxrpc_send_data(iocb, rx, call, msg, len);
+ break;
+
+ case RXRPC_CMD_SEND_ABORT:
+ rxrpc_send_abort(call, abort_code);
+ break;
+ default:
+ BUG();
+ }
+
+ out:
+ rxrpc_put_call(call);
+ _leave(" = %d", ret);
+ return ret;
+}
+
+/*
+ * send a packet through the transport endpoint
+ */
+int rxrpc_send_packet(struct rxrpc_transport *trans, struct sk_buff *skb)
+{
+ struct kvec iov[1];
+ struct msghdr msg;
+ int ret, opt;
+
+ _enter(",{%d}", skb->len);
+
+ iov[0].iov_base = skb->head;
+ iov[0].iov_len = skb->len;
+
+ msg.msg_name = &trans->peer->srx.transport.sin;
+ msg.msg_namelen = sizeof(trans->peer->srx.transport.sin);
+ msg.msg_control = NULL;
+ msg.msg_controllen = 0;
+ msg.msg_flags = 0;
+
+ /* send the packet with the don't fragment bit set if we currently
+ * think it's small enough */
+ if (skb->len - sizeof(struct rxrpc_header) < trans->peer->maxdata) {
+ down_read(&trans->local->defrag_sem);
+ /* send the packet by UDP
+ * - returns -EMSGSIZE if UDP would have to fragment the packet
+ * to go out of the interface
+ * - in which case, we'll have processed the ICMP error
+ * message and updated the peer record
+ */
+ ret = kernel_sendmsg(trans->local->socket, &msg, iov, 1,
+ iov[0].iov_len);
+
+ up_read(&trans->local->defrag_sem);
+ if (ret == -EMSGSIZE)
+ goto send_fragmentable;
+
+ _leave(" = %d [%u]", ret, trans->peer->maxdata);
+ return ret;
+ }
+
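+ /* the packet was either too big for the peer's current MTU estimate or
+ * UDP reported that it would have had to fragment it: retry with path
+ * MTU discovery turned off so that the stack may fragment it */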
+send_fragmentable:
+ /* attempt to send this message with fragmentation enabled */
+ _debug("send fragment");
+
+ down_write(&trans->local->defrag_sem);
+ opt = IP_PMTUDISC_DONT;
+ ret = kernel_setsockopt(trans->local->socket, SOL_IP, IP_MTU_DISCOVER,
+ (char *) &opt, sizeof(opt));
+ if (ret == 0) {
+ ret = kernel_sendmsg(trans->local->socket, &msg, iov, 1,
+ iov[0].iov_len);
+
+ opt = IP_PMTUDISC_DO;
+ kernel_setsockopt(trans->local->socket, SOL_IP,
+ IP_MTU_DISCOVER, (char *) &opt, sizeof(opt));
+ }
+
+ up_write(&trans->local->defrag_sem);
+ _leave(" = %d [frag %u]", ret, trans->peer->maxdata);
+ return ret;
+}
+
+/*
+ * wait for space to appear in the transmit/ACK window
+ * - caller holds the socket locked
+ */
+static int rxrpc_wait_for_tx_window(struct rxrpc_sock *rx,
+ struct rxrpc_call *call,
+ long *timeo)
+{
+ DECLARE_WAITQUEUE(myself, current);
+ int ret;
+
+ _enter(",{%d},%ld",
+ CIRC_SPACE(call->acks_head, call->acks_tail, call->acks_winsz),
+ *timeo);
+
+ add_wait_queue(&call->tx_waitq, &myself);
+
+ for (;;) {
+ set_current_state(TASK_INTERRUPTIBLE);
+ ret = 0;
+ if (CIRC_SPACE(call->acks_head, call->acks_tail,
+ call->acks_winsz) > 0)
+ break;
+ if (signal_pending(current)) {
+ ret = sock_intr_errno(*timeo);
+ break;
+ }
+
+ release_sock(&rx->sk);
+ *timeo = schedule_timeout(*timeo);
+ lock_sock(&rx->sk);
+ }
+
+ remove_wait_queue(&call->tx_waitq, &myself);
+ set_current_state(TASK_RUNNING);
+ _leave(" = %d", ret);
+ return ret;
+}
+
+/*
+ * attempt to schedule an instant Tx resend
+ */
+static inline void rxrpc_instant_resend(struct rxrpc_call *call)
+{
+ read_lock_bh(&call->state_lock);
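+ /* try_to_del_timer_sync() returns >= 0 if the timer is now inert;
+ * -1 would mean its handler is currently running */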
+ if (try_to_del_timer_sync(&call->resend_timer) >= 0) {
+ clear_bit(RXRPC_CALL_RUN_RTIMER, &call->flags);
+ if (call->state < RXRPC_CALL_COMPLETE &&
+ !test_and_set_bit(RXRPC_CALL_RESEND_TIMER, &call->events))
+ rxrpc_queue_call(call);
+ }
+ read_unlock_bh(&call->state_lock);
+}
+
+/*
+ * queue a packet for transmission, set the resend timer and attempt
+ * to send the packet immediately
+ */
+static void rxrpc_queue_packet(struct rxrpc_call *call, struct sk_buff *skb,
+ bool last)
+{
+ struct rxrpc_skb_priv *sp = rxrpc_skb(skb);
+ int ret;
+
+ _net("queue skb %p [%d]", skb, call->acks_head);
+
+ ASSERT(call->acks_window != NULL);
+ call->acks_window[call->acks_head] = (unsigned long) skb;
+ smp_wmb();
+ call->acks_head = (call->acks_head + 1) & (call->acks_winsz - 1);
+
+ if (last || call->state == RXRPC_CALL_SERVER_ACK_REQUEST) {
+ _debug("________awaiting reply/ACK__________");
+ write_lock_bh(&call->state_lock);
+ switch (call->state) {
+ case RXRPC_CALL_CLIENT_SEND_REQUEST:
+ call->state = RXRPC_CALL_CLIENT_AWAIT_REPLY;
+ break;
+ case RXRPC_CALL_SERVER_ACK_REQUEST:
+ call->state = RXRPC_CALL_SERVER_SEND_REPLY;
+ if (!last)
+ break;
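+ /* fall through: the last packet of the reply also ends the Tx phase */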
+ case RXRPC_CALL_SERVER_SEND_REPLY:
+ call->state = RXRPC_CALL_SERVER_AWAIT_ACK;
+ break;
+ default:
+ break;
+ }
+ write_unlock_bh(&call->state_lock);
+ }
+
+ _proto("Tx DATA %%%u { #%u }",
+ ntohl(sp->hdr.serial), ntohl(sp->hdr.seq));
+
+ sp->need_resend = 0;
+ sp->resend_at = jiffies + rxrpc_resend_timeout * HZ;
+ if (!test_and_set_bit(RXRPC_CALL_RUN_RTIMER, &call->flags)) {
+ _debug("run timer");
+ call->resend_timer.expires = sp->resend_at;
+ add_timer(&call->resend_timer);
+ }
+
+ /* attempt to cancel the rx-ACK timer, deferring reply transmission if
+ * we're ACK'ing the request phase of an incoming call */
+ ret = -EAGAIN;
+ if (try_to_del_timer_sync(&call->ack_timer) >= 0) {
+ /* the packet may be freed by rxrpc_process_call() before this
+ * returns */
+ ret = rxrpc_send_packet(call->conn->trans, skb);
+ _net("sent skb %p", skb);
+ } else {
+ _debug("failed to delete ACK timer");
+ }
+
+ if (ret < 0) {
+ _debug("need instant resend %d", ret);
+ sp->need_resend = 1;
+ rxrpc_instant_resend(call);
+ }
+
+ _leave("");
+}
+
+/*
+ * send data through a socket
+ * - must be called in process context
+ * - caller holds the socket locked
+ */
+static int rxrpc_send_data(struct kiocb *iocb,
+ struct rxrpc_sock *rx,
+ struct rxrpc_call *call,
+ struct msghdr *msg, size_t len)
+{
+ struct rxrpc_skb_priv *sp;
+ unsigned char __user *from;
+ struct sk_buff *skb;
+ struct iovec *iov;
+ struct sock *sk = &rx->sk;
+ long timeo;
+ bool more;
+ int ret, ioc, segment, copied;
+
+ _enter(",,,{%zu},%zu", msg->msg_iovlen, len);
+
+ timeo = sock_sndtimeo(sk, msg->msg_flags & MSG_DONTWAIT);
+
+ /* this should be in poll */
+ clear_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags);
+
+ if (sk->sk_err || (sk->sk_shutdown & SEND_SHUTDOWN))
+ return -EPIPE;
+
+ iov = msg->msg_iov;
+ ioc = msg->msg_iovlen - 1;
+ from = iov->iov_base;
+ segment = iov->iov_len;
+ iov++;
+ more = msg->msg_flags & MSG_MORE;
+
+ skb = call->tx_pending;
+ call->tx_pending = NULL;
+
+ copied = 0;
+ do {
+ int copy;
+
+ if (segment > len)
+ segment = len;
+
+ _debug("SEGMENT %d @%p", segment, from);
+
+ if (!skb) {
+ size_t size, chunk, max, space;
+
+ _debug("alloc");
+
+ if (CIRC_SPACE(call->acks_head, call->acks_tail,
+ call->acks_winsz) <= 0) {
+ ret = -EAGAIN;
+ if (msg->msg_flags & MSG_DONTWAIT)
+ goto maybe_error;
+ ret = rxrpc_wait_for_tx_window(rx, call,
+ &timeo);
+ if (ret < 0)
+ goto maybe_error;
+ }
+
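+ /* work out the largest secured, size-aligned chunk of user data
+ * that will fit in one packet */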
+ max = call->conn->trans->peer->maxdata;
+ max -= call->conn->security_size;
+ max &= ~(call->conn->size_align - 1UL);
+
+ chunk = max;
+ if (chunk > len && !more)
+ chunk = len;
+
+ space = chunk + call->conn->size_align;
+ space &= ~(call->conn->size_align - 1UL);
+
+ size = space + call->conn->header_size;
+
+ _debug("SIZE: %zu/%zu/%zu", chunk, space, size);
+
+ /* create a buffer that we can retain until it's ACK'd */
+ skb = sock_alloc_send_skb(
+ sk, size, msg->msg_flags & MSG_DONTWAIT, &ret);
+ if (!skb)
+ goto maybe_error;
+
+ rxrpc_new_skb(skb);
+
+ _debug("ALLOC SEND %p", skb);
+
+ ASSERTCMP(skb->mark, ==, 0);
+
+ _debug("HS: %u", call->conn->header_size);
+ skb_reserve(skb, call->conn->header_size);
+ skb->len += call->conn->header_size;
+
+ sp = rxrpc_skb(skb);
+ sp->remain = chunk;
+ if (sp->remain > skb_tailroom(skb))
+ sp->remain = skb_tailroom(skb);
+
+ _net("skb: hr %d, tr %d, hl %d, rm %d",
+ skb_headroom(skb),
+ skb_tailroom(skb),
+ skb_headlen(skb),
+ sp->remain);
+
+ skb->ip_summed = CHECKSUM_UNNECESSARY;
+ }
+
+ _debug("append");
+ sp = rxrpc_skb(skb);
+
+ /* append next segment of data to the current buffer */
+ copy = skb_tailroom(skb);
+ ASSERTCMP(copy, >, 0);
+ if (copy > segment)
+ copy = segment;
+ if (copy > sp->remain)
+ copy = sp->remain;
+
+ _debug("add");
+ ret = skb_add_data(skb, from, copy);
+ _debug("added");
+ if (ret < 0)
+ goto efault;
+ sp->remain -= copy;
+ skb->mark += copy;
+
+ len -= copy;
+ segment -= copy;
+ from += copy;
+ while (segment == 0 && ioc > 0) {
+ from = iov->iov_base;
+ segment = iov->iov_len;
+ iov++;
+ ioc--;
+ }
+ if (len == 0) {
+ segment = 0;
+ ioc = 0;
+ }
+
+ /* check for the far side aborting the call or a network error
+ * occurring */
+ if (call->state > RXRPC_CALL_COMPLETE)
+ goto call_aborted;
+
+ /* add the packet to the send queue if it's now full */
+ if (sp->remain <= 0 || (segment == 0 && !more)) {
+ struct rxrpc_connection *conn = call->conn;
+ size_t pad;
+
+ /* pad out if we're using security */
+ if (conn->security) {
+ pad = conn->security_size + skb->mark;
+ pad = conn->size_align - pad;
+ pad &= conn->size_align - 1;
+ _debug("pad %zu", pad);
+ if (pad)
+ memset(skb_put(skb, pad), 0, pad);
+ }
+
+ sp->hdr.epoch = conn->epoch;
+ sp->hdr.cid = call->cid;
+ sp->hdr.callNumber = call->call_id;
+ sp->hdr.seq =
+ htonl(atomic_inc_return(&call->sequence));
+ sp->hdr.serial =
+ htonl(atomic_inc_return(&conn->serial));
+ sp->hdr.type = RXRPC_PACKET_TYPE_DATA;
+ sp->hdr.userStatus = 0;
+ sp->hdr.securityIndex = conn->security_ix;
+ sp->hdr._rsvd = 0;
+ sp->hdr.serviceId = conn->service_id;
+
+ sp->hdr.flags = conn->out_clientflag;
+ if (len == 0 && !more)
+ sp->hdr.flags |= RXRPC_LAST_PACKET;
+ else if (CIRC_SPACE(call->acks_head, call->acks_tail,
+ call->acks_winsz) > 1)
+ sp->hdr.flags |= RXRPC_MORE_PACKETS;
+
+ ret = rxrpc_secure_packet(
+ call, skb, skb->mark,
+ skb->head + sizeof(struct rxrpc_header));
+ if (ret < 0)
+ goto out;
+
+ memcpy(skb->head, &sp->hdr,
+ sizeof(struct rxrpc_header));
+ rxrpc_queue_packet(call, skb, segment == 0 && !more);
+ skb = NULL;
+ }
+
+ } while (segment > 0);
+
+out:
+ call->tx_pending = skb;
+ _leave(" = %d", ret);
+ return ret;
+
+call_aborted:
+ rxrpc_free_skb(skb);
+ if (call->state == RXRPC_CALL_NETWORK_ERROR)
+ ret = call->conn->trans->peer->net_error;
+ else
+ ret = -ECONNABORTED;
+ _leave(" = %d", ret);
+ return ret;
+
+maybe_error:
+ if (copied)
+ ret = copied;
+ goto out;
+
+efault:
+ ret = -EFAULT;
+ goto out;
+}
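
rxrpc_send_data() consumes the caller's iovecs a segment at a time, packing
them into retainable DATA skbs and withholding the RXRPC_LAST_PACKET flag
while MSG_MORE is set. A hypothetical userspace view of that contract (a
sketch, not part of the patch: it assumes AF_RXRPC sockets are exposed to
applications and that a <linux/rxrpc.h> style header supplies SOL_RXRPC and
RXRPC_USER_CALL_ID, the same control message the recvmsg side reports):

#include <string.h>
#include <sys/socket.h>
#include <sys/uio.h>
#include <linux/rxrpc.h>	/* assumed home of SOL_RXRPC, RXRPC_* */

static int send_request(int fd, unsigned long call_id,
			const void *part1, size_t len1,
			const void *part2, size_t len2)
{
	char control[CMSG_SPACE(sizeof(call_id))];
	struct iovec iov = { .iov_base = (void *) part1, .iov_len = len1 };
	struct msghdr msg = {
		.msg_iov	= &iov,
		.msg_iovlen	= 1,
		.msg_control	= control,
		.msg_controllen	= sizeof(control),
	};
	struct cmsghdr *cmsg = CMSG_FIRSTHDR(&msg);

	/* tag the data with the user call ID that identifies the call */
	cmsg->cmsg_level = SOL_RXRPC;
	cmsg->cmsg_type = RXRPC_USER_CALL_ID;
	cmsg->cmsg_len = CMSG_LEN(sizeof(call_id));
	memcpy(CMSG_DATA(cmsg), &call_id, sizeof(call_id));

	/* MSG_MORE keeps RXRPC_LAST_PACKET off the packets sent here */
	if (sendmsg(fd, &msg, MSG_MORE) < 0)
		return -1;

	/* the final segment, sent without MSG_MORE, closes the request
	 * phase and lets the last packet carry RXRPC_LAST_PACKET */
	iov.iov_base = (void *) part2;
	iov.iov_len = len2;
	return sendmsg(fd, &msg, 0) < 0 ? -1 : 0;
}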
diff --git a/net/rxrpc/ar-peer.c b/net/rxrpc/ar-peer.c
new file mode 100644
index 00000000000..ce08b78647c
--- /dev/null
+++ b/net/rxrpc/ar-peer.c
@@ -0,0 +1,318 @@
+/* RxRPC remote transport endpoint management
+ *
+ * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/module.h>
+#include <linux/net.h>
+#include <linux/skbuff.h>
+#include <linux/udp.h>
+#include <linux/in.h>
+#include <linux/in6.h>
+#include <linux/icmp.h>
+#include <net/sock.h>
+#include <net/af_rxrpc.h>
+#include <net/ip.h>
+#include <net/route.h>
+#include "ar-internal.h"
+
+static LIST_HEAD(rxrpc_peers);
+static DEFINE_RWLOCK(rxrpc_peer_lock);
+static DECLARE_WAIT_QUEUE_HEAD(rxrpc_peer_wq);
+
+static void rxrpc_destroy_peer(struct work_struct *work);
+
+/*
+ * assess the MTU size for the network interface through which this peer is
+ * reached
+ */
+static void rxrpc_assess_MTU_size(struct rxrpc_peer *peer)
+{
+ struct rtable *rt;
+ struct flowi fl;
+ int ret;
+
+ peer->if_mtu = 1500;
+
+ memset(&fl, 0, sizeof(fl));
+
+ switch (peer->srx.transport.family) {
+ case AF_INET:
+ fl.oif = 0;
+	fl.proto = IPPROTO_UDP;
+ fl.nl_u.ip4_u.saddr = 0;
+ fl.nl_u.ip4_u.daddr = peer->srx.transport.sin.sin_addr.s_addr;
+ fl.nl_u.ip4_u.tos = 0;
+ /* assume AFS.CM talking to AFS.FS */
+ fl.uli_u.ports.sport = htons(7001);
+ fl.uli_u.ports.dport = htons(7000);
+ break;
+ default:
+ BUG();
+ }
+
+ ret = ip_route_output_key(&rt, &fl);
+ if (ret < 0) {
+ kleave(" [route err %d]", ret);
+ return;
+ }
+
+ peer->if_mtu = dst_mtu(&rt->u.dst);
+ dst_release(&rt->u.dst);
+
+ kleave(" [if_mtu %u]", peer->if_mtu);
+}
+
+/*
+ * allocate a new peer
+ */
+static struct rxrpc_peer *rxrpc_alloc_peer(struct sockaddr_rxrpc *srx,
+ gfp_t gfp)
+{
+ struct rxrpc_peer *peer;
+
+ _enter("");
+
+ peer = kzalloc(sizeof(struct rxrpc_peer), gfp);
+ if (peer) {
+ INIT_WORK(&peer->destroyer, &rxrpc_destroy_peer);
+ INIT_LIST_HEAD(&peer->link);
+ INIT_LIST_HEAD(&peer->error_targets);
+ spin_lock_init(&peer->lock);
+ atomic_set(&peer->usage, 1);
+ peer->debug_id = atomic_inc_return(&rxrpc_debug_id);
+ memcpy(&peer->srx, srx, sizeof(*srx));
+
+ rxrpc_assess_MTU_size(peer);
+ peer->mtu = peer->if_mtu;
+
+ if (srx->transport.family == AF_INET) {
+ peer->hdrsize = sizeof(struct iphdr);
+ switch (srx->transport_type) {
+ case SOCK_DGRAM:
+ peer->hdrsize += sizeof(struct udphdr);
+ break;
+ default:
+ BUG();
+ break;
+ }
+ } else {
+ BUG();
+ }
+
+ peer->hdrsize += sizeof(struct rxrpc_header);
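+		/* for example, a plain IPv4/UDP peer: 1500 - 20 (IP) - 8
+		 * (UDP) - 28 (Rx wire header) leaves 1444 bytes per packet */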
+ peer->maxdata = peer->mtu - peer->hdrsize;
+ }
+
+ _leave(" = %p", peer);
+ return peer;
+}
+
+/*
+ * obtain a remote transport endpoint for the specified address
+ */
+struct rxrpc_peer *rxrpc_get_peer(struct sockaddr_rxrpc *srx, gfp_t gfp)
+{
+ struct rxrpc_peer *peer, *candidate;
+ const char *new = "old";
+ int usage;
+
+ _enter("{%d,%d,%u.%u.%u.%u+%hu}",
+ srx->transport_type,
+ srx->transport_len,
+ NIPQUAD(srx->transport.sin.sin_addr),
+ ntohs(srx->transport.sin.sin_port));
+
+ /* search the peer list first */
+ read_lock_bh(&rxrpc_peer_lock);
+ list_for_each_entry(peer, &rxrpc_peers, link) {
+ _debug("check PEER %d { u=%d t=%d l=%d }",
+ peer->debug_id,
+ atomic_read(&peer->usage),
+ peer->srx.transport_type,
+ peer->srx.transport_len);
+
+ if (atomic_read(&peer->usage) > 0 &&
+ peer->srx.transport_type == srx->transport_type &&
+ peer->srx.transport_len == srx->transport_len &&
+ memcmp(&peer->srx.transport,
+ &srx->transport,
+ srx->transport_len) == 0)
+ goto found_extant_peer;
+ }
+ read_unlock_bh(&rxrpc_peer_lock);
+
+ /* not yet present - create a candidate for a new record and then
+ * redo the search */
+ candidate = rxrpc_alloc_peer(srx, gfp);
+ if (!candidate) {
+ _leave(" = -ENOMEM");
+ return ERR_PTR(-ENOMEM);
+ }
+
+ write_lock_bh(&rxrpc_peer_lock);
+
+ list_for_each_entry(peer, &rxrpc_peers, link) {
+ if (atomic_read(&peer->usage) > 0 &&
+ peer->srx.transport_type == srx->transport_type &&
+ peer->srx.transport_len == srx->transport_len &&
+ memcmp(&peer->srx.transport,
+ &srx->transport,
+ srx->transport_len) == 0)
+ goto found_extant_second;
+ }
+
+ /* we can now add the new candidate to the list */
+ peer = candidate;
+ candidate = NULL;
+
+ list_add_tail(&peer->link, &rxrpc_peers);
+ write_unlock_bh(&rxrpc_peer_lock);
+ new = "new";
+
+success:
+ _net("PEER %s %d {%d,%u,%u.%u.%u.%u+%hu}",
+ new,
+ peer->debug_id,
+ peer->srx.transport_type,
+ peer->srx.transport.family,
+ NIPQUAD(peer->srx.transport.sin.sin_addr),
+ ntohs(peer->srx.transport.sin.sin_port));
+
+ _leave(" = %p {u=%d}", peer, atomic_read(&peer->usage));
+ return peer;
+
+ /* we found the peer in the list immediately */
+found_extant_peer:
+ usage = atomic_inc_return(&peer->usage);
+ read_unlock_bh(&rxrpc_peer_lock);
+ goto success;
+
+ /* we found the peer on the second time through the list */
+found_extant_second:
+ usage = atomic_inc_return(&peer->usage);
+ write_unlock_bh(&rxrpc_peer_lock);
+ kfree(candidate);
+ goto success;
+}
+
+/*
+ * find the peer associated with a packet
+ */
+struct rxrpc_peer *rxrpc_find_peer(struct rxrpc_local *local,
+ __be32 addr, __be16 port)
+{
+ struct rxrpc_peer *peer;
+
+ _enter("");
+
+ /* search the peer list */
+ read_lock_bh(&rxrpc_peer_lock);
+
+ if (local->srx.transport.family == AF_INET &&
+ local->srx.transport_type == SOCK_DGRAM
+ ) {
+ list_for_each_entry(peer, &rxrpc_peers, link) {
+ if (atomic_read(&peer->usage) > 0 &&
+ peer->srx.transport_type == SOCK_DGRAM &&
+ peer->srx.transport.family == AF_INET &&
+ peer->srx.transport.sin.sin_port == port &&
+ peer->srx.transport.sin.sin_addr.s_addr == addr)
+ goto found_UDP_peer;
+ }
+
+ goto new_UDP_peer;
+ }
+
+ read_unlock_bh(&rxrpc_peer_lock);
+ _leave(" = -EAFNOSUPPORT");
+ return ERR_PTR(-EAFNOSUPPORT);
+
+found_UDP_peer:
+ _net("Rx UDP DGRAM from peer %d", peer->debug_id);
+ atomic_inc(&peer->usage);
+ read_unlock_bh(&rxrpc_peer_lock);
+ _leave(" = %p", peer);
+ return peer;
+
+new_UDP_peer:
+ _net("Rx UDP DGRAM from NEW peer %d", peer->debug_id);
+ read_unlock_bh(&rxrpc_peer_lock);
+ _leave(" = -EBUSY [new]");
+ return ERR_PTR(-EBUSY);
+}
+
+/*
+ * release a remote transport endpoint
+ */
+void rxrpc_put_peer(struct rxrpc_peer *peer)
+{
+ _enter("%p{u=%d}", peer, atomic_read(&peer->usage));
+
+ ASSERTCMP(atomic_read(&peer->usage), >, 0);
+
+ if (likely(!atomic_dec_and_test(&peer->usage))) {
+ _leave(" [in use]");
+ return;
+ }
+
+ rxrpc_queue_work(&peer->destroyer);
+ _leave("");
+}
+
+/*
+ * destroy a remote transport endpoint
+ */
+static void rxrpc_destroy_peer(struct work_struct *work)
+{
+ struct rxrpc_peer *peer =
+ container_of(work, struct rxrpc_peer, destroyer);
+
+ _enter("%p{%d}", peer, atomic_read(&peer->usage));
+
+ write_lock_bh(&rxrpc_peer_lock);
+ list_del(&peer->link);
+ write_unlock_bh(&rxrpc_peer_lock);
+
+ _net("DESTROY PEER %d", peer->debug_id);
+ kfree(peer);
+
+ if (list_empty(&rxrpc_peers))
+ wake_up_all(&rxrpc_peer_wq);
+ _leave("");
+}
+
+/*
+ * wait for all the peer records to be discarded (they are refcounted, so
+ * all we can do at module removal is wait for the last user to let go)
+ */
+void __exit rxrpc_destroy_all_peers(void)
+{
+	DECLARE_WAITQUEUE(myself, current);
+
+ _enter("");
+
+ /* we simply have to wait for them to go away */
+ if (!list_empty(&rxrpc_peers)) {
+ set_current_state(TASK_UNINTERRUPTIBLE);
+ add_wait_queue(&rxrpc_peer_wq, &myself);
+
+ while (!list_empty(&rxrpc_peers)) {
+ schedule();
+ set_current_state(TASK_UNINTERRUPTIBLE);
+ }
+
+ remove_wait_queue(&rxrpc_peer_wq, &myself);
+ set_current_state(TASK_RUNNING);
+ }
+
+ _leave("");
+}
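
rxrpc_get_peer() above is an instance of a common pattern: search under the
read lock, allocate a candidate with no lock held (the allocation may sleep,
so it cannot happen inside a BH-disabled rwlock), then search again under the
write lock and throw the candidate away if another thread got there first. A
userspace analogue of the same shape (illustrative only; the names here are
invented):

#include <pthread.h>
#include <stdatomic.h>
#include <stdlib.h>

struct node {
	struct node *next;
	int key;
	atomic_int usage;
};

static struct node *nodes;
static pthread_rwlock_t nodes_lock = PTHREAD_RWLOCK_INITIALIZER;

static struct node *lookup(int key)
{
	struct node *n;

	for (n = nodes; n; n = n->next)
		if (n->key == key)
			return n;
	return NULL;
}

struct node *get_node(int key)
{
	struct node *n, *candidate;

	/* fast path: the record usually exists already */
	pthread_rwlock_rdlock(&nodes_lock);
	n = lookup(key);
	if (n)
		atomic_fetch_add(&n->usage, 1);
	pthread_rwlock_unlock(&nodes_lock);
	if (n)
		return n;

	/* build a candidate outside the lock */
	candidate = calloc(1, sizeof(*candidate));
	if (!candidate)
		return NULL;
	candidate->key = key;
	atomic_init(&candidate->usage, 1);

	/* redo the search: someone may have added a record meanwhile */
	pthread_rwlock_wrlock(&nodes_lock);
	n = lookup(key);
	if (n) {
		atomic_fetch_add(&n->usage, 1);
		pthread_rwlock_unlock(&nodes_lock);
		free(candidate);	/* lost the race; discard ours */
		return n;
	}
	candidate->next = nodes;
	nodes = candidate;
	pthread_rwlock_unlock(&nodes_lock);
	return candidate;
}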
diff --git a/net/rxrpc/ar-proc.c b/net/rxrpc/ar-proc.c
new file mode 100644
index 00000000000..58f4b4e5cec
--- /dev/null
+++ b/net/rxrpc/ar-proc.c
@@ -0,0 +1,247 @@
+/* /proc/net/ support for AF_RXRPC
+ *
+ * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/module.h>
+#include <net/sock.h>
+#include <net/af_rxrpc.h>
+#include "ar-internal.h"
+
+static const char *rxrpc_conn_states[] = {
+ [RXRPC_CONN_UNUSED] = "Unused ",
+ [RXRPC_CONN_CLIENT] = "Client ",
+ [RXRPC_CONN_SERVER_UNSECURED] = "SvUnsec ",
+ [RXRPC_CONN_SERVER_CHALLENGING] = "SvChall ",
+ [RXRPC_CONN_SERVER] = "SvSecure",
+ [RXRPC_CONN_REMOTELY_ABORTED] = "RmtAbort",
+ [RXRPC_CONN_LOCALLY_ABORTED] = "LocAbort",
+ [RXRPC_CONN_NETWORK_ERROR] = "NetError",
+};
+
+const char *rxrpc_call_states[] = {
+ [RXRPC_CALL_CLIENT_SEND_REQUEST] = "ClSndReq",
+ [RXRPC_CALL_CLIENT_AWAIT_REPLY] = "ClAwtRpl",
+ [RXRPC_CALL_CLIENT_RECV_REPLY] = "ClRcvRpl",
+ [RXRPC_CALL_CLIENT_FINAL_ACK] = "ClFnlACK",
+ [RXRPC_CALL_SERVER_SECURING] = "SvSecure",
+ [RXRPC_CALL_SERVER_ACCEPTING] = "SvAccept",
+ [RXRPC_CALL_SERVER_RECV_REQUEST] = "SvRcvReq",
+ [RXRPC_CALL_SERVER_ACK_REQUEST] = "SvAckReq",
+ [RXRPC_CALL_SERVER_SEND_REPLY] = "SvSndRpl",
+ [RXRPC_CALL_SERVER_AWAIT_ACK] = "SvAwtACK",
+ [RXRPC_CALL_COMPLETE] = "Complete",
+ [RXRPC_CALL_SERVER_BUSY] = "SvBusy ",
+ [RXRPC_CALL_REMOTELY_ABORTED] = "RmtAbort",
+ [RXRPC_CALL_LOCALLY_ABORTED] = "LocAbort",
+ [RXRPC_CALL_NETWORK_ERROR] = "NetError",
+ [RXRPC_CALL_DEAD] = "Dead ",
+};
+
+/*
+ * generate a list of extant and dead calls in /proc/net/rxrpc_calls
+ */
+static void *rxrpc_call_seq_start(struct seq_file *seq, loff_t *_pos)
+{
+ struct list_head *_p;
+ loff_t pos = *_pos;
+
+ read_lock(&rxrpc_call_lock);
+ if (!pos)
+ return SEQ_START_TOKEN;
+ pos--;
+
+ list_for_each(_p, &rxrpc_calls)
+ if (!pos--)
+ break;
+
+ return _p != &rxrpc_calls ? _p : NULL;
+}
+
+static void *rxrpc_call_seq_next(struct seq_file *seq, void *v, loff_t *pos)
+{
+ struct list_head *_p;
+
+ (*pos)++;
+
+ _p = v;
+ _p = (v == SEQ_START_TOKEN) ? rxrpc_calls.next : _p->next;
+
+ return _p != &rxrpc_calls ? _p : NULL;
+}
+
+static void rxrpc_call_seq_stop(struct seq_file *seq, void *v)
+{
+ read_unlock(&rxrpc_call_lock);
+}
+
+static int rxrpc_call_seq_show(struct seq_file *seq, void *v)
+{
+ struct rxrpc_transport *trans;
+ struct rxrpc_call *call;
+ char lbuff[4 + 4 + 4 + 4 + 5 + 1], rbuff[4 + 4 + 4 + 4 + 5 + 1];
+
+ if (v == SEQ_START_TOKEN) {
+ seq_puts(seq,
+ "Proto Local Remote "
+ " SvID ConnID CallID End Use State Abort "
+ " UserID\n");
+ return 0;
+ }
+
+ call = list_entry(v, struct rxrpc_call, link);
+ trans = call->conn->trans;
+
+ sprintf(lbuff, NIPQUAD_FMT":%u",
+ NIPQUAD(trans->local->srx.transport.sin.sin_addr),
+ ntohs(trans->local->srx.transport.sin.sin_port));
+
+ sprintf(rbuff, NIPQUAD_FMT":%u",
+ NIPQUAD(trans->peer->srx.transport.sin.sin_addr),
+ ntohs(trans->peer->srx.transport.sin.sin_port));
+
+ seq_printf(seq,
+ "UDP %-22.22s %-22.22s %4x %08x %08x %s %3u"
+ " %-8.8s %08x %lx\n",
+ lbuff,
+ rbuff,
+ ntohs(call->conn->service_id),
+ ntohl(call->conn->cid),
+ ntohl(call->call_id),
+ call->conn->in_clientflag ? "Svc" : "Clt",
+ atomic_read(&call->usage),
+ rxrpc_call_states[call->state],
+ call->abort_code,
+ call->user_call_ID);
+
+ return 0;
+}
+
+static struct seq_operations rxrpc_call_seq_ops = {
+ .start = rxrpc_call_seq_start,
+ .next = rxrpc_call_seq_next,
+ .stop = rxrpc_call_seq_stop,
+ .show = rxrpc_call_seq_show,
+};
+
+static int rxrpc_call_seq_open(struct inode *inode, struct file *file)
+{
+ return seq_open(file, &rxrpc_call_seq_ops);
+}
+
+struct file_operations rxrpc_call_seq_fops = {
+ .owner = THIS_MODULE,
+ .open = rxrpc_call_seq_open,
+ .read = seq_read,
+ .llseek = seq_lseek,
+	.release	= seq_release,
+};
+
+/*
+ * generate a list of extant virtual connections in /proc/net/rxrpc_conns
+ */
+static void *rxrpc_connection_seq_start(struct seq_file *seq, loff_t *_pos)
+{
+ struct list_head *_p;
+ loff_t pos = *_pos;
+
+ read_lock(&rxrpc_connection_lock);
+ if (!pos)
+ return SEQ_START_TOKEN;
+ pos--;
+
+ list_for_each(_p, &rxrpc_connections)
+ if (!pos--)
+ break;
+
+ return _p != &rxrpc_connections ? _p : NULL;
+}
+
+static void *rxrpc_connection_seq_next(struct seq_file *seq, void *v,
+ loff_t *pos)
+{
+ struct list_head *_p;
+
+ (*pos)++;
+
+ _p = v;
+ _p = (v == SEQ_START_TOKEN) ? rxrpc_connections.next : _p->next;
+
+ return _p != &rxrpc_connections ? _p : NULL;
+}
+
+static void rxrpc_connection_seq_stop(struct seq_file *seq, void *v)
+{
+ read_unlock(&rxrpc_connection_lock);
+}
+
+static int rxrpc_connection_seq_show(struct seq_file *seq, void *v)
+{
+ struct rxrpc_connection *conn;
+ struct rxrpc_transport *trans;
+ char lbuff[4 + 4 + 4 + 4 + 5 + 1], rbuff[4 + 4 + 4 + 4 + 5 + 1];
+
+ if (v == SEQ_START_TOKEN) {
+ seq_puts(seq,
+ "Proto Local Remote "
+ " SvID ConnID Calls End Use State Key "
+ " Serial ISerial\n"
+ );
+ return 0;
+ }
+
+ conn = list_entry(v, struct rxrpc_connection, link);
+ trans = conn->trans;
+
+ sprintf(lbuff, NIPQUAD_FMT":%u",
+ NIPQUAD(trans->local->srx.transport.sin.sin_addr),
+ ntohs(trans->local->srx.transport.sin.sin_port));
+
+ sprintf(rbuff, NIPQUAD_FMT":%u",
+ NIPQUAD(trans->peer->srx.transport.sin.sin_addr),
+ ntohs(trans->peer->srx.transport.sin.sin_port));
+
+ seq_printf(seq,
+ "UDP %-22.22s %-22.22s %4x %08x %08x %s %3u"
+ " %s %08x %08x %08x\n",
+ lbuff,
+ rbuff,
+ ntohs(conn->service_id),
+ ntohl(conn->cid),
+ conn->call_counter,
+ conn->in_clientflag ? "Svc" : "Clt",
+ atomic_read(&conn->usage),
+ rxrpc_conn_states[conn->state],
+ key_serial(conn->key),
+ atomic_read(&conn->serial),
+ atomic_read(&conn->hi_serial));
+
+ return 0;
+}
+
+static struct seq_operations rxrpc_connection_seq_ops = {
+ .start = rxrpc_connection_seq_start,
+ .next = rxrpc_connection_seq_next,
+ .stop = rxrpc_connection_seq_stop,
+ .show = rxrpc_connection_seq_show,
+};
+
+
+static int rxrpc_connection_seq_open(struct inode *inode, struct file *file)
+{
+ return seq_open(file, &rxrpc_connection_seq_ops);
+}
+
+struct file_operations rxrpc_connection_seq_fops = {
+ .owner = THIS_MODULE,
+ .open = rxrpc_connection_seq_open,
+ .read = seq_read,
+ .llseek = seq_lseek,
+	.release	= seq_release,
+};
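
Neither fops table is static, so the /proc entries themselves are evidently
created elsewhere in the patch. A sketch of what that registration would look
like with this era's pre-namespace helpers (the function name below is
invented; the entry names come from the comments above):

#include <linux/proc_fs.h>
#include "ar-internal.h"

static int __init rxrpc_proc_init(void)
{
	if (!proc_net_fops_create("rxrpc_calls", 0, &rxrpc_call_seq_fops))
		return -ENOMEM;
	if (!proc_net_fops_create("rxrpc_conns", 0,
				  &rxrpc_connection_seq_fops)) {
		proc_net_remove("rxrpc_calls");
		return -ENOMEM;
	}
	return 0;
}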
diff --git a/net/rxrpc/ar-recvmsg.c b/net/rxrpc/ar-recvmsg.c
new file mode 100644
index 00000000000..f19121d4795
--- /dev/null
+++ b/net/rxrpc/ar-recvmsg.c
@@ -0,0 +1,436 @@
+/* RxRPC recvmsg() implementation
+ *
+ * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/net.h>
+#include <linux/skbuff.h>
+#include <net/sock.h>
+#include <net/af_rxrpc.h>
+#include "ar-internal.h"
+
+/*
+ * remove a call's user ID from the socket tree to make the user ID available
+ * again and so that it won't be seen again in association with that call
+ */
+void rxrpc_remove_user_ID(struct rxrpc_sock *rx, struct rxrpc_call *call)
+{
+ _debug("RELEASE CALL %d", call->debug_id);
+
+ if (test_bit(RXRPC_CALL_HAS_USERID, &call->flags)) {
+ write_lock_bh(&rx->call_lock);
+ rb_erase(&call->sock_node, &call->socket->calls);
+ clear_bit(RXRPC_CALL_HAS_USERID, &call->flags);
+ write_unlock_bh(&rx->call_lock);
+ }
+
+ read_lock_bh(&call->state_lock);
+ if (!test_bit(RXRPC_CALL_RELEASED, &call->flags) &&
+ !test_and_set_bit(RXRPC_CALL_RELEASE, &call->events))
+ rxrpc_queue_call(call);
+ read_unlock_bh(&call->state_lock);
+}
+
+/*
+ * receive a message from an RxRPC socket
+ * - we need to be careful about two or more threads calling recvmsg
+ * simultaneously
+ */
+int rxrpc_recvmsg(struct kiocb *iocb, struct socket *sock,
+ struct msghdr *msg, size_t len, int flags)
+{
+ struct rxrpc_skb_priv *sp;
+ struct rxrpc_call *call = NULL, *continue_call = NULL;
+ struct rxrpc_sock *rx = rxrpc_sk(sock->sk);
+ struct sk_buff *skb;
+ long timeo;
+ int copy, ret, ullen, offset, copied = 0;
+ u32 abort_code;
+
+ DEFINE_WAIT(wait);
+
+ _enter(",,,%zu,%d", len, flags);
+
+ if (flags & (MSG_OOB | MSG_TRUNC))
+ return -EOPNOTSUPP;
+
+ ullen = msg->msg_flags & MSG_CMSG_COMPAT ? 4 : sizeof(unsigned long);
+
+ timeo = sock_rcvtimeo(&rx->sk, flags & MSG_DONTWAIT);
+ msg->msg_flags |= MSG_MORE;
+
+ lock_sock(&rx->sk);
+
+ for (;;) {
+ /* return immediately if a client socket has no outstanding
+ * calls */
+ if (RB_EMPTY_ROOT(&rx->calls)) {
+ if (copied)
+ goto out;
+ if (rx->sk.sk_state != RXRPC_SERVER_LISTENING) {
+ release_sock(&rx->sk);
+ if (continue_call)
+ rxrpc_put_call(continue_call);
+ return -ENODATA;
+ }
+ }
+
+ /* get the next message on the Rx queue */
+ skb = skb_peek(&rx->sk.sk_receive_queue);
+ if (!skb) {
+ /* nothing remains on the queue */
+ if (copied &&
+			    (flags & MSG_PEEK || timeo == 0))
+ goto out;
+
+ /* wait for a message to turn up */
+ release_sock(&rx->sk);
+ prepare_to_wait_exclusive(rx->sk.sk_sleep, &wait,
+ TASK_INTERRUPTIBLE);
+ ret = sock_error(&rx->sk);
+ if (ret)
+ goto wait_error;
+
+ if (skb_queue_empty(&rx->sk.sk_receive_queue)) {
+ if (signal_pending(current))
+ goto wait_interrupted;
+ timeo = schedule_timeout(timeo);
+ }
+ finish_wait(rx->sk.sk_sleep, &wait);
+ lock_sock(&rx->sk);
+ continue;
+ }
+
+ peek_next_packet:
+ sp = rxrpc_skb(skb);
+ call = sp->call;
+ ASSERT(call != NULL);
+
+ _debug("next pkt %s", rxrpc_pkts[sp->hdr.type]);
+
+ /* make sure we wait for the state to be updated in this call */
+ spin_lock_bh(&call->lock);
+ spin_unlock_bh(&call->lock);
+
+ if (test_bit(RXRPC_CALL_RELEASED, &call->flags)) {
+ _debug("packet from released call");
+ if (skb_dequeue(&rx->sk.sk_receive_queue) != skb)
+ BUG();
+ rxrpc_free_skb(skb);
+ continue;
+ }
+
+ /* determine whether to continue last data receive */
+ if (continue_call) {
+ _debug("maybe cont");
+ if (call != continue_call ||
+ skb->mark != RXRPC_SKB_MARK_DATA) {
+ release_sock(&rx->sk);
+ rxrpc_put_call(continue_call);
+ _leave(" = %d [noncont]", copied);
+ return copied;
+ }
+ }
+
+ rxrpc_get_call(call);
+
+ /* copy the peer address and timestamp */
+ if (!continue_call) {
+ if (msg->msg_name && msg->msg_namelen > 0)
+				memcpy(msg->msg_name, &call->conn->trans->peer->srx,
+ sizeof(call->conn->trans->peer->srx));
+ sock_recv_timestamp(msg, &rx->sk, skb);
+ }
+
+ /* receive the message */
+ if (skb->mark != RXRPC_SKB_MARK_DATA)
+ goto receive_non_data_message;
+
+ _debug("recvmsg DATA #%u { %d, %d }",
+ ntohl(sp->hdr.seq), skb->len, sp->offset);
+
+ if (!continue_call) {
+ /* only set the control data once per recvmsg() */
+ ret = put_cmsg(msg, SOL_RXRPC, RXRPC_USER_CALL_ID,
+ ullen, &call->user_call_ID);
+ if (ret < 0)
+ goto copy_error;
+ ASSERT(test_bit(RXRPC_CALL_HAS_USERID, &call->flags));
+ }
+
+ ASSERTCMP(ntohl(sp->hdr.seq), >=, call->rx_data_recv);
+ ASSERTCMP(ntohl(sp->hdr.seq), <=, call->rx_data_recv + 1);
+ call->rx_data_recv = ntohl(sp->hdr.seq);
+
+ ASSERTCMP(ntohl(sp->hdr.seq), >, call->rx_data_eaten);
+
+ offset = sp->offset;
+ copy = skb->len - offset;
+ if (copy > len - copied)
+ copy = len - copied;
+
+ if (skb->ip_summed == CHECKSUM_UNNECESSARY) {
+ ret = skb_copy_datagram_iovec(skb, offset,
+ msg->msg_iov, copy);
+ } else {
+ ret = skb_copy_and_csum_datagram_iovec(skb, offset,
+ msg->msg_iov);
+ if (ret == -EINVAL)
+ goto csum_copy_error;
+ }
+
+ if (ret < 0)
+ goto copy_error;
+
+ /* handle piecemeal consumption of data packets */
+ _debug("copied %d+%d", copy, copied);
+
+ offset += copy;
+ copied += copy;
+
+ if (!(flags & MSG_PEEK))
+ sp->offset = offset;
+
+ if (sp->offset < skb->len) {
+ _debug("buffer full");
+ ASSERTCMP(copied, ==, len);
+ break;
+ }
+
+ /* we transferred the whole data packet */
+ if (sp->hdr.flags & RXRPC_LAST_PACKET) {
+ _debug("last");
+ if (call->conn->out_clientflag) {
+ /* last byte of reply received */
+ ret = copied;
+ goto terminal_message;
+ }
+
+ /* last bit of request received */
+ if (!(flags & MSG_PEEK)) {
+ _debug("eat packet");
+ if (skb_dequeue(&rx->sk.sk_receive_queue) !=
+ skb)
+ BUG();
+ rxrpc_free_skb(skb);
+ }
+ msg->msg_flags &= ~MSG_MORE;
+ break;
+ }
+
+ /* move on to the next data message */
+ _debug("next");
+ if (!continue_call)
+ continue_call = sp->call;
+ else
+ rxrpc_put_call(call);
+ call = NULL;
+
+ if (flags & MSG_PEEK) {
+ _debug("peek next");
+ skb = skb->next;
+ if (skb == (struct sk_buff *) &rx->sk.sk_receive_queue)
+ break;
+ goto peek_next_packet;
+ }
+
+ _debug("eat packet");
+ if (skb_dequeue(&rx->sk.sk_receive_queue) != skb)
+ BUG();
+ rxrpc_free_skb(skb);
+ }
+
+ /* end of non-terminal data packet reception for the moment */
+ _debug("end rcv data");
+out:
+ release_sock(&rx->sk);
+ if (call)
+ rxrpc_put_call(call);
+ if (continue_call)
+ rxrpc_put_call(continue_call);
+ _leave(" = %d [data]", copied);
+ return copied;
+
+ /* handle non-DATA messages such as aborts, incoming connections and
+ * final ACKs */
+receive_non_data_message:
+ _debug("non-data");
+
+ if (skb->mark == RXRPC_SKB_MARK_NEW_CALL) {
+ _debug("RECV NEW CALL");
+ ret = put_cmsg(msg, SOL_RXRPC, RXRPC_NEW_CALL, 0, &abort_code);
+ if (ret < 0)
+ goto copy_error;
+ if (!(flags & MSG_PEEK)) {
+ if (skb_dequeue(&rx->sk.sk_receive_queue) != skb)
+ BUG();
+ rxrpc_free_skb(skb);
+ }
+ goto out;
+ }
+
+ ret = put_cmsg(msg, SOL_RXRPC, RXRPC_USER_CALL_ID,
+ ullen, &call->user_call_ID);
+ if (ret < 0)
+ goto copy_error;
+ ASSERT(test_bit(RXRPC_CALL_HAS_USERID, &call->flags));
+
+ switch (skb->mark) {
+ case RXRPC_SKB_MARK_DATA:
+ BUG();
+ case RXRPC_SKB_MARK_FINAL_ACK:
+ ret = put_cmsg(msg, SOL_RXRPC, RXRPC_ACK, 0, &abort_code);
+ break;
+ case RXRPC_SKB_MARK_BUSY:
+ ret = put_cmsg(msg, SOL_RXRPC, RXRPC_BUSY, 0, &abort_code);
+ break;
+ case RXRPC_SKB_MARK_REMOTE_ABORT:
+ abort_code = call->abort_code;
+ ret = put_cmsg(msg, SOL_RXRPC, RXRPC_ABORT, 4, &abort_code);
+ break;
+ case RXRPC_SKB_MARK_NET_ERROR:
+ _debug("RECV NET ERROR %d", sp->error);
+ abort_code = sp->error;
+ ret = put_cmsg(msg, SOL_RXRPC, RXRPC_NET_ERROR, 4, &abort_code);
+ break;
+ case RXRPC_SKB_MARK_LOCAL_ERROR:
+ _debug("RECV LOCAL ERROR %d", sp->error);
+ abort_code = sp->error;
+ ret = put_cmsg(msg, SOL_RXRPC, RXRPC_LOCAL_ERROR, 4,
+ &abort_code);
+ break;
+ default:
+ BUG();
+ break;
+ }
+
+ if (ret < 0)
+ goto copy_error;
+
+terminal_message:
+ _debug("terminal");
+ msg->msg_flags &= ~MSG_MORE;
+ msg->msg_flags |= MSG_EOR;
+
+ if (!(flags & MSG_PEEK)) {
+ _net("free terminal skb %p", skb);
+ if (skb_dequeue(&rx->sk.sk_receive_queue) != skb)
+ BUG();
+ rxrpc_free_skb(skb);
+ rxrpc_remove_user_ID(rx, call);
+ }
+
+ release_sock(&rx->sk);
+ rxrpc_put_call(call);
+ if (continue_call)
+ rxrpc_put_call(continue_call);
+ _leave(" = %d", ret);
+ return ret;
+
+copy_error:
+ _debug("copy error");
+ release_sock(&rx->sk);
+ rxrpc_put_call(call);
+ if (continue_call)
+ rxrpc_put_call(continue_call);
+ _leave(" = %d", ret);
+ return ret;
+
+csum_copy_error:
+ _debug("csum error");
+ release_sock(&rx->sk);
+ if (continue_call)
+ rxrpc_put_call(continue_call);
+ rxrpc_kill_skb(skb);
+ skb_kill_datagram(&rx->sk, skb, flags);
+ rxrpc_put_call(call);
+ return -EAGAIN;
+
+wait_interrupted:
+ ret = sock_intr_errno(timeo);
+wait_error:
+ finish_wait(rx->sk.sk_sleep, &wait);
+ if (continue_call)
+ rxrpc_put_call(continue_call);
+	if (copied == 0)
+		copied = ret;
+ _leave(" = %d [waitfail %d]", copied, ret);
+ return copied;
+}
+
+/**
+ * rxrpc_kernel_data_delivered - Record delivery of data message
+ * @skb: Message holding data
+ *
+ * Record the delivery of a data message. This permits RxRPC to keep its
+ * tracking correct. The socket buffer will be deleted.
+ */
+void rxrpc_kernel_data_delivered(struct sk_buff *skb)
+{
+ struct rxrpc_skb_priv *sp = rxrpc_skb(skb);
+ struct rxrpc_call *call = sp->call;
+
+ ASSERTCMP(ntohl(sp->hdr.seq), >=, call->rx_data_recv);
+ ASSERTCMP(ntohl(sp->hdr.seq), <=, call->rx_data_recv + 1);
+ call->rx_data_recv = ntohl(sp->hdr.seq);
+
+ ASSERTCMP(ntohl(sp->hdr.seq), >, call->rx_data_eaten);
+ rxrpc_free_skb(skb);
+}
+
+EXPORT_SYMBOL(rxrpc_kernel_data_delivered);
+
+/**
+ * rxrpc_kernel_is_data_last - Determine if data message is last one
+ * @skb: Message holding data
+ *
+ * Determine if data message is last one for the parent call.
+ */
+bool rxrpc_kernel_is_data_last(struct sk_buff *skb)
+{
+ struct rxrpc_skb_priv *sp = rxrpc_skb(skb);
+
+ ASSERTCMP(skb->mark, ==, RXRPC_SKB_MARK_DATA);
+
+ return sp->hdr.flags & RXRPC_LAST_PACKET;
+}
+
+EXPORT_SYMBOL(rxrpc_kernel_is_data_last);
+
+/**
+ * rxrpc_kernel_get_abort_code - Get the abort code from an RxRPC abort message
+ * @skb: Message indicating an abort
+ *
+ * Get the abort code from an RxRPC abort message.
+ */
+u32 rxrpc_kernel_get_abort_code(struct sk_buff *skb)
+{
+ struct rxrpc_skb_priv *sp = rxrpc_skb(skb);
+
+ ASSERTCMP(skb->mark, ==, RXRPC_SKB_MARK_REMOTE_ABORT);
+
+ return sp->call->abort_code;
+}
+
+EXPORT_SYMBOL(rxrpc_kernel_get_abort_code);
+
+/**
+ * rxrpc_kernel_get_error_number - Get the error number from an RxRPC error message
+ * @skb: Message indicating an error
+ *
+ * Get the error number from an RxRPC error message.
+ */
+int rxrpc_kernel_get_error_number(struct sk_buff *skb)
+{
+ struct rxrpc_skb_priv *sp = rxrpc_skb(skb);
+
+ return sp->error;
+}
+
+EXPORT_SYMBOL(rxrpc_kernel_get_error_number);
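
The control messages queued by rxrpc_recvmsg() are how userspace learns which
call a chunk of data belongs to and how a call ended. A hypothetical receive
loop (a sketch, not from the patch; it assumes the RXRPC_* cmsg types above
are exported to applications through a <linux/rxrpc.h> style header):

#include <stdio.h>
#include <string.h>
#include <sys/socket.h>
#include <linux/rxrpc.h>	/* assumed home of SOL_RXRPC, RXRPC_* */

static ssize_t recv_reply(int fd, void *buf, size_t buflen)
{
	char control[128];
	struct iovec iov = { .iov_base = buf, .iov_len = buflen };
	struct msghdr msg = {
		.msg_iov	= &iov,
		.msg_iovlen	= 1,
		.msg_control	= control,
		.msg_controllen	= sizeof(control),
	};
	struct cmsghdr *cmsg;
	unsigned long call_id = 0;
	unsigned int code;
	ssize_t n;

	n = recvmsg(fd, &msg, 0);
	if (n < 0)
		return n;

	for (cmsg = CMSG_FIRSTHDR(&msg); cmsg;
	     cmsg = CMSG_NXTHDR(&msg, cmsg)) {
		if (cmsg->cmsg_level != SOL_RXRPC)
			continue;
		switch (cmsg->cmsg_type) {
		case RXRPC_USER_CALL_ID:	/* always present */
			memcpy(&call_id, CMSG_DATA(cmsg), sizeof(call_id));
			break;
		case RXRPC_ABORT:		/* peer aborted the call */
			memcpy(&code, CMSG_DATA(cmsg), sizeof(code));
			fprintf(stderr, "abort, code %u\n", code);
			break;
		case RXRPC_NET_ERROR:
		case RXRPC_LOCAL_ERROR:
		case RXRPC_BUSY:
		case RXRPC_ACK:			/* final ACK: call over */
			break;
		}
	}

	/* MSG_EOR accompanies the terminal message of a call */
	if (msg.msg_flags & MSG_EOR)
		printf("call %lx complete\n", call_id);
	return n;
}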
diff --git a/net/rxrpc/ar-security.c b/net/rxrpc/ar-security.c
new file mode 100644
index 00000000000..60d1d364430
--- /dev/null
+++ b/net/rxrpc/ar-security.c
@@ -0,0 +1,257 @@
+/* RxRPC security handling
+ *
+ * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/module.h>
+#include <linux/net.h>
+#include <linux/skbuff.h>
+#include <linux/udp.h>
+#include <linux/crypto.h>
+#include <net/sock.h>
+#include <net/af_rxrpc.h>
+#include "ar-internal.h"
+
+static LIST_HEAD(rxrpc_security_methods);
+static DECLARE_RWSEM(rxrpc_security_sem);
+
+/*
+ * get an RxRPC security module
+ */
+static struct rxrpc_security *rxrpc_security_get(struct rxrpc_security *sec)
+{
+ return try_module_get(sec->owner) ? sec : NULL;
+}
+
+/*
+ * release an RxRPC security module
+ */
+static void rxrpc_security_put(struct rxrpc_security *sec)
+{
+ module_put(sec->owner);
+}
+
+/*
+ * look up an rxrpc security module
+ */
+struct rxrpc_security *rxrpc_security_lookup(u8 security_index)
+{
+ struct rxrpc_security *sec = NULL;
+
+ _enter("");
+
+ down_read(&rxrpc_security_sem);
+
+ list_for_each_entry(sec, &rxrpc_security_methods, link) {
+ if (sec->security_index == security_index) {
+ if (unlikely(!rxrpc_security_get(sec)))
+ break;
+ goto out;
+ }
+ }
+
+ sec = NULL;
+out:
+ up_read(&rxrpc_security_sem);
+ _leave(" = %p [%s]", sec, sec ? sec->name : "");
+ return sec;
+}
+
+/**
+ * rxrpc_register_security - register an RxRPC security handler
+ * @sec: security module
+ *
+ * register an RxRPC security handler for use by RxRPC
+ */
+int rxrpc_register_security(struct rxrpc_security *sec)
+{
+ struct rxrpc_security *psec;
+ int ret;
+
+ _enter("");
+ down_write(&rxrpc_security_sem);
+
+ ret = -EEXIST;
+ list_for_each_entry(psec, &rxrpc_security_methods, link) {
+ if (psec->security_index == sec->security_index)
+ goto out;
+ }
+
+ list_add(&sec->link, &rxrpc_security_methods);
+
+ printk(KERN_NOTICE "RxRPC: Registered security type %d '%s'\n",
+ sec->security_index, sec->name);
+ ret = 0;
+
+out:
+ up_write(&rxrpc_security_sem);
+ _leave(" = %d", ret);
+ return ret;
+}
+
+EXPORT_SYMBOL_GPL(rxrpc_register_security);
+
+/**
+ * rxrpc_unregister_security - unregister an RxRPC security handler
+ * @sec: security module
+ *
+ * unregister an RxRPC security handler
+ */
+void rxrpc_unregister_security(struct rxrpc_security *sec)
+{
+ _enter("");
+ down_write(&rxrpc_security_sem);
+ list_del_init(&sec->link);
+ up_write(&rxrpc_security_sem);
+
+ printk(KERN_NOTICE "RxRPC: Unregistered security type %d '%s'\n",
+ sec->security_index, sec->name);
+}
+
+EXPORT_SYMBOL_GPL(rxrpc_unregister_security);
+
+/*
+ * initialise the security on a client connection
+ */
+int rxrpc_init_client_conn_security(struct rxrpc_connection *conn)
+{
+ struct rxrpc_security *sec;
+ struct key *key = conn->key;
+ int ret;
+
+ _enter("{%d},{%x}", conn->debug_id, key_serial(key));
+
+ if (!key)
+ return 0;
+
+ ret = key_validate(key);
+ if (ret < 0)
+ return ret;
+
+ sec = rxrpc_security_lookup(key->type_data.x[0]);
+ if (!sec)
+ return -EKEYREJECTED;
+ conn->security = sec;
+
+ ret = conn->security->init_connection_security(conn);
+ if (ret < 0) {
+ rxrpc_security_put(conn->security);
+ conn->security = NULL;
+ return ret;
+ }
+
+ _leave(" = 0");
+ return 0;
+}
+
+/*
+ * initialise the security on a server connection
+ */
+int rxrpc_init_server_conn_security(struct rxrpc_connection *conn)
+{
+ struct rxrpc_security *sec;
+ struct rxrpc_local *local = conn->trans->local;
+ struct rxrpc_sock *rx;
+ struct key *key;
+ key_ref_t kref;
+ char kdesc[5+1+3+1];
+
+ _enter("");
+
+ sprintf(kdesc, "%u:%u", ntohs(conn->service_id), conn->security_ix);
+
+ sec = rxrpc_security_lookup(conn->security_ix);
+ if (!sec) {
+ _leave(" = -ENOKEY [lookup]");
+ return -ENOKEY;
+ }
+
+ /* find the service */
+ read_lock_bh(&local->services_lock);
+ list_for_each_entry(rx, &local->services, listen_link) {
+ if (rx->service_id == conn->service_id)
+ goto found_service;
+ }
+
+ /* the service appears to have died */
+ read_unlock_bh(&local->services_lock);
+ rxrpc_security_put(sec);
+ _leave(" = -ENOENT");
+ return -ENOENT;
+
+found_service:
+ if (!rx->securities) {
+ read_unlock_bh(&local->services_lock);
+ rxrpc_security_put(sec);
+ _leave(" = -ENOKEY");
+ return -ENOKEY;
+ }
+
+ /* look through the service's keyring */
+ kref = keyring_search(make_key_ref(rx->securities, 1UL),
+ &key_type_rxrpc_s, kdesc);
+ if (IS_ERR(kref)) {
+ read_unlock_bh(&local->services_lock);
+ rxrpc_security_put(sec);
+ _leave(" = %ld [search]", PTR_ERR(kref));
+ return PTR_ERR(kref);
+ }
+
+ key = key_ref_to_ptr(kref);
+ read_unlock_bh(&local->services_lock);
+
+ conn->server_key = key;
+ conn->security = sec;
+
+ _leave(" = 0");
+ return 0;
+}
+
+/*
+ * secure a packet prior to transmission
+ */
+int rxrpc_secure_packet(const struct rxrpc_call *call,
+ struct sk_buff *skb,
+ size_t data_size,
+ void *sechdr)
+{
+ if (call->conn->security)
+ return call->conn->security->secure_packet(
+ call, skb, data_size, sechdr);
+ return 0;
+}
+
+/*
+ * verify the security on a received packet
+ */
+int rxrpc_verify_packet(const struct rxrpc_call *call, struct sk_buff *skb,
+ u32 *_abort_code)
+{
+ if (call->conn->security)
+ return call->conn->security->verify_packet(
+ call, skb, _abort_code);
+ return 0;
+}
+
+/*
+ * clear connection security
+ */
+void rxrpc_clear_conn_security(struct rxrpc_connection *conn)
+{
+ _enter("{%d}", conn->debug_id);
+
+ if (conn->security) {
+ conn->security->clear(conn);
+ rxrpc_security_put(conn->security);
+ conn->security = NULL;
+ }
+
+ key_put(conn->key);
+ key_put(conn->server_key);
+}
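
A security module plugs in by filling in the handful of fields and operations
this file actually touches. A minimal skeleton under that assumption (hedged:
the real struct rxrpc_security in ar-internal.h very likely carries further
operations, for the challenge/response handshake among other things, that a
working module would also have to provide; the "none" module and its index
are invented for illustration):

#include <linux/module.h>
#include <net/af_rxrpc.h>
#include "ar-internal.h"

static int none_init_connection_security(struct rxrpc_connection *conn)
{
	return 0;	/* nothing to set up */
}

static int none_secure_packet(const struct rxrpc_call *call,
			      struct sk_buff *skb, size_t data_size,
			      void *sechdr)
{
	return 0;	/* leave the packet in the clear */
}

static int none_verify_packet(const struct rxrpc_call *call,
			      struct sk_buff *skb, u32 *_abort_code)
{
	return 0;	/* accept everything */
}

static void none_clear(struct rxrpc_connection *conn)
{
}

static struct rxrpc_security rxrpc_none_security = {
	.owner				= THIS_MODULE,
	.name				= "none",
	.security_index			= 0,	/* hypothetical index */
	.init_connection_security	= none_init_connection_security,
	.secure_packet			= none_secure_packet,
	.verify_packet			= none_verify_packet,
	.clear				= none_clear,
};

static int __init none_init(void)
{
	return rxrpc_register_security(&rxrpc_none_security);
}

static void __exit none_exit(void)
{
	rxrpc_unregister_security(&rxrpc_none_security);
}

module_init(none_init);
module_exit(none_exit);
MODULE_LICENSE("GPL");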
diff --git a/net/rxrpc/ar-skbuff.c b/net/rxrpc/ar-skbuff.c
new file mode 100644
index 00000000000..de755e04d29
--- /dev/null
+++ b/net/rxrpc/ar-skbuff.c
@@ -0,0 +1,132 @@
+/* ar-skbuff.c: socket buffer destruction handling
+ *
+ * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/module.h>
+#include <linux/net.h>
+#include <linux/skbuff.h>
+#include <net/sock.h>
+#include <net/af_rxrpc.h>
+#include "ar-internal.h"
+
+/*
+ * set up for the ACK at the end of the receive phase when we discard the final
+ * receive phase data packet
+ * - called with softirqs disabled
+ */
+static void rxrpc_request_final_ACK(struct rxrpc_call *call)
+{
+ /* the call may be aborted before we have a chance to ACK it */
+ write_lock(&call->state_lock);
+
+ switch (call->state) {
+ case RXRPC_CALL_CLIENT_RECV_REPLY:
+ call->state = RXRPC_CALL_CLIENT_FINAL_ACK;
+ _debug("request final ACK");
+
+ /* get an extra ref on the call for the final-ACK generator to
+ * release */
+ rxrpc_get_call(call);
+ set_bit(RXRPC_CALL_ACK_FINAL, &call->events);
+ if (try_to_del_timer_sync(&call->ack_timer) >= 0)
+ rxrpc_queue_call(call);
+ break;
+
+ case RXRPC_CALL_SERVER_RECV_REQUEST:
+ call->state = RXRPC_CALL_SERVER_ACK_REQUEST;
+ default:
+ break;
+ }
+
+ write_unlock(&call->state_lock);
+}
+
+/*
+ * drop the bottom ACK off of the call ACK window and advance the window
+ */
+static void rxrpc_hard_ACK_data(struct rxrpc_call *call,
+ struct rxrpc_skb_priv *sp)
+{
+ int loop;
+ u32 seq;
+
+ spin_lock_bh(&call->lock);
+
+ _debug("hard ACK #%u", ntohl(sp->hdr.seq));
+
+ for (loop = 0; loop < RXRPC_ACKR_WINDOW_ASZ; loop++) {
+ call->ackr_window[loop] >>= 1;
+ call->ackr_window[loop] |=
+ call->ackr_window[loop + 1] << (BITS_PER_LONG - 1);
+ }
+
+ seq = ntohl(sp->hdr.seq);
+ ASSERTCMP(seq, ==, call->rx_data_eaten + 1);
+ call->rx_data_eaten = seq;
+
+ if (call->ackr_win_top < UINT_MAX)
+ call->ackr_win_top++;
+
+ ASSERTIFCMP(call->state <= RXRPC_CALL_COMPLETE,
+ call->rx_data_post, >=, call->rx_data_recv);
+ ASSERTIFCMP(call->state <= RXRPC_CALL_COMPLETE,
+ call->rx_data_recv, >=, call->rx_data_eaten);
+
+ if (sp->hdr.flags & RXRPC_LAST_PACKET) {
+ rxrpc_request_final_ACK(call);
+ } else if (atomic_dec_and_test(&call->ackr_not_idle) &&
+ test_and_clear_bit(RXRPC_CALL_TX_SOFT_ACK, &call->flags)) {
+ _debug("send Rx idle ACK");
+ __rxrpc_propose_ACK(call, RXRPC_ACK_IDLE, sp->hdr.serial,
+ true);
+ }
+
+ spin_unlock_bh(&call->lock);
+}
+
+/*
+ * destroy a packet that has an RxRPC control buffer
+ * - advance the hard-ACK state of the parent call (done here in case something
+ * in the kernel bypasses recvmsg() and steals the packet directly off of the
+ * socket receive queue)
+ */
+void rxrpc_packet_destructor(struct sk_buff *skb)
+{
+ struct rxrpc_skb_priv *sp = rxrpc_skb(skb);
+ struct rxrpc_call *call = sp->call;
+
+ _enter("%p{%p}", skb, call);
+
+ if (call) {
+ /* send the final ACK on a client call */
+ if (sp->hdr.type == RXRPC_PACKET_TYPE_DATA)
+ rxrpc_hard_ACK_data(call, sp);
+ rxrpc_put_call(call);
+ sp->call = NULL;
+ }
+
+ if (skb->sk)
+ sock_rfree(skb);
+ _leave("");
+}
+
+/**
+ * rxrpc_kernel_free_skb - Free an RxRPC socket buffer
+ * @skb: The socket buffer to be freed
+ *
+ * Let RxRPC free its own socket buffer, permitting it to maintain debug
+ * accounting.
+ */
+void rxrpc_kernel_free_skb(struct sk_buff *skb)
+{
+ rxrpc_free_skb(skb);
+}
+
+EXPORT_SYMBOL(rxrpc_kernel_free_skb);
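
The loop in rxrpc_hard_ACK_data() slides a multi-word ACK bitmap down one
bit: each word loses its bottom bit and gains the bottom bit of the next word
at its top. A standalone demonstration of that shift (sizes here are
illustrative; the kernel's array evidently keeps a word beyond
RXRPC_ACKR_WINDOW_ASZ so that element [loop + 1] is always valid):

#include <stdio.h>

#define WORDS 2
#define BITS_PER_LONG (8 * (int) sizeof(unsigned long))

int main(void)
{
	unsigned long window[WORDS + 1] = { 0x5UL, 0x1UL, 0UL };
	int loop;

	for (loop = 0; loop < WORDS; loop++) {
		window[loop] >>= 1;
		window[loop] |= window[loop + 1] << (BITS_PER_LONG - 1);
	}

	/* 0x5 (..101) halves to 0x2 and inherits bit 0 of the next word
	 * as its new top bit: { 0x8000000000000002, 0 } on 64-bit */
	for (loop = 0; loop < WORDS; loop++)
		printf("window[%d] = %#lx\n", loop, window[loop]);
	return 0;
}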
diff --git a/net/rxrpc/ar-transport.c b/net/rxrpc/ar-transport.c
new file mode 100644
index 00000000000..d43d78f1930
--- /dev/null
+++ b/net/rxrpc/ar-transport.c
@@ -0,0 +1,277 @@
+/* RxRPC point-to-point transport session management
+ *
+ * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/module.h>
+#include <linux/net.h>
+#include <linux/skbuff.h>
+#include <net/sock.h>
+#include <net/af_rxrpc.h>
+#include "ar-internal.h"
+
+static void rxrpc_transport_reaper(struct work_struct *work);
+
+static LIST_HEAD(rxrpc_transports);
+static DEFINE_RWLOCK(rxrpc_transport_lock);
+static unsigned long rxrpc_transport_timeout = 3600 * 24;
+static DECLARE_DELAYED_WORK(rxrpc_transport_reap, rxrpc_transport_reaper);
+
+/*
+ * allocate a new transport session manager
+ */
+static struct rxrpc_transport *rxrpc_alloc_transport(struct rxrpc_local *local,
+ struct rxrpc_peer *peer,
+ gfp_t gfp)
+{
+ struct rxrpc_transport *trans;
+
+ _enter("");
+
+ trans = kzalloc(sizeof(struct rxrpc_transport), gfp);
+ if (trans) {
+ trans->local = local;
+ trans->peer = peer;
+ INIT_LIST_HEAD(&trans->link);
+ trans->bundles = RB_ROOT;
+ trans->client_conns = RB_ROOT;
+ trans->server_conns = RB_ROOT;
+ skb_queue_head_init(&trans->error_queue);
+ spin_lock_init(&trans->client_lock);
+ rwlock_init(&trans->conn_lock);
+ atomic_set(&trans->usage, 1);
+ trans->debug_id = atomic_inc_return(&rxrpc_debug_id);
+
+ if (peer->srx.transport.family == AF_INET) {
+ switch (peer->srx.transport_type) {
+ case SOCK_DGRAM:
+ INIT_WORK(&trans->error_handler,
+ rxrpc_UDP_error_handler);
+ break;
+ default:
+ BUG();
+ break;
+ }
+ } else {
+ BUG();
+ }
+ }
+
+ _leave(" = %p", trans);
+ return trans;
+}
+
+/*
+ * obtain a transport session for the nominated endpoints
+ */
+struct rxrpc_transport *rxrpc_get_transport(struct rxrpc_local *local,
+ struct rxrpc_peer *peer,
+ gfp_t gfp)
+{
+ struct rxrpc_transport *trans, *candidate;
+ const char *new = "old";
+ int usage;
+
+ _enter("{%u.%u.%u.%u+%hu},{%u.%u.%u.%u+%hu},",
+ NIPQUAD(local->srx.transport.sin.sin_addr),
+ ntohs(local->srx.transport.sin.sin_port),
+ NIPQUAD(peer->srx.transport.sin.sin_addr),
+ ntohs(peer->srx.transport.sin.sin_port));
+
+ /* search the transport list first */
+ read_lock_bh(&rxrpc_transport_lock);
+ list_for_each_entry(trans, &rxrpc_transports, link) {
+ if (trans->local == local && trans->peer == peer)
+ goto found_extant_transport;
+ }
+ read_unlock_bh(&rxrpc_transport_lock);
+
+ /* not yet present - create a candidate for a new record and then
+ * redo the search */
+ candidate = rxrpc_alloc_transport(local, peer, gfp);
+ if (!candidate) {
+ _leave(" = -ENOMEM");
+ return ERR_PTR(-ENOMEM);
+ }
+
+ write_lock_bh(&rxrpc_transport_lock);
+
+ list_for_each_entry(trans, &rxrpc_transports, link) {
+ if (trans->local == local && trans->peer == peer)
+ goto found_extant_second;
+ }
+
+ /* we can now add the new candidate to the list */
+ trans = candidate;
+ candidate = NULL;
+
+ rxrpc_get_local(trans->local);
+ atomic_inc(&trans->peer->usage);
+ list_add_tail(&trans->link, &rxrpc_transports);
+ write_unlock_bh(&rxrpc_transport_lock);
+ new = "new";
+
+success:
+ _net("TRANSPORT %s %d local %d -> peer %d",
+ new,
+ trans->debug_id,
+ trans->local->debug_id,
+ trans->peer->debug_id);
+
+ _leave(" = %p {u=%d}", trans, atomic_read(&trans->usage));
+ return trans;
+
+ /* we found the transport in the list immediately */
+found_extant_transport:
+ usage = atomic_inc_return(&trans->usage);
+ read_unlock_bh(&rxrpc_transport_lock);
+ goto success;
+
+ /* we found the transport on the second time through the list */
+found_extant_second:
+ usage = atomic_inc_return(&trans->usage);
+ write_unlock_bh(&rxrpc_transport_lock);
+ kfree(candidate);
+ goto success;
+}
+
+/*
+ * find the transport connecting two endpoints
+ */
+struct rxrpc_transport *rxrpc_find_transport(struct rxrpc_local *local,
+ struct rxrpc_peer *peer)
+{
+ struct rxrpc_transport *trans;
+
+ _enter("{%u.%u.%u.%u+%hu},{%u.%u.%u.%u+%hu},",
+ NIPQUAD(local->srx.transport.sin.sin_addr),
+ ntohs(local->srx.transport.sin.sin_port),
+ NIPQUAD(peer->srx.transport.sin.sin_addr),
+ ntohs(peer->srx.transport.sin.sin_port));
+
+ /* search the transport list */
+ read_lock_bh(&rxrpc_transport_lock);
+
+ list_for_each_entry(trans, &rxrpc_transports, link) {
+ if (trans->local == local && trans->peer == peer)
+ goto found_extant_transport;
+ }
+
+ read_unlock_bh(&rxrpc_transport_lock);
+ _leave(" = NULL");
+ return NULL;
+
+found_extant_transport:
+ atomic_inc(&trans->usage);
+ read_unlock_bh(&rxrpc_transport_lock);
+ _leave(" = %p", trans);
+ return trans;
+}
+
+/*
+ * release a transport session
+ */
+void rxrpc_put_transport(struct rxrpc_transport *trans)
+{
+ _enter("%p{u=%d}", trans, atomic_read(&trans->usage));
+
+ ASSERTCMP(atomic_read(&trans->usage), >, 0);
+
+ trans->put_time = xtime.tv_sec;
+	if (unlikely(atomic_dec_and_test(&trans->usage))) {
+		_debug("zombie");
+		/* let the reaper determine the timeout to avoid a race with
+		 * overextending the timeout if the reaper is running at the
+		 * same time */
+		rxrpc_queue_delayed_work(&rxrpc_transport_reap, 0);
+	}
+ _leave("");
+}
+
+/*
+ * clean up a transport session
+ */
+static void rxrpc_cleanup_transport(struct rxrpc_transport *trans)
+{
+ _net("DESTROY TRANS %d", trans->debug_id);
+
+ rxrpc_purge_queue(&trans->error_queue);
+
+ rxrpc_put_local(trans->local);
+ rxrpc_put_peer(trans->peer);
+ kfree(trans);
+}
+
+/*
+ * reap dead transports that have passed their expiry date
+ */
+static void rxrpc_transport_reaper(struct work_struct *work)
+{
+ struct rxrpc_transport *trans, *_p;
+ unsigned long now, earliest, reap_time;
+
+ LIST_HEAD(graveyard);
+
+ _enter("");
+
+ now = xtime.tv_sec;
+ earliest = ULONG_MAX;
+
+ /* extract all the transports that have been dead too long */
+ write_lock_bh(&rxrpc_transport_lock);
+ list_for_each_entry_safe(trans, _p, &rxrpc_transports, link) {
+ _debug("reap TRANS %d { u=%d t=%ld }",
+ trans->debug_id, atomic_read(&trans->usage),
+ (long) now - (long) trans->put_time);
+
+ if (likely(atomic_read(&trans->usage) > 0))
+ continue;
+
+ reap_time = trans->put_time + rxrpc_transport_timeout;
+ if (reap_time <= now)
+ list_move_tail(&trans->link, &graveyard);
+ else if (reap_time < earliest)
+ earliest = reap_time;
+ }
+ write_unlock_bh(&rxrpc_transport_lock);
+
+ if (earliest != ULONG_MAX) {
+ _debug("reschedule reaper %ld", (long) earliest - now);
+ ASSERTCMP(earliest, >, now);
+ rxrpc_queue_delayed_work(&rxrpc_transport_reap,
+ (earliest - now) * HZ);
+ }
+
+ /* then destroy all those pulled out */
+ while (!list_empty(&graveyard)) {
+ trans = list_entry(graveyard.next, struct rxrpc_transport,
+ link);
+ list_del_init(&trans->link);
+
+ ASSERTCMP(atomic_read(&trans->usage), ==, 0);
+ rxrpc_cleanup_transport(trans);
+ }
+
+ _leave("");
+}
+
+/*
+ * preemptively destroy all the transport session records rather than waiting
+ * for them to time out
+ */
+void __exit rxrpc_destroy_all_transports(void)
+{
+ _enter("");
+
+ rxrpc_transport_timeout = 0;
+ cancel_delayed_work(&rxrpc_transport_reap);
+ rxrpc_queue_delayed_work(&rxrpc_transport_reap, 0);
+
+ _leave("");
+}
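
The reaper implements lazy destruction: rxrpc_put_transport() only timestamps
the record and kicks the work item, and the reaper alone decides what is
stale and when it should next run. A generic sketch of the same pattern using
the stock workqueue API (rxrpc_queue_delayed_work() is an rxrpc wrapper not
shown in this hunk; assume it behaves like schedule_delayed_work(), and note
that struct thing and its list are invented for illustration):

#include <linux/kernel.h>
#include <linux/workqueue.h>
#include <linux/list.h>
#include <linux/spinlock.h>
#include <linux/slab.h>
#include <linux/time.h>
#include <linux/jiffies.h>
#include <asm/atomic.h>

struct thing {
	struct list_head link;
	atomic_t usage;
	unsigned long put_time;		/* xtime.tv_sec at last put */
};

static LIST_HEAD(things);
static DEFINE_SPINLOCK(things_lock);
static unsigned long thing_timeout = 60;	/* idle seconds before reap */

static void thing_reaper(struct work_struct *work);
static DECLARE_DELAYED_WORK(thing_reap, thing_reaper);

void put_thing(struct thing *t)
{
	t->put_time = xtime.tv_sec;
	if (atomic_dec_and_test(&t->usage)) {
		/* don't compute the delay here: the reaper recalculates,
		 * which avoids racing with a pass already in progress */
		schedule_delayed_work(&thing_reap, 0);
	}
}

static void thing_reaper(struct work_struct *work)
{
	struct thing *t, *next;
	unsigned long now = xtime.tv_sec, earliest = ULONG_MAX, reap_time;
	LIST_HEAD(graveyard);

	/* move expired entries to a graveyard, noting the next expiry */
	spin_lock(&things_lock);
	list_for_each_entry_safe(t, next, &things, link) {
		if (atomic_read(&t->usage) > 0)
			continue;
		reap_time = t->put_time + thing_timeout;
		if (reap_time <= now)
			list_move_tail(&t->link, &graveyard);
		else if (reap_time < earliest)
			earliest = reap_time;
	}
	spin_unlock(&things_lock);

	/* re-arm for the next-soonest expiry, if any */
	if (earliest != ULONG_MAX)
		schedule_delayed_work(&thing_reap, (earliest - now) * HZ);

	while (!list_empty(&graveyard)) {
		t = list_entry(graveyard.next, struct thing, link);
		list_del(&t->link);
		kfree(t);
	}
}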
diff --git a/net/rxrpc/call.c b/net/rxrpc/call.c
deleted file mode 100644
index d07122b57e0..00000000000
--- a/net/rxrpc/call.c
+++ /dev/null
@@ -1,2277 +0,0 @@
-/* call.c: Rx call routines
- *
- * Copyright (C) 2002 Red Hat, Inc. All Rights Reserved.
- * Written by David Howells (dhowells@redhat.com)
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
- */
-
-#include <linux/sched.h>
-#include <linux/slab.h>
-#include <linux/module.h>
-#include <rxrpc/rxrpc.h>
-#include <rxrpc/transport.h>
-#include <rxrpc/peer.h>
-#include <rxrpc/connection.h>
-#include <rxrpc/call.h>
-#include <rxrpc/message.h>
-#include "internal.h"
-
-__RXACCT_DECL(atomic_t rxrpc_call_count);
-__RXACCT_DECL(atomic_t rxrpc_message_count);
-
-LIST_HEAD(rxrpc_calls);
-DECLARE_RWSEM(rxrpc_calls_sem);
-
-unsigned rxrpc_call_rcv_timeout = HZ/3;
-static unsigned rxrpc_call_acks_timeout = HZ/3;
-static unsigned rxrpc_call_dfr_ack_timeout = HZ/20;
-static unsigned short rxrpc_call_max_resend = HZ/10;
-
-const char *rxrpc_call_states[] = {
- "COMPLETE",
- "ERROR",
- "SRVR_RCV_OPID",
- "SRVR_RCV_ARGS",
- "SRVR_GOT_ARGS",
- "SRVR_SND_REPLY",
- "SRVR_RCV_FINAL_ACK",
- "CLNT_SND_ARGS",
- "CLNT_RCV_REPLY",
- "CLNT_GOT_REPLY"
-};
-
-const char *rxrpc_call_error_states[] = {
- "NO_ERROR",
- "LOCAL_ABORT",
- "PEER_ABORT",
- "LOCAL_ERROR",
- "REMOTE_ERROR"
-};
-
-const char *rxrpc_pkts[] = {
- "?00",
- "data", "ack", "busy", "abort", "ackall", "chall", "resp", "debug",
- "?09", "?10", "?11", "?12", "?13", "?14", "?15"
-};
-
-static const char *rxrpc_acks[] = {
- "---", "REQ", "DUP", "SEQ", "WIN", "MEM", "PNG", "PNR", "DLY", "IDL",
- "-?-"
-};
-
-static const char _acktype[] = "NA-";
-
-static void rxrpc_call_receive_packet(struct rxrpc_call *call);
-static void rxrpc_call_receive_data_packet(struct rxrpc_call *call,
- struct rxrpc_message *msg);
-static void rxrpc_call_receive_ack_packet(struct rxrpc_call *call,
- struct rxrpc_message *msg);
-static void rxrpc_call_definitively_ACK(struct rxrpc_call *call,
- rxrpc_seq_t higest);
-static void rxrpc_call_resend(struct rxrpc_call *call, rxrpc_seq_t highest);
-static int __rxrpc_call_read_data(struct rxrpc_call *call);
-
-static int rxrpc_call_record_ACK(struct rxrpc_call *call,
- struct rxrpc_message *msg,
- rxrpc_seq_t seq,
- size_t count);
-
-static int rxrpc_call_flush(struct rxrpc_call *call);
-
-#define _state(call) \
- _debug("[[[ state %s ]]]", rxrpc_call_states[call->app_call_state]);
-
-static void rxrpc_call_default_attn_func(struct rxrpc_call *call)
-{
- wake_up(&call->waitq);
-}
-
-static void rxrpc_call_default_error_func(struct rxrpc_call *call)
-{
- wake_up(&call->waitq);
-}
-
-static void rxrpc_call_default_aemap_func(struct rxrpc_call *call)
-{
- switch (call->app_err_state) {
- case RXRPC_ESTATE_LOCAL_ABORT:
- call->app_abort_code = -call->app_errno;
- case RXRPC_ESTATE_PEER_ABORT:
- call->app_errno = -ECONNABORTED;
- default:
- break;
- }
-}
-
-static void __rxrpc_call_acks_timeout(unsigned long _call)
-{
- struct rxrpc_call *call = (struct rxrpc_call *) _call;
-
- _debug("ACKS TIMEOUT %05lu", jiffies - call->cjif);
-
- call->flags |= RXRPC_CALL_ACKS_TIMO;
- rxrpc_krxiod_queue_call(call);
-}
-
-static void __rxrpc_call_rcv_timeout(unsigned long _call)
-{
- struct rxrpc_call *call = (struct rxrpc_call *) _call;
-
- _debug("RCV TIMEOUT %05lu", jiffies - call->cjif);
-
- call->flags |= RXRPC_CALL_RCV_TIMO;
- rxrpc_krxiod_queue_call(call);
-}
-
-static void __rxrpc_call_ackr_timeout(unsigned long _call)
-{
- struct rxrpc_call *call = (struct rxrpc_call *) _call;
-
- _debug("ACKR TIMEOUT %05lu",jiffies - call->cjif);
-
- call->flags |= RXRPC_CALL_ACKR_TIMO;
- rxrpc_krxiod_queue_call(call);
-}
-
-/*****************************************************************************/
-/*
- * calculate a timeout based on an RTT value
- */
-static inline unsigned long __rxrpc_rtt_based_timeout(struct rxrpc_call *call,
- unsigned long val)
-{
- unsigned long expiry = call->conn->peer->rtt / (1000000 / HZ);
-
- expiry += 10;
- if (expiry < HZ / 25)
- expiry = HZ / 25;
- if (expiry > HZ)
- expiry = HZ;
-
- _leave(" = %lu jiffies", expiry);
- return jiffies + expiry;
-} /* end __rxrpc_rtt_based_timeout() */
-
-/*****************************************************************************/
-/*
- * create a new call record
- */
-static inline int __rxrpc_create_call(struct rxrpc_connection *conn,
- struct rxrpc_call **_call)
-{
- struct rxrpc_call *call;
-
- _enter("%p", conn);
-
- /* allocate and initialise a call record */
- call = (struct rxrpc_call *) get_zeroed_page(GFP_KERNEL);
- if (!call) {
- _leave(" ENOMEM");
- return -ENOMEM;
- }
-
- atomic_set(&call->usage, 1);
-
- init_waitqueue_head(&call->waitq);
- spin_lock_init(&call->lock);
- INIT_LIST_HEAD(&call->link);
- INIT_LIST_HEAD(&call->acks_pendq);
- INIT_LIST_HEAD(&call->rcv_receiveq);
- INIT_LIST_HEAD(&call->rcv_krxiodq_lk);
- INIT_LIST_HEAD(&call->app_readyq);
- INIT_LIST_HEAD(&call->app_unreadyq);
- INIT_LIST_HEAD(&call->app_link);
- INIT_LIST_HEAD(&call->app_attn_link);
-
- init_timer(&call->acks_timeout);
- call->acks_timeout.data = (unsigned long) call;
- call->acks_timeout.function = __rxrpc_call_acks_timeout;
-
- init_timer(&call->rcv_timeout);
- call->rcv_timeout.data = (unsigned long) call;
- call->rcv_timeout.function = __rxrpc_call_rcv_timeout;
-
- init_timer(&call->ackr_dfr_timo);
- call->ackr_dfr_timo.data = (unsigned long) call;
- call->ackr_dfr_timo.function = __rxrpc_call_ackr_timeout;
-
- call->conn = conn;
- call->ackr_win_bot = 1;
- call->ackr_win_top = call->ackr_win_bot + RXRPC_CALL_ACK_WINDOW_SIZE - 1;
- call->ackr_prev_seq = 0;
- call->app_mark = RXRPC_APP_MARK_EOF;
- call->app_attn_func = rxrpc_call_default_attn_func;
- call->app_error_func = rxrpc_call_default_error_func;
- call->app_aemap_func = rxrpc_call_default_aemap_func;
- call->app_scr_alloc = call->app_scratch;
-
- call->cjif = jiffies;
-
- _leave(" = 0 (%p)", call);
-
- *_call = call;
-
- return 0;
-} /* end __rxrpc_create_call() */
-
-/*****************************************************************************/
-/*
- * create a new call record for outgoing calls
- */
-int rxrpc_create_call(struct rxrpc_connection *conn,
- rxrpc_call_attn_func_t attn,
- rxrpc_call_error_func_t error,
- rxrpc_call_aemap_func_t aemap,
- struct rxrpc_call **_call)
-{
- DECLARE_WAITQUEUE(myself, current);
-
- struct rxrpc_call *call;
- int ret, cix, loop;
-
- _enter("%p", conn);
-
- /* allocate and initialise a call record */
- ret = __rxrpc_create_call(conn, &call);
- if (ret < 0) {
- _leave(" = %d", ret);
- return ret;
- }
-
- call->app_call_state = RXRPC_CSTATE_CLNT_SND_ARGS;
- if (attn)
- call->app_attn_func = attn;
- if (error)
- call->app_error_func = error;
- if (aemap)
- call->app_aemap_func = aemap;
-
- _state(call);
-
- spin_lock(&conn->lock);
- set_current_state(TASK_INTERRUPTIBLE);
- add_wait_queue(&conn->chanwait, &myself);
-
- try_again:
- /* try to find an unused channel */
- for (cix = 0; cix < 4; cix++)
- if (!conn->channels[cix])
- goto obtained_chan;
-
- /* no free channels - wait for one to become available */
- ret = -EINTR;
- if (signal_pending(current))
- goto error_unwait;
-
- spin_unlock(&conn->lock);
-
- schedule();
- set_current_state(TASK_INTERRUPTIBLE);
-
- spin_lock(&conn->lock);
- goto try_again;
-
- /* got a channel - now attach to the connection */
- obtained_chan:
- remove_wait_queue(&conn->chanwait, &myself);
- set_current_state(TASK_RUNNING);
-
- /* concoct a unique call number */
- next_callid:
- call->call_id = htonl(++conn->call_counter);
- for (loop = 0; loop < 4; loop++)
- if (conn->channels[loop] &&
- conn->channels[loop]->call_id == call->call_id)
- goto next_callid;
-
- rxrpc_get_connection(conn);
- conn->channels[cix] = call; /* assign _after_ done callid check loop */
- do_gettimeofday(&conn->atime);
- call->chan_ix = htonl(cix);
-
- spin_unlock(&conn->lock);
-
- down_write(&rxrpc_calls_sem);
- list_add_tail(&call->call_link, &rxrpc_calls);
- up_write(&rxrpc_calls_sem);
-
- __RXACCT(atomic_inc(&rxrpc_call_count));
- *_call = call;
-
- _leave(" = 0 (call=%p cix=%u)", call, cix);
- return 0;
-
- error_unwait:
- remove_wait_queue(&conn->chanwait, &myself);
- set_current_state(TASK_RUNNING);
- spin_unlock(&conn->lock);
-
- free_page((unsigned long) call);
- _leave(" = %d", ret);
- return ret;
-} /* end rxrpc_create_call() */
-
-/*****************************************************************************/
-/*
- * create a new call record for incoming calls
- */
-int rxrpc_incoming_call(struct rxrpc_connection *conn,
- struct rxrpc_message *msg,
- struct rxrpc_call **_call)
-{
- struct rxrpc_call *call;
- unsigned cix;
- int ret;
-
- cix = ntohl(msg->hdr.cid) & RXRPC_CHANNELMASK;
-
- _enter("%p,%u,%u", conn, ntohl(msg->hdr.callNumber), cix);
-
- /* allocate and initialise a call record */
- ret = __rxrpc_create_call(conn, &call);
- if (ret < 0) {
- _leave(" = %d", ret);
- return ret;
- }
-
- call->pkt_rcv_count = 1;
- call->app_call_state = RXRPC_CSTATE_SRVR_RCV_OPID;
- call->app_mark = sizeof(uint32_t);
-
- _state(call);
-
- /* attach to the connection */
- ret = -EBUSY;
- call->chan_ix = htonl(cix);
- call->call_id = msg->hdr.callNumber;
-
- spin_lock(&conn->lock);
-
- if (!conn->channels[cix] ||
- conn->channels[cix]->app_call_state == RXRPC_CSTATE_COMPLETE ||
- conn->channels[cix]->app_call_state == RXRPC_CSTATE_ERROR
- ) {
- conn->channels[cix] = call;
- rxrpc_get_connection(conn);
- ret = 0;
- }
-
- spin_unlock(&conn->lock);
-
- if (ret < 0) {
- free_page((unsigned long) call);
- call = NULL;
- }
-
- if (ret == 0) {
- down_write(&rxrpc_calls_sem);
- list_add_tail(&call->call_link, &rxrpc_calls);
- up_write(&rxrpc_calls_sem);
- __RXACCT(atomic_inc(&rxrpc_call_count));
- *_call = call;
- }
-
- _leave(" = %d [%p]", ret, call);
- return ret;
-} /* end rxrpc_incoming_call() */
-
-/*****************************************************************************/
-/*
- * free a call record
- */
-void rxrpc_put_call(struct rxrpc_call *call)
-{
- struct rxrpc_connection *conn = call->conn;
- struct rxrpc_message *msg;
-
- _enter("%p{u=%d}",call,atomic_read(&call->usage));
-
- /* sanity check */
- if (atomic_read(&call->usage) <= 0)
- BUG();
-
- /* to prevent a race, the decrement and the de-list must be effectively
- * atomic */
- spin_lock(&conn->lock);
- if (likely(!atomic_dec_and_test(&call->usage))) {
- spin_unlock(&conn->lock);
- _leave("");
- return;
- }
-
- if (conn->channels[ntohl(call->chan_ix)] == call)
- conn->channels[ntohl(call->chan_ix)] = NULL;
-
- spin_unlock(&conn->lock);
-
- wake_up(&conn->chanwait);
-
- rxrpc_put_connection(conn);
-
- /* clear the timers and dequeue from krxiod */
- del_timer_sync(&call->acks_timeout);
- del_timer_sync(&call->rcv_timeout);
- del_timer_sync(&call->ackr_dfr_timo);
-
- rxrpc_krxiod_dequeue_call(call);
-
- /* clean up the contents of the struct */
- if (call->snd_nextmsg)
- rxrpc_put_message(call->snd_nextmsg);
-
- if (call->snd_ping)
- rxrpc_put_message(call->snd_ping);
-
- while (!list_empty(&call->acks_pendq)) {
- msg = list_entry(call->acks_pendq.next,
- struct rxrpc_message, link);
- list_del(&msg->link);
- rxrpc_put_message(msg);
- }
-
- while (!list_empty(&call->rcv_receiveq)) {
- msg = list_entry(call->rcv_receiveq.next,
- struct rxrpc_message, link);
- list_del(&msg->link);
- rxrpc_put_message(msg);
- }
-
- while (!list_empty(&call->app_readyq)) {
- msg = list_entry(call->app_readyq.next,
- struct rxrpc_message, link);
- list_del(&msg->link);
- rxrpc_put_message(msg);
- }
-
- while (!list_empty(&call->app_unreadyq)) {
- msg = list_entry(call->app_unreadyq.next,
- struct rxrpc_message, link);
- list_del(&msg->link);
- rxrpc_put_message(msg);
- }
-
- module_put(call->owner);
-
- down_write(&rxrpc_calls_sem);
- list_del(&call->call_link);
- up_write(&rxrpc_calls_sem);
-
- __RXACCT(atomic_dec(&rxrpc_call_count));
- free_page((unsigned long) call);
-
- _leave(" [destroyed]");
-} /* end rxrpc_put_call() */
-
-/*****************************************************************************/
-/*
- * actually generate a normal ACK
- */
-static inline int __rxrpc_call_gen_normal_ACK(struct rxrpc_call *call,
- rxrpc_seq_t seq)
-{
- struct rxrpc_message *msg;
- struct kvec diov[3];
- __be32 aux[4];
- int delta, ret;
-
- /* ACKs default to DELAY */
- if (!call->ackr.reason)
- call->ackr.reason = RXRPC_ACK_DELAY;
-
- _proto("Rx %05lu Sending ACK { m=%hu f=#%u p=#%u s=%%%u r=%s n=%u }",
- jiffies - call->cjif,
- ntohs(call->ackr.maxSkew),
- ntohl(call->ackr.firstPacket),
- ntohl(call->ackr.previousPacket),
- ntohl(call->ackr.serial),
- rxrpc_acks[call->ackr.reason],
- call->ackr.nAcks);
-
- aux[0] = htonl(call->conn->peer->if_mtu); /* interface MTU */
- aux[1] = htonl(1444); /* max MTU */
- aux[2] = htonl(16); /* rwind */
- aux[3] = htonl(4); /* max packets */
-
- diov[0].iov_len = sizeof(struct rxrpc_ackpacket);
- diov[0].iov_base = &call->ackr;
- diov[1].iov_len = call->ackr_pend_cnt + 3;
- diov[1].iov_base = call->ackr_array;
- diov[2].iov_len = sizeof(aux);
- diov[2].iov_base = &aux;
-
- /* build and send the message */
-	ret = rxrpc_conn_newmsg(call->conn, call, RXRPC_PACKET_TYPE_ACK,
- 3, diov, GFP_KERNEL, &msg);
- if (ret < 0)
- goto out;
-
- msg->seq = seq;
- msg->hdr.seq = htonl(seq);
- msg->hdr.flags |= RXRPC_SLOW_START_OK;
-
- ret = rxrpc_conn_sendmsg(call->conn, msg);
- rxrpc_put_message(msg);
- if (ret < 0)
- goto out;
- call->pkt_snd_count++;
-
- /* count how many actual ACKs there were at the front */
- for (delta = 0; delta < call->ackr_pend_cnt; delta++)
- if (call->ackr_array[delta] != RXRPC_ACK_TYPE_ACK)
- break;
-
- call->ackr_pend_cnt -= delta; /* all ACK'd to this point */
-
- /* crank the ACK window around */
- if (delta == 0) {
- /* un-ACK'd window */
- }
- else if (delta < RXRPC_CALL_ACK_WINDOW_SIZE) {
- /* partially ACK'd window
- * - shuffle down to avoid losing out-of-sequence packets
- */
- call->ackr_win_bot += delta;
- call->ackr_win_top += delta;
-
- memmove(&call->ackr_array[0],
- &call->ackr_array[delta],
- call->ackr_pend_cnt);
-
- memset(&call->ackr_array[call->ackr_pend_cnt],
- RXRPC_ACK_TYPE_NACK,
- sizeof(call->ackr_array) - call->ackr_pend_cnt);
- }
- else {
- /* fully ACK'd window
- * - just clear the whole thing
- */
- memset(&call->ackr_array,
- RXRPC_ACK_TYPE_NACK,
- sizeof(call->ackr_array));
- }
-
- /* clear this ACK */
- memset(&call->ackr, 0, sizeof(call->ackr));
-
- out:
- if (!call->app_call_state)
- printk("___ STATE 0 ___\n");
- return ret;
-} /* end __rxrpc_call_gen_normal_ACK() */
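
The window-cranking logic above distinguishes three cases: nothing hard-ACK'd
(leave the window alone), a partial hard-ACK (slide the remaining slots down
so out-of-sequence ACKs already recorded survive), and a full hard-ACK (reset
everything to NACK). A standalone sketch of just that shuffle, with assumed
simplified constants in place of the kernel's types:

	#include <string.h>

	#define WINDOW_SIZE 16
	#define TYPE_ACK    1
	#define TYPE_NACK   2

	/* 'remaining' is the pending count left after 'delta' leading
	 * slots have been hard-ACK'd and deducted */
	static void crank_window(unsigned char win[WINDOW_SIZE],
				 unsigned delta, unsigned remaining)
	{
		if (delta == 0)
			return;			/* un-ACK'd window */
		if (delta < WINDOW_SIZE) {
			/* partially ACK'd: shuffle down */
			memmove(&win[0], &win[delta], remaining);
			memset(&win[remaining], TYPE_NACK,
			       WINDOW_SIZE - remaining);
		} else {
			/* fully ACK'd: clear the whole thing */
			memset(win, TYPE_NACK, WINDOW_SIZE);
		}
	}

	int main(void)
	{
		unsigned char win[WINDOW_SIZE] = {
			TYPE_ACK, TYPE_ACK, TYPE_ACK, TYPE_NACK, TYPE_ACK,
		};

		/* 3 slots hard-ACK'd at the front, 2 still pending */
		crank_window(win, 3, 2);
		return 0;
	}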
-
-/*****************************************************************************/
-/*
- * note the reception of a packet in the call's ACK records and generate an
- * appropriate ACK packet if necessary
- * - returns 0 if packet should be processed, 1 if packet should be ignored
- * and -ve on an error
- */
-static int rxrpc_call_generate_ACK(struct rxrpc_call *call,
- struct rxrpc_header *hdr,
- struct rxrpc_ackpacket *ack)
-{
- struct rxrpc_message *msg;
- rxrpc_seq_t seq;
- unsigned offset;
- int ret = 0, err;
- u8 special_ACK, do_ACK, force;
-
- _enter("%p,%p { seq=%d tp=%d fl=%02x }",
- call, hdr, ntohl(hdr->seq), hdr->type, hdr->flags);
-
- seq = ntohl(hdr->seq);
- offset = seq - call->ackr_win_bot;
- do_ACK = RXRPC_ACK_DELAY;
- special_ACK = 0;
- force = (seq == 1);
-
- if (call->ackr_high_seq < seq)
- call->ackr_high_seq = seq;
-
- /* deal with generation of obvious special ACKs first */
- if (ack && ack->reason == RXRPC_ACK_PING) {
- special_ACK = RXRPC_ACK_PING_RESPONSE;
- ret = 1;
- goto gen_ACK;
- }
-
- if (seq < call->ackr_win_bot) {
- special_ACK = RXRPC_ACK_DUPLICATE;
- ret = 1;
- goto gen_ACK;
- }
-
- if (seq >= call->ackr_win_top) {
- special_ACK = RXRPC_ACK_EXCEEDS_WINDOW;
- ret = 1;
- goto gen_ACK;
- }
-
- if (call->ackr_array[offset] != RXRPC_ACK_TYPE_NACK) {
- special_ACK = RXRPC_ACK_DUPLICATE;
- ret = 1;
- goto gen_ACK;
- }
-
- /* okay... it's a normal data packet inside the ACK window */
- call->ackr_array[offset] = RXRPC_ACK_TYPE_ACK;
-
- if (offset < call->ackr_pend_cnt) {
- }
- else if (offset > call->ackr_pend_cnt) {
- do_ACK = RXRPC_ACK_OUT_OF_SEQUENCE;
- call->ackr_pend_cnt = offset;
- goto gen_ACK;
- }
-
- if (hdr->flags & RXRPC_REQUEST_ACK) {
- do_ACK = RXRPC_ACK_REQUESTED;
- }
-
- /* generate an ACK on the final packet of a reply just received */
- if (hdr->flags & RXRPC_LAST_PACKET) {
- if (call->conn->out_clientflag)
- force = 1;
- }
- else if (!(hdr->flags & RXRPC_MORE_PACKETS)) {
- do_ACK = RXRPC_ACK_REQUESTED;
- }
-
- /* re-ACK packets previously received out-of-order */
- for (offset++; offset < RXRPC_CALL_ACK_WINDOW_SIZE; offset++)
- if (call->ackr_array[offset] != RXRPC_ACK_TYPE_ACK)
- break;
-
- call->ackr_pend_cnt = offset;
-
- /* generate an ACK if we fill up the window */
- if (call->ackr_pend_cnt >= RXRPC_CALL_ACK_WINDOW_SIZE)
- force = 1;
-
- gen_ACK:
- _debug("%05lu ACKs pend=%u norm=%s special=%s%s",
- jiffies - call->cjif,
- call->ackr_pend_cnt,
- rxrpc_acks[do_ACK],
- rxrpc_acks[special_ACK],
- force ? " immediate" :
- do_ACK == RXRPC_ACK_REQUESTED ? " merge-req" :
- hdr->flags & RXRPC_LAST_PACKET ? " finalise" :
- " defer"
- );
-
- /* send any pending normal ACKs if need be */
- if (call->ackr_pend_cnt > 0) {
- /* fill out the appropriate form */
- call->ackr.bufferSpace = htons(RXRPC_CALL_ACK_WINDOW_SIZE);
- call->ackr.maxSkew = htons(min(call->ackr_high_seq - seq,
- 65535U));
- call->ackr.firstPacket = htonl(call->ackr_win_bot);
- call->ackr.previousPacket = call->ackr_prev_seq;
- call->ackr.serial = hdr->serial;
- call->ackr.nAcks = call->ackr_pend_cnt;
-
- if (do_ACK == RXRPC_ACK_REQUESTED)
- call->ackr.reason = do_ACK;
-
- /* generate the ACK immediately if necessary */
- if (special_ACK || force) {
- err = __rxrpc_call_gen_normal_ACK(
- call, do_ACK == RXRPC_ACK_DELAY ? 0 : seq);
- if (err < 0) {
- ret = err;
- goto out;
- }
- }
- }
-
- if (call->ackr.reason == RXRPC_ACK_REQUESTED)
- call->ackr_dfr_seq = seq;
-
- /* start the ACK timer if not running if there are any pending deferred
- * ACKs */
- if (call->ackr_pend_cnt > 0 &&
- call->ackr.reason != RXRPC_ACK_REQUESTED &&
- !timer_pending(&call->ackr_dfr_timo)
- ) {
- unsigned long timo;
-
- timo = rxrpc_call_dfr_ack_timeout + jiffies;
-
- _debug("START ACKR TIMER for cj=%lu", timo - call->cjif);
-
- spin_lock(&call->lock);
- mod_timer(&call->ackr_dfr_timo, timo);
- spin_unlock(&call->lock);
- }
- else if ((call->ackr_pend_cnt == 0 ||
- call->ackr.reason == RXRPC_ACK_REQUESTED) &&
- timer_pending(&call->ackr_dfr_timo)
- ) {
- /* stop timer if no pending ACKs */
- _debug("CLEAR ACKR TIMER");
- del_timer_sync(&call->ackr_dfr_timo);
- }
-
- /* send a special ACK if one is required */
- if (special_ACK) {
- struct rxrpc_ackpacket ack;
- struct kvec diov[2];
- uint8_t acks[1] = { RXRPC_ACK_TYPE_ACK };
-
- /* fill out the appropriate form */
- ack.bufferSpace = htons(RXRPC_CALL_ACK_WINDOW_SIZE);
- ack.maxSkew = htons(min(call->ackr_high_seq - seq,
- 65535U));
- ack.firstPacket = htonl(call->ackr_win_bot);
- ack.previousPacket = call->ackr_prev_seq;
- ack.serial = hdr->serial;
- ack.reason = special_ACK;
- ack.nAcks = 0;
-
- _proto("Rx Sending s-ACK"
- " { m=%hu f=#%u p=#%u s=%%%u r=%s n=%u }",
- ntohs(ack.maxSkew),
- ntohl(ack.firstPacket),
- ntohl(ack.previousPacket),
- ntohl(ack.serial),
- rxrpc_acks[ack.reason],
- ack.nAcks);
-
- diov[0].iov_len = sizeof(struct rxrpc_ackpacket);
- diov[0].iov_base = &ack;
- diov[1].iov_len = sizeof(acks);
- diov[1].iov_base = acks;
-
- /* build and send the message */
-		err = rxrpc_conn_newmsg(call->conn, call, RXRPC_PACKET_TYPE_ACK,
- hdr->seq ? 2 : 1, diov,
- GFP_KERNEL,
- &msg);
- if (err < 0) {
- ret = err;
- goto out;
- }
-
- msg->seq = seq;
- msg->hdr.seq = htonl(seq);
- msg->hdr.flags |= RXRPC_SLOW_START_OK;
-
- err = rxrpc_conn_sendmsg(call->conn, msg);
- rxrpc_put_message(msg);
- if (err < 0) {
- ret = err;
- goto out;
- }
- call->pkt_snd_count++;
- }
-
- out:
- if (hdr->seq)
- call->ackr_prev_seq = hdr->seq;
-
- _leave(" = %d", ret);
- return ret;
-} /* end rxrpc_call_generate_ACK() */
-
-/*****************************************************************************/
-/*
- * handle work to be done on a call
- * - includes packet reception and timeout processing
- */
-void rxrpc_call_do_stuff(struct rxrpc_call *call)
-{
- _enter("%p{flags=%lx}", call, call->flags);
-
- /* handle packet reception */
- if (call->flags & RXRPC_CALL_RCV_PKT) {
- _debug("- receive packet");
- call->flags &= ~RXRPC_CALL_RCV_PKT;
- rxrpc_call_receive_packet(call);
- }
-
- /* handle overdue ACKs */
- if (call->flags & RXRPC_CALL_ACKS_TIMO) {
- _debug("- overdue ACK timeout");
- call->flags &= ~RXRPC_CALL_ACKS_TIMO;
- rxrpc_call_resend(call, call->snd_seq_count);
- }
-
- /* handle lack of reception */
- if (call->flags & RXRPC_CALL_RCV_TIMO) {
- _debug("- reception timeout");
- call->flags &= ~RXRPC_CALL_RCV_TIMO;
- rxrpc_call_abort(call, -EIO);
- }
-
- /* handle deferred ACKs */
- if (call->flags & RXRPC_CALL_ACKR_TIMO ||
- (call->ackr.nAcks > 0 && call->ackr.reason == RXRPC_ACK_REQUESTED)
- ) {
- _debug("- deferred ACK timeout: cj=%05lu r=%s n=%u",
- jiffies - call->cjif,
- rxrpc_acks[call->ackr.reason],
- call->ackr.nAcks);
-
- call->flags &= ~RXRPC_CALL_ACKR_TIMO;
-
- if (call->ackr.nAcks > 0 &&
- call->app_call_state != RXRPC_CSTATE_ERROR) {
- /* generate ACK */
- __rxrpc_call_gen_normal_ACK(call, call->ackr_dfr_seq);
- call->ackr_dfr_seq = 0;
- }
- }
-
- _leave("");
-
-} /* end rxrpc_call_do_stuff() */
-
-/*****************************************************************************/
-/*
- * send an abort message at call or connection level
- * - must be called with call->lock held
- * - the supplied error code is sent as the packet data
- */
-static int __rxrpc_call_abort(struct rxrpc_call *call, int errno)
-{
- struct rxrpc_connection *conn = call->conn;
- struct rxrpc_message *msg;
- struct kvec diov[1];
- int ret;
- __be32 _error;
-
- _enter("%p{%08x},%p{%d},%d",
- conn, ntohl(conn->conn_id), call, ntohl(call->call_id), errno);
-
- /* if this call is already aborted, then just wake up any waiters */
- if (call->app_call_state == RXRPC_CSTATE_ERROR) {
- spin_unlock(&call->lock);
- call->app_error_func(call);
- _leave(" = 0");
- return 0;
- }
-
- rxrpc_get_call(call);
-
- /* change the state _with_ the lock still held */
- call->app_call_state = RXRPC_CSTATE_ERROR;
- call->app_err_state = RXRPC_ESTATE_LOCAL_ABORT;
- call->app_errno = errno;
- call->app_mark = RXRPC_APP_MARK_EOF;
- call->app_read_buf = NULL;
- call->app_async_read = 0;
-
- _state(call);
-
- /* ask the app to translate the error code */
- call->app_aemap_func(call);
-
- spin_unlock(&call->lock);
-
- /* flush any outstanding ACKs */
- del_timer_sync(&call->acks_timeout);
- del_timer_sync(&call->rcv_timeout);
- del_timer_sync(&call->ackr_dfr_timo);
-
- if (rxrpc_call_is_ack_pending(call))
- __rxrpc_call_gen_normal_ACK(call, 0);
-
- /* send the abort packet only if we actually traded some other
- * packets */
- ret = 0;
- if (call->pkt_snd_count || call->pkt_rcv_count) {
- /* actually send the abort */
- _proto("Rx Sending Call ABORT { data=%d }",
- call->app_abort_code);
-
- _error = htonl(call->app_abort_code);
-
- diov[0].iov_len = sizeof(_error);
- diov[0].iov_base = &_error;
-
- ret = rxrpc_conn_newmsg(conn, call, RXRPC_PACKET_TYPE_ABORT,
- 1, diov, GFP_KERNEL, &msg);
- if (ret == 0) {
- ret = rxrpc_conn_sendmsg(conn, msg);
- rxrpc_put_message(msg);
- }
- }
-
- /* tell the app layer to let go */
- call->app_error_func(call);
-
- rxrpc_put_call(call);
-
- _leave(" = %d", ret);
- return ret;
-} /* end __rxrpc_call_abort() */
-
-/*****************************************************************************/
-/*
- * send an abort message at call or connection level
- * - the supplied error code is sent as the packet data
- */
-int rxrpc_call_abort(struct rxrpc_call *call, int error)
-{
- spin_lock(&call->lock);
-
- return __rxrpc_call_abort(call, error);
-
-} /* end rxrpc_call_abort() */
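
Note the locking convention here: rxrpc_call_abort() takes call->lock and
__rxrpc_call_abort() is responsible for dropping it on every return path, so
the state transition happens atomically with the caller's check while the
subsequent sleeping work (timer deletion, packet transmission) runs unlocked.
A hypothetical pthread analogue of the idiom, not kernel code:

	#include <pthread.h>

	static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;

	static int __do_abort(int error)
	{
		/* ...state changes made while still holding the lock... */
		pthread_mutex_unlock(&lock);	/* every path drops it */
		/* ...sleeping work done after the unlock... */
		return error;
	}

	static int do_abort(int error)
	{
		pthread_mutex_lock(&lock);
		return __do_abort(error);	/* worker inherits the lock */
	}

	int main(void)
	{
		return do_abort(0);
	}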
-
-/*****************************************************************************/
-/*
- * process packets waiting for this call
- */
-static void rxrpc_call_receive_packet(struct rxrpc_call *call)
-{
- struct rxrpc_message *msg;
- struct list_head *_p;
-
- _enter("%p", call);
-
- rxrpc_get_call(call); /* must not go away too soon if aborted by
- * app-layer */
-
- while (!list_empty(&call->rcv_receiveq)) {
- /* try to get next packet */
- _p = NULL;
- spin_lock(&call->lock);
- if (!list_empty(&call->rcv_receiveq)) {
- _p = call->rcv_receiveq.next;
- list_del_init(_p);
- }
- spin_unlock(&call->lock);
-
- if (!_p)
- break;
-
- msg = list_entry(_p, struct rxrpc_message, link);
-
- _proto("Rx %05lu Received %s packet (%%%u,#%u,%c%c%c%c%c)",
- jiffies - call->cjif,
- rxrpc_pkts[msg->hdr.type],
- ntohl(msg->hdr.serial),
- msg->seq,
- msg->hdr.flags & RXRPC_JUMBO_PACKET ? 'j' : '-',
- msg->hdr.flags & RXRPC_MORE_PACKETS ? 'm' : '-',
- msg->hdr.flags & RXRPC_LAST_PACKET ? 'l' : '-',
- msg->hdr.flags & RXRPC_REQUEST_ACK ? 'r' : '-',
- msg->hdr.flags & RXRPC_CLIENT_INITIATED ? 'C' : 'S'
- );
-
- switch (msg->hdr.type) {
- /* deal with data packets */
- case RXRPC_PACKET_TYPE_DATA:
- /* ACK the packet if necessary */
- switch (rxrpc_call_generate_ACK(call, &msg->hdr,
- NULL)) {
- case 0: /* useful packet */
- rxrpc_call_receive_data_packet(call, msg);
- break;
- case 1: /* duplicate or out-of-window packet */
- break;
- default:
- rxrpc_put_message(msg);
- goto out;
- }
- break;
-
- /* deal with ACK packets */
- case RXRPC_PACKET_TYPE_ACK:
- rxrpc_call_receive_ack_packet(call, msg);
- break;
-
- /* deal with abort packets */
- case RXRPC_PACKET_TYPE_ABORT: {
- __be32 _dbuf, *dp;
-
- dp = skb_header_pointer(msg->pkt, msg->offset,
- sizeof(_dbuf), &_dbuf);
- if (dp == NULL)
- printk("Rx Received short ABORT packet\n");
-
- _proto("Rx Received Call ABORT { data=%d }",
- (dp ? ntohl(*dp) : 0));
-
- spin_lock(&call->lock);
- call->app_call_state = RXRPC_CSTATE_ERROR;
- call->app_err_state = RXRPC_ESTATE_PEER_ABORT;
- call->app_abort_code = (dp ? ntohl(*dp) : 0);
- call->app_errno = -ECONNABORTED;
- call->app_mark = RXRPC_APP_MARK_EOF;
- call->app_read_buf = NULL;
- call->app_async_read = 0;
-
- /* ask the app to translate the error code */
- call->app_aemap_func(call);
- _state(call);
- spin_unlock(&call->lock);
- call->app_error_func(call);
- break;
- }
- default:
- /* deal with other packet types */
- _proto("Rx Unsupported packet type %u (#%u)",
- msg->hdr.type, msg->seq);
- break;
- }
-
- rxrpc_put_message(msg);
- }
-
- out:
- rxrpc_put_call(call);
- _leave("");
-} /* end rxrpc_call_receive_packet() */
-
-/*****************************************************************************/
-/*
- * process next data packet
- * - as the next data packet arrives:
- * - it is queued on app_readyq _if_ it is the next one expected
- * (app_ready_seq+1)
- * - it is queued on app_unreadyq _if_ it is not the next one expected
- * - if a packet placed on app_readyq completely fills a hole leading up to
- * the first packet on app_unreadyq, then packets now in sequence are
- * transferred to app_readyq
- * - the application layer can only see packets on app_readyq
- * (app_ready_qty bytes)
- * - the application layer is prodded every time a new packet arrives
- */
-static void rxrpc_call_receive_data_packet(struct rxrpc_call *call,
- struct rxrpc_message *msg)
-{
- const struct rxrpc_operation *optbl, *op;
- struct rxrpc_message *pmsg;
- struct list_head *_p;
- int ret, lo, hi, rmtimo;
- __be32 opid;
-
- _enter("%p{%u},%p{%u}", call, ntohl(call->call_id), msg, msg->seq);
-
- rxrpc_get_message(msg);
-
- /* add to the unready queue if we'd have to create a hole in the ready
- * queue otherwise */
- if (msg->seq != call->app_ready_seq + 1) {
- _debug("Call add packet %d to unreadyq", msg->seq);
-
- /* insert in seq order */
- list_for_each(_p, &call->app_unreadyq) {
- pmsg = list_entry(_p, struct rxrpc_message, link);
- if (pmsg->seq > msg->seq)
- break;
- }
-
- list_add_tail(&msg->link, _p);
-
- _leave(" [unreadyq]");
- return;
- }
-
- /* next in sequence - simply append into the call's ready queue */
- _debug("Call add packet %d to readyq (+%Zd => %Zd bytes)",
- msg->seq, msg->dsize, call->app_ready_qty);
-
- spin_lock(&call->lock);
- call->app_ready_seq = msg->seq;
- call->app_ready_qty += msg->dsize;
- list_add_tail(&msg->link, &call->app_readyq);
-
- /* move unready packets to the readyq if we got rid of a hole */
- while (!list_empty(&call->app_unreadyq)) {
- pmsg = list_entry(call->app_unreadyq.next,
- struct rxrpc_message, link);
-
- if (pmsg->seq != call->app_ready_seq + 1)
- break;
-
- /* next in sequence - just move list-to-list */
- _debug("Call transfer packet %d to readyq (+%Zd => %Zd bytes)",
- pmsg->seq, pmsg->dsize, call->app_ready_qty);
-
- call->app_ready_seq = pmsg->seq;
- call->app_ready_qty += pmsg->dsize;
- list_move_tail(&pmsg->link, &call->app_readyq);
- }
-
- /* see if we've got the last packet yet */
- if (!list_empty(&call->app_readyq)) {
- pmsg = list_entry(call->app_readyq.prev,
- struct rxrpc_message, link);
- if (pmsg->hdr.flags & RXRPC_LAST_PACKET) {
- call->app_last_rcv = 1;
- _debug("Last packet on readyq");
- }
- }
-
- switch (call->app_call_state) {
- /* do nothing if call already aborted */
- case RXRPC_CSTATE_ERROR:
- spin_unlock(&call->lock);
- _leave(" [error]");
- return;
-
- /* extract the operation ID from an incoming call if that's not
- * yet been done */
- case RXRPC_CSTATE_SRVR_RCV_OPID:
- spin_unlock(&call->lock);
-
- /* handle as yet insufficient data for the operation ID */
- if (call->app_ready_qty < 4) {
- if (call->app_last_rcv)
- /* trouble - last packet seen */
- rxrpc_call_abort(call, -EINVAL);
-
- _leave("");
- return;
- }
-
- /* pull the operation ID out of the buffer */
- ret = rxrpc_call_read_data(call, &opid, sizeof(opid), 0);
- if (ret < 0) {
- printk("Unexpected error from read-data: %d\n", ret);
- if (call->app_call_state != RXRPC_CSTATE_ERROR)
- rxrpc_call_abort(call, ret);
- _leave("");
- return;
- }
- call->app_opcode = ntohl(opid);
-
- /* locate the operation in the available ops table */
- optbl = call->conn->service->ops_begin;
- lo = 0;
- hi = call->conn->service->ops_end - optbl;
-
- while (lo < hi) {
- int mid = (hi + lo) / 2;
- op = &optbl[mid];
- if (call->app_opcode == op->id)
- goto found_op;
- if (call->app_opcode > op->id)
- lo = mid + 1;
- else
- hi = mid;
- }
-
- /* search failed */
- kproto("Rx Client requested operation %d from %s service",
- call->app_opcode, call->conn->service->name);
- rxrpc_call_abort(call, -EINVAL);
- _leave(" [inval]");
- return;
-
- found_op:
- _proto("Rx Client requested operation %s from %s service",
- op->name, call->conn->service->name);
-
- /* we're now waiting for the argument block (unless the call
- * was aborted) */
- spin_lock(&call->lock);
- if (call->app_call_state == RXRPC_CSTATE_SRVR_RCV_OPID ||
- call->app_call_state == RXRPC_CSTATE_SRVR_SND_REPLY) {
- if (!call->app_last_rcv)
- call->app_call_state =
- RXRPC_CSTATE_SRVR_RCV_ARGS;
- else if (call->app_ready_qty > 0)
- call->app_call_state =
- RXRPC_CSTATE_SRVR_GOT_ARGS;
- else
- call->app_call_state =
- RXRPC_CSTATE_SRVR_SND_REPLY;
- call->app_mark = op->asize;
- call->app_user = op->user;
- }
- spin_unlock(&call->lock);
-
- _state(call);
- break;
-
- case RXRPC_CSTATE_SRVR_RCV_ARGS:
- /* change state if just received last packet of arg block */
- if (call->app_last_rcv)
- call->app_call_state = RXRPC_CSTATE_SRVR_GOT_ARGS;
- spin_unlock(&call->lock);
-
- _state(call);
- break;
-
- case RXRPC_CSTATE_CLNT_RCV_REPLY:
- /* change state if just received last packet of reply block */
- rmtimo = 0;
- if (call->app_last_rcv) {
- call->app_call_state = RXRPC_CSTATE_CLNT_GOT_REPLY;
- rmtimo = 1;
- }
- spin_unlock(&call->lock);
-
- if (rmtimo) {
- del_timer_sync(&call->acks_timeout);
- del_timer_sync(&call->rcv_timeout);
- del_timer_sync(&call->ackr_dfr_timo);
- }
-
- _state(call);
- break;
-
- default:
- /* deal with data reception in an unexpected state */
- printk("Unexpected state [[[ %u ]]]\n", call->app_call_state);
- __rxrpc_call_abort(call, -EBADMSG);
- _leave("");
- return;
- }
-
- if (call->app_call_state == RXRPC_CSTATE_CLNT_RCV_REPLY &&
- call->app_last_rcv)
- BUG();
-
-	/* otherwise just invoke the data function whenever we can satisfy
-	 * its desire for more data */
- _proto("Rx Received Op Data: st=%u qty=%Zu mk=%Zu%s",
- call->app_call_state, call->app_ready_qty, call->app_mark,
- call->app_last_rcv ? " last-rcvd" : "");
-
- spin_lock(&call->lock);
-
- ret = __rxrpc_call_read_data(call);
- switch (ret) {
- case 0:
- spin_unlock(&call->lock);
- call->app_attn_func(call);
- break;
- case -EAGAIN:
- spin_unlock(&call->lock);
- break;
- case -ECONNABORTED:
- spin_unlock(&call->lock);
- break;
- default:
- __rxrpc_call_abort(call, ret);
- break;
- }
-
- _state(call);
-
- _leave("");
-
-} /* end rxrpc_call_receive_data_packet() */
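
The readyq/unreadyq scheme above is a straightforward reassembly buffer:
packets arriving in sequence are delivered, packets that would create a hole
are parked in sequence order, and filling a hole drains everything that has
become contiguous. A standalone sketch using flat arrays in place of the
kernel's list_heads (an assumed simplification, not the original code):

	#include <stdbool.h>
	#include <stdio.h>

	#define MAXSEQ 64

	static bool parked[MAXSEQ + 1];	/* out-of-order packets */
	static unsigned ready_seq;	/* highest contiguous seq delivered */

	static void receive(unsigned seq)
	{
		if (seq != ready_seq + 1) {
			parked[seq] = true;	/* would create a hole */
			return;
		}
		ready_seq = seq;		/* next in sequence */

		/* a hole may just have been plugged: drain the parked
		 * packets that are now contiguous */
		while (ready_seq < MAXSEQ && parked[ready_seq + 1]) {
			ready_seq++;
			parked[ready_seq] = false;
		}
	}

	int main(void)
	{
		static const unsigned order[] = { 1, 3, 4, 2, 5 };

		for (unsigned i = 0; i < 5; i++) {
			receive(order[i]);
			printf("rx #%u -> contiguous up to #%u\n",
			       order[i], ready_seq);
		}
		return 0;
	}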
-
-/*****************************************************************************/
-/*
- * received an ACK packet
- */
-static void rxrpc_call_receive_ack_packet(struct rxrpc_call *call,
- struct rxrpc_message *msg)
-{
- struct rxrpc_ackpacket _ack, *ap;
- rxrpc_serial_net_t serial;
- rxrpc_seq_t seq;
- int ret;
-
- _enter("%p{%u},%p{%u}", call, ntohl(call->call_id), msg, msg->seq);
-
- /* extract the basic ACK record */
- ap = skb_header_pointer(msg->pkt, msg->offset, sizeof(_ack), &_ack);
- if (ap == NULL) {
- printk("Rx Received short ACK packet\n");
- return;
- }
- msg->offset += sizeof(_ack);
-
- serial = ap->serial;
- seq = ntohl(ap->firstPacket);
-
- _proto("Rx Received ACK %%%d { b=%hu m=%hu f=%u p=%u s=%u r=%s n=%u }",
- ntohl(msg->hdr.serial),
- ntohs(ap->bufferSpace),
- ntohs(ap->maxSkew),
- seq,
- ntohl(ap->previousPacket),
- ntohl(serial),
- rxrpc_acks[ap->reason],
-	       ap->nAcks
- );
-
- /* check the other side isn't ACK'ing a sequence number I haven't sent
- * yet */
- if (ap->nAcks > 0 &&
- (seq > call->snd_seq_count ||
- seq + ap->nAcks - 1 > call->snd_seq_count)) {
- printk("Received ACK (#%u-#%u) for unsent packet\n",
- seq, seq + ap->nAcks - 1);
- rxrpc_call_abort(call, -EINVAL);
- _leave("");
- return;
- }
-
- /* deal with RTT calculation */
- if (serial) {
- struct rxrpc_message *rttmsg;
-
- /* find the prompting packet */
- spin_lock(&call->lock);
- if (call->snd_ping && call->snd_ping->hdr.serial == serial) {
- /* it was a ping packet */
- rttmsg = call->snd_ping;
- call->snd_ping = NULL;
- spin_unlock(&call->lock);
-
- if (rttmsg) {
- rttmsg->rttdone = 1;
- rxrpc_peer_calculate_rtt(call->conn->peer,
- rttmsg, msg);
- rxrpc_put_message(rttmsg);
- }
- }
- else {
- struct list_head *_p;
-
- /* it ought to be a data packet - look in the pending
- * ACK list */
- list_for_each(_p, &call->acks_pendq) {
- rttmsg = list_entry(_p, struct rxrpc_message,
- link);
- if (rttmsg->hdr.serial == serial) {
- if (rttmsg->rttdone)
- /* never do RTT twice without
- * resending */
- break;
-
- rttmsg->rttdone = 1;
- rxrpc_peer_calculate_rtt(
- call->conn->peer, rttmsg, msg);
- break;
- }
- }
- spin_unlock(&call->lock);
- }
- }
-
- switch (ap->reason) {
- /* deal with negative/positive acknowledgement of data
- * packets */
- case RXRPC_ACK_REQUESTED:
- case RXRPC_ACK_DELAY:
- case RXRPC_ACK_IDLE:
-		rxrpc_call_definitively_ACK(call, seq - 1);
-		/* fall through to also record the ACK vector */
-
- case RXRPC_ACK_DUPLICATE:
- case RXRPC_ACK_OUT_OF_SEQUENCE:
- case RXRPC_ACK_EXCEEDS_WINDOW:
- call->snd_resend_cnt = 0;
- ret = rxrpc_call_record_ACK(call, msg, seq, ap->nAcks);
- if (ret < 0)
- rxrpc_call_abort(call, ret);
- break;
-
- /* respond to ping packets immediately */
- case RXRPC_ACK_PING:
- rxrpc_call_generate_ACK(call, &msg->hdr, ap);
- break;
-
- /* only record RTT on ping response packets */
- case RXRPC_ACK_PING_RESPONSE:
- if (call->snd_ping) {
- struct rxrpc_message *rttmsg;
-
- /* only do RTT stuff if the response matches the
- * retained ping */
- rttmsg = NULL;
- spin_lock(&call->lock);
- if (call->snd_ping &&
- call->snd_ping->hdr.serial == ap->serial) {
- rttmsg = call->snd_ping;
- call->snd_ping = NULL;
- }
- spin_unlock(&call->lock);
-
- if (rttmsg) {
- rttmsg->rttdone = 1;
- rxrpc_peer_calculate_rtt(call->conn->peer,
- rttmsg, msg);
- rxrpc_put_message(rttmsg);
- }
- }
- break;
-
- default:
- printk("Unsupported ACK reason %u\n", ap->reason);
- break;
- }
-
- _leave("");
-} /* end rxrpc_call_receive_ack_packet() */
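
The RTT sampling above works by serial number: every transmitted packet
carries a unique serial, an ACK echoes the serial of the packet that provoked
it, and matching the echo against a retained copy of the sent packet yields
one round-trip sample. A packet is never sampled twice, since a resend would
poison the measurement. A simplified standalone sketch with assumed types:

	#include <stdio.h>
	#include <time.h>

	struct sent_pkt {
		unsigned	serial;
		struct timespec	sent_at;
		int		rtt_done;	/* sample each packet once */
	};

	static long sample_rtt_ms(struct sent_pkt *p, unsigned ack_serial)
	{
		struct timespec now;

		if (p->serial != ack_serial || p->rtt_done)
			return -1;	/* not the prompting packet */

		clock_gettime(CLOCK_MONOTONIC, &now);
		p->rtt_done = 1;
		return (now.tv_sec - p->sent_at.tv_sec) * 1000 +
		       (now.tv_nsec - p->sent_at.tv_nsec) / 1000000;
	}

	int main(void)
	{
		struct sent_pkt p = { .serial = 7 };

		clock_gettime(CLOCK_MONOTONIC, &p.sent_at);
		printf("rtt = %ld ms\n", sample_rtt_ms(&p, 7));
		return 0;
	}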
-
-/*****************************************************************************/
-/*
- * record definitive ACKs for all messages up to and including the one with the
- * 'highest' seq
- */
-static void rxrpc_call_definitively_ACK(struct rxrpc_call *call,
- rxrpc_seq_t highest)
-{
- struct rxrpc_message *msg;
- int now_complete;
-
- _enter("%p{ads=%u},%u", call, call->acks_dftv_seq, highest);
-
- while (call->acks_dftv_seq < highest) {
- call->acks_dftv_seq++;
-
- _proto("Definitive ACK on packet #%u", call->acks_dftv_seq);
-
- /* discard those at front of queue until message with highest
- * ACK is found */
- spin_lock(&call->lock);
- msg = NULL;
- if (!list_empty(&call->acks_pendq)) {
- msg = list_entry(call->acks_pendq.next,
- struct rxrpc_message, link);
- list_del_init(&msg->link); /* dequeue */
- if (msg->state == RXRPC_MSG_SENT)
- call->acks_pend_cnt--;
- }
- spin_unlock(&call->lock);
-
- /* insanity check */
- if (!msg)
- panic("%s(): acks_pendq unexpectedly empty\n",
- __FUNCTION__);
-
- if (msg->seq != call->acks_dftv_seq)
- panic("%s(): Packet #%u expected at front of acks_pendq"
- " (#%u found)\n",
- __FUNCTION__, call->acks_dftv_seq, msg->seq);
-
- /* discard the message */
- msg->state = RXRPC_MSG_DONE;
- rxrpc_put_message(msg);
- }
-
-	/* if all sent packets are definitively ACK'd then prod any sleepers
-	 * just in case */
- now_complete = 0;
- spin_lock(&call->lock);
- if (call->acks_dftv_seq == call->snd_seq_count) {
- if (call->app_call_state != RXRPC_CSTATE_COMPLETE) {
- call->app_call_state = RXRPC_CSTATE_COMPLETE;
- _state(call);
- now_complete = 1;
- }
- }
- spin_unlock(&call->lock);
-
- if (now_complete) {
- del_timer_sync(&call->acks_timeout);
- del_timer_sync(&call->rcv_timeout);
- del_timer_sync(&call->ackr_dfr_timo);
- call->app_attn_func(call);
- }
-
- _leave("");
-} /* end rxrpc_call_definitively_ACK() */
-
-/*****************************************************************************/
-/*
- * record the specified amount of ACKs/NAKs
- */
-static int rxrpc_call_record_ACK(struct rxrpc_call *call,
- struct rxrpc_message *msg,
- rxrpc_seq_t seq,
- size_t count)
-{
- struct rxrpc_message *dmsg;
- struct list_head *_p;
- rxrpc_seq_t highest;
- unsigned ix;
- size_t chunk;
- char resend, now_complete;
- u8 acks[16];
-
- _enter("%p{apc=%u ads=%u},%p,%u,%Zu",
- call, call->acks_pend_cnt, call->acks_dftv_seq,
- msg, seq, count);
-
- /* handle re-ACK'ing of definitively ACK'd packets (may be out-of-order
- * ACKs) */
- if (seq <= call->acks_dftv_seq) {
- unsigned delta = call->acks_dftv_seq - seq;
-
- if (count <= delta) {
- _leave(" = 0 [all definitively ACK'd]");
- return 0;
- }
-
- seq += delta;
- count -= delta;
- msg->offset += delta;
- }
-
- highest = seq + count - 1;
- resend = 0;
- while (count > 0) {
- /* extract up to 16 ACK slots at a time */
- chunk = min(count, sizeof(acks));
- count -= chunk;
-
- memset(acks, 2, sizeof(acks));
-
- if (skb_copy_bits(msg->pkt, msg->offset, &acks, chunk) < 0) {
- printk("Rx Received short ACK packet\n");
- _leave(" = -EINVAL");
- return -EINVAL;
- }
- msg->offset += chunk;
-
- /* check that the ACK set is valid */
- for (ix = 0; ix < chunk; ix++) {
- switch (acks[ix]) {
- case RXRPC_ACK_TYPE_ACK:
- break;
- case RXRPC_ACK_TYPE_NACK:
- resend = 1;
- break;
- default:
- printk("Rx Received unsupported ACK state"
- " %u\n", acks[ix]);
- _leave(" = -EINVAL");
- return -EINVAL;
- }
- }
-
- _proto("Rx ACK of packets #%u-#%u "
- "[%c%c%c%c%c%c%c%c%c%c%c%c%c%c%c%c] (pend=%u)",
- seq, (unsigned) (seq + chunk - 1),
- _acktype[acks[0x0]],
- _acktype[acks[0x1]],
- _acktype[acks[0x2]],
- _acktype[acks[0x3]],
- _acktype[acks[0x4]],
- _acktype[acks[0x5]],
- _acktype[acks[0x6]],
- _acktype[acks[0x7]],
- _acktype[acks[0x8]],
- _acktype[acks[0x9]],
- _acktype[acks[0xA]],
- _acktype[acks[0xB]],
- _acktype[acks[0xC]],
- _acktype[acks[0xD]],
- _acktype[acks[0xE]],
- _acktype[acks[0xF]],
- call->acks_pend_cnt
- );
-
- /* mark the packets in the ACK queue as being provisionally
- * ACK'd */
- ix = 0;
- spin_lock(&call->lock);
-
- /* find the first packet ACK'd/NAK'd here */
- list_for_each(_p, &call->acks_pendq) {
- dmsg = list_entry(_p, struct rxrpc_message, link);
- if (dmsg->seq == seq)
- goto found_first;
- _debug("- %u: skipping #%u", ix, dmsg->seq);
- }
- goto bad_queue;
-
- found_first:
- do {
- _debug("- %u: processing #%u (%c) apc=%u",
- ix, dmsg->seq, _acktype[acks[ix]],
- call->acks_pend_cnt);
-
- if (acks[ix] == RXRPC_ACK_TYPE_ACK) {
- if (dmsg->state == RXRPC_MSG_SENT)
- call->acks_pend_cnt--;
- dmsg->state = RXRPC_MSG_ACKED;
- }
- else {
- if (dmsg->state == RXRPC_MSG_ACKED)
- call->acks_pend_cnt++;
- dmsg->state = RXRPC_MSG_SENT;
- }
- ix++;
- seq++;
-
- _p = dmsg->link.next;
- dmsg = list_entry(_p, struct rxrpc_message, link);
-		} while (ix < chunk &&
- _p != &call->acks_pendq &&
- dmsg->seq == seq);
-
- if (ix < chunk)
- goto bad_queue;
-
- spin_unlock(&call->lock);
- }
-
- if (resend)
- rxrpc_call_resend(call, highest);
-
- /* if all packets are provisionally ACK'd, then wake up anyone who's
- * waiting for that */
- now_complete = 0;
- spin_lock(&call->lock);
- if (call->acks_pend_cnt == 0) {
- if (call->app_call_state == RXRPC_CSTATE_SRVR_RCV_FINAL_ACK) {
- call->app_call_state = RXRPC_CSTATE_COMPLETE;
- _state(call);
- }
- now_complete = 1;
- }
- spin_unlock(&call->lock);
-
- if (now_complete) {
- _debug("- wake up waiters");
- del_timer_sync(&call->acks_timeout);
- del_timer_sync(&call->rcv_timeout);
- del_timer_sync(&call->ackr_dfr_timo);
- call->app_attn_func(call);
- }
-
- _leave(" = 0 (apc=%u)", call->acks_pend_cnt);
- return 0;
-
- bad_queue:
- panic("%s(): acks_pendq in bad state (packet #%u absent)\n",
- __FUNCTION__, seq);
-
-} /* end rxrpc_call_record_ACK() */
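
Note how the ACK vector is consumed above: up to sixteen slots are pulled out
of the packet at a time, validated, and then applied to the matching run of
messages on the pending-ACK queue, toggling each between the SENT and ACKED
states. A simplified standalone walk over such a vector (assumed types, an
array in place of the kernel's queue):

	#include <stddef.h>
	#include <string.h>

	#define TYPE_ACK  0
	#define TYPE_NACK 1

	/* apply 'count' vector bytes to per-packet states; returns how
	 * many packets are still unacknowledged */
	static size_t walk_acks(const unsigned char *vec, size_t count,
				unsigned char *state)
	{
		size_t pend = 0;

		while (count > 0) {
			unsigned char chunk[16];
			size_t n = count < sizeof(chunk) ? count
							 : sizeof(chunk);

			/* stands in for skb_copy_bits() on the packet */
			memcpy(chunk, vec, n);
			vec += n;
			count -= n;

			for (size_t ix = 0; ix < n; ix++, state++) {
				*state = chunk[ix];
				if (chunk[ix] == TYPE_NACK)
					pend++;	/* needs resending */
			}
		}
		return pend;
	}

	int main(void)
	{
		unsigned char vec[20], state[20];

		memset(vec, TYPE_ACK, sizeof(vec));
		vec[17] = TYPE_NACK;		/* one packet lost */
		return walk_acks(vec, sizeof(vec), state) == 1 ? 0 : 1;
	}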
-
-/*****************************************************************************/
-/*
- * transfer data from the ready packet queue to the asynchronous read buffer
- * - since this func is the only one going to look at packets queued on
- * app_readyq, we don't need a lock to modify or access them, only to modify
- * the queue pointers
- * - called with call->lock held
- * - the buffer must be in kernel space
- * - returns:
- * 0 if buffer filled
- * -EAGAIN if buffer not filled and more data to come
- * -EBADMSG if last packet received and insufficient data left
- * -ECONNABORTED if the call is in an error state
- */
-static int __rxrpc_call_read_data(struct rxrpc_call *call)
-{
- struct rxrpc_message *msg;
- size_t qty;
- int ret;
-
- _enter("%p{as=%d buf=%p qty=%Zu/%Zu}",
- call,
- call->app_async_read, call->app_read_buf,
- call->app_ready_qty, call->app_mark);
-
- /* check the state */
- switch (call->app_call_state) {
- case RXRPC_CSTATE_SRVR_RCV_ARGS:
- case RXRPC_CSTATE_CLNT_RCV_REPLY:
- if (call->app_last_rcv) {
- printk("%s(%p,%p,%Zd):"
- " Inconsistent call state (%s, last pkt)",
- __FUNCTION__,
- call, call->app_read_buf, call->app_mark,
- rxrpc_call_states[call->app_call_state]);
- BUG();
- }
- break;
-
- case RXRPC_CSTATE_SRVR_RCV_OPID:
- case RXRPC_CSTATE_SRVR_GOT_ARGS:
- case RXRPC_CSTATE_CLNT_GOT_REPLY:
- break;
-
- case RXRPC_CSTATE_SRVR_SND_REPLY:
- if (!call->app_last_rcv) {
- printk("%s(%p,%p,%Zd):"
- " Inconsistent call state (%s, not last pkt)",
- __FUNCTION__,
- call, call->app_read_buf, call->app_mark,
- rxrpc_call_states[call->app_call_state]);
- BUG();
- }
- _debug("Trying to read data from call in SND_REPLY state");
- break;
-
- case RXRPC_CSTATE_ERROR:
- _leave(" = -ECONNABORTED");
- return -ECONNABORTED;
-
- default:
- printk("reading in unexpected state [[[ %u ]]]\n",
- call->app_call_state);
- BUG();
- }
-
- /* handle the case of not having an async buffer */
- if (!call->app_async_read) {
- if (call->app_mark == RXRPC_APP_MARK_EOF) {
- ret = call->app_last_rcv ? 0 : -EAGAIN;
- }
- else {
- if (call->app_mark >= call->app_ready_qty) {
- call->app_mark = RXRPC_APP_MARK_EOF;
- ret = 0;
- }
- else {
- ret = call->app_last_rcv ? -EBADMSG : -EAGAIN;
- }
- }
-
- _leave(" = %d [no buf]", ret);
-		return ret;
- }
-
- while (!list_empty(&call->app_readyq) && call->app_mark > 0) {
- msg = list_entry(call->app_readyq.next,
- struct rxrpc_message, link);
-
- /* drag as much data as we need out of this packet */
- qty = min(call->app_mark, msg->dsize);
-
- _debug("reading %Zu from skb=%p off=%lu",
- qty, msg->pkt, msg->offset);
-
- if (call->app_read_buf)
- if (skb_copy_bits(msg->pkt, msg->offset,
- call->app_read_buf, qty) < 0)
- panic("%s: Failed to copy data from packet:"
- " (%p,%p,%Zd)",
- __FUNCTION__,
- call, call->app_read_buf, qty);
-
- /* if that packet is now empty, discard it */
- call->app_ready_qty -= qty;
- msg->dsize -= qty;
-
- if (msg->dsize == 0) {
- list_del_init(&msg->link);
- rxrpc_put_message(msg);
- }
- else {
- msg->offset += qty;
- }
-
- call->app_mark -= qty;
- if (call->app_read_buf)
- call->app_read_buf += qty;
- }
-
- if (call->app_mark == 0) {
- call->app_async_read = 0;
- call->app_mark = RXRPC_APP_MARK_EOF;
- call->app_read_buf = NULL;
-
- /* adjust the state if used up all packets */
- if (list_empty(&call->app_readyq) && call->app_last_rcv) {
- switch (call->app_call_state) {
- case RXRPC_CSTATE_SRVR_RCV_OPID:
- call->app_call_state = RXRPC_CSTATE_SRVR_SND_REPLY;
- call->app_mark = RXRPC_APP_MARK_EOF;
- _state(call);
- del_timer_sync(&call->rcv_timeout);
- break;
- case RXRPC_CSTATE_SRVR_GOT_ARGS:
- call->app_call_state = RXRPC_CSTATE_SRVR_SND_REPLY;
- _state(call);
- del_timer_sync(&call->rcv_timeout);
- break;
- default:
- call->app_call_state = RXRPC_CSTATE_COMPLETE;
- _state(call);
- del_timer_sync(&call->acks_timeout);
- del_timer_sync(&call->ackr_dfr_timo);
- del_timer_sync(&call->rcv_timeout);
- break;
- }
- }
-
- _leave(" = 0");
- return 0;
- }
-
- if (call->app_last_rcv) {
- _debug("Insufficient data (%Zu/%Zu)",
- call->app_ready_qty, call->app_mark);
- call->app_async_read = 0;
- call->app_mark = RXRPC_APP_MARK_EOF;
- call->app_read_buf = NULL;
-
- _leave(" = -EBADMSG");
- return -EBADMSG;
- }
-
- _leave(" = -EAGAIN");
- return -EAGAIN;
-} /* end __rxrpc_call_read_data() */
-
-/*****************************************************************************/
-/*
- * attempt to read the specified amount of data from the call's ready queue
- * into the buffer provided
- * - since this func is the only one going to look at packets queued on
- * app_readyq, we don't need a lock to modify or access them, only to modify
- * the queue pointers
- * - if the buffer pointer is NULL, then data is merely drained, not copied
- * - if flags&RXRPC_CALL_READ_BLOCK, then the function will wait until there is
- * enough data or an error will be generated
- * - note that the caller must have added the calling task to the call's wait
- * queue beforehand
- * - if flags&RXRPC_CALL_READ_ALL, then an error will be generated if this
- * function doesn't read all available data
- */
-int rxrpc_call_read_data(struct rxrpc_call *call,
- void *buffer, size_t size, int flags)
-{
- int ret;
-
- _enter("%p{arq=%Zu},%p,%Zd,%x",
- call, call->app_ready_qty, buffer, size, flags);
-
- spin_lock(&call->lock);
-
- if (unlikely(!!call->app_read_buf)) {
- spin_unlock(&call->lock);
- _leave(" = -EBUSY");
- return -EBUSY;
- }
-
- call->app_mark = size;
- call->app_read_buf = buffer;
- call->app_async_read = 1;
- call->app_read_count++;
-
- /* read as much data as possible */
- ret = __rxrpc_call_read_data(call);
- switch (ret) {
- case 0:
- if (flags & RXRPC_CALL_READ_ALL &&
- (!call->app_last_rcv || call->app_ready_qty > 0)) {
- _leave(" = -EBADMSG");
- __rxrpc_call_abort(call, -EBADMSG);
- return -EBADMSG;
- }
-
- spin_unlock(&call->lock);
- call->app_attn_func(call);
- _leave(" = 0");
- return ret;
-
- case -ECONNABORTED:
- spin_unlock(&call->lock);
- _leave(" = %d [aborted]", ret);
- return ret;
-
- default:
- __rxrpc_call_abort(call, ret);
- _leave(" = %d", ret);
- return ret;
-
- case -EAGAIN:
- spin_unlock(&call->lock);
-
- if (!(flags & RXRPC_CALL_READ_BLOCK)) {
- _leave(" = -EAGAIN");
- return -EAGAIN;
- }
-
- /* wait for the data to arrive */
- _debug("blocking for data arrival");
-
- for (;;) {
- set_current_state(TASK_INTERRUPTIBLE);
- if (!call->app_async_read || signal_pending(current))
- break;
- schedule();
- }
- set_current_state(TASK_RUNNING);
-
- if (signal_pending(current)) {
- _leave(" = -EINTR");
- return -EINTR;
- }
-
- if (call->app_call_state == RXRPC_CSTATE_ERROR) {
- _leave(" = -ECONNABORTED");
- return -ECONNABORTED;
- }
-
- _leave(" = 0");
- return 0;
- }
-
-} /* end rxrpc_call_read_data() */
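
The blocking path above uses the classic Linux sleep pattern: mark the task
interruptible before testing the wakeup condition, so a wakeup landing
between the test and schedule() simply makes schedule() return instead of
being lost. A hypothetical userspace analogue with a condition variable (the
kernel version additionally breaks out on signals):

	#include <pthread.h>
	#include <stdbool.h>

	static pthread_mutex_t lk = PTHREAD_MUTEX_INITIALIZER;
	static pthread_cond_t  cv = PTHREAD_COND_INITIALIZER;
	static bool async_read_pending;

	static void wait_for_data(void)
	{
		pthread_mutex_lock(&lk);
		while (async_read_pending)
			pthread_cond_wait(&cv, &lk); /* recheck on wakeup */
		pthread_mutex_unlock(&lk);
	}

	int main(void)
	{
		wait_for_data();	/* nothing pending: returns at once */
		return 0;
	}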
-
-/*****************************************************************************/
-/*
- * write data to a call
- * - the data may not be sent immediately if it doesn't fill a buffer
- * - if we can't queue all the data for buffering now, siov[] will have been
- * adjusted to take account of what has been sent
- */
-int rxrpc_call_write_data(struct rxrpc_call *call,
- size_t sioc,
- struct kvec *siov,
- u8 rxhdr_flags,
- gfp_t alloc_flags,
- int dup_data,
- size_t *size_sent)
-{
- struct rxrpc_message *msg;
- struct kvec *sptr;
- size_t space, size, chunk, tmp;
- char *buf;
- int ret;
-
- _enter("%p,%Zu,%p,%02x,%x,%d,%p",
- call, sioc, siov, rxhdr_flags, alloc_flags, dup_data,
- size_sent);
-
- *size_sent = 0;
- size = 0;
- ret = -EINVAL;
-
- /* can't send more if we've sent last packet from this end */
- switch (call->app_call_state) {
- case RXRPC_CSTATE_SRVR_SND_REPLY:
- case RXRPC_CSTATE_CLNT_SND_ARGS:
- break;
- case RXRPC_CSTATE_ERROR:
-		ret = call->app_errno;
-		/* fall through */
-	default:
- goto out;
- }
-
- /* calculate how much data we've been given */
- sptr = siov;
- for (; sioc > 0; sptr++, sioc--) {
- if (!sptr->iov_len)
- continue;
-
- if (!sptr->iov_base)
- goto out;
-
- size += sptr->iov_len;
- }
-
- _debug("- size=%Zu mtu=%Zu", size, call->conn->mtu_size);
-
- do {
- /* make sure there's a message under construction */
- if (!call->snd_nextmsg) {
- /* no - allocate a message with no data yet attached */
- ret = rxrpc_conn_newmsg(call->conn, call,
- RXRPC_PACKET_TYPE_DATA,
- 0, NULL, alloc_flags,
- &call->snd_nextmsg);
- if (ret < 0)
- goto out;
- _debug("- allocated new message [ds=%Zu]",
- call->snd_nextmsg->dsize);
- }
-
- msg = call->snd_nextmsg;
- msg->hdr.flags |= rxhdr_flags;
-
- /* deal with zero-length terminal packet */
- if (size == 0) {
- if (rxhdr_flags & RXRPC_LAST_PACKET) {
- ret = rxrpc_call_flush(call);
- if (ret < 0)
- goto out;
- }
- break;
- }
-
- /* work out how much space current packet has available */
- space = call->conn->mtu_size - msg->dsize;
- chunk = min(space, size);
-
- _debug("- [before] space=%Zu chunk=%Zu", space, chunk);
-
- while (!siov->iov_len)
- siov++;
-
- /* if we are going to have to duplicate the data then coalesce
- * it too */
- if (dup_data) {
-			/* don't allocate more than 1 page at a time */
- if (chunk > PAGE_SIZE)
- chunk = PAGE_SIZE;
-
- /* allocate a data buffer and attach to the message */
- buf = kmalloc(chunk, alloc_flags);
- if (unlikely(!buf)) {
- if (msg->dsize ==
- sizeof(struct rxrpc_header)) {
- /* discard an empty msg and wind back
- * the seq counter */
- rxrpc_put_message(msg);
- call->snd_nextmsg = NULL;
- call->snd_seq_count--;
- }
-
- ret = -ENOMEM;
- goto out;
- }
-
- tmp = msg->dcount++;
- set_bit(tmp, &msg->dfree);
- msg->data[tmp].iov_base = buf;
- msg->data[tmp].iov_len = chunk;
- msg->dsize += chunk;
- *size_sent += chunk;
- size -= chunk;
-
- /* load the buffer with data */
- while (chunk > 0) {
- tmp = min(chunk, siov->iov_len);
- memcpy(buf, siov->iov_base, tmp);
- buf += tmp;
- siov->iov_base += tmp;
- siov->iov_len -= tmp;
- if (!siov->iov_len)
- siov++;
- chunk -= tmp;
- }
- }
- else {
- /* we want to attach the supplied buffers directly */
- while (chunk > 0 &&
- msg->dcount < RXRPC_MSG_MAX_IOCS) {
- tmp = msg->dcount++;
- msg->data[tmp].iov_base = siov->iov_base;
- msg->data[tmp].iov_len = siov->iov_len;
- msg->dsize += siov->iov_len;
- *size_sent += siov->iov_len;
- size -= siov->iov_len;
- chunk -= siov->iov_len;
- siov++;
- }
- }
-
- _debug("- [loaded] chunk=%Zu size=%Zu", chunk, size);
-
- /* dispatch the message when full, final or requesting ACK */
- if (msg->dsize >= call->conn->mtu_size || rxhdr_flags) {
- ret = rxrpc_call_flush(call);
- if (ret < 0)
- goto out;
- }
-
-	} while (size > 0);
-
- ret = 0;
- out:
- _leave(" = %d (%Zd queued, %Zd rem)", ret, *size_sent, size);
- return ret;
-
-} /* end rxrpc_call_write_data() */
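
The loop above is essentially a packetiser: data is appended to the message
under construction until it reaches the connection MTU, at which point the
message is flushed and a fresh one is started; a final partial message is
held back unless the caller marked it as the last packet. A standalone
sketch of that accounting (assumed fixed MTU, no kvecs):

	#include <stddef.h>
	#include <stdio.h>

	#define MTU 1024

	static void write_data(const char *buf, size_t size)
	{
		size_t in_pkt = 0;	/* bytes in the current packet */

		while (size > 0) {
			size_t space = MTU - in_pkt;
			size_t chunk = size < space ? size : space;

			/* ...append 'chunk' bytes of 'buf' here... */
			buf += chunk;
			size -= chunk;
			in_pkt += chunk;

			if (in_pkt >= MTU) {
				printf("flush %zu-byte packet\n", in_pkt);
				in_pkt = 0;	/* start a new packet */
			}
		}
		if (in_pkt > 0)
			printf("%zu bytes held for the next write\n",
			       in_pkt);
	}

	int main(void)
	{
		static const char buf[3000];

		write_data(buf, sizeof(buf));
		return 0;
	}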
-
-/*****************************************************************************/
-/*
- * flush outstanding packets to the network
- */
-static int rxrpc_call_flush(struct rxrpc_call *call)
-{
- struct rxrpc_message *msg;
- int ret = 0;
-
- _enter("%p", call);
-
- rxrpc_get_call(call);
-
- /* if there's a packet under construction, then dispatch it now */
- if (call->snd_nextmsg) {
- msg = call->snd_nextmsg;
- call->snd_nextmsg = NULL;
-
- if (msg->hdr.flags & RXRPC_LAST_PACKET) {
- msg->hdr.flags &= ~RXRPC_MORE_PACKETS;
- if (call->app_call_state != RXRPC_CSTATE_CLNT_SND_ARGS)
- msg->hdr.flags |= RXRPC_REQUEST_ACK;
- }
- else {
- msg->hdr.flags |= RXRPC_MORE_PACKETS;
- }
-
- _proto("Sending DATA message { ds=%Zu dc=%u df=%02lu }",
- msg->dsize, msg->dcount, msg->dfree);
-
- /* queue and adjust call state */
- spin_lock(&call->lock);
- list_add_tail(&msg->link, &call->acks_pendq);
-
- /* decide what to do depending on current state and if this is
- * the last packet */
- ret = -EINVAL;
- switch (call->app_call_state) {
- case RXRPC_CSTATE_SRVR_SND_REPLY:
- if (msg->hdr.flags & RXRPC_LAST_PACKET) {
- call->app_call_state =
- RXRPC_CSTATE_SRVR_RCV_FINAL_ACK;
- _state(call);
- }
- break;
-
- case RXRPC_CSTATE_CLNT_SND_ARGS:
- if (msg->hdr.flags & RXRPC_LAST_PACKET) {
- call->app_call_state =
- RXRPC_CSTATE_CLNT_RCV_REPLY;
- _state(call);
- }
- break;
-
- case RXRPC_CSTATE_ERROR:
-			ret = call->app_errno;
-			/* fall through */
-		default:
- spin_unlock(&call->lock);
- goto out;
- }
-
- call->acks_pend_cnt++;
-
- mod_timer(&call->acks_timeout,
- __rxrpc_rtt_based_timeout(call,
- rxrpc_call_acks_timeout));
-
- spin_unlock(&call->lock);
-
- ret = rxrpc_conn_sendmsg(call->conn, msg);
- if (ret == 0)
- call->pkt_snd_count++;
- }
-
- out:
- rxrpc_put_call(call);
-
- _leave(" = %d", ret);
- return ret;
-
-} /* end rxrpc_call_flush() */
-
-/*****************************************************************************/
-/*
- * resend NAK'd or unacknowledged packets up to the highest one specified
- */
-static void rxrpc_call_resend(struct rxrpc_call *call, rxrpc_seq_t highest)
-{
- struct rxrpc_message *msg;
- struct list_head *_p;
- rxrpc_seq_t seq = 0;
-
- _enter("%p,%u", call, highest);
-
- _proto("Rx Resend required");
-
- /* handle too many resends */
- if (call->snd_resend_cnt >= rxrpc_call_max_resend) {
- _debug("Aborting due to too many resends (rcv=%d)",
- call->pkt_rcv_count);
- rxrpc_call_abort(call,
- call->pkt_rcv_count > 0 ? -EIO : -ETIMEDOUT);
- _leave("");
- return;
- }
-
- spin_lock(&call->lock);
- call->snd_resend_cnt++;
- for (;;) {
- /* determine which the next packet we might need to ACK is */
- if (seq <= call->acks_dftv_seq)
- seq = call->acks_dftv_seq;
- seq++;
-
- if (seq > highest)
- break;
-
- /* look for the packet in the pending-ACK queue */
- list_for_each(_p, &call->acks_pendq) {
- msg = list_entry(_p, struct rxrpc_message, link);
- if (msg->seq == seq)
- goto found_msg;
- }
-
- panic("%s(%p,%d):"
- " Inconsistent pending-ACK queue (ds=%u sc=%u sq=%u)\n",
- __FUNCTION__, call, highest,
- call->acks_dftv_seq, call->snd_seq_count, seq);
-
- found_msg:
- if (msg->state != RXRPC_MSG_SENT)
- continue; /* only un-ACK'd packets */
-
- rxrpc_get_message(msg);
- spin_unlock(&call->lock);
-
- /* send each message again (and ignore any errors we might
- * incur) */
- _proto("Resending DATA message { ds=%Zu dc=%u df=%02lu }",
- msg->dsize, msg->dcount, msg->dfree);
-
- if (rxrpc_conn_sendmsg(call->conn, msg) == 0)
- call->pkt_snd_count++;
-
- rxrpc_put_message(msg);
-
- spin_lock(&call->lock);
- }
-
- /* reset the timeout */
- mod_timer(&call->acks_timeout,
- __rxrpc_rtt_based_timeout(call, rxrpc_call_acks_timeout));
-
- spin_unlock(&call->lock);
-
- _leave("");
-} /* end rxrpc_call_resend() */
-
-/*****************************************************************************/
-/*
- * handle an ICMP error being applied to a call
- */
-void rxrpc_call_handle_error(struct rxrpc_call *call, int local, int errno)
-{
- _enter("%p{%u},%d", call, ntohl(call->call_id), errno);
-
- /* if this call is already aborted, then just wake up any waiters */
- if (call->app_call_state == RXRPC_CSTATE_ERROR) {
- call->app_error_func(call);
- }
- else {
- /* tell the app layer what happened */
- spin_lock(&call->lock);
- call->app_call_state = RXRPC_CSTATE_ERROR;
- _state(call);
- if (local)
- call->app_err_state = RXRPC_ESTATE_LOCAL_ERROR;
- else
- call->app_err_state = RXRPC_ESTATE_REMOTE_ERROR;
- call->app_errno = errno;
- call->app_mark = RXRPC_APP_MARK_EOF;
- call->app_read_buf = NULL;
- call->app_async_read = 0;
-
- /* map the error */
- call->app_aemap_func(call);
-
- del_timer_sync(&call->acks_timeout);
- del_timer_sync(&call->rcv_timeout);
- del_timer_sync(&call->ackr_dfr_timo);
-
- spin_unlock(&call->lock);
-
- call->app_error_func(call);
- }
-
- _leave("");
-} /* end rxrpc_call_handle_error() */
diff --git a/net/rxrpc/connection.c b/net/rxrpc/connection.c
deleted file mode 100644
index a7c929a9fdc..00000000000
--- a/net/rxrpc/connection.c
+++ /dev/null
@@ -1,777 +0,0 @@
-/* connection.c: Rx connection routines
- *
- * Copyright (C) 2002 Red Hat, Inc. All Rights Reserved.
- * Written by David Howells (dhowells@redhat.com)
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
- */
-
-#include <linux/sched.h>
-#include <linux/slab.h>
-#include <linux/module.h>
-#include <rxrpc/rxrpc.h>
-#include <rxrpc/transport.h>
-#include <rxrpc/peer.h>
-#include <rxrpc/connection.h>
-#include <rxrpc/call.h>
-#include <rxrpc/message.h>
-#include <linux/udp.h>
-#include <linux/ip.h>
-#include <net/sock.h>
-#include <asm/uaccess.h>
-#include "internal.h"
-
-__RXACCT_DECL(atomic_t rxrpc_connection_count);
-
-LIST_HEAD(rxrpc_conns);
-DECLARE_RWSEM(rxrpc_conns_sem);
-unsigned long rxrpc_conn_timeout = 60 * 60;
-
-static void rxrpc_conn_do_timeout(struct rxrpc_connection *conn);
-
-static void __rxrpc_conn_timeout(rxrpc_timer_t *timer)
-{
- struct rxrpc_connection *conn =
- list_entry(timer, struct rxrpc_connection, timeout);
-
- _debug("Rx CONN TIMEOUT [%p{u=%d}]", conn, atomic_read(&conn->usage));
-
- rxrpc_conn_do_timeout(conn);
-}
-
-static const struct rxrpc_timer_ops rxrpc_conn_timer_ops = {
- .timed_out = __rxrpc_conn_timeout,
-};
-
-/*****************************************************************************/
-/*
- * create a new connection record
- */
-static inline int __rxrpc_create_connection(struct rxrpc_peer *peer,
- struct rxrpc_connection **_conn)
-{
- struct rxrpc_connection *conn;
-
-	_enter("%p", peer);
-
- /* allocate and initialise a connection record */
- conn = kzalloc(sizeof(struct rxrpc_connection), GFP_KERNEL);
- if (!conn) {
- _leave(" = -ENOMEM");
- return -ENOMEM;
- }
-
- atomic_set(&conn->usage, 1);
-
- INIT_LIST_HEAD(&conn->link);
- INIT_LIST_HEAD(&conn->id_link);
- init_waitqueue_head(&conn->chanwait);
- spin_lock_init(&conn->lock);
- rxrpc_timer_init(&conn->timeout, &rxrpc_conn_timer_ops);
-
- do_gettimeofday(&conn->atime);
- conn->mtu_size = 1024;
- conn->peer = peer;
- conn->trans = peer->trans;
-
- __RXACCT(atomic_inc(&rxrpc_connection_count));
- *_conn = conn;
- _leave(" = 0 (%p)", conn);
-
- return 0;
-} /* end __rxrpc_create_connection() */
-
-/*****************************************************************************/
-/*
- * create a new connection record for outgoing connections
- */
-int rxrpc_create_connection(struct rxrpc_transport *trans,
- __be16 port,
- __be32 addr,
- uint16_t service_id,
- void *security,
- struct rxrpc_connection **_conn)
-{
- struct rxrpc_connection *candidate, *conn;
- struct rxrpc_peer *peer;
- struct list_head *_p;
- __be32 connid;
- int ret;
-
- _enter("%p{%hu},%u,%hu", trans, trans->port, ntohs(port), service_id);
-
- /* get a peer record */
- ret = rxrpc_peer_lookup(trans, addr, &peer);
- if (ret < 0) {
- _leave(" = %d", ret);
- return ret;
- }
-
- /* allocate and initialise a connection record */
- ret = __rxrpc_create_connection(peer, &candidate);
- if (ret < 0) {
- rxrpc_put_peer(peer);
- _leave(" = %d", ret);
- return ret;
- }
-
- /* fill in the specific bits */
- candidate->addr.sin_family = AF_INET;
- candidate->addr.sin_port = port;
- candidate->addr.sin_addr.s_addr = addr;
-
- candidate->in_epoch = rxrpc_epoch;
- candidate->out_epoch = rxrpc_epoch;
- candidate->in_clientflag = 0;
- candidate->out_clientflag = RXRPC_CLIENT_INITIATED;
- candidate->service_id = htons(service_id);
-
- /* invent a unique connection ID */
- write_lock(&peer->conn_idlock);
-
- try_next_id:
- connid = htonl(peer->conn_idcounter & RXRPC_CIDMASK);
- peer->conn_idcounter += RXRPC_MAXCALLS;
-
- list_for_each(_p, &peer->conn_idlist) {
- conn = list_entry(_p, struct rxrpc_connection, id_link);
- if (connid == conn->conn_id)
- goto try_next_id;
- if (connid > conn->conn_id)
- break;
- }
-
- _debug("selected candidate conn ID %x.%u",
- ntohl(peer->addr.s_addr), ntohl(connid));
-
- candidate->conn_id = connid;
- list_add_tail(&candidate->id_link, _p);
-
- write_unlock(&peer->conn_idlock);
-
- /* attach to peer */
- candidate->peer = peer;
-
- write_lock(&peer->conn_lock);
-
- /* search the peer's transport graveyard list */
- spin_lock(&peer->conn_gylock);
- list_for_each(_p, &peer->conn_graveyard) {
- conn = list_entry(_p, struct rxrpc_connection, link);
- if (conn->addr.sin_port == candidate->addr.sin_port &&
- conn->security_ix == candidate->security_ix &&
- conn->service_id == candidate->service_id &&
- conn->in_clientflag == 0)
- goto found_in_graveyard;
- }
- spin_unlock(&peer->conn_gylock);
-
- /* pick the new candidate */
- _debug("created connection: {%08x} [out]", ntohl(candidate->conn_id));
- atomic_inc(&peer->conn_count);
- conn = candidate;
- candidate = NULL;
-
- make_active:
- list_add_tail(&conn->link, &peer->conn_active);
- write_unlock(&peer->conn_lock);
-
- if (candidate) {
- write_lock(&peer->conn_idlock);
- list_del(&candidate->id_link);
- write_unlock(&peer->conn_idlock);
-
- __RXACCT(atomic_dec(&rxrpc_connection_count));
- kfree(candidate);
- }
- else {
- down_write(&rxrpc_conns_sem);
- list_add_tail(&conn->proc_link, &rxrpc_conns);
- up_write(&rxrpc_conns_sem);
- }
-
- *_conn = conn;
- _leave(" = 0 (%p)", conn);
-
- return 0;
-
- /* handle resurrecting a connection from the graveyard */
- found_in_graveyard:
- _debug("resurrecting connection: {%08x} [out]", ntohl(conn->conn_id));
- rxrpc_get_connection(conn);
- rxrpc_krxtimod_del_timer(&conn->timeout);
- list_del_init(&conn->link);
- spin_unlock(&peer->conn_gylock);
- goto make_active;
-} /* end rxrpc_create_connection() */
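
The ID allocation above is what makes the channel trick possible: the
per-peer counter advances in steps of RXRPC_MAXCALLS, so the bottom bits of
every allocated connection ID are zero and remain free to carry the channel
number of each call multiplexed over the connection. A sketch of the
resulting cid layout (constants assumed from the classic Rx protocol):

	#include <stdint.h>
	#include <stdio.h>

	#define RXRPC_MAXCALLS    4
	#define RXRPC_CHANNELMASK ((uint32_t) (RXRPC_MAXCALLS - 1))
	#define RXRPC_CIDMASK     (~RXRPC_CHANNELMASK)

	int main(void)
	{
		uint32_t counter = 0;

		for (int i = 0; i < 3; i++) {
			uint32_t conn_id = counter & RXRPC_CIDMASK;

			counter += RXRPC_MAXCALLS;
			for (uint32_t ch = 0; ch < RXRPC_MAXCALLS; ch++)
				printf("conn %08x chan %u -> cid %08x\n",
				       (unsigned) conn_id, (unsigned) ch,
				       (unsigned) (conn_id | ch));
		}
		return 0;
	}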
-
-/*****************************************************************************/
-/*
- * lookup the connection for an incoming packet
- * - create a new connection record for unrecorded incoming connections
- */
-int rxrpc_connection_lookup(struct rxrpc_peer *peer,
- struct rxrpc_message *msg,
- struct rxrpc_connection **_conn)
-{
- struct rxrpc_connection *conn, *candidate = NULL;
- struct list_head *_p;
- struct sk_buff *pkt = msg->pkt;
- int ret, fresh = 0;
- __be32 x_epoch, x_connid;
- __be16 x_port, x_servid;
- __u32 x_secix;
- u8 x_clflag;
-
- _enter("%p{{%hu}},%u,%hu",
- peer,
- peer->trans->port,
- ntohs(pkt->h.uh->source),
- ntohs(msg->hdr.serviceId));
-
- x_port = pkt->h.uh->source;
- x_epoch = msg->hdr.epoch;
- x_clflag = msg->hdr.flags & RXRPC_CLIENT_INITIATED;
- x_connid = htonl(ntohl(msg->hdr.cid) & RXRPC_CIDMASK);
- x_servid = msg->hdr.serviceId;
- x_secix = msg->hdr.securityIndex;
-
- /* [common case] search the transport's active list first */
- read_lock(&peer->conn_lock);
- list_for_each(_p, &peer->conn_active) {
- conn = list_entry(_p, struct rxrpc_connection, link);
- if (conn->addr.sin_port == x_port &&
- conn->in_epoch == x_epoch &&
- conn->conn_id == x_connid &&
- conn->security_ix == x_secix &&
- conn->service_id == x_servid &&
- conn->in_clientflag == x_clflag)
- goto found_active;
- }
- read_unlock(&peer->conn_lock);
-
- /* [uncommon case] not active
- * - create a candidate for a new record if an inbound connection
- * - only examine the graveyard for an outbound connection
- */
- if (x_clflag) {
- ret = __rxrpc_create_connection(peer, &candidate);
- if (ret < 0) {
- _leave(" = %d", ret);
- return ret;
- }
-
- /* fill in the specifics */
- candidate->addr.sin_family = AF_INET;
- candidate->addr.sin_port = x_port;
- candidate->addr.sin_addr.s_addr = pkt->nh.iph->saddr;
- candidate->in_epoch = x_epoch;
- candidate->out_epoch = x_epoch;
- candidate->in_clientflag = RXRPC_CLIENT_INITIATED;
- candidate->out_clientflag = 0;
- candidate->conn_id = x_connid;
- candidate->service_id = x_servid;
- candidate->security_ix = x_secix;
- }
-
- /* search the active list again, just in case it appeared whilst we
- * were busy */
- write_lock(&peer->conn_lock);
- list_for_each(_p, &peer->conn_active) {
- conn = list_entry(_p, struct rxrpc_connection, link);
- if (conn->addr.sin_port == x_port &&
- conn->in_epoch == x_epoch &&
- conn->conn_id == x_connid &&
- conn->security_ix == x_secix &&
- conn->service_id == x_servid &&
- conn->in_clientflag == x_clflag)
- goto found_active_second_chance;
- }
-
- /* search the transport's graveyard list */
- spin_lock(&peer->conn_gylock);
- list_for_each(_p, &peer->conn_graveyard) {
- conn = list_entry(_p, struct rxrpc_connection, link);
- if (conn->addr.sin_port == x_port &&
- conn->in_epoch == x_epoch &&
- conn->conn_id == x_connid &&
- conn->security_ix == x_secix &&
- conn->service_id == x_servid &&
- conn->in_clientflag == x_clflag)
- goto found_in_graveyard;
- }
- spin_unlock(&peer->conn_gylock);
-
- /* outbound connections aren't created here */
- if (!x_clflag) {
- write_unlock(&peer->conn_lock);
- _leave(" = -ENOENT");
- return -ENOENT;
- }
-
- /* we can now add the new candidate to the list */
- _debug("created connection: {%08x} [in]", ntohl(candidate->conn_id));
- rxrpc_get_peer(peer);
- conn = candidate;
- candidate = NULL;
- atomic_inc(&peer->conn_count);
- fresh = 1;
-
- make_active:
- list_add_tail(&conn->link, &peer->conn_active);
-
- success_uwfree:
- write_unlock(&peer->conn_lock);
-
- if (candidate) {
- write_lock(&peer->conn_idlock);
- list_del(&candidate->id_link);
- write_unlock(&peer->conn_idlock);
-
- __RXACCT(atomic_dec(&rxrpc_connection_count));
- kfree(candidate);
- }
-
- if (fresh) {
- down_write(&rxrpc_conns_sem);
- list_add_tail(&conn->proc_link, &rxrpc_conns);
- up_write(&rxrpc_conns_sem);
- }
-
- success:
- *_conn = conn;
- _leave(" = 0 (%p)", conn);
- return 0;
-
- /* handle the connection being found in the active list straight off */
- found_active:
- rxrpc_get_connection(conn);
- read_unlock(&peer->conn_lock);
- goto success;
-
- /* handle resurrecting a connection from the graveyard */
- found_in_graveyard:
- _debug("resurrecting connection: {%08x} [in]", ntohl(conn->conn_id));
- rxrpc_get_peer(peer);
- rxrpc_get_connection(conn);
- rxrpc_krxtimod_del_timer(&conn->timeout);
- list_del_init(&conn->link);
- spin_unlock(&peer->conn_gylock);
- goto make_active;
-
- /* handle finding the connection on the second time through the active
- * list */
- found_active_second_chance:
- rxrpc_get_connection(conn);
- goto success_uwfree;
-
-} /* end rxrpc_connection_lookup() */
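
The lookup above follows the optimistic double-search pattern: probe the
active list under the read lock first (the common case), and only on a miss
allocate a candidate record, retake the lock for writing and search again,
since another CPU may have installed the same connection in the window
between the two searches. A pthread-based skeleton of the pattern (assumed
types, not the kernel code):

	#include <pthread.h>
	#include <stdlib.h>

	struct entry { int key; struct entry *next; };

	static pthread_rwlock_t rwlock = PTHREAD_RWLOCK_INITIALIZER;
	static struct entry *table;

	static struct entry *find(struct entry *head, int key)
	{
		for (; head; head = head->next)
			if (head->key == key)
				return head;
		return NULL;
	}

	static struct entry *lookup_or_create(int key)
	{
		struct entry *e, *candidate;

		pthread_rwlock_rdlock(&rwlock);	/* common case */
		e = find(table, key);
		pthread_rwlock_unlock(&rwlock);
		if (e)
			return e;

		candidate = calloc(1, sizeof(*candidate));
		if (!candidate)
			return NULL;
		candidate->key = key;

		pthread_rwlock_wrlock(&rwlock);
		e = find(table, key);		/* did we race? */
		if (!e) {
			candidate->next = table;
			table = candidate;	/* no: install ours */
			e = candidate;
			candidate = NULL;
		}
		pthread_rwlock_unlock(&rwlock);

		free(candidate);		/* yes: discard the spare */
		return e;
	}

	int main(void)
	{
		return lookup_or_create(42) ? 0 : 1;
	}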
-
-/*****************************************************************************/
-/*
- * finish using a connection record
- * - it will be transferred to the peer's connection graveyard when refcount
- * reaches 0
- */
-void rxrpc_put_connection(struct rxrpc_connection *conn)
-{
- struct rxrpc_peer *peer;
-
- if (!conn)
- return;
-
- _enter("%p{u=%d p=%hu}",
- conn, atomic_read(&conn->usage), ntohs(conn->addr.sin_port));
-
- peer = conn->peer;
- spin_lock(&peer->conn_gylock);
-
- /* sanity check */
- if (atomic_read(&conn->usage) <= 0)
- BUG();
-
- if (likely(!atomic_dec_and_test(&conn->usage))) {
- spin_unlock(&peer->conn_gylock);
- _leave("");
- return;
- }
-
- /* move to graveyard queue */
- _debug("burying connection: {%08x}", ntohl(conn->conn_id));
- list_move_tail(&conn->link, &peer->conn_graveyard);
-
- rxrpc_krxtimod_add_timer(&conn->timeout, rxrpc_conn_timeout * HZ);
-
- spin_unlock(&peer->conn_gylock);
-
- rxrpc_put_peer(conn->peer);
-
- _leave(" [killed]");
-} /* end rxrpc_put_connection() */
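
The bury/resurrect refcounting in rxrpc_put_connection() and rxrpc_connection_lookup() above reduces to a small pattern: the final put moves the object onto a graveyard list rather than freeing it, and a later lookup may pull it back out. A hedged, single-threaded userspace C sketch follows — obj, put_obj and get_obj are illustrative names only, and plain pointers stand in for the spinlock-guarded kernel lists:

#include <stdatomic.h>
#include <stdio.h>
#include <stdlib.h>

struct obj {
	atomic_int usage;
	int id;
	struct obj *next;		/* active or graveyard chain */
};

static struct obj *active, *graveyard;

static void unlink_from(struct obj **list, struct obj *o)
{
	for (struct obj **p = list; *p; p = &(*p)->next)
		if (*p == o) { *p = o->next; return; }
}

/* drop a reference; the last put buries the object rather than
 * freeing it, so a later lookup can still resurrect it */
static void put_obj(struct obj *o)
{
	if (atomic_fetch_sub(&o->usage, 1) > 1)
		return;
	unlink_from(&active, o);
	o->next = graveyard;
	graveyard = o;
	printf("buried %d\n", o->id);
}

/* search the active list, then the graveyard; a graveyard hit is
 * resurrected back onto the active list with usage restored to 1 */
static struct obj *get_obj(int id)
{
	for (struct obj *o = active; o; o = o->next)
		if (o->id == id) {
			atomic_fetch_add(&o->usage, 1);
			return o;
		}
	for (struct obj *o = graveyard; o; o = o->next)
		if (o->id == id) {
			unlink_from(&graveyard, o);
			o->next = active;
			active = o;
			atomic_store(&o->usage, 1);
			printf("resurrected %d\n", o->id);
			return o;
		}
	return NULL;
}

int main(void)
{
	struct obj *o = calloc(1, sizeof(*o));
	o->id = 42;
	atomic_store(&o->usage, 1);
	o->next = active;
	active = o;

	put_obj(o);			/* usage hits 0 -> graveyard */
	return get_obj(42) ? 0 : 1;	/* found in graveyard */
}
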
-
-/*****************************************************************************/
-/*
- * free a connection record
- */
-static void rxrpc_conn_do_timeout(struct rxrpc_connection *conn)
-{
- struct rxrpc_peer *peer;
-
- _enter("%p{u=%d p=%hu}",
- conn, atomic_read(&conn->usage), ntohs(conn->addr.sin_port));
-
- peer = conn->peer;
-
- if (atomic_read(&conn->usage) < 0)
- BUG();
-
- /* remove from graveyard if still dead */
- spin_lock(&peer->conn_gylock);
- if (atomic_read(&conn->usage) == 0) {
- list_del_init(&conn->link);
- }
- else {
- conn = NULL;
- }
- spin_unlock(&peer->conn_gylock);
-
- if (!conn) {
- _leave("");
- return; /* resurrected */
- }
-
- _debug("--- Destroying Connection %p{%08x} ---",
- conn, ntohl(conn->conn_id));
-
- down_write(&rxrpc_conns_sem);
- list_del(&conn->proc_link);
- up_write(&rxrpc_conns_sem);
-
- write_lock(&peer->conn_idlock);
- list_del(&conn->id_link);
- write_unlock(&peer->conn_idlock);
-
- __RXACCT(atomic_dec(&rxrpc_connection_count));
- kfree(conn);
-
- /* if the graveyard is now empty, wake up anyone waiting for that */
- if (atomic_dec_and_test(&peer->conn_count))
- wake_up(&peer->conn_gy_waitq);
-
- _leave(" [destroyed]");
-} /* end rxrpc_conn_do_timeout() */
-
-/*****************************************************************************/
-/*
- * clear all connection records from a peer endpoint
- */
-void rxrpc_conn_clearall(struct rxrpc_peer *peer)
-{
- DECLARE_WAITQUEUE(myself, current);
-
- struct rxrpc_connection *conn;
- int err;
-
- _enter("%p", peer);
-
- /* there shouldn't be any active conns remaining */
- if (!list_empty(&peer->conn_active))
- BUG();
-
- /* manually timeout all conns in the graveyard */
- spin_lock(&peer->conn_gylock);
- while (!list_empty(&peer->conn_graveyard)) {
- conn = list_entry(peer->conn_graveyard.next,
- struct rxrpc_connection, link);
- err = rxrpc_krxtimod_del_timer(&conn->timeout);
- spin_unlock(&peer->conn_gylock);
-
- if (err == 0)
- rxrpc_conn_do_timeout(conn);
-
- spin_lock(&peer->conn_gylock);
- }
- spin_unlock(&peer->conn_gylock);
-
-	/* wait for the conn graveyard to be completely cleared */
- set_current_state(TASK_UNINTERRUPTIBLE);
- add_wait_queue(&peer->conn_gy_waitq, &myself);
-
- while (atomic_read(&peer->conn_count) != 0) {
- schedule();
- set_current_state(TASK_UNINTERRUPTIBLE);
- }
-
- remove_wait_queue(&peer->conn_gy_waitq, &myself);
- set_current_state(TASK_RUNNING);
-
- _leave("");
-} /* end rxrpc_conn_clearall() */
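
rxrpc_conn_clearall() above forces every buried connection to time out, then sleeps on conn_gy_waitq until conn_count drops to zero. In userspace the same drain idiom maps naturally onto a pthread condition variable; a hedged sketch, with the count and names invented for illustration:

#include <pthread.h>
#include <stdio.h>

static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t graveyard_empty = PTHREAD_COND_INITIALIZER;
static int count = 2;			/* pretend two buried objects remain */

/* called as each buried object is finally destroyed */
static void drop_one(void)
{
	pthread_mutex_lock(&lock);
	if (--count == 0)
		pthread_cond_broadcast(&graveyard_empty);
	pthread_mutex_unlock(&lock);
}

static void *reaper(void *arg)
{
	drop_one();
	drop_one();
	return NULL;
}

int main(void)
{
	pthread_t t;
	pthread_create(&t, NULL, reaper, NULL);

	/* the clearall side: block until the count reaches zero */
	pthread_mutex_lock(&lock);
	while (count != 0)
		pthread_cond_wait(&graveyard_empty, &lock);
	pthread_mutex_unlock(&lock);

	pthread_join(t, NULL);
	puts("graveyard drained");
	return 0;
}
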
-
-/*****************************************************************************/
-/*
- * allocate and prepare a message for sending out through the transport
- * endpoint
- */
-int rxrpc_conn_newmsg(struct rxrpc_connection *conn,
- struct rxrpc_call *call,
- uint8_t type,
- int dcount,
- struct kvec diov[],
- gfp_t alloc_flags,
- struct rxrpc_message **_msg)
-{
- struct rxrpc_message *msg;
- int loop;
-
- _enter("%p{%d},%p,%u", conn, ntohs(conn->addr.sin_port), call, type);
-
- if (dcount > 3) {
- _leave(" = -EINVAL");
- return -EINVAL;
- }
-
- msg = kzalloc(sizeof(struct rxrpc_message), alloc_flags);
- if (!msg) {
- _leave(" = -ENOMEM");
- return -ENOMEM;
- }
-
- atomic_set(&msg->usage, 1);
-
- INIT_LIST_HEAD(&msg->link);
-
- msg->state = RXRPC_MSG_PREPARED;
-
- msg->hdr.epoch = conn->out_epoch;
- msg->hdr.cid = conn->conn_id | (call ? call->chan_ix : 0);
- msg->hdr.callNumber = call ? call->call_id : 0;
- msg->hdr.type = type;
- msg->hdr.flags = conn->out_clientflag;
- msg->hdr.securityIndex = conn->security_ix;
- msg->hdr.serviceId = conn->service_id;
-
- /* generate sequence numbers for data packets */
- if (call) {
- switch (type) {
- case RXRPC_PACKET_TYPE_DATA:
- msg->seq = ++call->snd_seq_count;
- msg->hdr.seq = htonl(msg->seq);
- break;
- case RXRPC_PACKET_TYPE_ACK:
- /* ACK sequence numbers are complicated. The following
- * may be wrong:
- * - jumbo packet ACKs should have a seq number
- * - normal ACKs should not
- */
- default:
- break;
- }
- }
-
- msg->dcount = dcount + 1;
- msg->dsize = sizeof(msg->hdr);
- msg->data[0].iov_len = sizeof(msg->hdr);
- msg->data[0].iov_base = &msg->hdr;
-
- for (loop=0; loop < dcount; loop++) {
- msg->dsize += diov[loop].iov_len;
- msg->data[loop+1].iov_len = diov[loop].iov_len;
- msg->data[loop+1].iov_base = diov[loop].iov_base;
- }
-
- __RXACCT(atomic_inc(&rxrpc_message_count));
- *_msg = msg;
- _leave(" = 0 (%p) #%d", msg, atomic_read(&rxrpc_message_count));
- return 0;
-} /* end rxrpc_conn_newmsg() */
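
rxrpc_conn_newmsg() reserves data[0] for the wire header and appends the caller's iovecs after it, so a single gather write later emits header plus payload without copying them together. A hedged userspace twin of that layout using writev(2), with a made-up header struct:

#include <stdio.h>
#include <sys/uio.h>
#include <unistd.h>

struct hdr {
	unsigned int seq;
	unsigned int type;
};

int main(void)
{
	struct hdr h = { .seq = 1, .type = 1 };
	char payload[] = "payload bytes";
	struct iovec iov[2];

	/* slot 0 always carries the header, slots 1..n the payload,
	 * mirroring msg->data[0] = &msg->hdr above */
	iov[0].iov_base = &h;
	iov[0].iov_len  = sizeof(h);
	iov[1].iov_base = payload;
	iov[1].iov_len  = sizeof(payload) - 1;

	/* one gather write sends both pieces in a single syscall */
	ssize_t n = writev(STDOUT_FILENO, iov, 2);
	return n < 0;
}
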
-
-/*****************************************************************************/
-/*
- * free a message
- */
-void __rxrpc_put_message(struct rxrpc_message *msg)
-{
- int loop;
-
- _enter("%p #%d", msg, atomic_read(&rxrpc_message_count));
-
- if (msg->pkt)
- kfree_skb(msg->pkt);
- rxrpc_put_connection(msg->conn);
-
- for (loop = 0; loop < 8; loop++)
- if (test_bit(loop, &msg->dfree))
- kfree(msg->data[loop].iov_base);
-
- __RXACCT(atomic_dec(&rxrpc_message_count));
- kfree(msg);
-
- _leave("");
-} /* end __rxrpc_put_message() */
-
-/*****************************************************************************/
-/*
- * send a message out through the transport endpoint
- */
-int rxrpc_conn_sendmsg(struct rxrpc_connection *conn,
- struct rxrpc_message *msg)
-{
- struct msghdr msghdr;
- int ret;
-
- _enter("%p{%d}", conn, ntohs(conn->addr.sin_port));
-
- /* fill in some fields in the header */
- spin_lock(&conn->lock);
- msg->hdr.serial = htonl(++conn->serial_counter);
- msg->rttdone = 0;
- spin_unlock(&conn->lock);
-
- /* set up the message to be transmitted */
- msghdr.msg_name = &conn->addr;
- msghdr.msg_namelen = sizeof(conn->addr);
- msghdr.msg_control = NULL;
- msghdr.msg_controllen = 0;
- msghdr.msg_flags = MSG_CONFIRM | MSG_DONTWAIT;
-
- _net("Sending message type %d of %Zd bytes to %08x:%d",
- msg->hdr.type,
- msg->dsize,
- ntohl(conn->addr.sin_addr.s_addr),
- ntohs(conn->addr.sin_port));
-
- /* send the message */
- ret = kernel_sendmsg(conn->trans->socket, &msghdr,
- msg->data, msg->dcount, msg->dsize);
- if (ret < 0) {
- msg->state = RXRPC_MSG_ERROR;
- } else {
- msg->state = RXRPC_MSG_SENT;
- ret = 0;
-
- spin_lock(&conn->lock);
- do_gettimeofday(&conn->atime);
- msg->stamp = conn->atime;
- spin_unlock(&conn->lock);
- }
-
- _leave(" = %d", ret);
-
- return ret;
-} /* end rxrpc_conn_sendmsg() */
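
The send path above fills a struct msghdr whose msg_name carries the peer's address, and transmits with MSG_DONTWAIT so the daemon never blocks. The same call shape exists in userspace as sendmsg(2); a hedged sketch, where the address and port are placeholders:

#include <arpa/inet.h>
#include <netinet/in.h>
#include <sys/socket.h>
#include <sys/uio.h>
#include <unistd.h>

int main(void)
{
	int fd = socket(AF_INET, SOCK_DGRAM, 0);
	if (fd < 0)
		return 1;

	struct sockaddr_in dst = {
		.sin_family = AF_INET,
		.sin_port   = htons(7001),	/* placeholder port */
	};
	inet_pton(AF_INET, "127.0.0.1", &dst.sin_addr);

	char buf[] = "hello";
	struct iovec iov = { .iov_base = buf, .iov_len = sizeof(buf) - 1 };

	/* msg_name addresses the datagram, as conn->addr does above;
	 * MSG_DONTWAIT mirrors the non-blocking kernel send */
	struct msghdr mh = { 0 };
	mh.msg_name    = &dst;
	mh.msg_namelen = sizeof(dst);
	mh.msg_iov     = &iov;
	mh.msg_iovlen  = 1;

	ssize_t n = sendmsg(fd, &mh, MSG_DONTWAIT);
	close(fd);
	return n < 0;
}
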
-
-/*****************************************************************************/
-/*
- * deal with a subsequent call packet
- */
-int rxrpc_conn_receive_call_packet(struct rxrpc_connection *conn,
- struct rxrpc_call *call,
- struct rxrpc_message *msg)
-{
- struct rxrpc_message *pmsg;
- struct dst_entry *dst;
- struct list_head *_p;
- unsigned cix, seq;
- int ret = 0;
-
- _enter("%p,%p,%p", conn, call, msg);
-
- if (!call) {
- cix = ntohl(msg->hdr.cid) & RXRPC_CHANNELMASK;
-
- spin_lock(&conn->lock);
- call = conn->channels[cix];
-
- if (!call || call->call_id != msg->hdr.callNumber) {
- spin_unlock(&conn->lock);
- rxrpc_trans_immediate_abort(conn->trans, msg, -ENOENT);
- goto out;
- }
- else {
- rxrpc_get_call(call);
- spin_unlock(&conn->lock);
- }
- }
- else {
- rxrpc_get_call(call);
- }
-
- _proto("Received packet %%%u [%u] on call %hu:%u:%u",
- ntohl(msg->hdr.serial),
- ntohl(msg->hdr.seq),
- ntohs(msg->hdr.serviceId),
- ntohl(conn->conn_id),
- ntohl(call->call_id));
-
- call->pkt_rcv_count++;
-
- dst = msg->pkt->dst;
- if (dst && dst->dev)
- conn->peer->if_mtu =
- dst->dev->mtu - dst->dev->hard_header_len;
-
- /* queue on the call in seq order */
- rxrpc_get_message(msg);
- seq = msg->seq;
-
- spin_lock(&call->lock);
- list_for_each(_p, &call->rcv_receiveq) {
- pmsg = list_entry(_p, struct rxrpc_message, link);
- if (pmsg->seq > seq)
- break;
- }
- list_add_tail(&msg->link, _p);
-
- /* reset the activity timeout */
- call->flags |= RXRPC_CALL_RCV_PKT;
- mod_timer(&call->rcv_timeout,jiffies + rxrpc_call_rcv_timeout * HZ);
-
- spin_unlock(&call->lock);
-
- rxrpc_krxiod_queue_call(call);
-
- rxrpc_put_call(call);
- out:
- _leave(" = %d", ret);
- return ret;
-} /* end rxrpc_conn_receive_call_packet() */
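
The queueing step above keeps the receive queue sorted by walking until the first entry with a larger seq and splicing in before it. A hedged userspace reduction of that ordered insert (msg and insert_ordered are illustrative names):

#include <stdio.h>
#include <stdlib.h>

struct msg { unsigned int seq; struct msg *next; };

/* insert in ascending seq order: walk to the first entry with a
 * larger seq and splice in before it, as the list walk above does */
static void insert_ordered(struct msg **head, struct msg *m)
{
	struct msg **p = head;
	while (*p && (*p)->seq <= m->seq)
		p = &(*p)->next;
	m->next = *p;
	*p = m;
}

int main(void)
{
	struct msg *head = NULL;
	unsigned int in[] = { 3, 1, 2 };

	for (int i = 0; i < 3; i++) {
		struct msg *m = malloc(sizeof(*m));
		m->seq = in[i];
		insert_ordered(&head, m);
	}
	for (struct msg *m = head; m; m = m->next)
		printf("%u ", m->seq);	/* prints: 1 2 3 */
	putchar('\n');
	return 0;
}
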
-
-/*****************************************************************************/
-/*
- * handle an ICMP error being applied to a connection
- */
-void rxrpc_conn_handle_error(struct rxrpc_connection *conn,
- int local, int errno)
-{
- struct rxrpc_call *calls[4];
- int loop;
-
- _enter("%p{%d},%d", conn, ntohs(conn->addr.sin_port), errno);
-
- /* get a ref to all my calls in one go */
- memset(calls, 0, sizeof(calls));
- spin_lock(&conn->lock);
-
- for (loop = 3; loop >= 0; loop--) {
- if (conn->channels[loop]) {
- calls[loop] = conn->channels[loop];
- rxrpc_get_call(calls[loop]);
- }
- }
-
- spin_unlock(&conn->lock);
-
- /* now kick them all */
- for (loop = 3; loop >= 0; loop--) {
- if (calls[loop]) {
- rxrpc_call_handle_error(calls[loop], local, errno);
- rxrpc_put_call(calls[loop]);
- }
- }
-
- _leave("");
-} /* end rxrpc_conn_handle_error() */
diff --git a/net/rxrpc/internal.h b/net/rxrpc/internal.h
deleted file mode 100644
index cc0c5795a10..00000000000
--- a/net/rxrpc/internal.h
+++ /dev/null
@@ -1,106 +0,0 @@
-/* internal.h: internal Rx RPC stuff
- *
- * Copyright (c) 2002 David Howells (dhowells@redhat.com).
- */
-
-#ifndef RXRPC_INTERNAL_H
-#define RXRPC_INTERNAL_H
-
-#include <linux/compiler.h>
-#include <linux/kernel.h>
-
-/*
- * debug accounting
- */
-#if 1
-#define __RXACCT_DECL(X) X
-#define __RXACCT(X) do { X; } while(0)
-#else
-#define __RXACCT_DECL(X)
-#define __RXACCT(X) do { } while(0)
-#endif
-
-__RXACCT_DECL(extern atomic_t rxrpc_transport_count);
-__RXACCT_DECL(extern atomic_t rxrpc_peer_count);
-__RXACCT_DECL(extern atomic_t rxrpc_connection_count);
-__RXACCT_DECL(extern atomic_t rxrpc_call_count);
-__RXACCT_DECL(extern atomic_t rxrpc_message_count);
-
-/*
- * debug tracing
- */
-#define kenter(FMT, a...) printk("==> %s("FMT")\n",__FUNCTION__ , ##a)
-#define kleave(FMT, a...) printk("<== %s()"FMT"\n",__FUNCTION__ , ##a)
-#define kdebug(FMT, a...) printk(" "FMT"\n" , ##a)
-#define kproto(FMT, a...) printk("### "FMT"\n" , ##a)
-#define knet(FMT, a...) printk(" "FMT"\n" , ##a)
-
-#if 0
-#define _enter(FMT, a...) kenter(FMT , ##a)
-#define _leave(FMT, a...) kleave(FMT , ##a)
-#define _debug(FMT, a...) kdebug(FMT , ##a)
-#define _proto(FMT, a...) kproto(FMT , ##a)
-#define _net(FMT, a...) knet(FMT , ##a)
-#else
-#define _enter(FMT, a...) do { if (rxrpc_ktrace) kenter(FMT , ##a); } while(0)
-#define _leave(FMT, a...) do { if (rxrpc_ktrace) kleave(FMT , ##a); } while(0)
-#define _debug(FMT, a...) do { if (rxrpc_kdebug) kdebug(FMT , ##a); } while(0)
-#define _proto(FMT, a...) do { if (rxrpc_kproto) kproto(FMT , ##a); } while(0)
-#define _net(FMT, a...) do { if (rxrpc_knet) knet (FMT , ##a); } while(0)
-#endif
-
-static inline void rxrpc_discard_my_signals(void)
-{
- while (signal_pending(current)) {
- siginfo_t sinfo;
-
- spin_lock_irq(&current->sighand->siglock);
- dequeue_signal(current, &current->blocked, &sinfo);
- spin_unlock_irq(&current->sighand->siglock);
- }
-}
-
-/*
- * call.c
- */
-extern struct list_head rxrpc_calls;
-extern struct rw_semaphore rxrpc_calls_sem;
-
-/*
- * connection.c
- */
-extern struct list_head rxrpc_conns;
-extern struct rw_semaphore rxrpc_conns_sem;
-extern unsigned long rxrpc_conn_timeout;
-
-extern void rxrpc_conn_clearall(struct rxrpc_peer *peer);
-
-/*
- * peer.c
- */
-extern struct list_head rxrpc_peers;
-extern struct rw_semaphore rxrpc_peers_sem;
-extern unsigned long rxrpc_peer_timeout;
-
-extern void rxrpc_peer_calculate_rtt(struct rxrpc_peer *peer,
- struct rxrpc_message *msg,
- struct rxrpc_message *resp);
-
-extern void rxrpc_peer_clearall(struct rxrpc_transport *trans);
-
-
-/*
- * proc.c
- */
-#ifdef CONFIG_PROC_FS
-extern int rxrpc_proc_init(void);
-extern void rxrpc_proc_cleanup(void);
-#endif
-
-/*
- * transport.c
- */
-extern struct list_head rxrpc_proc_transports;
-extern struct rw_semaphore rxrpc_proc_transports_sem;
-
-#endif /* RXRPC_INTERNAL_H */
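
The __RXACCT and _enter/_leave families in internal.h use the classic compile-time-toggled macro trick: the disabled variant is still a do { } while (0) statement, so it remains safe inside an un-braced if/else. A hedged userspace equivalent — note that ##__VA_ARGS__ is a GNU extension, like the GNU `a...` form the header itself uses:

#include <stdio.h>

#define TRACE_ON 1

/* flip TRACE_ON to 0 and every trace() compiles away to an empty
 * statement; the do { } while (0) wrapper keeps either form legal
 * wherever a single statement is expected */
#if TRACE_ON
#define trace(FMT, ...) \
	do { printf("### " FMT "\n", ##__VA_ARGS__); } while (0)
#else
#define trace(FMT, ...) do { } while (0)
#endif

int main(void)
{
	int x = 3;
	if (x > 0)
		trace("x = %d", x);	/* safe even without braces */
	else
		trace("non-positive");
	return 0;
}
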
diff --git a/net/rxrpc/krxiod.c b/net/rxrpc/krxiod.c
deleted file mode 100644
index bbbcd6c2404..00000000000
--- a/net/rxrpc/krxiod.c
+++ /dev/null
@@ -1,262 +0,0 @@
-/* krxiod.c: Rx I/O daemon
- *
- * Copyright (C) 2002 Red Hat, Inc. All Rights Reserved.
- * Written by David Howells (dhowells@redhat.com)
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
- */
-
-#include <linux/sched.h>
-#include <linux/completion.h>
-#include <linux/spinlock.h>
-#include <linux/init.h>
-#include <linux/freezer.h>
-#include <rxrpc/krxiod.h>
-#include <rxrpc/transport.h>
-#include <rxrpc/peer.h>
-#include <rxrpc/call.h>
-#include "internal.h"
-
-static DECLARE_WAIT_QUEUE_HEAD(rxrpc_krxiod_sleepq);
-static DECLARE_COMPLETION(rxrpc_krxiod_dead);
-
-static atomic_t rxrpc_krxiod_qcount = ATOMIC_INIT(0);
-
-static LIST_HEAD(rxrpc_krxiod_transportq);
-static DEFINE_SPINLOCK(rxrpc_krxiod_transportq_lock);
-
-static LIST_HEAD(rxrpc_krxiod_callq);
-static DEFINE_SPINLOCK(rxrpc_krxiod_callq_lock);
-
-static volatile int rxrpc_krxiod_die;
-
-/*****************************************************************************/
-/*
- * Rx I/O daemon
- */
-static int rxrpc_krxiod(void *arg)
-{
- DECLARE_WAITQUEUE(krxiod,current);
-
- printk("Started krxiod %d\n",current->pid);
-
- daemonize("krxiod");
-
- /* loop around waiting for work to do */
- do {
- /* wait for work or to be told to exit */
- _debug("### Begin Wait");
- if (!atomic_read(&rxrpc_krxiod_qcount)) {
- set_current_state(TASK_INTERRUPTIBLE);
-
- add_wait_queue(&rxrpc_krxiod_sleepq, &krxiod);
-
- for (;;) {
- set_current_state(TASK_INTERRUPTIBLE);
- if (atomic_read(&rxrpc_krxiod_qcount) ||
- rxrpc_krxiod_die ||
- signal_pending(current))
- break;
-
- schedule();
- }
-
- remove_wait_queue(&rxrpc_krxiod_sleepq, &krxiod);
- set_current_state(TASK_RUNNING);
- }
- _debug("### End Wait");
-
-		/* do work if we've been given some to do */
- _debug("### Begin Work");
-
- /* see if there's a transport in need of attention */
- if (!list_empty(&rxrpc_krxiod_transportq)) {
- struct rxrpc_transport *trans = NULL;
-
- spin_lock_irq(&rxrpc_krxiod_transportq_lock);
-
- if (!list_empty(&rxrpc_krxiod_transportq)) {
- trans = list_entry(
- rxrpc_krxiod_transportq.next,
- struct rxrpc_transport,
- krxiodq_link);
-
- list_del_init(&trans->krxiodq_link);
- atomic_dec(&rxrpc_krxiod_qcount);
-
-				/* make sure it hasn't gone away and can't go
-				 * away while we use it */
- if (atomic_read(&trans->usage)>0)
- rxrpc_get_transport(trans);
- else
- trans = NULL;
- }
-
- spin_unlock_irq(&rxrpc_krxiod_transportq_lock);
-
- if (trans) {
- rxrpc_trans_receive_packet(trans);
- rxrpc_put_transport(trans);
- }
- }
-
- /* see if there's a call in need of attention */
- if (!list_empty(&rxrpc_krxiod_callq)) {
- struct rxrpc_call *call = NULL;
-
- spin_lock_irq(&rxrpc_krxiod_callq_lock);
-
- if (!list_empty(&rxrpc_krxiod_callq)) {
- call = list_entry(rxrpc_krxiod_callq.next,
- struct rxrpc_call,
- rcv_krxiodq_lk);
- list_del_init(&call->rcv_krxiodq_lk);
- atomic_dec(&rxrpc_krxiod_qcount);
-
-				/* make sure it hasn't gone away and can't go
-				 * away while we use it */
- if (atomic_read(&call->usage) > 0) {
- _debug("@@@ KRXIOD"
- " Begin Attend Call %p", call);
- rxrpc_get_call(call);
- }
- else {
- call = NULL;
- }
- }
-
- spin_unlock_irq(&rxrpc_krxiod_callq_lock);
-
- if (call) {
- rxrpc_call_do_stuff(call);
- rxrpc_put_call(call);
- _debug("@@@ KRXIOD End Attend Call %p", call);
- }
- }
-
- _debug("### End Work");
-
- try_to_freeze();
-
- /* discard pending signals */
- rxrpc_discard_my_signals();
-
- } while (!rxrpc_krxiod_die);
-
- /* and that's all */
- complete_and_exit(&rxrpc_krxiod_dead, 0);
-
-} /* end rxrpc_krxiod() */
-
-/*****************************************************************************/
-/*
- * start up a krxiod daemon
- */
-int __init rxrpc_krxiod_init(void)
-{
- return kernel_thread(rxrpc_krxiod, NULL, 0);
-
-} /* end rxrpc_krxiod_init() */
-
-/*****************************************************************************/
-/*
- * kill the krxiod daemon and wait for it to complete
- */
-void rxrpc_krxiod_kill(void)
-{
- rxrpc_krxiod_die = 1;
- wake_up_all(&rxrpc_krxiod_sleepq);
- wait_for_completion(&rxrpc_krxiod_dead);
-
-} /* end rxrpc_krxiod_kill() */
-
-/*****************************************************************************/
-/*
- * queue a transport for attention by krxiod
- */
-void rxrpc_krxiod_queue_transport(struct rxrpc_transport *trans)
-{
- unsigned long flags;
-
- _enter("");
-
- if (list_empty(&trans->krxiodq_link)) {
- spin_lock_irqsave(&rxrpc_krxiod_transportq_lock, flags);
-
- if (list_empty(&trans->krxiodq_link)) {
- if (atomic_read(&trans->usage) > 0) {
- list_add_tail(&trans->krxiodq_link,
- &rxrpc_krxiod_transportq);
- atomic_inc(&rxrpc_krxiod_qcount);
- }
- }
-
- spin_unlock_irqrestore(&rxrpc_krxiod_transportq_lock, flags);
- wake_up_all(&rxrpc_krxiod_sleepq);
- }
-
- _leave("");
-
-} /* end rxrpc_krxiod_queue_transport() */
-
-/*****************************************************************************/
-/*
- * dequeue a transport from krxiod's attention queue
- */
-void rxrpc_krxiod_dequeue_transport(struct rxrpc_transport *trans)
-{
- unsigned long flags;
-
- _enter("");
-
- spin_lock_irqsave(&rxrpc_krxiod_transportq_lock, flags);
- if (!list_empty(&trans->krxiodq_link)) {
- list_del_init(&trans->krxiodq_link);
- atomic_dec(&rxrpc_krxiod_qcount);
- }
- spin_unlock_irqrestore(&rxrpc_krxiod_transportq_lock, flags);
-
- _leave("");
-
-} /* end rxrpc_krxiod_dequeue_transport() */
-
-/*****************************************************************************/
-/*
- * queue a call for attention by krxiod
- */
-void rxrpc_krxiod_queue_call(struct rxrpc_call *call)
-{
- unsigned long flags;
-
- if (list_empty(&call->rcv_krxiodq_lk)) {
- spin_lock_irqsave(&rxrpc_krxiod_callq_lock, flags);
- if (atomic_read(&call->usage) > 0) {
- list_add_tail(&call->rcv_krxiodq_lk,
- &rxrpc_krxiod_callq);
- atomic_inc(&rxrpc_krxiod_qcount);
- }
- spin_unlock_irqrestore(&rxrpc_krxiod_callq_lock, flags);
- }
- wake_up_all(&rxrpc_krxiod_sleepq);
-
-} /* end rxrpc_krxiod_queue_call() */
-
-/*****************************************************************************/
-/*
- * dequeue a call from krxiod's attention queue
- */
-void rxrpc_krxiod_dequeue_call(struct rxrpc_call *call)
-{
- unsigned long flags;
-
- spin_lock_irqsave(&rxrpc_krxiod_callq_lock, flags);
- if (!list_empty(&call->rcv_krxiodq_lk)) {
- list_del_init(&call->rcv_krxiodq_lk);
- atomic_dec(&rxrpc_krxiod_qcount);
- }
- spin_unlock_irqrestore(&rxrpc_krxiod_callq_lock, flags);
-
-} /* end rxrpc_krxiod_dequeue_call() */
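
Both work queues in krxiod.c are drained with the same idiom: a cheap unlocked emptiness test first, then a re-check under the lock before dequeuing, since the queue may have emptied in between. A hedged userspace sketch of that double-checked dequeue — the unlocked peek is formally racy, as in the original, but the worst outcome is a harmless extra lock round-trip:

#include <pthread.h>
#include <stdio.h>

struct node { struct node *next; };
static struct node *queue;
static pthread_mutex_t qlock = PTHREAD_MUTEX_INITIALIZER;

static struct node *try_dequeue(void)
{
	struct node *n = NULL;

	if (!queue)			/* unlocked fast path */
		return NULL;

	pthread_mutex_lock(&qlock);
	if (queue) {			/* re-check under the lock */
		n = queue;
		queue = n->next;
	}
	pthread_mutex_unlock(&qlock);
	return n;
}

int main(void)
{
	struct node a = { 0 };
	queue = &a;
	printf("%s\n", try_dequeue() ? "got work" : "idle");
	return 0;
}
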
diff --git a/net/rxrpc/krxsecd.c b/net/rxrpc/krxsecd.c
deleted file mode 100644
index 9a1e7f5e034..00000000000
--- a/net/rxrpc/krxsecd.c
+++ /dev/null
@@ -1,270 +0,0 @@
-/* krxsecd.c: Rx security daemon
- *
- * Copyright (C) 2002 Red Hat, Inc. All Rights Reserved.
- * Written by David Howells (dhowells@redhat.com)
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
- *
- * This daemon deals with:
- * - consulting the application as to whether inbound peers and calls should be authorised
- * - generating security challenges for inbound connections
- * - responding to security challenges on outbound connections
- */
-
-#include <linux/module.h>
-#include <linux/sched.h>
-#include <linux/completion.h>
-#include <linux/spinlock.h>
-#include <linux/init.h>
-#include <rxrpc/krxsecd.h>
-#include <rxrpc/transport.h>
-#include <rxrpc/connection.h>
-#include <rxrpc/message.h>
-#include <rxrpc/peer.h>
-#include <rxrpc/call.h>
-#include <linux/udp.h>
-#include <linux/ip.h>
-#include <linux/freezer.h>
-#include <net/sock.h>
-#include "internal.h"
-
-static DECLARE_WAIT_QUEUE_HEAD(rxrpc_krxsecd_sleepq);
-static DECLARE_COMPLETION(rxrpc_krxsecd_dead);
-static volatile int rxrpc_krxsecd_die;
-
-static atomic_t rxrpc_krxsecd_qcount;
-
-/* queue of unprocessed inbound messages with seqno #1 and
- * RXRPC_CLIENT_INITIATED flag set */
-static LIST_HEAD(rxrpc_krxsecd_initmsgq);
-static DEFINE_SPINLOCK(rxrpc_krxsecd_initmsgq_lock);
-
-static void rxrpc_krxsecd_process_incoming_call(struct rxrpc_message *msg);
-
-/*****************************************************************************/
-/*
- * Rx security daemon
- */
-static int rxrpc_krxsecd(void *arg)
-{
- DECLARE_WAITQUEUE(krxsecd, current);
-
- int die;
-
- printk("Started krxsecd %d\n", current->pid);
-
- daemonize("krxsecd");
-
- /* loop around waiting for work to do */
- do {
- /* wait for work or to be told to exit */
- _debug("### Begin Wait");
- if (!atomic_read(&rxrpc_krxsecd_qcount)) {
- set_current_state(TASK_INTERRUPTIBLE);
-
- add_wait_queue(&rxrpc_krxsecd_sleepq, &krxsecd);
-
- for (;;) {
- set_current_state(TASK_INTERRUPTIBLE);
- if (atomic_read(&rxrpc_krxsecd_qcount) ||
- rxrpc_krxsecd_die ||
- signal_pending(current))
- break;
-
- schedule();
- }
-
- remove_wait_queue(&rxrpc_krxsecd_sleepq, &krxsecd);
- set_current_state(TASK_RUNNING);
- }
- die = rxrpc_krxsecd_die;
- _debug("### End Wait");
-
-		/* see if there are any incoming calls in need of authentication */
- _debug("### Begin Inbound Calls");
-
- if (!list_empty(&rxrpc_krxsecd_initmsgq)) {
- struct rxrpc_message *msg = NULL;
-
- spin_lock(&rxrpc_krxsecd_initmsgq_lock);
-
- if (!list_empty(&rxrpc_krxsecd_initmsgq)) {
- msg = list_entry(rxrpc_krxsecd_initmsgq.next,
- struct rxrpc_message, link);
- list_del_init(&msg->link);
- atomic_dec(&rxrpc_krxsecd_qcount);
- }
-
- spin_unlock(&rxrpc_krxsecd_initmsgq_lock);
-
- if (msg) {
- rxrpc_krxsecd_process_incoming_call(msg);
- rxrpc_put_message(msg);
- }
- }
-
- _debug("### End Inbound Calls");
-
- try_to_freeze();
-
- /* discard pending signals */
- rxrpc_discard_my_signals();
-
- } while (!die);
-
- /* and that's all */
- complete_and_exit(&rxrpc_krxsecd_dead, 0);
-
-} /* end rxrpc_krxsecd() */
-
-/*****************************************************************************/
-/*
- * start up a krxsecd daemon
- */
-int __init rxrpc_krxsecd_init(void)
-{
- return kernel_thread(rxrpc_krxsecd, NULL, 0);
-
-} /* end rxrpc_krxsecd_init() */
-
-/*****************************************************************************/
-/*
- * kill the krxsecd daemon and wait for it to complete
- */
-void rxrpc_krxsecd_kill(void)
-{
- rxrpc_krxsecd_die = 1;
- wake_up_all(&rxrpc_krxsecd_sleepq);
- wait_for_completion(&rxrpc_krxsecd_dead);
-
-} /* end rxrpc_krxsecd_kill() */
-
-/*****************************************************************************/
-/*
- * clear all pending incoming calls for the specified transport
- */
-void rxrpc_krxsecd_clear_transport(struct rxrpc_transport *trans)
-{
- LIST_HEAD(tmp);
-
- struct rxrpc_message *msg;
- struct list_head *_p, *_n;
-
- _enter("%p",trans);
-
- /* move all the messages for this transport onto a temp list */
- spin_lock(&rxrpc_krxsecd_initmsgq_lock);
-
- list_for_each_safe(_p, _n, &rxrpc_krxsecd_initmsgq) {
- msg = list_entry(_p, struct rxrpc_message, link);
- if (msg->trans == trans) {
- list_move_tail(&msg->link, &tmp);
- atomic_dec(&rxrpc_krxsecd_qcount);
- }
- }
-
- spin_unlock(&rxrpc_krxsecd_initmsgq_lock);
-
- /* zap all messages on the temp list */
- while (!list_empty(&tmp)) {
- msg = list_entry(tmp.next, struct rxrpc_message, link);
- list_del_init(&msg->link);
- rxrpc_put_message(msg);
- }
-
- _leave("");
-} /* end rxrpc_krxsecd_clear_transport() */
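
rxrpc_krxsecd_clear_transport() shows the detach-then-free idiom: matching entries are moved onto a private list while the lock is held, and the potentially slow release work happens only after the lock is dropped. A hedged userspace reduction (msg, inq and clear_for are illustrative names):

#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>

struct msg { int trans_id; struct msg *next; };
static struct msg *inq;
static pthread_mutex_t inq_lock = PTHREAD_MUTEX_INITIALIZER;

static void clear_for(int trans_id)
{
	struct msg *doomed = NULL;

	pthread_mutex_lock(&inq_lock);
	for (struct msg **p = &inq; *p; ) {
		if ((*p)->trans_id == trans_id) {
			struct msg *m = *p;
			*p = m->next;		/* unlink from shared queue */
			m->next = doomed;	/* chain onto private list */
			doomed = m;
		} else {
			p = &(*p)->next;
		}
	}
	pthread_mutex_unlock(&inq_lock);

	while (doomed) {			/* slow work, no lock held */
		struct msg *m = doomed;
		doomed = m->next;
		free(m);
	}
}

int main(void)
{
	for (int i = 0; i < 4; i++) {
		struct msg *m = malloc(sizeof(*m));
		m->trans_id = i % 2;
		m->next = inq;
		inq = m;
	}
	clear_for(0);
	int left = 0;
	for (struct msg *m = inq; m; m = m->next)
		left++;
	printf("%d messages left\n", left);	/* prints: 2 */
	return 0;
}
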
-
-/*****************************************************************************/
-/*
- * queue a message on the incoming calls list
- */
-void rxrpc_krxsecd_queue_incoming_call(struct rxrpc_message *msg)
-{
- _enter("%p", msg);
-
- /* queue for processing by krxsecd */
- spin_lock(&rxrpc_krxsecd_initmsgq_lock);
-
- if (!rxrpc_krxsecd_die) {
- rxrpc_get_message(msg);
- list_add_tail(&msg->link, &rxrpc_krxsecd_initmsgq);
- atomic_inc(&rxrpc_krxsecd_qcount);
- }
-
- spin_unlock(&rxrpc_krxsecd_initmsgq_lock);
-
- wake_up(&rxrpc_krxsecd_sleepq);
-
- _leave("");
-} /* end rxrpc_krxsecd_queue_incoming_call() */
-
-/*****************************************************************************/
-/*
- * process the initial message of an incoming call
- */
-void rxrpc_krxsecd_process_incoming_call(struct rxrpc_message *msg)
-{
- struct rxrpc_transport *trans = msg->trans;
- struct rxrpc_service *srv;
- struct rxrpc_call *call;
- struct list_head *_p;
- unsigned short sid;
- int ret;
-
- _enter("%p{tr=%p}", msg, trans);
-
- ret = rxrpc_incoming_call(msg->conn, msg, &call);
- if (ret < 0)
- goto out;
-
- /* find the matching service on the transport */
- sid = ntohs(msg->hdr.serviceId);
- srv = NULL;
-
- spin_lock(&trans->lock);
- list_for_each(_p, &trans->services) {
- srv = list_entry(_p, struct rxrpc_service, link);
- if (srv->service_id == sid && try_module_get(srv->owner)) {
- /* found a match (made sure it won't vanish) */
- _debug("found service '%s'", srv->name);
- call->owner = srv->owner;
- break;
- }
- }
- spin_unlock(&trans->lock);
-
- /* report the new connection
-	 * - the function must increment the call's usage count to keep it
- */
- ret = -ENOENT;
- if (_p != &trans->services) {
- /* attempt to accept the call */
- call->conn->service = srv;
- call->app_attn_func = srv->attn_func;
- call->app_error_func = srv->error_func;
- call->app_aemap_func = srv->aemap_func;
-
- ret = srv->new_call(call);
-
- /* send an abort if an error occurred */
- if (ret < 0) {
- rxrpc_call_abort(call, ret);
- }
- else {
- /* formally receive and ACK the new packet */
- ret = rxrpc_conn_receive_call_packet(call->conn,
- call, msg);
- }
- }
-
- rxrpc_put_call(call);
- out:
- if (ret < 0)
- rxrpc_trans_immediate_abort(trans, msg, ret);
-
- _leave(" (%d)", ret);
-} /* end rxrpc_krxsecd_process_incoming_call() */
diff --git a/net/rxrpc/krxtimod.c b/net/rxrpc/krxtimod.c
deleted file mode 100644
index 9a9b6132dba..00000000000
--- a/net/rxrpc/krxtimod.c
+++ /dev/null
@@ -1,204 +0,0 @@
-/* krxtimod.c: RXRPC timeout daemon
- *
- * Copyright (C) 2002 Red Hat, Inc. All Rights Reserved.
- * Written by David Howells (dhowells@redhat.com)
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
- */
-
-#include <linux/module.h>
-#include <linux/init.h>
-#include <linux/sched.h>
-#include <linux/completion.h>
-#include <linux/freezer.h>
-#include <rxrpc/rxrpc.h>
-#include <rxrpc/krxtimod.h>
-#include <asm/errno.h>
-#include "internal.h"
-
-static DECLARE_COMPLETION(krxtimod_alive);
-static DECLARE_COMPLETION(krxtimod_dead);
-static DECLARE_WAIT_QUEUE_HEAD(krxtimod_sleepq);
-static int krxtimod_die;
-
-static LIST_HEAD(krxtimod_list);
-static DEFINE_SPINLOCK(krxtimod_lock);
-
-static int krxtimod(void *arg);
-
-/*****************************************************************************/
-/*
- * start the timeout daemon
- */
-int rxrpc_krxtimod_start(void)
-{
- int ret;
-
- ret = kernel_thread(krxtimod, NULL, 0);
- if (ret < 0)
- return ret;
-
- wait_for_completion(&krxtimod_alive);
-
- return ret;
-} /* end rxrpc_krxtimod_start() */
-
-/*****************************************************************************/
-/*
- * stop the timeout daemon
- */
-void rxrpc_krxtimod_kill(void)
-{
- /* get rid of my daemon */
- krxtimod_die = 1;
- wake_up(&krxtimod_sleepq);
- wait_for_completion(&krxtimod_dead);
-
-} /* end rxrpc_krxtimod_kill() */
-
-/*****************************************************************************/
-/*
- * timeout processing daemon
- */
-static int krxtimod(void *arg)
-{
- DECLARE_WAITQUEUE(myself, current);
-
- rxrpc_timer_t *timer;
-
- printk("Started krxtimod %d\n", current->pid);
-
- daemonize("krxtimod");
-
- complete(&krxtimod_alive);
-
- /* loop around looking for things to attend to */
- loop:
- set_current_state(TASK_INTERRUPTIBLE);
- add_wait_queue(&krxtimod_sleepq, &myself);
-
- for (;;) {
- unsigned long jif;
- long timeout;
-
- /* deal with the server being asked to die */
- if (krxtimod_die) {
- remove_wait_queue(&krxtimod_sleepq, &myself);
- _leave("");
- complete_and_exit(&krxtimod_dead, 0);
- }
-
- try_to_freeze();
-
- /* discard pending signals */
- rxrpc_discard_my_signals();
-
- /* work out the time to elapse before the next event */
- spin_lock(&krxtimod_lock);
- if (list_empty(&krxtimod_list)) {
- timeout = MAX_SCHEDULE_TIMEOUT;
- }
- else {
- timer = list_entry(krxtimod_list.next,
- rxrpc_timer_t, link);
- timeout = timer->timo_jif;
- jif = jiffies;
-
- if (time_before_eq((unsigned long) timeout, jif))
- goto immediate;
-
- else {
- timeout = (long) timeout - (long) jiffies;
- }
- }
- spin_unlock(&krxtimod_lock);
-
- schedule_timeout(timeout);
-
- set_current_state(TASK_INTERRUPTIBLE);
- }
-
- /* the thing on the front of the queue needs processing
- * - we come here with the lock held and timer pointing to the expired
- * entry
- */
- immediate:
- remove_wait_queue(&krxtimod_sleepq, &myself);
- set_current_state(TASK_RUNNING);
-
- _debug("@@@ Begin Timeout of %p", timer);
-
- /* dequeue the timer */
- list_del_init(&timer->link);
- spin_unlock(&krxtimod_lock);
-
- /* call the timeout function */
- timer->ops->timed_out(timer);
-
- _debug("@@@ End Timeout");
- goto loop;
-
-} /* end krxtimod() */
-
-/*****************************************************************************/
-/*
- * (re-)queue a timer
- */
-void rxrpc_krxtimod_add_timer(rxrpc_timer_t *timer, unsigned long timeout)
-{
- struct list_head *_p;
- rxrpc_timer_t *ptimer;
-
- _enter("%p,%lu", timer, timeout);
-
- spin_lock(&krxtimod_lock);
-
- list_del(&timer->link);
-
- /* the timer was deferred or reset - put it back in the queue at the
- * right place */
- timer->timo_jif = jiffies + timeout;
-
- list_for_each(_p, &krxtimod_list) {
- ptimer = list_entry(_p, rxrpc_timer_t, link);
- if (time_before(timer->timo_jif, ptimer->timo_jif))
- break;
- }
-
- list_add_tail(&timer->link, _p); /* insert before stopping point */
-
- spin_unlock(&krxtimod_lock);
-
- wake_up(&krxtimod_sleepq);
-
- _leave("");
-} /* end rxrpc_krxtimod_add_timer() */
-
-/*****************************************************************************/
-/*
- * dequeue a timer
- * - returns 0 if the timer was deleted or -ENOENT if it wasn't queued
- */
-int rxrpc_krxtimod_del_timer(rxrpc_timer_t *timer)
-{
- int ret = 0;
-
- _enter("%p", timer);
-
- spin_lock(&krxtimod_lock);
-
- if (list_empty(&timer->link))
- ret = -ENOENT;
- else
- list_del_init(&timer->link);
-
- spin_unlock(&krxtimod_lock);
-
- wake_up(&krxtimod_sleepq);
-
- _leave(" = %d", ret);
- return ret;
-} /* end rxrpc_krxtimod_del_timer() */
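
krxtimod sorts its timer queue with time_before()/time_before_eq(), which compare jiffies by signed subtraction so that counter wrap-around does not reorder the queue. A hedged userspace demonstration of why the naive comparison fails near the wrap point:

#include <stdio.h>

/* wrap-safe tick comparison, the userspace twin of time_before():
 * valid while the two values are within half the counter's range */
static int tick_before(unsigned long a, unsigned long b)
{
	return (long)(a - b) < 0;
}

int main(void)
{
	unsigned long near_wrap = (unsigned long)-5;	/* 5 ticks to wrap */
	unsigned long after     = 10;			/* 15 ticks later  */

	/* naive '<' says the earlier time is later; signed
	 * subtraction gets the ordering right across the wrap */
	printf("naive:   %d\n", near_wrap < after);		/* 0 (wrong) */
	printf("wrapped: %d\n", tick_before(near_wrap, after));	/* 1 */
	return 0;
}

This is the same trick that lets the kernel's jiffies counter wrap without expiring or reordering every pending timer.
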
diff --git a/net/rxrpc/main.c b/net/rxrpc/main.c
deleted file mode 100644
index baec1f7fd8b..00000000000
--- a/net/rxrpc/main.c
+++ /dev/null
@@ -1,180 +0,0 @@
-/* main.c: Rx RPC interface
- *
- * Copyright (C) 2002 Red Hat, Inc. All Rights Reserved.
- * Written by David Howells (dhowells@redhat.com)
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
- */
-
-#include <linux/module.h>
-#include <linux/init.h>
-#include <linux/sched.h>
-#include <rxrpc/rxrpc.h>
-#include <rxrpc/krxiod.h>
-#include <rxrpc/krxsecd.h>
-#include <rxrpc/krxtimod.h>
-#include <rxrpc/transport.h>
-#include <rxrpc/connection.h>
-#include <rxrpc/call.h>
-#include <rxrpc/message.h>
-#include "internal.h"
-
-MODULE_DESCRIPTION("Rx RPC implementation");
-MODULE_AUTHOR("Red Hat, Inc.");
-MODULE_LICENSE("GPL");
-
-__be32 rxrpc_epoch;
-
-/*****************************************************************************/
-/*
- * initialise the Rx module
- */
-static int __init rxrpc_initialise(void)
-{
- int ret;
-
- /* my epoch value */
- rxrpc_epoch = htonl(xtime.tv_sec);
-
- /* register the /proc interface */
-#ifdef CONFIG_PROC_FS
- ret = rxrpc_proc_init();
- if (ret<0)
- return ret;
-#endif
-
- /* register the sysctl files */
-#ifdef CONFIG_SYSCTL
- ret = rxrpc_sysctl_init();
- if (ret<0)
- goto error_proc;
-#endif
-
- /* start the krxtimod daemon */
- ret = rxrpc_krxtimod_start();
- if (ret<0)
- goto error_sysctl;
-
- /* start the krxiod daemon */
- ret = rxrpc_krxiod_init();
- if (ret<0)
- goto error_krxtimod;
-
- /* start the krxsecd daemon */
- ret = rxrpc_krxsecd_init();
- if (ret<0)
- goto error_krxiod;
-
- kdebug("\n\n");
-
- return 0;
-
- error_krxiod:
- rxrpc_krxiod_kill();
- error_krxtimod:
- rxrpc_krxtimod_kill();
- error_sysctl:
-#ifdef CONFIG_SYSCTL
- rxrpc_sysctl_cleanup();
- error_proc:
-#endif
-#ifdef CONFIG_PROC_FS
- rxrpc_proc_cleanup();
-#endif
- return ret;
-} /* end rxrpc_initialise() */
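
rxrpc_initialise() uses the kernel's standard goto unwind ladder: each failure jumps to a label that tears down only the subsystems that already came up, in reverse order. A hedged two-stage userspace miniature of the shape:

#include <stdio.h>
#include <stdlib.h>

static int init_all(void)
{
	char *a = NULL, *b = NULL;
	int ret = -1;

	a = malloc(16);
	if (!a)
		goto error;		/* nothing to unwind yet */

	b = malloc(16);
	if (!b)
		goto error_a;		/* unwind only stage a */

	puts("all subsystems up");
	free(b);
	free(a);
	return 0;

 error_a:
	free(a);
 error:
	return ret;
}

int main(void)
{
	return init_all() ? 1 : 0;
}
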
-
-module_init(rxrpc_initialise);
-
-/*****************************************************************************/
-/*
- * clean up the Rx module
- */
-static void __exit rxrpc_cleanup(void)
-{
- kenter("");
-
- __RXACCT(printk("Outstanding Messages : %d\n",
- atomic_read(&rxrpc_message_count)));
- __RXACCT(printk("Outstanding Calls : %d\n",
- atomic_read(&rxrpc_call_count)));
- __RXACCT(printk("Outstanding Connections: %d\n",
- atomic_read(&rxrpc_connection_count)));
- __RXACCT(printk("Outstanding Peers : %d\n",
- atomic_read(&rxrpc_peer_count)));
- __RXACCT(printk("Outstanding Transports : %d\n",
- atomic_read(&rxrpc_transport_count)));
-
- rxrpc_krxsecd_kill();
- rxrpc_krxiod_kill();
- rxrpc_krxtimod_kill();
-#ifdef CONFIG_SYSCTL
- rxrpc_sysctl_cleanup();
-#endif
-#ifdef CONFIG_PROC_FS
- rxrpc_proc_cleanup();
-#endif
-
- __RXACCT(printk("Outstanding Messages : %d\n",
- atomic_read(&rxrpc_message_count)));
- __RXACCT(printk("Outstanding Calls : %d\n",
- atomic_read(&rxrpc_call_count)));
- __RXACCT(printk("Outstanding Connections: %d\n",
- atomic_read(&rxrpc_connection_count)));
- __RXACCT(printk("Outstanding Peers : %d\n",
- atomic_read(&rxrpc_peer_count)));
- __RXACCT(printk("Outstanding Transports : %d\n",
- atomic_read(&rxrpc_transport_count)));
-
- kleave("");
-} /* end rxrpc_cleanup() */
-
-module_exit(rxrpc_cleanup);
-
-/*****************************************************************************/
-/*
- * clear the dead space between task_struct and kernel stack
- * - called by supplying -finstrument-functions to gcc
- */
-#if 0
-void __cyg_profile_func_enter (void *this_fn, void *call_site)
-__attribute__((no_instrument_function));
-
-void __cyg_profile_func_enter (void *this_fn, void *call_site)
-{
- asm volatile(" movl %%esp,%%edi \n"
- " andl %0,%%edi \n"
- " addl %1,%%edi \n"
- " movl %%esp,%%ecx \n"
- " subl %%edi,%%ecx \n"
- " shrl $2,%%ecx \n"
- " movl $0xedededed,%%eax \n"
- " rep stosl \n"
- :
- : "i"(~(THREAD_SIZE-1)), "i"(sizeof(struct thread_info))
- : "eax", "ecx", "edi", "memory", "cc"
- );
-}
-
-void __cyg_profile_func_exit(void *this_fn, void *call_site)
-__attribute__((no_instrument_function));
-
-void __cyg_profile_func_exit(void *this_fn, void *call_site)
-{
- asm volatile(" movl %%esp,%%edi \n"
- " andl %0,%%edi \n"
- " addl %1,%%edi \n"
- " movl %%esp,%%ecx \n"
- " subl %%edi,%%ecx \n"
- " shrl $2,%%ecx \n"
- " movl $0xdadadada,%%eax \n"
- " rep stosl \n"
- :
- : "i"(~(THREAD_SIZE-1)), "i"(sizeof(struct thread_info))
- : "eax", "ecx", "edi", "memory", "cc"
- );
-}
-#endif
diff --git a/net/rxrpc/peer.c b/net/rxrpc/peer.c
deleted file mode 100644
index 8a275157a3b..00000000000
--- a/net/rxrpc/peer.c
+++ /dev/null
@@ -1,398 +0,0 @@
-/* peer.c: Rx RPC peer management
- *
- * Copyright (C) 2002 Red Hat, Inc. All Rights Reserved.
- * Written by David Howells (dhowells@redhat.com)
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
- */
-
-#include <linux/sched.h>
-#include <linux/slab.h>
-#include <linux/module.h>
-#include <rxrpc/rxrpc.h>
-#include <rxrpc/transport.h>
-#include <rxrpc/peer.h>
-#include <rxrpc/connection.h>
-#include <rxrpc/call.h>
-#include <rxrpc/message.h>
-#include <linux/udp.h>
-#include <linux/ip.h>
-#include <net/sock.h>
-#include <asm/uaccess.h>
-#include <asm/div64.h>
-#include "internal.h"
-
-__RXACCT_DECL(atomic_t rxrpc_peer_count);
-LIST_HEAD(rxrpc_peers);
-DECLARE_RWSEM(rxrpc_peers_sem);
-unsigned long rxrpc_peer_timeout = 12 * 60 * 60;
-
-static void rxrpc_peer_do_timeout(struct rxrpc_peer *peer);
-
-static void __rxrpc_peer_timeout(rxrpc_timer_t *timer)
-{
- struct rxrpc_peer *peer =
- list_entry(timer, struct rxrpc_peer, timeout);
-
- _debug("Rx PEER TIMEOUT [%p{u=%d}]", peer, atomic_read(&peer->usage));
-
- rxrpc_peer_do_timeout(peer);
-}
-
-static const struct rxrpc_timer_ops rxrpc_peer_timer_ops = {
- .timed_out = __rxrpc_peer_timeout,
-};
-
-/*****************************************************************************/
-/*
- * create a peer record
- */
-static int __rxrpc_create_peer(struct rxrpc_transport *trans, __be32 addr,
- struct rxrpc_peer **_peer)
-{
- struct rxrpc_peer *peer;
-
- _enter("%p,%08x", trans, ntohl(addr));
-
- /* allocate and initialise a peer record */
- peer = kzalloc(sizeof(struct rxrpc_peer), GFP_KERNEL);
- if (!peer) {
- _leave(" = -ENOMEM");
- return -ENOMEM;
- }
-
- atomic_set(&peer->usage, 1);
-
- INIT_LIST_HEAD(&peer->link);
- INIT_LIST_HEAD(&peer->proc_link);
- INIT_LIST_HEAD(&peer->conn_idlist);
- INIT_LIST_HEAD(&peer->conn_active);
- INIT_LIST_HEAD(&peer->conn_graveyard);
- spin_lock_init(&peer->conn_gylock);
- init_waitqueue_head(&peer->conn_gy_waitq);
- rwlock_init(&peer->conn_idlock);
- rwlock_init(&peer->conn_lock);
- atomic_set(&peer->conn_count, 0);
- spin_lock_init(&peer->lock);
- rxrpc_timer_init(&peer->timeout, &rxrpc_peer_timer_ops);
-
- peer->addr.s_addr = addr;
-
- peer->trans = trans;
- peer->ops = trans->peer_ops;
-
- __RXACCT(atomic_inc(&rxrpc_peer_count));
- *_peer = peer;
- _leave(" = 0 (%p)", peer);
-
- return 0;
-} /* end __rxrpc_create_peer() */
-
-/*****************************************************************************/
-/*
- * find a peer record on the specified transport
- * - returns (if successful) with peer record usage incremented
- * - resurrects it from the graveyard if found there
- */
-int rxrpc_peer_lookup(struct rxrpc_transport *trans, __be32 addr,
- struct rxrpc_peer **_peer)
-{
- struct rxrpc_peer *peer, *candidate = NULL;
- struct list_head *_p;
- int ret;
-
- _enter("%p{%hu},%08x", trans, trans->port, ntohl(addr));
-
- /* [common case] search the transport's active list first */
- read_lock(&trans->peer_lock);
- list_for_each(_p, &trans->peer_active) {
- peer = list_entry(_p, struct rxrpc_peer, link);
- if (peer->addr.s_addr == addr)
- goto found_active;
- }
- read_unlock(&trans->peer_lock);
-
- /* [uncommon case] not active - create a candidate for a new record */
- ret = __rxrpc_create_peer(trans, addr, &candidate);
- if (ret < 0) {
- _leave(" = %d", ret);
- return ret;
- }
-
- /* search the active list again, just in case it appeared whilst we
- * were busy */
- write_lock(&trans->peer_lock);
- list_for_each(_p, &trans->peer_active) {
- peer = list_entry(_p, struct rxrpc_peer, link);
- if (peer->addr.s_addr == addr)
- goto found_active_second_chance;
- }
-
- /* search the transport's graveyard list */
- spin_lock(&trans->peer_gylock);
- list_for_each(_p, &trans->peer_graveyard) {
- peer = list_entry(_p, struct rxrpc_peer, link);
- if (peer->addr.s_addr == addr)
- goto found_in_graveyard;
- }
- spin_unlock(&trans->peer_gylock);
-
- /* we can now add the new candidate to the list
- * - tell the application layer that this peer has been added
- */
- rxrpc_get_transport(trans);
- peer = candidate;
- candidate = NULL;
-
- if (peer->ops && peer->ops->adding) {
- ret = peer->ops->adding(peer);
- if (ret < 0) {
- write_unlock(&trans->peer_lock);
- __RXACCT(atomic_dec(&rxrpc_peer_count));
- kfree(peer);
- rxrpc_put_transport(trans);
- _leave(" = %d", ret);
- return ret;
- }
- }
-
- atomic_inc(&trans->peer_count);
-
- make_active:
- list_add_tail(&peer->link, &trans->peer_active);
-
- success_uwfree:
- write_unlock(&trans->peer_lock);
-
- if (candidate) {
- __RXACCT(atomic_dec(&rxrpc_peer_count));
- kfree(candidate);
- }
-
- if (list_empty(&peer->proc_link)) {
- down_write(&rxrpc_peers_sem);
- list_add_tail(&peer->proc_link, &rxrpc_peers);
- up_write(&rxrpc_peers_sem);
- }
-
- success:
- *_peer = peer;
-
- _leave(" = 0 (%p{u=%d cc=%d})",
- peer,
- atomic_read(&peer->usage),
- atomic_read(&peer->conn_count));
- return 0;
-
- /* handle the peer being found in the active list straight off */
- found_active:
- rxrpc_get_peer(peer);
- read_unlock(&trans->peer_lock);
- goto success;
-
- /* handle resurrecting a peer from the graveyard */
- found_in_graveyard:
- rxrpc_get_peer(peer);
- rxrpc_get_transport(peer->trans);
- rxrpc_krxtimod_del_timer(&peer->timeout);
- list_del_init(&peer->link);
- spin_unlock(&trans->peer_gylock);
- goto make_active;
-
- /* handle finding the peer on the second time through the active
- * list */
- found_active_second_chance:
- rxrpc_get_peer(peer);
- goto success_uwfree;
-
-} /* end rxrpc_peer_lookup() */
-
-/*****************************************************************************/
-/*
- * finish with a peer record
- * - it gets sent to the graveyard from where it can be resurrected or timed
- * out
- */
-void rxrpc_put_peer(struct rxrpc_peer *peer)
-{
- struct rxrpc_transport *trans = peer->trans;
-
- _enter("%p{cc=%d a=%08x}",
- peer,
- atomic_read(&peer->conn_count),
- ntohl(peer->addr.s_addr));
-
- /* sanity check */
- if (atomic_read(&peer->usage) <= 0)
- BUG();
-
- write_lock(&trans->peer_lock);
- spin_lock(&trans->peer_gylock);
- if (likely(!atomic_dec_and_test(&peer->usage))) {
- spin_unlock(&trans->peer_gylock);
- write_unlock(&trans->peer_lock);
- _leave("");
- return;
- }
-
- /* move to graveyard queue */
- list_del(&peer->link);
- write_unlock(&trans->peer_lock);
-
- list_add_tail(&peer->link, &trans->peer_graveyard);
-
- BUG_ON(!list_empty(&peer->conn_active));
-
- rxrpc_krxtimod_add_timer(&peer->timeout, rxrpc_peer_timeout * HZ);
-
- spin_unlock(&trans->peer_gylock);
-
- rxrpc_put_transport(trans);
-
- _leave(" [killed]");
-} /* end rxrpc_put_peer() */
-
-/*****************************************************************************/
-/*
- * handle a peer timing out in the graveyard
- * - called from krxtimod
- */
-static void rxrpc_peer_do_timeout(struct rxrpc_peer *peer)
-{
- struct rxrpc_transport *trans = peer->trans;
-
- _enter("%p{u=%d cc=%d a=%08x}",
- peer,
- atomic_read(&peer->usage),
- atomic_read(&peer->conn_count),
- ntohl(peer->addr.s_addr));
-
- BUG_ON(atomic_read(&peer->usage) < 0);
-
- /* remove from graveyard if still dead */
- spin_lock(&trans->peer_gylock);
- if (atomic_read(&peer->usage) == 0)
- list_del_init(&peer->link);
- else
- peer = NULL;
- spin_unlock(&trans->peer_gylock);
-
- if (!peer) {
- _leave("");
- return; /* resurrected */
- }
-
- /* clear all connections on this peer */
- rxrpc_conn_clearall(peer);
-
- BUG_ON(!list_empty(&peer->conn_active));
- BUG_ON(!list_empty(&peer->conn_graveyard));
-
- /* inform the application layer */
- if (peer->ops && peer->ops->discarding)
- peer->ops->discarding(peer);
-
- if (!list_empty(&peer->proc_link)) {
- down_write(&rxrpc_peers_sem);
- list_del(&peer->proc_link);
- up_write(&rxrpc_peers_sem);
- }
-
- __RXACCT(atomic_dec(&rxrpc_peer_count));
- kfree(peer);
-
- /* if the graveyard is now empty, wake up anyone waiting for that */
- if (atomic_dec_and_test(&trans->peer_count))
- wake_up(&trans->peer_gy_waitq);
-
- _leave(" [destroyed]");
-} /* end rxrpc_peer_do_timeout() */
-
-/*****************************************************************************/
-/*
- * clear all peer records from a transport endpoint
- */
-void rxrpc_peer_clearall(struct rxrpc_transport *trans)
-{
- DECLARE_WAITQUEUE(myself,current);
-
- struct rxrpc_peer *peer;
- int err;
-
- _enter("%p",trans);
-
- /* there shouldn't be any active peers remaining */
- BUG_ON(!list_empty(&trans->peer_active));
-
- /* manually timeout all peers in the graveyard */
- spin_lock(&trans->peer_gylock);
- while (!list_empty(&trans->peer_graveyard)) {
- peer = list_entry(trans->peer_graveyard.next,
- struct rxrpc_peer, link);
-		_debug("Clearing peer %p", peer);
- err = rxrpc_krxtimod_del_timer(&peer->timeout);
- spin_unlock(&trans->peer_gylock);
-
- if (err == 0)
- rxrpc_peer_do_timeout(peer);
-
- spin_lock(&trans->peer_gylock);
- }
- spin_unlock(&trans->peer_gylock);
-
-	/* wait for the peer graveyard to be completely cleared */
- set_current_state(TASK_UNINTERRUPTIBLE);
- add_wait_queue(&trans->peer_gy_waitq, &myself);
-
- while (atomic_read(&trans->peer_count) != 0) {
- schedule();
- set_current_state(TASK_UNINTERRUPTIBLE);
- }
-
- remove_wait_queue(&trans->peer_gy_waitq, &myself);
- set_current_state(TASK_RUNNING);
-
- _leave("");
-} /* end rxrpc_peer_clearall() */
-
-/*****************************************************************************/
-/*
- * calculate and cache the Round-Trip-Time for a message and its response
- */
-void rxrpc_peer_calculate_rtt(struct rxrpc_peer *peer,
- struct rxrpc_message *msg,
- struct rxrpc_message *resp)
-{
- unsigned long long rtt;
- int loop;
-
- _enter("%p,%p,%p", peer, msg, resp);
-
- /* calculate the latest RTT */
- rtt = resp->stamp.tv_sec - msg->stamp.tv_sec;
- rtt *= 1000000UL;
- rtt += resp->stamp.tv_usec - msg->stamp.tv_usec;
-
- /* add to cache */
- peer->rtt_cache[peer->rtt_point] = rtt;
- peer->rtt_point++;
- peer->rtt_point %= RXRPC_RTT_CACHE_SIZE;
-
- if (peer->rtt_usage < RXRPC_RTT_CACHE_SIZE)
- peer->rtt_usage++;
-
- /* recalculate RTT */
- rtt = 0;
- for (loop = peer->rtt_usage - 1; loop >= 0; loop--)
- rtt += peer->rtt_cache[loop];
-
- do_div(rtt, peer->rtt_usage);
- peer->rtt = rtt;
-
- _leave(" RTT=%lu.%lums",
- (long) (peer->rtt / 1000), (long) (peer->rtt % 1000));
-
-} /* end rxrpc_peer_calculate_rtt() */
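
The RTT calculation above is a fixed-size ring-buffer average: overwrite the oldest slot, then average over however many slots are occupied so far. A hedged userspace mirror, with RXRPC_RTT_CACHE_SIZE shrunk to 4 for the example:

#include <stdio.h>

#define RTT_CACHE_SIZE 4

struct rtt_cache {
	unsigned long long slot[RTT_CACHE_SIZE];
	unsigned int point, usage;
	unsigned long long rtt;		/* cached average, microseconds */
};

static void rtt_update(struct rtt_cache *c, unsigned long long sample_us)
{
	/* overwrite the oldest slot and advance the write point */
	c->slot[c->point] = sample_us;
	c->point = (c->point + 1) % RTT_CACHE_SIZE;
	if (c->usage < RTT_CACHE_SIZE)
		c->usage++;

	/* recompute the average over the occupied slots */
	unsigned long long sum = 0;
	for (unsigned int i = 0; i < c->usage; i++)
		sum += c->slot[i];
	c->rtt = sum / c->usage;
}

int main(void)
{
	struct rtt_cache c = { { 0 } };
	unsigned long long samples[] = { 1000, 3000, 2000 };

	for (int i = 0; i < 3; i++)
		rtt_update(&c, samples[i]);

	printf("RTT=%llu.%03llums\n", c.rtt / 1000, c.rtt % 1000);
	return 0;	/* prints: RTT=2.000ms */
}
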
diff --git a/net/rxrpc/proc.c b/net/rxrpc/proc.c
deleted file mode 100644
index 8551c879e45..00000000000
--- a/net/rxrpc/proc.c
+++ /dev/null
@@ -1,617 +0,0 @@
-/* proc.c: /proc interface for RxRPC
- *
- * Copyright (C) 2002 Red Hat, Inc. All Rights Reserved.
- * Written by David Howells (dhowells@redhat.com)
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
- */
-
-#include <linux/sched.h>
-#include <linux/slab.h>
-#include <linux/module.h>
-#include <linux/proc_fs.h>
-#include <linux/seq_file.h>
-#include <rxrpc/rxrpc.h>
-#include <rxrpc/transport.h>
-#include <rxrpc/peer.h>
-#include <rxrpc/connection.h>
-#include <rxrpc/call.h>
-#include <rxrpc/message.h>
-#include "internal.h"
-
-static struct proc_dir_entry *proc_rxrpc;
-
-static int rxrpc_proc_transports_open(struct inode *inode, struct file *file);
-static void *rxrpc_proc_transports_start(struct seq_file *p, loff_t *pos);
-static void *rxrpc_proc_transports_next(struct seq_file *p, void *v, loff_t *pos);
-static void rxrpc_proc_transports_stop(struct seq_file *p, void *v);
-static int rxrpc_proc_transports_show(struct seq_file *m, void *v);
-
-static struct seq_operations rxrpc_proc_transports_ops = {
- .start = rxrpc_proc_transports_start,
- .next = rxrpc_proc_transports_next,
- .stop = rxrpc_proc_transports_stop,
- .show = rxrpc_proc_transports_show,
-};
-
-static const struct file_operations rxrpc_proc_transports_fops = {
- .open = rxrpc_proc_transports_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = seq_release,
-};
-
-static int rxrpc_proc_peers_open(struct inode *inode, struct file *file);
-static void *rxrpc_proc_peers_start(struct seq_file *p, loff_t *pos);
-static void *rxrpc_proc_peers_next(struct seq_file *p, void *v, loff_t *pos);
-static void rxrpc_proc_peers_stop(struct seq_file *p, void *v);
-static int rxrpc_proc_peers_show(struct seq_file *m, void *v);
-
-static struct seq_operations rxrpc_proc_peers_ops = {
- .start = rxrpc_proc_peers_start,
- .next = rxrpc_proc_peers_next,
- .stop = rxrpc_proc_peers_stop,
- .show = rxrpc_proc_peers_show,
-};
-
-static const struct file_operations rxrpc_proc_peers_fops = {
- .open = rxrpc_proc_peers_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = seq_release,
-};
-
-static int rxrpc_proc_conns_open(struct inode *inode, struct file *file);
-static void *rxrpc_proc_conns_start(struct seq_file *p, loff_t *pos);
-static void *rxrpc_proc_conns_next(struct seq_file *p, void *v, loff_t *pos);
-static void rxrpc_proc_conns_stop(struct seq_file *p, void *v);
-static int rxrpc_proc_conns_show(struct seq_file *m, void *v);
-
-static struct seq_operations rxrpc_proc_conns_ops = {
- .start = rxrpc_proc_conns_start,
- .next = rxrpc_proc_conns_next,
- .stop = rxrpc_proc_conns_stop,
- .show = rxrpc_proc_conns_show,
-};
-
-static const struct file_operations rxrpc_proc_conns_fops = {
- .open = rxrpc_proc_conns_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = seq_release,
-};
-
-static int rxrpc_proc_calls_open(struct inode *inode, struct file *file);
-static void *rxrpc_proc_calls_start(struct seq_file *p, loff_t *pos);
-static void *rxrpc_proc_calls_next(struct seq_file *p, void *v, loff_t *pos);
-static void rxrpc_proc_calls_stop(struct seq_file *p, void *v);
-static int rxrpc_proc_calls_show(struct seq_file *m, void *v);
-
-static struct seq_operations rxrpc_proc_calls_ops = {
- .start = rxrpc_proc_calls_start,
- .next = rxrpc_proc_calls_next,
- .stop = rxrpc_proc_calls_stop,
- .show = rxrpc_proc_calls_show,
-};
-
-static const struct file_operations rxrpc_proc_calls_fops = {
- .open = rxrpc_proc_calls_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = seq_release,
-};
-
-static const char *rxrpc_call_states7[] = {
- "complet",
- "error ",
- "rcv_op ",
- "rcv_arg",
- "got_arg",
- "snd_rpl",
- "fin_ack",
- "snd_arg",
- "rcv_rpl",
- "got_rpl"
-};
-
-static const char *rxrpc_call_error_states7[] = {
- "no_err ",
- "loc_abt",
- "rmt_abt",
- "loc_err",
- "rmt_err"
-};
-
-/*****************************************************************************/
-/*
- * initialise the /proc/net/rxrpc/ directory
- */
-int rxrpc_proc_init(void)
-{
- struct proc_dir_entry *p;
-
- proc_rxrpc = proc_mkdir("rxrpc", proc_net);
- if (!proc_rxrpc)
- goto error;
- proc_rxrpc->owner = THIS_MODULE;
-
- p = create_proc_entry("calls", 0, proc_rxrpc);
- if (!p)
- goto error_proc;
- p->proc_fops = &rxrpc_proc_calls_fops;
- p->owner = THIS_MODULE;
-
- p = create_proc_entry("connections", 0, proc_rxrpc);
- if (!p)
- goto error_calls;
- p->proc_fops = &rxrpc_proc_conns_fops;
- p->owner = THIS_MODULE;
-
- p = create_proc_entry("peers", 0, proc_rxrpc);
- if (!p)
- goto error_calls;
- p->proc_fops = &rxrpc_proc_peers_fops;
- p->owner = THIS_MODULE;
-
- p = create_proc_entry("transports", 0, proc_rxrpc);
- if (!p)
- goto error_conns;
- p->proc_fops = &rxrpc_proc_transports_fops;
- p->owner = THIS_MODULE;
-
- return 0;
-
- error_conns:
- remove_proc_entry("connections", proc_rxrpc);
- error_calls:
- remove_proc_entry("calls", proc_rxrpc);
- error_proc:
- remove_proc_entry("rxrpc", proc_net);
- error:
- return -ENOMEM;
-} /* end rxrpc_proc_init() */
-
-/*****************************************************************************/
-/*
- * clean up the /proc/net/rxrpc/ directory
- */
-void rxrpc_proc_cleanup(void)
-{
- remove_proc_entry("transports", proc_rxrpc);
- remove_proc_entry("peers", proc_rxrpc);
- remove_proc_entry("connections", proc_rxrpc);
- remove_proc_entry("calls", proc_rxrpc);
-
- remove_proc_entry("rxrpc", proc_net);
-
-} /* end rxrpc_proc_cleanup() */
-
-/*****************************************************************************/
-/*
- * open "/proc/net/rxrpc/transports" which provides a summary of extant transports
- */
-static int rxrpc_proc_transports_open(struct inode *inode, struct file *file)
-{
- struct seq_file *m;
- int ret;
-
- ret = seq_open(file, &rxrpc_proc_transports_ops);
- if (ret < 0)
- return ret;
-
- m = file->private_data;
- m->private = PDE(inode)->data;
-
- return 0;
-} /* end rxrpc_proc_transports_open() */
-
-/*****************************************************************************/
-/*
- * set up the iterator to start reading from the transports list and return the first item
- */
-static void *rxrpc_proc_transports_start(struct seq_file *m, loff_t *_pos)
-{
- struct list_head *_p;
- loff_t pos = *_pos;
-
- /* lock the list against modification */
- down_read(&rxrpc_proc_transports_sem);
-
- /* allow for the header line */
- if (!pos)
- return SEQ_START_TOKEN;
- pos--;
-
- /* find the n'th element in the list */
- list_for_each(_p, &rxrpc_proc_transports)
- if (!pos--)
- break;
-
- return _p != &rxrpc_proc_transports ? _p : NULL;
-} /* end rxrpc_proc_transports_start() */
-
-/*****************************************************************************/
-/*
- * move to next transport in the transports list
- */
-static void *rxrpc_proc_transports_next(struct seq_file *p, void *v, loff_t *pos)
-{
- struct list_head *_p;
-
- (*pos)++;
-
- _p = v;
- _p = (v == SEQ_START_TOKEN) ? rxrpc_proc_transports.next : _p->next;
-
- return _p != &rxrpc_proc_transports ? _p : NULL;
-} /* end rxrpc_proc_transports_next() */
-
-/*****************************************************************************/
-/*
- * clean up after reading from the transports list
- */
-static void rxrpc_proc_transports_stop(struct seq_file *p, void *v)
-{
- up_read(&rxrpc_proc_transports_sem);
-
-} /* end rxrpc_proc_transports_stop() */
-
-/*****************************************************************************/
-/*
- * display a header line followed by a load of transport lines
- */
-static int rxrpc_proc_transports_show(struct seq_file *m, void *v)
-{
- struct rxrpc_transport *trans =
- list_entry(v, struct rxrpc_transport, proc_link);
-
- /* display header on line 1 */
- if (v == SEQ_START_TOKEN) {
- seq_puts(m, "LOCAL USE\n");
- return 0;
- }
-
- /* display one transport per line on subsequent lines */
- seq_printf(m, "%5hu %3d\n",
- trans->port,
- atomic_read(&trans->usage)
- );
-
- return 0;
-} /* end rxrpc_proc_transports_show() */
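
All four /proc files in this file follow the same seq_file cursor protocol: start() maps a position to an element (position 0 yields a header token), next() advances the cursor, and show() renders either the header line or one row. A hedged userspace miniature of that protocol, with START_TOKEN standing in for SEQ_START_TOKEN:

#include <stdio.h>

#define START_TOKEN ((void *)1)

static int items[] = { 10, 20, 30 };
static const int nitems = 3;

/* map a position to an element; position 0 is the header */
static void *it_start(long *pos)
{
	if (*pos == 0)
		return START_TOKEN;
	return (*pos <= nitems) ? &items[*pos - 1] : NULL;
}

/* advance the cursor and return the next element, or NULL at the end */
static void *it_next(void *v, long *pos)
{
	(*pos)++;
	return (*pos <= nitems) ? &items[*pos - 1] : NULL;
}

/* render either the header line or one row */
static void it_show(void *v)
{
	if (v == START_TOKEN)
		puts("VALUE");
	else
		printf("%5d\n", *(int *)v);
}

int main(void)
{
	long pos = 0;
	for (void *v = it_start(&pos); v; v = it_next(v, &pos))
		it_show(v);
	return 0;
}
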
-
-/*****************************************************************************/
-/*
- * open "/proc/net/rxrpc/peers" which provides a summary of extant peers
- */
-static int rxrpc_proc_peers_open(struct inode *inode, struct file *file)
-{
- struct seq_file *m;
- int ret;
-
- ret = seq_open(file, &rxrpc_proc_peers_ops);
- if (ret < 0)
- return ret;
-
- m = file->private_data;
- m->private = PDE(inode)->data;
-
- return 0;
-} /* end rxrpc_proc_peers_open() */
-
-/*****************************************************************************/
-/*
- * set up the iterator to start reading from the peers list and return the
- * first item
- */
-static void *rxrpc_proc_peers_start(struct seq_file *m, loff_t *_pos)
-{
- struct list_head *_p;
- loff_t pos = *_pos;
-
- /* lock the list against modification */
- down_read(&rxrpc_peers_sem);
-
- /* allow for the header line */
- if (!pos)
- return SEQ_START_TOKEN;
- pos--;
-
- /* find the n'th element in the list */
- list_for_each(_p, &rxrpc_peers)
- if (!pos--)
- break;
-
- return _p != &rxrpc_peers ? _p : NULL;
-} /* end rxrpc_proc_peers_start() */
-
-/*****************************************************************************/
-/*
- * move to next peer in the peers list
- */
-static void *rxrpc_proc_peers_next(struct seq_file *p, void *v, loff_t *pos)
-{
- struct list_head *_p;
-
- (*pos)++;
-
- _p = v;
- _p = (v == SEQ_START_TOKEN) ? rxrpc_peers.next : _p->next;
-
- return _p != &rxrpc_peers ? _p : NULL;
-} /* end rxrpc_proc_peers_next() */
-
-/*****************************************************************************/
-/*
- * clean up after reading from the peers list
- */
-static void rxrpc_proc_peers_stop(struct seq_file *p, void *v)
-{
- up_read(&rxrpc_peers_sem);
-
-} /* end rxrpc_proc_peers_stop() */
-
-/*****************************************************************************/
-/*
- * display a header line followed by a load of peer lines
- */
-static int rxrpc_proc_peers_show(struct seq_file *m, void *v)
-{
- struct rxrpc_peer *peer = list_entry(v, struct rxrpc_peer, proc_link);
- long timeout;
-
- /* display header on line 1 */
- if (v == SEQ_START_TOKEN) {
- seq_puts(m, "LOCAL REMOTE USAGE CONNS TIMEOUT"
- " MTU RTT(uS)\n");
- return 0;
- }
-
- /* display one peer per line on subsequent lines */
- timeout = 0;
- if (!list_empty(&peer->timeout.link))
- timeout = (long) peer->timeout.timo_jif -
- (long) jiffies;
-
- seq_printf(m, "%5hu %08x %5d %5d %8ld %5Zu %7lu\n",
- peer->trans->port,
- ntohl(peer->addr.s_addr),
- atomic_read(&peer->usage),
- atomic_read(&peer->conn_count),
- timeout,
- peer->if_mtu,
- (long) peer->rtt
- );
-
- return 0;
-} /* end rxrpc_proc_peers_show() */
-
-/*****************************************************************************/
-/*
- * open "/proc/net/rxrpc/connections" which provides a summary of extant
- * connections
- */
-static int rxrpc_proc_conns_open(struct inode *inode, struct file *file)
-{
- struct seq_file *m;
- int ret;
-
- ret = seq_open(file, &rxrpc_proc_conns_ops);
- if (ret < 0)
- return ret;
-
- m = file->private_data;
- m->private = PDE(inode)->data;
-
- return 0;
-} /* end rxrpc_proc_conns_open() */
-
-/*****************************************************************************/
-/*
- * set up the iterator to start reading from the conns list and return the
- * first item
- */
-static void *rxrpc_proc_conns_start(struct seq_file *m, loff_t *_pos)
-{
- struct list_head *_p;
- loff_t pos = *_pos;
-
- /* lock the list against modification */
- down_read(&rxrpc_conns_sem);
-
- /* allow for the header line */
- if (!pos)
- return SEQ_START_TOKEN;
- pos--;
-
- /* find the n'th element in the list */
- list_for_each(_p, &rxrpc_conns)
- if (!pos--)
- break;
-
- return _p != &rxrpc_conns ? _p : NULL;
-} /* end rxrpc_proc_conns_start() */
-
-/*****************************************************************************/
-/*
- * move to next conn in conns list
- */
-static void *rxrpc_proc_conns_next(struct seq_file *p, void *v, loff_t *pos)
-{
- struct list_head *_p;
-
- (*pos)++;
-
- _p = v;
- _p = (v == SEQ_START_TOKEN) ? rxrpc_conns.next : _p->next;
-
- return _p != &rxrpc_conns ? _p : NULL;
-} /* end rxrpc_proc_conns_next() */
-
-/*****************************************************************************/
-/*
- * clean up after reading from the conns list
- */
-static void rxrpc_proc_conns_stop(struct seq_file *p, void *v)
-{
- up_read(&rxrpc_conns_sem);
-
-} /* end rxrpc_proc_conns_stop() */
-
-/*****************************************************************************/
-/*
- * display a header line followed by a load of conn lines
- */
-static int rxrpc_proc_conns_show(struct seq_file *m, void *v)
-{
- struct rxrpc_connection *conn;
- long timeout;
-
- conn = list_entry(v, struct rxrpc_connection, proc_link);
-
- /* display header on line 1 */
- if (v == SEQ_START_TOKEN) {
- seq_puts(m,
- "LOCAL REMOTE RPORT SRVC CONN END SERIALNO "
- "CALLNO MTU TIMEOUT"
- "\n");
- return 0;
- }
-
- /* display one conn per line on subsequent lines */
- timeout = 0;
- if (!list_empty(&conn->timeout.link))
- timeout = (long) conn->timeout.timo_jif -
- (long) jiffies;
-
- seq_printf(m,
- "%5hu %08x %5hu %04hx %08x %-3.3s %08x %08x %5Zu %8ld\n",
- conn->trans->port,
- ntohl(conn->addr.sin_addr.s_addr),
- ntohs(conn->addr.sin_port),
- ntohs(conn->service_id),
- ntohl(conn->conn_id),
- conn->out_clientflag ? "CLT" : "SRV",
- conn->serial_counter,
- conn->call_counter,
- conn->mtu_size,
- timeout
- );
-
- return 0;
-} /* end rxrpc_proc_conns_show() */
-
-/*****************************************************************************/
-/*
- * open "/proc/net/rxrpc/calls" which provides a summary of extant calls
- */
-static int rxrpc_proc_calls_open(struct inode *inode, struct file *file)
-{
- struct seq_file *m;
- int ret;
-
- ret = seq_open(file, &rxrpc_proc_calls_ops);
- if (ret < 0)
- return ret;
-
- m = file->private_data;
- m->private = PDE(inode)->data;
-
- return 0;
-} /* end rxrpc_proc_calls_open() */
-
-/*****************************************************************************/
-/*
- * set up the iterator to start reading from the calls list and return the
- * first item
- */
-static void *rxrpc_proc_calls_start(struct seq_file *m, loff_t *_pos)
-{
- struct list_head *_p;
- loff_t pos = *_pos;
-
- /* lock the list against modification */
- down_read(&rxrpc_calls_sem);
-
- /* allow for the header line */
- if (!pos)
- return SEQ_START_TOKEN;
- pos--;
-
- /* find the n'th element in the list */
- list_for_each(_p, &rxrpc_calls)
- if (!pos--)
- break;
-
- return _p != &rxrpc_calls ? _p : NULL;
-} /* end rxrpc_proc_calls_start() */
-
-/*****************************************************************************/
-/*
- * move to next call in calls list
- */
-static void *rxrpc_proc_calls_next(struct seq_file *p, void *v, loff_t *pos)
-{
- struct list_head *_p;
-
- (*pos)++;
-
- _p = v;
- _p = (v == SEQ_START_TOKEN) ? rxrpc_calls.next : _p->next;
-
- return _p != &rxrpc_calls ? _p : NULL;
-} /* end rxrpc_proc_calls_next() */
-
-/*****************************************************************************/
-/*
- * clean up after reading from the calls list
- */
-static void rxrpc_proc_calls_stop(struct seq_file *p, void *v)
-{
- up_read(&rxrpc_calls_sem);
-
-} /* end rxrpc_proc_calls_stop() */
-
-/*****************************************************************************/
-/*
- * display a header line followed by a load of call lines
- */
-static int rxrpc_proc_calls_show(struct seq_file *m, void *v)
-{
- struct rxrpc_call *call = list_entry(v, struct rxrpc_call, call_link);
-
- /* display header on line 1 */
- if (v == SEQ_START_TOKEN) {
- seq_puts(m,
- "LOCAL REMOT SRVC CONN CALL DIR USE "
- " L STATE OPCODE ABORT ERRNO\n"
- );
- return 0;
- }
-
- /* display one call per line on subsequent lines */
- seq_printf(m,
- "%5hu %5hu %04hx %08x %08x %s %3u%c"
- " %c %-7.7s %6d %08x %5d\n",
- call->conn->trans->port,
- ntohs(call->conn->addr.sin_port),
- ntohs(call->conn->service_id),
- ntohl(call->conn->conn_id),
- ntohl(call->call_id),
- call->conn->service ? "SVC" : "CLT",
- atomic_read(&call->usage),
- waitqueue_active(&call->waitq) ? 'w' : ' ',
- call->app_last_rcv ? 'Y' : '-',
-		   (call->app_call_state != RXRPC_CSTATE_ERROR ?
- rxrpc_call_states7[call->app_call_state] :
- rxrpc_call_error_states7[call->app_err_state]),
- call->app_opcode,
- call->app_abort_code,
- call->app_errno
- );
-
- return 0;
-} /* end rxrpc_proc_calls_show() */
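
The four removed callbacks above (and their transports/peers/conns counterparts) follow the kernel's standard seq_file contract: start() takes the list semaphore and seeks to *pos, next() steps one element, stop() drops the semaphore, and show() renders either the SEQ_START_TOKEN header row or one record. A minimal sketch of how such a quartet is wired into /proc under the seq_file API of this kernel generation; the struct and fops names here are illustrative, only the four callback names come from the file above:

	static const struct seq_operations example_seq_ops = {
		.start	= rxrpc_proc_calls_start,
		.next	= rxrpc_proc_calls_next,
		.stop	= rxrpc_proc_calls_stop,
		.show	= rxrpc_proc_calls_show,
	};

	static int example_proc_open(struct inode *inode, struct file *file)
	{
		return seq_open(file, &example_seq_ops);	/* allocates the seq_file */
	}

	static const struct file_operations example_proc_fops = {
		.owner		= THIS_MODULE,
		.open		= example_proc_open,
		.read		= seq_read,	/* seq_file does all the buffering */
		.llseek		= seq_lseek,
		.release	= seq_release,
	};
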
diff --git a/net/rxrpc/rxkad.c b/net/rxrpc/rxkad.c
new file mode 100644
index 00000000000..5ec705144e1
--- /dev/null
+++ b/net/rxrpc/rxkad.c
@@ -0,0 +1,1154 @@
+/* Kerberos-based RxRPC security
+ *
+ * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/module.h>
+#include <linux/net.h>
+#include <linux/skbuff.h>
+#include <linux/udp.h>
+#include <linux/crypto.h>
+#include <linux/scatterlist.h>
+#include <linux/ctype.h>
+#include <net/sock.h>
+#include <net/af_rxrpc.h>
+#define rxrpc_debug rxkad_debug
+#include "ar-internal.h"
+
+#define RXKAD_VERSION 2
+#define MAXKRB5TICKETLEN 1024
+#define RXKAD_TKT_TYPE_KERBEROS_V5 256
+#define ANAME_SZ 40 /* size of authentication name */
+#define INST_SZ 40 /* size of principal's instance */
+#define REALM_SZ 40 /* size of principal's auth domain */
+#define SNAME_SZ 40 /* size of service name */
+
+unsigned rxrpc_debug;
+module_param_named(debug, rxrpc_debug, uint, S_IWUSR | S_IRUGO);
+MODULE_PARM_DESC(rxrpc_debug, "rxkad debugging mask");
+
+struct rxkad_level1_hdr {
+ __be32 data_size; /* true data size (excluding padding) */
+};
+
+struct rxkad_level2_hdr {
+ __be32 data_size; /* true data size (excluding padding) */
+ __be32 checksum; /* decrypted data checksum */
+};
+
+MODULE_DESCRIPTION("RxRPC network protocol type-2 security (Kerberos)");
+MODULE_AUTHOR("Red Hat, Inc.");
+MODULE_LICENSE("GPL");
+
+/*
+ * this holds a pinned cipher so that keventd doesn't get called by the cipher
+ * alloc routine, but since we have it to hand, we use it to decrypt RESPONSE
+ * packets
+ */
+static struct crypto_blkcipher *rxkad_ci;
+static DEFINE_MUTEX(rxkad_ci_mutex);
+
+/*
+ * initialise connection security
+ */
+static int rxkad_init_connection_security(struct rxrpc_connection *conn)
+{
+ struct rxrpc_key_payload *payload;
+ struct crypto_blkcipher *ci;
+ int ret;
+
+ _enter("{%d},{%x}", conn->debug_id, key_serial(conn->key));
+
+ payload = conn->key->payload.data;
+ conn->security_ix = payload->k.security_index;
+
+ ci = crypto_alloc_blkcipher("pcbc(fcrypt)", 0, CRYPTO_ALG_ASYNC);
+ if (IS_ERR(ci)) {
+ _debug("no cipher");
+ ret = PTR_ERR(ci);
+ goto error;
+ }
+
+ if (crypto_blkcipher_setkey(ci, payload->k.session_key,
+ sizeof(payload->k.session_key)) < 0)
+ BUG();
+
+ switch (conn->security_level) {
+ case RXRPC_SECURITY_PLAIN:
+ break;
+ case RXRPC_SECURITY_AUTH:
+ conn->size_align = 8;
+ conn->security_size = sizeof(struct rxkad_level1_hdr);
+ conn->header_size += sizeof(struct rxkad_level1_hdr);
+ break;
+ case RXRPC_SECURITY_ENCRYPT:
+ conn->size_align = 8;
+ conn->security_size = sizeof(struct rxkad_level2_hdr);
+ conn->header_size += sizeof(struct rxkad_level2_hdr);
+ break;
+ default:
+ ret = -EKEYREJECTED;
+ goto error;
+ }
+
+ conn->cipher = ci;
+ ret = 0;
+error:
+ _leave(" = %d", ret);
+ return ret;
+}
+
+/*
+ * prime the encryption state with the invariant parts of a connection's
+ * description
+ */
+static void rxkad_prime_packet_security(struct rxrpc_connection *conn)
+{
+ struct rxrpc_key_payload *payload;
+ struct blkcipher_desc desc;
+ struct scatterlist sg[2];
+ struct rxrpc_crypt iv;
+ struct {
+ __be32 x[4];
+ } tmpbuf __attribute__((aligned(16))); /* must all be in same page */
+
+ _enter("");
+
+ if (!conn->key)
+ return;
+
+ payload = conn->key->payload.data;
+ memcpy(&iv, payload->k.session_key, sizeof(iv));
+
+ desc.tfm = conn->cipher;
+ desc.info = iv.x;
+ desc.flags = 0;
+
+ tmpbuf.x[0] = conn->epoch;
+ tmpbuf.x[1] = conn->cid;
+ tmpbuf.x[2] = 0;
+ tmpbuf.x[3] = htonl(conn->security_ix);
+
+ memset(sg, 0, sizeof(sg));
+ sg_set_buf(&sg[0], &tmpbuf, sizeof(tmpbuf));
+ sg_set_buf(&sg[1], &tmpbuf, sizeof(tmpbuf));
+ crypto_blkcipher_encrypt_iv(&desc, &sg[0], &sg[1], sizeof(tmpbuf));
+
+ memcpy(&conn->csum_iv, &tmpbuf.x[2], sizeof(conn->csum_iv));
+ ASSERTCMP(conn->csum_iv.n[0], ==, tmpbuf.x[2]);
+
+ _leave("");
+}
+
+/*
+ * partially encrypt a packet (level 1 security)
+ */
+static int rxkad_secure_packet_auth(const struct rxrpc_call *call,
+ struct sk_buff *skb,
+ u32 data_size,
+ void *sechdr)
+{
+ struct rxrpc_skb_priv *sp;
+ struct blkcipher_desc desc;
+ struct rxrpc_crypt iv;
+ struct scatterlist sg[2];
+ struct {
+ struct rxkad_level1_hdr hdr;
+ __be32 first; /* first four bytes of data and padding */
+ } tmpbuf __attribute__((aligned(8))); /* must all be in same page */
+ u16 check;
+
+ sp = rxrpc_skb(skb);
+
+ _enter("");
+
+ check = ntohl(sp->hdr.seq ^ sp->hdr.callNumber);
+ data_size |= (u32) check << 16;
+
+ tmpbuf.hdr.data_size = htonl(data_size);
+ memcpy(&tmpbuf.first, sechdr + 4, sizeof(tmpbuf.first));
+
+ /* start the encryption afresh */
+ memset(&iv, 0, sizeof(iv));
+ desc.tfm = call->conn->cipher;
+ desc.info = iv.x;
+ desc.flags = 0;
+
+ memset(sg, 0, sizeof(sg));
+ sg_set_buf(&sg[0], &tmpbuf, sizeof(tmpbuf));
+ sg_set_buf(&sg[1], &tmpbuf, sizeof(tmpbuf));
+ crypto_blkcipher_encrypt_iv(&desc, &sg[0], &sg[1], sizeof(tmpbuf));
+
+ memcpy(sechdr, &tmpbuf, sizeof(tmpbuf));
+
+ _leave(" = 0");
+ return 0;
+}
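
The level-1 header packs two quantities into a single 32-bit word before encryption: the low 16 bits carry the true payload length (which must therefore fit in 16 bits) and the high 16 bits carry a check value folded from the packet's sequence and call numbers. A round-trip sketch of that packing, ignoring the htonl()/ntohl() conversions the kernel code performs (plain C, illustrative values):

	#include <assert.h>
	#include <stdint.h>

	int main(void)
	{
		uint32_t seq = 5, call_number = 0x1234;
		uint32_t data_size = 1000;		/* true length, <= 0xffff */
		uint16_t check = seq ^ call_number;	/* truncated to low 16 bits */

		/* sender side, as in rxkad_secure_packet_auth() */
		uint32_t word = data_size | (uint32_t)check << 16;

		/* receiver side, as in rxkad_verify_packet_auth() */
		assert((word & 0xffff) == data_size);
		assert((uint16_t)((word >> 16) ^ seq ^ call_number) == 0);
		return 0;
	}
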
+
+/*
+ * wholly encrypt a packet (level 2 security)
+ */
+static int rxkad_secure_packet_encrypt(const struct rxrpc_call *call,
+ struct sk_buff *skb,
+ u32 data_size,
+ void *sechdr)
+{
+ const struct rxrpc_key_payload *payload;
+ struct rxkad_level2_hdr rxkhdr
+ __attribute__((aligned(8))); /* must be all on one page */
+ struct rxrpc_skb_priv *sp;
+ struct blkcipher_desc desc;
+ struct rxrpc_crypt iv;
+ struct scatterlist sg[16];
+ struct sk_buff *trailer;
+ unsigned len;
+ u16 check;
+ int nsg;
+
+ sp = rxrpc_skb(skb);
+
+ _enter("");
+
+ check = ntohl(sp->hdr.seq ^ sp->hdr.callNumber);
+
+ rxkhdr.data_size = htonl(data_size | (u32) check << 16);
+ rxkhdr.checksum = 0;
+
+ /* encrypt from the session key */
+ payload = call->conn->key->payload.data;
+ memcpy(&iv, payload->k.session_key, sizeof(iv));
+ desc.tfm = call->conn->cipher;
+ desc.info = iv.x;
+ desc.flags = 0;
+
+ memset(sg, 0, sizeof(sg[0]) * 2);
+ sg_set_buf(&sg[0], sechdr, sizeof(rxkhdr));
+ sg_set_buf(&sg[1], &rxkhdr, sizeof(rxkhdr));
+ crypto_blkcipher_encrypt_iv(&desc, &sg[0], &sg[1], sizeof(rxkhdr));
+
+ /* we want to encrypt the skbuff in-place */
+ nsg = skb_cow_data(skb, 0, &trailer);
+ if (nsg < 0 || nsg > 16)
+ return -ENOMEM;
+
+ len = data_size + call->conn->size_align - 1;
+ len &= ~(call->conn->size_align - 1);
+
+ skb_to_sgvec(skb, sg, 0, len);
+ crypto_blkcipher_encrypt_iv(&desc, sg, sg, len);
+
+ _leave(" = 0");
+ return 0;
+}
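
The amount actually encrypted is the payload length rounded up to the connection's size_align (8 bytes, fcrypt's block size); the mask idiom used above requires size_align to be a power of two. In isolation:

	/* round len up to a power-of-two alignment, as above;
	 * e.g. (1000, 8) -> 1000, (1001, 8) -> 1008 */
	static inline unsigned int round_up_pow2(unsigned int len, unsigned int align)
	{
		return (len + align - 1) & ~(align - 1);
	}
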
+
+/*
+ * checksum an RxRPC packet header and apply the connection's chosen level
+ * of security to the payload
+ */
+static int rxkad_secure_packet(const struct rxrpc_call *call,
+ struct sk_buff *skb,
+ size_t data_size,
+ void *sechdr)
+{
+ struct rxrpc_skb_priv *sp;
+ struct blkcipher_desc desc;
+ struct rxrpc_crypt iv;
+ struct scatterlist sg[2];
+ struct {
+ __be32 x[2];
+ } tmpbuf __attribute__((aligned(8))); /* must all be in same page */
+ __be32 x;
+ int ret;
+
+ sp = rxrpc_skb(skb);
+
+ _enter("{%d{%x}},{#%u},%zu,",
+ call->debug_id, key_serial(call->conn->key), ntohl(sp->hdr.seq),
+ data_size);
+
+ if (!call->conn->cipher)
+ return 0;
+
+ ret = key_validate(call->conn->key);
+ if (ret < 0)
+ return ret;
+
+ /* continue encrypting from where we left off */
+ memcpy(&iv, call->conn->csum_iv.x, sizeof(iv));
+ desc.tfm = call->conn->cipher;
+ desc.info = iv.x;
+ desc.flags = 0;
+
+ /* calculate the security checksum */
+ x = htonl(call->channel << (32 - RXRPC_CIDSHIFT));
+ x |= sp->hdr.seq & __constant_cpu_to_be32(0x3fffffff);
+ tmpbuf.x[0] = sp->hdr.callNumber;
+ tmpbuf.x[1] = x;
+
+ memset(&sg, 0, sizeof(sg));
+ sg_set_buf(&sg[0], &tmpbuf, sizeof(tmpbuf));
+ sg_set_buf(&sg[1], &tmpbuf, sizeof(tmpbuf));
+ crypto_blkcipher_encrypt_iv(&desc, &sg[0], &sg[1], sizeof(tmpbuf));
+
+ x = ntohl(tmpbuf.x[1]);
+ x = (x >> 16) & 0xffff;
+ if (x == 0)
+ x = 1; /* zero checksums are not permitted */
+ sp->hdr.cksum = htons(x);
+
+ switch (call->conn->security_level) {
+ case RXRPC_SECURITY_PLAIN:
+ ret = 0;
+ break;
+ case RXRPC_SECURITY_AUTH:
+ ret = rxkad_secure_packet_auth(call, skb, data_size, sechdr);
+ break;
+ case RXRPC_SECURITY_ENCRYPT:
+ ret = rxkad_secure_packet_encrypt(call, skb, data_size,
+ sechdr);
+ break;
+ default:
+ ret = -EPERM;
+ break;
+ }
+
+ _leave(" = %d [set %hx]", ret, x);
+ return ret;
+}
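
The header checksum is thus derived by fcrypt-encrypting two 32-bit words - the call number, and the channel number combined with the low 30 bits of the sequence number - under the csum IV primed at connection setup, then folding the second output word down to 16 bits. Zero is remapped to one because a zero cksum field on the wire means "no checksum". The fold step as a hypothetical helper:

	#include <stdint.h>

	/* fold a 32-bit cipher output word to the 16-bit wire checksum,
	 * as done at the end of rxkad_secure_packet() */
	static uint16_t rxkad_fold_csum(uint32_t word)
	{
		uint16_t csum = (word >> 16) & 0xffff;
		return csum ? csum : 1;	/* zero checksums are not permitted */
	}
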
+
+/*
+ * decrypt partial encryption on a packet (level 1 security)
+ */
+static int rxkad_verify_packet_auth(const struct rxrpc_call *call,
+ struct sk_buff *skb,
+ u32 *_abort_code)
+{
+ struct rxkad_level1_hdr sechdr;
+ struct rxrpc_skb_priv *sp;
+ struct blkcipher_desc desc;
+ struct rxrpc_crypt iv;
+ struct scatterlist sg[2];
+ struct sk_buff *trailer;
+ u32 data_size, buf;
+ u16 check;
+
+ _enter("");
+
+ sp = rxrpc_skb(skb);
+
+ /* we want to decrypt the skbuff in-place */
+ if (skb_cow_data(skb, 0, &trailer) < 0)
+ goto nomem;
+
+ skb_to_sgvec(skb, sg, 0, 8);
+
+ /* start the decryption afresh */
+ memset(&iv, 0, sizeof(iv));
+ desc.tfm = call->conn->cipher;
+ desc.info = iv.x;
+ desc.flags = 0;
+
+ crypto_blkcipher_decrypt_iv(&desc, sg, sg, 8);
+
+ /* remove the decrypted packet length */
+ if (skb_copy_bits(skb, 0, &sechdr, sizeof(sechdr)) < 0)
+ goto datalen_error;
+ if (!skb_pull(skb, sizeof(sechdr)))
+ BUG();
+
+ buf = ntohl(sechdr.data_size);
+ data_size = buf & 0xffff;
+
+ check = buf >> 16;
+ check ^= ntohl(sp->hdr.seq ^ sp->hdr.callNumber);
+ check &= 0xffff;
+ if (check != 0) {
+ *_abort_code = RXKADSEALEDINCON;
+ goto protocol_error;
+ }
+
+ /* shorten the packet to remove the padding */
+ if (data_size > skb->len)
+ goto datalen_error;
+ else if (data_size < skb->len)
+ skb->len = data_size;
+
+ _leave(" = 0 [dlen=%x]", data_size);
+ return 0;
+
+datalen_error:
+ *_abort_code = RXKADDATALEN;
+protocol_error:
+ _leave(" = -EPROTO");
+ return -EPROTO;
+
+nomem:
+ _leave(" = -ENOMEM");
+ return -ENOMEM;
+}
+
+/*
+ * wholly decrypt a packet (level 2 security)
+ */
+static int rxkad_verify_packet_encrypt(const struct rxrpc_call *call,
+ struct sk_buff *skb,
+ u32 *_abort_code)
+{
+ const struct rxrpc_key_payload *payload;
+ struct rxkad_level2_hdr sechdr;
+ struct rxrpc_skb_priv *sp;
+ struct blkcipher_desc desc;
+ struct rxrpc_crypt iv;
+ struct scatterlist _sg[4], *sg;
+ struct sk_buff *trailer;
+ u32 data_size, buf;
+ u16 check;
+ int nsg;
+
+ _enter(",{%d}", skb->len);
+
+ sp = rxrpc_skb(skb);
+
+ /* we want to decrypt the skbuff in-place */
+ nsg = skb_cow_data(skb, 0, &trailer);
+ if (nsg < 0)
+ goto nomem;
+
+ sg = _sg;
+ if (unlikely(nsg > 4)) {
+ sg = kmalloc(sizeof(*sg) * nsg, GFP_NOIO);
+ if (!sg)
+ goto nomem;
+ }
+
+ skb_to_sgvec(skb, sg, 0, skb->len);
+
+ /* decrypt from the session key */
+ payload = call->conn->key->payload.data;
+ memcpy(&iv, payload->k.session_key, sizeof(iv));
+ desc.tfm = call->conn->cipher;
+ desc.info = iv.x;
+ desc.flags = 0;
+
+ crypto_blkcipher_decrypt_iv(&desc, sg, sg, skb->len);
+ if (sg != _sg)
+ kfree(sg);
+
+ /* remove the decrypted packet length */
+ if (skb_copy_bits(skb, 0, &sechdr, sizeof(sechdr)) < 0)
+ goto datalen_error;
+ if (!skb_pull(skb, sizeof(sechdr)))
+ BUG();
+
+ buf = ntohl(sechdr.data_size);
+ data_size = buf & 0xffff;
+
+ check = buf >> 16;
+ check ^= ntohl(sp->hdr.seq ^ sp->hdr.callNumber);
+ check &= 0xffff;
+ if (check != 0) {
+ *_abort_code = RXKADSEALEDINCON;
+ goto protocol_error;
+ }
+
+ /* shorten the packet to remove the padding */
+ if (data_size > skb->len)
+ goto datalen_error;
+ else if (data_size < skb->len)
+ skb->len = data_size;
+
+ _leave(" = 0 [dlen=%x]", data_size);
+ return 0;
+
+datalen_error:
+ *_abort_code = RXKADDATALEN;
+protocol_error:
+ _leave(" = -EPROTO");
+ return -EPROTO;
+
+nomem:
+ _leave(" = -ENOMEM");
+ return -ENOMEM;
+}
+
+/*
+ * verify the security on a received packet
+ */
+static int rxkad_verify_packet(const struct rxrpc_call *call,
+ struct sk_buff *skb,
+ u32 *_abort_code)
+{
+ struct blkcipher_desc desc;
+ struct rxrpc_skb_priv *sp;
+ struct rxrpc_crypt iv;
+ struct scatterlist sg[2];
+ struct {
+ __be32 x[2];
+ } tmpbuf __attribute__((aligned(8))); /* must all be in same page */
+ __be32 x;
+ __be16 cksum;
+ int ret;
+
+ sp = rxrpc_skb(skb);
+
+ _enter("{%d{%x}},{#%u}",
+ call->debug_id, key_serial(call->conn->key),
+ ntohl(sp->hdr.seq));
+
+ if (!call->conn->cipher)
+ return 0;
+
+ if (sp->hdr.securityIndex != 2) {
+ *_abort_code = RXKADINCONSISTENCY;
+ _leave(" = -EPROTO [not rxkad]");
+ return -EPROTO;
+ }
+
+ /* continue encrypting from where we left off */
+ memcpy(&iv, call->conn->csum_iv.x, sizeof(iv));
+ desc.tfm = call->conn->cipher;
+ desc.info = iv.x;
+ desc.flags = 0;
+
+ /* validate the security checksum */
+ x = htonl(call->channel << (32 - RXRPC_CIDSHIFT));
+ x |= sp->hdr.seq & __constant_cpu_to_be32(0x3fffffff);
+ tmpbuf.x[0] = call->call_id;
+ tmpbuf.x[1] = x;
+
+ memset(&sg, 0, sizeof(sg));
+ sg_set_buf(&sg[0], &tmpbuf, sizeof(tmpbuf));
+ sg_set_buf(&sg[1], &tmpbuf, sizeof(tmpbuf));
+ crypto_blkcipher_encrypt_iv(&desc, &sg[0], &sg[1], sizeof(tmpbuf));
+
+ x = ntohl(tmpbuf.x[1]);
+ x = (x >> 16) & 0xffff;
+ if (x == 0)
+ x = 1; /* zero checksums are not permitted */
+
+ cksum = htons(x);
+ if (sp->hdr.cksum != cksum) {
+ *_abort_code = RXKADSEALEDINCON;
+ _leave(" = -EPROTO [csum failed]");
+ return -EPROTO;
+ }
+
+ switch (call->conn->security_level) {
+ case RXRPC_SECURITY_PLAIN:
+ ret = 0;
+ break;
+ case RXRPC_SECURITY_AUTH:
+ ret = rxkad_verify_packet_auth(call, skb, _abort_code);
+ break;
+ case RXRPC_SECURITY_ENCRYPT:
+ ret = rxkad_verify_packet_encrypt(call, skb, _abort_code);
+ break;
+ default:
+ ret = -ENOANO;
+ break;
+ }
+
+ _leave(" = %d", ret);
+ return ret;
+}
+
+/*
+ * issue a challenge
+ */
+static int rxkad_issue_challenge(struct rxrpc_connection *conn)
+{
+ struct rxkad_challenge challenge;
+ struct rxrpc_header hdr;
+ struct msghdr msg;
+ struct kvec iov[2];
+ size_t len;
+ int ret;
+
+ _enter("{%d,%x}", conn->debug_id, key_serial(conn->key));
+
+ ret = key_validate(conn->key);
+ if (ret < 0)
+ return ret;
+
+ get_random_bytes(&conn->security_nonce, sizeof(conn->security_nonce));
+
+ challenge.version = htonl(2);
+ challenge.nonce = htonl(conn->security_nonce);
+ challenge.min_level = htonl(0);
+ challenge.__padding = 0;
+
+ msg.msg_name = &conn->trans->peer->srx.transport.sin;
+ msg.msg_namelen = sizeof(conn->trans->peer->srx.transport.sin);
+ msg.msg_control = NULL;
+ msg.msg_controllen = 0;
+ msg.msg_flags = 0;
+
+ hdr.epoch = conn->epoch;
+ hdr.cid = conn->cid;
+ hdr.callNumber = 0;
+ hdr.seq = 0;
+ hdr.type = RXRPC_PACKET_TYPE_CHALLENGE;
+ hdr.flags = conn->out_clientflag;
+ hdr.userStatus = 0;
+ hdr.securityIndex = conn->security_ix;
+ hdr._rsvd = 0;
+ hdr.serviceId = conn->service_id;
+
+ iov[0].iov_base = &hdr;
+ iov[0].iov_len = sizeof(hdr);
+ iov[1].iov_base = &challenge;
+ iov[1].iov_len = sizeof(challenge);
+
+ len = iov[0].iov_len + iov[1].iov_len;
+
+ hdr.serial = htonl(atomic_inc_return(&conn->serial));
+ _proto("Tx CHALLENGE %%%u", ntohl(hdr.serial));
+
+ ret = kernel_sendmsg(conn->trans->local->socket, &msg, iov, 2, len);
+ if (ret < 0) {
+ _debug("sendmsg failed: %d", ret);
+ return -EAGAIN;
+ }
+
+ _leave(" = 0");
+ return 0;
+}
+
+/*
+ * send a Kerberos security response
+ */
+static int rxkad_send_response(struct rxrpc_connection *conn,
+ struct rxrpc_header *hdr,
+ struct rxkad_response *resp,
+ const struct rxkad_key *s2)
+{
+ struct msghdr msg;
+ struct kvec iov[3];
+ size_t len;
+ int ret;
+
+ _enter("");
+
+ msg.msg_name = &conn->trans->peer->srx.transport.sin;
+ msg.msg_namelen = sizeof(conn->trans->peer->srx.transport.sin);
+ msg.msg_control = NULL;
+ msg.msg_controllen = 0;
+ msg.msg_flags = 0;
+
+ hdr->epoch = conn->epoch;
+ hdr->seq = 0;
+ hdr->type = RXRPC_PACKET_TYPE_RESPONSE;
+ hdr->flags = conn->out_clientflag;
+ hdr->userStatus = 0;
+ hdr->_rsvd = 0;
+
+ iov[0].iov_base = hdr;
+ iov[0].iov_len = sizeof(*hdr);
+ iov[1].iov_base = resp;
+ iov[1].iov_len = sizeof(*resp);
+ iov[2].iov_base = (void *) s2->ticket;
+ iov[2].iov_len = s2->ticket_len;
+
+ len = iov[0].iov_len + iov[1].iov_len + iov[2].iov_len;
+
+ hdr->serial = htonl(atomic_inc_return(&conn->serial));
+ _proto("Tx RESPONSE %%%u", ntohl(hdr->serial));
+
+ ret = kernel_sendmsg(conn->trans->local->socket, &msg, iov, 3, len);
+ if (ret < 0) {
+ _debug("sendmsg failed: %d", ret);
+ return -EAGAIN;
+ }
+
+ _leave(" = 0");
+ return 0;
+}
+
+/*
+ * calculate the response checksum
+ */
+static void rxkad_calc_response_checksum(struct rxkad_response *response)
+{
+ u32 csum = 1000003;
+ int loop;
+ u8 *p = (u8 *) response;
+
+ for (loop = sizeof(*response); loop > 0; loop--)
+ csum = csum * 0x10204081 + *p++;
+
+ response->encrypted.checksum = htonl(csum);
+}
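
This is a plain multiplicative hash over every byte of the response, including the checksum field itself - which is why rxkad_verify_response() below zeroes that field before recomputing. The same hash in self-contained form, e.g. for cross-checking a captured response offline (hypothetical helper):

	#include <stddef.h>
	#include <stdint.h>

	/* multiplicative hash matching rxkad_calc_response_checksum() */
	static uint32_t rxkad_response_csum(const void *buf, size_t len)
	{
		const uint8_t *p = buf;
		uint32_t csum = 1000003;

		while (len--)
			csum = csum * 0x10204081 + *p++;
		return csum;	/* stored big-endian via htonl() in the packet */
	}
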
+
+/*
+ * load a scatterlist with a potentially split-page buffer
+ */
+static void rxkad_sg_set_buf2(struct scatterlist sg[2],
+ void *buf, size_t buflen)
+{
+
+ memset(sg, 0, sizeof(sg));
+
+ sg_set_buf(&sg[0], buf, buflen);
+ if (sg[0].offset + buflen > PAGE_SIZE) {
+ /* the buffer was split over two pages */
+ sg[0].length = PAGE_SIZE - sg[0].offset;
+ sg_set_buf(&sg[1], buf + sg[0].length, buflen - sg[0].length);
+ }
+
+ ASSERTCMP(sg[0].length + sg[1].length, ==, buflen);
+}
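
Worked numbers for the split case, assuming 4096-byte pages: a 200-byte buffer starting 4000 bytes into a page gets sg[0].length = 96 and a second entry covering the remaining 104 bytes in the next page; the ASSERTCMP confirms the two entries account for the whole buffer. A scatterlist entry of this era references a single page, so one entry must not run past a page boundary - hence the helper.

	#include <stddef.h>

	/* illustrative arithmetic only, PAGE_SIZE taken as 4096 */
	void split_example(void)
	{
		size_t offset = 4000, buflen = 200;	/* buffer straddles a page */
		size_t first  = 4096 - offset;		/* sg[0].length =  96 */
		size_t second = buflen - first;		/* sg[1].length = 104 */
		(void)second;
	}
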
+
+/*
+ * encrypt the response packet
+ */
+static void rxkad_encrypt_response(struct rxrpc_connection *conn,
+ struct rxkad_response *resp,
+ const struct rxkad_key *s2)
+{
+ struct blkcipher_desc desc;
+ struct rxrpc_crypt iv;
+ struct scatterlist ssg[2], dsg[2];
+
+ /* continue encrypting from where we left off */
+ memcpy(&iv, s2->session_key, sizeof(iv));
+ desc.tfm = conn->cipher;
+ desc.info = iv.x;
+ desc.flags = 0;
+
+ rxkad_sg_set_buf2(ssg, &resp->encrypted, sizeof(resp->encrypted));
+ memcpy(dsg, ssg, sizeof(dsg));
+ crypto_blkcipher_encrypt_iv(&desc, dsg, ssg, sizeof(resp->encrypted));
+}
+
+/*
+ * respond to a challenge packet
+ */
+static int rxkad_respond_to_challenge(struct rxrpc_connection *conn,
+ struct sk_buff *skb,
+ u32 *_abort_code)
+{
+ const struct rxrpc_key_payload *payload;
+ struct rxkad_challenge challenge;
+ struct rxkad_response resp
+ __attribute__((aligned(8))); /* must be aligned for crypto */
+ struct rxrpc_skb_priv *sp;
+ u32 version, nonce, min_level, abort_code;
+ int ret;
+
+ _enter("{%d,%x}", conn->debug_id, key_serial(conn->key));
+
+ if (!conn->key) {
+ _leave(" = -EPROTO [no key]");
+ return -EPROTO;
+ }
+
+ ret = key_validate(conn->key);
+ if (ret < 0) {
+ *_abort_code = RXKADEXPIRED;
+ return ret;
+ }
+
+ abort_code = RXKADPACKETSHORT;
+ sp = rxrpc_skb(skb);
+ if (skb_copy_bits(skb, 0, &challenge, sizeof(challenge)) < 0)
+ goto protocol_error;
+
+ version = ntohl(challenge.version);
+ nonce = ntohl(challenge.nonce);
+ min_level = ntohl(challenge.min_level);
+
+ _proto("Rx CHALLENGE %%%u { v=%u n=%u ml=%u }",
+ ntohl(sp->hdr.serial), version, nonce, min_level);
+
+ abort_code = RXKADINCONSISTENCY;
+ if (version != RXKAD_VERSION)
+ goto protocol_error;
+
+ abort_code = RXKADLEVELFAIL;
+ if (conn->security_level < min_level)
+ goto protocol_error;
+
+ payload = conn->key->payload.data;
+
+ /* build the response packet */
+ memset(&resp, 0, sizeof(resp));
+
+ resp.version = RXKAD_VERSION;
+ resp.encrypted.epoch = conn->epoch;
+ resp.encrypted.cid = conn->cid;
+ resp.encrypted.securityIndex = htonl(conn->security_ix);
+ resp.encrypted.call_id[0] =
+ (conn->channels[0] ? conn->channels[0]->call_id : 0);
+ resp.encrypted.call_id[1] =
+ (conn->channels[1] ? conn->channels[1]->call_id : 0);
+ resp.encrypted.call_id[2] =
+ (conn->channels[2] ? conn->channels[2]->call_id : 0);
+ resp.encrypted.call_id[3] =
+ (conn->channels[3] ? conn->channels[3]->call_id : 0);
+ resp.encrypted.inc_nonce = htonl(nonce + 1);
+ resp.encrypted.level = htonl(conn->security_level);
+ resp.kvno = htonl(payload->k.kvno);
+ resp.ticket_len = htonl(payload->k.ticket_len);
+
+ /* calculate the response checksum and then do the encryption */
+ rxkad_calc_response_checksum(&resp);
+ rxkad_encrypt_response(conn, &resp, &payload->k);
+ return rxkad_send_response(conn, &sp->hdr, &resp, &payload->k);
+
+protocol_error:
+ *_abort_code = abort_code;
+ _leave(" = -EPROTO [%d]", abort_code);
+ return -EPROTO;
+}
+
+/*
+ * decrypt the kerberos IV ticket in the response
+ */
+static int rxkad_decrypt_ticket(struct rxrpc_connection *conn,
+ void *ticket, size_t ticket_len,
+ struct rxrpc_crypt *_session_key,
+ time_t *_expiry,
+ u32 *_abort_code)
+{
+ struct blkcipher_desc desc;
+ struct rxrpc_crypt iv, key;
+ struct scatterlist ssg[1], dsg[1];
+ struct in_addr addr;
+ unsigned life;
+ time_t issue, now;
+ bool little_endian;
+ int ret;
+ u8 *p, *q, *name, *end;
+
+ _enter("{%d},{%x}", conn->debug_id, key_serial(conn->server_key));
+
+ *_expiry = 0;
+
+ ret = key_validate(conn->server_key);
+ if (ret < 0) {
+ switch (ret) {
+ case -EKEYEXPIRED:
+ *_abort_code = RXKADEXPIRED;
+ goto error;
+ default:
+ *_abort_code = RXKADNOAUTH;
+ goto error;
+ }
+ }
+
+ ASSERT(conn->server_key->payload.data != NULL);
+ ASSERTCMP((unsigned long) ticket & 7UL, ==, 0);
+
+ memcpy(&iv, &conn->server_key->type_data, sizeof(iv));
+
+ desc.tfm = conn->server_key->payload.data;
+ desc.info = iv.x;
+ desc.flags = 0;
+
+ sg_init_one(&ssg[0], ticket, ticket_len);
+ memcpy(dsg, ssg, sizeof(dsg));
+ crypto_blkcipher_decrypt_iv(&desc, dsg, ssg, ticket_len);
+
+ p = ticket;
+ end = p + ticket_len;
+
+#define Z(size) \
+ ({ \
+ u8 *__str = p; \
+ q = memchr(p, 0, end - p); \
+ if (!q || q - p > (size)) \
+ goto bad_ticket; \
+ for (; p < q; p++) \
+ if (!isprint(*p)) \
+ goto bad_ticket; \
+ p++; \
+ __str; \
+ })
+
+ /* extract the ticket flags */
+ _debug("KIV FLAGS: %x", *p);
+ little_endian = *p & 1;
+ p++;
+
+ /* extract the authentication name */
+ name = Z(ANAME_SZ);
+ _debug("KIV ANAME: %s", name);
+
+ /* extract the principal's instance */
+ name = Z(INST_SZ);
+ _debug("KIV INST : %s", name);
+
+ /* extract the principal's authentication domain */
+ name = Z(REALM_SZ);
+ _debug("KIV REALM: %s", name);
+
+ if (end - p < 4 + 8 + 4 + 2)
+ goto bad_ticket;
+
+ /* get the IPv4 address of the entity that requested the ticket */
+ memcpy(&addr, p, sizeof(addr));
+ p += 4;
+ _debug("KIV ADDR : "NIPQUAD_FMT, NIPQUAD(addr));
+
+ /* get the session key from the ticket */
+ memcpy(&key, p, sizeof(key));
+ p += 8;
+ _debug("KIV KEY : %08x %08x", ntohl(key.n[0]), ntohl(key.n[1]));
+ memcpy(_session_key, &key, sizeof(key));
+
+ /* get the ticket's lifetime */
+ life = *p++ * 5 * 60;
+ _debug("KIV LIFE : %u", life);
+
+ /* get the issue time of the ticket */
+ if (little_endian) {
+ __le32 stamp;
+ memcpy(&stamp, p, 4);
+ issue = le32_to_cpu(stamp);
+ } else {
+ __be32 stamp;
+ memcpy(&stamp, p, 4);
+ issue = be32_to_cpu(stamp);
+ }
+ p += 4;
+ now = xtime.tv_sec;
+ _debug("KIV ISSUE: %lx [%lx]", issue, now);
+
+ /* check the ticket is in date */
+ if (issue > now) {
+ *_abort_code = RXKADNOAUTH;
+ ret = -EKEYREJECTED;
+ goto error;
+ }
+
+ if (issue < now - life) {
+ *_abort_code = RXKADEXPIRED;
+ ret = -EKEYEXPIRED;
+ goto error;
+ }
+
+ *_expiry = issue + life;
+
+ /* get the service name */
+ name = Z(SNAME_SZ);
+ _debug("KIV SNAME: %s", name);
+
+ /* get the service instance name */
+ name = Z(INST_SZ);
+ _debug("KIV SINST: %s", name);
+
+ ret = 0;
+error:
+ _leave(" = %d", ret);
+ return ret;
+
+bad_ticket:
+ *_abort_code = RXKADBADTICKET;
+ ret = -EBADMSG;
+ goto error;
+}
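
For reference, the decrypted ticket body that rxkad_decrypt_ticket() walks, in consumption order - a summary of the parser above, not a formal KerberosIV definition:

	/*
	 * u8      flags        bit 0 set => timestamps are little-endian
	 * string  aname        NUL-terminated, printable, <= ANAME_SZ
	 * string  inst         NUL-terminated, printable, <= INST_SZ
	 * string  realm        NUL-terminated, printable, <= REALM_SZ
	 * u8[4]   addr         IPv4 address of the ticket requester
	 * u8[8]   session_key  fcrypt session key
	 * u8      life         lifetime in units of 5 minutes
	 * u8[4]   stamp        issue time, endianness per flags
	 * string  sname        service name, <= SNAME_SZ
	 * string  sinst        service instance, <= INST_SZ
	 *
	 * so expiry = issue + life * 300 seconds; a life byte of 0xff
	 * gives 76500s, i.e. 21h15m.
	 */
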
+
+/*
+ * decrypt the response packet
+ */
+static void rxkad_decrypt_response(struct rxrpc_connection *conn,
+ struct rxkad_response *resp,
+ const struct rxrpc_crypt *session_key)
+{
+ struct blkcipher_desc desc;
+ struct scatterlist ssg[2], dsg[2];
+ struct rxrpc_crypt iv;
+
+ _enter(",,%08x%08x",
+ ntohl(session_key->n[0]), ntohl(session_key->n[1]));
+
+ ASSERT(rxkad_ci != NULL);
+
+ mutex_lock(&rxkad_ci_mutex);
+ if (crypto_blkcipher_setkey(rxkad_ci, session_key->x,
+ sizeof(*session_key)) < 0)
+ BUG();
+
+ memcpy(&iv, session_key, sizeof(iv));
+ desc.tfm = rxkad_ci;
+ desc.info = iv.x;
+ desc.flags = 0;
+
+ rxkad_sg_set_buf2(ssg, &resp->encrypted, sizeof(resp->encrypted));
+ memcpy(dsg, ssg, sizeof(dsg));
+ crypto_blkcipher_decrypt_iv(&desc, dsg, ssg, sizeof(resp->encrypted));
+ mutex_unlock(&rxkad_ci_mutex);
+
+ _leave("");
+}
+
+/*
+ * verify a response
+ */
+static int rxkad_verify_response(struct rxrpc_connection *conn,
+ struct sk_buff *skb,
+ u32 *_abort_code)
+{
+ struct rxkad_response response
+ __attribute__((aligned(8))); /* must be aligned for crypto */
+ struct rxrpc_skb_priv *sp;
+ struct rxrpc_crypt session_key;
+ time_t expiry;
+ void *ticket;
+ u32 abort_code, version, kvno, ticket_len, csum, level;
+ int ret;
+
+ _enter("{%d,%x}", conn->debug_id, key_serial(conn->server_key));
+
+ abort_code = RXKADPACKETSHORT;
+ if (skb_copy_bits(skb, 0, &response, sizeof(response)) < 0)
+ goto protocol_error;
+ if (!pskb_pull(skb, sizeof(response)))
+ BUG();
+
+ version = ntohl(response.version);
+ ticket_len = ntohl(response.ticket_len);
+ kvno = ntohl(response.kvno);
+ sp = rxrpc_skb(skb);
+ _proto("Rx RESPONSE %%%u { v=%u kv=%u tl=%u }",
+ ntohl(sp->hdr.serial), version, kvno, ticket_len);
+
+ abort_code = RXKADINCONSISTENCY;
+	if (version != RXKAD_VERSION)
+		goto protocol_error;
+
+ abort_code = RXKADTICKETLEN;
+ if (ticket_len < 4 || ticket_len > MAXKRB5TICKETLEN)
+ goto protocol_error;
+
+ abort_code = RXKADUNKNOWNKEY;
+ if (kvno >= RXKAD_TKT_TYPE_KERBEROS_V5)
+ goto protocol_error;
+
+ /* extract the kerberos ticket and decrypt and decode it */
+ ticket = kmalloc(ticket_len, GFP_NOFS);
+ if (!ticket)
+ return -ENOMEM;
+
+ abort_code = RXKADPACKETSHORT;
+ if (skb_copy_bits(skb, 0, ticket, ticket_len) < 0)
+ goto protocol_error_free;
+
+ ret = rxkad_decrypt_ticket(conn, ticket, ticket_len, &session_key,
+ &expiry, &abort_code);
+ if (ret < 0) {
+ *_abort_code = abort_code;
+ kfree(ticket);
+ return ret;
+ }
+
+ /* use the session key from inside the ticket to decrypt the
+ * response */
+ rxkad_decrypt_response(conn, &response, &session_key);
+
+ abort_code = RXKADSEALEDINCON;
+ if (response.encrypted.epoch != conn->epoch)
+ goto protocol_error_free;
+ if (response.encrypted.cid != conn->cid)
+ goto protocol_error_free;
+ if (ntohl(response.encrypted.securityIndex) != conn->security_ix)
+ goto protocol_error_free;
+ csum = response.encrypted.checksum;
+ response.encrypted.checksum = 0;
+ rxkad_calc_response_checksum(&response);
+ if (response.encrypted.checksum != csum)
+ goto protocol_error_free;
+
+ if (ntohl(response.encrypted.call_id[0]) > INT_MAX ||
+ ntohl(response.encrypted.call_id[1]) > INT_MAX ||
+ ntohl(response.encrypted.call_id[2]) > INT_MAX ||
+ ntohl(response.encrypted.call_id[3]) > INT_MAX)
+ goto protocol_error_free;
+
+ abort_code = RXKADOUTOFSEQUENCE;
+ if (response.encrypted.inc_nonce != htonl(conn->security_nonce + 1))
+ goto protocol_error_free;
+
+ abort_code = RXKADLEVELFAIL;
+ level = ntohl(response.encrypted.level);
+ if (level > RXRPC_SECURITY_ENCRYPT)
+ goto protocol_error_free;
+ conn->security_level = level;
+
+ /* create a key to hold the security data and expiration time - after
+ * this the connection security can be handled in exactly the same way
+ * as for a client connection */
+ ret = rxrpc_get_server_data_key(conn, &session_key, expiry, kvno);
+ if (ret < 0) {
+ kfree(ticket);
+ return ret;
+ }
+
+ kfree(ticket);
+ _leave(" = 0");
+ return 0;
+
+protocol_error_free:
+ kfree(ticket);
+protocol_error:
+ *_abort_code = abort_code;
+ _leave(" = -EPROTO [%d]", abort_code);
+ return -EPROTO;
+}
+
+/*
+ * clear the connection security
+ */
+static void rxkad_clear(struct rxrpc_connection *conn)
+{
+ _enter("");
+
+ if (conn->cipher)
+ crypto_free_blkcipher(conn->cipher);
+}
+
+/*
+ * RxRPC Kerberos-based security
+ */
+static struct rxrpc_security rxkad = {
+ .owner = THIS_MODULE,
+ .name = "rxkad",
+ .security_index = RXKAD_VERSION,
+ .init_connection_security = rxkad_init_connection_security,
+ .prime_packet_security = rxkad_prime_packet_security,
+ .secure_packet = rxkad_secure_packet,
+ .verify_packet = rxkad_verify_packet,
+ .issue_challenge = rxkad_issue_challenge,
+ .respond_to_challenge = rxkad_respond_to_challenge,
+ .verify_response = rxkad_verify_response,
+ .clear = rxkad_clear,
+};
+
+static __init int rxkad_init(void)
+{
+ _enter("");
+
+ /* pin the cipher we need so that the crypto layer doesn't invoke
+ * keventd to go get it */
+ rxkad_ci = crypto_alloc_blkcipher("pcbc(fcrypt)", 0, CRYPTO_ALG_ASYNC);
+ if (IS_ERR(rxkad_ci))
+ return PTR_ERR(rxkad_ci);
+
+ return rxrpc_register_security(&rxkad);
+}
+
+module_init(rxkad_init);
+
+static __exit void rxkad_exit(void)
+{
+ _enter("");
+
+ rxrpc_unregister_security(&rxkad);
+ crypto_free_blkcipher(rxkad_ci);
+}
+
+module_exit(rxkad_exit);
diff --git a/net/rxrpc/rxrpc_syms.c b/net/rxrpc/rxrpc_syms.c
deleted file mode 100644
index 9896fd87a4d..00000000000
--- a/net/rxrpc/rxrpc_syms.c
+++ /dev/null
@@ -1,34 +0,0 @@
-/* rxrpc_syms.c: exported Rx RPC layer interface symbols
- *
- * Copyright (C) 2002 Red Hat, Inc. All Rights Reserved.
- * Written by David Howells (dhowells@redhat.com)
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
- */
-
-#include <linux/module.h>
-
-#include <rxrpc/transport.h>
-#include <rxrpc/connection.h>
-#include <rxrpc/call.h>
-#include <rxrpc/krxiod.h>
-
-/* call.c */
-EXPORT_SYMBOL(rxrpc_create_call);
-EXPORT_SYMBOL(rxrpc_put_call);
-EXPORT_SYMBOL(rxrpc_call_abort);
-EXPORT_SYMBOL(rxrpc_call_read_data);
-EXPORT_SYMBOL(rxrpc_call_write_data);
-
-/* connection.c */
-EXPORT_SYMBOL(rxrpc_create_connection);
-EXPORT_SYMBOL(rxrpc_put_connection);
-
-/* transport.c */
-EXPORT_SYMBOL(rxrpc_create_transport);
-EXPORT_SYMBOL(rxrpc_put_transport);
-EXPORT_SYMBOL(rxrpc_add_service);
-EXPORT_SYMBOL(rxrpc_del_service);
diff --git a/net/rxrpc/sysctl.c b/net/rxrpc/sysctl.c
deleted file mode 100644
index 884290754af..00000000000
--- a/net/rxrpc/sysctl.c
+++ /dev/null
@@ -1,121 +0,0 @@
-/* sysctl.c: Rx RPC control
- *
- * Copyright (C) 2002 Red Hat, Inc. All Rights Reserved.
- * Written by David Howells (dhowells@redhat.com)
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
- */
-
-#include <linux/sched.h>
-#include <linux/slab.h>
-#include <linux/module.h>
-#include <linux/sysctl.h>
-#include <rxrpc/types.h>
-#include <rxrpc/rxrpc.h>
-#include <asm/errno.h>
-#include "internal.h"
-
-int rxrpc_ktrace;
-int rxrpc_kdebug;
-int rxrpc_kproto;
-int rxrpc_knet;
-
-#ifdef CONFIG_SYSCTL
-static struct ctl_table_header *rxrpc_sysctl = NULL;
-
-static ctl_table rxrpc_sysctl_table[] = {
- {
- .ctl_name = 1,
- .procname = "kdebug",
- .data = &rxrpc_kdebug,
- .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = &proc_dointvec
- },
- {
- .ctl_name = 2,
- .procname = "ktrace",
- .data = &rxrpc_ktrace,
- .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = &proc_dointvec
- },
- {
- .ctl_name = 3,
- .procname = "kproto",
- .data = &rxrpc_kproto,
- .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = &proc_dointvec
- },
- {
- .ctl_name = 4,
- .procname = "knet",
- .data = &rxrpc_knet,
- .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = &proc_dointvec
- },
- {
- .ctl_name = 5,
- .procname = "peertimo",
- .data = &rxrpc_peer_timeout,
- .maxlen = sizeof(unsigned long),
- .mode = 0644,
- .proc_handler = &proc_doulongvec_minmax
- },
- {
- .ctl_name = 6,
- .procname = "conntimo",
- .data = &rxrpc_conn_timeout,
- .maxlen = sizeof(unsigned long),
- .mode = 0644,
- .proc_handler = &proc_doulongvec_minmax
- },
- { .ctl_name = 0 }
-};
-
-static ctl_table rxrpc_dir_sysctl_table[] = {
- {
- .ctl_name = 1,
- .procname = "rxrpc",
- .maxlen = 0,
- .mode = 0555,
- .child = rxrpc_sysctl_table
- },
- { .ctl_name = 0 }
-};
-#endif /* CONFIG_SYSCTL */
-
-/*****************************************************************************/
-/*
- * initialise the sysctl stuff for Rx RPC
- */
-int rxrpc_sysctl_init(void)
-{
-#ifdef CONFIG_SYSCTL
- rxrpc_sysctl = register_sysctl_table(rxrpc_dir_sysctl_table);
- if (!rxrpc_sysctl)
- return -ENOMEM;
-#endif /* CONFIG_SYSCTL */
-
- return 0;
-} /* end rxrpc_sysctl_init() */
-
-/*****************************************************************************/
-/*
- * clean up the sysctl stuff for Rx RPC
- */
-void rxrpc_sysctl_cleanup(void)
-{
-#ifdef CONFIG_SYSCTL
- if (rxrpc_sysctl) {
- unregister_sysctl_table(rxrpc_sysctl);
- rxrpc_sysctl = NULL;
- }
-#endif /* CONFIG_SYSCTL */
-
-} /* end rxrpc_sysctl_cleanup() */
diff --git a/net/rxrpc/transport.c b/net/rxrpc/transport.c
deleted file mode 100644
index 8e57be2df93..00000000000
--- a/net/rxrpc/transport.c
+++ /dev/null
@@ -1,846 +0,0 @@
-/* transport.c: Rx Transport routines
- *
- * Copyright (C) 2002 Red Hat, Inc. All Rights Reserved.
- * Written by David Howells (dhowells@redhat.com)
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
- */
-
-#include <linux/slab.h>
-#include <linux/module.h>
-#include <rxrpc/transport.h>
-#include <rxrpc/peer.h>
-#include <rxrpc/connection.h>
-#include <rxrpc/call.h>
-#include <rxrpc/message.h>
-#include <rxrpc/krxiod.h>
-#include <rxrpc/krxsecd.h>
-#include <linux/udp.h>
-#include <linux/in.h>
-#include <linux/in6.h>
-#include <linux/icmp.h>
-#include <linux/skbuff.h>
-#include <net/sock.h>
-#include <net/ip.h>
-#if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE)
-#include <linux/ipv6.h> /* this should _really_ be in errqueue.h.. */
-#endif
-#include <linux/errqueue.h>
-#include <asm/uaccess.h>
-#include "internal.h"
-
-struct errormsg {
- struct cmsghdr cmsg; /* control message header */
- struct sock_extended_err ee; /* extended error information */
- struct sockaddr_in icmp_src; /* ICMP packet source address */
-};
-
-static DEFINE_SPINLOCK(rxrpc_transports_lock);
-static struct list_head rxrpc_transports = LIST_HEAD_INIT(rxrpc_transports);
-
-__RXACCT_DECL(atomic_t rxrpc_transport_count);
-LIST_HEAD(rxrpc_proc_transports);
-DECLARE_RWSEM(rxrpc_proc_transports_sem);
-
-static void rxrpc_data_ready(struct sock *sk, int count);
-static void rxrpc_error_report(struct sock *sk);
-static int rxrpc_trans_receive_new_call(struct rxrpc_transport *trans,
- struct list_head *msgq);
-static void rxrpc_trans_receive_error_report(struct rxrpc_transport *trans);
-
-/*****************************************************************************/
-/*
- * create a new transport endpoint using the specified UDP port
- */
-int rxrpc_create_transport(unsigned short port,
- struct rxrpc_transport **_trans)
-{
- struct rxrpc_transport *trans;
- struct sockaddr_in sin;
- mm_segment_t oldfs;
- struct sock *sock;
- int ret, opt;
-
- _enter("%hu", port);
-
- trans = kzalloc(sizeof(struct rxrpc_transport), GFP_KERNEL);
- if (!trans)
- return -ENOMEM;
-
- atomic_set(&trans->usage, 1);
- INIT_LIST_HEAD(&trans->services);
- INIT_LIST_HEAD(&trans->link);
- INIT_LIST_HEAD(&trans->krxiodq_link);
- spin_lock_init(&trans->lock);
- INIT_LIST_HEAD(&trans->peer_active);
- INIT_LIST_HEAD(&trans->peer_graveyard);
- spin_lock_init(&trans->peer_gylock);
- init_waitqueue_head(&trans->peer_gy_waitq);
- rwlock_init(&trans->peer_lock);
- atomic_set(&trans->peer_count, 0);
- trans->port = port;
-
- /* create a UDP socket to be my actual transport endpoint */
- ret = sock_create_kern(PF_INET, SOCK_DGRAM, IPPROTO_UDP, &trans->socket);
- if (ret < 0)
- goto error;
-
- /* use the specified port */
- if (port) {
- memset(&sin, 0, sizeof(sin));
- sin.sin_family = AF_INET;
- sin.sin_port = htons(port);
- ret = trans->socket->ops->bind(trans->socket,
- (struct sockaddr *) &sin,
- sizeof(sin));
- if (ret < 0)
- goto error;
- }
-
- opt = 1;
- oldfs = get_fs();
- set_fs(KERNEL_DS);
- ret = trans->socket->ops->setsockopt(trans->socket, SOL_IP, IP_RECVERR,
- (char *) &opt, sizeof(opt));
- set_fs(oldfs);
-
- spin_lock(&rxrpc_transports_lock);
- list_add(&trans->link, &rxrpc_transports);
- spin_unlock(&rxrpc_transports_lock);
-
- /* set the socket up */
- sock = trans->socket->sk;
- sock->sk_user_data = trans;
- sock->sk_data_ready = rxrpc_data_ready;
- sock->sk_error_report = rxrpc_error_report;
-
- down_write(&rxrpc_proc_transports_sem);
- list_add_tail(&trans->proc_link, &rxrpc_proc_transports);
- up_write(&rxrpc_proc_transports_sem);
-
- __RXACCT(atomic_inc(&rxrpc_transport_count));
-
- *_trans = trans;
- _leave(" = 0 (%p)", trans);
- return 0;
-
- error:
- /* finish cleaning up the transport (not really needed here, but...) */
- if (trans->socket)
- trans->socket->ops->shutdown(trans->socket, 2);
-
- /* close the socket */
- if (trans->socket) {
- trans->socket->sk->sk_user_data = NULL;
- sock_release(trans->socket);
- trans->socket = NULL;
- }
-
- kfree(trans);
-
- _leave(" = %d", ret);
- return ret;
-} /* end rxrpc_create_transport() */
-
-/*****************************************************************************/
-/*
- * destroy a transport endpoint
- */
-void rxrpc_put_transport(struct rxrpc_transport *trans)
-{
- _enter("%p{u=%d p=%hu}",
- trans, atomic_read(&trans->usage), trans->port);
-
- BUG_ON(atomic_read(&trans->usage) <= 0);
-
- /* to prevent a race, the decrement and the dequeue must be
- * effectively atomic */
- spin_lock(&rxrpc_transports_lock);
- if (likely(!atomic_dec_and_test(&trans->usage))) {
- spin_unlock(&rxrpc_transports_lock);
- _leave("");
- return;
- }
-
- list_del(&trans->link);
- spin_unlock(&rxrpc_transports_lock);
-
- /* finish cleaning up the transport */
- if (trans->socket)
- trans->socket->ops->shutdown(trans->socket, 2);
-
- rxrpc_krxsecd_clear_transport(trans);
- rxrpc_krxiod_dequeue_transport(trans);
-
- /* discard all peer information */
- rxrpc_peer_clearall(trans);
-
- down_write(&rxrpc_proc_transports_sem);
- list_del(&trans->proc_link);
- up_write(&rxrpc_proc_transports_sem);
- __RXACCT(atomic_dec(&rxrpc_transport_count));
-
- /* close the socket */
- if (trans->socket) {
- trans->socket->sk->sk_user_data = NULL;
- sock_release(trans->socket);
- trans->socket = NULL;
- }
-
- kfree(trans);
-
- _leave("");
-} /* end rxrpc_put_transport() */
-
-/*****************************************************************************/
-/*
- * add a service to a transport to be listened upon
- */
-int rxrpc_add_service(struct rxrpc_transport *trans,
- struct rxrpc_service *newsrv)
-{
- struct rxrpc_service *srv;
- struct list_head *_p;
- int ret = -EEXIST;
-
- _enter("%p{%hu},%p{%hu}",
- trans, trans->port, newsrv, newsrv->service_id);
-
- /* verify that the service ID is not already present */
- spin_lock(&trans->lock);
-
- list_for_each(_p, &trans->services) {
- srv = list_entry(_p, struct rxrpc_service, link);
- if (srv->service_id == newsrv->service_id)
- goto out;
- }
-
- /* okay - add the transport to the list */
- list_add_tail(&newsrv->link, &trans->services);
- rxrpc_get_transport(trans);
- ret = 0;
-
- out:
- spin_unlock(&trans->lock);
-
- _leave("= %d", ret);
- return ret;
-} /* end rxrpc_add_service() */
-
-/*****************************************************************************/
-/*
- * remove a service from a transport
- */
-void rxrpc_del_service(struct rxrpc_transport *trans, struct rxrpc_service *srv)
-{
- _enter("%p{%hu},%p{%hu}", trans, trans->port, srv, srv->service_id);
-
- spin_lock(&trans->lock);
- list_del(&srv->link);
- spin_unlock(&trans->lock);
-
- rxrpc_put_transport(trans);
-
- _leave("");
-} /* end rxrpc_del_service() */
-
-/*****************************************************************************/
-/*
- * INET callback when data has been received on the socket.
- */
-static void rxrpc_data_ready(struct sock *sk, int count)
-{
- struct rxrpc_transport *trans;
-
- _enter("%p{t=%p},%d", sk, sk->sk_user_data, count);
-
- /* queue the transport for attention by krxiod */
- trans = (struct rxrpc_transport *) sk->sk_user_data;
- if (trans)
- rxrpc_krxiod_queue_transport(trans);
-
- /* wake up anyone waiting on the socket */
- if (sk->sk_sleep && waitqueue_active(sk->sk_sleep))
- wake_up_interruptible(sk->sk_sleep);
-
- _leave("");
-} /* end rxrpc_data_ready() */
-
-/*****************************************************************************/
-/*
- * INET callback when an ICMP error packet is received
- * - sk->err is error (EHOSTUNREACH, EPROTO or EMSGSIZE)
- */
-static void rxrpc_error_report(struct sock *sk)
-{
- struct rxrpc_transport *trans;
-
- _enter("%p{t=%p}", sk, sk->sk_user_data);
-
- /* queue the transport for attention by krxiod */
- trans = (struct rxrpc_transport *) sk->sk_user_data;
- if (trans) {
- trans->error_rcvd = 1;
- rxrpc_krxiod_queue_transport(trans);
- }
-
- /* wake up anyone waiting on the socket */
- if (sk->sk_sleep && waitqueue_active(sk->sk_sleep))
- wake_up_interruptible(sk->sk_sleep);
-
- _leave("");
-} /* end rxrpc_error_report() */
-
-/*****************************************************************************/
-/*
- * split a message up, allocating message records and filling them in
- * from the contents of a socket buffer
- */
-static int rxrpc_incoming_msg(struct rxrpc_transport *trans,
- struct sk_buff *pkt,
- struct list_head *msgq)
-{
- struct rxrpc_message *msg;
- int ret;
-
- _enter("");
-
- msg = kzalloc(sizeof(struct rxrpc_message), GFP_KERNEL);
- if (!msg) {
- _leave(" = -ENOMEM");
- return -ENOMEM;
- }
-
- atomic_set(&msg->usage, 1);
-	list_add_tail(&msg->link, msgq);
-
- /* dig out the Rx routing parameters */
- if (skb_copy_bits(pkt, sizeof(struct udphdr),
- &msg->hdr, sizeof(msg->hdr)) < 0) {
- ret = -EBADMSG;
- goto error;
- }
-
- msg->trans = trans;
- msg->state = RXRPC_MSG_RECEIVED;
- skb_get_timestamp(pkt, &msg->stamp);
- if (msg->stamp.tv_sec == 0) {
- do_gettimeofday(&msg->stamp);
- if (pkt->sk)
- sock_enable_timestamp(pkt->sk);
- }
- msg->seq = ntohl(msg->hdr.seq);
-
- /* attach the packet */
- skb_get(pkt);
- msg->pkt = pkt;
-
- msg->offset = sizeof(struct udphdr) + sizeof(struct rxrpc_header);
- msg->dsize = msg->pkt->len - msg->offset;
-
- _net("Rx Received packet from %s (%08x;%08x,%1x,%d,%s,%02x,%d,%d)",
- msg->hdr.flags & RXRPC_CLIENT_INITIATED ? "client" : "server",
- ntohl(msg->hdr.epoch),
- (ntohl(msg->hdr.cid) & RXRPC_CIDMASK) >> RXRPC_CIDSHIFT,
- ntohl(msg->hdr.cid) & RXRPC_CHANNELMASK,
- ntohl(msg->hdr.callNumber),
- rxrpc_pkts[msg->hdr.type],
- msg->hdr.flags,
- ntohs(msg->hdr.serviceId),
- msg->hdr.securityIndex);
-
- __RXACCT(atomic_inc(&rxrpc_message_count));
-
- /* split off jumbo packets */
- while (msg->hdr.type == RXRPC_PACKET_TYPE_DATA &&
- msg->hdr.flags & RXRPC_JUMBO_PACKET
- ) {
- struct rxrpc_jumbo_header jumbo;
- struct rxrpc_message *jumbomsg = msg;
-
- _debug("split jumbo packet");
-
- /* quick sanity check */
- ret = -EBADMSG;
- if (msg->dsize <
- RXRPC_JUMBO_DATALEN + sizeof(struct rxrpc_jumbo_header))
- goto error;
- if (msg->hdr.flags & RXRPC_LAST_PACKET)
- goto error;
-
- /* dig out the secondary header */
- if (skb_copy_bits(pkt, msg->offset + RXRPC_JUMBO_DATALEN,
- &jumbo, sizeof(jumbo)) < 0)
- goto error;
-
- /* allocate a new message record */
- ret = -ENOMEM;
- msg = kmemdup(jumbomsg, sizeof(struct rxrpc_message), GFP_KERNEL);
- if (!msg)
- goto error;
-
- list_add_tail(&msg->link, msgq);
-
- /* adjust the jumbo packet */
- jumbomsg->dsize = RXRPC_JUMBO_DATALEN;
-
- /* attach the packet here too */
- skb_get(pkt);
-
- /* adjust the parameters */
- msg->seq++;
- msg->hdr.seq = htonl(msg->seq);
- msg->hdr.serial = htonl(ntohl(msg->hdr.serial) + 1);
- msg->offset += RXRPC_JUMBO_DATALEN +
- sizeof(struct rxrpc_jumbo_header);
- msg->dsize -= RXRPC_JUMBO_DATALEN +
- sizeof(struct rxrpc_jumbo_header);
- msg->hdr.flags = jumbo.flags;
- msg->hdr._rsvd = jumbo._rsvd;
-
- _net("Rx Split jumbo packet from %s"
- " (%08x;%08x,%1x,%d,%s,%02x,%d,%d)",
- msg->hdr.flags & RXRPC_CLIENT_INITIATED ? "client" : "server",
- ntohl(msg->hdr.epoch),
- (ntohl(msg->hdr.cid) & RXRPC_CIDMASK) >> RXRPC_CIDSHIFT,
- ntohl(msg->hdr.cid) & RXRPC_CHANNELMASK,
- ntohl(msg->hdr.callNumber),
- rxrpc_pkts[msg->hdr.type],
- msg->hdr.flags,
- ntohs(msg->hdr.serviceId),
- msg->hdr.securityIndex);
-
- __RXACCT(atomic_inc(&rxrpc_message_count));
- }
-
- _leave(" = 0 #%d", atomic_read(&rxrpc_message_count));
- return 0;
-
- error:
- while (!list_empty(msgq)) {
- msg = list_entry(msgq->next, struct rxrpc_message, link);
- list_del_init(&msg->link);
-
- rxrpc_put_message(msg);
- }
-
- _leave(" = %d", ret);
- return ret;
-} /* end rxrpc_incoming_msg() */
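
The jumbo-splitting loop above relies on fixed on-wire geometry: every slice except the last carries exactly RXRPC_JUMBO_DATALEN bytes of data followed by a 4-byte secondary header, so msg->offset and msg->dsize advance by the same stride while seq and serial each increase by one per slice. With the usual value RXRPC_JUMBO_DATALEN == 1412, slice offsets relative to the start of the data run 0, 1416, 2832, ... - a sketch:

	/* byte offset of slice n within a jumbo DATA payload, assuming
	 * RXRPC_JUMBO_DATALEN == 1412 and a 4-byte struct rxrpc_jumbo_header */
	static inline unsigned int jumbo_slice_offset(unsigned int n)
	{
		return n * (1412 + 4);
	}
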
-
-/*****************************************************************************/
-/*
- * accept a new call
- * - called from krxiod in process context
- */
-void rxrpc_trans_receive_packet(struct rxrpc_transport *trans)
-{
- struct rxrpc_message *msg;
- struct rxrpc_peer *peer;
- struct sk_buff *pkt;
- int ret;
- __be32 addr;
- __be16 port;
-
- LIST_HEAD(msgq);
-
- _enter("%p{%d}", trans, trans->port);
-
- for (;;) {
-		/* deal with outstanding errors first */
- if (trans->error_rcvd)
- rxrpc_trans_receive_error_report(trans);
-
- /* attempt to receive a packet */
- pkt = skb_recv_datagram(trans->socket->sk, 0, 1, &ret);
- if (!pkt) {
- if (ret == -EAGAIN) {
- _leave(" EAGAIN");
- return;
- }
-
- /* an icmp error may have occurred */
- rxrpc_krxiod_queue_transport(trans);
- _leave(" error %d\n", ret);
- return;
- }
-
- /* we'll probably need to checksum it (didn't call
- * sock_recvmsg) */
- if (skb_checksum_complete(pkt)) {
- kfree_skb(pkt);
- rxrpc_krxiod_queue_transport(trans);
- _leave(" CSUM failed");
- return;
- }
-
- addr = pkt->nh.iph->saddr;
- port = pkt->h.uh->source;
-
- _net("Rx Received UDP packet from %08x:%04hu",
- ntohl(addr), ntohs(port));
-
- /* unmarshall the Rx parameters and split jumbo packets */
- ret = rxrpc_incoming_msg(trans, pkt, &msgq);
- if (ret < 0) {
- kfree_skb(pkt);
- rxrpc_krxiod_queue_transport(trans);
- _leave(" bad packet");
- return;
- }
-
- BUG_ON(list_empty(&msgq));
-
- msg = list_entry(msgq.next, struct rxrpc_message, link);
-
- /* locate the record for the peer from which it
- * originated */
- ret = rxrpc_peer_lookup(trans, addr, &peer);
- if (ret < 0) {
- kdebug("Rx No connections from that peer");
- rxrpc_trans_immediate_abort(trans, msg, -EINVAL);
- goto finished_msg;
- }
-
- /* try and find a matching connection */
- ret = rxrpc_connection_lookup(peer, msg, &msg->conn);
- if (ret < 0) {
- kdebug("Rx Unknown Connection");
- rxrpc_trans_immediate_abort(trans, msg, -EINVAL);
- rxrpc_put_peer(peer);
- goto finished_msg;
- }
- rxrpc_put_peer(peer);
-
- /* deal with the first packet of a new call */
- if (msg->hdr.flags & RXRPC_CLIENT_INITIATED &&
- msg->hdr.type == RXRPC_PACKET_TYPE_DATA &&
- ntohl(msg->hdr.seq) == 1
- ) {
- _debug("Rx New server call");
- rxrpc_trans_receive_new_call(trans, &msgq);
- goto finished_msg;
- }
-
- /* deal with subsequent packet(s) of call */
- _debug("Rx Call packet");
- while (!list_empty(&msgq)) {
- msg = list_entry(msgq.next, struct rxrpc_message, link);
- list_del_init(&msg->link);
-
- ret = rxrpc_conn_receive_call_packet(msg->conn, NULL, msg);
- if (ret < 0) {
- rxrpc_trans_immediate_abort(trans, msg, ret);
- rxrpc_put_message(msg);
- goto finished_msg;
- }
-
- rxrpc_put_message(msg);
- }
-
- goto finished_msg;
-
- /* dispose of the packets */
- finished_msg:
- while (!list_empty(&msgq)) {
- msg = list_entry(msgq.next, struct rxrpc_message, link);
- list_del_init(&msg->link);
-
- rxrpc_put_message(msg);
- }
- kfree_skb(pkt);
- }
-
- _leave("");
-
-} /* end rxrpc_trans_receive_packet() */
-
-/*****************************************************************************/
-/*
- * accept a new call from a client trying to connect to one of my services
- * - called in process context
- */
-static int rxrpc_trans_receive_new_call(struct rxrpc_transport *trans,
- struct list_head *msgq)
-{
- struct rxrpc_message *msg;
-
- _enter("");
-
- /* only bother with the first packet */
- msg = list_entry(msgq->next, struct rxrpc_message, link);
- list_del_init(&msg->link);
- rxrpc_krxsecd_queue_incoming_call(msg);
- rxrpc_put_message(msg);
-
- _leave(" = 0");
-
- return 0;
-} /* end rxrpc_trans_receive_new_call() */
-
-/*****************************************************************************/
-/*
- * perform an immediate abort without connection or call structures
- */
-int rxrpc_trans_immediate_abort(struct rxrpc_transport *trans,
- struct rxrpc_message *msg,
- int error)
-{
- struct rxrpc_header ahdr;
- struct sockaddr_in sin;
- struct msghdr msghdr;
- struct kvec iov[2];
- __be32 _error;
- int len, ret;
-
- _enter("%p,%p,%d", trans, msg, error);
-
- /* don't abort an abort packet */
- if (msg->hdr.type == RXRPC_PACKET_TYPE_ABORT) {
- _leave(" = 0");
- return 0;
- }
-
- _error = htonl(-error);
-
- /* set up the message to be transmitted */
- memcpy(&ahdr, &msg->hdr, sizeof(ahdr));
- ahdr.epoch = msg->hdr.epoch;
- ahdr.serial = htonl(1);
- ahdr.seq = 0;
- ahdr.type = RXRPC_PACKET_TYPE_ABORT;
- ahdr.flags = RXRPC_LAST_PACKET;
- ahdr.flags |= ~msg->hdr.flags & RXRPC_CLIENT_INITIATED;
-
- iov[0].iov_len = sizeof(ahdr);
- iov[0].iov_base = &ahdr;
- iov[1].iov_len = sizeof(_error);
- iov[1].iov_base = &_error;
-
- len = sizeof(ahdr) + sizeof(_error);
-
-	memset(&sin, 0, sizeof(sin));
- sin.sin_family = AF_INET;
- sin.sin_port = msg->pkt->h.uh->source;
- sin.sin_addr.s_addr = msg->pkt->nh.iph->saddr;
-
- msghdr.msg_name = &sin;
- msghdr.msg_namelen = sizeof(sin);
- msghdr.msg_control = NULL;
- msghdr.msg_controllen = 0;
- msghdr.msg_flags = MSG_DONTWAIT;
-
- _net("Sending message type %d of %d bytes to %08x:%d",
- ahdr.type,
- len,
- ntohl(sin.sin_addr.s_addr),
- ntohs(sin.sin_port));
-
- /* send the message */
- ret = kernel_sendmsg(trans->socket, &msghdr, iov, 2, len);
-
- _leave(" = %d", ret);
- return ret;
-} /* end rxrpc_trans_immediate_abort() */
-
-/*****************************************************************************/
-/*
- * receive an ICMP error report and percolate it to all connections
- * heading to the affected host or port
- */
-static void rxrpc_trans_receive_error_report(struct rxrpc_transport *trans)
-{
- struct rxrpc_connection *conn;
- struct sockaddr_in sin;
- struct rxrpc_peer *peer;
- struct list_head connq, *_p;
- struct errormsg emsg;
- struct msghdr msg;
- __be16 port;
- int local, err;
-
- _enter("%p", trans);
-
- for (;;) {
- trans->error_rcvd = 0;
-
- /* try and receive an error message */
- msg.msg_name = &sin;
- msg.msg_namelen = sizeof(sin);
- msg.msg_control = &emsg;
- msg.msg_controllen = sizeof(emsg);
- msg.msg_flags = 0;
-
- err = kernel_recvmsg(trans->socket, &msg, NULL, 0, 0,
- MSG_ERRQUEUE | MSG_DONTWAIT | MSG_TRUNC);
-
- if (err == -EAGAIN) {
- _leave("");
- return;
- }
-
- if (err < 0) {
- printk("%s: unable to recv an error report: %d\n",
- __FUNCTION__, err);
- _leave("");
- return;
- }
-
- msg.msg_controllen = (char *) msg.msg_control - (char *) &emsg;
-
- if (msg.msg_controllen < sizeof(emsg.cmsg) ||
- msg.msg_namelen < sizeof(sin)) {
- printk("%s: short control message"
- " (nlen=%u clen=%Zu fl=%x)\n",
- __FUNCTION__,
- msg.msg_namelen,
- msg.msg_controllen,
- msg.msg_flags);
- continue;
- }
-
- _net("Rx Received control message"
- " { len=%Zu level=%u type=%u }",
- emsg.cmsg.cmsg_len,
- emsg.cmsg.cmsg_level,
- emsg.cmsg.cmsg_type);
-
- if (sin.sin_family != AF_INET) {
- printk("Rx Ignoring error report with non-INET address"
- " (fam=%u)",
- sin.sin_family);
- continue;
- }
-
- _net("Rx Received message pertaining to host addr=%x port=%hu",
- ntohl(sin.sin_addr.s_addr), ntohs(sin.sin_port));
-
- if (emsg.cmsg.cmsg_level != SOL_IP ||
- emsg.cmsg.cmsg_type != IP_RECVERR) {
- printk("Rx Ignoring unknown error report"
- " { level=%u type=%u }",
- emsg.cmsg.cmsg_level,
- emsg.cmsg.cmsg_type);
- continue;
- }
-
- if (msg.msg_controllen < sizeof(emsg.cmsg) + sizeof(emsg.ee)) {
- printk("%s: short error message (%Zu)\n",
- __FUNCTION__, msg.msg_controllen);
- _leave("");
- return;
- }
-
- port = sin.sin_port;
-
- switch (emsg.ee.ee_origin) {
- case SO_EE_ORIGIN_ICMP:
- local = 0;
- switch (emsg.ee.ee_type) {
- case ICMP_DEST_UNREACH:
- switch (emsg.ee.ee_code) {
- case ICMP_NET_UNREACH:
- _net("Rx Received ICMP Network Unreachable");
- port = 0;
- err = -ENETUNREACH;
- break;
- case ICMP_HOST_UNREACH:
- _net("Rx Received ICMP Host Unreachable");
- port = 0;
- err = -EHOSTUNREACH;
- break;
- case ICMP_PORT_UNREACH:
- _net("Rx Received ICMP Port Unreachable");
- err = -ECONNREFUSED;
- break;
- case ICMP_NET_UNKNOWN:
- _net("Rx Received ICMP Unknown Network");
- port = 0;
- err = -ENETUNREACH;
- break;
- case ICMP_HOST_UNKNOWN:
- _net("Rx Received ICMP Unknown Host");
- port = 0;
- err = -EHOSTUNREACH;
- break;
- default:
- _net("Rx Received ICMP DestUnreach { code=%u }",
- emsg.ee.ee_code);
- err = emsg.ee.ee_errno;
- break;
- }
- break;
-
- case ICMP_TIME_EXCEEDED:
- _net("Rx Received ICMP TTL Exceeded");
- err = emsg.ee.ee_errno;
- break;
-
- default:
- _proto("Rx Received ICMP error { type=%u code=%u }",
- emsg.ee.ee_type, emsg.ee.ee_code);
- err = emsg.ee.ee_errno;
- break;
- }
- break;
-
- case SO_EE_ORIGIN_LOCAL:
- _proto("Rx Received local error { error=%d }",
- emsg.ee.ee_errno);
- local = 1;
- err = emsg.ee.ee_errno;
- break;
-
- case SO_EE_ORIGIN_NONE:
- case SO_EE_ORIGIN_ICMP6:
- default:
- _proto("Rx Received error report { orig=%u }",
- emsg.ee.ee_origin);
- local = 0;
- err = emsg.ee.ee_errno;
- break;
- }
-
- /* find all the connections between this transport and the
- * affected destination */
- INIT_LIST_HEAD(&connq);
-
- if (rxrpc_peer_lookup(trans, sin.sin_addr.s_addr,
- &peer) == 0) {
- read_lock(&peer->conn_lock);
- list_for_each(_p, &peer->conn_active) {
- conn = list_entry(_p, struct rxrpc_connection,
- link);
- if (port && conn->addr.sin_port != port)
- continue;
- if (!list_empty(&conn->err_link))
- continue;
-
- rxrpc_get_connection(conn);
- list_add_tail(&conn->err_link, &connq);
- }
- read_unlock(&peer->conn_lock);
-
- /* service all those connections */
- while (!list_empty(&connq)) {
- conn = list_entry(connq.next,
- struct rxrpc_connection,
- err_link);
- list_del(&conn->err_link);
-
- rxrpc_conn_handle_error(conn, local, err);
-
- rxrpc_put_connection(conn);
- }
-
- rxrpc_put_peer(peer);
- }
- }
-
- _leave("");
- return;
-} /* end rxrpc_trans_receive_error_report() */
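The parsing above checks two sizes against struct errormsg before trusting the payload; a hedged sketch of the layout those checks imply (assumed member order, not quoted from the removed rxrpc headers):

	/* Assumed shape, inferred from the sizeof(emsg.cmsg) and
	 * sizeof(emsg.cmsg) + sizeof(emsg.ee) comparisons above. */
	struct errormsg {
		struct cmsghdr           cmsg; /* checked for SOL_IP/IP_RECVERR */
		struct sock_extended_err ee;   /* ee_origin/ee_type/ee_code/ee_errno */
		/* room for the offending address may follow */
	};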
diff --git a/net/sched/Kconfig b/net/sched/Kconfig
index f4544dd8647..475df8449be 100644
--- a/net/sched/Kconfig
+++ b/net/sched/Kconfig
@@ -46,62 +46,6 @@ config NET_SCH_FIFO
if NET_SCHED
-choice
- prompt "Packet scheduler clock source"
- default NET_SCH_CLK_GETTIMEOFDAY
- ---help---
- Packet schedulers need a monotonic clock that increments at a static
- rate. The kernel provides several suitable interfaces, each with
- different properties:
-
- - high resolution (us or better)
- - fast to read (minimal locking, no i/o access)
- - synchronized on all processors
- - handles cpu clock frequency changes
-
- but nothing provides all of the above.
-
-config NET_SCH_CLK_JIFFIES
- bool "Timer interrupt"
- ---help---
- Say Y here if you want to use the timer interrupt (jiffies) as clock
- source. This clock source is fast, synchronized on all processors and
- handles cpu clock frequency changes, but its resolution is too low
- for accurate shaping except at very low speed.
-
-config NET_SCH_CLK_GETTIMEOFDAY
- bool "gettimeofday"
- ---help---
- Say Y here if you want to use gettimeofday as clock source. This clock
- source has high resolution, is synchronized on all processors and
- handles cpu clock frequency changes, but it is slow.
-
- Choose this if you need a high resolution clock source but can't use
- the CPU's cycle counter.
-
-# don't allow on SMP x86 because they can have unsynchronized TSCs.
-# gettimeofday is a good alternative
-config NET_SCH_CLK_CPU
- bool "CPU cycle counter"
- depends on ((X86_TSC || X86_64) && !SMP) || ALPHA || SPARC64 || PPC64 || IA64
- ---help---
- Say Y here if you want to use the CPU's cycle counter as clock source.
- This is a cheap and high resolution clock source, but on some
- architectures it is not synchronized on all processors and doesn't
- handle cpu clock frequency changes.
-
- The useable cycle counters are:
-
- x86/x86_64 - Timestamp Counter
- alpha - Cycle Counter
- sparc64 - %ticks register
- ppc64 - Time base
- ia64 - Interval Time Counter
-
- Choose this if your CPU's cycle counter is working properly.
-
-endchoice
-
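The choice can go because the scheduler clock is now derived from the monotonic hrtimer clock, which is high resolution, cheap to read and consistent across processors; a sketch of the replacement, assuming PSCHED_NS2US is the inverse of the PSCHED_US2NS helper this patch uses below:

	/* Sketch of the clock that supersedes the removed Kconfig choice;
	 * see the psched_get_time() call sites introduced further down. */
	static inline psched_time_t psched_get_time(void)
	{
		return PSCHED_NS2US(ktime_to_ns(ktime_get()));
	}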
comment "Queueing/Scheduling"
config NET_SCH_CBQ
diff --git a/net/sched/act_api.c b/net/sched/act_api.c
index cb21617a567..711dd26c95c 100644
--- a/net/sched/act_api.c
+++ b/net/sched/act_api.c
@@ -25,12 +25,12 @@
#include <linux/interrupt.h>
#include <linux/netdevice.h>
#include <linux/skbuff.h>
-#include <linux/rtnetlink.h>
#include <linux/init.h>
#include <linux/kmod.h>
#include <net/sock.h>
#include <net/sch_generic.h>
#include <net/act_api.h>
+#include <net/netlink.h>
void tcf_hash_destroy(struct tcf_common *p, struct tcf_hashinfo *hinfo)
{
@@ -93,15 +93,15 @@ static int tcf_dump_walker(struct sk_buff *skb, struct netlink_callback *cb,
continue;
a->priv = p;
a->order = n_i;
- r = (struct rtattr*) skb->tail;
+ r = (struct rtattr *)skb_tail_pointer(skb);
RTA_PUT(skb, a->order, 0, NULL);
err = tcf_action_dump_1(skb, a, 0, 0);
if (err < 0) {
index--;
- skb_trim(skb, (u8*)r - skb->data);
+ nlmsg_trim(skb, r);
goto done;
}
- r->rta_len = skb->tail - (u8*)r;
+ r->rta_len = skb_tail_pointer(skb) - (u8 *)r;
n_i++;
if (n_i >= TCA_ACT_MAX_PRIO)
goto done;
@@ -114,7 +114,7 @@ done:
return n_i;
rtattr_failure:
- skb_trim(skb, (u8*)r - skb->data);
+ nlmsg_trim(skb, r);
goto done;
}
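Every hunk in this file repeats one substitution, so the two helpers are worth spelling out once; assumed definitions (the offset-based sk_buff layout is what makes raw skb->tail arithmetic untenable):

	/* Sketch: with NET_SKBUFF_DATA_USES_OFFSET, skb->tail holds an offset
	 * from skb->head rather than a pointer, so callers must go through
	 * the accessor instead of casting the field. */
	static inline unsigned char *skb_tail_pointer(const struct sk_buff *skb)
	{
		return skb->head + skb->tail;
	}

	/* nlmsg_trim() restores the tail to a previously recorded mark. */
	static inline void nlmsg_trim(struct sk_buff *skb, const void *mark)
	{
		if (mark)
			skb_trim(skb, (unsigned char *)mark - skb->data);
	}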
@@ -125,7 +125,7 @@ static int tcf_del_walker(struct sk_buff *skb, struct tc_action *a,
struct rtattr *r ;
int i= 0, n_i = 0;
- r = (struct rtattr*) skb->tail;
+ r = (struct rtattr *)skb_tail_pointer(skb);
RTA_PUT(skb, a->order, 0, NULL);
RTA_PUT(skb, TCA_KIND, IFNAMSIZ, a->ops->kind);
for (i = 0; i < (hinfo->hmask + 1); i++) {
@@ -140,11 +140,11 @@ static int tcf_del_walker(struct sk_buff *skb, struct tc_action *a,
}
}
RTA_PUT(skb, TCA_FCNT, 4, &n_i);
- r->rta_len = skb->tail - (u8*)r;
+ r->rta_len = skb_tail_pointer(skb) - (u8 *)r;
return n_i;
rtattr_failure:
- skb_trim(skb, (u8*)r - skb->data);
+ nlmsg_trim(skb, r);
return -EINVAL;
}
@@ -423,7 +423,7 @@ int
tcf_action_dump_1(struct sk_buff *skb, struct tc_action *a, int bind, int ref)
{
int err = -EINVAL;
- unsigned char *b = skb->tail;
+ unsigned char *b = skb_tail_pointer(skb);
struct rtattr *r;
if (a->ops == NULL || a->ops->dump == NULL)
@@ -432,15 +432,15 @@ tcf_action_dump_1(struct sk_buff *skb, struct tc_action *a, int bind, int ref)
RTA_PUT(skb, TCA_KIND, IFNAMSIZ, a->ops->kind);
if (tcf_action_copy_stats(skb, a, 0))
goto rtattr_failure;
- r = (struct rtattr*) skb->tail;
+ r = (struct rtattr *)skb_tail_pointer(skb);
RTA_PUT(skb, TCA_OPTIONS, 0, NULL);
if ((err = tcf_action_dump_old(skb, a, bind, ref)) > 0) {
- r->rta_len = skb->tail - (u8*)r;
+ r->rta_len = skb_tail_pointer(skb) - (u8 *)r;
return err;
}
rtattr_failure:
- skb_trim(skb, b - skb->data);
+ nlmsg_trim(skb, b);
return -1;
}
@@ -449,17 +449,17 @@ tcf_action_dump(struct sk_buff *skb, struct tc_action *act, int bind, int ref)
{
struct tc_action *a;
int err = -EINVAL;
- unsigned char *b = skb->tail;
+ unsigned char *b = skb_tail_pointer(skb);
struct rtattr *r ;
while ((a = act) != NULL) {
- r = (struct rtattr*) skb->tail;
+ r = (struct rtattr *)skb_tail_pointer(skb);
act = a->next;
RTA_PUT(skb, a->order, 0, NULL);
err = tcf_action_dump_1(skb, a, bind, ref);
if (err < 0)
goto errout;
- r->rta_len = skb->tail - (u8*)r;
+ r->rta_len = skb_tail_pointer(skb) - (u8 *)r;
}
return 0;
@@ -467,7 +467,7 @@ tcf_action_dump(struct sk_buff *skb, struct tc_action *act, int bind, int ref)
rtattr_failure:
err = -EINVAL;
errout:
- skb_trim(skb, b - skb->data);
+ nlmsg_trim(skb, b);
return err;
}
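The converted dump functions all follow the same attribute-nesting idiom; isolated as a sketch:

	/* Reserve a zero-length attribute, append the nested payload, then
	 * back-patch rta_len from the new tail; on failure, trim back to
	 * the saved mark instead. */
	struct rtattr *start = (struct rtattr *)skb_tail_pointer(skb);

	RTA_PUT(skb, TCA_OPTIONS, 0, NULL);       /* placeholder header */
	/* ... RTA_PUT() the nested attributes here ... */
	start->rta_len = skb_tail_pointer(skb) - (u8 *)start;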
@@ -635,7 +635,7 @@ tca_get_fill(struct sk_buff *skb, struct tc_action *a, u32 pid, u32 seq,
{
struct tcamsg *t;
struct nlmsghdr *nlh;
- unsigned char *b = skb->tail;
+ unsigned char *b = skb_tail_pointer(skb);
struct rtattr *x;
nlh = NLMSG_NEW(skb, pid, seq, event, sizeof(*t), flags);
@@ -645,20 +645,20 @@ tca_get_fill(struct sk_buff *skb, struct tc_action *a, u32 pid, u32 seq,
t->tca__pad1 = 0;
t->tca__pad2 = 0;
- x = (struct rtattr*) skb->tail;
+ x = (struct rtattr *)skb_tail_pointer(skb);
RTA_PUT(skb, TCA_ACT_TAB, 0, NULL);
if (tcf_action_dump(skb, a, bind, ref) < 0)
goto rtattr_failure;
- x->rta_len = skb->tail - (u8*)x;
+ x->rta_len = skb_tail_pointer(skb) - (u8 *)x;
- nlh->nlmsg_len = skb->tail - b;
+ nlh->nlmsg_len = skb_tail_pointer(skb) - b;
return skb->len;
rtattr_failure:
nlmsg_failure:
- skb_trim(skb, b - skb->data);
+ nlmsg_trim(skb, b);
return -1;
}
@@ -767,7 +767,7 @@ static int tca_action_flush(struct rtattr *rta, struct nlmsghdr *n, u32 pid)
return -ENOBUFS;
}
- b = (unsigned char *)skb->tail;
+ b = skb_tail_pointer(skb);
if (rtattr_parse_nested(tb, TCA_ACT_MAX, rta) < 0)
goto err_out;
@@ -783,16 +783,16 @@ static int tca_action_flush(struct rtattr *rta, struct nlmsghdr *n, u32 pid)
t->tca__pad1 = 0;
t->tca__pad2 = 0;
- x = (struct rtattr *) skb->tail;
+ x = (struct rtattr *)skb_tail_pointer(skb);
RTA_PUT(skb, TCA_ACT_TAB, 0, NULL);
err = a->ops->walk(skb, &dcb, RTM_DELACTION, a);
if (err < 0)
goto rtattr_failure;
- x->rta_len = skb->tail - (u8 *) x;
+ x->rta_len = skb_tail_pointer(skb) - (u8 *)x;
- nlh->nlmsg_len = skb->tail - b;
+ nlh->nlmsg_len = skb_tail_pointer(skb) - b;
nlh->nlmsg_flags |= NLM_F_ROOT;
module_put(a->ops->owner);
kfree(a);
@@ -884,7 +884,7 @@ static int tcf_add_notify(struct tc_action *a, u32 pid, u32 seq, int event,
if (!skb)
return -ENOBUFS;
- b = (unsigned char *)skb->tail;
+ b = skb_tail_pointer(skb);
nlh = NLMSG_NEW(skb, pid, seq, event, sizeof(*t), flags);
t = NLMSG_DATA(nlh);
@@ -892,15 +892,15 @@ static int tcf_add_notify(struct tc_action *a, u32 pid, u32 seq, int event,
t->tca__pad1 = 0;
t->tca__pad2 = 0;
- x = (struct rtattr*) skb->tail;
+ x = (struct rtattr *)skb_tail_pointer(skb);
RTA_PUT(skb, TCA_ACT_TAB, 0, NULL);
if (tcf_action_dump(skb, a, 0, 0) < 0)
goto rtattr_failure;
- x->rta_len = skb->tail - (u8*)x;
+ x->rta_len = skb_tail_pointer(skb) - (u8 *)x;
- nlh->nlmsg_len = skb->tail - b;
+ nlh->nlmsg_len = skb_tail_pointer(skb) - b;
NETLINK_CB(skb).dst_group = RTNLGRP_TC;
err = rtnetlink_send(skb, pid, RTNLGRP_TC, flags&NLM_F_ECHO);
@@ -1015,7 +1015,7 @@ static int
tc_dump_action(struct sk_buff *skb, struct netlink_callback *cb)
{
struct nlmsghdr *nlh;
- unsigned char *b = skb->tail;
+ unsigned char *b = skb_tail_pointer(skb);
struct rtattr *x;
struct tc_action_ops *a_o;
struct tc_action a;
@@ -1048,7 +1048,7 @@ tc_dump_action(struct sk_buff *skb, struct netlink_callback *cb)
t->tca__pad1 = 0;
t->tca__pad2 = 0;
- x = (struct rtattr *) skb->tail;
+ x = (struct rtattr *)skb_tail_pointer(skb);
RTA_PUT(skb, TCA_ACT_TAB, 0, NULL);
ret = a_o->walk(skb, cb, RTM_GETACTION, &a);
@@ -1056,12 +1056,12 @@ tc_dump_action(struct sk_buff *skb, struct netlink_callback *cb)
goto rtattr_failure;
if (ret > 0) {
- x->rta_len = skb->tail - (u8 *) x;
+ x->rta_len = skb_tail_pointer(skb) - (u8 *)x;
ret = skb->len;
} else
- skb_trim(skb, (u8*)x - skb->data);
+ nlmsg_trim(skb, x);
- nlh->nlmsg_len = skb->tail - b;
+ nlh->nlmsg_len = skb_tail_pointer(skb) - b;
if (NETLINK_CB(cb->skb).pid && ret)
nlh->nlmsg_flags |= NLM_F_MULTI;
module_put(a_o->owner);
@@ -1070,20 +1070,15 @@ tc_dump_action(struct sk_buff *skb, struct netlink_callback *cb)
rtattr_failure:
nlmsg_failure:
module_put(a_o->owner);
- skb_trim(skb, b - skb->data);
+ nlmsg_trim(skb, b);
return skb->len;
}
static int __init tc_action_init(void)
{
- struct rtnetlink_link *link_p = rtnetlink_links[PF_UNSPEC];
-
- if (link_p) {
- link_p[RTM_NEWACTION-RTM_BASE].doit = tc_ctl_action;
- link_p[RTM_DELACTION-RTM_BASE].doit = tc_ctl_action;
- link_p[RTM_GETACTION-RTM_BASE].doit = tc_ctl_action;
- link_p[RTM_GETACTION-RTM_BASE].dumpit = tc_dump_action;
- }
+ rtnl_register(PF_UNSPEC, RTM_NEWACTION, tc_ctl_action, NULL);
+ rtnl_register(PF_UNSPEC, RTM_DELACTION, tc_ctl_action, NULL);
+ rtnl_register(PF_UNSPEC, RTM_GETACTION, tc_ctl_action, tc_dump_action);
return 0;
}
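tc_action_init no longer pokes the rtnetlink_links table directly; the prototype implied by these call sites (a sketch, not quoted from net/core/rtnetlink.c):

	/* Assumed prototype: a NULL dumpit simply leaves the dump handler
	 * unset, mirroring the old per-slot table assignments. */
	void rtnl_register(int protocol, int msgtype,
			   rtnl_doit_func doit, rtnl_dumpit_func dumpit);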
diff --git a/net/sched/act_gact.c b/net/sched/act_gact.c
index 87d0faf3286..7517f379154 100644
--- a/net/sched/act_gact.c
+++ b/net/sched/act_gact.c
@@ -28,6 +28,7 @@
#include <linux/module.h>
#include <linux/init.h>
#include <linux/proc_fs.h>
+#include <net/netlink.h>
#include <net/sock.h>
#include <net/pkt_sched.h>
#include <linux/tc_act/tc_gact.h>
@@ -155,7 +156,7 @@ static int tcf_gact(struct sk_buff *skb, struct tc_action *a, struct tcf_result
static int tcf_gact_dump(struct sk_buff *skb, struct tc_action *a, int bind, int ref)
{
- unsigned char *b = skb->tail;
+ unsigned char *b = skb_tail_pointer(skb);
struct tc_gact opt;
struct tcf_gact *gact = a->priv;
struct tcf_t t;
@@ -181,7 +182,7 @@ static int tcf_gact_dump(struct sk_buff *skb, struct tc_action *a, int bind, int
return skb->len;
rtattr_failure:
- skb_trim(skb, b - skb->data);
+ nlmsg_trim(skb, b);
return -1;
}
diff --git a/net/sched/act_ipt.c b/net/sched/act_ipt.c
index 47f0b132423..00b05f422d4 100644
--- a/net/sched/act_ipt.c
+++ b/net/sched/act_ipt.c
@@ -30,6 +30,7 @@
#include <linux/init.h>
#include <linux/proc_fs.h>
#include <linux/kmod.h>
+#include <net/netlink.h>
#include <net/sock.h>
#include <net/pkt_sched.h>
#include <linux/tc_act/tc_ipt.h>
@@ -245,7 +246,7 @@ static int tcf_ipt(struct sk_buff *skb, struct tc_action *a,
static int tcf_ipt_dump(struct sk_buff *skb, struct tc_action *a, int bind, int ref)
{
- unsigned char *b = skb->tail;
+ unsigned char *b = skb_tail_pointer(skb);
struct tcf_ipt *ipt = a->priv;
struct ipt_entry_target *t;
struct tcf_t tm;
@@ -277,7 +278,7 @@ static int tcf_ipt_dump(struct sk_buff *skb, struct tc_action *a, int bind, int
return skb->len;
rtattr_failure:
- skb_trim(skb, b - skb->data);
+ nlmsg_trim(skb, b);
kfree(t);
return -1;
}
diff --git a/net/sched/act_mirred.c b/net/sched/act_mirred.c
index 3e93683e9ab..de21c92faaa 100644
--- a/net/sched/act_mirred.c
+++ b/net/sched/act_mirred.c
@@ -30,6 +30,7 @@
#include <linux/module.h>
#include <linux/init.h>
#include <linux/proc_fs.h>
+#include <net/netlink.h>
#include <net/sock.h>
#include <net/pkt_sched.h>
#include <linux/tc_act/tc_mirred.h>
@@ -206,7 +207,7 @@ bad_mirred:
static int tcf_mirred_dump(struct sk_buff *skb, struct tc_action *a, int bind, int ref)
{
- unsigned char *b = skb->tail;
+ unsigned char *b = skb_tail_pointer(skb);
struct tcf_mirred *m = a->priv;
struct tc_mirred opt;
struct tcf_t t;
@@ -225,7 +226,7 @@ static int tcf_mirred_dump(struct sk_buff *skb, struct tc_action *a, int bind, i
return skb->len;
rtattr_failure:
- skb_trim(skb, b - skb->data);
+ nlmsg_trim(skb, b);
return -1;
}
diff --git a/net/sched/act_pedit.c b/net/sched/act_pedit.c
index 3d6a2fcc9ce..45b3cda86a2 100644
--- a/net/sched/act_pedit.c
+++ b/net/sched/act_pedit.c
@@ -27,6 +27,7 @@
#include <linux/module.h>
#include <linux/init.h>
#include <linux/proc_fs.h>
+#include <net/netlink.h>
#include <net/sock.h>
#include <net/pkt_sched.h>
#include <linux/tc_act/tc_pedit.h>
@@ -136,7 +137,7 @@ static int tcf_pedit(struct sk_buff *skb, struct tc_action *a,
}
}
- pptr = skb->nh.raw;
+ pptr = skb_network_header(skb);
spin_lock(&p->tcf_lock);
@@ -195,7 +196,7 @@ done:
static int tcf_pedit_dump(struct sk_buff *skb, struct tc_action *a,
int bind, int ref)
{
- unsigned char *b = skb->tail;
+ unsigned char *b = skb_tail_pointer(skb);
struct tcf_pedit *p = a->priv;
struct tc_pedit *opt;
struct tcf_t t;
@@ -226,7 +227,7 @@ static int tcf_pedit_dump(struct sk_buff *skb, struct tc_action *a,
return skb->len;
rtattr_failure:
- skb_trim(skb, b - skb->data);
+ nlmsg_trim(skb, b);
kfree(opt);
return -1;
}
diff --git a/net/sched/act_police.c b/net/sched/act_police.c
index 10a5a5c36f7..616f465f407 100644
--- a/net/sched/act_police.c
+++ b/net/sched/act_police.c
@@ -30,6 +30,7 @@
#include <linux/init.h>
#include <net/sock.h>
#include <net/act_api.h>
+#include <net/netlink.h>
#define L2T(p,L) ((p)->tcfp_R_tab->data[(L)>>(p)->tcfp_R_tab->rate.cell_log])
#define L2T_P(p,L) ((p)->tcfp_P_tab->data[(L)>>(p)->tcfp_P_tab->rate.cell_log])
@@ -80,7 +81,7 @@ static int tcf_act_police_walker(struct sk_buff *skb, struct netlink_callback *c
continue;
a->priv = p;
a->order = index;
- r = (struct rtattr*) skb->tail;
+ r = (struct rtattr *)skb_tail_pointer(skb);
RTA_PUT(skb, a->order, 0, NULL);
if (type == RTM_DELACTION)
err = tcf_action_dump_1(skb, a, 0, 1);
@@ -88,10 +89,10 @@ static int tcf_act_police_walker(struct sk_buff *skb, struct netlink_callback *c
err = tcf_action_dump_1(skb, a, 0, 0);
if (err < 0) {
index--;
- skb_trim(skb, (u8*)r - skb->data);
+ nlmsg_trim(skb, r);
goto done;
}
- r->rta_len = skb->tail - (u8*)r;
+ r->rta_len = skb_tail_pointer(skb) - (u8 *)r;
n_i++;
}
}
@@ -102,7 +103,7 @@ done:
return n_i;
rtattr_failure:
- skb_trim(skb, (u8*)r - skb->data);
+ nlmsg_trim(skb, r);
goto done;
}
#endif
@@ -240,7 +241,7 @@ override:
if (ret != ACT_P_CREATED)
return ret;
- PSCHED_GET_TIME(police->tcfp_t_c);
+ police->tcfp_t_c = psched_get_time();
police->tcf_index = parm->index ? parm->index :
tcf_hash_new_index(&police_idx_gen, &police_hash_info);
h = tcf_hash(police->tcf_index, POL_TAB_MASK);
@@ -295,10 +296,9 @@ static int tcf_act_police(struct sk_buff *skb, struct tc_action *a,
return police->tcfp_result;
}
- PSCHED_GET_TIME(now);
-
- toks = PSCHED_TDIFF_SAFE(now, police->tcfp_t_c,
- police->tcfp_burst);
+ now = psched_get_time();
+ toks = psched_tdiff_bounded(now, police->tcfp_t_c,
+ police->tcfp_burst);
if (police->tcfp_P_tab) {
ptoks = toks + police->tcfp_ptoks;
if (ptoks > (long)L2T_P(police, police->tcfp_mtu))
@@ -326,7 +326,7 @@ static int tcf_act_police(struct sk_buff *skb, struct tc_action *a,
static int
tcf_act_police_dump(struct sk_buff *skb, struct tc_action *a, int bind, int ref)
{
- unsigned char *b = skb->tail;
+ unsigned char *b = skb_tail_pointer(skb);
struct tcf_police *police = a->priv;
struct tc_police opt;
@@ -355,7 +355,7 @@ tcf_act_police_dump(struct sk_buff *skb, struct tc_action *a, int bind, int ref)
return skb->len;
rtattr_failure:
- skb_trim(skb, b - skb->data);
+ nlmsg_trim(skb, b);
return -1;
}
@@ -494,7 +494,7 @@ struct tcf_police *tcf_police_locate(struct rtattr *rta, struct rtattr *est)
}
if (police->tcfp_P_tab)
police->tcfp_ptoks = L2T_P(police, police->tcfp_mtu);
- PSCHED_GET_TIME(police->tcfp_t_c);
+ police->tcfp_t_c = psched_get_time();
police->tcf_index = parm->index ? parm->index :
tcf_police_new_index();
police->tcf_action = parm->action;
@@ -542,9 +542,9 @@ int tcf_police(struct sk_buff *skb, struct tcf_police *police)
return police->tcfp_result;
}
- PSCHED_GET_TIME(now);
- toks = PSCHED_TDIFF_SAFE(now, police->tcfp_t_c,
- police->tcfp_burst);
+ now = psched_get_time();
+ toks = psched_tdiff_bounded(now, police->tcfp_t_c,
+ police->tcfp_burst);
if (police->tcfp_P_tab) {
ptoks = toks + police->tcfp_ptoks;
if (ptoks > (long)L2T_P(police, police->tcfp_mtu))
@@ -572,7 +572,7 @@ EXPORT_SYMBOL(tcf_police);
int tcf_police_dump(struct sk_buff *skb, struct tcf_police *police)
{
- unsigned char *b = skb->tail;
+ unsigned char *b = skb_tail_pointer(skb);
struct tc_police opt;
opt.index = police->tcf_index;
@@ -598,7 +598,7 @@ int tcf_police_dump(struct sk_buff *skb, struct tcf_police *police)
return skb->len;
rtattr_failure:
- skb_trim(skb, b - skb->data);
+ nlmsg_trim(skb, b);
return -1;
}
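act_police.c shows the whole time-macro conversion in one place; the mapping, as applied here and in the scheduler patches below:

	/* Old macro form                      New open-coded form
	 * PSCHED_GET_TIME(t)              ->  t = psched_get_time();
	 * PSCHED_TDIFF(a, b)              ->  a - b
	 * PSCHED_TDIFF_SAFE(a, b, bound)  ->  psched_tdiff_bounded(a, b, bound)
	 * PSCHED_TADD(a, d)               ->  a += d
	 * PSCHED_TADD2(a, d, r)           ->  r = a + d
	 * PSCHED_TLESS(a, b)              ->  a < b
	 * PSCHED_IS_PASTPERFECT(t)        ->  t == PSCHED_PASTPERFECT
	 * PSCHED_SET_PASTPERFECT(t)       ->  t = PSCHED_PASTPERFECT
	 */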
diff --git a/net/sched/act_simple.c b/net/sched/act_simple.c
index c7971182af0..36e1edad599 100644
--- a/net/sched/act_simple.c
+++ b/net/sched/act_simple.c
@@ -16,6 +16,7 @@
#include <linux/netdevice.h>
#include <linux/skbuff.h>
#include <linux/rtnetlink.h>
+#include <net/netlink.h>
#include <net/pkt_sched.h>
#define TCA_ACT_SIMP 22
@@ -155,7 +156,7 @@ static inline int tcf_simp_cleanup(struct tc_action *a, int bind)
static inline int tcf_simp_dump(struct sk_buff *skb, struct tc_action *a,
int bind, int ref)
{
- unsigned char *b = skb->tail;
+ unsigned char *b = skb_tail_pointer(skb);
struct tcf_defact *d = a->priv;
struct tc_defact opt;
struct tcf_t t;
@@ -173,7 +174,7 @@ static inline int tcf_simp_dump(struct sk_buff *skb, struct tc_action *a,
return skb->len;
rtattr_failure:
- skb_trim(skb, b - skb->data);
+ nlmsg_trim(skb, b);
return -1;
}
diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c
index 5c6ffdb77d2..ebf94edf047 100644
--- a/net/sched/cls_api.c
+++ b/net/sched/cls_api.c
@@ -29,9 +29,10 @@
#include <linux/interrupt.h>
#include <linux/netdevice.h>
#include <linux/skbuff.h>
-#include <linux/rtnetlink.h>
#include <linux/init.h>
#include <linux/kmod.h>
+#include <linux/netlink.h>
+#include <net/netlink.h>
#include <net/sock.h>
#include <net/pkt_sched.h>
#include <net/pkt_cls.h>
@@ -323,7 +324,7 @@ tcf_fill_node(struct sk_buff *skb, struct tcf_proto *tp, unsigned long fh,
{
struct tcmsg *tcm;
struct nlmsghdr *nlh;
- unsigned char *b = skb->tail;
+ unsigned char *b = skb_tail_pointer(skb);
nlh = NLMSG_NEW(skb, pid, seq, event, sizeof(*tcm), flags);
tcm = NLMSG_DATA(nlh);
@@ -340,12 +341,12 @@ tcf_fill_node(struct sk_buff *skb, struct tcf_proto *tp, unsigned long fh,
if (tp->ops->dump && tp->ops->dump(tp, fh, skb, tcm) < 0)
goto rtattr_failure;
}
- nlh->nlmsg_len = skb->tail - b;
+ nlh->nlmsg_len = skb_tail_pointer(skb) - b;
return skb->len;
nlmsg_failure:
rtattr_failure:
- skb_trim(skb, b - skb->data);
+ nlmsg_trim(skb, b);
return -1;
}
@@ -399,7 +400,6 @@ static int tc_dump_tfilter(struct sk_buff *skb, struct netlink_callback *cb)
if ((dev = dev_get_by_index(tcm->tcm_ifindex)) == NULL)
return skb->len;
- read_lock(&qdisc_tree_lock);
if (!tcm->tcm_parent)
q = dev->qdisc_sleeping;
else
@@ -456,7 +456,6 @@ errout:
if (cl)
cops->put(q, cl);
out:
- read_unlock(&qdisc_tree_lock);
dev_put(dev);
return skb->len;
}
@@ -563,30 +562,30 @@ tcf_exts_dump(struct sk_buff *skb, struct tcf_exts *exts,
* to work with both old and new modes of entering
* tc data even if iproute2 was newer - jhs
*/
- struct rtattr * p_rta = (struct rtattr*) skb->tail;
+ struct rtattr *p_rta = (struct rtattr *)skb_tail_pointer(skb);
if (exts->action->type != TCA_OLD_COMPAT) {
RTA_PUT(skb, map->action, 0, NULL);
if (tcf_action_dump(skb, exts->action, 0, 0) < 0)
goto rtattr_failure;
- p_rta->rta_len = skb->tail - (u8*)p_rta;
+ p_rta->rta_len = skb_tail_pointer(skb) - (u8 *)p_rta;
} else if (map->police) {
RTA_PUT(skb, map->police, 0, NULL);
if (tcf_action_dump_old(skb, exts->action, 0, 0) < 0)
goto rtattr_failure;
- p_rta->rta_len = skb->tail - (u8*)p_rta;
+ p_rta->rta_len = skb_tail_pointer(skb) - (u8 *)p_rta;
}
}
#elif defined CONFIG_NET_CLS_POLICE
if (map->police && exts->police) {
- struct rtattr * p_rta = (struct rtattr*) skb->tail;
+ struct rtattr *p_rta = (struct rtattr *)skb_tail_pointer(skb);
RTA_PUT(skb, map->police, 0, NULL);
if (tcf_police_dump(skb, exts->police) < 0)
goto rtattr_failure;
- p_rta->rta_len = skb->tail - (u8*)p_rta;
+ p_rta->rta_len = skb_tail_pointer(skb) - (u8 *)p_rta;
}
#endif
return 0;
@@ -614,18 +613,11 @@ rtattr_failure: __attribute__ ((unused))
static int __init tc_filter_init(void)
{
- struct rtnetlink_link *link_p = rtnetlink_links[PF_UNSPEC];
+ rtnl_register(PF_UNSPEC, RTM_NEWTFILTER, tc_ctl_tfilter, NULL);
+ rtnl_register(PF_UNSPEC, RTM_DELTFILTER, tc_ctl_tfilter, NULL);
+ rtnl_register(PF_UNSPEC, RTM_GETTFILTER, tc_ctl_tfilter,
+ tc_dump_tfilter);
- /* Setup rtnetlink links. It is made here to avoid
- exporting large number of public symbols.
- */
-
- if (link_p) {
- link_p[RTM_NEWTFILTER-RTM_BASE].doit = tc_ctl_tfilter;
- link_p[RTM_DELTFILTER-RTM_BASE].doit = tc_ctl_tfilter;
- link_p[RTM_GETTFILTER-RTM_BASE].doit = tc_ctl_tfilter;
- link_p[RTM_GETTFILTER-RTM_BASE].dumpit = tc_dump_tfilter;
- }
return 0;
}
diff --git a/net/sched/cls_basic.c b/net/sched/cls_basic.c
index 4a91f082a81..c885412d79d 100644
--- a/net/sched/cls_basic.c
+++ b/net/sched/cls_basic.c
@@ -17,6 +17,7 @@
#include <linux/errno.h>
#include <linux/rtnetlink.h>
#include <linux/skbuff.h>
+#include <net/netlink.h>
#include <net/act_api.h>
#include <net/pkt_cls.h>
@@ -245,7 +246,7 @@ static int basic_dump(struct tcf_proto *tp, unsigned long fh,
struct sk_buff *skb, struct tcmsg *t)
{
struct basic_filter *f = (struct basic_filter *) fh;
- unsigned char *b = skb->tail;
+ unsigned char *b = skb_tail_pointer(skb);
struct rtattr *rta;
if (f == NULL)
@@ -263,11 +264,11 @@ static int basic_dump(struct tcf_proto *tp, unsigned long fh,
tcf_em_tree_dump(skb, &f->ematches, TCA_BASIC_EMATCHES) < 0)
goto rtattr_failure;
- rta->rta_len = (skb->tail - b);
+ rta->rta_len = skb_tail_pointer(skb) - b;
return skb->len;
rtattr_failure:
- skb_trim(skb, b - skb->data);
+ nlmsg_trim(skb, b);
return -1;
}
diff --git a/net/sched/cls_fw.c b/net/sched/cls_fw.c
index 5dbb9d451f7..bbec4a0d4dc 100644
--- a/net/sched/cls_fw.c
+++ b/net/sched/cls_fw.c
@@ -38,6 +38,7 @@
#include <linux/notifier.h>
#include <linux/netfilter.h>
#include <net/ip.h>
+#include <net/netlink.h>
#include <net/route.h>
#include <linux/skbuff.h>
#include <net/sock.h>
@@ -348,7 +349,7 @@ static int fw_dump(struct tcf_proto *tp, unsigned long fh,
{
struct fw_head *head = (struct fw_head *)tp->root;
struct fw_filter *f = (struct fw_filter*)fh;
- unsigned char *b = skb->tail;
+ unsigned char *b = skb_tail_pointer(skb);
struct rtattr *rta;
if (f == NULL)
@@ -374,7 +375,7 @@ static int fw_dump(struct tcf_proto *tp, unsigned long fh,
if (tcf_exts_dump(skb, &f->exts, &fw_ext_map) < 0)
goto rtattr_failure;
- rta->rta_len = skb->tail - b;
+ rta->rta_len = skb_tail_pointer(skb) - b;
if (tcf_exts_dump_stats(skb, &f->exts, &fw_ext_map) < 0)
goto rtattr_failure;
@@ -382,7 +383,7 @@ static int fw_dump(struct tcf_proto *tp, unsigned long fh,
return skb->len;
rtattr_failure:
- skb_trim(skb, b - skb->data);
+ nlmsg_trim(skb, b);
return -1;
}
diff --git a/net/sched/cls_route.c b/net/sched/cls_route.c
index abc47cc48ad..cc941d0ee3a 100644
--- a/net/sched/cls_route.c
+++ b/net/sched/cls_route.c
@@ -28,6 +28,7 @@
#include <linux/etherdevice.h>
#include <linux/notifier.h>
#include <net/ip.h>
+#include <net/netlink.h>
#include <net/route.h>
#include <linux/skbuff.h>
#include <net/sock.h>
@@ -88,9 +89,9 @@ static __inline__ int route4_fastmap_hash(u32 id, int iif)
static inline
void route4_reset_fastmap(struct net_device *dev, struct route4_head *head, u32 id)
{
- spin_lock_bh(&dev->queue_lock);
+ qdisc_lock_tree(dev);
memset(head->fastmap, 0, sizeof(head->fastmap));
- spin_unlock_bh(&dev->queue_lock);
+ qdisc_unlock_tree(dev);
}
static inline void
@@ -562,7 +563,7 @@ static int route4_dump(struct tcf_proto *tp, unsigned long fh,
struct sk_buff *skb, struct tcmsg *t)
{
struct route4_filter *f = (struct route4_filter*)fh;
- unsigned char *b = skb->tail;
+ unsigned char *b = skb_tail_pointer(skb);
struct rtattr *rta;
u32 id;
@@ -591,7 +592,7 @@ static int route4_dump(struct tcf_proto *tp, unsigned long fh,
if (tcf_exts_dump(skb, &f->exts, &route_ext_map) < 0)
goto rtattr_failure;
- rta->rta_len = skb->tail - b;
+ rta->rta_len = skb_tail_pointer(skb) - b;
if (tcf_exts_dump_stats(skb, &f->exts, &route_ext_map) < 0)
goto rtattr_failure;
@@ -599,7 +600,7 @@ static int route4_dump(struct tcf_proto *tp, unsigned long fh,
return skb->len;
rtattr_failure:
- skb_trim(skb, b - skb->data);
+ nlmsg_trim(skb, b);
return -1;
}
diff --git a/net/sched/cls_rsvp.c b/net/sched/cls_rsvp.c
index 1d4a1fb1760..0a683c07c64 100644
--- a/net/sched/cls_rsvp.c
+++ b/net/sched/cls_rsvp.c
@@ -31,6 +31,7 @@
#include <net/route.h>
#include <linux/skbuff.h>
#include <net/sock.h>
+#include <net/netlink.h>
#include <net/act_api.h>
#include <net/pkt_cls.h>
diff --git a/net/sched/cls_rsvp.h b/net/sched/cls_rsvp.h
index 7853621a04c..22f9ede70e8 100644
--- a/net/sched/cls_rsvp.h
+++ b/net/sched/cls_rsvp.h
@@ -143,9 +143,9 @@ static int rsvp_classify(struct sk_buff *skb, struct tcf_proto *tp,
u8 tunnelid = 0;
u8 *xprt;
#if RSVP_DST_LEN == 4
- struct ipv6hdr *nhptr = skb->nh.ipv6h;
+ struct ipv6hdr *nhptr = ipv6_hdr(skb);
#else
- struct iphdr *nhptr = skb->nh.iph;
+ struct iphdr *nhptr = ip_hdr(skb);
#endif
restart:
@@ -160,7 +160,7 @@ restart:
dst = &nhptr->daddr;
protocol = nhptr->protocol;
xprt = ((u8*)nhptr) + (nhptr->ihl<<2);
- if (nhptr->frag_off&__constant_htons(IP_MF|IP_OFFSET))
+ if (nhptr->frag_off & htons(IP_MF|IP_OFFSET))
return -1;
#endif
@@ -593,7 +593,7 @@ static int rsvp_dump(struct tcf_proto *tp, unsigned long fh,
{
struct rsvp_filter *f = (struct rsvp_filter*)fh;
struct rsvp_session *s;
- unsigned char *b = skb->tail;
+ unsigned char *b = skb_tail_pointer(skb);
struct rtattr *rta;
struct tc_rsvp_pinfo pinfo;
@@ -623,14 +623,14 @@ static int rsvp_dump(struct tcf_proto *tp, unsigned long fh,
if (tcf_exts_dump(skb, &f->exts, &rsvp_ext_map) < 0)
goto rtattr_failure;
- rta->rta_len = skb->tail - b;
+ rta->rta_len = skb_tail_pointer(skb) - b;
if (tcf_exts_dump_stats(skb, &f->exts, &rsvp_ext_map) < 0)
goto rtattr_failure;
return skb->len;
rtattr_failure:
- skb_trim(skb, b - skb->data);
+ nlmsg_trim(skb, b);
return -1;
}
diff --git a/net/sched/cls_rsvp6.c b/net/sched/cls_rsvp6.c
index a2979d89798..93b6abed57d 100644
--- a/net/sched/cls_rsvp6.c
+++ b/net/sched/cls_rsvp6.c
@@ -34,6 +34,7 @@
#include <net/sock.h>
#include <net/act_api.h>
#include <net/pkt_cls.h>
+#include <net/netlink.h>
#define RSVP_DST_LEN 4
#define RSVP_ID "rsvp6"
diff --git a/net/sched/cls_tcindex.c b/net/sched/cls_tcindex.c
index 7563fdcef4b..47ac0c55642 100644
--- a/net/sched/cls_tcindex.c
+++ b/net/sched/cls_tcindex.c
@@ -12,6 +12,7 @@
#include <linux/netdevice.h>
#include <net/ip.h>
#include <net/act_api.h>
+#include <net/netlink.h>
#include <net/pkt_cls.h>
#include <net/route.h>
@@ -448,7 +449,7 @@ static int tcindex_dump(struct tcf_proto *tp, unsigned long fh,
{
struct tcindex_data *p = PRIV(tp);
struct tcindex_filter_result *r = (struct tcindex_filter_result *) fh;
- unsigned char *b = skb->tail;
+ unsigned char *b = skb_tail_pointer(skb);
struct rtattr *rta;
DPRINTK("tcindex_dump(tp %p,fh 0x%lx,skb %p,t %p),p %p,r %p,b %p\n",
@@ -463,7 +464,7 @@ static int tcindex_dump(struct tcf_proto *tp, unsigned long fh,
RTA_PUT(skb,TCA_TCINDEX_SHIFT,sizeof(p->shift),&p->shift);
RTA_PUT(skb,TCA_TCINDEX_FALL_THROUGH,sizeof(p->fall_through),
&p->fall_through);
- rta->rta_len = skb->tail-b;
+ rta->rta_len = skb_tail_pointer(skb) - b;
} else {
if (p->perfect) {
t->tcm_handle = r-p->perfect;
@@ -486,7 +487,7 @@ static int tcindex_dump(struct tcf_proto *tp, unsigned long fh,
if (tcf_exts_dump(skb, &r->exts, &tcindex_ext_map) < 0)
goto rtattr_failure;
- rta->rta_len = skb->tail-b;
+ rta->rta_len = skb_tail_pointer(skb) - b;
if (tcf_exts_dump_stats(skb, &r->exts, &tcindex_ext_map) < 0)
goto rtattr_failure;
@@ -495,7 +496,7 @@ static int tcindex_dump(struct tcf_proto *tp, unsigned long fh,
return skb->len;
rtattr_failure:
- skb_trim(skb, b - skb->data);
+ nlmsg_trim(skb, b);
return -1;
}
diff --git a/net/sched/cls_u32.c b/net/sched/cls_u32.c
index 0bcb16928d2..c7a347bd6d7 100644
--- a/net/sched/cls_u32.c
+++ b/net/sched/cls_u32.c
@@ -50,6 +50,7 @@
#include <linux/notifier.h>
#include <linux/rtnetlink.h>
#include <net/ip.h>
+#include <net/netlink.h>
#include <net/route.h>
#include <linux/skbuff.h>
#include <net/sock.h>
@@ -119,7 +120,7 @@ static int u32_classify(struct sk_buff *skb, struct tcf_proto *tp, struct tcf_re
} stack[TC_U32_MAXDEPTH];
struct tc_u_hnode *ht = (struct tc_u_hnode*)tp->root;
- u8 *ptr = skb->nh.raw;
+ u8 *ptr = skb_network_header(skb);
struct tc_u_knode *n;
int sdepth = 0;
int off2 = 0;
@@ -213,7 +214,7 @@ check_terminal:
off2 = 0;
}
- if (ptr < skb->tail)
+ if (ptr < skb_tail_pointer(skb))
goto next_ht;
}
@@ -435,7 +436,7 @@ static void u32_destroy(struct tcf_proto *tp)
BUG_TRAP(ht->refcnt == 0);
kfree(ht);
- };
+ }
kfree(tp_c);
}
@@ -718,7 +719,7 @@ static int u32_dump(struct tcf_proto *tp, unsigned long fh,
struct sk_buff *skb, struct tcmsg *t)
{
struct tc_u_knode *n = (struct tc_u_knode*)fh;
- unsigned char *b = skb->tail;
+ unsigned char *b = skb_tail_pointer(skb);
struct rtattr *rta;
if (n == NULL)
@@ -765,14 +766,14 @@ static int u32_dump(struct tcf_proto *tp, unsigned long fh,
#endif
}
- rta->rta_len = skb->tail - b;
+ rta->rta_len = skb_tail_pointer(skb) - b;
if (TC_U32_KEY(n->handle))
if (tcf_exts_dump_stats(skb, &n->exts, &u32_ext_map) < 0)
goto rtattr_failure;
return skb->len;
rtattr_failure:
- skb_trim(skb, b - skb->data);
+ nlmsg_trim(skb, b);
return -1;
}
diff --git a/net/sched/em_u32.c b/net/sched/em_u32.c
index cd0600c6796..0a2a7fe08de 100644
--- a/net/sched/em_u32.c
+++ b/net/sched/em_u32.c
@@ -22,7 +22,7 @@ static int em_u32_match(struct sk_buff *skb, struct tcf_ematch *em,
struct tcf_pkt_info *info)
{
struct tc_u32_key *key = (struct tc_u32_key *) em->data;
- unsigned char *ptr = skb->nh.raw;
+ const unsigned char *ptr = skb_network_header(skb);
if (info) {
if (info->ptr)
diff --git a/net/sched/ematch.c b/net/sched/ematch.c
index 959c306c571..63146d339d8 100644
--- a/net/sched/ematch.c
+++ b/net/sched/ematch.c
@@ -418,17 +418,19 @@ void tcf_em_tree_destroy(struct tcf_proto *tp, struct tcf_ematch_tree *tree)
int tcf_em_tree_dump(struct sk_buff *skb, struct tcf_ematch_tree *tree, int tlv)
{
int i;
- struct rtattr * top_start = (struct rtattr*) skb->tail;
- struct rtattr * list_start;
+ u8 *tail;
+ struct rtattr *top_start = (struct rtattr *)skb_tail_pointer(skb);
+ struct rtattr *list_start;
RTA_PUT(skb, tlv, 0, NULL);
RTA_PUT(skb, TCA_EMATCH_TREE_HDR, sizeof(tree->hdr), &tree->hdr);
- list_start = (struct rtattr *) skb->tail;
+ list_start = (struct rtattr *)skb_tail_pointer(skb);
RTA_PUT(skb, TCA_EMATCH_TREE_LIST, 0, NULL);
+ tail = skb_tail_pointer(skb);
for (i = 0; i < tree->hdr.nmatches; i++) {
- struct rtattr *match_start = (struct rtattr*) skb->tail;
+ struct rtattr *match_start = (struct rtattr *)tail;
struct tcf_ematch *em = tcf_em_get_match(tree, i);
struct tcf_ematch_hdr em_hdr = {
.kind = em->ops ? em->ops->kind : TCF_EM_CONTAINER,
@@ -447,11 +449,12 @@ int tcf_em_tree_dump(struct sk_buff *skb, struct tcf_ematch_tree *tree, int tlv)
} else if (em->datalen > 0)
RTA_PUT_NOHDR(skb, em->datalen, (void *) em->data);
- match_start->rta_len = skb->tail - (u8*) match_start;
+ tail = skb_tail_pointer(skb);
+ match_start->rta_len = tail - (u8 *)match_start;
}
- list_start->rta_len = skb->tail - (u8 *) list_start;
- top_start->rta_len = skb->tail - (u8 *) top_start;
+ list_start->rta_len = tail - (u8 *)list_start;
+ top_start->rta_len = tail - (u8 *)top_start;
return 0;
diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c
index ecc988af4a9..bec600af03c 100644
--- a/net/sched/sch_api.c
+++ b/net/sched/sch_api.c
@@ -27,14 +27,15 @@
#include <linux/interrupt.h>
#include <linux/netdevice.h>
#include <linux/skbuff.h>
-#include <linux/rtnetlink.h>
#include <linux/init.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#include <linux/kmod.h>
#include <linux/list.h>
#include <linux/bitops.h>
+#include <linux/hrtimer.h>
+#include <net/netlink.h>
#include <net/sock.h>
#include <net/pkt_sched.h>
@@ -190,7 +191,7 @@ int unregister_qdisc(struct Qdisc_ops *qops)
(root qdisc, all its children, children of children etc.)
*/
-static struct Qdisc *__qdisc_lookup(struct net_device *dev, u32 handle)
+struct Qdisc *qdisc_lookup(struct net_device *dev, u32 handle)
{
struct Qdisc *q;
@@ -201,16 +202,6 @@ static struct Qdisc *__qdisc_lookup(struct net_device *dev, u32 handle)
return NULL;
}
-struct Qdisc *qdisc_lookup(struct net_device *dev, u32 handle)
-{
- struct Qdisc *q;
-
- read_lock(&qdisc_tree_lock);
- q = __qdisc_lookup(dev, handle);
- read_unlock(&qdisc_tree_lock);
- return q;
-}
-
static struct Qdisc *qdisc_leaf(struct Qdisc *p, u32 classid)
{
unsigned long cl;
@@ -291,6 +282,48 @@ void qdisc_put_rtab(struct qdisc_rate_table *tab)
}
}
+static enum hrtimer_restart qdisc_watchdog(struct hrtimer *timer)
+{
+ struct qdisc_watchdog *wd = container_of(timer, struct qdisc_watchdog,
+ timer);
+ struct net_device *dev = wd->qdisc->dev;
+
+ wd->qdisc->flags &= ~TCQ_F_THROTTLED;
+ smp_wmb();
+ if (spin_trylock(&dev->queue_lock)) {
+ qdisc_run(dev);
+ spin_unlock(&dev->queue_lock);
+ } else
+ netif_schedule(dev);
+
+ return HRTIMER_NORESTART;
+}
+
+void qdisc_watchdog_init(struct qdisc_watchdog *wd, struct Qdisc *qdisc)
+{
+ hrtimer_init(&wd->timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS);
+ wd->timer.function = qdisc_watchdog;
+ wd->qdisc = qdisc;
+}
+EXPORT_SYMBOL(qdisc_watchdog_init);
+
+void qdisc_watchdog_schedule(struct qdisc_watchdog *wd, psched_time_t expires)
+{
+ ktime_t time;
+
+ wd->qdisc->flags |= TCQ_F_THROTTLED;
+ time = ktime_set(0, 0);
+ time = ktime_add_ns(time, PSCHED_US2NS(expires));
+ hrtimer_start(&wd->timer, time, HRTIMER_MODE_ABS);
+}
+EXPORT_SYMBOL(qdisc_watchdog_schedule);
+
+void qdisc_watchdog_cancel(struct qdisc_watchdog *wd)
+{
+ hrtimer_cancel(&wd->timer);
+ wd->qdisc->flags &= ~TCQ_F_THROTTLED;
+}
+EXPORT_SYMBOL(qdisc_watchdog_cancel);
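The three entry points added above are consumed later in this patch by sch_cbq; a condensed usage sketch, with myqdisc as a hypothetical client:

	struct myqdisc_data {
		struct qdisc_watchdog watchdog;
		/* ... */
	};

	static int myqdisc_init(struct Qdisc *sch, struct rtattr *opt)
	{
		struct myqdisc_data *q = qdisc_priv(sch);

		qdisc_watchdog_init(&q->watchdog, sch); /* bind timer to qdisc */
		return 0;
	}

	static struct sk_buff *myqdisc_dequeue(struct Qdisc *sch)
	{
		struct myqdisc_data *q = qdisc_priv(sch);
		psched_time_t now = psched_get_time();
		psched_tdiff_t delay = 1;	/* hypothetical next-event gap */

		/* nothing eligible yet: throttle until the next event */
		qdisc_watchdog_schedule(&q->watchdog, now + delay);
		return NULL;
	}

	/* ->reset()/->destroy() call qdisc_watchdog_cancel(&q->watchdog). */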
/* Allocate a unique handle from space managed by kernel */

@@ -362,7 +395,7 @@ void qdisc_tree_decrease_qlen(struct Qdisc *sch, unsigned int n)
if (n == 0)
return;
while ((parentid = sch->parent)) {
- sch = __qdisc_lookup(sch->dev, TC_H_MAJ(parentid));
+ sch = qdisc_lookup(sch->dev, TC_H_MAJ(parentid));
cops = sch->ops->cl_ops;
if (cops->qlen_notify) {
cl = cops->get(sch, parentid);
@@ -467,12 +500,16 @@ qdisc_create(struct net_device *dev, u32 handle, struct rtattr **tca, int *errp)
if (handle == TC_H_INGRESS) {
sch->flags |= TCQ_F_INGRESS;
+ sch->stats_lock = &dev->ingress_lock;
handle = TC_H_MAKE(TC_H_INGRESS, 0);
- } else if (handle == 0) {
- handle = qdisc_alloc_handle(dev);
- err = -ENOMEM;
- if (handle == 0)
- goto err_out3;
+ } else {
+ sch->stats_lock = &dev->queue_lock;
+ if (handle == 0) {
+ handle = qdisc_alloc_handle(dev);
+ err = -ENOMEM;
+ if (handle == 0)
+ goto err_out3;
+ }
}
sch->handle = handle;
@@ -621,9 +658,9 @@ static int tc_get_qdisc(struct sk_buff *skb, struct nlmsghdr *n, void *arg)
return err;
if (q) {
qdisc_notify(skb, n, clid, q, NULL);
- spin_lock_bh(&dev->queue_lock);
+ qdisc_lock_tree(dev);
qdisc_destroy(q);
- spin_unlock_bh(&dev->queue_lock);
+ qdisc_unlock_tree(dev);
}
} else {
qdisc_notify(skb, n, clid, NULL, q);
@@ -756,17 +793,17 @@ graft:
err = qdisc_graft(dev, p, clid, q, &old_q);
if (err) {
if (q) {
- spin_lock_bh(&dev->queue_lock);
+ qdisc_lock_tree(dev);
qdisc_destroy(q);
- spin_unlock_bh(&dev->queue_lock);
+ qdisc_unlock_tree(dev);
}
return err;
}
qdisc_notify(skb, n, clid, old_q, q);
if (old_q) {
- spin_lock_bh(&dev->queue_lock);
+ qdisc_lock_tree(dev);
qdisc_destroy(old_q);
- spin_unlock_bh(&dev->queue_lock);
+ qdisc_unlock_tree(dev);
}
}
return 0;
@@ -777,7 +814,7 @@ static int tc_fill_qdisc(struct sk_buff *skb, struct Qdisc *q, u32 clid,
{
struct tcmsg *tcm;
struct nlmsghdr *nlh;
- unsigned char *b = skb->tail;
+ unsigned char *b = skb_tail_pointer(skb);
struct gnet_dump d;
nlh = NLMSG_NEW(skb, pid, seq, event, sizeof(*tcm), flags);
@@ -811,12 +848,12 @@ static int tc_fill_qdisc(struct sk_buff *skb, struct Qdisc *q, u32 clid,
if (gnet_stats_finish_copy(&d) < 0)
goto rtattr_failure;
- nlh->nlmsg_len = skb->tail - b;
+ nlh->nlmsg_len = skb_tail_pointer(skb) - b;
return skb->len;
nlmsg_failure:
rtattr_failure:
- skb_trim(skb, b - skb->data);
+ nlmsg_trim(skb, b);
return -1;
}
@@ -857,12 +894,12 @@ static int tc_dump_qdisc(struct sk_buff *skb, struct netlink_callback *cb)
s_idx = cb->args[0];
s_q_idx = q_idx = cb->args[1];
read_lock(&dev_base_lock);
- for (dev=dev_base, idx=0; dev; dev = dev->next, idx++) {
+ idx = 0;
+ for_each_netdev(dev) {
if (idx < s_idx)
- continue;
+ goto cont;
if (idx > s_idx)
s_q_idx = 0;
- read_lock(&qdisc_tree_lock);
q_idx = 0;
list_for_each_entry(q, &dev->qdisc_list, list) {
if (q_idx < s_q_idx) {
@@ -870,13 +907,12 @@ static int tc_dump_qdisc(struct sk_buff *skb, struct netlink_callback *cb)
continue;
}
if (tc_fill_qdisc(skb, q, q->parent, NETLINK_CB(cb->skb).pid,
- cb->nlh->nlmsg_seq, NLM_F_MULTI, RTM_NEWQDISC) <= 0) {
- read_unlock(&qdisc_tree_lock);
+ cb->nlh->nlmsg_seq, NLM_F_MULTI, RTM_NEWQDISC) <= 0)
goto done;
- }
q_idx++;
}
- read_unlock(&qdisc_tree_lock);
+cont:
+ idx++;
}
done:
@@ -1015,7 +1051,7 @@ static int tc_fill_tclass(struct sk_buff *skb, struct Qdisc *q,
{
struct tcmsg *tcm;
struct nlmsghdr *nlh;
- unsigned char *b = skb->tail;
+ unsigned char *b = skb_tail_pointer(skb);
struct gnet_dump d;
struct Qdisc_class_ops *cl_ops = q->ops->cl_ops;
@@ -1040,12 +1076,12 @@ static int tc_fill_tclass(struct sk_buff *skb, struct Qdisc *q,
if (gnet_stats_finish_copy(&d) < 0)
goto rtattr_failure;
- nlh->nlmsg_len = skb->tail - b;
+ nlh->nlmsg_len = skb_tail_pointer(skb) - b;
return skb->len;
nlmsg_failure:
rtattr_failure:
- skb_trim(skb, b - skb->data);
+ nlmsg_trim(skb, b);
return -1;
}
@@ -1099,7 +1135,6 @@ static int tc_dump_tclass(struct sk_buff *skb, struct netlink_callback *cb)
s_t = cb->args[0];
t = 0;
- read_lock(&qdisc_tree_lock);
list_for_each_entry(q, &dev->qdisc_list, list) {
if (t < s_t || !q->ops->cl_ops ||
(tcm->tcm_parent &&
@@ -1121,7 +1156,6 @@ static int tc_dump_tclass(struct sk_buff *skb, struct netlink_callback *cb)
break;
t++;
}
- read_unlock(&qdisc_tree_lock);
cb->args[0] = t;
@@ -1146,7 +1180,7 @@ reclassify:
for ( ; tp; tp = tp->next) {
if ((tp->protocol == protocol ||
- tp->protocol == __constant_htons(ETH_P_ALL)) &&
+ tp->protocol == htons(ETH_P_ALL)) &&
(err = tp->classify(skb, tp, res)) >= 0) {
#ifdef CONFIG_NET_CLS_ACT
if ( TC_ACT_RECLASSIFY == err) {
@@ -1175,15 +1209,31 @@ reclassify:
return -1;
}
-static int psched_us_per_tick = 1;
-static int psched_tick_per_us = 1;
+void tcf_destroy(struct tcf_proto *tp)
+{
+ tp->ops->destroy(tp);
+ module_put(tp->ops->owner);
+ kfree(tp);
+}
+
+void tcf_destroy_chain(struct tcf_proto *fl)
+{
+ struct tcf_proto *tp;
+
+ while ((tp = fl) != NULL) {
+ fl = tp->next;
+ tcf_destroy(tp);
+ }
+}
+EXPORT_SYMBOL(tcf_destroy_chain);
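tcf_destroy_chain() centralizes the filter-list teardown that sch_atm (and others) used to open-code; the typical caller after this patch, as the sch_atm hunks below show:

	/* One call frees every tcf_proto on the list. */
	tcf_destroy_chain(flow->filter_list);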
#ifdef CONFIG_PROC_FS
static int psched_show(struct seq_file *seq, void *v)
{
seq_printf(seq, "%08x %08x %08x %08x\n",
- psched_tick_per_us, psched_us_per_tick,
- 1000000, HZ);
+ (u32)NSEC_PER_USEC, (u32)PSCHED_US2NS(1),
+ 1000000,
+ (u32)NSEC_PER_SEC/(u32)ktime_to_ns(KTIME_MONOTONIC_RES));
return 0;
}
@@ -1202,101 +1252,19 @@ static const struct file_operations psched_fops = {
};
#endif
-#ifdef CONFIG_NET_SCH_CLK_CPU
-psched_tdiff_t psched_clock_per_hz;
-int psched_clock_scale;
-EXPORT_SYMBOL(psched_clock_per_hz);
-EXPORT_SYMBOL(psched_clock_scale);
-
-psched_time_t psched_time_base;
-cycles_t psched_time_mark;
-EXPORT_SYMBOL(psched_time_mark);
-EXPORT_SYMBOL(psched_time_base);
-
-/*
- * Periodically adjust psched_time_base to avoid overflow
- * with 32-bit get_cycles(). Safe up to 4GHz CPU.
- */
-static void psched_tick(unsigned long);
-static DEFINE_TIMER(psched_timer, psched_tick, 0, 0);
-
-static void psched_tick(unsigned long dummy)
-{
- if (sizeof(cycles_t) == sizeof(u32)) {
- psched_time_t dummy_stamp;
- PSCHED_GET_TIME(dummy_stamp);
- psched_timer.expires = jiffies + 1*HZ;
- add_timer(&psched_timer);
- }
-}
-
-int __init psched_calibrate_clock(void)
-{
- psched_time_t stamp, stamp1;
- struct timeval tv, tv1;
- psched_tdiff_t delay;
- long rdelay;
- unsigned long stop;
-
- psched_tick(0);
- stop = jiffies + HZ/10;
- PSCHED_GET_TIME(stamp);
- do_gettimeofday(&tv);
- while (time_before(jiffies, stop)) {
- barrier();
- cpu_relax();
- }
- PSCHED_GET_TIME(stamp1);
- do_gettimeofday(&tv1);
-
- delay = PSCHED_TDIFF(stamp1, stamp);
- rdelay = tv1.tv_usec - tv.tv_usec;
- rdelay += (tv1.tv_sec - tv.tv_sec)*1000000;
- if (rdelay > delay)
- return -1;
- delay /= rdelay;
- psched_tick_per_us = delay;
- while ((delay>>=1) != 0)
- psched_clock_scale++;
- psched_us_per_tick = 1<<psched_clock_scale;
- psched_clock_per_hz = (psched_tick_per_us*(1000000/HZ))>>psched_clock_scale;
- return 0;
-}
-#endif
-
static int __init pktsched_init(void)
{
- struct rtnetlink_link *link_p;
-
-#ifdef CONFIG_NET_SCH_CLK_CPU
- if (psched_calibrate_clock() < 0)
- return -1;
-#elif defined(CONFIG_NET_SCH_CLK_JIFFIES)
- psched_tick_per_us = HZ<<PSCHED_JSCALE;
- psched_us_per_tick = 1000000;
-#endif
-
- link_p = rtnetlink_links[PF_UNSPEC];
-
- /* Setup rtnetlink links. It is made here to avoid
- exporting large number of public symbols.
- */
-
- if (link_p) {
- link_p[RTM_NEWQDISC-RTM_BASE].doit = tc_modify_qdisc;
- link_p[RTM_DELQDISC-RTM_BASE].doit = tc_get_qdisc;
- link_p[RTM_GETQDISC-RTM_BASE].doit = tc_get_qdisc;
- link_p[RTM_GETQDISC-RTM_BASE].dumpit = tc_dump_qdisc;
- link_p[RTM_NEWTCLASS-RTM_BASE].doit = tc_ctl_tclass;
- link_p[RTM_DELTCLASS-RTM_BASE].doit = tc_ctl_tclass;
- link_p[RTM_GETTCLASS-RTM_BASE].doit = tc_ctl_tclass;
- link_p[RTM_GETTCLASS-RTM_BASE].dumpit = tc_dump_tclass;
- }
-
register_qdisc(&pfifo_qdisc_ops);
register_qdisc(&bfifo_qdisc_ops);
proc_net_fops_create("psched", 0, &psched_fops);
+ rtnl_register(PF_UNSPEC, RTM_NEWQDISC, tc_modify_qdisc, NULL);
+ rtnl_register(PF_UNSPEC, RTM_DELQDISC, tc_get_qdisc, NULL);
+ rtnl_register(PF_UNSPEC, RTM_GETQDISC, tc_get_qdisc, tc_dump_qdisc);
+ rtnl_register(PF_UNSPEC, RTM_NEWTCLASS, tc_ctl_tclass, NULL);
+ rtnl_register(PF_UNSPEC, RTM_DELTCLASS, tc_ctl_tclass, NULL);
+ rtnl_register(PF_UNSPEC, RTM_GETTCLASS, tc_ctl_tclass, tc_dump_tclass);
+
return 0;
}
diff --git a/net/sched/sch_atm.c b/net/sched/sch_atm.c
index afb3bbd571f..be7d299acd7 100644
--- a/net/sched/sch_atm.c
+++ b/net/sched/sch_atm.c
@@ -14,6 +14,7 @@
#include <linux/netdevice.h>
#include <linux/rtnetlink.h>
#include <linux/file.h> /* for fput */
+#include <net/netlink.h>
#include <net/pkt_sched.h>
#include <net/sock.h>
@@ -157,19 +158,6 @@ static unsigned long atm_tc_bind_filter(struct Qdisc *sch,
return atm_tc_get(sch,classid);
}
-
-static void destroy_filters(struct atm_flow_data *flow)
-{
- struct tcf_proto *filter;
-
- while ((filter = flow->filter_list)) {
- DPRINTK("destroy_filters: destroying filter %p\n",filter);
- flow->filter_list = filter->next;
- tcf_destroy(filter);
- }
-}
-
-
/*
* atm_tc_put handles all destructions, including the ones that are explicitly
* requested (atm_tc_destroy, etc.). The assumption here is that we never drop
@@ -194,7 +182,7 @@ static void atm_tc_put(struct Qdisc *sch, unsigned long cl)
*prev = flow->next;
DPRINTK("atm_tc_put: qdisc %p\n",flow->q);
qdisc_destroy(flow->q);
- destroy_filters(flow);
+ tcf_destroy_chain(flow->filter_list);
if (flow->sock) {
DPRINTK("atm_tc_put: f_count %d\n",
file_count(flow->sock->file));
@@ -503,7 +491,7 @@ static void sch_atm_dequeue(unsigned long data)
}
D2PRINTK("atm_tc_dequeue: sending on class %p\n",flow);
/* remove any LL header somebody else has attached */
- skb_pull(skb,(char *) skb->nh.iph-(char *) skb->data);
+ skb_pull(skb, skb_network_offset(skb));
if (skb_headroom(skb) < flow->hdr_len) {
struct sk_buff *new;
@@ -513,7 +501,7 @@ static void sch_atm_dequeue(unsigned long data)
skb = new;
}
D2PRINTK("sch_atm_dequeue: ip %p, data %p\n",
- skb->nh.iph,skb->data);
+ skb_network_header(skb), skb->data);
ATM_SKB(skb)->vcc = flow->vcc;
memcpy(skb_push(skb,flow->hdr_len),flow->hdr,
flow->hdr_len);
@@ -610,7 +598,7 @@ static void atm_tc_destroy(struct Qdisc *sch)
DPRINTK("atm_tc_destroy(sch %p,[qdisc %p])\n",sch,p);
/* races ? */
while ((flow = p->flows)) {
- destroy_filters(flow);
+ tcf_destroy_chain(flow->filter_list);
if (flow->ref > 1)
printk(KERN_ERR "atm_destroy: %p->ref = %d\n",flow,
flow->ref);
@@ -631,7 +619,7 @@ static int atm_tc_dump_class(struct Qdisc *sch, unsigned long cl,
{
struct atm_qdisc_data *p = PRIV(sch);
struct atm_flow_data *flow = (struct atm_flow_data *) cl;
- unsigned char *b = skb->tail;
+ unsigned char *b = skb_tail_pointer(skb);
struct rtattr *rta;
DPRINTK("atm_tc_dump_class(sch %p,[qdisc %p],flow %p,skb %p,tcm %p)\n",
@@ -661,11 +649,11 @@ static int atm_tc_dump_class(struct Qdisc *sch, unsigned long cl,
RTA_PUT(skb,TCA_ATM_EXCESS,sizeof(zero),&zero);
}
- rta->rta_len = skb->tail-b;
+ rta->rta_len = skb_tail_pointer(skb) - b;
return skb->len;
rtattr_failure:
- skb_trim(skb,b-skb->data);
+ nlmsg_trim(skb, b);
return -1;
}
static int
diff --git a/net/sched/sch_cbq.c b/net/sched/sch_cbq.c
index 76c92e710a3..a294542cb8e 100644
--- a/net/sched/sch_cbq.c
+++ b/net/sched/sch_cbq.c
@@ -29,6 +29,7 @@
#include <linux/etherdevice.h>
#include <linux/notifier.h>
#include <net/ip.h>
+#include <net/netlink.h>
#include <net/route.h>
#include <linux/skbuff.h>
#include <net/sock.h>
@@ -112,7 +113,7 @@ struct cbq_class
/* Overlimit strategy parameters */
void (*overlimit)(struct cbq_class *cl);
- long penalty;
+ psched_tdiff_t penalty;
/* General scheduler (WRR) parameters */
long allot;
@@ -143,7 +144,7 @@ struct cbq_class
psched_time_t undertime;
long avgidle;
long deficit; /* Saved deficit for WRR */
- unsigned long penalized;
+ psched_time_t penalized;
struct gnet_stats_basic bstats;
struct gnet_stats_queue qstats;
struct gnet_stats_rate_est rate_est;
@@ -180,12 +181,12 @@ struct cbq_sched_data
psched_time_t now_rt; /* Cached real time */
unsigned pmask;
- struct timer_list delay_timer;
- struct timer_list wd_timer; /* Watchdog timer,
+ struct hrtimer delay_timer;
+ struct qdisc_watchdog watchdog; /* Watchdog timer,
started when CBQ has
backlog, but cannot
transmit just now */
- long wd_expires;
+ psched_tdiff_t wd_expires;
int toplevel;
u32 hgenerator;
};
@@ -384,12 +385,12 @@ cbq_mark_toplevel(struct cbq_sched_data *q, struct cbq_class *cl)
psched_time_t now;
psched_tdiff_t incr;
- PSCHED_GET_TIME(now);
- incr = PSCHED_TDIFF(now, q->now_rt);
- PSCHED_TADD2(q->now, incr, now);
+ now = psched_get_time();
+ incr = now - q->now_rt;
+ now = q->now + incr;
do {
- if (PSCHED_TLESS(cl->undertime, now)) {
+ if (cl->undertime < now) {
q->toplevel = cl->level;
return;
}
@@ -473,7 +474,7 @@ cbq_requeue(struct sk_buff *skb, struct Qdisc *sch)
static void cbq_ovl_classic(struct cbq_class *cl)
{
struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
- psched_tdiff_t delay = PSCHED_TDIFF(cl->undertime, q->now);
+ psched_tdiff_t delay = cl->undertime - q->now;
if (!cl->delayed) {
delay += cl->offtime;
@@ -491,7 +492,7 @@ static void cbq_ovl_classic(struct cbq_class *cl)
cl->avgidle = cl->minidle;
if (delay <= 0)
delay = 1;
- PSCHED_TADD2(q->now, delay, cl->undertime);
+ cl->undertime = q->now + delay;
cl->xstats.overactions++;
cl->delayed = 1;
@@ -508,7 +509,7 @@ static void cbq_ovl_classic(struct cbq_class *cl)
psched_tdiff_t base_delay = q->wd_expires;
for (b = cl->borrow; b; b = b->borrow) {
- delay = PSCHED_TDIFF(b->undertime, q->now);
+ delay = b->undertime - q->now;
if (delay < base_delay) {
if (delay <= 0)
delay = 1;
@@ -546,27 +547,32 @@ static void cbq_ovl_rclassic(struct cbq_class *cl)
static void cbq_ovl_delay(struct cbq_class *cl)
{
struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
- psched_tdiff_t delay = PSCHED_TDIFF(cl->undertime, q->now);
+ psched_tdiff_t delay = cl->undertime - q->now;
if (!cl->delayed) {
- unsigned long sched = jiffies;
+ psched_time_t sched = q->now;
+ ktime_t expires;
delay += cl->offtime;
if (cl->avgidle < 0)
delay -= (-cl->avgidle) - ((-cl->avgidle) >> cl->ewma_log);
if (cl->avgidle < cl->minidle)
cl->avgidle = cl->minidle;
- PSCHED_TADD2(q->now, delay, cl->undertime);
+ cl->undertime = q->now + delay;
if (delay > 0) {
- sched += PSCHED_US2JIFFIE(delay) + cl->penalty;
+ sched += delay + cl->penalty;
cl->penalized = sched;
cl->cpriority = TC_CBQ_MAXPRIO;
q->pmask |= (1<<TC_CBQ_MAXPRIO);
- if (del_timer(&q->delay_timer) &&
- (long)(q->delay_timer.expires - sched) > 0)
- q->delay_timer.expires = sched;
- add_timer(&q->delay_timer);
+
+ expires = ktime_set(0, 0);
+ expires = ktime_add_ns(expires, PSCHED_US2NS(sched));
+ if (hrtimer_try_to_cancel(&q->delay_timer) &&
+ ktime_to_ns(ktime_sub(q->delay_timer.expires,
+ expires)) > 0)
+ q->delay_timer.expires = expires;
+ hrtimer_restart(&q->delay_timer);
cl->delayed = 1;
cl->xstats.overactions++;
return;
@@ -583,7 +589,7 @@ static void cbq_ovl_lowprio(struct cbq_class *cl)
{
struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
- cl->penalized = jiffies + cl->penalty;
+ cl->penalized = q->now + cl->penalty;
if (cl->cpriority != cl->priority2) {
cl->cpriority = cl->priority2;
@@ -604,27 +610,19 @@ static void cbq_ovl_drop(struct cbq_class *cl)
cbq_ovl_classic(cl);
}
-static void cbq_watchdog(unsigned long arg)
-{
- struct Qdisc *sch = (struct Qdisc*)arg;
-
- sch->flags &= ~TCQ_F_THROTTLED;
- netif_schedule(sch->dev);
-}
-
-static unsigned long cbq_undelay_prio(struct cbq_sched_data *q, int prio)
+static psched_tdiff_t cbq_undelay_prio(struct cbq_sched_data *q, int prio,
+ psched_time_t now)
{
struct cbq_class *cl;
struct cbq_class *cl_prev = q->active[prio];
- unsigned long now = jiffies;
- unsigned long sched = now;
+ psched_time_t sched = now;
if (cl_prev == NULL)
- return now;
+ return 0;
do {
cl = cl_prev->next_alive;
- if ((long)(now - cl->penalized) > 0) {
+ if (now - cl->penalized > 0) {
cl_prev->next_alive = cl->next_alive;
cl->next_alive = NULL;
cl->cpriority = cl->priority;
@@ -640,30 +638,34 @@ static unsigned long cbq_undelay_prio(struct cbq_sched_data *q, int prio)
}
cl = cl_prev->next_alive;
- } else if ((long)(sched - cl->penalized) > 0)
+ } else if (sched - cl->penalized > 0)
sched = cl->penalized;
} while ((cl_prev = cl) != q->active[prio]);
- return (long)(sched - now);
+ return sched - now;
}
-static void cbq_undelay(unsigned long arg)
+static enum hrtimer_restart cbq_undelay(struct hrtimer *timer)
{
- struct Qdisc *sch = (struct Qdisc*)arg;
- struct cbq_sched_data *q = qdisc_priv(sch);
- long delay = 0;
+ struct cbq_sched_data *q = container_of(timer, struct cbq_sched_data,
+ delay_timer);
+ struct Qdisc *sch = q->watchdog.qdisc;
+ psched_time_t now;
+ psched_tdiff_t delay = 0;
unsigned pmask;
+ now = psched_get_time();
+
pmask = q->pmask;
q->pmask = 0;
while (pmask) {
int prio = ffz(~pmask);
- long tmp;
+ psched_tdiff_t tmp;
pmask &= ~(1<<prio);
- tmp = cbq_undelay_prio(q, prio);
+ tmp = cbq_undelay_prio(q, prio, now);
if (tmp > 0) {
q->pmask |= 1<<prio;
if (tmp < delay || delay == 0)
@@ -672,12 +674,16 @@ static void cbq_undelay(unsigned long arg)
}
if (delay) {
- q->delay_timer.expires = jiffies + delay;
- add_timer(&q->delay_timer);
+ ktime_t time;
+
+ time = ktime_set(0, 0);
+ time = ktime_add_ns(time, PSCHED_US2NS(now + delay));
+ hrtimer_start(&q->delay_timer, time, HRTIMER_MODE_ABS);
}
sch->flags &= ~TCQ_F_THROTTLED;
netif_schedule(sch->dev);
+ return HRTIMER_NORESTART;
}
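Both cbq_ovl_delay() above and cbq_undelay() arm the hrtimer with an absolute ktime built from psched microseconds; the conversion step isolated as a sketch (sched_us is a hypothetical stand-in for the computed expiry):

	ktime_t expires = ktime_set(0, 0);

	expires = ktime_add_ns(expires, PSCHED_US2NS(sched_us));
	hrtimer_start(&q->delay_timer, expires, HRTIMER_MODE_ABS);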
@@ -732,7 +738,7 @@ cbq_update_toplevel(struct cbq_sched_data *q, struct cbq_class *cl,
if (cl && q->toplevel >= borrowed->level) {
if (cl->q->q.qlen > 1) {
do {
- if (PSCHED_IS_PASTPERFECT(borrowed->undertime)) {
+ if (borrowed->undertime == PSCHED_PASTPERFECT) {
q->toplevel = borrowed->level;
return;
}
@@ -770,7 +776,7 @@ cbq_update(struct cbq_sched_data *q)
idle = (now - last) - last_pktlen/rate
*/
- idle = PSCHED_TDIFF(q->now, cl->last);
+ idle = q->now - cl->last;
if ((unsigned long)idle > 128*1024*1024) {
avgidle = cl->maxidle;
} else {
@@ -814,13 +820,11 @@ cbq_update(struct cbq_sched_data *q)
idle -= L2T(&q->link, len);
idle += L2T(cl, len);
- PSCHED_AUDIT_TDIFF(idle);
-
- PSCHED_TADD2(q->now, idle, cl->undertime);
+ cl->undertime = q->now + idle;
} else {
/* Underlimit */
- PSCHED_SET_PASTPERFECT(cl->undertime);
+ cl->undertime = PSCHED_PASTPERFECT;
if (avgidle > cl->maxidle)
cl->avgidle = cl->maxidle;
else
@@ -841,8 +845,7 @@ cbq_under_limit(struct cbq_class *cl)
if (cl->tparent == NULL)
return cl;
- if (PSCHED_IS_PASTPERFECT(cl->undertime) ||
- !PSCHED_TLESS(q->now, cl->undertime)) {
+ if (cl->undertime == PSCHED_PASTPERFECT || q->now >= cl->undertime) {
cl->delayed = 0;
return cl;
}
@@ -865,8 +868,7 @@ cbq_under_limit(struct cbq_class *cl)
}
if (cl->level > q->toplevel)
return NULL;
- } while (!PSCHED_IS_PASTPERFECT(cl->undertime) &&
- PSCHED_TLESS(q->now, cl->undertime));
+ } while (cl->undertime != PSCHED_PASTPERFECT && q->now < cl->undertime);
cl->delayed = 0;
return cl;
@@ -1001,8 +1003,8 @@ cbq_dequeue(struct Qdisc *sch)
psched_time_t now;
psched_tdiff_t incr;
- PSCHED_GET_TIME(now);
- incr = PSCHED_TDIFF(now, q->now_rt);
+ now = psched_get_time();
+ incr = now - q->now_rt;
if (q->tx_class) {
psched_tdiff_t incr2;
@@ -1014,12 +1016,12 @@ cbq_dequeue(struct Qdisc *sch)
cbq_time = max(real_time, work);
*/
incr2 = L2T(&q->link, q->tx_len);
- PSCHED_TADD(q->now, incr2);
+ q->now += incr2;
cbq_update(q);
if ((incr -= incr2) < 0)
incr = 0;
}
- PSCHED_TADD(q->now, incr);
+ q->now += incr;
q->now_rt = now;
for (;;) {
@@ -1051,11 +1053,11 @@ cbq_dequeue(struct Qdisc *sch)
*/
if (q->toplevel == TC_CBQ_MAXLEVEL &&
- PSCHED_IS_PASTPERFECT(q->link.undertime))
+ q->link.undertime == PSCHED_PASTPERFECT)
break;
q->toplevel = TC_CBQ_MAXLEVEL;
- PSCHED_SET_PASTPERFECT(q->link.undertime);
+ q->link.undertime = PSCHED_PASTPERFECT;
}
/* No packets in scheduler or nobody wants to give them to us :-(
@@ -1063,13 +1065,9 @@ cbq_dequeue(struct Qdisc *sch)
if (sch->q.qlen) {
sch->qstats.overlimits++;
- if (q->wd_expires) {
- long delay = PSCHED_US2JIFFIE(q->wd_expires);
- if (delay <= 0)
- delay = 1;
- mod_timer(&q->wd_timer, jiffies + delay);
- sch->flags |= TCQ_F_THROTTLED;
- }
+ if (q->wd_expires)
+ qdisc_watchdog_schedule(&q->watchdog,
+ now + q->wd_expires);
}
return NULL;
}
@@ -1276,10 +1274,10 @@ cbq_reset(struct Qdisc* sch)
q->pmask = 0;
q->tx_class = NULL;
q->tx_borrowed = NULL;
- del_timer(&q->wd_timer);
- del_timer(&q->delay_timer);
+ qdisc_watchdog_cancel(&q->watchdog);
+ hrtimer_cancel(&q->delay_timer);
q->toplevel = TC_CBQ_MAXLEVEL;
- PSCHED_GET_TIME(q->now);
+ q->now = psched_get_time();
q->now_rt = q->now;
for (prio = 0; prio <= TC_CBQ_MAXPRIO; prio++)
@@ -1290,7 +1288,7 @@ cbq_reset(struct Qdisc* sch)
qdisc_reset(cl->q);
cl->next_alive = NULL;
- PSCHED_SET_PASTPERFECT(cl->undertime);
+ cl->undertime = PSCHED_PASTPERFECT;
cl->avgidle = cl->maxidle;
cl->deficit = cl->quantum;
cl->cpriority = cl->priority;
@@ -1379,7 +1377,7 @@ static int cbq_set_overlimit(struct cbq_class *cl, struct tc_cbq_ovl *ovl)
default:
return -EINVAL;
}
- cl->penalty = (ovl->penalty*HZ)/1000;
+ cl->penalty = ovl->penalty;
return 0;
}
@@ -1446,14 +1444,11 @@ static int cbq_init(struct Qdisc *sch, struct rtattr *opt)
q->link.minidle = -0x7FFFFFFF;
q->link.stats_lock = &sch->dev->queue_lock;
- init_timer(&q->wd_timer);
- q->wd_timer.data = (unsigned long)sch;
- q->wd_timer.function = cbq_watchdog;
- init_timer(&q->delay_timer);
- q->delay_timer.data = (unsigned long)sch;
+ qdisc_watchdog_init(&q->watchdog, sch);
+ hrtimer_init(&q->delay_timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS);
q->delay_timer.function = cbq_undelay;
q->toplevel = TC_CBQ_MAXLEVEL;
- PSCHED_GET_TIME(q->now);
+ q->now = psched_get_time();
q->now_rt = q->now;
cbq_link_class(&q->link);
@@ -1467,19 +1462,19 @@ static int cbq_init(struct Qdisc *sch, struct rtattr *opt)
static __inline__ int cbq_dump_rate(struct sk_buff *skb, struct cbq_class *cl)
{
- unsigned char *b = skb->tail;
+ unsigned char *b = skb_tail_pointer(skb);
RTA_PUT(skb, TCA_CBQ_RATE, sizeof(cl->R_tab->rate), &cl->R_tab->rate);
return skb->len;
rtattr_failure:
- skb_trim(skb, b - skb->data);
+ nlmsg_trim(skb, b);
return -1;
}
static __inline__ int cbq_dump_lss(struct sk_buff *skb, struct cbq_class *cl)
{
- unsigned char *b = skb->tail;
+ unsigned char *b = skb_tail_pointer(skb);
struct tc_cbq_lssopt opt;
opt.flags = 0;
@@ -1498,13 +1493,13 @@ static __inline__ int cbq_dump_lss(struct sk_buff *skb, struct cbq_class *cl)
return skb->len;
rtattr_failure:
- skb_trim(skb, b - skb->data);
+ nlmsg_trim(skb, b);
return -1;
}
static __inline__ int cbq_dump_wrr(struct sk_buff *skb, struct cbq_class *cl)
{
- unsigned char *b = skb->tail;
+ unsigned char *b = skb_tail_pointer(skb);
struct tc_cbq_wrropt opt;
opt.flags = 0;
@@ -1516,30 +1511,30 @@ static __inline__ int cbq_dump_wrr(struct sk_buff *skb, struct cbq_class *cl)
return skb->len;
rtattr_failure:
- skb_trim(skb, b - skb->data);
+ nlmsg_trim(skb, b);
return -1;
}
static __inline__ int cbq_dump_ovl(struct sk_buff *skb, struct cbq_class *cl)
{
- unsigned char *b = skb->tail;
+ unsigned char *b = skb_tail_pointer(skb);
struct tc_cbq_ovl opt;
opt.strategy = cl->ovl_strategy;
opt.priority2 = cl->priority2+1;
opt.pad = 0;
- opt.penalty = (cl->penalty*1000)/HZ;
+ opt.penalty = cl->penalty;
RTA_PUT(skb, TCA_CBQ_OVL_STRATEGY, sizeof(opt), &opt);
return skb->len;
rtattr_failure:
- skb_trim(skb, b - skb->data);
+ nlmsg_trim(skb, b);
return -1;
}
static __inline__ int cbq_dump_fopt(struct sk_buff *skb, struct cbq_class *cl)
{
- unsigned char *b = skb->tail;
+ unsigned char *b = skb_tail_pointer(skb);
struct tc_cbq_fopt opt;
if (cl->split || cl->defmap) {
@@ -1551,14 +1546,14 @@ static __inline__ int cbq_dump_fopt(struct sk_buff *skb, struct cbq_class *cl)
return skb->len;
rtattr_failure:
- skb_trim(skb, b - skb->data);
+ nlmsg_trim(skb, b);
return -1;
}
#ifdef CONFIG_NET_CLS_POLICE
static __inline__ int cbq_dump_police(struct sk_buff *skb, struct cbq_class *cl)
{
- unsigned char *b = skb->tail;
+ unsigned char *b = skb_tail_pointer(skb);
struct tc_cbq_police opt;
if (cl->police) {
@@ -1570,7 +1565,7 @@ static __inline__ int cbq_dump_police(struct sk_buff *skb, struct cbq_class *cl)
return skb->len;
rtattr_failure:
- skb_trim(skb, b - skb->data);
+ nlmsg_trim(skb, b);
return -1;
}
#endif
@@ -1592,18 +1587,18 @@ static int cbq_dump_attr(struct sk_buff *skb, struct cbq_class *cl)
static int cbq_dump(struct Qdisc *sch, struct sk_buff *skb)
{
struct cbq_sched_data *q = qdisc_priv(sch);
- unsigned char *b = skb->tail;
+ unsigned char *b = skb_tail_pointer(skb);
struct rtattr *rta;
rta = (struct rtattr*)b;
RTA_PUT(skb, TCA_OPTIONS, 0, NULL);
if (cbq_dump_attr(skb, &q->link) < 0)
goto rtattr_failure;
- rta->rta_len = skb->tail - b;
+ rta->rta_len = skb_tail_pointer(skb) - b;
return skb->len;
rtattr_failure:
- skb_trim(skb, b - skb->data);
+ nlmsg_trim(skb, b);
return -1;
}
@@ -1621,7 +1616,7 @@ cbq_dump_class(struct Qdisc *sch, unsigned long arg,
struct sk_buff *skb, struct tcmsg *tcm)
{
struct cbq_class *cl = (struct cbq_class*)arg;
- unsigned char *b = skb->tail;
+ unsigned char *b = skb_tail_pointer(skb);
struct rtattr *rta;
if (cl->tparent)
@@ -1635,11 +1630,11 @@ cbq_dump_class(struct Qdisc *sch, unsigned long arg,
RTA_PUT(skb, TCA_OPTIONS, 0, NULL);
if (cbq_dump_attr(skb, cl) < 0)
goto rtattr_failure;
- rta->rta_len = skb->tail - b;
+ rta->rta_len = skb_tail_pointer(skb) - b;
return skb->len;
rtattr_failure:
- skb_trim(skb, b - skb->data);
+ nlmsg_trim(skb, b);
return -1;
}
@@ -1654,8 +1649,8 @@ cbq_dump_class_stats(struct Qdisc *sch, unsigned long arg,
cl->xstats.avgidle = cl->avgidle;
cl->xstats.undertime = 0;
- if (!PSCHED_IS_PASTPERFECT(cl->undertime))
- cl->xstats.undertime = PSCHED_TDIFF(cl->undertime, q->now);
+ if (cl->undertime != PSCHED_PASTPERFECT)
+ cl->xstats.undertime = cl->undertime - q->now;
if (gnet_stats_copy_basic(d, &cl->bstats) < 0 ||
#ifdef CONFIG_NET_ESTIMATOR
@@ -1722,23 +1717,13 @@ static unsigned long cbq_get(struct Qdisc *sch, u32 classid)
return 0;
}
-static void cbq_destroy_filters(struct cbq_class *cl)
-{
- struct tcf_proto *tp;
-
- while ((tp = cl->filter_list) != NULL) {
- cl->filter_list = tp->next;
- tcf_destroy(tp);
- }
-}
-
static void cbq_destroy_class(struct Qdisc *sch, struct cbq_class *cl)
{
struct cbq_sched_data *q = qdisc_priv(sch);
BUG_TRAP(!cl->filters);
- cbq_destroy_filters(cl);
+ tcf_destroy_chain(cl->filter_list);
qdisc_destroy(cl->q);
qdisc_put_rtab(cl->R_tab);
#ifdef CONFIG_NET_ESTIMATOR
@@ -1765,7 +1750,7 @@ cbq_destroy(struct Qdisc* sch)
*/
for (h = 0; h < 16; h++)
for (cl = q->classes[h]; cl; cl = cl->next)
- cbq_destroy_filters(cl);
+ tcf_destroy_chain(cl->filter_list);
for (h = 0; h < 16; h++) {
struct cbq_class *next;
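The pattern repeated throughout this series: each qdisc's private one-shot timer (cbq_watchdog, hfsc_watchdog, htb_timer, netem_watchdog, tbf_watchdog) is replaced by the shared qdisc_watchdog helper, which wraps an hrtimer and manages TCQ_F_THROTTLED. A minimal sketch of the usage, with signatures inferred from the call sites converted in this patch rather than quoted from the headers:

	/* Sketch only: helper signatures are assumed from the call
	 * sites in this patch, not copied from the kernel headers. */
	struct mysched_data {
		struct qdisc_watchdog watchdog;
	};

	static int my_init(struct Qdisc *sch, struct rtattr *opt)
	{
		struct mysched_data *q = qdisc_priv(sch);

		qdisc_watchdog_init(&q->watchdog, sch);	/* bind hrtimer to sch */
		return 0;
	}

	static struct sk_buff *my_dequeue(struct Qdisc *sch)
	{
		struct mysched_data *q = qdisc_priv(sch);
		psched_time_t now = psched_get_time();

		/* nothing may be sent yet: sleep until an absolute time */
		qdisc_watchdog_schedule(&q->watchdog,
					now + PSCHED_TICKS_PER_SEC / 100);
		return NULL;	/* the watchdog sets TCQ_F_THROTTLED for us */
	}

	static void my_reset(struct Qdisc *sch)
	{
		struct mysched_data *q = qdisc_priv(sch);

		qdisc_watchdog_cancel(&q->watchdog);	/* clears THROTTLED */
	}

Note that qdisc_watchdog_schedule() takes an absolute psched_time_t rather than a relative jiffies delay, which is why every converted caller now computes now + delta.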
diff --git a/net/sched/sch_dsmark.c b/net/sched/sch_dsmark.c
index 96324cf4e6a..3c6fd181263 100644
--- a/net/sched/sch_dsmark.c
+++ b/net/sched/sch_dsmark.c
@@ -216,17 +216,17 @@ static int dsmark_enqueue(struct sk_buff *skb,struct Qdisc *sch)
/* FIXME: Safe with non-linear skbs? --RR */
switch (skb->protocol) {
case __constant_htons(ETH_P_IP):
- skb->tc_index = ipv4_get_dsfield(skb->nh.iph)
+ skb->tc_index = ipv4_get_dsfield(ip_hdr(skb))
& ~INET_ECN_MASK;
break;
case __constant_htons(ETH_P_IPV6):
- skb->tc_index = ipv6_get_dsfield(skb->nh.ipv6h)
+ skb->tc_index = ipv6_get_dsfield(ipv6_hdr(skb))
& ~INET_ECN_MASK;
break;
default:
skb->tc_index = 0;
break;
- };
+ }
}
if (TC_H_MAJ(skb->priority) == sch->handle)
@@ -257,7 +257,7 @@ static int dsmark_enqueue(struct sk_buff *skb,struct Qdisc *sch)
if (p->default_index != NO_DEFAULT_INDEX)
skb->tc_index = p->default_index;
break;
- };
+ }
}
err = p->q->enqueue(skb,p->q);
@@ -292,11 +292,11 @@ static struct sk_buff *dsmark_dequeue(struct Qdisc *sch)
switch (skb->protocol) {
case __constant_htons(ETH_P_IP):
- ipv4_change_dsfield(skb->nh.iph, p->mask[index],
+ ipv4_change_dsfield(ip_hdr(skb), p->mask[index],
p->value[index]);
break;
case __constant_htons(ETH_P_IPV6):
- ipv6_change_dsfield(skb->nh.ipv6h, p->mask[index],
+ ipv6_change_dsfield(ipv6_hdr(skb), p->mask[index],
p->value[index]);
break;
default:
@@ -310,7 +310,7 @@ static struct sk_buff *dsmark_dequeue(struct Qdisc *sch)
"unsupported protocol %d\n",
ntohs(skb->protocol));
break;
- };
+ }
return skb;
}
@@ -412,16 +412,10 @@ static void dsmark_reset(struct Qdisc *sch)
static void dsmark_destroy(struct Qdisc *sch)
{
struct dsmark_qdisc_data *p = PRIV(sch);
- struct tcf_proto *tp;
DPRINTK("dsmark_destroy(sch %p,[qdisc %p])\n", sch, p);
- while (p->filter_list) {
- tp = p->filter_list;
- p->filter_list = tp->next;
- tcf_destroy(tp);
- }
-
+ tcf_destroy_chain(p->filter_list);
qdisc_destroy(p->q);
kfree(p->mask);
}
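Several schedulers in this patch (CBQ and dsmark above, HFSC, HTB, ingress, and prio below) delete identical open-coded filter-teardown loops in favour of tcf_destroy_chain(). Reconstructed from the loops it replaces, the helper is presumably just that common loop hoisted into one place:

	/* Assumed shape, reconstructed from the deleted loops; not
	 * quoted from net/sched/sch_api.c. */
	void tcf_destroy_chain(struct tcf_proto *fl)
	{
		struct tcf_proto *tp;

		while ((tp = fl) != NULL) {
			fl = tp->next;
			tcf_destroy(tp);	/* unbind classes, free filter */
		}
	}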
diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c
index 52eb3439d7c..3385ee59254 100644
--- a/net/sched/sch_generic.c
+++ b/net/sched/sch_generic.c
@@ -36,34 +36,27 @@
/* Main transmission queue. */
-/* Main qdisc structure lock.
-
- However, modifications
- to data, participating in scheduling must be additionally
- protected with dev->queue_lock spinlock.
-
- The idea is the following:
- - enqueue, dequeue are serialized via top level device
- spinlock dev->queue_lock.
- - tree walking is protected by read_lock(qdisc_tree_lock)
- and this lock is used only in process context.
- - updates to tree are made only under rtnl semaphore,
- hence this lock may be made without local bh disabling.
-
- qdisc_tree_lock must be grabbed BEFORE dev->queue_lock!
+/* Modifications to data participating in scheduling must be protected with
+ * dev->queue_lock spinlock.
+ *
+ * The idea is the following:
+ * - enqueue, dequeue are serialized via top level device
+ * spinlock dev->queue_lock.
+ * - ingress filtering is serialized via top level device
+ * spinlock dev->ingress_lock.
+ * - updates to tree and tree walking are only done under the rtnl mutex.
*/
-DEFINE_RWLOCK(qdisc_tree_lock);
void qdisc_lock_tree(struct net_device *dev)
{
- write_lock(&qdisc_tree_lock);
spin_lock_bh(&dev->queue_lock);
+ spin_lock(&dev->ingress_lock);
}
void qdisc_unlock_tree(struct net_device *dev)
{
+ spin_unlock(&dev->ingress_lock);
spin_unlock_bh(&dev->queue_lock);
- write_unlock(&qdisc_tree_lock);
}
/*
@@ -442,7 +435,6 @@ struct Qdisc *qdisc_alloc(struct net_device *dev, struct Qdisc_ops *ops)
sch->dequeue = ops->dequeue;
sch->dev = dev;
dev_hold(dev);
- sch->stats_lock = &dev->queue_lock;
atomic_set(&sch->refcnt, 1);
return sch;
@@ -458,6 +450,7 @@ struct Qdisc * qdisc_create_dflt(struct net_device *dev, struct Qdisc_ops *ops,
sch = qdisc_alloc(dev, ops);
if (IS_ERR(sch))
goto errout;
+ sch->stats_lock = &dev->queue_lock;
sch->parent = parentid;
if (!ops->init || ops->init(sch, NULL) == 0)
@@ -528,15 +521,11 @@ void dev_activate(struct net_device *dev)
printk(KERN_INFO "%s: activation failed\n", dev->name);
return;
}
- write_lock(&qdisc_tree_lock);
list_add_tail(&qdisc->list, &dev->qdisc_list);
- write_unlock(&qdisc_tree_lock);
} else {
qdisc = &noqueue_qdisc;
}
- write_lock(&qdisc_tree_lock);
dev->qdisc_sleeping = qdisc;
- write_unlock(&qdisc_tree_lock);
}
if (!netif_carrier_ok(dev))
diff --git a/net/sched/sch_hfsc.c b/net/sched/sch_hfsc.c
index 407c6fb1ba1..9d124c4ee3a 100644
--- a/net/sched/sch_hfsc.c
+++ b/net/sched/sch_hfsc.c
@@ -59,13 +59,13 @@
#include <linux/skbuff.h>
#include <linux/string.h>
#include <linux/slab.h>
-#include <linux/timer.h>
#include <linux/list.h>
#include <linux/rbtree.h>
#include <linux/init.h>
#include <linux/netdevice.h>
#include <linux/rtnetlink.h>
#include <linux/pkt_sched.h>
+#include <net/netlink.h>
#include <net/pkt_sched.h>
#include <net/pkt_cls.h>
#include <asm/system.h>
@@ -192,23 +192,9 @@ struct hfsc_sched
struct list_head droplist; /* active leaf class list (for
dropping) */
struct sk_buff_head requeue; /* requeued packet */
- struct timer_list wd_timer; /* watchdog timer */
+ struct qdisc_watchdog watchdog; /* watchdog timer */
};
-/*
- * macros
- */
-#ifdef CONFIG_NET_SCH_CLK_GETTIMEOFDAY
-#include <linux/time.h>
-#undef PSCHED_GET_TIME
-#define PSCHED_GET_TIME(stamp) \
-do { \
- struct timeval tv; \
- do_gettimeofday(&tv); \
- (stamp) = 1ULL * USEC_PER_SEC * tv.tv_sec + tv.tv_usec; \
-} while (0)
-#endif
-
#define HT_INFINITY 0xffffffffffffffffULL /* infinite time value */
@@ -394,28 +380,17 @@ cftree_update(struct hfsc_class *cl)
* ism: (psched_us/byte) << ISM_SHIFT
* dx: psched_us
*
- * Clock source resolution (CONFIG_NET_SCH_CLK_*)
- * JIFFIES: for 48<=HZ<=1534 resolution is between 0.63us and 1.27us.
- * CPU: resolution is between 0.5us and 1us.
- * GETTIMEOFDAY: resolution is exactly 1us.
+ * The clock source resolution with ktime is 1.024us.
*
* sm and ism are scaled in order to keep effective digits.
* SM_SHIFT and ISM_SHIFT are selected to keep at least 4 effective
* digits in decimal using the following table.
*
- * Note: We can afford the additional accuracy (altq hfsc keeps at most
- * 3 effective digits) thanks to the fact that linux clock is bounded
- * much more tightly.
- *
* bits/sec 100Kbps 1Mbps 10Mbps 100Mbps 1Gbps
* ------------+-------------------------------------------------------
- * bytes/0.5us 6.25e-3 62.5e-3 625e-3 6250e-e 62500e-3
- * bytes/us 12.5e-3 125e-3 1250e-3 12500e-3 125000e-3
- * bytes/1.27us 15.875e-3 158.75e-3 1587.5e-3 15875e-3 158750e-3
+ * bytes/1.024us 12.8e-3 128e-3 1280e-3 12800e-3 128000e-3
*
- * 0.5us/byte 160 16 1.6 0.16 0.016
- * us/byte 80 8 0.8 0.08 0.008
- * 1.27us/byte 63 6.3 0.63 0.063 0.0063
+ * 1.024us/byte 78.125 7.8125 0.78125 0.078125 0.0078125
*/
#define SM_SHIFT 20
#define ISM_SHIFT 18
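For the table above: with the ktime-based clock one psched tick is 2^10 ns, so PSCHED_TICKS_PER_SEC is about 976562 (10^9 >> 10, an assumption consistent with the 1.024us figure). A 10 Mbit/s class moves 1.25e6 bytes/s, i.e. 1.25e6 / 976562 ~ 1.28 bytes per tick, matching the 1280e-3 entry. Plugging that into m2sm():

	/* Worked example under the PSCHED_TICKS_PER_SEC == 976562
	 * assumption stated above; values are illustrative. */
	u64 m  = 1250000;			/* 10 Mbit/s in bytes/sec */
	u64 sm = (u64)m << SM_SHIFT;		/* keep ~4 effective digits */
	sm += PSCHED_TICKS_PER_SEC - 1;		/* round up */
	do_div(sm, PSCHED_TICKS_PER_SEC);	/* sm ~ 1.28 << 20, about 1.34e6 */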
@@ -460,8 +435,8 @@ m2sm(u32 m)
u64 sm;
sm = ((u64)m << SM_SHIFT);
- sm += PSCHED_JIFFIE2US(HZ) - 1;
- do_div(sm, PSCHED_JIFFIE2US(HZ));
+ sm += PSCHED_TICKS_PER_SEC - 1;
+ do_div(sm, PSCHED_TICKS_PER_SEC);
return sm;
}
@@ -474,7 +449,7 @@ m2ism(u32 m)
if (m == 0)
ism = HT_INFINITY;
else {
- ism = ((u64)PSCHED_JIFFIE2US(HZ) << ISM_SHIFT);
+ ism = ((u64)PSCHED_TICKS_PER_SEC << ISM_SHIFT);
ism += m - 1;
do_div(ism, m);
}
@@ -487,7 +462,7 @@ d2dx(u32 d)
{
u64 dx;
- dx = ((u64)d * PSCHED_JIFFIE2US(HZ));
+ dx = ((u64)d * PSCHED_TICKS_PER_SEC);
dx += USEC_PER_SEC - 1;
do_div(dx, USEC_PER_SEC);
return dx;
@@ -499,7 +474,7 @@ sm2m(u64 sm)
{
u64 m;
- m = (sm * PSCHED_JIFFIE2US(HZ)) >> SM_SHIFT;
+ m = (sm * PSCHED_TICKS_PER_SEC) >> SM_SHIFT;
return (u32)m;
}
@@ -510,7 +485,7 @@ dx2d(u64 dx)
u64 d;
d = dx * USEC_PER_SEC;
- do_div(d, PSCHED_JIFFIE2US(HZ));
+ do_div(d, PSCHED_TICKS_PER_SEC);
return (u32)d;
}
@@ -654,9 +629,7 @@ rtsc_min(struct runtime_sc *rtsc, struct internal_sc *isc, u64 x, u64 y)
static void
init_ed(struct hfsc_class *cl, unsigned int next_len)
{
- u64 cur_time;
-
- PSCHED_GET_TIME(cur_time);
+ u64 cur_time = psched_get_time();
/* update the deadline curve */
rtsc_min(&cl->cl_deadline, &cl->cl_rsc, cur_time, cl->cl_cumul);
@@ -779,7 +752,7 @@ init_vf(struct hfsc_class *cl, unsigned int len)
if (cl->cl_flags & HFSC_USC) {
/* class has upper limit curve */
if (cur_time == 0)
- PSCHED_GET_TIME(cur_time);
+ cur_time = psched_get_time();
/* update the ulimit curve */
rtsc_min(&cl->cl_ulimit, &cl->cl_usc, cur_time,
@@ -1063,7 +1036,7 @@ hfsc_change_class(struct Qdisc *sch, u32 classid, u32 parentid,
if (cl->cl_parent == NULL && parentid != TC_H_ROOT)
return -EINVAL;
}
- PSCHED_GET_TIME(cur_time);
+ cur_time = psched_get_time();
sch_tree_lock(sch);
if (rsc != NULL)
@@ -1149,22 +1122,11 @@ hfsc_change_class(struct Qdisc *sch, u32 classid, u32 parentid,
}
static void
-hfsc_destroy_filters(struct tcf_proto **fl)
-{
- struct tcf_proto *tp;
-
- while ((tp = *fl) != NULL) {
- *fl = tp->next;
- tcf_destroy(tp);
- }
-}
-
-static void
hfsc_destroy_class(struct Qdisc *sch, struct hfsc_class *cl)
{
struct hfsc_sched *q = qdisc_priv(sch);
- hfsc_destroy_filters(&cl->filter_list);
+ tcf_destroy_chain(cl->filter_list);
qdisc_destroy(cl->qdisc);
#ifdef CONFIG_NET_ESTIMATOR
gen_kill_estimator(&cl->bstats, &cl->rate_est);
@@ -1389,7 +1351,7 @@ hfsc_dump_class(struct Qdisc *sch, unsigned long arg, struct sk_buff *skb,
struct tcmsg *tcm)
{
struct hfsc_class *cl = (struct hfsc_class *)arg;
- unsigned char *b = skb->tail;
+ unsigned char *b = skb_tail_pointer(skb);
struct rtattr *rta = (struct rtattr *)b;
tcm->tcm_parent = cl->cl_parent ? cl->cl_parent->classid : TC_H_ROOT;
@@ -1400,11 +1362,11 @@ hfsc_dump_class(struct Qdisc *sch, unsigned long arg, struct sk_buff *skb,
RTA_PUT(skb, TCA_OPTIONS, 0, NULL);
if (hfsc_dump_curves(skb, cl) < 0)
goto rtattr_failure;
- rta->rta_len = skb->tail - b;
+ rta->rta_len = skb_tail_pointer(skb) - b;
return skb->len;
rtattr_failure:
- skb_trim(skb, b - skb->data);
+ nlmsg_trim(skb, b);
return -1;
}
@@ -1459,21 +1421,11 @@ hfsc_walk(struct Qdisc *sch, struct qdisc_walker *arg)
}
static void
-hfsc_watchdog(unsigned long arg)
-{
- struct Qdisc *sch = (struct Qdisc *)arg;
-
- sch->flags &= ~TCQ_F_THROTTLED;
- netif_schedule(sch->dev);
-}
-
-static void
-hfsc_schedule_watchdog(struct Qdisc *sch, u64 cur_time)
+hfsc_schedule_watchdog(struct Qdisc *sch)
{
struct hfsc_sched *q = qdisc_priv(sch);
struct hfsc_class *cl;
u64 next_time = 0;
- long delay;
if ((cl = eltree_get_minel(q)) != NULL)
next_time = cl->cl_e;
@@ -1482,11 +1434,7 @@ hfsc_schedule_watchdog(struct Qdisc *sch, u64 cur_time)
next_time = q->root.cl_cfmin;
}
WARN_ON(next_time == 0);
- delay = next_time - cur_time;
- delay = PSCHED_US2JIFFIE(delay);
-
- sch->flags |= TCQ_F_THROTTLED;
- mod_timer(&q->wd_timer, jiffies + delay);
+ qdisc_watchdog_schedule(&q->watchdog, next_time);
}
static int
@@ -1523,9 +1471,7 @@ hfsc_init_qdisc(struct Qdisc *sch, struct rtattr *opt)
list_add(&q->root.hlist, &q->clhash[hfsc_hash(q->root.classid)]);
- init_timer(&q->wd_timer);
- q->wd_timer.function = hfsc_watchdog;
- q->wd_timer.data = (unsigned long)sch;
+ qdisc_watchdog_init(&q->watchdog, sch);
return 0;
}
@@ -1595,8 +1541,7 @@ hfsc_reset_qdisc(struct Qdisc *sch)
__skb_queue_purge(&q->requeue);
q->eligible = RB_ROOT;
INIT_LIST_HEAD(&q->droplist);
- del_timer(&q->wd_timer);
- sch->flags &= ~TCQ_F_THROTTLED;
+ qdisc_watchdog_cancel(&q->watchdog);
sch->q.qlen = 0;
}
@@ -1612,14 +1557,14 @@ hfsc_destroy_qdisc(struct Qdisc *sch)
hfsc_destroy_class(sch, cl);
}
__skb_queue_purge(&q->requeue);
- del_timer(&q->wd_timer);
+ qdisc_watchdog_cancel(&q->watchdog);
}
static int
hfsc_dump_qdisc(struct Qdisc *sch, struct sk_buff *skb)
{
struct hfsc_sched *q = qdisc_priv(sch);
- unsigned char *b = skb->tail;
+ unsigned char *b = skb_tail_pointer(skb);
struct tc_hfsc_qopt qopt;
qopt.defcls = q->defcls;
@@ -1627,7 +1572,7 @@ hfsc_dump_qdisc(struct Qdisc *sch, struct sk_buff *skb)
return skb->len;
rtattr_failure:
- skb_trim(skb, b - skb->data);
+ nlmsg_trim(skb, b);
return -1;
}
@@ -1681,7 +1626,7 @@ hfsc_dequeue(struct Qdisc *sch)
if ((skb = __skb_dequeue(&q->requeue)))
goto out;
- PSCHED_GET_TIME(cur_time);
+ cur_time = psched_get_time();
/*
* if there are eligible classes, use real-time criteria.
@@ -1698,7 +1643,7 @@ hfsc_dequeue(struct Qdisc *sch)
cl = vttree_get_minvt(&q->root, cur_time);
if (cl == NULL) {
sch->qstats.overlimits++;
- hfsc_schedule_watchdog(sch, cur_time);
+ hfsc_schedule_watchdog(sch);
return NULL;
}
}
diff --git a/net/sched/sch_htb.c b/net/sched/sch_htb.c
index 3c3294d0104..99bcec8dd04 100644
--- a/net/sched/sch_htb.c
+++ b/net/sched/sch_htb.c
@@ -50,6 +50,7 @@
#include <linux/skbuff.h>
#include <linux/list.h>
#include <linux/compiler.h>
+#include <net/netlink.h>
#include <net/sock.h>
#include <net/pkt_sched.h>
#include <linux/rbtree.h>
@@ -128,7 +129,7 @@ struct htb_class {
} un;
struct rb_node node[TC_HTB_NUMPRIO]; /* node for self or feed tree */
struct rb_node pq_node; /* node for event queue */
- unsigned long pq_key; /* the same type as jiffies global */
+ psched_time_t pq_key;
int prio_activity; /* for which prios are we active */
enum htb_cmode cmode; /* current mode of the class */
@@ -179,10 +180,7 @@ struct htb_sched {
struct rb_root wait_pq[TC_HTB_MAXDEPTH];
/* time of nearest event per level (row) */
- unsigned long near_ev_cache[TC_HTB_MAXDEPTH];
-
- /* cached value of jiffies in dequeue */
- unsigned long jiffies;
+ psched_time_t near_ev_cache[TC_HTB_MAXDEPTH];
/* whether we hit non-work conserving class during this dequeue; we use */
int nwc_hit; /* this to disable mindelay complaint in dequeue */
@@ -195,7 +193,7 @@ struct htb_sched {
int rate2quantum; /* quant = rate / rate2quantum */
psched_time_t now; /* cached dequeue time */
- struct timer_list timer; /* send delay timer */
+ struct qdisc_watchdog watchdog;
#ifdef HTB_RATECM
struct timer_list rttim; /* rate computer timer */
int recmp_bucket; /* which hash bucket to recompute next */
@@ -342,19 +340,19 @@ static void htb_add_to_wait_tree(struct htb_sched *q,
{
struct rb_node **p = &q->wait_pq[cl->level].rb_node, *parent = NULL;
- cl->pq_key = q->jiffies + PSCHED_US2JIFFIE(delay);
- if (cl->pq_key == q->jiffies)
+ cl->pq_key = q->now + delay;
+ if (cl->pq_key == q->now)
cl->pq_key++;
/* update the nearest event cache */
- if (time_after(q->near_ev_cache[cl->level], cl->pq_key))
+ if (q->near_ev_cache[cl->level] > cl->pq_key)
q->near_ev_cache[cl->level] = cl->pq_key;
while (*p) {
struct htb_class *c;
parent = *p;
c = rb_entry(parent, struct htb_class, pq_node);
- if (time_after_eq(cl->pq_key, c->pq_key))
+ if (cl->pq_key >= c->pq_key)
p = &parent->rb_right;
else
p = &parent->rb_left;
@@ -679,14 +677,6 @@ static int htb_requeue(struct sk_buff *skb, struct Qdisc *sch)
return NET_XMIT_SUCCESS;
}
-static void htb_timer(unsigned long arg)
-{
- struct Qdisc *sch = (struct Qdisc *)arg;
- sch->flags &= ~TCQ_F_THROTTLED;
- wmb();
- netif_schedule(sch->dev);
-}
-
#ifdef HTB_RATECM
#define RT_GEN(D,R) R+=D-(R/HTB_EWMAC);D=0
static void htb_rate_timer(unsigned long arg)
@@ -739,7 +729,7 @@ static void htb_charge_class(struct htb_sched *q, struct htb_class *cl,
cl->T = toks
while (cl) {
- diff = PSCHED_TDIFF_SAFE(q->now, cl->t_c, (u32) cl->mbuffer);
+ diff = psched_tdiff_bounded(q->now, cl->t_c, cl->mbuffer);
if (cl->level >= level) {
if (cl->level == level)
cl->xstats.lends++;
@@ -778,11 +768,11 @@ static void htb_charge_class(struct htb_sched *q, struct htb_class *cl,
/**
* htb_do_events - make mode changes to classes at the level
*
- * Scans event queue for pending events and applies them. Returns jiffies to
+ * Scans event queue for pending events and applies them. Returns time of
* next pending event (0 for no event in pq).
- * Note: Aplied are events whose have cl->pq_key <= jiffies.
+ * Note: Applied are events whose cl->pq_key <= q->now.
*/
-static long htb_do_events(struct htb_sched *q, int level)
+static psched_time_t htb_do_events(struct htb_sched *q, int level)
{
int i;
@@ -795,18 +785,18 @@ static long htb_do_events(struct htb_sched *q, int level)
return 0;
cl = rb_entry(p, struct htb_class, pq_node);
- if (time_after(cl->pq_key, q->jiffies)) {
- return cl->pq_key - q->jiffies;
- }
+ if (cl->pq_key > q->now)
+ return cl->pq_key;
+
htb_safe_rb_erase(p, q->wait_pq + level);
- diff = PSCHED_TDIFF_SAFE(q->now, cl->t_c, (u32) cl->mbuffer);
+ diff = psched_tdiff_bounded(q->now, cl->t_c, cl->mbuffer);
htb_change_class_mode(q, cl, &diff);
if (cl->cmode != HTB_CAN_SEND)
htb_add_to_wait_tree(q, cl, diff);
}
if (net_ratelimit())
printk(KERN_WARNING "htb: too many events !\n");
- return HZ / 10;
+ return q->now + PSCHED_TICKS_PER_SEC / 10;
}
/* Returns class->node+prio from id-tree where classe's id is >= id. NULL
@@ -958,30 +948,12 @@ next:
return skb;
}
-static void htb_delay_by(struct Qdisc *sch, long delay)
-{
- struct htb_sched *q = qdisc_priv(sch);
- if (delay <= 0)
- delay = 1;
- if (unlikely(delay > 5 * HZ)) {
- if (net_ratelimit())
- printk(KERN_INFO "HTB delay %ld > 5sec\n", delay);
- delay = 5 * HZ;
- }
- /* why don't use jiffies here ? because expires can be in past */
- mod_timer(&q->timer, q->jiffies + delay);
- sch->flags |= TCQ_F_THROTTLED;
- sch->qstats.overlimits++;
-}
-
static struct sk_buff *htb_dequeue(struct Qdisc *sch)
{
struct sk_buff *skb = NULL;
struct htb_sched *q = qdisc_priv(sch);
int level;
- long min_delay;
-
- q->jiffies = jiffies;
+ psched_time_t next_event;
/* try to dequeue direct packets as high prio (!) to minimize cpu work */
skb = __skb_dequeue(&q->direct_queue);
@@ -993,23 +965,25 @@ static struct sk_buff *htb_dequeue(struct Qdisc *sch)
if (!sch->q.qlen)
goto fin;
- PSCHED_GET_TIME(q->now);
+ q->now = psched_get_time();
- min_delay = LONG_MAX;
+ next_event = q->now + 5 * PSCHED_TICKS_PER_SEC;
q->nwc_hit = 0;
for (level = 0; level < TC_HTB_MAXDEPTH; level++) {
/* common case optimization - skip event handler quickly */
int m;
- long delay;
- if (time_after_eq(q->jiffies, q->near_ev_cache[level])) {
- delay = htb_do_events(q, level);
- q->near_ev_cache[level] =
- q->jiffies + (delay ? delay : HZ);
+ psched_time_t event;
+
+ if (q->now >= q->near_ev_cache[level]) {
+ event = htb_do_events(q, level);
+ q->near_ev_cache[level] = event ? event :
+ q->now + PSCHED_TICKS_PER_SEC;
} else
- delay = q->near_ev_cache[level] - q->jiffies;
+ event = q->near_ev_cache[level];
+
+ if (event && next_event > event)
+ next_event = event;
- if (delay && min_delay > delay)
- min_delay = delay;
m = ~q->row_mask[level];
while (m != (int)(-1)) {
int prio = ffz(m);
@@ -1022,7 +996,8 @@ static struct sk_buff *htb_dequeue(struct Qdisc *sch)
}
}
}
- htb_delay_by(sch, min_delay > 5 * HZ ? 5 * HZ : min_delay);
+ sch->qstats.overlimits++;
+ qdisc_watchdog_schedule(&q->watchdog, next_event);
fin:
return skb;
}
@@ -1075,8 +1050,7 @@ static void htb_reset(struct Qdisc *sch)
}
}
- sch->flags &= ~TCQ_F_THROTTLED;
- del_timer(&q->timer);
+ qdisc_watchdog_cancel(&q->watchdog);
__skb_queue_purge(&q->direct_queue);
sch->q.qlen = 0;
memset(q->row, 0, sizeof(q->row));
@@ -1113,14 +1087,12 @@ static int htb_init(struct Qdisc *sch, struct rtattr *opt)
for (i = 0; i < TC_HTB_NUMPRIO; i++)
INIT_LIST_HEAD(q->drops + i);
- init_timer(&q->timer);
+ qdisc_watchdog_init(&q->watchdog, sch);
skb_queue_head_init(&q->direct_queue);
q->direct_qlen = sch->dev->tx_queue_len;
if (q->direct_qlen < 2) /* some devices have zero tx_queue_len */
q->direct_qlen = 2;
- q->timer.function = htb_timer;
- q->timer.data = (unsigned long)sch;
#ifdef HTB_RATECM
init_timer(&q->rttim);
@@ -1139,7 +1111,7 @@ static int htb_init(struct Qdisc *sch, struct rtattr *opt)
static int htb_dump(struct Qdisc *sch, struct sk_buff *skb)
{
struct htb_sched *q = qdisc_priv(sch);
- unsigned char *b = skb->tail;
+ unsigned char *b = skb_tail_pointer(skb);
struct rtattr *rta;
struct tc_htb_glob gopt;
spin_lock_bh(&sch->dev->queue_lock);
@@ -1152,12 +1124,12 @@ static int htb_dump(struct Qdisc *sch, struct sk_buff *skb)
rta = (struct rtattr *)b;
RTA_PUT(skb, TCA_OPTIONS, 0, NULL);
RTA_PUT(skb, TCA_HTB_INIT, sizeof(gopt), &gopt);
- rta->rta_len = skb->tail - b;
+ rta->rta_len = skb_tail_pointer(skb) - b;
spin_unlock_bh(&sch->dev->queue_lock);
return skb->len;
rtattr_failure:
spin_unlock_bh(&sch->dev->queue_lock);
- skb_trim(skb, skb->tail - skb->data);
+ nlmsg_trim(skb, skb_tail_pointer(skb));
return -1;
}
@@ -1165,7 +1137,7 @@ static int htb_dump_class(struct Qdisc *sch, unsigned long arg,
struct sk_buff *skb, struct tcmsg *tcm)
{
struct htb_class *cl = (struct htb_class *)arg;
- unsigned char *b = skb->tail;
+ unsigned char *b = skb_tail_pointer(skb);
struct rtattr *rta;
struct tc_htb_opt opt;
@@ -1188,12 +1160,12 @@ static int htb_dump_class(struct Qdisc *sch, unsigned long arg,
opt.prio = cl->un.leaf.prio;
opt.level = cl->level;
RTA_PUT(skb, TCA_HTB_PARMS, sizeof(opt), &opt);
- rta->rta_len = skb->tail - b;
+ rta->rta_len = skb_tail_pointer(skb) - b;
spin_unlock_bh(&sch->dev->queue_lock);
return skb->len;
rtattr_failure:
spin_unlock_bh(&sch->dev->queue_lock);
- skb_trim(skb, b - skb->data);
+ nlmsg_trim(skb, b);
return -1;
}
@@ -1264,16 +1236,6 @@ static unsigned long htb_get(struct Qdisc *sch, u32 classid)
return (unsigned long)cl;
}
-static void htb_destroy_filters(struct tcf_proto **fl)
-{
- struct tcf_proto *tp;
-
- while ((tp = *fl) != NULL) {
- *fl = tp->next;
- tcf_destroy(tp);
- }
-}
-
static inline int htb_parent_last_child(struct htb_class *cl)
{
if (!cl->parent)
@@ -1302,7 +1264,7 @@ static void htb_parent_to_leaf(struct htb_class *cl, struct Qdisc *new_q)
parent->un.leaf.prio = parent->prio;
parent->tokens = parent->buffer;
parent->ctokens = parent->cbuffer;
- PSCHED_GET_TIME(parent->t_c);
+ parent->t_c = psched_get_time();
parent->cmode = HTB_CAN_SEND;
}
@@ -1317,7 +1279,7 @@ static void htb_destroy_class(struct Qdisc *sch, struct htb_class *cl)
qdisc_put_rtab(cl->rate);
qdisc_put_rtab(cl->ceil);
- htb_destroy_filters(&cl->filter_list);
+ tcf_destroy_chain(cl->filter_list);
while (!list_empty(&cl->children))
htb_destroy_class(sch, list_entry(cl->children.next,
@@ -1341,7 +1303,7 @@ static void htb_destroy(struct Qdisc *sch)
{
struct htb_sched *q = qdisc_priv(sch);
- del_timer_sync(&q->timer);
+ qdisc_watchdog_cancel(&q->watchdog);
#ifdef HTB_RATECM
del_timer_sync(&q->rttim);
#endif
@@ -1349,7 +1311,7 @@ static void htb_destroy(struct Qdisc *sch)
and surprisingly it worked in 2.4. But it must precede it
because filter need its target class alive to be able to call
unbind_filter on it (without Oops). */
- htb_destroy_filters(&q->filter_list);
+ tcf_destroy_chain(q->filter_list);
while (!list_empty(&q->root))
htb_destroy_class(sch, list_entry(q->root.next,
@@ -1498,8 +1460,8 @@ static int htb_change_class(struct Qdisc *sch, u32 classid,
/* set class to be in HTB_CAN_SEND state */
cl->tokens = hopt->buffer;
cl->ctokens = hopt->cbuffer;
- cl->mbuffer = PSCHED_JIFFIE2US(HZ * 60); /* 1min */
- PSCHED_GET_TIME(cl->t_c);
+ cl->mbuffer = 60 * PSCHED_TICKS_PER_SEC; /* 1min */
+ cl->t_c = psched_get_time();
cl->cmode = HTB_CAN_SEND;
/* attach to the hash list and parent's family */
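PSCHED_TDIFF_SAFE(now, t_c, bound) becomes psched_tdiff_bounded() here and in sch_tbf.c below. From its call sites, it clamps the elapsed time so a class that has been idle for a long period is credited at most mbuffer (HTB) or buffer (TBF) worth of tokens. Presumably it is nothing more than:

	/* Assumed from usage: a clamped time difference. */
	static inline psched_tdiff_t
	psched_tdiff_bounded(psched_time_t tv1, psched_time_t tv2,
			     psched_time_t bound)
	{
		return min(tv1 - tv2, bound);
	}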
diff --git a/net/sched/sch_ingress.c b/net/sched/sch_ingress.c
index cfe070ee6ee..f8b9f1cdf73 100644
--- a/net/sched/sch_ingress.c
+++ b/net/sched/sch_ingress.c
@@ -16,6 +16,7 @@
#include <linux/netfilter_ipv6.h>
#include <linux/netfilter.h>
#include <linux/smp.h>
+#include <net/netlink.h>
#include <net/pkt_sched.h>
#include <asm/byteorder.h>
#include <asm/uaccess.h>
@@ -169,7 +170,7 @@ static int ingress_enqueue(struct sk_buff *skb,struct Qdisc *sch)
skb->tc_index = TC_H_MIN(res.classid);
result = TC_ACT_OK;
break;
- };
+ }
/* backward compat */
#else
#ifdef CONFIG_NET_CLS_POLICE
@@ -186,7 +187,7 @@ static int ingress_enqueue(struct sk_buff *skb,struct Qdisc *sch)
sch->bstats.bytes += skb->len;
result = NF_ACCEPT;
break;
- };
+ }
#else
D2PRINTK("Overriding result to ACCEPT\n");
@@ -247,16 +248,11 @@ ing_hook(unsigned int hook, struct sk_buff **pskb,
skb->dev ? (*pskb)->dev->name : "(no dev)",
skb->len);
-/*
-revisit later: Use a private since lock dev->queue_lock is also
-used on the egress (might slow things for an iota)
-*/
-
if (dev->qdisc_ingress) {
- spin_lock(&dev->queue_lock);
+ spin_lock(&dev->ingress_lock);
if ((q = dev->qdisc_ingress) != NULL)
fwres = q->enqueue(skb, q);
- spin_unlock(&dev->queue_lock);
+ spin_unlock(&dev->ingress_lock);
}
return fwres;
@@ -345,14 +341,9 @@ static void ingress_reset(struct Qdisc *sch)
static void ingress_destroy(struct Qdisc *sch)
{
struct ingress_qdisc_data *p = PRIV(sch);
- struct tcf_proto *tp;
DPRINTK("ingress_destroy(sch %p,[qdisc %p])\n", sch, p);
- while (p->filter_list) {
- tp = p->filter_list;
- p->filter_list = tp->next;
- tcf_destroy(tp);
- }
+ tcf_destroy_chain(p->filter_list);
#if 0
/* for future use */
qdisc_destroy(p->q);
@@ -362,16 +353,16 @@ static void ingress_destroy(struct Qdisc *sch)
static int ingress_dump(struct Qdisc *sch, struct sk_buff *skb)
{
- unsigned char *b = skb->tail;
+ unsigned char *b = skb_tail_pointer(skb);
struct rtattr *rta;
rta = (struct rtattr *) b;
RTA_PUT(skb, TCA_OPTIONS, 0, NULL);
- rta->rta_len = skb->tail - b;
+ rta->rta_len = skb_tail_pointer(skb) - b;
return skb->len;
rtattr_failure:
- skb_trim(skb, b - skb->data);
+ nlmsg_trim(skb, b);
return -1;
}
diff --git a/net/sched/sch_netem.c b/net/sched/sch_netem.c
index 1ccbfb55b0b..5d9d8bc9cc3 100644
--- a/net/sched/sch_netem.c
+++ b/net/sched/sch_netem.c
@@ -22,6 +22,7 @@
#include <linux/skbuff.h>
#include <linux/rtnetlink.h>
+#include <net/netlink.h>
#include <net/pkt_sched.h>
#define VERSION "1.2"
@@ -54,21 +55,22 @@
struct netem_sched_data {
struct Qdisc *qdisc;
- struct timer_list timer;
+ struct qdisc_watchdog watchdog;
+
+ psched_tdiff_t latency;
+ psched_tdiff_t jitter;
- u32 latency;
u32 loss;
u32 limit;
u32 counter;
u32 gap;
- u32 jitter;
u32 duplicate;
u32 reorder;
u32 corrupt;
struct crndstate {
- unsigned long last;
- unsigned long rho;
+ u32 last;
+ u32 rho;
} delay_cor, loss_cor, dup_cor, reorder_cor, corrupt_cor;
struct disttable {
@@ -95,12 +97,12 @@ static void init_crandom(struct crndstate *state, unsigned long rho)
* Next number depends on last value.
* rho is scaled to avoid floating point.
*/
-static unsigned long get_crandom(struct crndstate *state)
+static u32 get_crandom(struct crndstate *state)
{
u64 value, rho;
unsigned long answer;
- if (state->rho == 0) /* no correllation */
+ if (state->rho == 0) /* no correlation */
return net_random();
value = net_random();
@@ -114,11 +116,13 @@ static unsigned long get_crandom(struct crndstate *state)
* std deviation sigma. Uses table lookup to approximate the desired
* distribution, and a uniformly-distributed pseudo-random source.
*/
-static long tabledist(unsigned long mu, long sigma,
- struct crndstate *state, const struct disttable *dist)
+static psched_tdiff_t tabledist(psched_tdiff_t mu, psched_tdiff_t sigma,
+ struct crndstate *state,
+ const struct disttable *dist)
{
- long t, x;
- unsigned long rnd;
+ psched_tdiff_t x;
+ long t;
+ u32 rnd;
if (sigma == 0)
return mu;
@@ -213,8 +217,8 @@ static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch)
delay = tabledist(q->latency, q->jitter,
&q->delay_cor, q->delay_dist);
- PSCHED_GET_TIME(now);
- PSCHED_TADD2(now, delay, cb->time_to_send);
+ now = psched_get_time();
+ cb->time_to_send = now + delay;
++q->counter;
ret = q->qdisc->enqueue(skb, q->qdisc);
} else {
@@ -222,7 +226,7 @@ static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch)
* Do re-ordering by putting one out of N packets at the front
* of the queue.
*/
- PSCHED_GET_TIME(cb->time_to_send);
+ cb->time_to_send = psched_get_time();
q->counter = 0;
ret = q->qdisc->ops->requeue(skb, q->qdisc);
}
@@ -269,55 +273,43 @@ static struct sk_buff *netem_dequeue(struct Qdisc *sch)
struct netem_sched_data *q = qdisc_priv(sch);
struct sk_buff *skb;
+ smp_mb();
+ if (sch->flags & TCQ_F_THROTTLED)
+ return NULL;
+
skb = q->qdisc->dequeue(q->qdisc);
if (skb) {
const struct netem_skb_cb *cb
= (const struct netem_skb_cb *)skb->cb;
- psched_time_t now;
+ psched_time_t now = psched_get_time();
/* if more time remaining? */
- PSCHED_GET_TIME(now);
-
- if (PSCHED_TLESS(cb->time_to_send, now)) {
+ if (cb->time_to_send <= now) {
pr_debug("netem_dequeue: return skb=%p\n", skb);
sch->q.qlen--;
- sch->flags &= ~TCQ_F_THROTTLED;
return skb;
- } else {
- psched_tdiff_t delay = PSCHED_TDIFF(cb->time_to_send, now);
-
- if (q->qdisc->ops->requeue(skb, q->qdisc) != NET_XMIT_SUCCESS) {
- qdisc_tree_decrease_qlen(q->qdisc, 1);
- sch->qstats.drops++;
- printk(KERN_ERR "netem: queue discpline %s could not requeue\n",
- q->qdisc->ops->id);
- }
+ }
- mod_timer(&q->timer, jiffies + PSCHED_US2JIFFIE(delay));
- sch->flags |= TCQ_F_THROTTLED;
+ if (unlikely(q->qdisc->ops->requeue(skb, q->qdisc) != NET_XMIT_SUCCESS)) {
+ qdisc_tree_decrease_qlen(q->qdisc, 1);
+ sch->qstats.drops++;
+ printk(KERN_ERR "netem: %s could not requeue\n",
+ q->qdisc->ops->id);
}
+
+ qdisc_watchdog_schedule(&q->watchdog, cb->time_to_send);
}
return NULL;
}
-static void netem_watchdog(unsigned long arg)
-{
- struct Qdisc *sch = (struct Qdisc *)arg;
-
- pr_debug("netem_watchdog qlen=%d\n", sch->q.qlen);
- sch->flags &= ~TCQ_F_THROTTLED;
- netif_schedule(sch->dev);
-}
-
static void netem_reset(struct Qdisc *sch)
{
struct netem_sched_data *q = qdisc_priv(sch);
qdisc_reset(q->qdisc);
sch->q.qlen = 0;
- sch->flags &= ~TCQ_F_THROTTLED;
- del_timer_sync(&q->timer);
+ qdisc_watchdog_cancel(&q->watchdog);
}
/* Pass size change message down to embedded FIFO */
@@ -438,10 +430,11 @@ static int netem_change(struct Qdisc *sch, struct rtattr *opt)
q->loss = qopt->loss;
q->duplicate = qopt->duplicate;
- /* for compatiablity with earlier versions.
- * if gap is set, need to assume 100% probablity
+ /* for compatibility with earlier versions.
+ * if gap is set, need to assume 100% probability
*/
- q->reorder = ~0;
+ if (q->gap)
+ q->reorder = ~0;
/* Handle nested options after initial queue options.
* Should have put all options in nested format but too late now.
@@ -487,22 +480,28 @@ static int netem_change(struct Qdisc *sch, struct rtattr *opt)
*/
struct fifo_sched_data {
u32 limit;
+ psched_time_t oldest;
};
static int tfifo_enqueue(struct sk_buff *nskb, struct Qdisc *sch)
{
struct fifo_sched_data *q = qdisc_priv(sch);
struct sk_buff_head *list = &sch->q;
- const struct netem_skb_cb *ncb
- = (const struct netem_skb_cb *)nskb->cb;
+ psched_time_t tnext = ((struct netem_skb_cb *)nskb->cb)->time_to_send;
struct sk_buff *skb;
if (likely(skb_queue_len(list) < q->limit)) {
+ /* Optimize for add at tail */
+ if (likely(skb_queue_empty(list) || tnext >= q->oldest)) {
+ q->oldest = tnext;
+ return qdisc_enqueue_tail(nskb, sch);
+ }
+
skb_queue_reverse_walk(list, skb) {
const struct netem_skb_cb *cb
= (const struct netem_skb_cb *)skb->cb;
- if (!PSCHED_TLESS(ncb->time_to_send, cb->time_to_send))
+ if (tnext >= cb->time_to_send)
break;
}
@@ -515,7 +514,7 @@ static int tfifo_enqueue(struct sk_buff *nskb, struct Qdisc *sch)
return NET_XMIT_SUCCESS;
}
- return qdisc_drop(nskb, sch);
+ return qdisc_reshape_fail(nskb, sch);
}
static int tfifo_init(struct Qdisc *sch, struct rtattr *opt)
@@ -531,6 +530,7 @@ static int tfifo_init(struct Qdisc *sch, struct rtattr *opt)
} else
q->limit = max_t(u32, sch->dev->tx_queue_len, 1);
+ q->oldest = PSCHED_PASTPERFECT;
return 0;
}
@@ -567,9 +567,7 @@ static int netem_init(struct Qdisc *sch, struct rtattr *opt)
if (!opt)
return -EINVAL;
- init_timer(&q->timer);
- q->timer.function = netem_watchdog;
- q->timer.data = (unsigned long) sch;
+ qdisc_watchdog_init(&q->watchdog, sch);
q->qdisc = qdisc_create_dflt(sch->dev, &tfifo_qdisc_ops,
TC_H_MAKE(sch->handle, 1));
@@ -590,7 +588,7 @@ static void netem_destroy(struct Qdisc *sch)
{
struct netem_sched_data *q = qdisc_priv(sch);
- del_timer_sync(&q->timer);
+ qdisc_watchdog_cancel(&q->watchdog);
qdisc_destroy(q->qdisc);
kfree(q->delay_dist);
}
@@ -598,7 +596,7 @@ static void netem_destroy(struct Qdisc *sch)
static int netem_dump(struct Qdisc *sch, struct sk_buff *skb)
{
const struct netem_sched_data *q = qdisc_priv(sch);
- unsigned char *b = skb->tail;
+ unsigned char *b = skb_tail_pointer(skb);
struct rtattr *rta = (struct rtattr *) b;
struct tc_netem_qopt qopt;
struct tc_netem_corr cor;
@@ -626,12 +624,12 @@ static int netem_dump(struct Qdisc *sch, struct sk_buff *skb)
corrupt.correlation = q->corrupt_cor.rho;
RTA_PUT(skb, TCA_NETEM_CORRUPT, sizeof(corrupt), &corrupt);
- rta->rta_len = skb->tail - b;
+ rta->rta_len = skb_tail_pointer(skb) - b;
return skb->len;
rtattr_failure:
- skb_trim(skb, b - skb->data);
+ nlmsg_trim(skb, b);
return -1;
}
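get_crandom() is netem's correlated random source: with rho == 0 it degenerates to net_random(), otherwise each output is an integer-only exponentially weighted blend of the previous output and a fresh random value. The diff shows only the entry of the function; the remainder plausibly reads:

	/* Rough reconstruction: the exact scaling of rho and the final
	 * shift are assumptions, not quoted source. */
	static u32 get_crandom(struct crndstate *state)
	{
		u64 value, rho;
		unsigned long answer;

		if (state->rho == 0)		/* no correlation */
			return net_random();

		value = net_random();
		rho = (u64)state->rho + 1;
		/* rho/2^32 parts history, the rest fresh randomness */
		answer = (value * ((1ull << 32) - rho)
			  + state->last * rho) >> 32;
		state->last = answer;
		return answer;
	}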
diff --git a/net/sched/sch_prio.c b/net/sched/sch_prio.c
index de889f23f22..269a6e17c6c 100644
--- a/net/sched/sch_prio.c
+++ b/net/sched/sch_prio.c
@@ -32,6 +32,7 @@
#include <net/ip.h>
#include <net/route.h>
#include <linux/skbuff.h>
+#include <net/netlink.h>
#include <net/sock.h>
#include <net/pkt_sched.h>
@@ -61,7 +62,7 @@ prio_classify(struct sk_buff *skb, struct Qdisc *sch, int *qerr)
*qerr = NET_XMIT_SUCCESS;
case TC_ACT_SHOT:
return NULL;
- };
+ }
if (!q->filter_list ) {
#else
@@ -188,13 +189,8 @@ prio_destroy(struct Qdisc* sch)
{
int prio;
struct prio_sched_data *q = qdisc_priv(sch);
- struct tcf_proto *tp;
-
- while ((tp = q->filter_list) != NULL) {
- q->filter_list = tp->next;
- tcf_destroy(tp);
- }
+ tcf_destroy_chain(q->filter_list);
for (prio=0; prio<q->bands; prio++)
qdisc_destroy(q->queues[prio]);
}
@@ -271,7 +267,7 @@ static int prio_init(struct Qdisc *sch, struct rtattr *opt)
static int prio_dump(struct Qdisc *sch, struct sk_buff *skb)
{
struct prio_sched_data *q = qdisc_priv(sch);
- unsigned char *b = skb->tail;
+ unsigned char *b = skb_tail_pointer(skb);
struct tc_prio_qopt opt;
opt.bands = q->bands;
@@ -280,7 +276,7 @@ static int prio_dump(struct Qdisc *sch, struct sk_buff *skb)
return skb->len;
rtattr_failure:
- skb_trim(skb, b - skb->data);
+ nlmsg_trim(skb, b);
return -1;
}
diff --git a/net/sched/sch_sfq.c b/net/sched/sch_sfq.c
index 66f32051a99..96dfdf78d32 100644
--- a/net/sched/sch_sfq.c
+++ b/net/sched/sch_sfq.c
@@ -30,6 +30,7 @@
#include <linux/notifier.h>
#include <linux/init.h>
#include <net/ip.h>
+#include <net/netlink.h>
#include <linux/ipv6.h>
#include <net/route.h>
#include <linux/skbuff.h>
@@ -137,7 +138,7 @@ static unsigned sfq_hash(struct sfq_sched_data *q, struct sk_buff *skb)
switch (skb->protocol) {
case __constant_htons(ETH_P_IP):
{
- struct iphdr *iph = skb->nh.iph;
+ const struct iphdr *iph = ip_hdr(skb);
h = iph->daddr;
h2 = iph->saddr^iph->protocol;
if (!(iph->frag_off&htons(IP_MF|IP_OFFSET)) &&
@@ -152,7 +153,7 @@ static unsigned sfq_hash(struct sfq_sched_data *q, struct sk_buff *skb)
}
case __constant_htons(ETH_P_IPV6):
{
- struct ipv6hdr *iph = skb->nh.ipv6h;
+ struct ipv6hdr *iph = ipv6_hdr(skb);
h = iph->daddr.s6_addr32[3];
h2 = iph->saddr.s6_addr32[3]^iph->nexthdr;
if (iph->nexthdr == IPPROTO_TCP ||
@@ -461,7 +462,7 @@ static void sfq_destroy(struct Qdisc *sch)
static int sfq_dump(struct Qdisc *sch, struct sk_buff *skb)
{
struct sfq_sched_data *q = qdisc_priv(sch);
- unsigned char *b = skb->tail;
+ unsigned char *b = skb_tail_pointer(skb);
struct tc_sfq_qopt opt;
opt.quantum = q->quantum;
@@ -476,7 +477,7 @@ static int sfq_dump(struct Qdisc *sch, struct sk_buff *skb)
return skb->len;
rtattr_failure:
- skb_trim(skb, b - skb->data);
+ nlmsg_trim(skb, b);
return -1;
}
diff --git a/net/sched/sch_tbf.c b/net/sched/sch_tbf.c
index 85da8daa61d..53862953baa 100644
--- a/net/sched/sch_tbf.c
+++ b/net/sched/sch_tbf.c
@@ -32,6 +32,7 @@
#include <linux/etherdevice.h>
#include <linux/notifier.h>
#include <net/ip.h>
+#include <net/netlink.h>
#include <net/route.h>
#include <linux/skbuff.h>
#include <net/sock.h>
@@ -127,8 +128,8 @@ struct tbf_sched_data
long tokens; /* Current number of B tokens */
long ptokens; /* Current number of P tokens */
psched_time_t t_c; /* Time check-point */
- struct timer_list wd_timer; /* Watchdog timer */
struct Qdisc *qdisc; /* Inner qdisc, default - bfifo queue */
+ struct qdisc_watchdog watchdog; /* Watchdog timer */
};
#define L2T(q,L) ((q)->R_tab->data[(L)>>(q)->R_tab->rate.cell_log])
@@ -185,14 +186,6 @@ static unsigned int tbf_drop(struct Qdisc* sch)
return len;
}
-static void tbf_watchdog(unsigned long arg)
-{
- struct Qdisc *sch = (struct Qdisc*)arg;
-
- sch->flags &= ~TCQ_F_THROTTLED;
- netif_schedule(sch->dev);
-}
-
static struct sk_buff *tbf_dequeue(struct Qdisc* sch)
{
struct tbf_sched_data *q = qdisc_priv(sch);
@@ -202,13 +195,12 @@ static struct sk_buff *tbf_dequeue(struct Qdisc* sch)
if (skb) {
psched_time_t now;
- long toks, delay;
+ long toks;
long ptoks = 0;
unsigned int len = skb->len;
- PSCHED_GET_TIME(now);
-
- toks = PSCHED_TDIFF_SAFE(now, q->t_c, q->buffer);
+ now = psched_get_time();
+ toks = psched_tdiff_bounded(now, q->t_c, q->buffer);
if (q->P_tab) {
ptoks = toks + q->ptokens;
@@ -230,12 +222,8 @@ static struct sk_buff *tbf_dequeue(struct Qdisc* sch)
return skb;
}
- delay = PSCHED_US2JIFFIE(max_t(long, -toks, -ptoks));
-
- if (delay == 0)
- delay = 1;
-
- mod_timer(&q->wd_timer, jiffies+delay);
+ qdisc_watchdog_schedule(&q->watchdog,
+ now + max_t(long, -toks, -ptoks));
/* Maybe we have a shorter packet in the queue,
which can be sent now. It sounds cool,
@@ -254,7 +242,6 @@ static struct sk_buff *tbf_dequeue(struct Qdisc* sch)
sch->qstats.drops++;
}
- sch->flags |= TCQ_F_THROTTLED;
sch->qstats.overlimits++;
}
return NULL;
@@ -266,11 +253,10 @@ static void tbf_reset(struct Qdisc* sch)
qdisc_reset(q->qdisc);
sch->q.qlen = 0;
- PSCHED_GET_TIME(q->t_c);
+ q->t_c = psched_get_time();
q->tokens = q->buffer;
q->ptokens = q->mtu;
- sch->flags &= ~TCQ_F_THROTTLED;
- del_timer(&q->wd_timer);
+ qdisc_watchdog_cancel(&q->watchdog);
}
static struct Qdisc *tbf_create_dflt_qdisc(struct Qdisc *sch, u32 limit)
@@ -377,11 +363,8 @@ static int tbf_init(struct Qdisc* sch, struct rtattr *opt)
if (opt == NULL)
return -EINVAL;
- PSCHED_GET_TIME(q->t_c);
- init_timer(&q->wd_timer);
- q->wd_timer.function = tbf_watchdog;
- q->wd_timer.data = (unsigned long)sch;
-
+ q->t_c = psched_get_time();
+ qdisc_watchdog_init(&q->watchdog, sch);
q->qdisc = &noop_qdisc;
return tbf_change(sch, opt);
@@ -391,7 +374,7 @@ static void tbf_destroy(struct Qdisc *sch)
{
struct tbf_sched_data *q = qdisc_priv(sch);
- del_timer(&q->wd_timer);
+ qdisc_watchdog_cancel(&q->watchdog);
if (q->P_tab)
qdisc_put_rtab(q->P_tab);
@@ -404,7 +387,7 @@ static void tbf_destroy(struct Qdisc *sch)
static int tbf_dump(struct Qdisc *sch, struct sk_buff *skb)
{
struct tbf_sched_data *q = qdisc_priv(sch);
- unsigned char *b = skb->tail;
+ unsigned char *b = skb_tail_pointer(skb);
struct rtattr *rta;
struct tc_tbf_qopt opt;
@@ -420,12 +403,12 @@ static int tbf_dump(struct Qdisc *sch, struct sk_buff *skb)
opt.mtu = q->mtu;
opt.buffer = q->buffer;
RTA_PUT(skb, TCA_TBF_PARMS, sizeof(opt), &opt);
- rta->rta_len = skb->tail - b;
+ rta->rta_len = skb_tail_pointer(skb) - b;
return skb->len;
rtattr_failure:
- skb_trim(skb, b - skb->data);
+ nlmsg_trim(skb, b);
return -1;
}
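The TBF wakeup arithmetic: after a failed dequeue, toks (and ptoks, when a peak rate is configured) are negative by exactly the number of psched ticks still owed for the head packet, so scheduling the watchdog at now + max(-toks, -ptoks) wakes the qdisc at the first instant the packet becomes conformant. With invented numbers:

	/* Illustrative values only. The head packet costs 2000 ticks at
	 * the bucket rate but only 1500 ticks of credit accumulated. */
	long toks  = 1500 - 2000;	/* -500: 500 ticks still owed */
	long ptoks = 0;			/* no peak-rate bucket */

	qdisc_watchdog_schedule(&q->watchdog,
				now + max_t(long, -toks, -ptoks));
	/* fires ~512us later (500 * 1.024us), with no rounding up to
	 * a whole jiffie as the old mod_timer() path had to do */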
diff --git a/net/sched/sch_teql.c b/net/sched/sch_teql.c
index 587123c61af..d24914db786 100644
--- a/net/sched/sch_teql.c
+++ b/net/sched/sch_teql.c
@@ -323,7 +323,7 @@ restart:
nores = 1;
break;
}
- __skb_pull(skb, skb->nh.raw - skb->data);
+ __skb_pull(skb, skb_network_offset(skb));
} while ((q = NEXT_SLAVE(q)) != start);
if (nores && skb_res == NULL) {
diff --git a/net/sctp/associola.c b/net/sctp/associola.c
index 78d2ddb5ca1..df94e3cdfba 100644
--- a/net/sctp/associola.c
+++ b/net/sctp/associola.c
@@ -143,7 +143,7 @@ static struct sctp_association *sctp_association_init(struct sctp_association *a
/* Initialize the maximum number of new data packets that can be sent
* in a burst.
*/
- asoc->max_burst = sctp_max_burst;
+ asoc->max_burst = sp->max_burst;
/* initialize association timers */
asoc->timeouts[SCTP_EVENT_TIMEOUT_NONE] = 0;
@@ -714,8 +714,16 @@ void sctp_assoc_control_transport(struct sctp_association *asoc,
/* Record the transition on the transport. */
switch (command) {
case SCTP_TRANSPORT_UP:
+ /* If we are moving from UNCONFIRMED state due
+ * to heartbeat success, report the SCTP_ADDR_CONFIRMED
+ * state to the user, otherwise report SCTP_ADDR_AVAILABLE.
+ */
+ if (SCTP_UNCONFIRMED == transport->state &&
+ SCTP_HEARTBEAT_SUCCESS == error)
+ spc_state = SCTP_ADDR_CONFIRMED;
+ else
+ spc_state = SCTP_ADDR_AVAILABLE;
transport->state = SCTP_ACTIVE;
- spc_state = SCTP_ADDR_AVAILABLE;
break;
case SCTP_TRANSPORT_DOWN:
@@ -725,7 +733,7 @@ void sctp_assoc_control_transport(struct sctp_association *asoc,
default:
return;
- };
+ }
/* Generate and send a SCTP_PEER_ADDR_CHANGE notification to the
* user.
@@ -1095,6 +1103,13 @@ void sctp_assoc_update(struct sctp_association *asoc,
asoc->ssnmap = new->ssnmap;
new->ssnmap = NULL;
}
+
+ if (!asoc->assoc_id) {
+ /* get a new association id since we don't have one
+ * yet.
+ */
+ sctp_assoc_set_id(asoc, GFP_ATOMIC);
+ }
}
}
@@ -1367,3 +1382,25 @@ out:
sctp_read_unlock(&asoc->base.addr_lock);
return found;
}
+
+/* Set an association id for a given association */
+int sctp_assoc_set_id(struct sctp_association *asoc, gfp_t gfp)
+{
+ int assoc_id;
+ int error = 0;
+retry:
+ if (unlikely(!idr_pre_get(&sctp_assocs_id, gfp)))
+ return -ENOMEM;
+
+ spin_lock_bh(&sctp_assocs_id_lock);
+ error = idr_get_new_above(&sctp_assocs_id, (void *)asoc,
+ 1, &assoc_id);
+ spin_unlock_bh(&sctp_assocs_id_lock);
+ if (error == -EAGAIN)
+ goto retry;
+ else if (error)
+ return error;
+
+ asoc->assoc_id = (sctp_assoc_t) assoc_id;
+ return error;
+}
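sctp_assoc_set_id() follows the classic two-step idr protocol of this era: idr_pre_get() preallocates memory under the caller's gfp mask outside the lock, then idr_get_new_above() allocates the id under the spinlock and can still fail with -EAGAIN when a racing caller consumed the preallocated node. The generic shape, as a hypothetical wrapper not taken from the SCTP tree:

	int alloc_id(struct idr *idr, spinlock_t *lock, void *ptr, int *id)
	{
		int err;

		do {
			if (!idr_pre_get(idr, GFP_KERNEL))
				return -ENOMEM;	/* preallocation failed */
			spin_lock_bh(lock);
			err = idr_get_new_above(idr, ptr, 1, id);
			spin_unlock_bh(lock);
		} while (err == -EAGAIN);	/* lost the race, refill */

		return err;
	}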
diff --git a/net/sctp/debug.c b/net/sctp/debug.c
index 5f5ab28977c..e8c0f7435d7 100644
--- a/net/sctp/debug.c
+++ b/net/sctp/debug.c
@@ -93,8 +93,9 @@ const char *sctp_cname(const sctp_subtype_t cid)
return "FWD_TSN";
default:
- return "unknown chunk";
- };
+ break;
+ }
+
return "unknown chunk";
}
diff --git a/net/sctp/input.c b/net/sctp/input.c
index 71db6687369..885109fb3dd 100644
--- a/net/sctp/input.c
+++ b/net/sctp/input.c
@@ -79,14 +79,10 @@ static void sctp_add_backlog(struct sock *sk, struct sk_buff *skb);
/* Calculate the SCTP checksum of an SCTP packet. */
static inline int sctp_rcv_checksum(struct sk_buff *skb)
{
- struct sctphdr *sh;
- __u32 cmp, val;
struct sk_buff *list = skb_shinfo(skb)->frag_list;
-
- sh = (struct sctphdr *) skb->h.raw;
- cmp = ntohl(sh->checksum);
-
- val = sctp_start_cksum((__u8 *)sh, skb_headlen(skb));
+ struct sctphdr *sh = sctp_hdr(skb);
+ __u32 cmp = ntohl(sh->checksum);
+ __u32 val = sctp_start_cksum((__u8 *)sh, skb_headlen(skb));
for (; list; list = list->next)
val = sctp_update_cksum((__u8 *)list->data, skb_headlen(list),
@@ -138,14 +134,13 @@ int sctp_rcv(struct sk_buff *skb)
if (skb_linearize(skb))
goto discard_it;
- sh = (struct sctphdr *) skb->h.raw;
+ sh = sctp_hdr(skb);
/* Pull up the IP and SCTP headers. */
- __skb_pull(skb, skb->h.raw - skb->data);
+ __skb_pull(skb, skb_transport_offset(skb));
if (skb->len < sizeof(struct sctphdr))
goto discard_it;
- if ((skb->ip_summed != CHECKSUM_UNNECESSARY) &&
- (sctp_rcv_checksum(skb) < 0))
+ if (!skb_csum_unnecessary(skb) && sctp_rcv_checksum(skb) < 0)
goto discard_it;
skb_pull(skb, sizeof(struct sctphdr));
@@ -154,7 +149,7 @@ int sctp_rcv(struct sk_buff *skb)
if (skb->len < sizeof(struct sctp_chunkhdr))
goto discard_it;
- family = ipver2af(skb->nh.iph->version);
+ family = ipver2af(ip_hdr(skb)->version);
af = sctp_get_af_specific(family);
if (unlikely(!af))
goto discard_it;
@@ -510,30 +505,30 @@ void sctp_err_finish(struct sock *sk, struct sctp_association *asoc)
void sctp_v4_err(struct sk_buff *skb, __u32 info)
{
struct iphdr *iph = (struct iphdr *)skb->data;
- struct sctphdr *sh = (struct sctphdr *)(skb->data + (iph->ihl <<2));
- int type = skb->h.icmph->type;
- int code = skb->h.icmph->code;
+ const int ihlen = iph->ihl * 4;
+ const int type = icmp_hdr(skb)->type;
+ const int code = icmp_hdr(skb)->code;
struct sock *sk;
struct sctp_association *asoc = NULL;
struct sctp_transport *transport;
struct inet_sock *inet;
- char *saveip, *savesctp;
+ sk_buff_data_t saveip, savesctp;
int err;
- if (skb->len < ((iph->ihl << 2) + 8)) {
+ if (skb->len < ihlen + 8) {
ICMP_INC_STATS_BH(ICMP_MIB_INERRORS);
return;
}
/* Fix up skb to look at the embedded net header. */
- saveip = skb->nh.raw;
- savesctp = skb->h.raw;
- skb->nh.iph = iph;
- skb->h.raw = (char *)sh;
- sk = sctp_err_lookup(AF_INET, skb, sh, &asoc, &transport);
- /* Put back, the original pointers. */
- skb->nh.raw = saveip;
- skb->h.raw = savesctp;
+ saveip = skb->network_header;
+ savesctp = skb->transport_header;
+ skb_reset_network_header(skb);
+ skb_set_transport_header(skb, ihlen);
+ sk = sctp_err_lookup(AF_INET, skb, sctp_hdr(skb), &asoc, &transport);
+ /* Put back the original values. */
+ skb->network_header = saveip;
+ skb->transport_header = savesctp;
if (!sk) {
ICMP_INC_STATS_BH(ICMP_MIB_INERRORS);
return;
@@ -616,7 +611,7 @@ int sctp_rcv_ootb(struct sk_buff *skb)
break;
ch_end = ((__u8 *)ch) + WORD_ROUND(ntohs(ch->length));
- if (ch_end > skb->tail)
+ if (ch_end > skb_tail_pointer(skb))
break;
/* RFC 8.4, 2) If the OOTB packet contains an ABORT chunk, the
@@ -648,7 +643,7 @@ int sctp_rcv_ootb(struct sk_buff *skb)
}
ch = (sctp_chunkhdr_t *) ch_end;
- } while (ch_end < skb->tail);
+ } while (ch_end < skb_tail_pointer(skb));
return 0;
@@ -905,7 +900,7 @@ static struct sctp_association *__sctp_rcv_init_lookup(struct sk_buff *skb,
struct sctp_association *asoc;
union sctp_addr addr;
union sctp_addr *paddr = &addr;
- struct sctphdr *sh = (struct sctphdr *) skb->h.raw;
+ struct sctphdr *sh = sctp_hdr(skb);
sctp_chunkhdr_t *ch;
union sctp_params params;
sctp_init_chunk_t *init;
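The save/restore dance in sctp_v4_err() above (and sctp_v6_err() below) changes type from char * to sk_buff_data_t because the header fields are no longer raw pointers on every build; depending on configuration they may be offsets into the skb. The pattern, restated outside the diff context with ihlen standing for the embedded IP header length:

	/* Mirrors the converted error handlers; sketch only. */
	sk_buff_data_t save_net   = skb->network_header;
	sk_buff_data_t save_trans = skb->transport_header;

	skb_reset_network_header(skb);		/* net header = current data */
	skb_set_transport_header(skb, ihlen);	/* SCTP header follows IP */
	/* ... sctp_err_lookup() parses via sctp_hdr(skb) ... */
	skb->network_header   = save_net;	/* restore original layout */
	skb->transport_header = save_trans;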
diff --git a/net/sctp/inqueue.c b/net/sctp/inqueue.c
index c30629e1778..88aa2240754 100644
--- a/net/sctp/inqueue.c
+++ b/net/sctp/inqueue.c
@@ -159,16 +159,16 @@ struct sctp_chunk *sctp_inq_pop(struct sctp_inq *queue)
* the skb->tail.
*/
if (unlikely(skb_is_nonlinear(chunk->skb))) {
- if (chunk->chunk_end > chunk->skb->tail)
- chunk->chunk_end = chunk->skb->tail;
+ if (chunk->chunk_end > skb_tail_pointer(chunk->skb))
+ chunk->chunk_end = skb_tail_pointer(chunk->skb);
}
skb_pull(chunk->skb, sizeof(sctp_chunkhdr_t));
chunk->subh.v = NULL; /* Subheader is no longer valid. */
- if (chunk->chunk_end < chunk->skb->tail) {
+ if (chunk->chunk_end < skb_tail_pointer(chunk->skb)) {
/* This is not a singleton */
chunk->singleton = 0;
- } else if (chunk->chunk_end > chunk->skb->tail) {
+ } else if (chunk->chunk_end > skb_tail_pointer(chunk->skb)) {
/* RFC 2960, Section 6.10 Bundling
*
* Partial chunks MUST NOT be placed in an SCTP packet.
diff --git a/net/sctp/ipv6.c b/net/sctp/ipv6.c
index 0b9c49b3a10..84cd53635fe 100644
--- a/net/sctp/ipv6.c
+++ b/net/sctp/ipv6.c
@@ -122,26 +122,24 @@ SCTP_STATIC void sctp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
int type, int code, int offset, __be32 info)
{
struct inet6_dev *idev;
- struct ipv6hdr *iph = (struct ipv6hdr *)skb->data;
- struct sctphdr *sh = (struct sctphdr *)(skb->data + offset);
struct sock *sk;
struct sctp_association *asoc;
struct sctp_transport *transport;
struct ipv6_pinfo *np;
- char *saveip, *savesctp;
+ sk_buff_data_t saveip, savesctp;
int err;
idev = in6_dev_get(skb->dev);
/* Fix up skb to look at the embedded net header. */
- saveip = skb->nh.raw;
- savesctp = skb->h.raw;
- skb->nh.ipv6h = iph;
- skb->h.raw = (char *)sh;
- sk = sctp_err_lookup(AF_INET6, skb, sh, &asoc, &transport);
+ saveip = skb->network_header;
+ savesctp = skb->transport_header;
+ skb_reset_network_header(skb);
+ skb_set_transport_header(skb, offset);
+ sk = sctp_err_lookup(AF_INET6, skb, sctp_hdr(skb), &asoc, &transport);
/* Put back the original pointers. */
- skb->nh.raw = saveip;
- skb->h.raw = savesctp;
+ skb->network_header = saveip;
+ skb->transport_header = savesctp;
if (!sk) {
ICMP6_INC_STATS_BH(idev, ICMP6_MIB_INERRORS);
goto out;
@@ -391,13 +389,13 @@ static void sctp_v6_from_skb(union sctp_addr *addr,struct sk_buff *skb,
addr->v6.sin6_flowinfo = 0; /* FIXME */
addr->v6.sin6_scope_id = ((struct inet6_skb_parm *)skb->cb)->iif;
- sh = (struct sctphdr *) skb->h.raw;
+ sh = sctp_hdr(skb);
if (is_saddr) {
*port = sh->source;
- from = &skb->nh.ipv6h->saddr;
+ from = &ipv6_hdr(skb)->saddr;
} else {
*port = sh->dest;
- from = &skb->nh.ipv6h->daddr;
+ from = &ipv6_hdr(skb)->daddr;
}
ipv6_addr_copy(&addr->v6.sin6_addr, from);
}
@@ -606,7 +604,7 @@ static sctp_scope_t sctp_v6_scope(union sctp_addr *addr)
default:
retval = SCTP_SCOPE_GLOBAL;
break;
- };
+ }
return retval;
}
@@ -699,7 +697,7 @@ static int sctp_v6_skb_iif(const struct sk_buff *skb)
/* Was this packet marked by Explicit Congestion Notification? */
static int sctp_v6_is_ce(const struct sk_buff *skb)
{
- return *((__u32 *)(skb->nh.ipv6h)) & htonl(1<<20);
+ return *((__u32 *)(ipv6_hdr(skb))) & htonl(1 << 20);
}
/* Dump the v6 addr to the seq file. */
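The sctp_v6_is_ce() check above masks the first 32 bits of the IPv6 header with htonl(1 << 20); in that word the version occupies bits 31-28, the traffic class bits 27-20, and the flow label bits 19-0, so the mask selects the low-order ECN bit of the traffic class. A small sketch of the bit layout, with values chosen purely for illustration:

#include <arpa/inet.h>
#include <stdio.h>

int main(void)
{
	/* version 6, traffic class 0x03 (ECN codepoint CE), flow label 0 */
	unsigned int word = htonl((6u << 28) | (0x03u << 20));

	printf("CE bit set: %d\n", (word & htonl(1u << 20)) != 0);
	return 0;
}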
@@ -766,19 +764,19 @@ static void sctp_inet6_skb_msgname(struct sk_buff *skb, char *msgname,
if (msgname) {
sctp_inet6_msgname(msgname, addr_len);
sin6 = (struct sockaddr_in6 *)msgname;
- sh = (struct sctphdr *)skb->h.raw;
+ sh = sctp_hdr(skb);
sin6->sin6_port = sh->source;
/* Map ipv4 address into v4-mapped-on-v6 address. */
if (sctp_sk(skb->sk)->v4mapped &&
- skb->nh.iph->version == 4) {
+ ip_hdr(skb)->version == 4) {
sctp_v4_map_v6((union sctp_addr *)sin6);
- sin6->sin6_addr.s6_addr32[3] = skb->nh.iph->saddr;
+ sin6->sin6_addr.s6_addr32[3] = ip_hdr(skb)->saddr;
return;
}
/* Otherwise, just copy the v6 address. */
- ipv6_addr_copy(&sin6->sin6_addr, &skb->nh.ipv6h->saddr);
+ ipv6_addr_copy(&sin6->sin6_addr, &ipv6_hdr(skb)->saddr);
if (ipv6_addr_type(&sin6->sin6_addr) & IPV6_ADDR_LINKLOCAL) {
struct sctp_ulpevent *ev = sctp_skb2event(skb);
sin6->sin6_scope_id = ev->iif;
@@ -994,45 +992,52 @@ static struct sctp_pf sctp_pf_inet6_specific = {
.af = &sctp_ipv6_specific,
};
-/* Initialize IPv6 support and register with inet6 stack. */
+/* Initialize IPv6 support and register with socket layer. */
int sctp_v6_init(void)
{
- int rc = proto_register(&sctpv6_prot, 1);
+ int rc;
+
+ /* Register the SCTP specific PF_INET6 functions. */
+ sctp_register_pf(&sctp_pf_inet6_specific, PF_INET6);
+ /* Register the SCTP specific AF_INET6 functions. */
+ sctp_register_af(&sctp_ipv6_specific);
+
+ rc = proto_register(&sctpv6_prot, 1);
if (rc)
- goto out;
- /* Register inet6 protocol. */
- rc = -EAGAIN;
- if (inet6_add_protocol(&sctpv6_protocol, IPPROTO_SCTP) < 0)
- goto out_unregister_sctp_proto;
+ return rc;
/* Add SCTPv6(UDP and TCP style) to inetsw6 linked list. */
inet6_register_protosw(&sctpv6_seqpacket_protosw);
inet6_register_protosw(&sctpv6_stream_protosw);
- /* Register the SCTP specific PF_INET6 functions. */
- sctp_register_pf(&sctp_pf_inet6_specific, PF_INET6);
-
- /* Register the SCTP specific AF_INET6 functions. */
- sctp_register_af(&sctp_ipv6_specific);
+ return 0;
+}
+/* Register with inet6 layer. */
+int sctp_v6_add_protocol(void)
+{
/* Register notifier for inet6 address additions/deletions. */
register_inet6addr_notifier(&sctp_inet6addr_notifier);
- rc = 0;
-out:
- return rc;
-out_unregister_sctp_proto:
- proto_unregister(&sctpv6_prot);
- goto out;
+
+ if (inet6_add_protocol(&sctpv6_protocol, IPPROTO_SCTP) < 0)
+ return -EAGAIN;
+
+ return 0;
}
/* IPv6 specific exit support. */
void sctp_v6_exit(void)
{
- list_del(&sctp_ipv6_specific.list);
- inet6_del_protocol(&sctpv6_protocol, IPPROTO_SCTP);
inet6_unregister_protosw(&sctpv6_seqpacket_protosw);
inet6_unregister_protosw(&sctpv6_stream_protosw);
- unregister_inet6addr_notifier(&sctp_inet6addr_notifier);
proto_unregister(&sctpv6_prot);
+ list_del(&sctp_ipv6_specific.list);
+}
+
+/* Unregister with inet6 layer. */
+void sctp_v6_del_protocol(void)
+{
+ inet6_del_protocol(&sctpv6_protocol, IPPROTO_SCTP);
+ unregister_inet6addr_notifier(&sctp_inet6addr_notifier);
}
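The split of sctp_v6_init() into a socket-layer phase and a separate sctp_v6_add_protocol() means the IPv6 input path is only hooked up once everything else is ready, and torn down first on exit. A toy model of that ordering, using stand-in functions rather than the kernel's APIs:

#include <stdio.h>

static int proto_ready;

static int my_v6_init(void)         { proto_ready = 1; return 0; }
static int my_v6_add_protocol(void) { return proto_ready ? 0 : -1; }

int main(void)
{
	if (my_v6_init() == 0 && my_v6_add_protocol() == 0)
		puts("registered: packet input path enabled last");
	return 0;
}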
diff --git a/net/sctp/output.c b/net/sctp/output.c
index f875fc3ced5..d85543def75 100644
--- a/net/sctp/output.c
+++ b/net/sctp/output.c
@@ -176,7 +176,7 @@ sctp_xmit_t sctp_packet_transmit_chunk(struct sctp_packet *packet,
case SCTP_XMIT_OK:
case SCTP_XMIT_NAGLE_DELAY:
break;
- };
+ }
return retval;
}
diff --git a/net/sctp/outqueue.c b/net/sctp/outqueue.c
index 41abfd17627..992f361084b 100644
--- a/net/sctp/outqueue.c
+++ b/net/sctp/outqueue.c
@@ -338,7 +338,7 @@ int sctp_outq_tail(struct sctp_outq *q, struct sctp_chunk *chunk)
SCTP_INC_STATS(SCTP_MIB_OUTORDERCHUNKS);
q->empty = 0;
break;
- };
+ }
} else {
list_add_tail(&chunk->list, &q->control_chunk_list);
SCTP_INC_STATS(SCTP_MIB_OUTCTRLCHUNKS);
@@ -630,7 +630,7 @@ static int sctp_outq_flush_rtx(struct sctp_outq *q, struct sctp_packet *pkt,
/* Retrieve a new chunk to bundle. */
lchunk = sctp_list_dequeue(lqueue);
break;
- };
+ }
/* If we are here due to a retransmit timeout or a fast
* retransmit and if there are any chunks left in the retransmit
@@ -779,7 +779,7 @@ int sctp_outq_flush(struct sctp_outq *q, int rtx_timeout)
default:
/* We built a chunk with an illegal type! */
BUG();
- };
+ }
}
/* Is it OK to send data chunks? */
@@ -1397,7 +1397,7 @@ static void sctp_check_transmitted(struct sctp_outq *q,
SCTP_DEBUG_PRINTK("ACKed: %08x", tsn);
dbg_prt_state = 0;
dbg_ack_tsn = tsn;
- };
+ }
dbg_last_ack_tsn = tsn;
#endif /* SCTP_DEBUG */
@@ -1452,7 +1452,7 @@ static void sctp_check_transmitted(struct sctp_outq *q,
SCTP_DEBUG_PRINTK("KEPT: %08x",tsn);
dbg_prt_state = 1;
dbg_kept_tsn = tsn;
- };
+ }
dbg_last_kept_tsn = tsn;
#endif /* SCTP_DEBUG */
@@ -1476,7 +1476,7 @@ static void sctp_check_transmitted(struct sctp_outq *q,
} else {
SCTP_DEBUG_PRINTK("\n");
}
- };
+ }
#endif /* SCTP_DEBUG */
if (transport) {
if (bytes_acked) {
diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c
index e17a823ca90..34bab36637a 100644
--- a/net/sctp/protocol.c
+++ b/net/sctp/protocol.c
@@ -170,7 +170,7 @@ static void sctp_get_local_addr_list(void)
struct sctp_af *af;
read_lock(&dev_base_lock);
- for (dev = dev_base; dev; dev = dev->next) {
+ for_each_netdev(dev) {
__list_for_each(pos, &sctp_address_families) {
af = list_entry(pos, struct sctp_af, list);
af->copy_addrlist(&sctp_local_addr_list, dev);
@@ -235,13 +235,13 @@ static void sctp_v4_from_skb(union sctp_addr *addr, struct sk_buff *skb,
port = &addr->v4.sin_port;
addr->v4.sin_family = AF_INET;
- sh = (struct sctphdr *) skb->h.raw;
+ sh = sctp_hdr(skb);
if (is_saddr) {
*port = sh->source;
- from = &skb->nh.iph->saddr;
+ from = &ip_hdr(skb)->saddr;
} else {
*port = sh->dest;
- from = &skb->nh.iph->daddr;
+ from = &ip_hdr(skb)->daddr;
}
memcpy(&addr->v4.sin_addr.s_addr, from, sizeof(struct in_addr));
}
@@ -530,7 +530,7 @@ static int sctp_v4_skb_iif(const struct sk_buff *skb)
/* Was this packet marked by Explicit Congestion Notification? */
static int sctp_v4_is_ce(const struct sk_buff *skb)
{
- return INET_ECN_is_ce(skb->nh.iph->tos);
+ return INET_ECN_is_ce(ip_hdr(skb)->tos);
}
/* Create and initialize a new sk for the socket returned by accept(). */
@@ -731,15 +731,13 @@ static void sctp_inet_event_msgname(struct sctp_ulpevent *event, char *msgname,
/* Initialize and copy out a msgname from an inbound skb. */
static void sctp_inet_skb_msgname(struct sk_buff *skb, char *msgname, int *len)
{
- struct sctphdr *sh;
- struct sockaddr_in *sin;
-
if (msgname) {
+ struct sctphdr *sh = sctp_hdr(skb);
+ struct sockaddr_in *sin = (struct sockaddr_in *)msgname;
+
sctp_inet_msgname(msgname, len);
- sin = (struct sockaddr_in *)msgname;
- sh = (struct sctphdr *)skb->h.raw;
sin->sin_port = sh->source;
- sin->sin_addr.s_addr = skb->nh.iph->saddr;
+ sin->sin_addr.s_addr = ip_hdr(skb)->saddr;
}
}
@@ -977,28 +975,14 @@ SCTP_STATIC __init int sctp_init(void)
if (!sctp_sanity_check())
goto out;
- status = proto_register(&sctp_prot, 1);
- if (status)
- goto out;
-
- /* Add SCTP to inet_protos hash table. */
- status = -EAGAIN;
- if (inet_add_protocol(&sctp_protocol, IPPROTO_SCTP) < 0)
- goto err_add_protocol;
-
- /* Add SCTP(TCP and UDP style) to inetsw linked list. */
- inet_register_protosw(&sctp_seqpacket_protosw);
- inet_register_protosw(&sctp_stream_protosw);
-
- /* Allocate a cache pools. */
+ /* Allocate bind_bucket and chunk caches. */
status = -ENOBUFS;
sctp_bucket_cachep = kmem_cache_create("sctp_bind_bucket",
sizeof(struct sctp_bind_bucket),
0, SLAB_HWCACHE_ALIGN,
NULL, NULL);
-
if (!sctp_bucket_cachep)
- goto err_bucket_cachep;
+ goto out;
sctp_chunk_cachep = kmem_cache_create("sctp_chunk",
sizeof(struct sctp_chunk),
@@ -1044,7 +1028,7 @@ SCTP_STATIC __init int sctp_init(void)
sctp_cookie_preserve_enable = 1;
/* Max.Burst - 4 */
- sctp_max_burst = SCTP_MAX_BURST;
+ sctp_max_burst = SCTP_DEFAULT_MAX_BURST;
/* Association.Max.Retrans - 10 attempts
* Path.Max.Retrans - 5 attempts (per destination address)
@@ -1155,6 +1139,14 @@ SCTP_STATIC __init int sctp_init(void)
INIT_LIST_HEAD(&sctp_address_families);
sctp_register_af(&sctp_ipv4_specific);
+ status = proto_register(&sctp_prot, 1);
+ if (status)
+ goto err_proto_register;
+
+ /* Register SCTP(UDP and TCP style) with socket layer. */
+ inet_register_protosw(&sctp_seqpacket_protosw);
+ inet_register_protosw(&sctp_stream_protosw);
+
status = sctp_v6_init();
if (status)
goto err_v6_init;
@@ -1168,19 +1160,39 @@ SCTP_STATIC __init int sctp_init(void)
/* Initialize the local address list. */
INIT_LIST_HEAD(&sctp_local_addr_list);
-
sctp_get_local_addr_list();
/* Register notifier for inet address additions/deletions. */
register_inetaddr_notifier(&sctp_inetaddr_notifier);
+ /* Register SCTP with inet layer. */
+ if (inet_add_protocol(&sctp_protocol, IPPROTO_SCTP) < 0) {
+ status = -EAGAIN;
+ goto err_add_protocol;
+ }
+
+ /* Register SCTP with inet6 layer. */
+ status = sctp_v6_add_protocol();
+ if (status)
+ goto err_v6_add_protocol;
+
__unsafe(THIS_MODULE);
status = 0;
out:
return status;
+err_v6_add_protocol:
+ inet_del_protocol(&sctp_protocol, IPPROTO_SCTP);
+ unregister_inetaddr_notifier(&sctp_inetaddr_notifier);
+err_add_protocol:
+ sctp_free_local_addr_list();
+ sock_release(sctp_ctl_socket);
err_ctl_sock_init:
sctp_v6_exit();
err_v6_init:
+ inet_unregister_protosw(&sctp_stream_protosw);
+ inet_unregister_protosw(&sctp_seqpacket_protosw);
+ proto_unregister(&sctp_prot);
+err_proto_register:
sctp_sysctl_unregister();
list_del(&sctp_ipv4_specific.list);
free_pages((unsigned long)sctp_port_hashtable,
@@ -1194,19 +1206,13 @@ err_ehash_alloc:
sizeof(struct sctp_hashbucket)));
err_ahash_alloc:
sctp_dbg_objcnt_exit();
-err_init_proc:
sctp_proc_exit();
+err_init_proc:
cleanup_sctp_mibs();
err_init_mibs:
kmem_cache_destroy(sctp_chunk_cachep);
err_chunk_cachep:
kmem_cache_destroy(sctp_bucket_cachep);
-err_bucket_cachep:
- inet_del_protocol(&sctp_protocol, IPPROTO_SCTP);
- inet_unregister_protosw(&sctp_seqpacket_protosw);
- inet_unregister_protosw(&sctp_stream_protosw);
-err_add_protocol:
- proto_unregister(&sctp_prot);
goto out;
}
@@ -1217,8 +1223,9 @@ SCTP_STATIC __exit void sctp_exit(void)
* up all the remaining associations and all that memory.
*/
- /* Unregister notifier for inet address additions/deletions. */
- unregister_inetaddr_notifier(&sctp_inetaddr_notifier);
+ /* Unregister with inet6/inet layers. */
+ sctp_v6_del_protocol();
+ inet_del_protocol(&sctp_protocol, IPPROTO_SCTP);
/* Free the local address list. */
sctp_free_local_addr_list();
@@ -1226,7 +1233,16 @@ SCTP_STATIC __exit void sctp_exit(void)
/* Free the control endpoint. */
sock_release(sctp_ctl_socket);
+ /* Cleanup v6 initializations. */
sctp_v6_exit();
+
+ /* Unregister with socket layer. */
+ inet_unregister_protosw(&sctp_stream_protosw);
+ inet_unregister_protosw(&sctp_seqpacket_protosw);
+
+ /* Unregister notifier for inet address additions/deletions. */
+ unregister_inetaddr_notifier(&sctp_inetaddr_notifier);
+
sctp_sysctl_unregister();
list_del(&sctp_ipv4_specific.list);
@@ -1238,16 +1254,13 @@ SCTP_STATIC __exit void sctp_exit(void)
get_order(sctp_port_hashsize *
sizeof(struct sctp_bind_hashbucket)));
- kmem_cache_destroy(sctp_chunk_cachep);
- kmem_cache_destroy(sctp_bucket_cachep);
-
sctp_dbg_objcnt_exit();
sctp_proc_exit();
cleanup_sctp_mibs();
- inet_del_protocol(&sctp_protocol, IPPROTO_SCTP);
- inet_unregister_protosw(&sctp_seqpacket_protosw);
- inet_unregister_protosw(&sctp_stream_protosw);
+ kmem_cache_destroy(sctp_chunk_cachep);
+ kmem_cache_destroy(sctp_bucket_cachep);
+
proto_unregister(&sctp_prot);
}
diff --git a/net/sctp/sm_make_chunk.c b/net/sctp/sm_make_chunk.c
index f7fb29d5a0c..8d18f570c2e 100644
--- a/net/sctp/sm_make_chunk.c
+++ b/net/sctp/sm_make_chunk.c
@@ -86,7 +86,7 @@ int sctp_chunk_iif(const struct sctp_chunk *chunk)
struct sctp_af *af;
int iif = 0;
- af = sctp_get_af_specific(ipver2af(chunk->skb->nh.iph->version));
+ af = sctp_get_af_specific(ipver2af(ip_hdr(chunk->skb)->version));
if (af)
iif = af->skb_iif(chunk->skb);
@@ -1143,7 +1143,7 @@ void *sctp_addto_chunk(struct sctp_chunk *chunk, int len, const void *data)
/* Adjust the chunk length field. */
chunk->chunk_hdr->length = htons(chunklen + padlen + len);
- chunk->chunk_end = chunk->skb->tail;
+ chunk->chunk_end = skb_tail_pointer(chunk->skb);
return target;
}
@@ -1168,7 +1168,7 @@ int sctp_user_addto_chunk(struct sctp_chunk *chunk, int off, int len,
/* Adjust the chunk length field. */
chunk->chunk_hdr->length =
htons(ntohs(chunk->chunk_hdr->length) + len);
- chunk->chunk_end = chunk->skb->tail;
+ chunk->chunk_end = skb_tail_pointer(chunk->skb);
out:
return err;
@@ -1233,7 +1233,7 @@ struct sctp_association *sctp_make_temp_asoc(const struct sctp_endpoint *ep,
asoc->temp = 1;
skb = chunk->skb;
/* Create an entry for the source address of the packet. */
- af = sctp_get_af_specific(ipver2af(skb->nh.iph->version));
+ af = sctp_get_af_specific(ipver2af(ip_hdr(skb)->version));
if (unlikely(!af))
goto fail;
af->from_skb(&asoc->c.peer_addr, skb, 1);
@@ -1939,7 +1939,6 @@ int sctp_process_init(struct sctp_association *asoc, sctp_cid_t cid,
* association.
*/
if (!asoc->temp) {
- int assoc_id;
int error;
asoc->ssnmap = sctp_ssnmap_new(asoc->c.sinit_max_instreams,
@@ -1947,19 +1946,9 @@ int sctp_process_init(struct sctp_association *asoc, sctp_cid_t cid,
if (!asoc->ssnmap)
goto clean_up;
- retry:
- if (unlikely(!idr_pre_get(&sctp_assocs_id, gfp)))
+ error = sctp_assoc_set_id(asoc, gfp);
+ if (error)
goto clean_up;
- spin_lock_bh(&sctp_assocs_id_lock);
- error = idr_get_new_above(&sctp_assocs_id, (void *)asoc, 1,
- &assoc_id);
- spin_unlock_bh(&sctp_assocs_id_lock);
- if (error == -EAGAIN)
- goto retry;
- else if (error)
- goto clean_up;
-
- asoc->assoc_id = (sctp_assoc_t) assoc_id;
}
/* ADDIP Section 4.1 ASCONF Chunk Procedures
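The deleted retry loop is assumed to live on inside the new sctp_assoc_set_id() helper: preallocate, attempt the id allocation, and loop while the allocator reports transient exhaustion. A userspace model of that pattern, where pre_get() and alloc_id() stand in for idr_pre_get() and idr_get_new_above():

#include <errno.h>
#include <stdio.h>

static int pool;                     /* ids handed out so far */
static int preloaded;

static void pre_get(void) { preloaded = 1; }

static int alloc_id(int *id)
{
	if (!preloaded)
		return -EAGAIN;      /* caller must preload and retry */
	preloaded = 0;               /* preload consumed */
	*id = ++pool;
	return 0;
}

static int set_id(int *id)
{
	int err;

	do {
		pre_get();
		err = alloc_id(id);
	} while (err == -EAGAIN);
	return err;
}

int main(void)
{
	int id;

	if (!set_id(&id))
		printf("assoc id %d\n", id);
	return 0;
}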
@@ -2077,7 +2066,7 @@ static int sctp_process_param(struct sctp_association *asoc,
default: /* Just ignore anything else. */
break;
- };
+ }
}
break;
@@ -2118,7 +2107,7 @@ static int sctp_process_param(struct sctp_association *asoc,
SCTP_DEBUG_PRINTK("Ignoring param: %d for association %p.\n",
ntohs(param.p->type), asoc);
break;
- };
+ }
return retval;
}
diff --git a/net/sctp/sm_sideeffect.c b/net/sctp/sm_sideeffect.c
index 13556749311..d9fad4f6ffc 100644
--- a/net/sctp/sm_sideeffect.c
+++ b/net/sctp/sm_sideeffect.c
@@ -464,7 +464,7 @@ static void sctp_cmd_init_failed(sctp_cmd_seq_t *commands,
struct sctp_ulpevent *event;
event = sctp_ulpevent_make_assoc_change(asoc,0, SCTP_CANT_STR_ASSOC,
- (__u16)error, 0, 0,
+ (__u16)error, 0, 0, NULL,
GFP_ATOMIC);
if (event)
@@ -492,8 +492,13 @@ static void sctp_cmd_assoc_failed(sctp_cmd_seq_t *commands,
/* Cancel any partial delivery in progress. */
sctp_ulpq_abort_pd(&asoc->ulpq, GFP_ATOMIC);
- event = sctp_ulpevent_make_assoc_change(asoc, 0, SCTP_COMM_LOST,
- (__u16)error, 0, 0,
+ if (event_type == SCTP_EVENT_T_CHUNK && subtype.chunk == SCTP_CID_ABORT)
+ event = sctp_ulpevent_make_assoc_change(asoc, 0, SCTP_COMM_LOST,
+ (__u16)error, 0, 0, chunk,
+ GFP_ATOMIC);
+ else
+ event = sctp_ulpevent_make_assoc_change(asoc, 0, SCTP_COMM_LOST,
+ (__u16)error, 0, 0, NULL,
GFP_ATOMIC);
if (event)
sctp_add_cmd_sf(commands, SCTP_CMD_EVENT_ULP,
@@ -857,6 +862,33 @@ static void sctp_cmd_set_sk_err(struct sctp_association *asoc, int error)
sk->sk_err = error;
}
+/* Helper function to generate an association change event */
+static void sctp_cmd_assoc_change(sctp_cmd_seq_t *commands,
+ struct sctp_association *asoc,
+ u8 state)
+{
+ struct sctp_ulpevent *ev;
+
+ ev = sctp_ulpevent_make_assoc_change(asoc, 0, state, 0,
+ asoc->c.sinit_num_ostreams,
+ asoc->c.sinit_max_instreams,
+ NULL, GFP_ATOMIC);
+ if (ev)
+ sctp_ulpq_tail_event(&asoc->ulpq, ev);
+}
+
+/* Helper function to generate an adaptation indication event */
+static void sctp_cmd_adaptation_ind(sctp_cmd_seq_t *commands,
+ struct sctp_association *asoc)
+{
+ struct sctp_ulpevent *ev;
+
+ ev = sctp_ulpevent_make_adaptation_indication(asoc, GFP_ATOMIC);
+
+ if (ev)
+ sctp_ulpq_tail_event(&asoc->ulpq, ev);
+}
+
/* These three macros allow us to pull the debugging code out of the
* main flow of sctp_do_sm() to keep attention focused on the real
* functionality there.
@@ -1004,7 +1036,7 @@ static int sctp_side_effects(sctp_event_t event_type, sctp_subtype_t subtype,
status, state, event_type, subtype.chunk);
BUG();
break;
- };
+ }
bail:
return error;
@@ -1480,11 +1512,20 @@ static int sctp_cmd_interpreter(sctp_event_t event_type,
case SCTP_CMD_SET_SK_ERR:
sctp_cmd_set_sk_err(asoc, cmd->obj.error);
break;
+ case SCTP_CMD_ASSOC_CHANGE:
+ sctp_cmd_assoc_change(commands, asoc,
+ cmd->obj.u8);
+ break;
+ case SCTP_CMD_ADAPTATION_IND:
+ sctp_cmd_adaptation_ind(commands, asoc);
+ break;
+
default:
printk(KERN_WARNING "Impossible command: %u, %p\n",
cmd->verb, cmd->obj.ptr);
break;
- };
+ }
+
if (error)
break;
}
diff --git a/net/sctp/sm_statefuns.c b/net/sctp/sm_statefuns.c
index e9097cf614b..f02ce3dddb7 100644
--- a/net/sctp/sm_statefuns.c
+++ b/net/sctp/sm_statefuns.c
@@ -186,7 +186,7 @@ sctp_disposition_t sctp_sf_do_4_C(const struct sctp_endpoint *ep,
* notification is passed to the upper layer.
*/
ev = sctp_ulpevent_make_assoc_change(asoc, 0, SCTP_SHUTDOWN_COMP,
- 0, 0, 0, GFP_ATOMIC);
+ 0, 0, 0, NULL, GFP_ATOMIC);
if (ev)
sctp_add_cmd_sf(commands, SCTP_CMD_EVENT_ULP,
SCTP_ULPEVENT(ev));
@@ -629,7 +629,7 @@ sctp_disposition_t sctp_sf_do_5_1D_ce(const struct sctp_endpoint *ep,
case -SCTP_IERROR_BAD_SIG:
default:
return sctp_sf_pdiscard(ep, asoc, type, arg, commands);
- };
+ }
}
@@ -661,7 +661,7 @@ sctp_disposition_t sctp_sf_do_5_1D_ce(const struct sctp_endpoint *ep,
ev = sctp_ulpevent_make_assoc_change(new_asoc, 0, SCTP_COMM_UP, 0,
new_asoc->c.sinit_num_ostreams,
new_asoc->c.sinit_max_instreams,
- GFP_ATOMIC);
+ NULL, GFP_ATOMIC);
if (!ev)
goto nomem_ev;
@@ -790,7 +790,7 @@ sctp_disposition_t sctp_sf_do_5_1E_ca(const struct sctp_endpoint *ep,
ev = sctp_ulpevent_make_assoc_change(asoc, 0, SCTP_COMM_UP,
0, asoc->c.sinit_num_ostreams,
asoc->c.sinit_max_instreams,
- GFP_ATOMIC);
+ NULL, GFP_ATOMIC);
if (!ev)
goto nomem;
@@ -1195,7 +1195,7 @@ static void sctp_tietags_populate(struct sctp_association *new_asoc,
new_asoc->c.my_ttag = asoc->c.my_vtag;
new_asoc->c.peer_ttag = asoc->c.peer_vtag;
break;
- };
+ }
/* Other parameters for the endpoint SHOULD be copied from the
* existing parameters of the association (e.g. number of
@@ -1625,7 +1625,7 @@ static sctp_disposition_t sctp_sf_do_dupcook_a(const struct sctp_endpoint *ep,
ev = sctp_ulpevent_make_assoc_change(asoc, 0, SCTP_RESTART, 0,
new_asoc->c.sinit_num_ostreams,
new_asoc->c.sinit_max_instreams,
- GFP_ATOMIC);
+ NULL, GFP_ATOMIC);
if (!ev)
goto nomem_ev;
@@ -1656,7 +1656,6 @@ static sctp_disposition_t sctp_sf_do_dupcook_b(const struct sctp_endpoint *ep,
struct sctp_association *new_asoc)
{
sctp_init_chunk_t *peer_init;
- struct sctp_ulpevent *ev;
struct sctp_chunk *repl;
/* new_asoc is a brand-new association, so these are not yet
@@ -1687,34 +1686,28 @@ static sctp_disposition_t sctp_sf_do_dupcook_b(const struct sctp_endpoint *ep,
* D) IMPLEMENTATION NOTE: An implementation may choose to
* send the Communication Up notification to the SCTP user
* upon reception of a valid COOKIE ECHO chunk.
+ *
+ * Sadly, this needs to be implemented as a side-effect, because
+ * we are not guaranteed to have set the association id of the real
+ * association and so these notifications need to be delayed until
+ * the association id is allocated.
*/
- ev = sctp_ulpevent_make_assoc_change(asoc, 0, SCTP_COMM_UP, 0,
- new_asoc->c.sinit_num_ostreams,
- new_asoc->c.sinit_max_instreams,
- GFP_ATOMIC);
- if (!ev)
- goto nomem_ev;
- sctp_add_cmd_sf(commands, SCTP_CMD_EVENT_ULP, SCTP_ULPEVENT(ev));
+ sctp_add_cmd_sf(commands, SCTP_CMD_ASSOC_CHANGE, SCTP_U8(SCTP_COMM_UP));
/* Sockets API Draft Section 5.3.1.6
* When a peer sends an Adaptation Layer Indication parameter, SCTP
* delivers this notification to inform the application of the
* peer's requested adaptation layer.
+ *
+ * This also needs to be done as a side effect for the same reason as
+ * above.
*/
- if (asoc->peer.adaptation_ind) {
- ev = sctp_ulpevent_make_adaptation_indication(asoc, GFP_ATOMIC);
- if (!ev)
- goto nomem_ev;
-
- sctp_add_cmd_sf(commands, SCTP_CMD_EVENT_ULP,
- SCTP_ULPEVENT(ev));
- }
+ if (asoc->peer.adaptation_ind)
+ sctp_add_cmd_sf(commands, SCTP_CMD_ADAPTATION_IND, SCTP_NULL());
return SCTP_DISPOSITION_CONSUME;
-nomem_ev:
- sctp_chunk_free(repl);
nomem:
return SCTP_DISPOSITION_NOMEM;
}
@@ -1786,7 +1779,7 @@ static sctp_disposition_t sctp_sf_do_dupcook_d(const struct sctp_endpoint *ep,
SCTP_COMM_UP, 0,
asoc->c.sinit_num_ostreams,
asoc->c.sinit_max_instreams,
- GFP_ATOMIC);
+ NULL, GFP_ATOMIC);
if (!ev)
goto nomem;
@@ -1904,7 +1897,7 @@ sctp_disposition_t sctp_sf_do_5_2_4_dupcook(const struct sctp_endpoint *ep,
case -SCTP_IERROR_BAD_SIG:
default:
return sctp_sf_pdiscard(ep, asoc, type, arg, commands);
- };
+ }
}
/* Compare the tie_tag in cookie with the verification tag of
@@ -1936,7 +1929,7 @@ sctp_disposition_t sctp_sf_do_5_2_4_dupcook(const struct sctp_endpoint *ep,
default: /* Discard packet for all others. */
retval = sctp_sf_pdiscard(ep, asoc, type, arg, commands);
break;
- };
+ }
/* Delete the temporary new association. */
sctp_add_cmd_sf(commands, SCTP_CMD_NEW_ASOC, SCTP_ASOC(new_asoc));
@@ -3035,7 +3028,7 @@ sctp_disposition_t sctp_sf_do_9_2_final(const struct sctp_endpoint *ep,
* notification is passed to the upper layer.
*/
ev = sctp_ulpevent_make_assoc_change(asoc, 0, SCTP_SHUTDOWN_COMP,
- 0, 0, 0, GFP_ATOMIC);
+ 0, 0, 0, NULL, GFP_ATOMIC);
if (!ev)
goto nomem;
@@ -3115,7 +3108,7 @@ sctp_disposition_t sctp_sf_ootb(const struct sctp_endpoint *ep,
break;
ch_end = ((__u8 *)ch) + WORD_ROUND(ntohs(ch->length));
- if (ch_end > skb->tail)
+ if (ch_end > skb_tail_pointer(skb))
break;
if (SCTP_CID_SHUTDOWN_ACK == ch->type)
@@ -3130,7 +3123,7 @@ sctp_disposition_t sctp_sf_ootb(const struct sctp_endpoint *ep,
return sctp_sf_pdiscard(ep, asoc, type, arg, commands);
ch = (sctp_chunkhdr_t *) ch_end;
- } while (ch_end < skb->tail);
+ } while (ch_end < skb_tail_pointer(skb));
if (ootb_shut_ack)
sctp_sf_shut_8_4_5(ep, asoc, type, arg, commands);
@@ -4816,7 +4809,7 @@ sctp_disposition_t sctp_sf_t2_timer_expire(const struct sctp_endpoint *ep,
default:
BUG();
break;
- };
+ }
if (!reply)
goto nomem;
@@ -5286,7 +5279,7 @@ static int sctp_eat_data(const struct sctp_association *asoc,
chunk->ecn_ce_done = 1;
af = sctp_get_af_specific(
- ipver2af(chunk->skb->nh.iph->version));
+ ipver2af(ip_hdr(chunk->skb)->version));
if (af && af->is_ce(chunk->skb) && asoc->peer.ecn_capable) {
/* Do real work as a side effect. */
diff --git a/net/sctp/sm_statetable.c b/net/sctp/sm_statetable.c
index 5e54b17377f..523071c7902 100644
--- a/net/sctp/sm_statetable.c
+++ b/net/sctp/sm_statetable.c
@@ -101,7 +101,7 @@ const sctp_sm_table_entry_t *sctp_sm_lookup_event(sctp_event_t event_type,
default:
/* Yikes! We got an illegal event type. */
return &bug;
- };
+ }
}
#define TYPE_SCTP_FUNC(func) {.fn = func, .name = #func}
diff --git a/net/sctp/socket.c b/net/sctp/socket.c
index a1d026f12b0..9f1a908776d 100644
--- a/net/sctp/socket.c
+++ b/net/sctp/socket.c
@@ -941,7 +941,7 @@ SCTP_STATIC int sctp_setsockopt_bindx(struct sock* sk,
default:
err = -EINVAL;
break;
- };
+ }
out:
kfree(kaddrs);
@@ -972,6 +972,7 @@ static int __sctp_connect(struct sock* sk,
int walk_size = 0;
union sctp_addr *sa_addr;
void *addr_buf;
+ unsigned short port;
sp = sctp_sk(sk);
ep = sp->ep;
@@ -992,6 +993,7 @@ static int __sctp_connect(struct sock* sk,
while (walk_size < addrs_size) {
sa_addr = (union sctp_addr *)addr_buf;
af = sctp_get_af_specific(sa_addr->sa.sa_family);
+ port = ntohs(sa_addr->v4.sin_port);
/* If the address family is not supported or if this address
* causes the address buffer to overflow return EINVAL.
@@ -1005,6 +1007,12 @@ static int __sctp_connect(struct sock* sk,
if (err)
goto out_free;
+ /* Make sure the destination port is correctly set
+ * in all addresses.
+ */
+ if (asoc && asoc->peer.port && asoc->peer.port != port) {
+ err = -EINVAL;
+ goto out_free;
+ }
+
memcpy(&to, sa_addr, af->sockaddr_len);
/* Check if there already is a matching association on the
@@ -2039,6 +2047,10 @@ static int sctp_setsockopt_autoclose(struct sock *sk, char __user *optval,
* SPP_HB_DEMAND - Request a user initiated heartbeat
* to be made immediately.
*
+ * SPP_HB_TIME_IS_ZERO - Specifies that the time for
+ * heartbeat delay is to be set to the value of 0
+ * milliseconds.
+ *
* SPP_PMTUD_ENABLE - This field will enable PMTU
* discovery upon the specified address. Note that
* if the address field is empty then all addresses
@@ -2081,13 +2093,30 @@ static int sctp_apply_peer_addr_params(struct sctp_paddrparams *params,
return error;
}
- if (params->spp_hbinterval) {
- if (trans) {
- trans->hbinterval = msecs_to_jiffies(params->spp_hbinterval);
- } else if (asoc) {
- asoc->hbinterval = msecs_to_jiffies(params->spp_hbinterval);
- } else {
- sp->hbinterval = params->spp_hbinterval;
+ /* Note that unless the spp_flag is set to SPP_HB_ENABLE the value of
+ * this field is ignored. Note also that a value of zero indicates
+ * the current setting should be left unchanged.
+ */
+ if (params->spp_flags & SPP_HB_ENABLE) {
+
+ /* Re-zero the interval if SPP_HB_TIME_IS_ZERO is
+ * set. This lets us use a value of 0 when this
+ * flag is set.
+ */
+ if (params->spp_flags & SPP_HB_TIME_IS_ZERO)
+ params->spp_hbinterval = 0;
+
+ if (params->spp_hbinterval ||
+ (params->spp_flags & SPP_HB_TIME_IS_ZERO)) {
+ if (trans) {
+ trans->hbinterval =
+ msecs_to_jiffies(params->spp_hbinterval);
+ } else if (asoc) {
+ asoc->hbinterval =
+ msecs_to_jiffies(params->spp_hbinterval);
+ } else {
+ sp->hbinterval = params->spp_hbinterval;
+ }
}
}
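A hedged usage sketch of the new SPP_HB_TIME_IS_ZERO flag from userspace, assuming an SCTP-capable kernel and the <netinet/sctp.h> header shipped with lksctp-tools; error handling is kept minimal:

#include <netinet/in.h>
#include <netinet/sctp.h>
#include <stdio.h>
#include <string.h>
#include <sys/socket.h>
#include <unistd.h>

int main(void)
{
	int fd = socket(AF_INET, SOCK_SEQPACKET, IPPROTO_SCTP);
	struct sctp_paddrparams p;

	memset(&p, 0, sizeof(p));
	/* Heartbeats on, interval forced to 0 ms; the address is left
	 * empty so the setting applies to the whole endpoint. */
	p.spp_flags = SPP_HB_ENABLE | SPP_HB_TIME_IS_ZERO;

	if (setsockopt(fd, IPPROTO_SCTP, SCTP_PEER_ADDR_PARAMS,
		       &p, sizeof(p)) < 0)
		perror("SCTP_PEER_ADDR_PARAMS");
	close(fd);
	return 0;
}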
@@ -2104,7 +2133,12 @@ static int sctp_apply_peer_addr_params(struct sctp_paddrparams *params,
}
}
- if (params->spp_pathmtu) {
+ /* When Path MTU discovery is disabled the value specified here will
+ * be the "fixed" path mtu (i.e. the value of the spp_flags field must
+ * include the flag SPP_PMTUD_DISABLE for this field to have any
+ * effect).
+ */
+ if ((params->spp_flags & SPP_PMTUD_DISABLE) && params->spp_pathmtu) {
if (trans) {
trans->pathmtu = params->spp_pathmtu;
sctp_assoc_sync_pmtu(asoc);
@@ -2135,7 +2169,11 @@ static int sctp_apply_peer_addr_params(struct sctp_paddrparams *params,
}
}
- if (params->spp_sackdelay) {
+ /* Note that unless the spp_flag is set to SPP_SACKDELAY_ENABLE the
+ * value of this field is ignored. Note also that a value of zero
+ * indicates the current setting should be left unchanged.
+ */
+ if ((params->spp_flags & SPP_SACKDELAY_ENABLE) && params->spp_sackdelay) {
if (trans) {
trans->sackdelay =
msecs_to_jiffies(params->spp_sackdelay);
@@ -2163,7 +2201,11 @@ static int sctp_apply_peer_addr_params(struct sctp_paddrparams *params,
}
}
- if (params->spp_pathmaxrxt) {
+ /* Note that unless the spp_flag is set to SPP_PMTUD_ENABLE the value
+ * of this field is ignored. Note also that a value of zero
+ * indicates the current setting should be left unchanged.
+ */
+ if ((params->spp_flags & SPP_PMTUD_ENABLE) && params->spp_pathmaxrxt) {
if (trans) {
trans->pathmaxrxt = params->spp_pathmaxrxt;
} else if (asoc) {
@@ -2255,7 +2297,7 @@ static int sctp_setsockopt_peer_addr_params(struct sock *sk,
return 0;
}
-/* 7.1.24. Delayed Ack Timer (SCTP_DELAYED_ACK_TIME)
+/* 7.1.23. Delayed Ack Timer (SCTP_DELAYED_ACK_TIME)
*
* This option will get or set the delayed ack timer. The time is set
* in milliseconds. If the assoc_id is 0, then this sets or gets the
@@ -2792,6 +2834,102 @@ static int sctp_setsockopt_context(struct sock *sk, char __user *optval,
return 0;
}
+/*
+ * 7.1.24. Get or set fragmented interleave (SCTP_FRAGMENT_INTERLEAVE)
+ *
+ * This option will at a minimum specify if the implementation is doing
+ * fragmented interleave. Fragmented interleave, for a one to many
+ * socket, is when subsequent calls to receive a message may return
+ * parts of messages from different associations. Some implementations
+ * may allow you to turn this value on or off. If so, when turned off,
+ * no fragment interleave will occur (which will cause a head of line
+ * blocking amongst multiple associations sharing the same one to many
+ * socket). When this option is turned on, then each receive call may
+ * come from a different association (thus the user must receive data
+ * with the extended calls (e.g. sctp_recvmsg) to keep track of which
+ * association each receive belongs to).
+ *
+ * This option takes a boolean value. A non-zero value indicates that
+ * fragmented interleave is on. A value of zero indicates that
+ * fragmented interleave is off.
+ *
+ * Note that it is important that an implementation that allows this
+ * option to be turned on, have it off by default. Otherwise an unaware
+ * application using the one to many model may become confused and act
+ * incorrectly.
+ */
+static int sctp_setsockopt_fragment_interleave(struct sock *sk,
+ char __user *optval,
+ int optlen)
+{
+ int val;
+
+ if (optlen != sizeof(int))
+ return -EINVAL;
+ if (get_user(val, (int __user *)optval))
+ return -EFAULT;
+
+ sctp_sk(sk)->frag_interleave = (val == 0) ? 0 : 1;
+
+ return 0;
+}
+
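A matching usage sketch for the option just defined, again assuming lksctp-tools headers on an SCTP-capable kernel:

#include <netinet/in.h>
#include <netinet/sctp.h>
#include <stdio.h>
#include <sys/socket.h>
#include <unistd.h>

int main(void)
{
	int fd = socket(AF_INET, SOCK_SEQPACKET, IPPROTO_SCTP);
	int on = 1;

	/* Off by default, as the quoted text requires; turning it on
	 * means receives may interleave messages across associations,
	 * so the caller should use sctp_recvmsg() to tell them apart. */
	if (setsockopt(fd, IPPROTO_SCTP, SCTP_FRAGMENT_INTERLEAVE,
		       &on, sizeof(on)) < 0)
		perror("SCTP_FRAGMENT_INTERLEAVE");
	close(fd);
	return 0;
}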
+/*
+ * 7.1.25. Set or Get the sctp partial delivery point
+ * (SCTP_PARTIAL_DELIVERY_POINT)
+ * This option will set or get the SCTP partial delivery point. This
+ * point is the size of a message where the partial delivery API will be
+ * invoked to help free up rwnd space for the peer. Setting this to a
+ * lower value will cause partial deliveries to happen more often. The
+ * call's argument is an integer that sets or gets the partial delivery
+ * point.
+ */
+static int sctp_setsockopt_partial_delivery_point(struct sock *sk,
+ char __user *optval,
+ int optlen)
+{
+ u32 val;
+
+ if (optlen != sizeof(u32))
+ return -EINVAL;
+ if (get_user(val, (int __user *)optval))
+ return -EFAULT;
+
+ sctp_sk(sk)->pd_point = val;
+
+ return 0; /* is this the right error code? */
+}
+
+/*
+ * 7.1.28. Set or Get the maximum burst (SCTP_MAX_BURST)
+ *
+ * This option will allow a user to change the maximum burst of packets
+ * that can be emitted by this association. Note that the default value
+ * is 4, and some implementations may restrict this setting so that it
+ * can only be lowered.
+ *
+ * NOTE: This text doesn't seem right. Do this on a socket basis with
+ * future associations inheriting the socket value.
+ */
+static int sctp_setsockopt_maxburst(struct sock *sk,
+ char __user *optval,
+ int optlen)
+{
+ int val;
+
+ if (optlen != sizeof(int))
+ return -EINVAL;
+ if (get_user(val, (int __user *)optval))
+ return -EFAULT;
+
+ if (val < 0)
+ return -EINVAL;
+
+ sctp_sk(sk)->max_burst = val;
+
+ return 0;
+}
+
/* API 6.2 setsockopt(), getsockopt()
*
* Applications use setsockopt() and getsockopt() to set or retrieve
@@ -2871,6 +3009,9 @@ SCTP_STATIC int sctp_setsockopt(struct sock *sk, int level, int optname,
case SCTP_DELAYED_ACK_TIME:
retval = sctp_setsockopt_delayed_ack_time(sk, optval, optlen);
break;
+ case SCTP_PARTIAL_DELIVERY_POINT:
+ retval = sctp_setsockopt_partial_delivery_point(sk, optval, optlen);
+ break;
case SCTP_INITMSG:
retval = sctp_setsockopt_initmsg(sk, optval, optlen);
@@ -2906,11 +3047,16 @@ SCTP_STATIC int sctp_setsockopt(struct sock *sk, int level, int optname,
case SCTP_CONTEXT:
retval = sctp_setsockopt_context(sk, optval, optlen);
break;
-
+ case SCTP_FRAGMENT_INTERLEAVE:
+ retval = sctp_setsockopt_fragment_interleave(sk, optval, optlen);
+ break;
+ case SCTP_MAX_BURST:
+ retval = sctp_setsockopt_maxburst(sk, optval, optlen);
+ break;
default:
retval = -ENOPROTOOPT;
break;
- };
+ }
sctp_release_sock(sk);
@@ -3066,6 +3212,7 @@ SCTP_STATIC int sctp_init_sock(struct sock *sk)
sp->default_timetolive = 0;
sp->default_rcv_context = 0;
+ sp->max_burst = sctp_max_burst;
/* Initialize default setup parameters. These parameters
* can be modified with the SCTP_INITMSG socket option or
@@ -3134,8 +3281,9 @@ SCTP_STATIC int sctp_init_sock(struct sock *sk)
sp->pf = sctp_get_pf_specific(sk->sk_family);
/* Control variables for partial data delivery. */
- sp->pd_mode = 0;
+ atomic_set(&sp->pd_mode, 0);
skb_queue_head_init(&sp->pd_lobby);
+ sp->frag_interleave = 0;
/* Create a per socket endpoint structure. Even if we
* change the data structure relationships, this may still
@@ -3642,7 +3790,7 @@ static int sctp_getsockopt_peer_addr_params(struct sock *sk, int len,
return 0;
}
-/* 7.1.24. Delayed Ack Timer (SCTP_DELAYED_ACK_TIME)
+/* 7.1.23. Delayed Ack Timer (SCTP_DELAYED_ACK_TIME)
*
* This option will get or set the delayed ack timer. The time is set
* in milliseconds. If the assoc_id is 0, then this sets or gets the
@@ -3847,7 +3995,7 @@ static int sctp_getsockopt_peer_addrs(struct sock *sk, int len,
memcpy(&temp, &from->ipaddr, sizeof(temp));
sctp_get_pf_specific(sk->sk_family)->addr_v4map(sp, &temp);
addrlen = sctp_get_af_specific(sk->sk_family)->sockaddr_len;
- if(space_left < addrlen)
+ if (space_left < addrlen)
return -ENOMEM;
if (copy_to_user(to, &temp, addrlen))
return -EFAULT;
@@ -3936,8 +4084,9 @@ done:
/* Helper function that copies local addresses to user and returns the number
* of addresses copied.
*/
-static int sctp_copy_laddrs_to_user_old(struct sock *sk, __u16 port, int max_addrs,
- void __user *to)
+static int sctp_copy_laddrs_old(struct sock *sk, __u16 port,
+ int max_addrs, void *to,
+ int *bytes_copied)
{
struct list_head *pos, *next;
struct sctp_sockaddr_entry *addr;
@@ -3954,10 +4103,10 @@ static int sctp_copy_laddrs_to_user_old(struct sock *sk, __u16 port, int max_add
sctp_get_pf_specific(sk->sk_family)->addr_v4map(sctp_sk(sk),
&temp);
addrlen = sctp_get_af_specific(temp.sa.sa_family)->sockaddr_len;
- if (copy_to_user(to, &temp, addrlen))
- return -EFAULT;
+ memcpy(to, &temp, addrlen);
to += addrlen;
+ *bytes_copied += addrlen;
cnt ++;
if (cnt >= max_addrs) break;
}
@@ -3965,8 +4114,8 @@ static int sctp_copy_laddrs_to_user_old(struct sock *sk, __u16 port, int max_add
return cnt;
}
-static int sctp_copy_laddrs_to_user(struct sock *sk, __u16 port,
- void __user **to, size_t space_left)
+static int sctp_copy_laddrs(struct sock *sk, __u16 port, void *to,
+ size_t space_left, int *bytes_copied)
{
struct list_head *pos, *next;
struct sctp_sockaddr_entry *addr;
@@ -3983,14 +4132,14 @@ static int sctp_copy_laddrs_to_user(struct sock *sk, __u16 port,
sctp_get_pf_specific(sk->sk_family)->addr_v4map(sctp_sk(sk),
&temp);
addrlen = sctp_get_af_specific(temp.sa.sa_family)->sockaddr_len;
- if(space_left<addrlen)
+ if (space_left < addrlen)
return -ENOMEM;
- if (copy_to_user(*to, &temp, addrlen))
- return -EFAULT;
+ memcpy(to, &temp, addrlen);
- *to += addrlen;
+ to += addrlen;
cnt ++;
space_left -= addrlen;
+ *bytes_copied += addrlen;
}
return cnt;
@@ -4014,6 +4163,8 @@ static int sctp_getsockopt_local_addrs_old(struct sock *sk, int len,
int addrlen;
rwlock_t *addr_lock;
int err = 0;
+ void *addrs;
+ int bytes_copied = 0;
if (len != sizeof(struct sctp_getaddrs_old))
return -EINVAL;
@@ -4041,6 +4192,15 @@ static int sctp_getsockopt_local_addrs_old(struct sock *sk, int len,
to = getaddrs.addrs;
+ /* Allocate space for a local instance of a packed array to hold all
+ * the data. We store the addresses here first and then write them
+ * to the user in one shot.
+ */
+ addrs = kmalloc(sizeof(union sctp_addr) * getaddrs.addr_num,
+ GFP_KERNEL);
+ if (!addrs)
+ return -ENOMEM;
+
sctp_read_lock(addr_lock);
/* If the endpoint is bound to 0.0.0.0 or ::0, get the valid
@@ -4050,13 +4210,9 @@ static int sctp_getsockopt_local_addrs_old(struct sock *sk, int len,
addr = list_entry(bp->address_list.next,
struct sctp_sockaddr_entry, list);
if (sctp_is_any(&addr->a)) {
- cnt = sctp_copy_laddrs_to_user_old(sk, bp->port,
- getaddrs.addr_num,
- to);
- if (cnt < 0) {
- err = cnt;
- goto unlock;
- }
+ cnt = sctp_copy_laddrs_old(sk, bp->port,
+ getaddrs.addr_num,
+ addrs, &bytes_copied);
goto copy_getaddrs;
}
}
@@ -4066,22 +4222,29 @@ static int sctp_getsockopt_local_addrs_old(struct sock *sk, int len,
memcpy(&temp, &addr->a, sizeof(temp));
sctp_get_pf_specific(sk->sk_family)->addr_v4map(sp, &temp);
addrlen = sctp_get_af_specific(temp.sa.sa_family)->sockaddr_len;
- if (copy_to_user(to, &temp, addrlen)) {
- err = -EFAULT;
- goto unlock;
- }
+ memcpy(addrs, &temp, addrlen);
to += addrlen;
+ bytes_copied += addrlen;
cnt ++;
if (cnt >= getaddrs.addr_num) break;
}
copy_getaddrs:
+ sctp_read_unlock(addr_lock);
+
+ /* Copy the entire address list into the user-provided space. */
+ if (copy_to_user(to, addrs, bytes_copied)) {
+ err = -EFAULT;
+ goto error;
+ }
+
+ /* Copy the leading structure back to the user. */
getaddrs.addr_num = cnt;
if (copy_to_user(optval, &getaddrs, sizeof(struct sctp_getaddrs_old)))
err = -EFAULT;
-unlock:
- sctp_read_unlock(addr_lock);
+error:
+ kfree(addrs);
return err;
}
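The restructuring above follows a common pattern: build the packed address list in a kernel buffer while the read lock is held, unlock, and only then perform the (possibly sleeping) copy_to_user() in one shot. A userspace model of that lock-then-bulk-copy shape, with stand-in names throughout:

#include <pthread.h>
#include <stdio.h>
#include <string.h>

static pthread_rwlock_t addr_lock = PTHREAD_RWLOCK_INITIALIZER;
static const char *addr_list[] = { "10.0.0.1", "192.168.0.1" };

int main(void)
{
	char staging[64] = "", out[64];
	size_t i;

	pthread_rwlock_rdlock(&addr_lock);      /* sctp_read_lock() */
	for (i = 0; i < 2; i++) {               /* build packed copy */
		strcat(staging, addr_list[i]);
		strcat(staging, " ");
	}
	pthread_rwlock_unlock(&addr_lock);      /* unlock first ... */

	memcpy(out, staging, sizeof(out));      /* ... then one copy */
	printf("copied: %s\n", out);
	return 0;
}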
@@ -4101,7 +4264,8 @@ static int sctp_getsockopt_local_addrs(struct sock *sk, int len,
rwlock_t *addr_lock;
int err = 0;
size_t space_left;
- int bytes_copied;
+ int bytes_copied = 0;
+ void *addrs;
if (len <= sizeof(struct sctp_getaddrs))
return -EINVAL;
@@ -4129,6 +4293,9 @@ static int sctp_getsockopt_local_addrs(struct sock *sk, int len,
to = optval + offsetof(struct sctp_getaddrs,addrs);
space_left = len - sizeof(struct sctp_getaddrs) -
offsetof(struct sctp_getaddrs,addrs);
+ addrs = kmalloc(space_left, GFP_KERNEL);
+ if (!addrs)
+ return -ENOMEM;
sctp_read_lock(addr_lock);
@@ -4139,11 +4306,11 @@ static int sctp_getsockopt_local_addrs(struct sock *sk, int len,
addr = list_entry(bp->address_list.next,
struct sctp_sockaddr_entry, list);
if (sctp_is_any(&addr->a)) {
- cnt = sctp_copy_laddrs_to_user(sk, bp->port,
- &to, space_left);
+ cnt = sctp_copy_laddrs(sk, bp->port, addrs,
+ space_left, &bytes_copied);
if (cnt < 0) {
err = cnt;
- goto unlock;
+ goto error;
}
goto copy_getaddrs;
}
@@ -4154,26 +4321,31 @@ static int sctp_getsockopt_local_addrs(struct sock *sk, int len,
memcpy(&temp, &addr->a, sizeof(temp));
sctp_get_pf_specific(sk->sk_family)->addr_v4map(sp, &temp);
addrlen = sctp_get_af_specific(temp.sa.sa_family)->sockaddr_len;
- if(space_left < addrlen)
- return -ENOMEM; /*fixme: right error?*/
- if (copy_to_user(to, &temp, addrlen)) {
- err = -EFAULT;
- goto unlock;
+ if (space_left < addrlen) {
+ err = -ENOMEM; /*fixme: right error?*/
+ goto error;
}
+ memcpy(addrs, &temp, addrlen);
to += addrlen;
+ bytes_copied += addrlen;
cnt ++;
space_left -= addrlen;
}
copy_getaddrs:
+ sctp_read_unlock(addr_lock);
+
+ if (copy_to_user(to, addrs, bytes_copied)) {
+ err = -EFAULT;
+ goto error;
+ }
if (put_user(cnt, &((struct sctp_getaddrs __user *)optval)->addr_num))
return -EFAULT;
- bytes_copied = ((char __user *)to) - optval;
if (put_user(bytes_copied, optlen))
return -EFAULT;
-unlock:
- sctp_read_unlock(addr_lock);
+error:
+ kfree(addrs);
return err;
}
@@ -4536,6 +4708,77 @@ static int sctp_getsockopt_maxseg(struct sock *sk, int len,
return 0;
}
+/*
+ * 7.1.24. Get or set fragmented interleave (SCTP_FRAGMENT_INTERLEAVE)
+ * (chapter and verse is quoted at sctp_setsockopt_fragment_interleave())
+ */
+static int sctp_getsockopt_fragment_interleave(struct sock *sk, int len,
+ char __user *optval, int __user *optlen)
+{
+ int val;
+
+ if (len < sizeof(int))
+ return -EINVAL;
+
+ len = sizeof(int);
+
+ val = sctp_sk(sk)->frag_interleave;
+ if (put_user(len, optlen))
+ return -EFAULT;
+ if (copy_to_user(optval, &val, len))
+ return -EFAULT;
+
+ return 0;
+}
+
+/*
+ * 7.1.25. Set or Get the sctp partial delivery point
+ * (chapter and verse is quoted at sctp_setsockopt_partial_delivery_point())
+ */
+static int sctp_getsockopt_partial_delivery_point(struct sock *sk, int len,
+ char __user *optval,
+ int __user *optlen)
+{
+ u32 val;
+
+ if (len < sizeof(u32))
+ return -EINVAL;
+
+ len = sizeof(u32);
+
+ val = sctp_sk(sk)->pd_point;
+ if (put_user(len, optlen))
+ return -EFAULT;
+ if (copy_to_user(optval, &val, len))
+ return -EFAULT;
+
+ return 0;
+}
+
+/*
+ * 7.1.28. Set or Get the maximum burst (SCTP_MAX_BURST)
+ * (chapter and verse is quoted at sctp_setsockopt_maxburst())
+ */
+static int sctp_getsockopt_maxburst(struct sock *sk, int len,
+ char __user *optval,
+ int __user *optlen)
+{
+ int val;
+
+ if (len < sizeof(int))
+ return -EINVAL;
+
+ len = sizeof(int);
+
+ val = sctp_sk(sk)->max_burst;
+ if (put_user(len, optlen))
+ return -EFAULT;
+ if (copy_to_user(optval, &val, len))
+ return -EFAULT;
+
+ return 0;
+}
+
SCTP_STATIC int sctp_getsockopt(struct sock *sk, int level, int optname,
char __user *optval, int __user *optlen)
{
@@ -4648,10 +4891,21 @@ SCTP_STATIC int sctp_getsockopt(struct sock *sk, int level, int optname,
case SCTP_CONTEXT:
retval = sctp_getsockopt_context(sk, len, optval, optlen);
break;
+ case SCTP_FRAGMENT_INTERLEAVE:
+ retval = sctp_getsockopt_fragment_interleave(sk, len, optval,
+ optlen);
+ break;
+ case SCTP_PARTIAL_DELIVERY_POINT:
+ retval = sctp_getsockopt_partial_delivery_point(sk, len, optval,
+ optlen);
+ break;
+ case SCTP_MAX_BURST:
+ retval = sctp_getsockopt_maxburst(sk, len, optval, optlen);
+ break;
default:
retval = -ENOPROTOOPT;
break;
- };
+ }
sctp_release_sock(sk);
return retval;
@@ -4766,7 +5020,8 @@ pp_found:
struct hlist_node *node;
SCTP_DEBUG_PRINTK("sctp_get_port() found a possible match\n");
- if (pp->fastreuse && sk->sk_reuse)
+ if (pp->fastreuse && sk->sk_reuse &&
+ sk->sk_state != SCTP_SS_LISTENING)
goto success;
/* Run through the list of sockets bound to the port
@@ -4783,7 +5038,8 @@ pp_found:
struct sctp_endpoint *ep2;
ep2 = sctp_sk(sk2)->ep;
- if (reuse && sk2->sk_reuse)
+ if (reuse && sk2->sk_reuse &&
+ sk2->sk_state != SCTP_SS_LISTENING)
continue;
if (sctp_bind_addr_match(&ep2->base.bind_addr, addr,
@@ -4804,9 +5060,13 @@ pp_not_found:
* if sk->sk_reuse is too (that is, if the caller requested
* SO_REUSEADDR on this socket -sk-).
*/
- if (hlist_empty(&pp->owner))
- pp->fastreuse = sk->sk_reuse ? 1 : 0;
- else if (pp->fastreuse && !sk->sk_reuse)
+ if (hlist_empty(&pp->owner)) {
+ if (sk->sk_reuse && sk->sk_state != SCTP_SS_LISTENING)
+ pp->fastreuse = 1;
+ else
+ pp->fastreuse = 0;
+ } else if (pp->fastreuse &&
+ (!sk->sk_reuse || sk->sk_state == SCTP_SS_LISTENING))
pp->fastreuse = 0;
/* We are set, so fill up all the data in the hash table
@@ -4814,8 +5074,8 @@ pp_not_found:
* sockets FIXME: Blurry, NPI (ipg).
*/
success:
- inet_sk(sk)->num = snum;
if (!sctp_sk(sk)->bind_hash) {
+ inet_sk(sk)->num = snum;
sk_add_bind_node(sk, &pp->owner);
sctp_sk(sk)->bind_hash = pp;
}
@@ -4888,12 +5148,16 @@ SCTP_STATIC int sctp_seqpacket_listen(struct sock *sk, int backlog)
* This is not currently spelled out in the SCTP sockets
* extensions draft, but follows the practice as seen in TCP
* sockets.
+ *
+ * Additionally, turn off the fastreuse flag since we are now listening
*/
+ sk->sk_state = SCTP_SS_LISTENING;
if (!ep->base.bind_addr.port) {
if (sctp_autobind(sk))
return -EAGAIN;
- }
- sk->sk_state = SCTP_SS_LISTENING;
+ } else
+ sctp_sk(sk)->bind_hash->fastreuse = 0;
+
sctp_hash_endpoint(ep);
return 0;
}
@@ -4931,11 +5195,13 @@ SCTP_STATIC int sctp_stream_listen(struct sock *sk, int backlog)
* extensions draft, but follows the practice as seen in TCP
* sockets.
*/
+ sk->sk_state = SCTP_SS_LISTENING;
if (!ep->base.bind_addr.port) {
if (sctp_autobind(sk))
return -EAGAIN;
- }
- sk->sk_state = SCTP_SS_LISTENING;
+ } else
+ sctp_sk(sk)->bind_hash->fastreuse = 0;
+
sk->sk_max_ack_backlog = backlog;
sctp_hash_endpoint(ep);
return 0;
@@ -4976,7 +5242,8 @@ int sctp_inet_listen(struct socket *sock, int backlog)
break;
default:
break;
- };
+ }
+
if (err)
goto cleanup;
@@ -5239,7 +5506,7 @@ SCTP_STATIC int sctp_msghdr_parse(const struct msghdr *msg,
default:
return -EINVAL;
- };
+ }
}
return 0;
}
@@ -5742,9 +6009,9 @@ static void sctp_sock_migrate(struct sock *oldsk, struct sock *newsk,
* 3) Peeling off non-partial delivery; move pd_lobby to receive_queue.
*/
skb_queue_head_init(&newsp->pd_lobby);
- sctp_sk(newsk)->pd_mode = assoc->ulpq.pd_mode;
+ atomic_set(&sctp_sk(newsk)->pd_mode, assoc->ulpq.pd_mode);
- if (sctp_sk(oldsk)->pd_mode) {
+ if (atomic_read(&sctp_sk(oldsk)->pd_mode)) {
struct sk_buff_head *queue;
/* Decide which queue to move pd_lobby skbs to. */
@@ -5770,7 +6037,7 @@ static void sctp_sock_migrate(struct sock *oldsk, struct sock *newsk,
* delivery to finish.
*/
if (assoc->ulpq.pd_mode)
- sctp_clear_pd(oldsk);
+ sctp_clear_pd(oldsk, NULL);
}
diff --git a/net/sctp/transport.c b/net/sctp/transport.c
index 4d8c2ab864f..961df275d5b 100644
--- a/net/sctp/transport.c
+++ b/net/sctp/transport.c
@@ -507,7 +507,7 @@ void sctp_transport_lower_cwnd(struct sctp_transport *transport,
transport->cwnd = max(transport->cwnd/2,
4*transport->asoc->pathmtu);
break;
- };
+ }
transport->partial_bytes_acked = 0;
SCTP_DEBUG_PRINTK("%s: transport: %p reason: %d cwnd: "
diff --git a/net/sctp/ulpevent.c b/net/sctp/ulpevent.c
index 2e11bc8d5d3..661ea2dd78b 100644
--- a/net/sctp/ulpevent.c
+++ b/net/sctp/ulpevent.c
@@ -131,19 +131,54 @@ static inline void sctp_ulpevent_release_owner(struct sctp_ulpevent *event)
struct sctp_ulpevent *sctp_ulpevent_make_assoc_change(
const struct sctp_association *asoc,
__u16 flags, __u16 state, __u16 error, __u16 outbound,
- __u16 inbound, gfp_t gfp)
+ __u16 inbound, struct sctp_chunk *chunk, gfp_t gfp)
{
struct sctp_ulpevent *event;
struct sctp_assoc_change *sac;
struct sk_buff *skb;
- event = sctp_ulpevent_new(sizeof(struct sctp_assoc_change),
+ /* If the lower layer passed in the chunk, it will be
+ * an ABORT, so we need to include it in the sac_info.
+ */
+ if (chunk) {
+ /* sctp_inq_pop() has already pulled off the chunk
+ * header. We need to put it back temporarily.
+ */
+ skb_push(chunk->skb, sizeof(sctp_chunkhdr_t));
+
+ /* Copy the chunk data to a new skb and reserve enough
+ * head room to use as notification.
+ */
+ skb = skb_copy_expand(chunk->skb,
+ sizeof(struct sctp_assoc_change), 0, gfp);
+
+ if (!skb)
+ goto fail;
+
+ /* put back the chunk header now that we have a copy */
+ skb_pull(chunk->skb, sizeof(sctp_chunkhdr_t));
+
+ /* Embed the event fields inside the copied skb. */
+ event = sctp_skb2event(skb);
+ sctp_ulpevent_init(event, MSG_NOTIFICATION, skb->truesize);
+
+ /* Include the notification structure */
+ sac = (struct sctp_assoc_change *)
+ skb_push(skb, sizeof(struct sctp_assoc_change));
+
+ /* Trim the buffer to the right length. */
+ skb_trim(skb, sizeof(struct sctp_assoc_change) +
+ ntohs(chunk->chunk_hdr->length));
+ } else {
+ event = sctp_ulpevent_new(sizeof(struct sctp_assoc_change),
MSG_NOTIFICATION, gfp);
- if (!event)
- goto fail;
- skb = sctp_event2skb(event);
- sac = (struct sctp_assoc_change *)
- skb_put(skb, sizeof(struct sctp_assoc_change));
+ if (!event)
+ goto fail;
+
+ skb = sctp_event2skb(event);
+ sac = (struct sctp_assoc_change *) skb_put(skb,
+ sizeof(struct sctp_assoc_change));
+ }
/* Socket Extensions for SCTP
* 5.3.1.1 SCTP_ASSOC_CHANGE
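The chunk-bearing branch above relies on skb headroom: skb_copy_expand() reserves space in front of the copied ABORT chunk, and skb_push() then lays the sctp_assoc_change header down in that space. A plain-buffer sketch of the same headroom trick, with illustrative sizes and names:

#include <stdio.h>
#include <string.h>

#define HDR 8                        /* notification header, assumed */

int main(void)
{
	char chunk[16] = "ABORT-CHUNK";
	char buf[HDR + sizeof(chunk)];

	memcpy(buf + HDR, chunk, sizeof(chunk));  /* reserve headroom */
	memcpy(buf, "SAC_HDR", HDR);              /* skb_push() analogue */
	printf("%s | %s\n", buf, buf + HDR);
	return 0;
}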
diff --git a/net/sctp/ulpqueue.c b/net/sctp/ulpqueue.c
index b29e3e4b72c..34eb977a204 100644
--- a/net/sctp/ulpqueue.c
+++ b/net/sctp/ulpqueue.c
@@ -138,26 +138,59 @@ int sctp_ulpq_tail_data(struct sctp_ulpq *ulpq, struct sctp_chunk *chunk,
/* Clear the partial delivery mode for this socket. Note: This
* assumes that no association is currently in partial delivery mode.
*/
-int sctp_clear_pd(struct sock *sk)
+int sctp_clear_pd(struct sock *sk, struct sctp_association *asoc)
{
struct sctp_sock *sp = sctp_sk(sk);
- sp->pd_mode = 0;
- if (!skb_queue_empty(&sp->pd_lobby)) {
- struct list_head *list;
- sctp_skb_list_tail(&sp->pd_lobby, &sk->sk_receive_queue);
- list = (struct list_head *)&sctp_sk(sk)->pd_lobby;
- INIT_LIST_HEAD(list);
- return 1;
+ if (atomic_dec_and_test(&sp->pd_mode)) {
+ /* This means there are no other associations in PD, so
+ * we can go ahead and clear out the lobby in one shot
+ */
+ if (!skb_queue_empty(&sp->pd_lobby)) {
+ struct list_head *list;
+ sctp_skb_list_tail(&sp->pd_lobby, &sk->sk_receive_queue);
+ list = (struct list_head *)&sctp_sk(sk)->pd_lobby;
+ INIT_LIST_HEAD(list);
+ return 1;
+ }
+ } else {
+ /* There are other associations in PD, so we only need to
+ * pull stuff out of the lobby that belongs to the
+ * association that is exiting PD (all of its notifications
+ * are posted here).
+ */
+ if (!skb_queue_empty(&sp->pd_lobby) && asoc) {
+ struct sk_buff *skb, *tmp;
+ struct sctp_ulpevent *event;
+
+ sctp_skb_for_each(skb, &sp->pd_lobby, tmp) {
+ event = sctp_skb2event(skb);
+ if (event->asoc == asoc) {
+ __skb_unlink(skb, &sp->pd_lobby);
+ __skb_queue_tail(&sk->sk_receive_queue,
+ skb);
+ }
+ }
+ }
}
+
return 0;
}
+/* Set the pd_mode on the socket and ulpq */
+static void sctp_ulpq_set_pd(struct sctp_ulpq *ulpq)
+{
+ struct sctp_sock *sp = sctp_sk(ulpq->asoc->base.sk);
+
+ atomic_inc(&sp->pd_mode);
+ ulpq->pd_mode = 1;
+}
+
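With this change the socket-wide pd_mode stops being a boolean and becomes, in effect, a count of associations currently in partial delivery, so the lobby is only flushed wholesale when the last one leaves. A toy model with plain ints standing in for the atomics:

#include <stdio.h>

static int pd_mode;                  /* socket-wide PD refcount */

static void set_pd(void) { pd_mode++; }

static void clear_pd(void)
{
	if (--pd_mode == 0)
		puts("last association left PD: flush whole lobby");
	else
		puts("others still in PD: move only this assoc's skbs");
}

int main(void)
{
	set_pd();  set_pd();             /* two associations enter PD */
	clear_pd();                      /* partial cleanup */
	clear_pd();                      /* full lobby flush */
	return 0;
}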
/* Clear the pd_mode and restart any pending messages waiting for delivery. */
static int sctp_ulpq_clear_pd(struct sctp_ulpq *ulpq)
{
ulpq->pd_mode = 0;
- return sctp_clear_pd(ulpq->asoc->base.sk);
+ return sctp_clear_pd(ulpq->asoc->base.sk, ulpq->asoc);
}
/* If the SKB of 'event' is on a list, it is the first such member
@@ -187,25 +220,35 @@ int sctp_ulpq_tail_event(struct sctp_ulpq *ulpq, struct sctp_ulpevent *event)
* the association the cause of the partial delivery.
*/
- if (!sctp_sk(sk)->pd_mode) {
+ if (atomic_read(&sctp_sk(sk)->pd_mode) == 0) {
queue = &sk->sk_receive_queue;
- } else if (ulpq->pd_mode) {
- /* If the association is in partial delivery, we
- * need to finish delivering the partially processed
- * packet before passing any other data. This is
- * because we don't truly support stream interleaving.
- */
- if ((event->msg_flags & MSG_NOTIFICATION) ||
- (SCTP_DATA_NOT_FRAG ==
- (event->msg_flags & SCTP_DATA_FRAG_MASK)))
- queue = &sctp_sk(sk)->pd_lobby;
- else {
- clear_pd = event->msg_flags & MSG_EOR;
- queue = &sk->sk_receive_queue;
+ } else {
+ if (ulpq->pd_mode) {
+ /* If the association is in partial delivery, we
+ * need to finish delivering the partially processed
+ * packet before passing any other data. This is
+ * because we don't truly support stream interleaving.
+ */
+ if ((event->msg_flags & MSG_NOTIFICATION) ||
+ (SCTP_DATA_NOT_FRAG ==
+ (event->msg_flags & SCTP_DATA_FRAG_MASK)))
+ queue = &sctp_sk(sk)->pd_lobby;
+ else {
+ clear_pd = event->msg_flags & MSG_EOR;
+ queue = &sk->sk_receive_queue;
+ }
+ } else {
+ /*
+ * If fragment interleave is enabled, we
+ * can queue this to the receive queue instead
+ * of the lobby.
+ */
+ if (sctp_sk(sk)->frag_interleave)
+ queue = &sk->sk_receive_queue;
+ else
+ queue = &sctp_sk(sk)->pd_lobby;
}
- } else
- queue = &sctp_sk(sk)->pd_lobby;
-
+ }
/* If we are harvesting multiple skbs they will be
* collected on a list.
@@ -348,7 +391,7 @@ static struct sctp_ulpevent *sctp_make_reassembled_event(struct sk_buff_head *qu
break;
pos->next = pnext;
pos = pnext;
- };
+ }
event = sctp_skb2event(f_frag);
SCTP_INC_STATS(SCTP_MIB_REASMUSRMSGS);
@@ -367,6 +410,11 @@ static inline struct sctp_ulpevent *sctp_ulpq_retrieve_reassembled(struct sctp_u
struct sk_buff *first_frag = NULL;
__u32 ctsn, next_tsn;
struct sctp_ulpevent *retval = NULL;
+ struct sk_buff *pd_first = NULL;
+ struct sk_buff *pd_last = NULL;
+ size_t pd_len = 0;
+ struct sctp_association *asoc;
+ u32 pd_point;
/* Initialized to 0 just to avoid compiler warning message. Will
* never be used with this value. It is referenced only after it
@@ -382,6 +430,10 @@ static inline struct sctp_ulpevent *sctp_ulpq_retrieve_reassembled(struct sctp_u
* we expect to find the remaining middle fragments and the last
* fragment in order. If not, first_frag is reset to NULL and we
* start the next pass when we find another first fragment.
+ *
+ * There is a potential to do partial delivery if the user sets the
+ * SCTP_PARTIAL_DELIVERY_POINT option. Let's count some things here
+ * to see if we can do PD.
*/
skb_queue_walk(&ulpq->reasm, pos) {
cevent = sctp_skb2event(pos);
@@ -389,14 +441,32 @@ static inline struct sctp_ulpevent *sctp_ulpq_retrieve_reassembled(struct sctp_u
switch (cevent->msg_flags & SCTP_DATA_FRAG_MASK) {
case SCTP_DATA_FIRST_FRAG:
+ /* If this "FIRST_FRAG" is the first
+ * element in the queue, then count it towards
+ * possible PD.
+ */
+ if (pos == ulpq->reasm.next) {
+ pd_first = pos;
+ pd_last = pos;
+ pd_len = pos->len;
+ } else {
+ pd_first = NULL;
+ pd_last = NULL;
+ pd_len = 0;
+ }
+
first_frag = pos;
next_tsn = ctsn + 1;
break;
case SCTP_DATA_MIDDLE_FRAG:
- if ((first_frag) && (ctsn == next_tsn))
+ if ((first_frag) && (ctsn == next_tsn)) {
next_tsn++;
- else
+ if (pd_first) {
+ pd_last = pos;
+ pd_len += pos->len;
+ }
+ } else
first_frag = NULL;
break;
@@ -406,8 +476,29 @@ static inline struct sctp_ulpevent *sctp_ulpq_retrieve_reassembled(struct sctp_u
else
first_frag = NULL;
break;
- };
+ }
+ }
+ asoc = ulpq->asoc;
+ if (pd_first) {
+ /* Make sure we can enter partial delivery.
+ * We can trigger partial delivery only if fragment
+ * interleave is set, or the socket is not already
+ * in partial delivery.
+ */
+ if (!sctp_sk(asoc->base.sk)->frag_interleave &&
+ atomic_read(&sctp_sk(asoc->base.sk)->pd_mode))
+ goto done;
+
+ cevent = sctp_skb2event(pd_first);
+ pd_point = sctp_sk(asoc->base.sk)->pd_point;
+ if (pd_point && pd_point <= pd_len) {
+ retval = sctp_make_reassembled_event(&ulpq->reasm,
+ pd_first,
+ pd_last);
+ if (retval)
+ sctp_ulpq_set_pd(ulpq);
+ }
}
done:
return retval;
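The accounting added above walks the reassembly queue, tracks the byte length of the leading in-order fragment run, and triggers early delivery once that run reaches the configured partial delivery point. A purely illustrative model of the pd_len/pd_point comparison:

#include <stddef.h>
#include <stdio.h>

int main(void)
{
	size_t frag_len[] = { 1200, 1200, 1200 };  /* FIRST, MIDDLE, MIDDLE */
	size_t pd_point = 3000, pd_len = 0;
	size_t i, n = sizeof(frag_len) / sizeof(frag_len[0]);

	for (i = 0; i < n; i++) {
		pd_len += frag_len[i];     /* run remains in order */
		if (pd_point && pd_point <= pd_len) {
			printf("deliver %zu bytes early (point %zu)\n",
			       pd_len, pd_point);
			break;
		}
	}
	return 0;
}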
@@ -465,7 +556,7 @@ static inline struct sctp_ulpevent *sctp_ulpq_retrieve_partial(struct sctp_ulpq
goto done;
default:
return NULL;
- };
+ }
}
/* We have the reassembled event. There is no need to look
@@ -557,7 +648,7 @@ static inline struct sctp_ulpevent *sctp_ulpq_retrieve_first(struct sctp_ulpq *u
break;
default:
return NULL;
- };
+ }
}
/* We have the reassembled event. There is no need to look
@@ -826,19 +917,29 @@ void sctp_ulpq_partial_delivery(struct sctp_ulpq *ulpq,
{
struct sctp_ulpevent *event;
struct sctp_association *asoc;
+ struct sctp_sock *sp;
asoc = ulpq->asoc;
+ sp = sctp_sk(asoc->base.sk);
- /* Are we already in partial delivery mode? */
- if (!sctp_sk(asoc->base.sk)->pd_mode) {
+ /* If the association is already in Partial Delivery mode,
+ * we have nothing to do.
+ */
+ if (ulpq->pd_mode)
+ return;
+ /* If the user enabled the fragment interleave socket option,
+ * multiple associations can enter partial delivery.
+ * Otherwise, we can only enter partial delivery if the
+ * socket is not in partial delivery mode.
+ */
+ if (sp->frag_interleave || atomic_read(&sp->pd_mode) == 0) {
/* Is partial delivery possible? */
event = sctp_ulpq_retrieve_first(ulpq);
/* Send event to the ULP. */
if (event) {
sctp_ulpq_tail_event(ulpq, event);
- sctp_sk(asoc->base.sk)->pd_mode = 1;
- ulpq->pd_mode = 1;
+ sctp_ulpq_set_pd(ulpq);
return;
}
}
diff --git a/net/socket.c b/net/socket.c
index ea8f81abc45..1ad62c08377 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -585,6 +585,37 @@ int kernel_sendmsg(struct socket *sock, struct msghdr *msg,
return result;
}
+/*
+ * called from sock_recv_timestamp() if sock_flag(sk, SOCK_RCVTSTAMP)
+ */
+void __sock_recv_timestamp(struct msghdr *msg, struct sock *sk,
+ struct sk_buff *skb)
+{
+ ktime_t kt = skb->tstamp;
+
+ if (!sock_flag(sk, SOCK_RCVTSTAMPNS)) {
+ struct timeval tv;
+ /* Race occurred between timestamp enabling and packet
+ receiving. Fill in the current time for now. */
+ if (kt.tv64 == 0)
+ kt = ktime_get_real();
+ skb->tstamp = kt;
+ tv = ktime_to_timeval(kt);
+ put_cmsg(msg, SOL_SOCKET, SCM_TIMESTAMP, sizeof(tv), &tv);
+ } else {
+ struct timespec ts;
+ /* Race occurred between timestamp enabling and packet
+ receiving. Fill in the current time for now. */
+ if (kt.tv64 == 0)
+ kt = ktime_get_real();
+ skb->tstamp = kt;
+ ts = ktime_to_timespec(kt);
+ put_cmsg(msg, SOL_SOCKET, SCM_TIMESTAMPNS, sizeof(ts), &ts);
+ }
+}
+
+EXPORT_SYMBOL_GPL(__sock_recv_timestamp);
+
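A hedged usage sketch of the nanosecond path from userspace, assuming a Linux system that exposes SO_TIMESTAMPNS: once enabled, recvmsg() delivers an SCM_TIMESTAMPNS control message carrying a struct timespec, which is exactly the branch __sock_recv_timestamp() takes above.

#include <stdio.h>
#include <sys/socket.h>
#include <unistd.h>

int main(void)
{
	int fd = socket(AF_INET, SOCK_DGRAM, 0);
	int on = 1;

	if (setsockopt(fd, SOL_SOCKET, SO_TIMESTAMPNS, &on, sizeof(on)) < 0)
		perror("SO_TIMESTAMPNS");
	/* A subsequent recvmsg() on fd would now include a cmsg with
	 * cmsg_level == SOL_SOCKET and cmsg_type == SCM_TIMESTAMPNS. */
	close(fd);
	return 0;
}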
static inline int __sock_recvmsg(struct kiocb *iocb, struct socket *sock,
struct msghdr *msg, size_t size, int flags)
{
@@ -1292,7 +1323,7 @@ asmlinkage long sys_bind(int fd, struct sockaddr __user *umyaddr, int addrlen)
int err, fput_needed;
sock = sockfd_lookup_light(fd, &err, &fput_needed);
- if(sock) {
+ if (sock) {
err = move_addr_to_kernel(umyaddr, addrlen, address);
if (err >= 0) {
err = security_socket_bind(sock,
diff --git a/net/sunrpc/Makefile b/net/sunrpc/Makefile
index cdcab9ca4c6..8ebfc4db7f5 100644
--- a/net/sunrpc/Makefile
+++ b/net/sunrpc/Makefile
@@ -9,7 +9,7 @@ obj-$(CONFIG_SUNRPC_GSS) += auth_gss/
sunrpc-y := clnt.o xprt.o socklib.o xprtsock.o sched.o \
auth.o auth_null.o auth_unix.o \
svc.o svcsock.o svcauth.o svcauth_unix.o \
- pmap_clnt.o timer.o xdr.o \
+ rpcb_clnt.o timer.o xdr.o \
sunrpc_syms.o cache.o rpc_pipe.o
sunrpc-$(CONFIG_PROC_FS) += stats.o
sunrpc-$(CONFIG_SYSCTL) += sysctl.o
diff --git a/net/sunrpc/auth_gss/gss_spkm3_seal.c b/net/sunrpc/auth_gss/gss_spkm3_seal.c
index 104cbf4f769..d158635de6c 100644
--- a/net/sunrpc/auth_gss/gss_spkm3_seal.c
+++ b/net/sunrpc/auth_gss/gss_spkm3_seal.c
@@ -123,9 +123,6 @@ spkm3_make_token(struct spkm3_ctx *ctx,
return GSS_S_COMPLETE;
out_err:
- if (md5cksum.data)
- kfree(md5cksum.data);
-
token->data = NULL;
token->len = 0;
return GSS_S_FAILURE;
@@ -152,7 +149,7 @@ make_spkm3_checksum(s32 cksumtype, struct xdr_netobj *key, char *header,
switch (cksumtype) {
case CKSUMTYPE_HMAC_MD5:
- cksumname = "md5";
+ cksumname = "hmac(md5)";
break;
default:
dprintk("RPC: spkm3_make_checksum:"
@@ -172,8 +169,12 @@ make_spkm3_checksum(s32 cksumtype, struct xdr_netobj *key, char *header,
if (err)
goto out;
+ err = crypto_hash_init(&desc);
+ if (err)
+ goto out;
+
sg_set_buf(sg, header, hdrlen);
- crypto_hash_update(&desc, sg, 1);
+ crypto_hash_update(&desc, sg, sg->length);
xdr_process_buf(body, body_offset, body->len - body_offset,
spkm3_checksummer, &desc);
@@ -184,5 +185,3 @@ out:
return err ? GSS_S_FAILURE : 0;
}
-
-EXPORT_SYMBOL(make_spkm3_checksum);
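
The "md5" to "hmac(md5)" rename follows the crypto layer's template syntax, in which a keyed hash is instantiated by wrapping a digest in hmac(); the added crypto_hash_init() call is required before any update. A hedged sketch of the full calling sequence the fixed make_spkm3_checksum() now performs; the key and output buffer names are hypothetical:

    #include <linux/crypto.h>
    #include <linux/scatterlist.h>
    #include <linux/err.h>
    #include <linux/sunrpc/xdr.h>

    /* Sketch of the legacy crypto_hash calling sequence used here. */
    static int hmac_md5_digest(struct xdr_netobj *key, char *header,
                               unsigned int hdrlen, u8 *out)
    {
            struct hash_desc desc;
            struct scatterlist sg[1];
            int err;

            desc.tfm = crypto_alloc_hash("hmac(md5)", 0, CRYPTO_ALG_ASYNC);
            if (IS_ERR(desc.tfm))
                    return PTR_ERR(desc.tfm);
            desc.flags = CRYPTO_TFM_REQ_MAY_SLEEP;

            err = crypto_hash_setkey(desc.tfm, key->data, key->len);
            if (!err)
                    err = crypto_hash_init(&desc); /* must precede updates */
            if (!err) {
                    sg_set_buf(sg, header, hdrlen);
                    /* note: the length argument is in bytes, hence sg->length */
                    err = crypto_hash_update(&desc, sg, sg->length);
            }
            if (!err)
                    err = crypto_hash_final(&desc, out);
            crypto_free_hash(desc.tfm);
            return err;
    }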
diff --git a/net/sunrpc/cache.c b/net/sunrpc/cache.c
index f02f24ae946..543b085ae2c 100644
--- a/net/sunrpc/cache.c
+++ b/net/sunrpc/cache.c
@@ -1237,20 +1237,12 @@ static int content_open(struct inode *inode, struct file *file)
return res;
}
-static int content_release(struct inode *inode, struct file *file)
-{
- struct seq_file *m = (struct seq_file *)file->private_data;
- struct handle *han = m->private;
- kfree(han);
- m->private = NULL;
- return seq_release(inode, file);
-}
static const struct file_operations content_file_operations = {
.open = content_open,
.read = seq_read,
.llseek = seq_lseek,
- .release = content_release,
+ .release = seq_release_private,
};
static ssize_t read_flush(struct file *file, char __user *buf,
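
content_release() was duplicating a helper the seq_file core already exports: seq_release_private() frees the seq_file's private pointer before releasing the seq_file itself. Approximately, as a paraphrase rather than the verbatim source:

    #include <linux/seq_file.h>
    #include <linux/slab.h>

    /* Approximate behaviour of seq_release_private(). */
    int seq_release_private(struct inode *inode, struct file *file)
    {
            struct seq_file *seq = file->private_data;

            kfree(seq->private);
            seq->private = NULL;
            return seq_release(inode, file);
    }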
diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c
index 396cdbe249d..d8fbee40a19 100644
--- a/net/sunrpc/clnt.c
+++ b/net/sunrpc/clnt.c
@@ -36,8 +36,6 @@
#include <linux/sunrpc/metrics.h>
-#define RPC_SLACK_SPACE (1024) /* total overkill */
-
#ifdef RPC_DEBUG
# define RPCDBG_FACILITY RPCDBG_CALL
#endif
@@ -747,21 +745,38 @@ call_reserveresult(struct rpc_task *task)
static void
call_allocate(struct rpc_task *task)
{
+ unsigned int slack = task->tk_auth->au_cslack;
struct rpc_rqst *req = task->tk_rqstp;
struct rpc_xprt *xprt = task->tk_xprt;
- unsigned int bufsiz;
+ struct rpc_procinfo *proc = task->tk_msg.rpc_proc;
dprint_status(task);
+ task->tk_status = 0;
task->tk_action = call_bind;
+
if (req->rq_buffer)
return;
- /* FIXME: compute buffer requirements more exactly using
- * auth->au_wslack */
- bufsiz = task->tk_msg.rpc_proc->p_bufsiz + RPC_SLACK_SPACE;
+ if (proc->p_proc != 0) {
+ BUG_ON(proc->p_arglen == 0);
+ if (proc->p_decode != NULL)
+ BUG_ON(proc->p_replen == 0);
+ }
- if (xprt->ops->buf_alloc(task, bufsiz << 1) != NULL)
+ /*
+ * Calculate the size (in quads) of the RPC call
+ * and reply headers, and convert both values
+ * to byte sizes.
+ */
+ req->rq_callsize = RPC_CALLHDRSIZE + (slack << 1) + proc->p_arglen;
+ req->rq_callsize <<= 2;
+ req->rq_rcvsize = RPC_REPHDRSIZE + slack + proc->p_replen;
+ req->rq_rcvsize <<= 2;
+
+ req->rq_buffer = xprt->ops->buf_alloc(task,
+ req->rq_callsize + req->rq_rcvsize);
+ if (req->rq_buffer != NULL)
return;
dprintk("RPC: %5u rpc_buffer allocation failed\n", task->tk_pid);
@@ -788,6 +803,17 @@ rpc_task_force_reencode(struct rpc_task *task)
task->tk_rqstp->rq_snd_buf.len = 0;
}
+static inline void
+rpc_xdr_buf_init(struct xdr_buf *buf, void *start, size_t len)
+{
+ buf->head[0].iov_base = start;
+ buf->head[0].iov_len = len;
+ buf->tail[0].iov_len = 0;
+ buf->page_len = 0;
+ buf->len = 0;
+ buf->buflen = len;
+}
+
/*
* 3. Encode arguments of an RPC call
*/
@@ -795,28 +821,17 @@ static void
call_encode(struct rpc_task *task)
{
struct rpc_rqst *req = task->tk_rqstp;
- struct xdr_buf *sndbuf = &req->rq_snd_buf;
- struct xdr_buf *rcvbuf = &req->rq_rcv_buf;
- unsigned int bufsiz;
kxdrproc_t encode;
__be32 *p;
dprint_status(task);
- /* Default buffer setup */
- bufsiz = req->rq_bufsize >> 1;
- sndbuf->head[0].iov_base = (void *)req->rq_buffer;
- sndbuf->head[0].iov_len = bufsiz;
- sndbuf->tail[0].iov_len = 0;
- sndbuf->page_len = 0;
- sndbuf->len = 0;
- sndbuf->buflen = bufsiz;
- rcvbuf->head[0].iov_base = (void *)((char *)req->rq_buffer + bufsiz);
- rcvbuf->head[0].iov_len = bufsiz;
- rcvbuf->tail[0].iov_len = 0;
- rcvbuf->page_len = 0;
- rcvbuf->len = 0;
- rcvbuf->buflen = bufsiz;
+ rpc_xdr_buf_init(&req->rq_snd_buf,
+ req->rq_buffer,
+ req->rq_callsize);
+ rpc_xdr_buf_init(&req->rq_rcv_buf,
+ (char *)req->rq_buffer + req->rq_callsize,
+ req->rq_rcvsize);
/* Encode header and provided arguments */
encode = task->tk_msg.rpc_proc->p_encode;
@@ -887,9 +902,11 @@ call_bind_status(struct rpc_task *task)
task->tk_pid);
break;
case -EPROTONOSUPPORT:
- dprintk("RPC: %5u remote rpcbind version 2 unavailable\n",
+ dprintk("RPC: %5u remote rpcbind version unavailable, retrying\n",
task->tk_pid);
- break;
+ task->tk_status = 0;
+ task->tk_action = call_bind;
+ return;
default:
dprintk("RPC: %5u unrecognized rpcbind error (%d)\n",
task->tk_pid, -task->tk_status);
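
The new call_allocate() sizes the call and reply buffers exactly, rather than padding p_bufsiz with the old RPC_SLACK_SPACE guess. All quantities are XDR quads (32-bit words); the << 2 converts to bytes, and a single allocation backs both halves. A worked example with assumed header sizes and hypothetical slack and procedure values:

    /* The real constants live in include/linux/sunrpc/msg_prot.h;
     * the values below are assumptions for illustration. */
    #define RPC_CALLHDRSIZE 6       /* call header, in quads (assumed) */
    #define RPC_REPHDRSIZE  4       /* reply header, in quads (assumed) */

    static void example_sizes(void)
    {
            unsigned int slack = 10;   /* hypothetical au_cslack, in quads */
            unsigned int callsize, rcvsize;

            callsize = RPC_CALLHDRSIZE + (slack << 1) + 20; /* 6+20+20 = 46 quads */
            callsize <<= 2;                                 /* 184 bytes */
            rcvsize = RPC_REPHDRSIZE + slack + 30;          /* 4+10+30 = 44 quads */
            rcvsize <<= 2;                                  /* 176 bytes */
            /* buf_alloc() is then asked for 184 + 176 = 360 bytes, which
             * call_encode() later splits via rpc_xdr_buf_init(). */
            (void)callsize;
            (void)rcvsize;
    }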
diff --git a/net/sunrpc/pmap_clnt.c b/net/sunrpc/pmap_clnt.c
deleted file mode 100644
index d9f76534458..00000000000
--- a/net/sunrpc/pmap_clnt.c
+++ /dev/null
@@ -1,383 +0,0 @@
-/*
- * linux/net/sunrpc/pmap_clnt.c
- *
- * In-kernel RPC portmapper client.
- *
- * Portmapper supports version 2 of the rpcbind protocol (RFC 1833).
- *
- * Copyright (C) 1996, Olaf Kirch <okir@monad.swb.de>
- */
-
-#include <linux/types.h>
-#include <linux/socket.h>
-#include <linux/kernel.h>
-#include <linux/errno.h>
-#include <linux/uio.h>
-#include <linux/in.h>
-#include <linux/sunrpc/clnt.h>
-#include <linux/sunrpc/sched.h>
-
-#ifdef RPC_DEBUG
-# define RPCDBG_FACILITY RPCDBG_PMAP
-#endif
-
-#define PMAP_SET 1
-#define PMAP_UNSET 2
-#define PMAP_GETPORT 3
-
-struct portmap_args {
- u32 pm_prog;
- u32 pm_vers;
- u32 pm_prot;
- unsigned short pm_port;
- struct rpc_xprt * pm_xprt;
-};
-
-static struct rpc_procinfo pmap_procedures[];
-static struct rpc_clnt * pmap_create(char *, struct sockaddr_in *, int, int);
-static void pmap_getport_done(struct rpc_task *, void *);
-static struct rpc_program pmap_program;
-
-static void pmap_getport_prepare(struct rpc_task *task, void *calldata)
-{
- struct portmap_args *map = calldata;
- struct rpc_message msg = {
- .rpc_proc = &pmap_procedures[PMAP_GETPORT],
- .rpc_argp = map,
- .rpc_resp = &map->pm_port,
- };
-
- rpc_call_setup(task, &msg, 0);
-}
-
-static inline struct portmap_args *pmap_map_alloc(void)
-{
- return kmalloc(sizeof(struct portmap_args), GFP_NOFS);
-}
-
-static inline void pmap_map_free(struct portmap_args *map)
-{
- kfree(map);
-}
-
-static void pmap_map_release(void *data)
-{
- struct portmap_args *map = data;
-
- xprt_put(map->pm_xprt);
- pmap_map_free(map);
-}
-
-static const struct rpc_call_ops pmap_getport_ops = {
- .rpc_call_prepare = pmap_getport_prepare,
- .rpc_call_done = pmap_getport_done,
- .rpc_release = pmap_map_release,
-};
-
-static inline void pmap_wake_portmap_waiters(struct rpc_xprt *xprt, int status)
-{
- xprt_clear_binding(xprt);
- rpc_wake_up_status(&xprt->binding, status);
-}
-
-/**
- * rpc_getport - obtain the port for a given RPC service on a given host
- * @task: task that is waiting for portmapper request
- *
- * This one can be called for an ongoing RPC request, and can be used in
- * an async (rpciod) context.
- */
-void rpc_getport(struct rpc_task *task)
-{
- struct rpc_clnt *clnt = task->tk_client;
- struct rpc_xprt *xprt = task->tk_xprt;
- struct sockaddr_in addr;
- struct portmap_args *map;
- struct rpc_clnt *pmap_clnt;
- struct rpc_task *child;
- int status;
-
- dprintk("RPC: %5u rpc_getport(%s, %u, %u, %d)\n",
- task->tk_pid, clnt->cl_server,
- clnt->cl_prog, clnt->cl_vers, xprt->prot);
-
- /* Autobind on cloned rpc clients is discouraged */
- BUG_ON(clnt->cl_parent != clnt);
-
- status = -EACCES; /* tell caller to check again */
- if (xprt_test_and_set_binding(xprt))
- goto bailout_nowake;
-
- /* Put self on queue before sending rpcbind request, in case
- * pmap_getport_done completes before we return from rpc_run_task */
- rpc_sleep_on(&xprt->binding, task, NULL, NULL);
-
- /* Someone else may have bound if we slept */
- status = 0;
- if (xprt_bound(xprt))
- goto bailout_nofree;
-
- status = -ENOMEM;
- map = pmap_map_alloc();
- if (!map)
- goto bailout_nofree;
- map->pm_prog = clnt->cl_prog;
- map->pm_vers = clnt->cl_vers;
- map->pm_prot = xprt->prot;
- map->pm_port = 0;
- map->pm_xprt = xprt_get(xprt);
-
- rpc_peeraddr(clnt, (struct sockaddr *) &addr, sizeof(addr));
- pmap_clnt = pmap_create(clnt->cl_server, &addr, map->pm_prot, 0);
- status = PTR_ERR(pmap_clnt);
- if (IS_ERR(pmap_clnt))
- goto bailout;
-
- status = -EIO;
- child = rpc_run_task(pmap_clnt, RPC_TASK_ASYNC, &pmap_getport_ops, map);
- if (IS_ERR(child))
- goto bailout_nofree;
- rpc_put_task(child);
-
- task->tk_xprt->stat.bind_count++;
- return;
-
-bailout:
- pmap_map_free(map);
- xprt_put(xprt);
-bailout_nofree:
- pmap_wake_portmap_waiters(xprt, status);
-bailout_nowake:
- task->tk_status = status;
-}
-
-#ifdef CONFIG_ROOT_NFS
-/**
- * rpc_getport_external - obtain the port for a given RPC service on a given host
- * @sin: address of remote peer
- * @prog: RPC program number to bind
- * @vers: RPC version number to bind
- * @prot: transport protocol to use to make this request
- *
- * This one is called from outside the RPC client in a synchronous task context.
- */
-int rpc_getport_external(struct sockaddr_in *sin, __u32 prog, __u32 vers, int prot)
-{
- struct portmap_args map = {
- .pm_prog = prog,
- .pm_vers = vers,
- .pm_prot = prot,
- .pm_port = 0
- };
- struct rpc_message msg = {
- .rpc_proc = &pmap_procedures[PMAP_GETPORT],
- .rpc_argp = &map,
- .rpc_resp = &map.pm_port,
- };
- struct rpc_clnt *pmap_clnt;
- char hostname[32];
- int status;
-
- dprintk("RPC: rpc_getport_external(%u.%u.%u.%u, %u, %u, %d)\n",
- NIPQUAD(sin->sin_addr.s_addr), prog, vers, prot);
-
- sprintf(hostname, "%u.%u.%u.%u", NIPQUAD(sin->sin_addr.s_addr));
- pmap_clnt = pmap_create(hostname, sin, prot, 0);
- if (IS_ERR(pmap_clnt))
- return PTR_ERR(pmap_clnt);
-
- /* Setup the call info struct */
- status = rpc_call_sync(pmap_clnt, &msg, 0);
-
- if (status >= 0) {
- if (map.pm_port != 0)
- return map.pm_port;
- status = -EACCES;
- }
- return status;
-}
-#endif
-
-/*
- * Portmapper child task invokes this callback via tk_exit.
- */
-static void pmap_getport_done(struct rpc_task *child, void *data)
-{
- struct portmap_args *map = data;
- struct rpc_xprt *xprt = map->pm_xprt;
- int status = child->tk_status;
-
- if (status < 0) {
- /* Portmapper not available */
- xprt->ops->set_port(xprt, 0);
- } else if (map->pm_port == 0) {
- /* Requested RPC service wasn't registered */
- xprt->ops->set_port(xprt, 0);
- status = -EACCES;
- } else {
- /* Succeeded */
- xprt->ops->set_port(xprt, map->pm_port);
- xprt_set_bound(xprt);
- status = 0;
- }
-
- dprintk("RPC: %5u pmap_getport_done(status %d, port %u)\n",
- child->tk_pid, status, map->pm_port);
-
- pmap_wake_portmap_waiters(xprt, status);
-}
-
-/**
- * rpc_register - set or unset a port registration with the local portmapper
- * @prog: RPC program number to bind
- * @vers: RPC version number to bind
- * @prot: transport protocol to use to make this request
- * @port: port value to register
- * @okay: result code
- *
- * port == 0 means unregister, port != 0 means register.
- */
-int rpc_register(u32 prog, u32 vers, int prot, unsigned short port, int *okay)
-{
- struct sockaddr_in sin = {
- .sin_family = AF_INET,
- .sin_addr.s_addr = htonl(INADDR_LOOPBACK),
- };
- struct portmap_args map = {
- .pm_prog = prog,
- .pm_vers = vers,
- .pm_prot = prot,
- .pm_port = port,
- };
- struct rpc_message msg = {
- .rpc_proc = &pmap_procedures[port ? PMAP_SET : PMAP_UNSET],
- .rpc_argp = &map,
- .rpc_resp = okay,
- };
- struct rpc_clnt *pmap_clnt;
- int error = 0;
-
- dprintk("RPC: registering (%u, %u, %d, %u) with portmapper.\n",
- prog, vers, prot, port);
-
- pmap_clnt = pmap_create("localhost", &sin, IPPROTO_UDP, 1);
- if (IS_ERR(pmap_clnt)) {
- error = PTR_ERR(pmap_clnt);
- dprintk("RPC: couldn't create pmap client. Error = %d\n",
- error);
- return error;
- }
-
- error = rpc_call_sync(pmap_clnt, &msg, 0);
-
- if (error < 0) {
- printk(KERN_WARNING
- "RPC: failed to contact portmap (errno %d).\n",
- error);
- }
- dprintk("RPC: registration status %d/%d\n", error, *okay);
-
- /* Client deleted automatically because cl_oneshot == 1 */
- return error;
-}
-
-static struct rpc_clnt *pmap_create(char *hostname, struct sockaddr_in *srvaddr, int proto, int privileged)
-{
- struct rpc_create_args args = {
- .protocol = proto,
- .address = (struct sockaddr *)srvaddr,
- .addrsize = sizeof(*srvaddr),
- .servername = hostname,
- .program = &pmap_program,
- .version = RPC_PMAP_VERSION,
- .authflavor = RPC_AUTH_UNIX,
- .flags = (RPC_CLNT_CREATE_ONESHOT |
- RPC_CLNT_CREATE_NOPING),
- };
-
- srvaddr->sin_port = htons(RPC_PMAP_PORT);
- if (!privileged)
- args.flags |= RPC_CLNT_CREATE_NONPRIVPORT;
- return rpc_create(&args);
-}
-
-/*
- * XDR encode/decode functions for PMAP
- */
-static int xdr_encode_mapping(struct rpc_rqst *req, __be32 *p, struct portmap_args *map)
-{
- dprintk("RPC: xdr_encode_mapping(%u, %u, %u, %u)\n",
- map->pm_prog, map->pm_vers,
- map->pm_prot, map->pm_port);
- *p++ = htonl(map->pm_prog);
- *p++ = htonl(map->pm_vers);
- *p++ = htonl(map->pm_prot);
- *p++ = htonl(map->pm_port);
-
- req->rq_slen = xdr_adjust_iovec(req->rq_svec, p);
- return 0;
-}
-
-static int xdr_decode_port(struct rpc_rqst *req, __be32 *p, unsigned short *portp)
-{
- *portp = (unsigned short) ntohl(*p++);
- return 0;
-}
-
-static int xdr_decode_bool(struct rpc_rqst *req, __be32 *p, unsigned int *boolp)
-{
- *boolp = (unsigned int) ntohl(*p++);
- return 0;
-}
-
-static struct rpc_procinfo pmap_procedures[] = {
-[PMAP_SET] = {
- .p_proc = PMAP_SET,
- .p_encode = (kxdrproc_t) xdr_encode_mapping,
- .p_decode = (kxdrproc_t) xdr_decode_bool,
- .p_bufsiz = 4,
- .p_count = 1,
- .p_statidx = PMAP_SET,
- .p_name = "SET",
- },
-[PMAP_UNSET] = {
- .p_proc = PMAP_UNSET,
- .p_encode = (kxdrproc_t) xdr_encode_mapping,
- .p_decode = (kxdrproc_t) xdr_decode_bool,
- .p_bufsiz = 4,
- .p_count = 1,
- .p_statidx = PMAP_UNSET,
- .p_name = "UNSET",
- },
-[PMAP_GETPORT] = {
- .p_proc = PMAP_GETPORT,
- .p_encode = (kxdrproc_t) xdr_encode_mapping,
- .p_decode = (kxdrproc_t) xdr_decode_port,
- .p_bufsiz = 4,
- .p_count = 1,
- .p_statidx = PMAP_GETPORT,
- .p_name = "GETPORT",
- },
-};
-
-static struct rpc_version pmap_version2 = {
- .number = 2,
- .nrprocs = 4,
- .procs = pmap_procedures
-};
-
-static struct rpc_version * pmap_version[] = {
- NULL,
- NULL,
- &pmap_version2
-};
-
-static struct rpc_stat pmap_stats;
-
-static struct rpc_program pmap_program = {
- .name = "portmap",
- .number = RPC_PMAP_PROGRAM,
- .nrvers = ARRAY_SIZE(pmap_version),
- .version = pmap_version,
- .stats = &pmap_stats,
-};
diff --git a/net/sunrpc/rpcb_clnt.c b/net/sunrpc/rpcb_clnt.c
new file mode 100644
index 00000000000..6c7aa8a1f0c
--- /dev/null
+++ b/net/sunrpc/rpcb_clnt.c
@@ -0,0 +1,625 @@
+/*
+ * In-kernel rpcbind client supporting versions 2, 3, and 4 of the rpcbind
+ * protocol
+ *
+ * Based on RFC 1833: "Binding Protocols for ONC RPC Version 2" and
+ * RFC 3530: "Network File System (NFS) version 4 Protocol"
+ *
+ * Original: Gilles Quillard, Bull Open Source, 2005 <gilles.quillard@bull.net>
+ * Updated: Chuck Lever, Oracle Corporation, 2007 <chuck.lever@oracle.com>
+ *
+ * Descended from net/sunrpc/pmap_clnt.c,
+ * Copyright (C) 1996, Olaf Kirch <okir@monad.swb.de>
+ */
+
+#include <linux/types.h>
+#include <linux/socket.h>
+#include <linux/kernel.h>
+#include <linux/errno.h>
+
+#include <linux/sunrpc/clnt.h>
+#include <linux/sunrpc/sched.h>
+
+#ifdef RPC_DEBUG
+# define RPCDBG_FACILITY RPCDBG_BIND
+#endif
+
+#define RPCBIND_PROGRAM (100000u)
+#define RPCBIND_PORT (111u)
+
+enum {
+ RPCBPROC_NULL,
+ RPCBPROC_SET,
+ RPCBPROC_UNSET,
+ RPCBPROC_GETPORT,
+ RPCBPROC_GETADDR = 3, /* alias for GETPORT */
+ RPCBPROC_DUMP,
+ RPCBPROC_CALLIT,
+ RPCBPROC_BCAST = 5, /* alias for CALLIT */
+ RPCBPROC_GETTIME,
+ RPCBPROC_UADDR2TADDR,
+ RPCBPROC_TADDR2UADDR,
+ RPCBPROC_GETVERSADDR,
+ RPCBPROC_INDIRECT,
+ RPCBPROC_GETADDRLIST,
+ RPCBPROC_GETSTAT,
+};
+
+#define RPCB_HIGHPROC_2 RPCBPROC_CALLIT
+#define RPCB_HIGHPROC_3 RPCBPROC_TADDR2UADDR
+#define RPCB_HIGHPROC_4 RPCBPROC_GETSTAT
+
+/*
+ * r_addr
+ *
+ * Quoting RFC 3530, section 2.2:
+ *
+ * For TCP over IPv4 and for UDP over IPv4, the format of r_addr is the
+ * US-ASCII string:
+ *
+ * h1.h2.h3.h4.p1.p2
+ *
+ * The prefix, "h1.h2.h3.h4", is the standard textual form for
+ * representing an IPv4 address, which is always four octets long.
+ * Assuming big-endian ordering, h1, h2, h3, and h4, are respectively,
+ * the first through fourth octets each converted to ASCII-decimal.
+ * Assuming big-endian ordering, p1 and p2 are, respectively, the first
+ * and second octets each converted to ASCII-decimal. For example, if a
+ * host, in big-endian order, has an address of 0x0A010307 and there is
+ * a service listening on, in big endian order, port 0x020F (decimal
+ * 527), then the complete universal address is "10.1.3.7.2.15".
+ *
+ * ...
+ *
+ * For TCP over IPv6 and for UDP over IPv6, the format of r_addr is the
+ * US-ASCII string:
+ *
+ * x1:x2:x3:x4:x5:x6:x7:x8.p1.p2
+ *
+ * The suffix "p1.p2" is the service port, and is computed the same way
+ * as with universal addresses for TCP and UDP over IPv4. The prefix,
+ * "x1:x2:x3:x4:x5:x6:x7:x8", is the standard textual form for
+ * representing an IPv6 address as defined in Section 2.2 of [RFC2373].
+ * Additionally, the two alternative forms specified in Section 2.2 of
+ * [RFC2373] are also acceptable.
+ *
+ * XXX: Currently this implementation does not explicitly convert the
+ * stored address to US-ASCII on non-ASCII systems.
+ */
+#define RPCB_MAXADDRLEN (128u)
+
+/*
+ * r_netid
+ *
+ * Quoting RFC 3530, section 2.2:
+ *
+ * For TCP over IPv4 the value of r_netid is the string "tcp". For UDP
+ * over IPv4 the value of r_netid is the string "udp".
+ *
+ * ...
+ *
+ * For TCP over IPv6 the value of r_netid is the string "tcp6". For UDP
+ * over IPv6 the value of r_netid is the string "udp6".
+ */
+#define RPCB_NETID_UDP "\165\144\160" /* "udp" */
+#define RPCB_NETID_TCP "\164\143\160" /* "tcp" */
+#define RPCB_NETID_UDP6 "\165\144\160\066" /* "udp6" */
+#define RPCB_NETID_TCP6 "\164\143\160\066" /* "tcp6" */
+
+#define RPCB_MAXNETIDLEN (4u)
+
+/*
+ * r_owner
+ *
+ * The "owner" is allowed to unset a service in the rpcbind database.
+ * We always use the following (arbitrary) fixed string.
+ */
+#define RPCB_OWNER_STRING "rpcb"
+#define RPCB_MAXOWNERLEN sizeof(RPCB_OWNER_STRING)
+
+static void rpcb_getport_done(struct rpc_task *, void *);
+extern struct rpc_program rpcb_program;
+
+struct rpcbind_args {
+ struct rpc_xprt * r_xprt;
+
+ u32 r_prog;
+ u32 r_vers;
+ u32 r_prot;
+ unsigned short r_port;
+ char * r_netid;
+ char r_addr[RPCB_MAXADDRLEN];
+ char * r_owner;
+};
+
+static struct rpc_procinfo rpcb_procedures2[];
+static struct rpc_procinfo rpcb_procedures3[];
+
+static struct rpcb_info {
+ int rpc_vers;
+ struct rpc_procinfo * rpc_proc;
+} rpcb_next_version[];
+
+static void rpcb_getport_prepare(struct rpc_task *task, void *calldata)
+{
+ struct rpcbind_args *map = calldata;
+ struct rpc_xprt *xprt = map->r_xprt;
+ struct rpc_message msg = {
+ .rpc_proc = rpcb_next_version[xprt->bind_index].rpc_proc,
+ .rpc_argp = map,
+ .rpc_resp = &map->r_port,
+ };
+
+ rpc_call_setup(task, &msg, 0);
+}
+
+static void rpcb_map_release(void *data)
+{
+ struct rpcbind_args *map = data;
+
+ xprt_put(map->r_xprt);
+ kfree(map);
+}
+
+static const struct rpc_call_ops rpcb_getport_ops = {
+ .rpc_call_prepare = rpcb_getport_prepare,
+ .rpc_call_done = rpcb_getport_done,
+ .rpc_release = rpcb_map_release,
+};
+
+static void rpcb_wake_rpcbind_waiters(struct rpc_xprt *xprt, int status)
+{
+ xprt_clear_binding(xprt);
+ rpc_wake_up_status(&xprt->binding, status);
+}
+
+static struct rpc_clnt *rpcb_create(char *hostname, struct sockaddr *srvaddr,
+ int proto, int version, int privileged)
+{
+ struct rpc_create_args args = {
+ .protocol = proto,
+ .address = srvaddr,
+ .addrsize = sizeof(struct sockaddr_in),
+ .servername = hostname,
+ .program = &rpcb_program,
+ .version = version,
+ .authflavor = RPC_AUTH_UNIX,
+ .flags = (RPC_CLNT_CREATE_ONESHOT |
+ RPC_CLNT_CREATE_NOPING),
+ };
+
+ ((struct sockaddr_in *)srvaddr)->sin_port = htons(RPCBIND_PORT);
+ if (!privileged)
+ args.flags |= RPC_CLNT_CREATE_NONPRIVPORT;
+ return rpc_create(&args);
+}
+
+/**
+ * rpcb_register - set or unset a port registration with the local rpcbind svc
+ * @prog: RPC program number to bind
+ * @vers: RPC version number to bind
+ * @prot: transport protocol to use to make this request
+ * @port: port value to register
+ * @okay: result code
+ *
+ * port == 0 means unregister, port != 0 means register.
+ *
+ * This routine supports only rpcbind version 2.
+ */
+int rpcb_register(u32 prog, u32 vers, int prot, unsigned short port, int *okay)
+{
+ struct sockaddr_in sin = {
+ .sin_family = AF_INET,
+ .sin_addr.s_addr = htonl(INADDR_LOOPBACK),
+ };
+ struct rpcbind_args map = {
+ .r_prog = prog,
+ .r_vers = vers,
+ .r_prot = prot,
+ .r_port = port,
+ };
+ struct rpc_message msg = {
+ .rpc_proc = &rpcb_procedures2[port ?
+ RPCBPROC_SET : RPCBPROC_UNSET],
+ .rpc_argp = &map,
+ .rpc_resp = okay,
+ };
+ struct rpc_clnt *rpcb_clnt;
+ int error = 0;
+
+ dprintk("RPC: %sregistering (%u, %u, %d, %u) with local "
+ "rpcbind\n", (port ? "" : "un"),
+ prog, vers, prot, port);
+
+ rpcb_clnt = rpcb_create("localhost", (struct sockaddr *) &sin,
+ IPPROTO_UDP, 2, 1);
+ if (IS_ERR(rpcb_clnt))
+ return PTR_ERR(rpcb_clnt);
+
+ error = rpc_call_sync(rpcb_clnt, &msg, 0);
+
+ if (error < 0)
+ printk(KERN_WARNING "RPC: failed to contact local rpcbind "
+ "server (errno %d).\n", -error);
+ dprintk("RPC: registration status %d/%d\n", error, *okay);
+
+ return error;
+}
+
+#ifdef CONFIG_ROOT_NFS
+/**
+ * rpcb_getport_external - obtain the port for an RPC service on a given host
+ * @sin: address of remote peer
+ * @prog: RPC program number to bind
+ * @vers: RPC version number to bind
+ * @prot: transport protocol to use to make this request
+ *
+ * Called from outside the RPC client in a synchronous task context.
+ *
+ * For now this supports only version 2 queries, since its only
+ * user is mount_clnt for NFS_ROOT.
+ */
+int rpcb_getport_external(struct sockaddr_in *sin, __u32 prog,
+ __u32 vers, int prot)
+{
+ struct rpcbind_args map = {
+ .r_prog = prog,
+ .r_vers = vers,
+ .r_prot = prot,
+ .r_port = 0,
+ };
+ struct rpc_message msg = {
+ .rpc_proc = &rpcb_procedures2[RPCBPROC_GETPORT],
+ .rpc_argp = &map,
+ .rpc_resp = &map.r_port,
+ };
+ struct rpc_clnt *rpcb_clnt;
+ char hostname[40];
+ int status;
+
+ dprintk("RPC: rpcb_getport_external(%u.%u.%u.%u, %u, %u, %d)\n",
+ NIPQUAD(sin->sin_addr.s_addr), prog, vers, prot);
+
+ sprintf(hostname, "%u.%u.%u.%u", NIPQUAD(sin->sin_addr.s_addr));
+ rpcb_clnt = rpcb_create(hostname, (struct sockaddr *)sin, prot, 2, 0);
+ if (IS_ERR(rpcb_clnt))
+ return PTR_ERR(rpcb_clnt);
+
+ status = rpc_call_sync(rpcb_clnt, &msg, 0);
+
+ if (status >= 0) {
+ if (map.r_port != 0)
+ return map.r_port;
+ status = -EACCES;
+ }
+ return status;
+}
+#endif
+
+/**
+ * rpcb_getport - obtain the port for a given RPC service on a given host
+ * @task: task that is waiting for portmapper request
+ *
+ * This one can be called for an ongoing RPC request, and can be used in
+ * an async (rpciod) context.
+ */
+void rpcb_getport(struct rpc_task *task)
+{
+ struct rpc_clnt *clnt = task->tk_client;
+ int bind_version;
+ struct rpc_xprt *xprt = task->tk_xprt;
+ struct rpc_clnt *rpcb_clnt;
+ struct rpcbind_args *map;
+ struct rpc_task *child;
+ struct sockaddr addr;
+ int status;
+
+ dprintk("RPC: %5u rpcb_getport(%s, %u, %u, %d)\n",
+ task->tk_pid, clnt->cl_server,
+ clnt->cl_prog, clnt->cl_vers, xprt->prot);
+
+ /* Autobind on cloned rpc clients is discouraged */
+ BUG_ON(clnt->cl_parent != clnt);
+
+ if (xprt_test_and_set_binding(xprt)) {
+ status = -EACCES; /* tell caller to check again */
+ dprintk("RPC: %5u rpcb_getport waiting for another binder\n",
+ task->tk_pid);
+ goto bailout_nowake;
+ }
+
+ /* Put self on queue before sending rpcbind request, in case
+ * rpcb_getport_done completes before we return from rpc_run_task */
+ rpc_sleep_on(&xprt->binding, task, NULL, NULL);
+
+ /* Someone else may have bound if we slept */
+ if (xprt_bound(xprt)) {
+ status = 0;
+ dprintk("RPC: %5u rpcb_getport already bound\n", task->tk_pid);
+ goto bailout_nofree;
+ }
+
+ if (rpcb_next_version[xprt->bind_index].rpc_proc == NULL) {
+ xprt->bind_index = 0;
+ status = -EACCES; /* tell caller to try again later */
+ dprintk("RPC: %5u rpcb_getport no more getport versions "
+ "available\n", task->tk_pid);
+ goto bailout_nofree;
+ }
+ bind_version = rpcb_next_version[xprt->bind_index].rpc_vers;
+
+ dprintk("RPC: %5u rpcb_getport trying rpcbind version %u\n",
+ task->tk_pid, bind_version);
+
+ map = kzalloc(sizeof(struct rpcbind_args), GFP_ATOMIC);
+ if (!map) {
+ status = -ENOMEM;
+ dprintk("RPC: %5u rpcb_getport no memory available\n",
+ task->tk_pid);
+ goto bailout_nofree;
+ }
+ map->r_prog = clnt->cl_prog;
+ map->r_vers = clnt->cl_vers;
+ map->r_prot = xprt->prot;
+ map->r_port = 0;
+ map->r_xprt = xprt_get(xprt);
+ map->r_netid = (xprt->prot == IPPROTO_TCP) ? RPCB_NETID_TCP :
+ RPCB_NETID_UDP;
+ memcpy(&map->r_addr, rpc_peeraddr2str(clnt, RPC_DISPLAY_ADDR),
+ sizeof(map->r_addr));
+ map->r_owner = RPCB_OWNER_STRING; /* ignored for GETADDR */
+
+ rpc_peeraddr(clnt, (void *)&addr, sizeof(addr));
+ rpcb_clnt = rpcb_create(clnt->cl_server, &addr, xprt->prot, bind_version, 0);
+ if (IS_ERR(rpcb_clnt)) {
+ status = PTR_ERR(rpcb_clnt);
+ dprintk("RPC: %5u rpcb_getport rpcb_create failed, error %ld\n",
+ task->tk_pid, PTR_ERR(rpcb_clnt));
+ goto bailout;
+ }
+
+ child = rpc_run_task(rpcb_clnt, RPC_TASK_ASYNC, &rpcb_getport_ops, map);
+ if (IS_ERR(child)) {
+ status = -EIO;
+ dprintk("RPC: %5u rpcb_getport rpc_run_task failed\n",
+ task->tk_pid);
+ goto bailout_nofree;
+ }
+ rpc_put_task(child);
+
+ task->tk_xprt->stat.bind_count++;
+ return;
+
+bailout:
+ kfree(map);
+ xprt_put(xprt);
+bailout_nofree:
+ rpcb_wake_rpcbind_waiters(xprt, status);
+bailout_nowake:
+ task->tk_status = status;
+}
+
+/*
+ * Rpcbind child task calls this callback via tk_exit.
+ */
+static void rpcb_getport_done(struct rpc_task *child, void *data)
+{
+ struct rpcbind_args *map = data;
+ struct rpc_xprt *xprt = map->r_xprt;
+ int status = child->tk_status;
+
+ /* rpcbind server doesn't support this rpcbind protocol version */
+ if (status == -EPROTONOSUPPORT)
+ xprt->bind_index++;
+
+ if (status < 0) {
+ /* rpcbind server not available on remote host? */
+ xprt->ops->set_port(xprt, 0);
+ } else if (map->r_port == 0) {
+ /* Requested RPC service wasn't registered on remote host */
+ xprt->ops->set_port(xprt, 0);
+ status = -EACCES;
+ } else {
+ /* Succeeded */
+ xprt->ops->set_port(xprt, map->r_port);
+ xprt_set_bound(xprt);
+ status = 0;
+ }
+
+ dprintk("RPC: %5u rpcb_getport_done(status %d, port %u)\n",
+ child->tk_pid, status, map->r_port);
+
+ rpcb_wake_rpcbind_waiters(xprt, status);
+}
+
+static int rpcb_encode_mapping(struct rpc_rqst *req, __be32 *p,
+ struct rpcbind_args *rpcb)
+{
+ dprintk("RPC: rpcb_encode_mapping(%u, %u, %d, %u)\n",
+ rpcb->r_prog, rpcb->r_vers, rpcb->r_prot, rpcb->r_port);
+ *p++ = htonl(rpcb->r_prog);
+ *p++ = htonl(rpcb->r_vers);
+ *p++ = htonl(rpcb->r_prot);
+ *p++ = htonl(rpcb->r_port);
+
+ req->rq_slen = xdr_adjust_iovec(req->rq_svec, p);
+ return 0;
+}
+
+static int rpcb_decode_getport(struct rpc_rqst *req, __be32 *p,
+ unsigned short *portp)
+{
+ *portp = (unsigned short) ntohl(*p++);
+ dprintk("RPC: rpcb_decode_getport result %u\n",
+ *portp);
+ return 0;
+}
+
+static int rpcb_decode_set(struct rpc_rqst *req, __be32 *p,
+ unsigned int *boolp)
+{
+ *boolp = (unsigned int) ntohl(*p++);
+ dprintk("RPC: rpcb_decode_set result %u\n",
+ *boolp);
+ return 0;
+}
+
+static int rpcb_encode_getaddr(struct rpc_rqst *req, __be32 *p,
+ struct rpcbind_args *rpcb)
+{
+ dprintk("RPC: rpcb_encode_getaddr(%u, %u, %s)\n",
+ rpcb->r_prog, rpcb->r_vers, rpcb->r_addr);
+ *p++ = htonl(rpcb->r_prog);
+ *p++ = htonl(rpcb->r_vers);
+
+ p = xdr_encode_string(p, rpcb->r_netid);
+ p = xdr_encode_string(p, rpcb->r_addr);
+ p = xdr_encode_string(p, rpcb->r_owner);
+
+ req->rq_slen = xdr_adjust_iovec(req->rq_svec, p);
+
+ return 0;
+}
+
+static int rpcb_decode_getaddr(struct rpc_rqst *req, __be32 *p,
+ unsigned short *portp)
+{
+ char *addr;
+ int addr_len, c, i, f, first, val;
+
+ *portp = 0;
+ addr_len = (unsigned int) ntohl(*p++);
+ if (addr_len > RPCB_MAXADDRLEN) /* sanity */
+ return -EINVAL;
+
+ dprintk("RPC: rpcb_decode_getaddr returned string: '%s'\n",
+ (char *) p);
+
+ addr = (char *)p;
+ val = 0;
+ first = 1;
+ f = 1;
+ for (i = addr_len - 1; i > 0; i--) {
+ c = addr[i];
+ if (c >= '0' && c <= '9') {
+ val += (c - '0') * f;
+ f *= 10;
+ } else if (c == '.') {
+ if (first) {
+ *portp = val;
+ val = first = 0;
+ f = 1;
+ } else {
+ *portp |= (val << 8);
+ break;
+ }
+ }
+ }
+
+ dprintk("RPC: rpcb_decode_getaddr port=%u\n", *portp);
+ return 0;
+}
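
rpcb_decode_getaddr() scans the universal address backwards: the first dot-separated decimal field it meets is p2 (the low-order port byte), the second is p1 (shifted into the high byte), after which it stops. For the RFC 3530 example "10.1.3.7.2.15" this yields 2 * 256 + 15 = 527. The same idea as a standalone sketch:

    #include <stdio.h>
    #include <string.h>

    /* Sketch: extract the port from an rpcbind universal address,
     * e.g. "10.1.3.7.2.15" -> 527 (p1=2 high byte, p2=15 low byte). */
    static unsigned short uaddr_port(const char *addr)
    {
            int i, c, val = 0, f = 1, first = 1;
            unsigned short port = 0;

            for (i = strlen(addr) - 1; i > 0; i--) {
                    c = addr[i];
                    if (c >= '0' && c <= '9') {
                            val += (c - '0') * f;
                            f *= 10;
                    } else if (c == '.') {
                            if (first) {
                                    port = val;        /* p2: low byte */
                                    val = first = 0;
                                    f = 1;
                            } else {
                                    port |= val << 8;  /* p1: high byte */
                                    break;
                            }
                    }
            }
            return port;
    }

    int main(void)
    {
            printf("%u\n", uaddr_port("10.1.3.7.2.15")); /* prints 527 */
            return 0;
    }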
+
+#define RPCB_program_sz (1u)
+#define RPCB_version_sz (1u)
+#define RPCB_protocol_sz (1u)
+#define RPCB_port_sz (1u)
+#define RPCB_boolean_sz (1u)
+
+#define RPCB_netid_sz (1+XDR_QUADLEN(RPCB_MAXNETIDLEN))
+#define RPCB_addr_sz (1+XDR_QUADLEN(RPCB_MAXADDRLEN))
+#define RPCB_ownerstring_sz (1+XDR_QUADLEN(RPCB_MAXOWNERLEN))
+
+#define RPCB_mappingargs_sz RPCB_program_sz+RPCB_version_sz+ \
+ RPCB_protocol_sz+RPCB_port_sz
+#define RPCB_getaddrargs_sz RPCB_program_sz+RPCB_version_sz+ \
+ RPCB_netid_sz+RPCB_addr_sz+ \
+ RPCB_ownerstring_sz
+
+#define RPCB_setres_sz RPCB_boolean_sz
+#define RPCB_getportres_sz RPCB_port_sz
+
+/*
+ * Note that RFC 1833 does not put any size restrictions on the
+ * address string returned by the remote rpcbind database.
+ */
+#define RPCB_getaddrres_sz RPCB_addr_sz
+
+#define PROC(proc, argtype, restype) \
+ [RPCBPROC_##proc] = { \
+ .p_proc = RPCBPROC_##proc, \
+ .p_encode = (kxdrproc_t) rpcb_encode_##argtype, \
+ .p_decode = (kxdrproc_t) rpcb_decode_##restype, \
+ .p_arglen = RPCB_##argtype##args_sz, \
+ .p_replen = RPCB_##restype##res_sz, \
+ .p_statidx = RPCBPROC_##proc, \
+ .p_timer = 0, \
+ .p_name = #proc, \
+ }
+
+/*
+ * Not all rpcbind procedures described in RFC 1833 are implemented
+ * since the Linux kernel RPC code requires only these.
+ */
+static struct rpc_procinfo rpcb_procedures2[] = {
+ PROC(SET, mapping, set),
+ PROC(UNSET, mapping, set),
+ PROC(GETADDR, mapping, getport),
+};
+
+static struct rpc_procinfo rpcb_procedures3[] = {
+ PROC(SET, mapping, set),
+ PROC(UNSET, mapping, set),
+ PROC(GETADDR, getaddr, getaddr),
+};
+
+static struct rpc_procinfo rpcb_procedures4[] = {
+ PROC(SET, mapping, set),
+ PROC(UNSET, mapping, set),
+ PROC(GETVERSADDR, getaddr, getaddr),
+};
+
+static struct rpcb_info rpcb_next_version[] = {
+#ifdef CONFIG_SUNRPC_BIND34
+ { 4, &rpcb_procedures4[RPCBPROC_GETVERSADDR] },
+ { 3, &rpcb_procedures3[RPCBPROC_GETADDR] },
+#endif
+ { 2, &rpcb_procedures2[RPCBPROC_GETPORT] },
+ { 0, NULL },
+};
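
rpcb_next_version[] lists the query procedures newest-first, and xprt->bind_index indexes into it: rpcb_getport_done() advances the index on -EPROTONOSUPPORT, and the call_bind_status() change earlier in this patch re-enters call_bind to try the next entry. Conceptually, with query_rpcbind() as a hypothetical stand-in rather than a kernel function:

    /* Conceptual sketch of the version fallback, not kernel code. */
    int idx = 0, status = -EPROTONOSUPPORT;

    while (rpcb_next_version[idx].rpc_proc != NULL) {
            status = query_rpcbind(rpcb_next_version[idx].rpc_vers);
            if (status != -EPROTONOSUPPORT)
                    break;  /* bound, or a hard failure */
            idx++;          /* server lacks this version: try an older one */
    }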
+
+static struct rpc_version rpcb_version2 = {
+ .number = 2,
+ .nrprocs = RPCB_HIGHPROC_2,
+ .procs = rpcb_procedures2
+};
+
+static struct rpc_version rpcb_version3 = {
+ .number = 3,
+ .nrprocs = RPCB_HIGHPROC_3,
+ .procs = rpcb_procedures3
+};
+
+static struct rpc_version rpcb_version4 = {
+ .number = 4,
+ .nrprocs = RPCB_HIGHPROC_4,
+ .procs = rpcb_procedures4
+};
+
+static struct rpc_version *rpcb_version[] = {
+ NULL,
+ NULL,
+ &rpcb_version2,
+ &rpcb_version3,
+ &rpcb_version4
+};
+
+static struct rpc_stat rpcb_stats;
+
+struct rpc_program rpcb_program = {
+ .name = "rpcbind",
+ .number = RPCBIND_PROGRAM,
+ .nrvers = ARRAY_SIZE(rpcb_version),
+ .version = rpcb_version,
+ .stats = &rpcb_stats,
+};
diff --git a/net/sunrpc/sched.c b/net/sunrpc/sched.c
index 6d87320074b..4a53e94f813 100644
--- a/net/sunrpc/sched.c
+++ b/net/sunrpc/sched.c
@@ -741,50 +741,53 @@ static void rpc_async_schedule(struct work_struct *work)
* @task: RPC task that will use this buffer
* @size: requested byte size
*
- * We try to ensure that some NFS reads and writes can always proceed
- * by using a mempool when allocating 'small' buffers.
+ * To prevent rpciod from hanging, this allocator never sleeps,
+ * returning NULL if the request cannot be serviced immediately.
+ * The caller can arrange to sleep in a way that is safe for rpciod.
+ *
+ * Most requests are 'small' (under 2KiB) and can be serviced from a
+ * mempool, ensuring that NFS reads and writes can always proceed,
+ * and that there is good locality of reference for these buffers.
+ *
* In order to avoid memory starvation triggering more writebacks of
- * NFS requests, we use GFP_NOFS rather than GFP_KERNEL.
+ * NFS requests, we avoid using GFP_KERNEL.
*/
-void * rpc_malloc(struct rpc_task *task, size_t size)
+void *rpc_malloc(struct rpc_task *task, size_t size)
{
- struct rpc_rqst *req = task->tk_rqstp;
- gfp_t gfp;
+ size_t *buf;
+ gfp_t gfp = RPC_IS_SWAPPER(task) ? GFP_ATOMIC : GFP_NOWAIT;
- if (task->tk_flags & RPC_TASK_SWAPPER)
- gfp = GFP_ATOMIC;
+ size += sizeof(size_t);
+ if (size <= RPC_BUFFER_MAXSIZE)
+ buf = mempool_alloc(rpc_buffer_mempool, gfp);
else
- gfp = GFP_NOFS;
-
- if (size > RPC_BUFFER_MAXSIZE) {
- req->rq_buffer = kmalloc(size, gfp);
- if (req->rq_buffer)
- req->rq_bufsize = size;
- } else {
- req->rq_buffer = mempool_alloc(rpc_buffer_mempool, gfp);
- if (req->rq_buffer)
- req->rq_bufsize = RPC_BUFFER_MAXSIZE;
- }
- return req->rq_buffer;
+ buf = kmalloc(size, gfp);
+ if (!buf)
+ return NULL;
+ *buf = size;
+ dprintk("RPC: %5u allocated buffer of size %u at %p\n",
+ task->tk_pid, size, buf);
+ return (void *) ++buf;
}
/**
* rpc_free - free buffer allocated via rpc_malloc
- * @task: RPC task with a buffer to be freed
+ * @buffer: buffer to free
*
*/
-void rpc_free(struct rpc_task *task)
+void rpc_free(void *buffer)
{
- struct rpc_rqst *req = task->tk_rqstp;
+ size_t size, *buf = (size_t *) buffer;
- if (req->rq_buffer) {
- if (req->rq_bufsize == RPC_BUFFER_MAXSIZE)
- mempool_free(req->rq_buffer, rpc_buffer_mempool);
- else
- kfree(req->rq_buffer);
- req->rq_buffer = NULL;
- req->rq_bufsize = 0;
- }
+ if (!buffer)
+ return;
+ size = *buf;
+ buf--;
+
+ dprintk("RPC: freeing buffer of size %u at %p\n",
+ size, buf);
+ if (size <= RPC_BUFFER_MAXSIZE)
+ mempool_free(buf, rpc_buffer_mempool);
+ else
+ kfree(buf);
}
/*
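
The reworked allocator drops rq_bufsize by prefixing each buffer with its own length: rpc_malloc() reserves sizeof(size_t) extra bytes, stores the total there, and returns the address just past it; rpc_free() steps back to recover the size and pick the right release path. The same pattern in miniature, with malloc/free standing in for the mempool:

    #include <stdlib.h>

    /* Userspace miniature of the size-prefixed buffer scheme. */
    static void *sized_alloc(size_t size)
    {
            size_t *buf;

            size += sizeof(size_t);         /* room for the hidden header */
            buf = malloc(size);
            if (!buf)
                    return NULL;
            *buf = size;                    /* record the total size */
            return buf + 1;                 /* caller sees only the payload */
    }

    static void sized_free(void *p)
    {
            size_t *buf = p;

            if (!p)
                    return;
            buf--;                          /* step back to the header */
            /* the kernel version compares *buf with RPC_BUFFER_MAXSIZE
             * here to choose mempool_free() vs kfree() */
            free(buf);
    }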
diff --git a/net/sunrpc/socklib.c b/net/sunrpc/socklib.c
index 634885b0c04..1d377d1ab7f 100644
--- a/net/sunrpc/socklib.c
+++ b/net/sunrpc/socklib.c
@@ -154,7 +154,7 @@ int csum_partial_copy_to_xdr(struct xdr_buf *xdr, struct sk_buff *skb)
desc.offset = sizeof(struct udphdr);
desc.count = skb->len - desc.offset;
- if (skb->ip_summed == CHECKSUM_UNNECESSARY)
+ if (skb_csum_unnecessary(skb))
goto no_checksum;
desc.csum = csum_partial(skb->data, desc.offset, skb->csum);
diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c
index b4db53ff143..b7503c103ae 100644
--- a/net/sunrpc/svc.c
+++ b/net/sunrpc/svc.c
@@ -757,7 +757,7 @@ svc_register(struct svc_serv *serv, int proto, unsigned short port)
if (progp->pg_vers[i]->vs_hidden)
continue;
- error = rpc_register(progp->pg_prog, i, proto, port, &dummy);
+ error = rpcb_register(progp->pg_prog, i, proto, port, &dummy);
if (error < 0)
break;
if (port && !dummy) {
diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c
index 2772fee9388..22f61aee482 100644
--- a/net/sunrpc/svcsock.c
+++ b/net/sunrpc/svcsock.c
@@ -798,16 +798,12 @@ svc_udp_recvfrom(struct svc_rqst *rqstp)
dprintk("svc: recvfrom returned error %d\n", -err);
}
rqstp->rq_addrlen = sizeof(rqstp->rq_addr);
- if (skb->tstamp.off_sec == 0) {
- struct timeval tv;
-
- tv.tv_sec = xtime.tv_sec;
- tv.tv_usec = xtime.tv_nsec / NSEC_PER_USEC;
- skb_set_timestamp(skb, &tv);
+ if (skb->tstamp.tv64 == 0) {
+ skb->tstamp = ktime_get_real();
/* Don't enable netstamp, sunrpc doesn't
need that much accuracy */
}
- skb_get_timestamp(skb, &svsk->sk_sk->sk_stamp);
+ svsk->sk_sk->sk_stamp = skb->tstamp;
set_bit(SK_DATA, &svsk->sk_flags); /* there may be more data... */
/*
diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c
index 456a1451030..5b05b73e4c1 100644
--- a/net/sunrpc/xprt.c
+++ b/net/sunrpc/xprt.c
@@ -823,7 +823,6 @@ static void xprt_request_init(struct rpc_task *task, struct rpc_xprt *xprt)
req->rq_task = task;
req->rq_xprt = xprt;
req->rq_buffer = NULL;
- req->rq_bufsize = 0;
req->rq_xid = xprt_alloc_xid(xprt);
req->rq_release_snd_buf = NULL;
xprt_reset_majortimeo(req);
@@ -855,7 +854,7 @@ void xprt_release(struct rpc_task *task)
mod_timer(&xprt->timer,
xprt->last_used + xprt->idle_timeout);
spin_unlock_bh(&xprt->transport_lock);
- xprt->ops->buf_free(task);
+ xprt->ops->buf_free(req->rq_buffer);
task->tk_rqstp = NULL;
if (req->rq_release_snd_buf)
req->rq_release_snd_buf(req);
@@ -928,6 +927,7 @@ struct rpc_xprt *xprt_create_transport(int proto, struct sockaddr *ap, size_t si
xprt->timer.data = (unsigned long) xprt;
xprt->last_used = jiffies;
xprt->cwnd = RPC_INITCWND;
+ xprt->bind_index = 0;
rpc_init_wait_queue(&xprt->binding, "xprt_binding");
rpc_init_wait_queue(&xprt->pending, "xprt_pending");
diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c
index a5a32029e72..cc33c5880ab 100644
--- a/net/sunrpc/xprtsock.c
+++ b/net/sunrpc/xprtsock.c
@@ -1476,7 +1476,7 @@ static struct rpc_xprt_ops xs_udp_ops = {
.set_buffer_size = xs_udp_set_buffer_size,
.reserve_xprt = xprt_reserve_xprt_cong,
.release_xprt = xprt_release_xprt_cong,
- .rpcbind = rpc_getport,
+ .rpcbind = rpcb_getport,
.set_port = xs_set_port,
.connect = xs_connect,
.buf_alloc = rpc_malloc,
@@ -1493,7 +1493,7 @@ static struct rpc_xprt_ops xs_udp_ops = {
static struct rpc_xprt_ops xs_tcp_ops = {
.reserve_xprt = xprt_reserve_xprt,
.release_xprt = xs_tcp_release_xprt,
- .rpcbind = rpc_getport,
+ .rpcbind = rpcb_getport,
.set_port = xs_set_port,
.connect = xs_connect,
.buf_alloc = rpc_malloc,
diff --git a/net/tipc/config.c b/net/tipc/config.c
index 14789a82de5..c71337a22d3 100644
--- a/net/tipc/config.c
+++ b/net/tipc/config.c
@@ -89,7 +89,7 @@ struct sk_buff *tipc_cfg_reply_alloc(int payload_size)
int tipc_cfg_append_tlv(struct sk_buff *buf, int tlv_type,
void *tlv_data, int tlv_data_size)
{
- struct tlv_desc *tlv = (struct tlv_desc *)buf->tail;
+ struct tlv_desc *tlv = (struct tlv_desc *)skb_tail_pointer(buf);
int new_tlv_space = TLV_SPACE(tlv_data_size);
if (skb_tailroom(buf) < new_tlv_space) {
diff --git a/net/tipc/eth_media.c b/net/tipc/eth_media.c
index 9be4839e32c..0ee6ded18f3 100644
--- a/net/tipc/eth_media.c
+++ b/net/tipc/eth_media.c
@@ -73,7 +73,7 @@ static int send_msg(struct sk_buff *buf, struct tipc_bearer *tb_ptr,
clone = skb_clone(buf, GFP_ATOMIC);
if (clone) {
- clone->nh.raw = clone->data;
+ skb_reset_network_header(clone);
dev = ((struct eth_bearer *)(tb_ptr->usr_handle))->dev;
clone->dev = dev;
dev->hard_header(clone, dev, ETH_P_TIPC,
@@ -99,8 +99,8 @@ static int recv_msg(struct sk_buff *buf, struct net_device *dev,
if (likely(eb_ptr->bearer)) {
if (likely(!dev->promiscuity) ||
- !memcmp(buf->mac.raw,dev->dev_addr,ETH_ALEN) ||
- !memcmp(buf->mac.raw,dev->broadcast,ETH_ALEN)) {
+ !memcmp(skb_mac_header(buf), dev->dev_addr, ETH_ALEN) ||
+ !memcmp(skb_mac_header(buf), dev->broadcast, ETH_ALEN)) {
size = msg_size((struct tipc_msg *)buf->data);
skb_trim(buf, size);
if (likely(buf->len == size)) {
@@ -120,16 +120,18 @@ static int recv_msg(struct sk_buff *buf, struct net_device *dev,
static int enable_bearer(struct tipc_bearer *tb_ptr)
{
- struct net_device *dev = dev_base;
+ struct net_device *dev, *pdev;
struct eth_bearer *eb_ptr = &eth_bearers[0];
struct eth_bearer *stop = &eth_bearers[MAX_ETH_BEARERS];
char *driver_name = strchr((const char *)tb_ptr->name, ':') + 1;
/* Find device with specified name */
-
- while (dev && dev->name && strncmp(dev->name, driver_name, IFNAMSIZ)) {
- dev = dev->next;
- }
+ dev = NULL;
+ for_each_netdev(pdev)
+ if (!strncmp(pdev->name, driver_name, IFNAMSIZ)) {
+ dev = pdev;
+ break;
+ }
if (!dev)
return -ENODEV;
@@ -140,7 +142,7 @@ static int enable_bearer(struct tipc_bearer *tb_ptr)
return -EDQUOT;
if (!eb_ptr->dev) {
eb_ptr->dev = dev;
- eb_ptr->tipc_packet_type.type = __constant_htons(ETH_P_TIPC);
+ eb_ptr->tipc_packet_type.type = htons(ETH_P_TIPC);
eb_ptr->tipc_packet_type.dev = dev;
eb_ptr->tipc_packet_type.func = recv_msg;
eb_ptr->tipc_packet_type.af_packet_priv = eb_ptr;
diff --git a/net/tipc/link.c b/net/tipc/link.c
index 71c2f2fd405..2124f32ef29 100644
--- a/net/tipc/link.c
+++ b/net/tipc/link.c
@@ -1001,7 +1001,7 @@ static int link_bundle_buf(struct link *l_ptr,
return 0;
skb_put(bundler, pad + size);
- memcpy(bundler->data + to_pos, buf->data, size);
+ skb_copy_to_linear_data_offset(bundler, to_pos, buf->data, size);
msg_set_size(bundler_msg, to_pos + size);
msg_set_msgcnt(bundler_msg, msg_msgcnt(bundler_msg) + 1);
dbg("Packed msg # %u(%u octets) into pos %u in buf(#%u)\n",
@@ -1109,8 +1109,8 @@ int tipc_link_send_buf(struct link *l_ptr, struct sk_buff *buf)
if (bundler) {
msg_init(&bundler_hdr, MSG_BUNDLER, OPEN_MSG,
TIPC_OK, INT_H_SIZE, l_ptr->addr);
- memcpy(bundler->data, (unchar *)&bundler_hdr,
- INT_H_SIZE);
+ skb_copy_to_linear_data(bundler, &bundler_hdr,
+ INT_H_SIZE);
skb_trim(bundler, INT_H_SIZE);
link_bundle_buf(l_ptr, bundler, buf);
buf = bundler;
@@ -1383,9 +1383,9 @@ again:
if (!buf)
return -ENOMEM;
buf->next = NULL;
- memcpy(buf->data, (unchar *)&fragm_hdr, INT_H_SIZE);
+ skb_copy_to_linear_data(buf, &fragm_hdr, INT_H_SIZE);
hsz = msg_hdr_sz(hdr);
- memcpy(buf->data + INT_H_SIZE, (unchar *)hdr, hsz);
+ skb_copy_to_linear_data_offset(buf, INT_H_SIZE, hdr, hsz);
msg_dbg(buf_msg(buf), ">BUILD>");
/* Chop up message: */
@@ -1416,8 +1416,8 @@ error:
return -EFAULT;
}
} else
- memcpy(buf->data + fragm_crs, sect_crs, sz);
-
+ skb_copy_to_linear_data_offset(buf, fragm_crs,
+ sect_crs, sz);
sect_crs += sz;
sect_rest -= sz;
fragm_crs += sz;
@@ -1442,7 +1442,7 @@ error:
buf->next = NULL;
prev->next = buf;
- memcpy(buf->data, (unchar *)&fragm_hdr, INT_H_SIZE);
+ skb_copy_to_linear_data(buf, &fragm_hdr, INT_H_SIZE);
fragm_crs = INT_H_SIZE;
fragm_rest = fragm_sz;
msg_dbg(buf_msg(buf)," >BUILD>");
@@ -2130,7 +2130,7 @@ void tipc_link_send_proto_msg(struct link *l_ptr, u32 msg_typ, int probe_msg,
buf = l_ptr->proto_msg_queue;
if (!buf)
return;
- memcpy(buf->data, (unchar *)msg, sizeof(l_ptr->proto_msg));
+ skb_copy_to_linear_data(buf, msg, sizeof(l_ptr->proto_msg));
return;
}
msg_set_timestamp(msg, jiffies_to_msecs(jiffies));
@@ -2143,7 +2143,7 @@ void tipc_link_send_proto_msg(struct link *l_ptr, u32 msg_typ, int probe_msg,
if (!buf)
return;
- memcpy(buf->data, (unchar *)msg, sizeof(l_ptr->proto_msg));
+ skb_copy_to_linear_data(buf, msg, sizeof(l_ptr->proto_msg));
msg_set_size(buf_msg(buf), msg_size);
if (tipc_bearer_send(l_ptr->b_ptr, buf, &l_ptr->media_addr)) {
@@ -2319,8 +2319,8 @@ void tipc_link_tunnel(struct link *l_ptr,
"unable to send tunnel msg\n");
return;
}
- memcpy(buf->data, (unchar *)tunnel_hdr, INT_H_SIZE);
- memcpy(buf->data + INT_H_SIZE, (unchar *)msg, length);
+ skb_copy_to_linear_data(buf, tunnel_hdr, INT_H_SIZE);
+ skb_copy_to_linear_data_offset(buf, INT_H_SIZE, msg, length);
dbg("%c->%c:", l_ptr->b_ptr->net_plane, tunnel->b_ptr->net_plane);
msg_dbg(buf_msg(buf), ">SEND>");
tipc_link_send_buf(tunnel, buf);
@@ -2361,7 +2361,7 @@ void tipc_link_changeover(struct link *l_ptr)
buf = buf_acquire(INT_H_SIZE);
if (buf) {
- memcpy(buf->data, (unchar *)&tunnel_hdr, INT_H_SIZE);
+ skb_copy_to_linear_data(buf, &tunnel_hdr, INT_H_SIZE);
msg_set_size(&tunnel_hdr, INT_H_SIZE);
dbg("%c->%c:", l_ptr->b_ptr->net_plane,
tunnel->b_ptr->net_plane);
@@ -2426,8 +2426,9 @@ void tipc_link_send_duplicate(struct link *l_ptr, struct link *tunnel)
"unable to send duplicate msg\n");
return;
}
- memcpy(outbuf->data, (unchar *)&tunnel_hdr, INT_H_SIZE);
- memcpy(outbuf->data + INT_H_SIZE, iter->data, length);
+ skb_copy_to_linear_data(outbuf, &tunnel_hdr, INT_H_SIZE);
+ skb_copy_to_linear_data_offset(outbuf, INT_H_SIZE, iter->data,
+ length);
dbg("%c->%c:", l_ptr->b_ptr->net_plane,
tunnel->b_ptr->net_plane);
msg_dbg(buf_msg(outbuf), ">SEND>");
@@ -2457,7 +2458,7 @@ static struct sk_buff *buf_extract(struct sk_buff *skb, u32 from_pos)
eb = buf_acquire(size);
if (eb)
- memcpy(eb->data, (unchar *)msg, size);
+ skb_copy_to_linear_data(eb, msg, size);
return eb;
}
@@ -2569,7 +2570,7 @@ void tipc_link_recv_bundle(struct sk_buff *buf)
if (obuf == NULL) {
warn("Link unable to unbundle message(s)\n");
break;
- };
+ }
pos += align(msg_size(buf_msg(obuf)));
msg_dbg(buf_msg(obuf), " /");
tipc_net_route_msg(obuf);
@@ -2631,9 +2632,9 @@ int tipc_link_send_long_buf(struct link *l_ptr, struct sk_buff *buf)
goto exit;
}
msg_set_size(&fragm_hdr, fragm_sz + INT_H_SIZE);
- memcpy(fragm->data, (unchar *)&fragm_hdr, INT_H_SIZE);
- memcpy(fragm->data + INT_H_SIZE, crs, fragm_sz);
-
+ skb_copy_to_linear_data(fragm, &fragm_hdr, INT_H_SIZE);
+ skb_copy_to_linear_data_offset(fragm, INT_H_SIZE, crs,
+ fragm_sz);
/* Send queued messages first, if any: */
l_ptr->stats.sent_fragments++;
@@ -2733,8 +2734,8 @@ int tipc_link_recv_fragment(struct sk_buff **pending, struct sk_buff **fb,
if (pbuf != NULL) {
pbuf->next = *pending;
*pending = pbuf;
- memcpy(pbuf->data, (unchar *)imsg, msg_data_sz(fragm));
-
+ skb_copy_to_linear_data(pbuf, imsg,
+ msg_data_sz(fragm));
/* Prepare buffer for subsequent fragments. */
set_long_msg_seqno(pbuf, long_msg_seq_no);
@@ -2750,7 +2751,8 @@ int tipc_link_recv_fragment(struct sk_buff **pending, struct sk_buff **fb,
u32 fsz = get_fragm_size(pbuf);
u32 crs = ((msg_fragm_no(fragm) - 1) * fsz);
u32 exp_frags = get_expected_frags(pbuf) - 1;
- memcpy(pbuf->data + crs, msg_data(fragm), dsz);
+ skb_copy_to_linear_data_offset(pbuf, crs,
+ msg_data(fragm), dsz);
buf_discard(fbuf);
/* Is message complete? */
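
The long run of TIPC conversions in this file replaces open-coded memcpy() into skb->data with skb_copy_to_linear_data() and its _offset variant. Behaviourally they are thin wrappers over the same copy; roughly, paraphrasing the skbuff helpers:

    #include <linux/skbuff.h>
    #include <linux/string.h>

    /* Approximate definitions of the helpers used throughout this diff. */
    static inline void skb_copy_to_linear_data(struct sk_buff *skb,
                                               const void *from,
                                               const unsigned int len)
    {
            memcpy(skb->data, from, len);
    }

    static inline void skb_copy_to_linear_data_offset(struct sk_buff *skb,
                                                      const int offset,
                                                      const void *from,
                                                      const unsigned int len)
    {
            memcpy(skb->data + offset, from, len);
    }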
diff --git a/net/tipc/msg.h b/net/tipc/msg.h
index 62d54906360..35d5ba1d4f4 100644
--- a/net/tipc/msg.h
+++ b/net/tipc/msg.h
@@ -1,8 +1,8 @@
/*
* net/tipc/msg.h: Include file for TIPC message header routines
*
- * Copyright (c) 2000-2006, Ericsson AB
- * Copyright (c) 2005, Wind River Systems
+ * Copyright (c) 2000-2007, Ericsson AB
+ * Copyright (c) 2005-2007, Wind River Systems
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -71,8 +71,11 @@ static inline void msg_set_word(struct tipc_msg *m, u32 w, u32 val)
static inline void msg_set_bits(struct tipc_msg *m, u32 w,
u32 pos, u32 mask, u32 val)
{
- u32 word = msg_word(m,w) & ~(mask << pos);
- msg_set_word(m, w, (word |= (val << pos)));
+ val = (val & mask) << pos;
+ val = htonl(val);
+ mask = htonl(mask << pos);
+ m->hdr[w] &= ~mask;
+ m->hdr[w] |= val;
}
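
The rewritten msg_set_bits() now operates directly on the big-endian on-the-wire header word: value and mask are positioned in host order, byte-swapped once with htonl(), and applied with a clear-then-or. A worked example with a hypothetical 4-bit field at bit position 8:

    /* Worked example (hypothetical field): set a 4-bit field at bit 8
     * of header word w to 0x5, operating on the big-endian word. */
    static inline void example_set_field(struct tipc_msg *m, u32 w)
    {
            u32 pos = 8, mask = 0xf, val = 0x5;

            val = (val & mask) << pos;      /* 0x00000500, host order */
            val = htonl(val);               /* match on-wire byte order */
            mask = htonl(mask << pos);      /* 0x00000f00, on-wire order */
            m->hdr[w] &= ~mask;             /* clear the old field bits */
            m->hdr[w] |= val;               /* merge in the new value */
    }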
/*
@@ -786,15 +789,16 @@ static inline int msg_build(struct tipc_msg *hdr,
*buf = buf_acquire(sz);
if (!(*buf))
return -ENOMEM;
- memcpy((*buf)->data, (unchar *)hdr, hsz);
+ skb_copy_to_linear_data(*buf, hdr, hsz);
for (res = 1, cnt = 0; res && (cnt < num_sect); cnt++) {
if (likely(usrmem))
res = !copy_from_user((*buf)->data + pos,
msg_sect[cnt].iov_base,
msg_sect[cnt].iov_len);
else
- memcpy((*buf)->data + pos, msg_sect[cnt].iov_base,
- msg_sect[cnt].iov_len);
+ skb_copy_to_linear_data_offset(*buf, pos,
+ msg_sect[cnt].iov_base,
+ msg_sect[cnt].iov_len);
pos += msg_sect[cnt].iov_len;
}
if (likely(res))
diff --git a/net/tipc/netlink.c b/net/tipc/netlink.c
index b8e1edc2bad..4cdafa2d1d4 100644
--- a/net/tipc/netlink.c
+++ b/net/tipc/netlink.c
@@ -57,7 +57,7 @@ static int handle_cmd(struct sk_buff *skb, struct genl_info *info)
if (rep_buf) {
skb_push(rep_buf, hdr_space);
- rep_nlh = (struct nlmsghdr *)rep_buf->data;
+ rep_nlh = nlmsg_hdr(rep_buf);
memcpy(rep_nlh, req_nlh, hdr_space);
rep_nlh->nlmsg_len = rep_buf->len;
genlmsg_unicast(rep_buf, req_nlh->nlmsg_pid);
diff --git a/net/tipc/port.c b/net/tipc/port.c
index 5f8217d4b45..bcd5da00737 100644
--- a/net/tipc/port.c
+++ b/net/tipc/port.c
@@ -464,7 +464,7 @@ int tipc_reject_msg(struct sk_buff *buf, u32 err)
msg_set_size(rmsg, data_sz + hdr_sz);
msg_set_nametype(rmsg, msg_nametype(msg));
msg_set_nameinst(rmsg, msg_nameinst(msg));
- memcpy(rbuf->data + hdr_sz, msg_data(msg), data_sz);
+ skb_copy_to_linear_data_offset(rbuf, hdr_sz, msg_data(msg), data_sz);
/* send self-abort message when rejecting on a connected port */
if (msg_connected(msg)) {
@@ -1419,7 +1419,7 @@ int tipc_send_buf(u32 ref, struct sk_buff *buf, unsigned int dsz)
return -ENOMEM;
skb_push(buf, hsz);
- memcpy(buf->data, (unchar *)msg, hsz);
+ skb_copy_to_linear_data(buf, msg, hsz);
destnode = msg_destnode(msg);
p_ptr->publ.congested = 1;
if (!tipc_port_congested(p_ptr)) {
@@ -1555,7 +1555,7 @@ int tipc_forward_buf2name(u32 ref,
if (skb_cow(buf, LONG_H_SIZE))
return -ENOMEM;
skb_push(buf, LONG_H_SIZE);
- memcpy(buf->data, (unchar *)msg, LONG_H_SIZE);
+ skb_copy_to_linear_data(buf, msg, LONG_H_SIZE);
msg_dbg(buf_msg(buf),"PREP:");
if (likely(destport || destnode)) {
p_ptr->sent++;
@@ -1679,7 +1679,7 @@ int tipc_forward_buf2port(u32 ref,
return -ENOMEM;
skb_push(buf, DIR_MSG_H_SIZE);
- memcpy(buf->data, (unchar *)msg, DIR_MSG_H_SIZE);
+ skb_copy_to_linear_data(buf, msg, DIR_MSG_H_SIZE);
msg_dbg(msg, "buf2port: ");
p_ptr->sent++;
if (dest->node == tipc_own_addr)
diff --git a/net/tipc/socket.c b/net/tipc/socket.c
index b71739fbe2c..45832fb75ea 100644
--- a/net/tipc/socket.c
+++ b/net/tipc/socket.c
@@ -1020,7 +1020,7 @@ restart:
if (!err) {
buf_crs = (unsigned char *)(TIPC_SKB_CB(buf)->handle);
- sz = buf->tail - buf_crs;
+ sz = skb_tail_pointer(buf) - buf_crs;
needed = (buf_len - sz_copied);
sz_to_copy = (sz <= needed) ? sz : needed;
diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
index 606971645b3..aec8cf165e1 100644
--- a/net/unix/af_unix.c
+++ b/net/unix/af_unix.c
@@ -1319,7 +1319,7 @@ static int unix_dgram_sendmsg(struct kiocb *kiocb, struct socket *sock,
unix_attach_fds(siocb->scm, skb);
unix_get_secdata(siocb->scm, skb);
- skb->h.raw = skb->data;
+ skb_reset_transport_header(skb);
err = memcpy_fromiovec(skb_put(skb,len), msg->msg_iov, len);
if (err)
goto out_free;
diff --git a/net/wanrouter/wanmain.c b/net/wanrouter/wanmain.c
index 5d2d93dc083..7a19e0ede28 100644
--- a/net/wanrouter/wanmain.c
+++ b/net/wanrouter/wanmain.c
@@ -277,8 +277,8 @@ int wanrouter_encapsulate(struct sk_buff *skb, struct net_device *dev,
skb_push(skb, 7);
skb->data[0] = 0;
skb->data[1] = NLPID_SNAP;
- memcpy(&skb->data[2], wanrouter_oui_ether,
- sizeof(wanrouter_oui_ether));
+ skb_copy_to_linear_data_offset(skb, 2, wanrouter_oui_ether,
+ sizeof(wanrouter_oui_ether));
*((unsigned short*)&skb->data[5]) = htons(type);
break;
@@ -339,7 +339,7 @@ __be16 wanrouter_type_trans(struct sk_buff *skb, struct net_device *dev)
skb->protocol = ethertype;
skb->pkt_type = PACKET_HOST; /* Physically point to point */
skb_pull(skb, cnt);
- skb->mac.raw = skb->data;
+ skb_reset_mac_header(skb);
return ethertype;
}
diff --git a/net/wireless/Kconfig b/net/wireless/Kconfig
new file mode 100644
index 00000000000..a228d56a91b
--- /dev/null
+++ b/net/wireless/Kconfig
@@ -0,0 +1,16 @@
+config CFG80211
+ tristate "Improved wireless configuration API"
+
+config WIRELESS_EXT
+ bool "Wireless extensions"
+ default n
+ ---help---
+ This option enables the legacy wireless extensions
+ (wireless network interface configuration via ioctls).
+
+ Wireless extensions will be replaced by cfg80211 and
+ will be required only by legacy drivers that implement
+ wireless extension handlers.
+
+ Say N (if you can) unless you know you need wireless
+ extensions for external modules.
diff --git a/net/wireless/Makefile b/net/wireless/Makefile
new file mode 100644
index 00000000000..3a96ae60271
--- /dev/null
+++ b/net/wireless/Makefile
@@ -0,0 +1,4 @@
+obj-$(CONFIG_WIRELESS_EXT) += wext.o
+obj-$(CONFIG_CFG80211) += cfg80211.o
+
+cfg80211-y += core.o sysfs.o
diff --git a/net/wireless/core.c b/net/wireless/core.c
new file mode 100644
index 00000000000..7eabd55417a
--- /dev/null
+++ b/net/wireless/core.c
@@ -0,0 +1,224 @@
+/*
+ * This is the linux wireless configuration interface.
+ *
+ * Copyright 2006, 2007 Johannes Berg <johannes@sipsolutions.net>
+ */
+
+#include <linux/if.h>
+#include <linux/module.h>
+#include <linux/err.h>
+#include <linux/mutex.h>
+#include <linux/list.h>
+#include <linux/nl80211.h>
+#include <linux/debugfs.h>
+#include <linux/notifier.h>
+#include <linux/device.h>
+#include <net/genetlink.h>
+#include <net/cfg80211.h>
+#include <net/wireless.h>
+#include "core.h"
+#include "sysfs.h"
+
+/* name for sysfs, %d is appended */
+#define PHY_NAME "phy"
+
+MODULE_AUTHOR("Johannes Berg");
+MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("wireless configuration support");
+
+/* RCU might be appropriate here since we usually
+ * only read the list, and that can happen quite
+ * often because we need to do it for each command */
+LIST_HEAD(cfg80211_drv_list);
+DEFINE_MUTEX(cfg80211_drv_mutex);
+static int wiphy_counter;
+
+/* for debugfs */
+static struct dentry *ieee80211_debugfs_dir;
+
+/* exported functions */
+
+struct wiphy *wiphy_new(struct cfg80211_ops *ops, int sizeof_priv)
+{
+ struct cfg80211_registered_device *drv;
+ int alloc_size;
+
+ alloc_size = sizeof(*drv) + sizeof_priv;
+
+ drv = kzalloc(alloc_size, GFP_KERNEL);
+ if (!drv)
+ return NULL;
+
+ drv->ops = ops;
+
+ mutex_lock(&cfg80211_drv_mutex);
+
+ drv->idx = wiphy_counter;
+
+ /* now increase counter for the next device unless
+ * it has wrapped previously */
+ if (wiphy_counter >= 0)
+ wiphy_counter++;
+
+ mutex_unlock(&cfg80211_drv_mutex);
+
+ if (unlikely(drv->idx < 0)) {
+ /* ugh, wrapped! */
+ kfree(drv);
+ return NULL;
+ }
+
+ /* give it a proper name */
+ snprintf(drv->wiphy.dev.bus_id, BUS_ID_SIZE,
+ PHY_NAME "%d", drv->idx);
+
+ mutex_init(&drv->mtx);
+ mutex_init(&drv->devlist_mtx);
+ INIT_LIST_HEAD(&drv->netdev_list);
+
+ device_initialize(&drv->wiphy.dev);
+ drv->wiphy.dev.class = &ieee80211_class;
+ drv->wiphy.dev.platform_data = drv;
+
+ return &drv->wiphy;
+}
+EXPORT_SYMBOL(wiphy_new);
+
+int wiphy_register(struct wiphy *wiphy)
+{
+ struct cfg80211_registered_device *drv = wiphy_to_dev(wiphy);
+ int res;
+
+ mutex_lock(&cfg80211_drv_mutex);
+
+ res = device_add(&drv->wiphy.dev);
+ if (res)
+ goto out_unlock;
+
+ list_add(&drv->list, &cfg80211_drv_list);
+
+ /* add to debugfs */
+ drv->wiphy.debugfsdir =
+ debugfs_create_dir(wiphy_name(&drv->wiphy),
+ ieee80211_debugfs_dir);
+
+ res = 0;
+out_unlock:
+ mutex_unlock(&cfg80211_drv_mutex);
+ return res;
+}
+EXPORT_SYMBOL(wiphy_register);
+
+void wiphy_unregister(struct wiphy *wiphy)
+{
+ struct cfg80211_registered_device *drv = wiphy_to_dev(wiphy);
+
+ /* protect the device list */
+ mutex_lock(&cfg80211_drv_mutex);
+
+ BUG_ON(!list_empty(&drv->netdev_list));
+
+ /*
+ * Try to grab drv->mtx. If a command is still in progress,
+ * hopefully the driver will refuse it since it's tearing
+ * down the device already. We wait for this command to complete
+ * before unlinking the item from the list.
+ * Note: as codified by the BUG_ON above we cannot get here if
+ * a virtual interface is still associated. Hence, we can only
+ * get to lock contention here if userspace issues a command
+ * that identified the hardware by wiphy index.
+ */
+ mutex_lock(&drv->mtx);
+ /* unlock again before freeing */
+ mutex_unlock(&drv->mtx);
+
+ list_del(&drv->list);
+ device_del(&drv->wiphy.dev);
+ debugfs_remove(drv->wiphy.debugfsdir);
+
+ mutex_unlock(&cfg80211_drv_mutex);
+}
+EXPORT_SYMBOL(wiphy_unregister);
+
+void cfg80211_dev_free(struct cfg80211_registered_device *drv)
+{
+ mutex_destroy(&drv->mtx);
+ mutex_destroy(&drv->devlist_mtx);
+ kfree(drv);
+}
+
+void wiphy_free(struct wiphy *wiphy)
+{
+ put_device(&wiphy->dev);
+}
+EXPORT_SYMBOL(wiphy_free);
+
+static int cfg80211_netdev_notifier_call(struct notifier_block * nb,
+ unsigned long state,
+ void *ndev)
+{
+ struct net_device *dev = ndev;
+ struct cfg80211_registered_device *rdev;
+
+ if (!dev->ieee80211_ptr)
+ return 0;
+
+ rdev = wiphy_to_dev(dev->ieee80211_ptr->wiphy);
+
+ switch (state) {
+ case NETDEV_REGISTER:
+ mutex_lock(&rdev->devlist_mtx);
+ list_add(&dev->ieee80211_ptr->list, &rdev->netdev_list);
+ if (sysfs_create_link(&dev->dev.kobj, &rdev->wiphy.dev.kobj,
+ "phy80211")) {
+ printk(KERN_ERR "wireless: failed to add phy80211 "
+ "symlink to netdev!\n");
+ }
+ dev->ieee80211_ptr->netdev = dev;
+ mutex_unlock(&rdev->devlist_mtx);
+ break;
+ case NETDEV_UNREGISTER:
+ mutex_lock(&rdev->devlist_mtx);
+ if (!list_empty(&dev->ieee80211_ptr->list)) {
+ sysfs_remove_link(&dev->dev.kobj, "phy80211");
+ list_del_init(&dev->ieee80211_ptr->list);
+ }
+ mutex_unlock(&rdev->devlist_mtx);
+ break;
+ }
+
+ return 0;
+}
+
+static struct notifier_block cfg80211_netdev_notifier = {
+ .notifier_call = cfg80211_netdev_notifier_call,
+};
+
+static int cfg80211_init(void)
+{
+ int err = wiphy_sysfs_init();
+ if (err)
+ goto out_fail_sysfs;
+
+ err = register_netdevice_notifier(&cfg80211_netdev_notifier);
+ if (err)
+ goto out_fail_notifier;
+
+ ieee80211_debugfs_dir = debugfs_create_dir("ieee80211", NULL);
+
+ return 0;
+
+out_fail_notifier:
+ wiphy_sysfs_exit();
+out_fail_sysfs:
+ return err;
+}
+module_init(cfg80211_init);
+
+static void cfg80211_exit(void)
+{
+ debugfs_remove(ieee80211_debugfs_dir);
+ unregister_netdevice_notifier(&cfg80211_netdev_notifier);
+ wiphy_sysfs_exit();
+}
+module_exit(cfg80211_exit);
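Taken together, the exports above give drivers a two-phase lifecycle: allocate with wiphy_new() and publish with wiphy_register() on probe, then tear down with wiphy_unregister() and drop the last reference with wiphy_free() on remove (the actual kfree() happens in the class release callback added in sysfs.c below). A minimal sketch of the intended driver-side usage; my_ops, struct my_priv and the function names are hypothetical, not part of this patch:

struct my_priv {
	int channel;			/* per-device driver state */
};

static struct cfg80211_ops my_ops = {
	/* configuration callbacks would be filled in here */
};

static struct wiphy *my_probe(void)
{
	/* single allocation: registered_device plus private area */
	struct wiphy *wiphy = wiphy_new(&my_ops, sizeof(struct my_priv));

	if (!wiphy)
		return NULL;

	if (wiphy_register(wiphy)) {
		wiphy_free(wiphy);	/* drops the ref, release frees */
		return NULL;
	}
	return wiphy;
}

static void my_remove(struct wiphy *wiphy)
{
	wiphy_unregister(wiphy);	/* waits out in-flight commands */
	wiphy_free(wiphy);		/* final put_device() */
}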
diff --git a/net/wireless/core.h b/net/wireless/core.h
new file mode 100644
index 00000000000..158db1edb92
--- /dev/null
+++ b/net/wireless/core.h
@@ -0,0 +1,49 @@
+/*
+ * Wireless configuration interface internals.
+ *
+ * Copyright 2006, 2007 Johannes Berg <johannes@sipsolutions.net>
+ */
+#ifndef __NET_WIRELESS_CORE_H
+#define __NET_WIRELESS_CORE_H
+#include <linux/mutex.h>
+#include <linux/list.h>
+#include <linux/netdevice.h>
+#include <net/genetlink.h>
+#include <net/wireless.h>
+#include <net/cfg80211.h>
+
+struct cfg80211_registered_device {
+ struct cfg80211_ops *ops;
+ struct list_head list;
+ /* we hold this mutex during any call so that
+ * we cannot do multiple calls at once, and also
+ * to prevent the deregister call from proceeding
+ * while any call is in progress */
+ struct mutex mtx;
+
+ /* wiphy index, internal only */
+ int idx;
+
+ /* associated netdev list */
+ struct mutex devlist_mtx;
+ struct list_head netdev_list;
+
+ /* must be last because of the way we do wiphy_priv(),
+ * and it should at least be aligned to NETDEV_ALIGN */
+ struct wiphy wiphy __attribute__((__aligned__(NETDEV_ALIGN)));
+};
+
+static inline
+struct cfg80211_registered_device *wiphy_to_dev(struct wiphy *wiphy)
+{
+ BUG_ON(!wiphy);
+ return container_of(wiphy, struct cfg80211_registered_device, wiphy);
+}
+
+extern struct mutex cfg80211_drv_mutex;
+extern struct list_head cfg80211_drv_list;
+
+/* free object */
+extern void cfg80211_dev_free(struct cfg80211_registered_device *drv);
+
+#endif /* __NET_WIRELESS_CORE_H */
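Keeping the embedded wiphy last and NETDEV_ALIGN-aligned is what makes the single-allocation scheme in wiphy_new() work: the sizeof_priv bytes requested by the driver land immediately behind the wiphy. A sketch of the accessor this layout implies (illustrative only; the real helper is the wiphy_priv() referred to in the comment, living in the public header):

/* sketch: the private area sits right behind the embedded wiphy,
 * since both come from the one kzalloc() in wiphy_new() */
static inline void *example_wiphy_priv(struct wiphy *wiphy)
{
	BUG_ON(!wiphy);
	return (char *)wiphy + sizeof(*wiphy);
}

wiphy_to_dev() above is the inverse direction, recovering the containing cfg80211_registered_device with container_of().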
diff --git a/net/wireless/sysfs.c b/net/wireless/sysfs.c
new file mode 100644
index 00000000000..3ebae144296
--- /dev/null
+++ b/net/wireless/sysfs.c
@@ -0,0 +1,80 @@
+/*
+ * This file provides /sys/class/ieee80211/<wiphy name>/
+ * and some default attributes.
+ *
+ * Copyright 2005-2006 Jiri Benc <jbenc@suse.cz>
+ * Copyright 2006 Johannes Berg <johannes@sipsolutions.net>
+ *
+ * This file is GPLv2 as found in COPYING.
+ */
+
+#include <linux/device.h>
+#include <linux/module.h>
+#include <linux/netdevice.h>
+#include <linux/nl80211.h>
+#include <linux/rtnetlink.h>
+#include <net/cfg80211.h>
+#include "sysfs.h"
+#include "core.h"
+
+static inline struct cfg80211_registered_device *dev_to_rdev(
+ struct device *dev)
+{
+ return container_of(dev, struct cfg80211_registered_device, wiphy.dev);
+}
+
+static ssize_t _show_index(struct device *dev, struct device_attribute *attr,
+ char *buf)
+{
+ return sprintf(buf, "%d\n", dev_to_rdev(dev)->idx);
+}
+
+static ssize_t _show_permaddr(struct device *dev,
+ struct device_attribute *attr,
+ char *buf)
+{
+ char *addr = dev_to_rdev(dev)->wiphy.perm_addr;
+
+ return sprintf(buf, "%.2x:%.2x:%.2x:%.2x:%.2x:%.2x\n",
+ addr[0], addr[1], addr[2], addr[3], addr[4], addr[5]);
+}
+
+static struct device_attribute ieee80211_dev_attrs[] = {
+ __ATTR(index, S_IRUGO, _show_index, NULL),
+ __ATTR(macaddress, S_IRUGO, _show_permaddr, NULL),
+ {}
+};
+
+static void wiphy_dev_release(struct device *dev)
+{
+ struct cfg80211_registered_device *rdev = dev_to_rdev(dev);
+
+ cfg80211_dev_free(rdev);
+}
+
+static int wiphy_uevent(struct device *dev, char **envp,
+ int num_envp, char *buf, int size)
+{
+ /* TODO, we probably need stuff here */
+ return 0;
+}
+
+struct class ieee80211_class = {
+ .name = "ieee80211",
+ .owner = THIS_MODULE,
+ .dev_release = wiphy_dev_release,
+ .dev_attrs = ieee80211_dev_attrs,
+#ifdef CONFIG_HOTPLUG
+ .dev_uevent = wiphy_uevent,
+#endif
+};
+
+int wiphy_sysfs_init(void)
+{
+ return class_register(&ieee80211_class);
+}
+
+void wiphy_sysfs_exit(void)
+{
+ class_unregister(&ieee80211_class);
+}
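As the file header says, this yields /sys/class/ieee80211/<wiphy name>/ with two read-only attributes, index and macaddress. A userspace sketch that reads one of them; "phy0" assumes the first registered wiphy, per the PHY_NAME "%d" naming in core.c:

#include <stdio.h>

int main(void)
{
	char line[32];
	FILE *f = fopen("/sys/class/ieee80211/phy0/macaddress", "r");

	if (!f)
		return 1;
	if (fgets(line, sizeof(line), f))
		printf("permanent MAC: %s", line);	/* e.g. 00:11:22:33:44:55 */
	fclose(f);
	return 0;
}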
diff --git a/net/wireless/sysfs.h b/net/wireless/sysfs.h
new file mode 100644
index 00000000000..65acbebd371
--- /dev/null
+++ b/net/wireless/sysfs.h
@@ -0,0 +1,9 @@
+#ifndef __WIRELESS_SYSFS_H
+#define __WIRELESS_SYSFS_H
+
+extern int wiphy_sysfs_init(void);
+extern void wiphy_sysfs_exit(void);
+
+extern struct class ieee80211_class;
+
+#endif /* __WIRELESS_SYSFS_H */
diff --git a/net/core/wireless.c b/net/wireless/wext.c
index b07fe270a50..d6aaf65192e 100644
--- a/net/core/wireless.c
+++ b/net/wireless/wext.c
@@ -97,22 +97,10 @@
#include <linux/wireless.h> /* Pretty obvious */
#include <net/iw_handler.h> /* New driver API */
#include <net/netlink.h>
+#include <net/wext.h>
#include <asm/uaccess.h> /* copy_to_user() */
-/**************************** CONSTANTS ****************************/
-
-/* Debugging stuff */
-#undef WE_IOCTL_DEBUG /* Debug IOCTL API */
-#undef WE_RTNETLINK_DEBUG /* Debug RtNetlink API */
-#undef WE_EVENT_DEBUG /* Debug Event dispatcher */
-#undef WE_SPY_DEBUG /* Debug enhanced spy support */
-
-/* Options */
-//CONFIG_NET_WIRELESS_RTNETLINK /* Wireless requests over RtNetlink */
-#define WE_EVENT_RTNETLINK /* Propagate events using RtNetlink */
-#define WE_SET_EVENT /* Generate an event on some set commands */
-
/************************* GLOBAL VARIABLES *************************/
/*
* You should not use global variables, because of re-entrancy.
@@ -349,8 +337,7 @@ static const struct iw_ioctl_description standard_ioctl[] = {
.max_tokens = sizeof(struct iw_pmksa),
},
};
-static const unsigned standard_ioctl_num = (sizeof(standard_ioctl) /
- sizeof(struct iw_ioctl_description));
+static const unsigned standard_ioctl_num = ARRAY_SIZE(standard_ioctl);
/*
* Meta-data about all the additional standard Wireless Extension events
@@ -400,8 +387,7 @@ static const struct iw_ioctl_description standard_event[] = {
.max_tokens = sizeof(struct iw_pmkid_cand),
},
};
-static const unsigned standard_event_num = (sizeof(standard_event) /
- sizeof(struct iw_ioctl_description));
+static const unsigned standard_event_num = ARRAY_SIZE(standard_event);
/* Size (in bytes) of the various private data types */
static const char iw_priv_type_size[] = {
@@ -454,26 +440,24 @@ static const int event_type_pk_size[] = {
/* ---------------------------------------------------------------- */
/*
* Return the driver handler associated with a specific Wireless Extension.
- * Called from various place, so make sure it remains efficient.
*/
-static inline iw_handler get_handler(struct net_device *dev,
- unsigned int cmd)
+static iw_handler get_handler(struct net_device *dev, unsigned int cmd)
{
/* Don't "optimise" the following variable, it will crash */
unsigned int index; /* *MUST* be unsigned */
/* Check if we have some wireless handlers defined */
- if(dev->wireless_handlers == NULL)
+ if (dev->wireless_handlers == NULL)
return NULL;
/* Try as a standard command */
index = cmd - SIOCIWFIRST;
- if(index < dev->wireless_handlers->num_standard)
+ if (index < dev->wireless_handlers->num_standard)
return dev->wireless_handlers->standard[index];
/* Try as a private command */
index = cmd - SIOCIWFIRSTPRIV;
- if(index < dev->wireless_handlers->num_private)
+ if (index < dev->wireless_handlers->num_private)
return dev->wireless_handlers->private[index];
/* Not found */
@@ -484,15 +468,15 @@ static inline iw_handler get_handler(struct net_device *dev,
/*
* Get statistics out of the driver
*/
-static inline struct iw_statistics *get_wireless_stats(struct net_device *dev)
+static struct iw_statistics *get_wireless_stats(struct net_device *dev)
{
/* New location */
- if((dev->wireless_handlers != NULL) &&
+ if ((dev->wireless_handlers != NULL) &&
(dev->wireless_handlers->get_wireless_stats != NULL))
return dev->wireless_handlers->get_wireless_stats(dev);
/* Not found */
- return (struct iw_statistics *) NULL;
+ return NULL;
}
/* ---------------------------------------------------------------- */
@@ -514,14 +498,14 @@ static inline struct iw_statistics *get_wireless_stats(struct net_device *dev)
* netif_running(dev) test. I'm open on that one...
* Hopefully, the driver will remember to do a commit in "open()" ;-)
*/
-static inline int call_commit_handler(struct net_device * dev)
+static int call_commit_handler(struct net_device *dev)
{
- if((netif_running(dev)) &&
- (dev->wireless_handlers->standard[0] != NULL)) {
+ if ((netif_running(dev)) &&
+ (dev->wireless_handlers->standard[0] != NULL))
/* Call the commit handler on the driver */
return dev->wireless_handlers->standard[0](dev, NULL,
NULL, NULL);
- } else
+ else
return 0; /* Command completed successfully */
}
@@ -570,14 +554,13 @@ static int iw_handler_get_iwstats(struct net_device * dev,
struct iw_statistics *stats;
stats = get_wireless_stats(dev);
- if (stats != (struct iw_statistics *) NULL) {
-
+ if (stats) {
/* Copy statistics to extra */
memcpy(extra, stats, sizeof(struct iw_statistics));
wrqu->data.length = sizeof(struct iw_statistics);
/* Check if we need to clear the updated flag */
- if(wrqu->data.flags != 0)
+ if (wrqu->data.flags != 0)
stats->qual.updated &= ~IW_QUAL_ALL_UPDATED;
return 0;
} else
@@ -596,12 +579,12 @@ static int iw_handler_get_private(struct net_device * dev,
char * extra)
{
/* Check if the driver has something to export */
- if((dev->wireless_handlers->num_private_args == 0) ||
+ if ((dev->wireless_handlers->num_private_args == 0) ||
(dev->wireless_handlers->private_args == NULL))
return -EOPNOTSUPP;
/* Check if there is enough buffer up there */
- if(wrqu->data.length < dev->wireless_handlers->num_private_args) {
+ if (wrqu->data.length < dev->wireless_handlers->num_private_args) {
/* User space can't know in advance how large the buffer
* needs to be. Give it a hint, so that we can support
* any size buffer we want somewhat efficiently... */
@@ -636,8 +619,8 @@ static int iw_handler_get_private(struct net_device * dev,
/*
* Print one entry (line) of /proc/net/wireless
*/
-static __inline__ void wireless_seq_printf_stats(struct seq_file *seq,
- struct net_device *dev)
+static void wireless_seq_printf_stats(struct seq_file *seq,
+ struct net_device *dev)
{
/* Get stats from the driver */
struct iw_statistics *stats = get_wireless_stats(dev);
@@ -680,7 +663,7 @@ static int wireless_seq_show(struct seq_file *seq, void *v)
return 0;
}
-static struct seq_operations wireless_seq_ops = {
+static const struct seq_operations wireless_seq_ops = {
.start = dev_seq_start,
.next = dev_seq_next,
.stop = dev_seq_stop,
@@ -700,7 +683,7 @@ static const struct file_operations wireless_seq_fops = {
.release = seq_release,
};
-int __init wireless_proc_init(void)
+int __init wext_proc_init(void)
{
/* Create /proc/net/wireless entry */
if (!proc_net_fops_create("wireless", S_IRUGO, &wireless_seq_fops))
@@ -735,32 +718,24 @@ static int ioctl_standard_call(struct net_device * dev,
int ret = -EINVAL;
/* Get the description of the IOCTL */
- if((cmd - SIOCIWFIRST) >= standard_ioctl_num)
+ if ((cmd - SIOCIWFIRST) >= standard_ioctl_num)
return -EOPNOTSUPP;
descr = &(standard_ioctl[cmd - SIOCIWFIRST]);
-#ifdef WE_IOCTL_DEBUG
- printk(KERN_DEBUG "%s (WE) : Found standard handler for 0x%04X\n",
- ifr->ifr_name, cmd);
- printk(KERN_DEBUG "%s (WE) : Header type : %d, Token type : %d, size : %d, token : %d\n", dev->name, descr->header_type, descr->token_type, descr->token_size, descr->max_tokens);
-#endif /* WE_IOCTL_DEBUG */
-
/* Prepare the call */
info.cmd = cmd;
info.flags = 0;
/* Check if we have a pointer to user space data or not */
- if(descr->header_type != IW_HEADER_TYPE_POINT) {
+ if (descr->header_type != IW_HEADER_TYPE_POINT) {
/* No extra arguments. Trivial to handle */
ret = handler(dev, &info, &(iwr->u), NULL);
-#ifdef WE_SET_EVENT
/* Generate an event to notify listeners of the change */
- if((descr->flags & IW_DESCR_FLAG_EVENT) &&
+ if ((descr->flags & IW_DESCR_FLAG_EVENT) &&
((ret == 0) || (ret == -EIWCOMMIT)))
wireless_send_event(dev, cmd, &(iwr->u), NULL);
-#endif /* WE_SET_EVENT */
} else {
char * extra;
int extra_size;
@@ -800,19 +775,19 @@ static int ioctl_standard_call(struct net_device * dev,
iwr->u.data.length -= essid_compat;
/* Check what user space is giving us */
- if(IW_IS_SET(cmd)) {
+ if (IW_IS_SET(cmd)) {
/* Check NULL pointer */
- if((iwr->u.data.pointer == NULL) &&
+ if ((iwr->u.data.pointer == NULL) &&
(iwr->u.data.length != 0))
return -EFAULT;
/* Check if number of token fits within bounds */
- if(iwr->u.data.length > descr->max_tokens)
+ if (iwr->u.data.length > descr->max_tokens)
return -E2BIG;
- if(iwr->u.data.length < descr->min_tokens)
+ if (iwr->u.data.length < descr->min_tokens)
return -EINVAL;
} else {
/* Check NULL pointer */
- if(iwr->u.data.pointer == NULL)
+ if (iwr->u.data.pointer == NULL)
return -EFAULT;
/* Save user space buffer size for checking */
user_length = iwr->u.data.length;
@@ -822,7 +797,7 @@ static int ioctl_standard_call(struct net_device * dev,
* implied by the test at the end. */
/* Support for very large requests */
- if((descr->flags & IW_DESCR_FLAG_NOMAX) &&
+ if ((descr->flags & IW_DESCR_FLAG_NOMAX) &&
(user_length > descr->max_tokens)) {
/* Allow userspace to GET more than max so
* we can support any size GET requests.
@@ -835,20 +810,14 @@ static int ioctl_standard_call(struct net_device * dev,
}
}
-#ifdef WE_IOCTL_DEBUG
- printk(KERN_DEBUG "%s (WE) : Malloc %d bytes\n",
- dev->name, extra_size);
-#endif /* WE_IOCTL_DEBUG */
-
/* Create the kernel buffer */
/* kzalloc ensures NULL-termination for essid_compat */
extra = kzalloc(extra_size, GFP_KERNEL);
- if (extra == NULL) {
+ if (extra == NULL)
return -ENOMEM;
- }
/* If it is a SET, get all the extra data in here */
- if(IW_IS_SET(cmd) && (iwr->u.data.length != 0)) {
+ if (IW_IS_SET(cmd) && (iwr->u.data.length != 0)) {
err = copy_from_user(extra, iwr->u.data.pointer,
iwr->u.data.length *
descr->token_size);
@@ -856,11 +825,6 @@ static int ioctl_standard_call(struct net_device * dev,
kfree(extra);
return -EFAULT;
}
-#ifdef WE_IOCTL_DEBUG
- printk(KERN_DEBUG "%s (WE) : Got %d bytes\n",
- dev->name,
- iwr->u.data.length * descr->token_size);
-#endif /* WE_IOCTL_DEBUG */
}
/* Call the handler */
@@ -871,7 +835,7 @@ static int ioctl_standard_call(struct net_device * dev,
/* If we have something to return to the user */
if (!ret && IW_IS_GET(cmd)) {
/* Check if there is enough buffer up there */
- if(user_length < iwr->u.data.length) {
+ if (user_length < iwr->u.data.length) {
kfree(extra);
return -E2BIG;
}
@@ -881,18 +845,12 @@ static int ioctl_standard_call(struct net_device * dev,
descr->token_size);
if (err)
ret = -EFAULT;
-#ifdef WE_IOCTL_DEBUG
- printk(KERN_DEBUG "%s (WE) : Wrote %d bytes\n",
- dev->name,
- iwr->u.data.length * descr->token_size);
-#endif /* WE_IOCTL_DEBUG */
}
-#ifdef WE_SET_EVENT
/* Generate an event to notify listeners of the change */
- if((descr->flags & IW_DESCR_FLAG_EVENT) &&
+ if ((descr->flags & IW_DESCR_FLAG_EVENT) &&
((ret == 0) || (ret == -EIWCOMMIT))) {
- if(descr->flags & IW_DESCR_FLAG_RESTRICT)
+ if (descr->flags & IW_DESCR_FLAG_RESTRICT)
/* If the event is restricted, don't
* export the payload */
wireless_send_event(dev, cmd, &(iwr->u), NULL);
@@ -900,14 +858,13 @@ static int ioctl_standard_call(struct net_device * dev,
wireless_send_event(dev, cmd, &(iwr->u),
extra);
}
-#endif /* WE_SET_EVENT */
/* Cleanup - I told you it wasn't that long ;-) */
kfree(extra);
}
/* Call commit handler if needed and defined */
- if(ret == -EIWCOMMIT)
+ if (ret == -EIWCOMMIT)
ret = call_commit_handler(dev);
/* Here, we will generate the appropriate event if needed */
@@ -931,10 +888,8 @@ static int ioctl_standard_call(struct net_device * dev,
 * an iw_handler but process it in your ioctl handler (i.e. use the
* old driver API).
*/
-static inline int ioctl_private_call(struct net_device * dev,
- struct ifreq * ifr,
- unsigned int cmd,
- iw_handler handler)
+static int ioctl_private_call(struct net_device *dev, struct ifreq *ifr,
+ unsigned int cmd, iw_handler handler)
{
struct iwreq * iwr = (struct iwreq *) ifr;
const struct iw_priv_args * descr = NULL;
@@ -944,28 +899,18 @@ static inline int ioctl_private_call(struct net_device * dev,
int ret = -EINVAL;
/* Get the description of the IOCTL */
- for(i = 0; i < dev->wireless_handlers->num_private_args; i++)
- if(cmd == dev->wireless_handlers->private_args[i].cmd) {
+ for (i = 0; i < dev->wireless_handlers->num_private_args; i++)
+ if (cmd == dev->wireless_handlers->private_args[i].cmd) {
descr = &(dev->wireless_handlers->private_args[i]);
break;
}
-#ifdef WE_IOCTL_DEBUG
- printk(KERN_DEBUG "%s (WE) : Found private handler for 0x%04X\n",
- ifr->ifr_name, cmd);
- if(descr) {
- printk(KERN_DEBUG "%s (WE) : Name %s, set %X, get %X\n",
- dev->name, descr->name,
- descr->set_args, descr->get_args);
- }
-#endif /* WE_IOCTL_DEBUG */
-
/* Compute the size of the set/get arguments */
- if(descr != NULL) {
- if(IW_IS_SET(cmd)) {
+ if (descr != NULL) {
+ if (IW_IS_SET(cmd)) {
int offset = 0; /* For sub-ioctls */
/* Check for sub-ioctl handler */
- if(descr->name[0] == '\0')
+ if (descr->name[0] == '\0')
/* Reserve one int for sub-ioctl index */
offset = sizeof(__u32);
@@ -973,7 +918,7 @@ static inline int ioctl_private_call(struct net_device * dev,
extra_size = get_priv_size(descr->set_args);
 /* Does it fit in iwr ? */
- if((descr->set_args & IW_PRIV_SIZE_FIXED) &&
+ if ((descr->set_args & IW_PRIV_SIZE_FIXED) &&
((extra_size + offset) <= IFNAMSIZ))
extra_size = 0;
} else {
@@ -981,7 +926,7 @@ static inline int ioctl_private_call(struct net_device * dev,
extra_size = get_priv_size(descr->get_args);
 /* Does it fit in iwr ? */
- if((descr->get_args & IW_PRIV_SIZE_FIXED) &&
+ if ((descr->get_args & IW_PRIV_SIZE_FIXED) &&
(extra_size <= IFNAMSIZ))
extra_size = 0;
}
@@ -992,7 +937,7 @@ static inline int ioctl_private_call(struct net_device * dev,
info.flags = 0;
/* Check if we have a pointer to user space data or not. */
- if(extra_size == 0) {
+ if (extra_size == 0) {
/* No extra arguments. Trivial to handle */
ret = handler(dev, &info, &(iwr->u), (char *) &(iwr->u));
} else {
@@ -1000,46 +945,33 @@ static inline int ioctl_private_call(struct net_device * dev,
int err;
/* Check what user space is giving us */
- if(IW_IS_SET(cmd)) {
+ if (IW_IS_SET(cmd)) {
/* Check NULL pointer */
- if((iwr->u.data.pointer == NULL) &&
+ if ((iwr->u.data.pointer == NULL) &&
(iwr->u.data.length != 0))
return -EFAULT;
 /* Does it fit within bounds ? */
- if(iwr->u.data.length > (descr->set_args &
+ if (iwr->u.data.length > (descr->set_args &
IW_PRIV_SIZE_MASK))
return -E2BIG;
- } else {
- /* Check NULL pointer */
- if(iwr->u.data.pointer == NULL)
- return -EFAULT;
- }
-
-#ifdef WE_IOCTL_DEBUG
- printk(KERN_DEBUG "%s (WE) : Malloc %d bytes\n",
- dev->name, extra_size);
-#endif /* WE_IOCTL_DEBUG */
+ } else if (iwr->u.data.pointer == NULL)
+ return -EFAULT;
/* Always allocate for max space. Easier, and won't last
* long... */
extra = kmalloc(extra_size, GFP_KERNEL);
- if (extra == NULL) {
+ if (extra == NULL)
return -ENOMEM;
- }
/* If it is a SET, get all the extra data in here */
- if(IW_IS_SET(cmd) && (iwr->u.data.length != 0)) {
+ if (IW_IS_SET(cmd) && (iwr->u.data.length != 0)) {
err = copy_from_user(extra, iwr->u.data.pointer,
extra_size);
if (err) {
kfree(extra);
return -EFAULT;
}
-#ifdef WE_IOCTL_DEBUG
- printk(KERN_DEBUG "%s (WE) : Got %d elem\n",
- dev->name, iwr->u.data.length);
-#endif /* WE_IOCTL_DEBUG */
}
/* Call the handler */
@@ -1059,10 +991,6 @@ static inline int ioctl_private_call(struct net_device * dev,
extra_size);
if (err)
ret = -EFAULT;
-#ifdef WE_IOCTL_DEBUG
- printk(KERN_DEBUG "%s (WE) : Wrote %d elem\n",
- dev->name, iwr->u.data.length);
-#endif /* WE_IOCTL_DEBUG */
}
/* Cleanup - I told you it wasn't that long ;-) */
@@ -1071,7 +999,7 @@ static inline int ioctl_private_call(struct net_device * dev,
/* Call commit handler if needed and defined */
- if(ret == -EIWCOMMIT)
+ if (ret == -EIWCOMMIT)
ret = call_commit_handler(dev);
return ret;
@@ -1079,11 +1007,10 @@ static inline int ioctl_private_call(struct net_device * dev,
/* ---------------------------------------------------------------- */
/*
- * Main IOCTl dispatcher. Called from the main networking code
- * (dev_ioctl() in net/core/dev.c).
+ * Main IOCTl dispatcher.
* Check the type of IOCTL and call the appropriate wrapper...
*/
-int wireless_process_ioctl(struct ifreq *ifr, unsigned int cmd)
+static int wireless_process_ioctl(struct ifreq *ifr, unsigned int cmd)
{
struct net_device *dev;
iw_handler handler;
@@ -1098,789 +1025,54 @@ int wireless_process_ioctl(struct ifreq *ifr, unsigned int cmd)
/* A bunch of special cases, then the generic case...
* Note that 'cmd' is already filtered in dev_ioctl() with
* (cmd >= SIOCIWFIRST && cmd <= SIOCIWLAST) */
- switch(cmd)
- {
- case SIOCGIWSTATS:
- /* Get Wireless Stats */
- return ioctl_standard_call(dev,
- ifr,
- cmd,
- &iw_handler_get_iwstats);
-
- case SIOCGIWPRIV:
- /* Check if we have some wireless handlers defined */
- if(dev->wireless_handlers != NULL) {
- /* We export to user space the definition of
- * the private handler ourselves */
- return ioctl_standard_call(dev,
- ifr,
- cmd,
- &iw_handler_get_private);
- }
- // ## Fall-through for old API ##
- default:
- /* Generic IOCTL */
- /* Basic check */
- if (!netif_device_present(dev))
- return -ENODEV;
- /* New driver API : try to find the handler */
- handler = get_handler(dev, cmd);
- if(handler != NULL) {
- /* Standard and private are not the same */
- if(cmd < SIOCIWFIRSTPRIV)
- return ioctl_standard_call(dev,
- ifr,
- cmd,
- handler);
- else
- return ioctl_private_call(dev,
- ifr,
- cmd,
- handler);
- }
- /* Old driver API : call driver ioctl handler */
- if (dev->do_ioctl) {
- return dev->do_ioctl(dev, ifr, cmd);
- }
- return -EOPNOTSUPP;
- }
- /* Not reached */
- return -EINVAL;
-}
-
-/********************** RTNETLINK REQUEST API **********************/
-/*
- * The alternate user space API to configure all those Wireless Extensions
- * is through RtNetlink.
- * This API support only the new driver API (iw_handler).
- *
- * This RtNetlink API use the same query/reply model as the ioctl API.
- * Maximum effort has been done to fit in the RtNetlink model, and
- * we support both RtNetlink Set and RtNelink Get operations.
- * On the other hand, we don't offer Dump operations because of the
- * following reasons :
- * o Large number of parameters, most optional
- * o Large size of some parameters (> 100 bytes)
- * o Each parameters need to be extracted from hardware
- * o Scan requests can take seconds and disable network activity.
- * Because of this high cost/overhead, we want to return only the
- * parameters the user application is really interested in.
- * We could offer partial Dump using the IW_DESCR_FLAG_DUMP flag.
- *
- * The API uses the standard RtNetlink socket. When the RtNetlink code
- * find a IFLA_WIRELESS field in a RtNetlink SET_LINK request,
- * it calls here.
- */
-
-#ifdef CONFIG_NET_WIRELESS_RTNETLINK
-/* ---------------------------------------------------------------- */
-/*
- * Wrapper to call a standard Wireless Extension GET handler.
- * We do various checks and call the handler with the proper args.
- */
-static int rtnetlink_standard_get(struct net_device * dev,
- struct iw_event * request,
- int request_len,
- iw_handler handler,
- char ** p_buf,
- int * p_len)
-{
- const struct iw_ioctl_description * descr = NULL;
- unsigned int cmd;
- union iwreq_data * wrqu;
- int hdr_len;
- struct iw_request_info info;
- char * buffer = NULL;
- int buffer_size = 0;
- int ret = -EINVAL;
-
- /* Get the description of the Request */
- cmd = request->cmd;
- if((cmd - SIOCIWFIRST) >= standard_ioctl_num)
- return -EOPNOTSUPP;
- descr = &(standard_ioctl[cmd - SIOCIWFIRST]);
-
-#ifdef WE_RTNETLINK_DEBUG
- printk(KERN_DEBUG "%s (WE.r) : Found standard handler for 0x%04X\n",
- dev->name, cmd);
- printk(KERN_DEBUG "%s (WE.r) : Header type : %d, Token type : %d, size : %d, token : %d\n", dev->name, descr->header_type, descr->token_type, descr->token_size, descr->max_tokens);
-#endif /* WE_RTNETLINK_DEBUG */
-
- /* Check if wrqu is complete */
- hdr_len = event_type_size[descr->header_type];
- if(request_len < hdr_len) {
-#ifdef WE_RTNETLINK_DEBUG
- printk(KERN_DEBUG
- "%s (WE.r) : Wireless request too short (%d)\n",
- dev->name, request_len);
-#endif /* WE_RTNETLINK_DEBUG */
- return -EINVAL;
- }
-
- /* Prepare the call */
- info.cmd = cmd;
- info.flags = 0;
-
- /* Check if we have extra data in the reply or not */
- if(descr->header_type != IW_HEADER_TYPE_POINT) {
-
- /* Create the kernel buffer that we will return.
- * It's at an offset to match the TYPE_POINT case... */
- buffer_size = request_len + IW_EV_POINT_OFF;
- buffer = kmalloc(buffer_size, GFP_KERNEL);
- if (buffer == NULL) {
- return -ENOMEM;
- }
- /* Copy event data */
- memcpy(buffer + IW_EV_POINT_OFF, request, request_len);
- /* Use our own copy of wrqu */
- wrqu = (union iwreq_data *) (buffer + IW_EV_POINT_OFF
- + IW_EV_LCP_PK_LEN);
-
- /* No extra arguments. Trivial to handle */
- ret = handler(dev, &info, wrqu, NULL);
-
- } else {
- union iwreq_data wrqu_point;
- char * extra = NULL;
- int extra_size = 0;
+ if (cmd == SIOCGIWSTATS)
+ return ioctl_standard_call(dev, ifr, cmd,
+ &iw_handler_get_iwstats);
- /* Get a temp copy of wrqu (skip pointer) */
- memcpy(((char *) &wrqu_point) + IW_EV_POINT_OFF,
- ((char *) request) + IW_EV_LCP_PK_LEN,
- IW_EV_POINT_LEN - IW_EV_LCP_PK_LEN);
-
- /* Calculate space needed by arguments. Always allocate
- * for max space. Easier, and won't last long... */
- extra_size = descr->max_tokens * descr->token_size;
- /* Support for very large requests */
- if((descr->flags & IW_DESCR_FLAG_NOMAX) &&
- (wrqu_point.data.length > descr->max_tokens))
- extra_size = (wrqu_point.data.length
- * descr->token_size);
- buffer_size = extra_size + IW_EV_POINT_PK_LEN + IW_EV_POINT_OFF;
-#ifdef WE_RTNETLINK_DEBUG
- printk(KERN_DEBUG "%s (WE.r) : Malloc %d bytes (%d bytes)\n",
- dev->name, extra_size, buffer_size);
-#endif /* WE_RTNETLINK_DEBUG */
-
- /* Create the kernel buffer that we will return */
- buffer = kmalloc(buffer_size, GFP_KERNEL);
- if (buffer == NULL) {
- return -ENOMEM;
- }
-
- /* Put wrqu in the right place (just before extra).
- * Leave space for IWE header and dummy pointer...
- * Note that IW_EV_LCP_PK_LEN==4 bytes, so it's still aligned.
- */
- memcpy(buffer + IW_EV_LCP_PK_LEN + IW_EV_POINT_OFF,
- ((char *) &wrqu_point) + IW_EV_POINT_OFF,
- IW_EV_POINT_PK_LEN - IW_EV_LCP_PK_LEN);
- wrqu = (union iwreq_data *) (buffer + IW_EV_LCP_PK_LEN);
-
- /* Extra comes logically after that. Offset +12 bytes. */
- extra = buffer + IW_EV_POINT_OFF + IW_EV_POINT_PK_LEN;
-
- /* Call the handler */
- ret = handler(dev, &info, wrqu, extra);
-
- /* Calculate real returned length */
- extra_size = (wrqu->data.length * descr->token_size);
- /* Re-adjust reply size */
- request->len = extra_size + IW_EV_POINT_PK_LEN;
-
- /* Put the iwe header where it should, i.e. scrap the
- * dummy pointer. */
- memcpy(buffer + IW_EV_POINT_OFF, request, IW_EV_LCP_PK_LEN);
-
-#ifdef WE_RTNETLINK_DEBUG
- printk(KERN_DEBUG "%s (WE.r) : Reply 0x%04X, hdr_len %d, tokens %d, extra_size %d, buffer_size %d\n", dev->name, cmd, hdr_len, wrqu->data.length, extra_size, buffer_size);
-#endif /* WE_RTNETLINK_DEBUG */
-
- /* Check if there is enough buffer up there */
- if(wrqu_point.data.length < wrqu->data.length)
- ret = -E2BIG;
- }
-
- /* Return the buffer to the caller */
- if (!ret) {
- *p_buf = buffer;
- *p_len = request->len;
- } else {
- /* Cleanup */
- if(buffer)
- kfree(buffer);
- }
-
- return ret;
-}
-
-/* ---------------------------------------------------------------- */
-/*
- * Wrapper to call a standard Wireless Extension SET handler.
- * We do various checks and call the handler with the proper args.
- */
-static inline int rtnetlink_standard_set(struct net_device * dev,
- struct iw_event * request,
- int request_len,
- iw_handler handler)
-{
- const struct iw_ioctl_description * descr = NULL;
- unsigned int cmd;
- union iwreq_data * wrqu;
- union iwreq_data wrqu_point;
- int hdr_len;
- char * extra = NULL;
- int extra_size = 0;
- struct iw_request_info info;
- int ret = -EINVAL;
-
- /* Get the description of the Request */
- cmd = request->cmd;
- if((cmd - SIOCIWFIRST) >= standard_ioctl_num)
- return -EOPNOTSUPP;
- descr = &(standard_ioctl[cmd - SIOCIWFIRST]);
-
-#ifdef WE_RTNETLINK_DEBUG
- printk(KERN_DEBUG "%s (WE.r) : Found standard SET handler for 0x%04X\n",
- dev->name, cmd);
- printk(KERN_DEBUG "%s (WE.r) : Header type : %d, Token type : %d, size : %d, token : %d\n", dev->name, descr->header_type, descr->token_type, descr->token_size, descr->max_tokens);
-#endif /* WE_RTNETLINK_DEBUG */
-
- /* Extract fixed header from request. This is properly aligned. */
- wrqu = (union iwreq_data *) (((char *) request) + IW_EV_LCP_PK_LEN);
-
- /* Check if wrqu is complete */
- hdr_len = event_type_pk_size[descr->header_type];
- if(request_len < hdr_len) {
-#ifdef WE_RTNETLINK_DEBUG
- printk(KERN_DEBUG
- "%s (WE.r) : Wireless request too short (%d)\n",
- dev->name, request_len);
-#endif /* WE_RTNETLINK_DEBUG */
- return -EINVAL;
- }
-
- /* Prepare the call */
- info.cmd = cmd;
- info.flags = 0;
-
- /* Check if we have extra data in the request or not */
- if(descr->header_type != IW_HEADER_TYPE_POINT) {
-
- /* No extra arguments. Trivial to handle */
- ret = handler(dev, &info, wrqu, NULL);
-
- } else {
- int extra_len;
-
- /* Put wrqu in the right place (skip pointer) */
- memcpy(((char *) &wrqu_point) + IW_EV_POINT_OFF,
- wrqu, IW_EV_POINT_PK_LEN - IW_EV_LCP_PK_LEN);
- /* Don't forget about the event code... */
- wrqu = &wrqu_point;
-
- /* Check if number of token fits within bounds */
- if(wrqu_point.data.length > descr->max_tokens)
- return -E2BIG;
- if(wrqu_point.data.length < descr->min_tokens)
- return -EINVAL;
-
- /* Real length of payload */
- extra_len = wrqu_point.data.length * descr->token_size;
-
- /* Check if request is self consistent */
- if((request_len - hdr_len) < extra_len) {
-#ifdef WE_RTNETLINK_DEBUG
- printk(KERN_DEBUG "%s (WE.r) : Wireless request data too short (%d)\n",
- dev->name, extra_size);
-#endif /* WE_RTNETLINK_DEBUG */
- return -EINVAL;
- }
-
-#ifdef WE_RTNETLINK_DEBUG
- printk(KERN_DEBUG "%s (WE.r) : Malloc %d bytes\n",
- dev->name, extra_size);
-#endif /* WE_RTNETLINK_DEBUG */
-
- /* Always allocate for max space. Easier, and won't last
- * long... */
- extra_size = descr->max_tokens * descr->token_size;
- extra = kmalloc(extra_size, GFP_KERNEL);
- if (extra == NULL)
- return -ENOMEM;
-
- /* Copy extra in aligned buffer */
- memcpy(extra, ((char *) request) + hdr_len, extra_len);
-
- /* Call the handler */
- ret = handler(dev, &info, &wrqu_point, extra);
- }
-
-#ifdef WE_SET_EVENT
- /* Generate an event to notify listeners of the change */
- if((descr->flags & IW_DESCR_FLAG_EVENT) &&
- ((ret == 0) || (ret == -EIWCOMMIT))) {
- if(descr->flags & IW_DESCR_FLAG_RESTRICT)
- /* If the event is restricted, don't
- * export the payload */
- wireless_send_event(dev, cmd, wrqu, NULL);
- else
- wireless_send_event(dev, cmd, wrqu, extra);
- }
-#endif /* WE_SET_EVENT */
-
- /* Cleanup - I told you it wasn't that long ;-) */
- if(extra)
- kfree(extra);
-
- /* Call commit handler if needed and defined */
- if(ret == -EIWCOMMIT)
- ret = call_commit_handler(dev);
-
- return ret;
-}
-
-/* ---------------------------------------------------------------- */
-/*
- * Wrapper to call a private Wireless Extension GET handler.
- * Same as above...
- * It's not as nice and slimline as the standard wrapper. The cause
- * is struct iw_priv_args, which was not really designed for the
- * job we are going here.
- *
- * IMPORTANT : This function prevent to set and get data on the same
- * IOCTL and enforce the SET/GET convention. Not doing it would be
- * far too hairy...
- * If you need to set and get data at the same time, please don't use
- * a iw_handler but process it in your ioctl handler (i.e. use the
- * old driver API).
- */
-static inline int rtnetlink_private_get(struct net_device * dev,
- struct iw_event * request,
- int request_len,
- iw_handler handler,
- char ** p_buf,
- int * p_len)
-{
- const struct iw_priv_args * descr = NULL;
- unsigned int cmd;
- union iwreq_data * wrqu;
- int hdr_len;
- struct iw_request_info info;
- int extra_size = 0;
- int i;
- char * buffer = NULL;
- int buffer_size = 0;
- int ret = -EINVAL;
-
- /* Get the description of the Request */
- cmd = request->cmd;
- for(i = 0; i < dev->wireless_handlers->num_private_args; i++)
- if(cmd == dev->wireless_handlers->private_args[i].cmd) {
- descr = &(dev->wireless_handlers->private_args[i]);
- break;
- }
- if(descr == NULL)
- return -EOPNOTSUPP;
-
-#ifdef WE_RTNETLINK_DEBUG
- printk(KERN_DEBUG "%s (WE.r) : Found private handler for 0x%04X\n",
- dev->name, cmd);
- printk(KERN_DEBUG "%s (WE.r) : Name %s, set %X, get %X\n",
- dev->name, descr->name, descr->set_args, descr->get_args);
-#endif /* WE_RTNETLINK_DEBUG */
-
- /* Compute the max size of the get arguments */
- extra_size = get_priv_size(descr->get_args);
-
- /* Does it fits in wrqu ? */
- if((descr->get_args & IW_PRIV_SIZE_FIXED) &&
- (extra_size <= IFNAMSIZ)) {
- hdr_len = extra_size;
- extra_size = 0;
- } else {
- hdr_len = IW_EV_POINT_PK_LEN;
- }
-
- /* Check if wrqu is complete */
- if(request_len < hdr_len) {
-#ifdef WE_RTNETLINK_DEBUG
- printk(KERN_DEBUG
- "%s (WE.r) : Wireless request too short (%d)\n",
- dev->name, request_len);
-#endif /* WE_RTNETLINK_DEBUG */
- return -EINVAL;
- }
-
- /* Prepare the call */
- info.cmd = cmd;
- info.flags = 0;
-
- /* Check if we have a pointer to user space data or not. */
- if(extra_size == 0) {
-
- /* Create the kernel buffer that we will return.
- * It's at an offset to match the TYPE_POINT case... */
- buffer_size = request_len + IW_EV_POINT_OFF;
- buffer = kmalloc(buffer_size, GFP_KERNEL);
- if (buffer == NULL) {
- return -ENOMEM;
- }
- /* Copy event data */
- memcpy(buffer + IW_EV_POINT_OFF, request, request_len);
- /* Use our own copy of wrqu */
- wrqu = (union iwreq_data *) (buffer + IW_EV_POINT_OFF
- + IW_EV_LCP_PK_LEN);
-
- /* No extra arguments. Trivial to handle */
- ret = handler(dev, &info, wrqu, (char *) wrqu);
-
- } else {
- char * extra;
-
- /* Buffer for full reply */
- buffer_size = extra_size + IW_EV_POINT_PK_LEN + IW_EV_POINT_OFF;
-
-#ifdef WE_RTNETLINK_DEBUG
- printk(KERN_DEBUG "%s (WE.r) : Malloc %d bytes (%d bytes)\n",
- dev->name, extra_size, buffer_size);
-#endif /* WE_RTNETLINK_DEBUG */
-
- /* Create the kernel buffer that we will return */
- buffer = kmalloc(buffer_size, GFP_KERNEL);
- if (buffer == NULL) {
- return -ENOMEM;
- }
-
- /* Put wrqu in the right place (just before extra).
- * Leave space for IWE header and dummy pointer...
- * Note that IW_EV_LCP_PK_LEN==4 bytes, so it's still aligned.
- */
- memcpy(buffer + IW_EV_LCP_PK_LEN + IW_EV_POINT_OFF,
- ((char *) request) + IW_EV_LCP_PK_LEN,
- IW_EV_POINT_PK_LEN - IW_EV_LCP_PK_LEN);
- wrqu = (union iwreq_data *) (buffer + IW_EV_LCP_PK_LEN);
-
- /* Extra comes logically after that. Offset +12 bytes. */
- extra = buffer + IW_EV_POINT_OFF + IW_EV_POINT_PK_LEN;
-
- /* Call the handler */
- ret = handler(dev, &info, wrqu, extra);
-
- /* Adjust for the actual length if it's variable,
- * avoid leaking kernel bits outside. */
- if (!(descr->get_args & IW_PRIV_SIZE_FIXED))
- extra_size = adjust_priv_size(descr->get_args, wrqu);
- /* Re-adjust reply size */
- request->len = extra_size + IW_EV_POINT_PK_LEN;
-
- /* Put the iwe header where it should, i.e. scrap the
- * dummy pointer. */
- memcpy(buffer + IW_EV_POINT_OFF, request, IW_EV_LCP_PK_LEN);
-
-#ifdef WE_RTNETLINK_DEBUG
- printk(KERN_DEBUG "%s (WE.r) : Reply 0x%04X, hdr_len %d, tokens %d, extra_size %d, buffer_size %d\n", dev->name, cmd, hdr_len, wrqu->data.length, extra_size, buffer_size);
-#endif /* WE_RTNETLINK_DEBUG */
- }
-
- /* Return the buffer to the caller */
- if (!ret) {
- *p_buf = buffer;
- *p_len = request->len;
- } else {
- /* Cleanup */
- if(buffer)
- kfree(buffer);
- }
-
- return ret;
-}
-
-/* ---------------------------------------------------------------- */
-/*
- * Wrapper to call a private Wireless Extension SET handler.
- * Same as above...
- * It's not as nice and slimline as the standard wrapper. The cause
- * is struct iw_priv_args, which was not really designed for the
- * job we are going here.
- *
- * IMPORTANT : This function prevent to set and get data on the same
- * IOCTL and enforce the SET/GET convention. Not doing it would be
- * far too hairy...
- * If you need to set and get data at the same time, please don't use
- * a iw_handler but process it in your ioctl handler (i.e. use the
- * old driver API).
- */
-static inline int rtnetlink_private_set(struct net_device * dev,
- struct iw_event * request,
- int request_len,
- iw_handler handler)
-{
- const struct iw_priv_args * descr = NULL;
- unsigned int cmd;
- union iwreq_data * wrqu;
- union iwreq_data wrqu_point;
- int hdr_len;
- char * extra = NULL;
- int extra_size = 0;
- int offset = 0; /* For sub-ioctls */
- struct iw_request_info info;
- int i;
- int ret = -EINVAL;
-
- /* Get the description of the Request */
- cmd = request->cmd;
- for(i = 0; i < dev->wireless_handlers->num_private_args; i++)
- if(cmd == dev->wireless_handlers->private_args[i].cmd) {
- descr = &(dev->wireless_handlers->private_args[i]);
- break;
- }
- if(descr == NULL)
- return -EOPNOTSUPP;
-
-#ifdef WE_RTNETLINK_DEBUG
- printk(KERN_DEBUG "%s (WE.r) : Found private handler for 0x%04X\n",
- ifr->ifr_name, cmd);
- printk(KERN_DEBUG "%s (WE.r) : Name %s, set %X, get %X\n",
- dev->name, descr->name, descr->set_args, descr->get_args);
-#endif /* WE_RTNETLINK_DEBUG */
-
- /* Compute the size of the set arguments */
- /* Check for sub-ioctl handler */
- if(descr->name[0] == '\0')
- /* Reserve one int for sub-ioctl index */
- offset = sizeof(__u32);
-
- /* Size of set arguments */
- extra_size = get_priv_size(descr->set_args);
-
- /* Does it fits in wrqu ? */
- if((descr->set_args & IW_PRIV_SIZE_FIXED) &&
- (extra_size <= IFNAMSIZ)) {
- hdr_len = IW_EV_LCP_PK_LEN + extra_size;
- extra_size = 0;
- } else {
- hdr_len = IW_EV_POINT_PK_LEN;
- }
-
- /* Extract fixed header from request. This is properly aligned. */
- wrqu = (union iwreq_data *) (((char *) request) + IW_EV_LCP_PK_LEN);
-
- /* Check if wrqu is complete */
- if(request_len < hdr_len) {
-#ifdef WE_RTNETLINK_DEBUG
- printk(KERN_DEBUG
- "%s (WE.r) : Wireless request too short (%d)\n",
- dev->name, request_len);
-#endif /* WE_RTNETLINK_DEBUG */
- return -EINVAL;
- }
-
- /* Prepare the call */
- info.cmd = cmd;
- info.flags = 0;
-
- /* Check if we have a pointer to user space data or not. */
- if(extra_size == 0) {
-
- /* No extra arguments. Trivial to handle */
- ret = handler(dev, &info, wrqu, (char *) wrqu);
-
- } else {
- int extra_len;
-
- /* Put wrqu in the right place (skip pointer) */
- memcpy(((char *) &wrqu_point) + IW_EV_POINT_OFF,
- wrqu, IW_EV_POINT_PK_LEN - IW_EV_LCP_PK_LEN);
-
- /* Does it fits within bounds ? */
- if(wrqu_point.data.length > (descr->set_args &
- IW_PRIV_SIZE_MASK))
- return -E2BIG;
-
- /* Real length of payload */
- extra_len = adjust_priv_size(descr->set_args, &wrqu_point);
-
- /* Check if request is self consistent */
- if((request_len - hdr_len) < extra_len) {
-#ifdef WE_RTNETLINK_DEBUG
- printk(KERN_DEBUG "%s (WE.r) : Wireless request data too short (%d)\n",
- dev->name, extra_size);
-#endif /* WE_RTNETLINK_DEBUG */
- return -EINVAL;
- }
-
-#ifdef WE_RTNETLINK_DEBUG
- printk(KERN_DEBUG "%s (WE.r) : Malloc %d bytes\n",
- dev->name, extra_size);
-#endif /* WE_RTNETLINK_DEBUG */
-
- /* Always allocate for max space. Easier, and won't last
- * long... */
- extra = kmalloc(extra_size, GFP_KERNEL);
- if (extra == NULL)
- return -ENOMEM;
-
- /* Copy extra in aligned buffer */
- memcpy(extra, ((char *) request) + hdr_len, extra_len);
-
- /* Call the handler */
- ret = handler(dev, &info, &wrqu_point, extra);
-
- /* Cleanup - I told you it wasn't that long ;-) */
- kfree(extra);
- }
-
- /* Call commit handler if needed and defined */
- if(ret == -EIWCOMMIT)
- ret = call_commit_handler(dev);
-
- return ret;
-}
-
-/* ---------------------------------------------------------------- */
-/*
- * Main RtNetlink dispatcher. Called from the main networking code
- * (do_getlink() in net/core/rtnetlink.c).
- * Check the type of Request and call the appropriate wrapper...
- */
-int wireless_rtnetlink_get(struct net_device * dev,
- char * data,
- int len,
- char ** p_buf,
- int * p_len)
-{
- struct iw_event * request = (struct iw_event *) data;
- iw_handler handler;
-
- /* Check length */
- if(len < IW_EV_LCP_PK_LEN) {
- printk(KERN_DEBUG "%s (WE.r) : RtNetlink request too short (%d)\n",
- dev->name, len);
- return -EINVAL;
- }
-
- /* ReCheck length (len may have padding) */
- if(request->len > len) {
- printk(KERN_DEBUG "%s (WE.r) : RtNetlink request len invalid (%d-%d)\n",
- dev->name, request->len, len);
- return -EINVAL;
- }
-
- /* Only accept GET requests in here */
- if(!IW_IS_GET(request->cmd))
- return -EOPNOTSUPP;
-
- /* If command is `get the encoding parameters', check if
- * the user has the right to do it */
- if (request->cmd == SIOCGIWENCODE ||
- request->cmd == SIOCGIWENCODEEXT) {
- if (!capable(CAP_NET_ADMIN))
- return -EPERM;
- }
-
- /* Special cases */
- if(request->cmd == SIOCGIWSTATS)
- /* Get Wireless Stats */
- return rtnetlink_standard_get(dev,
- request,
- request->len,
- &iw_handler_get_iwstats,
- p_buf, p_len);
- if(request->cmd == SIOCGIWPRIV) {
- /* Check if we have some wireless handlers defined */
- if(dev->wireless_handlers == NULL)
- return -EOPNOTSUPP;
- /* Get Wireless Stats */
- return rtnetlink_standard_get(dev,
- request,
- request->len,
- &iw_handler_get_private,
- p_buf, p_len);
- }
+ if (cmd == SIOCGIWPRIV && dev->wireless_handlers)
+ return ioctl_standard_call(dev, ifr, cmd,
+ &iw_handler_get_private);
/* Basic check */
if (!netif_device_present(dev))
return -ENODEV;
- /* Try to find the handler */
- handler = get_handler(dev, request->cmd);
- if(handler != NULL) {
+ /* New driver API : try to find the handler */
+ handler = get_handler(dev, cmd);
+ if (handler) {
/* Standard and private are not the same */
- if(request->cmd < SIOCIWFIRSTPRIV)
- return rtnetlink_standard_get(dev,
- request,
- request->len,
- handler,
- p_buf, p_len);
+ if (cmd < SIOCIWFIRSTPRIV)
+ return ioctl_standard_call(dev, ifr, cmd, handler);
else
- return rtnetlink_private_get(dev,
- request,
- request->len,
- handler,
- p_buf, p_len);
+ return ioctl_private_call(dev, ifr, cmd, handler);
}
-
+ /* Old driver API : call driver ioctl handler */
+ if (dev->do_ioctl)
+ return dev->do_ioctl(dev, ifr, cmd);
return -EOPNOTSUPP;
}
-/* ---------------------------------------------------------------- */
-/*
- * Main RtNetlink dispatcher. Called from the main networking code
- * (do_setlink() in net/core/rtnetlink.c).
- * Check the type of Request and call the appropriate wrapper...
- */
-int wireless_rtnetlink_set(struct net_device * dev,
- char * data,
- int len)
+/* entry point from dev ioctl */
+int wext_handle_ioctl(struct ifreq *ifr, unsigned int cmd,
+ void __user *arg)
{
- struct iw_event * request = (struct iw_event *) data;
- iw_handler handler;
-
- /* Check length */
- if(len < IW_EV_LCP_PK_LEN) {
- printk(KERN_DEBUG "%s (WE.r) : RtNetlink request too short (%d)\n",
- dev->name, len);
- return -EINVAL;
- }
-
- /* ReCheck length (len may have padding) */
- if(request->len > len) {
- printk(KERN_DEBUG "%s (WE.r) : RtNetlink request len invalid (%d-%d)\n",
- dev->name, request->len, len);
- return -EINVAL;
- }
-
- /* Only accept SET requests in here */
- if(!IW_IS_SET(request->cmd))
- return -EOPNOTSUPP;
-
- /* Basic check */
- if (!netif_device_present(dev))
- return -ENODEV;
+ int ret;
- /* New driver API : try to find the handler */
- handler = get_handler(dev, request->cmd);
- if(handler != NULL) {
- /* Standard and private are not the same */
- if(request->cmd < SIOCIWFIRSTPRIV)
- return rtnetlink_standard_set(dev,
- request,
- request->len,
- handler);
- else
- return rtnetlink_private_set(dev,
- request,
- request->len,
- handler);
- }
-
- return -EOPNOTSUPP;
+ /* If command is `set a parameter', or
+ * `get the encoding parameters', check if
+ * the user has the right to do it */
+ if ((IW_IS_SET(cmd) || cmd == SIOCGIWENCODE || cmd == SIOCGIWENCODEEXT)
+ && !capable(CAP_NET_ADMIN))
+ return -EPERM;
+
+ dev_load(ifr->ifr_name);
+ rtnl_lock();
+ ret = wireless_process_ioctl(ifr, cmd);
+ rtnl_unlock();
+ if (IW_IS_GET(cmd) && copy_to_user(arg, ifr, sizeof(struct ifreq)))
+ return -EFAULT;
+ return ret;
}
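wext_handle_ioctl() is now the single entry from the generic device ioctl path: it checks CAP_NET_ADMIN for all SETs plus the two encoding GETs, loads the module if needed, dispatches under the RTNL, and copies the ifreq back to userspace for GETs. Nothing changes for callers; a minimal userspace sketch of a GET through this path, using the standard struct iwreq ("wlan0" is an assumed interface name):

#include <stdio.h>
#include <string.h>
#include <sys/ioctl.h>
#include <sys/socket.h>
#include <linux/wireless.h>

int main(void)
{
	struct iwreq wrq;
	int fd = socket(AF_INET, SOCK_DGRAM, 0);

	if (fd < 0)
		return 1;
	memset(&wrq, 0, sizeof(wrq));
	strncpy(wrq.ifr_name, "wlan0", IFNAMSIZ);
	/* SIOCGIWNAME is a GET, so no CAP_NET_ADMIN is required */
	if (ioctl(fd, SIOCGIWNAME, &wrq) == 0)
		printf("protocol: %s\n", wrq.u.name);
	return 0;
}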
-#endif /* CONFIG_NET_WIRELESS_RTNETLINK */
-
/************************* EVENT PROCESSING *************************/
/*
@@ -1888,7 +1080,6 @@ int wireless_rtnetlink_set(struct net_device * dev,
* Most often, the event will be propagated through rtnetlink
*/
-#ifdef WE_EVENT_RTNETLINK
/* ---------------------------------------------------------------- */
/*
* Locking...
@@ -1933,15 +1124,12 @@ static DECLARE_TASKLET(wireless_nlevent_tasklet, wireless_nlevent_process, 0);
* current wireless config. Dumping the wireless config is far too
* expensive (for each parameter, the driver need to query the hardware).
*/
-static inline int rtnetlink_fill_iwinfo(struct sk_buff * skb,
- struct net_device * dev,
- int type,
- char * event,
- int event_len)
+static int rtnetlink_fill_iwinfo(struct sk_buff *skb, struct net_device *dev,
+ int type, char *event, int event_len)
{
struct ifinfomsg *r;
struct nlmsghdr *nlh;
- unsigned char *b = skb->tail;
+ unsigned char *b = skb_tail_pointer(skb);
nlh = NLMSG_PUT(skb, 0, 0, type, sizeof(*r));
r = NLMSG_DATA(nlh);
@@ -1955,12 +1143,12 @@ static inline int rtnetlink_fill_iwinfo(struct sk_buff * skb,
/* Add the wireless events in the netlink packet */
RTA_PUT(skb, IFLA_WIRELESS, event_len, event);
- nlh->nlmsg_len = skb->tail - b;
+ nlh->nlmsg_len = skb_tail_pointer(skb) - b;
return skb->len;
nlmsg_failure:
rtattr_failure:
- skb_trim(skb, b - skb->data);
+ nlmsg_trim(skb, b);
return -1;
}
@@ -1971,9 +1159,7 @@ rtattr_failure:
* Andrzej Krzysztofowicz mandated that I used a IFLA_XXX field
* within a RTM_NEWLINK event.
*/
-static inline void rtmsg_iwinfo(struct net_device * dev,
- char * event,
- int event_len)
+static void rtmsg_iwinfo(struct net_device *dev, char *event, int event_len)
{
struct sk_buff *skb;
int size = NLMSG_GOODSIZE;
@@ -1992,8 +1178,6 @@ static inline void rtmsg_iwinfo(struct net_device * dev,
tasklet_schedule(&wireless_nlevent_tasklet);
}
-#endif /* WE_EVENT_RTNETLINK */
-
/* ---------------------------------------------------------------- */
/*
* Main event dispatcher. Called from other parts and drivers.
@@ -2015,17 +1199,17 @@ void wireless_send_event(struct net_device * dev,
unsigned cmd_index; /* *MUST* be unsigned */
/* Get the description of the Event */
- if(cmd <= SIOCIWLAST) {
+ if (cmd <= SIOCIWLAST) {
cmd_index = cmd - SIOCIWFIRST;
- if(cmd_index < standard_ioctl_num)
+ if (cmd_index < standard_ioctl_num)
descr = &(standard_ioctl[cmd_index]);
} else {
cmd_index = cmd - IWEVFIRST;
- if(cmd_index < standard_event_num)
+ if (cmd_index < standard_event_num)
descr = &(standard_event[cmd_index]);
}
/* Don't accept unknown events */
- if(descr == NULL) {
+ if (descr == NULL) {
/* Note : we don't return an error to the driver, because
* the driver would not know what to do about it. It can't
* return an error to the user, because the event is not
@@ -2037,63 +1221,50 @@ void wireless_send_event(struct net_device * dev,
dev->name, cmd);
return;
}
-#ifdef WE_EVENT_DEBUG
- printk(KERN_DEBUG "%s (WE) : Got event 0x%04X\n",
- dev->name, cmd);
- printk(KERN_DEBUG "%s (WE) : Header type : %d, Token type : %d, size : %d, token : %d\n", dev->name, descr->header_type, descr->token_type, descr->token_size, descr->max_tokens);
-#endif /* WE_EVENT_DEBUG */
/* Check extra parameters and set extra_len */
- if(descr->header_type == IW_HEADER_TYPE_POINT) {
+ if (descr->header_type == IW_HEADER_TYPE_POINT) {
/* Check if number of token fits within bounds */
- if(wrqu->data.length > descr->max_tokens) {
+ if (wrqu->data.length > descr->max_tokens) {
printk(KERN_ERR "%s (WE) : Wireless Event too big (%d)\n", dev->name, wrqu->data.length);
return;
}
- if(wrqu->data.length < descr->min_tokens) {
+ if (wrqu->data.length < descr->min_tokens) {
printk(KERN_ERR "%s (WE) : Wireless Event too small (%d)\n", dev->name, wrqu->data.length);
return;
}
/* Calculate extra_len - extra is NULL for restricted events */
- if(extra != NULL)
+ if (extra != NULL)
extra_len = wrqu->data.length * descr->token_size;
/* Always at an offset in wrqu */
wrqu_off = IW_EV_POINT_OFF;
-#ifdef WE_EVENT_DEBUG
- printk(KERN_DEBUG "%s (WE) : Event 0x%04X, tokens %d, extra_len %d\n", dev->name, cmd, wrqu->data.length, extra_len);
-#endif /* WE_EVENT_DEBUG */
}
/* Total length of the event */
hdr_len = event_type_size[descr->header_type];
event_len = hdr_len + extra_len;
-#ifdef WE_EVENT_DEBUG
- printk(KERN_DEBUG "%s (WE) : Event 0x%04X, hdr_len %d, wrqu_off %d, event_len %d\n", dev->name, cmd, hdr_len, wrqu_off, event_len);
-#endif /* WE_EVENT_DEBUG */
-
/* Create temporary buffer to hold the event */
event = kmalloc(event_len, GFP_ATOMIC);
- if(event == NULL)
+ if (event == NULL)
return;
/* Fill event */
event->len = event_len;
event->cmd = cmd;
memcpy(&event->u, ((char *) wrqu) + wrqu_off, hdr_len - IW_EV_LCP_LEN);
- if(extra != NULL)
+ if (extra)
memcpy(((char *) event) + hdr_len, extra, extra_len);
-#ifdef WE_EVENT_RTNETLINK
/* Send via the RtNetlink event channel */
rtmsg_iwinfo(dev, (char *) event, event_len);
-#endif /* WE_EVENT_RTNETLINK */
/* Cleanup */
kfree(event);
return; /* Always success, I guess ;-) */
}
+EXPORT_SYMBOL(wireless_send_event);
/********************** ENHANCED IWSPY SUPPORT **********************/
/*
@@ -2113,11 +1284,11 @@ void wireless_send_event(struct net_device * dev,
* Because this is called on the Rx path via wireless_spy_update(),
* we want it to be efficient...
*/
-static inline struct iw_spy_data * get_spydata(struct net_device *dev)
+static inline struct iw_spy_data *get_spydata(struct net_device *dev)
{
/* This is the new way */
- if(dev->wireless_data)
- return(dev->wireless_data->spy_data);
+ if (dev->wireless_data)
+ return dev->wireless_data->spy_data;
return NULL;
}
@@ -2134,7 +1305,7 @@ int iw_handler_set_spy(struct net_device * dev,
struct sockaddr * address = (struct sockaddr *) extra;
/* Make sure driver is not buggy or using the old API */
- if(!spydata)
+ if (!spydata)
return -EOPNOTSUPP;
/* Disable spy collection while we copy the addresses.
@@ -2151,29 +1322,16 @@ int iw_handler_set_spy(struct net_device * dev,
smp_wmb();
 /* Are there addresses to copy? */
- if(wrqu->data.length > 0) {
+ if (wrqu->data.length > 0) {
int i;
/* Copy addresses */
- for(i = 0; i < wrqu->data.length; i++)
+ for (i = 0; i < wrqu->data.length; i++)
memcpy(spydata->spy_address[i], address[i].sa_data,
ETH_ALEN);
/* Reset stats */
memset(spydata->spy_stat, 0,
sizeof(struct iw_quality) * IW_MAX_SPY);
-
-#ifdef WE_SPY_DEBUG
- printk(KERN_DEBUG "iw_handler_set_spy() : wireless_data %p, spydata %p, num %d\n", dev->wireless_data, spydata, wrqu->data.length);
- for (i = 0; i < wrqu->data.length; i++)
- printk(KERN_DEBUG
- "%02X:%02X:%02X:%02X:%02X:%02X \n",
- spydata->spy_address[i][0],
- spydata->spy_address[i][1],
- spydata->spy_address[i][2],
- spydata->spy_address[i][3],
- spydata->spy_address[i][4],
- spydata->spy_address[i][5]);
-#endif /* WE_SPY_DEBUG */
}
/* Make sure above is updated before re-enabling */
@@ -2184,6 +1342,7 @@ int iw_handler_set_spy(struct net_device * dev,
return 0;
}
+EXPORT_SYMBOL(iw_handler_set_spy);
/*------------------------------------------------------------------*/
/*
@@ -2199,26 +1358,27 @@ int iw_handler_get_spy(struct net_device * dev,
int i;
/* Make sure driver is not buggy or using the old API */
- if(!spydata)
+ if (!spydata)
return -EOPNOTSUPP;
wrqu->data.length = spydata->spy_number;
/* Copy addresses. */
- for(i = 0; i < spydata->spy_number; i++) {
+ for (i = 0; i < spydata->spy_number; i++) {
memcpy(address[i].sa_data, spydata->spy_address[i], ETH_ALEN);
address[i].sa_family = AF_UNIX;
}
/* Copy stats to the user buffer (just after). */
- if(spydata->spy_number > 0)
+ if (spydata->spy_number > 0)
memcpy(extra + (sizeof(struct sockaddr) *spydata->spy_number),
spydata->spy_stat,
sizeof(struct iw_quality) * spydata->spy_number);
/* Reset updated flags. */
- for(i = 0; i < spydata->spy_number; i++)
+ for (i = 0; i < spydata->spy_number; i++)
spydata->spy_stat[i].updated &= ~IW_QUAL_ALL_UPDATED;
return 0;
}
+EXPORT_SYMBOL(iw_handler_get_spy);
/*------------------------------------------------------------------*/
/*
@@ -2233,7 +1393,7 @@ int iw_handler_set_thrspy(struct net_device * dev,
struct iw_thrspy * threshold = (struct iw_thrspy *) extra;
/* Make sure driver is not buggy or using the old API */
- if(!spydata)
+ if (!spydata)
return -EOPNOTSUPP;
/* Just do it */
@@ -2243,12 +1403,9 @@ int iw_handler_set_thrspy(struct net_device * dev,
/* Clear flag */
memset(spydata->spy_thr_under, '\0', sizeof(spydata->spy_thr_under));
-#ifdef WE_SPY_DEBUG
- printk(KERN_DEBUG "iw_handler_set_thrspy() : low %d ; high %d\n", spydata->spy_thr_low.level, spydata->spy_thr_high.level);
-#endif /* WE_SPY_DEBUG */
-
return 0;
}
+EXPORT_SYMBOL(iw_handler_set_thrspy);
/*------------------------------------------------------------------*/
/*
@@ -2263,7 +1420,7 @@ int iw_handler_get_thrspy(struct net_device * dev,
struct iw_thrspy * threshold = (struct iw_thrspy *) extra;
/* Make sure driver is not buggy or using the old API */
- if(!spydata)
+ if (!spydata)
return -EOPNOTSUPP;
/* Just do it */
@@ -2272,6 +1429,7 @@ int iw_handler_get_thrspy(struct net_device * dev,
return 0;
}
+EXPORT_SYMBOL(iw_handler_get_thrspy);
/*------------------------------------------------------------------*/
/*
@@ -2297,16 +1455,6 @@ static void iw_send_thrspy_event(struct net_device * dev,
memcpy(&(threshold.low), &(spydata->spy_thr_low),
2 * sizeof(struct iw_quality));
-#ifdef WE_SPY_DEBUG
- printk(KERN_DEBUG "iw_send_thrspy_event() : address %02X:%02X:%02X:%02X:%02X:%02X, level %d, up = %d\n",
- threshold.addr.sa_data[0],
- threshold.addr.sa_data[1],
- threshold.addr.sa_data[2],
- threshold.addr.sa_data[3],
- threshold.addr.sa_data[4],
- threshold.addr.sa_data[5], threshold.qual.level);
-#endif /* WE_SPY_DEBUG */
-
/* Send event to user space */
wireless_send_event(dev, SIOCGIWTHRSPY, &wrqu, (char *) &threshold);
}
@@ -2327,16 +1475,12 @@ void wireless_spy_update(struct net_device * dev,
int match = -1;
/* Make sure driver is not buggy or using the old API */
- if(!spydata)
+ if (!spydata)
return;
-#ifdef WE_SPY_DEBUG
- printk(KERN_DEBUG "wireless_spy_update() : wireless_data %p, spydata %p, address %02X:%02X:%02X:%02X:%02X:%02X\n", dev->wireless_data, spydata, address[0], address[1], address[2], address[3], address[4], address[5]);
-#endif /* WE_SPY_DEBUG */
-
/* Update all records that match */
- for(i = 0; i < spydata->spy_number; i++)
- if(!compare_ether_addr(address, spydata->spy_address[i])) {
+ for (i = 0; i < spydata->spy_number; i++)
+ if (!compare_ether_addr(address, spydata->spy_address[i])) {
memcpy(&(spydata->spy_stat[i]), wstats,
sizeof(struct iw_quality));
match = i;
@@ -2346,15 +1490,15 @@ void wireless_spy_update(struct net_device * dev,
* To avoid event storms, we have a simple hysteresis : we generate
* event only when we go under the low threshold or above the
* high threshold. */
- if(match >= 0) {
- if(spydata->spy_thr_under[match]) {
- if(wstats->level > spydata->spy_thr_high.level) {
+ if (match >= 0) {
+ if (spydata->spy_thr_under[match]) {
+ if (wstats->level > spydata->spy_thr_high.level) {
spydata->spy_thr_under[match] = 0;
iw_send_thrspy_event(dev, spydata,
address, wstats);
}
} else {
- if(wstats->level < spydata->spy_thr_low.level) {
+ if (wstats->level < spydata->spy_thr_low.level) {
spydata->spy_thr_under[match] = 1;
iw_send_thrspy_event(dev, spydata,
address, wstats);
@@ -2362,10 +1506,4 @@ void wireless_spy_update(struct net_device * dev,
}
}
}
-
-EXPORT_SYMBOL(iw_handler_get_spy);
-EXPORT_SYMBOL(iw_handler_get_thrspy);
-EXPORT_SYMBOL(iw_handler_set_spy);
-EXPORT_SYMBOL(iw_handler_set_thrspy);
-EXPORT_SYMBOL(wireless_send_event);
EXPORT_SYMBOL(wireless_spy_update);
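
The hysteresis in wireless_spy_update() above is worth noting: an event fires only when the signal first drops below the low threshold, and not again until it has recovered above the high one. A minimal standalone sketch of that pattern, with invented thresholds and a stub event sink rather than the kernel's types:

#include <stdio.h>

/* Hysteresis: fire once when level drops below THR_LOW, and again
 * only after it has climbed back above THR_HIGH. Thresholds are
 * made up for illustration. */
#define THR_LOW  10
#define THR_HIGH 20

static int under;       /* mirrors spydata->spy_thr_under[match] */

static void send_event(int level, int up)
{
        printf("event: level %d, up = %d\n", level, up);
}

static void spy_update(int level)
{
        if (under) {
                if (level > THR_HIGH) {
                        under = 0;
                        send_event(level, 1);
                }
        } else {
                if (level < THR_LOW) {
                        under = 1;
                        send_event(level, 0);
                }
        }
}

int main(void)
{
        int samples[] = { 15, 9, 8, 12, 21, 19, 7 };
        for (unsigned i = 0; i < sizeof(samples)/sizeof(*samples); i++)
                spy_update(samples[i]);
        return 0;       /* fires at 9 (down), 21 (up), 7 (down) */
}
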
diff --git a/net/x25/af_x25.c b/net/x25/af_x25.c
index e62ba41b05c..0d6002fc77b 100644
--- a/net/x25/af_x25.c
+++ b/net/x25/af_x25.c
@@ -951,7 +951,7 @@ int x25_rx_call_request(struct sk_buff *skb, struct x25_neigh *nb,
* Incoming Call User Data.
*/
if (skb->len >= 0) {
- memcpy(makex25->calluserdata.cuddata, skb->data, skb->len);
+ skb_copy_from_linear_data(skb, makex25->calluserdata.cuddata, skb->len);
makex25->calluserdata.cudlength = skb->len;
}
@@ -1058,9 +1058,10 @@ static int x25_sendmsg(struct kiocb *iocb, struct socket *sock,
*/
SOCK_DEBUG(sk, "x25_sendmsg: Copying user data\n");
- asmptr = skb->h.raw = skb_put(skb, len);
+ skb_reset_transport_header(skb);
+ skb_put(skb, len);
- rc = memcpy_fromiovec(asmptr, msg->msg_iov, len);
+ rc = memcpy_fromiovec(skb_transport_header(skb), msg->msg_iov, len);
if (rc)
goto out_kfree_skb;
@@ -1210,8 +1211,7 @@ static int x25_recvmsg(struct kiocb *iocb, struct socket *sock,
}
}
- skb->h.raw = skb->data;
-
+ skb_reset_transport_header(skb);
copied = skb->len;
if (copied > size) {
@@ -1280,6 +1280,12 @@ static int x25_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
rc = sock_get_timestamp(sk,
(struct timeval __user *)argp);
break;
+ case SIOCGSTAMPNS:
+ rc = -EINVAL;
+ if (sk)
+ rc = sock_get_timestampns(sk,
+ (struct timespec __user *)argp);
+ break;
case SIOCGIFADDR:
case SIOCSIFADDR:
case SIOCGIFDSTADDR:
@@ -1521,6 +1527,12 @@ static int compat_x25_ioctl(struct socket *sock, unsigned int cmd,
rc = compat_sock_get_timestamp(sk,
(struct timeval __user*)argp);
break;
+ case SIOCGSTAMPNS:
+ rc = -EINVAL;
+ if (sk)
+ rc = compat_sock_get_timestampns(sk,
+ (struct timespec __user*)argp);
+ break;
case SIOCGIFADDR:
case SIOCSIFADDR:
case SIOCGIFDSTADDR:
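
Both hunks above add SIOCGSTAMPNS, the nanosecond-resolution sibling of SIOCGSTAMP: it fills a struct timespec where the older ioctl fills a struct timeval. A userspace sketch of fetching a socket's last-packet timestamp, assuming a connected socket fd and trimming error handling:

#include <stdio.h>
#include <sys/ioctl.h>
#include <sys/time.h>
#include <time.h>
#include <linux/sockios.h>      /* SIOCGSTAMP, SIOCGSTAMPNS */

/* Fetch the timestamp of the last packet received on fd, preferring
 * nanosecond resolution when the kernel supports it. */
static void print_last_stamp(int fd)
{
        struct timespec ts;
        struct timeval tv;

        if (ioctl(fd, SIOCGSTAMPNS, &ts) == 0)
                printf("%ld.%09ld\n", (long)ts.tv_sec, ts.tv_nsec);
        else if (ioctl(fd, SIOCGSTAMP, &tv) == 0)
                printf("%ld.%06ld\n", (long)tv.tv_sec, (long)tv.tv_usec);
        else
                perror("ioctl");
}
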
diff --git a/net/x25/x25_dev.c b/net/x25/x25_dev.c
index c7221de98a9..848a6b6f90a 100644
--- a/net/x25/x25_dev.c
+++ b/net/x25/x25_dev.c
@@ -48,7 +48,7 @@ static int x25_receive_data(struct sk_buff *skb, struct x25_neigh *nb)
if ((sk = x25_find_socket(lci, nb)) != NULL) {
int queued = 1;
- skb->h.raw = skb->data;
+ skb_reset_transport_header(skb);
bh_lock_sock(sk);
if (!sock_owned_by_user(sk)) {
queued = x25_process_rx_frame(sk, skb);
@@ -191,7 +191,7 @@ void x25_send_frame(struct sk_buff *skb, struct x25_neigh *nb)
{
unsigned char *dptr;
- skb->nh.raw = skb->data;
+ skb_reset_network_header(skb);
switch (nb->dev->type) {
case ARPHRD_X25:
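
Assignments such as skb->h.raw = skb->data give way to skb_reset_transport_header()/skb_reset_network_header() because, in this series, the header fields become offsets (sk_buff_data_t) rather than raw pointers on 64-bit builds; the helpers hide which representation is compiled in. A toy model of the offset idea, with made-up names rather than the kernel's actual layout:

#include <stddef.h>

/* Toy packet buffer: the header is stored as an offset from head, so
 * the stored position stays valid even if the data block is moved by
 * a reallocation. */
struct pkt {
        unsigned char *head;
        unsigned char *data;
        size_t transport_off;
};

static void pkt_reset_transport_header(struct pkt *p)
{
        p->transport_off = (size_t)(p->data - p->head);
}

static unsigned char *pkt_transport_header(const struct pkt *p)
{
        return p->head + p->transport_off;
}
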
diff --git a/net/x25/x25_in.c b/net/x25/x25_in.c
index c5239fcdefa..1c88762c279 100644
--- a/net/x25/x25_in.c
+++ b/net/x25/x25_in.c
@@ -53,17 +53,20 @@ static int x25_queue_rx_frame(struct sock *sk, struct sk_buff *skb, int more)
skb_queue_tail(&x25->fragment_queue, skb);
- skbn->h.raw = skbn->data;
+ skb_reset_transport_header(skbn);
skbo = skb_dequeue(&x25->fragment_queue);
- memcpy(skb_put(skbn, skbo->len), skbo->data, skbo->len);
+ skb_copy_from_linear_data(skbo, skb_put(skbn, skbo->len),
+ skbo->len);
kfree_skb(skbo);
while ((skbo =
skb_dequeue(&x25->fragment_queue)) != NULL) {
skb_pull(skbo, (x25->neighbour->extended) ?
X25_EXT_MIN_LEN : X25_STD_MIN_LEN);
- memcpy(skb_put(skbn, skbo->len), skbo->data, skbo->len);
+ skb_copy_from_linear_data(skbo,
+ skb_put(skbn, skbo->len),
+ skbo->len);
kfree_skb(skbo);
}
@@ -112,8 +115,9 @@ static int x25_state1_machine(struct sock *sk, struct sk_buff *skb, int frametyp
* Copy any Call User Data.
*/
if (skb->len >= 0) {
- memcpy(x25->calluserdata.cuddata, skb->data,
- skb->len);
+ skb_copy_from_linear_data(skb,
+ x25->calluserdata.cuddata,
+ skb->len);
x25->calluserdata.cudlength = skb->len;
}
if (!sock_flag(sk, SOCK_DEAD))
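
skb_copy_from_linear_data() replaces the open-coded memcpy() calls out of skb->data; in this kernel generation it reduces, if memory serves, to memcpy(to, skb->data, len), so the conversions are behaviour-neutral but give linear-area copies a single auditable entry point. A lookalike with a toy type instead of struct sk_buff:

#include <string.h>

struct buf { unsigned char *data; };

/* Mirrors skb_copy_from_linear_data(): copy len bytes out of the
 * linear data area into to. */
static inline void buf_copy_from_linear_data(const struct buf *b,
                                             void *to, unsigned int len)
{
        memcpy(to, b->data, len);
}
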
diff --git a/net/x25/x25_out.c b/net/x25/x25_out.c
index 6f573785391..2b96b52114d 100644
--- a/net/x25/x25_out.c
+++ b/net/x25/x25_out.c
@@ -61,7 +61,7 @@ int x25_output(struct sock *sk, struct sk_buff *skb)
if (skb->len - header_len > max_len) {
/* Save a copy of the Header */
- memcpy(header, skb->data, header_len);
+ skb_copy_from_linear_data(skb, header, header_len);
skb_pull(skb, header_len);
frontlen = skb_headroom(skb);
@@ -84,12 +84,12 @@ int x25_output(struct sock *sk, struct sk_buff *skb)
len = max_len > skb->len ? skb->len : max_len;
/* Copy the user data */
- memcpy(skb_put(skbn, len), skb->data, len);
+ skb_copy_from_linear_data(skb, skb_put(skbn, len), len);
skb_pull(skb, len);
/* Duplicate the Header */
skb_push(skbn, header_len);
- memcpy(skbn->data, header, header_len);
+ skb_copy_to_linear_data(skbn, header, header_len);
if (skb->len > 0) {
if (x25->neighbour->extended)
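
x25_output() above is the classic save-header / split-payload / re-push-header fragmentation loop. The same shape over plain buffers, with invented names and a fixed frame size purely for illustration:

#include <stdio.h>
#include <string.h>

/* Split payload into max_len chunks, prefixing each with a copy of
 * the saved protocol header -- the shape of x25_output() above.
 * Assumes hlen + max_len <= sizeof(frame). */
static void fragment(const unsigned char *hdr, size_t hlen,
                     const unsigned char *payload, size_t plen,
                     size_t max_len)
{
        while (plen > 0) {
                size_t len = plen < max_len ? plen : max_len;
                unsigned char frame[256];

                memcpy(frame, hdr, hlen);            /* duplicate header */
                memcpy(frame + hlen, payload, len);  /* copy user data   */
                printf("frame: %zu bytes\n", hlen + len);
                payload += len;
                plen    -= len;
        }
}
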
diff --git a/net/xfrm/xfrm_algo.c b/net/xfrm/xfrm_algo.c
index f373a8a7d9c..6249a9405bb 100644
--- a/net/xfrm/xfrm_algo.c
+++ b/net/xfrm/xfrm_algo.c
@@ -612,175 +612,6 @@ EXPORT_SYMBOL_GPL(skb_icv_walk);
#if defined(CONFIG_INET_ESP) || defined(CONFIG_INET_ESP_MODULE) || defined(CONFIG_INET6_ESP) || defined(CONFIG_INET6_ESP_MODULE)
-/* Looking generic it is not used in another places. */
-
-int
-skb_to_sgvec(struct sk_buff *skb, struct scatterlist *sg, int offset, int len)
-{
- int start = skb_headlen(skb);
- int i, copy = start - offset;
- int elt = 0;
-
- if (copy > 0) {
- if (copy > len)
- copy = len;
- sg[elt].page = virt_to_page(skb->data + offset);
- sg[elt].offset = (unsigned long)(skb->data + offset) % PAGE_SIZE;
- sg[elt].length = copy;
- elt++;
- if ((len -= copy) == 0)
- return elt;
- offset += copy;
- }
-
- for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
- int end;
-
- BUG_TRAP(start <= offset + len);
-
- end = start + skb_shinfo(skb)->frags[i].size;
- if ((copy = end - offset) > 0) {
- skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
-
- if (copy > len)
- copy = len;
- sg[elt].page = frag->page;
- sg[elt].offset = frag->page_offset+offset-start;
- sg[elt].length = copy;
- elt++;
- if (!(len -= copy))
- return elt;
- offset += copy;
- }
- start = end;
- }
-
- if (skb_shinfo(skb)->frag_list) {
- struct sk_buff *list = skb_shinfo(skb)->frag_list;
-
- for (; list; list = list->next) {
- int end;
-
- BUG_TRAP(start <= offset + len);
-
- end = start + list->len;
- if ((copy = end - offset) > 0) {
- if (copy > len)
- copy = len;
- elt += skb_to_sgvec(list, sg+elt, offset - start, copy);
- if ((len -= copy) == 0)
- return elt;
- offset += copy;
- }
- start = end;
- }
- }
- BUG_ON(len);
- return elt;
-}
-EXPORT_SYMBOL_GPL(skb_to_sgvec);
-
-/* Check that skb data bits are writable. If they are not, copy data
- * to newly created private area. If "tailbits" is given, make sure that
- * tailbits bytes beyond current end of skb are writable.
- *
- * Returns amount of elements of scatterlist to load for subsequent
- * transformations and pointer to writable trailer skb.
- */
-
-int skb_cow_data(struct sk_buff *skb, int tailbits, struct sk_buff **trailer)
-{
- int copyflag;
- int elt;
- struct sk_buff *skb1, **skb_p;
-
- /* If skb is cloned or its head is paged, reallocate
- * head pulling out all the pages (pages are considered not writable
- * at the moment even if they are anonymous).
- */
- if ((skb_cloned(skb) || skb_shinfo(skb)->nr_frags) &&
- __pskb_pull_tail(skb, skb_pagelen(skb)-skb_headlen(skb)) == NULL)
- return -ENOMEM;
-
- /* Easy case. Most of packets will go this way. */
- if (!skb_shinfo(skb)->frag_list) {
- /* A little of trouble, not enough of space for trailer.
- * This should not happen, when stack is tuned to generate
- * good frames. OK, on miss we reallocate and reserve even more
- * space, 128 bytes is fair. */
-
- if (skb_tailroom(skb) < tailbits &&
- pskb_expand_head(skb, 0, tailbits-skb_tailroom(skb)+128, GFP_ATOMIC))
- return -ENOMEM;
-
- /* Voila! */
- *trailer = skb;
- return 1;
- }
-
- /* Misery. We are in troubles, going to mincer fragments... */
-
- elt = 1;
- skb_p = &skb_shinfo(skb)->frag_list;
- copyflag = 0;
-
- while ((skb1 = *skb_p) != NULL) {
- int ntail = 0;
-
- /* The fragment is partially pulled by someone,
- * this can happen on input. Copy it and everything
- * after it. */
-
- if (skb_shared(skb1))
- copyflag = 1;
-
- /* If the skb is the last, worry about trailer. */
-
- if (skb1->next == NULL && tailbits) {
- if (skb_shinfo(skb1)->nr_frags ||
- skb_shinfo(skb1)->frag_list ||
- skb_tailroom(skb1) < tailbits)
- ntail = tailbits + 128;
- }
-
- if (copyflag ||
- skb_cloned(skb1) ||
- ntail ||
- skb_shinfo(skb1)->nr_frags ||
- skb_shinfo(skb1)->frag_list) {
- struct sk_buff *skb2;
-
- /* Fuck, we are miserable poor guys... */
- if (ntail == 0)
- skb2 = skb_copy(skb1, GFP_ATOMIC);
- else
- skb2 = skb_copy_expand(skb1,
- skb_headroom(skb1),
- ntail,
- GFP_ATOMIC);
- if (unlikely(skb2 == NULL))
- return -ENOMEM;
-
- if (skb1->sk)
- skb_set_owner_w(skb2, skb1->sk);
-
- /* Looking around. Are we still alive?
- * OK, link new skb, drop old one */
-
- skb2->next = skb1->next;
- *skb_p = skb2;
- kfree_skb(skb1);
- skb1 = skb2;
- }
- elt++;
- *trailer = skb1;
- skb_p = &skb1->next;
- }
-
- return elt;
-}
-EXPORT_SYMBOL_GPL(skb_cow_data);
-
void *pskb_put(struct sk_buff *skb, struct sk_buff *tail, int len)
{
if (tail != skb) {
diff --git a/net/xfrm/xfrm_input.c b/net/xfrm/xfrm_input.c
index ee15bdae141..5c4695840c5 100644
--- a/net/xfrm/xfrm_input.c
+++ b/net/xfrm/xfrm_input.c
@@ -62,7 +62,7 @@ int xfrm_parse_spi(struct sk_buff *skb, u8 nexthdr, __be32 *spi, __be32 *seq)
case IPPROTO_COMP:
if (!pskb_may_pull(skb, sizeof(struct ip_comp_hdr)))
return -EINVAL;
- *spi = htonl(ntohs(*(__be16*)(skb->h.raw + 2)));
+ *spi = htonl(ntohs(*(__be16*)(skb_transport_header(skb) + 2)));
*seq = 0;
return 0;
default:
@@ -72,8 +72,8 @@ int xfrm_parse_spi(struct sk_buff *skb, u8 nexthdr, __be32 *spi, __be32 *seq)
if (!pskb_may_pull(skb, 16))
return -EINVAL;
- *spi = *(__be32*)(skb->h.raw + offset);
- *seq = *(__be32*)(skb->h.raw + offset_seq);
+ *spi = *(__be32*)(skb_transport_header(skb) + offset);
+ *seq = *(__be32*)(skb_transport_header(skb) + offset_seq);
return 0;
}
EXPORT_SYMBOL(xfrm_parse_spi);
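
xfrm_parse_spi() now locates the SPI and sequence number relative to skb_transport_header(); the words themselves stay big-endian on the wire. A userspace sketch of the same extraction from a raw ESP header (SPI at offset 0, sequence at offset 4, per RFC 4303):

#include <stdint.h>
#include <string.h>
#include <arpa/inet.h>

/* Read the big-endian SPI and sequence number from the start of an
 * ESP header. memcpy() avoids unaligned 32-bit loads. */
static void parse_esp(const unsigned char *hdr, uint32_t *spi, uint32_t *seq)
{
        uint32_t v;

        memcpy(&v, hdr, 4);
        *spi = ntohl(v);
        memcpy(&v, hdr + 4, 4);
        *seq = ntohl(v);
}
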
diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c
index 785c3e39f06..95271e8426a 100644
--- a/net/xfrm/xfrm_policy.c
+++ b/net/xfrm/xfrm_policy.c
@@ -268,7 +268,7 @@ static inline unsigned long make_jiffies(long secs)
static void xfrm_policy_timer(unsigned long data)
{
struct xfrm_policy *xp = (struct xfrm_policy*)data;
- unsigned long now = (unsigned long)xtime.tv_sec;
+ unsigned long now = get_seconds();
long next = LONG_MAX;
int warn = 0;
int dir;
@@ -579,8 +579,22 @@ static inline int xfrm_byidx_should_resize(int total)
return 0;
}
-static DEFINE_MUTEX(hash_resize_mutex);
+void xfrm_spd_getinfo(struct xfrmk_spdinfo *si)
+{
+ read_lock_bh(&xfrm_policy_lock);
+ si->incnt = xfrm_policy_count[XFRM_POLICY_IN];
+ si->outcnt = xfrm_policy_count[XFRM_POLICY_OUT];
+ si->fwdcnt = xfrm_policy_count[XFRM_POLICY_FWD];
+ si->inscnt = xfrm_policy_count[XFRM_POLICY_IN+XFRM_POLICY_MAX];
+ si->outscnt = xfrm_policy_count[XFRM_POLICY_OUT+XFRM_POLICY_MAX];
+ si->fwdscnt = xfrm_policy_count[XFRM_POLICY_FWD+XFRM_POLICY_MAX];
+ si->spdhcnt = xfrm_idx_hmask;
+ si->spdhmcnt = xfrm_policy_hashmax;
+ read_unlock_bh(&xfrm_policy_lock);
+}
+EXPORT_SYMBOL(xfrm_spd_getinfo);
+static DEFINE_MUTEX(hash_resize_mutex);
static void xfrm_hash_resize(struct work_struct *__unused)
{
int dir, total;
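
xfrm_spd_getinfo() copies every SPD counter while holding the policy lock for reading, so userspace sees a coherent snapshot rather than a torn mix of concurrent updates. The same idiom transplanted to pthreads, purely for illustration:

#include <pthread.h>

struct stats { unsigned long in, out, fwd; };

static struct stats live;
static pthread_rwlock_t stats_lock = PTHREAD_RWLOCK_INITIALIZER;

/* Copy all counters under one read lock so the snapshot is coherent. */
static void stats_getinfo(struct stats *si)
{
        pthread_rwlock_rdlock(&stats_lock);
        *si = live;
        pthread_rwlock_unlock(&stats_lock);
}
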
@@ -690,7 +704,7 @@ int xfrm_policy_insert(int dir, struct xfrm_policy *policy, int excl)
}
policy->index = delpol ? delpol->index : xfrm_gen_index(policy->type, dir);
hlist_add_head(&policy->byidx, xfrm_policy_byidx+idx_hash(policy->index));
- policy->curlft.add_time = (unsigned long)xtime.tv_sec;
+ policy->curlft.add_time = get_seconds();
policy->curlft.use_time = 0;
if (!mod_timer(&policy->timer, jiffies + HZ))
xfrm_pol_hold(policy);
@@ -1049,7 +1063,7 @@ static inline int policy_to_flow_dir(int dir)
return FLOW_DIR_OUT;
case XFRM_POLICY_FWD:
return FLOW_DIR_FWD;
- };
+ }
}
static struct xfrm_policy *xfrm_sk_policy_lookup(struct sock *sk, int dir, struct flowi *fl)
@@ -1133,7 +1147,7 @@ int xfrm_sk_policy_insert(struct sock *sk, int dir, struct xfrm_policy *pol)
old_pol = sk->sk_policy[dir];
sk->sk_policy[dir] = pol;
if (pol) {
- pol->curlft.add_time = (unsigned long)xtime.tv_sec;
+ pol->curlft.add_time = get_seconds();
pol->index = xfrm_gen_index(pol->type, XFRM_POLICY_MAX+dir);
__xfrm_policy_link(pol, XFRM_POLICY_MAX+dir);
}
@@ -1330,6 +1344,40 @@ xfrm_bundle_create(struct xfrm_policy *policy, struct xfrm_state **xfrm, int nx,
return err;
}
+static inline int
+xfrm_dst_alloc_copy(void **target, void *src, int size)
+{
+ if (!*target) {
+ *target = kmalloc(size, GFP_ATOMIC);
+ if (!*target)
+ return -ENOMEM;
+ }
+ memcpy(*target, src, size);
+ return 0;
+}
+
+static inline int
+xfrm_dst_update_parent(struct dst_entry *dst, struct xfrm_selector *sel)
+{
+#ifdef CONFIG_XFRM_SUB_POLICY
+ struct xfrm_dst *xdst = (struct xfrm_dst *)dst;
+ return xfrm_dst_alloc_copy((void **)&(xdst->partner),
+ sel, sizeof(*sel));
+#else
+ return 0;
+#endif
+}
+
+static inline int
+xfrm_dst_update_origin(struct dst_entry *dst, struct flowi *fl)
+{
+#ifdef CONFIG_XFRM_SUB_POLICY
+ struct xfrm_dst *xdst = (struct xfrm_dst *)dst;
+ return xfrm_dst_alloc_copy((void **)&(xdst->origin), fl, sizeof(*fl));
+#else
+ return 0;
+#endif
+}
static int stale_bundle(struct dst_entry *dst);
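
xfrm_dst_alloc_copy() allocates its target slot only on first use and overwrites it in place on later calls, keeping the repeat path allocation-free. A generic userspace version of the helper (the caller owns the eventual free()):

#include <stdlib.h>
#include <string.h>

/* Allocate *target on first call, then just overwrite it; returns
 * -1 on allocation failure, 0 on success. */
static int lazy_copy(void **target, const void *src, size_t size)
{
        if (!*target) {
                *target = malloc(size);
                if (!*target)
                        return -1;
        }
        memcpy(*target, src, size);
        return 0;
}
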
@@ -1386,7 +1434,7 @@ restart:
return 0;
family = dst_orig->ops->family;
- policy->curlft.use_time = (unsigned long)xtime.tv_sec;
+ policy->curlft.use_time = get_seconds();
pols[0] = policy;
npols ++;
xfrm_nr += pols[0]->xfrm_nr;
@@ -1518,6 +1566,18 @@ restart:
err = -EHOSTUNREACH;
goto error;
}
+
+ if (npols > 1)
+ err = xfrm_dst_update_parent(dst, &pols[1]->selector);
+ else
+ err = xfrm_dst_update_origin(dst, fl);
+ if (unlikely(err)) {
+ write_unlock_bh(&policy->lock);
+ if (dst)
+ dst_free(dst);
+ goto error;
+ }
+
dst->next = policy->bundles;
policy->bundles = dst;
dst_hold(dst);
@@ -1682,7 +1742,7 @@ int __xfrm_policy_check(struct sock *sk, int dir, struct sk_buff *skb,
return 1;
}
- pol->curlft.use_time = (unsigned long)xtime.tv_sec;
+ pol->curlft.use_time = get_seconds();
pols[0] = pol;
npols ++;
@@ -1694,7 +1754,7 @@ int __xfrm_policy_check(struct sock *sk, int dir, struct sk_buff *skb,
if (pols[1]) {
if (IS_ERR(pols[1]))
return 0;
- pols[1]->curlft.use_time = (unsigned long)xtime.tv_sec;
+ pols[1]->curlft.use_time = get_seconds();
npols ++;
}
}
@@ -1933,6 +1993,15 @@ int xfrm_bundle_ok(struct xfrm_policy *pol, struct xfrm_dst *first,
if (!dst_check(dst->path, ((struct xfrm_dst *)dst)->path_cookie) ||
(dst->dev && !netif_running(dst->dev)))
return 0;
+#ifdef CONFIG_XFRM_SUB_POLICY
+ if (fl) {
+ if (first->origin && !flow_cache_uli_match(first->origin, fl))
+ return 0;
+ if (first->partner &&
+ !xfrm_selector_match(first->partner, fl, family))
+ return 0;
+ }
+#endif
last = NULL;
diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c
index e3a0bcfa5df..9955ff4da0a 100644
--- a/net/xfrm/xfrm_state.c
+++ b/net/xfrm/xfrm_state.c
@@ -233,7 +233,7 @@ static inline unsigned long make_jiffies(long secs)
static void xfrm_timer_handler(unsigned long data)
{
struct xfrm_state *x = (struct xfrm_state*)data;
- unsigned long now = (unsigned long)xtime.tv_sec;
+ unsigned long now = get_seconds();
long next = LONG_MAX;
int warn = 0;
int err = 0;
@@ -326,7 +326,7 @@ struct xfrm_state *xfrm_state_alloc(void)
init_timer(&x->rtimer);
x->rtimer.function = xfrm_replay_timer_handler;
x->rtimer.data = (unsigned long)x;
- x->curlft.add_time = (unsigned long)xtime.tv_sec;
+ x->curlft.add_time = get_seconds();
x->lft.soft_byte_limit = XFRM_INF;
x->lft.soft_packet_limit = XFRM_INF;
x->lft.hard_byte_limit = XFRM_INF;
@@ -421,6 +421,16 @@ restart:
}
EXPORT_SYMBOL(xfrm_state_flush);
+void xfrm_sad_getinfo(struct xfrmk_sadinfo *si)
+{
+ spin_lock_bh(&xfrm_state_lock);
+ si->sadcnt = xfrm_state_num;
+ si->sadhcnt = xfrm_state_hmask;
+ si->sadhmcnt = xfrm_state_hashmax;
+ spin_unlock_bh(&xfrm_state_lock);
+}
+EXPORT_SYMBOL(xfrm_sad_getinfo);
+
static int
xfrm_init_tempsel(struct xfrm_state *x, struct flowi *fl,
struct xfrm_tmpl *tmpl,
@@ -458,7 +468,7 @@ static struct xfrm_state *__xfrm_state_lookup(xfrm_address_t *daddr, __be32 spi,
x->id.daddr.a6))
continue;
break;
- };
+ }
xfrm_state_hold(x);
return x;
@@ -493,7 +503,7 @@ static struct xfrm_state *__xfrm_state_lookup_byaddr(xfrm_address_t *daddr, xfrm
x->props.saddr.a6))
continue;
break;
- };
+ }
xfrm_state_hold(x);
return x;
@@ -722,7 +732,7 @@ static struct xfrm_state *__find_acq_core(unsigned short family, u8 mode, u32 re
(struct in6_addr *)saddr))
continue;
break;
- };
+ }
xfrm_state_hold(x);
return x;
@@ -755,7 +765,7 @@ static struct xfrm_state *__find_acq_core(unsigned short family, u8 mode, u32 re
ipv6_addr_copy((struct in6_addr *)x->id.daddr.a6,
(struct in6_addr *)daddr);
break;
- };
+ }
x->km.state = XFRM_STATE_ACQ;
x->id.proto = proto;
@@ -1051,7 +1061,7 @@ EXPORT_SYMBOL(xfrm_state_update);
int xfrm_state_check_expire(struct xfrm_state *x)
{
if (!x->curlft.use_time)
- x->curlft.use_time = (unsigned long)xtime.tv_sec;
+ x->curlft.use_time = get_seconds();
if (x->km.state != XFRM_STATE_VALID)
return -EINVAL;
@@ -1667,37 +1677,17 @@ void xfrm_state_delete_tunnel(struct xfrm_state *x)
}
EXPORT_SYMBOL(xfrm_state_delete_tunnel);
-/*
- * This function is NOT optimal. For example, with ESP it will give an
- * MTU that's usually two bytes short of being optimal. However, it will
- * usually give an answer that's a multiple of 4 provided the input is
- * also a multiple of 4.
- */
int xfrm_state_mtu(struct xfrm_state *x, int mtu)
{
- int res = mtu;
-
- res -= x->props.header_len;
-
- for (;;) {
- int m = res;
-
- if (m < 68)
- return 68;
-
- spin_lock_bh(&x->lock);
- if (x->km.state == XFRM_STATE_VALID &&
- x->type && x->type->get_max_size)
- m = x->type->get_max_size(x, m);
- else
- m += x->props.header_len;
- spin_unlock_bh(&x->lock);
-
- if (m <= mtu)
- break;
- res -= (m - mtu);
- }
+ int res;
+ spin_lock_bh(&x->lock);
+ if (x->km.state == XFRM_STATE_VALID &&
+ x->type && x->type->get_mtu)
+ res = x->type->get_mtu(x, mtu);
+ else
+ res = mtu;
+ spin_unlock_bh(&x->lock);
return res;
}
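
The replacement xfrm_state_mtu() drops the old converge-by-iteration loop and simply asks the transform for its payload MTU through a get_mtu() callback, falling back to the raw mtu when no transform applies. Its control flow, reduced to a skeleton with stand-in types:

/* Skeleton of the new xfrm_state_mtu(): one locked callback instead
 * of the old iterative search. Types are stand-ins. */
struct xform {
        int valid;
        int (*get_mtu)(struct xform *x, int mtu);
};

static int state_mtu(struct xform *x, int mtu)
{
        int res;

        /* spin_lock_bh(&x->lock) in the real code */
        res = (x->valid && x->get_mtu) ? x->get_mtu(x, mtu) : mtu;
        /* spin_unlock_bh(&x->lock) */
        return res;
}
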
diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c
index 816e3690b60..b14c7e590c3 100644
--- a/net/xfrm/xfrm_user.c
+++ b/net/xfrm/xfrm_user.c
@@ -71,7 +71,7 @@ static int verify_one_alg(struct rtattr **xfrma, enum xfrm_attr_type_t type)
default:
return -EINVAL;
- };
+ }
algp->alg_name[CRYPTO_MAX_ALG_NAME - 1] = '\0';
return 0;
@@ -152,7 +152,7 @@ static int verify_newsa_info(struct xfrm_usersa_info *p,
default:
goto out;
- };
+ }
err = -EINVAL;
switch (p->id.proto) {
@@ -192,7 +192,7 @@ static int verify_newsa_info(struct xfrm_usersa_info *p,
default:
goto out;
- };
+ }
if ((err = verify_one_alg(xfrma, XFRMA_ALG_AUTH)))
goto out;
@@ -217,7 +217,7 @@ static int verify_newsa_info(struct xfrm_usersa_info *p,
default:
goto out;
- };
+ }
err = 0;
@@ -576,7 +576,7 @@ static int dump_one_state(struct xfrm_state *x, int count, void *ptr)
struct sk_buff *skb = sp->out_skb;
struct xfrm_usersa_info *p;
struct nlmsghdr *nlh;
- unsigned char *b = skb->tail;
+ unsigned char *b = skb_tail_pointer(skb);
if (sp->this_idx < sp->start_idx)
goto out;
@@ -621,14 +621,14 @@ static int dump_one_state(struct xfrm_state *x, int count, void *ptr)
if (x->lastused)
RTA_PUT(skb, XFRMA_LASTUSED, sizeof(x->lastused), &x->lastused);
- nlh->nlmsg_len = skb->tail - b;
+ nlh->nlmsg_len = skb_tail_pointer(skb) - b;
out:
sp->this_idx++;
return 0;
nlmsg_failure:
rtattr_failure:
- skb_trim(skb, b - skb->data);
+ nlmsg_trim(skb, b);
return -1;
}
@@ -672,6 +672,113 @@ static struct sk_buff *xfrm_state_netlink(struct sk_buff *in_skb,
return skb;
}
+static int build_spdinfo(struct sk_buff *skb, u32 pid, u32 seq, u32 flags)
+{
+ struct xfrmk_spdinfo si;
+ struct xfrmu_spdinfo spc;
+ struct xfrmu_spdhinfo sph;
+ struct nlmsghdr *nlh;
+ u32 *f;
+
+ nlh = nlmsg_put(skb, pid, seq, XFRM_MSG_NEWSPDINFO, sizeof(u32), 0);
+ if (nlh == NULL) /* shouldn't really happen ... */
+ return -EMSGSIZE;
+
+ f = nlmsg_data(nlh);
+ *f = flags;
+ xfrm_spd_getinfo(&si);
+ spc.incnt = si.incnt;
+ spc.outcnt = si.outcnt;
+ spc.fwdcnt = si.fwdcnt;
+ spc.inscnt = si.inscnt;
+ spc.outscnt = si.outscnt;
+ spc.fwdscnt = si.fwdscnt;
+ sph.spdhcnt = si.spdhcnt;
+ sph.spdhmcnt = si.spdhmcnt;
+
+ NLA_PUT(skb, XFRMA_SPD_INFO, sizeof(spc), &spc);
+ NLA_PUT(skb, XFRMA_SPD_HINFO, sizeof(sph), &sph);
+
+ return nlmsg_end(skb, nlh);
+
+nla_put_failure:
+ nlmsg_cancel(skb, nlh);
+ return -EMSGSIZE;
+}
+
+static int xfrm_get_spdinfo(struct sk_buff *skb, struct nlmsghdr *nlh,
+ struct rtattr **xfrma)
+{
+ struct sk_buff *r_skb;
+ u32 *flags = NLMSG_DATA(nlh);
+ u32 spid = NETLINK_CB(skb).pid;
+ u32 seq = nlh->nlmsg_seq;
+ int len = NLMSG_LENGTH(sizeof(u32));
+
+ len += RTA_SPACE(sizeof(struct xfrmu_spdinfo));
+ len += RTA_SPACE(sizeof(struct xfrmu_spdhinfo));
+
+ r_skb = alloc_skb(len, GFP_ATOMIC);
+ if (r_skb == NULL)
+ return -ENOMEM;
+
+ if (build_spdinfo(r_skb, spid, seq, *flags) < 0)
+ BUG();
+
+ return nlmsg_unicast(xfrm_nl, r_skb, spid);
+}
+
+static int build_sadinfo(struct sk_buff *skb, u32 pid, u32 seq, u32 flags)
+{
+ struct xfrmk_sadinfo si;
+ struct xfrmu_sadhinfo sh;
+ struct nlmsghdr *nlh;
+ u32 *f;
+
+ nlh = nlmsg_put(skb, pid, seq, XFRM_MSG_NEWSADINFO, sizeof(u32), 0);
+ if (nlh == NULL) /* shouldn't really happen ... */
+ return -EMSGSIZE;
+
+ f = nlmsg_data(nlh);
+ *f = flags;
+ xfrm_sad_getinfo(&si);
+
+ sh.sadhmcnt = si.sadhmcnt;
+ sh.sadhcnt = si.sadhcnt;
+
+ NLA_PUT_U32(skb, XFRMA_SAD_CNT, si.sadcnt);
+ NLA_PUT(skb, XFRMA_SAD_HINFO, sizeof(sh), &sh);
+
+ return nlmsg_end(skb, nlh);
+
+nla_put_failure:
+ nlmsg_cancel(skb, nlh);
+ return -EMSGSIZE;
+}
+
+static int xfrm_get_sadinfo(struct sk_buff *skb, struct nlmsghdr *nlh,
+ struct rtattr **xfrma)
+{
+ struct sk_buff *r_skb;
+ u32 *flags = NLMSG_DATA(nlh);
+ u32 spid = NETLINK_CB(skb).pid;
+ u32 seq = nlh->nlmsg_seq;
+ int len = NLMSG_LENGTH(sizeof(u32));
+
+ len += RTA_SPACE(sizeof(struct xfrmu_sadhinfo));
+ len += RTA_SPACE(sizeof(u32));
+
+ r_skb = alloc_skb(len, GFP_ATOMIC);
+
+ if (r_skb == NULL)
+ return -ENOMEM;
+
+ if (build_sadinfo(r_skb, spid, seq, *flags) < 0)
+ BUG();
+
+ return nlmsg_unicast(xfrm_nl, r_skb, spid);
+}
+
static int xfrm_get_sa(struct sk_buff *skb, struct nlmsghdr *nlh,
struct rtattr **xfrma)
{
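
build_spdinfo() and build_sadinfo() follow the stock netlink pattern: nlmsg_put() reserves the header, nlmsg_data() exposes the fixed payload, NLA_PUT() appends attributes, and nlmsg_end()/nlmsg_cancel() commit or roll back. Userspace can assemble the matching request with the classic NLMSG_* macros; a sketch, assuming a single-u32 payload like the flags word above and a placeholder message type:

#include <string.h>
#include <sys/socket.h>
#include <linux/netlink.h>

/* Build a netlink request carrying one u32 in buf; returns the
 * message length, or -1 if buf is too small. Type 42 is a
 * placeholder, not a real XFRM message type. */
static int build_req(char *buf, size_t buflen, unsigned int seq, __u32 flags)
{
        struct nlmsghdr *nlh = (struct nlmsghdr *)buf;

        if (buflen < NLMSG_SPACE(sizeof(__u32)))
                return -1;
        memset(buf, 0, NLMSG_SPACE(sizeof(__u32)));
        nlh->nlmsg_len = NLMSG_LENGTH(sizeof(__u32));
        nlh->nlmsg_type = 42;
        nlh->nlmsg_flags = NLM_F_REQUEST;
        nlh->nlmsg_seq = seq;
        memcpy(NLMSG_DATA(nlh), &flags, sizeof(flags));
        return nlh->nlmsg_len;
}
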
@@ -711,7 +818,7 @@ static int verify_userspi_info(struct xfrm_userspi_info *p)
default:
return -EINVAL;
- };
+ }
if (p->min > p->max)
return -EINVAL;
@@ -789,7 +896,7 @@ static int verify_policy_dir(u8 dir)
default:
return -EINVAL;
- };
+ }
return 0;
}
@@ -805,7 +912,7 @@ static int verify_policy_type(u8 type)
default:
return -EINVAL;
- };
+ }
return 0;
}
@@ -821,7 +928,7 @@ static int verify_newpolicy_info(struct xfrm_userpolicy_info *p)
default:
return -EINVAL;
- };
+ }
switch (p->action) {
case XFRM_POLICY_ALLOW:
@@ -830,7 +937,7 @@ static int verify_newpolicy_info(struct xfrm_userpolicy_info *p)
default:
return -EINVAL;
- };
+ }
switch (p->sel.family) {
case AF_INET:
@@ -845,7 +952,7 @@ static int verify_newpolicy_info(struct xfrm_userpolicy_info *p)
default:
return -EINVAL;
- };
+ }
return verify_policy_dir(p->dir);
}
@@ -912,7 +1019,7 @@ static int validate_tmpl(int nr, struct xfrm_user_tmpl *ut, u16 family)
#endif
default:
return -EINVAL;
- };
+ }
}
return 0;
@@ -1157,7 +1264,7 @@ static int dump_one_policy(struct xfrm_policy *xp, int dir, int count, void *ptr
struct sk_buff *in_skb = sp->in_skb;
struct sk_buff *skb = sp->out_skb;
struct nlmsghdr *nlh;
- unsigned char *b = skb->tail;
+ unsigned char *b = skb_tail_pointer(skb);
if (sp->this_idx < sp->start_idx)
goto out;
@@ -1176,13 +1283,13 @@ static int dump_one_policy(struct xfrm_policy *xp, int dir, int count, void *ptr
if (copy_to_user_policy_type(xp->type, skb) < 0)
goto nlmsg_failure;
- nlh->nlmsg_len = skb->tail - b;
+ nlh->nlmsg_len = skb_tail_pointer(skb) - b;
out:
sp->this_idx++;
return 0;
nlmsg_failure:
- skb_trim(skb, b - skb->data);
+ nlmsg_trim(skb, b);
return -1;
}
@@ -1330,7 +1437,7 @@ static int build_aevent(struct sk_buff *skb, struct xfrm_state *x, struct km_eve
struct xfrm_aevent_id *id;
struct nlmsghdr *nlh;
struct xfrm_lifetime_cur ltime;
- unsigned char *b = skb->tail;
+ unsigned char *b = skb_tail_pointer(skb);
nlh = NLMSG_PUT(skb, c->pid, c->seq, XFRM_MSG_NEWAE, sizeof(*id));
id = NLMSG_DATA(nlh);
@@ -1362,12 +1469,12 @@ static int build_aevent(struct sk_buff *skb, struct xfrm_state *x, struct km_eve
RTA_PUT(skb,XFRMA_ETIMER_THRESH,sizeof(u32),&etimer);
}
- nlh->nlmsg_len = skb->tail - b;
+ nlh->nlmsg_len = skb_tail_pointer(skb) - b;
return skb->len;
rtattr_failure:
nlmsg_failure:
- skb_trim(skb, b - skb->data);
+ nlmsg_trim(skb, b);
return -1;
}
@@ -1744,7 +1851,7 @@ static int build_migrate(struct sk_buff *skb, struct xfrm_migrate *m,
struct xfrm_migrate *mp;
struct xfrm_userpolicy_id *pol_id;
struct nlmsghdr *nlh;
- unsigned char *b = skb->tail;
+ unsigned char *b = skb_tail_pointer(skb);
int i;
nlh = NLMSG_PUT(skb, 0, 0, XFRM_MSG_MIGRATE, sizeof(*pol_id));
@@ -1764,10 +1871,10 @@ static int build_migrate(struct sk_buff *skb, struct xfrm_migrate *m,
goto nlmsg_failure;
}
- nlh->nlmsg_len = skb->tail - b;
+ nlh->nlmsg_len = skb_tail_pointer(skb) - b;
return skb->len;
nlmsg_failure:
- skb_trim(skb, b - skb->data);
+ nlmsg_trim(skb, b);
return -1;
}
@@ -1823,6 +1930,8 @@ static const int xfrm_msg_min[XFRM_NR_MSGTYPES] = {
[XFRM_MSG_GETAE - XFRM_MSG_BASE] = XMSGSIZE(xfrm_aevent_id),
[XFRM_MSG_REPORT - XFRM_MSG_BASE] = XMSGSIZE(xfrm_user_report),
[XFRM_MSG_MIGRATE - XFRM_MSG_BASE] = XMSGSIZE(xfrm_userpolicy_id),
+ [XFRM_MSG_GETSADINFO - XFRM_MSG_BASE] = NLMSG_LENGTH(sizeof(u32)),
+ [XFRM_MSG_GETSPDINFO - XFRM_MSG_BASE] = NLMSG_LENGTH(sizeof(u32)),
};
#undef XMSGSIZE
@@ -1850,55 +1959,40 @@ static struct xfrm_link {
[XFRM_MSG_NEWAE - XFRM_MSG_BASE] = { .doit = xfrm_new_ae },
[XFRM_MSG_GETAE - XFRM_MSG_BASE] = { .doit = xfrm_get_ae },
[XFRM_MSG_MIGRATE - XFRM_MSG_BASE] = { .doit = xfrm_do_migrate },
+ [XFRM_MSG_GETSADINFO - XFRM_MSG_BASE] = { .doit = xfrm_get_sadinfo },
+ [XFRM_MSG_GETSPDINFO - XFRM_MSG_BASE] = { .doit = xfrm_get_spdinfo },
};
-static int xfrm_user_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh, int *errp)
+static int xfrm_user_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
{
struct rtattr *xfrma[XFRMA_MAX];
struct xfrm_link *link;
int type, min_len;
- if (!(nlh->nlmsg_flags & NLM_F_REQUEST))
- return 0;
-
type = nlh->nlmsg_type;
-
- /* A control message: ignore them */
- if (type < XFRM_MSG_BASE)
- return 0;
-
- /* Unknown message: reply with EINVAL */
if (type > XFRM_MSG_MAX)
- goto err_einval;
+ return -EINVAL;
type -= XFRM_MSG_BASE;
link = &xfrm_dispatch[type];
/* All operations require privileges, even GET */
- if (security_netlink_recv(skb, CAP_NET_ADMIN)) {
- *errp = -EPERM;
- return -1;
- }
+ if (security_netlink_recv(skb, CAP_NET_ADMIN))
+ return -EPERM;
if ((type == (XFRM_MSG_GETSA - XFRM_MSG_BASE) ||
type == (XFRM_MSG_GETPOLICY - XFRM_MSG_BASE)) &&
(nlh->nlmsg_flags & NLM_F_DUMP)) {
if (link->dump == NULL)
- goto err_einval;
-
- if ((*errp = netlink_dump_start(xfrm_nl, skb, nlh,
- link->dump, NULL)) != 0) {
- return -1;
- }
+ return -EINVAL;
- netlink_queue_skip(nlh, skb);
- return -1;
+ return netlink_dump_start(xfrm_nl, skb, nlh, link->dump, NULL);
}
memset(xfrma, 0, sizeof(xfrma));
if (nlh->nlmsg_len < (min_len = xfrm_msg_min[type]))
- goto err_einval;
+ return -EINVAL;
if (nlh->nlmsg_len > min_len) {
int attrlen = nlh->nlmsg_len - NLMSG_ALIGN(min_len);
@@ -1908,7 +2002,7 @@ static int xfrm_user_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh, int *err
unsigned short flavor = attr->rta_type;
if (flavor) {
if (flavor > XFRMA_MAX)
- goto err_einval;
+ return -EINVAL;
xfrma[flavor - 1] = attr;
}
attr = RTA_NEXT(attr, attrlen);
@@ -1916,14 +2010,9 @@ static int xfrm_user_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh, int *err
}
if (link->doit == NULL)
- goto err_einval;
- *errp = link->doit(skb, nlh, xfrma);
-
- return *errp;
+ return -EINVAL;
-err_einval:
- *errp = -EINVAL;
- return -1;
+ return link->doit(skb, nlh, xfrma);
}
static void xfrm_netlink_rcv(struct sock *sk, int len)
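
The rcv_msg rewrite retires the errp out-parameter convention (return -1, stash the errno through a pointer) in favour of returning the negative errno directly, which is what the consolidated netlink receive path expects. Schematically:

#include <errno.h>

/* Before: status and errno travelled separately. */
static int handle_old(int type, int *errp)
{
        if (type < 0) {
                *errp = -EINVAL;
                return -1;
        }
        return 0;
}

/* After: one return value, 0 or a negative errno. */
static int handle_new(int type)
{
        if (type < 0)
                return -EINVAL;
        return 0;
}
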
@@ -1942,7 +2031,7 @@ static int build_expire(struct sk_buff *skb, struct xfrm_state *x, struct km_eve
{
struct xfrm_user_expire *ue;
struct nlmsghdr *nlh;
- unsigned char *b = skb->tail;
+ unsigned char *b = skb_tail_pointer(skb);
nlh = NLMSG_PUT(skb, c->pid, 0, XFRM_MSG_EXPIRE,
sizeof(*ue));
@@ -1952,11 +2041,11 @@ static int build_expire(struct sk_buff *skb, struct xfrm_state *x, struct km_eve
copy_to_user_state(x, &ue->state);
ue->hard = (c->data.hard != 0) ? 1 : 0;
- nlh->nlmsg_len = skb->tail - b;
+ nlh->nlmsg_len = skb_tail_pointer(skb) - b;
return skb->len;
nlmsg_failure:
- skb_trim(skb, b - skb->data);
+ nlmsg_trim(skb, b);
return -1;
}
@@ -1999,7 +2088,7 @@ static int xfrm_notify_sa_flush(struct km_event *c)
struct xfrm_usersa_flush *p;
struct nlmsghdr *nlh;
struct sk_buff *skb;
- unsigned char *b;
+ sk_buff_data_t b;
int len = NLMSG_LENGTH(sizeof(struct xfrm_usersa_flush));
skb = alloc_skb(len, GFP_ATOMIC);
@@ -2045,7 +2134,7 @@ static int xfrm_notify_sa(struct xfrm_state *x, struct km_event *c)
struct xfrm_usersa_id *id;
struct nlmsghdr *nlh;
struct sk_buff *skb;
- unsigned char *b;
+ sk_buff_data_t b;
int len = xfrm_sa_len(x);
int headlen;
@@ -2129,7 +2218,7 @@ static int build_acquire(struct sk_buff *skb, struct xfrm_state *x,
{
struct xfrm_user_acquire *ua;
struct nlmsghdr *nlh;
- unsigned char *b = skb->tail;
+ unsigned char *b = skb_tail_pointer(skb);
__u32 seq = xfrm_get_acqseq();
nlh = NLMSG_PUT(skb, 0, 0, XFRM_MSG_ACQUIRE,
@@ -2153,11 +2242,11 @@ static int build_acquire(struct sk_buff *skb, struct xfrm_state *x,
if (copy_to_user_policy_type(xp->type, skb) < 0)
goto nlmsg_failure;
- nlh->nlmsg_len = skb->tail - b;
+ nlh->nlmsg_len = skb_tail_pointer(skb) - b;
return skb->len;
nlmsg_failure:
- skb_trim(skb, b - skb->data);
+ nlmsg_trim(skb, b);
return -1;
}
@@ -2249,7 +2338,7 @@ static int build_polexpire(struct sk_buff *skb, struct xfrm_policy *xp,
struct xfrm_user_polexpire *upe;
struct nlmsghdr *nlh;
int hard = c->data.hard;
- unsigned char *b = skb->tail;
+ unsigned char *b = skb_tail_pointer(skb);
nlh = NLMSG_PUT(skb, c->pid, 0, XFRM_MSG_POLEXPIRE, sizeof(*upe));
upe = NLMSG_DATA(nlh);
@@ -2264,11 +2353,11 @@ static int build_polexpire(struct sk_buff *skb, struct xfrm_policy *xp,
goto nlmsg_failure;
upe->hard = !!hard;
- nlh->nlmsg_len = skb->tail - b;
+ nlh->nlmsg_len = skb_tail_pointer(skb) - b;
return skb->len;
nlmsg_failure:
- skb_trim(skb, b - skb->data);
+ nlmsg_trim(skb, b);
return -1;
}
@@ -2300,7 +2389,7 @@ static int xfrm_notify_policy(struct xfrm_policy *xp, int dir, struct km_event *
struct xfrm_userpolicy_id *id;
struct nlmsghdr *nlh;
struct sk_buff *skb;
- unsigned char *b;
+ sk_buff_data_t b;
int len = RTA_SPACE(sizeof(struct xfrm_user_tmpl) * xp->xfrm_nr);
int headlen;
@@ -2357,7 +2446,7 @@ static int xfrm_notify_policy_flush(struct km_event *c)
{
struct nlmsghdr *nlh;
struct sk_buff *skb;
- unsigned char *b;
+ sk_buff_data_t b;
int len = 0;
#ifdef CONFIG_XFRM_SUB_POLICY
len += RTA_SPACE(sizeof(struct xfrm_userpolicy_type));
@@ -2410,7 +2499,7 @@ static int build_report(struct sk_buff *skb, u8 proto,
{
struct xfrm_user_report *ur;
struct nlmsghdr *nlh;
- unsigned char *b = skb->tail;
+ unsigned char *b = skb_tail_pointer(skb);
nlh = NLMSG_PUT(skb, 0, 0, XFRM_MSG_REPORT, sizeof(*ur));
ur = NLMSG_DATA(nlh);
@@ -2422,12 +2511,12 @@ static int build_report(struct sk_buff *skb, u8 proto,
if (addr)
RTA_PUT(skb, XFRMA_COADDR, sizeof(*addr), addr);
- nlh->nlmsg_len = skb->tail - b;
+ nlh->nlmsg_len = skb_tail_pointer(skb) - b;
return skb->len;
nlmsg_failure:
rtattr_failure:
- skb_trim(skb, b - skb->data);
+ nlmsg_trim(skb, b);
return -1;
}
@@ -2466,7 +2555,7 @@ static int __init xfrm_user_init(void)
printk(KERN_INFO "Initializing XFRM netlink socket\n");
nlsk = netlink_kernel_create(NETLINK_XFRM, XFRMNLGRP_MAX,
- xfrm_netlink_rcv, THIS_MODULE);
+ xfrm_netlink_rcv, NULL, THIS_MODULE);
if (nlsk == NULL)
return -ENOMEM;
rcu_assign_pointer(xfrm_nl, nlsk);