summaryrefslogtreecommitdiffstats
path: root/net/ipv4/tcp_ipv4.c
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2008-12-28 12:49:40 -0800
committerLinus Torvalds <torvalds@linux-foundation.org>2008-12-28 12:49:40 -0800
commit0191b625ca5a46206d2fb862bb08f36f2fcb3b31 (patch)
tree454d1842b1833d976da62abcbd5c47521ebe9bd7 /net/ipv4/tcp_ipv4.c
parent54a696bd07c14d3b1192d03ce7269bc59b45209a (diff)
parenteb56092fc168bf5af199d47af50c0d84a96db898 (diff)
Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/net-next-2.6
* git://git.kernel.org/pub/scm/linux/kernel/git/davem/net-next-2.6: (1429 commits) net: Allow dependancies of FDDI & Tokenring to be modular. igb: Fix build warning when DCA is disabled. net: Fix warning fallout from recent NAPI interface changes. gro: Fix potential use after free sfc: If AN is enabled, always read speed/duplex from the AN advertising bits sfc: When disabling the NIC, close the device rather than unregistering it sfc: SFT9001: Add cable diagnostics sfc: Add support for multiple PHY self-tests sfc: Merge top-level functions for self-tests sfc: Clean up PHY mode management in loopback self-test sfc: Fix unreliable link detection in some loopback modes sfc: Generate unique names for per-NIC workqueues 802.3ad: use standard ethhdr instead of ad_header 802.3ad: generalize out mac address initializer 802.3ad: initialize ports LACPDU from const initializer 802.3ad: remove typedef around ad_system 802.3ad: turn ports is_individual into a bool 802.3ad: turn ports is_enabled into a bool 802.3ad: make ntt bool ixgbe: Fix set_ringparam in ixgbe to use the same memory pools. ... Fixed trivial IPv4/6 address printing conflicts in fs/cifs/connect.c due to the conversion to %pI (in this networking merge) and the addition of doing IPv6 addresses (from the earlier merge of CIFS).
Diffstat (limited to 'net/ipv4/tcp_ipv4.c')
-rw-r--r--net/ipv4/tcp_ipv4.c137
1 files changed, 85 insertions, 52 deletions
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 5c8fa7f1e32..10172487921 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -97,11 +97,7 @@ struct tcp_md5sig_key *tcp_v4_md5_do_lookup(struct sock *sk, __be32 addr)
}
#endif
-struct inet_hashinfo __cacheline_aligned tcp_hashinfo = {
- .lhash_lock = __RW_LOCK_UNLOCKED(tcp_hashinfo.lhash_lock),
- .lhash_users = ATOMIC_INIT(0),
- .lhash_wait = __WAIT_QUEUE_HEAD_INITIALIZER(tcp_hashinfo.lhash_wait),
-};
+struct inet_hashinfo tcp_hashinfo;
static inline __u32 tcp_v4_init_sequence(struct sk_buff *skb)
{
@@ -492,7 +488,7 @@ void tcp_v4_send_check(struct sock *sk, int len, struct sk_buff *skb)
skb->csum_offset = offsetof(struct tcphdr, check);
} else {
th->check = tcp_v4_check(len, inet->saddr, inet->daddr,
- csum_partial((char *)th,
+ csum_partial(th,
th->doff << 2,
skb->csum));
}
@@ -726,7 +722,7 @@ static int __tcp_v4_send_synack(struct sock *sk, struct request_sock *req,
th->check = tcp_v4_check(skb->len,
ireq->loc_addr,
ireq->rmt_addr,
- csum_partial((char *)th, skb->len,
+ csum_partial(th, skb->len,
skb->csum));
err = ip_build_and_send_pkt(skb, sk, ireq->loc_addr,
@@ -1139,10 +1135,9 @@ static int tcp_v4_inbound_md5_hash(struct sock *sk, struct sk_buff *skb)
if (genhash || memcmp(hash_location, newhash, 16) != 0) {
if (net_ratelimit()) {
- printk(KERN_INFO "MD5 Hash failed for "
- "(" NIPQUAD_FMT ", %d)->(" NIPQUAD_FMT ", %d)%s\n",
- NIPQUAD(iph->saddr), ntohs(th->source),
- NIPQUAD(iph->daddr), ntohs(th->dest),
+ printk(KERN_INFO "MD5 Hash failed for (%pI4, %d)->(%pI4, %d)%s\n",
+ &iph->saddr, ntohs(th->source),
+ &iph->daddr, ntohs(th->dest),
genhash ? " tcp_v4_calc_md5_hash failed" : "");
}
return 1;
@@ -1297,10 +1292,8 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
* to destinations, already remembered
* to the moment of synflood.
*/
- LIMIT_NETDEBUG(KERN_DEBUG "TCP: drop open "
- "request from " NIPQUAD_FMT "/%u\n",
- NIPQUAD(saddr),
- ntohs(tcp_hdr(skb)->source));
+ LIMIT_NETDEBUG(KERN_DEBUG "TCP: drop open request from %pI4/%u\n",
+ &saddr, ntohs(tcp_hdr(skb)->source));
goto drop_and_release;
}
@@ -1804,7 +1797,7 @@ static int tcp_v4_init_sock(struct sock *sk)
sk->sk_sndbuf = sysctl_tcp_wmem[1];
sk->sk_rcvbuf = sysctl_tcp_rmem[1];
- atomic_inc(&tcp_sockets_allocated);
+ percpu_counter_inc(&tcp_sockets_allocated);
return 0;
}
@@ -1852,7 +1845,7 @@ void tcp_v4_destroy_sock(struct sock *sk)
sk->sk_sndmsg_page = NULL;
}
- atomic_dec(&tcp_sockets_allocated);
+ percpu_counter_dec(&tcp_sockets_allocated);
}
EXPORT_SYMBOL(tcp_v4_destroy_sock);
@@ -1860,32 +1853,35 @@ EXPORT_SYMBOL(tcp_v4_destroy_sock);
#ifdef CONFIG_PROC_FS
/* Proc filesystem TCP sock list dumping. */
-static inline struct inet_timewait_sock *tw_head(struct hlist_head *head)
+static inline struct inet_timewait_sock *tw_head(struct hlist_nulls_head *head)
{
- return hlist_empty(head) ? NULL :
+ return hlist_nulls_empty(head) ? NULL :
list_entry(head->first, struct inet_timewait_sock, tw_node);
}
static inline struct inet_timewait_sock *tw_next(struct inet_timewait_sock *tw)
{
- return tw->tw_node.next ?
- hlist_entry(tw->tw_node.next, typeof(*tw), tw_node) : NULL;
+ return !is_a_nulls(tw->tw_node.next) ?
+ hlist_nulls_entry(tw->tw_node.next, typeof(*tw), tw_node) : NULL;
}
static void *listening_get_next(struct seq_file *seq, void *cur)
{
struct inet_connection_sock *icsk;
- struct hlist_node *node;
+ struct hlist_nulls_node *node;
struct sock *sk = cur;
- struct tcp_iter_state* st = seq->private;
+ struct inet_listen_hashbucket *ilb;
+ struct tcp_iter_state *st = seq->private;
struct net *net = seq_file_net(seq);
if (!sk) {
st->bucket = 0;
- sk = sk_head(&tcp_hashinfo.listening_hash[0]);
+ ilb = &tcp_hashinfo.listening_hash[0];
+ spin_lock_bh(&ilb->lock);
+ sk = sk_nulls_head(&ilb->head);
goto get_sk;
}
-
+ ilb = &tcp_hashinfo.listening_hash[st->bucket];
++st->num;
if (st->state == TCP_SEQ_STATE_OPENREQ) {
@@ -1918,7 +1914,7 @@ get_req:
sk = sk_next(sk);
}
get_sk:
- sk_for_each_from(sk, node) {
+ sk_nulls_for_each_from(sk, node) {
if (sk->sk_family == st->family && net_eq(sock_net(sk), net)) {
cur = sk;
goto out;
@@ -1935,8 +1931,11 @@ start_req:
}
read_unlock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
}
+ spin_unlock_bh(&ilb->lock);
if (++st->bucket < INET_LHTABLE_SIZE) {
- sk = sk_head(&tcp_hashinfo.listening_hash[st->bucket]);
+ ilb = &tcp_hashinfo.listening_hash[st->bucket];
+ spin_lock_bh(&ilb->lock);
+ sk = sk_nulls_head(&ilb->head);
goto get_sk;
}
cur = NULL;
@@ -1957,28 +1956,28 @@ static void *listening_get_idx(struct seq_file *seq, loff_t *pos)
static inline int empty_bucket(struct tcp_iter_state *st)
{
- return hlist_empty(&tcp_hashinfo.ehash[st->bucket].chain) &&
- hlist_empty(&tcp_hashinfo.ehash[st->bucket].twchain);
+ return hlist_nulls_empty(&tcp_hashinfo.ehash[st->bucket].chain) &&
+ hlist_nulls_empty(&tcp_hashinfo.ehash[st->bucket].twchain);
}
static void *established_get_first(struct seq_file *seq)
{
- struct tcp_iter_state* st = seq->private;
+ struct tcp_iter_state *st = seq->private;
struct net *net = seq_file_net(seq);
void *rc = NULL;
for (st->bucket = 0; st->bucket < tcp_hashinfo.ehash_size; ++st->bucket) {
struct sock *sk;
- struct hlist_node *node;
+ struct hlist_nulls_node *node;
struct inet_timewait_sock *tw;
- rwlock_t *lock = inet_ehash_lockp(&tcp_hashinfo, st->bucket);
+ spinlock_t *lock = inet_ehash_lockp(&tcp_hashinfo, st->bucket);
/* Lockless fast path for the common case of empty buckets */
if (empty_bucket(st))
continue;
- read_lock_bh(lock);
- sk_for_each(sk, node, &tcp_hashinfo.ehash[st->bucket].chain) {
+ spin_lock_bh(lock);
+ sk_nulls_for_each(sk, node, &tcp_hashinfo.ehash[st->bucket].chain) {
if (sk->sk_family != st->family ||
!net_eq(sock_net(sk), net)) {
continue;
@@ -1996,7 +1995,7 @@ static void *established_get_first(struct seq_file *seq)
rc = tw;
goto out;
}
- read_unlock_bh(lock);
+ spin_unlock_bh(lock);
st->state = TCP_SEQ_STATE_ESTABLISHED;
}
out:
@@ -2007,8 +2006,8 @@ static void *established_get_next(struct seq_file *seq, void *cur)
{
struct sock *sk = cur;
struct inet_timewait_sock *tw;
- struct hlist_node *node;
- struct tcp_iter_state* st = seq->private;
+ struct hlist_nulls_node *node;
+ struct tcp_iter_state *st = seq->private;
struct net *net = seq_file_net(seq);
++st->num;
@@ -2024,7 +2023,7 @@ get_tw:
cur = tw;
goto out;
}
- read_unlock_bh(inet_ehash_lockp(&tcp_hashinfo, st->bucket));
+ spin_unlock_bh(inet_ehash_lockp(&tcp_hashinfo, st->bucket));
st->state = TCP_SEQ_STATE_ESTABLISHED;
/* Look for next non empty bucket */
@@ -2034,12 +2033,12 @@ get_tw:
if (st->bucket >= tcp_hashinfo.ehash_size)
return NULL;
- read_lock_bh(inet_ehash_lockp(&tcp_hashinfo, st->bucket));
- sk = sk_head(&tcp_hashinfo.ehash[st->bucket].chain);
+ spin_lock_bh(inet_ehash_lockp(&tcp_hashinfo, st->bucket));
+ sk = sk_nulls_head(&tcp_hashinfo.ehash[st->bucket].chain);
} else
- sk = sk_next(sk);
+ sk = sk_nulls_next(sk);
- sk_for_each_from(sk, node) {
+ sk_nulls_for_each_from(sk, node) {
if (sk->sk_family == st->family && net_eq(sock_net(sk), net))
goto found;
}
@@ -2067,14 +2066,12 @@ static void *established_get_idx(struct seq_file *seq, loff_t pos)
static void *tcp_get_idx(struct seq_file *seq, loff_t pos)
{
void *rc;
- struct tcp_iter_state* st = seq->private;
+ struct tcp_iter_state *st = seq->private;
- inet_listen_lock(&tcp_hashinfo);
st->state = TCP_SEQ_STATE_LISTENING;
rc = listening_get_idx(seq, &pos);
if (!rc) {
- inet_listen_unlock(&tcp_hashinfo);
st->state = TCP_SEQ_STATE_ESTABLISHED;
rc = established_get_idx(seq, pos);
}
@@ -2084,7 +2081,7 @@ static void *tcp_get_idx(struct seq_file *seq, loff_t pos)
static void *tcp_seq_start(struct seq_file *seq, loff_t *pos)
{
- struct tcp_iter_state* st = seq->private;
+ struct tcp_iter_state *st = seq->private;
st->state = TCP_SEQ_STATE_LISTENING;
st->num = 0;
return *pos ? tcp_get_idx(seq, *pos - 1) : SEQ_START_TOKEN;
@@ -2093,7 +2090,7 @@ static void *tcp_seq_start(struct seq_file *seq, loff_t *pos)
static void *tcp_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
void *rc = NULL;
- struct tcp_iter_state* st;
+ struct tcp_iter_state *st;
if (v == SEQ_START_TOKEN) {
rc = tcp_get_idx(seq, 0);
@@ -2106,7 +2103,6 @@ static void *tcp_seq_next(struct seq_file *seq, void *v, loff_t *pos)
case TCP_SEQ_STATE_LISTENING:
rc = listening_get_next(seq, v);
if (!rc) {
- inet_listen_unlock(&tcp_hashinfo);
st->state = TCP_SEQ_STATE_ESTABLISHED;
rc = established_get_first(seq);
}
@@ -2123,7 +2119,7 @@ out:
static void tcp_seq_stop(struct seq_file *seq, void *v)
{
- struct tcp_iter_state* st = seq->private;
+ struct tcp_iter_state *st = seq->private;
switch (st->state) {
case TCP_SEQ_STATE_OPENREQ:
@@ -2133,12 +2129,12 @@ static void tcp_seq_stop(struct seq_file *seq, void *v)
}
case TCP_SEQ_STATE_LISTENING:
if (v != SEQ_START_TOKEN)
- inet_listen_unlock(&tcp_hashinfo);
+ spin_unlock_bh(&tcp_hashinfo.listening_hash[st->bucket].lock);
break;
case TCP_SEQ_STATE_TIME_WAIT:
case TCP_SEQ_STATE_ESTABLISHED:
if (v)
- read_unlock_bh(inet_ehash_lockp(&tcp_hashinfo, st->bucket));
+ spin_unlock_bh(inet_ehash_lockp(&tcp_hashinfo, st->bucket));
break;
}
}
@@ -2284,7 +2280,7 @@ static void get_timewait4_sock(struct inet_timewait_sock *tw,
static int tcp4_seq_show(struct seq_file *seq, void *v)
{
- struct tcp_iter_state* st;
+ struct tcp_iter_state *st;
int len;
if (v == SEQ_START_TOKEN) {
@@ -2350,6 +2346,41 @@ void tcp4_proc_exit(void)
}
#endif /* CONFIG_PROC_FS */
+struct sk_buff **tcp4_gro_receive(struct sk_buff **head, struct sk_buff *skb)
+{
+ struct iphdr *iph = ip_hdr(skb);
+
+ switch (skb->ip_summed) {
+ case CHECKSUM_COMPLETE:
+ if (!tcp_v4_check(skb->len, iph->saddr, iph->daddr,
+ skb->csum)) {
+ skb->ip_summed = CHECKSUM_UNNECESSARY;
+ break;
+ }
+
+ /* fall through */
+ case CHECKSUM_NONE:
+ NAPI_GRO_CB(skb)->flush = 1;
+ return NULL;
+ }
+
+ return tcp_gro_receive(head, skb);
+}
+EXPORT_SYMBOL(tcp4_gro_receive);
+
+int tcp4_gro_complete(struct sk_buff *skb)
+{
+ struct iphdr *iph = ip_hdr(skb);
+ struct tcphdr *th = tcp_hdr(skb);
+
+ th->check = ~tcp_v4_check(skb->len - skb_transport_offset(skb),
+ iph->saddr, iph->daddr, 0);
+ skb_shinfo(skb)->gso_type = SKB_GSO_TCPV4;
+
+ return tcp_gro_complete(skb);
+}
+EXPORT_SYMBOL(tcp4_gro_complete);
+
struct proto tcp_prot = {
.name = "TCP",
.owner = THIS_MODULE,
@@ -2378,6 +2409,7 @@ struct proto tcp_prot = {
.sysctl_rmem = sysctl_tcp_rmem,
.max_header = MAX_TCP_HEADER,
.obj_size = sizeof(struct tcp_sock),
+ .slab_flags = SLAB_DESTROY_BY_RCU,
.twsk_prot = &tcp_timewait_sock_ops,
.rsk_prot = &tcp_request_sock_ops,
.h.hashinfo = &tcp_hashinfo,
@@ -2407,6 +2439,7 @@ static struct pernet_operations __net_initdata tcp_sk_ops = {
void __init tcp_v4_init(void)
{
+ inet_hashinfo_init(&tcp_hashinfo);
if (register_pernet_device(&tcp_sk_ops))
panic("Failed to create the TCP control socket.\n");
}