summary refs log tree commit diff stats
path: root/net/ipv4/af_inet.c
diff options
context:
space:
mode:
Diffstat (limited to 'net/ipv4/af_inet.c')
-rw-r--r-- net/ipv4/af_inet.c 120
1 files changed, 88 insertions, 32 deletions
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index fe4582ca969..24b384b7903 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -212,6 +212,26 @@ int inet_listen(struct socket *sock, int backlog)
* we can only allow the backlog to be adjusted.
*/
if (old_state != TCP_LISTEN) {
+ /* Check special setups for testing purposes to enable TFO w/o
+ * requiring the TCP_FASTOPEN sockopt.
+ * Note that only TCP sockets (SOCK_STREAM) will reach here.
+ * Also, fastopenq may already have been allocated because this
+ * socket was in TCP_LISTEN state previously but was
+ * shutdown() (rather than close()).
+ */
+ if ((sysctl_tcp_fastopen & TFO_SERVER_ENABLE) != 0 &&
+ inet_csk(sk)->icsk_accept_queue.fastopenq == NULL) {
+ if ((sysctl_tcp_fastopen & TFO_SERVER_WO_SOCKOPT1) != 0)
+ err = fastopen_init_queue(sk, backlog);
+ else if ((sysctl_tcp_fastopen &
+ TFO_SERVER_WO_SOCKOPT2) != 0)
+ err = fastopen_init_queue(sk,
+ ((uint)sysctl_tcp_fastopen) >> 16);
+ else
+ err = 0;
+ if (err)
+ goto out;
+ }
err = inet_csk_listen_start(sk, backlog);
if (err)
goto out;
@@ -326,7 +346,8 @@ lookup_protocol:
}
err = -EPERM;
- if (sock->type == SOCK_RAW && !kern && !capable(CAP_NET_RAW))
+ if (sock->type == SOCK_RAW && !kern &&
+ !ns_capable(net->user_ns, CAP_NET_RAW))
goto out_rcu_unlock;
err = -EAFNOSUPPORT;
@@ -453,6 +474,7 @@ int inet_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
struct sockaddr_in *addr = (struct sockaddr_in *)uaddr;
struct sock *sk = sock->sk;
struct inet_sock *inet = inet_sk(sk);
+ struct net *net = sock_net(sk);
unsigned short snum;
int chk_addr_ret;
int err;
@@ -476,7 +498,7 @@ int inet_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
goto out;
}
- chk_addr_ret = inet_addr_type(sock_net(sk), addr->sin_addr.s_addr);
+ chk_addr_ret = inet_addr_type(net, addr->sin_addr.s_addr);
/* Not specified by any standard per-se, however it breaks too
* many applications when removed. It is unfortunate since
@@ -496,7 +518,8 @@ int inet_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
snum = ntohs(addr->sin_port);
err = -EACCES;
- if (snum && snum < PROT_SOCK && !capable(CAP_NET_BIND_SERVICE))
+ if (snum && snum < PROT_SOCK &&
+ !ns_capable(net->user_ns, CAP_NET_BIND_SERVICE))
goto out;
/* We keep a pair of addresses. rcv_saddr is the one
@@ -701,7 +724,8 @@ int inet_accept(struct socket *sock, struct socket *newsock, int flags)
sock_rps_record_flow(sk2);
WARN_ON(!((1 << sk2->sk_state) &
- (TCPF_ESTABLISHED | TCPF_CLOSE_WAIT | TCPF_CLOSE)));
+ (TCPF_ESTABLISHED | TCPF_SYN_RECV |
+ TCPF_CLOSE_WAIT | TCPF_CLOSE)));
sock_graft(sk2, newsock);
@@ -1230,7 +1254,7 @@ EXPORT_SYMBOL(inet_sk_rebuild_header);
static int inet_gso_send_check(struct sk_buff *skb)
{
- const struct net_protocol *ops;
+ const struct net_offload *ops;
const struct iphdr *iph;
int proto;
int ihl;
@@ -1254,9 +1278,9 @@ static int inet_gso_send_check(struct sk_buff *skb)
err = -EPROTONOSUPPORT;
rcu_read_lock();
- ops = rcu_dereference(inet_protos[proto]);
- if (likely(ops && ops->gso_send_check))
- err = ops->gso_send_check(skb);
+ ops = rcu_dereference(inet_offloads[proto]);
+ if (likely(ops && ops->callbacks.gso_send_check))
+ err = ops->callbacks.gso_send_check(skb);
rcu_read_unlock();
out:
@@ -1267,7 +1291,7 @@ static struct sk_buff *inet_gso_segment(struct sk_buff *skb,
netdev_features_t features)
{
struct sk_buff *segs = ERR_PTR(-EINVAL);
- const struct net_protocol *ops;
+ const struct net_offload *ops;
struct iphdr *iph;
int proto;
int ihl;
@@ -1304,9 +1328,9 @@ static struct sk_buff *inet_gso_segment(struct sk_buff *skb,
segs = ERR_PTR(-EPROTONOSUPPORT);
rcu_read_lock();
- ops = rcu_dereference(inet_protos[proto]);
- if (likely(ops && ops->gso_segment))
- segs = ops->gso_segment(skb, features);
+ ops = rcu_dereference(inet_offloads[proto]);
+ if (likely(ops && ops->callbacks.gso_segment))
+ segs = ops->callbacks.gso_segment(skb, features);
rcu_read_unlock();
if (!segs || IS_ERR(segs))
@@ -1335,7 +1359,7 @@ out:
static struct sk_buff **inet_gro_receive(struct sk_buff **head,
struct sk_buff *skb)
{
- const struct net_protocol *ops;
+ const struct net_offload *ops;
struct sk_buff **pp = NULL;
struct sk_buff *p;
const struct iphdr *iph;
@@ -1357,14 +1381,14 @@ static struct sk_buff **inet_gro_receive(struct sk_buff **head,
proto = iph->protocol;
rcu_read_lock();
- ops = rcu_dereference(inet_protos[proto]);
- if (!ops || !ops->gro_receive)
+ ops = rcu_dereference(inet_offloads[proto]);
+ if (!ops || !ops->callbacks.gro_receive)
goto out_unlock;
if (*(u8 *)iph != 0x45)
goto out_unlock;
- if (unlikely(ip_fast_csum((u8 *)iph, iph->ihl)))
+ if (unlikely(ip_fast_csum((u8 *)iph, 5)))
goto out_unlock;
id = ntohl(*(__be32 *)&iph->id);
@@ -1380,7 +1404,6 @@ static struct sk_buff **inet_gro_receive(struct sk_buff **head,
iph2 = ip_hdr(p);
if ((iph->protocol ^ iph2->protocol) |
- (iph->tos ^ iph2->tos) |
((__force u32)iph->saddr ^ (__force u32)iph2->saddr) |
((__force u32)iph->daddr ^ (__force u32)iph2->daddr)) {
NAPI_GRO_CB(p)->same_flow = 0;
@@ -1390,6 +1413,7 @@ static struct sk_buff **inet_gro_receive(struct sk_buff **head,
/* All fields must match except length and checksum. */
NAPI_GRO_CB(p)->flush |=
(iph->ttl ^ iph2->ttl) |
+ (iph->tos ^ iph2->tos) |
((u16)(ntohs(iph2->id) + NAPI_GRO_CB(p)->count) ^ id);
NAPI_GRO_CB(p)->flush |= flush;
@@ -1399,7 +1423,7 @@ static struct sk_buff **inet_gro_receive(struct sk_buff **head,
skb_gro_pull(skb, sizeof(*iph));
skb_set_transport_header(skb, skb_gro_offset(skb));
- pp = ops->gro_receive(head, skb);
+ pp = ops->callbacks.gro_receive(head, skb);
out_unlock:
rcu_read_unlock();
@@ -1414,7 +1438,7 @@ static int inet_gro_complete(struct sk_buff *skb)
{
__be16 newlen = htons(skb->len - skb_network_offset(skb));
struct iphdr *iph = ip_hdr(skb);
- const struct net_protocol *ops;
+ const struct net_offload *ops;
int proto = iph->protocol;
int err = -ENOSYS;
@@ -1422,11 +1446,11 @@ static int inet_gro_complete(struct sk_buff *skb)
iph->tot_len = newlen;
rcu_read_lock();
- ops = rcu_dereference(inet_protos[proto]);
- if (WARN_ON(!ops || !ops->gro_complete))
+ ops = rcu_dereference(inet_offloads[proto]);
+ if (WARN_ON(!ops || !ops->callbacks.gro_complete))
goto out_unlock;
- err = ops->gro_complete(skb);
+ err = ops->callbacks.gro_complete(skb);
out_unlock:
rcu_read_unlock();
@@ -1537,23 +1561,33 @@ static const struct net_protocol tcp_protocol = {
.early_demux = tcp_v4_early_demux,
.handler = tcp_v4_rcv,
.err_handler = tcp_v4_err,
- .gso_send_check = tcp_v4_gso_send_check,
- .gso_segment = tcp_tso_segment,
- .gro_receive = tcp4_gro_receive,
- .gro_complete = tcp4_gro_complete,
.no_policy = 1,
.netns_ok = 1,
};
+static const struct net_offload tcp_offload = {
+ .callbacks = {
+ .gso_send_check = tcp_v4_gso_send_check,
+ .gso_segment = tcp_tso_segment,
+ .gro_receive = tcp4_gro_receive,
+ .gro_complete = tcp4_gro_complete,
+ },
+};
+
static const struct net_protocol udp_protocol = {
.handler = udp_rcv,
.err_handler = udp_err,
- .gso_send_check = udp4_ufo_send_check,
- .gso_segment = udp4_ufo_fragment,
.no_policy = 1,
.netns_ok = 1,
};
+static const struct net_offload udp_offload = {
+ .callbacks = {
+ .gso_send_check = udp4_ufo_send_check,
+ .gso_segment = udp4_ufo_fragment,
+ },
+};
+
static const struct net_protocol icmp_protocol = {
.handler = icmp_rcv,
.err_handler = ping_err,
@@ -1638,13 +1672,35 @@ static int ipv4_proc_init(void);
* IP protocol layer initialiser
*/
+static struct packet_offload ip_packet_offload __read_mostly = {
+ .type = cpu_to_be16(ETH_P_IP),
+ .callbacks = {
+ .gso_send_check = inet_gso_send_check,
+ .gso_segment = inet_gso_segment,
+ .gro_receive = inet_gro_receive,
+ .gro_complete = inet_gro_complete,
+ },
+};
+
+static int __init ipv4_offload_init(void)
+{
+ /*
+ * Add offloads
+ */
+ if (inet_add_offload(&udp_offload, IPPROTO_UDP) < 0)
+ pr_crit("%s: Cannot add UDP protocol offload\n", __func__);
+ if (inet_add_offload(&tcp_offload, IPPROTO_TCP) < 0)
+ pr_crit("%s: Cannot add TCP protocol offload\n", __func__);
+
+ dev_add_offload(&ip_packet_offload);
+ return 0;
+}
+
+fs_initcall(ipv4_offload_init);
+
static struct packet_type ip_packet_type __read_mostly = {
.type = cpu_to_be16(ETH_P_IP),
.func = ip_rcv,
- .gso_send_check = inet_gso_send_check,
- .gso_segment = inet_gso_segment,
- .gro_receive = inet_gro_receive,
- .gro_complete = inet_gro_complete,
};
static int __init inet_init(void)