diff options
Diffstat (limited to 'net/ipv4')
-rw-r--r-- | net/ipv4/arp.c | 2 | ||||
-rw-r--r-- | net/ipv4/devinet.c | 10 | ||||
-rw-r--r-- | net/ipv4/fib_frontend.c | 20 | ||||
-rw-r--r-- | net/ipv4/fib_rules.c | 2 | ||||
-rw-r--r-- | net/ipv4/fib_trie.c | 6 | ||||
-rw-r--r-- | net/ipv4/inet_connection_sock.c | 7 | ||||
-rw-r--r-- | net/ipv4/inet_diag.c | 21 | ||||
-rw-r--r-- | net/ipv4/inetpeer.c | 7 | ||||
-rw-r--r-- | net/ipv4/ip_output.c | 2 | ||||
-rw-r--r-- | net/ipv4/ipmr.c | 14 | ||||
-rw-r--r-- | net/ipv4/netfilter/nf_nat_sip.c | 14 | ||||
-rw-r--r-- | net/ipv4/ping.c | 22 | ||||
-rw-r--r-- | net/ipv4/raw.c | 18 | ||||
-rw-r--r-- | net/ipv4/route.c | 50 | ||||
-rw-r--r-- | net/ipv4/sysctl_net_ipv4.c | 42 | ||||
-rw-r--r-- | net/ipv4/tcp.c | 23 | ||||
-rw-r--r-- | net/ipv4/tcp_input.c | 20 | ||||
-rw-r--r-- | net/ipv4/tcp_ipv4.c | 15 | ||||
-rw-r--r-- | net/ipv4/tcp_minisocks.c | 2 | ||||
-rw-r--r-- | net/ipv4/tcp_output.c | 14 | ||||
-rw-r--r-- | net/ipv4/tcp_timer.c | 6 | ||||
-rw-r--r-- | net/ipv4/udp.c | 9 | ||||
-rw-r--r-- | net/ipv4/udp_diag.c | 5 |
23 files changed, 185 insertions, 146 deletions
diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c index 77e87aff419..47800459e4c 100644 --- a/net/ipv4/arp.c +++ b/net/ipv4/arp.c @@ -1225,7 +1225,7 @@ static int arp_netdev_event(struct notifier_block *this, unsigned long event, switch (event) { case NETDEV_CHANGEADDR: neigh_changeaddr(&arp_tbl, dev); - rt_cache_flush(dev_net(dev), 0); + rt_cache_flush(dev_net(dev)); break; default: break; diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c index 44bf82e3aef..e12fad77385 100644 --- a/net/ipv4/devinet.c +++ b/net/ipv4/devinet.c @@ -725,7 +725,7 @@ int devinet_ioctl(struct net *net, unsigned int cmd, void __user *arg) break; case SIOCSIFFLAGS: - ret = -EACCES; + ret = -EPERM; if (!capable(CAP_NET_ADMIN)) goto out; break; @@ -733,7 +733,7 @@ int devinet_ioctl(struct net *net, unsigned int cmd, void __user *arg) case SIOCSIFBRDADDR: /* Set the broadcast address */ case SIOCSIFDSTADDR: /* Set the destination address */ case SIOCSIFNETMASK: /* Set the netmask for the interface */ - ret = -EACCES; + ret = -EPERM; if (!capable(CAP_NET_ADMIN)) goto out; ret = -EINVAL; @@ -1503,7 +1503,7 @@ static int devinet_conf_proc(ctl_table *ctl, int write, if (i == IPV4_DEVCONF_ACCEPT_LOCAL - 1 || i == IPV4_DEVCONF_ROUTE_LOCALNET - 1) if ((new_value == 0) && (old_value != 0)) - rt_cache_flush(net, 0); + rt_cache_flush(net); } return ret; @@ -1537,7 +1537,7 @@ static int devinet_sysctl_forward(ctl_table *ctl, int write, dev_disable_lro(idev->dev); } rtnl_unlock(); - rt_cache_flush(net, 0); + rt_cache_flush(net); } } @@ -1554,7 +1554,7 @@ static int ipv4_doint_and_flush(ctl_table *ctl, int write, struct net *net = ctl->extra2; if (write && *valp != val) - rt_cache_flush(net, 0); + rt_cache_flush(net); return ret; } diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c index c43ae3fba79..8e2b475da9f 100644 --- a/net/ipv4/fib_frontend.c +++ b/net/ipv4/fib_frontend.c @@ -148,7 +148,7 @@ static void fib_flush(struct net *net) } if (flushed) - rt_cache_flush(net, -1); + rt_cache_flush(net); } /* @@ -999,11 +999,11 @@ static void nl_fib_lookup_exit(struct net *net) net->ipv4.fibnl = NULL; } -static void fib_disable_ip(struct net_device *dev, int force, int delay) +static void fib_disable_ip(struct net_device *dev, int force) { if (fib_sync_down_dev(dev, force)) fib_flush(dev_net(dev)); - rt_cache_flush(dev_net(dev), delay); + rt_cache_flush(dev_net(dev)); arp_ifdown(dev); } @@ -1020,7 +1020,7 @@ static int fib_inetaddr_event(struct notifier_block *this, unsigned long event, fib_sync_up(dev); #endif atomic_inc(&net->ipv4.dev_addr_genid); - rt_cache_flush(dev_net(dev), -1); + rt_cache_flush(dev_net(dev)); break; case NETDEV_DOWN: fib_del_ifaddr(ifa, NULL); @@ -1029,9 +1029,9 @@ static int fib_inetaddr_event(struct notifier_block *this, unsigned long event, /* Last address was deleted from this interface. * Disable IP. */ - fib_disable_ip(dev, 1, 0); + fib_disable_ip(dev, 1); } else { - rt_cache_flush(dev_net(dev), -1); + rt_cache_flush(dev_net(dev)); } break; } @@ -1045,7 +1045,7 @@ static int fib_netdev_event(struct notifier_block *this, unsigned long event, vo struct net *net = dev_net(dev); if (event == NETDEV_UNREGISTER) { - fib_disable_ip(dev, 2, -1); + fib_disable_ip(dev, 2); rt_flush_dev(dev); return NOTIFY_DONE; } @@ -1062,14 +1062,14 @@ static int fib_netdev_event(struct notifier_block *this, unsigned long event, vo fib_sync_up(dev); #endif atomic_inc(&net->ipv4.dev_addr_genid); - rt_cache_flush(dev_net(dev), -1); + rt_cache_flush(dev_net(dev)); break; case NETDEV_DOWN: - fib_disable_ip(dev, 0, 0); + fib_disable_ip(dev, 0); break; case NETDEV_CHANGEMTU: case NETDEV_CHANGE: - rt_cache_flush(dev_net(dev), 0); + rt_cache_flush(dev_net(dev)); break; case NETDEV_UNREGISTER_BATCH: break; diff --git a/net/ipv4/fib_rules.c b/net/ipv4/fib_rules.c index a83d74e498d..274309d3ade 100644 --- a/net/ipv4/fib_rules.c +++ b/net/ipv4/fib_rules.c @@ -259,7 +259,7 @@ static size_t fib4_rule_nlmsg_payload(struct fib_rule *rule) static void fib4_rule_flush_cache(struct fib_rules_ops *ops) { - rt_cache_flush(ops->fro_net, -1); + rt_cache_flush(ops->fro_net); } static const struct fib_rules_ops __net_initdata fib4_rules_ops_template = { diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c index 57bd978483e..d1b93595b4a 100644 --- a/net/ipv4/fib_trie.c +++ b/net/ipv4/fib_trie.c @@ -1286,7 +1286,7 @@ int fib_table_insert(struct fib_table *tb, struct fib_config *cfg) fib_release_info(fi_drop); if (state & FA_S_ACCESSED) - rt_cache_flush(cfg->fc_nlinfo.nl_net, -1); + rt_cache_flush(cfg->fc_nlinfo.nl_net); rtmsg_fib(RTM_NEWROUTE, htonl(key), new_fa, plen, tb->tb_id, &cfg->fc_nlinfo, NLM_F_REPLACE); @@ -1333,7 +1333,7 @@ int fib_table_insert(struct fib_table *tb, struct fib_config *cfg) list_add_tail_rcu(&new_fa->fa_list, (fa ? &fa->fa_list : fa_head)); - rt_cache_flush(cfg->fc_nlinfo.nl_net, -1); + rt_cache_flush(cfg->fc_nlinfo.nl_net); rtmsg_fib(RTM_NEWROUTE, htonl(key), new_fa, plen, tb->tb_id, &cfg->fc_nlinfo, 0); succeeded: @@ -1708,7 +1708,7 @@ int fib_table_delete(struct fib_table *tb, struct fib_config *cfg) trie_leaf_remove(t, l); if (fa->fa_state & FA_S_ACCESSED) - rt_cache_flush(cfg->fc_nlinfo.nl_net, -1); + rt_cache_flush(cfg->fc_nlinfo.nl_net); fib_release_info(fa->fa_info); alias_free_mem_rcu(fa); diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c index db0cf17c00f..7f75f21d7b8 100644 --- a/net/ipv4/inet_connection_sock.c +++ b/net/ipv4/inet_connection_sock.c @@ -404,12 +404,15 @@ struct dst_entry *inet_csk_route_child_sock(struct sock *sk, { const struct inet_request_sock *ireq = inet_rsk(req); struct inet_sock *newinet = inet_sk(newsk); - struct ip_options_rcu *opt = ireq->opt; + struct ip_options_rcu *opt; struct net *net = sock_net(sk); struct flowi4 *fl4; struct rtable *rt; fl4 = &newinet->cork.fl.u.ip4; + + rcu_read_lock(); + opt = rcu_dereference(newinet->inet_opt); flowi4_init_output(fl4, sk->sk_bound_dev_if, sk->sk_mark, RT_CONN_FLAGS(sk), RT_SCOPE_UNIVERSE, sk->sk_protocol, inet_sk_flowi_flags(sk), @@ -421,11 +424,13 @@ struct dst_entry *inet_csk_route_child_sock(struct sock *sk, goto no_route; if (opt && opt->opt.is_strictroute && rt->rt_gateway) goto route_err; + rcu_read_unlock(); return &rt->dst; route_err: ip_rt_put(rt); no_route: + rcu_read_unlock(); IP_INC_STATS_BH(net, IPSTATS_MIB_OUTNOROUTES); return NULL; } diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c index 570e61f9611..8bc005b1435 100644 --- a/net/ipv4/inet_diag.c +++ b/net/ipv4/inet_diag.c @@ -69,6 +69,7 @@ static inline void inet_diag_unlock_handler( int inet_sk_diag_fill(struct sock *sk, struct inet_connection_sock *icsk, struct sk_buff *skb, struct inet_diag_req_v2 *req, + struct user_namespace *user_ns, u32 pid, u32 seq, u16 nlmsg_flags, const struct nlmsghdr *unlh) { @@ -124,7 +125,7 @@ int inet_sk_diag_fill(struct sock *sk, struct inet_connection_sock *icsk, } #endif - r->idiag_uid = sock_i_uid(sk); + r->idiag_uid = from_kuid_munged(user_ns, sock_i_uid(sk)); r->idiag_inode = sock_i_ino(sk); if (ext & (1 << (INET_DIAG_MEMINFO - 1))) { @@ -199,11 +200,12 @@ EXPORT_SYMBOL_GPL(inet_sk_diag_fill); static int inet_csk_diag_fill(struct sock *sk, struct sk_buff *skb, struct inet_diag_req_v2 *req, + struct user_namespace *user_ns, u32 pid, u32 seq, u16 nlmsg_flags, const struct nlmsghdr *unlh) { return inet_sk_diag_fill(sk, inet_csk(sk), - skb, req, pid, seq, nlmsg_flags, unlh); + skb, req, user_ns, pid, seq, nlmsg_flags, unlh); } static int inet_twsk_diag_fill(struct inet_timewait_sock *tw, @@ -256,14 +258,16 @@ static int inet_twsk_diag_fill(struct inet_timewait_sock *tw, } static int sk_diag_fill(struct sock *sk, struct sk_buff *skb, - struct inet_diag_req_v2 *r, u32 pid, u32 seq, u16 nlmsg_flags, + struct inet_diag_req_v2 *r, + struct user_namespace *user_ns, + u32 pid, u32 seq, u16 nlmsg_flags, const struct nlmsghdr *unlh) { if (sk->sk_state == TCP_TIME_WAIT) return inet_twsk_diag_fill((struct inet_timewait_sock *)sk, skb, r, pid, seq, nlmsg_flags, unlh); - return inet_csk_diag_fill(sk, skb, r, pid, seq, nlmsg_flags, unlh); + return inet_csk_diag_fill(sk, skb, r, user_ns, pid, seq, nlmsg_flags, unlh); } int inet_diag_dump_one_icsk(struct inet_hashinfo *hashinfo, struct sk_buff *in_skb, @@ -311,6 +315,7 @@ int inet_diag_dump_one_icsk(struct inet_hashinfo *hashinfo, struct sk_buff *in_s } err = sk_diag_fill(sk, rep, req, + sk_user_ns(NETLINK_CB(in_skb).ssk), NETLINK_CB(in_skb).pid, nlh->nlmsg_seq, 0, nlh); if (err < 0) { @@ -551,6 +556,7 @@ static int inet_csk_diag_dump(struct sock *sk, return 0; return inet_csk_diag_fill(sk, skb, r, + sk_user_ns(NETLINK_CB(cb->skb).ssk), NETLINK_CB(cb->skb).pid, cb->nlh->nlmsg_seq, NLM_F_MULTI, cb->nlh); } @@ -591,7 +597,9 @@ static int inet_twsk_diag_dump(struct inet_timewait_sock *tw, } static int inet_diag_fill_req(struct sk_buff *skb, struct sock *sk, - struct request_sock *req, u32 pid, u32 seq, + struct request_sock *req, + struct user_namespace *user_ns, + u32 pid, u32 seq, const struct nlmsghdr *unlh) { const struct inet_request_sock *ireq = inet_rsk(req); @@ -625,7 +633,7 @@ static int inet_diag_fill_req(struct sk_buff *skb, struct sock *sk, r->idiag_expires = jiffies_to_msecs(tmo); r->idiag_rqueue = 0; r->idiag_wqueue = 0; - r->idiag_uid = sock_i_uid(sk); + r->idiag_uid = from_kuid_munged(user_ns, sock_i_uid(sk)); r->idiag_inode = 0; #if IS_ENABLED(CONFIG_IPV6) if (r->idiag_family == AF_INET6) { @@ -702,6 +710,7 @@ static int inet_diag_dump_reqs(struct sk_buff *skb, struct sock *sk, } err = inet_diag_fill_req(skb, sk, req, + sk_user_ns(NETLINK_CB(cb->skb).ssk), NETLINK_CB(cb->skb).pid, cb->nlh->nlmsg_seq, cb->nlh); if (err < 0) { diff --git a/net/ipv4/inetpeer.c b/net/ipv4/inetpeer.c index e1e0a4e8fd3..000e3d239d6 100644 --- a/net/ipv4/inetpeer.c +++ b/net/ipv4/inetpeer.c @@ -194,7 +194,7 @@ void __init inet_initpeers(void) 0, SLAB_HWCACHE_ALIGN | SLAB_PANIC, NULL); - INIT_DELAYED_WORK_DEFERRABLE(&gc_work, inetpeer_gc_worker); + INIT_DEFERRABLE_WORK(&gc_work, inetpeer_gc_worker); } static int addr_compare(const struct inetpeer_addr *a, @@ -510,7 +510,10 @@ relookup: secure_ipv6_id(daddr->addr.a6)); p->metrics[RTAX_LOCK-1] = INETPEER_METRICS_NEW; p->rate_tokens = 0; - p->rate_last = 0; + /* 60*HZ is arbitrary, but chosen enough high so that the first + * calculation of tokens is at its maximum. + */ + p->rate_last = jiffies - 60*HZ; INIT_LIST_HEAD(&p->gc_list); /* Link the node. */ diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index 147ccc3e93d..c196d749daf 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -1338,10 +1338,10 @@ struct sk_buff *__ip_make_skb(struct sock *sk, iph->ihl = 5; iph->tos = inet->tos; iph->frag_off = df; - ip_select_ident(iph, &rt->dst, sk); iph->ttl = ttl; iph->protocol = sk->sk_protocol; ip_copy_addrs(iph, fl4); + ip_select_ident(iph, &rt->dst, sk); if (opt) { iph->ihl += opt->optlen>>2; diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c index 8eec8f4a053..ebdf06f938b 100644 --- a/net/ipv4/ipmr.c +++ b/net/ipv4/ipmr.c @@ -124,6 +124,8 @@ static DEFINE_SPINLOCK(mfc_unres_lock); static struct kmem_cache *mrt_cachep __read_mostly; static struct mr_table *ipmr_new_table(struct net *net, u32 id); +static void ipmr_free_table(struct mr_table *mrt); + static int ip_mr_forward(struct net *net, struct mr_table *mrt, struct sk_buff *skb, struct mfc_cache *cache, int local); @@ -131,6 +133,7 @@ static int ipmr_cache_report(struct mr_table *mrt, struct sk_buff *pkt, vifi_t vifi, int assert); static int __ipmr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb, struct mfc_cache *c, struct rtmsg *rtm); +static void mroute_clean_tables(struct mr_table *mrt); static void ipmr_expire_process(unsigned long arg); #ifdef CONFIG_IP_MROUTE_MULTIPLE_TABLES @@ -271,7 +274,7 @@ static void __net_exit ipmr_rules_exit(struct net *net) list_for_each_entry_safe(mrt, next, &net->ipv4.mr_tables, list) { list_del(&mrt->list); - kfree(mrt); + ipmr_free_table(mrt); } fib_rules_unregister(net->ipv4.mr_rules_ops); } @@ -299,7 +302,7 @@ static int __net_init ipmr_rules_init(struct net *net) static void __net_exit ipmr_rules_exit(struct net *net) { - kfree(net->ipv4.mrt); + ipmr_free_table(net->ipv4.mrt); } #endif @@ -336,6 +339,13 @@ static struct mr_table *ipmr_new_table(struct net *net, u32 id) return mrt; } +static void ipmr_free_table(struct mr_table *mrt) +{ + del_timer_sync(&mrt->ipmr_expire_timer); + mroute_clean_tables(mrt); + kfree(mrt); +} + /* Service routines creating virtual interfaces: DVMRP tunnels and PIMREG */ static void ipmr_del_tunnel(struct net_device *dev, struct vifctl *v) diff --git a/net/ipv4/netfilter/nf_nat_sip.c b/net/ipv4/netfilter/nf_nat_sip.c index ea4a23813d2..9c87cde28ff 100644 --- a/net/ipv4/netfilter/nf_nat_sip.c +++ b/net/ipv4/netfilter/nf_nat_sip.c @@ -148,7 +148,7 @@ static unsigned int ip_nat_sip(struct sk_buff *skb, unsigned int dataoff, if (ct_sip_parse_header_uri(ct, *dptr, NULL, *datalen, hdr, NULL, &matchoff, &matchlen, &addr, &port) > 0) { - unsigned int matchend, poff, plen, buflen, n; + unsigned int olen, matchend, poff, plen, buflen, n; char buffer[sizeof("nnn.nnn.nnn.nnn:nnnnn")]; /* We're only interested in headers related to this @@ -163,17 +163,18 @@ static unsigned int ip_nat_sip(struct sk_buff *skb, unsigned int dataoff, goto next; } + olen = *datalen; if (!map_addr(skb, dataoff, dptr, datalen, matchoff, matchlen, &addr, port)) return NF_DROP; - matchend = matchoff + matchlen; + matchend = matchoff + matchlen + *datalen - olen; /* The maddr= parameter (RFC 2361) specifies where to send * the reply. */ if (ct_sip_parse_address_param(ct, *dptr, matchend, *datalen, "maddr=", &poff, &plen, - &addr) > 0 && + &addr, true) > 0 && addr.ip == ct->tuplehash[dir].tuple.src.u3.ip && addr.ip != ct->tuplehash[!dir].tuple.dst.u3.ip) { buflen = sprintf(buffer, "%pI4", @@ -187,7 +188,7 @@ static unsigned int ip_nat_sip(struct sk_buff *skb, unsigned int dataoff, * from which the server received the request. */ if (ct_sip_parse_address_param(ct, *dptr, matchend, *datalen, "received=", &poff, &plen, - &addr) > 0 && + &addr, false) > 0 && addr.ip == ct->tuplehash[dir].tuple.dst.u3.ip && addr.ip != ct->tuplehash[!dir].tuple.src.u3.ip) { buflen = sprintf(buffer, "%pI4", @@ -501,7 +502,10 @@ static unsigned int ip_nat_sdp_media(struct sk_buff *skb, unsigned int dataoff, ret = nf_ct_expect_related(rtcp_exp); if (ret == 0) break; - else if (ret != -EBUSY) { + else if (ret == -EBUSY) { + nf_ct_unexpect_related(rtp_exp); + continue; + } else if (ret < 0) { nf_ct_unexpect_related(rtp_exp); port = 0; break; diff --git a/net/ipv4/ping.c b/net/ipv4/ping.c index 6232d476f37..8f3d05424a3 100644 --- a/net/ipv4/ping.c +++ b/net/ipv4/ping.c @@ -185,10 +185,10 @@ exit: return sk; } -static void inet_get_ping_group_range_net(struct net *net, gid_t *low, - gid_t *high) +static void inet_get_ping_group_range_net(struct net *net, kgid_t *low, + kgid_t *high) { - gid_t *data = net->ipv4.sysctl_ping_group_range; + kgid_t *data = net->ipv4.sysctl_ping_group_range; unsigned int seq; do { @@ -203,19 +203,13 @@ static void inet_get_ping_group_range_net(struct net *net, gid_t *low, static int ping_init_sock(struct sock *sk) { struct net *net = sock_net(sk); - gid_t group = current_egid(); - gid_t range[2]; + kgid_t group = current_egid(); struct group_info *group_info = get_current_groups(); int i, j, count = group_info->ngroups; kgid_t low, high; - inet_get_ping_group_range_net(net, range, range+1); - low = make_kgid(&init_user_ns, range[0]); - high = make_kgid(&init_user_ns, range[1]); - if (!gid_valid(low) || !gid_valid(high) || gid_lt(high, low)) - return -EACCES; - - if (range[0] <= group && group <= range[1]) + inet_get_ping_group_range_net(net, &low, &high); + if (gid_lte(low, group) && gid_lte(group, high)) return 0; for (i = 0; i < group_info->nblocks; i++) { @@ -845,7 +839,9 @@ static void ping_format_sock(struct sock *sp, struct seq_file *f, bucket, src, srcp, dest, destp, sp->sk_state, sk_wmem_alloc_get(sp), sk_rmem_alloc_get(sp), - 0, 0L, 0, sock_i_uid(sp), 0, sock_i_ino(sp), + 0, 0L, 0, + from_kuid_munged(seq_user_ns(f), sock_i_uid(sp)), + 0, sock_i_ino(sp), atomic_read(&sp->sk_refcnt), sp, atomic_read(&sp->sk_drops), len); } diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c index ff0f071969e..73d1e4df4bf 100644 --- a/net/ipv4/raw.c +++ b/net/ipv4/raw.c @@ -131,18 +131,20 @@ found: * 0 - deliver * 1 - block */ -static __inline__ int icmp_filter(struct sock *sk, struct sk_buff *skb) +static int icmp_filter(const struct sock *sk, const struct sk_buff *skb) { - int type; + struct icmphdr _hdr; + const struct icmphdr *hdr; - if (!pskb_may_pull(skb, sizeof(struct icmphdr))) + hdr = skb_header_pointer(skb, skb_transport_offset(skb), + sizeof(_hdr), &_hdr); + if (!hdr) return 1; - type = icmp_hdr(skb)->type; - if (type < 32) { + if (hdr->type < 32) { __u32 data = raw_sk(sk)->filter.data; - return ((1 << type) & data) != 0; + return ((1U << hdr->type) & data) != 0; } /* Do not block unknown ICMP types */ @@ -992,7 +994,9 @@ static void raw_sock_seq_show(struct seq_file *seq, struct sock *sp, int i) i, src, srcp, dest, destp, sp->sk_state, sk_wmem_alloc_get(sp), sk_rmem_alloc_get(sp), - 0, 0L, 0, sock_i_uid(sp), 0, sock_i_ino(sp), + 0, 0L, 0, + from_kuid_munged(seq_user_ns(seq), sock_i_uid(sp)), + 0, sock_i_ino(sp), atomic_read(&sp->sk_refcnt), sp, atomic_read(&sp->sk_drops)); } diff --git a/net/ipv4/route.c b/net/ipv4/route.c index e4ba974f143..fd9af60397b 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -202,11 +202,6 @@ EXPORT_SYMBOL(ip_tos2prio); static DEFINE_PER_CPU(struct rt_cache_stat, rt_cache_stat); #define RT_CACHE_STAT_INC(field) __this_cpu_inc(rt_cache_stat.field) -static inline int rt_genid(struct net *net) -{ - return atomic_read(&net->ipv4.rt_genid); -} - #ifdef CONFIG_PROC_FS static void *rt_cache_seq_start(struct seq_file *seq, loff_t *pos) { @@ -447,27 +442,9 @@ static inline bool rt_is_expired(const struct rtable *rth) return rth->rt_genid != rt_genid(dev_net(rth->dst.dev)); } -/* - * Perturbation of rt_genid by a small quantity [1..256] - * Using 8 bits of shuffling ensure we can call rt_cache_invalidate() - * many times (2^24) without giving recent rt_genid. - * Jenkins hash is strong enough that litle changes of rt_genid are OK. - */ -static void rt_cache_invalidate(struct net *net) -{ - unsigned char shuffle; - - get_random_bytes(&shuffle, sizeof(shuffle)); - atomic_add(shuffle + 1U, &net->ipv4.rt_genid); -} - -/* - * delay < 0 : invalidate cache (fast : entries will be deleted later) - * delay >= 0 : invalidate & flush cache (can be long) - */ -void rt_cache_flush(struct net *net, int delay) +void rt_cache_flush(struct net *net) { - rt_cache_invalidate(net); + rt_genid_bump(net); } static struct neighbour *ipv4_neigh_lookup(const struct dst_entry *dst, @@ -934,12 +911,14 @@ static u32 __ip_rt_update_pmtu(struct rtable *rt, struct flowi4 *fl4, u32 mtu) if (mtu < ip_rt_min_pmtu) mtu = ip_rt_min_pmtu; + rcu_read_lock(); if (fib_lookup(dev_net(rt->dst.dev), fl4, &res) == 0) { struct fib_nh *nh = &FIB_RES_NH(res); update_or_create_fnhe(nh, fl4->daddr, 0, mtu, jiffies + ip_rt_mtu_expires); } + rcu_read_unlock(); return mtu; } @@ -956,7 +935,7 @@ static void ip_rt_update_pmtu(struct dst_entry *dst, struct sock *sk, dst->obsolete = DST_OBSOLETE_KILL; } else { rt->rt_pmtu = mtu; - dst_set_expires(&rt->dst, ip_rt_mtu_expires); + rt->dst.expires = max(1UL, jiffies + ip_rt_mtu_expires); } } @@ -1263,7 +1242,7 @@ static void ipv4_dst_destroy(struct dst_entry *dst) { struct rtable *rt = (struct rtable *) dst; - if (dst->flags & DST_NOCACHE) { + if (!list_empty(&rt->rt_uncached)) { spin_lock_bh(&rt_uncached_lock); list_del(&rt->rt_uncached); spin_unlock_bh(&rt_uncached_lock); @@ -2028,7 +2007,6 @@ struct rtable *__ip_route_output_key(struct net *net, struct flowi4 *fl4) } dev_out = net->loopback_dev; fl4->flowi4_oif = dev_out->ifindex; - res.fi = NULL; flags |= RTCF_LOCAL; goto make_route; } @@ -2344,7 +2322,7 @@ int ip_rt_dump(struct sk_buff *skb, struct netlink_callback *cb) void ip_rt_multicast_event(struct in_device *in_dev) { - rt_cache_flush(dev_net(in_dev->dev), 0); + rt_cache_flush(dev_net(in_dev->dev)); } #ifdef CONFIG_SYSCTL @@ -2353,16 +2331,7 @@ static int ipv4_sysctl_rtcache_flush(ctl_table *__ctl, int write, size_t *lenp, loff_t *ppos) { if (write) { - int flush_delay; - ctl_table ctl; - struct net *net; - - memcpy(&ctl, __ctl, sizeof(ctl)); - ctl.data = &flush_delay; - proc_dointvec(&ctl, write, buffer, lenp, ppos); - - net = (struct net *)__ctl->extra1; - rt_cache_flush(net, flush_delay); + rt_cache_flush((struct net *)__ctl->extra1); return 0; } @@ -2532,8 +2501,7 @@ static __net_initdata struct pernet_operations sysctl_route_ops = { static __net_init int rt_genid_init(struct net *net) { - get_random_bytes(&net->ipv4.rt_genid, - sizeof(net->ipv4.rt_genid)); + atomic_set(&net->rt_genid, 0); get_random_bytes(&net->ipv4.dev_addr_genid, sizeof(net->ipv4.dev_addr_genid)); return 0; diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c index 1b5ce96707a..3e78c79b558 100644 --- a/net/ipv4/sysctl_net_ipv4.c +++ b/net/ipv4/sysctl_net_ipv4.c @@ -76,9 +76,9 @@ static int ipv4_local_port_range(ctl_table *table, int write, } -static void inet_get_ping_group_range_table(struct ctl_table *table, gid_t *low, gid_t *high) +static void inet_get_ping_group_range_table(struct ctl_table *table, kgid_t *low, kgid_t *high) { - gid_t *data = table->data; + kgid_t *data = table->data; unsigned int seq; do { seq = read_seqbegin(&sysctl_local_ports.lock); @@ -89,12 +89,12 @@ static void inet_get_ping_group_range_table(struct ctl_table *table, gid_t *low, } /* Update system visible IP port range */ -static void set_ping_group_range(struct ctl_table *table, gid_t range[2]) +static void set_ping_group_range(struct ctl_table *table, kgid_t low, kgid_t high) { - gid_t *data = table->data; + kgid_t *data = table->data; write_seqlock(&sysctl_local_ports.lock); - data[0] = range[0]; - data[1] = range[1]; + data[0] = low; + data[1] = high; write_sequnlock(&sysctl_local_ports.lock); } @@ -103,21 +103,33 @@ static int ipv4_ping_group_range(ctl_table *table, int write, void __user *buffer, size_t *lenp, loff_t *ppos) { + struct user_namespace *user_ns = current_user_ns(); int ret; - gid_t range[2]; + gid_t urange[2]; + kgid_t low, high; ctl_table tmp = { - .data = &range, - .maxlen = sizeof(range), + .data = &urange, + .maxlen = sizeof(urange), .mode = table->mode, .extra1 = &ip_ping_group_range_min, .extra2 = &ip_ping_group_range_max, }; - inet_get_ping_group_range_table(table, range, range + 1); + inet_get_ping_group_range_table(table, &low, &high); + urange[0] = from_kgid_munged(user_ns, low); + urange[1] = from_kgid_munged(user_ns, high); ret = proc_dointvec_minmax(&tmp, write, buffer, lenp, ppos); - if (write && ret == 0) - set_ping_group_range(table, range); + if (write && ret == 0) { + low = make_kgid(user_ns, urange[0]); + high = make_kgid(user_ns, urange[1]); + if (!gid_valid(low) || !gid_valid(high) || + (urange[1] < urange[0]) || gid_lt(high, low)) { + low = make_kgid(&init_user_ns, 1); + high = make_kgid(&init_user_ns, 0); + } + set_ping_group_range(table, low, high); + } return ret; } @@ -786,7 +798,7 @@ static struct ctl_table ipv4_net_table[] = { { .procname = "ping_group_range", .data = &init_net.ipv4.sysctl_ping_group_range, - .maxlen = sizeof(init_net.ipv4.sysctl_ping_group_range), + .maxlen = sizeof(gid_t)*2, .mode = 0644, .proc_handler = ipv4_ping_group_range, }, @@ -830,8 +842,8 @@ static __net_init int ipv4_sysctl_init_net(struct net *net) * Sane defaults - nobody may create ping sockets. * Boot scripts should set this to distro-specific group. */ - net->ipv4.sysctl_ping_group_range[0] = 1; - net->ipv4.sysctl_ping_group_range[1] = 0; + net->ipv4.sysctl_ping_group_range[0] = make_kgid(&init_user_ns, 1); + net->ipv4.sysctl_ping_group_range[1] = make_kgid(&init_user_ns, 0); tcp_init_mem(net); diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 2109ff4a1da..5f641934182 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -1762,8 +1762,14 @@ int tcp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, } #ifdef CONFIG_NET_DMA - if (tp->ucopy.dma_chan) - dma_async_memcpy_issue_pending(tp->ucopy.dma_chan); + if (tp->ucopy.dma_chan) { + if (tp->rcv_wnd == 0 && + !skb_queue_empty(&sk->sk_async_wait_queue)) { + tcp_service_net_dma(sk, true); + tcp_cleanup_rbuf(sk, copied); + } else + dma_async_memcpy_issue_pending(tp->ucopy.dma_chan); + } #endif if (copied >= target) { /* Do not sleep, just process backlog. */ @@ -2325,10 +2331,17 @@ static int tcp_repair_options_est(struct tcp_sock *tp, tp->rx_opt.mss_clamp = opt.opt_val; break; case TCPOPT_WINDOW: - if (opt.opt_val > 14) - return -EFBIG; + { + u16 snd_wscale = opt.opt_val & 0xFFFF; + u16 rcv_wscale = opt.opt_val >> 16; + + if (snd_wscale > 14 || rcv_wscale > 14) + return -EFBIG; - tp->rx_opt.snd_wscale = opt.opt_val; + tp->rx_opt.snd_wscale = snd_wscale; + tp->rx_opt.rcv_wscale = rcv_wscale; + tp->rx_opt.wscale_ok = 1; + } break; case TCPOPT_SACK_PERM: if (opt.opt_val != 0) diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 85308b90df8..d377f4854cb 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -2926,13 +2926,14 @@ static void tcp_enter_recovery(struct sock *sk, bool ece_ack) * tcp_xmit_retransmit_queue(). */ static void tcp_fastretrans_alert(struct sock *sk, int pkts_acked, - int newly_acked_sacked, bool is_dupack, + int prior_sacked, bool is_dupack, int flag) { struct inet_connection_sock *icsk = inet_csk(sk); struct tcp_sock *tp = tcp_sk(sk); int do_lost = is_dupack || ((flag & FLAG_DATA_SACKED) && (tcp_fackets_out(tp) > tp->reordering)); + int newly_acked_sacked = 0; int fast_rexmit = 0; if (WARN_ON(!tp->packets_out && tp->sacked_out)) @@ -2992,6 +2993,7 @@ static void tcp_fastretrans_alert(struct sock *sk, int pkts_acked, tcp_add_reno_sack(sk); } else do_lost = tcp_try_undo_partial(sk, pkts_acked); + newly_acked_sacked = pkts_acked + tp->sacked_out - prior_sacked; break; case TCP_CA_Loss: if (flag & FLAG_DATA_ACKED) @@ -3013,6 +3015,7 @@ static void tcp_fastretrans_alert(struct sock *sk, int pkts_acked, if (is_dupack) tcp_add_reno_sack(sk); } + newly_acked_sacked = pkts_acked + tp->sacked_out - prior_sacked; if (icsk->icsk_ca_state <= TCP_CA_Disorder) tcp_try_undo_dsack(sk); @@ -3590,7 +3593,6 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag) int prior_packets; int prior_sacked = tp->sacked_out; int pkts_acked = 0; - int newly_acked_sacked = 0; bool frto_cwnd = false; /* If the ack is older than previous acks @@ -3666,8 +3668,6 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag) flag |= tcp_clean_rtx_queue(sk, prior_fackets, prior_snd_una); pkts_acked = prior_packets - tp->packets_out; - newly_acked_sacked = (prior_packets - prior_sacked) - - (tp->packets_out - tp->sacked_out); if (tp->frto_counter) frto_cwnd = tcp_process_frto(sk, flag); @@ -3681,7 +3681,7 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag) tcp_may_raise_cwnd(sk, flag)) tcp_cong_avoid(sk, ack, prior_in_flight); is_dupack = !(flag & (FLAG_SND_UNA_ADVANCED | FLAG_NOT_DUP)); - tcp_fastretrans_alert(sk, pkts_acked, newly_acked_sacked, + tcp_fastretrans_alert(sk, pkts_acked, prior_sacked, is_dupack, flag); } else { if ((flag & FLAG_DATA_ACKED) && !frto_cwnd) @@ -3698,7 +3698,7 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag) no_queue: /* If data was DSACKed, see if we can undo a cwnd reduction. */ if (flag & FLAG_DSACKING_ACK) - tcp_fastretrans_alert(sk, pkts_acked, newly_acked_sacked, + tcp_fastretrans_alert(sk, pkts_acked, prior_sacked, is_dupack, flag); /* If this ack opens up a zero window, clear backoff. It was * being used to time the probes, and is probably far higher than @@ -3718,8 +3718,7 @@ old_ack: */ if (TCP_SKB_CB(skb)->sacked) { flag |= tcp_sacktag_write_queue(sk, skb, prior_snd_una); - newly_acked_sacked = tp->sacked_out - prior_sacked; - tcp_fastretrans_alert(sk, pkts_acked, newly_acked_sacked, + tcp_fastretrans_alert(sk, pkts_acked, prior_sacked, is_dupack, flag); } @@ -4662,7 +4661,7 @@ queue_and_out: if (eaten > 0) kfree_skb_partial(skb, fragstolen); - else if (!sock_flag(sk, SOCK_DEAD)) + if (!sock_flag(sk, SOCK_DEAD)) sk->sk_data_ready(sk, 0); return; } @@ -5557,8 +5556,7 @@ no_ack: #endif if (eaten) kfree_skb_partial(skb, fragstolen); - else - sk->sk_data_ready(sk, 0); + sk->sk_data_ready(sk, 0); return 0; } } diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 76782376401..be23a0b7b89 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -417,10 +417,12 @@ void tcp_v4_err(struct sk_buff *icmp_skb, u32 info) if (code == ICMP_FRAG_NEEDED) { /* PMTU discovery (RFC1191) */ tp->mtu_info = info; - if (!sock_owned_by_user(sk)) + if (!sock_owned_by_user(sk)) { tcp_v4_mtu_reduced(sk); - else - set_bit(TCP_MTU_REDUCED_DEFERRED, &tp->tsq_flags); + } else { + if (!test_and_set_bit(TCP_MTU_REDUCED_DEFERRED, &tp->tsq_flags)) + sock_hold(sk); + } goto out; } @@ -1462,6 +1464,7 @@ struct sock *tcp_v4_syn_recv_sock(struct sock *sk, struct sk_buff *skb, goto exit_nonewsk; newsk->sk_gso_type = SKB_GSO_TCPV4; + inet_sk_rx_dst_set(newsk, skb); newtp = tcp_sk(newsk); newinet = inet_sk(newsk); @@ -2390,7 +2393,7 @@ void tcp_proc_unregister(struct net *net, struct tcp_seq_afinfo *afinfo) EXPORT_SYMBOL(tcp_proc_unregister); static void get_openreq4(const struct sock *sk, const struct request_sock *req, - struct seq_file *f, int i, int uid, int *len) + struct seq_file *f, int i, kuid_t uid, int *len) { const struct inet_request_sock *ireq = inet_rsk(req); int ttd = req->expires - jiffies; @@ -2407,7 +2410,7 @@ static void get_openreq4(const struct sock *sk, const struct request_sock *req, 1, /* timers active (only the expire timer) */ jiffies_to_clock_t(ttd), req->retrans, - uid, + from_kuid_munged(seq_user_ns(f), uid), 0, /* non standard timer */ 0, /* open_requests have no inode */ atomic_read(&sk->sk_refcnt), @@ -2458,7 +2461,7 @@ static void get_tcp4_sock(struct sock *sk, struct seq_file *f, int i, int *len) timer_active, jiffies_to_clock_t(timer_expires - jiffies), icsk->icsk_retransmits, - sock_i_uid(sk), + from_kuid_munged(seq_user_ns(f), sock_i_uid(sk)), icsk->icsk_probes_out, sock_i_ino(sk), atomic_read(&sk->sk_refcnt), sk, diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c index d9c9dcef2de..6ff7f10dce9 100644 --- a/net/ipv4/tcp_minisocks.c +++ b/net/ipv4/tcp_minisocks.c @@ -387,8 +387,6 @@ struct sock *tcp_create_openreq_child(struct sock *sk, struct request_sock *req, struct tcp_sock *oldtp = tcp_sk(sk); struct tcp_cookie_values *oldcvp = oldtp->cookie_values; - newicsk->icsk_af_ops->sk_rx_dst_set(newsk, skb); - /* TCP Cookie Transactions require space for the cookie pair, * as it differs for each connection. There is no need to * copy any s_data_payload stored at the original socket. diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 20dfd892c86..d04632673a9 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -910,14 +910,18 @@ void tcp_release_cb(struct sock *sk) if (flags & (1UL << TCP_TSQ_DEFERRED)) tcp_tsq_handler(sk); - if (flags & (1UL << TCP_WRITE_TIMER_DEFERRED)) + if (flags & (1UL << TCP_WRITE_TIMER_DEFERRED)) { tcp_write_timer_handler(sk); - - if (flags & (1UL << TCP_DELACK_TIMER_DEFERRED)) + __sock_put(sk); + } + if (flags & (1UL << TCP_DELACK_TIMER_DEFERRED)) { tcp_delack_timer_handler(sk); - - if (flags & (1UL << TCP_MTU_REDUCED_DEFERRED)) + __sock_put(sk); + } + if (flags & (1UL << TCP_MTU_REDUCED_DEFERRED)) { sk->sk_prot->mtu_reduced(sk); + __sock_put(sk); + } } EXPORT_SYMBOL(tcp_release_cb); diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c index 6df36ad55a3..b774a03bd1d 100644 --- a/net/ipv4/tcp_timer.c +++ b/net/ipv4/tcp_timer.c @@ -252,7 +252,8 @@ static void tcp_delack_timer(unsigned long data) inet_csk(sk)->icsk_ack.blocked = 1; NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_DELAYEDACKLOCKED); /* deleguate our work to tcp_release_cb() */ - set_bit(TCP_WRITE_TIMER_DEFERRED, &tcp_sk(sk)->tsq_flags); + if (!test_and_set_bit(TCP_DELACK_TIMER_DEFERRED, &tcp_sk(sk)->tsq_flags)) + sock_hold(sk); } bh_unlock_sock(sk); sock_put(sk); @@ -481,7 +482,8 @@ static void tcp_write_timer(unsigned long data) tcp_write_timer_handler(sk); } else { /* deleguate our work to tcp_release_cb() */ - set_bit(TCP_WRITE_TIMER_DEFERRED, &tcp_sk(sk)->tsq_flags); + if (!test_and_set_bit(TCP_WRITE_TIMER_DEFERRED, &tcp_sk(sk)->tsq_flags)) + sock_hold(sk); } bh_unlock_sock(sk); sock_put(sk); diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 6f6d1aca3c3..79c8dbe59b5 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -1226,6 +1226,11 @@ try_again: if (unlikely(err)) { trace_kfree_skb(skb, udp_recvmsg); + if (!peeked) { + atomic_inc(&sk->sk_drops); + UDP_INC_STATS_USER(sock_net(sk), + UDP_MIB_INERRORS, is_udplite); + } goto out_free; } @@ -2110,7 +2115,9 @@ static void udp4_format_sock(struct sock *sp, struct seq_file *f, bucket, src, srcp, dest, destp, sp->sk_state, sk_wmem_alloc_get(sp), sk_rmem_alloc_get(sp), - 0, 0L, 0, sock_i_uid(sp), 0, sock_i_ino(sp), + 0, 0L, 0, + from_kuid_munged(seq_user_ns(f), sock_i_uid(sp)), + 0, sock_i_ino(sp), atomic_read(&sp->sk_refcnt), sp, atomic_read(&sp->sk_drops), len); } diff --git a/net/ipv4/udp_diag.c b/net/ipv4/udp_diag.c index 16d0960062b..d2f336ea82c 100644 --- a/net/ipv4/udp_diag.c +++ b/net/ipv4/udp_diag.c @@ -24,7 +24,9 @@ static int sk_diag_dump(struct sock *sk, struct sk_buff *skb, if (!inet_diag_bc_sk(bc, sk)) return 0; - return inet_sk_diag_fill(sk, NULL, skb, req, NETLINK_CB(cb->skb).pid, + return inet_sk_diag_fill(sk, NULL, skb, req, + sk_user_ns(NETLINK_CB(cb->skb).ssk), + NETLINK_CB(cb->skb).pid, cb->nlh->nlmsg_seq, NLM_F_MULTI, cb->nlh); } @@ -69,6 +71,7 @@ static int udp_dump_one(struct udp_table *tbl, struct sk_buff *in_skb, goto out; err = inet_sk_diag_fill(sk, NULL, rep, req, + sk_user_ns(NETLINK_CB(in_skb).ssk), NETLINK_CB(in_skb).pid, nlh->nlmsg_seq, 0, nlh); if (err < 0) { |