From f0d1f04f0a2f662b6b617e24d115fddcf6ef8723 Mon Sep 17 00:00:00 2001
From: Pablo Neira Ayuso
Date: Tue, 7 Oct 2014 19:02:11 +0200
Subject: netfilter: fix wrong arithmetic regarding NFT_REJECT_ICMPX_MAX

NFT_REJECT_ICMPX_MAX should be __NFT_REJECT_ICMPX_MAX - 1.

nft_reject_icmp_code() and nft_reject_icmpv6_code() are called from the
packet path, so BUG_ON() in case we try to access an unknown abstracted
ICMP code. This should not happen, since we already validate this from
nft_reject_{inet,bridge}_init().

Fixes: 51b0a5d ("netfilter: nft_reject: introduce icmp code abstraction for inet and bridge")
Reported-by: Dan Carpenter
Signed-off-by: Pablo Neira Ayuso
---
 net/netfilter/nft_reject.c | 10 ++++------
 1 file changed, 4 insertions(+), 6 deletions(-)

(limited to 'net')

diff --git a/net/netfilter/nft_reject.c b/net/netfilter/nft_reject.c
index ec8a456092a..57d3e1af563 100644
--- a/net/netfilter/nft_reject.c
+++ b/net/netfilter/nft_reject.c
@@ -72,7 +72,7 @@ nla_put_failure:
 }
 EXPORT_SYMBOL_GPL(nft_reject_dump);
 
-static u8 icmp_code_v4[NFT_REJECT_ICMPX_MAX] = {
+static u8 icmp_code_v4[NFT_REJECT_ICMPX_MAX + 1] = {
 	[NFT_REJECT_ICMPX_NO_ROUTE]		= ICMP_NET_UNREACH,
 	[NFT_REJECT_ICMPX_PORT_UNREACH]		= ICMP_PORT_UNREACH,
 	[NFT_REJECT_ICMPX_HOST_UNREACH]		= ICMP_HOST_UNREACH,
@@ -81,8 +81,7 @@ static u8 icmp_code_v4[NFT_REJECT_ICMPX_MAX] = {
 
 int nft_reject_icmp_code(u8 code)
 {
-	if (code > NFT_REJECT_ICMPX_MAX)
-		return -EINVAL;
+	BUG_ON(code > NFT_REJECT_ICMPX_MAX);
 
 	return icmp_code_v4[code];
 }
@@ -90,7 +89,7 @@ int nft_reject_icmp_code(u8 code)
 EXPORT_SYMBOL_GPL(nft_reject_icmp_code);
 
-static u8 icmp_code_v6[NFT_REJECT_ICMPX_MAX] = {
+static u8 icmp_code_v6[NFT_REJECT_ICMPX_MAX + 1] = {
 	[NFT_REJECT_ICMPX_NO_ROUTE]		= ICMPV6_NOROUTE,
 	[NFT_REJECT_ICMPX_PORT_UNREACH]		= ICMPV6_PORT_UNREACH,
 	[NFT_REJECT_ICMPX_HOST_UNREACH]		= ICMPV6_ADDR_UNREACH,
@@ -99,8 +98,7 @@ static u8 icmp_code_v6[NFT_REJECT_ICMPX_MAX] = {
 
 int nft_reject_icmpv6_code(u8 code)
 {
-	if (code > NFT_REJECT_ICMPX_MAX)
-		return -EINVAL;
+	BUG_ON(code > NFT_REJECT_ICMPX_MAX);
 
 	return icmp_code_v6[code];
 }

-- cgit v1.2.3-70-g09d2


From 2f29fed3f814f652a24b10c975b9d415a154fc9c Mon Sep 17 00:00:00 2001
From: Fabian Frederick
Date: Tue, 7 Oct 2014 22:20:23 +0200
Subject: net: rfkill: kernel-doc warning fixes

s/state/blocked

Signed-off-by: Fabian Frederick
Signed-off-by: John W. Linville
---
 net/rfkill/core.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'net')

diff --git a/net/rfkill/core.c b/net/rfkill/core.c
index b3b16c070a7..fa7cd792791 100644
--- a/net/rfkill/core.c
+++ b/net/rfkill/core.c
@@ -329,7 +329,7 @@ static atomic_t rfkill_input_disabled = ATOMIC_INIT(0);
 /**
  * __rfkill_switch_all - Toggle state of all switches of given type
  * @type: type of interfaces to be affected
- * @state: the new state
+ * @blocked: the new state
  *
  * This function sets the state of all switches of given type,
  * unless a specific switch is claimed by userspace (in which case,
@@ -353,7 +353,7 @@ static void __rfkill_switch_all(const enum rfkill_type type, bool blocked)
 /**
  * rfkill_switch_all - Toggle state of all switches of given type
  * @type: type of interfaces to be affected
- * @state: the new state
+ * @blocked: the new state
  *
  * Acquires rfkill_global_mutex and calls __rfkill_switch_all(@type, @state).
  * Please refer to __rfkill_switch_all() for details.
-- cgit v1.2.3-70-g09d2


From 59f35b810e3bb17efef2aa5feadffb66450190d9 Mon Sep 17 00:00:00 2001
From: Fabian Frederick
Date: Tue, 7 Oct 2014 22:31:32 +0200
Subject: netlabel: kernel-doc warning fix

There is no secid argument in netlbl_cfg_unlbl_static_del.

Signed-off-by: Fabian Frederick
Acked-by: Paul Moore
Signed-off-by: David S. Miller
---
 net/netlabel/netlabel_kapi.c | 1 -
 1 file changed, 1 deletion(-)

(limited to 'net')

diff --git a/net/netlabel/netlabel_kapi.c b/net/netlabel/netlabel_kapi.c
index 0b4692dd1c5..a845cd4cf21 100644
--- a/net/netlabel/netlabel_kapi.c
+++ b/net/netlabel/netlabel_kapi.c
@@ -246,7 +246,6 @@ int netlbl_cfg_unlbl_static_add(struct net *net,
  * @addr: IP address in network byte order (struct in[6]_addr)
  * @mask: address mask in network byte order (struct in[6]_addr)
  * @family: address family
- * @secid: LSM secid value for the entry
  * @audit_info: NetLabel audit information
  *
  * Description:

-- cgit v1.2.3-70-g09d2


From de3f0d0effecc2ccfbd679705519ed5b35f9cb33 Mon Sep 17 00:00:00 2001
From: Masanari Iida
Date: Thu, 9 Oct 2014 12:58:08 +0900
Subject: net: Missing @ before descriptions cause make xmldocs warning

This patch fixes the following warnings:

Warning(.//net/core/skbuff.c:4142): No description found for parameter 'header_len'
Warning(.//net/core/skbuff.c:4142): No description found for parameter 'data_len'
Warning(.//net/core/skbuff.c:4142): No description found for parameter 'max_page_order'
Warning(.//net/core/skbuff.c:4142): No description found for parameter 'errcode'
Warning(.//net/core/skbuff.c:4142): No description found for parameter 'gfp_mask'

The descriptions actually exist, but are missing the "@" in front.
This problem started to happen when the following commit was merged
into Linus's tree during the 3.18-rc1 merge period:

commit 2e4e44107176d552f8bb1bb76053e850e3809841
net: add alloc_skb_with_frags() helper

Signed-off-by: Masanari Iida
Signed-off-by: David S. Miller
---
 net/core/skbuff.c | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

(limited to 'net')

diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 7b3df0d518a..a30d750647e 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -4126,11 +4126,11 @@ EXPORT_SYMBOL(skb_vlan_untag);
 /**
  * alloc_skb_with_frags - allocate skb with page frags
  *
- * header_len: size of linear part
- * data_len: needed length in frags
- * max_page_order: max page order desired.
- * errcode: pointer to error code if any
- * gfp_mask: allocation mask
+ * @header_len: size of linear part
+ * @data_len: needed length in frags
+ * @max_page_order: max page order desired.
+ * @errcode: pointer to error code if any
+ * @gfp_mask: allocation mask
  *
  * This can be used to allocate a paged skb, given a maximal order for frags.
  */

-- cgit v1.2.3-70-g09d2


From b8358d70ce1066dd4cc658cfdaf7862d459e2d78 Mon Sep 17 00:00:00 2001
From: Jesper Dangaard Brouer
Date: Thu, 9 Oct 2014 12:18:10 +0200
Subject: net_sched: restore qdisc quota fairness limits after bulk dequeue

Restore the quota fairness between qdiscs that we broke with commit
5772e9a346 ("qdisc: bulk dequeue support for qdiscs with
TCQ_F_ONETXQUEUE").

Before that commit, the quota in __qdisc_run() was counted in packets,
as dequeue_skb() would only dequeue a single packet; that assumption
broke with bulk dequeue.

We choose not to account for the number of packets inside TSO/GSO
packets (accessible via "skb_gso_segs"), as the previous fairness also
had this "defect". Thus, a GSO/TSO packet counts as a single packet.
Furthermore, we choose to slack on accuracy by allowing a bulk dequeue
in try_bulk_dequeue_skb() to exceed the "packets" limit, bounded only
by the BQL byte limit. This is done because BQL prefers to get its full
budget for appropriate feedback from TX completion.

In the future, we might consider reworking this further and, if it
allows, switching to a time-based model, as suggested by Eric. Right
now, we only restore the old semantics.

Joint work with Eric, Hannes, Daniel and Jesper. Hannes wrote the first
patch in cooperation with Daniel and Jesper. Eric rewrote the patch.

Fixes: 5772e9a346 ("qdisc: bulk dequeue support for qdiscs with TCQ_F_ONETXQUEUE")
Signed-off-by: Eric Dumazet
Signed-off-by: Jesper Dangaard Brouer
Signed-off-by: Hannes Frederic Sowa
Signed-off-by: Daniel Borkmann
Signed-off-by: David S. Miller
---
 net/sched/sch_generic.c | 20 +++++++++++++-------
 1 file changed, 13 insertions(+), 7 deletions(-)

(limited to 'net')

diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c
index 38d58e6cef0..6efca30894a 100644
--- a/net/sched/sch_generic.c
+++ b/net/sched/sch_generic.c
@@ -57,7 +57,8 @@ static inline int dev_requeue_skb(struct sk_buff *skb, struct Qdisc *q)
 
 static void try_bulk_dequeue_skb(struct Qdisc *q,
 				 struct sk_buff *skb,
-				 const struct netdev_queue *txq)
+				 const struct netdev_queue *txq,
+				 int *packets)
 {
 	int bytelimit = qdisc_avail_bulklimit(txq) - skb->len;
 
@@ -70,6 +71,7 @@ static void try_bulk_dequeue_skb(struct Qdisc *q,
 		bytelimit -= nskb->len; /* covers GSO len */
 		skb->next = nskb;
 		skb = nskb;
+		(*packets)++; /* GSO counts as one pkt */
 	}
 	skb->next = NULL;
 }
@@ -77,11 +79,13 @@ static void try_bulk_dequeue_skb(struct Qdisc *q,
 /* Note that dequeue_skb can possibly return a SKB list (via skb->next).
  * A requeued skb (via q->gso_skb) can also be a SKB list.
  */
-static struct sk_buff *dequeue_skb(struct Qdisc *q, bool *validate)
+static struct sk_buff *dequeue_skb(struct Qdisc *q, bool *validate,
+				   int *packets)
 {
 	struct sk_buff *skb = q->gso_skb;
 	const struct netdev_queue *txq = q->dev_queue;
 
+	*packets = 1;
 	*validate = true;
 	if (unlikely(skb)) {
 		/* check the reason of requeuing without tx lock first */
@@ -98,7 +102,7 @@ static struct sk_buff *dequeue_skb(struct Qdisc *q, bool *validate)
 		    !netif_xmit_frozen_or_stopped(txq)) {
 			skb = q->dequeue(q);
 			if (skb && qdisc_may_bulk(q))
-				try_bulk_dequeue_skb(q, skb, txq);
+				try_bulk_dequeue_skb(q, skb, txq, packets);
 		}
 	}
 	return skb;
@@ -204,7 +208,7 @@ int sch_direct_xmit(struct sk_buff *skb, struct Qdisc *q,
  *				>0 - queue is not empty.
  *
  */
-static inline int qdisc_restart(struct Qdisc *q)
+static inline int qdisc_restart(struct Qdisc *q, int *packets)
 {
 	struct netdev_queue *txq;
 	struct net_device *dev;
@@ -213,7 +217,7 @@ static inline int qdisc_restart(struct Qdisc *q)
 	bool validate;
 
 	/* Dequeue packet */
-	skb = dequeue_skb(q, &validate);
+	skb = dequeue_skb(q, &validate, packets);
 	if (unlikely(!skb))
 		return 0;
 
@@ -227,14 +231,16 @@ static inline int qdisc_restart(struct Qdisc *q)
 void __qdisc_run(struct Qdisc *q)
 {
 	int quota = weight_p;
+	int packets;
 
-	while (qdisc_restart(q)) {
+	while (qdisc_restart(q, &packets)) {
 		/*
 		 * Ordered by possible occurrence: Postpone processing if
 		 * 1. we've exceeded packet quota
		 * 2. another process needs the CPU;
		 */
-		if (--quota <= 0 || need_resched()) {
+		quota -= packets;
+		if (quota <= 0 || need_resched()) {
 			__netif_schedule(q);
 			break;
 		}

-- cgit v1.2.3-70-g09d2


From 38b3629adb8c048dda8b98e888505c79ed33ae92 Mon Sep 17 00:00:00 2001
From: Alexei Starovoitov
Date: Thu, 9 Oct 2014 15:16:41 -0700
Subject: net: bpf: fix bpf syscall dependence on anon_inodes

Minimal configurations where EPOLL, PERF_EVENTS, etc. are disabled, but
NET is enabled, are failing to build with the link error:

kernel/built-in.o: In function `bpf_prog_load':
syscall.c:(.text+0x3b728): undefined reference to `anon_inode_getfd'

Fix it by selecting ANON_INODES when NET is enabled.

Reported-by: Michal Sojka
Signed-off-by: Alexei Starovoitov
Signed-off-by: David S. Miller
---
 net/Kconfig | 1 +
 1 file changed, 1 insertion(+)

(limited to 'net')

diff --git a/net/Kconfig b/net/Kconfig
index d6b138e2c26..6272420a721 100644
--- a/net/Kconfig
+++ b/net/Kconfig
@@ -6,6 +6,7 @@ menuconfig NET
 	bool "Networking support"
 	select NLATTR
 	select GENERIC_NET_UTILS
+	select ANON_INODES
 	---help---
 	  Unless you really know what you are doing, you should say Y here.
 	  The reason is that some programs need kernel networking support even

-- cgit v1.2.3-70-g09d2


From 8ea6e345a6123fa831e42cd8747f55892a58abff Mon Sep 17 00:00:00 2001
From: Li RongQing
Date: Fri, 10 Oct 2014 13:56:51 +0800
Subject: net: filter: fix the comments

1. sk_run_filter has been renamed; sk_filter() now uses SK_RUN_FILTER.
2. Remove wrong comments about storing intermediate values.
3. Replace sk_run_filter with __bpf_prog_run in check_load_and_stores's
   comments.

Cc: Alexei Starovoitov
Signed-off-by: Li RongQing
Acked-by: Alexei Starovoitov
Signed-off-by: David S. Miller
---
 net/core/filter.c | 9 +++------
 1 file changed, 3 insertions(+), 6 deletions(-)

(limited to 'net')

diff --git a/net/core/filter.c b/net/core/filter.c
index fcd3f6742a6..647b12265e1 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -51,9 +51,9 @@
  *	@skb: buffer to filter
  *
  * Run the filter code and then cut skb->data to correct size returned by
- * sk_run_filter. If pkt_len is 0 we toss packet. If skb->len is smaller
+ * SK_RUN_FILTER. If pkt_len is 0 we toss packet. If skb->len is smaller
  * than pkt_len we keep whole skb->data. This is the socket level
- * wrapper to sk_run_filter. It returns 0 if the packet should
+ * wrapper to SK_RUN_FILTER. It returns 0 if the packet should
  * be accepted or -EPERM if the packet should be tossed.
  *
  */
@@ -565,12 +565,9 @@ err:
 }
 
 /* Security:
- *
- * A BPF program is able to use 16 cells of memory to store intermediate
- * values (check u32 mem[BPF_MEMWORDS] in sk_run_filter()).
  *
  * As we dont want to clear mem[] array for each packet going through
- * sk_run_filter(), we check that filter loaded by user never try to read
+ * __bpf_prog_run(), we check that filter loaded by user never try to read
  * a cell if not previously written, and we check all branches to be sure
  * a malicious user doesn't try to abuse us.
  */

-- cgit v1.2.3-70-g09d2


From 5af7fb6e3e92c2797ee30d66138cf6aa6b29240d Mon Sep 17 00:00:00 2001
From: Alexander Duyck
Date: Fri, 10 Oct 2014 12:09:12 -0700
Subject: flow-dissector: Fix alignment issue in __skb_flow_get_ports

This patch addresses a kernel unaligned access bug seen on a sparc64
system with an igb adapter. Specifically, __skb_flow_get_ports was
returning a be32 pointer, and the value it pointed to was then being
returned directly.
To prevent this, it is actually easier to simply not populate the port
or address values when an skb is not present. In that case the
assumption is that the data isn't needed. Rather than slow down the
faster aligned accesses by making them take the unaligned path on
architectures that don't support efficient unaligned access, it makes
more sense to simply switch off the bits that were copying the source
and destination address/port for the case where we only care about the
protocol types and lengths, which are normally 16-bit fields anyway.

Reported-by: David S. Miller
Signed-off-by: Alexander Duyck
Signed-off-by: David S. Miller
---
 net/core/flow_dissector.c | 36 +++++++++++++++++++++++-------------
 1 file changed, 23 insertions(+), 13 deletions(-)

(limited to 'net')

diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c
index 8560dea5880..45084938c40 100644
--- a/net/core/flow_dissector.c
+++ b/net/core/flow_dissector.c
@@ -100,6 +100,13 @@ ip:
 		if (ip_is_fragment(iph))
 			ip_proto = 0;
 
+		/* skip the address processing if skb is NULL. The assumption
+		 * here is that if there is no skb we are not looking for flow
+		 * info but lengths and protocols.
+		 */
+		if (!skb)
+			break;
+
 		iph_to_flow_copy_addrs(flow, iph);
 		break;
 	}
@@ -114,17 +121,15 @@ ipv6:
 			return false;
 
 		ip_proto = iph->nexthdr;
-		flow->src = (__force __be32)ipv6_addr_hash(&iph->saddr);
-		flow->dst = (__force __be32)ipv6_addr_hash(&iph->daddr);
 		nhoff += sizeof(struct ipv6hdr);
 
-		/* skip the flow label processing if skb is NULL. The
-		 * assumption here is that if there is no skb we are not
-		 * looking for flow info as much as we are length.
-		 */
+		/* see comment above in IPv4 section */
 		if (!skb)
 			break;
 
+		flow->src = (__force __be32)ipv6_addr_hash(&iph->saddr);
+		flow->dst = (__force __be32)ipv6_addr_hash(&iph->daddr);
+
 		flow_label = ip6_flowlabel(iph);
 		if (flow_label) {
 			/* Awesome, IPv6 packet has a flow label so we can
@@ -231,9 +236,13 @@ ipv6:
 	flow->n_proto = proto;
 	flow->ip_proto = ip_proto;
 
-	flow->ports = __skb_flow_get_ports(skb, nhoff, ip_proto, data, hlen);
 	flow->thoff = (u16) nhoff;
 
+	/* unless skb is set we don't need to record port info */
+	if (skb)
+		flow->ports = __skb_flow_get_ports(skb, nhoff, ip_proto,
+						   data, hlen);
+
 	return true;
 }
 EXPORT_SYMBOL(__skb_flow_dissect);
@@ -334,15 +343,16 @@ u32 __skb_get_poff(const struct sk_buff *skb, void *data,
 	switch (keys->ip_proto) {
 	case IPPROTO_TCP: {
-		const struct tcphdr *tcph;
-		struct tcphdr _tcph;
+		/* access doff as u8 to avoid unaligned access */
+		const u8 *doff;
+		u8 _doff;
 
-		tcph = __skb_header_pointer(skb, poff, sizeof(_tcph),
-					    data, hlen, &_tcph);
-		if (!tcph)
+		doff = __skb_header_pointer(skb, poff + 12, sizeof(_doff),
+					    data, hlen, &_doff);
+		if (!doff)
 			return poff;
 
-		poff += max_t(u32, sizeof(struct tcphdr), tcph->doff * 4);
+		poff += max_t(u32, sizeof(struct tcphdr), (*doff & 0xF0) >> 2);
 		break;
 	}
 	case IPPROTO_UDP:

-- cgit v1.2.3-70-g09d2


From 4c450583d9d0a8241f0f62b80038ac47b43ff843 Mon Sep 17 00:00:00 2001
From: Eric Dumazet
Date: Fri, 10 Oct 2014 04:48:18 -0700
Subject: net: fix races in page->_count manipulation

It is illegal to use atomic_set(&page->_count, ...) even if we 'own'
the page. Other entities in the kernel need to use
get_page_unless_zero() to get a reference to the page before testing
page properties, so we could lose a refcount increment.
The only case where this is valid is when page->_count is 0.

Fixes: 540eb7bf0bbed ("net: Update alloc frag to reduce get/put page usage and recycle pages")
Signed-off-by: Eric Dumazet
Signed-off-by: David S. Miller
---
 net/core/skbuff.c | 25 ++++++++++++++++++-------
 1 file changed, 18 insertions(+), 7 deletions(-)

(limited to 'net')

diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index a30d750647e..829d013745a 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -360,18 +360,29 @@ refill:
 			goto end;
 		}
 		nc->frag.size = PAGE_SIZE << order;
-recycle:
-	atomic_set(&nc->frag.page->_count, NETDEV_PAGECNT_MAX_BIAS);
+		/* Even if we own the page, we do not use atomic_set().
+		 * This would break get_page_unless_zero() users.
+		 */
+		atomic_add(NETDEV_PAGECNT_MAX_BIAS - 1,
+			   &nc->frag.page->_count);
 		nc->pagecnt_bias = NETDEV_PAGECNT_MAX_BIAS;
 		nc->frag.offset = 0;
 	}
 
 	if (nc->frag.offset + fragsz > nc->frag.size) {
-		/* avoid unnecessary locked operations if possible */
-		if ((atomic_read(&nc->frag.page->_count) == nc->pagecnt_bias) ||
-		    atomic_sub_and_test(nc->pagecnt_bias, &nc->frag.page->_count))
-			goto recycle;
-		goto refill;
+		if (atomic_read(&nc->frag.page->_count) != nc->pagecnt_bias) {
+			if (!atomic_sub_and_test(nc->pagecnt_bias,
+						 &nc->frag.page->_count))
+				goto refill;
+			/* OK, page count is 0, we can safely set it */
+			atomic_set(&nc->frag.page->_count,
+				   NETDEV_PAGECNT_MAX_BIAS);
+		} else {
+			atomic_add(NETDEV_PAGECNT_MAX_BIAS - nc->pagecnt_bias,
+				   &nc->frag.page->_count);
+		}
+		nc->pagecnt_bias = NETDEV_PAGECNT_MAX_BIAS;
+		nc->frag.offset = 0;
 	}
 
 	data = page_address(nc->frag.page) + nc->frag.offset;

-- cgit v1.2.3-70-g09d2
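
As a rough user-space illustration of the quota accounting restored by
the net_sched patch above (a sketch only, not kernel code: WEIGHT_P,
the fixed three-packet burst and dequeue_round() are invented stand-ins
for weight_p, try_bulk_dequeue_skb() and qdisc_restart()), charging the
whole bulk-dequeued burst against the quota keeps one qdisc from
overrunning its fair share:

/* Minimal user-space sketch of the restored quota accounting. Not
 * kernel code: the burst size is hard-coded and dequeue_round() only
 * pretends that every round bulk-dequeues three packets.
 */
#include <stdio.h>

#define WEIGHT_P 64	/* mirrors the default net.core.dev_weight */

static int dequeue_round(int *packets)
{
	*packets = 3;	/* pretend the bulk dequeue grabbed a 3-packet burst */
	return 1;	/* the queue never reports empty in this toy model */
}

int main(void)
{
	int quota = WEIGHT_P;
	int rounds = 0;
	int packets;

	while (dequeue_round(&packets)) {
		rounds++;
		quota -= packets;	/* charge the whole burst, not one per round */
		if (quota <= 0)		/* so bulk dequeue cannot blow past the quota */
			break;
	}
	printf("%d rounds, %d packets charged against a quota of %d\n",
	       rounds, WEIGHT_P - quota, WEIGHT_P);
	return 0;
}

With the pre-patch accounting (one unit of quota per round regardless
of burst size), the same loop would run 64 rounds and push 192 packets
before yielding, which is the unfairness the patch removes.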