From 27f852282ab9a028f57da96d05c26f38c424a315 Mon Sep 17 00:00:00 2001
From: Wei Liu
Date: Mon, 25 Mar 2013 01:08:20 +0000
Subject: xen-netback: remove skb in xen_netbk_alloc_page

This variable is never used.

Signed-off-by: Wei Liu
Acked-by: Ian Campbell
Signed-off-by: David S. Miller
---
 drivers/net/xen-netback/netback.c | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/drivers/net/xen-netback/netback.c b/drivers/net/xen-netback/netback.c
index cd49ba94963..aa28550fc9b 100644
--- a/drivers/net/xen-netback/netback.c
+++ b/drivers/net/xen-netback/netback.c
@@ -942,7 +942,6 @@ static int netbk_count_requests(struct xenvif *vif,
 }
 
 static struct page *xen_netbk_alloc_page(struct xen_netbk *netbk,
-					 struct sk_buff *skb,
 					 u16 pending_idx)
 {
 	struct page *page;
@@ -976,7 +975,7 @@ static struct gnttab_copy *xen_netbk_get_requests(struct xen_netbk *netbk,
 
 		index = pending_index(netbk->pending_cons++);
 		pending_idx = netbk->pending_ring[index];
-		page = xen_netbk_alloc_page(netbk, skb, pending_idx);
+		page = xen_netbk_alloc_page(netbk, pending_idx);
 		if (!page)
 			goto err;
 
@@ -1381,7 +1380,7 @@ static unsigned xen_netbk_tx_build_gops(struct xen_netbk *netbk)
 		}
 
 		/* XXX could copy straight to head */
-		page = xen_netbk_alloc_page(netbk, skb, pending_idx);
+		page = xen_netbk_alloc_page(netbk, pending_idx);
 		if (!page) {
 			kfree_skb(skb);
 			netbk_tx_err(vif, &txreq, idx);

From f9ca8f74399f9195fd8e01f67a8424a8d33efa55 Mon Sep 17 00:00:00 2001
From: Jason Wang
Date: Mon, 25 Mar 2013 20:19:58 +0000
Subject: netback: set transport header before passing it to kernel

Currently, for packets received from netback, the kernel just resets the
transport header in netif_receive_skb() before doing the header check,
which pretends there is no l4 header. This is suboptimal for precise
packet length estimation (introduced in 1def9238: net_sched: more
precise pkt_len computation), which needs a correct l4 header for gso
packets.

This patch reuses the header probed by netback for partial-checksum
packets and tries skb_flow_dissect() for the other cases; if both fail,
it just pretends there is no l4 header.

Cc: Eric Dumazet
Cc: Ian Campbell
Signed-off-by: Jason Wang
Signed-off-by: David S. Miller
---
 drivers/net/xen-netback/netback.c | 12 ++++++++++++
 1 file changed, 12 insertions(+)

diff --git a/drivers/net/xen-netback/netback.c b/drivers/net/xen-netback/netback.c
index aa28550fc9b..fc8faa74b25 100644
--- a/drivers/net/xen-netback/netback.c
+++ b/drivers/net/xen-netback/netback.c
@@ -39,6 +39,7 @@
 #include
 #include
+#include <net/flow_keys.h>
 #include
 #include
@@ -1184,6 +1185,7 @@ static int checksum_setup(struct xenvif *vif, struct sk_buff *skb)
 	if (th >= skb_tail_pointer(skb))
 		goto out;
 
+	skb_set_transport_header(skb, 4 * iph->ihl);
 	skb->csum_start = th - skb->head;
 	switch (iph->protocol) {
 	case IPPROTO_TCP:
@@ -1495,6 +1497,7 @@ static void xen_netbk_tx_submit(struct xen_netbk *netbk)
 
 		skb->dev = vif->dev;
 		skb->protocol = eth_type_trans(skb, skb->dev);
+		skb_reset_network_header(skb);
 
 		if (checksum_setup(vif, skb)) {
 			netdev_dbg(vif->dev,
@@ -1503,6 +1506,15 @@ static void xen_netbk_tx_submit(struct xen_netbk *netbk)
 			continue;
 		}
 
+		if (!skb_transport_header_was_set(skb)) {
+			struct flow_keys keys;
+
+			if (skb_flow_dissect(skb, &keys))
+				skb_set_transport_header(skb, keys.thoff);
+			else
+				skb_reset_transport_header(skb);
+		}
+
 		vif->dev->stats.rx_bytes += skb->len;
 		vif->dev->stats.rx_packets++;
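The probing logic added above can be read in isolation as a small helper (a
sketch with a made-up name, probe_l4_header(); it assumes the 3.9-era flow
dissector API, in which skb_flow_dissect() returns true and fills keys.thoff
with the transport header offset whenever it can parse the packet):

    /* Sketch of the decision above: checksum_setup() has already set
     * the transport header for partial-checksum packets, so only the
     * remaining packets need flow dissection.
     */
    static void probe_l4_header(struct sk_buff *skb)
    {
            struct flow_keys keys;

            if (skb_transport_header_was_set(skb))
                    return;         /* probed earlier by checksum_setup() */

            if (skb_flow_dissect(skb, &keys))
                    skb_set_transport_header(skb, keys.thoff);
            else
                    skb_reset_transport_header(skb); /* pretend no l4 header */
    }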
From 40893fd0fd4e0eda8c6a53db6a8e6013b2d44c16 Mon Sep 17 00:00:00 2001
From: Jason Wang
Date: Tue, 26 Mar 2013 23:11:22 +0000
Subject: net: switch to use skb_probe_transport_header()

Switch to use the new helper skb_probe_transport_header() to do the l4
header probing for untrusted sources. For packets with a partial csum,
the header should already have been set by skb_partial_csum_set().

Cc: Eric Dumazet
Signed-off-by: Jason Wang
Acked-by: Eric Dumazet
Signed-off-by: David S. Miller
---
 drivers/net/macvtap.c             |  9 +--------
 drivers/net/tun.c                 | 10 +---------
 drivers/net/xen-netback/netback.c | 10 +---------
 net/packet/af_packet.c            | 22 +++-------------------
 4 files changed, 6 insertions(+), 45 deletions(-)

diff --git a/drivers/net/macvtap.c b/drivers/net/macvtap.c
index acf6450ceff..59e9605de31 100644
--- a/drivers/net/macvtap.c
+++ b/drivers/net/macvtap.c
@@ -21,7 +21,6 @@
 #include
 #include
 #include
-#include <net/flow_keys.h>
 
 /*
  * A macvtap queue is the central object of this driver, it connects
@@ -646,7 +645,6 @@ static ssize_t macvtap_get_user(struct macvtap_queue *q, struct msghdr *m,
 	int vnet_hdr_len = 0;
 	int copylen = 0;
 	bool zerocopy = false;
-	struct flow_keys keys;
 
 	if (q->flags & IFF_VNET_HDR) {
 		vnet_hdr_len = q->vnet_hdr_sz;
@@ -727,12 +725,7 @@ static ssize_t macvtap_get_user(struct macvtap_queue *q, struct msghdr *m,
 		goto err_kfree;
 	}
 
-	if (skb->ip_summed == CHECKSUM_PARTIAL)
-		skb_set_transport_header(skb, skb_checksum_start_offset(skb));
-	else if (skb_flow_dissect(skb, &keys))
-		skb_set_transport_header(skb, keys.thoff);
-	else
-		skb_set_transport_header(skb, ETH_HLEN);
+	skb_probe_transport_header(skb, ETH_HLEN);
 
 	rcu_read_lock_bh();
 	vlan = rcu_dereference_bh(q->vlan);
diff --git a/drivers/net/tun.c b/drivers/net/tun.c
index 48cd73a2dc5..29538e6e914 100644
--- a/drivers/net/tun.c
+++ b/drivers/net/tun.c
@@ -70,7 +70,6 @@
 #include
 #include
-#include <net/flow_keys.h>
 
 /* Uncomment to enable debugging */
 /* #define TUN_DEBUG 1 */
@@ -1050,7 +1049,6 @@ static ssize_t tun_get_user(struct tun_struct *tun, struct tun_file *tfile,
 	bool zerocopy = false;
 	int err;
 	u32 rxhash;
-	struct flow_keys keys;
 
 	if (!(tun->flags & TUN_NO_PI)) {
 		if ((len -= sizeof(pi)) > total_len)
@@ -1205,13 +1203,7 @@ static ssize_t tun_get_user(struct tun_struct *tun, struct tun_file *tfile,
 	}
 
 	skb_reset_network_header(skb);
-
-	if (skb->ip_summed == CHECKSUM_PARTIAL)
-		skb_set_transport_header(skb, skb_checksum_start_offset(skb));
-	else if (skb_flow_dissect(skb, &keys))
-		skb_set_transport_header(skb, keys.thoff);
-	else
-		skb_reset_transport_header(skb);
+	skb_probe_transport_header(skb, 0);
 
 	rxhash = skb_get_rxhash(skb);
 	netif_rx_ni(skb);
diff --git a/drivers/net/xen-netback/netback.c b/drivers/net/xen-netback/netback.c
index fc8faa74b25..83905a97c56 100644
--- a/drivers/net/xen-netback/netback.c
+++ b/drivers/net/xen-netback/netback.c
@@ -39,7 +39,6 @@
 #include
 #include
-#include <net/flow_keys.h>
 #include
 #include
@@ -1506,14 +1505,7 @@ static void xen_netbk_tx_submit(struct xen_netbk *netbk)
 			continue;
 		}
 
-		if (!skb_transport_header_was_set(skb)) {
-			struct flow_keys keys;
-
-			if (skb_flow_dissect(skb, &keys))
-				skb_set_transport_header(skb, keys.thoff);
-			else
-				skb_reset_transport_header(skb);
-		}
+		skb_probe_transport_header(skb, 0);
 
 		vif->dev->stats.rx_bytes += skb->len;
 		vif->dev->stats.rx_packets++;
diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
index 83fdd0a87eb..8e4644ff8d3 100644
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -88,7 +88,6 @@
 #include
 #include
 #include
-#include <net/flow_keys.h>
 
 #ifdef CONFIG_INET
 #include
@@ -1413,7 +1412,6 @@ static int packet_sendmsg_spkt(struct kiocb *iocb, struct socket *sock,
 	__be16 proto = 0;
 	int err;
 	int extra_len = 0;
-	struct flow_keys keys;
 
 	/*
 	 *	Get and verify the address.
@@ -1514,10 +1512,7 @@ retry:
 	if (unlikely(extra_len == 4))
 		skb->no_fcs = 1;
 
-	if (skb_flow_dissect(skb, &keys))
-		skb_set_transport_header(skb, keys.thoff);
-	else
-		skb_reset_transport_header(skb);
+	skb_probe_transport_header(skb, 0);
 
 	dev_queue_xmit(skb);
 	rcu_read_unlock();
@@ -1925,7 +1920,6 @@ static int tpacket_fill_skb(struct packet_sock *po, struct sk_buff *skb,
 	struct page *page;
 	void *data;
 	int err;
-	struct flow_keys keys;
 
 	ph.raw = frame;
@@ -1950,11 +1944,7 @@ static int tpacket_fill_skb(struct packet_sock *po, struct sk_buff *skb,
 
 	skb_reserve(skb, hlen);
 	skb_reset_network_header(skb);
-
-	if (skb_flow_dissect(skb, &keys))
-		skb_set_transport_header(skb, keys.thoff);
-	else
-		skb_reset_transport_header(skb);
+	skb_probe_transport_header(skb, 0);
 
 	if (po->tp_tx_has_off) {
 		int off_min, off_max, off;
@@ -2212,7 +2202,6 @@ static int packet_snd(struct socket *sock,
 	unsigned short gso_type = 0;
 	int hlen, tlen;
 	int extra_len = 0;
-	struct flow_keys keys;
 
 	/*
 	 *	Get and verify the address.
@@ -2365,12 +2354,7 @@ static int packet_snd(struct socket *sock,
 		len += vnet_hdr_len;
 	}
 
-	if (skb->ip_summed == CHECKSUM_PARTIAL)
-		skb_set_transport_header(skb, skb_checksum_start_offset(skb));
-	else if (skb_flow_dissect(skb, &keys))
-		skb_set_transport_header(skb, keys.thoff);
-	else
-		skb_set_transport_header(skb, reserve);
+	skb_probe_transport_header(skb, reserve);
 
 	if (unlikely(extra_len == 4))
 		skb->no_fcs = 1;
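The new helper consolidates the open-coded pattern removed above. Its likely
shape, paraphrased from the include/linux/skbuff.h of that era (treat this as
a reference sketch, not the authoritative definition):

    static inline void skb_probe_transport_header(struct sk_buff *skb,
                                                  const int offset_hint)
    {
            struct flow_keys keys;

            if (skb_transport_header_was_set(skb))
                    return;         /* e.g. set by skb_partial_csum_set() */

            if (skb_flow_dissect(skb, &keys))
                    skb_set_transport_header(skb, keys.thoff);
            else
                    skb_set_transport_header(skb, offset_hint);
    }

offset_hint is the fallback each call site picks when dissection fails:
ETH_HLEN in macvtap, 0 in tun, netback and tpacket_fill_skb(), and reserve in
packet_snd().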
From 9eaee8beeeb3bca0d9b14324fd9d467d48db784c Mon Sep 17 00:00:00 2001
From: stephen hemminger
Date: Wed, 10 Apr 2013 10:54:46 +0000
Subject: xen-netback: fix sparse warning

Fix warning about 0 used as NULL.

Signed-off-by: Stephen Hemminger
Signed-off-by: David S. Miller
---
 drivers/net/xen-netback/netback.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/xen-netback/netback.c b/drivers/net/xen-netback/netback.c
index 83905a97c56..01a33cc58cf 100644
--- a/drivers/net/xen-netback/netback.c
+++ b/drivers/net/xen-netback/netback.c
@@ -1551,7 +1551,7 @@ static void xen_netbk_idx_release(struct xen_netbk *netbk, u16 pending_idx,
 
 	xenvif_put(vif);
 
-	netbk->mmap_pages[pending_idx]->mapping = 0;
+	netbk->mmap_pages[pending_idx]->mapping = NULL;
 	put_page(netbk->mmap_pages[pending_idx]);
 	netbk->mmap_pages[pending_idx] = NULL;
 }

From bea893364784582d03494348a91b2c63e0d540d3 Mon Sep 17 00:00:00 2001
From: Jason Wang
Date: Wed, 10 Apr 2013 20:35:29 +0000
Subject: xen-netback: switch to use skb_partial_csum_set()

Switch to use skb_partial_csum_set() to simplify the code.

Cc: Ian Campbell
Signed-off-by: Jason Wang
Signed-off-by: David S. Miller
---
 drivers/net/xen-netback/netback.c | 22 ++++++++--------------
 1 file changed, 8 insertions(+), 14 deletions(-)

diff --git a/drivers/net/xen-netback/netback.c b/drivers/net/xen-netback/netback.c
index 01a33cc58cf..9f718440426 100644
--- a/drivers/net/xen-netback/netback.c
+++ b/drivers/net/xen-netback/netback.c
@@ -1156,7 +1156,6 @@ static int netbk_set_skb_gso(struct xenvif *vif,
 static int checksum_setup(struct xenvif *vif, struct sk_buff *skb)
 {
 	struct iphdr *iph;
-	unsigned char *th;
 	int err = -EPROTO;
 	int recalculate_partial_csum = 0;
@@ -1180,28 +1179,26 @@ static int checksum_setup(struct xenvif *vif, struct sk_buff *skb)
 		goto out;
 
 	iph = (void *)skb->data;
-	th = skb->data + 4 * iph->ihl;
-	if (th >= skb_tail_pointer(skb))
-		goto out;
-
-	skb_set_transport_header(skb, 4 * iph->ihl);
-	skb->csum_start = th - skb->head;
 	switch (iph->protocol) {
 	case IPPROTO_TCP:
-		skb->csum_offset = offsetof(struct tcphdr, check);
+		if (!skb_partial_csum_set(skb, 4 * iph->ihl,
+					  offsetof(struct tcphdr, check)))
+			goto out;
+
 		if (recalculate_partial_csum) {
-			struct tcphdr *tcph = (struct tcphdr *)th;
+			struct tcphdr *tcph = tcp_hdr(skb);
 			tcph->check = ~csum_tcpudp_magic(iph->saddr, iph->daddr,
 							 skb->len - iph->ihl*4,
 							 IPPROTO_TCP, 0);
 		}
 		break;
 	case IPPROTO_UDP:
-		skb->csum_offset = offsetof(struct udphdr, check);
+		if (!skb_partial_csum_set(skb, 4 * iph->ihl,
+					  offsetof(struct udphdr, check)))
+			goto out;
+
 		if (recalculate_partial_csum) {
-			struct udphdr *udph = (struct udphdr *)th;
+			struct udphdr *udph = udp_hdr(skb);
 			udph->check = ~csum_tcpudp_magic(iph->saddr, iph->daddr,
 							 skb->len - iph->ihl*4,
 							 IPPROTO_UDP, 0);
@@ -1215,9 +1212,6 @@ static int checksum_setup(struct xenvif *vif, struct sk_buff *skb)
 		goto out;
 	}
 
-	if ((th + skb->csum_offset + 2) > skb_tail_pointer(skb))
-		goto out;
-
 	err = 0;
 
 out:
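For context, skb_partial_csum_set() (defined in net/core/skbuff.c)
bounds-checks the caller-supplied offsets against the skb's linear area and
then records the partial-checksum state, which is what the removed open-coded
checks did by hand. A paraphrased sketch of the era's implementation, for
reference only:

    bool skb_partial_csum_set(struct sk_buff *skb, u16 start, u16 off)
    {
            if (unlikely(start > skb_headlen(skb)) ||
                unlikely((int)start + off > skb_headlen(skb) - 2))
                    return false;   /* header not within linear data */

            skb->ip_summed = CHECKSUM_PARTIAL;
            skb->csum_start = skb_headroom(skb) + start;
            skb->csum_offset = off;
            skb_set_transport_header(skb, start);
            return true;
    }

This also shows why the old "(th + skb->csum_offset + 2) >
skb_tail_pointer(skb)" check could be deleted: the helper's headlen test
subsumes it.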
From 2810e5b9a7731ca5fce22bfbe12c96e16ac44b6f Mon Sep 17 00:00:00 2001
From: Wei Liu
Date: Mon, 22 Apr 2013 02:20:42 +0000
Subject: xen-netback: coalesce slots in TX path and fix regressions

This patch tries to coalesce tx requests when constructing grant copy
structures. It enables netback to deal with the situation when the
frontend's MAX_SKB_FRAGS is larger than the backend's MAX_SKB_FRAGS.

With the help of coalescing, this patch tries to address two regressions
while avoiding reopening the security hole fixed by XSA-39.

Regression 1. The reduction of the number of supported ring entries
(slots) per packet (from 18 to 17). This regression had been around for
some time but remained unnoticed until the XSA-39 security fix. It is
fixed by coalescing slots.

Regression 2. The XSA-39 security fix turning "too many frags" errors
from just dropping the packet into a fatal error that disables the VIF.
This is fixed by coalescing slots (handling 18 slots when the backend's
MAX_SKB_FRAGS is 17), which rules out the false positive (using 18 slots
is legitimate), and by dropping packets that use 19 to max_skb_slots
slots.

To avoid reopening the security hole fixed by XSA-39, a frontend sending
a packet using more than max_skb_slots slots is considered malicious.
The behavior of netback for a packet is thus:

    1-18 slots:             valid
    19-max_skb_slots slots: drop and respond with an error
    max_skb_slots+ slots:   fatal error

max_skb_slots is configurable by the admin, with a default value of 20.

Also change the variable name from "frags" to "slots" in
netbk_count_requests.

Please note that the RX path still has a dependency on MAX_SKB_FRAGS.
That will be fixed by a separate patch.

Signed-off-by: Wei Liu
Acked-by: Ian Campbell
Signed-off-by: David S. Miller
---
 drivers/net/xen-netback/netback.c | 275 +++++++++++++++++++++++++++++++------
 include/xen/interface/io/netif.h  |  18 +++
 2 files changed, 242 insertions(+), 51 deletions(-)

diff --git a/drivers/net/xen-netback/netback.c b/drivers/net/xen-netback/netback.c
index 9f718440426..d9292c59789 100644
--- a/drivers/net/xen-netback/netback.c
+++ b/drivers/net/xen-netback/netback.c
@@ -47,11 +47,25 @@
 #include
 #include
 
+/*
+ * This is the maximum slots a skb can have. If a guest sends a skb
+ * which exceeds this limit it is considered malicious.
+ */
+#define MAX_SKB_SLOTS_DEFAULT 20
+static unsigned int max_skb_slots = MAX_SKB_SLOTS_DEFAULT;
+module_param(max_skb_slots, uint, 0444);
+
+typedef unsigned int pending_ring_idx_t;
+#define INVALID_PENDING_RING_IDX (~0U)
+
 struct pending_tx_info {
-	struct xen_netif_tx_request req;
+	struct xen_netif_tx_request req; /* coalesced tx request */
 	struct xenvif *vif;
+	pending_ring_idx_t head; /* head != INVALID_PENDING_RING_IDX
+				  * if it is head of one or more tx
+				  * reqs
+				  */
 };
-typedef unsigned int pending_ring_idx_t;
 
 struct netbk_rx_meta {
 	int id;
@@ -102,7 +116,11 @@ struct xen_netbk {
 	atomic_t netfront_count;
 
 	struct pending_tx_info pending_tx_info[MAX_PENDING_REQS];
-	struct gnttab_copy tx_copy_ops[MAX_PENDING_REQS];
+	/* Coalescing tx requests before copying makes number of grant
+	 * copy ops greater or equal to number of slots required. In
+	 * worst case a tx request consumes 2 gnttab_copy.
+	 */
+	struct gnttab_copy tx_copy_ops[2*MAX_PENDING_REQS];
 
 	u16 pending_ring[MAX_PENDING_REQS];
@@ -118,6 +136,16 @@ struct xen_netbk {
 static struct xen_netbk *xen_netbk;
 static int xen_netbk_group_nr;
 
+/*
+ * If head != INVALID_PENDING_RING_IDX, it means this tx request is head of
+ * one or more merged tx requests, otherwise it is the continuation of
+ * previous tx request.
+ */
+static inline int pending_tx_is_head(struct xen_netbk *netbk, RING_IDX idx)
+{
+	return netbk->pending_tx_info[idx].head != INVALID_PENDING_RING_IDX;
+}
+
 void xen_netbk_add_xenvif(struct xenvif *vif)
 {
 	int i;
@@ -250,6 +278,7 @@ static int max_required_rx_slots(struct xenvif *vif)
 {
 	int max = DIV_ROUND_UP(vif->dev->mtu, PAGE_SIZE);
 
+	/* XXX FIXME: RX path dependent on MAX_SKB_FRAGS */
 	if (vif->can_sg || vif->gso || vif->gso_prefix)
 		max += MAX_SKB_FRAGS + 1; /* extra_info + frags */
@@ -657,6 +686,7 @@ static void xen_netbk_rx_action(struct xen_netbk *netbk)
 		__skb_queue_tail(&rxq, skb);
 
 		/* Filled the batch queue? */
+		/* XXX FIXME: RX path dependent on MAX_SKB_FRAGS */
 		if (count + MAX_SKB_FRAGS >= XEN_NETIF_RX_RING_SIZE)
 			break;
 	}
@@ -898,47 +928,78 @@ static void netbk_fatal_tx_err(struct xenvif *vif)
 
 static int netbk_count_requests(struct xenvif *vif,
 				struct xen_netif_tx_request *first,
+				RING_IDX first_idx,
 				struct xen_netif_tx_request *txp,
 				int work_to_do)
 {
 	RING_IDX cons = vif->tx.req_cons;
-	int frags = 0;
+	int slots = 0;
+	int drop_err = 0;
 
 	if (!(first->flags & XEN_NETTXF_more_data))
 		return 0;
 
 	do {
-		if (frags >= work_to_do) {
-			netdev_err(vif->dev, "Need more frags\n");
+		if (slots >= work_to_do) {
+			netdev_err(vif->dev,
+				   "Asked for %d slots but exceeds this limit\n",
+				   work_to_do);
 			netbk_fatal_tx_err(vif);
 			return -ENODATA;
 		}
 
-		if (unlikely(frags >= MAX_SKB_FRAGS)) {
-			netdev_err(vif->dev, "Too many frags\n");
+		/* This guest is really using too many slots and
+		 * considered malicious.
+		 */
+		if (unlikely(slots >= max_skb_slots)) {
+			netdev_err(vif->dev,
+				   "Malicious frontend using %d slots, threshold %u\n",
+				   slots, max_skb_slots);
 			netbk_fatal_tx_err(vif);
 			return -E2BIG;
 		}
 
-		memcpy(txp, RING_GET_REQUEST(&vif->tx, cons + frags),
+		/* Xen network protocol had implicit dependency on
+		 * MAX_SKB_FRAGS. XEN_NETIF_NR_SLOTS_MIN is set to the
+		 * historical MAX_SKB_FRAGS value 18 to honor the same
+		 * behavior as before. Any packet using more than 18
+		 * slots but less than max_skb_slots slots is dropped
+		 */
+		if (!drop_err && slots >= XEN_NETIF_NR_SLOTS_MIN) {
+			if (net_ratelimit())
+				netdev_dbg(vif->dev,
+					   "Too many slots (%d) exceeding limit (%d), dropping packet\n",
+					   slots, XEN_NETIF_NR_SLOTS_MIN);
+			drop_err = -E2BIG;
+		}
+
+		memcpy(txp, RING_GET_REQUEST(&vif->tx, cons + slots),
 		       sizeof(*txp));
 		if (txp->size > first->size) {
-			netdev_err(vif->dev, "Frag is bigger than frame.\n");
+			netdev_err(vif->dev,
+				   "Invalid tx request, slot size %u > remaining size %u\n",
+				   txp->size, first->size);
 			netbk_fatal_tx_err(vif);
 			return -EIO;
 		}
 
 		first->size -= txp->size;
-		frags++;
+		slots++;
 
 		if (unlikely((txp->offset + txp->size) > PAGE_SIZE)) {
-			netdev_err(vif->dev, "txp->offset: %x, size: %u\n",
+			netdev_err(vif->dev, "Cross page boundary, txp->offset: %x, size: %u\n",
 				   txp->offset, txp->size);
 			netbk_fatal_tx_err(vif);
 			return -EINVAL;
 		}
 	} while ((txp++)->flags & XEN_NETTXF_more_data);
 
-	return frags;
+	if (drop_err) {
+		netbk_tx_err(vif, first, first_idx + slots);
+		return drop_err;
+	}
+
+	return slots;
 }
 
 static struct page *xen_netbk_alloc_page(struct xen_netbk *netbk,
@@ -962,48 +1023,114 @@ static struct gnttab_copy *xen_netbk_get_requests(struct xen_netbk *netbk,
 	struct skb_shared_info *shinfo = skb_shinfo(skb);
 	skb_frag_t *frags = shinfo->frags;
 	u16 pending_idx = *((u16 *)skb->data);
-	int i, start;
+	u16 head_idx = 0;
+	int slot, start;
+	struct page *page;
+	pending_ring_idx_t index, start_idx = 0;
+	uint16_t dst_offset;
+	unsigned int nr_slots;
+	struct pending_tx_info *first = NULL;
+
+	/* At this point shinfo->nr_frags is in fact the number of
+	 * slots, which can be as large as XEN_NETIF_NR_SLOTS_MIN.
+	 */
+	nr_slots = shinfo->nr_frags;
 
 	/* Skip first skb fragment if it is on same page as header fragment. */
 	start = (frag_get_pending_idx(&shinfo->frags[0]) == pending_idx);
 
-	for (i = start; i < shinfo->nr_frags; i++, txp++) {
-		struct page *page;
-		pending_ring_idx_t index;
+	/* Coalesce tx requests, at this point the packet passed in
+	 * should be <= 64K. Any packets larger than 64K have been
+	 * handled in netbk_count_requests().
+	 */
+	for (shinfo->nr_frags = slot = start; slot < nr_slots;
+	     shinfo->nr_frags++) {
 		struct pending_tx_info *pending_tx_info =
 			netbk->pending_tx_info;
 
-		index = pending_index(netbk->pending_cons++);
-		pending_idx = netbk->pending_ring[index];
-		page = xen_netbk_alloc_page(netbk, pending_idx);
+		page = alloc_page(GFP_KERNEL|__GFP_COLD);
 		if (!page)
 			goto err;
 
-		gop->source.u.ref = txp->gref;
-		gop->source.domid = vif->domid;
-		gop->source.offset = txp->offset;
-
-		gop->dest.u.gmfn = virt_to_mfn(page_address(page));
-		gop->dest.domid = DOMID_SELF;
-		gop->dest.offset = txp->offset;
-
-		gop->len = txp->size;
-		gop->flags = GNTCOPY_source_gref;
+		dst_offset = 0;
+		first = NULL;
+		while (dst_offset < PAGE_SIZE && slot < nr_slots) {
+			gop->flags = GNTCOPY_source_gref;
+
+			gop->source.u.ref = txp->gref;
+			gop->source.domid = vif->domid;
+			gop->source.offset = txp->offset;
+
+			gop->dest.domid = DOMID_SELF;
+
+			gop->dest.offset = dst_offset;
+			gop->dest.u.gmfn = virt_to_mfn(page_address(page));
+
+			if (dst_offset + txp->size > PAGE_SIZE) {
+				/* This page can only merge a portion
+				 * of tx request. Do not increment any
+				 * pointer / counter here. The txp
+				 * will be dealt with in future
+				 * rounds, eventually hitting the
+				 * `else` branch.
+				 */
+				gop->len = PAGE_SIZE - dst_offset;
+				txp->offset += gop->len;
+				txp->size -= gop->len;
+				dst_offset += gop->len; /* quit loop */
+			} else {
+				/* This tx request can be merged in the page */
+				gop->len = txp->size;
+				dst_offset += gop->len;
+
+				index = pending_index(netbk->pending_cons++);
+
+				pending_idx = netbk->pending_ring[index];
+
+				memcpy(&pending_tx_info[pending_idx].req, txp,
+				       sizeof(*txp));
+				xenvif_get(vif);
+
+				pending_tx_info[pending_idx].vif = vif;
+
+				/* Poison these fields, corresponding
+				 * fields for head tx req will be set
+				 * to correct values after the loop.
+				 */
+				netbk->mmap_pages[pending_idx] = (void *)(~0UL);
+				pending_tx_info[pending_idx].head =
+					INVALID_PENDING_RING_IDX;
+
+				if (!first) {
+					first = &pending_tx_info[pending_idx];
+					start_idx = index;
+					head_idx = pending_idx;
+				}
+
+				txp++;
+				slot++;
+			}
 
-		gop++;
+			gop++;
+		}
 
-		memcpy(&pending_tx_info[pending_idx].req, txp, sizeof(*txp));
-		xenvif_get(vif);
-		pending_tx_info[pending_idx].vif = vif;
-		frag_set_pending_idx(&frags[i], pending_idx);
+		first->req.offset = 0;
+		first->req.size = dst_offset;
+		first->head = start_idx;
+		set_page_ext(page, netbk, head_idx);
+		netbk->mmap_pages[head_idx] = page;
+		frag_set_pending_idx(&frags[shinfo->nr_frags], head_idx);
 	}
 
+	BUG_ON(shinfo->nr_frags > MAX_SKB_FRAGS);
+
 	return gop;
 err:
 	/* Unwind, freeing all pages and sending error responses. */
-	while (i-- > start) {
-		xen_netbk_idx_release(netbk, frag_get_pending_idx(&frags[i]),
-				      XEN_NETIF_RSP_ERROR);
+	while (shinfo->nr_frags-- > start) {
+		xen_netbk_idx_release(netbk,
+				frag_get_pending_idx(&frags[shinfo->nr_frags]),
+				XEN_NETIF_RSP_ERROR);
 	}
 	/* The head too, if necessary. */
 	if (start)
@@ -1019,8 +1146,10 @@ static int xen_netbk_tx_check_gop(struct xen_netbk *netbk,
 	struct gnttab_copy *gop = *gopp;
 	u16 pending_idx = *((u16 *)skb->data);
 	struct skb_shared_info *shinfo = skb_shinfo(skb);
+	struct pending_tx_info *tx_info;
 	int nr_frags = shinfo->nr_frags;
 	int i, err, start;
+	u16 peek; /* peek into next tx request */
 
 	/* Check status of header. */
 	err = gop->status;
@@ -1032,11 +1161,20 @@ static int xen_netbk_tx_check_gop(struct xen_netbk *netbk,
 
 	for (i = start; i < nr_frags; i++) {
 		int j, newerr;
+		pending_ring_idx_t head;
 
 		pending_idx = frag_get_pending_idx(&shinfo->frags[i]);
+		tx_info = &netbk->pending_tx_info[pending_idx];
+		head = tx_info->head;
 
 		/* Check error status: if okay then remember grant handle. */
-		newerr = (++gop)->status;
+		do {
+			newerr = (++gop)->status;
+			if (newerr)
+				break;
+			peek = netbk->pending_ring[pending_index(++head)];
+		} while (!pending_tx_is_head(netbk, peek));
+
 		if (likely(!newerr)) {
 			/* Had a previous error? Invalidate this fragment. */
 			if (unlikely(err))
@@ -1256,11 +1394,12 @@ static unsigned xen_netbk_tx_build_gops(struct xen_netbk *netbk)
 	struct sk_buff *skb;
 	int ret;
 
-	while (((nr_pending_reqs(netbk) + MAX_SKB_FRAGS) < MAX_PENDING_REQS) &&
+	while ((nr_pending_reqs(netbk) + XEN_NETIF_NR_SLOTS_MIN
+		< MAX_PENDING_REQS) &&
 		!list_empty(&netbk->net_schedule_list)) {
 		struct xenvif *vif;
 		struct xen_netif_tx_request txreq;
-		struct xen_netif_tx_request txfrags[MAX_SKB_FRAGS];
+		struct xen_netif_tx_request txfrags[max_skb_slots];
 		struct page *page;
 		struct xen_netif_extra_info extras[XEN_NETIF_EXTRA_TYPE_MAX-1];
 		u16 pending_idx;
@@ -1321,7 +1460,8 @@ static unsigned xen_netbk_tx_build_gops(struct xen_netbk *netbk)
 			continue;
 		}
 
-		ret = netbk_count_requests(vif, &txreq, txfrags, work_to_do);
+		ret = netbk_count_requests(vif, &txreq, idx,
+					   txfrags, work_to_do);
 		if (unlikely(ret < 0))
 			continue;
 
@@ -1348,7 +1488,7 @@ static unsigned xen_netbk_tx_build_gops(struct xen_netbk *netbk)
 		pending_idx = netbk->pending_ring[index];
 
 		data_len = (txreq.size > PKT_PROT_LEN &&
-			    ret < MAX_SKB_FRAGS) ?
+			    ret < XEN_NETIF_NR_SLOTS_MIN) ?
 			PKT_PROT_LEN : txreq.size;
 
 		skb = alloc_skb(data_len + NET_SKB_PAD + NET_IP_ALIGN,
@@ -1398,6 +1538,7 @@ static unsigned xen_netbk_tx_build_gops(struct xen_netbk *netbk)
 		memcpy(&netbk->pending_tx_info[pending_idx].req,
 		       &txreq, sizeof(txreq));
 		netbk->pending_tx_info[pending_idx].vif = vif;
+		netbk->pending_tx_info[pending_idx].head = index;
 		*((u16 *)skb->data) = pending_idx;
 
 		__skb_put(skb, data_len);
@@ -1528,7 +1669,10 @@ static void xen_netbk_idx_release(struct xen_netbk *netbk, u16 pending_idx,
 {
 	struct xenvif *vif;
 	struct pending_tx_info *pending_tx_info;
-	pending_ring_idx_t index;
+	pending_ring_idx_t head;
+	u16 peek; /* peek into next tx request */
+
+	BUG_ON(netbk->mmap_pages[pending_idx] == (void *)(~0UL));
 
 	/* Already complete? */
 	if (netbk->mmap_pages[pending_idx] == NULL)
@@ -1537,19 +1681,40 @@ static void xen_netbk_idx_release(struct xen_netbk *netbk, u16 pending_idx,
 	pending_tx_info = &netbk->pending_tx_info[pending_idx];
 
 	vif = pending_tx_info->vif;
+	head = pending_tx_info->head;
 
-	make_tx_response(vif, &pending_tx_info->req, status);
+	BUG_ON(!pending_tx_is_head(netbk, head));
+	BUG_ON(netbk->pending_ring[pending_index(head)] != pending_idx);
 
-	index = pending_index(netbk->pending_prod++);
-	netbk->pending_ring[index] = pending_idx;
+	do {
+		pending_ring_idx_t index;
+		pending_ring_idx_t idx = pending_index(head);
+		u16 info_idx = netbk->pending_ring[idx];
 
-	xenvif_put(vif);
+		pending_tx_info = &netbk->pending_tx_info[info_idx];
+		make_tx_response(vif, &pending_tx_info->req, status);
+
+		/* Setting any number other than
+		 * INVALID_PENDING_RING_IDX indicates this slot is
+		 * starting a new packet / ending a previous packet.
+		 */
+		pending_tx_info->head = 0;
+
+		index = pending_index(netbk->pending_prod++);
+		netbk->pending_ring[index] = netbk->pending_ring[info_idx];
 
-	netbk->mmap_pages[pending_idx]->mapping = NULL;
+		xenvif_put(vif);
+
+		peek = netbk->pending_ring[pending_index(++head)];
+
+	} while (!pending_tx_is_head(netbk, peek));
+
+	netbk->mmap_pages[pending_idx]->mapping = 0;
 	put_page(netbk->mmap_pages[pending_idx]);
 	netbk->mmap_pages[pending_idx] = NULL;
 }
 
+
 static void make_tx_response(struct xenvif *vif,
 			     struct xen_netif_tx_request *txp,
 			     s8 st)
@@ -1602,8 +1767,9 @@ static inline int rx_work_todo(struct xen_netbk *netbk)
 
 static inline int tx_work_todo(struct xen_netbk *netbk)
 {
-	if (((nr_pending_reqs(netbk) + MAX_SKB_FRAGS) < MAX_PENDING_REQS) &&
-	    !list_empty(&netbk->net_schedule_list))
+	if ((nr_pending_reqs(netbk) + XEN_NETIF_NR_SLOTS_MIN
+	     < MAX_PENDING_REQS) &&
+	    !list_empty(&netbk->net_schedule_list))
 		return 1;
 
 	return 0;
@@ -1686,6 +1852,13 @@ static int __init netback_init(void)
 	if (!xen_domain())
 		return -ENODEV;
 
+	if (max_skb_slots < XEN_NETIF_NR_SLOTS_MIN) {
+		printk(KERN_INFO
+		       "xen-netback: max_skb_slots too small (%d), bump it to XEN_NETIF_NR_SLOTS_MIN (%d)\n",
+		       max_skb_slots, XEN_NETIF_NR_SLOTS_MIN);
+		max_skb_slots = XEN_NETIF_NR_SLOTS_MIN;
+	}
+
 	xen_netbk_group_nr = num_online_cpus();
 	xen_netbk = vzalloc(sizeof(struct xen_netbk) * xen_netbk_group_nr);
 	if (!xen_netbk)
diff --git a/include/xen/interface/io/netif.h b/include/xen/interface/io/netif.h
index 58fadcac33a..3ef3fe05ee9 100644
--- a/include/xen/interface/io/netif.h
+++ b/include/xen/interface/io/netif.h
@@ -12,6 +12,24 @@
 #include
 #include
 
+/*
+ * Older implementation of Xen network frontend / backend has an
+ * implicit dependency on the MAX_SKB_FRAGS as the maximum number of
+ * ring slots a skb can use. Netfront / netback may not work as
+ * expected when frontend and backend have different MAX_SKB_FRAGS.
+ *
+ * A better approach is to add mechanism for netfront / netback to
+ * negotiate this value. However we cannot fix all possible
+ * frontends, so we need to define a value which states the minimum
+ * slots backend must support.
+ *
+ * The minimum value derives from older Linux kernel's MAX_SKB_FRAGS
+ * (18), which is proved to work with most frontends. Any new backend
+ * which doesn't negotiate with frontend should expect frontend to
+ * send a valid packet using slots up to this value.
+ */
+#define XEN_NETIF_NR_SLOTS_MIN 18
+
 /*
  * Notifications after enqueuing any type of message should be conditional on
  * the appropriate req_event or rsp_event field in the shared ring.
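The three-tier policy above can be condensed into a single predicate. The
sketch below is a standalone illustration, not kernel code; the constants
mirror the prose (18 is XEN_NETIF_NR_SLOTS_MIN, max_skb_slots defaults to 20):

    enum slot_verdict { SLOT_VALID, SLOT_DROP, SLOT_FATAL };

    static enum slot_verdict classify_packet(unsigned int slots,
                                             unsigned int max_skb_slots)
    {
            if (slots > max_skb_slots)
                    return SLOT_FATAL;      /* malicious: disable the VIF */
            if (slots > 18)                 /* XEN_NETIF_NR_SLOTS_MIN */
                    return SLOT_DROP;       /* drop, respond with an error */
            return SLOT_VALID;
    }

Coalescing is also what justifies sizing tx_copy_ops at 2*MAX_PENDING_REQS:
per the comment added above, a single tx request can straddle at most one
destination page boundary, so it contributes at most two grant-copy
operations.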
From 03393fd5cc2b6cdeec32b704ecba64dbb0feae3c Mon Sep 17 00:00:00 2001
From: Wei Liu
Date: Mon, 22 Apr 2013 02:20:43 +0000
Subject: xen-netback: don't disconnect frontend when seeing oversize packet

Some frontend drivers are sending packets > 64 KiB in length. This
length overflows the length field in the first slot, making the
following slots have an invalid length.

Turn this error back into a non-fatal error by dropping the packet. To
avoid the following slots hitting fatal errors, consume all slots in
the packet.

This does not reopen the security hole in XSA-39: if the packet has an
invalid number of slots it will still hit the fatal error case.

Signed-off-by: David Vrabel
Signed-off-by: Wei Liu
Acked-by: Ian Campbell
Signed-off-by: David S. Miller
---
 drivers/net/xen-netback/netback.c | 22 ++++++++++++++++------
 1 file changed, 16 insertions(+), 6 deletions(-)

diff --git a/drivers/net/xen-netback/netback.c b/drivers/net/xen-netback/netback.c
index d9292c59789..a2865f17c66 100644
--- a/drivers/net/xen-netback/netback.c
+++ b/drivers/net/xen-netback/netback.c
@@ -975,12 +975,22 @@ static int netbk_count_requests(struct xenvif *vif,
 
 		memcpy(txp, RING_GET_REQUEST(&vif->tx, cons + slots),
 		       sizeof(*txp));
-		if (txp->size > first->size) {
-			netdev_err(vif->dev,
-				   "Invalid tx request, slot size %u > remaining size %u\n",
-				   txp->size, first->size);
-			netbk_fatal_tx_err(vif);
-			return -EIO;
+
+		/* If the guest submitted a frame >= 64 KiB then
+		 * first->size overflowed and following slots will
+		 * appear to be larger than the frame.
+		 *
+		 * This cannot be fatal error as there are buggy
+		 * frontends that do this.
+		 *
+		 * Consume all slots and drop the packet.
+		 */
+		if (!drop_err && txp->size > first->size) {
+			if (net_ratelimit())
+				netdev_dbg(vif->dev,
+					   "Invalid tx request, slot size %u > remaining size %u\n",
+					   txp->size, first->size);
+			drop_err = -EIO;
 		}
 
 		first->size -= txp->size;
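The overflow described above is a direct consequence of the ring ABI: the
size field of a tx request is 16 bits wide, so a frame >= 64 KiB wraps. A
standalone demonstration with made-up numbers (not kernel code):

    #include <stdint.h>
    #include <stdio.h>

    int main(void)
    {
            uint32_t frame_len  = 66000;               /* guest frame >= 64 KiB */
            uint16_t first_size = (uint16_t)frame_len; /* wraps to 464 */
            uint16_t slot_size  = 4096;                /* ordinary following slot */

            /* netbk_count_requests() tests txp->size > first->size, so with
             * the wrapped total even a normal slot looks oversized.
             */
            printf("first->size=%u slot=%u oversized=%d\n",
                   first_size, slot_size, slot_size > first_size);
            return 0;
    }

With drop_err the error is no longer fatal: all slots of the frame are still
consumed, so the ring pointers stay consistent and only the packet is
discarded.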
From ac69c26e7accb04ae2cb9ab0872068983a42b3c8 Mon Sep 17 00:00:00 2001
From: Wei Liu
Date: Thu, 2 May 2013 00:43:57 +0000
Subject: xen-netback: remove redundant parameter in netbk_count_requests

Tracking down from the caller, first_idx is always equal to
vif->tx.req_cons. Remove it to avoid confusion.

Suggested-by: Jan Beulich
Signed-off-by: Wei Liu
Acked-by: Ian Campbell
Signed-off-by: David S. Miller
---
 drivers/net/xen-netback/netback.c | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/drivers/net/xen-netback/netback.c b/drivers/net/xen-netback/netback.c
index a2865f17c66..c44772d6bba 100644
--- a/drivers/net/xen-netback/netback.c
+++ b/drivers/net/xen-netback/netback.c
@@ -928,7 +928,6 @@ static void netbk_fatal_tx_err(struct xenvif *vif)
 
 static int netbk_count_requests(struct xenvif *vif,
 				struct xen_netif_tx_request *first,
-				RING_IDX first_idx,
 				struct xen_netif_tx_request *txp,
 				int work_to_do)
 {
@@ -1005,7 +1004,7 @@ static int netbk_count_requests(struct xenvif *vif,
 	}
 
 	if (drop_err) {
-		netbk_tx_err(vif, first, first_idx + slots);
+		netbk_tx_err(vif, first, cons + slots);
 		return drop_err;
 	}
 
@@ -1470,8 +1469,7 @@ static unsigned xen_netbk_tx_build_gops(struct xen_netbk *netbk)
 			continue;
 		}
 
-		ret = netbk_count_requests(vif, &txreq, idx,
-					   txfrags, work_to_do);
+		ret = netbk_count_requests(vif, &txreq, txfrags, work_to_do);
 		if (unlikely(ret < 0))
 			continue;

From 59ccb4ebbc35e36a3c143f2d1355deb75c2e628f Mon Sep 17 00:00:00 2001
From: Wei Liu
Date: Thu, 2 May 2013 00:43:58 +0000
Subject: xen-netback: avoid allocating variable size array on stack

Tune xen_netbk_count_requests so it does not touch the working array
beyond the limit, so that the working array size can be made constant.

Suggested-by: Jan Beulich
Signed-off-by: Wei Liu
Signed-off-by: David S. Miller
---
 drivers/net/xen-netback/netback.c | 16 ++++++++++++++--
 1 file changed, 14 insertions(+), 2 deletions(-)

diff --git a/drivers/net/xen-netback/netback.c b/drivers/net/xen-netback/netback.c
index c44772d6bba..ce8109f7d56 100644
--- a/drivers/net/xen-netback/netback.c
+++ b/drivers/net/xen-netback/netback.c
@@ -934,11 +934,14 @@ static int netbk_count_requests(struct xenvif *vif,
 	RING_IDX cons = vif->tx.req_cons;
 	int slots = 0;
 	int drop_err = 0;
+	int more_data;
 
 	if (!(first->flags & XEN_NETTXF_more_data))
 		return 0;
 
 	do {
+		struct xen_netif_tx_request dropped_tx = { 0 };
+
 		if (slots >= work_to_do) {
 			netdev_err(vif->dev,
 				   "Asked for %d slots but exceeds this limit\n",
@@ -972,6 +975,9 @@ static int netbk_count_requests(struct xenvif *vif,
 			drop_err = -E2BIG;
 		}
 
+		if (drop_err)
+			txp = &dropped_tx;
+
 		memcpy(txp, RING_GET_REQUEST(&vif->tx, cons + slots),
 		       sizeof(*txp));
 
@@ -1001,7 +1007,13 @@ static int netbk_count_requests(struct xenvif *vif,
 			netbk_fatal_tx_err(vif);
 			return -EINVAL;
 		}
-	} while ((txp++)->flags & XEN_NETTXF_more_data);
+
+		more_data = txp->flags & XEN_NETTXF_more_data;
+
+		if (!drop_err)
+			txp++;
+
+	} while (more_data);
 
 	if (drop_err) {
 		netbk_tx_err(vif, first, cons + slots);
@@ -1408,7 +1420,7 @@ static unsigned xen_netbk_tx_build_gops(struct xen_netbk *netbk)
 		!list_empty(&netbk->net_schedule_list)) {
 		struct xenvif *vif;
 		struct xen_netif_tx_request txreq;
-		struct xen_netif_tx_request txfrags[max_skb_slots];
+		struct xen_netif_tx_request txfrags[XEN_NETIF_NR_SLOTS_MIN];
 		struct page *page;
 		struct xen_netif_extra_info extras[XEN_NETIF_EXTRA_TYPE_MAX-1];
 		u16 pending_idx;
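The fix relies on a scratch-slot trick: once drop_err is set, the loop keeps
consuming ring entries (so req_cons still advances over the whole packet) but
parks the copies in a throwaway struct instead of walking past the end of the
caller's fixed-size array. A hypothetical standalone rendering of the pattern
(names invented for illustration):

    #include <string.h>

    struct req { unsigned short size; unsigned short flags; };

    #define SLOTS_MAX 18    /* mirrors XEN_NETIF_NR_SLOTS_MIN */

    static int count_requests(const struct req *ring, int total,
                              struct req out[SLOTS_MAX])
    {
            struct req scratch = { 0 };
            struct req *txp = out;
            int slots = 0, drop_err = 0;

            while (slots < total) {
                    if (slots >= SLOTS_MAX)
                            drop_err = -1;  /* packet will be dropped */
                    if (drop_err)
                            txp = &scratch; /* stop writing into out[] */

                    memcpy(txp, &ring[slots], sizeof(*txp));
                    slots++;

                    if (!drop_err)
                            txp++;          /* only advance while valid */
            }
            return drop_err ? drop_err : slots;
    }

This is what lets txfrags[] shrink from the variable max_skb_slots bound (a
variable-length array sized by a module parameter) to the constant
XEN_NETIF_NR_SLOTS_MIN.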
From 376414945d15aa636e65f7e773c1e398b7a21cb9 Mon Sep 17 00:00:00 2001
From: Wei Liu
Date: Thu, 2 May 2013 00:43:59 +0000
Subject: xen-netback: better names for thresholds

This patch only changes some names to avoid confusion. In this patch we
have:

    MAX_SKB_SLOTS_DEFAULT -> FATAL_SKB_SLOTS_DEFAULT
    max_skb_slots         -> fatal_skb_slots
    #define XEN_NETBK_LEGACY_SLOTS_MAX XEN_NETIF_NR_SLOTS_MIN

fatal_skb_slots is the threshold to determine whether a packet is
malicious. XEN_NETBK_LEGACY_SLOTS_MAX is the maximum number of slots a
valid packet can use at this point. It is defined to be
XEN_NETIF_NR_SLOTS_MIN because that's guaranteed to be supported by all
backends.

Suggested-by: Ian Campbell
Signed-off-by: Wei Liu
Signed-off-by: David S. Miller
---
 drivers/net/xen-netback/netback.c | 49 +++++++++++++++++++++++----------------
 1 file changed, 29 insertions(+), 20 deletions(-)

diff --git a/drivers/net/xen-netback/netback.c b/drivers/net/xen-netback/netback.c
index ce8109f7d56..37984e6d4e9 100644
--- a/drivers/net/xen-netback/netback.c
+++ b/drivers/net/xen-netback/netback.c
@@ -51,9 +51,17 @@
  * This is the maximum slots a skb can have. If a guest sends a skb
  * which exceeds this limit it is considered malicious.
  */
-#define MAX_SKB_SLOTS_DEFAULT 20
-static unsigned int max_skb_slots = MAX_SKB_SLOTS_DEFAULT;
-module_param(max_skb_slots, uint, 0444);
+#define FATAL_SKB_SLOTS_DEFAULT 20
+static unsigned int fatal_skb_slots = FATAL_SKB_SLOTS_DEFAULT;
+module_param(fatal_skb_slots, uint, 0444);
+
+/*
+ * To avoid confusion, we define XEN_NETBK_LEGACY_SLOTS_MAX indicating
+ * the maximum slots a valid packet can use. Now this value is defined
+ * to be XEN_NETIF_NR_SLOTS_MIN, which is supposed to be supported by
+ * all backend.
+ */
+#define XEN_NETBK_LEGACY_SLOTS_MAX XEN_NETIF_NR_SLOTS_MIN
 
 typedef unsigned int pending_ring_idx_t;
 #define INVALID_PENDING_RING_IDX (~0U)
@@ -953,25 +961,26 @@ static int netbk_count_requests(struct xenvif *vif,
 		/* This guest is really using too many slots and
 		 * considered malicious.
 		 */
-		if (unlikely(slots >= max_skb_slots)) {
+		if (unlikely(slots >= fatal_skb_slots)) {
 			netdev_err(vif->dev,
 				   "Malicious frontend using %d slots, threshold %u\n",
-				   slots, max_skb_slots);
+				   slots, fatal_skb_slots);
 			netbk_fatal_tx_err(vif);
 			return -E2BIG;
 		}
 
 		/* Xen network protocol had implicit dependency on
-		 * MAX_SKB_FRAGS. XEN_NETIF_NR_SLOTS_MIN is set to the
-		 * historical MAX_SKB_FRAGS value 18 to honor the same
-		 * behavior as before. Any packet using more than 18
-		 * slots but less than max_skb_slots slots is dropped
+		 * MAX_SKB_FRAGS. XEN_NETBK_LEGACY_SLOTS_MAX is set to
+		 * the historical MAX_SKB_FRAGS value 18 to honor the
+		 * same behavior as before. Any packet using more than
+		 * 18 slots but less than fatal_skb_slots slots is
+		 * dropped
 		 */
-		if (!drop_err && slots >= XEN_NETIF_NR_SLOTS_MIN) {
+		if (!drop_err && slots >= XEN_NETBK_LEGACY_SLOTS_MAX) {
 			if (net_ratelimit())
 				netdev_dbg(vif->dev,
 					   "Too many slots (%d) exceeding limit (%d), dropping packet\n",
-					   slots, XEN_NETIF_NR_SLOTS_MIN);
+					   slots, XEN_NETBK_LEGACY_SLOTS_MAX);
 			drop_err = -E2BIG;
 		}
 
@@ -1053,7 +1062,7 @@ static struct gnttab_copy *xen_netbk_get_requests(struct xen_netbk *netbk,
 	struct pending_tx_info *first = NULL;
 
 	/* At this point shinfo->nr_frags is in fact the number of
-	 * slots, which can be as large as XEN_NETIF_NR_SLOTS_MIN.
+	 * slots, which can be as large as XEN_NETBK_LEGACY_SLOTS_MAX.
	 */
 	nr_slots = shinfo->nr_frags;
 
@@ -1415,12 +1424,12 @@ static unsigned xen_netbk_tx_build_gops(struct xen_netbk *netbk)
 	struct sk_buff *skb;
 	int ret;
 
-	while ((nr_pending_reqs(netbk) + XEN_NETIF_NR_SLOTS_MIN
+	while ((nr_pending_reqs(netbk) + XEN_NETBK_LEGACY_SLOTS_MAX
 		< MAX_PENDING_REQS) &&
 		!list_empty(&netbk->net_schedule_list)) {
 		struct xenvif *vif;
 		struct xen_netif_tx_request txreq;
-		struct xen_netif_tx_request txfrags[XEN_NETIF_NR_SLOTS_MIN];
+		struct xen_netif_tx_request txfrags[XEN_NETBK_LEGACY_SLOTS_MAX];
 		struct page *page;
 		struct xen_netif_extra_info extras[XEN_NETIF_EXTRA_TYPE_MAX-1];
 		u16 pending_idx;
@@ -1508,7 +1517,7 @@ static unsigned xen_netbk_tx_build_gops(struct xen_netbk *netbk)
 		pending_idx = netbk->pending_ring[index];
 
 		data_len = (txreq.size > PKT_PROT_LEN &&
-			    ret < XEN_NETIF_NR_SLOTS_MIN) ?
+			    ret < XEN_NETBK_LEGACY_SLOTS_MAX) ?
 			PKT_PROT_LEN : txreq.size;
 
 		skb = alloc_skb(data_len + NET_SKB_PAD + NET_IP_ALIGN,
@@ -1787,7 +1796,7 @@ static inline int rx_work_todo(struct xen_netbk *netbk)
 
 static inline int tx_work_todo(struct xen_netbk *netbk)
 {
-	if ((nr_pending_reqs(netbk) + XEN_NETIF_NR_SLOTS_MIN
+	if ((nr_pending_reqs(netbk) + XEN_NETBK_LEGACY_SLOTS_MAX
 	     < MAX_PENDING_REQS) &&
 	     !list_empty(&netbk->net_schedule_list))
 		return 1;
@@ -1872,11 +1881,11 @@ static int __init netback_init(void)
 	if (!xen_domain())
 		return -ENODEV;
 
-	if (max_skb_slots < XEN_NETIF_NR_SLOTS_MIN) {
+	if (fatal_skb_slots < XEN_NETBK_LEGACY_SLOTS_MAX) {
 		printk(KERN_INFO
-		       "xen-netback: max_skb_slots too small (%d), bump it to XEN_NETIF_NR_SLOTS_MIN (%d)\n",
-		       max_skb_slots, XEN_NETIF_NR_SLOTS_MIN);
-		max_skb_slots = XEN_NETIF_NR_SLOTS_MIN;
+		       "xen-netback: fatal_skb_slots too small (%d), bump it to XEN_NETBK_LEGACY_SLOTS_MAX (%d)\n",
+		       fatal_skb_slots, XEN_NETBK_LEGACY_SLOTS_MAX);
+		fatal_skb_slots = XEN_NETBK_LEGACY_SLOTS_MAX;
 	}
 
 	xen_netbk_group_nr = num_online_cpus();