-rw-r--r--  drivers/net/bnx2.c         |   2
-rw-r--r--  drivers/net/loopback.c     |  67
-rw-r--r--  drivers/net/tun.c          | 105
-rw-r--r--  include/linux/if_tun.h     |   1
-rw-r--r--  include/linux/skbuff.h     |   4
-rw-r--r--  include/net/sch_generic.h  |   2
-rw-r--r--  net/bridge/br_device.c     |  15
-rw-r--r--  net/core/datagram.c        |  87
-rw-r--r--  net/core/dev.c             |  47
-rw-r--r--  net/core/skbuff.c          |  12
-rw-r--r--  net/dccp/input.c           |  12
-rw-r--r--  net/ipv6/ipv6_sockglue.c   |   4
-rw-r--r--  net/sched/sch_api.c        |  22
-rw-r--r--  net/sched/sch_cbq.c        |   2
-rw-r--r--  net/sched/sch_generic.c    |  68
-rw-r--r--  net/sched/sch_htb.c        |   4
-rw-r--r--  net/sched/sch_prio.c       |   4
-rw-r--r--  net/sched/sch_tbf.c        |  11
18 files changed, 279 insertions(+), 190 deletions(-)
diff --git a/drivers/net/bnx2.c b/drivers/net/bnx2.c
index d4548101e49..2486a656f12 100644
--- a/drivers/net/bnx2.c
+++ b/drivers/net/bnx2.c
@@ -35,8 +35,8 @@
#include <linux/time.h>
#include <linux/ethtool.h>
#include <linux/mii.h>
-#if defined(CONFIG_VLAN_8021Q) || defined(CONFIG_VLAN_8021Q_MODULE)
#include <linux/if_vlan.h>
+#if defined(CONFIG_VLAN_8021Q) || defined(CONFIG_VLAN_8021Q_MODULE)
#define BCM_VLAN 1
#endif
#include <net/ip.h>
diff --git a/drivers/net/loopback.c b/drivers/net/loopback.c
index 49f6bc036a9..3b43bfd85a0 100644
--- a/drivers/net/loopback.c
+++ b/drivers/net/loopback.c
@@ -64,68 +64,6 @@ struct pcpu_lstats {
unsigned long bytes;
};
-/* KISS: just allocate small chunks and copy bits.
- *
- * So, in fact, this is documentation, explaining what we expect
- * of largesending device modulo TCP checksum, which is ignored for loopback.
- */
-
-#ifdef LOOPBACK_TSO
-static void emulate_large_send_offload(struct sk_buff *skb)
-{
- struct iphdr *iph = ip_hdr(skb);
- struct tcphdr *th = (struct tcphdr *)(skb_network_header(skb) +
- (iph->ihl * 4));
- unsigned int doffset = (iph->ihl + th->doff) * 4;
- unsigned int mtu = skb_shinfo(skb)->gso_size + doffset;
- unsigned int offset = 0;
- u32 seq = ntohl(th->seq);
- u16 id = ntohs(iph->id);
-
- while (offset + doffset < skb->len) {
- unsigned int frag_size = min(mtu, skb->len - offset) - doffset;
- struct sk_buff *nskb = alloc_skb(mtu + 32, GFP_ATOMIC);
-
- if (!nskb)
- break;
- skb_reserve(nskb, 32);
- skb_set_mac_header(nskb, -ETH_HLEN);
- skb_reset_network_header(nskb);
- iph = ip_hdr(nskb);
- skb_copy_to_linear_data(nskb, skb_network_header(skb),
- doffset);
- if (skb_copy_bits(skb,
- doffset + offset,
- nskb->data + doffset,
- frag_size))
- BUG();
- skb_put(nskb, doffset + frag_size);
- nskb->ip_summed = CHECKSUM_UNNECESSARY;
- nskb->dev = skb->dev;
- nskb->priority = skb->priority;
- nskb->protocol = skb->protocol;
- nskb->dst = dst_clone(skb->dst);
- memcpy(nskb->cb, skb->cb, sizeof(skb->cb));
- nskb->pkt_type = skb->pkt_type;
-
- th = (struct tcphdr *)(skb_network_header(nskb) + iph->ihl * 4);
- iph->tot_len = htons(frag_size + doffset);
- iph->id = htons(id);
- iph->check = 0;
- iph->check = ip_fast_csum((unsigned char *) iph, iph->ihl);
- th->seq = htonl(seq);
- if (offset + doffset + frag_size < skb->len)
- th->fin = th->psh = 0;
- netif_rx(nskb);
- offset += frag_size;
- seq += frag_size;
- id++;
- }
-
- dev_kfree_skb(skb);
-}
-#endif /* LOOPBACK_TSO */
-
/*
* The higher levels take care of making this non-reentrant (it's
* called with bh's disabled).
@@ -137,9 +75,6 @@ static int loopback_xmit(struct sk_buff *skb, struct net_device *dev)
skb_orphan(skb);
skb->protocol = eth_type_trans(skb,dev);
-#ifndef LOOPBACK_MUST_CHECKSUM
- skb->ip_summed = CHECKSUM_UNNECESSARY;
-#endif
#ifdef LOOPBACK_TSO
if (skb_is_gso(skb)) {
@@ -234,9 +169,7 @@ static void loopback_setup(struct net_device *dev)
dev->type = ARPHRD_LOOPBACK; /* 0x0001*/
dev->flags = IFF_LOOPBACK;
dev->features = NETIF_F_SG | NETIF_F_FRAGLIST
-#ifdef LOOPBACK_TSO
| NETIF_F_TSO
-#endif
| NETIF_F_NO_CSUM
| NETIF_F_HIGHDMA
| NETIF_F_LLTX
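
For reference, the skb_is_gso() check kept in the LOOPBACK_TSO path above
only tests whether the skb carries GSO state; a minimal sketch of the
helper as it appears in include/linux/skbuff.h of this vintage:

    static inline int skb_is_gso(const struct sk_buff *skb)
    {
            return skb_shinfo(skb)->gso_size;
    }
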
diff --git a/drivers/net/tun.c b/drivers/net/tun.c
index e6bbc639c2d..6daea0c9186 100644
--- a/drivers/net/tun.c
+++ b/drivers/net/tun.c
@@ -358,6 +358,66 @@ static unsigned int tun_chr_poll(struct file *file, poll_table * wait)
return mask;
}
+/* prepad is the amount to reserve at front. len is length after that.
+ * linear is a hint as to how much to copy (usually headers). */
+static struct sk_buff *tun_alloc_skb(size_t prepad, size_t len, size_t linear,
+ gfp_t gfp)
+{
+ struct sk_buff *skb;
+ unsigned int i;
+
+ skb = alloc_skb(prepad + len, gfp|__GFP_NOWARN);
+ if (skb) {
+ skb_reserve(skb, prepad);
+ skb_put(skb, len);
+ return skb;
+ }
+
+ /* Under a page? Don't bother with paged skb. */
+ if (prepad + len < PAGE_SIZE)
+ return NULL;
+
+ /* Start with a normal skb, and add pages. */
+ skb = alloc_skb(prepad + linear, gfp);
+ if (!skb)
+ return NULL;
+
+ skb_reserve(skb, prepad);
+ skb_put(skb, linear);
+
+ len -= linear;
+
+ for (i = 0; i < MAX_SKB_FRAGS; i++) {
+ skb_frag_t *f = &skb_shinfo(skb)->frags[i];
+
+ f->page = alloc_page(gfp|__GFP_ZERO);
+ if (!f->page)
+ break;
+
+ f->page_offset = 0;
+ f->size = PAGE_SIZE;
+
+ skb->data_len += PAGE_SIZE;
+ skb->len += PAGE_SIZE;
+ skb->truesize += PAGE_SIZE;
+ skb_shinfo(skb)->nr_frags++;
+
+ if (len < PAGE_SIZE) {
+ len = 0;
+ break;
+ }
+ len -= PAGE_SIZE;
+ }
+
+ /* Too large, or alloc fail? */
+ if (unlikely(len)) {
+ kfree_skb(skb);
+ skb = NULL;
+ }
+
+ return skb;
+}
+
/* Get packet from user space buffer */
static __inline__ ssize_t tun_get_user(struct tun_struct *tun, struct iovec *iv, size_t count)
{
@@ -391,14 +451,12 @@ static __inline__ ssize_t tun_get_user(struct tun_struct *tun, struct iovec *iv,
return -EINVAL;
}
- if (!(skb = alloc_skb(len + align, GFP_KERNEL))) {
+ if (!(skb = tun_alloc_skb(align, len, gso.hdr_len, GFP_KERNEL))) {
tun->dev->stats.rx_dropped++;
return -ENOMEM;
}
- if (align)
- skb_reserve(skb, align);
- if (memcpy_fromiovec(skb_put(skb, len), iv, len)) {
+ if (skb_copy_datagram_from_iovec(skb, 0, iv, len)) {
tun->dev->stats.rx_dropped++;
kfree_skb(skb);
return -EFAULT;
@@ -748,6 +806,36 @@ static int tun_set_iff(struct net *net, struct file *file, struct ifreq *ifr)
return err;
}
+static int tun_get_iff(struct net *net, struct file *file, struct ifreq *ifr)
+{
+ struct tun_struct *tun = file->private_data;
+
+ if (!tun)
+ return -EBADFD;
+
+ DBG(KERN_INFO "%s: tun_get_iff\n", tun->dev->name);
+
+ strcpy(ifr->ifr_name, tun->dev->name);
+
+ ifr->ifr_flags = 0;
+
+ if (tun->flags & TUN_TUN_DEV)
+ ifr->ifr_flags |= IFF_TUN;
+ else
+ ifr->ifr_flags |= IFF_TAP;
+
+ if (tun->flags & TUN_NO_PI)
+ ifr->ifr_flags |= IFF_NO_PI;
+
+ if (tun->flags & TUN_ONE_QUEUE)
+ ifr->ifr_flags |= IFF_ONE_QUEUE;
+
+ if (tun->flags & TUN_VNET_HDR)
+ ifr->ifr_flags |= IFF_VNET_HDR;
+
+ return 0;
+}
+
/* This is like a cut-down ethtool ops, except done via tun fd so no
* privs required. */
static int set_offload(struct net_device *dev, unsigned long arg)
@@ -833,6 +921,15 @@ static int tun_chr_ioctl(struct inode *inode, struct file *file,
DBG(KERN_INFO "%s: tun_chr_ioctl cmd %d\n", tun->dev->name, cmd);
switch (cmd) {
+ case TUNGETIFF:
+ ret = tun_get_iff(current->nsproxy->net_ns, file, &ifr);
+ if (ret)
+ return ret;
+
+ if (copy_to_user(argp, &ifr, sizeof(ifr)))
+ return -EFAULT;
+ break;
+
case TUNSETNOCSUM:
/* Disable/Enable checksum */
if (arg)
diff --git a/include/linux/if_tun.h b/include/linux/if_tun.h
index 4c6307ad9fd..8529f57ba26 100644
--- a/include/linux/if_tun.h
+++ b/include/linux/if_tun.h
@@ -45,6 +45,7 @@
#define TUNGETFEATURES _IOR('T', 207, unsigned int)
#define TUNSETOFFLOAD _IOW('T', 208, unsigned int)
#define TUNSETTXFILTER _IOW('T', 209, unsigned int)
+#define TUNGETIFF _IOR('T', 210, unsigned int)
/* TUNSETIFF ifr flags */
#define IFF_TUN 0x0001
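
The new TUNGETIFF ioctl reads back the name and flags of the attached
device. A minimal user-space sketch (not part of the patch; assumes an
fd already attached via TUNSETIFF, and fails with errno set if no
device is attached):

    #include <stdio.h>
    #include <string.h>
    #include <sys/ioctl.h>
    #include <linux/if.h>
    #include <linux/if_tun.h>

    /* Query an already-attached tun/tap fd via TUNGETIFF. */
    static int query_tun(int fd)
    {
            struct ifreq ifr;

            memset(&ifr, 0, sizeof(ifr));
            if (ioctl(fd, TUNGETIFF, &ifr) < 0) {
                    perror("TUNGETIFF");
                    return -1;
            }
            printf("%s: %s%s\n", ifr.ifr_name,
                   (ifr.ifr_flags & IFF_TAP) ? "tap" : "tun",
                   (ifr.ifr_flags & IFF_VNET_HDR) ? " vnet_hdr" : "");
            return 0;
    }
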
diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 358661c9990..90992371783 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -1452,6 +1452,10 @@ extern int skb_copy_datagram_iovec(const struct sk_buff *from,
extern int skb_copy_and_csum_datagram_iovec(struct sk_buff *skb,
int hlen,
struct iovec *iov);
+extern int skb_copy_datagram_from_iovec(struct sk_buff *skb,
+ int offset,
+ struct iovec *from,
+ int len);
extern void skb_free_datagram(struct sock *sk, struct sk_buff *skb);
extern int skb_kill_datagram(struct sock *sk, struct sk_buff *skb,
unsigned int flags);
diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h
index a7abfda3e44..84d25f2e618 100644
--- a/include/net/sch_generic.h
+++ b/include/net/sch_generic.h
@@ -27,6 +27,7 @@ enum qdisc_state_t
{
__QDISC_STATE_RUNNING,
__QDISC_STATE_SCHED,
+ __QDISC_STATE_DEACTIVATED,
};
struct qdisc_size_table {
@@ -60,7 +61,6 @@ struct Qdisc
struct gnet_stats_basic bstats;
struct gnet_stats_queue qstats;
struct gnet_stats_rate_est rate_est;
- struct rcu_head q_rcu;
int (*reshape_fail)(struct sk_buff *skb,
struct Qdisc *q);
diff --git a/net/bridge/br_device.c b/net/bridge/br_device.c
index 9b58d70b0e7..4f52c3d50eb 100644
--- a/net/bridge/br_device.c
+++ b/net/bridge/br_device.c
@@ -148,11 +148,16 @@ static int br_set_tx_csum(struct net_device *dev, u32 data)
}
static struct ethtool_ops br_ethtool_ops = {
- .get_drvinfo = br_getinfo,
- .get_link = ethtool_op_get_link,
- .set_sg = br_set_sg,
- .set_tx_csum = br_set_tx_csum,
- .set_tso = br_set_tso,
+ .get_drvinfo = br_getinfo,
+ .get_link = ethtool_op_get_link,
+ .get_tx_csum = ethtool_op_get_tx_csum,
+ .set_tx_csum = br_set_tx_csum,
+ .get_sg = ethtool_op_get_sg,
+ .set_sg = br_set_sg,
+ .get_tso = ethtool_op_get_tso,
+ .set_tso = br_set_tso,
+ .get_ufo = ethtool_op_get_ufo,
+ .get_flags = ethtool_op_get_flags,
};
void br_dev_setup(struct net_device *dev)
diff --git a/net/core/datagram.c b/net/core/datagram.c
index dd61dcad601..52f577a0f54 100644
--- a/net/core/datagram.c
+++ b/net/core/datagram.c
@@ -339,6 +339,93 @@ fault:
return -EFAULT;
}
+/**
+ * skb_copy_datagram_from_iovec - Copy a datagram from an iovec.
+ * @skb: buffer to copy to
+ * @offset: offset in the buffer to start copying to
+ * @from: io vector to copy from
+ * @len: amount of data to copy to buffer from iovec
+ *
+ * Returns 0 or -EFAULT.
+ * Note: the iovec is modified during the copy.
+ */
+int skb_copy_datagram_from_iovec(struct sk_buff *skb, int offset,
+ struct iovec *from, int len)
+{
+ int start = skb_headlen(skb);
+ int i, copy = start - offset;
+
+ /* Copy header. */
+ if (copy > 0) {
+ if (copy > len)
+ copy = len;
+ if (memcpy_fromiovec(skb->data + offset, from, copy))
+ goto fault;
+ if ((len -= copy) == 0)
+ return 0;
+ offset += copy;
+ }
+
+ /* Copy paged appendix. Hmm... why does this look so complicated? */
+ for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
+ int end;
+
+ WARN_ON(start > offset + len);
+
+ end = start + skb_shinfo(skb)->frags[i].size;
+ if ((copy = end - offset) > 0) {
+ int err;
+ u8 *vaddr;
+ skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
+ struct page *page = frag->page;
+
+ if (copy > len)
+ copy = len;
+ vaddr = kmap(page);
+ err = memcpy_fromiovec(vaddr + frag->page_offset +
+ offset - start, from, copy);
+ kunmap(page);
+ if (err)
+ goto fault;
+
+ if (!(len -= copy))
+ return 0;
+ offset += copy;
+ }
+ start = end;
+ }
+
+ if (skb_shinfo(skb)->frag_list) {
+ struct sk_buff *list = skb_shinfo(skb)->frag_list;
+
+ for (; list; list = list->next) {
+ int end;
+
+ WARN_ON(start > offset + len);
+
+ end = start + list->len;
+ if ((copy = end - offset) > 0) {
+ if (copy > len)
+ copy = len;
+ if (skb_copy_datagram_from_iovec(list,
+ offset - start,
+ from, copy))
+ goto fault;
+ if ((len -= copy) == 0)
+ return 0;
+ offset += copy;
+ }
+ start = end;
+ }
+ }
+ if (!len)
+ return 0;
+
+fault:
+ return -EFAULT;
+}
+EXPORT_SYMBOL(skb_copy_datagram_from_iovec);
+
static int skb_copy_and_csum_datagram(const struct sk_buff *skb, int offset,
u8 __user *to, int len,
__wsum *csump)
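
skb_copy_datagram_from_iovec() walks the linear header, the paged frags
and any frag_list, so it also handles the partly non-linear skbs that
tun_alloc_skb() now builds. A hypothetical caller sketch (the helper
name is invented here), mirroring tun_get_user() above; note that the
iovec is consumed as it is copied:

    static struct sk_buff *skb_from_iovec(struct iovec *iv, size_t len)
    {
            struct sk_buff *skb = alloc_skb(len, GFP_KERNEL);

            if (!skb)
                    return NULL;
            skb_put(skb, len);
            if (skb_copy_datagram_from_iovec(skb, 0, iv, len)) {
                    kfree_skb(skb); /* userspace fault: -EFAULT */
                    return NULL;
            }
            return skb;
    }
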
diff --git a/net/core/dev.c b/net/core/dev.c
index 600bb23c4c2..8d133802372 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -1339,19 +1339,23 @@ static void dev_queue_xmit_nit(struct sk_buff *skb, struct net_device *dev)
}
-void __netif_schedule(struct Qdisc *q)
+static inline void __netif_reschedule(struct Qdisc *q)
{
- if (!test_and_set_bit(__QDISC_STATE_SCHED, &q->state)) {
- struct softnet_data *sd;
- unsigned long flags;
+ struct softnet_data *sd;
+ unsigned long flags;
- local_irq_save(flags);
- sd = &__get_cpu_var(softnet_data);
- q->next_sched = sd->output_queue;
- sd->output_queue = q;
- raise_softirq_irqoff(NET_TX_SOFTIRQ);
- local_irq_restore(flags);
- }
+ local_irq_save(flags);
+ sd = &__get_cpu_var(softnet_data);
+ q->next_sched = sd->output_queue;
+ sd->output_queue = q;
+ raise_softirq_irqoff(NET_TX_SOFTIRQ);
+ local_irq_restore(flags);
+}
+
+void __netif_schedule(struct Qdisc *q)
+{
+ if (!test_and_set_bit(__QDISC_STATE_SCHED, &q->state))
+ __netif_reschedule(q);
}
EXPORT_SYMBOL(__netif_schedule);
@@ -1800,9 +1804,13 @@ gso:
spin_lock(root_lock);
- rc = qdisc_enqueue_root(skb, q);
- qdisc_run(q);
-
+ if (unlikely(test_bit(__QDISC_STATE_DEACTIVATED, &q->state))) {
+ kfree_skb(skb);
+ rc = NET_XMIT_DROP;
+ } else {
+ rc = qdisc_enqueue_root(skb, q);
+ qdisc_run(q);
+ }
spin_unlock(root_lock);
goto out;
@@ -1974,15 +1982,15 @@ static void net_tx_action(struct softirq_action *h)
head = head->next_sched;
- smp_mb__before_clear_bit();
- clear_bit(__QDISC_STATE_SCHED, &q->state);
-
root_lock = qdisc_lock(q);
if (spin_trylock(root_lock)) {
+ smp_mb__before_clear_bit();
+ clear_bit(__QDISC_STATE_SCHED,
+ &q->state);
qdisc_run(q);
spin_unlock(root_lock);
} else {
- __netif_schedule(q);
+ __netif_reschedule(q);
}
}
}
@@ -2084,7 +2092,8 @@ static int ing_filter(struct sk_buff *skb)
q = rxq->qdisc;
if (q != &noop_qdisc) {
spin_lock(qdisc_lock(q));
- result = qdisc_enqueue_root(skb, q);
+ if (likely(!test_bit(__QDISC_STATE_DEACTIVATED, &q->state)))
+ result = qdisc_enqueue_root(skb, q);
spin_unlock(qdisc_lock(q));
}
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 84640172d65..ca1ccdf1ef7 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -2256,14 +2256,7 @@ struct sk_buff *skb_segment(struct sk_buff *skb, int features)
segs = nskb;
tail = nskb;
- nskb->dev = skb->dev;
- skb_copy_queue_mapping(nskb, skb);
- nskb->priority = skb->priority;
- nskb->protocol = skb->protocol;
- nskb->vlan_tci = skb->vlan_tci;
- nskb->dst = dst_clone(skb->dst);
- memcpy(nskb->cb, skb->cb, sizeof(skb->cb));
- nskb->pkt_type = skb->pkt_type;
+ __copy_skb_header(nskb, skb);
nskb->mac_len = skb->mac_len;
skb_reserve(nskb, headroom);
@@ -2274,6 +2267,7 @@ struct sk_buff *skb_segment(struct sk_buff *skb, int features)
skb_copy_from_linear_data(skb, skb_put(nskb, doffset),
doffset);
if (!sg) {
+ nskb->ip_summed = CHECKSUM_NONE;
nskb->csum = skb_copy_and_csum_bits(skb, offset,
skb_put(nskb, len),
len, 0);
@@ -2283,8 +2277,6 @@ struct sk_buff *skb_segment(struct sk_buff *skb, int features)
frag = skb_shinfo(nskb)->frags;
k = 0;
- nskb->ip_summed = CHECKSUM_PARTIAL;
- nskb->csum = skb->csum;
skb_copy_from_linear_data_offset(skb, offset,
skb_put(nskb, hsize), hsize);
diff --git a/net/dccp/input.c b/net/dccp/input.c
index df2f110df94..803933ab396 100644
--- a/net/dccp/input.c
+++ b/net/dccp/input.c
@@ -411,12 +411,6 @@ static int dccp_rcv_request_sent_state_process(struct sock *sk,
struct dccp_sock *dp = dccp_sk(sk);
long tstamp = dccp_timestamp();
- /* Stop the REQUEST timer */
- inet_csk_clear_xmit_timer(sk, ICSK_TIME_RETRANS);
- WARN_ON(sk->sk_send_head == NULL);
- __kfree_skb(sk->sk_send_head);
- sk->sk_send_head = NULL;
-
if (!between48(DCCP_SKB_CB(skb)->dccpd_ack_seq,
dp->dccps_awl, dp->dccps_awh)) {
dccp_pr_debug("invalid ackno: S.AWL=%llu, "
@@ -441,6 +435,12 @@ static int dccp_rcv_request_sent_state_process(struct sock *sk,
DCCP_ACKVEC_STATE_RECEIVED))
goto out_invalid_packet; /* FIXME: change error code */
+ /* Stop the REQUEST timer */
+ inet_csk_clear_xmit_timer(sk, ICSK_TIME_RETRANS);
+ WARN_ON(sk->sk_send_head == NULL);
+ kfree_skb(sk->sk_send_head);
+ sk->sk_send_head = NULL;
+
dp->dccps_isr = DCCP_SKB_CB(skb)->dccpd_seq;
dccp_update_gsr(sk, dp->dccps_isr);
/*
diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c
index 741cfcd96f8..4e5eac301f9 100644
--- a/net/ipv6/ipv6_sockglue.c
+++ b/net/ipv6/ipv6_sockglue.c
@@ -911,7 +911,7 @@ static int do_ipv6_getsockopt(struct sock *sk, int level, int optname,
} else {
if (np->rxopt.bits.rxinfo) {
struct in6_pktinfo src_info;
- src_info.ipi6_ifindex = np->mcast_oif;
+ src_info.ipi6_ifindex = np->mcast_oif ? np->mcast_oif : sk->sk_bound_dev_if;
ipv6_addr_copy(&src_info.ipi6_addr, &np->daddr);
put_cmsg(&msg, SOL_IPV6, IPV6_PKTINFO, sizeof(src_info), &src_info);
}
@@ -921,7 +921,7 @@ static int do_ipv6_getsockopt(struct sock *sk, int level, int optname,
}
if (np->rxopt.bits.rxoinfo) {
struct in6_pktinfo src_info;
- src_info.ipi6_ifindex = np->mcast_oif;
+ src_info.ipi6_ifindex = np->mcast_oif ? np->mcast_oif : sk->sk_bound_dev_if;
ipv6_addr_copy(&src_info.ipi6_addr, &np->daddr);
put_cmsg(&msg, SOL_IPV6, IPV6_2292PKTINFO, sizeof(src_info), &src_info);
}
diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c
index c25465e5607..d91a2338877 100644
--- a/net/sched/sch_api.c
+++ b/net/sched/sch_api.c
@@ -27,6 +27,7 @@
#include <linux/kmod.h>
#include <linux/list.h>
#include <linux/hrtimer.h>
+#include <linux/lockdep.h>
#include <net/net_namespace.h>
#include <net/sock.h>
@@ -426,7 +427,7 @@ static enum hrtimer_restart qdisc_watchdog(struct hrtimer *timer)
wd->qdisc->flags &= ~TCQ_F_THROTTLED;
smp_wmb();
- __netif_schedule(wd->qdisc);
+ __netif_schedule(qdisc_root(wd->qdisc));
return HRTIMER_NORESTART;
}
@@ -637,11 +638,8 @@ static void notify_and_destroy(struct sk_buff *skb, struct nlmsghdr *n, u32 clid
if (new || old)
qdisc_notify(skb, n, clid, old, new);
- if (old) {
- spin_lock_bh(&old->q.lock);
+ if (old)
qdisc_destroy(old);
- spin_unlock_bh(&old->q.lock);
- }
}
/* Graft qdisc "new" to class "classid" of qdisc "parent" or
@@ -707,6 +705,10 @@ static int qdisc_graft(struct net_device *dev, struct Qdisc *parent,
return err;
}
+/* lockdep annotation is needed for ingress; egress gets it only for name */
+static struct lock_class_key qdisc_tx_lock;
+static struct lock_class_key qdisc_rx_lock;
+
/*
Allocate and initialize new qdisc.
@@ -767,6 +769,7 @@ qdisc_create(struct net_device *dev, struct netdev_queue *dev_queue,
if (handle == TC_H_INGRESS) {
sch->flags |= TCQ_F_INGRESS;
handle = TC_H_MAKE(TC_H_INGRESS, 0);
+ lockdep_set_class(qdisc_lock(sch), &qdisc_rx_lock);
} else {
if (handle == 0) {
handle = qdisc_alloc_handle(dev);
@@ -774,6 +777,7 @@ qdisc_create(struct net_device *dev, struct netdev_queue *dev_queue,
if (handle == 0)
goto err_out3;
}
+ lockdep_set_class(qdisc_lock(sch), &qdisc_tx_lock);
}
sch->handle = handle;
@@ -1085,16 +1089,10 @@ create_n_graft:
graft:
if (1) {
- spinlock_t *root_lock;
-
err = qdisc_graft(dev, p, skb, n, clid, q, NULL);
if (err) {
- if (q) {
- root_lock = qdisc_root_lock(q);
- spin_lock_bh(root_lock);
+ if (q)
qdisc_destroy(q);
- spin_unlock_bh(root_lock);
- }
return err;
}
}
diff --git a/net/sched/sch_cbq.c b/net/sched/sch_cbq.c
index 4e261ce62f4..47ef492c4ff 100644
--- a/net/sched/sch_cbq.c
+++ b/net/sched/sch_cbq.c
@@ -654,7 +654,7 @@ static enum hrtimer_restart cbq_undelay(struct hrtimer *timer)
}
sch->flags &= ~TCQ_F_THROTTLED;
- __netif_schedule(sch);
+ __netif_schedule(qdisc_root(sch));
return HRTIMER_NORESTART;
}
diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c
index 468574682ca..c3ed4d44fc1 100644
--- a/net/sched/sch_generic.c
+++ b/net/sched/sch_generic.c
@@ -518,14 +518,17 @@ void qdisc_reset(struct Qdisc *qdisc)
}
EXPORT_SYMBOL(qdisc_reset);
-/* this is the rcu callback function to clean up a qdisc when there
- * are no further references to it */
-
-static void __qdisc_destroy(struct rcu_head *head)
+void qdisc_destroy(struct Qdisc *qdisc)
{
- struct Qdisc *qdisc = container_of(head, struct Qdisc, q_rcu);
const struct Qdisc_ops *ops = qdisc->ops;
+ if (qdisc->flags & TCQ_F_BUILTIN ||
+ !atomic_dec_and_test(&qdisc->refcnt))
+ return;
+
+ if (qdisc->parent)
+ list_del(&qdisc->list);
+
#ifdef CONFIG_NET_SCHED
qdisc_put_stab(qdisc->stab);
#endif
@@ -542,20 +545,6 @@ static void __qdisc_destroy(struct rcu_head *head)
kfree((char *) qdisc - qdisc->padded);
}
-
-/* Under qdisc_lock(qdisc) and BH! */
-
-void qdisc_destroy(struct Qdisc *qdisc)
-{
- if (qdisc->flags & TCQ_F_BUILTIN ||
- !atomic_dec_and_test(&qdisc->refcnt))
- return;
-
- if (qdisc->parent)
- list_del(&qdisc->list);
-
- call_rcu(&qdisc->q_rcu, __qdisc_destroy);
-}
EXPORT_SYMBOL(qdisc_destroy);
static bool dev_all_qdisc_sleeping_noop(struct net_device *dev)
@@ -597,6 +586,9 @@ static void transition_one_qdisc(struct net_device *dev,
struct Qdisc *new_qdisc = dev_queue->qdisc_sleeping;
int *need_watchdog_p = _need_watchdog;
+ if (!(new_qdisc->flags & TCQ_F_BUILTIN))
+ clear_bit(__QDISC_STATE_DEACTIVATED, &new_qdisc->state);
+
rcu_assign_pointer(dev_queue->qdisc, new_qdisc);
if (need_watchdog_p && new_qdisc != &noqueue_qdisc)
*need_watchdog_p = 1;
@@ -640,6 +632,9 @@ static void dev_deactivate_queue(struct net_device *dev,
if (qdisc) {
spin_lock_bh(qdisc_lock(qdisc));
+ if (!(qdisc->flags & TCQ_F_BUILTIN))
+ set_bit(__QDISC_STATE_DEACTIVATED, &qdisc->state);
+
dev_queue->qdisc = qdisc_default;
qdisc_reset(qdisc);
@@ -647,7 +642,7 @@ static void dev_deactivate_queue(struct net_device *dev,
}
}
-static bool some_qdisc_is_busy(struct net_device *dev, int lock)
+static bool some_qdisc_is_busy(struct net_device *dev)
{
unsigned int i;
@@ -661,14 +656,12 @@ static bool some_qdisc_is_busy(struct net_device *dev, int lock)
q = dev_queue->qdisc_sleeping;
root_lock = qdisc_lock(q);
- if (lock)
- spin_lock_bh(root_lock);
+ spin_lock_bh(root_lock);
val = (test_bit(__QDISC_STATE_RUNNING, &q->state) ||
test_bit(__QDISC_STATE_SCHED, &q->state));
- if (lock)
- spin_unlock_bh(root_lock);
+ spin_unlock_bh(root_lock);
if (val)
return true;
@@ -678,8 +671,6 @@ static bool some_qdisc_is_busy(struct net_device *dev, int lock)
void dev_deactivate(struct net_device *dev)
{
- bool running;
-
netdev_for_each_tx_queue(dev, dev_deactivate_queue, &noop_qdisc);
dev_deactivate_queue(dev, &dev->rx_queue, &noop_qdisc);
@@ -689,25 +680,8 @@ void dev_deactivate(struct net_device *dev)
synchronize_rcu();
/* Wait for outstanding qdisc_run calls. */
- do {
- while (some_qdisc_is_busy(dev, 0))
- yield();
-
- /*
- * Double-check inside queue lock to ensure that all effects
- * of the queue run are visible when we return.
- */
- running = some_qdisc_is_busy(dev, 1);
-
- /*
- * The running flag should never be set at this point because
- * we've already set dev->qdisc to noop_qdisc *inside* the same
- * pair of spin locks. That is, if any qdisc_run starts after
- * our initial test it should see the noop_qdisc and then
- * clear the RUNNING bit before dropping the queue lock. So
- * if it is set here then we've found a bug.
- */
- } while (WARN_ON_ONCE(running));
+ while (some_qdisc_is_busy(dev))
+ yield();
}
static void dev_init_scheduler_queue(struct net_device *dev,
@@ -736,14 +710,10 @@ static void shutdown_scheduler_queue(struct net_device *dev,
struct Qdisc *qdisc_default = _qdisc_default;
if (qdisc) {
- spinlock_t *root_lock = qdisc_lock(qdisc);
-
dev_queue->qdisc = qdisc_default;
dev_queue->qdisc_sleeping = qdisc_default;
- spin_lock_bh(root_lock);
qdisc_destroy(qdisc);
- spin_unlock_bh(root_lock);
}
}
diff --git a/net/sched/sch_htb.c b/net/sched/sch_htb.c
index 6febd245e62..0df0df202ed 100644
--- a/net/sched/sch_htb.c
+++ b/net/sched/sch_htb.c
@@ -577,7 +577,7 @@ static int htb_enqueue(struct sk_buff *skb, struct Qdisc *sch)
sch->qstats.drops++;
cl->qstats.drops++;
}
- return NET_XMIT_DROP;
+ return ret;
} else {
cl->bstats.packets +=
skb_is_gso(skb)?skb_shinfo(skb)->gso_segs:1;
@@ -623,7 +623,7 @@ static int htb_requeue(struct sk_buff *skb, struct Qdisc *sch)
sch->qstats.drops++;
cl->qstats.drops++;
}
- return NET_XMIT_DROP;
+ return ret;
} else
htb_activate(q, cl);
diff --git a/net/sched/sch_prio.c b/net/sched/sch_prio.c
index eac197610ed..a6697c686c7 100644
--- a/net/sched/sch_prio.c
+++ b/net/sched/sch_prio.c
@@ -113,11 +113,11 @@ prio_requeue(struct sk_buff *skb, struct Qdisc* sch)
if ((ret = qdisc->ops->requeue(skb, qdisc)) == NET_XMIT_SUCCESS) {
sch->q.qlen++;
sch->qstats.requeues++;
- return 0;
+ return NET_XMIT_SUCCESS;
}
if (net_xmit_drop_count(ret))
sch->qstats.drops++;
- return NET_XMIT_DROP;
+ return ret;
}
diff --git a/net/sched/sch_tbf.c b/net/sched/sch_tbf.c
index 7d3b7ff3bf0..94c61598b86 100644
--- a/net/sched/sch_tbf.c
+++ b/net/sched/sch_tbf.c
@@ -123,15 +123,8 @@ static int tbf_enqueue(struct sk_buff *skb, struct Qdisc* sch)
struct tbf_sched_data *q = qdisc_priv(sch);
int ret;
- if (qdisc_pkt_len(skb) > q->max_size) {
- sch->qstats.drops++;
-#ifdef CONFIG_NET_CLS_ACT
- if (sch->reshape_fail == NULL || sch->reshape_fail(skb, sch))
-#endif
- kfree_skb(skb);
-
- return NET_XMIT_DROP;
- }
+ if (qdisc_pkt_len(skb) > q->max_size)
+ return qdisc_reshape_fail(skb, sch);
ret = qdisc_enqueue(skb, q->qdisc);
if (ret != 0) {