From ee05d6939ed17b55e9c2466af32c208e0d547eb8 Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Tue, 14 Sep 2010 15:15:52 +0200 Subject: vhost-net: fix range checking in mrg bufs case In mergeable buffer case, we use headcount, log_num and seg as indexes in same-size arrays, and we know that headcount <= seg and log_num equals either 0 or seg. Therefore, the right thing to do is range-check seg, not headcount as we do now: these will be different if guest chains s/g descriptors (this does not happen now, but we can not trust the guest). Long term, we should add BUG_ON checks to verify two other indexes are what we think they should be. Reported-by: Jason Wang Signed-off-by: Michael S. Tsirkin --- drivers/vhost/net.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c index 29e850a7a2f..7c8008225ee 100644 --- a/drivers/vhost/net.c +++ b/drivers/vhost/net.c @@ -243,7 +243,7 @@ static int get_rx_bufs(struct vhost_virtqueue *vq, int r, nlogs = 0; while (datalen > 0) { - if (unlikely(headcount >= VHOST_NET_MAX_SG)) { + if (unlikely(seg >= VHOST_NET_MAX_SG)) { r = -ENOBUFS; goto err; } -- cgit v1.2.3-70-g09d2 From 7acc7c683a747689aaaaad4fce1683fc3f85e552 Mon Sep 17 00:00:00 2001 From: Wey-Yi Guy Date: Wed, 8 Sep 2010 08:30:20 -0700 Subject: iwlwifi: do not perform force reset while doing scan When a uCode error condition is detected, the driver tries to perform either an rf reset or a firmware reload in order to bring the device back to working condition. If rf reset is required and scan is in process, there is no need to issue rf reset since scan already reset the rf. If firmware reload is required and scan is in process, skip the reload request. There is a possibility that a firmware reload during a scan causes problems. 
[ 485.804046] WARNING: at net/mac80211/main.c:310 ieee80211_restart_hw+0x28/0x62() [ 485.804049] Hardware name: Latitude E6400 [ 485.804052] ieee80211_restart_hw called with hardware scan in progress [ 485.804054] Modules linked in: iwlagn iwlcore bnep sco rfcomm l2cap crc16 bluetooth [last unloaded: iwlcore] [ 485.804069] Pid: 812, comm: kworker/u:3 Tainted: G W 2.6.36-rc3-wl+ #74 [ 485.804072] Call Trace: [ 485.804079] [] warn_slowpath_common+0x60/0x75 [ 485.804084] [] warn_slowpath_fmt+0x26/0x2a [ 485.804089] [] ieee80211_restart_hw+0x28/0x62 [ 485.804102] [] iwl_bg_restart+0x113/0x150 [iwlagn] [ 485.804108] [] process_one_work+0x181/0x25c [ 485.804119] [] ? iwl_bg_restart+0x0/0x150 [iwlagn] [ 485.804124] [] worker_thread+0xf9/0x1f2 [ 485.804128] [] ? worker_thread+0x0/0x1f2 [ 485.804133] [] kthread+0x64/0x69 [ 485.804137] [] ? kthread+0x0/0x69 [ 485.804141] [] kernel_thread_helper+0x6/0x10 [ 485.804145] ---[ end trace 3d4ebdc02d524bbb ]--- [ 485.804148] WG> 1 [ 485.804153] Pid: 812, comm: kworker/u:3 Tainted: G W 2.6.36-rc3-wl+ #74 [ 485.804156] Call Trace: [ 485.804161] [] ? ieee80211_restart_hw+0x5c/0x62 [ 485.804172] [] iwl_bg_restart+0x118/0x150 [iwlagn] [ 485.804177] [] process_one_work+0x181/0x25c [ 485.804188] [] ? iwl_bg_restart+0x0/0x150 [iwlagn] [ 485.804192] [] worker_thread+0xf9/0x1f2 [ 485.804197] [] ? worker_thread+0x0/0x1f2 [ 485.804201] [] kthread+0x64/0x69 [ 485.804205] [] ? 
kthread+0x0/0x69 [ 485.804209] [] kernel_thread_helper+0x6/0x10 Signed-off-by: Wey-Yi Guy --- drivers/net/wireless/iwlwifi/iwl-core.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/net/wireless/iwlwifi/iwl-core.c b/drivers/net/wireless/iwlwifi/iwl-core.c index 07dbc279644..e23c4060a0f 100644 --- a/drivers/net/wireless/iwlwifi/iwl-core.c +++ b/drivers/net/wireless/iwlwifi/iwl-core.c @@ -2613,6 +2613,11 @@ int iwl_force_reset(struct iwl_priv *priv, int mode, bool external) if (test_bit(STATUS_EXIT_PENDING, &priv->status)) return -EINVAL; + if (test_bit(STATUS_SCANNING, &priv->status)) { + IWL_DEBUG_INFO(priv, "scan in progress.\n"); + return -EINVAL; + } + if (mode >= IWL_MAX_FORCE_RESET) { IWL_DEBUG_INFO(priv, "invalid reset request.\n"); return -EINVAL; -- cgit v1.2.3-70-g09d2 From 04746ff1289f75af26af279eb4b0b3e231677ee4 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 17 Sep 2010 22:58:08 -0700 Subject: qlcnic: dont assume NET_IP_ALIGN is 2 qlcnic driver allocates rx skbs and gives to hardware two bytes of extra storage, allowing for corruption of kernel data. NET_IP_ALIGN being 0 on some platforms (including x86), drivers should not assume it's 2. rds_ring->skb_size = rds_ring->dma_size + NET_IP_ALIGN; ... skb = dev_alloc_skb(rds_ring->skb_size); skb_reserve(skb, 2); pci_map_single(pdev, skb->data, rds_ring->dma_size, PCI_DMA_FROMDEVICE); (and rds_ring->skb_size == rds_ring->dma_size) -> bug Because of extra alignment (1500 + 32) -> four extra bytes are available before the struct skb_shared_info, so corruption is not noticed. Note: this driver could use netdev_alloc_skb_ip_align() Signed-off-by: Eric Dumazet Signed-off-by: David S. 
Miller --- drivers/net/qlcnic/qlcnic_init.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/qlcnic/qlcnic_init.c b/drivers/net/qlcnic/qlcnic_init.c index 75ba744b173..60ab753f809 100644 --- a/drivers/net/qlcnic/qlcnic_init.c +++ b/drivers/net/qlcnic/qlcnic_init.c @@ -1316,7 +1316,7 @@ qlcnic_alloc_rx_skb(struct qlcnic_adapter *adapter, return -ENOMEM; } - skb_reserve(skb, 2); + skb_reserve(skb, NET_IP_ALIGN); dma = pci_map_single(pdev, skb->data, rds_ring->dma_size, PCI_DMA_FROMDEVICE); -- cgit v1.2.3-70-g09d2 From 842c74bffcdb1d305ccd9e61e417cceae86b9963 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 20 Sep 2010 10:06:12 -0700 Subject: ip_gre: CONFIG_IPV6_MODULE support ipv6 can be a module, we should test CONFIG_IPV6 and CONFIG_IPV6_MODULE to enable ipv6 bits in ip_gre. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv4/ip_gre.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c index 945b20a5ad5..35c93e8b6a4 100644 --- a/net/ipv4/ip_gre.c +++ b/net/ipv4/ip_gre.c @@ -45,7 +45,7 @@ #include #include -#ifdef CONFIG_IPV6 +#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) #include #include #include @@ -699,7 +699,7 @@ static netdev_tx_t ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev if ((dst = rt->rt_gateway) == 0) goto tx_error_icmp; } -#ifdef CONFIG_IPV6 +#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) else if (skb->protocol == htons(ETH_P_IPV6)) { struct in6_addr *addr6; int addr_type; @@ -774,7 +774,7 @@ static netdev_tx_t ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev goto tx_error; } } -#ifdef CONFIG_IPV6 +#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) else if (skb->protocol == htons(ETH_P_IPV6)) { struct rt6_info *rt6 = (struct rt6_info *)skb_dst(skb); @@ -850,7 +850,7 @@ static netdev_tx_t ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev if ((iph->ttl = tiph->ttl) == 0) { if 
(skb->protocol == htons(ETH_P_IP)) iph->ttl = old_iph->ttl; -#ifdef CONFIG_IPV6 +#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) else if (skb->protocol == htons(ETH_P_IPV6)) iph->ttl = ((struct ipv6hdr *)old_iph)->hop_limit; #endif -- cgit v1.2.3-70-g09d2 From df6d02300f7c2fbd0fbe626d819c8e5237d72c62 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Fri, 17 Sep 2010 00:38:25 +0200 Subject: wext: fix potential private ioctl memory content leak When a driver doesn't fill the entire buffer, old heap contents may remain, and if it also doesn't update the length properly, this old heap content will be copied back to userspace. It is very unlikely that this happens in any of the drivers using private ioctls since it would show up as junk being reported by iwpriv, but it seems better to be safe here, so use kzalloc. Reported-by: Jeff Mahoney Cc: stable@kernel.org Signed-off-by: Johannes Berg Signed-off-by: John W. Linville --- net/wireless/wext-priv.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/wireless/wext-priv.c b/net/wireless/wext-priv.c index 3feb28e41c5..674d426a9d2 100644 --- a/net/wireless/wext-priv.c +++ b/net/wireless/wext-priv.c @@ -152,7 +152,7 @@ static int ioctl_private_iw_point(struct iw_point *iwp, unsigned int cmd, } else if (!iwp->pointer) return -EFAULT; - extra = kmalloc(extra_size, GFP_KERNEL); + extra = kzalloc(extra_size, GFP_KERNEL); if (!extra) return -ENOMEM; -- cgit v1.2.3-70-g09d2 From 8444cf712c5f71845cba9dc30d8f530ff0d5ff83 Mon Sep 17 00:00:00 2001 From: Thomas Egerer Date: Mon, 20 Sep 2010 11:11:38 -0700 Subject: xfrm: Allow different selector family in temporary state The family parameter xfrm_state_find is used to find a state matching a certain policy. This value is set to the template's family (encap_family) right before xfrm_state_find is called. 
The family parameter is however also used to construct a temporary state in xfrm_state_find itself which is wrong for inter-family scenarios because it produces a selector for the wrong family. Since this selector is included in the xfrm_user_acquire structure, user space programs misinterpret IPv6 addresses as IPv4 and vice versa. This patch splits up the original init_tempsel function into a part that initializes the selector respectively the props and id of the temporary state, to allow for differing ip address families whithin the state. Signed-off-by: Thomas Egerer Signed-off-by: Steffen Klassert Signed-off-by: David S. Miller --- include/net/xfrm.h | 4 ++-- net/ipv4/xfrm4_state.c | 33 +++++++++++++++++++-------------- net/ipv6/xfrm6_state.c | 33 +++++++++++++++++++-------------- net/xfrm/xfrm_policy.c | 5 ++--- net/xfrm/xfrm_state.c | 45 +++++++++++++++++++++++++++------------------ 5 files changed, 69 insertions(+), 51 deletions(-) diff --git a/include/net/xfrm.h b/include/net/xfrm.h index fc8f36dd0f5..4f53532d4c2 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -298,8 +298,8 @@ struct xfrm_state_afinfo { const struct xfrm_type *type_map[IPPROTO_MAX]; struct xfrm_mode *mode_map[XFRM_MODE_MAX]; int (*init_flags)(struct xfrm_state *x); - void (*init_tempsel)(struct xfrm_state *x, struct flowi *fl, - struct xfrm_tmpl *tmpl, + void (*init_tempsel)(struct xfrm_selector *sel, struct flowi *fl); + void (*init_temprop)(struct xfrm_state *x, struct xfrm_tmpl *tmpl, xfrm_address_t *daddr, xfrm_address_t *saddr); int (*tmpl_sort)(struct xfrm_tmpl **dst, struct xfrm_tmpl **src, int n); int (*state_sort)(struct xfrm_state **dst, struct xfrm_state **src, int n); diff --git a/net/ipv4/xfrm4_state.c b/net/ipv4/xfrm4_state.c index 1ef1366a0a0..47947624ecc 100644 --- a/net/ipv4/xfrm4_state.c +++ b/net/ipv4/xfrm4_state.c @@ -21,21 +21,25 @@ static int xfrm4_init_flags(struct xfrm_state *x) } static void -__xfrm4_init_tempsel(struct xfrm_state *x, struct flowi *fl, - 
struct xfrm_tmpl *tmpl, - xfrm_address_t *daddr, xfrm_address_t *saddr) +__xfrm4_init_tempsel(struct xfrm_selector *sel, struct flowi *fl) +{ + sel->daddr.a4 = fl->fl4_dst; + sel->saddr.a4 = fl->fl4_src; + sel->dport = xfrm_flowi_dport(fl); + sel->dport_mask = htons(0xffff); + sel->sport = xfrm_flowi_sport(fl); + sel->sport_mask = htons(0xffff); + sel->family = AF_INET; + sel->prefixlen_d = 32; + sel->prefixlen_s = 32; + sel->proto = fl->proto; + sel->ifindex = fl->oif; +} + +static void +xfrm4_init_temprop(struct xfrm_state *x, struct xfrm_tmpl *tmpl, + xfrm_address_t *daddr, xfrm_address_t *saddr) { - x->sel.daddr.a4 = fl->fl4_dst; - x->sel.saddr.a4 = fl->fl4_src; - x->sel.dport = xfrm_flowi_dport(fl); - x->sel.dport_mask = htons(0xffff); - x->sel.sport = xfrm_flowi_sport(fl); - x->sel.sport_mask = htons(0xffff); - x->sel.family = AF_INET; - x->sel.prefixlen_d = 32; - x->sel.prefixlen_s = 32; - x->sel.proto = fl->proto; - x->sel.ifindex = fl->oif; x->id = tmpl->id; if (x->id.daddr.a4 == 0) x->id.daddr.a4 = daddr->a4; @@ -70,6 +74,7 @@ static struct xfrm_state_afinfo xfrm4_state_afinfo = { .owner = THIS_MODULE, .init_flags = xfrm4_init_flags, .init_tempsel = __xfrm4_init_tempsel, + .init_temprop = xfrm4_init_temprop, .output = xfrm4_output, .extract_input = xfrm4_extract_input, .extract_output = xfrm4_extract_output, diff --git a/net/ipv6/xfrm6_state.c b/net/ipv6/xfrm6_state.c index f417b77fa0e..a67575d472a 100644 --- a/net/ipv6/xfrm6_state.c +++ b/net/ipv6/xfrm6_state.c @@ -20,23 +20,27 @@ #include static void -__xfrm6_init_tempsel(struct xfrm_state *x, struct flowi *fl, - struct xfrm_tmpl *tmpl, - xfrm_address_t *daddr, xfrm_address_t *saddr) +__xfrm6_init_tempsel(struct xfrm_selector *sel, struct flowi *fl) { /* Initialize temporary selector matching only * to current session. 
*/ - ipv6_addr_copy((struct in6_addr *)&x->sel.daddr, &fl->fl6_dst); - ipv6_addr_copy((struct in6_addr *)&x->sel.saddr, &fl->fl6_src); - x->sel.dport = xfrm_flowi_dport(fl); - x->sel.dport_mask = htons(0xffff); - x->sel.sport = xfrm_flowi_sport(fl); - x->sel.sport_mask = htons(0xffff); - x->sel.family = AF_INET6; - x->sel.prefixlen_d = 128; - x->sel.prefixlen_s = 128; - x->sel.proto = fl->proto; - x->sel.ifindex = fl->oif; + ipv6_addr_copy((struct in6_addr *)&sel->daddr, &fl->fl6_dst); + ipv6_addr_copy((struct in6_addr *)&sel->saddr, &fl->fl6_src); + sel->dport = xfrm_flowi_dport(fl); + sel->dport_mask = htons(0xffff); + sel->sport = xfrm_flowi_sport(fl); + sel->sport_mask = htons(0xffff); + sel->family = AF_INET6; + sel->prefixlen_d = 128; + sel->prefixlen_s = 128; + sel->proto = fl->proto; + sel->ifindex = fl->oif; +} + +static void +xfrm6_init_temprop(struct xfrm_state *x, struct xfrm_tmpl *tmpl, + xfrm_address_t *daddr, xfrm_address_t *saddr) +{ x->id = tmpl->id; if (ipv6_addr_any((struct in6_addr*)&x->id.daddr)) memcpy(&x->id.daddr, daddr, sizeof(x->sel.daddr)); @@ -168,6 +172,7 @@ static struct xfrm_state_afinfo xfrm6_state_afinfo = { .eth_proto = htons(ETH_P_IPV6), .owner = THIS_MODULE, .init_tempsel = __xfrm6_init_tempsel, + .init_temprop = xfrm6_init_temprop, .tmpl_sort = __xfrm6_tmpl_sort, .state_sort = __xfrm6_state_sort, .output = xfrm6_output, diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index 2b3ed7ad493..cbab6e1a8c9 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -1175,9 +1175,8 @@ xfrm_tmpl_resolve_one(struct xfrm_policy *policy, struct flowi *fl, tmpl->mode == XFRM_MODE_BEET) { remote = &tmpl->id.daddr; local = &tmpl->saddr; - family = tmpl->encap_family; - if (xfrm_addr_any(local, family)) { - error = xfrm_get_saddr(net, &tmp, remote, family); + if (xfrm_addr_any(local, tmpl->encap_family)) { + error = xfrm_get_saddr(net, &tmp, remote, tmpl->encap_family); if (error) goto fail; local = &tmp; diff --git 
a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c index 5208b12fbfb..eb96ce52f17 100644 --- a/net/xfrm/xfrm_state.c +++ b/net/xfrm/xfrm_state.c @@ -656,15 +656,23 @@ void xfrm_sad_getinfo(struct net *net, struct xfrmk_sadinfo *si) EXPORT_SYMBOL(xfrm_sad_getinfo); static int -xfrm_init_tempsel(struct xfrm_state *x, struct flowi *fl, - struct xfrm_tmpl *tmpl, - xfrm_address_t *daddr, xfrm_address_t *saddr, - unsigned short family) +xfrm_init_tempstate(struct xfrm_state *x, struct flowi *fl, + struct xfrm_tmpl *tmpl, + xfrm_address_t *daddr, xfrm_address_t *saddr, + unsigned short family) { struct xfrm_state_afinfo *afinfo = xfrm_state_get_afinfo(family); if (!afinfo) return -1; - afinfo->init_tempsel(x, fl, tmpl, daddr, saddr); + afinfo->init_tempsel(&x->sel, fl); + + if (family != tmpl->encap_family) { + xfrm_state_put_afinfo(afinfo); + afinfo = xfrm_state_get_afinfo(tmpl->encap_family); + if (!afinfo) + return -1; + } + afinfo->init_temprop(x, tmpl, daddr, saddr); xfrm_state_put_afinfo(afinfo); return 0; } @@ -790,37 +798,38 @@ xfrm_state_find(xfrm_address_t *daddr, xfrm_address_t *saddr, int error = 0; struct xfrm_state *best = NULL; u32 mark = pol->mark.v & pol->mark.m; + unsigned short encap_family = tmpl->encap_family; to_put = NULL; spin_lock_bh(&xfrm_state_lock); - h = xfrm_dst_hash(net, daddr, saddr, tmpl->reqid, family); + h = xfrm_dst_hash(net, daddr, saddr, tmpl->reqid, encap_family); hlist_for_each_entry(x, entry, net->xfrm.state_bydst+h, bydst) { - if (x->props.family == family && + if (x->props.family == encap_family && x->props.reqid == tmpl->reqid && (mark & x->mark.m) == x->mark.v && !(x->props.flags & XFRM_STATE_WILDRECV) && - xfrm_state_addr_check(x, daddr, saddr, family) && + xfrm_state_addr_check(x, daddr, saddr, encap_family) && tmpl->mode == x->props.mode && tmpl->id.proto == x->id.proto && (tmpl->id.spi == x->id.spi || !tmpl->id.spi)) - xfrm_state_look_at(pol, x, fl, family, daddr, saddr, + xfrm_state_look_at(pol, x, fl, encap_family, daddr, 
saddr, &best, &acquire_in_progress, &error); } if (best) goto found; - h_wildcard = xfrm_dst_hash(net, daddr, &saddr_wildcard, tmpl->reqid, family); + h_wildcard = xfrm_dst_hash(net, daddr, &saddr_wildcard, tmpl->reqid, encap_family); hlist_for_each_entry(x, entry, net->xfrm.state_bydst+h_wildcard, bydst) { - if (x->props.family == family && + if (x->props.family == encap_family && x->props.reqid == tmpl->reqid && (mark & x->mark.m) == x->mark.v && !(x->props.flags & XFRM_STATE_WILDRECV) && - xfrm_state_addr_check(x, daddr, saddr, family) && + xfrm_state_addr_check(x, daddr, saddr, encap_family) && tmpl->mode == x->props.mode && tmpl->id.proto == x->id.proto && (tmpl->id.spi == x->id.spi || !tmpl->id.spi)) - xfrm_state_look_at(pol, x, fl, family, daddr, saddr, + xfrm_state_look_at(pol, x, fl, encap_family, daddr, saddr, &best, &acquire_in_progress, &error); } @@ -829,7 +838,7 @@ found: if (!x && !error && !acquire_in_progress) { if (tmpl->id.spi && (x0 = __xfrm_state_lookup(net, mark, daddr, tmpl->id.spi, - tmpl->id.proto, family)) != NULL) { + tmpl->id.proto, encap_family)) != NULL) { to_put = x0; error = -EEXIST; goto out; @@ -839,9 +848,9 @@ found: error = -ENOMEM; goto out; } - /* Initialize temporary selector matching only + /* Initialize temporary state matching only * to current session. 
*/ - xfrm_init_tempsel(x, fl, tmpl, daddr, saddr, family); + xfrm_init_tempstate(x, fl, tmpl, daddr, saddr, family); memcpy(&x->mark, &pol->mark, sizeof(x->mark)); error = security_xfrm_state_alloc_acquire(x, pol->security, fl->secid); @@ -856,10 +865,10 @@ found: x->km.state = XFRM_STATE_ACQ; list_add(&x->km.all, &net->xfrm.state_all); hlist_add_head(&x->bydst, net->xfrm.state_bydst+h); - h = xfrm_src_hash(net, daddr, saddr, family); + h = xfrm_src_hash(net, daddr, saddr, encap_family); hlist_add_head(&x->bysrc, net->xfrm.state_bysrc+h); if (x->id.spi) { - h = xfrm_spi_hash(net, &x->id.daddr, x->id.spi, x->id.proto, family); + h = xfrm_spi_hash(net, &x->id.daddr, x->id.spi, x->id.proto, encap_family); hlist_add_head(&x->byspi, net->xfrm.state_byspi+h); } x->lft.hard_add_expires_seconds = net->xfrm.sysctl_acq_expires; -- cgit v1.2.3-70-g09d2 From 9828e6e6e3f19efcb476c567b9999891d051f52f Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Mon, 20 Sep 2010 15:40:35 -0700 Subject: rose: Fix signedness issues wrt. digi count. Just use explicit casts, since we really can't change the types of structures exported to userspace which have been around for 15 years or so. Reported-by: Dan Rosenberg Signed-off-by: David S. 
Miller --- net/rose/af_rose.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/rose/af_rose.c b/net/rose/af_rose.c index 8e45e76a95f..d952e7eac18 100644 --- a/net/rose/af_rose.c +++ b/net/rose/af_rose.c @@ -679,7 +679,7 @@ static int rose_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) if (addr_len == sizeof(struct sockaddr_rose) && addr->srose_ndigis > 1) return -EINVAL; - if (addr->srose_ndigis > ROSE_MAX_DIGIS) + if ((unsigned int) addr->srose_ndigis > ROSE_MAX_DIGIS) return -EINVAL; if ((dev = rose_dev_get(&addr->srose_addr)) == NULL) { @@ -739,7 +739,7 @@ static int rose_connect(struct socket *sock, struct sockaddr *uaddr, int addr_le if (addr_len == sizeof(struct sockaddr_rose) && addr->srose_ndigis > 1) return -EINVAL; - if (addr->srose_ndigis > ROSE_MAX_DIGIS) + if ((unsigned int) addr->srose_ndigis > ROSE_MAX_DIGIS) return -EINVAL; /* Source + Destination digis should not exceed ROSE_MAX_DIGIS */ -- cgit v1.2.3-70-g09d2 From a4d258036ed9b2a1811c3670c6099203a0f284a0 Mon Sep 17 00:00:00 2001 From: Tom Marshall Date: Mon, 20 Sep 2010 15:42:05 -0700 Subject: tcp: Fix race in tcp_poll If a RST comes in immediately after checking sk->sk_err, tcp_poll will return POLLIN but not POLLOUT. Fix this by checking sk->sk_err at the end of tcp_poll. Additionally, ensure the correct order of operations on SMP machines with memory barriers. Signed-off-by: Tom Marshall Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv4/tcp.c | 7 +++++-- net/ipv4/tcp_input.c | 2 ++ 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 3fb1428e526..95d75d44392 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -386,8 +386,6 @@ unsigned int tcp_poll(struct file *file, struct socket *sock, poll_table *wait) */ mask = 0; - if (sk->sk_err) - mask = POLLERR; /* * POLLHUP is certainly not done right. 
But poll() doesn't @@ -457,6 +455,11 @@ unsigned int tcp_poll(struct file *file, struct socket *sock, poll_table *wait) if (tp->urg_data & TCP_URG_VALID) mask |= POLLPRI; } + /* This barrier is coupled with smp_wmb() in tcp_reset() */ + smp_rmb(); + if (sk->sk_err) + mask |= POLLERR; + return mask; } EXPORT_SYMBOL(tcp_poll); diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index e663b78a2ef..149e79ac289 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -4048,6 +4048,8 @@ static void tcp_reset(struct sock *sk) default: sk->sk_err = ECONNRESET; } + /* This barrier is coupled with smp_rmb() in tcp_poll() */ + smp_wmb(); if (!sock_flag(sk, SOCK_DEAD)) sk->sk_error_report(sk); -- cgit v1.2.3-70-g09d2 From 8df8fd27123054b02007361bd5483775db84b4a8 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 20 Sep 2010 02:28:59 +0000 Subject: qlcnic: dont set skb->truesize skb->truesize is set in core network. Dont change it unless dealing with fragments. Signed-off-by: Eric Dumazet Signed-off-by: David S. 
Miller --- drivers/net/qlcnic/qlcnic_init.c | 5 ----- 1 file changed, 5 deletions(-) diff --git a/drivers/net/qlcnic/qlcnic_init.c b/drivers/net/qlcnic/qlcnic_init.c index 60ab753f809..2c7cf0b6481 100644 --- a/drivers/net/qlcnic/qlcnic_init.c +++ b/drivers/net/qlcnic/qlcnic_init.c @@ -1404,7 +1404,6 @@ qlcnic_process_rcv(struct qlcnic_adapter *adapter, if (pkt_offset) skb_pull(skb, pkt_offset); - skb->truesize = skb->len + sizeof(struct sk_buff); skb->protocol = eth_type_trans(skb, netdev); napi_gro_receive(&sds_ring->napi, skb); @@ -1466,8 +1465,6 @@ qlcnic_process_lro(struct qlcnic_adapter *adapter, skb_put(skb, lro_length + data_offset); - skb->truesize = skb->len + sizeof(struct sk_buff) + skb_headroom(skb); - skb_pull(skb, l2_hdr_offset); skb->protocol = eth_type_trans(skb, netdev); @@ -1700,8 +1697,6 @@ qlcnic_process_rcv_diag(struct qlcnic_adapter *adapter, if (pkt_offset) skb_pull(skb, pkt_offset); - skb->truesize = skb->len + sizeof(struct sk_buff); - if (!qlcnic_check_loopback_buff(skb->data)) adapter->diag_cnt++; -- cgit v1.2.3-70-g09d2 From 7e96dc7045bff8758804b047c0dfb6868f182500 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 21 Sep 2010 13:04:04 -0700 Subject: netxen: dont set skb->truesize skb->truesize is set in core network. Dont change it unless dealing with fragments. Signed-off-by: Eric Dumazet Signed-off-by: David S. 
Miller --- drivers/net/netxen/netxen_nic_init.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/drivers/net/netxen/netxen_nic_init.c b/drivers/net/netxen/netxen_nic_init.c index cabae7bb1fc..b075a35b85d 100644 --- a/drivers/net/netxen/netxen_nic_init.c +++ b/drivers/net/netxen/netxen_nic_init.c @@ -1540,7 +1540,6 @@ netxen_process_rcv(struct netxen_adapter *adapter, if (pkt_offset) skb_pull(skb, pkt_offset); - skb->truesize = skb->len + sizeof(struct sk_buff); skb->protocol = eth_type_trans(skb, netdev); napi_gro_receive(&sds_ring->napi, skb); @@ -1602,8 +1601,6 @@ netxen_process_lro(struct netxen_adapter *adapter, skb_put(skb, lro_length + data_offset); - skb->truesize = skb->len + sizeof(struct sk_buff) + skb_headroom(skb); - skb_pull(skb, l2_hdr_offset); skb->protocol = eth_type_trans(skb, netdev); -- cgit v1.2.3-70-g09d2 From 3d13008e7345fa7a79d8f6438150dc15d6ba6e9d Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 21 Sep 2010 08:47:45 +0000 Subject: ip: fix truesize mismatch in ip fragmentation Special care should be taken when slow path is hit in ip_fragment() : When walking through frags, we transfert truesize ownership from skb to frags. Then if we hit a slow_path condition, we must undo this or risk uncharging frags->truesize twice, and in the end, having negative socket sk_wmem_alloc counter, or even freeing socket sooner than expected. Many thanks to Nick Bowler, who provided a very clean bug report and test program. Thanks to Jarek for reviewing my first patch and providing a V2 While Nick bisection pointed to commit 2b85a34e911 (net: No more expensive sock_hold()/sock_put() on each tx), underlying bug is older (2.6.12-rc5) A side effect is to extend work done in commit b2722b1c3a893e (ip_fragment: also adjust skb->truesize for packets not owned by a socket) to ipv6 as well. Reported-and-bisected-by: Nick Bowler Tested-by: Nick Bowler Signed-off-by: Eric Dumazet CC: Jarek Poplawski CC: Patrick McHardy Signed-off-by: David S. 
Miller --- net/ipv4/ip_output.c | 19 +++++++++++++------ net/ipv6/ip6_output.c | 18 +++++++++++++----- 2 files changed, 26 insertions(+), 11 deletions(-) diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index 04b69896df5..7649d775007 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -488,9 +488,8 @@ int ip_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *)) * we can switch to copy when see the first bad fragment. */ if (skb_has_frags(skb)) { - struct sk_buff *frag; + struct sk_buff *frag, *frag2; int first_len = skb_pagelen(skb); - int truesizes = 0; if (first_len - hlen > mtu || ((first_len - hlen) & 7) || @@ -503,18 +502,18 @@ int ip_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *)) if (frag->len > mtu || ((frag->len & 7) && frag->next) || skb_headroom(frag) < hlen) - goto slow_path; + goto slow_path_clean; /* Partially cloned skb? */ if (skb_shared(frag)) - goto slow_path; + goto slow_path_clean; BUG_ON(frag->sk); if (skb->sk) { frag->sk = skb->sk; frag->destructor = sock_wfree; } - truesizes += frag->truesize; + skb->truesize -= frag->truesize; } /* Everything is OK. Generate! 
*/ @@ -524,7 +523,6 @@ int ip_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *)) frag = skb_shinfo(skb)->frag_list; skb_frag_list_init(skb); skb->data_len = first_len - skb_headlen(skb); - skb->truesize -= truesizes; skb->len = first_len; iph->tot_len = htons(first_len); iph->frag_off = htons(IP_MF); @@ -576,6 +574,15 @@ int ip_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *)) } IP_INC_STATS(dev_net(dev), IPSTATS_MIB_FRAGFAILS); return err; + +slow_path_clean: + skb_walk_frags(skb, frag2) { + if (frag2 == frag) + break; + frag2->sk = NULL; + frag2->destructor = NULL; + skb->truesize += frag2->truesize; + } } slow_path: diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index d40b330c0ee..980912ed7a3 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -639,7 +639,7 @@ static int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *)) if (skb_has_frags(skb)) { int first_len = skb_pagelen(skb); - int truesizes = 0; + struct sk_buff *frag2; if (first_len - hlen > mtu || ((first_len - hlen) & 7) || @@ -651,18 +651,18 @@ static int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *)) if (frag->len > mtu || ((frag->len & 7) && frag->next) || skb_headroom(frag) < hlen) - goto slow_path; + goto slow_path_clean; /* Partially cloned skb? 
*/ if (skb_shared(frag)) - goto slow_path; + goto slow_path_clean; BUG_ON(frag->sk); if (skb->sk) { frag->sk = skb->sk; frag->destructor = sock_wfree; - truesizes += frag->truesize; } + skb->truesize -= frag->truesize; } err = 0; @@ -693,7 +693,6 @@ static int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *)) first_len = skb_pagelen(skb); skb->data_len = first_len - skb_headlen(skb); - skb->truesize -= truesizes; skb->len = first_len; ipv6_hdr(skb)->payload_len = htons(first_len - sizeof(struct ipv6hdr)); @@ -756,6 +755,15 @@ static int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *)) IPSTATS_MIB_FRAGFAILS); dst_release(&rt->dst); return err; + +slow_path_clean: + skb_walk_frags(skb, frag2) { + if (frag2 == frag) + break; + frag2->sk = NULL; + frag2->destructor = NULL; + skb->truesize += frag2->truesize; + } } slow_path: -- cgit v1.2.3-70-g09d2 From 5786aee8bf6d747ea59595601a19e78ad33d6929 Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Wed, 22 Sep 2010 12:31:53 +0200 Subject: vhost: fix log ctx signalling The log eventfd signalling got put in dead code. We didn't notice because qemu currently does polling instead of eventfd select. Signed-off-by: Michael S. Tsirkin --- drivers/vhost/vhost.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c index c579dcc9200..dd3d6f7406f 100644 --- a/drivers/vhost/vhost.c +++ b/drivers/vhost/vhost.c @@ -858,11 +858,12 @@ int vhost_log_write(struct vhost_virtqueue *vq, struct vhost_log *log, if (r < 0) return r; len -= l; - if (!len) + if (!len) { + if (vq->log_ctx) + eventfd_signal(vq->log_ctx, 1); return 0; + } } - if (vq->log_ctx) - eventfd_signal(vq->log_ctx, 1); /* Length written exceeds what we have stored. This is a bug. 
*/ BUG(); return 0; -- cgit v1.2.3-70-g09d2 From d485d500cf6b13a33bc7a6c09091deea7ea603ca Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 21 Sep 2010 21:17:29 +0000 Subject: netfilter: tproxy: nf_tproxy_assign_sock() can handle tw sockets transparent field of a socket is either inet_twsk(sk)->tw_transparent for timewait sockets, or inet_sk(sk)->transparent for other sockets (TCP/UDP). Signed-off-by: Eric Dumazet Acked-by: David S. Miller Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- net/netfilter/nf_tproxy_core.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/net/netfilter/nf_tproxy_core.c b/net/netfilter/nf_tproxy_core.c index 5490fc37c92..daab8c4a903 100644 --- a/net/netfilter/nf_tproxy_core.c +++ b/net/netfilter/nf_tproxy_core.c @@ -70,7 +70,11 @@ nf_tproxy_destructor(struct sk_buff *skb) int nf_tproxy_assign_sock(struct sk_buff *skb, struct sock *sk) { - if (inet_sk(sk)->transparent) { + bool transparent = (sk->sk_state == TCP_TIME_WAIT) ? + inet_twsk(sk)->tw_transparent : + inet_sk(sk)->transparent; + + if (transparent) { skb_orphan(skb); skb->sk = sk; skb->destructor = nf_tproxy_destructor; -- cgit v1.2.3-70-g09d2 From 7874896a26624214bd7c05eeba7c8ab01548b1b5 Mon Sep 17 00:00:00 2001 From: Simon Horman Date: Tue, 21 Sep 2010 21:17:30 +0000 Subject: netfilter: nf_ct_sip: default to NF_ACCEPT in sip_help_tcp() I initially noticed this because of the compiler warning below, but it does seem to be a valid concern in the case where ct_sip_get_header() returns 0 in the first iteration of the while loop. net/netfilter/nf_conntrack_sip.c: In function 'sip_help_tcp': net/netfilter/nf_conntrack_sip.c:1379: warning: 'ret' may be used uninitialized in this function Signed-off-by: Simon Horman [Patrick: changed NF_DROP to NF_ACCEPT] Signed-off-by: Patrick McHardy Signed-off-by: David S. 
Miller --- net/netfilter/nf_conntrack_sip.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/netfilter/nf_conntrack_sip.c b/net/netfilter/nf_conntrack_sip.c index 53d892210a0..f64de954486 100644 --- a/net/netfilter/nf_conntrack_sip.c +++ b/net/netfilter/nf_conntrack_sip.c @@ -1376,7 +1376,7 @@ static int sip_help_tcp(struct sk_buff *skb, unsigned int protoff, unsigned int msglen, origlen; const char *dptr, *end; s16 diff, tdiff = 0; - int ret; + int ret = NF_ACCEPT; typeof(nf_nat_sip_seq_adjust_hook) nf_nat_sip_seq_adjust; if (ctinfo != IP_CT_ESTABLISHED && -- cgit v1.2.3-70-g09d2 From b46ffb854554ff939701bdd492b81558da5706fc Mon Sep 17 00:00:00 2001 From: Changli Gao Date: Tue, 21 Sep 2010 21:17:31 +0000 Subject: netfilter: fix ipt_REJECT TCP RST routing for indev == outdev ip_route_me_harder can't create the route cache when the outdev is the same as the indev for skbs without a valid protocol set. The __mkroute_input function has this check: 1998 if (skb->protocol != htons(ETH_P_IP)) { 1999 /* Not IP (i.e. ARP). Do not create route, if it is 2000 * invalid for proxy arp. DNAT routes are always valid. 2001 * 2002 * Proxy arp feature have been extended to allow, ARP 2003 * replies back to the same interface, to support 2004 * Private VLAN switch technologies. See arp.c. 2005 */ 2006 if (out_dev == in_dev && 2007 IN_DEV_PROXY_ARP_PVLAN(in_dev) == 0) { 2008 err = -EINVAL; 2009 goto cleanup; 2010 } 2011 } This patch gives the new skb a valid protocol to bypass this check. In order to make ipt_REJECT work with bridges, you also need to enable ip_forward. This patch also fixes a regression. When we used skb_copy_expand(), we didn't have this issue stated above, as the protocol was properly set. Signed-off-by: Changli Gao Signed-off-by: Patrick McHardy Signed-off-by: David S. 
Miller --- net/ipv4/netfilter/ipt_REJECT.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/ipv4/netfilter/ipt_REJECT.c b/net/ipv4/netfilter/ipt_REJECT.c index b254dafaf42..43eec80c0e7 100644 --- a/net/ipv4/netfilter/ipt_REJECT.c +++ b/net/ipv4/netfilter/ipt_REJECT.c @@ -112,6 +112,7 @@ static void send_reset(struct sk_buff *oldskb, int hook) /* ip_route_me_harder expects skb->dst to be set */ skb_dst_set_noref(nskb, skb_dst(oldskb)); + nskb->protocol = htons(ETH_P_IP); if (ip_route_me_harder(nskb, addr_type)) goto free_nskb; -- cgit v1.2.3-70-g09d2 From 15cdeadaa5d76009e20c7792aed69f5a73808f97 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 21 Sep 2010 21:17:32 +0000 Subject: netfilter: fix a race in nf_ct_ext_create() As soon as rcu_read_unlock() is called, there is no guarantee that the current thread can safely dereference the RCU-protected pointer t. The fix is to copy t->alloc_size into a temporary variable. Signed-off-by: Eric Dumazet Reviewed-by: Paul E. McKenney Signed-off-by: Patrick McHardy Signed-off-by: David S. 
Miller --- net/netfilter/nf_conntrack_extend.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/net/netfilter/nf_conntrack_extend.c b/net/netfilter/nf_conntrack_extend.c index 7dcf7a40419..8d9e4c949b9 100644 --- a/net/netfilter/nf_conntrack_extend.c +++ b/net/netfilter/nf_conntrack_extend.c @@ -48,15 +48,17 @@ nf_ct_ext_create(struct nf_ct_ext **ext, enum nf_ct_ext_id id, gfp_t gfp) { unsigned int off, len; struct nf_ct_ext_type *t; + size_t alloc_size; rcu_read_lock(); t = rcu_dereference(nf_ct_ext_types[id]); BUG_ON(t == NULL); off = ALIGN(sizeof(struct nf_ct_ext), t->align); len = off + t->len; + alloc_size = t->alloc_size; rcu_read_unlock(); - *ext = kzalloc(t->alloc_size, gfp); + *ext = kzalloc(alloc_size, gfp); if (!*ext) return NULL; -- cgit v1.2.3-70-g09d2 From d6120b8afacec587f5feb37781bc751bc5d68a10 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Tue, 21 Sep 2010 21:17:33 +0000 Subject: netfilter: nf_nat_snmp: fix checksum calculation (v4) Fix checksum calculation in nf_nat_snmp_basic. Based on patches by Clark Wang and Stephen Hemminger . https://bugzilla.kernel.org/show_bug.cgi?id=17622 Signed-off-by: Patrick McHardy Signed-off-by: David S. 
Miller --- net/ipv4/netfilter/nf_nat_snmp_basic.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/net/ipv4/netfilter/nf_nat_snmp_basic.c b/net/ipv4/netfilter/nf_nat_snmp_basic.c index 1679e2c0963..ee5f419d0a5 100644 --- a/net/ipv4/netfilter/nf_nat_snmp_basic.c +++ b/net/ipv4/netfilter/nf_nat_snmp_basic.c @@ -893,13 +893,15 @@ static void fast_csum(__sum16 *csum, unsigned char s[4]; if (offset & 1) { - s[0] = s[2] = 0; + s[0] = ~0; s[1] = ~*optr; + s[2] = 0; s[3] = *nptr; } else { - s[1] = s[3] = 0; s[0] = ~*optr; + s[1] = ~0; s[2] = *nptr; + s[3] = 0; } *csum = csum_fold(csum_partial(s, 4, ~csum_unfold(*csum))); -- cgit v1.2.3-70-g09d2 From cbdd769ab9de26764bde0520a91536caa1587e13 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Tue, 21 Sep 2010 21:17:34 +0000 Subject: netfilter: nf_conntrack_defrag: check socket type before touching nodefrag flag we need to check proper socket type within ipv4_conntrack_defrag function before referencing the nodefrag flag. For example the tun driver receive path produces skbs with AF_UNSPEC socket type, and so current code is causing unwanted fragmented packets going out. Signed-off-by: Jiri Olsa Signed-off-by: Patrick McHardy Signed-off-by: David S. 
Miller --- net/ipv4/netfilter/nf_defrag_ipv4.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/net/ipv4/netfilter/nf_defrag_ipv4.c b/net/ipv4/netfilter/nf_defrag_ipv4.c index eab8de32f20..f3a9b42b16c 100644 --- a/net/ipv4/netfilter/nf_defrag_ipv4.c +++ b/net/ipv4/netfilter/nf_defrag_ipv4.c @@ -66,9 +66,11 @@ static unsigned int ipv4_conntrack_defrag(unsigned int hooknum, const struct net_device *out, int (*okfn)(struct sk_buff *)) { + struct sock *sk = skb->sk; struct inet_sock *inet = inet_sk(skb->sk); - if (inet && inet->nodefrag) + if (sk && (sk->sk_family == PF_INET) && + inet->nodefrag) return NF_ACCEPT; #if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE) -- cgit v1.2.3-70-g09d2 From 56b49f4b8f6728b91d10c556c116175051b77b60 Mon Sep 17 00:00:00 2001 From: Ollie Wild Date: Wed, 22 Sep 2010 05:54:54 +0000 Subject: net: Move "struct net" declaration inside the __KERNEL__ macro guard This patch reduces namespace pollution by moving the "struct net" declaration out of the userspace-facing portion of linux/netlink.h. It has no impact on the kernel. (This came up because we have several C++ applications which use "net" as a namespace name.) Signed-off-by: Ollie Wild Signed-off-by: David S. 
Miller --- include/linux/netlink.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/include/linux/netlink.h b/include/linux/netlink.h index 59d066936ab..123566912d7 100644 --- a/include/linux/netlink.h +++ b/include/linux/netlink.h @@ -27,8 +27,6 @@ #define MAX_LINKS 32 -struct net; - struct sockaddr_nl { sa_family_t nl_family; /* AF_NETLINK */ unsigned short nl_pad; /* zero */ @@ -151,6 +149,8 @@ struct nlattr { #include #include +struct net; + static inline struct nlmsghdr *nlmsg_hdr(const struct sk_buff *skb) { return (struct nlmsghdr *)skb->data; -- cgit v1.2.3-70-g09d2 From ec5a32f67c603b11d68eb283d94eb89a4f6cfce1 Mon Sep 17 00:00:00 2001 From: Luca Tettamanti Date: Wed, 22 Sep 2010 10:41:58 +0000 Subject: atl1: fix resume adapter->cmb.cmb is initialized when the device is opened and freed when it's closed. Accessing it unconditionally during resume results either in a crash (NULL pointer dereference, when the interface has not been opened yet) or data corruption (when the interface has been used and brought down adapter->cmb.cmb points to a deallocated memory area). Cc: stable@kernel.org Signed-off-by: Luca Tettamanti Acked-by: Chris Snook Signed-off-by: David S. 
Miller --- drivers/net/atlx/atl1.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/net/atlx/atl1.c b/drivers/net/atlx/atl1.c index 63b9ba0cc67..bbd6e3009be 100644 --- a/drivers/net/atlx/atl1.c +++ b/drivers/net/atlx/atl1.c @@ -2847,10 +2847,11 @@ static int atl1_resume(struct pci_dev *pdev) pci_enable_wake(pdev, PCI_D3cold, 0); atl1_reset_hw(&adapter->hw); - adapter->cmb.cmb->int_stats = 0; - if (netif_running(netdev)) + if (netif_running(netdev)) { + adapter->cmb.cmb->int_stats = 0; atl1_up(adapter); + } netif_device_attach(netdev); return 0; -- cgit v1.2.3-70-g09d2 From 3f5a2a713aad28480d86b0add00c68484b54febc Mon Sep 17 00:00:00 2001 From: Luca Tettamanti Date: Wed, 22 Sep 2010 10:42:31 +0000 Subject: atl1: zero out CMB and SBM in atl1_free_ring_resources They are allocated in atl1_setup_ring_resources, zero out the pointers in atl1_free_ring_resources (like the other resources). Signed-off-by: Luca Tettamanti Acked-by: Chris Snook Signed-off-by: David S. Miller --- drivers/net/atlx/atl1.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/net/atlx/atl1.c b/drivers/net/atlx/atl1.c index bbd6e3009be..c73be284831 100644 --- a/drivers/net/atlx/atl1.c +++ b/drivers/net/atlx/atl1.c @@ -1251,6 +1251,12 @@ static void atl1_free_ring_resources(struct atl1_adapter *adapter) rrd_ring->desc = NULL; rrd_ring->dma = 0; + + adapter->cmb.dma = 0; + adapter->cmb.cmb = NULL; + + adapter->smb.dma = 0; + adapter->smb.smb = NULL; } static void atl1_setup_mac_ctrl(struct atl1_adapter *adapter) -- cgit v1.2.3-70-g09d2 From 94e2238969e89f5112297ad2a00103089dde7e8f Mon Sep 17 00:00:00 2001 From: Ulrich Weber Date: Wed, 22 Sep 2010 06:45:11 +0000 Subject: xfrm4: strip ECN bits from tos field otherwise ECT(1) bit will get interpreted as RTO_ONLINK and routing will fail with XfrmOutBundleGenError. Signed-off-by: Ulrich Weber Signed-off-by: David S. 
Miller --- net/ipv4/xfrm4_policy.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/ipv4/xfrm4_policy.c b/net/ipv4/xfrm4_policy.c index 869078d4eeb..a580349f0b8 100644 --- a/net/ipv4/xfrm4_policy.c +++ b/net/ipv4/xfrm4_policy.c @@ -61,7 +61,7 @@ static int xfrm4_get_saddr(struct net *net, static int xfrm4_get_tos(struct flowi *fl) { - return fl->fl4_tos; + return IPTOS_RT_MASK & fl->fl4_tos; /* Strip ECN bits */ } static int xfrm4_init_path(struct xfrm_dst *path, struct dst_entry *dst, -- cgit v1.2.3-70-g09d2 From 8395ae8303255b31a8625035fc98391c88b0c257 Mon Sep 17 00:00:00 2001 From: Bruce Allan Date: Wed, 22 Sep 2010 17:15:08 +0000 Subject: e1000e: 82577/8/9 issues with device in Sx When going to Sx, disable gigabit in PHY (e1000_oem_bits_config_ich8lan) in addition to the MAC before configuring PHY wakeup otherwise the PHY configuration writes might be missed. Also write the LED configuration and SMBus address to the PHY registers (e1000_oem_bits_config_ich8lan and e1000_write_smbus_addr, respectively). The reset is no longer needed since re-auto-negotiation is forced in e1000_oem_bits_config_ich8lan and leaving it in causes issues with auto-negotiating the link. Signed-off-by: Bruce Allan Tested-by: Jeff Pieper Signed-off-by: Jeff Kirsher Signed-off-by: David S. Miller --- drivers/net/e1000e/ich8lan.c | 47 ++++++++++++++++++++++++++++++++++++-------- 1 file changed, 39 insertions(+), 8 deletions(-) diff --git a/drivers/net/e1000e/ich8lan.c b/drivers/net/e1000e/ich8lan.c index 63930d12711..822de4830c6 100644 --- a/drivers/net/e1000e/ich8lan.c +++ b/drivers/net/e1000e/ich8lan.c @@ -125,6 +125,7 @@ /* SMBus Address Phy Register */ #define HV_SMB_ADDR PHY_REG(768, 26) +#define HV_SMB_ADDR_MASK 0x007F #define HV_SMB_ADDR_PEC_EN 0x0200 #define HV_SMB_ADDR_VALID 0x0080 @@ -894,6 +895,34 @@ static s32 e1000_check_reset_block_ich8lan(struct e1000_hw *hw) return (fwsm & E1000_ICH_FWSM_RSPCIPHY) ? 
0 : E1000_BLK_PHY_RESET; } +/** + * e1000_write_smbus_addr - Write SMBus address to PHY needed during Sx states + * @hw: pointer to the HW structure + * + * Assumes semaphore already acquired. + * + **/ +static s32 e1000_write_smbus_addr(struct e1000_hw *hw) +{ + u16 phy_data; + u32 strap = er32(STRAP); + s32 ret_val = 0; + + strap &= E1000_STRAP_SMBUS_ADDRESS_MASK; + + ret_val = e1000_read_phy_reg_hv_locked(hw, HV_SMB_ADDR, &phy_data); + if (ret_val) + goto out; + + phy_data &= ~HV_SMB_ADDR_MASK; + phy_data |= (strap >> E1000_STRAP_SMBUS_ADDRESS_SHIFT); + phy_data |= HV_SMB_ADDR_PEC_EN | HV_SMB_ADDR_VALID; + ret_val = e1000_write_phy_reg_hv_locked(hw, HV_SMB_ADDR, phy_data); + +out: + return ret_val; +} + /** * e1000_sw_lcd_config_ich8lan - SW-based LCD Configuration * @hw: pointer to the HW structure @@ -970,12 +999,7 @@ static s32 e1000_sw_lcd_config_ich8lan(struct e1000_hw *hw) * When both NVM bits are cleared, SW will configure * them instead. */ - data = er32(STRAP); - data &= E1000_STRAP_SMBUS_ADDRESS_MASK; - reg_data = data >> E1000_STRAP_SMBUS_ADDRESS_SHIFT; - reg_data |= HV_SMB_ADDR_PEC_EN | HV_SMB_ADDR_VALID; - ret_val = e1000_write_phy_reg_hv_locked(hw, HV_SMB_ADDR, - reg_data); + ret_val = e1000_write_smbus_addr(hw); if (ret_val) goto out; @@ -3460,13 +3484,20 @@ void e1000e_gig_downshift_workaround_ich8lan(struct e1000_hw *hw) void e1000e_disable_gig_wol_ich8lan(struct e1000_hw *hw) { u32 phy_ctrl; + s32 ret_val; phy_ctrl = er32(PHY_CTRL); phy_ctrl |= E1000_PHY_CTRL_D0A_LPLU | E1000_PHY_CTRL_GBE_DISABLE; ew32(PHY_CTRL, phy_ctrl); - if (hw->mac.type >= e1000_pchlan) - e1000_phy_hw_reset_ich8lan(hw); + if (hw->mac.type >= e1000_pchlan) { + e1000_oem_bits_config_ich8lan(hw, true); + ret_val = hw->phy.ops.acquire(hw); + if (ret_val) + return; + e1000_write_smbus_addr(hw); + hw->phy.ops.release(hw); + } } /** -- cgit v1.2.3-70-g09d2 From 87fb7410cd8d4396dee0155526568645adba3b99 Mon Sep 17 00:00:00 2001 From: Bruce Allan Date: Wed, 22 Sep 2010 17:15:33 
+0000 Subject: e1000e: 82579 SMBus address and LEDs incorrect after device reset Since the hardware is prevented from performing automatic PHY configuration (the driver does it instead), the OEM_WRITE_ENABLE bit in the EXTCNF_CTRL register will not get cleared preventing the SMBus address and the LED configuration to be written to the PHY registers. On 82579, do not check the OEM_WRITE_ENABLE bit. Signed-off-by: Bruce Allan Tested-by: Jeff Pieper Signed-off-by: Jeff Kirsher Signed-off-by: David S. Miller --- drivers/net/e1000e/ich8lan.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/net/e1000e/ich8lan.c b/drivers/net/e1000e/ich8lan.c index 822de4830c6..fc8c3cef052 100644 --- a/drivers/net/e1000e/ich8lan.c +++ b/drivers/net/e1000e/ich8lan.c @@ -990,9 +990,9 @@ static s32 e1000_sw_lcd_config_ich8lan(struct e1000_hw *hw) cnf_base_addr = data & E1000_EXTCNF_CTRL_EXT_CNF_POINTER_MASK; cnf_base_addr >>= E1000_EXTCNF_CTRL_EXT_CNF_POINTER_SHIFT; - if (!(data & E1000_EXTCNF_CTRL_OEM_WRITE_ENABLE) && - ((hw->mac.type == e1000_pchlan) || - (hw->mac.type == e1000_pch2lan))) { + if ((!(data & E1000_EXTCNF_CTRL_OEM_WRITE_ENABLE) && + (hw->mac.type == e1000_pchlan)) || + (hw->mac.type == e1000_pch2lan)) { /* * HW configures the SMBus address and LEDs when the * OEM and LCD Write Enable bits are set in the NVM. -- cgit v1.2.3-70-g09d2 From 5f3eed6fe0e36e4b56c8dd9160241a868ee0de2a Mon Sep 17 00:00:00 2001 From: Bruce Allan Date: Wed, 22 Sep 2010 17:15:54 +0000 Subject: e1000e: 82566DC fails to get link Two recent patches to cleanup the reset[1] and initial PHY configuration[2] code paths for ICH/PCH devices inadvertently left out a 10msec delay and device ID check respectively which are necessary for the 82566DC (device id 0x104b) to be configured properly, otherwise it will not get link. 
[1] commit e98cac447cc1cc418dff1d610a5c79c4f2bdec7f [2] commit 3f0c16e84438d657d29446f85fe375794a93f159 CC: stable@kernel.org Signed-off-by: Bruce Allan Tested-by: Jeff Pieper Signed-off-by: Jeff Kirsher Signed-off-by: David S. Miller --- drivers/net/e1000e/ich8lan.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/net/e1000e/ich8lan.c b/drivers/net/e1000e/ich8lan.c index fc8c3cef052..6f9cb0d44d3 100644 --- a/drivers/net/e1000e/ich8lan.c +++ b/drivers/net/e1000e/ich8lan.c @@ -932,7 +932,6 @@ out: **/ static s32 e1000_sw_lcd_config_ich8lan(struct e1000_hw *hw) { - struct e1000_adapter *adapter = hw->adapter; struct e1000_phy_info *phy = &hw->phy; u32 i, data, cnf_size, cnf_base_addr, sw_cfg_mask; s32 ret_val = 0; @@ -950,7 +949,8 @@ static s32 e1000_sw_lcd_config_ich8lan(struct e1000_hw *hw) if (phy->type != e1000_phy_igp_3) return ret_val; - if (adapter->pdev->device == E1000_DEV_ID_ICH8_IGP_AMT) { + if ((hw->adapter->pdev->device == E1000_DEV_ID_ICH8_IGP_AMT) || + (hw->adapter->pdev->device == E1000_DEV_ID_ICH8_IGP_C)) { sw_cfg_mask = E1000_FEXTNVM_SW_CONFIG; break; } @@ -1626,6 +1626,9 @@ static s32 e1000_post_phy_reset_ich8lan(struct e1000_hw *hw) if (e1000_check_reset_block(hw)) goto out; + /* Allow time for h/w to get to quiescent state after reset */ + msleep(10); + /* Perform any necessary post-reset workarounds */ switch (hw->mac.type) { case e1000_pchlan: -- cgit v1.2.3-70-g09d2 From 831bd2e6a6c09588fdde453ecb858f050ac1b942 Mon Sep 17 00:00:00 2001 From: Bruce Allan Date: Wed, 22 Sep 2010 17:16:18 +0000 Subject: e1000e: 82579 unaccounted missed packets On 82579, there is a hardware bug that can cause received packets to not get transferred from the PHY to the MAC due to K1 (a power saving feature of the PHY-MAC interconnect similar to ASPM L1). Since the MAC controls the accounting of missed packets, these will go unnoticed. Workaround the issue by setting the K1 beacon duration according to the link speed. 
Signed-off-by: Bruce Allan Tested-by: Jeff Pieper Signed-off-by: Jeff Kirsher Signed-off-by: David S. Miller --- drivers/net/e1000e/hw.h | 1 + drivers/net/e1000e/ich8lan.c | 48 ++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 49 insertions(+) diff --git a/drivers/net/e1000e/hw.h b/drivers/net/e1000e/hw.h index 66ed08f726f..ba302a5c2c3 100644 --- a/drivers/net/e1000e/hw.h +++ b/drivers/net/e1000e/hw.h @@ -57,6 +57,7 @@ enum e1e_registers { E1000_SCTL = 0x00024, /* SerDes Control - RW */ E1000_FCAL = 0x00028, /* Flow Control Address Low - RW */ E1000_FCAH = 0x0002C, /* Flow Control Address High -RW */ + E1000_FEXTNVM4 = 0x00024, /* Future Extended NVM 4 - RW */ E1000_FEXTNVM = 0x00028, /* Future Extended NVM - RW */ E1000_FCT = 0x00030, /* Flow Control Type - RW */ E1000_VET = 0x00038, /* VLAN Ether Type - RW */ diff --git a/drivers/net/e1000e/ich8lan.c b/drivers/net/e1000e/ich8lan.c index 6f9cb0d44d3..89b1e1aea52 100644 --- a/drivers/net/e1000e/ich8lan.c +++ b/drivers/net/e1000e/ich8lan.c @@ -105,6 +105,10 @@ #define E1000_FEXTNVM_SW_CONFIG 1 #define E1000_FEXTNVM_SW_CONFIG_ICH8M (1 << 27) /* Bit redefined for ICH8M :/ */ +#define E1000_FEXTNVM4_BEACON_DURATION_MASK 0x7 +#define E1000_FEXTNVM4_BEACON_DURATION_8USEC 0x7 +#define E1000_FEXTNVM4_BEACON_DURATION_16USEC 0x3 + #define PCIE_ICH8_SNOOP_ALL PCIE_NO_SNOOP_ALL #define E1000_ICH_RAR_ENTRIES 7 @@ -238,6 +242,7 @@ static s32 e1000_k1_gig_workaround_hv(struct e1000_hw *hw, bool link); static s32 e1000_set_mdio_slow_mode_hv(struct e1000_hw *hw); static bool e1000_check_mng_mode_ich8lan(struct e1000_hw *hw); static bool e1000_check_mng_mode_pchlan(struct e1000_hw *hw); +static s32 e1000_k1_workaround_lv(struct e1000_hw *hw); static inline u16 __er16flash(struct e1000_hw *hw, unsigned long reg) { @@ -653,6 +658,12 @@ static s32 e1000_check_for_copper_link_ich8lan(struct e1000_hw *hw) goto out; } + if (hw->mac.type == e1000_pch2lan) { + ret_val = e1000_k1_workaround_lv(hw); + if (ret_val) + goto out; + } 
+ /* * Check if there was DownShift, must be checked * immediately after link-up @@ -1582,6 +1593,43 @@ out: return ret_val; } +/** + * e1000_k1_gig_workaround_lv - K1 Si workaround + * @hw: pointer to the HW structure + * + * Workaround to set the K1 beacon duration for 82579 parts + **/ +static s32 e1000_k1_workaround_lv(struct e1000_hw *hw) +{ + s32 ret_val = 0; + u16 status_reg = 0; + u32 mac_reg; + + if (hw->mac.type != e1000_pch2lan) + goto out; + + /* Set K1 beacon duration based on 1Gbps speed or otherwise */ + ret_val = e1e_rphy(hw, HV_M_STATUS, &status_reg); + if (ret_val) + goto out; + + if ((status_reg & (HV_M_STATUS_LINK_UP | HV_M_STATUS_AUTONEG_COMPLETE)) + == (HV_M_STATUS_LINK_UP | HV_M_STATUS_AUTONEG_COMPLETE)) { + mac_reg = er32(FEXTNVM4); + mac_reg &= ~E1000_FEXTNVM4_BEACON_DURATION_MASK; + + if (status_reg & HV_M_STATUS_SPEED_1000) + mac_reg |= E1000_FEXTNVM4_BEACON_DURATION_8USEC; + else + mac_reg |= E1000_FEXTNVM4_BEACON_DURATION_16USEC; + + ew32(FEXTNVM4, mac_reg); + } + +out: + return ret_val; +} + /** * e1000_lan_init_done_ich8lan - Check for PHY config completion * @hw: pointer to the HW structure -- cgit v1.2.3-70-g09d2 From a1ce647378c0262fe72757f989e961b2de6460a5 Mon Sep 17 00:00:00 2001 From: Bruce Allan Date: Wed, 22 Sep 2010 17:16:40 +0000 Subject: e1000e: 82579 jumbo frame workaround causing CRC errors The subject workaround was causing CRC errors due to writing the wrong register with updates of the RCTL register. It was also found that the workaround function which modifies the RCTL register was being called in the middle of a read-modify-write operation of the RCTL register, so the function call has been moved appropriately. Lastly, jumbo frames must not be allowed when CRC stripping is disabled by a module parameter because the workaround requires the CRC be stripped. Signed-off-by: Bruce Allan Tested-by: Jeff Pieper Signed-off-by: Jeff Kirsher Signed-off-by: David S. 
Miller --- drivers/net/e1000e/ich8lan.c | 12 +----------- drivers/net/e1000e/netdev.c | 29 +++++++++++++++++++---------- 2 files changed, 20 insertions(+), 21 deletions(-) diff --git a/drivers/net/e1000e/ich8lan.c b/drivers/net/e1000e/ich8lan.c index 89b1e1aea52..bb346ae3d9a 100644 --- a/drivers/net/e1000e/ich8lan.c +++ b/drivers/net/e1000e/ich8lan.c @@ -1475,10 +1475,6 @@ s32 e1000_lv_jumbo_workaround_ich8lan(struct e1000_hw *hw, bool enable) goto out; /* Enable jumbo frame workaround in the PHY */ - e1e_rphy(hw, PHY_REG(769, 20), &data); - ret_val = e1e_wphy(hw, PHY_REG(769, 20), data & ~(1 << 14)); - if (ret_val) - goto out; e1e_rphy(hw, PHY_REG(769, 23), &data); data &= ~(0x7F << 5); data |= (0x37 << 5); @@ -1487,7 +1483,6 @@ s32 e1000_lv_jumbo_workaround_ich8lan(struct e1000_hw *hw, bool enable) goto out; e1e_rphy(hw, PHY_REG(769, 16), &data); data &= ~(1 << 13); - data |= (1 << 12); ret_val = e1e_wphy(hw, PHY_REG(769, 16), data); if (ret_val) goto out; @@ -1512,7 +1507,7 @@ s32 e1000_lv_jumbo_workaround_ich8lan(struct e1000_hw *hw, bool enable) mac_reg = er32(RCTL); mac_reg &= ~E1000_RCTL_SECRC; - ew32(FFLT_DBG, mac_reg); + ew32(RCTL, mac_reg); ret_val = e1000e_read_kmrn_reg(hw, E1000_KMRNCTRLSTA_CTRL_OFFSET, @@ -1538,17 +1533,12 @@ s32 e1000_lv_jumbo_workaround_ich8lan(struct e1000_hw *hw, bool enable) goto out; /* Write PHY register values back to h/w defaults */ - e1e_rphy(hw, PHY_REG(769, 20), &data); - ret_val = e1e_wphy(hw, PHY_REG(769, 20), data & ~(1 << 14)); - if (ret_val) - goto out; e1e_rphy(hw, PHY_REG(769, 23), &data); data &= ~(0x7F << 5); ret_val = e1e_wphy(hw, PHY_REG(769, 23), data); if (ret_val) goto out; e1e_rphy(hw, PHY_REG(769, 16), &data); - data &= ~(1 << 12); data |= (1 << 13); ret_val = e1e_wphy(hw, PHY_REG(769, 16), data); if (ret_val) diff --git a/drivers/net/e1000e/netdev.c b/drivers/net/e1000e/netdev.c index 2b8ef44bd2b..e561d15c3eb 100644 --- a/drivers/net/e1000e/netdev.c +++ b/drivers/net/e1000e/netdev.c @@ -2704,6 +2704,16 @@ 
static void e1000_setup_rctl(struct e1000_adapter *adapter) u32 psrctl = 0; u32 pages = 0; + /* Workaround Si errata on 82579 - configure jumbo frame flow */ + if (hw->mac.type == e1000_pch2lan) { + s32 ret_val; + + if (adapter->netdev->mtu > ETH_DATA_LEN) + ret_val = e1000_lv_jumbo_workaround_ich8lan(hw, true); + else + ret_val = e1000_lv_jumbo_workaround_ich8lan(hw, false); + } + /* Program MC offset vector base */ rctl = er32(RCTL); rctl &= ~(3 << E1000_RCTL_MO_SHIFT); @@ -2744,16 +2754,6 @@ static void e1000_setup_rctl(struct e1000_adapter *adapter) e1e_wphy(hw, 22, phy_data); } - /* Workaround Si errata on 82579 - configure jumbo frame flow */ - if (hw->mac.type == e1000_pch2lan) { - s32 ret_val; - - if (rctl & E1000_RCTL_LPE) - ret_val = e1000_lv_jumbo_workaround_ich8lan(hw, true); - else - ret_val = e1000_lv_jumbo_workaround_ich8lan(hw, false); - } - /* Setup buffer sizes */ rctl &= ~E1000_RCTL_SZ_4096; rctl |= E1000_RCTL_BSEX; @@ -4833,6 +4833,15 @@ static int e1000_change_mtu(struct net_device *netdev, int new_mtu) return -EINVAL; } + /* Jumbo frame workaround on 82579 requires CRC be stripped */ + if ((adapter->hw.mac.type == e1000_pch2lan) && + !(adapter->flags2 & FLAG2_CRC_STRIPPING) && + (new_mtu > ETH_DATA_LEN)) { + e_err("Jumbo Frames not supported on 82579 when CRC " + "stripping is disabled.\n"); + return -EINVAL; + } + /* 82573 Errata 17 */ if (((adapter->hw.mac.type == e1000_82573) || (adapter->hw.mac.type == e1000_82574)) && -- cgit v1.2.3-70-g09d2 From 605c82bab5abe0816e5e32716875c245f89f39da Mon Sep 17 00:00:00 2001 From: Bruce Allan Date: Wed, 22 Sep 2010 17:17:01 +0000 Subject: e1000e: 82579 do not gate auto config of PHY by hardware during nominal use For non-managed versions of 82579, set the bit that prevents the hardware from automatically configuring the PHY after resets only when the driver performs a reset, clear the bit after resets. 
This is so the hardware can configure the PHY automatically when the part is reset in a manner that is not controlled by the driver (e.g. in a virtual environment via PCI FLR) otherwise the PHY will be mis-configured causing issues such as failing to link at 1000Mbps. For managed versions of 82579, keep the previous behavior since the manageability firmware will handle the PHY configuration. Signed-off-by: Bruce Allan Tested-by: Jeff Pieper Signed-off-by: Jeff Kirsher Signed-off-by: David S. Miller --- drivers/net/e1000e/ich8lan.c | 77 ++++++++++++++++++++++++++++++++++++++------ 1 file changed, 68 insertions(+), 9 deletions(-) diff --git a/drivers/net/e1000e/ich8lan.c b/drivers/net/e1000e/ich8lan.c index bb346ae3d9a..57b5435599a 100644 --- a/drivers/net/e1000e/ich8lan.c +++ b/drivers/net/e1000e/ich8lan.c @@ -243,6 +243,7 @@ static s32 e1000_set_mdio_slow_mode_hv(struct e1000_hw *hw); static bool e1000_check_mng_mode_ich8lan(struct e1000_hw *hw); static bool e1000_check_mng_mode_pchlan(struct e1000_hw *hw); static s32 e1000_k1_workaround_lv(struct e1000_hw *hw); +static void e1000_gate_hw_phy_config_ich8lan(struct e1000_hw *hw, bool gate); static inline u16 __er16flash(struct e1000_hw *hw, unsigned long reg) { @@ -278,7 +279,7 @@ static inline void __ew32flash(struct e1000_hw *hw, unsigned long reg, u32 val) static s32 e1000_init_phy_params_pchlan(struct e1000_hw *hw) { struct e1000_phy_info *phy = &hw->phy; - u32 ctrl; + u32 ctrl, fwsm; s32 ret_val = 0; phy->addr = 1; @@ -300,7 +301,8 @@ static s32 e1000_init_phy_params_pchlan(struct e1000_hw *hw) * disabled, then toggle the LANPHYPC Value bit to force * the interconnect to PCIe mode. 
*/ - if (!(er32(FWSM) & E1000_ICH_FWSM_FW_VALID)) { + fwsm = er32(FWSM); + if (!(fwsm & E1000_ICH_FWSM_FW_VALID)) { ctrl = er32(CTRL); ctrl |= E1000_CTRL_LANPHYPC_OVERRIDE; ctrl &= ~E1000_CTRL_LANPHYPC_VALUE; @@ -309,6 +311,13 @@ static s32 e1000_init_phy_params_pchlan(struct e1000_hw *hw) ctrl &= ~E1000_CTRL_LANPHYPC_OVERRIDE; ew32(CTRL, ctrl); msleep(50); + + /* + * Gate automatic PHY configuration by hardware on + * non-managed 82579 + */ + if (hw->mac.type == e1000_pch2lan) + e1000_gate_hw_phy_config_ich8lan(hw, true); } /* @@ -321,6 +330,13 @@ static s32 e1000_init_phy_params_pchlan(struct e1000_hw *hw) if (ret_val) goto out; + /* Ungate automatic PHY configuration on non-managed 82579 */ + if ((hw->mac.type == e1000_pch2lan) && + !(fwsm & E1000_ICH_FWSM_FW_VALID)) { + msleep(10); + e1000_gate_hw_phy_config_ich8lan(hw, false); + } + phy->id = e1000_phy_unknown; ret_val = e1000e_get_phy_id(hw); if (ret_val) @@ -567,13 +583,10 @@ static s32 e1000_init_mac_params_ich8lan(struct e1000_adapter *adapter) if (mac->type == e1000_ich8lan) e1000e_set_kmrn_lock_loss_workaround_ich8lan(hw, true); - /* Disable PHY configuration by hardware, config by software */ - if (mac->type == e1000_pch2lan) { - u32 extcnf_ctrl = er32(EXTCNF_CTRL); - - extcnf_ctrl |= E1000_EXTCNF_CTRL_GATE_PHY_CFG; - ew32(EXTCNF_CTRL, extcnf_ctrl); - } + /* Gate automatic PHY configuration by hardware on managed 82579 */ + if ((mac->type == e1000_pch2lan) && + (er32(FWSM) & E1000_ICH_FWSM_FW_VALID)) + e1000_gate_hw_phy_config_ich8lan(hw, true); return 0; } @@ -1620,6 +1633,32 @@ out: return ret_val; } +/** + * e1000_gate_hw_phy_config_ich8lan - disable PHY config via hardware + * @hw: pointer to the HW structure + * @gate: boolean set to true to gate, false to ungate + * + * Gate/ungate the automatic PHY configuration via hardware; perform + * the configuration via software instead. 
+ **/ +static void e1000_gate_hw_phy_config_ich8lan(struct e1000_hw *hw, bool gate) +{ + u32 extcnf_ctrl; + + if (hw->mac.type != e1000_pch2lan) + return; + + extcnf_ctrl = er32(EXTCNF_CTRL); + + if (gate) + extcnf_ctrl |= E1000_EXTCNF_CTRL_GATE_PHY_CFG; + else + extcnf_ctrl &= ~E1000_EXTCNF_CTRL_GATE_PHY_CFG; + + ew32(EXTCNF_CTRL, extcnf_ctrl); + return; +} + /** * e1000_lan_init_done_ich8lan - Check for PHY config completion * @hw: pointer to the HW structure @@ -1695,6 +1734,13 @@ static s32 e1000_post_phy_reset_ich8lan(struct e1000_hw *hw) /* Configure the LCD with the OEM bits in NVM */ ret_val = e1000_oem_bits_config_ich8lan(hw, true); + /* Ungate automatic PHY configuration on non-managed 82579 */ + if ((hw->mac.type == e1000_pch2lan) && + !(er32(FWSM) & E1000_ICH_FWSM_FW_VALID)) { + msleep(10); + e1000_gate_hw_phy_config_ich8lan(hw, false); + } + out: return ret_val; } @@ -1711,6 +1757,11 @@ static s32 e1000_phy_hw_reset_ich8lan(struct e1000_hw *hw) { s32 ret_val = 0; + /* Gate automatic PHY configuration by hardware on non-managed 82579 */ + if ((hw->mac.type == e1000_pch2lan) && + !(er32(FWSM) & E1000_ICH_FWSM_FW_VALID)) + e1000_gate_hw_phy_config_ich8lan(hw, true); + ret_val = e1000e_phy_hw_reset_generic(hw); if (ret_val) goto out; @@ -2975,6 +3026,14 @@ static s32 e1000_reset_hw_ich8lan(struct e1000_hw *hw) * external PHY is reset. 
*/ ctrl |= E1000_CTRL_PHY_RST; + + /* + * Gate automatic PHY configuration by hardware on + * non-managed 82579 + */ + if ((hw->mac.type == e1000_pch2lan) && + !(er32(FWSM) & E1000_ICH_FWSM_FW_VALID)) + e1000_gate_hw_phy_config_ich8lan(hw, true); } ret_val = e1000_acquire_swflag_ich8lan(hw); e_dbg("Issuing a global reset to ich8lan\n"); -- cgit v1.2.3-70-g09d2 From f064af1e500a2bf4607706f0f458163bdb2a6ea5 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 22 Sep 2010 12:43:39 +0000 Subject: net: fix a lockdep splat We have for each socket : One spinlock (sk_slock.slock) One rwlock (sk_callback_lock) Possible scenarios are : (A) (this is used in net/sunrpc/xprtsock.c) read_lock(&sk->sk_callback_lock) (without blocking BH) spin_lock(&sk->sk_slock.slock); ... read_lock(&sk->sk_callback_lock); ... (B) write_lock_bh(&sk->sk_callback_lock) stuff write_unlock_bh(&sk->sk_callback_lock) (C) spin_lock_bh(&sk->sk_slock) ... write_lock_bh(&sk->sk_callback_lock) stuff write_unlock_bh(&sk->sk_callback_lock) spin_unlock_bh(&sk->sk_slock) This (C) case conflicts with (A) : CPU1 [A] CPU2 [C] read_lock(callback_lock) spin_lock_bh(slock) We have one problematic (C) use case in inet_csk_listen_stop() : local_bh_disable(); bh_lock_sock(child); // spin_lock_bh(&sk->sk_slock) WARN_ON(sock_owned_by_user(child)); ... sock_orphan(child); // write_lock_bh(&sk->sk_callback_lock) lockdep is not happy with this, as reported by Tetsuo Handa It seems only way to deal with this is to use read_lock_bh(callbacklock) everywhere. Thanks to Jarek for pointing a bug in my first attempt and suggesting this solution. Reported-by: Tetsuo Handa Tested-by: Tetsuo Handa Signed-off-by: Eric Dumazet CC: Jarek Poplawski Tested-by: Eric Dumazet Signed-off-by: David S. 
Miller --- net/core/sock.c | 8 ++++---- net/rds/tcp_connect.c | 4 ++-- net/rds/tcp_listen.c | 4 ++-- net/rds/tcp_recv.c | 4 ++-- net/rds/tcp_send.c | 4 ++-- net/sunrpc/xprtsock.c | 28 ++++++++++++++-------------- 6 files changed, 26 insertions(+), 26 deletions(-) diff --git a/net/core/sock.c b/net/core/sock.c index b05b9b6ddb8..ef30e9d286e 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -1351,9 +1351,9 @@ int sock_i_uid(struct sock *sk) { int uid; - read_lock(&sk->sk_callback_lock); + read_lock_bh(&sk->sk_callback_lock); uid = sk->sk_socket ? SOCK_INODE(sk->sk_socket)->i_uid : 0; - read_unlock(&sk->sk_callback_lock); + read_unlock_bh(&sk->sk_callback_lock); return uid; } EXPORT_SYMBOL(sock_i_uid); @@ -1362,9 +1362,9 @@ unsigned long sock_i_ino(struct sock *sk) { unsigned long ino; - read_lock(&sk->sk_callback_lock); + read_lock_bh(&sk->sk_callback_lock); ino = sk->sk_socket ? SOCK_INODE(sk->sk_socket)->i_ino : 0; - read_unlock(&sk->sk_callback_lock); + read_unlock_bh(&sk->sk_callback_lock); return ino; } EXPORT_SYMBOL(sock_i_ino); diff --git a/net/rds/tcp_connect.c b/net/rds/tcp_connect.c index c397524c039..c519939e8da 100644 --- a/net/rds/tcp_connect.c +++ b/net/rds/tcp_connect.c @@ -43,7 +43,7 @@ void rds_tcp_state_change(struct sock *sk) struct rds_connection *conn; struct rds_tcp_connection *tc; - read_lock(&sk->sk_callback_lock); + read_lock_bh(&sk->sk_callback_lock); conn = sk->sk_user_data; if (conn == NULL) { state_change = sk->sk_state_change; @@ -68,7 +68,7 @@ void rds_tcp_state_change(struct sock *sk) break; } out: - read_unlock(&sk->sk_callback_lock); + read_unlock_bh(&sk->sk_callback_lock); state_change(sk); } diff --git a/net/rds/tcp_listen.c b/net/rds/tcp_listen.c index 975183fe695..27844f231d1 100644 --- a/net/rds/tcp_listen.c +++ b/net/rds/tcp_listen.c @@ -114,7 +114,7 @@ void rds_tcp_listen_data_ready(struct sock *sk, int bytes) rdsdebug("listen data ready sk %p\n", sk); - read_lock(&sk->sk_callback_lock); + 
read_lock_bh(&sk->sk_callback_lock); ready = sk->sk_user_data; if (ready == NULL) { /* check for teardown race */ ready = sk->sk_data_ready; @@ -131,7 +131,7 @@ void rds_tcp_listen_data_ready(struct sock *sk, int bytes) queue_work(rds_wq, &rds_tcp_listen_work); out: - read_unlock(&sk->sk_callback_lock); + read_unlock_bh(&sk->sk_callback_lock); ready(sk, bytes); } diff --git a/net/rds/tcp_recv.c b/net/rds/tcp_recv.c index 1aba6878fa5..e4379740410 100644 --- a/net/rds/tcp_recv.c +++ b/net/rds/tcp_recv.c @@ -324,7 +324,7 @@ void rds_tcp_data_ready(struct sock *sk, int bytes) rdsdebug("data ready sk %p bytes %d\n", sk, bytes); - read_lock(&sk->sk_callback_lock); + read_lock_bh(&sk->sk_callback_lock); conn = sk->sk_user_data; if (conn == NULL) { /* check for teardown race */ ready = sk->sk_data_ready; @@ -338,7 +338,7 @@ void rds_tcp_data_ready(struct sock *sk, int bytes) if (rds_tcp_read_sock(conn, GFP_ATOMIC, KM_SOFTIRQ0) == -ENOMEM) queue_delayed_work(rds_wq, &conn->c_recv_w, 0); out: - read_unlock(&sk->sk_callback_lock); + read_unlock_bh(&sk->sk_callback_lock); ready(sk, bytes); } diff --git a/net/rds/tcp_send.c b/net/rds/tcp_send.c index a28b895ff0d..2f012a07d94 100644 --- a/net/rds/tcp_send.c +++ b/net/rds/tcp_send.c @@ -224,7 +224,7 @@ void rds_tcp_write_space(struct sock *sk) struct rds_connection *conn; struct rds_tcp_connection *tc; - read_lock(&sk->sk_callback_lock); + read_lock_bh(&sk->sk_callback_lock); conn = sk->sk_user_data; if (conn == NULL) { write_space = sk->sk_write_space; @@ -244,7 +244,7 @@ void rds_tcp_write_space(struct sock *sk) queue_delayed_work(rds_wq, &conn->c_send_w, 0); out: - read_unlock(&sk->sk_callback_lock); + read_unlock_bh(&sk->sk_callback_lock); /* * write_space is only called when data leaves tcp's send queue if diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index b6309db5622..fe9306bf10c 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -800,7 +800,7 @@ static void xs_udp_data_ready(struct sock *sk, 
int len) u32 _xid; __be32 *xp; - read_lock(&sk->sk_callback_lock); + read_lock_bh(&sk->sk_callback_lock); dprintk("RPC: xs_udp_data_ready...\n"); if (!(xprt = xprt_from_sock(sk))) goto out; @@ -852,7 +852,7 @@ static void xs_udp_data_ready(struct sock *sk, int len) dropit: skb_free_datagram(sk, skb); out: - read_unlock(&sk->sk_callback_lock); + read_unlock_bh(&sk->sk_callback_lock); } static inline void xs_tcp_read_fraghdr(struct rpc_xprt *xprt, struct xdr_skb_reader *desc) @@ -1229,7 +1229,7 @@ static void xs_tcp_data_ready(struct sock *sk, int bytes) dprintk("RPC: xs_tcp_data_ready...\n"); - read_lock(&sk->sk_callback_lock); + read_lock_bh(&sk->sk_callback_lock); if (!(xprt = xprt_from_sock(sk))) goto out; if (xprt->shutdown) @@ -1248,7 +1248,7 @@ static void xs_tcp_data_ready(struct sock *sk, int bytes) read = tcp_read_sock(sk, &rd_desc, xs_tcp_data_recv); } while (read > 0); out: - read_unlock(&sk->sk_callback_lock); + read_unlock_bh(&sk->sk_callback_lock); } /* @@ -1301,7 +1301,7 @@ static void xs_tcp_state_change(struct sock *sk) { struct rpc_xprt *xprt; - read_lock(&sk->sk_callback_lock); + read_lock_bh(&sk->sk_callback_lock); if (!(xprt = xprt_from_sock(sk))) goto out; dprintk("RPC: xs_tcp_state_change client %p...\n", xprt); @@ -1313,7 +1313,7 @@ static void xs_tcp_state_change(struct sock *sk) switch (sk->sk_state) { case TCP_ESTABLISHED: - spin_lock_bh(&xprt->transport_lock); + spin_lock(&xprt->transport_lock); if (!xprt_test_and_set_connected(xprt)) { struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt); @@ -1327,7 +1327,7 @@ static void xs_tcp_state_change(struct sock *sk) xprt_wake_pending_tasks(xprt, -EAGAIN); } - spin_unlock_bh(&xprt->transport_lock); + spin_unlock(&xprt->transport_lock); break; case TCP_FIN_WAIT1: /* The client initiated a shutdown of the socket */ @@ -1365,7 +1365,7 @@ static void xs_tcp_state_change(struct sock *sk) xs_sock_mark_closed(xprt); } out: - read_unlock(&sk->sk_callback_lock); + 
read_unlock_bh(&sk->sk_callback_lock); } /** @@ -1376,7 +1376,7 @@ static void xs_error_report(struct sock *sk) { struct rpc_xprt *xprt; - read_lock(&sk->sk_callback_lock); + read_lock_bh(&sk->sk_callback_lock); if (!(xprt = xprt_from_sock(sk))) goto out; dprintk("RPC: %s client %p...\n" @@ -1384,7 +1384,7 @@ static void xs_error_report(struct sock *sk) __func__, xprt, sk->sk_err); xprt_wake_pending_tasks(xprt, -EAGAIN); out: - read_unlock(&sk->sk_callback_lock); + read_unlock_bh(&sk->sk_callback_lock); } static void xs_write_space(struct sock *sk) @@ -1416,13 +1416,13 @@ static void xs_write_space(struct sock *sk) */ static void xs_udp_write_space(struct sock *sk) { - read_lock(&sk->sk_callback_lock); + read_lock_bh(&sk->sk_callback_lock); /* from net/core/sock.c:sock_def_write_space */ if (sock_writeable(sk)) xs_write_space(sk); - read_unlock(&sk->sk_callback_lock); + read_unlock_bh(&sk->sk_callback_lock); } /** @@ -1437,13 +1437,13 @@ static void xs_udp_write_space(struct sock *sk) */ static void xs_tcp_write_space(struct sock *sk) { - read_lock(&sk->sk_callback_lock); + read_lock_bh(&sk->sk_callback_lock); /* from net/core/stream.c:sk_stream_write_space */ if (sk_stream_wspace(sk) >= sk_stream_min_wspace(sk)) xs_write_space(sk); - read_unlock(&sk->sk_callback_lock); + read_unlock_bh(&sk->sk_callback_lock); } static void xs_udp_do_set_buffer_size(struct rpc_xprt *xprt) -- cgit v1.2.3-70-g09d2 From e0f9c4f332c99b213d4a0b7cd21dc0781ceb3d86 Mon Sep 17 00:00:00 2001 From: Ondrej Zary Date: Thu, 23 Sep 2010 10:59:18 +0000 Subject: de2104x: disable autonegotiation on broken hardware At least on older 21041-AA chips (mine is rev. 11), TP duplex autonegotiation causes the card not to work at all (link is up but no packets are transmitted). de4x5 disables autonegotiation completely. But it seems to work on newer (21041-PA rev. 21) so disable it only on rev<20 chips. Signed-off-by: Ondrej Zary Acked-by: Jeff Garzik Signed-off-by: David S. 
Miller --- drivers/net/tulip/de2104x.c | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/drivers/net/tulip/de2104x.c b/drivers/net/tulip/de2104x.c index 5efa57757a2..9d6b7e9c7a6 100644 --- a/drivers/net/tulip/de2104x.c +++ b/drivers/net/tulip/de2104x.c @@ -364,6 +364,8 @@ static u16 t21040_csr15[] = { 0, 0, 0x0006, 0x0000, 0x0000, }; /* 21041 transceiver register settings: TP AUTO, BNC, AUI, TP, TP FD*/ static u16 t21041_csr13[] = { 0xEF01, 0xEF09, 0xEF09, 0xEF01, 0xEF09, }; static u16 t21041_csr14[] = { 0xFFFF, 0xF7FD, 0xF7FD, 0x6F3F, 0x6F3D, }; +/* If on-chip autonegotiation is broken, use half-duplex (FF3F) instead */ +static u16 t21041_csr14_brk[] = { 0xFF3F, 0xF7FD, 0xF7FD, 0x6F3F, 0x6F3D, }; static u16 t21041_csr15[] = { 0x0008, 0x0006, 0x000E, 0x0008, 0x0008, }; @@ -1911,8 +1913,14 @@ fill_defaults: for (i = 0; i < DE_MAX_MEDIA; i++) { if (de->media[i].csr13 == 0xffff) de->media[i].csr13 = t21041_csr13[i]; - if (de->media[i].csr14 == 0xffff) - de->media[i].csr14 = t21041_csr14[i]; + if (de->media[i].csr14 == 0xffff) { + /* autonegotiation is broken at least on some chip + revisions - rev. 0x21 works, 0x11 does not */ + if (de->pdev->revision < 0x20) + de->media[i].csr14 = t21041_csr14_brk[i]; + else + de->media[i].csr14 = t21041_csr14[i]; + } if (de->media[i].csr15 == 0xffff) de->media[i].csr15 = t21041_csr15[i]; } -- cgit v1.2.3-70-g09d2 From b0255a02351b00ca55f4eb2588d05a5db9dd1a58 Mon Sep 17 00:00:00 2001 From: Ondrej Zary Date: Fri, 24 Sep 2010 23:57:02 +0000 Subject: de2104x: fix power management At least my 21041 cards come out of suspend with bus mastering disabled so they did not work after resume(no data transferred). After adding pci_set_master(), the driver oopsed immediately on resume - because de_clean_rings() is called on suspend but de_init_rings() call was missing in resume. Also disable link (reset SIA) before sleep (de4x5 does this too). Signed-off-by: Ondrej Zary Acked-by: Jeff Garzik Signed-off-by: David S. 
Miller --- drivers/net/tulip/de2104x.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/net/tulip/de2104x.c b/drivers/net/tulip/de2104x.c index 9d6b7e9c7a6..a0be7c28c58 100644 --- a/drivers/net/tulip/de2104x.c +++ b/drivers/net/tulip/de2104x.c @@ -1231,6 +1231,7 @@ static void de_adapter_sleep (struct de_private *de) if (de->de21040) return; + dw32(CSR13, 0); /* Reset phy */ pci_read_config_dword(de->pdev, PCIPM, &pmctl); pmctl |= PM_Sleep; pci_write_config_dword(de->pdev, PCIPM, pmctl); @@ -2166,6 +2167,8 @@ static int de_resume (struct pci_dev *pdev) dev_err(&dev->dev, "pci_enable_device failed in resume\n"); goto out; } + pci_set_master(pdev); + de_init_rings(de); de_init_hw(de); out_attach: netif_device_attach(dev); -- cgit v1.2.3-70-g09d2 From ca9a783575d2affed30ef27a3427a7705527ddac Mon Sep 17 00:00:00 2001 From: Ondrej Zary Date: Sat, 25 Sep 2010 10:39:17 +0000 Subject: de2104x: fix TP link detection Compex FreedomLine 32 PnP-PCI2 cards have only TP and BNC connectors but the SROM contains AUI port too. When TP loses link, the driver switches to non-existing AUI port (which reports that carrier is always present). Connecting TP back generates LinkPass interrupt but de_media_interrupt() is broken - it only updates the link state of currently connected media, ignoring the fact that LinkPass and LinkFail bits of MacStatus register belong to the TP port only (the chip documentation says that). This patch changes de_media_interrupt() to switch media to TP when link goes up (and media type is not locked) and also to update the link state only when the TP port is used. Also the NonselPortActive (and also SelPortActive) bits of SIAStatus register need to be cleared (by writing 1) after reading or they're useless. Signed-off-by: Ondrej Zary Acked-by: Jeff Garzik Signed-off-by: David S. 
Miller --- drivers/net/tulip/de2104x.c | 23 +++++++++++++++++++++-- 1 file changed, 21 insertions(+), 2 deletions(-) diff --git a/drivers/net/tulip/de2104x.c b/drivers/net/tulip/de2104x.c index a0be7c28c58..9124c5c638d 100644 --- a/drivers/net/tulip/de2104x.c +++ b/drivers/net/tulip/de2104x.c @@ -243,6 +243,7 @@ enum { NWayState = (1 << 14) | (1 << 13) | (1 << 12), NWayRestart = (1 << 12), NonselPortActive = (1 << 9), + SelPortActive = (1 << 8), LinkFailStatus = (1 << 2), NetCxnErr = (1 << 1), }; @@ -1066,6 +1067,9 @@ static void de21041_media_timer (unsigned long data) unsigned int carrier; unsigned long flags; + /* clear port active bits */ + dw32(SIAStatus, NonselPortActive | SelPortActive); + carrier = (status & NetCxnErr) ? 0 : 1; if (carrier) { @@ -1160,14 +1164,29 @@ no_link_yet: static void de_media_interrupt (struct de_private *de, u32 status) { if (status & LinkPass) { + /* Ignore if current media is AUI or BNC and we can't use TP */ + if ((de->media_type == DE_MEDIA_AUI || + de->media_type == DE_MEDIA_BNC) && + (de->media_lock || + !de_ok_to_advertise(de, DE_MEDIA_TP_AUTO))) + return; + /* If current media is not TP, change it to TP */ + if ((de->media_type == DE_MEDIA_AUI || + de->media_type == DE_MEDIA_BNC)) { + de->media_type = DE_MEDIA_TP_AUTO; + de_stop_rxtx(de); + de_set_media(de); + de_start_rxtx(de); + } de_link_up(de); mod_timer(&de->media_timer, jiffies + DE_TIMER_LINK); return; } BUG_ON(!(status & LinkFail)); - - if (netif_carrier_ok(de->dev)) { + /* Mark the link as down only if current media is TP */ + if (netif_carrier_ok(de->dev) && de->media_type != DE_MEDIA_AUI && + de->media_type != DE_MEDIA_BNC) { de_link_down(de); mod_timer(&de->media_timer, jiffies + DE_TIMER_NO_LINK); } -- cgit v1.2.3-70-g09d2 From a3d6713fbd2ccb50898a6f88664da96a7857c039 Mon Sep 17 00:00:00 2001 From: Karl Hiramoto Date: Thu, 23 Sep 2010 01:50:54 +0000 Subject: br2684: fix scheduling while atomic You can't call atomic_notifier_chain_unregister() while in atomic 
context. Fix, call un/register_atmdevice_notifier in module __init and __exit. Bug report: http://comments.gmane.org/gmane.linux.network/172603 Reported-by: Mikko Vinni Tested-by: Mikko Vinni Signed-off-by: Karl Hiramoto Signed-off-by: David S. Miller --- net/atm/br2684.c | 12 ++---------- 1 file changed, 2 insertions(+), 10 deletions(-) diff --git a/net/atm/br2684.c b/net/atm/br2684.c index 651babdfab3..ad2b232a205 100644 --- a/net/atm/br2684.c +++ b/net/atm/br2684.c @@ -399,12 +399,6 @@ static void br2684_push(struct atm_vcc *atmvcc, struct sk_buff *skb) unregister_netdev(net_dev); free_netdev(net_dev); } - read_lock_irq(&devs_lock); - if (list_empty(&br2684_devs)) { - /* last br2684 device */ - unregister_atmdevice_notifier(&atm_dev_notifier); - } - read_unlock_irq(&devs_lock); return; } @@ -675,7 +669,6 @@ static int br2684_create(void __user *arg) if (list_empty(&br2684_devs)) { /* 1st br2684 device */ - register_atmdevice_notifier(&atm_dev_notifier); brdev->number = 1; } else brdev->number = BRPRIV(list_entry_brdev(br2684_devs.prev))->number + 1; @@ -815,6 +808,7 @@ static int __init br2684_init(void) return -ENOMEM; #endif register_atm_ioctl(&br2684_ioctl_ops); + register_atmdevice_notifier(&atm_dev_notifier); return 0; } @@ -830,9 +824,7 @@ static void __exit br2684_exit(void) #endif - /* if not already empty */ - if (!list_empty(&br2684_devs)) - unregister_atmdevice_notifier(&atm_dev_notifier); + unregister_atmdevice_notifier(&atm_dev_notifier); while (!list_empty(&br2684_devs)) { net_dev = list_entry_brdev(br2684_devs.next); -- cgit v1.2.3-70-g09d2 From 693019e90ca45d881109d32c0c6d29adf03f6447 Mon Sep 17 00:00:00 2001 From: Tom Herbert Date: Thu, 23 Sep 2010 11:19:54 +0000 Subject: net: reset skb queue mapping when rx'ing over tunnel Reset queue mapping when an skb is reentering the stack via a tunnel. On second pass, the queue mapping from the original device is no longer valid. Signed-off-by: Tom Herbert Acked-by: Eric Dumazet Signed-off-by: David S. 
Miller --- include/net/dst.h | 1 + 1 file changed, 1 insertion(+) diff --git a/include/net/dst.h b/include/net/dst.h index 81d1413a870..02386505033 100644 --- a/include/net/dst.h +++ b/include/net/dst.h @@ -242,6 +242,7 @@ static inline void skb_tunnel_rx(struct sk_buff *skb, struct net_device *dev) dev->stats.rx_packets++; dev->stats.rx_bytes += skb->len; skb->rxhash = 0; + skb_set_queue_mapping(skb, 0); skb_dst_drop(skb); nf_reset(skb); } -- cgit v1.2.3-70-g09d2 From 62038e4a146b97352d5911e6ede36c58d4187c3e Mon Sep 17 00:00:00 2001 From: Vincent Stehlé Date: Sun, 26 Sep 2010 18:50:05 -0700 Subject: smsc911x: Add MODULE_ALIAS() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This enables auto loading for the smsc911x ethernet driver. Signed-off-by: Vincent Stehlé Signed-off-by: David S. Miller --- drivers/net/smsc911x.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/smsc911x.c b/drivers/net/smsc911x.c index 0909ae934ad..8150ba15411 100644 --- a/drivers/net/smsc911x.c +++ b/drivers/net/smsc911x.c @@ -58,6 +58,7 @@ MODULE_LICENSE("GPL"); MODULE_VERSION(SMSC_DRV_VERSION); +MODULE_ALIAS("platform:smsc911x"); #if USE_DEBUG > 0 static int debug = 16; -- cgit v1.2.3-70-g09d2 From 52933f052186877afd218aef7a1b2dbdb010939f Mon Sep 17 00:00:00 2001 From: Kulikov Vasiliy Date: Sat, 25 Sep 2010 23:58:00 +0000 Subject: ibm_newemac: use free_netdev(netdev) instead of kfree() Freeing netdev without free_netdev() leads to net, tx leaks. It might lead to dereferencing freed pointer. The semantic match that finds this problem is as follows: (http://coccinelle.lip6.fr/) @@ struct net_device* dev; @@ -kfree(dev) +free_netdev(dev) Signed-off-by: David S. 
Miller --- drivers/net/ibm_newemac/core.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ibm_newemac/core.c b/drivers/net/ibm_newemac/core.c index 3506fd6ad72..519e19e2395 100644 --- a/drivers/net/ibm_newemac/core.c +++ b/drivers/net/ibm_newemac/core.c @@ -2928,7 +2928,7 @@ static int __devinit emac_probe(struct platform_device *ofdev, if (dev->emac_irq != NO_IRQ) irq_dispose_mapping(dev->emac_irq); err_free: - kfree(ndev); + free_netdev(ndev); err_gone: /* if we were on the bootlist, remove us as we won't show up and * wake up all waiters to notify them in case they were waiting @@ -2971,7 +2971,7 @@ static int __devexit emac_remove(struct platform_device *ofdev) if (dev->emac_irq != NO_IRQ) irq_dispose_mapping(dev->emac_irq); - kfree(dev->ndev); + free_netdev(dev->ndev); return 0; } -- cgit v1.2.3-70-g09d2 From 22138d307329e1968fc698821095b87c2fd5de12 Mon Sep 17 00:00:00 2001 From: Kulikov Vasiliy Date: Sat, 25 Sep 2010 23:58:03 +0000 Subject: rionet: use free_netdev(netdev) instead of kfree() Freeing netdev without free_netdev() leads to net, tx leaks. It might lead to dereferencing freed pointer. The semantic match that finds this problem is as follows: (http://coccinelle.lip6.fr/) @@ struct net_device* dev; @@ -kfree(dev) +free_netdev(dev) Signed-off-by: David S. Miller --- drivers/net/rionet.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/rionet.c b/drivers/net/rionet.c index 07eb884ff98..44150f2f7bf 100644 --- a/drivers/net/rionet.c +++ b/drivers/net/rionet.c @@ -384,7 +384,7 @@ static void rionet_remove(struct rio_dev *rdev) free_pages((unsigned long)rionet_active, rdev->net->hport->sys_size ? 
__ilog2(sizeof(void *)) + 4 : 0); unregister_netdev(ndev); - kfree(ndev); + free_netdev(ndev); list_for_each_entry_safe(peer, tmp, &rionet_peers, node) { list_del(&peer->node); -- cgit v1.2.3-70-g09d2 From 8d879de89807d82bc4cc3e9d73609b874fa9458c Mon Sep 17 00:00:00 2001 From: Kulikov Vasiliy Date: Sat, 25 Sep 2010 23:58:06 +0000 Subject: sgiseeq: use free_netdev(netdev) instead of kfree() Freeing netdev without free_netdev() leads to net, tx leaks. It might lead to dereferencing freed pointer. The semantic match that finds this problem is as follows: (http://coccinelle.lip6.fr/) @@ struct net_device* dev; @@ -kfree(dev) +free_netdev(dev) Signed-off-by: David S. Miller --- drivers/net/sgiseeq.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/sgiseeq.c b/drivers/net/sgiseeq.c index cc4bd8c65f8..9265315baa0 100644 --- a/drivers/net/sgiseeq.c +++ b/drivers/net/sgiseeq.c @@ -804,7 +804,7 @@ static int __devinit sgiseeq_probe(struct platform_device *pdev) err_out_free_page: free_page((unsigned long) sp->srings); err_out_free_dev: - kfree(dev); + free_netdev(dev); err_out: return err; -- cgit v1.2.3-70-g09d2 From bc68580d41b131396054a1a04a7df4948555ed97 Mon Sep 17 00:00:00 2001 From: Vasiliy Kulikov Date: Sun, 26 Sep 2010 18:56:06 -0700 Subject: s390: use free_netdev(netdev) instead of kfree() Freeing netdev without free_netdev() leads to net, tx leaks. It might lead to dereferencing freed pointer. The semantic match that finds this problem is as follows: (http://coccinelle.lip6.fr/) @@ struct net_device* dev; @@ -kfree(dev) +free_netdev(dev) Signed-off-by: David S. 
Miller --- drivers/s390/net/ctcm_main.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/s390/net/ctcm_main.c b/drivers/s390/net/ctcm_main.c index 6edf20b62de..2c7d2d9be4d 100644 --- a/drivers/s390/net/ctcm_main.c +++ b/drivers/s390/net/ctcm_main.c @@ -1154,7 +1154,7 @@ static struct net_device *ctcm_init_netdevice(struct ctcm_priv *priv) dev_fsm, dev_fsm_len, GFP_KERNEL); if (priv->fsm == NULL) { CTCMY_DBF_DEV(SETUP, dev, "init_fsm error"); - kfree(dev); + free_netdev(dev); return NULL; } fsm_newstate(priv->fsm, DEV_STATE_STOPPED); @@ -1165,7 +1165,7 @@ static struct net_device *ctcm_init_netdevice(struct ctcm_priv *priv) grp = ctcmpc_init_mpc_group(priv); if (grp == NULL) { MPC_DBF_DEV(SETUP, dev, "init_mpc_group error"); - kfree(dev); + free_netdev(dev); return NULL; } tasklet_init(&grp->mpc_tasklet2, -- cgit v1.2.3-70-g09d2 From 2cc6d2bf3d6195fabcf0febc192c01f99519a8f3 Mon Sep 17 00:00:00 2001 From: Neil Horman Date: Fri, 24 Sep 2010 09:55:52 +0000 Subject: ipv6: add a missing unregister_pernet_subsys call Clean up a missing exit path in the ipv6 module init routines. In addrconf_init we call ipv6_addr_label_init which calls register_pernet_subsys for the ipv6_addr_label_ops structure. But if module loading fails, or if the ipv6 module is removed, there is no corresponding unregister_pernet_subsys call, which leaves a now-bogus address on the pernet_list, leading to oopses in subsequent registrations. This patch cleans up both the failed load path and the unload path. Tested by myself with good results. Signed-off-by: Neil Horman include/net/addrconf.h | 1 + net/ipv6/addrconf.c | 11 ++++++++--- net/ipv6/addrlabel.c | 5 +++++ 3 files changed, 14 insertions(+), 3 deletions(-) Signed-off-by: David S. 
Miller --- include/net/addrconf.h | 1 + net/ipv6/addrconf.c | 11 ++++++++--- net/ipv6/addrlabel.c | 5 +++++ 3 files changed, 14 insertions(+), 3 deletions(-) diff --git a/include/net/addrconf.h b/include/net/addrconf.h index 45375b41a2a..4d40c4d0230 100644 --- a/include/net/addrconf.h +++ b/include/net/addrconf.h @@ -121,6 +121,7 @@ static inline int addrconf_finite_timeout(unsigned long timeout) * IPv6 Address Label subsystem (addrlabel.c) */ extern int ipv6_addr_label_init(void); +extern void ipv6_addr_label_cleanup(void); extern void ipv6_addr_label_rtnl_register(void); extern u32 ipv6_addr_label(struct net *net, const struct in6_addr *addr, diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index ab70a3fbcaf..324fac3b6c1 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -4637,10 +4637,12 @@ int __init addrconf_init(void) if (err < 0) { printk(KERN_CRIT "IPv6 Addrconf:" " cannot initialize default policy table: %d.\n", err); - return err; + goto out; } - register_pernet_subsys(&addrconf_ops); + err = register_pernet_subsys(&addrconf_ops); + if (err < 0) + goto out_addrlabel; /* The addrconf netdev notifier requires that loopback_dev * has it's ipv6 private information allocated and setup @@ -4692,7 +4694,9 @@ errout: unregister_netdevice_notifier(&ipv6_dev_notf); errlo: unregister_pernet_subsys(&addrconf_ops); - +out_addrlabel: + ipv6_addr_label_cleanup(); +out: return err; } @@ -4703,6 +4707,7 @@ void addrconf_cleanup(void) unregister_netdevice_notifier(&ipv6_dev_notf); unregister_pernet_subsys(&addrconf_ops); + ipv6_addr_label_cleanup(); rtnl_lock(); diff --git a/net/ipv6/addrlabel.c b/net/ipv6/addrlabel.c index f0e774cea38..8175f802651 100644 --- a/net/ipv6/addrlabel.c +++ b/net/ipv6/addrlabel.c @@ -393,6 +393,11 @@ int __init ipv6_addr_label_init(void) return register_pernet_subsys(&ipv6_addr_label_ops); } +void ipv6_addr_label_cleanup(void) +{ + unregister_pernet_subsys(&ipv6_addr_label_ops); +} + static const struct nla_policy 
ifal_policy[IFAL_MAX+1] = { [IFAL_ADDRESS] = { .len = sizeof(struct in6_addr), }, [IFAL_LABEL] = { .len = sizeof(u32), }, -- cgit v1.2.3-70-g09d2 From 3fd6c88ef875a14740801ebfc6b6e4e064a1cdd4 Mon Sep 17 00:00:00 2001 From: Jan Beulich Date: Mon, 27 Sep 2010 11:07:00 -0700 Subject: 3c59x: fix regression from patch "Add ethtool WOL support" This patch (commit 690a1f2002a3091bd18a501f46c9530f10481463) added a new call site for acpi_set_WOL() without checking that the function is actually suitable to be called via vortex_set_wol+0xcd/0xe0 [3c59x] dev_ethtool+0xa5a/0xb70 dev_ioctl+0x2e0/0x4b0 T.961+0x49/0x50 sock_ioctl+0x47/0x290 do_vfs_ioctl+0x7f/0x340 sys_ioctl+0x80/0xa0 system_call_fastpath+0x16/0x1b i.e. outside of code paths run when the device is not yet enabled or already disabled. In particular, putting the device into D3hot is a pretty bad idea when it was already brought up. Furthermore, all prior callers of the function made sure they're actually dealing with a PCI device, while the newly added one didn't. In the same spirit, the .get_wol handler shouldn't indicate support for WOL for non-PCI devices. Signed-off-by: Jan Beulich Signed-off-by: David S. 
Miller --- drivers/net/3c59x.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/drivers/net/3c59x.c b/drivers/net/3c59x.c index fa42103b287..179871d9e71 100644 --- a/drivers/net/3c59x.c +++ b/drivers/net/3c59x.c @@ -2942,6 +2942,9 @@ static void vortex_get_wol(struct net_device *dev, struct ethtool_wolinfo *wol) { struct vortex_private *vp = netdev_priv(dev); + if (!VORTEX_PCI(vp)) + return; + wol->supported = WAKE_MAGIC; wol->wolopts = 0; @@ -2952,6 +2955,10 @@ static void vortex_get_wol(struct net_device *dev, struct ethtool_wolinfo *wol) static int vortex_set_wol(struct net_device *dev, struct ethtool_wolinfo *wol) { struct vortex_private *vp = netdev_priv(dev); + + if (!VORTEX_PCI(vp)) + return -EOPNOTSUPP; + if (wol->wolopts & ~WAKE_MAGIC) return -EINVAL; @@ -3201,6 +3208,9 @@ static void acpi_set_WOL(struct net_device *dev) return; } + if (VORTEX_PCI(vp)->current_state < PCI_D3hot) + return; + /* Change the power state to D3; RxEnable doesn't take effect. */ pci_set_power_state(VORTEX_PCI(vp), PCI_D3hot); } -- cgit v1.2.3-70-g09d2 From b3de7559afbb7a8a35b4be975a6adf6c5e3cdca0 Mon Sep 17 00:00:00 2001 From: Yuchung Cheng Date: Fri, 24 Sep 2010 13:22:06 +0000 Subject: tcp: fix TSO FACK loss marking in tcp_mark_head_lost MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When TCP uses FACK algorithm to mark lost packets in tcp_mark_head_lost(), if the number of packets in the (TSO) skb is greater than the number of packets that should be marked lost, TCP incorrectly exits the loop and marks no packets lost in the skb. This underestimates tp->lost_out and affects the recovery/retransmission. This patch fragments the skb and marks the correct amount of packets lost. Signed-off-by: Yuchung Cheng Acked-by: Ilpo Järvinen Signed-off-by: David S. 
Miller --- net/ipv4/tcp_input.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 149e79ac289..b55f60f6fcb 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -2545,7 +2545,8 @@ static void tcp_mark_head_lost(struct sock *sk, int packets) cnt += tcp_skb_pcount(skb); if (cnt > packets) { - if (tcp_is_sack(tp) || (oldcnt >= packets)) + if ((tcp_is_sack(tp) && !tcp_is_fack(tp)) || + (oldcnt >= packets)) break; mss = skb_shinfo(skb)->gso_size; -- cgit v1.2.3-70-g09d2 From 7e1b33e5ea392dfc984fc63b76ca75acbf249dcd Mon Sep 17 00:00:00 2001 From: Ulrich Weber Date: Mon, 27 Sep 2010 15:02:18 -0700 Subject: ipv6: add IPv6 to neighbour table overflow warning IPv4 and IPv6 have separate neighbour tables, so the warning messages should be distinguishable. [ Add a suitable message prefix on the ipv4 side as well -DaveM ] Signed-off-by: Ulrich Weber Signed-off-by: David S. Miller --- net/ipv4/route.c | 2 +- net/ipv6/route.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 6298f75d5e9..ac6559cb54f 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -1231,7 +1231,7 @@ restart: } if (net_ratelimit()) - printk(KERN_WARNING "Neighbour table overflow.\n"); + printk(KERN_WARNING "ipv4: Neighbour table overflow.\n"); rt_drop(rt); return -ENOBUFS; } diff --git a/net/ipv6/route.c b/net/ipv6/route.c index d126365ac04..8323136bdc5 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -670,7 +670,7 @@ static struct rt6_info *rt6_alloc_cow(struct rt6_info *ort, struct in6_addr *dad if (net_ratelimit()) printk(KERN_WARNING - "Neighbour table overflow.\n"); + "ipv6: Neighbour table overflow.\n"); dst_free(&rt->dst); return NULL; } -- cgit v1.2.3-70-g09d2 From fb0c5f0bc8b69b40549449ee7fc65f3706f12062 Mon Sep 17 00:00:00 2001 From: Ulrich Weber Date: Mon, 27 Sep 2010 03:31:00 +0000 Subject: tproxy: check for transparent flag in ip_route_newports as 
done in ip_route_connect() Signed-off-by: Ulrich Weber Signed-off-by: David S. Miller --- include/net/route.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/include/net/route.h b/include/net/route.h index bd732d62e1c..7e5e73bfa4d 100644 --- a/include/net/route.h +++ b/include/net/route.h @@ -199,6 +199,8 @@ static inline int ip_route_newports(struct rtable **rp, u8 protocol, fl.fl_ip_sport = sport; fl.fl_ip_dport = dport; fl.proto = protocol; + if (inet_sk(sk)->transparent) + fl.flags |= FLOWI_FLAG_ANYSRC; ip_rt_put(*rp); *rp = NULL; security_sk_classify_flow(sk, &fl); -- cgit v1.2.3-70-g09d2 From 387a85628782690b56492dae4bbf544639f5d4a9 Mon Sep 17 00:00:00 2001 From: Ondrej Zary Date: Mon, 27 Sep 2010 11:41:45 +0000 Subject: de2104x: fix ethtool When the interface is up, using ethtool breaks it because: a) link is put down but media_timer interval is not shortened to NO_LINK b) rxtx is stopped but not restarted Also manual 10baseT-HD (and probably FD too - untested) mode does not work - the link is forced up, packets are transmitted but nothing is received. Changing CSR14 value to match documentation (not disabling link check) fixes this. Signed-off-by: Ondrej Zary Acked-by: Jeff Garzik Signed-off-by: David S. 
Miller --- drivers/net/tulip/de2104x.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/net/tulip/de2104x.c b/drivers/net/tulip/de2104x.c index 9124c5c638d..6888e3d4146 100644 --- a/drivers/net/tulip/de2104x.c +++ b/drivers/net/tulip/de2104x.c @@ -364,9 +364,9 @@ static u16 t21040_csr15[] = { 0, 0, 0x0006, 0x0000, 0x0000, }; /* 21041 transceiver register settings: TP AUTO, BNC, AUI, TP, TP FD*/ static u16 t21041_csr13[] = { 0xEF01, 0xEF09, 0xEF09, 0xEF01, 0xEF09, }; -static u16 t21041_csr14[] = { 0xFFFF, 0xF7FD, 0xF7FD, 0x6F3F, 0x6F3D, }; +static u16 t21041_csr14[] = { 0xFFFF, 0xF7FD, 0xF7FD, 0x7F3F, 0x7F3D, }; /* If on-chip autonegotiation is broken, use half-duplex (FF3F) instead */ -static u16 t21041_csr14_brk[] = { 0xFF3F, 0xF7FD, 0xF7FD, 0x6F3F, 0x6F3D, }; +static u16 t21041_csr14_brk[] = { 0xFF3F, 0xF7FD, 0xF7FD, 0x7F3F, 0x7F3D, }; static u16 t21041_csr15[] = { 0x0008, 0x0006, 0x000E, 0x0008, 0x0008, }; @@ -1596,12 +1596,15 @@ static int __de_set_settings(struct de_private *de, struct ethtool_cmd *ecmd) return 0; /* nothing to change */ de_link_down(de); + mod_timer(&de->media_timer, jiffies + DE_TIMER_NO_LINK); de_stop_rxtx(de); de->media_type = new_media; de->media_lock = media_lock; de->media_advertise = ecmd->advertising; de_set_media(de); + if (netif_running(de->dev)) + de_start_rxtx(de); return 0; } -- cgit v1.2.3-70-g09d2 From 0b20406cda621c2495d10baab1e87127ceb43337 Mon Sep 17 00:00:00 2001 From: Sven Eckelmann Date: Mon, 27 Sep 2010 15:54:44 -0700 Subject: net/9p: Mount only matching virtio channels p9_virtio_create will only compare the channel's tag characters against the device name till the end of the channel's tag but not till the end of the device name. This means that if a user defines channels with the tags foo and foobar then he would mount foo when he requested foonot and may mount foo when he requested foobar. 
Thus it is necessary to check both string lengths against each other in case of a successful partial string match. Signed-off-by: Sven Eckelmann Signed-off-by: David S. Miller --- net/9p/trans_virtio.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/9p/trans_virtio.c b/net/9p/trans_virtio.c index dcfbe99ff81..b88515936e4 100644 --- a/net/9p/trans_virtio.c +++ b/net/9p/trans_virtio.c @@ -329,7 +329,8 @@ p9_virtio_create(struct p9_client *client, const char *devname, char *args) mutex_lock(&virtio_9p_lock); list_for_each_entry(chan, &virtio_chan_list, chan_list) { - if (!strncmp(devname, chan->tag, chan->tag_len)) { + if (!strncmp(devname, chan->tag, chan->tag_len) && + strlen(devname) == chan->tag_len) { if (!chan->inuse) { chan->inuse = true; found = 1; -- cgit v1.2.3-70-g09d2 From 01db403cf99f739f86903314a489fb420e0e254f Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Mon, 27 Sep 2010 20:24:54 -0700 Subject: tcp: Fix >4GB writes on 64-bit. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fixes kernel bugzilla #16603 tcp_sendmsg() truncates iov_len to an 'int' which causes a 4GB write to write zero bytes, for example. There is also the problem higher up of how verify_iovec() works. It wants to prevent the total length from looking like an error return value. However it does this using 'int', but syscalls return 'long' (and thus signed 64-bit on 64-bit machines). So it could trigger false-positives on 64-bit as written. So fix it to use 'long'. Reported-by: Olaf Bonorden Reported-by: Daniel Büse Reported-by: Andrew Morton Signed-off-by: David S. 
Miller --- include/linux/socket.h | 2 +- net/core/iovec.c | 5 +++-- net/ipv4/tcp.c | 2 +- 3 files changed, 5 insertions(+), 4 deletions(-) diff --git a/include/linux/socket.h b/include/linux/socket.h index a2fada9becb..a8f56e1ec76 100644 --- a/include/linux/socket.h +++ b/include/linux/socket.h @@ -322,7 +322,7 @@ extern int csum_partial_copy_fromiovecend(unsigned char *kdata, int offset, unsigned int len, __wsum *csump); -extern int verify_iovec(struct msghdr *m, struct iovec *iov, struct sockaddr *address, int mode); +extern long verify_iovec(struct msghdr *m, struct iovec *iov, struct sockaddr *address, int mode); extern int memcpy_toiovec(struct iovec *v, unsigned char *kdata, int len); extern int memcpy_toiovecend(const struct iovec *v, unsigned char *kdata, int offset, int len); diff --git a/net/core/iovec.c b/net/core/iovec.c index 1cd98df412d..e6b133b77cc 100644 --- a/net/core/iovec.c +++ b/net/core/iovec.c @@ -35,9 +35,10 @@ * in any case. */ -int verify_iovec(struct msghdr *m, struct iovec *iov, struct sockaddr *address, int mode) +long verify_iovec(struct msghdr *m, struct iovec *iov, struct sockaddr *address, int mode) { - int size, err, ct; + int size, ct; + long err; if (m->msg_namelen) { if (mode == VERIFY_READ) { diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 95d75d44392..f115ea68a4e 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -943,7 +943,7 @@ int tcp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, sg = sk->sk_route_caps & NETIF_F_SG; while (--iovlen >= 0) { - int seglen = iov->iov_len; + size_t seglen = iov->iov_len; unsigned char __user *from = iov->iov_base; iov++; -- cgit v1.2.3-70-g09d2