Diffstat (limited to 'net')
145 files changed, 2571 insertions, 2410 deletions
diff --git a/net/atm/mpc.c b/net/atm/mpc.c index e8e0e7a8a23..0e982222d42 100644 --- a/net/atm/mpc.c +++ b/net/atm/mpc.c @@ -599,7 +599,7 @@ static netdev_tx_t mpc_send_packet(struct sk_buff *skb, } non_ip: - return mpc->old_ops->ndo_start_xmit(skb, dev); + return __netdev_start_xmit(mpc->old_ops, skb, dev, false); } static int atm_mpoa_vcc_attach(struct atm_vcc *vcc, void __user *arg) diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c index 7751c92c8c5..648d79ccf46 100644 --- a/net/bridge/br_multicast.c +++ b/net/bridge/br_multicast.c @@ -1822,7 +1822,7 @@ static void br_multicast_query_expired(struct net_bridge *br, if (query->startup_sent < br->multicast_startup_query_count) query->startup_sent++; - rcu_assign_pointer(querier, NULL); + RCU_INIT_POINTER(querier, NULL); br_multicast_send_query(br, NULL, query); spin_unlock(&br->multicast_lock); } diff --git a/net/core/dev.c b/net/core/dev.c index ab9a16530c3..3c6a967e583 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -2485,52 +2485,6 @@ static int illegal_highdma(struct net_device *dev, struct sk_buff *skb) return 0; } -struct dev_gso_cb { - void (*destructor)(struct sk_buff *skb); -}; - -#define DEV_GSO_CB(skb) ((struct dev_gso_cb *)(skb)->cb) - -static void dev_gso_skb_destructor(struct sk_buff *skb) -{ - struct dev_gso_cb *cb; - - kfree_skb_list(skb->next); - skb->next = NULL; - - cb = DEV_GSO_CB(skb); - if (cb->destructor) - cb->destructor(skb); -} - -/** - * dev_gso_segment - Perform emulated hardware segmentation on skb. - * @skb: buffer to segment - * @features: device features as applicable to this skb - * - * This function segments the given skb and stores the list of segments - * in skb->next. - */ -static int dev_gso_segment(struct sk_buff *skb, netdev_features_t features) -{ - struct sk_buff *segs; - - segs = skb_gso_segment(skb, features); - - /* Verifying header integrity only. */ - if (!segs) - return 0; - - if (IS_ERR(segs)) - return PTR_ERR(segs); - - skb->next = segs; - DEV_GSO_CB(skb)->destructor = skb->destructor; - skb->destructor = dev_gso_skb_destructor; - - return 0; -} - /* If MPLS offload request, verify we are testing hardware MPLS features * instead of standard features for the netdev. 
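One of the smaller changes above swaps rcu_assign_pointer(querier, NULL) for RCU_INIT_POINTER(querier, NULL) in the bridge multicast code. A minimal sketch of why that is legal (standard RCU API; the names here are illustrative): rcu_assign_pointer() orders initialization of the pointed-to object before publication, and a NULL store publishes nothing, so the barrier can be dropped.

    struct foo { int a; };
    struct foo __rcu *gp;

    void publish(struct foo *f)
    {
            f->a = 1;                  /* must be visible to readers... */
            rcu_assign_pointer(gp, f); /* ...before gp points at f */
    }

    void retract(void)
    {
            /* storing NULL publishes no data, so no barrier is required */
            RCU_INIT_POINTER(gp, NULL);
    }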
*/ @@ -2605,119 +2559,125 @@ netdev_features_t netif_skb_features(struct sk_buff *skb) } EXPORT_SYMBOL(netif_skb_features); -int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev, - struct netdev_queue *txq) +static int xmit_one(struct sk_buff *skb, struct net_device *dev, + struct netdev_queue *txq, bool more) { - const struct net_device_ops *ops = dev->netdev_ops; + unsigned int len; + int rc; + + if (!list_empty(&ptype_all)) + dev_queue_xmit_nit(skb, dev); + + len = skb->len; + trace_net_dev_start_xmit(skb, dev); + rc = netdev_start_xmit(skb, dev, txq, more); + trace_net_dev_xmit(skb, rc, dev, len); + + return rc; +} + +struct sk_buff *dev_hard_start_xmit(struct sk_buff *first, struct net_device *dev, + struct netdev_queue *txq, int *ret) +{ + struct sk_buff *skb = first; int rc = NETDEV_TX_OK; - unsigned int skb_len; - if (likely(!skb->next)) { - netdev_features_t features; + while (skb) { + struct sk_buff *next = skb->next; - /* - * If device doesn't need skb->dst, release it right now while - * its hot in this cpu cache - */ - if (dev->priv_flags & IFF_XMIT_DST_RELEASE) - skb_dst_drop(skb); + skb->next = NULL; + rc = xmit_one(skb, dev, txq, next != NULL); + if (unlikely(!dev_xmit_complete(rc))) { + skb->next = next; + goto out; + } - features = netif_skb_features(skb); + skb = next; + if (netif_xmit_stopped(txq) && skb) { + rc = NETDEV_TX_BUSY; + break; + } + } - if (vlan_tx_tag_present(skb) && - !vlan_hw_offload_capable(features, skb->vlan_proto)) { - skb = __vlan_put_tag(skb, skb->vlan_proto, - vlan_tx_tag_get(skb)); - if (unlikely(!skb)) - goto out; +out: + *ret = rc; + return skb; +} +struct sk_buff *validate_xmit_vlan(struct sk_buff *skb, netdev_features_t features) +{ + if (vlan_tx_tag_present(skb) && + !vlan_hw_offload_capable(features, skb->vlan_proto)) { + skb = __vlan_put_tag(skb, skb->vlan_proto, + vlan_tx_tag_get(skb)); + if (skb) skb->vlan_tci = 0; - } + } + return skb; +} - /* If encapsulation offload request, verify we are testing - * hardware encapsulation features instead of standard - * features for the netdev - */ - if (skb->encapsulation) - features &= dev->hw_enc_features; +struct sk_buff *validate_xmit_skb(struct sk_buff *skb, struct net_device *dev) +{ + netdev_features_t features; - if (netif_needs_gso(skb, features)) { - if (unlikely(dev_gso_segment(skb, features))) - goto out_kfree_skb; - if (skb->next) - goto gso; - } else { - if (skb_needs_linearize(skb, features) && - __skb_linearize(skb)) - goto out_kfree_skb; + if (skb->next) + return skb; - /* If packet is not checksummed and device does not - * support checksumming for this protocol, complete - * checksumming here. 
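With this rework, dev_hard_start_xmit() takes the head of an skb list and returns whatever could not be transmitted, with the verdict left in *ret. A hedged sketch of the calling convention (requeue_list() is invented for illustration):

    static int xmit_list(struct sk_buff *list, struct net_device *dev,
                         struct netdev_queue *txq)
    {
            int rc;

            list = dev_hard_start_xmit(list, dev, txq, &rc);
            if (list)                    /* queue stopped or error: the */
                    requeue_list(list);  /* remainder still belongs to us */
            return rc;
    }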
- */ - if (skb->ip_summed == CHECKSUM_PARTIAL) { - if (skb->encapsulation) - skb_set_inner_transport_header(skb, - skb_checksum_start_offset(skb)); - else - skb_set_transport_header(skb, - skb_checksum_start_offset(skb)); - if (!(features & NETIF_F_ALL_CSUM) && - skb_checksum_help(skb)) - goto out_kfree_skb; - } - } + /* If device doesn't need skb->dst, release it right now while + * its hot in this cpu cache + */ + if (dev->priv_flags & IFF_XMIT_DST_RELEASE) + skb_dst_drop(skb); - if (!list_empty(&ptype_all)) - dev_queue_xmit_nit(skb, dev); + features = netif_skb_features(skb); + skb = validate_xmit_vlan(skb, features); + if (unlikely(!skb)) + goto out_null; - skb_len = skb->len; - trace_net_dev_start_xmit(skb, dev); - rc = ops->ndo_start_xmit(skb, dev); - trace_net_dev_xmit(skb, rc, dev, skb_len); - if (rc == NETDEV_TX_OK) - txq_trans_update(txq); - return rc; - } + /* If encapsulation offload request, verify we are testing + * hardware encapsulation features instead of standard + * features for the netdev + */ + if (skb->encapsulation) + features &= dev->hw_enc_features; -gso: - do { - struct sk_buff *nskb = skb->next; + if (netif_needs_gso(skb, features)) { + struct sk_buff *segs; - skb->next = nskb->next; - nskb->next = NULL; + segs = skb_gso_segment(skb, features); + kfree_skb(skb); + if (IS_ERR(segs)) + segs = NULL; + skb = segs; + } else { + if (skb_needs_linearize(skb, features) && + __skb_linearize(skb)) + goto out_kfree_skb; - if (!list_empty(&ptype_all)) - dev_queue_xmit_nit(nskb, dev); - - skb_len = nskb->len; - trace_net_dev_start_xmit(nskb, dev); - rc = ops->ndo_start_xmit(nskb, dev); - trace_net_dev_xmit(nskb, rc, dev, skb_len); - if (unlikely(rc != NETDEV_TX_OK)) { - if (rc & ~NETDEV_TX_MASK) - goto out_kfree_gso_skb; - nskb->next = skb->next; - skb->next = nskb; - return rc; + /* If packet is not checksummed and device does not + * support checksumming for this protocol, complete + * checksumming here. 
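Condensed restatement of the non-GSO branch of validate_xmit_skb() (transport-header setup omitted): linearize when the device cannot handle the skb layout, then fall back to a software checksum when the device cannot checksum this protocol.

    if (skb_needs_linearize(skb, features) && __skb_linearize(skb))
            goto out_kfree_skb;
    if (skb->ip_summed == CHECKSUM_PARTIAL &&
        !(features & NETIF_F_ALL_CSUM) &&
        skb_checksum_help(skb))
            goto out_kfree_skb;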
+ */ + if (skb->ip_summed == CHECKSUM_PARTIAL) { + if (skb->encapsulation) + skb_set_inner_transport_header(skb, + skb_checksum_start_offset(skb)); + else + skb_set_transport_header(skb, + skb_checksum_start_offset(skb)); + if (!(features & NETIF_F_ALL_CSUM) && + skb_checksum_help(skb)) + goto out_kfree_skb; } - txq_trans_update(txq); - if (unlikely(netif_xmit_stopped(txq) && skb->next)) - return NETDEV_TX_BUSY; - } while (skb->next); - -out_kfree_gso_skb: - if (likely(skb->next == NULL)) { - skb->destructor = DEV_GSO_CB(skb)->destructor; - consume_skb(skb); - return rc; } + + return skb; + out_kfree_skb: kfree_skb(skb); -out: - return rc; +out_null: + return NULL; } -EXPORT_SYMBOL_GPL(dev_hard_start_xmit); static void qdisc_pkt_len_init(struct sk_buff *skb) { @@ -2785,7 +2745,8 @@ static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q, qdisc_bstats_update(q, skb); - if (sch_direct_xmit(skb, q, dev, txq, root_lock)) { + skb = validate_xmit_skb(skb, dev); + if (skb && sch_direct_xmit(skb, q, dev, txq, root_lock)) { if (unlikely(contended)) { spin_unlock(&q->busylock); contended = false; @@ -2925,11 +2886,15 @@ static int __dev_queue_xmit(struct sk_buff *skb, void *accel_priv) if (__this_cpu_read(xmit_recursion) > RECURSION_LIMIT) goto recursion_alert; + skb = validate_xmit_skb(skb, dev); + if (!skb) + goto drop; + HARD_TX_LOCK(dev, txq, cpu); if (!netif_xmit_stopped(txq)) { __this_cpu_inc(xmit_recursion); - rc = dev_hard_start_xmit(skb, dev, txq); + skb = dev_hard_start_xmit(skb, dev, txq, &rc); __this_cpu_dec(xmit_recursion); if (dev_xmit_complete(rc)) { HARD_TX_UNLOCK(dev, txq); @@ -2950,10 +2915,11 @@ recursion_alert: } rc = -ENETDOWN; +drop: rcu_read_unlock_bh(); atomic_long_inc(&dev->tx_dropped); - kfree_skb(skb); + kfree_skb_list(skb); return rc; out: rcu_read_unlock_bh(); @@ -3130,8 +3096,7 @@ static int get_rps_cpu(struct net_device *dev, struct sk_buff *skb, } if (map) { - tcpu = map->cpus[((u64) hash * map->len) >> 32]; - + tcpu = map->cpus[reciprocal_scale(hash, map->len)]; if (cpu_online(tcpu)) { cpu = tcpu; goto done; @@ -3965,11 +3930,10 @@ static enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff if (!(skb->dev->features & NETIF_F_GRO)) goto normal; - if (skb_is_gso(skb) || skb_has_frag_list(skb)) + if (skb_is_gso(skb) || skb_has_frag_list(skb) || skb->csum_bad) goto normal; gro_list_prepare(napi, skb); - NAPI_GRO_CB(skb)->csum = skb->csum; /* Needed for CHECKSUM_COMPLETE */ rcu_read_lock(); list_for_each_entry_rcu(ptype, head, list) { @@ -3983,6 +3947,22 @@ static enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff NAPI_GRO_CB(skb)->free = 0; NAPI_GRO_CB(skb)->udp_mark = 0; + /* Setup for GRO checksum validation */ + switch (skb->ip_summed) { + case CHECKSUM_COMPLETE: + NAPI_GRO_CB(skb)->csum = skb->csum; + NAPI_GRO_CB(skb)->csum_valid = 1; + NAPI_GRO_CB(skb)->csum_cnt = 0; + break; + case CHECKSUM_UNNECESSARY: + NAPI_GRO_CB(skb)->csum_cnt = skb->csum_level + 1; + NAPI_GRO_CB(skb)->csum_valid = 0; + break; + default: + NAPI_GRO_CB(skb)->csum_cnt = 0; + NAPI_GRO_CB(skb)->csum_valid = 0; + } + pp = ptype->callbacks.gro_receive(&napi->gro_list, skb); break; } @@ -4212,6 +4192,31 @@ gro_result_t napi_gro_frags(struct napi_struct *napi) } EXPORT_SYMBOL(napi_gro_frags); +/* Compute the checksum from gro_offset and return the folded value + * after adding in any pseudo checksum. 
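A hedged sketch of how a gro_receive handler is expected to consume the state initialized above: csum_cnt counts checksum levels the hardware already vouched for (CHECKSUM_UNNECESSARY), csum_valid flags a usable CHECKSUM_COMPLETE value, and anything else falls back to __skb_gro_checksum_complete(), added later in this patch. The helper name is illustrative.

    static bool gro_csum_ok(struct sk_buff *skb)
    {
            if (NAPI_GRO_CB(skb)->csum_cnt) {
                    /* hardware validated one more checksum level */
                    NAPI_GRO_CB(skb)->csum_cnt--;
                    return true;
            }
            /* compute from gro_offset; 0 means the sum checked out */
            return !__skb_gro_checksum_complete(skb);
    }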
+ */ +__sum16 __skb_gro_checksum_complete(struct sk_buff *skb) +{ + __wsum wsum; + __sum16 sum; + + wsum = skb_checksum(skb, skb_gro_offset(skb), skb_gro_len(skb), 0); + + /* NAPI_GRO_CB(skb)->csum holds pseudo checksum */ + sum = csum_fold(csum_add(NAPI_GRO_CB(skb)->csum, wsum)); + if (likely(!sum)) { + if (unlikely(skb->ip_summed == CHECKSUM_COMPLETE) && + !skb->csum_complete_sw) + netdev_rx_csum_fault(skb->dev); + } + + NAPI_GRO_CB(skb)->csum = wsum; + NAPI_GRO_CB(skb)->csum_valid = 1; + + return sum; +} +EXPORT_SYMBOL(__skb_gro_checksum_complete); + /* * net_rps_action_and_irq_enable sends any pending IPI's for rps. * Note: called with local irq disabled, but exits with local irq enabled. diff --git a/net/core/dev_ioctl.c b/net/core/dev_ioctl.c index cf999e09bcd..72e899a3efd 100644 --- a/net/core/dev_ioctl.c +++ b/net/core/dev_ioctl.c @@ -365,11 +365,8 @@ void dev_load(struct net *net, const char *name) no_module = !dev; if (no_module && capable(CAP_NET_ADMIN)) no_module = request_module("netdev-%s", name); - if (no_module && capable(CAP_SYS_MODULE)) { - if (!request_module("%s", name)) - pr_warn("Loading kernel module for a network device with CAP_SYS_MODULE (deprecated). Use CAP_NET_ADMIN and alias netdev-%s instead.\n", - name); - } + if (no_module && capable(CAP_SYS_MODULE)) + request_module("%s", name); } EXPORT_SYMBOL(dev_load); diff --git a/net/core/ethtool.c b/net/core/ethtool.c index 17cb912793f..27e61b88652 100644 --- a/net/core/ethtool.c +++ b/net/core/ethtool.c @@ -1621,6 +1621,80 @@ static int ethtool_get_module_eeprom(struct net_device *dev, modinfo.eeprom_len); } +static int ethtool_tunable_valid(const struct ethtool_tunable *tuna) +{ + switch (tuna->id) { + case ETHTOOL_RX_COPYBREAK: + if (tuna->len != sizeof(u32) || + tuna->type_id != ETHTOOL_TUNABLE_U32) + return -EINVAL; + break; + default: + return -EINVAL; + } + + return 0; +} + +static int ethtool_get_tunable(struct net_device *dev, void __user *useraddr) +{ + int ret; + struct ethtool_tunable tuna; + const struct ethtool_ops *ops = dev->ethtool_ops; + void *data; + + if (!ops->get_tunable) + return -EOPNOTSUPP; + if (copy_from_user(&tuna, useraddr, sizeof(tuna))) + return -EFAULT; + ret = ethtool_tunable_valid(&tuna); + if (ret) + return ret; + data = kmalloc(tuna.len, GFP_USER); + if (!data) + return -ENOMEM; + ret = ops->get_tunable(dev, &tuna, data); + if (ret) + goto out; + useraddr += sizeof(tuna); + ret = -EFAULT; + if (copy_to_user(useraddr, data, tuna.len)) + goto out; + ret = 0; + +out: + kfree(data); + return ret; +} + +static int ethtool_set_tunable(struct net_device *dev, void __user *useraddr) +{ + int ret; + struct ethtool_tunable tuna; + const struct ethtool_ops *ops = dev->ethtool_ops; + void *data; + + if (!ops->set_tunable) + return -EOPNOTSUPP; + if (copy_from_user(&tuna, useraddr, sizeof(tuna))) + return -EFAULT; + ret = ethtool_tunable_valid(&tuna); + if (ret) + return ret; + data = kmalloc(tuna.len, GFP_USER); + if (!data) + return -ENOMEM; + useraddr += sizeof(tuna); + ret = -EFAULT; + if (copy_from_user(data, useraddr, tuna.len)) + goto out; + ret = ops->set_tunable(dev, &tuna, data); + +out: + kfree(data); + return ret; +} + /* The main entry point in this file. 
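For context, a userspace sketch of driving the new ETHTOOL_GTUNABLE command through the usual SIOCETHTOOL ioctl; the layout follows the struct ethtool_tunable handling above, the wrapper struct and function name are illustrative, and error handling is trimmed.

    #include <string.h>
    #include <sys/ioctl.h>
    #include <net/if.h>
    #include <linux/ethtool.h>
    #include <linux/sockios.h>

    int get_rx_copybreak(int fd, const char *ifname, __u32 *val)
    {
            struct {
                    struct ethtool_tunable t;
                    __u32 data;             /* lands at t + sizeof(t) */
            } req = { .t = {
                    .cmd     = ETHTOOL_GTUNABLE,
                    .id      = ETHTOOL_RX_COPYBREAK,
                    .type_id = ETHTOOL_TUNABLE_U32,
                    .len     = sizeof(__u32),
            } };
            struct ifreq ifr = { 0 };

            strncpy(ifr.ifr_name, ifname, IFNAMSIZ - 1);
            ifr.ifr_data = (void *)&req;
            if (ioctl(fd, SIOCETHTOOL, &ifr) < 0)
                    return -1;
            *val = req.data;
            return 0;
    }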
Called from net/core/dev_ioctl.c */ int dev_ethtool(struct net *net, struct ifreq *ifr) @@ -1670,6 +1744,7 @@ int dev_ethtool(struct net *net, struct ifreq *ifr) case ETHTOOL_GCHANNELS: case ETHTOOL_GET_TS_INFO: case ETHTOOL_GEEE: + case ETHTOOL_GTUNABLE: break; default: if (!ns_capable(net->user_ns, CAP_NET_ADMIN)) @@ -1857,6 +1932,12 @@ int dev_ethtool(struct net *net, struct ifreq *ifr) case ETHTOOL_GMODULEEEPROM: rc = ethtool_get_module_eeprom(dev, useraddr); break; + case ETHTOOL_GTUNABLE: + rc = ethtool_get_tunable(dev, useraddr); + break; + case ETHTOOL_STUNABLE: + rc = ethtool_set_tunable(dev, useraddr); + break; default: rc = -EOPNOTSUPP; } diff --git a/net/core/filter.c b/net/core/filter.c index d814b8a89d0..fa5b7d0f77a 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -113,7 +113,7 @@ static unsigned int pkt_type_offset(void) static u64 __skb_get_pay_offset(u64 ctx, u64 a, u64 x, u64 r4, u64 r5) { - return __skb_get_poff((struct sk_buff *)(unsigned long) ctx); + return skb_get_poff((struct sk_buff *)(unsigned long) ctx); } static u64 __skb_get_nlattr(u64 ctx, u64 a, u64 x, u64 r4, u64 r5) @@ -933,7 +933,7 @@ static struct bpf_prog *bpf_migrate_filter(struct bpf_prog *fp) /* Expand fp for appending the new filter representation. */ old_fp = fp; - fp = krealloc(old_fp, bpf_prog_size(new_len), GFP_KERNEL); + fp = bpf_prog_realloc(old_fp, bpf_prog_size(new_len), 0); if (!fp) { /* The old_fp is still around in case we couldn't * allocate new memory, so uncharge on that one. @@ -1013,7 +1013,7 @@ int bpf_prog_create(struct bpf_prog **pfp, struct sock_fprog_kern *fprog) if (fprog->filter == NULL) return -EINVAL; - fp = kmalloc(bpf_prog_size(fprog->len), GFP_KERNEL); + fp = bpf_prog_alloc(bpf_prog_size(fprog->len), 0); if (!fp) return -ENOMEM; @@ -1069,7 +1069,7 @@ int sk_attach_filter(struct sock_fprog *fprog, struct sock *sk) if (fprog->filter == NULL) return -EINVAL; - prog = kmalloc(bpf_fsize, GFP_KERNEL); + prog = bpf_prog_alloc(bpf_fsize, 0); if (!prog) return -ENOMEM; diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c index 5f362c1d033..8560dea5880 100644 --- a/net/core/flow_dissector.c +++ b/net/core/flow_dissector.c @@ -13,6 +13,7 @@ #include <linux/if_pppox.h> #include <linux/ppp_defs.h> #include <net/flow_keys.h> +#include <scsi/fc/fc_fcoe.h> /* copy saddr & daddr, possibly using 64bit load/store * Equivalent to : flow->src = iph->saddr; @@ -26,36 +27,61 @@ static void iph_to_flow_copy_addrs(struct flow_keys *flow, const struct iphdr *i } /** - * skb_flow_get_ports - extract the upper layer ports and return them - * @skb: buffer to extract the ports from + * __skb_flow_get_ports - extract the upper layer ports and return them + * @skb: sk_buff to extract the ports from * @thoff: transport header offset * @ip_proto: protocol for which to get port offset + * @data: raw buffer pointer to the packet, if NULL use skb->data + * @hlen: packet header length, if @data is NULL use skb_headlen(skb) * * The function will try to retrieve the ports at offset thoff + poff where poff * is the protocol port offset returned from proto_ports_offset */ -__be32 skb_flow_get_ports(const struct sk_buff *skb, int thoff, u8 ip_proto) +__be32 __skb_flow_get_ports(const struct sk_buff *skb, int thoff, u8 ip_proto, + void *data, int hlen) { int poff = proto_ports_offset(ip_proto); + if (!data) { + data = skb->data; + hlen = skb_headlen(skb); + } + if (poff >= 0) { __be32 *ports, _ports; - ports = skb_header_pointer(skb, thoff + poff, - sizeof(_ports), &_ports); + ports = 
__skb_header_pointer(skb, thoff + poff, + sizeof(_ports), data, hlen, &_ports); if (ports) return *ports; } return 0; } -EXPORT_SYMBOL(skb_flow_get_ports); +EXPORT_SYMBOL(__skb_flow_get_ports); -bool skb_flow_dissect(const struct sk_buff *skb, struct flow_keys *flow) +/** + * __skb_flow_dissect - extract the flow_keys struct and return it + * @skb: sk_buff to extract the flow from, can be NULL if the rest are specified + * @data: raw buffer pointer to the packet, if NULL use skb->data + * @proto: protocol for which to get the flow, if @data is NULL use skb->protocol + * @nhoff: network header offset, if @data is NULL use skb_network_offset(skb) + * @hlen: packet header length, if @data is NULL use skb_headlen(skb) + * + * The function will try to retrieve the struct flow_keys from either the skbuff + * or a raw buffer specified by the rest parameters + */ +bool __skb_flow_dissect(const struct sk_buff *skb, struct flow_keys *flow, + void *data, __be16 proto, int nhoff, int hlen) { - int nhoff = skb_network_offset(skb); u8 ip_proto; - __be16 proto = skb->protocol; + + if (!data) { + data = skb->data; + proto = skb->protocol; + nhoff = skb_network_offset(skb); + hlen = skb_headlen(skb); + } memset(flow, 0, sizeof(*flow)); @@ -65,7 +91,7 @@ again: const struct iphdr *iph; struct iphdr _iph; ip: - iph = skb_header_pointer(skb, nhoff, sizeof(_iph), &_iph); + iph = __skb_header_pointer(skb, nhoff, sizeof(_iph), data, hlen, &_iph); if (!iph || iph->ihl < 5) return false; nhoff += iph->ihl * 4; @@ -83,7 +109,7 @@ ip: __be32 flow_label; ipv6: - iph = skb_header_pointer(skb, nhoff, sizeof(_iph), &_iph); + iph = __skb_header_pointer(skb, nhoff, sizeof(_iph), data, hlen, &_iph); if (!iph) return false; @@ -92,6 +118,13 @@ ipv6: flow->dst = (__force __be32)ipv6_addr_hash(&iph->daddr); nhoff += sizeof(struct ipv6hdr); + /* skip the flow label processing if skb is NULL. The + * assumption here is that if there is no skb we are not + * looking for flow info as much as we are length. 
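The point of threading data/hlen through these helpers is that a flow can now be dissected from a bare buffer with no skb at all, which is exactly what eth_get_headlen() (added later in this series) does. A minimal sketch of that calling pattern:

    struct flow_keys keys;
    const struct ethhdr *eth = data;   /* start of the raw frame */

    if (__skb_flow_dissect(NULL, &keys, (void *)data,
                           eth->h_proto, sizeof(*eth), len))
            pr_debug("ip_proto %u thoff %u\n", keys.ip_proto, keys.thoff);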
+ */ + if (!skb) + break; + flow_label = ip6_flowlabel(iph); if (flow_label) { /* Awesome, IPv6 packet has a flow label so we can @@ -113,7 +146,7 @@ ipv6: const struct vlan_hdr *vlan; struct vlan_hdr _vlan; - vlan = skb_header_pointer(skb, nhoff, sizeof(_vlan), &_vlan); + vlan = __skb_header_pointer(skb, nhoff, sizeof(_vlan), data, hlen, &_vlan); if (!vlan) return false; @@ -126,7 +159,7 @@ ipv6: struct pppoe_hdr hdr; __be16 proto; } *hdr, _hdr; - hdr = skb_header_pointer(skb, nhoff, sizeof(_hdr), &_hdr); + hdr = __skb_header_pointer(skb, nhoff, sizeof(_hdr), data, hlen, &_hdr); if (!hdr) return false; proto = hdr->proto; @@ -140,6 +173,9 @@ ipv6: return false; } } + case htons(ETH_P_FCOE): + flow->thoff = (u16)(nhoff + FCOE_HEADER_LEN); + /* fall through */ default: return false; } @@ -151,7 +187,7 @@ ipv6: __be16 proto; } *hdr, _hdr; - hdr = skb_header_pointer(skb, nhoff, sizeof(_hdr), &_hdr); + hdr = __skb_header_pointer(skb, nhoff, sizeof(_hdr), data, hlen, &_hdr); if (!hdr) return false; /* @@ -171,8 +207,9 @@ ipv6: const struct ethhdr *eth; struct ethhdr _eth; - eth = skb_header_pointer(skb, nhoff, - sizeof(_eth), &_eth); + eth = __skb_header_pointer(skb, nhoff, + sizeof(_eth), + data, hlen, &_eth); if (!eth) return false; proto = eth->h_proto; @@ -194,12 +231,12 @@ ipv6: flow->n_proto = proto; flow->ip_proto = ip_proto; - flow->ports = skb_flow_get_ports(skb, nhoff, ip_proto); + flow->ports = __skb_flow_get_ports(skb, nhoff, ip_proto, data, hlen); flow->thoff = (u16) nhoff; return true; } -EXPORT_SYMBOL(skb_flow_dissect); +EXPORT_SYMBOL(__skb_flow_dissect); static u32 hashrnd __read_mostly; static __always_inline void __flow_hash_secret_init(void) @@ -286,30 +323,22 @@ u16 __skb_tx_hash(const struct net_device *dev, struct sk_buff *skb, qcount = dev->tc_to_txq[tc].count; } - return (u16) (((u64)skb_get_hash(skb) * qcount) >> 32) + qoffset; + return (u16) reciprocal_scale(skb_get_hash(skb), qcount) + qoffset; } EXPORT_SYMBOL(__skb_tx_hash); -/* __skb_get_poff() returns the offset to the payload as far as it could - * be dissected. The main user is currently BPF, so that we can dynamically - * truncate packets without needing to push actual payload to the user - * space and can analyze headers only, instead. - */ -u32 __skb_get_poff(const struct sk_buff *skb) +u32 __skb_get_poff(const struct sk_buff *skb, void *data, + const struct flow_keys *keys, int hlen) { - struct flow_keys keys; - u32 poff = 0; - - if (!skb_flow_dissect(skb, &keys)) - return 0; + u32 poff = keys->thoff; - poff += keys.thoff; - switch (keys.ip_proto) { + switch (keys->ip_proto) { case IPPROTO_TCP: { const struct tcphdr *tcph; struct tcphdr _tcph; - tcph = skb_header_pointer(skb, poff, sizeof(_tcph), &_tcph); + tcph = __skb_header_pointer(skb, poff, sizeof(_tcph), + data, hlen, &_tcph); if (!tcph) return poff; @@ -343,6 +372,21 @@ u32 __skb_get_poff(const struct sk_buff *skb) return poff; } +/* skb_get_poff() returns the offset to the payload as far as it could + * be dissected. The main user is currently BPF, so that we can dynamically + * truncate packets without needing to push actual payload to the user + * space and can analyze headers only, instead. 
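Several hunks in this patch replace the open-coded ((u64)hash * n) >> 32 with reciprocal_scale(). A self-contained userspace demonstration of the equivalence (the kernel helper is defined the same way):

    #include <stdint.h>
    #include <stdio.h>

    static inline uint32_t reciprocal_scale(uint32_t val, uint32_t ep_ro)
    {
            /* maps val uniformly onto [0, ep_ro) without a division */
            return (uint32_t)(((uint64_t)val * ep_ro) >> 32);
    }

    int main(void)
    {
            printf("%u\n", reciprocal_scale(0xdeadbeefu, 8));  /* 0..7 */
            return 0;
    }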
+ */ +u32 skb_get_poff(const struct sk_buff *skb) +{ + struct flow_keys keys; + + if (!skb_flow_dissect(skb, &keys)) + return 0; + + return __skb_get_poff(skb, skb->data, &keys, skb_headlen(skb)); +} + static inline int get_xps_queue(struct net_device *dev, struct sk_buff *skb) { #ifdef CONFIG_XPS @@ -359,9 +403,8 @@ static inline int get_xps_queue(struct net_device *dev, struct sk_buff *skb) if (map->len == 1) queue_index = map->queues[0]; else - queue_index = map->queues[ - ((u64)skb_get_hash(skb) * map->len) >> 32]; - + queue_index = map->queues[reciprocal_scale(skb_get_hash(skb), + map->len)]; if (unlikely(queue_index >= dev->real_num_tx_queues)) queue_index = -1; } diff --git a/net/core/netpoll.c b/net/core/netpoll.c index 907fb5e36c0..e6645b4f330 100644 --- a/net/core/netpoll.c +++ b/net/core/netpoll.c @@ -72,7 +72,6 @@ module_param(carrier_timeout, uint, 0644); static int netpoll_start_xmit(struct sk_buff *skb, struct net_device *dev, struct netdev_queue *txq) { - const struct net_device_ops *ops = dev->netdev_ops; int status = NETDEV_TX_OK; netdev_features_t features; @@ -92,9 +91,7 @@ static int netpoll_start_xmit(struct sk_buff *skb, struct net_device *dev, skb->vlan_tci = 0; } - status = ops->ndo_start_xmit(skb, dev); - if (status == NETDEV_TX_OK) - txq_trans_update(txq); + status = netdev_start_xmit(skb, dev, txq, false); out: return status; @@ -116,7 +113,7 @@ static void queue_process(struct work_struct *work) continue; } - txq = netdev_get_tx_queue(dev, skb_get_queue_mapping(skb)); + txq = skb_get_tx_queue(dev, skb); local_irq_save(flags); HARD_TX_LOCK(dev, txq, smp_processor_id()); diff --git a/net/core/pktgen.c b/net/core/pktgen.c index 8b849ddfef2..21cb4839bc9 100644 --- a/net/core/pktgen.c +++ b/net/core/pktgen.c @@ -202,6 +202,7 @@ #define F_QUEUE_MAP_CPU (1<<14) /* queue map mirrors smp_processor_id() */ #define F_NODE (1<<15) /* Node memory alloc*/ #define F_UDPCSUM (1<<16) /* Include UDP checksum */ +#define F_NO_TIMESTAMP (1<<17) /* Don't timestamp packets (default TS) */ /* Thread control flag bits */ #define T_STOP (1<<0) /* Stop run */ @@ -638,6 +639,9 @@ static int pktgen_if_show(struct seq_file *seq, void *v) if (pkt_dev->flags & F_UDPCSUM) seq_puts(seq, "UDPCSUM "); + if (pkt_dev->flags & F_NO_TIMESTAMP) + seq_puts(seq, "NO_TIMESTAMP "); + if (pkt_dev->flags & F_MPLS_RND) seq_puts(seq, "MPLS_RND "); @@ -1243,6 +1247,9 @@ static ssize_t pktgen_if_write(struct file *file, else if (strcmp(f, "!UDPCSUM") == 0) pkt_dev->flags &= ~F_UDPCSUM; + else if (strcmp(f, "NO_TIMESTAMP") == 0) + pkt_dev->flags |= F_NO_TIMESTAMP; + else { sprintf(pg_result, "Flag -:%s:- unknown\nAvailable flags, (prepend ! 
to un-set flag):\n%s", @@ -1251,6 +1258,7 @@ static ssize_t pktgen_if_write(struct file *file, "MACSRC_RND, MACDST_RND, TXSIZE_RND, IPV6, " "MPLS_RND, VID_RND, SVID_RND, FLOW_SEQ, " "QUEUE_MAP_RND, QUEUE_MAP_CPU, UDPCSUM, " + "NO_TIMESTAMP, " #ifdef CONFIG_XFRM "IPSEC, " #endif @@ -2685,9 +2693,14 @@ static void pktgen_finalize_skb(struct pktgen_dev *pkt_dev, struct sk_buff *skb, pgh->pgh_magic = htonl(PKTGEN_MAGIC); pgh->seq_num = htonl(pkt_dev->seq_num); - do_gettimeofday(×tamp); - pgh->tv_sec = htonl(timestamp.tv_sec); - pgh->tv_usec = htonl(timestamp.tv_usec); + if (pkt_dev->flags & F_NO_TIMESTAMP) { + pgh->tv_sec = 0; + pgh->tv_usec = 0; + } else { + do_gettimeofday(×tamp); + pgh->tv_sec = htonl(timestamp.tv_sec); + pgh->tv_usec = htonl(timestamp.tv_usec); + } } static struct sk_buff *pktgen_alloc_skb(struct net_device *dev, @@ -3285,10 +3298,7 @@ static void pktgen_wait_for_skb(struct pktgen_dev *pkt_dev) static void pktgen_xmit(struct pktgen_dev *pkt_dev) { struct net_device *odev = pkt_dev->odev; - netdev_tx_t (*xmit)(struct sk_buff *, struct net_device *) - = odev->netdev_ops->ndo_start_xmit; struct netdev_queue *txq; - u16 queue_map; int ret; /* If device is offline, then don't send */ @@ -3326,8 +3336,7 @@ static void pktgen_xmit(struct pktgen_dev *pkt_dev) if (pkt_dev->delay && pkt_dev->last_ok) spin(pkt_dev, pkt_dev->next_tx); - queue_map = skb_get_queue_mapping(pkt_dev->skb); - txq = netdev_get_tx_queue(odev, queue_map); + txq = skb_get_tx_queue(odev, pkt_dev->skb); local_bh_disable(); @@ -3339,11 +3348,10 @@ static void pktgen_xmit(struct pktgen_dev *pkt_dev) goto unlock; } atomic_inc(&(pkt_dev->skb->users)); - ret = (*xmit)(pkt_dev->skb, odev); + ret = netdev_start_xmit(pkt_dev->skb, odev, txq, false); switch (ret) { case NETDEV_TX_OK: - txq_trans_update(txq); pkt_dev->last_ok = 1; pkt_dev->sofar++; pkt_dev->seq_num++; diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index f0493e3b747..a6882686ca3 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -1481,9 +1481,12 @@ static int do_set_master(struct net_device *dev, int ifindex) return 0; } +#define DO_SETLINK_MODIFIED 0x01 +/* notify flag means notify + modified. 
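The flag values are chosen so that NOTIFY is a strict superset of MODIFIED, which lets the errout path below test both conditions with single bit masks:

    #define DO_SETLINK_MODIFIED 0x01
    #define DO_SETLINK_NOTIFY   0x03   /* 0x02 | DO_SETLINK_MODIFIED */

    if (status & DO_SETLINK_MODIFIED) {        /* any change at all */
            if (status & DO_SETLINK_NOTIFY)    /* change worth announcing */
                    netdev_state_change(dev);
    }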
*/ +#define DO_SETLINK_NOTIFY 0x03 static int do_setlink(const struct sk_buff *skb, struct net_device *dev, struct ifinfomsg *ifm, - struct nlattr **tb, char *ifname, int modified) + struct nlattr **tb, char *ifname, int status) { const struct net_device_ops *ops = dev->netdev_ops; int err; @@ -1502,7 +1505,7 @@ static int do_setlink(const struct sk_buff *skb, put_net(net); if (err) goto errout; - modified = 1; + status |= DO_SETLINK_MODIFIED; } if (tb[IFLA_MAP]) { @@ -1531,7 +1534,7 @@ static int do_setlink(const struct sk_buff *skb, if (err < 0) goto errout; - modified = 1; + status |= DO_SETLINK_NOTIFY; } if (tb[IFLA_ADDRESS]) { @@ -1551,19 +1554,19 @@ static int do_setlink(const struct sk_buff *skb, kfree(sa); if (err) goto errout; - modified = 1; + status |= DO_SETLINK_MODIFIED; } if (tb[IFLA_MTU]) { err = dev_set_mtu(dev, nla_get_u32(tb[IFLA_MTU])); if (err < 0) goto errout; - modified = 1; + status |= DO_SETLINK_MODIFIED; } if (tb[IFLA_GROUP]) { dev_set_group(dev, nla_get_u32(tb[IFLA_GROUP])); - modified = 1; + status |= DO_SETLINK_NOTIFY; } /* @@ -1575,7 +1578,7 @@ static int do_setlink(const struct sk_buff *skb, err = dev_change_name(dev, ifname); if (err < 0) goto errout; - modified = 1; + status |= DO_SETLINK_MODIFIED; } if (tb[IFLA_IFALIAS]) { @@ -1583,7 +1586,7 @@ static int do_setlink(const struct sk_buff *skb, nla_len(tb[IFLA_IFALIAS])); if (err < 0) goto errout; - modified = 1; + status |= DO_SETLINK_NOTIFY; } if (tb[IFLA_BROADCAST]) { @@ -1601,25 +1604,35 @@ static int do_setlink(const struct sk_buff *skb, err = do_set_master(dev, nla_get_u32(tb[IFLA_MASTER])); if (err) goto errout; - modified = 1; + status |= DO_SETLINK_MODIFIED; } if (tb[IFLA_CARRIER]) { err = dev_change_carrier(dev, nla_get_u8(tb[IFLA_CARRIER])); if (err) goto errout; - modified = 1; + status |= DO_SETLINK_MODIFIED; } - if (tb[IFLA_TXQLEN]) - dev->tx_queue_len = nla_get_u32(tb[IFLA_TXQLEN]); + if (tb[IFLA_TXQLEN]) { + unsigned long value = nla_get_u32(tb[IFLA_TXQLEN]); + + if (dev->tx_queue_len ^ value) + status |= DO_SETLINK_NOTIFY; + + dev->tx_queue_len = value; + } if (tb[IFLA_OPERSTATE]) set_operstate(dev, nla_get_u8(tb[IFLA_OPERSTATE])); if (tb[IFLA_LINKMODE]) { + unsigned char value = nla_get_u8(tb[IFLA_LINKMODE]); + write_lock_bh(&dev_base_lock); - dev->link_mode = nla_get_u8(tb[IFLA_LINKMODE]); + if (dev->link_mode ^ value) + status |= DO_SETLINK_NOTIFY; + dev->link_mode = value; write_unlock_bh(&dev_base_lock); } @@ -1634,7 +1647,7 @@ static int do_setlink(const struct sk_buff *skb, err = do_setvfinfo(dev, attr); if (err < 0) goto errout; - modified = 1; + status |= DO_SETLINK_NOTIFY; } } err = 0; @@ -1664,7 +1677,7 @@ static int do_setlink(const struct sk_buff *skb, err = ops->ndo_set_vf_port(dev, vf, port); if (err < 0) goto errout; - modified = 1; + status |= DO_SETLINK_NOTIFY; } } err = 0; @@ -1682,7 +1695,7 @@ static int do_setlink(const struct sk_buff *skb, err = ops->ndo_set_vf_port(dev, PORT_SELF_VF, port); if (err < 0) goto errout; - modified = 1; + status |= DO_SETLINK_NOTIFY; } if (tb[IFLA_AF_SPEC]) { @@ -1699,15 +1712,20 @@ static int do_setlink(const struct sk_buff *skb, if (err < 0) goto errout; - modified = 1; + status |= DO_SETLINK_NOTIFY; } } err = 0; errout: - if (err < 0 && modified) - net_warn_ratelimited("A link change request failed with some changes committed already. 
Interface %s may have been left with an inconsistent configuration, please check.\n", - dev->name); + if (status & DO_SETLINK_MODIFIED) { + if (status & DO_SETLINK_NOTIFY) + netdev_state_change(dev); + + if (err < 0) + net_warn_ratelimited("A link change request failed with some changes committed already. Interface %s may have been left with an inconsistent configuration, please check.\n", + dev->name); + } return err; } @@ -1989,7 +2007,7 @@ replay: } if (dev) { - int modified = 0; + int status = 0; if (nlh->nlmsg_flags & NLM_F_EXCL) return -EEXIST; @@ -2004,7 +2022,7 @@ replay: err = ops->changelink(dev, tb, data); if (err < 0) return err; - modified = 1; + status |= DO_SETLINK_NOTIFY; } if (linkinfo[IFLA_INFO_SLAVE_DATA]) { @@ -2015,10 +2033,10 @@ replay: tb, slave_data); if (err < 0) return err; - modified = 1; + status |= DO_SETLINK_NOTIFY; } - return do_setlink(skb, dev, ifm, tb, ifname, modified); + return do_setlink(skb, dev, ifm, tb, ifname, status); } if (!(nlh->nlmsg_flags & NLM_F_CREATE)) { diff --git a/net/core/secure_seq.c b/net/core/secure_seq.c index ba71212f025..51dd3193a33 100644 --- a/net/core/secure_seq.c +++ b/net/core/secure_seq.c @@ -35,7 +35,7 @@ static u32 seq_scale(u32 seq) * overlaps less than one time per MSL (2 minutes). * Choosing a clock of 64 ns period is OK. (period of 274 s) */ - return seq + (ktime_to_ns(ktime_get_real()) >> 6); + return seq + (ktime_get_real_ns() >> 6); } #endif @@ -135,7 +135,7 @@ u64 secure_dccp_sequence_number(__be32 saddr, __be32 daddr, md5_transform(hash, net_secret); seq = hash[0] | (((u64)hash[1]) << 32); - seq += ktime_to_ns(ktime_get_real()); + seq += ktime_get_real_ns(); seq &= (1ull << 48) - 1; return seq; @@ -163,7 +163,7 @@ u64 secure_dccpv6_sequence_number(__be32 *saddr, __be32 *daddr, md5_transform(hash, secret); seq = hash[0] | (((u64)hash[1]) << 32); - seq += ktime_to_ns(ktime_get_real()); + seq += ktime_get_real_ns(); seq &= (1ull << 48) - 1; return seq; diff --git a/net/core/skbuff.c b/net/core/skbuff.c index da1378a3e2c..a18dfb02d94 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -3491,32 +3491,53 @@ int sock_queue_err_skb(struct sock *sk, struct sk_buff *skb) } EXPORT_SYMBOL(sock_queue_err_skb); -void __skb_tstamp_tx(struct sk_buff *orig_skb, - struct skb_shared_hwtstamps *hwtstamps, - struct sock *sk, int tstype) +struct sk_buff *sock_dequeue_err_skb(struct sock *sk) { - struct sock_exterr_skb *serr; - struct sk_buff *skb; - int err; + struct sk_buff_head *q = &sk->sk_error_queue; + struct sk_buff *skb, *skb_next; + int err = 0; - if (!sk) - return; + spin_lock_bh(&q->lock); + skb = __skb_dequeue(q); + if (skb && (skb_next = skb_peek(q))) + err = SKB_EXT_ERR(skb_next)->ee.ee_errno; + spin_unlock_bh(&q->lock); - if (hwtstamps) { - *skb_hwtstamps(orig_skb) = - *hwtstamps; - } else { - /* - * no hardware time stamps available, - * so keep the shared tx_flags and only - * store software time stamp - */ - orig_skb->tstamp = ktime_get_real(); + sk->sk_err = err; + if (err) + sk->sk_error_report(sk); + + return skb; +} +EXPORT_SYMBOL(sock_dequeue_err_skb); + +struct sk_buff *skb_clone_sk(struct sk_buff *skb) +{ + struct sock *sk = skb->sk; + struct sk_buff *clone; + + if (!sk || !atomic_inc_not_zero(&sk->sk_refcnt)) + return NULL; + + clone = skb_clone(skb, GFP_ATOMIC); + if (!clone) { + sock_put(sk); + return NULL; } - skb = skb_clone(orig_skb, GFP_ATOMIC); - if (!skb) - return; + clone->sk = sk; + clone->destructor = sock_efree; + + return clone; +} +EXPORT_SYMBOL(skb_clone_sk); + +static void 
__skb_complete_tx_timestamp(struct sk_buff *skb, + struct sock *sk, + int tstype) +{ + struct sock_exterr_skb *serr; + int err; serr = SKB_EXT_ERR(skb); memset(serr, 0, sizeof(*serr)); @@ -3534,6 +3555,42 @@ void __skb_tstamp_tx(struct sk_buff *orig_skb, if (err) kfree_skb(skb); } + +void skb_complete_tx_timestamp(struct sk_buff *skb, + struct skb_shared_hwtstamps *hwtstamps) +{ + struct sock *sk = skb->sk; + + /* take a reference to prevent skb_orphan() from freeing the socket */ + sock_hold(sk); + + *skb_hwtstamps(skb) = *hwtstamps; + __skb_complete_tx_timestamp(skb, sk, SCM_TSTAMP_SND); + + sock_put(sk); +} +EXPORT_SYMBOL_GPL(skb_complete_tx_timestamp); + +void __skb_tstamp_tx(struct sk_buff *orig_skb, + struct skb_shared_hwtstamps *hwtstamps, + struct sock *sk, int tstype) +{ + struct sk_buff *skb; + + if (!sk) + return; + + if (hwtstamps) + *skb_hwtstamps(orig_skb) = *hwtstamps; + else + orig_skb->tstamp = ktime_get_real(); + + skb = skb_clone(orig_skb, GFP_ATOMIC); + if (!skb) + return; + + __skb_complete_tx_timestamp(skb, sk, tstype); +} EXPORT_SYMBOL_GPL(__skb_tstamp_tx); void skb_tstamp_tx(struct sk_buff *orig_skb, diff --git a/net/core/sock.c b/net/core/sock.c index d372b4bd3f9..6f436b5e496 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -437,7 +437,6 @@ static void sock_disable_timestamp(struct sock *sk, unsigned long flags) int sock_queue_rcv_skb(struct sock *sk, struct sk_buff *skb) { int err; - int skb_len; unsigned long flags; struct sk_buff_head *list = &sk->sk_receive_queue; @@ -459,13 +458,6 @@ int sock_queue_rcv_skb(struct sock *sk, struct sk_buff *skb) skb->dev = NULL; skb_set_owner_r(skb, sk); - /* Cache the SKB length before we tack it onto the receive - * queue. Once it is added it no longer belongs to us and - * may be freed by other threads of control pulling packets - * from the queue. 
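Taken together, skb_clone_sk() and skb_complete_tx_timestamp() give drivers a two-step pattern for hardware TX timestamps; a hedged sketch (defer_for_hw_timestamp() and the function names are invented for illustration):

    void tx_submit(struct sk_buff *skb)
    {
            /* the clone holds a socket reference via sock_efree() */
            struct sk_buff *clone = skb_clone_sk(skb);

            if (clone)
                    defer_for_hw_timestamp(clone);
    }

    void tx_done_irq(struct sk_buff *clone,
                     struct skb_shared_hwtstamps *hwts)
    {
            /* queues onto sk_error_queue, drops the socket reference */
            skb_complete_tx_timestamp(clone, hwts);
    }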
- */ - skb_len = skb->len; - /* we escape from rcu protected region, make sure we dont leak * a norefcounted dst */ @@ -1645,18 +1637,24 @@ void sock_rfree(struct sk_buff *skb) } EXPORT_SYMBOL(sock_rfree); +void sock_efree(struct sk_buff *skb) +{ + sock_put(skb->sk); +} +EXPORT_SYMBOL(sock_efree); + +#ifdef CONFIG_INET void sock_edemux(struct sk_buff *skb) { struct sock *sk = skb->sk; -#ifdef CONFIG_INET if (sk->sk_state == TCP_TIME_WAIT) inet_twsk_put(inet_twsk(sk)); else -#endif sock_put(sk); } EXPORT_SYMBOL(sock_edemux); +#endif kuid_t sock_i_uid(struct sock *sk) { @@ -2498,11 +2496,11 @@ int sock_recv_errqueue(struct sock *sk, struct msghdr *msg, int len, int level, int type) { struct sock_exterr_skb *serr; - struct sk_buff *skb, *skb2; + struct sk_buff *skb; int copied, err; err = -EAGAIN; - skb = skb_dequeue(&sk->sk_error_queue); + skb = sock_dequeue_err_skb(sk); if (skb == NULL) goto out; @@ -2523,16 +2521,6 @@ int sock_recv_errqueue(struct sock *sk, struct msghdr *msg, int len, msg->msg_flags |= MSG_ERRQUEUE; err = copied; - /* Reset and regenerate socket error */ - spin_lock_bh(&sk->sk_error_queue.lock); - sk->sk_err = 0; - if ((skb2 = skb_peek(&sk->sk_error_queue)) != NULL) { - sk->sk_err = SKB_EXT_ERR(skb2)->ee.ee_errno; - spin_unlock_bh(&sk->sk_error_queue.lock); - sk->sk_error_report(sk); - } else - spin_unlock_bh(&sk->sk_error_queue.lock); - out_free_skb: kfree_skb(skb); out: diff --git a/net/core/timestamping.c b/net/core/timestamping.c index a8770391ea5..43d3dd62fcc 100644 --- a/net/core/timestamping.c +++ b/net/core/timestamping.c @@ -36,10 +36,9 @@ void skb_clone_tx_timestamp(struct sk_buff *skb) { struct phy_device *phydev; struct sk_buff *clone; - struct sock *sk = skb->sk; unsigned int type; - if (!sk) + if (!skb->sk) return; type = classify(skb); @@ -48,50 +47,14 @@ void skb_clone_tx_timestamp(struct sk_buff *skb) phydev = skb->dev->phydev; if (likely(phydev->drv->txtstamp)) { - if (!atomic_inc_not_zero(&sk->sk_refcnt)) + clone = skb_clone_sk(skb); + if (!clone) return; - - clone = skb_clone(skb, GFP_ATOMIC); - if (!clone) { - sock_put(sk); - return; - } - - clone->sk = sk; phydev->drv->txtstamp(phydev, clone, type); } } EXPORT_SYMBOL_GPL(skb_clone_tx_timestamp); -void skb_complete_tx_timestamp(struct sk_buff *skb, - struct skb_shared_hwtstamps *hwtstamps) -{ - struct sock *sk = skb->sk; - struct sock_exterr_skb *serr; - int err; - - if (!hwtstamps) { - sock_put(sk); - kfree_skb(skb); - return; - } - - *skb_hwtstamps(skb) = *hwtstamps; - - serr = SKB_EXT_ERR(skb); - memset(serr, 0, sizeof(*serr)); - serr->ee.ee_errno = ENOMSG; - serr->ee.ee_origin = SO_EE_ORIGIN_TIMESTAMPING; - skb->sk = NULL; - - err = sock_queue_err_skb(sk, skb); - - sock_put(sk); - if (err) - kfree_skb(skb); -} -EXPORT_SYMBOL_GPL(skb_complete_tx_timestamp); - bool skb_defer_rx_timestamp(struct sk_buff *skb) { struct phy_device *phydev; diff --git a/net/decnet/af_decnet.c b/net/decnet/af_decnet.c index ae011b46c07..25733d53814 100644 --- a/net/decnet/af_decnet.c +++ b/net/decnet/af_decnet.c @@ -127,6 +127,7 @@ Version 0.0.6 2.1.110 07-aug-98 Eduardo Marcelo Serrat #include <linux/stat.h> #include <linux/init.h> #include <linux/poll.h> +#include <linux/jiffies.h> #include <net/net_namespace.h> #include <net/neighbour.h> #include <net/dst.h> @@ -598,7 +599,7 @@ int dn_destroy_timer(struct sock *sk) if (sk->sk_socket) return 0; - if ((jiffies - scp->stamp) >= (HZ * decnet_time_wait)) { + if (time_after_eq(jiffies, scp->stamp + HZ * decnet_time_wait)) { dn_unhash_sock(sk); sock_put(sk); return 1; diff 
--git a/net/decnet/dn_dev.c b/net/decnet/dn_dev.c index 3b726f31c64..4400da7739d 100644 --- a/net/decnet/dn_dev.c +++ b/net/decnet/dn_dev.c @@ -41,6 +41,7 @@ #include <linux/sysctl.h> #include <linux/notifier.h> #include <linux/slab.h> +#include <linux/jiffies.h> #include <asm/uaccess.h> #include <net/net_namespace.h> #include <net/neighbour.h> @@ -875,7 +876,7 @@ static void dn_send_endnode_hello(struct net_device *dev, struct dn_ifaddr *ifa) static int dn_am_i_a_router(struct dn_neigh *dn, struct dn_dev *dn_db, struct dn_ifaddr *ifa) { /* First check time since device went up */ - if ((jiffies - dn_db->uptime) < DRDELAY) + if (time_before(jiffies, dn_db->uptime + DRDELAY)) return 0; /* If there is no router, then yes... */ diff --git a/net/decnet/dn_timer.c b/net/decnet/dn_timer.c index d9c150cc59a..1d330fd43dc 100644 --- a/net/decnet/dn_timer.c +++ b/net/decnet/dn_timer.c @@ -23,6 +23,7 @@ #include <linux/spinlock.h> #include <net/sock.h> #include <linux/atomic.h> +#include <linux/jiffies.h> #include <net/flow.h> #include <net/dn.h> @@ -91,7 +92,7 @@ static void dn_slow_timer(unsigned long arg) * since the last successful transmission. */ if (scp->keepalive && scp->keepalive_fxn && (scp->state == DN_RUN)) { - if ((jiffies - scp->stamp) >= scp->keepalive) + if (time_after_eq(jiffies, scp->stamp + scp->keepalive)) scp->keepalive_fxn(sk); } diff --git a/net/dsa/Kconfig b/net/dsa/Kconfig index f5eede1d6cb..a585fd6352e 100644 --- a/net/dsa/Kconfig +++ b/net/dsa/Kconfig @@ -12,6 +12,9 @@ config NET_DSA if NET_DSA # tagging formats +config NET_DSA_TAG_BRCM + bool + config NET_DSA_TAG_DSA bool diff --git a/net/dsa/Makefile b/net/dsa/Makefile index 7b9fcbbeda5..da06ed1df62 100644 --- a/net/dsa/Makefile +++ b/net/dsa/Makefile @@ -3,6 +3,7 @@ obj-$(CONFIG_NET_DSA) += dsa_core.o dsa_core-y += dsa.o slave.o # tagging formats +dsa_core-$(CONFIG_NET_DSA_TAG_BRCM) += tag_brcm.o dsa_core-$(CONFIG_NET_DSA_TAG_DSA) += tag_dsa.o dsa_core-$(CONFIG_NET_DSA_TAG_EDSA) += tag_edsa.o dsa_core-$(CONFIG_NET_DSA_TAG_TRAILER) += tag_trailer.o diff --git a/net/dsa/dsa.c b/net/dsa/dsa.c index 0a49632fac4..61f145c4455 100644 --- a/net/dsa/dsa.c +++ b/net/dsa/dsa.c @@ -144,6 +144,11 @@ dsa_switch_setup(struct dsa_switch_tree *dst, int index, goto out; } + /* Make the built-in MII bus mask match the number of ports, + * switch drivers can override this later + */ + ds->phys_mii_mask = ds->phys_port_mask; + /* * If the CPU connects to this switch, set the switch tree * tagging protocol to the preferred tagging format of this @@ -410,6 +415,7 @@ static int dsa_of_probe(struct platform_device *pdev) chip_index++; cd = &pd->chip[chip_index]; + cd->of_node = child; cd->mii_bus = &mdio_bus->dev; sw_addr = of_get_property(child, "reg", NULL); @@ -431,6 +437,8 @@ static int dsa_of_probe(struct platform_device *pdev) if (!port_name) continue; + cd->port_dn[port_index] = port; + cd->port_names[port_index] = kstrdup(port_name, GFP_KERNEL); if (!cd->port_names[port_index]) { @@ -608,7 +616,26 @@ static void dsa_shutdown(struct platform_device *pdev) { } +static int dsa_switch_rcv(struct sk_buff *skb, struct net_device *dev, + struct packet_type *pt, struct net_device *orig_dev) +{ + struct dsa_switch_tree *dst = dev->dsa_ptr; + + if (unlikely(dst == NULL)) { + kfree_skb(skb); + return 0; + } + + return dst->ops->rcv(skb, dev, pt, orig_dev); +} + +static struct packet_type dsa_pack_type __read_mostly = { + .type = cpu_to_be16(ETH_P_XDSA), + .func = dsa_switch_rcv, +}; + static const struct of_device_id dsa_of_match_table[] = { + { 
.compatible = "brcm,bcm7445-switch-v4.0" }, { .compatible = "marvell,dsa", }, {} }; @@ -633,30 +660,15 @@ static int __init dsa_init_module(void) if (rc) return rc; -#ifdef CONFIG_NET_DSA_TAG_DSA - dev_add_pack(&dsa_packet_type); -#endif -#ifdef CONFIG_NET_DSA_TAG_EDSA - dev_add_pack(&edsa_packet_type); -#endif -#ifdef CONFIG_NET_DSA_TAG_TRAILER - dev_add_pack(&trailer_packet_type); -#endif + dev_add_pack(&dsa_pack_type); + return 0; } module_init(dsa_init_module); static void __exit dsa_cleanup_module(void) { -#ifdef CONFIG_NET_DSA_TAG_TRAILER - dev_remove_pack(&trailer_packet_type); -#endif -#ifdef CONFIG_NET_DSA_TAG_EDSA - dev_remove_pack(&edsa_packet_type); -#endif -#ifdef CONFIG_NET_DSA_TAG_DSA - dev_remove_pack(&dsa_packet_type); -#endif + dev_remove_pack(&dsa_pack_type); platform_driver_unregister(&dsa_driver); } module_exit(dsa_cleanup_module); diff --git a/net/dsa/dsa_priv.h b/net/dsa/dsa_priv.h index d4cf5cc747e..98afed4d92b 100644 --- a/net/dsa/dsa_priv.h +++ b/net/dsa/dsa_priv.h @@ -33,6 +33,10 @@ struct dsa_slave_priv { * to this port. */ struct phy_device *phy; + phy_interface_t phy_interface; + int old_link; + int old_pause; + int old_duplex; }; /* dsa.c */ @@ -45,16 +49,16 @@ struct net_device *dsa_slave_create(struct dsa_switch *ds, int port, char *name); /* tag_dsa.c */ -netdev_tx_t dsa_xmit(struct sk_buff *skb, struct net_device *dev); -extern struct packet_type dsa_packet_type; +extern const struct dsa_device_ops dsa_netdev_ops; /* tag_edsa.c */ -netdev_tx_t edsa_xmit(struct sk_buff *skb, struct net_device *dev); -extern struct packet_type edsa_packet_type; +extern const struct dsa_device_ops edsa_netdev_ops; /* tag_trailer.c */ -netdev_tx_t trailer_xmit(struct sk_buff *skb, struct net_device *dev); -extern struct packet_type trailer_packet_type; +extern const struct dsa_device_ops trailer_netdev_ops; + +/* tag_brcm.c */ +extern const struct dsa_device_ops brcm_netdev_ops; #endif diff --git a/net/dsa/slave.c b/net/dsa/slave.c index 45a1e34c89e..7333a4aebb7 100644 --- a/net/dsa/slave.c +++ b/net/dsa/slave.c @@ -12,6 +12,8 @@ #include <linux/netdevice.h> #include <linux/etherdevice.h> #include <linux/phy.h> +#include <linux/of_net.h> +#include <linux/of_mdio.h> #include "dsa_priv.h" /* slave mii_bus handling ***************************************************/ @@ -19,7 +21,7 @@ static int dsa_slave_phy_read(struct mii_bus *bus, int addr, int reg) { struct dsa_switch *ds = bus->priv; - if (ds->phys_port_mask & (1 << addr)) + if (ds->phys_mii_mask & (1 << addr)) return ds->drv->phy_read(ds, addr, reg); return 0xffff; @@ -29,7 +31,7 @@ static int dsa_slave_phy_write(struct mii_bus *bus, int addr, int reg, u16 val) { struct dsa_switch *ds = bus->priv; - if (ds->phys_port_mask & (1 << addr)) + if (ds->phys_mii_mask & (1 << addr)) return ds->drv->phy_write(ds, addr, reg, val); return 0; @@ -171,6 +173,25 @@ static int dsa_slave_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) return -EOPNOTSUPP; } +static netdev_tx_t dsa_slave_xmit(struct sk_buff *skb, struct net_device *dev) +{ + struct dsa_slave_priv *p = netdev_priv(dev); + struct dsa_switch_tree *dst = p->parent->dst; + + return dst->ops->xmit(skb, dev); +} + +static netdev_tx_t dsa_slave_notag_xmit(struct sk_buff *skb, + struct net_device *dev) +{ + struct dsa_slave_priv *p = netdev_priv(dev); + + skb->dev = p->parent->dst->master_netdev; + dev_queue_xmit(skb); + + return NETDEV_TX_OK; +} + /* ethtool operations *******************************************************/ static int @@ -293,44 +314,107 @@ static const 
struct ethtool_ops dsa_slave_ethtool_ops = { .get_sset_count = dsa_slave_get_sset_count, }; -#ifdef CONFIG_NET_DSA_TAG_DSA -static const struct net_device_ops dsa_netdev_ops = { - .ndo_init = dsa_slave_init, - .ndo_open = dsa_slave_open, - .ndo_stop = dsa_slave_close, - .ndo_start_xmit = dsa_xmit, - .ndo_change_rx_flags = dsa_slave_change_rx_flags, - .ndo_set_rx_mode = dsa_slave_set_rx_mode, - .ndo_set_mac_address = dsa_slave_set_mac_address, - .ndo_do_ioctl = dsa_slave_ioctl, -}; -#endif -#ifdef CONFIG_NET_DSA_TAG_EDSA -static const struct net_device_ops edsa_netdev_ops = { +static const struct net_device_ops dsa_slave_netdev_ops = { .ndo_init = dsa_slave_init, .ndo_open = dsa_slave_open, .ndo_stop = dsa_slave_close, - .ndo_start_xmit = edsa_xmit, + .ndo_start_xmit = dsa_slave_xmit, .ndo_change_rx_flags = dsa_slave_change_rx_flags, .ndo_set_rx_mode = dsa_slave_set_rx_mode, .ndo_set_mac_address = dsa_slave_set_mac_address, .ndo_do_ioctl = dsa_slave_ioctl, }; -#endif -#ifdef CONFIG_NET_DSA_TAG_TRAILER -static const struct net_device_ops trailer_netdev_ops = { - .ndo_init = dsa_slave_init, - .ndo_open = dsa_slave_open, - .ndo_stop = dsa_slave_close, - .ndo_start_xmit = trailer_xmit, - .ndo_change_rx_flags = dsa_slave_change_rx_flags, - .ndo_set_rx_mode = dsa_slave_set_rx_mode, - .ndo_set_mac_address = dsa_slave_set_mac_address, - .ndo_do_ioctl = dsa_slave_ioctl, + +static const struct dsa_device_ops notag_netdev_ops = { + .xmit = dsa_slave_notag_xmit, + .rcv = NULL, }; -#endif + +static void dsa_slave_adjust_link(struct net_device *dev) +{ + struct dsa_slave_priv *p = netdev_priv(dev); + struct dsa_switch *ds = p->parent; + unsigned int status_changed = 0; + + if (p->old_link != p->phy->link) { + status_changed = 1; + p->old_link = p->phy->link; + } + + if (p->old_duplex != p->phy->duplex) { + status_changed = 1; + p->old_duplex = p->phy->duplex; + } + + if (p->old_pause != p->phy->pause) { + status_changed = 1; + p->old_pause = p->phy->pause; + } + + if (ds->drv->adjust_link && status_changed) + ds->drv->adjust_link(ds, p->port, p->phy); + + if (status_changed) + phy_print_status(p->phy); +} + +static int dsa_slave_fixed_link_update(struct net_device *dev, + struct fixed_phy_status *status) +{ + struct dsa_slave_priv *p = netdev_priv(dev); + struct dsa_switch *ds = p->parent; + + if (ds->drv->fixed_link_update) + ds->drv->fixed_link_update(ds, p->port, status); + + return 0; +} /* slave device setup *******************************************************/ +static void dsa_slave_phy_setup(struct dsa_slave_priv *p, + struct net_device *slave_dev) +{ + struct dsa_switch *ds = p->parent; + struct dsa_chip_data *cd = ds->pd; + struct device_node *phy_dn, *port_dn; + bool phy_is_fixed = false; + int ret; + + port_dn = cd->port_dn[p->port]; + p->phy_interface = of_get_phy_mode(port_dn); + + phy_dn = of_parse_phandle(port_dn, "phy-handle", 0); + if (of_phy_is_fixed_link(port_dn)) { + /* In the case of a fixed PHY, the DT node associated + * to the fixed PHY is the Port DT node + */ + ret = of_phy_register_fixed_link(port_dn); + if (ret) { + pr_err("failed to register fixed PHY\n"); + return; + } + phy_is_fixed = true; + phy_dn = port_dn; + } + + if (phy_dn) + p->phy = of_phy_connect(slave_dev, phy_dn, + dsa_slave_adjust_link, 0, + p->phy_interface); + + if (p->phy && phy_is_fixed) + fixed_phy_set_link_update(p->phy, dsa_slave_fixed_link_update); + + /* We could not connect to a designated PHY, so use the switch internal + * MDIO bus instead + */ + if (!p->phy) + p->phy = 
ds->slave_mii_bus->phy_map[p->port]; + else + pr_info("attached PHY at address %d [%s]\n", + p->phy->addr, p->phy->drv->name); +} + struct net_device * dsa_slave_create(struct dsa_switch *ds, struct device *parent, int port, char *name) @@ -349,35 +433,48 @@ dsa_slave_create(struct dsa_switch *ds, struct device *parent, slave_dev->ethtool_ops = &dsa_slave_ethtool_ops; eth_hw_addr_inherit(slave_dev, master); slave_dev->tx_queue_len = 0; + slave_dev->netdev_ops = &dsa_slave_netdev_ops; switch (ds->dst->tag_protocol) { #ifdef CONFIG_NET_DSA_TAG_DSA case htons(ETH_P_DSA): - slave_dev->netdev_ops = &dsa_netdev_ops; + ds->dst->ops = &dsa_netdev_ops; break; #endif #ifdef CONFIG_NET_DSA_TAG_EDSA case htons(ETH_P_EDSA): - slave_dev->netdev_ops = &edsa_netdev_ops; + ds->dst->ops = &edsa_netdev_ops; break; #endif #ifdef CONFIG_NET_DSA_TAG_TRAILER case htons(ETH_P_TRAILER): - slave_dev->netdev_ops = &trailer_netdev_ops; + ds->dst->ops = &trailer_netdev_ops; + break; +#endif +#ifdef CONFIG_NET_DSA_TAG_BRCM + case htons(ETH_P_BRCMTAG): + ds->dst->ops = &brcm_netdev_ops; break; #endif default: - BUG(); + ds->dst->ops = ¬ag_netdev_ops; + break; } SET_NETDEV_DEV(slave_dev, parent); + slave_dev->dev.of_node = ds->pd->port_dn[port]; slave_dev->vlan_features = master->vlan_features; p = netdev_priv(slave_dev); p->dev = slave_dev; p->parent = ds; p->port = port; - p->phy = ds->slave_mii_bus->phy_map[port]; + + p->old_pause = -1; + p->old_link = -1; + p->old_duplex = -1; + + dsa_slave_phy_setup(p, slave_dev); ret = register_netdev(slave_dev); if (ret) { diff --git a/net/dsa/tag_brcm.c b/net/dsa/tag_brcm.c new file mode 100644 index 00000000000..e0b759ec209 --- /dev/null +++ b/net/dsa/tag_brcm.c @@ -0,0 +1,173 @@ +/* + * Broadcom tag support + * + * Copyright (C) 2014 Broadcom Corporation + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + */ + +#include <linux/etherdevice.h> +#include <linux/list.h> +#include <linux/netdevice.h> +#include <linux/slab.h> +#include "dsa_priv.h" + +/* This tag length is 4 bytes, older ones were 6 bytes, we do not + * handle them + */ +#define BRCM_TAG_LEN 4 + +/* Tag is constructed and desconstructed using byte by byte access + * because the tag is placed after the MAC Source Address, which does + * not make it 4-bytes aligned, so this might cause unaligned accesses + * on most systems where this is used. 
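The resulting frame layout, for reference (tag built by brcm_tag_xmit() below):

    /* before:  | DA (6) | SA (6) | EtherType (2) | payload ...
     * after:   | DA (6) | SA (6) | BRCM_TAG (4) | EtherType (2) | payload ...
     *
     * so the tag starts 12 bytes into the frame and is only 2-byte
     * aligned, hence the byte-at-a-time accesses:
     */
    u8 *brcm_tag = skb->data + 2 * ETH_ALEN;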
+ */ + +/* Ingress and egress opcodes */ +#define BRCM_OPCODE_SHIFT 5 +#define BRCM_OPCODE_MASK 0x7 + +/* Ingress fields */ +/* 1st byte in the tag */ +#define BRCM_IG_TC_SHIFT 2 +#define BRCM_IG_TC_MASK 0x7 +/* 2nd byte in the tag */ +#define BRCM_IG_TE_MASK 0x3 +#define BRCM_IG_TS_SHIFT 7 +/* 3rd byte in the tag */ +#define BRCM_IG_DSTMAP2_MASK 1 +#define BRCM_IG_DSTMAP1_MASK 0xff + +/* Egress fields */ + +/* 2nd byte in the tag */ +#define BRCM_EG_CID_MASK 0xff + +/* 3rd byte in the tag */ +#define BRCM_EG_RC_MASK 0xff +#define BRCM_EG_RC_RSVD (3 << 6) +#define BRCM_EG_RC_EXCEPTION (1 << 5) +#define BRCM_EG_RC_PROT_SNOOP (1 << 4) +#define BRCM_EG_RC_PROT_TERM (1 << 3) +#define BRCM_EG_RC_SWITCH (1 << 2) +#define BRCM_EG_RC_MAC_LEARN (1 << 1) +#define BRCM_EG_RC_MIRROR (1 << 0) +#define BRCM_EG_TC_SHIFT 5 +#define BRCM_EG_TC_MASK 0x7 +#define BRCM_EG_PID_MASK 0x1f + +static netdev_tx_t brcm_tag_xmit(struct sk_buff *skb, struct net_device *dev) +{ + struct dsa_slave_priv *p = netdev_priv(dev); + u8 *brcm_tag; + + dev->stats.tx_packets++; + dev->stats.tx_bytes += skb->len; + + if (skb_cow_head(skb, BRCM_TAG_LEN) < 0) + goto out_free; + + skb_push(skb, BRCM_TAG_LEN); + + memmove(skb->data, skb->data + BRCM_TAG_LEN, 2 * ETH_ALEN); + + /* Build the tag after the MAC Source Address */ + brcm_tag = skb->data + 2 * ETH_ALEN; + + /* Set the ingress opcode, traffic class, tag enforcment is + * deprecated + */ + brcm_tag[0] = (1 << BRCM_OPCODE_SHIFT) | + ((skb->priority << BRCM_IG_TC_SHIFT) & BRCM_IG_TC_MASK); + brcm_tag[1] = 0; + brcm_tag[2] = 0; + if (p->port == 8) + brcm_tag[2] = BRCM_IG_DSTMAP2_MASK; + brcm_tag[3] = (1 << p->port) & BRCM_IG_DSTMAP1_MASK; + + /* Queue the SKB for transmission on the parent interface, but + * do not modify its EtherType + */ + skb->protocol = htons(ETH_P_BRCMTAG); + skb->dev = p->parent->dst->master_netdev; + dev_queue_xmit(skb); + + return NETDEV_TX_OK; + +out_free: + kfree_skb(skb); + return NETDEV_TX_OK; +} + +static int brcm_tag_rcv(struct sk_buff *skb, struct net_device *dev, + struct packet_type *pt, struct net_device *orig_dev) +{ + struct dsa_switch_tree *dst = dev->dsa_ptr; + struct dsa_switch *ds; + int source_port; + u8 *brcm_tag; + + if (unlikely(dst == NULL)) + goto out_drop; + + ds = dst->ds[0]; + + skb = skb_unshare(skb, GFP_ATOMIC); + if (skb == NULL) + goto out; + + if (unlikely(!pskb_may_pull(skb, BRCM_TAG_LEN))) + goto out_drop; + + /* skb->data points to the EtherType, the tag is right before it */ + brcm_tag = skb->data - 2; + + /* The opcode should never be different than 0b000 */ + if (unlikely((brcm_tag[0] >> BRCM_OPCODE_SHIFT) & BRCM_OPCODE_MASK)) + goto out_drop; + + /* We should never see a reserved reason code without knowing how to + * handle it + */ + WARN_ON(brcm_tag[2] & BRCM_EG_RC_RSVD); + + /* Locate which port this is coming from */ + source_port = brcm_tag[3] & BRCM_EG_PID_MASK; + + /* Validate port against switch setup, either the port is totally */ + if (source_port >= DSA_MAX_PORTS || ds->ports[source_port] == NULL) + goto out_drop; + + /* Remove Broadcom tag and update checksum */ + skb_pull_rcsum(skb, BRCM_TAG_LEN); + + /* Move the Ethernet DA and SA */ + memmove(skb->data - ETH_HLEN, + skb->data - ETH_HLEN - BRCM_TAG_LEN, + 2 * ETH_ALEN); + + skb_push(skb, ETH_HLEN); + skb->pkt_type = PACKET_HOST; + skb->dev = ds->ports[source_port]; + skb->protocol = eth_type_trans(skb, skb->dev); + + skb->dev->stats.rx_packets++; + skb->dev->stats.rx_bytes += skb->len; + + netif_receive_skb(skb); + + return 0; + +out_drop: + 
kfree_skb(skb); +out: + return 0; +} + +const struct dsa_device_ops brcm_netdev_ops = { + .xmit = brcm_tag_xmit, + .rcv = brcm_tag_rcv, +}; diff --git a/net/dsa/tag_dsa.c b/net/dsa/tag_dsa.c index cacce1e22f9..d7dbc5bda5c 100644 --- a/net/dsa/tag_dsa.c +++ b/net/dsa/tag_dsa.c @@ -16,7 +16,7 @@ #define DSA_HLEN 4 -netdev_tx_t dsa_xmit(struct sk_buff *skb, struct net_device *dev) +static netdev_tx_t dsa_xmit(struct sk_buff *skb, struct net_device *dev) { struct dsa_slave_priv *p = netdev_priv(dev); u8 *dsa_header; @@ -186,7 +186,7 @@ out: return 0; } -struct packet_type dsa_packet_type __read_mostly = { - .type = cpu_to_be16(ETH_P_DSA), - .func = dsa_rcv, +const struct dsa_device_ops dsa_netdev_ops = { + .xmit = dsa_xmit, + .rcv = dsa_rcv, }; diff --git a/net/dsa/tag_edsa.c b/net/dsa/tag_edsa.c index e70c43c25e6..6b30abe8918 100644 --- a/net/dsa/tag_edsa.c +++ b/net/dsa/tag_edsa.c @@ -17,7 +17,7 @@ #define DSA_HLEN 4 #define EDSA_HLEN 8 -netdev_tx_t edsa_xmit(struct sk_buff *skb, struct net_device *dev) +static netdev_tx_t edsa_xmit(struct sk_buff *skb, struct net_device *dev) { struct dsa_slave_priv *p = netdev_priv(dev); u8 *edsa_header; @@ -205,7 +205,7 @@ out: return 0; } -struct packet_type edsa_packet_type __read_mostly = { - .type = cpu_to_be16(ETH_P_EDSA), - .func = edsa_rcv, +const struct dsa_device_ops edsa_netdev_ops = { + .xmit = edsa_xmit, + .rcv = edsa_rcv, }; diff --git a/net/dsa/tag_trailer.c b/net/dsa/tag_trailer.c index 94bc260d015..5fe9444842c 100644 --- a/net/dsa/tag_trailer.c +++ b/net/dsa/tag_trailer.c @@ -14,7 +14,7 @@ #include <linux/slab.h> #include "dsa_priv.h" -netdev_tx_t trailer_xmit(struct sk_buff *skb, struct net_device *dev) +static netdev_tx_t trailer_xmit(struct sk_buff *skb, struct net_device *dev) { struct dsa_slave_priv *p = netdev_priv(dev); struct sk_buff *nskb; @@ -114,7 +114,7 @@ out: return 0; } -struct packet_type trailer_packet_type __read_mostly = { - .type = cpu_to_be16(ETH_P_TRAILER), - .func = trailer_rcv, +const struct dsa_device_ops trailer_netdev_ops = { + .xmit = trailer_xmit, + .rcv = trailer_rcv, }; diff --git a/net/ethernet/eth.c b/net/ethernet/eth.c index f405e059240..33a140e1583 100644 --- a/net/ethernet/eth.c +++ b/net/ethernet/eth.c @@ -146,6 +146,33 @@ int eth_rebuild_header(struct sk_buff *skb) EXPORT_SYMBOL(eth_rebuild_header); /** + * eth_get_headlen - determine the the length of header for an ethernet frame + * @data: pointer to start of frame + * @len: total length of frame + * + * Make a best effort attempt to pull the length for all of the headers for + * a given frame in a linear buffer. + */ +u32 eth_get_headlen(void *data, unsigned int len) +{ + const struct ethhdr *eth = (const struct ethhdr *)data; + struct flow_keys keys; + + /* this should never happen, but better safe than sorry */ + if (len < sizeof(*eth)) + return len; + + /* parse any remaining L2/L3 headers, check for L4 */ + if (!__skb_flow_dissect(NULL, &keys, data, + eth->h_proto, sizeof(*eth), len)) + return max_t(u32, keys.thoff, sizeof(*eth)); + + /* parse for any L4 headers */ + return min_t(u32, __skb_get_poff(NULL, data, &keys, len), len); +} +EXPORT_SYMBOL(eth_get_headlen); + +/** * eth_type_trans - determine the packet's protocol ID. * @skb: received socket data * @dev: receiving network device @@ -181,11 +208,8 @@ __be16 eth_type_trans(struct sk_buff *skb, struct net_device *dev) * variants has been configured on the receiving interface, * and if so, set skb->protocol without looking at the packet. 
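A hedged sketch of the intended consumer of eth_get_headlen(): a driver copying just the headers of a received fragment into the linear skb area ('frag' and 'size' are illustrative):

    unsigned int pull = eth_get_headlen(page_address(frag), size);

    /* headers go into the linear area, payload stays in the fragment */
    memcpy(__skb_put(skb, pull), page_address(frag), pull);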
*/ - if (unlikely(netdev_uses_dsa_tags(dev))) - return htons(ETH_P_DSA); - - if (unlikely(netdev_uses_trailer_tags(dev))) - return htons(ETH_P_TRAILER); + if (unlikely(netdev_uses_dsa(dev))) + return htons(ETH_P_XDSA); if (likely(ntohs(eth->h_proto) >= ETH_P_802_3_MIN)) return eth->h_proto; diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c index 255aa9946fe..23104a3f292 100644 --- a/net/ipv4/fib_frontend.c +++ b/net/ipv4/fib_frontend.c @@ -243,7 +243,7 @@ static int __fib_validate_source(struct sk_buff *skb, __be32 src, __be32 dst, u8 tos, int oif, struct net_device *dev, int rpf, struct in_device *idev, u32 *itag) { - int ret, no_addr, accept_local; + int ret, no_addr; struct fib_result res; struct flowi4 fl4; struct net *net; @@ -258,16 +258,17 @@ static int __fib_validate_source(struct sk_buff *skb, __be32 src, __be32 dst, no_addr = idev->ifa_list == NULL; - accept_local = IN_DEV_ACCEPT_LOCAL(idev); fl4.flowi4_mark = IN_DEV_SRC_VMARK(idev) ? skb->mark : 0; net = dev_net(dev); if (fib_lookup(net, &fl4, &res)) goto last_resort; - if (res.type != RTN_UNICAST) { - if (res.type != RTN_LOCAL || !accept_local) - goto e_inval; - } + if (res.type != RTN_UNICAST && + (res.type != RTN_LOCAL || !IN_DEV_ACCEPT_LOCAL(idev))) + goto e_inval; + if (!rpf && !fib_num_tclassid_users(dev_net(dev)) && + (dev->ifindex != oif || !IN_DEV_TX_REDIRECTS(idev))) + goto last_resort; fib_combine_itag(itag, &res); dev_match = false; @@ -321,6 +322,7 @@ int fib_validate_source(struct sk_buff *skb, __be32 src, __be32 dst, int r = secpath_exists(skb) ? 0 : IN_DEV_RPFILTER(idev); if (!r && !fib_num_tclassid_users(dev_net(dev)) && + IN_DEV_ACCEPT_LOCAL(idev) && (dev->ifindex != oif || !IN_DEV_TX_REDIRECTS(idev))) { *itag = 0; return 0; diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c index b10cd43a472..5b6efb3d230 100644 --- a/net/ipv4/fib_semantics.c +++ b/net/ipv4/fib_semantics.c @@ -157,9 +157,12 @@ static void rt_fibinfo_free(struct rtable __rcu **rtp) static void free_nh_exceptions(struct fib_nh *nh) { - struct fnhe_hash_bucket *hash = nh->nh_exceptions; + struct fnhe_hash_bucket *hash; int i; + hash = rcu_dereference_protected(nh->nh_exceptions, 1); + if (!hash) + return; for (i = 0; i < FNHE_HASH_SIZE; i++) { struct fib_nh_exception *fnhe; @@ -205,8 +208,7 @@ static void free_fib_info_rcu(struct rcu_head *head) change_nexthops(fi) { if (nexthop_nh->nh_dev) dev_put(nexthop_nh->nh_dev); - if (nexthop_nh->nh_exceptions) - free_nh_exceptions(nexthop_nh); + free_nh_exceptions(nexthop_nh); rt_fibinfo_free_cpus(nexthop_nh->nh_pcpu_rth_output); rt_fibinfo_free(&nexthop_nh->nh_rth_input); } endfor_nexthops(fi); diff --git a/net/ipv4/gre_demux.c b/net/ipv4/gre_demux.c index 0485bf7f8f0..7e0756da873 100644 --- a/net/ipv4/gre_demux.c +++ b/net/ipv4/gre_demux.c @@ -125,6 +125,10 @@ static int parse_gre_header(struct sk_buff *skb, struct tnl_ptk_info *tpi, *csum_err = true; return -EINVAL; } + + skb_checksum_try_convert(skb, IPPROTO_GRE, 0, + null_compute_pseudo); + options++; } diff --git a/net/ipv4/gre_offload.c b/net/ipv4/gre_offload.c index 6556263c8fa..d3fe2ac0516 100644 --- a/net/ipv4/gre_offload.c +++ b/net/ipv4/gre_offload.c @@ -119,28 +119,6 @@ out: return segs; } -/* Compute the whole skb csum in s/w and store it, then verify GRO csum - * starting from gro_offset. 
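
The skb_checksum_try_convert() call added to parse_gre_header() above
turns a checksum that validated as CHECKSUM_COMPLETE into
CHECKSUM_UNNECESSARY so later layers skip re-verification. GRE checksums
have no pseudo-header component, so the conversion callback contributes
nothing; a sketch of null_compute_pseudo() consistent with that use
(assumed shape, not a verbatim quote of the header):

	static inline __wsum null_compute_pseudo(struct sk_buff *skb,
						 int proto)
	{
		/* GRE: no pseudo-header, contribute nothing */
		return 0;
	}
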
- */ -static __sum16 gro_skb_checksum(struct sk_buff *skb) -{ - __sum16 sum; - - skb->csum = skb_checksum(skb, 0, skb->len, 0); - NAPI_GRO_CB(skb)->csum = csum_sub(skb->csum, - csum_partial(skb->data, skb_gro_offset(skb), 0)); - sum = csum_fold(NAPI_GRO_CB(skb)->csum); - if (unlikely(skb->ip_summed == CHECKSUM_COMPLETE)) { - if (unlikely(!sum) && !skb->csum_complete_sw) - netdev_rx_csum_fault(skb->dev); - } else { - skb->ip_summed = CHECKSUM_COMPLETE; - skb->csum_complete_sw = 1; - } - - return sum; -} - static struct sk_buff **gre_gro_receive(struct sk_buff **head, struct sk_buff *skb) { @@ -192,22 +170,16 @@ static struct sk_buff **gre_gro_receive(struct sk_buff **head, if (unlikely(!greh)) goto out_unlock; } - if (greh->flags & GRE_CSUM) { /* Need to verify GRE csum first */ - __sum16 csum = 0; - - if (skb->ip_summed == CHECKSUM_COMPLETE) - csum = csum_fold(NAPI_GRO_CB(skb)->csum); - /* Don't trust csum error calculated/reported by h/w */ - if (skb->ip_summed == CHECKSUM_NONE || csum != 0) - csum = gro_skb_checksum(skb); - - /* GRE CSUM is the 1's complement of the 1's complement sum - * of the GRE hdr plus payload so it should add up to 0xffff - * (and 0 after csum_fold()) just like the IPv4 hdr csum. - */ - if (csum) + + /* Don't bother verifying checksum if we're going to flush anyway. */ + if ((greh->flags & GRE_CSUM) && !NAPI_GRO_CB(skb)->flush) { + if (skb_gro_checksum_simple_validate(skb)) goto out_unlock; + + skb_gro_checksum_try_convert(skb, IPPROTO_GRE, 0, + null_compute_pseudo); } + flush = 0; for (p = *head; p; p = p->next) { diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c index f10eab46228..4146153d875 100644 --- a/net/ipv4/igmp.c +++ b/net/ipv4/igmp.c @@ -117,7 +117,7 @@ #define IGMP_V2_Unsolicited_Report_Interval (10*HZ) #define IGMP_V3_Unsolicited_Report_Interval (1*HZ) #define IGMP_Query_Response_Interval (10*HZ) -#define IGMP_Unsolicited_Report_Count 2 +#define IGMP_Query_Robustness_Variable 2 #define IGMP_Initial_Report_Delay (1) @@ -756,8 +756,7 @@ static void igmp_ifc_event(struct in_device *in_dev) { if (IGMP_V1_SEEN(in_dev) || IGMP_V2_SEEN(in_dev)) return; - in_dev->mr_ifc_count = in_dev->mr_qrv ? in_dev->mr_qrv : - IGMP_Unsolicited_Report_Count; + in_dev->mr_ifc_count = in_dev->mr_qrv ?: sysctl_igmp_qrv; igmp_ifc_start_timer(in_dev, 1); } @@ -1086,8 +1085,7 @@ static void igmpv3_add_delrec(struct in_device *in_dev, struct ip_mc_list *im) pmc->interface = im->interface; in_dev_hold(in_dev); pmc->multiaddr = im->multiaddr; - pmc->crcount = in_dev->mr_qrv ? in_dev->mr_qrv : - IGMP_Unsolicited_Report_Count; + pmc->crcount = in_dev->mr_qrv ?: sysctl_igmp_qrv; pmc->sfmode = im->sfmode; if (pmc->sfmode == MCAST_INCLUDE) { struct ip_sf_list *psf; @@ -1226,8 +1224,7 @@ static void igmp_group_added(struct ip_mc_list *im) } /* else, v3 */ - im->crcount = in_dev->mr_qrv ? 
in_dev->mr_qrv : - IGMP_Unsolicited_Report_Count; + im->crcount = in_dev->mr_qrv ?: sysctl_igmp_qrv; igmp_ifc_event(in_dev); #endif } @@ -1322,7 +1319,7 @@ void ip_mc_inc_group(struct in_device *in_dev, __be32 addr) spin_lock_init(&im->lock); #ifdef CONFIG_IP_MULTICAST setup_timer(&im->timer, igmp_timer_expire, (unsigned long)im); - im->unsolicit_count = IGMP_Unsolicited_Report_Count; + im->unsolicit_count = sysctl_igmp_qrv; #endif im->next_rcu = in_dev->mc_list; @@ -1460,7 +1457,7 @@ void ip_mc_init_dev(struct in_device *in_dev) (unsigned long)in_dev); setup_timer(&in_dev->mr_ifc_timer, igmp_ifc_timer_expire, (unsigned long)in_dev); - in_dev->mr_qrv = IGMP_Unsolicited_Report_Count; + in_dev->mr_qrv = sysctl_igmp_qrv; #endif spin_lock_init(&in_dev->mc_tomb_lock); @@ -1474,6 +1471,9 @@ void ip_mc_up(struct in_device *in_dev) ASSERT_RTNL(); +#ifdef CONFIG_IP_MULTICAST + in_dev->mr_qrv = sysctl_igmp_qrv; +#endif ip_mc_inc_group(in_dev, IGMP_ALL_HOSTS); for_each_pmc_rtnl(in_dev, pmc) @@ -1540,7 +1540,9 @@ static struct in_device *ip_mc_find_dev(struct net *net, struct ip_mreqn *imr) */ int sysctl_igmp_max_memberships __read_mostly = IP_MAX_MEMBERSHIPS; int sysctl_igmp_max_msf __read_mostly = IP_MAX_MSF; - +#ifdef CONFIG_IP_MULTICAST +int sysctl_igmp_qrv __read_mostly = IGMP_Query_Robustness_Variable; +#endif static int ip_mc_del1_src(struct ip_mc_list *pmc, int sfmode, __be32 *psfsrc) @@ -1575,8 +1577,7 @@ static int ip_mc_del1_src(struct ip_mc_list *pmc, int sfmode, #ifdef CONFIG_IP_MULTICAST if (psf->sf_oldin && !IGMP_V1_SEEN(in_dev) && !IGMP_V2_SEEN(in_dev)) { - psf->sf_crcount = in_dev->mr_qrv ? in_dev->mr_qrv : - IGMP_Unsolicited_Report_Count; + psf->sf_crcount = in_dev->mr_qrv ?: sysctl_igmp_qrv; psf->sf_next = pmc->tomb; pmc->tomb = psf; rv = 1; @@ -1639,8 +1640,7 @@ static int ip_mc_del_src(struct in_device *in_dev, __be32 *pmca, int sfmode, /* filter mode change */ pmc->sfmode = MCAST_INCLUDE; #ifdef CONFIG_IP_MULTICAST - pmc->crcount = in_dev->mr_qrv ? in_dev->mr_qrv : - IGMP_Unsolicited_Report_Count; + pmc->crcount = in_dev->mr_qrv ?: sysctl_igmp_qrv; in_dev->mr_ifc_count = pmc->crcount; for (psf = pmc->sources; psf; psf = psf->sf_next) psf->sf_crcount = 0; @@ -1818,8 +1818,7 @@ static int ip_mc_add_src(struct in_device *in_dev, __be32 *pmca, int sfmode, #ifdef CONFIG_IP_MULTICAST /* else no filters; keep old mode for reports */ - pmc->crcount = in_dev->mr_qrv ? 
in_dev->mr_qrv : - IGMP_Unsolicited_Report_Count; + pmc->crcount = in_dev->mr_qrv ?: sysctl_igmp_qrv; in_dev->mr_ifc_count = pmc->crcount; for (psf = pmc->sources; psf; psf = psf->sf_next) psf->sf_crcount = 0; @@ -2539,7 +2538,7 @@ static int igmp_mc_seq_show(struct seq_file *seq, void *v) querier = "NONE"; #endif - if (rcu_dereference(state->in_dev->mc_list) == im) { + if (rcu_access_pointer(state->in_dev->mc_list) == im) { seq_printf(seq, "%d\t%-10s: %5d %7s\n", state->dev->ifindex, state->dev->name, state->in_dev->mc_count, querier); } diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c index 43116e8c8e1..9111a4e2215 100644 --- a/net/ipv4/inet_hashtables.c +++ b/net/ipv4/inet_hashtables.c @@ -229,7 +229,7 @@ begin: } } else if (score == hiscore && reuseport) { matches++; - if (((u64)phash * matches) >> 32 == 0) + if (reciprocal_scale(phash, matches) == 0) result = sk; phash = next_pseudo_random32(phash); } diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c index 5cb830c7899..455e75bcb16 100644 --- a/net/ipv4/ip_sockglue.c +++ b/net/ipv4/ip_sockglue.c @@ -405,7 +405,7 @@ void ip_local_error(struct sock *sk, int err, __be32 daddr, __be16 port, u32 inf int ip_recv_error(struct sock *sk, struct msghdr *msg, int len, int *addr_len) { struct sock_exterr_skb *serr; - struct sk_buff *skb, *skb2; + struct sk_buff *skb; DECLARE_SOCKADDR(struct sockaddr_in *, sin, msg->msg_name); struct { struct sock_extended_err ee; @@ -415,7 +415,7 @@ int ip_recv_error(struct sock *sk, struct msghdr *msg, int len, int *addr_len) int copied; err = -EAGAIN; - skb = skb_dequeue(&sk->sk_error_queue); + skb = sock_dequeue_err_skb(sk); if (skb == NULL) goto out; @@ -462,17 +462,6 @@ int ip_recv_error(struct sock *sk, struct msghdr *msg, int len, int *addr_len) msg->msg_flags |= MSG_ERRQUEUE; err = copied; - /* Reset and regenerate socket error */ - spin_lock_bh(&sk->sk_error_queue.lock); - sk->sk_err = 0; - skb2 = skb_peek(&sk->sk_error_queue); - if (skb2 != NULL) { - sk->sk_err = SKB_EXT_ERR(skb2)->ee.ee_errno; - spin_unlock_bh(&sk->sk_error_queue.lock); - sk->sk_error_report(sk); - } else - spin_unlock_bh(&sk->sk_error_queue.lock); - out_free_skb: kfree_skb(skb); out: diff --git a/net/ipv4/ipconfig.c b/net/ipv4/ipconfig.c index 5bbef4fdcb4..648fa1490ea 100644 --- a/net/ipv4/ipconfig.c +++ b/net/ipv4/ipconfig.c @@ -262,7 +262,8 @@ static int __init ic_open_devs(void) /* wait for a carrier on at least one device */ start = jiffies; next_msg = start + msecs_to_jiffies(CONF_CARRIER_TIMEOUT/12); - while (jiffies - start < msecs_to_jiffies(CONF_CARRIER_TIMEOUT)) { + while (time_before(jiffies, start + + msecs_to_jiffies(CONF_CARRIER_TIMEOUT))) { int wait, elapsed; for_each_netdev(&init_net, dev) diff --git a/net/ipv4/netfilter/ipt_CLUSTERIP.c b/net/ipv4/netfilter/ipt_CLUSTERIP.c index 2510c02c2d2..e90f83a3415 100644 --- a/net/ipv4/netfilter/ipt_CLUSTERIP.c +++ b/net/ipv4/netfilter/ipt_CLUSTERIP.c @@ -285,7 +285,7 @@ clusterip_hashfn(const struct sk_buff *skb, } /* node numbers are 1..n, not 0..n */ - return (((u64)hashval * config->num_total_nodes) >> 32) + 1; + return reciprocal_scale(hashval, config->num_total_nodes) + 1; } static inline int diff --git a/net/ipv4/route.c b/net/ipv4/route.c index eaa4b000c7b..234a43e233d 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -596,12 +596,12 @@ static struct fib_nh_exception *fnhe_oldest(struct fnhe_hash_bucket *hash) static inline u32 fnhe_hashfun(__be32 daddr) { + static u32 fnhe_hashrnd __read_mostly; u32 hval; - hval = (__force u32) daddr; 
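
Several hunks in this series swap the open-coded "((u64)x * n) >> 32"
idiom for reciprocal_scale(). A sketch of the helper consistent with the
pattern it replaces, scaling a uniform 32-bit value into [0, ep_ro) with
a single multiply instead of a modulus:

	static inline u32 reciprocal_scale(u32 val, u32 ep_ro)
	{
		/* maps val uniformly into [0, ep_ro) */
		return (u32)(((u64) val * ep_ro) >> 32);
	}

Callers feed it hash output, which is why the result stays uniformly
distributed across the target range.
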
- hval ^= (hval >> 11) ^ (hval >> 22); - - return hval & (FNHE_HASH_SIZE - 1); + net_get_random_once(&fnhe_hashrnd, sizeof(fnhe_hashrnd)); + hval = jhash_1word((__force u32) daddr, fnhe_hashrnd); + return hash_32(hval, FNHE_HASH_SHIFT); } static void fill_route_from_fnhe(struct rtable *rt, struct fib_nh_exception *fnhe) @@ -628,12 +628,12 @@ static void update_or_create_fnhe(struct fib_nh *nh, __be32 daddr, __be32 gw, spin_lock_bh(&fnhe_lock); - hash = nh->nh_exceptions; + hash = rcu_dereference(nh->nh_exceptions); if (!hash) { hash = kzalloc(FNHE_HASH_SIZE * sizeof(*hash), GFP_ATOMIC); if (!hash) goto out_unlock; - nh->nh_exceptions = hash; + rcu_assign_pointer(nh->nh_exceptions, hash); } hash += hval; @@ -1242,7 +1242,7 @@ static unsigned int ipv4_mtu(const struct dst_entry *dst) static struct fib_nh_exception *find_exception(struct fib_nh *nh, __be32 daddr) { - struct fnhe_hash_bucket *hash = nh->nh_exceptions; + struct fnhe_hash_bucket *hash = rcu_dereference(nh->nh_exceptions); struct fib_nh_exception *fnhe; u32 hval; diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c index c0c75688896..0431a8f3c8f 100644 --- a/net/ipv4/syncookies.c +++ b/net/ipv4/syncookies.c @@ -25,7 +25,7 @@ extern int sysctl_tcp_syncookies; -static u32 syncookie_secret[2][16-4+SHA_DIGEST_WORDS]; +static u32 syncookie_secret[2][16-4+SHA_DIGEST_WORDS] __read_mostly; #define COOKIEBITS 24 /* Upper bits store count */ #define COOKIEMASK (((__u32)1 << COOKIEBITS) - 1) diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c index 79a007c5255..45d156dacd6 100644 --- a/net/ipv4/sysctl_net_ipv4.c +++ b/net/ipv4/sysctl_net_ipv4.c @@ -450,6 +450,16 @@ static struct ctl_table ipv4_table[] = { .mode = 0644, .proc_handler = proc_dointvec }, +#ifdef CONFIG_IP_MULTICAST + { + .procname = "igmp_qrv", + .data = &sysctl_igmp_qrv, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec_minmax, + .extra1 = &one + }, +#endif { .procname = "inet_peer_threshold", .data = &inet_peer_threshold, diff --git a/net/ipv4/tcp_bic.c b/net/ipv4/tcp_bic.c index d5de69bc04f..bb395d46a38 100644 --- a/net/ipv4/tcp_bic.c +++ b/net/ipv4/tcp_bic.c @@ -17,7 +17,6 @@ #include <linux/module.h> #include <net/tcp.h> - #define BICTCP_BETA_SCALE 1024 /* Scale factor beta calculation * max_cwnd = snd_cwnd * beta */ @@ -46,11 +45,10 @@ MODULE_PARM_DESC(initial_ssthresh, "initial value of slow start threshold"); module_param(smooth_part, int, 0644); MODULE_PARM_DESC(smooth_part, "log(B/(B*Smin))/log(B/(B-1))+B, # of RTT from Wmax-B to Wmax"); - /* BIC TCP Parameters */ struct bictcp { u32 cnt; /* increase cwnd by 1 after ACKs */ - u32 last_max_cwnd; /* last maximum snd_cwnd */ + u32 last_max_cwnd; /* last maximum snd_cwnd */ u32 loss_cwnd; /* congestion window at last loss */ u32 last_cwnd; /* the last snd_cwnd */ u32 last_time; /* time when updated last_cwnd */ @@ -103,7 +101,7 @@ static inline void bictcp_update(struct bictcp *ca, u32 cwnd) /* binary increase */ if (cwnd < ca->last_max_cwnd) { - __u32 dist = (ca->last_max_cwnd - cwnd) + __u32 dist = (ca->last_max_cwnd - cwnd) / BICTCP_B; if (dist > max_increment) @@ -154,7 +152,6 @@ static void bictcp_cong_avoid(struct sock *sk, u32 ack, u32 acked) bictcp_update(ca, tp->snd_cwnd); tcp_cong_avoid_ai(tp, ca->cnt); } - } /* @@ -177,7 +174,6 @@ static u32 bictcp_recalc_ssthresh(struct sock *sk) ca->loss_cwnd = tp->snd_cwnd; - if (tp->snd_cwnd <= low_window) return max(tp->snd_cwnd >> 1U, 2U); else @@ -188,6 +184,7 @@ static u32 bictcp_undo_cwnd(struct sock *sk) { const struct 
tcp_sock *tp = tcp_sk(sk); const struct bictcp *ca = inet_csk_ca(sk); + return max(tp->snd_cwnd, ca->loss_cwnd); } @@ -206,12 +203,12 @@ static void bictcp_acked(struct sock *sk, u32 cnt, s32 rtt) if (icsk->icsk_ca_state == TCP_CA_Open) { struct bictcp *ca = inet_csk_ca(sk); + cnt -= ca->delayed_ack >> ACK_RATIO_SHIFT; ca->delayed_ack += cnt; } } - static struct tcp_congestion_ops bictcp __read_mostly = { .init = bictcp_init, .ssthresh = bictcp_recalc_ssthresh, diff --git a/net/ipv4/tcp_cong.c b/net/ipv4/tcp_cong.c index 7b09d8b49fa..80248f56c89 100644 --- a/net/ipv4/tcp_cong.c +++ b/net/ipv4/tcp_cong.c @@ -142,7 +142,6 @@ static int __init tcp_congestion_default(void) } late_initcall(tcp_congestion_default); - /* Build string with list of available congestion control values */ void tcp_get_available_congestion_control(char *buf, size_t maxlen) { @@ -154,7 +153,6 @@ void tcp_get_available_congestion_control(char *buf, size_t maxlen) offs += snprintf(buf + offs, maxlen - offs, "%s%s", offs == 0 ? "" : " ", ca->name); - } rcu_read_unlock(); } @@ -186,7 +184,6 @@ void tcp_get_allowed_congestion_control(char *buf, size_t maxlen) offs += snprintf(buf + offs, maxlen - offs, "%s%s", offs == 0 ? "" : " ", ca->name); - } rcu_read_unlock(); } @@ -230,7 +227,6 @@ out: return ret; } - /* Change congestion control for socket */ int tcp_set_congestion_control(struct sock *sk, const char *name) { @@ -337,6 +333,7 @@ EXPORT_SYMBOL_GPL(tcp_reno_cong_avoid); u32 tcp_reno_ssthresh(struct sock *sk) { const struct tcp_sock *tp = tcp_sk(sk); + return max(tp->snd_cwnd >> 1U, 2U); } EXPORT_SYMBOL_GPL(tcp_reno_ssthresh); diff --git a/net/ipv4/tcp_cubic.c b/net/ipv4/tcp_cubic.c index a9bd8a4828a..20de0118c98 100644 --- a/net/ipv4/tcp_cubic.c +++ b/net/ipv4/tcp_cubic.c @@ -82,12 +82,13 @@ MODULE_PARM_DESC(hystart_ack_delta, "spacing between ack's indicating train (mse /* BIC TCP Parameters */ struct bictcp { u32 cnt; /* increase cwnd by 1 after ACKs */ - u32 last_max_cwnd; /* last maximum snd_cwnd */ + u32 last_max_cwnd; /* last maximum snd_cwnd */ u32 loss_cwnd; /* congestion window at last loss */ u32 last_cwnd; /* the last snd_cwnd */ u32 last_time; /* time when updated last_cwnd */ u32 bic_origin_point;/* origin point of bic function */ - u32 bic_K; /* time to origin point from the beginning of the current epoch */ + u32 bic_K; /* time to origin point + from the beginning of the current epoch */ u32 delay_min; /* min delay (msec << 3) */ u32 epoch_start; /* beginning of an epoch */ u32 ack_cnt; /* number of acks */ @@ -219,7 +220,7 @@ static inline void bictcp_update(struct bictcp *ca, u32 cwnd) ca->last_time = tcp_time_stamp; if (ca->epoch_start == 0) { - ca->epoch_start = tcp_time_stamp; /* record the beginning of an epoch */ + ca->epoch_start = tcp_time_stamp; /* record beginning */ ca->ack_cnt = 1; /* start counting */ ca->tcp_cwnd = cwnd; /* syn with cubic */ @@ -263,9 +264,9 @@ static inline void bictcp_update(struct bictcp *ca, u32 cwnd) /* c/rtt * (t-K)^3 */ delta = (cube_rtt_scale * offs * offs * offs) >> (10+3*BICTCP_HZ); - if (t < ca->bic_K) /* below origin*/ + if (t < ca->bic_K) /* below origin*/ bic_target = ca->bic_origin_point - delta; - else /* above origin*/ + else /* above origin*/ bic_target = ca->bic_origin_point + delta; /* cubic function - calc bictcp_cnt*/ @@ -285,13 +286,14 @@ static inline void bictcp_update(struct bictcp *ca, u32 cwnd) /* TCP Friendly */ if (tcp_friendliness) { u32 scale = beta_scale; + delta = (cwnd * scale) >> 3; while (ca->ack_cnt > delta) { /* update tcp cwnd */ 
ca->ack_cnt -= delta; ca->tcp_cwnd++; } - if (ca->tcp_cwnd > cwnd){ /* if bic is slower than tcp */ + if (ca->tcp_cwnd > cwnd) { /* if bic is slower than tcp */ delta = ca->tcp_cwnd - cwnd; max_cnt = cwnd / delta; if (ca->cnt > max_cnt) @@ -320,7 +322,6 @@ static void bictcp_cong_avoid(struct sock *sk, u32 ack, u32 acked) bictcp_update(ca, tp->snd_cwnd); tcp_cong_avoid_ai(tp, ca->cnt); } - } static u32 bictcp_recalc_ssthresh(struct sock *sk) @@ -452,7 +453,8 @@ static int __init cubictcp_register(void) * based on SRTT of 100ms */ - beta_scale = 8*(BICTCP_BETA_SCALE+beta)/ 3 / (BICTCP_BETA_SCALE - beta); + beta_scale = 8*(BICTCP_BETA_SCALE+beta) / 3 + / (BICTCP_BETA_SCALE - beta); cube_rtt_scale = (bic_scale * 10); /* 1024*c/rtt */ diff --git a/net/ipv4/tcp_diag.c b/net/ipv4/tcp_diag.c index ed3f2ad42e0..0d73f9ddb55 100644 --- a/net/ipv4/tcp_diag.c +++ b/net/ipv4/tcp_diag.c @@ -9,7 +9,6 @@ * 2 of the License, or (at your option) any later version. */ - #include <linux/module.h> #include <linux/inet_diag.h> @@ -35,13 +34,13 @@ static void tcp_diag_get_info(struct sock *sk, struct inet_diag_msg *r, } static void tcp_diag_dump(struct sk_buff *skb, struct netlink_callback *cb, - struct inet_diag_req_v2 *r, struct nlattr *bc) + struct inet_diag_req_v2 *r, struct nlattr *bc) { inet_diag_dump_icsk(&tcp_hashinfo, skb, cb, r, bc); } static int tcp_diag_dump_one(struct sk_buff *in_skb, const struct nlmsghdr *nlh, - struct inet_diag_req_v2 *req) + struct inet_diag_req_v2 *req) { return inet_diag_dump_one_icsk(&tcp_hashinfo, in_skb, nlh, req); } diff --git a/net/ipv4/tcp_highspeed.c b/net/ipv4/tcp_highspeed.c index 1c4908280d9..882c08aae2f 100644 --- a/net/ipv4/tcp_highspeed.c +++ b/net/ipv4/tcp_highspeed.c @@ -9,7 +9,6 @@ #include <linux/module.h> #include <net/tcp.h> - /* From AIMD tables from RFC 3649 appendix B, * with fixed-point MD scaled <<8. 
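
The md column is the multiplicative-decrease factor in units of 1/256.
A worked sketch mirroring the hstcp_ssthresh() arithmetic (illustrative
helper, not a quote of the file): at cwnd <= 38 the table gives md = 128,
so the decrease is cwnd - ((cwnd * 128) >> 8) == cwnd / 2, the classic
Reno halving, while the larger windows below back off more gently.

	static u32 hstcp_md_decrease(u32 cwnd, u32 md)
	{
		/* fixed-point multiply, scaled back by >> 8 */
		return max(cwnd - ((cwnd * md) >> 8), 2U);
	}
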
*/ @@ -17,78 +16,78 @@ static const struct hstcp_aimd_val { unsigned int cwnd; unsigned int md; } hstcp_aimd_vals[] = { - { 38, 128, /* 0.50 */ }, - { 118, 112, /* 0.44 */ }, - { 221, 104, /* 0.41 */ }, - { 347, 98, /* 0.38 */ }, - { 495, 93, /* 0.37 */ }, - { 663, 89, /* 0.35 */ }, - { 851, 86, /* 0.34 */ }, - { 1058, 83, /* 0.33 */ }, - { 1284, 81, /* 0.32 */ }, - { 1529, 78, /* 0.31 */ }, - { 1793, 76, /* 0.30 */ }, - { 2076, 74, /* 0.29 */ }, - { 2378, 72, /* 0.28 */ }, - { 2699, 71, /* 0.28 */ }, - { 3039, 69, /* 0.27 */ }, - { 3399, 68, /* 0.27 */ }, - { 3778, 66, /* 0.26 */ }, - { 4177, 65, /* 0.26 */ }, - { 4596, 64, /* 0.25 */ }, - { 5036, 62, /* 0.25 */ }, - { 5497, 61, /* 0.24 */ }, - { 5979, 60, /* 0.24 */ }, - { 6483, 59, /* 0.23 */ }, - { 7009, 58, /* 0.23 */ }, - { 7558, 57, /* 0.22 */ }, - { 8130, 56, /* 0.22 */ }, - { 8726, 55, /* 0.22 */ }, - { 9346, 54, /* 0.21 */ }, - { 9991, 53, /* 0.21 */ }, - { 10661, 52, /* 0.21 */ }, - { 11358, 52, /* 0.20 */ }, - { 12082, 51, /* 0.20 */ }, - { 12834, 50, /* 0.20 */ }, - { 13614, 49, /* 0.19 */ }, - { 14424, 48, /* 0.19 */ }, - { 15265, 48, /* 0.19 */ }, - { 16137, 47, /* 0.19 */ }, - { 17042, 46, /* 0.18 */ }, - { 17981, 45, /* 0.18 */ }, - { 18955, 45, /* 0.18 */ }, - { 19965, 44, /* 0.17 */ }, - { 21013, 43, /* 0.17 */ }, - { 22101, 43, /* 0.17 */ }, - { 23230, 42, /* 0.17 */ }, - { 24402, 41, /* 0.16 */ }, - { 25618, 41, /* 0.16 */ }, - { 26881, 40, /* 0.16 */ }, - { 28193, 39, /* 0.16 */ }, - { 29557, 39, /* 0.15 */ }, - { 30975, 38, /* 0.15 */ }, - { 32450, 38, /* 0.15 */ }, - { 33986, 37, /* 0.15 */ }, - { 35586, 36, /* 0.14 */ }, - { 37253, 36, /* 0.14 */ }, - { 38992, 35, /* 0.14 */ }, - { 40808, 35, /* 0.14 */ }, - { 42707, 34, /* 0.13 */ }, - { 44694, 33, /* 0.13 */ }, - { 46776, 33, /* 0.13 */ }, - { 48961, 32, /* 0.13 */ }, - { 51258, 32, /* 0.13 */ }, - { 53677, 31, /* 0.12 */ }, - { 56230, 30, /* 0.12 */ }, - { 58932, 30, /* 0.12 */ }, - { 61799, 29, /* 0.12 */ }, - { 64851, 28, /* 0.11 */ }, - { 68113, 28, /* 0.11 */ }, - { 71617, 27, /* 0.11 */ }, - { 75401, 26, /* 0.10 */ }, - { 79517, 26, /* 0.10 */ }, - { 84035, 25, /* 0.10 */ }, - { 89053, 24, /* 0.10 */ }, + { 38, 128, /* 0.50 */ }, + { 118, 112, /* 0.44 */ }, + { 221, 104, /* 0.41 */ }, + { 347, 98, /* 0.38 */ }, + { 495, 93, /* 0.37 */ }, + { 663, 89, /* 0.35 */ }, + { 851, 86, /* 0.34 */ }, + { 1058, 83, /* 0.33 */ }, + { 1284, 81, /* 0.32 */ }, + { 1529, 78, /* 0.31 */ }, + { 1793, 76, /* 0.30 */ }, + { 2076, 74, /* 0.29 */ }, + { 2378, 72, /* 0.28 */ }, + { 2699, 71, /* 0.28 */ }, + { 3039, 69, /* 0.27 */ }, + { 3399, 68, /* 0.27 */ }, + { 3778, 66, /* 0.26 */ }, + { 4177, 65, /* 0.26 */ }, + { 4596, 64, /* 0.25 */ }, + { 5036, 62, /* 0.25 */ }, + { 5497, 61, /* 0.24 */ }, + { 5979, 60, /* 0.24 */ }, + { 6483, 59, /* 0.23 */ }, + { 7009, 58, /* 0.23 */ }, + { 7558, 57, /* 0.22 */ }, + { 8130, 56, /* 0.22 */ }, + { 8726, 55, /* 0.22 */ }, + { 9346, 54, /* 0.21 */ }, + { 9991, 53, /* 0.21 */ }, + { 10661, 52, /* 0.21 */ }, + { 11358, 52, /* 0.20 */ }, + { 12082, 51, /* 0.20 */ }, + { 12834, 50, /* 0.20 */ }, + { 13614, 49, /* 0.19 */ }, + { 14424, 48, /* 0.19 */ }, + { 15265, 48, /* 0.19 */ }, + { 16137, 47, /* 0.19 */ }, + { 17042, 46, /* 0.18 */ }, + { 17981, 45, /* 0.18 */ }, + { 18955, 45, /* 0.18 */ }, + { 19965, 44, /* 0.17 */ }, + { 21013, 43, /* 0.17 */ }, + { 22101, 43, /* 0.17 */ }, + { 23230, 42, /* 0.17 */ }, + { 24402, 41, /* 0.16 */ }, + { 25618, 41, /* 0.16 */ }, + { 26881, 40, /* 0.16 */ }, + { 28193, 39, /* 0.16 */ }, + { 29557, 39, /* 0.15 
*/ }, + { 30975, 38, /* 0.15 */ }, + { 32450, 38, /* 0.15 */ }, + { 33986, 37, /* 0.15 */ }, + { 35586, 36, /* 0.14 */ }, + { 37253, 36, /* 0.14 */ }, + { 38992, 35, /* 0.14 */ }, + { 40808, 35, /* 0.14 */ }, + { 42707, 34, /* 0.13 */ }, + { 44694, 33, /* 0.13 */ }, + { 46776, 33, /* 0.13 */ }, + { 48961, 32, /* 0.13 */ }, + { 51258, 32, /* 0.13 */ }, + { 53677, 31, /* 0.12 */ }, + { 56230, 30, /* 0.12 */ }, + { 58932, 30, /* 0.12 */ }, + { 61799, 29, /* 0.12 */ }, + { 64851, 28, /* 0.11 */ }, + { 68113, 28, /* 0.11 */ }, + { 71617, 27, /* 0.11 */ }, + { 75401, 26, /* 0.10 */ }, + { 79517, 26, /* 0.10 */ }, + { 84035, 25, /* 0.10 */ }, + { 89053, 24, /* 0.10 */ }, }; #define HSTCP_AIMD_MAX ARRAY_SIZE(hstcp_aimd_vals) diff --git a/net/ipv4/tcp_htcp.c b/net/ipv4/tcp_htcp.c index 031361311a8..58469fff6c1 100644 --- a/net/ipv4/tcp_htcp.c +++ b/net/ipv4/tcp_htcp.c @@ -98,7 +98,8 @@ static inline void measure_rtt(struct sock *sk, u32 srtt) } } -static void measure_achieved_throughput(struct sock *sk, u32 pkts_acked, s32 rtt) +static void measure_achieved_throughput(struct sock *sk, + u32 pkts_acked, s32 rtt) { const struct inet_connection_sock *icsk = inet_csk(sk); const struct tcp_sock *tp = tcp_sk(sk); @@ -148,8 +149,8 @@ static inline void htcp_beta_update(struct htcp *ca, u32 minRTT, u32 maxRTT) if (use_bandwidth_switch) { u32 maxB = ca->maxB; u32 old_maxB = ca->old_maxB; - ca->old_maxB = ca->maxB; + ca->old_maxB = ca->maxB; if (!between(5 * maxB, 4 * old_maxB, 6 * old_maxB)) { ca->beta = BETA_MIN; ca->modeswitch = 0; @@ -270,6 +271,7 @@ static void htcp_state(struct sock *sk, u8 new_state) case TCP_CA_Open: { struct htcp *ca = inet_csk_ca(sk); + if (ca->undo_last_cong) { ca->last_cong = jiffies; ca->undo_last_cong = 0; diff --git a/net/ipv4/tcp_hybla.c b/net/ipv4/tcp_hybla.c index d8f8f05a495..f963b274f2b 100644 --- a/net/ipv4/tcp_hybla.c +++ b/net/ipv4/tcp_hybla.c @@ -29,7 +29,6 @@ static int rtt0 = 25; module_param(rtt0, int, 0644); MODULE_PARM_DESC(rtt0, "reference rout trip time (ms)"); - /* This is called to refresh values for hybla parameters */ static inline void hybla_recalc_param (struct sock *sk) { diff --git a/net/ipv4/tcp_illinois.c b/net/ipv4/tcp_illinois.c index 5999b3972e6..1d5a30a90ad 100644 --- a/net/ipv4/tcp_illinois.c +++ b/net/ipv4/tcp_illinois.c @@ -284,7 +284,7 @@ static void tcp_illinois_cong_avoid(struct sock *sk, u32 ack, u32 acked) delta = (tp->snd_cwnd_cnt * ca->alpha) >> ALPHA_SHIFT; if (delta >= tp->snd_cwnd) { tp->snd_cwnd = min(tp->snd_cwnd + delta / tp->snd_cwnd, - (u32) tp->snd_cwnd_clamp); + (u32)tp->snd_cwnd_clamp); tp->snd_cwnd_cnt = 0; } } @@ -299,7 +299,6 @@ static u32 tcp_illinois_ssthresh(struct sock *sk) return max(tp->snd_cwnd - ((tp->snd_cwnd * ca->beta) >> BETA_SHIFT), 2U); } - /* Extract info for Tcp socket info provided via netlink. 
*/ static void tcp_illinois_info(struct sock *sk, u32 ext, struct sk_buff *skb) diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index a906e0200ff..f97003ad0af 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -1888,21 +1888,21 @@ static inline void tcp_reset_reno_sack(struct tcp_sock *tp) tp->sacked_out = 0; } -static void tcp_clear_retrans_partial(struct tcp_sock *tp) +void tcp_clear_retrans(struct tcp_sock *tp) { tp->retrans_out = 0; tp->lost_out = 0; - tp->undo_marker = 0; tp->undo_retrans = -1; + tp->fackets_out = 0; + tp->sacked_out = 0; } -void tcp_clear_retrans(struct tcp_sock *tp) +static inline void tcp_init_undo(struct tcp_sock *tp) { - tcp_clear_retrans_partial(tp); - - tp->fackets_out = 0; - tp->sacked_out = 0; + tp->undo_marker = tp->snd_una; + /* Retransmission still in flight may cause DSACKs later. */ + tp->undo_retrans = tp->retrans_out ? : -1; } /* Enter Loss state. If we detect SACK reneging, forget all SACK information @@ -1925,18 +1925,18 @@ void tcp_enter_loss(struct sock *sk) tp->prior_ssthresh = tcp_current_ssthresh(sk); tp->snd_ssthresh = icsk->icsk_ca_ops->ssthresh(sk); tcp_ca_event(sk, CA_EVENT_LOSS); + tcp_init_undo(tp); } tp->snd_cwnd = 1; tp->snd_cwnd_cnt = 0; tp->snd_cwnd_stamp = tcp_time_stamp; - tcp_clear_retrans_partial(tp); + tp->retrans_out = 0; + tp->lost_out = 0; if (tcp_is_reno(tp)) tcp_reset_reno_sack(tp); - tp->undo_marker = tp->snd_una; - skb = tcp_write_queue_head(sk); is_reneg = skb && (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED); if (is_reneg) { @@ -1950,9 +1950,6 @@ void tcp_enter_loss(struct sock *sk) if (skb == tcp_send_head(sk)) break; - if (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_RETRANS) - tp->undo_marker = 0; - TCP_SKB_CB(skb)->sacked &= (~TCPCB_TAGBITS)|TCPCB_SACKED_ACKED; if (!(TCP_SKB_CB(skb)->sacked&TCPCB_SACKED_ACKED) || is_reneg) { TCP_SKB_CB(skb)->sacked &= ~TCPCB_SACKED_ACKED; @@ -2671,8 +2668,7 @@ static void tcp_enter_recovery(struct sock *sk, bool ece_ack) NET_INC_STATS_BH(sock_net(sk), mib_idx); tp->prior_ssthresh = 0; - tp->undo_marker = tp->snd_una; - tp->undo_retrans = tp->retrans_out ? : -1; + tcp_init_undo(tp); if (inet_csk(sk)->icsk_ca_state < TCP_CA_CWR) { if (!ece_ack) @@ -2971,7 +2967,8 @@ void tcp_rearm_rto(struct sock *sk) if (icsk->icsk_pending == ICSK_TIME_EARLY_RETRANS || icsk->icsk_pending == ICSK_TIME_LOSS_PROBE) { struct sk_buff *skb = tcp_write_queue_head(sk); - const u32 rto_time_stamp = TCP_SKB_CB(skb)->when + rto; + const u32 rto_time_stamp = + tcp_skb_timestamp(skb) + rto; s32 delta = (s32)(rto_time_stamp - tcp_time_stamp); /* delta may not be positive if the socket is locked * when the retrans timer fires and is rescheduled. 
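
The conversions away from TCP_SKB_CB(skb)->when in this file and below
read the jiffies-resolution half of the new per-skb timestamp through a
small helper; a sketch consistent with how this series uses it:

	static inline u32 tcp_skb_timestamp(const struct sk_buff *skb)
	{
		/* jiffies component of the skb's mstamp */
		return skb->skb_mstamp.stamp_jiffies;
	}
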
@@ -5910,7 +5907,7 @@ int tcp_conn_request(struct request_sock_ops *rsk_ops, struct request_sock *req; struct tcp_sock *tp = tcp_sk(sk); struct dst_entry *dst = NULL; - __u32 isn = TCP_SKB_CB(skb)->when; + __u32 isn = TCP_SKB_CB(skb)->tcp_tw_isn; bool want_cookie = false, fastopen; struct flowi fl; struct tcp_fastopen_cookie foc = { .len = -1 }; diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index cd17f009aed..3f9bc3f0bba 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -90,7 +90,6 @@ int sysctl_tcp_tw_reuse __read_mostly; int sysctl_tcp_low_latency __read_mostly; EXPORT_SYMBOL(sysctl_tcp_low_latency); - #ifdef CONFIG_TCP_MD5SIG static int tcp_v4_md5_hash_hdr(char *md5_hash, const struct tcp_md5sig_key *key, __be32 daddr, __be32 saddr, const struct tcphdr *th); @@ -438,8 +437,9 @@ void tcp_v4_err(struct sk_buff *icmp_skb, u32 info) skb = tcp_write_queue_head(sk); BUG_ON(!skb); - remaining = icsk->icsk_rto - min(icsk->icsk_rto, - tcp_time_stamp - TCP_SKB_CB(skb)->when); + remaining = icsk->icsk_rto - + min(icsk->icsk_rto, + tcp_time_stamp - tcp_skb_timestamp(skb)); if (remaining) { inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, @@ -1269,7 +1269,7 @@ struct request_sock_ops tcp_request_sock_ops __read_mostly = { .send_ack = tcp_v4_reqsk_send_ack, .destructor = tcp_v4_reqsk_destructor, .send_reset = tcp_v4_send_reset, - .syn_ack_timeout = tcp_syn_ack_timeout, + .syn_ack_timeout = tcp_syn_ack_timeout, }; static const struct tcp_request_sock_ops tcp_request_sock_ipv4_ops = { @@ -1628,7 +1628,7 @@ int tcp_v4_rcv(struct sk_buff *skb) TCP_SKB_CB(skb)->end_seq = (TCP_SKB_CB(skb)->seq + th->syn + th->fin + skb->len - th->doff * 4); TCP_SKB_CB(skb)->ack_seq = ntohl(th->ack_seq); - TCP_SKB_CB(skb)->when = 0; + TCP_SKB_CB(skb)->tcp_tw_isn = 0; TCP_SKB_CB(skb)->ip_dsfield = ipv4_get_dsfield(iph); TCP_SKB_CB(skb)->sacked = 0; @@ -2183,7 +2183,7 @@ int tcp_seq_open(struct inode *inode, struct file *file) s = ((struct seq_file *)file->private_data)->private; s->family = afinfo->family; - s->last_pos = 0; + s->last_pos = 0; return 0; } EXPORT_SYMBOL(tcp_seq_open); diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c index 1649988bd1b..a058f411d3a 100644 --- a/net/ipv4/tcp_minisocks.c +++ b/net/ipv4/tcp_minisocks.c @@ -232,7 +232,7 @@ kill: u32 isn = tcptw->tw_snd_nxt + 65535 + 2; if (isn == 0) isn++; - TCP_SKB_CB(skb)->when = isn; + TCP_SKB_CB(skb)->tcp_tw_isn = isn; return TCP_TW_SYN; } diff --git a/net/ipv4/tcp_offload.c b/net/ipv4/tcp_offload.c index bc1b83cb830..72912533a19 100644 --- a/net/ipv4/tcp_offload.c +++ b/net/ipv4/tcp_offload.c @@ -288,35 +288,14 @@ static int tcp_v4_gso_send_check(struct sk_buff *skb) static struct sk_buff **tcp4_gro_receive(struct sk_buff **head, struct sk_buff *skb) { - /* Use the IP hdr immediately proceeding for this transport */ - const struct iphdr *iph = skb_gro_network_header(skb); - __wsum wsum; - /* Don't bother verifying checksum if we're going to flush anyway. 
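
The consolidated GRO validation below takes a pseudo-header callback so
one routine can serve TCP, UDP and GRE. For IPv4 the callback folds
addresses, length and protocol exactly like an ordinary transport
checksum; sketched here under the assumption that it wraps
csum_tcpudp_nofold():

	static inline __wsum inet_gro_compute_pseudo(struct sk_buff *skb,
						     int proto)
	{
		const struct iphdr *iph = skb_gro_network_header(skb);

		/* standard IPv4 pseudo-header over the GRO length */
		return csum_tcpudp_nofold(iph->saddr, iph->daddr,
					  skb_gro_len(skb), proto, 0);
	}
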
*/ - if (NAPI_GRO_CB(skb)->flush) - goto skip_csum; - - wsum = NAPI_GRO_CB(skb)->csum; - - switch (skb->ip_summed) { - case CHECKSUM_NONE: - wsum = skb_checksum(skb, skb_gro_offset(skb), skb_gro_len(skb), - 0); - - /* fall through */ - - case CHECKSUM_COMPLETE: - if (!tcp_v4_check(skb_gro_len(skb), iph->saddr, iph->daddr, - wsum)) { - skb->ip_summed = CHECKSUM_UNNECESSARY; - break; - } - + if (!NAPI_GRO_CB(skb)->flush && + skb_gro_checksum_validate(skb, IPPROTO_TCP, + inet_gro_compute_pseudo)) { NAPI_GRO_CB(skb)->flush = 1; return NULL; } -skip_csum: return tcp_gro_receive(head, skb); } diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 5a7c41fbc6d..7f1280dcad5 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -550,7 +550,7 @@ static unsigned int tcp_syn_options(struct sock *sk, struct sk_buff *skb, if (likely(sysctl_tcp_timestamps && *md5 == NULL)) { opts->options |= OPTION_TS; - opts->tsval = TCP_SKB_CB(skb)->when + tp->tsoffset; + opts->tsval = tcp_skb_timestamp(skb) + tp->tsoffset; opts->tsecr = tp->rx_opt.ts_recent; remaining -= TCPOLEN_TSTAMP_ALIGNED; } @@ -618,7 +618,7 @@ static unsigned int tcp_synack_options(struct sock *sk, } if (likely(ireq->tstamp_ok)) { opts->options |= OPTION_TS; - opts->tsval = TCP_SKB_CB(skb)->when; + opts->tsval = tcp_skb_timestamp(skb); opts->tsecr = req->ts_recent; remaining -= TCPOLEN_TSTAMP_ALIGNED; } @@ -647,7 +647,6 @@ static unsigned int tcp_established_options(struct sock *sk, struct sk_buff *skb struct tcp_out_options *opts, struct tcp_md5sig_key **md5) { - struct tcp_skb_cb *tcb = skb ? TCP_SKB_CB(skb) : NULL; struct tcp_sock *tp = tcp_sk(sk); unsigned int size = 0; unsigned int eff_sacks; @@ -666,7 +665,7 @@ static unsigned int tcp_established_options(struct sock *sk, struct sk_buff *skb if (likely(tp->rx_opt.tstamp_ok)) { opts->options |= OPTION_TS; - opts->tsval = tcb ? tcb->when + tp->tsoffset : 0; + opts->tsval = skb ? tcp_skb_timestamp(skb) + tp->tsoffset : 0; opts->tsecr = tp->rx_opt.ts_recent; size += TCPOLEN_TSTAMP_ALIGNED; } @@ -886,8 +885,6 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it, skb = skb_clone(skb, gfp_mask); if (unlikely(!skb)) return -ENOBUFS; - /* Our usage of tstamp should remain private */ - skb->tstamp.tv64 = 0; } inet = inet_sk(sk); @@ -975,7 +972,10 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it, TCP_ADD_STATS(sock_net(sk), TCP_MIB_OUTSEGS, tcp_skb_pcount(skb)); + /* Our usage of tstamp should remain private */ + skb->tstamp.tv64 = 0; err = icsk->icsk_af_ops->queue_xmit(sk, skb, &inet->cork.fl); + if (likely(err <= 0)) return err; @@ -1146,10 +1146,6 @@ int tcp_fragment(struct sock *sk, struct sk_buff *skb, u32 len, buff->ip_summed = skb->ip_summed; - /* Looks stupid, but our code really uses when of - * skbs, which it never sent before. --ANK - */ - TCP_SKB_CB(buff)->when = TCP_SKB_CB(skb)->when; buff->tstamp = skb->tstamp; tcp_fragment_tstamp(skb, buff); @@ -1874,8 +1870,8 @@ static int tcp_mtu_probe(struct sock *sk) tcp_init_tso_segs(sk, nskb, nskb->len); /* We're ready to send. If this fails, the probe will - * be resegmented into mss-sized pieces by tcp_write_xmit(). */ - TCP_SKB_CB(nskb)->when = tcp_time_stamp; + * be resegmented into mss-sized pieces by tcp_write_xmit(). + */ if (!tcp_transmit_skb(sk, nskb, 1, GFP_ATOMIC)) { /* Decrement cwnd here because we are sending * effectively two packets. 
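
The removed TCP_SKB_CB(skb)->when assignments are replaced by stamping
the skb itself just before transmit. A sketch of skb_mstamp_get()
consistent with its use in this series, capturing the microsecond and
jiffies components together:

	static inline void skb_mstamp_get(struct skb_mstamp *cl)
	{
		u64 val = local_clock();

		/* convert nanoseconds to microseconds */
		do_div(val, NSEC_PER_USEC);
		cl->stamp_us = (u32)val;
		cl->stamp_jiffies = (u32)jiffies;
	}
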
*/ @@ -1935,8 +1931,8 @@ static bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle, BUG_ON(!tso_segs); if (unlikely(tp->repair) && tp->repair_queue == TCP_SEND_QUEUE) { - /* "when" is used as a start point for the retransmit timer */ - TCP_SKB_CB(skb)->when = tcp_time_stamp; + /* "skb_mstamp" is used as a start point for the retransmit timer */ + skb_mstamp_get(&skb->skb_mstamp); goto repair; /* Skip network transmission */ } @@ -2000,8 +1996,6 @@ static bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle, unlikely(tso_fragment(sk, skb, limit, mss_now, gfp))) break; - TCP_SKB_CB(skb)->when = tcp_time_stamp; - if (unlikely(tcp_transmit_skb(sk, skb, 1, gfp))) break; @@ -2499,7 +2493,6 @@ int __tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb) /* Make a copy, if the first transmission SKB clone we made * is still in somebody's hands, else make a clone. */ - TCP_SKB_CB(skb)->when = tcp_time_stamp; /* make sure skb->data is aligned on arches that require it * and check if ack-trimming & collapsing extended the headroom @@ -2544,7 +2537,7 @@ int tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb) /* Save stamp of the first retransmit. */ if (!tp->retrans_stamp) - tp->retrans_stamp = TCP_SKB_CB(skb)->when; + tp->retrans_stamp = tcp_skb_timestamp(skb); /* snd_nxt is stored to detect loss of retransmitted segment, * see tcp_input.c tcp_sacktag_write_queue(). @@ -2752,7 +2745,6 @@ void tcp_send_active_reset(struct sock *sk, gfp_t priority) tcp_init_nondata_skb(skb, tcp_acceptable_seq(sk), TCPHDR_ACK | TCPHDR_RST); /* Send it off. */ - TCP_SKB_CB(skb)->when = tcp_time_stamp; if (tcp_transmit_skb(sk, skb, 0, priority)) NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPABORTFAILED); @@ -2791,7 +2783,6 @@ int tcp_send_synack(struct sock *sk) TCP_SKB_CB(skb)->tcp_flags |= TCPHDR_ACK; TCP_ECN_send_synack(tcp_sk(sk), skb); } - TCP_SKB_CB(skb)->when = tcp_time_stamp; return tcp_transmit_skb(sk, skb, 1, GFP_ATOMIC); } @@ -2835,10 +2826,10 @@ struct sk_buff *tcp_make_synack(struct sock *sk, struct dst_entry *dst, memset(&opts, 0, sizeof(opts)); #ifdef CONFIG_SYN_COOKIES if (unlikely(req->cookie_ts)) - TCP_SKB_CB(skb)->when = cookie_init_timestamp(req); + skb->skb_mstamp.stamp_jiffies = cookie_init_timestamp(req); else #endif - TCP_SKB_CB(skb)->when = tcp_time_stamp; + skb_mstamp_get(&skb->skb_mstamp); tcp_header_size = tcp_synack_options(sk, req, mss, skb, &opts, &md5, foc) + sizeof(*th); @@ -3086,7 +3077,7 @@ int tcp_connect(struct sock *sk) skb_reserve(buff, MAX_TCP_HEADER); tcp_init_nondata_skb(buff, tp->write_seq++, TCPHDR_SYN); - tp->retrans_stamp = TCP_SKB_CB(buff)->when = tcp_time_stamp; + tp->retrans_stamp = tcp_time_stamp; tcp_connect_queue_skb(sk, buff); TCP_ECN_send_syn(sk, buff); @@ -3194,7 +3185,7 @@ void tcp_send_ack(struct sock *sk) tcp_init_nondata_skb(buff, tcp_acceptable_seq(sk), TCPHDR_ACK); /* Send it off, this clears delayed acks for us. */ - TCP_SKB_CB(buff)->when = tcp_time_stamp; + skb_mstamp_get(&buff->skb_mstamp); tcp_transmit_skb(sk, buff, 0, sk_gfp_atomic(sk, GFP_ATOMIC)); } @@ -3226,7 +3217,7 @@ static int tcp_xmit_probe_skb(struct sock *sk, int urgent) * send it. 
*/ tcp_init_nondata_skb(skb, tp->snd_una - !urgent, TCPHDR_ACK); - TCP_SKB_CB(skb)->when = tcp_time_stamp; + skb_mstamp_get(&skb->skb_mstamp); return tcp_transmit_skb(sk, skb, 0, GFP_ATOMIC); } @@ -3270,7 +3261,6 @@ int tcp_write_wakeup(struct sock *sk) tcp_set_skb_tso_segs(sk, skb, mss); TCP_SKB_CB(skb)->tcp_flags |= TCPHDR_PSH; - TCP_SKB_CB(skb)->when = tcp_time_stamp; err = tcp_transmit_skb(sk, skb, 1, GFP_ATOMIC); if (!err) tcp_event_new_data_sent(sk, skb); diff --git a/net/ipv4/tcp_probe.c b/net/ipv4/tcp_probe.c index 3b66610d415..ebf5ff57526 100644 --- a/net/ipv4/tcp_probe.c +++ b/net/ipv4/tcp_probe.c @@ -83,7 +83,6 @@ static struct { struct tcp_log *log; } tcp_probe; - static inline int tcp_probe_used(void) { return (tcp_probe.head - tcp_probe.tail) & (bufsize - 1); @@ -101,7 +100,6 @@ static inline int tcp_probe_avail(void) si4.sin_addr.s_addr = inet->inet_##mem##addr; \ } while (0) \ - /* * Hook inserted to be called before each receive packet. * Note: arguments must match tcp_rcv_established()! @@ -194,8 +192,8 @@ static int tcpprobe_sprint(char *tbuf, int n) return scnprintf(tbuf, n, "%lu.%09lu %pISpc %pISpc %d %#x %#x %u %u %u %u %u\n", - (unsigned long) tv.tv_sec, - (unsigned long) tv.tv_nsec, + (unsigned long)tv.tv_sec, + (unsigned long)tv.tv_nsec, &p->src, &p->dst, p->length, p->snd_nxt, p->snd_una, p->snd_cwnd, p->ssthresh, p->snd_wnd, p->srtt, p->rcv_wnd); } diff --git a/net/ipv4/tcp_scalable.c b/net/ipv4/tcp_scalable.c index 8250949b885..6824afb65d9 100644 --- a/net/ipv4/tcp_scalable.c +++ b/net/ipv4/tcp_scalable.c @@ -31,10 +31,10 @@ static void tcp_scalable_cong_avoid(struct sock *sk, u32 ack, u32 acked) static u32 tcp_scalable_ssthresh(struct sock *sk) { const struct tcp_sock *tp = tcp_sk(sk); + return max(tp->snd_cwnd - (tp->snd_cwnd>>TCP_SCALABLE_MD_SCALE), 2U); } - static struct tcp_congestion_ops tcp_scalable __read_mostly = { .ssthresh = tcp_scalable_ssthresh, .cong_avoid = tcp_scalable_cong_avoid, diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c index df90cd1ce37..a339e7ba05a 100644 --- a/net/ipv4/tcp_timer.c +++ b/net/ipv4/tcp_timer.c @@ -135,10 +135,9 @@ static bool retransmits_timed_out(struct sock *sk, if (!inet_csk(sk)->icsk_retransmits) return false; - if (unlikely(!tcp_sk(sk)->retrans_stamp)) - start_ts = TCP_SKB_CB(tcp_write_queue_head(sk))->when; - else - start_ts = tcp_sk(sk)->retrans_stamp; + start_ts = tcp_sk(sk)->retrans_stamp; + if (unlikely(!start_ts)) + start_ts = tcp_skb_timestamp(tcp_write_queue_head(sk)); if (likely(timeout == 0)) { linear_backoff_thresh = ilog2(TCP_RTO_MAX/rto_base); diff --git a/net/ipv4/tcp_vegas.c b/net/ipv4/tcp_vegas.c index b40ad897f94..a6afde666ab 100644 --- a/net/ipv4/tcp_vegas.c +++ b/net/ipv4/tcp_vegas.c @@ -51,7 +51,6 @@ MODULE_PARM_DESC(beta, "upper bound of packets in network"); module_param(gamma, int, 0644); MODULE_PARM_DESC(gamma, "limit on increase (scale by 2)"); - /* There are several situations when we must "re-start" Vegas: * * o when a connection is established @@ -133,7 +132,6 @@ EXPORT_SYMBOL_GPL(tcp_vegas_pkts_acked); void tcp_vegas_state(struct sock *sk, u8 ca_state) { - if (ca_state == TCP_CA_Open) vegas_enable(sk); else @@ -285,7 +283,6 @@ static void tcp_vegas_cong_avoid(struct sock *sk, u32 ack, u32 acked) /* Use normal slow start */ else if (tp->snd_cwnd <= tp->snd_ssthresh) tcp_slow_start(tp, acked); - } /* Extract info for Tcp socket info provided via netlink. 
*/ diff --git a/net/ipv4/tcp_veno.c b/net/ipv4/tcp_veno.c index 8276977d2c8..a4d2d2d88dc 100644 --- a/net/ipv4/tcp_veno.c +++ b/net/ipv4/tcp_veno.c @@ -175,7 +175,6 @@ static void tcp_veno_cong_avoid(struct sock *sk, u32 ack, u32 acked) } else tp->snd_cwnd_cnt++; } - } if (tp->snd_cwnd < 2) tp->snd_cwnd = 2; diff --git a/net/ipv4/tcp_westwood.c b/net/ipv4/tcp_westwood.c index b94a04ae2ed..81911a92356 100644 --- a/net/ipv4/tcp_westwood.c +++ b/net/ipv4/tcp_westwood.c @@ -42,7 +42,6 @@ struct westwood { u8 reset_rtt_min; /* Reset RTT min to next RTT sample*/ }; - /* TCP Westwood functions and constants */ #define TCP_WESTWOOD_RTT_MIN (HZ/20) /* 50ms */ #define TCP_WESTWOOD_INIT_RTT (20*HZ) /* maybe too conservative?! */ @@ -153,7 +152,6 @@ static inline void update_rtt_min(struct westwood *w) w->rtt_min = min(w->rtt, w->rtt_min); } - /* * @westwood_fast_bw * It is called when we are in fast path. In particular it is called when @@ -208,7 +206,6 @@ static inline u32 westwood_acked_count(struct sock *sk) return w->cumul_ack; } - /* * TCP Westwood * Here limit is evaluated as Bw estimation*RTTmin (for obtaining it @@ -219,6 +216,7 @@ static u32 tcp_westwood_bw_rttmin(const struct sock *sk) { const struct tcp_sock *tp = tcp_sk(sk); const struct westwood *w = inet_csk_ca(sk); + return max_t(u32, (w->bw_est * w->rtt_min) / tp->mss_cache, 2); } @@ -254,12 +252,12 @@ static void tcp_westwood_event(struct sock *sk, enum tcp_ca_event event) } } - /* Extract info for Tcp socket info provided via netlink. */ static void tcp_westwood_info(struct sock *sk, u32 ext, struct sk_buff *skb) { const struct westwood *ca = inet_csk_ca(sk); + if (ext & (1 << (INET_DIAG_VEGASINFO - 1))) { struct tcpvegas_info info = { .tcpv_enabled = 1, @@ -271,7 +269,6 @@ static void tcp_westwood_info(struct sock *sk, u32 ext, } } - static struct tcp_congestion_ops tcp_westwood __read_mostly = { .init = tcp_westwood_init, .ssthresh = tcp_reno_ssthresh, diff --git a/net/ipv4/tcp_yeah.c b/net/ipv4/tcp_yeah.c index 599b79b8eac..cd727321859 100644 --- a/net/ipv4/tcp_yeah.c +++ b/net/ipv4/tcp_yeah.c @@ -54,10 +54,8 @@ static void tcp_yeah_init(struct sock *sk) /* Ensure the MD arithmetic works. This is somewhat pedantic, * since I don't think we will see a cwnd this large. :) */ tp->snd_cwnd_clamp = min_t(u32, tp->snd_cwnd_clamp, 0xffffffff/128); - } - static void tcp_yeah_pkts_acked(struct sock *sk, u32 pkts_acked, s32 rtt_us) { const struct inet_connection_sock *icsk = inet_csk(sk); @@ -84,7 +82,7 @@ static void tcp_yeah_cong_avoid(struct sock *sk, u32 ack, u32 acked) /* Scalable */ tp->snd_cwnd_cnt += yeah->pkts_acked; - if (tp->snd_cwnd_cnt > min(tp->snd_cwnd, TCP_SCALABLE_AI_CNT)){ + if (tp->snd_cwnd_cnt > min(tp->snd_cwnd, TCP_SCALABLE_AI_CNT)) { if (tp->snd_cwnd < tp->snd_cwnd_clamp) tp->snd_cwnd++; tp->snd_cwnd_cnt = 0; @@ -120,7 +118,6 @@ static void tcp_yeah_cong_avoid(struct sock *sk, u32 ack, u32 acked) */ if (after(ack, yeah->vegas.beg_snd_nxt)) { - /* We do the Vegas calculations only if we got enough RTT * samples that we can be reasonably sure that we got * at least one RTT sample that wasn't from a delayed ACK. 
@@ -189,7 +186,6 @@ static void tcp_yeah_cong_avoid(struct sock *sk, u32 ack, u32 acked)
 		}
 
 		yeah->lastQ = queue;
-
 	}
 
 	/* Save the extent of the current window so we can use this
@@ -205,7 +201,8 @@ static void tcp_yeah_cong_avoid(struct sock *sk, u32 ack, u32 acked)
 	}
 }
 
-static u32 tcp_yeah_ssthresh(struct sock *sk) {
+static u32 tcp_yeah_ssthresh(struct sock *sk)
+{
 	const struct tcp_sock *tp = tcp_sk(sk);
 	struct yeah *yeah = inet_csk_ca(sk);
 	u32 reduction;
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index f57c0e4c232..cd0db5471bb 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -99,6 +99,7 @@
 #include <linux/slab.h>
 #include <net/tcp_states.h>
 #include <linux/skbuff.h>
+#include <linux/netdevice.h>
 #include <linux/proc_fs.h>
 #include <linux/seq_file.h>
 #include <net/net_namespace.h>
@@ -224,7 +225,7 @@ int udp_lib_get_port(struct sock *sk, unsigned short snum,
 		remaining = (high - low) + 1;
 
 		rand = prandom_u32();
-		first = (((u64)rand * remaining) >> 32) + low;
+		first = reciprocal_scale(rand, remaining) + low;
 		/*
 		 * force rand to be an odd multiple of UDP_HTABLE_SIZE
 		 */
@@ -448,7 +449,7 @@ begin:
 			}
 		} else if (score == badness && reuseport) {
 			matches++;
-			if (((u64)hash * matches) >> 32 == 0)
+			if (reciprocal_scale(hash, matches) == 0)
 				result = sk;
 			hash = next_pseudo_random32(hash);
 		}
@@ -529,7 +530,7 @@ begin:
 			}
 		} else if (score == badness && reuseport) {
 			matches++;
-			if (((u64)hash * matches) >> 32 == 0)
+			if (reciprocal_scale(hash, matches) == 0)
 				result = sk;
 			hash = next_pseudo_random32(hash);
 		}
@@ -1787,6 +1788,10 @@ int __udp4_lib_rcv(struct sk_buff *skb, struct udp_table *udptable,
 	if (sk != NULL) {
 		int ret;
 
+		if (udp_sk(sk)->convert_csum && uh->check && !IS_UDPLITE(sk))
+			skb_checksum_try_convert(skb, IPPROTO_UDP, uh->check,
+						 inet_compute_pseudo);
+
 		ret = udp_queue_rcv_skb(sk, skb);
 		sock_put(sk);
 
@@ -1967,7 +1972,7 @@ void udp_v4_early_demux(struct sk_buff *skb)
 		return;
 
 	skb->sk = sk;
-	skb->destructor = sock_edemux;
+	skb->destructor = sock_efree;
 	dst = sk->sk_rx_dst;
 
 	if (dst)
diff --git a/net/ipv4/udp_offload.c b/net/ipv4/udp_offload.c
index 59035bc3008..84e0e05c9c0 100644
--- a/net/ipv4/udp_offload.c
+++ b/net/ipv4/udp_offload.c
@@ -228,30 +228,24 @@ unlock:
 }
 EXPORT_SYMBOL(udp_del_offload);
 
-static struct sk_buff **udp_gro_receive(struct sk_buff **head, struct sk_buff *skb)
+struct sk_buff **udp_gro_receive(struct sk_buff **head, struct sk_buff *skb,
+				 struct udphdr *uh)
 {
 	struct udp_offload_priv *uo_priv;
 	struct sk_buff *p, **pp = NULL;
-	struct udphdr *uh, *uh2;
-	unsigned int hlen, off;
+	struct udphdr *uh2;
+	unsigned int off = skb_gro_offset(skb);
 	int flush = 1;
 
 	if (NAPI_GRO_CB(skb)->udp_mark ||
-	    (!skb->encapsulation && skb->ip_summed != CHECKSUM_COMPLETE))
+	    (skb->ip_summed != CHECKSUM_PARTIAL &&
+	     NAPI_GRO_CB(skb)->csum_cnt == 0 &&
+	     !NAPI_GRO_CB(skb)->csum_valid))
 		goto out;
 
 	/* mark that this skb passed once through the udp gro layer */
 	NAPI_GRO_CB(skb)->udp_mark = 1;
 
-	off  = skb_gro_offset(skb);
-	hlen = off + sizeof(*uh);
-	uh   = skb_gro_header_fast(skb, off);
-	if (skb_gro_header_hard(skb, hlen)) {
-		uh = skb_gro_header_slow(skb, hlen, off);
-		if (unlikely(!uh))
-			goto out;
-	}
-
 	rcu_read_lock();
 	uo_priv = rcu_dereference(udp_offload_base);
 	for (; uo_priv != NULL; uo_priv = rcu_dereference(uo_priv->next)) {
@@ -269,7 +263,12 @@ unflush:
 			continue;
 
 		uh2 = (struct udphdr *)(p->data + off);
-		if ((*(u32 *)&uh->source != *(u32 *)&uh2->source)) {
+
+		/* Match ports and check that checksums are either both zero
+		 * or both nonzero.
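
A zero UDP checksum means the sender did not compute one, per RFC 768,
and aggregating checksummed with non-checksummed segments would corrupt
the merged checksum state; the "(!uh->check ^ !uh2->check)" test below
encodes that rule. An equivalent, purely illustrative predicate:

	static bool udp_gro_csum_compatible(const struct udphdr *a,
					    const struct udphdr *b)
	{
		/* true when both carry a checksum or neither does */
		return !a->check == !b->check;
	}
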
+ */ + if ((*(u32 *)&uh->source != *(u32 *)&uh2->source) || + (!uh->check ^ !uh2->check)) { NAPI_GRO_CB(p)->same_flow = 0; continue; } @@ -286,7 +285,33 @@ out: return pp; } -static int udp_gro_complete(struct sk_buff *skb, int nhoff) +static struct sk_buff **udp4_gro_receive(struct sk_buff **head, + struct sk_buff *skb) +{ + struct udphdr *uh = udp_gro_udphdr(skb); + + if (unlikely(!uh)) + goto flush; + + /* Don't bother verifying checksum if we're going to flush anyway. */ + if (!NAPI_GRO_CB(skb)->flush) + goto skip; + + if (skb_gro_checksum_validate_zero_check(skb, IPPROTO_UDP, uh->check, + inet_gro_compute_pseudo)) + goto flush; + else if (uh->check) + skb_gro_checksum_try_convert(skb, IPPROTO_UDP, uh->check, + inet_gro_compute_pseudo); +skip: + return udp_gro_receive(head, skb, uh); + +flush: + NAPI_GRO_CB(skb)->flush = 1; + return NULL; +} + +int udp_gro_complete(struct sk_buff *skb, int nhoff) { struct udp_offload_priv *uo_priv; __be16 newlen = htons(skb->len - nhoff); @@ -311,12 +336,24 @@ static int udp_gro_complete(struct sk_buff *skb, int nhoff) return err; } +int udp4_gro_complete(struct sk_buff *skb, int nhoff) +{ + const struct iphdr *iph = ip_hdr(skb); + struct udphdr *uh = (struct udphdr *)(skb->data + nhoff); + + if (uh->check) + uh->check = ~udp_v4_check(skb->len - nhoff, iph->saddr, + iph->daddr, 0); + + return udp_gro_complete(skb, nhoff); +} + static const struct net_offload udpv4_offload = { .callbacks = { .gso_send_check = udp4_ufo_send_check, .gso_segment = udp4_ufo_fragment, - .gro_receive = udp_gro_receive, - .gro_complete = udp_gro_complete, + .gro_receive = udp4_gro_receive, + .gro_complete = udp4_gro_complete, }, }; diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index fc1fac2a052..ad4598fcc41 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -180,7 +180,7 @@ static struct ipv6_devconf ipv6_devconf __read_mostly = { .rtr_solicits = MAX_RTR_SOLICITATIONS, .rtr_solicit_interval = RTR_SOLICITATION_INTERVAL, .rtr_solicit_delay = MAX_RTR_SOLICITATION_DELAY, - .use_tempaddr = 0, + .use_tempaddr = 0, .temp_valid_lft = TEMP_VALID_LIFETIME, .temp_prefered_lft = TEMP_PREFERRED_LIFETIME, .regen_max_retry = REGEN_MAX_RETRY, @@ -1105,8 +1105,8 @@ retry: spin_unlock_bh(&ifp->lock); regen_advance = idev->cnf.regen_max_retry * - idev->cnf.dad_transmits * - NEIGH_VAR(idev->nd_parms, RETRANS_TIME) / HZ; + idev->cnf.dad_transmits * + NEIGH_VAR(idev->nd_parms, RETRANS_TIME) / HZ; write_unlock_bh(&idev->lock); /* A temporary address is created only if this calculated Preferred @@ -3030,7 +3030,7 @@ static int addrconf_ifdown(struct net_device *dev, int how) struct hlist_head *h = &inet6_addr_lst[i]; spin_lock_bh(&addrconf_hash_lock); - restart: +restart: hlist_for_each_entry_rcu(ifa, h, addr_lst) { if (ifa->idev == idev) { hlist_del_init_rcu(&ifa->addr_lst); @@ -3542,8 +3542,8 @@ static void __net_exit if6_proc_net_exit(struct net *net) } static struct pernet_operations if6_proc_net_ops = { - .init = if6_proc_net_init, - .exit = if6_proc_net_exit, + .init = if6_proc_net_init, + .exit = if6_proc_net_exit, }; int __init if6_proc_init(void) diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c index 2daa3a133e4..b9393e6a21f 100644 --- a/net/ipv6/af_inet6.c +++ b/net/ipv6/af_inet6.c @@ -7,15 +7,15 @@ * * Adapted from linux/net/ipv4/af_inet.c * - * Fixes: + * Fixes: * piggy, Karl Knutson : Socket protocol table - * Hideaki YOSHIFUJI : sin6_scope_id support - * Arnaldo Melo : check proc_net_create return, cleanups + * Hideaki YOSHIFUJI : sin6_scope_id support + * 
Arnaldo Melo : check proc_net_create return, cleanups * * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. */ #define pr_fmt(fmt) "IPv6: " fmt diff --git a/net/ipv6/ah6.c b/net/ipv6/ah6.c index 72a4930bdc0..fcffd4e522c 100644 --- a/net/ipv6/ah6.c +++ b/net/ipv6/ah6.c @@ -17,10 +17,10 @@ * Authors * * Mitsuru KANDA @USAGI : IPv6 Support - * Kazunori MIYAZAWA @USAGI : - * Kunihiro Ishiguro <kunihiro@ipinfusion.com> + * Kazunori MIYAZAWA @USAGI : + * Kunihiro Ishiguro <kunihiro@ipinfusion.com> * - * This file is derived from net/ipv4/ah.c. + * This file is derived from net/ipv4/ah.c. */ #define pr_fmt(fmt) "IPv6: " fmt @@ -284,7 +284,7 @@ static int ipv6_clear_mutable_options(struct ipv6hdr *iph, int len, int dir) ipv6_rearrange_rthdr(iph, exthdr.rth); break; - default : + default: return 0; } @@ -478,7 +478,7 @@ static void ah6_input_done(struct crypto_async_request *base, int err) auth_data = ah_tmp_auth(work_iph, hdr_len); icv = ah_tmp_icv(ahp->ahash, auth_data, ahp->icv_trunc_len); - err = memcmp(icv, auth_data, ahp->icv_trunc_len) ? -EBADMSG: 0; + err = memcmp(icv, auth_data, ahp->icv_trunc_len) ? -EBADMSG : 0; if (err) goto out; @@ -622,7 +622,7 @@ static int ah6_input(struct xfrm_state *x, struct sk_buff *skb) goto out_free; } - err = memcmp(icv, auth_data, ahp->icv_trunc_len) ? -EBADMSG: 0; + err = memcmp(icv, auth_data, ahp->icv_trunc_len) ? -EBADMSG : 0; if (err) goto out_free; @@ -647,8 +647,8 @@ static int ah6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, u8 type, u8 code, int offset, __be32 info) { struct net *net = dev_net(skb->dev); - struct ipv6hdr *iph = (struct ipv6hdr*)skb->data; - struct ip_auth_hdr *ah = (struct ip_auth_hdr*)(skb->data+offset); + struct ipv6hdr *iph = (struct ipv6hdr *)skb->data; + struct ip_auth_hdr *ah = (struct ip_auth_hdr *)(skb->data+offset); struct xfrm_state *x; if (type != ICMPV6_PKT_TOOBIG && @@ -755,11 +755,10 @@ static int ah6_rcv_cb(struct sk_buff *skb, int err) return 0; } -static const struct xfrm_type ah6_type = -{ +static const struct xfrm_type ah6_type = { .description = "AH6", .owner = THIS_MODULE, - .proto = IPPROTO_AH, + .proto = IPPROTO_AH, .flags = XFRM_TYPE_REPLAY_PROT, .init_state = ah6_init_state, .destructor = ah6_destroy, diff --git a/net/ipv6/datagram.c b/net/ipv6/datagram.c index 2753319524f..2cdc38338be 100644 --- a/net/ipv6/datagram.c +++ b/net/ipv6/datagram.c @@ -43,13 +43,13 @@ static bool ipv6_mapped_addr_any(const struct in6_addr *a) int ip6_datagram_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) { struct sockaddr_in6 *usin = (struct sockaddr_in6 *) uaddr; - struct inet_sock *inet = inet_sk(sk); - struct ipv6_pinfo *np = inet6_sk(sk); - struct in6_addr *daddr, *final_p, final; + struct inet_sock *inet = inet_sk(sk); + struct ipv6_pinfo *np = inet6_sk(sk); + struct in6_addr *daddr, *final_p, final; struct dst_entry *dst; struct flowi6 fl6; struct ip6_flowlabel *flowlabel = NULL; - struct ipv6_txoptions *opt; + struct ipv6_txoptions *opt; int addr_type; int err; @@ -332,7 +332,7 @@ int ipv6_recv_error(struct sock *sk, struct msghdr *msg, int len, int *addr_len) { struct ipv6_pinfo *np = inet6_sk(sk); struct 
sock_exterr_skb *serr; - struct sk_buff *skb, *skb2; + struct sk_buff *skb; DECLARE_SOCKADDR(struct sockaddr_in6 *, sin, msg->msg_name); struct { struct sock_extended_err ee; @@ -342,7 +342,7 @@ int ipv6_recv_error(struct sock *sk, struct msghdr *msg, int len, int *addr_len) int copied; err = -EAGAIN; - skb = skb_dequeue(&sk->sk_error_queue); + skb = sock_dequeue_err_skb(sk); if (skb == NULL) goto out; @@ -415,17 +415,6 @@ int ipv6_recv_error(struct sock *sk, struct msghdr *msg, int len, int *addr_len) msg->msg_flags |= MSG_ERRQUEUE; err = copied; - /* Reset and regenerate socket error */ - spin_lock_bh(&sk->sk_error_queue.lock); - sk->sk_err = 0; - if ((skb2 = skb_peek(&sk->sk_error_queue)) != NULL) { - sk->sk_err = SKB_EXT_ERR(skb2)->ee.ee_errno; - spin_unlock_bh(&sk->sk_error_queue.lock); - sk->sk_error_report(sk); - } else { - spin_unlock_bh(&sk->sk_error_queue.lock); - } - out_free_skb: kfree_skb(skb); out: diff --git a/net/ipv6/esp6.c b/net/ipv6/esp6.c index d15da137714..83fc3a385a2 100644 --- a/net/ipv6/esp6.c +++ b/net/ipv6/esp6.c @@ -17,10 +17,10 @@ * Authors * * Mitsuru KANDA @USAGI : IPv6 Support - * Kazunori MIYAZAWA @USAGI : - * Kunihiro Ishiguro <kunihiro@ipinfusion.com> + * Kazunori MIYAZAWA @USAGI : + * Kunihiro Ishiguro <kunihiro@ipinfusion.com> * - * This file is derived from net/ipv4/esp.c + * This file is derived from net/ipv4/esp.c */ #define pr_fmt(fmt) "IPv6: " fmt @@ -598,7 +598,7 @@ static int esp6_init_state(struct xfrm_state *x) case XFRM_MODE_BEET: if (x->sel.family != AF_INET6) x->props.header_len += IPV4_BEET_PHMAXLEN + - (sizeof(struct ipv6hdr) - sizeof(struct iphdr)); + (sizeof(struct ipv6hdr) - sizeof(struct iphdr)); break; case XFRM_MODE_TRANSPORT: break; @@ -621,11 +621,10 @@ static int esp6_rcv_cb(struct sk_buff *skb, int err) return 0; } -static const struct xfrm_type esp6_type = -{ +static const struct xfrm_type esp6_type = { .description = "ESP6", - .owner = THIS_MODULE, - .proto = IPPROTO_ESP, + .owner = THIS_MODULE, + .proto = IPPROTO_ESP, .flags = XFRM_TYPE_REPLAY_PROT, .init_state = esp6_init_state, .destructor = esp6_destroy, diff --git a/net/ipv6/exthdrs.c b/net/ipv6/exthdrs.c index 8d67900aa00..bfde361b613 100644 --- a/net/ipv6/exthdrs.c +++ b/net/ipv6/exthdrs.c @@ -142,7 +142,7 @@ static bool ip6_parse_tlv(const struct tlvtype_proc *procs, struct sk_buff *skb) default: /* Other TLV code so scan list */ if (optlen > len) goto bad; - for (curr=procs; curr->type >= 0; curr++) { + for (curr = procs; curr->type >= 0; curr++) { if (curr->type == nh[off]) { /* type specific length/alignment checks will be performed in the diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c index 06ba3e58320..394bb824fe4 100644 --- a/net/ipv6/icmp.c +++ b/net/ipv6/icmp.c @@ -503,7 +503,7 @@ static void icmp6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info) msg.type = type; len = skb->len - msg.offset; - len = min_t(unsigned int, len, IPV6_MIN_MTU - sizeof(struct ipv6hdr) -sizeof(struct icmp6hdr)); + len = min_t(unsigned int, len, IPV6_MIN_MTU - sizeof(struct ipv6hdr) - sizeof(struct icmp6hdr)); if (len < 0) { LIMIT_NETDEBUG(KERN_DEBUG "icmp: len problem\n"); goto out_dst_release; @@ -636,7 +636,7 @@ void icmpv6_notify(struct sk_buff *skb, u8 type, u8 code, __be32 info) /* now skip over extension headers */ inner_offset = ipv6_skip_exthdr(skb, sizeof(struct ipv6hdr), &nexthdr, &frag_off); - if (inner_offset<0) + if (inner_offset < 0) goto out; } else { inner_offset = sizeof(struct ipv6hdr); @@ -808,7 +808,7 @@ void icmpv6_flow_init(struct sock *sk, struct flowi6 
*fl6, memset(fl6, 0, sizeof(*fl6)); fl6->saddr = *saddr; fl6->daddr = *daddr; - fl6->flowi6_proto = IPPROTO_ICMPV6; + fl6->flowi6_proto = IPPROTO_ICMPV6; fl6->fl6_icmp_type = type; fl6->fl6_icmp_code = 0; fl6->flowi6_oif = oif; @@ -875,8 +875,8 @@ static void __net_exit icmpv6_sk_exit(struct net *net) } static struct pernet_operations icmpv6_sk_ops = { - .init = icmpv6_sk_init, - .exit = icmpv6_sk_exit, + .init = icmpv6_sk_init, + .exit = icmpv6_sk_exit, }; int __init icmpv6_init(void) diff --git a/net/ipv6/inet6_connection_sock.c b/net/ipv6/inet6_connection_sock.c index a245e5ddffb..29b32206e49 100644 --- a/net/ipv6/inet6_connection_sock.c +++ b/net/ipv6/inet6_connection_sock.c @@ -63,7 +63,6 @@ int inet6_csk_bind_conflict(const struct sock *sk, return sk2 != NULL; } - EXPORT_SYMBOL_GPL(inet6_csk_bind_conflict); struct dst_entry *inet6_csk_route_req(struct sock *sk, @@ -144,7 +143,6 @@ struct request_sock *inet6_csk_search_req(const struct sock *sk, return NULL; } - EXPORT_SYMBOL_GPL(inet6_csk_search_req); void inet6_csk_reqsk_queue_hash_add(struct sock *sk, @@ -160,10 +158,9 @@ void inet6_csk_reqsk_queue_hash_add(struct sock *sk, reqsk_queue_hash_req(&icsk->icsk_accept_queue, h, req, timeout); inet_csk_reqsk_queue_added(sk, timeout); } - EXPORT_SYMBOL_GPL(inet6_csk_reqsk_queue_hash_add); -void inet6_csk_addr2sockaddr(struct sock *sk, struct sockaddr * uaddr) +void inet6_csk_addr2sockaddr(struct sock *sk, struct sockaddr *uaddr) { struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *) uaddr; @@ -175,7 +172,6 @@ void inet6_csk_addr2sockaddr(struct sock *sk, struct sockaddr * uaddr) sin6->sin6_scope_id = ipv6_iface_scope_id(&sin6->sin6_addr, sk->sk_bound_dev_if); } - EXPORT_SYMBOL_GPL(inet6_csk_addr2sockaddr); static inline diff --git a/net/ipv6/inet6_hashtables.c b/net/ipv6/inet6_hashtables.c index 262e13c02ec..051dffb49c9 100644 --- a/net/ipv6/inet6_hashtables.c +++ b/net/ipv6/inet6_hashtables.c @@ -6,7 +6,7 @@ * Generic INET6 transport hashtables * * Authors: Lotsa people, from code originally in tcp, generalised here - * by Arnaldo Carvalho de Melo <acme@mandriva.com> + * by Arnaldo Carvalho de Melo <acme@mandriva.com> * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License @@ -198,7 +198,7 @@ begin: } } else if (score == hiscore && reuseport) { matches++; - if (((u64)phash * matches) >> 32 == 0) + if (reciprocal_scale(phash, matches) == 0) result = sk; phash = next_pseudo_random32(phash); } @@ -222,7 +222,6 @@ begin: rcu_read_unlock(); return result; } - EXPORT_SYMBOL_GPL(inet6_lookup_listener); struct sock *inet6_lookup(struct net *net, struct inet_hashinfo *hashinfo, @@ -238,7 +237,6 @@ struct sock *inet6_lookup(struct net *net, struct inet_hashinfo *hashinfo, return sk; } - EXPORT_SYMBOL_GPL(inet6_lookup); static int __inet6_check_established(struct inet_timewait_death_row *death_row, @@ -324,5 +322,4 @@ int inet6_hash_connect(struct inet_timewait_death_row *death_row, return __inet_hash_connect(death_row, sk, inet6_sk_port_offset(sk), __inet6_check_established, __inet6_hash); } - EXPORT_SYMBOL_GPL(inet6_hash_connect); diff --git a/net/ipv6/ip6_flowlabel.c b/net/ipv6/ip6_flowlabel.c index 4052694c6f2..3dd7d4ebd7c 100644 --- a/net/ipv6/ip6_flowlabel.c +++ b/net/ipv6/ip6_flowlabel.c @@ -136,7 +136,7 @@ static void ip6_fl_gc(unsigned long dummy) spin_lock(&ip6_fl_lock); - for (i=0; i<=FL_HASH_MASK; i++) { + for (i = 0; i <= FL_HASH_MASK; i++) { struct ip6_flowlabel *fl; struct ip6_flowlabel __rcu **flp; @@ -239,7 
+239,7 @@ static struct ip6_flowlabel *fl_intern(struct net *net, /* Socket flowlabel lists */ -struct ip6_flowlabel * fl6_sock_lookup(struct sock *sk, __be32 label) +struct ip6_flowlabel *fl6_sock_lookup(struct sock *sk, __be32 label) { struct ipv6_fl_socklist *sfl; struct ipv6_pinfo *np = inet6_sk(sk); @@ -259,7 +259,6 @@ struct ip6_flowlabel * fl6_sock_lookup(struct sock *sk, __be32 label) rcu_read_unlock_bh(); return NULL; } - EXPORT_SYMBOL_GPL(fl6_sock_lookup); void fl6_free_socklist(struct sock *sk) @@ -293,11 +292,11 @@ void fl6_free_socklist(struct sock *sk) following rthdr. */ -struct ipv6_txoptions *fl6_merge_options(struct ipv6_txoptions * opt_space, - struct ip6_flowlabel * fl, - struct ipv6_txoptions * fopt) +struct ipv6_txoptions *fl6_merge_options(struct ipv6_txoptions *opt_space, + struct ip6_flowlabel *fl, + struct ipv6_txoptions *fopt) { - struct ipv6_txoptions * fl_opt = fl->opt; + struct ipv6_txoptions *fl_opt = fl->opt; if (fopt == NULL || fopt->opt_flen == 0) return fl_opt; @@ -388,7 +387,7 @@ fl_create(struct net *net, struct sock *sk, struct in6_flowlabel_req *freq, goto done; msg.msg_controllen = olen; - msg.msg_control = (void*)(fl->opt+1); + msg.msg_control = (void *)(fl->opt+1); memset(&flowi6, 0, sizeof(flowi6)); err = ip6_datagram_send_ctl(net, sk, &msg, &flowi6, fl->opt, @@ -517,7 +516,7 @@ int ipv6_flowlabel_opt(struct sock *sk, char __user *optval, int optlen) struct net *net = sock_net(sk); struct ipv6_pinfo *np = inet6_sk(sk); struct in6_flowlabel_req freq; - struct ipv6_fl_socklist *sfl1=NULL; + struct ipv6_fl_socklist *sfl1 = NULL; struct ipv6_fl_socklist *sfl; struct ipv6_fl_socklist __rcu **sflp; struct ip6_flowlabel *fl, *fl1 = NULL; @@ -542,7 +541,7 @@ int ipv6_flowlabel_opt(struct sock *sk, char __user *optval, int optlen) } spin_lock_bh(&ip6_sk_fl_lock); for (sflp = &np->ipv6_fl_list; - (sfl = rcu_dereference(*sflp))!=NULL; + (sfl = rcu_dereference(*sflp)) != NULL; sflp = &sfl->next) { if (sfl->fl->label == freq.flr_label) { if (freq.flr_label == (np->flow_label&IPV6_FLOWLABEL_MASK)) diff --git a/net/ipv6/ip6_icmp.c b/net/ipv6/ip6_icmp.c index 4578e23834f..14dacc544c3 100644 --- a/net/ipv6/ip6_icmp.c +++ b/net/ipv6/ip6_icmp.c @@ -13,7 +13,7 @@ static ip6_icmp_send_t __rcu *ip6_icmp_send; int inet6_register_icmp_sender(ip6_icmp_send_t *fn) { return (cmpxchg((ip6_icmp_send_t **)&ip6_icmp_send, NULL, fn) == NULL) ? - 0 : -EBUSY; + 0 : -EBUSY; } EXPORT_SYMBOL(inet6_register_icmp_sender); diff --git a/net/ipv6/ip6_input.c b/net/ipv6/ip6_input.c index 51d54dc376f..a3084ab5df6 100644 --- a/net/ipv6/ip6_input.c +++ b/net/ipv6/ip6_input.c @@ -15,8 +15,8 @@ */ /* Changes * - * Mitsuru KANDA @USAGI and - * YOSHIFUJI Hideaki @USAGI: Remove ipv6_parse_exthdrs(). + * Mitsuru KANDA @USAGI and + * YOSHIFUJI Hideaki @USAGI: Remove ipv6_parse_exthdrs(). 
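A note on the ip6_icmp.c hunk above: inet6_register_icmp_sender() publishes its function pointer with a bare cmpxchg() against NULL, so only the first caller ever succeeds and any later registration gets -EBUSY, with no lock needed. The idiom as a standalone sketch (illustrative names, not from the tree):

        static void *sender_slot;       /* single-owner registration slot */

        static int register_once(void *fn)
        {
                /* install fn only if the slot is still empty; the
                 * compare-and-swap makes concurrent registrations safe */
                return cmpxchg(&sender_slot, NULL, fn) == NULL ? 0 : -EBUSY;
        }
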
*/ #include <linux/errno.h> @@ -65,7 +65,7 @@ int ip6_rcv_finish(struct sk_buff *skb) int ipv6_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt, struct net_device *orig_dev) { const struct ipv6hdr *hdr; - u32 pkt_len; + u32 pkt_len; struct inet6_dev *idev; struct net *net = dev_net(skb->dev); diff --git a/net/ipv6/ip6_offload.c b/net/ipv6/ip6_offload.c index 65eda2a8af4..5bcda338bce 100644 --- a/net/ipv6/ip6_offload.c +++ b/net/ipv6/ip6_offload.c @@ -244,7 +244,7 @@ static struct sk_buff **ipv6_gro_receive(struct sk_buff **head, continue; iph2 = (struct ipv6hdr *)(p->data + off); - first_word = *(__be32 *)iph ^ *(__be32 *)iph2 ; + first_word = *(__be32 *)iph ^ *(__be32 *)iph2; /* All fields must match except length and Traffic Class. * XXX skbs on the gro_list have all been parsed and pulled diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index 315a55d6607..b7a3e7b3378 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -20,7 +20,7 @@ * etc. * * H. von Brand : Added missing #include <linux/string.h> - * Imran Patel : frag id should be in NBO + * Imran Patel : frag id should be in NBO * Kazunori MIYAZAWA @USAGI * : add ip6_append_data and related functions * for datagram xmit @@ -233,7 +233,6 @@ int ip6_xmit(struct sock *sk, struct sk_buff *skb, struct flowi6 *fl6, kfree_skb(skb); return -EMSGSIZE; } - EXPORT_SYMBOL(ip6_xmit); static int ip6_call_ra_chain(struct sk_buff *skb, int sel) @@ -555,14 +554,14 @@ static void ipv6_select_ident(struct frag_hdr *fhdr, struct rt6_info *rt) int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *)) { struct sk_buff *frag; - struct rt6_info *rt = (struct rt6_info*)skb_dst(skb); + struct rt6_info *rt = (struct rt6_info *)skb_dst(skb); struct ipv6_pinfo *np = skb->sk ? inet6_sk(skb->sk) : NULL; struct ipv6hdr *tmp_hdr; struct frag_hdr *fh; unsigned int mtu, hlen, left, len; int hroom, troom; __be32 frag_id = 0; - int ptr, offset = 0, err=0; + int ptr, offset = 0, err = 0; u8 *prevhdr, nexthdr = 0; struct net *net = dev_net(skb_dst(skb)->dev); @@ -637,7 +636,7 @@ int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *)) } __skb_pull(skb, hlen); - fh = (struct frag_hdr*)__skb_push(skb, sizeof(struct frag_hdr)); + fh = (struct frag_hdr *)__skb_push(skb, sizeof(struct frag_hdr)); __skb_push(skb, hlen); skb_reset_network_header(skb); memcpy(skb_network_header(skb), tmp_hdr, hlen); @@ -662,7 +661,7 @@ int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *)) if (frag) { frag->ip_summed = CHECKSUM_NONE; skb_reset_transport_header(frag); - fh = (struct frag_hdr*)__skb_push(frag, sizeof(struct frag_hdr)); + fh = (struct frag_hdr *)__skb_push(frag, sizeof(struct frag_hdr)); __skb_push(frag, hlen); skb_reset_network_header(frag); memcpy(skb_network_header(frag), tmp_hdr, @@ -681,7 +680,7 @@ int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *)) } err = output(skb); - if(!err) + if (!err) IP6_INC_STATS(net, ip6_dst_idev(&rt->dst), IPSTATS_MIB_FRAGCREATES); @@ -742,7 +741,7 @@ slow_path: /* * Keep copying data until we run out. */ - while(left > 0) { + while (left > 0) { len = left; /* IF: it doesn't fit, use 'mtu' - the data space left */ if (len > mtu) @@ -865,7 +864,7 @@ static struct dst_entry *ip6_sk_dst_check(struct sock *sk, /* Yes, checking route validity in not connected * case is not very simple. Take into account, * that we do not support routing by source, TOS, - * and MSG_DONTROUTE --ANK (980726) + * and MSG_DONTROUTE --ANK (980726) * * 1. 
ip6_rt_check(): If route was host route, * check that cached destination is current. @@ -1049,7 +1048,7 @@ static inline int ip6_ufo_append_data(struct sock *sk, int getfrag(void *from, char *to, int offset, int len, int odd, struct sk_buff *skb), void *from, int length, int hh_len, int fragheaderlen, - int transhdrlen, int mtu,unsigned int flags, + int transhdrlen, int mtu, unsigned int flags, struct rt6_info *rt) { @@ -1072,7 +1071,7 @@ static inline int ip6_ufo_append_data(struct sock *sk, skb_reserve(skb, hh_len); /* create space for UDP/IP header */ - skb_put(skb,fragheaderlen + transhdrlen); + skb_put(skb, fragheaderlen + transhdrlen); /* initialize network header pointer */ skb_reset_network_header(skb); diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c index f9de5a69507..e01bd039929 100644 --- a/net/ipv6/ip6_tunnel.c +++ b/net/ipv6/ip6_tunnel.c @@ -408,12 +408,12 @@ __u16 ip6_tnl_parse_tlv_enc_lim(struct sk_buff *skb, __u8 *raw) { const struct ipv6hdr *ipv6h = (const struct ipv6hdr *) raw; __u8 nexthdr = ipv6h->nexthdr; - __u16 off = sizeof (*ipv6h); + __u16 off = sizeof(*ipv6h); while (ipv6_ext_hdr(nexthdr) && nexthdr != NEXTHDR_NONE) { __u16 optlen = 0; struct ipv6_opt_hdr *hdr; - if (raw + off + sizeof (*hdr) > skb->data && + if (raw + off + sizeof(*hdr) > skb->data && !pskb_may_pull(skb, raw - skb->data + off + sizeof (*hdr))) break; @@ -530,7 +530,7 @@ ip6_tnl_err(struct sk_buff *skb, __u8 ipproto, struct inet6_skb_parm *opt, mtu = IPV6_MIN_MTU; t->dev->mtu = mtu; - if ((len = sizeof (*ipv6h) + ntohs(ipv6h->payload_len)) > mtu) { + if ((len = sizeof(*ipv6h) + ntohs(ipv6h->payload_len)) > mtu) { rel_type = ICMPV6_PKT_TOOBIG; rel_code = 0; rel_info = mtu; @@ -991,7 +991,7 @@ static int ip6_tnl_xmit2(struct sk_buff *skb, t->parms.name); goto tx_err_dst_release; } - mtu = dst_mtu(dst) - sizeof (*ipv6h); + mtu = dst_mtu(dst) - sizeof(*ipv6h); if (encap_limit >= 0) { max_headroom += 8; mtu -= 8; @@ -1083,7 +1083,7 @@ ip4ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev) if (!(t->parms.flags & IP6_TNL_F_IGN_ENCAP_LIMIT)) encap_limit = t->parms.encap_limit; - memcpy(&fl6, &t->fl.u.ip6, sizeof (fl6)); + memcpy(&fl6, &t->fl.u.ip6, sizeof(fl6)); fl6.flowi6_proto = IPPROTO_IPIP; dsfield = ipv4_get_dsfield(iph); @@ -1135,7 +1135,7 @@ ip6ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev) } else if (!(t->parms.flags & IP6_TNL_F_IGN_ENCAP_LIMIT)) encap_limit = t->parms.encap_limit; - memcpy(&fl6, &t->fl.u.ip6, sizeof (fl6)); + memcpy(&fl6, &t->fl.u.ip6, sizeof(fl6)); fl6.flowi6_proto = IPPROTO_IPV6; dsfield = ipv6_get_dsfield(ipv6h); @@ -1229,11 +1229,11 @@ static void ip6_tnl_link_config(struct ip6_tnl *t) if (rt->dst.dev) { dev->hard_header_len = rt->dst.dev->hard_header_len + - sizeof (struct ipv6hdr); + sizeof(struct ipv6hdr); - dev->mtu = rt->dst.dev->mtu - sizeof (struct ipv6hdr); + dev->mtu = rt->dst.dev->mtu - sizeof(struct ipv6hdr); if (!(t->parms.flags & IP6_TNL_F_IGN_ENCAP_LIMIT)) - dev->mtu-=8; + dev->mtu -= 8; if (dev->mtu < IPV6_MIN_MTU) dev->mtu = IPV6_MIN_MTU; @@ -1350,7 +1350,7 @@ ip6_tnl_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) switch (cmd) { case SIOCGETTUNNEL: if (dev == ip6n->fb_tnl_dev) { - if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof (p))) { + if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p))) { err = -EFAULT; break; } @@ -1362,7 +1362,7 @@ ip6_tnl_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) memset(&p, 0, sizeof(p)); } ip6_tnl_parm_to_user(&p, &t->parms); - if (copy_to_user(ifr->ifr_ifru.ifru_data, 
&p, sizeof (p))) { + if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof(p))) { err = -EFAULT; } break; @@ -1372,7 +1372,7 @@ ip6_tnl_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) if (!ns_capable(net->user_ns, CAP_NET_ADMIN)) break; err = -EFAULT; - if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof (p))) + if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p))) break; err = -EINVAL; if (p.proto != IPPROTO_IPV6 && p.proto != IPPROTO_IPIP && @@ -1407,7 +1407,7 @@ ip6_tnl_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) if (dev == ip6n->fb_tnl_dev) { err = -EFAULT; - if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof (p))) + if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p))) break; err = -ENOENT; ip6_tnl_parm_from_user(&p1, &p); @@ -1482,11 +1482,11 @@ static void ip6_tnl_dev_setup(struct net_device *dev) dev->destructor = ip6_dev_free; dev->type = ARPHRD_TUNNEL6; - dev->hard_header_len = LL_MAX_HEADER + sizeof (struct ipv6hdr); - dev->mtu = ETH_DATA_LEN - sizeof (struct ipv6hdr); + dev->hard_header_len = LL_MAX_HEADER + sizeof(struct ipv6hdr); + dev->mtu = ETH_DATA_LEN - sizeof(struct ipv6hdr); t = netdev_priv(dev); if (!(t->parms.flags & IP6_TNL_F_IGN_ENCAP_LIMIT)) - dev->mtu-=8; + dev->mtu -= 8; dev->flags |= IFF_NOARP; dev->addr_len = sizeof(struct in6_addr); dev->priv_flags &= ~IFF_XMIT_DST_RELEASE; diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c index f9a3fd320d1..0171f08325c 100644 --- a/net/ipv6/ip6mr.c +++ b/net/ipv6/ip6mr.c @@ -845,7 +845,7 @@ static void ip6mr_destroy_unres(struct mr6_table *mrt, struct mfc6_cache *c) atomic_dec(&mrt->cache_resolve_queue_len); - while((skb = skb_dequeue(&c->mfc_un.unres.unresolved)) != NULL) { + while ((skb = skb_dequeue(&c->mfc_un.unres.unresolved)) != NULL) { if (ipv6_hdr(skb)->version == 0) { struct nlmsghdr *nlh = (struct nlmsghdr *)skb_pull(skb, sizeof(struct ipv6hdr)); nlh->nlmsg_type = NLMSG_ERROR; @@ -1103,7 +1103,7 @@ static void ip6mr_cache_resolve(struct net *net, struct mr6_table *mrt, * Play the pending entries through our router */ - while((skb = __skb_dequeue(&uc->mfc_un.unres.unresolved))) { + while ((skb = __skb_dequeue(&uc->mfc_un.unres.unresolved))) { if (ipv6_hdr(skb)->version == 0) { struct nlmsghdr *nlh = (struct nlmsghdr *)skb_pull(skb, sizeof(struct ipv6hdr)); diff --git a/net/ipv6/ipcomp6.c b/net/ipv6/ipcomp6.c index d1c793cffcb..1b9316e1386 100644 --- a/net/ipv6/ipcomp6.c +++ b/net/ipv6/ipcomp6.c @@ -181,8 +181,7 @@ static int ipcomp6_rcv_cb(struct sk_buff *skb, int err) return 0; } -static const struct xfrm_type ipcomp6_type = -{ +static const struct xfrm_type ipcomp6_type = { .description = "IPCOMP6", .owner = THIS_MODULE, .proto = IPPROTO_COMP, @@ -193,8 +192,7 @@ static const struct xfrm_type ipcomp6_type = .hdr_offset = xfrm6_find_1stfragopt, }; -static struct xfrm6_protocol ipcomp6_protocol = -{ +static struct xfrm6_protocol ipcomp6_protocol = { .handler = xfrm6_rcv, .cb_handler = ipcomp6_rcv_cb, .err_handler = ipcomp6_err, diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c index 0c289982796..e1a9583bb41 100644 --- a/net/ipv6/ipv6_sockglue.c +++ b/net/ipv6/ipv6_sockglue.c @@ -66,12 +66,12 @@ int ip6_ra_control(struct sock *sk, int sel) if (sk->sk_type != SOCK_RAW || inet_sk(sk)->inet_num != IPPROTO_RAW) return -ENOPROTOOPT; - new_ra = (sel>=0) ? kmalloc(sizeof(*new_ra), GFP_KERNEL) : NULL; + new_ra = (sel >= 0) ? 
kmalloc(sizeof(*new_ra), GFP_KERNEL) : NULL; write_lock_bh(&ip6_ra_lock); - for (rap = &ip6_ra_chain; (ra=*rap) != NULL; rap = &ra->next) { + for (rap = &ip6_ra_chain; (ra = *rap) != NULL; rap = &ra->next) { if (ra->sk == sk) { - if (sel>=0) { + if (sel >= 0) { write_unlock_bh(&ip6_ra_lock); kfree(new_ra); return -EADDRINUSE; @@ -130,7 +130,7 @@ static int do_ipv6_setsockopt(struct sock *sk, int level, int optname, int retv = -ENOPROTOOPT; if (optval == NULL) - val=0; + val = 0; else { if (optlen >= sizeof(int)) { if (get_user(val, (int __user *) optval)) @@ -139,7 +139,7 @@ static int do_ipv6_setsockopt(struct sock *sk, int level, int optname, val = 0; } - valbool = (val!=0); + valbool = (val != 0); if (ip6_mroute_opt(optname)) return ip6_mroute_setsockopt(sk, optname, optval, optlen); @@ -474,7 +474,7 @@ sticky_done: goto done; msg.msg_controllen = optlen; - msg.msg_control = (void*)(opt+1); + msg.msg_control = (void *)(opt+1); retv = ip6_datagram_send_ctl(net, sk, &msg, &fl6, opt, &junk, &junk, &junk); @@ -687,7 +687,7 @@ done: retv = -ENOBUFS; break; } - gsf = kmalloc(optlen,GFP_KERNEL); + gsf = kmalloc(optlen, GFP_KERNEL); if (!gsf) { retv = -ENOBUFS; break; @@ -873,7 +873,6 @@ int ipv6_setsockopt(struct sock *sk, int level, int optname, #endif return err; } - EXPORT_SYMBOL(ipv6_setsockopt); #ifdef CONFIG_COMPAT @@ -909,7 +908,6 @@ int compat_ipv6_setsockopt(struct sock *sk, int level, int optname, #endif return err; } - EXPORT_SYMBOL(compat_ipv6_setsockopt); #endif @@ -921,7 +919,7 @@ static int ipv6_getsockopt_sticky(struct sock *sk, struct ipv6_txoptions *opt, if (!opt) return 0; - switch(optname) { + switch (optname) { case IPV6_HOPOPTS: hdr = opt->hopopt; break; @@ -1284,9 +1282,9 @@ static int do_ipv6_getsockopt(struct sock *sk, int level, int optname, return -ENOPROTOOPT; } len = min_t(unsigned int, sizeof(int), len); - if(put_user(len, optlen)) + if (put_user(len, optlen)) return -EFAULT; - if(copy_to_user(optval,&val,len)) + if (copy_to_user(optval, &val, len)) return -EFAULT; return 0; } @@ -1299,7 +1297,7 @@ int ipv6_getsockopt(struct sock *sk, int level, int optname, if (level == SOL_IP && sk->sk_type != SOCK_RAW) return udp_prot.getsockopt(sk, level, optname, optval, optlen); - if(level != SOL_IPV6) + if (level != SOL_IPV6) return -ENOPROTOOPT; err = do_ipv6_getsockopt(sk, level, optname, optval, optlen, 0); @@ -1321,7 +1319,6 @@ int ipv6_getsockopt(struct sock *sk, int level, int optname, #endif return err; } - EXPORT_SYMBOL(ipv6_getsockopt); #ifdef CONFIG_COMPAT @@ -1364,7 +1361,6 @@ int compat_ipv6_getsockopt(struct sock *sk, int level, int optname, #endif return err; } - EXPORT_SYMBOL(compat_ipv6_getsockopt); #endif diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c index a23b655a762..6833dd07b2c 100644 --- a/net/ipv6/mcast.c +++ b/net/ipv6/mcast.c @@ -121,6 +121,7 @@ static int ip6_mc_leave_src(struct sock *sk, struct ipv6_mc_socklist *iml, #define IPV6_MLD_MAX_MSF 64 int sysctl_mld_max_msf __read_mostly = IPV6_MLD_MAX_MSF; +int sysctl_mld_qrv __read_mostly = MLD_QRV_DEFAULT; /* * socket join on multicast group @@ -237,7 +238,7 @@ int ipv6_sock_mc_drop(struct sock *sk, int ifindex, const struct in6_addr *addr) spin_lock(&ipv6_sk_mc_lock); for (lnk = &np->ipv6_mc_list; (mc_lst = rcu_dereference_protected(*lnk, - lockdep_is_held(&ipv6_sk_mc_lock))) !=NULL ; + lockdep_is_held(&ipv6_sk_mc_lock))) != NULL; lnk = &mc_lst->next) { if ((ifindex == 0 || mc_lst->ifindex == ifindex) && ipv6_addr_equal(&mc_lst->addr, addr)) { @@ -400,7 +401,7 @@ int ip6_mc_source(int add, int 
omode, struct sock *sk, if (!psl) goto done; /* err = -EADDRNOTAVAIL */ rv = !0; - for (i=0; i<psl->sl_count; i++) { + for (i = 0; i < psl->sl_count; i++) { rv = !ipv6_addr_equal(&psl->sl_addr[i], source); if (rv == 0) break; @@ -417,7 +418,7 @@ int ip6_mc_source(int add, int omode, struct sock *sk, /* update the interface filter */ ip6_mc_del_src(idev, group, omode, 1, source, 1); - for (j=i+1; j<psl->sl_count; j++) + for (j = i+1; j < psl->sl_count; j++) psl->sl_addr[j-1] = psl->sl_addr[j]; psl->sl_count--; err = 0; @@ -443,19 +444,19 @@ int ip6_mc_source(int add, int omode, struct sock *sk, newpsl->sl_max = count; newpsl->sl_count = count - IP6_SFBLOCK; if (psl) { - for (i=0; i<psl->sl_count; i++) + for (i = 0; i < psl->sl_count; i++) newpsl->sl_addr[i] = psl->sl_addr[i]; sock_kfree_s(sk, psl, IP6_SFLSIZE(psl->sl_max)); } pmc->sflist = psl = newpsl; } rv = 1; /* > 0 for insert logic below if sl_count is 0 */ - for (i=0; i<psl->sl_count; i++) { + for (i = 0; i < psl->sl_count; i++) { rv = !ipv6_addr_equal(&psl->sl_addr[i], source); if (rv == 0) /* There is an error in the address. */ goto done; } - for (j=psl->sl_count-1; j>=i; j--) + for (j = psl->sl_count-1; j >= i; j--) psl->sl_addr[j+1] = psl->sl_addr[j]; psl->sl_addr[i] = *source; psl->sl_count++; @@ -524,7 +525,7 @@ int ip6_mc_msfilter(struct sock *sk, struct group_filter *gsf) goto done; } newpsl->sl_max = newpsl->sl_count = gsf->gf_numsrc; - for (i=0; i<newpsl->sl_count; ++i) { + for (i = 0; i < newpsl->sl_count; ++i) { struct sockaddr_in6 *psin6; psin6 = (struct sockaddr_in6 *)&gsf->gf_slist[i]; @@ -616,7 +617,7 @@ int ip6_mc_msfget(struct sock *sk, struct group_filter *gsf, * on ipv6_sk_mc_lock and a write lock on pmc->sflock. We * have the socket lock, so reading here is safe. */ - for (i=0; i<copycount; i++) { + for (i = 0; i < copycount; i++) { struct sockaddr_in6 *psin6; struct sockaddr_storage ss; @@ -658,7 +659,7 @@ bool inet6_mc_check(struct sock *sk, const struct in6_addr *mc_addr, } else { int i; - for (i=0; i<psl->sl_count; i++) { + for (i = 0; i < psl->sl_count; i++) { if (ipv6_addr_equal(&psl->sl_addr[i], src_addr)) break; } @@ -772,7 +773,7 @@ static void mld_add_delrec(struct inet6_dev *idev, struct ifmcaddr6 *im) pmc->mca_tomb = im->mca_tomb; pmc->mca_sources = im->mca_sources; im->mca_tomb = im->mca_sources = NULL; - for (psf=pmc->mca_sources; psf; psf=psf->sf_next) + for (psf = pmc->mca_sources; psf; psf = psf->sf_next) psf->sf_crcount = pmc->mca_crcount; } spin_unlock_bh(&im->mca_lock); @@ -790,7 +791,7 @@ static void mld_del_delrec(struct inet6_dev *idev, const struct in6_addr *pmca) spin_lock_bh(&idev->mc_lock); pmc_prev = NULL; - for (pmc=idev->mc_tomb; pmc; pmc=pmc->next) { + for (pmc = idev->mc_tomb; pmc; pmc = pmc->next) { if (ipv6_addr_equal(&pmc->mca_addr, pmca)) break; pmc_prev = pmc; @@ -804,7 +805,7 @@ static void mld_del_delrec(struct inet6_dev *idev, const struct in6_addr *pmca) spin_unlock_bh(&idev->mc_lock); if (pmc) { - for (psf=pmc->mca_tomb; psf; psf=psf_next) { + for (psf = pmc->mca_tomb; psf; psf = psf_next) { psf_next = psf->sf_next; kfree(psf); } @@ -831,14 +832,14 @@ static void mld_clear_delrec(struct inet6_dev *idev) /* clear dead sources, too */ read_lock_bh(&idev->lock); - for (pmc=idev->mc_list; pmc; pmc=pmc->next) { + for (pmc = idev->mc_list; pmc; pmc = pmc->next) { struct ip6_sf_list *psf, *psf_next; spin_lock_bh(&pmc->mca_lock); psf = pmc->mca_tomb; pmc->mca_tomb = NULL; spin_unlock_bh(&pmc->mca_lock); - for (; psf; psf=psf_next) { + for (; psf; psf = psf_next) { psf_next = 
psf->sf_next; kfree(psf); } @@ -931,7 +932,7 @@ int __ipv6_dev_mc_dec(struct inet6_dev *idev, const struct in6_addr *addr) ASSERT_RTNL(); write_lock_bh(&idev->lock); - for (map = &idev->mc_list; (ma=*map) != NULL; map = &ma->next) { + for (map = &idev->mc_list; (ma = *map) != NULL; map = &ma->next) { if (ipv6_addr_equal(&ma->mca_addr, addr)) { if (--ma->mca_users == 0) { *map = ma->next; @@ -982,7 +983,7 @@ bool ipv6_chk_mcast_addr(struct net_device *dev, const struct in6_addr *group, idev = __in6_dev_get(dev); if (idev) { read_lock_bh(&idev->lock); - for (mc = idev->mc_list; mc; mc=mc->next) { + for (mc = idev->mc_list; mc; mc = mc->next) { if (ipv6_addr_equal(&mc->mca_addr, group)) break; } @@ -991,7 +992,7 @@ bool ipv6_chk_mcast_addr(struct net_device *dev, const struct in6_addr *group, struct ip6_sf_list *psf; spin_lock_bh(&mc->mca_lock); - for (psf=mc->mca_sources;psf;psf=psf->sf_next) { + for (psf = mc->mca_sources; psf; psf = psf->sf_next) { if (ipv6_addr_equal(&psf->sf_addr, src_addr)) break; } @@ -1000,7 +1001,7 @@ bool ipv6_chk_mcast_addr(struct net_device *dev, const struct in6_addr *group, psf->sf_count[MCAST_EXCLUDE] != mc->mca_sfcount[MCAST_EXCLUDE]; else - rv = mc->mca_sfcount[MCAST_EXCLUDE] !=0; + rv = mc->mca_sfcount[MCAST_EXCLUDE] != 0; spin_unlock_bh(&mc->mca_lock); } else rv = true; /* don't filter unspecified source */ @@ -1091,10 +1092,10 @@ static bool mld_xmarksources(struct ifmcaddr6 *pmc, int nsrcs, int i, scount; scount = 0; - for (psf=pmc->mca_sources; psf; psf=psf->sf_next) { + for (psf = pmc->mca_sources; psf; psf = psf->sf_next) { if (scount == nsrcs) break; - for (i=0; i<nsrcs; i++) { + for (i = 0; i < nsrcs; i++) { /* skip inactive filters */ if (psf->sf_count[MCAST_INCLUDE] || pmc->mca_sfcount[MCAST_EXCLUDE] != @@ -1124,10 +1125,10 @@ static bool mld_marksources(struct ifmcaddr6 *pmc, int nsrcs, /* mark INCLUDE-mode sources */ scount = 0; - for (psf=pmc->mca_sources; psf; psf=psf->sf_next) { + for (psf = pmc->mca_sources; psf; psf = psf->sf_next) { if (scount == nsrcs) break; - for (i=0; i<nsrcs; i++) { + for (i = 0; i < nsrcs; i++) { if (ipv6_addr_equal(&srcs[i], &psf->sf_addr)) { psf->sf_gsresp = 1; scount++; @@ -1205,15 +1206,16 @@ static void mld_update_qrv(struct inet6_dev *idev, * and SHOULD NOT be one. Catch this here if we ever run * into such a case in future. 
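On the mld_update_qrv() hunk that follows: the old code clamped a too-small robustness variable straight to MLD_QRV_DEFAULT, while the new code clamps to min(MLD_QRV_DEFAULT, sysctl_mld_qrv), so the new mld_qrv sysctl (added to ipv6_rotable later in this diff, with a lower bound of 1) can relax the RFC 3810 "QRV SHOULD NOT be zero or one" clamp but can never raise the floor above the default of 2. Reduced to a standalone sketch (illustrative names, not from the tree):

        /* effective QRV: honour the router-advertised value, but never
         * let it fall below the configured floor */
        static int effective_qrv(int advertised, int sysctl_qrv)
        {
                int floor = sysctl_qrv < 2 ? sysctl_qrv : 2; /* min(2, sysctl) */

                return advertised < floor ? floor : advertised;
        }
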
*/ + const int min_qrv = min(MLD_QRV_DEFAULT, sysctl_mld_qrv); WARN_ON(idev->mc_qrv == 0); if (mlh2->mld2q_qrv > 0) idev->mc_qrv = mlh2->mld2q_qrv; - if (unlikely(idev->mc_qrv < 2)) { + if (unlikely(idev->mc_qrv < min_qrv)) { net_warn_ratelimited("IPv6: MLD: clamping QRV from %u to %u!\n", - idev->mc_qrv, MLD_QRV_DEFAULT); - idev->mc_qrv = MLD_QRV_DEFAULT; + idev->mc_qrv, min_qrv); + idev->mc_qrv = min_qrv; } } @@ -1378,13 +1380,13 @@ int igmp6_event_query(struct sk_buff *skb) read_lock_bh(&idev->lock); if (group_type == IPV6_ADDR_ANY) { - for (ma = idev->mc_list; ma; ma=ma->next) { + for (ma = idev->mc_list; ma; ma = ma->next) { spin_lock_bh(&ma->mca_lock); igmp6_group_queried(ma, max_delay); spin_unlock_bh(&ma->mca_lock); } } else { - for (ma = idev->mc_list; ma; ma=ma->next) { + for (ma = idev->mc_list; ma; ma = ma->next) { if (!ipv6_addr_equal(group, &ma->mca_addr)) continue; spin_lock_bh(&ma->mca_lock); @@ -1448,7 +1450,7 @@ int igmp6_event_report(struct sk_buff *skb) */ read_lock_bh(&idev->lock); - for (ma = idev->mc_list; ma; ma=ma->next) { + for (ma = idev->mc_list; ma; ma = ma->next) { if (ipv6_addr_equal(&ma->mca_addr, &mld->mld_mca)) { spin_lock(&ma->mca_lock); if (del_timer(&ma->mca_timer)) @@ -1512,7 +1514,7 @@ mld_scount(struct ifmcaddr6 *pmc, int type, int gdeleted, int sdeleted) struct ip6_sf_list *psf; int scount = 0; - for (psf=pmc->mca_sources; psf; psf=psf->sf_next) { + for (psf = pmc->mca_sources; psf; psf = psf->sf_next) { if (!is_in(pmc, psf, type, gdeleted, sdeleted)) continue; scount++; @@ -1726,7 +1728,7 @@ static struct sk_buff *add_grec(struct sk_buff *skb, struct ifmcaddr6 *pmc, } first = 1; psf_prev = NULL; - for (psf=*psf_list; psf; psf=psf_next) { + for (psf = *psf_list; psf; psf = psf_next) { struct in6_addr *psrc; psf_next = psf->sf_next; @@ -1805,7 +1807,7 @@ static void mld_send_report(struct inet6_dev *idev, struct ifmcaddr6 *pmc) read_lock_bh(&idev->lock); if (!pmc) { - for (pmc=idev->mc_list; pmc; pmc=pmc->next) { + for (pmc = idev->mc_list; pmc; pmc = pmc->next) { if (pmc->mca_flags & MAF_NOREPORT) continue; spin_lock_bh(&pmc->mca_lock); @@ -1838,7 +1840,7 @@ static void mld_clear_zeros(struct ip6_sf_list **ppsf) struct ip6_sf_list *psf_prev, *psf_next, *psf; psf_prev = NULL; - for (psf=*ppsf; psf; psf = psf_next) { + for (psf = *ppsf; psf; psf = psf_next) { psf_next = psf->sf_next; if (psf->sf_crcount == 0) { if (psf_prev) @@ -1862,7 +1864,7 @@ static void mld_send_cr(struct inet6_dev *idev) /* deleted MCA's */ pmc_prev = NULL; - for (pmc=idev->mc_tomb; pmc; pmc=pmc_next) { + for (pmc = idev->mc_tomb; pmc; pmc = pmc_next) { pmc_next = pmc->next; if (pmc->mca_sfmode == MCAST_INCLUDE) { type = MLD2_BLOCK_OLD_SOURCES; @@ -1895,7 +1897,7 @@ static void mld_send_cr(struct inet6_dev *idev) spin_unlock(&idev->mc_lock); /* change recs */ - for (pmc=idev->mc_list; pmc; pmc=pmc->next) { + for (pmc = idev->mc_list; pmc; pmc = pmc->next) { spin_lock_bh(&pmc->mca_lock); if (pmc->mca_sfcount[MCAST_EXCLUDE]) { type = MLD2_BLOCK_OLD_SOURCES; @@ -2032,7 +2034,7 @@ static void mld_send_initial_cr(struct inet6_dev *idev) skb = NULL; read_lock_bh(&idev->lock); - for (pmc=idev->mc_list; pmc; pmc=pmc->next) { + for (pmc = idev->mc_list; pmc; pmc = pmc->next) { spin_lock_bh(&pmc->mca_lock); if (pmc->mca_sfcount[MCAST_EXCLUDE]) type = MLD2_CHANGE_TO_EXCLUDE; @@ -2077,7 +2079,7 @@ static int ip6_mc_del1_src(struct ifmcaddr6 *pmc, int sfmode, int rv = 0; psf_prev = NULL; - for (psf=pmc->mca_sources; psf; psf=psf->sf_next) { + for (psf = pmc->mca_sources; psf; psf = 
psf->sf_next) { if (ipv6_addr_equal(&psf->sf_addr, psfsrc)) break; psf_prev = psf; @@ -2118,7 +2120,7 @@ static int ip6_mc_del_src(struct inet6_dev *idev, const struct in6_addr *pmca, if (!idev) return -ENODEV; read_lock_bh(&idev->lock); - for (pmc=idev->mc_list; pmc; pmc=pmc->next) { + for (pmc = idev->mc_list; pmc; pmc = pmc->next) { if (ipv6_addr_equal(pmca, &pmc->mca_addr)) break; } @@ -2138,7 +2140,7 @@ static int ip6_mc_del_src(struct inet6_dev *idev, const struct in6_addr *pmca, pmc->mca_sfcount[sfmode]--; } err = 0; - for (i=0; i<sfcount; i++) { + for (i = 0; i < sfcount; i++) { int rv = ip6_mc_del1_src(pmc, sfmode, &psfsrc[i]); changerec |= rv > 0; @@ -2154,7 +2156,7 @@ static int ip6_mc_del_src(struct inet6_dev *idev, const struct in6_addr *pmca, pmc->mca_sfmode = MCAST_INCLUDE; pmc->mca_crcount = idev->mc_qrv; idev->mc_ifc_count = pmc->mca_crcount; - for (psf=pmc->mca_sources; psf; psf = psf->sf_next) + for (psf = pmc->mca_sources; psf; psf = psf->sf_next) psf->sf_crcount = 0; mld_ifc_event(pmc->idev); } else if (sf_setstate(pmc) || changerec) @@ -2173,7 +2175,7 @@ static int ip6_mc_add1_src(struct ifmcaddr6 *pmc, int sfmode, struct ip6_sf_list *psf, *psf_prev; psf_prev = NULL; - for (psf=pmc->mca_sources; psf; psf=psf->sf_next) { + for (psf = pmc->mca_sources; psf; psf = psf->sf_next) { if (ipv6_addr_equal(&psf->sf_addr, psfsrc)) break; psf_prev = psf; @@ -2198,7 +2200,7 @@ static void sf_markstate(struct ifmcaddr6 *pmc) struct ip6_sf_list *psf; int mca_xcount = pmc->mca_sfcount[MCAST_EXCLUDE]; - for (psf=pmc->mca_sources; psf; psf=psf->sf_next) + for (psf = pmc->mca_sources; psf; psf = psf->sf_next) if (pmc->mca_sfcount[MCAST_EXCLUDE]) { psf->sf_oldin = mca_xcount == psf->sf_count[MCAST_EXCLUDE] && @@ -2215,7 +2217,7 @@ static int sf_setstate(struct ifmcaddr6 *pmc) int new_in, rv; rv = 0; - for (psf=pmc->mca_sources; psf; psf=psf->sf_next) { + for (psf = pmc->mca_sources; psf; psf = psf->sf_next) { if (pmc->mca_sfcount[MCAST_EXCLUDE]) { new_in = mca_xcount == psf->sf_count[MCAST_EXCLUDE] && !psf->sf_count[MCAST_INCLUDE]; @@ -2225,8 +2227,8 @@ static int sf_setstate(struct ifmcaddr6 *pmc) if (!psf->sf_oldin) { struct ip6_sf_list *prev = NULL; - for (dpsf=pmc->mca_tomb; dpsf; - dpsf=dpsf->sf_next) { + for (dpsf = pmc->mca_tomb; dpsf; + dpsf = dpsf->sf_next) { if (ipv6_addr_equal(&dpsf->sf_addr, &psf->sf_addr)) break; @@ -2248,7 +2250,7 @@ static int sf_setstate(struct ifmcaddr6 *pmc) * add or update "delete" records if an active filter * is now inactive */ - for (dpsf=pmc->mca_tomb; dpsf; dpsf=dpsf->sf_next) + for (dpsf = pmc->mca_tomb; dpsf; dpsf = dpsf->sf_next) if (ipv6_addr_equal(&dpsf->sf_addr, &psf->sf_addr)) break; @@ -2282,7 +2284,7 @@ static int ip6_mc_add_src(struct inet6_dev *idev, const struct in6_addr *pmca, if (!idev) return -ENODEV; read_lock_bh(&idev->lock); - for (pmc=idev->mc_list; pmc; pmc=pmc->next) { + for (pmc = idev->mc_list; pmc; pmc = pmc->next) { if (ipv6_addr_equal(pmca, &pmc->mca_addr)) break; } @@ -2298,7 +2300,7 @@ static int ip6_mc_add_src(struct inet6_dev *idev, const struct in6_addr *pmca, if (!delta) pmc->mca_sfcount[sfmode]++; err = 0; - for (i=0; i<sfcount; i++) { + for (i = 0; i < sfcount; i++) { err = ip6_mc_add1_src(pmc, sfmode, &psfsrc[i]); if (err) break; @@ -2308,7 +2310,7 @@ static int ip6_mc_add_src(struct inet6_dev *idev, const struct in6_addr *pmca, if (!delta) pmc->mca_sfcount[sfmode]--; - for (j=0; j<i; j++) + for (j = 0; j < i; j++) ip6_mc_del1_src(pmc, sfmode, &psfsrc[j]); } else if (isexclude != (pmc->mca_sfcount[MCAST_EXCLUDE] 
!= 0)) { struct ip6_sf_list *psf; @@ -2322,7 +2324,7 @@ static int ip6_mc_add_src(struct inet6_dev *idev, const struct in6_addr *pmca, pmc->mca_crcount = idev->mc_qrv; idev->mc_ifc_count = pmc->mca_crcount; - for (psf=pmc->mca_sources; psf; psf = psf->sf_next) + for (psf = pmc->mca_sources; psf; psf = psf->sf_next) psf->sf_crcount = 0; mld_ifc_event(idev); } else if (sf_setstate(pmc)) @@ -2336,12 +2338,12 @@ static void ip6_mc_clear_src(struct ifmcaddr6 *pmc) { struct ip6_sf_list *psf, *nextpsf; - for (psf=pmc->mca_tomb; psf; psf=nextpsf) { + for (psf = pmc->mca_tomb; psf; psf = nextpsf) { nextpsf = psf->sf_next; kfree(psf); } pmc->mca_tomb = NULL; - for (psf=pmc->mca_sources; psf; psf=nextpsf) { + for (psf = pmc->mca_sources; psf; psf = nextpsf) { nextpsf = psf->sf_next; kfree(psf); } @@ -2485,13 +2487,21 @@ void ipv6_mc_down(struct inet6_dev *idev) mld_gq_stop_timer(idev); mld_dad_stop_timer(idev); - for (i = idev->mc_list; i; i=i->next) + for (i = idev->mc_list; i; i = i->next) igmp6_group_dropped(i); read_unlock_bh(&idev->lock); mld_clear_delrec(idev); } +static void ipv6_mc_reset(struct inet6_dev *idev) +{ + idev->mc_qrv = sysctl_mld_qrv; + idev->mc_qi = MLD_QI_DEFAULT; + idev->mc_qri = MLD_QRI_DEFAULT; + idev->mc_v1_seen = 0; + idev->mc_maxdelay = unsolicited_report_interval(idev); +} /* Device going up */ @@ -2502,7 +2512,8 @@ void ipv6_mc_up(struct inet6_dev *idev) /* Install multicast list, except for all-nodes (already installed) */ read_lock_bh(&idev->lock); - for (i = idev->mc_list; i; i=i->next) + ipv6_mc_reset(idev); + for (i = idev->mc_list; i; i = i->next) igmp6_group_added(i); read_unlock_bh(&idev->lock); } @@ -2522,13 +2533,7 @@ void ipv6_mc_init_dev(struct inet6_dev *idev) (unsigned long)idev); setup_timer(&idev->mc_dad_timer, mld_dad_timer_expire, (unsigned long)idev); - - idev->mc_qrv = MLD_QRV_DEFAULT; - idev->mc_qi = MLD_QI_DEFAULT; - idev->mc_qri = MLD_QRI_DEFAULT; - - idev->mc_maxdelay = unsolicited_report_interval(idev); - idev->mc_v1_seen = 0; + ipv6_mc_reset(idev); write_unlock_bh(&idev->lock); } diff --git a/net/ipv6/mip6.c b/net/ipv6/mip6.c index db9b6cbc9db..f61429d391d 100644 --- a/net/ipv6/mip6.c +++ b/net/ipv6/mip6.c @@ -336,11 +336,10 @@ static void mip6_destopt_destroy(struct xfrm_state *x) { } -static const struct xfrm_type mip6_destopt_type = -{ +static const struct xfrm_type mip6_destopt_type = { .description = "MIP6DESTOPT", .owner = THIS_MODULE, - .proto = IPPROTO_DSTOPTS, + .proto = IPPROTO_DSTOPTS, .flags = XFRM_TYPE_NON_FRAGMENT | XFRM_TYPE_LOCAL_COADDR, .init_state = mip6_destopt_init_state, .destructor = mip6_destopt_destroy, @@ -469,11 +468,10 @@ static void mip6_rthdr_destroy(struct xfrm_state *x) { } -static const struct xfrm_type mip6_rthdr_type = -{ +static const struct xfrm_type mip6_rthdr_type = { .description = "MIP6RT", .owner = THIS_MODULE, - .proto = IPPROTO_ROUTING, + .proto = IPPROTO_ROUTING, .flags = XFRM_TYPE_NON_FRAGMENT | XFRM_TYPE_REMOTE_COADDR, .init_state = mip6_rthdr_init_state, .destructor = mip6_rthdr_destroy, diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c index 339078f95d1..4cb45c1079a 100644 --- a/net/ipv6/ndisc.c +++ b/net/ipv6/ndisc.c @@ -175,7 +175,7 @@ static struct nd_opt_hdr *ndisc_next_option(struct nd_opt_hdr *cur, type = cur->nd_opt_type; do { cur = ((void *)cur) + (cur->nd_opt_len << 3); - } while(cur < end && cur->nd_opt_type != type); + } while (cur < end && cur->nd_opt_type != type); return cur <= end && cur->nd_opt_type == type ? 
cur : NULL; } @@ -192,7 +192,7 @@ static struct nd_opt_hdr *ndisc_next_useropt(struct nd_opt_hdr *cur, return NULL; do { cur = ((void *)cur) + (cur->nd_opt_len << 3); - } while(cur < end && !ndisc_is_useropt(cur)); + } while (cur < end && !ndisc_is_useropt(cur)); return cur <= end && ndisc_is_useropt(cur) ? cur : NULL; } @@ -284,7 +284,6 @@ int ndisc_mc_map(const struct in6_addr *addr, char *buf, struct net_device *dev, } return -EINVAL; } - EXPORT_SYMBOL(ndisc_mc_map); static u32 ndisc_hash(const void *pkey, @@ -296,7 +295,7 @@ static u32 ndisc_hash(const void *pkey, static int ndisc_constructor(struct neighbour *neigh) { - struct in6_addr *addr = (struct in6_addr*)&neigh->primary_key; + struct in6_addr *addr = (struct in6_addr *)&neigh->primary_key; struct net_device *dev = neigh->dev; struct inet6_dev *in6_dev; struct neigh_parms *parms; @@ -344,7 +343,7 @@ static int ndisc_constructor(struct neighbour *neigh) static int pndisc_constructor(struct pneigh_entry *n) { - struct in6_addr *addr = (struct in6_addr*)&n->key; + struct in6_addr *addr = (struct in6_addr *)&n->key; struct in6_addr maddr; struct net_device *dev = n->dev; @@ -357,7 +356,7 @@ static int pndisc_constructor(struct pneigh_entry *n) static void pndisc_destructor(struct pneigh_entry *n) { - struct in6_addr *addr = (struct in6_addr*)&n->key; + struct in6_addr *addr = (struct in6_addr *)&n->key; struct in6_addr maddr; struct net_device *dev = n->dev; @@ -1065,7 +1064,7 @@ static void ndisc_router_discovery(struct sk_buff *skb) int optlen; unsigned int pref = 0; - __u8 * opt = (__u8 *)(ra_msg + 1); + __u8 *opt = (__u8 *)(ra_msg + 1); optlen = (skb_tail_pointer(skb) - skb_transport_header(skb)) - sizeof(struct ra_msg); @@ -1319,7 +1318,7 @@ skip_linkparms: continue; if (ri->prefix_len > in6_dev->cnf.accept_ra_rt_info_max_plen) continue; - rt6_route_rcv(skb->dev, (u8*)p, (p->nd_opt_len) << 3, + rt6_route_rcv(skb->dev, (u8 *)p, (p->nd_opt_len) << 3, &ipv6_hdr(skb)->saddr); } } @@ -1352,7 +1351,7 @@ skip_routeinfo: __be32 n; u32 mtu; - memcpy(&n, ((u8*)(ndopts.nd_opts_mtu+1))+2, sizeof(mtu)); + memcpy(&n, ((u8 *)(ndopts.nd_opts_mtu+1))+2, sizeof(mtu)); mtu = ntohl(n); if (mtu < IPV6_MIN_MTU || mtu > skb->dev->mtu) { diff --git a/net/ipv6/output_core.c b/net/ipv6/output_core.c index 5ec867e4a8b..fc24c390af0 100644 --- a/net/ipv6/output_core.c +++ b/net/ipv6/output_core.c @@ -35,7 +35,7 @@ int ip6_find_1stfragopt(struct sk_buff *skb, u8 **nexthdr) if (found_rhdr) return offset; break; - default : + default: return offset; } diff --git a/net/ipv6/proc.c b/net/ipv6/proc.c index 2d6f860e5c1..1752cd0b488 100644 --- a/net/ipv6/proc.c +++ b/net/ipv6/proc.c @@ -8,7 +8,7 @@ * except it reports the sockets in the INET6 address family. * * Authors: David S. Miller (davem@caip.rutgers.edu) - * YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org> + * YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org> * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c index 39d44226e40..896af880797 100644 --- a/net/ipv6/raw.c +++ b/net/ipv6/raw.c @@ -889,7 +889,7 @@ back_from_confirm: else { lock_sock(sk); err = ip6_append_data(sk, ip_generic_getfrag, msg->msg_iov, - len, 0, hlimit, tclass, opt, &fl6, (struct rt6_info*)dst, + len, 0, hlimit, tclass, opt, &fl6, (struct rt6_info *)dst, msg->msg_flags, dontfrag); if (err) @@ -902,7 +902,7 @@ done: dst_release(dst); out: fl6_sock_release(flowlabel); - return err<0?err:len; + return err < 0 ? 
err : len; do_confirm: dst_confirm(dst); if (!(msg->msg_flags & MSG_PROBE) || len) @@ -1045,7 +1045,7 @@ static int do_rawv6_getsockopt(struct sock *sk, int level, int optname, struct raw6_sock *rp = raw6_sk(sk); int val, len; - if (get_user(len,optlen)) + if (get_user(len, optlen)) return -EFAULT; switch (optname) { @@ -1069,7 +1069,7 @@ static int do_rawv6_getsockopt(struct sock *sk, int level, int optname, if (put_user(len, optlen)) return -EFAULT; - if (copy_to_user(optval,&val,len)) + if (copy_to_user(optval, &val, len)) return -EFAULT; return 0; } diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c index c6557d9f780..1a157ca2ebc 100644 --- a/net/ipv6/reassembly.c +++ b/net/ipv6/reassembly.c @@ -62,13 +62,12 @@ static const char ip6_frag_cache_name[] = "ip6-frags"; -struct ip6frag_skb_cb -{ +struct ip6frag_skb_cb { struct inet6_skb_parm h; int offset; }; -#define FRAG6_CB(skb) ((struct ip6frag_skb_cb*)((skb)->cb)) +#define FRAG6_CB(skb) ((struct ip6frag_skb_cb *)((skb)->cb)) static inline u8 ip6_frag_ecn(const struct ipv6hdr *ipv6h) { @@ -289,7 +288,7 @@ static int ip6_frag_queue(struct frag_queue *fq, struct sk_buff *skb, goto found; } prev = NULL; - for(next = fq->q.fragments; next != NULL; next = next->next) { + for (next = fq->q.fragments; next != NULL; next = next->next) { if (FRAG6_CB(next)->offset >= offset) break; /* bingo! */ prev = next; @@ -529,7 +528,7 @@ static int ipv6_frag_rcv(struct sk_buff *skb) IP6_INC_STATS_BH(net, ip6_dst_idev(skb_dst(skb)), IPSTATS_MIB_REASMREQDS); /* Jumbo payload inhibits frag. header */ - if (hdr->payload_len==0) + if (hdr->payload_len == 0) goto fail_hdr; if (!pskb_may_pull(skb, (skb_transport_offset(skb) + @@ -575,8 +574,7 @@ fail_hdr: return -1; } -static const struct inet6_protocol frag_protocol = -{ +static const struct inet6_protocol frag_protocol = { .handler = ipv6_frag_rcv, .flags = INET6_PROTO_NOPOLICY, }; diff --git a/net/ipv6/route.c b/net/ipv6/route.c index f23fbd28a50..f74b0417bd6 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -813,7 +813,7 @@ out: } -struct dst_entry * ip6_route_lookup(struct net *net, struct flowi6 *fl6, +struct dst_entry *ip6_route_lookup(struct net *net, struct flowi6 *fl6, int flags) { return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_lookup); @@ -843,7 +843,6 @@ struct rt6_info *rt6_lookup(struct net *net, const struct in6_addr *daddr, return NULL; } - EXPORT_SYMBOL(rt6_lookup); /* ip6_ins_rt is called with FREE table->tb6_lock. 
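Several hunks in this diff (the reuseport selection in inet6_hashtables.c and udp.c, and the bucket hashing in nf_conntrack_core.c) replace the open-coded ((u64)hash * n) >> 32 with reciprocal_scale(). The helper is a multiplicative range reduction: it maps a uniform 32-bit value onto [0, n) with one multiply instead of a divide, and in the reuseport loops the "== 0" test fires with probability roughly 1/matches, which is reservoir sampling over the sockets that tie on score. A minimal sketch of what the helper computes (the kernel's definition lives in linux/kernel.h):

        /* scale a uniform 32-bit value into [0, ep_ro):
         * multiply and keep the high 32 bits */
        static inline u32 reciprocal_scale(u32 val, u32 ep_ro)
        {
                return (u32)(((u64)val * ep_ro) >> 32);
        }
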
@@ -1024,7 +1023,7 @@ static struct rt6_info *ip6_pol_route_output(struct net *net, struct fib6_table return ip6_pol_route(net, table, fl6->flowi6_oif, fl6, flags); } -struct dst_entry * ip6_route_output(struct net *net, const struct sock *sk, +struct dst_entry *ip6_route_output(struct net *net, const struct sock *sk, struct flowi6 *fl6) { int flags = 0; @@ -1041,7 +1040,6 @@ struct dst_entry * ip6_route_output(struct net *net, const struct sock *sk, return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_output); } - EXPORT_SYMBOL(ip6_route_output); struct dst_entry *ip6_blackhole_route(struct net *net, struct dst_entry *dst_orig) @@ -1149,7 +1147,7 @@ static void ip6_link_failure(struct sk_buff *skb) static void ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk, struct sk_buff *skb, u32 mtu) { - struct rt6_info *rt6 = (struct rt6_info*)dst; + struct rt6_info *rt6 = (struct rt6_info *)dst; dst_confirm(dst); if (mtu < dst_mtu(dst) && rt6->rt6i_dst.plen == 128) { @@ -1924,7 +1922,7 @@ static struct rt6_info *rt6_get_route_info(struct net *net, return NULL; read_lock_bh(&table->tb6_lock); - fn = fib6_locate(&table->tb6_root, prefix ,prefixlen, NULL, 0); + fn = fib6_locate(&table->tb6_root, prefix, prefixlen, NULL, 0); if (!fn) goto out; @@ -1983,7 +1981,7 @@ struct rt6_info *rt6_get_dflt_router(const struct in6_addr *addr, struct net_dev return NULL; read_lock_bh(&table->tb6_lock); - for (rt = table->tb6_root.leaf; rt; rt=rt->dst.rt6_next) { + for (rt = table->tb6_root.leaf; rt; rt = rt->dst.rt6_next) { if (dev == rt->dst.dev && ((rt->rt6i_flags & (RTF_ADDRCONF | RTF_DEFAULT)) == (RTF_ADDRCONF | RTF_DEFAULT)) && ipv6_addr_equal(&rt->rt6i_gateway, addr)) @@ -2068,7 +2066,7 @@ int ipv6_route_ioctl(struct net *net, unsigned int cmd, void __user *arg) struct in6_rtmsg rtmsg; int err; - switch(cmd) { + switch (cmd) { case SIOCADDRT: /* Add a route */ case SIOCDELRT: /* Delete a route */ if (!ns_capable(net->user_ns, CAP_NET_ADMIN)) @@ -2191,7 +2189,7 @@ int ip6_route_get_saddr(struct net *net, unsigned int prefs, struct in6_addr *saddr) { - struct inet6_dev *idev = ip6_dst_idev((struct dst_entry*)rt); + struct inet6_dev *idev = ip6_dst_idev((struct dst_entry *)rt); int err = 0; if (rt->rt6i_prefsrc.plen) *saddr = rt->rt6i_prefsrc.addr; @@ -2486,7 +2484,7 @@ beginning: return last_err; } -static int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh) +static int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr *nlh) { struct fib6_config cfg; int err; @@ -2501,7 +2499,7 @@ static int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh) return ip6_route_del(&cfg); } -static int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh) +static int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr *nlh) { struct fib6_config cfg; int err; @@ -2693,7 +2691,7 @@ int rt6_dump_route(struct rt6_info *rt, void *p_arg) prefix, 0, NLM_F_MULTI); } -static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh) +static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh) { struct net *net = sock_net(in_skb->sk); struct nlattr *tb[RTA_MAX+1]; diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c index 6163f851dc0..86e3fa81f85 100644 --- a/net/ipv6/sit.c +++ b/net/ipv6/sit.c @@ -812,9 +812,9 @@ static netdev_tx_t ipip6_tunnel_xmit(struct sk_buff *skb, const struct ipv6hdr *iph6 = ipv6_hdr(skb); u8 tos = tunnel->parms.iph.tos; __be16 df = tiph->frag_off; - struct rtable *rt; /* Route to the other host */ - struct net_device *tdev; /* Device to other 
host */ - unsigned int max_headroom; /* The extra header space needed */ + struct rtable *rt; /* Route to the other host */ + struct net_device *tdev; /* Device to other host */ + unsigned int max_headroom; /* The extra header space needed */ __be32 dst = tiph->daddr; struct flowi4 fl4; int mtu; @@ -1123,7 +1123,7 @@ static int ipip6_tunnel_update_6rd(struct ip_tunnel *t, #endif static int -ipip6_tunnel_ioctl (struct net_device *dev, struct ifreq *ifr, int cmd) +ipip6_tunnel_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) { int err = 0; struct ip_tunnel_parm p; @@ -1339,10 +1339,10 @@ static void ipip6_dev_free(struct net_device *dev) static void ipip6_tunnel_setup(struct net_device *dev) { dev->netdev_ops = &ipip6_netdev_ops; - dev->destructor = ipip6_dev_free; + dev->destructor = ipip6_dev_free; dev->type = ARPHRD_SIT; - dev->hard_header_len = LL_MAX_HEADER + sizeof(struct iphdr); + dev->hard_header_len = LL_MAX_HEADER + sizeof(struct iphdr); dev->mtu = ETH_DATA_LEN - sizeof(struct iphdr); dev->flags = IFF_NOARP; dev->priv_flags &= ~IFF_XMIT_DST_RELEASE; diff --git a/net/ipv6/syncookies.c b/net/ipv6/syncookies.c index 83cea1d3946..c643dc907ce 100644 --- a/net/ipv6/syncookies.c +++ b/net/ipv6/syncookies.c @@ -24,7 +24,7 @@ #define COOKIEBITS 24 /* Upper bits store count */ #define COOKIEMASK (((__u32)1 << COOKIEBITS) - 1) -static u32 syncookie6_secret[2][16-4+SHA_DIGEST_WORDS]; +static u32 syncookie6_secret[2][16-4+SHA_DIGEST_WORDS] __read_mostly; /* RFC 2460, Section 8.3: * [ipv6 tcp] MSS must be computed as the maximum packet size minus 60 [..] diff --git a/net/ipv6/sysctl_net_ipv6.c b/net/ipv6/sysctl_net_ipv6.c index 0c56c93619e..c5c10fafcfe 100644 --- a/net/ipv6/sysctl_net_ipv6.c +++ b/net/ipv6/sysctl_net_ipv6.c @@ -16,6 +16,8 @@ #include <net/addrconf.h> #include <net/inet_frag.h> +static int one = 1; + static struct ctl_table ipv6_table_template[] = { { .procname = "bindv6only", @@ -63,6 +65,14 @@ static struct ctl_table ipv6_rotable[] = { .mode = 0644, .proc_handler = proc_dointvec }, + { + .procname = "mld_qrv", + .data = &sysctl_mld_qrv, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec_minmax, + .extra1 = &one + }, { } }; diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 29964c3d363..5b3c70ff7a7 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -738,7 +738,7 @@ static void tcp_v6_init_req(struct request_sock *req, struct sock *sk, ipv6_addr_type(&ireq->ir_v6_rmt_addr) & IPV6_ADDR_LINKLOCAL) ireq->ir_iif = inet6_iif(skb); - if (!TCP_SKB_CB(skb)->when && + if (!TCP_SKB_CB(skb)->tcp_tw_isn && (ipv6_opt_accepted(sk, skb) || np->rxopt.bits.rxinfo || np->rxopt.bits.rxoinfo || np->rxopt.bits.rxhlim || np->rxopt.bits.rxohlim || np->repflow)) { @@ -1412,7 +1412,7 @@ static int tcp_v6_rcv(struct sk_buff *skb) TCP_SKB_CB(skb)->end_seq = (TCP_SKB_CB(skb)->seq + th->syn + th->fin + skb->len - th->doff*4); TCP_SKB_CB(skb)->ack_seq = ntohl(th->ack_seq); - TCP_SKB_CB(skb)->when = 0; + TCP_SKB_CB(skb)->tcp_tw_isn = 0; TCP_SKB_CB(skb)->ip_dsfield = ipv6_get_dsfield(hdr); TCP_SKB_CB(skb)->sacked = 0; diff --git a/net/ipv6/tcpv6_offload.c b/net/ipv6/tcpv6_offload.c index 01b0ff9a0c2..dbb3d9262bf 100644 --- a/net/ipv6/tcpv6_offload.c +++ b/net/ipv6/tcpv6_offload.c @@ -35,34 +35,14 @@ static int tcp_v6_gso_send_check(struct sk_buff *skb) static struct sk_buff **tcp6_gro_receive(struct sk_buff **head, struct sk_buff *skb) { - const struct ipv6hdr *iph = skb_gro_network_header(skb); - __wsum wsum; - /* Don't bother verifying checksum if we're 
going to flush anyway. */ - if (NAPI_GRO_CB(skb)->flush) - goto skip_csum; - - wsum = NAPI_GRO_CB(skb)->csum; - - switch (skb->ip_summed) { - case CHECKSUM_NONE: - wsum = skb_checksum(skb, skb_gro_offset(skb), skb_gro_len(skb), - wsum); - - /* fall through */ - - case CHECKSUM_COMPLETE: - if (!tcp_v6_check(skb_gro_len(skb), &iph->saddr, &iph->daddr, - wsum)) { - skb->ip_summed = CHECKSUM_UNNECESSARY; - break; - } - + if (!NAPI_GRO_CB(skb)->flush && + skb_gro_checksum_validate(skb, IPPROTO_TCP, + ip6_gro_compute_pseudo)) { NAPI_GRO_CB(skb)->flush = 1; return NULL; } -skip_csum: return tcp_gro_receive(head, skb); } diff --git a/net/ipv6/tunnel6.c b/net/ipv6/tunnel6.c index 2c4e4c5c761..3c758007b32 100644 --- a/net/ipv6/tunnel6.c +++ b/net/ipv6/tunnel6.c @@ -15,7 +15,7 @@ * along with this program; if not, see <http://www.gnu.org/licenses/>. * * Authors Mitsuru KANDA <mk@linux-ipv6.org> - * YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org> + * YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org> */ #define pr_fmt(fmt) "IPv6: " fmt @@ -64,7 +64,6 @@ err: return ret; } - EXPORT_SYMBOL(xfrm6_tunnel_register); int xfrm6_tunnel_deregister(struct xfrm6_tunnel *handler, unsigned short family) @@ -92,7 +91,6 @@ int xfrm6_tunnel_deregister(struct xfrm6_tunnel *handler, unsigned short family) return ret; } - EXPORT_SYMBOL(xfrm6_tunnel_deregister); #define for_each_tunnel_rcu(head, handler) \ diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index 4836af8f582..f6ba535b6fe 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -243,7 +243,7 @@ begin: goto exact_match; } else if (score == badness && reuseport) { matches++; - if (((u64)hash * matches) >> 32 == 0) + if (reciprocal_scale(hash, matches) == 0) result = sk; hash = next_pseudo_random32(hash); } @@ -323,7 +323,7 @@ begin: } } else if (score == badness && reuseport) { matches++; - if (((u64)hash * matches) >> 32 == 0) + if (reciprocal_scale(hash, matches) == 0) result = sk; hash = next_pseudo_random32(hash); } @@ -373,8 +373,8 @@ EXPORT_SYMBOL_GPL(udp6_lib_lookup); /* - * This should be easy, if there is something there we - * return it, otherwise we block. + * This should be easy, if there is something there we + * return it, otherwise we block. 
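Worth noting in the udp.c changes that continue below: __udp6_lib_rcv() gains a skb_checksum_try_convert() call gated on udp_sk(sk)->convert_csum, which marks an already-validated packet CHECKSUM_UNNECESSARY so that an encapsulated inner payload is not checksummed a second time. Tunnel sockets opt in explicitly; the l2tp_core.c hunk near the end of this diff does so when it creates its UDP socket. A sketch of the opt-in (assuming the udp_set_convert_csum() helper this series relies on):

        /* after creating a tunnel's UDP socket: let the UDP receive
         * path convert validated checksums to CHECKSUM_UNNECESSARY */
        udp_set_convert_csum(sock->sk, true);
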
*/ int udpv6_recvmsg(struct kiocb *iocb, struct sock *sk, @@ -530,7 +530,7 @@ void __udp6_lib_err(struct sk_buff *skb, struct inet6_skb_parm *opt, const struct ipv6hdr *hdr = (const struct ipv6hdr *)skb->data; const struct in6_addr *saddr = &hdr->saddr; const struct in6_addr *daddr = &hdr->daddr; - struct udphdr *uh = (struct udphdr*)(skb->data+offset); + struct udphdr *uh = (struct udphdr *)(skb->data+offset); struct sock *sk; int err; struct net *net = dev_net(skb->dev); @@ -596,7 +596,7 @@ static int __udpv6_queue_rcv_skb(struct sock *sk, struct sk_buff *skb) static __inline__ void udpv6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, u8 type, - u8 code, int offset, __be32 info ) + u8 code, int offset, __be32 info) { __udp6_lib_err(skb, opt, type, code, offset, info, &udp_table); } @@ -891,6 +891,10 @@ int __udp6_lib_rcv(struct sk_buff *skb, struct udp_table *udptable, goto csum_error; } + if (udp_sk(sk)->convert_csum && uh->check && !IS_UDPLITE(sk)) + skb_checksum_try_convert(skb, IPPROTO_UDP, uh->check, + ip6_compute_pseudo); + ret = udpv6_queue_rcv_skb(sk, skb); sock_put(sk); @@ -960,10 +964,10 @@ static void udp_v6_flush_pending_frames(struct sock *sk) } /** - * udp6_hwcsum_outgoing - handle outgoing HW checksumming - * @sk: socket we are sending on - * @skb: sk_buff containing the filled-in UDP header - * (checksum field must be zeroed out) + * udp6_hwcsum_outgoing - handle outgoing HW checksumming + * @sk: socket we are sending on + * @skb: sk_buff containing the filled-in UDP header + * (checksum field must be zeroed out) */ static void udp6_hwcsum_outgoing(struct sock *sk, struct sk_buff *skb, const struct in6_addr *saddr, @@ -1294,7 +1298,7 @@ do_append_data: getfrag = is_udplite ? udplite_getfrag : ip_generic_getfrag; err = ip6_append_data(sk, getfrag, msg->msg_iov, ulen, sizeof(struct udphdr), hlimit, tclass, opt, &fl6, - (struct rt6_info*)dst, + (struct rt6_info *)dst, corkreq ? msg->msg_flags|MSG_MORE : msg->msg_flags, dontfrag); if (err) udp_v6_flush_pending_frames(sk); diff --git a/net/ipv6/udp_offload.c b/net/ipv6/udp_offload.c index 0ae3d98f83e..89cb9a9b853 100644 --- a/net/ipv6/udp_offload.c +++ b/net/ipv6/udp_offload.c @@ -10,6 +10,7 @@ * UDPv6 GSO support */ #include <linux/skbuff.h> +#include <linux/netdevice.h> #include <net/protocol.h> #include <net/ipv6.h> #include <net/udp.h> @@ -127,10 +128,52 @@ static struct sk_buff *udp6_ufo_fragment(struct sk_buff *skb, out: return segs; } + +static struct sk_buff **udp6_gro_receive(struct sk_buff **head, + struct sk_buff *skb) +{ + struct udphdr *uh = udp_gro_udphdr(skb); + + if (unlikely(!uh)) + goto flush; + + /* Don't bother verifying checksum if we're going to flush anyway. 
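A note on the udp6_gro_receive() body that follows (it mirrors udp4_gro_receive() from earlier in this diff): the "zero check" validation variant is used because a zero UDP checksum means "no checksum" over IPv4 but is not ordinarily legal over IPv6, so only the pseudo-header helper (ip6_gro_compute_pseudo here, inet_gro_compute_pseudo in the IPv4 path) differs between the two families. The matching udp6_gro_complete() then has to re-seed the checksum, since GRO produced a longer merged skb; a sketch of that step with the reasoning spelled out:

        /* the merged super-skb has a new length, so store the complement
         * of the pseudo-header sum over that length; the real checksum
         * is then finished over the payload */
        uh->check = ~udp_v6_check(skb->len - nhoff,
                                  &ipv6h->saddr, &ipv6h->daddr, 0);
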
*/ + if (NAPI_GRO_CB(skb)->flush) + goto skip; + + if (skb_gro_checksum_validate_zero_check(skb, IPPROTO_UDP, uh->check, + ip6_gro_compute_pseudo)) + goto flush; + else if (uh->check) + skb_gro_checksum_try_convert(skb, IPPROTO_UDP, uh->check, + ip6_gro_compute_pseudo); + +skip: + return udp_gro_receive(head, skb, uh); + +flush: + NAPI_GRO_CB(skb)->flush = 1; + return NULL; +} + +int udp6_gro_complete(struct sk_buff *skb, int nhoff) +{ + const struct ipv6hdr *ipv6h = ipv6_hdr(skb); + struct udphdr *uh = (struct udphdr *)(skb->data + nhoff); + + if (uh->check) + uh->check = ~udp_v6_check(skb->len - nhoff, &ipv6h->saddr, + &ipv6h->daddr, 0); + + return udp_gro_complete(skb, nhoff); +} + static const struct net_offload udpv6_offload = { .callbacks = { .gso_send_check = udp6_ufo_send_check, .gso_segment = udp6_ufo_fragment, + .gro_receive = udp6_gro_receive, + .gro_complete = udp6_gro_complete, }, }; diff --git a/net/ipv6/xfrm6_input.c b/net/ipv6/xfrm6_input.c index f8c3cf842f5..f48fbe4d16f 100644 --- a/net/ipv6/xfrm6_input.c +++ b/net/ipv6/xfrm6_input.c @@ -3,8 +3,8 @@ * * Authors: * Mitsuru KANDA @USAGI - * Kazunori MIYAZAWA @USAGI - * Kunihiro Ishiguro <kunihiro@ipinfusion.com> + * Kazunori MIYAZAWA @USAGI + * Kunihiro Ishiguro <kunihiro@ipinfusion.com> * YOSHIFUJI Hideaki @USAGI * IPv6 support */ @@ -52,7 +52,6 @@ int xfrm6_rcv(struct sk_buff *skb) return xfrm6_rcv_spi(skb, skb_network_header(skb)[IP6CB(skb)->nhoff], 0); } - EXPORT_SYMBOL(xfrm6_rcv); int xfrm6_input_addr(struct sk_buff *skb, xfrm_address_t *daddr, @@ -142,5 +141,4 @@ int xfrm6_input_addr(struct sk_buff *skb, xfrm_address_t *daddr, drop: return -1; } - EXPORT_SYMBOL(xfrm6_input_addr); diff --git a/net/ipv6/xfrm6_output.c b/net/ipv6/xfrm6_output.c index 433672d07d0..ca3f29b98ae 100644 --- a/net/ipv6/xfrm6_output.c +++ b/net/ipv6/xfrm6_output.c @@ -25,7 +25,6 @@ int xfrm6_find_1stfragopt(struct xfrm_state *x, struct sk_buff *skb, { return ip6_find_1stfragopt(skb, prevhdr); } - EXPORT_SYMBOL(xfrm6_find_1stfragopt); static int xfrm6_local_dontfrag(struct sk_buff *skb) diff --git a/net/ipv6/xfrm6_policy.c b/net/ipv6/xfrm6_policy.c index 2a0bbda2c76..ac49f84fe2c 100644 --- a/net/ipv6/xfrm6_policy.c +++ b/net/ipv6/xfrm6_policy.c @@ -3,11 +3,11 @@ * * Authors: * Mitsuru KANDA @USAGI - * Kazunori MIYAZAWA @USAGI - * Kunihiro Ishiguro <kunihiro@ipinfusion.com> - * IPv6 support - * YOSHIFUJI Hideaki - * Split up af-specific portion + * Kazunori MIYAZAWA @USAGI + * Kunihiro Ishiguro <kunihiro@ipinfusion.com> + * IPv6 support + * YOSHIFUJI Hideaki + * Split up af-specific portion * */ @@ -84,7 +84,7 @@ static int xfrm6_init_path(struct xfrm_dst *path, struct dst_entry *dst, int nfheader_len) { if (dst->ops->family == AF_INET6) { - struct rt6_info *rt = (struct rt6_info*)dst; + struct rt6_info *rt = (struct rt6_info *)dst; if (rt->rt6i_node) path->path_cookie = rt->rt6i_node->fn_sernum; } @@ -97,7 +97,7 @@ static int xfrm6_init_path(struct xfrm_dst *path, struct dst_entry *dst, static int xfrm6_fill_dst(struct xfrm_dst *xdst, struct net_device *dev, const struct flowi *fl) { - struct rt6_info *rt = (struct rt6_info*)xdst->route; + struct rt6_info *rt = (struct rt6_info *)xdst->route; xdst->u.dst.dev = dev; dev_hold(dev); @@ -296,7 +296,7 @@ static struct xfrm_policy_afinfo xfrm6_policy_afinfo = { .family = AF_INET6, .dst_ops = &xfrm6_dst_ops, .dst_lookup = xfrm6_dst_lookup, - .get_saddr = xfrm6_get_saddr, + .get_saddr = xfrm6_get_saddr, .decode_session = _decode_session6, .get_tos = xfrm6_get_tos, .init_dst = xfrm6_init_dst, @@ -319,9
+319,9 @@ static void xfrm6_policy_fini(void) static struct ctl_table xfrm6_policy_table[] = { { .procname = "xfrm6_gc_thresh", - .data = &init_net.xfrm.xfrm6_dst_ops.gc_thresh, - .maxlen = sizeof(int), - .mode = 0644, + .data = &init_net.xfrm.xfrm6_dst_ops.gc_thresh, + .maxlen = sizeof(int), + .mode = 0644, .proc_handler = proc_dointvec, }, { } diff --git a/net/ipv6/xfrm6_state.c b/net/ipv6/xfrm6_state.c index 3fc970135fc..8a1f9c0d2a1 100644 --- a/net/ipv6/xfrm6_state.c +++ b/net/ipv6/xfrm6_state.c @@ -3,11 +3,11 @@ * * Authors: * Mitsuru KANDA @USAGI - * Kazunori MIYAZAWA @USAGI - * Kunihiro Ishiguro <kunihiro@ipinfusion.com> - * IPv6 support - * YOSHIFUJI Hideaki @USAGI - * Split up af-specific portion + * Kazunori MIYAZAWA @USAGI + * Kunihiro Ishiguro <kunihiro@ipinfusion.com> + * IPv6 support + * YOSHIFUJI Hideaki @USAGI + * Split up af-specific portion * */ @@ -45,10 +45,10 @@ xfrm6_init_temprop(struct xfrm_state *x, const struct xfrm_tmpl *tmpl, const xfrm_address_t *daddr, const xfrm_address_t *saddr) { x->id = tmpl->id; - if (ipv6_addr_any((struct in6_addr*)&x->id.daddr)) + if (ipv6_addr_any((struct in6_addr *)&x->id.daddr)) memcpy(&x->id.daddr, daddr, sizeof(x->sel.daddr)); memcpy(&x->props.saddr, &tmpl->saddr, sizeof(x->props.saddr)); - if (ipv6_addr_any((struct in6_addr*)&x->props.saddr)) + if (ipv6_addr_any((struct in6_addr *)&x->props.saddr)) memcpy(&x->props.saddr, saddr, sizeof(x->props.saddr)); x->props.mode = tmpl->mode; x->props.reqid = tmpl->reqid; diff --git a/net/ipv6/xfrm6_tunnel.c b/net/ipv6/xfrm6_tunnel.c index 1c66465a42d..5743044cd66 100644 --- a/net/ipv6/xfrm6_tunnel.c +++ b/net/ipv6/xfrm6_tunnel.c @@ -15,7 +15,7 @@ * along with this program; if not, see <http://www.gnu.org/licenses/>. * * Authors Mitsuru KANDA <mk@linux-ipv6.org> - * YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org> + * YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org> * * Based on net/ipv4/xfrm4_tunnel.c * @@ -110,7 +110,6 @@ __be32 xfrm6_tunnel_spi_lookup(struct net *net, const xfrm_address_t *saddr) rcu_read_unlock_bh(); return htonl(spi); } - EXPORT_SYMBOL(xfrm6_tunnel_spi_lookup); static int __xfrm6_tunnel_spi_check(struct net *net, u32 spi) @@ -187,7 +186,6 @@ __be32 xfrm6_tunnel_alloc_spi(struct net *net, xfrm_address_t *saddr) return htonl(spi); } - EXPORT_SYMBOL(xfrm6_tunnel_alloc_spi); static void x6spi_destroy_rcu(struct rcu_head *head) diff --git a/net/l2tp/l2tp_core.c b/net/l2tp/l2tp_core.c index 1109d3bb8da..2aa2b6c15f2 100644 --- a/net/l2tp/l2tp_core.c +++ b/net/l2tp/l2tp_core.c @@ -148,7 +148,7 @@ do { \ atomic_read(&_t->ref_count)); \ l2tp_tunnel_inc_refcount_1(_t); \ } while (0) -#define l2tp_tunnel_dec_refcount(_t) +#define l2tp_tunnel_dec_refcount(_t) \ do { \ pr_debug("l2tp_tunnel_dec_refcount: %s:%d %s: cnt=%d\n", \ __func__, __LINE__, (_t)->name, \ @@ -1392,6 +1392,8 @@ static int l2tp_tunnel_sock_create(struct net *net, if (err < 0) goto out; + udp_set_convert_csum(sock->sk, true); + break; case L2TP_ENCAPTYPE_IP: diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index de88c4ab514..5016a692908 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -142,7 +142,7 @@ static u32 hash_conntrack_raw(const struct nf_conntrack_tuple *tuple, u16 zone) static u32 __hash_bucket(u32 hash, unsigned int size) { - return ((u64)hash * size) >> 32; + return reciprocal_scale(hash, size); } static u32 hash_bucket(u32 hash, const struct net *net) @@ -358,7 +358,7 @@ bool nf_ct_delete(struct nf_conn *ct, u32 portid, int report) tstamp = 
nf_conn_tstamp_find(ct); if (tstamp && tstamp->stop == 0) - tstamp->stop = ktime_to_ns(ktime_get_real()); + tstamp->stop = ktime_get_real_ns(); if (nf_ct_is_dying(ct)) goto delete; diff --git a/net/netfilter/nf_conntrack_expect.c b/net/netfilter/nf_conntrack_expect.c index f87e8f68ad4..91a1837acd0 100644 --- a/net/netfilter/nf_conntrack_expect.c +++ b/net/netfilter/nf_conntrack_expect.c @@ -83,7 +83,8 @@ static unsigned int nf_ct_expect_dst_hash(const struct nf_conntrack_tuple *tuple hash = jhash2(tuple->dst.u3.all, ARRAY_SIZE(tuple->dst.u3.all), (((tuple->dst.protonum ^ tuple->src.l3num) << 16) | (__force __u16)tuple->dst.u.all) ^ nf_conntrack_hash_rnd); - return ((u64)hash * nf_ct_expect_hsize) >> 32; + + return reciprocal_scale(hash, nf_ct_expect_hsize); } struct nf_conntrack_expect * diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c index 355a5c4ef76..1bd9ed9e62f 100644 --- a/net/netfilter/nf_conntrack_netlink.c +++ b/net/netfilter/nf_conntrack_netlink.c @@ -1737,7 +1737,7 @@ ctnetlink_create_conntrack(struct net *net, u16 zone, } tstamp = nf_conn_tstamp_find(ct); if (tstamp) - tstamp->start = ktime_to_ns(ktime_get_real()); + tstamp->start = ktime_get_real_ns(); err = nf_conntrack_hash_check_insert(ct); if (err < 0) diff --git a/net/netfilter/nf_conntrack_standalone.c b/net/netfilter/nf_conntrack_standalone.c index f641751dba9..cf65a1e040d 100644 --- a/net/netfilter/nf_conntrack_standalone.c +++ b/net/netfilter/nf_conntrack_standalone.c @@ -101,7 +101,7 @@ static void *ct_seq_start(struct seq_file *seq, loff_t *pos) { struct ct_iter_state *st = seq->private; - st->time_now = ktime_to_ns(ktime_get_real()); + st->time_now = ktime_get_real_ns(); rcu_read_lock(); return ct_get_idx(seq, *pos); } diff --git a/net/netfilter/nf_nat_core.c b/net/netfilter/nf_nat_core.c index 552f97cd9fd..4e0b47831d4 100644 --- a/net/netfilter/nf_nat_core.c +++ b/net/netfilter/nf_nat_core.c @@ -126,7 +126,8 @@ hash_by_src(const struct net *net, u16 zone, /* Original src, to ensure we map it consistently if poss. */ hash = jhash2((u32 *)&tuple->src, sizeof(tuple->src) / sizeof(u32), tuple->dst.protonum ^ zone ^ nf_conntrack_hash_rnd); - return ((u64)hash * net->ct.nat_htable_size) >> 32; + + return reciprocal_scale(hash, net->ct.nat_htable_size); } /* Is this tuple already taken? 
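The ktime conversions in these conntrack hunks (and in the act_police/fq/htb/tbf scheduler hunks further down) rely on the nanosecond accessors from the 3.17 timekeeping rework. Assuming include/linux/timekeeping.h, both are thin wrappers, so the substitution is mechanical:

static inline u64 ktime_get_ns(void)
{
        return ktime_to_ns(ktime_get());        /* monotonic clock, in ns */
}

static inline u64 ktime_get_real_ns(void)
{
        return ktime_to_ns(ktime_get_real());   /* wall clock, in ns */
}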
(not by us) */ @@ -274,7 +275,7 @@ find_best_ips_proto(u16 zone, struct nf_conntrack_tuple *tuple, } var_ipp->all[i] = (__force __u32) - htonl(minip + (((u64)j * dist) >> 32)); + htonl(minip + reciprocal_scale(j, dist)); if (var_ipp->all[i] != range->max_addr.all[i]) full_range = true; diff --git a/net/netfilter/xt_HMARK.c b/net/netfilter/xt_HMARK.c index 73b73f687c5..02afaf48a72 100644 --- a/net/netfilter/xt_HMARK.c +++ b/net/netfilter/xt_HMARK.c @@ -126,7 +126,7 @@ hmark_hash(struct hmark_tuple *t, const struct xt_hmark_info *info) hash = jhash_3words(src, dst, t->uports.v32, info->hashrnd); hash = hash ^ (t->proto & info->proto_mask); - return (((u64)hash * info->hmodulus) >> 32) + info->hoffset; + return reciprocal_scale(hash, info->hmodulus) + info->hoffset; } static void diff --git a/net/netfilter/xt_cluster.c b/net/netfilter/xt_cluster.c index f4af1bfafb1..96fa26b20b6 100644 --- a/net/netfilter/xt_cluster.c +++ b/net/netfilter/xt_cluster.c @@ -55,7 +55,8 @@ xt_cluster_hash(const struct nf_conn *ct, WARN_ON(1); break; } - return (((u64)hash * info->total_nodes) >> 32); + + return reciprocal_scale(hash, info->total_nodes); } static inline bool diff --git a/net/netfilter/xt_hashlimit.c b/net/netfilter/xt_hashlimit.c index 47dc6836830..52eb3e03458 100644 --- a/net/netfilter/xt_hashlimit.c +++ b/net/netfilter/xt_hashlimit.c @@ -135,7 +135,7 @@ hash_dst(const struct xt_hashlimit_htable *ht, const struct dsthash_dst *dst) * give results between [0 and cfg.size-1] and same hash distribution, * but using a multiply, less expensive than a divide */ - return ((u64)hash * ht->cfg.size) >> 32; + return reciprocal_scale(hash, ht->cfg.size); } static struct dsthash_ent * diff --git a/net/openvswitch/flow.c b/net/openvswitch/flow.c index d07ab538fc9..7064da92f42 100644 --- a/net/openvswitch/flow.c +++ b/net/openvswitch/flow.c @@ -89,7 +89,7 @@ void ovs_flow_stats_update(struct sw_flow *flow, __be16 tcp_flags, * allocated stats as we have already locked them. */ if (likely(flow->stats_last_writer != NUMA_NO_NODE) - && likely(!rcu_dereference(flow->stats[node]))) { + && likely(!rcu_access_pointer(flow->stats[node]))) { /* Try to allocate node-specific stats. 
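Every one of the open-coded ((u64)hash * range) >> 32 sites in this series collapses into reciprocal_scale(). For reference, the helper in include/linux/kernel.h is the same multiply-and-shift, just named:

static inline u32 reciprocal_scale(u32 val, u32 ep_ro)
{
        /* Map val, assumed uniform over the 32-bit space, into
         * [0, ep_ro) without a divide: the high 32 bits of the
         * 64-bit product are uniformly distributed over the range.
         */
        return (u32)(((u64)val * ep_ro) >> 32);
}

so e.g. reciprocal_scale(jhash2(...), nf_ct_expect_hsize) picks a bucket exactly as the removed expression did, only more legibly.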
*/ struct flow_stats *new_stats; diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index 93896d2092f..87d20f48ff0 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -240,11 +240,9 @@ static void __fanout_link(struct sock *sk, struct packet_sock *po); static int packet_direct_xmit(struct sk_buff *skb) { struct net_device *dev = skb->dev; - const struct net_device_ops *ops = dev->netdev_ops; netdev_features_t features; struct netdev_queue *txq; int ret = NETDEV_TX_BUSY; - u16 queue_map; if (unlikely(!netif_running(dev) || !netif_carrier_ok(dev))) @@ -255,17 +253,13 @@ static int packet_direct_xmit(struct sk_buff *skb) __skb_linearize(skb)) goto drop; - queue_map = skb_get_queue_mapping(skb); - txq = netdev_get_tx_queue(dev, queue_map); + txq = skb_get_tx_queue(dev, skb); local_bh_disable(); HARD_TX_LOCK(dev, txq, smp_processor_id()); - if (!netif_xmit_frozen_or_drv_stopped(txq)) { - ret = ops->ndo_start_xmit(skb, dev); - if (ret == NETDEV_TX_OK) - txq_trans_update(txq); - } + if (!netif_xmit_frozen_or_drv_stopped(txq)) + ret = netdev_start_xmit(skb, dev, txq, false); HARD_TX_UNLOCK(dev, txq); local_bh_enable(); diff --git a/net/rose/rose_link.c b/net/rose/rose_link.c index bc5514211b0..e873d7d9f85 100644 --- a/net/rose/rose_link.c +++ b/net/rose/rose_link.c @@ -160,7 +160,8 @@ void rose_link_rx_restart(struct sk_buff *skb, struct rose_neigh *neigh, unsigne break; case ROSE_DIAGNOSTIC: - printk(KERN_WARNING "ROSE: received diagnostic #%d - %02X %02X %02X\n", skb->data[3], skb->data[4], skb->data[5], skb->data[6]); + pr_warn("ROSE: received diagnostic #%d - %3ph\n", skb->data[3], + skb->data + 4); break; default: diff --git a/net/rxrpc/ar-error.c b/net/rxrpc/ar-error.c index db57458c824..74c0fcd3683 100644 --- a/net/rxrpc/ar-error.c +++ b/net/rxrpc/ar-error.c @@ -37,7 +37,7 @@ void rxrpc_UDP_error_report(struct sock *sk) _enter("%p{%d}", sk, local->debug_id); - skb = skb_dequeue(&sk->sk_error_queue); + skb = sock_dequeue_err_skb(sk); if (!skb) { _leave("UDP socket errqueue empty"); return; @@ -111,18 +111,6 @@ void rxrpc_UDP_error_report(struct sock *sk) skb_queue_tail(&trans->error_queue, skb); rxrpc_queue_work(&trans->error_handler); - /* reset and regenerate socket error */ - spin_lock_bh(&sk->sk_error_queue.lock); - sk->sk_err = 0; - skb = skb_peek(&sk->sk_error_queue); - if (skb) { - sk->sk_err = SKB_EXT_ERR(skb)->ee.ee_errno; - spin_unlock_bh(&sk->sk_error_queue.lock); - sk->sk_error_report(sk); - } else { - spin_unlock_bh(&sk->sk_error_queue.lock); - } - _leave(""); } diff --git a/net/rxrpc/ar-input.c b/net/rxrpc/ar-input.c index 63b21e580de..481f89f9378 100644 --- a/net/rxrpc/ar-input.c +++ b/net/rxrpc/ar-input.c @@ -45,7 +45,7 @@ int rxrpc_queue_rcv_skb(struct rxrpc_call *call, struct sk_buff *skb, struct rxrpc_skb_priv *sp; struct rxrpc_sock *rx = call->socket; struct sock *sk; - int skb_len, ret; + int ret; _enter(",,%d,%d", force, terminal); @@ -101,13 +101,6 @@ int rxrpc_queue_rcv_skb(struct rxrpc_call *call, struct sk_buff *skb, rx->interceptor(sk, call->user_call_ID, skb); spin_unlock_bh(&sk->sk_receive_queue.lock); } else { - - /* Cache the SKB length before we tack it onto the - * receive queue. 
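packet_direct_xmit() above, like the qdisc and teql hunks below, stops dereferencing dev->netdev_ops by hand. Assuming the include/linux/netdevice.h side of this series, the two helpers it now uses amount to:

static inline struct netdev_queue *
skb_get_tx_queue(const struct net_device *dev, const struct sk_buff *skb)
{
        return netdev_get_tx_queue(dev, skb_get_queue_mapping(skb));
}

static inline netdev_tx_t netdev_start_xmit(struct sk_buff *skb,
                                            struct net_device *dev,
                                            struct netdev_queue *txq,
                                            bool more)
{
        const struct net_device_ops *ops = dev->netdev_ops;
        int rc;

        rc = __netdev_start_xmit(ops, skb, dev, more);  /* sets xmit_more */
        if (rc == NETDEV_TX_OK)
                txq_trans_update(txq);  /* the update callers used to do */

        return rc;
}

which is why the explicit txq_trans_update() calls disappear from the converted callers.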
Once it is added it no longer - * belongs to us and may be freed by other threads of - * control pulling packets from the queue */ - skb_len = skb->len; - _net("post skb %p", skb); __skb_queue_tail(&sk->sk_receive_queue, skb); spin_unlock_bh(&sk->sk_receive_queue.lock); diff --git a/net/sched/act_police.c b/net/sched/act_police.c index 0566e4606a4..f32bcb09491 100644 --- a/net/sched/act_police.c +++ b/net/sched/act_police.c @@ -231,7 +231,7 @@ override: if (ret != ACT_P_CREATED) return ret; - police->tcfp_t_c = ktime_to_ns(ktime_get()); + police->tcfp_t_c = ktime_get_ns(); police->tcf_index = parm->index ? parm->index : tcf_hash_new_index(hinfo); h = tcf_hash(police->tcf_index, POL_TAB_MASK); @@ -279,7 +279,7 @@ static int tcf_act_police(struct sk_buff *skb, const struct tc_action *a, return police->tcfp_result; } - now = ktime_to_ns(ktime_get()); + now = ktime_get_ns(); toks = min_t(s64, now - police->tcfp_t_c, police->tcfp_burst); if (police->peak_present) { diff --git a/net/sched/sch_fq.c b/net/sched/sch_fq.c index ba32c2b005d..e12f997e1b4 100644 --- a/net/sched/sch_fq.c +++ b/net/sched/sch_fq.c @@ -416,7 +416,7 @@ static void fq_check_throttled(struct fq_sched_data *q, u64 now) static struct sk_buff *fq_dequeue(struct Qdisc *sch) { struct fq_sched_data *q = qdisc_priv(sch); - u64 now = ktime_to_ns(ktime_get()); + u64 now = ktime_get_ns(); struct fq_flow_head *head; struct sk_buff *skb; struct fq_flow *f; @@ -787,7 +787,7 @@ nla_put_failure: static int fq_dump_stats(struct Qdisc *sch, struct gnet_dump *d) { struct fq_sched_data *q = qdisc_priv(sch); - u64 now = ktime_to_ns(ktime_get()); + u64 now = ktime_get_ns(); struct tc_fq_qd_stats st = { .gc_flows = q->stat_gc_flows, .highprio_packets = q->stat_internal_packets, diff --git a/net/sched/sch_fq_codel.c b/net/sched/sch_fq_codel.c index 063b726bf1f..cc56c8bb9be 100644 --- a/net/sched/sch_fq_codel.c +++ b/net/sched/sch_fq_codel.c @@ -77,7 +77,8 @@ static unsigned int fq_codel_hash(const struct fq_codel_sched_data *q, hash = jhash_3words((__force u32)keys.dst, (__force u32)keys.src ^ keys.ip_proto, (__force u32)keys.ports, q->perturbation); - return ((u64)hash * q->flows_cnt) >> 32; + + return reciprocal_scale(hash, q->flows_cnt); } static unsigned int fq_codel_classify(struct sk_buff *skb, struct Qdisc *sch, diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c index fc04fe93c2d..19696ebe9eb 100644 --- a/net/sched/sch_generic.c +++ b/net/sched/sch_generic.c @@ -63,15 +63,18 @@ static inline struct sk_buff *dequeue_skb(struct Qdisc *q) if (unlikely(skb)) { /* check the reason of requeuing without tx lock first */ - txq = netdev_get_tx_queue(txq->dev, skb_get_queue_mapping(skb)); + txq = skb_get_tx_queue(txq->dev, skb); if (!netif_xmit_frozen_or_stopped(txq)) { q->gso_skb = NULL; q->q.qlen--; } else skb = NULL; } else { - if (!(q->flags & TCQ_F_ONETXQUEUE) || !netif_xmit_frozen_or_stopped(txq)) + if (!(q->flags & TCQ_F_ONETXQUEUE) || !netif_xmit_frozen_or_stopped(txq)) { skb = q->dequeue(q); + if (skb) + skb = validate_xmit_skb(skb, qdisc_dev(q)); + } } return skb; @@ -90,7 +93,7 @@ static inline int handle_dev_cpu_collision(struct sk_buff *skb, * detect it by checking xmit owner and drop the packet when * deadloop is detected. Return OK to try the next skb. 
*/ - kfree_skb(skb); + kfree_skb_list(skb); net_warn_ratelimited("Dead loop on netdevice %s, fix it urgently!\n", dev_queue->dev->name); ret = qdisc_qlen(q); @@ -107,9 +110,9 @@ static inline int handle_dev_cpu_collision(struct sk_buff *skb, } /* - * Transmit one skb, and handle the return status as required. Holding the - * __QDISC___STATE_RUNNING bit guarantees that only one CPU can execute this - * function. + * Transmit possibly several skbs, and handle the return status as + * required. Holding the __QDISC___STATE_RUNNING bit guarantees that + * only one CPU can execute this function. * * Returns to the caller: * 0 - queue is empty or throttled. @@ -126,7 +129,7 @@ int sch_direct_xmit(struct sk_buff *skb, struct Qdisc *q, HARD_TX_LOCK(dev, txq, smp_processor_id()); if (!netif_xmit_frozen_or_stopped(txq)) - ret = dev_hard_start_xmit(skb, dev, txq); + skb = dev_hard_start_xmit(skb, dev, txq, &ret); HARD_TX_UNLOCK(dev, txq); @@ -183,10 +186,12 @@ static inline int qdisc_restart(struct Qdisc *q) skb = dequeue_skb(q); if (unlikely(!skb)) return 0; + WARN_ON_ONCE(skb_dst_is_noref(skb)); + root_lock = qdisc_lock(q); dev = qdisc_dev(q); - txq = netdev_get_tx_queue(dev, skb_get_queue_mapping(skb)); + txq = skb_get_tx_queue(dev, skb); return sch_direct_xmit(skb, q, dev, txq, root_lock); } @@ -616,7 +621,7 @@ void qdisc_reset(struct Qdisc *qdisc) ops->reset(qdisc); if (qdisc->gso_skb) { - kfree_skb(qdisc->gso_skb); + kfree_skb_list(qdisc->gso_skb); qdisc->gso_skb = NULL; qdisc->q.qlen = 0; } @@ -652,7 +657,7 @@ void qdisc_destroy(struct Qdisc *qdisc) module_put(ops->owner); dev_put(qdisc_dev(qdisc)); - kfree_skb(qdisc->gso_skb); + kfree_skb_list(qdisc->gso_skb); /* * gen_estimator est_timer() might access qdisc->q.lock, * wait a RCU grace period before freeing qdisc. diff --git a/net/sched/sch_htb.c b/net/sched/sch_htb.c index 9f949abcace..aea942ce600 100644 --- a/net/sched/sch_htb.c +++ b/net/sched/sch_htb.c @@ -895,7 +895,7 @@ ok: if (!sch->q.qlen) goto fin; - q->now = ktime_to_ns(ktime_get()); + q->now = ktime_get_ns(); start_at = jiffies; next_event = q->now + 5LLU * NSEC_PER_SEC; @@ -1225,7 +1225,7 @@ static void htb_parent_to_leaf(struct htb_sched *q, struct htb_class *cl, parent->un.leaf.q = new_q ? 
new_q : &noop_qdisc; parent->tokens = parent->buffer; parent->ctokens = parent->cbuffer; - parent->t_c = ktime_to_ns(ktime_get()); + parent->t_c = ktime_get_ns(); parent->cmode = HTB_CAN_SEND; } @@ -1455,7 +1455,7 @@ static int htb_change_class(struct Qdisc *sch, u32 classid, cl->tokens = PSCHED_TICKS2NS(hopt->buffer); cl->ctokens = PSCHED_TICKS2NS(hopt->cbuffer); cl->mbuffer = 60ULL * NSEC_PER_SEC; /* 1min */ - cl->t_c = ktime_to_ns(ktime_get()); + cl->t_c = ktime_get_ns(); cl->cmode = HTB_CAN_SEND; /* attach to the hash list and parent's family */ diff --git a/net/sched/sch_tbf.c b/net/sched/sch_tbf.c index 18ff6343370..0c39b754083 100644 --- a/net/sched/sch_tbf.c +++ b/net/sched/sch_tbf.c @@ -239,7 +239,7 @@ static struct sk_buff *tbf_dequeue(struct Qdisc *sch) s64 ptoks = 0; unsigned int len = qdisc_pkt_len(skb); - now = ktime_to_ns(ktime_get()); + now = ktime_get_ns(); toks = min_t(s64, now - q->t_c, q->buffer); if (tbf_peak_present(q)) { @@ -292,7 +292,7 @@ static void tbf_reset(struct Qdisc *sch) qdisc_reset(q->qdisc); sch->q.qlen = 0; - q->t_c = ktime_to_ns(ktime_get()); + q->t_c = ktime_get_ns(); q->tokens = q->buffer; q->ptokens = q->mtu; qdisc_watchdog_cancel(&q->watchdog); @@ -431,7 +431,7 @@ static int tbf_init(struct Qdisc *sch, struct nlattr *opt) if (opt == NULL) return -EINVAL; - q->t_c = ktime_to_ns(ktime_get()); + q->t_c = ktime_get_ns(); qdisc_watchdog_init(&q->watchdog, sch); q->qdisc = &noop_qdisc; diff --git a/net/sched/sch_teql.c b/net/sched/sch_teql.c index bd33793b527..aaa8d03ed05 100644 --- a/net/sched/sch_teql.c +++ b/net/sched/sch_teql.c @@ -301,7 +301,6 @@ restart: do { struct net_device *slave = qdisc_dev(q); struct netdev_queue *slave_txq = netdev_get_tx_queue(slave, 0); - const struct net_device_ops *slave_ops = slave->netdev_ops; if (slave_txq->qdisc_sleeping != q) continue; @@ -317,8 +316,8 @@ restart: unsigned int length = qdisc_pkt_len(skb); if (!netif_xmit_frozen_or_stopped(slave_txq) && - slave_ops->ndo_start_xmit(skb, slave) == NETDEV_TX_OK) { - txq_trans_update(slave_txq); + netdev_start_xmit(skb, slave, slave_txq, false) == + NETDEV_TX_OK) { __netif_tx_unlock(slave_txq); master->slaves = NEXT_SLAVE(q); netif_wake_queue(dev); diff --git a/net/sctp/input.c b/net/sctp/input.c index c1b99129451..b6493b3f11a 100644 --- a/net/sctp/input.c +++ b/net/sctp/input.c @@ -133,9 +133,13 @@ int sctp_rcv(struct sk_buff *skb) __skb_pull(skb, skb_transport_offset(skb)); if (skb->len < sizeof(struct sctphdr)) goto discard_it; - if (!sctp_checksum_disable && !skb_csum_unnecessary(skb) && - sctp_rcv_checksum(net, skb) < 0) + + skb->csum_valid = 0; /* Previous value not applicable */ + if (skb_csum_unnecessary(skb)) + __skb_decr_checksum_unnecessary(skb); + else if (!sctp_checksum_disable && sctp_rcv_checksum(net, skb) < 0) goto discard_it; + skb->csum_valid = 1; skb_pull(skb, sizeof(struct sctphdr)); diff --git a/net/tipc/Makefile b/net/tipc/Makefile index a080c66d819..b8a13caad59 100644 --- a/net/tipc/Makefile +++ b/net/tipc/Makefile @@ -7,7 +7,7 @@ obj-$(CONFIG_TIPC) := tipc.o tipc-y += addr.o bcast.o bearer.o config.o \ core.o link.o discover.o msg.o \ name_distr.o subscr.o name_table.o net.o \ - netlink.o node.o node_subscr.o port.o ref.o \ + netlink.o node.o node_subscr.o \ socket.o log.o eth_media.o server.o tipc-$(CONFIG_TIPC_MEDIA_IB) += ib_media.o diff --git a/net/tipc/bcast.c b/net/tipc/bcast.c index dd13bfa0933..b2bbe69b255 100644 --- a/net/tipc/bcast.c +++ b/net/tipc/bcast.c @@ -37,7 +37,6 @@ #include "core.h" #include "link.h" -#include "port.h" #include 
"socket.h" #include "msg.h" #include "bcast.h" @@ -300,8 +299,8 @@ void tipc_bclink_acknowledge(struct tipc_node *n_ptr, u32 acked) tipc_link_push_queue(bcl); bclink_set_last_sent(); } - if (unlikely(released && !list_empty(&bcl->waiting_ports))) - tipc_link_wakeup_ports(bcl, 0); + if (unlikely(released && !skb_queue_empty(&bcl->waiting_sks))) + bclink->node.action_flags |= TIPC_WAKEUP_USERS; exit: tipc_bclink_unlock(); } @@ -840,9 +839,10 @@ int tipc_bclink_init(void) sprintf(bcbearer->media.name, "tipc-broadcast"); spin_lock_init(&bclink->lock); - INIT_LIST_HEAD(&bcl->waiting_ports); + __skb_queue_head_init(&bcl->waiting_sks); bcl->next_out_no = 1; spin_lock_init(&bclink->node.lock); + __skb_queue_head_init(&bclink->node.waiting_sks); bcl->owner = &bclink->node; bcl->max_pkt = MAX_PKT_DEFAULT_MCAST; tipc_link_set_queue_limits(bcl, BCLINK_WIN_DEFAULT); diff --git a/net/tipc/config.c b/net/tipc/config.c index 2b42403ad33..876f4c6a263 100644 --- a/net/tipc/config.c +++ b/net/tipc/config.c @@ -35,7 +35,7 @@ */ #include "core.h" -#include "port.h" +#include "socket.h" #include "name_table.h" #include "config.h" #include "server.h" @@ -266,7 +266,7 @@ struct sk_buff *tipc_cfg_do_cmd(u32 orig_node, u16 cmd, const void *request_area rep_tlv_buf = tipc_media_get_names(); break; case TIPC_CMD_SHOW_PORTS: - rep_tlv_buf = tipc_port_get_ports(); + rep_tlv_buf = tipc_sk_socks_show(); break; case TIPC_CMD_SHOW_STATS: rep_tlv_buf = tipc_show_stats(); diff --git a/net/tipc/core.c b/net/tipc/core.c index 676d18015dd..a5737b8407d 100644 --- a/net/tipc/core.c +++ b/net/tipc/core.c @@ -35,11 +35,10 @@ */ #include "core.h" -#include "ref.h" #include "name_table.h" #include "subscr.h" #include "config.h" -#include "port.h" +#include "socket.h" #include <linux/module.h> @@ -85,7 +84,7 @@ static void tipc_core_stop(void) tipc_netlink_stop(); tipc_subscr_stop(); tipc_nametbl_stop(); - tipc_ref_table_stop(); + tipc_sk_ref_table_stop(); tipc_socket_stop(); tipc_unregister_sysctl(); } @@ -99,7 +98,7 @@ static int tipc_core_start(void) get_random_bytes(&tipc_random, sizeof(tipc_random)); - err = tipc_ref_table_init(tipc_max_ports, tipc_random); + err = tipc_sk_ref_table_init(tipc_max_ports, tipc_random); if (err) goto out_reftbl; @@ -139,7 +138,7 @@ out_socket: out_netlink: tipc_nametbl_stop(); out_nametbl: - tipc_ref_table_stop(); + tipc_sk_ref_table_stop(); out_reftbl: return err; } diff --git a/net/tipc/core.h b/net/tipc/core.h index bb26ed1ee96..f773b148722 100644 --- a/net/tipc/core.h +++ b/net/tipc/core.h @@ -81,6 +81,7 @@ extern u32 tipc_own_addr __read_mostly; extern int tipc_max_ports __read_mostly; extern int tipc_net_id __read_mostly; extern int sysctl_tipc_rmem[3] __read_mostly; +extern int sysctl_tipc_named_timeout __read_mostly; /* * Other global variables @@ -187,8 +188,11 @@ static inline void k_term_timer(struct timer_list *timer) struct tipc_skb_cb { void *handle; - bool deferred; struct sk_buff *tail; + bool deferred; + bool wakeup_pending; + u16 chain_sz; + u16 chain_imp; }; #define TIPC_SKB_CB(__skb) ((struct tipc_skb_cb *)&((__skb)->cb[0])) diff --git a/net/tipc/link.c b/net/tipc/link.c index fb1485dc673..65410e18b8a 100644 --- a/net/tipc/link.c +++ b/net/tipc/link.c @@ -36,7 +36,6 @@ #include "core.h" #include "link.h" -#include "port.h" #include "socket.h" #include "name_distr.h" #include "discover.h" @@ -275,7 +274,7 @@ struct tipc_link *tipc_link_create(struct tipc_node *n_ptr, link_init_max_pkt(l_ptr); l_ptr->next_out_no = 1; - INIT_LIST_HEAD(&l_ptr->waiting_ports); + 
__skb_queue_head_init(&l_ptr->waiting_sks); link_reset_statistics(l_ptr); @@ -322,66 +321,47 @@ void tipc_link_delete_list(unsigned int bearer_id, bool shutting_down) } /** - * link_schedule_port - schedule port for deferred sending - * @l_ptr: pointer to link - * @origport: reference to sending port - * @sz: amount of data to be sent - * - * Schedules port for renewed sending of messages after link congestion - * has abated. + * link_schedule_user - schedule user for wakeup after congestion + * @link: congested link + * @oport: sending port + * @chain_sz: size of buffer chain that was attempted sent + * @imp: importance of message attempted sent + * Create pseudo msg to send back to user when congestion abates */ -static int link_schedule_port(struct tipc_link *l_ptr, u32 origport, u32 sz) +static bool link_schedule_user(struct tipc_link *link, u32 oport, + uint chain_sz, uint imp) { - struct tipc_port *p_ptr; - struct tipc_sock *tsk; + struct sk_buff *buf; - spin_lock_bh(&tipc_port_list_lock); - p_ptr = tipc_port_lock(origport); - if (p_ptr) { - if (!list_empty(&p_ptr->wait_list)) - goto exit; - tsk = tipc_port_to_sock(p_ptr); - tsk->link_cong = 1; - p_ptr->waiting_pkts = 1 + ((sz - 1) / l_ptr->max_pkt); - list_add_tail(&p_ptr->wait_list, &l_ptr->waiting_ports); - l_ptr->stats.link_congs++; -exit: - tipc_port_unlock(p_ptr); - } - spin_unlock_bh(&tipc_port_list_lock); - return -ELINKCONG; + buf = tipc_msg_create(SOCK_WAKEUP, 0, INT_H_SIZE, 0, tipc_own_addr, + tipc_own_addr, oport, 0, 0); + if (!buf) + return false; + TIPC_SKB_CB(buf)->chain_sz = chain_sz; + TIPC_SKB_CB(buf)->chain_imp = imp; + __skb_queue_tail(&link->waiting_sks, buf); + link->stats.link_congs++; + return true; } -void tipc_link_wakeup_ports(struct tipc_link *l_ptr, int all) +/** + * link_prepare_wakeup - prepare users for wakeup after congestion + * @link: congested link + * Move a number of waiting users, as permitted by available space in + * the send queue, from link wait queue to node wait queue for wakeup + */ +static void link_prepare_wakeup(struct tipc_link *link) { - struct tipc_port *p_ptr; - struct tipc_sock *tsk; - struct tipc_port *temp_p_ptr; - int win = l_ptr->queue_limit[0] - l_ptr->out_queue_size; - - if (all) - win = 100000; - if (win <= 0) - return; - if (!spin_trylock_bh(&tipc_port_list_lock)) - return; - if (link_congested(l_ptr)) - goto exit; - list_for_each_entry_safe(p_ptr, temp_p_ptr, &l_ptr->waiting_ports, - wait_list) { - if (win <= 0) + struct sk_buff_head *wq = &link->waiting_sks; + struct sk_buff *buf; + uint pend_qsz = link->out_queue_size; + + for (buf = skb_peek(wq); buf; buf = skb_peek(wq)) { + if (pend_qsz >= link->queue_limit[TIPC_SKB_CB(buf)->chain_imp]) break; - tsk = tipc_port_to_sock(p_ptr); - list_del_init(&p_ptr->wait_list); - spin_lock_bh(p_ptr->lock); - tsk->link_cong = 0; - tipc_sock_wakeup(tsk); - win -= p_ptr->waiting_pkts; - spin_unlock_bh(p_ptr->lock); + pend_qsz += TIPC_SKB_CB(buf)->chain_sz; + __skb_queue_tail(&link->owner->waiting_sks, __skb_dequeue(wq)); } - -exit: - spin_unlock_bh(&tipc_port_list_lock); } /** @@ -423,6 +403,7 @@ void tipc_link_reset(struct tipc_link *l_ptr) u32 prev_state = l_ptr->state; u32 checkpoint = l_ptr->next_in_no; int was_active_link = tipc_link_is_active(l_ptr); + struct tipc_node *owner = l_ptr->owner; msg_set_session(l_ptr->pmsg, ((msg_session(l_ptr->pmsg) + 1) & 0xffff)); @@ -450,9 +431,10 @@ void tipc_link_reset(struct tipc_link *l_ptr) kfree_skb(l_ptr->proto_msg_queue); l_ptr->proto_msg_queue = NULL; 
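Two details of the wakeup scheme above are worth making concrete. First, link_prepare_wakeup() checks the limit before each move: with queue_limit[imp] = 50, out_queue_size = 45 and parked chains of size 4, two users are woken (pend_qsz goes 45 -> 49 -> 53) and the third stays parked, so the window can overshoot by at most one chain. Second, the consumer of the SOCK_WAKEUP pseudo message lives in the socket layer, whose hunk is not part of this diff; a minimal sketch of that side, assuming only the tipc_sock/link_cong fields visible in the deleted code here:

/* Hypothetical receive-filter fragment; the real handler is in socket.c. */
static int tipc_sk_handle_wakeup(struct tipc_sock *tsk, struct sk_buff *buf)
{
        if (unlikely(msg_user(buf_msg(buf)) != SOCK_WAKEUP))
                return -EINVAL;                 /* not ours: normal filtering */

        tsk->link_cong = 0;                     /* congestion has abated */
        tsk->sk.sk_write_space(&tsk->sk);       /* wake the blocked sender */
        kfree_skb(buf);
        return 0;
}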
kfree_skb_list(l_ptr->oldest_deferred_in); - if (!list_empty(&l_ptr->waiting_ports)) - tipc_link_wakeup_ports(l_ptr, 1); - + if (!skb_queue_empty(&l_ptr->waiting_sks)) { + skb_queue_splice_init(&l_ptr->waiting_sks, &owner->waiting_sks); + owner->action_flags |= TIPC_WAKEUP_USERS; + } l_ptr->retransm_queue_head = 0; l_ptr->retransm_queue_size = 0; l_ptr->last_out = NULL; @@ -688,19 +670,23 @@ static void link_state_event(struct tipc_link *l_ptr, unsigned int event) static int tipc_link_cong(struct tipc_link *link, struct sk_buff *buf) { struct tipc_msg *msg = buf_msg(buf); - uint psz = msg_size(msg); uint imp = tipc_msg_tot_importance(msg); u32 oport = msg_tot_origport(msg); - if (likely(imp <= TIPC_CRITICAL_IMPORTANCE)) { - if (!msg_errcode(msg) && !msg_reroute_cnt(msg)) { - link_schedule_port(link, oport, psz); - return -ELINKCONG; - } - } else { + if (unlikely(imp > TIPC_CRITICAL_IMPORTANCE)) { pr_warn("%s<%s>, send queue full", link_rst_msg, link->name); tipc_link_reset(link); + goto drop; } + if (unlikely(msg_errcode(msg))) + goto drop; + if (unlikely(msg_reroute_cnt(msg))) + goto drop; + if (TIPC_SKB_CB(buf)->wakeup_pending) + return -ELINKCONG; + if (link_schedule_user(link, oport, TIPC_SKB_CB(buf)->chain_sz, imp)) + return -ELINKCONG; +drop: kfree_skb_list(buf); return -EHOSTUNREACH; } @@ -1202,8 +1188,10 @@ void tipc_rcv(struct sk_buff *head, struct tipc_bearer *b_ptr) if (unlikely(l_ptr->next_out)) tipc_link_push_queue(l_ptr); - if (unlikely(!list_empty(&l_ptr->waiting_ports))) - tipc_link_wakeup_ports(l_ptr, 0); + if (released && !skb_queue_empty(&l_ptr->waiting_sks)) { + link_prepare_wakeup(l_ptr); + l_ptr->owner->action_flags |= TIPC_WAKEUP_USERS; + } /* Process the incoming packet */ if (unlikely(!link_working_working(l_ptr))) { diff --git a/net/tipc/link.h b/net/tipc/link.h index 782983ccd32..b567a3427fd 100644 --- a/net/tipc/link.h +++ b/net/tipc/link.h @@ -1,7 +1,7 @@ /* * net/tipc/link.h: Include file for TIPC link code * - * Copyright (c) 1995-2006, 2013, Ericsson AB + * Copyright (c) 1995-2006, 2013-2014, Ericsson AB * Copyright (c) 2004-2005, 2010-2011, Wind River Systems * All rights reserved. 
* @@ -133,7 +133,7 @@ struct tipc_stats { * @retransm_queue_size: number of messages to retransmit * @retransm_queue_head: sequence number of first message to retransmit * @next_out: ptr to first unsent outbound message in queue - * @waiting_ports: linked list of ports waiting for link congestion to abate + * @waiting_sks: linked list of sockets waiting for link congestion to abate * @long_msg_seq_no: next identifier to use for outbound fragmented messages * @reasm_buf: head of partially reassembled inbound message fragments * @stats: collects statistics regarding link activity @@ -194,7 +194,7 @@ struct tipc_link { u32 retransm_queue_size; u32 retransm_queue_head; struct sk_buff *next_out; - struct list_head waiting_ports; + struct sk_buff_head waiting_sks; /* Fragmentation/reassembly */ u32 long_msg_seq_no; @@ -235,7 +235,6 @@ void tipc_link_proto_xmit(struct tipc_link *l_ptr, u32 msg_typ, int prob, void tipc_link_push_queue(struct tipc_link *l_ptr); u32 tipc_link_defer_pkt(struct sk_buff **head, struct sk_buff **tail, struct sk_buff *buf); -void tipc_link_wakeup_ports(struct tipc_link *l_ptr, int all); void tipc_link_set_queue_limits(struct tipc_link *l_ptr, u32 window); void tipc_link_retransmit(struct tipc_link *l_ptr, struct sk_buff *start, u32 retransmits); diff --git a/net/tipc/msg.c b/net/tipc/msg.c index 9680be6d388..74745a47d72 100644 --- a/net/tipc/msg.c +++ b/net/tipc/msg.c @@ -56,8 +56,35 @@ void tipc_msg_init(struct tipc_msg *m, u32 user, u32 type, u32 hsize, msg_set_size(m, hsize); msg_set_prevnode(m, tipc_own_addr); msg_set_type(m, type); - msg_set_orignode(m, tipc_own_addr); - msg_set_destnode(m, destnode); + if (hsize > SHORT_H_SIZE) { + msg_set_orignode(m, tipc_own_addr); + msg_set_destnode(m, destnode); + } +} + +struct sk_buff *tipc_msg_create(uint user, uint type, uint hdr_sz, + uint data_sz, u32 dnode, u32 onode, + u32 dport, u32 oport, int errcode) +{ + struct tipc_msg *msg; + struct sk_buff *buf; + + buf = tipc_buf_acquire(hdr_sz + data_sz); + if (unlikely(!buf)) + return NULL; + + msg = buf_msg(buf); + tipc_msg_init(msg, user, type, hdr_sz, dnode); + msg_set_size(msg, hdr_sz + data_sz); + msg_set_prevnode(msg, onode); + msg_set_origport(msg, oport); + msg_set_destport(msg, dport); + msg_set_errcode(msg, errcode); + if (hdr_sz > SHORT_H_SIZE) { + msg_set_orignode(msg, onode); + msg_set_destnode(msg, dnode); + } + return buf; } /* tipc_buf_append(): Append a buffer to the fragment list of another buffer @@ -155,7 +182,7 @@ int tipc_msg_build(struct tipc_msg *mhdr, struct iovec const *iov, struct sk_buff *buf, *prev; char *pktpos; int rc; - + uint chain_sz = 0; msg_set_size(mhdr, msz); /* No fragmentation needed? 
*/ @@ -166,6 +193,7 @@ int tipc_msg_build(struct tipc_msg *mhdr, struct iovec const *iov, return -ENOMEM; skb_copy_to_linear_data(buf, mhdr, mhsz); pktpos = buf->data + mhsz; + TIPC_SKB_CB(buf)->chain_sz = 1; if (!dsz || !memcpy_fromiovecend(pktpos, iov, offset, dsz)) return dsz; rc = -EFAULT; @@ -182,6 +210,7 @@ int tipc_msg_build(struct tipc_msg *mhdr, struct iovec const *iov, *chain = buf = tipc_buf_acquire(pktmax); if (!buf) return -ENOMEM; + chain_sz = 1; pktpos = buf->data; skb_copy_to_linear_data(buf, &pkthdr, INT_H_SIZE); pktpos += INT_H_SIZE; @@ -215,6 +244,7 @@ int tipc_msg_build(struct tipc_msg *mhdr, struct iovec const *iov, rc = -ENOMEM; goto error; } + chain_sz++; prev->next = buf; msg_set_type(&pkthdr, FRAGMENT); msg_set_size(&pkthdr, pktsz); @@ -224,7 +254,7 @@ int tipc_msg_build(struct tipc_msg *mhdr, struct iovec const *iov, pktrem = pktsz - INT_H_SIZE; } while (1); - + TIPC_SKB_CB(*chain)->chain_sz = chain_sz; msg_set_type(buf_msg(buf), LAST_FRAGMENT); return dsz; error: diff --git a/net/tipc/msg.h b/net/tipc/msg.h index 462fa194a6a..0ea7b695ac4 100644 --- a/net/tipc/msg.h +++ b/net/tipc/msg.h @@ -442,6 +442,7 @@ static inline struct tipc_msg *msg_get_wrapped(struct tipc_msg *m) #define NAME_DISTRIBUTOR 11 #define MSG_FRAGMENTER 12 #define LINK_CONFIG 13 +#define SOCK_WAKEUP 14 /* pseudo user */ /* * Connection management protocol message types @@ -732,6 +733,10 @@ int tipc_msg_eval(struct sk_buff *buf, u32 *dnode); void tipc_msg_init(struct tipc_msg *m, u32 user, u32 type, u32 hsize, u32 destnode); +struct sk_buff *tipc_msg_create(uint user, uint type, uint hdr_sz, + uint data_sz, u32 dnode, u32 onode, + u32 dport, u32 oport, int errcode); + int tipc_buf_append(struct sk_buff **headbuf, struct sk_buff **buf); bool tipc_msg_bundle(struct sk_buff *bbuf, struct sk_buff *buf, u32 mtu); diff --git a/net/tipc/name_distr.c b/net/tipc/name_distr.c index dcc15bcd569..780ef710a84 100644 --- a/net/tipc/name_distr.c +++ b/net/tipc/name_distr.c @@ -1,7 +1,7 @@ /* * net/tipc/name_distr.c: TIPC name distribution code * - * Copyright (c) 2000-2006, Ericsson AB + * Copyright (c) 2000-2006, 2014, Ericsson AB * Copyright (c) 2005, 2010-2011, Wind River Systems * All rights reserved. * @@ -71,6 +71,21 @@ static struct publ_list *publ_lists[] = { }; +int sysctl_tipc_named_timeout __read_mostly = 2000; + +/** + * struct tipc_dist_queue - queue holding deferred name table updates + */ +static struct list_head tipc_dist_queue = LIST_HEAD_INIT(tipc_dist_queue); + +struct distr_queue_item { + struct distr_item i; + u32 dtype; + u32 node; + unsigned long expires; + struct list_head next; +}; + /** * publ_to_item - add publication info to a publication message */ @@ -263,54 +278,104 @@ static void named_purge_publ(struct publication *publ) } /** + * tipc_update_nametbl - try to process a nametable update and notify + * subscribers + * + * tipc_nametbl_lock must be held. + * Returns the publication item if successful, otherwise NULL. 
+ */ +struct publication *tipc_update_nametbl(struct distr_item *i, u32 node, + u32 dtype) +{ + struct publication *publ = NULL; + + if (dtype == PUBLICATION) { + publ = tipc_nametbl_insert_publ(ntohl(i->type), ntohl(i->lower), + ntohl(i->upper), + TIPC_CLUSTER_SCOPE, node, + ntohl(i->ref), ntohl(i->key)); + if (publ) { + tipc_nodesub_subscribe(&publ->subscr, node, publ, + (net_ev_handler) + named_purge_publ); + } + } else if (dtype == WITHDRAWAL) { + publ = tipc_nametbl_remove_publ(ntohl(i->type), ntohl(i->lower), + node, ntohl(i->ref), + ntohl(i->key)); + if (publ) { + tipc_nodesub_unsubscribe(&publ->subscr); + kfree(publ); + } + } else { + pr_warn("Unrecognized name table message received\n"); + } + return publ; +} + +/** + * tipc_named_add_backlog - add a failed name table update to the backlog + * + */ +static void tipc_named_add_backlog(struct distr_item *i, u32 type, u32 node) +{ + struct distr_queue_item *e; + unsigned long now = get_jiffies_64(); + + e = kzalloc(sizeof(*e), GFP_ATOMIC); + if (!e) + return; + e->dtype = type; + e->node = node; + e->expires = now + msecs_to_jiffies(sysctl_tipc_named_timeout); + memcpy(e, i, sizeof(*i)); + list_add_tail(&e->next, &tipc_dist_queue); +} + +/** + * tipc_named_process_backlog - try to process any pending name table updates + * from the network. + */ +void tipc_named_process_backlog(void) +{ + struct distr_queue_item *e, *tmp; + char addr[16]; + unsigned long now = get_jiffies_64(); + + list_for_each_entry_safe(e, tmp, &tipc_dist_queue, next) { + if (time_after(e->expires, now)) { + if (!tipc_update_nametbl(&e->i, e->node, e->dtype)) + continue; + } else { + tipc_addr_string_fill(addr, e->node); + pr_warn_ratelimited("Dropping name table update (%d) of {%u, %u, %u} from %s key=%u\n", + e->dtype, ntohl(e->i.type), + ntohl(e->i.lower), + ntohl(e->i.upper), + addr, ntohl(e->i.key)); + } + list_del(&e->next); + kfree(e); + } +} + +/** * tipc_named_rcv - process name table update message sent by another node */ void tipc_named_rcv(struct sk_buff *buf) { - struct publication *publ; struct tipc_msg *msg = buf_msg(buf); struct distr_item *item = (struct distr_item *)msg_data(msg); u32 count = msg_data_sz(msg) / ITEM_SIZE; + u32 node = msg_orignode(msg); write_lock_bh(&tipc_nametbl_lock); while (count--) { - if (msg_type(msg) == PUBLICATION) { - publ = tipc_nametbl_insert_publ(ntohl(item->type), - ntohl(item->lower), - ntohl(item->upper), - TIPC_CLUSTER_SCOPE, - msg_orignode(msg), - ntohl(item->ref), - ntohl(item->key)); - if (publ) { - tipc_nodesub_subscribe(&publ->subscr, - msg_orignode(msg), - publ, - (net_ev_handler) - named_purge_publ); - } - } else if (msg_type(msg) == WITHDRAWAL) { - publ = tipc_nametbl_remove_publ(ntohl(item->type), - ntohl(item->lower), - msg_orignode(msg), - ntohl(item->ref), - ntohl(item->key)); - - if (publ) { - tipc_nodesub_unsubscribe(&publ->subscr); - kfree(publ); - } else { - pr_err("Unable to remove publication by node 0x%x\n" - " (type=%u, lower=%u, ref=%u, key=%u)\n", - msg_orignode(msg), ntohl(item->type), - ntohl(item->lower), ntohl(item->ref), - ntohl(item->key)); - } - } else { - pr_warn("Unrecognized name table message received\n"); - } + if (!tipc_update_nametbl(item, node, msg_type(msg))) + tipc_named_add_backlog(item, msg_type(msg), node); item++; } + tipc_named_process_backlog(); write_unlock_bh(&tipc_nametbl_lock); kfree_skb(buf); } diff --git a/net/tipc/name_distr.h b/net/tipc/name_distr.h index 8afe32b7fc9..b9e75feb343 100644 --- a/net/tipc/name_distr.h +++ b/net/tipc/name_distr.h @@ -73,5 +73,6 @@ 
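The backlog is easiest to see with an out-of-order arrival, which is exactly what it exists to absorb. On a node with two links toward the same peer, a withdrawal can overtake the publication it cancels:

/* Arrival order as seen by tipc_named_rcv():
 *
 *   WITHDRAWAL  {type=42, lower=1, upper=1}   <- took the fast link
 *   PUBLICATION {type=42, lower=1, upper=1}   <- took the slow link
 *
 * The withdrawal finds nothing to remove, tipc_update_nametbl()
 * returns NULL, and tipc_named_add_backlog() parks the item for up
 * to sysctl_tipc_named_timeout (default 2000) ms.  The very next
 * tipc_named_process_backlog() call - triggered by the arriving
 * publication, or by a local publish/withdraw - replays it against
 * the now-consistent table; only items older than the timeout are
 * dropped, with the rate-limited warning shown here.
 */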
void named_cluster_distribute(struct sk_buff *buf); void tipc_named_node_up(u32 dnode); void tipc_named_rcv(struct sk_buff *buf); void tipc_named_reinit(void); +void tipc_named_process_backlog(void); #endif diff --git a/net/tipc/name_table.c b/net/tipc/name_table.c index 9d7d37d9518..3a6a0a7c075 100644 --- a/net/tipc/name_table.c +++ b/net/tipc/name_table.c @@ -39,7 +39,6 @@ #include "name_table.h" #include "name_distr.h" #include "subscr.h" -#include "port.h" #define TIPC_NAMETBL_SIZE 1024 /* must be a power of 2 */ @@ -262,8 +261,6 @@ static struct publication *tipc_nameseq_insert_publ(struct name_seq *nseq, /* Lower end overlaps existing entry => need an exact match */ if ((sseq->lower != lower) || (sseq->upper != upper)) { - pr_warn("Cannot publish {%u,%u,%u}, overlap error\n", - type, lower, upper); return NULL; } @@ -285,8 +282,6 @@ static struct publication *tipc_nameseq_insert_publ(struct name_seq *nseq, /* Fail if upper end overlaps into an existing entry */ if ((inspos < nseq->first_free) && (upper >= nseq->sseqs[inspos].lower)) { - pr_warn("Cannot publish {%u,%u,%u}, overlap error\n", - type, lower, upper); return NULL; } @@ -678,6 +673,8 @@ struct publication *tipc_nametbl_publish(u32 type, u32 lower, u32 upper, if (likely(publ)) { table.local_publ_count++; buf = tipc_named_publish(publ); + /* Any pending external events? */ + tipc_named_process_backlog(); } write_unlock_bh(&tipc_nametbl_lock); @@ -699,6 +696,8 @@ int tipc_nametbl_withdraw(u32 type, u32 lower, u32 ref, u32 key) if (likely(publ)) { table.local_publ_count--; buf = tipc_named_withdraw(publ); + /* Any pending external events? */ + tipc_named_process_backlog(); write_unlock_bh(&tipc_nametbl_lock); list_del_init(&publ->pport_list); kfree(publ); diff --git a/net/tipc/net.c b/net/tipc/net.c index 7fcc94998fe..93b9944a6a8 100644 --- a/net/tipc/net.c +++ b/net/tipc/net.c @@ -38,7 +38,6 @@ #include "net.h" #include "name_distr.h" #include "subscr.h" -#include "port.h" #include "socket.h" #include "node.h" #include "config.h" @@ -111,7 +110,7 @@ int tipc_net_start(u32 addr) tipc_own_addr = addr; tipc_named_reinit(); - tipc_port_reinit(); + tipc_sk_reinit(); res = tipc_bclink_init(); if (res) return res; diff --git a/net/tipc/node.c b/net/tipc/node.c index f7069299943..17e6378c4df 100644 --- a/net/tipc/node.c +++ b/net/tipc/node.c @@ -38,6 +38,7 @@ #include "config.h" #include "node.h" #include "name_distr.h" +#include "socket.h" #define NODE_HTABLE_SIZE 512 @@ -50,6 +51,13 @@ static u32 tipc_num_nodes; static u32 tipc_num_links; static DEFINE_SPINLOCK(node_list_lock); +struct tipc_sock_conn { + u32 port; + u32 peer_port; + u32 peer_node; + struct list_head list; +}; + /* * A trivial power-of-two bitmask technique is used for speed, since this * operation is done for every incoming TIPC packet. 
The number of hash table @@ -100,6 +108,8 @@ struct tipc_node *tipc_node_create(u32 addr) INIT_HLIST_NODE(&n_ptr->hash); INIT_LIST_HEAD(&n_ptr->list); INIT_LIST_HEAD(&n_ptr->nsub); + INIT_LIST_HEAD(&n_ptr->conn_sks); + __skb_queue_head_init(&n_ptr->waiting_sks); hlist_add_head_rcu(&n_ptr->hash, &node_htable[tipc_hashfn(addr)]); @@ -136,6 +146,71 @@ void tipc_node_stop(void) spin_unlock_bh(&node_list_lock); } +int tipc_node_add_conn(u32 dnode, u32 port, u32 peer_port) +{ + struct tipc_node *node; + struct tipc_sock_conn *conn; + + if (in_own_node(dnode)) + return 0; + + node = tipc_node_find(dnode); + if (!node) { + pr_warn("Connecting sock to node 0x%x failed\n", dnode); + return -EHOSTUNREACH; + } + conn = kmalloc(sizeof(*conn), GFP_ATOMIC); + if (!conn) + return -EHOSTUNREACH; + conn->peer_node = dnode; + conn->port = port; + conn->peer_port = peer_port; + + tipc_node_lock(node); + list_add_tail(&conn->list, &node->conn_sks); + tipc_node_unlock(node); + return 0; +} + +void tipc_node_remove_conn(u32 dnode, u32 port) +{ + struct tipc_node *node; + struct tipc_sock_conn *conn, *safe; + + if (in_own_node(dnode)) + return; + + node = tipc_node_find(dnode); + if (!node) + return; + + tipc_node_lock(node); + list_for_each_entry_safe(conn, safe, &node->conn_sks, list) { + if (port != conn->port) + continue; + list_del(&conn->list); + kfree(conn); + } + tipc_node_unlock(node); +} + +void tipc_node_abort_sock_conns(struct list_head *conns) +{ + struct tipc_sock_conn *conn, *safe; + struct sk_buff *buf; + + list_for_each_entry_safe(conn, safe, conns, list) { + buf = tipc_msg_create(TIPC_CRITICAL_IMPORTANCE, TIPC_CONN_MSG, + SHORT_H_SIZE, 0, tipc_own_addr, + conn->peer_node, conn->port, + conn->peer_port, TIPC_ERR_NO_NODE); + if (likely(buf)) + tipc_sk_rcv(buf); + list_del(&conn->list); + kfree(conn); + } +} + /** * tipc_node_link_up - handle addition of link * @@ -474,6 +549,8 @@ int tipc_node_get_linkname(u32 bearer_id, u32 addr, char *linkname, size_t len) void tipc_node_unlock(struct tipc_node *node) { LIST_HEAD(nsub_list); + LIST_HEAD(conn_sks); + struct sk_buff_head waiting_sks; u32 addr = 0; if (likely(!node->action_flags)) { @@ -481,8 +558,14 @@ void tipc_node_unlock(struct tipc_node *node) return; } + __skb_queue_head_init(&waiting_sks); + if (node->action_flags & TIPC_WAKEUP_USERS) { + skb_queue_splice_init(&node->waiting_sks, &waiting_sks); + node->action_flags &= ~TIPC_WAKEUP_USERS; + } if (node->action_flags & TIPC_NOTIFY_NODE_DOWN) { list_replace_init(&node->nsub, &nsub_list); + list_replace_init(&node->conn_sks, &conn_sks); node->action_flags &= ~TIPC_NOTIFY_NODE_DOWN; } if (node->action_flags & TIPC_NOTIFY_NODE_UP) { @@ -491,8 +574,15 @@ void tipc_node_unlock(struct tipc_node *node) } spin_unlock_bh(&node->lock); + while (!skb_queue_empty(&waiting_sks)) + tipc_sk_rcv(__skb_dequeue(&waiting_sks)); + + if (!list_empty(&conn_sks)) + tipc_node_abort_sock_conns(&conn_sks); + if (!list_empty(&nsub_list)) tipc_nodesub_notify(&nsub_list); + if (addr) tipc_named_node_up(addr); } diff --git a/net/tipc/node.h b/net/tipc/node.h index b61716a8218..522d6f3157b 100644 --- a/net/tipc/node.h +++ b/net/tipc/node.h @@ -58,7 +58,8 @@ enum { TIPC_WAIT_PEER_LINKS_DOWN = (1 << 1), TIPC_WAIT_OWN_LINKS_DOWN = (1 << 2), TIPC_NOTIFY_NODE_DOWN = (1 << 3), - TIPC_NOTIFY_NODE_UP = (1 << 4) + TIPC_NOTIFY_NODE_UP = (1 << 4), + TIPC_WAKEUP_USERS = (1 << 5) }; /** @@ -115,6 +116,8 @@ struct tipc_node { int working_links; u32 signature; struct list_head nsub; + struct sk_buff_head waiting_sks; + struct list_head 
conn_sks; struct rcu_head rcu; }; @@ -133,6 +136,8 @@ struct sk_buff *tipc_node_get_links(const void *req_tlv_area, int req_tlv_space) struct sk_buff *tipc_node_get_nodes(const void *req_tlv_area, int req_tlv_space); int tipc_node_get_linkname(u32 bearer_id, u32 node, char *linkname, size_t len); void tipc_node_unlock(struct tipc_node *node); +int tipc_node_add_conn(u32 dnode, u32 port, u32 peer_port); +void tipc_node_remove_conn(u32 dnode, u32 port); static inline void tipc_node_lock(struct tipc_node *node) { diff --git a/net/tipc/port.c b/net/tipc/port.c deleted file mode 100644 index 7e096a5e770..00000000000 --- a/net/tipc/port.c +++ /dev/null @@ -1,514 +0,0 @@ -/* - * net/tipc/port.c: TIPC port code - * - * Copyright (c) 1992-2007, 2014, Ericsson AB - * Copyright (c) 2004-2008, 2010-2013, Wind River Systems - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. Neither the names of the copyright holders nor the names of its - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. - * - * Alternatively, this software may be distributed under the terms of the - * GNU General Public License ("GPL") version 2 as published by the Free - * Software Foundation. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - */ - -#include "core.h" -#include "config.h" -#include "port.h" -#include "name_table.h" -#include "socket.h" - -/* Connection management: */ -#define PROBING_INTERVAL 3600000 /* [ms] => 1 h */ - -#define MAX_REJECT_SIZE 1024 - -DEFINE_SPINLOCK(tipc_port_list_lock); - -static LIST_HEAD(ports); -static void port_handle_node_down(unsigned long ref); -static struct sk_buff *port_build_self_abort_msg(struct tipc_port *, u32 err); -static struct sk_buff *port_build_peer_abort_msg(struct tipc_port *, u32 err); -static void port_timeout(unsigned long ref); - -/** - * tipc_port_peer_msg - verify message was sent by connected port's peer - * - * Handles cases where the node's network address has changed from - * the default of <0.0.0> to its configured setting. 
- */ -int tipc_port_peer_msg(struct tipc_port *p_ptr, struct tipc_msg *msg) -{ - u32 peernode; - u32 orignode; - - if (msg_origport(msg) != tipc_port_peerport(p_ptr)) - return 0; - - orignode = msg_orignode(msg); - peernode = tipc_port_peernode(p_ptr); - return (orignode == peernode) || - (!orignode && (peernode == tipc_own_addr)) || - (!peernode && (orignode == tipc_own_addr)); -} - -/* tipc_port_init - intiate TIPC port and lock it - * - * Returns obtained reference if initialization is successful, zero otherwise - */ -u32 tipc_port_init(struct tipc_port *p_ptr, - const unsigned int importance) -{ - struct tipc_msg *msg; - u32 ref; - - ref = tipc_ref_acquire(p_ptr, &p_ptr->lock); - if (!ref) { - pr_warn("Port registration failed, ref. table exhausted\n"); - return 0; - } - - p_ptr->max_pkt = MAX_PKT_DEFAULT; - p_ptr->ref = ref; - INIT_LIST_HEAD(&p_ptr->wait_list); - INIT_LIST_HEAD(&p_ptr->subscription.nodesub_list); - k_init_timer(&p_ptr->timer, (Handler)port_timeout, ref); - INIT_LIST_HEAD(&p_ptr->publications); - INIT_LIST_HEAD(&p_ptr->port_list); - - /* - * Must hold port list lock while initializing message header template - * to ensure a change to node's own network address doesn't result - * in template containing out-dated network address information - */ - spin_lock_bh(&tipc_port_list_lock); - msg = &p_ptr->phdr; - tipc_msg_init(msg, importance, TIPC_NAMED_MSG, NAMED_H_SIZE, 0); - msg_set_origport(msg, ref); - list_add_tail(&p_ptr->port_list, &ports); - spin_unlock_bh(&tipc_port_list_lock); - return ref; -} - -void tipc_port_destroy(struct tipc_port *p_ptr) -{ - struct sk_buff *buf = NULL; - struct tipc_msg *msg = NULL; - u32 peer; - - tipc_withdraw(p_ptr, 0, NULL); - - spin_lock_bh(p_ptr->lock); - tipc_ref_discard(p_ptr->ref); - spin_unlock_bh(p_ptr->lock); - - k_cancel_timer(&p_ptr->timer); - if (p_ptr->connected) { - buf = port_build_peer_abort_msg(p_ptr, TIPC_ERR_NO_PORT); - tipc_nodesub_unsubscribe(&p_ptr->subscription); - msg = buf_msg(buf); - peer = msg_destnode(msg); - tipc_link_xmit(buf, peer, msg_link_selector(msg)); - } - spin_lock_bh(&tipc_port_list_lock); - list_del(&p_ptr->port_list); - list_del(&p_ptr->wait_list); - spin_unlock_bh(&tipc_port_list_lock); - k_term_timer(&p_ptr->timer); -} - -/* - * port_build_proto_msg(): create connection protocol message for port - * - * On entry the port must be locked and connected. - */ -static struct sk_buff *port_build_proto_msg(struct tipc_port *p_ptr, - u32 type, u32 ack) -{ - struct sk_buff *buf; - struct tipc_msg *msg; - - buf = tipc_buf_acquire(INT_H_SIZE); - if (buf) { - msg = buf_msg(buf); - tipc_msg_init(msg, CONN_MANAGER, type, INT_H_SIZE, - tipc_port_peernode(p_ptr)); - msg_set_destport(msg, tipc_port_peerport(p_ptr)); - msg_set_origport(msg, p_ptr->ref); - msg_set_msgcnt(msg, ack); - buf->next = NULL; - } - return buf; -} - -static void port_timeout(unsigned long ref) -{ - struct tipc_port *p_ptr = tipc_port_lock(ref); - struct sk_buff *buf = NULL; - struct tipc_msg *msg = NULL; - - if (!p_ptr) - return; - - if (!p_ptr->connected) { - tipc_port_unlock(p_ptr); - return; - } - - /* Last probe answered ? 
*/ - if (p_ptr->probing_state == TIPC_CONN_PROBING) { - buf = port_build_self_abort_msg(p_ptr, TIPC_ERR_NO_PORT); - } else { - buf = port_build_proto_msg(p_ptr, CONN_PROBE, 0); - p_ptr->probing_state = TIPC_CONN_PROBING; - k_start_timer(&p_ptr->timer, p_ptr->probing_interval); - } - tipc_port_unlock(p_ptr); - msg = buf_msg(buf); - tipc_link_xmit(buf, msg_destnode(msg), msg_link_selector(msg)); -} - - -static void port_handle_node_down(unsigned long ref) -{ - struct tipc_port *p_ptr = tipc_port_lock(ref); - struct sk_buff *buf = NULL; - struct tipc_msg *msg = NULL; - - if (!p_ptr) - return; - buf = port_build_self_abort_msg(p_ptr, TIPC_ERR_NO_NODE); - tipc_port_unlock(p_ptr); - msg = buf_msg(buf); - tipc_link_xmit(buf, msg_destnode(msg), msg_link_selector(msg)); -} - - -static struct sk_buff *port_build_self_abort_msg(struct tipc_port *p_ptr, u32 err) -{ - struct sk_buff *buf = port_build_peer_abort_msg(p_ptr, err); - - if (buf) { - struct tipc_msg *msg = buf_msg(buf); - msg_swap_words(msg, 4, 5); - msg_swap_words(msg, 6, 7); - buf->next = NULL; - } - return buf; -} - - -static struct sk_buff *port_build_peer_abort_msg(struct tipc_port *p_ptr, u32 err) -{ - struct sk_buff *buf; - struct tipc_msg *msg; - u32 imp; - - if (!p_ptr->connected) - return NULL; - - buf = tipc_buf_acquire(BASIC_H_SIZE); - if (buf) { - msg = buf_msg(buf); - memcpy(msg, &p_ptr->phdr, BASIC_H_SIZE); - msg_set_hdr_sz(msg, BASIC_H_SIZE); - msg_set_size(msg, BASIC_H_SIZE); - imp = msg_importance(msg); - if (imp < TIPC_CRITICAL_IMPORTANCE) - msg_set_importance(msg, ++imp); - msg_set_errcode(msg, err); - buf->next = NULL; - } - return buf; -} - -static int port_print(struct tipc_port *p_ptr, char *buf, int len, int full_id) -{ - struct publication *publ; - int ret; - - if (full_id) - ret = tipc_snprintf(buf, len, "<%u.%u.%u:%u>:", - tipc_zone(tipc_own_addr), - tipc_cluster(tipc_own_addr), - tipc_node(tipc_own_addr), p_ptr->ref); - else - ret = tipc_snprintf(buf, len, "%-10u:", p_ptr->ref); - - if (p_ptr->connected) { - u32 dport = tipc_port_peerport(p_ptr); - u32 destnode = tipc_port_peernode(p_ptr); - - ret += tipc_snprintf(buf + ret, len - ret, - " connected to <%u.%u.%u:%u>", - tipc_zone(destnode), - tipc_cluster(destnode), - tipc_node(destnode), dport); - if (p_ptr->conn_type != 0) - ret += tipc_snprintf(buf + ret, len - ret, - " via {%u,%u}", p_ptr->conn_type, - p_ptr->conn_instance); - } else if (p_ptr->published) { - ret += tipc_snprintf(buf + ret, len - ret, " bound to"); - list_for_each_entry(publ, &p_ptr->publications, pport_list) { - if (publ->lower == publ->upper) - ret += tipc_snprintf(buf + ret, len - ret, - " {%u,%u}", publ->type, - publ->lower); - else - ret += tipc_snprintf(buf + ret, len - ret, - " {%u,%u,%u}", publ->type, - publ->lower, publ->upper); - } - } - ret += tipc_snprintf(buf + ret, len - ret, "\n"); - return ret; -} - -struct sk_buff *tipc_port_get_ports(void) -{ - struct sk_buff *buf; - struct tlv_desc *rep_tlv; - char *pb; - int pb_len; - struct tipc_port *p_ptr; - int str_len = 0; - - buf = tipc_cfg_reply_alloc(TLV_SPACE(ULTRA_STRING_MAX_LEN)); - if (!buf) - return NULL; - rep_tlv = (struct tlv_desc *)buf->data; - pb = TLV_DATA(rep_tlv); - pb_len = ULTRA_STRING_MAX_LEN; - - spin_lock_bh(&tipc_port_list_lock); - list_for_each_entry(p_ptr, &ports, port_list) { - spin_lock_bh(p_ptr->lock); - str_len += port_print(p_ptr, pb, pb_len, 0); - spin_unlock_bh(p_ptr->lock); - } - spin_unlock_bh(&tipc_port_list_lock); - str_len += 1; /* for "\0" */ - skb_put(buf, TLV_SPACE(str_len)); - TLV_SET(rep_tlv, 
-
-	return buf;
-}
-
-void tipc_port_reinit(void)
-{
-	struct tipc_port *p_ptr;
-	struct tipc_msg *msg;
-
-	spin_lock_bh(&tipc_port_list_lock);
-	list_for_each_entry(p_ptr, &ports, port_list) {
-		msg = &p_ptr->phdr;
-		msg_set_prevnode(msg, tipc_own_addr);
-		msg_set_orignode(msg, tipc_own_addr);
-	}
-	spin_unlock_bh(&tipc_port_list_lock);
-}
-
-void tipc_acknowledge(u32 ref, u32 ack)
-{
-	struct tipc_port *p_ptr;
-	struct sk_buff *buf = NULL;
-	struct tipc_msg *msg;
-
-	p_ptr = tipc_port_lock(ref);
-	if (!p_ptr)
-		return;
-	if (p_ptr->connected)
-		buf = port_build_proto_msg(p_ptr, CONN_ACK, ack);
-
-	tipc_port_unlock(p_ptr);
-	if (!buf)
-		return;
-	msg = buf_msg(buf);
-	tipc_link_xmit(buf, msg_destnode(msg), msg_link_selector(msg));
-}
-
-int tipc_publish(struct tipc_port *p_ptr, unsigned int scope,
-		 struct tipc_name_seq const *seq)
-{
-	struct publication *publ;
-	u32 key;
-
-	if (p_ptr->connected)
-		return -EINVAL;
-	key = p_ptr->ref + p_ptr->pub_count + 1;
-	if (key == p_ptr->ref)
-		return -EADDRINUSE;
-
-	publ = tipc_nametbl_publish(seq->type, seq->lower, seq->upper,
-				    scope, p_ptr->ref, key);
-	if (publ) {
-		list_add(&publ->pport_list, &p_ptr->publications);
-		p_ptr->pub_count++;
-		p_ptr->published = 1;
-		return 0;
-	}
-	return -EINVAL;
-}
-
-int tipc_withdraw(struct tipc_port *p_ptr, unsigned int scope,
-		  struct tipc_name_seq const *seq)
-{
-	struct publication *publ;
-	struct publication *tpubl;
-	int res = -EINVAL;
-
-	if (!seq) {
-		list_for_each_entry_safe(publ, tpubl,
-					 &p_ptr->publications, pport_list) {
-			tipc_nametbl_withdraw(publ->type, publ->lower,
-					      publ->ref, publ->key);
-		}
-		res = 0;
-	} else {
-		list_for_each_entry_safe(publ, tpubl,
-					 &p_ptr->publications, pport_list) {
-			if (publ->scope != scope)
-				continue;
-			if (publ->type != seq->type)
-				continue;
-			if (publ->lower != seq->lower)
-				continue;
-			if (publ->upper != seq->upper)
-				break;
-			tipc_nametbl_withdraw(publ->type, publ->lower,
-					      publ->ref, publ->key);
-			res = 0;
-			break;
-		}
-	}
-	if (list_empty(&p_ptr->publications))
-		p_ptr->published = 0;
-	return res;
-}
-
-int tipc_port_connect(u32 ref, struct tipc_portid const *peer)
-{
-	struct tipc_port *p_ptr;
-	int res;
-
-	p_ptr = tipc_port_lock(ref);
-	if (!p_ptr)
-		return -EINVAL;
-	res = __tipc_port_connect(ref, p_ptr, peer);
-	tipc_port_unlock(p_ptr);
-	return res;
-}
-
-/*
- * __tipc_port_connect - connect to a remote peer
- *
- * Port must be locked.
- */
-int __tipc_port_connect(u32 ref, struct tipc_port *p_ptr,
-			struct tipc_portid const *peer)
-{
-	struct tipc_msg *msg;
-	int res = -EINVAL;
-
-	if (p_ptr->published || p_ptr->connected)
-		goto exit;
-	if (!peer->ref)
-		goto exit;
-
-	msg = &p_ptr->phdr;
-	msg_set_destnode(msg, peer->node);
-	msg_set_destport(msg, peer->ref);
-	msg_set_type(msg, TIPC_CONN_MSG);
-	msg_set_lookup_scope(msg, 0);
-	msg_set_hdr_sz(msg, SHORT_H_SIZE);
-
-	p_ptr->probing_interval = PROBING_INTERVAL;
-	p_ptr->probing_state = TIPC_CONN_OK;
-	p_ptr->connected = 1;
-	k_start_timer(&p_ptr->timer, p_ptr->probing_interval);
-
-	tipc_nodesub_subscribe(&p_ptr->subscription, peer->node,
-			       (void *)(unsigned long)ref,
-			       (net_ev_handler)port_handle_node_down);
-	res = 0;
-exit:
-	p_ptr->max_pkt = tipc_node_get_mtu(peer->node, ref);
-	return res;
-}
-
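Editor's note: __tipc_port_disconnect(), next, deliberately does no more than clear the 'connected' flag and let the running probe timer expire on its own. The timer handler, port_timeout(), takes the same port lock as the caller, so cancelling the timer synchronously while holding that lock could deadlock. The sketch below is a user-space analog of that pattern, assuming pthreads; all names are invented for illustration and none of this is TIPC code.

#include <pthread.h>

struct conn {
	pthread_mutex_t lock;
	pthread_t timer_thread;
	int connected;
};

static void *timer_fn(void *arg)
{
	struct conn *c = arg;

	pthread_mutex_lock(&c->lock);	/* the handler needs the lock... */
	/* ...probe/abort work would go here... */
	pthread_mutex_unlock(&c->lock);
	return NULL;
}

static void disconnect(struct conn *c)
{
	pthread_mutex_lock(&c->lock);
	c->connected = 0;		/* just flip the flag under the lock */
	pthread_mutex_unlock(&c->lock);
	/* wait out the "timer" only AFTER dropping the lock; joining it
	 * while still holding the lock would deadlock with timer_fn() */
	pthread_join(c->timer_thread, NULL);
}

int main(void)
{
	struct conn c;

	pthread_mutex_init(&c.lock, NULL);
	c.connected = 1;
	pthread_create(&c.timer_thread, NULL, timer_fn, &c);
	disconnect(&c);
	pthread_mutex_destroy(&c.lock);
	return 0;
}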
-/*
- * __tipc_port_disconnect - disconnect port from peer
- *
- * Port must be locked.
- */
-int __tipc_port_disconnect(struct tipc_port *tp_ptr)
-{
-	if (tp_ptr->connected) {
-		tp_ptr->connected = 0;
-		/* let timer expire on its own to avoid deadlock! */
-		tipc_nodesub_unsubscribe(&tp_ptr->subscription);
-		return 0;
-	}
-
-	return -ENOTCONN;
-}
-
-/*
- * tipc_port_disconnect(): Disconnect port from peer.
- * This is a node local operation.
- */
-int tipc_port_disconnect(u32 ref)
-{
-	struct tipc_port *p_ptr;
-	int res;
-
-	p_ptr = tipc_port_lock(ref);
-	if (!p_ptr)
-		return -EINVAL;
-	res = __tipc_port_disconnect(p_ptr);
-	tipc_port_unlock(p_ptr);
-	return res;
-}
-
-/*
- * tipc_port_shutdown(): Send a SHUTDOWN msg to peer and disconnect
- */
-int tipc_port_shutdown(u32 ref)
-{
-	struct tipc_msg *msg;
-	struct tipc_port *p_ptr;
-	struct sk_buff *buf = NULL;
-
-	p_ptr = tipc_port_lock(ref);
-	if (!p_ptr)
-		return -EINVAL;
-
-	buf = port_build_peer_abort_msg(p_ptr, TIPC_CONN_SHUTDOWN);
-	tipc_port_unlock(p_ptr);
-	msg = buf_msg(buf);
-	tipc_link_xmit(buf, msg_destnode(msg), msg_link_selector(msg));
-	return tipc_port_disconnect(ref);
-}
diff --git a/net/tipc/port.h b/net/tipc/port.h
deleted file mode 100644
index 3087da39ee4..00000000000
--- a/net/tipc/port.h
+++ /dev/null
@@ -1,190 +0,0 @@
-/*
- * net/tipc/port.h: Include file for TIPC port code
- *
- * Copyright (c) 1994-2007, 2014, Ericsson AB
- * Copyright (c) 2004-2007, 2010-2013, Wind River Systems
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * 1. Redistributions of source code must retain the above copyright
- *    notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- *    notice, this list of conditions and the following disclaimer in the
- *    documentation and/or other materials provided with the distribution.
- * 3. Neither the names of the copyright holders nor the names of its
- *    contributors may be used to endorse or promote products derived from
- *    this software without specific prior written permission.
- *
- * Alternatively, this software may be distributed under the terms of the
- * GNU General Public License ("GPL") version 2 as published by the Free
- * Software Foundation.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */ - -#ifndef _TIPC_PORT_H -#define _TIPC_PORT_H - -#include "ref.h" -#include "net.h" -#include "msg.h" -#include "node_subscr.h" - -#define TIPC_CONNACK_INTV 256 -#define TIPC_FLOWCTRL_WIN (TIPC_CONNACK_INTV * 2) -#define TIPC_CONN_OVERLOAD_LIMIT ((TIPC_FLOWCTRL_WIN * 2 + 1) * \ - SKB_TRUESIZE(TIPC_MAX_USER_MSG_SIZE)) - -/** - * struct tipc_port - TIPC port structure - * @lock: pointer to spinlock for controlling access to port - * @connected: non-zero if port is currently connected to a peer port - * @conn_type: TIPC type used when connection was established - * @conn_instance: TIPC instance used when connection was established - * @published: non-zero if port has one or more associated names - * @max_pkt: maximum packet size "hint" used when building messages sent by port - * @ref: unique reference to port in TIPC object registry - * @phdr: preformatted message header used when sending messages - * @port_list: adjacent ports in TIPC's global list of ports - * @wait_list: adjacent ports in list of ports waiting on link congestion - * @waiting_pkts: - * @publications: list of publications for port - * @pub_count: total # of publications port has made during its lifetime - * @probing_state: - * @probing_interval: - * @timer_ref: - * @subscription: "node down" subscription used to terminate failed connections - */ -struct tipc_port { - spinlock_t *lock; - int connected; - u32 conn_type; - u32 conn_instance; - int published; - u32 max_pkt; - u32 ref; - struct tipc_msg phdr; - struct list_head port_list; - struct list_head wait_list; - u32 waiting_pkts; - struct list_head publications; - u32 pub_count; - u32 probing_state; - u32 probing_interval; - struct timer_list timer; - struct tipc_node_subscr subscription; -}; - -extern spinlock_t tipc_port_list_lock; -struct tipc_port_list; - -/* - * TIPC port manipulation routines - */ -u32 tipc_port_init(struct tipc_port *p_ptr, - const unsigned int importance); - -void tipc_acknowledge(u32 port_ref, u32 ack); - -void tipc_port_destroy(struct tipc_port *p_ptr); - -int tipc_publish(struct tipc_port *p_ptr, unsigned int scope, - struct tipc_name_seq const *name_seq); - -int tipc_withdraw(struct tipc_port *p_ptr, unsigned int scope, - struct tipc_name_seq const *name_seq); - -int tipc_port_connect(u32 portref, struct tipc_portid const *port); - -int tipc_port_disconnect(u32 portref); - -int tipc_port_shutdown(u32 ref); - -/* - * The following routines require that the port be locked on entry - */ -int __tipc_port_disconnect(struct tipc_port *tp_ptr); -int __tipc_port_connect(u32 ref, struct tipc_port *p_ptr, - struct tipc_portid const *peer); -int tipc_port_peer_msg(struct tipc_port *p_ptr, struct tipc_msg *msg); - -struct sk_buff *tipc_port_get_ports(void); -void tipc_port_reinit(void); - -/** - * tipc_port_lock - lock port instance referred to and return its pointer - */ -static inline struct tipc_port *tipc_port_lock(u32 ref) -{ - return (struct tipc_port *)tipc_ref_lock(ref); -} - -/** - * tipc_port_unlock - unlock a port instance - * - * Can use pointer instead of tipc_ref_unlock() since port is already locked. 
- */ -static inline void tipc_port_unlock(struct tipc_port *p_ptr) -{ - spin_unlock_bh(p_ptr->lock); -} - -static inline u32 tipc_port_peernode(struct tipc_port *p_ptr) -{ - return msg_destnode(&p_ptr->phdr); -} - -static inline u32 tipc_port_peerport(struct tipc_port *p_ptr) -{ - return msg_destport(&p_ptr->phdr); -} - -static inline bool tipc_port_unreliable(struct tipc_port *port) -{ - return msg_src_droppable(&port->phdr) != 0; -} - -static inline void tipc_port_set_unreliable(struct tipc_port *port, - bool unreliable) -{ - msg_set_src_droppable(&port->phdr, unreliable ? 1 : 0); -} - -static inline bool tipc_port_unreturnable(struct tipc_port *port) -{ - return msg_dest_droppable(&port->phdr) != 0; -} - -static inline void tipc_port_set_unreturnable(struct tipc_port *port, - bool unreturnable) -{ - msg_set_dest_droppable(&port->phdr, unreturnable ? 1 : 0); -} - - -static inline int tipc_port_importance(struct tipc_port *port) -{ - return msg_importance(&port->phdr); -} - -static inline int tipc_port_set_importance(struct tipc_port *port, int imp) -{ - if (imp > TIPC_CRITICAL_IMPORTANCE) - return -EINVAL; - msg_set_importance(&port->phdr, (u32)imp); - return 0; -} - -#endif diff --git a/net/tipc/ref.c b/net/tipc/ref.c deleted file mode 100644 index 3d4ecd754ee..00000000000 --- a/net/tipc/ref.c +++ /dev/null @@ -1,266 +0,0 @@ -/* - * net/tipc/ref.c: TIPC object registry code - * - * Copyright (c) 1991-2006, Ericsson AB - * Copyright (c) 2004-2007, Wind River Systems - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. Neither the names of the copyright holders nor the names of its - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. - * - * Alternatively, this software may be distributed under the terms of the - * GNU General Public License ("GPL") version 2 as published by the Free - * Software Foundation. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. 
- */ - -#include "core.h" -#include "ref.h" - -/** - * struct reference - TIPC object reference entry - * @object: pointer to object associated with reference entry - * @lock: spinlock controlling access to object - * @ref: reference value for object (combines instance & array index info) - */ -struct reference { - void *object; - spinlock_t lock; - u32 ref; -}; - -/** - * struct tipc_ref_table - table of TIPC object reference entries - * @entries: pointer to array of reference entries - * @capacity: array index of first unusable entry - * @init_point: array index of first uninitialized entry - * @first_free: array index of first unused object reference entry - * @last_free: array index of last unused object reference entry - * @index_mask: bitmask for array index portion of reference values - * @start_mask: initial value for instance value portion of reference values - */ -struct ref_table { - struct reference *entries; - u32 capacity; - u32 init_point; - u32 first_free; - u32 last_free; - u32 index_mask; - u32 start_mask; -}; - -/* - * Object reference table consists of 2**N entries. - * - * State Object ptr Reference - * ----- ---------- --------- - * In use non-NULL XXXX|own index - * (XXXX changes each time entry is acquired) - * Free NULL YYYY|next free index - * (YYYY is one more than last used XXXX) - * Uninitialized NULL 0 - * - * Entry 0 is not used; this allows index 0 to denote the end of the free list. - * - * Note that a reference value of 0 does not necessarily indicate that an - * entry is uninitialized, since the last entry in the free list could also - * have a reference value of 0 (although this is unlikely). - */ - -static struct ref_table tipc_ref_table; - -static DEFINE_SPINLOCK(ref_table_lock); - -/** - * tipc_ref_table_init - create reference table for objects - */ -int tipc_ref_table_init(u32 requested_size, u32 start) -{ - struct reference *table; - u32 actual_size; - - /* account for unused entry, then round up size to a power of 2 */ - - requested_size++; - for (actual_size = 16; actual_size < requested_size; actual_size <<= 1) - /* do nothing */ ; - - /* allocate table & mark all entries as uninitialized */ - table = vzalloc(actual_size * sizeof(struct reference)); - if (table == NULL) - return -ENOMEM; - - tipc_ref_table.entries = table; - tipc_ref_table.capacity = requested_size; - tipc_ref_table.init_point = 1; - tipc_ref_table.first_free = 0; - tipc_ref_table.last_free = 0; - tipc_ref_table.index_mask = actual_size - 1; - tipc_ref_table.start_mask = start & ~tipc_ref_table.index_mask; - - return 0; -} - -/** - * tipc_ref_table_stop - destroy reference table for objects - */ -void tipc_ref_table_stop(void) -{ - vfree(tipc_ref_table.entries); - tipc_ref_table.entries = NULL; -} - -/** - * tipc_ref_acquire - create reference to an object - * - * Register an object pointer in reference table and lock the object. - * Returns a unique reference value that is used from then on to retrieve the - * object pointer, or to determine that the object has been deregistered. - * - * Note: The object is returned in the locked state so that the caller can - * register a partially initialized object, without running the risk that - * the object will be accessed before initialization is complete. - */ -u32 tipc_ref_acquire(void *object, spinlock_t **lock) -{ - u32 index; - u32 index_mask; - u32 next_plus_upper; - u32 ref; - struct reference *entry = NULL; - - if (!object) { - pr_err("Attempt to acquire ref. 
to non-existent obj\n"); - return 0; - } - if (!tipc_ref_table.entries) { - pr_err("Ref. table not found in acquisition attempt\n"); - return 0; - } - - /* take a free entry, if available; otherwise initialize a new entry */ - spin_lock_bh(&ref_table_lock); - if (tipc_ref_table.first_free) { - index = tipc_ref_table.first_free; - entry = &(tipc_ref_table.entries[index]); - index_mask = tipc_ref_table.index_mask; - next_plus_upper = entry->ref; - tipc_ref_table.first_free = next_plus_upper & index_mask; - ref = (next_plus_upper & ~index_mask) + index; - } else if (tipc_ref_table.init_point < tipc_ref_table.capacity) { - index = tipc_ref_table.init_point++; - entry = &(tipc_ref_table.entries[index]); - spin_lock_init(&entry->lock); - ref = tipc_ref_table.start_mask + index; - } else { - ref = 0; - } - spin_unlock_bh(&ref_table_lock); - - /* - * Grab the lock so no one else can modify this entry - * While we assign its ref value & object pointer - */ - if (entry) { - spin_lock_bh(&entry->lock); - entry->ref = ref; - entry->object = object; - *lock = &entry->lock; - /* - * keep it locked, the caller is responsible - * for unlocking this when they're done with it - */ - } - - return ref; -} - -/** - * tipc_ref_discard - invalidate references to an object - * - * Disallow future references to an object and free up the entry for re-use. - * Note: The entry's spin_lock may still be busy after discard - */ -void tipc_ref_discard(u32 ref) -{ - struct reference *entry; - u32 index; - u32 index_mask; - - if (!tipc_ref_table.entries) { - pr_err("Ref. table not found during discard attempt\n"); - return; - } - - index_mask = tipc_ref_table.index_mask; - index = ref & index_mask; - entry = &(tipc_ref_table.entries[index]); - - spin_lock_bh(&ref_table_lock); - - if (!entry->object) { - pr_err("Attempt to discard ref. to non-existent obj\n"); - goto exit; - } - if (entry->ref != ref) { - pr_err("Attempt to discard non-existent reference\n"); - goto exit; - } - - /* - * mark entry as unused; increment instance part of entry's reference - * to invalidate any subsequent references - */ - entry->object = NULL; - entry->ref = (ref & ~index_mask) + (index_mask + 1); - - /* append entry to free entry list */ - if (tipc_ref_table.first_free == 0) - tipc_ref_table.first_free = index; - else - tipc_ref_table.entries[tipc_ref_table.last_free].ref |= index; - tipc_ref_table.last_free = index; - -exit: - spin_unlock_bh(&ref_table_lock); -} - -/** - * tipc_ref_lock - lock referenced object and return pointer to it - */ -void *tipc_ref_lock(u32 ref) -{ - if (likely(tipc_ref_table.entries)) { - struct reference *entry; - - entry = &tipc_ref_table.entries[ref & - tipc_ref_table.index_mask]; - if (likely(entry->ref != 0)) { - spin_lock_bh(&entry->lock); - if (likely((entry->ref == ref) && (entry->object))) - return entry->object; - spin_unlock_bh(&entry->lock); - } - } - return NULL; -} diff --git a/net/tipc/ref.h b/net/tipc/ref.h deleted file mode 100644 index d01aa1df63b..00000000000 --- a/net/tipc/ref.h +++ /dev/null @@ -1,48 +0,0 @@ -/* - * net/tipc/ref.h: Include file for TIPC object registry code - * - * Copyright (c) 1991-2006, Ericsson AB - * Copyright (c) 2005-2006, Wind River Systems - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. 
Redistributions in binary form must reproduce the above copyright
- *    notice, this list of conditions and the following disclaimer in the
- *    documentation and/or other materials provided with the distribution.
- * 3. Neither the names of the copyright holders nor the names of its
- *    contributors may be used to endorse or promote products derived from
- *    this software without specific prior written permission.
- *
- * Alternatively, this software may be distributed under the terms of the
- * GNU General Public License ("GPL") version 2 as published by the Free
- * Software Foundation.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-#ifndef _TIPC_REF_H
-#define _TIPC_REF_H
-
-int tipc_ref_table_init(u32 requested_size, u32 start);
-void tipc_ref_table_stop(void);
-
-u32 tipc_ref_acquire(void *object, spinlock_t **lock);
-void tipc_ref_discard(u32 ref);
-
-void *tipc_ref_lock(u32 ref);
-
-#endif
diff --git a/net/tipc/socket.c b/net/tipc/socket.c
index ff8c8118d56..75275c5cf92 100644
--- a/net/tipc/socket.c
+++ b/net/tipc/socket.c
@@ -35,17 +35,67 @@
  */
 
 #include "core.h"
-#include "port.h"
 #include "name_table.h"
 #include "node.h"
 #include "link.h"
 #include <linux/export.h>
+#include "config.h"
+#include "socket.h"
 
 #define SS_LISTENING	-1	/* socket is listening */
 #define SS_READY	-2	/* socket is connectionless */
 
-#define CONN_TIMEOUT_DEFAULT	8000	/* default connect timeout = 8s */
-#define TIPC_FWD_MSG		1
+#define CONN_TIMEOUT_DEFAULT	8000	/* default connect timeout = 8s */
+#define CONN_PROBING_INTERVAL	3600000	/* [ms] => 1 h */
+#define TIPC_FWD_MSG		1
+#define TIPC_CONN_OK		0
+#define TIPC_CONN_PROBING	1
+
+/**
+ * struct tipc_sock - TIPC socket structure
+ * @sk: socket - interacts with the rest of the TIPC stack and with user via
+ * the socket API
+ * @connected: non-zero if port is currently connected to a peer port
+ * @conn_type: TIPC type used when connection was established
+ * @conn_instance: TIPC instance used when connection was established
+ * @published: non-zero if port has one or more associated names
+ * @max_pkt: maximum packet size "hint" used when building messages sent by port
+ * @ref: unique reference to port in TIPC object registry
+ * @phdr: preformatted message header used when sending messages
+ * @sock_list: adjacent sockets in TIPC's global list of sockets
+ * @publications: list of publications for port
+ * @pub_count: total # of publications port has made during its lifetime
+ * @probing_state:
+ * @probing_interval:
+ * @timer:
+ * @conn_timeout: the time we can wait for an unresponded setup request
+ * @dupl_rcvcnt: number of bytes counted twice, in both backlog and rcv queue
+ * @link_cong: non-zero if owner must
sleep because of link congestion + * @sent_unacked: # messages sent by socket, and not yet acked by peer + * @rcv_unacked: # messages read by user, but not yet acked back to peer + */ +struct tipc_sock { + struct sock sk; + int connected; + u32 conn_type; + u32 conn_instance; + int published; + u32 max_pkt; + u32 ref; + struct tipc_msg phdr; + struct list_head sock_list; + struct list_head publications; + u32 pub_count; + u32 probing_state; + u32 probing_interval; + struct timer_list timer; + uint conn_timeout; + atomic_t dupl_rcvcnt; + bool link_cong; + uint sent_unacked; + uint rcv_unacked; +}; static int tipc_backlog_rcv(struct sock *sk, struct sk_buff *skb); static void tipc_data_ready(struct sock *sk); @@ -53,6 +103,16 @@ static void tipc_write_space(struct sock *sk); static int tipc_release(struct socket *sock); static int tipc_accept(struct socket *sock, struct socket *new_sock, int flags); static int tipc_wait_for_sndmsg(struct socket *sock, long *timeo_p); +static void tipc_sk_timeout(unsigned long ref); +static int tipc_sk_publish(struct tipc_sock *tsk, uint scope, + struct tipc_name_seq const *seq); +static int tipc_sk_withdraw(struct tipc_sock *tsk, uint scope, + struct tipc_name_seq const *seq); +static u32 tipc_sk_ref_acquire(struct tipc_sock *tsk); +static void tipc_sk_ref_discard(u32 ref); +static struct tipc_sock *tipc_sk_get(u32 ref); +static struct tipc_sock *tipc_sk_get_next(u32 *ref); +static void tipc_sk_put(struct tipc_sock *tsk); static const struct proto_ops packet_ops; static const struct proto_ops stream_ops; @@ -106,24 +166,75 @@ static struct proto tipc_proto_kern; * - port reference */ -#include "socket.h" +static u32 tsk_peer_node(struct tipc_sock *tsk) +{ + return msg_destnode(&tsk->phdr); +} + +static u32 tsk_peer_port(struct tipc_sock *tsk) +{ + return msg_destport(&tsk->phdr); +} + +static bool tsk_unreliable(struct tipc_sock *tsk) +{ + return msg_src_droppable(&tsk->phdr) != 0; +} + +static void tsk_set_unreliable(struct tipc_sock *tsk, bool unreliable) +{ + msg_set_src_droppable(&tsk->phdr, unreliable ? 1 : 0); +} + +static bool tsk_unreturnable(struct tipc_sock *tsk) +{ + return msg_dest_droppable(&tsk->phdr) != 0; +} + +static void tsk_set_unreturnable(struct tipc_sock *tsk, bool unreturnable) +{ + msg_set_dest_droppable(&tsk->phdr, unreturnable ? 
1 : 0); +} + +static int tsk_importance(struct tipc_sock *tsk) +{ + return msg_importance(&tsk->phdr); +} + +static int tsk_set_importance(struct tipc_sock *tsk, int imp) +{ + if (imp > TIPC_CRITICAL_IMPORTANCE) + return -EINVAL; + msg_set_importance(&tsk->phdr, (u32)imp); + return 0; +} + +static struct tipc_sock *tipc_sk(const struct sock *sk) +{ + return container_of(sk, struct tipc_sock, sk); +} + +static int tsk_conn_cong(struct tipc_sock *tsk) +{ + return tsk->sent_unacked >= TIPC_FLOWCTRL_WIN; +} /** - * advance_rx_queue - discard first buffer in socket receive queue + * tsk_advance_rx_queue - discard first buffer in socket receive queue * * Caller must hold socket lock */ -static void advance_rx_queue(struct sock *sk) +static void tsk_advance_rx_queue(struct sock *sk) { kfree_skb(__skb_dequeue(&sk->sk_receive_queue)); } /** - * reject_rx_queue - reject all buffers in socket receive queue + * tsk_rej_rx_queue - reject all buffers in socket receive queue * * Caller must hold socket lock */ -static void reject_rx_queue(struct sock *sk) +static void tsk_rej_rx_queue(struct sock *sk) { struct sk_buff *buf; u32 dnode; @@ -134,6 +245,38 @@ static void reject_rx_queue(struct sock *sk) } } +/* tsk_peer_msg - verify if message was sent by connected port's peer + * + * Handles cases where the node's network address has changed from + * the default of <0.0.0> to its configured setting. + */ +static bool tsk_peer_msg(struct tipc_sock *tsk, struct tipc_msg *msg) +{ + u32 peer_port = tsk_peer_port(tsk); + u32 orig_node; + u32 peer_node; + + if (unlikely(!tsk->connected)) + return false; + + if (unlikely(msg_origport(msg) != peer_port)) + return false; + + orig_node = msg_orignode(msg); + peer_node = tsk_peer_node(tsk); + + if (likely(orig_node == peer_node)) + return true; + + if (!orig_node && (peer_node == tipc_own_addr)) + return true; + + if (!peer_node && (orig_node == tipc_own_addr)) + return true; + + return false; +} + /** * tipc_sk_create - create a TIPC socket * @net: network namespace (must be default network) @@ -153,7 +296,7 @@ static int tipc_sk_create(struct net *net, struct socket *sock, socket_state state; struct sock *sk; struct tipc_sock *tsk; - struct tipc_port *port; + struct tipc_msg *msg; u32 ref; /* Validate arguments */ @@ -188,20 +331,24 @@ static int tipc_sk_create(struct net *net, struct socket *sock, return -ENOMEM; tsk = tipc_sk(sk); - port = &tsk->port; - - ref = tipc_port_init(port, TIPC_LOW_IMPORTANCE); + ref = tipc_sk_ref_acquire(tsk); if (!ref) { - pr_warn("Socket registration failed, ref. 
table exhausted\n"); - sk_free(sk); + pr_warn("Socket create failed; reference table exhausted\n"); return -ENOMEM; } + tsk->max_pkt = MAX_PKT_DEFAULT; + tsk->ref = ref; + INIT_LIST_HEAD(&tsk->publications); + msg = &tsk->phdr; + tipc_msg_init(msg, TIPC_LOW_IMPORTANCE, TIPC_NAMED_MSG, + NAMED_H_SIZE, 0); + msg_set_origport(msg, ref); /* Finish initializing socket data structures */ sock->ops = ops; sock->state = state; - sock_init_data(sock, sk); + k_init_timer(&tsk->timer, (Handler)tipc_sk_timeout, ref); sk->sk_backlog_rcv = tipc_backlog_rcv; sk->sk_rcvbuf = sysctl_tipc_rmem[1]; sk->sk_data_ready = tipc_data_ready; @@ -209,12 +356,11 @@ static int tipc_sk_create(struct net *net, struct socket *sock, tsk->conn_timeout = CONN_TIMEOUT_DEFAULT; tsk->sent_unacked = 0; atomic_set(&tsk->dupl_rcvcnt, 0); - tipc_port_unlock(port); if (sock->state == SS_READY) { - tipc_port_set_unreturnable(port, true); + tsk_set_unreturnable(tsk, true); if (sock->type == SOCK_DGRAM) - tipc_port_set_unreliable(port, true); + tsk_set_unreliable(tsk, true); } return 0; } @@ -308,7 +454,6 @@ static int tipc_release(struct socket *sock) { struct sock *sk = sock->sk; struct tipc_sock *tsk; - struct tipc_port *port; struct sk_buff *buf; u32 dnode; @@ -320,13 +465,13 @@ static int tipc_release(struct socket *sock) return 0; tsk = tipc_sk(sk); - port = &tsk->port; lock_sock(sk); /* * Reject all unreceived messages, except on an active connection * (which disconnects locally & sends a 'FIN+' to peer) */ + dnode = tsk_peer_node(tsk); while (sock->state != SS_DISCONNECTING) { buf = __skb_dequeue(&sk->sk_receive_queue); if (buf == NULL) @@ -337,17 +482,27 @@ static int tipc_release(struct socket *sock) if ((sock->state == SS_CONNECTING) || (sock->state == SS_CONNECTED)) { sock->state = SS_DISCONNECTING; - tipc_port_disconnect(port->ref); + tsk->connected = 0; + tipc_node_remove_conn(dnode, tsk->ref); } if (tipc_msg_reverse(buf, &dnode, TIPC_ERR_NO_PORT)) tipc_link_xmit(buf, dnode, 0); } } - /* Destroy TIPC port; also disconnects an active connection and - * sends a 'FIN-' to peer. - */ - tipc_port_destroy(port); + tipc_sk_withdraw(tsk, 0, NULL); + tipc_sk_ref_discard(tsk->ref); + k_cancel_timer(&tsk->timer); + if (tsk->connected) { + buf = tipc_msg_create(TIPC_CRITICAL_IMPORTANCE, TIPC_CONN_MSG, + SHORT_H_SIZE, 0, dnode, tipc_own_addr, + tsk_peer_port(tsk), + tsk->ref, TIPC_ERR_NO_PORT); + if (buf) + tipc_link_xmit(buf, dnode, tsk->ref); + tipc_node_remove_conn(dnode, tsk->ref); + } + k_term_timer(&tsk->timer); /* Discard any remaining (connection-based) messages in receive queue */ __skb_queue_purge(&sk->sk_receive_queue); @@ -355,7 +510,6 @@ static int tipc_release(struct socket *sock) /* Reject any messages that accumulated in backlog queue */ sock->state = SS_DISCONNECTING; release_sock(sk); - sock_put(sk); sock->sk = NULL; @@ -387,7 +541,7 @@ static int tipc_bind(struct socket *sock, struct sockaddr *uaddr, lock_sock(sk); if (unlikely(!uaddr_len)) { - res = tipc_withdraw(&tsk->port, 0, NULL); + res = tipc_sk_withdraw(tsk, 0, NULL); goto exit; } @@ -415,8 +569,8 @@ static int tipc_bind(struct socket *sock, struct sockaddr *uaddr, } res = (addr->scope > 0) ? 
- tipc_publish(&tsk->port, addr->scope, &addr->addr.nameseq) : - tipc_withdraw(&tsk->port, -addr->scope, &addr->addr.nameseq); + tipc_sk_publish(tsk, addr->scope, &addr->addr.nameseq) : + tipc_sk_withdraw(tsk, -addr->scope, &addr->addr.nameseq); exit: release_sock(sk); return res; @@ -446,10 +600,10 @@ static int tipc_getname(struct socket *sock, struct sockaddr *uaddr, if ((sock->state != SS_CONNECTED) && ((peer != 2) || (sock->state != SS_DISCONNECTING))) return -ENOTCONN; - addr->addr.id.ref = tipc_port_peerport(&tsk->port); - addr->addr.id.node = tipc_port_peernode(&tsk->port); + addr->addr.id.ref = tsk_peer_port(tsk); + addr->addr.id.node = tsk_peer_node(tsk); } else { - addr->addr.id.ref = tsk->port.ref; + addr->addr.id.ref = tsk->ref; addr->addr.id.node = tipc_own_addr; } @@ -518,7 +672,7 @@ static unsigned int tipc_poll(struct file *file, struct socket *sock, break; case SS_READY: case SS_CONNECTED: - if (!tsk->link_cong && !tipc_sk_conn_cong(tsk)) + if (!tsk->link_cong && !tsk_conn_cong(tsk)) mask |= POLLOUT; /* fall thru' */ case SS_CONNECTING: @@ -549,7 +703,7 @@ static int tipc_sendmcast(struct socket *sock, struct tipc_name_seq *seq, struct iovec *iov, size_t dsz, long timeo) { struct sock *sk = sock->sk; - struct tipc_msg *mhdr = &tipc_sk(sk)->port.phdr; + struct tipc_msg *mhdr = &tipc_sk(sk)->phdr; struct sk_buff *buf; uint mtu; int rc; @@ -579,6 +733,7 @@ new_mtu: goto new_mtu; if (rc != -ELINKCONG) break; + tipc_sk(sk)->link_cong = 1; rc = tipc_wait_for_sndmsg(sock, &timeo); if (rc) kfree_skb_list(buf); @@ -638,20 +793,19 @@ static int tipc_sk_proto_rcv(struct tipc_sock *tsk, u32 *dnode, struct sk_buff *buf) { struct tipc_msg *msg = buf_msg(buf); - struct tipc_port *port = &tsk->port; int conn_cong; /* Ignore if connection cannot be validated: */ - if (!port->connected || !tipc_port_peer_msg(port, msg)) + if (!tsk_peer_msg(tsk, msg)) goto exit; - port->probing_state = TIPC_CONN_OK; + tsk->probing_state = TIPC_CONN_OK; if (msg_type(msg) == CONN_ACK) { - conn_cong = tipc_sk_conn_cong(tsk); + conn_cong = tsk_conn_cong(tsk); tsk->sent_unacked -= msg_msgcnt(msg); if (conn_cong) - tipc_sock_wakeup(tsk); + tsk->sk.sk_write_space(&tsk->sk); } else if (msg_type(msg) == CONN_PROBE) { if (!tipc_msg_reverse(buf, dnode, TIPC_OK)) return TIPC_OK; @@ -742,8 +896,7 @@ static int tipc_sendmsg(struct kiocb *iocb, struct socket *sock, DECLARE_SOCKADDR(struct sockaddr_tipc *, dest, m->msg_name); struct sock *sk = sock->sk; struct tipc_sock *tsk = tipc_sk(sk); - struct tipc_port *port = &tsk->port; - struct tipc_msg *mhdr = &port->phdr; + struct tipc_msg *mhdr = &tsk->phdr; struct iovec *iov = m->msg_iov; u32 dnode, dport; struct sk_buff *buf; @@ -774,13 +927,13 @@ static int tipc_sendmsg(struct kiocb *iocb, struct socket *sock, rc = -EISCONN; goto exit; } - if (tsk->port.published) { + if (tsk->published) { rc = -EOPNOTSUPP; goto exit; } if (dest->addrtype == TIPC_ADDR_NAME) { - tsk->port.conn_type = dest->addr.name.name.type; - tsk->port.conn_instance = dest->addr.name.name.instance; + tsk->conn_type = dest->addr.name.name.type; + tsk->conn_instance = dest->addr.name.name.instance; } } rc = dest_name_check(dest, m); @@ -820,13 +973,14 @@ static int tipc_sendmsg(struct kiocb *iocb, struct socket *sock, } new_mtu: - mtu = tipc_node_get_mtu(dnode, tsk->port.ref); + mtu = tipc_node_get_mtu(dnode, tsk->ref); rc = tipc_msg_build(mhdr, iov, 0, dsz, mtu, &buf); if (rc < 0) goto exit; do { - rc = tipc_link_xmit(buf, dnode, tsk->port.ref); + TIPC_SKB_CB(buf)->wakeup_pending = tsk->link_cong; + rc = 
tipc_link_xmit(buf, dnode, tsk->ref); if (likely(rc >= 0)) { if (sock->state != SS_READY) sock->state = SS_CONNECTING; @@ -835,10 +989,9 @@ new_mtu: } if (rc == -EMSGSIZE) goto new_mtu; - if (rc != -ELINKCONG) break; - + tsk->link_cong = 1; rc = tipc_wait_for_sndmsg(sock, &timeo); if (rc) kfree_skb_list(buf); @@ -873,8 +1026,8 @@ static int tipc_wait_for_sndpkt(struct socket *sock, long *timeo_p) prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE); done = sk_wait_event(sk, timeo_p, (!tsk->link_cong && - !tipc_sk_conn_cong(tsk)) || - !tsk->port.connected); + !tsk_conn_cong(tsk)) || + !tsk->connected); finish_wait(sk_sleep(sk), &wait); } while (!done); return 0; @@ -897,11 +1050,10 @@ static int tipc_send_stream(struct kiocb *iocb, struct socket *sock, { struct sock *sk = sock->sk; struct tipc_sock *tsk = tipc_sk(sk); - struct tipc_port *port = &tsk->port; - struct tipc_msg *mhdr = &port->phdr; + struct tipc_msg *mhdr = &tsk->phdr; struct sk_buff *buf; DECLARE_SOCKADDR(struct sockaddr_tipc *, dest, m->msg_name); - u32 ref = port->ref; + u32 ref = tsk->ref; int rc = -EINVAL; long timeo; u32 dnode; @@ -929,16 +1081,16 @@ static int tipc_send_stream(struct kiocb *iocb, struct socket *sock, } timeo = sock_sndtimeo(sk, m->msg_flags & MSG_DONTWAIT); - dnode = tipc_port_peernode(port); + dnode = tsk_peer_node(tsk); next: - mtu = port->max_pkt; + mtu = tsk->max_pkt; send = min_t(uint, dsz - sent, TIPC_MAX_USER_MSG_SIZE); rc = tipc_msg_build(mhdr, m->msg_iov, sent, send, mtu, &buf); if (unlikely(rc < 0)) goto exit; do { - if (likely(!tipc_sk_conn_cong(tsk))) { + if (likely(!tsk_conn_cong(tsk))) { rc = tipc_link_xmit(buf, dnode, ref); if (likely(!rc)) { tsk->sent_unacked++; @@ -948,11 +1100,12 @@ next: goto next; } if (rc == -EMSGSIZE) { - port->max_pkt = tipc_node_get_mtu(dnode, ref); + tsk->max_pkt = tipc_node_get_mtu(dnode, ref); goto next; } if (rc != -ELINKCONG) break; + tsk->link_cong = 1; } rc = tipc_wait_for_sndpkt(sock, &timeo); if (rc) @@ -984,29 +1137,25 @@ static int tipc_send_packet(struct kiocb *iocb, struct socket *sock, return tipc_send_stream(iocb, sock, m, dsz); } -/** - * auto_connect - complete connection setup to a remote port - * @tsk: tipc socket structure - * @msg: peer's response message - * - * Returns 0 on success, errno otherwise +/* tipc_sk_finish_conn - complete the setup of a connection */ -static int auto_connect(struct tipc_sock *tsk, struct tipc_msg *msg) +static void tipc_sk_finish_conn(struct tipc_sock *tsk, u32 peer_port, + u32 peer_node) { - struct tipc_port *port = &tsk->port; - struct socket *sock = tsk->sk.sk_socket; - struct tipc_portid peer; - - peer.ref = msg_origport(msg); - peer.node = msg_orignode(msg); - - __tipc_port_connect(port->ref, port, &peer); - - if (msg_importance(msg) > TIPC_CRITICAL_IMPORTANCE) - return -EINVAL; - msg_set_importance(&port->phdr, (u32)msg_importance(msg)); - sock->state = SS_CONNECTED; - return 0; + struct tipc_msg *msg = &tsk->phdr; + + msg_set_destnode(msg, peer_node); + msg_set_destport(msg, peer_port); + msg_set_type(msg, TIPC_CONN_MSG); + msg_set_lookup_scope(msg, 0); + msg_set_hdr_sz(msg, SHORT_H_SIZE); + + tsk->probing_interval = CONN_PROBING_INTERVAL; + tsk->probing_state = TIPC_CONN_OK; + tsk->connected = 1; + k_start_timer(&tsk->timer, tsk->probing_interval); + tipc_node_add_conn(peer_node, tsk->ref, peer_port); + tsk->max_pkt = tipc_node_get_mtu(peer_node, tsk->ref); } /** @@ -1033,17 +1182,17 @@ static void set_orig_addr(struct msghdr *m, struct tipc_msg *msg) } /** - * anc_data_recv - optionally capture 
ancillary data for received message + * tipc_sk_anc_data_recv - optionally capture ancillary data for received message * @m: descriptor for message info * @msg: received message header - * @tport: TIPC port associated with message + * @tsk: TIPC port associated with message * * Note: Ancillary data is not captured if not requested by receiver. * * Returns 0 if successful, otherwise errno */ -static int anc_data_recv(struct msghdr *m, struct tipc_msg *msg, - struct tipc_port *tport) +static int tipc_sk_anc_data_recv(struct msghdr *m, struct tipc_msg *msg, + struct tipc_sock *tsk) { u32 anc_data[3]; u32 err; @@ -1086,10 +1235,10 @@ static int anc_data_recv(struct msghdr *m, struct tipc_msg *msg, anc_data[2] = msg_nameupper(msg); break; case TIPC_CONN_MSG: - has_name = (tport->conn_type != 0); - anc_data[0] = tport->conn_type; - anc_data[1] = tport->conn_instance; - anc_data[2] = tport->conn_instance; + has_name = (tsk->conn_type != 0); + anc_data[0] = tsk->conn_type; + anc_data[1] = tsk->conn_instance; + anc_data[2] = tsk->conn_instance; break; default: has_name = 0; @@ -1103,6 +1252,24 @@ static int anc_data_recv(struct msghdr *m, struct tipc_msg *msg, return 0; } +static void tipc_sk_send_ack(struct tipc_sock *tsk, uint ack) +{ + struct sk_buff *buf = NULL; + struct tipc_msg *msg; + u32 peer_port = tsk_peer_port(tsk); + u32 dnode = tsk_peer_node(tsk); + + if (!tsk->connected) + return; + buf = tipc_msg_create(CONN_MANAGER, CONN_ACK, INT_H_SIZE, 0, dnode, + tipc_own_addr, peer_port, tsk->ref, TIPC_OK); + if (!buf) + return; + msg = buf_msg(buf); + msg_set_msgcnt(msg, ack); + tipc_link_xmit(buf, dnode, msg_link_selector(msg)); +} + static int tipc_wait_for_rcvmsg(struct socket *sock, long *timeop) { struct sock *sk = sock->sk; @@ -1153,7 +1320,6 @@ static int tipc_recvmsg(struct kiocb *iocb, struct socket *sock, { struct sock *sk = sock->sk; struct tipc_sock *tsk = tipc_sk(sk); - struct tipc_port *port = &tsk->port; struct sk_buff *buf; struct tipc_msg *msg; long timeo; @@ -1188,7 +1354,7 @@ restart: /* Discard an empty non-errored message & try again */ if ((!sz) && (!err)) { - advance_rx_queue(sk); + tsk_advance_rx_queue(sk); goto restart; } @@ -1196,7 +1362,7 @@ restart: set_orig_addr(m, msg); /* Capture ancillary data (optional) */ - res = anc_data_recv(m, msg, port); + res = tipc_sk_anc_data_recv(m, msg, tsk); if (res) goto exit; @@ -1223,10 +1389,10 @@ restart: if (likely(!(flags & MSG_PEEK))) { if ((sock->state != SS_READY) && (++tsk->rcv_unacked >= TIPC_CONNACK_INTV)) { - tipc_acknowledge(port->ref, tsk->rcv_unacked); + tipc_sk_send_ack(tsk, tsk->rcv_unacked); tsk->rcv_unacked = 0; } - advance_rx_queue(sk); + tsk_advance_rx_queue(sk); } exit: release_sock(sk); @@ -1250,7 +1416,6 @@ static int tipc_recv_stream(struct kiocb *iocb, struct socket *sock, { struct sock *sk = sock->sk; struct tipc_sock *tsk = tipc_sk(sk); - struct tipc_port *port = &tsk->port; struct sk_buff *buf; struct tipc_msg *msg; long timeo; @@ -1288,14 +1453,14 @@ restart: /* Discard an empty non-errored message & try again */ if ((!sz) && (!err)) { - advance_rx_queue(sk); + tsk_advance_rx_queue(sk); goto restart; } /* Optionally capture sender's address & ancillary data of first msg */ if (sz_copied == 0) { set_orig_addr(m, msg); - res = anc_data_recv(m, msg, port); + res = tipc_sk_anc_data_recv(m, msg, tsk); if (res) goto exit; } @@ -1334,10 +1499,10 @@ restart: /* Consume received message (optional) */ if (likely(!(flags & MSG_PEEK))) { if (unlikely(++tsk->rcv_unacked >= TIPC_CONNACK_INTV)) { - 
tipc_acknowledge(port->ref, tsk->rcv_unacked);
+			tipc_sk_send_ack(tsk, tsk->rcv_unacked);
 			tsk->rcv_unacked = 0;
 		}
-		advance_rx_queue(sk);
+		tsk_advance_rx_queue(sk);
 	}
 
 	/* Loop around if more data is required */
@@ -1396,12 +1561,9 @@ static void tipc_data_ready(struct sock *sk)
 static int filter_connect(struct tipc_sock *tsk, struct sk_buff **buf)
 {
 	struct sock *sk = &tsk->sk;
-	struct tipc_port *port = &tsk->port;
 	struct socket *sock = sk->sk_socket;
 	struct tipc_msg *msg = buf_msg(*buf);
-
 	int retval = -TIPC_ERR_NO_PORT;
-	int res;
 
 	if (msg_mcast(msg))
 		return retval;
 
 	switch ((int)sock->state) {
 	case SS_CONNECTED:
 		/* Accept only connection-based messages sent by peer */
-		if (msg_connected(msg) && tipc_port_peer_msg(port, msg)) {
+		if (tsk_peer_msg(tsk, msg)) {
 			if (unlikely(msg_errcode(msg))) {
 				sock->state = SS_DISCONNECTING;
-				__tipc_port_disconnect(port);
+				tsk->connected = 0;
+				/* let timer expire on its own */
+				tipc_node_remove_conn(tsk_peer_node(tsk),
+						      tsk->ref);
 			}
 			retval = TIPC_OK;
 		}
 		break;
 	case SS_CONNECTING:
 		/* Accept only ACK or NACK message */
+
+		if (unlikely(!msg_connected(msg)))
+			break;
+
 		if (unlikely(msg_errcode(msg))) {
 			sock->state = SS_DISCONNECTING;
 			sk->sk_err = ECONNREFUSED;
@@ -1426,17 +1595,17 @@ static int filter_connect(struct tipc_sock *tsk, struct sk_buff **buf)
 			break;
 		}
-		if (unlikely(!msg_connected(msg)))
-			break;
-
-		res = auto_connect(tsk, msg);
-		if (res) {
+		if (unlikely(msg_importance(msg) > TIPC_CRITICAL_IMPORTANCE)) {
 			sock->state = SS_DISCONNECTING;
-			sk->sk_err = -res;
+			sk->sk_err = EINVAL;
 			retval = TIPC_OK;
 			break;
 		}
+		tipc_sk_finish_conn(tsk, msg_origport(msg), msg_orignode(msg));
+		msg_set_importance(&tsk->phdr, msg_importance(msg));
+		sock->state = SS_CONNECTED;
+
 		/* If an incoming message is an 'ACK-', it should be
 		 * discarded here because it doesn't contain useful
 		 * data. In addition, we should try to wake up
@@ -1518,6 +1687,13 @@ static int filter_rcv(struct sock *sk, struct sk_buff *buf)
 	if (unlikely(msg_user(msg) == CONN_MANAGER))
 		return tipc_sk_proto_rcv(tsk, &onode, buf);
 
+	if (unlikely(msg_user(msg) == SOCK_WAKEUP)) {
+		kfree_skb(buf);
+		tsk->link_cong = 0;
+		sk->sk_write_space(sk);
+		return TIPC_OK;
+	}
+
 	/* Reject message if it is wrong sort of message for socket */
 	if (msg_type(msg) > TIPC_DIRECT_MSG)
 		return -TIPC_ERR_NO_PORT;
@@ -1585,7 +1761,6 @@ static int tipc_backlog_rcv(struct sock *sk, struct sk_buff *buf)
 int tipc_sk_rcv(struct sk_buff *buf)
 {
 	struct tipc_sock *tsk;
-	struct tipc_port *port;
 	struct sock *sk;
 	u32 dport = msg_destport(buf_msg(buf));
 	int rc = TIPC_OK;
@@ -1593,13 +1768,11 @@ int tipc_sk_rcv(struct sk_buff *buf)
 	u32 dnode;
 
 	/* Validate destination and message */
-	port = tipc_port_lock(dport);
-	if (unlikely(!port)) {
+	tsk = tipc_sk_get(dport);
+	if (unlikely(!tsk)) {
 		rc = tipc_msg_eval(buf, &dnode);
 		goto exit;
 	}
-
-	tsk = tipc_port_to_sock(port);
 	sk = &tsk->sk;
 
 	/* Queue message */
@@ -1615,8 +1788,7 @@ int tipc_sk_rcv(struct sk_buff *buf)
 		rc = -TIPC_ERR_OVERLOAD;
 	}
 	bh_unlock_sock(sk);
-	tipc_port_unlock(port);
-
+	tipc_sk_put(tsk);
 	if (likely(!rc))
 		return 0;
 exit:
@@ -1803,10 +1975,8 @@ static int tipc_accept(struct socket *sock, struct socket *new_sock, int flags)
 {
 	struct sock *new_sk, *sk = sock->sk;
 	struct sk_buff *buf;
-	struct tipc_port *new_port;
+	struct tipc_sock *new_tsock;
 	struct tipc_msg *msg;
-	struct tipc_portid peer;
-	u32 new_ref;
 	long timeo;
 	int res;
@@ -1828,8 +1998,7 @@ static int tipc_accept(struct socket *sock, struct socket *new_sock, int flags)
 		goto exit;
 
 	new_sk = new_sock->sk;
-	new_port = &tipc_sk(new_sk)->port;
-	new_ref = new_port->ref;
+	new_tsock = tipc_sk(new_sk);
 	msg = buf_msg(buf);
 
 	/* we lock on new_sk; but lockdep sees the lock on sk */
@@ -1839,18 +2008,16 @@ static int tipc_accept(struct socket *sock, struct socket *new_sock, int flags)
 	 * Reject any stray messages received by new socket
 	 * before the socket lock was taken (very, very unlikely)
 	 */
-	reject_rx_queue(new_sk);
+	tsk_rej_rx_queue(new_sk);
 
 	/* Connect new socket to its peer */
-	peer.ref = msg_origport(msg);
-	peer.node = msg_orignode(msg);
-	tipc_port_connect(new_ref, &peer);
+	tipc_sk_finish_conn(new_tsock, msg_origport(msg), msg_orignode(msg));
 	new_sock->state = SS_CONNECTED;
 
-	tipc_port_set_importance(new_port, msg_importance(msg));
+	tsk_set_importance(new_tsock, msg_importance(msg));
 	if (msg_named(msg)) {
-		new_port->conn_type = msg_nametype(msg);
-		new_port->conn_instance = msg_nameinst(msg);
+		new_tsock->conn_type = msg_nametype(msg);
+		new_tsock->conn_instance = msg_nameinst(msg);
 	}
 
 	/*
@@ -1860,7 +2027,7 @@ static int tipc_accept(struct socket *sock, struct socket *new_sock, int flags)
 	if (!msg_data_sz(msg)) {
 		struct msghdr m = {NULL,};
 
-		advance_rx_queue(sk);
+		tsk_advance_rx_queue(sk);
 		tipc_send_packet(NULL, new_sock, &m, 0);
 	} else {
 		__skb_dequeue(&sk->sk_receive_queue);
@@ -1886,9 +2053,8 @@ static int tipc_shutdown(struct socket *sock, int how)
 {
 	struct sock *sk = sock->sk;
 	struct tipc_sock *tsk = tipc_sk(sk);
-	struct tipc_port *port = &tsk->port;
 	struct sk_buff *buf;
-	u32 peer;
+	u32 dnode;
 	int res;
 
 	if (how != SHUT_RDWR)
@@ -1908,15 +2074,21 @@ restart:
 			kfree_skb(buf);
 			goto restart;
 		}
-		tipc_port_disconnect(port->ref);
-		if (tipc_msg_reverse(buf, &peer, TIPC_CONN_SHUTDOWN))
-			tipc_link_xmit(buf, peer, 0);
+		if (tipc_msg_reverse(buf, &dnode, TIPC_CONN_SHUTDOWN))
+			tipc_link_xmit(buf, dnode, tsk->ref);
+
tipc_node_remove_conn(dnode, tsk->ref); } else { - tipc_port_shutdown(port->ref); + dnode = tsk_peer_node(tsk); + buf = tipc_msg_create(TIPC_CRITICAL_IMPORTANCE, + TIPC_CONN_MSG, SHORT_H_SIZE, + 0, dnode, tipc_own_addr, + tsk_peer_port(tsk), + tsk->ref, TIPC_CONN_SHUTDOWN); + tipc_link_xmit(buf, dnode, tsk->ref); } - + tsk->connected = 0; sock->state = SS_DISCONNECTING; - + tipc_node_remove_conn(dnode, tsk->ref); /* fall through */ case SS_DISCONNECTING: @@ -1937,6 +2109,432 @@ restart: return res; } +static void tipc_sk_timeout(unsigned long ref) +{ + struct tipc_sock *tsk; + struct sock *sk; + struct sk_buff *buf = NULL; + u32 peer_port, peer_node; + + tsk = tipc_sk_get(ref); + if (!tsk) + return; + + sk = &tsk->sk; + bh_lock_sock(sk); + if (!tsk->connected) { + bh_unlock_sock(sk); + goto exit; + } + peer_port = tsk_peer_port(tsk); + peer_node = tsk_peer_node(tsk); + + if (tsk->probing_state == TIPC_CONN_PROBING) { + /* Previous probe not answered -> self abort */ + buf = tipc_msg_create(TIPC_CRITICAL_IMPORTANCE, TIPC_CONN_MSG, + SHORT_H_SIZE, 0, tipc_own_addr, + peer_node, ref, peer_port, + TIPC_ERR_NO_PORT); + } else { + buf = tipc_msg_create(CONN_MANAGER, CONN_PROBE, INT_H_SIZE, + 0, peer_node, tipc_own_addr, + peer_port, ref, TIPC_OK); + tsk->probing_state = TIPC_CONN_PROBING; + k_start_timer(&tsk->timer, tsk->probing_interval); + } + bh_unlock_sock(sk); + if (buf) + tipc_link_xmit(buf, peer_node, ref); +exit: + tipc_sk_put(tsk); +} + +static int tipc_sk_publish(struct tipc_sock *tsk, uint scope, + struct tipc_name_seq const *seq) +{ + struct publication *publ; + u32 key; + + if (tsk->connected) + return -EINVAL; + key = tsk->ref + tsk->pub_count + 1; + if (key == tsk->ref) + return -EADDRINUSE; + + publ = tipc_nametbl_publish(seq->type, seq->lower, seq->upper, + scope, tsk->ref, key); + if (unlikely(!publ)) + return -EINVAL; + + list_add(&publ->pport_list, &tsk->publications); + tsk->pub_count++; + tsk->published = 1; + return 0; +} + +static int tipc_sk_withdraw(struct tipc_sock *tsk, uint scope, + struct tipc_name_seq const *seq) +{ + struct publication *publ; + struct publication *safe; + int rc = -EINVAL; + + list_for_each_entry_safe(publ, safe, &tsk->publications, pport_list) { + if (seq) { + if (publ->scope != scope) + continue; + if (publ->type != seq->type) + continue; + if (publ->lower != seq->lower) + continue; + if (publ->upper != seq->upper) + break; + tipc_nametbl_withdraw(publ->type, publ->lower, + publ->ref, publ->key); + rc = 0; + break; + } + tipc_nametbl_withdraw(publ->type, publ->lower, + publ->ref, publ->key); + rc = 0; + } + if (list_empty(&tsk->publications)) + tsk->published = 0; + return rc; +} + +static int tipc_sk_show(struct tipc_sock *tsk, char *buf, + int len, int full_id) +{ + struct publication *publ; + int ret; + + if (full_id) + ret = tipc_snprintf(buf, len, "<%u.%u.%u:%u>:", + tipc_zone(tipc_own_addr), + tipc_cluster(tipc_own_addr), + tipc_node(tipc_own_addr), tsk->ref); + else + ret = tipc_snprintf(buf, len, "%-10u:", tsk->ref); + + if (tsk->connected) { + u32 dport = tsk_peer_port(tsk); + u32 destnode = tsk_peer_node(tsk); + + ret += tipc_snprintf(buf + ret, len - ret, + " connected to <%u.%u.%u:%u>", + tipc_zone(destnode), + tipc_cluster(destnode), + tipc_node(destnode), dport); + if (tsk->conn_type != 0) + ret += tipc_snprintf(buf + ret, len - ret, + " via {%u,%u}", tsk->conn_type, + tsk->conn_instance); + } else if (tsk->published) { + ret += tipc_snprintf(buf + ret, len - ret, " bound to"); + list_for_each_entry(publ, &tsk->publications, 
pport_list) {
+			if (publ->lower == publ->upper)
+				ret += tipc_snprintf(buf + ret, len - ret,
+						     " {%u,%u}", publ->type,
+						     publ->lower);
+			else
+				ret += tipc_snprintf(buf + ret, len - ret,
+						     " {%u,%u,%u}", publ->type,
+						     publ->lower, publ->upper);
+		}
+	}
+	ret += tipc_snprintf(buf + ret, len - ret, "\n");
+	return ret;
+}
+
+struct sk_buff *tipc_sk_socks_show(void)
+{
+	struct sk_buff *buf;
+	struct tlv_desc *rep_tlv;
+	char *pb;
+	int pb_len;
+	struct tipc_sock *tsk;
+	int str_len = 0;
+	u32 ref = 0;
+
+	buf = tipc_cfg_reply_alloc(TLV_SPACE(ULTRA_STRING_MAX_LEN));
+	if (!buf)
+		return NULL;
+	rep_tlv = (struct tlv_desc *)buf->data;
+	pb = TLV_DATA(rep_tlv);
+	pb_len = ULTRA_STRING_MAX_LEN;
+
+	tsk = tipc_sk_get_next(&ref);
+	for (; tsk; tsk = tipc_sk_get_next(&ref)) {
+		lock_sock(&tsk->sk);
+		str_len += tipc_sk_show(tsk, pb + str_len,
+					pb_len - str_len, 0);
+		release_sock(&tsk->sk);
+		tipc_sk_put(tsk);
+	}
+	str_len += 1;	/* for "\0" */
+	skb_put(buf, TLV_SPACE(str_len));
+	TLV_SET(rep_tlv, TIPC_TLV_ULTRA_STRING, NULL, str_len);
+
+	return buf;
+}
+
+/* tipc_sk_reinit: set non-zero address in all existing sockets
+ * when we go from standalone to network mode.
+ */
+void tipc_sk_reinit(void)
+{
+	struct tipc_msg *msg;
+	u32 ref = 0;
+	struct tipc_sock *tsk = tipc_sk_get_next(&ref);
+
+	for (; tsk; tsk = tipc_sk_get_next(&ref)) {
+		lock_sock(&tsk->sk);
+		msg = &tsk->phdr;
+		msg_set_prevnode(msg, tipc_own_addr);
+		msg_set_orignode(msg, tipc_own_addr);
+		release_sock(&tsk->sk);
+		tipc_sk_put(tsk);
+	}
+}
+
+/**
+ * struct reference - TIPC socket reference entry
+ * @tsk: pointer to socket associated with reference entry
+ * @ref: reference value for socket (combines instance & array index info)
+ */
+struct reference {
+	struct tipc_sock *tsk;
+	u32 ref;
+};
+
+/**
+ * struct ref_table - table of TIPC socket reference entries
+ * @entries: pointer to array of reference entries
+ * @capacity: array index of first unusable entry
+ * @init_point: array index of first uninitialized entry
+ * @first_free: array index of first unused socket reference entry
+ * @last_free: array index of last unused socket reference entry
+ * @index_mask: bitmask for array index portion of reference values
+ * @start_mask: initial value for instance value portion of reference values
+ */
+struct ref_table {
+	struct reference *entries;
+	u32 capacity;
+	u32 init_point;
+	u32 first_free;
+	u32 last_free;
+	u32 index_mask;
+	u32 start_mask;
+};
+
+/* Socket reference table consists of 2**N entries.
+ *
+ * State	Socket ptr	Reference
+ * -----	----------	---------
+ * In use	non-NULL	XXXX|own index
+ *				(XXXX changes each time entry is acquired)
+ * Free		NULL		YYYY|next free index
+ *				(YYYY is one more than last used XXXX)
+ * Uninitialized NULL		0
+ *
+ * Entry 0 is not used; this allows index 0 to denote the end of the free list.
+ *
+ * Note that a reference value of 0 does not necessarily indicate that an
+ * entry is uninitialized, since the last entry in the free list could also
+ * have a reference value of 0 (although this is unlikely).
+ */
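Editor's note: as a worked example of the encoding described in the table above — assuming a table of 2**N entries, where the low N bits of a reference select the array slot and the high bits carry a per-entry instance counter — here is a small, self-contained C sketch. The names are invented for exposition and do not exist in TIPC.

#include <assert.h>
#include <stdint.h>

#define TABLE_SIZE 16u			/* 2**N, here N = 4 */
#define INDEX_MASK (TABLE_SIZE - 1u)

static uint32_t make_ref(uint32_t instance, uint32_t index)
{
	return (instance & ~INDEX_MASK) | (index & INDEX_MASK);
}

int main(void)
{
	uint32_t ref = make_ref(0x12340, 5);	/* instance bits + slot 5 */

	assert((ref & INDEX_MASK) == 5);	/* recover the array index   */
	assert((ref & ~INDEX_MASK) == 0x12340);	/* recover the instance part */

	/* A free entry reuses the same packing: its ref field holds the
	 * next free index in the low bits, exactly as the table above
	 * describes for the free list.
	 */
	return 0;
}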
+
+static struct ref_table tipc_ref_table;
+
+static DEFINE_RWLOCK(ref_table_lock);
+
+/**
+ * tipc_sk_ref_table_init - create reference table for sockets
+ */
+int tipc_sk_ref_table_init(u32 req_sz, u32 start)
+{
+	struct reference *table;
+	u32 actual_sz;
+
+	/* account for unused entry, then round up size to a power of 2 */
+
+	req_sz++;
+	for (actual_sz = 16; actual_sz < req_sz; actual_sz <<= 1) {
+		/* do nothing */
+	}
+
+	/* allocate table & mark all entries as uninitialized */
+	table = vzalloc(actual_sz * sizeof(struct reference));
+	if (table == NULL)
+		return -ENOMEM;
+
+	tipc_ref_table.entries = table;
+	tipc_ref_table.capacity = req_sz;
+	tipc_ref_table.init_point = 1;
+	tipc_ref_table.first_free = 0;
+	tipc_ref_table.last_free = 0;
+	tipc_ref_table.index_mask = actual_sz - 1;
+	tipc_ref_table.start_mask = start & ~tipc_ref_table.index_mask;
+
+	return 0;
+}
+
+/**
+ * tipc_sk_ref_table_stop - destroy reference table for sockets
+ */
+void tipc_sk_ref_table_stop(void)
+{
+	if (!tipc_ref_table.entries)
+		return;
+	vfree(tipc_ref_table.entries);
+	tipc_ref_table.entries = NULL;
+}
+
+/* tipc_sk_ref_acquire - create reference to a socket
+ *
+ * Register a socket pointer in the reference table.
+ * Returns a unique reference value that is used from then on to retrieve the
+ * socket pointer, or to determine if the socket has been deregistered.
+ */
+u32 tipc_sk_ref_acquire(struct tipc_sock *tsk)
+{
+	u32 index;
+	u32 index_mask;
+	u32 next_plus_upper;
+	u32 ref = 0;
+	struct reference *entry;
+
+	if (unlikely(!tsk)) {
+		pr_err("Attempt to acquire ref. to non-existent obj\n");
+		return 0;
+	}
+	if (unlikely(!tipc_ref_table.entries)) {
+		pr_err("Ref. table not found in acquisition attempt\n");
+		return 0;
+	}
+
+	/* Take a free entry, if available; otherwise initialize a new one */
+	write_lock_bh(&ref_table_lock);
+	index = tipc_ref_table.first_free;
+	entry = &tipc_ref_table.entries[index];
+
+	if (likely(index)) {
+		index_mask = tipc_ref_table.index_mask;
+		next_plus_upper = entry->ref;
+		tipc_ref_table.first_free = next_plus_upper & index_mask;
+		ref = (next_plus_upper & ~index_mask) + index;
+		entry->tsk = tsk;
+	} else if (tipc_ref_table.init_point < tipc_ref_table.capacity) {
+		index = tipc_ref_table.init_point++;
+		entry = &tipc_ref_table.entries[index];
+		ref = tipc_ref_table.start_mask + index;
+	}
+
+	if (ref) {
+		entry->ref = ref;
+		entry->tsk = tsk;
+	}
+	write_unlock_bh(&ref_table_lock);
+	return ref;
+}
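Editor's note: tipc_sk_ref_discard(), implemented just below, relies on the same packing to invalidate stale references: on discard the instance (high) part of the entry's reference is advanced, so a reference handed out earlier no longer matches entry->ref and tipc_sk_get() fails instead of touching a recycled slot. A minimal two-slot model of just that invariant, with invented names and no TIPC code:

#include <assert.h>
#include <stddef.h>
#include <stdint.h>

#define INDEX_MASK 1u

struct entry {
	void *obj;
	uint32_t ref;
};

static struct entry table[2];

static void *lookup(uint32_t ref)
{
	struct entry *e = &table[ref & INDEX_MASK];

	return (e->obj && e->ref == ref) ? e->obj : NULL;
}

static void discard(uint32_t ref)
{
	struct entry *e = &table[ref & INDEX_MASK];

	e->obj = NULL;
	/* advance the instance (high) part; the index part is reused later */
	e->ref = (ref & ~INDEX_MASK) + (INDEX_MASK + 1);
}

int main(void)
{
	int x;
	uint32_t ref = 2u | 1u;		/* instance 2, slot 1 */

	table[1].obj = &x;
	table[1].ref = ref;
	assert(lookup(ref) == &x);	/* live reference resolves   */

	discard(ref);
	assert(lookup(ref) == NULL);	/* stale reference now fails */
	return 0;
}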
+/* tipc_sk_ref_discard - invalidate reference to a socket
+ *
+ * Disallow future references to a socket and free up the entry for re-use.
+ */
+void tipc_sk_ref_discard(u32 ref)
+{
+	struct reference *entry;
+	u32 index;
+	u32 index_mask;
+
+	if (unlikely(!tipc_ref_table.entries)) {
+		pr_err("Ref. table not found during discard attempt\n");
+		return;
+	}
+
+	index_mask = tipc_ref_table.index_mask;
+	index = ref & index_mask;
+	entry = &tipc_ref_table.entries[index];
+
+	write_lock_bh(&ref_table_lock);
+
+	if (unlikely(!entry->tsk)) {
+		pr_err("Attempt to discard ref. to non-existent socket\n");
+		goto exit;
+	}
+	if (unlikely(entry->ref != ref)) {
+		pr_err("Attempt to discard non-existent reference\n");
+		goto exit;
+	}
+
+	/* Mark entry as unused; increment instance part of entry's
+	 * reference to invalidate any subsequent references
+	 */
+	entry->tsk = NULL;
+	entry->ref = (ref & ~index_mask) + (index_mask + 1);
+
+	/* Append entry to free entry list */
+	if (unlikely(tipc_ref_table.first_free == 0))
+		tipc_ref_table.first_free = index;
+	else
+		tipc_ref_table.entries[tipc_ref_table.last_free].ref |= index;
+	tipc_ref_table.last_free = index;
+exit:
+	write_unlock_bh(&ref_table_lock);
+}
+
+/* tipc_sk_get - find referenced socket and return pointer to it
+ */
+struct tipc_sock *tipc_sk_get(u32 ref)
+{
+	struct reference *entry;
+	struct tipc_sock *tsk;
+
+	if (unlikely(!tipc_ref_table.entries))
+		return NULL;
+	read_lock_bh(&ref_table_lock);
+	entry = &tipc_ref_table.entries[ref & tipc_ref_table.index_mask];
+	tsk = entry->tsk;
+	if (likely(tsk && (entry->ref == ref)))
+		sock_hold(&tsk->sk);
+	else
+		tsk = NULL;
+	read_unlock_bh(&ref_table_lock);
+	return tsk;
+}
+
+/* tipc_sk_get_next - find & hold next socket in table after referenced one
+ */
+struct tipc_sock *tipc_sk_get_next(u32 *ref)
+{
+	struct reference *entry;
+	struct tipc_sock *tsk = NULL;
+	uint index = *ref & tipc_ref_table.index_mask;
+
+	read_lock_bh(&ref_table_lock);
+	while (++index < tipc_ref_table.capacity) {
+		entry = &tipc_ref_table.entries[index];
+		if (!entry->tsk)
+			continue;
+		tsk = entry->tsk;
+		sock_hold(&tsk->sk);
+		*ref = entry->ref;
+		break;
+	}
+	read_unlock_bh(&ref_table_lock);
+	return tsk;
+}
+
+static void tipc_sk_put(struct tipc_sock *tsk)
+{
+	sock_put(&tsk->sk);
+}
+
 /**
  * tipc_setsockopt - set socket option
  * @sock: socket structure
@@ -1955,7 +2553,6 @@ static int tipc_setsockopt(struct socket *sock, int lvl, int opt,
 {
 	struct sock *sk = sock->sk;
 	struct tipc_sock *tsk = tipc_sk(sk);
-	struct tipc_port *port = &tsk->port;
 	u32 value;
 	int res;
@@ -1973,16 +2570,16 @@ static int tipc_setsockopt(struct socket *sock, int lvl, int opt,
 	switch (opt) {
 	case TIPC_IMPORTANCE:
-		res = tipc_port_set_importance(port, value);
+		res = tsk_set_importance(tsk, value);
 		break;
 	case TIPC_SRC_DROPPABLE:
 		if (sock->type != SOCK_STREAM)
-			tipc_port_set_unreliable(port, value);
+			tsk_set_unreliable(tsk, value);
 		else
 			res = -ENOPROTOOPT;
 		break;
 	case TIPC_DEST_DROPPABLE:
-		tipc_port_set_unreturnable(port, value);
+		tsk_set_unreturnable(tsk, value);
 		break;
 	case TIPC_CONN_TIMEOUT:
 		tipc_sk(sk)->conn_timeout = value;
@@ -2015,7 +2612,6 @@ static int tipc_getsockopt(struct socket *sock, int lvl, int opt,
 {
 	struct sock *sk = sock->sk;
 	struct tipc_sock *tsk = tipc_sk(sk);
-	struct tipc_port *port = &tsk->port;
 	int len;
 	u32 value;
 	int res;
@@ -2032,16 +2628,16 @@ static int tipc_getsockopt(struct socket *sock, int lvl, int opt,
 	switch (opt) {
 	case TIPC_IMPORTANCE:
-		value = tipc_port_importance(port);
+		value = tsk_importance(tsk);
 		break;
 	case TIPC_SRC_DROPPABLE:
-		value = tipc_port_unreliable(port);
+		value = tsk_unreliable(tsk);
 		break;
 	case TIPC_DEST_DROPPABLE:
-		value = tipc_port_unreturnable(port);
+		value = tsk_unreturnable(tsk);
 		break;
 	case TIPC_CONN_TIMEOUT:
-		value = tipc_sk(sk)->conn_timeout;
+		value = tsk->conn_timeout;
 		/* no need to set "res", since already 0 at this point */
 		break;
 	case TIPC_NODE_RECVQ_DEPTH:
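The discard/lookup pair above is what makes stale references safe: discarding bumps the instance bits of the entry's reference, so an old reference value held elsewhere no longer matches. A self-contained userspace model of just that invariant (free-list bookkeeping omitted; names and values invented):

#include <stdint.h>
#include <stdio.h>

#define TBL_SIZE	16
#define IDX_MASK	(TBL_SIZE - 1)

struct entry { void *obj; uint32_t ref; };
static struct entry tbl[TBL_SIZE];

static void *lookup(uint32_t ref)
{
	struct entry *e = &tbl[ref & IDX_MASK];

	/* valid only if the slot is in use AND the instance still matches */
	return (e->obj && e->ref == ref) ? e->obj : NULL;
}

static void discard(uint32_t ref)
{
	struct entry *e = &tbl[ref & IDX_MASK];

	if (!e->obj || e->ref != ref)
		return;
	e->obj = NULL;
	/* bump the instance part so any stale copy of 'ref' stops matching */
	e->ref = (ref & ~IDX_MASK) + (IDX_MASK + 1);
}

int main(void)
{
	int sock;
	uint32_t ref = 0x10005;		/* instance 0x10000, slot 5 */

	tbl[ref & IDX_MASK] = (struct entry){ &sock, ref };	/* "acquired" */
	printf("before discard: %p\n", lookup(ref));	/* -> &sock */
	discard(ref);
	printf("after discard:  %p\n", lookup(ref));	/* -> NULL */
	return 0;
}

This mirrors the check in tipc_sk_get, where a pointer is returned (and held) only when both the socket pointer is non-NULL and the stored reference equals the one presented.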
diff --git a/net/tipc/socket.h b/net/tipc/socket.h
index 43b75b3cece..baa43d03901 100644
--- a/net/tipc/socket.h
+++ b/net/tipc/socket.h
@@ -35,56 +35,17 @@
 #ifndef _TIPC_SOCK_H
 #define _TIPC_SOCK_H
 
-#include "port.h"
 #include <net/sock.h>
 
-#define TIPC_CONN_OK      0
-#define TIPC_CONN_PROBING 1
-
-/**
- * struct tipc_sock - TIPC socket structure
- * @sk: socket - interacts with 'port' and with user via the socket API
- * @port: port - interacts with 'sk' and with the rest of the TIPC stack
- * @peer_name: the peer of the connection, if any
- * @conn_timeout: the time we can wait for an unresponded setup request
- * @dupl_rcvcnt: number of bytes counted twice, in both backlog and rcv queue
- * @link_cong: non-zero if owner must sleep because of link congestion
- * @sent_unacked: # messages sent by socket, and not yet acked by peer
- * @rcv_unacked: # messages read by user, but not yet acked back to peer
- */
-
-struct tipc_sock {
-	struct sock sk;
-	struct tipc_port port;
-	unsigned int conn_timeout;
-	atomic_t dupl_rcvcnt;
-	int link_cong;
-	uint sent_unacked;
-	uint rcv_unacked;
-};
-
-static inline struct tipc_sock *tipc_sk(const struct sock *sk)
-{
-	return container_of(sk, struct tipc_sock, sk);
-}
-
-static inline struct tipc_sock *tipc_port_to_sock(const struct tipc_port *port)
-{
-	return container_of(port, struct tipc_sock, port);
-}
-
-static inline void tipc_sock_wakeup(struct tipc_sock *tsk)
-{
-	tsk->sk.sk_write_space(&tsk->sk);
-}
-
-static inline int tipc_sk_conn_cong(struct tipc_sock *tsk)
-{
-	return tsk->sent_unacked >= TIPC_FLOWCTRL_WIN;
-}
-
+#define TIPC_CONNACK_INTV	  256
+#define TIPC_FLOWCTRL_WIN	  (TIPC_CONNACK_INTV * 2)
+#define TIPC_CONN_OVERLOAD_LIMIT  ((TIPC_FLOWCTRL_WIN * 2 + 1) * \
+				   SKB_TRUESIZE(TIPC_MAX_USER_MSG_SIZE))
 int tipc_sk_rcv(struct sk_buff *buf);
-
+struct sk_buff *tipc_sk_socks_show(void);
 void tipc_sk_mcast_rcv(struct sk_buff *buf);
+void tipc_sk_reinit(void);
+int tipc_sk_ref_table_init(u32 requested_size, u32 start);
+void tipc_sk_ref_table_stop(void);
 
 #endif
diff --git a/net/tipc/subscr.c b/net/tipc/subscr.c
index 642437231ad..31b5cb232a4 100644
--- a/net/tipc/subscr.c
+++ b/net/tipc/subscr.c
@@ -36,7 +36,6 @@
 
 #include "core.h"
 #include "name_table.h"
-#include "port.h"
 #include "subscr.h"
 
 /**
diff --git a/net/tipc/sysctl.c b/net/tipc/sysctl.c
index f3fef93325a..1a779b1e851 100644
--- a/net/tipc/sysctl.c
+++ b/net/tipc/sysctl.c
@@ -47,6 +47,13 @@ static struct ctl_table tipc_table[] = {
 		.mode		= 0644,
 		.proc_handler	= proc_dointvec,
 	},
+	{
+		.procname	= "named_timeout",
+		.data		= &sysctl_tipc_named_timeout,
+		.maxlen		= sizeof(sysctl_tipc_named_timeout),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec,
+	},
 	{}
 };
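The flow-control constants that socket.h now carries fit together as follows: the receiver appears to acknowledge roughly every TIPC_CONNACK_INTV messages, the sender backs off once TIPC_FLOWCTRL_WIN messages are unacked, and the overload limit leaves room for 2*WIN+1 maximum-size buffers in the receive queue. A small arithmetic sketch; the truesize overhead and the message-size ceiling below are assumed stand-ins, not the kernel's SKB_TRUESIZE() or TIPC_MAX_USER_MSG_SIZE:

#include <stdio.h>

#define TIPC_CONNACK_INTV	256
#define TIPC_FLOWCTRL_WIN	(TIPC_CONNACK_INTV * 2)
#define MAX_MSG			66000u		/* assumed user-message ceiling */
#define TRUESIZE(sz)		((sz) + 320u)	/* made-up per-skb overhead */
#define OVERLOAD_LIMIT		((TIPC_FLOWCTRL_WIN * 2 + 1) * TRUESIZE(MAX_MSG))

int main(void)
{
	/* a peer may have up to 2*WIN+1 max-size messages queued */
	printf("ack interval %d msgs, window %d msgs, limit %u bytes\n",
	       TIPC_CONNACK_INTV, TIPC_FLOWCTRL_WIN, (unsigned)OVERLOAD_LIMIT);
	return 0;
}

Moving these defines out of the removed struct tipc_sock block keeps them visible to callers of the new tsk_* helpers without dragging port.h back in.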