diff options
Diffstat (limited to 'net')
84 files changed, 795 insertions, 436 deletions
diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c index 6f142f03716..733ec283ed1 100644 --- a/net/8021q/vlan_dev.c +++ b/net/8021q/vlan_dev.c @@ -493,10 +493,48 @@ static void vlan_dev_change_rx_flags(struct net_device *dev, int change) } } +static int vlan_calculate_locking_subclass(struct net_device *real_dev) +{ + int subclass = 0; + + while (is_vlan_dev(real_dev)) { + subclass++; + real_dev = vlan_dev_priv(real_dev)->real_dev; + } + + return subclass; +} + +static void vlan_dev_mc_sync(struct net_device *to, struct net_device *from) +{ + int err = 0, subclass; + + subclass = vlan_calculate_locking_subclass(to); + + spin_lock_nested(&to->addr_list_lock, subclass); + err = __hw_addr_sync(&to->mc, &from->mc, to->addr_len); + if (!err) + __dev_set_rx_mode(to); + spin_unlock(&to->addr_list_lock); +} + +static void vlan_dev_uc_sync(struct net_device *to, struct net_device *from) +{ + int err = 0, subclass; + + subclass = vlan_calculate_locking_subclass(to); + + spin_lock_nested(&to->addr_list_lock, subclass); + err = __hw_addr_sync(&to->uc, &from->uc, to->addr_len); + if (!err) + __dev_set_rx_mode(to); + spin_unlock(&to->addr_list_lock); +} + static void vlan_dev_set_rx_mode(struct net_device *vlan_dev) { - dev_mc_sync(vlan_dev_priv(vlan_dev)->real_dev, vlan_dev); - dev_uc_sync(vlan_dev_priv(vlan_dev)->real_dev, vlan_dev); + vlan_dev_mc_sync(vlan_dev_priv(vlan_dev)->real_dev, vlan_dev); + vlan_dev_uc_sync(vlan_dev_priv(vlan_dev)->real_dev, vlan_dev); } /* @@ -608,9 +646,7 @@ static int vlan_dev_init(struct net_device *dev) SET_NETDEV_DEVTYPE(dev, &vlan_type); - if (is_vlan_dev(real_dev)) - subclass = 1; - + subclass = vlan_calculate_locking_subclass(dev); vlan_dev_set_lockdep_class(dev, subclass); vlan_dev_priv(dev)->vlan_pcpu_stats = netdev_alloc_pcpu_stats(struct vlan_pcpu_stats); diff --git a/net/batman-adv/bat_iv_ogm.c b/net/batman-adv/bat_iv_ogm.c index b3bd4ec3fd9..f04224c3200 100644 --- a/net/batman-adv/bat_iv_ogm.c +++ b/net/batman-adv/bat_iv_ogm.c @@ -1545,6 +1545,8 @@ out_neigh: if ((orig_neigh_node) && (!is_single_hop_neigh)) batadv_orig_node_free_ref(orig_neigh_node); out: + if (router_ifinfo) + batadv_neigh_ifinfo_free_ref(router_ifinfo); if (router) batadv_neigh_node_free_ref(router); if (router_router) diff --git a/net/batman-adv/originator.c b/net/batman-adv/originator.c index ffd9dfbd9b0..1785da37b82 100644 --- a/net/batman-adv/originator.c +++ b/net/batman-adv/originator.c @@ -501,12 +501,17 @@ batadv_neigh_node_get(const struct batadv_orig_node *orig_node, static void batadv_orig_ifinfo_free_rcu(struct rcu_head *rcu) { struct batadv_orig_ifinfo *orig_ifinfo; + struct batadv_neigh_node *router; orig_ifinfo = container_of(rcu, struct batadv_orig_ifinfo, rcu); if (orig_ifinfo->if_outgoing != BATADV_IF_DEFAULT) batadv_hardif_free_ref_now(orig_ifinfo->if_outgoing); + /* this is the last reference to this object */ + router = rcu_dereference_protected(orig_ifinfo->router, true); + if (router) + batadv_neigh_node_free_ref_now(router); kfree(orig_ifinfo); } @@ -702,6 +707,47 @@ free_orig_node: } /** + * batadv_purge_neigh_ifinfo - purge obsolete ifinfo entries from neighbor + * @bat_priv: the bat priv with all the soft interface information + * @neigh: orig node which is to be checked + */ +static void +batadv_purge_neigh_ifinfo(struct batadv_priv *bat_priv, + struct batadv_neigh_node *neigh) +{ + struct batadv_neigh_ifinfo *neigh_ifinfo; + struct batadv_hard_iface *if_outgoing; + struct hlist_node *node_tmp; + + spin_lock_bh(&neigh->ifinfo_lock); + + /* for all ifinfo objects for this neighinator */ + hlist_for_each_entry_safe(neigh_ifinfo, node_tmp, + &neigh->ifinfo_list, list) { + if_outgoing = neigh_ifinfo->if_outgoing; + + /* always keep the default interface */ + if (if_outgoing == BATADV_IF_DEFAULT) + continue; + + /* don't purge if the interface is not (going) down */ + if ((if_outgoing->if_status != BATADV_IF_INACTIVE) && + (if_outgoing->if_status != BATADV_IF_NOT_IN_USE) && + (if_outgoing->if_status != BATADV_IF_TO_BE_REMOVED)) + continue; + + batadv_dbg(BATADV_DBG_BATMAN, bat_priv, + "neighbor/ifinfo purge: neighbor %pM, iface: %s\n", + neigh->addr, if_outgoing->net_dev->name); + + hlist_del_rcu(&neigh_ifinfo->list); + batadv_neigh_ifinfo_free_ref(neigh_ifinfo); + } + + spin_unlock_bh(&neigh->ifinfo_lock); +} + +/** * batadv_purge_orig_ifinfo - purge obsolete ifinfo entries from originator * @bat_priv: the bat priv with all the soft interface information * @orig_node: orig node which is to be checked @@ -800,6 +846,11 @@ batadv_purge_orig_neighbors(struct batadv_priv *bat_priv, hlist_del_rcu(&neigh_node->list); batadv_neigh_node_free_ref(neigh_node); + } else { + /* only necessary if not the whole neighbor is to be + * deleted, but some interface has been removed. + */ + batadv_purge_neigh_ifinfo(bat_priv, neigh_node); } } @@ -857,7 +908,7 @@ static bool batadv_purge_orig_node(struct batadv_priv *bat_priv, { struct batadv_neigh_node *best_neigh_node; struct batadv_hard_iface *hard_iface; - bool changed; + bool changed_ifinfo, changed_neigh; if (batadv_has_timed_out(orig_node->last_seen, 2 * BATADV_PURGE_TIMEOUT)) { @@ -867,10 +918,10 @@ static bool batadv_purge_orig_node(struct batadv_priv *bat_priv, jiffies_to_msecs(orig_node->last_seen)); return true; } - changed = batadv_purge_orig_ifinfo(bat_priv, orig_node); - changed = changed || batadv_purge_orig_neighbors(bat_priv, orig_node); + changed_ifinfo = batadv_purge_orig_ifinfo(bat_priv, orig_node); + changed_neigh = batadv_purge_orig_neighbors(bat_priv, orig_node); - if (!changed) + if (!changed_ifinfo && !changed_neigh) return false; /* first for NULL ... */ diff --git a/net/bridge/br_netfilter.c b/net/bridge/br_netfilter.c index 80e1b0f60a3..2acf7fa1fec 100644 --- a/net/bridge/br_netfilter.c +++ b/net/bridge/br_netfilter.c @@ -859,12 +859,12 @@ static unsigned int br_nf_forward_arp(const struct nf_hook_ops *ops, return NF_STOLEN; } -#if IS_ENABLED(CONFIG_NF_CONNTRACK_IPV4) +#if IS_ENABLED(CONFIG_NF_DEFRAG_IPV4) static int br_nf_dev_queue_xmit(struct sk_buff *skb) { int ret; - if (skb->nfct != NULL && skb->protocol == htons(ETH_P_IP) && + if (skb->protocol == htons(ETH_P_IP) && skb->len + nf_bridge_mtu_reduction(skb) > skb->dev->mtu && !skb_is_gso(skb)) { if (br_parse_ip_options(skb)) diff --git a/net/bridge/br_netlink.c b/net/bridge/br_netlink.c index e74b6d530cb..e8844d975b3 100644 --- a/net/bridge/br_netlink.c +++ b/net/bridge/br_netlink.c @@ -445,6 +445,20 @@ static int br_validate(struct nlattr *tb[], struct nlattr *data[]) return 0; } +static int br_dev_newlink(struct net *src_net, struct net_device *dev, + struct nlattr *tb[], struct nlattr *data[]) +{ + struct net_bridge *br = netdev_priv(dev); + + if (tb[IFLA_ADDRESS]) { + spin_lock_bh(&br->lock); + br_stp_change_bridge_id(br, nla_data(tb[IFLA_ADDRESS])); + spin_unlock_bh(&br->lock); + } + + return register_netdevice(dev); +} + static size_t br_get_link_af_size(const struct net_device *dev) { struct net_port_vlans *pv; @@ -473,6 +487,7 @@ struct rtnl_link_ops br_link_ops __read_mostly = { .priv_size = sizeof(struct net_bridge), .setup = br_dev_setup, .validate = br_validate, + .newlink = br_dev_newlink, .dellink = br_dev_delete, }; diff --git a/net/can/gw.c b/net/can/gw.c index ac31891967d..050a2110d43 100644 --- a/net/can/gw.c +++ b/net/can/gw.c @@ -804,7 +804,7 @@ static int cgw_create_job(struct sk_buff *skb, struct nlmsghdr *nlh) u8 limhops = 0; int err = 0; - if (!capable(CAP_NET_ADMIN)) + if (!netlink_capable(skb, CAP_NET_ADMIN)) return -EPERM; if (nlmsg_len(nlh) < sizeof(*r)) @@ -893,7 +893,7 @@ static int cgw_remove_job(struct sk_buff *skb, struct nlmsghdr *nlh) u8 limhops = 0; int err = 0; - if (!capable(CAP_NET_ADMIN)) + if (!netlink_capable(skb, CAP_NET_ADMIN)) return -EPERM; if (nlmsg_len(nlh) < sizeof(*r)) diff --git a/net/ceph/osdmap.c b/net/ceph/osdmap.c index e632b5a52f5..8b8a5a24b22 100644 --- a/net/ceph/osdmap.c +++ b/net/ceph/osdmap.c @@ -1548,8 +1548,10 @@ static void apply_primary_affinity(struct ceph_osdmap *osdmap, u32 pps, return; for (i = 0; i < len; i++) { - if (osds[i] != CRUSH_ITEM_NONE && - osdmap->osd_primary_affinity[i] != + int osd = osds[i]; + + if (osd != CRUSH_ITEM_NONE && + osdmap->osd_primary_affinity[osd] != CEPH_OSD_DEFAULT_PRIMARY_AFFINITY) { break; } @@ -1563,10 +1565,9 @@ static void apply_primary_affinity(struct ceph_osdmap *osdmap, u32 pps, * osd's pgs get rejected as primary. */ for (i = 0; i < len; i++) { - int osd; + int osd = osds[i]; u32 aff; - osd = osds[i]; if (osd == CRUSH_ITEM_NONE) continue; diff --git a/net/core/dev.c b/net/core/dev.c index 14dac0654f2..c619b864133 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -2284,7 +2284,7 @@ EXPORT_SYMBOL(skb_checksum_help); __be16 skb_network_protocol(struct sk_buff *skb, int *depth) { __be16 type = skb->protocol; - int vlan_depth = ETH_HLEN; + int vlan_depth = skb->mac_len; /* Tunnel gso handlers can set protocol to ethernet. */ if (type == htons(ETH_P_TEB)) { @@ -2418,7 +2418,7 @@ EXPORT_SYMBOL(netdev_rx_csum_fault); * 2. No high memory really exists on this machine. */ -static int illegal_highdma(const struct net_device *dev, struct sk_buff *skb) +static int illegal_highdma(struct net_device *dev, struct sk_buff *skb) { #ifdef CONFIG_HIGHMEM int i; @@ -2493,38 +2493,36 @@ static int dev_gso_segment(struct sk_buff *skb, netdev_features_t features) } static netdev_features_t harmonize_features(struct sk_buff *skb, - const struct net_device *dev, - netdev_features_t features) + netdev_features_t features) { int tmp; if (skb->ip_summed != CHECKSUM_NONE && !can_checksum_protocol(features, skb_network_protocol(skb, &tmp))) { features &= ~NETIF_F_ALL_CSUM; - } else if (illegal_highdma(dev, skb)) { + } else if (illegal_highdma(skb->dev, skb)) { features &= ~NETIF_F_SG; } return features; } -netdev_features_t netif_skb_dev_features(struct sk_buff *skb, - const struct net_device *dev) +netdev_features_t netif_skb_features(struct sk_buff *skb) { __be16 protocol = skb->protocol; - netdev_features_t features = dev->features; + netdev_features_t features = skb->dev->features; - if (skb_shinfo(skb)->gso_segs > dev->gso_max_segs) + if (skb_shinfo(skb)->gso_segs > skb->dev->gso_max_segs) features &= ~NETIF_F_GSO_MASK; if (protocol == htons(ETH_P_8021Q) || protocol == htons(ETH_P_8021AD)) { struct vlan_ethhdr *veh = (struct vlan_ethhdr *)skb->data; protocol = veh->h_vlan_encapsulated_proto; } else if (!vlan_tx_tag_present(skb)) { - return harmonize_features(skb, dev, features); + return harmonize_features(skb, features); } - features &= (dev->vlan_features | NETIF_F_HW_VLAN_CTAG_TX | + features &= (skb->dev->vlan_features | NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_HW_VLAN_STAG_TX); if (protocol == htons(ETH_P_8021Q) || protocol == htons(ETH_P_8021AD)) @@ -2532,9 +2530,9 @@ netdev_features_t netif_skb_dev_features(struct sk_buff *skb, NETIF_F_GEN_CSUM | NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_HW_VLAN_STAG_TX; - return harmonize_features(skb, dev, features); + return harmonize_features(skb, features); } -EXPORT_SYMBOL(netif_skb_dev_features); +EXPORT_SYMBOL(netif_skb_features); int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev, struct netdev_queue *txq) @@ -5238,6 +5236,7 @@ void __dev_set_rx_mode(struct net_device *dev) if (ops->ndo_set_rx_mode) ops->ndo_set_rx_mode(dev); } +EXPORT_SYMBOL(__dev_set_rx_mode); void dev_set_rx_mode(struct net_device *dev) { diff --git a/net/core/dst.c b/net/core/dst.c index ca4231ec734..80d6286c8b6 100644 --- a/net/core/dst.c +++ b/net/core/dst.c @@ -142,12 +142,12 @@ loop: mutex_unlock(&dst_gc_mutex); } -int dst_discard(struct sk_buff *skb) +int dst_discard_sk(struct sock *sk, struct sk_buff *skb) { kfree_skb(skb); return 0; } -EXPORT_SYMBOL(dst_discard); +EXPORT_SYMBOL(dst_discard_sk); const u32 dst_default_metrics[RTAX_MAX + 1] = { /* This initializer is needed to force linker to place this variable @@ -184,7 +184,7 @@ void *dst_alloc(struct dst_ops *ops, struct net_device *dev, dst->xfrm = NULL; #endif dst->input = dst_discard; - dst->output = dst_discard; + dst->output = dst_discard_sk; dst->error = 0; dst->obsolete = initial_obsolete; dst->header_len = 0; @@ -209,8 +209,10 @@ static void ___dst_free(struct dst_entry *dst) /* The first case (dev==NULL) is required, when protocol module is unloaded. */ - if (dst->dev == NULL || !(dst->dev->flags&IFF_UP)) - dst->input = dst->output = dst_discard; + if (dst->dev == NULL || !(dst->dev->flags&IFF_UP)) { + dst->input = dst_discard; + dst->output = dst_discard_sk; + } dst->obsolete = DST_OBSOLETE_DEAD; } @@ -361,7 +363,8 @@ static void dst_ifdown(struct dst_entry *dst, struct net_device *dev, return; if (!unregister) { - dst->input = dst->output = dst_discard; + dst->input = dst_discard; + dst->output = dst_discard_sk; } else { dst->dev = dev_net(dst->dev)->loopback_dev; dev_hold(dst->dev); diff --git a/net/core/filter.c b/net/core/filter.c index e08b3822c72..9d79ca0a6e8 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -122,6 +122,13 @@ noinline u64 __bpf_call_base(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5) return 0; } +/* Register mappings for user programs. */ +#define A_REG 0 +#define X_REG 7 +#define TMP_REG 8 +#define ARG2_REG 2 +#define ARG3_REG 3 + /** * __sk_run_filter - run a filter on a given context * @ctx: buffer to run the filter on @@ -242,6 +249,8 @@ unsigned int __sk_run_filter(void *ctx, const struct sock_filter_int *insn) regs[FP_REG] = (u64) (unsigned long) &stack[ARRAY_SIZE(stack)]; regs[ARG1_REG] = (u64) (unsigned long) ctx; + regs[A_REG] = 0; + regs[X_REG] = 0; select_insn: goto *jumptable[insn->code]; @@ -600,6 +609,9 @@ static u64 __skb_get_nlattr(u64 ctx, u64 A, u64 X, u64 r4, u64 r5) if (skb_is_nonlinear(skb)) return 0; + if (skb->len < sizeof(struct nlattr)) + return 0; + if (A > skb->len - sizeof(struct nlattr)) return 0; @@ -618,11 +630,14 @@ static u64 __skb_get_nlattr_nest(u64 ctx, u64 A, u64 X, u64 r4, u64 r5) if (skb_is_nonlinear(skb)) return 0; + if (skb->len < sizeof(struct nlattr)) + return 0; + if (A > skb->len - sizeof(struct nlattr)) return 0; nla = (struct nlattr *) &skb->data[A]; - if (nla->nla_len > A - skb->len) + if (nla->nla_len > skb->len - A) return 0; nla = nla_find_nested(nla, X); @@ -637,13 +652,6 @@ static u64 __get_raw_cpu_id(u64 ctx, u64 A, u64 X, u64 r4, u64 r5) return raw_smp_processor_id(); } -/* Register mappings for user programs. */ -#define A_REG 0 -#define X_REG 7 -#define TMP_REG 8 -#define ARG2_REG 2 -#define ARG3_REG 3 - static bool convert_bpf_extensions(struct sock_filter *fp, struct sock_filter_int **insnp) { @@ -1737,7 +1745,6 @@ void sk_decode_filter(struct sock_filter *filt, struct sock_filter *to) [BPF_S_ANC_RXHASH] = BPF_LD|BPF_B|BPF_ABS, [BPF_S_ANC_CPU] = BPF_LD|BPF_B|BPF_ABS, [BPF_S_ANC_ALU_XOR_X] = BPF_LD|BPF_B|BPF_ABS, - [BPF_S_ANC_SECCOMP_LD_W] = BPF_LD|BPF_B|BPF_ABS, [BPF_S_ANC_VLAN_TAG] = BPF_LD|BPF_B|BPF_ABS, [BPF_S_ANC_VLAN_TAG_PRESENT] = BPF_LD|BPF_B|BPF_ABS, [BPF_S_ANC_PAY_OFFSET] = BPF_LD|BPF_B|BPF_ABS, diff --git a/net/core/neighbour.c b/net/core/neighbour.c index 8f8a96ef9f3..32d872eec7f 100644 --- a/net/core/neighbour.c +++ b/net/core/neighbour.c @@ -1248,8 +1248,8 @@ void __neigh_set_probe_once(struct neighbour *neigh) neigh->updated = jiffies; if (!(neigh->nud_state & NUD_FAILED)) return; - neigh->nud_state = NUD_PROBE; - atomic_set(&neigh->probes, NEIGH_VAR(neigh->parms, UCAST_PROBES)); + neigh->nud_state = NUD_INCOMPLETE; + atomic_set(&neigh->probes, neigh_max_probes(neigh)); neigh_add_timer(neigh, jiffies + NEIGH_VAR(neigh->parms, RETRANS_TIME)); } diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index d4ff41739b0..9837bebf93c 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -774,7 +774,8 @@ static inline int rtnl_vfinfo_size(const struct net_device *dev, return 0; } -static size_t rtnl_port_size(const struct net_device *dev) +static size_t rtnl_port_size(const struct net_device *dev, + u32 ext_filter_mask) { size_t port_size = nla_total_size(4) /* PORT_VF */ + nla_total_size(PORT_PROFILE_MAX) /* PORT_PROFILE */ @@ -790,7 +791,8 @@ static size_t rtnl_port_size(const struct net_device *dev) size_t port_self_size = nla_total_size(sizeof(struct nlattr)) + port_size; - if (!dev->netdev_ops->ndo_get_vf_port || !dev->dev.parent) + if (!dev->netdev_ops->ndo_get_vf_port || !dev->dev.parent || + !(ext_filter_mask & RTEXT_FILTER_VF)) return 0; if (dev_num_vf(dev->dev.parent)) return port_self_size + vf_ports_size + @@ -826,7 +828,7 @@ static noinline size_t if_nlmsg_size(const struct net_device *dev, + nla_total_size(ext_filter_mask & RTEXT_FILTER_VF ? 4 : 0) /* IFLA_NUM_VF */ + rtnl_vfinfo_size(dev, ext_filter_mask) /* IFLA_VFINFO_LIST */ - + rtnl_port_size(dev) /* IFLA_VF_PORTS + IFLA_PORT_SELF */ + + rtnl_port_size(dev, ext_filter_mask) /* IFLA_VF_PORTS + IFLA_PORT_SELF */ + rtnl_link_get_size(dev) /* IFLA_LINKINFO */ + rtnl_link_get_af_size(dev) /* IFLA_AF_SPEC */ + nla_total_size(MAX_PHYS_PORT_ID_LEN); /* IFLA_PHYS_PORT_ID */ @@ -888,11 +890,13 @@ static int rtnl_port_self_fill(struct sk_buff *skb, struct net_device *dev) return 0; } -static int rtnl_port_fill(struct sk_buff *skb, struct net_device *dev) +static int rtnl_port_fill(struct sk_buff *skb, struct net_device *dev, + u32 ext_filter_mask) { int err; - if (!dev->netdev_ops->ndo_get_vf_port || !dev->dev.parent) + if (!dev->netdev_ops->ndo_get_vf_port || !dev->dev.parent || + !(ext_filter_mask & RTEXT_FILTER_VF)) return 0; err = rtnl_port_self_fill(skb, dev); @@ -1079,7 +1083,7 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev, nla_nest_end(skb, vfinfo); } - if (rtnl_port_fill(skb, dev)) + if (rtnl_port_fill(skb, dev, ext_filter_mask)) goto nla_put_failure; if (dev->rtnl_link_ops || rtnl_have_link_slave_info(dev)) { @@ -1198,6 +1202,7 @@ static int rtnl_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb) struct hlist_head *head; struct nlattr *tb[IFLA_MAX+1]; u32 ext_filter_mask = 0; + int err; s_h = cb->args[0]; s_idx = cb->args[1]; @@ -1218,11 +1223,17 @@ static int rtnl_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb) hlist_for_each_entry_rcu(dev, head, index_hlist) { if (idx < s_idx) goto cont; - if (rtnl_fill_ifinfo(skb, dev, RTM_NEWLINK, - NETLINK_CB(cb->skb).portid, - cb->nlh->nlmsg_seq, 0, - NLM_F_MULTI, - ext_filter_mask) <= 0) + err = rtnl_fill_ifinfo(skb, dev, RTM_NEWLINK, + NETLINK_CB(cb->skb).portid, + cb->nlh->nlmsg_seq, 0, + NLM_F_MULTI, + ext_filter_mask); + /* If we ran out of room on the first message, + * we're in trouble + */ + WARN_ON((err == -EMSGSIZE) && (skb->len == 0)); + + if (err <= 0) goto out; nl_dump_check_consistent(cb, nlmsg_hdr(skb)); @@ -1395,7 +1406,8 @@ static int do_set_master(struct net_device *dev, int ifindex) return 0; } -static int do_setlink(struct net_device *dev, struct ifinfomsg *ifm, +static int do_setlink(const struct sk_buff *skb, + struct net_device *dev, struct ifinfomsg *ifm, struct nlattr **tb, char *ifname, int modified) { const struct net_device_ops *ops = dev->netdev_ops; @@ -1407,7 +1419,7 @@ static int do_setlink(struct net_device *dev, struct ifinfomsg *ifm, err = PTR_ERR(net); goto errout; } - if (!ns_capable(net->user_ns, CAP_NET_ADMIN)) { + if (!netlink_ns_capable(skb, net->user_ns, CAP_NET_ADMIN)) { err = -EPERM; goto errout; } @@ -1661,7 +1673,7 @@ static int rtnl_setlink(struct sk_buff *skb, struct nlmsghdr *nlh) if (err < 0) goto errout; - err = do_setlink(dev, ifm, tb, ifname, 0); + err = do_setlink(skb, dev, ifm, tb, ifname, 0); errout: return err; } @@ -1778,7 +1790,8 @@ err: } EXPORT_SYMBOL(rtnl_create_link); -static int rtnl_group_changelink(struct net *net, int group, +static int rtnl_group_changelink(const struct sk_buff *skb, + struct net *net, int group, struct ifinfomsg *ifm, struct nlattr **tb) { @@ -1787,7 +1800,7 @@ static int rtnl_group_changelink(struct net *net, int group, for_each_netdev(net, dev) { if (dev->group == group) { - err = do_setlink(dev, ifm, tb, NULL, 0); + err = do_setlink(skb, dev, ifm, tb, NULL, 0); if (err < 0) return err; } @@ -1929,12 +1942,12 @@ replay: modified = 1; } - return do_setlink(dev, ifm, tb, ifname, modified); + return do_setlink(skb, dev, ifm, tb, ifname, modified); } if (!(nlh->nlmsg_flags & NLM_F_CREATE)) { if (ifm->ifi_index == 0 && tb[IFLA_GROUP]) - return rtnl_group_changelink(net, + return rtnl_group_changelink(skb, net, nla_get_u32(tb[IFLA_GROUP]), ifm, tb); return -ENODEV; @@ -2321,7 +2334,7 @@ static int rtnl_fdb_del(struct sk_buff *skb, struct nlmsghdr *nlh) int err = -EINVAL; __u8 *addr; - if (!capable(CAP_NET_ADMIN)) + if (!netlink_capable(skb, CAP_NET_ADMIN)) return -EPERM; err = nlmsg_parse(nlh, sizeof(*ndm), tb, NDA_MAX, NULL); @@ -2773,7 +2786,7 @@ static int rtnetlink_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh) sz_idx = type>>2; kind = type&3; - if (kind != 2 && !ns_capable(net->user_ns, CAP_NET_ADMIN)) + if (kind != 2 && !netlink_net_capable(skb, CAP_NET_ADMIN)) return -EPERM; if (kind == 2 && nlh->nlmsg_flags&NLM_F_DUMP) { diff --git a/net/core/sock.c b/net/core/sock.c index b4fff008136..664ee4295b6 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -145,6 +145,55 @@ static DEFINE_MUTEX(proto_list_mutex); static LIST_HEAD(proto_list); +/** + * sk_ns_capable - General socket capability test + * @sk: Socket to use a capability on or through + * @user_ns: The user namespace of the capability to use + * @cap: The capability to use + * + * Test to see if the opener of the socket had when the socket was + * created and the current process has the capability @cap in the user + * namespace @user_ns. + */ +bool sk_ns_capable(const struct sock *sk, + struct user_namespace *user_ns, int cap) +{ + return file_ns_capable(sk->sk_socket->file, user_ns, cap) && + ns_capable(user_ns, cap); +} +EXPORT_SYMBOL(sk_ns_capable); + +/** + * sk_capable - Socket global capability test + * @sk: Socket to use a capability on or through + * @cap: The global capbility to use + * + * Test to see if the opener of the socket had when the socket was + * created and the current process has the capability @cap in all user + * namespaces. + */ +bool sk_capable(const struct sock *sk, int cap) +{ + return sk_ns_capable(sk, &init_user_ns, cap); +} +EXPORT_SYMBOL(sk_capable); + +/** + * sk_net_capable - Network namespace socket capability test + * @sk: Socket to use a capability on or through + * @cap: The capability to use + * + * Test to see if the opener of the socket had when the socke was created + * and the current process has the capability @cap over the network namespace + * the socket is a member of. + */ +bool sk_net_capable(const struct sock *sk, int cap) +{ + return sk_ns_capable(sk, sock_net(sk)->user_ns, cap); +} +EXPORT_SYMBOL(sk_net_capable); + + #ifdef CONFIG_MEMCG_KMEM int mem_cgroup_sockets_init(struct mem_cgroup *memcg, struct cgroup_subsys *ss) { diff --git a/net/core/sock_diag.c b/net/core/sock_diag.c index d7af1885932..a4216a4c957 100644 --- a/net/core/sock_diag.c +++ b/net/core/sock_diag.c @@ -49,7 +49,7 @@ int sock_diag_put_meminfo(struct sock *sk, struct sk_buff *skb, int attrtype) } EXPORT_SYMBOL_GPL(sock_diag_put_meminfo); -int sock_diag_put_filterinfo(struct user_namespace *user_ns, struct sock *sk, +int sock_diag_put_filterinfo(bool may_report_filterinfo, struct sock *sk, struct sk_buff *skb, int attrtype) { struct sock_fprog_kern *fprog; @@ -58,7 +58,7 @@ int sock_diag_put_filterinfo(struct user_namespace *user_ns, struct sock *sk, unsigned int flen; int err = 0; - if (!ns_capable(user_ns, CAP_NET_ADMIN)) { + if (!may_report_filterinfo) { nla_reserve(skb, attrtype, 0); return 0; } diff --git a/net/core/utils.c b/net/core/utils.c index 2f737bf90b3..eed34338736 100644 --- a/net/core/utils.c +++ b/net/core/utils.c @@ -348,8 +348,8 @@ static void __net_random_once_deferred(struct work_struct *w) { struct __net_random_once_work *work = container_of(w, struct __net_random_once_work, work); - if (!static_key_enabled(work->key)) - static_key_slow_inc(work->key); + BUG_ON(!static_key_enabled(work->key)); + static_key_slow_dec(work->key); kfree(work); } @@ -367,7 +367,7 @@ static void __net_random_once_disable_jump(struct static_key *key) } bool __net_get_random_once(void *buf, int nbytes, bool *done, - struct static_key *done_key) + struct static_key *once_key) { static DEFINE_SPINLOCK(lock); unsigned long flags; @@ -382,7 +382,7 @@ bool __net_get_random_once(void *buf, int nbytes, bool *done, *done = true; spin_unlock_irqrestore(&lock, flags); - __net_random_once_disable_jump(done_key); + __net_random_once_disable_jump(once_key); return true; } diff --git a/net/dcb/dcbnl.c b/net/dcb/dcbnl.c index 55364440267..f8b98d89c28 100644 --- a/net/dcb/dcbnl.c +++ b/net/dcb/dcbnl.c @@ -1669,7 +1669,7 @@ static int dcb_doit(struct sk_buff *skb, struct nlmsghdr *nlh) struct nlmsghdr *reply_nlh = NULL; const struct reply_func *fn; - if ((nlh->nlmsg_type == RTM_SETDCB) && !capable(CAP_NET_ADMIN)) + if ((nlh->nlmsg_type == RTM_SETDCB) && !netlink_capable(skb, CAP_NET_ADMIN)) return -EPERM; ret = nlmsg_parse(nlh, sizeof(*dcb), tb, DCB_ATTR_MAX, diff --git a/net/dccp/output.c b/net/dccp/output.c index 8876078859d..0248e8a3460 100644 --- a/net/dccp/output.c +++ b/net/dccp/output.c @@ -138,7 +138,7 @@ static int dccp_transmit_skb(struct sock *sk, struct sk_buff *skb) DCCP_INC_STATS(DCCP_MIB_OUTSEGS); - err = icsk->icsk_af_ops->queue_xmit(skb, &inet->cork.fl); + err = icsk->icsk_af_ops->queue_xmit(sk, skb, &inet->cork.fl); return net_xmit_eval(err); } return -ENOBUFS; diff --git a/net/decnet/dn_dev.c b/net/decnet/dn_dev.c index a603823a3e2..3b726f31c64 100644 --- a/net/decnet/dn_dev.c +++ b/net/decnet/dn_dev.c @@ -574,7 +574,7 @@ static int dn_nl_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh) struct dn_ifaddr __rcu **ifap; int err = -EINVAL; - if (!capable(CAP_NET_ADMIN)) + if (!netlink_capable(skb, CAP_NET_ADMIN)) return -EPERM; if (!net_eq(net, &init_net)) @@ -618,7 +618,7 @@ static int dn_nl_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh) struct dn_ifaddr *ifa; int err; - if (!capable(CAP_NET_ADMIN)) + if (!netlink_capable(skb, CAP_NET_ADMIN)) return -EPERM; if (!net_eq(net, &init_net)) diff --git a/net/decnet/dn_fib.c b/net/decnet/dn_fib.c index 57dc159245e..d332aefb084 100644 --- a/net/decnet/dn_fib.c +++ b/net/decnet/dn_fib.c @@ -505,7 +505,7 @@ static int dn_fib_rtm_delroute(struct sk_buff *skb, struct nlmsghdr *nlh) struct nlattr *attrs[RTA_MAX+1]; int err; - if (!capable(CAP_NET_ADMIN)) + if (!netlink_capable(skb, CAP_NET_ADMIN)) return -EPERM; if (!net_eq(net, &init_net)) @@ -530,7 +530,7 @@ static int dn_fib_rtm_newroute(struct sk_buff *skb, struct nlmsghdr *nlh) struct nlattr *attrs[RTA_MAX+1]; int err; - if (!capable(CAP_NET_ADMIN)) + if (!netlink_capable(skb, CAP_NET_ADMIN)) return -EPERM; if (!net_eq(net, &init_net)) diff --git a/net/decnet/dn_route.c b/net/decnet/dn_route.c index ce0cbbfe0f4..daccc4a36d8 100644 --- a/net/decnet/dn_route.c +++ b/net/decnet/dn_route.c @@ -752,7 +752,7 @@ static int dn_to_neigh_output(struct sk_buff *skb) return n->output(n, skb); } -static int dn_output(struct sk_buff *skb) +static int dn_output(struct sock *sk, struct sk_buff *skb) { struct dst_entry *dst = skb_dst(skb); struct dn_route *rt = (struct dn_route *)dst; @@ -838,6 +838,18 @@ drop: * Used to catch bugs. This should never normally get * called. */ +static int dn_rt_bug_sk(struct sock *sk, struct sk_buff *skb) +{ + struct dn_skb_cb *cb = DN_SKB_CB(skb); + + net_dbg_ratelimited("dn_rt_bug: skb from:%04x to:%04x\n", + le16_to_cpu(cb->src), le16_to_cpu(cb->dst)); + + kfree_skb(skb); + + return NET_RX_DROP; +} + static int dn_rt_bug(struct sk_buff *skb) { struct dn_skb_cb *cb = DN_SKB_CB(skb); @@ -1463,7 +1475,7 @@ make_route: rt->n = neigh; rt->dst.lastuse = jiffies; - rt->dst.output = dn_rt_bug; + rt->dst.output = dn_rt_bug_sk; switch (res.type) { case RTN_UNICAST: rt->dst.input = dn_forward; diff --git a/net/decnet/netfilter/dn_rtmsg.c b/net/decnet/netfilter/dn_rtmsg.c index e83015cecfa..e4d9560a910 100644 --- a/net/decnet/netfilter/dn_rtmsg.c +++ b/net/decnet/netfilter/dn_rtmsg.c @@ -107,7 +107,7 @@ static inline void dnrmg_receive_user_skb(struct sk_buff *skb) if (nlh->nlmsg_len < sizeof(*nlh) || skb->len < nlh->nlmsg_len) return; - if (!capable(CAP_NET_ADMIN)) + if (!netlink_capable(skb, CAP_NET_ADMIN)) RCV_SKB_FAIL(-EPERM); /* Eventually we might send routing messages too */ diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index 8c54870db79..6d6dd345bc4 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -1650,6 +1650,39 @@ static int __init init_ipv4_mibs(void) return register_pernet_subsys(&ipv4_mib_ops); } +static __net_init int inet_init_net(struct net *net) +{ + /* + * Set defaults for local port range + */ + seqlock_init(&net->ipv4.ip_local_ports.lock); + net->ipv4.ip_local_ports.range[0] = 32768; + net->ipv4.ip_local_ports.range[1] = 61000; + + seqlock_init(&net->ipv4.ping_group_range.lock); + /* + * Sane defaults - nobody may create ping sockets. + * Boot scripts should set this to distro-specific group. + */ + net->ipv4.ping_group_range.range[0] = make_kgid(&init_user_ns, 1); + net->ipv4.ping_group_range.range[1] = make_kgid(&init_user_ns, 0); + return 0; +} + +static __net_exit void inet_exit_net(struct net *net) +{ +} + +static __net_initdata struct pernet_operations af_inet_ops = { + .init = inet_init_net, + .exit = inet_exit_net, +}; + +static int __init init_inet_pernet_ops(void) +{ + return register_pernet_subsys(&af_inet_ops); +} + static int ipv4_proc_init(void); /* @@ -1794,6 +1827,9 @@ static int __init inet_init(void) if (ip_mr_init()) pr_crit("%s: Cannot init ipv4 mroute\n", __func__); #endif + + if (init_inet_pernet_ops()) + pr_crit("%s: Cannot init ipv4 inet pernet ops\n", __func__); /* * Initialise per-cpu ipv4 mibs */ diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c index 1a629f87027..255aa9946fe 100644 --- a/net/ipv4/fib_frontend.c +++ b/net/ipv4/fib_frontend.c @@ -250,7 +250,7 @@ static int __fib_validate_source(struct sk_buff *skb, __be32 src, __be32 dst, bool dev_match; fl4.flowi4_oif = 0; - fl4.flowi4_iif = oif; + fl4.flowi4_iif = oif ? : LOOPBACK_IFINDEX; fl4.daddr = src; fl4.saddr = dst; fl4.flowi4_tos = tos; diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c index b53f0bf84dc..b10cd43a472 100644 --- a/net/ipv4/fib_semantics.c +++ b/net/ipv4/fib_semantics.c @@ -631,6 +631,7 @@ static int fib_check_nh(struct fib_config *cfg, struct fib_info *fi, .daddr = nh->nh_gw, .flowi4_scope = cfg->fc_scope + 1, .flowi4_oif = nh->nh_oif, + .flowi4_iif = LOOPBACK_IFINDEX, }; /* It is not necessary, but requires a bit of thinking */ @@ -820,13 +821,13 @@ struct fib_info *fib_create_info(struct fib_config *cfg) fi = kzalloc(sizeof(*fi)+nhs*sizeof(struct fib_nh), GFP_KERNEL); if (fi == NULL) goto failure; + fib_info_cnt++; if (cfg->fc_mx) { fi->fib_metrics = kzalloc(sizeof(u32) * RTAX_MAX, GFP_KERNEL); if (!fi->fib_metrics) goto failure; } else fi->fib_metrics = (u32 *) dst_default_metrics; - fib_info_cnt++; fi->fib_net = hold_net(net); fi->fib_protocol = cfg->fc_protocol; diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c index 0d1e2cb877e..a56b8e6e866 100644 --- a/net/ipv4/inet_connection_sock.c +++ b/net/ipv4/inet_connection_sock.c @@ -37,11 +37,11 @@ void inet_get_local_port_range(struct net *net, int *low, int *high) unsigned int seq; do { - seq = read_seqbegin(&net->ipv4.sysctl_local_ports.lock); + seq = read_seqbegin(&net->ipv4.ip_local_ports.lock); - *low = net->ipv4.sysctl_local_ports.range[0]; - *high = net->ipv4.sysctl_local_ports.range[1]; - } while (read_seqretry(&net->ipv4.sysctl_local_ports.lock, seq)); + *low = net->ipv4.ip_local_ports.range[0]; + *high = net->ipv4.ip_local_ports.range[1]; + } while (read_seqretry(&net->ipv4.ip_local_ports.lock, seq)); } EXPORT_SYMBOL(inet_get_local_port_range); diff --git a/net/ipv4/ip_forward.c b/net/ipv4/ip_forward.c index be8abe73bb9..6f111e48e11 100644 --- a/net/ipv4/ip_forward.c +++ b/net/ipv4/ip_forward.c @@ -42,12 +42,12 @@ static bool ip_may_fragment(const struct sk_buff *skb) { return unlikely((ip_hdr(skb)->frag_off & htons(IP_DF)) == 0) || - !skb->local_df; + skb->local_df; } static bool ip_exceeds_mtu(const struct sk_buff *skb, unsigned int mtu) { - if (skb->len <= mtu || skb->local_df) + if (skb->len <= mtu) return false; if (skb_is_gso(skb) && skb_gso_network_seglen(skb) <= mtu) @@ -56,53 +56,6 @@ static bool ip_exceeds_mtu(const struct sk_buff *skb, unsigned int mtu) return true; } -static bool ip_gso_exceeds_dst_mtu(const struct sk_buff *skb) -{ - unsigned int mtu; - - if (skb->local_df || !skb_is_gso(skb)) - return false; - - mtu = ip_dst_mtu_maybe_forward(skb_dst(skb), true); - - /* if seglen > mtu, do software segmentation for IP fragmentation on - * output. DF bit cannot be set since ip_forward would have sent - * icmp error. - */ - return skb_gso_network_seglen(skb) > mtu; -} - -/* called if GSO skb needs to be fragmented on forward */ -static int ip_forward_finish_gso(struct sk_buff *skb) -{ - struct dst_entry *dst = skb_dst(skb); - netdev_features_t features; - struct sk_buff *segs; - int ret = 0; - - features = netif_skb_dev_features(skb, dst->dev); - segs = skb_gso_segment(skb, features & ~NETIF_F_GSO_MASK); - if (IS_ERR(segs)) { - kfree_skb(skb); - return -ENOMEM; - } - - consume_skb(skb); - - do { - struct sk_buff *nskb = segs->next; - int err; - - segs->next = NULL; - err = dst_output(segs); - - if (err && ret == 0) - ret = err; - segs = nskb; - } while (segs); - - return ret; -} static int ip_forward_finish(struct sk_buff *skb) { @@ -114,9 +67,6 @@ static int ip_forward_finish(struct sk_buff *skb) if (unlikely(opt->optlen)) ip_forward_options(skb); - if (ip_gso_exceeds_dst_mtu(skb)) - return ip_forward_finish_gso(skb); - return dst_output(skb); } diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c index c10a3ce5cbf..ed32313e307 100644 --- a/net/ipv4/ip_fragment.c +++ b/net/ipv4/ip_fragment.c @@ -232,8 +232,9 @@ static void ip_expire(unsigned long arg) * "Fragment Reassembly Timeout" message, per RFC792. */ if (qp->user == IP_DEFRAG_AF_PACKET || - (qp->user == IP_DEFRAG_CONNTRACK_IN && - skb_rtable(head)->rt_type != RTN_LOCAL)) + ((qp->user >= IP_DEFRAG_CONNTRACK_IN) && + (qp->user <= __IP_DEFRAG_CONNTRACK_IN_END) && + (skb_rtable(head)->rt_type != RTN_LOCAL))) goto out_rcu_unlock; diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index 1a0755fea49..a52f50187b5 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -101,17 +101,17 @@ int __ip_local_out(struct sk_buff *skb) skb_dst(skb)->dev, dst_output); } -int ip_local_out(struct sk_buff *skb) +int ip_local_out_sk(struct sock *sk, struct sk_buff *skb) { int err; err = __ip_local_out(skb); if (likely(err == 1)) - err = dst_output(skb); + err = dst_output_sk(sk, skb); return err; } -EXPORT_SYMBOL_GPL(ip_local_out); +EXPORT_SYMBOL_GPL(ip_local_out_sk); static inline int ip_select_ttl(struct inet_sock *inet, struct dst_entry *dst) { @@ -211,6 +211,48 @@ static inline int ip_finish_output2(struct sk_buff *skb) return -EINVAL; } +static int ip_finish_output_gso(struct sk_buff *skb) +{ + netdev_features_t features; + struct sk_buff *segs; + int ret = 0; + + /* common case: locally created skb or seglen is <= mtu */ + if (((IPCB(skb)->flags & IPSKB_FORWARDED) == 0) || + skb_gso_network_seglen(skb) <= ip_skb_dst_mtu(skb)) + return ip_finish_output2(skb); + + /* Slowpath - GSO segment length is exceeding the dst MTU. + * + * This can happen in two cases: + * 1) TCP GRO packet, DF bit not set + * 2) skb arrived via virtio-net, we thus get TSO/GSO skbs directly + * from host network stack. + */ + features = netif_skb_features(skb); + segs = skb_gso_segment(skb, features & ~NETIF_F_GSO_MASK); + if (IS_ERR(segs)) { + kfree_skb(skb); + return -ENOMEM; + } + + consume_skb(skb); + + do { + struct sk_buff *nskb = segs->next; + int err; + + segs->next = NULL; + err = ip_fragment(segs, ip_finish_output2); + + if (err && ret == 0) + ret = err; + segs = nskb; + } while (segs); + + return ret; +} + static int ip_finish_output(struct sk_buff *skb) { #if defined(CONFIG_NETFILTER) && defined(CONFIG_XFRM) @@ -220,15 +262,17 @@ static int ip_finish_output(struct sk_buff *skb) return dst_output(skb); } #endif - if (skb->len > ip_skb_dst_mtu(skb) && !skb_is_gso(skb)) + if (skb_is_gso(skb)) + return ip_finish_output_gso(skb); + + if (skb->len > ip_skb_dst_mtu(skb)) return ip_fragment(skb, ip_finish_output2); - else - return ip_finish_output2(skb); + + return ip_finish_output2(skb); } -int ip_mc_output(struct sk_buff *skb) +int ip_mc_output(struct sock *sk, struct sk_buff *skb) { - struct sock *sk = skb->sk; struct rtable *rt = skb_rtable(skb); struct net_device *dev = rt->dst.dev; @@ -287,7 +331,7 @@ int ip_mc_output(struct sk_buff *skb) !(IPCB(skb)->flags & IPSKB_REROUTED)); } -int ip_output(struct sk_buff *skb) +int ip_output(struct sock *sk, struct sk_buff *skb) { struct net_device *dev = skb_dst(skb)->dev; @@ -315,9 +359,9 @@ static void ip_copy_addrs(struct iphdr *iph, const struct flowi4 *fl4) sizeof(fl4->saddr) + sizeof(fl4->daddr)); } -int ip_queue_xmit(struct sk_buff *skb, struct flowi *fl) +/* Note: skb->sk can be different from sk, in case of tunnels */ +int ip_queue_xmit(struct sock *sk, struct sk_buff *skb, struct flowi *fl) { - struct sock *sk = skb->sk; struct inet_sock *inet = inet_sk(sk); struct ip_options_rcu *inet_opt; struct flowi4 *fl4; @@ -389,6 +433,7 @@ packet_routed: ip_select_ident_more(skb, &rt->dst, sk, (skb_shinfo(skb)->gso_segs ?: 1) - 1); + /* TODO : should we use skb->sk here instead of sk ? */ skb->priority = sk->sk_priority; skb->mark = sk->sk_mark; diff --git a/net/ipv4/ip_tunnel.c b/net/ipv4/ip_tunnel.c index e77381d1df9..b3f859731c6 100644 --- a/net/ipv4/ip_tunnel.c +++ b/net/ipv4/ip_tunnel.c @@ -442,6 +442,8 @@ int ip_tunnel_rcv(struct ip_tunnel *tunnel, struct sk_buff *skb, tunnel->i_seqno = ntohl(tpi->seq) + 1; } + skb_reset_network_header(skb); + err = IP_ECN_decapsulate(iph, skb); if (unlikely(err)) { if (log_ecn_error) @@ -670,7 +672,7 @@ void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev, return; } - err = iptunnel_xmit(rt, skb, fl4.saddr, fl4.daddr, protocol, + err = iptunnel_xmit(skb->sk, rt, skb, fl4.saddr, fl4.daddr, protocol, tos, ttl, df, !net_eq(tunnel->net, dev_net(dev))); iptunnel_xmit_stats(err, &dev->stats, dev->tstats); @@ -722,19 +724,18 @@ static void ip_tunnel_update(struct ip_tunnel_net *itn, int ip_tunnel_ioctl(struct net_device *dev, struct ip_tunnel_parm *p, int cmd) { int err = 0; - struct ip_tunnel *t; - struct net *net = dev_net(dev); - struct ip_tunnel *tunnel = netdev_priv(dev); - struct ip_tunnel_net *itn = net_generic(net, tunnel->ip_tnl_net_id); + struct ip_tunnel *t = netdev_priv(dev); + struct net *net = t->net; + struct ip_tunnel_net *itn = net_generic(net, t->ip_tnl_net_id); BUG_ON(!itn->fb_tunnel_dev); switch (cmd) { case SIOCGETTUNNEL: - t = NULL; - if (dev == itn->fb_tunnel_dev) + if (dev == itn->fb_tunnel_dev) { t = ip_tunnel_find(itn, p, itn->fb_tunnel_dev->type); - if (t == NULL) - t = netdev_priv(dev); + if (t == NULL) + t = netdev_priv(dev); + } memcpy(p, &t->parms, sizeof(*p)); break; diff --git a/net/ipv4/ip_tunnel_core.c b/net/ipv4/ip_tunnel_core.c index e0c2b1d2ea4..bcf206c7900 100644 --- a/net/ipv4/ip_tunnel_core.c +++ b/net/ipv4/ip_tunnel_core.c @@ -46,7 +46,7 @@ #include <net/netns/generic.h> #include <net/rtnetlink.h> -int iptunnel_xmit(struct rtable *rt, struct sk_buff *skb, +int iptunnel_xmit(struct sock *sk, struct rtable *rt, struct sk_buff *skb, __be32 src, __be32 dst, __u8 proto, __u8 tos, __u8 ttl, __be16 df, bool xnet) { @@ -76,7 +76,7 @@ int iptunnel_xmit(struct rtable *rt, struct sk_buff *skb, iph->ttl = ttl; __ip_select_ident(iph, &rt->dst, (skb_shinfo(skb)->gso_segs ?: 1) - 1); - err = ip_local_out(skb); + err = ip_local_out_sk(sk, skb); if (unlikely(net_xmit_eval(err))) pkt_len = 0; return pkt_len; diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c index 28863570dd6..d84dc8d4c91 100644 --- a/net/ipv4/ipmr.c +++ b/net/ipv4/ipmr.c @@ -455,7 +455,7 @@ static netdev_tx_t reg_vif_xmit(struct sk_buff *skb, struct net_device *dev) struct mr_table *mrt; struct flowi4 fl4 = { .flowi4_oif = dev->ifindex, - .flowi4_iif = skb->skb_iif, + .flowi4_iif = skb->skb_iif ? : LOOPBACK_IFINDEX, .flowi4_mark = skb->mark, }; int err; diff --git a/net/ipv4/netfilter/ipt_rpfilter.c b/net/ipv4/netfilter/ipt_rpfilter.c index c49dcd0284a..4bfaedf9b34 100644 --- a/net/ipv4/netfilter/ipt_rpfilter.c +++ b/net/ipv4/netfilter/ipt_rpfilter.c @@ -89,11 +89,8 @@ static bool rpfilter_mt(const struct sk_buff *skb, struct xt_action_param *par) if (ipv4_is_multicast(iph->daddr)) { if (ipv4_is_zeronet(iph->saddr)) return ipv4_is_local_multicast(iph->daddr) ^ invert; - flow.flowi4_iif = 0; - } else { - flow.flowi4_iif = LOOPBACK_IFINDEX; } - + flow.flowi4_iif = LOOPBACK_IFINDEX; flow.daddr = iph->saddr; flow.saddr = rpfilter_get_saddr(iph->daddr); flow.flowi4_oif = 0; diff --git a/net/ipv4/netfilter/nf_defrag_ipv4.c b/net/ipv4/netfilter/nf_defrag_ipv4.c index 12e13bd82b5..f40f321b41f 100644 --- a/net/ipv4/netfilter/nf_defrag_ipv4.c +++ b/net/ipv4/netfilter/nf_defrag_ipv4.c @@ -22,7 +22,6 @@ #endif #include <net/netfilter/nf_conntrack_zones.h> -/* Returns new sk_buff, or NULL */ static int nf_ct_ipv4_gather_frags(struct sk_buff *skb, u_int32_t user) { int err; @@ -33,8 +32,10 @@ static int nf_ct_ipv4_gather_frags(struct sk_buff *skb, u_int32_t user) err = ip_defrag(skb, user); local_bh_enable(); - if (!err) + if (!err) { ip_send_check(ip_hdr(skb)); + skb->local_df = 1; + } return err; } diff --git a/net/ipv4/ping.c b/net/ipv4/ping.c index f4b19e5dde5..044a0ddf6a7 100644 --- a/net/ipv4/ping.c +++ b/net/ipv4/ping.c @@ -236,15 +236,15 @@ exit: static void inet_get_ping_group_range_net(struct net *net, kgid_t *low, kgid_t *high) { - kgid_t *data = net->ipv4.sysctl_ping_group_range; + kgid_t *data = net->ipv4.ping_group_range.range; unsigned int seq; do { - seq = read_seqbegin(&net->ipv4.sysctl_local_ports.lock); + seq = read_seqbegin(&net->ipv4.ping_group_range.lock); *low = data[0]; *high = data[1]; - } while (read_seqretry(&net->ipv4.sysctl_local_ports.lock, seq)); + } while (read_seqretry(&net->ipv4.ping_group_range.lock, seq)); } @@ -252,26 +252,33 @@ int ping_init_sock(struct sock *sk) { struct net *net = sock_net(sk); kgid_t group = current_egid(); - struct group_info *group_info = get_current_groups(); - int i, j, count = group_info->ngroups; + struct group_info *group_info; + int i, j, count; kgid_t low, high; + int ret = 0; inet_get_ping_group_range_net(net, &low, &high); if (gid_lte(low, group) && gid_lte(group, high)) return 0; + group_info = get_current_groups(); + count = group_info->ngroups; for (i = 0; i < group_info->nblocks; i++) { int cp_count = min_t(int, NGROUPS_PER_BLOCK, count); for (j = 0; j < cp_count; j++) { kgid_t gid = group_info->blocks[i][j]; if (gid_lte(low, gid) && gid_lte(gid, high)) - return 0; + goto out_release_group; } count -= cp_count; } - return -EACCES; + ret = -EACCES; + +out_release_group: + put_group_info(group_info); + return ret; } EXPORT_SYMBOL_GPL(ping_init_sock); diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 34d094cadb1..db1e0da871f 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -1129,7 +1129,7 @@ static void ipv4_link_failure(struct sk_buff *skb) dst_set_expires(&rt->dst, 0); } -static int ip_rt_bug(struct sk_buff *skb) +static int ip_rt_bug(struct sock *sk, struct sk_buff *skb) { pr_debug("%s: %pI4 -> %pI4, %s\n", __func__, &ip_hdr(skb)->saddr, &ip_hdr(skb)->daddr, @@ -1700,8 +1700,7 @@ static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr, if (res.type == RTN_LOCAL) { err = fib_validate_source(skb, saddr, daddr, tos, - LOOPBACK_IFINDEX, - dev, in_dev, &itag); + 0, dev, in_dev, &itag); if (err < 0) goto martian_source_keep_err; goto local_input; @@ -2218,7 +2217,7 @@ struct dst_entry *ipv4_blackhole_route(struct net *net, struct dst_entry *dst_or new->__use = 1; new->input = dst_discard; - new->output = dst_discard; + new->output = dst_discard_sk; new->dev = ort->dst.dev; if (new->dev) @@ -2357,7 +2356,7 @@ static int rt_fill_info(struct net *net, __be32 dst, __be32 src, } } else #endif - if (nla_put_u32(skb, RTA_IIF, rt->rt_iif)) + if (nla_put_u32(skb, RTA_IIF, skb->dev->ifindex)) goto nla_put_failure; } diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c index 44eba052b43..5cde8f263d4 100644 --- a/net/ipv4/sysctl_net_ipv4.c +++ b/net/ipv4/sysctl_net_ipv4.c @@ -45,10 +45,10 @@ static int ip_ping_group_range_max[] = { GID_T_MAX, GID_T_MAX }; /* Update system visible IP port range */ static void set_local_port_range(struct net *net, int range[2]) { - write_seqlock(&net->ipv4.sysctl_local_ports.lock); - net->ipv4.sysctl_local_ports.range[0] = range[0]; - net->ipv4.sysctl_local_ports.range[1] = range[1]; - write_sequnlock(&net->ipv4.sysctl_local_ports.lock); + write_seqlock(&net->ipv4.ip_local_ports.lock); + net->ipv4.ip_local_ports.range[0] = range[0]; + net->ipv4.ip_local_ports.range[1] = range[1]; + write_sequnlock(&net->ipv4.ip_local_ports.lock); } /* Validate changes from /proc interface. */ @@ -57,7 +57,7 @@ static int ipv4_local_port_range(struct ctl_table *table, int write, size_t *lenp, loff_t *ppos) { struct net *net = - container_of(table->data, struct net, ipv4.sysctl_local_ports.range); + container_of(table->data, struct net, ipv4.ip_local_ports.range); int ret; int range[2]; struct ctl_table tmp = { @@ -87,14 +87,14 @@ static void inet_get_ping_group_range_table(struct ctl_table *table, kgid_t *low { kgid_t *data = table->data; struct net *net = - container_of(table->data, struct net, ipv4.sysctl_ping_group_range); + container_of(table->data, struct net, ipv4.ping_group_range.range); unsigned int seq; do { - seq = read_seqbegin(&net->ipv4.sysctl_local_ports.lock); + seq = read_seqbegin(&net->ipv4.ip_local_ports.lock); *low = data[0]; *high = data[1]; - } while (read_seqretry(&net->ipv4.sysctl_local_ports.lock, seq)); + } while (read_seqretry(&net->ipv4.ip_local_ports.lock, seq)); } /* Update system visible IP port range */ @@ -102,11 +102,11 @@ static void set_ping_group_range(struct ctl_table *table, kgid_t low, kgid_t hig { kgid_t *data = table->data; struct net *net = - container_of(table->data, struct net, ipv4.sysctl_ping_group_range); - write_seqlock(&net->ipv4.sysctl_local_ports.lock); + container_of(table->data, struct net, ipv4.ping_group_range.range); + write_seqlock(&net->ipv4.ip_local_ports.lock); data[0] = low; data[1] = high; - write_sequnlock(&net->ipv4.sysctl_local_ports.lock); + write_sequnlock(&net->ipv4.ip_local_ports.lock); } /* Validate changes from /proc interface. */ @@ -805,7 +805,7 @@ static struct ctl_table ipv4_net_table[] = { }, { .procname = "ping_group_range", - .data = &init_net.ipv4.sysctl_ping_group_range, + .data = &init_net.ipv4.ping_group_range.range, .maxlen = sizeof(gid_t)*2, .mode = 0644, .proc_handler = ipv4_ping_group_range, @@ -819,8 +819,8 @@ static struct ctl_table ipv4_net_table[] = { }, { .procname = "ip_local_port_range", - .maxlen = sizeof(init_net.ipv4.sysctl_local_ports.range), - .data = &init_net.ipv4.sysctl_local_ports.range, + .maxlen = sizeof(init_net.ipv4.ip_local_ports.range), + .data = &init_net.ipv4.ip_local_ports.range, .mode = 0644, .proc_handler = ipv4_local_port_range, }, @@ -858,20 +858,6 @@ static __net_init int ipv4_sysctl_init_net(struct net *net) table[i].data += (void *)net - (void *)&init_net; } - /* - * Sane defaults - nobody may create ping sockets. - * Boot scripts should set this to distro-specific group. - */ - net->ipv4.sysctl_ping_group_range[0] = make_kgid(&init_user_ns, 1); - net->ipv4.sysctl_ping_group_range[1] = make_kgid(&init_user_ns, 0); - - /* - * Set defaults for local port range - */ - seqlock_init(&net->ipv4.sysctl_local_ports.lock); - net->ipv4.sysctl_local_ports.range[0] = 32768; - net->ipv4.sysctl_local_ports.range[1] = 61000; - net->ipv4.ipv4_hdr = register_net_sysctl(net, "net/ipv4", table); if (net->ipv4.ipv4_hdr == NULL) goto err_reg; diff --git a/net/ipv4/tcp_cubic.c b/net/ipv4/tcp_cubic.c index 8bf224516ba..b4f1b29b08b 100644 --- a/net/ipv4/tcp_cubic.c +++ b/net/ipv4/tcp_cubic.c @@ -409,7 +409,7 @@ static void bictcp_acked(struct sock *sk, u32 cnt, s32 rtt_us) ratio -= ca->delayed_ack >> ACK_RATIO_SHIFT; ratio += cnt; - ca->delayed_ack = min(ratio, ACK_RATIO_LIMIT); + ca->delayed_ack = clamp(ratio, 1U, ACK_RATIO_LIMIT); } /* Some calls are for duplicates without timetamps */ diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 699fb102e97..12d6016bdd9 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -981,7 +981,7 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it, TCP_ADD_STATS(sock_net(sk), TCP_MIB_OUTSEGS, tcp_skb_pcount(skb)); - err = icsk->icsk_af_ops->queue_xmit(skb, &inet->cork.fl); + err = icsk->icsk_af_ops->queue_xmit(sk, skb, &inet->cork.fl); if (likely(err <= 0)) return err; @@ -2441,8 +2441,14 @@ int __tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb) err = tcp_transmit_skb(sk, skb, 1, GFP_ATOMIC); } - if (likely(!err)) + if (likely(!err)) { TCP_SKB_CB(skb)->sacked |= TCPCB_EVER_RETRANS; + /* Update global TCP statistics. */ + TCP_INC_STATS(sock_net(sk), TCP_MIB_RETRANSSEGS); + if (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_SYN) + NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPSYNRETRANS); + tp->total_retrans++; + } return err; } @@ -2452,12 +2458,6 @@ int tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb) int err = __tcp_retransmit_skb(sk, skb); if (err == 0) { - /* Update global TCP statistics. */ - TCP_INC_STATS(sock_net(sk), TCP_MIB_RETRANSSEGS); - if (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_SYN) - NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPSYNRETRANS); - tp->total_retrans++; - #if FASTRETRANS_DEBUG > 0 if (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_RETRANS) { net_dbg_ratelimited("retrans_out leaked\n"); diff --git a/net/ipv4/xfrm4_output.c b/net/ipv4/xfrm4_output.c index baa0f63731f..40e701f2e1e 100644 --- a/net/ipv4/xfrm4_output.c +++ b/net/ipv4/xfrm4_output.c @@ -86,7 +86,7 @@ int xfrm4_output_finish(struct sk_buff *skb) return xfrm_output(skb); } -int xfrm4_output(struct sk_buff *skb) +int xfrm4_output(struct sock *sk, struct sk_buff *skb) { struct dst_entry *dst = skb_dst(skb); struct xfrm_state *x = dst->xfrm; diff --git a/net/ipv6/inet6_connection_sock.c b/net/ipv6/inet6_connection_sock.c index c9138189415..d4ade34ab37 100644 --- a/net/ipv6/inet6_connection_sock.c +++ b/net/ipv6/inet6_connection_sock.c @@ -224,9 +224,8 @@ static struct dst_entry *inet6_csk_route_socket(struct sock *sk, return dst; } -int inet6_csk_xmit(struct sk_buff *skb, struct flowi *fl_unused) +int inet6_csk_xmit(struct sock *sk, struct sk_buff *skb, struct flowi *fl_unused) { - struct sock *sk = skb->sk; struct ipv6_pinfo *np = inet6_sk(sk); struct flowi6 fl6; struct dst_entry *dst; diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c index 34e0ded5c14..87891f5f57b 100644 --- a/net/ipv6/ip6_fib.c +++ b/net/ipv6/ip6_fib.c @@ -1459,7 +1459,7 @@ static int fib6_walk_continue(struct fib6_walker_t *w) if (w->skip) { w->skip--; - continue; + goto skip; } err = w->func(w); @@ -1469,6 +1469,7 @@ static int fib6_walk_continue(struct fib6_walker_t *w) w->count++; continue; } +skip: w->state = FWS_U; case FWS_U: if (fn == w->root) diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c index c98338b81d3..9d921462b57 100644 --- a/net/ipv6/ip6_gre.c +++ b/net/ipv6/ip6_gre.c @@ -1559,6 +1559,15 @@ static int ip6gre_changelink(struct net_device *dev, struct nlattr *tb[], return 0; } +static void ip6gre_dellink(struct net_device *dev, struct list_head *head) +{ + struct net *net = dev_net(dev); + struct ip6gre_net *ign = net_generic(net, ip6gre_net_id); + + if (dev != ign->fb_tunnel_dev) + unregister_netdevice_queue(dev, head); +} + static size_t ip6gre_get_size(const struct net_device *dev) { return @@ -1636,6 +1645,7 @@ static struct rtnl_link_ops ip6gre_link_ops __read_mostly = { .validate = ip6gre_tunnel_validate, .newlink = ip6gre_newlink, .changelink = ip6gre_changelink, + .dellink = ip6gre_dellink, .get_size = ip6gre_get_size, .fill_info = ip6gre_fill_info, }; diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index 3284d61577c..fbf11562b54 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -132,7 +132,7 @@ static int ip6_finish_output(struct sk_buff *skb) return ip6_finish_output2(skb); } -int ip6_output(struct sk_buff *skb) +int ip6_output(struct sock *sk, struct sk_buff *skb) { struct net_device *dev = skb_dst(skb)->dev; struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb)); @@ -344,12 +344,16 @@ static unsigned int ip6_dst_mtu_forward(const struct dst_entry *dst) static bool ip6_pkt_too_big(const struct sk_buff *skb, unsigned int mtu) { - if (skb->len <= mtu || skb->local_df) + if (skb->len <= mtu) return false; + /* ipv6 conntrack defrag sets max_frag_size + local_df */ if (IP6CB(skb)->frag_max_size && IP6CB(skb)->frag_max_size > mtu) return true; + if (skb->local_df) + return false; + if (skb_is_gso(skb) && skb_gso_network_seglen(skb) <= mtu) return false; @@ -1225,7 +1229,7 @@ int ip6_append_data(struct sock *sk, int getfrag(void *from, char *to, unsigned int maxnonfragsize, headersize; headersize = sizeof(struct ipv6hdr) + - (opt ? opt->tot_len : 0) + + (opt ? opt->opt_flen + opt->opt_nflen : 0) + (dst_allfrag(&rt->dst) ? sizeof(struct frag_hdr) : 0) + rt->rt6i_nfheader_len; diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c index e1df691d78b..f6a66bb4114 100644 --- a/net/ipv6/ip6_tunnel.c +++ b/net/ipv6/ip6_tunnel.c @@ -1340,8 +1340,8 @@ ip6_tnl_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) int err = 0; struct ip6_tnl_parm p; struct __ip6_tnl_parm p1; - struct ip6_tnl *t = NULL; - struct net *net = dev_net(dev); + struct ip6_tnl *t = netdev_priv(dev); + struct net *net = t->net; struct ip6_tnl_net *ip6n = net_generic(net, ip6_tnl_net_id); switch (cmd) { @@ -1353,11 +1353,11 @@ ip6_tnl_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) } ip6_tnl_parm_from_user(&p1, &p); t = ip6_tnl_locate(net, &p1, 0); + if (t == NULL) + t = netdev_priv(dev); } else { memset(&p, 0, sizeof(p)); } - if (t == NULL) - t = netdev_priv(dev); ip6_tnl_parm_to_user(&p, &t->parms); if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof (p))) { err = -EFAULT; @@ -1557,7 +1557,7 @@ static int ip6_tnl_validate(struct nlattr *tb[], struct nlattr *data[]) { u8 proto; - if (!data) + if (!data || !data[IFLA_IPTUN_PROTO]) return 0; proto = nla_get_u8(data[IFLA_IPTUN_PROTO]); diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c index 8737400af0a..8250474ab7d 100644 --- a/net/ipv6/ip6mr.c +++ b/net/ipv6/ip6mr.c @@ -700,7 +700,7 @@ static netdev_tx_t reg_vif_xmit(struct sk_buff *skb, struct mr6_table *mrt; struct flowi6 fl6 = { .flowi6_oif = dev->ifindex, - .flowi6_iif = skb->skb_iif, + .flowi6_iif = skb->skb_iif ? : LOOPBACK_IFINDEX, .flowi6_mark = skb->mark, }; int err; @@ -1633,7 +1633,7 @@ struct sock *mroute6_socket(struct net *net, struct sk_buff *skb) { struct mr6_table *mrt; struct flowi6 fl6 = { - .flowi6_iif = skb->skb_iif, + .flowi6_iif = skb->skb_iif ? : LOOPBACK_IFINDEX, .flowi6_oif = skb->dev->ifindex, .flowi6_mark = skb->mark, }; diff --git a/net/ipv6/netfilter.c b/net/ipv6/netfilter.c index 95f3f1da0d7..d38e6a8d8b9 100644 --- a/net/ipv6/netfilter.c +++ b/net/ipv6/netfilter.c @@ -30,13 +30,15 @@ int ip6_route_me_harder(struct sk_buff *skb) .daddr = iph->daddr, .saddr = iph->saddr, }; + int err; dst = ip6_route_output(net, skb->sk, &fl6); - if (dst->error) { + err = dst->error; + if (err) { IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTNOROUTES); LIMIT_NETDEBUG(KERN_DEBUG "ip6_route_me_harder: No more route.\n"); dst_release(dst); - return dst->error; + return err; } /* Drop old route. */ diff --git a/net/ipv6/netfilter/ip6t_rpfilter.c b/net/ipv6/netfilter/ip6t_rpfilter.c index e0983f3648a..790e0c6b19e 100644 --- a/net/ipv6/netfilter/ip6t_rpfilter.c +++ b/net/ipv6/netfilter/ip6t_rpfilter.c @@ -33,6 +33,7 @@ static bool rpfilter_lookup_reverse6(const struct sk_buff *skb, struct ipv6hdr *iph = ipv6_hdr(skb); bool ret = false; struct flowi6 fl6 = { + .flowi6_iif = LOOPBACK_IFINDEX, .flowlabel = (* (__be32 *) iph) & IPV6_FLOWINFO_MASK, .flowi6_proto = iph->nexthdr, .daddr = iph->saddr, diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 5015c50a5ba..004fffb6c22 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -84,9 +84,9 @@ static void ip6_dst_ifdown(struct dst_entry *, static int ip6_dst_gc(struct dst_ops *ops); static int ip6_pkt_discard(struct sk_buff *skb); -static int ip6_pkt_discard_out(struct sk_buff *skb); +static int ip6_pkt_discard_out(struct sock *sk, struct sk_buff *skb); static int ip6_pkt_prohibit(struct sk_buff *skb); -static int ip6_pkt_prohibit_out(struct sk_buff *skb); +static int ip6_pkt_prohibit_out(struct sock *sk, struct sk_buff *skb); static void ip6_link_failure(struct sk_buff *skb); static void ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk, struct sk_buff *skb, u32 mtu); @@ -290,7 +290,7 @@ static const struct rt6_info ip6_blk_hole_entry_template = { .obsolete = DST_OBSOLETE_FORCE_CHK, .error = -EINVAL, .input = dst_discard, - .output = dst_discard, + .output = dst_discard_sk, }, .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP), .rt6i_protocol = RTPROT_KERNEL, @@ -1058,7 +1058,7 @@ struct dst_entry *ip6_blackhole_route(struct net *net, struct dst_entry *dst_ori new->__use = 1; new->input = dst_discard; - new->output = dst_discard; + new->output = dst_discard_sk; if (dst_metrics_read_only(&ort->dst)) new->_metrics = ort->dst._metrics; @@ -1273,6 +1273,7 @@ void ip6_redirect(struct sk_buff *skb, struct net *net, int oif, u32 mark) struct flowi6 fl6; memset(&fl6, 0, sizeof(fl6)); + fl6.flowi6_iif = LOOPBACK_IFINDEX; fl6.flowi6_oif = oif; fl6.flowi6_mark = mark; fl6.daddr = iph->daddr; @@ -1294,6 +1295,7 @@ void ip6_redirect_no_header(struct sk_buff *skb, struct net *net, int oif, struct flowi6 fl6; memset(&fl6, 0, sizeof(fl6)); + fl6.flowi6_iif = LOOPBACK_IFINDEX; fl6.flowi6_oif = oif; fl6.flowi6_mark = mark; fl6.daddr = msg->dest; @@ -1338,7 +1340,7 @@ static unsigned int ip6_mtu(const struct dst_entry *dst) unsigned int mtu = dst_metric_raw(dst, RTAX_MTU); if (mtu) - return mtu; + goto out; mtu = IPV6_MIN_MTU; @@ -1348,7 +1350,8 @@ static unsigned int ip6_mtu(const struct dst_entry *dst) mtu = idev->cnf.mtu6; rcu_read_unlock(); - return mtu; +out: + return min_t(unsigned int, mtu, IP6_MAX_MTU); } static struct dst_entry *icmp6_dst_gc_list; @@ -1576,7 +1579,7 @@ int ip6_route_add(struct fib6_config *cfg) switch (cfg->fc_type) { case RTN_BLACKHOLE: rt->dst.error = -EINVAL; - rt->dst.output = dst_discard; + rt->dst.output = dst_discard_sk; rt->dst.input = dst_discard; break; case RTN_PROHIBIT: @@ -2128,7 +2131,7 @@ static int ip6_pkt_discard(struct sk_buff *skb) return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_INNOROUTES); } -static int ip6_pkt_discard_out(struct sk_buff *skb) +static int ip6_pkt_discard_out(struct sock *sk, struct sk_buff *skb) { skb->dev = skb_dst(skb)->dev; return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_OUTNOROUTES); @@ -2139,7 +2142,7 @@ static int ip6_pkt_prohibit(struct sk_buff *skb) return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_INNOROUTES); } -static int ip6_pkt_prohibit_out(struct sk_buff *skb) +static int ip6_pkt_prohibit_out(struct sock *sk, struct sk_buff *skb) { skb->dev = skb_dst(skb)->dev; return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_OUTNOROUTES); diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c index 1693c8d885f..e5a453ca302 100644 --- a/net/ipv6/sit.c +++ b/net/ipv6/sit.c @@ -974,8 +974,9 @@ static netdev_tx_t ipip6_tunnel_xmit(struct sk_buff *skb, goto out; } - err = iptunnel_xmit(rt, skb, fl4.saddr, fl4.daddr, IPPROTO_IPV6, tos, - ttl, df, !net_eq(tunnel->net, dev_net(dev))); + err = iptunnel_xmit(skb->sk, rt, skb, fl4.saddr, fl4.daddr, + IPPROTO_IPV6, tos, ttl, df, + !net_eq(tunnel->net, dev_net(dev))); iptunnel_xmit_stats(err, &dev->stats, dev->tstats); return NETDEV_TX_OK; @@ -1126,8 +1127,8 @@ ipip6_tunnel_ioctl (struct net_device *dev, struct ifreq *ifr, int cmd) int err = 0; struct ip_tunnel_parm p; struct ip_tunnel_prl prl; - struct ip_tunnel *t; - struct net *net = dev_net(dev); + struct ip_tunnel *t = netdev_priv(dev); + struct net *net = t->net; struct sit_net *sitn = net_generic(net, sit_net_id); #ifdef CONFIG_IPV6_SIT_6RD struct ip_tunnel_6rd ip6rd; @@ -1138,16 +1139,15 @@ ipip6_tunnel_ioctl (struct net_device *dev, struct ifreq *ifr, int cmd) #ifdef CONFIG_IPV6_SIT_6RD case SIOCGET6RD: #endif - t = NULL; if (dev == sitn->fb_tunnel_dev) { if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p))) { err = -EFAULT; break; } t = ipip6_tunnel_locate(net, &p, 0); + if (t == NULL) + t = netdev_priv(dev); } - if (t == NULL) - t = netdev_priv(dev); err = -EFAULT; if (cmd == SIOCGETTUNNEL) { @@ -1243,9 +1243,6 @@ ipip6_tunnel_ioctl (struct net_device *dev, struct ifreq *ifr, int cmd) err = -EINVAL; if (dev == sitn->fb_tunnel_dev) goto done; - err = -ENOENT; - if (!(t = netdev_priv(dev))) - goto done; err = ipip6_tunnel_get_prl(t, ifr->ifr_ifru.ifru_data); break; @@ -1261,9 +1258,6 @@ ipip6_tunnel_ioctl (struct net_device *dev, struct ifreq *ifr, int cmd) err = -EFAULT; if (copy_from_user(&prl, ifr->ifr_ifru.ifru_data, sizeof(prl))) goto done; - err = -ENOENT; - if (!(t = netdev_priv(dev))) - goto done; switch (cmd) { case SIOCDELPRL: @@ -1291,8 +1285,6 @@ ipip6_tunnel_ioctl (struct net_device *dev, struct ifreq *ifr, int cmd) sizeof(ip6rd))) goto done; - t = netdev_priv(dev); - if (cmd != SIOCDEL6RD) { err = ipip6_tunnel_update_6rd(t, &ip6rd); if (err < 0) diff --git a/net/ipv6/xfrm6_output.c b/net/ipv6/xfrm6_output.c index 6cd625e3770..19ef329bdbf 100644 --- a/net/ipv6/xfrm6_output.c +++ b/net/ipv6/xfrm6_output.c @@ -163,7 +163,7 @@ static int __xfrm6_output(struct sk_buff *skb) return x->outer_mode->afinfo->output_finish(skb); } -int xfrm6_output(struct sk_buff *skb) +int xfrm6_output(struct sock *sk, struct sk_buff *skb) { return NF_HOOK(NFPROTO_IPV6, NF_INET_POST_ROUTING, skb, NULL, skb_dst(skb)->dev, __xfrm6_output); diff --git a/net/iucv/af_iucv.c b/net/iucv/af_iucv.c index 01e77b0ae07..8c9d7302c84 100644 --- a/net/iucv/af_iucv.c +++ b/net/iucv/af_iucv.c @@ -1830,7 +1830,7 @@ static void iucv_callback_txdone(struct iucv_path *path, spin_lock_irqsave(&list->lock, flags); while (list_skb != (struct sk_buff *)list) { - if (msg->tag != IUCV_SKB_CB(list_skb)->tag) { + if (msg->tag == IUCV_SKB_CB(list_skb)->tag) { this = list_skb; break; } diff --git a/net/l2tp/l2tp_core.c b/net/l2tp/l2tp_core.c index 47f7a549055..a4e37d7158d 100644 --- a/net/l2tp/l2tp_core.c +++ b/net/l2tp/l2tp_core.c @@ -1131,10 +1131,10 @@ static int l2tp_xmit_core(struct l2tp_session *session, struct sk_buff *skb, skb->local_df = 1; #if IS_ENABLED(CONFIG_IPV6) if (tunnel->sock->sk_family == PF_INET6 && !tunnel->v4mapped) - error = inet6_csk_xmit(skb, NULL); + error = inet6_csk_xmit(tunnel->sock, skb, NULL); else #endif - error = ip_queue_xmit(skb, fl); + error = ip_queue_xmit(tunnel->sock, skb, fl); /* Update stats */ if (error >= 0) { diff --git a/net/l2tp/l2tp_ip.c b/net/l2tp/l2tp_ip.c index 0b44d855269..3397fe6897c 100644 --- a/net/l2tp/l2tp_ip.c +++ b/net/l2tp/l2tp_ip.c @@ -487,7 +487,7 @@ static int l2tp_ip_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *m xmit: /* Queue the packet to IP for output */ - rc = ip_queue_xmit(skb, &inet->cork.fl); + rc = ip_queue_xmit(sk, skb, &inet->cork.fl); rcu_read_unlock(); error: diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index 6dba48efe01..75421f2ba8b 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -1795,6 +1795,7 @@ int nf_conntrack_init_net(struct net *net) int cpu; atomic_set(&net->ct.count, 0); + seqcount_init(&net->ct.generation); net->ct.pcpu_lists = alloc_percpu(struct ct_pcpu); if (!net->ct.pcpu_lists) diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c index ccc46fa5edb..58579634427 100644 --- a/net/netfilter/nf_conntrack_netlink.c +++ b/net/netfilter/nf_conntrack_netlink.c @@ -1336,6 +1336,9 @@ ctnetlink_setup_nat(struct nf_conn *ct, const struct nlattr * const cda[]) #ifdef CONFIG_NF_NAT_NEEDED int ret; + if (!cda[CTA_NAT_DST] && !cda[CTA_NAT_SRC]) + return 0; + ret = ctnetlink_parse_nat_setup(ct, NF_NAT_MANIP_DST, cda[CTA_NAT_DST]); if (ret < 0) diff --git a/net/netfilter/nf_conntrack_pptp.c b/net/netfilter/nf_conntrack_pptp.c index 7bd03decd36..825c3e3f830 100644 --- a/net/netfilter/nf_conntrack_pptp.c +++ b/net/netfilter/nf_conntrack_pptp.c @@ -605,32 +605,14 @@ static struct nf_conntrack_helper pptp __read_mostly = { .expect_policy = &pptp_exp_policy, }; -static void nf_conntrack_pptp_net_exit(struct net *net) -{ - nf_ct_gre_keymap_flush(net); -} - -static struct pernet_operations nf_conntrack_pptp_net_ops = { - .exit = nf_conntrack_pptp_net_exit, -}; - static int __init nf_conntrack_pptp_init(void) { - int rv; - - rv = nf_conntrack_helper_register(&pptp); - if (rv < 0) - return rv; - rv = register_pernet_subsys(&nf_conntrack_pptp_net_ops); - if (rv < 0) - nf_conntrack_helper_unregister(&pptp); - return rv; + return nf_conntrack_helper_register(&pptp); } static void __exit nf_conntrack_pptp_fini(void) { nf_conntrack_helper_unregister(&pptp); - unregister_pernet_subsys(&nf_conntrack_pptp_net_ops); } module_init(nf_conntrack_pptp_init); diff --git a/net/netfilter/nf_conntrack_proto_gre.c b/net/netfilter/nf_conntrack_proto_gre.c index 9d9c0dade60..d5665739e3b 100644 --- a/net/netfilter/nf_conntrack_proto_gre.c +++ b/net/netfilter/nf_conntrack_proto_gre.c @@ -66,7 +66,7 @@ static inline struct netns_proto_gre *gre_pernet(struct net *net) return net_generic(net, proto_gre_net_id); } -void nf_ct_gre_keymap_flush(struct net *net) +static void nf_ct_gre_keymap_flush(struct net *net) { struct netns_proto_gre *net_gre = gre_pernet(net); struct nf_ct_gre_keymap *km, *tmp; @@ -78,7 +78,6 @@ void nf_ct_gre_keymap_flush(struct net *net) } write_unlock_bh(&net_gre->keymap_lock); } -EXPORT_SYMBOL(nf_ct_gre_keymap_flush); static inline int gre_key_cmpfn(const struct nf_ct_gre_keymap *km, const struct nf_conntrack_tuple *t) diff --git a/net/netfilter/nf_tables_core.c b/net/netfilter/nf_tables_core.c index 90998a6ff8b..804105391b9 100644 --- a/net/netfilter/nf_tables_core.c +++ b/net/netfilter/nf_tables_core.c @@ -25,9 +25,8 @@ static void nft_cmp_fast_eval(const struct nft_expr *expr, struct nft_data data[NFT_REG_MAX + 1]) { const struct nft_cmp_fast_expr *priv = nft_expr_priv(expr); - u32 mask; + u32 mask = nft_cmp_fast_mask(priv->len); - mask = ~0U >> (sizeof(priv->data) * BITS_PER_BYTE - priv->len); if ((data[priv->sreg].data[0] & mask) == priv->data) return; data[NFT_REG_VERDICT].verdict = NFT_BREAK; diff --git a/net/netfilter/nfnetlink.c b/net/netfilter/nfnetlink.c index e8138da4c14..23ef77c60ff 100644 --- a/net/netfilter/nfnetlink.c +++ b/net/netfilter/nfnetlink.c @@ -256,15 +256,15 @@ replay: #endif { nfnl_unlock(subsys_id); - kfree_skb(nskb); - return netlink_ack(skb, nlh, -EOPNOTSUPP); + netlink_ack(skb, nlh, -EOPNOTSUPP); + return kfree_skb(nskb); } } if (!ss->commit || !ss->abort) { nfnl_unlock(subsys_id); - kfree_skb(nskb); - return netlink_ack(skb, nlh, -EOPNOTSUPP); + netlink_ack(skb, nlh, -EOPNOTSUPP); + return kfree_skb(skb); } while (skb->len >= nlmsg_total_size(0)) { @@ -368,14 +368,13 @@ done: static void nfnetlink_rcv(struct sk_buff *skb) { struct nlmsghdr *nlh = nlmsg_hdr(skb); - struct net *net = sock_net(skb->sk); int msglen; if (nlh->nlmsg_len < NLMSG_HDRLEN || skb->len < nlh->nlmsg_len) return; - if (!ns_capable(net->user_ns, CAP_NET_ADMIN)) { + if (!netlink_net_capable(skb, CAP_NET_ADMIN)) { netlink_ack(skb, nlh, -EPERM); return; } diff --git a/net/netfilter/nft_cmp.c b/net/netfilter/nft_cmp.c index 954925db414..e2b3f51c81f 100644 --- a/net/netfilter/nft_cmp.c +++ b/net/netfilter/nft_cmp.c @@ -128,7 +128,7 @@ static int nft_cmp_fast_init(const struct nft_ctx *ctx, BUG_ON(err < 0); desc.len *= BITS_PER_BYTE; - mask = ~0U >> (sizeof(priv->data) * BITS_PER_BYTE - desc.len); + mask = nft_cmp_fast_mask(desc.len); priv->data = data.data[0] & mask; priv->len = desc.len; return 0; diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index 894cda0206b..81dca96d2be 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -1360,7 +1360,72 @@ retry: return err; } -static inline int netlink_capable(const struct socket *sock, unsigned int flag) +/** + * __netlink_ns_capable - General netlink message capability test + * @nsp: NETLINK_CB of the socket buffer holding a netlink command from userspace. + * @user_ns: The user namespace of the capability to use + * @cap: The capability to use + * + * Test to see if the opener of the socket we received the message + * from had when the netlink socket was created and the sender of the + * message has has the capability @cap in the user namespace @user_ns. + */ +bool __netlink_ns_capable(const struct netlink_skb_parms *nsp, + struct user_namespace *user_ns, int cap) +{ + return sk_ns_capable(nsp->sk, user_ns, cap); +} +EXPORT_SYMBOL(__netlink_ns_capable); + +/** + * netlink_ns_capable - General netlink message capability test + * @skb: socket buffer holding a netlink command from userspace + * @user_ns: The user namespace of the capability to use + * @cap: The capability to use + * + * Test to see if the opener of the socket we received the message + * from had when the netlink socket was created and the sender of the + * message has has the capability @cap in the user namespace @user_ns. + */ +bool netlink_ns_capable(const struct sk_buff *skb, + struct user_namespace *user_ns, int cap) +{ + return __netlink_ns_capable(&NETLINK_CB(skb), user_ns, cap); +} +EXPORT_SYMBOL(netlink_ns_capable); + +/** + * netlink_capable - Netlink global message capability test + * @skb: socket buffer holding a netlink command from userspace + * @cap: The capability to use + * + * Test to see if the opener of the socket we received the message + * from had when the netlink socket was created and the sender of the + * message has has the capability @cap in all user namespaces. + */ +bool netlink_capable(const struct sk_buff *skb, int cap) +{ + return netlink_ns_capable(skb, &init_user_ns, cap); +} +EXPORT_SYMBOL(netlink_capable); + +/** + * netlink_net_capable - Netlink network namespace message capability test + * @skb: socket buffer holding a netlink command from userspace + * @cap: The capability to use + * + * Test to see if the opener of the socket we received the message + * from had when the netlink socket was created and the sender of the + * message has has the capability @cap over the network namespace of + * the socket we received the message from. + */ +bool netlink_net_capable(const struct sk_buff *skb, int cap) +{ + return netlink_ns_capable(skb, sock_net(skb->sk)->user_ns, cap); +} +EXPORT_SYMBOL(netlink_net_capable); + +static inline int netlink_allowed(const struct socket *sock, unsigned int flag) { return (nl_table[sock->sk->sk_protocol].flags & flag) || ns_capable(sock_net(sock->sk)->user_ns, CAP_NET_ADMIN); @@ -1428,7 +1493,7 @@ static int netlink_bind(struct socket *sock, struct sockaddr *addr, /* Only superuser is allowed to listen multicasts */ if (nladdr->nl_groups) { - if (!netlink_capable(sock, NL_CFG_F_NONROOT_RECV)) + if (!netlink_allowed(sock, NL_CFG_F_NONROOT_RECV)) return -EPERM; err = netlink_realloc_groups(sk); if (err) @@ -1490,7 +1555,7 @@ static int netlink_connect(struct socket *sock, struct sockaddr *addr, return -EINVAL; if ((nladdr->nl_groups || nladdr->nl_pid) && - !netlink_capable(sock, NL_CFG_F_NONROOT_SEND)) + !netlink_allowed(sock, NL_CFG_F_NONROOT_SEND)) return -EPERM; if (!nlk->portid) @@ -2096,7 +2161,7 @@ static int netlink_setsockopt(struct socket *sock, int level, int optname, break; case NETLINK_ADD_MEMBERSHIP: case NETLINK_DROP_MEMBERSHIP: { - if (!netlink_capable(sock, NL_CFG_F_NONROOT_RECV)) + if (!netlink_allowed(sock, NL_CFG_F_NONROOT_RECV)) return -EPERM; err = netlink_realloc_groups(sk); if (err) @@ -2247,7 +2312,7 @@ static int netlink_sendmsg(struct kiocb *kiocb, struct socket *sock, dst_group = ffs(addr->nl_groups); err = -EPERM; if ((dst_group || dst_portid) && - !netlink_capable(sock, NL_CFG_F_NONROOT_SEND)) + !netlink_allowed(sock, NL_CFG_F_NONROOT_SEND)) goto out; } else { dst_portid = nlk->dst_portid; diff --git a/net/netlink/genetlink.c b/net/netlink/genetlink.c index b1dcdb932a8..a3ba3ca0ff9 100644 --- a/net/netlink/genetlink.c +++ b/net/netlink/genetlink.c @@ -561,7 +561,7 @@ static int genl_family_rcv_msg(struct genl_family *family, return -EOPNOTSUPP; if ((ops->flags & GENL_ADMIN_PERM) && - !capable(CAP_NET_ADMIN)) + !netlink_capable(skb, CAP_NET_ADMIN)) return -EPERM; if ((nlh->nlmsg_flags & NLM_F_DUMP) == NLM_F_DUMP) { diff --git a/net/openvswitch/vport-gre.c b/net/openvswitch/vport-gre.c index a3d6951602d..ebb6e244255 100644 --- a/net/openvswitch/vport-gre.c +++ b/net/openvswitch/vport-gre.c @@ -174,7 +174,7 @@ static int gre_tnl_send(struct vport *vport, struct sk_buff *skb) skb->local_df = 1; - return iptunnel_xmit(rt, skb, fl.saddr, + return iptunnel_xmit(skb->sk, rt, skb, fl.saddr, OVS_CB(skb)->tun_key->ipv4_dst, IPPROTO_GRE, OVS_CB(skb)->tun_key->ipv4_tos, OVS_CB(skb)->tun_key->ipv4_ttl, df, false); diff --git a/net/packet/diag.c b/net/packet/diag.c index 533ce4ff108..92f2c7107ee 100644 --- a/net/packet/diag.c +++ b/net/packet/diag.c @@ -128,6 +128,7 @@ static int pdiag_put_fanout(struct packet_sock *po, struct sk_buff *nlskb) static int sk_diag_fill(struct sock *sk, struct sk_buff *skb, struct packet_diag_req *req, + bool may_report_filterinfo, struct user_namespace *user_ns, u32 portid, u32 seq, u32 flags, int sk_ino) { @@ -172,7 +173,8 @@ static int sk_diag_fill(struct sock *sk, struct sk_buff *skb, goto out_nlmsg_trim; if ((req->pdiag_show & PACKET_SHOW_FILTER) && - sock_diag_put_filterinfo(user_ns, sk, skb, PACKET_DIAG_FILTER)) + sock_diag_put_filterinfo(may_report_filterinfo, sk, skb, + PACKET_DIAG_FILTER)) goto out_nlmsg_trim; return nlmsg_end(skb, nlh); @@ -188,9 +190,11 @@ static int packet_diag_dump(struct sk_buff *skb, struct netlink_callback *cb) struct packet_diag_req *req; struct net *net; struct sock *sk; + bool may_report_filterinfo; net = sock_net(skb->sk); req = nlmsg_data(cb->nlh); + may_report_filterinfo = netlink_net_capable(cb->skb, CAP_NET_ADMIN); mutex_lock(&net->packet.sklist_lock); sk_for_each(sk, &net->packet.sklist) { @@ -200,6 +204,7 @@ static int packet_diag_dump(struct sk_buff *skb, struct netlink_callback *cb) goto next; if (sk_diag_fill(sk, skb, req, + may_report_filterinfo, sk_user_ns(NETLINK_CB(cb->skb).sk), NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, NLM_F_MULTI, diff --git a/net/phonet/pn_netlink.c b/net/phonet/pn_netlink.c index dc15f430080..b64151ade6b 100644 --- a/net/phonet/pn_netlink.c +++ b/net/phonet/pn_netlink.c @@ -70,10 +70,10 @@ static int addr_doit(struct sk_buff *skb, struct nlmsghdr *nlh) int err; u8 pnaddr; - if (!capable(CAP_NET_ADMIN)) + if (!netlink_capable(skb, CAP_NET_ADMIN)) return -EPERM; - if (!capable(CAP_SYS_ADMIN)) + if (!netlink_capable(skb, CAP_SYS_ADMIN)) return -EPERM; ASSERT_RTNL(); @@ -233,10 +233,10 @@ static int route_doit(struct sk_buff *skb, struct nlmsghdr *nlh) int err; u8 dst; - if (!capable(CAP_NET_ADMIN)) + if (!netlink_capable(skb, CAP_NET_ADMIN)) return -EPERM; - if (!capable(CAP_SYS_ADMIN)) + if (!netlink_capable(skb, CAP_SYS_ADMIN)) return -EPERM; ASSERT_RTNL(); diff --git a/net/sched/act_api.c b/net/sched/act_api.c index 8a5ba5add4b..648778aef1a 100644 --- a/net/sched/act_api.c +++ b/net/sched/act_api.c @@ -948,7 +948,7 @@ static int tc_ctl_action(struct sk_buff *skb, struct nlmsghdr *n) u32 portid = skb ? NETLINK_CB(skb).portid : 0; int ret = 0, ovr = 0; - if ((n->nlmsg_type != RTM_GETACTION) && !capable(CAP_NET_ADMIN)) + if ((n->nlmsg_type != RTM_GETACTION) && !netlink_capable(skb, CAP_NET_ADMIN)) return -EPERM; ret = nlmsg_parse(n, sizeof(struct tcamsg), tca, TCA_ACT_MAX, NULL); diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c index 29a30a14c31..bdbdb1a7920 100644 --- a/net/sched/cls_api.c +++ b/net/sched/cls_api.c @@ -134,7 +134,7 @@ static int tc_ctl_tfilter(struct sk_buff *skb, struct nlmsghdr *n) int err; int tp_created = 0; - if ((n->nlmsg_type != RTM_GETTFILTER) && !capable(CAP_NET_ADMIN)) + if ((n->nlmsg_type != RTM_GETTFILTER) && !netlink_capable(skb, CAP_NET_ADMIN)) return -EPERM; replay: diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c index a0b84e0e22d..400769014bb 100644 --- a/net/sched/sch_api.c +++ b/net/sched/sch_api.c @@ -1084,7 +1084,7 @@ static int tc_get_qdisc(struct sk_buff *skb, struct nlmsghdr *n) struct Qdisc *p = NULL; int err; - if ((n->nlmsg_type != RTM_GETQDISC) && !capable(CAP_NET_ADMIN)) + if ((n->nlmsg_type != RTM_GETQDISC) && !netlink_capable(skb, CAP_NET_ADMIN)) return -EPERM; err = nlmsg_parse(n, sizeof(*tcm), tca, TCA_MAX, NULL); @@ -1151,7 +1151,7 @@ static int tc_modify_qdisc(struct sk_buff *skb, struct nlmsghdr *n) struct Qdisc *q, *p; int err; - if (!capable(CAP_NET_ADMIN)) + if (!netlink_capable(skb, CAP_NET_ADMIN)) return -EPERM; replay: @@ -1490,7 +1490,7 @@ static int tc_ctl_tclass(struct sk_buff *skb, struct nlmsghdr *n) u32 qid; int err; - if ((n->nlmsg_type != RTM_GETTCLASS) && !capable(CAP_NET_ADMIN)) + if ((n->nlmsg_type != RTM_GETTCLASS) && !netlink_capable(skb, CAP_NET_ADMIN)) return -EPERM; err = nlmsg_parse(n, sizeof(*tcm), tca, TCA_MAX, NULL); diff --git a/net/sched/sch_hhf.c b/net/sched/sch_hhf.c index edee03d922e..6e957c3b985 100644 --- a/net/sched/sch_hhf.c +++ b/net/sched/sch_hhf.c @@ -553,11 +553,6 @@ static int hhf_change(struct Qdisc *sch, struct nlattr *opt) if (err < 0) return err; - sch_tree_lock(sch); - - if (tb[TCA_HHF_BACKLOG_LIMIT]) - sch->limit = nla_get_u32(tb[TCA_HHF_BACKLOG_LIMIT]); - if (tb[TCA_HHF_QUANTUM]) new_quantum = nla_get_u32(tb[TCA_HHF_QUANTUM]); @@ -567,6 +562,12 @@ static int hhf_change(struct Qdisc *sch, struct nlattr *opt) non_hh_quantum = (u64)new_quantum * new_hhf_non_hh_weight; if (non_hh_quantum > INT_MAX) return -EINVAL; + + sch_tree_lock(sch); + + if (tb[TCA_HHF_BACKLOG_LIMIT]) + sch->limit = nla_get_u32(tb[TCA_HHF_BACKLOG_LIMIT]); + q->quantum = new_quantum; q->hhf_non_hh_weight = new_hhf_non_hh_weight; diff --git a/net/sctp/associola.c b/net/sctp/associola.c index 4f6d6f9d127..39579c3e0d1 100644 --- a/net/sctp/associola.c +++ b/net/sctp/associola.c @@ -1395,35 +1395,44 @@ static inline bool sctp_peer_needs_update(struct sctp_association *asoc) return false; } -/* Update asoc's rwnd for the approximated state in the buffer, - * and check whether SACK needs to be sent. - */ -void sctp_assoc_rwnd_update(struct sctp_association *asoc, bool update_peer) +/* Increase asoc's rwnd by len and send any window update SACK if needed. */ +void sctp_assoc_rwnd_increase(struct sctp_association *asoc, unsigned int len) { - int rx_count; struct sctp_chunk *sack; struct timer_list *timer; - if (asoc->ep->rcvbuf_policy) - rx_count = atomic_read(&asoc->rmem_alloc); - else - rx_count = atomic_read(&asoc->base.sk->sk_rmem_alloc); + if (asoc->rwnd_over) { + if (asoc->rwnd_over >= len) { + asoc->rwnd_over -= len; + } else { + asoc->rwnd += (len - asoc->rwnd_over); + asoc->rwnd_over = 0; + } + } else { + asoc->rwnd += len; + } - if ((asoc->base.sk->sk_rcvbuf - rx_count) > 0) - asoc->rwnd = (asoc->base.sk->sk_rcvbuf - rx_count) >> 1; - else - asoc->rwnd = 0; + /* If we had window pressure, start recovering it + * once our rwnd had reached the accumulated pressure + * threshold. The idea is to recover slowly, but up + * to the initial advertised window. + */ + if (asoc->rwnd_press && asoc->rwnd >= asoc->rwnd_press) { + int change = min(asoc->pathmtu, asoc->rwnd_press); + asoc->rwnd += change; + asoc->rwnd_press -= change; + } - pr_debug("%s: asoc:%p rwnd=%u, rx_count=%d, sk_rcvbuf=%d\n", - __func__, asoc, asoc->rwnd, rx_count, - asoc->base.sk->sk_rcvbuf); + pr_debug("%s: asoc:%p rwnd increased by %d to (%u, %u) - %u\n", + __func__, asoc, len, asoc->rwnd, asoc->rwnd_over, + asoc->a_rwnd); /* Send a window update SACK if the rwnd has increased by at least the * minimum of the association's PMTU and half of the receive buffer. * The algorithm used is similar to the one described in * Section 4.2.3.3 of RFC 1122. */ - if (update_peer && sctp_peer_needs_update(asoc)) { + if (sctp_peer_needs_update(asoc)) { asoc->a_rwnd = asoc->rwnd; pr_debug("%s: sending window update SACK- asoc:%p rwnd:%u " @@ -1445,6 +1454,45 @@ void sctp_assoc_rwnd_update(struct sctp_association *asoc, bool update_peer) } } +/* Decrease asoc's rwnd by len. */ +void sctp_assoc_rwnd_decrease(struct sctp_association *asoc, unsigned int len) +{ + int rx_count; + int over = 0; + + if (unlikely(!asoc->rwnd || asoc->rwnd_over)) + pr_debug("%s: association:%p has asoc->rwnd:%u, " + "asoc->rwnd_over:%u!\n", __func__, asoc, + asoc->rwnd, asoc->rwnd_over); + + if (asoc->ep->rcvbuf_policy) + rx_count = atomic_read(&asoc->rmem_alloc); + else + rx_count = atomic_read(&asoc->base.sk->sk_rmem_alloc); + + /* If we've reached or overflowed our receive buffer, announce + * a 0 rwnd if rwnd would still be positive. Store the + * the potential pressure overflow so that the window can be restored + * back to original value. + */ + if (rx_count >= asoc->base.sk->sk_rcvbuf) + over = 1; + + if (asoc->rwnd >= len) { + asoc->rwnd -= len; + if (over) { + asoc->rwnd_press += asoc->rwnd; + asoc->rwnd = 0; + } + } else { + asoc->rwnd_over = len - asoc->rwnd; + asoc->rwnd = 0; + } + + pr_debug("%s: asoc:%p rwnd decreased by %d to (%u, %u, %u)\n", + __func__, asoc, len, asoc->rwnd, asoc->rwnd_over, + asoc->rwnd_press); +} /* Build the bind address list for the association based on info from the * local endpoint and the remote peer. diff --git a/net/sctp/auth.c b/net/sctp/auth.c index 683c7d1b130..0e8529113dc 100644 --- a/net/sctp/auth.c +++ b/net/sctp/auth.c @@ -386,14 +386,13 @@ nomem: */ int sctp_auth_asoc_init_active_key(struct sctp_association *asoc, gfp_t gfp) { - struct net *net = sock_net(asoc->base.sk); struct sctp_auth_bytes *secret; struct sctp_shared_key *ep_key; /* If we don't support AUTH, or peer is not capable * we don't need to do anything. */ - if (!net->sctp.auth_enable || !asoc->peer.auth_capable) + if (!asoc->ep->auth_enable || !asoc->peer.auth_capable) return 0; /* If the key_id is non-zero and we couldn't find an @@ -440,16 +439,16 @@ struct sctp_shared_key *sctp_auth_get_shkey( */ int sctp_auth_init_hmacs(struct sctp_endpoint *ep, gfp_t gfp) { - struct net *net = sock_net(ep->base.sk); struct crypto_hash *tfm = NULL; __u16 id; - /* if the transforms are already allocted, we are done */ - if (!net->sctp.auth_enable) { + /* If AUTH extension is disabled, we are done */ + if (!ep->auth_enable) { ep->auth_hmacs = NULL; return 0; } + /* If the transforms are already allocated, we are done */ if (ep->auth_hmacs) return 0; @@ -665,12 +664,10 @@ static int __sctp_auth_cid(sctp_cid_t chunk, struct sctp_chunks_param *param) /* Check if peer requested that this chunk is authenticated */ int sctp_auth_send_cid(sctp_cid_t chunk, const struct sctp_association *asoc) { - struct net *net; if (!asoc) return 0; - net = sock_net(asoc->base.sk); - if (!net->sctp.auth_enable || !asoc->peer.auth_capable) + if (!asoc->ep->auth_enable || !asoc->peer.auth_capable) return 0; return __sctp_auth_cid(chunk, asoc->peer.peer_chunks); @@ -679,12 +676,10 @@ int sctp_auth_send_cid(sctp_cid_t chunk, const struct sctp_association *asoc) /* Check if we requested that peer authenticate this chunk. */ int sctp_auth_recv_cid(sctp_cid_t chunk, const struct sctp_association *asoc) { - struct net *net; if (!asoc) return 0; - net = sock_net(asoc->base.sk); - if (!net->sctp.auth_enable) + if (!asoc->ep->auth_enable) return 0; return __sctp_auth_cid(chunk, diff --git a/net/sctp/endpointola.c b/net/sctp/endpointola.c index 8e5fdea0521..3d9f429858d 100644 --- a/net/sctp/endpointola.c +++ b/net/sctp/endpointola.c @@ -68,7 +68,8 @@ static struct sctp_endpoint *sctp_endpoint_init(struct sctp_endpoint *ep, if (!ep->digest) return NULL; - if (net->sctp.auth_enable) { + ep->auth_enable = net->sctp.auth_enable; + if (ep->auth_enable) { /* Allocate space for HMACS and CHUNKS authentication * variables. There are arrays that we encode directly * into parameters to make the rest of the operations easier. diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c index 4e1d0fcb028..44cbb54c857 100644 --- a/net/sctp/protocol.c +++ b/net/sctp/protocol.c @@ -491,8 +491,13 @@ static void sctp_v4_get_dst(struct sctp_transport *t, union sctp_addr *saddr, continue; if ((laddr->state == SCTP_ADDR_SRC) && (AF_INET == laddr->a.sa.sa_family)) { - fl4->saddr = laddr->a.v4.sin_addr.s_addr; fl4->fl4_sport = laddr->a.v4.sin_port; + flowi4_update_output(fl4, + asoc->base.sk->sk_bound_dev_if, + RT_CONN_FLAGS(asoc->base.sk), + daddr->v4.sin_addr.s_addr, + laddr->a.v4.sin_addr.s_addr); + rt = ip_route_output_key(sock_net(sk), fl4); if (!IS_ERR(rt)) { dst = &rt->dst; @@ -957,7 +962,7 @@ static inline int sctp_v4_xmit(struct sk_buff *skb, SCTP_INC_STATS(sock_net(&inet->sk), SCTP_MIB_OUTSCTPPACKS); - return ip_queue_xmit(skb, &transport->fl); + return ip_queue_xmit(&inet->sk, skb, &transport->fl); } static struct sctp_af sctp_af_inet; diff --git a/net/sctp/sm_make_chunk.c b/net/sctp/sm_make_chunk.c index 3a1767ef320..fee5552ddf9 100644 --- a/net/sctp/sm_make_chunk.c +++ b/net/sctp/sm_make_chunk.c @@ -219,6 +219,7 @@ struct sctp_chunk *sctp_make_init(const struct sctp_association *asoc, gfp_t gfp, int vparam_len) { struct net *net = sock_net(asoc->base.sk); + struct sctp_endpoint *ep = asoc->ep; sctp_inithdr_t init; union sctp_params addrs; size_t chunksize; @@ -278,7 +279,7 @@ struct sctp_chunk *sctp_make_init(const struct sctp_association *asoc, chunksize += vparam_len; /* Account for AUTH related parameters */ - if (net->sctp.auth_enable) { + if (ep->auth_enable) { /* Add random parameter length*/ chunksize += sizeof(asoc->c.auth_random); @@ -363,7 +364,7 @@ struct sctp_chunk *sctp_make_init(const struct sctp_association *asoc, } /* Add SCTP-AUTH chunks to the parameter list */ - if (net->sctp.auth_enable) { + if (ep->auth_enable) { sctp_addto_chunk(retval, sizeof(asoc->c.auth_random), asoc->c.auth_random); if (auth_hmacs) @@ -2010,7 +2011,7 @@ static void sctp_process_ext_param(struct sctp_association *asoc, /* if the peer reports AUTH, assume that he * supports AUTH. */ - if (net->sctp.auth_enable) + if (asoc->ep->auth_enable) asoc->peer.auth_capable = 1; break; case SCTP_CID_ASCONF: @@ -2102,6 +2103,7 @@ static sctp_ierror_t sctp_process_unk_param(const struct sctp_association *asoc, * SCTP_IERROR_NO_ERROR - continue with the chunk */ static sctp_ierror_t sctp_verify_param(struct net *net, + const struct sctp_endpoint *ep, const struct sctp_association *asoc, union sctp_params param, sctp_cid_t cid, @@ -2152,7 +2154,7 @@ static sctp_ierror_t sctp_verify_param(struct net *net, goto fallthrough; case SCTP_PARAM_RANDOM: - if (!net->sctp.auth_enable) + if (!ep->auth_enable) goto fallthrough; /* SCTP-AUTH: Secion 6.1 @@ -2169,7 +2171,7 @@ static sctp_ierror_t sctp_verify_param(struct net *net, break; case SCTP_PARAM_CHUNKS: - if (!net->sctp.auth_enable) + if (!ep->auth_enable) goto fallthrough; /* SCTP-AUTH: Section 3.2 @@ -2185,7 +2187,7 @@ static sctp_ierror_t sctp_verify_param(struct net *net, break; case SCTP_PARAM_HMAC_ALGO: - if (!net->sctp.auth_enable) + if (!ep->auth_enable) goto fallthrough; hmacs = (struct sctp_hmac_algo_param *)param.p; @@ -2220,10 +2222,9 @@ fallthrough: } /* Verify the INIT packet before we process it. */ -int sctp_verify_init(struct net *net, const struct sctp_association *asoc, - sctp_cid_t cid, - sctp_init_chunk_t *peer_init, - struct sctp_chunk *chunk, +int sctp_verify_init(struct net *net, const struct sctp_endpoint *ep, + const struct sctp_association *asoc, sctp_cid_t cid, + sctp_init_chunk_t *peer_init, struct sctp_chunk *chunk, struct sctp_chunk **errp) { union sctp_params param; @@ -2264,8 +2265,8 @@ int sctp_verify_init(struct net *net, const struct sctp_association *asoc, /* Verify all the variable length parameters */ sctp_walk_params(param, peer_init, init_hdr.params) { - - result = sctp_verify_param(net, asoc, param, cid, chunk, errp); + result = sctp_verify_param(net, ep, asoc, param, cid, + chunk, errp); switch (result) { case SCTP_IERROR_ABORT: case SCTP_IERROR_NOMEM: @@ -2497,6 +2498,7 @@ static int sctp_process_param(struct sctp_association *asoc, struct sctp_af *af; union sctp_addr_param *addr_param; struct sctp_transport *t; + struct sctp_endpoint *ep = asoc->ep; /* We maintain all INIT parameters in network byte order all the * time. This allows us to not worry about whether the parameters @@ -2636,7 +2638,7 @@ do_addr_param: goto fall_through; case SCTP_PARAM_RANDOM: - if (!net->sctp.auth_enable) + if (!ep->auth_enable) goto fall_through; /* Save peer's random parameter */ @@ -2649,7 +2651,7 @@ do_addr_param: break; case SCTP_PARAM_HMAC_ALGO: - if (!net->sctp.auth_enable) + if (!ep->auth_enable) goto fall_through; /* Save peer's HMAC list */ @@ -2665,7 +2667,7 @@ do_addr_param: break; case SCTP_PARAM_CHUNKS: - if (!net->sctp.auth_enable) + if (!ep->auth_enable) goto fall_through; asoc->peer.peer_chunks = kmemdup(param.p, diff --git a/net/sctp/sm_sideeffect.c b/net/sctp/sm_sideeffect.c index 5d6883ff00c..fef2acdf4a2 100644 --- a/net/sctp/sm_sideeffect.c +++ b/net/sctp/sm_sideeffect.c @@ -496,11 +496,10 @@ static void sctp_do_8_2_transport_strike(sctp_cmd_seq_t *commands, /* If the transport error count is greater than the pf_retrans * threshold, and less than pathmaxrtx, and if the current state - * is not SCTP_UNCONFIRMED, then mark this transport as Partially - * Failed, see SCTP Quick Failover Draft, section 5.1 + * is SCTP_ACTIVE, then mark this transport as Partially Failed, + * see SCTP Quick Failover Draft, section 5.1 */ - if ((transport->state != SCTP_PF) && - (transport->state != SCTP_UNCONFIRMED) && + if ((transport->state == SCTP_ACTIVE) && (asoc->pf_retrans < transport->pathmaxrxt) && (transport->error_count > asoc->pf_retrans)) { diff --git a/net/sctp/sm_statefuns.c b/net/sctp/sm_statefuns.c index 01e002430c8..5170a1ff95a 100644 --- a/net/sctp/sm_statefuns.c +++ b/net/sctp/sm_statefuns.c @@ -357,7 +357,7 @@ sctp_disposition_t sctp_sf_do_5_1B_init(struct net *net, /* Verify the INIT chunk before processing it. */ err_chunk = NULL; - if (!sctp_verify_init(net, asoc, chunk->chunk_hdr->type, + if (!sctp_verify_init(net, ep, asoc, chunk->chunk_hdr->type, (sctp_init_chunk_t *)chunk->chunk_hdr, chunk, &err_chunk)) { /* This chunk contains fatal error. It is to be discarded. @@ -524,7 +524,7 @@ sctp_disposition_t sctp_sf_do_5_1C_ack(struct net *net, /* Verify the INIT chunk before processing it. */ err_chunk = NULL; - if (!sctp_verify_init(net, asoc, chunk->chunk_hdr->type, + if (!sctp_verify_init(net, ep, asoc, chunk->chunk_hdr->type, (sctp_init_chunk_t *)chunk->chunk_hdr, chunk, &err_chunk)) { @@ -1430,7 +1430,7 @@ static sctp_disposition_t sctp_sf_do_unexpected_init( /* Verify the INIT chunk before processing it. */ err_chunk = NULL; - if (!sctp_verify_init(net, asoc, chunk->chunk_hdr->type, + if (!sctp_verify_init(net, ep, asoc, chunk->chunk_hdr->type, (sctp_init_chunk_t *)chunk->chunk_hdr, chunk, &err_chunk)) { /* This chunk contains fatal error. It is to be discarded. @@ -6178,7 +6178,7 @@ static int sctp_eat_data(const struct sctp_association *asoc, * PMTU. In cases, such as loopback, this might be a rather * large spill over. */ - if ((!chunk->data_accepted) && (!asoc->rwnd || + if ((!chunk->data_accepted) && (!asoc->rwnd || asoc->rwnd_over || (datalen > asoc->rwnd + asoc->frag_point))) { /* If this is the next TSN, consider reneging to make diff --git a/net/sctp/socket.c b/net/sctp/socket.c index e13519e9df8..fee06b99a4d 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -2115,6 +2115,12 @@ static int sctp_recvmsg(struct kiocb *iocb, struct sock *sk, sctp_skb_pull(skb, copied); skb_queue_head(&sk->sk_receive_queue, skb); + /* When only partial message is copied to the user, increase + * rwnd by that amount. If all the data in the skb is read, + * rwnd is updated when the event is freed. + */ + if (!sctp_ulpevent_is_notification(event)) + sctp_assoc_rwnd_increase(event->asoc, copied); goto out; } else if ((event->msg_flags & MSG_NOTIFICATION) || (event->msg_flags & MSG_EOR)) @@ -3315,10 +3321,10 @@ static int sctp_setsockopt_auth_chunk(struct sock *sk, char __user *optval, unsigned int optlen) { - struct net *net = sock_net(sk); + struct sctp_endpoint *ep = sctp_sk(sk)->ep; struct sctp_authchunk val; - if (!net->sctp.auth_enable) + if (!ep->auth_enable) return -EACCES; if (optlen != sizeof(struct sctp_authchunk)) @@ -3335,7 +3341,7 @@ static int sctp_setsockopt_auth_chunk(struct sock *sk, } /* add this chunk id to the endpoint */ - return sctp_auth_ep_add_chunkid(sctp_sk(sk)->ep, val.sauth_chunk); + return sctp_auth_ep_add_chunkid(ep, val.sauth_chunk); } /* @@ -3348,12 +3354,12 @@ static int sctp_setsockopt_hmac_ident(struct sock *sk, char __user *optval, unsigned int optlen) { - struct net *net = sock_net(sk); + struct sctp_endpoint *ep = sctp_sk(sk)->ep; struct sctp_hmacalgo *hmacs; u32 idents; int err; - if (!net->sctp.auth_enable) + if (!ep->auth_enable) return -EACCES; if (optlen < sizeof(struct sctp_hmacalgo)) @@ -3370,7 +3376,7 @@ static int sctp_setsockopt_hmac_ident(struct sock *sk, goto out; } - err = sctp_auth_ep_set_hmacs(sctp_sk(sk)->ep, hmacs); + err = sctp_auth_ep_set_hmacs(ep, hmacs); out: kfree(hmacs); return err; @@ -3386,12 +3392,12 @@ static int sctp_setsockopt_auth_key(struct sock *sk, char __user *optval, unsigned int optlen) { - struct net *net = sock_net(sk); + struct sctp_endpoint *ep = sctp_sk(sk)->ep; struct sctp_authkey *authkey; struct sctp_association *asoc; int ret; - if (!net->sctp.auth_enable) + if (!ep->auth_enable) return -EACCES; if (optlen <= sizeof(struct sctp_authkey)) @@ -3412,7 +3418,7 @@ static int sctp_setsockopt_auth_key(struct sock *sk, goto out; } - ret = sctp_auth_set_key(sctp_sk(sk)->ep, asoc, authkey); + ret = sctp_auth_set_key(ep, asoc, authkey); out: kzfree(authkey); return ret; @@ -3428,11 +3434,11 @@ static int sctp_setsockopt_active_key(struct sock *sk, char __user *optval, unsigned int optlen) { - struct net *net = sock_net(sk); + struct sctp_endpoint *ep = sctp_sk(sk)->ep; struct sctp_authkeyid val; struct sctp_association *asoc; - if (!net->sctp.auth_enable) + if (!ep->auth_enable) return -EACCES; if (optlen != sizeof(struct sctp_authkeyid)) @@ -3444,8 +3450,7 @@ static int sctp_setsockopt_active_key(struct sock *sk, if (!asoc && val.scact_assoc_id && sctp_style(sk, UDP)) return -EINVAL; - return sctp_auth_set_active_key(sctp_sk(sk)->ep, asoc, - val.scact_keynumber); + return sctp_auth_set_active_key(ep, asoc, val.scact_keynumber); } /* @@ -3457,11 +3462,11 @@ static int sctp_setsockopt_del_key(struct sock *sk, char __user *optval, unsigned int optlen) { - struct net *net = sock_net(sk); + struct sctp_endpoint *ep = sctp_sk(sk)->ep; struct sctp_authkeyid val; struct sctp_association *asoc; - if (!net->sctp.auth_enable) + if (!ep->auth_enable) return -EACCES; if (optlen != sizeof(struct sctp_authkeyid)) @@ -3473,8 +3478,7 @@ static int sctp_setsockopt_del_key(struct sock *sk, if (!asoc && val.scact_assoc_id && sctp_style(sk, UDP)) return -EINVAL; - return sctp_auth_del_key_id(sctp_sk(sk)->ep, asoc, - val.scact_keynumber); + return sctp_auth_del_key_id(ep, asoc, val.scact_keynumber); } @@ -5381,16 +5385,16 @@ static int sctp_getsockopt_maxburst(struct sock *sk, int len, static int sctp_getsockopt_hmac_ident(struct sock *sk, int len, char __user *optval, int __user *optlen) { - struct net *net = sock_net(sk); + struct sctp_endpoint *ep = sctp_sk(sk)->ep; struct sctp_hmacalgo __user *p = (void __user *)optval; struct sctp_hmac_algo_param *hmacs; __u16 data_len = 0; u32 num_idents; - if (!net->sctp.auth_enable) + if (!ep->auth_enable) return -EACCES; - hmacs = sctp_sk(sk)->ep->auth_hmacs_list; + hmacs = ep->auth_hmacs_list; data_len = ntohs(hmacs->param_hdr.length) - sizeof(sctp_paramhdr_t); if (len < sizeof(struct sctp_hmacalgo) + data_len) @@ -5411,11 +5415,11 @@ static int sctp_getsockopt_hmac_ident(struct sock *sk, int len, static int sctp_getsockopt_active_key(struct sock *sk, int len, char __user *optval, int __user *optlen) { - struct net *net = sock_net(sk); + struct sctp_endpoint *ep = sctp_sk(sk)->ep; struct sctp_authkeyid val; struct sctp_association *asoc; - if (!net->sctp.auth_enable) + if (!ep->auth_enable) return -EACCES; if (len < sizeof(struct sctp_authkeyid)) @@ -5430,7 +5434,7 @@ static int sctp_getsockopt_active_key(struct sock *sk, int len, if (asoc) val.scact_keynumber = asoc->active_key_id; else - val.scact_keynumber = sctp_sk(sk)->ep->active_key_id; + val.scact_keynumber = ep->active_key_id; len = sizeof(struct sctp_authkeyid); if (put_user(len, optlen)) @@ -5444,7 +5448,7 @@ static int sctp_getsockopt_active_key(struct sock *sk, int len, static int sctp_getsockopt_peer_auth_chunks(struct sock *sk, int len, char __user *optval, int __user *optlen) { - struct net *net = sock_net(sk); + struct sctp_endpoint *ep = sctp_sk(sk)->ep; struct sctp_authchunks __user *p = (void __user *)optval; struct sctp_authchunks val; struct sctp_association *asoc; @@ -5452,7 +5456,7 @@ static int sctp_getsockopt_peer_auth_chunks(struct sock *sk, int len, u32 num_chunks = 0; char __user *to; - if (!net->sctp.auth_enable) + if (!ep->auth_enable) return -EACCES; if (len < sizeof(struct sctp_authchunks)) @@ -5489,7 +5493,7 @@ num: static int sctp_getsockopt_local_auth_chunks(struct sock *sk, int len, char __user *optval, int __user *optlen) { - struct net *net = sock_net(sk); + struct sctp_endpoint *ep = sctp_sk(sk)->ep; struct sctp_authchunks __user *p = (void __user *)optval; struct sctp_authchunks val; struct sctp_association *asoc; @@ -5497,7 +5501,7 @@ static int sctp_getsockopt_local_auth_chunks(struct sock *sk, int len, u32 num_chunks = 0; char __user *to; - if (!net->sctp.auth_enable) + if (!ep->auth_enable) return -EACCES; if (len < sizeof(struct sctp_authchunks)) @@ -5514,7 +5518,7 @@ static int sctp_getsockopt_local_auth_chunks(struct sock *sk, int len, if (asoc) ch = (struct sctp_chunks_param *)asoc->c.auth_chunks; else - ch = sctp_sk(sk)->ep->auth_chunk_list; + ch = ep->auth_chunk_list; if (!ch) goto num; diff --git a/net/sctp/sysctl.c b/net/sctp/sysctl.c index 35c8923b555..c82fdc1eab7 100644 --- a/net/sctp/sysctl.c +++ b/net/sctp/sysctl.c @@ -64,6 +64,9 @@ static int proc_sctp_do_rto_min(struct ctl_table *ctl, int write, static int proc_sctp_do_rto_max(struct ctl_table *ctl, int write, void __user *buffer, size_t *lenp, loff_t *ppos); +static int proc_sctp_do_auth(struct ctl_table *ctl, int write, + void __user *buffer, size_t *lenp, + loff_t *ppos); static struct ctl_table sctp_table[] = { { @@ -266,7 +269,7 @@ static struct ctl_table sctp_net_table[] = { .data = &init_net.sctp.auth_enable, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = proc_dointvec, + .proc_handler = proc_sctp_do_auth, }, { .procname = "addr_scope_policy", @@ -400,6 +403,37 @@ static int proc_sctp_do_rto_max(struct ctl_table *ctl, int write, return ret; } +static int proc_sctp_do_auth(struct ctl_table *ctl, int write, + void __user *buffer, size_t *lenp, + loff_t *ppos) +{ + struct net *net = current->nsproxy->net_ns; + struct ctl_table tbl; + int new_value, ret; + + memset(&tbl, 0, sizeof(struct ctl_table)); + tbl.maxlen = sizeof(unsigned int); + + if (write) + tbl.data = &new_value; + else + tbl.data = &net->sctp.auth_enable; + + ret = proc_dointvec(&tbl, write, buffer, lenp, ppos); + + if (write) { + struct sock *sk = net->sctp.ctl_sock; + + net->sctp.auth_enable = new_value; + /* Update the value in the control socket */ + lock_sock(sk); + sctp_sk(sk)->ep->auth_enable = new_value; + release_sock(sk); + } + + return ret; +} + int sctp_sysctl_net_register(struct net *net) { struct ctl_table *table = sctp_net_table; diff --git a/net/sctp/ulpevent.c b/net/sctp/ulpevent.c index 8d198ae0360..85c64658bd0 100644 --- a/net/sctp/ulpevent.c +++ b/net/sctp/ulpevent.c @@ -989,7 +989,7 @@ static void sctp_ulpevent_receive_data(struct sctp_ulpevent *event, skb = sctp_event2skb(event); /* Set the owner and charge rwnd for bytes received. */ sctp_ulpevent_set_owner(event, asoc); - sctp_assoc_rwnd_update(asoc, false); + sctp_assoc_rwnd_decrease(asoc, skb_headlen(skb)); if (!skb->data_len) return; @@ -1011,7 +1011,6 @@ static void sctp_ulpevent_release_data(struct sctp_ulpevent *event) { struct sk_buff *skb, *frag; unsigned int len; - struct sctp_association *asoc; /* Current stack structures assume that the rcv buffer is * per socket. For UDP style sockets this is not true as @@ -1036,11 +1035,8 @@ static void sctp_ulpevent_release_data(struct sctp_ulpevent *event) } done: - asoc = event->asoc; - sctp_association_hold(asoc); + sctp_assoc_rwnd_increase(event->asoc, len); sctp_ulpevent_release_owner(event); - sctp_assoc_rwnd_update(asoc, true); - sctp_association_put(asoc); } static void sctp_ulpevent_release_frag_data(struct sctp_ulpevent *event) diff --git a/net/socket.c b/net/socket.c index 1b1e7e6a960..abf56b2a14f 100644 --- a/net/socket.c +++ b/net/socket.c @@ -1880,8 +1880,8 @@ out: * Receive a datagram from a socket. */ -asmlinkage long sys_recv(int fd, void __user *ubuf, size_t size, - unsigned int flags) +SYSCALL_DEFINE4(recv, int, fd, void __user *, ubuf, size_t, size, + unsigned int, flags) { return sys_recvfrom(fd, ubuf, size, flags, NULL, NULL); } diff --git a/net/tipc/netlink.c b/net/tipc/netlink.c index 3aaf73de9e2..ad844d36534 100644 --- a/net/tipc/netlink.c +++ b/net/tipc/netlink.c @@ -47,7 +47,7 @@ static int handle_cmd(struct sk_buff *skb, struct genl_info *info) int hdr_space = nlmsg_total_size(GENL_HDRLEN + TIPC_GENL_HDRLEN); u16 cmd; - if ((req_userhdr->cmd & 0xC000) && (!capable(CAP_NET_ADMIN))) + if ((req_userhdr->cmd & 0xC000) && (!netlink_capable(skb, CAP_NET_ADMIN))) cmd = TIPC_CMD_NOT_NET_ADMIN; else cmd = req_userhdr->cmd; diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c index 5adfd94c5b8..85d232bed87 100644 --- a/net/vmw_vsock/af_vsock.c +++ b/net/vmw_vsock/af_vsock.c @@ -1925,9 +1925,23 @@ static struct miscdevice vsock_device = { .fops = &vsock_device_ops, }; -static int __vsock_core_init(void) +int __vsock_core_init(const struct vsock_transport *t, struct module *owner) { - int err; + int err = mutex_lock_interruptible(&vsock_register_mutex); + + if (err) + return err; + + if (transport) { + err = -EBUSY; + goto err_busy; + } + + /* Transport must be the owner of the protocol so that it can't + * unload while there are open sockets. + */ + vsock_proto.owner = owner; + transport = t; vsock_init_tables(); @@ -1951,36 +1965,19 @@ static int __vsock_core_init(void) goto err_unregister_proto; } + mutex_unlock(&vsock_register_mutex); return 0; err_unregister_proto: proto_unregister(&vsock_proto); err_misc_deregister: misc_deregister(&vsock_device); - return err; -} - -int vsock_core_init(const struct vsock_transport *t) -{ - int retval = mutex_lock_interruptible(&vsock_register_mutex); - if (retval) - return retval; - - if (transport) { - retval = -EBUSY; - goto out; - } - - transport = t; - retval = __vsock_core_init(); - if (retval) - transport = NULL; - -out: + transport = NULL; +err_busy: mutex_unlock(&vsock_register_mutex); - return retval; + return err; } -EXPORT_SYMBOL_GPL(vsock_core_init); +EXPORT_SYMBOL_GPL(__vsock_core_init); void vsock_core_exit(void) { @@ -2000,5 +1997,5 @@ EXPORT_SYMBOL_GPL(vsock_core_exit); MODULE_AUTHOR("VMware, Inc."); MODULE_DESCRIPTION("VMware Virtual Socket Family"); -MODULE_VERSION("1.0.0.0-k"); +MODULE_VERSION("1.0.1.0-k"); MODULE_LICENSE("GPL v2"); diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index f02f511b710..c08fbd11cef 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -1842,7 +1842,7 @@ purge_queue: xfrm_pol_put(pol); } -static int xdst_queue_output(struct sk_buff *skb) +static int xdst_queue_output(struct sock *sk, struct sk_buff *skb) { unsigned long sched_next; struct dst_entry *dst = skb_dst(skb); diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c index 8f131c10a6f..51398ae6cda 100644 --- a/net/xfrm/xfrm_user.c +++ b/net/xfrm/xfrm_user.c @@ -2377,7 +2377,7 @@ static int xfrm_user_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh) link = &xfrm_dispatch[type]; /* All operations require privileges, even GET */ - if (!ns_capable(net->user_ns, CAP_NET_ADMIN)) + if (!netlink_net_capable(skb, CAP_NET_ADMIN)) return -EPERM; if ((type == (XFRM_MSG_GETSA - XFRM_MSG_BASE) || |