diff options
Diffstat (limited to 'net/core')
-rw-r--r-- | net/core/dev.c | 32 | ||||
-rw-r--r-- | net/core/drop_monitor.c | 2 | ||||
-rw-r--r-- | net/core/filter.c | 21 | ||||
-rw-r--r-- | net/core/flow_dissector.c | 85 | ||||
-rw-r--r-- | net/core/net-sysfs.c | 20 | ||||
-rw-r--r-- | net/core/netpoll.c | 3 | ||||
-rw-r--r-- | net/core/pktgen.c | 111 | ||||
-rw-r--r-- | net/core/ptp_classifier.c | 64 | ||||
-rw-r--r-- | net/core/request_sock.c | 43 | ||||
-rw-r--r-- | net/core/rtnetlink.c | 101 | ||||
-rw-r--r-- | net/core/timestamping.c | 57 |
11 files changed, 320 insertions, 219 deletions
diff --git a/net/core/dev.c b/net/core/dev.c index 7990984ca36..138ab897de7 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -1085,6 +1085,7 @@ static int dev_get_valid_name(struct net *net, */ int dev_change_name(struct net_device *dev, const char *newname) { + unsigned char old_assign_type; char oldname[IFNAMSIZ]; int err = 0; int ret; @@ -1112,10 +1113,14 @@ int dev_change_name(struct net_device *dev, const char *newname) return err; } + old_assign_type = dev->name_assign_type; + dev->name_assign_type = NET_NAME_RENAMED; + rollback: ret = device_rename(&dev->dev, dev->name); if (ret) { memcpy(dev->name, oldname, IFNAMSIZ); + dev->name_assign_type = old_assign_type; write_seqcount_end(&devnet_rename_seq); return ret; } @@ -1144,6 +1149,8 @@ rollback: write_seqcount_begin(&devnet_rename_seq); memcpy(dev->name, oldname, IFNAMSIZ); memcpy(oldname, newname, IFNAMSIZ); + dev->name_assign_type = old_assign_type; + old_assign_type = NET_NAME_RENAMED; goto rollback; } else { pr_err("%s: name change rollback failed: %d\n", @@ -2745,8 +2752,8 @@ static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q, /* * Heuristic to force contended enqueues to serialize on a * separate lock before trying to get qdisc main lock. - * This permits __QDISC_STATE_RUNNING owner to get the lock more often - * and dequeue packets faster. + * This permits __QDISC___STATE_RUNNING owner to get the lock more + * often and dequeue packets faster. */ contended = qdisc_is_running(q); if (unlikely(contended)) @@ -5438,13 +5445,9 @@ int __dev_change_flags(struct net_device *dev, unsigned int flags) */ ret = 0; - if ((old_flags ^ flags) & IFF_UP) { /* Bit is different ? */ + if ((old_flags ^ flags) & IFF_UP) ret = ((old_flags & IFF_UP) ? __dev_close : __dev_open)(dev); - if (!ret) - dev_set_rx_mode(dev); - } - if ((flags ^ dev->gflags) & IFF_PROMISC) { int inc = (flags & IFF_PROMISC) ? 1 : -1; unsigned int old_flags = dev->flags; @@ -6444,17 +6447,19 @@ void netdev_freemem(struct net_device *dev) /** * alloc_netdev_mqs - allocate network device - * @sizeof_priv: size of private data to allocate space for - * @name: device name format string - * @setup: callback to initialize device - * @txqs: the number of TX subqueues to allocate - * @rxqs: the number of RX subqueues to allocate + * @sizeof_priv: size of private data to allocate space for + * @name: device name format string + * @name_assign_type: origin of device name + * @setup: callback to initialize device + * @txqs: the number of TX subqueues to allocate + * @rxqs: the number of RX subqueues to allocate * * Allocates a struct net_device with private data area for driver use * and performs basic initialization. Also allocates subqueue structs * for each queue on the device. */ struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name, + unsigned char name_assign_type, void (*setup)(struct net_device *), unsigned int txqs, unsigned int rxqs) { @@ -6533,6 +6538,7 @@ struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name, #endif strcpy(dev->name, name); + dev->name_assign_type = name_assign_type; dev->group = INIT_NETDEV_GROUP; if (!dev->ethtool_ops) dev->ethtool_ops = &default_ethtool_ops; @@ -7101,7 +7107,7 @@ static void __net_exit default_device_exit_batch(struct list_head *net_list) rtnl_lock_unregistering(net_list); list_for_each_entry(net, net_list, exit_list) { for_each_netdev_reverse(net, dev) { - if (dev->rtnl_link_ops) + if (dev->rtnl_link_ops && dev->rtnl_link_ops->dellink) dev->rtnl_link_ops->dellink(dev, &dev_kill_list); else unregister_netdevice_queue(dev, &dev_kill_list); diff --git a/net/core/drop_monitor.c b/net/core/drop_monitor.c index e70301eb7a4..50f9a9db579 100644 --- a/net/core/drop_monitor.c +++ b/net/core/drop_monitor.c @@ -289,10 +289,8 @@ static int net_dm_cmd_trace(struct sk_buff *skb, switch (info->genlhdr->cmd) { case NET_DM_CMD_START: return set_all_monitor_traces(TRACE_ON); - break; case NET_DM_CMD_STOP: return set_all_monitor_traces(TRACE_OFF); - break; } return -ENOTSUPP; diff --git a/net/core/filter.c b/net/core/filter.c index 1dbf6462f76..b90ae7fb3b8 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -84,15 +84,6 @@ void *bpf_internal_load_pointer_neg_helper(const struct sk_buff *skb, int k, uns return NULL; } -static inline void *load_pointer(const struct sk_buff *skb, int k, - unsigned int size, void *buffer) -{ - if (k >= 0) - return skb_header_pointer(skb, k, size, buffer); - - return bpf_internal_load_pointer_neg_helper(skb, k, size); -} - /** * sk_filter - run a packet through a socket filter * @sk: sock associated with &sk_buff @@ -537,7 +528,7 @@ load_word: * BPF_R0 - 8/16/32-bit skb data converted to cpu endianness */ - ptr = load_pointer((struct sk_buff *) (unsigned long) CTX, off, 4, &tmp); + ptr = bpf_load_pointer((struct sk_buff *) (unsigned long) CTX, off, 4, &tmp); if (likely(ptr != NULL)) { BPF_R0 = get_unaligned_be32(ptr); CONT; @@ -547,7 +538,7 @@ load_word: LD_ABS_H: /* BPF_R0 = ntohs(*(u16 *) (skb->data + imm32)) */ off = IMM; load_half: - ptr = load_pointer((struct sk_buff *) (unsigned long) CTX, off, 2, &tmp); + ptr = bpf_load_pointer((struct sk_buff *) (unsigned long) CTX, off, 2, &tmp); if (likely(ptr != NULL)) { BPF_R0 = get_unaligned_be16(ptr); CONT; @@ -557,7 +548,7 @@ load_half: LD_ABS_B: /* BPF_R0 = *(u8 *) (skb->data + imm32) */ off = IMM; load_byte: - ptr = load_pointer((struct sk_buff *) (unsigned long) CTX, off, 1, &tmp); + ptr = bpf_load_pointer((struct sk_buff *) (unsigned long) CTX, off, 1, &tmp); if (likely(ptr != NULL)) { BPF_R0 = *(u8 *)ptr; CONT; @@ -1094,7 +1085,7 @@ err: * a cell if not previously written, and we check all branches to be sure * a malicious user doesn't try to abuse us. */ -static int check_load_and_stores(struct sock_filter *filter, int flen) +static int check_load_and_stores(const struct sock_filter *filter, int flen) { u16 *masks, memvalid = 0; /* One bit per cell, 16 cells */ int pc, ret = 0; @@ -1227,7 +1218,7 @@ static bool chk_code_allowed(u16 code_to_probe) * * Returns 0 if the rule set is legal or -EINVAL if not. */ -int sk_chk_filter(struct sock_filter *filter, unsigned int flen) +int sk_chk_filter(const struct sock_filter *filter, unsigned int flen) { bool anc_found; int pc; @@ -1237,7 +1228,7 @@ int sk_chk_filter(struct sock_filter *filter, unsigned int flen) /* Check the filter code now */ for (pc = 0; pc < flen; pc++) { - struct sock_filter *ftest = &filter[pc]; + const struct sock_filter *ftest = &filter[pc]; /* May we actually operate on this code? */ if (!chk_code_allowed(ftest->code)) diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c index 107ed12a532..5f362c1d033 100644 --- a/net/core/flow_dissector.c +++ b/net/core/flow_dissector.c @@ -80,6 +80,8 @@ ip: case htons(ETH_P_IPV6): { const struct ipv6hdr *iph; struct ipv6hdr _iph; + __be32 flow_label; + ipv6: iph = skb_header_pointer(skb, nhoff, sizeof(_iph), &_iph); if (!iph) @@ -89,6 +91,21 @@ ipv6: flow->src = (__force __be32)ipv6_addr_hash(&iph->saddr); flow->dst = (__force __be32)ipv6_addr_hash(&iph->daddr); nhoff += sizeof(struct ipv6hdr); + + flow_label = ip6_flowlabel(iph); + if (flow_label) { + /* Awesome, IPv6 packet has a flow label so we can + * use that to represent the ports without any + * further dissection. + */ + flow->n_proto = proto; + flow->ip_proto = ip_proto; + flow->ports = flow_label; + flow->thoff = (u16)nhoff; + + return true; + } + break; } case htons(ETH_P_8021AD): @@ -175,6 +192,7 @@ ipv6: break; } + flow->n_proto = proto; flow->ip_proto = ip_proto; flow->ports = skb_flow_get_ports(skb, nhoff, ip_proto); flow->thoff = (u16) nhoff; @@ -195,12 +213,33 @@ static __always_inline u32 __flow_hash_3words(u32 a, u32 b, u32 c) return jhash_3words(a, b, c, hashrnd); } -static __always_inline u32 __flow_hash_1word(u32 a) +static inline u32 __flow_hash_from_keys(struct flow_keys *keys) { - __flow_hash_secret_init(); - return jhash_1word(a, hashrnd); + u32 hash; + + /* get a consistent hash (same value on both flow directions) */ + if (((__force u32)keys->dst < (__force u32)keys->src) || + (((__force u32)keys->dst == (__force u32)keys->src) && + ((__force u16)keys->port16[1] < (__force u16)keys->port16[0]))) { + swap(keys->dst, keys->src); + swap(keys->port16[0], keys->port16[1]); + } + + hash = __flow_hash_3words((__force u32)keys->dst, + (__force u32)keys->src, + (__force u32)keys->ports); + if (!hash) + hash = 1; + + return hash; } +u32 flow_hash_from_keys(struct flow_keys *keys) +{ + return __flow_hash_from_keys(keys); +} +EXPORT_SYMBOL(flow_hash_from_keys); + /* * __skb_get_hash: calculate a flow hash based on src/dst addresses * and src/dst port numbers. Sets hash in skb to non-zero hash value @@ -210,7 +249,6 @@ static __always_inline u32 __flow_hash_1word(u32 a) void __skb_get_hash(struct sk_buff *skb) { struct flow_keys keys; - u32 hash; if (!skb_flow_dissect(skb, &keys)) return; @@ -218,21 +256,9 @@ void __skb_get_hash(struct sk_buff *skb) if (keys.ports) skb->l4_hash = 1; - /* get a consistent hash (same value on both flow directions) */ - if (((__force u32)keys.dst < (__force u32)keys.src) || - (((__force u32)keys.dst == (__force u32)keys.src) && - ((__force u16)keys.port16[1] < (__force u16)keys.port16[0]))) { - swap(keys.dst, keys.src); - swap(keys.port16[0], keys.port16[1]); - } - - hash = __flow_hash_3words((__force u32)keys.dst, - (__force u32)keys.src, - (__force u32)keys.ports); - if (!hash) - hash = 1; + skb->sw_hash = 1; - skb->hash = hash; + skb->hash = __flow_hash_from_keys(&keys); } EXPORT_SYMBOL(__skb_get_hash); @@ -240,7 +266,7 @@ EXPORT_SYMBOL(__skb_get_hash); * Returns a Tx hash based on the given packet descriptor a Tx queues' number * to be used as a distribution range. */ -u16 __skb_tx_hash(const struct net_device *dev, const struct sk_buff *skb, +u16 __skb_tx_hash(const struct net_device *dev, struct sk_buff *skb, unsigned int num_tx_queues) { u32 hash; @@ -260,13 +286,7 @@ u16 __skb_tx_hash(const struct net_device *dev, const struct sk_buff *skb, qcount = dev->tc_to_txq[tc].count; } - if (skb->sk && skb->sk->sk_hash) - hash = skb->sk->sk_hash; - else - hash = (__force u16) skb->protocol; - hash = __flow_hash_1word(hash); - - return (u16) (((u64) hash * qcount) >> 32) + qoffset; + return (u16) (((u64)skb_get_hash(skb) * qcount) >> 32) + qoffset; } EXPORT_SYMBOL(__skb_tx_hash); @@ -338,17 +358,10 @@ static inline int get_xps_queue(struct net_device *dev, struct sk_buff *skb) if (map) { if (map->len == 1) queue_index = map->queues[0]; - else { - u32 hash; - if (skb->sk && skb->sk->sk_hash) - hash = skb->sk->sk_hash; - else - hash = (__force u16) skb->protocol ^ - skb->hash; - hash = __flow_hash_1word(hash); + else queue_index = map->queues[ - ((u64)hash * map->len) >> 32]; - } + ((u64)skb_get_hash(skb) * map->len) >> 32]; + if (unlikely(queue_index >= dev->real_num_tx_queues)) queue_index = -1; } diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c index 1cac29ebb05..7752f2ad49a 100644 --- a/net/core/net-sysfs.c +++ b/net/core/net-sysfs.c @@ -112,6 +112,25 @@ NETDEVICE_SHOW_RO(ifindex, fmt_dec); NETDEVICE_SHOW_RO(type, fmt_dec); NETDEVICE_SHOW_RO(link_mode, fmt_dec); +static ssize_t format_name_assign_type(const struct net_device *net, char *buf) +{ + return sprintf(buf, fmt_dec, net->name_assign_type); +} + +static ssize_t name_assign_type_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct net_device *net = to_net_dev(dev); + ssize_t ret = -EINVAL; + + if (net->name_assign_type != NET_NAME_UNKNOWN) + ret = netdev_show(dev, attr, buf, format_name_assign_type); + + return ret; +} +static DEVICE_ATTR_RO(name_assign_type); + /* use same locking rules as GIFHWADDR ioctl's */ static ssize_t address_show(struct device *dev, struct device_attribute *attr, char *buf) @@ -387,6 +406,7 @@ static struct attribute *net_class_attrs[] = { &dev_attr_dev_port.attr, &dev_attr_iflink.attr, &dev_attr_ifindex.attr, + &dev_attr_name_assign_type.attr, &dev_attr_addr_assign_type.attr, &dev_attr_addr_len.attr, &dev_attr_link_mode.attr, diff --git a/net/core/netpoll.c b/net/core/netpoll.c index e33937fb32a..907fb5e36c0 100644 --- a/net/core/netpoll.c +++ b/net/core/netpoll.c @@ -822,7 +822,8 @@ void __netpoll_cleanup(struct netpoll *np) RCU_INIT_POINTER(np->dev->npinfo, NULL); call_rcu_bh(&npinfo->rcu, rcu_cleanup_netpoll_info); - } + } else + RCU_INIT_POINTER(np->dev->npinfo, NULL); } EXPORT_SYMBOL_GPL(__netpoll_cleanup); diff --git a/net/core/pktgen.c b/net/core/pktgen.c index fc17a9d309a..8b849ddfef2 100644 --- a/net/core/pktgen.c +++ b/net/core/pktgen.c @@ -69,8 +69,9 @@ * for running devices in the if_list and sends packets until count is 0 it * also the thread checks the thread->control which is used for inter-process * communication. controlling process "posts" operations to the threads this - * way. The if_lock should be possible to remove when add/rem_device is merged - * into this too. + * way. + * The if_list is RCU protected, and the if_lock remains to protect updating + * of if_list, from "add_device" as it invoked from userspace (via proc write). * * By design there should only be *one* "controlling" process. In practice * multiple write accesses gives unpredictable result. Understood by "write" @@ -208,7 +209,7 @@ #define T_REMDEVALL (1<<2) /* Remove all devs */ #define T_REMDEV (1<<3) /* Remove one dev */ -/* If lock -- can be removed after some work */ +/* If lock -- protects updating of if_list */ #define if_lock(t) spin_lock(&(t->if_lock)); #define if_unlock(t) spin_unlock(&(t->if_lock)); @@ -241,6 +242,7 @@ struct pktgen_dev { struct proc_dir_entry *entry; /* proc file */ struct pktgen_thread *pg_thread;/* the owner */ struct list_head list; /* chaining in the thread's run-queue */ + struct rcu_head rcu; /* freed by RCU */ int running; /* if false, the test will stop */ @@ -802,7 +804,6 @@ static int strn_len(const char __user * user_buffer, unsigned int maxlen) case '\t': case ' ': goto done_str; - break; default: break; } @@ -1737,14 +1738,14 @@ static int pktgen_thread_show(struct seq_file *seq, void *v) seq_puts(seq, "Running: "); - if_lock(t); - list_for_each_entry(pkt_dev, &t->if_list, list) + rcu_read_lock(); + list_for_each_entry_rcu(pkt_dev, &t->if_list, list) if (pkt_dev->running) seq_printf(seq, "%s ", pkt_dev->odevname); seq_puts(seq, "\nStopped: "); - list_for_each_entry(pkt_dev, &t->if_list, list) + list_for_each_entry_rcu(pkt_dev, &t->if_list, list) if (!pkt_dev->running) seq_printf(seq, "%s ", pkt_dev->odevname); @@ -1753,7 +1754,7 @@ static int pktgen_thread_show(struct seq_file *seq, void *v) else seq_puts(seq, "\nResult: NA\n"); - if_unlock(t); + rcu_read_unlock(); return 0; } @@ -1878,10 +1879,8 @@ static struct pktgen_dev *__pktgen_NN_threads(const struct pktgen_net *pn, pkt_dev = pktgen_find_dev(t, ifname, exact); if (pkt_dev) { if (remove) { - if_lock(t); pkt_dev->removal_mark = 1; t->control |= T_REMDEV; - if_unlock(t); } break; } @@ -1931,7 +1930,8 @@ static void pktgen_change_name(const struct pktgen_net *pn, struct net_device *d list_for_each_entry(t, &pn->pktgen_threads, th_list) { struct pktgen_dev *pkt_dev; - list_for_each_entry(pkt_dev, &t->if_list, list) { + rcu_read_lock(); + list_for_each_entry_rcu(pkt_dev, &t->if_list, list) { if (pkt_dev->odev != dev) continue; @@ -1946,6 +1946,7 @@ static void pktgen_change_name(const struct pktgen_net *pn, struct net_device *d dev->name); break; } + rcu_read_unlock(); } } @@ -2997,8 +2998,8 @@ static void pktgen_run(struct pktgen_thread *t) func_enter(); - if_lock(t); - list_for_each_entry(pkt_dev, &t->if_list, list) { + rcu_read_lock(); + list_for_each_entry_rcu(pkt_dev, &t->if_list, list) { /* * setup odev and create initial packet. @@ -3007,18 +3008,18 @@ static void pktgen_run(struct pktgen_thread *t) if (pkt_dev->odev) { pktgen_clear_counters(pkt_dev); - pkt_dev->running = 1; /* Cranke yeself! */ pkt_dev->skb = NULL; pkt_dev->started_at = pkt_dev->next_tx = ktime_get(); set_pkt_overhead(pkt_dev); strcpy(pkt_dev->result, "Starting"); + pkt_dev->running = 1; /* Cranke yeself! */ started++; } else strcpy(pkt_dev->result, "Error starting"); } - if_unlock(t); + rcu_read_unlock(); if (started) t->control &= ~(T_STOP); } @@ -3041,27 +3042,25 @@ static int thread_is_running(const struct pktgen_thread *t) { const struct pktgen_dev *pkt_dev; - list_for_each_entry(pkt_dev, &t->if_list, list) - if (pkt_dev->running) + rcu_read_lock(); + list_for_each_entry_rcu(pkt_dev, &t->if_list, list) + if (pkt_dev->running) { + rcu_read_unlock(); return 1; + } + rcu_read_unlock(); return 0; } static int pktgen_wait_thread_run(struct pktgen_thread *t) { - if_lock(t); - while (thread_is_running(t)) { - if_unlock(t); - msleep_interruptible(100); if (signal_pending(current)) goto signal; - if_lock(t); } - if_unlock(t); return 1; signal: return 0; @@ -3166,10 +3165,10 @@ static int pktgen_stop_device(struct pktgen_dev *pkt_dev) return -EINVAL; } + pkt_dev->running = 0; kfree_skb(pkt_dev->skb); pkt_dev->skb = NULL; pkt_dev->stopped_at = ktime_get(); - pkt_dev->running = 0; show_results(pkt_dev, nr_frags); @@ -3180,9 +3179,8 @@ static struct pktgen_dev *next_to_run(struct pktgen_thread *t) { struct pktgen_dev *pkt_dev, *best = NULL; - if_lock(t); - - list_for_each_entry(pkt_dev, &t->if_list, list) { + rcu_read_lock(); + list_for_each_entry_rcu(pkt_dev, &t->if_list, list) { if (!pkt_dev->running) continue; if (best == NULL) @@ -3190,7 +3188,8 @@ static struct pktgen_dev *next_to_run(struct pktgen_thread *t) else if (ktime_compare(pkt_dev->next_tx, best->next_tx) < 0) best = pkt_dev; } - if_unlock(t); + rcu_read_unlock(); + return best; } @@ -3200,13 +3199,13 @@ static void pktgen_stop(struct pktgen_thread *t) func_enter(); - if_lock(t); + rcu_read_lock(); - list_for_each_entry(pkt_dev, &t->if_list, list) { + list_for_each_entry_rcu(pkt_dev, &t->if_list, list) { pktgen_stop_device(pkt_dev); } - if_unlock(t); + rcu_read_unlock(); } /* @@ -3220,8 +3219,6 @@ static void pktgen_rem_one_if(struct pktgen_thread *t) func_enter(); - if_lock(t); - list_for_each_safe(q, n, &t->if_list) { cur = list_entry(q, struct pktgen_dev, list); @@ -3235,8 +3232,6 @@ static void pktgen_rem_one_if(struct pktgen_thread *t) break; } - - if_unlock(t); } static void pktgen_rem_all_ifs(struct pktgen_thread *t) @@ -3248,8 +3243,6 @@ static void pktgen_rem_all_ifs(struct pktgen_thread *t) /* Remove all devices, free mem */ - if_lock(t); - list_for_each_safe(q, n, &t->if_list) { cur = list_entry(q, struct pktgen_dev, list); @@ -3258,8 +3251,6 @@ static void pktgen_rem_all_ifs(struct pktgen_thread *t) pktgen_remove_device(t, cur); } - - if_unlock(t); } static void pktgen_rem_thread(struct pktgen_thread *t) @@ -3407,10 +3398,10 @@ static int pktgen_thread_worker(void *arg) pr_debug("starting pktgen/%d: pid=%d\n", cpu, task_pid_nr(current)); - set_current_state(TASK_INTERRUPTIBLE); - set_freezable(); + __set_current_state(TASK_RUNNING); + while (!kthread_should_stop()) { pkt_dev = next_to_run(t); @@ -3424,8 +3415,6 @@ static int pktgen_thread_worker(void *arg) continue; } - __set_current_state(TASK_RUNNING); - if (likely(pkt_dev)) { pktgen_xmit(pkt_dev); @@ -3456,9 +3445,8 @@ static int pktgen_thread_worker(void *arg) } try_to_freeze(); - - set_current_state(TASK_INTERRUPTIBLE); } + set_current_state(TASK_INTERRUPTIBLE); pr_debug("%s stopping all device\n", t->tsk->comm); pktgen_stop(t); @@ -3485,8 +3473,8 @@ static struct pktgen_dev *pktgen_find_dev(struct pktgen_thread *t, struct pktgen_dev *p, *pkt_dev = NULL; size_t len = strlen(ifname); - if_lock(t); - list_for_each_entry(p, &t->if_list, list) + rcu_read_lock(); + list_for_each_entry_rcu(p, &t->if_list, list) if (strncmp(p->odevname, ifname, len) == 0) { if (p->odevname[len]) { if (exact || p->odevname[len] != '@') @@ -3496,7 +3484,7 @@ static struct pktgen_dev *pktgen_find_dev(struct pktgen_thread *t, break; } - if_unlock(t); + rcu_read_unlock(); pr_debug("find_dev(%s) returning %p\n", ifname, pkt_dev); return pkt_dev; } @@ -3510,6 +3498,12 @@ static int add_dev_to_thread(struct pktgen_thread *t, { int rv = 0; + /* This function cannot be called concurrently, as its called + * under pktgen_thread_lock mutex, but it can run from + * userspace on another CPU than the kthread. The if_lock() + * is used here to sync with concurrent instances of + * _rem_dev_from_if_list() invoked via kthread, which is also + * updating the if_list */ if_lock(t); if (pkt_dev->pg_thread) { @@ -3518,9 +3512,9 @@ static int add_dev_to_thread(struct pktgen_thread *t, goto out; } - list_add(&pkt_dev->list, &t->if_list); - pkt_dev->pg_thread = t; pkt_dev->running = 0; + pkt_dev->pg_thread = t; + list_add_rcu(&pkt_dev->list, &t->if_list); out: if_unlock(t); @@ -3675,11 +3669,13 @@ static void _rem_dev_from_if_list(struct pktgen_thread *t, struct list_head *q, *n; struct pktgen_dev *p; + if_lock(t); list_for_each_safe(q, n, &t->if_list) { p = list_entry(q, struct pktgen_dev, list); if (p == pkt_dev) - list_del(&p->list); + list_del_rcu(&p->list); } + if_unlock(t); } static int pktgen_remove_device(struct pktgen_thread *t, @@ -3699,20 +3695,22 @@ static int pktgen_remove_device(struct pktgen_thread *t, pkt_dev->odev = NULL; } - /* And update the thread if_list */ - - _rem_dev_from_if_list(t, pkt_dev); - + /* Remove proc before if_list entry, because add_device uses + * list to determine if interface already exist, avoid race + * with proc_create_data() */ if (pkt_dev->entry) proc_remove(pkt_dev->entry); + /* And update the thread if_list */ + _rem_dev_from_if_list(t, pkt_dev); + #ifdef CONFIG_XFRM free_SAs(pkt_dev); #endif vfree(pkt_dev->flows); if (pkt_dev->page) put_page(pkt_dev->page); - kfree(pkt_dev); + kfree_rcu(pkt_dev, rcu); return 0; } @@ -3812,6 +3810,7 @@ static void __exit pg_cleanup(void) { unregister_netdevice_notifier(&pktgen_notifier_block); unregister_pernet_subsys(&pg_net_ops); + /* Don't need rcu_barrier() due to use of kfree_rcu() */ } module_init(pg_init); diff --git a/net/core/ptp_classifier.c b/net/core/ptp_classifier.c index d3027a73fd4..12ab7b4be60 100644 --- a/net/core/ptp_classifier.c +++ b/net/core/ptp_classifier.c @@ -52,14 +52,43 @@ * test_8021q: * jneq #0x8100, test_ieee1588 ; ETH_P_8021Q ? * ldh [16] ; load inner type - * jneq #0x88f7, drop_ieee1588 ; ETH_P_1588 ? + * jneq #0x88f7, test_8021q_ipv4 ; ETH_P_1588 ? * ldb [18] ; load payload * and #0x8 ; as we don't have ports here, test * jneq #0x0, drop_ieee1588 ; for PTP_GEN_BIT and drop these * ldh [18] ; reload payload * and #0xf ; mask PTP_CLASS_VMASK - * or #0x40 ; PTP_CLASS_V2_VLAN + * or #0x70 ; PTP_CLASS_VLAN|PTP_CLASS_L2 + * ret a ; return PTP class + * + * ; PTP over UDP over IPv4 over 802.1Q over Ethernet + * test_8021q_ipv4: + * jneq #0x800, test_8021q_ipv6 ; ETH_P_IP ? + * ldb [27] ; load proto + * jneq #17, drop_8021q_ipv4 ; IPPROTO_UDP ? + * ldh [24] ; load frag offset field + * jset #0x1fff, drop_8021q_ipv4; don't allow fragments + * ldxb 4*([18]&0xf) ; load IP header len + * ldh [x + 20] ; load UDP dst port + * jneq #319, drop_8021q_ipv4 ; is port PTP_EV_PORT ? + * ldh [x + 26] ; load payload + * and #0xf ; mask PTP_CLASS_VMASK + * or #0x50 ; PTP_CLASS_VLAN|PTP_CLASS_IPV4 + * ret a ; return PTP class + * drop_8021q_ipv4: ret #0x0 ; PTP_CLASS_NONE + * + * ; PTP over UDP over IPv6 over 802.1Q over Ethernet + * test_8021q_ipv6: + * jneq #0x86dd, drop_8021q_ipv6 ; ETH_P_IPV6 ? + * ldb [24] ; load proto + * jneq #17, drop_8021q_ipv6 ; IPPROTO_UDP ? + * ldh [60] ; load UDP dst port + * jneq #319, drop_8021q_ipv6 ; is port PTP_EV_PORT ? + * ldh [66] ; load payload + * and #0xf ; mask PTP_CLASS_VMASK + * or #0x60 ; PTP_CLASS_VLAN|PTP_CLASS_IPV6 * ret a ; return PTP class + * drop_8021q_ipv6: ret #0x0 ; PTP_CLASS_NONE * * ; PTP over Ethernet * test_ieee1588: @@ -113,16 +142,39 @@ void __init ptp_classifier_init(void) { 0x44, 0, 0, 0x00000020 }, { 0x16, 0, 0, 0x00000000 }, { 0x06, 0, 0, 0x00000000 }, - { 0x15, 0, 9, 0x00008100 }, + { 0x15, 0, 32, 0x00008100 }, { 0x28, 0, 0, 0x00000010 }, - { 0x15, 0, 15, 0x000088f7 }, + { 0x15, 0, 7, 0x000088f7 }, { 0x30, 0, 0, 0x00000012 }, { 0x54, 0, 0, 0x00000008 }, - { 0x15, 0, 12, 0x00000000 }, + { 0x15, 0, 35, 0x00000000 }, { 0x28, 0, 0, 0x00000012 }, { 0x54, 0, 0, 0x0000000f }, - { 0x44, 0, 0, 0x00000040 }, + { 0x44, 0, 0, 0x00000070 }, + { 0x16, 0, 0, 0x00000000 }, + { 0x15, 0, 12, 0x00000800 }, + { 0x30, 0, 0, 0x0000001b }, + { 0x15, 0, 9, 0x00000011 }, + { 0x28, 0, 0, 0x00000018 }, + { 0x45, 7, 0, 0x00001fff }, + { 0xb1, 0, 0, 0x00000012 }, + { 0x48, 0, 0, 0x00000014 }, + { 0x15, 0, 4, 0x0000013f }, + { 0x48, 0, 0, 0x0000001a }, + { 0x54, 0, 0, 0x0000000f }, + { 0x44, 0, 0, 0x00000050 }, + { 0x16, 0, 0, 0x00000000 }, + { 0x06, 0, 0, 0x00000000 }, + { 0x15, 0, 8, 0x000086dd }, + { 0x30, 0, 0, 0x00000018 }, + { 0x15, 0, 6, 0x00000011 }, + { 0x28, 0, 0, 0x0000003c }, + { 0x15, 0, 4, 0x0000013f }, + { 0x28, 0, 0, 0x00000042 }, + { 0x54, 0, 0, 0x0000000f }, + { 0x44, 0, 0, 0x00000060 }, { 0x16, 0, 0, 0x00000000 }, + { 0x06, 0, 0, 0x00000000 }, { 0x15, 0, 7, 0x000088f7 }, { 0x30, 0, 0, 0x0000000e }, { 0x54, 0, 0, 0x00000008 }, diff --git a/net/core/request_sock.c b/net/core/request_sock.c index 467f326126e..04db318e621 100644 --- a/net/core/request_sock.c +++ b/net/core/request_sock.c @@ -41,27 +41,27 @@ int reqsk_queue_alloc(struct request_sock_queue *queue, unsigned int nr_table_entries) { size_t lopt_size = sizeof(struct listen_sock); - struct listen_sock *lopt; + struct listen_sock *lopt = NULL; nr_table_entries = min_t(u32, nr_table_entries, sysctl_max_syn_backlog); nr_table_entries = max_t(u32, nr_table_entries, 8); nr_table_entries = roundup_pow_of_two(nr_table_entries + 1); lopt_size += nr_table_entries * sizeof(struct request_sock *); - if (lopt_size > PAGE_SIZE) + + if (lopt_size <= (PAGE_SIZE << PAGE_ALLOC_COSTLY_ORDER)) + lopt = kzalloc(lopt_size, GFP_KERNEL | + __GFP_NOWARN | + __GFP_NORETRY); + if (!lopt) lopt = vzalloc(lopt_size); - else - lopt = kzalloc(lopt_size, GFP_KERNEL); - if (lopt == NULL) + if (!lopt) return -ENOMEM; - for (lopt->max_qlen_log = 3; - (1 << lopt->max_qlen_log) < nr_table_entries; - lopt->max_qlen_log++); - get_random_bytes(&lopt->hash_rnd, sizeof(lopt->hash_rnd)); rwlock_init(&queue->syn_wait_lock); queue->rskq_accept_head = NULL; lopt->nr_table_entries = nr_table_entries; + lopt->max_qlen_log = ilog2(nr_table_entries); write_lock_bh(&queue->syn_wait_lock); queue->listen_opt = lopt; @@ -72,22 +72,8 @@ int reqsk_queue_alloc(struct request_sock_queue *queue, void __reqsk_queue_destroy(struct request_sock_queue *queue) { - struct listen_sock *lopt; - size_t lopt_size; - - /* - * this is an error recovery path only - * no locking needed and the lopt is not NULL - */ - - lopt = queue->listen_opt; - lopt_size = sizeof(struct listen_sock) + - lopt->nr_table_entries * sizeof(struct request_sock *); - - if (lopt_size > PAGE_SIZE) - vfree(lopt); - else - kfree(lopt); + /* This is an error recovery path only, no locking needed */ + kvfree(queue->listen_opt); } static inline struct listen_sock *reqsk_queue_yank_listen_sk( @@ -107,8 +93,6 @@ void reqsk_queue_destroy(struct request_sock_queue *queue) { /* make all the listen_opt local to us */ struct listen_sock *lopt = reqsk_queue_yank_listen_sk(queue); - size_t lopt_size = sizeof(struct listen_sock) + - lopt->nr_table_entries * sizeof(struct request_sock *); if (lopt->qlen != 0) { unsigned int i; @@ -125,10 +109,7 @@ void reqsk_queue_destroy(struct request_sock_queue *queue) } WARN_ON(lopt->qlen != 0); - if (lopt_size > PAGE_SIZE) - vfree(lopt); - else - kfree(lopt); + kvfree(lopt); } /* diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 1063996f831..e9918020dbc 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -299,7 +299,12 @@ int __rtnl_link_register(struct rtnl_link_ops *ops) if (rtnl_link_ops_get(ops->kind)) return -EEXIST; - if (!ops->dellink) + /* The check for setup is here because if ops + * does not have that filled up, it is not possible + * to use the ops for creating device. So do not + * fill up dellink as well. That disables rtnl_dellink. + */ + if (ops->setup && !ops->dellink) ops->dellink = unregister_netdevice_queue; list_add_tail(&ops->list, &link_ops); @@ -1777,7 +1782,7 @@ static int rtnl_dellink(struct sk_buff *skb, struct nlmsghdr *nlh) return -ENODEV; ops = dev->rtnl_link_ops; - if (!ops) + if (!ops || !ops->dellink) return -EOPNOTSUPP; ops->dellink(dev, &list_kill); @@ -1805,7 +1810,8 @@ int rtnl_configure_link(struct net_device *dev, const struct ifinfomsg *ifm) EXPORT_SYMBOL(rtnl_configure_link); struct net_device *rtnl_create_link(struct net *net, - char *ifname, const struct rtnl_link_ops *ops, struct nlattr *tb[]) + char *ifname, unsigned char name_assign_type, + const struct rtnl_link_ops *ops, struct nlattr *tb[]) { int err; struct net_device *dev; @@ -1823,8 +1829,8 @@ struct net_device *rtnl_create_link(struct net *net, num_rx_queues = ops->get_num_rx_queues(); err = -ENOMEM; - dev = alloc_netdev_mqs(ops->priv_size, ifname, ops->setup, - num_tx_queues, num_rx_queues); + dev = alloc_netdev_mqs(ops->priv_size, ifname, name_assign_type, + ops->setup, num_tx_queues, num_rx_queues); if (!dev) goto err; @@ -1889,6 +1895,7 @@ static int rtnl_newlink(struct sk_buff *skb, struct nlmsghdr *nlh) char ifname[IFNAMSIZ]; struct nlattr *tb[IFLA_MAX+1]; struct nlattr *linkinfo[IFLA_INFO_MAX+1]; + unsigned char name_assign_type = NET_NAME_USER; int err; #ifdef CONFIG_MODULES @@ -2038,14 +2045,19 @@ replay: return -EOPNOTSUPP; } - if (!ifname[0]) + if (!ops->setup) + return -EOPNOTSUPP; + + if (!ifname[0]) { snprintf(ifname, IFNAMSIZ, "%s%%d", ops->kind); + name_assign_type = NET_NAME_ENUM; + } dest_net = rtnl_link_get_net(net, tb); if (IS_ERR(dest_net)) return PTR_ERR(dest_net); - dev = rtnl_create_link(dest_net, ifname, ops, tb); + dev = rtnl_create_link(dest_net, ifname, name_assign_type, ops, tb); if (IS_ERR(dev)) { err = PTR_ERR(dev); goto out; @@ -2509,6 +2521,7 @@ skip: int ndo_dflt_fdb_dump(struct sk_buff *skb, struct netlink_callback *cb, struct net_device *dev, + struct net_device *filter_dev, int idx) { int err; @@ -2526,28 +2539,72 @@ EXPORT_SYMBOL(ndo_dflt_fdb_dump); static int rtnl_fdb_dump(struct sk_buff *skb, struct netlink_callback *cb) { - int idx = 0; - struct net *net = sock_net(skb->sk); struct net_device *dev; + struct nlattr *tb[IFLA_MAX+1]; + struct net_device *bdev = NULL; + struct net_device *br_dev = NULL; + const struct net_device_ops *ops = NULL; + const struct net_device_ops *cops = NULL; + struct ifinfomsg *ifm = nlmsg_data(cb->nlh); + struct net *net = sock_net(skb->sk); + int brport_idx = 0; + int br_idx = 0; + int idx = 0; - rcu_read_lock(); - for_each_netdev_rcu(net, dev) { - if (dev->priv_flags & IFF_BRIDGE_PORT) { - struct net_device *br_dev; - const struct net_device_ops *ops; + if (nlmsg_parse(cb->nlh, sizeof(struct ifinfomsg), tb, IFLA_MAX, + ifla_policy) == 0) { + if (tb[IFLA_MASTER]) + br_idx = nla_get_u32(tb[IFLA_MASTER]); + } - br_dev = netdev_master_upper_dev_get(dev); - ops = br_dev->netdev_ops; - if (ops->ndo_fdb_dump) - idx = ops->ndo_fdb_dump(skb, cb, dev, idx); + brport_idx = ifm->ifi_index; + + if (br_idx) { + br_dev = __dev_get_by_index(net, br_idx); + if (!br_dev) + return -ENODEV; + + ops = br_dev->netdev_ops; + bdev = br_dev; + } + + for_each_netdev(net, dev) { + if (brport_idx && (dev->ifindex != brport_idx)) + continue; + + if (!br_idx) { /* user did not specify a specific bridge */ + if (dev->priv_flags & IFF_BRIDGE_PORT) { + br_dev = netdev_master_upper_dev_get(dev); + cops = br_dev->netdev_ops; + } + + bdev = dev; + } else { + if (dev != br_dev && + !(dev->priv_flags & IFF_BRIDGE_PORT)) + continue; + + if (br_dev != netdev_master_upper_dev_get(dev) && + !(dev->priv_flags & IFF_EBRIDGE)) + continue; + + bdev = br_dev; + cops = ops; + } + + if (dev->priv_flags & IFF_BRIDGE_PORT) { + if (cops && cops->ndo_fdb_dump) + idx = cops->ndo_fdb_dump(skb, cb, br_dev, dev, + idx); } + idx = ndo_dflt_fdb_dump(skb, cb, dev, NULL, idx); if (dev->netdev_ops->ndo_fdb_dump) - idx = dev->netdev_ops->ndo_fdb_dump(skb, cb, dev, idx); - else - idx = ndo_dflt_fdb_dump(skb, cb, dev, idx); + idx = dev->netdev_ops->ndo_fdb_dump(skb, cb, bdev, dev, + idx); + + cops = NULL; } - rcu_read_unlock(); cb->args[0] = idx; return skb->len; diff --git a/net/core/timestamping.c b/net/core/timestamping.c index 6521dfd8b7c..a8770391ea5 100644 --- a/net/core/timestamping.c +++ b/net/core/timestamping.c @@ -43,31 +43,22 @@ void skb_clone_tx_timestamp(struct sk_buff *skb) return; type = classify(skb); + if (type == PTP_CLASS_NONE) + return; + + phydev = skb->dev->phydev; + if (likely(phydev->drv->txtstamp)) { + if (!atomic_inc_not_zero(&sk->sk_refcnt)) + return; - switch (type) { - case PTP_CLASS_V1_IPV4: - case PTP_CLASS_V1_IPV6: - case PTP_CLASS_V2_IPV4: - case PTP_CLASS_V2_IPV6: - case PTP_CLASS_V2_L2: - case PTP_CLASS_V2_VLAN: - phydev = skb->dev->phydev; - if (likely(phydev->drv->txtstamp)) { - if (!atomic_inc_not_zero(&sk->sk_refcnt)) - return; - - clone = skb_clone(skb, GFP_ATOMIC); - if (!clone) { - sock_put(sk); - return; - } - - clone->sk = sk; - phydev->drv->txtstamp(phydev, clone, type); + clone = skb_clone(skb, GFP_ATOMIC); + if (!clone) { + sock_put(sk); + return; } - break; - default: - break; + + clone->sk = sk; + phydev->drv->txtstamp(phydev, clone, type); } } EXPORT_SYMBOL_GPL(skb_clone_tx_timestamp); @@ -114,20 +105,12 @@ bool skb_defer_rx_timestamp(struct sk_buff *skb) __skb_pull(skb, ETH_HLEN); - switch (type) { - case PTP_CLASS_V1_IPV4: - case PTP_CLASS_V1_IPV6: - case PTP_CLASS_V2_IPV4: - case PTP_CLASS_V2_IPV6: - case PTP_CLASS_V2_L2: - case PTP_CLASS_V2_VLAN: - phydev = skb->dev->phydev; - if (likely(phydev->drv->rxtstamp)) - return phydev->drv->rxtstamp(phydev, skb, type); - break; - default: - break; - } + if (type == PTP_CLASS_NONE) + return false; + + phydev = skb->dev->phydev; + if (likely(phydev->drv->rxtstamp)) + return phydev->drv->rxtstamp(phydev, skb, type); return false; } |