diff options
-rw-r--r-- | net/openvswitch/flow.c | 119 | ||||
-rw-r--r-- | net/openvswitch/flow.h | 10 | ||||
-rw-r--r-- | net/openvswitch/flow_table.c | 46 | ||||
-rw-r--r-- | net/openvswitch/flow_table.h | 2 |
4 files changed, 122 insertions, 55 deletions
diff --git a/net/openvswitch/flow.c b/net/openvswitch/flow.c index aad7a8da70b..432f04d5c89 100644 --- a/net/openvswitch/flow.c +++ b/net/openvswitch/flow.c @@ -65,8 +65,9 @@ void ovs_flow_stats_update(struct sw_flow *flow, struct sk_buff *skb) { struct flow_stats *stats; __be16 tcp_flags = 0; + int node = numa_node_id(); - stats = this_cpu_ptr(flow->stats); + stats = rcu_dereference(flow->stats[node]); if ((flow->key.eth.type == htons(ETH_P_IP) || flow->key.eth.type == htons(ETH_P_IPV6)) && @@ -76,68 +77,102 @@ void ovs_flow_stats_update(struct sw_flow *flow, struct sk_buff *skb) tcp_flags = TCP_FLAGS_BE16(tcp_hdr(skb)); } - spin_lock(&stats->lock); + /* Check if already have node-specific stats. */ + if (likely(stats)) { + spin_lock(&stats->lock); + /* Mark if we write on the pre-allocated stats. */ + if (node == 0 && unlikely(flow->stats_last_writer != node)) + flow->stats_last_writer = node; + } else { + stats = rcu_dereference(flow->stats[0]); /* Pre-allocated. */ + spin_lock(&stats->lock); + + /* If the current NUMA-node is the only writer on the + * pre-allocated stats keep using them. + */ + if (unlikely(flow->stats_last_writer != node)) { + /* A previous locker may have already allocated the + * stats, so we need to check again. If node-specific + * stats were already allocated, we update the pre- + * allocated stats as we have already locked them. + */ + if (likely(flow->stats_last_writer != NUMA_NO_NODE) + && likely(!rcu_dereference(flow->stats[node]))) { + /* Try to allocate node-specific stats. */ + struct flow_stats *new_stats; + + new_stats = + kmem_cache_alloc_node(flow_stats_cache, + GFP_THISNODE | + __GFP_NOMEMALLOC, + node); + if (likely(new_stats)) { + new_stats->used = jiffies; + new_stats->packet_count = 1; + new_stats->byte_count = skb->len; + new_stats->tcp_flags = tcp_flags; + spin_lock_init(&new_stats->lock); + + rcu_assign_pointer(flow->stats[node], + new_stats); + goto unlock; + } + } + flow->stats_last_writer = node; + } + } + stats->used = jiffies; stats->packet_count++; stats->byte_count += skb->len; stats->tcp_flags |= tcp_flags; - spin_unlock(&stats->lock); -} - -static void stats_read(struct flow_stats *stats, - struct ovs_flow_stats *ovs_stats, - unsigned long *used, __be16 *tcp_flags) -{ - spin_lock(&stats->lock); - if (!*used || time_after(stats->used, *used)) - *used = stats->used; - *tcp_flags |= stats->tcp_flags; - ovs_stats->n_packets += stats->packet_count; - ovs_stats->n_bytes += stats->byte_count; +unlock: spin_unlock(&stats->lock); } void ovs_flow_stats_get(struct sw_flow *flow, struct ovs_flow_stats *ovs_stats, unsigned long *used, __be16 *tcp_flags) { - int cpu; + int node; *used = 0; *tcp_flags = 0; memset(ovs_stats, 0, sizeof(*ovs_stats)); - local_bh_disable(); - - for_each_possible_cpu(cpu) { - struct flow_stats *stats; + for_each_node(node) { + struct flow_stats *stats = rcu_dereference(flow->stats[node]); - stats = per_cpu_ptr(flow->stats.cpu_stats, cpu); - stats_read(stats, ovs_stats, used, tcp_flags); + if (stats) { + /* Local CPU may write on non-local stats, so we must + * block bottom-halves here. + */ + spin_lock_bh(&stats->lock); + if (!*used || time_after(stats->used, *used)) + *used = stats->used; + *tcp_flags |= stats->tcp_flags; + ovs_stats->n_packets += stats->packet_count; + ovs_stats->n_bytes += stats->byte_count; + spin_unlock_bh(&stats->lock); + } } - - local_bh_enable(); -} - -static void stats_reset(struct flow_stats *stats) -{ - spin_lock(&stats->lock); - stats->used = 0; - stats->packet_count = 0; - stats->byte_count = 0; - stats->tcp_flags = 0; - spin_unlock(&stats->lock); } void ovs_flow_stats_clear(struct sw_flow *flow) { - int cpu; - - local_bh_disable(); - - for_each_possible_cpu(cpu) - stats_reset(per_cpu_ptr(flow->stats, cpu)); - - local_bh_enable(); + int node; + + for_each_node(node) { + struct flow_stats *stats = rcu_dereference(flow->stats[node]); + + if (stats) { + spin_lock_bh(&stats->lock); + stats->used = 0; + stats->packet_count = 0; + stats->byte_count = 0; + stats->tcp_flags = 0; + spin_unlock_bh(&stats->lock); + } + } } static int check_header(struct sk_buff *skb, int len) diff --git a/net/openvswitch/flow.h b/net/openvswitch/flow.h index 9c0dd8aa311..ddcebc53224 100644 --- a/net/openvswitch/flow.h +++ b/net/openvswitch/flow.h @@ -159,12 +159,18 @@ struct sw_flow { struct rcu_head rcu; struct hlist_node hash_node[2]; u32 hash; - + int stats_last_writer; /* NUMA-node id of the last writer on + * 'stats[0]'. + */ struct sw_flow_key key; struct sw_flow_key unmasked_key; struct sw_flow_mask *mask; struct sw_flow_actions __rcu *sf_acts; - struct flow_stats __percpu *stats; + struct flow_stats __rcu *stats[]; /* One for each NUMA node. First one + * is allocated at flow creation time, + * the rest are allocated on demand + * while holding the 'stats[0].lock'. + */ }; struct arp_eth_header { diff --git a/net/openvswitch/flow_table.c b/net/openvswitch/flow_table.c index aa92da23053..d8ef37b937b 100644 --- a/net/openvswitch/flow_table.c +++ b/net/openvswitch/flow_table.c @@ -48,6 +48,7 @@ #define REHASH_INTERVAL (10 * 60 * HZ) static struct kmem_cache *flow_cache; +struct kmem_cache *flow_stats_cache __read_mostly; static u16 range_n_bytes(const struct sw_flow_key_range *range) { @@ -75,7 +76,8 @@ void ovs_flow_mask_key(struct sw_flow_key *dst, const struct sw_flow_key *src, struct sw_flow *ovs_flow_alloc(void) { struct sw_flow *flow; - int cpu; + struct flow_stats *stats; + int node; flow = kmem_cache_alloc(flow_cache, GFP_KERNEL); if (!flow) @@ -83,17 +85,22 @@ struct sw_flow *ovs_flow_alloc(void) flow->sf_acts = NULL; flow->mask = NULL; + flow->stats_last_writer = NUMA_NO_NODE; - flow->stats = alloc_percpu(struct flow_stats); - if (!flow->stats) + /* Initialize the default stat node. */ + stats = kmem_cache_alloc_node(flow_stats_cache, + GFP_KERNEL | __GFP_ZERO, 0); + if (!stats) goto err; - for_each_possible_cpu(cpu) { - struct flow_stats *cpu_stats; + spin_lock_init(&stats->lock); + + RCU_INIT_POINTER(flow->stats[0], stats); + + for_each_node(node) + if (node != 0) + RCU_INIT_POINTER(flow->stats[node], NULL); - cpu_stats = per_cpu_ptr(flow->stats, cpu); - spin_lock_init(&cpu_stats->lock); - } return flow; err: kmem_cache_free(flow_cache, flow); @@ -130,8 +137,13 @@ static struct flex_array *alloc_buckets(unsigned int n_buckets) static void flow_free(struct sw_flow *flow) { + int node; + kfree((struct sf_flow_acts __force *)flow->sf_acts); - free_percpu(flow->stats); + for_each_node(node) + if (flow->stats[node]) + kmem_cache_free(flow_stats_cache, + (struct flow_stats __force *)flow->stats[node]); kmem_cache_free(flow_cache, flow); } @@ -586,16 +598,28 @@ int ovs_flow_init(void) BUILD_BUG_ON(__alignof__(struct sw_flow_key) % __alignof__(long)); BUILD_BUG_ON(sizeof(struct sw_flow_key) % sizeof(long)); - flow_cache = kmem_cache_create("sw_flow", sizeof(struct sw_flow), 0, - 0, NULL); + flow_cache = kmem_cache_create("sw_flow", sizeof(struct sw_flow) + + (num_possible_nodes() + * sizeof(struct flow_stats *)), + 0, 0, NULL); if (flow_cache == NULL) return -ENOMEM; + flow_stats_cache + = kmem_cache_create("sw_flow_stats", sizeof(struct flow_stats), + 0, SLAB_HWCACHE_ALIGN, NULL); + if (flow_stats_cache == NULL) { + kmem_cache_destroy(flow_cache); + flow_cache = NULL; + return -ENOMEM; + } + return 0; } /* Uninitializes the flow module. */ void ovs_flow_exit(void) { + kmem_cache_destroy(flow_stats_cache); kmem_cache_destroy(flow_cache); } diff --git a/net/openvswitch/flow_table.h b/net/openvswitch/flow_table.h index c26c59a7ab5..ca8a5820f61 100644 --- a/net/openvswitch/flow_table.h +++ b/net/openvswitch/flow_table.h @@ -52,6 +52,8 @@ struct flow_table { unsigned int count; }; +extern struct kmem_cache *flow_stats_cache; + int ovs_flow_init(void); void ovs_flow_exit(void); |