From 23dabf88abb48a866fdb19ee08ebcf1ddd9b1840 Mon Sep 17 00:00:00 2001 From: Jarno Rajahalme Date: Thu, 27 Mar 2014 12:35:23 -0700 Subject: openvswitch: Remove 5-tuple optimization. The 5-tuple optimization becomes unnecessary with a later per-NUMA node stats patch. Remove it first to make the changes easier to grasp. Signed-off-by: Jarno Rajahalme Signed-off-by: Jesse Gross --- net/openvswitch/datapath.c | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) (limited to 'net/openvswitch/datapath.c') diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c index a3276e3c4fe..8867d7e2d65 100644 --- a/net/openvswitch/datapath.c +++ b/net/openvswitch/datapath.c @@ -524,7 +524,7 @@ static int ovs_packet_cmd_execute(struct sk_buff *skb, struct genl_info *info) packet->protocol = htons(ETH_P_802_2); /* Build an sw_flow for sending this packet. */ - flow = ovs_flow_alloc(false); + flow = ovs_flow_alloc(); err = PTR_ERR(flow); if (IS_ERR(flow)) goto err_kfree_skb; @@ -782,7 +782,6 @@ static int ovs_flow_cmd_new_or_set(struct sk_buff *skb, struct genl_info *info) struct datapath *dp; struct sw_flow_actions *acts = NULL; struct sw_flow_match match; - bool exact_5tuple; int error; /* Extract key. */ @@ -791,7 +790,7 @@ static int ovs_flow_cmd_new_or_set(struct sk_buff *skb, struct genl_info *info) goto error; ovs_match_init(&match, &key, &mask); - error = ovs_nla_get_match(&match, &exact_5tuple, + error = ovs_nla_get_match(&match, a[OVS_FLOW_ATTR_KEY], a[OVS_FLOW_ATTR_MASK]); if (error) goto error; @@ -830,7 +829,7 @@ static int ovs_flow_cmd_new_or_set(struct sk_buff *skb, struct genl_info *info) goto err_unlock_ovs; /* Allocate flow. */ - flow = ovs_flow_alloc(!exact_5tuple); + flow = ovs_flow_alloc(); if (IS_ERR(flow)) { error = PTR_ERR(flow); goto err_unlock_ovs; @@ -914,7 +913,7 @@ static int ovs_flow_cmd_get(struct sk_buff *skb, struct genl_info *info) } ovs_match_init(&match, &key, NULL); - err = ovs_nla_get_match(&match, NULL, a[OVS_FLOW_ATTR_KEY], NULL); + err = ovs_nla_get_match(&match, a[OVS_FLOW_ATTR_KEY], NULL); if (err) return err; @@ -968,7 +967,7 @@ static int ovs_flow_cmd_del(struct sk_buff *skb, struct genl_info *info) } ovs_match_init(&match, &key, NULL); - err = ovs_nla_get_match(&match, NULL, a[OVS_FLOW_ATTR_KEY], NULL); + err = ovs_nla_get_match(&match, a[OVS_FLOW_ATTR_KEY], NULL); if (err) goto unlock; -- cgit v1.2.3-70-g09d2 From be52c9e96a6657d117bb0ec6e11438fb246af5c7 Mon Sep 17 00:00:00 2001 From: Jarno Rajahalme Date: Mon, 5 May 2014 09:59:40 -0700 Subject: openvswitch: Avoid assigning a NULL pointer to flow actions. Flow SET can accept an empty set of actions, with the intended semantics of leaving existing actions unmodified. This seems to have been brokin after OVS 1.7, as we have assigned the flow's actions pointer to NULL in this case, but we never check for the NULL pointer later on. This patch restores the intended behavior and documents it in the include/linux/openvswitch.h. Signed-off-by: Jarno Rajahalme Signed-off-by: Pravin B Shelar --- include/uapi/linux/openvswitch.h | 4 +++- net/openvswitch/datapath.c | 14 ++++++++------ 2 files changed, 11 insertions(+), 7 deletions(-) (limited to 'net/openvswitch/datapath.c') diff --git a/include/uapi/linux/openvswitch.h b/include/uapi/linux/openvswitch.h index 970553cbbc8..0b979ee4bfc 100644 --- a/include/uapi/linux/openvswitch.h +++ b/include/uapi/linux/openvswitch.h @@ -395,7 +395,9 @@ struct ovs_key_nd { * @OVS_FLOW_ATTR_ACTIONS: Nested %OVS_ACTION_ATTR_* attributes specifying * the actions to take for packets that match the key. Always present in * notifications. Required for %OVS_FLOW_CMD_NEW requests, optional for - * %OVS_FLOW_CMD_SET requests. + * %OVS_FLOW_CMD_SET requests. An %OVS_FLOW_CMD_SET without + * %OVS_FLOW_ATTR_ACTIONS will not modify the actions. To clear the actions, + * an %OVS_FLOW_ATTR_ACTIONS without any nested attributes must be given. * @OVS_FLOW_ATTR_STATS: &struct ovs_flow_stats giving statistics for this * flow. Present in notifications if the stats would be nonzero. Ignored in * requests. diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c index 8867d7e2d65..90a1e5e6628 100644 --- a/net/openvswitch/datapath.c +++ b/net/openvswitch/datapath.c @@ -810,6 +810,7 @@ static int ovs_flow_cmd_new_or_set(struct sk_buff *skb, struct genl_info *info) goto err_kfree; } } else if (info->genlhdr->cmd == OVS_FLOW_CMD_NEW) { + /* OVS_FLOW_CMD_NEW must have actions. */ error = -EINVAL; goto error; } @@ -849,8 +850,6 @@ static int ovs_flow_cmd_new_or_set(struct sk_buff *skb, struct genl_info *info) reply = ovs_flow_cmd_build_info(flow, dp, info, OVS_FLOW_CMD_NEW); } else { /* We found a matching flow. */ - struct sw_flow_actions *old_acts; - /* Bail out if we're not allowed to modify an existing flow. * We accept NLM_F_CREATE in place of the intended NLM_F_EXCL * because Generic Netlink treats the latter as a dump @@ -866,11 +865,14 @@ static int ovs_flow_cmd_new_or_set(struct sk_buff *skb, struct genl_info *info) if (!ovs_flow_cmp_unmasked_key(flow, &match)) goto err_unlock_ovs; - /* Update actions. */ - old_acts = ovsl_dereference(flow->sf_acts); - rcu_assign_pointer(flow->sf_acts, acts); - ovs_nla_free_flow_actions(old_acts); + /* Update actions, if present. */ + if (acts) { + struct sw_flow_actions *old_acts; + old_acts = ovsl_dereference(flow->sf_acts); + rcu_assign_pointer(flow->sf_acts, acts); + ovs_nla_free_flow_actions(old_acts); + } reply = ovs_flow_cmd_build_info(flow, dp, info, OVS_FLOW_CMD_NEW); /* Clear stats. */ -- cgit v1.2.3-70-g09d2 From bb6f9a708d4067713afae2e9eb2637f6b4c01ecb Mon Sep 17 00:00:00 2001 From: Jarno Rajahalme Date: Mon, 5 May 2014 11:32:17 -0700 Subject: openvswitch: Clarify locking. Remove unnecessary locking from functions that are always called with appropriate locking. Signed-off-by: Jarno Rajahalme Signed-off-by: Thomas Graf Signed-off-by: Pravin B Shelar --- net/openvswitch/datapath.c | 13 +++++++------ net/openvswitch/flow.c | 3 ++- 2 files changed, 9 insertions(+), 7 deletions(-) (limited to 'net/openvswitch/datapath.c') diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c index 90a1e5e6628..138ea0c9e1b 100644 --- a/net/openvswitch/datapath.c +++ b/net/openvswitch/datapath.c @@ -173,6 +173,7 @@ static struct hlist_head *vport_hash_bucket(const struct datapath *dp, return &dp->ports[port_no & (DP_VPORT_HASH_BUCKETS - 1)]; } +/* Called with ovs_mutex or RCU read lock. */ struct vport *ovs_lookup_vport(const struct datapath *dp, u16 port_no) { struct vport *vport; @@ -652,7 +653,7 @@ static size_t ovs_flow_cmd_msg_size(const struct sw_flow_actions *acts) + nla_total_size(acts->actions_len); /* OVS_FLOW_ATTR_ACTIONS */ } -/* Called with ovs_mutex. */ +/* Called with ovs_mutex or RCU read lock. */ static int ovs_flow_cmd_fill_info(struct sw_flow *flow, struct datapath *dp, struct sk_buff *skb, u32 portid, u32 seq, u32 flags, u8 cmd) @@ -743,6 +744,7 @@ error: return err; } +/* Must be called with ovs_mutex. */ static struct sk_buff *ovs_flow_cmd_alloc_info(struct sw_flow *flow, struct genl_info *info) { @@ -753,6 +755,7 @@ static struct sk_buff *ovs_flow_cmd_alloc_info(struct sw_flow *flow, return genlmsg_new_unicast(len, info, GFP_KERNEL); } +/* Must be called with ovs_mutex. */ static struct sk_buff *ovs_flow_cmd_build_info(struct sw_flow *flow, struct datapath *dp, struct genl_info *info, @@ -1094,6 +1097,7 @@ static size_t ovs_dp_cmd_msg_size(void) return msgsize; } +/* Called with ovs_mutex or RCU read lock. */ static int ovs_dp_cmd_fill_info(struct datapath *dp, struct sk_buff *skb, u32 portid, u32 seq, u32 flags, u8 cmd) { @@ -1109,9 +1113,7 @@ static int ovs_dp_cmd_fill_info(struct datapath *dp, struct sk_buff *skb, ovs_header->dp_ifindex = get_dpifindex(dp); - rcu_read_lock(); err = nla_put_string(skb, OVS_DP_ATTR_NAME, ovs_dp_name(dp)); - rcu_read_unlock(); if (err) goto nla_put_failure; @@ -1136,6 +1138,7 @@ error: return -EMSGSIZE; } +/* Must be called with ovs_mutex. */ static struct sk_buff *ovs_dp_cmd_build_info(struct datapath *dp, struct genl_info *info, u8 cmd) { @@ -1154,7 +1157,7 @@ static struct sk_buff *ovs_dp_cmd_build_info(struct datapath *dp, return skb; } -/* Called with ovs_mutex. */ +/* Called with rcu_read_lock or ovs_mutex. */ static struct datapath *lookup_datapath(struct net *net, struct ovs_header *ovs_header, struct nlattr *a[OVS_DP_ATTR_MAX + 1]) @@ -1166,10 +1169,8 @@ static struct datapath *lookup_datapath(struct net *net, else { struct vport *vport; - rcu_read_lock(); vport = ovs_vport_locate(net, nla_data(a[OVS_DP_ATTR_NAME])); dp = vport && vport->port_no == OVSP_LOCAL ? vport->dp : NULL; - rcu_read_unlock(); } return dp ? dp : ERR_PTR(-ENODEV); } diff --git a/net/openvswitch/flow.c b/net/openvswitch/flow.c index 6d8d2da0a8e..1019fc1db06 100644 --- a/net/openvswitch/flow.c +++ b/net/openvswitch/flow.c @@ -122,6 +122,7 @@ unlock: spin_unlock(&stats->lock); } +/* Called with ovs_mutex. */ void ovs_flow_stats_get(struct sw_flow *flow, struct ovs_flow_stats *ovs_stats, unsigned long *used, __be16 *tcp_flags) { @@ -132,7 +133,7 @@ void ovs_flow_stats_get(struct sw_flow *flow, struct ovs_flow_stats *ovs_stats, memset(ovs_stats, 0, sizeof(*ovs_stats)); for_each_node(node) { - struct flow_stats *stats = rcu_dereference(flow->stats[node]); + struct flow_stats *stats = ovsl_dereference(flow->stats[node]); if (stats) { /* Local CPU may write on non-local stats, so we must -- cgit v1.2.3-70-g09d2 From fb5d1e9e127ad1542e5db20cd8620a1509baef69 Mon Sep 17 00:00:00 2001 From: Jarno Rajahalme Date: Mon, 5 May 2014 13:13:14 -0700 Subject: openvswitch: Build flow cmd netlink reply only if needed. Use netlink_has_listeners() and NLM_F_ECHO flag to determine if a reply is needed or not for OVS_FLOW_CMD_NEW, OVS_FLOW_CMD_SET, or OVS_FLOW_CMD_DEL. Currently, OVS userspace does not request a reply for OVS_FLOW_CMD_NEW, but usually does for OVS_FLOW_CMD_DEL, as stats may have changed. Signed-off-by: Jarno Rajahalme Signed-off-by: Pravin B Shelar --- net/openvswitch/datapath.c | 70 +++++++++++++++++++++++++++++++--------------- 1 file changed, 48 insertions(+), 22 deletions(-) (limited to 'net/openvswitch/datapath.c') diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c index 138ea0c9e1b..cb5d64a5c75 100644 --- a/net/openvswitch/datapath.c +++ b/net/openvswitch/datapath.c @@ -62,6 +62,15 @@ int ovs_net_id __read_mostly; +/* Check if need to build a reply message. + * OVS userspace sets the NLM_F_ECHO flag if it needs the reply. */ +static bool ovs_must_notify(struct genl_info *info, + const struct genl_multicast_group *grp) +{ + return info->nlhdr->nlmsg_flags & NLM_F_ECHO || + netlink_has_listeners(genl_info_net(info)->genl_sock, 0); +} + static void ovs_notify(struct genl_family *family, struct sk_buff *skb, struct genl_info *info) { @@ -746,27 +755,36 @@ error: /* Must be called with ovs_mutex. */ static struct sk_buff *ovs_flow_cmd_alloc_info(struct sw_flow *flow, - struct genl_info *info) + struct genl_info *info, + bool always) { + struct sk_buff *skb; size_t len; + if (!always && !ovs_must_notify(info, &ovs_dp_flow_multicast_group)) + return NULL; + len = ovs_flow_cmd_msg_size(ovsl_dereference(flow->sf_acts)); - return genlmsg_new_unicast(len, info, GFP_KERNEL); + skb = genlmsg_new_unicast(len, info, GFP_KERNEL); + if (!skb) + return ERR_PTR(-ENOMEM); + + return skb; } /* Must be called with ovs_mutex. */ static struct sk_buff *ovs_flow_cmd_build_info(struct sw_flow *flow, struct datapath *dp, struct genl_info *info, - u8 cmd) + u8 cmd, bool always) { struct sk_buff *skb; int retval; - skb = ovs_flow_cmd_alloc_info(flow, info); - if (!skb) - return ERR_PTR(-ENOMEM); + skb = ovs_flow_cmd_alloc_info(flow, info, always); + if (!skb || IS_ERR(skb)) + return skb; retval = ovs_flow_cmd_fill_info(flow, dp, skb, info->snd_portid, info->snd_seq, 0, cmd); @@ -850,7 +868,8 @@ static int ovs_flow_cmd_new_or_set(struct sk_buff *skb, struct genl_info *info) goto err_flow_free; } - reply = ovs_flow_cmd_build_info(flow, dp, info, OVS_FLOW_CMD_NEW); + reply = ovs_flow_cmd_build_info(flow, dp, info, + OVS_FLOW_CMD_NEW, false); } else { /* We found a matching flow. */ /* Bail out if we're not allowed to modify an existing flow. @@ -876,7 +895,8 @@ static int ovs_flow_cmd_new_or_set(struct sk_buff *skb, struct genl_info *info) rcu_assign_pointer(flow->sf_acts, acts); ovs_nla_free_flow_actions(old_acts); } - reply = ovs_flow_cmd_build_info(flow, dp, info, OVS_FLOW_CMD_NEW); + reply = ovs_flow_cmd_build_info(flow, dp, info, + OVS_FLOW_CMD_NEW, false); /* Clear stats. */ if (a[OVS_FLOW_ATTR_CLEAR]) @@ -884,11 +904,14 @@ static int ovs_flow_cmd_new_or_set(struct sk_buff *skb, struct genl_info *info) } ovs_unlock(); - if (!IS_ERR(reply)) - ovs_notify(&dp_flow_genl_family, reply, info); - else - genl_set_err(&dp_flow_genl_family, sock_net(skb->sk), 0, - 0, PTR_ERR(reply)); + if (reply) { + if (!IS_ERR(reply)) + ovs_notify(&dp_flow_genl_family, reply, info); + else + genl_set_err(&dp_flow_genl_family, sock_net(skb->sk), 0, + 0, PTR_ERR(reply)); + } + return 0; err_flow_free: @@ -935,7 +958,7 @@ static int ovs_flow_cmd_get(struct sk_buff *skb, struct genl_info *info) goto unlock; } - reply = ovs_flow_cmd_build_info(flow, dp, info, OVS_FLOW_CMD_NEW); + reply = ovs_flow_cmd_build_info(flow, dp, info, OVS_FLOW_CMD_NEW, true); if (IS_ERR(reply)) { err = PTR_ERR(reply); goto unlock; @@ -982,22 +1005,25 @@ static int ovs_flow_cmd_del(struct sk_buff *skb, struct genl_info *info) goto unlock; } - reply = ovs_flow_cmd_alloc_info(flow, info); - if (!reply) { - err = -ENOMEM; + reply = ovs_flow_cmd_alloc_info(flow, info, false); + if (IS_ERR(reply)) { + err = PTR_ERR(reply); goto unlock; } ovs_flow_tbl_remove(&dp->table, flow); - err = ovs_flow_cmd_fill_info(flow, dp, reply, info->snd_portid, - info->snd_seq, 0, OVS_FLOW_CMD_DEL); - BUG_ON(err < 0); - + if (reply) { + err = ovs_flow_cmd_fill_info(flow, dp, reply, info->snd_portid, + info->snd_seq, 0, + OVS_FLOW_CMD_DEL); + BUG_ON(err < 0); + } ovs_flow_free(flow, true); ovs_unlock(); - ovs_notify(&dp_flow_genl_family, reply, info); + if (reply) + ovs_notify(&dp_flow_genl_family, reply, info); return 0; unlock: ovs_unlock(); -- cgit v1.2.3-70-g09d2 From 6093ae9abac18871afd0bbc5cf093dff53112fcb Mon Sep 17 00:00:00 2001 From: Jarno Rajahalme Date: Mon, 5 May 2014 14:13:32 -0700 Subject: openvswitch: Minimize dp and vport critical sections. Move most memory allocations away from the ovs_mutex critical sections. vport allocations still happen while the lock is taken, as changing that would require major refactoring. Also, vports are created very rarely so it should not matter. Change ovs_dp_cmd_get() now only takes the rcu_read_lock(), rather than ovs_lock(), as nothing need to be changed. This was done by ovs_vport_cmd_get() already. Signed-off-by: Jarno Rajahalme Signed-off-by: Pravin B Shelar --- net/openvswitch/datapath.c | 218 +++++++++++++++++++++++---------------------- 1 file changed, 110 insertions(+), 108 deletions(-) (limited to 'net/openvswitch/datapath.c') diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c index cb5d64a5c75..f3bcdac80bd 100644 --- a/net/openvswitch/datapath.c +++ b/net/openvswitch/datapath.c @@ -1164,23 +1164,9 @@ error: return -EMSGSIZE; } -/* Must be called with ovs_mutex. */ -static struct sk_buff *ovs_dp_cmd_build_info(struct datapath *dp, - struct genl_info *info, u8 cmd) +static struct sk_buff *ovs_dp_cmd_alloc_info(struct genl_info *info) { - struct sk_buff *skb; - int retval; - - skb = genlmsg_new_unicast(ovs_dp_cmd_msg_size(), info, GFP_KERNEL); - if (!skb) - return ERR_PTR(-ENOMEM); - - retval = ovs_dp_cmd_fill_info(dp, skb, info->snd_portid, info->snd_seq, 0, cmd); - if (retval < 0) { - kfree_skb(skb); - return ERR_PTR(retval); - } - return skb; + return genlmsg_new_unicast(ovs_dp_cmd_msg_size(), info, GFP_KERNEL); } /* Called with rcu_read_lock or ovs_mutex. */ @@ -1233,12 +1219,14 @@ static int ovs_dp_cmd_new(struct sk_buff *skb, struct genl_info *info) if (!a[OVS_DP_ATTR_NAME] || !a[OVS_DP_ATTR_UPCALL_PID]) goto err; - ovs_lock(); + reply = ovs_dp_cmd_alloc_info(info); + if (!reply) + return -ENOMEM; err = -ENOMEM; dp = kzalloc(sizeof(*dp), GFP_KERNEL); if (dp == NULL) - goto err_unlock_ovs; + goto err_free_reply; ovs_dp_set_net(dp, hold_net(sock_net(skb->sk))); @@ -1273,6 +1261,9 @@ static int ovs_dp_cmd_new(struct sk_buff *skb, struct genl_info *info) ovs_dp_change(dp, a); + /* So far only local changes have been made, now need the lock. */ + ovs_lock(); + vport = new_vport(&parms); if (IS_ERR(vport)) { err = PTR_ERR(vport); @@ -1291,10 +1282,9 @@ static int ovs_dp_cmd_new(struct sk_buff *skb, struct genl_info *info) goto err_destroy_ports_array; } - reply = ovs_dp_cmd_build_info(dp, info, OVS_DP_CMD_NEW); - err = PTR_ERR(reply); - if (IS_ERR(reply)) - goto err_destroy_local_port; + err = ovs_dp_cmd_fill_info(dp, reply, info->snd_portid, + info->snd_seq, 0, OVS_DP_CMD_NEW); + BUG_ON(err < 0); ovs_net = net_generic(ovs_dp_get_net(dp), ovs_net_id); list_add_tail_rcu(&dp->list_node, &ovs_net->dps); @@ -1304,9 +1294,8 @@ static int ovs_dp_cmd_new(struct sk_buff *skb, struct genl_info *info) ovs_notify(&dp_datapath_genl_family, reply, info); return 0; -err_destroy_local_port: - ovs_dp_detach_port(ovs_vport_ovsl(dp, OVSP_LOCAL)); err_destroy_ports_array: + ovs_unlock(); kfree(dp->ports); err_destroy_percpu: free_percpu(dp->stats_percpu); @@ -1315,8 +1304,8 @@ err_destroy_table: err_free_dp: release_net(ovs_dp_get_net(dp)); kfree(dp); -err_unlock_ovs: - ovs_unlock(); +err_free_reply: + kfree_skb(reply); err: return err; } @@ -1354,16 +1343,19 @@ static int ovs_dp_cmd_del(struct sk_buff *skb, struct genl_info *info) struct datapath *dp; int err; + reply = ovs_dp_cmd_alloc_info(info); + if (!reply) + return -ENOMEM; + ovs_lock(); dp = lookup_datapath(sock_net(skb->sk), info->userhdr, info->attrs); err = PTR_ERR(dp); if (IS_ERR(dp)) - goto unlock; + goto err_unlock_free; - reply = ovs_dp_cmd_build_info(dp, info, OVS_DP_CMD_DEL); - err = PTR_ERR(reply); - if (IS_ERR(reply)) - goto unlock; + err = ovs_dp_cmd_fill_info(dp, reply, info->snd_portid, + info->snd_seq, 0, OVS_DP_CMD_DEL); + BUG_ON(err < 0); __dp_destroy(dp); ovs_unlock(); @@ -1371,8 +1363,10 @@ static int ovs_dp_cmd_del(struct sk_buff *skb, struct genl_info *info) ovs_notify(&dp_datapath_genl_family, reply, info); return 0; -unlock: + +err_unlock_free: ovs_unlock(); + kfree_skb(reply); return err; } @@ -1382,29 +1376,30 @@ static int ovs_dp_cmd_set(struct sk_buff *skb, struct genl_info *info) struct datapath *dp; int err; + reply = ovs_dp_cmd_alloc_info(info); + if (!reply) + return -ENOMEM; + ovs_lock(); dp = lookup_datapath(sock_net(skb->sk), info->userhdr, info->attrs); err = PTR_ERR(dp); if (IS_ERR(dp)) - goto unlock; + goto err_unlock_free; ovs_dp_change(dp, info->attrs); - reply = ovs_dp_cmd_build_info(dp, info, OVS_DP_CMD_NEW); - if (IS_ERR(reply)) { - err = PTR_ERR(reply); - genl_set_err(&dp_datapath_genl_family, sock_net(skb->sk), 0, - 0, err); - err = 0; - goto unlock; - } + err = ovs_dp_cmd_fill_info(dp, reply, info->snd_portid, + info->snd_seq, 0, OVS_DP_CMD_NEW); + BUG_ON(err < 0); ovs_unlock(); ovs_notify(&dp_datapath_genl_family, reply, info); return 0; -unlock: + +err_unlock_free: ovs_unlock(); + kfree_skb(reply); return err; } @@ -1414,24 +1409,26 @@ static int ovs_dp_cmd_get(struct sk_buff *skb, struct genl_info *info) struct datapath *dp; int err; - ovs_lock(); + reply = ovs_dp_cmd_alloc_info(info); + if (!reply) + return -ENOMEM; + + rcu_read_lock(); dp = lookup_datapath(sock_net(skb->sk), info->userhdr, info->attrs); if (IS_ERR(dp)) { err = PTR_ERR(dp); - goto unlock; - } - - reply = ovs_dp_cmd_build_info(dp, info, OVS_DP_CMD_NEW); - if (IS_ERR(reply)) { - err = PTR_ERR(reply); - goto unlock; + goto err_unlock_free; } + err = ovs_dp_cmd_fill_info(dp, reply, info->snd_portid, + info->snd_seq, 0, OVS_DP_CMD_NEW); + BUG_ON(err < 0); + rcu_read_unlock(); - ovs_unlock(); return genlmsg_reply(reply, info); -unlock: - ovs_unlock(); +err_unlock_free: + rcu_read_unlock(); + kfree_skb(reply); return err; } @@ -1544,7 +1541,12 @@ error: return err; } -/* Called with ovs_mutex or RCU read lock. */ +static struct sk_buff *ovs_vport_cmd_alloc_info(void) +{ + return nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); +} + +/* Called with ovs_mutex, only via ovs_dp_notify_wq(). */ struct sk_buff *ovs_vport_cmd_build_info(struct vport *vport, u32 portid, u32 seq, u8 cmd) { @@ -1606,33 +1608,35 @@ static int ovs_vport_cmd_new(struct sk_buff *skb, struct genl_info *info) u32 port_no; int err; - err = -EINVAL; if (!a[OVS_VPORT_ATTR_NAME] || !a[OVS_VPORT_ATTR_TYPE] || !a[OVS_VPORT_ATTR_UPCALL_PID]) - goto exit; + return -EINVAL; + + port_no = a[OVS_VPORT_ATTR_PORT_NO] + ? nla_get_u32(a[OVS_VPORT_ATTR_PORT_NO]) : 0; + if (port_no >= DP_MAX_PORTS) + return -EFBIG; + + reply = ovs_vport_cmd_alloc_info(); + if (!reply) + return -ENOMEM; ovs_lock(); dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex); err = -ENODEV; if (!dp) - goto exit_unlock; - - if (a[OVS_VPORT_ATTR_PORT_NO]) { - port_no = nla_get_u32(a[OVS_VPORT_ATTR_PORT_NO]); - - err = -EFBIG; - if (port_no >= DP_MAX_PORTS) - goto exit_unlock; + goto exit_unlock_free; + if (port_no) { vport = ovs_vport_ovsl(dp, port_no); err = -EBUSY; if (vport) - goto exit_unlock; + goto exit_unlock_free; } else { for (port_no = 1; ; port_no++) { if (port_no >= DP_MAX_PORTS) { err = -EFBIG; - goto exit_unlock; + goto exit_unlock_free; } vport = ovs_vport_ovsl(dp, port_no); if (!vport) @@ -1650,22 +1654,19 @@ static int ovs_vport_cmd_new(struct sk_buff *skb, struct genl_info *info) vport = new_vport(&parms); err = PTR_ERR(vport); if (IS_ERR(vport)) - goto exit_unlock; + goto exit_unlock_free; - err = 0; - reply = ovs_vport_cmd_build_info(vport, info->snd_portid, info->snd_seq, - OVS_VPORT_CMD_NEW); - if (IS_ERR(reply)) { - err = PTR_ERR(reply); - ovs_dp_detach_port(vport); - goto exit_unlock; - } + err = ovs_vport_cmd_fill_info(vport, reply, info->snd_portid, + info->snd_seq, 0, OVS_VPORT_CMD_NEW); + BUG_ON(err < 0); + ovs_unlock(); ovs_notify(&dp_vport_genl_family, reply, info); + return 0; -exit_unlock: +exit_unlock_free: ovs_unlock(); -exit: + kfree_skb(reply); return err; } @@ -1676,28 +1677,26 @@ static int ovs_vport_cmd_set(struct sk_buff *skb, struct genl_info *info) struct vport *vport; int err; + reply = ovs_vport_cmd_alloc_info(); + if (!reply) + return -ENOMEM; + ovs_lock(); vport = lookup_vport(sock_net(skb->sk), info->userhdr, a); err = PTR_ERR(vport); if (IS_ERR(vport)) - goto exit_unlock; + goto exit_unlock_free; if (a[OVS_VPORT_ATTR_TYPE] && nla_get_u32(a[OVS_VPORT_ATTR_TYPE]) != vport->ops->type) { err = -EINVAL; - goto exit_unlock; - } - - reply = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); - if (!reply) { - err = -ENOMEM; - goto exit_unlock; + goto exit_unlock_free; } if (a[OVS_VPORT_ATTR_OPTIONS]) { err = ovs_vport_set_options(vport, a[OVS_VPORT_ATTR_OPTIONS]); if (err) - goto exit_free; + goto exit_unlock_free; } if (a[OVS_VPORT_ATTR_UPCALL_PID]) @@ -1711,10 +1710,9 @@ static int ovs_vport_cmd_set(struct sk_buff *skb, struct genl_info *info) ovs_notify(&dp_vport_genl_family, reply, info); return 0; -exit_free: - kfree_skb(reply); -exit_unlock: +exit_unlock_free: ovs_unlock(); + kfree_skb(reply); return err; } @@ -1725,30 +1723,33 @@ static int ovs_vport_cmd_del(struct sk_buff *skb, struct genl_info *info) struct vport *vport; int err; + reply = ovs_vport_cmd_alloc_info(); + if (!reply) + return -ENOMEM; + ovs_lock(); vport = lookup_vport(sock_net(skb->sk), info->userhdr, a); err = PTR_ERR(vport); if (IS_ERR(vport)) - goto exit_unlock; + goto exit_unlock_free; if (vport->port_no == OVSP_LOCAL) { err = -EINVAL; - goto exit_unlock; + goto exit_unlock_free; } - reply = ovs_vport_cmd_build_info(vport, info->snd_portid, - info->snd_seq, OVS_VPORT_CMD_DEL); - err = PTR_ERR(reply); - if (IS_ERR(reply)) - goto exit_unlock; - - err = 0; + err = ovs_vport_cmd_fill_info(vport, reply, info->snd_portid, + info->snd_seq, 0, OVS_VPORT_CMD_DEL); + BUG_ON(err < 0); ovs_dp_detach_port(vport); + ovs_unlock(); ovs_notify(&dp_vport_genl_family, reply, info); + return 0; -exit_unlock: +exit_unlock_free: ovs_unlock(); + kfree_skb(reply); return err; } @@ -1760,24 +1761,25 @@ static int ovs_vport_cmd_get(struct sk_buff *skb, struct genl_info *info) struct vport *vport; int err; + reply = ovs_vport_cmd_alloc_info(); + if (!reply) + return -ENOMEM; + rcu_read_lock(); vport = lookup_vport(sock_net(skb->sk), ovs_header, a); err = PTR_ERR(vport); if (IS_ERR(vport)) - goto exit_unlock; - - reply = ovs_vport_cmd_build_info(vport, info->snd_portid, - info->snd_seq, OVS_VPORT_CMD_NEW); - err = PTR_ERR(reply); - if (IS_ERR(reply)) - goto exit_unlock; - + goto exit_unlock_free; + err = ovs_vport_cmd_fill_info(vport, reply, info->snd_portid, + info->snd_seq, 0, OVS_VPORT_CMD_NEW); + BUG_ON(err < 0); rcu_read_unlock(); return genlmsg_reply(reply, info); -exit_unlock: +exit_unlock_free: rcu_read_unlock(); + kfree_skb(reply); return err; } -- cgit v1.2.3-70-g09d2 From 0e9796b4af9ef490e203158cb738a5a4986eb75c Mon Sep 17 00:00:00 2001 From: Jarno Rajahalme Date: Mon, 5 May 2014 14:28:07 -0700 Subject: openvswitch: Reduce locking requirements. Reduce and clarify locking requirements for ovs_flow_cmd_alloc_info(), ovs_flow_cmd_fill_info() and ovs_flow_cmd_build_info(). A datapath pointer is available only when holding a lock. Change ovs_flow_cmd_fill_info() and ovs_flow_cmd_build_info() to take a dp_ifindex directly, rather than a datapath pointer that is then (only) used to get the dp_ifindex. This is useful, since the dp_ifindex is available even when the datapath pointer is not, both before and after taking a lock, which makes further critical section reduction possible. Make ovs_flow_cmd_alloc_info() take an 'acts' argument instead a 'flow' pointer. This allows some future patches to do the allocation before acquiring the flow pointer. The locking requirements after this patch are: ovs_flow_cmd_alloc_info(): May be called without locking, must not be called while holding the RCU read lock (due to memory allocation). If 'acts' belong to a flow in the flow table, however, then the caller must hold ovs_mutex. ovs_flow_cmd_fill_info(): Either ovs_mutex or RCU read lock must be held. ovs_flow_cmd_build_info(): This calls both of the above, so the caller must hold ovs_mutex. Signed-off-by: Jarno Rajahalme Signed-off-by: Pravin B Shelar --- net/openvswitch/datapath.c | 54 +++++++++++++++++++++++++--------------------- 1 file changed, 29 insertions(+), 25 deletions(-) (limited to 'net/openvswitch/datapath.c') diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c index f3bcdac80bd..949fc7fadaf 100644 --- a/net/openvswitch/datapath.c +++ b/net/openvswitch/datapath.c @@ -663,7 +663,7 @@ static size_t ovs_flow_cmd_msg_size(const struct sw_flow_actions *acts) } /* Called with ovs_mutex or RCU read lock. */ -static int ovs_flow_cmd_fill_info(struct sw_flow *flow, struct datapath *dp, +static int ovs_flow_cmd_fill_info(const struct sw_flow *flow, int dp_ifindex, struct sk_buff *skb, u32 portid, u32 seq, u32 flags, u8 cmd) { @@ -680,7 +680,7 @@ static int ovs_flow_cmd_fill_info(struct sw_flow *flow, struct datapath *dp, if (!ovs_header) return -EMSGSIZE; - ovs_header->dp_ifindex = get_dpifindex(dp); + ovs_header->dp_ifindex = dp_ifindex; /* Fill flow key. */ nla = nla_nest_start(skb, OVS_FLOW_ATTR_KEY); @@ -703,6 +703,7 @@ static int ovs_flow_cmd_fill_info(struct sw_flow *flow, struct datapath *dp, nla_nest_end(skb, nla); ovs_flow_stats_get(flow, &stats, &used, &tcp_flags); + if (used && nla_put_u64(skb, OVS_FLOW_ATTR_USED, ovs_flow_used_time(used))) goto nla_put_failure; @@ -730,9 +731,9 @@ static int ovs_flow_cmd_fill_info(struct sw_flow *flow, struct datapath *dp, const struct sw_flow_actions *sf_acts; sf_acts = rcu_dereference_ovsl(flow->sf_acts); - err = ovs_nla_put_actions(sf_acts->actions, sf_acts->actions_len, skb); + if (!err) nla_nest_end(skb, start); else { @@ -753,41 +754,40 @@ error: return err; } -/* Must be called with ovs_mutex. */ -static struct sk_buff *ovs_flow_cmd_alloc_info(struct sw_flow *flow, +/* May not be called with RCU read lock. */ +static struct sk_buff *ovs_flow_cmd_alloc_info(const struct sw_flow_actions *acts, struct genl_info *info, bool always) { struct sk_buff *skb; - size_t len; if (!always && !ovs_must_notify(info, &ovs_dp_flow_multicast_group)) return NULL; - len = ovs_flow_cmd_msg_size(ovsl_dereference(flow->sf_acts)); - - skb = genlmsg_new_unicast(len, info, GFP_KERNEL); + skb = genlmsg_new_unicast(ovs_flow_cmd_msg_size(acts), info, GFP_KERNEL); if (!skb) return ERR_PTR(-ENOMEM); return skb; } -/* Must be called with ovs_mutex. */ -static struct sk_buff *ovs_flow_cmd_build_info(struct sw_flow *flow, - struct datapath *dp, - struct genl_info *info, - u8 cmd, bool always) +/* Called with ovs_mutex. */ +static struct sk_buff *ovs_flow_cmd_build_info(const struct sw_flow *flow, + int dp_ifindex, + struct genl_info *info, u8 cmd, + bool always) { struct sk_buff *skb; int retval; - skb = ovs_flow_cmd_alloc_info(flow, info, always); + skb = ovs_flow_cmd_alloc_info(ovsl_dereference(flow->sf_acts), info, + always); if (!skb || IS_ERR(skb)) return skb; - retval = ovs_flow_cmd_fill_info(flow, dp, skb, info->snd_portid, - info->snd_seq, 0, cmd); + retval = ovs_flow_cmd_fill_info(flow, dp_ifindex, skb, + info->snd_portid, info->snd_seq, 0, + cmd); BUG_ON(retval < 0); return skb; } @@ -868,8 +868,8 @@ static int ovs_flow_cmd_new_or_set(struct sk_buff *skb, struct genl_info *info) goto err_flow_free; } - reply = ovs_flow_cmd_build_info(flow, dp, info, - OVS_FLOW_CMD_NEW, false); + reply = ovs_flow_cmd_build_info(flow, ovs_header->dp_ifindex, + info, OVS_FLOW_CMD_NEW, false); } else { /* We found a matching flow. */ /* Bail out if we're not allowed to modify an existing flow. @@ -895,8 +895,9 @@ static int ovs_flow_cmd_new_or_set(struct sk_buff *skb, struct genl_info *info) rcu_assign_pointer(flow->sf_acts, acts); ovs_nla_free_flow_actions(old_acts); } - reply = ovs_flow_cmd_build_info(flow, dp, info, - OVS_FLOW_CMD_NEW, false); + + reply = ovs_flow_cmd_build_info(flow, ovs_header->dp_ifindex, + info, OVS_FLOW_CMD_NEW, false); /* Clear stats. */ if (a[OVS_FLOW_ATTR_CLEAR]) @@ -958,7 +959,8 @@ static int ovs_flow_cmd_get(struct sk_buff *skb, struct genl_info *info) goto unlock; } - reply = ovs_flow_cmd_build_info(flow, dp, info, OVS_FLOW_CMD_NEW, true); + reply = ovs_flow_cmd_build_info(flow, ovs_header->dp_ifindex, info, + OVS_FLOW_CMD_NEW, true); if (IS_ERR(reply)) { err = PTR_ERR(reply); goto unlock; @@ -1005,7 +1007,8 @@ static int ovs_flow_cmd_del(struct sk_buff *skb, struct genl_info *info) goto unlock; } - reply = ovs_flow_cmd_alloc_info(flow, info, false); + reply = ovs_flow_cmd_alloc_info(ovsl_dereference(flow->sf_acts), info, + false); if (IS_ERR(reply)) { err = PTR_ERR(reply); goto unlock; @@ -1014,7 +1017,8 @@ static int ovs_flow_cmd_del(struct sk_buff *skb, struct genl_info *info) ovs_flow_tbl_remove(&dp->table, flow); if (reply) { - err = ovs_flow_cmd_fill_info(flow, dp, reply, info->snd_portid, + err = ovs_flow_cmd_fill_info(flow, ovs_header->dp_ifindex, + reply, info->snd_portid, info->snd_seq, 0, OVS_FLOW_CMD_DEL); BUG_ON(err < 0); @@ -1054,7 +1058,7 @@ static int ovs_flow_cmd_dump(struct sk_buff *skb, struct netlink_callback *cb) if (!flow) break; - if (ovs_flow_cmd_fill_info(flow, dp, skb, + if (ovs_flow_cmd_fill_info(flow, ovs_header->dp_ifindex, skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, NLM_F_MULTI, OVS_FLOW_CMD_NEW) < 0) -- cgit v1.2.3-70-g09d2 From aed067783e505bf66dcafa8647d08619eb5b1c55 Mon Sep 17 00:00:00 2001 From: Jarno Rajahalme Date: Mon, 5 May 2014 14:40:13 -0700 Subject: openvswitch: Minimize ovs_flow_cmd_del critical section. ovs_flow_cmd_del() now allocates reply (if needed) after the flow has already been removed from the flow table. If the reply allocation fails, a netlink error is signaled with netlink_set_err(), as is already done in ovs_flow_cmd_new_or_set() in the similar situation. Signed-off-by: Jarno Rajahalme Signed-off-by: Pravin B Shelar --- net/openvswitch/datapath.c | 53 ++++++++++++++++++++++++---------------------- 1 file changed, 28 insertions(+), 25 deletions(-) (limited to 'net/openvswitch/datapath.c') diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c index 949fc7fadaf..f28beda8426 100644 --- a/net/openvswitch/datapath.c +++ b/net/openvswitch/datapath.c @@ -984,50 +984,53 @@ static int ovs_flow_cmd_del(struct sk_buff *skb, struct genl_info *info) struct sw_flow_match match; int err; + if (likely(a[OVS_FLOW_ATTR_KEY])) { + ovs_match_init(&match, &key, NULL); + err = ovs_nla_get_match(&match, a[OVS_FLOW_ATTR_KEY], NULL); + if (unlikely(err)) + return err; + } + ovs_lock(); dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex); - if (!dp) { + if (unlikely(!dp)) { err = -ENODEV; goto unlock; } - if (!a[OVS_FLOW_ATTR_KEY]) { + if (unlikely(!a[OVS_FLOW_ATTR_KEY])) { err = ovs_flow_tbl_flush(&dp->table); goto unlock; } - ovs_match_init(&match, &key, NULL); - err = ovs_nla_get_match(&match, a[OVS_FLOW_ATTR_KEY], NULL); - if (err) - goto unlock; - flow = ovs_flow_tbl_lookup(&dp->table, &key); - if (!flow || !ovs_flow_cmp_unmasked_key(flow, &match)) { + if (unlikely(!flow || !ovs_flow_cmp_unmasked_key(flow, &match))) { err = -ENOENT; goto unlock; } - reply = ovs_flow_cmd_alloc_info(ovsl_dereference(flow->sf_acts), info, - false); - if (IS_ERR(reply)) { - err = PTR_ERR(reply); - goto unlock; - } - ovs_flow_tbl_remove(&dp->table, flow); + ovs_unlock(); - if (reply) { - err = ovs_flow_cmd_fill_info(flow, ovs_header->dp_ifindex, - reply, info->snd_portid, - info->snd_seq, 0, - OVS_FLOW_CMD_DEL); - BUG_ON(err < 0); + reply = ovs_flow_cmd_alloc_info((const struct sw_flow_actions __force *) flow->sf_acts, + info, false); + if (likely(reply)) { + if (likely(!IS_ERR(reply))) { + rcu_read_lock(); /*To keep RCU checker happy. */ + err = ovs_flow_cmd_fill_info(flow, ovs_header->dp_ifindex, + reply, info->snd_portid, + info->snd_seq, 0, + OVS_FLOW_CMD_DEL); + rcu_read_unlock(); + BUG_ON(err < 0); + + ovs_notify(&dp_flow_genl_family, reply, info); + } else { + netlink_set_err(sock_net(skb->sk)->genl_sock, 0, 0, PTR_ERR(reply)); + } } - ovs_flow_free(flow, true); - ovs_unlock(); - if (reply) - ovs_notify(&dp_flow_genl_family, reply, info); + ovs_flow_free(flow, true); return 0; unlock: ovs_unlock(); -- cgit v1.2.3-70-g09d2 From 37bdc87ba00dadd0156db77ba48224d042202435 Mon Sep 17 00:00:00 2001 From: Jarno Rajahalme Date: Mon, 5 May 2014 14:53:51 -0700 Subject: openvswitch: Split ovs_flow_cmd_new_or_set(). Following patch will be easier to reason about with separate ovs_flow_cmd_new() and ovs_flow_cmd_set() functions. Signed-off-by: Jarno Rajahalme Signed-off-by: Pravin B Shelar --- net/openvswitch/datapath.c | 160 ++++++++++++++++++++++++++++++++------------- 1 file changed, 116 insertions(+), 44 deletions(-) (limited to 'net/openvswitch/datapath.c') diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c index f28beda8426..22665a6144f 100644 --- a/net/openvswitch/datapath.c +++ b/net/openvswitch/datapath.c @@ -792,16 +792,16 @@ static struct sk_buff *ovs_flow_cmd_build_info(const struct sw_flow *flow, return skb; } -static int ovs_flow_cmd_new_or_set(struct sk_buff *skb, struct genl_info *info) +static int ovs_flow_cmd_new(struct sk_buff *skb, struct genl_info *info) { struct nlattr **a = info->attrs; struct ovs_header *ovs_header = info->userhdr; struct sw_flow_key key, masked_key; - struct sw_flow *flow = NULL; + struct sw_flow *flow; struct sw_flow_mask mask; struct sk_buff *reply; struct datapath *dp; - struct sw_flow_actions *acts = NULL; + struct sw_flow_actions *acts; struct sw_flow_match match; int error; @@ -817,23 +817,21 @@ static int ovs_flow_cmd_new_or_set(struct sk_buff *skb, struct genl_info *info) goto error; /* Validate actions. */ - if (a[OVS_FLOW_ATTR_ACTIONS]) { - acts = ovs_nla_alloc_flow_actions(nla_len(a[OVS_FLOW_ATTR_ACTIONS])); - error = PTR_ERR(acts); - if (IS_ERR(acts)) - goto error; + error = -EINVAL; + if (!a[OVS_FLOW_ATTR_ACTIONS]) + goto error; - ovs_flow_mask_key(&masked_key, &key, &mask); - error = ovs_nla_copy_actions(a[OVS_FLOW_ATTR_ACTIONS], - &masked_key, 0, &acts); - if (error) { - OVS_NLERR("Flow actions may not be safe on all matching packets.\n"); - goto err_kfree; - } - } else if (info->genlhdr->cmd == OVS_FLOW_CMD_NEW) { - /* OVS_FLOW_CMD_NEW must have actions. */ - error = -EINVAL; + acts = ovs_nla_alloc_flow_actions(nla_len(a[OVS_FLOW_ATTR_ACTIONS])); + error = PTR_ERR(acts); + if (IS_ERR(acts)) goto error; + + ovs_flow_mask_key(&masked_key, &key, &mask); + error = ovs_nla_copy_actions(a[OVS_FLOW_ATTR_ACTIONS], + &masked_key, 0, &acts); + if (error) { + OVS_NLERR("Flow actions may not be safe on all matching packets.\n"); + goto err_kfree; } ovs_lock(); @@ -845,11 +843,6 @@ static int ovs_flow_cmd_new_or_set(struct sk_buff *skb, struct genl_info *info) /* Check if this is a duplicate flow */ flow = ovs_flow_tbl_lookup(&dp->table, &key); if (!flow) { - /* Bail out if we're not allowed to create a new flow. */ - error = -ENOENT; - if (info->genlhdr->cmd == OVS_FLOW_CMD_SET) - goto err_unlock_ovs; - /* Allocate flow. */ flow = ovs_flow_alloc(); if (IS_ERR(flow)) { @@ -867,11 +860,9 @@ static int ovs_flow_cmd_new_or_set(struct sk_buff *skb, struct genl_info *info) acts = NULL; goto err_flow_free; } - - reply = ovs_flow_cmd_build_info(flow, ovs_header->dp_ifindex, - info, OVS_FLOW_CMD_NEW, false); } else { - /* We found a matching flow. */ + struct sw_flow_actions *old_acts; + /* Bail out if we're not allowed to modify an existing flow. * We accept NLM_F_CREATE in place of the intended NLM_F_EXCL * because Generic Netlink treats the latter as a dump @@ -879,30 +870,113 @@ static int ovs_flow_cmd_new_or_set(struct sk_buff *skb, struct genl_info *info) * gets fixed. */ error = -EEXIST; - if (info->genlhdr->cmd == OVS_FLOW_CMD_NEW && - info->nlhdr->nlmsg_flags & (NLM_F_CREATE | NLM_F_EXCL)) + if (info->nlhdr->nlmsg_flags & (NLM_F_CREATE | NLM_F_EXCL)) goto err_unlock_ovs; /* The unmasked key has to be the same for flow updates. */ if (!ovs_flow_cmp_unmasked_key(flow, &match)) goto err_unlock_ovs; - /* Update actions, if present. */ - if (acts) { - struct sw_flow_actions *old_acts; + /* Update actions. */ + old_acts = ovsl_dereference(flow->sf_acts); + rcu_assign_pointer(flow->sf_acts, acts); + ovs_nla_free_flow_actions(old_acts); + } + + reply = ovs_flow_cmd_build_info(flow, ovs_header->dp_ifindex, + info, OVS_FLOW_CMD_NEW, false); + ovs_unlock(); + + if (reply) { + if (!IS_ERR(reply)) + ovs_notify(&dp_flow_genl_family, reply, info); + else + netlink_set_err(sock_net(skb->sk)->genl_sock, 0, 0, + PTR_ERR(reply)); + } + return 0; + +err_flow_free: + ovs_flow_free(flow, false); +err_unlock_ovs: + ovs_unlock(); +err_kfree: + kfree(acts); +error: + return error; +} - old_acts = ovsl_dereference(flow->sf_acts); - rcu_assign_pointer(flow->sf_acts, acts); - ovs_nla_free_flow_actions(old_acts); +static int ovs_flow_cmd_set(struct sk_buff *skb, struct genl_info *info) +{ + struct nlattr **a = info->attrs; + struct ovs_header *ovs_header = info->userhdr; + struct sw_flow_key key, masked_key; + struct sw_flow *flow; + struct sw_flow_mask mask; + struct sk_buff *reply = NULL; + struct datapath *dp; + struct sw_flow_actions *acts = NULL; + struct sw_flow_match match; + int error; + + /* Extract key. */ + error = -EINVAL; + if (!a[OVS_FLOW_ATTR_KEY]) + goto error; + + ovs_match_init(&match, &key, &mask); + error = ovs_nla_get_match(&match, + a[OVS_FLOW_ATTR_KEY], a[OVS_FLOW_ATTR_MASK]); + if (error) + goto error; + + /* Validate actions. */ + if (a[OVS_FLOW_ATTR_ACTIONS]) { + acts = ovs_nla_alloc_flow_actions(nla_len(a[OVS_FLOW_ATTR_ACTIONS])); + error = PTR_ERR(acts); + if (IS_ERR(acts)) + goto error; + + ovs_flow_mask_key(&masked_key, &key, &mask); + error = ovs_nla_copy_actions(a[OVS_FLOW_ATTR_ACTIONS], + &masked_key, 0, &acts); + if (error) { + OVS_NLERR("Flow actions may not be safe on all matching packets.\n"); + goto err_kfree; } + } - reply = ovs_flow_cmd_build_info(flow, ovs_header->dp_ifindex, - info, OVS_FLOW_CMD_NEW, false); + ovs_lock(); + dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex); + error = -ENODEV; + if (!dp) + goto err_unlock_ovs; + + /* Check that the flow exists. */ + flow = ovs_flow_tbl_lookup(&dp->table, &key); + error = -ENOENT; + if (!flow) + goto err_unlock_ovs; + + /* The unmasked key has to be the same for flow updates. */ + error = -EEXIST; + if (!ovs_flow_cmp_unmasked_key(flow, &match)) + goto err_unlock_ovs; - /* Clear stats. */ - if (a[OVS_FLOW_ATTR_CLEAR]) - ovs_flow_stats_clear(flow); + /* Update actions, if present. */ + if (acts) { + struct sw_flow_actions *old_acts; + + old_acts = ovsl_dereference(flow->sf_acts); + rcu_assign_pointer(flow->sf_acts, acts); + ovs_nla_free_flow_actions(old_acts); } + + reply = ovs_flow_cmd_build_info(flow, ovs_header->dp_ifindex, + info, OVS_FLOW_CMD_NEW, false); + /* Clear stats. */ + if (a[OVS_FLOW_ATTR_CLEAR]) + ovs_flow_stats_clear(flow); ovs_unlock(); if (reply) { @@ -915,8 +989,6 @@ static int ovs_flow_cmd_new_or_set(struct sk_buff *skb, struct genl_info *info) return 0; -err_flow_free: - ovs_flow_free(flow, false); err_unlock_ovs: ovs_unlock(); err_kfree: @@ -1078,7 +1150,7 @@ static const struct genl_ops dp_flow_genl_ops[] = { { .cmd = OVS_FLOW_CMD_NEW, .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */ .policy = flow_policy, - .doit = ovs_flow_cmd_new_or_set + .doit = ovs_flow_cmd_new }, { .cmd = OVS_FLOW_CMD_DEL, .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */ @@ -1094,7 +1166,7 @@ static const struct genl_ops dp_flow_genl_ops[] = { { .cmd = OVS_FLOW_CMD_SET, .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */ .policy = flow_policy, - .doit = ovs_flow_cmd_new_or_set, + .doit = ovs_flow_cmd_set, }, }; -- cgit v1.2.3-70-g09d2 From 893f139b9a6c00c097b9082a90f3041cfb3a0d20 Mon Sep 17 00:00:00 2001 From: Jarno Rajahalme Date: Mon, 5 May 2014 15:22:25 -0700 Subject: openvswitch: Minimize ovs_flow_cmd_new|set critical sections. Signed-off-by: Jarno Rajahalme Signed-off-by: Pravin B Shelar --- net/openvswitch/datapath.c | 192 +++++++++++++++++++++++++++------------------ 1 file changed, 116 insertions(+), 76 deletions(-) (limited to 'net/openvswitch/datapath.c') diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c index 22665a6144f..6bc9d94c8df 100644 --- a/net/openvswitch/datapath.c +++ b/net/openvswitch/datapath.c @@ -796,8 +796,7 @@ static int ovs_flow_cmd_new(struct sk_buff *skb, struct genl_info *info) { struct nlattr **a = info->attrs; struct ovs_header *ovs_header = info->userhdr; - struct sw_flow_key key, masked_key; - struct sw_flow *flow; + struct sw_flow *flow, *new_flow; struct sw_flow_mask mask; struct sk_buff *reply; struct datapath *dp; @@ -805,61 +804,77 @@ static int ovs_flow_cmd_new(struct sk_buff *skb, struct genl_info *info) struct sw_flow_match match; int error; - /* Extract key. */ + /* Must have key and actions. */ error = -EINVAL; if (!a[OVS_FLOW_ATTR_KEY]) goto error; + if (!a[OVS_FLOW_ATTR_ACTIONS]) + goto error; - ovs_match_init(&match, &key, &mask); + /* Most of the time we need to allocate a new flow, do it before + * locking. + */ + new_flow = ovs_flow_alloc(); + if (IS_ERR(new_flow)) { + error = PTR_ERR(new_flow); + goto error; + } + + /* Extract key. */ + ovs_match_init(&match, &new_flow->unmasked_key, &mask); error = ovs_nla_get_match(&match, a[OVS_FLOW_ATTR_KEY], a[OVS_FLOW_ATTR_MASK]); if (error) - goto error; + goto err_kfree_flow; - /* Validate actions. */ - error = -EINVAL; - if (!a[OVS_FLOW_ATTR_ACTIONS]) - goto error; + ovs_flow_mask_key(&new_flow->key, &new_flow->unmasked_key, &mask); + /* Validate actions. */ acts = ovs_nla_alloc_flow_actions(nla_len(a[OVS_FLOW_ATTR_ACTIONS])); error = PTR_ERR(acts); if (IS_ERR(acts)) - goto error; + goto err_kfree_flow; - ovs_flow_mask_key(&masked_key, &key, &mask); - error = ovs_nla_copy_actions(a[OVS_FLOW_ATTR_ACTIONS], - &masked_key, 0, &acts); + error = ovs_nla_copy_actions(a[OVS_FLOW_ATTR_ACTIONS], &new_flow->key, + 0, &acts); if (error) { OVS_NLERR("Flow actions may not be safe on all matching packets.\n"); - goto err_kfree; + goto err_kfree_acts; + } + + reply = ovs_flow_cmd_alloc_info(acts, info, false); + if (IS_ERR(reply)) { + error = PTR_ERR(reply); + goto err_kfree_acts; } ovs_lock(); dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex); - error = -ENODEV; - if (!dp) + if (unlikely(!dp)) { + error = -ENODEV; goto err_unlock_ovs; - + } /* Check if this is a duplicate flow */ - flow = ovs_flow_tbl_lookup(&dp->table, &key); - if (!flow) { - /* Allocate flow. */ - flow = ovs_flow_alloc(); - if (IS_ERR(flow)) { - error = PTR_ERR(flow); - goto err_unlock_ovs; - } - - flow->key = masked_key; - flow->unmasked_key = key; - rcu_assign_pointer(flow->sf_acts, acts); + flow = ovs_flow_tbl_lookup(&dp->table, &new_flow->unmasked_key); + if (likely(!flow)) { + rcu_assign_pointer(new_flow->sf_acts, acts); /* Put flow in bucket. */ - error = ovs_flow_tbl_insert(&dp->table, flow, &mask); - if (error) { + error = ovs_flow_tbl_insert(&dp->table, new_flow, &mask); + if (unlikely(error)) { acts = NULL; - goto err_flow_free; + goto err_unlock_ovs; } + + if (unlikely(reply)) { + error = ovs_flow_cmd_fill_info(new_flow, + ovs_header->dp_ifindex, + reply, info->snd_portid, + info->snd_seq, 0, + OVS_FLOW_CMD_NEW); + BUG_ON(error < 0); + } + ovs_unlock(); } else { struct sw_flow_actions *old_acts; @@ -869,39 +884,45 @@ static int ovs_flow_cmd_new(struct sk_buff *skb, struct genl_info *info) * request. We also accept NLM_F_EXCL in case that bug ever * gets fixed. */ - error = -EEXIST; - if (info->nlhdr->nlmsg_flags & (NLM_F_CREATE | NLM_F_EXCL)) + if (unlikely(info->nlhdr->nlmsg_flags & (NLM_F_CREATE + | NLM_F_EXCL))) { + error = -EEXIST; goto err_unlock_ovs; - + } /* The unmasked key has to be the same for flow updates. */ - if (!ovs_flow_cmp_unmasked_key(flow, &match)) + if (unlikely(!ovs_flow_cmp_unmasked_key(flow, &match))) { + error = -EEXIST; goto err_unlock_ovs; - + } /* Update actions. */ old_acts = ovsl_dereference(flow->sf_acts); rcu_assign_pointer(flow->sf_acts, acts); - ovs_nla_free_flow_actions(old_acts); - } - reply = ovs_flow_cmd_build_info(flow, ovs_header->dp_ifindex, - info, OVS_FLOW_CMD_NEW, false); - ovs_unlock(); + if (unlikely(reply)) { + error = ovs_flow_cmd_fill_info(flow, + ovs_header->dp_ifindex, + reply, info->snd_portid, + info->snd_seq, 0, + OVS_FLOW_CMD_NEW); + BUG_ON(error < 0); + } + ovs_unlock(); - if (reply) { - if (!IS_ERR(reply)) - ovs_notify(&dp_flow_genl_family, reply, info); - else - netlink_set_err(sock_net(skb->sk)->genl_sock, 0, 0, - PTR_ERR(reply)); + ovs_nla_free_flow_actions(old_acts); + ovs_flow_free(new_flow, false); } + + if (reply) + ovs_notify(&dp_flow_genl_family, reply, info); return 0; -err_flow_free: - ovs_flow_free(flow, false); err_unlock_ovs: ovs_unlock(); -err_kfree: + kfree_skb(reply); +err_kfree_acts: kfree(acts); +err_kfree_flow: + ovs_flow_free(new_flow, false); error: return error; } @@ -915,7 +936,7 @@ static int ovs_flow_cmd_set(struct sk_buff *skb, struct genl_info *info) struct sw_flow_mask mask; struct sk_buff *reply = NULL; struct datapath *dp; - struct sw_flow_actions *acts = NULL; + struct sw_flow_actions *old_acts = NULL, *acts = NULL; struct sw_flow_match match; int error; @@ -942,56 +963,75 @@ static int ovs_flow_cmd_set(struct sk_buff *skb, struct genl_info *info) &masked_key, 0, &acts); if (error) { OVS_NLERR("Flow actions may not be safe on all matching packets.\n"); - goto err_kfree; + goto err_kfree_acts; + } + } + + /* Can allocate before locking if have acts. */ + if (acts) { + reply = ovs_flow_cmd_alloc_info(acts, info, false); + if (IS_ERR(reply)) { + error = PTR_ERR(reply); + goto err_kfree_acts; } } ovs_lock(); dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex); - error = -ENODEV; - if (!dp) + if (unlikely(!dp)) { + error = -ENODEV; goto err_unlock_ovs; - + } /* Check that the flow exists. */ flow = ovs_flow_tbl_lookup(&dp->table, &key); - error = -ENOENT; - if (!flow) + if (unlikely(!flow)) { + error = -ENOENT; goto err_unlock_ovs; - + } /* The unmasked key has to be the same for flow updates. */ - error = -EEXIST; - if (!ovs_flow_cmp_unmasked_key(flow, &match)) + if (unlikely(!ovs_flow_cmp_unmasked_key(flow, &match))) { + error = -EEXIST; goto err_unlock_ovs; - + } /* Update actions, if present. */ - if (acts) { - struct sw_flow_actions *old_acts; - + if (likely(acts)) { old_acts = ovsl_dereference(flow->sf_acts); rcu_assign_pointer(flow->sf_acts, acts); - ovs_nla_free_flow_actions(old_acts); + + if (unlikely(reply)) { + error = ovs_flow_cmd_fill_info(flow, + ovs_header->dp_ifindex, + reply, info->snd_portid, + info->snd_seq, 0, + OVS_FLOW_CMD_NEW); + BUG_ON(error < 0); + } + } else { + /* Could not alloc without acts before locking. */ + reply = ovs_flow_cmd_build_info(flow, ovs_header->dp_ifindex, + info, OVS_FLOW_CMD_NEW, false); + if (unlikely(IS_ERR(reply))) { + error = PTR_ERR(reply); + goto err_unlock_ovs; + } } - reply = ovs_flow_cmd_build_info(flow, ovs_header->dp_ifindex, - info, OVS_FLOW_CMD_NEW, false); /* Clear stats. */ if (a[OVS_FLOW_ATTR_CLEAR]) ovs_flow_stats_clear(flow); ovs_unlock(); - if (reply) { - if (!IS_ERR(reply)) - ovs_notify(&dp_flow_genl_family, reply, info); - else - genl_set_err(&dp_flow_genl_family, sock_net(skb->sk), 0, - 0, PTR_ERR(reply)); - } + if (reply) + ovs_notify(&dp_flow_genl_family, reply, info); + if (old_acts) + ovs_nla_free_flow_actions(old_acts); return 0; err_unlock_ovs: ovs_unlock(); -err_kfree: + kfree_skb(reply); +err_kfree_acts: kfree(acts); error: return error; -- cgit v1.2.3-70-g09d2 From 0c200ef94c9492205e18a18c25650cf27939889c Mon Sep 17 00:00:00 2001 From: Pravin B Shelar Date: Tue, 6 May 2014 16:44:50 -0700 Subject: openvswitch: Simplify genetlink code. Following patch get rid of struct genl_family_and_ops which is redundant due to changes to struct genl_family. Signed-off-by: Kyle Mestery Acked-by: Kyle Mestery Signed-off-by: Pravin B Shelar --- net/openvswitch/datapath.c | 185 ++++++++++++++++++++++----------------------- 1 file changed, 90 insertions(+), 95 deletions(-) (limited to 'net/openvswitch/datapath.c') diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c index 6bc9d94c8df..0d407bca81e 100644 --- a/net/openvswitch/datapath.c +++ b/net/openvswitch/datapath.c @@ -44,11 +44,11 @@ #include #include #include -#include #include #include #include -#include +#include +#include #include #include #include @@ -62,6 +62,22 @@ int ovs_net_id __read_mostly; +static struct genl_family dp_packet_genl_family; +static struct genl_family dp_flow_genl_family; +static struct genl_family dp_datapath_genl_family; + +static struct genl_multicast_group ovs_dp_flow_multicast_group = { + .name = OVS_FLOW_MCGROUP +}; + +static struct genl_multicast_group ovs_dp_datapath_multicast_group = { + .name = OVS_DATAPATH_MCGROUP +}; + +struct genl_multicast_group ovs_dp_vport_multicast_group = { + .name = OVS_VPORT_MCGROUP +}; + /* Check if need to build a reply message. * OVS userspace sets the NLM_F_ECHO flag if it needs the reply. */ static bool ovs_must_notify(struct genl_info *info, @@ -272,16 +288,6 @@ out: u64_stats_update_end(&stats->syncp); } -static struct genl_family dp_packet_genl_family = { - .id = GENL_ID_GENERATE, - .hdrsize = sizeof(struct ovs_header), - .name = OVS_PACKET_FAMILY, - .version = OVS_PACKET_VERSION, - .maxattr = OVS_PACKET_ATTR_MAX, - .netnsok = true, - .parallel_ops = true, -}; - int ovs_dp_upcall(struct datapath *dp, struct sk_buff *skb, const struct dp_upcall_info *upcall_info) { @@ -600,6 +606,18 @@ static const struct genl_ops dp_packet_genl_ops[] = { } }; +static struct genl_family dp_packet_genl_family = { + .id = GENL_ID_GENERATE, + .hdrsize = sizeof(struct ovs_header), + .name = OVS_PACKET_FAMILY, + .version = OVS_PACKET_VERSION, + .maxattr = OVS_PACKET_ATTR_MAX, + .netnsok = true, + .parallel_ops = true, + .ops = dp_packet_genl_ops, + .n_ops = ARRAY_SIZE(dp_packet_genl_ops), +}; + static void get_dp_stats(struct datapath *dp, struct ovs_dp_stats *stats, struct ovs_dp_megaflow_stats *mega_stats) { @@ -631,26 +649,6 @@ static void get_dp_stats(struct datapath *dp, struct ovs_dp_stats *stats, } } -static const struct nla_policy flow_policy[OVS_FLOW_ATTR_MAX + 1] = { - [OVS_FLOW_ATTR_KEY] = { .type = NLA_NESTED }, - [OVS_FLOW_ATTR_ACTIONS] = { .type = NLA_NESTED }, - [OVS_FLOW_ATTR_CLEAR] = { .type = NLA_FLAG }, -}; - -static struct genl_family dp_flow_genl_family = { - .id = GENL_ID_GENERATE, - .hdrsize = sizeof(struct ovs_header), - .name = OVS_FLOW_FAMILY, - .version = OVS_FLOW_VERSION, - .maxattr = OVS_FLOW_ATTR_MAX, - .netnsok = true, - .parallel_ops = true, -}; - -static struct genl_multicast_group ovs_dp_flow_multicast_group = { - .name = OVS_FLOW_MCGROUP -}; - static size_t ovs_flow_cmd_msg_size(const struct sw_flow_actions *acts) { return NLMSG_ALIGN(sizeof(struct ovs_header)) @@ -1186,7 +1184,13 @@ static int ovs_flow_cmd_dump(struct sk_buff *skb, struct netlink_callback *cb) return skb->len; } -static const struct genl_ops dp_flow_genl_ops[] = { +static const struct nla_policy flow_policy[OVS_FLOW_ATTR_MAX + 1] = { + [OVS_FLOW_ATTR_KEY] = { .type = NLA_NESTED }, + [OVS_FLOW_ATTR_ACTIONS] = { .type = NLA_NESTED }, + [OVS_FLOW_ATTR_CLEAR] = { .type = NLA_FLAG }, +}; + +static struct genl_ops dp_flow_genl_ops[] = { { .cmd = OVS_FLOW_CMD_NEW, .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */ .policy = flow_policy, @@ -1210,24 +1214,18 @@ static const struct genl_ops dp_flow_genl_ops[] = { }, }; -static const struct nla_policy datapath_policy[OVS_DP_ATTR_MAX + 1] = { - [OVS_DP_ATTR_NAME] = { .type = NLA_NUL_STRING, .len = IFNAMSIZ - 1 }, - [OVS_DP_ATTR_UPCALL_PID] = { .type = NLA_U32 }, - [OVS_DP_ATTR_USER_FEATURES] = { .type = NLA_U32 }, -}; - -static struct genl_family dp_datapath_genl_family = { +static struct genl_family dp_flow_genl_family = { .id = GENL_ID_GENERATE, .hdrsize = sizeof(struct ovs_header), - .name = OVS_DATAPATH_FAMILY, - .version = OVS_DATAPATH_VERSION, - .maxattr = OVS_DP_ATTR_MAX, + .name = OVS_FLOW_FAMILY, + .version = OVS_FLOW_VERSION, + .maxattr = OVS_FLOW_ATTR_MAX, .netnsok = true, .parallel_ops = true, -}; - -static struct genl_multicast_group ovs_dp_datapath_multicast_group = { - .name = OVS_DATAPATH_MCGROUP + .ops = dp_flow_genl_ops, + .n_ops = ARRAY_SIZE(dp_flow_genl_ops), + .mcgrps = &ovs_dp_flow_multicast_group, + .n_mcgrps = 1, }; static size_t ovs_dp_cmd_msg_size(void) @@ -1574,7 +1572,13 @@ static int ovs_dp_cmd_dump(struct sk_buff *skb, struct netlink_callback *cb) return skb->len; } -static const struct genl_ops dp_datapath_genl_ops[] = { +static const struct nla_policy datapath_policy[OVS_DP_ATTR_MAX + 1] = { + [OVS_DP_ATTR_NAME] = { .type = NLA_NUL_STRING, .len = IFNAMSIZ - 1 }, + [OVS_DP_ATTR_UPCALL_PID] = { .type = NLA_U32 }, + [OVS_DP_ATTR_USER_FEATURES] = { .type = NLA_U32 }, +}; + +static struct genl_ops dp_datapath_genl_ops[] = { { .cmd = OVS_DP_CMD_NEW, .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */ .policy = datapath_policy, @@ -1598,27 +1602,18 @@ static const struct genl_ops dp_datapath_genl_ops[] = { }, }; -static const struct nla_policy vport_policy[OVS_VPORT_ATTR_MAX + 1] = { - [OVS_VPORT_ATTR_NAME] = { .type = NLA_NUL_STRING, .len = IFNAMSIZ - 1 }, - [OVS_VPORT_ATTR_STATS] = { .len = sizeof(struct ovs_vport_stats) }, - [OVS_VPORT_ATTR_PORT_NO] = { .type = NLA_U32 }, - [OVS_VPORT_ATTR_TYPE] = { .type = NLA_U32 }, - [OVS_VPORT_ATTR_UPCALL_PID] = { .type = NLA_U32 }, - [OVS_VPORT_ATTR_OPTIONS] = { .type = NLA_NESTED }, -}; - -struct genl_family dp_vport_genl_family = { +static struct genl_family dp_datapath_genl_family = { .id = GENL_ID_GENERATE, .hdrsize = sizeof(struct ovs_header), - .name = OVS_VPORT_FAMILY, - .version = OVS_VPORT_VERSION, - .maxattr = OVS_VPORT_ATTR_MAX, + .name = OVS_DATAPATH_FAMILY, + .version = OVS_DATAPATH_VERSION, + .maxattr = OVS_DP_ATTR_MAX, .netnsok = true, .parallel_ops = true, -}; - -static struct genl_multicast_group ovs_dp_vport_multicast_group = { - .name = OVS_VPORT_MCGROUP + .ops = dp_datapath_genl_ops, + .n_ops = ARRAY_SIZE(dp_datapath_genl_ops), + .mcgrps = &ovs_dp_datapath_multicast_group, + .n_mcgrps = 1, }; /* Called with ovs_mutex or RCU read lock. */ @@ -1941,7 +1936,16 @@ out: return skb->len; } -static const struct genl_ops dp_vport_genl_ops[] = { +static const struct nla_policy vport_policy[OVS_VPORT_ATTR_MAX + 1] = { + [OVS_VPORT_ATTR_NAME] = { .type = NLA_NUL_STRING, .len = IFNAMSIZ - 1 }, + [OVS_VPORT_ATTR_STATS] = { .len = sizeof(struct ovs_vport_stats) }, + [OVS_VPORT_ATTR_PORT_NO] = { .type = NLA_U32 }, + [OVS_VPORT_ATTR_TYPE] = { .type = NLA_U32 }, + [OVS_VPORT_ATTR_UPCALL_PID] = { .type = NLA_U32 }, + [OVS_VPORT_ATTR_OPTIONS] = { .type = NLA_NESTED }, +}; + +static struct genl_ops dp_vport_genl_ops[] = { { .cmd = OVS_VPORT_CMD_NEW, .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */ .policy = vport_policy, @@ -1965,26 +1969,25 @@ static const struct genl_ops dp_vport_genl_ops[] = { }, }; -struct genl_family_and_ops { - struct genl_family *family; - const struct genl_ops *ops; - int n_ops; - const struct genl_multicast_group *group; +struct genl_family dp_vport_genl_family = { + .id = GENL_ID_GENERATE, + .hdrsize = sizeof(struct ovs_header), + .name = OVS_VPORT_FAMILY, + .version = OVS_VPORT_VERSION, + .maxattr = OVS_VPORT_ATTR_MAX, + .netnsok = true, + .parallel_ops = true, + .ops = dp_vport_genl_ops, + .n_ops = ARRAY_SIZE(dp_vport_genl_ops), + .mcgrps = &ovs_dp_vport_multicast_group, + .n_mcgrps = 1, }; -static const struct genl_family_and_ops dp_genl_families[] = { - { &dp_datapath_genl_family, - dp_datapath_genl_ops, ARRAY_SIZE(dp_datapath_genl_ops), - &ovs_dp_datapath_multicast_group }, - { &dp_vport_genl_family, - dp_vport_genl_ops, ARRAY_SIZE(dp_vport_genl_ops), - &ovs_dp_vport_multicast_group }, - { &dp_flow_genl_family, - dp_flow_genl_ops, ARRAY_SIZE(dp_flow_genl_ops), - &ovs_dp_flow_multicast_group }, - { &dp_packet_genl_family, - dp_packet_genl_ops, ARRAY_SIZE(dp_packet_genl_ops), - NULL }, +static struct genl_family * const dp_genl_families[] = { + &dp_datapath_genl_family, + &dp_vport_genl_family, + &dp_flow_genl_family, + &dp_packet_genl_family, }; static void dp_unregister_genl(int n_families) @@ -1992,33 +1995,25 @@ static void dp_unregister_genl(int n_families) int i; for (i = 0; i < n_families; i++) - genl_unregister_family(dp_genl_families[i].family); + genl_unregister_family(dp_genl_families[i]); } static int dp_register_genl(void) { - int n_registered; int err; int i; - n_registered = 0; for (i = 0; i < ARRAY_SIZE(dp_genl_families); i++) { - const struct genl_family_and_ops *f = &dp_genl_families[i]; - f->family->ops = f->ops; - f->family->n_ops = f->n_ops; - f->family->mcgrps = f->group; - f->family->n_mcgrps = f->group ? 1 : 0; - err = genl_register_family(f->family); + err = genl_register_family(dp_genl_families[i]); if (err) goto error; - n_registered++; } return 0; error: - dp_unregister_genl(n_registered); + dp_unregister_genl(i); return err; } -- cgit v1.2.3-70-g09d2 From ad55200734c65a3ec5d0c39d6ea904008baea536 Mon Sep 17 00:00:00 2001 From: Ben Pfaff Date: Tue, 6 May 2014 16:48:38 -0700 Subject: openvswitch: Fix tracking of flags seen in TCP flows. Flow statistics need to take into account the TCP flags from the packet currently being processed (in 'key'), not the TCP flags matched by the flow found in the kernel flow table (in 'flow'). This bug made the Open vSwitch userspace fin_timeout action have no effect in many cases. This bug is introduced by commit 88d73f6c411ac2f0578 (openvswitch: Use TCP flags in the flow key for stats.) Reported-by: Len Gao Signed-off-by: Ben Pfaff Acked-by: Jarno Rajahalme Acked-by: Jesse Gross Signed-off-by: Pravin B Shelar --- net/openvswitch/datapath.c | 4 ++-- net/openvswitch/flow.c | 4 ++-- net/openvswitch/flow.h | 5 +++-- 3 files changed, 7 insertions(+), 6 deletions(-) (limited to 'net/openvswitch/datapath.c') diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c index 0d407bca81e..a863678c50a 100644 --- a/net/openvswitch/datapath.c +++ b/net/openvswitch/datapath.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2007-2013 Nicira, Inc. + * Copyright (c) 2007-2014 Nicira, Inc. * * This program is free software; you can redistribute it and/or * modify it under the terms of version 2 of the GNU General Public @@ -276,7 +276,7 @@ void ovs_dp_process_received_packet(struct vport *p, struct sk_buff *skb) OVS_CB(skb)->flow = flow; OVS_CB(skb)->pkt_key = &key; - ovs_flow_stats_update(OVS_CB(skb)->flow, skb); + ovs_flow_stats_update(OVS_CB(skb)->flow, key.tp.flags, skb); ovs_execute_actions(dp, skb); stats_counter = &stats->n_hit; diff --git a/net/openvswitch/flow.c b/net/openvswitch/flow.c index 334751cb152..d07ab538fc9 100644 --- a/net/openvswitch/flow.c +++ b/net/openvswitch/flow.c @@ -61,10 +61,10 @@ u64 ovs_flow_used_time(unsigned long flow_jiffies) #define TCP_FLAGS_BE16(tp) (*(__be16 *)&tcp_flag_word(tp) & htons(0x0FFF)) -void ovs_flow_stats_update(struct sw_flow *flow, struct sk_buff *skb) +void ovs_flow_stats_update(struct sw_flow *flow, __be16 tcp_flags, + struct sk_buff *skb) { struct flow_stats *stats; - __be16 tcp_flags = flow->key.tp.flags; int node = numa_node_id(); stats = rcu_dereference(flow->stats[node]); diff --git a/net/openvswitch/flow.h b/net/openvswitch/flow.h index ac395d2cd82..5e5aaed3a85 100644 --- a/net/openvswitch/flow.h +++ b/net/openvswitch/flow.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2007-2013 Nicira, Inc. + * Copyright (c) 2007-2014 Nicira, Inc. * * This program is free software; you can redistribute it and/or * modify it under the terms of version 2 of the GNU General Public @@ -180,7 +180,8 @@ struct arp_eth_header { unsigned char ar_tip[4]; /* target IP address */ } __packed; -void ovs_flow_stats_update(struct sw_flow *, struct sk_buff *); +void ovs_flow_stats_update(struct sw_flow *, __be16 tcp_flags, + struct sk_buff *); void ovs_flow_stats_get(const struct sw_flow *, struct ovs_flow_stats *, unsigned long *used, __be16 *tcp_flags); void ovs_flow_stats_clear(struct sw_flow *); -- cgit v1.2.3-70-g09d2 From 4a46b24e147dfa9b858026da02cad0bdd4e149d2 Mon Sep 17 00:00:00 2001 From: Alex Wang Date: Mon, 30 Jun 2014 20:30:29 -0700 Subject: openvswitch: Use exact lookup for flow_get and flow_del. Due to the race condition in userspace, there is chance that two overlapping megaflows could be installed in datapath. And this causes userspace unable to delete the less inclusive megaflow flow even after it timeout, since the flow_del logic will stop at the first match of masked flow. This commit fixes the bug by making the kernel flow_del and flow_get logic check all masks in that case. Introduced by 03f0d916a (openvswitch: Mega flow implementation). Signed-off-by: Alex Wang Acked-by: Andy Zhou Signed-off-by: Pravin B Shelar --- net/openvswitch/datapath.c | 23 +++++++++++------------ net/openvswitch/flow_table.c | 16 ++++++++++++++++ net/openvswitch/flow_table.h | 3 ++- 3 files changed, 29 insertions(+), 13 deletions(-) (limited to 'net/openvswitch/datapath.c') diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c index a863678c50a..9db4bf6740d 100644 --- a/net/openvswitch/datapath.c +++ b/net/openvswitch/datapath.c @@ -889,8 +889,11 @@ static int ovs_flow_cmd_new(struct sk_buff *skb, struct genl_info *info) } /* The unmasked key has to be the same for flow updates. */ if (unlikely(!ovs_flow_cmp_unmasked_key(flow, &match))) { - error = -EEXIST; - goto err_unlock_ovs; + flow = ovs_flow_tbl_lookup_exact(&dp->table, &match); + if (!flow) { + error = -ENOENT; + goto err_unlock_ovs; + } } /* Update actions. */ old_acts = ovsl_dereference(flow->sf_acts); @@ -981,16 +984,12 @@ static int ovs_flow_cmd_set(struct sk_buff *skb, struct genl_info *info) goto err_unlock_ovs; } /* Check that the flow exists. */ - flow = ovs_flow_tbl_lookup(&dp->table, &key); + flow = ovs_flow_tbl_lookup_exact(&dp->table, &match); if (unlikely(!flow)) { error = -ENOENT; goto err_unlock_ovs; } - /* The unmasked key has to be the same for flow updates. */ - if (unlikely(!ovs_flow_cmp_unmasked_key(flow, &match))) { - error = -EEXIST; - goto err_unlock_ovs; - } + /* Update actions, if present. */ if (likely(acts)) { old_acts = ovsl_dereference(flow->sf_acts); @@ -1063,8 +1062,8 @@ static int ovs_flow_cmd_get(struct sk_buff *skb, struct genl_info *info) goto unlock; } - flow = ovs_flow_tbl_lookup(&dp->table, &key); - if (!flow || !ovs_flow_cmp_unmasked_key(flow, &match)) { + flow = ovs_flow_tbl_lookup_exact(&dp->table, &match); + if (!flow) { err = -ENOENT; goto unlock; } @@ -1113,8 +1112,8 @@ static int ovs_flow_cmd_del(struct sk_buff *skb, struct genl_info *info) goto unlock; } - flow = ovs_flow_tbl_lookup(&dp->table, &key); - if (unlikely(!flow || !ovs_flow_cmp_unmasked_key(flow, &match))) { + flow = ovs_flow_tbl_lookup_exact(&dp->table, &match); + if (unlikely(!flow)) { err = -ENOENT; goto unlock; } diff --git a/net/openvswitch/flow_table.c b/net/openvswitch/flow_table.c index 574c3abc9b3..cf2d853646f 100644 --- a/net/openvswitch/flow_table.c +++ b/net/openvswitch/flow_table.c @@ -456,6 +456,22 @@ struct sw_flow *ovs_flow_tbl_lookup(struct flow_table *tbl, return ovs_flow_tbl_lookup_stats(tbl, key, &n_mask_hit); } +struct sw_flow *ovs_flow_tbl_lookup_exact(struct flow_table *tbl, + struct sw_flow_match *match) +{ + struct table_instance *ti = rcu_dereference_ovsl(tbl->ti); + struct sw_flow_mask *mask; + struct sw_flow *flow; + + /* Always called under ovs-mutex. */ + list_for_each_entry(mask, &tbl->mask_list, list) { + flow = masked_flow_lookup(ti, match->key, mask); + if (flow && ovs_flow_cmp_unmasked_key(flow, match)) /* Found */ + return flow; + } + return NULL; +} + int ovs_flow_tbl_num_masks(const struct flow_table *table) { struct sw_flow_mask *mask; diff --git a/net/openvswitch/flow_table.h b/net/openvswitch/flow_table.h index ca8a5820f61..5918bff7f3f 100644 --- a/net/openvswitch/flow_table.h +++ b/net/openvswitch/flow_table.h @@ -76,7 +76,8 @@ struct sw_flow *ovs_flow_tbl_lookup_stats(struct flow_table *, u32 *n_mask_hit); struct sw_flow *ovs_flow_tbl_lookup(struct flow_table *, const struct sw_flow_key *); - +struct sw_flow *ovs_flow_tbl_lookup_exact(struct flow_table *tbl, + struct sw_flow_match *match); bool ovs_flow_cmp_unmasked_key(const struct sw_flow *flow, struct sw_flow_match *match); -- cgit v1.2.3-70-g09d2