From c7ac8679bec9397afe8918f788cbcef88c38da54 Mon Sep 17 00:00:00 2001 From: Greg Rose <gregory.v.rose@intel.com> Date: Fri, 10 Jun 2011 01:27:09 +0000 Subject: rtnetlink: Compute and store minimum ifinfo dump size The message size allocated for rtnl ifinfo dumps was limited to a single page. This is not enough for additional interface info available with devices that support SR-IOV and caused a bug in which VF info would not be displayed if more than approximately 40 VFs were created per interface. Implement a new function pointer for the rtnl_register service that will calculate the amount of data required for the ifinfo dump and allocate enough data to satisfy the request. Signed-off-by: Greg Rose <gregory.v.rose@intel.com> Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com> --- net/core/rtnetlink.c | 60 ++++++++++++++++++++++++++++++++++++++++++---------- 1 file changed, 49 insertions(+), 11 deletions(-) (limited to 'net/core/rtnetlink.c') diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index abd936d8a71..a798fc6f2aa 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -56,9 +56,11 @@ struct rtnl_link { rtnl_doit_func doit; rtnl_dumpit_func dumpit; + rtnl_calcit_func calcit; }; static DEFINE_MUTEX(rtnl_mutex); +static u16 min_ifinfo_dump_size; void rtnl_lock(void) { @@ -144,12 +146,28 @@ static rtnl_dumpit_func rtnl_get_dumpit(int protocol, int msgindex) return tab ? tab[msgindex].dumpit : NULL; } +static rtnl_calcit_func rtnl_get_calcit(int protocol, int msgindex) +{ + struct rtnl_link *tab; + + if (protocol <= RTNL_FAMILY_MAX) + tab = rtnl_msg_handlers[protocol]; + else + tab = NULL; + + if (tab == NULL || tab[msgindex].calcit == NULL) + tab = rtnl_msg_handlers[PF_UNSPEC]; + + return tab ? tab[msgindex].calcit : NULL; +} + /** * __rtnl_register - Register a rtnetlink message type * @protocol: Protocol family or PF_UNSPEC * @msgtype: rtnetlink message type * @doit: Function pointer called for each request message * @dumpit: Function pointer called for each dump request (NLM_F_DUMP) message + * @calcit: Function pointer to calc size of dump message * * Registers the specified function pointers (at least one of them has * to be non-NULL) to be called whenever a request message for the @@ -162,7 +180,8 @@ static rtnl_dumpit_func rtnl_get_dumpit(int protocol, int msgindex) * Returns 0 on success or a negative error code. */ int __rtnl_register(int protocol, int msgtype, - rtnl_doit_func doit, rtnl_dumpit_func dumpit) + rtnl_doit_func doit, rtnl_dumpit_func dumpit, + rtnl_calcit_func calcit) { struct rtnl_link *tab; int msgindex; @@ -185,6 +204,9 @@ int __rtnl_register(int protocol, int msgtype, if (dumpit) tab[msgindex].dumpit = dumpit; + if (calcit) + tab[msgindex].calcit = calcit; + return 0; } EXPORT_SYMBOL_GPL(__rtnl_register); @@ -199,9 +221,10 @@ EXPORT_SYMBOL_GPL(__rtnl_register); * of memory implies no sense in continuing. */ void rtnl_register(int protocol, int msgtype, - rtnl_doit_func doit, rtnl_dumpit_func dumpit) + rtnl_doit_func doit, rtnl_dumpit_func dumpit, + rtnl_calcit_func calcit) { - if (__rtnl_register(protocol, msgtype, doit, dumpit) < 0) + if (__rtnl_register(protocol, msgtype, doit, dumpit, calcit) < 0) panic("Unable to register rtnetlink message handler, " "protocol = %d, message type = %d\n", protocol, msgtype); @@ -1818,6 +1841,11 @@ static int rtnl_getlink(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg) return err; } +static u16 rtnl_calcit(struct sk_buff *skb) +{ + return min_ifinfo_dump_size; +} + static int rtnl_dump_all(struct sk_buff *skb, struct netlink_callback *cb) { int idx; @@ -1847,11 +1875,14 @@ void rtmsg_ifinfo(int type, struct net_device *dev, unsigned change) struct net *net = dev_net(dev); struct sk_buff *skb; int err = -ENOBUFS; + size_t if_info_size; - skb = nlmsg_new(if_nlmsg_size(dev), GFP_KERNEL); + skb = nlmsg_new((if_info_size = if_nlmsg_size(dev)), GFP_KERNEL); if (skb == NULL) goto errout; + min_ifinfo_dump_size = max_t(u16, if_info_size, min_ifinfo_dump_size); + err = rtnl_fill_ifinfo(skb, dev, type, 0, 0, change, 0); if (err < 0) { /* -EMSGSIZE implies BUG in if_nlmsg_size() */ @@ -1902,14 +1933,20 @@ static int rtnetlink_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh) if (kind == 2 && nlh->nlmsg_flags&NLM_F_DUMP) { struct sock *rtnl; rtnl_dumpit_func dumpit; + rtnl_calcit_func calcit; + u16 min_dump_alloc = 0; dumpit = rtnl_get_dumpit(family, type); if (dumpit == NULL) return -EOPNOTSUPP; + calcit = rtnl_get_calcit(family, type); + if (calcit) + min_dump_alloc = calcit(skb); __rtnl_unlock(); rtnl = net->rtnl; - err = netlink_dump_start(rtnl, skb, nlh, dumpit, NULL); + err = netlink_dump_start(rtnl, skb, nlh, dumpit, + NULL, min_dump_alloc); rtnl_lock(); return err; } @@ -2019,12 +2056,13 @@ void __init rtnetlink_init(void) netlink_set_nonroot(NETLINK_ROUTE, NL_NONROOT_RECV); register_netdevice_notifier(&rtnetlink_dev_notifier); - rtnl_register(PF_UNSPEC, RTM_GETLINK, rtnl_getlink, rtnl_dump_ifinfo); - rtnl_register(PF_UNSPEC, RTM_SETLINK, rtnl_setlink, NULL); - rtnl_register(PF_UNSPEC, RTM_NEWLINK, rtnl_newlink, NULL); - rtnl_register(PF_UNSPEC, RTM_DELLINK, rtnl_dellink, NULL); + rtnl_register(PF_UNSPEC, RTM_GETLINK, rtnl_getlink, + rtnl_dump_ifinfo, rtnl_calcit); + rtnl_register(PF_UNSPEC, RTM_SETLINK, rtnl_setlink, NULL, NULL); + rtnl_register(PF_UNSPEC, RTM_NEWLINK, rtnl_newlink, NULL, NULL); + rtnl_register(PF_UNSPEC, RTM_DELLINK, rtnl_dellink, NULL, NULL); - rtnl_register(PF_UNSPEC, RTM_GETADDR, NULL, rtnl_dump_all); - rtnl_register(PF_UNSPEC, RTM_GETROUTE, NULL, rtnl_dump_all); + rtnl_register(PF_UNSPEC, RTM_GETADDR, NULL, rtnl_dump_all, NULL); + rtnl_register(PF_UNSPEC, RTM_GETROUTE, NULL, rtnl_dump_all, NULL); } -- cgit v1.2.3-70-g09d2 From 4e985adaa504c1c1a05c8e013777ea0791a17b4d Mon Sep 17 00:00:00 2001 From: Thomas Graf <tgraf@infradead.org> Date: Tue, 21 Jun 2011 03:11:20 +0000 Subject: rtnl: provide link dump consistency info This patch adds a change sequence counter to each net namespace which is bumped whenever a netdevice is added or removed from the list. If such a change occurred while a link dump took place, the dump will have the NLM_F_DUMP_INTR flag set in the first message which has been interrupted and in all subsequent messages of the same dump. Note that links may still be modified or renamed while a dump is taking place but we can guarantee for userspace to receive a complete list of links and not miss any. Testing: I have added 500 VLAN netdevices to make sure the dump is split over multiple messages. Then while continuously dumping links in one process I also continuously deleted and re-added a dummy netdevice in another process. Multiple dumps per seconds have had the NLM_F_DUMP_INTR flag set. I guess we can wait for Johannes patch to hit net-next via the wireless tree. I just wanted to give this some testing right away. Signed-off-by: Thomas Graf <tgraf@infradead.org> Signed-off-by: David S. Miller <davem@davemloft.net> --- include/net/net_namespace.h | 1 + net/core/dev.c | 10 ++++++++++ net/core/net_namespace.c | 1 + net/core/rtnetlink.c | 4 ++++ 4 files changed, 16 insertions(+) (limited to 'net/core/rtnetlink.c') diff --git a/include/net/net_namespace.h b/include/net/net_namespace.h index aef430d779b..1ab1aec209a 100644 --- a/include/net/net_namespace.h +++ b/include/net/net_namespace.h @@ -65,6 +65,7 @@ struct net { struct list_head dev_base_head; struct hlist_head *dev_name_head; struct hlist_head *dev_index_head; + unsigned int dev_base_seq; /* protected by rtnl_mutex */ /* core fib_rules */ struct list_head rules_ops; diff --git a/net/core/dev.c b/net/core/dev.c index 6b6ef14b42f..4577e6711ec 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -199,6 +199,11 @@ static struct list_head ptype_all __read_mostly; /* Taps */ DEFINE_RWLOCK(dev_base_lock); EXPORT_SYMBOL(dev_base_lock); +static inline void dev_base_seq_inc(struct net *net) +{ + while (++net->dev_base_seq == 0); +} + static inline struct hlist_head *dev_name_hash(struct net *net, const char *name) { unsigned hash = full_name_hash(name, strnlen(name, IFNAMSIZ)); @@ -237,6 +242,9 @@ static int list_netdevice(struct net_device *dev) hlist_add_head_rcu(&dev->index_hlist, dev_index_hash(net, dev->ifindex)); write_unlock_bh(&dev_base_lock); + + dev_base_seq_inc(net); + return 0; } @@ -253,6 +261,8 @@ static void unlist_netdevice(struct net_device *dev) hlist_del_rcu(&dev->name_hlist); hlist_del_rcu(&dev->index_hlist); write_unlock_bh(&dev_base_lock); + + dev_base_seq_inc(dev_net(dev)); } /* diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c index ea489db1bc2..5bbdbf0d366 100644 --- a/net/core/net_namespace.c +++ b/net/core/net_namespace.c @@ -129,6 +129,7 @@ static __net_init int setup_net(struct net *net) atomic_set(&net->count, 1); atomic_set(&net->passive, 1); + net->dev_base_seq = 1; #ifdef NETNS_REFCNT_DEBUG atomic_set(&net->use_count, 0); diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index a798fc6f2aa..99d9e953fe3 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -1032,6 +1032,8 @@ static int rtnl_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb) s_idx = cb->args[1]; rcu_read_lock(); + cb->seq = net->dev_base_seq; + for (h = s_h; h < NETDEV_HASHENTRIES; h++, s_idx = 0) { idx = 0; head = &net->dev_index_head[h]; @@ -1043,6 +1045,8 @@ static int rtnl_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb) cb->nlh->nlmsg_seq, 0, NLM_F_MULTI) <= 0) goto out; + + nl_dump_check_consistent(cb, nlmsg_hdr(skb)); cont: idx++; } -- cgit v1.2.3-70-g09d2