From ff491a7334acfd74e515c896632e37e401f52676 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Thu, 5 Feb 2009 23:56:36 -0800 Subject: netlink: change return-value logic of netlink_broadcast() Currently, netlink_broadcast() reports errors to the caller if no messages at all were delivered: 1) If, at least, one message has been delivered correctly, returns 0. 2) Otherwise, if no messages at all were delivered due to skb_clone() failure, return -ENOBUFS. 3) Otherwise, if there are no listeners, return -ESRCH. With this patch, the caller knows if the delivery of any of the messages to the listeners have failed: 1) If it fails to deliver any message (for whatever reason), return -ENOBUFS. 2) Otherwise, if all messages were delivered OK, returns 0. 3) Otherwise, if no listeners, return -ESRCH. In the current ctnetlink code and in Netfilter in general, we can add reliable logging and connection tracking event delivery by dropping the packets whose events were not successfully delivered over Netlink. Of course, this option would be settable via /proc as this approach reduces performance (in terms of filtered connections per seconds by a stateful firewall) but providing reliable logging and event delivery (for conntrackd) in return. This patch also changes some clients of netlink_broadcast() that may report ENOBUFS errors via printk. This error handling is not of any help. Instead, the userspace daemons that are listening to those netlink messages should resync themselves with the kernel-side if they hit ENOBUFS. BTW, netlink_broadcast() clients include those that call cn_netlink_send(), nlmsg_multicast() and genlmsg_multicast() since they internally call netlink_broadcast() and return its error value. Signed-off-by: Pablo Neira Ayuso Signed-off-by: David S. Miller --- net/netlink/af_netlink.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) (limited to 'net/netlink') diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index 9eb895c7a2a..6ee69c27f80 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -950,6 +950,7 @@ struct netlink_broadcast_data { u32 pid; u32 group; int failure; + int delivery_failure; int congested; int delivered; gfp_t allocation; @@ -999,6 +1000,7 @@ static inline int do_one_broadcast(struct sock *sk, p->skb2 = NULL; } else if ((val = netlink_broadcast_deliver(sk, p->skb2)) < 0) { netlink_overrun(sk); + p->delivery_failure = 1; } else { p->congested |= val; p->delivered = 1; @@ -1025,6 +1027,7 @@ int netlink_broadcast(struct sock *ssk, struct sk_buff *skb, u32 pid, info.pid = pid; info.group = group; info.failure = 0; + info.delivery_failure = 0; info.congested = 0; info.delivered = 0; info.allocation = allocation; @@ -1045,13 +1048,14 @@ int netlink_broadcast(struct sock *ssk, struct sk_buff *skb, u32 pid, if (info.skb2) kfree_skb(info.skb2); + if (info.delivery_failure || info.failure) + return -ENOBUFS; + if (info.delivered) { if (info.congested && (allocation & __GFP_WAIT)) yield(); return 0; } - if (info.failure) - return -ENOBUFS; return -ESRCH; } EXPORT_SYMBOL(netlink_broadcast); -- cgit v1.2.3-70-g09d2 From be0c22a46cfb79ab2342bb28fde99afa94ef868e Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Wed, 18 Feb 2009 01:40:43 +0000 Subject: netlink: add NETLINK_BROADCAST_ERROR socket option This patch adds NETLINK_BROADCAST_ERROR which is a netlink socket option that the listener can set to make netlink_broadcast() return errors in the delivery to the caller. This option is useful if the caller of netlink_broadcast() do something with the result of the message delivery, like in ctnetlink where it drops a network packet if the event delivery failed, this is used to enable reliable logging and state-synchronization. If this socket option is not set, netlink_broadcast() only reports ESRCH errors and silently ignore ENOBUFS errors, which is what most netlink_broadcast() callers should do. This socket option is based on a suggestion from Patrick McHardy. Patrick McHardy can exchange this patch for a beer from me ;). Signed-off-by: Pablo Neira Ayuso Acked-by: Patrick McHardy Signed-off-by: David S. Miller --- include/linux/netlink.h | 1 + net/netlink/af_netlink.c | 25 +++++++++++++++++++++++-- 2 files changed, 24 insertions(+), 2 deletions(-) (limited to 'net/netlink') diff --git a/include/linux/netlink.h b/include/linux/netlink.h index 51b09a1f46c..1e6bf995435 100644 --- a/include/linux/netlink.h +++ b/include/linux/netlink.h @@ -103,6 +103,7 @@ struct nlmsgerr #define NETLINK_ADD_MEMBERSHIP 1 #define NETLINK_DROP_MEMBERSHIP 2 #define NETLINK_PKTINFO 3 +#define NETLINK_BROADCAST_ERROR 4 struct nl_pktinfo { diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index 6ee69c27f80..ed587be1e1c 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -85,6 +85,7 @@ struct netlink_sock { #define NETLINK_KERNEL_SOCKET 0x1 #define NETLINK_RECV_PKTINFO 0x2 +#define NETLINK_BROADCAST_SEND_ERROR 0x4 static inline struct netlink_sock *nlk_sk(struct sock *sk) { @@ -995,12 +996,15 @@ static inline int do_one_broadcast(struct sock *sk, netlink_overrun(sk); /* Clone failed. Notify ALL listeners. */ p->failure = 1; + if (nlk->flags & NETLINK_BROADCAST_SEND_ERROR) + p->delivery_failure = 1; } else if (sk_filter(sk, p->skb2)) { kfree_skb(p->skb2); p->skb2 = NULL; } else if ((val = netlink_broadcast_deliver(sk, p->skb2)) < 0) { netlink_overrun(sk); - p->delivery_failure = 1; + if (nlk->flags & NETLINK_BROADCAST_SEND_ERROR) + p->delivery_failure = 1; } else { p->congested |= val; p->delivered = 1; @@ -1048,7 +1052,7 @@ int netlink_broadcast(struct sock *ssk, struct sk_buff *skb, u32 pid, if (info.skb2) kfree_skb(info.skb2); - if (info.delivery_failure || info.failure) + if (info.delivery_failure) return -ENOBUFS; if (info.delivered) { @@ -1163,6 +1167,13 @@ static int netlink_setsockopt(struct socket *sock, int level, int optname, err = 0; break; } + case NETLINK_BROADCAST_ERROR: + if (val) + nlk->flags |= NETLINK_BROADCAST_SEND_ERROR; + else + nlk->flags &= ~NETLINK_BROADCAST_SEND_ERROR; + err = 0; + break; default: err = -ENOPROTOOPT; } @@ -1195,6 +1206,16 @@ static int netlink_getsockopt(struct socket *sock, int level, int optname, return -EFAULT; err = 0; break; + case NETLINK_BROADCAST_ERROR: + if (len < sizeof(int)) + return -EINVAL; + len = sizeof(int); + val = nlk->flags & NETLINK_BROADCAST_SEND_ERROR ? 1 : 0; + if (put_user(len, optlen) || + put_user(val, optval)) + return -EFAULT; + err = 0; + break; default: err = -ENOPROTOOPT; } -- cgit v1.2.3-70-g09d2 From 1ce85fe402137824246bad03ff85f3913d565c17 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Tue, 24 Feb 2009 23:18:28 -0800 Subject: netlink: change nlmsg_notify() return value logic This patch changes the return value of nlmsg_notify() as follows: If NETLINK_BROADCAST_ERROR is set by any of the listeners and an error in the delivery happened, return the broadcast error; else if there are no listeners apart from the socket that requested a change with the echo flag, return the result of the unicast notification. Thus, with this patch, the unicast notification is handled in the same way of a broadcast listener that has set the NETLINK_BROADCAST_ERROR socket flag. This patch is useful in case that the caller of nlmsg_notify() wants to know the result of the delivery of a netlink notification (including the broadcast delivery) and take any action in case that the delivery failed. For example, ctnetlink can drop packets if the event delivery failed to provide reliable logging and state-synchronization at the cost of dropping packets. This patch also modifies the rtnetlink code to ignore the return value of rtnl_notify() in all callers. The function rtnl_notify() (before this patch) returned the error of the unicast notification which makes rtnl_set_sk_err() reports errors to all listeners. This is not of any help since the origin of the change (the socket that requested the echoing) notices the ENOBUFS error if the notification fails and should resync itself. Signed-off-by: Pablo Neira Ayuso Acked-by: Patrick McHardy Signed-off-by: David S. Miller --- include/linux/rtnetlink.h | 4 ++-- net/bridge/br_netlink.c | 3 ++- net/core/fib_rules.c | 3 ++- net/core/neighbour.c | 3 ++- net/core/rtnetlink.c | 9 +++++---- net/decnet/dn_dev.c | 3 ++- net/decnet/dn_table.c | 3 ++- net/ipv4/devinet.c | 3 ++- net/ipv4/fib_semantics.c | 5 +++-- net/ipv6/addrconf.c | 9 ++++++--- net/ipv6/ndisc.c | 6 +----- net/ipv6/route.c | 5 +++-- net/netlink/af_netlink.c | 14 ++++++++++---- net/phonet/pn_netlink.c | 5 +++-- 14 files changed, 45 insertions(+), 30 deletions(-) (limited to 'net/netlink') diff --git a/include/linux/rtnetlink.h b/include/linux/rtnetlink.h index 1e5f6730ff3..35a07c830f7 100644 --- a/include/linux/rtnetlink.h +++ b/include/linux/rtnetlink.h @@ -622,8 +622,8 @@ static __inline__ int rtattr_strcmp(const struct rtattr *rta, const char *str) extern int rtnetlink_send(struct sk_buff *skb, struct net *net, u32 pid, u32 group, int echo); extern int rtnl_unicast(struct sk_buff *skb, struct net *net, u32 pid); -extern int rtnl_notify(struct sk_buff *skb, struct net *net, u32 pid, u32 group, - struct nlmsghdr *nlh, gfp_t flags); +extern void rtnl_notify(struct sk_buff *skb, struct net *net, u32 pid, + u32 group, struct nlmsghdr *nlh, gfp_t flags); extern void rtnl_set_sk_err(struct net *net, u32 group, int error); extern int rtnetlink_put_metrics(struct sk_buff *skb, u32 *metrics); extern int rtnl_put_cacheinfo(struct sk_buff *skb, struct dst_entry *dst, diff --git a/net/bridge/br_netlink.c b/net/bridge/br_netlink.c index ba7be195803..fcffb3fb117 100644 --- a/net/bridge/br_netlink.c +++ b/net/bridge/br_netlink.c @@ -98,7 +98,8 @@ void br_ifinfo_notify(int event, struct net_bridge_port *port) kfree_skb(skb); goto errout; } - err = rtnl_notify(skb, net, 0, RTNLGRP_LINK, NULL, GFP_ATOMIC); + rtnl_notify(skb, net, 0, RTNLGRP_LINK, NULL, GFP_ATOMIC); + return; errout: if (err < 0) rtnl_set_sk_err(net, RTNLGRP_LINK, err); diff --git a/net/core/fib_rules.c b/net/core/fib_rules.c index 32b3a0152d7..98691e1466b 100644 --- a/net/core/fib_rules.c +++ b/net/core/fib_rules.c @@ -588,7 +588,8 @@ static void notify_rule_change(int event, struct fib_rule *rule, goto errout; } - err = rtnl_notify(skb, net, pid, ops->nlgroup, nlh, GFP_KERNEL); + rtnl_notify(skb, net, pid, ops->nlgroup, nlh, GFP_KERNEL); + return; errout: if (err < 0) rtnl_set_sk_err(net, ops->nlgroup, err); diff --git a/net/core/neighbour.c b/net/core/neighbour.c index 278a142d104..e1144cb94b9 100644 --- a/net/core/neighbour.c +++ b/net/core/neighbour.c @@ -2534,7 +2534,8 @@ static void __neigh_notify(struct neighbour *n, int type, int flags) kfree_skb(skb); goto errout; } - err = rtnl_notify(skb, net, 0, RTNLGRP_NEIGH, NULL, GFP_ATOMIC); + rtnl_notify(skb, net, 0, RTNLGRP_NEIGH, NULL, GFP_ATOMIC); + return; errout: if (err < 0) rtnl_set_sk_err(net, RTNLGRP_NEIGH, err); diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 790dd205bb5..d78030f88bd 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -455,8 +455,8 @@ int rtnl_unicast(struct sk_buff *skb, struct net *net, u32 pid) return nlmsg_unicast(rtnl, skb, pid); } -int rtnl_notify(struct sk_buff *skb, struct net *net, u32 pid, u32 group, - struct nlmsghdr *nlh, gfp_t flags) +void rtnl_notify(struct sk_buff *skb, struct net *net, u32 pid, u32 group, + struct nlmsghdr *nlh, gfp_t flags) { struct sock *rtnl = net->rtnl; int report = 0; @@ -464,7 +464,7 @@ int rtnl_notify(struct sk_buff *skb, struct net *net, u32 pid, u32 group, if (nlh) report = nlmsg_report(nlh); - return nlmsg_notify(rtnl, skb, pid, group, report, flags); + nlmsg_notify(rtnl, skb, pid, group, report, flags); } void rtnl_set_sk_err(struct net *net, u32 group, int error) @@ -1246,7 +1246,8 @@ void rtmsg_ifinfo(int type, struct net_device *dev, unsigned change) kfree_skb(skb); goto errout; } - err = rtnl_notify(skb, net, 0, RTNLGRP_LINK, NULL, GFP_KERNEL); + rtnl_notify(skb, net, 0, RTNLGRP_LINK, NULL, GFP_KERNEL); + return; errout: if (err < 0) rtnl_set_sk_err(net, RTNLGRP_LINK, err); diff --git a/net/decnet/dn_dev.c b/net/decnet/dn_dev.c index daf2b98b15f..e457769bf7a 100644 --- a/net/decnet/dn_dev.c +++ b/net/decnet/dn_dev.c @@ -769,7 +769,8 @@ static void dn_ifaddr_notify(int event, struct dn_ifaddr *ifa) kfree_skb(skb); goto errout; } - err = rtnl_notify(skb, &init_net, 0, RTNLGRP_DECnet_IFADDR, NULL, GFP_KERNEL); + rtnl_notify(skb, &init_net, 0, RTNLGRP_DECnet_IFADDR, NULL, GFP_KERNEL); + return; errout: if (err < 0) rtnl_set_sk_err(&init_net, RTNLGRP_DECnet_IFADDR, err); diff --git a/net/decnet/dn_table.c b/net/decnet/dn_table.c index 69ad9280c69..67054b0d550 100644 --- a/net/decnet/dn_table.c +++ b/net/decnet/dn_table.c @@ -375,7 +375,8 @@ static void dn_rtmsg_fib(int event, struct dn_fib_node *f, int z, u32 tb_id, kfree_skb(skb); goto errout; } - err = rtnl_notify(skb, &init_net, pid, RTNLGRP_DECnet_ROUTE, nlh, GFP_KERNEL); + rtnl_notify(skb, &init_net, pid, RTNLGRP_DECnet_ROUTE, nlh, GFP_KERNEL); + return; errout: if (err < 0) rtnl_set_sk_err(&init_net, RTNLGRP_DECnet_ROUTE, err); diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c index d519a6a6672..126bb911880 100644 --- a/net/ipv4/devinet.c +++ b/net/ipv4/devinet.c @@ -1216,7 +1216,8 @@ static void rtmsg_ifa(int event, struct in_ifaddr *ifa, struct nlmsghdr *nlh, kfree_skb(skb); goto errout; } - err = rtnl_notify(skb, net, pid, RTNLGRP_IPV4_IFADDR, nlh, GFP_KERNEL); + rtnl_notify(skb, net, pid, RTNLGRP_IPV4_IFADDR, nlh, GFP_KERNEL); + return; errout: if (err < 0) rtnl_set_sk_err(net, RTNLGRP_IPV4_IFADDR, err); diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c index 4817dea3bc7..f831df50090 100644 --- a/net/ipv4/fib_semantics.c +++ b/net/ipv4/fib_semantics.c @@ -322,8 +322,9 @@ void rtmsg_fib(int event, __be32 key, struct fib_alias *fa, kfree_skb(skb); goto errout; } - err = rtnl_notify(skb, info->nl_net, info->pid, RTNLGRP_IPV4_ROUTE, - info->nlh, GFP_KERNEL); + rtnl_notify(skb, info->nl_net, info->pid, RTNLGRP_IPV4_ROUTE, + info->nlh, GFP_KERNEL); + return; errout: if (err < 0) rtnl_set_sk_err(info->nl_net, RTNLGRP_IPV4_ROUTE, err); diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 03e2a1ad71e..f8f76d6e21c 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -3638,7 +3638,8 @@ static void inet6_ifa_notify(int event, struct inet6_ifaddr *ifa) kfree_skb(skb); goto errout; } - err = rtnl_notify(skb, net, 0, RTNLGRP_IPV6_IFADDR, NULL, GFP_ATOMIC); + rtnl_notify(skb, net, 0, RTNLGRP_IPV6_IFADDR, NULL, GFP_ATOMIC); + return; errout: if (err < 0) rtnl_set_sk_err(net, RTNLGRP_IPV6_IFADDR, err); @@ -3849,7 +3850,8 @@ void inet6_ifinfo_notify(int event, struct inet6_dev *idev) kfree_skb(skb); goto errout; } - err = rtnl_notify(skb, net, 0, RTNLGRP_IPV6_IFADDR, NULL, GFP_ATOMIC); + rtnl_notify(skb, net, 0, RTNLGRP_IPV6_IFADDR, NULL, GFP_ATOMIC); + return; errout: if (err < 0) rtnl_set_sk_err(net, RTNLGRP_IPV6_IFADDR, err); @@ -3919,7 +3921,8 @@ static void inet6_prefix_notify(int event, struct inet6_dev *idev, kfree_skb(skb); goto errout; } - err = rtnl_notify(skb, net, 0, RTNLGRP_IPV6_PREFIX, NULL, GFP_ATOMIC); + rtnl_notify(skb, net, 0, RTNLGRP_IPV6_PREFIX, NULL, GFP_ATOMIC); + return; errout: if (err < 0) rtnl_set_sk_err(net, RTNLGRP_IPV6_PREFIX, err); diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c index 3cd83b85e9e..9f061d1adbc 100644 --- a/net/ipv6/ndisc.c +++ b/net/ipv6/ndisc.c @@ -1095,11 +1095,7 @@ static void ndisc_ra_useropt(struct sk_buff *ra, struct nd_opt_hdr *opt) &ipv6_hdr(ra)->saddr); nlmsg_end(skb, nlh); - err = rtnl_notify(skb, net, 0, RTNLGRP_ND_USEROPT, NULL, - GFP_ATOMIC); - if (err < 0) - goto errout; - + rtnl_notify(skb, net, 0, RTNLGRP_ND_USEROPT, NULL, GFP_ATOMIC); return; nla_put_failure: diff --git a/net/ipv6/route.c b/net/ipv6/route.c index c3d486a3eda..1394ddb6e35 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -2400,8 +2400,9 @@ void inet6_rt_notify(int event, struct rt6_info *rt, struct nl_info *info) kfree_skb(skb); goto errout; } - err = rtnl_notify(skb, net, info->pid, RTNLGRP_IPV6_ROUTE, - info->nlh, gfp_any()); + rtnl_notify(skb, net, info->pid, RTNLGRP_IPV6_ROUTE, + info->nlh, gfp_any()); + return; errout: if (err < 0) rtnl_set_sk_err(net, RTNLGRP_IPV6_ROUTE, err); diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index ed587be1e1c..2760b62dc2c 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -1760,12 +1760,18 @@ int nlmsg_notify(struct sock *sk, struct sk_buff *skb, u32 pid, exclude_pid = pid; } - /* errors reported via destination sk->sk_err */ - nlmsg_multicast(sk, skb, exclude_pid, group, flags); + /* errors reported via destination sk->sk_err, but propagate + * delivery errors if NETLINK_BROADCAST_ERROR flag is set */ + err = nlmsg_multicast(sk, skb, exclude_pid, group, flags); } - if (report) - err = nlmsg_unicast(sk, skb, pid); + if (report) { + int err2; + + err2 = nlmsg_unicast(sk, skb, pid); + if (!err || err == -ESRCH) + err = err2; + } return err; } diff --git a/net/phonet/pn_netlink.c b/net/phonet/pn_netlink.c index 1ceea1f9241..cec4e595168 100644 --- a/net/phonet/pn_netlink.c +++ b/net/phonet/pn_netlink.c @@ -47,8 +47,9 @@ static void rtmsg_notify(int event, struct net_device *dev, u8 addr) kfree_skb(skb); goto errout; } - err = rtnl_notify(skb, dev_net(dev), 0, - RTNLGRP_PHONET_IFADDR, NULL, GFP_KERNEL); + rtnl_notify(skb, dev_net(dev), 0, + RTNLGRP_PHONET_IFADDR, NULL, GFP_KERNEL); + return; errout: if (err < 0) rtnl_set_sk_err(dev_net(dev), RTNLGRP_PHONET_IFADDR, err); -- cgit v1.2.3-70-g09d2 From 91744f6559393697e13bf0f9f3f35f884e2520f9 Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Wed, 25 Feb 2009 00:34:41 +0000 Subject: netlink: remove some pointless conditionals before kfree_skb() Remove some pointless conditionals before kfree_skb(). Signed-off-by: Wei Yongjun Signed-off-by: David S. Miller --- net/netlink/af_netlink.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) (limited to 'net/netlink') diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index 2760b62dc2c..e57d700bf6d 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -1049,8 +1049,7 @@ int netlink_broadcast(struct sock *ssk, struct sk_buff *skb, u32 pid, netlink_unlock_table(); - if (info.skb2) - kfree_skb(info.skb2); + kfree_skb(info.skb2); if (info.delivery_failure) return -ENOBUFS; @@ -1542,8 +1541,7 @@ EXPORT_SYMBOL(netlink_set_nonroot); static void netlink_destroy_callback(struct netlink_callback *cb) { - if (cb->skb) - kfree_skb(cb->skb); + kfree_skb(cb->skb); kfree(cb); } -- cgit v1.2.3-70-g09d2 From e9cc8bddaea3944fabfebb968bc88d603239beed Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Wed, 4 Mar 2009 14:53:30 +0800 Subject: netlink: Move netlink attribute parsing support to lib Netlink attribute parsing may be used even if CONFIG_NET is not set. Move it from net/netlink to lib and control its inclusion based on the new config symbol CONFIG_NLATTR, which is selected by CONFIG_NET. Signed-off-by: Geert Uytterhoeven Acked-by: David S. Miller Signed-off-by: Herbert Xu --- lib/Kconfig | 6 + lib/Makefile | 2 + lib/nlattr.c | 473 +++++++++++++++++++++++++++++++++++++++++++++++++++ net/Kconfig | 1 + net/netlink/Makefile | 2 +- net/netlink/attr.c | 473 --------------------------------------------------- 6 files changed, 483 insertions(+), 474 deletions(-) create mode 100644 lib/nlattr.c delete mode 100644 net/netlink/attr.c (limited to 'net/netlink') diff --git a/lib/Kconfig b/lib/Kconfig index 03c2c24b908..cea9e30a88f 100644 --- a/lib/Kconfig +++ b/lib/Kconfig @@ -174,4 +174,10 @@ config DISABLE_OBSOLETE_CPUMASK_FUNCTIONS bool "Disable obsolete cpumask functions" if DEBUG_PER_CPU_MAPS depends on EXPERIMENTAL && BROKEN +# +# Netlink attribute parsing support is select'ed if needed +# +config NLATTR + bool + endmenu diff --git a/lib/Makefile b/lib/Makefile index 32b0e64ded2..b2c09da02ca 100644 --- a/lib/Makefile +++ b/lib/Makefile @@ -84,6 +84,8 @@ obj-$(CONFIG_HAVE_ARCH_TRACEHOOK) += syscall.o obj-$(CONFIG_DYNAMIC_PRINTK_DEBUG) += dynamic_printk.o +obj-$(CONFIG_NLATTR) += nlattr.o + hostprogs-y := gen_crc32table clean-files := crc32table.h diff --git a/lib/nlattr.c b/lib/nlattr.c new file mode 100644 index 00000000000..56c3ce7fe29 --- /dev/null +++ b/lib/nlattr.c @@ -0,0 +1,473 @@ +/* + * NETLINK Netlink attributes + * + * Authors: Thomas Graf + * Alexey Kuznetsov + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +static u16 nla_attr_minlen[NLA_TYPE_MAX+1] __read_mostly = { + [NLA_U8] = sizeof(u8), + [NLA_U16] = sizeof(u16), + [NLA_U32] = sizeof(u32), + [NLA_U64] = sizeof(u64), + [NLA_NESTED] = NLA_HDRLEN, +}; + +static int validate_nla(struct nlattr *nla, int maxtype, + const struct nla_policy *policy) +{ + const struct nla_policy *pt; + int minlen = 0, attrlen = nla_len(nla), type = nla_type(nla); + + if (type <= 0 || type > maxtype) + return 0; + + pt = &policy[type]; + + BUG_ON(pt->type > NLA_TYPE_MAX); + + switch (pt->type) { + case NLA_FLAG: + if (attrlen > 0) + return -ERANGE; + break; + + case NLA_NUL_STRING: + if (pt->len) + minlen = min_t(int, attrlen, pt->len + 1); + else + minlen = attrlen; + + if (!minlen || memchr(nla_data(nla), '\0', minlen) == NULL) + return -EINVAL; + /* fall through */ + + case NLA_STRING: + if (attrlen < 1) + return -ERANGE; + + if (pt->len) { + char *buf = nla_data(nla); + + if (buf[attrlen - 1] == '\0') + attrlen--; + + if (attrlen > pt->len) + return -ERANGE; + } + break; + + case NLA_BINARY: + if (pt->len && attrlen > pt->len) + return -ERANGE; + break; + + case NLA_NESTED_COMPAT: + if (attrlen < pt->len) + return -ERANGE; + if (attrlen < NLA_ALIGN(pt->len)) + break; + if (attrlen < NLA_ALIGN(pt->len) + NLA_HDRLEN) + return -ERANGE; + nla = nla_data(nla) + NLA_ALIGN(pt->len); + if (attrlen < NLA_ALIGN(pt->len) + NLA_HDRLEN + nla_len(nla)) + return -ERANGE; + break; + case NLA_NESTED: + /* a nested attributes is allowed to be empty; if its not, + * it must have a size of at least NLA_HDRLEN. + */ + if (attrlen == 0) + break; + default: + if (pt->len) + minlen = pt->len; + else if (pt->type != NLA_UNSPEC) + minlen = nla_attr_minlen[pt->type]; + + if (attrlen < minlen) + return -ERANGE; + } + + return 0; +} + +/** + * nla_validate - Validate a stream of attributes + * @head: head of attribute stream + * @len: length of attribute stream + * @maxtype: maximum attribute type to be expected + * @policy: validation policy + * + * Validates all attributes in the specified attribute stream against the + * specified policy. Attributes with a type exceeding maxtype will be + * ignored. See documenation of struct nla_policy for more details. + * + * Returns 0 on success or a negative error code. + */ +int nla_validate(struct nlattr *head, int len, int maxtype, + const struct nla_policy *policy) +{ + struct nlattr *nla; + int rem, err; + + nla_for_each_attr(nla, head, len, rem) { + err = validate_nla(nla, maxtype, policy); + if (err < 0) + goto errout; + } + + err = 0; +errout: + return err; +} + +/** + * nla_parse - Parse a stream of attributes into a tb buffer + * @tb: destination array with maxtype+1 elements + * @maxtype: maximum attribute type to be expected + * @head: head of attribute stream + * @len: length of attribute stream + * @policy: validation policy + * + * Parses a stream of attributes and stores a pointer to each attribute in + * the tb array accessable via the attribute type. Attributes with a type + * exceeding maxtype will be silently ignored for backwards compatibility + * reasons. policy may be set to NULL if no validation is required. + * + * Returns 0 on success or a negative error code. + */ +int nla_parse(struct nlattr *tb[], int maxtype, struct nlattr *head, int len, + const struct nla_policy *policy) +{ + struct nlattr *nla; + int rem, err; + + memset(tb, 0, sizeof(struct nlattr *) * (maxtype + 1)); + + nla_for_each_attr(nla, head, len, rem) { + u16 type = nla_type(nla); + + if (type > 0 && type <= maxtype) { + if (policy) { + err = validate_nla(nla, maxtype, policy); + if (err < 0) + goto errout; + } + + tb[type] = nla; + } + } + + if (unlikely(rem > 0)) + printk(KERN_WARNING "netlink: %d bytes leftover after parsing " + "attributes.\n", rem); + + err = 0; +errout: + return err; +} + +/** + * nla_find - Find a specific attribute in a stream of attributes + * @head: head of attribute stream + * @len: length of attribute stream + * @attrtype: type of attribute to look for + * + * Returns the first attribute in the stream matching the specified type. + */ +struct nlattr *nla_find(struct nlattr *head, int len, int attrtype) +{ + struct nlattr *nla; + int rem; + + nla_for_each_attr(nla, head, len, rem) + if (nla_type(nla) == attrtype) + return nla; + + return NULL; +} + +/** + * nla_strlcpy - Copy string attribute payload into a sized buffer + * @dst: where to copy the string to + * @nla: attribute to copy the string from + * @dstsize: size of destination buffer + * + * Copies at most dstsize - 1 bytes into the destination buffer. + * The result is always a valid NUL-terminated string. Unlike + * strlcpy the destination buffer is always padded out. + * + * Returns the length of the source buffer. + */ +size_t nla_strlcpy(char *dst, const struct nlattr *nla, size_t dstsize) +{ + size_t srclen = nla_len(nla); + char *src = nla_data(nla); + + if (srclen > 0 && src[srclen - 1] == '\0') + srclen--; + + if (dstsize > 0) { + size_t len = (srclen >= dstsize) ? dstsize - 1 : srclen; + + memset(dst, 0, dstsize); + memcpy(dst, src, len); + } + + return srclen; +} + +/** + * nla_memcpy - Copy a netlink attribute into another memory area + * @dest: where to copy to memcpy + * @src: netlink attribute to copy from + * @count: size of the destination area + * + * Note: The number of bytes copied is limited by the length of + * attribute's payload. memcpy + * + * Returns the number of bytes copied. + */ +int nla_memcpy(void *dest, const struct nlattr *src, int count) +{ + int minlen = min_t(int, count, nla_len(src)); + + memcpy(dest, nla_data(src), minlen); + + return minlen; +} + +/** + * nla_memcmp - Compare an attribute with sized memory area + * @nla: netlink attribute + * @data: memory area + * @size: size of memory area + */ +int nla_memcmp(const struct nlattr *nla, const void *data, + size_t size) +{ + int d = nla_len(nla) - size; + + if (d == 0) + d = memcmp(nla_data(nla), data, size); + + return d; +} + +/** + * nla_strcmp - Compare a string attribute against a string + * @nla: netlink string attribute + * @str: another string + */ +int nla_strcmp(const struct nlattr *nla, const char *str) +{ + int len = strlen(str) + 1; + int d = nla_len(nla) - len; + + if (d == 0) + d = memcmp(nla_data(nla), str, len); + + return d; +} + +/** + * __nla_reserve - reserve room for attribute on the skb + * @skb: socket buffer to reserve room on + * @attrtype: attribute type + * @attrlen: length of attribute payload + * + * Adds a netlink attribute header to a socket buffer and reserves + * room for the payload but does not copy it. + * + * The caller is responsible to ensure that the skb provides enough + * tailroom for the attribute header and payload. + */ +struct nlattr *__nla_reserve(struct sk_buff *skb, int attrtype, int attrlen) +{ + struct nlattr *nla; + + nla = (struct nlattr *) skb_put(skb, nla_total_size(attrlen)); + nla->nla_type = attrtype; + nla->nla_len = nla_attr_size(attrlen); + + memset((unsigned char *) nla + nla->nla_len, 0, nla_padlen(attrlen)); + + return nla; +} + +/** + * __nla_reserve_nohdr - reserve room for attribute without header + * @skb: socket buffer to reserve room on + * @attrlen: length of attribute payload + * + * Reserves room for attribute payload without a header. + * + * The caller is responsible to ensure that the skb provides enough + * tailroom for the payload. + */ +void *__nla_reserve_nohdr(struct sk_buff *skb, int attrlen) +{ + void *start; + + start = skb_put(skb, NLA_ALIGN(attrlen)); + memset(start, 0, NLA_ALIGN(attrlen)); + + return start; +} + +/** + * nla_reserve - reserve room for attribute on the skb + * @skb: socket buffer to reserve room on + * @attrtype: attribute type + * @attrlen: length of attribute payload + * + * Adds a netlink attribute header to a socket buffer and reserves + * room for the payload but does not copy it. + * + * Returns NULL if the tailroom of the skb is insufficient to store + * the attribute header and payload. + */ +struct nlattr *nla_reserve(struct sk_buff *skb, int attrtype, int attrlen) +{ + if (unlikely(skb_tailroom(skb) < nla_total_size(attrlen))) + return NULL; + + return __nla_reserve(skb, attrtype, attrlen); +} + +/** + * nla_reserve_nohdr - reserve room for attribute without header + * @skb: socket buffer to reserve room on + * @attrlen: length of attribute payload + * + * Reserves room for attribute payload without a header. + * + * Returns NULL if the tailroom of the skb is insufficient to store + * the attribute payload. + */ +void *nla_reserve_nohdr(struct sk_buff *skb, int attrlen) +{ + if (unlikely(skb_tailroom(skb) < NLA_ALIGN(attrlen))) + return NULL; + + return __nla_reserve_nohdr(skb, attrlen); +} + +/** + * __nla_put - Add a netlink attribute to a socket buffer + * @skb: socket buffer to add attribute to + * @attrtype: attribute type + * @attrlen: length of attribute payload + * @data: head of attribute payload + * + * The caller is responsible to ensure that the skb provides enough + * tailroom for the attribute header and payload. + */ +void __nla_put(struct sk_buff *skb, int attrtype, int attrlen, + const void *data) +{ + struct nlattr *nla; + + nla = __nla_reserve(skb, attrtype, attrlen); + memcpy(nla_data(nla), data, attrlen); +} + +/** + * __nla_put_nohdr - Add a netlink attribute without header + * @skb: socket buffer to add attribute to + * @attrlen: length of attribute payload + * @data: head of attribute payload + * + * The caller is responsible to ensure that the skb provides enough + * tailroom for the attribute payload. + */ +void __nla_put_nohdr(struct sk_buff *skb, int attrlen, const void *data) +{ + void *start; + + start = __nla_reserve_nohdr(skb, attrlen); + memcpy(start, data, attrlen); +} + +/** + * nla_put - Add a netlink attribute to a socket buffer + * @skb: socket buffer to add attribute to + * @attrtype: attribute type + * @attrlen: length of attribute payload + * @data: head of attribute payload + * + * Returns -EMSGSIZE if the tailroom of the skb is insufficient to store + * the attribute header and payload. + */ +int nla_put(struct sk_buff *skb, int attrtype, int attrlen, const void *data) +{ + if (unlikely(skb_tailroom(skb) < nla_total_size(attrlen))) + return -EMSGSIZE; + + __nla_put(skb, attrtype, attrlen, data); + return 0; +} + +/** + * nla_put_nohdr - Add a netlink attribute without header + * @skb: socket buffer to add attribute to + * @attrlen: length of attribute payload + * @data: head of attribute payload + * + * Returns -EMSGSIZE if the tailroom of the skb is insufficient to store + * the attribute payload. + */ +int nla_put_nohdr(struct sk_buff *skb, int attrlen, const void *data) +{ + if (unlikely(skb_tailroom(skb) < NLA_ALIGN(attrlen))) + return -EMSGSIZE; + + __nla_put_nohdr(skb, attrlen, data); + return 0; +} + +/** + * nla_append - Add a netlink attribute without header or padding + * @skb: socket buffer to add attribute to + * @attrlen: length of attribute payload + * @data: head of attribute payload + * + * Returns -EMSGSIZE if the tailroom of the skb is insufficient to store + * the attribute payload. + */ +int nla_append(struct sk_buff *skb, int attrlen, const void *data) +{ + if (unlikely(skb_tailroom(skb) < NLA_ALIGN(attrlen))) + return -EMSGSIZE; + + memcpy(skb_put(skb, attrlen), data, attrlen); + return 0; +} + +EXPORT_SYMBOL(nla_validate); +EXPORT_SYMBOL(nla_parse); +EXPORT_SYMBOL(nla_find); +EXPORT_SYMBOL(nla_strlcpy); +EXPORT_SYMBOL(__nla_reserve); +EXPORT_SYMBOL(__nla_reserve_nohdr); +EXPORT_SYMBOL(nla_reserve); +EXPORT_SYMBOL(nla_reserve_nohdr); +EXPORT_SYMBOL(__nla_put); +EXPORT_SYMBOL(__nla_put_nohdr); +EXPORT_SYMBOL(nla_put); +EXPORT_SYMBOL(nla_put_nohdr); +EXPORT_SYMBOL(nla_memcpy); +EXPORT_SYMBOL(nla_memcmp); +EXPORT_SYMBOL(nla_strcmp); +EXPORT_SYMBOL(nla_append); diff --git a/net/Kconfig b/net/Kconfig index cdb8fdef6c4..eab40a48135 100644 --- a/net/Kconfig +++ b/net/Kconfig @@ -4,6 +4,7 @@ menuconfig NET bool "Networking support" + select NLATTR ---help--- Unless you really know what you are doing, you should say Y here. The reason is that some programs need kernel networking support even diff --git a/net/netlink/Makefile b/net/netlink/Makefile index e3589c2de49..bdd6ddf4e95 100644 --- a/net/netlink/Makefile +++ b/net/netlink/Makefile @@ -2,4 +2,4 @@ # Makefile for the netlink driver. # -obj-y := af_netlink.o attr.o genetlink.o +obj-y := af_netlink.o genetlink.o diff --git a/net/netlink/attr.c b/net/netlink/attr.c deleted file mode 100644 index 56c3ce7fe29..00000000000 --- a/net/netlink/attr.c +++ /dev/null @@ -1,473 +0,0 @@ -/* - * NETLINK Netlink attributes - * - * Authors: Thomas Graf - * Alexey Kuznetsov - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include - -static u16 nla_attr_minlen[NLA_TYPE_MAX+1] __read_mostly = { - [NLA_U8] = sizeof(u8), - [NLA_U16] = sizeof(u16), - [NLA_U32] = sizeof(u32), - [NLA_U64] = sizeof(u64), - [NLA_NESTED] = NLA_HDRLEN, -}; - -static int validate_nla(struct nlattr *nla, int maxtype, - const struct nla_policy *policy) -{ - const struct nla_policy *pt; - int minlen = 0, attrlen = nla_len(nla), type = nla_type(nla); - - if (type <= 0 || type > maxtype) - return 0; - - pt = &policy[type]; - - BUG_ON(pt->type > NLA_TYPE_MAX); - - switch (pt->type) { - case NLA_FLAG: - if (attrlen > 0) - return -ERANGE; - break; - - case NLA_NUL_STRING: - if (pt->len) - minlen = min_t(int, attrlen, pt->len + 1); - else - minlen = attrlen; - - if (!minlen || memchr(nla_data(nla), '\0', minlen) == NULL) - return -EINVAL; - /* fall through */ - - case NLA_STRING: - if (attrlen < 1) - return -ERANGE; - - if (pt->len) { - char *buf = nla_data(nla); - - if (buf[attrlen - 1] == '\0') - attrlen--; - - if (attrlen > pt->len) - return -ERANGE; - } - break; - - case NLA_BINARY: - if (pt->len && attrlen > pt->len) - return -ERANGE; - break; - - case NLA_NESTED_COMPAT: - if (attrlen < pt->len) - return -ERANGE; - if (attrlen < NLA_ALIGN(pt->len)) - break; - if (attrlen < NLA_ALIGN(pt->len) + NLA_HDRLEN) - return -ERANGE; - nla = nla_data(nla) + NLA_ALIGN(pt->len); - if (attrlen < NLA_ALIGN(pt->len) + NLA_HDRLEN + nla_len(nla)) - return -ERANGE; - break; - case NLA_NESTED: - /* a nested attributes is allowed to be empty; if its not, - * it must have a size of at least NLA_HDRLEN. - */ - if (attrlen == 0) - break; - default: - if (pt->len) - minlen = pt->len; - else if (pt->type != NLA_UNSPEC) - minlen = nla_attr_minlen[pt->type]; - - if (attrlen < minlen) - return -ERANGE; - } - - return 0; -} - -/** - * nla_validate - Validate a stream of attributes - * @head: head of attribute stream - * @len: length of attribute stream - * @maxtype: maximum attribute type to be expected - * @policy: validation policy - * - * Validates all attributes in the specified attribute stream against the - * specified policy. Attributes with a type exceeding maxtype will be - * ignored. See documenation of struct nla_policy for more details. - * - * Returns 0 on success or a negative error code. - */ -int nla_validate(struct nlattr *head, int len, int maxtype, - const struct nla_policy *policy) -{ - struct nlattr *nla; - int rem, err; - - nla_for_each_attr(nla, head, len, rem) { - err = validate_nla(nla, maxtype, policy); - if (err < 0) - goto errout; - } - - err = 0; -errout: - return err; -} - -/** - * nla_parse - Parse a stream of attributes into a tb buffer - * @tb: destination array with maxtype+1 elements - * @maxtype: maximum attribute type to be expected - * @head: head of attribute stream - * @len: length of attribute stream - * @policy: validation policy - * - * Parses a stream of attributes and stores a pointer to each attribute in - * the tb array accessable via the attribute type. Attributes with a type - * exceeding maxtype will be silently ignored for backwards compatibility - * reasons. policy may be set to NULL if no validation is required. - * - * Returns 0 on success or a negative error code. - */ -int nla_parse(struct nlattr *tb[], int maxtype, struct nlattr *head, int len, - const struct nla_policy *policy) -{ - struct nlattr *nla; - int rem, err; - - memset(tb, 0, sizeof(struct nlattr *) * (maxtype + 1)); - - nla_for_each_attr(nla, head, len, rem) { - u16 type = nla_type(nla); - - if (type > 0 && type <= maxtype) { - if (policy) { - err = validate_nla(nla, maxtype, policy); - if (err < 0) - goto errout; - } - - tb[type] = nla; - } - } - - if (unlikely(rem > 0)) - printk(KERN_WARNING "netlink: %d bytes leftover after parsing " - "attributes.\n", rem); - - err = 0; -errout: - return err; -} - -/** - * nla_find - Find a specific attribute in a stream of attributes - * @head: head of attribute stream - * @len: length of attribute stream - * @attrtype: type of attribute to look for - * - * Returns the first attribute in the stream matching the specified type. - */ -struct nlattr *nla_find(struct nlattr *head, int len, int attrtype) -{ - struct nlattr *nla; - int rem; - - nla_for_each_attr(nla, head, len, rem) - if (nla_type(nla) == attrtype) - return nla; - - return NULL; -} - -/** - * nla_strlcpy - Copy string attribute payload into a sized buffer - * @dst: where to copy the string to - * @nla: attribute to copy the string from - * @dstsize: size of destination buffer - * - * Copies at most dstsize - 1 bytes into the destination buffer. - * The result is always a valid NUL-terminated string. Unlike - * strlcpy the destination buffer is always padded out. - * - * Returns the length of the source buffer. - */ -size_t nla_strlcpy(char *dst, const struct nlattr *nla, size_t dstsize) -{ - size_t srclen = nla_len(nla); - char *src = nla_data(nla); - - if (srclen > 0 && src[srclen - 1] == '\0') - srclen--; - - if (dstsize > 0) { - size_t len = (srclen >= dstsize) ? dstsize - 1 : srclen; - - memset(dst, 0, dstsize); - memcpy(dst, src, len); - } - - return srclen; -} - -/** - * nla_memcpy - Copy a netlink attribute into another memory area - * @dest: where to copy to memcpy - * @src: netlink attribute to copy from - * @count: size of the destination area - * - * Note: The number of bytes copied is limited by the length of - * attribute's payload. memcpy - * - * Returns the number of bytes copied. - */ -int nla_memcpy(void *dest, const struct nlattr *src, int count) -{ - int minlen = min_t(int, count, nla_len(src)); - - memcpy(dest, nla_data(src), minlen); - - return minlen; -} - -/** - * nla_memcmp - Compare an attribute with sized memory area - * @nla: netlink attribute - * @data: memory area - * @size: size of memory area - */ -int nla_memcmp(const struct nlattr *nla, const void *data, - size_t size) -{ - int d = nla_len(nla) - size; - - if (d == 0) - d = memcmp(nla_data(nla), data, size); - - return d; -} - -/** - * nla_strcmp - Compare a string attribute against a string - * @nla: netlink string attribute - * @str: another string - */ -int nla_strcmp(const struct nlattr *nla, const char *str) -{ - int len = strlen(str) + 1; - int d = nla_len(nla) - len; - - if (d == 0) - d = memcmp(nla_data(nla), str, len); - - return d; -} - -/** - * __nla_reserve - reserve room for attribute on the skb - * @skb: socket buffer to reserve room on - * @attrtype: attribute type - * @attrlen: length of attribute payload - * - * Adds a netlink attribute header to a socket buffer and reserves - * room for the payload but does not copy it. - * - * The caller is responsible to ensure that the skb provides enough - * tailroom for the attribute header and payload. - */ -struct nlattr *__nla_reserve(struct sk_buff *skb, int attrtype, int attrlen) -{ - struct nlattr *nla; - - nla = (struct nlattr *) skb_put(skb, nla_total_size(attrlen)); - nla->nla_type = attrtype; - nla->nla_len = nla_attr_size(attrlen); - - memset((unsigned char *) nla + nla->nla_len, 0, nla_padlen(attrlen)); - - return nla; -} - -/** - * __nla_reserve_nohdr - reserve room for attribute without header - * @skb: socket buffer to reserve room on - * @attrlen: length of attribute payload - * - * Reserves room for attribute payload without a header. - * - * The caller is responsible to ensure that the skb provides enough - * tailroom for the payload. - */ -void *__nla_reserve_nohdr(struct sk_buff *skb, int attrlen) -{ - void *start; - - start = skb_put(skb, NLA_ALIGN(attrlen)); - memset(start, 0, NLA_ALIGN(attrlen)); - - return start; -} - -/** - * nla_reserve - reserve room for attribute on the skb - * @skb: socket buffer to reserve room on - * @attrtype: attribute type - * @attrlen: length of attribute payload - * - * Adds a netlink attribute header to a socket buffer and reserves - * room for the payload but does not copy it. - * - * Returns NULL if the tailroom of the skb is insufficient to store - * the attribute header and payload. - */ -struct nlattr *nla_reserve(struct sk_buff *skb, int attrtype, int attrlen) -{ - if (unlikely(skb_tailroom(skb) < nla_total_size(attrlen))) - return NULL; - - return __nla_reserve(skb, attrtype, attrlen); -} - -/** - * nla_reserve_nohdr - reserve room for attribute without header - * @skb: socket buffer to reserve room on - * @attrlen: length of attribute payload - * - * Reserves room for attribute payload without a header. - * - * Returns NULL if the tailroom of the skb is insufficient to store - * the attribute payload. - */ -void *nla_reserve_nohdr(struct sk_buff *skb, int attrlen) -{ - if (unlikely(skb_tailroom(skb) < NLA_ALIGN(attrlen))) - return NULL; - - return __nla_reserve_nohdr(skb, attrlen); -} - -/** - * __nla_put - Add a netlink attribute to a socket buffer - * @skb: socket buffer to add attribute to - * @attrtype: attribute type - * @attrlen: length of attribute payload - * @data: head of attribute payload - * - * The caller is responsible to ensure that the skb provides enough - * tailroom for the attribute header and payload. - */ -void __nla_put(struct sk_buff *skb, int attrtype, int attrlen, - const void *data) -{ - struct nlattr *nla; - - nla = __nla_reserve(skb, attrtype, attrlen); - memcpy(nla_data(nla), data, attrlen); -} - -/** - * __nla_put_nohdr - Add a netlink attribute without header - * @skb: socket buffer to add attribute to - * @attrlen: length of attribute payload - * @data: head of attribute payload - * - * The caller is responsible to ensure that the skb provides enough - * tailroom for the attribute payload. - */ -void __nla_put_nohdr(struct sk_buff *skb, int attrlen, const void *data) -{ - void *start; - - start = __nla_reserve_nohdr(skb, attrlen); - memcpy(start, data, attrlen); -} - -/** - * nla_put - Add a netlink attribute to a socket buffer - * @skb: socket buffer to add attribute to - * @attrtype: attribute type - * @attrlen: length of attribute payload - * @data: head of attribute payload - * - * Returns -EMSGSIZE if the tailroom of the skb is insufficient to store - * the attribute header and payload. - */ -int nla_put(struct sk_buff *skb, int attrtype, int attrlen, const void *data) -{ - if (unlikely(skb_tailroom(skb) < nla_total_size(attrlen))) - return -EMSGSIZE; - - __nla_put(skb, attrtype, attrlen, data); - return 0; -} - -/** - * nla_put_nohdr - Add a netlink attribute without header - * @skb: socket buffer to add attribute to - * @attrlen: length of attribute payload - * @data: head of attribute payload - * - * Returns -EMSGSIZE if the tailroom of the skb is insufficient to store - * the attribute payload. - */ -int nla_put_nohdr(struct sk_buff *skb, int attrlen, const void *data) -{ - if (unlikely(skb_tailroom(skb) < NLA_ALIGN(attrlen))) - return -EMSGSIZE; - - __nla_put_nohdr(skb, attrlen, data); - return 0; -} - -/** - * nla_append - Add a netlink attribute without header or padding - * @skb: socket buffer to add attribute to - * @attrlen: length of attribute payload - * @data: head of attribute payload - * - * Returns -EMSGSIZE if the tailroom of the skb is insufficient to store - * the attribute payload. - */ -int nla_append(struct sk_buff *skb, int attrlen, const void *data) -{ - if (unlikely(skb_tailroom(skb) < NLA_ALIGN(attrlen))) - return -EMSGSIZE; - - memcpy(skb_put(skb, attrlen), data, attrlen); - return 0; -} - -EXPORT_SYMBOL(nla_validate); -EXPORT_SYMBOL(nla_parse); -EXPORT_SYMBOL(nla_find); -EXPORT_SYMBOL(nla_strlcpy); -EXPORT_SYMBOL(__nla_reserve); -EXPORT_SYMBOL(__nla_reserve_nohdr); -EXPORT_SYMBOL(nla_reserve); -EXPORT_SYMBOL(nla_reserve_nohdr); -EXPORT_SYMBOL(__nla_put); -EXPORT_SYMBOL(__nla_put_nohdr); -EXPORT_SYMBOL(nla_put); -EXPORT_SYMBOL(nla_put_nohdr); -EXPORT_SYMBOL(nla_memcpy); -EXPORT_SYMBOL(nla_memcmp); -EXPORT_SYMBOL(nla_strcmp); -EXPORT_SYMBOL(nla_append); -- cgit v1.2.3-70-g09d2 From dd5b6ce6fd465eab90357711c8e8124dc3a31ff0 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Mon, 23 Mar 2009 13:21:06 +0100 Subject: nefilter: nfnetlink: add nfnetlink_set_err and use it in ctnetlink This patch adds nfnetlink_set_err() to propagate the error to netlink broadcast listener in case of memory allocation errors in the message building. Signed-off-by: Pablo Neira Ayuso Signed-off-by: Patrick McHardy --- include/linux/netfilter/nfnetlink.h | 1 + net/netfilter/nf_conntrack_netlink.c | 2 ++ net/netfilter/nfnetlink.c | 6 ++++++ net/netlink/af_netlink.c | 1 + 4 files changed, 10 insertions(+) (limited to 'net/netlink') diff --git a/include/linux/netfilter/nfnetlink.h b/include/linux/netfilter/nfnetlink.h index 7d8e0455cca..135e5cfe68a 100644 --- a/include/linux/netfilter/nfnetlink.h +++ b/include/linux/netfilter/nfnetlink.h @@ -76,6 +76,7 @@ extern int nfnetlink_subsys_unregister(const struct nfnetlink_subsystem *n); extern int nfnetlink_has_listeners(unsigned int group); extern int nfnetlink_send(struct sk_buff *skb, u32 pid, unsigned group, int echo); +extern void nfnetlink_set_err(u32 pid, u32 group, int error); extern int nfnetlink_unicast(struct sk_buff *skb, u_int32_t pid, int flags); extern void nfnl_lock(void); diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c index d1fe9d15ac5..1b75c9efb0e 100644 --- a/net/netfilter/nf_conntrack_netlink.c +++ b/net/netfilter/nf_conntrack_netlink.c @@ -518,6 +518,7 @@ static int ctnetlink_conntrack_event(struct notifier_block *this, nla_put_failure: rcu_read_unlock(); nlmsg_failure: + nfnetlink_set_err(0, group, -ENOBUFS); kfree_skb(skb); return NOTIFY_DONE; } @@ -1514,6 +1515,7 @@ static int ctnetlink_expect_event(struct notifier_block *this, nla_put_failure: rcu_read_unlock(); nlmsg_failure: + nfnetlink_set_err(0, 0, -ENOBUFS); kfree_skb(skb); return NOTIFY_DONE; } diff --git a/net/netfilter/nfnetlink.c b/net/netfilter/nfnetlink.c index 9c0ba17a1dd..2785d66a7e3 100644 --- a/net/netfilter/nfnetlink.c +++ b/net/netfilter/nfnetlink.c @@ -113,6 +113,12 @@ int nfnetlink_send(struct sk_buff *skb, u32 pid, unsigned group, int echo) } EXPORT_SYMBOL_GPL(nfnetlink_send); +void nfnetlink_set_err(u32 pid, u32 group, int error) +{ + netlink_set_err(nfnl, pid, group, error); +} +EXPORT_SYMBOL_GPL(nfnetlink_set_err); + int nfnetlink_unicast(struct sk_buff *skb, u_int32_t pid, int flags) { return netlink_unicast(nfnl, skb, pid, flags); diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index 6ee69c27f80..5b33879c642 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -1106,6 +1106,7 @@ void netlink_set_err(struct sock *ssk, u32 pid, u32 group, int code) read_unlock(&nl_table_lock); } +EXPORT_SYMBOL(netlink_set_err); /* must be called with netlink table grabbed */ static void netlink_update_socket_mc(struct netlink_sock *nlk, -- cgit v1.2.3-70-g09d2 From 38938bfe3489394e2eed5e40c9bb8f66a2ce1405 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Tue, 24 Mar 2009 16:37:55 -0700 Subject: netlink: add NETLINK_NO_ENOBUFS socket flag This patch adds the NETLINK_NO_ENOBUFS socket flag. This flag can be used by unicast and broadcast listeners to avoid receiving ENOBUFS errors. Generally speaking, ENOBUFS errors are useful to notify two things to the listener: a) You may increase the receiver buffer size via setsockopt(). b) You have lost messages, you may be out of sync. In some cases, ignoring ENOBUFS errors can be useful. For example: a) nfnetlink_queue: this subsystem does not have any sort of resync method and you can decide to ignore ENOBUFS once you have set a given buffer size. b) ctnetlink: you can use this together with the socket flag NETLINK_BROADCAST_SEND_ERROR to stop getting ENOBUFS errors as you do not need to resync (packets whose event are not delivered are drop to provide reliable logging and state-synchronization). Moreover, the use of NETLINK_NO_ENOBUFS also reduces a "go up, go down" effect in terms of performance which is due to the netlink congestion control when the listener cannot back off. The effect is the following: 1) throughput rate goes up and netlink messages are inserted in the receiver buffer. 2) Then, netlink buffer fills and overruns (set on nlk->state bit 0). 3) While the listener empties the receiver buffer, netlink keeps dropping messages. Thus, throughput goes dramatically down. 4) Then, once the listener has emptied the buffer (nlk->state bit 0 is set off), goto step 1. This effect is easy to trigger with netlink broadcast under heavy load, and it is more noticeable when using a big receiver buffer. You can find some results in [1] that show this problem. [1] http://1984.lsi.us.es/linux/netlink/ This patch also includes the use of sk_drop to account the number of netlink messages drop due to overrun. This value is shown in /proc/net/netlink. Signed-off-by: Pablo Neira Ayuso Signed-off-by: David S. Miller --- include/linux/netlink.h | 1 + net/netlink/af_netlink.c | 38 ++++++++++++++++++++++++++++++++------ 2 files changed, 33 insertions(+), 6 deletions(-) (limited to 'net/netlink') diff --git a/include/linux/netlink.h b/include/linux/netlink.h index 1e6bf995435..5ba398e9030 100644 --- a/include/linux/netlink.h +++ b/include/linux/netlink.h @@ -104,6 +104,7 @@ struct nlmsgerr #define NETLINK_DROP_MEMBERSHIP 2 #define NETLINK_PKTINFO 3 #define NETLINK_BROADCAST_ERROR 4 +#define NETLINK_NO_ENOBUFS 5 struct nl_pktinfo { diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index b73d4e61c5a..8b6bbb3032b 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -86,6 +86,7 @@ struct netlink_sock { #define NETLINK_KERNEL_SOCKET 0x1 #define NETLINK_RECV_PKTINFO 0x2 #define NETLINK_BROADCAST_SEND_ERROR 0x4 +#define NETLINK_RECV_NO_ENOBUFS 0x8 static inline struct netlink_sock *nlk_sk(struct sock *sk) { @@ -717,10 +718,15 @@ static int netlink_getname(struct socket *sock, struct sockaddr *addr, static void netlink_overrun(struct sock *sk) { - if (!test_and_set_bit(0, &nlk_sk(sk)->state)) { - sk->sk_err = ENOBUFS; - sk->sk_error_report(sk); + struct netlink_sock *nlk = nlk_sk(sk); + + if (!(nlk->flags & NETLINK_RECV_NO_ENOBUFS)) { + if (!test_and_set_bit(0, &nlk_sk(sk)->state)) { + sk->sk_err = ENOBUFS; + sk->sk_error_report(sk); + } } + atomic_inc(&sk->sk_drops); } static struct sock *netlink_getsockbypid(struct sock *ssk, u32 pid) @@ -1182,6 +1188,15 @@ static int netlink_setsockopt(struct socket *sock, int level, int optname, nlk->flags &= ~NETLINK_BROADCAST_SEND_ERROR; err = 0; break; + case NETLINK_NO_ENOBUFS: + if (val) { + nlk->flags |= NETLINK_RECV_NO_ENOBUFS; + clear_bit(0, &nlk->state); + wake_up_interruptible(&nlk->wait); + } else + nlk->flags &= ~NETLINK_RECV_NO_ENOBUFS; + err = 0; + break; default: err = -ENOPROTOOPT; } @@ -1224,6 +1239,16 @@ static int netlink_getsockopt(struct socket *sock, int level, int optname, return -EFAULT; err = 0; break; + case NETLINK_NO_ENOBUFS: + if (len < sizeof(int)) + return -EINVAL; + len = sizeof(int); + val = nlk->flags & NETLINK_RECV_NO_ENOBUFS ? 1 : 0; + if (put_user(len, optlen) || + put_user(val, optval)) + return -EFAULT; + err = 0; + break; default: err = -ENOPROTOOPT; } @@ -1879,12 +1904,12 @@ static int netlink_seq_show(struct seq_file *seq, void *v) if (v == SEQ_START_TOKEN) seq_puts(seq, "sk Eth Pid Groups " - "Rmem Wmem Dump Locks\n"); + "Rmem Wmem Dump Locks Drops\n"); else { struct sock *s = v; struct netlink_sock *nlk = nlk_sk(s); - seq_printf(seq, "%p %-3d %-6d %08x %-8d %-8d %p %d\n", + seq_printf(seq, "%p %-3d %-6d %08x %-8d %-8d %p %-8d %-8d\n", s, s->sk_protocol, nlk->pid, @@ -1892,7 +1917,8 @@ static int netlink_seq_show(struct seq_file *seq, void *v) atomic_read(&s->sk_rmem_alloc), atomic_read(&s->sk_wmem_alloc), nlk->cb, - atomic_read(&s->sk_refcnt) + atomic_read(&s->sk_refcnt), + atomic_read(&s->sk_drops) ); } -- cgit v1.2.3-70-g09d2