From dcf1a3573eae69937fb14462369c4d3e6f4a37f1 Mon Sep 17 00:00:00 2001 From: H Hartley Sweeten Date: Wed, 22 Apr 2009 20:18:19 -0400 Subject: net/sunrpc/svc_xprt.c: fix sparse warnings Fix the following sparse warnings in net/sunrpc/svc_xprt.c. warning: symbol 'svc_recv' was not declared. Should it be static? warning: symbol 'svc_drop' was not declared. Should it be static? warning: symbol 'svc_send' was not declared. Should it be static? warning: symbol 'svc_close_all' was not declared. Should it be static? Signed-off-by: H Hartley Sweeten Signed-off-by: J. Bruce Fields --- net/sunrpc/svc_xprt.c | 1 + 1 file changed, 1 insertion(+) (limited to 'net') diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c index c200d92e57e..f200393ac87 100644 --- a/net/sunrpc/svc_xprt.c +++ b/net/sunrpc/svc_xprt.c @@ -11,6 +11,7 @@ #include #include #include +#include #define RPCDBG_FACILITY RPCDBG_SVCXPRT -- cgit v1.2.3-70-g09d2 From abc5c44d6284fab8fb21bcfc52c0f16f980637df Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Thu, 23 Apr 2009 19:31:25 -0400 Subject: SUNRPC: Fix error return value of svc_addr_len() The svc_addr_len() helper function returns -EAFNOSUPPORT if it doesn't recognize the address family of the passed-in socket address. However, the return type of this function is size_t, which means -EAFNOSUPPORT is turned into a very large positive value in this case. The check in svc_udp_recvfrom() to see if the return value is less than zero therefore won't work at all. Additionally, handle_connect_req() passes this value directly to memset(). This could cause memset() to clobber a large chunk of memory if svc_addr_len() has returned an error. Currently the address family of these addresses, however, is known to be supported long before handle_connect_req() is called, so this isn't a real risk. Change the error return value of svc_addr_len() to zero, which fits in the range of size_t, and is safer to pass to memset() directly. Signed-off-by: Chuck Lever Signed-off-by: J. Bruce Fields --- include/linux/sunrpc/svc_xprt.h | 5 +++-- net/sunrpc/svcsock.c | 7 ++++--- 2 files changed, 7 insertions(+), 5 deletions(-) (limited to 'net') diff --git a/include/linux/sunrpc/svc_xprt.h b/include/linux/sunrpc/svc_xprt.h index 0d9cb6ef28b..d790c52525c 100644 --- a/include/linux/sunrpc/svc_xprt.h +++ b/include/linux/sunrpc/svc_xprt.h @@ -118,7 +118,7 @@ static inline unsigned short svc_addr_port(const struct sockaddr *sa) return 0; } -static inline size_t svc_addr_len(struct sockaddr *sa) +static inline size_t svc_addr_len(const struct sockaddr *sa) { switch (sa->sa_family) { case AF_INET: @@ -126,7 +126,8 @@ static inline size_t svc_addr_len(struct sockaddr *sa) case AF_INET6: return sizeof(struct sockaddr_in6); } - return -EAFNOSUPPORT; + + return 0; } static inline unsigned short svc_xprt_local_port(const struct svc_xprt *xprt) diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c index af3198814c1..8b083283413 100644 --- a/net/sunrpc/svcsock.c +++ b/net/sunrpc/svcsock.c @@ -426,13 +426,14 @@ static int svc_udp_recvfrom(struct svc_rqst *rqstp) long all[SVC_PKTINFO_SPACE / sizeof(long)]; } buffer; struct cmsghdr *cmh = &buffer.hdr; - int err, len; struct msghdr msg = { .msg_name = svc_addr(rqstp), .msg_control = cmh, .msg_controllen = sizeof(buffer), .msg_flags = MSG_DONTWAIT, }; + size_t len; + int err; if (test_and_clear_bit(XPT_CHNGBUF, &svsk->sk_xprt.xpt_flags)) /* udp sockets need large rcvbuf as all pending @@ -464,8 +465,8 @@ static int svc_udp_recvfrom(struct svc_rqst *rqstp) return -EAGAIN; } len = svc_addr_len(svc_addr(rqstp)); - if (len < 0) - return len; + if (len == 0) + return -EAFNOSUPPORT; rqstp->rq_addrlen = len; if (skb->tstamp.tv64 == 0) { skb->tstamp = ktime_get_real(); -- cgit v1.2.3-70-g09d2 From 335c54bdc4d3bacdbd619ec95cd0b352435bd37f Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Thu, 23 Apr 2009 19:32:25 -0400 Subject: NFSD: Prevent a buffer overflow in svc_xprt_names() The svc_xprt_names() function can overflow its buffer if it's so near the end of the passed in buffer that the "name too long" string still doesn't fit. Of course, it could never tell if it was near the end of the passed in buffer, since its only caller passes in zero as the buffer length. Let's make this API a little safer. Change svc_xprt_names() so it *always* checks for a buffer overflow, and change its only caller to pass in the correct buffer length. If svc_xprt_names() does overflow its buffer, it now fails with an ENAMETOOLONG errno, instead of trying to write a message at the end of the buffer. I don't like this much, but I can't figure out a clean way that's always safe to return some of the names, *and* an indication that the buffer was not long enough. The displayed error when doing a 'cat /proc/fs/nfsd/portlist' is "File name too long". Signed-off-by: Chuck Lever Signed-off-by: J. Bruce Fields --- fs/nfsd/nfsctl.c | 2 +- include/linux/sunrpc/svc_xprt.h | 2 +- net/sunrpc/svc_xprt.c | 56 ++++++++++++++++++++++++++++------------- 3 files changed, 41 insertions(+), 19 deletions(-) (limited to 'net') diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c index e051847b93f..6a1cd908e6b 100644 --- a/fs/nfsd/nfsctl.c +++ b/fs/nfsd/nfsctl.c @@ -918,7 +918,7 @@ static ssize_t __write_ports_names(char *buf) { if (nfsd_serv == NULL) return 0; - return svc_xprt_names(nfsd_serv, buf, 0); + return svc_xprt_names(nfsd_serv, buf, SIMPLE_TRANSACTION_LIMIT); } /* diff --git a/include/linux/sunrpc/svc_xprt.h b/include/linux/sunrpc/svc_xprt.h index d790c52525c..2223ae0b5ed 100644 --- a/include/linux/sunrpc/svc_xprt.h +++ b/include/linux/sunrpc/svc_xprt.h @@ -83,7 +83,7 @@ int svc_port_is_privileged(struct sockaddr *sin); int svc_print_xprts(char *buf, int maxlen); struct svc_xprt *svc_find_xprt(struct svc_serv *serv, const char *xcl_name, const sa_family_t af, const unsigned short port); -int svc_xprt_names(struct svc_serv *serv, char *buf, int buflen); +int svc_xprt_names(struct svc_serv *serv, char *buf, const int buflen); static inline void svc_xprt_get(struct svc_xprt *xprt) { diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c index f200393ac87..6f33d33cc06 100644 --- a/net/sunrpc/svc_xprt.c +++ b/net/sunrpc/svc_xprt.c @@ -1098,36 +1098,58 @@ struct svc_xprt *svc_find_xprt(struct svc_serv *serv, const char *xcl_name, } EXPORT_SYMBOL_GPL(svc_find_xprt); -/* - * Format a buffer with a list of the active transports. A zero for - * the buflen parameter disables target buffer overflow checking. +static int svc_one_xprt_name(const struct svc_xprt *xprt, + char *pos, int remaining) +{ + int len; + + len = snprintf(pos, remaining, "%s %u\n", + xprt->xpt_class->xcl_name, + svc_xprt_local_port(xprt)); + if (len >= remaining) + return -ENAMETOOLONG; + return len; +} + +/** + * svc_xprt_names - format a buffer with a list of transport names + * @serv: pointer to an RPC service + * @buf: pointer to a buffer to be filled in + * @buflen: length of buffer to be filled in + * + * Fills in @buf with a string containing a list of transport names, + * each name terminated with '\n'. + * + * Returns positive length of the filled-in string on success; otherwise + * a negative errno value is returned if an error occurs. */ -int svc_xprt_names(struct svc_serv *serv, char *buf, int buflen) +int svc_xprt_names(struct svc_serv *serv, char *buf, const int buflen) { struct svc_xprt *xprt; - char xprt_str[64]; - int totlen = 0; - int len; + int len, totlen; + char *pos; /* Sanity check args */ if (!serv) return 0; spin_lock_bh(&serv->sv_lock); + + pos = buf; + totlen = 0; list_for_each_entry(xprt, &serv->sv_permsocks, xpt_list) { - len = snprintf(xprt_str, sizeof(xprt_str), - "%s %d\n", xprt->xpt_class->xcl_name, - svc_xprt_local_port(xprt)); - /* If the string was truncated, replace with error string */ - if (len >= sizeof(xprt_str)) - strcpy(xprt_str, "name-too-long\n"); - /* Don't overflow buffer */ - len = strlen(xprt_str); - if (buflen && (len + totlen >= buflen)) + len = svc_one_xprt_name(xprt, pos, buflen - totlen); + if (len < 0) { + *buf = '\0'; + totlen = len; + } + if (len <= 0) break; - strcpy(buf+totlen, xprt_str); + + pos += len; totlen += len; } + spin_unlock_bh(&serv->sv_lock); return totlen; } -- cgit v1.2.3-70-g09d2 From bfba9ab4c64f0e5c33930711e6c073c285e01fcf Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Thu, 23 Apr 2009 19:32:33 -0400 Subject: SUNRPC: pass buffer size to svc_addsock() Adjust the synopsis of svc_addsock() to pass in the size of the output buffer. Add a documenting comment. This is a cosmetic change for now. A subsequent patch will make sure the buffer length is passed to one_sock_name(), where the length will actually be useful. Signed-off-by: Chuck Lever Signed-off-by: J. Bruce Fields --- fs/nfsd/nfsctl.c | 2 +- include/linux/sunrpc/svcsock.h | 3 ++- net/sunrpc/svcsock.c | 16 +++++++++++++--- 3 files changed, 16 insertions(+), 5 deletions(-) (limited to 'net') diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c index 6a1cd908e6b..1f1c2159b80 100644 --- a/fs/nfsd/nfsctl.c +++ b/fs/nfsd/nfsctl.c @@ -943,7 +943,7 @@ static ssize_t __write_ports_addfd(char *buf) if (err != 0) goto out; - err = svc_addsock(nfsd_serv, fd, buf); + err = svc_addsock(nfsd_serv, fd, buf, SIMPLE_TRANSACTION_LIMIT); if (err < 0) lockd_down(); diff --git a/include/linux/sunrpc/svcsock.h b/include/linux/sunrpc/svcsock.h index 483e10380aa..e23241c53f4 100644 --- a/include/linux/sunrpc/svcsock.h +++ b/include/linux/sunrpc/svcsock.h @@ -39,7 +39,8 @@ int svc_send(struct svc_rqst *); void svc_drop(struct svc_rqst *); void svc_sock_update_bufs(struct svc_serv *serv); int svc_sock_names(char *buf, struct svc_serv *serv, char *toclose); -int svc_addsock(struct svc_serv *serv, int fd, char *name_return); +int svc_addsock(struct svc_serv *serv, const int fd, + char *name_return, const size_t len); void svc_init_xprt_sock(void); void svc_cleanup_xprt_sock(void); diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c index 8b083283413..6bec1e25b54 100644 --- a/net/sunrpc/svcsock.c +++ b/net/sunrpc/svcsock.c @@ -1128,9 +1128,19 @@ static struct svc_sock *svc_setup_socket(struct svc_serv *serv, return svsk; } -int svc_addsock(struct svc_serv *serv, - int fd, - char *name_return) +/** + * svc_addsock - add a listener socket to an RPC service + * @serv: pointer to RPC service to which to add a new listener + * @fd: file descriptor of the new listener + * @name_return: pointer to buffer to fill in with name of listener + * @len: size of the buffer + * + * Fills in socket name and returns positive length of name if successful. + * Name is terminated with '\n'. On error, returns a negative errno + * value. + */ +int svc_addsock(struct svc_serv *serv, const int fd, char *name_return, + const size_t len) { int err = 0; struct socket *so = sockfd_lookup(fd, &err); -- cgit v1.2.3-70-g09d2 From 8435d34dbbe75678c3cdad3d53b1e7996a79b3bf Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Thu, 23 Apr 2009 19:32:40 -0400 Subject: SUNRPC: pass buffer size to svc_sock_names() Adjust the synopsis of svc_sock_names() to pass in the size of the output buffer. Add a documenting comment. This is a cosmetic change for now. A subsequent patch will make sure the buffer length is passed to one_sock_name(), where the length will actually be useful. Signed-off-by: Chuck Lever Signed-off-by: J. Bruce Fields --- fs/nfsd/nfsctl.c | 3 ++- include/linux/sunrpc/svcsock.h | 4 +++- net/sunrpc/svcsock.c | 19 +++++++++++++++++-- 3 files changed, 22 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c index 1f1c2159b80..b64a7fbfccf 100644 --- a/fs/nfsd/nfsctl.c +++ b/fs/nfsd/nfsctl.c @@ -966,7 +966,8 @@ static ssize_t __write_ports_delfd(char *buf) return -ENOMEM; if (nfsd_serv != NULL) - len = svc_sock_names(buf, nfsd_serv, toclose); + len = svc_sock_names(nfsd_serv, buf, + SIMPLE_TRANSACTION_LIMIT, toclose); if (len >= 0) lockd_down(); diff --git a/include/linux/sunrpc/svcsock.h b/include/linux/sunrpc/svcsock.h index e23241c53f4..82716313894 100644 --- a/include/linux/sunrpc/svcsock.h +++ b/include/linux/sunrpc/svcsock.h @@ -38,7 +38,9 @@ int svc_recv(struct svc_rqst *, long); int svc_send(struct svc_rqst *); void svc_drop(struct svc_rqst *); void svc_sock_update_bufs(struct svc_serv *serv); -int svc_sock_names(char *buf, struct svc_serv *serv, char *toclose); +int svc_sock_names(struct svc_serv *serv, char *buf, + const size_t buflen, + const char *toclose); int svc_addsock(struct svc_serv *serv, const int fd, char *name_return, const size_t len); void svc_init_xprt_sock(void); diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c index 6bec1e25b54..032b52ea954 100644 --- a/net/sunrpc/svcsock.c +++ b/net/sunrpc/svcsock.c @@ -259,8 +259,23 @@ static int one_sock_name(char *buf, struct svc_sock *svsk) return len; } -int -svc_sock_names(char *buf, struct svc_serv *serv, char *toclose) +/** + * svc_sock_names - construct a list of listener names in a string + * @serv: pointer to RPC service + * @buf: pointer to a buffer to fill in with socket names + * @buflen: size of the buffer to be filled + * @toclose: pointer to '\0'-terminated C string containing the name + * of a listener to be closed + * + * Fills in @buf with a '\n'-separated list of names of listener + * sockets. If @toclose is not NULL, the socket named by @toclose + * is closed, and is not included in the output list. + * + * Returns positive length of the socket name string, or a negative + * errno value on error. + */ +int svc_sock_names(struct svc_serv *serv, char *buf, const size_t buflen, + const char *toclose) { struct svc_sock *svsk, *closesk = NULL; int len = 0; -- cgit v1.2.3-70-g09d2 From e7942b9f2586fb15e1b898acc7c198ffd60aee4e Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Thu, 23 Apr 2009 19:32:48 -0400 Subject: SUNRPC: Switch one_sock_name() to use snprintf() Use snprintf() in one_sock_name() to prevent overflowing the output buffer. If the name doesn't fit in the buffer, the buffer is filled in with an empty string, and -ENAMETOOLONG is returned. Signed-off-by: Chuck Lever Signed-off-by: J. Bruce Fields --- net/sunrpc/svcsock.c | 25 ++++++++++++++++++------- 1 file changed, 18 insertions(+), 7 deletions(-) (limited to 'net') diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c index 032b52ea954..61d4a3281f9 100644 --- a/net/sunrpc/svcsock.c +++ b/net/sunrpc/svcsock.c @@ -240,22 +240,27 @@ out: /* * Report socket names for nfsdfs */ -static int one_sock_name(char *buf, struct svc_sock *svsk) +static int svc_one_sock_name(struct svc_sock *svsk, char *buf, int remaining) { int len; switch(svsk->sk_sk->sk_family) { - case AF_INET: - len = sprintf(buf, "ipv4 %s %pI4 %d\n", + case PF_INET: + len = snprintf(buf, remaining, "ipv4 %s %pI4 %d\n", svsk->sk_sk->sk_protocol == IPPROTO_UDP ? "udp" : "tcp", &inet_sk(svsk->sk_sk)->rcv_saddr, inet_sk(svsk->sk_sk)->num); break; default: - len = sprintf(buf, "*unknown-%d*\n", + len = snprintf(buf, remaining, "*unknown-%d*\n", svsk->sk_sk->sk_family); } + + if (len >= remaining) { + *buf = '\0'; + return -ENAMETOOLONG; + } return len; } @@ -282,15 +287,21 @@ int svc_sock_names(struct svc_serv *serv, char *buf, const size_t buflen, if (!serv) return 0; + spin_lock_bh(&serv->sv_lock); list_for_each_entry(svsk, &serv->sv_permsocks, sk_xprt.xpt_list) { - int onelen = one_sock_name(buf+len, svsk); - if (toclose && strcmp(toclose, buf+len) == 0) + int onelen = svc_one_sock_name(svsk, buf + len, buflen - len); + if (onelen < 0) { + len = onelen; + break; + } + if (toclose && strcmp(toclose, buf + len) == 0) closesk = svsk; else len += onelen; } spin_unlock_bh(&serv->sv_lock); + if (closesk) /* Should unregister with portmap, but you cannot * unregister just one protocol... @@ -1195,7 +1206,7 @@ int svc_addsock(struct svc_serv *serv, const int fd, char *name_return, sockfd_put(so); return err; } - return one_sock_name(name_return, svsk); + return svc_one_sock_name(svsk, name_return, len); } EXPORT_SYMBOL_GPL(svc_addsock); -- cgit v1.2.3-70-g09d2 From 58de2f86585dd8fc785aca6625adee32af84b8d7 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Thu, 23 Apr 2009 19:32:55 -0400 Subject: SUNRPC: Support PF_INET6 in one_sock_name() Add an arm to the switch statement in svc_one_sock_name() so it can construct the name of PF_INET6 sockets properly. Signed-off-by: Chuck Lever Cc: Aime Le Rouzic Signed-off-by: J. Bruce Fields --- net/sunrpc/svcsock.c | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'net') diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c index 61d4a3281f9..983bfa9f310 100644 --- a/net/sunrpc/svcsock.c +++ b/net/sunrpc/svcsock.c @@ -252,6 +252,13 @@ static int svc_one_sock_name(struct svc_sock *svsk, char *buf, int remaining) &inet_sk(svsk->sk_sk)->rcv_saddr, inet_sk(svsk->sk_sk)->num); break; + case PF_INET6: + len = snprintf(buf, remaining, "ipv6 %s %pI6 %d\n", + svsk->sk_sk->sk_protocol == IPPROTO_UDP ? + "udp" : "tcp", + &inet6_sk(svsk->sk_sk)->rcv_saddr, + inet_sk(svsk->sk_sk)->num); + break; default: len = snprintf(buf, remaining, "*unknown-%d*\n", svsk->sk_sk->sk_family); -- cgit v1.2.3-70-g09d2 From 017cb47f46722f29d101a709a2094d151111ed6d Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Thu, 23 Apr 2009 19:33:03 -0400 Subject: SUNRPC: Clean up one_sock_name() Clean up svc_one_sock_name() by setting up automatic variables for frequently used expressions. Signed-off-by: Chuck Lever Signed-off-by: J. Bruce Fields --- net/sunrpc/svcsock.c | 21 +++++++++++---------- 1 file changed, 11 insertions(+), 10 deletions(-) (limited to 'net') diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c index 983bfa9f310..4e6d406264a 100644 --- a/net/sunrpc/svcsock.c +++ b/net/sunrpc/svcsock.c @@ -242,26 +242,27 @@ out: */ static int svc_one_sock_name(struct svc_sock *svsk, char *buf, int remaining) { + const struct sock *sk = svsk->sk_sk; + const char *proto_name = sk->sk_protocol == IPPROTO_UDP ? + "udp" : "tcp"; int len; - switch(svsk->sk_sk->sk_family) { + switch (sk->sk_family) { case PF_INET: len = snprintf(buf, remaining, "ipv4 %s %pI4 %d\n", - svsk->sk_sk->sk_protocol == IPPROTO_UDP ? - "udp" : "tcp", - &inet_sk(svsk->sk_sk)->rcv_saddr, - inet_sk(svsk->sk_sk)->num); + proto_name, + &inet_sk(sk)->rcv_saddr, + inet_sk(sk)->num); break; case PF_INET6: len = snprintf(buf, remaining, "ipv6 %s %pI6 %d\n", - svsk->sk_sk->sk_protocol == IPPROTO_UDP ? - "udp" : "tcp", - &inet6_sk(svsk->sk_sk)->rcv_saddr, - inet_sk(svsk->sk_sk)->num); + proto_name, + &inet6_sk(sk)->rcv_saddr, + inet_sk(sk)->num); break; default: len = snprintf(buf, remaining, "*unknown-%d*\n", - svsk->sk_sk->sk_family); + sk->sk_family); } if (len >= remaining) { -- cgit v1.2.3-70-g09d2 From 68f2d02669f7102be80aae47155f45e18950d223 Mon Sep 17 00:00:00 2001 From: Jouni Malinen Date: Thu, 11 Jun 2009 18:19:45 +0300 Subject: mac80211: Do not try to associate with an empty SSID It looks like some programs (e.g., NM) are setting an empty SSID with SIOCSIWESSID in some cases. This seems to trigger mac80211 to try to associate with an invalid configuration (wildcard SSID) which will result in failing associations (or odd issues, potentially including kernel panic with some drivers) if the AP were to actually accept this anyway). Only start association process if the SSID is actually set. This speeds up connection with NM in number of cases and avoids sending out broken association request frames. Signed-off-by: Jouni Malinen Signed-off-by: John W. Linville --- net/mac80211/mlme.c | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'net') diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index d779c57a822..e5de9cea003 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -2445,6 +2445,14 @@ void ieee80211_sta_req_auth(struct ieee80211_sub_if_data *sdata) ieee80211_set_disassoc(sdata, true, true, WLAN_REASON_DEAUTH_LEAVING); + if (ifmgd->ssid_len == 0) { + /* + * Only allow association to be started if a valid SSID + * is configured. + */ + return; + } + if (!(ifmgd->flags & IEEE80211_STA_EXT_SME) || ifmgd->state != IEEE80211_STA_MLME_ASSOCIATE) set_bit(IEEE80211_STA_REQ_AUTH, &ifmgd->request); -- cgit v1.2.3-70-g09d2 From db2e6bd4e966a36c6b2f1921feb3537e8254415c Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Sun, 14 Jun 2009 17:37:39 +0200 Subject: mac80211: add queue debugfs file I suspect that some driver bugs can cause queues to be stopped while they shouldn't be, but it's hard to find out whether that is the case or not without having any visible information about the queues. This adds a file to debugfs that allows us to see the queues' statuses. Signed-off-by: Johannes Berg Signed-off-by: John W. Linville --- net/mac80211/debugfs.c | 25 +++++++++++++++++++++++++ net/mac80211/ieee80211_i.h | 1 + 2 files changed, 26 insertions(+) (limited to 'net') diff --git a/net/mac80211/debugfs.c b/net/mac80211/debugfs.c index 11c72311f35..6c439cd5cce 100644 --- a/net/mac80211/debugfs.c +++ b/net/mac80211/debugfs.c @@ -163,6 +163,29 @@ static const struct file_operations noack_ops = { .open = mac80211_open_file_generic }; +static ssize_t queues_read(struct file *file, char __user *user_buf, + size_t count, loff_t *ppos) +{ + struct ieee80211_local *local = file->private_data; + unsigned long flags; + char buf[IEEE80211_MAX_QUEUES * 20]; + int q, res = 0; + + spin_lock_irqsave(&local->queue_stop_reason_lock, flags); + for (q = 0; q < local->hw.queues; q++) + res += sprintf(buf + res, "%02d: %#.8lx/%d\n", q, + local->queue_stop_reasons[q], + __netif_subqueue_stopped(local->mdev, q)); + spin_unlock_irqrestore(&local->queue_stop_reason_lock, flags); + + return simple_read_from_buffer(user_buf, count, ppos, buf, res); +} + +static const struct file_operations queues_ops = { + .read = queues_read, + .open = mac80211_open_file_generic +}; + /* statistics stuff */ #define DEBUGFS_STATS_FILE(name, buflen, fmt, value...) \ @@ -298,6 +321,7 @@ void debugfs_hw_add(struct ieee80211_local *local) DEBUGFS_ADD(total_ps_buffered); DEBUGFS_ADD(wep_iv); DEBUGFS_ADD(tsf); + DEBUGFS_ADD(queues); DEBUGFS_ADD_MODE(reset, 0200); DEBUGFS_ADD(noack); @@ -350,6 +374,7 @@ void debugfs_hw_del(struct ieee80211_local *local) DEBUGFS_DEL(total_ps_buffered); DEBUGFS_DEL(wep_iv); DEBUGFS_DEL(tsf); + DEBUGFS_DEL(queues); DEBUGFS_DEL(reset); DEBUGFS_DEL(noack); diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index 4dbc2896419..f41fe1f1430 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -783,6 +783,7 @@ struct ieee80211_local { struct dentry *total_ps_buffered; struct dentry *wep_iv; struct dentry *tsf; + struct dentry *queues; struct dentry *reset; struct dentry *noack; struct dentry *statistics; -- cgit v1.2.3-70-g09d2 From 7e9debe9789456426ec8574ead879e33da19ee57 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Mon, 15 Jun 2009 13:42:25 +0200 Subject: mac80211: disconnect when user changes channel If we do not disconnect when a channel switch is requested, we end up eventually detection beacon loss from the AP and then disconnecting, without ever really telling the AP, so we might just as well disconnect right away. Additionally, this fixes a problem with iwlwifi where the driver will clear some internal state on channel changes like this and then get confused when we actually go clear that state from mac80211. It may look like this patch drops the no-IBSS check, but that is already handled by cfg80211 in the wext handler it provides for IBSS (cfg80211_ibss_wext_siwfreq). Signed-off-by: Johannes Berg Signed-off-by: John W. Linville --- net/mac80211/ieee80211_i.h | 1 - net/mac80211/mlme.c | 5 ++++- net/mac80211/util.c | 25 ------------------------- net/mac80211/wext.c | 31 +++++++++++++++++++++++++++---- 4 files changed, 31 insertions(+), 31 deletions(-) (limited to 'net') diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index f41fe1f1430..68eb5052179 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -1101,7 +1101,6 @@ void ieee802_11_parse_elems(u8 *start, size_t len, u32 ieee802_11_parse_elems_crc(u8 *start, size_t len, struct ieee802_11_elems *elems, u64 filter, u32 crc); -int ieee80211_set_freq(struct ieee80211_sub_if_data *sdata, int freq); u32 ieee80211_mandatory_rates(struct ieee80211_local *local, enum ieee80211_band band); diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index e5de9cea003..84e59b9b493 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -2223,7 +2223,10 @@ static int ieee80211_sta_config_auth(struct ieee80211_sub_if_data *sdata) capa_mask, capa_val); if (bss) { - ieee80211_set_freq(sdata, bss->cbss.channel->center_freq); + local->oper_channel = bss->cbss.channel; + local->oper_channel_type = NL80211_CHAN_NO_HT; + ieee80211_hw_config(local, 0); + if (!(ifmgd->flags & IEEE80211_STA_SSID_SET)) ieee80211_sta_set_ssid(sdata, bss->ssid, bss->ssid_len); diff --git a/net/mac80211/util.c b/net/mac80211/util.c index 66ce96a69f3..915e7776931 100644 --- a/net/mac80211/util.c +++ b/net/mac80211/util.c @@ -774,31 +774,6 @@ void ieee80211_tx_skb(struct ieee80211_sub_if_data *sdata, struct sk_buff *skb, dev_queue_xmit(skb); } -int ieee80211_set_freq(struct ieee80211_sub_if_data *sdata, int freqMHz) -{ - int ret = -EINVAL; - struct ieee80211_channel *chan; - struct ieee80211_local *local = sdata->local; - - chan = ieee80211_get_channel(local->hw.wiphy, freqMHz); - - if (chan && !(chan->flags & IEEE80211_CHAN_DISABLED)) { - if (sdata->vif.type == NL80211_IFTYPE_ADHOC && - chan->flags & IEEE80211_CHAN_NO_IBSS) - return ret; - local->oper_channel = chan; - local->oper_channel_type = NL80211_CHAN_NO_HT; - - if (local->sw_scanning || local->hw_scanning) - ret = 0; - else - ret = ieee80211_hw_config( - local, IEEE80211_CONF_CHANGE_CHANNEL); - } - - return ret; -} - u32 ieee80211_mandatory_rates(struct ieee80211_local *local, enum ieee80211_band band) { diff --git a/net/mac80211/wext.c b/net/mac80211/wext.c index d2d81b10334..1da81f45674 100644 --- a/net/mac80211/wext.c +++ b/net/mac80211/wext.c @@ -55,6 +55,8 @@ static int ieee80211_ioctl_siwfreq(struct net_device *dev, struct iw_freq *freq, char *extra) { struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); + struct ieee80211_local *local = sdata->local; + struct ieee80211_channel *chan; if (sdata->vif.type == NL80211_IFTYPE_ADHOC) return cfg80211_ibss_wext_siwfreq(dev, info, freq, extra); @@ -69,17 +71,38 @@ static int ieee80211_ioctl_siwfreq(struct net_device *dev, IEEE80211_STA_AUTO_CHANNEL_SEL; return 0; } else - return ieee80211_set_freq(sdata, + chan = ieee80211_get_channel(local->hw.wiphy, ieee80211_channel_to_frequency(freq->m)); } else { int i, div = 1000000; for (i = 0; i < freq->e; i++) div /= 10; - if (div > 0) - return ieee80211_set_freq(sdata, freq->m / div); - else + if (div <= 0) return -EINVAL; + chan = ieee80211_get_channel(local->hw.wiphy, freq->m / div); } + + if (!chan) + return -EINVAL; + + if (chan->flags & IEEE80211_CHAN_DISABLED) + return -EINVAL; + + /* + * no change except maybe auto -> fixed, ignore the HT + * setting so you can fix a channel you're on already + */ + if (local->oper_channel == chan) + return 0; + + if (sdata->vif.type == NL80211_IFTYPE_STATION) + ieee80211_sta_req_auth(sdata); + + local->oper_channel = chan; + local->oper_channel_type = NL80211_CHAN_NO_HT; + ieee80211_hw_config(local, 0); + + return 0; } -- cgit v1.2.3-70-g09d2 From 1fa6f4af9f55bc1b753af04276984429d6b5a613 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Mon, 15 Jun 2009 18:13:58 +0200 Subject: mac80211: fix wext bssid/ssid setting When changing to a new BSSID or SSID, the code in ieee80211_set_disassoc() needs to have the old data still valid to be able to disconnect and clean up properly. Currently, however, the old data is thrown away before ieee80211_set_disassoc() is ever called, so fix that by calling the function _before_ the old data is overwritten. This is (one of) the issue(s) causing mac80211 to hold cfg80211's BSS structs forever, and them thus being returned in scan results after they're long gone. http://www.intellinuxwireless.org/bugzilla/show_bug.cgi?id=2015 Signed-off-by: Johannes Berg Signed-off-by: John W. Linville --- net/mac80211/mlme.c | 25 ++++++++++++++----------- 1 file changed, 14 insertions(+), 11 deletions(-) (limited to 'net') diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index 84e59b9b493..aca22b00b6a 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -1102,14 +1102,6 @@ static void ieee80211_set_disassoc(struct ieee80211_sub_if_data *sdata, struct sta_info *sta; u32 changed = 0, config_changed = 0; - rcu_read_lock(); - - sta = sta_info_get(local, ifmgd->bssid); - if (!sta) { - rcu_read_unlock(); - return; - } - if (deauth) { ifmgd->direct_probe_tries = 0; ifmgd->auth_tries = 0; @@ -1120,7 +1112,11 @@ static void ieee80211_set_disassoc(struct ieee80211_sub_if_data *sdata, netif_tx_stop_all_queues(sdata->dev); netif_carrier_off(sdata->dev); - ieee80211_sta_tear_down_BA_sessions(sta); + rcu_read_lock(); + sta = sta_info_get(local, ifmgd->bssid); + if (sta) + ieee80211_sta_tear_down_BA_sessions(sta); + rcu_read_unlock(); bss = ieee80211_rx_bss_get(local, ifmgd->bssid, conf->channel->center_freq, @@ -1156,8 +1152,6 @@ static void ieee80211_set_disassoc(struct ieee80211_sub_if_data *sdata, ifmgd->ssid, ifmgd->ssid_len); } - rcu_read_unlock(); - ieee80211_set_wmm_default(sdata); ieee80211_recalc_idle(local); @@ -2487,6 +2481,10 @@ int ieee80211_sta_set_ssid(struct ieee80211_sub_if_data *sdata, char *ssid, size ifmgd = &sdata->u.mgd; if (ifmgd->ssid_len != len || memcmp(ifmgd->ssid, ssid, len) != 0) { + if (ifmgd->state == IEEE80211_STA_MLME_ASSOCIATED) + ieee80211_set_disassoc(sdata, true, true, + WLAN_REASON_DEAUTH_LEAVING); + /* * Do not use reassociation if SSID is changed (different ESS). */ @@ -2511,6 +2509,11 @@ int ieee80211_sta_set_bssid(struct ieee80211_sub_if_data *sdata, u8 *bssid) { struct ieee80211_if_managed *ifmgd = &sdata->u.mgd; + if (compare_ether_addr(bssid, ifmgd->bssid) != 0 && + ifmgd->state == IEEE80211_STA_MLME_ASSOCIATED) + ieee80211_set_disassoc(sdata, true, true, + WLAN_REASON_DEAUTH_LEAVING); + if (is_valid_ether_addr(bssid)) { memcpy(ifmgd->bssid, bssid, ETH_ALEN); ifmgd->flags |= IEEE80211_STA_BSSID_SET; -- cgit v1.2.3-70-g09d2 From 6aad89c8376e432231b78d1bdbcc10ef7708b011 Mon Sep 17 00:00:00 2001 From: Anton Blanchard Date: Wed, 10 Jun 2009 12:52:21 -0700 Subject: sunrpc: align cache_clean work's timer Align cache_clean work. Signed-off-by: Anton Blanchard Cc: Neil Brown Cc: Trond Myklebust Cc: "David S. Miller" Signed-off-by: Andrew Morton Acked-by: David S. Miller Signed-off-by: J. Bruce Fields --- net/sunrpc/cache.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/sunrpc/cache.c b/net/sunrpc/cache.c index 20029a79a5d..ff0c23053d2 100644 --- a/net/sunrpc/cache.c +++ b/net/sunrpc/cache.c @@ -488,7 +488,7 @@ static void do_cache_clean(struct work_struct *work) { int delay = 5; if (cache_clean() == -1) - delay = 30*HZ; + delay = round_jiffies_relative(30*HZ); if (list_empty(&cache_list)) delay = 0; -- cgit v1.2.3-70-g09d2 From 59fb30660be3f3ead54b3850c401d462449caaaa Mon Sep 17 00:00:00 2001 From: Christian Engelmayer Date: Sun, 14 Jun 2009 00:05:26 +0200 Subject: sunrpc: potential memory leak in function rdma_read_xdr In case the check on ch_count fails the cleanup path is skipped and the previously allocated memory 'rpl_map', 'chl_map' is not freed. Reported by Coverity. Signed-off-by: Christian Engelmayer Signed-off-by: J. Bruce Fields --- net/sunrpc/xprtrdma/svc_rdma_recvfrom.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c index 42a6f9f2028..9e884383134 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c +++ b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c @@ -397,14 +397,14 @@ static int rdma_read_xdr(struct svcxprt_rdma *xprt, if (!ch) return 0; - /* Allocate temporary reply and chunk maps */ - rpl_map = svc_rdma_get_req_map(); - chl_map = svc_rdma_get_req_map(); - svc_rdma_rcl_chunk_counts(ch, &ch_count, &byte_count); if (ch_count > RPCSVC_MAXPAGES) return -EINVAL; + /* Allocate temporary reply and chunk maps */ + rpl_map = svc_rdma_get_req_map(); + chl_map = svc_rdma_get_req_map(); + if (!xprt->sc_frmr_pg_list_len) sge_count = map_read_chunks(xprt, rqstp, hdr_ctxt, rmsgp, rpl_map, chl_map, ch_count, -- cgit v1.2.3-70-g09d2 From 14ebaf81e13ce66bff275380b246796fd16cbfa1 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Tue, 16 Jun 2009 05:40:30 -0700 Subject: x25: Fix sleep from timer on socket destroy. If socket destuction gets delayed to a timer, we try to lock_sock() from that timer which won't work. Use bh_lock_sock() in that case. Signed-off-by: David S. Miller Tested-by: Ingo Molnar --- include/net/x25.h | 2 +- net/x25/af_x25.c | 23 ++++++++++++++++++----- net/x25/x25_timer.c | 2 +- 3 files changed, 20 insertions(+), 7 deletions(-) (limited to 'net') diff --git a/include/net/x25.h b/include/net/x25.h index fc3f03d976f..2cda0401156 100644 --- a/include/net/x25.h +++ b/include/net/x25.h @@ -187,7 +187,7 @@ extern int x25_addr_ntoa(unsigned char *, struct x25_address *, extern int x25_addr_aton(unsigned char *, struct x25_address *, struct x25_address *); extern struct sock *x25_find_socket(unsigned int, struct x25_neigh *); -extern void x25_destroy_socket(struct sock *); +extern void x25_destroy_socket_from_timer(struct sock *); extern int x25_rx_call_request(struct sk_buff *, struct x25_neigh *, unsigned int); extern void x25_kill_by_neigh(struct x25_neigh *); diff --git a/net/x25/af_x25.c b/net/x25/af_x25.c index ed80af8ca5f..c51f3095739 100644 --- a/net/x25/af_x25.c +++ b/net/x25/af_x25.c @@ -332,14 +332,14 @@ static unsigned int x25_new_lci(struct x25_neigh *nb) /* * Deferred destroy. */ -void x25_destroy_socket(struct sock *); +static void __x25_destroy_socket(struct sock *); /* * handler for deferred kills. */ static void x25_destroy_timer(unsigned long data) { - x25_destroy_socket((struct sock *)data); + x25_destroy_socket_from_timer((struct sock *)data); } /* @@ -349,12 +349,10 @@ static void x25_destroy_timer(unsigned long data) * will touch it and we are (fairly 8-) ) safe. * Not static as it's used by the timer */ -void x25_destroy_socket(struct sock *sk) +static void __x25_destroy_socket(struct sock *sk) { struct sk_buff *skb; - sock_hold(sk); - lock_sock(sk); x25_stop_heartbeat(sk); x25_stop_timer(sk); @@ -385,7 +383,22 @@ void x25_destroy_socket(struct sock *sk) /* drop last reference so sock_put will free */ __sock_put(sk); } +} +void x25_destroy_socket_from_timer(struct sock *sk) +{ + sock_hold(sk); + bh_lock_sock(sk); + __x25_destroy_socket(sk); + bh_unlock_sock(sk); + sock_put(sk); +} + +static void x25_destroy_socket(struct sock *sk) +{ + sock_hold(sk); + lock_sock(sk); + __x25_destroy_socket(sk); release_sock(sk); sock_put(sk); } diff --git a/net/x25/x25_timer.c b/net/x25/x25_timer.c index d3e3e54db93..5c5db1a3639 100644 --- a/net/x25/x25_timer.c +++ b/net/x25/x25_timer.c @@ -113,7 +113,7 @@ static void x25_heartbeat_expiry(unsigned long param) (sk->sk_state == TCP_LISTEN && sock_flag(sk, SOCK_DEAD))) { bh_unlock_sock(sk); - x25_destroy_socket(sk); + x25_destroy_socket_from_timer(sk); return; } break; -- cgit v1.2.3-70-g09d2 From c564039fd83ea16a86a96d52632794b24849e507 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 16 Jun 2009 10:12:03 +0000 Subject: net: sk_wmem_alloc has initial value of one, not zero commit 2b85a34e911bf483c27cfdd124aeb1605145dc80 (net: No more expensive sock_hold()/sock_put() on each tx) changed initial sk_wmem_alloc value. Some protocols check sk_wmem_alloc value to determine if a timer must delay socket deallocation. We must take care of the sk_wmem_alloc value being one instead of zero when no write allocations are pending. Reported by Ingo Molnar, and full diagnostic from David Miller. This patch introduces three helpers to get read/write allocations and a followup patch will use these helpers to report correct write allocations to user. Reported-by: Ingo Molnar Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/net/sock.h | 33 +++++++++++++++++++++++++++++++++ net/appletalk/ddp.c | 6 ++---- net/ax25/af_ax25.c | 3 +-- net/econet/af_econet.c | 6 ++---- net/netrom/af_netrom.c | 3 +-- net/rose/af_rose.c | 3 +-- net/x25/af_x25.c | 3 +-- 7 files changed, 41 insertions(+), 16 deletions(-) (limited to 'net') diff --git a/include/net/sock.h b/include/net/sock.h index 010e14a93c9..570c7a12b54 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -1206,6 +1206,39 @@ static inline int skb_copy_to_page(struct sock *sk, char __user *from, return 0; } +/** + * sk_wmem_alloc_get - returns write allocations + * @sk: socket + * + * Returns sk_wmem_alloc minus initial offset of one + */ +static inline int sk_wmem_alloc_get(const struct sock *sk) +{ + return atomic_read(&sk->sk_wmem_alloc) - 1; +} + +/** + * sk_rmem_alloc_get - returns read allocations + * @sk: socket + * + * Returns sk_rmem_alloc + */ +static inline int sk_rmem_alloc_get(const struct sock *sk) +{ + return atomic_read(&sk->sk_rmem_alloc); +} + +/** + * sk_has_allocations - check if allocations are outstanding + * @sk: socket + * + * Returns true if socket has write or read allocations + */ +static inline int sk_has_allocations(const struct sock *sk) +{ + return sk_wmem_alloc_get(sk) || sk_rmem_alloc_get(sk); +} + /* * Queue a received datagram if it will fit. Stream and sequenced * protocols can't normally use this as they need to fit buffers in diff --git a/net/appletalk/ddp.c b/net/appletalk/ddp.c index b603cbacdc5..f7a53b219ef 100644 --- a/net/appletalk/ddp.c +++ b/net/appletalk/ddp.c @@ -162,8 +162,7 @@ static void atalk_destroy_timer(unsigned long data) { struct sock *sk = (struct sock *)data; - if (atomic_read(&sk->sk_wmem_alloc) || - atomic_read(&sk->sk_rmem_alloc)) { + if (sk_has_allocations(sk)) { sk->sk_timer.expires = jiffies + SOCK_DESTROY_TIME; add_timer(&sk->sk_timer); } else @@ -175,8 +174,7 @@ static inline void atalk_destroy_socket(struct sock *sk) atalk_remove_socket(sk); skb_queue_purge(&sk->sk_receive_queue); - if (atomic_read(&sk->sk_wmem_alloc) || - atomic_read(&sk->sk_rmem_alloc)) { + if (sk_has_allocations(sk)) { setup_timer(&sk->sk_timer, atalk_destroy_timer, (unsigned long)sk); sk->sk_timer.expires = jiffies + SOCK_DESTROY_TIME; diff --git a/net/ax25/af_ax25.c b/net/ax25/af_ax25.c index fd9d06f291d..61b35b95549 100644 --- a/net/ax25/af_ax25.c +++ b/net/ax25/af_ax25.c @@ -330,8 +330,7 @@ void ax25_destroy_socket(ax25_cb *ax25) } if (ax25->sk != NULL) { - if (atomic_read(&ax25->sk->sk_wmem_alloc) || - atomic_read(&ax25->sk->sk_rmem_alloc)) { + if (sk_has_allocations(ax25->sk)) { /* Defer: outstanding buffers */ setup_timer(&ax25->dtimer, ax25_destroy_timer, (unsigned long)ax25); diff --git a/net/econet/af_econet.c b/net/econet/af_econet.c index 8121bf0029e..2e1f836d424 100644 --- a/net/econet/af_econet.c +++ b/net/econet/af_econet.c @@ -540,8 +540,7 @@ static void econet_destroy_timer(unsigned long data) { struct sock *sk=(struct sock *)data; - if (!atomic_read(&sk->sk_wmem_alloc) && - !atomic_read(&sk->sk_rmem_alloc)) { + if (!sk_has_allocations(sk)) { sk_free(sk); return; } @@ -579,8 +578,7 @@ static int econet_release(struct socket *sock) skb_queue_purge(&sk->sk_receive_queue); - if (atomic_read(&sk->sk_rmem_alloc) || - atomic_read(&sk->sk_wmem_alloc)) { + if (sk_has_allocations(sk)) { sk->sk_timer.data = (unsigned long)sk; sk->sk_timer.expires = jiffies + HZ; sk->sk_timer.function = econet_destroy_timer; diff --git a/net/netrom/af_netrom.c b/net/netrom/af_netrom.c index 3be0e016ab7..cd911904cbe 100644 --- a/net/netrom/af_netrom.c +++ b/net/netrom/af_netrom.c @@ -286,8 +286,7 @@ void nr_destroy_socket(struct sock *sk) kfree_skb(skb); } - if (atomic_read(&sk->sk_wmem_alloc) || - atomic_read(&sk->sk_rmem_alloc)) { + if (sk_has_allocations(sk)) { /* Defer: outstanding buffers */ sk->sk_timer.function = nr_destroy_timer; sk->sk_timer.expires = jiffies + 2 * HZ; diff --git a/net/rose/af_rose.c b/net/rose/af_rose.c index 877a7f65f70..4dd9a7d1894 100644 --- a/net/rose/af_rose.c +++ b/net/rose/af_rose.c @@ -356,8 +356,7 @@ void rose_destroy_socket(struct sock *sk) kfree_skb(skb); } - if (atomic_read(&sk->sk_wmem_alloc) || - atomic_read(&sk->sk_rmem_alloc)) { + if (sk_has_allocations(sk)) { /* Defer: outstanding buffers */ setup_timer(&sk->sk_timer, rose_destroy_timer, (unsigned long)sk); diff --git a/net/x25/af_x25.c b/net/x25/af_x25.c index c51f3095739..8cd2390b0d4 100644 --- a/net/x25/af_x25.c +++ b/net/x25/af_x25.c @@ -372,8 +372,7 @@ static void __x25_destroy_socket(struct sock *sk) kfree_skb(skb); } - if (atomic_read(&sk->sk_wmem_alloc) || - atomic_read(&sk->sk_rmem_alloc)) { + if (sk_has_allocations(sk)) { /* Defer: outstanding buffers */ sk->sk_timer.expires = jiffies + 10 * HZ; sk->sk_timer.function = x25_destroy_timer; -- cgit v1.2.3-70-g09d2 From aae2006e9b0c294114915c13022fa348e1a88023 Mon Sep 17 00:00:00 2001 From: Andy Adamson Date: Wed, 1 Apr 2009 09:22:40 -0400 Subject: nfs41: sunrpc: Export the call prepare state for session reset Signed-off-by: Andy Adamson Signed-off-by: Benny Halevy Signed-off-by: Trond Myklebust --- include/linux/sunrpc/clnt.h | 1 + include/linux/sunrpc/sched.h | 1 + net/sunrpc/clnt.c | 13 +++++++++++++ net/sunrpc/sched.c | 2 +- 4 files changed, 16 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/include/linux/sunrpc/clnt.h b/include/linux/sunrpc/clnt.h index c39a21040dc..37881f1a0bd 100644 --- a/include/linux/sunrpc/clnt.h +++ b/include/linux/sunrpc/clnt.h @@ -143,6 +143,7 @@ int rpc_call_sync(struct rpc_clnt *clnt, const struct rpc_message *msg, int flags); struct rpc_task *rpc_call_null(struct rpc_clnt *clnt, struct rpc_cred *cred, int flags); +void rpc_restart_call_prepare(struct rpc_task *); void rpc_restart_call(struct rpc_task *); void rpc_setbufsize(struct rpc_clnt *, unsigned int, unsigned int); size_t rpc_max_payload(struct rpc_clnt *); diff --git a/include/linux/sunrpc/sched.h b/include/linux/sunrpc/sched.h index 64981a2f1ca..177376880fa 100644 --- a/include/linux/sunrpc/sched.h +++ b/include/linux/sunrpc/sched.h @@ -237,6 +237,7 @@ void rpc_show_tasks(void); int rpc_init_mempool(void); void rpc_destroy_mempool(void); extern struct workqueue_struct *rpciod_workqueue; +void rpc_prepare_task(struct rpc_task *task); static inline void rpc_exit(struct rpc_task *task, int status) { diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index 5abab094441..d00e8135f86 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -694,6 +694,19 @@ void rpc_force_rebind(struct rpc_clnt *clnt) } EXPORT_SYMBOL_GPL(rpc_force_rebind); +/* + * Restart an (async) RPC call from the call_prepare state. + * Usually called from within the exit handler. + */ +void +rpc_restart_call_prepare(struct rpc_task *task) +{ + if (RPC_ASSASSINATED(task)) + return; + task->tk_action = rpc_prepare_task; +} +EXPORT_SYMBOL_GPL(rpc_restart_call_prepare); + /* * Restart an (async) RPC call. Usually called from within the * exit handler. diff --git a/net/sunrpc/sched.c b/net/sunrpc/sched.c index ff50a054686..1102ce1251f 100644 --- a/net/sunrpc/sched.c +++ b/net/sunrpc/sched.c @@ -569,7 +569,7 @@ EXPORT_SYMBOL_GPL(rpc_delay); /* * Helper to call task->tk_ops->rpc_call_prepare */ -static void rpc_prepare_task(struct rpc_task *task) +void rpc_prepare_task(struct rpc_task *task) { task->tk_ops->rpc_call_prepare(task, task->tk_calldata); } -- cgit v1.2.3-70-g09d2 From 18dca02aeb3c49dfce87c76be643b139d05cf647 Mon Sep 17 00:00:00 2001 From: Ricardo Labiaga Date: Wed, 1 Apr 2009 09:22:53 -0400 Subject: nfs41: Add ability to read RPC call direction on TCP stream. NFSv4.1 callbacks can arrive over an existing connection. This patch adds the logic to read the RPC call direction (call or reply). It does this by updating the state machine to look for the call direction invoking xs_tcp_read_calldir(...) after reading the XID. [nfs41: Keep track of RPC call/reply direction with a flag] As per 11/14/08 review of RFC 53/85. Add a new flag to track whether the incoming message is an RPC call or an RPC reply. TCP_RPC_REPLY is set in the 'struct sock_xprt' tcp_flags in xs_tcp_read_calldir() if the message is an RPC reply sent on the forechannel. It is cleared if the message is an RPC request sent on the back channel. Signed-off-by: Ricardo Labiaga Signed-off-by: Benny Halevy --- net/sunrpc/xprtsock.c | 52 ++++++++++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 49 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index e1859614601..8975c10591c 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -270,6 +270,12 @@ struct sock_xprt { #define TCP_RCV_COPY_FRAGHDR (1UL << 1) #define TCP_RCV_COPY_XID (1UL << 2) #define TCP_RCV_COPY_DATA (1UL << 3) +#define TCP_RCV_COPY_CALLDIR (1UL << 4) + +/* + * TCP RPC flags + */ +#define TCP_RPC_REPLY (1UL << 5) static inline struct sockaddr *xs_addr(struct rpc_xprt *xprt) { @@ -956,7 +962,7 @@ static inline void xs_tcp_read_fraghdr(struct rpc_xprt *xprt, struct xdr_skb_rea transport->tcp_offset = 0; /* Sanity check of the record length */ - if (unlikely(transport->tcp_reclen < 4)) { + if (unlikely(transport->tcp_reclen < 8)) { dprintk("RPC: invalid TCP record fragment length\n"); xprt_force_disconnect(xprt); return; @@ -991,13 +997,48 @@ static inline void xs_tcp_read_xid(struct sock_xprt *transport, struct xdr_skb_r if (used != len) return; transport->tcp_flags &= ~TCP_RCV_COPY_XID; - transport->tcp_flags |= TCP_RCV_COPY_DATA; + transport->tcp_flags |= TCP_RCV_COPY_CALLDIR; transport->tcp_copied = 4; - dprintk("RPC: reading reply for XID %08x\n", + dprintk("RPC: reading %s XID %08x\n", + (transport->tcp_flags & TCP_RPC_REPLY) ? "reply for" + : "request with", ntohl(transport->tcp_xid)); xs_tcp_check_fraghdr(transport); } +static inline void xs_tcp_read_calldir(struct sock_xprt *transport, + struct xdr_skb_reader *desc) +{ + size_t len, used; + u32 offset; + __be32 calldir; + + /* + * We want transport->tcp_offset to be 8 at the end of this routine + * (4 bytes for the xid and 4 bytes for the call/reply flag). + * When this function is called for the first time, + * transport->tcp_offset is 4 (after having already read the xid). + */ + offset = transport->tcp_offset - sizeof(transport->tcp_xid); + len = sizeof(calldir) - offset; + dprintk("RPC: reading CALL/REPLY flag (%Zu bytes)\n", len); + used = xdr_skb_read_bits(desc, &calldir, len); + transport->tcp_offset += used; + if (used != len) + return; + transport->tcp_flags &= ~TCP_RCV_COPY_CALLDIR; + transport->tcp_flags |= TCP_RCV_COPY_DATA; + transport->tcp_copied += 4; + if (ntohl(calldir) == RPC_REPLY) + transport->tcp_flags |= TCP_RPC_REPLY; + else + transport->tcp_flags &= ~TCP_RPC_REPLY; + dprintk("RPC: reading %s CALL/REPLY flag %08x\n", + (transport->tcp_flags & TCP_RPC_REPLY) ? + "reply for" : "request with", calldir); + xs_tcp_check_fraghdr(transport); +} + static inline void xs_tcp_read_request(struct rpc_xprt *xprt, struct xdr_skb_reader *desc) { struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt); @@ -1114,6 +1155,11 @@ static int xs_tcp_data_recv(read_descriptor_t *rd_desc, struct sk_buff *skb, uns xs_tcp_read_xid(transport, &desc); continue; } + /* Read in the call/reply flag */ + if (transport->tcp_flags & TCP_RCV_COPY_CALLDIR) { + xs_tcp_read_calldir(transport, &desc); + continue; + } /* Read in the request data */ if (transport->tcp_flags & TCP_RCV_COPY_DATA) { xs_tcp_read_request(xprt, &desc); -- cgit v1.2.3-70-g09d2 From f4a2e418bfd03a1f25f515e8a92ecd584d96cfc1 Mon Sep 17 00:00:00 2001 From: Ricardo Labiaga Date: Wed, 1 Apr 2009 09:22:54 -0400 Subject: nfs41: Process the RPC call direction Reading and storing the RPC direction is a three step process. 1. xs_tcp_read_calldir() reads the RPC direction, but it will not store it in the XDR buffer since the 'struct rpc_rqst' is not yet available. 2. The 'struct rpc_rqst' is obtained during the TCP_RCV_COPY_DATA state. This state need not necessarily be preceeded by the TCP_RCV_READ_CALLDIR. For example, we may be reading a continuation packet to a large reply. Therefore, we can't simply obtain the 'struct rpc_rqst' during the TCP_RCV_READ_CALLDIR state and assume it's available during TCP_RCV_COPY_DATA. This patch adds a new TCP_RCV_READ_CALLDIR flag to indicate the need to read the RPC direction. It then uses TCP_RCV_COPY_CALLDIR to indicate the RPC direction needs to be saved after the 'struct rpc_rqst' has been allocated. 3. The 'struct rpc_rqst' is obtained by the xs_tcp_read_data() helper functions. xs_tcp_read_common() then saves the RPC direction in the XDR buffer if TCP_RCV_COPY_CALLDIR is set. This will happen when we're reading the data immediately after the direction was read. xs_tcp_read_common() then clears this flag. [was nfs41: Skip past the RPC call direction] Signed-off-by: Ricardo Labiaga Signed-off-by: Benny Halevy [nfs41: sunrpc: Add RPC direction back into the XDR buffer] Signed-off-by: Ricardo Labiaga Signed-off-by: Benny Halevy [nfs41: sunrpc: Don't skip past the RPC call direction] Signed-off-by: Ricardo Labiaga Signed-off-by: Benny Halevy --- net/sunrpc/clnt.c | 5 +++-- net/sunrpc/xprtsock.c | 31 +++++++++++++++++++++++++------ 2 files changed, 28 insertions(+), 8 deletions(-) (limited to 'net') diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index d00e8135f86..aca3ab6fc14 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -1390,13 +1390,14 @@ rpc_verify_header(struct rpc_task *task) } if ((len -= 3) < 0) goto out_overflow; - p += 1; /* skip XID */ + p += 1; /* skip XID */ if ((n = ntohl(*p++)) != RPC_REPLY) { dprintk("RPC: %5u %s: not an RPC reply: %x\n", - task->tk_pid, __func__, n); + task->tk_pid, __func__, n); goto out_garbage; } + if ((n = ntohl(*p++)) != RPC_MSG_ACCEPTED) { if (--len < 0) goto out_overflow; diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index 8975c10591c..a48df1449ec 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -270,12 +270,13 @@ struct sock_xprt { #define TCP_RCV_COPY_FRAGHDR (1UL << 1) #define TCP_RCV_COPY_XID (1UL << 2) #define TCP_RCV_COPY_DATA (1UL << 3) -#define TCP_RCV_COPY_CALLDIR (1UL << 4) +#define TCP_RCV_READ_CALLDIR (1UL << 4) +#define TCP_RCV_COPY_CALLDIR (1UL << 5) /* * TCP RPC flags */ -#define TCP_RPC_REPLY (1UL << 5) +#define TCP_RPC_REPLY (1UL << 6) static inline struct sockaddr *xs_addr(struct rpc_xprt *xprt) { @@ -997,7 +998,7 @@ static inline void xs_tcp_read_xid(struct sock_xprt *transport, struct xdr_skb_r if (used != len) return; transport->tcp_flags &= ~TCP_RCV_COPY_XID; - transport->tcp_flags |= TCP_RCV_COPY_CALLDIR; + transport->tcp_flags |= TCP_RCV_READ_CALLDIR; transport->tcp_copied = 4; dprintk("RPC: reading %s XID %08x\n", (transport->tcp_flags & TCP_RPC_REPLY) ? "reply for" @@ -1026,9 +1027,13 @@ static inline void xs_tcp_read_calldir(struct sock_xprt *transport, transport->tcp_offset += used; if (used != len) return; - transport->tcp_flags &= ~TCP_RCV_COPY_CALLDIR; + transport->tcp_flags &= ~TCP_RCV_READ_CALLDIR; + transport->tcp_flags |= TCP_RCV_COPY_CALLDIR; transport->tcp_flags |= TCP_RCV_COPY_DATA; - transport->tcp_copied += 4; + /* + * We don't yet have the XDR buffer, so we will write the calldir + * out after we get the buffer from the 'struct rpc_rqst' + */ if (ntohl(calldir) == RPC_REPLY) transport->tcp_flags |= TCP_RPC_REPLY; else @@ -1059,6 +1064,20 @@ static inline void xs_tcp_read_request(struct rpc_xprt *xprt, struct xdr_skb_rea } rcvbuf = &req->rq_private_buf; + + if (transport->tcp_flags & TCP_RCV_COPY_CALLDIR) { + /* + * Save the RPC direction in the XDR buffer + */ + __be32 calldir = transport->tcp_flags & TCP_RPC_REPLY ? + htonl(RPC_REPLY) : 0; + + memcpy(rcvbuf->head[0].iov_base + transport->tcp_copied, + &calldir, sizeof(calldir)); + transport->tcp_copied += sizeof(calldir); + transport->tcp_flags &= ~TCP_RCV_COPY_CALLDIR; + } + len = desc->count; if (len > transport->tcp_reclen - transport->tcp_offset) { struct xdr_skb_reader my_desc; @@ -1156,7 +1175,7 @@ static int xs_tcp_data_recv(read_descriptor_t *rd_desc, struct sk_buff *skb, uns continue; } /* Read in the call/reply flag */ - if (transport->tcp_flags & TCP_RCV_COPY_CALLDIR) { + if (transport->tcp_flags & TCP_RCV_READ_CALLDIR) { xs_tcp_read_calldir(transport, &desc); continue; } -- cgit v1.2.3-70-g09d2 From f9acac1a4710ce88871f1ae323fc91c1cb6e9d52 Mon Sep 17 00:00:00 2001 From: Ricardo Labiaga Date: Wed, 1 Apr 2009 09:22:59 -0400 Subject: nfs41: Initialize new rpc_xprt callback related fields Signed-off-by: Ricardo Labiaga Signed-off-by: Benny Halevy --- net/sunrpc/xprt.c | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'net') diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c index 06ca058572f..52739f82df1 100644 --- a/net/sunrpc/xprt.c +++ b/net/sunrpc/xprt.c @@ -1049,6 +1049,11 @@ found: INIT_LIST_HEAD(&xprt->free); INIT_LIST_HEAD(&xprt->recv); +#if defined(CONFIG_NFS_V4_1) + spin_lock_init(&xprt->bc_pa_lock); + INIT_LIST_HEAD(&xprt->bc_pa_list); +#endif /* CONFIG_NFS_V4_1 */ + INIT_WORK(&xprt->task_cleanup, xprt_autoclose); setup_timer(&xprt->timer, xprt_init_autodisconnect, (unsigned long)xprt); -- cgit v1.2.3-70-g09d2 From fb7a0b9addbdbbb13b7bc02abf55ee524ea19ce1 Mon Sep 17 00:00:00 2001 From: Ricardo Labiaga Date: Wed, 1 Apr 2009 09:23:00 -0400 Subject: nfs41: New backchannel helper routines This patch introduces support to setup the callback xprt on the client side. It allocates/ destroys the preallocated memory structures used to process backchannel requests. At setup time, xprt_setup_backchannel() is invoked to allocate one or more rpc_rqst structures and substructures. This ensures that they are available when an RPC callback arrives. The rpc_rqst structures are maintained in a linked list attached to the rpc_xprt structure. We keep track of the number of allocations so that they can be correctly removed when the channel is destroyed. When an RPC callback arrives, xprt_alloc_bc_request() is invoked to obtain a preallocated rpc_rqst structure. An rpc_xprt structure is returned, and its RPC_BC_PREALLOC_IN_USE bit is set in rpc_xprt->bc_flags. The structure is removed from the the list since it is now in use, and it will be later added back when its user is done with it. After the RPC callback replies, the rpc_rqst structure is returned by invoking xprt_free_bc_request(). This clears the RPC_BC_PREALLOC_IN_USE bit and adds it back to the list, allowing it to be reused by a subsequent RPC callback request. To be consistent with the reception of RPC messages, the backchannel requests should be placed into the 'struct rpc_rqst' rq_rcv_buf, which is then in turn copied to the 'struct rpc_rqst' rq_private_buf. [nfs41: Preallocate rpc_rqst receive buffer for handling callbacks] Signed-off-by: Ricardo Labiaga Signed-off-by: Benny Halevy [Update copyright notice and explain page allocation] Signed-off-by: Ricardo Labiaga Signed-off-by: Benny Halevy --- include/linux/sunrpc/xprt.h | 1 + net/sunrpc/Makefile | 1 + net/sunrpc/backchannel_rqst.c | 278 ++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 280 insertions(+) create mode 100644 net/sunrpc/backchannel_rqst.c (limited to 'net') diff --git a/include/linux/sunrpc/xprt.h b/include/linux/sunrpc/xprt.h index 703af7ebf6c..beae030e80b 100644 --- a/include/linux/sunrpc/xprt.h +++ b/include/linux/sunrpc/xprt.h @@ -183,6 +183,7 @@ struct rpc_xprt { #if defined(CONFIG_NFS_V4_1) struct svc_serv *bc_serv; /* The RPC service which will */ /* process the callback */ + unsigned int bc_alloc_count; /* Total number of preallocs */ spinlock_t bc_pa_lock; /* Protects the preallocated * items */ struct list_head bc_pa_list; /* List of preallocated diff --git a/net/sunrpc/Makefile b/net/sunrpc/Makefile index 5369aa369b3..4a01f9684b8 100644 --- a/net/sunrpc/Makefile +++ b/net/sunrpc/Makefile @@ -13,5 +13,6 @@ sunrpc-y := clnt.o xprt.o socklib.o xprtsock.o sched.o \ rpcb_clnt.o timer.o xdr.o \ sunrpc_syms.o cache.o rpc_pipe.o \ svc_xprt.o +sunrpc-$(CONFIG_NFS_V4_1) += backchannel_rqst.o sunrpc-$(CONFIG_PROC_FS) += stats.o sunrpc-$(CONFIG_SYSCTL) += sysctl.o diff --git a/net/sunrpc/backchannel_rqst.c b/net/sunrpc/backchannel_rqst.c new file mode 100644 index 00000000000..f56e18a2349 --- /dev/null +++ b/net/sunrpc/backchannel_rqst.c @@ -0,0 +1,278 @@ +/****************************************************************************** + +(c) 2007 Network Appliance, Inc. All Rights Reserved. +(c) 2009 NetApp. All Rights Reserved. + +NetApp provides this source code under the GPL v2 License. +The GPL v2 license is available at +http://opensource.org/licenses/gpl-license.php. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR +CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +******************************************************************************/ + +#include +#include + +#ifdef RPC_DEBUG +#define RPCDBG_FACILITY RPCDBG_TRANS +#endif + +#if defined(CONFIG_NFS_V4_1) + +/* + * Helper routines that track the number of preallocation elements + * on the transport. + */ +static inline int xprt_need_to_requeue(struct rpc_xprt *xprt) +{ + return xprt->bc_alloc_count > 0; +} + +static inline void xprt_inc_alloc_count(struct rpc_xprt *xprt, unsigned int n) +{ + xprt->bc_alloc_count += n; +} + +static inline int xprt_dec_alloc_count(struct rpc_xprt *xprt, unsigned int n) +{ + return xprt->bc_alloc_count -= n; +} + +/* + * Free the preallocated rpc_rqst structure and the memory + * buffers hanging off of it. + */ +static void xprt_free_allocation(struct rpc_rqst *req) +{ + struct xdr_buf *xbufp; + + dprintk("RPC: free allocations for req= %p\n", req); + BUG_ON(test_bit(RPC_BC_PA_IN_USE, &req->rq_bc_pa_state)); + xbufp = &req->rq_private_buf; + free_page((unsigned long)xbufp->head[0].iov_base); + xbufp = &req->rq_snd_buf; + free_page((unsigned long)xbufp->head[0].iov_base); + list_del(&req->rq_bc_pa_list); + kfree(req); +} + +/* + * Preallocate up to min_reqs structures and related buffers for use + * by the backchannel. This function can be called multiple times + * when creating new sessions that use the same rpc_xprt. The + * preallocated buffers are added to the pool of resources used by + * the rpc_xprt. Anyone of these resources may be used used by an + * incoming callback request. It's up to the higher levels in the + * stack to enforce that the maximum number of session slots is not + * being exceeded. + * + * Some callback arguments can be large. For example, a pNFS server + * using multiple deviceids. The list can be unbound, but the client + * has the ability to tell the server the maximum size of the callback + * requests. Each deviceID is 16 bytes, so allocate one page + * for the arguments to have enough room to receive a number of these + * deviceIDs. The NFS client indicates to the pNFS server that its + * callback requests can be up to 4096 bytes in size. + */ +int xprt_setup_backchannel(struct rpc_xprt *xprt, unsigned int min_reqs) +{ + struct page *page_rcv = NULL, *page_snd = NULL; + struct xdr_buf *xbufp = NULL; + struct rpc_rqst *req, *tmp; + struct list_head tmp_list; + int i; + + dprintk("RPC: setup backchannel transport\n"); + + /* + * We use a temporary list to keep track of the preallocated + * buffers. Once we're done building the list we splice it + * into the backchannel preallocation list off of the rpc_xprt + * struct. This helps minimize the amount of time the list + * lock is held on the rpc_xprt struct. It also makes cleanup + * easier in case of memory allocation errors. + */ + INIT_LIST_HEAD(&tmp_list); + for (i = 0; i < min_reqs; i++) { + /* Pre-allocate one backchannel rpc_rqst */ + req = kzalloc(sizeof(struct rpc_rqst), GFP_KERNEL); + if (req == NULL) { + printk(KERN_ERR "Failed to create bc rpc_rqst\n"); + goto out_free; + } + + /* Add the allocated buffer to the tmp list */ + dprintk("RPC: adding req= %p\n", req); + list_add(&req->rq_bc_pa_list, &tmp_list); + + req->rq_xprt = xprt; + INIT_LIST_HEAD(&req->rq_list); + INIT_LIST_HEAD(&req->rq_bc_list); + + /* Preallocate one XDR receive buffer */ + page_rcv = alloc_page(GFP_KERNEL); + if (page_rcv == NULL) { + printk(KERN_ERR "Failed to create bc receive xbuf\n"); + goto out_free; + } + xbufp = &req->rq_rcv_buf; + xbufp->head[0].iov_base = page_address(page_rcv); + xbufp->head[0].iov_len = PAGE_SIZE; + xbufp->tail[0].iov_base = NULL; + xbufp->tail[0].iov_len = 0; + xbufp->page_len = 0; + xbufp->len = PAGE_SIZE; + xbufp->buflen = PAGE_SIZE; + + /* Preallocate one XDR send buffer */ + page_snd = alloc_page(GFP_KERNEL); + if (page_snd == NULL) { + printk(KERN_ERR "Failed to create bc snd xbuf\n"); + goto out_free; + } + + xbufp = &req->rq_snd_buf; + xbufp->head[0].iov_base = page_address(page_snd); + xbufp->head[0].iov_len = 0; + xbufp->tail[0].iov_base = NULL; + xbufp->tail[0].iov_len = 0; + xbufp->page_len = 0; + xbufp->len = 0; + xbufp->buflen = PAGE_SIZE; + } + + /* + * Add the temporary list to the backchannel preallocation list + */ + spin_lock_bh(&xprt->bc_pa_lock); + list_splice(&tmp_list, &xprt->bc_pa_list); + xprt_inc_alloc_count(xprt, min_reqs); + spin_unlock_bh(&xprt->bc_pa_lock); + + dprintk("RPC: setup backchannel transport done\n"); + return 0; + +out_free: + /* + * Memory allocation failed, free the temporary list + */ + list_for_each_entry_safe(req, tmp, &tmp_list, rq_bc_pa_list) + xprt_free_allocation(req); + + dprintk("RPC: setup backchannel transport failed\n"); + return -1; +} +EXPORT_SYMBOL(xprt_setup_backchannel); + +/* + * Destroys the backchannel preallocated structures. + * Since these structures may have been allocated by multiple calls + * to xprt_setup_backchannel, we only destroy up to the maximum number + * of reqs specified by the caller. + * @xprt: the transport holding the preallocated strucures + * @max_reqs the maximum number of preallocated structures to destroy + */ +void xprt_destroy_backchannel(struct rpc_xprt *xprt, unsigned int max_reqs) +{ + struct rpc_rqst *req = NULL, *tmp = NULL; + + dprintk("RPC: destroy backchannel transport\n"); + + BUG_ON(max_reqs == 0); + spin_lock_bh(&xprt->bc_pa_lock); + xprt_dec_alloc_count(xprt, max_reqs); + list_for_each_entry_safe(req, tmp, &xprt->bc_pa_list, rq_bc_pa_list) { + dprintk("RPC: req=%p\n", req); + xprt_free_allocation(req); + if (--max_reqs == 0) + break; + } + spin_unlock_bh(&xprt->bc_pa_lock); + + dprintk("RPC: backchannel list empty= %s\n", + list_empty(&xprt->bc_pa_list) ? "true" : "false"); +} +EXPORT_SYMBOL(xprt_destroy_backchannel); + +/* + * One or more rpc_rqst structure have been preallocated during the + * backchannel setup. Buffer space for the send and private XDR buffers + * has been preallocated as well. Use xprt_alloc_bc_request to allocate + * to this request. Use xprt_free_bc_request to return it. + * + * Return an available rpc_rqst, otherwise NULL if non are available. + */ +struct rpc_rqst *xprt_alloc_bc_request(struct rpc_xprt *xprt) +{ + struct rpc_rqst *req; + + dprintk("RPC: allocate a backchannel request\n"); + spin_lock_bh(&xprt->bc_pa_lock); + if (!list_empty(&xprt->bc_pa_list)) { + req = list_first_entry(&xprt->bc_pa_list, struct rpc_rqst, + rq_bc_pa_list); + list_del(&req->rq_bc_pa_list); + } else { + req = NULL; + } + spin_unlock_bh(&xprt->bc_pa_lock); + + if (req != NULL) { + set_bit(RPC_BC_PA_IN_USE, &req->rq_bc_pa_state); + req->rq_received = 0; + req->rq_bytes_sent = 0; + memcpy(&req->rq_private_buf, &req->rq_rcv_buf, + sizeof(req->rq_private_buf)); + } + dprintk("RPC: backchannel req=%p\n", req); + return req; +} + +/* + * Return the preallocated rpc_rqst structure and XDR buffers + * associated with this rpc_task. + */ +void xprt_free_bc_request(struct rpc_rqst *req) +{ + struct rpc_xprt *xprt = req->rq_xprt; + + dprintk("RPC: free backchannel req=%p\n", req); + + smp_mb__before_clear_bit(); + BUG_ON(!test_bit(RPC_BC_PA_IN_USE, &req->rq_bc_pa_state)); + clear_bit(RPC_BC_PA_IN_USE, &req->rq_bc_pa_state); + smp_mb__after_clear_bit(); + + if (!xprt_need_to_requeue(xprt)) { + /* + * The last remaining session was destroyed while this + * entry was in use. Free the entry and don't attempt + * to add back to the list because there is no need to + * have anymore preallocated entries. + */ + dprintk("RPC: Last session removed req=%p\n", req); + xprt_free_allocation(req); + return; + } + + /* + * Return it to the list of preallocations so that it + * may be reused by a new callback request. + */ + spin_lock_bh(&xprt->bc_pa_lock); + list_add(&req->rq_bc_pa_list, &xprt->bc_pa_list); + spin_unlock_bh(&xprt->bc_pa_lock); +} + +#endif /* CONFIG_NFS_V4_1 */ -- cgit v1.2.3-70-g09d2 From 44b98efdd0a205bdca2cb63493350d06ff6804b1 Mon Sep 17 00:00:00 2001 From: Ricardo Labiaga Date: Wed, 1 Apr 2009 09:23:02 -0400 Subject: nfs41: New xs_tcp_read_data() Handles RPC replies and backchannel callbacks. Traditionally the NFS client has expected only RPC replies on its open connections. With NFSv4.1, callbacks can arrive over an existing open connection. This patch refactors the old xs_tcp_read_request() into an RPC reply handler: xs_tcp_read_reply(), a new backchannel callback handler: xs_tcp_read_callback(), and a common routine to read the data off the transport: xs_tcp_read_common(). The new xs_tcp_read_callback() queues callback requests onto a queue where the callback service (a separate thread) is listening for the processing. This patch incorporates work and suggestions from Rahul Iyer (iyer@netapp.com) and Benny Halevy (bhalevy@panasas.com). xs_tcp_read_callback() drops the connection when the number of expected callbacks is exceeded. Use xprt_force_disconnect(), ensuring tasks on the pending queue are awaken on disconnect. [nfs41: Keep track of RPC call/reply direction with a flag] [nfs41: Preallocate rpc_rqst receive buffer for handling callbacks] Signed-off-by: Ricardo Labiaga Signed-off-by: Benny Halevy [nfs41: sunrpc: xs_tcp_read_callback() should use xprt_force_disconnect()] Signed-off-by: Ricardo Labiaga Signed-off-by: Benny Halevy [Moves embedded #ifdefs into #ifdef function blocks] Signed-off-by: Ricardo Labiaga Signed-off-by: Benny Halevy --- net/sunrpc/xprtsock.c | 144 +++++++++++++++++++++++++++++++++++++++++++------- 1 file changed, 126 insertions(+), 18 deletions(-) (limited to 'net') diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index a48df1449ec..e3e3a57116f 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -34,6 +34,9 @@ #include #include #include +#ifdef CONFIG_NFS_V4_1 +#include +#endif #include #include @@ -1044,25 +1047,16 @@ static inline void xs_tcp_read_calldir(struct sock_xprt *transport, xs_tcp_check_fraghdr(transport); } -static inline void xs_tcp_read_request(struct rpc_xprt *xprt, struct xdr_skb_reader *desc) +static inline void xs_tcp_read_common(struct rpc_xprt *xprt, + struct xdr_skb_reader *desc, + struct rpc_rqst *req) { - struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt); - struct rpc_rqst *req; + struct sock_xprt *transport = + container_of(xprt, struct sock_xprt, xprt); struct xdr_buf *rcvbuf; size_t len; ssize_t r; - /* Find and lock the request corresponding to this xid */ - spin_lock(&xprt->transport_lock); - req = xprt_lookup_rqst(xprt, transport->tcp_xid); - if (!req) { - transport->tcp_flags &= ~TCP_RCV_COPY_DATA; - dprintk("RPC: XID %08x request not found!\n", - ntohl(transport->tcp_xid)); - spin_unlock(&xprt->transport_lock); - return; - } - rcvbuf = &req->rq_private_buf; if (transport->tcp_flags & TCP_RCV_COPY_CALLDIR) { @@ -1114,7 +1108,7 @@ static inline void xs_tcp_read_request(struct rpc_xprt *xprt, struct xdr_skb_rea "tcp_offset = %u, tcp_reclen = %u\n", xprt, transport->tcp_copied, transport->tcp_offset, transport->tcp_reclen); - goto out; + return; } dprintk("RPC: XID %08x read %Zd bytes\n", @@ -1130,11 +1124,125 @@ static inline void xs_tcp_read_request(struct rpc_xprt *xprt, struct xdr_skb_rea transport->tcp_flags &= ~TCP_RCV_COPY_DATA; } -out: + return; +} + +/* + * Finds the request corresponding to the RPC xid and invokes the common + * tcp read code to read the data. + */ +static inline int xs_tcp_read_reply(struct rpc_xprt *xprt, + struct xdr_skb_reader *desc) +{ + struct sock_xprt *transport = + container_of(xprt, struct sock_xprt, xprt); + struct rpc_rqst *req; + + dprintk("RPC: read reply XID %08x\n", ntohl(transport->tcp_xid)); + + /* Find and lock the request corresponding to this xid */ + spin_lock(&xprt->transport_lock); + req = xprt_lookup_rqst(xprt, transport->tcp_xid); + if (!req) { + dprintk("RPC: XID %08x request not found!\n", + ntohl(transport->tcp_xid)); + spin_unlock(&xprt->transport_lock); + return -1; + } + + xs_tcp_read_common(xprt, desc, req); + if (!(transport->tcp_flags & TCP_RCV_COPY_DATA)) xprt_complete_rqst(req->rq_task, transport->tcp_copied); + spin_unlock(&xprt->transport_lock); - xs_tcp_check_fraghdr(transport); + return 0; +} + +#if defined(CONFIG_NFS_V4_1) +/* + * Obtains an rpc_rqst previously allocated and invokes the common + * tcp read code to read the data. The result is placed in the callback + * queue. + * If we're unable to obtain the rpc_rqst we schedule the closing of the + * connection and return -1. + */ +static inline int xs_tcp_read_callback(struct rpc_xprt *xprt, + struct xdr_skb_reader *desc) +{ + struct sock_xprt *transport = + container_of(xprt, struct sock_xprt, xprt); + struct rpc_rqst *req; + + req = xprt_alloc_bc_request(xprt); + if (req == NULL) { + printk(KERN_WARNING "Callback slot table overflowed\n"); + xprt_force_disconnect(xprt); + return -1; + } + + req->rq_xid = transport->tcp_xid; + dprintk("RPC: read callback XID %08x\n", ntohl(req->rq_xid)); + xs_tcp_read_common(xprt, desc, req); + + if (!(transport->tcp_flags & TCP_RCV_COPY_DATA)) { + struct svc_serv *bc_serv = xprt->bc_serv; + + /* + * Add callback request to callback list. The callback + * service sleeps on the sv_cb_waitq waiting for new + * requests. Wake it up after adding enqueing the + * request. + */ + dprintk("RPC: add callback request to list\n"); + spin_lock(&bc_serv->sv_cb_lock); + list_add(&req->rq_bc_list, &bc_serv->sv_cb_list); + spin_unlock(&bc_serv->sv_cb_lock); + wake_up(&bc_serv->sv_cb_waitq); + } + + req->rq_private_buf.len = transport->tcp_copied; + + return 0; +} + +static inline int _xs_tcp_read_data(struct rpc_xprt *xprt, + struct xdr_skb_reader *desc) +{ + struct sock_xprt *transport = + container_of(xprt, struct sock_xprt, xprt); + + return (transport->tcp_flags & TCP_RPC_REPLY) ? + xs_tcp_read_reply(xprt, desc) : + xs_tcp_read_callback(xprt, desc); +} +#else +static inline int _xs_tcp_read_data(struct rpc_xprt *xprt, + struct xdr_skb_reader *desc) +{ + return xs_tcp_read_reply(xprt, desc); +} +#endif /* CONFIG_NFS_V4_1 */ + +/* + * Read data off the transport. This can be either an RPC_CALL or an + * RPC_REPLY. Relay the processing to helper functions. + */ +static void xs_tcp_read_data(struct rpc_xprt *xprt, + struct xdr_skb_reader *desc) +{ + struct sock_xprt *transport = + container_of(xprt, struct sock_xprt, xprt); + + if (_xs_tcp_read_data(xprt, desc) == 0) + xs_tcp_check_fraghdr(transport); + else { + /* + * The transport_lock protects the request handling. + * There's no need to hold it to update the tcp_flags. + */ + transport->tcp_flags &= ~TCP_RCV_COPY_DATA; + } } static inline void xs_tcp_read_discard(struct sock_xprt *transport, struct xdr_skb_reader *desc) @@ -1181,7 +1289,7 @@ static int xs_tcp_data_recv(read_descriptor_t *rd_desc, struct sk_buff *skb, uns } /* Read in the request data */ if (transport->tcp_flags & TCP_RCV_COPY_DATA) { - xs_tcp_read_request(xprt, &desc); + xs_tcp_read_data(xprt, &desc); continue; } /* Skip over any trailing bytes on short reads */ -- cgit v1.2.3-70-g09d2 From 88b5ed73bcd0f21e008b6e303a02c8b7cb1199f4 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Wed, 17 Jun 2009 13:22:57 -0700 Subject: SUNRPC: Fix a missing "break" option in xs_tcp_setup_socket() In the case of -EADDRNOTAVAIL and/or unhandled connection errors, we want to get rid of the existing socket and retry immediately, just as the comment says. Currently we end up sleeping for a minute, due to the missing "break" statement. Signed-off-by: Trond Myklebust --- net/sunrpc/xprtsock.c | 1 + 1 file changed, 1 insertion(+) (limited to 'net') diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index 6c2d6158655..f7f3dfd211e 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -1792,6 +1792,7 @@ static void xs_tcp_setup_socket(struct rpc_xprt *xprt, */ set_bit(XPRT_CONNECTION_CLOSE, &xprt->state); xprt_force_disconnect(xprt); + break; case -ECONNREFUSED: case -ECONNRESET: case -ENETUNREACH: -- cgit v1.2.3-70-g09d2 From 55ae1aabfb108106dd095de2578ceef1c755a8b8 Mon Sep 17 00:00:00 2001 From: Ricardo Labiaga Date: Wed, 1 Apr 2009 09:23:03 -0400 Subject: nfs41: Add backchannel processing support to RPC state machine Adds rpc_run_bc_task() which is called by the NFS callback service to process backchannel requests. It performs similar work to rpc_run_task() though "schedules" the backchannel task to be executed starting at the call_trasmit state in the RPC state machine. It also introduces some miscellaneous updates to the argument validation, call_transmit, and transport cleanup functions to take into account that there are now forechannel and backchannel tasks. Backchannel requests do not carry an RPC message structure, since the payload has already been XDR encoded using the existing NFSv4 callback mechanism. Introduce a new transmit state for the client to reply on to backchannel requests. This new state simply reserves the transport and issues the reply. In case of a connection related error, disconnects the transport and drops the reply. It requires the forechannel to re-establish the connection and the server to retransmit the request, as stated in NFSv4.1 section 2.9.2 "Client and Server Transport Behavior". Note: There is no need to loop attempting to reserve the transport. If EAGAIN is returned by xprt_prepare_transmit(), return with tk_status == 0, setting tk_action to call_bc_transmit. rpc_execute() will invoke it again after the task is taken off the sleep queue. [nfs41: rpc_run_bc_task() need not be exported outside RPC module] [nfs41: New call_bc_transmit RPC state] Signed-off-by: Ricardo Labiaga Signed-off-by: Benny Halevy [nfs41: Backchannel: No need to loop in call_bc_transmit()] Signed-off-by: Andy Adamson Signed-off-by: Ricardo Labiaga Signed-off-by: Benny Halevy [rpc_count_iostats incorrectly exits early] Signed-off-by: Ricardo Labiaga Signed-off-by: Benny Halevy [Convert rpc_reply_expected() to inline function] [Remove unnecessary BUG_ON()] [Rename variable] Signed-off-by: Ricardo Labiaga Signed-off-by: Benny Halevy --- include/linux/sunrpc/sched.h | 2 + include/linux/sunrpc/xprt.h | 12 +++++ net/sunrpc/clnt.c | 117 ++++++++++++++++++++++++++++++++++++++++++- net/sunrpc/stats.c | 6 ++- net/sunrpc/sunrpc.h | 37 ++++++++++++++ net/sunrpc/xprt.c | 38 +++++++++++--- 6 files changed, 203 insertions(+), 9 deletions(-) create mode 100644 net/sunrpc/sunrpc.h (limited to 'net') diff --git a/include/linux/sunrpc/sched.h b/include/linux/sunrpc/sched.h index 177376880fa..401097781fc 100644 --- a/include/linux/sunrpc/sched.h +++ b/include/linux/sunrpc/sched.h @@ -210,6 +210,8 @@ struct rpc_wait_queue { */ struct rpc_task *rpc_new_task(const struct rpc_task_setup *); struct rpc_task *rpc_run_task(const struct rpc_task_setup *); +struct rpc_task *rpc_run_bc_task(struct rpc_rqst *req, + const struct rpc_call_ops *ops); void rpc_put_task(struct rpc_task *); void rpc_exit_task(struct rpc_task *); void rpc_release_calldata(const struct rpc_call_ops *, void *); diff --git a/include/linux/sunrpc/xprt.h b/include/linux/sunrpc/xprt.h index beae030e80b..55c6c37e249 100644 --- a/include/linux/sunrpc/xprt.h +++ b/include/linux/sunrpc/xprt.h @@ -215,6 +215,18 @@ struct rpc_xprt { /* buffer in use */ #endif /* CONFIG_NFS_V4_1 */ +#if defined(CONFIG_NFS_V4_1) +static inline int bc_prealloc(struct rpc_rqst *req) +{ + return test_bit(RPC_BC_PA_IN_USE, &req->rq_bc_pa_state); +} +#else +static inline int bc_prealloc(struct rpc_rqst *req) +{ + return 0; +} +#endif /* CONFIG_NFS_V4_1 */ + struct xprt_create { int ident; /* XPRT_TRANSPORT identifier */ struct sockaddr * srcaddr; /* optional local address */ diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index aca3ab6fc14..f3e93b8eb90 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -36,7 +36,9 @@ #include #include #include +#include +#include "sunrpc.h" #ifdef RPC_DEBUG # define RPCDBG_FACILITY RPCDBG_CALL @@ -63,6 +65,9 @@ static void call_decode(struct rpc_task *task); static void call_bind(struct rpc_task *task); static void call_bind_status(struct rpc_task *task); static void call_transmit(struct rpc_task *task); +#if defined(CONFIG_NFS_V4_1) +static void call_bc_transmit(struct rpc_task *task); +#endif /* CONFIG_NFS_V4_1 */ static void call_status(struct rpc_task *task); static void call_transmit_status(struct rpc_task *task); static void call_refresh(struct rpc_task *task); @@ -613,6 +618,50 @@ rpc_call_async(struct rpc_clnt *clnt, const struct rpc_message *msg, int flags, } EXPORT_SYMBOL_GPL(rpc_call_async); +#if defined(CONFIG_NFS_V4_1) +/** + * rpc_run_bc_task - Allocate a new RPC task for backchannel use, then run + * rpc_execute against it + * @ops: RPC call ops + */ +struct rpc_task *rpc_run_bc_task(struct rpc_rqst *req, + const struct rpc_call_ops *tk_ops) +{ + struct rpc_task *task; + struct xdr_buf *xbufp = &req->rq_snd_buf; + struct rpc_task_setup task_setup_data = { + .callback_ops = tk_ops, + }; + + dprintk("RPC: rpc_run_bc_task req= %p\n", req); + /* + * Create an rpc_task to send the data + */ + task = rpc_new_task(&task_setup_data); + if (!task) { + xprt_free_bc_request(req); + goto out; + } + task->tk_rqstp = req; + + /* + * Set up the xdr_buf length. + * This also indicates that the buffer is XDR encoded already. + */ + xbufp->len = xbufp->head[0].iov_len + xbufp->page_len + + xbufp->tail[0].iov_len; + + task->tk_action = call_bc_transmit; + atomic_inc(&task->tk_count); + BUG_ON(atomic_read(&task->tk_count) != 2); + rpc_execute(task); + +out: + dprintk("RPC: rpc_run_bc_task: task= %p\n", task); + return task; +} +#endif /* CONFIG_NFS_V4_1 */ + void rpc_call_start(struct rpc_task *task) { @@ -1098,7 +1147,7 @@ call_transmit(struct rpc_task *task) * in order to allow access to the socket to other RPC requests. */ call_transmit_status(task); - if (task->tk_msg.rpc_proc->p_decode != NULL) + if (rpc_reply_expected(task)) return; task->tk_action = rpc_exit_task; rpc_wake_up_queued_task(&task->tk_xprt->pending, task); @@ -1133,6 +1182,72 @@ call_transmit_status(struct rpc_task *task) } } +#if defined(CONFIG_NFS_V4_1) +/* + * 5b. Send the backchannel RPC reply. On error, drop the reply. In + * addition, disconnect on connectivity errors. + */ +static void +call_bc_transmit(struct rpc_task *task) +{ + struct rpc_rqst *req = task->tk_rqstp; + + BUG_ON(task->tk_status != 0); + task->tk_status = xprt_prepare_transmit(task); + if (task->tk_status == -EAGAIN) { + /* + * Could not reserve the transport. Try again after the + * transport is released. + */ + task->tk_status = 0; + task->tk_action = call_bc_transmit; + return; + } + + task->tk_action = rpc_exit_task; + if (task->tk_status < 0) { + printk(KERN_NOTICE "RPC: Could not send backchannel reply " + "error: %d\n", task->tk_status); + return; + } + + xprt_transmit(task); + xprt_end_transmit(task); + dprint_status(task); + switch (task->tk_status) { + case 0: + /* Success */ + break; + case -EHOSTDOWN: + case -EHOSTUNREACH: + case -ENETUNREACH: + case -ETIMEDOUT: + /* + * Problem reaching the server. Disconnect and let the + * forechannel reestablish the connection. The server will + * have to retransmit the backchannel request and we'll + * reprocess it. Since these ops are idempotent, there's no + * need to cache our reply at this time. + */ + printk(KERN_NOTICE "RPC: Could not send backchannel reply " + "error: %d\n", task->tk_status); + xprt_conditional_disconnect(task->tk_xprt, + req->rq_connect_cookie); + break; + default: + /* + * We were unable to reply and will have to drop the + * request. The server should reconnect and retransmit. + */ + BUG_ON(task->tk_status == -EAGAIN); + printk(KERN_NOTICE "RPC: Could not send backchannel reply " + "error: %d\n", task->tk_status); + break; + } + rpc_wake_up_queued_task(&req->rq_xprt->pending, task); +} +#endif /* CONFIG_NFS_V4_1 */ + /* * 6. Sort out the RPC call status */ diff --git a/net/sunrpc/stats.c b/net/sunrpc/stats.c index 1ef6e46d9da..8487aa0f1f5 100644 --- a/net/sunrpc/stats.c +++ b/net/sunrpc/stats.c @@ -141,12 +141,14 @@ EXPORT_SYMBOL_GPL(rpc_free_iostats); void rpc_count_iostats(struct rpc_task *task) { struct rpc_rqst *req = task->tk_rqstp; - struct rpc_iostats *stats = task->tk_client->cl_metrics; + struct rpc_iostats *stats; struct rpc_iostats *op_metrics; long rtt, execute, queue; - if (!stats || !req) + if (!task->tk_client || !task->tk_client->cl_metrics || !req) return; + + stats = task->tk_client->cl_metrics; op_metrics = &stats[task->tk_msg.rpc_proc->p_statidx]; op_metrics->om_ops++; diff --git a/net/sunrpc/sunrpc.h b/net/sunrpc/sunrpc.h new file mode 100644 index 00000000000..5d9dd742264 --- /dev/null +++ b/net/sunrpc/sunrpc.h @@ -0,0 +1,37 @@ +/****************************************************************************** + +(c) 2008 NetApp. All Rights Reserved. + +NetApp provides this source code under the GPL v2 License. +The GPL v2 license is available at +http://opensource.org/licenses/gpl-license.php. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR +CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +******************************************************************************/ + +/* + * Functions and macros used internally by RPC + */ + +#ifndef _NET_SUNRPC_SUNRPC_H +#define _NET_SUNRPC_SUNRPC_H + +static inline int rpc_reply_expected(struct rpc_task *task) +{ + return (task->tk_msg.rpc_proc != NULL) && + (task->tk_msg.rpc_proc->p_decode != NULL); +} + +#endif /* _NET_SUNRPC_SUNRPC_H */ + diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c index 52739f82df1..0eea2bfe111 100644 --- a/net/sunrpc/xprt.c +++ b/net/sunrpc/xprt.c @@ -12,8 +12,9 @@ * - Next, the caller puts together the RPC message, stuffs it into * the request struct, and calls xprt_transmit(). * - xprt_transmit sends the message and installs the caller on the - * transport's wait list. At the same time, it installs a timer that - * is run after the packet's timeout has expired. + * transport's wait list. At the same time, if a reply is expected, + * it installs a timer that is run after the packet's timeout has + * expired. * - When a packet arrives, the data_ready handler walks the list of * pending requests for that transport. If a matching XID is found, the * caller is woken up, and the timer removed. @@ -46,6 +47,8 @@ #include #include +#include "sunrpc.h" + /* * Local variables */ @@ -873,7 +876,10 @@ void xprt_transmit(struct rpc_task *task) dprintk("RPC: %5u xprt_transmit(%u)\n", task->tk_pid, req->rq_slen); if (!req->rq_received) { - if (list_empty(&req->rq_list)) { + if (list_empty(&req->rq_list) && rpc_reply_expected(task)) { + /* + * Add to the list only if we're expecting a reply + */ spin_lock_bh(&xprt->transport_lock); /* Update the softirq receive buffer */ memcpy(&req->rq_private_buf, &req->rq_rcv_buf, @@ -908,8 +914,13 @@ void xprt_transmit(struct rpc_task *task) /* Don't race with disconnect */ if (!xprt_connected(xprt)) task->tk_status = -ENOTCONN; - else if (!req->rq_received) + else if (!req->rq_received && rpc_reply_expected(task)) { + /* + * Sleep on the pending queue since + * we're expecting a reply. + */ rpc_sleep_on(&xprt->pending, task, xprt_timer); + } spin_unlock_bh(&xprt->transport_lock); } @@ -982,11 +993,17 @@ static void xprt_request_init(struct rpc_task *task, struct rpc_xprt *xprt) */ void xprt_release(struct rpc_task *task) { - struct rpc_xprt *xprt = task->tk_xprt; + struct rpc_xprt *xprt; struct rpc_rqst *req; + int is_bc_request; if (!(req = task->tk_rqstp)) return; + + /* Preallocated backchannel request? */ + is_bc_request = bc_prealloc(req); + + xprt = req->rq_xprt; rpc_count_iostats(task); spin_lock_bh(&xprt->transport_lock); xprt->ops->release_xprt(xprt, task); @@ -999,10 +1016,19 @@ void xprt_release(struct rpc_task *task) mod_timer(&xprt->timer, xprt->last_used + xprt->idle_timeout); spin_unlock_bh(&xprt->transport_lock); - xprt->ops->buf_free(req->rq_buffer); + if (!bc_prealloc(req)) + xprt->ops->buf_free(req->rq_buffer); task->tk_rqstp = NULL; if (req->rq_release_snd_buf) req->rq_release_snd_buf(req); + + /* + * Early exit if this is a backchannel preallocated request. + * There is no need to have it added to the RPC slot list. + */ + if (is_bc_request) + return; + memset(req, 0, sizeof(*req)); /* mark unused */ dprintk("RPC: %5u release request %p\n", task->tk_pid, req); -- cgit v1.2.3-70-g09d2 From 0d90ba1cd416525c4825c111db862d8b15a02e9b Mon Sep 17 00:00:00 2001 From: Ricardo Labiaga Date: Wed, 1 Apr 2009 09:23:04 -0400 Subject: nfs41: Backchannel callback service helper routines Executes the backchannel task on the RPC state machine using the existing open connection previously established by the client. Signed-off-by: Ricardo Labiaga nfs41: Add bc_svc.o to sunrpc Makefile. [nfs41: bc_send() does not need to be exported outside RPC module] [nfs41: xprt_free_bc_request() need not be exported outside RPC module] Signed-off-by: Ricardo Labiaga Signed-off-by: Benny Halevy [Update copyright] Signed-off-by: Ricardo Labiaga Signed-off-by: Benny Halevy --- include/linux/sunrpc/bc_xprt.h | 3 ++ net/sunrpc/Makefile | 2 +- net/sunrpc/bc_svc.c | 81 ++++++++++++++++++++++++++++++++++++++++++ net/sunrpc/xprtsock.c | 3 ++ 4 files changed, 88 insertions(+), 1 deletion(-) create mode 100644 net/sunrpc/bc_svc.c (limited to 'net') diff --git a/include/linux/sunrpc/bc_xprt.h b/include/linux/sunrpc/bc_xprt.h index 5965ae4f902..6508f0dc0ef 100644 --- a/include/linux/sunrpc/bc_xprt.h +++ b/include/linux/sunrpc/bc_xprt.h @@ -29,12 +29,15 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include #include +#include #ifdef CONFIG_NFS_V4_1 struct rpc_rqst *xprt_alloc_bc_request(struct rpc_xprt *xprt); void xprt_free_bc_request(struct rpc_rqst *req); int xprt_setup_backchannel(struct rpc_xprt *, unsigned int min_reqs); void xprt_destroy_backchannel(struct rpc_xprt *, int max_reqs); +void bc_release_request(struct rpc_task *); +int bc_send(struct rpc_rqst *req); #else /* CONFIG_NFS_V4_1 */ static inline int xprt_setup_backchannel(struct rpc_xprt *xprt, unsigned int min_reqs) diff --git a/net/sunrpc/Makefile b/net/sunrpc/Makefile index 4a01f9684b8..db73fd2a3f0 100644 --- a/net/sunrpc/Makefile +++ b/net/sunrpc/Makefile @@ -13,6 +13,6 @@ sunrpc-y := clnt.o xprt.o socklib.o xprtsock.o sched.o \ rpcb_clnt.o timer.o xdr.o \ sunrpc_syms.o cache.o rpc_pipe.o \ svc_xprt.o -sunrpc-$(CONFIG_NFS_V4_1) += backchannel_rqst.o +sunrpc-$(CONFIG_NFS_V4_1) += backchannel_rqst.o bc_svc.o sunrpc-$(CONFIG_PROC_FS) += stats.o sunrpc-$(CONFIG_SYSCTL) += sysctl.o diff --git a/net/sunrpc/bc_svc.c b/net/sunrpc/bc_svc.c new file mode 100644 index 00000000000..13f214f5312 --- /dev/null +++ b/net/sunrpc/bc_svc.c @@ -0,0 +1,81 @@ +/****************************************************************************** + +(c) 2007 Network Appliance, Inc. All Rights Reserved. +(c) 2009 NetApp. All Rights Reserved. + +NetApp provides this source code under the GPL v2 License. +The GPL v2 license is available at +http://opensource.org/licenses/gpl-license.php. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR +CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +******************************************************************************/ + +/* + * The NFSv4.1 callback service helper routines. + * They implement the transport level processing required to send the + * reply over an existing open connection previously established by the client. + */ + +#if defined(CONFIG_NFS_V4_1) + +#include + +#include +#include +#include + +#define RPCDBG_FACILITY RPCDBG_SVCDSP + +void bc_release_request(struct rpc_task *task) +{ + struct rpc_rqst *req = task->tk_rqstp; + + dprintk("RPC: bc_release_request: task= %p\n", task); + + /* + * Release this request only if it's a backchannel + * preallocated request + */ + if (!bc_prealloc(req)) + return; + xprt_free_bc_request(req); +} + +/* Empty callback ops */ +static const struct rpc_call_ops nfs41_callback_ops = { +}; + + +/* + * Send the callback reply + */ +int bc_send(struct rpc_rqst *req) +{ + struct rpc_task *task; + int ret; + + dprintk("RPC: bc_send req= %p\n", req); + task = rpc_run_bc_task(req, &nfs41_callback_ops); + if (IS_ERR(task)) + ret = PTR_ERR(task); + else { + BUG_ON(atomic_read(&task->tk_count) != 1); + ret = task->tk_status; + rpc_put_task(task); + } + return ret; + dprintk("RPC: bc_send ret= %d \n", ret); +} + +#endif /* CONFIG_NFS_V4_1 */ diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index e3e3a57116f..8a721867b60 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -2183,6 +2183,9 @@ static struct rpc_xprt_ops xs_tcp_ops = { .buf_free = rpc_free, .send_request = xs_tcp_send_request, .set_retrans_timeout = xprt_set_retrans_timeout_def, +#if defined(CONFIG_NFS_V4_1) + .release_request = bc_release_request, +#endif /* CONFIG_NFS_V4_1 */ .close = xs_tcp_close, .destroy = xs_destroy, .print_stats = xs_tcp_print_stats, -- cgit v1.2.3-70-g09d2 From 1cad7ea6fe98dc414bd3df55275c147bd15ebf97 Mon Sep 17 00:00:00 2001 From: Ricardo Labiaga Date: Wed, 1 Apr 2009 09:23:06 -0400 Subject: nfs41: Refactor svc_process() net/sunrpc/svc.c:svc_process() is used by the NFSv4 callback service to process RPC requests arriving over connections initiated by the server. NFSv4.1 supports callbacks over the backchannel on connections initiated by the client. This patch refactors svc_process() so that common code can also be used by the backchannel. Signed-off-by: Ricardo Labiaga Signed-off-by: Benny Halevy --- net/sunrpc/svc.c | 80 +++++++++++++++++++++++++++++++++++--------------------- 1 file changed, 50 insertions(+), 30 deletions(-) (limited to 'net') diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c index 8847add6ca1..bfda66db2f4 100644 --- a/net/sunrpc/svc.c +++ b/net/sunrpc/svc.c @@ -970,20 +970,18 @@ svc_printk(struct svc_rqst *rqstp, const char *fmt, ...) } /* - * Process the RPC request. + * Common routine for processing the RPC request. */ -int -svc_process(struct svc_rqst *rqstp) +static int +svc_process_common(struct svc_rqst *rqstp, struct kvec *argv, struct kvec *resv) { struct svc_program *progp; struct svc_version *versp = NULL; /* compiler food */ struct svc_procedure *procp = NULL; - struct kvec * argv = &rqstp->rq_arg.head[0]; - struct kvec * resv = &rqstp->rq_res.head[0]; struct svc_serv *serv = rqstp->rq_server; kxdrproc_t xdr; __be32 *statp; - u32 dir, prog, vers, proc; + u32 prog, vers, proc; __be32 auth_stat, rpc_stat; int auth_res; __be32 *reply_statp; @@ -993,19 +991,6 @@ svc_process(struct svc_rqst *rqstp) if (argv->iov_len < 6*4) goto err_short_len; - /* setup response xdr_buf. - * Initially it has just one page - */ - rqstp->rq_resused = 1; - resv->iov_base = page_address(rqstp->rq_respages[0]); - resv->iov_len = 0; - rqstp->rq_res.pages = rqstp->rq_respages + 1; - rqstp->rq_res.len = 0; - rqstp->rq_res.page_base = 0; - rqstp->rq_res.page_len = 0; - rqstp->rq_res.buflen = PAGE_SIZE; - rqstp->rq_res.tail[0].iov_base = NULL; - rqstp->rq_res.tail[0].iov_len = 0; /* Will be turned off only in gss privacy case: */ rqstp->rq_splice_ok = 1; /* Will be turned off only when NFSv4 Sessions are used */ @@ -1014,17 +999,13 @@ svc_process(struct svc_rqst *rqstp) /* Setup reply header */ rqstp->rq_xprt->xpt_ops->xpo_prep_reply_hdr(rqstp); - rqstp->rq_xid = svc_getu32(argv); svc_putu32(resv, rqstp->rq_xid); - dir = svc_getnl(argv); vers = svc_getnl(argv); /* First words of reply: */ svc_putnl(resv, 1); /* REPLY */ - if (dir != 0) /* direction != CALL */ - goto err_bad_dir; if (vers != 2) /* RPC version number */ goto err_bad_rpc; @@ -1147,7 +1128,7 @@ svc_process(struct svc_rqst *rqstp) sendit: if (svc_authorise(rqstp)) goto dropit; - return svc_send(rqstp); + return 1; /* Caller can now send it */ dropit: svc_authorise(rqstp); /* doesn't hurt to call this twice */ @@ -1161,12 +1142,6 @@ err_short_len: goto dropit; /* drop request */ -err_bad_dir: - svc_printk(rqstp, "bad direction %d, dropping request\n", dir); - - serv->sv_stats->rpcbadfmt++; - goto dropit; /* drop request */ - err_bad_rpc: serv->sv_stats->rpcbadfmt++; svc_putnl(resv, 1); /* REJECT */ @@ -1219,6 +1194,51 @@ err_bad: } EXPORT_SYMBOL_GPL(svc_process); +/* + * Process the RPC request. + */ +int +svc_process(struct svc_rqst *rqstp) +{ + struct kvec *argv = &rqstp->rq_arg.head[0]; + struct kvec *resv = &rqstp->rq_res.head[0]; + struct svc_serv *serv = rqstp->rq_server; + u32 dir; + int error; + + /* + * Setup response xdr_buf. + * Initially it has just one page + */ + rqstp->rq_resused = 1; + resv->iov_base = page_address(rqstp->rq_respages[0]); + resv->iov_len = 0; + rqstp->rq_res.pages = rqstp->rq_respages + 1; + rqstp->rq_res.len = 0; + rqstp->rq_res.page_base = 0; + rqstp->rq_res.page_len = 0; + rqstp->rq_res.buflen = PAGE_SIZE; + rqstp->rq_res.tail[0].iov_base = NULL; + rqstp->rq_res.tail[0].iov_len = 0; + + rqstp->rq_xid = svc_getu32(argv); + + dir = svc_getnl(argv); + if (dir != 0) { + /* direction != CALL */ + svc_printk(rqstp, "bad direction %d, dropping request\n", dir); + serv->sv_stats->rpcbadfmt++; + svc_drop(rqstp); + return 0; + } + + error = svc_process_common(rqstp, argv, resv); + if (error <= 0) + return error; + + return svc_send(rqstp); +} + /* * Return (transport-specific) limit on the rpc payload. */ -- cgit v1.2.3-70-g09d2 From 4d6bbb6233c9cf23822a2f66f8470c9f40854b77 Mon Sep 17 00:00:00 2001 From: Ricardo Labiaga Date: Wed, 1 Apr 2009 09:23:07 -0400 Subject: nfs41: Backchannel bc_svc_process() Implement the NFSv4.1 backchannel service. Invokes the common callback processing logic svc_process_common() to authenticate the call and dispatch the appropriate NFSv4.1 XDR decoder and operation procedure. It then invokes bc_send() to send the reply over the same connection. bc_send() is implemented in a separate patch. At this time there is no slot validation or reply cache handling. [nfs41: Preallocate rpc_rqst receive buffer for handling callbacks] Signed-off-by: Ricardo Labiaga Signed-off-by: Benny Halevy [Move bc_svc_process() declaration to correct patch] Signed-off-by: Ricardo Labiaga Signed-off-by: Benny Halevy --- include/linux/sunrpc/svc.h | 2 ++ net/sunrpc/svc.c | 49 ++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 51 insertions(+) (limited to 'net') diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h index 4a8afbd6200..16043c4a8bf 100644 --- a/include/linux/sunrpc/svc.h +++ b/include/linux/sunrpc/svc.h @@ -419,6 +419,8 @@ int svc_set_num_threads(struct svc_serv *, struct svc_pool *, int); int svc_pool_stats_open(struct svc_serv *serv, struct file *file); void svc_destroy(struct svc_serv *); int svc_process(struct svc_rqst *); +int bc_svc_process(struct svc_serv *, struct rpc_rqst *, + struct svc_rqst *); int svc_register(const struct svc_serv *, const int, const unsigned short, const unsigned short); diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c index bfda66db2f4..06b52e465f4 100644 --- a/net/sunrpc/svc.c +++ b/net/sunrpc/svc.c @@ -25,6 +25,7 @@ #include #include #include +#include #define RPCDBG_FACILITY RPCDBG_SVCDSP @@ -1239,6 +1240,54 @@ svc_process(struct svc_rqst *rqstp) return svc_send(rqstp); } +#if defined(CONFIG_NFS_V4_1) +/* + * Process a backchannel RPC request that arrived over an existing + * outbound connection + */ +int +bc_svc_process(struct svc_serv *serv, struct rpc_rqst *req, + struct svc_rqst *rqstp) +{ + struct kvec *argv = &rqstp->rq_arg.head[0]; + struct kvec *resv = &rqstp->rq_res.head[0]; + int error; + + /* Build the svc_rqst used by the common processing routine */ + rqstp->rq_xid = req->rq_xid; + rqstp->rq_prot = req->rq_xprt->prot; + rqstp->rq_server = serv; + + rqstp->rq_addrlen = sizeof(req->rq_xprt->addr); + memcpy(&rqstp->rq_addr, &req->rq_xprt->addr, rqstp->rq_addrlen); + memcpy(&rqstp->rq_arg, &req->rq_rcv_buf, sizeof(rqstp->rq_arg)); + memcpy(&rqstp->rq_res, &req->rq_snd_buf, sizeof(rqstp->rq_res)); + + /* reset result send buffer "put" position */ + resv->iov_len = 0; + + if (rqstp->rq_prot != IPPROTO_TCP) { + printk(KERN_ERR "No support for Non-TCP transports!\n"); + BUG(); + } + + /* + * Skip the next two words because they've already been + * processed in the trasport + */ + svc_getu32(argv); /* XID */ + svc_getnl(argv); /* CALLDIR */ + + error = svc_process_common(rqstp, argv, resv); + if (error <= 0) + return error; + + memcpy(&req->rq_snd_buf, &rqstp->rq_res, sizeof(req->rq_snd_buf)); + return bc_send(req); +} +EXPORT_SYMBOL(bc_svc_process); +#endif /* CONFIG_NFS_V4_1 */ + /* * Return (transport-specific) limit on the rpc payload. */ -- cgit v1.2.3-70-g09d2 From 7652e5a09ba319241607b22d9055ce93fd5b8039 Mon Sep 17 00:00:00 2001 From: Benny Halevy Date: Wed, 1 Apr 2009 09:23:09 -0400 Subject: nfs41: sunrpc: provide functions to create and destroy a svc_xprt for backchannel use For nfs41 callbacks we need an svc_xprt to process requests coming up the backchannel socket as rpc_rqst's that are transformed into svc_rqst's that need a rq_xprt to be processed. The svc_{udp,tcp}_create methods are too heavy for this job as svc_create_socket creates an actual socket to listen on while for nfs41 we're "reusing" the fore channel's socket. Signed-off-by: Benny Halevy --- include/linux/sunrpc/svcsock.h | 2 ++ net/sunrpc/svcsock.c | 39 +++++++++++++++++++++++++++++++++++++++ 2 files changed, 41 insertions(+) (limited to 'net') diff --git a/include/linux/sunrpc/svcsock.h b/include/linux/sunrpc/svcsock.h index 483e10380aa..6bb1ec4ae31 100644 --- a/include/linux/sunrpc/svcsock.h +++ b/include/linux/sunrpc/svcsock.h @@ -42,6 +42,8 @@ int svc_sock_names(char *buf, struct svc_serv *serv, char *toclose); int svc_addsock(struct svc_serv *serv, int fd, char *name_return); void svc_init_xprt_sock(void); void svc_cleanup_xprt_sock(void); +struct svc_xprt *svc_sock_create(struct svc_serv *serv, int prot); +void svc_sock_destroy(struct svc_xprt *); /* * svc_makesock socket characteristics diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c index 9d504234af4..a2a03e50053 100644 --- a/net/sunrpc/svcsock.c +++ b/net/sunrpc/svcsock.c @@ -1327,3 +1327,42 @@ static void svc_sock_free(struct svc_xprt *xprt) sock_release(svsk->sk_sock); kfree(svsk); } + +/* + * Create a svc_xprt. + * + * For internal use only (e.g. nfsv4.1 backchannel). + * Callers should typically use the xpo_create() method. + */ +struct svc_xprt *svc_sock_create(struct svc_serv *serv, int prot) +{ + struct svc_sock *svsk; + struct svc_xprt *xprt = NULL; + + dprintk("svc: %s\n", __func__); + svsk = kzalloc(sizeof(*svsk), GFP_KERNEL); + if (!svsk) + goto out; + + xprt = &svsk->sk_xprt; + if (prot == IPPROTO_TCP) + svc_xprt_init(&svc_tcp_class, xprt, serv); + else if (prot == IPPROTO_UDP) + svc_xprt_init(&svc_udp_class, xprt, serv); + else + BUG(); +out: + dprintk("svc: %s return %p\n", __func__, xprt); + return xprt; +} +EXPORT_SYMBOL_GPL(svc_sock_create); + +/* + * Destroy a svc_sock. + */ +void svc_sock_destroy(struct svc_xprt *xprt) +{ + if (xprt) + kfree(container_of(xprt, struct svc_sock, sk_xprt)); +} +EXPORT_SYMBOL_GPL(svc_sock_destroy); -- cgit v1.2.3-70-g09d2 From 9c9f3f5fa62cc4959e4d4d1cf1ec74f2d6ac1197 Mon Sep 17 00:00:00 2001 From: Andy Adamson Date: Wed, 1 Apr 2009 09:23:10 -0400 Subject: nfs41: sunrpc: add a struct svc_xprt pointer to struct svc_serv for backchannel use This svc_xprt is passed on to the callback service thread to be later used to processes incoming svc_rqst's Signed-off-by: Benny Halevy --- include/linux/sunrpc/svc.h | 1 + net/sunrpc/svc.c | 4 ++++ 2 files changed, 5 insertions(+) (limited to 'net') diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h index 16043c4a8bf..ea8009695c6 100644 --- a/include/linux/sunrpc/svc.h +++ b/include/linux/sunrpc/svc.h @@ -103,6 +103,7 @@ struct svc_serv { spinlock_t sv_cb_lock; /* protects the svc_cb_list */ wait_queue_head_t sv_cb_waitq; /* sleep here if there are no * entries in the svc_cb_list */ + struct svc_xprt *bc_xprt; #endif /* CONFIG_NFS_V4_1 */ }; diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c index 06b52e465f4..b35048fabe2 100644 --- a/net/sunrpc/svc.c +++ b/net/sunrpc/svc.c @@ -487,6 +487,10 @@ svc_destroy(struct svc_serv *serv) if (svc_serv_is_pooled(serv)) svc_pool_map_put(); +#if defined(CONFIG_NFS_V4_1) + svc_sock_destroy(serv->bc_xprt); +#endif /* CONFIG_NFS_V4_1 */ + svc_unregister(serv); kfree(serv->sv_pools); kfree(serv); -- cgit v1.2.3-70-g09d2 From 8f975242352e92898dc641ebff0d24808f39848a Mon Sep 17 00:00:00 2001 From: Benny Halevy Date: Wed, 1 Apr 2009 09:23:11 -0400 Subject: nfs41: create a svc_xprt for nfs41 callback thread and use for incoming callbacks Signed-off-by: Benny Halevy --- fs/nfs/callback.c | 17 ++++++++++++++++- net/sunrpc/svc.c | 1 + 2 files changed, 17 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/fs/nfs/callback.c b/fs/nfs/callback.c index 47092889806..37815f3216a 100644 --- a/fs/nfs/callback.c +++ b/fs/nfs/callback.c @@ -185,16 +185,31 @@ nfs41_callback_svc(void *vrqstp) struct svc_rqst * nfs41_callback_up(struct svc_serv *serv, struct rpc_xprt *xprt) { + struct svc_xprt *bc_xprt; + struct svc_rqst *rqstp = ERR_PTR(-ENOMEM); + + dprintk("--> %s\n", __func__); + /* Create a svc_sock for the service */ + bc_xprt = svc_sock_create(serv, xprt->prot); + if (!bc_xprt) + goto out; + /* * Save the svc_serv in the transport so that it can * be referenced when the session backchannel is initialized */ + serv->bc_xprt = bc_xprt; xprt->bc_serv = serv; INIT_LIST_HEAD(&serv->sv_cb_list); spin_lock_init(&serv->sv_cb_lock); init_waitqueue_head(&serv->sv_cb_waitq); - return svc_prepare_thread(serv, &serv->sv_pools[0]); + rqstp = svc_prepare_thread(serv, &serv->sv_pools[0]); + if (IS_ERR(rqstp)) + svc_sock_destroy(bc_xprt); +out: + dprintk("--> %s return %p\n", __func__, rqstp); + return rqstp; } static inline int nfs_minorversion_callback_svc_setup(u32 minorversion, diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c index b35048fabe2..6b90ce439c0 100644 --- a/net/sunrpc/svc.c +++ b/net/sunrpc/svc.c @@ -1258,6 +1258,7 @@ bc_svc_process(struct svc_serv *serv, struct rpc_rqst *req, int error; /* Build the svc_rqst used by the common processing routine */ + rqstp->rq_xprt = serv->bc_xprt; rqstp->rq_xid = req->rq_xid; rqstp->rq_prot = req->rq_xprt->prot; rqstp->rq_server = serv; -- cgit v1.2.3-70-g09d2 From 343952fa5aac888934ffc203abed26a823400eb6 Mon Sep 17 00:00:00 2001 From: Rahul Iyer Date: Wed, 1 Apr 2009 09:23:17 -0400 Subject: nfs41: Get the rpc_xprt * from the rpc_rqst instead of the rpc_clnt. Obtain the rpc_xprt from the rpc_rqst so that calls and callback replies can both use the same code path. A client needs the rpc_xprt in order to reply to a callback. Signed-off-by: Rahul Iyer Signed-off-by: Ricardo Labiaga Signed-off-by: Benny Halevy --- net/sunrpc/xprt.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c index 0eea2bfe111..c144611223f 100644 --- a/net/sunrpc/xprt.c +++ b/net/sunrpc/xprt.c @@ -195,8 +195,8 @@ EXPORT_SYMBOL_GPL(xprt_load_transport); */ int xprt_reserve_xprt(struct rpc_task *task) { - struct rpc_xprt *xprt = task->tk_xprt; struct rpc_rqst *req = task->tk_rqstp; + struct rpc_xprt *xprt = req->rq_xprt; if (test_and_set_bit(XPRT_LOCKED, &xprt->state)) { if (task == xprt->snd_task) @@ -858,7 +858,7 @@ out_unlock: void xprt_end_transmit(struct rpc_task *task) { - xprt_release_write(task->tk_xprt, task); + xprt_release_write(task->tk_rqstp->rq_xprt, task); } /** -- cgit v1.2.3-70-g09d2 From dd2b63d049480979016b959abc2d141cdddb1389 Mon Sep 17 00:00:00 2001 From: Ricardo Labiaga Date: Wed, 1 Apr 2009 09:23:28 -0400 Subject: nfs41: Rename rq_received to rq_reply_bytes_recvd The 'rq_received' member of 'struct rpc_rqst' is used to track when we have received a reply to our request. With v4.1, the backchannel can now accept callback requests over the existing connection. Rename this field to make it clear that it is only used for tracking reply bytes and not all bytes received on the connection. Signed-off-by: Ricardo Labiaga Signed-off-by: Benny Halevy --- include/linux/sunrpc/xprt.h | 3 ++- net/sunrpc/backchannel_rqst.c | 2 +- net/sunrpc/clnt.c | 8 ++++---- net/sunrpc/stats.c | 2 +- net/sunrpc/xprt.c | 15 ++++++++------- 5 files changed, 16 insertions(+), 14 deletions(-) (limited to 'net') diff --git a/include/linux/sunrpc/xprt.h b/include/linux/sunrpc/xprt.h index 55c6c37e249..1175d58efc2 100644 --- a/include/linux/sunrpc/xprt.h +++ b/include/linux/sunrpc/xprt.h @@ -67,7 +67,8 @@ struct rpc_rqst { struct rpc_task * rq_task; /* RPC task data */ __be32 rq_xid; /* request XID */ int rq_cong; /* has incremented xprt->cong */ - int rq_received; /* receive completed */ + int rq_reply_bytes_recvd; /* number of reply */ + /* bytes received */ u32 rq_seqno; /* gss seq no. used on req. */ int rq_enc_pages_num; struct page **rq_enc_pages; /* scratch pages for use by diff --git a/net/sunrpc/backchannel_rqst.c b/net/sunrpc/backchannel_rqst.c index f56e18a2349..5a7d342e308 100644 --- a/net/sunrpc/backchannel_rqst.c +++ b/net/sunrpc/backchannel_rqst.c @@ -230,7 +230,7 @@ struct rpc_rqst *xprt_alloc_bc_request(struct rpc_xprt *xprt) if (req != NULL) { set_bit(RPC_BC_PA_IN_USE, &req->rq_bc_pa_state); - req->rq_received = 0; + req->rq_reply_bytes_recvd = 0; req->rq_bytes_sent = 0; memcpy(&req->rq_private_buf, &req->rq_rcv_buf, sizeof(req->rq_private_buf)); diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index f3e93b8eb90..5bc2f45bddf 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -1258,8 +1258,8 @@ call_status(struct rpc_task *task) struct rpc_rqst *req = task->tk_rqstp; int status; - if (req->rq_received > 0 && !req->rq_bytes_sent) - task->tk_status = req->rq_received; + if (req->rq_reply_bytes_recvd > 0 && !req->rq_bytes_sent) + task->tk_status = req->rq_reply_bytes_recvd; dprint_status(task); @@ -1376,7 +1376,7 @@ call_decode(struct rpc_task *task) /* * Ensure that we see all writes made by xprt_complete_rqst() - * before it changed req->rq_received. + * before it changed req->rq_reply_bytes_recvd. */ smp_rmb(); req->rq_rcv_buf.len = req->rq_private_buf.len; @@ -1417,7 +1417,7 @@ out_retry: task->tk_status = 0; /* Note: rpc_verify_header() may have freed the RPC slot */ if (task->tk_rqstp == req) { - req->rq_received = req->rq_rcv_buf.len = 0; + req->rq_reply_bytes_recvd = req->rq_rcv_buf.len = 0; if (task->tk_client->cl_discrtry) xprt_conditional_disconnect(task->tk_xprt, req->rq_connect_cookie); diff --git a/net/sunrpc/stats.c b/net/sunrpc/stats.c index 8487aa0f1f5..1b4e6791ecf 100644 --- a/net/sunrpc/stats.c +++ b/net/sunrpc/stats.c @@ -156,7 +156,7 @@ void rpc_count_iostats(struct rpc_task *task) op_metrics->om_timeouts += task->tk_timeouts; op_metrics->om_bytes_sent += task->tk_bytes_sent; - op_metrics->om_bytes_recv += req->rq_received; + op_metrics->om_bytes_recv += req->rq_reply_bytes_recvd; queue = (long)req->rq_xtime - task->tk_start; if (queue < 0) diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c index c144611223f..f412a852bc7 100644 --- a/net/sunrpc/xprt.c +++ b/net/sunrpc/xprt.c @@ -806,9 +806,10 @@ void xprt_complete_rqst(struct rpc_task *task, int copied) list_del_init(&req->rq_list); req->rq_private_buf.len = copied; - /* Ensure all writes are done before we update req->rq_received */ + /* Ensure all writes are done before we update */ + /* req->rq_reply_bytes_recvd */ smp_wmb(); - req->rq_received = copied; + req->rq_reply_bytes_recvd = copied; rpc_wake_up_queued_task(&xprt->pending, task); } EXPORT_SYMBOL_GPL(xprt_complete_rqst); @@ -823,7 +824,7 @@ static void xprt_timer(struct rpc_task *task) dprintk("RPC: %5u xprt_timer\n", task->tk_pid); spin_lock_bh(&xprt->transport_lock); - if (!req->rq_received) { + if (!req->rq_reply_bytes_recvd) { if (xprt->ops->timer) xprt->ops->timer(task); } else @@ -845,8 +846,8 @@ int xprt_prepare_transmit(struct rpc_task *task) dprintk("RPC: %5u xprt_prepare_transmit\n", task->tk_pid); spin_lock_bh(&xprt->transport_lock); - if (req->rq_received && !req->rq_bytes_sent) { - err = req->rq_received; + if (req->rq_reply_bytes_recvd && !req->rq_bytes_sent) { + err = req->rq_reply_bytes_recvd; goto out_unlock; } if (!xprt->ops->reserve_xprt(task)) @@ -875,7 +876,7 @@ void xprt_transmit(struct rpc_task *task) dprintk("RPC: %5u xprt_transmit(%u)\n", task->tk_pid, req->rq_slen); - if (!req->rq_received) { + if (!req->rq_reply_bytes_recvd) { if (list_empty(&req->rq_list) && rpc_reply_expected(task)) { /* * Add to the list only if we're expecting a reply @@ -914,7 +915,7 @@ void xprt_transmit(struct rpc_task *task) /* Don't race with disconnect */ if (!xprt_connected(xprt)) task->tk_status = -ENOTCONN; - else if (!req->rq_received && rpc_reply_expected(task)) { + else if (!req->rq_reply_bytes_recvd && rpc_reply_expected(task)) { /* * Sleep on the pending queue since * we're expecting a reply. -- cgit v1.2.3-70-g09d2 From 19633e129c65e5bb62b1af545c5479afcdb01fc4 Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Wed, 17 Jun 2009 05:23:27 +0000 Subject: skbuff: skb_mac_header_was_set is always true on >32 bit Looking at the crash in log_martians(), one suspect is that the check for mac header being set is not correct. The value of mac_header defaults to 0 on allocation, therefore skb_mac_header_was_set will always be true on platforms using NET_SKBUFF_USES_OFFSET. Signed-off-by: Stephen Hemminger Acked-by: Arnaldo Carvalho de Melo Signed-off-by: David S. Miller --- net/core/skbuff.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'net') diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 1a94a303737..436695d8deb 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -201,6 +201,10 @@ struct sk_buff *__alloc_skb(unsigned int size, gfp_t gfp_mask, skb->data = data; skb_reset_tail_pointer(skb); skb->end = skb->tail + size; +#ifdef NET_SKBUFF_DATA_USES_OFFSET + skb->mac_header = ~0U; +#endif + /* make sure we initialize shinfo sequentially */ shinfo = skb_shinfo(skb); atomic_set(&shinfo->dataref, 1); -- cgit v1.2.3-70-g09d2 From 603a8bbe62e54108055fca46ecdd611c10c6cd0a Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Wed, 17 Jun 2009 12:17:34 +0000 Subject: skbuff: don't corrupt mac_header on skb expansion The skb mac_header field is sometimes NULL (or ~0u) as a sentinel value. The places where skb is expanded add an offset which would change this flag into an invalid pointer (or offset). Signed-off-by: Stephen Hemminger Signed-off-by: David S. Miller --- net/core/skbuff.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 436695d8deb..a4c01f5c658 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -661,7 +661,8 @@ static void copy_skb_header(struct sk_buff *new, const struct sk_buff *old) /* {transport,network,mac}_header are relative to skb->head */ new->transport_header += offset; new->network_header += offset; - new->mac_header += offset; + if (skb_mac_header_was_set(new)) + new->mac_header += offset; #endif skb_shinfo(new)->gso_size = skb_shinfo(old)->gso_size; skb_shinfo(new)->gso_segs = skb_shinfo(old)->gso_segs; @@ -843,7 +844,8 @@ int pskb_expand_head(struct sk_buff *skb, int nhead, int ntail, skb->tail += off; skb->transport_header += off; skb->network_header += off; - skb->mac_header += off; + if (skb_mac_header_was_set(skb)) + skb->mac_header += off; skb->csum_start += nhead; skb->cloned = 0; skb->hdr_len = 0; @@ -935,7 +937,8 @@ struct sk_buff *skb_copy_expand(const struct sk_buff *skb, #ifdef NET_SKBUFF_DATA_USES_OFFSET n->transport_header += off; n->network_header += off; - n->mac_header += off; + if (skb_mac_header_was_set(skb)) + n->mac_header += off; #endif return n; -- cgit v1.2.3-70-g09d2 From b964758050f856e44f5fe645d03bea8a1b0b66bd Mon Sep 17 00:00:00 2001 From: Jarek Poplawski Date: Tue, 16 Jun 2009 08:33:55 +0000 Subject: pkt_sched: Update drops stats in act_police Action police statistics could be misleading because drops are not shown when expected. With feedback from: Jamal Hadi Salim Reported-by: Pawel Staszewski Signed-off-by: Jarek Poplawski Acked-by: Jamal Hadi Salim Signed-off-by: David S. Miller --- net/sched/act_police.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'net') diff --git a/net/sched/act_police.c b/net/sched/act_police.c index f8f047b6124..723964c3ee4 100644 --- a/net/sched/act_police.c +++ b/net/sched/act_police.c @@ -294,6 +294,8 @@ static int tcf_act_police(struct sk_buff *skb, struct tc_action *a, if (police->tcfp_ewma_rate && police->tcf_rate_est.bps >= police->tcfp_ewma_rate) { police->tcf_qstats.overlimits++; + if (police->tcf_action == TC_ACT_SHOT) + police->tcf_qstats.drops++; spin_unlock(&police->tcf_lock); return police->tcf_action; } @@ -327,6 +329,8 @@ static int tcf_act_police(struct sk_buff *skb, struct tc_action *a, } police->tcf_qstats.overlimits++; + if (police->tcf_action == TC_ACT_SHOT) + police->tcf_qstats.drops++; spin_unlock(&police->tcf_lock); return police->tcf_action; } -- cgit v1.2.3-70-g09d2 From 7b85576d15bf2574b0a451108f59f9ad4170dd3f Mon Sep 17 00:00:00 2001 From: Jarek Poplawski Date: Thu, 18 Jun 2009 00:28:51 -0700 Subject: ipv4: Fix fib_trie rebalancing, part 2 My previous patch, which explicitly delays freeing of tnodes by adding them to the list to flush them after the update is finished, isn't strict enough. It treats exceptionally tnodes without parent, assuming they are newly created, so "invisible" for the read side yet. But the top tnode doesn't have parent as well, so we have to exclude all exceptions (at least until a better way is found). Additionally we need to move rcu assignment of this node before flushing, so the return type of the trie_rebalance() function is changed. Reported-by: Yan Zheng Signed-off-by: Jarek Poplawski Signed-off-by: David S. Miller --- net/ipv4/fib_trie.c | 24 ++++++++++-------------- 1 file changed, 10 insertions(+), 14 deletions(-) (limited to 'net') diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c index d1a39b1277d..012cf5a6858 100644 --- a/net/ipv4/fib_trie.c +++ b/net/ipv4/fib_trie.c @@ -391,13 +391,8 @@ static inline void tnode_free(struct tnode *tn) static void tnode_free_safe(struct tnode *tn) { BUG_ON(IS_LEAF(tn)); - - if (node_parent((struct node *) tn)) { - tn->tnode_free = tnode_free_head; - tnode_free_head = tn; - } else { - tnode_free(tn); - } + tn->tnode_free = tnode_free_head; + tnode_free_head = tn; } static void tnode_free_flush(void) @@ -1009,7 +1004,7 @@ fib_find_node(struct trie *t, u32 key) return NULL; } -static struct node *trie_rebalance(struct trie *t, struct tnode *tn) +static void trie_rebalance(struct trie *t, struct tnode *tn) { int wasfull; t_key cindex, key; @@ -1033,12 +1028,13 @@ static struct node *trie_rebalance(struct trie *t, struct tnode *tn) } /* Handle last (top) tnode */ - if (IS_TNODE(tn)) { + if (IS_TNODE(tn)) tn = (struct tnode *)resize(t, (struct tnode *)tn); - tnode_free_flush(); - } - return (struct node *)tn; + rcu_assign_pointer(t->trie, (struct node *)tn); + tnode_free_flush(); + + return; } /* only used from updater-side */ @@ -1186,7 +1182,7 @@ static struct list_head *fib_insert_node(struct trie *t, u32 key, int plen) /* Rebalance the trie */ - rcu_assign_pointer(t->trie, trie_rebalance(t, tp)); + trie_rebalance(t, tp); done: return fa_head; } @@ -1605,7 +1601,7 @@ static void trie_leaf_remove(struct trie *t, struct leaf *l) if (tp) { t_key cindex = tkey_extract_bits(l->key, tp->pos, tp->bits); put_child(t, (struct tnode *)tp, cindex, NULL); - rcu_assign_pointer(t->trie, trie_rebalance(t, tp)); + trie_rebalance(t, tp); } else rcu_assign_pointer(t->trie, NULL); -- cgit v1.2.3-70-g09d2 From 31278e71471399beaff9280737e52b47db4dc345 Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Wed, 17 Jun 2009 01:12:19 +0000 Subject: net: group address list and its count This patch is inspired by patch recently posted by Johannes Berg. Basically what my patch does is to group list and a count of addresses into newly introduced structure netdev_hw_addr_list. This brings us two benefits: 1) struct net_device becames a bit nicer. 2) in the future there will be a possibility to operate with lists independently on netdevices (with exporting right functions). I wanted to introduce this patch before I'll post a multicast lists conversion. Signed-off-by: Jiri Pirko drivers/net/bnx2.c | 4 +- drivers/net/e1000/e1000_main.c | 4 +- drivers/net/ixgbe/ixgbe_main.c | 6 +- drivers/net/mv643xx_eth.c | 2 +- drivers/net/niu.c | 4 +- drivers/net/virtio_net.c | 10 ++-- drivers/s390/net/qeth_l2_main.c | 2 +- include/linux/netdevice.h | 17 +++-- net/core/dev.c | 130 ++++++++++++++++++-------------------- 9 files changed, 89 insertions(+), 90 deletions(-) Signed-off-by: David S. Miller --- drivers/net/bnx2.c | 4 +- drivers/net/e1000/e1000_main.c | 4 +- drivers/net/ixgbe/ixgbe_main.c | 6 +- drivers/net/mv643xx_eth.c | 2 +- drivers/net/niu.c | 4 +- drivers/net/virtio_net.c | 10 ++-- drivers/s390/net/qeth_l2_main.c | 2 +- include/linux/netdevice.h | 17 ++++-- net/core/dev.c | 130 +++++++++++++++++++--------------------- 9 files changed, 89 insertions(+), 90 deletions(-) (limited to 'net') diff --git a/drivers/net/bnx2.c b/drivers/net/bnx2.c index 7e3738112c4..38f1c3375d7 100644 --- a/drivers/net/bnx2.c +++ b/drivers/net/bnx2.c @@ -3552,14 +3552,14 @@ bnx2_set_rx_mode(struct net_device *dev) sort_mode |= BNX2_RPM_SORT_USER0_MC_HSH_EN; } - if (dev->uc_count > BNX2_MAX_UNICAST_ADDRESSES) { + if (dev->uc.count > BNX2_MAX_UNICAST_ADDRESSES) { rx_mode |= BNX2_EMAC_RX_MODE_PROMISCUOUS; sort_mode |= BNX2_RPM_SORT_USER0_PROM_EN | BNX2_RPM_SORT_USER0_PROM_VLAN; } else if (!(dev->flags & IFF_PROMISC)) { /* Add all entries into to the match filter list */ i = 0; - list_for_each_entry(ha, &dev->uc_list, list) { + list_for_each_entry(ha, &dev->uc.list, list) { bnx2_set_mac_addr(bp, ha->addr, i + BNX2_START_UNICAST_ADDRESS_INDEX); sort_mode |= (1 << diff --git a/drivers/net/e1000/e1000_main.c b/drivers/net/e1000/e1000_main.c index 8d36743c814..5e3356f8eb5 100644 --- a/drivers/net/e1000/e1000_main.c +++ b/drivers/net/e1000/e1000_main.c @@ -2370,7 +2370,7 @@ static void e1000_set_rx_mode(struct net_device *netdev) rctl |= E1000_RCTL_VFE; } - if (netdev->uc_count > rar_entries - 1) { + if (netdev->uc.count > rar_entries - 1) { rctl |= E1000_RCTL_UPE; } else if (!(netdev->flags & IFF_PROMISC)) { rctl &= ~E1000_RCTL_UPE; @@ -2394,7 +2394,7 @@ static void e1000_set_rx_mode(struct net_device *netdev) */ i = 1; if (use_uc) - list_for_each_entry(ha, &netdev->uc_list, list) { + list_for_each_entry(ha, &netdev->uc.list, list) { if (i == rar_entries) break; e1000_rar_set(hw, ha->addr, i++); diff --git a/drivers/net/ixgbe/ixgbe_main.c b/drivers/net/ixgbe/ixgbe_main.c index a551a96ce67..e756e220db3 100644 --- a/drivers/net/ixgbe/ixgbe_main.c +++ b/drivers/net/ixgbe/ixgbe_main.c @@ -2321,7 +2321,7 @@ static void ixgbe_set_rx_mode(struct net_device *netdev) IXGBE_WRITE_REG(hw, IXGBE_VLNCTRL, vlnctrl); /* reprogram secondary unicast list */ - hw->mac.ops.update_uc_addr_list(hw, &netdev->uc_list); + hw->mac.ops.update_uc_addr_list(hw, &netdev->uc.list); /* reprogram multicast list */ addr_count = netdev->mc_count; @@ -5261,7 +5261,7 @@ static int ixgbe_ioctl(struct net_device *netdev, struct ifreq *req, int cmd) /** * ixgbe_add_sanmac_netdev - Add the SAN MAC address to the corresponding - * netdev->dev_addr_list + * netdev->dev_addrs * @netdev: network interface device structure * * Returns non-zero on failure @@ -5282,7 +5282,7 @@ static int ixgbe_add_sanmac_netdev(struct net_device *dev) /** * ixgbe_del_sanmac_netdev - Removes the SAN MAC address to the corresponding - * netdev->dev_addr_list + * netdev->dev_addrs * @netdev: network interface device structure * * Returns non-zero on failure diff --git a/drivers/net/mv643xx_eth.c b/drivers/net/mv643xx_eth.c index b4e18a58cb1..745ae8b4a2e 100644 --- a/drivers/net/mv643xx_eth.c +++ b/drivers/net/mv643xx_eth.c @@ -1729,7 +1729,7 @@ static u32 uc_addr_filter_mask(struct net_device *dev) return 0; nibbles = 1 << (dev->dev_addr[5] & 0x0f); - list_for_each_entry(ha, &dev->uc_list, list) { + list_for_each_entry(ha, &dev->uc.list, list) { if (memcmp(dev->dev_addr, ha->addr, 5)) return 0; if ((dev->dev_addr[5] ^ ha->addr[5]) & 0xf0) diff --git a/drivers/net/niu.c b/drivers/net/niu.c index fa61a12c5e1..d2146d4a10f 100644 --- a/drivers/net/niu.c +++ b/drivers/net/niu.c @@ -6376,7 +6376,7 @@ static void niu_set_rx_mode(struct net_device *dev) if ((dev->flags & IFF_ALLMULTI) || (dev->mc_count > 0)) np->flags |= NIU_FLAGS_MCAST; - alt_cnt = dev->uc_count; + alt_cnt = dev->uc.count; if (alt_cnt > niu_num_alt_addr(np)) { alt_cnt = 0; np->flags |= NIU_FLAGS_PROMISC; @@ -6385,7 +6385,7 @@ static void niu_set_rx_mode(struct net_device *dev) if (alt_cnt) { int index = 0; - list_for_each_entry(ha, &dev->uc_list, list) { + list_for_each_entry(ha, &dev->uc.list, list) { err = niu_set_alt_mac(np, index, ha->addr); if (err) printk(KERN_WARNING PFX "%s: Error %d " diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c index 52198f6797a..2a6e81d5b57 100644 --- a/drivers/net/virtio_net.c +++ b/drivers/net/virtio_net.c @@ -709,7 +709,7 @@ static void virtnet_set_rx_mode(struct net_device *dev) allmulti ? "en" : "dis"); /* MAC filter - use one buffer for both lists */ - mac_data = buf = kzalloc(((dev->uc_count + dev->mc_count) * ETH_ALEN) + + mac_data = buf = kzalloc(((dev->uc.count + dev->mc_count) * ETH_ALEN) + (2 * sizeof(mac_data->entries)), GFP_ATOMIC); if (!buf) { dev_warn(&dev->dev, "No memory for MAC address buffer\n"); @@ -719,16 +719,16 @@ static void virtnet_set_rx_mode(struct net_device *dev) sg_init_table(sg, 2); /* Store the unicast list and count in the front of the buffer */ - mac_data->entries = dev->uc_count; + mac_data->entries = dev->uc.count; i = 0; - list_for_each_entry(ha, &dev->uc_list, list) + list_for_each_entry(ha, &dev->uc.list, list) memcpy(&mac_data->macs[i++][0], ha->addr, ETH_ALEN); sg_set_buf(&sg[0], mac_data, - sizeof(mac_data->entries) + (dev->uc_count * ETH_ALEN)); + sizeof(mac_data->entries) + (dev->uc.count * ETH_ALEN)); /* multicast list and count fill the end */ - mac_data = (void *)&mac_data->macs[dev->uc_count][0]; + mac_data = (void *)&mac_data->macs[dev->uc.count][0]; mac_data->entries = dev->mc_count; addr = dev->mc_list; diff --git a/drivers/s390/net/qeth_l2_main.c b/drivers/s390/net/qeth_l2_main.c index ecd3d06c0d5..3607d107f49 100644 --- a/drivers/s390/net/qeth_l2_main.c +++ b/drivers/s390/net/qeth_l2_main.c @@ -655,7 +655,7 @@ static void qeth_l2_set_multicast_list(struct net_device *dev) for (dm = dev->mc_list; dm; dm = dm->next) qeth_l2_add_mc(card, dm->da_addr, 0); - list_for_each_entry(ha, &dev->uc_list, list) + list_for_each_entry(ha, &dev->uc.list, list) qeth_l2_add_mc(card, ha->addr, 1); spin_unlock_bh(&card->mclock); diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 9ea8d6dfe54..d4a4d986779 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -224,6 +224,11 @@ struct netdev_hw_addr { struct rcu_head rcu_head; }; +struct netdev_hw_addr_list { + struct list_head list; + int count; +}; + struct hh_cache { struct hh_cache *hh_next; /* Next entry */ @@ -776,9 +781,8 @@ struct net_device unsigned char addr_len; /* hardware address length */ unsigned short dev_id; /* for shared network cards */ - struct list_head uc_list; /* Secondary unicast mac - addresses */ - int uc_count; /* Number of installed ucasts */ + struct netdev_hw_addr_list uc; /* Secondary unicast + mac addresses */ int uc_promisc; spinlock_t addr_list_lock; struct dev_addr_list *mc_list; /* Multicast mac addresses */ @@ -810,7 +814,8 @@ struct net_device because most packets are unicast) */ - struct list_head dev_addr_list; /* list of device hw addresses */ + struct netdev_hw_addr_list dev_addrs; /* list of device + hw addresses */ unsigned char broadcast[MAX_ADDR_LEN]; /* hw bcast add */ @@ -1806,11 +1811,11 @@ static inline void netif_addr_unlock_bh(struct net_device *dev) } /* - * dev_addr_list walker. Should be used only for read access. Call with + * dev_addrs walker. Should be used only for read access. Call with * rcu_read_lock held. */ #define for_each_dev_addr(dev, ha) \ - list_for_each_entry_rcu(ha, &dev->dev_addr_list, list) + list_for_each_entry_rcu(ha, &dev->dev_addrs.list, list) /* These functions live elsewhere (drivers/net/net_init.c, but related) */ diff --git a/net/core/dev.c b/net/core/dev.c index 576a61574a9..baf2dc13a34 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -3461,10 +3461,10 @@ void __dev_set_rx_mode(struct net_device *dev) /* Unicast addresses changes may only happen under the rtnl, * therefore calling __dev_set_promiscuity here is safe. */ - if (dev->uc_count > 0 && !dev->uc_promisc) { + if (dev->uc.count > 0 && !dev->uc_promisc) { __dev_set_promiscuity(dev, 1); dev->uc_promisc = 1; - } else if (dev->uc_count == 0 && dev->uc_promisc) { + } else if (dev->uc.count == 0 && dev->uc_promisc) { __dev_set_promiscuity(dev, -1); dev->uc_promisc = 0; } @@ -3483,9 +3483,8 @@ void dev_set_rx_mode(struct net_device *dev) /* hw addresses list handling functions */ -static int __hw_addr_add(struct list_head *list, int *delta, - unsigned char *addr, int addr_len, - unsigned char addr_type) +static int __hw_addr_add(struct netdev_hw_addr_list *list, unsigned char *addr, + int addr_len, unsigned char addr_type) { struct netdev_hw_addr *ha; int alloc_size; @@ -3493,7 +3492,7 @@ static int __hw_addr_add(struct list_head *list, int *delta, if (addr_len > MAX_ADDR_LEN) return -EINVAL; - list_for_each_entry(ha, list, list) { + list_for_each_entry(ha, &list->list, list) { if (!memcmp(ha->addr, addr, addr_len) && ha->type == addr_type) { ha->refcount++; @@ -3512,9 +3511,8 @@ static int __hw_addr_add(struct list_head *list, int *delta, ha->type = addr_type; ha->refcount = 1; ha->synced = false; - list_add_tail_rcu(&ha->list, list); - if (delta) - (*delta)++; + list_add_tail_rcu(&ha->list, &list->list); + list->count++; return 0; } @@ -3526,120 +3524,121 @@ static void ha_rcu_free(struct rcu_head *head) kfree(ha); } -static int __hw_addr_del(struct list_head *list, int *delta, - unsigned char *addr, int addr_len, - unsigned char addr_type) +static int __hw_addr_del(struct netdev_hw_addr_list *list, unsigned char *addr, + int addr_len, unsigned char addr_type) { struct netdev_hw_addr *ha; - list_for_each_entry(ha, list, list) { + list_for_each_entry(ha, &list->list, list) { if (!memcmp(ha->addr, addr, addr_len) && (ha->type == addr_type || !addr_type)) { if (--ha->refcount) return 0; list_del_rcu(&ha->list); call_rcu(&ha->rcu_head, ha_rcu_free); - if (delta) - (*delta)--; + list->count--; return 0; } } return -ENOENT; } -static int __hw_addr_add_multiple(struct list_head *to_list, int *to_delta, - struct list_head *from_list, int addr_len, +static int __hw_addr_add_multiple(struct netdev_hw_addr_list *to_list, + struct netdev_hw_addr_list *from_list, + int addr_len, unsigned char addr_type) { int err; struct netdev_hw_addr *ha, *ha2; unsigned char type; - list_for_each_entry(ha, from_list, list) { + list_for_each_entry(ha, &from_list->list, list) { type = addr_type ? addr_type : ha->type; - err = __hw_addr_add(to_list, to_delta, ha->addr, - addr_len, type); + err = __hw_addr_add(to_list, ha->addr, addr_len, type); if (err) goto unroll; } return 0; unroll: - list_for_each_entry(ha2, from_list, list) { + list_for_each_entry(ha2, &from_list->list, list) { if (ha2 == ha) break; type = addr_type ? addr_type : ha2->type; - __hw_addr_del(to_list, to_delta, ha2->addr, - addr_len, type); + __hw_addr_del(to_list, ha2->addr, addr_len, type); } return err; } -static void __hw_addr_del_multiple(struct list_head *to_list, int *to_delta, - struct list_head *from_list, int addr_len, +static void __hw_addr_del_multiple(struct netdev_hw_addr_list *to_list, + struct netdev_hw_addr_list *from_list, + int addr_len, unsigned char addr_type) { struct netdev_hw_addr *ha; unsigned char type; - list_for_each_entry(ha, from_list, list) { + list_for_each_entry(ha, &from_list->list, list) { type = addr_type ? addr_type : ha->type; - __hw_addr_del(to_list, to_delta, ha->addr, - addr_len, addr_type); + __hw_addr_del(to_list, ha->addr, addr_len, addr_type); } } -static int __hw_addr_sync(struct list_head *to_list, int *to_delta, - struct list_head *from_list, int *from_delta, +static int __hw_addr_sync(struct netdev_hw_addr_list *to_list, + struct netdev_hw_addr_list *from_list, int addr_len) { int err = 0; struct netdev_hw_addr *ha, *tmp; - list_for_each_entry_safe(ha, tmp, from_list, list) { + list_for_each_entry_safe(ha, tmp, &from_list->list, list) { if (!ha->synced) { - err = __hw_addr_add(to_list, to_delta, ha->addr, + err = __hw_addr_add(to_list, ha->addr, addr_len, ha->type); if (err) break; ha->synced = true; ha->refcount++; } else if (ha->refcount == 1) { - __hw_addr_del(to_list, to_delta, ha->addr, - addr_len, ha->type); - __hw_addr_del(from_list, from_delta, ha->addr, - addr_len, ha->type); + __hw_addr_del(to_list, ha->addr, addr_len, ha->type); + __hw_addr_del(from_list, ha->addr, addr_len, ha->type); } } return err; } -static void __hw_addr_unsync(struct list_head *to_list, int *to_delta, - struct list_head *from_list, int *from_delta, +static void __hw_addr_unsync(struct netdev_hw_addr_list *to_list, + struct netdev_hw_addr_list *from_list, int addr_len) { struct netdev_hw_addr *ha, *tmp; - list_for_each_entry_safe(ha, tmp, from_list, list) { + list_for_each_entry_safe(ha, tmp, &from_list->list, list) { if (ha->synced) { - __hw_addr_del(to_list, to_delta, ha->addr, + __hw_addr_del(to_list, ha->addr, addr_len, ha->type); ha->synced = false; - __hw_addr_del(from_list, from_delta, ha->addr, + __hw_addr_del(from_list, ha->addr, addr_len, ha->type); } } } - -static void __hw_addr_flush(struct list_head *list) +static void __hw_addr_flush(struct netdev_hw_addr_list *list) { struct netdev_hw_addr *ha, *tmp; - list_for_each_entry_safe(ha, tmp, list, list) { + list_for_each_entry_safe(ha, tmp, &list->list, list) { list_del_rcu(&ha->list); call_rcu(&ha->rcu_head, ha_rcu_free); } + list->count = 0; +} + +static void __hw_addr_init(struct netdev_hw_addr_list *list) +{ + INIT_LIST_HEAD(&list->list); + list->count = 0; } /* Device addresses handling functions */ @@ -3648,7 +3647,7 @@ static void dev_addr_flush(struct net_device *dev) { /* rtnl_mutex must be held here */ - __hw_addr_flush(&dev->dev_addr_list); + __hw_addr_flush(&dev->dev_addrs); dev->dev_addr = NULL; } @@ -3660,16 +3659,16 @@ static int dev_addr_init(struct net_device *dev) /* rtnl_mutex must be held here */ - INIT_LIST_HEAD(&dev->dev_addr_list); + __hw_addr_init(&dev->dev_addrs); memset(addr, 0, sizeof(addr)); - err = __hw_addr_add(&dev->dev_addr_list, NULL, addr, sizeof(addr), + err = __hw_addr_add(&dev->dev_addrs, addr, sizeof(addr), NETDEV_HW_ADDR_T_LAN); if (!err) { /* * Get the first (previously created) address from the list * and set dev_addr pointer to this location. */ - ha = list_first_entry(&dev->dev_addr_list, + ha = list_first_entry(&dev->dev_addrs.list, struct netdev_hw_addr, list); dev->dev_addr = ha->addr; } @@ -3694,8 +3693,7 @@ int dev_addr_add(struct net_device *dev, unsigned char *addr, ASSERT_RTNL(); - err = __hw_addr_add(&dev->dev_addr_list, NULL, addr, dev->addr_len, - addr_type); + err = __hw_addr_add(&dev->dev_addrs, addr, dev->addr_len, addr_type); if (!err) call_netdevice_notifiers(NETDEV_CHANGEADDR, dev); return err; @@ -3725,11 +3723,12 @@ int dev_addr_del(struct net_device *dev, unsigned char *addr, * We can not remove the first address from the list because * dev->dev_addr points to that. */ - ha = list_first_entry(&dev->dev_addr_list, struct netdev_hw_addr, list); + ha = list_first_entry(&dev->dev_addrs.list, + struct netdev_hw_addr, list); if (ha->addr == dev->dev_addr && ha->refcount == 1) return -ENOENT; - err = __hw_addr_del(&dev->dev_addr_list, NULL, addr, dev->addr_len, + err = __hw_addr_del(&dev->dev_addrs, addr, dev->addr_len, addr_type); if (!err) call_netdevice_notifiers(NETDEV_CHANGEADDR, dev); @@ -3757,8 +3756,7 @@ int dev_addr_add_multiple(struct net_device *to_dev, if (from_dev->addr_len != to_dev->addr_len) return -EINVAL; - err = __hw_addr_add_multiple(&to_dev->dev_addr_list, NULL, - &from_dev->dev_addr_list, + err = __hw_addr_add_multiple(&to_dev->dev_addrs, &from_dev->dev_addrs, to_dev->addr_len, addr_type); if (!err) call_netdevice_notifiers(NETDEV_CHANGEADDR, to_dev); @@ -3784,15 +3782,14 @@ int dev_addr_del_multiple(struct net_device *to_dev, if (from_dev->addr_len != to_dev->addr_len) return -EINVAL; - __hw_addr_del_multiple(&to_dev->dev_addr_list, NULL, - &from_dev->dev_addr_list, + __hw_addr_del_multiple(&to_dev->dev_addrs, &from_dev->dev_addrs, to_dev->addr_len, addr_type); call_netdevice_notifiers(NETDEV_CHANGEADDR, to_dev); return 0; } EXPORT_SYMBOL(dev_addr_del_multiple); -/* unicast and multicast addresses handling functions */ +/* multicast addresses handling functions */ int __dev_addr_delete(struct dev_addr_list **list, int *count, void *addr, int alen, int glbl) @@ -3868,8 +3865,8 @@ int dev_unicast_delete(struct net_device *dev, void *addr) ASSERT_RTNL(); - err = __hw_addr_del(&dev->uc_list, &dev->uc_count, addr, - dev->addr_len, NETDEV_HW_ADDR_T_UNICAST); + err = __hw_addr_del(&dev->uc, addr, dev->addr_len, + NETDEV_HW_ADDR_T_UNICAST); if (!err) __dev_set_rx_mode(dev); return err; @@ -3892,8 +3889,8 @@ int dev_unicast_add(struct net_device *dev, void *addr) ASSERT_RTNL(); - err = __hw_addr_add(&dev->uc_list, &dev->uc_count, addr, - dev->addr_len, NETDEV_HW_ADDR_T_UNICAST); + err = __hw_addr_add(&dev->uc, addr, dev->addr_len, + NETDEV_HW_ADDR_T_UNICAST); if (!err) __dev_set_rx_mode(dev); return err; @@ -3966,8 +3963,7 @@ int dev_unicast_sync(struct net_device *to, struct net_device *from) if (to->addr_len != from->addr_len) return -EINVAL; - err = __hw_addr_sync(&to->uc_list, &to->uc_count, - &from->uc_list, &from->uc_count, to->addr_len); + err = __hw_addr_sync(&to->uc, &from->uc, to->addr_len); if (!err) __dev_set_rx_mode(to); return err; @@ -3990,8 +3986,7 @@ void dev_unicast_unsync(struct net_device *to, struct net_device *from) if (to->addr_len != from->addr_len) return; - __hw_addr_unsync(&to->uc_list, &to->uc_count, - &from->uc_list, &from->uc_count, to->addr_len); + __hw_addr_unsync(&to->uc, &from->uc, to->addr_len); __dev_set_rx_mode(to); } EXPORT_SYMBOL(dev_unicast_unsync); @@ -4000,15 +3995,14 @@ static void dev_unicast_flush(struct net_device *dev) { /* rtnl_mutex must be held here */ - __hw_addr_flush(&dev->uc_list); - dev->uc_count = 0; + __hw_addr_flush(&dev->uc); } static void dev_unicast_init(struct net_device *dev) { /* rtnl_mutex must be held here */ - INIT_LIST_HEAD(&dev->uc_list); + __hw_addr_init(&dev->uc); } -- cgit v1.2.3-70-g09d2 From 31e6d363abcd0d05766c82f1a9c905a4c974a199 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 17 Jun 2009 19:05:41 -0700 Subject: net: correct off-by-one write allocations reports commit 2b85a34e911bf483c27cfdd124aeb1605145dc80 (net: No more expensive sock_hold()/sock_put() on each tx) changed initial sk_wmem_alloc value. We need to take into account this offset when reporting sk_wmem_alloc to user, in PROC_FS files or various ioctls (SIOCOUTQ/TIOCOUTQ) Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/appletalk/atalk_proc.c | 4 ++-- net/appletalk/ddp.c | 3 +-- net/ax25/af_ax25.c | 11 ++++++----- net/bluetooth/af_bluetooth.c | 2 +- net/decnet/af_decnet.c | 2 +- net/ieee802154/dgram.c | 3 ++- net/ipv4/inet_diag.c | 4 ++-- net/ipv4/raw.c | 7 ++++--- net/ipv4/udp.c | 7 ++++--- net/ipv6/raw.c | 7 ++++--- net/ipv6/udp.c | 4 ++-- net/ipx/af_ipx.c | 2 +- net/ipx/ipx_proc.c | 4 ++-- net/irda/af_irda.c | 3 ++- net/key/af_key.c | 4 ++-- net/llc/llc_proc.c | 4 ++-- net/netlink/af_netlink.c | 4 ++-- net/netrom/af_netrom.c | 6 +++--- net/packet/af_packet.c | 3 ++- net/rose/af_rose.c | 7 ++++--- net/sched/em_meta.c | 4 ++-- net/sctp/socket.c | 4 ++-- net/unix/af_unix.c | 2 +- net/x25/af_x25.c | 4 ++-- net/x25/x25_proc.c | 4 ++-- 25 files changed, 58 insertions(+), 51 deletions(-) (limited to 'net') diff --git a/net/appletalk/atalk_proc.c b/net/appletalk/atalk_proc.c index fd8e0847b25..80caad1a31a 100644 --- a/net/appletalk/atalk_proc.c +++ b/net/appletalk/atalk_proc.c @@ -204,8 +204,8 @@ static int atalk_seq_socket_show(struct seq_file *seq, void *v) "%02X %d\n", s->sk_type, ntohs(at->src_net), at->src_node, at->src_port, ntohs(at->dest_net), at->dest_node, at->dest_port, - atomic_read(&s->sk_wmem_alloc), - atomic_read(&s->sk_rmem_alloc), + sk_wmem_alloc_get(s), + sk_rmem_alloc_get(s), s->sk_state, SOCK_INODE(s->sk_socket)->i_uid); out: return 0; diff --git a/net/appletalk/ddp.c b/net/appletalk/ddp.c index f7a53b219ef..590b8396362 100644 --- a/net/appletalk/ddp.c +++ b/net/appletalk/ddp.c @@ -1748,8 +1748,7 @@ static int atalk_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) switch (cmd) { /* Protocol layer */ case TIOCOUTQ: { - long amount = sk->sk_sndbuf - - atomic_read(&sk->sk_wmem_alloc); + long amount = sk->sk_sndbuf - sk_wmem_alloc_get(sk); if (amount < 0) amount = 0; diff --git a/net/ax25/af_ax25.c b/net/ax25/af_ax25.c index 61b35b95549..da0f64f82b5 100644 --- a/net/ax25/af_ax25.c +++ b/net/ax25/af_ax25.c @@ -1690,7 +1690,8 @@ static int ax25_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) switch (cmd) { case TIOCOUTQ: { long amount; - amount = sk->sk_sndbuf - atomic_read(&sk->sk_wmem_alloc); + + amount = sk->sk_sndbuf - sk_wmem_alloc_get(sk); if (amount < 0) amount = 0; res = put_user(amount, (int __user *)argp); @@ -1780,8 +1781,8 @@ static int ax25_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) ax25_info.idletimer = ax25_display_timer(&ax25->idletimer) / (60 * HZ); ax25_info.n2count = ax25->n2count; ax25_info.state = ax25->state; - ax25_info.rcv_q = atomic_read(&sk->sk_rmem_alloc); - ax25_info.snd_q = atomic_read(&sk->sk_wmem_alloc); + ax25_info.rcv_q = sk_wmem_alloc_get(sk); + ax25_info.snd_q = sk_rmem_alloc_get(sk); ax25_info.vs = ax25->vs; ax25_info.vr = ax25->vr; ax25_info.va = ax25->va; @@ -1921,8 +1922,8 @@ static int ax25_info_show(struct seq_file *seq, void *v) if (ax25->sk != NULL) { seq_printf(seq, " %d %d %lu\n", - atomic_read(&ax25->sk->sk_wmem_alloc), - atomic_read(&ax25->sk->sk_rmem_alloc), + sk_wmem_alloc_get(ax25->sk), + sk_rmem_alloc_get(ax25->sk), sock_i_ino(ax25->sk)); } else { seq_puts(seq, " * * *\n"); diff --git a/net/bluetooth/af_bluetooth.c b/net/bluetooth/af_bluetooth.c index 02b9baa1930..0250e060015 100644 --- a/net/bluetooth/af_bluetooth.c +++ b/net/bluetooth/af_bluetooth.c @@ -337,7 +337,7 @@ int bt_sock_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) if (sk->sk_state == BT_LISTEN) return -EINVAL; - amount = sk->sk_sndbuf - atomic_read(&sk->sk_wmem_alloc); + amount = sk->sk_sndbuf - sk_wmem_alloc_get(sk); if (amount < 0) amount = 0; err = put_user(amount, (int __user *) arg); diff --git a/net/decnet/af_decnet.c b/net/decnet/af_decnet.c index a5e3a593e47..d351b8db0df 100644 --- a/net/decnet/af_decnet.c +++ b/net/decnet/af_decnet.c @@ -1240,7 +1240,7 @@ static int dn_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) return val; case TIOCOUTQ: - amount = sk->sk_sndbuf - atomic_read(&sk->sk_wmem_alloc); + amount = sk->sk_sndbuf - sk_wmem_alloc_get(sk); if (amount < 0) amount = 0; err = put_user(amount, (int __user *)arg); diff --git a/net/ieee802154/dgram.c b/net/ieee802154/dgram.c index 1779677aed4..14d39840dd6 100644 --- a/net/ieee802154/dgram.c +++ b/net/ieee802154/dgram.c @@ -126,7 +126,8 @@ static int dgram_ioctl(struct sock *sk, int cmd, unsigned long arg) switch (cmd) { case SIOCOUTQ: { - int amount = atomic_read(&sk->sk_wmem_alloc); + int amount = sk_wmem_alloc_get(sk); + return put_user(amount, (int __user *)arg); } diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c index b0b273503e2..a706a47f4db 100644 --- a/net/ipv4/inet_diag.c +++ b/net/ipv4/inet_diag.c @@ -156,10 +156,10 @@ static int inet_csk_diag_fill(struct sock *sk, r->idiag_inode = sock_i_ino(sk); if (minfo) { - minfo->idiag_rmem = atomic_read(&sk->sk_rmem_alloc); + minfo->idiag_rmem = sk_rmem_alloc_get(sk); minfo->idiag_wmem = sk->sk_wmem_queued; minfo->idiag_fmem = sk->sk_forward_alloc; - minfo->idiag_tmem = atomic_read(&sk->sk_wmem_alloc); + minfo->idiag_tmem = sk_wmem_alloc_get(sk); } handler->idiag_get_info(sk, r, info); diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c index 3dc9171a272..2979f14bb18 100644 --- a/net/ipv4/raw.c +++ b/net/ipv4/raw.c @@ -799,7 +799,8 @@ static int raw_ioctl(struct sock *sk, int cmd, unsigned long arg) { switch (cmd) { case SIOCOUTQ: { - int amount = atomic_read(&sk->sk_wmem_alloc); + int amount = sk_wmem_alloc_get(sk); + return put_user(amount, (int __user *)arg); } case SIOCINQ: { @@ -935,8 +936,8 @@ static void raw_sock_seq_show(struct seq_file *seq, struct sock *sp, int i) seq_printf(seq, "%4d: %08X:%04X %08X:%04X" " %02X %08X:%08X %02X:%08lX %08X %5d %8d %lu %d %p %d\n", i, src, srcp, dest, destp, sp->sk_state, - atomic_read(&sp->sk_wmem_alloc), - atomic_read(&sp->sk_rmem_alloc), + sk_wmem_alloc_get(sp), + sk_rmem_alloc_get(sp), 0, 0L, 0, sock_i_uid(sp), 0, sock_i_ino(sp), atomic_read(&sp->sk_refcnt), sp, atomic_read(&sp->sk_drops)); } diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 8f4158d7c9a..80e3812837a 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -840,7 +840,8 @@ int udp_ioctl(struct sock *sk, int cmd, unsigned long arg) switch (cmd) { case SIOCOUTQ: { - int amount = atomic_read(&sk->sk_wmem_alloc); + int amount = sk_wmem_alloc_get(sk); + return put_user(amount, (int __user *)arg); } @@ -1721,8 +1722,8 @@ static void udp4_format_sock(struct sock *sp, struct seq_file *f, seq_printf(f, "%4d: %08X:%04X %08X:%04X" " %02X %08X:%08X %02X:%08lX %08X %5d %8d %lu %d %p %d%n", bucket, src, srcp, dest, destp, sp->sk_state, - atomic_read(&sp->sk_wmem_alloc), - atomic_read(&sp->sk_rmem_alloc), + sk_wmem_alloc_get(sp), + sk_rmem_alloc_get(sp), 0, 0L, 0, sock_i_uid(sp), 0, sock_i_ino(sp), atomic_read(&sp->sk_refcnt), sp, atomic_read(&sp->sk_drops), len); diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c index 36a090d87a3..8b0b6f94806 100644 --- a/net/ipv6/raw.c +++ b/net/ipv6/raw.c @@ -1130,7 +1130,8 @@ static int rawv6_ioctl(struct sock *sk, int cmd, unsigned long arg) switch(cmd) { case SIOCOUTQ: { - int amount = atomic_read(&sk->sk_wmem_alloc); + int amount = sk_wmem_alloc_get(sk); + return put_user(amount, (int __user *)arg); } case SIOCINQ: @@ -1236,8 +1237,8 @@ static void raw6_sock_seq_show(struct seq_file *seq, struct sock *sp, int i) dest->s6_addr32[0], dest->s6_addr32[1], dest->s6_addr32[2], dest->s6_addr32[3], destp, sp->sk_state, - atomic_read(&sp->sk_wmem_alloc), - atomic_read(&sp->sk_rmem_alloc), + sk_wmem_alloc_get(sp), + sk_rmem_alloc_get(sp), 0, 0L, 0, sock_i_uid(sp), 0, sock_i_ino(sp), diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index fc333d85472..023beda6b22 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -1061,8 +1061,8 @@ static void udp6_sock_seq_show(struct seq_file *seq, struct sock *sp, int bucket dest->s6_addr32[0], dest->s6_addr32[1], dest->s6_addr32[2], dest->s6_addr32[3], destp, sp->sk_state, - atomic_read(&sp->sk_wmem_alloc), - atomic_read(&sp->sk_rmem_alloc), + sk_wmem_alloc_get(sp), + sk_rmem_alloc_get(sp), 0, 0L, 0, sock_i_uid(sp), 0, sock_i_ino(sp), diff --git a/net/ipx/af_ipx.c b/net/ipx/af_ipx.c index 1627050e29f..417b0e30949 100644 --- a/net/ipx/af_ipx.c +++ b/net/ipx/af_ipx.c @@ -1835,7 +1835,7 @@ static int ipx_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) switch (cmd) { case TIOCOUTQ: - amount = sk->sk_sndbuf - atomic_read(&sk->sk_wmem_alloc); + amount = sk->sk_sndbuf - sk_wmem_alloc_get(sk); if (amount < 0) amount = 0; rc = put_user(amount, (int __user *)argp); diff --git a/net/ipx/ipx_proc.c b/net/ipx/ipx_proc.c index 5ed97ad0e2e..576178482f8 100644 --- a/net/ipx/ipx_proc.c +++ b/net/ipx/ipx_proc.c @@ -280,8 +280,8 @@ static int ipx_seq_socket_show(struct seq_file *seq, void *v) } seq_printf(seq, "%08X %08X %02X %03d\n", - atomic_read(&s->sk_wmem_alloc), - atomic_read(&s->sk_rmem_alloc), + sk_wmem_alloc_get(s), + sk_rmem_alloc_get(s), s->sk_state, SOCK_INODE(s->sk_socket)->i_uid); out: return 0; diff --git a/net/irda/af_irda.c b/net/irda/af_irda.c index 3eb5bcc75f9..5922febe25c 100644 --- a/net/irda/af_irda.c +++ b/net/irda/af_irda.c @@ -1762,7 +1762,8 @@ static int irda_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) switch (cmd) { case TIOCOUTQ: { long amount; - amount = sk->sk_sndbuf - atomic_read(&sk->sk_wmem_alloc); + + amount = sk->sk_sndbuf - sk_wmem_alloc_get(sk); if (amount < 0) amount = 0; if (put_user(amount, (unsigned int __user *)arg)) diff --git a/net/key/af_key.c b/net/key/af_key.c index 643c1be2d02..dba9abd27f9 100644 --- a/net/key/af_key.c +++ b/net/key/af_key.c @@ -3662,8 +3662,8 @@ static int pfkey_seq_show(struct seq_file *f, void *v) seq_printf(f ,"%p %-6d %-6u %-6u %-6u %-6lu\n", s, atomic_read(&s->sk_refcnt), - atomic_read(&s->sk_rmem_alloc), - atomic_read(&s->sk_wmem_alloc), + sk_rmem_alloc_get(s), + sk_wmem_alloc_get(s), sock_i_uid(s), sock_i_ino(s) ); diff --git a/net/llc/llc_proc.c b/net/llc/llc_proc.c index d208b3396d9..f97be471fe2 100644 --- a/net/llc/llc_proc.c +++ b/net/llc/llc_proc.c @@ -134,8 +134,8 @@ static int llc_seq_socket_show(struct seq_file *seq, void *v) seq_printf(seq, "@%02X ", llc->sap->laddr.lsap); llc_ui_format_mac(seq, llc->daddr.mac); seq_printf(seq, "@%02X %8d %8d %2d %3d %4d\n", llc->daddr.lsap, - atomic_read(&sk->sk_wmem_alloc), - atomic_read(&sk->sk_rmem_alloc) - llc->copied_seq, + sk_wmem_alloc_get(sk), + sk_rmem_alloc_get(sk) - llc->copied_seq, sk->sk_state, sk->sk_socket ? SOCK_INODE(sk->sk_socket)->i_uid : -1, llc->link); diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index 8b6bbb3032b..2936fa3b6dc 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -1914,8 +1914,8 @@ static int netlink_seq_show(struct seq_file *seq, void *v) s->sk_protocol, nlk->pid, nlk->groups ? (u32)nlk->groups[0] : 0, - atomic_read(&s->sk_rmem_alloc), - atomic_read(&s->sk_wmem_alloc), + sk_rmem_alloc_get(s), + sk_wmem_alloc_get(s), nlk->cb, atomic_read(&s->sk_refcnt), atomic_read(&s->sk_drops) diff --git a/net/netrom/af_netrom.c b/net/netrom/af_netrom.c index cd911904cbe..ce51ce012cd 100644 --- a/net/netrom/af_netrom.c +++ b/net/netrom/af_netrom.c @@ -1205,7 +1205,7 @@ static int nr_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) long amount; lock_sock(sk); - amount = sk->sk_sndbuf - atomic_read(&sk->sk_wmem_alloc); + amount = sk->sk_sndbuf - sk_wmem_alloc_get(sk); if (amount < 0) amount = 0; release_sock(sk); @@ -1341,8 +1341,8 @@ static int nr_info_show(struct seq_file *seq, void *v) nr->n2count, nr->n2, nr->window, - atomic_read(&s->sk_wmem_alloc), - atomic_read(&s->sk_rmem_alloc), + sk_wmem_alloc_get(s), + sk_rmem_alloc_get(s), s->sk_socket ? SOCK_INODE(s->sk_socket)->i_ino : 0L); bh_unlock_sock(s); diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index 4f76e5552d8..ebe5718baa3 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -1987,7 +1987,8 @@ static int packet_ioctl(struct socket *sock, unsigned int cmd, switch (cmd) { case SIOCOUTQ: { - int amount = atomic_read(&sk->sk_wmem_alloc); + int amount = sk_wmem_alloc_get(sk); + return put_user(amount, (int __user *)arg); } case SIOCINQ: diff --git a/net/rose/af_rose.c b/net/rose/af_rose.c index 4dd9a7d1894..6bd8e93869e 100644 --- a/net/rose/af_rose.c +++ b/net/rose/af_rose.c @@ -1309,7 +1309,8 @@ static int rose_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) switch (cmd) { case TIOCOUTQ: { long amount; - amount = sk->sk_sndbuf - atomic_read(&sk->sk_wmem_alloc); + + amount = sk->sk_sndbuf - sk_wmem_alloc_get(sk); if (amount < 0) amount = 0; return put_user(amount, (unsigned int __user *) argp); @@ -1480,8 +1481,8 @@ static int rose_info_show(struct seq_file *seq, void *v) rose->hb / HZ, ax25_display_timer(&rose->idletimer) / (60 * HZ), rose->idle / (60 * HZ), - atomic_read(&s->sk_wmem_alloc), - atomic_read(&s->sk_rmem_alloc), + sk_wmem_alloc_get(s), + sk_rmem_alloc_get(s), s->sk_socket ? SOCK_INODE(s->sk_socket)->i_ino : 0L); } diff --git a/net/sched/em_meta.c b/net/sched/em_meta.c index 266151ae85a..18d85d25910 100644 --- a/net/sched/em_meta.c +++ b/net/sched/em_meta.c @@ -349,13 +349,13 @@ META_COLLECTOR(int_sk_type) META_COLLECTOR(int_sk_rmem_alloc) { SKIP_NONLOCAL(skb); - dst->value = atomic_read(&skb->sk->sk_rmem_alloc); + dst->value = sk_rmem_alloc_get(skb->sk); } META_COLLECTOR(int_sk_wmem_alloc) { SKIP_NONLOCAL(skb); - dst->value = atomic_read(&skb->sk->sk_wmem_alloc); + dst->value = sk_wmem_alloc_get(skb->sk); } META_COLLECTOR(int_sk_omem_alloc) diff --git a/net/sctp/socket.c b/net/sctp/socket.c index 0f01e5d8a24..35ba035970a 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -130,7 +130,7 @@ static inline int sctp_wspace(struct sctp_association *asoc) if (asoc->ep->sndbuf_policy) amt = asoc->sndbuf_used; else - amt = atomic_read(&asoc->base.sk->sk_wmem_alloc); + amt = sk_wmem_alloc_get(asoc->base.sk); if (amt >= asoc->base.sk->sk_sndbuf) { if (asoc->base.sk->sk_userlocks & SOCK_SNDBUF_LOCK) @@ -6523,7 +6523,7 @@ static int sctp_writeable(struct sock *sk) { int amt = 0; - amt = sk->sk_sndbuf - atomic_read(&sk->sk_wmem_alloc); + amt = sk->sk_sndbuf - sk_wmem_alloc_get(sk); if (amt < 0) amt = 0; return amt; diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index 9dcc6e7f96e..36d4e44d623 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -1946,7 +1946,7 @@ static int unix_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) switch (cmd) { case SIOCOUTQ: - amount = atomic_read(&sk->sk_wmem_alloc); + amount = sk_wmem_alloc_get(sk); err = put_user(amount, (int __user *)arg); break; case SIOCINQ: diff --git a/net/x25/af_x25.c b/net/x25/af_x25.c index 8cd2390b0d4..21cdc872004 100644 --- a/net/x25/af_x25.c +++ b/net/x25/af_x25.c @@ -1271,8 +1271,8 @@ static int x25_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) switch (cmd) { case TIOCOUTQ: { - int amount = sk->sk_sndbuf - - atomic_read(&sk->sk_wmem_alloc); + int amount = sk->sk_sndbuf - sk_wmem_alloc_get(sk); + if (amount < 0) amount = 0; rc = put_user(amount, (unsigned int __user *)argp); diff --git a/net/x25/x25_proc.c b/net/x25/x25_proc.c index 1afa44d25be..0a04e62e0e1 100644 --- a/net/x25/x25_proc.c +++ b/net/x25/x25_proc.c @@ -163,8 +163,8 @@ static int x25_seq_socket_show(struct seq_file *seq, void *v) devname, x25->lci & 0x0FFF, x25->state, x25->vs, x25->vr, x25->va, x25_display_timer(s) / HZ, x25->t2 / HZ, x25->t21 / HZ, x25->t22 / HZ, x25->t23 / HZ, - atomic_read(&s->sk_wmem_alloc), - atomic_read(&s->sk_rmem_alloc), + sk_wmem_alloc_get(s), + sk_rmem_alloc_get(s), s->sk_socket ? SOCK_INODE(s->sk_socket)->i_ino : 0L); out: return 0; -- cgit v1.2.3-70-g09d2 From 81e2a3d5b75cbf0b42428b9d5a7cc7c85be9e7a7 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 17 Jun 2009 19:06:12 -0700 Subject: atm: sk_wmem_alloc initial value is one commit 2b85a34e911bf483c27cfdd124aeb1605145dc80 (net: No more expensive sock_hold()/sock_put() on each tx) changed initial sk_wmem_alloc value. This broke net/atm since this protocol assumed a null initial value. This patch makes necessary changes. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/atm/common.c | 12 ++++++------ net/atm/ioctl.c | 3 +-- net/atm/proc.c | 4 ++-- net/atm/raw.c | 2 +- 4 files changed, 10 insertions(+), 11 deletions(-) (limited to 'net') diff --git a/net/atm/common.c b/net/atm/common.c index d34edbe754c..c1c97936192 100644 --- a/net/atm/common.c +++ b/net/atm/common.c @@ -62,15 +62,15 @@ static struct sk_buff *alloc_tx(struct atm_vcc *vcc,unsigned int size) struct sk_buff *skb; struct sock *sk = sk_atm(vcc); - if (atomic_read(&sk->sk_wmem_alloc) && !atm_may_send(vcc, size)) { + if (sk_wmem_alloc_get(sk) && !atm_may_send(vcc, size)) { pr_debug("Sorry: wmem_alloc = %d, size = %d, sndbuf = %d\n", - atomic_read(&sk->sk_wmem_alloc), size, + sk_wmem_alloc_get(sk), size, sk->sk_sndbuf); return NULL; } - while (!(skb = alloc_skb(size,GFP_KERNEL))) schedule(); - pr_debug("AlTx %d += %d\n", atomic_read(&sk->sk_wmem_alloc), - skb->truesize); + while (!(skb = alloc_skb(size, GFP_KERNEL))) + schedule(); + pr_debug("AlTx %d += %d\n", sk_wmem_alloc_get(sk), skb->truesize); atomic_add(skb->truesize, &sk->sk_wmem_alloc); return skb; } @@ -145,7 +145,7 @@ int vcc_create(struct net *net, struct socket *sock, int protocol, int family) memset(&vcc->local,0,sizeof(struct sockaddr_atmsvc)); memset(&vcc->remote,0,sizeof(struct sockaddr_atmsvc)); vcc->qos.txtp.max_sdu = 1 << 16; /* for meta VCs */ - atomic_set(&sk->sk_wmem_alloc, 0); + atomic_set(&sk->sk_wmem_alloc, 1); atomic_set(&sk->sk_rmem_alloc, 0); vcc->push = NULL; vcc->pop = NULL; diff --git a/net/atm/ioctl.c b/net/atm/ioctl.c index 76ed3c8d26e..4da8892ced5 100644 --- a/net/atm/ioctl.c +++ b/net/atm/ioctl.c @@ -63,8 +63,7 @@ static int do_vcc_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg error = -EINVAL; goto done; } - error = put_user(sk->sk_sndbuf - - atomic_read(&sk->sk_wmem_alloc), + error = put_user(sk->sk_sndbuf - sk_wmem_alloc_get(sk), (int __user *) argp) ? -EFAULT : 0; goto done; case SIOCINQ: diff --git a/net/atm/proc.c b/net/atm/proc.c index e7b3b273907..38de5ff61ec 100644 --- a/net/atm/proc.c +++ b/net/atm/proc.c @@ -204,8 +204,8 @@ static void vcc_info(struct seq_file *seq, struct atm_vcc *vcc) seq_printf(seq, "%3d", sk->sk_family); } seq_printf(seq, " %04lx %5d %7d/%7d %7d/%7d [%d]\n", vcc->flags, sk->sk_err, - atomic_read(&sk->sk_wmem_alloc), sk->sk_sndbuf, - atomic_read(&sk->sk_rmem_alloc), sk->sk_rcvbuf, + sk_wmem_alloc_get(sk), sk->sk_sndbuf, + sk_rmem_alloc_get(sk), sk->sk_rcvbuf, atomic_read(&sk->sk_refcnt)); } diff --git a/net/atm/raw.c b/net/atm/raw.c index b0a2d8cb674..cbfcc71a17b 100644 --- a/net/atm/raw.c +++ b/net/atm/raw.c @@ -33,7 +33,7 @@ static void atm_pop_raw(struct atm_vcc *vcc,struct sk_buff *skb) struct sock *sk = sk_atm(vcc); pr_debug("APopR (%d) %d -= %d\n", vcc->vci, - atomic_read(&sk->sk_wmem_alloc), skb->truesize); + sk_wmem_alloc_get(sk), skb->truesize); atomic_sub(skb->truesize, &sk->sk_wmem_alloc); dev_kfree_skb_any(skb); sk->sk_write_space(sk); -- cgit v1.2.3-70-g09d2 From 47fcb03fefee2501e79176932a4184fc24d6f8ec Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 18 May 2009 17:47:56 -0400 Subject: SUNRPC: Fix the TCP server's send buffer accounting Currently, the sunrpc server is refusing to allow us to process new RPC calls if the TCP send buffer is 2/3 full, even if we do actually have enough free space to guarantee that we can send another request. The following patch fixes svc_tcp_has_wspace() so that we only stop processing requests if we know that the socket buffer cannot possibly fit another reply. It also fixes the tcp write_space() callback so that we only clear the SOCK_NOSPACE flag when the TCP send buffer is less than 2/3 full. This should ensure that the send window will grow as per the standard TCP socket code. Signed-off-by: Trond Myklebust Signed-off-by: J. Bruce Fields --- net/sunrpc/svcsock.c | 35 ++++++++++++++++++----------------- 1 file changed, 18 insertions(+), 17 deletions(-) (limited to 'net') diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c index 004a2f9dc43..b09c80c56ee 100644 --- a/net/sunrpc/svcsock.c +++ b/net/sunrpc/svcsock.c @@ -380,6 +380,7 @@ static void svc_sock_setbufsize(struct socket *sock, unsigned int snd, sock->sk->sk_sndbuf = snd * 2; sock->sk->sk_rcvbuf = rcv * 2; sock->sk->sk_userlocks |= SOCK_SNDBUF_LOCK|SOCK_RCVBUF_LOCK; + sock->sk->sk_write_space(sock->sk); release_sock(sock->sk); #endif } @@ -421,6 +422,15 @@ static void svc_write_space(struct sock *sk) } } +static void svc_tcp_write_space(struct sock *sk) +{ + struct socket *sock = sk->sk_socket; + + if (sk_stream_wspace(sk) >= sk_stream_min_wspace(sk) && sock) + clear_bit(SOCK_NOSPACE, &sock->flags); + svc_write_space(sk); +} + /* * Copy the UDP datagram's destination address to the rqstp structure. * The 'destination' address in this case is the address to which the @@ -1015,25 +1025,16 @@ static void svc_tcp_prep_reply_hdr(struct svc_rqst *rqstp) static int svc_tcp_has_wspace(struct svc_xprt *xprt) { struct svc_sock *svsk = container_of(xprt, struct svc_sock, sk_xprt); - struct svc_serv *serv = svsk->sk_xprt.xpt_server; + struct svc_serv *serv = svsk->sk_xprt.xpt_server; int required; - int wspace; - /* - * Set the SOCK_NOSPACE flag before checking the available - * sock space. - */ + if (test_bit(XPT_LISTENER, &xprt->xpt_flags)) + return 1; + required = atomic_read(&xprt->xpt_reserved) + serv->sv_max_mesg; + if (sk_stream_wspace(svsk->sk_sk) >= required) + return 1; set_bit(SOCK_NOSPACE, &svsk->sk_sock->flags); - required = atomic_read(&svsk->sk_xprt.xpt_reserved) + serv->sv_max_mesg; - wspace = sk_stream_wspace(svsk->sk_sk); - - if (wspace < sk_stream_min_wspace(svsk->sk_sk)) - return 0; - if (required * 2 > wspace) - return 0; - - clear_bit(SOCK_NOSPACE, &svsk->sk_sock->flags); - return 1; + return 0; } static struct svc_xprt *svc_tcp_create(struct svc_serv *serv, @@ -1089,7 +1090,7 @@ static void svc_tcp_init(struct svc_sock *svsk, struct svc_serv *serv) dprintk("setting up TCP socket for reading\n"); sk->sk_state_change = svc_tcp_state_change; sk->sk_data_ready = svc_tcp_data_ready; - sk->sk_write_space = svc_write_space; + sk->sk_write_space = svc_tcp_write_space; svsk->sk_reclen = 0; svsk->sk_tcplen = 0; -- cgit v1.2.3-70-g09d2 From bb664f49f8be17d7b8bf9821144e8a53d7fcfe8a Mon Sep 17 00:00:00 2001 From: Hendrik Brueckner Date: Wed, 17 Jun 2009 21:54:47 +0000 Subject: af_iucv: Change if condition in sendmsg() for more readability Change the if condition to exit sendmsg() if the socket in not connected. Signed-off-by: Hendrik Brueckner Signed-off-by: Ursula Braun Signed-off-by: David S. Miller --- net/iucv/af_iucv.c | 163 ++++++++++++++++++++++++++--------------------------- 1 file changed, 81 insertions(+), 82 deletions(-) (limited to 'net') diff --git a/net/iucv/af_iucv.c b/net/iucv/af_iucv.c index a9b3a6f9ea9..42b7198a688 100644 --- a/net/iucv/af_iucv.c +++ b/net/iucv/af_iucv.c @@ -747,108 +747,107 @@ static int iucv_sock_sendmsg(struct kiocb *iocb, struct socket *sock, goto out; } - if (sk->sk_state == IUCV_CONNECTED) { - /* initialize defaults */ - cmsg_done = 0; /* check for duplicate headers */ - txmsg.class = 0; - - /* iterate over control messages */ - for (cmsg = CMSG_FIRSTHDR(msg); cmsg; - cmsg = CMSG_NXTHDR(msg, cmsg)) { - - if (!CMSG_OK(msg, cmsg)) { - err = -EINVAL; - goto out; - } + /* Return if the socket is not in connected state */ + if (sk->sk_state != IUCV_CONNECTED) { + err = -ENOTCONN; + goto out; + } - if (cmsg->cmsg_level != SOL_IUCV) - continue; + /* initialize defaults */ + cmsg_done = 0; /* check for duplicate headers */ + txmsg.class = 0; - if (cmsg->cmsg_type & cmsg_done) { - err = -EINVAL; - goto out; - } - cmsg_done |= cmsg->cmsg_type; + /* iterate over control messages */ + for (cmsg = CMSG_FIRSTHDR(msg); cmsg; + cmsg = CMSG_NXTHDR(msg, cmsg)) { - switch (cmsg->cmsg_type) { - case SCM_IUCV_TRGCLS: - if (cmsg->cmsg_len != CMSG_LEN(TRGCLS_SIZE)) { - err = -EINVAL; - goto out; - } + if (!CMSG_OK(msg, cmsg)) { + err = -EINVAL; + goto out; + } - /* set iucv message target class */ - memcpy(&txmsg.class, - (void *) CMSG_DATA(cmsg), TRGCLS_SIZE); + if (cmsg->cmsg_level != SOL_IUCV) + continue; - break; + if (cmsg->cmsg_type & cmsg_done) { + err = -EINVAL; + goto out; + } + cmsg_done |= cmsg->cmsg_type; - default: + switch (cmsg->cmsg_type) { + case SCM_IUCV_TRGCLS: + if (cmsg->cmsg_len != CMSG_LEN(TRGCLS_SIZE)) { err = -EINVAL; goto out; - break; } - } - /* allocate one skb for each iucv message: - * this is fine for SOCK_SEQPACKET (unless we want to support - * segmented records using the MSG_EOR flag), but - * for SOCK_STREAM we might want to improve it in future */ - if (!(skb = sock_alloc_send_skb(sk, len, - msg->msg_flags & MSG_DONTWAIT, - &err))) - goto out; + /* set iucv message target class */ + memcpy(&txmsg.class, + (void *) CMSG_DATA(cmsg), TRGCLS_SIZE); - if (memcpy_fromiovec(skb_put(skb, len), msg->msg_iov, len)) { - err = -EFAULT; - goto fail; + break; + + default: + err = -EINVAL; + goto out; + break; } + } - /* increment and save iucv message tag for msg_completion cbk */ - txmsg.tag = iucv->send_tag++; - memcpy(CB_TAG(skb), &txmsg.tag, CB_TAG_LEN); - skb_queue_tail(&iucv->send_skb_q, skb); + /* allocate one skb for each iucv message: + * this is fine for SOCK_SEQPACKET (unless we want to support + * segmented records using the MSG_EOR flag), but + * for SOCK_STREAM we might want to improve it in future */ + skb = sock_alloc_send_skb(sk, len, msg->msg_flags & MSG_DONTWAIT, + &err); + if (!skb) + goto out; + if (memcpy_fromiovec(skb_put(skb, len), msg->msg_iov, len)) { + err = -EFAULT; + goto fail; + } - if (((iucv->path->flags & IUCV_IPRMDATA) & iucv->flags) - && skb->len <= 7) { - err = iucv_send_iprm(iucv->path, &txmsg, skb); + /* increment and save iucv message tag for msg_completion cbk */ + txmsg.tag = iucv->send_tag++; + memcpy(CB_TAG(skb), &txmsg.tag, CB_TAG_LEN); + skb_queue_tail(&iucv->send_skb_q, skb); - /* on success: there is no message_complete callback - * for an IPRMDATA msg; remove skb from send queue */ - if (err == 0) { - skb_unlink(skb, &iucv->send_skb_q); - kfree_skb(skb); - } + if (((iucv->path->flags & IUCV_IPRMDATA) & iucv->flags) + && skb->len <= 7) { + err = iucv_send_iprm(iucv->path, &txmsg, skb); - /* this error should never happen since the - * IUCV_IPRMDATA path flag is set... sever path */ - if (err == 0x15) { - iucv_path_sever(iucv->path, NULL); - skb_unlink(skb, &iucv->send_skb_q); - err = -EPIPE; - goto fail; - } - } else - err = iucv_message_send(iucv->path, &txmsg, 0, 0, - (void *) skb->data, skb->len); - if (err) { - if (err == 3) { - user_id[8] = 0; - memcpy(user_id, iucv->dst_user_id, 8); - appl_id[8] = 0; - memcpy(appl_id, iucv->dst_name, 8); - pr_err("Application %s on z/VM guest %s" - " exceeds message limit\n", - user_id, appl_id); - } + /* on success: there is no message_complete callback + * for an IPRMDATA msg; remove skb from send queue */ + if (err == 0) { + skb_unlink(skb, &iucv->send_skb_q); + kfree_skb(skb); + } + + /* this error should never happen since the + * IUCV_IPRMDATA path flag is set... sever path */ + if (err == 0x15) { + iucv_path_sever(iucv->path, NULL); skb_unlink(skb, &iucv->send_skb_q); err = -EPIPE; goto fail; } - - } else { - err = -ENOTCONN; - goto out; + } else + err = iucv_message_send(iucv->path, &txmsg, 0, 0, + (void *) skb->data, skb->len); + if (err) { + if (err == 3) { + user_id[8] = 0; + memcpy(user_id, iucv->dst_user_id, 8); + appl_id[8] = 0; + memcpy(appl_id, iucv->dst_name, 8); + pr_err("Application %s on z/VM guest %s" + " exceeds message limit\n", + appl_id, user_id); + } + skb_unlink(skb, &iucv->send_skb_q); + err = -EPIPE; + goto fail; } release_sock(sk); -- cgit v1.2.3-70-g09d2 From 0ea920d211e0a870871965418923b08da2025b4a Mon Sep 17 00:00:00 2001 From: Hendrik Brueckner Date: Wed, 17 Jun 2009 21:54:48 +0000 Subject: af_iucv: Return -EAGAIN if iucv msg limit is exceeded If the iucv message limit for a communication path is exceeded, sendmsg() returns -EAGAIN instead of -EPIPE. The calling application can then handle this error situtation, e.g. to try again after waiting some time. For blocking sockets, sendmsg() waits up to the socket timeout before returning -EAGAIN. For the new wait condition, a macro has been introduced and the iucv_sock_wait_state() has been refactored to this macro. Signed-off-by: Hendrik Brueckner Signed-off-by: Ursula Braun Signed-off-by: David S. Miller --- include/net/iucv/af_iucv.h | 2 - net/iucv/af_iucv.c | 144 ++++++++++++++++++++++++++++++++------------- 2 files changed, 103 insertions(+), 43 deletions(-) (limited to 'net') diff --git a/include/net/iucv/af_iucv.h b/include/net/iucv/af_iucv.h index 21ee49ffcba..f82a1e87737 100644 --- a/include/net/iucv/af_iucv.h +++ b/include/net/iucv/af_iucv.h @@ -94,8 +94,6 @@ unsigned int iucv_sock_poll(struct file *file, struct socket *sock, poll_table *wait); void iucv_sock_link(struct iucv_sock_list *l, struct sock *s); void iucv_sock_unlink(struct iucv_sock_list *l, struct sock *s); -int iucv_sock_wait_state(struct sock *sk, int state, int state2, - unsigned long timeo); int iucv_sock_wait_cnt(struct sock *sk, unsigned long timeo); void iucv_accept_enqueue(struct sock *parent, struct sock *sk); void iucv_accept_unlink(struct sock *sk); diff --git a/net/iucv/af_iucv.c b/net/iucv/af_iucv.c index 42b7198a688..abadb4a846c 100644 --- a/net/iucv/af_iucv.c +++ b/net/iucv/af_iucv.c @@ -53,6 +53,38 @@ static const u8 iprm_shutdown[8] = #define CB_TRGCLS(skb) ((skb)->cb + CB_TAG_LEN) /* iucv msg target class */ #define CB_TRGCLS_LEN (TRGCLS_SIZE) +#define __iucv_sock_wait(sk, condition, timeo, ret) \ +do { \ + DEFINE_WAIT(__wait); \ + long __timeo = timeo; \ + ret = 0; \ + while (!(condition)) { \ + prepare_to_wait(sk->sk_sleep, &__wait, TASK_INTERRUPTIBLE); \ + if (!__timeo) { \ + ret = -EAGAIN; \ + break; \ + } \ + if (signal_pending(current)) { \ + ret = sock_intr_errno(__timeo); \ + break; \ + } \ + release_sock(sk); \ + __timeo = schedule_timeout(__timeo); \ + lock_sock(sk); \ + ret = sock_error(sk); \ + if (ret) \ + break; \ + } \ + finish_wait(sk->sk_sleep, &__wait); \ +} while (0) + +#define iucv_sock_wait(sk, condition, timeo) \ +({ \ + int __ret = 0; \ + if (!(condition)) \ + __iucv_sock_wait(sk, condition, timeo, __ret); \ + __ret; \ +}) static void iucv_sock_kill(struct sock *sk); static void iucv_sock_close(struct sock *sk); @@ -121,6 +153,48 @@ static inline size_t iucv_msg_length(struct iucv_message *msg) return msg->length; } +/** + * iucv_sock_in_state() - check for specific states + * @sk: sock structure + * @state: first iucv sk state + * @state: second iucv sk state + * + * Returns true if the socket in either in the first or second state. + */ +static int iucv_sock_in_state(struct sock *sk, int state, int state2) +{ + return (sk->sk_state == state || sk->sk_state == state2); +} + +/** + * iucv_below_msglim() - function to check if messages can be sent + * @sk: sock structure + * + * Returns true if the send queue length is lower than the message limit. + * Always returns true if the socket is not connected (no iucv path for + * checking the message limit). + */ +static inline int iucv_below_msglim(struct sock *sk) +{ + struct iucv_sock *iucv = iucv_sk(sk); + + if (sk->sk_state != IUCV_CONNECTED) + return 1; + return (skb_queue_len(&iucv->send_skb_q) < iucv->path->msglim); +} + +/** + * iucv_sock_wake_msglim() - Wake up thread waiting on msg limit + */ +static void iucv_sock_wake_msglim(struct sock *sk) +{ + read_lock(&sk->sk_callback_lock); + if (sk->sk_sleep && waitqueue_active(sk->sk_sleep)) + wake_up_interruptible_all(sk->sk_sleep); + sk_wake_async(sk, SOCK_WAKE_SPACE, POLL_OUT); + read_unlock(&sk->sk_callback_lock); +} + /* Timers */ static void iucv_sock_timeout(unsigned long arg) { @@ -212,7 +286,9 @@ static void iucv_sock_close(struct sock *sk) timeo = sk->sk_lingertime; else timeo = IUCV_DISCONN_TIMEOUT; - err = iucv_sock_wait_state(sk, IUCV_CLOSED, 0, timeo); + err = iucv_sock_wait(sk, + iucv_sock_in_state(sk, IUCV_CLOSED, 0), + timeo); } case IUCV_CLOSING: /* fall through */ @@ -393,39 +469,6 @@ struct sock *iucv_accept_dequeue(struct sock *parent, struct socket *newsock) return NULL; } -int iucv_sock_wait_state(struct sock *sk, int state, int state2, - unsigned long timeo) -{ - DECLARE_WAITQUEUE(wait, current); - int err = 0; - - add_wait_queue(sk->sk_sleep, &wait); - while (sk->sk_state != state && sk->sk_state != state2) { - set_current_state(TASK_INTERRUPTIBLE); - - if (!timeo) { - err = -EAGAIN; - break; - } - - if (signal_pending(current)) { - err = sock_intr_errno(timeo); - break; - } - - release_sock(sk); - timeo = schedule_timeout(timeo); - lock_sock(sk); - - err = sock_error(sk); - if (err) - break; - } - set_current_state(TASK_RUNNING); - remove_wait_queue(sk->sk_sleep, &wait); - return err; -} - /* Bind an unbound socket */ static int iucv_sock_bind(struct socket *sock, struct sockaddr *addr, int addr_len) @@ -570,8 +613,9 @@ static int iucv_sock_connect(struct socket *sock, struct sockaddr *addr, } if (sk->sk_state != IUCV_CONNECTED) { - err = iucv_sock_wait_state(sk, IUCV_CONNECTED, IUCV_DISCONN, - sock_sndtimeo(sk, flags & O_NONBLOCK)); + err = iucv_sock_wait(sk, iucv_sock_in_state(sk, IUCV_CONNECTED, + IUCV_DISCONN), + sock_sndtimeo(sk, flags & O_NONBLOCK)); } if (sk->sk_state == IUCV_DISCONN) { @@ -725,9 +769,11 @@ static int iucv_sock_sendmsg(struct kiocb *iocb, struct socket *sock, struct iucv_message txmsg; struct cmsghdr *cmsg; int cmsg_done; + long timeo; char user_id[9]; char appl_id[9]; int err; + int noblock = msg->msg_flags & MSG_DONTWAIT; err = sock_error(sk); if (err) @@ -799,8 +845,7 @@ static int iucv_sock_sendmsg(struct kiocb *iocb, struct socket *sock, * this is fine for SOCK_SEQPACKET (unless we want to support * segmented records using the MSG_EOR flag), but * for SOCK_STREAM we might want to improve it in future */ - skb = sock_alloc_send_skb(sk, len, msg->msg_flags & MSG_DONTWAIT, - &err); + skb = sock_alloc_send_skb(sk, len, noblock, &err); if (!skb) goto out; if (memcpy_fromiovec(skb_put(skb, len), msg->msg_iov, len)) { @@ -808,6 +853,18 @@ static int iucv_sock_sendmsg(struct kiocb *iocb, struct socket *sock, goto fail; } + /* wait if outstanding messages for iucv path has reached */ + timeo = sock_sndtimeo(sk, noblock); + err = iucv_sock_wait(sk, iucv_below_msglim(sk), timeo); + if (err) + goto fail; + + /* return -ECONNRESET if the socket is no longer connected */ + if (sk->sk_state != IUCV_CONNECTED) { + err = -ECONNRESET; + goto fail; + } + /* increment and save iucv message tag for msg_completion cbk */ txmsg.tag = iucv->send_tag++; memcpy(CB_TAG(skb), &txmsg.tag, CB_TAG_LEN); @@ -844,9 +901,10 @@ static int iucv_sock_sendmsg(struct kiocb *iocb, struct socket *sock, pr_err("Application %s on z/VM guest %s" " exceeds message limit\n", appl_id, user_id); - } + err = -EAGAIN; + } else + err = -EPIPE; skb_unlink(skb, &iucv->send_skb_q); - err = -EPIPE; goto fail; } @@ -1463,7 +1521,11 @@ static void iucv_callback_txdone(struct iucv_path *path, spin_unlock_irqrestore(&list->lock, flags); - kfree_skb(this); + if (this) { + kfree_skb(this); + /* wake up any process waiting for sending */ + iucv_sock_wake_msglim(sk); + } } BUG_ON(!this); -- cgit v1.2.3-70-g09d2 From 25502bda07b4cd4f1d9942cca2df446c4a0f167c Mon Sep 17 00:00:00 2001 From: Dmitry Baryshkov Date: Thu, 18 Jun 2009 04:16:46 +0000 Subject: ieee802154: use standard routine for printing dumps Use print_hex_dump_bytes instead of self-written dumping function for outputting packet dumps. Signed-off-by: Dmitry Eremin-Solenikov Signed-off-by: David S. Miller --- net/ieee802154/af_ieee802154.c | 12 +++--------- 1 file changed, 3 insertions(+), 9 deletions(-) (limited to 'net') diff --git a/net/ieee802154/af_ieee802154.c b/net/ieee802154/af_ieee802154.c index 882a927cefa..3bb6bdb1dac 100644 --- a/net/ieee802154/af_ieee802154.c +++ b/net/ieee802154/af_ieee802154.c @@ -39,14 +39,6 @@ #include "af802154.h" -#define DBG_DUMP(data, len) { \ - int i; \ - pr_debug("function: %s: data: len %d:\n", __func__, len); \ - for (i = 0; i < len; i++) {\ - pr_debug("%02x: %02x\n", i, (data)[i]); \ - } \ -} - /* * Utility function for families */ @@ -302,10 +294,12 @@ static struct net_proto_family ieee802154_family_ops = { static int ieee802154_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt, struct net_device *orig_dev) { - DBG_DUMP(skb->data, skb->len); if (!netif_running(dev)) return -ENODEV; pr_debug("got frame, type %d, dev %p\n", dev->type, dev); +#ifdef DEBUG + print_hex_dump_bytes("ieee802154_rcv ", DUMP_PREFIX_NONE, skb->data, skb->len); +#endif if (!net_eq(dev_net(dev), &init_net)) goto drop; -- cgit v1.2.3-70-g09d2 From 7fa20a7f60df0afceafbb8197b5d110507f42c72 Mon Sep 17 00:00:00 2001 From: Alan Jenkins Date: Tue, 16 Jun 2009 14:53:24 +0100 Subject: rfkill: rfkill_set_block() when suspended nitpick If we return after fiddling with the state, userspace will see the wrong state and rfkill_set_sw_state() won't work until the next call to rfkill_set_block(). At the moment rfkill_set_block() will always be called from rfkill_resume(), but this will change in future. Also, presumably the point of this test is to avoid bothering devices which may be suspended. If we don't want to call set_block(), we probably don't want to call query() either :-). Signed-off-by: Alan Jenkins Signed-off-by: John W. Linville --- net/rfkill/core.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/rfkill/core.c b/net/rfkill/core.c index 4e68ab439d5..868d79f8ac1 100644 --- a/net/rfkill/core.c +++ b/net/rfkill/core.c @@ -270,6 +270,9 @@ static void rfkill_set_block(struct rfkill *rfkill, bool blocked) unsigned long flags; int err; + if (unlikely(rfkill->dev.power.power_state.event & PM_EVENT_SLEEP)) + return; + /* * Some platforms (...!) generate input events which affect the * _hard_ kill state -- whenever something tries to change the @@ -292,9 +295,6 @@ static void rfkill_set_block(struct rfkill *rfkill, bool blocked) rfkill->state |= RFKILL_BLOCK_SW_SETCALL; spin_unlock_irqrestore(&rfkill->lock, flags); - if (unlikely(rfkill->dev.power.power_state.event & PM_EVENT_SLEEP)) - return; - err = rfkill->ops->set_block(rfkill->data, blocked); spin_lock_irqsave(&rfkill->lock, flags); -- cgit v1.2.3-70-g09d2 From 06d5caf47ef4fbd9efdceae33293c42778cb7b0c Mon Sep 17 00:00:00 2001 From: Alan Jenkins Date: Tue, 16 Jun 2009 15:39:51 +0100 Subject: rfkill: don't restore software blocked state on persistent devices The setting of the "persistent" flag is also made more explicit using a new rfkill_init_sw_state() function, instead of special-casing rfkill_set_sw_state() when it is called before registration. Suspend is a bit of a corner case so we try to get away without adding another hack to rfkill-input - it's going to be removed soon. If the state does change over suspend, users will simply have to prod rfkill-input twice in order to toggle the state. Userspace policy agents will be able to implement a more consistent user experience. For example, they can avoid the above problem if they toggle devices individually. Then there would be no "global state" to get out of sync. Currently there are only two rfkill drivers with persistent soft-blocked state. thinkpad-acpi already checks the software state on resume. eeepc-laptop will require modification. Signed-off-by: Alan Jenkins CC: Marcel Holtmann Acked-by: Henrique de Moraes Holschuh Signed-off-by: John W. Linville --- drivers/platform/x86/eeepc-laptop.c | 8 ++++---- drivers/platform/x86/thinkpad_acpi.c | 14 ++++++------- include/linux/rfkill.h | 32 ++++++++++++++++++++++++----- net/rfkill/core.c | 40 ++++++++++++++++++++++-------------- 4 files changed, 63 insertions(+), 31 deletions(-) (limited to 'net') diff --git a/drivers/platform/x86/eeepc-laptop.c b/drivers/platform/x86/eeepc-laptop.c index 03bf522bd7a..01682eca436 100644 --- a/drivers/platform/x86/eeepc-laptop.c +++ b/drivers/platform/x86/eeepc-laptop.c @@ -675,8 +675,8 @@ static int eeepc_hotk_add(struct acpi_device *device) if (!ehotk->eeepc_wlan_rfkill) goto wlan_fail; - rfkill_set_sw_state(ehotk->eeepc_wlan_rfkill, - get_acpi(CM_ASL_WLAN) != 1); + rfkill_init_sw_state(ehotk->eeepc_wlan_rfkill, + get_acpi(CM_ASL_WLAN) != 1); result = rfkill_register(ehotk->eeepc_wlan_rfkill); if (result) goto wlan_fail; @@ -693,8 +693,8 @@ static int eeepc_hotk_add(struct acpi_device *device) if (!ehotk->eeepc_bluetooth_rfkill) goto bluetooth_fail; - rfkill_set_sw_state(ehotk->eeepc_bluetooth_rfkill, - get_acpi(CM_ASL_BLUETOOTH) != 1); + rfkill_init_sw_state(ehotk->eeepc_bluetooth_rfkill, + get_acpi(CM_ASL_BLUETOOTH) != 1); result = rfkill_register(ehotk->eeepc_bluetooth_rfkill); if (result) goto bluetooth_fail; diff --git a/drivers/platform/x86/thinkpad_acpi.c b/drivers/platform/x86/thinkpad_acpi.c index 86e958539f4..40d64c03278 100644 --- a/drivers/platform/x86/thinkpad_acpi.c +++ b/drivers/platform/x86/thinkpad_acpi.c @@ -1163,8 +1163,8 @@ static int __init tpacpi_new_rfkill(const enum tpacpi_rfk_id id, { struct tpacpi_rfk *atp_rfk; int res; - bool initial_sw_state = false; - int initial_sw_status; + bool sw_state = false; + int sw_status; BUG_ON(id >= TPACPI_RFK_SW_MAX || tpacpi_rfkill_switches[id]); @@ -1185,17 +1185,17 @@ static int __init tpacpi_new_rfkill(const enum tpacpi_rfk_id id, atp_rfk->id = id; atp_rfk->ops = tp_rfkops; - initial_sw_status = (tp_rfkops->get_status)(); - if (initial_sw_status < 0) { + sw_status = (tp_rfkops->get_status)(); + if (sw_status < 0) { printk(TPACPI_ERR "failed to read initial state for %s, error %d\n", - name, initial_sw_status); + name, sw_status); } else { - initial_sw_state = (initial_sw_status == TPACPI_RFK_RADIO_OFF); + sw_state = (sw_status == TPACPI_RFK_RADIO_OFF); if (set_default) { /* try to keep the initial state, since we ask the * firmware to preserve it across S5 in NVRAM */ - rfkill_set_sw_state(atp_rfk->rfkill, initial_sw_state); + rfkill_init_sw_state(atp_rfk->rfkill, sw_state); } } rfkill_set_hw_state(atp_rfk->rfkill, tpacpi_rfk_check_hwblock_state()); diff --git a/include/linux/rfkill.h b/include/linux/rfkill.h index 16e39c7a67f..dcac724340d 100644 --- a/include/linux/rfkill.h +++ b/include/linux/rfkill.h @@ -160,8 +160,9 @@ struct rfkill * __must_check rfkill_alloc(const char *name, * the rfkill structure. Before calling this function the driver needs * to be ready to service method calls from rfkill. * - * If the software blocked state is not set before registration, - * set_block will be called to initialize it to a default value. + * If rfkill_init_sw_state() is not called before registration, + * set_block() will be called to initialize the software blocked state + * to a default value. * * If the hardware blocked state is not set before registration, * it is assumed to be unblocked. @@ -234,9 +235,11 @@ bool __must_check rfkill_set_hw_state(struct rfkill *rfkill, bool blocked); * rfkill drivers that get events when the soft-blocked state changes * (yes, some platforms directly act on input but allow changing again) * use this function to notify the rfkill core (and through that also - * userspace) of the current state. It is not necessary to notify on - * resume; since hibernation can always change the soft-blocked state, - * the rfkill core will unconditionally restore the previous state. + * userspace) of the current state. + * + * Drivers should also call this function after resume if the state has + * been changed by the user. This only makes sense for "persistent" + * devices (see rfkill_init_sw_state()). * * This function can be called in any context, even from within rfkill * callbacks. @@ -246,6 +249,21 @@ bool __must_check rfkill_set_hw_state(struct rfkill *rfkill, bool blocked); */ bool rfkill_set_sw_state(struct rfkill *rfkill, bool blocked); +/** + * rfkill_init_sw_state - Initialize persistent software block state + * @rfkill: pointer to the rfkill class to modify. + * @state: the current software block state to set + * + * rfkill drivers that preserve their software block state over power off + * use this function to notify the rfkill core (and through that also + * userspace) of their initial state. It should only be used before + * registration. + * + * In addition, it marks the device as "persistent". Persistent devices + * are expected to preserve preserve their own state when suspended. + */ +void rfkill_init_sw_state(struct rfkill *rfkill, bool blocked); + /** * rfkill_set_states - Set the internal rfkill block states * @rfkill: pointer to the rfkill class to modify. @@ -307,6 +325,10 @@ static inline bool rfkill_set_sw_state(struct rfkill *rfkill, bool blocked) return blocked; } +static inline void rfkill_init_sw_state(struct rfkill *rfkill, bool blocked) +{ +} + static inline void rfkill_set_states(struct rfkill *rfkill, bool sw, bool hw) { } diff --git a/net/rfkill/core.c b/net/rfkill/core.c index 868d79f8ac1..dcf8df7c573 100644 --- a/net/rfkill/core.c +++ b/net/rfkill/core.c @@ -56,7 +56,6 @@ struct rfkill { u32 idx; bool registered; - bool suspended; bool persistent; const struct rfkill_ops *ops; @@ -224,7 +223,7 @@ static void rfkill_send_events(struct rfkill *rfkill, enum rfkill_operation op) static void rfkill_event(struct rfkill *rfkill) { - if (!rfkill->registered || rfkill->suspended) + if (!rfkill->registered) return; kobject_uevent(&rfkill->dev.kobj, KOBJ_CHANGE); @@ -508,19 +507,32 @@ bool rfkill_set_sw_state(struct rfkill *rfkill, bool blocked) blocked = blocked || hwblock; spin_unlock_irqrestore(&rfkill->lock, flags); - if (!rfkill->registered) { - rfkill->persistent = true; - } else { - if (prev != blocked && !hwblock) - schedule_work(&rfkill->uevent_work); + if (!rfkill->registered) + return blocked; - rfkill_led_trigger_event(rfkill); - } + if (prev != blocked && !hwblock) + schedule_work(&rfkill->uevent_work); + + rfkill_led_trigger_event(rfkill); return blocked; } EXPORT_SYMBOL(rfkill_set_sw_state); +void rfkill_init_sw_state(struct rfkill *rfkill, bool blocked) +{ + unsigned long flags; + + BUG_ON(!rfkill); + BUG_ON(rfkill->registered); + + spin_lock_irqsave(&rfkill->lock, flags); + __rfkill_set_sw_state(rfkill, blocked); + rfkill->persistent = true; + spin_unlock_irqrestore(&rfkill->lock, flags); +} +EXPORT_SYMBOL(rfkill_init_sw_state); + void rfkill_set_states(struct rfkill *rfkill, bool sw, bool hw) { unsigned long flags; @@ -718,8 +730,6 @@ static int rfkill_suspend(struct device *dev, pm_message_t state) rfkill_pause_polling(rfkill); - rfkill->suspended = true; - return 0; } @@ -728,10 +738,10 @@ static int rfkill_resume(struct device *dev) struct rfkill *rfkill = to_rfkill(dev); bool cur; - cur = !!(rfkill->state & RFKILL_BLOCK_SW); - rfkill_set_block(rfkill, cur); - - rfkill->suspended = false; + if (!rfkill->persistent) { + cur = !!(rfkill->state & RFKILL_BLOCK_SW); + rfkill_set_block(rfkill, cur); + } rfkill_resume_polling(rfkill); -- cgit v1.2.3-70-g09d2 From 464902e812025792c9e33e19e1555c343672d5cf Mon Sep 17 00:00:00 2001 From: Alan Jenkins Date: Tue, 16 Jun 2009 14:54:04 +0100 Subject: rfkill: export persistent attribute in sysfs This information allows userspace to implement a hybrid policy where it can store the rfkill soft-blocked state in platform non-volatile storage if available, and if not then file-based storage can be used. Some users prefer platform non-volatile storage because of the behaviour when dual-booting multiple versions of Linux, or if the rfkill setting is changed in the BIOS setting screens, or if the BIOS responds to wireless-toggle hotkeys itself before the relevant platform driver has been loaded. Signed-off-by: Alan Jenkins Acked-by: Henrique de Moraes Holschuh Signed-off-by: John W. Linville --- Documentation/rfkill.txt | 2 ++ include/linux/rfkill.h | 5 +++-- net/rfkill/core.c | 10 ++++++++++ 3 files changed, 15 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/Documentation/rfkill.txt b/Documentation/rfkill.txt index c8acd8659e9..b4860509c31 100644 --- a/Documentation/rfkill.txt +++ b/Documentation/rfkill.txt @@ -111,6 +111,8 @@ following attributes: name: Name assigned by driver to this key (interface or driver name). type: Driver type string ("wlan", "bluetooth", etc). + persistent: Whether the soft blocked state is initialised from + non-volatile storage at startup. state: Current state of the transmitter 0: RFKILL_STATE_SOFT_BLOCKED transmitter is turned off by software diff --git a/include/linux/rfkill.h b/include/linux/rfkill.h index dcac724340d..e73e2429a1b 100644 --- a/include/linux/rfkill.h +++ b/include/linux/rfkill.h @@ -259,8 +259,9 @@ bool rfkill_set_sw_state(struct rfkill *rfkill, bool blocked); * userspace) of their initial state. It should only be used before * registration. * - * In addition, it marks the device as "persistent". Persistent devices - * are expected to preserve preserve their own state when suspended. + * In addition, it marks the device as "persistent", an attribute which + * can be read by userspace. Persistent devices are expected to preserve + * their own state when suspended. */ void rfkill_init_sw_state(struct rfkill *rfkill, bool blocked); diff --git a/net/rfkill/core.c b/net/rfkill/core.c index dcf8df7c573..79693fe2001 100644 --- a/net/rfkill/core.c +++ b/net/rfkill/core.c @@ -610,6 +610,15 @@ static ssize_t rfkill_idx_show(struct device *dev, return sprintf(buf, "%d\n", rfkill->idx); } +static ssize_t rfkill_persistent_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct rfkill *rfkill = to_rfkill(dev); + + return sprintf(buf, "%d\n", rfkill->persistent); +} + static u8 user_state_from_blocked(unsigned long state) { if (state & RFKILL_BLOCK_HW) @@ -668,6 +677,7 @@ static struct device_attribute rfkill_dev_attrs[] = { __ATTR(name, S_IRUGO, rfkill_name_show, NULL), __ATTR(type, S_IRUGO, rfkill_type_show, NULL), __ATTR(index, S_IRUGO, rfkill_idx_show, NULL), + __ATTR(persistent, S_IRUGO, rfkill_persistent_show, NULL), __ATTR(state, S_IRUGO|S_IWUSR, rfkill_state_show, rfkill_state_store), __ATTR(claim, S_IRUGO|S_IWUSR, rfkill_claim_show, rfkill_claim_store), __ATTR_NULL -- cgit v1.2.3-70-g09d2 From 155cc9e4b1d60161ee53ffaf2c15b9411f086fa7 Mon Sep 17 00:00:00 2001 From: Andrey Yurovsky Date: Tue, 16 Jun 2009 11:31:04 -0700 Subject: cfg80211: allow adding/deleting stations on mesh Commit b2a151a288 added a check that prevents adding or deleting stations on non-AP interfaces. Adding and deleting stations is supported for Mesh Point interfaces, so add Mesh Point to that check as well. Signed-off-by: Andrey Yurovsky Signed-off-by: John W. Linville --- net/wireless/nl80211.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 24168560eba..2c55d25ed34 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -1763,7 +1763,8 @@ static int nl80211_new_station(struct sk_buff *skb, struct genl_info *info) goto out_rtnl; if (dev->ieee80211_ptr->iftype != NL80211_IFTYPE_AP && - dev->ieee80211_ptr->iftype != NL80211_IFTYPE_AP_VLAN) { + dev->ieee80211_ptr->iftype != NL80211_IFTYPE_AP_VLAN && + dev->ieee80211_ptr->iftype != NL80211_IFTYPE_MESH_POINT) { err = -EINVAL; goto out; } @@ -1812,7 +1813,8 @@ static int nl80211_del_station(struct sk_buff *skb, struct genl_info *info) goto out_rtnl; if (dev->ieee80211_ptr->iftype != NL80211_IFTYPE_AP && - dev->ieee80211_ptr->iftype != NL80211_IFTYPE_AP_VLAN) { + dev->ieee80211_ptr->iftype != NL80211_IFTYPE_AP_VLAN && + dev->ieee80211_ptr->iftype != NL80211_IFTYPE_MESH_POINT) { err = -EINVAL; goto out; } -- cgit v1.2.3-70-g09d2 From 9a5e8bbc8fece7851a2a69a8676a6fd0507bc550 Mon Sep 17 00:00:00 2001 From: Andrey Yurovsky Date: Tue, 16 Jun 2009 16:09:37 -0700 Subject: cfg80211: allow setting station parameters in mesh Mesh Point interfaces can also set parameters, for example plink_open is used to manually establish peer links from user-space (currently via iw). Add Mesh Point to the check in nl80211_set_station. Signed-off-by: Andrey Yurovsky Signed-off-by: John W. Linville --- net/wireless/nl80211.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 2c55d25ed34..304b3d568e0 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -1688,7 +1688,8 @@ static int nl80211_set_station(struct sk_buff *skb, struct genl_info *info) goto out_rtnl; if (dev->ieee80211_ptr->iftype != NL80211_IFTYPE_AP && - dev->ieee80211_ptr->iftype != NL80211_IFTYPE_AP_VLAN) { + dev->ieee80211_ptr->iftype != NL80211_IFTYPE_AP_VLAN && + dev->ieee80211_ptr->iftype != NL80211_IFTYPE_MESH_POINT) { err = -EINVAL; goto out; } -- cgit v1.2.3-70-g09d2 From a97f4424fb4cddecf9b13c9b0e3f79924b624a7f Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Thu, 18 Jun 2009 17:23:43 +0200 Subject: cfg80211: validate station settings When I disallowed interfering with stations on non-AP interfaces, I not only forget mesh but also managed interfaces which need this for the authorized flag. Let's actually validate everything properly. This fixes an nl80211 regression introduced by the interfering, under which wpa_supplicant -Dnl80211 could not properly connect. Signed-off-by: Johannes Berg Signed-off-by: John W. Linville --- net/wireless/nl80211.c | 94 +++++++++++++++++++++++++++++++++++++++++--------- 1 file changed, 78 insertions(+), 16 deletions(-) (limited to 'net') diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 304b3d568e0..241bddd0b4f 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -1687,14 +1687,52 @@ static int nl80211_set_station(struct sk_buff *skb, struct genl_info *info) if (err) goto out_rtnl; - if (dev->ieee80211_ptr->iftype != NL80211_IFTYPE_AP && - dev->ieee80211_ptr->iftype != NL80211_IFTYPE_AP_VLAN && - dev->ieee80211_ptr->iftype != NL80211_IFTYPE_MESH_POINT) { - err = -EINVAL; + err = get_vlan(info->attrs[NL80211_ATTR_STA_VLAN], drv, ¶ms.vlan); + if (err) goto out; + + /* validate settings */ + err = 0; + + switch (dev->ieee80211_ptr->iftype) { + case NL80211_IFTYPE_AP: + case NL80211_IFTYPE_AP_VLAN: + /* disallow mesh-specific things */ + if (params.plink_action) + err = -EINVAL; + break; + case NL80211_IFTYPE_STATION: + /* disallow everything but AUTHORIZED flag */ + if (params.plink_action) + err = -EINVAL; + if (params.vlan) + err = -EINVAL; + if (params.supported_rates) + err = -EINVAL; + if (params.ht_capa) + err = -EINVAL; + if (params.listen_interval >= 0) + err = -EINVAL; + if (params.sta_flags_mask & ~BIT(NL80211_STA_FLAG_AUTHORIZED)) + err = -EINVAL; + break; + case NL80211_IFTYPE_MESH_POINT: + /* disallow things mesh doesn't support */ + if (params.vlan) + err = -EINVAL; + if (params.ht_capa) + err = -EINVAL; + if (params.listen_interval >= 0) + err = -EINVAL; + if (params.supported_rates) + err = -EINVAL; + if (params.sta_flags_mask) + err = -EINVAL; + break; + default: + err = -EINVAL; } - err = get_vlan(info->attrs[NL80211_ATTR_STA_VLAN], drv, ¶ms.vlan); if (err) goto out; @@ -1729,9 +1767,6 @@ static int nl80211_new_station(struct sk_buff *skb, struct genl_info *info) if (!info->attrs[NL80211_ATTR_MAC]) return -EINVAL; - if (!info->attrs[NL80211_ATTR_STA_AID]) - return -EINVAL; - if (!info->attrs[NL80211_ATTR_STA_LISTEN_INTERVAL]) return -EINVAL; @@ -1746,9 +1781,11 @@ static int nl80211_new_station(struct sk_buff *skb, struct genl_info *info) params.listen_interval = nla_get_u16(info->attrs[NL80211_ATTR_STA_LISTEN_INTERVAL]); - params.aid = nla_get_u16(info->attrs[NL80211_ATTR_STA_AID]); - if (!params.aid || params.aid > IEEE80211_MAX_AID) - return -EINVAL; + if (info->attrs[NL80211_ATTR_STA_AID]) { + params.aid = nla_get_u16(info->attrs[NL80211_ATTR_STA_AID]); + if (!params.aid || params.aid > IEEE80211_MAX_AID) + return -EINVAL; + } if (info->attrs[NL80211_ATTR_HT_CAPABILITY]) params.ht_capa = @@ -1763,14 +1800,39 @@ static int nl80211_new_station(struct sk_buff *skb, struct genl_info *info) if (err) goto out_rtnl; - if (dev->ieee80211_ptr->iftype != NL80211_IFTYPE_AP && - dev->ieee80211_ptr->iftype != NL80211_IFTYPE_AP_VLAN && - dev->ieee80211_ptr->iftype != NL80211_IFTYPE_MESH_POINT) { - err = -EINVAL; + err = get_vlan(info->attrs[NL80211_ATTR_STA_VLAN], drv, ¶ms.vlan); + if (err) goto out; + + /* validate settings */ + err = 0; + + switch (dev->ieee80211_ptr->iftype) { + case NL80211_IFTYPE_AP: + case NL80211_IFTYPE_AP_VLAN: + /* all ok but must have AID */ + if (!params.aid) + err = -EINVAL; + break; + case NL80211_IFTYPE_MESH_POINT: + /* disallow things mesh doesn't support */ + if (params.vlan) + err = -EINVAL; + if (params.aid) + err = -EINVAL; + if (params.ht_capa) + err = -EINVAL; + if (params.listen_interval >= 0) + err = -EINVAL; + if (params.supported_rates) + err = -EINVAL; + if (params.sta_flags_mask) + err = -EINVAL; + break; + default: + err = -EINVAL; } - err = get_vlan(info->attrs[NL80211_ATTR_STA_VLAN], drv, ¶ms.vlan); if (err) goto out; -- cgit v1.2.3-70-g09d2 From 73e42897e8e5619eacb787d2ce69be12f47cfc21 Mon Sep 17 00:00:00 2001 From: Neil Horman Date: Sat, 20 Jun 2009 01:15:16 -0700 Subject: ipv4: fix NULL pointer + success return in route lookup path Don't drop route if we're not caching I recently got a report of an oops on a route lookup. Maxime was testing what would happen if route caching was turned off (doing so by setting making rt_caching always return 0), and found that it triggered an oops. I looked at it and found that the problem stemmed from the fact that the route lookup routines were returning success from their lookup paths (which is good), but never set the **rp pointer to anything (which is bad). This happens because in rt_intern_hash, if rt_caching returns false, we call rt_drop and return 0. This almost emulates slient success. What we should be doing is assigning *rp = rt and _not_ dropping the route. This way, during slow path lookups, when we create a new route cache entry, we don't immediately discard it, rather we just don't add it into the cache hash table, but we let this one lookup use it for the purpose of this route request. Maxime has tested and reports it prevents the oops. There is still a subsequent routing issue that I'm looking into further, but I'm confident that, even if its related to this same path, this patch makes sense to take. Signed-off-by: Neil Horman Signed-off-by: David S. Miller --- net/ipv4/route.c | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/ipv4/route.c b/net/ipv4/route.c index cd76b3cb709..65b3a8b11a6 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -1085,8 +1085,16 @@ restart: now = jiffies; if (!rt_caching(dev_net(rt->u.dst.dev))) { - rt_drop(rt); - return 0; + /* + * If we're not caching, just tell the caller we + * were successful and don't touch the route. The + * caller hold the sole reference to the cache entry, and + * it will be released when the caller is done with it. + * If we drop it here, the callers have no way to resolve routes + * when we're not caching. Instead, just point *rp at rt, so + * the caller gets a single use out of the route + */ + goto report_and_exit; } rthp = &rt_hash_table[hash].chain; @@ -1217,6 +1225,8 @@ restart: rcu_assign_pointer(rt_hash_table[hash].chain, rt); spin_unlock_bh(rt_hash_lock_addr(hash)); + +report_and_exit: if (rp) *rp = rt; else -- cgit v1.2.3-70-g09d2 From e9f029855865e917821ef6034b31e340a4cfc815 Mon Sep 17 00:00:00 2001 From: Ricardo Labiaga Date: Thu, 18 Jun 2009 22:01:24 -0400 Subject: nfs41: sunrpc: xprt_alloc_bc_request() should not use spin_lock_bh() xprt_alloc_bc_request() is always called in soft interrupt context. Grab the spin_lock instead of the bottom half spin_lock. Softirqs do not preempt other softirqs running on the same processor, so there is no need to disable bottom halves. Signed-off-by: Ricardo Labiaga Signed-off-by: Benny Halevy Signed-off-by: Trond Myklebust --- net/sunrpc/backchannel_rqst.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/sunrpc/backchannel_rqst.c b/net/sunrpc/backchannel_rqst.c index 5a7d342e308..553621fb2c4 100644 --- a/net/sunrpc/backchannel_rqst.c +++ b/net/sunrpc/backchannel_rqst.c @@ -211,6 +211,9 @@ EXPORT_SYMBOL(xprt_destroy_backchannel); * has been preallocated as well. Use xprt_alloc_bc_request to allocate * to this request. Use xprt_free_bc_request to return it. * + * We know that we're called in soft interrupt context, grab the spin_lock + * since there is no need to grab the bottom half spin_lock. + * * Return an available rpc_rqst, otherwise NULL if non are available. */ struct rpc_rqst *xprt_alloc_bc_request(struct rpc_xprt *xprt) @@ -218,7 +221,7 @@ struct rpc_rqst *xprt_alloc_bc_request(struct rpc_xprt *xprt) struct rpc_rqst *req; dprintk("RPC: allocate a backchannel request\n"); - spin_lock_bh(&xprt->bc_pa_lock); + spin_lock(&xprt->bc_pa_lock); if (!list_empty(&xprt->bc_pa_list)) { req = list_first_entry(&xprt->bc_pa_list, struct rpc_rqst, rq_bc_pa_list); @@ -226,7 +229,7 @@ struct rpc_rqst *xprt_alloc_bc_request(struct rpc_xprt *xprt) } else { req = NULL; } - spin_unlock_bh(&xprt->bc_pa_lock); + spin_unlock(&xprt->bc_pa_lock); if (req != NULL) { set_bit(RPC_BC_PA_IN_USE, &req->rq_bc_pa_state); -- cgit v1.2.3-70-g09d2 From 8cc20198cfccd06cef705c14fd50bde603e2e306 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 22 Jun 2009 14:13:55 +0200 Subject: netfilter: nf_conntrack: death_by_timeout() fix death_by_timeout() might delete a conntrack from hash list and insert it in dying list. nf_ct_delete_from_lists(ct); nf_ct_insert_dying_list(ct); I believe a (lockless) reader could *catch* ct while doing a lookup and miss the end of its chain. (nulls lookup algo must check the null value at the end of lookup and should restart if the null value is not the expected one. cf Documentation/RCU/rculist_nulls.txt for details) We need to change nf_conntrack_init_net() and use a different "null" value, guaranteed not being used in regular lists. Choose very large values, since hash table uses [0..size-1] null values. Signed-off-by: Eric Dumazet Acked-by: Pablo Neira Ayuso Signed-off-by: Patrick McHardy --- net/netfilter/nf_conntrack_core.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index 5f72b94b491..5276a2dd56f 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -1267,13 +1267,19 @@ err_cache: return ret; } +/* + * We need to use special "null" values, not used in hash table + */ +#define UNCONFIRMED_NULLS_VAL ((1<<30)+0) +#define DYING_NULLS_VAL ((1<<30)+1) + static int nf_conntrack_init_net(struct net *net) { int ret; atomic_set(&net->ct.count, 0); - INIT_HLIST_NULLS_HEAD(&net->ct.unconfirmed, 0); - INIT_HLIST_NULLS_HEAD(&net->ct.dying, 0); + INIT_HLIST_NULLS_HEAD(&net->ct.unconfirmed, UNCONFIRMED_NULLS_VAL); + INIT_HLIST_NULLS_HEAD(&net->ct.dying, DYING_NULLS_VAL); net->ct.stat = alloc_percpu(struct ip_conntrack_stat); if (!net->ct.stat) { ret = -ENOMEM; -- cgit v1.2.3-70-g09d2 From 5c8ec910e789a92229978d8fd1fce7b62e8ac711 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Mon, 22 Jun 2009 14:14:16 +0200 Subject: netfilter: nf_conntrack: fix confirmation race condition New connection tracking entries are inserted into the hash before they are fully set up, namely the CONFIRMED bit is not set and the timer not started yet. This can theoretically lead to a race with timer, which would set the timeout value to a relative value, most likely already in the past. Perform hash insertion as the final step to fix this. Signed-off-by: Patrick McHardy --- net/netfilter/nf_conntrack_core.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index 5276a2dd56f..b0b06c7a948 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -425,7 +425,6 @@ __nf_conntrack_confirm(struct sk_buff *skb) /* Remove from unconfirmed list */ hlist_nulls_del_rcu(&ct->tuplehash[IP_CT_DIR_ORIGINAL].hnnode); - __nf_conntrack_hash_insert(ct, hash, repl_hash); /* Timer relative to confirmation time, not original setting time, otherwise we'd get timer wrap in weird delay cases. */ @@ -433,8 +432,16 @@ __nf_conntrack_confirm(struct sk_buff *skb) add_timer(&ct->timeout); atomic_inc(&ct->ct_general.use); set_bit(IPS_CONFIRMED_BIT, &ct->status); + + /* Since the lookup is lockless, hash insertion must be done after + * starting the timer and setting the CONFIRMED bit. The RCU barriers + * guarantee that no other CPU can find the conntrack before the above + * stores are visible. + */ + __nf_conntrack_hash_insert(ct, hash, repl_hash); NF_CT_STAT_INC(net, insert); spin_unlock_bh(&nf_conntrack_lock); + help = nfct_help(ct); if (help && help->helper) nf_conntrack_event_cache(IPCT_HELPER, ct); -- cgit v1.2.3-70-g09d2 From 8d8890b7751387f58ce0a6428773de2fbc0fd596 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Mon, 22 Jun 2009 14:14:41 +0200 Subject: netfilter: nf_conntrack: fix conntrack lookup race The RCU protected conntrack hash lookup only checks whether the entry has a refcount of zero to decide whether it is stale. This is not sufficient, entries are explicitly removed while there is at least one reference left, possibly more. Explicitly check whether the entry has been marked as dying to fix this. Signed-off-by: Patrick McHardy --- net/netfilter/nf_conntrack_core.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index b0b06c7a948..7508f11c5b3 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -335,7 +335,8 @@ begin: h = __nf_conntrack_find(net, tuple); if (h) { ct = nf_ct_tuplehash_to_ctrack(h); - if (unlikely(!atomic_inc_not_zero(&ct->ct_general.use))) + if (unlikely(nf_ct_is_dying(ct) || + !atomic_inc_not_zero(&ct->ct_general.use))) h = NULL; else { if (unlikely(!nf_ct_tuple_equal(tuple, &h->tuple))) { @@ -510,7 +511,8 @@ static noinline int early_drop(struct net *net, unsigned int hash) cnt++; } - if (ct && unlikely(!atomic_inc_not_zero(&ct->ct_general.use))) + if (ct && unlikely(nf_ct_is_dying(ct) || + !atomic_inc_not_zero(&ct->ct_general.use))) ct = NULL; if (ct || cnt >= NF_CT_EVICTION_RANGE) break; -- cgit v1.2.3-70-g09d2 From f9ffc31251c2caa11962c9b74ce650e2167fa8d1 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Mon, 22 Jun 2009 14:15:02 +0200 Subject: netfilter: fix some sparse endianess warnings net/netfilter/xt_NFQUEUE.c:46:9: warning: incorrect type in assignment (different base types) net/netfilter/xt_NFQUEUE.c:46:9: expected unsigned int [unsigned] [usertype] ipaddr net/netfilter/xt_NFQUEUE.c:46:9: got restricted unsigned int net/netfilter/xt_NFQUEUE.c:68:10: warning: incorrect type in assignment (different base types) net/netfilter/xt_NFQUEUE.c:68:10: expected unsigned int [unsigned] net/netfilter/xt_NFQUEUE.c:68:10: got restricted unsigned int net/netfilter/xt_NFQUEUE.c:69:10: warning: incorrect type in assignment (different base types) net/netfilter/xt_NFQUEUE.c:69:10: expected unsigned int [unsigned] net/netfilter/xt_NFQUEUE.c:69:10: got restricted unsigned int net/netfilter/xt_NFQUEUE.c:70:10: warning: incorrect type in assignment (different base types) net/netfilter/xt_NFQUEUE.c:70:10: expected unsigned int [unsigned] net/netfilter/xt_NFQUEUE.c:70:10: got restricted unsigned int net/netfilter/xt_NFQUEUE.c:71:10: warning: incorrect type in assignment (different base types) net/netfilter/xt_NFQUEUE.c:71:10: expected unsigned int [unsigned] net/netfilter/xt_NFQUEUE.c:71:10: got restricted unsigned int net/netfilter/xt_cluster.c:20:55: warning: incorrect type in return expression (different base types) net/netfilter/xt_cluster.c:20:55: expected unsigned int net/netfilter/xt_cluster.c:20:55: got restricted unsigned int const [usertype] ip net/netfilter/xt_cluster.c:20:55: warning: incorrect type in return expression (different base types) net/netfilter/xt_cluster.c:20:55: expected unsigned int net/netfilter/xt_cluster.c:20:55: got restricted unsigned int const [usertype] ip Signed-off-by: Patrick McHardy --- net/netfilter/xt_NFQUEUE.c | 8 ++++---- net/netfilter/xt_cluster.c | 8 ++++---- 2 files changed, 8 insertions(+), 8 deletions(-) (limited to 'net') diff --git a/net/netfilter/xt_NFQUEUE.c b/net/netfilter/xt_NFQUEUE.c index 498b45101df..f28f6a5fc02 100644 --- a/net/netfilter/xt_NFQUEUE.c +++ b/net/netfilter/xt_NFQUEUE.c @@ -40,12 +40,12 @@ nfqueue_tg(struct sk_buff *skb, const struct xt_target_param *par) static u32 hash_v4(const struct sk_buff *skb) { const struct iphdr *iph = ip_hdr(skb); - u32 ipaddr; + __be32 ipaddr; /* packets in either direction go into same queue */ ipaddr = iph->saddr ^ iph->daddr; - return jhash_2words(ipaddr, iph->protocol, jhash_initval); + return jhash_2words((__force u32)ipaddr, iph->protocol, jhash_initval); } static unsigned int @@ -63,14 +63,14 @@ nfqueue_tg4_v1(struct sk_buff *skb, const struct xt_target_param *par) static u32 hash_v6(const struct sk_buff *skb) { const struct ipv6hdr *ip6h = ipv6_hdr(skb); - u32 addr[4]; + __be32 addr[4]; addr[0] = ip6h->saddr.s6_addr32[0] ^ ip6h->daddr.s6_addr32[0]; addr[1] = ip6h->saddr.s6_addr32[1] ^ ip6h->daddr.s6_addr32[1]; addr[2] = ip6h->saddr.s6_addr32[2] ^ ip6h->daddr.s6_addr32[2]; addr[3] = ip6h->saddr.s6_addr32[3] ^ ip6h->daddr.s6_addr32[3]; - return jhash2(addr, ARRAY_SIZE(addr), jhash_initval); + return jhash2((__force u32 *)addr, ARRAY_SIZE(addr), jhash_initval); } static unsigned int diff --git a/net/netfilter/xt_cluster.c b/net/netfilter/xt_cluster.c index 69a639f3540..225ee3ecd69 100644 --- a/net/netfilter/xt_cluster.c +++ b/net/netfilter/xt_cluster.c @@ -15,14 +15,14 @@ #include #include -static inline u_int32_t nf_ct_orig_ipv4_src(const struct nf_conn *ct) +static inline u32 nf_ct_orig_ipv4_src(const struct nf_conn *ct) { - return ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.u3.ip; + return (__force u32)ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.u3.ip; } -static inline const void *nf_ct_orig_ipv6_src(const struct nf_conn *ct) +static inline const u32 *nf_ct_orig_ipv6_src(const struct nf_conn *ct) { - return ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.u3.ip6; + return (__force u32 *)ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.u3.ip6; } static inline u_int32_t -- cgit v1.2.3-70-g09d2 From 249556192859490b6280552d4b877064f9f5ee48 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Mon, 22 Jun 2009 14:15:30 +0200 Subject: netfilter: nf_log: fix direct userspace memory access in proc handler Signed-off-by: Patrick McHardy --- net/netfilter/nf_log.c | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) (limited to 'net') diff --git a/net/netfilter/nf_log.c b/net/netfilter/nf_log.c index 2fefe147750..4e620305f28 100644 --- a/net/netfilter/nf_log.c +++ b/net/netfilter/nf_log.c @@ -47,7 +47,6 @@ int nf_log_register(u_int8_t pf, struct nf_logger *logger) mutex_lock(&nf_log_mutex); if (pf == NFPROTO_UNSPEC) { - int i; for (i = NFPROTO_UNSPEC; i < NFPROTO_NUMPROTO; i++) list_add_tail(&(logger->list[i]), &(nf_loggers_l[i])); } else { @@ -216,7 +215,7 @@ static const struct file_operations nflog_file_ops = { #endif /* PROC_FS */ #ifdef CONFIG_SYSCTL -struct ctl_path nf_log_sysctl_path[] = { +static struct ctl_path nf_log_sysctl_path[] = { { .procname = "net", .ctl_name = CTL_NET, }, { .procname = "netfilter", .ctl_name = NET_NETFILTER, }, { .procname = "nf_log", .ctl_name = CTL_UNNUMBERED, }, @@ -228,19 +227,26 @@ static struct ctl_table nf_log_sysctl_table[NFPROTO_NUMPROTO+1]; static struct ctl_table_header *nf_log_dir_header; static int nf_log_proc_dostring(ctl_table *table, int write, struct file *filp, - void *buffer, size_t *lenp, loff_t *ppos) + void __user *buffer, size_t *lenp, loff_t *ppos) { const struct nf_logger *logger; + char buf[NFLOGGER_NAME_LEN]; + size_t size = *lenp; int r = 0; int tindex = (unsigned long)table->extra1; if (write) { - if (!strcmp(buffer, "NONE")) { + if (size > sizeof(buf)) + size = sizeof(buf); + if (copy_from_user(buf, buffer, size)) + return -EFAULT; + + if (!strcmp(buf, "NONE")) { nf_log_unbind_pf(tindex); return 0; } mutex_lock(&nf_log_mutex); - logger = __find_logger(tindex, buffer); + logger = __find_logger(tindex, buf); if (logger == NULL) { mutex_unlock(&nf_log_mutex); return -ENOENT; -- cgit v1.2.3-70-g09d2 From 6d62182fea6cc6bbc8d82a691ad0608d68a54aeb Mon Sep 17 00:00:00 2001 From: Jan Engelhardt Date: Mon, 22 Jun 2009 14:16:45 +0200 Subject: netfilter: xt_quota: fix incomplete initialization Commit v2.6.29-rc5-872-gacc738f ("xtables: avoid pointer to self") forgot to copy the initial quota value supplied by iptables into the private structure, thus counting from whatever was in the memory kmalloc returned. Signed-off-by: Jan Engelhardt Signed-off-by: Patrick McHardy --- net/netfilter/xt_quota.c | 1 + 1 file changed, 1 insertion(+) (limited to 'net') diff --git a/net/netfilter/xt_quota.c b/net/netfilter/xt_quota.c index 01dd07b764e..98fc190e8f0 100644 --- a/net/netfilter/xt_quota.c +++ b/net/netfilter/xt_quota.c @@ -54,6 +54,7 @@ static bool quota_mt_check(const struct xt_mtchk_param *par) if (q->master == NULL) return -ENOMEM; + q->master->quota = q->quota; return true; } -- cgit v1.2.3-70-g09d2 From 4d900f9df5f0569c2dc536701e2c11b6d50ebebf Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Mon, 22 Jun 2009 14:17:12 +0200 Subject: netfilter: xt_rateest: fix comparison with self MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit As noticed by Török Edwin : Compiling the kernel with clang has shown this warning: net/netfilter/xt_rateest.c:69:16: warning: self-comparison always results in a constant value ret &= pps2 == pps2; ^ Looking at the code: if (info->flags & XT_RATEEST_MATCH_BPS) ret &= bps1 == bps2; if (info->flags & XT_RATEEST_MATCH_PPS) ret &= pps2 == pps2; Judging from the MATCH_BPS case it seems to be a typo, with the intention of comparing pps1 with pps2. http://bugzilla.kernel.org/show_bug.cgi?id=13535 Signed-off-by: Patrick McHardy --- net/netfilter/xt_rateest.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/netfilter/xt_rateest.c b/net/netfilter/xt_rateest.c index 220a1d588ee..4fc6a917f6d 100644 --- a/net/netfilter/xt_rateest.c +++ b/net/netfilter/xt_rateest.c @@ -66,7 +66,7 @@ xt_rateest_mt(const struct sk_buff *skb, const struct xt_match_param *par) if (info->flags & XT_RATEEST_MATCH_BPS) ret &= bps1 == bps2; if (info->flags & XT_RATEEST_MATCH_PPS) - ret &= pps2 == pps2; + ret &= pps1 == pps2; break; } -- cgit v1.2.3-70-g09d2 From d5fdd6babcfc2b0e6a8da1acf492a69fb54b4c47 Mon Sep 17 00:00:00 2001 From: Brian Haley Date: Tue, 23 Jun 2009 04:31:07 -0700 Subject: ipv6: Use correct data types for ICMPv6 type and code Change all the code that deals directly with ICMPv6 type and code values to use u8 instead of a signed int as that's the actual data type. Signed-off-by: Brian Haley Signed-off-by: David S. Miller --- include/linux/icmpv6.h | 6 +++--- include/net/protocol.h | 2 +- include/net/rawv6.h | 2 +- include/net/xfrm.h | 2 +- net/dccp/ipv6.c | 2 +- net/ipv6/ah6.c | 2 +- net/ipv6/esp6.c | 2 +- net/ipv6/icmp.c | 12 ++++++------ net/ipv6/ip6_tunnel.c | 18 +++++++++--------- net/ipv6/ipcomp6.c | 2 +- net/ipv6/mip6.c | 2 +- net/ipv6/raw.c | 4 ++-- net/ipv6/route.c | 2 +- net/ipv6/tcp_ipv6.c | 2 +- net/ipv6/tunnel6.c | 2 +- net/ipv6/udp.c | 6 +++--- net/ipv6/udp_impl.h | 2 +- net/ipv6/udplite.c | 2 +- net/ipv6/xfrm6_tunnel.c | 2 +- net/sctp/ipv6.c | 2 +- 20 files changed, 38 insertions(+), 38 deletions(-) (limited to 'net') diff --git a/include/linux/icmpv6.h b/include/linux/icmpv6.h index 10d701eec48..b6a85183c33 100644 --- a/include/linux/icmpv6.h +++ b/include/linux/icmpv6.h @@ -175,16 +175,16 @@ struct icmp6_filter { extern void icmpv6_send(struct sk_buff *skb, - int type, int code, + u8 type, u8 code, __u32 info, struct net_device *dev); extern int icmpv6_init(void); -extern int icmpv6_err_convert(int type, int code, +extern int icmpv6_err_convert(u8 type, u8 code, int *err); extern void icmpv6_cleanup(void); extern void icmpv6_param_prob(struct sk_buff *skb, - int code, int pos); + u8 code, int pos); struct flowi; struct in6_addr; diff --git a/include/net/protocol.h b/include/net/protocol.h index ffa5b8b1f1d..1089d5aabd4 100644 --- a/include/net/protocol.h +++ b/include/net/protocol.h @@ -53,7 +53,7 @@ struct inet6_protocol void (*err_handler)(struct sk_buff *skb, struct inet6_skb_parm *opt, - int type, int code, int offset, + u8 type, u8 code, int offset, __be32 info); int (*gso_send_check)(struct sk_buff *skb); diff --git a/include/net/rawv6.h b/include/net/rawv6.h index 8a22599f26b..f6b9b830df8 100644 --- a/include/net/rawv6.h +++ b/include/net/rawv6.h @@ -6,7 +6,7 @@ #include void raw6_icmp_error(struct sk_buff *, int nexthdr, - int type, int code, int inner_offset, __be32); + u8 type, u8 code, int inner_offset, __be32); int raw6_local_deliver(struct sk_buff *, int); extern int rawv6_rcv(struct sock *sk, diff --git a/include/net/xfrm.h b/include/net/xfrm.h index 736bca45088..9e3a3f4c1f6 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -1274,7 +1274,7 @@ struct xfrm_tunnel { struct xfrm6_tunnel { int (*handler)(struct sk_buff *skb); int (*err_handler)(struct sk_buff *skb, struct inet6_skb_parm *opt, - int type, int code, int offset, __be32 info); + u8 type, u8 code, int offset, __be32 info); struct xfrm6_tunnel *next; int priority; }; diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c index 05ea7440d9e..3e70faab298 100644 --- a/net/dccp/ipv6.c +++ b/net/dccp/ipv6.c @@ -85,7 +85,7 @@ static inline __u32 dccp_v6_init_sequence(struct sk_buff *skb) } static void dccp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, - int type, int code, int offset, __be32 info) + u8 type, u8 code, int offset, __be32 info) { struct ipv6hdr *hdr = (struct ipv6hdr *)skb->data; const struct dccp_hdr *dh = (struct dccp_hdr *)(skb->data + offset); diff --git a/net/ipv6/ah6.c b/net/ipv6/ah6.c index 52449f7a1b7..86f42a288c4 100644 --- a/net/ipv6/ah6.c +++ b/net/ipv6/ah6.c @@ -405,7 +405,7 @@ out: } static void ah6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, - int type, int code, int offset, __be32 info) + u8 type, u8 code, int offset, __be32 info) { struct net *net = dev_net(skb->dev); struct ipv6hdr *iph = (struct ipv6hdr*)skb->data; diff --git a/net/ipv6/esp6.c b/net/ipv6/esp6.c index c2f250150db..678bb95b152 100644 --- a/net/ipv6/esp6.c +++ b/net/ipv6/esp6.c @@ -354,7 +354,7 @@ static u32 esp6_get_mtu(struct xfrm_state *x, int mtu) } static void esp6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, - int type, int code, int offset, __be32 info) + u8 type, u8 code, int offset, __be32 info) { struct net *net = dev_net(skb->dev); struct ipv6hdr *iph = (struct ipv6hdr*)skb->data; diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c index 36dff880718..eab62a7a8f0 100644 --- a/net/ipv6/icmp.c +++ b/net/ipv6/icmp.c @@ -117,7 +117,7 @@ static __inline__ void icmpv6_xmit_unlock(struct sock *sk) /* * Slightly more convenient version of icmpv6_send. */ -void icmpv6_param_prob(struct sk_buff *skb, int code, int pos) +void icmpv6_param_prob(struct sk_buff *skb, u8 code, int pos) { icmpv6_send(skb, ICMPV6_PARAMPROB, code, pos, skb->dev); kfree_skb(skb); @@ -161,7 +161,7 @@ static int is_ineligible(struct sk_buff *skb) /* * Check the ICMP output rate limit */ -static inline int icmpv6_xrlim_allow(struct sock *sk, int type, +static inline int icmpv6_xrlim_allow(struct sock *sk, u8 type, struct flowi *fl) { struct dst_entry *dst; @@ -305,7 +305,7 @@ static inline void mip6_addr_swap(struct sk_buff *skb) {} /* * Send an ICMP message in response to a packet in error */ -void icmpv6_send(struct sk_buff *skb, int type, int code, __u32 info, +void icmpv6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info, struct net_device *dev) { struct net *net = dev_net(skb->dev); @@ -590,7 +590,7 @@ out: icmpv6_xmit_unlock(sk); } -static void icmpv6_notify(struct sk_buff *skb, int type, int code, __be32 info) +static void icmpv6_notify(struct sk_buff *skb, u8 type, u8 code, __be32 info) { struct inet6_protocol *ipprot; int inner_offset; @@ -643,7 +643,7 @@ static int icmpv6_rcv(struct sk_buff *skb) struct in6_addr *saddr, *daddr; struct ipv6hdr *orig_hdr; struct icmp6hdr *hdr; - int type; + u8 type; if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb)) { struct sec_path *sp = skb_sec_path(skb); @@ -914,7 +914,7 @@ static const struct icmp6_err { }, }; -int icmpv6_err_convert(int type, int code, int *err) +int icmpv6_err_convert(u8 type, u8 code, int *err) { int fatal = 0; diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c index 404d16a97d5..51f410e7775 100644 --- a/net/ipv6/ip6_tunnel.c +++ b/net/ipv6/ip6_tunnel.c @@ -394,13 +394,13 @@ parse_tlv_tnl_enc_lim(struct sk_buff *skb, __u8 * raw) static int ip6_tnl_err(struct sk_buff *skb, __u8 ipproto, struct inet6_skb_parm *opt, - int *type, int *code, int *msg, __u32 *info, int offset) + u8 *type, u8 *code, int *msg, __u32 *info, int offset) { struct ipv6hdr *ipv6h = (struct ipv6hdr *) skb->data; struct ip6_tnl *t; int rel_msg = 0; - int rel_type = ICMPV6_DEST_UNREACH; - int rel_code = ICMPV6_ADDR_UNREACH; + u8 rel_type = ICMPV6_DEST_UNREACH; + u8 rel_code = ICMPV6_ADDR_UNREACH; __u32 rel_info = 0; __u16 len; int err = -ENOENT; @@ -488,11 +488,11 @@ out: static int ip4ip6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, - int type, int code, int offset, __be32 info) + u8 type, u8 code, int offset, __be32 info) { int rel_msg = 0; - int rel_type = type; - int rel_code = code; + u8 rel_type = type; + u8 rel_code = code; __u32 rel_info = ntohl(info); int err; struct sk_buff *skb2; @@ -586,11 +586,11 @@ out: static int ip6ip6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, - int type, int code, int offset, __be32 info) + u8 type, u8 code, int offset, __be32 info) { int rel_msg = 0; - int rel_type = type; - int rel_code = code; + u8 rel_type = type; + u8 rel_code = code; __u32 rel_info = ntohl(info); int err; diff --git a/net/ipv6/ipcomp6.c b/net/ipv6/ipcomp6.c index 3a0b3be7ece..79c172f1ff0 100644 --- a/net/ipv6/ipcomp6.c +++ b/net/ipv6/ipcomp6.c @@ -51,7 +51,7 @@ #include static void ipcomp6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, - int type, int code, int offset, __be32 info) + u8 type, u8 code, int offset, __be32 info) { __be32 spi; struct ipv6hdr *iph = (struct ipv6hdr*)skb->data; diff --git a/net/ipv6/mip6.c b/net/ipv6/mip6.c index f995e19c87a..f797e8c6f3b 100644 --- a/net/ipv6/mip6.c +++ b/net/ipv6/mip6.c @@ -54,7 +54,7 @@ static inline void *mip6_padn(__u8 *data, __u8 padlen) return data + padlen; } -static inline void mip6_param_prob(struct sk_buff *skb, int code, int pos) +static inline void mip6_param_prob(struct sk_buff *skb, u8 code, int pos) { icmpv6_send(skb, ICMPV6_PARAMPROB, code, pos, skb->dev); } diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c index 8b0b6f94806..d6c3c1c34b2 100644 --- a/net/ipv6/raw.c +++ b/net/ipv6/raw.c @@ -310,7 +310,7 @@ out: static void rawv6_err(struct sock *sk, struct sk_buff *skb, struct inet6_skb_parm *opt, - int type, int code, int offset, __be32 info) + u8 type, u8 code, int offset, __be32 info) { struct inet_sock *inet = inet_sk(sk); struct ipv6_pinfo *np = inet6_sk(sk); @@ -343,7 +343,7 @@ static void rawv6_err(struct sock *sk, struct sk_buff *skb, } void raw6_icmp_error(struct sk_buff *skb, int nexthdr, - int type, int code, int inner_offset, __be32 info) + u8 type, u8 code, int inner_offset, __be32 info) { struct sock *sk; int hash; diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 658293ea05b..1473ee0a1f5 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -1865,7 +1865,7 @@ int ipv6_route_ioctl(struct net *net, unsigned int cmd, void __user *arg) * Drop the packet on the floor */ -static int ip6_pkt_drop(struct sk_buff *skb, int code, int ipstats_mib_noroutes) +static int ip6_pkt_drop(struct sk_buff *skb, u8 code, int ipstats_mib_noroutes) { int type; struct dst_entry *dst = skb_dst(skb); diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 53b6a4192b1..58810c65b63 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -317,7 +317,7 @@ failure: } static void tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, - int type, int code, int offset, __be32 info) + u8 type, u8 code, int offset, __be32 info) { struct ipv6hdr *hdr = (struct ipv6hdr*)skb->data; const struct tcphdr *th = (struct tcphdr *)(skb->data+offset); diff --git a/net/ipv6/tunnel6.c b/net/ipv6/tunnel6.c index 669f280989c..633ad789eff 100644 --- a/net/ipv6/tunnel6.c +++ b/net/ipv6/tunnel6.c @@ -124,7 +124,7 @@ drop: } static void tunnel6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, - int type, int code, int offset, __be32 info) + u8 type, u8 code, int offset, __be32 info) { struct xfrm6_tunnel *handler; diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index 023beda6b22..33b59bd92c4 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -312,7 +312,7 @@ csum_copy_err: } void __udp6_lib_err(struct sk_buff *skb, struct inet6_skb_parm *opt, - int type, int code, int offset, __be32 info, + u8 type, u8 code, int offset, __be32 info, struct udp_table *udptable) { struct ipv6_pinfo *np; @@ -346,8 +346,8 @@ out: } static __inline__ void udpv6_err(struct sk_buff *skb, - struct inet6_skb_parm *opt, int type, - int code, int offset, __be32 info ) + struct inet6_skb_parm *opt, u8 type, + u8 code, int offset, __be32 info ) { __udp6_lib_err(skb, opt, type, code, offset, info, &udp_table); } diff --git a/net/ipv6/udp_impl.h b/net/ipv6/udp_impl.h index 23779208c33..6bb303471e2 100644 --- a/net/ipv6/udp_impl.h +++ b/net/ipv6/udp_impl.h @@ -9,7 +9,7 @@ extern int __udp6_lib_rcv(struct sk_buff *, struct udp_table *, int ); extern void __udp6_lib_err(struct sk_buff *, struct inet6_skb_parm *, - int , int , int , __be32 , struct udp_table *); + u8 , u8 , int , __be32 , struct udp_table *); extern int udp_v6_get_port(struct sock *sk, unsigned short snum); diff --git a/net/ipv6/udplite.c b/net/ipv6/udplite.c index ba162a82458..4818c48688f 100644 --- a/net/ipv6/udplite.c +++ b/net/ipv6/udplite.c @@ -20,7 +20,7 @@ static int udplitev6_rcv(struct sk_buff *skb) static void udplitev6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, - int type, int code, int offset, __be32 info) + u8 type, u8 code, int offset, __be32 info) { __udp6_lib_err(skb, opt, type, code, offset, info, &udplite_table); } diff --git a/net/ipv6/xfrm6_tunnel.c b/net/ipv6/xfrm6_tunnel.c index 80193db224d..81a95c00e50 100644 --- a/net/ipv6/xfrm6_tunnel.c +++ b/net/ipv6/xfrm6_tunnel.c @@ -262,7 +262,7 @@ static int xfrm6_tunnel_rcv(struct sk_buff *skb) } static int xfrm6_tunnel_err(struct sk_buff *skb, struct inet6_skb_parm *opt, - int type, int code, int offset, __be32 info) + u8 type, u8 code, int offset, __be32 info) { /* xfrm6_tunnel native err handling */ switch (type) { diff --git a/net/sctp/ipv6.c b/net/sctp/ipv6.c index a63de3f7f18..6a4b1909414 100644 --- a/net/sctp/ipv6.c +++ b/net/sctp/ipv6.c @@ -133,7 +133,7 @@ static struct notifier_block sctp_inet6addr_notifier = { /* ICMP error handler. */ SCTP_STATIC void sctp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, - int type, int code, int offset, __be32 info) + u8 type, u8 code, int offset, __be32 info) { struct inet6_dev *idev; struct sock *sk; -- cgit v1.2.3-70-g09d2 From d55d87fdff8252d0e2f7c28c2d443aee17e9d70f Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Mon, 22 Jun 2009 02:25:25 +0000 Subject: net: Move rx skb_orphan call to where needed In order to get the tun driver to account packets, we need to be able to receive packets with destructors set. To be on the safe side, I added an skb_orphan call for all protocols by default since some of them (IP in particular) cannot handle receiving packets destructors properly. Now it seems that at least one protocol (CAN) expects to be able to pass skb->sk through the rx path without getting clobbered. So this patch attempts to fix this properly by moving the skb_orphan call to where it's actually needed. In particular, I've added it to skb_set_owner_[rw] which is what most users of skb->destructor call. This is actually an improvement for tun too since it means that we only give back the amount charged to the socket when the skb is passed to another socket that will also be charged accordingly. Signed-off-by: Herbert Xu Tested-by: Oliver Hartkopp Signed-off-by: David S. Miller --- include/net/sctp/sctp.h | 1 + include/net/sock.h | 2 ++ net/ax25/ax25_in.c | 3 +-- net/core/dev.c | 2 -- net/irda/af_irda.c | 3 --- net/irda/ircomm/ircomm_lmp.c | 1 + 6 files changed, 5 insertions(+), 7 deletions(-) (limited to 'net') diff --git a/include/net/sctp/sctp.h b/include/net/sctp/sctp.h index 9f80a766828..d16a304cbed 100644 --- a/include/net/sctp/sctp.h +++ b/include/net/sctp/sctp.h @@ -448,6 +448,7 @@ static inline void sctp_skb_set_owner_r(struct sk_buff *skb, struct sock *sk) { struct sctp_ulpevent *event = sctp_skb2event(skb); + skb_orphan(skb); skb->sk = sk; skb->destructor = sctp_sock_rfree; atomic_add(event->rmem_len, &sk->sk_rmem_alloc); diff --git a/include/net/sock.h b/include/net/sock.h index 570c7a12b54..7f5c41cc45a 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -1250,6 +1250,7 @@ static inline int sk_has_allocations(const struct sock *sk) static inline void skb_set_owner_w(struct sk_buff *skb, struct sock *sk) { + skb_orphan(skb); skb->sk = sk; skb->destructor = sock_wfree; /* @@ -1262,6 +1263,7 @@ static inline void skb_set_owner_w(struct sk_buff *skb, struct sock *sk) static inline void skb_set_owner_r(struct sk_buff *skb, struct sock *sk) { + skb_orphan(skb); skb->sk = sk; skb->destructor = sock_rfree; atomic_add(skb->truesize, &sk->sk_rmem_alloc); diff --git a/net/ax25/ax25_in.c b/net/ax25/ax25_in.c index 5f1d2107a1d..de56d3983de 100644 --- a/net/ax25/ax25_in.c +++ b/net/ax25/ax25_in.c @@ -437,8 +437,7 @@ free: int ax25_kiss_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *ptype, struct net_device *orig_dev) { - skb->sk = NULL; /* Initially we don't know who it's for */ - skb->destructor = NULL; /* Who initializes this, dammit?! */ + skb_orphan(skb); if (!net_eq(dev_net(dev), &init_net)) { kfree_skb(skb); diff --git a/net/core/dev.c b/net/core/dev.c index baf2dc13a34..60b57281227 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -2310,8 +2310,6 @@ ncls: if (!skb) goto out; - skb_orphan(skb); - type = skb->protocol; list_for_each_entry_rcu(ptype, &ptype_base[ntohs(type) & PTYPE_HASH_MASK], list) { diff --git a/net/irda/af_irda.c b/net/irda/af_irda.c index 5922febe25c..cb762c8723e 100644 --- a/net/irda/af_irda.c +++ b/net/irda/af_irda.c @@ -913,9 +913,6 @@ static int irda_accept(struct socket *sock, struct socket *newsock, int flags) /* Clean up the original one to keep it in listen state */ irttp_listen(self->tsap); - /* Wow ! What is that ? Jean II */ - skb->sk = NULL; - skb->destructor = NULL; kfree_skb(skb); sk->sk_ack_backlog--; diff --git a/net/irda/ircomm/ircomm_lmp.c b/net/irda/ircomm/ircomm_lmp.c index 67c99d20857..7ba96618660 100644 --- a/net/irda/ircomm/ircomm_lmp.c +++ b/net/irda/ircomm/ircomm_lmp.c @@ -196,6 +196,7 @@ static int ircomm_lmp_data_request(struct ircomm_cb *self, /* Don't forget to refcount it - see ircomm_tty_do_softint() */ skb_get(skb); + skb_orphan(skb); skb->destructor = ircomm_lmp_flow_control; if ((self->pkt_count++ > 7) && (self->flow_status == FLOW_START)) { -- cgit v1.2.3-70-g09d2 From b6280b47a7a42970d098a3059f4ebe7e55e90d8d Mon Sep 17 00:00:00 2001 From: Neil Horman Date: Mon, 22 Jun 2009 10:18:53 +0000 Subject: ipv4 routing: Ensure that route cache entries are usable and reclaimable with caching is off When route caching is disabled (rt_caching returns false), We still use route cache entries that are created and passed into rt_intern_hash once. These routes need to be made usable for the one call path that holds a reference to them, and they need to be reclaimed when they're finished with their use. To be made usable, they need to be associated with a neighbor table entry (which they currently are not), otherwise iproute_finish2 just discards the packet, since we don't know which L2 peer to send the packet to. To do this binding, we need to follow the path a bit higher up in rt_intern_hash, which calls arp_bind_neighbour, but not assign the route entry to the hash table. Currently, if caching is off, we simply assign the route to the rp pointer and are reutrn success. This patch associates us with a neighbor entry first. Secondly, we need to make sure that any single use routes like this are known to the garbage collector when caching is off. If caching is off, and we try to hash in a route, it will leak when its refcount reaches zero. To avoid this, this patch calls rt_free on the route cache entry passed into rt_intern_hash. This places us on the gc list for the route cache garbage collector, so that when its refcount reaches zero, it will be reclaimed (Thanks to Alexey for this suggestion). I've tested this on a local system here, and with these patches in place, I'm able to maintain routed connectivity to remote systems, even if I set /proc/sys/net/ipv4/rt_cache_rebuild_count to -1, which forces rt_caching to return false. Signed-off-by: Neil Horman Reported-by: Jarek Poplawski Reported-by: Maxime Bizon Signed-off-by: David S. Miller --- net/ipv4/route.c | 26 +++++++++++++++++++++++--- 1 file changed, 23 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 65b3a8b11a6..278f46f5011 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -1093,8 +1093,27 @@ restart: * If we drop it here, the callers have no way to resolve routes * when we're not caching. Instead, just point *rp at rt, so * the caller gets a single use out of the route + * Note that we do rt_free on this new route entry, so that + * once its refcount hits zero, we are still able to reap it + * (Thanks Alexey) + * Note also the rt_free uses call_rcu. We don't actually + * need rcu protection here, this is just our path to get + * on the route gc list. */ - goto report_and_exit; + + if (rt->rt_type == RTN_UNICAST || rt->fl.iif == 0) { + int err = arp_bind_neighbour(&rt->u.dst); + if (err) { + if (net_ratelimit()) + printk(KERN_WARNING + "Neighbour table failure & not caching routes.\n"); + rt_drop(rt); + return err; + } + } + + rt_free(rt); + goto skip_hashing; } rthp = &rt_hash_table[hash].chain; @@ -1211,7 +1230,8 @@ restart: #if RT_CACHE_DEBUG >= 2 if (rt->u.dst.rt_next) { struct rtable *trt; - printk(KERN_DEBUG "rt_cache @%02x: %pI4", hash, &rt->rt_dst); + printk(KERN_DEBUG "rt_cache @%02x: %pI4", + hash, &rt->rt_dst); for (trt = rt->u.dst.rt_next; trt; trt = trt->u.dst.rt_next) printk(" . %pI4", &trt->rt_dst); printk("\n"); @@ -1226,7 +1246,7 @@ restart: spin_unlock_bh(rt_hash_lock_addr(hash)); -report_and_exit: +skip_hashing: if (rp) *rp = rt; else -- cgit v1.2.3-70-g09d2 From 245acb87729bc76ba65c7476665c01837e0cdccb Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Wed, 24 Jun 2009 03:55:41 -0700 Subject: ipsec: Fix name of CAST algorithm Our CAST algorithm is called cast5, not cast128. Clearly nobody has ever used it :) Signed-off-by: Herbert Xu Signed-off-by: David S. Miller --- net/xfrm/xfrm_algo.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/xfrm/xfrm_algo.c b/net/xfrm/xfrm_algo.c index d31ccb48773..faf54c6bf96 100644 --- a/net/xfrm/xfrm_algo.c +++ b/net/xfrm/xfrm_algo.c @@ -292,8 +292,8 @@ static struct xfrm_algo_desc ealg_list[] = { } }, { - .name = "cbc(cast128)", - .compat = "cast128", + .name = "cbc(cast5)", + .compat = "cast5", .uinfo = { .encr = { -- cgit v1.2.3-70-g09d2 From c7a1a4c80f873d5d6ecd173035bb80eba489f380 Mon Sep 17 00:00:00 2001 From: Rémi Denis-Courmont Date: Wed, 24 Jun 2009 01:07:44 +0000 Subject: Phonet: publicize the Netlink notification function MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Rémi Denis-Courmont Signed-off-by: David S. Miller --- include/net/phonet/pn_dev.h | 1 + net/phonet/pn_netlink.c | 4 ++-- 2 files changed, 3 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/include/net/phonet/pn_dev.h b/include/net/phonet/pn_dev.h index 5054dc5ea2c..29d12673661 100644 --- a/include/net/phonet/pn_dev.h +++ b/include/net/phonet/pn_dev.h @@ -45,6 +45,7 @@ int phonet_address_add(struct net_device *dev, u8 addr); int phonet_address_del(struct net_device *dev, u8 addr); u8 phonet_address_get(struct net_device *dev, u8 addr); int phonet_address_lookup(struct net *net, u8 addr); +void phonet_address_notify(int event, struct net_device *dev, u8 addr); #define PN_NO_ADDR 0xff diff --git a/net/phonet/pn_netlink.c b/net/phonet/pn_netlink.c index cec4e595168..f8b4cee434c 100644 --- a/net/phonet/pn_netlink.c +++ b/net/phonet/pn_netlink.c @@ -32,7 +32,7 @@ static int fill_addr(struct sk_buff *skb, struct net_device *dev, u8 addr, u32 pid, u32 seq, int event); -static void rtmsg_notify(int event, struct net_device *dev, u8 addr) +void phonet_address_notify(int event, struct net_device *dev, u8 addr) { struct sk_buff *skb; int err = -ENOBUFS; @@ -94,7 +94,7 @@ static int addr_doit(struct sk_buff *skb, struct nlmsghdr *nlh, void *attr) else err = phonet_address_del(dev, pnaddr); if (!err) - rtmsg_notify(nlh->nlmsg_type, dev, pnaddr); + phonet_address_notify(nlh->nlmsg_type, dev, pnaddr); return err; } -- cgit v1.2.3-70-g09d2 From 2be6fa4c7e5731375cc5e70843a3444293c27514 Mon Sep 17 00:00:00 2001 From: Rémi Denis-Courmont Date: Wed, 24 Jun 2009 01:07:45 +0000 Subject: Phonet: generate Netlink RTM_DELADDR when destroying a device MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Netlink address deletion events were not sent when a network device vanished neither when Phonet was unloaded. Signed-off-by: Rémi Denis-Courmont Signed-off-by: David S. Miller --- net/phonet/pn_dev.c | 52 +++++++++++++++++++++++++++++++--------------------- 1 file changed, 31 insertions(+), 21 deletions(-) (limited to 'net') diff --git a/net/phonet/pn_dev.c b/net/phonet/pn_dev.c index 80a322d7790..b0d6ddd82a9 100644 --- a/net/phonet/pn_dev.c +++ b/net/phonet/pn_dev.c @@ -69,10 +69,27 @@ static struct phonet_device *__phonet_get(struct net_device *dev) return NULL; } -static void __phonet_device_free(struct phonet_device *pnd) +static void phonet_device_destroy(struct net_device *dev) { - list_del(&pnd->list); - kfree(pnd); + struct phonet_device_list *pndevs = phonet_device_list(dev_net(dev)); + struct phonet_device *pnd; + + ASSERT_RTNL(); + + spin_lock_bh(&pndevs->lock); + pnd = __phonet_get(dev); + if (pnd) + list_del(&pnd->list); + spin_unlock_bh(&pndevs->lock); + + if (pnd) { + u8 addr; + + for (addr = find_first_bit(pnd->addrs, 64); addr < 64; + addr = find_next_bit(pnd->addrs, 64, 1+addr)) + phonet_address_notify(RTM_DELADDR, dev, addr); + kfree(pnd); + } } struct net_device *phonet_device_get(struct net *net) @@ -126,8 +143,10 @@ int phonet_address_del(struct net_device *dev, u8 addr) pnd = __phonet_get(dev); if (!pnd || !test_and_clear_bit(addr >> 2, pnd->addrs)) err = -EADDRNOTAVAIL; - else if (bitmap_empty(pnd->addrs, 64)) - __phonet_device_free(pnd); + else if (bitmap_empty(pnd->addrs, 64)) { + list_del(&pnd->list); + kfree(pnd); + } spin_unlock_bh(&pndevs->lock); return err; } @@ -181,18 +200,8 @@ static int phonet_device_notify(struct notifier_block *me, unsigned long what, { struct net_device *dev = arg; - if (what == NETDEV_UNREGISTER) { - struct phonet_device_list *pndevs; - struct phonet_device *pnd; - - /* Destroy phonet-specific device data */ - pndevs = phonet_device_list(dev_net(dev)); - spin_lock_bh(&pndevs->lock); - pnd = __phonet_get(dev); - if (pnd) - __phonet_device_free(pnd); - spin_unlock_bh(&pndevs->lock); - } + if (what == NETDEV_UNREGISTER) + phonet_device_destroy(dev); return 0; } @@ -218,11 +227,12 @@ static int phonet_init_net(struct net *net) static void phonet_exit_net(struct net *net) { struct phonet_net *pnn = net_generic(net, phonet_net_id); - struct phonet_device *pnd, *n; - - list_for_each_entry_safe(pnd, n, &pnn->pndevs.list, list) - __phonet_device_free(pnd); + struct net_device *dev; + rtnl_lock(); + for_each_netdev(net, dev) + phonet_device_destroy(dev); + rtnl_unlock(); kfree(pnn); } -- cgit v1.2.3-70-g09d2 From 308ff823ebd749a94d3b6ac26b95bc0eb114c39e Mon Sep 17 00:00:00 2001 From: Jesper Dangaard Brouer Date: Thu, 25 Jun 2009 16:32:52 +0200 Subject: nf_conntrack: Use rcu_barrier() RCU barriers, rcu_barrier(), is inserted two places. In nf_conntrack_expect.c nf_conntrack_expect_fini() before the kmem_cache_destroy(). Firstly to make sure the callback to the nf_ct_expect_free_rcu() code is still around. Secondly because I'm unsure about the consequence of having in flight nf_ct_expect_free_rcu/kmem_cache_free() calls while doing a kmem_cache_destroy() slab destroy. And in nf_conntrack_extend.c nf_ct_extend_unregister(), inorder to wait for completion of callbacks to __nf_ct_ext_free_rcu(), which is invoked by __nf_ct_ext_add(). It might be more efficient to call rcu_barrier() in nf_conntrack_core.c nf_conntrack_cleanup_net(), but thats make it more difficult to read the code (as the callback code in located in nf_conntrack_extend.c). Signed-off-by: Jesper Dangaard Brouer Signed-off-by: Patrick McHardy --- net/netfilter/nf_conntrack_expect.c | 4 +++- net/netfilter/nf_conntrack_extend.c | 2 +- 2 files changed, 4 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/netfilter/nf_conntrack_expect.c b/net/netfilter/nf_conntrack_expect.c index afde8f99164..2032dfe25ca 100644 --- a/net/netfilter/nf_conntrack_expect.c +++ b/net/netfilter/nf_conntrack_expect.c @@ -617,8 +617,10 @@ err1: void nf_conntrack_expect_fini(struct net *net) { exp_proc_remove(net); - if (net_eq(net, &init_net)) + if (net_eq(net, &init_net)) { + rcu_barrier(); /* Wait for call_rcu() before destroy */ kmem_cache_destroy(nf_ct_expect_cachep); + } nf_ct_free_hashtable(net->ct.expect_hash, net->ct.expect_vmalloc, nf_ct_expect_hsize); } diff --git a/net/netfilter/nf_conntrack_extend.c b/net/netfilter/nf_conntrack_extend.c index 4b2c769d555..fef95be334b 100644 --- a/net/netfilter/nf_conntrack_extend.c +++ b/net/netfilter/nf_conntrack_extend.c @@ -186,6 +186,6 @@ void nf_ct_extend_unregister(struct nf_ct_ext_type *type) rcu_assign_pointer(nf_ct_ext_types[type->id], NULL); update_alloc_size(type); mutex_unlock(&nf_ct_ext_type_mutex); - synchronize_rcu(); + rcu_barrier(); /* Wait for completion of call_rcu()'s */ } EXPORT_SYMBOL_GPL(nf_ct_extend_unregister); -- cgit v1.2.3-70-g09d2 From 1ac530b3553e0b4dc1e18a32bed57cfa84cd57cb Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Wed, 24 Jun 2009 22:29:31 +0000 Subject: tcp: missing check ACK flag of received segment in FIN-WAIT-2 state RFC0793 defined that in FIN-WAIT-2 state if the ACK bit is off drop the segment and return[Page 72]. But this check is missing in function tcp_timewait_state_process(). This cause the segment with FIN flag but no ACK has two diffent action: Case 1: Node A Node B <------------- FIN,ACK (enter FIN-WAIT-1) ACK -------------> (enter FIN-WAIT-2) FIN -------------> discard (move sk to tw list) Case 2: Node A Node B <------------- FIN,ACK (enter FIN-WAIT-1) ACK -------------> (enter FIN-WAIT-2) (move sk to tw list) FIN -------------> <------------- ACK This patch fixed the problem. Signed-off-by: Wei Yongjun Signed-off-by: David S. Miller --- net/ipv4/tcp_minisocks.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c index 43bbba7926e..f8d67ccc64f 100644 --- a/net/ipv4/tcp_minisocks.c +++ b/net/ipv4/tcp_minisocks.c @@ -128,7 +128,8 @@ tcp_timewait_state_process(struct inet_timewait_sock *tw, struct sk_buff *skb, goto kill_with_rst; /* Dup ACK? */ - if (!after(TCP_SKB_CB(skb)->end_seq, tcptw->tw_rcv_nxt) || + if (!th->ack || + !after(TCP_SKB_CB(skb)->end_seq, tcptw->tw_rcv_nxt) || TCP_SKB_CB(skb)->end_seq == TCP_SKB_CB(skb)->seq) { inet_twsk_put(tw); return TCP_TW_SUCCESS; -- cgit v1.2.3-70-g09d2 From a1faa69810b2af562b70b2a71c116c7d03575dd3 Mon Sep 17 00:00:00 2001 From: Jens Rosenboom Date: Thu, 25 Jun 2009 04:55:50 +0000 Subject: ipv6: avoid wraparound for expired preferred lifetime Avoid showing wrong high values when the preferred lifetime of an address is expired. Signed-off-by: Jens Rosenboom Signed-off-by: David S. Miller --- net/ipv6/addrconf.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 8c1e86afbbf..3883b4036a7 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -3362,7 +3362,10 @@ static int inet6_fill_ifaddr(struct sk_buff *skb, struct inet6_ifaddr *ifa, valid = ifa->valid_lft; if (preferred != INFINITY_LIFE_TIME) { long tval = (jiffies - ifa->tstamp)/HZ; - preferred -= tval; + if (preferred > tval) + preferred -= tval; + else + preferred = 0; if (valid != INFINITY_LIFE_TIME) valid -= tval; } -- cgit v1.2.3-70-g09d2 From 10e85448019097e4fcfa535f612f51d0d31a34f4 Mon Sep 17 00:00:00 2001 From: Jesper Dangaard Brouer Date: Fri, 26 Jun 2009 10:46:08 +0000 Subject: decnet: Use rcu_barrier() on module unload. The decnet module unloading as been disabled with a '#if 0' statement, because it have had issues. We add a rcu_barrier() anyhow for correctness. The maintainer (Chrissie Caulfield) will look into the unload issue when time permits. Acked-by: Paul E. McKenney Acked-by: Chrissie Caulfield Signed-off-by: Jesper Dangaard Brouer Signed-off-by: David S. Miller --- net/decnet/af_decnet.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'net') diff --git a/net/decnet/af_decnet.c b/net/decnet/af_decnet.c index d351b8db0df..77d40289653 100644 --- a/net/decnet/af_decnet.c +++ b/net/decnet/af_decnet.c @@ -2413,6 +2413,8 @@ static void __exit decnet_exit(void) proc_net_remove(&init_net, "decnet"); proto_unregister(&dn_proto); + + rcu_barrier_bh(); /* Wait for completion of call_rcu_bh()'s */ } module_exit(decnet_exit); #endif -- cgit v1.2.3-70-g09d2 From 1f2ccd00f224a4e2d6d26f590f3e6851f3deef99 Mon Sep 17 00:00:00 2001 From: Jesper Dangaard Brouer Date: Fri, 26 Jun 2009 10:46:03 +0000 Subject: ipv6: Use rcu_barrier() on module unload. The ipv6 module uses rcu_call() thus it should use rcu_barrier() on module unload. Acked-by: Paul E. McKenney Signed-off-by: Jesper Dangaard Brouer Signed-off-by: David S. Miller --- net/ipv6/af_inet6.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'net') diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c index 85b3d0036af..caa0278d30a 100644 --- a/net/ipv6/af_inet6.c +++ b/net/ipv6/af_inet6.c @@ -1284,6 +1284,8 @@ static void __exit inet6_exit(void) proto_unregister(&udplitev6_prot); proto_unregister(&udpv6_prot); proto_unregister(&tcpv6_prot); + + rcu_barrier(); /* Wait for completion of call_rcu()'s */ } module_exit(inet6_exit); -- cgit v1.2.3-70-g09d2 From 473c22d759e73cbbe604f41105b497817cc2ee8e Mon Sep 17 00:00:00 2001 From: Jesper Dangaard Brouer Date: Fri, 26 Jun 2009 10:45:48 +0000 Subject: bridge: Use rcu_barrier() instead of syncronize_net() on unload. When unloading modules that uses call_rcu() callbacks, then we must use rcu_barrier(). This module uses syncronize_net() which is not enough to be sure that all callback has been completed. Acked-by: Paul E. McKenney Signed-off-by: Jesper Dangaard Brouer Signed-off-by: David S. Miller --- net/bridge/br.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/bridge/br.c b/net/bridge/br.c index 9aac5213105..e1241c76239 100644 --- a/net/bridge/br.c +++ b/net/bridge/br.c @@ -93,7 +93,7 @@ static void __exit br_deinit(void) unregister_pernet_subsys(&br_net_ops); - synchronize_net(); + rcu_barrier(); /* Wait for completion of call_rcu()'s */ br_netfilter_fini(); #if defined(CONFIG_ATM_LANE) || defined(CONFIG_ATM_LANE_MODULE) -- cgit v1.2.3-70-g09d2 From 75de874f5c35f679c6370fccc2bf4930e638ef3b Mon Sep 17 00:00:00 2001 From: Jesper Dangaard Brouer Date: Fri, 26 Jun 2009 10:45:58 +0000 Subject: sunrpc: Use rcu_barrier() on unload. The sunrpc module uses rcu_call() thus it should use rcu_barrier() on module unload. Have not verified that the possibility for new call_rcu() callbacks has been disabled. As a hint for checking, the functions calling call_rcu() (unx_destroy_cred and generic_destroy_cred) are registered as crdestroy function pointer in struct rpc_credops. Acked-by: Paul E. McKenney Acked-by: Trond Myklebust Signed-off-by: Jesper Dangaard Brouer Signed-off-by: David S. Miller --- net/sunrpc/sunrpc_syms.c | 1 + 1 file changed, 1 insertion(+) (limited to 'net') diff --git a/net/sunrpc/sunrpc_syms.c b/net/sunrpc/sunrpc_syms.c index 843629f5576..adaa81982f7 100644 --- a/net/sunrpc/sunrpc_syms.c +++ b/net/sunrpc/sunrpc_syms.c @@ -66,6 +66,7 @@ cleanup_sunrpc(void) #ifdef CONFIG_PROC_FS rpc_proc_exit(); #endif + rcu_barrier(); /* Wait for completion of call_rcu()'s */ } MODULE_LICENSE("GPL"); module_init(init_sunrpc); -- cgit v1.2.3-70-g09d2 From 4a27096bbe2cad4c6e78802a0d9dfe0e598a1129 Mon Sep 17 00:00:00 2001 From: Jesper Dangaard Brouer Date: Fri, 26 Jun 2009 10:45:53 +0000 Subject: mac80211: Use rcu_barrier() on unload. The mac80211 module uses rcu_call() thus it should use rcu_barrier() on module unload. The rcu_barrier() is placed in mech.c ieee80211_stop_mesh() which is invoked from ieee80211_stop() in case vif.type == NL80211_IFTYPE_MESH_POINT. Acked-by: Paul E. McKenney Acked-by: Johannes Berg Signed-off-by: Jesper Dangaard Brouer Signed-off-by: David S. Miller --- net/mac80211/mesh.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/mac80211/mesh.c b/net/mac80211/mesh.c index fc712e60705..11cf45bce38 100644 --- a/net/mac80211/mesh.c +++ b/net/mac80211/mesh.c @@ -494,7 +494,7 @@ void ieee80211_stop_mesh(struct ieee80211_sub_if_data *sdata) * should it be using the interface and enqueuing * frames at this very time on another CPU. */ - synchronize_rcu(); + rcu_barrier(); /* Wait for RX path and call_rcu()'s */ skb_queue_purge(&sdata->u.mesh.skb_queue); } -- cgit v1.2.3-70-g09d2 From 71f9dacd2e4d233029e9e956ca3f79531f411827 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Fri, 26 Jun 2009 19:22:37 -0700 Subject: inet: Call skb_orphan before tproxy activates As transparent proxying looks up the socket early and assigns it to the skb for later processing, we must drop any existing socket ownership prior to that in order to distinguish between the case where tproxy is active and where it is not. Signed-off-by: Herbert Xu Signed-off-by: David S. Miller --- net/ipv4/ip_input.c | 3 +++ net/ipv6/ip6_input.c | 3 +++ 2 files changed, 6 insertions(+) (limited to 'net') diff --git a/net/ipv4/ip_input.c b/net/ipv4/ip_input.c index 490ce20faf3..db46b4b5b2b 100644 --- a/net/ipv4/ip_input.c +++ b/net/ipv4/ip_input.c @@ -440,6 +440,9 @@ int ip_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt, /* Remove any debris in the socket control block */ memset(IPCB(skb), 0, sizeof(struct inet_skb_parm)); + /* Must drop socket now because of tproxy. */ + skb_orphan(skb); + return NF_HOOK(PF_INET, NF_INET_PRE_ROUTING, skb, dev, NULL, ip_rcv_finish); diff --git a/net/ipv6/ip6_input.c b/net/ipv6/ip6_input.c index c3a07d75b5f..6d6a4277c67 100644 --- a/net/ipv6/ip6_input.c +++ b/net/ipv6/ip6_input.c @@ -139,6 +139,9 @@ int ipv6_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt rcu_read_unlock(); + /* Must drop socket now because of tproxy. */ + skb_orphan(skb); + return NF_HOOK(PF_INET6, NF_INET_PRE_ROUTING, skb, dev, NULL, ip6_rcv_finish); err: -- cgit v1.2.3-70-g09d2 From ff780cd8f2fa928b193554f593b36d1243554212 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Fri, 26 Jun 2009 19:27:04 -0700 Subject: gro: Flush GRO packets in napi_disable_pending path When NAPI is disabled while we're in net_rx_action, we end up calling __napi_complete without flushing GRO packets. This is a bug as it would cause the GRO packets to linger, of course it also literally BUGs to catch error like this :) This patch changes it to napi_complete, with the obligatory IRQ reenabling. This should be safe because we've only just disabled IRQs and it does not materially affect the test conditions in between. Signed-off-by: Herbert Xu Signed-off-by: David S. Miller --- net/core/dev.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/core/dev.c b/net/core/dev.c index 60b57281227..70c27e0c7c3 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -2823,9 +2823,11 @@ static void net_rx_action(struct softirq_action *h) * move the instance around on the list at-will. */ if (unlikely(work == weight)) { - if (unlikely(napi_disable_pending(n))) - __napi_complete(n); - else + if (unlikely(napi_disable_pending(n))) { + local_irq_enable(); + napi_complete(n); + local_irq_disable(); + } else list_move_tail(&n->poll_list, list); } -- cgit v1.2.3-70-g09d2 From a3a9f79e361e864f0e9d75ebe2a0cb43d17c4272 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Mon, 29 Jun 2009 14:07:56 +0200 Subject: netfilter: tcp conntrack: fix unacknowledged data detection with NAT When NAT helpers change the TCP packet size, the highest seen sequence number needs to be corrected. This is currently only done upwards, when the packet size is reduced the sequence number is unchanged. This causes TCP conntrack to falsely detect unacknowledged data and decrease the timeout. Fix by updating the highest seen sequence number in both directions after packet mangling. Tested-by: Krzysztof Piotr Oledzki Signed-off-by: Patrick McHardy --- include/net/netfilter/nf_conntrack.h | 4 ++-- net/ipv4/netfilter/nf_nat_helper.c | 17 +++++++++++------ net/netfilter/nf_conntrack_proto_tcp.c | 6 +++--- 3 files changed, 16 insertions(+), 11 deletions(-) (limited to 'net') diff --git a/include/net/netfilter/nf_conntrack.h b/include/net/netfilter/nf_conntrack.h index a632689b61b..cbdd6284996 100644 --- a/include/net/netfilter/nf_conntrack.h +++ b/include/net/netfilter/nf_conntrack.h @@ -258,8 +258,8 @@ static inline bool nf_ct_kill(struct nf_conn *ct) /* Update TCP window tracking data when NAT mangles the packet */ extern void nf_conntrack_tcp_update(const struct sk_buff *skb, unsigned int dataoff, - struct nf_conn *ct, - int dir); + struct nf_conn *ct, int dir, + s16 offset); /* Fake conntrack entry for untracked connections */ extern struct nf_conn nf_conntrack_untracked; diff --git a/net/ipv4/netfilter/nf_nat_helper.c b/net/ipv4/netfilter/nf_nat_helper.c index 155c008626c..09172a65d9b 100644 --- a/net/ipv4/netfilter/nf_nat_helper.c +++ b/net/ipv4/netfilter/nf_nat_helper.c @@ -191,7 +191,8 @@ nf_nat_mangle_tcp_packet(struct sk_buff *skb, ct, ctinfo); /* Tell TCP window tracking about seq change */ nf_conntrack_tcp_update(skb, ip_hdrlen(skb), - ct, CTINFO2DIR(ctinfo)); + ct, CTINFO2DIR(ctinfo), + (int)rep_len - (int)match_len); nf_conntrack_event_cache(IPCT_NATSEQADJ, ct); } @@ -377,6 +378,7 @@ nf_nat_seq_adjust(struct sk_buff *skb, struct tcphdr *tcph; int dir; __be32 newseq, newack; + s16 seqoff, ackoff; struct nf_conn_nat *nat = nfct_nat(ct); struct nf_nat_seq *this_way, *other_way; @@ -390,15 +392,18 @@ nf_nat_seq_adjust(struct sk_buff *skb, tcph = (void *)skb->data + ip_hdrlen(skb); if (after(ntohl(tcph->seq), this_way->correction_pos)) - newseq = htonl(ntohl(tcph->seq) + this_way->offset_after); + seqoff = this_way->offset_after; else - newseq = htonl(ntohl(tcph->seq) + this_way->offset_before); + seqoff = this_way->offset_before; if (after(ntohl(tcph->ack_seq) - other_way->offset_before, other_way->correction_pos)) - newack = htonl(ntohl(tcph->ack_seq) - other_way->offset_after); + ackoff = other_way->offset_after; else - newack = htonl(ntohl(tcph->ack_seq) - other_way->offset_before); + ackoff = other_way->offset_before; + + newseq = htonl(ntohl(tcph->seq) + seqoff); + newack = htonl(ntohl(tcph->ack_seq) - ackoff); inet_proto_csum_replace4(&tcph->check, skb, tcph->seq, newseq, 0); inet_proto_csum_replace4(&tcph->check, skb, tcph->ack_seq, newack, 0); @@ -413,7 +418,7 @@ nf_nat_seq_adjust(struct sk_buff *skb, if (!nf_nat_sack_adjust(skb, tcph, ct, ctinfo)) return 0; - nf_conntrack_tcp_update(skb, ip_hdrlen(skb), ct, dir); + nf_conntrack_tcp_update(skb, ip_hdrlen(skb), ct, dir, seqoff); return 1; } diff --git a/net/netfilter/nf_conntrack_proto_tcp.c b/net/netfilter/nf_conntrack_proto_tcp.c index 33fc0a443f3..97a82ba7537 100644 --- a/net/netfilter/nf_conntrack_proto_tcp.c +++ b/net/netfilter/nf_conntrack_proto_tcp.c @@ -720,8 +720,8 @@ static bool tcp_in_window(const struct nf_conn *ct, /* Caller must linearize skb at tcp header. */ void nf_conntrack_tcp_update(const struct sk_buff *skb, unsigned int dataoff, - struct nf_conn *ct, - int dir) + struct nf_conn *ct, int dir, + s16 offset) { const struct tcphdr *tcph = (const void *)skb->data + dataoff; const struct ip_ct_tcp_state *sender = &ct->proto.tcp.seen[dir]; @@ -734,7 +734,7 @@ void nf_conntrack_tcp_update(const struct sk_buff *skb, /* * We have to worry for the ack in the reply packet only... */ - if (after(end, ct->proto.tcp.seen[dir].td_end)) + if (ct->proto.tcp.seen[dir].td_end + offset == end) ct->proto.tcp.seen[dir].td_end = end; ct->proto.tcp.last_end = end; spin_unlock_bh(&ct->lock); -- cgit v1.2.3-70-g09d2 From d6d3f08b0fd998b647a05540cedd11a067b72867 Mon Sep 17 00:00:00 2001 From: Jan Engelhardt Date: Mon, 29 Jun 2009 14:31:46 +0200 Subject: netfilter: xtables: conntrack match revision 2 As reported by Philip, the UNTRACKED state bit does not fit within the 8-bit state_mask member. Enlarge state_mask and give status_mask a few more bits too. Reported-by: Philip Craig References: http://markmail.org/thread/b7eg6aovfh4agyz7 Signed-off-by: Jan Engelhardt Signed-off-by: Patrick McHardy --- include/linux/netfilter/xt_conntrack.h | 13 +++++++ net/netfilter/xt_conntrack.c | 66 ++++++++++++++++++++++++++++++---- 2 files changed, 73 insertions(+), 6 deletions(-) (limited to 'net') diff --git a/include/linux/netfilter/xt_conntrack.h b/include/linux/netfilter/xt_conntrack.h index 3430c775194..7ae05338e94 100644 --- a/include/linux/netfilter/xt_conntrack.h +++ b/include/linux/netfilter/xt_conntrack.h @@ -81,4 +81,17 @@ struct xt_conntrack_mtinfo1 { __u8 state_mask, status_mask; }; +struct xt_conntrack_mtinfo2 { + union nf_inet_addr origsrc_addr, origsrc_mask; + union nf_inet_addr origdst_addr, origdst_mask; + union nf_inet_addr replsrc_addr, replsrc_mask; + union nf_inet_addr repldst_addr, repldst_mask; + __u32 expires_min, expires_max; + __u16 l4proto; + __be16 origsrc_port, origdst_port; + __be16 replsrc_port, repldst_port; + __u16 match_flags, invert_flags; + __u16 state_mask, status_mask; +}; + #endif /*_XT_CONNTRACK_H*/ diff --git a/net/netfilter/xt_conntrack.c b/net/netfilter/xt_conntrack.c index 0b7139f3dd7..fc581800698 100644 --- a/net/netfilter/xt_conntrack.c +++ b/net/netfilter/xt_conntrack.c @@ -129,7 +129,7 @@ conntrack_addrcmp(const union nf_inet_addr *kaddr, static inline bool conntrack_mt_origsrc(const struct nf_conn *ct, - const struct xt_conntrack_mtinfo1 *info, + const struct xt_conntrack_mtinfo2 *info, u_int8_t family) { return conntrack_addrcmp(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.u3, @@ -138,7 +138,7 @@ conntrack_mt_origsrc(const struct nf_conn *ct, static inline bool conntrack_mt_origdst(const struct nf_conn *ct, - const struct xt_conntrack_mtinfo1 *info, + const struct xt_conntrack_mtinfo2 *info, u_int8_t family) { return conntrack_addrcmp(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.u3, @@ -147,7 +147,7 @@ conntrack_mt_origdst(const struct nf_conn *ct, static inline bool conntrack_mt_replsrc(const struct nf_conn *ct, - const struct xt_conntrack_mtinfo1 *info, + const struct xt_conntrack_mtinfo2 *info, u_int8_t family) { return conntrack_addrcmp(&ct->tuplehash[IP_CT_DIR_REPLY].tuple.src.u3, @@ -156,7 +156,7 @@ conntrack_mt_replsrc(const struct nf_conn *ct, static inline bool conntrack_mt_repldst(const struct nf_conn *ct, - const struct xt_conntrack_mtinfo1 *info, + const struct xt_conntrack_mtinfo2 *info, u_int8_t family) { return conntrack_addrcmp(&ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.u3, @@ -164,7 +164,7 @@ conntrack_mt_repldst(const struct nf_conn *ct, } static inline bool -ct_proto_port_check(const struct xt_conntrack_mtinfo1 *info, +ct_proto_port_check(const struct xt_conntrack_mtinfo2 *info, const struct nf_conn *ct) { const struct nf_conntrack_tuple *tuple; @@ -204,7 +204,7 @@ ct_proto_port_check(const struct xt_conntrack_mtinfo1 *info, static bool conntrack_mt(const struct sk_buff *skb, const struct xt_match_param *par) { - const struct xt_conntrack_mtinfo1 *info = par->matchinfo; + const struct xt_conntrack_mtinfo2 *info = par->matchinfo; enum ip_conntrack_info ctinfo; const struct nf_conn *ct; unsigned int statebit; @@ -278,6 +278,16 @@ conntrack_mt(const struct sk_buff *skb, const struct xt_match_param *par) return true; } +static bool +conntrack_mt_v1(const struct sk_buff *skb, const struct xt_match_param *par) +{ + const struct xt_conntrack_mtinfo2 *const *info = par->matchinfo; + struct xt_match_param newpar = *par; + + newpar.matchinfo = *info; + return conntrack_mt(skb, &newpar); +} + static bool conntrack_mt_check(const struct xt_mtchk_param *par) { if (nf_ct_l3proto_try_module_get(par->family) < 0) { @@ -288,11 +298,45 @@ static bool conntrack_mt_check(const struct xt_mtchk_param *par) return true; } +static bool conntrack_mt_check_v1(const struct xt_mtchk_param *par) +{ + struct xt_conntrack_mtinfo1 *info = par->matchinfo; + struct xt_conntrack_mtinfo2 *up; + int ret = conntrack_mt_check(par); + + if (ret < 0) + return ret; + + up = kmalloc(sizeof(*up), GFP_KERNEL); + if (up == NULL) { + nf_ct_l3proto_module_put(par->family); + return -ENOMEM; + } + + /* + * The strategy here is to minimize the overhead of v1 matching, + * by prebuilding a v2 struct and putting the pointer into the + * v1 dataspace. + */ + memcpy(up, info, offsetof(typeof(*info), state_mask)); + up->state_mask = info->state_mask; + up->status_mask = info->status_mask; + *(void **)info = up; + return true; +} + static void conntrack_mt_destroy(const struct xt_mtdtor_param *par) { nf_ct_l3proto_module_put(par->family); } +static void conntrack_mt_destroy_v1(const struct xt_mtdtor_param *par) +{ + struct xt_conntrack_mtinfo2 **info = par->matchinfo; + kfree(*info); + conntrack_mt_destroy(par); +} + #ifdef CONFIG_COMPAT struct compat_xt_conntrack_info { @@ -363,6 +407,16 @@ static struct xt_match conntrack_mt_reg[] __read_mostly = { .revision = 1, .family = NFPROTO_UNSPEC, .matchsize = sizeof(struct xt_conntrack_mtinfo1), + .match = conntrack_mt_v1, + .checkentry = conntrack_mt_check_v1, + .destroy = conntrack_mt_destroy_v1, + .me = THIS_MODULE, + }, + { + .name = "conntrack", + .revision = 2, + .family = NFPROTO_UNSPEC, + .matchsize = sizeof(struct xt_conntrack_mtinfo2), .match = conntrack_mt, .checkentry = conntrack_mt_check, .destroy = conntrack_mt_destroy, -- cgit v1.2.3-70-g09d2 From 932c1329acebc03ef5efa3647c9c3a967b59d0c4 Mon Sep 17 00:00:00 2001 From: Dmitry Eremin-Solenikov Date: Fri, 19 Jun 2009 17:00:08 +0400 Subject: nl802154: fix Oops in ieee802154_nl_get_dev ieee802154_nl_get_dev() lacks check for the existance of the device that was returned by dev_get_XXX, thus resulting in Oops for non-existing devices. Fix it. Signed-off-by: Dmitry Eremin-Solenikov --- net/ieee802154/netlink.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'net') diff --git a/net/ieee802154/netlink.c b/net/ieee802154/netlink.c index 105ad10876a..332b947ae81 100644 --- a/net/ieee802154/netlink.c +++ b/net/ieee802154/netlink.c @@ -276,6 +276,9 @@ static struct net_device *ieee802154_nl_get_dev(struct genl_info *info) else return NULL; + if (!dev) + return NULL; + if (dev->type != ARPHRD_IEEE802154) { dev_put(dev); return NULL; -- cgit v1.2.3-70-g09d2 From dfd06fe8246c0425f8d6850b8e2c872b0d691ec3 Mon Sep 17 00:00:00 2001 From: Dmitry Eremin-Solenikov Date: Fri, 19 Jun 2009 17:02:09 +0400 Subject: nl802154: add module license and description Signed-off-by: Dmitry Eremin-Solenikov --- net/ieee802154/netlink.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'net') diff --git a/net/ieee802154/netlink.c b/net/ieee802154/netlink.c index 332b947ae81..27eda9fdf3c 100644 --- a/net/ieee802154/netlink.c +++ b/net/ieee802154/netlink.c @@ -524,3 +524,6 @@ static void __exit ieee802154_nl_exit(void) } module_exit(ieee802154_nl_exit); +MODULE_LICENSE("GPL v2"); +MODULE_DESCRIPTION("ieee 802.15.4 configuration interface"); + -- cgit v1.2.3-70-g09d2 From 8e5b9dda99cc86bdbd822935fcc37c5808e271b3 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Sun, 28 Jun 2009 18:03:30 +0000 Subject: tcp: Stop non-TSO packets morphing into TSO If a socket starts out on a non-TSO route, and then switches to a TSO route, then the tail on the tx queue can morph into a TSO packet, causing mischief because the rest of the stack does not expect a partially linear TSO packet. This patch fixes this by ensuring that skb->ip_summed is set to CHECKSUM_PARTIAL before declaring a packet as TSO. Reported-by: Johannes Berg Signed-off-by: Herbert Xu Signed-off-by: David S. Miller --- net/ipv4/tcp_output.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 416fc4c2e7e..5bdf08d312d 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -725,7 +725,8 @@ static void tcp_queue_skb(struct sock *sk, struct sk_buff *skb) static void tcp_set_skb_tso_segs(struct sock *sk, struct sk_buff *skb, unsigned int mss_now) { - if (skb->len <= mss_now || !sk_can_gso(sk)) { + if (skb->len <= mss_now || !sk_can_gso(sk) || + skb->ip_summed == CHECKSUM_NONE) { /* Avoid the costly divide in the normal * non-TSO case. */ -- cgit v1.2.3-70-g09d2 From 6828b92bd21acd65113dfe0541f19f5df0d9668f Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Sun, 28 Jun 2009 18:06:41 +0000 Subject: tcp: Do not tack on TSO data to non-TSO packet If a socket starts out on a non-TSO route, and then switches to a TSO route, then we will tack on data to the tail of the tx queue even if it started out life as non-TSO. This is suboptimal because all of it will then be copied and checksummed unnecessarily. This patch fixes this by ensuring that skb->ip_summed is set to CHECKSUM_PARTIAL before appending extra data beyond the MSS. Signed-off-by: Herbert Xu Signed-off-by: David S. Miller --- net/ipv4/tcp.c | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) (limited to 'net') diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 17b89c523f9..7870a535dac 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -903,13 +903,17 @@ int tcp_sendmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *msg, iov++; while (seglen > 0) { - int copy; + int copy = 0; + int max = size_goal; skb = tcp_write_queue_tail(sk); + if (tcp_send_head(sk)) { + if (skb->ip_summed == CHECKSUM_NONE) + max = mss_now; + copy = max - skb->len; + } - if (!tcp_send_head(sk) || - (copy = size_goal - skb->len) <= 0) { - + if (copy <= 0) { new_segment: /* Allocate new segment. If the interface is SG, * allocate skb fitting to single page. @@ -930,6 +934,7 @@ new_segment: skb_entail(sk, skb); copy = size_goal; + max = size_goal; } /* Try to append data to the end of skb. */ @@ -1028,7 +1033,7 @@ new_segment: if ((seglen -= copy) == 0 && iovlen == 0) goto out; - if (skb->len < size_goal || (flags & MSG_OOB)) + if (skb->len < max || (flags & MSG_OOB)) continue; if (forced_push(tp)) { -- cgit v1.2.3-70-g09d2 From 1802571b9865c0fc1d8d0fa39cf73275f3a75af3 Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Sun, 28 Jun 2009 18:42:53 +0000 Subject: xfrm: use xfrm_addr_cmp() instead of compare addresses directly Clean up to use xfrm_addr_cmp() instead of compare addresses directly. Signed-off-by: Wei Yongjun Acked-by: Herbert Xu Signed-off-by: David S. Miller --- net/xfrm/xfrm_state.c | 57 ++++++++------------------------------------------- 1 file changed, 8 insertions(+), 49 deletions(-) (limited to 'net') diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c index 5f1f86565f1..f2f7c638083 100644 --- a/net/xfrm/xfrm_state.c +++ b/net/xfrm/xfrm_state.c @@ -668,22 +668,10 @@ static struct xfrm_state *__xfrm_state_lookup(struct net *net, xfrm_address_t *d hlist_for_each_entry(x, entry, net->xfrm.state_byspi+h, byspi) { if (x->props.family != family || x->id.spi != spi || - x->id.proto != proto) + x->id.proto != proto || + xfrm_addr_cmp(&x->id.daddr, daddr, family)) continue; - switch (family) { - case AF_INET: - if (x->id.daddr.a4 != daddr->a4) - continue; - break; - case AF_INET6: - if (!ipv6_addr_equal((struct in6_addr *)daddr, - (struct in6_addr *) - x->id.daddr.a6)) - continue; - break; - } - xfrm_state_hold(x); return x; } @@ -699,26 +687,11 @@ static struct xfrm_state *__xfrm_state_lookup_byaddr(struct net *net, xfrm_addre hlist_for_each_entry(x, entry, net->xfrm.state_bysrc+h, bysrc) { if (x->props.family != family || - x->id.proto != proto) + x->id.proto != proto || + xfrm_addr_cmp(&x->id.daddr, daddr, family) || + xfrm_addr_cmp(&x->props.saddr, saddr, family)) continue; - switch (family) { - case AF_INET: - if (x->id.daddr.a4 != daddr->a4 || - x->props.saddr.a4 != saddr->a4) - continue; - break; - case AF_INET6: - if (!ipv6_addr_equal((struct in6_addr *)daddr, - (struct in6_addr *) - x->id.daddr.a6) || - !ipv6_addr_equal((struct in6_addr *)saddr, - (struct in6_addr *) - x->props.saddr.a6)) - continue; - break; - } - xfrm_state_hold(x); return x; } @@ -1001,25 +974,11 @@ static struct xfrm_state *__find_acq_core(struct net *net, unsigned short family x->props.family != family || x->km.state != XFRM_STATE_ACQ || x->id.spi != 0 || - x->id.proto != proto) + x->id.proto != proto || + xfrm_addr_cmp(&x->id.daddr, daddr, family) || + xfrm_addr_cmp(&x->props.saddr, saddr, family)) continue; - switch (family) { - case AF_INET: - if (x->id.daddr.a4 != daddr->a4 || - x->props.saddr.a4 != saddr->a4) - continue; - break; - case AF_INET6: - if (!ipv6_addr_equal((struct in6_addr *)x->id.daddr.a6, - (struct in6_addr *)daddr) || - !ipv6_addr_equal((struct in6_addr *) - x->props.saddr.a6, - (struct in6_addr *)saddr)) - continue; - break; - } - xfrm_state_hold(x); return x; } -- cgit v1.2.3-70-g09d2 From ff0ac74afb5b9916641723a78796d4ee7937c2ea Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Sun, 28 Jun 2009 22:49:37 +0000 Subject: sctp: xmit sctp packet always return no route error Commit 'net: skb->dst accessors'(adf30907d63893e4208dfe3f5c88ae12bc2f25d5) broken the sctp protocol stack, the sctp packet can never be sent out after Eric Dumazet's patch, which have typo in the sctp code. Signed-off-by: Wei Yongjun Acked-by: Eric Dumazet Acked-by: Vlad Yasevich Signed-off-by: David S. Miller --- net/sctp/output.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/sctp/output.c b/net/sctp/output.c index b7641144451..b94c2119056 100644 --- a/net/sctp/output.c +++ b/net/sctp/output.c @@ -407,7 +407,7 @@ int sctp_packet_transmit(struct sctp_packet *packet) } dst = dst_clone(tp->dst); skb_dst_set(nskb, dst); - if (dst) + if (!dst) goto no_route; /* Build the SCTP header. */ -- cgit v1.2.3-70-g09d2 From 008440e3ad4b72f5048d1b1f6f5ed894fdc5ad08 Mon Sep 17 00:00:00 2001 From: Jarek Poplawski Date: Tue, 30 Jun 2009 12:47:19 -0700 Subject: ipv4: Fix fib_trie rebalancing, part 3 Alas current delaying of freeing old tnodes by RCU in trie_rebalance is still not enough because we can free a top tnode before updating a t->trie pointer. Reported-by: Pawel Staszewski Tested-by: Pawel Staszewski Signed-off-by: Jarek Poplawski Signed-off-by: David S. Miller --- net/ipv4/fib_trie.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'net') diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c index 012cf5a6858..00a54b246df 100644 --- a/net/ipv4/fib_trie.c +++ b/net/ipv4/fib_trie.c @@ -1021,6 +1021,9 @@ static void trie_rebalance(struct trie *t, struct tnode *tn) (struct node *)tn, wasfull); tp = node_parent((struct node *) tn); + if (!tp) + rcu_assign_pointer(t->trie, (struct node *)tn); + tnode_free_flush(); if (!tp) break; -- cgit v1.2.3-70-g09d2 From f8a68e752bc4e39644843403168137663c984524 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Tue, 30 Jun 2009 16:27:17 +0000 Subject: Revert "ipv4: arp announce, arp_proxy and windows ip conflict verification" This reverts commit 73ce7b01b4496a5fbf9caf63033c874be692333f. After discovering that we don't listen to gratuitious arps in 2.6.30 I tracked the failure down to this commit. The patch makes absolutely no sense. RFC2131 RFC3927 and RFC5227. are all in agreement that an arp request with sip == 0 should be used for the probe (to prevent learning) and an arp request with sip == tip should be used for the gratitous announcement that people can learn from. It appears the author of the broken patch got those two cases confused and modified the code to drop all gratuitous arp traffic. Ouch! Cc: stable@kernel.org Signed-off-by: Eric W. Biederman Signed-off-by: David S. Miller --- net/ipv4/arp.c | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) (limited to 'net') diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c index 8a3881e28ac..c29d75d8f1b 100644 --- a/net/ipv4/arp.c +++ b/net/ipv4/arp.c @@ -801,11 +801,8 @@ static int arp_process(struct sk_buff *skb) * cache. */ - /* - * Special case: IPv4 duplicate address detection packet (RFC2131) - * and Gratuitous ARP/ARP Announce. (RFC3927, Section 2.4) - */ - if (sip == 0 || tip == sip) { + /* Special case: IPv4 duplicate address detection packet (RFC2131) */ + if (sip == 0) { if (arp->ar_op == htons(ARPOP_REQUEST) && inet_addr_type(net, tip) == RTN_LOCAL && !arp_ignore(in_dev, sip, tip)) -- cgit v1.2.3-70-g09d2 From 15da4b1612d608a47e095439b3dd1d77ffe20e0c Mon Sep 17 00:00:00 2001 From: Abhishek Kulkarni Date: Wed, 1 Jul 2009 08:50:54 +0000 Subject: net/9p: Fix crash due to bad mount parameters. It is not safe to use match_int without checking the token type returned by match_token (especially when the token type returned is Opt_err and args is empty). Fix it. Signed-off-by: Abhishek Kulkarni Signed-off-by: David S. Miller --- net/9p/trans_fd.c | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) (limited to 'net') diff --git a/net/9p/trans_fd.c b/net/9p/trans_fd.c index a2a1814c7a8..8c2588e4edc 100644 --- a/net/9p/trans_fd.c +++ b/net/9p/trans_fd.c @@ -735,12 +735,14 @@ static int parse_opts(char *params, struct p9_fd_opts *opts) if (!*p) continue; token = match_token(p, tokens, args); - r = match_int(&args[0], &option); - if (r < 0) { - P9_DPRINTK(P9_DEBUG_ERROR, - "integer field, but no integer?\n"); - ret = r; - continue; + if (token != Opt_err) { + r = match_int(&args[0], &option); + if (r < 0) { + P9_DPRINTK(P9_DEBUG_ERROR, + "integer field, but no integer?\n"); + ret = r; + continue; + } } switch (token) { case Opt_port: -- cgit v1.2.3-70-g09d2 From c615c9f3f3cea60279b1bb38e8ef27bd575ecd0c Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Thu, 2 Jul 2009 16:57:23 +0000 Subject: xfrm4: fix the ports decode of sctp protocol The SCTP pushed the skb data above the sctp chunk header, so the check of pskb_may_pull(skb, xprth + 4 - skb->data) in _decode_session4() will never return 0 because xprth + 4 - skb->data < 0, the ports decode of sctp will always fail. Signed-off-by: Wei Yongjun Acked-by: Herbert Xu Signed-off-by: David S. Miller --- net/ipv4/xfrm4_policy.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/ipv4/xfrm4_policy.c b/net/ipv4/xfrm4_policy.c index 60d918c96a4..0071ee6f441 100644 --- a/net/ipv4/xfrm4_policy.c +++ b/net/ipv4/xfrm4_policy.c @@ -136,7 +136,8 @@ _decode_session4(struct sk_buff *skb, struct flowi *fl, int reverse) case IPPROTO_TCP: case IPPROTO_SCTP: case IPPROTO_DCCP: - if (pskb_may_pull(skb, xprth + 4 - skb->data)) { + if (xprth + 4 < skb->data || + pskb_may_pull(skb, xprth + 4 - skb->data)) { __be16 *ports = (__be16 *)xprth; fl->fl_ip_sport = ports[!!reverse]; -- cgit v1.2.3-70-g09d2 From 59cae0092e4da753b5a2adb32933e0d1b223bcc5 Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Thu, 2 Jul 2009 16:59:49 +0000 Subject: xfrm6: fix the proto and ports decode of sctp protocol The SCTP pushed the skb above the sctp chunk header, so the check of pskb_may_pull(skb, nh + offset + 1 - skb->data) in _decode_session6() will never return 0 and the ports decode of sctp will always fail. (nh + offset + 1 - skb->data < 0) Signed-off-by: Wei Yongjun Acked-by: Herbert Xu Signed-off-by: David S. Miller --- net/ipv6/xfrm6_policy.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/ipv6/xfrm6_policy.c b/net/ipv6/xfrm6_policy.c index b4b16a43f27..3a3c677bc0f 100644 --- a/net/ipv6/xfrm6_policy.c +++ b/net/ipv6/xfrm6_policy.c @@ -157,7 +157,8 @@ _decode_session6(struct sk_buff *skb, struct flowi *fl, int reverse) ipv6_addr_copy(&fl->fl6_dst, reverse ? &hdr->saddr : &hdr->daddr); ipv6_addr_copy(&fl->fl6_src, reverse ? &hdr->daddr : &hdr->saddr); - while (pskb_may_pull(skb, nh + offset + 1 - skb->data)) { + while (nh + offset + 1 < skb->data || + pskb_may_pull(skb, nh + offset + 1 - skb->data)) { nh = skb_network_header(skb); exthdr = (struct ipv6_opt_hdr *)(nh + offset); @@ -177,7 +178,8 @@ _decode_session6(struct sk_buff *skb, struct flowi *fl, int reverse) case IPPROTO_TCP: case IPPROTO_SCTP: case IPPROTO_DCCP: - if (!onlyproto && pskb_may_pull(skb, nh + offset + 4 - skb->data)) { + if (!onlyproto && (nh + offset + 4 < skb->data || + pskb_may_pull(skb, nh + offset + 4 - skb->data))) { __be16 *ports = (__be16 *)exthdr; fl->fl_ip_sport = ports[!!reverse]; -- cgit v1.2.3-70-g09d2 From a1ed05263b74921742b454ef52c30b609ec6940f Mon Sep 17 00:00:00 2001 From: Brian Haley Date: Thu, 2 Jul 2009 07:10:52 +0000 Subject: IPv6: preferred lifetime of address not getting updated There's a bug in addrconf_prefix_rcv() where it won't update the preferred lifetime of an IPv6 address if the current valid lifetime of the address is less than 2 hours (the minimum value in the RA). For example, If I send a router advertisement with a prefix that has valid lifetime = preferred lifetime = 2 hours we'll build this address: 3: eth0: mtu 1500 qlen 1000 inet6 2001:1890:1109:a20:217:8ff:fe7d:4718/64 scope global dynamic valid_lft 7175sec preferred_lft 7175sec If I then send the same prefix with valid lifetime = preferred lifetime = 0 it will be ignored since the minimum valid lifetime is 2 hours: 3: eth0: mtu 1500 qlen 1000 inet6 2001:1890:1109:a20:217:8ff:fe7d:4718/64 scope global dynamic valid_lft 7161sec preferred_lft 7161sec But according to RFC 4862 we should always reset the preferred lifetime even if the valid lifetime is invalid, which would cause the address to immediately get deprecated. So with this patch we'd see this: 5: eth0: mtu 1500 qlen 1000 inet6 2001:1890:1109:a20:21f:29ff:fe5a:ef04/64 scope global deprecated dynamic valid_lft 7163sec preferred_lft 0sec The comment winds-up being 5x the size of the code to fix the problem. Update the preferred lifetime of IPv6 addresses derived from a prefix info option in a router advertisement even if the valid lifetime in the option is invalid, as specified in RFC 4862 Section 5.5.3e. Fixes an issue where an address will not immediately become deprecated. Reported by Jens Rosenboom. Signed-off-by: Brian Haley Signed-off-by: David S. Miller --- net/ipv6/addrconf.c | 30 +++++++++++++++++++++++++++--- 1 file changed, 27 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 3883b4036a7..43b3c9f89c1 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -1916,8 +1916,32 @@ ok: update_lft = 1; else if (stored_lft <= MIN_VALID_LIFETIME) { /* valid_lft <= stored_lft is always true */ - /* XXX: IPsec */ - update_lft = 0; + /* + * RFC 4862 Section 5.5.3e: + * "Note that the preferred lifetime of + * the corresponding address is always + * reset to the Preferred Lifetime in + * the received Prefix Information + * option, regardless of whether the + * valid lifetime is also reset or + * ignored." + * + * So if the preferred lifetime in + * this advertisement is different + * than what we have stored, but the + * valid lifetime is invalid, just + * reset prefered_lft. + * + * We must set the valid lifetime + * to the stored lifetime since we'll + * be updating the timestamp below, + * else we'll set it back to the + * minumum. + */ + if (prefered_lft != ifp->prefered_lft) { + valid_lft = stored_lft; + update_lft = 1; + } } else { valid_lft = MIN_VALID_LIFETIME; if (valid_lft < prefered_lft) @@ -3085,7 +3109,7 @@ restart: spin_unlock(&ifp->lock); continue; } else if (age >= ifp->prefered_lft) { - /* jiffies - ifp->tsamp > age >= ifp->prefered_lft */ + /* jiffies - ifp->tstamp > age >= ifp->prefered_lft */ int deprecate = 0; if (!(ifp->flags&IFA_F_DEPRECATED)) { -- cgit v1.2.3-70-g09d2 From 1ded3f59f35a2642852b3e2a1c0fa8a97777e9af Mon Sep 17 00:00:00 2001 From: Stephane Contri Date: Thu, 2 Jul 2009 23:26:48 +0000 Subject: dsa: fix 88e6xxx statistics counter snapshotting The bit that tells us whether a statistics counter snapshot operation has completed is located in the GLOBAL register block, not in the GLOBAL2 register block, so fix up mv88e6xxx_stats_wait() to poll the right register address. Signed-off-by: Stephane Contri Signed-off-by: Lennert Buytenhek Cc: stable@kernel.org Signed-off-by: David S. Miller --- net/dsa/mv88e6xxx.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/dsa/mv88e6xxx.c b/net/dsa/mv88e6xxx.c index 4e4d8b5ad03..efe661a9def 100644 --- a/net/dsa/mv88e6xxx.c +++ b/net/dsa/mv88e6xxx.c @@ -418,7 +418,7 @@ static int mv88e6xxx_stats_wait(struct dsa_switch *ds) int i; for (i = 0; i < 10; i++) { - ret = REG_READ(REG_GLOBAL2, 0x1d); + ret = REG_READ(REG_GLOBAL, 0x1d); if ((ret & 0x8000) == 0) return 0; } -- cgit v1.2.3-70-g09d2 From 1bc4ee4088c9a502db0e9c87f675e61e57fa1734 Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Sun, 5 Jul 2009 19:45:48 +0000 Subject: sctp: fix warning at inet_sock_destruct() while release sctp socket Commit 'net: Move rx skb_orphan call to where needed' broken sctp protocol with warning at inet_sock_destruct(). Actually, sctp can do this right with sctp_sock_rfree_frag() and sctp_skb_set_owner_r_frag() pair. sctp_sock_rfree_frag(skb); sctp_skb_set_owner_r_frag(skb, newsk); This patch not revert the commit d55d87fdff8252d0e2f7c28c2d443aee17e9d70f, instead remove the sctp_sock_rfree_frag() function. ------------[ cut here ]------------ WARNING: at net/ipv4/af_inet.c:151 inet_sock_destruct+0xe0/0x142() Modules linked in: sctp ipv6 dm_mirror dm_region_hash dm_log dm_multipath scsi_mod ext3 jbd uhci_hcd ohci_hcd ehci_hcd [last unloaded: scsi_wait_scan] Pid: 1808, comm: sctp_test Not tainted 2.6.31-rc2 #40 Call Trace: [] warn_slowpath_common+0x6a/0x81 [] ? inet_sock_destruct+0xe0/0x142 [] warn_slowpath_null+0x12/0x15 [] inet_sock_destruct+0xe0/0x142 [] __sk_free+0x19/0xcc [] sk_free+0x18/0x1a [] sctp_close+0x192/0x1a1 [sctp] [] inet_release+0x47/0x4d [] sock_release+0x19/0x5e [] sock_close+0x21/0x25 [] __fput+0xde/0x189 [] fput+0x18/0x1a [] filp_close+0x56/0x60 [] put_files_struct+0x5d/0xa1 [] exit_files+0x39/0x3d [] do_exit+0x1a5/0x5dd [] ? d_kill+0x35/0x3b [] ? dequeue_signal+0xa6/0x115 [] do_group_exit+0x63/0x8a [] get_signal_to_deliver+0x2e1/0x2f9 [] do_notify_resume+0x7c/0x6b5 [] ? autoremove_wake_function+0x0/0x34 [] ? __d_free+0x3d/0x40 [] ? d_free+0x2a/0x3c [] ? vfs_write+0x103/0x117 [] ? sys_socketcall+0x178/0x182 [] work_notifysig+0x13/0x19 ---[ end trace 9db92c463e789fba ]--- Signed-off-by: Wei Yongjun Acked-by: Herbert Xu Acked-by: Vlad Yasevich Signed-off-by: David S. Miller --- net/sctp/socket.c | 25 ++----------------------- 1 file changed, 2 insertions(+), 23 deletions(-) (limited to 'net') diff --git a/net/sctp/socket.c b/net/sctp/socket.c index 35ba035970a..971890dbfea 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -6652,21 +6652,6 @@ static void sctp_wait_for_close(struct sock *sk, long timeout) finish_wait(sk->sk_sleep, &wait); } -static void sctp_sock_rfree_frag(struct sk_buff *skb) -{ - struct sk_buff *frag; - - if (!skb->data_len) - goto done; - - /* Don't forget the fragments. */ - skb_walk_frags(skb, frag) - sctp_sock_rfree_frag(frag); - -done: - sctp_sock_rfree(skb); -} - static void sctp_skb_set_owner_r_frag(struct sk_buff *skb, struct sock *sk) { struct sk_buff *frag; @@ -6776,7 +6761,6 @@ static void sctp_sock_migrate(struct sock *oldsk, struct sock *newsk, sctp_skb_for_each(skb, &oldsk->sk_receive_queue, tmp) { event = sctp_skb2event(skb); if (event->asoc == assoc) { - sctp_sock_rfree_frag(skb); __skb_unlink(skb, &oldsk->sk_receive_queue); __skb_queue_tail(&newsk->sk_receive_queue, skb); sctp_skb_set_owner_r_frag(skb, newsk); @@ -6807,7 +6791,6 @@ static void sctp_sock_migrate(struct sock *oldsk, struct sock *newsk, sctp_skb_for_each(skb, &oldsp->pd_lobby, tmp) { event = sctp_skb2event(skb); if (event->asoc == assoc) { - sctp_sock_rfree_frag(skb); __skb_unlink(skb, &oldsp->pd_lobby); __skb_queue_tail(queue, skb); sctp_skb_set_owner_r_frag(skb, newsk); @@ -6822,15 +6805,11 @@ static void sctp_sock_migrate(struct sock *oldsk, struct sock *newsk, } - sctp_skb_for_each(skb, &assoc->ulpq.reasm, tmp) { - sctp_sock_rfree_frag(skb); + sctp_skb_for_each(skb, &assoc->ulpq.reasm, tmp) sctp_skb_set_owner_r_frag(skb, newsk); - } - sctp_skb_for_each(skb, &assoc->ulpq.lobby, tmp) { - sctp_sock_rfree_frag(skb); + sctp_skb_for_each(skb, &assoc->ulpq.lobby, tmp) sctp_skb_set_owner_r_frag(skb, newsk); - } /* Set the type of socket to indicate that it is peeled off from the * original UDP-style socket or created with the accept() call on a -- cgit v1.2.3-70-g09d2 From 1f5fc70a250cc18f066072119e9fbbc20dad865f Mon Sep 17 00:00:00 2001 From: Jiri Slaby Date: Sat, 20 Jun 2009 12:31:11 +0200 Subject: Wireless: nl80211, fix lock imbalance Don't forget to unlock cfg80211_mutex in one fail path of nl80211_set_wiphy. Signed-off-by: Jiri Slaby Signed-off-by: John W. Linville --- net/wireless/nl80211.c | 1 + 1 file changed, 1 insertion(+) (limited to 'net') diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 241bddd0b4f..43bdb1372ca 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -447,6 +447,7 @@ static int nl80211_set_wiphy(struct sk_buff *skb, struct genl_info *info) rdev = __cfg80211_drv_from_info(info); if (IS_ERR(rdev)) { + mutex_unlock(&cfg80211_mutex); result = PTR_ERR(rdev); goto unlock; } -- cgit v1.2.3-70-g09d2 From 59615b5f9d1323898ca94e88e595b5b04115076a Mon Sep 17 00:00:00 2001 From: Andrey Yurovsky Date: Thu, 25 Jun 2009 16:07:42 -0700 Subject: mac80211: fix allocation in mesh_queue_preq We allocate a PREQ queue node in mesh_queue_preq, however the allocation may cause us to sleep. Use GFP_ATOMIC to prevent this. [ 1869.126498] BUG: scheduling while atomic: ping/1859/0x10000100 [ 1869.127164] Modules linked in: ath5k mac80211 ath [ 1869.128310] Pid: 1859, comm: ping Not tainted 2.6.30-wl #1 [ 1869.128754] Call Trace: [ 1869.129293] [] __schedule_bug+0x48/0x4d [ 1869.129866] [] __schedule+0x77/0x67a [ 1869.130544] [] ? release_console_sem+0x17d/0x185 [ 1869.131568] [] ? mesh_queue_preq+0x2b/0x165 [mac80211] [ 1869.132318] [] schedule+0x8/0x1f [ 1869.132807] [] __cond_resched+0x16/0x2f [ 1869.133478] [] _cond_resched+0x27/0x32 [ 1869.134191] [] kmem_cache_alloc+0x1c/0xcf [ 1869.134714] [] ? printk+0x15/0x17 [ 1869.135670] [] mesh_queue_preq+0x2b/0x165 [mac80211] [ 1869.136731] [] mesh_nexthop_lookup+0xee/0x12d [mac80211] [ 1869.138130] [] ieee80211_xmit+0xe6/0x2b2 [mac80211] [ 1869.138935] [] ? ath5k_hw_setup_rx_desc+0x0/0x66 [ath5k] [ 1869.139831] [] ? ath5k_tasklet_rx+0xba/0x506 [ath5k] [ 1869.140863] [] ieee80211_subif_start_xmit+0x6c9/0x6e4 [mac80211] [ 1869.141665] [] ? handle_level_irq+0x78/0x9d [ 1869.142390] [] dev_hard_start_xmit+0x168/0x1c7 [ 1869.143092] [] __qdisc_run+0xe1/0x1b7 [ 1869.143612] [] qdisc_run+0x18/0x1a [ 1869.144248] [] dev_queue_xmit+0x16a/0x25a [ 1869.144785] [] ? _read_unlock_bh+0xe/0x10 [ 1869.145465] [] neigh_resolve_output+0x19c/0x1c7 [ 1869.146182] [] ? ip_finish_output+0x0/0x51 [ 1869.146697] [] ip_finish_output2+0x182/0x1bc [ 1869.147358] [] ip_finish_output+0x4d/0x51 [ 1869.147863] [] ip_output+0x80/0x85 [ 1869.148515] [] dst_output+0x9/0xb [ 1869.149141] [] ip_local_out+0x17/0x1a [ 1869.149632] [] ip_push_pending_frames+0x1f3/0x255 [ 1869.150343] [] raw_sendmsg+0x5e6/0x667 [ 1869.150883] [] ? insert_work+0x6a/0x73 [ 1869.151834] [] ? ieee80211_invoke_rx_handlers+0x17da/0x1ae8 [mac80211] [ 1869.152630] [] inet_sendmsg+0x3b/0x48 [ 1869.153232] [] __sock_sendmsg+0x45/0x4e [ 1869.153740] [] sock_sendmsg+0xb8/0xce [ 1869.154519] [] ? ath5k_hw_setup_rx_desc+0x0/0x66 [ath5k] [ 1869.155289] [] ? autoremove_wake_function+0x0/0x30 [ 1869.155859] [] ? __copy_from_user_ll+0x11/0xce [ 1869.156573] [] ? copy_from_user+0x31/0x54 [ 1869.157235] [] ? verify_iovec+0x40/0x6e [ 1869.157778] [] sys_sendmsg+0x14d/0x1a5 [ 1869.158714] [] ? __ieee80211_rx+0x49e/0x4ee [mac80211] [ 1869.159641] [] ? ath5k_rxbuf_setup+0x6d/0x8d [ath5k] [ 1869.160543] [] ? ath5k_hw_setup_rx_desc+0x0/0x66 [ath5k] [ 1869.161434] [] ? ath5k_hw_get_rxdp+0xe/0x10 [ath5k] [ 1869.162319] [] ? ath5k_tasklet_rx+0xba/0x506 [ath5k] [ 1869.163063] [] ? enable_8259A_irq+0x40/0x43 [ 1869.163594] [] ? __dequeue_entity+0x23/0x27 [ 1869.164793] [] ? __switch_to+0x2b/0x105 [ 1869.165442] [] ? finish_task_switch+0x5b/0x74 [ 1869.166129] [] sys_socketcall+0x14b/0x17b [ 1869.166612] [] syscall_call+0x7/0xb Signed-off-by: Andrey Yurovsky Signed-off-by: John W. Linville --- net/mac80211/mesh_hwmp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/mac80211/mesh_hwmp.c b/net/mac80211/mesh_hwmp.c index 003cb470ac8..f49ef288e2e 100644 --- a/net/mac80211/mesh_hwmp.c +++ b/net/mac80211/mesh_hwmp.c @@ -637,7 +637,7 @@ static void mesh_queue_preq(struct mesh_path *mpath, u8 flags) struct ieee80211_if_mesh *ifmsh = &sdata->u.mesh; struct mesh_preq_queue *preq_node; - preq_node = kmalloc(sizeof(struct mesh_preq_queue), GFP_KERNEL); + preq_node = kmalloc(sizeof(struct mesh_preq_queue), GFP_ATOMIC); if (!preq_node) { printk(KERN_DEBUG "Mesh HWMP: could not allocate PREQ node\n"); return; -- cgit v1.2.3-70-g09d2 From 2dce4c2b5f0b43bd25bf9ea6ded06b7f8a54c91f Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Thu, 2 Jul 2009 15:46:41 +0200 Subject: cfg80211: fix refcount leak The code in cfg80211's cfg80211_bss_update erroneously grabs a reference to the BSS, which means that it will never be freed. Signed-off-by: Johannes Berg Cc: stable@kernel.org [2.6.29, 2.6.30] Signed-off-by: John W. Linville --- net/wireless/scan.c | 1 - 1 file changed, 1 deletion(-) (limited to 'net') diff --git a/net/wireless/scan.c b/net/wireless/scan.c index e95b638b919..f8e71b30000 100644 --- a/net/wireless/scan.c +++ b/net/wireless/scan.c @@ -366,7 +366,6 @@ cfg80211_bss_update(struct cfg80211_registered_device *dev, found = rb_find_bss(dev, res); if (found) { - kref_get(&found->ref); found->pub.beacon_interval = res->pub.beacon_interval; found->pub.tsf = res->pub.tsf; found->pub.signal = res->pub.signal; -- cgit v1.2.3-70-g09d2 From 3938b45c1c75e53d45eb65ac253f12e86239c9ba Mon Sep 17 00:00:00 2001 From: Luciano Coelho Date: Fri, 3 Jul 2009 08:25:08 +0300 Subject: mac80211: minstrel: avoid accessing negative indices in rix_to_ndx() If rix is not found in mi->r[], i will become -1 after the loop. This value is eventually used to access arrays, so we were accessing arrays with a negative index, which is obviously not what we want to do. This patch fixes this potential problem. Signed-off-by: Luciano Coelho Acked-by: Felix Fietkau Signed-off-by: John W. Linville --- net/mac80211/rc80211_minstrel.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/mac80211/rc80211_minstrel.c b/net/mac80211/rc80211_minstrel.c index b218b98fba7..37771abd8f5 100644 --- a/net/mac80211/rc80211_minstrel.c +++ b/net/mac80211/rc80211_minstrel.c @@ -66,7 +66,7 @@ rix_to_ndx(struct minstrel_sta_info *mi, int rix) for (i = rix; i >= 0; i--) if (mi->r[i].rix == rix) break; - WARN_ON(mi->r[i].rix != rix); + WARN_ON(i < 0); return i; } @@ -181,6 +181,9 @@ minstrel_tx_status(void *priv, struct ieee80211_supported_band *sband, break; ndx = rix_to_ndx(mi, ar[i].idx); + if (ndx < 0) + continue; + mi->r[ndx].attempts += ar[i].count; if ((i != IEEE80211_TX_MAX_RATES - 1) && (ar[i + 1].idx < 0)) -- cgit v1.2.3-70-g09d2 From 345aa031207d02d7438c1aa96ed9315911ecd745 Mon Sep 17 00:00:00 2001 From: Jarek Poplawski Date: Tue, 7 Jul 2009 19:39:16 -0700 Subject: ipv4: Fix fib_trie rebalancing, part 4 (root thresholds) Pawel Staszewski wrote:
Some time ago i report this: http://bugzilla.kernel.org/show_bug.cgi?id=6648 and now with 2.6.29 / 2.6.29.1 / 2.6.29.3 and 2.6.30 it back dmesg output: oprofile: using NMI interrupt. Fix inflate_threshold_root. Now=15 size=11 bits ... Fix inflate_threshold_root. Now=15 size=11 bits cat /proc/net/fib_triestat Basic info: size of leaf: 40 bytes, size of tnode: 56 bytes. Main: Aver depth: 2.28 Max depth: 6 Leaves: 276539 Prefixes: 289922 Internal nodes: 66762 1: 35046 2: 13824 3: 9508 4: 4897 5: 2331 6: 1149 7: 5 9: 1 18: 1 Pointers: 691228 Null ptrs: 347928 Total size: 35709 kB
It seems, the current threshold for root resizing is too aggressive, and it causes misleading warnings during big updates, but it might be also responsible for memory problems, especially with non-preempt configs, when RCU freeing is delayed long after call_rcu. It should be also mentioned that because of non-atomic changes during resizing/rebalancing the current lookup algorithm can miss valid leaves so it's additional argument to shorten these activities even at a cost of a minimally longer searching. This patch restores values before the patch "[IPV4]: fib_trie root node settings", commit: 965ffea43d4ebe8cd7b9fee78d651268dd7d23c5 from v2.6.22. Pawel's report:
I dont see any big change of (cpu load or faster/slower routing/propagating routes from bgpd or something else) - in avg there is from 2% to 3% more of CPU load i dont know why but it is - i change from "preempt" to "no preempt" 3 times and check this my "mpstat -P ALL 1 30" always avg cpu load was from 2 to 3% more compared to "no preempt" [...] cat /proc/net/fib_triestat Basic info: size of leaf: 20 bytes, size of tnode: 36 bytes. Main: Aver depth: 2.44 Max depth: 6 Leaves: 277814 Prefixes: 291306 Internal nodes: 66420 1: 32737 2: 14850 3: 10332 4: 4871 5: 2313 6: 942 7: 371 8: 3 17: 1 Pointers: 599098 Null ptrs: 254865 Total size: 18067 kB
According to this and other similar reports average depth is slightly increased (~0.2), and root nodes are shorter (log 17 vs. 18), but there is no visible performance decrease. So, until memory handling is improved or added parameters for changing this individually, this patch resets to safer defaults. Reported-by: Pawel Staszewski Reported-by: Jorge Boncompte [DTI2] Signed-off-by: Jarek Poplawski Tested-by: Pawel Staszewski Signed-off-by: David S. Miller --- net/ipv4/fib_trie.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c index 00a54b246df..63c2fa7b68c 100644 --- a/net/ipv4/fib_trie.c +++ b/net/ipv4/fib_trie.c @@ -316,8 +316,8 @@ static inline void check_tnode(const struct tnode *tn) static const int halve_threshold = 25; static const int inflate_threshold = 50; -static const int halve_threshold_root = 8; -static const int inflate_threshold_root = 15; +static const int halve_threshold_root = 15; +static const int inflate_threshold_root = 25; static void __alias_free_mem(struct rcu_head *head) -- cgit v1.2.3-70-g09d2 From 1b614fb9a00e97b1eab54d4e442d405229c059dd Mon Sep 17 00:00:00 2001 From: Anton Vorontsov Date: Wed, 8 Jul 2009 20:09:44 -0700 Subject: netpoll: Fix carrier detection for drivers that are using phylib Using early netconsole and gianfar driver this error pops up: netconsole: timeout waiting for carrier It appears that net/core/netpoll.c:netpoll_setup() is using cond_resched() in a loop waiting for a carrier. The thing is that cond_resched() is a no-op when system_state != SYSTEM_RUNNING, and so drivers/net/phy/phy.c's state_queue is never scheduled, therefore link detection doesn't work. I belive that the main problem is in cond_resched()[1], but despite how the cond_resched() story ends, it might be a good idea to call msleep(1) instead of cond_resched(), as suggested by Andrew Morton. [1] http://lkml.org/lkml/2009/7/7/463 Signed-off-by: Anton Vorontsov Signed-off-by: David S. Miller --- net/core/netpoll.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/core/netpoll.c b/net/core/netpoll.c index 9675f312830..df30feb2fc7 100644 --- a/net/core/netpoll.c +++ b/net/core/netpoll.c @@ -740,7 +740,7 @@ int netpoll_setup(struct netpoll *np) np->name); break; } - cond_resched(); + msleep(1); } /* If carrier appears to come up instantly, we don't -- cgit v1.2.3-70-g09d2 From a57de0b4336e48db2811a2030bb68dba8dd09d88 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Wed, 8 Jul 2009 12:09:13 +0000 Subject: net: adding memory barrier to the poll and receive callbacks Adding memory barrier after the poll_wait function, paired with receive callbacks. Adding fuctions sock_poll_wait and sk_has_sleeper to wrap the memory barrier. Without the memory barrier, following race can happen. The race fires, when following code paths meet, and the tp->rcv_nxt and __add_wait_queue updates stay in CPU caches. CPU1 CPU2 sys_select receive packet ... ... __add_wait_queue update tp->rcv_nxt ... ... tp->rcv_nxt check sock_def_readable ... { schedule ... if (sk->sk_sleep && waitqueue_active(sk->sk_sleep)) wake_up_interruptible(sk->sk_sleep) ... } If there was no cache the code would work ok, since the wait_queue and rcv_nxt are opposit to each other. Meaning that once tp->rcv_nxt is updated by CPU2, the CPU1 either already passed the tp->rcv_nxt check and sleeps, or will get the new value for tp->rcv_nxt and will return with new data mask. In both cases the process (CPU1) is being added to the wait queue, so the waitqueue_active (CPU2) call cannot miss and will wake up CPU1. The bad case is when the __add_wait_queue changes done by CPU1 stay in its cache, and so does the tp->rcv_nxt update on CPU2 side. The CPU1 will then endup calling schedule and sleep forever if there are no more data on the socket. Calls to poll_wait in following modules were ommited: net/bluetooth/af_bluetooth.c net/irda/af_irda.c net/irda/irnet/irnet_ppp.c net/mac80211/rc80211_pid_debugfs.c net/phonet/socket.c net/rds/af_rds.c net/rfkill/core.c net/sunrpc/cache.c net/sunrpc/rpc_pipe.c net/tipc/socket.c Signed-off-by: Jiri Olsa Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/net/sock.h | 66 ++++++++++++++++++++++++++++++++++++++++++++++++++++ net/atm/common.c | 6 ++--- net/core/datagram.c | 2 +- net/core/sock.c | 8 +++---- net/dccp/output.c | 2 +- net/dccp/proto.c | 2 +- net/ipv4/tcp.c | 2 +- net/iucv/af_iucv.c | 4 ++-- net/rxrpc/af_rxrpc.c | 4 ++-- net/unix/af_unix.c | 8 +++---- 10 files changed, 85 insertions(+), 19 deletions(-) (limited to 'net') diff --git a/include/net/sock.h b/include/net/sock.h index 352f06bbd7a..4eb8409249f 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -54,6 +54,7 @@ #include #include +#include #include #include @@ -1241,6 +1242,71 @@ static inline int sk_has_allocations(const struct sock *sk) return sk_wmem_alloc_get(sk) || sk_rmem_alloc_get(sk); } +/** + * sk_has_sleeper - check if there are any waiting processes + * @sk: socket + * + * Returns true if socket has waiting processes + * + * The purpose of the sk_has_sleeper and sock_poll_wait is to wrap the memory + * barrier call. They were added due to the race found within the tcp code. + * + * Consider following tcp code paths: + * + * CPU1 CPU2 + * + * sys_select receive packet + * ... ... + * __add_wait_queue update tp->rcv_nxt + * ... ... + * tp->rcv_nxt check sock_def_readable + * ... { + * schedule ... + * if (sk->sk_sleep && waitqueue_active(sk->sk_sleep)) + * wake_up_interruptible(sk->sk_sleep) + * ... + * } + * + * The race for tcp fires when the __add_wait_queue changes done by CPU1 stay + * in its cache, and so does the tp->rcv_nxt update on CPU2 side. The CPU1 + * could then endup calling schedule and sleep forever if there are no more + * data on the socket. + */ +static inline int sk_has_sleeper(struct sock *sk) +{ + /* + * We need to be sure we are in sync with the + * add_wait_queue modifications to the wait queue. + * + * This memory barrier is paired in the sock_poll_wait. + */ + smp_mb(); + return sk->sk_sleep && waitqueue_active(sk->sk_sleep); +} + +/** + * sock_poll_wait - place memory barrier behind the poll_wait call. + * @filp: file + * @wait_address: socket wait queue + * @p: poll_table + * + * See the comments in the sk_has_sleeper function. + */ +static inline void sock_poll_wait(struct file *filp, + wait_queue_head_t *wait_address, poll_table *p) +{ + if (p && wait_address) { + poll_wait(filp, wait_address, p); + /* + * We need to be sure we are in sync with the + * socket flags modification. + * + * This memory barrier is paired in the sk_has_sleeper. + */ + smp_mb(); + } +} + /* * Queue a received datagram if it will fit. Stream and sequenced * protocols can't normally use this as they need to fit buffers in diff --git a/net/atm/common.c b/net/atm/common.c index c1c97936192..8c4d843eb17 100644 --- a/net/atm/common.c +++ b/net/atm/common.c @@ -92,7 +92,7 @@ static void vcc_sock_destruct(struct sock *sk) static void vcc_def_wakeup(struct sock *sk) { read_lock(&sk->sk_callback_lock); - if (sk->sk_sleep && waitqueue_active(sk->sk_sleep)) + if (sk_has_sleeper(sk)) wake_up(sk->sk_sleep); read_unlock(&sk->sk_callback_lock); } @@ -110,7 +110,7 @@ static void vcc_write_space(struct sock *sk) read_lock(&sk->sk_callback_lock); if (vcc_writable(sk)) { - if (sk->sk_sleep && waitqueue_active(sk->sk_sleep)) + if (sk_has_sleeper(sk)) wake_up_interruptible(sk->sk_sleep); sk_wake_async(sk, SOCK_WAKE_SPACE, POLL_OUT); @@ -594,7 +594,7 @@ unsigned int vcc_poll(struct file *file, struct socket *sock, poll_table *wait) struct atm_vcc *vcc; unsigned int mask; - poll_wait(file, sk->sk_sleep, wait); + sock_poll_wait(file, sk->sk_sleep, wait); mask = 0; vcc = ATM_SD(sock); diff --git a/net/core/datagram.c b/net/core/datagram.c index 58abee1f1df..b0fe69211ee 100644 --- a/net/core/datagram.c +++ b/net/core/datagram.c @@ -712,7 +712,7 @@ unsigned int datagram_poll(struct file *file, struct socket *sock, struct sock *sk = sock->sk; unsigned int mask; - poll_wait(file, sk->sk_sleep, wait); + sock_poll_wait(file, sk->sk_sleep, wait); mask = 0; /* exceptional events? */ diff --git a/net/core/sock.c b/net/core/sock.c index b0ba569bc97..6354863b1c6 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -1715,7 +1715,7 @@ EXPORT_SYMBOL(sock_no_sendpage); static void sock_def_wakeup(struct sock *sk) { read_lock(&sk->sk_callback_lock); - if (sk->sk_sleep && waitqueue_active(sk->sk_sleep)) + if (sk_has_sleeper(sk)) wake_up_interruptible_all(sk->sk_sleep); read_unlock(&sk->sk_callback_lock); } @@ -1723,7 +1723,7 @@ static void sock_def_wakeup(struct sock *sk) static void sock_def_error_report(struct sock *sk) { read_lock(&sk->sk_callback_lock); - if (sk->sk_sleep && waitqueue_active(sk->sk_sleep)) + if (sk_has_sleeper(sk)) wake_up_interruptible_poll(sk->sk_sleep, POLLERR); sk_wake_async(sk, SOCK_WAKE_IO, POLL_ERR); read_unlock(&sk->sk_callback_lock); @@ -1732,7 +1732,7 @@ static void sock_def_error_report(struct sock *sk) static void sock_def_readable(struct sock *sk, int len) { read_lock(&sk->sk_callback_lock); - if (sk->sk_sleep && waitqueue_active(sk->sk_sleep)) + if (sk_has_sleeper(sk)) wake_up_interruptible_sync_poll(sk->sk_sleep, POLLIN | POLLRDNORM | POLLRDBAND); sk_wake_async(sk, SOCK_WAKE_WAITD, POLL_IN); @@ -1747,7 +1747,7 @@ static void sock_def_write_space(struct sock *sk) * progress. --DaveM */ if ((atomic_read(&sk->sk_wmem_alloc) << 1) <= sk->sk_sndbuf) { - if (sk->sk_sleep && waitqueue_active(sk->sk_sleep)) + if (sk_has_sleeper(sk)) wake_up_interruptible_sync_poll(sk->sk_sleep, POLLOUT | POLLWRNORM | POLLWRBAND); diff --git a/net/dccp/output.c b/net/dccp/output.c index c0e88c16d08..c96119fda68 100644 --- a/net/dccp/output.c +++ b/net/dccp/output.c @@ -196,7 +196,7 @@ void dccp_write_space(struct sock *sk) { read_lock(&sk->sk_callback_lock); - if (sk->sk_sleep && waitqueue_active(sk->sk_sleep)) + if (sk_has_sleeper(sk)) wake_up_interruptible(sk->sk_sleep); /* Should agree with poll, otherwise some programs break */ if (sock_writeable(sk)) diff --git a/net/dccp/proto.c b/net/dccp/proto.c index 314a1b5c033..94ca8eaace7 100644 --- a/net/dccp/proto.c +++ b/net/dccp/proto.c @@ -311,7 +311,7 @@ unsigned int dccp_poll(struct file *file, struct socket *sock, unsigned int mask; struct sock *sk = sock->sk; - poll_wait(file, sk->sk_sleep, wait); + sock_poll_wait(file, sk->sk_sleep, wait); if (sk->sk_state == DCCP_LISTEN) return inet_csk_listen_poll(sk); diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 7870a535dac..91145244ea6 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -339,7 +339,7 @@ unsigned int tcp_poll(struct file *file, struct socket *sock, poll_table *wait) struct sock *sk = sock->sk; struct tcp_sock *tp = tcp_sk(sk); - poll_wait(file, sk->sk_sleep, wait); + sock_poll_wait(file, sk->sk_sleep, wait); if (sk->sk_state == TCP_LISTEN) return inet_csk_listen_poll(sk); diff --git a/net/iucv/af_iucv.c b/net/iucv/af_iucv.c index 6be5f92d109..49c15b48408 100644 --- a/net/iucv/af_iucv.c +++ b/net/iucv/af_iucv.c @@ -306,7 +306,7 @@ static inline int iucv_below_msglim(struct sock *sk) static void iucv_sock_wake_msglim(struct sock *sk) { read_lock(&sk->sk_callback_lock); - if (sk->sk_sleep && waitqueue_active(sk->sk_sleep)) + if (sk_has_sleeper(sk)) wake_up_interruptible_all(sk->sk_sleep); sk_wake_async(sk, SOCK_WAKE_SPACE, POLL_OUT); read_unlock(&sk->sk_callback_lock); @@ -1256,7 +1256,7 @@ unsigned int iucv_sock_poll(struct file *file, struct socket *sock, struct sock *sk = sock->sk; unsigned int mask = 0; - poll_wait(file, sk->sk_sleep, wait); + sock_poll_wait(file, sk->sk_sleep, wait); if (sk->sk_state == IUCV_LISTEN) return iucv_accept_poll(sk); diff --git a/net/rxrpc/af_rxrpc.c b/net/rxrpc/af_rxrpc.c index eac5e7bb736..bfe493ebf27 100644 --- a/net/rxrpc/af_rxrpc.c +++ b/net/rxrpc/af_rxrpc.c @@ -63,7 +63,7 @@ static void rxrpc_write_space(struct sock *sk) _enter("%p", sk); read_lock(&sk->sk_callback_lock); if (rxrpc_writable(sk)) { - if (sk->sk_sleep && waitqueue_active(sk->sk_sleep)) + if (sk_has_sleeper(sk)) wake_up_interruptible(sk->sk_sleep); sk_wake_async(sk, SOCK_WAKE_SPACE, POLL_OUT); } @@ -588,7 +588,7 @@ static unsigned int rxrpc_poll(struct file *file, struct socket *sock, unsigned int mask; struct sock *sk = sock->sk; - poll_wait(file, sk->sk_sleep, wait); + sock_poll_wait(file, sk->sk_sleep, wait); mask = 0; /* the socket is readable if there are any messages waiting on the Rx diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index 36d4e44d623..fc3ebb90691 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -315,7 +315,7 @@ static void unix_write_space(struct sock *sk) { read_lock(&sk->sk_callback_lock); if (unix_writable(sk)) { - if (sk->sk_sleep && waitqueue_active(sk->sk_sleep)) + if (sk_has_sleeper(sk)) wake_up_interruptible_sync(sk->sk_sleep); sk_wake_async(sk, SOCK_WAKE_SPACE, POLL_OUT); } @@ -1985,7 +1985,7 @@ static unsigned int unix_poll(struct file *file, struct socket *sock, poll_table struct sock *sk = sock->sk; unsigned int mask; - poll_wait(file, sk->sk_sleep, wait); + sock_poll_wait(file, sk->sk_sleep, wait); mask = 0; /* exceptional events? */ @@ -2022,7 +2022,7 @@ static unsigned int unix_dgram_poll(struct file *file, struct socket *sock, struct sock *sk = sock->sk, *other; unsigned int mask, writable; - poll_wait(file, sk->sk_sleep, wait); + sock_poll_wait(file, sk->sk_sleep, wait); mask = 0; /* exceptional events? */ @@ -2053,7 +2053,7 @@ static unsigned int unix_dgram_poll(struct file *file, struct socket *sock, other = unix_peer_get(sk); if (other) { if (unix_peer(other) != sk) { - poll_wait(file, &unix_sk(other)->peer_wait, + sock_poll_wait(file, &unix_sk(other)->peer_wait, wait); if (unix_recvq_full(other)) writable = 0; -- cgit v1.2.3-70-g09d2 From e912b1142be8f1e2c71c71001dc992c6e5eb2ec1 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 8 Jul 2009 19:36:05 +0000 Subject: net: sk_prot_alloc() should not blindly overwrite memory Some sockets use SLAB_DESTROY_BY_RCU, and our RCU code correctness depends on sk->sk_nulls_node.next being always valid. A NULL value is not allowed as it might fault a lockless reader. Current sk_prot_alloc() implementation doesnt respect this hypothesis, calling kmem_cache_alloc() with __GFP_ZERO. Just call memset() around the forbidden field. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/core/sock.c | 19 +++++++++++++++++-- 1 file changed, 17 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/core/sock.c b/net/core/sock.c index 6354863b1c6..ba5d2116aea 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -939,8 +939,23 @@ static struct sock *sk_prot_alloc(struct proto *prot, gfp_t priority, struct kmem_cache *slab; slab = prot->slab; - if (slab != NULL) - sk = kmem_cache_alloc(slab, priority); + if (slab != NULL) { + sk = kmem_cache_alloc(slab, priority & ~__GFP_ZERO); + if (!sk) + return sk; + if (priority & __GFP_ZERO) { + /* + * caches using SLAB_DESTROY_BY_RCU should let + * sk_node.next un-modified. Special care is taken + * when initializing object to zero. + */ + if (offsetof(struct sock, sk_node.next) != 0) + memset(sk, 0, offsetof(struct sock, sk_node.next)); + memset(&sk->sk_node.pprev, 0, + prot->obj_size - offsetof(struct sock, + sk_node.pprev)); + } + } else sk = kmalloc(prot->obj_size, priority); -- cgit v1.2.3-70-g09d2 From e51a67a9c8a2ea5c563f8c2ba6613fe2100ffe67 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 8 Jul 2009 14:20:42 +0000 Subject: net: ip_push_pending_frames() fix After commit 2b85a34e911bf483c27cfdd124aeb1605145dc80 (net: No more expensive sock_hold()/sock_put() on each tx) we do not take any more references on sk->sk_refcnt on outgoing packets. I forgot to delete two __sock_put() from ip_push_pending_frames() and ip6_push_pending_frames(). Reported-by: Emil S Tantilov Signed-off-by: Eric Dumazet Tested-by: Emil S Tantilov Signed-off-by: David S. Miller --- net/ipv4/ip_output.c | 1 - net/ipv6/ip6_output.c | 1 - 2 files changed, 2 deletions(-) (limited to 'net') diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index 24702628266..7d082105472 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -1243,7 +1243,6 @@ int ip_push_pending_frames(struct sock *sk) skb->len += tmp_skb->len; skb->data_len += tmp_skb->len; skb->truesize += tmp_skb->truesize; - __sock_put(tmp_skb->sk); tmp_skb->destructor = NULL; tmp_skb->sk = NULL; } diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index 7c76e3d1821..87f8419a68f 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -1484,7 +1484,6 @@ int ip6_push_pending_frames(struct sock *sk) skb->len += tmp_skb->len; skb->data_len += tmp_skb->len; skb->truesize += tmp_skb->truesize; - __sock_put(tmp_skb->sk); tmp_skb->destructor = NULL; tmp_skb->sk = NULL; } -- cgit v1.2.3-70-g09d2 From f2ba025b2036e52a176cddcf91b15ac2b10c644a Mon Sep 17 00:00:00 2001 From: Sascha Hlusiak Date: Sat, 11 Jul 2009 20:30:52 -0700 Subject: sit: fix regression: do not release skb->dst before xmit The sit module makes use of skb->dst in it's xmit function, so since 93f154b594fe47 ("net: release dst entry in dev_hard_start_xmit()") sit tunnels are broken, because the flag IFF_XMIT_DST_RELEASE is not unset. This patch unsets that flag for sit devices to fix this regression. Signed-off-by: Sascha Hlusiak Acked-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv6/sit.c | 1 + 1 file changed, 1 insertion(+) (limited to 'net') diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c index 68e52308e55..98b7327d094 100644 --- a/net/ipv6/sit.c +++ b/net/ipv6/sit.c @@ -1018,6 +1018,7 @@ static void ipip6_tunnel_setup(struct net_device *dev) dev->hard_header_len = LL_MAX_HEADER + sizeof(struct iphdr); dev->mtu = ETH_DATA_LEN - sizeof(struct iphdr); dev->flags = IFF_NOARP; + dev->priv_flags &= ~IFF_XMIT_DST_RELEASE; dev->iflink = 0; dev->addr_len = 4; dev->features |= NETIF_F_NETNS_LOCAL; -- cgit v1.2.3-70-g09d2 From 405f55712dfe464b3240d7816cc4fe4174831be2 Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Sat, 11 Jul 2009 22:08:37 +0400 Subject: headers: smp_lock.h redux * Remove smp_lock.h from files which don't need it (including some headers!) * Add smp_lock.h to files which do need it * Make smp_lock.h include conditional in hardirq.h It's needed only for one kernel_locked() usage which is under CONFIG_PREEMPT This will make hardirq.h inclusion cheaper for every PREEMPT=n config (which includes allmodconfig/allyesconfig, BTW) Signed-off-by: Alexey Dobriyan Signed-off-by: Linus Torvalds --- arch/alpha/kernel/ptrace.c | 1 - arch/blackfin/kernel/ptrace.c | 1 - arch/blackfin/kernel/sys_bfin.c | 1 - arch/cris/kernel/sys_cris.c | 1 - arch/ia64/kernel/ptrace.c | 1 - arch/m32r/kernel/ptrace.c | 1 - arch/microblaze/kernel/ptrace.c | 1 - arch/microblaze/kernel/signal.c | 1 - arch/microblaze/kernel/sys_microblaze.c | 1 - arch/mips/kernel/ptrace32.c | 1 - arch/mips/mm/hugetlbpage.c | 1 - arch/mn10300/kernel/ptrace.c | 1 - arch/mn10300/kernel/signal.c | 1 - arch/mn10300/kernel/sys_mn10300.c | 1 - arch/mn10300/kernel/traps.c | 1 - arch/mn10300/mm/fault.c | 1 - arch/mn10300/mm/misalignment.c | 1 - arch/powerpc/kernel/ptrace32.c | 1 - arch/s390/kernel/dis.c | 1 - arch/s390/kernel/ptrace.c | 1 - arch/s390/mm/fault.c | 1 - arch/sh/mm/tlb-sh3.c | 1 - arch/sparc/kernel/ptrace_32.c | 1 - arch/sparc/kernel/ptrace_64.c | 1 - arch/sparc/kernel/time_64.c | 1 - arch/sparc/kernel/traps_32.c | 1 - drivers/block/DAC960.c | 1 + drivers/block/cciss.c | 1 + drivers/block/loop.c | 1 - drivers/bluetooth/hci_vhci.c | 1 - drivers/char/amiserial.c | 1 + drivers/char/cyclades.c | 1 + drivers/char/epca.c | 1 + drivers/char/isicom.c | 1 + drivers/char/istallion.c | 1 + drivers/char/moxa.c | 1 + drivers/char/mxser.c | 1 + drivers/char/n_hdlc.c | 1 + drivers/char/n_r3964.c | 1 + drivers/char/pty.c | 1 + drivers/char/rio/rio_linux.c | 1 + drivers/char/riscom8.c | 1 + drivers/char/rocket.c | 1 + drivers/char/serial167.c | 1 + drivers/char/specialix.c | 1 + drivers/char/sx.c | 1 + drivers/char/synclink.c | 1 + drivers/char/synclink_gt.c | 1 + drivers/char/synclinkmp.c | 1 + drivers/char/tpm/tpm.c | 1 - drivers/char/tty_ioctl.c | 1 - drivers/char/tty_ldisc.c | 1 - drivers/char/vt.c | 1 + drivers/char/vt_ioctl.c | 1 + drivers/gpio/vr41xx_giu.c | 1 - drivers/hid/usbhid/hid-core.c | 1 - drivers/isdn/hisax/hfc_usb.c | 1 - drivers/isdn/i4l/isdn_tty.c | 1 + drivers/isdn/mISDN/stack.c | 1 + drivers/media/dvb/bt8xx/dst_ca.c | 1 + drivers/media/dvb/dvb-core/dvbdev.h | 1 - drivers/media/dvb/ttpci/av7110.c | 1 - drivers/media/radio/radio-mr800.c | 1 + drivers/media/radio/radio-si470x.c | 1 + drivers/media/video/bt8xx/bttv-driver.c | 1 + drivers/media/video/cx23885/cx23885-417.c | 1 + drivers/media/video/cx23885/cx23885-video.c | 1 + drivers/media/video/cx88/cx88-blackbird.c | 1 + drivers/media/video/cx88/cx88-video.c | 1 + drivers/media/video/dabusb.c | 1 + drivers/media/video/pwc/pwc-if.c | 1 + drivers/media/video/pwc/pwc.h | 1 - drivers/media/video/s2255drv.c | 1 + drivers/media/video/saa5246a.c | 1 - drivers/media/video/saa5249.c | 1 - drivers/media/video/saa7134/saa7134-empress.c | 1 + drivers/media/video/se401.c | 1 + drivers/media/video/stk-webcam.c | 1 + drivers/media/video/stradis.c | 1 + drivers/media/video/stv680.c | 1 + drivers/media/video/usbvideo/vicam.c | 1 + drivers/media/video/usbvision/usbvision-video.c | 1 + drivers/media/video/v4l2-dev.c | 1 - drivers/media/video/zoran/zoran_driver.c | 1 + drivers/misc/sgi-gru/grufile.c | 1 - drivers/misc/sgi-gru/grukservices.c | 1 - drivers/net/irda/irtty-sir.c | 1 - drivers/pci/hotplug/cpci_hotplug_core.c | 1 - drivers/pci/hotplug/cpqphp_ctrl.c | 1 - drivers/pci/hotplug/cpqphp_sysfs.c | 1 + drivers/pci/hotplug/pciehp_ctrl.c | 1 - drivers/pci/syscall.c | 1 - drivers/s390/block/dasd_ioctl.c | 1 + drivers/scsi/qla2xxx/qla_mid.c | 1 - drivers/telephony/ixj.c | 1 + drivers/telephony/phonedev.c | 1 - drivers/usb/class/cdc-wdm.c | 1 - drivers/usb/gadget/amd5536udc.c | 1 - drivers/usb/gadget/langwell_udc.c | 1 - drivers/usb/gadget/s3c2410_udc.c | 1 - drivers/usb/host/r8a66597-hcd.c | 1 - drivers/usb/misc/iowarrior.c | 1 + drivers/usb/misc/rio500.c | 1 + drivers/usb/misc/usblcd.c | 1 + drivers/usb/musb/cppi_dma.h | 1 - drivers/usb/musb/musb_core.h | 1 - drivers/usb/serial/ftdi_sio.c | 1 + drivers/usb/serial/mos7840.c | 1 + drivers/usb/serial/usb-serial.c | 1 + drivers/video/fbmem.c | 1 - fs/adfs/super.c | 1 + fs/afs/super.c | 1 + fs/autofs4/dev-ioctl.c | 1 - fs/bfs/dir.c | 1 - fs/bfs/file.c | 1 - fs/btrfs/compression.c | 1 - fs/btrfs/file.c | 1 - fs/btrfs/inode.c | 1 - fs/btrfs/ioctl.c | 1 - fs/btrfs/super.c | 1 - fs/char_dev.c | 1 - fs/compat.c | 1 - fs/compat_ioctl.c | 1 + fs/exofs/super.c | 1 + fs/ext2/ioctl.c | 1 - fs/ext4/ioctl.c | 1 - fs/fat/dir.c | 1 - fs/fat/namei_msdos.c | 1 - fs/fat/namei_vfat.c | 1 - fs/fcntl.c | 1 - fs/freevxfs/vxfs_super.c | 1 + fs/hfs/super.c | 1 + fs/hfsplus/super.c | 1 + fs/hpfs/dir.c | 1 + fs/hpfs/file.c | 1 + fs/hpfs/hpfs_fn.h | 1 - fs/hpfs/inode.c | 1 + fs/hpfs/namei.c | 1 + fs/jffs2/super.c | 1 + fs/lockd/clntproc.c | 1 + fs/lockd/svc4proc.c | 1 + fs/lockd/svcproc.c | 1 + fs/nfs/delegation.c | 1 + fs/nfs/dir.c | 1 - fs/nfs/file.c | 1 - fs/nfs/inode.c | 1 - fs/nfs/nfs4proc.c | 1 - fs/nfs/read.c | 1 - fs/nfsd/nfsctl.c | 1 - fs/nfsd/nfssvc.c | 1 - fs/nilfs2/dir.c | 1 - fs/ocfs2/ioctl.c | 1 - fs/reiserfs/xattr.c | 1 - fs/squashfs/super.c | 1 + fs/ubifs/ioctl.c | 1 - fs/xfs/linux-2.6/xfs_file.c | 1 - include/linux/crash_dump.h | 1 - include/linux/hardirq.h | 2 ++ include/linux/quotaops.h | 1 - include/linux/sunrpc/xdr.h | 1 - kernel/power/user.c | 1 - kernel/trace/blktrace.c | 1 + kernel/trace/trace.c | 1 + net/appletalk/ddp.c | 1 + net/ipx/af_ipx.c | 1 + net/irda/af_irda.c | 1 + net/irda/irnet/irnet.h | 1 - net/irda/irnet/irnet_ppp.c | 1 + net/sunrpc/clnt.c | 1 - net/sunrpc/sched.c | 1 - net/sunrpc/svc_xprt.c | 1 + net/wanrouter/wanmain.c | 1 + net/x25/af_x25.c | 1 + 173 files changed, 81 insertions(+), 93 deletions(-) (limited to 'net') diff --git a/arch/alpha/kernel/ptrace.c b/arch/alpha/kernel/ptrace.c index 1e9ad52c460..e072041d19f 100644 --- a/arch/alpha/kernel/ptrace.c +++ b/arch/alpha/kernel/ptrace.c @@ -8,7 +8,6 @@ #include #include #include -#include #include #include #include diff --git a/arch/blackfin/kernel/ptrace.c b/arch/blackfin/kernel/ptrace.c index d76618db50d..6a387eec6b6 100644 --- a/arch/blackfin/kernel/ptrace.c +++ b/arch/blackfin/kernel/ptrace.c @@ -31,7 +31,6 @@ #include #include #include -#include #include #include #include diff --git a/arch/blackfin/kernel/sys_bfin.c b/arch/blackfin/kernel/sys_bfin.c index a8f1329c15a..3da60fb13ce 100644 --- a/arch/blackfin/kernel/sys_bfin.c +++ b/arch/blackfin/kernel/sys_bfin.c @@ -29,7 +29,6 @@ * 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ -#include #include #include #include diff --git a/arch/cris/kernel/sys_cris.c b/arch/cris/kernel/sys_cris.c index a79fbd87021..2ad962c7e88 100644 --- a/arch/cris/kernel/sys_cris.c +++ b/arch/cris/kernel/sys_cris.c @@ -15,7 +15,6 @@ #include #include #include -#include #include #include #include diff --git a/arch/ia64/kernel/ptrace.c b/arch/ia64/kernel/ptrace.c index 92c9689b7d9..9daa87fdb01 100644 --- a/arch/ia64/kernel/ptrace.c +++ b/arch/ia64/kernel/ptrace.c @@ -15,7 +15,6 @@ #include #include #include -#include #include #include #include diff --git a/arch/m32r/kernel/ptrace.c b/arch/m32r/kernel/ptrace.c index bf0abe9e1f7..98b8feb12ed 100644 --- a/arch/m32r/kernel/ptrace.c +++ b/arch/m32r/kernel/ptrace.c @@ -19,7 +19,6 @@ #include #include #include -#include #include #include #include diff --git a/arch/microblaze/kernel/ptrace.c b/arch/microblaze/kernel/ptrace.c index b86aa623e36..53ff39af6a5 100644 --- a/arch/microblaze/kernel/ptrace.c +++ b/arch/microblaze/kernel/ptrace.c @@ -27,7 +27,6 @@ #include #include #include -#include #include #include diff --git a/arch/microblaze/kernel/signal.c b/arch/microblaze/kernel/signal.c index 493819c25fb..1c80e4fc40c 100644 --- a/arch/microblaze/kernel/signal.c +++ b/arch/microblaze/kernel/signal.c @@ -21,7 +21,6 @@ #include #include #include -#include #include #include #include diff --git a/arch/microblaze/kernel/sys_microblaze.c b/arch/microblaze/kernel/sys_microblaze.c index 8c9ebac5da1..e000bce09b2 100644 --- a/arch/microblaze/kernel/sys_microblaze.c +++ b/arch/microblaze/kernel/sys_microblaze.c @@ -15,7 +15,6 @@ #include #include #include -#include #include #include #include diff --git a/arch/mips/kernel/ptrace32.c b/arch/mips/kernel/ptrace32.c index c4f9ac17474..32644b4a071 100644 --- a/arch/mips/kernel/ptrace32.c +++ b/arch/mips/kernel/ptrace32.c @@ -22,7 +22,6 @@ #include #include #include -#include #include #include diff --git a/arch/mips/mm/hugetlbpage.c b/arch/mips/mm/hugetlbpage.c index 471c09aa161..8c2834f5919 100644 --- a/arch/mips/mm/hugetlbpage.c +++ b/arch/mips/mm/hugetlbpage.c @@ -16,7 +16,6 @@ #include #include #include -#include #include #include #include diff --git a/arch/mn10300/kernel/ptrace.c b/arch/mn10300/kernel/ptrace.c index e143339ad28..cf847dabc1b 100644 --- a/arch/mn10300/kernel/ptrace.c +++ b/arch/mn10300/kernel/ptrace.c @@ -13,7 +13,6 @@ #include #include #include -#include #include #include #include diff --git a/arch/mn10300/kernel/signal.c b/arch/mn10300/kernel/signal.c index 9f7572a0f57..feb2f2e810d 100644 --- a/arch/mn10300/kernel/signal.c +++ b/arch/mn10300/kernel/signal.c @@ -12,7 +12,6 @@ #include #include #include -#include #include #include #include diff --git a/arch/mn10300/kernel/sys_mn10300.c b/arch/mn10300/kernel/sys_mn10300.c index bca5a84dc72..29d196b83d2 100644 --- a/arch/mn10300/kernel/sys_mn10300.c +++ b/arch/mn10300/kernel/sys_mn10300.c @@ -13,7 +13,6 @@ #include #include #include -#include #include #include #include diff --git a/arch/mn10300/kernel/traps.c b/arch/mn10300/kernel/traps.c index 0dfdc500112..91365adba4f 100644 --- a/arch/mn10300/kernel/traps.c +++ b/arch/mn10300/kernel/traps.c @@ -17,7 +17,6 @@ #include #include #include -#include #include #include #include diff --git a/arch/mn10300/mm/fault.c b/arch/mn10300/mm/fault.c index a62e1e138bc..53bb17d0f06 100644 --- a/arch/mn10300/mm/fault.c +++ b/arch/mn10300/mm/fault.c @@ -20,7 +20,6 @@ #include #include #include -#include #include #include #include /* For unblank_screen() */ diff --git a/arch/mn10300/mm/misalignment.c b/arch/mn10300/mm/misalignment.c index 94c4a435806..30016251f65 100644 --- a/arch/mn10300/mm/misalignment.c +++ b/arch/mn10300/mm/misalignment.c @@ -17,7 +17,6 @@ #include #include #include -#include #include #include #include diff --git a/arch/powerpc/kernel/ptrace32.c b/arch/powerpc/kernel/ptrace32.c index 297632cba04..8a6daf4129f 100644 --- a/arch/powerpc/kernel/ptrace32.c +++ b/arch/powerpc/kernel/ptrace32.c @@ -21,7 +21,6 @@ #include #include #include -#include #include #include #include diff --git a/arch/s390/kernel/dis.c b/arch/s390/kernel/dis.c index d2f270c995d..db943a7ec51 100644 --- a/arch/s390/kernel/dis.c +++ b/arch/s390/kernel/dis.c @@ -15,7 +15,6 @@ #include #include #include -#include #include #include #include diff --git a/arch/s390/kernel/ptrace.c b/arch/s390/kernel/ptrace.c index 490b39934d6..43acd73105b 100644 --- a/arch/s390/kernel/ptrace.c +++ b/arch/s390/kernel/ptrace.c @@ -26,7 +26,6 @@ #include #include #include -#include #include #include #include diff --git a/arch/s390/mm/fault.c b/arch/s390/mm/fault.c index 74eb26bf197..e5e119fe03b 100644 --- a/arch/s390/mm/fault.c +++ b/arch/s390/mm/fault.c @@ -22,7 +22,6 @@ #include #include #include -#include #include #include #include diff --git a/arch/sh/mm/tlb-sh3.c b/arch/sh/mm/tlb-sh3.c index 7fbfd5a11ff..17cb7c3adf2 100644 --- a/arch/sh/mm/tlb-sh3.c +++ b/arch/sh/mm/tlb-sh3.c @@ -18,7 +18,6 @@ #include #include #include -#include #include #include diff --git a/arch/sparc/kernel/ptrace_32.c b/arch/sparc/kernel/ptrace_32.c index 8ce6285a06d..7e3dfd9bb97 100644 --- a/arch/sparc/kernel/ptrace_32.c +++ b/arch/sparc/kernel/ptrace_32.c @@ -16,7 +16,6 @@ #include #include #include -#include #include #include #include diff --git a/arch/sparc/kernel/ptrace_64.c b/arch/sparc/kernel/ptrace_64.c index a941c610e7c..4ae91dc2feb 100644 --- a/arch/sparc/kernel/ptrace_64.c +++ b/arch/sparc/kernel/ptrace_64.c @@ -17,7 +17,6 @@ #include #include #include -#include #include #include #include diff --git a/arch/sparc/kernel/time_64.c b/arch/sparc/kernel/time_64.c index 5c12e79b4bd..da1218e8ee8 100644 --- a/arch/sparc/kernel/time_64.c +++ b/arch/sparc/kernel/time_64.c @@ -11,7 +11,6 @@ #include #include #include -#include #include #include #include diff --git a/arch/sparc/kernel/traps_32.c b/arch/sparc/kernel/traps_32.c index 358283341b4..c0490c7bbde 100644 --- a/arch/sparc/kernel/traps_32.c +++ b/arch/sparc/kernel/traps_32.c @@ -13,7 +13,6 @@ #include #include #include -#include #include #include diff --git a/drivers/block/DAC960.c b/drivers/block/DAC960.c index 668dc234b8e..1e6b7c14f69 100644 --- a/drivers/block/DAC960.c +++ b/drivers/block/DAC960.c @@ -36,6 +36,7 @@ #include #include #include +#include #include #include #include diff --git a/drivers/block/cciss.c b/drivers/block/cciss.c index 65a0655e7fc..a52cc7fe45e 100644 --- a/drivers/block/cciss.c +++ b/drivers/block/cciss.c @@ -26,6 +26,7 @@ #include #include #include +#include #include #include #include diff --git a/drivers/block/loop.c b/drivers/block/loop.c index 801f4ab8330..5757188cd1f 100644 --- a/drivers/block/loop.c +++ b/drivers/block/loop.c @@ -61,7 +61,6 @@ #include #include #include -#include #include #include #include diff --git a/drivers/bluetooth/hci_vhci.c b/drivers/bluetooth/hci_vhci.c index 1df9dda2e37..d5cde6d86f8 100644 --- a/drivers/bluetooth/hci_vhci.c +++ b/drivers/bluetooth/hci_vhci.c @@ -28,7 +28,6 @@ #include #include #include -#include #include #include #include diff --git a/drivers/char/amiserial.c b/drivers/char/amiserial.c index 72429b6b2fa..6c32fbf0716 100644 --- a/drivers/char/amiserial.c +++ b/drivers/char/amiserial.c @@ -81,6 +81,7 @@ static char *serial_version = "4.30"; #include #include #include +#include #include #include diff --git a/drivers/char/cyclades.c b/drivers/char/cyclades.c index f3366d3f06c..2dafc2da064 100644 --- a/drivers/char/cyclades.c +++ b/drivers/char/cyclades.c @@ -633,6 +633,7 @@ #include #include #include +#include #include #include #include diff --git a/drivers/char/epca.c b/drivers/char/epca.c index abef1f7d84f..ff647ca1c48 100644 --- a/drivers/char/epca.c +++ b/drivers/char/epca.c @@ -36,6 +36,7 @@ #include #include #include +#include #include #include #include diff --git a/drivers/char/isicom.c b/drivers/char/isicom.c index 621d1184673..4f1f4cd670d 100644 --- a/drivers/char/isicom.c +++ b/drivers/char/isicom.c @@ -122,6 +122,7 @@ #include #include #include +#include #include #include #include diff --git a/drivers/char/istallion.c b/drivers/char/istallion.c index 0c999f5bb3d..ab2f3349c5c 100644 --- a/drivers/char/istallion.c +++ b/drivers/char/istallion.c @@ -20,6 +20,7 @@ #include #include +#include #include #include #include diff --git a/drivers/char/moxa.c b/drivers/char/moxa.c index 65b6ff2442c..dd0083bbb64 100644 --- a/drivers/char/moxa.c +++ b/drivers/char/moxa.c @@ -34,6 +34,7 @@ #include #include #include +#include #include #include #include diff --git a/drivers/char/mxser.c b/drivers/char/mxser.c index 52d953eb30c..dbf8d52f31d 100644 --- a/drivers/char/mxser.c +++ b/drivers/char/mxser.c @@ -23,6 +23,7 @@ #include #include #include +#include #include #include #include diff --git a/drivers/char/n_hdlc.c b/drivers/char/n_hdlc.c index 1c43c8cdee2..c68118efad8 100644 --- a/drivers/char/n_hdlc.c +++ b/drivers/char/n_hdlc.c @@ -97,6 +97,7 @@ #include #include #include +#include #include /* used in new tty drivers */ #include /* used in new tty drivers */ #include diff --git a/drivers/char/n_r3964.c b/drivers/char/n_r3964.c index 2e99158ebb8..6934025a1ac 100644 --- a/drivers/char/n_r3964.c +++ b/drivers/char/n_r3964.c @@ -58,6 +58,7 @@ #include #include #include +#include #include #include #include /* used in new tty drivers */ diff --git a/drivers/char/pty.c b/drivers/char/pty.c index 9d1b4f548f6..6e6942c45f5 100644 --- a/drivers/char/pty.c +++ b/drivers/char/pty.c @@ -22,6 +22,7 @@ #include #include #include +#include #include #include #include diff --git a/drivers/char/rio/rio_linux.c b/drivers/char/rio/rio_linux.c index ce81da5b2da..d58c2eb07f0 100644 --- a/drivers/char/rio/rio_linux.c +++ b/drivers/char/rio/rio_linux.c @@ -44,6 +44,7 @@ #include #include #include +#include #include #include diff --git a/drivers/char/riscom8.c b/drivers/char/riscom8.c index 21766045123..171711acf5c 100644 --- a/drivers/char/riscom8.c +++ b/drivers/char/riscom8.c @@ -47,6 +47,7 @@ #include #include #include +#include #include #include diff --git a/drivers/char/rocket.c b/drivers/char/rocket.c index 63d5b628477..0e29a23ec4c 100644 --- a/drivers/char/rocket.c +++ b/drivers/char/rocket.c @@ -73,6 +73,7 @@ #include #include #include +#include #include #include #include diff --git a/drivers/char/serial167.c b/drivers/char/serial167.c index f1f24f0ee26..51e7a46787b 100644 --- a/drivers/char/serial167.c +++ b/drivers/char/serial167.c @@ -52,6 +52,7 @@ #include #include #include +#include #include #include #include diff --git a/drivers/char/specialix.c b/drivers/char/specialix.c index e72be4190a4..bfe4cdb2feb 100644 --- a/drivers/char/specialix.c +++ b/drivers/char/specialix.c @@ -87,6 +87,7 @@ #include #include #include +#include #include #include #include diff --git a/drivers/char/sx.c b/drivers/char/sx.c index 518f2a25d91..a81ec4fcf6f 100644 --- a/drivers/char/sx.c +++ b/drivers/char/sx.c @@ -216,6 +216,7 @@ #include #include #include +#include #include #include #include diff --git a/drivers/char/synclink.c b/drivers/char/synclink.c index afded3a2379..813552f1488 100644 --- a/drivers/char/synclink.c +++ b/drivers/char/synclink.c @@ -81,6 +81,7 @@ #include #include #include +#include #include #include #include diff --git a/drivers/char/synclink_gt.c b/drivers/char/synclink_gt.c index a2e67e6df3a..91f20a92fdd 100644 --- a/drivers/char/synclink_gt.c +++ b/drivers/char/synclink_gt.c @@ -62,6 +62,7 @@ #include #include #include +#include #include #include #include diff --git a/drivers/char/synclinkmp.c b/drivers/char/synclinkmp.c index 6f727e3c53a..8d4a2a8a0a7 100644 --- a/drivers/char/synclinkmp.c +++ b/drivers/char/synclinkmp.c @@ -52,6 +52,7 @@ #include #include #include +#include #include #include #include diff --git a/drivers/char/tpm/tpm.c b/drivers/char/tpm/tpm.c index ccdd828adce..b0603b2e568 100644 --- a/drivers/char/tpm/tpm.c +++ b/drivers/char/tpm/tpm.c @@ -26,7 +26,6 @@ #include #include #include -#include #include "tpm.h" diff --git a/drivers/char/tty_ioctl.c b/drivers/char/tty_ioctl.c index b24f6c6a1ea..ad6ba4ed280 100644 --- a/drivers/char/tty_ioctl.c +++ b/drivers/char/tty_ioctl.c @@ -21,7 +21,6 @@ #include #include #include -#include #include #include diff --git a/drivers/char/tty_ldisc.c b/drivers/char/tty_ldisc.c index 913aa8d3f1c..0ef0dc97ba2 100644 --- a/drivers/char/tty_ldisc.c +++ b/drivers/char/tty_ldisc.c @@ -21,7 +21,6 @@ #include #include #include -#include #include #include #include diff --git a/drivers/char/vt.c b/drivers/char/vt.c index d9113b4c76e..7947bd1b4cf 100644 --- a/drivers/char/vt.c +++ b/drivers/char/vt.c @@ -89,6 +89,7 @@ #include #include #include +#include #include #include #include diff --git a/drivers/char/vt_ioctl.c b/drivers/char/vt_ioctl.c index 7539bed0f7e..95189f288f8 100644 --- a/drivers/char/vt_ioctl.c +++ b/drivers/char/vt_ioctl.c @@ -25,6 +25,7 @@ #include #include #include +#include #include #include diff --git a/drivers/gpio/vr41xx_giu.c b/drivers/gpio/vr41xx_giu.c index b70e06133e7..b16c9a8c03f 100644 --- a/drivers/gpio/vr41xx_giu.c +++ b/drivers/gpio/vr41xx_giu.c @@ -29,7 +29,6 @@ #include #include #include -#include #include #include diff --git a/drivers/hid/usbhid/hid-core.c b/drivers/hid/usbhid/hid-core.c index 76c4bbe9dcc..3c1fcb7640a 100644 --- a/drivers/hid/usbhid/hid-core.c +++ b/drivers/hid/usbhid/hid-core.c @@ -22,7 +22,6 @@ #include #include #include -#include #include #include #include diff --git a/drivers/isdn/hisax/hfc_usb.c b/drivers/isdn/hisax/hfc_usb.c index 8df889b0c1a..9de54202c90 100644 --- a/drivers/isdn/hisax/hfc_usb.c +++ b/drivers/isdn/hisax/hfc_usb.c @@ -37,7 +37,6 @@ #include #include #include -#include #include #include #include "hisax.h" diff --git a/drivers/isdn/i4l/isdn_tty.c b/drivers/isdn/i4l/isdn_tty.c index b4d4522e507..2881a66c1aa 100644 --- a/drivers/isdn/i4l/isdn_tty.c +++ b/drivers/isdn/i4l/isdn_tty.c @@ -13,6 +13,7 @@ #include #include +#include #include "isdn_common.h" #include "isdn_tty.h" #ifdef CONFIG_ISDN_AUDIO diff --git a/drivers/isdn/mISDN/stack.c b/drivers/isdn/mISDN/stack.c index e2f45019ebf..3e1532a180f 100644 --- a/drivers/isdn/mISDN/stack.c +++ b/drivers/isdn/mISDN/stack.c @@ -17,6 +17,7 @@ #include #include +#include #include "core.h" static u_int *debug; diff --git a/drivers/media/dvb/bt8xx/dst_ca.c b/drivers/media/dvb/bt8xx/dst_ca.c index 4601b059b2b..0e246eaad05 100644 --- a/drivers/media/dvb/bt8xx/dst_ca.c +++ b/drivers/media/dvb/bt8xx/dst_ca.c @@ -21,6 +21,7 @@ #include #include #include +#include #include #include #include "dvbdev.h" diff --git a/drivers/media/dvb/dvb-core/dvbdev.h b/drivers/media/dvb/dvb-core/dvbdev.h index 79927305e84..487919bea7a 100644 --- a/drivers/media/dvb/dvb-core/dvbdev.h +++ b/drivers/media/dvb/dvb-core/dvbdev.h @@ -27,7 +27,6 @@ #include #include #include -#include #define DVB_MAJOR 212 diff --git a/drivers/media/dvb/ttpci/av7110.c b/drivers/media/dvb/ttpci/av7110.c index d1d959ed37b..8d65c652ba5 100644 --- a/drivers/media/dvb/ttpci/av7110.c +++ b/drivers/media/dvb/ttpci/av7110.c @@ -36,7 +36,6 @@ #include #include #include -#include #include #include diff --git a/drivers/media/radio/radio-mr800.c b/drivers/media/radio/radio-mr800.c index 837467f9380..575bf9d8941 100644 --- a/drivers/media/radio/radio-mr800.c +++ b/drivers/media/radio/radio-mr800.c @@ -58,6 +58,7 @@ #include #include #include +#include #include #include #include diff --git a/drivers/media/radio/radio-si470x.c b/drivers/media/radio/radio-si470x.c index 46d21632961..e85f318b4d2 100644 --- a/drivers/media/radio/radio-si470x.c +++ b/drivers/media/radio/radio-si470x.c @@ -127,6 +127,7 @@ #include #include #include +#include #include #include #include diff --git a/drivers/media/video/bt8xx/bttv-driver.c b/drivers/media/video/bt8xx/bttv-driver.c index 5eb1464af67..d147d29bb0d 100644 --- a/drivers/media/video/bt8xx/bttv-driver.c +++ b/drivers/media/video/bt8xx/bttv-driver.c @@ -41,6 +41,7 @@ #include #include #include +#include #include #include #include "bttvp.h" diff --git a/drivers/media/video/cx23885/cx23885-417.c b/drivers/media/video/cx23885/cx23885-417.c index 2943bfd32a9..428f0c45e6b 100644 --- a/drivers/media/video/cx23885/cx23885-417.c +++ b/drivers/media/video/cx23885/cx23885-417.c @@ -31,6 +31,7 @@ #include #include #include +#include #include #include #include diff --git a/drivers/media/video/cx23885/cx23885-video.c b/drivers/media/video/cx23885/cx23885-video.c index 70836af3ab4..5d609333630 100644 --- a/drivers/media/video/cx23885/cx23885-video.c +++ b/drivers/media/video/cx23885/cx23885-video.c @@ -26,6 +26,7 @@ #include #include #include +#include #include #include #include diff --git a/drivers/media/video/cx88/cx88-blackbird.c b/drivers/media/video/cx88/cx88-blackbird.c index 44eacfb0d0d..356d6896da3 100644 --- a/drivers/media/video/cx88/cx88-blackbird.c +++ b/drivers/media/video/cx88/cx88-blackbird.c @@ -32,6 +32,7 @@ #include #include #include +#include #include #include #include diff --git a/drivers/media/video/cx88/cx88-video.c b/drivers/media/video/cx88/cx88-video.c index b12770848c0..2bb54c3ef5c 100644 --- a/drivers/media/video/cx88/cx88-video.c +++ b/drivers/media/video/cx88/cx88-video.c @@ -31,6 +31,7 @@ #include #include #include +#include #include #include #include diff --git a/drivers/media/video/dabusb.c b/drivers/media/video/dabusb.c index ec2f45dde16..0664d111085 100644 --- a/drivers/media/video/dabusb.c +++ b/drivers/media/video/dabusb.c @@ -32,6 +32,7 @@ #include #include #include +#include #include #include #include diff --git a/drivers/media/video/pwc/pwc-if.c b/drivers/media/video/pwc/pwc-if.c index db25c3034c1..8d17cf61330 100644 --- a/drivers/media/video/pwc/pwc-if.c +++ b/drivers/media/video/pwc/pwc-if.c @@ -62,6 +62,7 @@ #include #include #include +#include #ifdef CONFIG_USB_PWC_INPUT_EVDEV #include #endif diff --git a/drivers/media/video/pwc/pwc.h b/drivers/media/video/pwc/pwc.h index 0be6f814f53..0b658dee05a 100644 --- a/drivers/media/video/pwc/pwc.h +++ b/drivers/media/video/pwc/pwc.h @@ -29,7 +29,6 @@ #include #include #include -#include #include #include #include diff --git a/drivers/media/video/s2255drv.c b/drivers/media/video/s2255drv.c index 6be845ccc7d..9e3262c0ba3 100644 --- a/drivers/media/video/s2255drv.c +++ b/drivers/media/video/s2255drv.c @@ -48,6 +48,7 @@ #include #include #include +#include #include #include #include diff --git a/drivers/media/video/saa5246a.c b/drivers/media/video/saa5246a.c index 155804b061e..b624a4c01fd 100644 --- a/drivers/media/video/saa5246a.c +++ b/drivers/media/video/saa5246a.c @@ -43,7 +43,6 @@ #include #include #include -#include #include #include #include diff --git a/drivers/media/video/saa5249.c b/drivers/media/video/saa5249.c index 271d6e931b7..12835fb82c9 100644 --- a/drivers/media/video/saa5249.c +++ b/drivers/media/video/saa5249.c @@ -46,7 +46,6 @@ #include #include #include -#include #include #include #include diff --git a/drivers/media/video/saa7134/saa7134-empress.c b/drivers/media/video/saa7134/saa7134-empress.c index add1757f893..296788c3bf0 100644 --- a/drivers/media/video/saa7134/saa7134-empress.c +++ b/drivers/media/video/saa7134/saa7134-empress.c @@ -22,6 +22,7 @@ #include #include #include +#include #include #include "saa7134-reg.h" diff --git a/drivers/media/video/se401.c b/drivers/media/video/se401.c index c8f05297d0f..85ffc2cba03 100644 --- a/drivers/media/video/se401.c +++ b/drivers/media/video/se401.c @@ -31,6 +31,7 @@ static const char version[] = "0.24"; #include #include #include +#include #include #include #include "se401.h" diff --git a/drivers/media/video/stk-webcam.c b/drivers/media/video/stk-webcam.c index 2e593704727..4d6785e6345 100644 --- a/drivers/media/video/stk-webcam.c +++ b/drivers/media/video/stk-webcam.c @@ -27,6 +27,7 @@ #include #include #include +#include #include #include diff --git a/drivers/media/video/stradis.c b/drivers/media/video/stradis.c index 0eb313082c9..eaada39c76f 100644 --- a/drivers/media/video/stradis.c +++ b/drivers/media/video/stradis.c @@ -26,6 +26,7 @@ #include #include #include +#include #include #include #include diff --git a/drivers/media/video/stv680.c b/drivers/media/video/stv680.c index 75f286f7a2e..8b4e7dafce7 100644 --- a/drivers/media/video/stv680.c +++ b/drivers/media/video/stv680.c @@ -62,6 +62,7 @@ #include #include #include +#include #include #include #include diff --git a/drivers/media/video/usbvideo/vicam.c b/drivers/media/video/usbvideo/vicam.c index 8d73979596f..45fce39ec9a 100644 --- a/drivers/media/video/usbvideo/vicam.c +++ b/drivers/media/video/usbvideo/vicam.c @@ -43,6 +43,7 @@ #include #include #include +#include #include #include #include diff --git a/drivers/media/video/usbvision/usbvision-video.c b/drivers/media/video/usbvision/usbvision-video.c index 90b58914f98..90d9b5c0e9a 100644 --- a/drivers/media/video/usbvision/usbvision-video.c +++ b/drivers/media/video/usbvision/usbvision-video.c @@ -50,6 +50,7 @@ #include #include #include +#include #include #include #include diff --git a/drivers/media/video/v4l2-dev.c b/drivers/media/video/v4l2-dev.c index 31eac66411d..a7f1b69a7da 100644 --- a/drivers/media/video/v4l2-dev.c +++ b/drivers/media/video/v4l2-dev.c @@ -25,7 +25,6 @@ #include #include #include -#include #include #include diff --git a/drivers/media/video/zoran/zoran_driver.c b/drivers/media/video/zoran/zoran_driver.c index 3d7df32a3d8..bcdefb1bcb3 100644 --- a/drivers/media/video/zoran/zoran_driver.c +++ b/drivers/media/video/zoran/zoran_driver.c @@ -49,6 +49,7 @@ #include #include #include +#include #include #include #include diff --git a/drivers/misc/sgi-gru/grufile.c b/drivers/misc/sgi-gru/grufile.c index fa2d93a9fb8..aed609832bc 100644 --- a/drivers/misc/sgi-gru/grufile.c +++ b/drivers/misc/sgi-gru/grufile.c @@ -29,7 +29,6 @@ #include #include #include -#include #include #include #include diff --git a/drivers/misc/sgi-gru/grukservices.c b/drivers/misc/sgi-gru/grukservices.c index eedbf9c3276..79689b10f93 100644 --- a/drivers/misc/sgi-gru/grukservices.c +++ b/drivers/misc/sgi-gru/grukservices.c @@ -24,7 +24,6 @@ #include #include #include -#include #include #include #include diff --git a/drivers/net/irda/irtty-sir.c b/drivers/net/irda/irtty-sir.c index d53aa958213..20f9bc62668 100644 --- a/drivers/net/irda/irtty-sir.c +++ b/drivers/net/irda/irtty-sir.c @@ -31,7 +31,6 @@ #include #include #include -#include #include #include diff --git a/drivers/pci/hotplug/cpci_hotplug_core.c b/drivers/pci/hotplug/cpci_hotplug_core.c index a5b9f6ae507..d703e73fffa 100644 --- a/drivers/pci/hotplug/cpci_hotplug_core.c +++ b/drivers/pci/hotplug/cpci_hotplug_core.c @@ -32,7 +32,6 @@ #include #include #include -#include #include #include #include diff --git a/drivers/pci/hotplug/cpqphp_ctrl.c b/drivers/pci/hotplug/cpqphp_ctrl.c index 2fa47af992a..0ff689afa75 100644 --- a/drivers/pci/hotplug/cpqphp_ctrl.c +++ b/drivers/pci/hotplug/cpqphp_ctrl.c @@ -34,7 +34,6 @@ #include #include #include -#include #include #include #include diff --git a/drivers/pci/hotplug/cpqphp_sysfs.c b/drivers/pci/hotplug/cpqphp_sysfs.c index 8450f4a6568..e6089bdb6e5 100644 --- a/drivers/pci/hotplug/cpqphp_sysfs.c +++ b/drivers/pci/hotplug/cpqphp_sysfs.c @@ -33,6 +33,7 @@ #include #include #include +#include #include #include "cpqphp.h" diff --git a/drivers/pci/hotplug/pciehp_ctrl.c b/drivers/pci/hotplug/pciehp_ctrl.c index ff4034502d2..8aab8edf123 100644 --- a/drivers/pci/hotplug/pciehp_ctrl.c +++ b/drivers/pci/hotplug/pciehp_ctrl.c @@ -30,7 +30,6 @@ #include #include #include -#include #include #include #include "../pci.h" diff --git a/drivers/pci/syscall.c b/drivers/pci/syscall.c index ec22284eed3..e1c1ec54089 100644 --- a/drivers/pci/syscall.c +++ b/drivers/pci/syscall.c @@ -9,7 +9,6 @@ #include #include -#include #include #include #include "pci.h" diff --git a/drivers/s390/block/dasd_ioctl.c b/drivers/s390/block/dasd_ioctl.c index 4ce3f72ee1c..df918ef2796 100644 --- a/drivers/s390/block/dasd_ioctl.c +++ b/drivers/s390/block/dasd_ioctl.c @@ -16,6 +16,7 @@ #include #include #include +#include #include #include diff --git a/drivers/scsi/qla2xxx/qla_mid.c b/drivers/scsi/qla2xxx/qla_mid.c index 650bcef08f2..cd78c501803 100644 --- a/drivers/scsi/qla2xxx/qla_mid.c +++ b/drivers/scsi/qla2xxx/qla_mid.c @@ -9,7 +9,6 @@ #include #include -#include #include #include diff --git a/drivers/telephony/ixj.c b/drivers/telephony/ixj.c index a913efc6966..40de151f278 100644 --- a/drivers/telephony/ixj.c +++ b/drivers/telephony/ixj.c @@ -257,6 +257,7 @@ #include /* everything... */ #include /* error codes */ #include +#include #include #include #include diff --git a/drivers/telephony/phonedev.c b/drivers/telephony/phonedev.c index b52cc830c0b..f3873f650bb 100644 --- a/drivers/telephony/phonedev.c +++ b/drivers/telephony/phonedev.c @@ -23,7 +23,6 @@ #include #include #include -#include #include #include diff --git a/drivers/usb/class/cdc-wdm.c b/drivers/usb/class/cdc-wdm.c index 0fe434505ac..ba589d4ca8b 100644 --- a/drivers/usb/class/cdc-wdm.c +++ b/drivers/usb/class/cdc-wdm.c @@ -15,7 +15,6 @@ #include #include #include -#include #include #include #include diff --git a/drivers/usb/gadget/amd5536udc.c b/drivers/usb/gadget/amd5536udc.c index 826f3adde5d..77352ccc245 100644 --- a/drivers/usb/gadget/amd5536udc.c +++ b/drivers/usb/gadget/amd5536udc.c @@ -48,7 +48,6 @@ #include #include #include -#include #include #include #include diff --git a/drivers/usb/gadget/langwell_udc.c b/drivers/usb/gadget/langwell_udc.c index 6829d596135..a3913519fd5 100644 --- a/drivers/usb/gadget/langwell_udc.c +++ b/drivers/usb/gadget/langwell_udc.c @@ -34,7 +34,6 @@ #include #include #include -#include #include #include #include diff --git a/drivers/usb/gadget/s3c2410_udc.c b/drivers/usb/gadget/s3c2410_udc.c index 9a2b8920532..a9b452fe622 100644 --- a/drivers/usb/gadget/s3c2410_udc.c +++ b/drivers/usb/gadget/s3c2410_udc.c @@ -28,7 +28,6 @@ #include #include #include -#include #include #include #include diff --git a/drivers/usb/host/r8a66597-hcd.c b/drivers/usb/host/r8a66597-hcd.c index 56976cc0352..e18f74946e6 100644 --- a/drivers/usb/host/r8a66597-hcd.c +++ b/drivers/usb/host/r8a66597-hcd.c @@ -26,7 +26,6 @@ #include #include #include -#include #include #include #include diff --git a/drivers/usb/misc/iowarrior.c b/drivers/usb/misc/iowarrior.c index 3c5fe5cee05..90e1a8dedfa 100644 --- a/drivers/usb/misc/iowarrior.c +++ b/drivers/usb/misc/iowarrior.c @@ -18,6 +18,7 @@ #include #include #include +#include #include #include diff --git a/drivers/usb/misc/rio500.c b/drivers/usb/misc/rio500.c index deb95bb49fd..d645f3899fe 100644 --- a/drivers/usb/misc/rio500.c +++ b/drivers/usb/misc/rio500.c @@ -32,6 +32,7 @@ #include #include #include +#include #include #include #include diff --git a/drivers/usb/misc/usblcd.c b/drivers/usb/misc/usblcd.c index e0ff9ccd866..29092b8e59c 100644 --- a/drivers/usb/misc/usblcd.c +++ b/drivers/usb/misc/usblcd.c @@ -16,6 +16,7 @@ #include #include #include +#include #include #include #include diff --git a/drivers/usb/musb/cppi_dma.h b/drivers/usb/musb/cppi_dma.h index 8a39de3e6e4..59bf949e589 100644 --- a/drivers/usb/musb/cppi_dma.h +++ b/drivers/usb/musb/cppi_dma.h @@ -5,7 +5,6 @@ #include #include -#include #include #include diff --git a/drivers/usb/musb/musb_core.h b/drivers/usb/musb/musb_core.h index f3772ca3b2c..381d648a36b 100644 --- a/drivers/usb/musb/musb_core.h +++ b/drivers/usb/musb/musb_core.h @@ -38,7 +38,6 @@ #include #include #include -#include #include #include #include diff --git a/drivers/usb/serial/ftdi_sio.c b/drivers/usb/serial/ftdi_sio.c index 5f08702f672..5a8ae274d25 100644 --- a/drivers/usb/serial/ftdi_sio.c +++ b/drivers/usb/serial/ftdi_sio.c @@ -33,6 +33,7 @@ #include #include #include +#include #include #include #include diff --git a/drivers/usb/serial/mos7840.c b/drivers/usb/serial/mos7840.c index c40f95c1951..c31940a307f 100644 --- a/drivers/usb/serial/mos7840.c +++ b/drivers/usb/serial/mos7840.c @@ -26,6 +26,7 @@ #include #include #include +#include #include #include #include diff --git a/drivers/usb/serial/usb-serial.c b/drivers/usb/serial/usb-serial.c index a84216464ca..0c39b55aeef 100644 --- a/drivers/usb/serial/usb-serial.c +++ b/drivers/usb/serial/usb-serial.c @@ -21,6 +21,7 @@ #include #include #include +#include #include #include #include diff --git a/drivers/video/fbmem.c b/drivers/video/fbmem.c index 53ea05645ff..a85c818be94 100644 --- a/drivers/video/fbmem.c +++ b/drivers/video/fbmem.c @@ -16,7 +16,6 @@ #include #include #include -#include #include #include #include diff --git a/fs/adfs/super.c b/fs/adfs/super.c index aad92f0a104..6910a98bd73 100644 --- a/fs/adfs/super.c +++ b/fs/adfs/super.c @@ -13,6 +13,7 @@ #include #include #include +#include #include #include "adfs.h" #include "dir_f.h" diff --git a/fs/afs/super.c b/fs/afs/super.c index ad0514d0115..e1ea1c240b6 100644 --- a/fs/afs/super.c +++ b/fs/afs/super.c @@ -18,6 +18,7 @@ #include #include #include +#include #include #include #include diff --git a/fs/autofs4/dev-ioctl.c b/fs/autofs4/dev-ioctl.c index f3da2eb51f5..00bf8fcb245 100644 --- a/fs/autofs4/dev-ioctl.c +++ b/fs/autofs4/dev-ioctl.c @@ -19,7 +19,6 @@ #include #include #include -#include #include #include #include diff --git a/fs/bfs/dir.c b/fs/bfs/dir.c index 54bd07d44e6..1e41aadb106 100644 --- a/fs/bfs/dir.c +++ b/fs/bfs/dir.c @@ -8,7 +8,6 @@ #include #include #include -#include #include #include #include "bfs.h" diff --git a/fs/bfs/file.c b/fs/bfs/file.c index 6a021265f01..88b9a3ff44e 100644 --- a/fs/bfs/file.c +++ b/fs/bfs/file.c @@ -11,7 +11,6 @@ #include #include -#include #include "bfs.h" #undef DEBUG diff --git a/fs/btrfs/compression.c b/fs/btrfs/compression.c index de1e2fd3208..9d8ba4d54a3 100644 --- a/fs/btrfs/compression.c +++ b/fs/btrfs/compression.c @@ -26,7 +26,6 @@ #include #include #include -#include #include #include #include diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 7c3cd248d8d..4b833972273 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -22,7 +22,6 @@ #include #include #include -#include #include #include #include diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 7ffa3d34ea1..791eab19e33 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -26,7 +26,6 @@ #include #include #include -#include #include #include #include diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index 9f4db848db1..bd88f25889f 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -27,7 +27,6 @@ #include #include #include -#include #include #include #include diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index 9f179d4832d..6d6d06cb6df 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -26,7 +26,6 @@ #include #include #include -#include #include #include #include diff --git a/fs/char_dev.c b/fs/char_dev.c index b7c9d5187a7..a173551e19d 100644 --- a/fs/char_dev.c +++ b/fs/char_dev.c @@ -13,7 +13,6 @@ #include #include #include -#include #include #include diff --git a/fs/compat.c b/fs/compat.c index fbadb947727..94502dab972 100644 --- a/fs/compat.c +++ b/fs/compat.c @@ -32,7 +32,6 @@ #include #include #include -#include #include #include #include diff --git a/fs/compat_ioctl.c b/fs/compat_ioctl.c index 626c7483b4d..f28f070a60f 100644 --- a/fs/compat_ioctl.c +++ b/fs/compat_ioctl.c @@ -19,6 +19,7 @@ #include #include #include +#include #include #include #include diff --git a/fs/exofs/super.c b/fs/exofs/super.c index a343b4ea62f..5ab10c3bbeb 100644 --- a/fs/exofs/super.c +++ b/fs/exofs/super.c @@ -31,6 +31,7 @@ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ +#include #include #include #include diff --git a/fs/ext2/ioctl.c b/fs/ext2/ioctl.c index 7cb4badef92..e7431309bdc 100644 --- a/fs/ext2/ioctl.c +++ b/fs/ext2/ioctl.c @@ -13,7 +13,6 @@ #include #include #include -#include #include #include diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c index bb415408fdb..24a6abb2aef 100644 --- a/fs/ext4/ioctl.c +++ b/fs/ext4/ioctl.c @@ -12,7 +12,6 @@ #include #include #include -#include #include #include #include diff --git a/fs/fat/dir.c b/fs/fat/dir.c index 38ff75a0fe2..530b4ca0151 100644 --- a/fs/fat/dir.c +++ b/fs/fat/dir.c @@ -16,7 +16,6 @@ #include #include #include -#include #include #include #include diff --git a/fs/fat/namei_msdos.c b/fs/fat/namei_msdos.c index 82f88733b68..bbc94ae4fd7 100644 --- a/fs/fat/namei_msdos.c +++ b/fs/fat/namei_msdos.c @@ -9,7 +9,6 @@ #include #include #include -#include #include "fat.h" /* Characters that are undesirable in an MS-DOS file name */ diff --git a/fs/fat/namei_vfat.c b/fs/fat/namei_vfat.c index 73471b7ecc8..cb6e8355711 100644 --- a/fs/fat/namei_vfat.c +++ b/fs/fat/namei_vfat.c @@ -19,7 +19,6 @@ #include #include #include -#include #include #include #include "fat.h" diff --git a/fs/fcntl.c b/fs/fcntl.c index a040b764f8e..ae413086db9 100644 --- a/fs/fcntl.c +++ b/fs/fcntl.c @@ -19,7 +19,6 @@ #include #include #include -#include #include #include diff --git a/fs/freevxfs/vxfs_super.c b/fs/freevxfs/vxfs_super.c index cdbd1654e4c..1e8af939b3e 100644 --- a/fs/freevxfs/vxfs_super.c +++ b/fs/freevxfs/vxfs_super.c @@ -38,6 +38,7 @@ #include #include #include +#include #include #include #include diff --git a/fs/hfs/super.c b/fs/hfs/super.c index 6f833dc8e91..f7fcbe49da7 100644 --- a/fs/hfs/super.c +++ b/fs/hfs/super.c @@ -19,6 +19,7 @@ #include #include #include +#include #include #include "hfs_fs.h" diff --git a/fs/hfsplus/super.c b/fs/hfsplus/super.c index 9fc3af0c0da..c0759fe0855 100644 --- a/fs/hfsplus/super.c +++ b/fs/hfsplus/super.c @@ -12,6 +12,7 @@ #include #include #include +#include #include #include diff --git a/fs/hpfs/dir.c b/fs/hpfs/dir.c index 6916c41d701..8865c94f55f 100644 --- a/fs/hpfs/dir.c +++ b/fs/hpfs/dir.c @@ -6,6 +6,7 @@ * directory VFS functions */ +#include #include "hpfs_fn.h" static int hpfs_dir_release(struct inode *inode, struct file *filp) diff --git a/fs/hpfs/file.c b/fs/hpfs/file.c index 64ab5225920..3efabff0036 100644 --- a/fs/hpfs/file.c +++ b/fs/hpfs/file.c @@ -6,6 +6,7 @@ * file VFS functions */ +#include #include "hpfs_fn.h" #define BLOCKS(size) (((size) + 511) >> 9) diff --git a/fs/hpfs/hpfs_fn.h b/fs/hpfs/hpfs_fn.h index c2ea31bae31..701ca54c086 100644 --- a/fs/hpfs/hpfs_fn.h +++ b/fs/hpfs/hpfs_fn.h @@ -13,7 +13,6 @@ #include #include #include -#include #include "hpfs.h" diff --git a/fs/hpfs/inode.c b/fs/hpfs/inode.c index 39a1bfbea31..fe703ae46bc 100644 --- a/fs/hpfs/inode.c +++ b/fs/hpfs/inode.c @@ -6,6 +6,7 @@ * inode VFS functions */ +#include #include "hpfs_fn.h" void hpfs_init_inode(struct inode *i) diff --git a/fs/hpfs/namei.c b/fs/hpfs/namei.c index b649232dde9..82b9c4ba9ed 100644 --- a/fs/hpfs/namei.c +++ b/fs/hpfs/namei.c @@ -6,6 +6,7 @@ * adding & removing files & directories */ #include +#include #include "hpfs_fn.h" static int hpfs_mkdir(struct inode *dir, struct dentry *dentry, int mode) diff --git a/fs/jffs2/super.c b/fs/jffs2/super.c index 07a22caf268..0035c021395 100644 --- a/fs/jffs2/super.c +++ b/fs/jffs2/super.c @@ -12,6 +12,7 @@ #include #include #include +#include #include #include #include diff --git a/fs/lockd/clntproc.c b/fs/lockd/clntproc.c index f2fdcbce143..4336adba952 100644 --- a/fs/lockd/clntproc.c +++ b/fs/lockd/clntproc.c @@ -7,6 +7,7 @@ */ #include +#include #include #include #include diff --git a/fs/lockd/svc4proc.c b/fs/lockd/svc4proc.c index 1725037374c..bd173a6ca3b 100644 --- a/fs/lockd/svc4proc.c +++ b/fs/lockd/svc4proc.c @@ -10,6 +10,7 @@ #include #include #include +#include #include #include #include diff --git a/fs/lockd/svcproc.c b/fs/lockd/svcproc.c index 3688e55901f..e1d28ddd216 100644 --- a/fs/lockd/svcproc.c +++ b/fs/lockd/svcproc.c @@ -10,6 +10,7 @@ #include #include #include +#include #include #include #include diff --git a/fs/nfs/delegation.c b/fs/nfs/delegation.c index af05b918cb5..6dd48a4405b 100644 --- a/fs/nfs/delegation.c +++ b/fs/nfs/delegation.c @@ -10,6 +10,7 @@ #include #include #include +#include #include #include diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index 89f98e9a024..38d42c29fb9 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -29,7 +29,6 @@ #include #include #include -#include #include #include #include diff --git a/fs/nfs/file.c b/fs/nfs/file.c index 0055b813ec2..05062329b67 100644 --- a/fs/nfs/file.c +++ b/fs/nfs/file.c @@ -26,7 +26,6 @@ #include #include #include -#include #include #include diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index 64f87194d39..bd7938eda6a 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -30,7 +30,6 @@ #include #include #include -#include #include #include #include diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 92ce4351781..ff0c080db59 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -45,7 +45,6 @@ #include #include #include -#include #include #include #include diff --git a/fs/nfs/read.c b/fs/nfs/read.c index 96c4ebfa46f..73ea5e8d66c 100644 --- a/fs/nfs/read.c +++ b/fs/nfs/read.c @@ -18,7 +18,6 @@ #include #include #include -#include #include diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c index 1250fb978ac..6d0847562d8 100644 --- a/fs/nfsd/nfsctl.c +++ b/fs/nfsd/nfsctl.c @@ -25,7 +25,6 @@ #include #include #include -#include #include #include diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c index d4c9884cd54..492c79b7800 100644 --- a/fs/nfsd/nfssvc.c +++ b/fs/nfsd/nfssvc.c @@ -18,7 +18,6 @@ #include #include #include -#include #include #include #include diff --git a/fs/nilfs2/dir.c b/fs/nilfs2/dir.c index 54100acc110..1a4fa04cf07 100644 --- a/fs/nilfs2/dir.c +++ b/fs/nilfs2/dir.c @@ -43,7 +43,6 @@ */ #include -#include #include "nilfs.h" #include "page.h" diff --git a/fs/ocfs2/ioctl.c b/fs/ocfs2/ioctl.c index 9fcd36dcc9a..467b413bec2 100644 --- a/fs/ocfs2/ioctl.c +++ b/fs/ocfs2/ioctl.c @@ -7,7 +7,6 @@ #include #include -#include #define MLOG_MASK_PREFIX ML_INODE #include diff --git a/fs/reiserfs/xattr.c b/fs/reiserfs/xattr.c index f3d47d85684..6925b835a43 100644 --- a/fs/reiserfs/xattr.c +++ b/fs/reiserfs/xattr.c @@ -46,7 +46,6 @@ #include #include #include -#include #include #include diff --git a/fs/squashfs/super.c b/fs/squashfs/super.c index 3b52770f46f..cb5fc57e370 100644 --- a/fs/squashfs/super.c +++ b/fs/squashfs/super.c @@ -30,6 +30,7 @@ #include #include #include +#include #include #include #include diff --git a/fs/ubifs/ioctl.c b/fs/ubifs/ioctl.c index 6db7a6be6c9..8aacd64957a 100644 --- a/fs/ubifs/ioctl.c +++ b/fs/ubifs/ioctl.c @@ -25,7 +25,6 @@ /* This file implements EXT2-compatible extended attribute ioctl() calls */ #include -#include #include #include "ubifs.h" diff --git a/fs/xfs/linux-2.6/xfs_file.c b/fs/xfs/linux-2.6/xfs_file.c index f4e25544157..0542fd50764 100644 --- a/fs/xfs/linux-2.6/xfs_file.c +++ b/fs/xfs/linux-2.6/xfs_file.c @@ -41,7 +41,6 @@ #include "xfs_ioctl.h" #include -#include static struct vm_operations_struct xfs_file_vm_ops; diff --git a/include/linux/crash_dump.h b/include/linux/crash_dump.h index 2dac064d835..0026f267da2 100644 --- a/include/linux/crash_dump.h +++ b/include/linux/crash_dump.h @@ -3,7 +3,6 @@ #ifdef CONFIG_CRASH_DUMP #include -#include #include #include diff --git a/include/linux/hardirq.h b/include/linux/hardirq.h index 45257475623..8246c697863 100644 --- a/include/linux/hardirq.h +++ b/include/linux/hardirq.h @@ -2,7 +2,9 @@ #define LINUX_HARDIRQ_H #include +#ifdef CONFIG_PREEMPT #include +#endif #include #include #include diff --git a/include/linux/quotaops.h b/include/linux/quotaops.h index 7bc45759368..26361c4c037 100644 --- a/include/linux/quotaops.h +++ b/include/linux/quotaops.h @@ -7,7 +7,6 @@ #ifndef _LINUX_QUOTAOPS_ #define _LINUX_QUOTAOPS_ -#include #include static inline struct quota_info *sb_dqopt(struct super_block *sb) diff --git a/include/linux/sunrpc/xdr.h b/include/linux/sunrpc/xdr.h index d8910b68e1b..b99c625fddf 100644 --- a/include/linux/sunrpc/xdr.h +++ b/include/linux/sunrpc/xdr.h @@ -12,7 +12,6 @@ #include #include #include -#include /* * Buffer adjustment diff --git a/kernel/power/user.c b/kernel/power/user.c index ed97375daae..bf0014d6a5f 100644 --- a/kernel/power/user.c +++ b/kernel/power/user.c @@ -23,7 +23,6 @@ #include #include #include -#include #include #include diff --git a/kernel/trace/blktrace.c b/kernel/trace/blktrace.c index 39af8af6fc3..1090b0aed9b 100644 --- a/kernel/trace/blktrace.c +++ b/kernel/trace/blktrace.c @@ -22,6 +22,7 @@ #include #include #include +#include #include #include diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 3aa0a0dfdfa..8bc8d8afea6 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -17,6 +17,7 @@ #include #include #include +#include #include #include #include diff --git a/net/appletalk/ddp.c b/net/appletalk/ddp.c index 590b8396362..bfbe13786bb 100644 --- a/net/appletalk/ddp.c +++ b/net/appletalk/ddp.c @@ -54,6 +54,7 @@ #include #include #include +#include #include /* For TIOCOUTQ/INQ */ #include #include diff --git a/net/ipx/af_ipx.c b/net/ipx/af_ipx.c index 417b0e30949..f1118d92a19 100644 --- a/net/ipx/af_ipx.c +++ b/net/ipx/af_ipx.c @@ -41,6 +41,7 @@ #include #include #include +#include #include #include #include diff --git a/net/irda/af_irda.c b/net/irda/af_irda.c index cb762c8723e..80cf29aae09 100644 --- a/net/irda/af_irda.c +++ b/net/irda/af_irda.c @@ -45,6 +45,7 @@ #include #include #include +#include #include #include #include diff --git a/net/irda/irnet/irnet.h b/net/irda/irnet/irnet.h index bccf4d0059f..b001c361ad3 100644 --- a/net/irda/irnet/irnet.h +++ b/net/irda/irnet/irnet.h @@ -241,7 +241,6 @@ #include #include -#include #include #include #include diff --git a/net/irda/irnet/irnet_ppp.c b/net/irda/irnet/irnet_ppp.c index 6d8ae03c14f..68cbcb19cbd 100644 --- a/net/irda/irnet/irnet_ppp.c +++ b/net/irda/irnet/irnet_ppp.c @@ -13,6 +13,7 @@ * 2) as a control channel (write commands, read events) */ +#include #include "irnet_ppp.h" /* Private header */ /* Please put other headers in irnet.h - Thanks */ diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index 5bc2f45bddf..ebfcf9b8990 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -28,7 +28,6 @@ #include #include #include -#include #include #include #include diff --git a/net/sunrpc/sched.c b/net/sunrpc/sched.c index 1102ce1251f..8f459abe97c 100644 --- a/net/sunrpc/sched.c +++ b/net/sunrpc/sched.c @@ -16,7 +16,6 @@ #include #include #include -#include #include #include diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c index 6f33d33cc06..27d44332f01 100644 --- a/net/sunrpc/svc_xprt.c +++ b/net/sunrpc/svc_xprt.c @@ -5,6 +5,7 @@ */ #include +#include #include #include #include diff --git a/net/wanrouter/wanmain.c b/net/wanrouter/wanmain.c index 466e2d22d25..258daa80ad9 100644 --- a/net/wanrouter/wanmain.c +++ b/net/wanrouter/wanmain.c @@ -48,6 +48,7 @@ #include #include /* support for loadable modules */ #include /* kmalloc(), kfree() */ +#include #include #include /* inline mem*, str* functions */ diff --git a/net/x25/af_x25.c b/net/x25/af_x25.c index 21cdc872004..5e6c072c64d 100644 --- a/net/x25/af_x25.c +++ b/net/x25/af_x25.c @@ -40,6 +40,7 @@ #include #include #include +#include #include #include #include -- cgit v1.2.3-70-g09d2 From ee686ca919193d7c1f87f907e732df5e2f942523 Mon Sep 17 00:00:00 2001 From: Andreas Jaggi Date: Tue, 14 Jul 2009 09:35:59 -0700 Subject: gre: fix ToS/DiffServ inherit bug Fixes two bugs: - ToS/DiffServ inheritance was unintentionally activated when using impair fixed ToS values - ECN bit was lost during ToS/DiffServ inheritance Signed-off-by: Andreas Jaggi Signed-off-by: David S. Miller --- net/ipv4/ip_gre.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c index 44e2a3d2359..cb4a0f4bd5e 100644 --- a/net/ipv4/ip_gre.c +++ b/net/ipv4/ip_gre.c @@ -735,10 +735,10 @@ static int ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev) } tos = tiph->tos; - if (tos&1) { + if (tos == 1) { + tos = 0; if (skb->protocol == htons(ETH_P_IP)) tos = old_iph->tos; - tos &= ~1; } { -- cgit v1.2.3-70-g09d2 From a17d1720aa35623a9bef3707b36242706714bca5 Mon Sep 17 00:00:00 2001 From: Abhishek Kulkarni Date: Tue, 14 Jul 2009 13:24:10 -0500 Subject: 9p: default 9p transport module fix The default 9p transport module is not chosen unless an option parameter (any) is passed to mount, which thus returns a ENOPROTOSUPPORT. This fix moves the check out of parse_opts into p9_client_create. Signed-off-by: Abhishek Kulkarni Signed-off-by: Eric Van Hensbergen --- net/9p/client.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/9p/client.c b/net/9p/client.c index dd43a8289b0..783a4107740 100644 --- a/net/9p/client.c +++ b/net/9p/client.c @@ -117,9 +117,6 @@ static int parse_opts(char *opts, struct p9_client *clnt) } } - if (!clnt->trans_mod) - clnt->trans_mod = v9fs_get_default_trans(); - kfree(options); return ret; } @@ -689,6 +686,9 @@ struct p9_client *p9_client_create(const char *dev_name, char *options) if (err < 0) goto error; + if (!clnt->trans_mod) + clnt->trans_mod = v9fs_get_default_trans(); + if (clnt->trans_mod == NULL) { err = -EPROTONOSUPPORT; P9_DPRINTK(P9_DEBUG_ERROR, -- cgit v1.2.3-70-g09d2 From eedfe1c4289216af5a0a7f38e6b2c4d3f07c087f Mon Sep 17 00:00:00 2001 From: Abhishek Kulkarni Date: Tue, 14 Jul 2009 13:25:41 -0500 Subject: 9p: Possible regression in p9_client_stat Fix a possible regression with p9_client_stat where it can try to kfree an ERR_PTR after an erroneous p9pdu_readf. Also remove an unnecessary data buffer increment in p9_client_read. Signed-off-by: Abhishek Kulkarni Signed-off-by: Eric Van Hensbergen --- net/9p/client.c | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) (limited to 'net') diff --git a/net/9p/client.c b/net/9p/client.c index 783a4107740..787ccddb85e 100644 --- a/net/9p/client.c +++ b/net/9p/client.c @@ -1098,7 +1098,6 @@ p9_client_read(struct p9_fid *fid, char *data, char __user *udata, u64 offset, if (data) { memmove(data, dataptr, count); - data += count; } if (udata) { @@ -1192,9 +1191,9 @@ struct p9_wstat *p9_client_stat(struct p9_fid *fid) err = p9pdu_readf(req->rc, clnt->dotu, "wS", &ignored, ret); if (err) { - ret = ERR_PTR(err); p9pdu_dump(1, req->rc); - goto free_and_error; + p9_free_req(clnt, req); + goto error; } P9_DPRINTK(P9_DEBUG_9P, @@ -1211,8 +1210,6 @@ struct p9_wstat *p9_client_stat(struct p9_fid *fid) p9_free_req(clnt, req); return ret; -free_and_error: - p9_free_req(clnt, req); error: kfree(ret); return ERR_PTR(err); -- cgit v1.2.3-70-g09d2 From f7e5cc0c40dff92bad2894153f675c6c542ba2f0 Mon Sep 17 00:00:00 2001 From: Lothar Waßmann Date: Tue, 14 Jul 2009 23:10:21 +0000 Subject: net/can bugfix: use after free bug in can protocol drivers Fix a use after free bug in can protocol drivers The release functions of the can protocol drivers lack a call to sock_orphan() which leads to referencing freed memory under certain circumstances. This patch fixes a bug reported here: https://lists.berlios.de/pipermail/socketcan-users/2009-July/000985.html Signed-off-by: Lothar Wassmann Acked-by: Oliver Hartkopp Signed-off-by: David S. Miller --- net/can/bcm.c | 3 +++ net/can/raw.c | 3 +++ 2 files changed, 6 insertions(+) (limited to 'net') diff --git a/net/can/bcm.c b/net/can/bcm.c index 95d7f32643a..1d17e41b892 100644 --- a/net/can/bcm.c +++ b/net/can/bcm.c @@ -1469,6 +1469,9 @@ static int bcm_release(struct socket *sock) bo->ifindex = 0; } + sock_orphan(sk); + sock->sk = NULL; + release_sock(sk); sock_put(sk); diff --git a/net/can/raw.c b/net/can/raw.c index 6aa154e806a..3482546e888 100644 --- a/net/can/raw.c +++ b/net/can/raw.c @@ -306,6 +306,9 @@ static int raw_release(struct socket *sock) ro->bound = 0; ro->count = 0; + sock_orphan(sk); + sock->sk = NULL; + release_sock(sk); sock_put(sk); -- cgit v1.2.3-70-g09d2 From b13bb2e9933b9dfa25c81d959d847c843481111e Mon Sep 17 00:00:00 2001 From: Lothar Waßmann Date: Tue, 14 Jul 2009 23:12:25 +0000 Subject: net/can: add module alias to can protocol drivers Add appropriate MODULE_ALIAS() to facilitate autoloading of can protocol drivers Signed-off-by: Lothar Wassmann Acked-by: Oliver Hartkopp Signed-off-by: David S. Miller --- net/can/bcm.c | 1 + net/can/raw.c | 1 + 2 files changed, 2 insertions(+) (limited to 'net') diff --git a/net/can/bcm.c b/net/can/bcm.c index 1d17e41b892..72720c71035 100644 --- a/net/can/bcm.c +++ b/net/can/bcm.c @@ -75,6 +75,7 @@ static __initdata const char banner[] = KERN_INFO MODULE_DESCRIPTION("PF_CAN broadcast manager protocol"); MODULE_LICENSE("Dual BSD/GPL"); MODULE_AUTHOR("Oliver Hartkopp "); +MODULE_ALIAS("can-proto-2"); /* easy access to can_frame payload */ static inline u64 GET_U64(const struct can_frame *cp) diff --git a/net/can/raw.c b/net/can/raw.c index 3482546e888..f4cc44548bd 100644 --- a/net/can/raw.c +++ b/net/can/raw.c @@ -62,6 +62,7 @@ static __initdata const char banner[] = MODULE_DESCRIPTION("PF_CAN raw protocol"); MODULE_LICENSE("Dual BSD/GPL"); MODULE_AUTHOR("Urs Thuermann "); +MODULE_ALIAS("can-proto-1"); #define MASK_ALL 0 -- cgit v1.2.3-70-g09d2 From aa6a03eb0ae859c1371555ef381de4c96ca1e4e6 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Thu, 16 Jul 2009 14:01:54 +0200 Subject: netfilter: xt_osf: fix nf_log_packet() arguments The first argument is the address family, the second one the hook number. Signed-off-by: Patrick McHardy --- net/netfilter/xt_osf.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/netfilter/xt_osf.c b/net/netfilter/xt_osf.c index 863e40977a4..0f482e2440b 100644 --- a/net/netfilter/xt_osf.c +++ b/net/netfilter/xt_osf.c @@ -330,7 +330,8 @@ static bool xt_osf_match_packet(const struct sk_buff *skb, fcount++; if (info->flags & XT_OSF_LOG) - nf_log_packet(p->hooknum, 0, skb, p->in, p->out, NULL, + nf_log_packet(p->family, p->hooknum, skb, + p->in, p->out, NULL, "%s [%s:%s] : %pi4:%d -> %pi4:%d hops=%d\n", f->genre, f->version, f->subtype, &ip->saddr, ntohs(tcp->source), @@ -345,7 +346,7 @@ static bool xt_osf_match_packet(const struct sk_buff *skb, rcu_read_unlock(); if (!fcount && (info->flags & XT_OSF_LOG)) - nf_log_packet(p->hooknum, 0, skb, p->in, p->out, NULL, + nf_log_packet(p->family, p->hooknum, skb, p->in, p->out, NULL, "Remote OS is not known: %pi4:%u -> %pi4:%u\n", &ip->saddr, ntohs(tcp->source), &ip->daddr, ntohs(tcp->dest)); -- cgit v1.2.3-70-g09d2 From 941297f443f871b8c3372feccf27a8733f6ce9e9 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 16 Jul 2009 14:03:40 +0200 Subject: netfilter: nf_conntrack: nf_conntrack_alloc() fixes When a slab cache uses SLAB_DESTROY_BY_RCU, we must be careful when allocating objects, since slab allocator could give a freed object still used by lockless readers. In particular, nf_conntrack RCU lookups rely on ct->tuplehash[xxx].hnnode.next being always valid (ie containing a valid 'nulls' value, or a valid pointer to next object in hash chain.) kmem_cache_zalloc() setups object with NULL values, but a NULL value is not valid for ct->tuplehash[xxx].hnnode.next. Fix is to call kmem_cache_alloc() and do the zeroing ourself. As spotted by Patrick, we also need to make sure lookup keys are committed to memory before setting refcount to 1, or a lockless reader could get a reference on the old version of the object. Its key re-check could then pass the barrier. Signed-off-by: Eric Dumazet Signed-off-by: Patrick McHardy --- Documentation/RCU/rculist_nulls.txt | 7 ++++++- net/netfilter/nf_conntrack_core.c | 21 ++++++++++++++++++--- 2 files changed, 24 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/Documentation/RCU/rculist_nulls.txt b/Documentation/RCU/rculist_nulls.txt index 93cb28d05dc..18f9651ff23 100644 --- a/Documentation/RCU/rculist_nulls.txt +++ b/Documentation/RCU/rculist_nulls.txt @@ -83,11 +83,12 @@ not detect it missed following items in original chain. obj = kmem_cache_alloc(...); lock_chain(); // typically a spin_lock() obj->key = key; -atomic_inc(&obj->refcnt); /* * we need to make sure obj->key is updated before obj->next + * or obj->refcnt */ smp_wmb(); +atomic_set(&obj->refcnt, 1); hlist_add_head_rcu(&obj->obj_node, list); unlock_chain(); // typically a spin_unlock() @@ -159,6 +160,10 @@ out: obj = kmem_cache_alloc(cachep); lock_chain(); // typically a spin_lock() obj->key = key; +/* + * changes to obj->key must be visible before refcnt one + */ +smp_wmb(); atomic_set(&obj->refcnt, 1); /* * insert obj in RCU way (readers might be traversing chain) diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index 7508f11c5b3..b5869b9574b 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -561,23 +561,38 @@ struct nf_conn *nf_conntrack_alloc(struct net *net, } } - ct = kmem_cache_zalloc(nf_conntrack_cachep, gfp); + /* + * Do not use kmem_cache_zalloc(), as this cache uses + * SLAB_DESTROY_BY_RCU. + */ + ct = kmem_cache_alloc(nf_conntrack_cachep, gfp); if (ct == NULL) { pr_debug("nf_conntrack_alloc: Can't alloc conntrack.\n"); atomic_dec(&net->ct.count); return ERR_PTR(-ENOMEM); } - + /* + * Let ct->tuplehash[IP_CT_DIR_ORIGINAL].hnnode.next + * and ct->tuplehash[IP_CT_DIR_REPLY].hnnode.next unchanged. + */ + memset(&ct->tuplehash[IP_CT_DIR_MAX], 0, + sizeof(*ct) - offsetof(struct nf_conn, tuplehash[IP_CT_DIR_MAX])); spin_lock_init(&ct->lock); - atomic_set(&ct->ct_general.use, 1); ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple = *orig; + ct->tuplehash[IP_CT_DIR_ORIGINAL].hnnode.pprev = NULL; ct->tuplehash[IP_CT_DIR_REPLY].tuple = *repl; + ct->tuplehash[IP_CT_DIR_REPLY].hnnode.pprev = NULL; /* Don't set timer yet: wait for confirmation */ setup_timer(&ct->timeout, death_by_timeout, (unsigned long)ct); #ifdef CONFIG_NET_NS ct->ct_net = net; #endif + /* + * changes to lookup keys must be done before setting refcnt to 1 + */ + smp_wmb(); + atomic_set(&ct->ct_general.use, 1); return ct; } EXPORT_SYMBOL_GPL(nf_conntrack_alloc); -- cgit v1.2.3-70-g09d2 From 4dc6dc7162c08b9965163c9ab3f9375d4adff2c7 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 15 Jul 2009 23:13:10 +0000 Subject: net: sock_copy() fixes Commit e912b1142be8f1e2c71c71001dc992c6e5eb2ec1 (net: sk_prot_alloc() should not blindly overwrite memory) took care of not zeroing whole new socket at allocation time. sock_copy() is another spot where we should be very careful. We should not set refcnt to a non null value, until we are sure other fields are correctly setup, or a lockless reader could catch this socket by mistake, while not fully (re)initialized. This patch puts sk_node & sk_refcnt to the very beginning of struct sock to ease sock_copy() & sk_prot_alloc() job. We add appropriate smp_wmb() before sk_refcnt initializations to match our RCU requirements (changes to sock keys should be committed to memory before sk_refcnt setting) Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/net/sock.h | 32 +++++++++++++++++++------------- net/core/sock.c | 20 ++++++++++++++++++-- 2 files changed, 37 insertions(+), 15 deletions(-) (limited to 'net') diff --git a/include/net/sock.h b/include/net/sock.h index 2c0da9239b9..950409dcec3 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -104,15 +104,15 @@ struct net; /** * struct sock_common - minimal network layer representation of sockets + * @skc_node: main hash linkage for various protocol lookup tables + * @skc_nulls_node: main hash linkage for UDP/UDP-Lite protocol + * @skc_refcnt: reference count + * @skc_hash: hash value used with various protocol lookup tables * @skc_family: network address family * @skc_state: Connection state * @skc_reuse: %SO_REUSEADDR setting * @skc_bound_dev_if: bound device index if != 0 - * @skc_node: main hash linkage for various protocol lookup tables - * @skc_nulls_node: main hash linkage for UDP/UDP-Lite protocol * @skc_bind_node: bind hash linkage for various protocol lookup tables - * @skc_refcnt: reference count - * @skc_hash: hash value used with various protocol lookup tables * @skc_prot: protocol handlers inside a network family * @skc_net: reference to the network namespace of this socket * @@ -120,17 +120,21 @@ struct net; * for struct sock and struct inet_timewait_sock. */ struct sock_common { - unsigned short skc_family; - volatile unsigned char skc_state; - unsigned char skc_reuse; - int skc_bound_dev_if; + /* + * first fields are not copied in sock_copy() + */ union { struct hlist_node skc_node; struct hlist_nulls_node skc_nulls_node; }; - struct hlist_node skc_bind_node; atomic_t skc_refcnt; + unsigned int skc_hash; + unsigned short skc_family; + volatile unsigned char skc_state; + unsigned char skc_reuse; + int skc_bound_dev_if; + struct hlist_node skc_bind_node; struct proto *skc_prot; #ifdef CONFIG_NET_NS struct net *skc_net; @@ -208,15 +212,17 @@ struct sock { * don't add nothing before this first member (__sk_common) --acme */ struct sock_common __sk_common; +#define sk_node __sk_common.skc_node +#define sk_nulls_node __sk_common.skc_nulls_node +#define sk_refcnt __sk_common.skc_refcnt + +#define sk_copy_start __sk_common.skc_hash +#define sk_hash __sk_common.skc_hash #define sk_family __sk_common.skc_family #define sk_state __sk_common.skc_state #define sk_reuse __sk_common.skc_reuse #define sk_bound_dev_if __sk_common.skc_bound_dev_if -#define sk_node __sk_common.skc_node -#define sk_nulls_node __sk_common.skc_nulls_node #define sk_bind_node __sk_common.skc_bind_node -#define sk_refcnt __sk_common.skc_refcnt -#define sk_hash __sk_common.skc_hash #define sk_prot __sk_common.skc_prot #define sk_net __sk_common.skc_net kmemcheck_bitfield_begin(flags); diff --git a/net/core/sock.c b/net/core/sock.c index ba5d2116aea..d9eec153d53 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -919,13 +919,19 @@ static inline void sock_lock_init(struct sock *sk) af_family_keys + sk->sk_family); } +/* + * Copy all fields from osk to nsk but nsk->sk_refcnt must not change yet, + * even temporarly, because of RCU lookups. sk_node should also be left as is. + */ static void sock_copy(struct sock *nsk, const struct sock *osk) { #ifdef CONFIG_SECURITY_NETWORK void *sptr = nsk->sk_security; #endif - - memcpy(nsk, osk, osk->sk_prot->obj_size); + BUILD_BUG_ON(offsetof(struct sock, sk_copy_start) != + sizeof(osk->sk_node) + sizeof(osk->sk_refcnt)); + memcpy(&nsk->sk_copy_start, &osk->sk_copy_start, + osk->sk_prot->obj_size - offsetof(struct sock, sk_copy_start)); #ifdef CONFIG_SECURITY_NETWORK nsk->sk_security = sptr; security_sk_clone(osk, nsk); @@ -1140,6 +1146,11 @@ struct sock *sk_clone(const struct sock *sk, const gfp_t priority) newsk->sk_err = 0; newsk->sk_priority = 0; + /* + * Before updating sk_refcnt, we must commit prior changes to memory + * (Documentation/RCU/rculist_nulls.txt for details) + */ + smp_wmb(); atomic_set(&newsk->sk_refcnt, 2); /* @@ -1855,6 +1866,11 @@ void sock_init_data(struct socket *sock, struct sock *sk) sk->sk_stamp = ktime_set(-1L, 0); + /* + * Before updating sk_refcnt, we must commit prior changes to memory + * (Documentation/RCU/rculist_nulls.txt for details) + */ + smp_wmb(); atomic_set(&sk->sk_refcnt, 1); atomic_set(&sk->sk_wmem_alloc, 1); atomic_set(&sk->sk_drops, 0); -- cgit v1.2.3-70-g09d2 From e3afe7b75ed8f809c1473ea9b39267487c187ccb Mon Sep 17 00:00:00 2001 From: John Dykstra Date: Thu, 16 Jul 2009 05:04:51 +0000 Subject: tcp: Fix MD5 signature checking on IPv4 mapped sockets Fix MD5 signature checking so that an IPv4 active open to an IPv6 socket can succeed. In particular, use the correct address family's signature generation function for the SYN/ACK. Reported-by: Stephen Hemminger Signed-off-by: John Dykstra Signed-off-by: David S. Miller --- include/net/tcp.h | 5 +++++ net/ipv4/tcp_ipv4.c | 1 + net/ipv4/tcp_output.c | 2 +- net/ipv6/tcp_ipv6.c | 1 + 4 files changed, 8 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/include/net/tcp.h b/include/net/tcp.h index 19f4150f4d4..88af8430647 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -1425,6 +1425,11 @@ struct tcp_request_sock_ops { #ifdef CONFIG_TCP_MD5SIG struct tcp_md5sig_key *(*md5_lookup) (struct sock *sk, struct request_sock *req); + int (*calc_md5_hash) (char *location, + struct tcp_md5sig_key *md5, + struct sock *sk, + struct request_sock *req, + struct sk_buff *skb); #endif }; diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 5a1ca2698c8..7c107eb876c 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -1160,6 +1160,7 @@ struct request_sock_ops tcp_request_sock_ops __read_mostly = { #ifdef CONFIG_TCP_MD5SIG static struct tcp_request_sock_ops tcp_request_sock_ipv4_ops = { .md5_lookup = tcp_v4_reqsk_md5_lookup, + .calc_md5_hash = tcp_v4_md5_hash_skb, }; #endif diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 5bdf08d312d..bd62712848f 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -2261,7 +2261,7 @@ struct sk_buff *tcp_make_synack(struct sock *sk, struct dst_entry *dst, #ifdef CONFIG_TCP_MD5SIG /* Okay, we have all we need - do the md5 hash if needed */ if (md5) { - tp->af_specific->calc_md5_hash(md5_hash_location, + tcp_rsk(req)->af_specific->calc_md5_hash(md5_hash_location, md5, NULL, req, skb); } #endif diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 58810c65b63..ae3d6575356 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -896,6 +896,7 @@ struct request_sock_ops tcp6_request_sock_ops __read_mostly = { #ifdef CONFIG_TCP_MD5SIG static struct tcp_request_sock_ops tcp_request_sock_ipv6_ops = { .md5_lookup = tcp_v6_reqsk_md5_lookup, + .calc_md5_hash = tcp_v6_md5_hash_skb, }; #endif -- cgit v1.2.3-70-g09d2 From e547bc1eccf539b7403138d8ded913ffd2b7fd0d Mon Sep 17 00:00:00 2001 From: John Dykstra Date: Fri, 17 Jul 2009 09:23:22 +0000 Subject: tcp: Use correct peer adr when copying MD5 keys When the TCP connection handshake completes on the passive side, a variety of state must be set up in the "child" sock, including the key if MD5 authentication is being used. Fix TCP for both address families to label the key with the peer's destination address, rather than the address from the listening sock, which is usually the wildcard. Reported-by: Stephen Hemminger Signed-off-by: John Dykstra Signed-off-by: David S. Miller --- net/ipv4/tcp_ipv4.c | 2 +- net/ipv6/tcp_ipv6.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 7c107eb876c..6d88219c5e2 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -1374,7 +1374,7 @@ struct sock *tcp_v4_syn_recv_sock(struct sock *sk, struct sk_buff *skb, */ char *newkey = kmemdup(key->key, key->keylen, GFP_ATOMIC); if (newkey != NULL) - tcp_v4_md5_do_add(newsk, inet_sk(sk)->daddr, + tcp_v4_md5_do_add(newsk, newinet->daddr, newkey, key->keylen); newsk->sk_route_caps &= ~NETIF_F_GSO_MASK; } diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index ae3d6575356..d849dd53b78 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -1442,7 +1442,7 @@ static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb, */ char *newkey = kmemdup(key->key, key->keylen, GFP_ATOMIC); if (newkey != NULL) - tcp_v6_md5_do_add(newsk, &inet6_sk(sk)->daddr, + tcp_v6_md5_do_add(newsk, &newnp->daddr, newkey, key->keylen); } #endif -- cgit v1.2.3-70-g09d2 From f249fb783092471a4808e5fc5bda071d2724810d Mon Sep 17 00:00:00 2001 From: Rémi Denis-Courmont Date: Mon, 20 Jul 2009 00:47:04 +0000 Subject: Fix error return for setsockopt(SO_TIMESTAMPING) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit I guess it should be -EINVAL rather than EINVAL. I have not checked when the bug came in. Perhaps a candidate for -stable? Signed-off-by: Rémi Denis-Courmont Signed-off-by: David S. Miller --- net/core/sock.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/core/sock.c b/net/core/sock.c index d9eec153d53..bbb25be7ddf 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -631,7 +631,7 @@ set_rcvbuf: case SO_TIMESTAMPING: if (val & ~SOF_TIMESTAMPING_MASK) { - ret = EINVAL; + ret = -EINVAL; break; } sock_valbool_flag(sk, SOCK_TIMESTAMPING_TX_HARDWARE, -- cgit v1.2.3-70-g09d2 From e2e414d92397c366396d13f627a98a20be92e509 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Fri, 10 Jul 2009 11:38:14 +0200 Subject: mac80211: disable mesh My kvm instance was complaining a lot about sleeping in atomic contexts in the mesh code, and it turns out that both mesh_path_add() and mpp_path_add() need to be able to sleep (they even use synchronize_rcu()!). I put in a might_sleep() to annotate that, but I see no way, at least right now, of actually making sure those functions are only called from process context since they are both called during TX and RX and the mesh code itself even calls them with rcu_read_lock() "held". Therefore, let's disable it completely for now. It's possible that I'm only seeing this because the hwsim's beaconing is broken and thus the peers aren't discovered right away, but it is possible that this happens even if beaconing is working, for a peer that doesn't exist or so. It should be possible to solve this by deferring the freeing of the tables to call_rcu() instead of using synchronize_rcu(), and also using atomic allocations, but maybe it makes more sense to rework the code to not call these from atomic contexts and defer more of the work to the workqueue. Right now, I can't work on either of those solutions though. Signed-off-by: Johannes Berg Signed-off-by: John W. Linville --- net/mac80211/Kconfig | 1 + net/mac80211/mesh_pathtbl.c | 3 +++ 2 files changed, 4 insertions(+) (limited to 'net') diff --git a/net/mac80211/Kconfig b/net/mac80211/Kconfig index ba2643a43c7..7836ee92898 100644 --- a/net/mac80211/Kconfig +++ b/net/mac80211/Kconfig @@ -83,6 +83,7 @@ endmenu config MAC80211_MESH bool "Enable mac80211 mesh networking (pre-802.11s) support" depends on MAC80211 && EXPERIMENTAL + depends on BROKEN ---help--- This options enables support of Draft 802.11s mesh networking. The implementation is based on Draft 1.08 of the Mesh Networking diff --git a/net/mac80211/mesh_pathtbl.c b/net/mac80211/mesh_pathtbl.c index 3c72557df45..02f8709a181 100644 --- a/net/mac80211/mesh_pathtbl.c +++ b/net/mac80211/mesh_pathtbl.c @@ -175,6 +175,8 @@ int mesh_path_add(u8 *dst, struct ieee80211_sub_if_data *sdata) int err = 0; u32 hash_idx; + might_sleep(); + if (memcmp(dst, sdata->dev->dev_addr, ETH_ALEN) == 0) /* never add ourselves as neighbours */ return -ENOTSUPP; @@ -265,6 +267,7 @@ int mpp_path_add(u8 *dst, u8 *mpp, struct ieee80211_sub_if_data *sdata) int err = 0; u32 hash_idx; + might_sleep(); if (memcmp(dst, sdata->dev->dev_addr, ETH_ALEN) == 0) /* never add ourselves as neighbours */ -- cgit v1.2.3-70-g09d2 From f54c142725ad2ba33c3ee627873cb6966bf05447 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Fri, 10 Jul 2009 21:41:39 +0200 Subject: rfkill: allow toggling soft state in sysfs again Apparently there actually _are_ tools that try to set this in sysfs even though it wasn't supposed to be used this way without claiming first. Guess what: now that I've cleaned it all up it doesn't matter and we can simply allow setting the soft-block state in sysfs. Signed-off-by: Johannes Berg Tested-By: Darren Salt Signed-off-by: John W. Linville --- net/rfkill/core.c | 27 +++++++++++++++++++-------- 1 file changed, 19 insertions(+), 8 deletions(-) (limited to 'net') diff --git a/net/rfkill/core.c b/net/rfkill/core.c index 79693fe2001..6896c0b45b4 100644 --- a/net/rfkill/core.c +++ b/net/rfkill/core.c @@ -648,15 +648,26 @@ static ssize_t rfkill_state_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) { - /* - * The intention was that userspace can only take control over - * a given device when/if rfkill-input doesn't control it due - * to user_claim. Since user_claim is currently unsupported, - * we never support changing the state from userspace -- this - * can be implemented again later. - */ + struct rfkill *rfkill = to_rfkill(dev); + unsigned long state; + int err; + + if (!capable(CAP_NET_ADMIN)) + return -EPERM; + + err = strict_strtoul(buf, 0, &state); + if (err) + return err; + + if (state != RFKILL_USER_STATE_SOFT_BLOCKED && + state != RFKILL_USER_STATE_UNBLOCKED) + return -EINVAL; + + mutex_lock(&rfkill_global_mutex); + rfkill_set_block(rfkill, state == RFKILL_USER_STATE_SOFT_BLOCKED); + mutex_unlock(&rfkill_global_mutex); - return -EPERM; + return err ?: count; } static ssize_t rfkill_claim_show(struct device *dev, -- cgit v1.2.3-70-g09d2 From 8ef86c7bfac5b44529b73b84bc50d3cf574bfb4b Mon Sep 17 00:00:00 2001 From: Pavel Roskin Date: Fri, 10 Jul 2009 16:42:29 -0400 Subject: mac80211: fix injection in monitor mode The location of the 802.11 header is calculated incorrectly due to a wrong placement of parentheses. Found by kmemcheck. Signed-off-by: Pavel Roskin Signed-off-by: John W. Linville --- net/mac80211/tx.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c index d238a8939a0..3a8922cd103 100644 --- a/net/mac80211/tx.c +++ b/net/mac80211/tx.c @@ -1455,7 +1455,7 @@ int ieee80211_master_start_xmit(struct sk_buff *skb, struct net_device *dev) monitor_iface = UNKNOWN_ADDRESS; len_rthdr = ieee80211_get_radiotap_len(skb->data); - hdr = (struct ieee80211_hdr *)skb->data + len_rthdr; + hdr = (struct ieee80211_hdr *)(skb->data + len_rthdr); hdrlen = ieee80211_hdrlen(hdr->frame_control); /* check the header is complete in the frame */ -- cgit v1.2.3-70-g09d2 From 48ab3578a65c5168ecaaa3b21292b643b7bcc2d5 Mon Sep 17 00:00:00 2001 From: Alan Jenkins Date: Sun, 12 Jul 2009 17:03:13 +0100 Subject: rfkill: fix rfkill_set_states() to set the hw state The point of this function is to set the software and hardware state at the same time. When I tried to use it, I found it was only setting the software state. Signed-off-by: Alan Jenkins Reviewed-by: Johannes Berg Signed-off-by: John W. Linville --- net/rfkill/core.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'net') diff --git a/net/rfkill/core.c b/net/rfkill/core.c index 6896c0b45b4..2fc4a1724eb 100644 --- a/net/rfkill/core.c +++ b/net/rfkill/core.c @@ -549,6 +549,10 @@ void rfkill_set_states(struct rfkill *rfkill, bool sw, bool hw) swprev = !!(rfkill->state & RFKILL_BLOCK_SW); hwprev = !!(rfkill->state & RFKILL_BLOCK_HW); __rfkill_set_sw_state(rfkill, sw); + if (hw) + rfkill->state |= RFKILL_BLOCK_HW; + else + rfkill->state &= ~RFKILL_BLOCK_HW; spin_unlock_irqrestore(&rfkill->lock, flags); -- cgit v1.2.3-70-g09d2 From 35946a571099a50d2595c8866f07617d29558f53 Mon Sep 17 00:00:00 2001 From: Javier Cardona Date: Mon, 13 Jul 2009 17:00:10 -0700 Subject: mac80211: use correct address for mesh Path Error For forwarded frames, we save the precursor address in addr1 in case it needs to be used to send a Path Error. mesh_path_discard_frame, however, was using addr2 instead of addr1 to send Path Error frames, so correct that and also make the comment regarding this more clear. Signed-off-by: Andrey Yurovsky Signed-off-by: John W. Linville --- net/mac80211/mesh_pathtbl.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/mac80211/mesh_pathtbl.c b/net/mac80211/mesh_pathtbl.c index 02f8709a181..479597e8858 100644 --- a/net/mac80211/mesh_pathtbl.c +++ b/net/mac80211/mesh_pathtbl.c @@ -494,8 +494,10 @@ void mesh_path_tx_pending(struct mesh_path *mpath) * @skb: frame to discard * @sdata: network subif the frame was to be sent through * - * If the frame was beign forwarded from another MP, a PERR frame will be sent - * to the precursor. + * If the frame was being forwarded from another MP, a PERR frame will be sent + * to the precursor. The precursor's address (i.e. the previous hop) was saved + * in addr1 of the frame-to-be-forwarded, and would only be overwritten once + * the destination is successfully resolved. * * Locking: the function must me called within a rcu_read_lock region */ @@ -510,7 +512,7 @@ void mesh_path_discard_frame(struct sk_buff *skb, u8 *ra, *da; da = hdr->addr3; - ra = hdr->addr2; + ra = hdr->addr1; mpath = mesh_path_lookup(da, sdata); if (mpath) dsn = ++mpath->dsn; -- cgit v1.2.3-70-g09d2 From 6c95e2a2f0f0bf4c8880d5b74b2f7f359d352d03 Mon Sep 17 00:00:00 2001 From: Niko Jokinen Date: Wed, 15 Jul 2009 11:00:53 +0300 Subject: nl80211: Memory leak fixed Potential memory leak via msg pointer in nl80211_get_key() function. Signed-off-by: Niko Jokinen Signed-off-by: Luciano Coelho Signed-off-by: John W. Linville --- net/wireless/nl80211.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 43bdb1372ca..634496b3ed7 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -997,7 +997,7 @@ static int nl80211_get_key(struct sk_buff *skb, struct genl_info *info) if (IS_ERR(hdr)) { err = PTR_ERR(hdr); - goto out; + goto free_msg; } cookie.msg = msg; @@ -1011,7 +1011,7 @@ static int nl80211_get_key(struct sk_buff *skb, struct genl_info *info) &cookie, get_key_callback); if (err) - goto out; + goto free_msg; if (cookie.error) goto nla_put_failure; @@ -1022,6 +1022,7 @@ static int nl80211_get_key(struct sk_buff *skb, struct genl_info *info) nla_put_failure: err = -ENOBUFS; + free_msg: nlmsg_free(msg); out: cfg80211_put_dev(drv); -- cgit v1.2.3-70-g09d2 From 9e81eccf199d910e5ea8db377a43478e4eccd033 Mon Sep 17 00:00:00 2001 From: Christian Lamparter Date: Sun, 19 Jul 2009 05:05:37 +0200 Subject: cfg80211: double free in __cfg80211_scan_done This patch fixes a double free corruption in __cfg80211_scan_done: ================================================ BUG kmalloc-512: Object already free ------------------------------------------------ INFO: Allocated in load_elf_binary+0x18b/0x19af age=6 INFO: Freed in load_elf_binary+0x104e/0x19af age=5 INFO: Slab 0xffffea0001bae4c0 objects=14 used=7 INFO: Object 0xffff88007e8a9918 @offset=6424 fp=0xffff88007e8a9488 Bytes b4 0xffff88007e8a9908: 00 00 00 00 00 00 00 00 5a 5a [...] Pid: 28705, comm: rmmod Tainted: P C 2.6.31-rc2-wl #1 Call Trace: [] print_trailer+0x14e/0x16e [] object_err+0x42/0x61 [] __slab_free+0x2af/0x396 [] ? wiphy_unregister+0x92/0x142 [cfg80211] [] kfree+0x13c/0x17a [] ? wiphy_unregister+0x92/0x142 [cfg80211] [] wiphy_unregister+0x92/0x142 [cfg80211] [] ieee80211_unregister_hw+0xc8/0xff [mac80211] [] p54_unregister_common+0x31/0x66 [p54common] [...] FIX kmalloc-512: Object at 0xffff88007e8a9918 not freed The code path which leads to the *funny* double free: request = rdev->scan_req; dev = dev_get_by_index(&init_net, request->ifidx); /* * the driver was unloaded recently and * therefore dev_get_by_index will return NULL! */ if (!dev) goto out; [...] rdev->scan_req = NULL; /* not executed... */ [...] out: kfree(request); Signed-off-by: Christian Lamparter Signed-off-by: John W. Linville --- net/wireless/scan.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'net') diff --git a/net/wireless/scan.c b/net/wireless/scan.c index f8e71b30000..9271118e1fc 100644 --- a/net/wireless/scan.c +++ b/net/wireless/scan.c @@ -35,8 +35,6 @@ void cfg80211_scan_done(struct cfg80211_scan_request *request, bool aborted) else nl80211_send_scan_done(wiphy_to_dev(request->wiphy), dev); - wiphy_to_dev(request->wiphy)->scan_req = NULL; - #ifdef CONFIG_WIRELESS_EXT if (!aborted) { memset(&wrqu, 0, sizeof(wrqu)); @@ -48,6 +46,7 @@ void cfg80211_scan_done(struct cfg80211_scan_request *request, bool aborted) dev_put(dev); out: + wiphy_to_dev(request->wiphy)->scan_req = NULL; kfree(request); } EXPORT_SYMBOL(cfg80211_scan_done); -- cgit v1.2.3-70-g09d2 From dcf777f6ed9799c5ac90ac17a5c369e6b73ca92e Mon Sep 17 00:00:00 2001 From: Ralf Baechle Date: Sun, 26 Jul 2009 19:11:14 -0700 Subject: NET: ROSE: Don't use static buffer. The use of a static buffer in rose2asc() to return its result is not threadproof and can result in corruption if multiple threads are trying to use one of the procfs files based on rose2asc(). Signed-off-by: Ralf Baechle Signed-off-by: David S. Miller --- include/net/rose.h | 2 +- net/rose/af_rose.c | 18 ++++++++---------- net/rose/rose_route.c | 23 ++++++++++++----------- 3 files changed, 21 insertions(+), 22 deletions(-) (limited to 'net') diff --git a/include/net/rose.h b/include/net/rose.h index cbd5364b2c8..5ba9f02731e 100644 --- a/include/net/rose.h +++ b/include/net/rose.h @@ -156,7 +156,7 @@ extern int sysctl_rose_maximum_vcs; extern int sysctl_rose_window_size; extern int rosecmp(rose_address *, rose_address *); extern int rosecmpm(rose_address *, rose_address *, unsigned short); -extern const char *rose2asc(const rose_address *); +extern char *rose2asc(char *buf, const rose_address *); extern struct sock *rose_find_socket(unsigned int, struct rose_neigh *); extern void rose_kill_by_neigh(struct rose_neigh *); extern unsigned int rose_new_lci(struct rose_neigh *); diff --git a/net/rose/af_rose.c b/net/rose/af_rose.c index 6bd8e93869e..f0a76f6bca7 100644 --- a/net/rose/af_rose.c +++ b/net/rose/af_rose.c @@ -92,23 +92,21 @@ static void rose_set_lockdep_key(struct net_device *dev) /* * Convert a ROSE address into text. */ -const char *rose2asc(const rose_address *addr) +char *rose2asc(char *buf, const rose_address *addr) { - static char buffer[11]; - if (addr->rose_addr[0] == 0x00 && addr->rose_addr[1] == 0x00 && addr->rose_addr[2] == 0x00 && addr->rose_addr[3] == 0x00 && addr->rose_addr[4] == 0x00) { - strcpy(buffer, "*"); + strcpy(buf, "*"); } else { - sprintf(buffer, "%02X%02X%02X%02X%02X", addr->rose_addr[0] & 0xFF, + sprintf(buf, "%02X%02X%02X%02X%02X", addr->rose_addr[0] & 0xFF, addr->rose_addr[1] & 0xFF, addr->rose_addr[2] & 0xFF, addr->rose_addr[3] & 0xFF, addr->rose_addr[4] & 0xFF); } - return buffer; + return buf; } /* @@ -1437,7 +1435,7 @@ static void rose_info_stop(struct seq_file *seq, void *v) static int rose_info_show(struct seq_file *seq, void *v) { - char buf[11]; + char buf[11], rsbuf[11]; if (v == SEQ_START_TOKEN) seq_puts(seq, @@ -1455,8 +1453,8 @@ static int rose_info_show(struct seq_file *seq, void *v) devname = dev->name; seq_printf(seq, "%-10s %-9s ", - rose2asc(&rose->dest_addr), - ax2asc(buf, &rose->dest_call)); + rose2asc(rsbuf, &rose->dest_addr), + ax2asc(buf, &rose->dest_call)); if (ax25cmp(&rose->source_call, &null_ax25_address) == 0) callsign = "??????-?"; @@ -1465,7 +1463,7 @@ static int rose_info_show(struct seq_file *seq, void *v) seq_printf(seq, "%-10s %-9s %-5s %3.3X %05d %d %d %d %d %3lu %3lu %3lu %3lu %3lu %3lu/%03lu %5d %5d %ld\n", - rose2asc(&rose->source_addr), + rose2asc(rsbuf, &rose->source_addr), callsign, devname, rose->lci & 0x0FFF, diff --git a/net/rose/rose_route.c b/net/rose/rose_route.c index a81066a1010..9478d9b3d97 100644 --- a/net/rose/rose_route.c +++ b/net/rose/rose_route.c @@ -1104,6 +1104,7 @@ static void rose_node_stop(struct seq_file *seq, void *v) static int rose_node_show(struct seq_file *seq, void *v) { + char rsbuf[11]; int i; if (v == SEQ_START_TOKEN) @@ -1112,13 +1113,13 @@ static int rose_node_show(struct seq_file *seq, void *v) const struct rose_node *rose_node = v; /* if (rose_node->loopback) { seq_printf(seq, "%-10s %04d 1 loopback\n", - rose2asc(&rose_node->address), - rose_node->mask); + rose2asc(rsbuf, &rose_node->address), + rose_node->mask); } else { */ seq_printf(seq, "%-10s %04d %d", - rose2asc(&rose_node->address), - rose_node->mask, - rose_node->count); + rose2asc(rsbuf, &rose_node->address), + rose_node->mask, + rose_node->count); for (i = 0; i < rose_node->count; i++) seq_printf(seq, " %05d", @@ -1267,7 +1268,7 @@ static void rose_route_stop(struct seq_file *seq, void *v) static int rose_route_show(struct seq_file *seq, void *v) { - char buf[11]; + char buf[11], rsbuf[11]; if (v == SEQ_START_TOKEN) seq_puts(seq, @@ -1279,7 +1280,7 @@ static int rose_route_show(struct seq_file *seq, void *v) seq_printf(seq, "%3.3X %-10s %-9s %05d ", rose_route->lci1, - rose2asc(&rose_route->src_addr), + rose2asc(rsbuf, &rose_route->src_addr), ax2asc(buf, &rose_route->src_call), rose_route->neigh1->number); else @@ -1289,10 +1290,10 @@ static int rose_route_show(struct seq_file *seq, void *v) if (rose_route->neigh2) seq_printf(seq, "%3.3X %-10s %-9s %05d\n", - rose_route->lci2, - rose2asc(&rose_route->dest_addr), - ax2asc(buf, &rose_route->dest_call), - rose_route->neigh2->number); + rose_route->lci2, + rose2asc(rsbuf, &rose_route->dest_addr), + ax2asc(buf, &rose_route->dest_call), + rose_route->neigh2->number); else seq_puts(seq, "000 * * 00000\n"); -- cgit v1.2.3-70-g09d2 From c587aea951a56d29741a3cef4ea3e142c93b3207 Mon Sep 17 00:00:00 2001 From: Xiaotian Feng Date: Thu, 23 Jul 2009 23:06:32 +0000 Subject: net/bridge: use kobject_put to release kobject in br_add_if error path kobject_init_and_add will alloc memory for kobj->name, so in br_add_if error path, simply use kobject_del will not free memory for kobj->name. Fix by using kobject_put instead, kobject_put will internally calls kobject_del and frees memory for kobj->name. Signed-off-by: Xiaotian Feng Acked-by: Stephen Hemminger Signed-off-by: David S. Miller --- net/bridge/br_if.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/bridge/br_if.c b/net/bridge/br_if.c index 8a96672e2c5..eb404dc3ed6 100644 --- a/net/bridge/br_if.c +++ b/net/bridge/br_if.c @@ -424,7 +424,7 @@ int br_add_if(struct net_bridge *br, struct net_device *dev) err2: br_fdb_delete_by_port(br, p, 1); err1: - kobject_del(&p->kobj); + kobject_put(&p->kobj); err0: dev_set_promiscuity(dev, -1); put_back: -- cgit v1.2.3-70-g09d2 From 0cbb0a781a42f131e9c6836554f402cb85f8f38b Mon Sep 17 00:00:00 2001 From: Deepak Saxena Date: Mon, 27 Jul 2009 10:49:44 -0700 Subject: net: irda: init spinlock after memcpy irttp_dup() copies a tsap_cb struct, but does not initialize the spinlock in the new structure, which confuses lockdep. Signed-off-by: Deepak Saxena Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner Signed-off-by: David S. Miller --- net/irda/irttp.c | 1 + 1 file changed, 1 insertion(+) (limited to 'net') diff --git a/net/irda/irttp.c b/net/irda/irttp.c index ecf4eb2717c..9cb79f95bf6 100644 --- a/net/irda/irttp.c +++ b/net/irda/irttp.c @@ -1453,6 +1453,7 @@ struct tsap_cb *irttp_dup(struct tsap_cb *orig, void *instance) } /* Dup */ memcpy(new, orig, sizeof(struct tsap_cb)); + spin_lock_init(&new->lock); /* We don't need the old instance any more */ spin_unlock_irqrestore(&irttp->tsaps->hb_spinlock, flags); -- cgit v1.2.3-70-g09d2 From 78f1a8b758d57c2d2c9f3db7199cd30803854c82 Mon Sep 17 00:00:00 2001 From: "Luis R. Rodriguez" Date: Mon, 27 Jul 2009 08:38:25 -0700 Subject: mac80211: do not queue work after suspend in the dynamic ps timer Signed-off-by: Luis R. Rodriguez Signed-off-by: John W. Linville --- net/mac80211/mlme.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index aca22b00b6a..07e7e41816b 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -721,7 +721,7 @@ void ieee80211_dynamic_ps_timer(unsigned long data) { struct ieee80211_local *local = (void *) data; - if (local->quiescing) + if (local->quiescing || local->suspended) return; queue_work(local->hw.workqueue, &local->dynamic_ps_enable_work); -- cgit v1.2.3-70-g09d2 From 89c3a8aca28e6d57f2ae945d97858a372d624b81 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Tue, 28 Jul 2009 18:10:17 +0200 Subject: mac80211: fix suspend Jan reported that his b43-based laptop hangs during suspend. The problem turned out to be mac80211 asking the driver to stop the hardware before removing interfaces, and interface removal caused b43 to touch the hardware (while down, which causes the hang). This patch fixes mac80211 to do reorder these operations to have them in the correct order -- first remove interfaces and then stop the hardware. Some more code is necessary to be able to do so in a race-free manner, in particular it is necessary to not process frames received during quiescing. Fixes http://bugzilla.kernel.org/show_bug.cgi?id=13337. Reported-by: Jan Scholz Signed-off-by: Johannes Berg Signed-off-by: John W. Linville --- net/mac80211/pm.c | 24 +++++++++++++++--------- net/mac80211/rx.c | 12 ++++++++++++ 2 files changed, 27 insertions(+), 9 deletions(-) (limited to 'net') diff --git a/net/mac80211/pm.c b/net/mac80211/pm.c index 7a549f9deb9..5e3d476972f 100644 --- a/net/mac80211/pm.c +++ b/net/mac80211/pm.c @@ -55,15 +55,6 @@ int __ieee80211_suspend(struct ieee80211_hw *hw) rcu_read_unlock(); - /* flush again, in case driver queued work */ - flush_workqueue(local->hw.workqueue); - - /* stop hardware - this must stop RX */ - if (local->open_count) { - ieee80211_led_radio(local, false); - drv_stop(local); - } - /* remove STAs */ spin_lock_irqsave(&local->sta_lock, flags); list_for_each_entry(sta, &local->sta_list, list) { @@ -111,7 +102,22 @@ int __ieee80211_suspend(struct ieee80211_hw *hw) drv_remove_interface(local, &conf); } + /* stop hardware - this must stop RX */ + if (local->open_count) { + ieee80211_led_radio(local, false); + drv_stop(local); + } + + /* + * flush again, in case driver queued work -- it + * shouldn't be doing (or cancel everything in the + * stop callback) that but better safe than sorry. + */ + flush_workqueue(local->hw.workqueue); + local->suspended = true; + /* need suspended to be visible before quiescing is false */ + barrier(); local->quiescing = false; return 0; diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c index de5bba7f910..0936fc24942 100644 --- a/net/mac80211/rx.c +++ b/net/mac80211/rx.c @@ -2453,6 +2453,18 @@ void __ieee80211_rx(struct ieee80211_hw *hw, struct sk_buff *skb, return; } + /* + * If we're suspending, it is possible although not too likely + * that we'd be receiving frames after having already partially + * quiesced the stack. We can't process such frames then since + * that might, for example, cause stations to be added or other + * driver callbacks be invoked. + */ + if (unlikely(local->quiescing || local->suspended)) { + kfree_skb(skb); + return; + } + if (status->flag & RX_FLAG_HT) { /* rate_idx is MCS index */ if (WARN_ON(status->rate_idx < 0 || -- cgit v1.2.3-70-g09d2 From 1c29b3ff4f2d847464f7be3a0e179c6dfc69bd02 Mon Sep 17 00:00:00 2001 From: Mel Gorman Date: Wed, 29 Jul 2009 15:04:10 -0700 Subject: net-dccp: suppress warning about large allocations from DCCP The DCCP protocol tries to allocate some large hash tables during initialisation using the largest size possible. This can be larger than what the page allocator can provide so it prints a warning. However, the caller is able to handle the situation so this patch suppresses the warning. Signed-off-by: Mel Gorman Acked-by: Arnaldo Carvalho de Melo Cc: "David S. Miller" Cc: "Rafael J. Wysocki" Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- net/dccp/proto.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/dccp/proto.c b/net/dccp/proto.c index 94ca8eaace7..3281013ce03 100644 --- a/net/dccp/proto.c +++ b/net/dccp/proto.c @@ -1066,7 +1066,7 @@ static int __init dccp_init(void) (dccp_hashinfo.ehash_size - 1)) dccp_hashinfo.ehash_size--; dccp_hashinfo.ehash = (struct inet_ehash_bucket *) - __get_free_pages(GFP_ATOMIC, ehash_order); + __get_free_pages(GFP_ATOMIC|__GFP_NOWARN, ehash_order); } while (!dccp_hashinfo.ehash && --ehash_order > 0); if (!dccp_hashinfo.ehash) { @@ -1091,7 +1091,7 @@ static int __init dccp_init(void) bhash_order > 0) continue; dccp_hashinfo.bhash = (struct inet_bind_hashbucket *) - __get_free_pages(GFP_ATOMIC, bhash_order); + __get_free_pages(GFP_ATOMIC|__GFP_NOWARN, bhash_order); } while (!dccp_hashinfo.bhash && --bhash_order >= 0); if (!dccp_hashinfo.bhash) { -- cgit v1.2.3-70-g09d2 From ca7daea612b480ecf0fc5bd1630b88447fe73fc5 Mon Sep 17 00:00:00 2001 From: Julia Lawall Date: Thu, 30 Jul 2009 04:38:19 +0000 Subject: net/netlabel: Add kmalloc NULL tests The test on map4 should be a test on map6. The semantic match that finds this problem is as follows: (http://www.emn.fr/x-info/coccinelle/) // @@ expression *x; identifier f; constant char *C; @@ x = \(kmalloc\|kcalloc\|kzalloc\)(...); ... when != x == NULL when != x != NULL when != (x || ...) ( kfree(x) | f(...,C,...,x,...) | *f(...,x,...) | *x->f ) // Signed-off-by: Julia Lawall Acked-by: Paul Moore Signed-off-by: David S. Miller --- net/netlabel/netlabel_kapi.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/netlabel/netlabel_kapi.c b/net/netlabel/netlabel_kapi.c index b0e582f2d37..16e6c4378ff 100644 --- a/net/netlabel/netlabel_kapi.c +++ b/net/netlabel/netlabel_kapi.c @@ -151,7 +151,7 @@ int netlbl_cfg_unlbl_map_add(const char *domain, addr6 = addr; mask6 = mask; map6 = kzalloc(sizeof(*map6), GFP_ATOMIC); - if (map4 == NULL) + if (map6 == NULL) goto cfg_unlbl_map_add_failure; map6->type = NETLBL_NLTYPE_UNLABELED; ipv6_addr_copy(&map6->list.addr, addr6); -- cgit v1.2.3-70-g09d2 From a3e8ee682003685b8b9c98c89340a42e48c3e813 Mon Sep 17 00:00:00 2001 From: roel kluin Date: Wed, 29 Jul 2009 23:46:59 +0000 Subject: ipv4: ARP neigh procfs buffer overflow If arp_format_neigh_entry() can be called with n->dev->addr_len == 0, then a write to hbuffer[-1] occurs. Signed-off-by: Roel Kluin Signed-off-by: David S. Miller --- net/ipv4/arp.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c index c29d75d8f1b..090e9991ac2 100644 --- a/net/ipv4/arp.c +++ b/net/ipv4/arp.c @@ -1304,7 +1304,9 @@ static void arp_format_neigh_entry(struct seq_file *seq, hbuffer[k++] = hex_asc_lo(n->ha[j]); hbuffer[k++] = ':'; } - hbuffer[--k] = 0; + if (k != 0) + --k; + hbuffer[k] = 0; #if defined(CONFIG_AX25) || defined(CONFIG_AX25_MODULE) } #endif -- cgit v1.2.3-70-g09d2 From 144586301f6af5ae5943a002f030d8c626fa4fdd Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 28 Jul 2009 02:36:15 +0000 Subject: net: net_assign_generic() fix memcpy() should take into account size of pointers, not only number of pointers to copy. Signed-off-by: Eric Dumazet Acked-by: Pavel Emelyanov Signed-off-by: David S. Miller --- net/core/net_namespace.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c index b7292a2719d..197283072cc 100644 --- a/net/core/net_namespace.c +++ b/net/core/net_namespace.c @@ -488,7 +488,7 @@ int net_assign_generic(struct net *net, int id, void *data) */ ng->len = id; - memcpy(&ng->ptr, &old_ng->ptr, old_ng->len); + memcpy(&ng->ptr, &old_ng->ptr, old_ng->len * sizeof(void*)); rcu_assign_pointer(net->gen, ng); call_rcu(&old_ng->rcu, net_generic_release); -- cgit v1.2.3-70-g09d2 From a6ac65db2329e7685299666f5f7b6093c7b0f3a0 Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Thu, 30 Jul 2009 01:06:12 +0000 Subject: net: restore the original spinlock to protect unicast list There is a path when an assetion in dev_unicast_sync() appears. igmp6_group_added -> dev_mc_add -> __dev_set_rx_mode -> -> vlan_dev_set_rx_mode -> dev_unicast_sync Therefore we cannot protect this list with rtnl. This patch restores the original protecting this list with spinlock. Signed-off-by: Jiri Pirko Tested-by: Meelis Roos Signed-off-by: David S. Miller --- net/core/dev.c | 25 ++++++++++++++++--------- 1 file changed, 16 insertions(+), 9 deletions(-) (limited to 'net') diff --git a/net/core/dev.c b/net/core/dev.c index 70c27e0c7c3..43e61ba7bd9 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -3865,10 +3865,12 @@ int dev_unicast_delete(struct net_device *dev, void *addr) ASSERT_RTNL(); + netif_addr_lock_bh(dev); err = __hw_addr_del(&dev->uc, addr, dev->addr_len, NETDEV_HW_ADDR_T_UNICAST); if (!err) __dev_set_rx_mode(dev); + netif_addr_unlock_bh(dev); return err; } EXPORT_SYMBOL(dev_unicast_delete); @@ -3889,10 +3891,12 @@ int dev_unicast_add(struct net_device *dev, void *addr) ASSERT_RTNL(); + netif_addr_lock_bh(dev); err = __hw_addr_add(&dev->uc, addr, dev->addr_len, NETDEV_HW_ADDR_T_UNICAST); if (!err) __dev_set_rx_mode(dev); + netif_addr_unlock_bh(dev); return err; } EXPORT_SYMBOL(dev_unicast_add); @@ -3949,7 +3953,8 @@ void __dev_addr_unsync(struct dev_addr_list **to, int *to_count, * @from: source device * * Add newly added addresses to the destination device and release - * addresses that have no users left. + * addresses that have no users left. The source device must be + * locked by netif_tx_lock_bh. * * This function is intended to be called from the dev->set_rx_mode * function of layered software devices. @@ -3958,14 +3963,14 @@ int dev_unicast_sync(struct net_device *to, struct net_device *from) { int err = 0; - ASSERT_RTNL(); - if (to->addr_len != from->addr_len) return -EINVAL; + netif_addr_lock_bh(to); err = __hw_addr_sync(&to->uc, &from->uc, to->addr_len); if (!err) __dev_set_rx_mode(to); + netif_addr_unlock_bh(to); return err; } EXPORT_SYMBOL(dev_unicast_sync); @@ -3981,28 +3986,30 @@ EXPORT_SYMBOL(dev_unicast_sync); */ void dev_unicast_unsync(struct net_device *to, struct net_device *from) { - ASSERT_RTNL(); - if (to->addr_len != from->addr_len) return; + netif_addr_lock_bh(from); + netif_addr_lock(to); __hw_addr_unsync(&to->uc, &from->uc, to->addr_len); __dev_set_rx_mode(to); + netif_addr_unlock(to); + netif_addr_unlock_bh(from); } EXPORT_SYMBOL(dev_unicast_unsync); static void dev_unicast_flush(struct net_device *dev) { - /* rtnl_mutex must be held here */ - + netif_addr_lock_bh(dev); __hw_addr_flush(&dev->uc); + netif_addr_unlock_bh(dev); } static void dev_unicast_init(struct net_device *dev) { - /* rtnl_mutex must be held here */ - + netif_addr_lock_bh(dev); __hw_addr_init(&dev->uc); + netif_addr_unlock_bh(dev); } -- cgit v1.2.3-70-g09d2 From af0d3b103bcfa877343ee338de12002cd50c9ee5 Mon Sep 17 00:00:00 2001 From: Dave Young Date: Mon, 3 Aug 2009 04:26:16 +0000 Subject: bluetooth: rfcomm_init bug fix rfcomm tty may be used before rfcomm_tty_driver initilized, The problem is that now socket layer init before tty layer, if userspace program do socket callback right here then oops will happen. reporting in: http://marc.info/?l=linux-bluetooth&m=124404919324542&w=2 make 3 changes: 1. remove #ifdef in rfcomm/core.c, make it blank function when rfcomm tty not selected in rfcomm.h 2. tune the rfcomm_init error patch to ensure tty driver initilized before rfcomm socket usage. 3. remove __exit for rfcomm_cleanup_sockets because above change need call it in a __init function. Reported-by: Oliver Hartkopp Tested-by: Oliver Hartkopp Signed-off-by: Dave Young Signed-off-by: David S. Miller --- include/net/bluetooth/rfcomm.h | 12 +++++++++++- net/bluetooth/rfcomm/core.c | 27 +++++++++++++++++++-------- net/bluetooth/rfcomm/sock.c | 2 +- 3 files changed, 31 insertions(+), 10 deletions(-) (limited to 'net') diff --git a/include/net/bluetooth/rfcomm.h b/include/net/bluetooth/rfcomm.h index 80072611d26..c274993234e 100644 --- a/include/net/bluetooth/rfcomm.h +++ b/include/net/bluetooth/rfcomm.h @@ -355,7 +355,17 @@ struct rfcomm_dev_list_req { }; int rfcomm_dev_ioctl(struct sock *sk, unsigned int cmd, void __user *arg); + +#ifdef CONFIG_BT_RFCOMM_TTY int rfcomm_init_ttys(void); void rfcomm_cleanup_ttys(void); - +#else +static inline int rfcomm_init_ttys(void) +{ + return 0; +} +static inline void rfcomm_cleanup_ttys(void) +{ +} +#endif #endif /* __RFCOMM_H */ diff --git a/net/bluetooth/rfcomm/core.c b/net/bluetooth/rfcomm/core.c index e50566ebf9f..94b3388c188 100644 --- a/net/bluetooth/rfcomm/core.c +++ b/net/bluetooth/rfcomm/core.c @@ -2080,28 +2080,41 @@ static CLASS_ATTR(rfcomm_dlc, S_IRUGO, rfcomm_dlc_sysfs_show, NULL); /* ---- Initialization ---- */ static int __init rfcomm_init(void) { + int ret; + l2cap_load(); hci_register_cb(&rfcomm_cb); rfcomm_thread = kthread_run(rfcomm_run, NULL, "krfcommd"); if (IS_ERR(rfcomm_thread)) { - hci_unregister_cb(&rfcomm_cb); - return PTR_ERR(rfcomm_thread); + ret = PTR_ERR(rfcomm_thread); + goto out_thread; } if (class_create_file(bt_class, &class_attr_rfcomm_dlc) < 0) BT_ERR("Failed to create RFCOMM info file"); - rfcomm_init_sockets(); + ret = rfcomm_init_ttys(); + if (ret) + goto out_tty; -#ifdef CONFIG_BT_RFCOMM_TTY - rfcomm_init_ttys(); -#endif + ret = rfcomm_init_sockets(); + if (ret) + goto out_sock; BT_INFO("RFCOMM ver %s", VERSION); return 0; + +out_sock: + rfcomm_cleanup_ttys(); +out_tty: + kthread_stop(rfcomm_thread); +out_thread: + hci_unregister_cb(&rfcomm_cb); + + return ret; } static void __exit rfcomm_exit(void) @@ -2112,9 +2125,7 @@ static void __exit rfcomm_exit(void) kthread_stop(rfcomm_thread); -#ifdef CONFIG_BT_RFCOMM_TTY rfcomm_cleanup_ttys(); -#endif rfcomm_cleanup_sockets(); } diff --git a/net/bluetooth/rfcomm/sock.c b/net/bluetooth/rfcomm/sock.c index 7f482784e9f..0b85e811685 100644 --- a/net/bluetooth/rfcomm/sock.c +++ b/net/bluetooth/rfcomm/sock.c @@ -1132,7 +1132,7 @@ error: return err; } -void __exit rfcomm_cleanup_sockets(void) +void rfcomm_cleanup_sockets(void) { class_remove_file(bt_class, &class_attr_rfcomm); -- cgit v1.2.3-70-g09d2 From cd3468bad96c00b5a512f551674f36776129520e Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 29 Jul 2009 22:07:44 +0200 Subject: cfg80211: add two missing NULL pointer checks These pointers can be NULL, the is_mesh() case isn't ever hit in the current kernel, but cmp_ies() can be hit under certain conditions. Signed-off-by: Johannes Berg Cc: stable@kernel.org [2.6.29, 2.6.30] Signed-off-by: John W. Linville --- net/wireless/scan.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/wireless/scan.c b/net/wireless/scan.c index 9271118e1fc..7e595ce24ee 100644 --- a/net/wireless/scan.c +++ b/net/wireless/scan.c @@ -118,7 +118,7 @@ static int cmp_ies(u8 num, u8 *ies1, size_t len1, u8 *ies2, size_t len2) if (!ie1 && !ie2) return 0; - if (!ie1) + if (!ie1 || !ie2) return -1; r = memcmp(ie1 + 2, ie2 + 2, min(ie1[1], ie2[1])); @@ -171,6 +171,8 @@ static bool is_mesh(struct cfg80211_bss *a, ie = find_ie(WLAN_EID_MESH_CONFIG, a->information_elements, a->len_information_elements); + if (!ie) + return false; if (ie[1] != IEEE80211_MESH_CONFIG_LEN) return false; -- cgit v1.2.3-70-g09d2 From 371842448c05b42d11a4be1c8e4e81d62ecc7534 Mon Sep 17 00:00:00 2001 From: "Luis R. Rodriguez" Date: Thu, 30 Jul 2009 17:43:48 -0700 Subject: cfg80211: fix regression on beacon world roaming feature A regression was added through patch a4ed90d6: "cfg80211: respect API on orig_flags on channel for beacon hint" We did indeed respect _orig flags but the intention was not clearly stated in the commit log. This patch fixes firmware issues picked up by iwlwifi when we lift passive scan of beaconing restrictions on channels its EEPROM has been configured to always enable. By doing so though we also disallowed beacon hints on devices registering their wiphy with custom world regulatory domains enabled, this happens to be currently ath5k, ath9k and ar9170. The passive scan and beacon restrictions on those devices would never be lifted even if we did find a beacon and the hardware did support such enhancements when world roaming. Since Johannes indicates iwlwifi firmware cannot be changed to allow beacon hinting we set up a flag now to specifically allow drivers to disable beacon hints for devices which cannot use them. We enable the flag on iwlwifi to disable beacon hints and by default enable it for all other drivers. It should be noted beacon hints lift passive scan flags and beacon restrictions when we receive a beacon from an AP on any 5 GHz non-DFS channels, and channels 12-14 on the 2.4 GHz band. We don't bother with channels 1-11 as those channels are allowed world wide. This should fix world roaming for ath5k, ath9k and ar9170, thereby improving scan time when we receive the first beacon from any AP, and also enabling beaconing operation (AP/IBSS/Mesh) on cards which would otherwise not be allowed to do so. Drivers not using custom regulatory stuff (wiphy_apply_custom_regulatory()) were not affected by this as the orig_flags for the channels would have been cleared upon wiphy registration. I tested this with a world roaming ath5k card. Cc: Jouni Malinen Signed-off-by: Luis R. Rodriguez Reviewed-by: Johannes Berg Signed-off-by: John W. Linville --- drivers/net/wireless/iwlwifi/iwl-core.c | 3 +++ drivers/net/wireless/iwlwifi/iwl3945-base.c | 3 +++ include/net/cfg80211.h | 5 +++++ net/wireless/reg.c | 9 +++++---- net/wireless/reg.h | 3 ++- 5 files changed, 18 insertions(+), 5 deletions(-) (limited to 'net') diff --git a/drivers/net/wireless/iwlwifi/iwl-core.c b/drivers/net/wireless/iwlwifi/iwl-core.c index 6ab07165ea2..18b135f510e 100644 --- a/drivers/net/wireless/iwlwifi/iwl-core.c +++ b/drivers/net/wireless/iwlwifi/iwl-core.c @@ -1332,6 +1332,9 @@ int iwl_setup_mac(struct iwl_priv *priv) hw->wiphy->custom_regulatory = true; + /* Firmware does not support this */ + hw->wiphy->disable_beacon_hints = true; + hw->wiphy->max_scan_ssids = PROBE_OPTION_MAX; /* we create the 802.11 header and a zero-length SSID element */ hw->wiphy->max_scan_ie_len = IWL_MAX_PROBE_REQUEST - 24 - 2; diff --git a/drivers/net/wireless/iwlwifi/iwl3945-base.c b/drivers/net/wireless/iwlwifi/iwl3945-base.c index 2f50ab60bfd..523843369ca 100644 --- a/drivers/net/wireless/iwlwifi/iwl3945-base.c +++ b/drivers/net/wireless/iwlwifi/iwl3945-base.c @@ -3968,6 +3968,9 @@ static int iwl3945_setup_mac(struct iwl_priv *priv) hw->wiphy->custom_regulatory = true; + /* Firmware does not support this */ + hw->wiphy->disable_beacon_hints = true; + hw->wiphy->max_scan_ssids = PROBE_OPTION_MAX_3945; /* we create the 802.11 header and a zero-length SSID element */ hw->wiphy->max_scan_ie_len = IWL_MAX_PROBE_REQUEST - 24 - 2; diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index 1a21895b732..d1892d66701 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -979,6 +979,10 @@ struct cfg80211_ops { * channels at a later time. This can be used for devices which do not * have calibration information gauranteed for frequencies or settings * outside of its regulatory domain. + * @disable_beacon_hints: enable this if your driver needs to ensure that + * passive scan flags and beaconing flags may not be lifted by cfg80211 + * due to regulatory beacon hints. For more information on beacon + * hints read the documenation for regulatory_hint_found_beacon() * @reg_notifier: the driver's regulatory notification callback * @regd: the driver's regulatory domain, if one was requested via * the regulatory_hint() API. This can be used by the driver @@ -1004,6 +1008,7 @@ struct wiphy { bool custom_regulatory; bool strict_regulatory; + bool disable_beacon_hints; enum cfg80211_signal_type signal_type; diff --git a/net/wireless/reg.c b/net/wireless/reg.c index 5e14371cda7..75a406d3361 100644 --- a/net/wireless/reg.c +++ b/net/wireless/reg.c @@ -1089,17 +1089,18 @@ static void handle_reg_beacon(struct wiphy *wiphy, chan->beacon_found = true; + if (wiphy->disable_beacon_hints) + return; + chan_before.center_freq = chan->center_freq; chan_before.flags = chan->flags; - if ((chan->flags & IEEE80211_CHAN_PASSIVE_SCAN) && - !(chan->orig_flags & IEEE80211_CHAN_PASSIVE_SCAN)) { + if (chan->flags & IEEE80211_CHAN_PASSIVE_SCAN) { chan->flags &= ~IEEE80211_CHAN_PASSIVE_SCAN; channel_changed = true; } - if ((chan->flags & IEEE80211_CHAN_NO_IBSS) && - !(chan->orig_flags & IEEE80211_CHAN_NO_IBSS)) { + if (chan->flags & IEEE80211_CHAN_NO_IBSS) { chan->flags &= ~IEEE80211_CHAN_NO_IBSS; channel_changed = true; } diff --git a/net/wireless/reg.h b/net/wireless/reg.h index e37829a49dc..4e167a8e11b 100644 --- a/net/wireless/reg.h +++ b/net/wireless/reg.h @@ -30,7 +30,8 @@ int set_regdom(const struct ieee80211_regdomain *rd); * non-radar 5 GHz channels. * * Drivers do not need to call this, cfg80211 will do it for after a scan - * on a newly found BSS. + * on a newly found BSS. If you cannot make use of this feature you can + * set the wiphy->disable_beacon_hints to true. */ int regulatory_hint_found_beacon(struct wiphy *wiphy, struct ieee80211_channel *beacon_chan, -- cgit v1.2.3-70-g09d2 From 0bf52b981770cbf006323bab5177f2858a196766 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Tue, 4 Aug 2009 21:16:58 +0000 Subject: net: Fix spinlock use in alloc_netdev_mq() -tip testing found this lockdep warning: [ 2.272010] calling net_dev_init+0x0/0x164 @ 1 [ 2.276033] device class 'net': registering [ 2.280191] INFO: trying to register non-static key. [ 2.284005] the code is fine but needs lockdep annotation. [ 2.284005] turning off the locking correctness validator. [ 2.284005] Pid: 1, comm: swapper Not tainted 2.6.31-rc5-tip #1145 [ 2.284005] Call Trace: [ 2.284005] [<7958eb4e>] ? printk+0xf/0x11 [ 2.284005] [<7904f83c>] __lock_acquire+0x11b/0x622 [ 2.284005] [<7908c9b7>] ? alloc_debug_processing+0xf9/0x144 [ 2.284005] [<7904e2be>] ? mark_held_locks+0x3a/0x52 [ 2.284005] [<7908dbc4>] ? kmem_cache_alloc+0xa8/0x13f [ 2.284005] [<7904e475>] ? trace_hardirqs_on_caller+0xa2/0xc3 [ 2.284005] [<7904fdf6>] lock_acquire+0xb3/0xd0 [ 2.284005] [<79489678>] ? alloc_netdev_mq+0xf5/0x1ad [ 2.284005] [<79591514>] _spin_lock_bh+0x2d/0x5d [ 2.284005] [<79489678>] ? alloc_netdev_mq+0xf5/0x1ad [ 2.284005] [<79489678>] alloc_netdev_mq+0xf5/0x1ad [ 2.284005] [<793a38f2>] ? loopback_setup+0x0/0x74 [ 2.284005] [<798eecd0>] loopback_net_init+0x20/0x5d [ 2.284005] [<79483efb>] register_pernet_device+0x23/0x4b [ 2.284005] [<798f5c9f>] net_dev_init+0x115/0x164 [ 2.284005] [<7900104f>] do_one_initcall+0x4a/0x11a [ 2.284005] [<798f5b8a>] ? net_dev_init+0x0/0x164 [ 2.284005] [<79066f6d>] ? register_irq_proc+0x8c/0xa8 [ 2.284005] [<798cc29a>] do_basic_setup+0x42/0x52 [ 2.284005] [<798cc30a>] kernel_init+0x60/0xa1 [ 2.284005] [<798cc2aa>] ? kernel_init+0x0/0xa1 [ 2.284005] [<79003e03>] kernel_thread_helper+0x7/0x10 [ 2.284078] device: 'lo': device_add [ 2.288248] initcall net_dev_init+0x0/0x164 returned 0 after 11718 usecs [ 2.292010] calling neigh_init+0x0/0x66 @ 1 [ 2.296010] initcall neigh_init+0x0/0x66 returned 0 after 0 usecs it's using an zero-initialized spinlock. This is a side-effect of: dev_unicast_init(dev); in alloc_netdev_mq() making use of dev->addr_list_lock. The device has just been allocated freshly, it's not accessible anywhere yet so no locking is needed at all - in fact it's wrong to lock it here (the lock isnt initialized yet). This bug was introduced via: | commit a6ac65db2329e7685299666f5f7b6093c7b0f3a0 | Date: Thu Jul 30 01:06:12 2009 +0000 | | net: restore the original spinlock to protect unicast list Signed-off-by: Ingo Molnar Acked-by: Jiri Pirko Tested-by: Mark Brown Signed-off-by: David S. Miller --- net/core/dev.c | 2 -- 1 file changed, 2 deletions(-) (limited to 'net') diff --git a/net/core/dev.c b/net/core/dev.c index 43e61ba7bd9..6a94475aee8 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -4007,9 +4007,7 @@ static void dev_unicast_flush(struct net_device *dev) static void dev_unicast_init(struct net_device *dev) { - netif_addr_lock_bh(dev); __hw_addr_init(&dev->uc); - netif_addr_unlock_bh(dev); } -- cgit v1.2.3-70-g09d2 From 476181cb05c6a3aea3ef42309388e255c934a06f Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Tue, 4 Aug 2009 21:44:39 +0000 Subject: dccp: missing destroy of percpu counter variable while unload module percpu counter dccp_orphan_count is init in dccp_init() by percpu_counter_init() while dccp module is loaded, but the destroy of it is missing while dccp module is unloaded. We can get the kernel WARNING about this. Reproduct by the following commands: $ modprobe dccp $ rmmod dccp $ modprobe dccp WARNING: at lib/list_debug.c:26 __list_add+0x27/0x5c() Hardware name: VMware Virtual Platform list_add corruption. next->prev should be prev (c080c0c4), but was (null). (next =ca7188cc). Modules linked in: dccp(+) nfsd lockd nfs_acl auth_rpcgss exportfs sunrpc Pid: 1956, comm: modprobe Not tainted 2.6.31-rc5 #55 Call Trace: [] warn_slowpath_common+0x6a/0x81 [] ? __list_add+0x27/0x5c [] warn_slowpath_fmt+0x29/0x2c [] __list_add+0x27/0x5c [] __percpu_counter_init+0x4d/0x5d [] dccp_init+0x19/0x2ed [dccp] [] do_one_initcall+0x4f/0x111 [] ? dccp_init+0x0/0x2ed [dccp] [] ? notifier_call_chain+0x26/0x48 [] ? __blocking_notifier_call_chain+0x45/0x51 [] sys_init_module+0xac/0x1bd [] sysenter_do_call+0x12/0x22 Signed-off-by: Wei Yongjun Acked-by: Eric Dumazet Signed-off-by: David S. Miller --- net/dccp/proto.c | 1 + 1 file changed, 1 insertion(+) (limited to 'net') diff --git a/net/dccp/proto.c b/net/dccp/proto.c index 94ca8eaace7..6294f57162b 100644 --- a/net/dccp/proto.c +++ b/net/dccp/proto.c @@ -1159,6 +1159,7 @@ static void __exit dccp_fini(void) kmem_cache_destroy(dccp_hashinfo.bind_bucket_cachep); dccp_ackvec_exit(); dccp_sysctl_exit(); + percpu_counter_destroy(&dccp_orphan_count); } module_init(dccp_init); -- cgit v1.2.3-70-g09d2 From cf39c4c572dc54adbdf8933d1e6cd87ee94d8fc0 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 27 Jul 2009 08:03:18 -0700 Subject: phonet: phonet_device_get() fix MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit net/phonet/pn_dev.c: In function `phonet_device_get': net/phonet/pn_dev.c:99: warning: 'dev' might be used uninitialized in this function Signed-off-by: Eric Dumazet Acked-by: Rémi Denis-Courmont Signed-off-by: David S. Miller --- net/phonet/pn_dev.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/phonet/pn_dev.c b/net/phonet/pn_dev.c index b0d6ddd82a9..c2b77a69869 100644 --- a/net/phonet/pn_dev.c +++ b/net/phonet/pn_dev.c @@ -96,7 +96,7 @@ struct net_device *phonet_device_get(struct net *net) { struct phonet_device_list *pndevs = phonet_device_list(net); struct phonet_device *pnd; - struct net_device *dev; + struct net_device *dev = NULL; spin_lock_bh(&pndevs->lock); list_for_each_entry(pnd, &pndevs->list, list) { -- cgit v1.2.3-70-g09d2 From 45a41d147a3a31bb218201b0dd70cfe4e9ed5105 Mon Sep 17 00:00:00 2001 From: Dmitry Eremin-Solenikov Date: Sun, 2 Aug 2009 14:12:01 +0400 Subject: af_ieee802154: fix ioctl processing fix two errors in ioctl processing: 1) if the ioctl isn't supported one should return -ENOIOCTLCMD 2) don't call ndo_do_ioctl if the device doesn't provide it Signed-off-by: Dmitry Eremin-Solenikov --- net/ieee802154/af_ieee802154.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/ieee802154/af_ieee802154.c b/net/ieee802154/af_ieee802154.c index 3bb6bdb1dac..af661805b9f 100644 --- a/net/ieee802154/af_ieee802154.c +++ b/net/ieee802154/af_ieee802154.c @@ -136,7 +136,7 @@ static int ieee802154_dev_ioctl(struct sock *sk, struct ifreq __user *arg, unsigned int cmd) { struct ifreq ifr; - int ret = -EINVAL; + int ret = -ENOIOCTLCMD; struct net_device *dev; if (copy_from_user(&ifr, arg, sizeof(struct ifreq))) @@ -146,8 +146,10 @@ static int ieee802154_dev_ioctl(struct sock *sk, struct ifreq __user *arg, dev_load(sock_net(sk), ifr.ifr_name); dev = dev_get_by_name(sock_net(sk), ifr.ifr_name); - if (dev->type == ARPHRD_IEEE802154 || - dev->type == ARPHRD_IEEE802154_PHY) + + if ((dev->type == ARPHRD_IEEE802154 || + dev->type == ARPHRD_IEEE802154_PHY) && + dev->netdev_ops->ndo_do_ioctl) ret = dev->netdev_ops->ndo_do_ioctl(dev, &ifr, cmd); if (!ret && copy_to_user(arg, &ifr, sizeof(struct ifreq))) -- cgit v1.2.3-70-g09d2 From a9dfac3353b03432e3d46a0dde6795588c46256d Mon Sep 17 00:00:00 2001 From: Dmitry Eremin-Solenikov Date: Mon, 3 Aug 2009 17:53:00 +0400 Subject: af_ieee802154: provide dummy get/setsockopt Provide dummt get/setsockopt implementations to stop these syscalls from oopsing on our sockets. Signed-off-by: Dmitry Eremin-Solenikov --- net/ieee802154/dgram.c | 14 ++++++++++++++ net/ieee802154/raw.c | 14 ++++++++++++++ 2 files changed, 28 insertions(+) (limited to 'net') diff --git a/net/ieee802154/dgram.c b/net/ieee802154/dgram.c index 14d39840dd6..ba8b214dda8 100644 --- a/net/ieee802154/dgram.c +++ b/net/ieee802154/dgram.c @@ -377,6 +377,18 @@ int ieee802154_dgram_deliver(struct net_device *dev, struct sk_buff *skb) return ret; } +static int dgram_getsockopt(struct sock *sk, int level, int optname, + char __user *optval, int __user *optlen) +{ + return -EOPNOTSUPP; +} + +static int dgram_setsockopt(struct sock *sk, int level, int optname, + char __user *optval, int __user optlen) +{ + return -EOPNOTSUPP; +} + struct proto ieee802154_dgram_prot = { .name = "IEEE-802.15.4-MAC", .owner = THIS_MODULE, @@ -391,5 +403,7 @@ struct proto ieee802154_dgram_prot = { .connect = dgram_connect, .disconnect = dgram_disconnect, .ioctl = dgram_ioctl, + .getsockopt = dgram_getsockopt, + .setsockopt = dgram_setsockopt, }; diff --git a/net/ieee802154/raw.c b/net/ieee802154/raw.c index fca44d59f97..9315977c4c6 100644 --- a/net/ieee802154/raw.c +++ b/net/ieee802154/raw.c @@ -238,6 +238,18 @@ void ieee802154_raw_deliver(struct net_device *dev, struct sk_buff *skb) read_unlock(&raw_lock); } +static int raw_getsockopt(struct sock *sk, int level, int optname, + char __user *optval, int __user *optlen) +{ + return -EOPNOTSUPP; +} + +static int raw_setsockopt(struct sock *sk, int level, int optname, + char __user *optval, int __user optlen) +{ + return -EOPNOTSUPP; +} + struct proto ieee802154_raw_prot = { .name = "IEEE-802.15.4-RAW", .owner = THIS_MODULE, @@ -250,5 +262,7 @@ struct proto ieee802154_raw_prot = { .unhash = raw_unhash, .connect = raw_connect, .disconnect = raw_disconnect, + .getsockopt = raw_getsockopt, + .setsockopt = raw_setsockopt, }; -- cgit v1.2.3-70-g09d2 From 17ac2e9c58b69a1e25460a568eae1b0dc0188c25 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 6 Aug 2009 03:34:06 +0000 Subject: rose: Fix rose_getname() leak rose_getname() can leak kernel memory to user. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/rose/af_rose.c | 1 + 1 file changed, 1 insertion(+) (limited to 'net') diff --git a/net/rose/af_rose.c b/net/rose/af_rose.c index f0a76f6bca7..e5f478ca3d6 100644 --- a/net/rose/af_rose.c +++ b/net/rose/af_rose.c @@ -954,6 +954,7 @@ static int rose_getname(struct socket *sock, struct sockaddr *uaddr, struct rose_sock *rose = rose_sk(sk); int n; + memset(srose, 0, sizeof(*srose)); if (peer != 0) { if (sk->sk_state != TCP_ESTABLISHED) return -ENOTCONN; -- cgit v1.2.3-70-g09d2 From 80922bbb12a105f858a8f0abb879cb4302d0ecaa Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 6 Aug 2009 03:48:36 +0000 Subject: econet: Fix econet_getname() leak econet_getname() can leak kernel memory to user. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/econet/af_econet.c | 1 + 1 file changed, 1 insertion(+) (limited to 'net') diff --git a/net/econet/af_econet.c b/net/econet/af_econet.c index 2e1f836d424..f0bbc57926c 100644 --- a/net/econet/af_econet.c +++ b/net/econet/af_econet.c @@ -520,6 +520,7 @@ static int econet_getname(struct socket *sock, struct sockaddr *uaddr, if (peer) return -EOPNOTSUPP; + memset(sec, 0, sizeof(*sec)); mutex_lock(&econet_mutex); sk = sock->sk; -- cgit v1.2.3-70-g09d2 From f6b97b29513950bfbf621a83d85b6f86b39ec8db Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 6 Aug 2009 03:31:07 +0000 Subject: netrom: Fix nr_getname() leak nr_getname() can leak kernel memory to user. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/netrom/af_netrom.c | 1 + 1 file changed, 1 insertion(+) (limited to 'net') diff --git a/net/netrom/af_netrom.c b/net/netrom/af_netrom.c index ce51ce012cd..ce1a34b99c2 100644 --- a/net/netrom/af_netrom.c +++ b/net/netrom/af_netrom.c @@ -847,6 +847,7 @@ static int nr_getname(struct socket *sock, struct sockaddr *uaddr, sax->fsa_ax25.sax25_family = AF_NETROM; sax->fsa_ax25.sax25_ndigis = 1; sax->fsa_ax25.sax25_call = nr->user_addr; + memset(sax->fsa_digipeater, 0, sizeof(sax->fsa_digipeater)); sax->fsa_digipeater[0] = nr->dest_addr; *uaddr_len = sizeof(struct full_sockaddr_ax25); } else { -- cgit v1.2.3-70-g09d2 From 3d392475c873c10c10d6d96b94d092a34ebd4791 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 6 Aug 2009 02:27:43 +0000 Subject: appletalk: fix atalk_getname() leak atalk_getname() can leak 8 bytes of kernel memory to user Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/appletalk/ddp.c | 1 + 1 file changed, 1 insertion(+) (limited to 'net') diff --git a/net/appletalk/ddp.c b/net/appletalk/ddp.c index 590b8396362..9ef6ff26eb0 100644 --- a/net/appletalk/ddp.c +++ b/net/appletalk/ddp.c @@ -1237,6 +1237,7 @@ static int atalk_getname(struct socket *sock, struct sockaddr *uaddr, return -ENOBUFS; *uaddr_len = sizeof(struct sockaddr_at); + memset(&sat.sat_zero, 0, sizeof(sat.sat_zero)); if (peer) { if (sk->sk_state != TCP_ESTABLISHED) -- cgit v1.2.3-70-g09d2 From 09384dfc76e526c3993c09c42e016372dc9dd22c Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 6 Aug 2009 03:55:04 +0000 Subject: irda: Fix irda_getname() leak irda_getname() can leak kernel memory to user. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/irda/af_irda.c | 1 + 1 file changed, 1 insertion(+) (limited to 'net') diff --git a/net/irda/af_irda.c b/net/irda/af_irda.c index cb762c8723e..3ec2b434ea7 100644 --- a/net/irda/af_irda.c +++ b/net/irda/af_irda.c @@ -714,6 +714,7 @@ static int irda_getname(struct socket *sock, struct sockaddr *uaddr, struct sock *sk = sock->sk; struct irda_sock *self = irda_sk(sk); + memset(&saddr, 0, sizeof(saddr)); if (peer) { if (sk->sk_state != TCP_ESTABLISHED) return -ENOTCONN; -- cgit v1.2.3-70-g09d2 From b79a79471bd31d737c939a6ddc347417047b4320 Mon Sep 17 00:00:00 2001 From: Jussi Mäki Date: Thu, 6 Aug 2009 21:38:14 +0000 Subject: Fix xfrm hash collisions by changing __xfrm4_daddr_saddr_hash to hash addresses with addition This patch fixes hash collisions in cases where number of entries have incrementing IP source and destination addresses from single respective subnets (i.e. 192.168.0.1-172.16.0.1, 192.168.0.2-172.16.0.2, and so on.). Signed-off-by: Jussi Maki Signed-off-by: David S. Miller --- net/xfrm/xfrm_hash.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/xfrm/xfrm_hash.h b/net/xfrm/xfrm_hash.h index d401dc8f05e..e5195c99f71 100644 --- a/net/xfrm/xfrm_hash.h +++ b/net/xfrm/xfrm_hash.h @@ -16,7 +16,7 @@ static inline unsigned int __xfrm6_addr_hash(xfrm_address_t *addr) static inline unsigned int __xfrm4_daddr_saddr_hash(xfrm_address_t *daddr, xfrm_address_t *saddr) { - return ntohl(daddr->a4 ^ saddr->a4); + return ntohl(daddr->a4 + saddr->a4); } static inline unsigned int __xfrm6_daddr_saddr_hash(xfrm_address_t *daddr, xfrm_address_t *saddr) -- cgit v1.2.3-70-g09d2 From e84b90ae5eb3c112d1f208964df1d8156a538289 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 6 Aug 2009 20:27:04 +0000 Subject: can: Fix raw_getname() leak raw_getname() can leak 10 bytes of kernel memory to user (two bytes hole between can_family and can_ifindex, 8 bytes at the end of sockaddr_can structure) Signed-off-by: Eric Dumazet Acked-by: Oliver Hartkopp Signed-off-by: David S. Miller --- net/can/raw.c | 1 + 1 file changed, 1 insertion(+) (limited to 'net') diff --git a/net/can/raw.c b/net/can/raw.c index f4cc44548bd..db3152df7d2 100644 --- a/net/can/raw.c +++ b/net/can/raw.c @@ -401,6 +401,7 @@ static int raw_getname(struct socket *sock, struct sockaddr *uaddr, if (peer) return -EOPNOTSUPP; + memset(addr, 0, sizeof(*addr)); addr->can_family = AF_CAN; addr->can_ifindex = ro->ifindex; -- cgit v1.2.3-70-g09d2 From 418372b0ab7a3bbcc59d71e8e4d322ef18263dab Mon Sep 17 00:00:00 2001 From: Rafael Laufer Date: Fri, 7 Aug 2009 05:17:17 +0000 Subject: sctp: fix missing destroy of percpu counter variable in sctp_proc_exit() Commit 1748376b6626acf59c24e9592ac67b3fe2a0e026, net: Use a percpu_counter for sockets_allocated added percpu_counter function calls to sctp_proc_init code path, but forgot to add them to sctp_proc_exit(). This resulted in a following Ooops when performing this test # modprobe sctp # rmmod -f sctp # modprobe sctp [ 573.862512] BUG: unable to handle kernel paging request at f8214a24 [ 573.862518] IP: [] __percpu_counter_init+0x3f/0x70 [ 573.862530] *pde = 37010067 *pte = 00000000 [ 573.862534] Oops: 0002 [#1] SMP [ 573.862537] last sysfs file: /sys/module/libcrc32c/initstate [ 573.862540] Modules linked in: sctp(+) crc32c libcrc32c binfmt_misc bridge stp bnep lp snd_hda_codec_analog snd_hda_intel snd_hda_codec snd_hwdep snd_pcm_oss snd_mixer_oss arc4 joydev snd_pcm ecb pcmcia snd_seq_dummy snd_seq_oss iwlagn iwlcore snd_seq_midi snd_rawmidi snd_seq_midi_event yenta_socket rsrc_nonstatic thinkpad_acpi snd_seq snd_timer snd_seq_device mac80211 psmouse sdhci_pci sdhci nvidia(P) ppdev video snd soundcore serio_raw pcspkr iTCO_wdt iTCO_vendor_support led_class ricoh_mmc pcmcia_core intel_agp nvram agpgart usbhid parport_pc parport output snd_page_alloc cfg80211 btusb ohci1394 ieee1394 e1000e [last unloaded: sctp] [ 573.862589] [ 573.862593] Pid: 5373, comm: modprobe Tainted: P R (2.6.31-rc3 #6) 7663B15 [ 573.862596] EIP: 0060:[] EFLAGS: 00010286 CPU: 1 [ 573.862599] EIP is at __percpu_counter_init+0x3f/0x70 [ 573.862602] EAX: f8214a20 EBX: f80faa14 ECX: c48c0000 EDX: f80faa20 [ 573.862604] ESI: f80a7000 EDI: 00000000 EBP: f69d5ef0 ESP: f69d5eec [ 573.862606] DS: 007b ES: 007b FS: 00d8 GS: 0033 SS: 0068 [ 573.862610] Process modprobe (pid: 5373, ti=f69d4000 task=c2130c70 task.ti=f69d4000) [ 573.862612] Stack: [ 573.862613] 00000000 f69d5f18 f80a70a8 f80fa9fc 00000000 fffffffc f69d5f30 c018e2d4 [ 573.862619] <0> 00000000 f80a7000 00000000 f69d5f88 c010112b 00000000 c07029c0 fffffffb [ 573.862626] <0> 00000000 f69d5f38 c018f83f f69d5f54 c0557cad f80fa860 00000001 c07010c0 [ 573.862634] Call Trace: [ 573.862644] [] ? sctp_init+0xa8/0x7d4 [sctp] [ 573.862650] [] ? marker_update_probe_range+0x184/0x260 [ 573.862659] [] ? sctp_init+0x0/0x7d4 [sctp] [ 573.862662] [] ? do_one_initcall+0x2b/0x160 [ 573.862666] [] ? tracepoint_module_notify+0x2f/0x40 [ 573.862671] [] ? notifier_call_chain+0x2d/0x70 [ 573.862678] [] ? __blocking_notifier_call_chain+0x4d/0x60 [ 573.862682] [] ? sys_init_module+0xb1/0x1f0 [ 573.862686] [] ? sysenter_do_call+0x12/0x28 [ 573.862688] Code: 89 48 08 b8 04 00 00 00 e8 df aa ec ff ba f4 ff ff ff 85 c0 89 43 14 74 31 b8 b0 18 71 c0 e8 19 b9 24 00 a1 c4 18 71 c0 8d 53 0c <89> 50 04 89 43 0c b8 b0 18 71 c0 c7 43 10 c4 18 71 c0 89 15 c4 [ 573.862725] EIP: [] __percpu_counter_init+0x3f/0x70 SS:ESP 0068:f69d5eec [ 573.862730] CR2: 00000000f8214a24 [ 573.862734] ---[ end trace 39c4e0b55e7cf54d ]--- Signed-off-by: Rafael Laufer Signed-off-by: Vlad Yasevich Signed-off-by: David S. Miller --- net/sctp/protocol.c | 1 + 1 file changed, 1 insertion(+) (limited to 'net') diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c index 79cbd47f4df..a76da657244 100644 --- a/net/sctp/protocol.c +++ b/net/sctp/protocol.c @@ -160,6 +160,7 @@ static void sctp_proc_exit(void) remove_proc_entry("sctp", init_net.proc_net); } #endif + percpu_counter_destroy(&sctp_sockets_allocated); } /* Private helper to extract ipv4 address and stash them in -- cgit v1.2.3-70-g09d2 From e694958388c50148389b0e9b9e9e8945cf0f1b98 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Thu, 13 Aug 2009 08:28:36 -0700 Subject: Make sock_sendpage() use kernel_sendpage() kernel_sendpage() does the proper default case handling for when the socket doesn't have a native sendpage implementation. Now, arguably this might be something that we could instead solve by just specifying that all protocols should do it themselves at the protocol level, but we really only care about the common protocols. Does anybody really care about sendpage on something like Appletalk? Not likely. Acked-by: David S. Miller Acked-by: Julien TINNES Acked-by: Tavis Ormandy Cc: stable@kernel.org Signed-off-by: Linus Torvalds --- net/socket.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/socket.c b/net/socket.c index 791d71a36a9..6d471655904 100644 --- a/net/socket.c +++ b/net/socket.c @@ -736,7 +736,7 @@ static ssize_t sock_sendpage(struct file *file, struct page *page, if (more) flags |= MSG_MORE; - return sock->ops->sendpage(sock, page, offset, size, flags); + return kernel_sendpage(sock, page, offset, size, flags); } static ssize_t sock_splice_read(struct file *file, loff_t *ppos, -- cgit v1.2.3-70-g09d2 From 416fbdff2137e8d8cc8f23f517bee3a26b11526f Mon Sep 17 00:00:00 2001 From: "Luis R. Rodriguez" Date: Tue, 11 Aug 2009 13:10:33 -0700 Subject: mac80211: fix panic when splicing unprepared TIDs We splice skbs from the pending queue for a TID onto the local pending queue when tearing down a block ack request. This is not necessary unless we actually have received a request to start a block ack request (rate control, for example). If we never received that request we should not be splicing the tid pending queue as it would be null, causing a panic. Not sure yet how exactly we allowed through a call when the tid state does not have at least HT_ADDBA_REQUESTED_MSK set, that will require some further review as it is not quite obvious. For more information see the bug report: http://bugzilla.kernel.org/show_bug.cgi?id=13922 This fixes this oops: BUG: unable to handle kernel NULL pointer dereference at 00000030 IP: [] ieee80211_agg_splice_packets+0x40/0xc0 [mac80211] *pdpt = 0000000002d1e001 *pde = 0000000000000000 Thread overran stack, or stack corrupted Oops: 0000 [#1] SMP last sysfs file: /sys/module/aes_generic/initstate Modules linked in: Pid: 0, comm: swapper Not tainted (2.6.31-rc5-wl #2) Dell DV051 EIP: 0060:[] EFLAGS: 00010292 CPU: 0 EIP is at ieee80211_agg_splice_packets+0x40/0xc0 [mac80211] EAX: 00000030 EBX: 0000004c ECX: 00000003 EDX: 00000000 ESI: c1c98000 EDI: f745a1c0 EBP: c076be58 ESP: c076be38 DS: 007b ES: 007b FS: 00d8 GS: 0000 SS: 0068 Process swapper (pid: 0, ti=c076a000 task=c0709160 task.ti=c076a000) Stack: Call Trace: [] ? ieee80211_stop_tx_ba_cb+0xab/0x150 [mac80211] [] ? ieee80211_tasklet_handler+0xce/0x110 [mac80211] [] ? net_rx_action+0xef/0x1d0 [] ? tasklet_action+0x58/0xc0 [] ? __do_softirq+0xc2/0x190 [] ? handle_IRQ_event+0x58/0x140 [] ? ack_apic_level+0x7e/0x270 [] ? do_softirq+0x3d/0x40 [] ? irq_exit+0x65/0x90 [] ? do_IRQ+0x4f/0xc0 [] ? irq_exit+0x7d/0x90 [] ? smp_apic_timer_interrupt+0x57/0x90 [] ? common_interrupt+0x29/0x30 [] ? mwait_idle+0xbe/0x100 [] ? cpu_idle+0x52/0x90 [] ? rest_init+0x55/0x60 [] ? start_kernel+0x315/0x37d [] ? unknown_bootoption+0x0/0x1f9 [] ? i386_start_kernel+0x79/0x81 Code: EIP: [] ieee80211_agg_splice_packets+0x40/0xc0 [mac80211] SS:ESP 0068:c076be38 CR2: 0000000000000030 Cc: stable@kernel.org Testedy-by: Jack Lau Signed-off-by: Luis R. Rodriguez Signed-off-by: John W. Linville --- net/mac80211/agg-tx.c | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'net') diff --git a/net/mac80211/agg-tx.c b/net/mac80211/agg-tx.c index 9e5762ad307..a24e59816b9 100644 --- a/net/mac80211/agg-tx.c +++ b/net/mac80211/agg-tx.c @@ -381,6 +381,14 @@ static void ieee80211_agg_splice_packets(struct ieee80211_local *local, &local->hw, queue, IEEE80211_QUEUE_STOP_REASON_AGGREGATION); + if (!(sta->ampdu_mlme.tid_state_tx[tid] & HT_ADDBA_REQUESTED_MSK)) + return; + + if (WARN(!sta->ampdu_mlme.tid_tx[tid], + "TID %d gone but expected when splicing aggregates from" + "the pending queue\n", tid)) + return; + if (!skb_queue_empty(&sta->ampdu_mlme.tid_tx[tid]->pending)) { spin_lock_irqsave(&local->queue_stop_reason_lock, flags); /* mark queue as pending, it is stopped already */ -- cgit v1.2.3-70-g09d2 From 8cdb045632e5ee22854538619ac6f150eb0a4894 Mon Sep 17 00:00:00 2001 From: Tom Goff Date: Fri, 14 Aug 2009 16:33:56 -0700 Subject: gre: Fix MTU calculation for bound GRE tunnels The GRE header length should be subtracted when the tunnel MTU is calculated. This just corrects for the associativity change introduced by commit 42aa916265d740d66ac1f17290366e9494c884c2 ("gre: Move MTU setting out of ipgre_tunnel_bind_dev"). Signed-off-by: Tom Goff Signed-off-by: David S. Miller --- net/ipv4/ip_gre.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c index cb4a0f4bd5e..82c11dd10a6 100644 --- a/net/ipv4/ip_gre.c +++ b/net/ipv4/ip_gre.c @@ -951,7 +951,7 @@ static int ipgre_tunnel_bind_dev(struct net_device *dev) addend += 4; } dev->needed_headroom = addend + hlen; - mtu -= dev->hard_header_len - addend; + mtu -= dev->hard_header_len + addend; if (mtu < 68) mtu = 68; -- cgit v1.2.3-70-g09d2 From 523d2f6982136d332c9b7dd00e9e16da1091f060 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 1 Jul 2009 21:26:43 +0200 Subject: mac80211: fix todo lock The key todo lock can be taken from different locks that require it to be _bh to avoid lock inversion due to (soft)irqs. This should fix the two problems reported by Bob and Gabor: http://mid.gmane.org/20090619113049.GB18956@hash.localnet http://mid.gmane.org/4A3FA376.8020307@openwrt.org Signed-off-by: Johannes Berg Cc: Bob Copeland Cc: Gabor Juhos Signed-off-by: John W. Linville --- net/mac80211/key.c | 28 +++++++++++++++------------- 1 file changed, 15 insertions(+), 13 deletions(-) (limited to 'net') diff --git a/net/mac80211/key.c b/net/mac80211/key.c index ce267565e18..659a42d529e 100644 --- a/net/mac80211/key.c +++ b/net/mac80211/key.c @@ -67,6 +67,8 @@ static DECLARE_WORK(todo_work, key_todo); * * @key: key to add to do item for * @flag: todo flag(s) + * + * Must be called with IRQs or softirqs disabled. */ static void add_todo(struct ieee80211_key *key, u32 flag) { @@ -140,9 +142,9 @@ static void ieee80211_key_enable_hw_accel(struct ieee80211_key *key) ret = drv_set_key(key->local, SET_KEY, &sdata->vif, sta, &key->conf); if (!ret) { - spin_lock(&todo_lock); + spin_lock_bh(&todo_lock); key->flags |= KEY_FLAG_UPLOADED_TO_HARDWARE; - spin_unlock(&todo_lock); + spin_unlock_bh(&todo_lock); } if (ret && ret != -ENOSPC && ret != -EOPNOTSUPP) @@ -164,12 +166,12 @@ static void ieee80211_key_disable_hw_accel(struct ieee80211_key *key) if (!key || !key->local->ops->set_key) return; - spin_lock(&todo_lock); + spin_lock_bh(&todo_lock); if (!(key->flags & KEY_FLAG_UPLOADED_TO_HARDWARE)) { - spin_unlock(&todo_lock); + spin_unlock_bh(&todo_lock); return; } - spin_unlock(&todo_lock); + spin_unlock_bh(&todo_lock); sta = get_sta_for_key(key); sdata = key->sdata; @@ -188,9 +190,9 @@ static void ieee80211_key_disable_hw_accel(struct ieee80211_key *key) wiphy_name(key->local->hw.wiphy), key->conf.keyidx, sta ? sta->addr : bcast_addr, ret); - spin_lock(&todo_lock); + spin_lock_bh(&todo_lock); key->flags &= ~KEY_FLAG_UPLOADED_TO_HARDWARE; - spin_unlock(&todo_lock); + spin_unlock_bh(&todo_lock); } static void __ieee80211_set_default_key(struct ieee80211_sub_if_data *sdata, @@ -437,14 +439,14 @@ void ieee80211_key_link(struct ieee80211_key *key, __ieee80211_key_replace(sdata, sta, old_key, key); - spin_unlock_irqrestore(&sdata->local->key_lock, flags); - /* free old key later */ add_todo(old_key, KEY_FLAG_TODO_DELETE); add_todo(key, KEY_FLAG_TODO_ADD_DEBUGFS); if (netif_running(sdata->dev)) add_todo(key, KEY_FLAG_TODO_HWACCEL_ADD); + + spin_unlock_irqrestore(&sdata->local->key_lock, flags); } static void __ieee80211_key_free(struct ieee80211_key *key) @@ -547,7 +549,7 @@ static void __ieee80211_key_todo(void) */ synchronize_rcu(); - spin_lock(&todo_lock); + spin_lock_bh(&todo_lock); while (!list_empty(&todo_list)) { key = list_first_entry(&todo_list, struct ieee80211_key, todo); list_del_init(&key->todo); @@ -558,7 +560,7 @@ static void __ieee80211_key_todo(void) KEY_FLAG_TODO_HWACCEL_REMOVE | KEY_FLAG_TODO_DELETE); key->flags &= ~todoflags; - spin_unlock(&todo_lock); + spin_unlock_bh(&todo_lock); work_done = false; @@ -591,9 +593,9 @@ static void __ieee80211_key_todo(void) WARN_ON(!work_done); - spin_lock(&todo_lock); + spin_lock_bh(&todo_lock); } - spin_unlock(&todo_lock); + spin_unlock_bh(&todo_lock); } void ieee80211_key_todo(void) -- cgit v1.2.3-70-g09d2 From 0e15597ebfe00e28857185f46aba00f400480ffe Mon Sep 17 00:00:00 2001 From: Abhishek Kulkarni Date: Sun, 19 Jul 2009 13:41:55 -0600 Subject: 9p: minor comment fixes Fix the comments -- mostly the improper and/or missing descriptions of function parameters. Signed-off-by: Abhishek Kulkarni Signed-off-by: Eric Van Hensbergen --- fs/9p/vfs_inode.c | 3 +-- net/9p/client.c | 14 +++++++------- net/9p/trans_fd.c | 8 ++++---- net/9p/trans_rdma.c | 9 +++++---- net/9p/trans_virtio.c | 11 ++++------- 5 files changed, 21 insertions(+), 24 deletions(-) (limited to 'net') diff --git a/fs/9p/vfs_inode.c b/fs/9p/vfs_inode.c index 0c8af1abf60..f22668afd0d 100644 --- a/fs/9p/vfs_inode.c +++ b/fs/9p/vfs_inode.c @@ -171,7 +171,6 @@ int v9fs_uflags2omode(int uflags, int extended) /** * v9fs_blank_wstat - helper function to setup a 9P stat structure - * @v9ses: 9P session info (for determining extended mode) * @wstat: structure to initialize * */ @@ -410,9 +409,9 @@ v9fs_open_created(struct inode *inode, struct file *file) * @v9ses: session information * @dir: directory that dentry is being created in * @dentry: dentry that is being created + * @extension: 9p2000.u extension string to support devices, etc. * @perm: create permissions * @mode: open mode - * @extension: 9p2000.u extension string to support devices, etc. * */ static struct p9_fid * diff --git a/net/9p/client.c b/net/9p/client.c index 787ccddb85e..7bbd2d5ae8d 100644 --- a/net/9p/client.c +++ b/net/9p/client.c @@ -60,9 +60,9 @@ static struct p9_req_t * p9_client_rpc(struct p9_client *c, int8_t type, const char *fmt, ...); /** - * v9fs_parse_options - parse mount options into session structure - * @options: options string passed from mount - * @v9ses: existing v9fs session information + * parse_options - parse mount options into client structure + * @opts: options string passed from mount + * @clnt: existing v9fs client information * * Return 0 upon success, -ERRNO upon failure */ @@ -232,7 +232,7 @@ EXPORT_SYMBOL(p9_tag_lookup); /** * p9_tag_init - setup tags structure and contents - * @tags: tags structure from the client struct + * @c: v9fs client struct * * This initializes the tags structure for each client instance. * @@ -258,7 +258,7 @@ error: /** * p9_tag_cleanup - cleans up tags structure and reclaims resources - * @tags: tags structure from the client struct + * @c: v9fs client struct * * This frees resources associated with the tags structure * @@ -430,8 +430,8 @@ static int p9_check_errors(struct p9_client *c, struct p9_req_t *req) /** * p9_client_flush - flush (cancel) a request - * c: client state - * req: request to cancel + * @c: client state + * @oldreq: request to cancel * * This sents a flush for a particular requests and links * the flush request to the original request. The current diff --git a/net/9p/trans_fd.c b/net/9p/trans_fd.c index 8c2588e4edc..8d934dd7fd5 100644 --- a/net/9p/trans_fd.c +++ b/net/9p/trans_fd.c @@ -119,8 +119,8 @@ struct p9_poll_wait { * @wpos: write position for current frame * @wsize: amount of data to write for current frame * @wbuf: current write buffer + * @poll_pending_link: pending links to be polled per conn * @poll_wait: array of wait_q's for various worker threads - * @poll_waddr: ???? * @pt: poll state * @rq: current read work * @wq: current write work @@ -700,9 +700,9 @@ static int p9_fd_cancel(struct p9_client *client, struct p9_req_t *req) } /** - * parse_options - parse mount options into session structure - * @options: options string passed from mount - * @opts: transport-specific structure to parse options into + * parse_opts - parse mount options into p9_fd_opts structure + * @params: options string passed from mount + * @opts: fd transport-specific structure to parse options into * * Returns 0 upon success, -ERRNO upon failure */ diff --git a/net/9p/trans_rdma.c b/net/9p/trans_rdma.c index ac4990041eb..65cb29db03f 100644 --- a/net/9p/trans_rdma.c +++ b/net/9p/trans_rdma.c @@ -67,14 +67,15 @@ * @pd: Protection Domain pointer * @qp: Queue Pair pointer * @cq: Completion Queue pointer + * @dm_mr: DMA Memory Region pointer * @lkey: The local access only memory region key * @timeout: Number of uSecs to wait for connection management events * @sq_depth: The depth of the Send Queue * @sq_sem: Semaphore for the SQ * @rq_depth: The depth of the Receive Queue. + * @rq_count: Count of requests in the Receive Queue. * @addr: The remote peer's address * @req_lock: Protects the active request list - * @send_wait: Wait list when the SQ fills up * @cm_done: Completion event for connection management tracking */ struct p9_trans_rdma { @@ -154,9 +155,9 @@ static match_table_t tokens = { }; /** - * parse_options - parse mount options into session structure - * @options: options string passed from mount - * @opts: transport-specific structure to parse options into + * parse_opts - parse mount options into rdma options structure + * @params: options string passed from mount + * @opts: rdma transport-specific structure to parse options into * * Returns 0 upon success, -ERRNO upon failure */ diff --git a/net/9p/trans_virtio.c b/net/9p/trans_virtio.c index a49484e67e1..9bf0b737aa5 100644 --- a/net/9p/trans_virtio.c +++ b/net/9p/trans_virtio.c @@ -57,11 +57,9 @@ static int chan_index; * @initialized: whether the channel is initialized * @inuse: whether the channel is in use * @lock: protects multiple elements within this structure + * @client: client instance * @vdev: virtio dev associated with this channel * @vq: virtio queue associated with this channel - * @tagpool: accounting for tag ids (and request slots) - * @reqs: array of request slots - * @max_tag: current number of request_slots allocated * @sg: scatter gather list which is used to pack a request (protected?) * * We keep all per-channel information in a structure. @@ -92,7 +90,7 @@ static unsigned int rest_of_page(void *data) /** * p9_virtio_close - reclaim resources of a channel - * @trans: transport state + * @client: client instance * * This reclaims a channel by freeing its resources and * reseting its inuse flag. @@ -181,9 +179,8 @@ static int p9_virtio_cancel(struct p9_client *client, struct p9_req_t *req) /** * p9_virtio_request - issue a request - * @t: transport state - * @tc: &p9_fcall request to transmit - * @rc: &p9_fcall to put reponse into + * @client: client instance issuing the request + * @req: request to be issued * */ -- cgit v1.2.3-70-g09d2 From 0aad37ef3deed118d3816e1d1a600eb2ec9dcb87 Mon Sep 17 00:00:00 2001 From: Abhishek Kulkarni Date: Mon, 17 Aug 2009 16:38:45 -0500 Subject: net/9p: insulate the client against an invalid error code sent by a 9p server A looney tunes server sending an invalid error code (which is !IS_ERR_VALUE) can result in a client oops. So fix it by adding a check and converting unknown or invalid error codes to -ESERVERFAULT. Signed-off-by: Abhishek Kulkarni Signed-off-by: Eric Van Hensbergen --- net/9p/client.c | 7 +------ net/9p/error.c | 2 +- 2 files changed, 2 insertions(+), 7 deletions(-) (limited to 'net') diff --git a/net/9p/client.c b/net/9p/client.c index 7bbd2d5ae8d..5bf5f227dbe 100644 --- a/net/9p/client.c +++ b/net/9p/client.c @@ -411,14 +411,9 @@ static int p9_check_errors(struct p9_client *c, struct p9_req_t *req) if (c->dotu) err = -ecode; - if (!err) { + if (!err || !IS_ERR_VALUE(err)) err = p9_errstr2errno(ename, strlen(ename)); - /* string match failed */ - if (!err) - err = -ESERVERFAULT; - } - P9_DPRINTK(P9_DEBUG_9P, "<<< RERROR (%d) %s\n", -ecode, ename); kfree(ename); diff --git a/net/9p/error.c b/net/9p/error.c index fdebe431406..52518512a93 100644 --- a/net/9p/error.c +++ b/net/9p/error.c @@ -239,7 +239,7 @@ int p9_errstr2errno(char *errstr, int len) errstr[len] = 0; printk(KERN_ERR "%s: server reported unknown error %s\n", __func__, errstr); - errno = 1; + errno = ESERVERFAULT; } return -errno; -- cgit v1.2.3-70-g09d2 From c6ba973b8fa97422aab4204f7d79f1d413cde925 Mon Sep 17 00:00:00 2001 From: Ralf Baechle Date: Mon, 17 Aug 2009 18:05:32 -0700 Subject: NETROM: Fix use of static buffer The static variable used by nr_call_to_digi might result in corruption if multiple threads are trying to usee a node or neighbour via ioctl. Fixed by having the caller pass a structure in. This is safe because nr_add_node rsp. nr_add_neigh will allocate a permanent structure, if needed. Signed-off-by: Ralf Baechle Signed-off-by: David S. Miller --- net/netrom/nr_route.c | 21 ++++++++++++--------- 1 file changed, 12 insertions(+), 9 deletions(-) (limited to 'net') diff --git a/net/netrom/nr_route.c b/net/netrom/nr_route.c index e943c16552a..4eb1ac9a767 100644 --- a/net/netrom/nr_route.c +++ b/net/netrom/nr_route.c @@ -630,23 +630,23 @@ out: return dev; } -static ax25_digi *nr_call_to_digi(int ndigis, ax25_address *digipeaters) +static ax25_digi *nr_call_to_digi(ax25_digi *digi, int ndigis, + ax25_address *digipeaters) { - static ax25_digi ax25_digi; int i; if (ndigis == 0) return NULL; for (i = 0; i < ndigis; i++) { - ax25_digi.calls[i] = digipeaters[i]; - ax25_digi.repeated[i] = 0; + digi->calls[i] = digipeaters[i]; + digi->repeated[i] = 0; } - ax25_digi.ndigi = ndigis; - ax25_digi.lastrepeat = -1; + digi->ndigi = ndigis; + digi->lastrepeat = -1; - return &ax25_digi; + return digi; } /* @@ -656,6 +656,7 @@ int nr_rt_ioctl(unsigned int cmd, void __user *arg) { struct nr_route_struct nr_route; struct net_device *dev; + ax25_digi digi; int ret; switch (cmd) { @@ -673,13 +674,15 @@ int nr_rt_ioctl(unsigned int cmd, void __user *arg) ret = nr_add_node(&nr_route.callsign, nr_route.mnemonic, &nr_route.neighbour, - nr_call_to_digi(nr_route.ndigis, nr_route.digipeaters), + nr_call_to_digi(&digi, nr_route.ndigis, + nr_route.digipeaters), dev, nr_route.quality, nr_route.obs_count); break; case NETROM_NEIGH: ret = nr_add_neigh(&nr_route.callsign, - nr_call_to_digi(nr_route.ndigis, nr_route.digipeaters), + nr_call_to_digi(&digi, nr_route.ndigis, + nr_route.digipeaters), dev, nr_route.quality); break; default: -- cgit v1.2.3-70-g09d2 From c1a8f1f1c8e01eab5862c8db39b49ace814e6c66 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Sun, 16 Aug 2009 09:36:49 +0000 Subject: net: restore gnet_stats_basic to previous definition In 5e140dfc1fe87eae27846f193086724806b33c7d "net: reorder struct Qdisc for better SMP performance" the definition of struct gnet_stats_basic changed incompatibly, as copies of this struct are shipped to userland via netlink. Restoring old behavior is not welcome, for performance reason. Fix is to use a private structure for kernel, and teach gnet_stats_copy_basic() to convert from kernel to user land, using legacy structure (struct gnet_stats_basic) Based on a report and initial patch from Michael Spang. Reported-by: Michael Spang Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/linux/gen_stats.h | 5 +++++ include/net/act_api.h | 2 +- include/net/gen_stats.h | 10 +++++----- include/net/netfilter/xt_rateest.h | 2 +- include/net/sch_generic.h | 2 +- net/core/gen_estimator.c | 12 ++++++------ net/core/gen_stats.c | 11 ++++++++--- net/netfilter/xt_RATEEST.c | 2 +- net/sched/sch_atm.c | 2 +- net/sched/sch_cbq.c | 2 +- net/sched/sch_drr.c | 2 +- net/sched/sch_hfsc.c | 2 +- net/sched/sch_htb.c | 2 +- 13 files changed, 33 insertions(+), 23 deletions(-) (limited to 'net') diff --git a/include/linux/gen_stats.h b/include/linux/gen_stats.h index 0ffa41df0ee..710e901085d 100644 --- a/include/linux/gen_stats.h +++ b/include/linux/gen_stats.h @@ -19,6 +19,11 @@ enum { * @packets: number of seen packets */ struct gnet_stats_basic +{ + __u64 bytes; + __u32 packets; +}; +struct gnet_stats_basic_packed { __u64 bytes; __u32 packets; diff --git a/include/net/act_api.h b/include/net/act_api.h index 565eed8fe49..c05fd717c58 100644 --- a/include/net/act_api.h +++ b/include/net/act_api.h @@ -16,7 +16,7 @@ struct tcf_common { u32 tcfc_capab; int tcfc_action; struct tcf_t tcfc_tm; - struct gnet_stats_basic tcfc_bstats; + struct gnet_stats_basic_packed tcfc_bstats; struct gnet_stats_queue tcfc_qstats; struct gnet_stats_rate_est tcfc_rate_est; spinlock_t tcfc_lock; diff --git a/include/net/gen_stats.h b/include/net/gen_stats.h index d136b5240ef..c1488553e34 100644 --- a/include/net/gen_stats.h +++ b/include/net/gen_stats.h @@ -28,7 +28,7 @@ extern int gnet_stats_start_copy_compat(struct sk_buff *skb, int type, spinlock_t *lock, struct gnet_dump *d); extern int gnet_stats_copy_basic(struct gnet_dump *d, - struct gnet_stats_basic *b); + struct gnet_stats_basic_packed *b); extern int gnet_stats_copy_rate_est(struct gnet_dump *d, struct gnet_stats_rate_est *r); extern int gnet_stats_copy_queue(struct gnet_dump *d, @@ -37,14 +37,14 @@ extern int gnet_stats_copy_app(struct gnet_dump *d, void *st, int len); extern int gnet_stats_finish_copy(struct gnet_dump *d); -extern int gen_new_estimator(struct gnet_stats_basic *bstats, +extern int gen_new_estimator(struct gnet_stats_basic_packed *bstats, struct gnet_stats_rate_est *rate_est, spinlock_t *stats_lock, struct nlattr *opt); -extern void gen_kill_estimator(struct gnet_stats_basic *bstats, +extern void gen_kill_estimator(struct gnet_stats_basic_packed *bstats, struct gnet_stats_rate_est *rate_est); -extern int gen_replace_estimator(struct gnet_stats_basic *bstats, +extern int gen_replace_estimator(struct gnet_stats_basic_packed *bstats, struct gnet_stats_rate_est *rate_est, spinlock_t *stats_lock, struct nlattr *opt); -extern bool gen_estimator_active(const struct gnet_stats_basic *bstats, +extern bool gen_estimator_active(const struct gnet_stats_basic_packed *bstats, const struct gnet_stats_rate_est *rate_est); #endif diff --git a/include/net/netfilter/xt_rateest.h b/include/net/netfilter/xt_rateest.h index 65d594dffbf..ddbf37e1961 100644 --- a/include/net/netfilter/xt_rateest.h +++ b/include/net/netfilter/xt_rateest.h @@ -8,7 +8,7 @@ struct xt_rateest { spinlock_t lock; struct gnet_estimator params; struct gnet_stats_rate_est rstats; - struct gnet_stats_basic bstats; + struct gnet_stats_basic_packed bstats; }; extern struct xt_rateest *xt_rateest_lookup(const char *name); diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h index 964ffa0d881..5482e9582f5 100644 --- a/include/net/sch_generic.h +++ b/include/net/sch_generic.h @@ -72,7 +72,7 @@ struct Qdisc */ unsigned long state; struct sk_buff_head q; - struct gnet_stats_basic bstats; + struct gnet_stats_basic_packed bstats; struct gnet_stats_queue qstats; }; diff --git a/net/core/gen_estimator.c b/net/core/gen_estimator.c index 78e5bfc454a..493775f4f2f 100644 --- a/net/core/gen_estimator.c +++ b/net/core/gen_estimator.c @@ -81,7 +81,7 @@ struct gen_estimator { struct list_head list; - struct gnet_stats_basic *bstats; + struct gnet_stats_basic_packed *bstats; struct gnet_stats_rate_est *rate_est; spinlock_t *stats_lock; int ewma_log; @@ -165,7 +165,7 @@ static void gen_add_node(struct gen_estimator *est) } static -struct gen_estimator *gen_find_node(const struct gnet_stats_basic *bstats, +struct gen_estimator *gen_find_node(const struct gnet_stats_basic_packed *bstats, const struct gnet_stats_rate_est *rate_est) { struct rb_node *p = est_root.rb_node; @@ -202,7 +202,7 @@ struct gen_estimator *gen_find_node(const struct gnet_stats_basic *bstats, * * NOTE: Called under rtnl_mutex */ -int gen_new_estimator(struct gnet_stats_basic *bstats, +int gen_new_estimator(struct gnet_stats_basic_packed *bstats, struct gnet_stats_rate_est *rate_est, spinlock_t *stats_lock, struct nlattr *opt) @@ -262,7 +262,7 @@ static void __gen_kill_estimator(struct rcu_head *head) * * NOTE: Called under rtnl_mutex */ -void gen_kill_estimator(struct gnet_stats_basic *bstats, +void gen_kill_estimator(struct gnet_stats_basic_packed *bstats, struct gnet_stats_rate_est *rate_est) { struct gen_estimator *e; @@ -292,7 +292,7 @@ EXPORT_SYMBOL(gen_kill_estimator); * * Returns 0 on success or a negative error code. */ -int gen_replace_estimator(struct gnet_stats_basic *bstats, +int gen_replace_estimator(struct gnet_stats_basic_packed *bstats, struct gnet_stats_rate_est *rate_est, spinlock_t *stats_lock, struct nlattr *opt) { @@ -308,7 +308,7 @@ EXPORT_SYMBOL(gen_replace_estimator); * * Returns true if estimator is active, and false if not. */ -bool gen_estimator_active(const struct gnet_stats_basic *bstats, +bool gen_estimator_active(const struct gnet_stats_basic_packed *bstats, const struct gnet_stats_rate_est *rate_est) { ASSERT_RTNL(); diff --git a/net/core/gen_stats.c b/net/core/gen_stats.c index c3d0ffeac24..8569310268a 100644 --- a/net/core/gen_stats.c +++ b/net/core/gen_stats.c @@ -106,16 +106,21 @@ gnet_stats_start_copy(struct sk_buff *skb, int type, spinlock_t *lock, * if the room in the socket buffer was not sufficient. */ int -gnet_stats_copy_basic(struct gnet_dump *d, struct gnet_stats_basic *b) +gnet_stats_copy_basic(struct gnet_dump *d, struct gnet_stats_basic_packed *b) { if (d->compat_tc_stats) { d->tc_stats.bytes = b->bytes; d->tc_stats.packets = b->packets; } - if (d->tail) - return gnet_stats_copy(d, TCA_STATS_BASIC, b, sizeof(*b)); + if (d->tail) { + struct gnet_stats_basic sb; + memset(&sb, 0, sizeof(sb)); + sb.bytes = b->bytes; + sb.packets = b->packets; + return gnet_stats_copy(d, TCA_STATS_BASIC, &sb, sizeof(sb)); + } return 0; } diff --git a/net/netfilter/xt_RATEEST.c b/net/netfilter/xt_RATEEST.c index 43f5676b1af..d80b8192e0d 100644 --- a/net/netfilter/xt_RATEEST.c +++ b/net/netfilter/xt_RATEEST.c @@ -74,7 +74,7 @@ static unsigned int xt_rateest_tg(struct sk_buff *skb, const struct xt_target_param *par) { const struct xt_rateest_target_info *info = par->targinfo; - struct gnet_stats_basic *stats = &info->est->bstats; + struct gnet_stats_basic_packed *stats = &info->est->bstats; spin_lock_bh(&info->est->lock); stats->bytes += skb->len; diff --git a/net/sched/sch_atm.c b/net/sched/sch_atm.c index 2a8b83af7c4..ab82f145f68 100644 --- a/net/sched/sch_atm.c +++ b/net/sched/sch_atm.c @@ -49,7 +49,7 @@ struct atm_flow_data { struct socket *sock; /* for closing */ u32 classid; /* x:y type ID */ int ref; /* reference count */ - struct gnet_stats_basic bstats; + struct gnet_stats_basic_packed bstats; struct gnet_stats_queue qstats; struct atm_flow_data *next; struct atm_flow_data *excess; /* flow for excess traffic; diff --git a/net/sched/sch_cbq.c b/net/sched/sch_cbq.c index 23a167670fd..d5798e17a83 100644 --- a/net/sched/sch_cbq.c +++ b/net/sched/sch_cbq.c @@ -128,7 +128,7 @@ struct cbq_class long avgidle; long deficit; /* Saved deficit for WRR */ psched_time_t penalized; - struct gnet_stats_basic bstats; + struct gnet_stats_basic_packed bstats; struct gnet_stats_queue qstats; struct gnet_stats_rate_est rate_est; struct tc_cbq_xstats xstats; diff --git a/net/sched/sch_drr.c b/net/sched/sch_drr.c index 7597fe14686..12b2fb04b29 100644 --- a/net/sched/sch_drr.c +++ b/net/sched/sch_drr.c @@ -22,7 +22,7 @@ struct drr_class { unsigned int refcnt; unsigned int filter_cnt; - struct gnet_stats_basic bstats; + struct gnet_stats_basic_packed bstats; struct gnet_stats_queue qstats; struct gnet_stats_rate_est rate_est; struct list_head alist; diff --git a/net/sched/sch_hfsc.c b/net/sched/sch_hfsc.c index 362c2811b2d..dad0144423d 100644 --- a/net/sched/sch_hfsc.c +++ b/net/sched/sch_hfsc.c @@ -116,7 +116,7 @@ struct hfsc_class struct Qdisc_class_common cl_common; unsigned int refcnt; /* usage count */ - struct gnet_stats_basic bstats; + struct gnet_stats_basic_packed bstats; struct gnet_stats_queue qstats; struct gnet_stats_rate_est rate_est; unsigned int level; /* class level in hierarchy */ diff --git a/net/sched/sch_htb.c b/net/sched/sch_htb.c index 88cd0262662..ec4d46399d5 100644 --- a/net/sched/sch_htb.c +++ b/net/sched/sch_htb.c @@ -74,7 +74,7 @@ enum htb_cmode { struct htb_class { struct Qdisc_class_common common; /* general class parameters */ - struct gnet_stats_basic bstats; + struct gnet_stats_basic_packed bstats; struct gnet_stats_queue qstats; struct gnet_stats_rate_est rate_est; struct tc_htb_xstats xstats; /* our special stats */ -- cgit v1.2.3-70-g09d2 From ee5f9757ea17759e1ce5503bdae2b07e48e32af9 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Fri, 21 Aug 2009 16:33:34 -0700 Subject: pkt_sched: Convert qdisc_watchdog to tasklet_hrtimer None of this stuff should execute in hw IRQ context, therefore use a tasklet_hrtimer so that it runs in softirq context. Signed-off-by: David S. Miller Acked-by: Thomas Gleixner --- include/net/pkt_sched.h | 4 ++-- net/sched/sch_api.c | 8 ++++---- 2 files changed, 6 insertions(+), 6 deletions(-) (limited to 'net') diff --git a/include/net/pkt_sched.h b/include/net/pkt_sched.h index 82a3191375f..7eafb8d5447 100644 --- a/include/net/pkt_sched.h +++ b/include/net/pkt_sched.h @@ -61,8 +61,8 @@ psched_tdiff_bounded(psched_time_t tv1, psched_time_t tv2, psched_time_t bound) } struct qdisc_watchdog { - struct hrtimer timer; - struct Qdisc *qdisc; + struct tasklet_hrtimer timer; + struct Qdisc *qdisc; }; extern void qdisc_watchdog_init(struct qdisc_watchdog *wd, struct Qdisc *qdisc); diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c index 24d17ce9c29..e1c2bf7e9ba 100644 --- a/net/sched/sch_api.c +++ b/net/sched/sch_api.c @@ -468,8 +468,8 @@ static enum hrtimer_restart qdisc_watchdog(struct hrtimer *timer) void qdisc_watchdog_init(struct qdisc_watchdog *wd, struct Qdisc *qdisc) { - hrtimer_init(&wd->timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS); - wd->timer.function = qdisc_watchdog; + tasklet_hrtimer_init(&wd->timer, qdisc_watchdog, + CLOCK_MONOTONIC, HRTIMER_MODE_ABS); wd->qdisc = qdisc; } EXPORT_SYMBOL(qdisc_watchdog_init); @@ -485,13 +485,13 @@ void qdisc_watchdog_schedule(struct qdisc_watchdog *wd, psched_time_t expires) wd->qdisc->flags |= TCQ_F_THROTTLED; time = ktime_set(0, 0); time = ktime_add_ns(time, PSCHED_TICKS2NS(expires)); - hrtimer_start(&wd->timer, time, HRTIMER_MODE_ABS); + tasklet_hrtimer_start(&wd->timer, time, HRTIMER_MODE_ABS); } EXPORT_SYMBOL(qdisc_watchdog_schedule); void qdisc_watchdog_cancel(struct qdisc_watchdog *wd) { - hrtimer_cancel(&wd->timer); + tasklet_hrtimer_cancel(&wd->timer); wd->qdisc->flags &= ~TCQ_F_THROTTLED; } EXPORT_SYMBOL(qdisc_watchdog_cancel); -- cgit v1.2.3-70-g09d2 From 38acce2d7983632100a9ff3fd20295f6e34074a8 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Fri, 21 Aug 2009 16:51:38 -0700 Subject: pkt_sched: Convert CBQ to tasklet_hrtimer. This code expects to run in softirq context, and bare hrtimers run in hw IRQ context. Signed-off-by: David S. Miller Acked-by: Thomas Gleixner --- net/sched/sch_cbq.c | 23 +++++++++++++---------- 1 file changed, 13 insertions(+), 10 deletions(-) (limited to 'net') diff --git a/net/sched/sch_cbq.c b/net/sched/sch_cbq.c index d5798e17a83..81652d6ccd3 100644 --- a/net/sched/sch_cbq.c +++ b/net/sched/sch_cbq.c @@ -163,7 +163,7 @@ struct cbq_sched_data psched_time_t now_rt; /* Cached real time */ unsigned pmask; - struct hrtimer delay_timer; + struct tasklet_hrtimer delay_timer; struct qdisc_watchdog watchdog; /* Watchdog timer, started when CBQ has backlog, but cannot @@ -503,6 +503,8 @@ static void cbq_ovl_delay(struct cbq_class *cl) cl->undertime = q->now + delay; if (delay > 0) { + struct hrtimer *ht; + sched += delay + cl->penalty; cl->penalized = sched; cl->cpriority = TC_CBQ_MAXPRIO; @@ -510,12 +512,12 @@ static void cbq_ovl_delay(struct cbq_class *cl) expires = ktime_set(0, 0); expires = ktime_add_ns(expires, PSCHED_TICKS2NS(sched)); - if (hrtimer_try_to_cancel(&q->delay_timer) && - ktime_to_ns(ktime_sub( - hrtimer_get_expires(&q->delay_timer), - expires)) > 0) - hrtimer_set_expires(&q->delay_timer, expires); - hrtimer_restart(&q->delay_timer); + ht = &q->delay_timer.timer; + if (hrtimer_try_to_cancel(ht) && + ktime_to_ns(ktime_sub(hrtimer_get_expires(ht), + expires)) > 0) + hrtimer_set_expires(ht, expires); + hrtimer_restart(ht); cl->delayed = 1; cl->xstats.overactions++; return; @@ -621,7 +623,7 @@ static enum hrtimer_restart cbq_undelay(struct hrtimer *timer) time = ktime_set(0, 0); time = ktime_add_ns(time, PSCHED_TICKS2NS(now + delay)); - hrtimer_start(&q->delay_timer, time, HRTIMER_MODE_ABS); + tasklet_hrtimer_start(&q->delay_timer, time, HRTIMER_MODE_ABS); } sch->flags &= ~TCQ_F_THROTTLED; @@ -1214,7 +1216,7 @@ cbq_reset(struct Qdisc* sch) q->tx_class = NULL; q->tx_borrowed = NULL; qdisc_watchdog_cancel(&q->watchdog); - hrtimer_cancel(&q->delay_timer); + tasklet_hrtimer_cancel(&q->delay_timer); q->toplevel = TC_CBQ_MAXLEVEL; q->now = psched_get_time(); q->now_rt = q->now; @@ -1397,7 +1399,8 @@ static int cbq_init(struct Qdisc *sch, struct nlattr *opt) q->link.minidle = -0x7FFFFFFF; qdisc_watchdog_init(&q->watchdog, sch); - hrtimer_init(&q->delay_timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS); + tasklet_hrtimer_init(&q->delay_timer, cbq_undelay, + CLOCK_MONOTONIC, HRTIMER_MODE_ABS); q->delay_timer.function = cbq_undelay; q->toplevel = TC_CBQ_MAXLEVEL; q->now = psched_get_time(); -- cgit v1.2.3-70-g09d2 From ca6982b858e1d08010c1d29d8e8255b2ac2ad70a Mon Sep 17 00:00:00 2001 From: Bruno Prémont Date: Sun, 23 Aug 2009 19:06:28 -0700 Subject: ipv6: Fix commit 63d9950b08184e6531adceb65f64b429909cc101 (ipv6: Make v4-mapped bindings consistent with IPv4) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Commit 63d9950b08184e6531adceb65f64b429909cc101 (ipv6: Make v4-mapped bindings consistent with IPv4) changes behavior of inet6_bind() for v4-mapped addresses so it should behave the same way as inet_bind(). During this change setting of err to -EADDRNOTAVAIL got lost: af_inet.c:469 inet_bind() err = -EADDRNOTAVAIL; if (!sysctl_ip_nonlocal_bind && !(inet->freebind || inet->transparent) && addr->sin_addr.s_addr != htonl(INADDR_ANY) && chk_addr_ret != RTN_LOCAL && chk_addr_ret != RTN_MULTICAST && chk_addr_ret != RTN_BROADCAST) goto out; af_inet6.c:463 inet6_bind() if (addr_type == IPV6_ADDR_MAPPED) { int chk_addr_ret; /* Binding to v4-mapped address on a v6-only socket * makes no sense */ if (np->ipv6only) { err = -EINVAL; goto out; } /* Reproduce AF_INET checks to make the bindings consitant */ v4addr = addr->sin6_addr.s6_addr32[3]; chk_addr_ret = inet_addr_type(net, v4addr); if (!sysctl_ip_nonlocal_bind && !(inet->freebind || inet->transparent) && v4addr != htonl(INADDR_ANY) && chk_addr_ret != RTN_LOCAL && chk_addr_ret != RTN_MULTICAST && chk_addr_ret != RTN_BROADCAST) goto out; } else { Signed-off-by Bruno Prémont Signed-off-by: David S. Miller --- net/ipv6/af_inet6.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c index caa0278d30a..45f9a2a42d5 100644 --- a/net/ipv6/af_inet6.c +++ b/net/ipv6/af_inet6.c @@ -306,8 +306,10 @@ int inet6_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) v4addr != htonl(INADDR_ANY) && chk_addr_ret != RTN_LOCAL && chk_addr_ret != RTN_MULTICAST && - chk_addr_ret != RTN_BROADCAST) + chk_addr_ret != RTN_BROADCAST) { + err = -EADDRNOTAVAIL; goto out; + } } else { if (addr_type != IPV6_ADDR_ANY) { struct net_device *dev = NULL; -- cgit v1.2.3-70-g09d2 From 2149f66f49ab07515666127bf5140c5c94677af8 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Thu, 20 Aug 2009 02:47:34 +0000 Subject: netfilter: xt_quota: fix wrong return value (error case) Success was indicated on a memory allocation failure, thereby causing a crash due to a later NULL deref. (Affects v2.6.30-rc1 up to here.) Signed-off-by: Jan Engelhardt Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- net/netfilter/xt_quota.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/netfilter/xt_quota.c b/net/netfilter/xt_quota.c index 98fc190e8f0..390b7d09fe5 100644 --- a/net/netfilter/xt_quota.c +++ b/net/netfilter/xt_quota.c @@ -52,7 +52,7 @@ static bool quota_mt_check(const struct xt_mtchk_param *par) q->master = kmalloc(sizeof(*q->master), GFP_KERNEL); if (q->master == NULL) - return -ENOMEM; + return false; q->master->quota = q->quota; return true; -- cgit v1.2.3-70-g09d2 From 79b1bee888d43b14cf0c08fb8e5aa6cb161e48f8 Mon Sep 17 00:00:00 2001 From: Dongdong Deng Date: Fri, 21 Aug 2009 03:33:36 +0000 Subject: netpoll: warning for ndo_start_xmit returns with interrupts enabled WARN_ONCE for ndo_start_xmit() enable interrupts in netpoll_send_skb(), because the NETPOLL API requires that interrupts remain disabled in netpoll_send_skb(). Signed-off-by: Dongdong Deng Acked-by: Matt Mackall Signed-off-by: David S. Miller --- net/core/netpoll.c | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'net') diff --git a/net/core/netpoll.c b/net/core/netpoll.c index df30feb2fc7..1b76eb11deb 100644 --- a/net/core/netpoll.c +++ b/net/core/netpoll.c @@ -319,6 +319,11 @@ static void netpoll_send_skb(struct netpoll *np, struct sk_buff *skb) udelay(USEC_PER_POLL); } + + WARN_ONCE(!irqs_disabled(), + "netpoll_send_skb(): %s enabled interrupts in poll (%pF)\n", + dev->name, ops->ndo_start_xmit); + local_irq_restore(flags); } -- cgit v1.2.3-70-g09d2 From 28e9fc592cb8c7a43e4d3147b38be6032a0e81bc Mon Sep 17 00:00:00 2001 From: Jiri Slaby Date: Sun, 23 Aug 2009 22:55:51 -0700 Subject: NET: llc, zero sockaddr_llc struct sllc_arphrd member of sockaddr_llc might not be changed. Zero sllc before copying to the above layer's structure. Signed-off-by: Jiri Slaby Signed-off-by: David S. Miller --- net/llc/af_llc.c | 1 + 1 file changed, 1 insertion(+) (limited to 'net') diff --git a/net/llc/af_llc.c b/net/llc/af_llc.c index 9208cf5f2bd..c45eee1c0e8 100644 --- a/net/llc/af_llc.c +++ b/net/llc/af_llc.c @@ -914,6 +914,7 @@ static int llc_ui_getname(struct socket *sock, struct sockaddr *uaddr, struct llc_sock *llc = llc_sk(sk); int rc = 0; + memset(&sllc, 0, sizeof(sllc)); lock_sock(sk); if (sock_flag(sk, SOCK_ZAPPED)) goto out; -- cgit v1.2.3-70-g09d2 From a2cb6a4dd470d7a64255a10b843b0d188416b78f Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Mon, 24 Aug 2009 19:37:05 -0700 Subject: pkt_sched: Fix bogon in tasklet_hrtimer changes. Reported by Stephen Rothwell, luckily it's harmless: net/sched/sch_api.c: In function 'qdisc_watchdog': net/sched/sch_api.c:460: warning: initialization from incompatible pointer type net/sched/sch_cbq.c: In function 'cbq_undelay': net/sched/sch_cbq.c:595: warning: initialization from incompatible pointer type Signed-off-by: David S. Miller --- net/sched/sch_api.c | 2 +- net/sched/sch_cbq.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c index e1c2bf7e9ba..92e6f3a52c1 100644 --- a/net/sched/sch_api.c +++ b/net/sched/sch_api.c @@ -458,7 +458,7 @@ EXPORT_SYMBOL(qdisc_warn_nonwc); static enum hrtimer_restart qdisc_watchdog(struct hrtimer *timer) { struct qdisc_watchdog *wd = container_of(timer, struct qdisc_watchdog, - timer); + timer.timer); wd->qdisc->flags &= ~TCQ_F_THROTTLED; __netif_schedule(qdisc_root(wd->qdisc)); diff --git a/net/sched/sch_cbq.c b/net/sched/sch_cbq.c index 81652d6ccd3..149b0405c5e 100644 --- a/net/sched/sch_cbq.c +++ b/net/sched/sch_cbq.c @@ -593,7 +593,7 @@ static psched_tdiff_t cbq_undelay_prio(struct cbq_sched_data *q, int prio, static enum hrtimer_restart cbq_undelay(struct hrtimer *timer) { struct cbq_sched_data *q = container_of(timer, struct cbq_sched_data, - delay_timer); + delay_timer.timer); struct Qdisc *sch = q->watchdog.qdisc; psched_time_t now; psched_tdiff_t delay = 0; -- cgit v1.2.3-70-g09d2 From 788d908f2879a17e5f80924f3da2e23f1034482d Mon Sep 17 00:00:00 2001 From: Julien TINNES Date: Thu, 27 Aug 2009 15:26:58 +0200 Subject: ipv4: make ip_append_data() handle NULL routing table Add a check in ip_append_data() for NULL *rtp to prevent future bugs in callers from being exploitable. Signed-off-by: Julien Tinnes Signed-off-by: Tavis Ormandy Acked-by: David S. Miller Signed-off-by: Linus Torvalds --- net/ipv4/ip_output.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'net') diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index 7d082105472..7ffcd96fe59 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -813,6 +813,8 @@ int ip_append_data(struct sock *sk, inet->cork.addr = ipc->addr; } rt = *rtp; + if (unlikely(!rt)) + return -EFAULT; /* * We steal reference to this route, caller should not release it */ -- cgit v1.2.3-70-g09d2 From 2574cc9f4ffc6c681c9177111357efe5b76f0e36 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Fri, 28 Aug 2009 11:12:12 -0400 Subject: SUNRPC: Fix rpc_task_force_reencode This patch fixes the bug that was reported in http://bugzilla.kernel.org/show_bug.cgi?id=14053 If we're in the case where we need to force a reencode and then resend of the RPC request, due to xprt_transmit failing with a networking error, then we _must_ retransmit the entire request. Signed-off-by: Trond Myklebust Cc: stable@kernel.org Signed-off-by: Linus Torvalds --- net/sunrpc/clnt.c | 1 + 1 file changed, 1 insertion(+) (limited to 'net') diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index ebfcf9b8990..df1039f077c 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -937,6 +937,7 @@ static inline void rpc_task_force_reencode(struct rpc_task *task) { task->tk_rqstp->rq_snd_buf.len = 0; + task->tk_rqstp->rq_bytes_sent = 0; } static inline void -- cgit v1.2.3-70-g09d2 From d66ee0587c3927aea5178a822976c7c853d815fe Mon Sep 17 00:00:00 2001 From: Jarek Poplawski Date: Sun, 30 Aug 2009 23:15:36 +0000 Subject: net: sk_free() should be allowed right after sk_alloc() After commit 2b85a34e911bf483c27cfdd124aeb1605145dc80 (net: No more expensive sock_hold()/sock_put() on each tx) sk_free() frees socks conditionally and depends on sk_wmem_alloc being set e.g. in sock_init_data(). But in some cases sk_free() is called earlier, usually after other alloc errors. Fix is to move sk_wmem_alloc initialization from sock_init_data() to sk_alloc() itself. Signed-off-by: Jarek Poplawski Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/core/sock.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/core/sock.c b/net/core/sock.c index bbb25be7ddf..76334228ed1 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -1025,6 +1025,7 @@ struct sock *sk_alloc(struct net *net, int family, gfp_t priority, sk->sk_prot = sk->sk_prot_creator = prot; sock_lock_init(sk); sock_net_set(sk, get_net(net)); + atomic_set(&sk->sk_wmem_alloc, 1); } return sk; @@ -1872,7 +1873,6 @@ void sock_init_data(struct socket *sock, struct sock *sk) */ smp_wmb(); atomic_set(&sk->sk_refcnt, 1); - atomic_set(&sk->sk_wmem_alloc, 1); atomic_set(&sk->sk_drops, 0); } EXPORT_SYMBOL(sock_init_data); -- cgit v1.2.3-70-g09d2 From 2fbd3da3877ad8d923b055e5996f80b4d4a6daf4 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Tue, 1 Sep 2009 17:59:25 -0700 Subject: pkt_sched: Revert tasklet_hrtimer changes. These are full of unresolved problems, mainly that conversions don't work 1-1 from hrtimers to tasklet_hrtimers because unlike hrtimers tasklets can't be killed from softirq context. And when a qdisc gets reset, that's exactly what we need to do here. We'll work this out in the net-next-2.6 tree and if warranted we'll backport that work to -stable. This reverts the following 3 changesets: a2cb6a4dd470d7a64255a10b843b0d188416b78f ("pkt_sched: Fix bogon in tasklet_hrtimer changes.") 38acce2d7983632100a9ff3fd20295f6e34074a8 ("pkt_sched: Convert CBQ to tasklet_hrtimer.") ee5f9757ea17759e1ce5503bdae2b07e48e32af9 ("pkt_sched: Convert qdisc_watchdog to tasklet_hrtimer") Signed-off-by: David S. Miller --- include/net/pkt_sched.h | 4 ++-- net/sched/sch_api.c | 10 +++++----- net/sched/sch_cbq.c | 25 +++++++++++-------------- 3 files changed, 18 insertions(+), 21 deletions(-) (limited to 'net') diff --git a/include/net/pkt_sched.h b/include/net/pkt_sched.h index 7eafb8d5447..82a3191375f 100644 --- a/include/net/pkt_sched.h +++ b/include/net/pkt_sched.h @@ -61,8 +61,8 @@ psched_tdiff_bounded(psched_time_t tv1, psched_time_t tv2, psched_time_t bound) } struct qdisc_watchdog { - struct tasklet_hrtimer timer; - struct Qdisc *qdisc; + struct hrtimer timer; + struct Qdisc *qdisc; }; extern void qdisc_watchdog_init(struct qdisc_watchdog *wd, struct Qdisc *qdisc); diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c index 92e6f3a52c1..24d17ce9c29 100644 --- a/net/sched/sch_api.c +++ b/net/sched/sch_api.c @@ -458,7 +458,7 @@ EXPORT_SYMBOL(qdisc_warn_nonwc); static enum hrtimer_restart qdisc_watchdog(struct hrtimer *timer) { struct qdisc_watchdog *wd = container_of(timer, struct qdisc_watchdog, - timer.timer); + timer); wd->qdisc->flags &= ~TCQ_F_THROTTLED; __netif_schedule(qdisc_root(wd->qdisc)); @@ -468,8 +468,8 @@ static enum hrtimer_restart qdisc_watchdog(struct hrtimer *timer) void qdisc_watchdog_init(struct qdisc_watchdog *wd, struct Qdisc *qdisc) { - tasklet_hrtimer_init(&wd->timer, qdisc_watchdog, - CLOCK_MONOTONIC, HRTIMER_MODE_ABS); + hrtimer_init(&wd->timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS); + wd->timer.function = qdisc_watchdog; wd->qdisc = qdisc; } EXPORT_SYMBOL(qdisc_watchdog_init); @@ -485,13 +485,13 @@ void qdisc_watchdog_schedule(struct qdisc_watchdog *wd, psched_time_t expires) wd->qdisc->flags |= TCQ_F_THROTTLED; time = ktime_set(0, 0); time = ktime_add_ns(time, PSCHED_TICKS2NS(expires)); - tasklet_hrtimer_start(&wd->timer, time, HRTIMER_MODE_ABS); + hrtimer_start(&wd->timer, time, HRTIMER_MODE_ABS); } EXPORT_SYMBOL(qdisc_watchdog_schedule); void qdisc_watchdog_cancel(struct qdisc_watchdog *wd) { - tasklet_hrtimer_cancel(&wd->timer); + hrtimer_cancel(&wd->timer); wd->qdisc->flags &= ~TCQ_F_THROTTLED; } EXPORT_SYMBOL(qdisc_watchdog_cancel); diff --git a/net/sched/sch_cbq.c b/net/sched/sch_cbq.c index 149b0405c5e..d5798e17a83 100644 --- a/net/sched/sch_cbq.c +++ b/net/sched/sch_cbq.c @@ -163,7 +163,7 @@ struct cbq_sched_data psched_time_t now_rt; /* Cached real time */ unsigned pmask; - struct tasklet_hrtimer delay_timer; + struct hrtimer delay_timer; struct qdisc_watchdog watchdog; /* Watchdog timer, started when CBQ has backlog, but cannot @@ -503,8 +503,6 @@ static void cbq_ovl_delay(struct cbq_class *cl) cl->undertime = q->now + delay; if (delay > 0) { - struct hrtimer *ht; - sched += delay + cl->penalty; cl->penalized = sched; cl->cpriority = TC_CBQ_MAXPRIO; @@ -512,12 +510,12 @@ static void cbq_ovl_delay(struct cbq_class *cl) expires = ktime_set(0, 0); expires = ktime_add_ns(expires, PSCHED_TICKS2NS(sched)); - ht = &q->delay_timer.timer; - if (hrtimer_try_to_cancel(ht) && - ktime_to_ns(ktime_sub(hrtimer_get_expires(ht), - expires)) > 0) - hrtimer_set_expires(ht, expires); - hrtimer_restart(ht); + if (hrtimer_try_to_cancel(&q->delay_timer) && + ktime_to_ns(ktime_sub( + hrtimer_get_expires(&q->delay_timer), + expires)) > 0) + hrtimer_set_expires(&q->delay_timer, expires); + hrtimer_restart(&q->delay_timer); cl->delayed = 1; cl->xstats.overactions++; return; @@ -593,7 +591,7 @@ static psched_tdiff_t cbq_undelay_prio(struct cbq_sched_data *q, int prio, static enum hrtimer_restart cbq_undelay(struct hrtimer *timer) { struct cbq_sched_data *q = container_of(timer, struct cbq_sched_data, - delay_timer.timer); + delay_timer); struct Qdisc *sch = q->watchdog.qdisc; psched_time_t now; psched_tdiff_t delay = 0; @@ -623,7 +621,7 @@ static enum hrtimer_restart cbq_undelay(struct hrtimer *timer) time = ktime_set(0, 0); time = ktime_add_ns(time, PSCHED_TICKS2NS(now + delay)); - tasklet_hrtimer_start(&q->delay_timer, time, HRTIMER_MODE_ABS); + hrtimer_start(&q->delay_timer, time, HRTIMER_MODE_ABS); } sch->flags &= ~TCQ_F_THROTTLED; @@ -1216,7 +1214,7 @@ cbq_reset(struct Qdisc* sch) q->tx_class = NULL; q->tx_borrowed = NULL; qdisc_watchdog_cancel(&q->watchdog); - tasklet_hrtimer_cancel(&q->delay_timer); + hrtimer_cancel(&q->delay_timer); q->toplevel = TC_CBQ_MAXLEVEL; q->now = psched_get_time(); q->now_rt = q->now; @@ -1399,8 +1397,7 @@ static int cbq_init(struct Qdisc *sch, struct nlattr *opt) q->link.minidle = -0x7FFFFFFF; qdisc_watchdog_init(&q->watchdog, sch); - tasklet_hrtimer_init(&q->delay_timer, cbq_undelay, - CLOCK_MONOTONIC, HRTIMER_MODE_ABS); + hrtimer_init(&q->delay_timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS); q->delay_timer.function = cbq_undelay; q->toplevel = TC_CBQ_MAXLEVEL; q->now = psched_get_time(); -- cgit v1.2.3-70-g09d2 From 16ebb5e0b36ceadc8186f71d68b0c4fa4b6e781b Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 2 Sep 2009 02:40:09 +0000 Subject: tc: Fix unitialized kernel memory leak Three bytes of uninitialized kernel memory are currently leaked to user Signed-off-by: Eric Dumazet Reviewed-by: Jiri Pirko Signed-off-by: David S. Miller --- net/sched/sch_api.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'net') diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c index 24d17ce9c29..fdb694e9f75 100644 --- a/net/sched/sch_api.c +++ b/net/sched/sch_api.c @@ -1456,6 +1456,8 @@ static int tc_fill_tclass(struct sk_buff *skb, struct Qdisc *q, nlh = NLMSG_NEW(skb, pid, seq, event, sizeof(*tcm), flags); tcm = NLMSG_DATA(nlh); tcm->tcm_family = AF_UNSPEC; + tcm->tcm__pad1 = 0; + tcm->tcm__pad2 = 0; tcm->tcm_ifindex = qdisc_dev(q)->ifindex; tcm->tcm_parent = q->handle; tcm->tcm_handle = q->handle; -- cgit v1.2.3-70-g09d2