Diffstat (limited to 'net')
272 files changed, 8765 insertions, 4569 deletions
diff --git a/net/9p/Kconfig b/net/9p/Kconfig index 66821cd64a7..71bc110aebf 100644 --- a/net/9p/Kconfig +++ b/net/9p/Kconfig @@ -13,9 +13,19 @@ menuconfig NET_9P If unsure, say N. +config NET_9P_FD + depends on NET_9P + default y if NET_9P + tristate "9P File Descriptor Transports (Experimental)" + help + This builds support for file descriptor transports for 9p + which includes support for TCP/IP, named pipes, or passed + file descriptors. TCP/IP is the default transport for 9p, + so if you are going to use 9p, you'll likely want this. + config NET_9P_DEBUG bool "Debug information" depends on NET_9P help - Say Y if you want the 9P subsistem to log debug information. + Say Y if you want the 9P subsystem to log debug information. diff --git a/net/9p/Makefile b/net/9p/Makefile index 85b3a7838ac..5059bc06f8f 100644 --- a/net/9p/Makefile +++ b/net/9p/Makefile @@ -1,8 +1,8 @@ obj-$(CONFIG_NET_9P) := 9pnet.o +obj-$(CONFIG_NET_9P_FD) += 9pnet_fd.o 9pnet-objs := \ mod.o \ - trans_fd.o \ mux.o \ client.o \ conv.o \ @@ -10,4 +10,5 @@ obj-$(CONFIG_NET_9P) := 9pnet.o fcprint.o \ util.o \ -9pnet-$(CONFIG_SYSCTL) += sysctl.o +9pnet_fd-objs := \ + trans_fd.o \ diff --git a/net/9p/client.c b/net/9p/client.c index cb170750337..af919936404 100644 --- a/net/9p/client.c +++ b/net/9p/client.c @@ -30,6 +30,7 @@ #include <linux/sched.h> #include <linux/uaccess.h> #include <net/9p/9p.h> +#include <linux/parser.h> #include <net/9p/transport.h> #include <net/9p/conn.h> #include <net/9p/client.h> @@ -38,7 +39,7 @@ static struct p9_fid *p9_fid_create(struct p9_client *clnt); static void p9_fid_destroy(struct p9_fid *fid); static struct p9_stat *p9_clone_stat(struct p9_stat *st, int dotu); -struct p9_client *p9_client_create(struct p9_transport *trans, int msize, +struct p9_client *p9_client_create(struct p9_trans *trans, int msize, int dotu) { int err, n; @@ -146,7 +147,7 @@ void p9_client_disconnect(struct p9_client *clnt) EXPORT_SYMBOL(p9_client_disconnect); struct p9_fid *p9_client_attach(struct p9_client *clnt, struct p9_fid *afid, - char *uname, char *aname) + char *uname, u32 n_uname, char *aname) { int err; struct p9_fcall *tc, *rc; @@ -165,7 +166,8 @@ struct p9_fid *p9_client_attach(struct p9_client *clnt, struct p9_fid *afid, goto error; } - tc = p9_create_tattach(fid->fid, afid?afid->fid:P9_NOFID, uname, aname); + tc = p9_create_tattach(fid->fid, afid?afid->fid:P9_NOFID, uname, aname, + n_uname, clnt->dotu); if (IS_ERR(tc)) { err = PTR_ERR(tc); tc = NULL; @@ -190,7 +192,8 @@ error: } EXPORT_SYMBOL(p9_client_attach); -struct p9_fid *p9_client_auth(struct p9_client *clnt, char *uname, char *aname) +struct p9_fid *p9_client_auth(struct p9_client *clnt, char *uname, + u32 n_uname, char *aname) { int err; struct p9_fcall *tc, *rc; @@ -209,7 +212,7 @@ struct p9_fid *p9_client_auth(struct p9_client *clnt, char *uname, char *aname) goto error; } - tc = p9_create_tauth(fid->fid, uname, aname); + tc = p9_create_tauth(fid->fid, uname, aname, n_uname, clnt->dotu); if (IS_ERR(tc)) { err = PTR_ERR(tc); tc = NULL; diff --git a/net/9p/conv.c b/net/9p/conv.c index d979d958ea1..aa2aa9884f9 100644 --- a/net/9p/conv.c +++ b/net/9p/conv.c @@ -547,7 +547,8 @@ error: } EXPORT_SYMBOL(p9_create_tversion); -struct p9_fcall *p9_create_tauth(u32 afid, char *uname, char *aname) +struct p9_fcall *p9_create_tauth(u32 afid, char *uname, char *aname, + u32 n_uname, int dotu) { int size; struct p9_fcall *fc; @@ -555,7 +556,16 @@ struct p9_fcall *p9_create_tauth(u32 afid, char *uname, char *aname) struct cbuf *bufp = &buffer; /* afid[4] 
uname[s] aname[s] */ - size = 4 + 2 + strlen(uname) + 2 + strlen(aname); + size = 4 + 2 + 2; + if (uname) + size += strlen(uname); + + if (aname) + size += strlen(aname); + + if (dotu) + size += 4; /* n_uname */ + fc = p9_create_common(bufp, size, P9_TAUTH); if (IS_ERR(fc)) goto error; @@ -563,6 +573,8 @@ struct p9_fcall *p9_create_tauth(u32 afid, char *uname, char *aname) p9_put_int32(bufp, afid, &fc->params.tauth.afid); p9_put_str(bufp, uname, &fc->params.tauth.uname); p9_put_str(bufp, aname, &fc->params.tauth.aname); + if (dotu) + p9_put_int32(bufp, n_uname, &fc->params.tauth.n_uname); if (buf_check_overflow(bufp)) { kfree(fc); @@ -574,7 +586,8 @@ error: EXPORT_SYMBOL(p9_create_tauth); struct p9_fcall * -p9_create_tattach(u32 fid, u32 afid, char *uname, char *aname) +p9_create_tattach(u32 fid, u32 afid, char *uname, char *aname, + u32 n_uname, int dotu) { int size; struct p9_fcall *fc; @@ -582,7 +595,16 @@ p9_create_tattach(u32 fid, u32 afid, char *uname, char *aname) struct cbuf *bufp = &buffer; /* fid[4] afid[4] uname[s] aname[s] */ - size = 4 + 4 + 2 + strlen(uname) + 2 + strlen(aname); + size = 4 + 4 + 2 + 2; + if (uname) + size += strlen(uname); + + if (aname) + size += strlen(aname); + + if (dotu) + size += 4; /* n_uname */ + fc = p9_create_common(bufp, size, P9_TATTACH); if (IS_ERR(fc)) goto error; @@ -591,6 +613,8 @@ p9_create_tattach(u32 fid, u32 afid, char *uname, char *aname) p9_put_int32(bufp, afid, &fc->params.tattach.afid); p9_put_str(bufp, uname, &fc->params.tattach.uname); p9_put_str(bufp, aname, &fc->params.tattach.aname); + if (dotu) + p9_put_int32(bufp, n_uname, &fc->params.tattach.n_uname); error: return fc; diff --git a/net/9p/mod.c b/net/9p/mod.c index 4f9e1d2ac25..41d70f47375 100644 --- a/net/9p/mod.c +++ b/net/9p/mod.c @@ -27,6 +27,10 @@ #include <linux/module.h> #include <linux/moduleparam.h> #include <net/9p/9p.h> +#include <linux/fs.h> +#include <linux/parser.h> +#include <net/9p/transport.h> +#include <linux/list.h> #ifdef CONFIG_NET_9P_DEBUG unsigned int p9_debug_level = 0; /* feature-rific global debug level */ @@ -37,8 +41,64 @@ MODULE_PARM_DESC(debug, "9P debugging level"); extern int p9_mux_global_init(void); extern void p9_mux_global_exit(void); -extern int p9_sysctl_register(void); -extern void p9_sysctl_unregister(void); + +/* + * Dynamic Transport Registration Routines + * + */ + +static LIST_HEAD(v9fs_trans_list); +static struct p9_trans_module *v9fs_default_transport; + +/** + * v9fs_register_trans - register a new transport with 9p + * @m - structure describing the transport module and entry points + * + */ +void v9fs_register_trans(struct p9_trans_module *m) +{ + list_add_tail(&m->list, &v9fs_trans_list); + if (m->def) + v9fs_default_transport = m; +} +EXPORT_SYMBOL(v9fs_register_trans); + +/** + * v9fs_match_trans - match transport versus registered transports + * @arg: string identifying transport + * + */ +struct p9_trans_module *v9fs_match_trans(const substring_t *name) +{ + struct list_head *p; + struct p9_trans_module *t = NULL; + + list_for_each(p, &v9fs_trans_list) { + t = list_entry(p, struct p9_trans_module, list); + if (strncmp(t->name, name->from, name->to-name->from) == 0) + break; + } + return t; +} +EXPORT_SYMBOL(v9fs_match_trans); + +/** + * v9fs_default_trans - returns pointer to default transport + * + */ + +struct p9_trans_module *v9fs_default_trans(void) +{ + if (v9fs_default_transport) + return v9fs_default_transport; + else if (!list_empty(&v9fs_trans_list)) + return list_first_entry(&v9fs_trans_list, + struct 
p9_trans_module, list); + else + return NULL; +} +EXPORT_SYMBOL(v9fs_default_trans); + /** * v9fs_init - Initialize module @@ -56,12 +116,6 @@ static int __init init_p9(void) return ret; } - ret = p9_sysctl_register(); - if (ret) { - printk(KERN_WARNING "9p: registering sysctl failed\n"); - return ret; - } - return ret; } @@ -72,7 +126,6 @@ static int __init init_p9(void) static void __exit exit_p9(void) { - p9_sysctl_unregister(); p9_mux_global_exit(); } diff --git a/net/9p/mux.c b/net/9p/mux.c index 5d70558c4c6..f14014793be 100644 --- a/net/9p/mux.c +++ b/net/9p/mux.c @@ -31,6 +31,7 @@ #include <linux/idr.h> #include <linux/mutex.h> #include <net/9p/9p.h> +#include <linux/parser.h> #include <net/9p/transport.h> #include <net/9p/conn.h> @@ -71,7 +72,7 @@ struct p9_conn { struct p9_mux_poll_task *poll_task; int msize; unsigned char *extended; - struct p9_transport *trans; + struct p9_trans *trans; struct p9_idpool *tagpool; int err; wait_queue_head_t equeue; @@ -271,7 +272,7 @@ static void p9_mux_poll_stop(struct p9_conn *m) * @msize - maximum message size * @extended - pointer to the extended flag */ -struct p9_conn *p9_conn_create(struct p9_transport *trans, int msize, +struct p9_conn *p9_conn_create(struct p9_trans *trans, int msize, unsigned char *extended) { int i, n; diff --git a/net/9p/sysctl.c b/net/9p/sysctl.c deleted file mode 100644 index 8b61027a24e..00000000000 --- a/net/9p/sysctl.c +++ /dev/null @@ -1,81 +0,0 @@ -/* - * net/9p/sysctl.c - * - * 9P sysctl interface - * - * Copyright (C) 2007 by Latchesar Ionkov <lucho@ionkov.net> - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 - * as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. 
- * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to: - * Free Software Foundation - * 51 Franklin Street, Fifth Floor - * Boston, MA 02111-1301 USA - * - */ - -#include <linux/kernel.h> -#include <linux/mm.h> -#include <linux/sysctl.h> -#include <linux/init.h> -#include <net/9p/9p.h> - -static struct ctl_table p9_table[] = { -#ifdef CONFIG_NET_9P_DEBUG - { - .ctl_name = CTL_UNNUMBERED, - .procname = "debug", - .data = &p9_debug_level, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = &proc_dointvec - }, -#endif - {}, -}; - -static struct ctl_table p9_net_table[] = { - { - .ctl_name = CTL_UNNUMBERED, - .procname = "9p", - .maxlen = 0, - .mode = 0555, - .child = p9_table, - }, - {}, -}; - -static struct ctl_table p9_ctl_table[] = { - { - .ctl_name = CTL_NET, - .procname = "net", - .maxlen = 0, - .mode = 0555, - .child = p9_net_table, - }, - {}, -}; - -static struct ctl_table_header *p9_table_header; - -int __init p9_sysctl_register(void) -{ - p9_table_header = register_sysctl_table(p9_ctl_table); - if (!p9_table_header) - return -ENOMEM; - - return 0; -} - -void __exit p9_sysctl_unregister(void) -{ - unregister_sysctl_table(p9_table_header); -} diff --git a/net/9p/trans_fd.c b/net/9p/trans_fd.c index fd636e94358..30269a4ff22 100644 --- a/net/9p/trans_fd.c +++ b/net/9p/trans_fd.c @@ -5,7 +5,7 @@ * * Copyright (C) 2006 by Russ Cox <rsc@swtch.com> * Copyright (C) 2004-2005 by Latchesar Ionkov <lucho@ionkov.net> - * Copyright (C) 2004-2005 by Eric Van Hensbergen <ericvh@gmail.com> + * Copyright (C) 2004-2007 by Eric Van Hensbergen <ericvh@gmail.com> * Copyright (C) 1997-2002 by Ron Minnich <rminnich@sarnoff.com> * * This program is free software; you can redistribute it and/or modify @@ -36,160 +36,114 @@ #include <linux/inet.h> #include <linux/idr.h> #include <linux/file.h> +#include <linux/parser.h> #include <net/9p/9p.h> #include <net/9p/transport.h> #define P9_PORT 564 +#define MAX_SOCK_BUF (64*1024) + + +struct p9_fd_opts { + int rfd; + int wfd; + u16 port; +}; struct p9_trans_fd { struct file *rd; struct file *wr; }; -static int p9_socket_open(struct p9_transport *trans, struct socket *csocket); -static int p9_fd_open(struct p9_transport *trans, int rfd, int wfd); -static int p9_fd_read(struct p9_transport *trans, void *v, int len); -static int p9_fd_write(struct p9_transport *trans, void *v, int len); -static unsigned int p9_fd_poll(struct p9_transport *trans, - struct poll_table_struct *pt); -static void p9_fd_close(struct p9_transport *trans); - -struct p9_transport *p9_trans_create_tcp(const char *addr, int port) -{ - int err; - struct p9_transport *trans; - struct socket *csocket; - struct sockaddr_in sin_server; - - csocket = NULL; - trans = kmalloc(sizeof(struct p9_transport), GFP_KERNEL); - if (!trans) - return ERR_PTR(-ENOMEM); - - trans->write = p9_fd_write; - trans->read = p9_fd_read; - trans->close = p9_fd_close; - trans->poll = p9_fd_poll; - - sin_server.sin_family = AF_INET; - sin_server.sin_addr.s_addr = in_aton(addr); - sin_server.sin_port = htons(port); - sock_create_kern(PF_INET, SOCK_STREAM, IPPROTO_TCP, &csocket); - - if (!csocket) { - P9_EPRINTK(KERN_ERR, "p9_trans_tcp: problem creating socket\n"); - err = -EIO; - goto error; - } - - err = csocket->ops->connect(csocket, - (struct sockaddr *)&sin_server, - sizeof(struct sockaddr_in), 0); - if (err < 0) { - P9_EPRINTK(KERN_ERR, - "p9_trans_tcp: problem connecting socket to %s\n", - addr); - goto error; - } - - err = p9_socket_open(trans, 
csocket); - if (err < 0) - goto error; +/* + * Option Parsing (code inspired by NFS code) + * - a little lazy - parse all fd-transport options + */ - return trans; +enum { + /* Options that take integer arguments */ + Opt_port, Opt_rfdno, Opt_wfdno, +}; -error: - if (csocket) - sock_release(csocket); +static match_table_t tokens = { + {Opt_port, "port=%u"}, + {Opt_rfdno, "rfdno=%u"}, + {Opt_wfdno, "wfdno=%u"}, +}; - kfree(trans); - return ERR_PTR(err); -} -EXPORT_SYMBOL(p9_trans_create_tcp); +/** + * v9fs_parse_options - parse mount options into session structure + * @options: options string passed from mount + * @v9ses: existing v9fs session information + * + */ -struct p9_transport *p9_trans_create_unix(const char *addr) +static void parse_opts(char *options, struct p9_fd_opts *opts) { - int err; - struct socket *csocket; - struct sockaddr_un sun_server; - struct p9_transport *trans; - - csocket = NULL; - trans = kmalloc(sizeof(struct p9_transport), GFP_KERNEL); - if (!trans) - return ERR_PTR(-ENOMEM); + char *p; + substring_t args[MAX_OPT_ARGS]; + int option; + int ret; - trans->write = p9_fd_write; - trans->read = p9_fd_read; - trans->close = p9_fd_close; - trans->poll = p9_fd_poll; + opts->port = P9_PORT; + opts->rfd = ~0; + opts->wfd = ~0; - if (strlen(addr) > UNIX_PATH_MAX) { - P9_EPRINTK(KERN_ERR, "p9_trans_unix: address too long: %s\n", - addr); - err = -ENAMETOOLONG; - goto error; - } + if (!options) + return; - sun_server.sun_family = PF_UNIX; - strcpy(sun_server.sun_path, addr); - sock_create_kern(PF_UNIX, SOCK_STREAM, 0, &csocket); - err = csocket->ops->connect(csocket, (struct sockaddr *)&sun_server, - sizeof(struct sockaddr_un) - 1, 0); - if (err < 0) { - P9_EPRINTK(KERN_ERR, - "p9_trans_unix: problem connecting socket: %s: %d\n", - addr, err); - goto error; + while ((p = strsep(&options, ",")) != NULL) { + int token; + if (!*p) + continue; + token = match_token(p, tokens, args); + ret = match_int(&args[0], &option); + if (ret < 0) { + P9_DPRINTK(P9_DEBUG_ERROR, + "integer field, but no integer?\n"); + continue; + } + switch (token) { + case Opt_port: + opts->port = option; + break; + case Opt_rfdno: + opts->rfd = option; + break; + case Opt_wfdno: + opts->wfd = option; + break; + default: + continue; + } } - - err = p9_socket_open(trans, csocket); - if (err < 0) - goto error; - - return trans; - -error: - if (csocket) - sock_release(csocket); - - kfree(trans); - return ERR_PTR(err); } -EXPORT_SYMBOL(p9_trans_create_unix); -struct p9_transport *p9_trans_create_fd(int rfd, int wfd) +static int p9_fd_open(struct p9_trans *trans, int rfd, int wfd) { - int err; - struct p9_transport *trans; + struct p9_trans_fd *ts = kmalloc(sizeof(struct p9_trans_fd), + GFP_KERNEL); + if (!ts) + return -ENOMEM; - if (rfd == ~0 || wfd == ~0) { - printk(KERN_ERR "v9fs: Insufficient options for proto=fd\n"); - return ERR_PTR(-ENOPROTOOPT); + ts->rd = fget(rfd); + ts->wr = fget(wfd); + if (!ts->rd || !ts->wr) { + if (ts->rd) + fput(ts->rd); + if (ts->wr) + fput(ts->wr); + kfree(ts); + return -EIO; } - trans = kmalloc(sizeof(struct p9_transport), GFP_KERNEL); - if (!trans) - return ERR_PTR(-ENOMEM); - - trans->write = p9_fd_write; - trans->read = p9_fd_read; - trans->close = p9_fd_close; - trans->poll = p9_fd_poll; - - err = p9_fd_open(trans, rfd, wfd); - if (err < 0) - goto error; - - return trans; + trans->priv = ts; + trans->status = Connected; -error: - kfree(trans); - return ERR_PTR(err); + return 0; } -EXPORT_SYMBOL(p9_trans_create_fd); -static int p9_socket_open(struct p9_transport *trans, 
struct socket *csocket) +static int p9_socket_open(struct p9_trans *trans, struct socket *csocket) { int fd, ret; @@ -212,30 +166,6 @@ static int p9_socket_open(struct p9_transport *trans, struct socket *csocket) return 0; } -static int p9_fd_open(struct p9_transport *trans, int rfd, int wfd) -{ - struct p9_trans_fd *ts = kmalloc(sizeof(struct p9_trans_fd), - GFP_KERNEL); - if (!ts) - return -ENOMEM; - - ts->rd = fget(rfd); - ts->wr = fget(wfd); - if (!ts->rd || !ts->wr) { - if (ts->rd) - fput(ts->rd); - if (ts->wr) - fput(ts->wr); - kfree(ts); - return -EIO; - } - - trans->priv = ts; - trans->status = Connected; - - return 0; -} - /** * p9_fd_read- read from a fd * @v9ses: session information @@ -243,7 +173,7 @@ static int p9_fd_open(struct p9_transport *trans, int rfd, int wfd) * @len: size of receive buffer * */ -static int p9_fd_read(struct p9_transport *trans, void *v, int len) +static int p9_fd_read(struct p9_trans *trans, void *v, int len) { int ret; struct p9_trans_fd *ts = NULL; @@ -270,7 +200,7 @@ static int p9_fd_read(struct p9_transport *trans, void *v, int len) * @len: size of send buffer * */ -static int p9_fd_write(struct p9_transport *trans, void *v, int len) +static int p9_fd_write(struct p9_trans *trans, void *v, int len) { int ret; mm_segment_t oldfs; @@ -297,7 +227,7 @@ static int p9_fd_write(struct p9_transport *trans, void *v, int len) } static unsigned int -p9_fd_poll(struct p9_transport *trans, struct poll_table_struct *pt) +p9_fd_poll(struct p9_trans *trans, struct poll_table_struct *pt) { int ret, n; struct p9_trans_fd *ts = NULL; @@ -341,7 +271,7 @@ end: * @trans: private socket structure * */ -static void p9_fd_close(struct p9_transport *trans) +static void p9_fd_close(struct p9_trans *trans) { struct p9_trans_fd *ts; @@ -361,3 +291,182 @@ static void p9_fd_close(struct p9_transport *trans) kfree(ts); } +static struct p9_trans *p9_trans_create_tcp(const char *addr, char *args) +{ + int err; + struct p9_trans *trans; + struct socket *csocket; + struct sockaddr_in sin_server; + struct p9_fd_opts opts; + + parse_opts(args, &opts); + + csocket = NULL; + trans = kmalloc(sizeof(struct p9_trans), GFP_KERNEL); + if (!trans) + return ERR_PTR(-ENOMEM); + + trans->write = p9_fd_write; + trans->read = p9_fd_read; + trans->close = p9_fd_close; + trans->poll = p9_fd_poll; + + sin_server.sin_family = AF_INET; + sin_server.sin_addr.s_addr = in_aton(addr); + sin_server.sin_port = htons(opts.port); + sock_create_kern(PF_INET, SOCK_STREAM, IPPROTO_TCP, &csocket); + + if (!csocket) { + P9_EPRINTK(KERN_ERR, "p9_trans_tcp: problem creating socket\n"); + err = -EIO; + goto error; + } + + err = csocket->ops->connect(csocket, + (struct sockaddr *)&sin_server, + sizeof(struct sockaddr_in), 0); + if (err < 0) { + P9_EPRINTK(KERN_ERR, + "p9_trans_tcp: problem connecting socket to %s\n", + addr); + goto error; + } + + err = p9_socket_open(trans, csocket); + if (err < 0) + goto error; + + return trans; + +error: + if (csocket) + sock_release(csocket); + + kfree(trans); + return ERR_PTR(err); +} + +static struct p9_trans *p9_trans_create_unix(const char *addr, char *args) +{ + int err; + struct socket *csocket; + struct sockaddr_un sun_server; + struct p9_trans *trans; + + csocket = NULL; + trans = kmalloc(sizeof(struct p9_trans), GFP_KERNEL); + if (!trans) + return ERR_PTR(-ENOMEM); + + trans->write = p9_fd_write; + trans->read = p9_fd_read; + trans->close = p9_fd_close; + trans->poll = p9_fd_poll; + + if (strlen(addr) > UNIX_PATH_MAX) { + P9_EPRINTK(KERN_ERR, "p9_trans_unix: address too 
long: %s\n", + addr); + err = -ENAMETOOLONG; + goto error; + } + + sun_server.sun_family = PF_UNIX; + strcpy(sun_server.sun_path, addr); + sock_create_kern(PF_UNIX, SOCK_STREAM, 0, &csocket); + err = csocket->ops->connect(csocket, (struct sockaddr *)&sun_server, + sizeof(struct sockaddr_un) - 1, 0); + if (err < 0) { + P9_EPRINTK(KERN_ERR, + "p9_trans_unix: problem connecting socket: %s: %d\n", + addr, err); + goto error; + } + + err = p9_socket_open(trans, csocket); + if (err < 0) + goto error; + + return trans; + +error: + if (csocket) + sock_release(csocket); + + kfree(trans); + return ERR_PTR(err); +} + +static struct p9_trans *p9_trans_create_fd(const char *name, char *args) +{ + int err; + struct p9_trans *trans; + struct p9_fd_opts opts; + + parse_opts(args, &opts); + + if (opts.rfd == ~0 || opts.wfd == ~0) { + printk(KERN_ERR "v9fs: Insufficient options for proto=fd\n"); + return ERR_PTR(-ENOPROTOOPT); + } + + trans = kmalloc(sizeof(struct p9_trans), GFP_KERNEL); + if (!trans) + return ERR_PTR(-ENOMEM); + + trans->write = p9_fd_write; + trans->read = p9_fd_read; + trans->close = p9_fd_close; + trans->poll = p9_fd_poll; + + err = p9_fd_open(trans, opts.rfd, opts.wfd); + if (err < 0) + goto error; + + return trans; + +error: + kfree(trans); + return ERR_PTR(err); +} + +static struct p9_trans_module p9_tcp_trans = { + .name = "tcp", + .maxsize = MAX_SOCK_BUF, + .def = 1, + .create = p9_trans_create_tcp, +}; + +static struct p9_trans_module p9_unix_trans = { + .name = "unix", + .maxsize = MAX_SOCK_BUF, + .def = 0, + .create = p9_trans_create_unix, +}; + +static struct p9_trans_module p9_fd_trans = { + .name = "fd", + .maxsize = MAX_SOCK_BUF, + .def = 0, + .create = p9_trans_create_fd, +}; + +static int __init p9_trans_fd_init(void) +{ + v9fs_register_trans(&p9_tcp_trans); + v9fs_register_trans(&p9_unix_trans); + v9fs_register_trans(&p9_fd_trans); + + return 1; +} + +static void __exit p9_trans_fd_exit(void) { + printk(KERN_ERR "Removal of 9p transports not implemented\n"); + BUG(); +} + +module_init(p9_trans_fd_init); +module_exit(p9_trans_fd_exit); + +MODULE_AUTHOR("Latchesar Ionkov <lucho@ionkov.net>"); +MODULE_AUTHOR("Eric Van Hensbergen <ericvh@gmail.com>"); +MODULE_LICENSE("GPL"); diff --git a/net/atm/br2684.c b/net/atm/br2684.c index c742d37bfb9..ba6428f204f 100644 --- a/net/atm/br2684.c +++ b/net/atm/br2684.c @@ -24,16 +24,6 @@ Author: Marcell GAL, 2000, XDSL Ltd, Hungary #include "common.h" -/* - * Define this to use a version of the code which interacts with the higher - * layers in a more intellegent way, by always reserving enough space for - * our header at the begining of the packet. However, there may still be - * some problems with programs like tcpdump. In 2.5 we'll sort out what - * we need to do to get this perfect. 
For now we just will copy the packet - * if we need space for the header - */ -/* #define FASTER_VERSION */ - #ifdef SKB_DEBUG static void skb_debug(const struct sk_buff *skb) { @@ -69,9 +59,7 @@ struct br2684_vcc { #ifdef CONFIG_ATM_BR2684_IPFILTER struct br2684_filter filter; #endif /* CONFIG_ATM_BR2684_IPFILTER */ -#ifndef FASTER_VERSION unsigned copies_needed, copies_failed; -#endif /* FASTER_VERSION */ }; struct br2684_dev { @@ -147,13 +135,6 @@ static int br2684_xmit_vcc(struct sk_buff *skb, struct br2684_dev *brdev, struct br2684_vcc *brvcc) { struct atm_vcc *atmvcc; -#ifdef FASTER_VERSION - if (brvcc->encaps == e_llc) - memcpy(skb_push(skb, 8), llc_oui_pid_pad, 8); - /* last 2 bytes of llc_oui_pid_pad are managed by header routines; - yes, you got it: 8 + 2 = sizeof(llc_oui_pid_pad) - */ -#else int minheadroom = (brvcc->encaps == e_llc) ? 10 : 2; if (skb_headroom(skb) < minheadroom) { struct sk_buff *skb2 = skb_realloc_headroom(skb, minheadroom); @@ -170,7 +151,6 @@ static int br2684_xmit_vcc(struct sk_buff *skb, struct br2684_dev *brdev, skb_copy_to_linear_data(skb, llc_oui_pid_pad, 10); else memset(skb->data, 0, 2); -#endif /* FASTER_VERSION */ skb_debug(skb); ATM_SKB(skb)->vcc = atmvcc = brvcc->atmvcc; @@ -237,87 +217,6 @@ static struct net_device_stats *br2684_get_stats(struct net_device *dev) return &BRPRIV(dev)->stats; } -#ifdef FASTER_VERSION -/* - * These mirror eth_header and eth_header_cache. They are not usually - * exported for use in modules, so we grab them from net_device - * after ether_setup() is done with it. Bit of a hack. - */ -static int (*my_eth_header)(struct sk_buff *, struct net_device *, - unsigned short, void *, void *, unsigned); -static int (*my_eth_header_cache)(struct neighbour *, struct hh_cache *); - -static int -br2684_header(struct sk_buff *skb, struct net_device *dev, - unsigned short type, void *daddr, void *saddr, unsigned len) -{ - u16 *pad_before_eth; - int t = my_eth_header(skb, dev, type, daddr, saddr, len); - if (t > 0) { - pad_before_eth = (u16 *) skb_push(skb, 2); - *pad_before_eth = 0; - return dev->hard_header_len; /* or return 16; ? */ - } else - return t; -} - -static int -br2684_header_cache(struct neighbour *neigh, struct hh_cache *hh) -{ -/* hh_data is 16 bytes long. if encaps is ether-llc we need 24, so -xmit will add the additional header part in that case */ - u16 *pad_before_eth = (u16 *)(hh->hh_data); - int t = my_eth_header_cache(neigh, hh); - DPRINTK("br2684_header_cache, neigh=%p, hh_cache=%p\n", neigh, hh); - if (t < 0) - return t; - else { - *pad_before_eth = 0; - hh->hh_len = PADLEN + ETH_HLEN; - } - return 0; -} - -/* - * This is similar to eth_type_trans, which cannot be used because of - * our dev->hard_header_len - */ -static inline __be16 br_type_trans(struct sk_buff *skb, struct net_device *dev) -{ - struct ethhdr *eth; - unsigned char *rawp; - eth = eth_hdr(skb); - - if (is_multicast_ether_addr(eth->h_dest)) { - if (!compare_ether_addr(eth->h_dest, dev->broadcast)) - skb->pkt_type = PACKET_BROADCAST; - else - skb->pkt_type = PACKET_MULTICAST; - } - - else if (compare_ether_addr(eth->h_dest, dev->dev_addr)) - skb->pkt_type = PACKET_OTHERHOST; - - if (ntohs(eth->h_proto) >= 1536) - return eth->h_proto; - - rawp = skb->data; - - /* - * This is a magic hack to spot IPX packets. Older Novell breaks - * the protocol design and runs IPX over 802.3 without an 802.2 LLC - * layer. We look for FFFF which isn't a used 802.2 SSAP/DSAP. This - * won't work for fault tolerant netware but does for the rest. 
- */
- if (*(unsigned short *) rawp == 0xFFFF)
- return htons(ETH_P_802_3);
-
- /*
- * Real 802.2 LLC
- */
- return htons(ETH_P_802_2);
-}
-#endif /* FASTER_VERSION */
 /*
 * We remember when the MAC gets set, so we don't override it later with
@@ -448,17 +347,8 @@ static void br2684_push(struct atm_vcc *atmvcc, struct sk_buff *skb)
 return;
 }
-#ifdef FASTER_VERSION
- /* FIXME: tcpdump shows that pointer to mac header is 2 bytes earlier,
- than should be. What else should I set? */
- skb_pull(skb, plen);
- skb_set_mac_header(skb, -ETH_HLEN);
- skb->pkt_type = PACKET_HOST;
- skb->protocol = br_type_trans(skb, net_dev);
-#else
 skb_pull(skb, plen - ETH_HLEN);
 skb->protocol = eth_type_trans(skb, net_dev);
-#endif /* FASTER_VERSION */
 #ifdef CONFIG_ATM_BR2684_IPFILTER
 if (unlikely(packet_fails_filter(skb->protocol, brvcc, skb))) {
 brdev->stats.rx_dropped++;
@@ -584,13 +474,6 @@ static void br2684_setup(struct net_device *netdev)
 ether_setup(netdev);
 brdev->net_dev = netdev;
-#ifdef FASTER_VERSION
- my_eth_header = netdev->hard_header;
- netdev->hard_header = br2684_header;
- my_eth_header_cache = netdev->hard_header_cache;
- netdev->hard_header_cache = br2684_header_cache;
- netdev->hard_header_len = sizeof(llc_oui_pid_pad) + ETH_HLEN; /* 10 + 14 */
-#endif
 my_eth_mac_addr = netdev->set_mac_address;
 netdev->set_mac_address = br2684_mac_addr;
 netdev->hard_start_xmit = br2684_start_xmit;
@@ -719,16 +602,12 @@ static int br2684_seq_show(struct seq_file *seq, void *v)
 list_for_each_entry(brvcc, &brdev->brvccs, brvccs) {
 seq_printf(seq, " vcc %d.%d.%d: encaps=%s"
-#ifndef FASTER_VERSION
 ", failed copies %u/%u"
-#endif /* FASTER_VERSION */
 "\n", brvcc->atmvcc->dev->number,
 brvcc->atmvcc->vpi, brvcc->atmvcc->vci,
 (brvcc->encaps == e_llc) ? "LLC" : "VC"
-#ifndef FASTER_VERSION
 , brvcc->copies_failed
 , brvcc->copies_needed
-#endif /* FASTER_VERSION */
 );
 #ifdef CONFIG_ATM_BR2684_IPFILTER
 #define b1(var, byte) ((u8 *) &brvcc->filter.var)[byte]
diff --git a/net/ax25/ax25_ds_in.c b/net/ax25/ax25_ds_in.c
index e37d217a986..8273b1200ee 100644
--- a/net/ax25/ax25_ds_in.c
+++ b/net/ax25/ax25_ds_in.c
@@ -75,7 +75,7 @@ static int ax25_ds_state1_machine(ax25_cb *ax25, struct sk_buff *skb, int framet
 }
 ax25_dama_on(ax25);
- /* according to DK4EG´s spec we are required to
+ /* according to DK4EG's spec we are required to
 * send a RR RESPONSE FINAL NR=0.
 */
diff --git a/net/ax25/ax25_ds_subr.c b/net/ax25/ax25_ds_subr.c
index a49773ff2b9..b5e59787be2 100644
--- a/net/ax25/ax25_ds_subr.c
+++ b/net/ax25/ax25_ds_subr.c
@@ -41,7 +41,7 @@ void ax25_ds_enquiry_response(ax25_cb *ax25)
 ax25_cb *ax25o;
 struct hlist_node *node;
- /* Please note that neither DK4EG´s nor DG2FEF´s
+ /* Please note that neither DK4EG's nor DG2FEF's
 * DAMA spec mention the following behaviour as seen
 * with TheFirmware:
 *
diff --git a/net/bluetooth/bnep/core.c b/net/bluetooth/bnep/core.c
index 1f78c3e336d..347e935faaf 100644
--- a/net/bluetooth/bnep/core.c
+++ b/net/bluetooth/bnep/core.c
@@ -2,7 +2,7 @@
 BNEP implementation for Linux Bluetooth stack (BlueZ).
 Copyright (C) 2001-2002 Inventel Systemes
 Written 2001-2002 by
- Clément Moreau <clement.moreau@inventel.fr>
+ Clément Moreau <clement.moreau@inventel.fr>
 David Libault <david.libault@inventel.fr>
 Copyright (C) 2002 Maxim Krasnyansky <maxk@qualcomm.com>
diff --git a/net/bluetooth/bnep/netdev.c b/net/bluetooth/bnep/netdev.c
index 9092816f58d..95e3837e431 100644
--- a/net/bluetooth/bnep/netdev.c
+++ b/net/bluetooth/bnep/netdev.c
@@ -2,7 +2,7 @@
 BNEP implementation for Linux Bluetooth stack (BlueZ).
 Copyright (C) 2001-2002 Inventel Systemes
 Written 2001-2002 by
- Clément Moreau <clement.moreau@inventel.fr>
+ Clément Moreau <clement.moreau@inventel.fr>
 David Libault <david.libault@inventel.fr>
 Copyright (C) 2002 Maxim Krasnyansky <maxk@qualcomm.com>
diff --git a/net/bluetooth/hci_conn.c b/net/bluetooth/hci_conn.c
index 5fdfc9a67d3..9483320f6da 100644
--- a/net/bluetooth/hci_conn.c
+++ b/net/bluetooth/hci_conn.c
@@ -78,11 +78,11 @@ void hci_acl_connect(struct hci_conn *conn)
 cp.pkt_type = cpu_to_le16(hdev->pkt_type & ACL_PTYPE_MASK);
 if (lmp_rswitch_capable(hdev) && !(hdev->link_mode & HCI_LM_MASTER))
- cp.role_switch = 0x01;
+ cp.role_switch = 0x01;
 else
- cp.role_switch = 0x00;
+ cp.role_switch = 0x00;
- hci_send_cmd(hdev, OGF_LINK_CTL, OCF_CREATE_CONN, sizeof(cp), &cp);
+ hci_send_cmd(hdev, HCI_OP_CREATE_CONN, sizeof(cp), &cp);
 }
 static void hci_acl_connect_cancel(struct hci_conn *conn)
@@ -95,8 +95,7 @@ static void hci_acl_connect_cancel(struct hci_conn *conn)
 return;
 bacpy(&cp.bdaddr, &conn->dst);
- hci_send_cmd(conn->hdev, OGF_LINK_CTL,
- OCF_CREATE_CONN_CANCEL, sizeof(cp), &cp);
+ hci_send_cmd(conn->hdev, HCI_OP_CREATE_CONN_CANCEL, sizeof(cp), &cp);
 }
 void hci_acl_disconn(struct hci_conn *conn, __u8 reason)
@@ -109,8 +108,7 @@ void hci_acl_disconn(struct hci_conn *conn, __u8 reason)
 cp.handle = cpu_to_le16(conn->handle);
 cp.reason = reason;
- hci_send_cmd(conn->hdev, OGF_LINK_CTL,
- OCF_DISCONNECT, sizeof(cp), &cp);
+ hci_send_cmd(conn->hdev, HCI_OP_DISCONNECT, sizeof(cp), &cp);
 }
 void hci_add_sco(struct hci_conn *conn, __u16 handle)
@@ -126,7 +124,29 @@ void hci_add_sco(struct hci_conn *conn, __u16 handle)
 cp.handle = cpu_to_le16(handle);
 cp.pkt_type = cpu_to_le16(hdev->pkt_type & SCO_PTYPE_MASK);
- hci_send_cmd(hdev, OGF_LINK_CTL, OCF_ADD_SCO, sizeof(cp), &cp);
+ hci_send_cmd(hdev, HCI_OP_ADD_SCO, sizeof(cp), &cp);
+}
+
+void hci_setup_sync(struct hci_conn *conn, __u16 handle)
+{
+ struct hci_dev *hdev = conn->hdev;
+ struct hci_cp_setup_sync_conn cp;
+
+ BT_DBG("%p", conn);
+
+ conn->state = BT_CONNECT;
+ conn->out = 1;
+
+ cp.handle = cpu_to_le16(handle);
+ cp.pkt_type = cpu_to_le16(hdev->esco_type);
+
+ cp.tx_bandwidth = cpu_to_le32(0x00001f40);
+ cp.rx_bandwidth = cpu_to_le32(0x00001f40);
+ cp.max_latency = cpu_to_le16(0xffff);
+ cp.voice_setting = cpu_to_le16(hdev->voice_setting);
+ cp.retrans_effort = 0xff;
+
+ hci_send_cmd(hdev, HCI_OP_SETUP_SYNC_CONN, sizeof(cp), &cp);
 }
 static void hci_conn_timeout(unsigned long arg)
@@ -143,7 +163,10 @@ static void hci_conn_timeout(unsigned long arg)
 switch (conn->state) {
 case BT_CONNECT:
- hci_acl_connect_cancel(conn);
+ if (conn->type == ACL_LINK)
+ hci_acl_connect_cancel(conn);
+ else
+ hci_acl_disconn(conn, 0x13);
 break;
 case BT_CONNECTED:
 hci_acl_disconn(conn, 0x13);
@@ -330,8 +353,12 @@ struct hci_conn *hci_connect(struct hci_dev *hdev, int type, bdaddr_t *dst)
 hci_conn_hold(sco);
 if (acl->state == BT_CONNECTED &&
- (sco->state == BT_OPEN || sco->state == BT_CLOSED))
- hci_add_sco(sco, acl->handle);
+ (sco->state == BT_OPEN || sco->state == BT_CLOSED)) {
+ if (lmp_esco_capable(hdev))
+ hci_setup_sync(sco, acl->handle);
+ else
+ hci_add_sco(sco, acl->handle);
+ }
 return sco;
 }
@@ -348,7 +375,7 @@ int hci_conn_auth(struct hci_conn *conn)
 if (!test_and_set_bit(HCI_CONN_AUTH_PEND, &conn->pend)) {
 struct hci_cp_auth_requested cp;
 cp.handle = cpu_to_le16(conn->handle);
- hci_send_cmd(conn->hdev, OGF_LINK_CTL, OCF_AUTH_REQUESTED, sizeof(cp), &cp);
+ hci_send_cmd(conn->hdev, HCI_OP_AUTH_REQUESTED, sizeof(cp), &cp);
 }
 return 0;
 }
@@
-369,7 +396,7 @@ int hci_conn_encrypt(struct hci_conn *conn) struct hci_cp_set_conn_encrypt cp; cp.handle = cpu_to_le16(conn->handle); cp.encrypt = 1; - hci_send_cmd(conn->hdev, OGF_LINK_CTL, OCF_SET_CONN_ENCRYPT, sizeof(cp), &cp); + hci_send_cmd(conn->hdev, HCI_OP_SET_CONN_ENCRYPT, sizeof(cp), &cp); } return 0; } @@ -383,7 +410,7 @@ int hci_conn_change_link_key(struct hci_conn *conn) if (!test_and_set_bit(HCI_CONN_AUTH_PEND, &conn->pend)) { struct hci_cp_change_conn_link_key cp; cp.handle = cpu_to_le16(conn->handle); - hci_send_cmd(conn->hdev, OGF_LINK_CTL, OCF_CHANGE_CONN_LINK_KEY, sizeof(cp), &cp); + hci_send_cmd(conn->hdev, HCI_OP_CHANGE_CONN_LINK_KEY, sizeof(cp), &cp); } return 0; } @@ -401,7 +428,7 @@ int hci_conn_switch_role(struct hci_conn *conn, uint8_t role) struct hci_cp_switch_role cp; bacpy(&cp.bdaddr, &conn->dst); cp.role = role; - hci_send_cmd(conn->hdev, OGF_LINK_POLICY, OCF_SWITCH_ROLE, sizeof(cp), &cp); + hci_send_cmd(conn->hdev, HCI_OP_SWITCH_ROLE, sizeof(cp), &cp); } return 0; } @@ -423,8 +450,7 @@ void hci_conn_enter_active_mode(struct hci_conn *conn) if (!test_and_set_bit(HCI_CONN_MODE_CHANGE_PEND, &conn->pend)) { struct hci_cp_exit_sniff_mode cp; cp.handle = cpu_to_le16(conn->handle); - hci_send_cmd(hdev, OGF_LINK_POLICY, - OCF_EXIT_SNIFF_MODE, sizeof(cp), &cp); + hci_send_cmd(hdev, HCI_OP_EXIT_SNIFF_MODE, sizeof(cp), &cp); } timer: @@ -455,8 +481,7 @@ void hci_conn_enter_sniff_mode(struct hci_conn *conn) cp.max_latency = cpu_to_le16(0); cp.min_remote_timeout = cpu_to_le16(0); cp.min_local_timeout = cpu_to_le16(0); - hci_send_cmd(hdev, OGF_LINK_POLICY, - OCF_SNIFF_SUBRATE, sizeof(cp), &cp); + hci_send_cmd(hdev, HCI_OP_SNIFF_SUBRATE, sizeof(cp), &cp); } if (!test_and_set_bit(HCI_CONN_MODE_CHANGE_PEND, &conn->pend)) { @@ -466,8 +491,7 @@ void hci_conn_enter_sniff_mode(struct hci_conn *conn) cp.min_interval = cpu_to_le16(hdev->sniff_min_interval); cp.attempt = cpu_to_le16(4); cp.timeout = cpu_to_le16(1); - hci_send_cmd(hdev, OGF_LINK_POLICY, - OCF_SNIFF_MODE, sizeof(cp), &cp); + hci_send_cmd(hdev, HCI_OP_SNIFF_MODE, sizeof(cp), &cp); } } @@ -493,6 +517,22 @@ void hci_conn_hash_flush(struct hci_dev *hdev) } } +/* Check pending connect attempts */ +void hci_conn_check_pending(struct hci_dev *hdev) +{ + struct hci_conn *conn; + + BT_DBG("hdev %s", hdev->name); + + hci_dev_lock(hdev); + + conn = hci_conn_hash_lookup_state(hdev, ACL_LINK, BT_CONNECT2); + if (conn) + hci_acl_connect(conn); + + hci_dev_unlock(hdev); +} + int hci_get_conn_list(void __user *arg) { struct hci_conn_list_req req, *cl; diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c index 18e3afc964d..372b0d3b75a 100644 --- a/net/bluetooth/hci_core.c +++ b/net/bluetooth/hci_core.c @@ -176,7 +176,7 @@ static void hci_reset_req(struct hci_dev *hdev, unsigned long opt) BT_DBG("%s %ld", hdev->name, opt); /* Reset device */ - hci_send_cmd(hdev, OGF_HOST_CTL, OCF_RESET, 0, NULL); + hci_send_cmd(hdev, HCI_OP_RESET, 0, NULL); } static void hci_init_req(struct hci_dev *hdev, unsigned long opt) @@ -202,16 +202,16 @@ static void hci_init_req(struct hci_dev *hdev, unsigned long opt) /* Reset */ if (test_bit(HCI_QUIRK_RESET_ON_INIT, &hdev->quirks)) - hci_send_cmd(hdev, OGF_HOST_CTL, OCF_RESET, 0, NULL); + hci_send_cmd(hdev, HCI_OP_RESET, 0, NULL); /* Read Local Supported Features */ - hci_send_cmd(hdev, OGF_INFO_PARAM, OCF_READ_LOCAL_FEATURES, 0, NULL); + hci_send_cmd(hdev, HCI_OP_READ_LOCAL_FEATURES, 0, NULL); /* Read Local Version */ - hci_send_cmd(hdev, OGF_INFO_PARAM, OCF_READ_LOCAL_VERSION, 0, NULL); + 
hci_send_cmd(hdev, HCI_OP_READ_LOCAL_VERSION, 0, NULL);
 /* Read Buffer Size (ACL mtu, max pkt, etc.) */
- hci_send_cmd(hdev, OGF_INFO_PARAM, OCF_READ_BUFFER_SIZE, 0, NULL);
+ hci_send_cmd(hdev, HCI_OP_READ_BUFFER_SIZE, 0, NULL);
 #if 0
 /* Host buffer size */
@@ -221,29 +221,35 @@ static void hci_init_req(struct hci_dev *hdev, unsigned long opt)
 cp.sco_mtu = HCI_MAX_SCO_SIZE;
 cp.acl_max_pkt = cpu_to_le16(0xffff);
 cp.sco_max_pkt = cpu_to_le16(0xffff);
- hci_send_cmd(hdev, OGF_HOST_CTL, OCF_HOST_BUFFER_SIZE, sizeof(cp), &cp);
+ hci_send_cmd(hdev, HCI_OP_HOST_BUFFER_SIZE, sizeof(cp), &cp);
 }
 #endif
 /* Read BD Address */
- hci_send_cmd(hdev, OGF_INFO_PARAM, OCF_READ_BD_ADDR, 0, NULL);
+ hci_send_cmd(hdev, HCI_OP_READ_BD_ADDR, 0, NULL);
+
+ /* Read Class of Device */
+ hci_send_cmd(hdev, HCI_OP_READ_CLASS_OF_DEV, 0, NULL);
+
+ /* Read Local Name */
+ hci_send_cmd(hdev, HCI_OP_READ_LOCAL_NAME, 0, NULL);
 /* Read Voice Setting */
- hci_send_cmd(hdev, OGF_HOST_CTL, OCF_READ_VOICE_SETTING, 0, NULL);
+ hci_send_cmd(hdev, HCI_OP_READ_VOICE_SETTING, 0, NULL);
 /* Optional initialization */
 /* Clear Event Filters */
 flt_type = HCI_FLT_CLEAR_ALL;
- hci_send_cmd(hdev, OGF_HOST_CTL, OCF_SET_EVENT_FLT, 1, &flt_type);
+ hci_send_cmd(hdev, HCI_OP_SET_EVENT_FLT, 1, &flt_type);
 /* Page timeout ~20 secs */
 param = cpu_to_le16(0x8000);
- hci_send_cmd(hdev, OGF_HOST_CTL, OCF_WRITE_PG_TIMEOUT, 2, &param);
+ hci_send_cmd(hdev, HCI_OP_WRITE_PG_TIMEOUT, 2, &param);
 /* Connection accept timeout ~20 secs */
 param = cpu_to_le16(0x7d00);
- hci_send_cmd(hdev, OGF_HOST_CTL, OCF_WRITE_CA_TIMEOUT, 2, &param);
+ hci_send_cmd(hdev, HCI_OP_WRITE_CA_TIMEOUT, 2, &param);
 }
 static void hci_scan_req(struct hci_dev *hdev, unsigned long opt)
@@ -253,7 +259,7 @@ static void hci_scan_req(struct hci_dev *hdev, unsigned long opt)
 BT_DBG("%s %x", hdev->name, scan);
 /* Inquiry and Page scans */
- hci_send_cmd(hdev, OGF_HOST_CTL, OCF_WRITE_SCAN_ENABLE, 1, &scan);
+ hci_send_cmd(hdev, HCI_OP_WRITE_SCAN_ENABLE, 1, &scan);
 }
 static void hci_auth_req(struct hci_dev *hdev, unsigned long opt)
@@ -263,7 +269,7 @@ static void hci_auth_req(struct hci_dev *hdev, unsigned long opt)
 BT_DBG("%s %x", hdev->name, auth);
 /* Authentication */
- hci_send_cmd(hdev, OGF_HOST_CTL, OCF_WRITE_AUTH_ENABLE, 1, &auth);
+ hci_send_cmd(hdev, HCI_OP_WRITE_AUTH_ENABLE, 1, &auth);
 }
 static void hci_encrypt_req(struct hci_dev *hdev, unsigned long opt)
@@ -273,7 +279,7 @@ static void hci_encrypt_req(struct hci_dev *hdev, unsigned long opt)
 BT_DBG("%s %x", hdev->name, encrypt);
 /* Authentication */
- hci_send_cmd(hdev, OGF_HOST_CTL, OCF_WRITE_ENCRYPT_MODE, 1, &encrypt);
+ hci_send_cmd(hdev, HCI_OP_WRITE_ENCRYPT_MODE, 1, &encrypt);
 }
 /* Get HCI device by index.
@@ -384,7 +390,7 @@ static void hci_inq_req(struct hci_dev *hdev, unsigned long opt)
 memcpy(&cp.lap, &ir->lap, 3);
 cp.length = ir->length;
 cp.num_rsp = ir->num_rsp;
- hci_send_cmd(hdev, OGF_LINK_CTL, OCF_INQUIRY, sizeof(cp), &cp);
+ hci_send_cmd(hdev, HCI_OP_INQUIRY, sizeof(cp), &cp);
 }
 int hci_inquiry(void __user *arg)
@@ -1111,13 +1117,13 @@ static int hci_send_frame(struct sk_buff *skb)
 }
 /* Send HCI command */
-int hci_send_cmd(struct hci_dev *hdev, __u16 ogf, __u16 ocf, __u32 plen, void *param)
+int hci_send_cmd(struct hci_dev *hdev, __u16 opcode, __u32 plen, void *param)
 {
 int len = HCI_COMMAND_HDR_SIZE + plen;
 struct hci_command_hdr *hdr;
 struct sk_buff *skb;
- BT_DBG("%s ogf 0x%x ocf 0x%x plen %d", hdev->name, ogf, ocf, plen);
+ BT_DBG("%s opcode 0x%x plen %d", hdev->name, opcode, plen);
 skb = bt_skb_alloc(len, GFP_ATOMIC);
 if (!skb) {
@@ -1126,7 +1132,7 @@ int hci_send_cmd(struct hci_dev *hdev, __u16 ogf, __u16 ocf, __u32 plen, void *p
 }
 hdr = (struct hci_command_hdr *) skb_put(skb, HCI_COMMAND_HDR_SIZE);
- hdr->opcode = cpu_to_le16(hci_opcode_pack(ogf, ocf));
+ hdr->opcode = cpu_to_le16(opcode);
 hdr->plen = plen;
 if (plen)
@@ -1143,7 +1149,7 @@ int hci_send_cmd(struct hci_dev *hdev, __u16 ogf, __u16 ocf, __u32 plen, void *p
 }
 /* Get data from the previously sent command */
-void *hci_sent_cmd_data(struct hci_dev *hdev, __u16 ogf, __u16 ocf)
+void *hci_sent_cmd_data(struct hci_dev *hdev, __u16 opcode)
 {
 struct hci_command_hdr *hdr;
@@ -1152,10 +1158,10 @@ void *hci_sent_cmd_data(struct hci_dev *hdev, __u16 ogf, __u16 ocf)
 hdr = (void *) hdev->sent_cmd->data;
- if (hdr->opcode != cpu_to_le16(hci_opcode_pack(ogf, ocf)))
+ if (hdr->opcode != cpu_to_le16(opcode))
 return NULL;
- BT_DBG("%s ogf 0x%x ocf 0x%x", hdev->name, ogf, ocf);
+ BT_DBG("%s opcode 0x%x", hdev->name, opcode);
 return hdev->sent_cmd->data + HCI_COMMAND_HDR_SIZE;
 }
@@ -1355,6 +1361,26 @@ static inline void hci_sched_sco(struct hci_dev *hdev)
 }
 }
+static inline void hci_sched_esco(struct hci_dev *hdev)
+{
+ struct hci_conn *conn;
+ struct sk_buff *skb;
+ int quote;
+
+ BT_DBG("%s", hdev->name);
+
+ while (hdev->sco_cnt && (conn = hci_low_sent(hdev, ESCO_LINK, &quote))) {
+ while (quote-- && (skb = skb_dequeue(&conn->data_q))) {
+ BT_DBG("skb %p len %d", skb, skb->len);
+ hci_send_frame(skb);
+
+ conn->sent++;
+ if (conn->sent == ~0)
+ conn->sent = 0;
+ }
+ }
+}
+
 static void hci_tx_task(unsigned long arg)
 {
 struct hci_dev *hdev = (struct hci_dev *) arg;
@@ -1370,6 +1396,8 @@ static void hci_tx_task(unsigned long arg)
 hci_sched_sco(hdev);
+ hci_sched_esco(hdev);
+
 /* Send next queued raw (unknown type) packet */
 while ((skb = skb_dequeue(&hdev->raw_q)))
 hci_send_frame(skb);
diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c
index 4baea1e3865..46df2e403df 100644
--- a/net/bluetooth/hci_event.c
+++ b/net/bluetooth/hci_event.c
@@ -52,234 +52,273 @@
 /* Handle HCI Event packets */
-/* Command Complete OGF LINK_CTL */
-static void hci_cc_link_ctl(struct hci_dev *hdev, __u16 ocf, struct sk_buff *skb)
+static void hci_cc_inquiry_cancel(struct hci_dev *hdev, struct sk_buff *skb)
 {
- __u8 status;
- struct hci_conn *pend;
+ __u8 status = *((__u8 *) skb->data);
- BT_DBG("%s ocf 0x%x", hdev->name, ocf);
+ BT_DBG("%s status 0x%x", hdev->name, status);
- switch (ocf) {
- case OCF_INQUIRY_CANCEL:
- case OCF_EXIT_PERIODIC_INQ:
- status = *((__u8 *) skb->data);
+ if (status)
+ return;
- if (status) {
- BT_DBG("%s Inquiry cancel error: status 0x%x", hdev->name, status);
- } else {
- clear_bit(HCI_INQUIRY, &hdev->flags);
-
hci_req_complete(hdev, status); - } + clear_bit(HCI_INQUIRY, &hdev->flags); - hci_dev_lock(hdev); + hci_req_complete(hdev, status); - pend = hci_conn_hash_lookup_state(hdev, ACL_LINK, BT_CONNECT2); - if (pend) - hci_acl_connect(pend); + hci_conn_check_pending(hdev); +} - hci_dev_unlock(hdev); +static void hci_cc_exit_periodic_inq(struct hci_dev *hdev, struct sk_buff *skb) +{ + __u8 status = *((__u8 *) skb->data); - break; + BT_DBG("%s status 0x%x", hdev->name, status); - default: - BT_DBG("%s Command complete: ogf LINK_CTL ocf %x", hdev->name, ocf); - break; + if (status) + return; + + clear_bit(HCI_INQUIRY, &hdev->flags); + + hci_conn_check_pending(hdev); +} + +static void hci_cc_remote_name_req_cancel(struct hci_dev *hdev, struct sk_buff *skb) +{ + BT_DBG("%s", hdev->name); +} + +static void hci_cc_role_discovery(struct hci_dev *hdev, struct sk_buff *skb) +{ + struct hci_rp_role_discovery *rp = (void *) skb->data; + struct hci_conn *conn; + + BT_DBG("%s status 0x%x", hdev->name, rp->status); + + if (rp->status) + return; + + hci_dev_lock(hdev); + + conn = hci_conn_hash_lookup_handle(hdev, __le16_to_cpu(rp->handle)); + if (conn) { + if (rp->role) + conn->link_mode &= ~HCI_LM_MASTER; + else + conn->link_mode |= HCI_LM_MASTER; } + + hci_dev_unlock(hdev); } -/* Command Complete OGF LINK_POLICY */ -static void hci_cc_link_policy(struct hci_dev *hdev, __u16 ocf, struct sk_buff *skb) +static void hci_cc_write_link_policy(struct hci_dev *hdev, struct sk_buff *skb) { + struct hci_rp_write_link_policy *rp = (void *) skb->data; struct hci_conn *conn; - struct hci_rp_role_discovery *rd; - struct hci_rp_write_link_policy *lp; void *sent; - BT_DBG("%s ocf 0x%x", hdev->name, ocf); + BT_DBG("%s status 0x%x", hdev->name, rp->status); - switch (ocf) { - case OCF_ROLE_DISCOVERY: - rd = (void *) skb->data; + if (rp->status) + return; - if (rd->status) - break; + sent = hci_sent_cmd_data(hdev, HCI_OP_WRITE_LINK_POLICY); + if (!sent) + return; - hci_dev_lock(hdev); + hci_dev_lock(hdev); - conn = hci_conn_hash_lookup_handle(hdev, __le16_to_cpu(rd->handle)); - if (conn) { - if (rd->role) - conn->link_mode &= ~HCI_LM_MASTER; - else - conn->link_mode |= HCI_LM_MASTER; - } + conn = hci_conn_hash_lookup_handle(hdev, __le16_to_cpu(rp->handle)); + if (conn) { + __le16 policy = get_unaligned((__le16 *) (sent + 2)); + conn->link_policy = __le16_to_cpu(policy); + } - hci_dev_unlock(hdev); - break; + hci_dev_unlock(hdev); +} - case OCF_WRITE_LINK_POLICY: - sent = hci_sent_cmd_data(hdev, OGF_LINK_POLICY, OCF_WRITE_LINK_POLICY); - if (!sent) - break; +static void hci_cc_reset(struct hci_dev *hdev, struct sk_buff *skb) +{ + __u8 status = *((__u8 *) skb->data); - lp = (struct hci_rp_write_link_policy *) skb->data; + BT_DBG("%s status 0x%x", hdev->name, status); - if (lp->status) - break; + hci_req_complete(hdev, status); +} - hci_dev_lock(hdev); +static void hci_cc_write_local_name(struct hci_dev *hdev, struct sk_buff *skb) +{ + __u8 status = *((__u8 *) skb->data); + void *sent; - conn = hci_conn_hash_lookup_handle(hdev, __le16_to_cpu(lp->handle)); - if (conn) { - __le16 policy = get_unaligned((__le16 *) (sent + 2)); - conn->link_policy = __le16_to_cpu(policy); - } + BT_DBG("%s status 0x%x", hdev->name, status); - hci_dev_unlock(hdev); - break; + sent = hci_sent_cmd_data(hdev, HCI_OP_WRITE_LOCAL_NAME); + if (!sent) + return; - default: - BT_DBG("%s: Command complete: ogf LINK_POLICY ocf %x", - hdev->name, ocf); - break; + if (!status) + memcpy(hdev->dev_name, sent, 248); +} + +static void hci_cc_read_local_name(struct 
hci_dev *hdev, struct sk_buff *skb) +{ + struct hci_rp_read_local_name *rp = (void *) skb->data; + + BT_DBG("%s status 0x%x", hdev->name, rp->status); + + if (rp->status) + return; + + memcpy(hdev->dev_name, rp->name, 248); +} + +static void hci_cc_write_auth_enable(struct hci_dev *hdev, struct sk_buff *skb) +{ + __u8 status = *((__u8 *) skb->data); + void *sent; + + BT_DBG("%s status 0x%x", hdev->name, status); + + sent = hci_sent_cmd_data(hdev, HCI_OP_WRITE_AUTH_ENABLE); + if (!sent) + return; + + if (!status) { + __u8 param = *((__u8 *) sent); + + if (param == AUTH_ENABLED) + set_bit(HCI_AUTH, &hdev->flags); + else + clear_bit(HCI_AUTH, &hdev->flags); } + + hci_req_complete(hdev, status); } -/* Command Complete OGF HOST_CTL */ -static void hci_cc_host_ctl(struct hci_dev *hdev, __u16 ocf, struct sk_buff *skb) +static void hci_cc_write_encrypt_mode(struct hci_dev *hdev, struct sk_buff *skb) { - __u8 status, param; - __u16 setting; - struct hci_rp_read_voice_setting *vs; + __u8 status = *((__u8 *) skb->data); void *sent; - BT_DBG("%s ocf 0x%x", hdev->name, ocf); + BT_DBG("%s status 0x%x", hdev->name, status); - switch (ocf) { - case OCF_RESET: - status = *((__u8 *) skb->data); - hci_req_complete(hdev, status); - break; + sent = hci_sent_cmd_data(hdev, HCI_OP_WRITE_ENCRYPT_MODE); + if (!sent) + return; - case OCF_SET_EVENT_FLT: - status = *((__u8 *) skb->data); - if (status) { - BT_DBG("%s SET_EVENT_FLT failed %d", hdev->name, status); - } else { - BT_DBG("%s SET_EVENT_FLT succeseful", hdev->name); - } - break; + if (!status) { + __u8 param = *((__u8 *) sent); - case OCF_WRITE_AUTH_ENABLE: - sent = hci_sent_cmd_data(hdev, OGF_HOST_CTL, OCF_WRITE_AUTH_ENABLE); - if (!sent) - break; + if (param) + set_bit(HCI_ENCRYPT, &hdev->flags); + else + clear_bit(HCI_ENCRYPT, &hdev->flags); + } - status = *((__u8 *) skb->data); - param = *((__u8 *) sent); + hci_req_complete(hdev, status); +} - if (!status) { - if (param == AUTH_ENABLED) - set_bit(HCI_AUTH, &hdev->flags); - else - clear_bit(HCI_AUTH, &hdev->flags); - } - hci_req_complete(hdev, status); - break; +static void hci_cc_write_scan_enable(struct hci_dev *hdev, struct sk_buff *skb) +{ + __u8 status = *((__u8 *) skb->data); + void *sent; - case OCF_WRITE_ENCRYPT_MODE: - sent = hci_sent_cmd_data(hdev, OGF_HOST_CTL, OCF_WRITE_ENCRYPT_MODE); - if (!sent) - break; + BT_DBG("%s status 0x%x", hdev->name, status); - status = *((__u8 *) skb->data); - param = *((__u8 *) sent); + sent = hci_sent_cmd_data(hdev, HCI_OP_WRITE_SCAN_ENABLE); + if (!sent) + return; - if (!status) { - if (param) - set_bit(HCI_ENCRYPT, &hdev->flags); - else - clear_bit(HCI_ENCRYPT, &hdev->flags); - } - hci_req_complete(hdev, status); - break; + if (!status) { + __u8 param = *((__u8 *) sent); - case OCF_WRITE_CA_TIMEOUT: - status = *((__u8 *) skb->data); - if (status) { - BT_DBG("%s OCF_WRITE_CA_TIMEOUT failed %d", hdev->name, status); - } else { - BT_DBG("%s OCF_WRITE_CA_TIMEOUT succeseful", hdev->name); - } - break; + clear_bit(HCI_PSCAN, &hdev->flags); + clear_bit(HCI_ISCAN, &hdev->flags); - case OCF_WRITE_PG_TIMEOUT: - status = *((__u8 *) skb->data); - if (status) { - BT_DBG("%s OCF_WRITE_PG_TIMEOUT failed %d", hdev->name, status); - } else { - BT_DBG("%s: OCF_WRITE_PG_TIMEOUT succeseful", hdev->name); - } - break; + if (param & SCAN_INQUIRY) + set_bit(HCI_ISCAN, &hdev->flags); - case OCF_WRITE_SCAN_ENABLE: - sent = hci_sent_cmd_data(hdev, OGF_HOST_CTL, OCF_WRITE_SCAN_ENABLE); - if (!sent) - break; + if (param & SCAN_PAGE) + set_bit(HCI_PSCAN, &hdev->flags); + } - status = 
*((__u8 *) skb->data); - param = *((__u8 *) sent); + hci_req_complete(hdev, status); +} - BT_DBG("param 0x%x", param); +static void hci_cc_read_class_of_dev(struct hci_dev *hdev, struct sk_buff *skb) +{ + struct hci_rp_read_class_of_dev *rp = (void *) skb->data; - if (!status) { - clear_bit(HCI_PSCAN, &hdev->flags); - clear_bit(HCI_ISCAN, &hdev->flags); - if (param & SCAN_INQUIRY) - set_bit(HCI_ISCAN, &hdev->flags); + BT_DBG("%s status 0x%x", hdev->name, rp->status); - if (param & SCAN_PAGE) - set_bit(HCI_PSCAN, &hdev->flags); - } - hci_req_complete(hdev, status); - break; + if (rp->status) + return; - case OCF_READ_VOICE_SETTING: - vs = (struct hci_rp_read_voice_setting *) skb->data; + memcpy(hdev->dev_class, rp->dev_class, 3); - if (vs->status) { - BT_DBG("%s READ_VOICE_SETTING failed %d", hdev->name, vs->status); - break; - } + BT_DBG("%s class 0x%.2x%.2x%.2x", hdev->name, + hdev->dev_class[2], hdev->dev_class[1], hdev->dev_class[0]); +} - setting = __le16_to_cpu(vs->voice_setting); +static void hci_cc_write_class_of_dev(struct hci_dev *hdev, struct sk_buff *skb) +{ + __u8 status = *((__u8 *) skb->data); + void *sent; - if (hdev->voice_setting != setting ) { - hdev->voice_setting = setting; + BT_DBG("%s status 0x%x", hdev->name, status); - BT_DBG("%s: voice setting 0x%04x", hdev->name, setting); + sent = hci_sent_cmd_data(hdev, HCI_OP_WRITE_CLASS_OF_DEV); + if (!sent) + return; - if (hdev->notify) { - tasklet_disable(&hdev->tx_task); - hdev->notify(hdev, HCI_NOTIFY_VOICE_SETTING); - tasklet_enable(&hdev->tx_task); - } - } - break; + if (!status) + memcpy(hdev->dev_class, sent, 3); +} - case OCF_WRITE_VOICE_SETTING: - sent = hci_sent_cmd_data(hdev, OGF_HOST_CTL, OCF_WRITE_VOICE_SETTING); - if (!sent) - break; +static void hci_cc_read_voice_setting(struct hci_dev *hdev, struct sk_buff *skb) +{ + struct hci_rp_read_voice_setting *rp = (void *) skb->data; + __u16 setting; + + BT_DBG("%s status 0x%x", hdev->name, rp->status); + + if (rp->status) + return; + + setting = __le16_to_cpu(rp->voice_setting); + + if (hdev->voice_setting == setting ) + return; + + hdev->voice_setting = setting; - status = *((__u8 *) skb->data); - setting = __le16_to_cpu(get_unaligned((__le16 *) sent)); + BT_DBG("%s voice setting 0x%04x", hdev->name, setting); - if (!status && hdev->voice_setting != setting) { + if (hdev->notify) { + tasklet_disable(&hdev->tx_task); + hdev->notify(hdev, HCI_NOTIFY_VOICE_SETTING); + tasklet_enable(&hdev->tx_task); + } +} + +static void hci_cc_write_voice_setting(struct hci_dev *hdev, struct sk_buff *skb) +{ + __u8 status = *((__u8 *) skb->data); + void *sent; + + BT_DBG("%s status 0x%x", hdev->name, status); + + sent = hci_sent_cmd_data(hdev, HCI_OP_WRITE_VOICE_SETTING); + if (!sent) + return; + + if (!status) { + __u16 setting = __le16_to_cpu(get_unaligned((__le16 *) sent)); + + if (hdev->voice_setting != setting) { hdev->voice_setting = setting; - BT_DBG("%s: voice setting 0x%04x", hdev->name, setting); + BT_DBG("%s voice setting 0x%04x", hdev->name, setting); if (hdev->notify) { tasklet_disable(&hdev->tx_task); @@ -287,143 +326,153 @@ static void hci_cc_host_ctl(struct hci_dev *hdev, __u16 ocf, struct sk_buff *skb tasklet_enable(&hdev->tx_task); } } - hci_req_complete(hdev, status); - break; - - case OCF_HOST_BUFFER_SIZE: - status = *((__u8 *) skb->data); - if (status) { - BT_DBG("%s OCF_BUFFER_SIZE failed %d", hdev->name, status); - hci_req_complete(hdev, status); - } - break; - - default: - BT_DBG("%s Command complete: ogf HOST_CTL ocf %x", hdev->name, ocf); - break; } } -/* 
Command Complete OGF INFO_PARAM */ -static void hci_cc_info_param(struct hci_dev *hdev, __u16 ocf, struct sk_buff *skb) +static void hci_cc_host_buffer_size(struct hci_dev *hdev, struct sk_buff *skb) { - struct hci_rp_read_loc_version *lv; - struct hci_rp_read_local_features *lf; - struct hci_rp_read_buffer_size *bs; - struct hci_rp_read_bd_addr *ba; + __u8 status = *((__u8 *) skb->data); - BT_DBG("%s ocf 0x%x", hdev->name, ocf); + BT_DBG("%s status 0x%x", hdev->name, status); - switch (ocf) { - case OCF_READ_LOCAL_VERSION: - lv = (struct hci_rp_read_loc_version *) skb->data; + hci_req_complete(hdev, status); +} - if (lv->status) { - BT_DBG("%s READ_LOCAL_VERSION failed %d", hdev->name, lf->status); - break; - } +static void hci_cc_read_local_version(struct hci_dev *hdev, struct sk_buff *skb) +{ + struct hci_rp_read_local_version *rp = (void *) skb->data; - hdev->hci_ver = lv->hci_ver; - hdev->hci_rev = btohs(lv->hci_rev); - hdev->manufacturer = btohs(lv->manufacturer); + BT_DBG("%s status 0x%x", hdev->name, rp->status); - BT_DBG("%s: manufacturer %d hci_ver %d hci_rev %d", hdev->name, - hdev->manufacturer, hdev->hci_ver, hdev->hci_rev); + if (rp->status) + return; - break; + hdev->hci_ver = rp->hci_ver; + hdev->hci_rev = btohs(rp->hci_rev); + hdev->manufacturer = btohs(rp->manufacturer); - case OCF_READ_LOCAL_FEATURES: - lf = (struct hci_rp_read_local_features *) skb->data; + BT_DBG("%s manufacturer %d hci ver %d:%d", hdev->name, + hdev->manufacturer, + hdev->hci_ver, hdev->hci_rev); +} - if (lf->status) { - BT_DBG("%s READ_LOCAL_FEATURES failed %d", hdev->name, lf->status); - break; - } +static void hci_cc_read_local_commands(struct hci_dev *hdev, struct sk_buff *skb) +{ + struct hci_rp_read_local_commands *rp = (void *) skb->data; - memcpy(hdev->features, lf->features, sizeof(hdev->features)); + BT_DBG("%s status 0x%x", hdev->name, rp->status); - /* Adjust default settings according to features - * supported by device. */ - if (hdev->features[0] & LMP_3SLOT) - hdev->pkt_type |= (HCI_DM3 | HCI_DH3); + if (rp->status) + return; - if (hdev->features[0] & LMP_5SLOT) - hdev->pkt_type |= (HCI_DM5 | HCI_DH5); + memcpy(hdev->commands, rp->commands, sizeof(hdev->commands)); +} - if (hdev->features[1] & LMP_HV2) { - hdev->pkt_type |= (HCI_HV2); - hdev->esco_type |= (ESCO_HV2); - } +static void hci_cc_read_local_features(struct hci_dev *hdev, struct sk_buff *skb) +{ + struct hci_rp_read_local_features *rp = (void *) skb->data; - if (hdev->features[1] & LMP_HV3) { - hdev->pkt_type |= (HCI_HV3); - hdev->esco_type |= (ESCO_HV3); - } + BT_DBG("%s status 0x%x", hdev->name, rp->status); - if (hdev->features[3] & LMP_ESCO) - hdev->esco_type |= (ESCO_EV3); + if (rp->status) + return; - if (hdev->features[4] & LMP_EV4) - hdev->esco_type |= (ESCO_EV4); + memcpy(hdev->features, rp->features, 8); - if (hdev->features[4] & LMP_EV5) - hdev->esco_type |= (ESCO_EV5); + /* Adjust default settings according to features + * supported by device. 
*/ - BT_DBG("%s: features 0x%x 0x%x 0x%x", hdev->name, - lf->features[0], lf->features[1], lf->features[2]); + if (hdev->features[0] & LMP_3SLOT) + hdev->pkt_type |= (HCI_DM3 | HCI_DH3); - break; + if (hdev->features[0] & LMP_5SLOT) + hdev->pkt_type |= (HCI_DM5 | HCI_DH5); - case OCF_READ_BUFFER_SIZE: - bs = (struct hci_rp_read_buffer_size *) skb->data; + if (hdev->features[1] & LMP_HV2) { + hdev->pkt_type |= (HCI_HV2); + hdev->esco_type |= (ESCO_HV2); + } - if (bs->status) { - BT_DBG("%s READ_BUFFER_SIZE failed %d", hdev->name, bs->status); - hci_req_complete(hdev, bs->status); - break; - } + if (hdev->features[1] & LMP_HV3) { + hdev->pkt_type |= (HCI_HV3); + hdev->esco_type |= (ESCO_HV3); + } - hdev->acl_mtu = __le16_to_cpu(bs->acl_mtu); - hdev->sco_mtu = bs->sco_mtu; - hdev->acl_pkts = __le16_to_cpu(bs->acl_max_pkt); - hdev->sco_pkts = __le16_to_cpu(bs->sco_max_pkt); + if (hdev->features[3] & LMP_ESCO) + hdev->esco_type |= (ESCO_EV3); - if (test_bit(HCI_QUIRK_FIXUP_BUFFER_SIZE, &hdev->quirks)) { - hdev->sco_mtu = 64; - hdev->sco_pkts = 8; - } + if (hdev->features[4] & LMP_EV4) + hdev->esco_type |= (ESCO_EV4); - hdev->acl_cnt = hdev->acl_pkts; - hdev->sco_cnt = hdev->sco_pkts; + if (hdev->features[4] & LMP_EV5) + hdev->esco_type |= (ESCO_EV5); - BT_DBG("%s mtu: acl %d, sco %d max_pkt: acl %d, sco %d", hdev->name, - hdev->acl_mtu, hdev->sco_mtu, hdev->acl_pkts, hdev->sco_pkts); - break; + BT_DBG("%s features 0x%.2x%.2x%.2x%.2x%.2x%.2x%.2x%.2x", hdev->name, + hdev->features[0], hdev->features[1], + hdev->features[2], hdev->features[3], + hdev->features[4], hdev->features[5], + hdev->features[6], hdev->features[7]); +} - case OCF_READ_BD_ADDR: - ba = (struct hci_rp_read_bd_addr *) skb->data; +static void hci_cc_read_buffer_size(struct hci_dev *hdev, struct sk_buff *skb) +{ + struct hci_rp_read_buffer_size *rp = (void *) skb->data; - if (!ba->status) { - bacpy(&hdev->bdaddr, &ba->bdaddr); - } else { - BT_DBG("%s: READ_BD_ADDR failed %d", hdev->name, ba->status); - } + BT_DBG("%s status 0x%x", hdev->name, rp->status); - hci_req_complete(hdev, ba->status); - break; + if (rp->status) + return; - default: - BT_DBG("%s Command complete: ogf INFO_PARAM ocf %x", hdev->name, ocf); - break; + hdev->acl_mtu = __le16_to_cpu(rp->acl_mtu); + hdev->sco_mtu = rp->sco_mtu; + hdev->acl_pkts = __le16_to_cpu(rp->acl_max_pkt); + hdev->sco_pkts = __le16_to_cpu(rp->sco_max_pkt); + + if (test_bit(HCI_QUIRK_FIXUP_BUFFER_SIZE, &hdev->quirks)) { + hdev->sco_mtu = 64; + hdev->sco_pkts = 8; } + + hdev->acl_cnt = hdev->acl_pkts; + hdev->sco_cnt = hdev->sco_pkts; + + BT_DBG("%s acl mtu %d:%d sco mtu %d:%d", hdev->name, + hdev->acl_mtu, hdev->acl_pkts, + hdev->sco_mtu, hdev->sco_pkts); +} + +static void hci_cc_read_bd_addr(struct hci_dev *hdev, struct sk_buff *skb) +{ + struct hci_rp_read_bd_addr *rp = (void *) skb->data; + + BT_DBG("%s status 0x%x", hdev->name, rp->status); + + if (!rp->status) + bacpy(&hdev->bdaddr, &rp->bdaddr); + + hci_req_complete(hdev, rp->status); +} + +static inline void hci_cs_inquiry(struct hci_dev *hdev, __u8 status) +{ + BT_DBG("%s status 0x%x", hdev->name, status); + + if (status) { + hci_req_complete(hdev, status); + + hci_conn_check_pending(hdev); + } else + set_bit(HCI_INQUIRY, &hdev->flags); } -/* Command Status OGF LINK_CTL */ static inline void hci_cs_create_conn(struct hci_dev *hdev, __u8 status) { + struct hci_cp_create_conn *cp; struct hci_conn *conn; - struct hci_cp_create_conn *cp = hci_sent_cmd_data(hdev, OGF_LINK_CTL, OCF_CREATE_CONN); + BT_DBG("%s status 0x%x", hdev->name, 
status); + + cp = hci_sent_cmd_data(hdev, HCI_OP_CREATE_CONN); if (!cp) return; @@ -431,8 +480,7 @@ static inline void hci_cs_create_conn(struct hci_dev *hdev, __u8 status) conn = hci_conn_hash_lookup_ba(hdev, ACL_LINK, &cp->bdaddr); - BT_DBG("%s status 0x%x bdaddr %s conn %p", hdev->name, - status, batostr(&cp->bdaddr), conn); + BT_DBG("%s bdaddr %s conn %p", hdev->name, batostr(&cp->bdaddr), conn); if (status) { if (conn && conn->state == BT_CONNECT) { @@ -457,234 +505,138 @@ static inline void hci_cs_create_conn(struct hci_dev *hdev, __u8 status) hci_dev_unlock(hdev); } -static void hci_cs_link_ctl(struct hci_dev *hdev, __u16 ocf, __u8 status) +static void hci_cs_add_sco(struct hci_dev *hdev, __u8 status) { - BT_DBG("%s ocf 0x%x", hdev->name, ocf); + struct hci_cp_add_sco *cp; + struct hci_conn *acl, *sco; + __u16 handle; - switch (ocf) { - case OCF_CREATE_CONN: - hci_cs_create_conn(hdev, status); - break; - - case OCF_ADD_SCO: - if (status) { - struct hci_conn *acl, *sco; - struct hci_cp_add_sco *cp = hci_sent_cmd_data(hdev, OGF_LINK_CTL, OCF_ADD_SCO); - __u16 handle; - - if (!cp) - break; + BT_DBG("%s status 0x%x", hdev->name, status); - handle = __le16_to_cpu(cp->handle); - - BT_DBG("%s Add SCO error: handle %d status 0x%x", hdev->name, handle, status); + if (!status) + return; - hci_dev_lock(hdev); + cp = hci_sent_cmd_data(hdev, HCI_OP_ADD_SCO); + if (!cp) + return; - acl = hci_conn_hash_lookup_handle(hdev, handle); - if (acl && (sco = acl->link)) { - sco->state = BT_CLOSED; + handle = __le16_to_cpu(cp->handle); - hci_proto_connect_cfm(sco, status); - hci_conn_del(sco); - } + BT_DBG("%s handle %d", hdev->name, handle); - hci_dev_unlock(hdev); - } - break; + hci_dev_lock(hdev); - case OCF_INQUIRY: - if (status) { - BT_DBG("%s Inquiry error: status 0x%x", hdev->name, status); - hci_req_complete(hdev, status); - } else { - set_bit(HCI_INQUIRY, &hdev->flags); - } - break; + acl = hci_conn_hash_lookup_handle(hdev, handle); + if (acl && (sco = acl->link)) { + sco->state = BT_CLOSED; - default: - BT_DBG("%s Command status: ogf LINK_CTL ocf %x status %d", - hdev->name, ocf, status); - break; + hci_proto_connect_cfm(sco, status); + hci_conn_del(sco); } + + hci_dev_unlock(hdev); } -/* Command Status OGF LINK_POLICY */ -static void hci_cs_link_policy(struct hci_dev *hdev, __u16 ocf, __u8 status) +static void hci_cs_remote_name_req(struct hci_dev *hdev, __u8 status) { - BT_DBG("%s ocf 0x%x", hdev->name, ocf); - - switch (ocf) { - case OCF_SNIFF_MODE: - if (status) { - struct hci_conn *conn; - struct hci_cp_sniff_mode *cp = hci_sent_cmd_data(hdev, OGF_LINK_POLICY, OCF_SNIFF_MODE); + BT_DBG("%s status 0x%x", hdev->name, status); +} - if (!cp) - break; +static void hci_cs_setup_sync_conn(struct hci_dev *hdev, __u8 status) +{ + struct hci_cp_setup_sync_conn *cp; + struct hci_conn *acl, *sco; + __u16 handle; - hci_dev_lock(hdev); + BT_DBG("%s status 0x%x", hdev->name, status); - conn = hci_conn_hash_lookup_handle(hdev, __le16_to_cpu(cp->handle)); - if (conn) { - clear_bit(HCI_CONN_MODE_CHANGE_PEND, &conn->pend); - } - - hci_dev_unlock(hdev); - } - break; + if (!status) + return; - case OCF_EXIT_SNIFF_MODE: - if (status) { - struct hci_conn *conn; - struct hci_cp_exit_sniff_mode *cp = hci_sent_cmd_data(hdev, OGF_LINK_POLICY, OCF_EXIT_SNIFF_MODE); + cp = hci_sent_cmd_data(hdev, HCI_OP_SETUP_SYNC_CONN); + if (!cp) + return; - if (!cp) - break; + handle = __le16_to_cpu(cp->handle); - hci_dev_lock(hdev); + BT_DBG("%s handle %d", hdev->name, handle); - conn = hci_conn_hash_lookup_handle(hdev, 
__le16_to_cpu(cp->handle)); - if (conn) { - clear_bit(HCI_CONN_MODE_CHANGE_PEND, &conn->pend); - } + hci_dev_lock(hdev); - hci_dev_unlock(hdev); - } - break; + acl = hci_conn_hash_lookup_handle(hdev, handle); + if (acl && (sco = acl->link)) { + sco->state = BT_CLOSED; - default: - BT_DBG("%s Command status: ogf LINK_POLICY ocf %x", hdev->name, ocf); - break; + hci_proto_connect_cfm(sco, status); + hci_conn_del(sco); } -} -/* Command Status OGF HOST_CTL */ -static void hci_cs_host_ctl(struct hci_dev *hdev, __u16 ocf, __u8 status) -{ - BT_DBG("%s ocf 0x%x", hdev->name, ocf); - - switch (ocf) { - default: - BT_DBG("%s Command status: ogf HOST_CTL ocf %x", hdev->name, ocf); - break; - } + hci_dev_unlock(hdev); } -/* Command Status OGF INFO_PARAM */ -static void hci_cs_info_param(struct hci_dev *hdev, __u16 ocf, __u8 status) +static void hci_cs_sniff_mode(struct hci_dev *hdev, __u8 status) { - BT_DBG("%s: hci_cs_info_param: ocf 0x%x", hdev->name, ocf); - - switch (ocf) { - default: - BT_DBG("%s Command status: ogf INFO_PARAM ocf %x", hdev->name, ocf); - break; - } -} + struct hci_cp_sniff_mode *cp; + struct hci_conn *conn; -/* Inquiry Complete */ -static inline void hci_inquiry_complete_evt(struct hci_dev *hdev, struct sk_buff *skb) -{ - __u8 status = *((__u8 *) skb->data); - struct hci_conn *pend; + BT_DBG("%s status 0x%x", hdev->name, status); - BT_DBG("%s status %d", hdev->name, status); + if (!status) + return; - clear_bit(HCI_INQUIRY, &hdev->flags); - hci_req_complete(hdev, status); + cp = hci_sent_cmd_data(hdev, HCI_OP_SNIFF_MODE); + if (!cp) + return; hci_dev_lock(hdev); - pend = hci_conn_hash_lookup_state(hdev, ACL_LINK, BT_CONNECT2); - if (pend) - hci_acl_connect(pend); + conn = hci_conn_hash_lookup_handle(hdev, __le16_to_cpu(cp->handle)); + if (conn) + clear_bit(HCI_CONN_MODE_CHANGE_PEND, &conn->pend); hci_dev_unlock(hdev); } -/* Inquiry Result */ -static inline void hci_inquiry_result_evt(struct hci_dev *hdev, struct sk_buff *skb) +static void hci_cs_exit_sniff_mode(struct hci_dev *hdev, __u8 status) { - struct inquiry_data data; - struct inquiry_info *info = (struct inquiry_info *) (skb->data + 1); - int num_rsp = *((__u8 *) skb->data); + struct hci_cp_exit_sniff_mode *cp; + struct hci_conn *conn; - BT_DBG("%s num_rsp %d", hdev->name, num_rsp); + BT_DBG("%s status 0x%x", hdev->name, status); - if (!num_rsp) + if (!status) + return; + + cp = hci_sent_cmd_data(hdev, HCI_OP_EXIT_SNIFF_MODE); + if (!cp) return; hci_dev_lock(hdev); - for (; num_rsp; num_rsp--) { - bacpy(&data.bdaddr, &info->bdaddr); - data.pscan_rep_mode = info->pscan_rep_mode; - data.pscan_period_mode = info->pscan_period_mode; - data.pscan_mode = info->pscan_mode; - memcpy(data.dev_class, info->dev_class, 3); - data.clock_offset = info->clock_offset; - data.rssi = 0x00; - info++; - hci_inquiry_cache_update(hdev, &data); - } + conn = hci_conn_hash_lookup_handle(hdev, __le16_to_cpu(cp->handle)); + if (conn) + clear_bit(HCI_CONN_MODE_CHANGE_PEND, &conn->pend); hci_dev_unlock(hdev); } -/* Inquiry Result With RSSI */ -static inline void hci_inquiry_result_with_rssi_evt(struct hci_dev *hdev, struct sk_buff *skb) +static inline void hci_inquiry_complete_evt(struct hci_dev *hdev, struct sk_buff *skb) { - struct inquiry_data data; - int num_rsp = *((__u8 *) skb->data); - - BT_DBG("%s num_rsp %d", hdev->name, num_rsp); - - if (!num_rsp) - return; - - hci_dev_lock(hdev); + __u8 status = *((__u8 *) skb->data); - if ((skb->len - 1) / num_rsp != sizeof(struct inquiry_info_with_rssi)) { - struct inquiry_info_with_rssi_and_pscan_mode 
*info = - (struct inquiry_info_with_rssi_and_pscan_mode *) (skb->data + 1); + BT_DBG("%s status %d", hdev->name, status); - for (; num_rsp; num_rsp--) { - bacpy(&data.bdaddr, &info->bdaddr); - data.pscan_rep_mode = info->pscan_rep_mode; - data.pscan_period_mode = info->pscan_period_mode; - data.pscan_mode = info->pscan_mode; - memcpy(data.dev_class, info->dev_class, 3); - data.clock_offset = info->clock_offset; - data.rssi = info->rssi; - info++; - hci_inquiry_cache_update(hdev, &data); - } - } else { - struct inquiry_info_with_rssi *info = - (struct inquiry_info_with_rssi *) (skb->data + 1); + clear_bit(HCI_INQUIRY, &hdev->flags); - for (; num_rsp; num_rsp--) { - bacpy(&data.bdaddr, &info->bdaddr); - data.pscan_rep_mode = info->pscan_rep_mode; - data.pscan_period_mode = info->pscan_period_mode; - data.pscan_mode = 0x00; - memcpy(data.dev_class, info->dev_class, 3); - data.clock_offset = info->clock_offset; - data.rssi = info->rssi; - info++; - hci_inquiry_cache_update(hdev, &data); - } - } + hci_req_complete(hdev, status); - hci_dev_unlock(hdev); + hci_conn_check_pending(hdev); } -/* Extended Inquiry Result */ -static inline void hci_extended_inquiry_result_evt(struct hci_dev *hdev, struct sk_buff *skb) +static inline void hci_inquiry_result_evt(struct hci_dev *hdev, struct sk_buff *skb) { struct inquiry_data data; - struct extended_inquiry_info *info = (struct extended_inquiry_info *) (skb->data + 1); + struct inquiry_info *info = (void *) (skb->data + 1); int num_rsp = *((__u8 *) skb->data); BT_DBG("%s num_rsp %d", hdev->name, num_rsp); @@ -696,12 +648,12 @@ static inline void hci_extended_inquiry_result_evt(struct hci_dev *hdev, struct for (; num_rsp; num_rsp--) { bacpy(&data.bdaddr, &info->bdaddr); - data.pscan_rep_mode = info->pscan_rep_mode; - data.pscan_period_mode = info->pscan_period_mode; - data.pscan_mode = 0x00; + data.pscan_rep_mode = info->pscan_rep_mode; + data.pscan_period_mode = info->pscan_period_mode; + data.pscan_mode = info->pscan_mode; memcpy(data.dev_class, info->dev_class, 3); - data.clock_offset = info->clock_offset; - data.rssi = info->rssi; + data.clock_offset = info->clock_offset; + data.rssi = 0x00; info++; hci_inquiry_cache_update(hdev, &data); } @@ -709,70 +661,18 @@ static inline void hci_extended_inquiry_result_evt(struct hci_dev *hdev, struct hci_dev_unlock(hdev); } -/* Connect Request */ -static inline void hci_conn_request_evt(struct hci_dev *hdev, struct sk_buff *skb) -{ - struct hci_ev_conn_request *ev = (struct hci_ev_conn_request *) skb->data; - int mask = hdev->link_mode; - - BT_DBG("%s Connection request: %s type 0x%x", hdev->name, - batostr(&ev->bdaddr), ev->link_type); - - mask |= hci_proto_connect_ind(hdev, &ev->bdaddr, ev->link_type); - - if (mask & HCI_LM_ACCEPT) { - /* Connection accepted */ - struct hci_conn *conn; - struct hci_cp_accept_conn_req cp; - - hci_dev_lock(hdev); - conn = hci_conn_hash_lookup_ba(hdev, ev->link_type, &ev->bdaddr); - if (!conn) { - if (!(conn = hci_conn_add(hdev, ev->link_type, &ev->bdaddr))) { - BT_ERR("No memmory for new connection"); - hci_dev_unlock(hdev); - return; - } - } - memcpy(conn->dev_class, ev->dev_class, 3); - conn->state = BT_CONNECT; - hci_dev_unlock(hdev); - - bacpy(&cp.bdaddr, &ev->bdaddr); - - if (lmp_rswitch_capable(hdev) && (mask & HCI_LM_MASTER)) - cp.role = 0x00; /* Become master */ - else - cp.role = 0x01; /* Remain slave */ - - hci_send_cmd(hdev, OGF_LINK_CTL, - OCF_ACCEPT_CONN_REQ, sizeof(cp), &cp); - } else { - /* Connection rejected */ - struct hci_cp_reject_conn_req cp; - - 
bacpy(&cp.bdaddr, &ev->bdaddr); - cp.reason = 0x0f; - hci_send_cmd(hdev, OGF_LINK_CTL, - OCF_REJECT_CONN_REQ, sizeof(cp), &cp); - } -} - -/* Connect Complete */ static inline void hci_conn_complete_evt(struct hci_dev *hdev, struct sk_buff *skb) { - struct hci_ev_conn_complete *ev = (struct hci_ev_conn_complete *) skb->data; - struct hci_conn *conn, *pend; + struct hci_ev_conn_complete *ev = (void *) skb->data; + struct hci_conn *conn; BT_DBG("%s", hdev->name); hci_dev_lock(hdev); conn = hci_conn_hash_lookup_ba(hdev, ev->link_type, &ev->bdaddr); - if (!conn) { - hci_dev_unlock(hdev); - return; - } + if (!conn) + goto unlock; if (!ev->status) { conn->handle = __le16_to_cpu(ev->handle); @@ -788,8 +688,7 @@ static inline void hci_conn_complete_evt(struct hci_dev *hdev, struct sk_buff *s if (conn->type == ACL_LINK) { struct hci_cp_read_remote_features cp; cp.handle = ev->handle; - hci_send_cmd(hdev, OGF_LINK_CTL, - OCF_READ_REMOTE_FEATURES, sizeof(cp), &cp); + hci_send_cmd(hdev, HCI_OP_READ_REMOTE_FEATURES, sizeof(cp), &cp); } /* Set link policy */ @@ -797,8 +696,7 @@ static inline void hci_conn_complete_evt(struct hci_dev *hdev, struct sk_buff *s struct hci_cp_write_link_policy cp; cp.handle = ev->handle; cp.policy = cpu_to_le16(hdev->link_policy); - hci_send_cmd(hdev, OGF_LINK_POLICY, - OCF_WRITE_LINK_POLICY, sizeof(cp), &cp); + hci_send_cmd(hdev, HCI_OP_WRITE_LINK_POLICY, sizeof(cp), &cp); } /* Set packet type for incoming connection */ @@ -809,8 +707,7 @@ static inline void hci_conn_complete_evt(struct hci_dev *hdev, struct sk_buff *s cpu_to_le16(hdev->pkt_type & ACL_PTYPE_MASK): cpu_to_le16(hdev->pkt_type & SCO_PTYPE_MASK); - hci_send_cmd(hdev, OGF_LINK_CTL, - OCF_CHANGE_CONN_PTYPE, sizeof(cp), &cp); + hci_send_cmd(hdev, HCI_OP_CHANGE_CONN_PTYPE, sizeof(cp), &cp); } else { /* Update disconnect timer */ hci_conn_hold(conn); @@ -822,9 +719,12 @@ static inline void hci_conn_complete_evt(struct hci_dev *hdev, struct sk_buff *s if (conn->type == ACL_LINK) { struct hci_conn *sco = conn->link; if (sco) { - if (!ev->status) - hci_add_sco(sco, conn->handle); - else { + if (!ev->status) { + if (lmp_esco_capable(hdev)) + hci_setup_sync(sco, conn->handle); + else + hci_add_sco(sco, conn->handle); + } else { hci_proto_connect_cfm(sco, ev->status); hci_conn_del(sco); } @@ -835,136 +735,104 @@ static inline void hci_conn_complete_evt(struct hci_dev *hdev, struct sk_buff *s if (ev->status) hci_conn_del(conn); - pend = hci_conn_hash_lookup_state(hdev, ACL_LINK, BT_CONNECT2); - if (pend) - hci_acl_connect(pend); - +unlock: hci_dev_unlock(hdev); -} - -/* Disconnect Complete */ -static inline void hci_disconn_complete_evt(struct hci_dev *hdev, struct sk_buff *skb) -{ - struct hci_ev_disconn_complete *ev = (struct hci_ev_disconn_complete *) skb->data; - struct hci_conn *conn; - - BT_DBG("%s status %d", hdev->name, ev->status); - - if (ev->status) - return; - hci_dev_lock(hdev); - - conn = hci_conn_hash_lookup_handle(hdev, __le16_to_cpu(ev->handle)); - if (conn) { - conn->state = BT_CLOSED; - hci_proto_disconn_ind(conn, ev->reason); - hci_conn_del(conn); - } - - hci_dev_unlock(hdev); + hci_conn_check_pending(hdev); } -/* Number of completed packets */ -static inline void hci_num_comp_pkts_evt(struct hci_dev *hdev, struct sk_buff *skb) +static inline void hci_conn_request_evt(struct hci_dev *hdev, struct sk_buff *skb) { - struct hci_ev_num_comp_pkts *ev = (struct hci_ev_num_comp_pkts *) skb->data; - __le16 *ptr; - int i; - - skb_pull(skb, sizeof(*ev)); - - BT_DBG("%s num_hndl %d", hdev->name, ev->num_hndl); + 
struct hci_ev_conn_request *ev = (void *) skb->data; + int mask = hdev->link_mode; - if (skb->len < ev->num_hndl * 4) { - BT_DBG("%s bad parameters", hdev->name); - return; - } + BT_DBG("%s bdaddr %s type 0x%x", hdev->name, + batostr(&ev->bdaddr), ev->link_type); - tasklet_disable(&hdev->tx_task); + mask |= hci_proto_connect_ind(hdev, &ev->bdaddr, ev->link_type); - for (i = 0, ptr = (__le16 *) skb->data; i < ev->num_hndl; i++) { + if (mask & HCI_LM_ACCEPT) { + /* Connection accepted */ struct hci_conn *conn; - __u16 handle, count; - - handle = __le16_to_cpu(get_unaligned(ptr++)); - count = __le16_to_cpu(get_unaligned(ptr++)); - conn = hci_conn_hash_lookup_handle(hdev, handle); - if (conn) { - conn->sent -= count; + hci_dev_lock(hdev); - if (conn->type == ACL_LINK) { - if ((hdev->acl_cnt += count) > hdev->acl_pkts) - hdev->acl_cnt = hdev->acl_pkts; - } else { - if ((hdev->sco_cnt += count) > hdev->sco_pkts) - hdev->sco_cnt = hdev->sco_pkts; + conn = hci_conn_hash_lookup_ba(hdev, ev->link_type, &ev->bdaddr); + if (!conn) { + if (!(conn = hci_conn_add(hdev, ev->link_type, &ev->bdaddr))) { + BT_ERR("No memory for new connection"); + hci_dev_unlock(hdev); + return; } } - } - hci_sched_tx(hdev); - tasklet_enable(&hdev->tx_task); -} + memcpy(conn->dev_class, ev->dev_class, 3); + conn->state = BT_CONNECT; -/* Role Change */ -static inline void hci_role_change_evt(struct hci_dev *hdev, struct sk_buff *skb) -{ - struct hci_ev_role_change *ev = (struct hci_ev_role_change *) skb->data; - struct hci_conn *conn; + hci_dev_unlock(hdev); - BT_DBG("%s status %d", hdev->name, ev->status); + if (ev->link_type == ACL_LINK || !lmp_esco_capable(hdev)) { + struct hci_cp_accept_conn_req cp; - hci_dev_lock(hdev); + bacpy(&cp.bdaddr, &ev->bdaddr); - conn = hci_conn_hash_lookup_ba(hdev, ACL_LINK, &ev->bdaddr); - if (conn) { - if (!ev->status) { - if (ev->role) - conn->link_mode &= ~HCI_LM_MASTER; + if (lmp_rswitch_capable(hdev) && (mask & HCI_LM_MASTER)) + cp.role = 0x00; /* Become master */ else - conn->link_mode |= HCI_LM_MASTER; - } + cp.role = 0x01; /* Remain slave */ - clear_bit(HCI_CONN_RSWITCH_PEND, &conn->pend); + hci_send_cmd(hdev, HCI_OP_ACCEPT_CONN_REQ, + sizeof(cp), &cp); + } else { + struct hci_cp_accept_sync_conn_req cp; - hci_role_switch_cfm(conn, ev->status, ev->role); - } + bacpy(&cp.bdaddr, &ev->bdaddr); + cp.pkt_type = cpu_to_le16(hdev->esco_type); - hci_dev_unlock(hdev); + cp.tx_bandwidth = cpu_to_le32(0x00001f40); + cp.rx_bandwidth = cpu_to_le32(0x00001f40); + cp.max_latency = cpu_to_le16(0xffff); + cp.content_format = cpu_to_le16(hdev->voice_setting); + cp.retrans_effort = 0xff; + + hci_send_cmd(hdev, HCI_OP_ACCEPT_SYNC_CONN_REQ, + sizeof(cp), &cp); + } + } else { + /* Connection rejected */ + struct hci_cp_reject_conn_req cp; + + bacpy(&cp.bdaddr, &ev->bdaddr); + cp.reason = 0x0f; + hci_send_cmd(hdev, HCI_OP_REJECT_CONN_REQ, sizeof(cp), &cp); + } } -/* Mode Change */ -static inline void hci_mode_change_evt(struct hci_dev *hdev, struct sk_buff *skb) +static inline void hci_disconn_complete_evt(struct hci_dev *hdev, struct sk_buff *skb) { - struct hci_ev_mode_change *ev = (struct hci_ev_mode_change *) skb->data; + struct hci_ev_disconn_complete *ev = (void *) skb->data; struct hci_conn *conn; BT_DBG("%s status %d", hdev->name, ev->status); + if (ev->status) + return; + hci_dev_lock(hdev); conn = hci_conn_hash_lookup_handle(hdev, __le16_to_cpu(ev->handle)); if (conn) { - conn->mode = ev->mode; - conn->interval = __le16_to_cpu(ev->interval); - - if (!test_and_clear_bit(HCI_CONN_MODE_CHANGE_PEND,
&conn->pend)) { - if (conn->mode == HCI_CM_ACTIVE) - conn->power_save = 1; - else - conn->power_save = 0; - } + conn->state = BT_CLOSED; + hci_proto_disconn_ind(conn, ev->reason); + hci_conn_del(conn); } hci_dev_unlock(hdev); } -/* Authentication Complete */ static inline void hci_auth_complete_evt(struct hci_dev *hdev, struct sk_buff *skb) { - struct hci_ev_auth_complete *ev = (struct hci_ev_auth_complete *) skb->data; + struct hci_ev_auth_complete *ev = (void *) skb->data; struct hci_conn *conn; BT_DBG("%s status %d", hdev->name, ev->status); @@ -985,8 +853,8 @@ static inline void hci_auth_complete_evt(struct hci_dev *hdev, struct sk_buff *s struct hci_cp_set_conn_encrypt cp; cp.handle = cpu_to_le16(conn->handle); cp.encrypt = 1; - hci_send_cmd(conn->hdev, OGF_LINK_CTL, - OCF_SET_CONN_ENCRYPT, sizeof(cp), &cp); + hci_send_cmd(conn->hdev, + HCI_OP_SET_CONN_ENCRYPT, sizeof(cp), &cp); } else { clear_bit(HCI_CONN_ENCRYPT_PEND, &conn->pend); hci_encrypt_cfm(conn, ev->status, 0x00); @@ -997,10 +865,16 @@ static inline void hci_auth_complete_evt(struct hci_dev *hdev, struct sk_buff *s hci_dev_unlock(hdev); } -/* Encryption Change */ +static inline void hci_remote_name_evt(struct hci_dev *hdev, struct sk_buff *skb) +{ + BT_DBG("%s", hdev->name); + + hci_conn_check_pending(hdev); +} + static inline void hci_encrypt_change_evt(struct hci_dev *hdev, struct sk_buff *skb) { - struct hci_ev_encrypt_change *ev = (struct hci_ev_encrypt_change *) skb->data; + struct hci_ev_encrypt_change *ev = (void *) skb->data; struct hci_conn *conn; BT_DBG("%s status %d", hdev->name, ev->status); @@ -1024,10 +898,9 @@ static inline void hci_encrypt_change_evt(struct hci_dev *hdev, struct sk_buff * hci_dev_unlock(hdev); } -/* Change Connection Link Key Complete */ -static inline void hci_change_conn_link_key_complete_evt(struct hci_dev *hdev, struct sk_buff *skb) +static inline void hci_change_link_key_complete_evt(struct hci_dev *hdev, struct sk_buff *skb) { - struct hci_ev_change_conn_link_key_complete *ev = (struct hci_ev_change_conn_link_key_complete *) skb->data; + struct hci_ev_change_link_key_complete *ev = (void *) skb->data; struct hci_conn *conn; BT_DBG("%s status %d", hdev->name, ev->status); @@ -1047,25 +920,263 @@ static inline void hci_change_conn_link_key_complete_evt(struct hci_dev *hdev, s hci_dev_unlock(hdev); } -/* Pin Code Request*/ -static inline void hci_pin_code_request_evt(struct hci_dev *hdev, struct sk_buff *skb) +static inline void hci_remote_features_evt(struct hci_dev *hdev, struct sk_buff *skb) { + struct hci_ev_remote_features *ev = (void *) skb->data; + struct hci_conn *conn; + + BT_DBG("%s status %d", hdev->name, ev->status); + + if (ev->status) + return; + + hci_dev_lock(hdev); + + conn = hci_conn_hash_lookup_handle(hdev, __le16_to_cpu(ev->handle)); + if (conn) + memcpy(conn->features, ev->features, 8); + + hci_dev_unlock(hdev); } -/* Link Key Request */ -static inline void hci_link_key_request_evt(struct hci_dev *hdev, struct sk_buff *skb) +static inline void hci_remote_version_evt(struct hci_dev *hdev, struct sk_buff *skb) { + BT_DBG("%s", hdev->name); } -/* Link Key Notification */ -static inline void hci_link_key_notify_evt(struct hci_dev *hdev, struct sk_buff *skb) +static inline void hci_qos_setup_complete_evt(struct hci_dev *hdev, struct sk_buff *skb) { + BT_DBG("%s", hdev->name); } -/* Remote Features */ -static inline void hci_remote_features_evt(struct hci_dev *hdev, struct sk_buff *skb) +static inline void hci_cmd_complete_evt(struct hci_dev *hdev, struct sk_buff *skb) { - 
struct hci_ev_remote_features *ev = (struct hci_ev_remote_features *) skb->data; + struct hci_ev_cmd_complete *ev = (void *) skb->data; + __u16 opcode; + + skb_pull(skb, sizeof(*ev)); + + opcode = __le16_to_cpu(ev->opcode); + + switch (opcode) { + case HCI_OP_INQUIRY_CANCEL: + hci_cc_inquiry_cancel(hdev, skb); + break; + + case HCI_OP_EXIT_PERIODIC_INQ: + hci_cc_exit_periodic_inq(hdev, skb); + break; + + case HCI_OP_REMOTE_NAME_REQ_CANCEL: + hci_cc_remote_name_req_cancel(hdev, skb); + break; + + case HCI_OP_ROLE_DISCOVERY: + hci_cc_role_discovery(hdev, skb); + break; + + case HCI_OP_WRITE_LINK_POLICY: + hci_cc_write_link_policy(hdev, skb); + break; + + case HCI_OP_RESET: + hci_cc_reset(hdev, skb); + break; + + case HCI_OP_WRITE_LOCAL_NAME: + hci_cc_write_local_name(hdev, skb); + break; + + case HCI_OP_READ_LOCAL_NAME: + hci_cc_read_local_name(hdev, skb); + break; + + case HCI_OP_WRITE_AUTH_ENABLE: + hci_cc_write_auth_enable(hdev, skb); + break; + + case HCI_OP_WRITE_ENCRYPT_MODE: + hci_cc_write_encrypt_mode(hdev, skb); + break; + + case HCI_OP_WRITE_SCAN_ENABLE: + hci_cc_write_scan_enable(hdev, skb); + break; + + case HCI_OP_READ_CLASS_OF_DEV: + hci_cc_read_class_of_dev(hdev, skb); + break; + + case HCI_OP_WRITE_CLASS_OF_DEV: + hci_cc_write_class_of_dev(hdev, skb); + break; + + case HCI_OP_READ_VOICE_SETTING: + hci_cc_read_voice_setting(hdev, skb); + break; + + case HCI_OP_WRITE_VOICE_SETTING: + hci_cc_write_voice_setting(hdev, skb); + break; + + case HCI_OP_HOST_BUFFER_SIZE: + hci_cc_host_buffer_size(hdev, skb); + break; + + case HCI_OP_READ_LOCAL_VERSION: + hci_cc_read_local_version(hdev, skb); + break; + + case HCI_OP_READ_LOCAL_COMMANDS: + hci_cc_read_local_commands(hdev, skb); + break; + + case HCI_OP_READ_LOCAL_FEATURES: + hci_cc_read_local_features(hdev, skb); + break; + + case HCI_OP_READ_BUFFER_SIZE: + hci_cc_read_buffer_size(hdev, skb); + break; + + case HCI_OP_READ_BD_ADDR: + hci_cc_read_bd_addr(hdev, skb); + break; + + default: + BT_DBG("%s opcode 0x%x", hdev->name, opcode); + break; + } + + if (ev->ncmd) { + atomic_set(&hdev->cmd_cnt, 1); + if (!skb_queue_empty(&hdev->cmd_q)) + hci_sched_cmd(hdev); + } +} + +static inline void hci_cmd_status_evt(struct hci_dev *hdev, struct sk_buff *skb) +{ + struct hci_ev_cmd_status *ev = (void *) skb->data; + __u16 opcode; + + skb_pull(skb, sizeof(*ev)); + + opcode = __le16_to_cpu(ev->opcode); + + switch (opcode) { + case HCI_OP_INQUIRY: + hci_cs_inquiry(hdev, ev->status); + break; + + case HCI_OP_CREATE_CONN: + hci_cs_create_conn(hdev, ev->status); + break; + + case HCI_OP_ADD_SCO: + hci_cs_add_sco(hdev, ev->status); + break; + + case HCI_OP_REMOTE_NAME_REQ: + hci_cs_remote_name_req(hdev, ev->status); + break; + + case HCI_OP_SETUP_SYNC_CONN: + hci_cs_setup_sync_conn(hdev, ev->status); + break; + + case HCI_OP_SNIFF_MODE: + hci_cs_sniff_mode(hdev, ev->status); + break; + + case HCI_OP_EXIT_SNIFF_MODE: + hci_cs_exit_sniff_mode(hdev, ev->status); + break; + + default: + BT_DBG("%s opcode 0x%x", hdev->name, opcode); + break; + } + + if (ev->ncmd) { + atomic_set(&hdev->cmd_cnt, 1); + if (!skb_queue_empty(&hdev->cmd_q)) + hci_sched_cmd(hdev); + } +} + +static inline void hci_role_change_evt(struct hci_dev *hdev, struct sk_buff *skb) +{ + struct hci_ev_role_change *ev = (void *) skb->data; + struct hci_conn *conn; + + BT_DBG("%s status %d", hdev->name, ev->status); + + hci_dev_lock(hdev); + + conn = hci_conn_hash_lookup_ba(hdev, ACL_LINK, &ev->bdaddr); + if (conn) { + if (!ev->status) { + if (ev->role) + conn->link_mode &= ~HCI_LM_MASTER; + else 
+ conn->link_mode |= HCI_LM_MASTER; + } + + clear_bit(HCI_CONN_RSWITCH_PEND, &conn->pend); + + hci_role_switch_cfm(conn, ev->status, ev->role); + } + + hci_dev_unlock(hdev); +} + +static inline void hci_num_comp_pkts_evt(struct hci_dev *hdev, struct sk_buff *skb) +{ + struct hci_ev_num_comp_pkts *ev = (void *) skb->data; + __le16 *ptr; + int i; + + skb_pull(skb, sizeof(*ev)); + + BT_DBG("%s num_hndl %d", hdev->name, ev->num_hndl); + + if (skb->len < ev->num_hndl * 4) { + BT_DBG("%s bad parameters", hdev->name); + return; + } + + tasklet_disable(&hdev->tx_task); + + for (i = 0, ptr = (__le16 *) skb->data; i < ev->num_hndl; i++) { + struct hci_conn *conn; + __u16 handle, count; + + handle = __le16_to_cpu(get_unaligned(ptr++)); + count = __le16_to_cpu(get_unaligned(ptr++)); + + conn = hci_conn_hash_lookup_handle(hdev, handle); + if (conn) { + conn->sent -= count; + + if (conn->type == ACL_LINK) { + if ((hdev->acl_cnt += count) > hdev->acl_pkts) + hdev->acl_cnt = hdev->acl_pkts; + } else { + if ((hdev->sco_cnt += count) > hdev->sco_pkts) + hdev->sco_cnt = hdev->sco_pkts; + } + } + } + + hci_sched_tx(hdev); + + tasklet_enable(&hdev->tx_task); +} + +static inline void hci_mode_change_evt(struct hci_dev *hdev, struct sk_buff *skb) +{ + struct hci_ev_mode_change *ev = (void *) skb->data; struct hci_conn *conn; BT_DBG("%s status %d", hdev->name, ev->status); @@ -1073,17 +1184,39 @@ static inline void hci_remote_features_evt(struct hci_dev *hdev, struct sk_buff hci_dev_lock(hdev); conn = hci_conn_hash_lookup_handle(hdev, __le16_to_cpu(ev->handle)); - if (conn && !ev->status) { - memcpy(conn->features, ev->features, sizeof(conn->features)); + if (conn) { + conn->mode = ev->mode; + conn->interval = __le16_to_cpu(ev->interval); + + if (!test_and_clear_bit(HCI_CONN_MODE_CHANGE_PEND, &conn->pend)) { + if (conn->mode == HCI_CM_ACTIVE) + conn->power_save = 1; + else + conn->power_save = 0; + } } hci_dev_unlock(hdev); } -/* Clock Offset */ +static inline void hci_pin_code_request_evt(struct hci_dev *hdev, struct sk_buff *skb) +{ + BT_DBG("%s", hdev->name); +} + +static inline void hci_link_key_request_evt(struct hci_dev *hdev, struct sk_buff *skb) +{ + BT_DBG("%s", hdev->name); +} + +static inline void hci_link_key_notify_evt(struct hci_dev *hdev, struct sk_buff *skb) +{ + BT_DBG("%s", hdev->name); +} + static inline void hci_clock_offset_evt(struct hci_dev *hdev, struct sk_buff *skb) { - struct hci_ev_clock_offset *ev = (struct hci_ev_clock_offset *) skb->data; + struct hci_ev_clock_offset *ev = (void *) skb->data; struct hci_conn *conn; BT_DBG("%s status %d", hdev->name, ev->status); @@ -1103,10 +1236,9 @@ static inline void hci_clock_offset_evt(struct hci_dev *hdev, struct sk_buff *sk hci_dev_unlock(hdev); } -/* Page Scan Repetition Mode */ static inline void hci_pscan_rep_mode_evt(struct hci_dev *hdev, struct sk_buff *skb) { - struct hci_ev_pscan_rep_mode *ev = (struct hci_ev_pscan_rep_mode *) skb->data; + struct hci_ev_pscan_rep_mode *ev = (void *) skb->data; struct inquiry_entry *ie; BT_DBG("%s", hdev->name); @@ -1121,10 +1253,91 @@ static inline void hci_pscan_rep_mode_evt(struct hci_dev *hdev, struct sk_buff * hci_dev_unlock(hdev); } -/* Sniff Subrate */ +static inline void hci_inquiry_result_with_rssi_evt(struct hci_dev *hdev, struct sk_buff *skb) +{ + struct inquiry_data data; + int num_rsp = *((__u8 *) skb->data); + + BT_DBG("%s num_rsp %d", hdev->name, num_rsp); + + if (!num_rsp) + return; + + hci_dev_lock(hdev); + + if ((skb->len - 1) / num_rsp != sizeof(struct inquiry_info_with_rssi)) { + 
struct inquiry_info_with_rssi_and_pscan_mode *info = (void *) (skb->data + 1); + + for (; num_rsp; num_rsp--) { + bacpy(&data.bdaddr, &info->bdaddr); + data.pscan_rep_mode = info->pscan_rep_mode; + data.pscan_period_mode = info->pscan_period_mode; + data.pscan_mode = info->pscan_mode; + memcpy(data.dev_class, info->dev_class, 3); + data.clock_offset = info->clock_offset; + data.rssi = info->rssi; + info++; + hci_inquiry_cache_update(hdev, &data); + } + } else { + struct inquiry_info_with_rssi *info = (void *) (skb->data + 1); + + for (; num_rsp; num_rsp--) { + bacpy(&data.bdaddr, &info->bdaddr); + data.pscan_rep_mode = info->pscan_rep_mode; + data.pscan_period_mode = info->pscan_period_mode; + data.pscan_mode = 0x00; + memcpy(data.dev_class, info->dev_class, 3); + data.clock_offset = info->clock_offset; + data.rssi = info->rssi; + info++; + hci_inquiry_cache_update(hdev, &data); + } + } + + hci_dev_unlock(hdev); +} + +static inline void hci_remote_ext_features_evt(struct hci_dev *hdev, struct sk_buff *skb) +{ + BT_DBG("%s", hdev->name); +} + +static inline void hci_sync_conn_complete_evt(struct hci_dev *hdev, struct sk_buff *skb) +{ + struct hci_ev_sync_conn_complete *ev = (void *) skb->data; + struct hci_conn *conn; + + BT_DBG("%s status %d", hdev->name, ev->status); + + hci_dev_lock(hdev); + + conn = hci_conn_hash_lookup_ba(hdev, ev->link_type, &ev->bdaddr); + if (!conn) + goto unlock; + + if (!ev->status) { + conn->handle = __le16_to_cpu(ev->handle); + conn->state = BT_CONNECTED; + } else + conn->state = BT_CLOSED; + + hci_proto_connect_cfm(conn, ev->status); + if (ev->status) + hci_conn_del(conn); + +unlock: + hci_dev_unlock(hdev); +} + +static inline void hci_sync_conn_changed_evt(struct hci_dev *hdev, struct sk_buff *skb) +{ + BT_DBG("%s", hdev->name); +} + static inline void hci_sniff_subrate_evt(struct hci_dev *hdev, struct sk_buff *skb) { - struct hci_ev_sniff_subrate *ev = (struct hci_ev_sniff_subrate *) skb->data; + struct hci_ev_sniff_subrate *ev = (void *) skb->data; struct hci_conn *conn; BT_DBG("%s status %d", hdev->name, ev->status); @@ -1138,22 +1351,42 @@ static inline void hci_sniff_subrate_evt(struct hci_dev *hdev, struct sk_buff *s hci_dev_unlock(hdev); } -void hci_event_packet(struct hci_dev *hdev, struct sk_buff *skb) +static inline void hci_extended_inquiry_result_evt(struct hci_dev *hdev, struct sk_buff *skb) { - struct hci_event_hdr *hdr = (struct hci_event_hdr *) skb->data; - struct hci_ev_cmd_complete *ec; - struct hci_ev_cmd_status *cs; - u16 opcode, ocf, ogf; + struct inquiry_data data; + struct extended_inquiry_info *info = (void *) (skb->data + 1); + int num_rsp = *((__u8 *) skb->data); - skb_pull(skb, HCI_EVENT_HDR_SIZE); + BT_DBG("%s num_rsp %d", hdev->name, num_rsp); - BT_DBG("%s evt 0x%x", hdev->name, hdr->evt); + if (!num_rsp) + return; - switch (hdr->evt) { - case HCI_EV_NUM_COMP_PKTS: - hci_num_comp_pkts_evt(hdev, skb); - break; + hci_dev_lock(hdev); + + for (; num_rsp; num_rsp--) { + bacpy(&data.bdaddr, &info->bdaddr); + data.pscan_rep_mode = info->pscan_rep_mode; + data.pscan_period_mode = info->pscan_period_mode; + data.pscan_mode = 0x00; + memcpy(data.dev_class, info->dev_class, 3); + data.clock_offset = info->clock_offset; + data.rssi = info->rssi; + info++; + hci_inquiry_cache_update(hdev, &data); + } + hci_dev_unlock(hdev); +} + +void hci_event_packet(struct hci_dev *hdev, struct sk_buff *skb) +{ + struct hci_event_hdr *hdr = (void *) skb->data; + __u8 event = hdr->evt; + + skb_pull(skb, HCI_EVENT_HDR_SIZE); + + switch (event) { case 
HCI_EV_INQUIRY_COMPLETE: hci_inquiry_complete_evt(hdev, skb); break; @@ -1162,44 +1395,64 @@ void hci_event_packet(struct hci_dev *hdev, struct sk_buff *skb) hci_inquiry_result_evt(hdev, skb); break; - case HCI_EV_INQUIRY_RESULT_WITH_RSSI: - hci_inquiry_result_with_rssi_evt(hdev, skb); - break; - - case HCI_EV_EXTENDED_INQUIRY_RESULT: - hci_extended_inquiry_result_evt(hdev, skb); + case HCI_EV_CONN_COMPLETE: + hci_conn_complete_evt(hdev, skb); break; case HCI_EV_CONN_REQUEST: hci_conn_request_evt(hdev, skb); break; - case HCI_EV_CONN_COMPLETE: - hci_conn_complete_evt(hdev, skb); - break; - case HCI_EV_DISCONN_COMPLETE: hci_disconn_complete_evt(hdev, skb); break; - case HCI_EV_ROLE_CHANGE: - hci_role_change_evt(hdev, skb); - break; - - case HCI_EV_MODE_CHANGE: - hci_mode_change_evt(hdev, skb); - break; - case HCI_EV_AUTH_COMPLETE: hci_auth_complete_evt(hdev, skb); break; + case HCI_EV_REMOTE_NAME: + hci_remote_name_evt(hdev, skb); + break; + case HCI_EV_ENCRYPT_CHANGE: hci_encrypt_change_evt(hdev, skb); break; - case HCI_EV_CHANGE_CONN_LINK_KEY_COMPLETE: - hci_change_conn_link_key_complete_evt(hdev, skb); + case HCI_EV_CHANGE_LINK_KEY_COMPLETE: + hci_change_link_key_complete_evt(hdev, skb); + break; + + case HCI_EV_REMOTE_FEATURES: + hci_remote_features_evt(hdev, skb); + break; + + case HCI_EV_REMOTE_VERSION: + hci_remote_version_evt(hdev, skb); + break; + + case HCI_EV_QOS_SETUP_COMPLETE: + hci_qos_setup_complete_evt(hdev, skb); + break; + + case HCI_EV_CMD_COMPLETE: + hci_cmd_complete_evt(hdev, skb); + break; + + case HCI_EV_CMD_STATUS: + hci_cmd_status_evt(hdev, skb); + break; + + case HCI_EV_ROLE_CHANGE: + hci_role_change_evt(hdev, skb); + break; + + case HCI_EV_NUM_COMP_PKTS: + hci_num_comp_pkts_evt(hdev, skb); + break; + + case HCI_EV_MODE_CHANGE: + hci_mode_change_evt(hdev, skb); break; case HCI_EV_PIN_CODE_REQ: @@ -1214,10 +1467,6 @@ void hci_event_packet(struct hci_dev *hdev, struct sk_buff *skb) hci_link_key_notify_evt(hdev, skb); break; - case HCI_EV_REMOTE_FEATURES: - hci_remote_features_evt(hdev, skb); - break; - case HCI_EV_CLOCK_OFFSET: hci_clock_offset_evt(hdev, skb); break; @@ -1226,82 +1475,32 @@ void hci_event_packet(struct hci_dev *hdev, struct sk_buff *skb) hci_pscan_rep_mode_evt(hdev, skb); break; - case HCI_EV_SNIFF_SUBRATE: - hci_sniff_subrate_evt(hdev, skb); + case HCI_EV_INQUIRY_RESULT_WITH_RSSI: + hci_inquiry_result_with_rssi_evt(hdev, skb); break; - case HCI_EV_CMD_STATUS: - cs = (struct hci_ev_cmd_status *) skb->data; - skb_pull(skb, sizeof(cs)); - - opcode = __le16_to_cpu(cs->opcode); - ogf = hci_opcode_ogf(opcode); - ocf = hci_opcode_ocf(opcode); - - switch (ogf) { - case OGF_INFO_PARAM: - hci_cs_info_param(hdev, ocf, cs->status); - break; - - case OGF_HOST_CTL: - hci_cs_host_ctl(hdev, ocf, cs->status); - break; - - case OGF_LINK_CTL: - hci_cs_link_ctl(hdev, ocf, cs->status); - break; - - case OGF_LINK_POLICY: - hci_cs_link_policy(hdev, ocf, cs->status); - break; - - default: - BT_DBG("%s Command Status OGF %x", hdev->name, ogf); - break; - } - - if (cs->ncmd) { - atomic_set(&hdev->cmd_cnt, 1); - if (!skb_queue_empty(&hdev->cmd_q)) - hci_sched_cmd(hdev); - } + case HCI_EV_REMOTE_EXT_FEATURES: + hci_remote_ext_features_evt(hdev, skb); break; - case HCI_EV_CMD_COMPLETE: - ec = (struct hci_ev_cmd_complete *) skb->data; - skb_pull(skb, sizeof(*ec)); - - opcode = __le16_to_cpu(ec->opcode); - ogf = hci_opcode_ogf(opcode); - ocf = hci_opcode_ocf(opcode); - - switch (ogf) { - case OGF_INFO_PARAM: - hci_cc_info_param(hdev, ocf, skb); - break; - - case OGF_HOST_CTL: - 
hci_cc_host_ctl(hdev, ocf, skb); - break; + case HCI_EV_SYNC_CONN_COMPLETE: + hci_sync_conn_complete_evt(hdev, skb); + break; - case OGF_LINK_CTL: - hci_cc_link_ctl(hdev, ocf, skb); - break; + case HCI_EV_SYNC_CONN_CHANGED: + hci_sync_conn_changed_evt(hdev, skb); + break; - case OGF_LINK_POLICY: - hci_cc_link_policy(hdev, ocf, skb); - break; + case HCI_EV_SNIFF_SUBRATE: + hci_sniff_subrate_evt(hdev, skb); + break; - default: - BT_DBG("%s Command Completed OGF %x", hdev->name, ogf); - break; - } + case HCI_EV_EXTENDED_INQUIRY_RESULT: + hci_extended_inquiry_result_evt(hdev, skb); + break; - if (ec->ncmd) { - atomic_set(&hdev->cmd_cnt, 1); - if (!skb_queue_empty(&hdev->cmd_q)) - hci_sched_cmd(hdev); - } + default: + BT_DBG("%s event 0x%x", hdev->name, event); break; } diff --git a/net/bluetooth/hci_sock.c b/net/bluetooth/hci_sock.c index 43dd6373bff..8825102c517 100644 --- a/net/bluetooth/hci_sock.c +++ b/net/bluetooth/hci_sock.c @@ -451,7 +451,7 @@ static int hci_sock_sendmsg(struct kiocb *iocb, struct socket *sock, goto drop; } - if (test_bit(HCI_RAW, &hdev->flags) || (ogf == OGF_VENDOR_CMD)) { + if (test_bit(HCI_RAW, &hdev->flags) || (ogf == 0x3f)) { skb_queue_tail(&hdev->raw_q, skb); hci_sched_tx(hdev); } else { diff --git a/net/bluetooth/hci_sysfs.c b/net/bluetooth/hci_sysfs.c index 25835403d65..cef1e3e1881 100644 --- a/net/bluetooth/hci_sysfs.c +++ b/net/bluetooth/hci_sysfs.c @@ -41,6 +41,26 @@ static ssize_t show_type(struct device *dev, struct device_attribute *attr, char return sprintf(buf, "%s\n", typetostr(hdev->type)); } +static ssize_t show_name(struct device *dev, struct device_attribute *attr, char *buf) +{ + struct hci_dev *hdev = dev_get_drvdata(dev); + char name[249]; + int i; + + for (i = 0; i < 248; i++) + name[i] = hdev->dev_name[i]; + + name[248] = '\0'; + return sprintf(buf, "%s\n", name); +} + +static ssize_t show_class(struct device *dev, struct device_attribute *attr, char *buf) +{ + struct hci_dev *hdev = dev_get_drvdata(dev); + return sprintf(buf, "0x%.2x%.2x%.2x\n", + hdev->dev_class[2], hdev->dev_class[1], hdev->dev_class[0]); +} + static ssize_t show_address(struct device *dev, struct device_attribute *attr, char *buf) { struct hci_dev *hdev = dev_get_drvdata(dev); @@ -49,6 +69,17 @@ static ssize_t show_address(struct device *dev, struct device_attribute *attr, c return sprintf(buf, "%s\n", batostr(&bdaddr)); } +static ssize_t show_features(struct device *dev, struct device_attribute *attr, char *buf) +{ + struct hci_dev *hdev = dev_get_drvdata(dev); + + return sprintf(buf, "0x%02x%02x%02x%02x%02x%02x%02x%02x\n", + hdev->features[0], hdev->features[1], + hdev->features[2], hdev->features[3], + hdev->features[4], hdev->features[5], + hdev->features[6], hdev->features[7]); +} + static ssize_t show_manufacturer(struct device *dev, struct device_attribute *attr, char *buf) { struct hci_dev *hdev = dev_get_drvdata(dev); @@ -170,7 +201,10 @@ static ssize_t store_sniff_min_interval(struct device *dev, struct device_attrib } static DEVICE_ATTR(type, S_IRUGO, show_type, NULL); +static DEVICE_ATTR(name, S_IRUGO, show_name, NULL); +static DEVICE_ATTR(class, S_IRUGO, show_class, NULL); static DEVICE_ATTR(address, S_IRUGO, show_address, NULL); +static DEVICE_ATTR(features, S_IRUGO, show_features, NULL); static DEVICE_ATTR(manufacturer, S_IRUGO, show_manufacturer, NULL); static DEVICE_ATTR(hci_version, S_IRUGO, show_hci_version, NULL); static DEVICE_ATTR(hci_revision, S_IRUGO, show_hci_revision, NULL); @@ -185,7 +219,10 @@ static DEVICE_ATTR(sniff_min_interval, S_IRUGO | 
S_IWUSR, static struct device_attribute *bt_attrs[] = { &dev_attr_type, + &dev_attr_name, + &dev_attr_class, &dev_attr_address, + &dev_attr_features, &dev_attr_manufacturer, &dev_attr_hci_version, &dev_attr_hci_revision, diff --git a/net/bluetooth/hidp/core.c b/net/bluetooth/hidp/core.c index ff5784b440d..4bbacddeb49 100644 --- a/net/bluetooth/hidp/core.c +++ b/net/bluetooth/hidp/core.c @@ -247,7 +247,7 @@ static inline int hidp_queue_report(struct hidp_session *session, unsigned char { struct sk_buff *skb; - BT_DBG("session %p hid %p data %p size %d", session, device, data, size); + BT_DBG("session %p hid %p data %p size %d", session, session->hid, data, size); if (!(skb = alloc_skb(size + 1, GFP_ATOMIC))) { BT_ERR("Can't allocate memory for new frame"); @@ -656,11 +656,13 @@ static inline int hidp_setup_input(struct hidp_session *session, struct hidp_con } if (req->subclass & 0x80) { - input->evbit[0] = BIT(EV_KEY) | BIT(EV_REL); - input->keybit[LONG(BTN_MOUSE)] = BIT(BTN_LEFT) | BIT(BTN_RIGHT) | BIT(BTN_MIDDLE); - input->relbit[0] = BIT(REL_X) | BIT(REL_Y); - input->keybit[LONG(BTN_MOUSE)] |= BIT(BTN_SIDE) | BIT(BTN_EXTRA); - input->relbit[0] |= BIT(REL_WHEEL); + input->evbit[0] = BIT_MASK(EV_KEY) | BIT_MASK(EV_REL); + input->keybit[BIT_WORD(BTN_MOUSE)] = BIT_MASK(BTN_LEFT) | + BIT_MASK(BTN_RIGHT) | BIT_MASK(BTN_MIDDLE); + input->relbit[0] = BIT_MASK(REL_X) | BIT_MASK(REL_Y); + input->keybit[BIT_WORD(BTN_MOUSE)] |= BIT_MASK(BTN_SIDE) | + BIT_MASK(BTN_EXTRA); + input->relbit[0] |= BIT_MASK(REL_WHEEL); } input->dev.parent = hidp_get_device(session); diff --git a/net/bluetooth/l2cap.c b/net/bluetooth/l2cap.c index 36ef27b625d..6fbbae78b30 100644 --- a/net/bluetooth/l2cap.c +++ b/net/bluetooth/l2cap.c @@ -55,7 +55,9 @@ #define BT_DBG(D...) #endif -#define VERSION "2.8" +#define VERSION "2.9" + +static u32 l2cap_feat_mask = 0x0000; static const struct proto_ops l2cap_sock_ops; @@ -258,7 +260,119 @@ static void l2cap_chan_del(struct sock *sk, int err) sk->sk_state_change(sk); } +static inline u8 l2cap_get_ident(struct l2cap_conn *conn) +{ + u8 id; + + /* Get next available identifier. + * 1 - 128 are used by kernel. + * 129 - 199 are reserved. + * 200 - 254 are used by utilities like l2ping, etc.
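The identifier allocator added in this hunk hands out values 1-128 and wraps, so ident 0 is never issued and the ranges reserved for userspace tools stay untouched. That wraparound behaviour can be checked in isolation with a stand-alone sketch (plain C types, locking omitted; get_ident is a hypothetical name):

    #include <assert.h>

    static unsigned char tx_ident;

    static unsigned char get_ident(void)
    {
    	if (++tx_ident > 128)
    		tx_ident = 1;	/* wrap within the kernel range 1-128 */
    	return tx_ident;
    }

    int main(void)
    {
    	int i;

    	for (i = 0; i < 300; i++) {
    		unsigned char id = get_ident();
    		assert(id >= 1 && id <= 128);	/* 0 and 129-254 never appear */
    	}
    	return 0;
    }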
+ */ + + spin_lock_bh(&conn->lock); + + if (++conn->tx_ident > 128) + conn->tx_ident = 1; + + id = conn->tx_ident; + + spin_unlock_bh(&conn->lock); + + return id; +} + +static inline int l2cap_send_cmd(struct l2cap_conn *conn, u8 ident, u8 code, u16 len, void *data) +{ + struct sk_buff *skb = l2cap_build_cmd(conn, code, ident, len, data); + + BT_DBG("code 0x%2.2x", code); + + if (!skb) + return -ENOMEM; + + return hci_send_acl(conn->hcon, skb, 0); +} + /* ---- L2CAP connections ---- */ +static void l2cap_conn_start(struct l2cap_conn *conn) +{ + struct l2cap_chan_list *l = &conn->chan_list; + struct sock *sk; + + BT_DBG("conn %p", conn); + + read_lock(&l->lock); + + for (sk = l->head; sk; sk = l2cap_pi(sk)->next_c) { + bh_lock_sock(sk); + + if (sk->sk_type != SOCK_SEQPACKET) { + l2cap_sock_clear_timer(sk); + sk->sk_state = BT_CONNECTED; + sk->sk_state_change(sk); + } else if (sk->sk_state == BT_CONNECT) { + struct l2cap_conn_req req; + l2cap_pi(sk)->ident = l2cap_get_ident(conn); + req.scid = cpu_to_le16(l2cap_pi(sk)->scid); + req.psm = l2cap_pi(sk)->psm; + l2cap_send_cmd(conn, l2cap_pi(sk)->ident, + L2CAP_CONN_REQ, sizeof(req), &req); + } + + bh_unlock_sock(sk); + } + + read_unlock(&l->lock); +} + +static void l2cap_conn_ready(struct l2cap_conn *conn) +{ + BT_DBG("conn %p", conn); + + if (conn->chan_list.head || !hlist_empty(&l2cap_sk_list.head)) { + struct l2cap_info_req req; + + req.type = cpu_to_le16(L2CAP_IT_FEAT_MASK); + + conn->info_state |= L2CAP_INFO_FEAT_MASK_REQ_SENT; + conn->info_ident = l2cap_get_ident(conn); + + mod_timer(&conn->info_timer, + jiffies + msecs_to_jiffies(L2CAP_INFO_TIMEOUT)); + + l2cap_send_cmd(conn, conn->info_ident, + L2CAP_INFO_REQ, sizeof(req), &req); + } +} + +/* Notify sockets that we cannot guarantee reliability anymore */ +static void l2cap_conn_unreliable(struct l2cap_conn *conn, int err) +{ + struct l2cap_chan_list *l = &conn->chan_list; + struct sock *sk; + + BT_DBG("conn %p", conn); + + read_lock(&l->lock); + + for (sk = l->head; sk; sk = l2cap_pi(sk)->next_c) { + if (l2cap_pi(sk)->link_mode & L2CAP_LM_RELIABLE) + sk->sk_err = err; + } + + read_unlock(&l->lock); +} + +static void l2cap_info_timeout(unsigned long arg) +{ + struct l2cap_conn *conn = (void *) arg; + + conn->info_ident = 0; + + l2cap_conn_start(conn); +} + static struct l2cap_conn *l2cap_conn_add(struct hci_conn *hcon, u8 status) { struct l2cap_conn *conn = hcon->l2cap_data; @@ -279,6 +393,12 @@ static struct l2cap_conn *l2cap_conn_add(struct hci_conn *hcon, u8 status) conn->src = &hcon->hdev->bdaddr; conn->dst = &hcon->dst; + conn->feat_mask = 0; + + init_timer(&conn->info_timer); + conn->info_timer.function = l2cap_info_timeout; + conn->info_timer.data = (unsigned long) conn; + spin_lock_init(&conn->lock); rwlock_init(&conn->chan_list.lock); @@ -318,40 +438,6 @@ static inline void l2cap_chan_add(struct l2cap_conn *conn, struct sock *sk, stru write_unlock_bh(&l->lock); } -static inline u8 l2cap_get_ident(struct l2cap_conn *conn) -{ - u8 id; - - /* Get next available identificator. - * 1 - 128 are used by kernel. - * 129 - 199 are reserved. - * 200 - 254 are used by utilities like l2ping, etc.
- */ - - spin_lock_bh(&conn->lock); - - if (++conn->tx_ident > 128) - conn->tx_ident = 1; - - id = conn->tx_ident; - - spin_unlock_bh(&conn->lock); - - return id; -} - -static inline int l2cap_send_cmd(struct l2cap_conn *conn, u8 ident, u8 code, u16 len, void *data) -{ - struct sk_buff *skb = l2cap_build_cmd(conn, code, ident, len, data); - - BT_DBG("code 0x%2.2x", code); - - if (!skb) - return -ENOMEM; - - return hci_send_acl(conn->hcon, skb, 0); -} - /* ---- Socket interface ---- */ static struct sock *__l2cap_get_sock_by_addr(__le16 psm, bdaddr_t *src) { @@ -508,7 +594,6 @@ static void l2cap_sock_init(struct sock *sk, struct sock *parent) /* Default config options */ pi->conf_len = 0; - pi->conf_mtu = L2CAP_DEFAULT_MTU; pi->flush_to = L2CAP_DEFAULT_FLUSH_TO; } @@ -530,7 +615,7 @@ static struct sock *l2cap_sock_alloc(struct net *net, struct socket *sock, int p INIT_LIST_HEAD(&bt_sk(sk)->accept_q); sk->sk_destruct = l2cap_sock_destruct; - sk->sk_sndtimeo = L2CAP_CONN_TIMEOUT; + sk->sk_sndtimeo = msecs_to_jiffies(L2CAP_CONN_TIMEOUT); sock_reset_flag(sk, SOCK_ZAPPED); @@ -650,6 +735,11 @@ static int l2cap_do_connect(struct sock *sk) l2cap_sock_set_timer(sk, sk->sk_sndtimeo); if (hcon->state == BT_CONNECTED) { + if (!(conn->info_state & L2CAP_INFO_FEAT_MASK_REQ_SENT)) { + l2cap_conn_ready(conn); + goto done; + } + if (sk->sk_type == SOCK_SEQPACKET) { struct l2cap_conn_req req; l2cap_pi(sk)->ident = l2cap_get_ident(conn); @@ -958,7 +1048,7 @@ static int l2cap_sock_setsockopt(struct socket *sock, int level, int optname, ch opts.imtu = l2cap_pi(sk)->imtu; opts.omtu = l2cap_pi(sk)->omtu; opts.flush_to = l2cap_pi(sk)->flush_to; - opts.mode = 0x00; + opts.mode = L2CAP_MODE_BASIC; len = min_t(unsigned int, sizeof(opts), optlen); if (copy_from_user((char *) &opts, optval, len)) { @@ -1007,7 +1097,7 @@ static int l2cap_sock_getsockopt(struct socket *sock, int level, int optname, ch opts.imtu = l2cap_pi(sk)->imtu; opts.omtu = l2cap_pi(sk)->omtu; opts.flush_to = l2cap_pi(sk)->flush_to; - opts.mode = 0x00; + opts.mode = L2CAP_MODE_BASIC; len = min_t(unsigned int, len, sizeof(opts)); if (copy_to_user(optval, (char *) &opts, len)) @@ -1084,52 +1174,6 @@ static int l2cap_sock_release(struct socket *sock) return err; } -static void l2cap_conn_ready(struct l2cap_conn *conn) -{ - struct l2cap_chan_list *l = &conn->chan_list; - struct sock *sk; - - BT_DBG("conn %p", conn); - - read_lock(&l->lock); - - for (sk = l->head; sk; sk = l2cap_pi(sk)->next_c) { - bh_lock_sock(sk); - - if (sk->sk_type != SOCK_SEQPACKET) { - l2cap_sock_clear_timer(sk); - sk->sk_state = BT_CONNECTED; - sk->sk_state_change(sk); - } else if (sk->sk_state == BT_CONNECT) { - struct l2cap_conn_req req; - l2cap_pi(sk)->ident = l2cap_get_ident(conn); - req.scid = cpu_to_le16(l2cap_pi(sk)->scid); - req.psm = l2cap_pi(sk)->psm; - l2cap_send_cmd(conn, l2cap_pi(sk)->ident, L2CAP_CONN_REQ, sizeof(req), &req); - } - - bh_unlock_sock(sk); - } - - read_unlock(&l->lock); -} - -/* Notify sockets that we cannot guaranty reliability anymore */ -static void l2cap_conn_unreliable(struct l2cap_conn *conn, int err) -{ - struct l2cap_chan_list *l = &conn->chan_list; - struct sock *sk; - - BT_DBG("conn %p", conn); - - read_lock(&l->lock); - for (sk = l->head; sk; sk = l2cap_pi(sk)->next_c) { - if (l2cap_pi(sk)->link_mode & L2CAP_LM_RELIABLE) - sk->sk_err = err; - } - read_unlock(&l->lock); -} - static void l2cap_chan_ready(struct sock *sk) { struct sock *parent = bt_sk(sk)->parent; @@ -1256,11 +1300,11 @@ static inline int l2cap_get_conf_opt(void **ptr, int 
*type, int *olen, unsigned break; case 2: - *val = __le16_to_cpu(*((__le16 *)opt->val)); + *val = __le16_to_cpu(*((__le16 *) opt->val)); break; case 4: - *val = __le32_to_cpu(*((__le32 *)opt->val)); + *val = __le32_to_cpu(*((__le32 *) opt->val)); break; default: @@ -1332,6 +1376,8 @@ static int l2cap_parse_conf_req(struct sock *sk, void *data) int len = pi->conf_len; int type, hint, olen; unsigned long val; + struct l2cap_conf_rfc rfc = { .mode = L2CAP_MODE_BASIC }; + u16 mtu = L2CAP_DEFAULT_MTU; u16 result = L2CAP_CONF_SUCCESS; BT_DBG("sk %p", sk); @@ -1344,7 +1390,7 @@ static int l2cap_parse_conf_req(struct sock *sk, void *data) switch (type) { case L2CAP_CONF_MTU: - pi->conf_mtu = val; + mtu = val; break; case L2CAP_CONF_FLUSH_TO: @@ -1354,6 +1400,11 @@ static int l2cap_parse_conf_req(struct sock *sk, void *data) case L2CAP_CONF_QOS: break; + case L2CAP_CONF_RFC: + if (olen == sizeof(rfc)) + memcpy(&rfc, (void *) val, olen); + break; + default: if (hint) break; @@ -1368,12 +1419,24 @@ static int l2cap_parse_conf_req(struct sock *sk, void *data) /* Configure output options and let the other side know * which ones we don't like. */ - if (pi->conf_mtu < pi->omtu) + if (rfc.mode == L2CAP_MODE_BASIC) { + if (mtu < pi->omtu) + result = L2CAP_CONF_UNACCEPT; + else { + pi->omtu = mtu; + pi->conf_state |= L2CAP_CONF_OUTPUT_DONE; + } + + l2cap_add_conf_opt(&ptr, L2CAP_CONF_MTU, 2, pi->omtu); + } else { result = L2CAP_CONF_UNACCEPT; - else - pi->omtu = pi->conf_mtu; - l2cap_add_conf_opt(&ptr, L2CAP_CONF_MTU, 2, pi->omtu); + memset(&rfc, 0, sizeof(rfc)); + rfc.mode = L2CAP_MODE_BASIC; + + l2cap_add_conf_opt(&ptr, L2CAP_CONF_RFC, + sizeof(rfc), (unsigned long) &rfc); + } } rsp->scid = cpu_to_le16(pi->dcid); @@ -1397,6 +1460,23 @@ static int l2cap_build_conf_rsp(struct sock *sk, void *data, u16 result, u16 fla return ptr - data; } +static inline int l2cap_command_rej(struct l2cap_conn *conn, struct l2cap_cmd_hdr *cmd, u8 *data) +{ + struct l2cap_cmd_rej *rej = (struct l2cap_cmd_rej *) data; + + if (rej->reason != 0x0000) + return 0; + + if ((conn->info_state & L2CAP_INFO_FEAT_MASK_REQ_SENT) && + cmd->ident == conn->info_ident) { + conn->info_ident = 0; + del_timer(&conn->info_timer); + l2cap_conn_start(conn); + } + + return 0; +} + static inline int l2cap_connect_req(struct l2cap_conn *conn, struct l2cap_cmd_hdr *cmd, u8 *data) { struct l2cap_chan_list *list = &conn->chan_list; @@ -1577,16 +1657,19 @@ static inline int l2cap_config_req(struct l2cap_conn *conn, struct l2cap_cmd_hdr l2cap_send_cmd(conn, cmd->ident, L2CAP_CONF_RSP, len, rsp); - /* Output config done. */ - l2cap_pi(sk)->conf_state |= L2CAP_CONF_OUTPUT_DONE; - /* Reset config buffer. 
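The config-request parsing in the hunks above walks L2CAP configuration options encoded as type/length/value triples, where the high bit of the type marks the option as a hint that may be skipped when unrecognized. A compact sketch of that walk, assuming the option layout from the L2CAP specification (struct and function names hypothetical):

    struct conf_opt {			/* type[1] len[1] val[len] */
    	unsigned char type;
    	unsigned char len;
    	unsigned char val[0];
    };

    static int walk_options(unsigned char *data, int len)
    {
    	while (len >= 2) {
    		struct conf_opt *opt = (struct conf_opt *) data;
    		int olen = 2 + opt->len;

    		if (olen > len)
    			return -1;	/* malformed: value overruns buffer */

    		if (opt->type & 0x80) {
    			/* hint: safe to ignore when unknown */
    		} else {
    			/* mandatory: dispatch on opt->type, e.g. MTU, RFC */
    		}

    		data += olen;
    		len -= olen;
    	}
    	return 0;
    }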
*/ l2cap_pi(sk)->conf_len = 0; + if (!(l2cap_pi(sk)->conf_state & L2CAP_CONF_OUTPUT_DONE)) + goto unlock; + if (l2cap_pi(sk)->conf_state & L2CAP_CONF_INPUT_DONE) { sk->sk_state = BT_CONNECTED; l2cap_chan_ready(sk); - } else if (!(l2cap_pi(sk)->conf_state & L2CAP_CONF_REQ_SENT)) { + goto unlock; + } + + if (!(l2cap_pi(sk)->conf_state & L2CAP_CONF_REQ_SENT)) { u8 req[64]; l2cap_send_cmd(conn, l2cap_get_ident(conn), L2CAP_CONF_REQ, l2cap_build_conf_req(sk, req), req); @@ -1646,7 +1729,6 @@ static inline int l2cap_config_rsp(struct l2cap_conn *conn, struct l2cap_cmd_hdr if (flags & 0x01) goto done; - /* Input config done */ l2cap_pi(sk)->conf_state |= L2CAP_CONF_INPUT_DONE; if (l2cap_pi(sk)->conf_state & L2CAP_CONF_OUTPUT_DONE) { @@ -1711,16 +1793,27 @@ static inline int l2cap_disconnect_rsp(struct l2cap_conn *conn, struct l2cap_cmd static inline int l2cap_information_req(struct l2cap_conn *conn, struct l2cap_cmd_hdr *cmd, u8 *data) { struct l2cap_info_req *req = (struct l2cap_info_req *) data; - struct l2cap_info_rsp rsp; u16 type; type = __le16_to_cpu(req->type); BT_DBG("type 0x%4.4x", type); - rsp.type = cpu_to_le16(type); - rsp.result = cpu_to_le16(L2CAP_IR_NOTSUPP); - l2cap_send_cmd(conn, cmd->ident, L2CAP_INFO_RSP, sizeof(rsp), &rsp); + if (type == L2CAP_IT_FEAT_MASK) { + u8 buf[8]; + struct l2cap_info_rsp *rsp = (struct l2cap_info_rsp *) buf; + rsp->type = cpu_to_le16(L2CAP_IT_FEAT_MASK); + rsp->result = cpu_to_le16(L2CAP_IR_SUCCESS); + put_unaligned(cpu_to_le32(l2cap_feat_mask), (__le32 *) rsp->data); + l2cap_send_cmd(conn, cmd->ident, + L2CAP_INFO_RSP, sizeof(buf), buf); + } else { + struct l2cap_info_rsp rsp; + rsp.type = cpu_to_le16(type); + rsp.result = cpu_to_le16(L2CAP_IR_NOTSUPP); + l2cap_send_cmd(conn, cmd->ident, + L2CAP_INFO_RSP, sizeof(rsp), &rsp); + } return 0; } @@ -1735,6 +1828,15 @@ static inline int l2cap_information_rsp(struct l2cap_conn *conn, struct l2cap_cm BT_DBG("type 0x%4.4x result 0x%2.2x", type, result); + conn->info_ident = 0; + + del_timer(&conn->info_timer); + + if (type == L2CAP_IT_FEAT_MASK) + conn->feat_mask = __le32_to_cpu(get_unaligned((__le32 *) rsp->data)); + + l2cap_conn_start(conn); + return 0; } @@ -1764,7 +1866,7 @@ static inline void l2cap_sig_channel(struct l2cap_conn *conn, struct sk_buff *sk switch (cmd.code) { case L2CAP_COMMAND_REJ: - /* FIXME: We should process this */ + l2cap_command_rej(conn, &cmd, data); break; case L2CAP_CONN_REQ: diff --git a/net/bluetooth/rfcomm/core.c b/net/bluetooth/rfcomm/core.c index bb7220770f2..e7ac6ba7eca 100644 --- a/net/bluetooth/rfcomm/core.c +++ b/net/bluetooth/rfcomm/core.c @@ -33,11 +33,11 @@ #include <linux/sched.h> #include <linux/signal.h> #include <linux/init.h> -#include <linux/freezer.h> #include <linux/wait.h> #include <linux/device.h> #include <linux/net.h> #include <linux/mutex.h> +#include <linux/kthread.h> #include <net/sock.h> #include <asm/uaccess.h> @@ -68,7 +68,6 @@ static DEFINE_MUTEX(rfcomm_mutex); static unsigned long rfcomm_event; static LIST_HEAD(session_list); -static atomic_t terminate, running; static int rfcomm_send_frame(struct rfcomm_session *s, u8 *data, int len); static int rfcomm_send_sabm(struct rfcomm_session *s, u8 dlci); @@ -1850,26 +1849,6 @@ static inline void rfcomm_process_sessions(void) rfcomm_unlock(); } -static void rfcomm_worker(void) -{ - BT_DBG(""); - - while (!atomic_read(&terminate)) { - set_current_state(TASK_INTERRUPTIBLE); - if (!test_bit(RFCOMM_SCHED_WAKEUP, &rfcomm_event)) { - /* No pending events. Let's sleep. 
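The surrounding RFCOMM hunks retire the daemonize()-based worker and its hand-rolled terminate/running counters in favour of the kthread API, where kthread_stop() both signals and reaps the thread. The generic shape of that pattern, as a sketch (worker_fn and the pending word are illustrative names, not the RFCOMM ones):

    #include <linux/kthread.h>
    #include <linux/sched.h>

    static struct task_struct *worker;
    static unsigned long pending;

    static int worker_fn(void *unused)
    {
    	while (!kthread_should_stop()) {
    		set_current_state(TASK_INTERRUPTIBLE);
    		if (!test_bit(0, &pending))
    			schedule();	/* sleep until woken or stopped */
    		set_current_state(TASK_RUNNING);

    		clear_bit(0, &pending);
    		/* ... process queued work ... */
    	}
    	return 0;
    }

    /* start: worker = kthread_run(worker_fn, NULL, "kworkerd");
     * stop:  kthread_stop(worker) sets the stop flag, wakes the
     *        thread and waits for worker_fn() to return. */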
- * Incoming connections and data will wake us up. */ - schedule(); - } - set_current_state(TASK_RUNNING); - - /* Process stuff */ - clear_bit(RFCOMM_SCHED_WAKEUP, &rfcomm_event); - rfcomm_process_sessions(); - } - return; -} - static int rfcomm_add_listener(bdaddr_t *ba) { struct sockaddr_l2 addr; @@ -1935,22 +1914,28 @@ static void rfcomm_kill_listener(void) static int rfcomm_run(void *unused) { - rfcomm_thread = current; - - atomic_inc(&running); + BT_DBG(""); - daemonize("krfcommd"); set_user_nice(current, -10); - BT_DBG(""); - rfcomm_add_listener(BDADDR_ANY); - rfcomm_worker(); + while (!kthread_should_stop()) { + set_current_state(TASK_INTERRUPTIBLE); + if (!test_bit(RFCOMM_SCHED_WAKEUP, &rfcomm_event)) { + /* No pending events. Let's sleep. + * Incoming connections and data will wake us up. */ + schedule(); + } + set_current_state(TASK_RUNNING); + + /* Process stuff */ + clear_bit(RFCOMM_SCHED_WAKEUP, &rfcomm_event); + rfcomm_process_sessions(); + } rfcomm_kill_listener(); - atomic_dec(&running); return 0; } @@ -2059,7 +2044,11 @@ static int __init rfcomm_init(void) hci_register_cb(&rfcomm_cb); - kernel_thread(rfcomm_run, NULL, CLONE_KERNEL); + rfcomm_thread = kthread_run(rfcomm_run, NULL, "krfcommd"); + if (IS_ERR(rfcomm_thread)) { + hci_unregister_cb(&rfcomm_cb); + return PTR_ERR(rfcomm_thread); + } if (class_create_file(bt_class, &class_attr_rfcomm_dlc) < 0) BT_ERR("Failed to create RFCOMM info file"); @@ -2081,14 +2070,7 @@ static void __exit rfcomm_exit(void) hci_unregister_cb(&rfcomm_cb); - /* Terminate working thread. - * ie. Set terminate flag and wake it up */ - atomic_inc(&terminate); - rfcomm_schedule(RFCOMM_SCHED_STATE); - - /* Wait until thread is running */ - while (atomic_read(&running)) - schedule(); + kthread_stop(rfcomm_thread); #ifdef CONFIG_BT_RFCOMM_TTY rfcomm_cleanup_ttys(); diff --git a/net/bluetooth/rfcomm/tty.c b/net/bluetooth/rfcomm/tty.c index 22a832098d4..e447651a2db 100644 --- a/net/bluetooth/rfcomm/tty.c +++ b/net/bluetooth/rfcomm/tty.c @@ -189,6 +189,23 @@ static struct device *rfcomm_get_device(struct rfcomm_dev *dev) return conn ? 
&conn->dev : NULL; } +static ssize_t show_address(struct device *tty_dev, struct device_attribute *attr, char *buf) +{ + struct rfcomm_dev *dev = dev_get_drvdata(tty_dev); + bdaddr_t bdaddr; + baswap(&bdaddr, &dev->dst); + return sprintf(buf, "%s\n", batostr(&bdaddr)); +} + +static ssize_t show_channel(struct device *tty_dev, struct device_attribute *attr, char *buf) +{ + struct rfcomm_dev *dev = dev_get_drvdata(tty_dev); + return sprintf(buf, "%d\n", dev->channel); +} + +static DEVICE_ATTR(address, S_IRUGO, show_address, NULL); +static DEVICE_ATTR(channel, S_IRUGO, show_channel, NULL); + static int rfcomm_dev_add(struct rfcomm_dev_req *req, struct rfcomm_dlc *dlc) { struct rfcomm_dev *dev; @@ -281,6 +298,14 @@ out: return err; } + dev_set_drvdata(dev->tty_dev, dev); + + if (device_create_file(dev->tty_dev, &dev_attr_address) < 0) + BT_ERR("Failed to create address attribute"); + + if (device_create_file(dev->tty_dev, &dev_attr_channel) < 0) + BT_ERR("Failed to create channel attribute"); + return dev->id; } diff --git a/net/bluetooth/sco.c b/net/bluetooth/sco.c index 65b6fb1c415..82d0dfdfa7e 100644 --- a/net/bluetooth/sco.c +++ b/net/bluetooth/sco.c @@ -189,7 +189,7 @@ static int sco_connect(struct sock *sk) struct sco_conn *conn; struct hci_conn *hcon; struct hci_dev *hdev; - int err = 0; + int err, type; BT_DBG("%s -> %s", batostr(src), batostr(dst)); @@ -200,7 +200,9 @@ static int sco_connect(struct sock *sk) err = -ENOMEM; - hcon = hci_connect(hdev, SCO_LINK, dst); + type = lmp_esco_capable(hdev) ? ESCO_LINK : SCO_LINK; + + hcon = hci_connect(hdev, type, dst); if (!hcon) goto done; @@ -224,6 +226,7 @@ static int sco_connect(struct sock *sk) sk->sk_state = BT_CONNECT; sco_sock_set_timer(sk, sk->sk_sndtimeo); } + done: hci_dev_unlock_bh(hdev); hci_dev_put(hdev); @@ -846,7 +849,7 @@ static int sco_connect_cfm(struct hci_conn *hcon, __u8 status) { BT_DBG("hcon %p bdaddr %s status %d", hcon, batostr(&hcon->dst), status); - if (hcon->type != SCO_LINK) + if (hcon->type != SCO_LINK && hcon->type != ESCO_LINK) return 0; if (!status) { @@ -865,10 +868,11 @@ static int sco_disconn_ind(struct hci_conn *hcon, __u8 reason) { BT_DBG("hcon %p reason %d", hcon, reason); - if (hcon->type != SCO_LINK) + if (hcon->type != SCO_LINK && hcon->type != ESCO_LINK) return 0; sco_conn_del(hcon, bt_err(reason)); + return 0; } diff --git a/net/bridge/br.c b/net/bridge/br.c index 848b8fa8bed..93867bb6cc9 100644 --- a/net/bridge/br.c +++ b/net/bridge/br.c @@ -23,7 +23,7 @@ #include "br_private.h" -int (*br_should_route_hook) (struct sk_buff **pskb) = NULL; +int (*br_should_route_hook)(struct sk_buff *skb); static struct llc_sap *br_stp_sap; diff --git a/net/bridge/br_input.c b/net/bridge/br_input.c index 3a8a015c92e..3cedd4eeeed 100644 --- a/net/bridge/br_input.c +++ b/net/bridge/br_input.c @@ -126,6 +126,10 @@ struct sk_buff *br_handle_frame(struct net_bridge_port *p, struct sk_buff *skb) if (!is_valid_ether_addr(eth_hdr(skb)->h_source)) goto drop; + skb = skb_share_check(skb, GFP_ATOMIC); + if (!skb) + return NULL; + if (unlikely(is_link_local(dest))) { /* Pause frames shouldn't be passed up by driver anyway */ if (skb->protocol == htons(ETH_P_PAUSE)) @@ -145,7 +149,7 @@ struct sk_buff *br_handle_frame(struct net_bridge_port *p, struct sk_buff *skb) case BR_STATE_FORWARDING: if (br_should_route_hook) { - if (br_should_route_hook(&skb)) + if (br_should_route_hook(skb)) return skb; dest = eth_hdr(skb)->h_dest; } diff --git a/net/bridge/br_netfilter.c b/net/bridge/br_netfilter.c index 8245f051ccb..da22f900e89 100644 
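The br_netfilter.c changes that follow apply one mechanical conversion: netfilter hooks used to take struct sk_buff **pskb so a hook could substitute the skb (for example after skb_share_check), and now receive the skb pointer directly, with unsharing moved to the caller (see the skb_share_check added to br_handle_frame above). Sketched before/after, with hypothetical hook names:

    /* before: a hook may replace the caller's skb via *pskb */
    static unsigned int old_hook(unsigned int hook, struct sk_buff **pskb,
    			     const struct net_device *in,
    			     const struct net_device *out,
    			     int (*okfn)(struct sk_buff *))
    {
    	struct sk_buff *skb = *pskb;
    	/* ... may set *pskb = nskb ... */
    	return NF_ACCEPT;
    }

    /* after: the skb is already unshared; hooks operate on it in place */
    static unsigned int new_hook(unsigned int hook, struct sk_buff *skb,
    			     const struct net_device *in,
    			     const struct net_device *out,
    			     int (*okfn)(struct sk_buff *))
    {
    	return NF_ACCEPT;
    }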
--- a/net/bridge/br_netfilter.c +++ b/net/bridge/br_netfilter.c @@ -503,18 +503,14 @@ inhdr_error: * receiving device) to make netfilter happy, the REDIRECT * target in particular. Save the original destination IP * address to be able to detect DNAT afterwards. */ -static unsigned int br_nf_pre_routing(unsigned int hook, struct sk_buff **pskb, +static unsigned int br_nf_pre_routing(unsigned int hook, struct sk_buff *skb, const struct net_device *in, const struct net_device *out, int (*okfn)(struct sk_buff *)) { struct iphdr *iph; - struct sk_buff *skb = *pskb; __u32 len = nf_bridge_encap_header_len(skb); - if ((skb = skb_share_check(skb, GFP_ATOMIC)) == NULL) - return NF_STOLEN; - if (unlikely(!pskb_may_pull(skb, len))) goto out; @@ -584,13 +580,11 @@ out: * took place when the packet entered the bridge), but we * register an IPv4 PRE_ROUTING 'sabotage' hook that will * prevent this from happening. */ -static unsigned int br_nf_local_in(unsigned int hook, struct sk_buff **pskb, +static unsigned int br_nf_local_in(unsigned int hook, struct sk_buff *skb, const struct net_device *in, const struct net_device *out, int (*okfn)(struct sk_buff *)) { - struct sk_buff *skb = *pskb; - if (skb->dst == (struct dst_entry *)&__fake_rtable) { dst_release(skb->dst); skb->dst = NULL; @@ -625,12 +619,11 @@ static int br_nf_forward_finish(struct sk_buff *skb) * but we are still able to filter on the 'real' indev/outdev * because of the physdev module. For ARP, indev and outdev are the * bridge ports. */ -static unsigned int br_nf_forward_ip(unsigned int hook, struct sk_buff **pskb, +static unsigned int br_nf_forward_ip(unsigned int hook, struct sk_buff *skb, const struct net_device *in, const struct net_device *out, int (*okfn)(struct sk_buff *)) { - struct sk_buff *skb = *pskb; struct nf_bridge_info *nf_bridge; struct net_device *parent; int pf; @@ -648,7 +641,7 @@ static unsigned int br_nf_forward_ip(unsigned int hook, struct sk_buff **pskb, else pf = PF_INET6; - nf_bridge_pull_encap_header(*pskb); + nf_bridge_pull_encap_header(skb); nf_bridge = skb->nf_bridge; if (skb->pkt_type == PACKET_OTHERHOST) { @@ -666,12 +659,11 @@ static unsigned int br_nf_forward_ip(unsigned int hook, struct sk_buff **pskb, return NF_STOLEN; } -static unsigned int br_nf_forward_arp(unsigned int hook, struct sk_buff **pskb, +static unsigned int br_nf_forward_arp(unsigned int hook, struct sk_buff *skb, const struct net_device *in, const struct net_device *out, int (*okfn)(struct sk_buff *)) { - struct sk_buff *skb = *pskb; struct net_device **d = (struct net_device **)(skb->cb); #ifdef CONFIG_SYSCTL @@ -682,12 +674,12 @@ static unsigned int br_nf_forward_arp(unsigned int hook, struct sk_buff **pskb, if (skb->protocol != htons(ETH_P_ARP)) { if (!IS_VLAN_ARP(skb)) return NF_ACCEPT; - nf_bridge_pull_encap_header(*pskb); + nf_bridge_pull_encap_header(skb); } if (arp_hdr(skb)->ar_pln != 4) { if (IS_VLAN_ARP(skb)) - nf_bridge_push_encap_header(*pskb); + nf_bridge_push_encap_header(skb); return NF_ACCEPT; } *d = (struct net_device *)in; @@ -709,13 +701,12 @@ static unsigned int br_nf_forward_arp(unsigned int hook, struct sk_buff **pskb, * NF_BR_PRI_FIRST, so no relevant PF_BRIDGE/INPUT functions have been nor * will be executed. 
*/ -static unsigned int br_nf_local_out(unsigned int hook, struct sk_buff **pskb, +static unsigned int br_nf_local_out(unsigned int hook, struct sk_buff *skb, const struct net_device *in, const struct net_device *out, int (*okfn)(struct sk_buff *)) { struct net_device *realindev; - struct sk_buff *skb = *pskb; struct nf_bridge_info *nf_bridge; if (!skb->nf_bridge) @@ -752,13 +743,12 @@ static int br_nf_dev_queue_xmit(struct sk_buff *skb) } /* PF_BRIDGE/POST_ROUTING ********************************************/ -static unsigned int br_nf_post_routing(unsigned int hook, struct sk_buff **pskb, +static unsigned int br_nf_post_routing(unsigned int hook, struct sk_buff *skb, const struct net_device *in, const struct net_device *out, int (*okfn)(struct sk_buff *)) { - struct sk_buff *skb = *pskb; - struct nf_bridge_info *nf_bridge = (*pskb)->nf_bridge; + struct nf_bridge_info *nf_bridge = skb->nf_bridge; struct net_device *realoutdev = bridge_parent(skb->dev); int pf; @@ -828,13 +818,13 @@ print_error: /* IP/SABOTAGE *****************************************************/ /* Don't hand locally destined packets to PF_INET(6)/PRE_ROUTING * for the second time. */ -static unsigned int ip_sabotage_in(unsigned int hook, struct sk_buff **pskb, +static unsigned int ip_sabotage_in(unsigned int hook, struct sk_buff *skb, const struct net_device *in, const struct net_device *out, int (*okfn)(struct sk_buff *)) { - if ((*pskb)->nf_bridge && - !((*pskb)->nf_bridge->mask & BRNF_NF_BRIDGE_PREROUTING)) { + if (skb->nf_bridge && + !(skb->nf_bridge->mask & BRNF_NF_BRIDGE_PREROUTING)) { return NF_STOP; } diff --git a/net/bridge/netfilter/ebt_arpreply.c b/net/bridge/netfilter/ebt_arpreply.c index ffe468a632e..48a80e42328 100644 --- a/net/bridge/netfilter/ebt_arpreply.c +++ b/net/bridge/netfilter/ebt_arpreply.c @@ -15,7 +15,7 @@ #include <net/arp.h> #include <linux/module.h> -static int ebt_target_reply(struct sk_buff **pskb, unsigned int hooknr, +static int ebt_target_reply(struct sk_buff *skb, unsigned int hooknr, const struct net_device *in, const struct net_device *out, const void *data, unsigned int datalen) { @@ -23,7 +23,6 @@ static int ebt_target_reply(struct sk_buff **pskb, unsigned int hooknr, __be32 _sip, *siptr, _dip, *diptr; struct arphdr _ah, *ap; unsigned char _sha[ETH_ALEN], *shp; - struct sk_buff *skb = *pskb; ap = skb_header_pointer(skb, 0, sizeof(_ah), &_ah); if (ap == NULL) diff --git a/net/bridge/netfilter/ebt_dnat.c b/net/bridge/netfilter/ebt_dnat.c index 4582659dff0..74262e9a566 100644 --- a/net/bridge/netfilter/ebt_dnat.c +++ b/net/bridge/netfilter/ebt_dnat.c @@ -8,29 +8,22 @@ * */ +#include <linux/netfilter.h> #include <linux/netfilter_bridge/ebtables.h> #include <linux/netfilter_bridge/ebt_nat.h> #include <linux/module.h> #include <net/sock.h> -static int ebt_target_dnat(struct sk_buff **pskb, unsigned int hooknr, +static int ebt_target_dnat(struct sk_buff *skb, unsigned int hooknr, const struct net_device *in, const struct net_device *out, const void *data, unsigned int datalen) { struct ebt_nat_info *info = (struct ebt_nat_info *)data; - if (skb_shared(*pskb) || skb_cloned(*pskb)) { - struct sk_buff *nskb; + if (!skb_make_writable(skb, 0)) + return NF_DROP; - nskb = skb_copy(*pskb, GFP_ATOMIC); - if (!nskb) - return NF_DROP; - if ((*pskb)->sk) - skb_set_owner_w(nskb, (*pskb)->sk); - kfree_skb(*pskb); - *pskb = nskb; - } - memcpy(eth_hdr(*pskb)->h_dest, info->mac, ETH_ALEN); + memcpy(eth_hdr(skb)->h_dest, info->mac, ETH_ALEN); return info->target; }
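/*
 * The ebt_dnat conversion above, and ebt_redirect/ebt_snat below, replace
 * ebtables' hand-rolled copy-on-write block (skb_copy, skb_set_owner_w,
 * kfree_skb, *pskb = nskb) with skb_make_writable(), which in this tree
 * returns nonzero once the skb is private and writable up to the requested
 * length, so a target drops the packet only when that call fails. A minimal
 * sketch of a target in the new calling convention; example_target and
 * new_mac are illustrative, not part of the patch:
 */
static int example_target(struct sk_buff *skb, unsigned int hooknr,
   const struct net_device *in, const struct net_device *out,
   const void *data, unsigned int datalen)
{
	/* hypothetical replacement MAC, for illustration only */
	static const unsigned char new_mac[ETH_ALEN] =
		{ 0x00, 0x16, 0x3e, 0x00, 0x00, 0x01 };

	/* writable_len 0: we only touch the already-linear MAC header */
	if (!skb_make_writable(skb, 0))
		return NF_DROP;

	memcpy(eth_hdr(skb)->h_dest, new_mac, ETH_ALEN);
	return EBT_ACCEPT;
}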
diff --git a/net/bridge/netfilter/ebt_mark.c b/net/bridge/netfilter/ebt_mark.c index 62d23c7b25e..6cba54309c0 100644 --- a/net/bridge/netfilter/ebt_mark.c +++ b/net/bridge/netfilter/ebt_mark.c @@ -17,7 +17,7 @@ #include <linux/netfilter_bridge/ebt_mark_t.h> #include <linux/module.h> -static int ebt_target_mark(struct sk_buff **pskb, unsigned int hooknr, +static int ebt_target_mark(struct sk_buff *skb, unsigned int hooknr, const struct net_device *in, const struct net_device *out, const void *data, unsigned int datalen) { @@ -25,13 +25,13 @@ static int ebt_target_mark(struct sk_buff **pskb, unsigned int hooknr, int action = info->target & -16; if (action == MARK_SET_VALUE) - (*pskb)->mark = info->mark; + skb->mark = info->mark; else if (action == MARK_OR_VALUE) - (*pskb)->mark |= info->mark; + skb->mark |= info->mark; else if (action == MARK_AND_VALUE) - (*pskb)->mark &= info->mark; + skb->mark &= info->mark; else - (*pskb)->mark ^= info->mark; + skb->mark ^= info->mark; return info->target | ~EBT_VERDICT_BITS; } diff --git a/net/bridge/netfilter/ebt_redirect.c b/net/bridge/netfilter/ebt_redirect.c index 9f378eab72d..422cb834cff 100644 --- a/net/bridge/netfilter/ebt_redirect.c +++ b/net/bridge/netfilter/ebt_redirect.c @@ -8,35 +8,28 @@ * */ +#include <linux/netfilter.h> #include <linux/netfilter_bridge/ebtables.h> #include <linux/netfilter_bridge/ebt_redirect.h> #include <linux/module.h> #include <net/sock.h> #include "../br_private.h" -static int ebt_target_redirect(struct sk_buff **pskb, unsigned int hooknr, +static int ebt_target_redirect(struct sk_buff *skb, unsigned int hooknr, const struct net_device *in, const struct net_device *out, const void *data, unsigned int datalen) { struct ebt_redirect_info *info = (struct ebt_redirect_info *)data; - if (skb_shared(*pskb) || skb_cloned(*pskb)) { - struct sk_buff *nskb; + if (!skb_make_writable(skb, 0)) + return NF_DROP; - nskb = skb_copy(*pskb, GFP_ATOMIC); - if (!nskb) - return NF_DROP; - if ((*pskb)->sk) - skb_set_owner_w(nskb, (*pskb)->sk); - kfree_skb(*pskb); - *pskb = nskb; - } if (hooknr != NF_BR_BROUTING) - memcpy(eth_hdr(*pskb)->h_dest, + memcpy(eth_hdr(skb)->h_dest, in->br_port->br->dev->dev_addr, ETH_ALEN); else - memcpy(eth_hdr(*pskb)->h_dest, in->dev_addr, ETH_ALEN); - (*pskb)->pkt_type = PACKET_HOST; + memcpy(eth_hdr(skb)->h_dest, in->dev_addr, ETH_ALEN); + skb->pkt_type = PACKET_HOST; return info->target; } diff --git a/net/bridge/netfilter/ebt_snat.c b/net/bridge/netfilter/ebt_snat.c index a50722182bf..425ac920904 100644 --- a/net/bridge/netfilter/ebt_snat.c +++ b/net/bridge/netfilter/ebt_snat.c @@ -8,6 +8,7 @@ * */ +#include <linux/netfilter.h> #include <linux/netfilter_bridge/ebtables.h> #include <linux/netfilter_bridge/ebt_nat.h> #include <linux/module.h> @@ -15,34 +16,26 @@ #include <linux/if_arp.h> #include <net/arp.h> -static int ebt_target_snat(struct sk_buff **pskb, unsigned int hooknr, +static int ebt_target_snat(struct sk_buff *skb, unsigned int hooknr, const struct net_device *in, const struct net_device *out, const void *data, unsigned int datalen) { struct ebt_nat_info *info = (struct ebt_nat_info *) data; - if (skb_shared(*pskb) || skb_cloned(*pskb)) { - struct sk_buff *nskb; + if (!skb_make_writable(skb, 0)) + return NF_DROP; - nskb = skb_copy(*pskb, GFP_ATOMIC); - if (!nskb) - return NF_DROP; - if ((*pskb)->sk) - skb_set_owner_w(nskb, (*pskb)->sk); - kfree_skb(*pskb); - *pskb = nskb; - } - memcpy(eth_hdr(*pskb)->h_source, info->mac, ETH_ALEN); + memcpy(eth_hdr(skb)->h_source, info->mac, ETH_ALEN); if (!(info->target & 
NAT_ARP_BIT) && - eth_hdr(*pskb)->h_proto == htons(ETH_P_ARP)) { + eth_hdr(skb)->h_proto == htons(ETH_P_ARP)) { struct arphdr _ah, *ap; - ap = skb_header_pointer(*pskb, 0, sizeof(_ah), &_ah); + ap = skb_header_pointer(skb, 0, sizeof(_ah), &_ah); if (ap == NULL) return EBT_DROP; if (ap->ar_hln != ETH_ALEN) goto out; - if (skb_store_bits(*pskb, sizeof(_ah), info->mac,ETH_ALEN)) + if (skb_store_bits(skb, sizeof(_ah), info->mac,ETH_ALEN)) return EBT_DROP; } out: diff --git a/net/bridge/netfilter/ebtable_broute.c b/net/bridge/netfilter/ebtable_broute.c index d37ce047893..e44519ebf1d 100644 --- a/net/bridge/netfilter/ebtable_broute.c +++ b/net/bridge/netfilter/ebtable_broute.c @@ -51,11 +51,11 @@ static struct ebt_table broute_table = .me = THIS_MODULE, }; -static int ebt_broute(struct sk_buff **pskb) +static int ebt_broute(struct sk_buff *skb) { int ret; - ret = ebt_do_table(NF_BR_BROUTING, pskb, (*pskb)->dev, NULL, + ret = ebt_do_table(NF_BR_BROUTING, skb, skb->dev, NULL, &broute_table); if (ret == NF_DROP) return 1; /* route it */ diff --git a/net/bridge/netfilter/ebtable_filter.c b/net/bridge/netfilter/ebtable_filter.c index 81d84145c41..210493f99bc 100644 --- a/net/bridge/netfilter/ebtable_filter.c +++ b/net/bridge/netfilter/ebtable_filter.c @@ -61,10 +61,10 @@ static struct ebt_table frame_filter = }; static unsigned int -ebt_hook (unsigned int hook, struct sk_buff **pskb, const struct net_device *in, +ebt_hook(unsigned int hook, struct sk_buff *skb, const struct net_device *in, const struct net_device *out, int (*okfn)(struct sk_buff *)) { - return ebt_do_table(hook, pskb, in, out, &frame_filter); + return ebt_do_table(hook, skb, in, out, &frame_filter); } static struct nf_hook_ops ebt_ops_filter[] = { diff --git a/net/bridge/netfilter/ebtable_nat.c b/net/bridge/netfilter/ebtable_nat.c index 9c50488b62e..3e58c2e5ee2 100644 --- a/net/bridge/netfilter/ebtable_nat.c +++ b/net/bridge/netfilter/ebtable_nat.c @@ -61,17 +61,17 @@ static struct ebt_table frame_nat = }; static unsigned int -ebt_nat_dst(unsigned int hook, struct sk_buff **pskb, const struct net_device *in +ebt_nat_dst(unsigned int hook, struct sk_buff *skb, const struct net_device *in , const struct net_device *out, int (*okfn)(struct sk_buff *)) { - return ebt_do_table(hook, pskb, in, out, &frame_nat); + return ebt_do_table(hook, skb, in, out, &frame_nat); } static unsigned int -ebt_nat_src(unsigned int hook, struct sk_buff **pskb, const struct net_device *in +ebt_nat_src(unsigned int hook, struct sk_buff *skb, const struct net_device *in , const struct net_device *out, int (*okfn)(struct sk_buff *)) { - return ebt_do_table(hook, pskb, in, out, &frame_nat); + return ebt_do_table(hook, skb, in, out, &frame_nat); } static struct nf_hook_ops ebt_ops_nat[] = { diff --git a/net/bridge/netfilter/ebtables.c b/net/bridge/netfilter/ebtables.c index 6018d0e5193..817169e718c 100644 --- a/net/bridge/netfilter/ebtables.c +++ b/net/bridge/netfilter/ebtables.c @@ -142,7 +142,7 @@ static inline int ebt_basic_match(struct ebt_entry *e, struct ethhdr *h, } /* Do some firewalling */ -unsigned int ebt_do_table (unsigned int hook, struct sk_buff **pskb, +unsigned int ebt_do_table (unsigned int hook, struct sk_buff *skb, const struct net_device *in, const struct net_device *out, struct ebt_table *table) { @@ -172,19 +172,19 @@ unsigned int ebt_do_table (unsigned int hook, struct sk_buff **pskb, base = private->entries; i = 0; while (i < nentries) { - if (ebt_basic_match(point, eth_hdr(*pskb), in, out)) + if (ebt_basic_match(point, eth_hdr(skb), in, 
out)) goto letscontinue; - if (EBT_MATCH_ITERATE(point, ebt_do_match, *pskb, in, out) != 0) + if (EBT_MATCH_ITERATE(point, ebt_do_match, skb, in, out) != 0) goto letscontinue; /* increase counter */ (*(counter_base + i)).pcnt++; - (*(counter_base + i)).bcnt+=(**pskb).len; + (*(counter_base + i)).bcnt += skb->len; /* these should only watch: not modify, nor tell us what to do with the packet */ - EBT_WATCHER_ITERATE(point, ebt_do_watcher, *pskb, hook, in, + EBT_WATCHER_ITERATE(point, ebt_do_watcher, skb, hook, in, out); t = (struct ebt_entry_target *) @@ -193,7 +193,7 @@ unsigned int ebt_do_table (unsigned int hook, struct sk_buff **pskb, if (!t->u.target->target) verdict = ((struct ebt_standard_target *)t)->verdict; else - verdict = t->u.target->target(pskb, hook, + verdict = t->u.target->target(skb, hook, in, out, t->data, t->target_size); if (verdict == EBT_ACCEPT) { read_unlock_bh(&table->lock); @@ -871,7 +871,7 @@ static int translate_table(char *name, struct ebt_table_info *newinfo) return -EINVAL; } - /* we now know the following (along with E=mc²): + /* we now know the following (along with E=mc²): - the nr of entries in each chain is right - the size of the allocated space is right - all valid hooks have a corresponding chain diff --git a/net/core/dev.c b/net/core/dev.c index 99b7bda37d1..872658927e4 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -1362,22 +1362,21 @@ int skb_checksum_help(struct sk_buff *skb) goto out_set_summed; } - if (skb_cloned(skb)) { + offset = skb->csum_start - skb_headroom(skb); + BUG_ON(offset >= skb_headlen(skb)); + csum = skb_checksum(skb, offset, skb->len - offset, 0); + + offset += skb->csum_offset; + BUG_ON(offset + sizeof(__sum16) > skb_headlen(skb)); + + if (skb_cloned(skb) && + !skb_clone_writable(skb, offset + sizeof(__sum16))) { ret = pskb_expand_head(skb, 0, 0, GFP_ATOMIC); if (ret) goto out; } - offset = skb->csum_start - skb_headroom(skb); - BUG_ON(offset > (int)skb->len); - csum = skb_checksum(skb, offset, skb->len-offset, 0); - - offset = skb_headlen(skb) - offset; - BUG_ON(offset <= 0); - BUG_ON(skb->csum_offset + 2 > offset); - - *(__sum16 *)(skb->head + skb->csum_start + skb->csum_offset) = - csum_fold(csum); + *(__sum16 *)(skb->data + offset) = csum_fold(csum); out_set_summed: skb->ip_summed = CHECKSUM_NONE; out: @@ -1554,7 +1553,7 @@ gso: return rc; } if (unlikely((netif_queue_stopped(dev) || - netif_subqueue_stopped(dev, skb->queue_mapping)) && + netif_subqueue_stopped(dev, skb)) && skb->next)) return NETDEV_TX_BUSY; } while (skb->next); @@ -1662,7 +1661,7 @@ gso: q = dev->qdisc; if (q->enqueue) { /* reset queue_mapping to zero */ - skb->queue_mapping = 0; + skb_set_queue_mapping(skb, 0); rc = q->enqueue(skb, q); qdisc_run(dev); spin_unlock(&dev->queue_lock); @@ -1693,7 +1692,7 @@ gso: HARD_TX_LOCK(dev, cpu); if (!netif_queue_stopped(dev) && - !netif_subqueue_stopped(dev, skb->queue_mapping)) { + !netif_subqueue_stopped(dev, skb)) { rc = 0; if (!dev_hard_start_xmit(skb, dev)) { HARD_TX_UNLOCK(dev); @@ -1949,27 +1948,51 @@ static int ing_filter(struct sk_buff *skb) struct Qdisc *q; struct net_device *dev = skb->dev; int result = TC_ACT_OK; + u32 ttl = G_TC_RTTL(skb->tc_verd); - if (dev->qdisc_ingress) { - __u32 ttl = (__u32) G_TC_RTTL(skb->tc_verd); - if (MAX_RED_LOOP < ttl++) { - printk(KERN_WARNING "Redir loop detected Dropping packet (%d->%d)\n", - skb->iif, skb->dev->ifindex); - return TC_ACT_SHOT; - } + if (MAX_RED_LOOP < ttl++) { + printk(KERN_WARNING + "Redir loop detected Dropping packet (%d->%d)\n", + skb->iif, 
dev->ifindex); + return TC_ACT_SHOT; + } + + skb->tc_verd = SET_TC_RTTL(skb->tc_verd, ttl); + skb->tc_verd = SET_TC_AT(skb->tc_verd, AT_INGRESS); - skb->tc_verd = SET_TC_RTTL(skb->tc_verd,ttl); + spin_lock(&dev->ingress_lock); + if ((q = dev->qdisc_ingress) != NULL) + result = q->enqueue(skb, q); + spin_unlock(&dev->ingress_lock); - skb->tc_verd = SET_TC_AT(skb->tc_verd,AT_INGRESS); + return result; +} + +static inline struct sk_buff *handle_ing(struct sk_buff *skb, + struct packet_type **pt_prev, + int *ret, struct net_device *orig_dev) +{ + if (!skb->dev->qdisc_ingress) + goto out; - spin_lock(&dev->ingress_lock); - if ((q = dev->qdisc_ingress) != NULL) - result = q->enqueue(skb, q); - spin_unlock(&dev->ingress_lock); + if (*pt_prev) { + *ret = deliver_skb(skb, *pt_prev, orig_dev); + *pt_prev = NULL; + } else { + /* Huh? Why does turning on AF_PACKET affect this? */ + skb->tc_verd = SET_TC_OK2MUNGE(skb->tc_verd); + } + switch (ing_filter(skb)) { + case TC_ACT_SHOT: + case TC_ACT_STOLEN: + kfree_skb(skb); + return NULL; } - return result; +out: + skb->tc_verd = 0; + return skb; } #endif @@ -2021,21 +2044,9 @@ int netif_receive_skb(struct sk_buff *skb) } #ifdef CONFIG_NET_CLS_ACT - if (pt_prev) { - ret = deliver_skb(skb, pt_prev, orig_dev); - pt_prev = NULL; /* noone else should process this after*/ - } else { - skb->tc_verd = SET_TC_OK2MUNGE(skb->tc_verd); - } - - ret = ing_filter(skb); - - if (ret == TC_ACT_SHOT || (ret == TC_ACT_STOLEN)) { - kfree_skb(skb); + skb = handle_ing(skb, &pt_prev, &ret, orig_dev); + if (!skb) goto out; - } - - skb->tc_verd = 0; ncls: #endif diff --git a/net/core/filter.c b/net/core/filter.c index bd903aaf7aa..e0a06942c02 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -387,6 +387,25 @@ int sk_chk_filter(struct sock_filter *filter, int flen) } /** + * sk_filter_rcu_release: Release a socket filter by rcu_head + * @rcu: rcu_head that contains the sk_filter to free + */ +static void sk_filter_rcu_release(struct rcu_head *rcu) +{ + struct sk_filter *fp = container_of(rcu, struct sk_filter, rcu); + + sk_filter_release(fp); +} + +static void sk_filter_delayed_uncharge(struct sock *sk, struct sk_filter *fp) +{ + unsigned int size = sk_filter_len(fp); + + atomic_sub(size, &sk->sk_omem_alloc); + call_rcu_bh(&fp->rcu, sk_filter_rcu_release); +} + +/** * sk_attach_filter - attach a socket filter * @fprog: the filter program * @sk: the socket to use @@ -398,7 +417,7 @@ int sk_chk_filter(struct sock_filter *filter, int flen) */ int sk_attach_filter(struct sock_fprog *fprog, struct sock *sk) { - struct sk_filter *fp; + struct sk_filter *fp, *old_fp; unsigned int fsize = sizeof(struct sock_filter) * fprog->len; int err; @@ -418,19 +437,35 @@ int sk_attach_filter(struct sock_fprog *fprog, struct sock *sk) fp->len = fprog->len; err = sk_chk_filter(fp->insns, fp->len); - if (!err) { - struct sk_filter *old_fp; - - rcu_read_lock_bh(); - old_fp = rcu_dereference(sk->sk_filter); - rcu_assign_pointer(sk->sk_filter, fp); - rcu_read_unlock_bh(); - fp = old_fp; + if (err) { + sk_filter_uncharge(sk, fp); + return err; } - if (fp) - sk_filter_release(sk, fp); - return err; + rcu_read_lock_bh(); + old_fp = rcu_dereference(sk->sk_filter); + rcu_assign_pointer(sk->sk_filter, fp); + rcu_read_unlock_bh(); + + if (old_fp) + sk_filter_delayed_uncharge(sk, old_fp); + return 0; +} + +int sk_detach_filter(struct sock *sk) +{ + int ret = -ENOENT; + struct sk_filter *filter; + + rcu_read_lock_bh(); + filter = rcu_dereference(sk->sk_filter); + if (filter) { + 
rcu_assign_pointer(sk->sk_filter, NULL); + sk_filter_delayed_uncharge(sk, filter); + ret = 0; + } + rcu_read_unlock_bh(); + return ret; } EXPORT_SYMBOL(sk_chk_filter); diff --git a/net/core/gen_estimator.c b/net/core/gen_estimator.c index 590a767b029..daadbcc4e8d 100644 --- a/net/core/gen_estimator.c +++ b/net/core/gen_estimator.c @@ -15,7 +15,7 @@ #include <asm/uaccess.h> #include <asm/system.h> -#include <asm/bitops.h> +#include <linux/bitops.h> #include <linux/module.h> #include <linux/types.h> #include <linux/kernel.h> diff --git a/net/core/neighbour.c b/net/core/neighbour.c index c52df858d0b..05979e35696 100644 --- a/net/core/neighbour.c +++ b/net/core/neighbour.c @@ -481,6 +481,8 @@ struct pneigh_entry * pneigh_lookup(struct neigh_table *tbl, const void *pkey, if (!creat) goto out; + ASSERT_RTNL(); + n = kmalloc(sizeof(*n) + key_len, GFP_KERNEL); if (!n) goto out; @@ -1436,6 +1438,9 @@ int neigh_table_clear(struct neigh_table *tbl) free_percpu(tbl->stats); tbl->stats = NULL; + kmem_cache_destroy(tbl->kmem_cachep); + tbl->kmem_cachep = NULL; + return 0; } @@ -2494,7 +2499,6 @@ static struct neigh_sysctl_table { .proc_handler = &proc_dointvec, }, { - .ctl_name = NET_NEIGH_RETRANS_TIME, .procname = "retrans_time", .maxlen = sizeof(int), .mode = 0644, @@ -2539,27 +2543,40 @@ static struct neigh_sysctl_table { .proc_handler = &proc_dointvec, }, { - .ctl_name = NET_NEIGH_ANYCAST_DELAY, .procname = "anycast_delay", .maxlen = sizeof(int), .mode = 0644, .proc_handler = &proc_dointvec_userhz_jiffies, }, { - .ctl_name = NET_NEIGH_PROXY_DELAY, .procname = "proxy_delay", .maxlen = sizeof(int), .mode = 0644, .proc_handler = &proc_dointvec_userhz_jiffies, }, { - .ctl_name = NET_NEIGH_LOCKTIME, .procname = "locktime", .maxlen = sizeof(int), .mode = 0644, .proc_handler = &proc_dointvec_userhz_jiffies, }, { + .ctl_name = NET_NEIGH_RETRANS_TIME_MS, + .procname = "retrans_time_ms", + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = &proc_dointvec_ms_jiffies, + .strategy = &sysctl_ms_jiffies, + }, + { + .ctl_name = NET_NEIGH_REACHABLE_TIME_MS, + .procname = "base_reachable_time_ms", + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = &proc_dointvec_ms_jiffies, + .strategy = &sysctl_ms_jiffies, + }, + { .ctl_name = NET_NEIGH_GC_INTERVAL, .procname = "gc_interval", .maxlen = sizeof(int), @@ -2588,22 +2605,7 @@ static struct neigh_sysctl_table { .mode = 0644, .proc_handler = &proc_dointvec, }, - { - .ctl_name = NET_NEIGH_RETRANS_TIME_MS, - .procname = "retrans_time_ms", - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = &proc_dointvec_ms_jiffies, - .strategy = &sysctl_ms_jiffies, - }, - { - .ctl_name = NET_NEIGH_REACHABLE_TIME_MS, - .procname = "base_reachable_time_ms", - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = &proc_dointvec_ms_jiffies, - .strategy = &sysctl_ms_jiffies, - }, + {} }, .neigh_dev = { { @@ -2656,42 +2658,48 @@ int neigh_sysctl_register(struct net_device *dev, struct neigh_parms *p, t->neigh_vars[9].data = &p->anycast_delay; t->neigh_vars[10].data = &p->proxy_delay; t->neigh_vars[11].data = &p->locktime; + t->neigh_vars[12].data = &p->retrans_time; + t->neigh_vars[13].data = &p->base_reachable_time; if (dev) { dev_name_source = dev->name; t->neigh_dev[0].ctl_name = dev->ifindex; - t->neigh_vars[12].procname = NULL; - t->neigh_vars[13].procname = NULL; - t->neigh_vars[14].procname = NULL; - t->neigh_vars[15].procname = NULL; + /* Terminate the table early */ + memset(&t->neigh_vars[14], 0, sizeof(t->neigh_vars[14])); } else { dev_name_source = 
t->neigh_dev[0].procname; - t->neigh_vars[12].data = (int *)(p + 1); - t->neigh_vars[13].data = (int *)(p + 1) + 1; - t->neigh_vars[14].data = (int *)(p + 1) + 2; - t->neigh_vars[15].data = (int *)(p + 1) + 3; + t->neigh_vars[14].data = (int *)(p + 1); + t->neigh_vars[15].data = (int *)(p + 1) + 1; + t->neigh_vars[16].data = (int *)(p + 1) + 2; + t->neigh_vars[17].data = (int *)(p + 1) + 3; } - t->neigh_vars[16].data = &p->retrans_time; - t->neigh_vars[17].data = &p->base_reachable_time; if (handler || strategy) { /* RetransTime */ t->neigh_vars[3].proc_handler = handler; t->neigh_vars[3].strategy = strategy; t->neigh_vars[3].extra1 = dev; + if (!strategy) + t->neigh_vars[3].ctl_name = CTL_UNNUMBERED; /* ReachableTime */ t->neigh_vars[4].proc_handler = handler; t->neigh_vars[4].strategy = strategy; t->neigh_vars[4].extra1 = dev; + if (!strategy) + t->neigh_vars[4].ctl_name = CTL_UNNUMBERED; /* RetransTime (in milliseconds)*/ - t->neigh_vars[16].proc_handler = handler; - t->neigh_vars[16].strategy = strategy; - t->neigh_vars[16].extra1 = dev; + t->neigh_vars[12].proc_handler = handler; + t->neigh_vars[12].strategy = strategy; + t->neigh_vars[12].extra1 = dev; + if (!strategy) + t->neigh_vars[12].ctl_name = CTL_UNNUMBERED; /* ReachableTime (in milliseconds) */ - t->neigh_vars[17].proc_handler = handler; - t->neigh_vars[17].strategy = strategy; - t->neigh_vars[17].extra1 = dev; + t->neigh_vars[13].proc_handler = handler; + t->neigh_vars[13].strategy = strategy; + t->neigh_vars[13].extra1 = dev; + if (!strategy) + t->neigh_vars[13].ctl_name = CTL_UNNUMBERED; } dev_name = kstrdup(dev_name_source, GFP_KERNEL); diff --git a/net/core/netpoll.c b/net/core/netpoll.c index 95daba62496..bf8d18f1b01 100644 --- a/net/core/netpoll.c +++ b/net/core/netpoll.c @@ -67,7 +67,7 @@ static void queue_process(struct work_struct *work) local_irq_save(flags); netif_tx_lock(dev); if ((netif_queue_stopped(dev) || - netif_subqueue_stopped(dev, skb->queue_mapping)) || + netif_subqueue_stopped(dev, skb)) || dev->hard_start_xmit(skb, dev) != NETDEV_TX_OK) { skb_queue_head(&npinfo->txq, skb); netif_tx_unlock(dev); @@ -269,7 +269,7 @@ static void netpoll_send_skb(struct netpoll *np, struct sk_buff *skb) tries > 0; --tries) { if (netif_tx_trylock(dev)) { if (!netif_queue_stopped(dev) && - !netif_subqueue_stopped(dev, skb->queue_mapping)) + !netif_subqueue_stopped(dev, skb)) status = dev->hard_start_xmit(skb, dev); netif_tx_unlock(dev); diff --git a/net/core/pktgen.c b/net/core/pktgen.c index 2100c734b10..de33f36947e 100644 --- a/net/core/pktgen.c +++ b/net/core/pktgen.c @@ -6,7 +6,7 @@ * * Alexey Kuznetsov <kuznet@ms2.inr.ac.ru> * Ben Greear <greearb@candelatech.com> - * Jens Låås <jens.laas@data.slu.se> + * Jens Låås <jens.laas@data.slu.se> * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License @@ -161,7 +161,7 @@ #endif #include <asm/byteorder.h> #include <linux/rcupdate.h> -#include <asm/bitops.h> +#include <linux/bitops.h> #include <asm/io.h> #include <asm/dma.h> #include <asm/uaccess.h> @@ -2454,7 +2454,7 @@ static int pktgen_output_ipsec(struct sk_buff *skb, struct pktgen_dev *pkt_dev) spin_lock(&x->lock); iph = ip_hdr(skb); - err = x->mode->output(x, skb); + err = x->outer_mode->output(x, skb); if (err) goto error; err = x->type->output(x, skb); @@ -2603,8 +2603,7 @@ static struct sk_buff *fill_packet_ipv4(struct net_device *odev, skb->network_header = skb->tail; skb->transport_header = skb->network_header + sizeof(struct iphdr); skb_put(skb, 
sizeof(struct iphdr) + sizeof(struct udphdr)); - skb->queue_mapping = pkt_dev->cur_queue_map; - + skb_set_queue_mapping(skb, pkt_dev->cur_queue_map); iph = ip_hdr(skb); udph = udp_hdr(skb); @@ -2941,8 +2940,7 @@ static struct sk_buff *fill_packet_ipv6(struct net_device *odev, skb->network_header = skb->tail; skb->transport_header = skb->network_header + sizeof(struct ipv6hdr); skb_put(skb, sizeof(struct ipv6hdr) + sizeof(struct udphdr)); - skb->queue_mapping = pkt_dev->cur_queue_map; - + skb_set_queue_mapping(skb, pkt_dev->cur_queue_map); iph = ipv6_hdr(skb); udph = udp_hdr(skb); @@ -3385,7 +3383,7 @@ static __inline__ void pktgen_xmit(struct pktgen_dev *pkt_dev) if ((netif_queue_stopped(odev) || (pkt_dev->skb && - netif_subqueue_stopped(odev, pkt_dev->skb->queue_mapping))) || + netif_subqueue_stopped(odev, pkt_dev->skb))) || need_resched()) { idle_start = getCurUs(); @@ -3402,7 +3400,7 @@ static __inline__ void pktgen_xmit(struct pktgen_dev *pkt_dev) pkt_dev->idle_acc += getCurUs() - idle_start; if (netif_queue_stopped(odev) || - netif_subqueue_stopped(odev, pkt_dev->skb->queue_mapping)) { + netif_subqueue_stopped(odev, pkt_dev->skb)) { pkt_dev->next_tx_us = getCurUs(); /* TODO */ pkt_dev->next_tx_ns = 0; goto out; /* Try the next interface */ @@ -3431,7 +3429,7 @@ static __inline__ void pktgen_xmit(struct pktgen_dev *pkt_dev) netif_tx_lock_bh(odev); if (!netif_queue_stopped(odev) && - !netif_subqueue_stopped(odev, pkt_dev->skb->queue_mapping)) { + !netif_subqueue_stopped(odev, pkt_dev->skb)) { atomic_inc(&(pkt_dev->skb->users)); retry_now: @@ -3514,7 +3512,7 @@ static int pktgen_thread_worker(void *arg) init_waitqueue_head(&t->queue); - pr_debug("pktgen: starting pktgen/%d: pid=%d\n", cpu, current->pid); + pr_debug("pktgen: starting pktgen/%d: pid=%d\n", cpu, task_pid_nr(current)); set_current_state(TASK_INTERRUPTIBLE); diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 1072d16696c..4a2640d3826 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -744,10 +744,10 @@ static struct net *get_net_ns_by_pid(pid_t pid) rcu_read_lock(); tsk = find_task_by_pid(pid); if (tsk) { - task_lock(tsk); - if (tsk->nsproxy) - net = get_net(tsk->nsproxy->net_ns); - task_unlock(tsk); + struct nsproxy *nsproxy; + nsproxy = task_nsproxy(tsk); + if (nsproxy) + net = get_net(nsproxy->net_ns); } rcu_read_unlock(); return net; diff --git a/net/core/scm.c b/net/core/scm.c index 530bee8d9ed..100ba6d9d47 100644 --- a/net/core/scm.c +++ b/net/core/scm.c @@ -24,6 +24,8 @@ #include <linux/interrupt.h> #include <linux/netdevice.h> #include <linux/security.h> +#include <linux/pid.h> +#include <linux/nsproxy.h> #include <asm/system.h> #include <asm/uaccess.h> @@ -42,7 +44,7 @@ static __inline__ int scm_check_creds(struct ucred *creds) { - if ((creds->pid == current->tgid || capable(CAP_SYS_ADMIN)) && + if ((creds->pid == task_tgid_vnr(current) || capable(CAP_SYS_ADMIN)) && ((creds->uid == current->uid || creds->uid == current->euid || creds->uid == current->suid) || capable(CAP_SETUID)) && ((creds->gid == current->gid || creds->gid == current->egid || diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 944189d9632..4e2c84fcf27 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -362,6 +362,97 @@ void kfree_skb(struct sk_buff *skb) __kfree_skb(skb); } +static void __copy_skb_header(struct sk_buff *new, const struct sk_buff *old) +{ + new->tstamp = old->tstamp; + new->dev = old->dev; + new->transport_header = old->transport_header; + new->network_header = old->network_header; + 
new->mac_header = old->mac_header; + new->dst = dst_clone(old->dst); +#ifdef CONFIG_INET + new->sp = secpath_get(old->sp); +#endif + memcpy(new->cb, old->cb, sizeof(old->cb)); + new->csum_start = old->csum_start; + new->csum_offset = old->csum_offset; + new->local_df = old->local_df; + new->pkt_type = old->pkt_type; + new->ip_summed = old->ip_summed; + skb_copy_queue_mapping(new, old); + new->priority = old->priority; +#if defined(CONFIG_IP_VS) || defined(CONFIG_IP_VS_MODULE) + new->ipvs_property = old->ipvs_property; +#endif + new->protocol = old->protocol; + new->mark = old->mark; + __nf_copy(new, old); +#if defined(CONFIG_NETFILTER_XT_TARGET_TRACE) || \ + defined(CONFIG_NETFILTER_XT_TARGET_TRACE_MODULE) + new->nf_trace = old->nf_trace; +#endif +#ifdef CONFIG_NET_SCHED + new->tc_index = old->tc_index; +#ifdef CONFIG_NET_CLS_ACT + new->tc_verd = old->tc_verd; +#endif +#endif + skb_copy_secmark(new, old); +} + +static struct sk_buff *__skb_clone(struct sk_buff *n, struct sk_buff *skb) +{ +#define C(x) n->x = skb->x + + n->next = n->prev = NULL; + n->sk = NULL; + __copy_skb_header(n, skb); + + C(len); + C(data_len); + C(mac_len); + n->cloned = 1; + n->hdr_len = skb->nohdr ? skb_headroom(skb) : skb->hdr_len; + n->nohdr = 0; + n->destructor = NULL; +#ifdef CONFIG_NET_CLS_ACT + /* FIXME What is this and why don't we do it in copy_skb_header? */ + n->tc_verd = SET_TC_VERD(n->tc_verd,0); + n->tc_verd = CLR_TC_OK2MUNGE(n->tc_verd); + n->tc_verd = CLR_TC_MUNGED(n->tc_verd); + C(iif); +#endif + C(truesize); + atomic_set(&n->users, 1); + C(head); + C(data); + C(tail); + C(end); + + atomic_inc(&(skb_shinfo(skb)->dataref)); + skb->cloned = 1; + + return n; +#undef C +} + +/** + * skb_morph - morph one skb into another + * @dst: the skb to receive the contents + * @src: the skb to supply the contents + * + * This is identical to skb_clone except that the target skb is + * supplied by the user. + * + * The target skb is returned upon exit. + */ +struct sk_buff *skb_morph(struct sk_buff *dst, struct sk_buff *src) +{ + skb_release_data(dst); + return __skb_clone(dst, src); +} +EXPORT_SYMBOL_GPL(skb_morph); + /** * skb_clone - duplicate an sk_buff * @skb: buffer to clone @@ -393,66 +484,7 @@ struct sk_buff *skb_clone(struct sk_buff *skb, gfp_t gfp_mask) n->fclone = SKB_FCLONE_UNAVAILABLE; } -#define C(x) n->x = skb->x - - n->next = n->prev = NULL; - n->sk = NULL; - C(tstamp); - C(dev); - C(transport_header); - C(network_header); - C(mac_header); - C(dst); - dst_clone(skb->dst); - C(sp); -#ifdef CONFIG_INET - secpath_get(skb->sp); -#endif - memcpy(n->cb, skb->cb, sizeof(skb->cb)); - C(len); - C(data_len); - C(mac_len); - C(csum); - C(local_df); - n->cloned = 1; - n->hdr_len = skb->nohdr ? 
skb_headroom(skb) : skb->hdr_len; - n->nohdr = 0; - C(pkt_type); - C(ip_summed); - skb_copy_queue_mapping(n, skb); - C(priority); -#if defined(CONFIG_IP_VS) || defined(CONFIG_IP_VS_MODULE) - C(ipvs_property); -#endif - C(protocol); - n->destructor = NULL; - C(mark); - __nf_copy(n, skb); -#if defined(CONFIG_NETFILTER_XT_TARGET_TRACE) || \ - defined(CONFIG_NETFILTER_XT_TARGET_TRACE_MODULE) - C(nf_trace); -#endif -#ifdef CONFIG_NET_SCHED - C(tc_index); -#ifdef CONFIG_NET_CLS_ACT - n->tc_verd = SET_TC_VERD(skb->tc_verd,0); - n->tc_verd = CLR_TC_OK2MUNGE(n->tc_verd); - n->tc_verd = CLR_TC_MUNGED(n->tc_verd); - C(iif); -#endif -#endif - skb_copy_secmark(n, skb); - C(truesize); - atomic_set(&n->users, 1); - C(head); - C(data); - C(tail); - C(end); - - atomic_inc(&(skb_shinfo(skb)->dataref)); - skb->cloned = 1; - - return n; + return __skb_clone(n, skb); } static void copy_skb_header(struct sk_buff *new, const struct sk_buff *old) @@ -463,50 +495,15 @@ static void copy_skb_header(struct sk_buff *new, const struct sk_buff *old) */ unsigned long offset = new->data - old->data; #endif - new->sk = NULL; - new->dev = old->dev; - skb_copy_queue_mapping(new, old); - new->priority = old->priority; - new->protocol = old->protocol; - new->dst = dst_clone(old->dst); -#ifdef CONFIG_INET - new->sp = secpath_get(old->sp); -#endif - new->csum_start = old->csum_start; - new->csum_offset = old->csum_offset; - new->ip_summed = old->ip_summed; - new->transport_header = old->transport_header; - new->network_header = old->network_header; - new->mac_header = old->mac_header; + + __copy_skb_header(new, old); + #ifndef NET_SKBUFF_DATA_USES_OFFSET /* {transport,network,mac}_header are relative to skb->head */ new->transport_header += offset; new->network_header += offset; new->mac_header += offset; #endif - memcpy(new->cb, old->cb, sizeof(old->cb)); - new->local_df = old->local_df; - new->fclone = SKB_FCLONE_UNAVAILABLE; - new->pkt_type = old->pkt_type; - new->tstamp = old->tstamp; - new->destructor = NULL; - new->mark = old->mark; - __nf_copy(new, old); -#if defined(CONFIG_NETFILTER_XT_TARGET_TRACE) || \ - defined(CONFIG_NETFILTER_XT_TARGET_TRACE_MODULE) - new->nf_trace = old->nf_trace; -#endif -#if defined(CONFIG_IP_VS) || defined(CONFIG_IP_VS_MODULE) - new->ipvs_property = old->ipvs_property; -#endif -#ifdef CONFIG_NET_SCHED -#ifdef CONFIG_NET_CLS_ACT - new->tc_verd = old->tc_verd; -#endif - new->tc_index = old->tc_index; -#endif - skb_copy_secmark(new, old); - atomic_set(&new->users, 1); skb_shinfo(new)->gso_size = skb_shinfo(old)->gso_size; skb_shinfo(new)->gso_segs = skb_shinfo(old)->gso_segs; skb_shinfo(new)->gso_type = skb_shinfo(old)->gso_type; @@ -685,7 +682,7 @@ int pskb_expand_head(struct sk_buff *skb, int nhead, int ntail, skb->transport_header += off; skb->network_header += off; skb->mac_header += off; - skb->csum_start += off; + skb->csum_start += nhead; skb->cloned = 0; skb->hdr_len = 0; skb->nohdr = 0; @@ -2048,7 +2045,7 @@ skb_to_sgvec(struct sk_buff *skb, struct scatterlist *sg, int offset, int len) if (copy > 0) { if (copy > len) copy = len; - sg[elt].page = virt_to_page(skb->data + offset); + sg_set_page(&sg[elt], virt_to_page(skb->data + offset)); sg[elt].offset = (unsigned long)(skb->data + offset) % PAGE_SIZE; sg[elt].length = copy; elt++; @@ -2068,7 +2065,7 @@ skb_to_sgvec(struct sk_buff *skb, struct scatterlist *sg, int offset, int len) if (copy > len) copy = len; - sg[elt].page = frag->page; + sg_set_page(&sg[elt], frag->page); sg[elt].offset = frag->page_offset+offset-start; sg[elt].length = 
copy; elt++; diff --git a/net/core/sock.c b/net/core/sock.c index d45ecdccc6a..febbcbcf802 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -232,7 +232,7 @@ static int sock_set_timeout(long *timeo_p, char __user *optval, int optlen) warned++; printk(KERN_INFO "sock_set_timeout: `%s' (pid %d) " "tries to set negative timeout\n", - current->comm, current->pid); + current->comm, task_pid_nr(current)); return 0; } *timeo_p = MAX_SCHEDULE_TIMEOUT; @@ -428,7 +428,6 @@ int sock_setsockopt(struct socket *sock, int level, int optname, char __user *optval, int optlen) { struct sock *sk=sock->sk; - struct sk_filter *filter; int val; int valbool; struct linger ling; @@ -652,16 +651,7 @@ set_rcvbuf: break; case SO_DETACH_FILTER: - rcu_read_lock_bh(); - filter = rcu_dereference(sk->sk_filter); - if (filter) { - rcu_assign_pointer(sk->sk_filter, NULL); - sk_filter_release(sk, filter); - rcu_read_unlock_bh(); - break; - } - rcu_read_unlock_bh(); - ret = -ENONET; + ret = sk_detach_filter(sk); break; case SO_PASSSEC: @@ -925,7 +915,7 @@ void sk_free(struct sock *sk) filter = rcu_dereference(sk->sk_filter); if (filter) { - sk_filter_release(sk, filter); + sk_filter_uncharge(sk, filter); rcu_assign_pointer(sk->sk_filter, NULL); } diff --git a/net/dccp/diag.c b/net/dccp/diag.c index 0f3745585a9..d8a3509b26f 100644 --- a/net/dccp/diag.c +++ b/net/dccp/diag.c @@ -68,3 +68,4 @@ module_exit(dccp_diag_fini); MODULE_LICENSE("GPL"); MODULE_AUTHOR("Arnaldo Carvalho de Melo <acme@mandriva.com>"); MODULE_DESCRIPTION("DCCP inet_diag handler"); +MODULE_ALIAS_NET_PF_PROTO_TYPE(PF_NETLINK, NETLINK_INET_DIAG, DCCPDIAG_GETSOCK); diff --git a/net/dccp/input.c b/net/dccp/input.c index 19d7e1dbd87..3560a2a875a 100644 --- a/net/dccp/input.c +++ b/net/dccp/input.c @@ -19,6 +19,9 @@ #include "ccid.h" #include "dccp.h" +/* rate-limit for syncs in reply to sequence-invalid packets; RFC 4340, 7.5.4 */ +int sysctl_dccp_sync_ratelimit __read_mostly = HZ / 8; + static void dccp_fin(struct sock *sk, struct sk_buff *skb) { sk->sk_shutdown |= RCV_SHUTDOWN; diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c index 44f6e17e105..222549ab274 100644 --- a/net/dccp/ipv4.c +++ b/net/dccp/ipv4.c @@ -1037,8 +1037,8 @@ module_exit(dccp_v4_exit); * values directly, Also cover the case where the protocol is not specified, * i.e. net-pf-PF_INET-proto-0-type-SOCK_DCCP */ -MODULE_ALIAS("net-pf-" __stringify(PF_INET) "-proto-33-type-6"); -MODULE_ALIAS("net-pf-" __stringify(PF_INET) "-proto-0-type-6"); +MODULE_ALIAS_NET_PF_PROTO_TYPE(PF_INET, 33, 6); +MODULE_ALIAS_NET_PF_PROTO_TYPE(PF_INET, 0, 6); MODULE_LICENSE("GPL"); MODULE_AUTHOR("Arnaldo Carvalho de Melo <acme@mandriva.com>"); MODULE_DESCRIPTION("DCCP - Datagram Congestion Controlled Protocol"); diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c index 006a3834fbc..bbadd6681b8 100644 --- a/net/dccp/ipv6.c +++ b/net/dccp/ipv6.c @@ -767,10 +767,9 @@ discard: return 0; } -static int dccp_v6_rcv(struct sk_buff **pskb) +static int dccp_v6_rcv(struct sk_buff *skb) { const struct dccp_hdr *dh; - struct sk_buff *skb = *pskb; struct sock *sk; int min_cov; @@ -1220,8 +1219,8 @@ module_exit(dccp_v6_exit); * values directly, Also cover the case where the protocol is not specified, * i.e. 
net-pf-PF_INET6-proto-0-type-SOCK_DCCP */ -MODULE_ALIAS("net-pf-" __stringify(PF_INET6) "-proto-33-type-6"); -MODULE_ALIAS("net-pf-" __stringify(PF_INET6) "-proto-0-type-6"); +MODULE_ALIAS_NET_PF_PROTO_TYPE(PF_INET6, 33, 6); +MODULE_ALIAS_NET_PF_PROTO_TYPE(PF_INET6, 0, 6); MODULE_LICENSE("GPL"); MODULE_AUTHOR("Arnaldo Carvalho de Melo <acme@mandriva.com>"); MODULE_DESCRIPTION("DCCPv6 - Datagram Congestion Controlled Protocol"); diff --git a/net/dccp/sysctl.c b/net/dccp/sysctl.c index 9364b2fb4db..c62c05039f6 100644 --- a/net/dccp/sysctl.c +++ b/net/dccp/sysctl.c @@ -18,9 +18,6 @@ #error This file should not be compiled without CONFIG_SYSCTL defined #endif -/* rate-limit for syncs in reply to sequence-invalid packets; RFC 4340, 7.5.4 */ -int sysctl_dccp_sync_ratelimit __read_mostly = HZ / 8; - static struct ctl_table dccp_default_table[] = { { .procname = "seq_window", diff --git a/net/decnet/netfilter/dn_rtmsg.c b/net/decnet/netfilter/dn_rtmsg.c index f7fba7721e6..43fcd29046d 100644 --- a/net/decnet/netfilter/dn_rtmsg.c +++ b/net/decnet/netfilter/dn_rtmsg.c @@ -88,12 +88,12 @@ static void dnrmg_send_peer(struct sk_buff *skb) static unsigned int dnrmg_hook(unsigned int hook, - struct sk_buff **pskb, + struct sk_buff *skb, const struct net_device *in, const struct net_device *out, int (*okfn)(struct sk_buff *)) { - dnrmg_send_peer(*pskb); + dnrmg_send_peer(skb); return NF_ACCEPT; } diff --git a/net/ieee80211/ieee80211_crypt_tkip.c b/net/ieee80211/ieee80211_crypt_tkip.c index 6cc54eeca3e..c796661a021 100644 --- a/net/ieee80211/ieee80211_crypt_tkip.c +++ b/net/ieee80211/ieee80211_crypt_tkip.c @@ -390,9 +390,7 @@ static int ieee80211_tkip_encrypt(struct sk_buff *skb, int hdr_len, void *priv) icv[3] = crc >> 24; crypto_blkcipher_setkey(tkey->tx_tfm_arc4, rc4key, 16); - sg.page = virt_to_page(pos); - sg.offset = offset_in_page(pos); - sg.length = len + 4; + sg_init_one(&sg, pos, len + 4); return crypto_blkcipher_encrypt(&desc, &sg, &sg, len + 4); } @@ -485,9 +483,7 @@ static int ieee80211_tkip_decrypt(struct sk_buff *skb, int hdr_len, void *priv) plen = skb->len - hdr_len - 12; crypto_blkcipher_setkey(tkey->rx_tfm_arc4, rc4key, 16); - sg.page = virt_to_page(pos); - sg.offset = offset_in_page(pos); - sg.length = plen + 4; + sg_init_one(&sg, pos, plen + 4); if (crypto_blkcipher_decrypt(&desc, &sg, &sg, plen + 4)) { if (net_ratelimit()) { printk(KERN_DEBUG ": TKIP: failed to decrypt " @@ -539,11 +535,12 @@ static int michael_mic(struct crypto_hash *tfm_michael, u8 * key, u8 * hdr, printk(KERN_WARNING "michael_mic: tfm_michael == NULL\n"); return -1; } - sg[0].page = virt_to_page(hdr); + sg_init_table(sg, 2); + sg_set_page(&sg[0], virt_to_page(hdr)); sg[0].offset = offset_in_page(hdr); sg[0].length = 16; - sg[1].page = virt_to_page(data); + sg_set_page(&sg[1], virt_to_page(data)); sg[1].offset = offset_in_page(data); sg[1].length = data_len; @@ -586,7 +583,7 @@ static void michael_mic_hdr(struct sk_buff *skb, u8 * hdr) if (stype & IEEE80211_STYPE_QOS_DATA) { const struct ieee80211_hdr_3addrqos *qoshdr = (struct ieee80211_hdr_3addrqos *)skb->data; - hdr[12] = qoshdr->qos_ctl & cpu_to_le16(IEEE80211_QCTL_TID); + hdr[12] = le16_to_cpu(qoshdr->qos_ctl) & IEEE80211_QCTL_TID; } else hdr[12] = 0; /* priority */ diff --git a/net/ieee80211/ieee80211_crypt_wep.c b/net/ieee80211/ieee80211_crypt_wep.c index 8d182459344..0af6103d715 100644 --- a/net/ieee80211/ieee80211_crypt_wep.c +++ b/net/ieee80211/ieee80211_crypt_wep.c @@ -170,9 +170,7 @@ static int prism2_wep_encrypt(struct sk_buff *skb, int hdr_len, void 
*priv) icv[3] = crc >> 24; crypto_blkcipher_setkey(wep->tx_tfm, key, klen); - sg.page = virt_to_page(pos); - sg.offset = offset_in_page(pos); - sg.length = len + 4; + sg_init_one(&sg, pos, len + 4); return crypto_blkcipher_encrypt(&desc, &sg, &sg, len + 4); } @@ -212,9 +210,7 @@ static int prism2_wep_decrypt(struct sk_buff *skb, int hdr_len, void *priv) plen = skb->len - hdr_len - 8; crypto_blkcipher_setkey(wep->rx_tfm, key, klen); - sg.page = virt_to_page(pos); - sg.offset = offset_in_page(pos); - sg.length = plen + 4; + sg_init_one(&sg, pos, plen + 4); if (crypto_blkcipher_decrypt(&desc, &sg, &sg, plen + 4)) return -7; diff --git a/net/ieee80211/ieee80211_wx.c b/net/ieee80211/ieee80211_wx.c index 9b58dd67acb..d309e8f1999 100644 --- a/net/ieee80211/ieee80211_wx.c +++ b/net/ieee80211/ieee80211_wx.c @@ -409,7 +409,7 @@ int ieee80211_wx_set_encode(struct ieee80211_device *ieee, (*crypt)->priv); sec.flags |= (1 << key); /* This ensures a key will be activated if no key is - * explicitely set */ + * explicitly set */ if (key == sec.active_key) sec.flags |= SEC_ACTIVE_KEY; diff --git a/net/ipv4/Kconfig b/net/ipv4/Kconfig index d894f616c3d..9f9fd2c6f6e 100644 --- a/net/ipv4/Kconfig +++ b/net/ipv4/Kconfig @@ -560,7 +560,7 @@ config TCP_CONG_ILLINOIS depends on EXPERIMENTAL default n ---help--- - TCP-Illinois is a sender-side modificatio of TCP Reno for + TCP-Illinois is a sender-side modification of TCP Reno for high speed long delay links. It uses round-trip-time to adjust the alpha and beta parameters to achieve a higher average throughput and maintain fairness. diff --git a/net/ipv4/Makefile b/net/ipv4/Makefile index a02c36d0a13..93fe3966805 100644 --- a/net/ipv4/Makefile +++ b/net/ipv4/Makefile @@ -10,7 +10,8 @@ obj-y := route.o inetpeer.o protocol.o \ tcp_minisocks.o tcp_cong.o \ datagram.o raw.o udp.o udplite.o \ arp.o icmp.o devinet.o af_inet.o igmp.o \ - sysctl_net_ipv4.o fib_frontend.o fib_semantics.o + sysctl_net_ipv4.o fib_frontend.o fib_semantics.o \ + inet_fragment.o obj-$(CONFIG_IP_FIB_HASH) += fib_hash.o obj-$(CONFIG_IP_FIB_TRIE) += fib_trie.o diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c index 81a8285d6d6..8d8c2915e06 100644 --- a/net/ipv4/fib_trie.c +++ b/net/ipv4/fib_trie.c @@ -54,7 +54,7 @@ #include <asm/uaccess.h> #include <asm/system.h> -#include <asm/bitops.h> +#include <linux/bitops.h> #include <linux/types.h> #include <linux/kernel.h> #include <linux/mm.h> diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c index 3cef12835c4..8fb6ca23700 100644 --- a/net/ipv4/inet_connection_sock.c +++ b/net/ipv4/inet_connection_sock.c @@ -93,7 +93,7 @@ int inet_csk_get_port(struct inet_hashinfo *hashinfo, int remaining, rover, low, high; inet_get_local_port_range(&low, &high); - remaining = high - low; + remaining = (high - low) + 1; rover = net_random() % remaining + low; do { diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c index 7eb83ebed2e..dc429b6b0ba 100644 --- a/net/ipv4/inet_diag.c +++ b/net/ipv4/inet_diag.c @@ -815,6 +815,12 @@ static int inet_diag_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh) nlmsg_len(nlh) < hdrlen) return -EINVAL; +#ifdef CONFIG_KMOD + if (inet_diag_table[nlh->nlmsg_type] == NULL) + request_module("net-pf-%d-proto-%d-type-%d", PF_NETLINK, + NETLINK_INET_DIAG, nlh->nlmsg_type); +#endif + if (inet_diag_table[nlh->nlmsg_type] == NULL) return -ENOENT; @@ -914,3 +920,4 @@ static void __exit inet_diag_exit(void) module_init(inet_diag_init); module_exit(inet_diag_exit); MODULE_LICENSE("GPL"); 
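/*
 * The CONFIG_KMOD block added to inet_diag_rcv_msg() above and the
 * MODULE_ALIAS_NET_PF_PROTO{,_TYPE} lines added here and in the DCCP
 * modules are two halves of one autoload mechanism: request_module()
 * asks for "net-pf-<pf>-proto-<proto>-type-<type>", and the module alias
 * lets modprobe resolve that string to the handler module. A sketch of
 * the pairing for the DCCP diag handler; the expanded alias string
 * assumes PF_NETLINK=16, NETLINK_INET_DIAG=4, DCCPDIAG_GETSOCK=19:
 *
 *   // in the requesting code (inet_diag):
 *   request_module("net-pf-%d-proto-%d-type-%d",
 *                  PF_NETLINK, NETLINK_INET_DIAG, nlh->nlmsg_type);
 *
 *   // in the providing module (dccp_diag):
 *   MODULE_ALIAS_NET_PF_PROTO_TYPE(PF_NETLINK, NETLINK_INET_DIAG,
 *                                  DCCPDIAG_GETSOCK);
 *   // expands to roughly: MODULE_ALIAS("net-pf-16-proto-4-type-19")
 */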
+MODULE_ALIAS_NET_PF_PROTO(PF_NETLINK, NETLINK_INET_DIAG); diff --git a/net/ipv4/inet_fragment.c b/net/ipv4/inet_fragment.c new file mode 100644 index 00000000000..e15e04fc666 --- /dev/null +++ b/net/ipv4/inet_fragment.c @@ -0,0 +1,261 @@ +/* + * inet fragments management + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + * Authors: Pavel Emelyanov <xemul@openvz.org> + * Started as consolidation of ipv4/ip_fragment.c, + * ipv6/reassembly. and ipv6 nf conntrack reassembly + */ + +#include <linux/list.h> +#include <linux/spinlock.h> +#include <linux/module.h> +#include <linux/timer.h> +#include <linux/mm.h> +#include <linux/random.h> +#include <linux/skbuff.h> +#include <linux/rtnetlink.h> + +#include <net/inet_frag.h> + +static void inet_frag_secret_rebuild(unsigned long dummy) +{ + struct inet_frags *f = (struct inet_frags *)dummy; + unsigned long now = jiffies; + int i; + + write_lock(&f->lock); + get_random_bytes(&f->rnd, sizeof(u32)); + for (i = 0; i < INETFRAGS_HASHSZ; i++) { + struct inet_frag_queue *q; + struct hlist_node *p, *n; + + hlist_for_each_entry_safe(q, p, n, &f->hash[i], list) { + unsigned int hval = f->hashfn(q); + + if (hval != i) { + hlist_del(&q->list); + + /* Relink to new hash chain. */ + hlist_add_head(&q->list, &f->hash[hval]); + } + } + } + write_unlock(&f->lock); + + mod_timer(&f->secret_timer, now + f->ctl->secret_interval); +} + +void inet_frags_init(struct inet_frags *f) +{ + int i; + + for (i = 0; i < INETFRAGS_HASHSZ; i++) + INIT_HLIST_HEAD(&f->hash[i]); + + INIT_LIST_HEAD(&f->lru_list); + rwlock_init(&f->lock); + + f->rnd = (u32) ((num_physpages ^ (num_physpages>>7)) ^ + (jiffies ^ (jiffies >> 6))); + + f->nqueues = 0; + atomic_set(&f->mem, 0); + + init_timer(&f->secret_timer); + f->secret_timer.function = inet_frag_secret_rebuild; + f->secret_timer.data = (unsigned long)f; + f->secret_timer.expires = jiffies + f->ctl->secret_interval; + add_timer(&f->secret_timer); +} +EXPORT_SYMBOL(inet_frags_init); + +void inet_frags_fini(struct inet_frags *f) +{ + del_timer(&f->secret_timer); +} +EXPORT_SYMBOL(inet_frags_fini); + +static inline void fq_unlink(struct inet_frag_queue *fq, struct inet_frags *f) +{ + write_lock(&f->lock); + hlist_del(&fq->list); + list_del(&fq->lru_list); + f->nqueues--; + write_unlock(&f->lock); +} + +void inet_frag_kill(struct inet_frag_queue *fq, struct inet_frags *f) +{ + if (del_timer(&fq->timer)) + atomic_dec(&fq->refcnt); + + if (!(fq->last_in & COMPLETE)) { + fq_unlink(fq, f); + atomic_dec(&fq->refcnt); + fq->last_in |= COMPLETE; + } +} + +EXPORT_SYMBOL(inet_frag_kill); + +static inline void frag_kfree_skb(struct inet_frags *f, struct sk_buff *skb, + int *work) +{ + if (work) + *work -= skb->truesize; + + atomic_sub(skb->truesize, &f->mem); + if (f->skb_free) + f->skb_free(skb); + kfree_skb(skb); +} + +void inet_frag_destroy(struct inet_frag_queue *q, struct inet_frags *f, + int *work) +{ + struct sk_buff *fp; + + BUG_TRAP(q->last_in & COMPLETE); + BUG_TRAP(del_timer(&q->timer) == 0); + + /* Release all fragment data. 
*/ + fp = q->fragments; + while (fp) { + struct sk_buff *xp = fp->next; + + frag_kfree_skb(f, fp, work); + fp = xp; + } + + if (work) + *work -= f->qsize; + atomic_sub(f->qsize, &f->mem); + + if (f->destructor) + f->destructor(q); + kfree(q); + +} +EXPORT_SYMBOL(inet_frag_destroy); + +int inet_frag_evictor(struct inet_frags *f) +{ + struct inet_frag_queue *q; + int work, evicted = 0; + + work = atomic_read(&f->mem) - f->ctl->low_thresh; + while (work > 0) { + read_lock(&f->lock); + if (list_empty(&f->lru_list)) { + read_unlock(&f->lock); + break; + } + + q = list_first_entry(&f->lru_list, + struct inet_frag_queue, lru_list); + atomic_inc(&q->refcnt); + read_unlock(&f->lock); + + spin_lock(&q->lock); + if (!(q->last_in & COMPLETE)) + inet_frag_kill(q, f); + spin_unlock(&q->lock); + + if (atomic_dec_and_test(&q->refcnt)) + inet_frag_destroy(q, f, &work); + evicted++; + } + + return evicted; +} +EXPORT_SYMBOL(inet_frag_evictor); + +static struct inet_frag_queue *inet_frag_intern(struct inet_frag_queue *qp_in, + struct inet_frags *f, unsigned int hash, void *arg) +{ + struct inet_frag_queue *qp; +#ifdef CONFIG_SMP + struct hlist_node *n; +#endif + + write_lock(&f->lock); +#ifdef CONFIG_SMP + /* With SMP race we have to recheck hash table, because + * such entry could be created on other cpu, while we + * promoted read lock to write lock. + */ + hlist_for_each_entry(qp, n, &f->hash[hash], list) { + if (f->match(qp, arg)) { + atomic_inc(&qp->refcnt); + write_unlock(&f->lock); + qp_in->last_in |= COMPLETE; + inet_frag_put(qp_in, f); + return qp; + } + } +#endif + qp = qp_in; + if (!mod_timer(&qp->timer, jiffies + f->ctl->timeout)) + atomic_inc(&qp->refcnt); + + atomic_inc(&qp->refcnt); + hlist_add_head(&qp->list, &f->hash[hash]); + list_add_tail(&qp->lru_list, &f->lru_list); + f->nqueues++; + write_unlock(&f->lock); + return qp; +} + +static struct inet_frag_queue *inet_frag_alloc(struct inet_frags *f, void *arg) +{ + struct inet_frag_queue *q; + + q = kzalloc(f->qsize, GFP_ATOMIC); + if (q == NULL) + return NULL; + + f->constructor(q, arg); + atomic_add(f->qsize, &f->mem); + setup_timer(&q->timer, f->frag_expire, (unsigned long)q); + spin_lock_init(&q->lock); + atomic_set(&q->refcnt, 1); + + return q; +} + +static struct inet_frag_queue *inet_frag_create(struct inet_frags *f, + void *arg, unsigned int hash) +{ + struct inet_frag_queue *q; + + q = inet_frag_alloc(f, arg); + if (q == NULL) + return NULL; + + return inet_frag_intern(q, f, hash, arg); +} + +struct inet_frag_queue *inet_frag_find(struct inet_frags *f, void *key, + unsigned int hash) +{ + struct inet_frag_queue *q; + struct hlist_node *n; + + read_lock(&f->lock); + hlist_for_each_entry(q, n, &f->hash[hash], list) { + if (f->match(q, key)) { + atomic_inc(&q->refcnt); + read_unlock(&f->lock); + return q; + } + } + read_unlock(&f->lock); + + return inet_frag_create(f, key, hash); +} +EXPORT_SYMBOL(inet_frag_find); diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c index fac6398e436..16eecc7046a 100644 --- a/net/ipv4/inet_hashtables.c +++ b/net/ipv4/inet_hashtables.c @@ -286,7 +286,7 @@ int inet_hash_connect(struct inet_timewait_death_row *death_row, struct inet_timewait_sock *tw = NULL; inet_get_local_port_range(&low, &high); - remaining = high - low; + remaining = (high - low) + 1; local_bh_disable(); for (i = 1; i <= remaining; i++) { diff --git a/net/ipv4/ip_forward.c b/net/ipv4/ip_forward.c index afbf938836f..877da3ed52e 100644 --- a/net/ipv4/ip_forward.c +++ b/net/ipv4/ip_forward.c @@ -40,7 +40,7 @@ #include 
<net/route.h> #include <net/xfrm.h> -static inline int ip_forward_finish(struct sk_buff *skb) +static int ip_forward_finish(struct sk_buff *skb) { struct ip_options * opt = &(IPCB(skb)->opt); diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c index fabb86db763..2143bf30597 100644 --- a/net/ipv4/ip_fragment.c +++ b/net/ipv4/ip_fragment.c @@ -39,6 +39,7 @@ #include <net/icmp.h> #include <net/checksum.h> #include <net/inetpeer.h> +#include <net/inet_frag.h> #include <linux/tcp.h> #include <linux/udp.h> #include <linux/inet.h> @@ -49,21 +50,8 @@ * as well. Or notify me, at least. --ANK */ -/* Fragment cache limits. We will commit 256K at one time. Should we - * cross that limit we will prune down to 192K. This should cope with - * even the most extreme cases without allowing an attacker to measurably - * harm machine performance. - */ -int sysctl_ipfrag_high_thresh __read_mostly = 256*1024; -int sysctl_ipfrag_low_thresh __read_mostly = 192*1024; - int sysctl_ipfrag_max_dist __read_mostly = 64; -/* Important NOTE! Fragment queue must be destroyed before MSL expires. - * RFC791 is wrong proposing to prolongate timer each fragment arrival by TTL. - */ -int sysctl_ipfrag_time __read_mostly = IP_FRAG_TIME; - struct ipfrag_skb_cb { struct inet_skb_parm h; @@ -74,153 +62,123 @@ struct ipfrag_skb_cb /* Describe an entry in the "incomplete datagrams" queue. */ struct ipq { - struct hlist_node list; - struct list_head lru_list; /* lru list member */ + struct inet_frag_queue q; + u32 user; __be32 saddr; __be32 daddr; __be16 id; u8 protocol; - u8 last_in; -#define COMPLETE 4 -#define FIRST_IN 2 -#define LAST_IN 1 - - struct sk_buff *fragments; /* linked list of received fragments */ - int len; /* total length of original datagram */ - int meat; - spinlock_t lock; - atomic_t refcnt; - struct timer_list timer; /* when will this queue expire? */ - ktime_t stamp; int iif; unsigned int rid; struct inet_peer *peer; }; -/* Hash table. */ +struct inet_frags_ctl ip4_frags_ctl __read_mostly = { + /* + * Fragment cache limits. We will commit 256K at one time. Should we + * cross that limit we will prune down to 192K. This should cope with + * even the most extreme cases without allowing an attacker to + * measurably harm machine performance. + */ + .high_thresh = 256 * 1024, + .low_thresh = 192 * 1024, -#define IPQ_HASHSZ 64 + /* + * Important NOTE! Fragment queue must be destroyed before MSL expires. + * RFC791 is wrong proposing to prolongate timer each fragment arrival + * by TTL. + */ + .timeout = IP_FRAG_TIME, + .secret_interval = 10 * 60 * HZ, +}; -/* Per-bucket lock is easy to add now. 
*/ -static struct hlist_head ipq_hash[IPQ_HASHSZ]; -static DEFINE_RWLOCK(ipfrag_lock); -static u32 ipfrag_hash_rnd; -static LIST_HEAD(ipq_lru_list); -int ip_frag_nqueues = 0; +static struct inet_frags ip4_frags; -static __inline__ void __ipq_unlink(struct ipq *qp) +int ip_frag_nqueues(void) { - hlist_del(&qp->list); - list_del(&qp->lru_list); - ip_frag_nqueues--; + return ip4_frags.nqueues; } -static __inline__ void ipq_unlink(struct ipq *ipq) +int ip_frag_mem(void) { - write_lock(&ipfrag_lock); - __ipq_unlink(ipq); - write_unlock(&ipfrag_lock); + return atomic_read(&ip4_frags.mem); } +static int ip_frag_reasm(struct ipq *qp, struct sk_buff *prev, + struct net_device *dev); + +struct ip4_create_arg { + struct iphdr *iph; + u32 user; +}; + static unsigned int ipqhashfn(__be16 id, __be32 saddr, __be32 daddr, u8 prot) { return jhash_3words((__force u32)id << 16 | prot, (__force u32)saddr, (__force u32)daddr, - ipfrag_hash_rnd) & (IPQ_HASHSZ - 1); + ip4_frags.rnd) & (INETFRAGS_HASHSZ - 1); } -static struct timer_list ipfrag_secret_timer; -int sysctl_ipfrag_secret_interval __read_mostly = 10 * 60 * HZ; - -static void ipfrag_secret_rebuild(unsigned long dummy) +static unsigned int ip4_hashfn(struct inet_frag_queue *q) { - unsigned long now = jiffies; - int i; + struct ipq *ipq; - write_lock(&ipfrag_lock); - get_random_bytes(&ipfrag_hash_rnd, sizeof(u32)); - for (i = 0; i < IPQ_HASHSZ; i++) { - struct ipq *q; - struct hlist_node *p, *n; - - hlist_for_each_entry_safe(q, p, n, &ipq_hash[i], list) { - unsigned int hval = ipqhashfn(q->id, q->saddr, - q->daddr, q->protocol); - - if (hval != i) { - hlist_del(&q->list); - - /* Relink to new hash chain. */ - hlist_add_head(&q->list, &ipq_hash[hval]); - } - } - } - write_unlock(&ipfrag_lock); - - mod_timer(&ipfrag_secret_timer, now + sysctl_ipfrag_secret_interval); + ipq = container_of(q, struct ipq, q); + return ipqhashfn(ipq->id, ipq->saddr, ipq->daddr, ipq->protocol); } -atomic_t ip_frag_mem = ATOMIC_INIT(0); /* Memory used for fragments */ +static int ip4_frag_match(struct inet_frag_queue *q, void *a) +{ + struct ipq *qp; + struct ip4_create_arg *arg = a; + + qp = container_of(q, struct ipq, q); + return (qp->id == arg->iph->id && + qp->saddr == arg->iph->saddr && + qp->daddr == arg->iph->daddr && + qp->protocol == arg->iph->protocol && + qp->user == arg->user); +} /* Memory Tracking Functions. */ static __inline__ void frag_kfree_skb(struct sk_buff *skb, int *work) { if (work) *work -= skb->truesize; - atomic_sub(skb->truesize, &ip_frag_mem); + atomic_sub(skb->truesize, &ip4_frags.mem); kfree_skb(skb); } -static __inline__ void frag_free_queue(struct ipq *qp, int *work) +static void ip4_frag_init(struct inet_frag_queue *q, void *a) { - if (work) - *work -= sizeof(struct ipq); - atomic_sub(sizeof(struct ipq), &ip_frag_mem); - kfree(qp); + struct ipq *qp = container_of(q, struct ipq, q); + struct ip4_create_arg *arg = a; + + qp->protocol = arg->iph->protocol; + qp->id = arg->iph->id; + qp->saddr = arg->iph->saddr; + qp->daddr = arg->iph->daddr; + qp->user = arg->user; + qp->peer = sysctl_ipfrag_max_dist ? + inet_getpeer(arg->iph->saddr, 1) : NULL; } -static __inline__ struct ipq *frag_alloc_queue(void) +static __inline__ void ip4_frag_free(struct inet_frag_queue *q) { - struct ipq *qp = kmalloc(sizeof(struct ipq), GFP_ATOMIC); - - if (!qp) - return NULL; - atomic_add(sizeof(struct ipq), &ip_frag_mem); - return qp; -} - - -/* Destruction primitives. */ - -/* Complete destruction of ipq. 
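
ipqhashfn above packs the 16-bit IP id and the protocol into one 32-bit word, mixes it with saddr, daddr and the per-boot random seed (now ip4_frags.rnd), and masks to the bucket count; ip4_frag_match then compares the full (id, saddr, daddr, protocol, user) tuple. A runnable sketch of that keying scheme; the mixing function is a deliberately crude placeholder, not the kernel's jhash_3words:

    #include <stdint.h>
    #include <stdio.h>

    static uint32_t mix3(uint32_t a, uint32_t b, uint32_t c, uint32_t seed)
    {
        uint32_t h = seed ^ a;         /* NOT jhash_3words, just a demo mixer */

        h = (h ^ b) * 0x9e3779b1u;
        h = (h ^ c) * 0x85ebca77u;
        return h ^ (h >> 16);
    }

    struct key { uint16_t id; uint8_t proto; uint32_t saddr, daddr, user; };

    static unsigned int hashfn(const struct key *k, uint32_t rnd)
    {
        /* pack id and protocol into the first word, as ipqhashfn does */
        return mix3((uint32_t)k->id << 16 | k->proto,
                    k->saddr, k->daddr, rnd) & 63;   /* INETFRAGS_HASHSZ - 1 */
    }

    static int match(const struct key *a, const struct key *b)
    {
        return a->id == b->id && a->saddr == b->saddr &&
               a->daddr == b->daddr && a->proto == b->proto &&
               a->user == b->user;     /* cf. ip4_frag_match() */
    }

    int main(void)
    {
        struct key k = { 0x1234, 17, 0x0a000001, 0x0a000002, 0 };

        printf("bucket=%u self-match=%d\n",
               hashfn(&k, 0xdeadbeef), match(&k, &k));
        return 0;
    }

The random seed exists so an attacker who knows the hash function cannot force all queues into one bucket; the secret_interval timer reshuffles it periodically.
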
*/ -static void ip_frag_destroy(struct ipq *qp, int *work) -{ - struct sk_buff *fp; - - BUG_TRAP(qp->last_in&COMPLETE); - BUG_TRAP(del_timer(&qp->timer) == 0); + struct ipq *qp; + qp = container_of(q, struct ipq, q); if (qp->peer) inet_putpeer(qp->peer); +} - /* Release all fragment data. */ - fp = qp->fragments; - while (fp) { - struct sk_buff *xp = fp->next; - - frag_kfree_skb(fp, work); - fp = xp; - } - /* Finally, release the queue descriptor itself. */ - frag_free_queue(qp, work); -} +/* Destruction primitives. */ -static __inline__ void ipq_put(struct ipq *ipq, int *work) +static __inline__ void ipq_put(struct ipq *ipq) { - if (atomic_dec_and_test(&ipq->refcnt)) - ip_frag_destroy(ipq, work); + inet_frag_put(&ipq->q, &ip4_frags); } /* Kill ipq entry. It is not destroyed immediately, @@ -228,14 +186,7 @@ static __inline__ void ipq_put(struct ipq *ipq, int *work) */ static void ipq_kill(struct ipq *ipq) { - if (del_timer(&ipq->timer)) - atomic_dec(&ipq->refcnt); - - if (!(ipq->last_in & COMPLETE)) { - ipq_unlink(ipq); - atomic_dec(&ipq->refcnt); - ipq->last_in |= COMPLETE; - } + inet_frag_kill(&ipq->q, &ip4_frags); } /* Memory limiting on fragments. Evictor trashes the oldest @@ -243,33 +194,11 @@ static void ipq_kill(struct ipq *ipq) */ static void ip_evictor(void) { - struct ipq *qp; - struct list_head *tmp; - int work; - - work = atomic_read(&ip_frag_mem) - sysctl_ipfrag_low_thresh; - if (work <= 0) - return; - - while (work > 0) { - read_lock(&ipfrag_lock); - if (list_empty(&ipq_lru_list)) { - read_unlock(&ipfrag_lock); - return; - } - tmp = ipq_lru_list.next; - qp = list_entry(tmp, struct ipq, lru_list); - atomic_inc(&qp->refcnt); - read_unlock(&ipfrag_lock); - - spin_lock(&qp->lock); - if (!(qp->last_in&COMPLETE)) - ipq_kill(qp); - spin_unlock(&qp->lock); + int evicted; - ipq_put(qp, &work); - IP_INC_STATS_BH(IPSTATS_MIB_REASMFAILS); - } + evicted = inet_frag_evictor(&ip4_frags); + if (evicted) + IP_ADD_STATS_BH(IPSTATS_MIB_REASMFAILS, evicted); } /* @@ -277,11 +206,13 @@ static void ip_evictor(void) */ static void ip_expire(unsigned long arg) { - struct ipq *qp = (struct ipq *) arg; + struct ipq *qp; + + qp = container_of((struct inet_frag_queue *) arg, struct ipq, q); - spin_lock(&qp->lock); + spin_lock(&qp->q.lock); - if (qp->last_in & COMPLETE) + if (qp->q.last_in & COMPLETE) goto out; ipq_kill(qp); @@ -289,8 +220,8 @@ static void ip_expire(unsigned long arg) IP_INC_STATS_BH(IPSTATS_MIB_REASMTIMEOUT); IP_INC_STATS_BH(IPSTATS_MIB_REASMFAILS); - if ((qp->last_in&FIRST_IN) && qp->fragments != NULL) { - struct sk_buff *head = qp->fragments; + if ((qp->q.last_in&FIRST_IN) && qp->q.fragments != NULL) { + struct sk_buff *head = qp->q.fragments; /* Send an ICMP "Fragment Reassembly Timeout" message. */ if ((head->dev = dev_get_by_index(&init_net, qp->iif)) != NULL) { icmp_send(head, ICMP_TIME_EXCEEDED, ICMP_EXC_FRAGTIME, 0); @@ -298,121 +229,34 @@ static void ip_expire(unsigned long arg) } } out: - spin_unlock(&qp->lock); - ipq_put(qp, NULL); + spin_unlock(&qp->q.lock); + ipq_put(qp); } -/* Creation primitives. */ - -static struct ipq *ip_frag_intern(struct ipq *qp_in) +/* Find the correct entry in the "incomplete datagrams" queue for + * this IP datagram, and create new one, if nothing is found. 
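
ipq_kill and ipq_put above are now thin wrappers over inet_frag_kill/inet_frag_put, which split teardown in two: kill strips the timer and hash-table references while leaving the object alive for current users, and the final put (refcount reaching zero) actually frees it. A compressed userspace model of those lifetime rules, using C11 atomics as a stand-in for atomic_t and ignoring the timer reference:

    #include <stdatomic.h>
    #include <stdio.h>
    #include <stdlib.h>

    struct queue {
        atomic_int refcnt;
        int complete;                  /* cf. last_in & COMPLETE */
    };

    static void queue_kill(struct queue *q)
    {
        if (!q->complete) {
            q->complete = 1;
            /* drop the hash-table reference, cf. inet_frag_kill() */
            atomic_fetch_sub(&q->refcnt, 1);
        }
    }

    static void queue_put(struct queue *q)
    {
        if (atomic_fetch_sub(&q->refcnt, 1) == 1) {   /* just hit zero */
            printf("refcnt hit zero, destroying queue\n");
            free(q);
        }
    }

    int main(void)
    {
        struct queue *q = malloc(sizeof(*q));

        if (!q)
            return 1;
        atomic_init(&q->refcnt, 2);    /* one ref for us, one for the table */
        q->complete = 0;

        queue_kill(q);                 /* unhash: table reference dropped */
        queue_put(q);                  /* our reference: queue is freed */
        return 0;
    }
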
+ */ +static inline struct ipq *ip_find(struct iphdr *iph, u32 user) { - struct ipq *qp; -#ifdef CONFIG_SMP - struct hlist_node *n; -#endif + struct inet_frag_queue *q; + struct ip4_create_arg arg; unsigned int hash; - write_lock(&ipfrag_lock); - hash = ipqhashfn(qp_in->id, qp_in->saddr, qp_in->daddr, - qp_in->protocol); -#ifdef CONFIG_SMP - /* With SMP race we have to recheck hash table, because - * such entry could be created on other cpu, while we - * promoted read lock to write lock. - */ - hlist_for_each_entry(qp, n, &ipq_hash[hash], list) { - if (qp->id == qp_in->id && - qp->saddr == qp_in->saddr && - qp->daddr == qp_in->daddr && - qp->protocol == qp_in->protocol && - qp->user == qp_in->user) { - atomic_inc(&qp->refcnt); - write_unlock(&ipfrag_lock); - qp_in->last_in |= COMPLETE; - ipq_put(qp_in, NULL); - return qp; - } - } -#endif - qp = qp_in; - - if (!mod_timer(&qp->timer, jiffies + sysctl_ipfrag_time)) - atomic_inc(&qp->refcnt); - - atomic_inc(&qp->refcnt); - hlist_add_head(&qp->list, &ipq_hash[hash]); - INIT_LIST_HEAD(&qp->lru_list); - list_add_tail(&qp->lru_list, &ipq_lru_list); - ip_frag_nqueues++; - write_unlock(&ipfrag_lock); - return qp; -} - -/* Add an entry to the 'ipq' queue for a newly received IP datagram. */ -static struct ipq *ip_frag_create(struct iphdr *iph, u32 user) -{ - struct ipq *qp; + arg.iph = iph; + arg.user = user; + hash = ipqhashfn(iph->id, iph->saddr, iph->daddr, iph->protocol); - if ((qp = frag_alloc_queue()) == NULL) + q = inet_frag_find(&ip4_frags, &arg, hash); + if (q == NULL) goto out_nomem; - qp->protocol = iph->protocol; - qp->last_in = 0; - qp->id = iph->id; - qp->saddr = iph->saddr; - qp->daddr = iph->daddr; - qp->user = user; - qp->len = 0; - qp->meat = 0; - qp->fragments = NULL; - qp->iif = 0; - qp->peer = sysctl_ipfrag_max_dist ? inet_getpeer(iph->saddr, 1) : NULL; - - /* Initialize a timer for this entry. */ - init_timer(&qp->timer); - qp->timer.data = (unsigned long) qp; /* pointer to queue */ - qp->timer.function = ip_expire; /* expire function */ - spin_lock_init(&qp->lock); - atomic_set(&qp->refcnt, 1); - - return ip_frag_intern(qp); + return container_of(q, struct ipq, q); out_nomem: LIMIT_NETDEBUG(KERN_ERR "ip_frag_create: no memory left !\n"); return NULL; } -/* Find the correct entry in the "incomplete datagrams" queue for - * this IP datagram, and create new one, if nothing is found. - */ -static inline struct ipq *ip_find(struct iphdr *iph, u32 user) -{ - __be16 id = iph->id; - __be32 saddr = iph->saddr; - __be32 daddr = iph->daddr; - __u8 protocol = iph->protocol; - unsigned int hash; - struct ipq *qp; - struct hlist_node *n; - - read_lock(&ipfrag_lock); - hash = ipqhashfn(id, saddr, daddr, protocol); - hlist_for_each_entry(qp, n, &ipq_hash[hash], list) { - if (qp->id == id && - qp->saddr == saddr && - qp->daddr == daddr && - qp->protocol == protocol && - qp->user == user) { - atomic_inc(&qp->refcnt); - read_unlock(&ipfrag_lock); - return qp; - } - } - read_unlock(&ipfrag_lock); - - return ip_frag_create(iph, user); -} - /* Is the fragment too far ahead to be part of ipq? 
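
ip_find above delegates to inet_frag_find, which looks the key up under a read lock and, on a miss, allocates a fresh queue and interns it under the write lock, rechecking the chain because another CPU may have interned the same key in the meantime (the CONFIG_SMP recheck shown in inet_frag_intern earlier). A pthread-based sketch of that find-or-create flow; plain integers stand in for the fragment key, and the refcount bumps are simplified (the kernel uses atomic_inc while holding f->lock):

    #include <pthread.h>
    #include <stdlib.h>

    struct node { struct node *next; int key; int refcnt; };

    static struct node *head;
    static pthread_rwlock_t lock = PTHREAD_RWLOCK_INITIALIZER;

    static struct node *lookup(int key)        /* caller holds lock */
    {
        struct node *n;

        for (n = head; n; n = n->next)
            if (n->key == key)
                return n;
        return NULL;
    }

    static struct node *find_or_create(int key)
    {
        struct node *n, *fresh;

        pthread_rwlock_rdlock(&lock);
        n = lookup(key);
        if (n)
            n->refcnt++;               /* kernel: atomic_inc(&q->refcnt) */
        pthread_rwlock_unlock(&lock);
        if (n)
            return n;

        fresh = calloc(1, sizeof(*fresh));
        if (!fresh)
            return NULL;
        fresh->key = key;
        fresh->refcnt = 1;             /* the caller's reference */

        pthread_rwlock_wrlock(&lock);
        n = lookup(key);               /* recheck: did another thread win? */
        if (n) {
            n->refcnt++;
            free(fresh);               /* kernel marks it COMPLETE and puts it */
        } else {
            fresh->refcnt++;           /* the table holds a reference too */
            fresh->next = head;
            head = fresh;
            n = fresh;
        }
        pthread_rwlock_unlock(&lock);
        return n;
    }

    int main(void)
    {
        return find_or_create(7) ? 0 : 1;
    }
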
*/ static inline int ip_frag_too_far(struct ipq *qp) { @@ -429,7 +273,7 @@ static inline int ip_frag_too_far(struct ipq *qp) end = atomic_inc_return(&peer->rid); qp->rid = end; - rc = qp->fragments && (end - start) > max; + rc = qp->q.fragments && (end - start) > max; if (rc) { IP_INC_STATS_BH(IPSTATS_MIB_REASMFAILS); @@ -442,39 +286,42 @@ static int ip_frag_reinit(struct ipq *qp) { struct sk_buff *fp; - if (!mod_timer(&qp->timer, jiffies + sysctl_ipfrag_time)) { - atomic_inc(&qp->refcnt); + if (!mod_timer(&qp->q.timer, jiffies + ip4_frags_ctl.timeout)) { + atomic_inc(&qp->q.refcnt); return -ETIMEDOUT; } - fp = qp->fragments; + fp = qp->q.fragments; do { struct sk_buff *xp = fp->next; frag_kfree_skb(fp, NULL); fp = xp; } while (fp); - qp->last_in = 0; - qp->len = 0; - qp->meat = 0; - qp->fragments = NULL; + qp->q.last_in = 0; + qp->q.len = 0; + qp->q.meat = 0; + qp->q.fragments = NULL; qp->iif = 0; return 0; } /* Add new segment to existing queue. */ -static void ip_frag_queue(struct ipq *qp, struct sk_buff *skb) +static int ip_frag_queue(struct ipq *qp, struct sk_buff *skb) { struct sk_buff *prev, *next; + struct net_device *dev; int flags, offset; int ihl, end; + int err = -ENOENT; - if (qp->last_in & COMPLETE) + if (qp->q.last_in & COMPLETE) goto err; if (!(IPCB(skb)->flags & IPSKB_FRAG_COMPLETE) && - unlikely(ip_frag_too_far(qp)) && unlikely(ip_frag_reinit(qp))) { + unlikely(ip_frag_too_far(qp)) && + unlikely(err = ip_frag_reinit(qp))) { ipq_kill(qp); goto err; } @@ -487,36 +334,40 @@ static void ip_frag_queue(struct ipq *qp, struct sk_buff *skb) /* Determine the position of this fragment. */ end = offset + skb->len - ihl; + err = -EINVAL; /* Is this the final fragment? */ if ((flags & IP_MF) == 0) { /* If we already have some bits beyond end * or have different end, the segment is corrupted. */ - if (end < qp->len || - ((qp->last_in & LAST_IN) && end != qp->len)) + if (end < qp->q.len || + ((qp->q.last_in & LAST_IN) && end != qp->q.len)) goto err; - qp->last_in |= LAST_IN; - qp->len = end; + qp->q.last_in |= LAST_IN; + qp->q.len = end; } else { if (end&7) { end &= ~7; if (skb->ip_summed != CHECKSUM_UNNECESSARY) skb->ip_summed = CHECKSUM_NONE; } - if (end > qp->len) { + if (end > qp->q.len) { /* Some bits beyond end -> corruption. */ - if (qp->last_in & LAST_IN) + if (qp->q.last_in & LAST_IN) goto err; - qp->len = end; + qp->q.len = end; } } if (end == offset) goto err; + err = -ENOMEM; if (pskb_pull(skb, ihl) == NULL) goto err; - if (pskb_trim_rcsum(skb, end-offset)) + + err = pskb_trim_rcsum(skb, end - offset); + if (err) goto err; /* Find out which fragments are in front and at the back of us @@ -524,7 +375,7 @@ static void ip_frag_queue(struct ipq *qp, struct sk_buff *skb) * this fragment, right? */ prev = NULL; - for (next = qp->fragments; next != NULL; next = next->next) { + for (next = qp->q.fragments; next != NULL; next = next->next) { if (FRAG_CB(next)->offset >= offset) break; /* bingo! 
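
ip_frag_queue above derives each fragment's byte range from the IP header: the 13-bit offset field counts 8-byte units, IP_MF marks non-final fragments, and a non-final fragment whose end is not 8-byte aligned gets trimmed (end &= ~7). A self-contained sketch of that arithmetic, with the flag constants copied from their well-known values and demo inputs:

    #include <stdint.h>
    #include <stdio.h>

    #define IP_MF     0x2000           /* "more fragments" flag */
    #define IP_OFFSET 0x1FFF           /* offset mask, in 8-byte units */

    static void classify(uint16_t frag_off, int payload_len)
    {
        int offset = (frag_off & IP_OFFSET) << 3;    /* bytes */
        int end = offset + payload_len;
        int more = frag_off & IP_MF;

        if (more && (end & 7))
            printf("non-final fragment not 8-byte aligned, trim to %d\n",
                   end & ~7);
        else
            printf("bytes [%d,%d) %s\n", offset, end,
                   more ? "with more to come" : "final fragment");
    }

    int main(void)
    {
        classify(IP_MF | 0, 1480);     /* first fragment, 1480 payload bytes */
        classify(185, 520);            /* final fragment starting at byte 1480 */
        return 0;
    }
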
*/ prev = next; @@ -539,8 +390,10 @@ static void ip_frag_queue(struct ipq *qp, struct sk_buff *skb) if (i > 0) { offset += i; + err = -EINVAL; if (end <= offset) goto err; + err = -ENOMEM; if (!pskb_pull(skb, i)) goto err; if (skb->ip_summed != CHECKSUM_UNNECESSARY) @@ -548,6 +401,8 @@ static void ip_frag_queue(struct ipq *qp, struct sk_buff *skb) } } + err = -ENOMEM; + while (next && FRAG_CB(next)->offset < end) { int i = end - FRAG_CB(next)->offset; /* overlap is 'i' bytes */ @@ -558,7 +413,7 @@ static void ip_frag_queue(struct ipq *qp, struct sk_buff *skb) if (!pskb_pull(next, i)) goto err; FRAG_CB(next)->offset += i; - qp->meat -= i; + qp->q.meat -= i; if (next->ip_summed != CHECKSUM_UNNECESSARY) next->ip_summed = CHECKSUM_NONE; break; @@ -573,9 +428,9 @@ static void ip_frag_queue(struct ipq *qp, struct sk_buff *skb) if (prev) prev->next = next; else - qp->fragments = next; + qp->q.fragments = next; - qp->meat -= free_it->len; + qp->q.meat -= free_it->len; frag_kfree_skb(free_it, NULL); } } @@ -587,46 +442,71 @@ static void ip_frag_queue(struct ipq *qp, struct sk_buff *skb) if (prev) prev->next = skb; else - qp->fragments = skb; - - if (skb->dev) - qp->iif = skb->dev->ifindex; - skb->dev = NULL; - qp->stamp = skb->tstamp; - qp->meat += skb->len; - atomic_add(skb->truesize, &ip_frag_mem); + qp->q.fragments = skb; + + dev = skb->dev; + if (dev) { + qp->iif = dev->ifindex; + skb->dev = NULL; + } + qp->q.stamp = skb->tstamp; + qp->q.meat += skb->len; + atomic_add(skb->truesize, &ip4_frags.mem); if (offset == 0) - qp->last_in |= FIRST_IN; + qp->q.last_in |= FIRST_IN; - write_lock(&ipfrag_lock); - list_move_tail(&qp->lru_list, &ipq_lru_list); - write_unlock(&ipfrag_lock); + if (qp->q.last_in == (FIRST_IN | LAST_IN) && qp->q.meat == qp->q.len) + return ip_frag_reasm(qp, prev, dev); - return; + write_lock(&ip4_frags.lock); + list_move_tail(&qp->q.lru_list, &ip4_frags.lru_list); + write_unlock(&ip4_frags.lock); + return -EINPROGRESS; err: kfree_skb(skb); + return err; } /* Build a new IP datagram from all its fragments. */ -static struct sk_buff *ip_frag_reasm(struct ipq *qp, struct net_device *dev) +static int ip_frag_reasm(struct ipq *qp, struct sk_buff *prev, + struct net_device *dev) { struct iphdr *iph; - struct sk_buff *fp, *head = qp->fragments; + struct sk_buff *fp, *head = qp->q.fragments; int len; int ihlen; + int err; ipq_kill(qp); + /* Make the one we just received the head. */ + if (prev) { + head = prev->next; + fp = skb_clone(head, GFP_ATOMIC); + if (!fp) + goto out_nomem; + + fp->next = head->next; + prev->next = fp; + + skb_morph(head, qp->q.fragments); + head->next = qp->q.fragments->next; + + kfree_skb(qp->q.fragments); + qp->q.fragments = head; + } + BUG_TRAP(head != NULL); BUG_TRAP(FRAG_CB(head)->offset == 0); /* Allocate a new buffer for the datagram. 
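
With this patch ip_frag_queue itself decides when to reassemble: both boundary fragments must have arrived (FIRST_IN and LAST_IN set) and the accounted payload (meat) must equal the datagram length, meaning no holes remain. A tiny sketch of that completeness test; the flag values mirror the kernel's bits but the struct is a local demo type:

    #include <stdio.h>

    #define FIRST_IN 2                 /* mirrors the kernel's flag values */
    #define LAST_IN  1

    struct q { int last_in; int meat; int len; };

    static int ready_to_reasm(const struct q *q)
    {
        return q->last_in == (FIRST_IN | LAST_IN) && q->meat == q->len;
    }

    int main(void)
    {
        struct q q = { FIRST_IN | LAST_IN, 2000, 2000 };

        printf("reassemble now? %d\n", ready_to_reasm(&q));
        q.meat = 1480;                 /* a hole remains in the middle */
        printf("reassemble now? %d\n", ready_to_reasm(&q));
        return 0;
    }

Moving the check inside ip_frag_queue lets reassembly run with the queue lock already held, which is why ip_defrag below shrinks to lookup, queue, put.
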
*/ ihlen = ip_hdrlen(head); - len = ihlen + qp->len; + len = ihlen + qp->q.len; + err = -E2BIG; if (len > 65535) goto out_oversize; @@ -654,12 +534,12 @@ static struct sk_buff *ip_frag_reasm(struct ipq *qp, struct net_device *dev) head->len -= clone->len; clone->csum = 0; clone->ip_summed = head->ip_summed; - atomic_add(clone->truesize, &ip_frag_mem); + atomic_add(clone->truesize, &ip4_frags.mem); } skb_shinfo(head)->frag_list = head->next; skb_push(head, head->data - skb_network_header(head)); - atomic_sub(head->truesize, &ip_frag_mem); + atomic_sub(head->truesize, &ip4_frags.mem); for (fp=head->next; fp; fp = fp->next) { head->data_len += fp->len; @@ -669,23 +549,24 @@ static struct sk_buff *ip_frag_reasm(struct ipq *qp, struct net_device *dev) else if (head->ip_summed == CHECKSUM_COMPLETE) head->csum = csum_add(head->csum, fp->csum); head->truesize += fp->truesize; - atomic_sub(fp->truesize, &ip_frag_mem); + atomic_sub(fp->truesize, &ip4_frags.mem); } head->next = NULL; head->dev = dev; - head->tstamp = qp->stamp; + head->tstamp = qp->q.stamp; iph = ip_hdr(head); iph->frag_off = 0; iph->tot_len = htons(len); IP_INC_STATS_BH(IPSTATS_MIB_REASMOKS); - qp->fragments = NULL; - return head; + qp->q.fragments = NULL; + return 0; out_nomem: LIMIT_NETDEBUG(KERN_ERR "IP: queue_glue: no memory for gluing " "queue %p\n", qp); + err = -ENOMEM; goto out_fail; out_oversize: if (net_ratelimit()) @@ -694,54 +575,49 @@ out_oversize: NIPQUAD(qp->saddr)); out_fail: IP_INC_STATS_BH(IPSTATS_MIB_REASMFAILS); - return NULL; + return err; } /* Process an incoming IP datagram fragment. */ -struct sk_buff *ip_defrag(struct sk_buff *skb, u32 user) +int ip_defrag(struct sk_buff *skb, u32 user) { struct ipq *qp; - struct net_device *dev; IP_INC_STATS_BH(IPSTATS_MIB_REASMREQDS); /* Start by cleaning up the memory. 
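
ip_frag_reasm above glues the fragments' checksums together with csum_add when the head carries CHECKSUM_COMPLETE. The Internet checksum is a ones-complement sum, so partial sums over adjacent byte ranges combine with end-around carry and fold to 16 bits only at the end. A runnable sketch (even-length buffers only, big-endian word order for readability) showing that the glued sum folds to the same value as a sum over the whole buffer:

    #include <stdint.h>
    #include <stdio.h>

    static uint32_t csum_partial(const uint8_t *p, int len, uint32_t sum)
    {
        for (; len > 1; len -= 2, p += 2)
            sum += (uint32_t)((p[0] << 8) | p[1]);
        return sum;
    }

    static uint32_t csum_add(uint32_t a, uint32_t b)   /* end-around carry */
    {
        a += b;
        return a + (a < b);
    }

    static uint16_t csum_fold(uint32_t sum)
    {
        while (sum >> 16)
            sum = (sum & 0xffff) + (sum >> 16);
        return (uint16_t)~sum;
    }

    int main(void)
    {
        uint8_t frag1[] = { 0x12, 0x34, 0x56, 0x78 };
        uint8_t frag2[] = { 0x9a, 0xbc, 0xde, 0xf0 };
        uint32_t whole = csum_partial(frag2, 4, csum_partial(frag1, 4, 0));
        uint32_t glued = csum_add(csum_partial(frag1, 4, 0),
                                  csum_partial(frag2, 4, 0));

        /* both folds print the same value */
        printf("whole=%04x glued=%04x\n", csum_fold(whole), csum_fold(glued));
        return 0;
    }
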
*/ - if (atomic_read(&ip_frag_mem) > sysctl_ipfrag_high_thresh) + if (atomic_read(&ip4_frags.mem) > ip4_frags_ctl.high_thresh) ip_evictor(); - dev = skb->dev; - /* Lookup (or create) queue header */ if ((qp = ip_find(ip_hdr(skb), user)) != NULL) { - struct sk_buff *ret = NULL; + int ret; - spin_lock(&qp->lock); + spin_lock(&qp->q.lock); - ip_frag_queue(qp, skb); + ret = ip_frag_queue(qp, skb); - if (qp->last_in == (FIRST_IN|LAST_IN) && - qp->meat == qp->len) - ret = ip_frag_reasm(qp, dev); - - spin_unlock(&qp->lock); - ipq_put(qp, NULL); + spin_unlock(&qp->q.lock); + ipq_put(qp); return ret; } IP_INC_STATS_BH(IPSTATS_MIB_REASMFAILS); kfree_skb(skb); - return NULL; + return -ENOMEM; } void __init ipfrag_init(void) { - ipfrag_hash_rnd = (u32) ((num_physpages ^ (num_physpages>>7)) ^ - (jiffies ^ (jiffies >> 6))); - - init_timer(&ipfrag_secret_timer); - ipfrag_secret_timer.function = ipfrag_secret_rebuild; - ipfrag_secret_timer.expires = jiffies + sysctl_ipfrag_secret_interval; - add_timer(&ipfrag_secret_timer); + ip4_frags.ctl = &ip4_frags_ctl; + ip4_frags.hashfn = ip4_hashfn; + ip4_frags.constructor = ip4_frag_init; + ip4_frags.destructor = ip4_frag_free; + ip4_frags.skb_free = NULL; + ip4_frags.qsize = sizeof(struct ipq); + ip4_frags.match = ip4_frag_match; + ip4_frags.frag_expire = ip_expire; + inet_frags_init(&ip4_frags); } EXPORT_SYMBOL(ip_defrag); diff --git a/net/ipv4/ip_input.c b/net/ipv4/ip_input.c index 41d8964591e..168c871fcd7 100644 --- a/net/ipv4/ip_input.c +++ b/net/ipv4/ip_input.c @@ -172,8 +172,7 @@ int ip_call_ra_chain(struct sk_buff *skb) (!sk->sk_bound_dev_if || sk->sk_bound_dev_if == skb->dev->ifindex)) { if (ip_hdr(skb)->frag_off & htons(IP_MF | IP_OFFSET)) { - skb = ip_defrag(skb, IP_DEFRAG_CALL_RA_CHAIN); - if (skb == NULL) { + if (ip_defrag(skb, IP_DEFRAG_CALL_RA_CHAIN)) { read_unlock(&ip_ra_lock); return 1; } @@ -196,7 +195,7 @@ int ip_call_ra_chain(struct sk_buff *skb) return 0; } -static inline int ip_local_deliver_finish(struct sk_buff *skb) +static int ip_local_deliver_finish(struct sk_buff *skb) { __skb_pull(skb, ip_hdrlen(skb)); @@ -265,8 +264,7 @@ int ip_local_deliver(struct sk_buff *skb) */ if (ip_hdr(skb)->frag_off & htons(IP_MF | IP_OFFSET)) { - skb = ip_defrag(skb, IP_DEFRAG_LOCAL_DELIVER); - if (!skb) + if (ip_defrag(skb, IP_DEFRAG_LOCAL_DELIVER)) return 0; } @@ -326,7 +324,7 @@ drop: return -1; } -static inline int ip_rcv_finish(struct sk_buff *skb) +static int ip_rcv_finish(struct sk_buff *skb) { const struct iphdr *iph = ip_hdr(skb); struct rtable *rt; diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index 699f06781fd..f508835ba71 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -202,7 +202,7 @@ static inline int ip_skb_dst_mtu(struct sk_buff *skb) skb->dst->dev->mtu : dst_mtu(skb->dst); } -static inline int ip_finish_output(struct sk_buff *skb) +static int ip_finish_output(struct sk_buff *skb) { #if defined(CONFIG_NETFILTER) && defined(CONFIG_XFRM) /* Policy lookup after SNAT yielded a new policy */ diff --git a/net/ipv4/ipvs/ip_vs_app.c b/net/ipv4/ipvs/ip_vs_app.c index 341474eefa5..664cb8e97c1 100644 --- a/net/ipv4/ipvs/ip_vs_app.c +++ b/net/ipv4/ipvs/ip_vs_app.c @@ -25,6 +25,7 @@ #include <linux/skbuff.h> #include <linux/in.h> #include <linux/ip.h> +#include <linux/netfilter.h> #include <net/net_namespace.h> #include <net/protocol.h> #include <net/tcp.h> @@ -328,18 +329,18 @@ static inline void vs_seq_update(struct ip_vs_conn *cp, struct ip_vs_seq *vseq, spin_unlock(&cp->lock); } -static inline int app_tcp_pkt_out(struct 
ip_vs_conn *cp, struct sk_buff **pskb, +static inline int app_tcp_pkt_out(struct ip_vs_conn *cp, struct sk_buff *skb, struct ip_vs_app *app) { int diff; - const unsigned int tcp_offset = ip_hdrlen(*pskb); + const unsigned int tcp_offset = ip_hdrlen(skb); struct tcphdr *th; __u32 seq; - if (!ip_vs_make_skb_writable(pskb, tcp_offset + sizeof(*th))) + if (!skb_make_writable(skb, tcp_offset + sizeof(*th))) return 0; - th = (struct tcphdr *)(skb_network_header(*pskb) + tcp_offset); + th = (struct tcphdr *)(skb_network_header(skb) + tcp_offset); /* * Remember seq number in case this pkt gets resized @@ -360,7 +361,7 @@ static inline int app_tcp_pkt_out(struct ip_vs_conn *cp, struct sk_buff **pskb, if (app->pkt_out == NULL) return 1; - if (!app->pkt_out(app, cp, pskb, &diff)) + if (!app->pkt_out(app, cp, skb, &diff)) return 0; /* @@ -378,7 +379,7 @@ static inline int app_tcp_pkt_out(struct ip_vs_conn *cp, struct sk_buff **pskb, * called by ipvs packet handler, assumes previously checked cp!=NULL * returns false if it can't handle packet (oom) */ -int ip_vs_app_pkt_out(struct ip_vs_conn *cp, struct sk_buff **pskb) +int ip_vs_app_pkt_out(struct ip_vs_conn *cp, struct sk_buff *skb) { struct ip_vs_app *app; @@ -391,7 +392,7 @@ int ip_vs_app_pkt_out(struct ip_vs_conn *cp, struct sk_buff **pskb) /* TCP is complicated */ if (cp->protocol == IPPROTO_TCP) - return app_tcp_pkt_out(cp, pskb, app); + return app_tcp_pkt_out(cp, skb, app); /* * Call private output hook function @@ -399,22 +400,22 @@ int ip_vs_app_pkt_out(struct ip_vs_conn *cp, struct sk_buff **pskb) if (app->pkt_out == NULL) return 1; - return app->pkt_out(app, cp, pskb, NULL); + return app->pkt_out(app, cp, skb, NULL); } -static inline int app_tcp_pkt_in(struct ip_vs_conn *cp, struct sk_buff **pskb, +static inline int app_tcp_pkt_in(struct ip_vs_conn *cp, struct sk_buff *skb, struct ip_vs_app *app) { int diff; - const unsigned int tcp_offset = ip_hdrlen(*pskb); + const unsigned int tcp_offset = ip_hdrlen(skb); struct tcphdr *th; __u32 seq; - if (!ip_vs_make_skb_writable(pskb, tcp_offset + sizeof(*th))) + if (!skb_make_writable(skb, tcp_offset + sizeof(*th))) return 0; - th = (struct tcphdr *)(skb_network_header(*pskb) + tcp_offset); + th = (struct tcphdr *)(skb_network_header(skb) + tcp_offset); /* * Remember seq number in case this pkt gets resized @@ -435,7 +436,7 @@ static inline int app_tcp_pkt_in(struct ip_vs_conn *cp, struct sk_buff **pskb, if (app->pkt_in == NULL) return 1; - if (!app->pkt_in(app, cp, pskb, &diff)) + if (!app->pkt_in(app, cp, skb, &diff)) return 0; /* @@ -453,7 +454,7 @@ static inline int app_tcp_pkt_in(struct ip_vs_conn *cp, struct sk_buff **pskb, * called by ipvs packet handler, assumes previously checked cp!=NULL. * returns false if can't handle packet (oom). 
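
The IPVS hunks in this series all make the same mechanical change: helpers and hook functions take struct sk_buff * instead of struct sk_buff **. The double pointer was only needed while a writability helper could replace the buffer outright; once skb_make_writable grows the head in place (via pskb_expand_head), callers' pointers stay valid and one level of indirection disappears. A userspace illustration of that design shift; struct buf and both helpers are demo stand-ins, not kernel API:

    #include <stdlib.h>

    struct buf { char *data; size_t len; };

    /* old convention: the helper may swap the buffer, so it needs buf ** */
    static int make_writable_old(struct buf **pb, size_t need)
    {
        struct buf *nb;

        if ((*pb)->len >= need)
            return 1;
        nb = malloc(sizeof(*nb));
        if (!nb)
            return 0;
        nb->data = calloc(1, need);    /* real code would copy the payload */
        nb->len = need;
        free((*pb)->data);
        free(*pb);
        *pb = nb;                      /* the caller's pointer changes */
        return 1;
    }

    /* new convention: grow the same object in place, buf * is enough */
    static int make_writable_new(struct buf *b, size_t need)
    {
        char *nd;

        if (b->len >= need)
            return 1;
        nd = realloc(b->data, need);
        if (!nd)
            return 0;
        b->data = nd;
        b->len = need;
        return 1;
    }

    int main(void)
    {
        struct buf *p = malloc(sizeof(*p));
        struct buf b = { calloc(1, 16), 16 };

        if (!p)
            return 1;
        p->data = calloc(1, 16);
        p->len = 16;
        make_writable_old(&p, 64);     /* p may now point at a new buffer */
        make_writable_new(&b, 64);     /* b keeps its identity */
        return 0;
    }
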
*/ -int ip_vs_app_pkt_in(struct ip_vs_conn *cp, struct sk_buff **pskb) +int ip_vs_app_pkt_in(struct ip_vs_conn *cp, struct sk_buff *skb) { struct ip_vs_app *app; @@ -466,7 +467,7 @@ int ip_vs_app_pkt_in(struct ip_vs_conn *cp, struct sk_buff **pskb) /* TCP is complicated */ if (cp->protocol == IPPROTO_TCP) - return app_tcp_pkt_in(cp, pskb, app); + return app_tcp_pkt_in(cp, skb, app); /* * Call private input hook function @@ -474,7 +475,7 @@ int ip_vs_app_pkt_in(struct ip_vs_conn *cp, struct sk_buff **pskb) if (app->pkt_in == NULL) return 1; - return app->pkt_in(app, cp, pskb, NULL); + return app->pkt_in(app, cp, skb, NULL); } diff --git a/net/ipv4/ipvs/ip_vs_core.c b/net/ipv4/ipvs/ip_vs_core.c index fbca2a2ff29..c6ed7654e83 100644 --- a/net/ipv4/ipvs/ip_vs_core.c +++ b/net/ipv4/ipvs/ip_vs_core.c @@ -58,7 +58,6 @@ EXPORT_SYMBOL(ip_vs_conn_put); #ifdef CONFIG_IP_VS_DEBUG EXPORT_SYMBOL(ip_vs_get_debug_level); #endif -EXPORT_SYMBOL(ip_vs_make_skb_writable); /* ID used in ICMP lookups */ @@ -163,42 +162,6 @@ ip_vs_set_state(struct ip_vs_conn *cp, int direction, } -int ip_vs_make_skb_writable(struct sk_buff **pskb, int writable_len) -{ - struct sk_buff *skb = *pskb; - - /* skb is already used, better copy skb and its payload */ - if (unlikely(skb_shared(skb) || skb->sk)) - goto copy_skb; - - /* skb data is already used, copy it */ - if (unlikely(skb_cloned(skb))) - goto copy_data; - - return pskb_may_pull(skb, writable_len); - - copy_data: - if (unlikely(writable_len > skb->len)) - return 0; - return !pskb_expand_head(skb, 0, 0, GFP_ATOMIC); - - copy_skb: - if (unlikely(writable_len > skb->len)) - return 0; - skb = skb_copy(skb, GFP_ATOMIC); - if (!skb) - return 0; - BUG_ON(skb_is_nonlinear(skb)); - - /* Rest of kernel will get very unhappy if we pass it a - suddenly-orphaned skbuff */ - if ((*pskb)->sk) - skb_set_owner_w(skb, (*pskb)->sk); - kfree_skb(*pskb); - *pskb = skb; - return 1; -} - /* * IPVS persistent scheduling function * It creates a connection entry according to its template if exists, @@ -525,12 +488,12 @@ int ip_vs_leave(struct ip_vs_service *svc, struct sk_buff *skb, * for VS/NAT. */ static unsigned int ip_vs_post_routing(unsigned int hooknum, - struct sk_buff **pskb, + struct sk_buff *skb, const struct net_device *in, const struct net_device *out, int (*okfn)(struct sk_buff *)) { - if (!((*pskb)->ipvs_property)) + if (!skb->ipvs_property) return NF_ACCEPT; /* The packet was sent from IPVS, exit this chain */ return NF_STOP; @@ -541,13 +504,14 @@ __sum16 ip_vs_checksum_complete(struct sk_buff *skb, int offset) return csum_fold(skb_checksum(skb, offset, skb->len - offset, 0)); } -static inline struct sk_buff * -ip_vs_gather_frags(struct sk_buff *skb, u_int32_t user) +static inline int ip_vs_gather_frags(struct sk_buff *skb, u_int32_t user) { - skb = ip_defrag(skb, user); - if (skb) + int err = ip_defrag(skb, user); + + if (!err) ip_send_check(ip_hdr(skb)); - return skb; + + return err; } /* @@ -605,9 +569,8 @@ void ip_vs_nat_icmp(struct sk_buff *skb, struct ip_vs_protocol *pp, * Currently handles error types - unreachable, quench, ttl exceeded. 
* (Only used in VS/NAT) */ -static int ip_vs_out_icmp(struct sk_buff **pskb, int *related) +static int ip_vs_out_icmp(struct sk_buff *skb, int *related) { - struct sk_buff *skb = *pskb; struct iphdr *iph; struct icmphdr _icmph, *ic; struct iphdr _ciph, *cih; /* The ip header contained within the ICMP */ @@ -619,10 +582,8 @@ static int ip_vs_out_icmp(struct sk_buff **pskb, int *related) /* reassemble IP fragments */ if (ip_hdr(skb)->frag_off & htons(IP_MF | IP_OFFSET)) { - skb = ip_vs_gather_frags(skb, IP_DEFRAG_VS_OUT); - if (!skb) + if (ip_vs_gather_frags(skb, IP_DEFRAG_VS_OUT)) return NF_STOLEN; - *pskb = skb; } iph = ip_hdr(skb); @@ -690,9 +651,8 @@ static int ip_vs_out_icmp(struct sk_buff **pskb, int *related) if (IPPROTO_TCP == cih->protocol || IPPROTO_UDP == cih->protocol) offset += 2 * sizeof(__u16); - if (!ip_vs_make_skb_writable(pskb, offset)) + if (!skb_make_writable(skb, offset)) goto out; - skb = *pskb; ip_vs_nat_icmp(skb, pp, cp, 1); @@ -724,11 +684,10 @@ static inline int is_tcp_reset(const struct sk_buff *skb) * rewrite addresses of the packet and send it on its way... */ static unsigned int -ip_vs_out(unsigned int hooknum, struct sk_buff **pskb, +ip_vs_out(unsigned int hooknum, struct sk_buff *skb, const struct net_device *in, const struct net_device *out, int (*okfn)(struct sk_buff *)) { - struct sk_buff *skb = *pskb; struct iphdr *iph; struct ip_vs_protocol *pp; struct ip_vs_conn *cp; @@ -741,11 +700,10 @@ ip_vs_out(unsigned int hooknum, struct sk_buff **pskb, iph = ip_hdr(skb); if (unlikely(iph->protocol == IPPROTO_ICMP)) { - int related, verdict = ip_vs_out_icmp(pskb, &related); + int related, verdict = ip_vs_out_icmp(skb, &related); if (related) return verdict; - skb = *pskb; iph = ip_hdr(skb); } @@ -756,11 +714,9 @@ ip_vs_out(unsigned int hooknum, struct sk_buff **pskb, /* reassemble IP fragments */ if (unlikely(iph->frag_off & htons(IP_MF|IP_OFFSET) && !pp->dont_defrag)) { - skb = ip_vs_gather_frags(skb, IP_DEFRAG_VS_OUT); - if (!skb) + if (ip_vs_gather_frags(skb, IP_DEFRAG_VS_OUT)) return NF_STOLEN; iph = ip_hdr(skb); - *pskb = skb; } ihl = iph->ihl << 2; @@ -802,13 +758,12 @@ ip_vs_out(unsigned int hooknum, struct sk_buff **pskb, IP_VS_DBG_PKT(11, pp, skb, 0, "Outgoing packet"); - if (!ip_vs_make_skb_writable(pskb, ihl)) + if (!skb_make_writable(skb, ihl)) goto drop; /* mangle the packet */ - if (pp->snat_handler && !pp->snat_handler(pskb, pp, cp)) + if (pp->snat_handler && !pp->snat_handler(skb, pp, cp)) goto drop; - skb = *pskb; ip_hdr(skb)->saddr = cp->vaddr; ip_send_check(ip_hdr(skb)); @@ -818,9 +773,8 @@ ip_vs_out(unsigned int hooknum, struct sk_buff **pskb, * if it came from this machine itself. So re-compute * the routing information. */ - if (ip_route_me_harder(pskb, RTN_LOCAL) != 0) + if (ip_route_me_harder(skb, RTN_LOCAL) != 0) goto drop; - skb = *pskb; IP_VS_DBG_PKT(10, pp, skb, 0, "After SNAT"); @@ -835,7 +789,7 @@ ip_vs_out(unsigned int hooknum, struct sk_buff **pskb, drop: ip_vs_conn_put(cp); - kfree_skb(*pskb); + kfree_skb(skb); return NF_STOLEN; } @@ -847,9 +801,8 @@ ip_vs_out(unsigned int hooknum, struct sk_buff **pskb, * Currently handles error types - unreachable, quench, ttl exceeded. 
*/ static int -ip_vs_in_icmp(struct sk_buff **pskb, int *related, unsigned int hooknum) +ip_vs_in_icmp(struct sk_buff *skb, int *related, unsigned int hooknum) { - struct sk_buff *skb = *pskb; struct iphdr *iph; struct icmphdr _icmph, *ic; struct iphdr _ciph, *cih; /* The ip header contained within the ICMP */ @@ -861,12 +814,9 @@ ip_vs_in_icmp(struct sk_buff **pskb, int *related, unsigned int hooknum) /* reassemble IP fragments */ if (ip_hdr(skb)->frag_off & htons(IP_MF | IP_OFFSET)) { - skb = ip_vs_gather_frags(skb, - hooknum == NF_IP_LOCAL_IN ? - IP_DEFRAG_VS_IN : IP_DEFRAG_VS_FWD); - if (!skb) + if (ip_vs_gather_frags(skb, hooknum == NF_IP_LOCAL_IN ? + IP_DEFRAG_VS_IN : IP_DEFRAG_VS_FWD)) return NF_STOLEN; - *pskb = skb; } iph = ip_hdr(skb); @@ -945,11 +895,10 @@ ip_vs_in_icmp(struct sk_buff **pskb, int *related, unsigned int hooknum) * and send it on its way... */ static unsigned int -ip_vs_in(unsigned int hooknum, struct sk_buff **pskb, +ip_vs_in(unsigned int hooknum, struct sk_buff *skb, const struct net_device *in, const struct net_device *out, int (*okfn)(struct sk_buff *)) { - struct sk_buff *skb = *pskb; struct iphdr *iph; struct ip_vs_protocol *pp; struct ip_vs_conn *cp; @@ -971,11 +920,10 @@ ip_vs_in(unsigned int hooknum, struct sk_buff **pskb, iph = ip_hdr(skb); if (unlikely(iph->protocol == IPPROTO_ICMP)) { - int related, verdict = ip_vs_in_icmp(pskb, &related, hooknum); + int related, verdict = ip_vs_in_icmp(skb, &related, hooknum); if (related) return verdict; - skb = *pskb; iph = ip_hdr(skb); } @@ -1056,16 +1004,16 @@ ip_vs_in(unsigned int hooknum, struct sk_buff **pskb, * and send them to ip_vs_in_icmp. */ static unsigned int -ip_vs_forward_icmp(unsigned int hooknum, struct sk_buff **pskb, +ip_vs_forward_icmp(unsigned int hooknum, struct sk_buff *skb, const struct net_device *in, const struct net_device *out, int (*okfn)(struct sk_buff *)) { int r; - if (ip_hdr(*pskb)->protocol != IPPROTO_ICMP) + if (ip_hdr(skb)->protocol != IPPROTO_ICMP) return NF_ACCEPT; - return ip_vs_in_icmp(pskb, &r, hooknum); + return ip_vs_in_icmp(skb, &r, hooknum); } diff --git a/net/ipv4/ipvs/ip_vs_ftp.c b/net/ipv4/ipvs/ip_vs_ftp.c index 344ddbbdc75..59aa166b767 100644 --- a/net/ipv4/ipvs/ip_vs_ftp.c +++ b/net/ipv4/ipvs/ip_vs_ftp.c @@ -30,6 +30,7 @@ #include <linux/skbuff.h> #include <linux/in.h> #include <linux/ip.h> +#include <linux/netfilter.h> #include <net/protocol.h> #include <net/tcp.h> #include <asm/unaligned.h> @@ -135,7 +136,7 @@ static int ip_vs_ftp_get_addrport(char *data, char *data_limit, * xxx,xxx,xxx,xxx is the server address, ppp,ppp is the server port number. */ static int ip_vs_ftp_out(struct ip_vs_app *app, struct ip_vs_conn *cp, - struct sk_buff **pskb, int *diff) + struct sk_buff *skb, int *diff) { struct iphdr *iph; struct tcphdr *th; @@ -155,14 +156,14 @@ static int ip_vs_ftp_out(struct ip_vs_app *app, struct ip_vs_conn *cp, return 1; /* Linear packets are much easier to deal with. 
*/ - if (!ip_vs_make_skb_writable(pskb, (*pskb)->len)) + if (!skb_make_writable(skb, skb->len)) return 0; if (cp->app_data == &ip_vs_ftp_pasv) { - iph = ip_hdr(*pskb); + iph = ip_hdr(skb); th = (struct tcphdr *)&(((char *)iph)[iph->ihl*4]); data = (char *)th + (th->doff << 2); - data_limit = skb_tail_pointer(*pskb); + data_limit = skb_tail_pointer(skb); if (ip_vs_ftp_get_addrport(data, data_limit, SERVER_STRING, @@ -213,7 +214,7 @@ static int ip_vs_ftp_out(struct ip_vs_app *app, struct ip_vs_conn *cp, memcpy(start, buf, buf_len); ret = 1; } else { - ret = !ip_vs_skb_replace(*pskb, GFP_ATOMIC, start, + ret = !ip_vs_skb_replace(skb, GFP_ATOMIC, start, end-start, buf, buf_len); } @@ -238,7 +239,7 @@ static int ip_vs_ftp_out(struct ip_vs_app *app, struct ip_vs_conn *cp, * the client. */ static int ip_vs_ftp_in(struct ip_vs_app *app, struct ip_vs_conn *cp, - struct sk_buff **pskb, int *diff) + struct sk_buff *skb, int *diff) { struct iphdr *iph; struct tcphdr *th; @@ -256,20 +257,20 @@ static int ip_vs_ftp_in(struct ip_vs_app *app, struct ip_vs_conn *cp, return 1; /* Linear packets are much easier to deal with. */ - if (!ip_vs_make_skb_writable(pskb, (*pskb)->len)) + if (!skb_make_writable(skb, skb->len)) return 0; /* * Detecting whether it is passive */ - iph = ip_hdr(*pskb); + iph = ip_hdr(skb); th = (struct tcphdr *)&(((char *)iph)[iph->ihl*4]); /* Since there may be OPTIONS in the TCP packet and the HLEN is the length of the header in 32-bit multiples, it is accurate to calculate data address by th+HLEN*4 */ data = data_start = (char *)th + (th->doff << 2); - data_limit = skb_tail_pointer(*pskb); + data_limit = skb_tail_pointer(skb); while (data <= data_limit - 6) { if (strnicmp(data, "PASV\r\n", 6) == 0) { diff --git a/net/ipv4/ipvs/ip_vs_proto_tcp.c b/net/ipv4/ipvs/ip_vs_proto_tcp.c index e65577a7700..12dc0d640b6 100644 --- a/net/ipv4/ipvs/ip_vs_proto_tcp.c +++ b/net/ipv4/ipvs/ip_vs_proto_tcp.c @@ -20,6 +20,7 @@ #include <linux/tcp.h> /* for tcphdr */ #include <net/ip.h> #include <net/tcp.h> /* for csum_tcpudp_magic */ +#include <linux/netfilter.h> #include <linux/netfilter_ipv4.h> #include <net/ip_vs.h> @@ -122,27 +123,27 @@ tcp_fast_csum_update(struct tcphdr *tcph, __be32 oldip, __be32 newip, static int -tcp_snat_handler(struct sk_buff **pskb, +tcp_snat_handler(struct sk_buff *skb, struct ip_vs_protocol *pp, struct ip_vs_conn *cp) { struct tcphdr *tcph; - const unsigned int tcphoff = ip_hdrlen(*pskb); + const unsigned int tcphoff = ip_hdrlen(skb); /* csum_check requires unshared skb */ - if (!ip_vs_make_skb_writable(pskb, tcphoff+sizeof(*tcph))) + if (!skb_make_writable(skb, tcphoff+sizeof(*tcph))) return 0; if (unlikely(cp->app != NULL)) { /* Some checks before mangling */ - if (pp->csum_check && !pp->csum_check(*pskb, pp)) + if (pp->csum_check && !pp->csum_check(skb, pp)) return 0; /* Call application helper if needed */ - if (!ip_vs_app_pkt_out(cp, pskb)) + if (!ip_vs_app_pkt_out(cp, skb)) return 0; } - tcph = (void *)ip_hdr(*pskb) + tcphoff; + tcph = (void *)ip_hdr(skb) + tcphoff; tcph->source = cp->vport; /* Adjust TCP checksums */ @@ -150,17 +151,15 @@ tcp_snat_handler(struct sk_buff **pskb, /* Only port and addr are changed, do fast csum update */ tcp_fast_csum_update(tcph, cp->daddr, cp->vaddr, cp->dport, cp->vport); - if ((*pskb)->ip_summed == CHECKSUM_COMPLETE) - (*pskb)->ip_summed = CHECKSUM_NONE; + if (skb->ip_summed == CHECKSUM_COMPLETE) + skb->ip_summed = CHECKSUM_NONE; } else { /* full checksum calculation */ tcph->check = 0; - (*pskb)->csum = skb_checksum(*pskb, tcphoff, 
- (*pskb)->len - tcphoff, 0); + skb->csum = skb_checksum(skb, tcphoff, skb->len - tcphoff, 0); tcph->check = csum_tcpudp_magic(cp->vaddr, cp->caddr, - (*pskb)->len - tcphoff, - cp->protocol, - (*pskb)->csum); + skb->len - tcphoff, + cp->protocol, skb->csum); IP_VS_DBG(11, "O-pkt: %s O-csum=%d (+%zd)\n", pp->name, tcph->check, (char*)&(tcph->check) - (char*)tcph); @@ -170,30 +169,30 @@ tcp_snat_handler(struct sk_buff **pskb, static int -tcp_dnat_handler(struct sk_buff **pskb, +tcp_dnat_handler(struct sk_buff *skb, struct ip_vs_protocol *pp, struct ip_vs_conn *cp) { struct tcphdr *tcph; - const unsigned int tcphoff = ip_hdrlen(*pskb); + const unsigned int tcphoff = ip_hdrlen(skb); /* csum_check requires unshared skb */ - if (!ip_vs_make_skb_writable(pskb, tcphoff+sizeof(*tcph))) + if (!skb_make_writable(skb, tcphoff+sizeof(*tcph))) return 0; if (unlikely(cp->app != NULL)) { /* Some checks before mangling */ - if (pp->csum_check && !pp->csum_check(*pskb, pp)) + if (pp->csum_check && !pp->csum_check(skb, pp)) return 0; /* * Attempt ip_vs_app call. * It will fix ip_vs_conn and iph ack_seq stuff */ - if (!ip_vs_app_pkt_in(cp, pskb)) + if (!ip_vs_app_pkt_in(cp, skb)) return 0; } - tcph = (void *)ip_hdr(*pskb) + tcphoff; + tcph = (void *)ip_hdr(skb) + tcphoff; tcph->dest = cp->dport; /* @@ -203,18 +202,16 @@ tcp_dnat_handler(struct sk_buff **pskb, /* Only port and addr are changed, do fast csum update */ tcp_fast_csum_update(tcph, cp->vaddr, cp->daddr, cp->vport, cp->dport); - if ((*pskb)->ip_summed == CHECKSUM_COMPLETE) - (*pskb)->ip_summed = CHECKSUM_NONE; + if (skb->ip_summed == CHECKSUM_COMPLETE) + skb->ip_summed = CHECKSUM_NONE; } else { /* full checksum calculation */ tcph->check = 0; - (*pskb)->csum = skb_checksum(*pskb, tcphoff, - (*pskb)->len - tcphoff, 0); + skb->csum = skb_checksum(skb, tcphoff, skb->len - tcphoff, 0); tcph->check = csum_tcpudp_magic(cp->caddr, cp->daddr, - (*pskb)->len - tcphoff, - cp->protocol, - (*pskb)->csum); - (*pskb)->ip_summed = CHECKSUM_UNNECESSARY; + skb->len - tcphoff, + cp->protocol, skb->csum); + skb->ip_summed = CHECKSUM_UNNECESSARY; } return 1; } diff --git a/net/ipv4/ipvs/ip_vs_proto_udp.c b/net/ipv4/ipvs/ip_vs_proto_udp.c index 8ee5fe6a101..1fa7b330b9a 100644 --- a/net/ipv4/ipvs/ip_vs_proto_udp.c +++ b/net/ipv4/ipvs/ip_vs_proto_udp.c @@ -18,6 +18,7 @@ #include <linux/in.h> #include <linux/ip.h> #include <linux/kernel.h> +#include <linux/netfilter.h> #include <linux/netfilter_ipv4.h> #include <linux/udp.h> @@ -129,29 +130,29 @@ udp_fast_csum_update(struct udphdr *uhdr, __be32 oldip, __be32 newip, } static int -udp_snat_handler(struct sk_buff **pskb, +udp_snat_handler(struct sk_buff *skb, struct ip_vs_protocol *pp, struct ip_vs_conn *cp) { struct udphdr *udph; - const unsigned int udphoff = ip_hdrlen(*pskb); + const unsigned int udphoff = ip_hdrlen(skb); /* csum_check requires unshared skb */ - if (!ip_vs_make_skb_writable(pskb, udphoff+sizeof(*udph))) + if (!skb_make_writable(skb, udphoff+sizeof(*udph))) return 0; if (unlikely(cp->app != NULL)) { /* Some checks before mangling */ - if (pp->csum_check && !pp->csum_check(*pskb, pp)) + if (pp->csum_check && !pp->csum_check(skb, pp)) return 0; /* * Call application helper if needed */ - if (!ip_vs_app_pkt_out(cp, pskb)) + if (!ip_vs_app_pkt_out(cp, skb)) return 0; } - udph = (void *)ip_hdr(*pskb) + udphoff; + udph = (void *)ip_hdr(skb) + udphoff; udph->source = cp->vport; /* @@ -161,17 +162,15 @@ udp_snat_handler(struct sk_buff **pskb, /* Only port and addr are changed, do fast csum update */ 
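
The full-recalculation branches above zero the transport checksum, sum the payload with skb_checksum, and finish with csum_tcpudp_magic over the pseudo-header (source address, destination address, length, protocol). A sketch of that final step, done in host byte order for readability (the kernel operates on __be32/__be16); csum_tcpudp here is a demo re-derivation, not the kernel function:

    #include <stdint.h>
    #include <stdio.h>

    static uint32_t add1c(uint32_t a, uint32_t b)   /* ones-complement add */
    {
        a += b;
        return a + (a < b);
    }

    static uint16_t fold(uint32_t s)
    {
        while (s >> 16)
            s = (s & 0xffff) + (s >> 16);
        return (uint16_t)~s;
    }

    static uint16_t csum_tcpudp(uint32_t saddr, uint32_t daddr,
                                uint16_t len, uint8_t proto,
                                uint32_t payload_sum)
    {
        uint32_t s = payload_sum;

        s = add1c(s, saddr >> 16);
        s = add1c(s, saddr & 0xffff);
        s = add1c(s, daddr >> 16);
        s = add1c(s, daddr & 0xffff);
        s = add1c(s, proto);           /* pseudo-header word: zero byte + proto */
        s = add1c(s, len);
        return fold(s);
    }

    int main(void)
    {
        /* 10.0.0.1 -> 10.0.0.2, UDP (17), 8-byte datagram, empty payload sum */
        printf("check=%04x\n", csum_tcpudp(0x0a000001, 0x0a000002, 8, 17, 0));
        return 0;
    }

The fast path taken when no application helper resized the packet skips all of this and only patches the checksum incrementally for the changed address and port.
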
udp_fast_csum_update(udph, cp->daddr, cp->vaddr, cp->dport, cp->vport); - if ((*pskb)->ip_summed == CHECKSUM_COMPLETE) - (*pskb)->ip_summed = CHECKSUM_NONE; + if (skb->ip_summed == CHECKSUM_COMPLETE) + skb->ip_summed = CHECKSUM_NONE; } else { /* full checksum calculation */ udph->check = 0; - (*pskb)->csum = skb_checksum(*pskb, udphoff, - (*pskb)->len - udphoff, 0); + skb->csum = skb_checksum(skb, udphoff, skb->len - udphoff, 0); udph->check = csum_tcpudp_magic(cp->vaddr, cp->caddr, - (*pskb)->len - udphoff, - cp->protocol, - (*pskb)->csum); + skb->len - udphoff, + cp->protocol, skb->csum); if (udph->check == 0) udph->check = CSUM_MANGLED_0; IP_VS_DBG(11, "O-pkt: %s O-csum=%d (+%zd)\n", @@ -183,30 +182,30 @@ udp_snat_handler(struct sk_buff **pskb, static int -udp_dnat_handler(struct sk_buff **pskb, +udp_dnat_handler(struct sk_buff *skb, struct ip_vs_protocol *pp, struct ip_vs_conn *cp) { struct udphdr *udph; - unsigned int udphoff = ip_hdrlen(*pskb); + unsigned int udphoff = ip_hdrlen(skb); /* csum_check requires unshared skb */ - if (!ip_vs_make_skb_writable(pskb, udphoff+sizeof(*udph))) + if (!skb_make_writable(skb, udphoff+sizeof(*udph))) return 0; if (unlikely(cp->app != NULL)) { /* Some checks before mangling */ - if (pp->csum_check && !pp->csum_check(*pskb, pp)) + if (pp->csum_check && !pp->csum_check(skb, pp)) return 0; /* * Attempt ip_vs_app call. * It will fix ip_vs_conn */ - if (!ip_vs_app_pkt_in(cp, pskb)) + if (!ip_vs_app_pkt_in(cp, skb)) return 0; } - udph = (void *)ip_hdr(*pskb) + udphoff; + udph = (void *)ip_hdr(skb) + udphoff; udph->dest = cp->dport; /* @@ -216,20 +215,18 @@ udp_dnat_handler(struct sk_buff **pskb, /* Only port and addr are changed, do fast csum update */ udp_fast_csum_update(udph, cp->vaddr, cp->daddr, cp->vport, cp->dport); - if ((*pskb)->ip_summed == CHECKSUM_COMPLETE) - (*pskb)->ip_summed = CHECKSUM_NONE; + if (skb->ip_summed == CHECKSUM_COMPLETE) + skb->ip_summed = CHECKSUM_NONE; } else { /* full checksum calculation */ udph->check = 0; - (*pskb)->csum = skb_checksum(*pskb, udphoff, - (*pskb)->len - udphoff, 0); + skb->csum = skb_checksum(skb, udphoff, skb->len - udphoff, 0); udph->check = csum_tcpudp_magic(cp->caddr, cp->daddr, - (*pskb)->len - udphoff, - cp->protocol, - (*pskb)->csum); + skb->len - udphoff, + cp->protocol, skb->csum); if (udph->check == 0) udph->check = CSUM_MANGLED_0; - (*pskb)->ip_summed = CHECKSUM_UNNECESSARY; + skb->ip_summed = CHECKSUM_UNNECESSARY; } return 1; } diff --git a/net/ipv4/ipvs/ip_vs_sync.c b/net/ipv4/ipvs/ip_vs_sync.c index 1960747f354..c99f2a33fb9 100644 --- a/net/ipv4/ipvs/ip_vs_sync.c +++ b/net/ipv4/ipvs/ip_vs_sync.c @@ -794,7 +794,7 @@ static int sync_thread(void *startup) add_wait_queue(&sync_wait, &wait); - set_sync_pid(state, current->pid); + set_sync_pid(state, task_pid_nr(current)); complete(tinfo->startup); /* @@ -877,7 +877,7 @@ int start_sync_thread(int state, char *mcast_ifn, __u8 syncid) if (!tinfo) return -ENOMEM; - IP_VS_DBG(7, "%s: pid %d\n", __FUNCTION__, current->pid); + IP_VS_DBG(7, "%s: pid %d\n", __FUNCTION__, task_pid_nr(current)); IP_VS_DBG(7, "Each ip_vs_sync_conn entry need %Zd bytes\n", sizeof(struct ip_vs_sync_conn)); @@ -917,7 +917,7 @@ int stop_sync_thread(int state) (state == IP_VS_STATE_BACKUP && !sync_backup_pid)) return -ESRCH; - IP_VS_DBG(7, "%s: pid %d\n", __FUNCTION__, current->pid); + IP_VS_DBG(7, "%s: pid %d\n", __FUNCTION__, task_pid_nr(current)); IP_VS_INFO("stopping sync thread %d ...\n", (state == IP_VS_STATE_MASTER) ? 
sync_master_pid : sync_backup_pid); diff --git a/net/ipv4/ipvs/ip_vs_xmit.c b/net/ipv4/ipvs/ip_vs_xmit.c index 666e080a74a..d0a92dec105 100644 --- a/net/ipv4/ipvs/ip_vs_xmit.c +++ b/net/ipv4/ipvs/ip_vs_xmit.c @@ -253,7 +253,7 @@ ip_vs_nat_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, } /* copy-on-write the packet before mangling it */ - if (!ip_vs_make_skb_writable(&skb, sizeof(struct iphdr))) + if (!skb_make_writable(skb, sizeof(struct iphdr))) goto tx_error_put; if (skb_cow(skb, rt->u.dst.dev->hard_header_len)) @@ -264,7 +264,7 @@ ip_vs_nat_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, skb->dst = &rt->u.dst; /* mangle the packet */ - if (pp->dnat_handler && !pp->dnat_handler(&skb, pp, cp)) + if (pp->dnat_handler && !pp->dnat_handler(skb, pp, cp)) goto tx_error; ip_hdr(skb)->daddr = cp->daddr; ip_send_check(ip_hdr(skb)); @@ -529,7 +529,7 @@ ip_vs_icmp_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, } /* copy-on-write the packet before mangling it */ - if (!ip_vs_make_skb_writable(&skb, offset)) + if (!skb_make_writable(skb, offset)) goto tx_error_put; if (skb_cow(skb, rt->u.dst.dev->hard_header_len)) diff --git a/net/ipv4/netfilter.c b/net/ipv4/netfilter.c index b44192924f9..5539debf497 100644 --- a/net/ipv4/netfilter.c +++ b/net/ipv4/netfilter.c @@ -3,14 +3,15 @@ #include <linux/netfilter.h> #include <linux/netfilter_ipv4.h> #include <linux/ip.h> +#include <linux/skbuff.h> #include <net/route.h> #include <net/xfrm.h> #include <net/ip.h> /* route_me_harder function, used by iptable_nat, iptable_mangle + ip_queue */ -int ip_route_me_harder(struct sk_buff **pskb, unsigned addr_type) +int ip_route_me_harder(struct sk_buff *skb, unsigned addr_type) { - const struct iphdr *iph = ip_hdr(*pskb); + const struct iphdr *iph = ip_hdr(skb); struct rtable *rt; struct flowi fl = {}; struct dst_entry *odst; @@ -29,14 +30,14 @@ int ip_route_me_harder(struct sk_buff **pskb, unsigned addr_type) if (type == RTN_LOCAL) fl.nl_u.ip4_u.saddr = iph->saddr; fl.nl_u.ip4_u.tos = RT_TOS(iph->tos); - fl.oif = (*pskb)->sk ? (*pskb)->sk->sk_bound_dev_if : 0; - fl.mark = (*pskb)->mark; + fl.oif = skb->sk ? skb->sk->sk_bound_dev_if : 0; + fl.mark = skb->mark; if (ip_route_output_key(&rt, &fl) != 0) return -1; /* Drop old route. */ - dst_release((*pskb)->dst); - (*pskb)->dst = &rt->u.dst; + dst_release(skb->dst); + skb->dst = &rt->u.dst; } else { /* non-local src, find valid iif to satisfy * rp-filter when calling ip_route_input. */ @@ -44,8 +45,8 @@ int ip_route_me_harder(struct sk_buff **pskb, unsigned addr_type) if (ip_route_output_key(&rt, &fl) != 0) return -1; - odst = (*pskb)->dst; - if (ip_route_input(*pskb, iph->daddr, iph->saddr, + odst = skb->dst; + if (ip_route_input(skb, iph->daddr, iph->saddr, RT_TOS(iph->tos), rt->u.dst.dev) != 0) { dst_release(&rt->u.dst); return -1; @@ -54,70 +55,54 @@ int ip_route_me_harder(struct sk_buff **pskb, unsigned addr_type) dst_release(odst); } - if ((*pskb)->dst->error) + if (skb->dst->error) return -1; #ifdef CONFIG_XFRM - if (!(IPCB(*pskb)->flags & IPSKB_XFRM_TRANSFORMED) && - xfrm_decode_session(*pskb, &fl, AF_INET) == 0) - if (xfrm_lookup(&(*pskb)->dst, &fl, (*pskb)->sk, 0)) + if (!(IPCB(skb)->flags & IPSKB_XFRM_TRANSFORMED) && + xfrm_decode_session(skb, &fl, AF_INET) == 0) + if (xfrm_lookup(&skb->dst, &fl, skb->sk, 0)) return -1; #endif /* Change in oif may mean change in hh_len. 
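
After rerouting, ip_route_me_harder may find that the new output device needs more link-layer headroom than the skb has, so the rewritten code above grows the head in place by exactly the shortfall with pskb_expand_head instead of copying into a fresh skb and swapping pointers. A simplified sketch of that headroom fix-up; struct pkt and expand_head are demo stand-ins for the skb layout, not kernel API:

    #include <stdio.h>
    #include <stdlib.h>
    #include <string.h>

    struct pkt { unsigned char *head, *data; size_t size, len; };

    static int expand_head(struct pkt *p, size_t extra)
    {
        size_t off = p->data - p->head;
        unsigned char *nh = malloc(p->size + extra);

        if (!nh)
            return -1;
        /* keep the payload, leaving `extra` new bytes in front of it */
        memcpy(nh + extra + off, p->data, p->len);
        free(p->head);
        p->head = nh;
        p->data = nh + extra + off;
        p->size += extra;
        return 0;
    }

    int main(void)
    {
        struct pkt p = { malloc(64), NULL, 64, 20 };
        size_t hh_len = 32, headroom;

        p.data = p.head + 8;           /* only 8 bytes of headroom */
        memset(p.data, 0xab, p.len);

        headroom = p.data - p.head;
        if (headroom < hh_len && expand_head(&p, hh_len - headroom))
            return 1;                  /* cf. pskb_expand_head failure */
        printf("headroom now %zu\n", (size_t)(p.data - p.head));
        return 0;
    }
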
*/ - hh_len = (*pskb)->dst->dev->hard_header_len; - if (skb_headroom(*pskb) < hh_len) { - struct sk_buff *nskb; - - nskb = skb_realloc_headroom(*pskb, hh_len); - if (!nskb) - return -1; - if ((*pskb)->sk) - skb_set_owner_w(nskb, (*pskb)->sk); - kfree_skb(*pskb); - *pskb = nskb; - } + hh_len = skb->dst->dev->hard_header_len; + if (skb_headroom(skb) < hh_len && + pskb_expand_head(skb, hh_len - skb_headroom(skb), 0, GFP_ATOMIC)) + return -1; return 0; } EXPORT_SYMBOL(ip_route_me_harder); #ifdef CONFIG_XFRM -int ip_xfrm_me_harder(struct sk_buff **pskb) +int ip_xfrm_me_harder(struct sk_buff *skb) { struct flowi fl; unsigned int hh_len; struct dst_entry *dst; - if (IPCB(*pskb)->flags & IPSKB_XFRM_TRANSFORMED) + if (IPCB(skb)->flags & IPSKB_XFRM_TRANSFORMED) return 0; - if (xfrm_decode_session(*pskb, &fl, AF_INET) < 0) + if (xfrm_decode_session(skb, &fl, AF_INET) < 0) return -1; - dst = (*pskb)->dst; + dst = skb->dst; if (dst->xfrm) dst = ((struct xfrm_dst *)dst)->route; dst_hold(dst); - if (xfrm_lookup(&dst, &fl, (*pskb)->sk, 0) < 0) + if (xfrm_lookup(&dst, &fl, skb->sk, 0) < 0) return -1; - dst_release((*pskb)->dst); - (*pskb)->dst = dst; + dst_release(skb->dst); + skb->dst = dst; /* Change in oif may mean change in hh_len. */ - hh_len = (*pskb)->dst->dev->hard_header_len; - if (skb_headroom(*pskb) < hh_len) { - struct sk_buff *nskb; - - nskb = skb_realloc_headroom(*pskb, hh_len); - if (!nskb) - return -1; - if ((*pskb)->sk) - skb_set_owner_w(nskb, (*pskb)->sk); - kfree_skb(*pskb); - *pskb = nskb; - } + hh_len = skb->dst->dev->hard_header_len; + if (skb_headroom(skb) < hh_len && + pskb_expand_head(skb, hh_len - skb_headroom(skb), 0, GFP_ATOMIC)) + return -1; return 0; } EXPORT_SYMBOL(ip_xfrm_me_harder); @@ -150,17 +135,17 @@ static void nf_ip_saveroute(const struct sk_buff *skb, struct nf_info *info) } } -static int nf_ip_reroute(struct sk_buff **pskb, const struct nf_info *info) +static int nf_ip_reroute(struct sk_buff *skb, const struct nf_info *info) { const struct ip_rt_info *rt_info = nf_info_reroute(info); if (info->hook == NF_IP_LOCAL_OUT) { - const struct iphdr *iph = ip_hdr(*pskb); + const struct iphdr *iph = ip_hdr(skb); if (!(iph->tos == rt_info->tos && iph->daddr == rt_info->daddr && iph->saddr == rt_info->saddr)) - return ip_route_me_harder(pskb, RTN_UNSPEC); + return ip_route_me_harder(skb, RTN_UNSPEC); } return 0; } diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c index 29114a9ccd1..2909c92ecd9 100644 --- a/net/ipv4/netfilter/arp_tables.c +++ b/net/ipv4/netfilter/arp_tables.c @@ -197,7 +197,7 @@ static inline int arp_checkentry(const struct arpt_arp *arp) return 1; } -static unsigned int arpt_error(struct sk_buff **pskb, +static unsigned int arpt_error(struct sk_buff *skb, const struct net_device *in, const struct net_device *out, unsigned int hooknum, @@ -215,7 +215,7 @@ static inline struct arpt_entry *get_entry(void *base, unsigned int offset) return (struct arpt_entry *)(base + offset); } -unsigned int arpt_do_table(struct sk_buff **pskb, +unsigned int arpt_do_table(struct sk_buff *skb, unsigned int hook, const struct net_device *in, const struct net_device *out, @@ -231,9 +231,9 @@ unsigned int arpt_do_table(struct sk_buff **pskb, struct xt_table_info *private; /* ARP header, plus 2 device addresses, plus 2 IP addresses. 
*/ - if (!pskb_may_pull((*pskb), (sizeof(struct arphdr) + - (2 * (*pskb)->dev->addr_len) + - (2 * sizeof(u32))))) + if (!pskb_may_pull(skb, (sizeof(struct arphdr) + + (2 * skb->dev->addr_len) + + (2 * sizeof(u32))))) return NF_DROP; indev = in ? in->name : nulldevname; @@ -245,14 +245,14 @@ unsigned int arpt_do_table(struct sk_buff **pskb, e = get_entry(table_base, private->hook_entry[hook]); back = get_entry(table_base, private->underflow[hook]); - arp = arp_hdr(*pskb); + arp = arp_hdr(skb); do { - if (arp_packet_match(arp, (*pskb)->dev, indev, outdev, &e->arp)) { + if (arp_packet_match(arp, skb->dev, indev, outdev, &e->arp)) { struct arpt_entry_target *t; int hdr_len; hdr_len = sizeof(*arp) + (2 * sizeof(struct in_addr)) + - (2 * (*pskb)->dev->addr_len); + (2 * skb->dev->addr_len); ADD_COUNTER(e->counters, hdr_len, 1); t = arpt_get_target(e); @@ -290,14 +290,14 @@ unsigned int arpt_do_table(struct sk_buff **pskb, /* Targets which reenter must return * abs. verdicts */ - verdict = t->u.kernel.target->target(pskb, + verdict = t->u.kernel.target->target(skb, in, out, hook, t->u.kernel.target, t->data); /* Target might have changed stuff. */ - arp = arp_hdr(*pskb); + arp = arp_hdr(skb); if (verdict == ARPT_CONTINUE) e = (void *)e + e->next_offset; diff --git a/net/ipv4/netfilter/arpt_mangle.c b/net/ipv4/netfilter/arpt_mangle.c index c4bdab47597..45fa4e20094 100644 --- a/net/ipv4/netfilter/arpt_mangle.c +++ b/net/ipv4/netfilter/arpt_mangle.c @@ -1,5 +1,6 @@ /* module that allows mangling of the arp payload */ #include <linux/module.h> +#include <linux/netfilter.h> #include <linux/netfilter_arp/arpt_mangle.h> #include <net/sock.h> @@ -8,7 +9,7 @@ MODULE_AUTHOR("Bart De Schuymer <bdschuym@pandora.be>"); MODULE_DESCRIPTION("arptables arp payload mangle target"); static unsigned int -target(struct sk_buff **pskb, +target(struct sk_buff *skb, const struct net_device *in, const struct net_device *out, unsigned int hooknum, const struct xt_target *target, const void *targinfo) @@ -18,47 +19,38 @@ target(struct sk_buff **pskb, unsigned char *arpptr; int pln, hln; - if (skb_shared(*pskb) || skb_cloned(*pskb)) { - struct sk_buff *nskb; + if (!skb_make_writable(skb, skb->len)) + return NF_DROP; - nskb = skb_copy(*pskb, GFP_ATOMIC); - if (!nskb) - return NF_DROP; - if ((*pskb)->sk) - skb_set_owner_w(nskb, (*pskb)->sk); - kfree_skb(*pskb); - *pskb = nskb; - } - - arp = arp_hdr(*pskb); - arpptr = skb_network_header(*pskb) + sizeof(*arp); + arp = arp_hdr(skb); + arpptr = skb_network_header(skb) + sizeof(*arp); pln = arp->ar_pln; hln = arp->ar_hln; /* We assume that pln and hln were checked in the match */ if (mangle->flags & ARPT_MANGLE_SDEV) { if (ARPT_DEV_ADDR_LEN_MAX < hln || - (arpptr + hln > skb_tail_pointer(*pskb))) + (arpptr + hln > skb_tail_pointer(skb))) return NF_DROP; memcpy(arpptr, mangle->src_devaddr, hln); } arpptr += hln; if (mangle->flags & ARPT_MANGLE_SIP) { if (ARPT_MANGLE_ADDR_LEN_MAX < pln || - (arpptr + pln > skb_tail_pointer(*pskb))) + (arpptr + pln > skb_tail_pointer(skb))) return NF_DROP; memcpy(arpptr, &mangle->u_s.src_ip, pln); } arpptr += pln; if (mangle->flags & ARPT_MANGLE_TDEV) { if (ARPT_DEV_ADDR_LEN_MAX < hln || - (arpptr + hln > skb_tail_pointer(*pskb))) + (arpptr + hln > skb_tail_pointer(skb))) return NF_DROP; memcpy(arpptr, mangle->tgt_devaddr, hln); } arpptr += hln; if (mangle->flags & ARPT_MANGLE_TIP) { if (ARPT_MANGLE_ADDR_LEN_MAX < pln || - (arpptr + pln > skb_tail_pointer(*pskb))) + (arpptr + pln > skb_tail_pointer(skb))) return NF_DROP; memcpy(arpptr, 
&mangle->u_t.tgt_ip, pln); } diff --git a/net/ipv4/netfilter/arptable_filter.c b/net/ipv4/netfilter/arptable_filter.c index 75c02306253..302d3da5f69 100644 --- a/net/ipv4/netfilter/arptable_filter.c +++ b/net/ipv4/netfilter/arptable_filter.c @@ -56,12 +56,12 @@ static struct arpt_table packet_filter = { /* The work comes in here from netfilter.c */ static unsigned int arpt_hook(unsigned int hook, - struct sk_buff **pskb, + struct sk_buff *skb, const struct net_device *in, const struct net_device *out, int (*okfn)(struct sk_buff *)) { - return arpt_do_table(pskb, hook, in, out, &packet_filter); + return arpt_do_table(skb, hook, in, out, &packet_filter); } static struct nf_hook_ops arpt_ops[] = { diff --git a/net/ipv4/netfilter/ip_queue.c b/net/ipv4/netfilter/ip_queue.c index 23cbfc7c80f..10a2ce09fd8 100644 --- a/net/ipv4/netfilter/ip_queue.c +++ b/net/ipv4/netfilter/ip_queue.c @@ -335,6 +335,7 @@ static int ipq_mangle_ipv4(ipq_verdict_msg_t *v, struct ipq_queue_entry *e) { int diff; + int err; struct iphdr *user_iph = (struct iphdr *)v->payload; if (v->data_len < sizeof(*user_iph)) @@ -347,25 +348,18 @@ ipq_mangle_ipv4(ipq_verdict_msg_t *v, struct ipq_queue_entry *e) if (v->data_len > 0xFFFF) return -EINVAL; if (diff > skb_tailroom(e->skb)) { - struct sk_buff *newskb; - - newskb = skb_copy_expand(e->skb, - skb_headroom(e->skb), - diff, - GFP_ATOMIC); - if (newskb == NULL) { - printk(KERN_WARNING "ip_queue: OOM " - "in mangle, dropping packet\n"); - return -ENOMEM; + err = pskb_expand_head(e->skb, 0, + diff - skb_tailroom(e->skb), + GFP_ATOMIC); + if (err) { + printk(KERN_WARNING "ip_queue: error " + "in mangle, dropping packet: %d\n", -err); + return err; } - if (e->skb->sk) - skb_set_owner_w(newskb, e->skb->sk); - kfree_skb(e->skb); - e->skb = newskb; } skb_put(e->skb, diff); } - if (!skb_make_writable(&e->skb, v->data_len)) + if (!skb_make_writable(e->skb, v->data_len)) return -ENOMEM; skb_copy_to_linear_data(e->skb, v->payload, v->data_len); e->skb->ip_summed = CHECKSUM_NONE; diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c index 6486894f450..4b10b98640a 100644 --- a/net/ipv4/netfilter/ip_tables.c +++ b/net/ipv4/netfilter/ip_tables.c @@ -169,7 +169,7 @@ ip_checkentry(const struct ipt_ip *ip) } static unsigned int -ipt_error(struct sk_buff **pskb, +ipt_error(struct sk_buff *skb, const struct net_device *in, const struct net_device *out, unsigned int hooknum, @@ -312,7 +312,7 @@ static void trace_packet(struct sk_buff *skb, /* Returns one of the generic firewall policies, like NF_ACCEPT. */ unsigned int -ipt_do_table(struct sk_buff **pskb, +ipt_do_table(struct sk_buff *skb, unsigned int hook, const struct net_device *in, const struct net_device *out, @@ -331,8 +331,8 @@ ipt_do_table(struct sk_buff **pskb, struct xt_table_info *private; /* Initialization */ - ip = ip_hdr(*pskb); - datalen = (*pskb)->len - ip->ihl * 4; + ip = ip_hdr(skb); + datalen = skb->len - ip->ihl * 4; indev = in ? in->name : nulldevname; outdev = out ? 
out->name : nulldevname; /* We handle fragments by dealing with the first fragment as @@ -359,7 +359,7 @@ ipt_do_table(struct sk_buff **pskb, struct ipt_entry_target *t; if (IPT_MATCH_ITERATE(e, do_match, - *pskb, in, out, + skb, in, out, offset, &hotdrop) != 0) goto no_match; @@ -371,8 +371,8 @@ ipt_do_table(struct sk_buff **pskb, #if defined(CONFIG_NETFILTER_XT_TARGET_TRACE) || \ defined(CONFIG_NETFILTER_XT_TARGET_TRACE_MODULE) /* The packet is traced: log it */ - if (unlikely((*pskb)->nf_trace)) - trace_packet(*pskb, hook, in, out, + if (unlikely(skb->nf_trace)) + trace_packet(skb, hook, in, out, table->name, private, e); #endif /* Standard target? */ @@ -410,7 +410,7 @@ ipt_do_table(struct sk_buff **pskb, ((struct ipt_entry *)table_base)->comefrom = 0xeeeeeeec; #endif - verdict = t->u.kernel.target->target(pskb, + verdict = t->u.kernel.target->target(skb, in, out, hook, t->u.kernel.target, @@ -428,8 +428,8 @@ ipt_do_table(struct sk_buff **pskb, = 0x57acc001; #endif /* Target might have changed stuff. */ - ip = ip_hdr(*pskb); - datalen = (*pskb)->len - ip->ihl * 4; + ip = ip_hdr(skb); + datalen = skb->len - ip->ihl * 4; if (verdict == IPT_CONTINUE) e = (void *)e + e->next_offset; diff --git a/net/ipv4/netfilter/ipt_CLUSTERIP.c b/net/ipv4/netfilter/ipt_CLUSTERIP.c index 27f14e1ebd8..2f544dac72d 100644 --- a/net/ipv4/netfilter/ipt_CLUSTERIP.c +++ b/net/ipv4/netfilter/ipt_CLUSTERIP.c @@ -289,7 +289,7 @@ clusterip_responsible(const struct clusterip_config *config, u_int32_t hash) ***********************************************************************/ static unsigned int -target(struct sk_buff **pskb, +target(struct sk_buff *skb, const struct net_device *in, const struct net_device *out, unsigned int hooknum, @@ -305,7 +305,7 @@ target(struct sk_buff **pskb, * is only decremented by destroy() - and ip_tables guarantees * that the ->target() function isn't called after ->destroy() */ - ct = nf_ct_get(*pskb, &ctinfo); + ct = nf_ct_get(skb, &ctinfo); if (ct == NULL) { printk(KERN_ERR "CLUSTERIP: no conntrack!\n"); /* FIXME: need to drop invalid ones, since replies @@ -316,7 +316,7 @@ target(struct sk_buff **pskb, /* special case: ICMP error handling. conntrack distinguishes between * error messages (RELATED) and information requests (see below) */ - if (ip_hdr(*pskb)->protocol == IPPROTO_ICMP + if (ip_hdr(skb)->protocol == IPPROTO_ICMP && (ctinfo == IP_CT_RELATED || ctinfo == IP_CT_RELATED+IP_CT_IS_REPLY)) return XT_CONTINUE; @@ -325,7 +325,7 @@ target(struct sk_buff **pskb, * TIMESTAMP, INFO_REQUEST or ADDRESS type icmp packets from here * on, which all have an ID field [relevant for hashing]. */ - hash = clusterip_hashfn(*pskb, cipinfo->config); + hash = clusterip_hashfn(skb, cipinfo->config); switch (ctinfo) { case IP_CT_NEW: @@ -355,7 +355,7 @@ target(struct sk_buff **pskb, /* despite being received via linklayer multicast, this is * actually a unicast IP packet. 
TCP doesn't like PACKET_MULTICAST */ - (*pskb)->pkt_type = PACKET_HOST; + skb->pkt_type = PACKET_HOST; return XT_CONTINUE; } @@ -505,12 +505,12 @@ static void arp_print(struct arp_payload *payload) static unsigned int arp_mangle(unsigned int hook, - struct sk_buff **pskb, + struct sk_buff *skb, const struct net_device *in, const struct net_device *out, int (*okfn)(struct sk_buff *)) { - struct arphdr *arp = arp_hdr(*pskb); + struct arphdr *arp = arp_hdr(skb); struct arp_payload *payload; struct clusterip_config *c; diff --git a/net/ipv4/netfilter/ipt_ECN.c b/net/ipv4/netfilter/ipt_ECN.c index f1253bd3837..add110060a2 100644 --- a/net/ipv4/netfilter/ipt_ECN.c +++ b/net/ipv4/netfilter/ipt_ECN.c @@ -26,15 +26,15 @@ MODULE_DESCRIPTION("iptables ECN modification module"); /* set ECT codepoint from IP header. * return false if there was an error. */ static inline bool -set_ect_ip(struct sk_buff **pskb, const struct ipt_ECN_info *einfo) +set_ect_ip(struct sk_buff *skb, const struct ipt_ECN_info *einfo) { - struct iphdr *iph = ip_hdr(*pskb); + struct iphdr *iph = ip_hdr(skb); if ((iph->tos & IPT_ECN_IP_MASK) != (einfo->ip_ect & IPT_ECN_IP_MASK)) { __u8 oldtos; - if (!skb_make_writable(pskb, sizeof(struct iphdr))) + if (!skb_make_writable(skb, sizeof(struct iphdr))) return false; - iph = ip_hdr(*pskb); + iph = ip_hdr(skb); oldtos = iph->tos; iph->tos &= ~IPT_ECN_IP_MASK; iph->tos |= (einfo->ip_ect & IPT_ECN_IP_MASK); @@ -45,14 +45,13 @@ set_ect_ip(struct sk_buff **pskb, const struct ipt_ECN_info *einfo) /* Return false if there was an error. */ static inline bool -set_ect_tcp(struct sk_buff **pskb, const struct ipt_ECN_info *einfo) +set_ect_tcp(struct sk_buff *skb, const struct ipt_ECN_info *einfo) { struct tcphdr _tcph, *tcph; __be16 oldval; /* Not enough header?
*/ - tcph = skb_header_pointer(*pskb, ip_hdrlen(*pskb), - sizeof(_tcph), &_tcph); + tcph = skb_header_pointer(skb, ip_hdrlen(skb), sizeof(_tcph), &_tcph); if (!tcph) return false; @@ -62,9 +61,9 @@ set_ect_tcp(struct sk_buff **pskb, const struct ipt_ECN_info *einfo) tcph->cwr == einfo->proto.tcp.cwr)) return true; - if (!skb_make_writable(pskb, ip_hdrlen(*pskb) + sizeof(*tcph))) + if (!skb_make_writable(skb, ip_hdrlen(skb) + sizeof(*tcph))) return false; - tcph = (void *)ip_hdr(*pskb) + ip_hdrlen(*pskb); + tcph = (void *)ip_hdr(skb) + ip_hdrlen(skb); oldval = ((__be16 *)tcph)[6]; if (einfo->operation & IPT_ECN_OP_SET_ECE) @@ -72,13 +71,13 @@ set_ect_tcp(struct sk_buff **pskb, const struct ipt_ECN_info *einfo) if (einfo->operation & IPT_ECN_OP_SET_CWR) tcph->cwr = einfo->proto.tcp.cwr; - nf_proto_csum_replace2(&tcph->check, *pskb, + nf_proto_csum_replace2(&tcph->check, skb, oldval, ((__be16 *)tcph)[6], 0); return true; } static unsigned int -target(struct sk_buff **pskb, +target(struct sk_buff *skb, const struct net_device *in, const struct net_device *out, unsigned int hooknum, @@ -88,12 +87,12 @@ target(struct sk_buff **pskb, const struct ipt_ECN_info *einfo = targinfo; if (einfo->operation & IPT_ECN_OP_SET_IP) - if (!set_ect_ip(pskb, einfo)) + if (!set_ect_ip(skb, einfo)) return NF_DROP; if (einfo->operation & (IPT_ECN_OP_SET_ECE | IPT_ECN_OP_SET_CWR) - && ip_hdr(*pskb)->protocol == IPPROTO_TCP) - if (!set_ect_tcp(pskb, einfo)) + && ip_hdr(skb)->protocol == IPPROTO_TCP) + if (!set_ect_tcp(skb, einfo)) return NF_DROP; return XT_CONTINUE; diff --git a/net/ipv4/netfilter/ipt_LOG.c b/net/ipv4/netfilter/ipt_LOG.c index 127a5e89bf1..4b5e8216a4e 100644 --- a/net/ipv4/netfilter/ipt_LOG.c +++ b/net/ipv4/netfilter/ipt_LOG.c @@ -418,7 +418,7 @@ ipt_log_packet(unsigned int pf, } static unsigned int -ipt_log_target(struct sk_buff **pskb, +ipt_log_target(struct sk_buff *skb, const struct net_device *in, const struct net_device *out, unsigned int hooknum, @@ -432,7 +432,7 @@ ipt_log_target(struct sk_buff **pskb, li.u.log.level = loginfo->level; li.u.log.logflags = loginfo->logflags; - ipt_log_packet(PF_INET, hooknum, *pskb, in, out, &li, + ipt_log_packet(PF_INET, hooknum, skb, in, out, &li, loginfo->prefix); return XT_CONTINUE; } diff --git a/net/ipv4/netfilter/ipt_MASQUERADE.c b/net/ipv4/netfilter/ipt_MASQUERADE.c index 3e0b562b2db..44b516e7cb7 100644 --- a/net/ipv4/netfilter/ipt_MASQUERADE.c +++ b/net/ipv4/netfilter/ipt_MASQUERADE.c @@ -52,7 +52,7 @@ masquerade_check(const char *tablename, } static unsigned int -masquerade_target(struct sk_buff **pskb, +masquerade_target(struct sk_buff *skb, const struct net_device *in, const struct net_device *out, unsigned int hooknum, @@ -69,7 +69,7 @@ masquerade_target(struct sk_buff **pskb, NF_CT_ASSERT(hooknum == NF_IP_POST_ROUTING); - ct = nf_ct_get(*pskb, &ctinfo); + ct = nf_ct_get(skb, &ctinfo); nat = nfct_nat(ct); NF_CT_ASSERT(ct && (ctinfo == IP_CT_NEW || ctinfo == IP_CT_RELATED @@ -82,7 +82,7 @@ masquerade_target(struct sk_buff **pskb, return NF_ACCEPT; mr = targinfo; - rt = (struct rtable *)(*pskb)->dst; + rt = (struct rtable *)skb->dst; newsrc = inet_select_addr(out, rt->rt_gateway, RT_SCOPE_UNIVERSE); if (!newsrc) { printk("MASQUERADE: %s ate my IP address\n", out->name); diff --git a/net/ipv4/netfilter/ipt_NETMAP.c b/net/ipv4/netfilter/ipt_NETMAP.c index 41a011d5a06..f8699291e33 100644 --- a/net/ipv4/netfilter/ipt_NETMAP.c +++ b/net/ipv4/netfilter/ipt_NETMAP.c @@ -43,7 +43,7 @@ check(const char *tablename, } static unsigned int -target(struct sk_buff 
**pskb, +target(struct sk_buff *skb, const struct net_device *in, const struct net_device *out, unsigned int hooknum, @@ -59,14 +59,14 @@ target(struct sk_buff **pskb, NF_CT_ASSERT(hooknum == NF_IP_PRE_ROUTING || hooknum == NF_IP_POST_ROUTING || hooknum == NF_IP_LOCAL_OUT); - ct = nf_ct_get(*pskb, &ctinfo); + ct = nf_ct_get(skb, &ctinfo); netmask = ~(mr->range[0].min_ip ^ mr->range[0].max_ip); if (hooknum == NF_IP_PRE_ROUTING || hooknum == NF_IP_LOCAL_OUT) - new_ip = ip_hdr(*pskb)->daddr & ~netmask; + new_ip = ip_hdr(skb)->daddr & ~netmask; else - new_ip = ip_hdr(*pskb)->saddr & ~netmask; + new_ip = ip_hdr(skb)->saddr & ~netmask; new_ip |= mr->range[0].min_ip & netmask; newrange = ((struct nf_nat_range) diff --git a/net/ipv4/netfilter/ipt_REDIRECT.c b/net/ipv4/netfilter/ipt_REDIRECT.c index 6ac7a237331..f7cf7d61a2d 100644 --- a/net/ipv4/netfilter/ipt_REDIRECT.c +++ b/net/ipv4/netfilter/ipt_REDIRECT.c @@ -47,7 +47,7 @@ redirect_check(const char *tablename, } static unsigned int -redirect_target(struct sk_buff **pskb, +redirect_target(struct sk_buff *skb, const struct net_device *in, const struct net_device *out, unsigned int hooknum, @@ -63,7 +63,7 @@ redirect_target(struct sk_buff **pskb, NF_CT_ASSERT(hooknum == NF_IP_PRE_ROUTING || hooknum == NF_IP_LOCAL_OUT); - ct = nf_ct_get(*pskb, &ctinfo); + ct = nf_ct_get(skb, &ctinfo); NF_CT_ASSERT(ct && (ctinfo == IP_CT_NEW || ctinfo == IP_CT_RELATED)); /* Local packets: make them go to loopback */ @@ -76,7 +76,7 @@ redirect_target(struct sk_buff **pskb, newdst = 0; rcu_read_lock(); - indev = __in_dev_get_rcu((*pskb)->dev); + indev = __in_dev_get_rcu(skb->dev); if (indev && (ifa = indev->ifa_list)) newdst = ifa->ifa_local; rcu_read_unlock(); diff --git a/net/ipv4/netfilter/ipt_REJECT.c b/net/ipv4/netfilter/ipt_REJECT.c index cb038c8fbc9..dcf4d21d511 100644 --- a/net/ipv4/netfilter/ipt_REJECT.c +++ b/net/ipv4/netfilter/ipt_REJECT.c @@ -131,7 +131,7 @@ static void send_reset(struct sk_buff *oldskb, int hook) ) addr_type = RTN_LOCAL; - if (ip_route_me_harder(&nskb, addr_type)) + if (ip_route_me_harder(nskb, addr_type)) goto free_nskb; nskb->ip_summed = CHECKSUM_NONE; @@ -162,7 +162,7 @@ static inline void send_unreach(struct sk_buff *skb_in, int code) icmp_send(skb_in, ICMP_DEST_UNREACH, code, 0); } -static unsigned int reject(struct sk_buff **pskb, +static unsigned int reject(struct sk_buff *skb, const struct net_device *in, const struct net_device *out, unsigned int hooknum, @@ -173,7 +173,7 @@ static unsigned int reject(struct sk_buff **pskb, /* Our naive response construction doesn't deal with IP options, and probably shouldn't try. */ - if (ip_hdrlen(*pskb) != sizeof(struct iphdr)) + if (ip_hdrlen(skb) != sizeof(struct iphdr)) return NF_DROP; /* WARNING: This code causes reentry within iptables. @@ -181,28 +181,28 @@ static unsigned int reject(struct sk_buff **pskb, must return an absolute verdict. 
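For context, the REJECT unreachable cases in the switch that follows each collapse to one read-only icmp_send() call, which is why reject() fits the single-pointer form with no write-back. A sketch, with demo_send_unreach as an illustrative wrapper name:

#include <linux/icmp.h>
#include <linux/skbuff.h>
#include <net/icmp.h>

/* icmp_send() allocates and routes its own reply; skb_in is only
 * read, never replaced, so no sk_buff ** is needed. */
static inline void demo_send_unreach(struct sk_buff *skb_in, int code)
{
        icmp_send(skb_in, ICMP_DEST_UNREACH, code, 0);
}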
--RR */ switch (reject->with) { case IPT_ICMP_NET_UNREACHABLE: - send_unreach(*pskb, ICMP_NET_UNREACH); + send_unreach(skb, ICMP_NET_UNREACH); break; case IPT_ICMP_HOST_UNREACHABLE: - send_unreach(*pskb, ICMP_HOST_UNREACH); + send_unreach(skb, ICMP_HOST_UNREACH); break; case IPT_ICMP_PROT_UNREACHABLE: - send_unreach(*pskb, ICMP_PROT_UNREACH); + send_unreach(skb, ICMP_PROT_UNREACH); break; case IPT_ICMP_PORT_UNREACHABLE: - send_unreach(*pskb, ICMP_PORT_UNREACH); + send_unreach(skb, ICMP_PORT_UNREACH); break; case IPT_ICMP_NET_PROHIBITED: - send_unreach(*pskb, ICMP_NET_ANO); + send_unreach(skb, ICMP_NET_ANO); break; case IPT_ICMP_HOST_PROHIBITED: - send_unreach(*pskb, ICMP_HOST_ANO); + send_unreach(skb, ICMP_HOST_ANO); break; case IPT_ICMP_ADMIN_PROHIBITED: - send_unreach(*pskb, ICMP_PKT_FILTERED); + send_unreach(skb, ICMP_PKT_FILTERED); break; case IPT_TCP_RESET: - send_reset(*pskb, hooknum); + send_reset(skb, hooknum); case IPT_ICMP_ECHOREPLY: /* Doesn't happen. */ break; diff --git a/net/ipv4/netfilter/ipt_SAME.c b/net/ipv4/netfilter/ipt_SAME.c index 97641f1a97f..8988571436b 100644 --- a/net/ipv4/netfilter/ipt_SAME.c +++ b/net/ipv4/netfilter/ipt_SAME.c @@ -104,7 +104,7 @@ same_destroy(const struct xt_target *target, void *targinfo) } static unsigned int -same_target(struct sk_buff **pskb, +same_target(struct sk_buff *skb, const struct net_device *in, const struct net_device *out, unsigned int hooknum, @@ -121,7 +121,7 @@ same_target(struct sk_buff **pskb, NF_CT_ASSERT(hooknum == NF_IP_PRE_ROUTING || hooknum == NF_IP_POST_ROUTING); - ct = nf_ct_get(*pskb, &ctinfo); + ct = nf_ct_get(skb, &ctinfo); t = &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple; diff --git a/net/ipv4/netfilter/ipt_TOS.c b/net/ipv4/netfilter/ipt_TOS.c index 25f5d0b3906..d4573baa7f2 100644 --- a/net/ipv4/netfilter/ipt_TOS.c +++ b/net/ipv4/netfilter/ipt_TOS.c @@ -21,7 +21,7 @@ MODULE_AUTHOR("Netfilter Core Team <coreteam@netfilter.org>"); MODULE_DESCRIPTION("iptables TOS mangling module"); static unsigned int -target(struct sk_buff **pskb, +target(struct sk_buff *skb, const struct net_device *in, const struct net_device *out, unsigned int hooknum, @@ -29,13 +29,13 @@ target(struct sk_buff **pskb, const void *targinfo) { const struct ipt_tos_target_info *tosinfo = targinfo; - struct iphdr *iph = ip_hdr(*pskb); + struct iphdr *iph = ip_hdr(skb); if ((iph->tos & IPTOS_TOS_MASK) != tosinfo->tos) { __u8 oldtos; - if (!skb_make_writable(pskb, sizeof(struct iphdr))) + if (!skb_make_writable(skb, sizeof(struct iphdr))) return NF_DROP; - iph = ip_hdr(*pskb); + iph = ip_hdr(skb); oldtos = iph->tos; iph->tos = (iph->tos & IPTOS_PREC_MASK) | tosinfo->tos; nf_csum_replace2(&iph->check, htons(oldtos), htons(iph->tos)); diff --git a/net/ipv4/netfilter/ipt_TTL.c b/net/ipv4/netfilter/ipt_TTL.c index 2b54e7b0cfe..c620a052766 100644 --- a/net/ipv4/netfilter/ipt_TTL.c +++ b/net/ipv4/netfilter/ipt_TTL.c @@ -20,7 +20,7 @@ MODULE_DESCRIPTION("IP tables TTL modification module"); MODULE_LICENSE("GPL"); static unsigned int -ipt_ttl_target(struct sk_buff **pskb, +ipt_ttl_target(struct sk_buff *skb, const struct net_device *in, const struct net_device *out, unsigned int hooknum, const struct xt_target *target, const void *targinfo) @@ -29,10 +29,10 @@ ipt_ttl_target(struct sk_buff **pskb, const struct ipt_TTL_info *info = targinfo; int new_ttl; - if (!skb_make_writable(pskb, (*pskb)->len)) + if (!skb_make_writable(skb, skb->len)) return NF_DROP; - iph = ip_hdr(*pskb); + iph = ip_hdr(skb); switch (info->mode) { case IPT_TTL_SET: diff --git 
a/net/ipv4/netfilter/ipt_ULOG.c b/net/ipv4/netfilter/ipt_ULOG.c index c636d6d6357..212b830765a 100644 --- a/net/ipv4/netfilter/ipt_ULOG.c +++ b/net/ipv4/netfilter/ipt_ULOG.c @@ -279,7 +279,7 @@ alloc_failure: spin_unlock_bh(&ulog_lock); } -static unsigned int ipt_ulog_target(struct sk_buff **pskb, +static unsigned int ipt_ulog_target(struct sk_buff *skb, const struct net_device *in, const struct net_device *out, unsigned int hooknum, @@ -288,7 +288,7 @@ static unsigned int ipt_ulog_target(struct sk_buff **pskb, { struct ipt_ulog_info *loginfo = (struct ipt_ulog_info *) targinfo; - ipt_ulog_packet(hooknum, *pskb, in, out, loginfo, NULL); + ipt_ulog_packet(hooknum, skb, in, out, loginfo, NULL); return XT_CONTINUE; } diff --git a/net/ipv4/netfilter/iptable_filter.c b/net/ipv4/netfilter/iptable_filter.c index 4f51c1d7d2d..ba3262c6043 100644 --- a/net/ipv4/netfilter/iptable_filter.c +++ b/net/ipv4/netfilter/iptable_filter.c @@ -62,31 +62,31 @@ static struct xt_table packet_filter = { /* The work comes in here from netfilter.c. */ static unsigned int ipt_hook(unsigned int hook, - struct sk_buff **pskb, + struct sk_buff *skb, const struct net_device *in, const struct net_device *out, int (*okfn)(struct sk_buff *)) { - return ipt_do_table(pskb, hook, in, out, &packet_filter); + return ipt_do_table(skb, hook, in, out, &packet_filter); } static unsigned int ipt_local_out_hook(unsigned int hook, - struct sk_buff **pskb, + struct sk_buff *skb, const struct net_device *in, const struct net_device *out, int (*okfn)(struct sk_buff *)) { /* root is playing with raw sockets. */ - if ((*pskb)->len < sizeof(struct iphdr) - || ip_hdrlen(*pskb) < sizeof(struct iphdr)) { + if (skb->len < sizeof(struct iphdr) || + ip_hdrlen(skb) < sizeof(struct iphdr)) { if (net_ratelimit()) printk("iptable_filter: ignoring short SOCK_RAW " "packet.\n"); return NF_ACCEPT; } - return ipt_do_table(pskb, hook, in, out, &packet_filter); + return ipt_do_table(skb, hook, in, out, &packet_filter); } static struct nf_hook_ops ipt_ops[] = { diff --git a/net/ipv4/netfilter/iptable_mangle.c b/net/ipv4/netfilter/iptable_mangle.c index 902446f7cbc..b4360a69d5c 100644 --- a/net/ipv4/netfilter/iptable_mangle.c +++ b/net/ipv4/netfilter/iptable_mangle.c @@ -75,17 +75,17 @@ static struct xt_table packet_mangler = { /* The work comes in here from netfilter.c. */ static unsigned int ipt_route_hook(unsigned int hook, - struct sk_buff **pskb, + struct sk_buff *skb, const struct net_device *in, const struct net_device *out, int (*okfn)(struct sk_buff *)) { - return ipt_do_table(pskb, hook, in, out, &packet_mangler); + return ipt_do_table(skb, hook, in, out, &packet_mangler); } static unsigned int ipt_local_hook(unsigned int hook, - struct sk_buff **pskb, + struct sk_buff *skb, const struct net_device *in, const struct net_device *out, int (*okfn)(struct sk_buff *)) @@ -97,8 +97,8 @@ ipt_local_hook(unsigned int hook, u_int32_t mark; /* root is playing with raw sockets. 
*/ - if ((*pskb)->len < sizeof(struct iphdr) - || ip_hdrlen(*pskb) < sizeof(struct iphdr)) { + if (skb->len < sizeof(struct iphdr) + || ip_hdrlen(skb) < sizeof(struct iphdr)) { if (net_ratelimit()) printk("iptable_mangle: ignoring short SOCK_RAW " "packet.\n"); @@ -106,22 +106,22 @@ ipt_local_hook(unsigned int hook, } /* Save things which could affect route */ - mark = (*pskb)->mark; - iph = ip_hdr(*pskb); + mark = skb->mark; + iph = ip_hdr(skb); saddr = iph->saddr; daddr = iph->daddr; tos = iph->tos; - ret = ipt_do_table(pskb, hook, in, out, &packet_mangler); + ret = ipt_do_table(skb, hook, in, out, &packet_mangler); /* Reroute for ANY change. */ if (ret != NF_DROP && ret != NF_STOLEN && ret != NF_QUEUE) { - iph = ip_hdr(*pskb); + iph = ip_hdr(skb); if (iph->saddr != saddr || iph->daddr != daddr || - (*pskb)->mark != mark || + skb->mark != mark || iph->tos != tos) - if (ip_route_me_harder(pskb, RTN_UNSPEC)) + if (ip_route_me_harder(skb, RTN_UNSPEC)) ret = NF_DROP; } diff --git a/net/ipv4/netfilter/iptable_raw.c b/net/ipv4/netfilter/iptable_raw.c index d6e50339568..5de6e57ac55 100644 --- a/net/ipv4/netfilter/iptable_raw.c +++ b/net/ipv4/netfilter/iptable_raw.c @@ -47,30 +47,30 @@ static struct xt_table packet_raw = { /* The work comes in here from netfilter.c. */ static unsigned int ipt_hook(unsigned int hook, - struct sk_buff **pskb, + struct sk_buff *skb, const struct net_device *in, const struct net_device *out, int (*okfn)(struct sk_buff *)) { - return ipt_do_table(pskb, hook, in, out, &packet_raw); + return ipt_do_table(skb, hook, in, out, &packet_raw); } static unsigned int ipt_local_hook(unsigned int hook, - struct sk_buff **pskb, + struct sk_buff *skb, const struct net_device *in, const struct net_device *out, int (*okfn)(struct sk_buff *)) { /* root is playing with raw sockets. */ - if ((*pskb)->len < sizeof(struct iphdr) || - ip_hdrlen(*pskb) < sizeof(struct iphdr)) { + if (skb->len < sizeof(struct iphdr) || + ip_hdrlen(skb) < sizeof(struct iphdr)) { if (net_ratelimit()) printk("iptable_raw: ignoring short SOCK_RAW " "packet.\n"); return NF_ACCEPT; } - return ipt_do_table(pskb, hook, in, out, &packet_raw); + return ipt_do_table(skb, hook, in, out, &packet_raw); } /* 'raw' is the very first table.
*/ diff --git a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c index 2fcb9249a8d..831e9b29806 100644 --- a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c +++ b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c @@ -63,19 +63,20 @@ static int ipv4_print_conntrack(struct seq_file *s, } /* Returns new sk_buff, or NULL */ -static struct sk_buff * -nf_ct_ipv4_gather_frags(struct sk_buff *skb, u_int32_t user) +static int nf_ct_ipv4_gather_frags(struct sk_buff *skb, u_int32_t user) { + int err; + skb_orphan(skb); local_bh_disable(); - skb = ip_defrag(skb, user); + err = ip_defrag(skb, user); local_bh_enable(); - if (skb) + if (!err) ip_send_check(ip_hdr(skb)); - return skb; + return err; } static int ipv4_get_l4proto(const struct sk_buff *skb, unsigned int nhoff, @@ -99,17 +100,17 @@ static int ipv4_get_l4proto(const struct sk_buff *skb, unsigned int nhoff, } static unsigned int ipv4_confirm(unsigned int hooknum, - struct sk_buff **pskb, + struct sk_buff *skb, const struct net_device *in, const struct net_device *out, int (*okfn)(struct sk_buff *)) { /* We've seen it coming out the other side: confirm it */ - return nf_conntrack_confirm(pskb); + return nf_conntrack_confirm(skb); } static unsigned int ipv4_conntrack_help(unsigned int hooknum, - struct sk_buff **pskb, + struct sk_buff *skb, const struct net_device *in, const struct net_device *out, int (*okfn)(struct sk_buff *)) @@ -120,7 +121,7 @@ static unsigned int ipv4_conntrack_help(unsigned int hooknum, struct nf_conntrack_helper *helper; /* This is where we call the helper: as the packet goes out. */ - ct = nf_ct_get(*pskb, &ctinfo); + ct = nf_ct_get(skb, &ctinfo); if (!ct || ctinfo == IP_CT_RELATED + IP_CT_IS_REPLY) return NF_ACCEPT; @@ -131,56 +132,55 @@ static unsigned int ipv4_conntrack_help(unsigned int hooknum, helper = rcu_dereference(help->helper); if (!helper) return NF_ACCEPT; - return helper->help(pskb, skb_network_offset(*pskb) + ip_hdrlen(*pskb), + return helper->help(skb, skb_network_offset(skb) + ip_hdrlen(skb), ct, ctinfo); } static unsigned int ipv4_conntrack_defrag(unsigned int hooknum, - struct sk_buff **pskb, + struct sk_buff *skb, const struct net_device *in, const struct net_device *out, int (*okfn)(struct sk_buff *)) { /* Previously seen (loopback)? Ignore. Do this before fragment check. */ - if ((*pskb)->nfct) + if (skb->nfct) return NF_ACCEPT; /* Gather fragments. */ - if (ip_hdr(*pskb)->frag_off & htons(IP_MF | IP_OFFSET)) { - *pskb = nf_ct_ipv4_gather_frags(*pskb, - hooknum == NF_IP_PRE_ROUTING ? - IP_DEFRAG_CONNTRACK_IN : - IP_DEFRAG_CONNTRACK_OUT); - if (!*pskb) + if (ip_hdr(skb)->frag_off & htons(IP_MF | IP_OFFSET)) { + if (nf_ct_ipv4_gather_frags(skb, + hooknum == NF_IP_PRE_ROUTING ? + IP_DEFRAG_CONNTRACK_IN : + IP_DEFRAG_CONNTRACK_OUT)) return NF_STOLEN; } return NF_ACCEPT; } static unsigned int ipv4_conntrack_in(unsigned int hooknum, - struct sk_buff **pskb, + struct sk_buff *skb, const struct net_device *in, const struct net_device *out, int (*okfn)(struct sk_buff *)) { - return nf_conntrack_in(PF_INET, hooknum, pskb); + return nf_conntrack_in(PF_INET, hooknum, skb); } static unsigned int ipv4_conntrack_local(unsigned int hooknum, - struct sk_buff **pskb, + struct sk_buff *skb, const struct net_device *in, const struct net_device *out, int (*okfn)(struct sk_buff *)) { /* root is playing with raw sockets. 
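The gather_frags rework above follows the new ip_defrag() contract: the skb is repaired in place on success and consumed on failure, so callers test a return code and answer NF_STOLEN instead of swapping pointers. A sketch under those assumptions, with demo_gather_frags as an illustrative name:

#include <linux/interrupt.h>
#include <linux/ip.h>
#include <linux/skbuff.h>
#include <linux/types.h>
#include <net/ip.h>

static int demo_gather_frags(struct sk_buff *skb, u32 user)
{
        int err;

        skb_orphan(skb);                /* detach from the sending socket */

        local_bh_disable();
        err = ip_defrag(skb, user);     /* in place: no new skb returned */
        local_bh_enable();

        if (!err)
                ip_send_check(ip_hdr(skb));     /* header changed, refresh sum */

        return err;     /* non-zero: ip_defrag now owns (or freed) the skb */
}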
*/ - if ((*pskb)->len < sizeof(struct iphdr) - || ip_hdrlen(*pskb) < sizeof(struct iphdr)) { + if (skb->len < sizeof(struct iphdr) || + ip_hdrlen(skb) < sizeof(struct iphdr)) { if (net_ratelimit()) printk("ipt_hook: happy cracking.\n"); return NF_ACCEPT; } - return nf_conntrack_in(PF_INET, hooknum, pskb); + return nf_conntrack_in(PF_INET, hooknum, skb); } /* Connection tracking may drop packets, but never alters them, so diff --git a/net/ipv4/netfilter/nf_conntrack_proto_icmp.c b/net/ipv4/netfilter/nf_conntrack_proto_icmp.c index 11fedc73049..adcbaf6d429 100644 --- a/net/ipv4/netfilter/nf_conntrack_proto_icmp.c +++ b/net/ipv4/netfilter/nf_conntrack_proto_icmp.c @@ -281,7 +281,6 @@ static int icmp_nlattr_to_tuple(struct nlattr *tb[], static struct ctl_table_header *icmp_sysctl_header; static struct ctl_table icmp_sysctl_table[] = { { - .ctl_name = NET_NF_CONNTRACK_ICMP_TIMEOUT, .procname = "nf_conntrack_icmp_timeout", .data = &nf_ct_icmp_timeout, .maxlen = sizeof(unsigned int), @@ -295,7 +294,6 @@ static struct ctl_table icmp_sysctl_table[] = { #ifdef CONFIG_NF_CONNTRACK_PROC_COMPAT static struct ctl_table icmp_compat_sysctl_table[] = { { - .ctl_name = NET_IPV4_NF_CONNTRACK_ICMP_TIMEOUT, .procname = "ip_conntrack_icmp_timeout", .data = &nf_ct_icmp_timeout, .maxlen = sizeof(unsigned int), diff --git a/net/ipv4/netfilter/nf_nat_amanda.c b/net/ipv4/netfilter/nf_nat_amanda.c index bd93a1d7105..35a5aa69cd9 100644 --- a/net/ipv4/netfilter/nf_nat_amanda.c +++ b/net/ipv4/netfilter/nf_nat_amanda.c @@ -24,7 +24,7 @@ MODULE_DESCRIPTION("Amanda NAT helper"); MODULE_LICENSE("GPL"); MODULE_ALIAS("ip_nat_amanda"); -static unsigned int help(struct sk_buff **pskb, +static unsigned int help(struct sk_buff *skb, enum ip_conntrack_info ctinfo, unsigned int matchoff, unsigned int matchlen, @@ -53,7 +53,7 @@ static unsigned int help(struct sk_buff **pskb, return NF_DROP; sprintf(buffer, "%u", port); - ret = nf_nat_mangle_udp_packet(pskb, exp->master, ctinfo, + ret = nf_nat_mangle_udp_packet(skb, exp->master, ctinfo, matchoff, matchlen, buffer, strlen(buffer)); if (ret != NF_ACCEPT) diff --git a/net/ipv4/netfilter/nf_nat_core.c b/net/ipv4/netfilter/nf_nat_core.c index 7221aa20e6f..56e93f692e8 100644 --- a/net/ipv4/netfilter/nf_nat_core.c +++ b/net/ipv4/netfilter/nf_nat_core.c @@ -349,7 +349,7 @@ EXPORT_SYMBOL(nf_nat_setup_info); /* Returns true if succeeded. */ static int manip_pkt(u_int16_t proto, - struct sk_buff **pskb, + struct sk_buff *skb, unsigned int iphdroff, const struct nf_conntrack_tuple *target, enum nf_nat_manip_type maniptype) @@ -357,19 +357,19 @@ manip_pkt(u_int16_t proto, struct iphdr *iph; struct nf_nat_protocol *p; - if (!skb_make_writable(pskb, iphdroff + sizeof(*iph))) + if (!skb_make_writable(skb, iphdroff + sizeof(*iph))) return 0; - iph = (void *)(*pskb)->data + iphdroff; + iph = (void *)skb->data + iphdroff; /* Manipulate protocol part.
*/ /* rcu_read_lock()ed by nf_hook_slow */ p = __nf_nat_proto_find(proto); - if (!p->manip_pkt(pskb, iphdroff, target, maniptype)) + if (!p->manip_pkt(skb, iphdroff, target, maniptype)) return 0; - iph = (void *)(*pskb)->data + iphdroff; + iph = (void *)skb->data + iphdroff; if (maniptype == IP_NAT_MANIP_SRC) { nf_csum_replace4(&iph->check, iph->saddr, target->src.u3.ip); @@ -385,7 +385,7 @@ manip_pkt(u_int16_t proto, unsigned int nf_nat_packet(struct nf_conn *ct, enum ip_conntrack_info ctinfo, unsigned int hooknum, - struct sk_buff **pskb) + struct sk_buff *skb) { enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo); unsigned long statusbit; @@ -407,7 +407,7 @@ unsigned int nf_nat_packet(struct nf_conn *ct, /* We are aiming to look like inverse of other direction. */ nf_ct_invert_tuplepr(&target, &ct->tuplehash[!dir].tuple); - if (!manip_pkt(target.dst.protonum, pskb, 0, &target, mtype)) + if (!manip_pkt(target.dst.protonum, skb, 0, &target, mtype)) return NF_DROP; } return NF_ACCEPT; @@ -418,7 +418,7 @@ EXPORT_SYMBOL_GPL(nf_nat_packet); int nf_nat_icmp_reply_translation(struct nf_conn *ct, enum ip_conntrack_info ctinfo, unsigned int hooknum, - struct sk_buff **pskb) + struct sk_buff *skb) { struct { struct icmphdr icmp; @@ -426,24 +426,24 @@ int nf_nat_icmp_reply_translation(struct nf_conn *ct, } *inside; struct nf_conntrack_l4proto *l4proto; struct nf_conntrack_tuple inner, target; - int hdrlen = ip_hdrlen(*pskb); + int hdrlen = ip_hdrlen(skb); enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo); unsigned long statusbit; enum nf_nat_manip_type manip = HOOK2MANIP(hooknum); - if (!skb_make_writable(pskb, hdrlen + sizeof(*inside))) + if (!skb_make_writable(skb, hdrlen + sizeof(*inside))) return 0; - inside = (void *)(*pskb)->data + ip_hdrlen(*pskb); + inside = (void *)skb->data + ip_hdrlen(skb); /* We're actually going to mangle it beyond trivial checksum adjustment, so make sure the current checksum is correct. */ - if (nf_ip_checksum(*pskb, hooknum, hdrlen, 0)) + if (nf_ip_checksum(skb, hooknum, hdrlen, 0)) return 0; /* Must be RELATED */ - NF_CT_ASSERT((*pskb)->nfctinfo == IP_CT_RELATED || - (*pskb)->nfctinfo == IP_CT_RELATED+IP_CT_IS_REPLY); + NF_CT_ASSERT(skb->nfctinfo == IP_CT_RELATED || + skb->nfctinfo == IP_CT_RELATED+IP_CT_IS_REPLY); /* Redirects on non-null nats must be dropped, else they'll start talking to each other without our translation, and be @@ -458,15 +458,15 @@ int nf_nat_icmp_reply_translation(struct nf_conn *ct, } pr_debug("icmp_reply_translation: translating error %p manip %u " - "dir %s\n", *pskb, manip, + "dir %s\n", skb, manip, dir == IP_CT_DIR_ORIGINAL ? "ORIG" : "REPLY"); /* rcu_read_lock()ed by nf_hook_slow */ l4proto = __nf_ct_l4proto_find(PF_INET, inside->ip.protocol); - if (!nf_ct_get_tuple(*pskb, - ip_hdrlen(*pskb) + sizeof(struct icmphdr), - (ip_hdrlen(*pskb) + + if (!nf_ct_get_tuple(skb, + ip_hdrlen(skb) + sizeof(struct icmphdr), + (ip_hdrlen(skb) + sizeof(struct icmphdr) + inside->ip.ihl * 4), (u_int16_t)AF_INET, inside->ip.protocol, @@ -478,19 +478,19 @@ int nf_nat_icmp_reply_translation(struct nf_conn *ct, pass all hooks (locally-generated ICMP). Consider incoming packet: PREROUTING (DST manip), routing produces ICMP, goes through POSTROUTING (which must correct the DST manip). 
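manip_pkt() above keeps packets checksum-correct by patching the existing sums incrementally instead of recomputing them. A condensed sketch of the idiom, assuming a linear, already-writable TCP packet; demo_rewrite_saddr is an illustrative name:

#include <linux/ip.h>
#include <linux/netfilter.h>
#include <linux/skbuff.h>
#include <linux/tcp.h>

static void demo_rewrite_saddr(struct sk_buff *skb, __be32 newip)
{
        struct iphdr *iph = ip_hdr(skb);
        struct tcphdr *tcph = (void *)iph + iph->ihl * 4;

        /* IP header sum: plain replacement, no pseudo-header involved. */
        nf_csum_replace4(&iph->check, iph->saddr, newip);

        /* TCP sum covers the pseudo-header, hence pseudohdr = 1; this
         * also keeps CHECKSUM_PARTIAL packets consistent. */
        nf_proto_csum_replace4(&tcph->check, skb, iph->saddr, newip, 1);

        iph->saddr = newip;
}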
*/ - if (!manip_pkt(inside->ip.protocol, pskb, - ip_hdrlen(*pskb) + sizeof(inside->icmp), + if (!manip_pkt(inside->ip.protocol, skb, + ip_hdrlen(skb) + sizeof(inside->icmp), &ct->tuplehash[!dir].tuple, !manip)) return 0; - if ((*pskb)->ip_summed != CHECKSUM_PARTIAL) { + if (skb->ip_summed != CHECKSUM_PARTIAL) { /* Reloading "inside" here since manip_pkt inner. */ - inside = (void *)(*pskb)->data + ip_hdrlen(*pskb); + inside = (void *)skb->data + ip_hdrlen(skb); inside->icmp.checksum = 0; inside->icmp.checksum = - csum_fold(skb_checksum(*pskb, hdrlen, - (*pskb)->len - hdrlen, 0)); + csum_fold(skb_checksum(skb, hdrlen, + skb->len - hdrlen, 0)); } /* Change outer to look the reply to an incoming packet @@ -506,7 +506,7 @@ int nf_nat_icmp_reply_translation(struct nf_conn *ct, if (ct->status & statusbit) { nf_ct_invert_tuplepr(&target, &ct->tuplehash[!dir].tuple); - if (!manip_pkt(0, pskb, 0, &target, manip)) + if (!manip_pkt(0, skb, 0, &target, manip)) return 0; } diff --git a/net/ipv4/netfilter/nf_nat_ftp.c b/net/ipv4/netfilter/nf_nat_ftp.c index 3663bd879c3..e1a16d3ea4c 100644 --- a/net/ipv4/netfilter/nf_nat_ftp.c +++ b/net/ipv4/netfilter/nf_nat_ftp.c @@ -28,7 +28,7 @@ MODULE_ALIAS("ip_nat_ftp"); /* FIXME: Time out? --RR */ static int -mangle_rfc959_packet(struct sk_buff **pskb, +mangle_rfc959_packet(struct sk_buff *skb, __be32 newip, u_int16_t port, unsigned int matchoff, @@ -43,13 +43,13 @@ mangle_rfc959_packet(struct sk_buff **pskb, pr_debug("calling nf_nat_mangle_tcp_packet\n"); - return nf_nat_mangle_tcp_packet(pskb, ct, ctinfo, matchoff, + return nf_nat_mangle_tcp_packet(skb, ct, ctinfo, matchoff, matchlen, buffer, strlen(buffer)); } /* |1|132.235.1.2|6275| */ static int -mangle_eprt_packet(struct sk_buff **pskb, +mangle_eprt_packet(struct sk_buff *skb, __be32 newip, u_int16_t port, unsigned int matchoff, @@ -63,13 +63,13 @@ mangle_eprt_packet(struct sk_buff **pskb, pr_debug("calling nf_nat_mangle_tcp_packet\n"); - return nf_nat_mangle_tcp_packet(pskb, ct, ctinfo, matchoff, + return nf_nat_mangle_tcp_packet(skb, ct, ctinfo, matchoff, matchlen, buffer, strlen(buffer)); } /* |1|132.235.1.2|6275| */ static int -mangle_epsv_packet(struct sk_buff **pskb, +mangle_epsv_packet(struct sk_buff *skb, __be32 newip, u_int16_t port, unsigned int matchoff, @@ -83,11 +83,11 @@ mangle_epsv_packet(struct sk_buff **pskb, pr_debug("calling nf_nat_mangle_tcp_packet\n"); - return nf_nat_mangle_tcp_packet(pskb, ct, ctinfo, matchoff, + return nf_nat_mangle_tcp_packet(skb, ct, ctinfo, matchoff, matchlen, buffer, strlen(buffer)); } -static int (*mangle[])(struct sk_buff **, __be32, u_int16_t, +static int (*mangle[])(struct sk_buff *, __be32, u_int16_t, unsigned int, unsigned int, struct nf_conn *, enum ip_conntrack_info) = { @@ -99,7 +99,7 @@ static int (*mangle[])(struct sk_buff **, __be32, u_int16_t, /* So, this packet has hit the connection tracking matching code. Mangle it, and change the expectation to match the new version. 
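The FTP helper that follows dispatches through an array of mangle functions indexed by request type, all sharing the new single-pointer signature. An illustrative skeleton of that table idiom; the names and the single entry here are not from this patch:

#include <linux/skbuff.h>
#include <linux/types.h>

typedef int (*demo_mangle_fn)(struct sk_buff *skb, __be32 newip,
                              u_int16_t port, unsigned int matchoff,
                              unsigned int matchlen);

static int demo_mangle_port(struct sk_buff *skb, __be32 newip,
                            u_int16_t port, unsigned int matchoff,
                            unsigned int matchlen)
{
        return 1;       /* rewrite the matched "a,b,c,d,p1,p2" text here */
}

static const demo_mangle_fn demo_mangle[] = {
        demo_mangle_port,       /* one slot per request type */
};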
*/ -static unsigned int nf_nat_ftp(struct sk_buff **pskb, +static unsigned int nf_nat_ftp(struct sk_buff *skb, enum ip_conntrack_info ctinfo, enum nf_ct_ftp_type type, unsigned int matchoff, @@ -132,7 +132,7 @@ static unsigned int nf_nat_ftp(struct sk_buff **pskb, if (port == 0) return NF_DROP; - if (!mangle[type](pskb, newip, port, matchoff, matchlen, ct, ctinfo)) { + if (!mangle[type](skb, newip, port, matchoff, matchlen, ct, ctinfo)) { nf_ct_unexpect_related(exp); return NF_DROP; } diff --git a/net/ipv4/netfilter/nf_nat_h323.c b/net/ipv4/netfilter/nf_nat_h323.c index c1b059a7370..a868c8c4132 100644 --- a/net/ipv4/netfilter/nf_nat_h323.c +++ b/net/ipv4/netfilter/nf_nat_h323.c @@ -22,12 +22,12 @@ #include <linux/netfilter/nf_conntrack_h323.h> /****************************************************************************/ -static int set_addr(struct sk_buff **pskb, +static int set_addr(struct sk_buff *skb, unsigned char **data, int dataoff, unsigned int addroff, __be32 ip, __be16 port) { enum ip_conntrack_info ctinfo; - struct nf_conn *ct = nf_ct_get(*pskb, &ctinfo); + struct nf_conn *ct = nf_ct_get(skb, &ctinfo); struct { __be32 ip; __be16 port; @@ -38,8 +38,8 @@ static int set_addr(struct sk_buff **pskb, buf.port = port; addroff += dataoff; - if (ip_hdr(*pskb)->protocol == IPPROTO_TCP) { - if (!nf_nat_mangle_tcp_packet(pskb, ct, ctinfo, + if (ip_hdr(skb)->protocol == IPPROTO_TCP) { + if (!nf_nat_mangle_tcp_packet(skb, ct, ctinfo, addroff, sizeof(buf), (char *) &buf, sizeof(buf))) { if (net_ratelimit()) @@ -49,14 +49,13 @@ static int set_addr(struct sk_buff **pskb, } /* Relocate data pointer */ - th = skb_header_pointer(*pskb, ip_hdrlen(*pskb), + th = skb_header_pointer(skb, ip_hdrlen(skb), sizeof(_tcph), &_tcph); if (th == NULL) return -1; - *data = (*pskb)->data + ip_hdrlen(*pskb) + - th->doff * 4 + dataoff; + *data = skb->data + ip_hdrlen(skb) + th->doff * 4 + dataoff; } else { - if (!nf_nat_mangle_udp_packet(pskb, ct, ctinfo, + if (!nf_nat_mangle_udp_packet(skb, ct, ctinfo, addroff, sizeof(buf), (char *) &buf, sizeof(buf))) { if (net_ratelimit()) @@ -67,36 +66,35 @@ static int set_addr(struct sk_buff **pskb, /* nf_nat_mangle_udp_packet uses skb_make_writable() to copy * or pull everything in a linear buffer, so we can safely * use the skb pointers now */ - *data = ((*pskb)->data + ip_hdrlen(*pskb) + - sizeof(struct udphdr)); + *data = skb->data + ip_hdrlen(skb) + sizeof(struct udphdr); } return 0; } /****************************************************************************/ -static int set_h225_addr(struct sk_buff **pskb, +static int set_h225_addr(struct sk_buff *skb, unsigned char **data, int dataoff, TransportAddress *taddr, union nf_conntrack_address *addr, __be16 port) { - return set_addr(pskb, data, dataoff, taddr->ipAddress.ip, + return set_addr(skb, data, dataoff, taddr->ipAddress.ip, addr->ip, port); } /****************************************************************************/ -static int set_h245_addr(struct sk_buff **pskb, +static int set_h245_addr(struct sk_buff *skb, unsigned char **data, int dataoff, H245_TransportAddress *taddr, union nf_conntrack_address *addr, __be16 port) { - return set_addr(pskb, data, dataoff, + return set_addr(skb, data, dataoff, taddr->unicastAddress.iPAddress.network, addr->ip, port); } /****************************************************************************/ -static int set_sig_addr(struct sk_buff **pskb, struct nf_conn *ct, +static int set_sig_addr(struct sk_buff *skb, struct nf_conn *ct, enum ip_conntrack_info ctinfo, unsigned char 
**data, TransportAddress *taddr, int count) @@ -125,7 +123,7 @@ static int set_sig_addr(struct sk_buff **pskb, struct nf_conn *ct, NIPQUAD(addr.ip), port, NIPQUAD(ct->tuplehash[!dir].tuple.dst.u3.ip), info->sig_port[!dir]); - return set_h225_addr(pskb, data, 0, &taddr[i], + return set_h225_addr(skb, data, 0, &taddr[i], &ct->tuplehash[!dir]. tuple.dst.u3, info->sig_port[!dir]); @@ -137,7 +135,7 @@ static int set_sig_addr(struct sk_buff **pskb, struct nf_conn *ct, NIPQUAD(addr.ip), port, NIPQUAD(ct->tuplehash[!dir].tuple.src.u3.ip), info->sig_port[!dir]); - return set_h225_addr(pskb, data, 0, &taddr[i], + return set_h225_addr(skb, data, 0, &taddr[i], &ct->tuplehash[!dir]. tuple.src.u3, info->sig_port[!dir]); @@ -149,7 +147,7 @@ static int set_sig_addr(struct sk_buff **pskb, struct nf_conn *ct, } /****************************************************************************/ -static int set_ras_addr(struct sk_buff **pskb, struct nf_conn *ct, +static int set_ras_addr(struct sk_buff *skb, struct nf_conn *ct, enum ip_conntrack_info ctinfo, unsigned char **data, TransportAddress *taddr, int count) @@ -168,7 +166,7 @@ static int set_ras_addr(struct sk_buff **pskb, struct nf_conn *ct, NIPQUAD(addr.ip), ntohs(port), NIPQUAD(ct->tuplehash[!dir].tuple.dst.u3.ip), ntohs(ct->tuplehash[!dir].tuple.dst.u.udp.port)); - return set_h225_addr(pskb, data, 0, &taddr[i], + return set_h225_addr(skb, data, 0, &taddr[i], &ct->tuplehash[!dir].tuple.dst.u3, ct->tuplehash[!dir].tuple. dst.u.udp.port); @@ -179,7 +177,7 @@ static int set_ras_addr(struct sk_buff **pskb, struct nf_conn *ct, } /****************************************************************************/ -static int nat_rtp_rtcp(struct sk_buff **pskb, struct nf_conn *ct, +static int nat_rtp_rtcp(struct sk_buff *skb, struct nf_conn *ct, enum ip_conntrack_info ctinfo, unsigned char **data, int dataoff, H245_TransportAddress *taddr, @@ -244,7 +242,7 @@ static int nat_rtp_rtcp(struct sk_buff **pskb, struct nf_conn *ct, } /* Modify signal */ - if (set_h245_addr(pskb, data, dataoff, taddr, + if (set_h245_addr(skb, data, dataoff, taddr, &ct->tuplehash[!dir].tuple.dst.u3, htons((port & htons(1)) ? 
nated_port + 1 : nated_port)) == 0) { @@ -273,7 +271,7 @@ static int nat_rtp_rtcp(struct sk_buff **pskb, struct nf_conn *ct, } /****************************************************************************/ -static int nat_t120(struct sk_buff **pskb, struct nf_conn *ct, +static int nat_t120(struct sk_buff *skb, struct nf_conn *ct, enum ip_conntrack_info ctinfo, unsigned char **data, int dataoff, H245_TransportAddress *taddr, __be16 port, @@ -301,7 +299,7 @@ static int nat_t120(struct sk_buff **pskb, struct nf_conn *ct, } /* Modify signal */ - if (set_h245_addr(pskb, data, dataoff, taddr, + if (set_h245_addr(skb, data, dataoff, taddr, &ct->tuplehash[!dir].tuple.dst.u3, htons(nated_port)) < 0) { nf_ct_unexpect_related(exp); @@ -318,7 +316,7 @@ static int nat_t120(struct sk_buff **pskb, struct nf_conn *ct, } /****************************************************************************/ -static int nat_h245(struct sk_buff **pskb, struct nf_conn *ct, +static int nat_h245(struct sk_buff *skb, struct nf_conn *ct, enum ip_conntrack_info ctinfo, unsigned char **data, int dataoff, TransportAddress *taddr, __be16 port, @@ -351,7 +349,7 @@ static int nat_h245(struct sk_buff **pskb, struct nf_conn *ct, } /* Modify signal */ - if (set_h225_addr(pskb, data, dataoff, taddr, + if (set_h225_addr(skb, data, dataoff, taddr, &ct->tuplehash[!dir].tuple.dst.u3, htons(nated_port)) == 0) { /* Save ports */ @@ -406,7 +404,7 @@ static void ip_nat_q931_expect(struct nf_conn *new, } /****************************************************************************/ -static int nat_q931(struct sk_buff **pskb, struct nf_conn *ct, +static int nat_q931(struct sk_buff *skb, struct nf_conn *ct, enum ip_conntrack_info ctinfo, unsigned char **data, TransportAddress *taddr, int idx, __be16 port, struct nf_conntrack_expect *exp) @@ -439,7 +437,7 @@ static int nat_q931(struct sk_buff **pskb, struct nf_conn *ct, } /* Modify signal */ - if (set_h225_addr(pskb, data, 0, &taddr[idx], + if (set_h225_addr(skb, data, 0, &taddr[idx], &ct->tuplehash[!dir].tuple.dst.u3, htons(nated_port)) == 0) { /* Save ports */ @@ -450,7 +448,7 @@ static int nat_q931(struct sk_buff **pskb, struct nf_conn *ct, if (idx > 0 && get_h225_addr(ct, *data, &taddr[0], &addr, &port) && (ntohl(addr.ip) & 0xff000000) == 0x7f000000) { - set_h225_addr(pskb, data, 0, &taddr[0], + set_h225_addr(skb, data, 0, &taddr[0], &ct->tuplehash[!dir].tuple.dst.u3, info->sig_port[!dir]); } @@ -495,7 +493,7 @@ static void ip_nat_callforwarding_expect(struct nf_conn *new, } /****************************************************************************/ -static int nat_callforwarding(struct sk_buff **pskb, struct nf_conn *ct, +static int nat_callforwarding(struct sk_buff *skb, struct nf_conn *ct, enum ip_conntrack_info ctinfo, unsigned char **data, int dataoff, TransportAddress *taddr, __be16 port, @@ -525,7 +523,7 @@ static int nat_callforwarding(struct sk_buff **pskb, struct nf_conn *ct, } /* Modify signal */ - if (!set_h225_addr(pskb, data, dataoff, taddr, + if (!set_h225_addr(skb, data, dataoff, taddr, &ct->tuplehash[!dir].tuple.dst.u3, htons(nated_port)) == 0) { nf_ct_unexpect_related(exp); diff --git a/net/ipv4/netfilter/nf_nat_helper.c b/net/ipv4/netfilter/nf_nat_helper.c index 93d8a0a8f03..8718da00ef2 100644 --- a/net/ipv4/netfilter/nf_nat_helper.c +++ b/net/ipv4/netfilter/nf_nat_helper.c @@ -111,22 +111,14 @@ static void mangle_contents(struct sk_buff *skb, } /* Unusual, but possible case. 
*/ -static int enlarge_skb(struct sk_buff **pskb, unsigned int extra) +static int enlarge_skb(struct sk_buff *skb, unsigned int extra) { - struct sk_buff *nskb; - - if ((*pskb)->len + extra > 65535) + if (skb->len + extra > 65535) return 0; - nskb = skb_copy_expand(*pskb, skb_headroom(*pskb), extra, GFP_ATOMIC); - if (!nskb) + if (pskb_expand_head(skb, 0, extra - skb_tailroom(skb), GFP_ATOMIC)) return 0; - /* Transfer socket to new skb. */ - if ((*pskb)->sk) - skb_set_owner_w(nskb, (*pskb)->sk); - kfree_skb(*pskb); - *pskb = nskb; return 1; } @@ -139,7 +131,7 @@ static int enlarge_skb(struct sk_buff **pskb, unsigned int extra) * * */ int -nf_nat_mangle_tcp_packet(struct sk_buff **pskb, +nf_nat_mangle_tcp_packet(struct sk_buff *skb, struct nf_conn *ct, enum ip_conntrack_info ctinfo, unsigned int match_offset, @@ -147,37 +139,37 @@ nf_nat_mangle_tcp_packet(struct sk_buff **pskb, const char *rep_buffer, unsigned int rep_len) { - struct rtable *rt = (struct rtable *)(*pskb)->dst; + struct rtable *rt = (struct rtable *)skb->dst; struct iphdr *iph; struct tcphdr *tcph; int oldlen, datalen; - if (!skb_make_writable(pskb, (*pskb)->len)) + if (!skb_make_writable(skb, skb->len)) return 0; if (rep_len > match_len && - rep_len - match_len > skb_tailroom(*pskb) && - !enlarge_skb(pskb, rep_len - match_len)) + rep_len - match_len > skb_tailroom(skb) && + !enlarge_skb(skb, rep_len - match_len)) return 0; - SKB_LINEAR_ASSERT(*pskb); + SKB_LINEAR_ASSERT(skb); - iph = ip_hdr(*pskb); + iph = ip_hdr(skb); tcph = (void *)iph + iph->ihl*4; - oldlen = (*pskb)->len - iph->ihl*4; - mangle_contents(*pskb, iph->ihl*4 + tcph->doff*4, + oldlen = skb->len - iph->ihl*4; + mangle_contents(skb, iph->ihl*4 + tcph->doff*4, match_offset, match_len, rep_buffer, rep_len); - datalen = (*pskb)->len - iph->ihl*4; - if ((*pskb)->ip_summed != CHECKSUM_PARTIAL) { + datalen = skb->len - iph->ihl*4; + if (skb->ip_summed != CHECKSUM_PARTIAL) { if (!(rt->rt_flags & RTCF_LOCAL) && - (*pskb)->dev->features & NETIF_F_V4_CSUM) { - (*pskb)->ip_summed = CHECKSUM_PARTIAL; - (*pskb)->csum_start = skb_headroom(*pskb) + - skb_network_offset(*pskb) + - iph->ihl * 4; - (*pskb)->csum_offset = offsetof(struct tcphdr, check); + skb->dev->features & NETIF_F_V4_CSUM) { + skb->ip_summed = CHECKSUM_PARTIAL; + skb->csum_start = skb_headroom(skb) + + skb_network_offset(skb) + + iph->ihl * 4; + skb->csum_offset = offsetof(struct tcphdr, check); tcph->check = ~tcp_v4_check(datalen, iph->saddr, iph->daddr, 0); } else { @@ -188,7 +180,7 @@ nf_nat_mangle_tcp_packet(struct sk_buff **pskb, datalen, 0)); } } else - nf_proto_csum_replace2(&tcph->check, *pskb, + nf_proto_csum_replace2(&tcph->check, skb, htons(oldlen), htons(datalen), 1); if (rep_len != match_len) { @@ -197,7 +189,7 @@ nf_nat_mangle_tcp_packet(struct sk_buff **pskb, (int)rep_len - (int)match_len, ct, ctinfo); /* Tell TCP window tracking about seq change */ - nf_conntrack_tcp_update(*pskb, ip_hdrlen(*pskb), + nf_conntrack_tcp_update(skb, ip_hdrlen(skb), ct, CTINFO2DIR(ctinfo)); } return 1; @@ -215,7 +207,7 @@ EXPORT_SYMBOL(nf_nat_mangle_tcp_packet); * should be fairly easy to do. 
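enlarge_skb() above is the heart of the conversion: grow the existing buffer in place rather than allocate-copy-free and hand back a new pointer. A standalone sketch, assuming (as the callers above do) that the need for more than skb_tailroom(skb) bytes has already been established:

#include <linux/skbuff.h>

static int demo_enlarge_skb(struct sk_buff *skb, unsigned int extra)
{
        if (skb->len + extra > 65535)   /* IPv4 total-length ceiling */
                return 0;

        /* Reallocates the data area in place: the sk_buff itself
         * survives, so no skb_set_owner_w()/kfree_skb() ownership
         * dance and no *pskb write-back are required any more. */
        if (pskb_expand_head(skb, 0, extra - skb_tailroom(skb), GFP_ATOMIC))
                return 0;

        return 1;
}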
*/ int -nf_nat_mangle_udp_packet(struct sk_buff **pskb, +nf_nat_mangle_udp_packet(struct sk_buff *skb, struct nf_conn *ct, enum ip_conntrack_info ctinfo, unsigned int match_offset, @@ -223,48 +215,48 @@ nf_nat_mangle_udp_packet(struct sk_buff **pskb, const char *rep_buffer, unsigned int rep_len) { - struct rtable *rt = (struct rtable *)(*pskb)->dst; + struct rtable *rt = (struct rtable *)skb->dst; struct iphdr *iph; struct udphdr *udph; int datalen, oldlen; /* UDP helpers might accidentally mangle the wrong packet */ - iph = ip_hdr(*pskb); - if ((*pskb)->len < iph->ihl*4 + sizeof(*udph) + + iph = ip_hdr(skb); + if (skb->len < iph->ihl*4 + sizeof(*udph) + match_offset + match_len) return 0; - if (!skb_make_writable(pskb, (*pskb)->len)) + if (!skb_make_writable(skb, skb->len)) return 0; if (rep_len > match_len && - rep_len - match_len > skb_tailroom(*pskb) && - !enlarge_skb(pskb, rep_len - match_len)) + rep_len - match_len > skb_tailroom(skb) && + !enlarge_skb(skb, rep_len - match_len)) return 0; - iph = ip_hdr(*pskb); + iph = ip_hdr(skb); udph = (void *)iph + iph->ihl*4; - oldlen = (*pskb)->len - iph->ihl*4; - mangle_contents(*pskb, iph->ihl*4 + sizeof(*udph), + oldlen = skb->len - iph->ihl*4; + mangle_contents(skb, iph->ihl*4 + sizeof(*udph), match_offset, match_len, rep_buffer, rep_len); /* update the length of the UDP packet */ - datalen = (*pskb)->len - iph->ihl*4; + datalen = skb->len - iph->ihl*4; udph->len = htons(datalen); /* fix udp checksum if udp checksum was previously calculated */ - if (!udph->check && (*pskb)->ip_summed != CHECKSUM_PARTIAL) + if (!udph->check && skb->ip_summed != CHECKSUM_PARTIAL) return 1; - if ((*pskb)->ip_summed != CHECKSUM_PARTIAL) { + if (skb->ip_summed != CHECKSUM_PARTIAL) { if (!(rt->rt_flags & RTCF_LOCAL) && - (*pskb)->dev->features & NETIF_F_V4_CSUM) { - (*pskb)->ip_summed = CHECKSUM_PARTIAL; - (*pskb)->csum_start = skb_headroom(*pskb) + - skb_network_offset(*pskb) + - iph->ihl * 4; - (*pskb)->csum_offset = offsetof(struct udphdr, check); + skb->dev->features & NETIF_F_V4_CSUM) { + skb->ip_summed = CHECKSUM_PARTIAL; + skb->csum_start = skb_headroom(skb) + + skb_network_offset(skb) + + iph->ihl * 4; + skb->csum_offset = offsetof(struct udphdr, check); udph->check = ~csum_tcpudp_magic(iph->saddr, iph->daddr, datalen, IPPROTO_UDP, 0); @@ -278,7 +270,7 @@ nf_nat_mangle_udp_packet(struct sk_buff **pskb, udph->check = CSUM_MANGLED_0; } } else - nf_proto_csum_replace2(&udph->check, *pskb, + nf_proto_csum_replace2(&udph->check, skb, htons(oldlen), htons(datalen), 1); return 1; @@ -330,7 +322,7 @@ sack_adjust(struct sk_buff *skb, /* TCP SACK sequence number adjustment */ static inline unsigned int -nf_nat_sack_adjust(struct sk_buff **pskb, +nf_nat_sack_adjust(struct sk_buff *skb, struct tcphdr *tcph, struct nf_conn *ct, enum ip_conntrack_info ctinfo) @@ -338,17 +330,17 @@ nf_nat_sack_adjust(struct sk_buff **pskb, unsigned int dir, optoff, optend; struct nf_conn_nat *nat = nfct_nat(ct); - optoff = ip_hdrlen(*pskb) + sizeof(struct tcphdr); - optend = ip_hdrlen(*pskb) + tcph->doff * 4; + optoff = ip_hdrlen(skb) + sizeof(struct tcphdr); + optend = ip_hdrlen(skb) + tcph->doff * 4; - if (!skb_make_writable(pskb, optend)) + if (!skb_make_writable(skb, optend)) return 0; dir = CTINFO2DIR(ctinfo); while (optoff < optend) { /* Usually: option, length. 
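The SACK adjustment below walks TCP options with the standard kind/length loop. A sketch of just that walk, with demo_walk_tcp_options as an illustrative name and the per-option handling elided:

#include <linux/skbuff.h>
#include <net/tcp.h>

static void demo_walk_tcp_options(struct sk_buff *skb,
                                  unsigned int optoff, unsigned int optend)
{
        while (optoff < optend) {
                unsigned char *op = skb->data + optoff;

                if (op[0] == TCPOPT_EOL)
                        return;
                if (op[0] == TCPOPT_NOP) {      /* one-byte padding */
                        optoff++;
                        continue;
                }
                /* Truncated or bogus length: bail out rather than loop. */
                if (optoff + 1 >= optend || op[1] < 2)
                        return;
                /* ... inspect option op[0], length op[1], here ... */
                optoff += op[1];
        }
}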
*/ - unsigned char *op = (*pskb)->data + optoff; + unsigned char *op = skb->data + optoff; switch (op[0]) { case TCPOPT_EOL: @@ -365,7 +357,7 @@ nf_nat_sack_adjust(struct sk_buff **pskb, if (op[0] == TCPOPT_SACK && op[1] >= 2+TCPOLEN_SACK_PERBLOCK && ((op[1] - 2) % TCPOLEN_SACK_PERBLOCK) == 0) - sack_adjust(*pskb, tcph, optoff+2, + sack_adjust(skb, tcph, optoff+2, optoff+op[1], &nat->seq[!dir]); optoff += op[1]; } @@ -375,7 +367,7 @@ nf_nat_sack_adjust(struct sk_buff **pskb, /* TCP sequence number adjustment. Returns 1 on success, 0 on failure */ int -nf_nat_seq_adjust(struct sk_buff **pskb, +nf_nat_seq_adjust(struct sk_buff *skb, struct nf_conn *ct, enum ip_conntrack_info ctinfo) { @@ -390,10 +382,10 @@ nf_nat_seq_adjust(struct sk_buff **pskb, this_way = &nat->seq[dir]; other_way = &nat->seq[!dir]; - if (!skb_make_writable(pskb, ip_hdrlen(*pskb) + sizeof(*tcph))) + if (!skb_make_writable(skb, ip_hdrlen(skb) + sizeof(*tcph))) return 0; - tcph = (void *)(*pskb)->data + ip_hdrlen(*pskb); + tcph = (void *)skb->data + ip_hdrlen(skb); if (after(ntohl(tcph->seq), this_way->correction_pos)) newseq = htonl(ntohl(tcph->seq) + this_way->offset_after); else @@ -405,8 +397,8 @@ nf_nat_seq_adjust(struct sk_buff **pskb, else newack = htonl(ntohl(tcph->ack_seq) - other_way->offset_before); - nf_proto_csum_replace4(&tcph->check, *pskb, tcph->seq, newseq, 0); - nf_proto_csum_replace4(&tcph->check, *pskb, tcph->ack_seq, newack, 0); + nf_proto_csum_replace4(&tcph->check, skb, tcph->seq, newseq, 0); + nf_proto_csum_replace4(&tcph->check, skb, tcph->ack_seq, newack, 0); pr_debug("Adjusting sequence number from %u->%u, ack from %u->%u\n", ntohl(tcph->seq), ntohl(newseq), ntohl(tcph->ack_seq), @@ -415,10 +407,10 @@ nf_nat_seq_adjust(struct sk_buff **pskb, tcph->seq = newseq; tcph->ack_seq = newack; - if (!nf_nat_sack_adjust(pskb, tcph, ct, ctinfo)) + if (!nf_nat_sack_adjust(skb, tcph, ct, ctinfo)) return 0; - nf_conntrack_tcp_update(*pskb, ip_hdrlen(*pskb), ct, dir); + nf_conntrack_tcp_update(skb, ip_hdrlen(skb), ct, dir); return 1; } diff --git a/net/ipv4/netfilter/nf_nat_irc.c b/net/ipv4/netfilter/nf_nat_irc.c index bcf274bba60..766e2c16c6b 100644 --- a/net/ipv4/netfilter/nf_nat_irc.c +++ b/net/ipv4/netfilter/nf_nat_irc.c @@ -27,7 +27,7 @@ MODULE_DESCRIPTION("IRC (DCC) NAT helper"); MODULE_LICENSE("GPL"); MODULE_ALIAS("ip_nat_irc"); -static unsigned int help(struct sk_buff **pskb, +static unsigned int help(struct sk_buff *skb, enum ip_conntrack_info ctinfo, unsigned int matchoff, unsigned int matchlen, @@ -58,7 +58,7 @@ static unsigned int help(struct sk_buff **pskb, pr_debug("nf_nat_irc: inserting '%s' == %u.%u.%u.%u, port %u\n", buffer, NIPQUAD(ip), port); - ret = nf_nat_mangle_tcp_packet(pskb, exp->master, ctinfo, + ret = nf_nat_mangle_tcp_packet(skb, exp->master, ctinfo, matchoff, matchlen, buffer, strlen(buffer)); if (ret != NF_ACCEPT) diff --git a/net/ipv4/netfilter/nf_nat_pptp.c b/net/ipv4/netfilter/nf_nat_pptp.c index 984ec8308b2..e1385a09907 100644 --- a/net/ipv4/netfilter/nf_nat_pptp.c +++ b/net/ipv4/netfilter/nf_nat_pptp.c @@ -110,7 +110,7 @@ static void pptp_nat_expected(struct nf_conn *ct, /* outbound packets == from PNS to PAC */ static int -pptp_outbound_pkt(struct sk_buff **pskb, +pptp_outbound_pkt(struct sk_buff *skb, struct nf_conn *ct, enum ip_conntrack_info ctinfo, struct PptpControlHeader *ctlh, @@ -175,7 +175,7 @@ pptp_outbound_pkt(struct sk_buff **pskb, ntohs(REQ_CID(pptpReq, cid_off)), ntohs(new_callid)); /* mangle packet */ - if (nf_nat_mangle_tcp_packet(pskb, ct, ctinfo, + if 
(nf_nat_mangle_tcp_packet(skb, ct, ctinfo, cid_off + sizeof(struct pptp_pkt_hdr) + sizeof(struct PptpControlHeader), sizeof(new_callid), (char *)&new_callid, @@ -213,7 +213,7 @@ pptp_exp_gre(struct nf_conntrack_expect *expect_orig, /* inbound packets == from PAC to PNS */ static int -pptp_inbound_pkt(struct sk_buff **pskb, +pptp_inbound_pkt(struct sk_buff *skb, struct nf_conn *ct, enum ip_conntrack_info ctinfo, struct PptpControlHeader *ctlh, @@ -268,7 +268,7 @@ pptp_inbound_pkt(struct sk_buff **pskb, pr_debug("altering peer call id from 0x%04x to 0x%04x\n", ntohs(REQ_CID(pptpReq, pcid_off)), ntohs(new_pcid)); - if (nf_nat_mangle_tcp_packet(pskb, ct, ctinfo, + if (nf_nat_mangle_tcp_packet(skb, ct, ctinfo, pcid_off + sizeof(struct pptp_pkt_hdr) + sizeof(struct PptpControlHeader), sizeof(new_pcid), (char *)&new_pcid, diff --git a/net/ipv4/netfilter/nf_nat_proto_gre.c b/net/ipv4/netfilter/nf_nat_proto_gre.c index d562290b182..b820f996035 100644 --- a/net/ipv4/netfilter/nf_nat_proto_gre.c +++ b/net/ipv4/netfilter/nf_nat_proto_gre.c @@ -98,21 +98,21 @@ gre_unique_tuple(struct nf_conntrack_tuple *tuple, /* manipulate a GRE packet according to maniptype */ static int -gre_manip_pkt(struct sk_buff **pskb, unsigned int iphdroff, +gre_manip_pkt(struct sk_buff *skb, unsigned int iphdroff, const struct nf_conntrack_tuple *tuple, enum nf_nat_manip_type maniptype) { struct gre_hdr *greh; struct gre_hdr_pptp *pgreh; - struct iphdr *iph = (struct iphdr *)((*pskb)->data + iphdroff); + struct iphdr *iph = (struct iphdr *)(skb->data + iphdroff); unsigned int hdroff = iphdroff + iph->ihl * 4; /* pgreh includes two optional 32bit fields which are not required * to be there. That's where the magic '8' comes from */ - if (!skb_make_writable(pskb, hdroff + sizeof(*pgreh) - 8)) + if (!skb_make_writable(skb, hdroff + sizeof(*pgreh) - 8)) return 0; - greh = (void *)(*pskb)->data + hdroff; + greh = (void *)skb->data + hdroff; pgreh = (struct gre_hdr_pptp *)greh; /* we only have destination manip of a packet, since 'source key' diff --git a/net/ipv4/netfilter/nf_nat_proto_icmp.c b/net/ipv4/netfilter/nf_nat_proto_icmp.c index 898d7377115..b9fc724388f 100644 --- a/net/ipv4/netfilter/nf_nat_proto_icmp.c +++ b/net/ipv4/netfilter/nf_nat_proto_icmp.c @@ -52,20 +52,20 @@ icmp_unique_tuple(struct nf_conntrack_tuple *tuple, } static int -icmp_manip_pkt(struct sk_buff **pskb, +icmp_manip_pkt(struct sk_buff *skb, unsigned int iphdroff, const struct nf_conntrack_tuple *tuple, enum nf_nat_manip_type maniptype) { - struct iphdr *iph = (struct iphdr *)((*pskb)->data + iphdroff); + struct iphdr *iph = (struct iphdr *)(skb->data + iphdroff); struct icmphdr *hdr; unsigned int hdroff = iphdroff + iph->ihl*4; - if (!skb_make_writable(pskb, hdroff + sizeof(*hdr))) + if (!skb_make_writable(skb, hdroff + sizeof(*hdr))) return 0; - hdr = (struct icmphdr *)((*pskb)->data + hdroff); - nf_proto_csum_replace2(&hdr->checksum, *pskb, + hdr = (struct icmphdr *)(skb->data + hdroff); + nf_proto_csum_replace2(&hdr->checksum, skb, hdr->un.echo.id, tuple->src.u.icmp.id, 0); hdr->un.echo.id = tuple->src.u.icmp.id; return 1; diff --git a/net/ipv4/netfilter/nf_nat_proto_tcp.c b/net/ipv4/netfilter/nf_nat_proto_tcp.c index 5bbbb2acdc7..6bab2e18445 100644 --- a/net/ipv4/netfilter/nf_nat_proto_tcp.c +++ b/net/ipv4/netfilter/nf_nat_proto_tcp.c @@ -88,12 +88,12 @@ tcp_unique_tuple(struct nf_conntrack_tuple *tuple, } static int -tcp_manip_pkt(struct sk_buff **pskb, +tcp_manip_pkt(struct sk_buff *skb, unsigned int iphdroff, const struct nf_conntrack_tuple *tuple, 
enum nf_nat_manip_type maniptype) { - struct iphdr *iph = (struct iphdr *)((*pskb)->data + iphdroff); + struct iphdr *iph = (struct iphdr *)(skb->data + iphdroff); struct tcphdr *hdr; unsigned int hdroff = iphdroff + iph->ihl*4; __be32 oldip, newip; @@ -103,14 +103,14 @@ tcp_manip_pkt(struct sk_buff **pskb, /* this could be an inner header returned in icmp packet; in such cases we cannot update the checksum field since it is outside of the 8 bytes of transport layer headers we are guaranteed */ - if ((*pskb)->len >= hdroff + sizeof(struct tcphdr)) + if (skb->len >= hdroff + sizeof(struct tcphdr)) hdrsize = sizeof(struct tcphdr); - if (!skb_make_writable(pskb, hdroff + hdrsize)) + if (!skb_make_writable(skb, hdroff + hdrsize)) return 0; - iph = (struct iphdr *)((*pskb)->data + iphdroff); - hdr = (struct tcphdr *)((*pskb)->data + hdroff); + iph = (struct iphdr *)(skb->data + iphdroff); + hdr = (struct tcphdr *)(skb->data + hdroff); if (maniptype == IP_NAT_MANIP_SRC) { /* Get rid of src ip and src pt */ @@ -132,8 +132,8 @@ tcp_manip_pkt(struct sk_buff **pskb, if (hdrsize < sizeof(*hdr)) return 1; - nf_proto_csum_replace4(&hdr->check, *pskb, oldip, newip, 1); - nf_proto_csum_replace2(&hdr->check, *pskb, oldport, newport, 0); + nf_proto_csum_replace4(&hdr->check, skb, oldip, newip, 1); + nf_proto_csum_replace2(&hdr->check, skb, oldport, newport, 0); return 1; } diff --git a/net/ipv4/netfilter/nf_nat_proto_udp.c b/net/ipv4/netfilter/nf_nat_proto_udp.c index a0af4fd9558..cbf1a61e290 100644 --- a/net/ipv4/netfilter/nf_nat_proto_udp.c +++ b/net/ipv4/netfilter/nf_nat_proto_udp.c @@ -86,22 +86,22 @@ udp_unique_tuple(struct nf_conntrack_tuple *tuple, } static int -udp_manip_pkt(struct sk_buff **pskb, +udp_manip_pkt(struct sk_buff *skb, unsigned int iphdroff, const struct nf_conntrack_tuple *tuple, enum nf_nat_manip_type maniptype) { - struct iphdr *iph = (struct iphdr *)((*pskb)->data + iphdroff); + struct iphdr *iph = (struct iphdr *)(skb->data + iphdroff); struct udphdr *hdr; unsigned int hdroff = iphdroff + iph->ihl*4; __be32 oldip, newip; __be16 *portptr, newport; - if (!skb_make_writable(pskb, hdroff + sizeof(*hdr))) + if (!skb_make_writable(skb, hdroff + sizeof(*hdr))) return 0; - iph = (struct iphdr *)((*pskb)->data + iphdroff); - hdr = (struct udphdr *)((*pskb)->data + hdroff); + iph = (struct iphdr *)(skb->data + iphdroff); + hdr = (struct udphdr *)(skb->data + hdroff); if (maniptype == IP_NAT_MANIP_SRC) { /* Get rid of src ip and src pt */ @@ -116,9 +116,9 @@ udp_manip_pkt(struct sk_buff **pskb, newport = tuple->dst.u.udp.port; portptr = &hdr->dest; } - if (hdr->check || (*pskb)->ip_summed == CHECKSUM_PARTIAL) { - nf_proto_csum_replace4(&hdr->check, *pskb, oldip, newip, 1); - nf_proto_csum_replace2(&hdr->check, *pskb, *portptr, newport, + if (hdr->check || skb->ip_summed == CHECKSUM_PARTIAL) { + nf_proto_csum_replace4(&hdr->check, skb, oldip, newip, 1); + nf_proto_csum_replace2(&hdr->check, skb, *portptr, newport, 0); if (!hdr->check) hdr->check = CSUM_MANGLED_0; diff --git a/net/ipv4/netfilter/nf_nat_proto_unknown.c b/net/ipv4/netfilter/nf_nat_proto_unknown.c index f50d0203f9c..cfd2742e970 100644 --- a/net/ipv4/netfilter/nf_nat_proto_unknown.c +++ b/net/ipv4/netfilter/nf_nat_proto_unknown.c @@ -37,7 +37,7 @@ static int unknown_unique_tuple(struct nf_conntrack_tuple *tuple, } static int -unknown_manip_pkt(struct sk_buff **pskb, +unknown_manip_pkt(struct sk_buff *skb, unsigned int iphdroff, const struct nf_conntrack_tuple *tuple, enum nf_nat_manip_type maniptype) diff --git
a/net/ipv4/netfilter/nf_nat_rule.c b/net/ipv4/netfilter/nf_nat_rule.c index 76ec59ae524..46b25ab5f78 100644 --- a/net/ipv4/netfilter/nf_nat_rule.c +++ b/net/ipv4/netfilter/nf_nat_rule.c @@ -65,7 +65,7 @@ static struct xt_table nat_table = { }; /* Source NAT */ -static unsigned int ipt_snat_target(struct sk_buff **pskb, +static unsigned int ipt_snat_target(struct sk_buff *skb, const struct net_device *in, const struct net_device *out, unsigned int hooknum, @@ -78,7 +78,7 @@ static unsigned int ipt_snat_target(struct sk_buff **pskb, NF_CT_ASSERT(hooknum == NF_IP_POST_ROUTING); - ct = nf_ct_get(*pskb, &ctinfo); + ct = nf_ct_get(skb, &ctinfo); /* Connection must be valid and new. */ NF_CT_ASSERT(ct && (ctinfo == IP_CT_NEW || ctinfo == IP_CT_RELATED || @@ -107,7 +107,7 @@ static void warn_if_extra_mangle(__be32 dstip, __be32 srcip) ip_rt_put(rt); } -static unsigned int ipt_dnat_target(struct sk_buff **pskb, +static unsigned int ipt_dnat_target(struct sk_buff *skb, const struct net_device *in, const struct net_device *out, unsigned int hooknum, @@ -121,14 +121,14 @@ static unsigned int ipt_dnat_target(struct sk_buff **pskb, NF_CT_ASSERT(hooknum == NF_IP_PRE_ROUTING || hooknum == NF_IP_LOCAL_OUT); - ct = nf_ct_get(*pskb, &ctinfo); + ct = nf_ct_get(skb, &ctinfo); /* Connection must be valid and new. */ NF_CT_ASSERT(ct && (ctinfo == IP_CT_NEW || ctinfo == IP_CT_RELATED)); if (hooknum == NF_IP_LOCAL_OUT && mr->range[0].flags & IP_NAT_RANGE_MAP_IPS) - warn_if_extra_mangle(ip_hdr(*pskb)->daddr, + warn_if_extra_mangle(ip_hdr(skb)->daddr, mr->range[0].min_ip); return nf_nat_setup_info(ct, &mr->range[0], hooknum); @@ -204,7 +204,7 @@ alloc_null_binding_confirmed(struct nf_conn *ct, unsigned int hooknum) return nf_nat_setup_info(ct, &range, hooknum); } -int nf_nat_rule_find(struct sk_buff **pskb, +int nf_nat_rule_find(struct sk_buff *skb, unsigned int hooknum, const struct net_device *in, const struct net_device *out, @@ -212,7 +212,7 @@ int nf_nat_rule_find(struct sk_buff **pskb, { int ret; - ret = ipt_do_table(pskb, hooknum, in, out, &nat_table); + ret = ipt_do_table(skb, hooknum, in, out, &nat_table); if (ret == NF_ACCEPT) { if (!nf_nat_initialized(ct, HOOK2MANIP(hooknum))) diff --git a/net/ipv4/netfilter/nf_nat_sip.c b/net/ipv4/netfilter/nf_nat_sip.c index e14d41976c2..ce9edbcc01e 100644 --- a/net/ipv4/netfilter/nf_nat_sip.c +++ b/net/ipv4/netfilter/nf_nat_sip.c @@ -60,7 +60,7 @@ static void addr_map_init(struct nf_conn *ct, struct addr_map *map) } } -static int map_sip_addr(struct sk_buff **pskb, enum ip_conntrack_info ctinfo, +static int map_sip_addr(struct sk_buff *skb, enum ip_conntrack_info ctinfo, struct nf_conn *ct, const char **dptr, size_t dlen, enum sip_header_pos pos, struct addr_map *map) { @@ -84,15 +84,15 @@ static int map_sip_addr(struct sk_buff **pskb, enum ip_conntrack_info ctinfo, } else return 1; - if (!nf_nat_mangle_udp_packet(pskb, ct, ctinfo, + if (!nf_nat_mangle_udp_packet(skb, ct, ctinfo, matchoff, matchlen, addr, addrlen)) return 0; - *dptr = (*pskb)->data + ip_hdrlen(*pskb) + sizeof(struct udphdr); + *dptr = skb->data + ip_hdrlen(skb) + sizeof(struct udphdr); return 1; } -static unsigned int ip_nat_sip(struct sk_buff **pskb, +static unsigned int ip_nat_sip(struct sk_buff *skb, enum ip_conntrack_info ctinfo, struct nf_conn *ct, const char **dptr) @@ -101,8 +101,8 @@ static unsigned int ip_nat_sip(struct sk_buff **pskb, struct addr_map map; int dataoff, datalen; - dataoff = ip_hdrlen(*pskb) + sizeof(struct udphdr); - datalen = (*pskb)->len - dataoff; + dataoff = ip_hdrlen(skb) 
+ sizeof(struct udphdr); + datalen = skb->len - dataoff; if (datalen < sizeof("SIP/2.0") - 1) return NF_ACCEPT; @@ -121,19 +121,19 @@ static unsigned int ip_nat_sip(struct sk_buff **pskb, else pos = POS_REQ_URI; - if (!map_sip_addr(pskb, ctinfo, ct, dptr, datalen, pos, &map)) + if (!map_sip_addr(skb, ctinfo, ct, dptr, datalen, pos, &map)) return NF_DROP; } - if (!map_sip_addr(pskb, ctinfo, ct, dptr, datalen, POS_FROM, &map) || - !map_sip_addr(pskb, ctinfo, ct, dptr, datalen, POS_TO, &map) || - !map_sip_addr(pskb, ctinfo, ct, dptr, datalen, POS_VIA, &map) || - !map_sip_addr(pskb, ctinfo, ct, dptr, datalen, POS_CONTACT, &map)) + if (!map_sip_addr(skb, ctinfo, ct, dptr, datalen, POS_FROM, &map) || + !map_sip_addr(skb, ctinfo, ct, dptr, datalen, POS_TO, &map) || + !map_sip_addr(skb, ctinfo, ct, dptr, datalen, POS_VIA, &map) || + !map_sip_addr(skb, ctinfo, ct, dptr, datalen, POS_CONTACT, &map)) return NF_DROP; return NF_ACCEPT; } -static unsigned int mangle_sip_packet(struct sk_buff **pskb, +static unsigned int mangle_sip_packet(struct sk_buff *skb, enum ip_conntrack_info ctinfo, struct nf_conn *ct, const char **dptr, size_t dlen, @@ -145,16 +145,16 @@ static unsigned int mangle_sip_packet(struct sk_buff **pskb, if (ct_sip_get_info(ct, *dptr, dlen, &matchoff, &matchlen, pos) <= 0) return 0; - if (!nf_nat_mangle_udp_packet(pskb, ct, ctinfo, + if (!nf_nat_mangle_udp_packet(skb, ct, ctinfo, matchoff, matchlen, buffer, bufflen)) return 0; /* We need to reload this. Thanks Patrick. */ - *dptr = (*pskb)->data + ip_hdrlen(*pskb) + sizeof(struct udphdr); + *dptr = skb->data + ip_hdrlen(skb) + sizeof(struct udphdr); return 1; } -static int mangle_content_len(struct sk_buff **pskb, +static int mangle_content_len(struct sk_buff *skb, enum ip_conntrack_info ctinfo, struct nf_conn *ct, const char *dptr) @@ -163,22 +163,22 @@ static int mangle_content_len(struct sk_buff **pskb, char buffer[sizeof("65536")]; int bufflen; - dataoff = ip_hdrlen(*pskb) + sizeof(struct udphdr); + dataoff = ip_hdrlen(skb) + sizeof(struct udphdr); /* Get actual SDP lenght */ - if (ct_sip_get_info(ct, dptr, (*pskb)->len - dataoff, &matchoff, + if (ct_sip_get_info(ct, dptr, skb->len - dataoff, &matchoff, &matchlen, POS_SDP_HEADER) > 0) { /* since ct_sip_get_info() give us a pointer passing 'v=' we need to add 2 bytes in this count. */ - int c_len = (*pskb)->len - dataoff - matchoff + 2; + int c_len = skb->len - dataoff - matchoff + 2; /* Now, update SDP length */ - if (ct_sip_get_info(ct, dptr, (*pskb)->len - dataoff, &matchoff, + if (ct_sip_get_info(ct, dptr, skb->len - dataoff, &matchoff, &matchlen, POS_CONTENT) > 0) { bufflen = sprintf(buffer, "%u", c_len); - return nf_nat_mangle_udp_packet(pskb, ct, ctinfo, + return nf_nat_mangle_udp_packet(skb, ct, ctinfo, matchoff, matchlen, buffer, bufflen); } @@ -186,7 +186,7 @@ static int mangle_content_len(struct sk_buff **pskb, return 0; } -static unsigned int mangle_sdp(struct sk_buff **pskb, +static unsigned int mangle_sdp(struct sk_buff *skb, enum ip_conntrack_info ctinfo, struct nf_conn *ct, __be32 newip, u_int16_t port, @@ -195,25 +195,25 @@ static unsigned int mangle_sdp(struct sk_buff **pskb, char buffer[sizeof("nnn.nnn.nnn.nnn")]; unsigned int dataoff, bufflen; - dataoff = ip_hdrlen(*pskb) + sizeof(struct udphdr); + dataoff = ip_hdrlen(skb) + sizeof(struct udphdr); /* Mangle owner and contact info. 
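
The pattern running through every netfilter hunk in this series is the switch from struct sk_buff **pskb to struct sk_buff *skb in hook, target and helper signatures: once skb_make_writable() reallocates packet data in place (via pskb_expand_head()) instead of substituting a new skb, callers no longer need a pointer-to-pointer to observe a replacement. A minimal sketch of a post-conversion target; the function name is hypothetical, but the signature and calls are the ones visible in these hunks:

static unsigned int example_target(struct sk_buff *skb,
				   const struct net_device *in,
				   const struct net_device *out,
				   unsigned int hooknum,
				   const struct xt_target *target,
				   const void *targinfo)
{
	/* Any uncloning/reallocation happens inside this call; the
	 * skb pointer itself stays valid afterwards, so there is
	 * nothing to report back to the caller. */
	if (!skb_make_writable(skb, skb->len))
		return NF_DROP;

	/* ... mangle headers or payload through skb directly ... */
	return XT_CONTINUE;
}
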
*/ bufflen = sprintf(buffer, "%u.%u.%u.%u", NIPQUAD(newip)); - if (!mangle_sip_packet(pskb, ctinfo, ct, &dptr, (*pskb)->len - dataoff, + if (!mangle_sip_packet(skb, ctinfo, ct, &dptr, skb->len - dataoff, buffer, bufflen, POS_OWNER_IP4)) return 0; - if (!mangle_sip_packet(pskb, ctinfo, ct, &dptr, (*pskb)->len - dataoff, + if (!mangle_sip_packet(skb, ctinfo, ct, &dptr, skb->len - dataoff, buffer, bufflen, POS_CONNECTION_IP4)) return 0; /* Mangle media port. */ bufflen = sprintf(buffer, "%u", port); - if (!mangle_sip_packet(pskb, ctinfo, ct, &dptr, (*pskb)->len - dataoff, + if (!mangle_sip_packet(skb, ctinfo, ct, &dptr, skb->len - dataoff, buffer, bufflen, POS_MEDIA)) return 0; - return mangle_content_len(pskb, ctinfo, ct, dptr); + return mangle_content_len(skb, ctinfo, ct, dptr); } static void ip_nat_sdp_expect(struct nf_conn *ct, @@ -241,7 +241,7 @@ static void ip_nat_sdp_expect(struct nf_conn *ct, /* So, this packet has hit the connection tracking matching code. Mangle it, and change the expectation to match the new version. */ -static unsigned int ip_nat_sdp(struct sk_buff **pskb, +static unsigned int ip_nat_sdp(struct sk_buff *skb, enum ip_conntrack_info ctinfo, struct nf_conntrack_expect *exp, const char *dptr) @@ -277,7 +277,7 @@ static unsigned int ip_nat_sdp(struct sk_buff **pskb, if (port == 0) return NF_DROP; - if (!mangle_sdp(pskb, ctinfo, ct, newip, port, dptr)) { + if (!mangle_sdp(skb, ctinfo, ct, newip, port, dptr)) { nf_ct_unexpect_related(exp); return NF_DROP; } diff --git a/net/ipv4/netfilter/nf_nat_snmp_basic.c b/net/ipv4/netfilter/nf_nat_snmp_basic.c index 6bfcd3a90f0..03709d6b4b0 100644 --- a/net/ipv4/netfilter/nf_nat_snmp_basic.c +++ b/net/ipv4/netfilter/nf_nat_snmp_basic.c @@ -1188,9 +1188,9 @@ static int snmp_parse_mangle(unsigned char *msg, */ static int snmp_translate(struct nf_conn *ct, enum ip_conntrack_info ctinfo, - struct sk_buff **pskb) + struct sk_buff *skb) { - struct iphdr *iph = ip_hdr(*pskb); + struct iphdr *iph = ip_hdr(skb); struct udphdr *udph = (struct udphdr *)((__be32 *)iph + iph->ihl); u_int16_t udplen = ntohs(udph->len); u_int16_t paylen = udplen - sizeof(struct udphdr); @@ -1225,13 +1225,13 @@ static int snmp_translate(struct nf_conn *ct, /* We don't actually set up expectations, just adjust internal IP * addresses if this is being NATted */ -static int help(struct sk_buff **pskb, unsigned int protoff, +static int help(struct sk_buff *skb, unsigned int protoff, struct nf_conn *ct, enum ip_conntrack_info ctinfo) { int dir = CTINFO2DIR(ctinfo); unsigned int ret; - struct iphdr *iph = ip_hdr(*pskb); + struct iphdr *iph = ip_hdr(skb); struct udphdr *udph = (struct udphdr *)((u_int32_t *)iph + iph->ihl); /* SNMP replies and originating SNMP traps get mangled */ @@ -1250,7 +1250,7 @@ static int help(struct sk_buff **pskb, unsigned int protoff, * enough room for a UDP header. Just verify the UDP length field so we * can mess around with the payload. 
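
Condensed from the SNMP helper being converted here, this is the defensive sequence run before any payload rewriting: cross-check the UDP length field against what the skb actually holds, make the skb writable, then translate under the helper's lock (snmp_lock is the file-local spinlock). A sketch of the body of help(), using only calls from the hunk:

	struct iphdr *iph = ip_hdr(skb);
	struct udphdr *udph = (struct udphdr *)((u_int32_t *)iph + iph->ihl);
	unsigned int ret;

	/* a UDP length that disagrees with the skb marks a malformed
	 * (or hostile) packet; refuse to parse it as SNMP */
	if (ntohs(udph->len) != skb->len - (iph->ihl << 2))
		return NF_DROP;

	if (!skb_make_writable(skb, skb->len))
		return NF_DROP;

	spin_lock_bh(&snmp_lock);
	ret = snmp_translate(ct, ctinfo, skb);
	spin_unlock_bh(&snmp_lock);
	return ret;
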
*/ - if (ntohs(udph->len) != (*pskb)->len - (iph->ihl << 2)) { + if (ntohs(udph->len) != skb->len - (iph->ihl << 2)) { if (net_ratelimit()) printk(KERN_WARNING "SNMP: dropping malformed packet " "src=%u.%u.%u.%u dst=%u.%u.%u.%u\n", @@ -1258,11 +1258,11 @@ static int help(struct sk_buff **pskb, unsigned int protoff, return NF_DROP; } - if (!skb_make_writable(pskb, (*pskb)->len)) + if (!skb_make_writable(skb, skb->len)) return NF_DROP; spin_lock_bh(&snmp_lock); - ret = snmp_translate(ct, ctinfo, pskb); + ret = snmp_translate(ct, ctinfo, skb); spin_unlock_bh(&snmp_lock); return ret; } diff --git a/net/ipv4/netfilter/nf_nat_standalone.c b/net/ipv4/netfilter/nf_nat_standalone.c index 46cc99def16..7db76ea9af9 100644 --- a/net/ipv4/netfilter/nf_nat_standalone.c +++ b/net/ipv4/netfilter/nf_nat_standalone.c @@ -67,7 +67,7 @@ static void nat_decode_session(struct sk_buff *skb, struct flowi *fl) static unsigned int nf_nat_fn(unsigned int hooknum, - struct sk_buff **pskb, + struct sk_buff *skb, const struct net_device *in, const struct net_device *out, int (*okfn)(struct sk_buff *)) @@ -80,9 +80,9 @@ nf_nat_fn(unsigned int hooknum, /* We never see fragments: conntrack defrags on pre-routing and local-out, and nf_nat_out protects post-routing. */ - NF_CT_ASSERT(!(ip_hdr(*pskb)->frag_off & htons(IP_MF | IP_OFFSET))); + NF_CT_ASSERT(!(ip_hdr(skb)->frag_off & htons(IP_MF | IP_OFFSET))); - ct = nf_ct_get(*pskb, &ctinfo); + ct = nf_ct_get(skb, &ctinfo); /* Can't track? It's not due to stress, or conntrack would have dropped it. Hence it's the user's responsibilty to packet filter it out, or implement conntrack/NAT for that @@ -91,10 +91,10 @@ nf_nat_fn(unsigned int hooknum, /* Exception: ICMP redirect to new connection (not in hash table yet). We must not let this through, in case we're doing NAT to the same network. */ - if (ip_hdr(*pskb)->protocol == IPPROTO_ICMP) { + if (ip_hdr(skb)->protocol == IPPROTO_ICMP) { struct icmphdr _hdr, *hp; - hp = skb_header_pointer(*pskb, ip_hdrlen(*pskb), + hp = skb_header_pointer(skb, ip_hdrlen(skb), sizeof(_hdr), &_hdr); if (hp != NULL && hp->type == ICMP_REDIRECT) @@ -119,9 +119,9 @@ nf_nat_fn(unsigned int hooknum, switch (ctinfo) { case IP_CT_RELATED: case IP_CT_RELATED+IP_CT_IS_REPLY: - if (ip_hdr(*pskb)->protocol == IPPROTO_ICMP) { + if (ip_hdr(skb)->protocol == IPPROTO_ICMP) { if (!nf_nat_icmp_reply_translation(ct, ctinfo, - hooknum, pskb)) + hooknum, skb)) return NF_DROP; else return NF_ACCEPT; @@ -141,7 +141,7 @@ nf_nat_fn(unsigned int hooknum, /* LOCAL_IN hook doesn't have a chain! 
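
For orientation, a simplified sketch of the decision this comment introduces (omitting the already-confirmed corner case handled by alloc_null_binding_confirmed() above): NAT bindings are set up once per connection, not per packet, and LOCAL_IN, having no nat chain to consult, gets an identity binding instead of a table lookup:

	if (!nf_nat_initialized(ct, HOOK2MANIP(hooknum))) {
		if (hooknum == NF_IP_LOCAL_IN)
			/* no nat chain runs here: identity mapping */
			ret = alloc_null_binding(ct, hooknum);
		else
			ret = nf_nat_rule_find(skb, hooknum, in, out, ct);
		if (ret != NF_ACCEPT)
			return ret;
	}
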
*/ ret = alloc_null_binding(ct, hooknum); else - ret = nf_nat_rule_find(pskb, hooknum, in, out, + ret = nf_nat_rule_find(skb, hooknum, in, out, ct); if (ret != NF_ACCEPT) { @@ -159,31 +159,31 @@ nf_nat_fn(unsigned int hooknum, ctinfo == (IP_CT_ESTABLISHED+IP_CT_IS_REPLY)); } - return nf_nat_packet(ct, ctinfo, hooknum, pskb); + return nf_nat_packet(ct, ctinfo, hooknum, skb); } static unsigned int nf_nat_in(unsigned int hooknum, - struct sk_buff **pskb, + struct sk_buff *skb, const struct net_device *in, const struct net_device *out, int (*okfn)(struct sk_buff *)) { unsigned int ret; - __be32 daddr = ip_hdr(*pskb)->daddr; + __be32 daddr = ip_hdr(skb)->daddr; - ret = nf_nat_fn(hooknum, pskb, in, out, okfn); + ret = nf_nat_fn(hooknum, skb, in, out, okfn); if (ret != NF_DROP && ret != NF_STOLEN && - daddr != ip_hdr(*pskb)->daddr) { - dst_release((*pskb)->dst); - (*pskb)->dst = NULL; + daddr != ip_hdr(skb)->daddr) { + dst_release(skb->dst); + skb->dst = NULL; } return ret; } static unsigned int nf_nat_out(unsigned int hooknum, - struct sk_buff **pskb, + struct sk_buff *skb, const struct net_device *in, const struct net_device *out, int (*okfn)(struct sk_buff *)) @@ -195,14 +195,14 @@ nf_nat_out(unsigned int hooknum, unsigned int ret; /* root is playing with raw sockets. */ - if ((*pskb)->len < sizeof(struct iphdr) || - ip_hdrlen(*pskb) < sizeof(struct iphdr)) + if (skb->len < sizeof(struct iphdr) || + ip_hdrlen(skb) < sizeof(struct iphdr)) return NF_ACCEPT; - ret = nf_nat_fn(hooknum, pskb, in, out, okfn); + ret = nf_nat_fn(hooknum, skb, in, out, okfn); #ifdef CONFIG_XFRM if (ret != NF_DROP && ret != NF_STOLEN && - (ct = nf_ct_get(*pskb, &ctinfo)) != NULL) { + (ct = nf_ct_get(skb, &ctinfo)) != NULL) { enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo); if (ct->tuplehash[dir].tuple.src.u3.ip != @@ -210,7 +210,7 @@ nf_nat_out(unsigned int hooknum, || ct->tuplehash[dir].tuple.src.u.all != ct->tuplehash[!dir].tuple.dst.u.all ) - return ip_xfrm_me_harder(pskb) == 0 ? ret : NF_DROP; + return ip_xfrm_me_harder(skb) == 0 ? ret : NF_DROP; } #endif return ret; @@ -218,7 +218,7 @@ nf_nat_out(unsigned int hooknum, static unsigned int nf_nat_local_fn(unsigned int hooknum, - struct sk_buff **pskb, + struct sk_buff *skb, const struct net_device *in, const struct net_device *out, int (*okfn)(struct sk_buff *)) @@ -228,24 +228,24 @@ nf_nat_local_fn(unsigned int hooknum, unsigned int ret; /* root is playing with raw sockets. 
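
The nf_nat_in() wrapper above also shows why the conversion has to be careful with routing state: if DNAT rewrote the destination, any route already attached to the skb points at the old address. The relevant lines, as they appear in the hunk:

	__be32 daddr = ip_hdr(skb)->daddr;

	ret = nf_nat_fn(hooknum, skb, in, out, okfn);
	if (ret != NF_DROP && ret != NF_STOLEN &&
	    daddr != ip_hdr(skb)->daddr) {
		/* destination changed under us: drop the stale route
		 * so the stack re-resolves it for the new address */
		dst_release(skb->dst);
		skb->dst = NULL;
	}
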
*/ - if ((*pskb)->len < sizeof(struct iphdr) || - ip_hdrlen(*pskb) < sizeof(struct iphdr)) + if (skb->len < sizeof(struct iphdr) || + ip_hdrlen(skb) < sizeof(struct iphdr)) return NF_ACCEPT; - ret = nf_nat_fn(hooknum, pskb, in, out, okfn); + ret = nf_nat_fn(hooknum, skb, in, out, okfn); if (ret != NF_DROP && ret != NF_STOLEN && - (ct = nf_ct_get(*pskb, &ctinfo)) != NULL) { + (ct = nf_ct_get(skb, &ctinfo)) != NULL) { enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo); if (ct->tuplehash[dir].tuple.dst.u3.ip != ct->tuplehash[!dir].tuple.src.u3.ip) { - if (ip_route_me_harder(pskb, RTN_UNSPEC)) + if (ip_route_me_harder(skb, RTN_UNSPEC)) ret = NF_DROP; } #ifdef CONFIG_XFRM else if (ct->tuplehash[dir].tuple.dst.u.all != ct->tuplehash[!dir].tuple.src.u.all) - if (ip_xfrm_me_harder(pskb)) + if (ip_xfrm_me_harder(skb)) ret = NF_DROP; #endif } @@ -254,7 +254,7 @@ nf_nat_local_fn(unsigned int hooknum, static unsigned int nf_nat_adjust(unsigned int hooknum, - struct sk_buff **pskb, + struct sk_buff *skb, const struct net_device *in, const struct net_device *out, int (*okfn)(struct sk_buff *)) @@ -262,10 +262,10 @@ nf_nat_adjust(unsigned int hooknum, struct nf_conn *ct; enum ip_conntrack_info ctinfo; - ct = nf_ct_get(*pskb, &ctinfo); + ct = nf_ct_get(skb, &ctinfo); if (ct && test_bit(IPS_SEQ_ADJUST_BIT, &ct->status)) { pr_debug("nf_nat_standalone: adjusting sequence number\n"); - if (!nf_nat_seq_adjust(pskb, ct, ctinfo)) + if (!nf_nat_seq_adjust(skb, ct, ctinfo)) return NF_DROP; } return NF_ACCEPT; diff --git a/net/ipv4/netfilter/nf_nat_tftp.c b/net/ipv4/netfilter/nf_nat_tftp.c index 04dfeaefec0..0ecec701cb4 100644 --- a/net/ipv4/netfilter/nf_nat_tftp.c +++ b/net/ipv4/netfilter/nf_nat_tftp.c @@ -20,7 +20,7 @@ MODULE_DESCRIPTION("TFTP NAT helper"); MODULE_LICENSE("GPL"); MODULE_ALIAS("ip_nat_tftp"); -static unsigned int help(struct sk_buff **pskb, +static unsigned int help(struct sk_buff *skb, enum ip_conntrack_info ctinfo, struct nf_conntrack_expect *exp) { diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c index e5b05b03910..fd16cb8f8ab 100644 --- a/net/ipv4/proc.c +++ b/net/ipv4/proc.c @@ -70,8 +70,8 @@ static int sockstat_seq_show(struct seq_file *seq, void *v) seq_printf(seq, "UDP: inuse %d\n", fold_prot_inuse(&udp_prot)); seq_printf(seq, "UDPLITE: inuse %d\n", fold_prot_inuse(&udplite_prot)); seq_printf(seq, "RAW: inuse %d\n", fold_prot_inuse(&raw_prot)); - seq_printf(seq, "FRAG: inuse %d memory %d\n", ip_frag_nqueues, - atomic_read(&ip_frag_mem)); + seq_printf(seq, "FRAG: inuse %d memory %d\n", + ip_frag_nqueues(), ip_frag_mem()); return 0; } diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c index eb286abcf5d..ffddd2b4535 100644 --- a/net/ipv4/sysctl_net_ipv4.c +++ b/net/ipv4/sysctl_net_ipv4.c @@ -19,6 +19,7 @@ #include <net/route.h> #include <net/tcp.h> #include <net/cipso_ipv4.h> +#include <net/inet_frag.h> /* From af_inet.c */ extern int sysctl_ip_nonlocal_bind; @@ -121,7 +122,7 @@ static int ipv4_local_port_range(ctl_table *table, int write, struct file *filp, ret = proc_dointvec_minmax(&tmp, write, filp, buffer, lenp, ppos); if (write && ret == 0) { - if (range[1] <= range[0]) + if (range[1] < range[0]) ret = -EINVAL; else set_local_port_range(range); @@ -149,7 +150,7 @@ static int ipv4_sysctl_local_port_range(ctl_table *table, int __user *name, ret = sysctl_intvec(&tmp, name, nlen, oldval, oldlenp, newval, newlen); if (ret == 0 && newval && newlen) { - if (range[1] <= range[0]) + if (range[1] < range[0]) ret = -EINVAL; else set_local_port_range(range); @@ -357,7 +358,7 @@ 
ctl_table ipv4_table[] = { { .ctl_name = NET_IPV4_IPFRAG_HIGH_THRESH, .procname = "ipfrag_high_thresh", - .data = &sysctl_ipfrag_high_thresh, + .data = &ip4_frags_ctl.high_thresh, .maxlen = sizeof(int), .mode = 0644, .proc_handler = &proc_dointvec @@ -365,7 +366,7 @@ ctl_table ipv4_table[] = { { .ctl_name = NET_IPV4_IPFRAG_LOW_THRESH, .procname = "ipfrag_low_thresh", - .data = &sysctl_ipfrag_low_thresh, + .data = &ip4_frags_ctl.low_thresh, .maxlen = sizeof(int), .mode = 0644, .proc_handler = &proc_dointvec @@ -381,7 +382,7 @@ ctl_table ipv4_table[] = { { .ctl_name = NET_IPV4_IPFRAG_TIME, .procname = "ipfrag_time", - .data = &sysctl_ipfrag_time, + .data = &ip4_frags_ctl.timeout, .maxlen = sizeof(int), .mode = 0644, .proc_handler = &proc_dointvec_jiffies, @@ -732,14 +733,13 @@ ctl_table ipv4_table[] = { { .ctl_name = NET_IPV4_IPFRAG_SECRET_INTERVAL, .procname = "ipfrag_secret_interval", - .data = &sysctl_ipfrag_secret_interval, + .data = &ip4_frags_ctl.secret_interval, .maxlen = sizeof(int), .mode = 0644, .proc_handler = &proc_dointvec_jiffies, .strategy = &sysctl_jiffies }, { - .ctl_name = NET_IPV4_IPFRAG_MAX_DIST, .procname = "ipfrag_max_dist", .data = &sysctl_ipfrag_max_dist, .maxlen = sizeof(int), @@ -864,7 +864,6 @@ ctl_table ipv4_table[] = { }, #endif /* CONFIG_NETLABEL */ { - .ctl_name = NET_TCP_AVAIL_CONG_CONTROL, .procname = "tcp_available_congestion_control", .maxlen = TCP_CA_BUF_MAX, .mode = 0444, diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 4f322003835..2e6ad6dbba6 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -1334,7 +1334,7 @@ do_prequeue: if ((flags & MSG_PEEK) && peek_seq != tp->copied_seq) { if (net_ratelimit()) printk(KERN_DEBUG "TCP(%s:%d): Application bug, race in MSG_PEEK.\n", - current->comm, current->pid); + current->comm, task_pid_nr(current)); peek_seq = tp->copied_seq; } continue; diff --git a/net/ipv4/tcp_diag.c b/net/ipv4/tcp_diag.c index 3904d2158a9..2fbcc7d1b1a 100644 --- a/net/ipv4/tcp_diag.c +++ b/net/ipv4/tcp_diag.c @@ -56,3 +56,4 @@ static void __exit tcp_diag_exit(void) module_init(tcp_diag_init); module_exit(tcp_diag_exit); MODULE_LICENSE("GPL"); +MODULE_ALIAS_NET_PF_PROTO_TYPE(PF_NETLINK, NETLINK_INET_DIAG, TCPDIAG_GETSOCK); diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 0a42e934034..9288220b73a 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -1121,7 +1121,7 @@ static int tcp_mark_lost_retrans(struct sock *sk, u32 received_upto) struct sk_buff *skb; int flag = 0; int cnt = 0; - u32 new_low_seq = 0; + u32 new_low_seq = tp->snd_nxt; tcp_for_write_queue(skb, sk) { u32 ack_seq = TCP_SKB_CB(skb)->ack_seq; @@ -1153,7 +1153,7 @@ static int tcp_mark_lost_retrans(struct sock *sk, u32 received_upto) NET_INC_STATS_BH(LINUX_MIB_TCPLOSTRETRANSMIT); } } else { - if (!new_low_seq || before(ack_seq, new_low_seq)) + if (before(ack_seq, new_low_seq)) new_low_seq = ack_seq; cnt += tcp_skb_pcount(skb); } @@ -1242,7 +1242,7 @@ tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb, u32 prior_snd_ int num_sacks = (ptr[1] - TCPOLEN_SACK_BASE)>>3; int reord = tp->packets_out; int prior_fackets; - u32 highest_sack_end_seq = 0; + u32 highest_sack_end_seq = tp->lost_retrans_low; int flag = 0; int found_dup_sack = 0; int cached_fack_count; @@ -1995,8 +1995,7 @@ static void tcp_verify_retransmit_hint(struct tcp_sock *tp, } /* Mark head of queue up as lost. 
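
The tcp_mark_lost_retrans() hunks above fix a subtle sentinel bug: TCP sequence numbers wrap, so 0 is a legitimate value and cannot mean "not yet set". Seeding the running minimum with tp->snd_nxt, an upper bound on every sequence still in flight, lets before() be used unconditionally. The pattern in isolation, compressed from the hunk:

	u32 new_low_seq = tp->snd_nxt;	/* upper bound, not a 0 sentinel */

	tcp_for_write_queue(skb, sk) {
		u32 ack_seq = TCP_SKB_CB(skb)->ack_seq;

		/* ... per-skb lost-retransmit checks ... */

		/* correct even when ack_seq wraps through zero */
		if (before(ack_seq, new_low_seq))
			new_low_seq = ack_seq;
	}
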
*/ -static void tcp_mark_head_lost(struct sock *sk, - int packets, u32 high_seq) +static void tcp_mark_head_lost(struct sock *sk, int packets) { struct tcp_sock *tp = tcp_sk(sk); struct sk_buff *skb; @@ -2019,7 +2018,7 @@ static void tcp_mark_head_lost(struct sock *sk, tp->lost_skb_hint = skb; tp->lost_cnt_hint = cnt; cnt += tcp_skb_pcount(skb); - if (cnt > packets || after(TCP_SKB_CB(skb)->end_seq, high_seq)) + if (cnt > packets || after(TCP_SKB_CB(skb)->end_seq, tp->high_seq)) break; if (!(TCP_SKB_CB(skb)->sacked & (TCPCB_SACKED_ACKED|TCPCB_LOST))) { TCP_SKB_CB(skb)->sacked |= TCPCB_LOST; @@ -2040,9 +2039,9 @@ static void tcp_update_scoreboard(struct sock *sk) int lost = tp->fackets_out - tp->reordering; if (lost <= 0) lost = 1; - tcp_mark_head_lost(sk, lost, tp->high_seq); + tcp_mark_head_lost(sk, lost); } else { - tcp_mark_head_lost(sk, 1, tp->high_seq); + tcp_mark_head_lost(sk, 1); } /* New heuristics: it is possible only after we switched @@ -2381,7 +2380,7 @@ tcp_fastretrans_alert(struct sock *sk, int pkts_acked, int flag) before(tp->snd_una, tp->high_seq) && icsk->icsk_ca_state != TCP_CA_Open && tp->fackets_out > tp->reordering) { - tcp_mark_head_lost(sk, tp->fackets_out-tp->reordering, tp->high_seq); + tcp_mark_head_lost(sk, tp->fackets_out - tp->reordering); NET_INC_STATS_BH(LINUX_MIB_TCPLOSS); } diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index cb9fc58efb2..35d2b0e9e10 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -147,13 +147,14 @@ int __udp_lib_get_port(struct sock *sk, unsigned short snum, write_lock_bh(&udp_hash_lock); if (!snum) { - int i, low, high; + int i, low, high, remaining; unsigned rover, best, best_size_so_far; inet_get_local_port_range(&low, &high); + remaining = (high - low) + 1; best_size_so_far = UINT_MAX; - best = rover = net_random() % (high - low) + low; + best = rover = net_random() % remaining + low; /* 1st pass: look for empty (or shortest) hash chain */ for (i = 0; i < UDP_HTABLE_SIZE; i++) { diff --git a/net/ipv4/xfrm4_input.c b/net/ipv4/xfrm4_input.c index e9bbfde19ac..5e95c8a07ef 100644 --- a/net/ipv4/xfrm4_input.c +++ b/net/ipv4/xfrm4_input.c @@ -16,19 +16,6 @@ #include <net/ip.h> #include <net/xfrm.h> -static int xfrm4_parse_spi(struct sk_buff *skb, u8 nexthdr, __be32 *spi, __be32 *seq) -{ - switch (nexthdr) { - case IPPROTO_IPIP: - case IPPROTO_IPV6: - *spi = ip_hdr(skb)->saddr; - *seq = 0; - return 0; - } - - return xfrm_parse_spi(skb, nexthdr, spi, seq); -} - #ifdef CONFIG_NETFILTER static inline int xfrm4_rcv_encap_finish(struct sk_buff *skb) { @@ -46,28 +33,29 @@ drop: } #endif -static int xfrm4_rcv_encap(struct sk_buff *skb, __u16 encap_type) +int xfrm4_rcv_encap(struct sk_buff *skb, int nexthdr, __be32 spi, + int encap_type) { - __be32 spi, seq; + int err; + __be32 seq; struct xfrm_state *xfrm_vec[XFRM_MAX_DEPTH]; struct xfrm_state *x; int xfrm_nr = 0; int decaps = 0; - int err = xfrm4_parse_spi(skb, ip_hdr(skb)->protocol, &spi, &seq); unsigned int nhoff = offsetof(struct iphdr, protocol); - if (err != 0) + seq = 0; + if (!spi && (err = xfrm_parse_spi(skb, nexthdr, &spi, &seq)) != 0) goto drop; do { const struct iphdr *iph = ip_hdr(skb); - int nexthdr; if (xfrm_nr == XFRM_MAX_DEPTH) goto drop; x = xfrm_state_lookup((xfrm_address_t *)&iph->daddr, spi, - iph->protocol != IPPROTO_IPV6 ? 
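
/*
 * The xfrm4_rcv_encap() rework visible here inverts responsibility:
 * instead of re-deriving the protocol from the IP header (the ternary
 * this hunk deletes), the function now trusts a caller-supplied
 * nexthdr plus an optional pre-resolved SPI, where SPI 0 means "parse
 * it from the packet".  The entry points, as introduced by these
 * hunks:
 *
 *	int xfrm4_rcv(struct sk_buff *skb)
 *	{
 *		return xfrm4_rcv_spi(skb, ip_hdr(skb)->protocol, 0);
 *	}
 *
 *	static int xfrm_tunnel_rcv(struct sk_buff *skb)
 *	{
 *		return xfrm4_rcv_spi(skb, IPPROTO_IP, ip_hdr(skb)->saddr);
 *	}
 *
 * For IPIP the peer address doubles as the lookup key, and UDP
 * decapsulation now calls xfrm4_rcv_encap(skb, IPPROTO_ESP, 0,
 * encap_type) directly, removing the old hack of overwriting
 * iph->protocol with IPPROTO_ESP before the call.
 */
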
iph->protocol : IPPROTO_IPIP, AF_INET); + nexthdr, AF_INET); if (x == NULL) goto drop; @@ -103,15 +91,15 @@ static int xfrm4_rcv_encap(struct sk_buff *skb, __u16 encap_type) xfrm_vec[xfrm_nr++] = x; - if (x->mode->input(x, skb)) + if (x->outer_mode->input(x, skb)) goto drop; - if (x->props.mode == XFRM_MODE_TUNNEL) { + if (x->outer_mode->flags & XFRM_MODE_FLAG_TUNNEL) { decaps = 1; break; } - err = xfrm_parse_spi(skb, ip_hdr(skb)->protocol, &spi, &seq); + err = xfrm_parse_spi(skb, nexthdr, &spi, &seq); if (err < 0) goto drop; } while (!err); @@ -165,6 +153,7 @@ drop: kfree_skb(skb); return 0; } +EXPORT_SYMBOL(xfrm4_rcv_encap); /* If it's a keepalive packet, then just eat it. * If it's an encapsulated packet, then pass it to the @@ -252,11 +241,8 @@ int xfrm4_udp_encap_rcv(struct sock *sk, struct sk_buff *skb) __skb_pull(skb, len); skb_reset_transport_header(skb); - /* modify the protocol (it's ESP!) */ - iph->protocol = IPPROTO_ESP; - /* process ESP */ - ret = xfrm4_rcv_encap(skb, encap_type); + ret = xfrm4_rcv_encap(skb, IPPROTO_ESP, 0, encap_type); return ret; drop: @@ -266,7 +252,7 @@ drop: int xfrm4_rcv(struct sk_buff *skb) { - return xfrm4_rcv_encap(skb, 0); + return xfrm4_rcv_spi(skb, ip_hdr(skb)->protocol, 0); } EXPORT_SYMBOL(xfrm4_rcv); diff --git a/net/ipv4/xfrm4_mode_beet.c b/net/ipv4/xfrm4_mode_beet.c index 73d2338bec5..e42e122414b 100644 --- a/net/ipv4/xfrm4_mode_beet.c +++ b/net/ipv4/xfrm4_mode_beet.c @@ -114,6 +114,7 @@ static struct xfrm_mode xfrm4_beet_mode = { .output = xfrm4_beet_output, .owner = THIS_MODULE, .encap = XFRM_MODE_BEET, + .flags = XFRM_MODE_FLAG_TUNNEL, }; static int __init xfrm4_beet_init(void) diff --git a/net/ipv4/xfrm4_mode_tunnel.c b/net/ipv4/xfrm4_mode_tunnel.c index 1ae9d32276f..e4deecba6dd 100644 --- a/net/ipv4/xfrm4_mode_tunnel.c +++ b/net/ipv4/xfrm4_mode_tunnel.c @@ -139,6 +139,7 @@ static struct xfrm_mode xfrm4_tunnel_mode = { .output = xfrm4_tunnel_output, .owner = THIS_MODULE, .encap = XFRM_MODE_TUNNEL, + .flags = XFRM_MODE_FLAG_TUNNEL, }; static int __init xfrm4_tunnel_init(void) diff --git a/net/ipv4/xfrm4_output.c b/net/ipv4/xfrm4_output.c index 434ef302ba8..c4a7156962b 100644 --- a/net/ipv4/xfrm4_output.c +++ b/net/ipv4/xfrm4_output.c @@ -47,7 +47,7 @@ static inline int xfrm4_output_one(struct sk_buff *skb) struct iphdr *iph; int err; - if (x->props.mode == XFRM_MODE_TUNNEL) { + if (x->outer_mode->flags & XFRM_MODE_FLAG_TUNNEL) { err = xfrm4_tunnel_check_size(skb); if (err) goto error_nolock; @@ -78,7 +78,7 @@ static int xfrm4_output_finish2(struct sk_buff *skb) while (likely((err = xfrm4_output_one(skb)) == 0)) { nf_reset(skb); - err = nf_hook(PF_INET, NF_IP_LOCAL_OUT, &skb, NULL, + err = nf_hook(PF_INET, NF_IP_LOCAL_OUT, skb, NULL, skb->dst->dev, dst_output); if (unlikely(err != 1)) break; @@ -86,7 +86,7 @@ static int xfrm4_output_finish2(struct sk_buff *skb) if (!skb->dst->xfrm) return dst_output(skb); - err = nf_hook(PF_INET, NF_IP_POST_ROUTING, &skb, NULL, + err = nf_hook(PF_INET, NF_IP_POST_ROUTING, skb, NULL, skb->dst->dev, xfrm4_output_finish2); if (unlikely(err != 1)) break; diff --git a/net/ipv4/xfrm4_policy.c b/net/ipv4/xfrm4_policy.c index 329825ca68f..cc86fb110dd 100644 --- a/net/ipv4/xfrm4_policy.c +++ b/net/ipv4/xfrm4_policy.c @@ -117,7 +117,7 @@ __xfrm4_bundle_create(struct xfrm_policy *policy, struct xfrm_state **xfrm, int header_len += xfrm[i]->props.header_len; trailer_len += xfrm[i]->props.trailer_len; - if (xfrm[i]->props.mode == XFRM_MODE_TUNNEL) { + if (xfrm[i]->props.mode != XFRM_MODE_TRANSPORT) { unsigned short 
encap_family = xfrm[i]->props.family; switch (encap_family) { case AF_INET: @@ -151,7 +151,6 @@ __xfrm4_bundle_create(struct xfrm_policy *policy, struct xfrm_state **xfrm, int i = 0; for (; dst_prev != &rt->u.dst; dst_prev = dst_prev->child) { struct xfrm_dst *x = (struct xfrm_dst*)dst_prev; - struct xfrm_state_afinfo *afinfo; x->u.rt.fl = *fl; dst_prev->xfrm = xfrm[i++]; @@ -169,27 +168,17 @@ __xfrm4_bundle_create(struct xfrm_policy *policy, struct xfrm_state **xfrm, int /* Copy neighbout for reachability confirmation */ dst_prev->neighbour = neigh_clone(rt->u.dst.neighbour); dst_prev->input = rt->u.dst.input; - /* XXX: When IPv6 module can be unloaded, we should manage reference - * to xfrm6_output in afinfo->output. Miyazawa - * */ - afinfo = xfrm_state_get_afinfo(dst_prev->xfrm->props.family); - if (!afinfo) { - dst = *dst_p; - err = -EAFNOSUPPORT; - goto error; - } - dst_prev->output = afinfo->output; - xfrm_state_put_afinfo(afinfo); - if (dst_prev->xfrm->props.family == AF_INET && rt->peer) - atomic_inc(&rt->peer->refcnt); - x->u.rt.peer = rt->peer; + dst_prev->output = dst_prev->xfrm->outer_mode->afinfo->output; + if (rt0->peer) + atomic_inc(&rt0->peer->refcnt); + x->u.rt.peer = rt0->peer; /* Sheit... I remember I did this right. Apparently, * it was magically lost, so this code needs audit */ x->u.rt.rt_flags = rt0->rt_flags&(RTCF_BROADCAST|RTCF_MULTICAST|RTCF_LOCAL); - x->u.rt.rt_type = rt->rt_type; + x->u.rt.rt_type = rt0->rt_type; x->u.rt.rt_src = rt0->rt_src; x->u.rt.rt_dst = rt0->rt_dst; - x->u.rt.rt_gateway = rt->rt_gateway; + x->u.rt.rt_gateway = rt0->rt_gateway; x->u.rt.rt_spec_dst = rt0->rt_spec_dst; x->u.rt.idev = rt0->idev; in_dev_hold(rt0->idev); @@ -291,7 +280,7 @@ static void xfrm4_dst_destroy(struct dst_entry *dst) if (likely(xdst->u.rt.idev)) in_dev_put(xdst->u.rt.idev); - if (dst->xfrm && dst->xfrm->props.family == AF_INET && likely(xdst->u.rt.peer)) + if (likely(xdst->u.rt.peer)) inet_putpeer(xdst->u.rt.peer); xfrm_dst_destroy(xdst); } diff --git a/net/ipv4/xfrm4_state.c b/net/ipv4/xfrm4_state.c index 93e2c061cdd..13d54a1c333 100644 --- a/net/ipv4/xfrm4_state.c +++ b/net/ipv4/xfrm4_state.c @@ -49,6 +49,7 @@ __xfrm4_init_tempsel(struct xfrm_state *x, struct flowi *fl, static struct xfrm_state_afinfo xfrm4_state_afinfo = { .family = AF_INET, + .owner = THIS_MODULE, .init_flags = xfrm4_init_flags, .init_tempsel = __xfrm4_init_tempsel, .output = xfrm4_output, diff --git a/net/ipv4/xfrm4_tunnel.c b/net/ipv4/xfrm4_tunnel.c index 1312417608e..32684519562 100644 --- a/net/ipv4/xfrm4_tunnel.c +++ b/net/ipv4/xfrm4_tunnel.c @@ -18,7 +18,7 @@ static int ipip_output(struct xfrm_state *x, struct sk_buff *skb) static int ipip_xfrm_rcv(struct xfrm_state *x, struct sk_buff *skb) { - return IPPROTO_IP; + return ip_hdr(skb)->protocol; } static int ipip_init_state(struct xfrm_state *x) @@ -48,20 +48,25 @@ static struct xfrm_type ipip_type = { .output = ipip_output }; +static int xfrm_tunnel_rcv(struct sk_buff *skb) +{ + return xfrm4_rcv_spi(skb, IPPROTO_IP, ip_hdr(skb)->saddr); +} + static int xfrm_tunnel_err(struct sk_buff *skb, u32 info) { return -ENOENT; } static struct xfrm_tunnel xfrm_tunnel_handler = { - .handler = xfrm4_rcv, + .handler = xfrm_tunnel_rcv, .err_handler = xfrm_tunnel_err, .priority = 2, }; #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) static struct xfrm_tunnel xfrm64_tunnel_handler = { - .handler = xfrm4_rcv, + .handler = xfrm_tunnel_rcv, .err_handler = xfrm_tunnel_err, .priority = 2, }; diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 
52d10d21321..348bd8d0611 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -255,11 +255,6 @@ static void addrconf_mod_timer(struct inet6_ifaddr *ifp, static int snmp6_alloc_dev(struct inet6_dev *idev) { - int err = -ENOMEM; - - if (!idev || !idev->dev) - return -EINVAL; - if (snmp_mib_init((void **)idev->stats.ipv6, sizeof(struct ipstats_mib), __alignof__(struct ipstats_mib)) < 0) @@ -280,15 +275,14 @@ err_icmpmsg: err_icmp: snmp_mib_free((void **)idev->stats.ipv6); err_ip: - return err; + return -ENOMEM; } -static int snmp6_free_dev(struct inet6_dev *idev) +static void snmp6_free_dev(struct inet6_dev *idev) { snmp_mib_free((void **)idev->stats.icmpv6msg); snmp_mib_free((void **)idev->stats.icmpv6); snmp_mib_free((void **)idev->stats.ipv6); - return 0; } /* Nobody refers to this device, we may destroy it. */ diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c index bc929381fa4..1b1caf3aa1c 100644 --- a/net/ipv6/af_inet6.c +++ b/net/ipv6/af_inet6.c @@ -747,6 +747,7 @@ static void cleanup_ipv6_mibs(void) { snmp_mib_free((void **)ipv6_statistics); snmp_mib_free((void **)icmpv6_statistics); + snmp_mib_free((void **)icmpv6msg_statistics); snmp_mib_free((void **)udp_stats_in6); snmp_mib_free((void **)udplite_stats_in6); } diff --git a/net/ipv6/ah6.c b/net/ipv6/ah6.c index f9f68916269..66a9139d46e 100644 --- a/net/ipv6/ah6.c +++ b/net/ipv6/ah6.c @@ -344,6 +344,8 @@ static int ah6_input(struct xfrm_state *x, struct sk_buff *skb) pskb_expand_head(skb, 0, 0, GFP_ATOMIC)) goto out; + skb->ip_summed = CHECKSUM_NONE; + hdr_len = skb->data - skb_network_header(skb); ah = (struct ip_auth_hdr *)skb->data; ahp = x->data; @@ -475,8 +477,16 @@ static int ah6_init_state(struct xfrm_state *x) x->props.header_len = XFRM_ALIGN8(sizeof(struct ip_auth_hdr) + ahp->icv_trunc_len); - if (x->props.mode == XFRM_MODE_TUNNEL) + switch (x->props.mode) { + case XFRM_MODE_BEET: + case XFRM_MODE_TRANSPORT: + break; + case XFRM_MODE_TUNNEL: x->props.header_len += sizeof(struct ipv6hdr); + break; + default: + goto error; + } x->data = ahp; return 0; diff --git a/net/ipv6/esp6.c b/net/ipv6/esp6.c index 9eb92859835..72a659806ca 100644 --- a/net/ipv6/esp6.c +++ b/net/ipv6/esp6.c @@ -354,8 +354,16 @@ static int esp6_init_state(struct xfrm_state *x) (x->ealg->alg_key_len + 7) / 8)) goto error; x->props.header_len = sizeof(struct ip_esp_hdr) + esp->conf.ivlen; - if (x->props.mode == XFRM_MODE_TUNNEL) + switch (x->props.mode) { + case XFRM_MODE_BEET: + case XFRM_MODE_TRANSPORT: + break; + case XFRM_MODE_TUNNEL: x->props.header_len += sizeof(struct ipv6hdr); + break; + default: + goto error; + } x->data = esp; return 0; diff --git a/net/ipv6/exthdrs.c b/net/ipv6/exthdrs.c index c82d4d49f71..1e89efd38a0 100644 --- a/net/ipv6/exthdrs.c +++ b/net/ipv6/exthdrs.c @@ -102,7 +102,7 @@ EXPORT_SYMBOL_GPL(ipv6_find_tlv); struct tlvtype_proc { int type; - int (*func)(struct sk_buff **skbp, int offset); + int (*func)(struct sk_buff *skb, int offset); }; /********************* @@ -111,10 +111,8 @@ struct tlvtype_proc { /* An unknown option is detected, decide what to do */ -static int ip6_tlvopt_unknown(struct sk_buff **skbp, int optoff) +static int ip6_tlvopt_unknown(struct sk_buff *skb, int optoff) { - struct sk_buff *skb = *skbp; - switch ((skb_network_header(skb)[optoff] & 0xC0) >> 6) { case 0: /* ignore */ return 1; @@ -139,9 +137,8 @@ static int ip6_tlvopt_unknown(struct sk_buff **skbp, int optoff) /* Parse tlv encoded option header (hop-by-hop or destination) */ -static int ip6_parse_tlv(struct tlvtype_proc *procs, struct 
sk_buff **skbp) +static int ip6_parse_tlv(struct tlvtype_proc *procs, struct sk_buff *skb) { - struct sk_buff *skb = *skbp; struct tlvtype_proc *curr; const unsigned char *nh = skb_network_header(skb); int off = skb_network_header_len(skb); @@ -172,13 +169,13 @@ static int ip6_parse_tlv(struct tlvtype_proc *procs, struct sk_buff **skbp) /* type specific length/alignment checks will be performed in the func(). */ - if (curr->func(skbp, off) == 0) + if (curr->func(skb, off) == 0) return 0; break; } } if (curr->type < 0) { - if (ip6_tlvopt_unknown(skbp, off) == 0) + if (ip6_tlvopt_unknown(skb, off) == 0) return 0; } break; @@ -198,9 +195,8 @@ bad: *****************************/ #if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE) -static int ipv6_dest_hao(struct sk_buff **skbp, int optoff) +static int ipv6_dest_hao(struct sk_buff *skb, int optoff) { - struct sk_buff *skb = *skbp; struct ipv6_destopt_hao *hao; struct inet6_skb_parm *opt = IP6CB(skb); struct ipv6hdr *ipv6h = ipv6_hdr(skb); @@ -234,22 +230,13 @@ static int ipv6_dest_hao(struct sk_buff **skbp, int optoff) goto discard; if (skb_cloned(skb)) { - struct sk_buff *skb2 = skb_copy(skb, GFP_ATOMIC); - struct inet6_skb_parm *opt2; - - if (skb2 == NULL) + if (pskb_expand_head(skb, 0, 0, GFP_ATOMIC)) goto discard; - opt2 = IP6CB(skb2); - memcpy(opt2, opt, sizeof(*opt2)); - - kfree_skb(skb); - /* update all variable using below by copied skbuff */ - *skbp = skb = skb2; - hao = (struct ipv6_destopt_hao *)(skb_network_header(skb2) + + hao = (struct ipv6_destopt_hao *)(skb_network_header(skb) + optoff); - ipv6h = ipv6_hdr(skb2); + ipv6h = ipv6_hdr(skb); } if (skb->ip_summed == CHECKSUM_COMPLETE) @@ -280,9 +267,8 @@ static struct tlvtype_proc tlvprocdestopt_lst[] = { {-1, NULL} }; -static int ipv6_destopt_rcv(struct sk_buff **skbp) +static int ipv6_destopt_rcv(struct sk_buff *skb) { - struct sk_buff *skb = *skbp; struct inet6_skb_parm *opt = IP6CB(skb); #if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE) __u16 dstbuf; @@ -304,9 +290,8 @@ static int ipv6_destopt_rcv(struct sk_buff **skbp) #endif dst = dst_clone(skb->dst); - if (ip6_parse_tlv(tlvprocdestopt_lst, skbp)) { + if (ip6_parse_tlv(tlvprocdestopt_lst, skb)) { dst_release(dst); - skb = *skbp; skb->transport_header += (skb_transport_header(skb)[1] + 1) << 3; opt = IP6CB(skb); #if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE) @@ -337,10 +322,8 @@ void __init ipv6_destopt_init(void) NONE header. No data in packet. ********************************/ -static int ipv6_nodata_rcv(struct sk_buff **skbp) +static int ipv6_nodata_rcv(struct sk_buff *skb) { - struct sk_buff *skb = *skbp; - kfree_skb(skb); return 0; } @@ -360,9 +343,8 @@ void __init ipv6_nodata_init(void) Routing header. ********************************/ -static int ipv6_rthdr_rcv(struct sk_buff **skbp) +static int ipv6_rthdr_rcv(struct sk_buff *skb) { - struct sk_buff *skb = *skbp; struct inet6_skb_parm *opt = IP6CB(skb); struct in6_addr *addr = NULL; struct in6_addr daddr; @@ -464,18 +446,14 @@ looped_back: Do not damage packets queued somewhere. 
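
The clone handling rewritten in these exthdrs hunks is the mechanism that made dropping **skbp possible in the first place: pskb_expand_head() unclones the data in place of the old skb_copy()/kfree_skb() dance, so the skb address never changes, though pointers into the old data area must still be reloaded. From the ipv6_dest_hao() hunk above:

	if (skb_cloned(skb)) {
		if (pskb_expand_head(skb, 0, 0, GFP_ATOMIC))
			goto discard;

		/* the data was copied to a new head: recompute any
		 * cached pointers into it */
		hao = (struct ipv6_destopt_hao *)(skb_network_header(skb) +
						  optoff);
		ipv6h = ipv6_hdr(skb);
	}
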
*/ if (skb_cloned(skb)) { - struct sk_buff *skb2 = skb_copy(skb, GFP_ATOMIC); /* the copy is a forwarded packet */ - if (skb2 == NULL) { + if (pskb_expand_head(skb, 0, 0, GFP_ATOMIC)) { IP6_INC_STATS_BH(ip6_dst_idev(skb->dst), IPSTATS_MIB_OUTDISCARDS); kfree_skb(skb); return -1; } - kfree_skb(skb); - *skbp = skb = skb2; - opt = IP6CB(skb2); - hdr = (struct ipv6_rt_hdr *)skb_transport_header(skb2); + hdr = (struct ipv6_rt_hdr *)skb_transport_header(skb); } if (skb->ip_summed == CHECKSUM_COMPLETE) @@ -578,9 +556,8 @@ static inline struct inet6_dev *ipv6_skb_idev(struct sk_buff *skb) /* Router Alert as of RFC 2711 */ -static int ipv6_hop_ra(struct sk_buff **skbp, int optoff) +static int ipv6_hop_ra(struct sk_buff *skb, int optoff) { - struct sk_buff *skb = *skbp; const unsigned char *nh = skb_network_header(skb); if (nh[optoff + 1] == 2) { @@ -595,9 +572,8 @@ static int ipv6_hop_ra(struct sk_buff **skbp, int optoff) /* Jumbo payload */ -static int ipv6_hop_jumbo(struct sk_buff **skbp, int optoff) +static int ipv6_hop_jumbo(struct sk_buff *skb, int optoff) { - struct sk_buff *skb = *skbp; const unsigned char *nh = skb_network_header(skb); u32 pkt_len; @@ -648,9 +624,8 @@ static struct tlvtype_proc tlvprochopopt_lst[] = { { -1, } }; -int ipv6_parse_hopopts(struct sk_buff **skbp) +int ipv6_parse_hopopts(struct sk_buff *skb) { - struct sk_buff *skb = *skbp; struct inet6_skb_parm *opt = IP6CB(skb); /* @@ -667,8 +642,7 @@ int ipv6_parse_hopopts(struct sk_buff **skbp) } opt->hop = sizeof(struct ipv6hdr); - if (ip6_parse_tlv(tlvprochopopt_lst, skbp)) { - skb = *skbp; + if (ip6_parse_tlv(tlvprochopopt_lst, skb)) { skb->transport_header += (skb_transport_header(skb)[1] + 1) << 3; opt = IP6CB(skb); opt->nhoff = sizeof(struct ipv6hdr); diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c index 47b8ce232e8..9bb031fa1c2 100644 --- a/net/ipv6/icmp.c +++ b/net/ipv6/icmp.c @@ -82,7 +82,7 @@ EXPORT_SYMBOL(icmpv6msg_statistics); static DEFINE_PER_CPU(struct socket *, __icmpv6_socket) = NULL; #define icmpv6_socket __get_cpu_var(__icmpv6_socket) -static int icmpv6_rcv(struct sk_buff **pskb); +static int icmpv6_rcv(struct sk_buff *skb); static struct inet6_protocol icmpv6_protocol = { .handler = icmpv6_rcv, @@ -614,9 +614,8 @@ static void icmpv6_notify(struct sk_buff *skb, int type, int code, __be32 info) * Handle icmp messages */ -static int icmpv6_rcv(struct sk_buff **pskb) +static int icmpv6_rcv(struct sk_buff *skb) { - struct sk_buff *skb = *pskb; struct net_device *dev = skb->dev; struct inet6_dev *idev = __in6_dev_get(dev); struct in6_addr *saddr, *daddr; diff --git a/net/ipv6/inet6_connection_sock.c b/net/ipv6/inet6_connection_sock.c index 25b93170974..78de42ada84 100644 --- a/net/ipv6/inet6_connection_sock.c +++ b/net/ipv6/inet6_connection_sock.c @@ -146,7 +146,7 @@ void __inet6_csk_dst_store(struct sock *sk, struct dst_entry *dst, __ip6_dst_store(sk, dst, daddr, saddr); #ifdef CONFIG_XFRM - if (dst) { + { struct rt6_info *rt = (struct rt6_info *)dst; rt->rt6i_flow_cache_genid = atomic_read(&flow_cache_genid); } diff --git a/net/ipv6/inet6_hashtables.c b/net/ipv6/inet6_hashtables.c index 1c2c2765543..d6f1026f194 100644 --- a/net/ipv6/inet6_hashtables.c +++ b/net/ipv6/inet6_hashtables.c @@ -261,7 +261,7 @@ int inet6_hash_connect(struct inet_timewait_death_row *death_row, struct inet_timewait_sock *tw = NULL; inet_get_local_port_range(&low, &high); - remaining = high - low; + remaining = (high - low) + 1; local_bh_disable(); for (i = 1; i <= remaining; i++) { diff --git a/net/ipv6/ip6_flowlabel.c 
b/net/ipv6/ip6_flowlabel.c index 217d60f9fc8..b12cc22e774 100644 --- a/net/ipv6/ip6_flowlabel.c +++ b/net/ipv6/ip6_flowlabel.c @@ -154,8 +154,10 @@ static void ip6_fl_gc(unsigned long dummy) write_unlock(&ip6_fl_lock); } -static int fl_intern(struct ip6_flowlabel *fl, __be32 label) +static struct ip6_flowlabel *fl_intern(struct ip6_flowlabel *fl, __be32 label) { + struct ip6_flowlabel *lfl; + fl->label = label & IPV6_FLOWLABEL_MASK; write_lock_bh(&ip6_fl_lock); @@ -163,12 +165,26 @@ static int fl_intern(struct ip6_flowlabel *fl, __be32 label) for (;;) { fl->label = htonl(net_random())&IPV6_FLOWLABEL_MASK; if (fl->label) { - struct ip6_flowlabel *lfl; lfl = __fl_lookup(fl->label); if (lfl == NULL) break; } } + } else { + /* + * we dropper the ip6_fl_lock, so this entry could reappear + * and we need to recheck with it. + * + * OTOH no need to search the active socket first, like it is + * done in ipv6_flowlabel_opt - sock is locked, so new entry + * with the same label can only appear on another sock + */ + lfl = __fl_lookup(fl->label); + if (lfl != NULL) { + atomic_inc(&lfl->users); + write_unlock_bh(&ip6_fl_lock); + return lfl; + } } fl->lastuse = jiffies; @@ -176,7 +192,7 @@ static int fl_intern(struct ip6_flowlabel *fl, __be32 label) fl_ht[FL_HASH(fl->label)] = fl; atomic_inc(&fl_size); write_unlock_bh(&ip6_fl_lock); - return 0; + return NULL; } @@ -190,14 +206,17 @@ struct ip6_flowlabel * fl6_sock_lookup(struct sock *sk, __be32 label) label &= IPV6_FLOWLABEL_MASK; + read_lock_bh(&ip6_sk_fl_lock); for (sfl=np->ipv6_fl_list; sfl; sfl = sfl->next) { struct ip6_flowlabel *fl = sfl->fl; if (fl->label == label) { fl->lastuse = jiffies; atomic_inc(&fl->users); + read_unlock_bh(&ip6_sk_fl_lock); return fl; } } + read_unlock_bh(&ip6_sk_fl_lock); return NULL; } @@ -409,6 +428,16 @@ static int ipv6_opt_cmp(struct ipv6_txoptions *o1, struct ipv6_txoptions *o2) return 0; } +static inline void fl_link(struct ipv6_pinfo *np, struct ipv6_fl_socklist *sfl, + struct ip6_flowlabel *fl) +{ + write_lock_bh(&ip6_sk_fl_lock); + sfl->fl = fl; + sfl->next = np->ipv6_fl_list; + np->ipv6_fl_list = sfl; + write_unlock_bh(&ip6_sk_fl_lock); +} + int ipv6_flowlabel_opt(struct sock *sk, char __user *optval, int optlen) { int err; @@ -416,7 +445,8 @@ int ipv6_flowlabel_opt(struct sock *sk, char __user *optval, int optlen) struct in6_flowlabel_req freq; struct ipv6_fl_socklist *sfl1=NULL; struct ipv6_fl_socklist *sfl, **sflp; - struct ip6_flowlabel *fl; + struct ip6_flowlabel *fl, *fl1 = NULL; + if (optlen < sizeof(freq)) return -EINVAL; @@ -472,8 +502,6 @@ int ipv6_flowlabel_opt(struct sock *sk, char __user *optval, int optlen) sfl1 = kmalloc(sizeof(*sfl1), GFP_KERNEL); if (freq.flr_label) { - struct ip6_flowlabel *fl1 = NULL; - err = -EEXIST; read_lock_bh(&ip6_sk_fl_lock); for (sfl = np->ipv6_fl_list; sfl; sfl = sfl->next) { @@ -492,6 +520,7 @@ int ipv6_flowlabel_opt(struct sock *sk, char __user *optval, int optlen) if (fl1 == NULL) fl1 = fl_lookup(freq.flr_label); if (fl1) { +recheck: err = -EEXIST; if (freq.flr_flags&IPV6_FL_F_EXCL) goto release; @@ -513,11 +542,7 @@ int ipv6_flowlabel_opt(struct sock *sk, char __user *optval, int optlen) fl1->linger = fl->linger; if ((long)(fl->expires - fl1->expires) > 0) fl1->expires = fl->expires; - write_lock_bh(&ip6_sk_fl_lock); - sfl1->fl = fl1; - sfl1->next = np->ipv6_fl_list; - np->ipv6_fl_list = sfl1; - write_unlock_bh(&ip6_sk_fl_lock); + fl_link(np, sfl1, fl1); fl_free(fl); return 0; @@ -534,9 +559,9 @@ release: if (sfl1 == NULL || (err = mem_check(sk)) != 0) goto done; 
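
The fl_intern() change in this file closes a race: ipv6_flowlabel_opt() drops ip6_fl_lock between its initial lookup and the insert, so another socket can install the same label in that window. fl_intern() therefore rechecks under the write lock and, on collision, returns the existing entry with a user reference already taken, letting the caller jump back to its EEXIST/IPV6_FL_F_EXCL handling (the new "recheck" label) instead of installing a duplicate. Core of the double-checked insert, per the hunk:

	write_lock_bh(&ip6_fl_lock);
	if (fl->label) {
		/* the lock was dropped since the caller looked this
		 * label up, so it may have reappeared meanwhile */
		lfl = __fl_lookup(fl->label);
		if (lfl != NULL) {
			atomic_inc(&lfl->users);
			write_unlock_bh(&ip6_fl_lock);
			return lfl;		/* caller rechecks */
		}
	}
	/* ... otherwise link fl into fl_ht[] and return NULL ... */
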
- err = fl_intern(fl, freq.flr_label); - if (err) - goto done; + fl1 = fl_intern(fl, freq.flr_label); + if (fl1 != NULL) + goto recheck; if (!freq.flr_label) { if (copy_to_user(&((struct in6_flowlabel_req __user *) optval)->flr_label, @@ -545,9 +570,7 @@ release: } } - sfl1->fl = fl; - sfl1->next = np->ipv6_fl_list; - np->ipv6_fl_list = sfl1; + fl_link(np, sfl1, fl); return 0; default: diff --git a/net/ipv6/ip6_input.c b/net/ipv6/ip6_input.c index 9149fc23975..fac6f7f9dd7 100644 --- a/net/ipv6/ip6_input.c +++ b/net/ipv6/ip6_input.c @@ -125,7 +125,7 @@ int ipv6_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt } if (hdr->nexthdr == NEXTHDR_HOP) { - if (ipv6_parse_hopopts(&skb) < 0) { + if (ipv6_parse_hopopts(skb) < 0) { IP6_INC_STATS_BH(idev, IPSTATS_MIB_INHDRERRORS); rcu_read_unlock(); return 0; @@ -149,7 +149,7 @@ out: */ -static inline int ip6_input_finish(struct sk_buff *skb) +static int ip6_input_finish(struct sk_buff *skb) { struct inet6_protocol *ipprot; struct sock *raw_sk; @@ -199,7 +199,7 @@ resubmit: !xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb)) goto discard; - ret = ipprot->handler(&skb); + ret = ipprot->handler(skb); if (ret > 0) goto resubmit; else if (ret == 0) diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index 011082ed921..13565dfb1b4 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -70,7 +70,7 @@ static __inline__ void ipv6_select_ident(struct sk_buff *skb, struct frag_hdr *f spin_unlock_bh(&ip6_id_lock); } -static inline int ip6_output_finish(struct sk_buff *skb) +static int ip6_output_finish(struct sk_buff *skb) { struct dst_entry *dst = skb->dst; diff --git a/net/ipv6/ipcomp6.c b/net/ipv6/ipcomp6.c index 28fc8edfdc3..80ef2a1d39f 100644 --- a/net/ipv6/ipcomp6.c +++ b/net/ipv6/ipcomp6.c @@ -411,8 +411,15 @@ static int ipcomp6_init_state(struct xfrm_state *x) goto out; x->props.header_len = 0; - if (x->props.mode == XFRM_MODE_TUNNEL) + switch (x->props.mode) { + case XFRM_MODE_BEET: + case XFRM_MODE_TRANSPORT: + break; + case XFRM_MODE_TUNNEL: x->props.header_len += sizeof(struct ipv6hdr); + default: + goto error; + } mutex_lock(&ipcomp6_resource_mutex); if (!ipcomp6_alloc_scratches()) diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c index 6cc33dc83d1..20cfc90d559 100644 --- a/net/ipv6/ndisc.c +++ b/net/ipv6/ndisc.c @@ -1658,30 +1658,26 @@ int ndisc_ifinfo_sysctl_change(struct ctl_table *ctl, int write, struct file * f struct inet6_dev *idev; int ret; - if (ctl->ctl_name == NET_NEIGH_RETRANS_TIME || - ctl->ctl_name == NET_NEIGH_REACHABLE_TIME) + if ((strcmp(ctl->procname, "retrans_time") == 0) || + (strcmp(ctl->procname, "base_reachable_time") == 0)) ndisc_warn_deprecated_sysctl(ctl, "syscall", dev ? 
dev->name : "default"); - switch (ctl->ctl_name) { - case NET_NEIGH_RETRANS_TIME: + if (strcmp(ctl->procname, "retrans_time") == 0) ret = proc_dointvec(ctl, write, filp, buffer, lenp, ppos); - break; - case NET_NEIGH_REACHABLE_TIME: + + else if (strcmp(ctl->procname, "base_reachable_time") == 0) ret = proc_dointvec_jiffies(ctl, write, filp, buffer, lenp, ppos); - break; - case NET_NEIGH_RETRANS_TIME_MS: - case NET_NEIGH_REACHABLE_TIME_MS: + + else if ((strcmp(ctl->procname, "retrans_time_ms") == 0) || + (strcmp(ctl->procname, "base_reacable_time_ms") == 0)) ret = proc_dointvec_ms_jiffies(ctl, write, filp, buffer, lenp, ppos); - break; - default: + else ret = -1; - } if (write && ret == 0 && dev && (idev = in6_dev_get(dev)) != NULL) { - if (ctl->ctl_name == NET_NEIGH_REACHABLE_TIME || - ctl->ctl_name == NET_NEIGH_REACHABLE_TIME_MS) + if (ctl->data == &idev->nd_parms->base_reachable_time) idev->nd_parms->reachable_time = neigh_rand_reach_time(idev->nd_parms->base_reachable_time); idev->tstamp = jiffies; inet6_ifinfo_notify(RTM_NEWLINK, idev); diff --git a/net/ipv6/netfilter.c b/net/ipv6/netfilter.c index 38b14961391..b1326c2bf8a 100644 --- a/net/ipv6/netfilter.c +++ b/net/ipv6/netfilter.c @@ -68,15 +68,15 @@ static void nf_ip6_saveroute(const struct sk_buff *skb, struct nf_info *info) } } -static int nf_ip6_reroute(struct sk_buff **pskb, const struct nf_info *info) +static int nf_ip6_reroute(struct sk_buff *skb, const struct nf_info *info) { struct ip6_rt_info *rt_info = nf_info_reroute(info); if (info->hook == NF_IP6_LOCAL_OUT) { - struct ipv6hdr *iph = ipv6_hdr(*pskb); + struct ipv6hdr *iph = ipv6_hdr(skb); if (!ipv6_addr_equal(&iph->daddr, &rt_info->daddr) || !ipv6_addr_equal(&iph->saddr, &rt_info->saddr)) - return ip6_route_me_harder(*pskb); + return ip6_route_me_harder(skb); } return 0; } diff --git a/net/ipv6/netfilter/ip6_queue.c b/net/ipv6/netfilter/ip6_queue.c index 0473145ac53..6413a30d9f6 100644 --- a/net/ipv6/netfilter/ip6_queue.c +++ b/net/ipv6/netfilter/ip6_queue.c @@ -332,6 +332,7 @@ static int ipq_mangle_ipv6(ipq_verdict_msg_t *v, struct ipq_queue_entry *e) { int diff; + int err; struct ipv6hdr *user_iph = (struct ipv6hdr *)v->payload; if (v->data_len < sizeof(*user_iph)) @@ -344,25 +345,18 @@ ipq_mangle_ipv6(ipq_verdict_msg_t *v, struct ipq_queue_entry *e) if (v->data_len > 0xFFFF) return -EINVAL; if (diff > skb_tailroom(e->skb)) { - struct sk_buff *newskb; - - newskb = skb_copy_expand(e->skb, - skb_headroom(e->skb), - diff, - GFP_ATOMIC); - if (newskb == NULL) { + err = pskb_expand_head(e->skb, 0, + diff - skb_tailroom(e->skb), + GFP_ATOMIC); + if (err) { printk(KERN_WARNING "ip6_queue: OOM " "in mangle, dropping packet\n"); - return -ENOMEM; + return err; } - if (e->skb->sk) - skb_set_owner_w(newskb, e->skb->sk); - kfree_skb(e->skb); - e->skb = newskb; } skb_put(e->skb, diff); } - if (!skb_make_writable(&e->skb, v->data_len)) + if (!skb_make_writable(e->skb, v->data_len)) return -ENOMEM; skb_copy_to_linear_data(e->skb, v->payload, v->data_len); e->skb->ip_summed = CHECKSUM_NONE; diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c index cd9df02bb85..acaba153793 100644 --- a/net/ipv6/netfilter/ip6_tables.c +++ b/net/ipv6/netfilter/ip6_tables.c @@ -205,7 +205,7 @@ ip6_checkentry(const struct ip6t_ip6 *ipv6) } static unsigned int -ip6t_error(struct sk_buff **pskb, +ip6t_error(struct sk_buff *skb, const struct net_device *in, const struct net_device *out, unsigned int hooknum, @@ -350,7 +350,7 @@ static void trace_packet(struct sk_buff *skb, /* 
Returns one of the generic firewall policies, like NF_ACCEPT. */ unsigned int -ip6t_do_table(struct sk_buff **pskb, +ip6t_do_table(struct sk_buff *skb, unsigned int hook, const struct net_device *in, const struct net_device *out, @@ -389,17 +389,17 @@ ip6t_do_table(struct sk_buff **pskb, do { IP_NF_ASSERT(e); IP_NF_ASSERT(back); - if (ip6_packet_match(*pskb, indev, outdev, &e->ipv6, + if (ip6_packet_match(skb, indev, outdev, &e->ipv6, &protoff, &offset, &hotdrop)) { struct ip6t_entry_target *t; if (IP6T_MATCH_ITERATE(e, do_match, - *pskb, in, out, + skb, in, out, offset, protoff, &hotdrop) != 0) goto no_match; ADD_COUNTER(e->counters, - ntohs(ipv6_hdr(*pskb)->payload_len) + ntohs(ipv6_hdr(skb)->payload_len) + IPV6_HDR_LEN, 1); @@ -409,8 +409,8 @@ ip6t_do_table(struct sk_buff **pskb, #if defined(CONFIG_NETFILTER_XT_TARGET_TRACE) || \ defined(CONFIG_NETFILTER_XT_TARGET_TRACE_MODULE) /* The packet is traced: log it */ - if (unlikely((*pskb)->nf_trace)) - trace_packet(*pskb, hook, in, out, + if (unlikely(skb->nf_trace)) + trace_packet(skb, hook, in, out, table->name, private, e); #endif /* Standard target? */ @@ -448,7 +448,7 @@ ip6t_do_table(struct sk_buff **pskb, ((struct ip6t_entry *)table_base)->comefrom = 0xeeeeeeec; #endif - verdict = t->u.kernel.target->target(pskb, + verdict = t->u.kernel.target->target(skb, in, out, hook, t->u.kernel.target, diff --git a/net/ipv6/netfilter/ip6t_HL.c b/net/ipv6/netfilter/ip6t_HL.c index ad4d94310b8..9afc836fd45 100644 --- a/net/ipv6/netfilter/ip6t_HL.c +++ b/net/ipv6/netfilter/ip6t_HL.c @@ -18,7 +18,7 @@ MODULE_AUTHOR("Maciej Soltysiak <solt@dns.toxicfilms.tv>"); MODULE_DESCRIPTION("IP6 tables Hop Limit modification module"); MODULE_LICENSE("GPL"); -static unsigned int ip6t_hl_target(struct sk_buff **pskb, +static unsigned int ip6t_hl_target(struct sk_buff *skb, const struct net_device *in, const struct net_device *out, unsigned int hooknum, @@ -29,10 +29,10 @@ static unsigned int ip6t_hl_target(struct sk_buff **pskb, const struct ip6t_HL_info *info = targinfo; int new_hl; - if (!skb_make_writable(pskb, (*pskb)->len)) + if (!skb_make_writable(skb, skb->len)) return NF_DROP; - ip6h = ipv6_hdr(*pskb); + ip6h = ipv6_hdr(skb); switch (info->mode) { case IP6T_HL_SET: diff --git a/net/ipv6/netfilter/ip6t_LOG.c b/net/ipv6/netfilter/ip6t_LOG.c index 6ab99001dcc..7a48c342df4 100644 --- a/net/ipv6/netfilter/ip6t_LOG.c +++ b/net/ipv6/netfilter/ip6t_LOG.c @@ -431,7 +431,7 @@ ip6t_log_packet(unsigned int pf, } static unsigned int -ip6t_log_target(struct sk_buff **pskb, +ip6t_log_target(struct sk_buff *skb, const struct net_device *in, const struct net_device *out, unsigned int hooknum, @@ -445,8 +445,7 @@ ip6t_log_target(struct sk_buff **pskb, li.u.log.level = loginfo->level; li.u.log.logflags = loginfo->logflags; - ip6t_log_packet(PF_INET6, hooknum, *pskb, in, out, &li, - loginfo->prefix); + ip6t_log_packet(PF_INET6, hooknum, skb, in, out, &li, loginfo->prefix); return XT_CONTINUE; } diff --git a/net/ipv6/netfilter/ip6t_REJECT.c b/net/ipv6/netfilter/ip6t_REJECT.c index 3fd08d5567a..1a7d2917545 100644 --- a/net/ipv6/netfilter/ip6t_REJECT.c +++ b/net/ipv6/netfilter/ip6t_REJECT.c @@ -172,7 +172,7 @@ send_unreach(struct sk_buff *skb_in, unsigned char code, unsigned int hooknum) icmpv6_send(skb_in, ICMPV6_DEST_UNREACH, code, 0, NULL); } -static unsigned int reject6_target(struct sk_buff **pskb, +static unsigned int reject6_target(struct sk_buff *skb, const struct net_device *in, const struct net_device *out, unsigned int hooknum, @@ -187,25 +187,25 @@ static unsigned 
int reject6_target(struct sk_buff **pskb, must return an absolute verdict. --RR */ switch (reject->with) { case IP6T_ICMP6_NO_ROUTE: - send_unreach(*pskb, ICMPV6_NOROUTE, hooknum); + send_unreach(skb, ICMPV6_NOROUTE, hooknum); break; case IP6T_ICMP6_ADM_PROHIBITED: - send_unreach(*pskb, ICMPV6_ADM_PROHIBITED, hooknum); + send_unreach(skb, ICMPV6_ADM_PROHIBITED, hooknum); break; case IP6T_ICMP6_NOT_NEIGHBOUR: - send_unreach(*pskb, ICMPV6_NOT_NEIGHBOUR, hooknum); + send_unreach(skb, ICMPV6_NOT_NEIGHBOUR, hooknum); break; case IP6T_ICMP6_ADDR_UNREACH: - send_unreach(*pskb, ICMPV6_ADDR_UNREACH, hooknum); + send_unreach(skb, ICMPV6_ADDR_UNREACH, hooknum); break; case IP6T_ICMP6_PORT_UNREACH: - send_unreach(*pskb, ICMPV6_PORT_UNREACH, hooknum); + send_unreach(skb, ICMPV6_PORT_UNREACH, hooknum); break; case IP6T_ICMP6_ECHOREPLY: /* Do nothing */ break; case IP6T_TCP_RESET: - send_reset(*pskb); + send_reset(skb); break; default: if (net_ratelimit()) diff --git a/net/ipv6/netfilter/ip6table_filter.c b/net/ipv6/netfilter/ip6table_filter.c index 7e32e2aaf7f..1d26b202bf3 100644 --- a/net/ipv6/netfilter/ip6table_filter.c +++ b/net/ipv6/netfilter/ip6table_filter.c @@ -60,32 +60,32 @@ static struct xt_table packet_filter = { /* The work comes in here from netfilter.c. */ static unsigned int ip6t_hook(unsigned int hook, - struct sk_buff **pskb, + struct sk_buff *skb, const struct net_device *in, const struct net_device *out, int (*okfn)(struct sk_buff *)) { - return ip6t_do_table(pskb, hook, in, out, &packet_filter); + return ip6t_do_table(skb, hook, in, out, &packet_filter); } static unsigned int ip6t_local_out_hook(unsigned int hook, - struct sk_buff **pskb, + struct sk_buff *skb, const struct net_device *in, const struct net_device *out, int (*okfn)(struct sk_buff *)) { #if 0 /* root is playing with raw sockets. */ - if ((*pskb)->len < sizeof(struct iphdr) - || ip_hdrlen(*pskb) < sizeof(struct iphdr)) { + if (skb->len < sizeof(struct iphdr) + || ip_hdrlen(skb) < sizeof(struct iphdr)) { if (net_ratelimit()) printk("ip6t_hook: happy cracking.\n"); return NF_ACCEPT; } #endif - return ip6t_do_table(pskb, hook, in, out, &packet_filter); + return ip6t_do_table(skb, hook, in, out, &packet_filter); } static struct nf_hook_ops ip6t_ops[] = { diff --git a/net/ipv6/netfilter/ip6table_mangle.c b/net/ipv6/netfilter/ip6table_mangle.c index f0a9efa67fb..a0b6381f1e8 100644 --- a/net/ipv6/netfilter/ip6table_mangle.c +++ b/net/ipv6/netfilter/ip6table_mangle.c @@ -68,17 +68,17 @@ static struct xt_table packet_mangler = { /* The work comes in here from netfilter.c. */ static unsigned int ip6t_route_hook(unsigned int hook, - struct sk_buff **pskb, + struct sk_buff *skb, const struct net_device *in, const struct net_device *out, int (*okfn)(struct sk_buff *)) { - return ip6t_do_table(pskb, hook, in, out, &packet_mangler); + return ip6t_do_table(skb, hook, in, out, &packet_mangler); } static unsigned int ip6t_local_hook(unsigned int hook, - struct sk_buff **pskb, + struct sk_buff *skb, const struct net_device *in, const struct net_device *out, int (*okfn)(struct sk_buff *)) @@ -91,8 +91,8 @@ ip6t_local_hook(unsigned int hook, #if 0 /* root is playing with raw sockets. 
*/ - if ((*pskb)->len < sizeof(struct iphdr) - || ip_hdrlen(*pskb) < sizeof(struct iphdr)) { + if (skb->len < sizeof(struct iphdr) + || ip_hdrlen(skb) < sizeof(struct iphdr)) { if (net_ratelimit()) printk("ip6t_hook: happy cracking.\n"); return NF_ACCEPT; @@ -100,22 +100,22 @@ ip6t_local_hook(unsigned int hook, #endif /* save source/dest address, mark, hoplimit, flowlabel, priority, */ - memcpy(&saddr, &ipv6_hdr(*pskb)->saddr, sizeof(saddr)); - memcpy(&daddr, &ipv6_hdr(*pskb)->daddr, sizeof(daddr)); - mark = (*pskb)->mark; - hop_limit = ipv6_hdr(*pskb)->hop_limit; + memcpy(&saddr, &ipv6_hdr(skb)->saddr, sizeof(saddr)); + memcpy(&daddr, &ipv6_hdr(skb)->daddr, sizeof(daddr)); + mark = skb->mark; + hop_limit = ipv6_hdr(skb)->hop_limit; /* flowlabel and prio (includes version, which shouldn't change either */ - flowlabel = *((u_int32_t *)ipv6_hdr(*pskb)); + flowlabel = *((u_int32_t *)ipv6_hdr(skb)); - ret = ip6t_do_table(pskb, hook, in, out, &packet_mangler); + ret = ip6t_do_table(skb, hook, in, out, &packet_mangler); if (ret != NF_DROP && ret != NF_STOLEN - && (memcmp(&ipv6_hdr(*pskb)->saddr, &saddr, sizeof(saddr)) - || memcmp(&ipv6_hdr(*pskb)->daddr, &daddr, sizeof(daddr)) - || (*pskb)->mark != mark - || ipv6_hdr(*pskb)->hop_limit != hop_limit)) - return ip6_route_me_harder(*pskb) == 0 ? ret : NF_DROP; + && (memcmp(&ipv6_hdr(skb)->saddr, &saddr, sizeof(saddr)) + || memcmp(&ipv6_hdr(skb)->daddr, &daddr, sizeof(daddr)) + || skb->mark != mark + || ipv6_hdr(skb)->hop_limit != hop_limit)) + return ip6_route_me_harder(skb) == 0 ? ret : NF_DROP; return ret; } diff --git a/net/ipv6/netfilter/ip6table_raw.c b/net/ipv6/netfilter/ip6table_raw.c index ec290e4ebdd..8f7109f991e 100644 --- a/net/ipv6/netfilter/ip6table_raw.c +++ b/net/ipv6/netfilter/ip6table_raw.c @@ -46,12 +46,12 @@ static struct xt_table packet_raw = { /* The work comes in here from netfilter.c. */ static unsigned int ip6t_hook(unsigned int hook, - struct sk_buff **pskb, + struct sk_buff *skb, const struct net_device *in, const struct net_device *out, int (*okfn)(struct sk_buff *)) { - return ip6t_do_table(pskb, hook, in, out, &packet_raw); + return ip6t_do_table(skb, hook, in, out, &packet_raw); } static struct nf_hook_ops ip6t_ops[] = { diff --git a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c index 37a3db92695..ad74bab0504 100644 --- a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c +++ b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c @@ -18,6 +18,7 @@ #include <linux/icmp.h> #include <linux/sysctl.h> #include <net/ipv6.h> +#include <net/inet_frag.h> #include <linux/netfilter_ipv6.h> #include <net/netfilter/nf_conntrack.h> @@ -145,7 +146,7 @@ static int ipv6_get_l4proto(const struct sk_buff *skb, unsigned int nhoff, } static unsigned int ipv6_confirm(unsigned int hooknum, - struct sk_buff **pskb, + struct sk_buff *skb, const struct net_device *in, const struct net_device *out, int (*okfn)(struct sk_buff *)) @@ -155,12 +156,12 @@ static unsigned int ipv6_confirm(unsigned int hooknum, struct nf_conntrack_helper *helper; enum ip_conntrack_info ctinfo; unsigned int ret, protoff; - unsigned int extoff = (u8 *)(ipv6_hdr(*pskb) + 1) - (*pskb)->data; - unsigned char pnum = ipv6_hdr(*pskb)->nexthdr; + unsigned int extoff = (u8 *)(ipv6_hdr(skb) + 1) - skb->data; + unsigned char pnum = ipv6_hdr(skb)->nexthdr; /* This is where we call the helper: as the packet goes out. 
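
Context for this comment: before invoking a conntrack helper, ipv6_confirm() must locate the transport header, which on IPv6 means walking the extension-header chain. The guard from the hunk that follows, with the rationale spelled out:

	protoff = nf_ct_ipv6_skip_exthdr(skb, extoff, &pnum,
					 skb->len - extoff);
	if (protoff > skb->len || pnum == NEXTHDR_FRAGMENT) {
		/* no walkable path to an l4 header: better to skip
		 * the helper than let it mangle the wrong bytes */
		pr_debug("proto header not found\n");
		return NF_ACCEPT;
	}

	ret = helper->help(skb, protoff, ct, ctinfo);
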
*/ - ct = nf_ct_get(*pskb, &ctinfo); + ct = nf_ct_get(skb, &ctinfo); if (!ct || ctinfo == IP_CT_RELATED + IP_CT_IS_REPLY) goto out; @@ -172,23 +173,23 @@ static unsigned int ipv6_confirm(unsigned int hooknum, if (!helper) goto out; - protoff = nf_ct_ipv6_skip_exthdr(*pskb, extoff, &pnum, - (*pskb)->len - extoff); - if (protoff > (*pskb)->len || pnum == NEXTHDR_FRAGMENT) { + protoff = nf_ct_ipv6_skip_exthdr(skb, extoff, &pnum, + skb->len - extoff); + if (protoff > skb->len || pnum == NEXTHDR_FRAGMENT) { pr_debug("proto header not found\n"); return NF_ACCEPT; } - ret = helper->help(pskb, protoff, ct, ctinfo); + ret = helper->help(skb, protoff, ct, ctinfo); if (ret != NF_ACCEPT) return ret; out: /* We've seen it coming out the other side: confirm it */ - return nf_conntrack_confirm(pskb); + return nf_conntrack_confirm(skb); } static unsigned int ipv6_defrag(unsigned int hooknum, - struct sk_buff **pskb, + struct sk_buff *skb, const struct net_device *in, const struct net_device *out, int (*okfn)(struct sk_buff *)) @@ -196,17 +197,17 @@ static unsigned int ipv6_defrag(unsigned int hooknum, struct sk_buff *reasm; /* Previously seen (loopback)? */ - if ((*pskb)->nfct) + if (skb->nfct) return NF_ACCEPT; - reasm = nf_ct_frag6_gather(*pskb); + reasm = nf_ct_frag6_gather(skb); /* queued */ if (reasm == NULL) return NF_STOLEN; /* error occurred or not fragmented */ - if (reasm == *pskb) + if (reasm == skb) return NF_ACCEPT; nf_ct_frag6_output(hooknum, reasm, (struct net_device *)in, @@ -216,12 +217,12 @@ static unsigned int ipv6_defrag(unsigned int hooknum, } static unsigned int ipv6_conntrack_in(unsigned int hooknum, - struct sk_buff **pskb, + struct sk_buff *skb, const struct net_device *in, const struct net_device *out, int (*okfn)(struct sk_buff *)) { - struct sk_buff *reasm = (*pskb)->nfct_reasm; + struct sk_buff *reasm = skb->nfct_reasm; /* This packet is fragmented and has reassembled packet. */ if (reasm) { @@ -229,32 +230,32 @@ static unsigned int ipv6_conntrack_in(unsigned int hooknum, if (!reasm->nfct) { unsigned int ret; - ret = nf_conntrack_in(PF_INET6, hooknum, &reasm); + ret = nf_conntrack_in(PF_INET6, hooknum, reasm); if (ret != NF_ACCEPT) return ret; } nf_conntrack_get(reasm->nfct); - (*pskb)->nfct = reasm->nfct; - (*pskb)->nfctinfo = reasm->nfctinfo; + skb->nfct = reasm->nfct; + skb->nfctinfo = reasm->nfctinfo; return NF_ACCEPT; } - return nf_conntrack_in(PF_INET6, hooknum, pskb); + return nf_conntrack_in(PF_INET6, hooknum, skb); } static unsigned int ipv6_conntrack_local(unsigned int hooknum, - struct sk_buff **pskb, + struct sk_buff *skb, const struct net_device *in, const struct net_device *out, int (*okfn)(struct sk_buff *)) { /* root is playing with raw sockets. 
*/ - if ((*pskb)->len < sizeof(struct ipv6hdr)) { + if (skb->len < sizeof(struct ipv6hdr)) { if (net_ratelimit()) printk("ipv6_conntrack_local: packet too short\n"); return NF_ACCEPT; } - return ipv6_conntrack_in(hooknum, pskb, in, out, okfn); + return ipv6_conntrack_in(hooknum, skb, in, out, okfn); } static struct nf_hook_ops ipv6_conntrack_ops[] = { @@ -305,9 +306,8 @@ static struct nf_hook_ops ipv6_conntrack_ops[] = { #ifdef CONFIG_SYSCTL static ctl_table nf_ct_ipv6_sysctl_table[] = { { - .ctl_name = NET_NF_CONNTRACK_FRAG6_TIMEOUT, .procname = "nf_conntrack_frag6_timeout", - .data = &nf_ct_frag6_timeout, + .data = &nf_frags_ctl.timeout, .maxlen = sizeof(unsigned int), .mode = 0644, .proc_handler = &proc_dointvec_jiffies, @@ -315,7 +315,7 @@ static ctl_table nf_ct_ipv6_sysctl_table[] = { { .ctl_name = NET_NF_CONNTRACK_FRAG6_LOW_THRESH, .procname = "nf_conntrack_frag6_low_thresh", - .data = &nf_ct_frag6_low_thresh, + .data = &nf_frags_ctl.low_thresh, .maxlen = sizeof(unsigned int), .mode = 0644, .proc_handler = &proc_dointvec, @@ -323,7 +323,7 @@ static ctl_table nf_ct_ipv6_sysctl_table[] = { { .ctl_name = NET_NF_CONNTRACK_FRAG6_HIGH_THRESH, .procname = "nf_conntrack_frag6_high_thresh", - .data = &nf_ct_frag6_high_thresh, + .data = &nf_frags_ctl.high_thresh, .maxlen = sizeof(unsigned int), .mode = 0644, .proc_handler = &proc_dointvec, diff --git a/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c b/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c index fbdc66920de..fd9123f3dc0 100644 --- a/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c +++ b/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c @@ -260,7 +260,6 @@ static int icmpv6_nlattr_to_tuple(struct nlattr *tb[], static struct ctl_table_header *icmpv6_sysctl_header; static struct ctl_table icmpv6_sysctl_table[] = { { - .ctl_name = NET_NF_CONNTRACK_ICMPV6_TIMEOUT, .procname = "nf_conntrack_icmpv6_timeout", .data = &nf_ct_icmpv6_timeout, .maxlen = sizeof(unsigned int), diff --git a/net/ipv6/netfilter/nf_conntrack_reasm.c b/net/ipv6/netfilter/nf_conntrack_reasm.c index 25442a8c1ba..e170c67c47a 100644 --- a/net/ipv6/netfilter/nf_conntrack_reasm.c +++ b/net/ipv6/netfilter/nf_conntrack_reasm.c @@ -31,6 +31,7 @@ #include <net/sock.h> #include <net/snmp.h> +#include <net/inet_frag.h> #include <net/ipv6.h> #include <net/protocol.h> @@ -48,10 +49,6 @@ #define NF_CT_FRAG6_LOW_THRESH 196608 /* == 192*1024 */ #define NF_CT_FRAG6_TIMEOUT IPV6_FRAG_TIMEOUT -unsigned int nf_ct_frag6_high_thresh __read_mostly = 256*1024; -unsigned int nf_ct_frag6_low_thresh __read_mostly = 192*1024; -unsigned long nf_ct_frag6_timeout __read_mostly = IPV6_FRAG_TIMEOUT; - struct nf_ct_frag6_skb_cb { struct inet6_skb_parm h; @@ -63,51 +60,24 @@ struct nf_ct_frag6_skb_cb struct nf_ct_frag6_queue { - struct hlist_node list; - struct list_head lru_list; /* lru list member */ + struct inet_frag_queue q; __be32 id; /* fragment id */ struct in6_addr saddr; struct in6_addr daddr; - spinlock_t lock; - atomic_t refcnt; - struct timer_list timer; /* expire timer */ - struct sk_buff *fragments; - int len; - int meat; - ktime_t stamp; unsigned int csum; - __u8 last_in; /* has first/last segment arrived? */ -#define COMPLETE 4 -#define FIRST_IN 2 -#define LAST_IN 1 __u16 nhoffset; }; -/* Hash table. 
*/ - -#define FRAG6Q_HASHSZ 64 - -static struct hlist_head nf_ct_frag6_hash[FRAG6Q_HASHSZ]; -static DEFINE_RWLOCK(nf_ct_frag6_lock); -static u32 nf_ct_frag6_hash_rnd; -static LIST_HEAD(nf_ct_frag6_lru_list); -int nf_ct_frag6_nqueues = 0; - -static __inline__ void __fq_unlink(struct nf_ct_frag6_queue *fq) -{ - hlist_del(&fq->list); - list_del(&fq->lru_list); - nf_ct_frag6_nqueues--; -} +struct inet_frags_ctl nf_frags_ctl __read_mostly = { + .high_thresh = 256 * 1024, + .low_thresh = 192 * 1024, + .timeout = IPV6_FRAG_TIMEOUT, + .secret_interval = 10 * 60 * HZ, +}; -static __inline__ void fq_unlink(struct nf_ct_frag6_queue *fq) -{ - write_lock(&nf_ct_frag6_lock); - __fq_unlink(fq); - write_unlock(&nf_ct_frag6_lock); -} +static struct inet_frags nf_frags; static unsigned int ip6qhashfn(__be32 id, struct in6_addr *saddr, struct in6_addr *daddr) @@ -120,7 +90,7 @@ static unsigned int ip6qhashfn(__be32 id, struct in6_addr *saddr, a += JHASH_GOLDEN_RATIO; b += JHASH_GOLDEN_RATIO; - c += nf_ct_frag6_hash_rnd; + c += nf_frags.rnd; __jhash_mix(a, b, c); a += (__force u32)saddr->s6_addr32[3]; @@ -133,100 +103,38 @@ static unsigned int ip6qhashfn(__be32 id, struct in6_addr *saddr, c += (__force u32)id; __jhash_mix(a, b, c); - return c & (FRAG6Q_HASHSZ - 1); + return c & (INETFRAGS_HASHSZ - 1); } -static struct timer_list nf_ct_frag6_secret_timer; -int nf_ct_frag6_secret_interval = 10 * 60 * HZ; - -static void nf_ct_frag6_secret_rebuild(unsigned long dummy) +static unsigned int nf_hashfn(struct inet_frag_queue *q) { - unsigned long now = jiffies; - int i; - - write_lock(&nf_ct_frag6_lock); - get_random_bytes(&nf_ct_frag6_hash_rnd, sizeof(u32)); - for (i = 0; i < FRAG6Q_HASHSZ; i++) { - struct nf_ct_frag6_queue *q; - struct hlist_node *p, *n; - - hlist_for_each_entry_safe(q, p, n, &nf_ct_frag6_hash[i], list) { - unsigned int hval = ip6qhashfn(q->id, - &q->saddr, - &q->daddr); - if (hval != i) { - hlist_del(&q->list); - /* Relink to new hash chain. */ - hlist_add_head(&q->list, - &nf_ct_frag6_hash[hval]); - } - } - } - write_unlock(&nf_ct_frag6_lock); + struct nf_ct_frag6_queue *nq; - mod_timer(&nf_ct_frag6_secret_timer, now + nf_ct_frag6_secret_interval); + nq = container_of(q, struct nf_ct_frag6_queue, q); + return ip6qhashfn(nq->id, &nq->saddr, &nq->daddr); } -atomic_t nf_ct_frag6_mem = ATOMIC_INIT(0); - -/* Memory Tracking Functions. */ -static inline void frag_kfree_skb(struct sk_buff *skb, unsigned int *work) +static void nf_skb_free(struct sk_buff *skb) { - if (work) - *work -= skb->truesize; - atomic_sub(skb->truesize, &nf_ct_frag6_mem); if (NFCT_FRAG6_CB(skb)->orig) kfree_skb(NFCT_FRAG6_CB(skb)->orig); - - kfree_skb(skb); } -static inline void frag_free_queue(struct nf_ct_frag6_queue *fq, - unsigned int *work) +/* Memory Tracking Functions. */ +static inline void frag_kfree_skb(struct sk_buff *skb, unsigned int *work) { if (work) - *work -= sizeof(struct nf_ct_frag6_queue); - atomic_sub(sizeof(struct nf_ct_frag6_queue), &nf_ct_frag6_mem); - kfree(fq); -} - -static inline struct nf_ct_frag6_queue *frag_alloc_queue(void) -{ - struct nf_ct_frag6_queue *fq = kmalloc(sizeof(struct nf_ct_frag6_queue), GFP_ATOMIC); - - if (!fq) - return NULL; - atomic_add(sizeof(struct nf_ct_frag6_queue), &nf_ct_frag6_mem); - return fq; + *work -= skb->truesize; + atomic_sub(skb->truesize, &nf_frags.mem); + nf_skb_free(skb); + kfree_skb(skb); } /* Destruction primitives. */ -/* Complete destruction of fq. 
*/ -static void nf_ct_frag6_destroy(struct nf_ct_frag6_queue *fq, - unsigned int *work) +static __inline__ void fq_put(struct nf_ct_frag6_queue *fq) { - struct sk_buff *fp; - - BUG_TRAP(fq->last_in&COMPLETE); - BUG_TRAP(del_timer(&fq->timer) == 0); - - /* Release all fragment data. */ - fp = fq->fragments; - while (fp) { - struct sk_buff *xp = fp->next; - - frag_kfree_skb(fp, work); - fp = xp; - } - - frag_free_queue(fq, work); -} - -static __inline__ void fq_put(struct nf_ct_frag6_queue *fq, unsigned int *work) -{ - if (atomic_dec_and_test(&fq->refcnt)) - nf_ct_frag6_destroy(fq, work); + inet_frag_put(&fq->q, &nf_frags); } /* Kill fq entry. It is not destroyed immediately, @@ -234,151 +142,58 @@ static __inline__ void fq_put(struct nf_ct_frag6_queue *fq, unsigned int *work) */ static __inline__ void fq_kill(struct nf_ct_frag6_queue *fq) { - if (del_timer(&fq->timer)) - atomic_dec(&fq->refcnt); - - if (!(fq->last_in & COMPLETE)) { - fq_unlink(fq); - atomic_dec(&fq->refcnt); - fq->last_in |= COMPLETE; - } + inet_frag_kill(&fq->q, &nf_frags); } static void nf_ct_frag6_evictor(void) { - struct nf_ct_frag6_queue *fq; - struct list_head *tmp; - unsigned int work; - - work = atomic_read(&nf_ct_frag6_mem); - if (work <= nf_ct_frag6_low_thresh) - return; - - work -= nf_ct_frag6_low_thresh; - while (work > 0) { - read_lock(&nf_ct_frag6_lock); - if (list_empty(&nf_ct_frag6_lru_list)) { - read_unlock(&nf_ct_frag6_lock); - return; - } - tmp = nf_ct_frag6_lru_list.next; - BUG_ON(tmp == NULL); - fq = list_entry(tmp, struct nf_ct_frag6_queue, lru_list); - atomic_inc(&fq->refcnt); - read_unlock(&nf_ct_frag6_lock); - - spin_lock(&fq->lock); - if (!(fq->last_in&COMPLETE)) - fq_kill(fq); - spin_unlock(&fq->lock); - - fq_put(fq, &work); - } + inet_frag_evictor(&nf_frags); } static void nf_ct_frag6_expire(unsigned long data) { - struct nf_ct_frag6_queue *fq = (struct nf_ct_frag6_queue *) data; + struct nf_ct_frag6_queue *fq; + + fq = container_of((struct inet_frag_queue *)data, + struct nf_ct_frag6_queue, q); - spin_lock(&fq->lock); + spin_lock(&fq->q.lock); - if (fq->last_in & COMPLETE) + if (fq->q.last_in & COMPLETE) goto out; fq_kill(fq); out: - spin_unlock(&fq->lock); - fq_put(fq, NULL); + spin_unlock(&fq->q.lock); + fq_put(fq); } /* Creation primitives. 
*/ -static struct nf_ct_frag6_queue *nf_ct_frag6_intern(unsigned int hash, - struct nf_ct_frag6_queue *fq_in) +static __inline__ struct nf_ct_frag6_queue * +fq_find(__be32 id, struct in6_addr *src, struct in6_addr *dst) { - struct nf_ct_frag6_queue *fq; -#ifdef CONFIG_SMP - struct hlist_node *n; -#endif - - write_lock(&nf_ct_frag6_lock); -#ifdef CONFIG_SMP - hlist_for_each_entry(fq, n, &nf_ct_frag6_hash[hash], list) { - if (fq->id == fq_in->id && - ipv6_addr_equal(&fq_in->saddr, &fq->saddr) && - ipv6_addr_equal(&fq_in->daddr, &fq->daddr)) { - atomic_inc(&fq->refcnt); - write_unlock(&nf_ct_frag6_lock); - fq_in->last_in |= COMPLETE; - fq_put(fq_in, NULL); - return fq; - } - } -#endif - fq = fq_in; - - if (!mod_timer(&fq->timer, jiffies + nf_ct_frag6_timeout)) - atomic_inc(&fq->refcnt); - - atomic_inc(&fq->refcnt); - hlist_add_head(&fq->list, &nf_ct_frag6_hash[hash]); - INIT_LIST_HEAD(&fq->lru_list); - list_add_tail(&fq->lru_list, &nf_ct_frag6_lru_list); - nf_ct_frag6_nqueues++; - write_unlock(&nf_ct_frag6_lock); - return fq; -} + struct inet_frag_queue *q; + struct ip6_create_arg arg; + unsigned int hash; + arg.id = id; + arg.src = src; + arg.dst = dst; + hash = ip6qhashfn(id, src, dst); -static struct nf_ct_frag6_queue * -nf_ct_frag6_create(unsigned int hash, __be32 id, struct in6_addr *src, struct in6_addr *dst) -{ - struct nf_ct_frag6_queue *fq; - - if ((fq = frag_alloc_queue()) == NULL) { - pr_debug("Can't alloc new queue\n"); + q = inet_frag_find(&nf_frags, &arg, hash); + if (q == NULL) goto oom; - } - - memset(fq, 0, sizeof(struct nf_ct_frag6_queue)); - - fq->id = id; - ipv6_addr_copy(&fq->saddr, src); - ipv6_addr_copy(&fq->daddr, dst); - setup_timer(&fq->timer, nf_ct_frag6_expire, (unsigned long)fq); - spin_lock_init(&fq->lock); - atomic_set(&fq->refcnt, 1); - - return nf_ct_frag6_intern(hash, fq); + return container_of(q, struct nf_ct_frag6_queue, q); oom: + pr_debug("Can't alloc new queue\n"); return NULL; } -static __inline__ struct nf_ct_frag6_queue * -fq_find(__be32 id, struct in6_addr *src, struct in6_addr *dst) -{ - struct nf_ct_frag6_queue *fq; - struct hlist_node *n; - unsigned int hash = ip6qhashfn(id, src, dst); - - read_lock(&nf_ct_frag6_lock); - hlist_for_each_entry(fq, n, &nf_ct_frag6_hash[hash], list) { - if (fq->id == id && - ipv6_addr_equal(src, &fq->saddr) && - ipv6_addr_equal(dst, &fq->daddr)) { - atomic_inc(&fq->refcnt); - read_unlock(&nf_ct_frag6_lock); - return fq; - } - } - read_unlock(&nf_ct_frag6_lock); - - return nf_ct_frag6_create(hash, id, src, dst); -} - static int nf_ct_frag6_queue(struct nf_ct_frag6_queue *fq, struct sk_buff *skb, struct frag_hdr *fhdr, int nhoff) @@ -386,7 +201,7 @@ static int nf_ct_frag6_queue(struct nf_ct_frag6_queue *fq, struct sk_buff *skb, struct sk_buff *prev, *next; int offset, end; - if (fq->last_in & COMPLETE) { + if (fq->q.last_in & COMPLETE) { pr_debug("Already completed\n"); goto err; } @@ -412,13 +227,13 @@ static int nf_ct_frag6_queue(struct nf_ct_frag6_queue *fq, struct sk_buff *skb, /* If we already have some bits beyond end * or have different end, the segment is corrupted. */ - if (end < fq->len || - ((fq->last_in & LAST_IN) && end != fq->len)) { + if (end < fq->q.len || + ((fq->q.last_in & LAST_IN) && end != fq->q.len)) { pr_debug("already received last fragment\n"); goto err; } - fq->last_in |= LAST_IN; - fq->len = end; + fq->q.last_in |= LAST_IN; + fq->q.len = end; } else { /* Check if the fragment is rounded to 8 bytes. * Required by the RFC. 
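The hunks above are the heart of this conversion: nf_conntrack_reasm no longer keeps its own hash table, LRU list, timer and refcount, but embeds the shared struct inet_frag_queue as member q and lets the generic inet_frags code drive lookup and eviction through the callbacks registered in nf_frags. A minimal userspace sketch of that embedding idiom follows; the type and field names are illustrative, not the kernel definitions.

#include <stddef.h>

/* Generic per-queue state shared by every reassembler (abridged). */
struct inet_frag_queue_sketch {
        int len;        /* total length of the original datagram */
        int meat;       /* how many bytes have arrived so far */
};

/* The protocol-specific queue embeds the generic part as member "q". */
struct nf_frag6_queue_sketch {
        struct inet_frag_queue_sketch q;
        unsigned int id;        /* fragment id; the real struct also keys on saddr/daddr */
};

#define container_of_sketch(ptr, type, member) \
        ((type *)((char *)(ptr) - offsetof(type, member)))

/* Generic code hands back &fq->q; the hash and match callbacks map it
 * back to the outer structure, as nf_hashfn() does in the diff above. */
static struct nf_frag6_queue_sketch *
to_nf_frag6(struct inet_frag_queue_sketch *q)
{
        return container_of_sketch(q, struct nf_frag6_queue_sketch, q);
}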
@@ -430,13 +245,13 @@ static int nf_ct_frag6_queue(struct nf_ct_frag6_queue *fq, struct sk_buff *skb, pr_debug("end of fragment not rounded to 8 bytes.\n"); return -1; } - if (end > fq->len) { + if (end > fq->q.len) { /* Some bits beyond end -> corruption. */ - if (fq->last_in & LAST_IN) { + if (fq->q.last_in & LAST_IN) { pr_debug("last packet already reached.\n"); goto err; } - fq->len = end; + fq->q.len = end; } } @@ -458,7 +273,7 @@ static int nf_ct_frag6_queue(struct nf_ct_frag6_queue *fq, struct sk_buff *skb, * this fragment, right? */ prev = NULL; - for (next = fq->fragments; next != NULL; next = next->next) { + for (next = fq->q.fragments; next != NULL; next = next->next) { if (NFCT_FRAG6_CB(next)->offset >= offset) break; /* bingo! */ prev = next; @@ -503,7 +318,7 @@ static int nf_ct_frag6_queue(struct nf_ct_frag6_queue *fq, struct sk_buff *skb, /* next fragment */ NFCT_FRAG6_CB(next)->offset += i; - fq->meat -= i; + fq->q.meat -= i; if (next->ip_summed != CHECKSUM_UNNECESSARY) next->ip_summed = CHECKSUM_NONE; break; @@ -518,9 +333,9 @@ static int nf_ct_frag6_queue(struct nf_ct_frag6_queue *fq, struct sk_buff *skb, if (prev) prev->next = next; else - fq->fragments = next; + fq->q.fragments = next; - fq->meat -= free_it->len; + fq->q.meat -= free_it->len; frag_kfree_skb(free_it, NULL); } } @@ -532,23 +347,23 @@ static int nf_ct_frag6_queue(struct nf_ct_frag6_queue *fq, struct sk_buff *skb, if (prev) prev->next = skb; else - fq->fragments = skb; + fq->q.fragments = skb; skb->dev = NULL; - fq->stamp = skb->tstamp; - fq->meat += skb->len; - atomic_add(skb->truesize, &nf_ct_frag6_mem); + fq->q.stamp = skb->tstamp; + fq->q.meat += skb->len; + atomic_add(skb->truesize, &nf_frags.mem); /* The first fragment. * nhoffset is obtained from the first fragment, of course. */ if (offset == 0) { fq->nhoffset = nhoff; - fq->last_in |= FIRST_IN; + fq->q.last_in |= FIRST_IN; } - write_lock(&nf_ct_frag6_lock); - list_move_tail(&fq->lru_list, &nf_ct_frag6_lru_list); - write_unlock(&nf_ct_frag6_lock); + write_lock(&nf_frags.lock); + list_move_tail(&fq->q.lru_list, &nf_frags.lru_list); + write_unlock(&nf_frags.lock); return 0; err: @@ -567,7 +382,7 @@ err: static struct sk_buff * nf_ct_frag6_reasm(struct nf_ct_frag6_queue *fq, struct net_device *dev) { - struct sk_buff *fp, *op, *head = fq->fragments; + struct sk_buff *fp, *op, *head = fq->q.fragments; int payload_len; fq_kill(fq); @@ -577,7 +392,7 @@ nf_ct_frag6_reasm(struct nf_ct_frag6_queue *fq, struct net_device *dev) /* Unfragmented part is taken from the first segment. 
*/ payload_len = ((head->data - skb_network_header(head)) - - sizeof(struct ipv6hdr) + fq->len - + sizeof(struct ipv6hdr) + fq->q.len - sizeof(struct frag_hdr)); if (payload_len > IPV6_MAXPLEN) { pr_debug("payload len is too large.\n"); @@ -614,7 +429,7 @@ nf_ct_frag6_reasm(struct nf_ct_frag6_queue *fq, struct net_device *dev) clone->ip_summed = head->ip_summed; NFCT_FRAG6_CB(clone)->orig = NULL; - atomic_add(clone->truesize, &nf_ct_frag6_mem); + atomic_add(clone->truesize, &nf_frags.mem); } /* We have to remove fragment header from datagram and to relocate @@ -628,7 +443,7 @@ nf_ct_frag6_reasm(struct nf_ct_frag6_queue *fq, struct net_device *dev) skb_shinfo(head)->frag_list = head->next; skb_reset_transport_header(head); skb_push(head, head->data - skb_network_header(head)); - atomic_sub(head->truesize, &nf_ct_frag6_mem); + atomic_sub(head->truesize, &nf_frags.mem); for (fp=head->next; fp; fp = fp->next) { head->data_len += fp->len; @@ -638,12 +453,12 @@ nf_ct_frag6_reasm(struct nf_ct_frag6_queue *fq, struct net_device *dev) else if (head->ip_summed == CHECKSUM_COMPLETE) head->csum = csum_add(head->csum, fp->csum); head->truesize += fp->truesize; - atomic_sub(fp->truesize, &nf_ct_frag6_mem); + atomic_sub(fp->truesize, &nf_frags.mem); } head->next = NULL; head->dev = dev; - head->tstamp = fq->stamp; + head->tstamp = fq->q.stamp; ipv6_hdr(head)->payload_len = htons(payload_len); /* Yes, and fold redundant checksum back. 8) */ @@ -652,7 +467,7 @@ nf_ct_frag6_reasm(struct nf_ct_frag6_queue *fq, struct net_device *dev) skb_network_header_len(head), head->csum); - fq->fragments = NULL; + fq->q.fragments = NULL; /* all original skbs are linked into the NFCT_FRAG6_CB(head).orig */ fp = skb_shinfo(head)->frag_list; @@ -788,7 +603,7 @@ struct sk_buff *nf_ct_frag6_gather(struct sk_buff *skb) goto ret_orig; } - if (atomic_read(&nf_ct_frag6_mem) > nf_ct_frag6_high_thresh) + if (atomic_read(&nf_frags.mem) > nf_frags_ctl.high_thresh) nf_ct_frag6_evictor(); fq = fq_find(fhdr->identification, &hdr->saddr, &hdr->daddr); @@ -797,23 +612,23 @@ struct sk_buff *nf_ct_frag6_gather(struct sk_buff *skb) goto ret_orig; } - spin_lock(&fq->lock); + spin_lock(&fq->q.lock); if (nf_ct_frag6_queue(fq, clone, fhdr, nhoff) < 0) { - spin_unlock(&fq->lock); + spin_unlock(&fq->q.lock); pr_debug("Can't insert skb to queue\n"); - fq_put(fq, NULL); + fq_put(fq); goto ret_orig; } - if (fq->last_in == (FIRST_IN|LAST_IN) && fq->meat == fq->len) { + if (fq->q.last_in == (FIRST_IN|LAST_IN) && fq->q.meat == fq->q.len) { ret_skb = nf_ct_frag6_reasm(fq, dev); if (ret_skb == NULL) pr_debug("Can't reassemble fragmented packets\n"); } - spin_unlock(&fq->lock); + spin_unlock(&fq->q.lock); - fq_put(fq, NULL); + fq_put(fq); return ret_skb; ret_orig: @@ -859,20 +674,23 @@ int nf_ct_frag6_kfree_frags(struct sk_buff *skb) int nf_ct_frag6_init(void) { - nf_ct_frag6_hash_rnd = (u32) ((num_physpages ^ (num_physpages>>7)) ^ - (jiffies ^ (jiffies >> 6))); - - setup_timer(&nf_ct_frag6_secret_timer, nf_ct_frag6_secret_rebuild, 0); - nf_ct_frag6_secret_timer.expires = jiffies - + nf_ct_frag6_secret_interval; - add_timer(&nf_ct_frag6_secret_timer); + nf_frags.ctl = &nf_frags_ctl; + nf_frags.hashfn = nf_hashfn; + nf_frags.constructor = ip6_frag_init; + nf_frags.destructor = NULL; + nf_frags.skb_free = nf_skb_free; + nf_frags.qsize = sizeof(struct nf_ct_frag6_queue); + nf_frags.match = ip6_frag_match; + nf_frags.frag_expire = nf_ct_frag6_expire; + inet_frags_init(&nf_frags); return 0; } void nf_ct_frag6_cleanup(void) { - 
del_timer(&nf_ct_frag6_secret_timer); - nf_ct_frag6_low_thresh = 0; + inet_frags_fini(&nf_frags); + + nf_frags_ctl.low_thresh = 0; nf_ct_frag6_evictor(); } diff --git a/net/ipv6/proc.c b/net/ipv6/proc.c index db945018579..be526ad9254 100644 --- a/net/ipv6/proc.c +++ b/net/ipv6/proc.c @@ -54,7 +54,7 @@ static int sockstat6_seq_show(struct seq_file *seq, void *v) seq_printf(seq, "RAW6: inuse %d\n", fold_prot_inuse(&rawv6_prot)); seq_printf(seq, "FRAG6: inuse %d memory %d\n", - ip6_frag_nqueues, atomic_read(&ip6_frag_mem)); + ip6_frag_nqueues(), ip6_frag_mem()); return 0; } diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c index 31601c99354..76c88a93b9b 100644 --- a/net/ipv6/reassembly.c +++ b/net/ipv6/reassembly.c @@ -42,6 +42,7 @@ #include <linux/icmpv6.h> #include <linux/random.h> #include <linux/jhash.h> +#include <linux/skbuff.h> #include <net/sock.h> #include <net/snmp.h> @@ -53,11 +54,7 @@ #include <net/rawv6.h> #include <net/ndisc.h> #include <net/addrconf.h> - -int sysctl_ip6frag_high_thresh __read_mostly = 256*1024; -int sysctl_ip6frag_low_thresh __read_mostly = 192*1024; - -int sysctl_ip6frag_time __read_mostly = IPV6_FRAG_TIMEOUT; +#include <net/inet_frag.h> struct ip6frag_skb_cb { @@ -74,53 +71,39 @@ struct ip6frag_skb_cb struct frag_queue { - struct hlist_node list; - struct list_head lru_list; /* lru list member */ + struct inet_frag_queue q; __be32 id; /* fragment id */ struct in6_addr saddr; struct in6_addr daddr; - spinlock_t lock; - atomic_t refcnt; - struct timer_list timer; /* expire timer */ - struct sk_buff *fragments; - int len; - int meat; int iif; - ktime_t stamp; unsigned int csum; - __u8 last_in; /* has first/last segment arrived? */ -#define COMPLETE 4 -#define FIRST_IN 2 -#define LAST_IN 1 __u16 nhoffset; }; -/* Hash table. */ - -#define IP6Q_HASHSZ 64 +struct inet_frags_ctl ip6_frags_ctl __read_mostly = { + .high_thresh = 256 * 1024, + .low_thresh = 192 * 1024, + .timeout = IPV6_FRAG_TIMEOUT, + .secret_interval = 10 * 60 * HZ, +}; -static struct hlist_head ip6_frag_hash[IP6Q_HASHSZ]; -static DEFINE_RWLOCK(ip6_frag_lock); -static u32 ip6_frag_hash_rnd; -static LIST_HEAD(ip6_frag_lru_list); -int ip6_frag_nqueues = 0; +static struct inet_frags ip6_frags; -static __inline__ void __fq_unlink(struct frag_queue *fq) +int ip6_frag_nqueues(void) { - hlist_del(&fq->list); - list_del(&fq->lru_list); - ip6_frag_nqueues--; + return ip6_frags.nqueues; } -static __inline__ void fq_unlink(struct frag_queue *fq) +int ip6_frag_mem(void) { - write_lock(&ip6_frag_lock); - __fq_unlink(fq); - write_unlock(&ip6_frag_lock); + return atomic_read(&ip6_frags.mem); } +static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff *prev, + struct net_device *dev); + /* * callers should be careful not to use the hash value outside the ipfrag_lock * as doing so could race with ipfrag_hash_rnd being recalculated. 
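Both reassemblers now share a single lookup path: hash the (id, saddr, daddr) triple, walk the chain with the protocol's match callback, and fall through to the constructor when nothing matches, which is what inet_frag_find() does with the ip6_create_arg built in fq_find() below. A simplified standalone sketch of that find-or-create shape, with no locking, no LRU and a fixed hash mix (all names here are hypothetical; the kernel uses jhash with a periodically rekeyed random seed):

#include <stdint.h>
#include <stdlib.h>
#include <string.h>

#define HASHSZ 64       /* the FRAG6Q_HASHSZ/IP6Q_HASHSZ removed above were also 64 */

struct frag_key {
        uint32_t id;
        uint8_t saddr[16];
        uint8_t daddr[16];
};

struct frag_queue_sketch {
        struct frag_queue_sketch *next;
        struct frag_key key;
};

static struct frag_queue_sketch *hash_table[HASHSZ];

/* Stand-in mix; the real ip6qhashfn() jhashes the same triple. */
static unsigned int frag_hash(const struct frag_key *k)
{
        unsigned int h = k->id;
        int i;

        for (i = 0; i < 16; i++)
                h = h * 31 + (k->saddr[i] ^ (k->daddr[i] << 1));
        return h & (HASHSZ - 1);
}

static struct frag_queue_sketch *frag_find(const struct frag_key *k)
{
        unsigned int h = frag_hash(k);
        struct frag_queue_sketch *q;

        for (q = hash_table[h]; q != NULL; q = q->next) {
                /* the match callback: compare id and both addresses */
                if (q->key.id == k->id &&
                    memcmp(q->key.saddr, k->saddr, 16) == 0 &&
                    memcmp(q->key.daddr, k->daddr, 16) == 0)
                        return q;
        }

        /* miss: allocate and let the constructor callback fill the key */
        q = calloc(1, sizeof(*q));
        if (q == NULL)
                return NULL;
        q->key = *k;
        q->next = hash_table[h];
        hash_table[h] = q;
        return q;
}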
@@ -136,7 +119,7 @@ static unsigned int ip6qhashfn(__be32 id, struct in6_addr *saddr, a += JHASH_GOLDEN_RATIO; b += JHASH_GOLDEN_RATIO; - c += ip6_frag_hash_rnd; + c += ip6_frags.rnd; __jhash_mix(a, b, c); a += (__force u32)saddr->s6_addr32[3]; @@ -149,98 +132,54 @@ static unsigned int ip6qhashfn(__be32 id, struct in6_addr *saddr, c += (__force u32)id; __jhash_mix(a, b, c); - return c & (IP6Q_HASHSZ - 1); + return c & (INETFRAGS_HASHSZ - 1); } -static struct timer_list ip6_frag_secret_timer; -int sysctl_ip6frag_secret_interval __read_mostly = 10 * 60 * HZ; - -static void ip6_frag_secret_rebuild(unsigned long dummy) +static unsigned int ip6_hashfn(struct inet_frag_queue *q) { - unsigned long now = jiffies; - int i; - - write_lock(&ip6_frag_lock); - get_random_bytes(&ip6_frag_hash_rnd, sizeof(u32)); - for (i = 0; i < IP6Q_HASHSZ; i++) { - struct frag_queue *q; - struct hlist_node *p, *n; - - hlist_for_each_entry_safe(q, p, n, &ip6_frag_hash[i], list) { - unsigned int hval = ip6qhashfn(q->id, - &q->saddr, - &q->daddr); - - if (hval != i) { - hlist_del(&q->list); + struct frag_queue *fq; - /* Relink to new hash chain. */ - hlist_add_head(&q->list, - &ip6_frag_hash[hval]); + fq = container_of(q, struct frag_queue, q); + return ip6qhashfn(fq->id, &fq->saddr, &fq->daddr); +} - } - } - } - write_unlock(&ip6_frag_lock); +int ip6_frag_match(struct inet_frag_queue *q, void *a) +{ + struct frag_queue *fq; + struct ip6_create_arg *arg = a; - mod_timer(&ip6_frag_secret_timer, now + sysctl_ip6frag_secret_interval); + fq = container_of(q, struct frag_queue, q); + return (fq->id == arg->id && + ipv6_addr_equal(&fq->saddr, arg->src) && + ipv6_addr_equal(&fq->daddr, arg->dst)); } - -atomic_t ip6_frag_mem = ATOMIC_INIT(0); +EXPORT_SYMBOL(ip6_frag_match); /* Memory Tracking Functions. */ static inline void frag_kfree_skb(struct sk_buff *skb, int *work) { if (work) *work -= skb->truesize; - atomic_sub(skb->truesize, &ip6_frag_mem); + atomic_sub(skb->truesize, &ip6_frags.mem); kfree_skb(skb); } -static inline void frag_free_queue(struct frag_queue *fq, int *work) +void ip6_frag_init(struct inet_frag_queue *q, void *a) { - if (work) - *work -= sizeof(struct frag_queue); - atomic_sub(sizeof(struct frag_queue), &ip6_frag_mem); - kfree(fq); -} + struct frag_queue *fq = container_of(q, struct frag_queue, q); + struct ip6_create_arg *arg = a; -static inline struct frag_queue *frag_alloc_queue(void) -{ - struct frag_queue *fq = kzalloc(sizeof(struct frag_queue), GFP_ATOMIC); - - if(!fq) - return NULL; - atomic_add(sizeof(struct frag_queue), &ip6_frag_mem); - return fq; + fq->id = arg->id; + ipv6_addr_copy(&fq->saddr, arg->src); + ipv6_addr_copy(&fq->daddr, arg->dst); } +EXPORT_SYMBOL(ip6_frag_init); /* Destruction primitives. */ -/* Complete destruction of fq. */ -static void ip6_frag_destroy(struct frag_queue *fq, int *work) +static __inline__ void fq_put(struct frag_queue *fq) { - struct sk_buff *fp; - - BUG_TRAP(fq->last_in&COMPLETE); - BUG_TRAP(del_timer(&fq->timer) == 0); - - /* Release all fragment data. */ - fp = fq->fragments; - while (fp) { - struct sk_buff *xp = fp->next; - - frag_kfree_skb(fp, work); - fp = xp; - } - - frag_free_queue(fq, work); -} - -static __inline__ void fq_put(struct frag_queue *fq, int *work) -{ - if (atomic_dec_and_test(&fq->refcnt)) - ip6_frag_destroy(fq, work); + inet_frag_put(&fq->q, &ip6_frags); } /* Kill fq entry. 
It is not destroyed immediately, @@ -248,55 +187,28 @@ static __inline__ void fq_put(struct frag_queue *fq, int *work) */ static __inline__ void fq_kill(struct frag_queue *fq) { - if (del_timer(&fq->timer)) - atomic_dec(&fq->refcnt); - - if (!(fq->last_in & COMPLETE)) { - fq_unlink(fq); - atomic_dec(&fq->refcnt); - fq->last_in |= COMPLETE; - } + inet_frag_kill(&fq->q, &ip6_frags); } static void ip6_evictor(struct inet6_dev *idev) { - struct frag_queue *fq; - struct list_head *tmp; - int work; - - work = atomic_read(&ip6_frag_mem) - sysctl_ip6frag_low_thresh; - if (work <= 0) - return; - - while(work > 0) { - read_lock(&ip6_frag_lock); - if (list_empty(&ip6_frag_lru_list)) { - read_unlock(&ip6_frag_lock); - return; - } - tmp = ip6_frag_lru_list.next; - fq = list_entry(tmp, struct frag_queue, lru_list); - atomic_inc(&fq->refcnt); - read_unlock(&ip6_frag_lock); - - spin_lock(&fq->lock); - if (!(fq->last_in&COMPLETE)) - fq_kill(fq); - spin_unlock(&fq->lock); - - fq_put(fq, &work); - IP6_INC_STATS_BH(idev, IPSTATS_MIB_REASMFAILS); - } + int evicted; + + evicted = inet_frag_evictor(&ip6_frags); + if (evicted) + IP6_ADD_STATS_BH(idev, IPSTATS_MIB_REASMFAILS, evicted); } static void ip6_frag_expire(unsigned long data) { - struct frag_queue *fq = (struct frag_queue *) data; + struct frag_queue *fq; struct net_device *dev = NULL; - spin_lock(&fq->lock); + fq = container_of((struct inet_frag_queue *)data, struct frag_queue, q); + + spin_lock(&fq->q.lock); - if (fq->last_in & COMPLETE) + if (fq->q.last_in & COMPLETE) goto out; fq_kill(fq); @@ -311,7 +223,7 @@ static void ip6_frag_expire(unsigned long data) rcu_read_unlock(); /* Don't send error if the first segment did not arrive. */ - if (!(fq->last_in&FIRST_IN) || !fq->fragments) + if (!(fq->q.last_in&FIRST_IN) || !fq->q.fragments) goto out; /* @@ -319,114 +231,47 @@ static void ip6_frag_expire(unsigned long data) segment was received. And do not use fq->dev pointer directly, device might already disappeared. */ - fq->fragments->dev = dev; - icmpv6_send(fq->fragments, ICMPV6_TIME_EXCEED, ICMPV6_EXC_FRAGTIME, 0, dev); + fq->q.fragments->dev = dev; + icmpv6_send(fq->q.fragments, ICMPV6_TIME_EXCEED, ICMPV6_EXC_FRAGTIME, 0, dev); out: if (dev) dev_put(dev); - spin_unlock(&fq->lock); - fq_put(fq, NULL); + spin_unlock(&fq->q.lock); + fq_put(fq); } -/* Creation primitives. 
*/ - - -static struct frag_queue *ip6_frag_intern(struct frag_queue *fq_in) +static __inline__ struct frag_queue * +fq_find(__be32 id, struct in6_addr *src, struct in6_addr *dst, + struct inet6_dev *idev) { - struct frag_queue *fq; + struct inet_frag_queue *q; + struct ip6_create_arg arg; unsigned int hash; -#ifdef CONFIG_SMP - struct hlist_node *n; -#endif - - write_lock(&ip6_frag_lock); - hash = ip6qhashfn(fq_in->id, &fq_in->saddr, &fq_in->daddr); -#ifdef CONFIG_SMP - hlist_for_each_entry(fq, n, &ip6_frag_hash[hash], list) { - if (fq->id == fq_in->id && - ipv6_addr_equal(&fq_in->saddr, &fq->saddr) && - ipv6_addr_equal(&fq_in->daddr, &fq->daddr)) { - atomic_inc(&fq->refcnt); - write_unlock(&ip6_frag_lock); - fq_in->last_in |= COMPLETE; - fq_put(fq_in, NULL); - return fq; - } - } -#endif - fq = fq_in; - - if (!mod_timer(&fq->timer, jiffies + sysctl_ip6frag_time)) - atomic_inc(&fq->refcnt); - - atomic_inc(&fq->refcnt); - hlist_add_head(&fq->list, &ip6_frag_hash[hash]); - INIT_LIST_HEAD(&fq->lru_list); - list_add_tail(&fq->lru_list, &ip6_frag_lru_list); - ip6_frag_nqueues++; - write_unlock(&ip6_frag_lock); - return fq; -} - -static struct frag_queue * -ip6_frag_create(__be32 id, struct in6_addr *src, struct in6_addr *dst, - struct inet6_dev *idev) -{ - struct frag_queue *fq; + arg.id = id; + arg.src = src; + arg.dst = dst; + hash = ip6qhashfn(id, src, dst); - if ((fq = frag_alloc_queue()) == NULL) + q = inet_frag_find(&ip6_frags, &arg, hash); + if (q == NULL) goto oom; - fq->id = id; - ipv6_addr_copy(&fq->saddr, src); - ipv6_addr_copy(&fq->daddr, dst); - - init_timer(&fq->timer); - fq->timer.function = ip6_frag_expire; - fq->timer.data = (long) fq; - spin_lock_init(&fq->lock); - atomic_set(&fq->refcnt, 1); - - return ip6_frag_intern(fq); + return container_of(q, struct frag_queue, q); oom: IP6_INC_STATS_BH(idev, IPSTATS_MIB_REASMFAILS); return NULL; } -static __inline__ struct frag_queue * -fq_find(__be32 id, struct in6_addr *src, struct in6_addr *dst, - struct inet6_dev *idev) -{ - struct frag_queue *fq; - struct hlist_node *n; - unsigned int hash; - - read_lock(&ip6_frag_lock); - hash = ip6qhashfn(id, src, dst); - hlist_for_each_entry(fq, n, &ip6_frag_hash[hash], list) { - if (fq->id == id && - ipv6_addr_equal(src, &fq->saddr) && - ipv6_addr_equal(dst, &fq->daddr)) { - atomic_inc(&fq->refcnt); - read_unlock(&ip6_frag_lock); - return fq; - } - } - read_unlock(&ip6_frag_lock); - - return ip6_frag_create(id, src, dst, idev); -} - - -static void ip6_frag_queue(struct frag_queue *fq, struct sk_buff *skb, +static int ip6_frag_queue(struct frag_queue *fq, struct sk_buff *skb, struct frag_hdr *fhdr, int nhoff) { struct sk_buff *prev, *next; + struct net_device *dev; int offset, end; - if (fq->last_in & COMPLETE) + if (fq->q.last_in & COMPLETE) goto err; offset = ntohs(fhdr->frag_off) & ~0x7; @@ -439,7 +284,7 @@ static void ip6_frag_queue(struct frag_queue *fq, struct sk_buff *skb, icmpv6_param_prob(skb, ICMPV6_HDR_FIELD, ((u8 *)&fhdr->frag_off - skb_network_header(skb))); - return; + return -1; } if (skb->ip_summed == CHECKSUM_COMPLETE) { @@ -454,11 +299,11 @@ static void ip6_frag_queue(struct frag_queue *fq, struct sk_buff *skb, /* If we already have some bits beyond end * or have different end, the segment is corrupted. 
*/ - if (end < fq->len || - ((fq->last_in & LAST_IN) && end != fq->len)) + if (end < fq->q.len || + ((fq->q.last_in & LAST_IN) && end != fq->q.len)) goto err; - fq->last_in |= LAST_IN; - fq->len = end; + fq->q.last_in |= LAST_IN; + fq->q.len = end; } else { /* Check if the fragment is rounded to 8 bytes. * Required by the RFC. @@ -471,13 +316,13 @@ static void ip6_frag_queue(struct frag_queue *fq, struct sk_buff *skb, IPSTATS_MIB_INHDRERRORS); icmpv6_param_prob(skb, ICMPV6_HDR_FIELD, offsetof(struct ipv6hdr, payload_len)); - return; + return -1; } - if (end > fq->len) { + if (end > fq->q.len) { /* Some bits beyond end -> corruption. */ - if (fq->last_in & LAST_IN) + if (fq->q.last_in & LAST_IN) goto err; - fq->len = end; + fq->q.len = end; } } @@ -496,7 +341,7 @@ static void ip6_frag_queue(struct frag_queue *fq, struct sk_buff *skb, * this fragment, right? */ prev = NULL; - for(next = fq->fragments; next != NULL; next = next->next) { + for(next = fq->q.fragments; next != NULL; next = next->next) { if (FRAG6_CB(next)->offset >= offset) break; /* bingo! */ prev = next; @@ -533,7 +378,7 @@ static void ip6_frag_queue(struct frag_queue *fq, struct sk_buff *skb, if (!pskb_pull(next, i)) goto err; FRAG6_CB(next)->offset += i; /* next fragment */ - fq->meat -= i; + fq->q.meat -= i; if (next->ip_summed != CHECKSUM_UNNECESSARY) next->ip_summed = CHECKSUM_NONE; break; @@ -548,9 +393,9 @@ static void ip6_frag_queue(struct frag_queue *fq, struct sk_buff *skb, if (prev) prev->next = next; else - fq->fragments = next; + fq->q.fragments = next; - fq->meat -= free_it->len; + fq->q.meat -= free_it->len; frag_kfree_skb(free_it, NULL); } } @@ -562,30 +407,37 @@ static void ip6_frag_queue(struct frag_queue *fq, struct sk_buff *skb, if (prev) prev->next = skb; else - fq->fragments = skb; + fq->q.fragments = skb; - if (skb->dev) - fq->iif = skb->dev->ifindex; - skb->dev = NULL; - fq->stamp = skb->tstamp; - fq->meat += skb->len; - atomic_add(skb->truesize, &ip6_frag_mem); + dev = skb->dev; + if (dev) { + fq->iif = dev->ifindex; + skb->dev = NULL; + } + fq->q.stamp = skb->tstamp; + fq->q.meat += skb->len; + atomic_add(skb->truesize, &ip6_frags.mem); /* The first fragment. * nhoffset is obtained from the first fragment, of course. */ if (offset == 0) { fq->nhoffset = nhoff; - fq->last_in |= FIRST_IN; + fq->q.last_in |= FIRST_IN; } - write_lock(&ip6_frag_lock); - list_move_tail(&fq->lru_list, &ip6_frag_lru_list); - write_unlock(&ip6_frag_lock); - return; + + if (fq->q.last_in == (FIRST_IN | LAST_IN) && fq->q.meat == fq->q.len) + return ip6_frag_reasm(fq, prev, dev); + + write_lock(&ip6_frags.lock); + list_move_tail(&fq->q.lru_list, &ip6_frags.lru_list); + write_unlock(&ip6_frags.lock); + return -1; err: IP6_INC_STATS(ip6_dst_idev(skb->dst), IPSTATS_MIB_REASMFAILS); kfree_skb(skb); + return -1; } /* @@ -597,21 +449,39 @@ err: * queue is eligible for reassembly i.e. it is not COMPLETE, * the last and the first frames arrived and all the bits are here. */ -static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff **skb_in, +static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff *prev, struct net_device *dev) { - struct sk_buff *fp, *head = fq->fragments; + struct sk_buff *fp, *head = fq->q.fragments; int payload_len; unsigned int nhoff; fq_kill(fq); + /* Make the one we just received the head. 
*/ + if (prev) { + head = prev->next; + fp = skb_clone(head, GFP_ATOMIC); + + if (!fp) + goto out_oom; + + fp->next = head->next; + prev->next = fp; + + skb_morph(head, fq->q.fragments); + head->next = fq->q.fragments->next; + + kfree_skb(fq->q.fragments); + fq->q.fragments = head; + } + BUG_TRAP(head != NULL); BUG_TRAP(FRAG6_CB(head)->offset == 0); /* Unfragmented part is taken from the first segment. */ payload_len = ((head->data - skb_network_header(head)) - - sizeof(struct ipv6hdr) + fq->len - + sizeof(struct ipv6hdr) + fq->q.len - sizeof(struct frag_hdr)); if (payload_len > IPV6_MAXPLEN) goto out_oversize; @@ -640,7 +510,7 @@ static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff **skb_in, head->len -= clone->len; clone->csum = 0; clone->ip_summed = head->ip_summed; - atomic_add(clone->truesize, &ip6_frag_mem); + atomic_add(clone->truesize, &ip6_frags.mem); } /* We have to remove fragment header from datagram and to relocate @@ -655,7 +525,7 @@ static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff **skb_in, skb_shinfo(head)->frag_list = head->next; skb_reset_transport_header(head); skb_push(head, head->data - skb_network_header(head)); - atomic_sub(head->truesize, &ip6_frag_mem); + atomic_sub(head->truesize, &ip6_frags.mem); for (fp=head->next; fp; fp = fp->next) { head->data_len += fp->len; @@ -665,17 +535,15 @@ static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff **skb_in, else if (head->ip_summed == CHECKSUM_COMPLETE) head->csum = csum_add(head->csum, fp->csum); head->truesize += fp->truesize; - atomic_sub(fp->truesize, &ip6_frag_mem); + atomic_sub(fp->truesize, &ip6_frags.mem); } head->next = NULL; head->dev = dev; - head->tstamp = fq->stamp; + head->tstamp = fq->q.stamp; ipv6_hdr(head)->payload_len = htons(payload_len); IP6CB(head)->nhoff = nhoff; - *skb_in = head; - /* Yes, and fold redundant checksum back. 
8) */ if (head->ip_summed == CHECKSUM_COMPLETE) head->csum = csum_partial(skb_network_header(head), @@ -685,7 +553,7 @@ static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff **skb_in, rcu_read_lock(); IP6_INC_STATS_BH(__in6_dev_get(dev), IPSTATS_MIB_REASMOKS); rcu_read_unlock(); - fq->fragments = NULL; + fq->q.fragments = NULL; return 1; out_oversize: @@ -702,10 +570,8 @@ out_fail: return -1; } -static int ipv6_frag_rcv(struct sk_buff **skbp) +static int ipv6_frag_rcv(struct sk_buff *skb) { - struct sk_buff *skb = *skbp; - struct net_device *dev = skb->dev; struct frag_hdr *fhdr; struct frag_queue *fq; struct ipv6hdr *hdr = ipv6_hdr(skb); @@ -739,23 +605,19 @@ static int ipv6_frag_rcv(struct sk_buff **skbp) return 1; } - if (atomic_read(&ip6_frag_mem) > sysctl_ip6frag_high_thresh) + if (atomic_read(&ip6_frags.mem) > ip6_frags_ctl.high_thresh) ip6_evictor(ip6_dst_idev(skb->dst)); if ((fq = fq_find(fhdr->identification, &hdr->saddr, &hdr->daddr, ip6_dst_idev(skb->dst))) != NULL) { - int ret = -1; + int ret; - spin_lock(&fq->lock); + spin_lock(&fq->q.lock); - ip6_frag_queue(fq, skb, fhdr, IP6CB(skb)->nhoff); + ret = ip6_frag_queue(fq, skb, fhdr, IP6CB(skb)->nhoff); - if (fq->last_in == (FIRST_IN|LAST_IN) && - fq->meat == fq->len) - ret = ip6_frag_reasm(fq, skbp, dev); - - spin_unlock(&fq->lock); - fq_put(fq, NULL); + spin_unlock(&fq->q.lock); + fq_put(fq); return ret; } @@ -775,11 +637,13 @@ void __init ipv6_frag_init(void) if (inet6_add_protocol(&frag_protocol, IPPROTO_FRAGMENT) < 0) printk(KERN_ERR "ipv6_frag_init: Could not register protocol\n"); - ip6_frag_hash_rnd = (u32) ((num_physpages ^ (num_physpages>>7)) ^ - (jiffies ^ (jiffies >> 6))); - - init_timer(&ip6_frag_secret_timer); - ip6_frag_secret_timer.function = ip6_frag_secret_rebuild; - ip6_frag_secret_timer.expires = jiffies + sysctl_ip6frag_secret_interval; - add_timer(&ip6_frag_secret_timer); + ip6_frags.ctl = &ip6_frags_ctl; + ip6_frags.hashfn = ip6_hashfn; + ip6_frags.constructor = ip6_frag_init; + ip6_frags.destructor = NULL; + ip6_frags.skb_free = NULL; + ip6_frags.qsize = sizeof(struct frag_queue); + ip6_frags.match = ip6_frag_match; + ip6_frags.frag_expire = ip6_frag_expire; + inet_frags_init(&ip6_frags); } diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 6ff19f9eb9e..95f8e4a62f6 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -663,7 +663,7 @@ static struct rt6_info *rt6_alloc_clone(struct rt6_info *ort, struct in6_addr *d return rt; } -static struct rt6_info *ip6_pol_route_input(struct fib6_table *table, +static struct rt6_info *ip6_pol_route(struct fib6_table *table, int oif, struct flowi *fl, int flags) { struct fib6_node *fn; @@ -682,7 +682,7 @@ restart_2: fn = fib6_lookup(&table->tb6_root, &fl->fl6_dst, &fl->fl6_src); restart: - rt = rt6_select(fn, fl->iif, strict | reachable); + rt = rt6_select(fn, oif, strict | reachable); BACKTRACK(&fl->fl6_src); if (rt == &ip6_null_entry || rt->rt6i_flags & RTF_CACHE) @@ -735,6 +735,12 @@ out2: return rt; } +static struct rt6_info *ip6_pol_route_input(struct fib6_table *table, + struct flowi *fl, int flags) +{ + return ip6_pol_route(table, fl->iif, fl, flags); +} + void ip6_route_input(struct sk_buff *skb) { struct ipv6hdr *iph = ipv6_hdr(skb); @@ -761,72 +767,7 @@ void ip6_route_input(struct sk_buff *skb) static struct rt6_info *ip6_pol_route_output(struct fib6_table *table, struct flowi *fl, int flags) { - struct fib6_node *fn; - struct rt6_info *rt, *nrt; - int strict = 0; - int attempts = 3; - int err; - int reachable = ipv6_devconf.forwarding ? 
0 : RT6_LOOKUP_F_REACHABLE; - - strict |= flags & RT6_LOOKUP_F_IFACE; - -relookup: - read_lock_bh(&table->tb6_lock); - -restart_2: - fn = fib6_lookup(&table->tb6_root, &fl->fl6_dst, &fl->fl6_src); - -restart: - rt = rt6_select(fn, fl->oif, strict | reachable); - BACKTRACK(&fl->fl6_src); - if (rt == &ip6_null_entry || - rt->rt6i_flags & RTF_CACHE) - goto out; - - dst_hold(&rt->u.dst); - read_unlock_bh(&table->tb6_lock); - - if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP)) - nrt = rt6_alloc_cow(rt, &fl->fl6_dst, &fl->fl6_src); - else { -#if CLONE_OFFLINK_ROUTE - nrt = rt6_alloc_clone(rt, &fl->fl6_dst); -#else - goto out2; -#endif - } - - dst_release(&rt->u.dst); - rt = nrt ? : &ip6_null_entry; - - dst_hold(&rt->u.dst); - if (nrt) { - err = ip6_ins_rt(nrt); - if (!err) - goto out2; - } - - if (--attempts <= 0) - goto out2; - - /* - * Race condition! In the gap, when table->tb6_lock was - * released someone could insert this route. Relookup. - */ - dst_release(&rt->u.dst); - goto relookup; - -out: - if (reachable) { - reachable = 0; - goto restart_2; - } - dst_hold(&rt->u.dst); - read_unlock_bh(&table->tb6_lock); -out2: - rt->u.dst.lastuse = jiffies; - rt->u.dst.__use++; - return rt; + return ip6_pol_route(table, fl->oif, fl, flags); } struct dst_entry * ip6_route_output(struct sock *sk, struct flowi *fl) @@ -2456,7 +2397,6 @@ int ipv6_sysctl_rtcache_flush(ctl_table *ctl, int write, struct file * filp, ctl_table ipv6_route_table[] = { { - .ctl_name = NET_IPV6_ROUTE_FLUSH, .procname = "flush", .data = &flush_delay, .maxlen = sizeof(int), diff --git a/net/ipv6/sysctl_net_ipv6.c b/net/ipv6/sysctl_net_ipv6.c index 3fb44277207..68bb2548e46 100644 --- a/net/ipv6/sysctl_net_ipv6.c +++ b/net/ipv6/sysctl_net_ipv6.c @@ -12,6 +12,7 @@ #include <net/ndisc.h> #include <net/ipv6.h> #include <net/addrconf.h> +#include <net/inet_frag.h> #ifdef CONFIG_SYSCTL @@ -41,7 +42,7 @@ static ctl_table ipv6_table[] = { { .ctl_name = NET_IPV6_IP6FRAG_HIGH_THRESH, .procname = "ip6frag_high_thresh", - .data = &sysctl_ip6frag_high_thresh, + .data = &ip6_frags_ctl.high_thresh, .maxlen = sizeof(int), .mode = 0644, .proc_handler = &proc_dointvec @@ -49,7 +50,7 @@ static ctl_table ipv6_table[] = { { .ctl_name = NET_IPV6_IP6FRAG_LOW_THRESH, .procname = "ip6frag_low_thresh", - .data = &sysctl_ip6frag_low_thresh, + .data = &ip6_frags_ctl.low_thresh, .maxlen = sizeof(int), .mode = 0644, .proc_handler = &proc_dointvec @@ -57,7 +58,7 @@ static ctl_table ipv6_table[] = { { .ctl_name = NET_IPV6_IP6FRAG_TIME, .procname = "ip6frag_time", - .data = &sysctl_ip6frag_time, + .data = &ip6_frags_ctl.timeout, .maxlen = sizeof(int), .mode = 0644, .proc_handler = &proc_dointvec_jiffies, @@ -66,7 +67,7 @@ static ctl_table ipv6_table[] = { { .ctl_name = NET_IPV6_IP6FRAG_SECRET_INTERVAL, .procname = "ip6frag_secret_interval", - .data = &sysctl_ip6frag_secret_interval, + .data = &ip6_frags_ctl.secret_interval, .maxlen = sizeof(int), .mode = 0644, .proc_handler = &proc_dointvec_jiffies, diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index a07b59c528f..737b755342b 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -1668,9 +1668,8 @@ ipv6_pktoptions: return 0; } -static int tcp_v6_rcv(struct sk_buff **pskb) +static int tcp_v6_rcv(struct sk_buff *skb) { - struct sk_buff *skb = *pskb; struct tcphdr *th; struct sock *sk; int ret; diff --git a/net/ipv6/tunnel6.c b/net/ipv6/tunnel6.c index 23e2809878a..6323921b40b 100644 --- a/net/ipv6/tunnel6.c +++ b/net/ipv6/tunnel6.c @@ -87,9 +87,8 @@ int xfrm6_tunnel_deregister(struct 
xfrm6_tunnel *handler, unsigned short family) EXPORT_SYMBOL(xfrm6_tunnel_deregister); -static int tunnel6_rcv(struct sk_buff **pskb) +static int tunnel6_rcv(struct sk_buff *skb) { - struct sk_buff *skb = *pskb; struct xfrm6_tunnel *handler; if (!pskb_may_pull(skb, sizeof(struct ipv6hdr))) @@ -106,9 +105,8 @@ drop: return 0; } -static int tunnel46_rcv(struct sk_buff **pskb) +static int tunnel46_rcv(struct sk_buff *skb) { - struct sk_buff *skb = *pskb; struct xfrm6_tunnel *handler; if (!pskb_may_pull(skb, sizeof(struct ipv6hdr))) diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index 82ff26dd447..caebad6ee51 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -405,10 +405,9 @@ static inline int udp6_csum_init(struct sk_buff *skb, struct udphdr *uh, return 0; } -int __udp6_lib_rcv(struct sk_buff **pskb, struct hlist_head udptable[], +int __udp6_lib_rcv(struct sk_buff *skb, struct hlist_head udptable[], int proto) { - struct sk_buff *skb = *pskb; struct sock *sk; struct udphdr *uh; struct net_device *dev = skb->dev; @@ -494,9 +493,9 @@ discard: return 0; } -static __inline__ int udpv6_rcv(struct sk_buff **pskb) +static __inline__ int udpv6_rcv(struct sk_buff *skb) { - return __udp6_lib_rcv(pskb, udp_hash, IPPROTO_UDP); + return __udp6_lib_rcv(skb, udp_hash, IPPROTO_UDP); } /* diff --git a/net/ipv6/udp_impl.h b/net/ipv6/udp_impl.h index 6e252f318f7..2d3fda60123 100644 --- a/net/ipv6/udp_impl.h +++ b/net/ipv6/udp_impl.h @@ -6,7 +6,7 @@ #include <net/addrconf.h> #include <net/inet_common.h> -extern int __udp6_lib_rcv(struct sk_buff **, struct hlist_head [], int ); +extern int __udp6_lib_rcv(struct sk_buff *, struct hlist_head [], int ); extern void __udp6_lib_err(struct sk_buff *, struct inet6_skb_parm *, int , int , int , __be32 , struct hlist_head []); diff --git a/net/ipv6/udplite.c b/net/ipv6/udplite.c index f54016a5500..766566f7de4 100644 --- a/net/ipv6/udplite.c +++ b/net/ipv6/udplite.c @@ -17,9 +17,9 @@ DEFINE_SNMP_STAT(struct udp_mib, udplite_stats_in6) __read_mostly; -static int udplitev6_rcv(struct sk_buff **pskb) +static int udplitev6_rcv(struct sk_buff *skb) { - return __udp6_lib_rcv(pskb, udplite_hash, IPPROTO_UDPLITE); + return __udp6_lib_rcv(skb, udplite_hash, IPPROTO_UDPLITE); } static void udplitev6_err(struct sk_buff *skb, diff --git a/net/ipv6/xfrm6_input.c b/net/ipv6/xfrm6_input.c index c858537cec4..515783707e8 100644 --- a/net/ipv6/xfrm6_input.c +++ b/net/ipv6/xfrm6_input.c @@ -16,7 +16,7 @@ #include <net/ipv6.h> #include <net/xfrm.h> -int xfrm6_rcv_spi(struct sk_buff *skb, __be32 spi) +int xfrm6_rcv_spi(struct sk_buff *skb, int nexthdr, __be32 spi) { int err; __be32 seq; @@ -24,11 +24,9 @@ int xfrm6_rcv_spi(struct sk_buff *skb, __be32 spi) struct xfrm_state *x; int xfrm_nr = 0; int decaps = 0; - int nexthdr; unsigned int nhoff; nhoff = IP6CB(skb)->nhoff; - nexthdr = skb_network_header(skb)[nhoff]; seq = 0; if (!spi && (err = xfrm_parse_spi(skb, nexthdr, &spi, &seq)) != 0) @@ -41,7 +39,7 @@ int xfrm6_rcv_spi(struct sk_buff *skb, __be32 spi) goto drop; x = xfrm_state_lookup((xfrm_address_t *)&iph->daddr, spi, - nexthdr != IPPROTO_IPIP ? 
nexthdr : IPPROTO_IPV6, AF_INET6); + nexthdr, AF_INET6); if (x == NULL) goto drop; spin_lock(&x->lock); @@ -70,10 +68,10 @@ int xfrm6_rcv_spi(struct sk_buff *skb, __be32 spi) xfrm_vec[xfrm_nr++] = x; - if (x->mode->input(x, skb)) + if (x->outer_mode->input(x, skb)) goto drop; - if (x->props.mode == XFRM_MODE_TUNNEL) { /* XXX */ + if (x->outer_mode->flags & XFRM_MODE_FLAG_TUNNEL) { decaps = 1; break; } @@ -99,7 +97,6 @@ int xfrm6_rcv_spi(struct sk_buff *skb, __be32 spi) memcpy(skb->sp->xvec + skb->sp->len, xfrm_vec, xfrm_nr * sizeof(xfrm_vec[0])); skb->sp->len += xfrm_nr; - skb->ip_summed = CHECKSUM_NONE; nf_reset(skb); @@ -133,9 +130,10 @@ drop: EXPORT_SYMBOL(xfrm6_rcv_spi); -int xfrm6_rcv(struct sk_buff **pskb) +int xfrm6_rcv(struct sk_buff *skb) { - return xfrm6_rcv_spi(*pskb, 0); + return xfrm6_rcv_spi(skb, skb_network_header(skb)[IP6CB(skb)->nhoff], + 0); } EXPORT_SYMBOL(xfrm6_rcv); diff --git a/net/ipv6/xfrm6_mode_beet.c b/net/ipv6/xfrm6_mode_beet.c index 13bb1e85676..2bfb4f05c14 100644 --- a/net/ipv6/xfrm6_mode_beet.c +++ b/net/ipv6/xfrm6_mode_beet.c @@ -79,6 +79,7 @@ static struct xfrm_mode xfrm6_beet_mode = { .output = xfrm6_beet_output, .owner = THIS_MODULE, .encap = XFRM_MODE_BEET, + .flags = XFRM_MODE_FLAG_TUNNEL, }; static int __init xfrm6_beet_init(void) diff --git a/net/ipv6/xfrm6_mode_ro.c b/net/ipv6/xfrm6_mode_ro.c index 957ae36b669..a7bc8c62317 100644 --- a/net/ipv6/xfrm6_mode_ro.c +++ b/net/ipv6/xfrm6_mode_ro.c @@ -58,16 +58,7 @@ static int xfrm6_ro_output(struct xfrm_state *x, struct sk_buff *skb) return 0; } -/* - * Do nothing about routing optimization header unlike IPsec. - */ -static int xfrm6_ro_input(struct xfrm_state *x, struct sk_buff *skb) -{ - return 0; -} - static struct xfrm_mode xfrm6_ro_mode = { - .input = xfrm6_ro_input, .output = xfrm6_ro_output, .owner = THIS_MODULE, .encap = XFRM_MODE_ROUTEOPTIMIZATION, diff --git a/net/ipv6/xfrm6_mode_tunnel.c b/net/ipv6/xfrm6_mode_tunnel.c index ea228387911..fd84e221727 100644 --- a/net/ipv6/xfrm6_mode_tunnel.c +++ b/net/ipv6/xfrm6_mode_tunnel.c @@ -118,6 +118,7 @@ static struct xfrm_mode xfrm6_tunnel_mode = { .output = xfrm6_tunnel_output, .owner = THIS_MODULE, .encap = XFRM_MODE_TUNNEL, + .flags = XFRM_MODE_FLAG_TUNNEL, }; static int __init xfrm6_tunnel_init(void) diff --git a/net/ipv6/xfrm6_output.c b/net/ipv6/xfrm6_output.c index 4618c18e611..656976760ad 100644 --- a/net/ipv6/xfrm6_output.c +++ b/net/ipv6/xfrm6_output.c @@ -50,7 +50,7 @@ static inline int xfrm6_output_one(struct sk_buff *skb) struct ipv6hdr *iph; int err; - if (x->props.mode == XFRM_MODE_TUNNEL) { + if (x->outer_mode->flags & XFRM_MODE_FLAG_TUNNEL) { err = xfrm6_tunnel_check_size(skb); if (err) goto error_nolock; @@ -80,7 +80,7 @@ static int xfrm6_output_finish2(struct sk_buff *skb) while (likely((err = xfrm6_output_one(skb)) == 0)) { nf_reset(skb); - err = nf_hook(PF_INET6, NF_IP6_LOCAL_OUT, &skb, NULL, + err = nf_hook(PF_INET6, NF_IP6_LOCAL_OUT, skb, NULL, skb->dst->dev, dst_output); if (unlikely(err != 1)) break; @@ -88,7 +88,7 @@ static int xfrm6_output_finish2(struct sk_buff *skb) if (!skb->dst->xfrm) return dst_output(skb); - err = nf_hook(PF_INET6, NF_IP6_POST_ROUTING, &skb, NULL, + err = nf_hook(PF_INET6, NF_IP6_POST_ROUTING, skb, NULL, skb->dst->dev, xfrm6_output_finish2); if (unlikely(err != 1)) break; diff --git a/net/ipv6/xfrm6_policy.c b/net/ipv6/xfrm6_policy.c index 15aa4c58c31..82e27b80d07 100644 --- a/net/ipv6/xfrm6_policy.c +++ b/net/ipv6/xfrm6_policy.c @@ -178,8 +178,7 @@ __xfrm6_bundle_create(struct xfrm_policy *policy, struct 
xfrm_state **xfrm, int __xfrm6_bundle_len_inc(&header_len, &nfheader_len, xfrm[i]); trailer_len += xfrm[i]->props.trailer_len; - if (xfrm[i]->props.mode == XFRM_MODE_TUNNEL || - xfrm[i]->props.mode == XFRM_MODE_ROUTEOPTIMIZATION) { + if (xfrm[i]->props.mode != XFRM_MODE_TRANSPORT) { unsigned short encap_family = xfrm[i]->props.family; switch(encap_family) { case AF_INET: @@ -215,7 +214,6 @@ __xfrm6_bundle_create(struct xfrm_policy *policy, struct xfrm_state **xfrm, int i = 0; for (; dst_prev != &rt->u.dst; dst_prev = dst_prev->child) { struct xfrm_dst *x = (struct xfrm_dst*)dst_prev; - struct xfrm_state_afinfo *afinfo; dst_prev->xfrm = xfrm[i++]; dst_prev->dev = rt->u.dst.dev; @@ -232,18 +230,7 @@ __xfrm6_bundle_create(struct xfrm_policy *policy, struct xfrm_state **xfrm, int /* Copy neighbour for reachability confirmation */ dst_prev->neighbour = neigh_clone(rt->u.dst.neighbour); dst_prev->input = rt->u.dst.input; - /* XXX: When IPv4 is implemented as module and can be unloaded, - * we should manage reference to xfrm4_output in afinfo->output. - * Miyazawa - */ - afinfo = xfrm_state_get_afinfo(dst_prev->xfrm->props.family); - if (!afinfo) { - dst = *dst_p; - goto error; - } - - dst_prev->output = afinfo->output; - xfrm_state_put_afinfo(afinfo); + dst_prev->output = dst_prev->xfrm->outer_mode->afinfo->output; /* Sheit... I remember I did this right. Apparently, * it was magically lost, so this code needs audit */ x->u.rt6.rt6i_flags = rt0->rt6i_flags&(RTCF_BROADCAST|RTCF_MULTICAST|RTCF_LOCAL); diff --git a/net/ipv6/xfrm6_state.c b/net/ipv6/xfrm6_state.c index cdadb484746..b392bee396f 100644 --- a/net/ipv6/xfrm6_state.c +++ b/net/ipv6/xfrm6_state.c @@ -93,7 +93,8 @@ __xfrm6_state_sort(struct xfrm_state **dst, struct xfrm_state **src, int n) /* Rule 4: select IPsec tunnel */ for (i = 0; i < n; i++) { if (src[i] && - src[i]->props.mode == XFRM_MODE_TUNNEL) { + (src[i]->props.mode == XFRM_MODE_TUNNEL || + src[i]->props.mode == XFRM_MODE_BEET)) { dst[j++] = src[i]; src[i] = NULL; } @@ -146,7 +147,8 @@ __xfrm6_tmpl_sort(struct xfrm_tmpl **dst, struct xfrm_tmpl **src, int n) /* Rule 3: select IPsec tunnel */ for (i = 0; i < n; i++) { if (src[i] && - src[i]->mode == XFRM_MODE_TUNNEL) { + (src[i]->mode == XFRM_MODE_TUNNEL || + src[i]->mode == XFRM_MODE_BEET)) { dst[j++] = src[i]; src[i] = NULL; } @@ -168,6 +170,7 @@ __xfrm6_tmpl_sort(struct xfrm_tmpl **dst, struct xfrm_tmpl **src, int n) static struct xfrm_state_afinfo xfrm6_state_afinfo = { .family = AF_INET6, + .owner = THIS_MODULE, .init_tempsel = __xfrm6_init_tempsel, .tmpl_sort = __xfrm6_tmpl_sort, .state_sort = __xfrm6_state_sort, diff --git a/net/ipv6/xfrm6_tunnel.c b/net/ipv6/xfrm6_tunnel.c index 3f8a3abde67..fae90ff3108 100644 --- a/net/ipv6/xfrm6_tunnel.c +++ b/net/ipv6/xfrm6_tunnel.c @@ -248,7 +248,7 @@ static int xfrm6_tunnel_output(struct xfrm_state *x, struct sk_buff *skb) static int xfrm6_tunnel_input(struct xfrm_state *x, struct sk_buff *skb) { - return 0; + return skb_network_header(skb)[IP6CB(skb)->nhoff]; } static int xfrm6_tunnel_rcv(struct sk_buff *skb) @@ -257,7 +257,7 @@ static int xfrm6_tunnel_rcv(struct sk_buff *skb) __be32 spi; spi = xfrm6_tunnel_spi_lookup((xfrm_address_t *)&iph->saddr); - return xfrm6_rcv_spi(skb, spi) > 0 ? : 0; + return xfrm6_rcv_spi(skb, IPPROTO_IPV6, spi) > 0 ? 
: 0; } static int xfrm6_tunnel_err(struct sk_buff *skb, struct inet6_skb_parm *opt, diff --git a/net/irda/discovery.c b/net/irda/discovery.c index af0cea721d2..80c33f408e3 100644 --- a/net/irda/discovery.c +++ b/net/irda/discovery.c @@ -202,7 +202,7 @@ void irlmp_expire_discoveries(hashbin_t *log, __u32 saddr, int force) /* Drop the spinlock before calling the higher layers, as * we can't guarantee they won't call us back and create a * deadlock. We will work on our own private data, so we - * don't care to be interupted. - Jean II */ + * don't care to be interrupted. - Jean II */ spin_unlock_irqrestore(&log->hb_spinlock, flags); if(buffer == NULL) diff --git a/net/irda/ircomm/ircomm_tty_attach.c b/net/irda/ircomm/ircomm_tty_attach.c index 824309dabfe..b5a13882c92 100644 --- a/net/irda/ircomm/ircomm_tty_attach.c +++ b/net/irda/ircomm/ircomm_tty_attach.c @@ -381,18 +381,9 @@ static void ircomm_tty_discovery_indication(discinfo_t *discovery, info.daddr = discovery->daddr; info.saddr = discovery->saddr; - /* FIXME. We have a locking problem on the hashbin here. - * We probably need to use hashbin_find_next(), but we first - * need to ensure that "line" is unique. - Jean II */ - self = (struct ircomm_tty_cb *) hashbin_get_first(ircomm_tty); - while (self != NULL) { - IRDA_ASSERT(self->magic == IRCOMM_TTY_MAGIC, return;); - - ircomm_tty_do_event(self, IRCOMM_TTY_DISCOVERY_INDICATION, - NULL, &info); - - self = (struct ircomm_tty_cb *) hashbin_get_next(ircomm_tty); - } + self = (struct ircomm_tty_cb *) priv; + ircomm_tty_do_event(self, IRCOMM_TTY_DISCOVERY_INDICATION, + NULL, &info); } /* diff --git a/net/irda/iriap.c b/net/irda/iriap.c index ee3889fa49a..dc5e34a0162 100644 --- a/net/irda/iriap.c +++ b/net/irda/iriap.c @@ -18,7 +18,7 @@ * published by the Free Software Foundation; either version 2 of * the License, or (at your option) any later version. * - * Neither Dag Brattli nor University of Tromsø admit liability nor + * Neither Dag Brattli nor University of Tromsø admit liability nor * provide warranty for any of this software. This material is * provided "AS-IS" and at no charge. * diff --git a/net/irda/iriap_event.c b/net/irda/iriap_event.c index 99b18dc7a0b..8fb9d7277ca 100644 --- a/net/irda/iriap_event.c +++ b/net/irda/iriap_event.c @@ -18,7 +18,7 @@ * published by the Free Software Foundation; either version 2 of * the License, or (at your option) any later version. * - * Neither Dag Brattli nor University of Tromsø admit liability nor + * Neither Dag Brattli nor University of Tromsø admit liability nor * provide warranty for any of this software. This material is * provided "AS-IS" and at no charge. * diff --git a/net/irda/irias_object.c b/net/irda/irias_object.c index cf302457097..cbcf04380f3 100644 --- a/net/irda/irias_object.c +++ b/net/irda/irias_object.c @@ -16,7 +16,7 @@ * published by the Free Software Foundation; either version 2 of * the License, or (at your option) any later version. * - * Neither Dag Brattli nor University of Tromsø admit liability nor + * Neither Dag Brattli nor University of Tromsø admit liability nor * provide warranty for any of this software. This material is * provided "AS-IS" and at no charge. * diff --git a/net/irda/irlan/irlan_client.c b/net/irda/irlan/irlan_client.c index 87039c2fb6a..fff52d57a20 100644 --- a/net/irda/irlan/irlan_client.c +++ b/net/irda/irlan/irlan_client.c @@ -20,7 +20,7 @@ * published by the Free Software Foundation; either version 2 of * the License, or (at your option) any later version. 
* - * Neither Dag Brattli nor University of Tromsø admit liability nor + * Neither Dag Brattli nor University of Tromsø admit liability nor * provide warranty for any of this software. This material is * provided "AS-IS" and at no charge. * diff --git a/net/irda/irlan/irlan_client_event.c b/net/irda/irlan/irlan_client_event.c index 843ab6fbb39..6afcee59e90 100644 --- a/net/irda/irlan/irlan_client_event.c +++ b/net/irda/irlan/irlan_client_event.c @@ -17,7 +17,7 @@ * published by the Free Software Foundation; either version 2 of * the License, or (at your option) any later version. * - * Neither Dag Brattli nor University of Tromsø admit liability nor + * Neither Dag Brattli nor University of Tromsø admit liability nor * provide warranty for any of this software. This material is * provided "AS-IS" and at no charge. * diff --git a/net/irda/irlan/irlan_common.c b/net/irda/irlan/irlan_common.c index f5778ef3ccc..a4b56e25a91 100644 --- a/net/irda/irlan/irlan_common.c +++ b/net/irda/irlan/irlan_common.c @@ -17,7 +17,7 @@ * published by the Free Software Foundation; either version 2 of * the License, or (at your option) any later version. * - * Neither Dag Brattli nor University of Tromsø admit liability nor + * Neither Dag Brattli nor University of Tromsø admit liability nor * provide warranty for any of this software. This material is * provided "AS-IS" and at no charge. * diff --git a/net/irda/irlan/irlan_eth.c b/net/irda/irlan/irlan_eth.c index 340f04a36b0..7f9c8542e5f 100644 --- a/net/irda/irlan/irlan_eth.c +++ b/net/irda/irlan/irlan_eth.c @@ -19,7 +19,7 @@ * published by the Free Software Foundation; either version 2 of * the License, or (at your option) any later version. * - * Neither Dag Brattli nor University of Tromsø admit liability nor + * Neither Dag Brattli nor University of Tromsø admit liability nor * provide warranty for any of this software. This material is * provided "AS-IS" and at no charge. * diff --git a/net/irda/irlan/irlan_event.c b/net/irda/irlan/irlan_event.c index 623e0fd16c1..a9750a80138 100644 --- a/net/irda/irlan/irlan_event.c +++ b/net/irda/irlan/irlan_event.c @@ -16,7 +16,7 @@ * published by the Free Software Foundation; either version 2 of * the License, or (at your option) any later version. * - * Neither Dag Brattli nor University of Tromsø admit liability nor + * Neither Dag Brattli nor University of Tromsø admit liability nor * provide warranty for any of this software. This material is * provided "AS-IS" and at no charge. * diff --git a/net/irda/irlan/irlan_filter.c b/net/irda/irlan/irlan_filter.c index e6346b88f93..4384be9a688 100644 --- a/net/irda/irlan/irlan_filter.c +++ b/net/irda/irlan/irlan_filter.c @@ -16,7 +16,7 @@ * published by the Free Software Foundation; either version 2 of * the License, or (at your option) any later version. * - * Neither Dag Brattli nor University of Tromsø admit liability nor + * Neither Dag Brattli nor University of Tromsø admit liability nor * provide warranty for any of this software. This material is * provided "AS-IS" and at no charge. * diff --git a/net/irda/irlan/irlan_provider.c b/net/irda/irlan/irlan_provider.c index aac66434e47..13db942812e 100644 --- a/net/irda/irlan/irlan_provider.c +++ b/net/irda/irlan/irlan_provider.c @@ -20,7 +20,7 @@ * published by the Free Software Foundation; either version 2 of * the License, or (at your option) any later version. 
* - * Neither Dag Brattli nor University of Tromsø admit liability nor + * Neither Dag Brattli nor University of Tromsø admit liability nor * provide warranty for any of this software. This material is * provided "AS-IS" and at no charge. * diff --git a/net/irda/irlan/irlan_provider_event.c b/net/irda/irlan/irlan_provider_event.c index ef401bd6ea0..10ece5a4752 100644 --- a/net/irda/irlan/irlan_provider_event.c +++ b/net/irda/irlan/irlan_provider_event.c @@ -16,7 +16,7 @@ * published by the Free Software Foundation; either version 2 of * the License, or (at your option) any later version. * - * Neither Dag Brattli nor University of Tromsø admit liability nor + * Neither Dag Brattli nor University of Tromsø admit liability nor * provide warranty for any of this software. This material is * provided "AS-IS" and at no charge. * diff --git a/net/irda/irlap_event.c b/net/irda/irlap_event.c index a8b8873aa26..4c33bf5c835 100644 --- a/net/irda/irlap_event.c +++ b/net/irda/irlap_event.c @@ -19,7 +19,7 @@ * published by the Free Software Foundation; either version 2 of * the License, or (at your option) any later version. * - * Neither Dag Brattli nor University of Tromsø admit liability nor + * Neither Dag Brattli nor University of Tromsø admit liability nor * provide warranty for any of this software. This material is * provided "AS-IS" and at no charge. * diff --git a/net/irda/irlap_frame.c b/net/irda/irlap_frame.c index 77ac27e8116..4f3764546b2 100644 --- a/net/irda/irlap_frame.c +++ b/net/irda/irlap_frame.c @@ -18,7 +18,7 @@ * published by the Free Software Foundation; either version 2 of * the License, or (at your option) any later version. * - * Neither Dag Brattli nor University of Tromsø admit liability nor + * Neither Dag Brattli nor University of Tromsø admit liability nor * provide warranty for any of this software. This material is * provided "AS-IS" and at no charge. * diff --git a/net/irda/irlmp.c b/net/irda/irlmp.c index 7db92ced2c0..cedff8068fb 100644 --- a/net/irda/irlmp.c +++ b/net/irda/irlmp.c @@ -18,7 +18,7 @@ * published by the Free Software Foundation; either version 2 of * the License, or (at your option) any later version. * - * Neither Dag Brattli nor University of Tromsø admit liability nor + * Neither Dag Brattli nor University of Tromsø admit liability nor * provide warranty for any of this software. This material is * provided "AS-IS" and at no charge. * diff --git a/net/irda/irlmp_event.c b/net/irda/irlmp_event.c index 65ffa981510..1bba87e7860 100644 --- a/net/irda/irlmp_event.c +++ b/net/irda/irlmp_event.c @@ -18,7 +18,7 @@ * published by the Free Software Foundation; either version 2 of * the License, or (at your option) any later version. * - * Neither Dag Brattli nor University of Tromsø admit liability nor + * Neither Dag Brattli nor University of Tromsø admit liability nor * provide warranty for any of this software. This material is * provided "AS-IS" and at no charge. * diff --git a/net/irda/irlmp_frame.c b/net/irda/irlmp_frame.c index 559302d3fe6..0a79d9aeb08 100644 --- a/net/irda/irlmp_frame.c +++ b/net/irda/irlmp_frame.c @@ -18,7 +18,7 @@ * published by the Free Software Foundation; either version 2 of * the License, or (at your option) any later version. * - * Neither Dag Brattli nor University of Tromsø admit liability nor + * Neither Dag Brattli nor University of Tromsø admit liability nor * provide warranty for any of this software. This material is * provided "AS-IS" and at no charge. 
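The long run of irda hunks above, and the matching ones just below, change nothing but the encoding of one header line per file: the single Latin-1 byte for the ø in "University of Tromsø" becomes the two-byte UTF-8 sequence. A small standalone program doing the same conversion with POSIX iconv(3), assuming a glibc-style iconv:

#include <iconv.h>
#include <stdio.h>
#include <string.h>

int main(void)
{
        char in[] = "University of Troms\xf8"; /* 0xf8 = ø in ISO-8859-1 */
        char out[64];
        char *inp = in, *outp = out;
        size_t inleft = strlen(in), outleft = sizeof(out) - 1;

        iconv_t cd = iconv_open("UTF-8", "ISO-8859-1");
        if (cd == (iconv_t)-1)
                return 1;
        if (iconv(cd, &inp, &inleft, &outp, &outleft) == (size_t)-1)
                return 1;
        *outp = '\0';
        iconv_close(cd);

        printf("%s\n", out);    /* the ø is now the bytes 0xc3 0xb8 */
        return 0;
}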
* diff --git a/net/irda/irmod.c b/net/irda/irmod.c index 8ba703da279..01554b996b9 100644 --- a/net/irda/irmod.c +++ b/net/irda/irmod.c @@ -17,7 +17,7 @@ * published by the Free Software Foundation; either version 2 of * the License, or (at your option) any later version. * - * Neither Dag Brattli nor University of Tromsø admit liability nor + * Neither Dag Brattli nor University of Tromsø admit liability nor * provide warranty for any of this software. This material is * provided "AS-IS" and at no charge. * diff --git a/net/irda/irqueue.c b/net/irda/irqueue.c index d058b467f9e..40c28efaed9 100644 --- a/net/irda/irqueue.c +++ b/net/irda/irqueue.c @@ -28,7 +28,7 @@ * published by the Free Software Foundation; either version 2 of * the License, or (at your option) any later version. * - * Neither Dag Brattli nor University of Tromsø admit liability nor + * Neither Dag Brattli nor University of Tromsø admit liability nor * provide warranty for any of this software. This material is * provided "AS-IS" and at no charge. * diff --git a/net/irda/irsysctl.c b/net/irda/irsysctl.c index 957e04feb0f..565cbf0421c 100644 --- a/net/irda/irsysctl.c +++ b/net/irda/irsysctl.c @@ -17,7 +17,7 @@ * published by the Free Software Foundation; either version 2 of * the License, or (at your option) any later version. * - * Neither Dag Brattli nor University of Tromsø admit liability nor + * Neither Dag Brattli nor University of Tromsø admit liability nor * provide warranty for any of this software. This material is * provided "AS-IS" and at no charge. * @@ -31,12 +31,6 @@ #include <net/irda/irda.h> /* irda_debug */ #include <net/irda/irias_object.h> -#define NET_IRDA 412 /* Random number */ -enum { DISCOVERY=1, DEVNAME, DEBUG, FAST_POLL, DISCOVERY_SLOTS, - DISCOVERY_TIMEOUT, SLOT_TIMEOUT, MAX_BAUD_RATE, MIN_TX_TURN_TIME, - MAX_TX_DATA_SIZE, MAX_TX_WINDOW, MAX_NOREPLY_TIME, WARN_NOREPLY_TIME, - LAP_KEEPALIVE_TIME }; - extern int sysctl_discovery; extern int sysctl_discovery_slots; extern int sysctl_discovery_timeout; @@ -94,7 +88,7 @@ static int do_devname(ctl_table *table, int write, struct file *filp, /* One file */ static ctl_table irda_table[] = { { - .ctl_name = DISCOVERY, + .ctl_name = NET_IRDA_DISCOVERY, .procname = "discovery", .data = &sysctl_discovery, .maxlen = sizeof(int), @@ -102,7 +96,7 @@ static ctl_table irda_table[] = { .proc_handler = &proc_dointvec }, { - .ctl_name = DEVNAME, + .ctl_name = NET_IRDA_DEVNAME, .procname = "devname", .data = sysctl_devname, .maxlen = 65, @@ -112,7 +106,7 @@ static ctl_table irda_table[] = { }, #ifdef CONFIG_IRDA_DEBUG { - .ctl_name = DEBUG, + .ctl_name = NET_IRDA_DEBUG, .procname = "debug", .data = &irda_debug, .maxlen = sizeof(int), @@ -122,7 +116,7 @@ static ctl_table irda_table[] = { #endif #ifdef CONFIG_IRDA_FAST_RR { - .ctl_name = FAST_POLL, + .ctl_name = NET_IRDA_FAST_POLL, .procname = "fast_poll_increase", .data = &sysctl_fast_poll_increase, .maxlen = sizeof(int), @@ -131,7 +125,7 @@ static ctl_table irda_table[] = { }, #endif { - .ctl_name = DISCOVERY_SLOTS, + .ctl_name = NET_IRDA_DISCOVERY_SLOTS, .procname = "discovery_slots", .data = &sysctl_discovery_slots, .maxlen = sizeof(int), @@ -142,7 +136,7 @@ static ctl_table irda_table[] = { .extra2 = &max_discovery_slots }, { - .ctl_name = DISCOVERY_TIMEOUT, + .ctl_name = NET_IRDA_DISCOVERY_TIMEOUT, .procname = "discovery_timeout", .data = &sysctl_discovery_timeout, .maxlen = sizeof(int), @@ -150,7 +144,7 @@ static ctl_table irda_table[] = { .proc_handler = &proc_dointvec }, { - .ctl_name = SLOT_TIMEOUT, + 
.ctl_name = NET_IRDA_SLOT_TIMEOUT, .procname = "slot_timeout", .data = &sysctl_slot_timeout, .maxlen = sizeof(int), @@ -161,7 +155,7 @@ static ctl_table irda_table[] = { .extra2 = &max_slot_timeout }, { - .ctl_name = MAX_BAUD_RATE, + .ctl_name = NET_IRDA_MAX_BAUD_RATE, .procname = "max_baud_rate", .data = &sysctl_max_baud_rate, .maxlen = sizeof(int), @@ -172,7 +166,7 @@ static ctl_table irda_table[] = { .extra2 = &max_max_baud_rate }, { - .ctl_name = MIN_TX_TURN_TIME, + .ctl_name = NET_IRDA_MIN_TX_TURN_TIME, .procname = "min_tx_turn_time", .data = &sysctl_min_tx_turn_time, .maxlen = sizeof(int), @@ -183,7 +177,7 @@ static ctl_table irda_table[] = { .extra2 = &max_min_tx_turn_time }, { - .ctl_name = MAX_TX_DATA_SIZE, + .ctl_name = NET_IRDA_MAX_TX_DATA_SIZE, .procname = "max_tx_data_size", .data = &sysctl_max_tx_data_size, .maxlen = sizeof(int), @@ -194,7 +188,7 @@ static ctl_table irda_table[] = { .extra2 = &max_max_tx_data_size }, { - .ctl_name = MAX_TX_WINDOW, + .ctl_name = NET_IRDA_MAX_TX_WINDOW, .procname = "max_tx_window", .data = &sysctl_max_tx_window, .maxlen = sizeof(int), @@ -205,7 +199,7 @@ static ctl_table irda_table[] = { .extra2 = &max_max_tx_window }, { - .ctl_name = MAX_NOREPLY_TIME, + .ctl_name = NET_IRDA_MAX_NOREPLY_TIME, .procname = "max_noreply_time", .data = &sysctl_max_noreply_time, .maxlen = sizeof(int), @@ -216,7 +210,7 @@ static ctl_table irda_table[] = { .extra2 = &max_max_noreply_time }, { - .ctl_name = WARN_NOREPLY_TIME, + .ctl_name = NET_IRDA_WARN_NOREPLY_TIME, .procname = "warn_noreply_time", .data = &sysctl_warn_noreply_time, .maxlen = sizeof(int), @@ -227,7 +221,7 @@ static ctl_table irda_table[] = { .extra2 = &max_warn_noreply_time }, { - .ctl_name = LAP_KEEPALIVE_TIME, + .ctl_name = NET_IRDA_LAP_KEEPALIVE_TIME, .procname = "lap_keepalive_time", .data = &sysctl_lap_keepalive_time, .maxlen = sizeof(int), diff --git a/net/irda/irttp.c b/net/irda/irttp.c index 1311976c9df..97db158c927 100644 --- a/net/irda/irttp.c +++ b/net/irda/irttp.c @@ -18,7 +18,7 @@ * published by the Free Software Foundation; either version 2 of * the License, or (at your option) any later version. * - * Neither Dag Brattli nor University of Tromsø admit liability nor + * Neither Dag Brattli nor University of Tromsø admit liability nor * provide warranty for any of this software. This material is * provided "AS-IS" and at no charge. * diff --git a/net/irda/timer.c b/net/irda/timer.c index d3a6ee8cc4a..d730099080a 100644 --- a/net/irda/timer.c +++ b/net/irda/timer.c @@ -18,7 +18,7 @@ * published by the Free Software Foundation; either version 2 of * the License, or (at your option) any later version. * - * Neither Dag Brattli nor University of Tromsø admit liability nor + * Neither Dag Brattli nor University of Tromsø admit liability nor * provide warranty for any of this software. This material is * provided "AS-IS" and at no charge. * diff --git a/net/irda/wrapper.c b/net/irda/wrapper.c index a7a7f191f1a..e71286768a4 100644 --- a/net/irda/wrapper.c +++ b/net/irda/wrapper.c @@ -20,7 +20,7 @@ * published by the Free Software Foundation; either version 2 of * the License, or (at your option) any later version. * - * Neither Dag Brattli nor University of Tromsø admit liability nor + * Neither Dag Brattli nor University of Tromsø admit liability nor * provide warranty for any of this software. This material is * provided "AS-IS" and at no charge. 
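The irsysctl.c hunks above replace a file-local enum of binary sysctl ids (sitting under a NET_IRDA define whose own comment called it a random number) with NET_IRDA_* constants from the shared sysctl header, so every id is allocated in one central place. A reduced illustration of the pattern; the ctl_entry struct is schematic rather than the kernel's ctl_table, and only the first three ids follow the kernel's numbering:

#include <stdio.h>

/* shared header: all binary sysctl ids come from one enum, so no two
 * subsystems can collide on a privately chosen number */
enum {
        NET_IRDA_DISCOVERY = 1,
        NET_IRDA_DEVNAME   = 2,
        NET_IRDA_DEBUG     = 3,
};

struct ctl_entry {
        int ctl_name;           /* binary sysctl id */
        const char *procname;   /* /proc/sys/net/irda/<name> */
};

static const struct ctl_entry irda_table[] = {
        { NET_IRDA_DISCOVERY, "discovery" },
        { NET_IRDA_DEVNAME,   "devname"   },
        { NET_IRDA_DEBUG,     "debug"     },
};

int main(void)
{
        for (size_t i = 0; i < sizeof(irda_table) / sizeof(irda_table[0]); i++)
                printf("%d -> /proc/sys/net/irda/%s\n",
                       irda_table[i].ctl_name, irda_table[i].procname);
        return 0;
}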
* diff --git a/net/iucv/iucv.c b/net/iucv/iucv.c index 983058d432d..a2f5a6ea389 100644 --- a/net/iucv/iucv.c +++ b/net/iucv/iucv.c @@ -389,7 +389,7 @@ static void iucv_block_cpu(void *data) * iucv_declare_cpu * @data: unused * - * Declare a interupt buffer on this cpu. + * Declare a interrupt buffer on this cpu. */ static void iucv_declare_cpu(void *data) { diff --git a/net/llc/af_llc.c b/net/llc/af_llc.c index 49eacba824d..46cf962f7f8 100644 --- a/net/llc/af_llc.c +++ b/net/llc/af_llc.c @@ -762,7 +762,7 @@ static int llc_ui_recvmsg(struct kiocb *iocb, struct socket *sock, if (net_ratelimit()) printk(KERN_DEBUG "LLC(%s:%d): Application " "bug, race in MSG_PEEK.\n", - current->comm, current->pid); + current->comm, task_pid_nr(current)); peek_seq = llc->copied_seq; } continue; diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index d34a9deca67..4b4ed2a5803 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -37,8 +37,6 @@ struct ieee80211_local; -#define BIT(x) (1 << (x)) - #define IEEE80211_ALIGN32_PAD(a) ((4 - ((a) & 3)) & 3) /* Maximum number of broadcast/multicast frames to buffer when some of the diff --git a/net/mac80211/ieee80211_ioctl.c b/net/mac80211/ieee80211_ioctl.c index f0224c2311d..6caa3ec2cff 100644 --- a/net/mac80211/ieee80211_ioctl.c +++ b/net/mac80211/ieee80211_ioctl.c @@ -306,9 +306,12 @@ int ieee80211_set_channel(struct ieee80211_local *local, int channel, int freq) ((chan->chan == channel) || (chan->freq == freq))) { local->oper_channel = chan; local->oper_hw_mode = mode; - set++; + set = 1; + break; } } + if (set) + break; } if (set) { @@ -508,10 +511,11 @@ static int ieee80211_ioctl_giwap(struct net_device *dev, static int ieee80211_ioctl_siwscan(struct net_device *dev, struct iw_request_info *info, - struct iw_point *data, char *extra) + union iwreq_data *wrqu, char *extra) { struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr); struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); + struct iw_scan_req *req = NULL; u8 *ssid = NULL; size_t ssid_len = 0; @@ -536,6 +540,14 @@ static int ieee80211_ioctl_siwscan(struct net_device *dev, return -EOPNOTSUPP; } + /* if SSID was specified explicitly then use that */ + if (wrqu->data.length == sizeof(struct iw_scan_req) && + wrqu->data.flags & IW_SCAN_THIS_ESSID) { + req = (struct iw_scan_req *)extra; + ssid = req->essid; + ssid_len = req->essid_len; + } + return ieee80211_sta_req_scan(dev, ssid, ssid_len); } diff --git a/net/mac80211/ieee80211_sta.c b/net/mac80211/ieee80211_sta.c index 1641e8fe44b..f7ffeec3913 100644 --- a/net/mac80211/ieee80211_sta.c +++ b/net/mac80211/ieee80211_sta.c @@ -12,7 +12,6 @@ */ /* TODO: - * BSS table: use <BSSID,SSID> as the key to support multi-SSID APs * order BSS list by RSSI(?) 
("quality of AP") * scan result table filtering (by capability (privacy, IBSS/BSS, WPA/RSN IE, * SSID) @@ -61,7 +60,8 @@ static void ieee80211_send_probe_req(struct net_device *dev, u8 *dst, u8 *ssid, size_t ssid_len); static struct ieee80211_sta_bss * -ieee80211_rx_bss_get(struct net_device *dev, u8 *bssid); +ieee80211_rx_bss_get(struct net_device *dev, u8 *bssid, int channel, + u8 *ssid, u8 ssid_len); static void ieee80211_rx_bss_put(struct net_device *dev, struct ieee80211_sta_bss *bss); static int ieee80211_sta_find_ibss(struct net_device *dev, @@ -108,14 +108,11 @@ struct ieee802_11_elems { u8 wmm_param_len; }; -enum ParseRes { ParseOK = 0, ParseUnknown = 1, ParseFailed = -1 }; - -static enum ParseRes ieee802_11_parse_elems(u8 *start, size_t len, - struct ieee802_11_elems *elems) +static void ieee802_11_parse_elems(u8 *start, size_t len, + struct ieee802_11_elems *elems) { size_t left = len; u8 *pos = start; - int unknown = 0; memset(elems, 0, sizeof(*elems)); @@ -126,15 +123,8 @@ static enum ParseRes ieee802_11_parse_elems(u8 *start, size_t len, elen = *pos++; left -= 2; - if (elen > left) { -#if 0 - if (net_ratelimit()) - printk(KERN_DEBUG "IEEE 802.11 element parse " - "failed (id=%d elen=%d left=%d)\n", - id, elen, left); -#endif - return ParseFailed; - } + if (elen > left) + return; switch (id) { case WLAN_EID_SSID: @@ -201,28 +191,15 @@ static enum ParseRes ieee802_11_parse_elems(u8 *start, size_t len, elems->ext_supp_rates_len = elen; break; default: -#if 0 - printk(KERN_DEBUG "IEEE 802.11 element parse ignored " - "unknown element (id=%d elen=%d)\n", - id, elen); -#endif - unknown++; break; } left -= elen; pos += elen; } - - /* Do not trigger error if left == 1 as Apple Airport base stations - * send AssocResps that are one spurious byte too long. */ - - return unknown ? 
ParseUnknown : ParseOK; } - - static int ecw2cw(int ecw) { int cw = 1; @@ -427,7 +404,9 @@ static void ieee80211_set_associated(struct net_device *dev, if (sdata->type != IEEE80211_IF_TYPE_STA) return; - bss = ieee80211_rx_bss_get(dev, ifsta->bssid); + bss = ieee80211_rx_bss_get(dev, ifsta->bssid, + local->hw.conf.channel, + ifsta->ssid, ifsta->ssid_len); if (bss) { if (bss->has_erp_value) ieee80211_handle_erp_ie(dev, bss->erp_value); @@ -574,7 +553,8 @@ static void ieee80211_send_assoc(struct net_device *dev, capab |= WLAN_CAPABILITY_SHORT_SLOT_TIME | WLAN_CAPABILITY_SHORT_PREAMBLE; } - bss = ieee80211_rx_bss_get(dev, ifsta->bssid); + bss = ieee80211_rx_bss_get(dev, ifsta->bssid, local->hw.conf.channel, + ifsta->ssid, ifsta->ssid_len); if (bss) { if (bss->capability & WLAN_CAPABILITY_PRIVACY) capab |= WLAN_CAPABILITY_PRIVACY; @@ -722,6 +702,7 @@ static void ieee80211_send_disassoc(struct net_device *dev, static int ieee80211_privacy_mismatch(struct net_device *dev, struct ieee80211_if_sta *ifsta) { + struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr); struct ieee80211_sta_bss *bss; int res = 0; @@ -729,7 +710,8 @@ static int ieee80211_privacy_mismatch(struct net_device *dev, ifsta->key_management_enabled) return 0; - bss = ieee80211_rx_bss_get(dev, ifsta->bssid); + bss = ieee80211_rx_bss_get(dev, ifsta->bssid, local->hw.conf.channel, + ifsta->ssid, ifsta->ssid_len); if (!bss) return 0; @@ -926,12 +908,7 @@ static void ieee80211_auth_challenge(struct net_device *dev, printk(KERN_DEBUG "%s: replying to auth challenge\n", dev->name); pos = mgmt->u.auth.variable; - if (ieee802_11_parse_elems(pos, len - (pos - (u8 *) mgmt), &elems) - == ParseFailed) { - printk(KERN_DEBUG "%s: failed to parse Auth(challenge)\n", - dev->name); - return; - } + ieee802_11_parse_elems(pos, len - (pos - (u8 *) mgmt), &elems); if (!elems.challenge) { printk(KERN_DEBUG "%s: no challenge IE in shared key auth " "frame\n", dev->name); @@ -1203,15 +1180,11 @@ static void ieee80211_rx_mgmt_assoc_resp(struct net_device *dev, capab_info = le16_to_cpu(mgmt->u.assoc_resp.capab_info); status_code = le16_to_cpu(mgmt->u.assoc_resp.status_code); aid = le16_to_cpu(mgmt->u.assoc_resp.aid); - if ((aid & (BIT(15) | BIT(14))) != (BIT(15) | BIT(14))) - printk(KERN_DEBUG "%s: invalid aid value %d; bits 15:14 not " - "set\n", dev->name, aid); - aid &= ~(BIT(15) | BIT(14)); printk(KERN_DEBUG "%s: RX %sssocResp from %s (capab=0x%x " "status=%d aid=%d)\n", dev->name, reassoc ? 
"Rea" : "A", print_mac(mac, mgmt->sa), - capab_info, status_code, aid); + capab_info, status_code, aid & ~(BIT(15) | BIT(14))); if (status_code != WLAN_STATUS_SUCCESS) { printk(KERN_DEBUG "%s: AP denied association (code=%d)\n", @@ -1223,13 +1196,13 @@ static void ieee80211_rx_mgmt_assoc_resp(struct net_device *dev, return; } + if ((aid & (BIT(15) | BIT(14))) != (BIT(15) | BIT(14))) + printk(KERN_DEBUG "%s: invalid aid value %d; bits 15:14 not " + "set\n", dev->name, aid); + aid &= ~(BIT(15) | BIT(14)); + pos = mgmt->u.assoc_resp.variable; - if (ieee802_11_parse_elems(pos, len - (pos - (u8 *) mgmt), &elems) - == ParseFailed) { - printk(KERN_DEBUG "%s: failed to parse AssocResp\n", - dev->name); - return; - } + ieee802_11_parse_elems(pos, len - (pos - (u8 *) mgmt), &elems); if (!elems.supp_rates) { printk(KERN_DEBUG "%s: no SuppRates element in AssocResp\n", @@ -1241,7 +1214,9 @@ static void ieee80211_rx_mgmt_assoc_resp(struct net_device *dev, * update our stored copy */ if (elems.erp_info && elems.erp_info_len >= 1) { struct ieee80211_sta_bss *bss - = ieee80211_rx_bss_get(dev, ifsta->bssid); + = ieee80211_rx_bss_get(dev, ifsta->bssid, + local->hw.conf.channel, + ifsta->ssid, ifsta->ssid_len); if (bss) { bss->erp_value = elems.erp_info[0]; bss->has_erp_value = 1; @@ -1271,7 +1246,9 @@ static void ieee80211_rx_mgmt_assoc_resp(struct net_device *dev, " AP\n", dev->name); return; } - bss = ieee80211_rx_bss_get(dev, ifsta->bssid); + bss = ieee80211_rx_bss_get(dev, ifsta->bssid, + local->hw.conf.channel, + ifsta->ssid, ifsta->ssid_len); if (bss) { sta->last_rssi = bss->rssi; sta->last_signal = bss->signal; @@ -1347,7 +1324,8 @@ static void __ieee80211_rx_bss_hash_del(struct net_device *dev, static struct ieee80211_sta_bss * -ieee80211_rx_bss_add(struct net_device *dev, u8 *bssid) +ieee80211_rx_bss_add(struct net_device *dev, u8 *bssid, int channel, + u8 *ssid, u8 ssid_len) { struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr); struct ieee80211_sta_bss *bss; @@ -1358,6 +1336,11 @@ ieee80211_rx_bss_add(struct net_device *dev, u8 *bssid) atomic_inc(&bss->users); atomic_inc(&bss->users); memcpy(bss->bssid, bssid, ETH_ALEN); + bss->channel = channel; + if (ssid && ssid_len <= IEEE80211_MAX_SSID_LEN) { + memcpy(bss->ssid, ssid, ssid_len); + bss->ssid_len = ssid_len; + } spin_lock_bh(&local->sta_bss_lock); /* TODO: order by RSSI? 
*/ @@ -1369,7 +1352,8 @@ ieee80211_rx_bss_add(struct net_device *dev, u8 *bssid) static struct ieee80211_sta_bss * -ieee80211_rx_bss_get(struct net_device *dev, u8 *bssid) +ieee80211_rx_bss_get(struct net_device *dev, u8 *bssid, int channel, + u8 *ssid, u8 ssid_len) { struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr); struct ieee80211_sta_bss *bss; @@ -1377,7 +1361,10 @@ ieee80211_rx_bss_get(struct net_device *dev, u8 *bssid) spin_lock_bh(&local->sta_bss_lock); bss = local->sta_bss_hash[STA_HASH(bssid)]; while (bss) { - if (memcmp(bss->bssid, bssid, ETH_ALEN) == 0) { + if (!memcmp(bss->bssid, bssid, ETH_ALEN) && + bss->channel == channel && + bss->ssid_len == ssid_len && + (ssid_len == 0 || !memcmp(bss->ssid, ssid, ssid_len))) { atomic_inc(&bss->users); break; } @@ -1439,7 +1426,7 @@ static void ieee80211_rx_bss_info(struct net_device *dev, struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr); struct ieee802_11_elems elems; size_t baselen; - int channel, invalid = 0, clen; + int channel, clen; struct ieee80211_sta_bss *bss; struct sta_info *sta; struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); @@ -1485,9 +1472,7 @@ static void ieee80211_rx_bss_info(struct net_device *dev, #endif /* CONFIG_MAC80211_IBSS_DEBUG */ } - if (ieee802_11_parse_elems(mgmt->u.beacon.variable, len - baselen, - &elems) == ParseFailed) - invalid = 1; + ieee802_11_parse_elems(mgmt->u.beacon.variable, len - baselen, &elems); if (sdata->type == IEEE80211_IF_TYPE_IBSS && elems.supp_rates && memcmp(mgmt->bssid, sdata->u.sta.bssid, ETH_ALEN) == 0 && @@ -1545,9 +1530,11 @@ static void ieee80211_rx_bss_info(struct net_device *dev, else channel = rx_status->channel; - bss = ieee80211_rx_bss_get(dev, mgmt->bssid); + bss = ieee80211_rx_bss_get(dev, mgmt->bssid, channel, + elems.ssid, elems.ssid_len); if (!bss) { - bss = ieee80211_rx_bss_add(dev, mgmt->bssid); + bss = ieee80211_rx_bss_add(dev, mgmt->bssid, channel, + elems.ssid, elems.ssid_len); if (!bss) return; } else { @@ -1573,10 +1560,6 @@ static void ieee80211_rx_bss_info(struct net_device *dev, bss->beacon_int = le16_to_cpu(mgmt->u.beacon.beacon_int); bss->capability = le16_to_cpu(mgmt->u.beacon.capab_info); - if (elems.ssid && elems.ssid_len <= IEEE80211_MAX_SSID_LEN) { - memcpy(bss->ssid, elems.ssid, elems.ssid_len); - bss->ssid_len = elems.ssid_len; - } bss->supp_rates_len = 0; if (elems.supp_rates) { @@ -1647,7 +1630,6 @@ static void ieee80211_rx_bss_info(struct net_device *dev, bss->hw_mode = rx_status->phymode; - bss->channel = channel; bss->freq = rx_status->freq; if (channel != rx_status->channel && (bss->hw_mode == MODE_IEEE80211G || @@ -1707,9 +1689,7 @@ static void ieee80211_rx_mgmt_beacon(struct net_device *dev, if (baselen > len) return; - if (ieee802_11_parse_elems(mgmt->u.beacon.variable, len - baselen, - &elems) == ParseFailed) - return; + ieee802_11_parse_elems(mgmt->u.beacon.variable, len - baselen, &elems); if (elems.erp_info && elems.erp_info_len >= 1) ieee80211_handle_erp_ie(dev, elems.erp_info[0]); @@ -2375,7 +2355,7 @@ static int ieee80211_sta_create_ibss(struct net_device *dev, { struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr); struct ieee80211_sta_bss *bss; - struct ieee80211_sub_if_data *sdata; + struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); struct ieee80211_hw_mode *mode; u8 bssid[ETH_ALEN], *pos; int i; @@ -2398,18 +2378,17 @@ static int ieee80211_sta_create_ibss(struct net_device *dev, printk(KERN_DEBUG "%s: Creating new IBSS network, BSSID %s\n", dev->name, 
print_mac(mac, bssid)); - bss = ieee80211_rx_bss_add(dev, bssid); + bss = ieee80211_rx_bss_add(dev, bssid, local->hw.conf.channel, + sdata->u.sta.ssid, sdata->u.sta.ssid_len); if (!bss) return -ENOMEM; - sdata = IEEE80211_DEV_TO_SUB_IF(dev); mode = local->oper_hw_mode; if (local->hw.conf.beacon_int == 0) local->hw.conf.beacon_int = 100; bss->beacon_int = local->hw.conf.beacon_int; bss->hw_mode = local->hw.conf.phymode; - bss->channel = local->hw.conf.channel; bss->freq = local->hw.conf.freq; bss->last_update = jiffies; bss->capability = WLAN_CAPABILITY_IBSS; @@ -2469,7 +2448,8 @@ static int ieee80211_sta_find_ibss(struct net_device *dev, "%s\n", print_mac(mac, bssid), print_mac(mac2, ifsta->bssid)); #endif /* CONFIG_MAC80211_IBSS_DEBUG */ if (found && memcmp(ifsta->bssid, bssid, ETH_ALEN) != 0 && - (bss = ieee80211_rx_bss_get(dev, bssid))) { + (bss = ieee80211_rx_bss_get(dev, bssid, local->hw.conf.channel, + ifsta->ssid, ifsta->ssid_len))) { printk(KERN_DEBUG "%s: Selected IBSS BSSID %s" " based on configured SSID\n", dev->name, print_mac(mac, bssid)); diff --git a/net/mac80211/wep.c b/net/mac80211/wep.c index 6675261e958..cc806d640f7 100644 --- a/net/mac80211/wep.c +++ b/net/mac80211/wep.c @@ -138,9 +138,7 @@ void ieee80211_wep_encrypt_data(struct crypto_blkcipher *tfm, u8 *rc4key, *icv = cpu_to_le32(~crc32_le(~0, data, data_len)); crypto_blkcipher_setkey(tfm, rc4key, klen); - sg.page = virt_to_page(data); - sg.offset = offset_in_page(data); - sg.length = data_len + WEP_ICV_LEN; + sg_init_one(&sg, data, data_len + WEP_ICV_LEN); crypto_blkcipher_encrypt(&desc, &sg, &sg, sg.length); } @@ -204,9 +202,7 @@ int ieee80211_wep_decrypt_data(struct crypto_blkcipher *tfm, u8 *rc4key, __le32 crc; crypto_blkcipher_setkey(tfm, rc4key, klen); - sg.page = virt_to_page(data); - sg.offset = offset_in_page(data); - sg.length = data_len + WEP_ICV_LEN; + sg_init_one(&sg, data, data_len + WEP_ICV_LEN); crypto_blkcipher_decrypt(&desc, &sg, &sg, sg.length); crc = cpu_to_le32(~crc32_le(~0, data, data_len)); diff --git a/net/netfilter/core.c b/net/netfilter/core.c index a523fa4136e..bed9ba01e8e 100644 --- a/net/netfilter/core.c +++ b/net/netfilter/core.c @@ -117,7 +117,7 @@ void nf_unregister_hooks(struct nf_hook_ops *reg, unsigned int n) EXPORT_SYMBOL(nf_unregister_hooks); unsigned int nf_iterate(struct list_head *head, - struct sk_buff **skb, + struct sk_buff *skb, int hook, const struct net_device *indev, const struct net_device *outdev, @@ -160,7 +160,7 @@ unsigned int nf_iterate(struct list_head *head, /* Returns 1 if okfn() needs to be executed by the caller, * -EPERM for NF_DROP, 0 otherwise. 
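The ieee80211_rx_bss_add()/ieee80211_rx_bss_get() hunks above widen the BSS table key from the BSSID alone to the (BSSID, channel, SSID) triple, which is what retires the old TODO about multi-SSID APs: two networks advertised by one AP now get separate entries. A simplified lookup over one hash bucket mirroring the new compare; the types are stand-ins for the mac80211 ones:

#include <string.h>
#include <stddef.h>

#define ETH_ALEN 6

struct sta_bss {
        struct sta_bss *hnext;          /* hash-bucket chain */
        unsigned char bssid[ETH_ALEN];
        int channel;
        unsigned char ssid[32];
        size_t ssid_len;
};

/* match on the full triple; a zero-length SSID still requires
 * ssid_len == 0 on both sides, as in the new ieee80211_rx_bss_get() */
static struct sta_bss *bss_get(struct sta_bss *bucket,
                               const unsigned char *bssid, int channel,
                               const unsigned char *ssid, size_t ssid_len)
{
        for (struct sta_bss *bss = bucket; bss; bss = bss->hnext)
                if (!memcmp(bss->bssid, bssid, ETH_ALEN) &&
                    bss->channel == channel &&
                    bss->ssid_len == ssid_len &&
                    (ssid_len == 0 || !memcmp(bss->ssid, ssid, ssid_len)))
                        return bss;
        return NULL;
}

int main(void)
{
        unsigned char mac[ETH_ALEN] = { 0, 1, 2, 3, 4, 5 };
        struct sta_bss corp = { 0 }, guest;

        memcpy(corp.bssid, mac, ETH_ALEN);
        corp.channel = 6;
        memcpy(corp.ssid, "corp", 4);
        corp.ssid_len = 4;

        guest = corp;                   /* same AP, same channel... */
        memcpy(guest.ssid, "guest", 5); /* ...different SSID */
        guest.ssid_len = 5;
        corp.hnext = &guest;

        /* distinct entries despite the shared BSSID */
        return bss_get(&corp, mac, 6, (const unsigned char *)"guest", 5)
               == &guest ? 0 : 1;
}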
*/ -int nf_hook_slow(int pf, unsigned int hook, struct sk_buff **pskb, +int nf_hook_slow(int pf, unsigned int hook, struct sk_buff *skb, struct net_device *indev, struct net_device *outdev, int (*okfn)(struct sk_buff *), @@ -175,17 +175,17 @@ int nf_hook_slow(int pf, unsigned int hook, struct sk_buff **pskb, elem = &nf_hooks[pf][hook]; next_hook: - verdict = nf_iterate(&nf_hooks[pf][hook], pskb, hook, indev, + verdict = nf_iterate(&nf_hooks[pf][hook], skb, hook, indev, outdev, &elem, okfn, hook_thresh); if (verdict == NF_ACCEPT || verdict == NF_STOP) { ret = 1; goto unlock; } else if (verdict == NF_DROP) { - kfree_skb(*pskb); + kfree_skb(skb); ret = -EPERM; } else if ((verdict & NF_VERDICT_MASK) == NF_QUEUE) { NFDEBUG("nf_hook: Verdict = QUEUE.\n"); - if (!nf_queue(*pskb, elem, pf, hook, indev, outdev, okfn, + if (!nf_queue(skb, elem, pf, hook, indev, outdev, okfn, verdict >> NF_VERDICT_BITS)) goto next_hook; } @@ -196,34 +196,24 @@ unlock: EXPORT_SYMBOL(nf_hook_slow); -int skb_make_writable(struct sk_buff **pskb, unsigned int writable_len) +int skb_make_writable(struct sk_buff *skb, unsigned int writable_len) { - struct sk_buff *nskb; - - if (writable_len > (*pskb)->len) + if (writable_len > skb->len) return 0; /* Not exclusive use of packet? Must copy. */ - if (skb_cloned(*pskb) && !skb_clone_writable(*pskb, writable_len)) - goto copy_skb; - if (skb_shared(*pskb)) - goto copy_skb; - - return pskb_may_pull(*pskb, writable_len); - -copy_skb: - nskb = skb_copy(*pskb, GFP_ATOMIC); - if (!nskb) - return 0; - BUG_ON(skb_is_nonlinear(nskb)); - - /* Rest of kernel will get very unhappy if we pass it a - suddenly-orphaned skbuff */ - if ((*pskb)->sk) - skb_set_owner_w(nskb, (*pskb)->sk); - kfree_skb(*pskb); - *pskb = nskb; - return 1; + if (!skb_cloned(skb)) { + if (writable_len <= skb_headlen(skb)) + return 1; + } else if (skb_clone_writable(skb, writable_len)) + return 1; + + if (writable_len <= skb_headlen(skb)) + writable_len = 0; + else + writable_len -= skb_headlen(skb); + + return !!__pskb_pull_tail(skb, writable_len); } EXPORT_SYMBOL(skb_make_writable); diff --git a/net/netfilter/nf_conntrack_amanda.c b/net/netfilter/nf_conntrack_amanda.c index e42ab230ad8..7b8239c0cd5 100644 --- a/net/netfilter/nf_conntrack_amanda.c +++ b/net/netfilter/nf_conntrack_amanda.c @@ -36,7 +36,7 @@ MODULE_PARM_DESC(master_timeout, "timeout for the master connection"); module_param(ts_algo, charp, 0400); MODULE_PARM_DESC(ts_algo, "textsearch algorithm to use (default kmp)"); -unsigned int (*nf_nat_amanda_hook)(struct sk_buff **pskb, +unsigned int (*nf_nat_amanda_hook)(struct sk_buff *skb, enum ip_conntrack_info ctinfo, unsigned int matchoff, unsigned int matchlen, @@ -79,7 +79,7 @@ static struct { }, }; -static int amanda_help(struct sk_buff **pskb, +static int amanda_help(struct sk_buff *skb, unsigned int protoff, struct nf_conn *ct, enum ip_conntrack_info ctinfo) @@ -101,25 +101,25 @@ static int amanda_help(struct sk_buff **pskb, /* increase the UDP timeout of the master connection as replies from * Amanda clients to the server can be quite delayed */ - nf_ct_refresh(ct, *pskb, master_timeout * HZ); + nf_ct_refresh(ct, skb, master_timeout * HZ); /* No data? 
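The netfilter/core.c hunks above are two halves of one change: skb_make_writable() no longer swaps in a freshly copied skb (which forced every caller, and every helper under it, to take struct sk_buff **), it now privatizes the needed bytes in place via __pskb_pull_tail(), so nf_hook_slow(), nf_iterate() and the conntrack helpers that follow can all move to a plain struct sk_buff *. A toy model of why the extra level of indirection can be dropped; struct buf and its fields are invented, and refcounting is elided:

#include <stdlib.h>
#include <string.h>

struct buf {
        char *data;
        size_t len;
        int shared;     /* stands in for skb_cloned()/skb_shared() */
};

/* old shape: may replace the whole object, so the caller must pass &b
 * and keep using the possibly updated pointer afterwards */
static int make_writable_old(struct buf **pb, size_t len)
{
        struct buf *b = *pb;
        if (len > b->len)
                return 0;
        if (b->shared) {
                struct buf *nb = malloc(sizeof(*nb));
                if (!nb)
                        return 0;
                nb->data = malloc(b->len);
                if (!nb->data) {
                        free(nb);
                        return 0;
                }
                memcpy(nb->data, b->data, b->len);
                nb->len = b->len;
                nb->shared = 0;
                *pb = nb;       /* caller's pointer changes; the old buf
                                   stays with its other owner */
        }
        return 1;
}

/* new shape: privatize the bytes inside the same object, so object
 * identity is stable and a plain pointer is enough for the call chain */
static int make_writable_new(struct buf *b, size_t len)
{
        if (len > b->len)
                return 0;
        if (b->shared) {
                char *nd = malloc(b->len);
                if (!nd)
                        return 0;
                memcpy(nd, b->data, b->len);
                b->data = nd;   /* old data remains with the other ref */
                b->shared = 0;
        }
        return 1;
}

int main(void)
{
        char payload[] = "GET /";
        struct buf b = { payload, sizeof(payload) - 1, 1 };
        struct buf *pb = &b;

        if (!make_writable_old(&pb, 5)) return 1;  /* pb != &b now */
        if (!make_writable_new(&b, 5))  return 1;  /* &b still valid */
        return 0;
}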
*/ dataoff = protoff + sizeof(struct udphdr); - if (dataoff >= (*pskb)->len) { + if (dataoff >= skb->len) { if (net_ratelimit()) - printk("amanda_help: skblen = %u\n", (*pskb)->len); + printk("amanda_help: skblen = %u\n", skb->len); return NF_ACCEPT; } memset(&ts, 0, sizeof(ts)); - start = skb_find_text(*pskb, dataoff, (*pskb)->len, + start = skb_find_text(skb, dataoff, skb->len, search[SEARCH_CONNECT].ts, &ts); if (start == UINT_MAX) goto out; start += dataoff + search[SEARCH_CONNECT].len; memset(&ts, 0, sizeof(ts)); - stop = skb_find_text(*pskb, start, (*pskb)->len, + stop = skb_find_text(skb, start, skb->len, search[SEARCH_NEWLINE].ts, &ts); if (stop == UINT_MAX) goto out; @@ -127,13 +127,13 @@ static int amanda_help(struct sk_buff **pskb, for (i = SEARCH_DATA; i <= SEARCH_INDEX; i++) { memset(&ts, 0, sizeof(ts)); - off = skb_find_text(*pskb, start, stop, search[i].ts, &ts); + off = skb_find_text(skb, start, stop, search[i].ts, &ts); if (off == UINT_MAX) continue; off += start + search[i].len; len = min_t(unsigned int, sizeof(pbuf) - 1, stop - off); - if (skb_copy_bits(*pskb, off, pbuf, len)) + if (skb_copy_bits(skb, off, pbuf, len)) break; pbuf[len] = '\0'; @@ -153,7 +153,7 @@ static int amanda_help(struct sk_buff **pskb, nf_nat_amanda = rcu_dereference(nf_nat_amanda_hook); if (nf_nat_amanda && ct->status & IPS_NAT_MASK) - ret = nf_nat_amanda(pskb, ctinfo, off - dataoff, + ret = nf_nat_amanda(skb, ctinfo, off - dataoff, len, exp); else if (nf_ct_expect_related(exp) != 0) ret = NF_DROP; diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index 83c30b45d17..4d6171bc082 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -307,7 +307,7 @@ EXPORT_SYMBOL_GPL(nf_conntrack_hash_insert); /* Confirm a connection given skb; places it in hash table */ int -__nf_conntrack_confirm(struct sk_buff **pskb) +__nf_conntrack_confirm(struct sk_buff *skb) { unsigned int hash, repl_hash; struct nf_conntrack_tuple_hash *h; @@ -316,7 +316,7 @@ __nf_conntrack_confirm(struct sk_buff **pskb) struct hlist_node *n; enum ip_conntrack_info ctinfo; - ct = nf_ct_get(*pskb, &ctinfo); + ct = nf_ct_get(skb, &ctinfo); /* ipt_REJECT uses nf_conntrack_attach to attach related ICMP/TCP RST packets in other direction. Actual packet @@ -367,14 +367,14 @@ __nf_conntrack_confirm(struct sk_buff **pskb) write_unlock_bh(&nf_conntrack_lock); help = nfct_help(ct); if (help && help->helper) - nf_conntrack_event_cache(IPCT_HELPER, *pskb); + nf_conntrack_event_cache(IPCT_HELPER, skb); #ifdef CONFIG_NF_NAT_NEEDED if (test_bit(IPS_SRC_NAT_DONE_BIT, &ct->status) || test_bit(IPS_DST_NAT_DONE_BIT, &ct->status)) - nf_conntrack_event_cache(IPCT_NATINFO, *pskb); + nf_conntrack_event_cache(IPCT_NATINFO, skb); #endif nf_conntrack_event_cache(master_ct(ct) ? - IPCT_RELATED : IPCT_NEW, *pskb); + IPCT_RELATED : IPCT_NEW, skb); return NF_ACCEPT; out: @@ -632,7 +632,7 @@ resolve_normal_ct(struct sk_buff *skb, } unsigned int -nf_conntrack_in(int pf, unsigned int hooknum, struct sk_buff **pskb) +nf_conntrack_in(int pf, unsigned int hooknum, struct sk_buff *skb) { struct nf_conn *ct; enum ip_conntrack_info ctinfo; @@ -644,14 +644,14 @@ nf_conntrack_in(int pf, unsigned int hooknum, struct sk_buff **pskb) int ret; /* Previously seen (loopback or untracked)? Ignore. 
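The amanda_help() hunk above now reads the packet through the plain skb: it locates "CONNECT", bounds the request at the next newline, then lifts the port argument for each of the DATA, MESG and INDEX streams out of that window and sets up one expectation per port. A plain-string sketch of the same parse; the sample message is invented but uses the helper's search keys:

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

int main(void)
{
        const char *pkt =
                "SERVICE amandad\nCONNECT DATA 50100 MESG 50101 INDEX 50102\n";
        static const char *keys[] = { "DATA ", "MESG ", "INDEX " };

        const char *start = strstr(pkt, "CONNECT ");
        if (!start)
                return 1;
        start += strlen("CONNECT ");

        const char *stop = strchr(start, '\n'); /* bound the request line */
        if (!stop)
                return 1;

        for (size_t i = 0; i < 3; i++) {
                const char *off = strstr(start, keys[i]);
                if (!off || off >= stop)        /* key must sit inside the line */
                        continue;
                unsigned long port = strtoul(off + strlen(keys[i]), NULL, 10);
                printf("%sstream -> expect port %lu\n", keys[i], port);
        }
        return 0;
}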
*/ - if ((*pskb)->nfct) { + if (skb->nfct) { NF_CT_STAT_INC_ATOMIC(ignore); return NF_ACCEPT; } /* rcu_read_lock()ed by nf_hook_slow */ l3proto = __nf_ct_l3proto_find((u_int16_t)pf); - ret = l3proto->get_l4proto(*pskb, skb_network_offset(*pskb), + ret = l3proto->get_l4proto(skb, skb_network_offset(skb), &dataoff, &protonum); if (ret <= 0) { pr_debug("not prepared to track yet or error occured\n"); @@ -666,13 +666,13 @@ nf_conntrack_in(int pf, unsigned int hooknum, struct sk_buff **pskb) * inverse of the return code tells to the netfilter * core what to do with the packet. */ if (l4proto->error != NULL && - (ret = l4proto->error(*pskb, dataoff, &ctinfo, pf, hooknum)) <= 0) { + (ret = l4proto->error(skb, dataoff, &ctinfo, pf, hooknum)) <= 0) { NF_CT_STAT_INC_ATOMIC(error); NF_CT_STAT_INC_ATOMIC(invalid); return -ret; } - ct = resolve_normal_ct(*pskb, dataoff, pf, protonum, l3proto, l4proto, + ct = resolve_normal_ct(skb, dataoff, pf, protonum, l3proto, l4proto, &set_reply, &ctinfo); if (!ct) { /* Not valid part of a connection */ @@ -686,21 +686,21 @@ nf_conntrack_in(int pf, unsigned int hooknum, struct sk_buff **pskb) return NF_DROP; } - NF_CT_ASSERT((*pskb)->nfct); + NF_CT_ASSERT(skb->nfct); - ret = l4proto->packet(ct, *pskb, dataoff, ctinfo, pf, hooknum); + ret = l4proto->packet(ct, skb, dataoff, ctinfo, pf, hooknum); if (ret < 0) { /* Invalid: inverse of the return code tells * the netfilter core what to do */ pr_debug("nf_conntrack_in: Can't track with proto module\n"); - nf_conntrack_put((*pskb)->nfct); - (*pskb)->nfct = NULL; + nf_conntrack_put(skb->nfct); + skb->nfct = NULL; NF_CT_STAT_INC_ATOMIC(invalid); return -ret; } if (set_reply && !test_and_set_bit(IPS_SEEN_REPLY_BIT, &ct->status)) - nf_conntrack_event_cache(IPCT_STATUS, *pskb); + nf_conntrack_event_cache(IPCT_STATUS, skb); return ret; } diff --git a/net/netfilter/nf_conntrack_ftp.c b/net/netfilter/nf_conntrack_ftp.c index c763ee74ea0..6df259067f7 100644 --- a/net/netfilter/nf_conntrack_ftp.c +++ b/net/netfilter/nf_conntrack_ftp.c @@ -43,7 +43,7 @@ module_param_array(ports, ushort, &ports_c, 0400); static int loose; module_param(loose, bool, 0600); -unsigned int (*nf_nat_ftp_hook)(struct sk_buff **pskb, +unsigned int (*nf_nat_ftp_hook)(struct sk_buff *skb, enum ip_conntrack_info ctinfo, enum nf_ct_ftp_type type, unsigned int matchoff, @@ -344,7 +344,7 @@ static void update_nl_seq(u32 nl_seq, struct nf_ct_ftp_master *info, int dir, } } -static int help(struct sk_buff **pskb, +static int help(struct sk_buff *skb, unsigned int protoff, struct nf_conn *ct, enum ip_conntrack_info ctinfo) @@ -371,21 +371,21 @@ static int help(struct sk_buff **pskb, return NF_ACCEPT; } - th = skb_header_pointer(*pskb, protoff, sizeof(_tcph), &_tcph); + th = skb_header_pointer(skb, protoff, sizeof(_tcph), &_tcph); if (th == NULL) return NF_ACCEPT; dataoff = protoff + th->doff * 4; /* No data? */ - if (dataoff >= (*pskb)->len) { + if (dataoff >= skb->len) { pr_debug("ftp: dataoff(%u) >= skblen(%u)\n", dataoff, - (*pskb)->len); + skb->len); return NF_ACCEPT; } - datalen = (*pskb)->len - dataoff; + datalen = skb->len - dataoff; spin_lock_bh(&nf_ftp_lock); - fb_ptr = skb_header_pointer(*pskb, dataoff, datalen, ftp_buffer); + fb_ptr = skb_header_pointer(skb, dataoff, datalen, ftp_buffer); BUG_ON(fb_ptr == NULL); ends_in_nl = (fb_ptr[datalen - 1] == '\n'); @@ -491,7 +491,7 @@ static int help(struct sk_buff **pskb, * (possibly changed) expectation itself. 
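The ftp help() hunk above keeps conntrack's usual two-step read, now on the plain skb: skb_header_pointer() maps the fixed-size TCP header, then a second call maps the whole payload into ftp_buffer when the data is not linearly addressable. A user-space analogue of that accessor; struct pkt and all names here are invented:

#include <stdio.h>
#include <string.h>

struct pkt {
        const unsigned char *linear;    /* directly addressable bytes */
        size_t linear_len;
        const unsigned char *frag;      /* paged/fragmented remainder */
        size_t frag_len;
};

/* return a pointer to len bytes at off: the linear area if it already
 * covers them, otherwise assemble them into the caller's buffer
 * (the skb_copy_bits() fallback inside skb_header_pointer()) */
static const void *header_pointer(const struct pkt *p, size_t off,
                                  size_t len, void *buf)
{
        if (off + len <= p->linear_len)
                return p->linear + off;
        if (off + len > p->linear_len + p->frag_len)
                return NULL;            /* request runs past the packet */

        unsigned char *dst = buf;
        size_t n = off < p->linear_len ? p->linear_len - off : 0;
        if (n)
                memcpy(dst, p->linear + off, n);
        memcpy(dst + n, p->frag + (off + n - p->linear_len), len - n);
        return buf;
}

int main(void)
{
        const unsigned char head[] = "227 Entering Pas";
        const unsigned char tail[] = "sive Mode";
        struct pkt p = { head, sizeof(head) - 1, tail, sizeof(tail) - 1 };
        unsigned char tmp[32];

        /* crosses the linear/frag boundary, so tmp gets filled */
        const char *s = header_pointer(&p, 4, 21, tmp);
        printf("%.21s\n", s ? s : "(null)");
        return 0;
}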
*/ nf_nat_ftp = rcu_dereference(nf_nat_ftp_hook); if (nf_nat_ftp && ct->status & IPS_NAT_MASK) - ret = nf_nat_ftp(pskb, ctinfo, search[dir][i].ftptype, + ret = nf_nat_ftp(skb, ctinfo, search[dir][i].ftptype, matchoff, matchlen, exp); else { /* Can't expect this? Best to drop packet now. */ @@ -508,7 +508,7 @@ out_update_nl: /* Now if this ends in \n, update ftp info. Seq may have been * adjusted by NAT code. */ if (ends_in_nl) - update_nl_seq(seq, ct_ftp_info, dir, *pskb); + update_nl_seq(seq, ct_ftp_info, dir, skb); out: spin_unlock_bh(&nf_ftp_lock); return ret; diff --git a/net/netfilter/nf_conntrack_h323_main.c b/net/netfilter/nf_conntrack_h323_main.c index a8a9dfbe7a6..f23fd9598e1 100644 --- a/net/netfilter/nf_conntrack_h323_main.c +++ b/net/netfilter/nf_conntrack_h323_main.c @@ -47,27 +47,27 @@ MODULE_PARM_DESC(callforward_filter, "only create call forwarding expectations " "(determined by routing information)"); /* Hooks for NAT */ -int (*set_h245_addr_hook) (struct sk_buff **pskb, +int (*set_h245_addr_hook) (struct sk_buff *skb, unsigned char **data, int dataoff, H245_TransportAddress *taddr, union nf_conntrack_address *addr, __be16 port) __read_mostly; -int (*set_h225_addr_hook) (struct sk_buff **pskb, +int (*set_h225_addr_hook) (struct sk_buff *skb, unsigned char **data, int dataoff, TransportAddress *taddr, union nf_conntrack_address *addr, __be16 port) __read_mostly; -int (*set_sig_addr_hook) (struct sk_buff **pskb, +int (*set_sig_addr_hook) (struct sk_buff *skb, struct nf_conn *ct, enum ip_conntrack_info ctinfo, unsigned char **data, TransportAddress *taddr, int count) __read_mostly; -int (*set_ras_addr_hook) (struct sk_buff **pskb, +int (*set_ras_addr_hook) (struct sk_buff *skb, struct nf_conn *ct, enum ip_conntrack_info ctinfo, unsigned char **data, TransportAddress *taddr, int count) __read_mostly; -int (*nat_rtp_rtcp_hook) (struct sk_buff **pskb, +int (*nat_rtp_rtcp_hook) (struct sk_buff *skb, struct nf_conn *ct, enum ip_conntrack_info ctinfo, unsigned char **data, int dataoff, @@ -75,25 +75,25 @@ int (*nat_rtp_rtcp_hook) (struct sk_buff **pskb, __be16 port, __be16 rtp_port, struct nf_conntrack_expect *rtp_exp, struct nf_conntrack_expect *rtcp_exp) __read_mostly; -int (*nat_t120_hook) (struct sk_buff **pskb, +int (*nat_t120_hook) (struct sk_buff *skb, struct nf_conn *ct, enum ip_conntrack_info ctinfo, unsigned char **data, int dataoff, H245_TransportAddress *taddr, __be16 port, struct nf_conntrack_expect *exp) __read_mostly; -int (*nat_h245_hook) (struct sk_buff **pskb, +int (*nat_h245_hook) (struct sk_buff *skb, struct nf_conn *ct, enum ip_conntrack_info ctinfo, unsigned char **data, int dataoff, TransportAddress *taddr, __be16 port, struct nf_conntrack_expect *exp) __read_mostly; -int (*nat_callforwarding_hook) (struct sk_buff **pskb, +int (*nat_callforwarding_hook) (struct sk_buff *skb, struct nf_conn *ct, enum ip_conntrack_info ctinfo, unsigned char **data, int dataoff, TransportAddress *taddr, __be16 port, struct nf_conntrack_expect *exp) __read_mostly; -int (*nat_q931_hook) (struct sk_buff **pskb, +int (*nat_q931_hook) (struct sk_buff *skb, struct nf_conn *ct, enum ip_conntrack_info ctinfo, unsigned char **data, TransportAddress *taddr, int idx, @@ -108,7 +108,7 @@ static struct nf_conntrack_helper nf_conntrack_helper_q931[]; static struct nf_conntrack_helper nf_conntrack_helper_ras[]; /****************************************************************************/ -static int get_tpkt_data(struct sk_buff **pskb, unsigned int protoff, +static int get_tpkt_data(struct 
sk_buff *skb, unsigned int protoff, struct nf_conn *ct, enum ip_conntrack_info ctinfo, unsigned char **data, int *datalen, int *dataoff) { @@ -122,7 +122,7 @@ static int get_tpkt_data(struct sk_buff **pskb, unsigned int protoff, int tpktoff; /* Get TCP header */ - th = skb_header_pointer(*pskb, protoff, sizeof(_tcph), &_tcph); + th = skb_header_pointer(skb, protoff, sizeof(_tcph), &_tcph); if (th == NULL) return 0; @@ -130,13 +130,13 @@ static int get_tpkt_data(struct sk_buff **pskb, unsigned int protoff, tcpdataoff = protoff + th->doff * 4; /* Get TCP data length */ - tcpdatalen = (*pskb)->len - tcpdataoff; + tcpdatalen = skb->len - tcpdataoff; if (tcpdatalen <= 0) /* No TCP data */ goto clear_out; if (*data == NULL) { /* first TPKT */ /* Get first TPKT pointer */ - tpkt = skb_header_pointer(*pskb, tcpdataoff, tcpdatalen, + tpkt = skb_header_pointer(skb, tcpdataoff, tcpdatalen, h323_buffer); BUG_ON(tpkt == NULL); @@ -248,7 +248,7 @@ static int get_h245_addr(struct nf_conn *ct, unsigned char *data, } /****************************************************************************/ -static int expect_rtp_rtcp(struct sk_buff **pskb, struct nf_conn *ct, +static int expect_rtp_rtcp(struct sk_buff *skb, struct nf_conn *ct, enum ip_conntrack_info ctinfo, unsigned char **data, int dataoff, H245_TransportAddress *taddr) @@ -297,7 +297,7 @@ static int expect_rtp_rtcp(struct sk_buff **pskb, struct nf_conn *ct, (nat_rtp_rtcp = rcu_dereference(nat_rtp_rtcp_hook)) && ct->status & IPS_NAT_MASK) { /* NAT needed */ - ret = nat_rtp_rtcp(pskb, ct, ctinfo, data, dataoff, + ret = nat_rtp_rtcp(skb, ct, ctinfo, data, dataoff, taddr, port, rtp_port, rtp_exp, rtcp_exp); } else { /* Conntrack only */ if (nf_ct_expect_related(rtp_exp) == 0) { @@ -321,7 +321,7 @@ static int expect_rtp_rtcp(struct sk_buff **pskb, struct nf_conn *ct, } /****************************************************************************/ -static int expect_t120(struct sk_buff **pskb, +static int expect_t120(struct sk_buff *skb, struct nf_conn *ct, enum ip_conntrack_info ctinfo, unsigned char **data, int dataoff, @@ -355,7 +355,7 @@ static int expect_t120(struct sk_buff **pskb, (nat_t120 = rcu_dereference(nat_t120_hook)) && ct->status & IPS_NAT_MASK) { /* NAT needed */ - ret = nat_t120(pskb, ct, ctinfo, data, dataoff, taddr, + ret = nat_t120(skb, ct, ctinfo, data, dataoff, taddr, port, exp); } else { /* Conntrack only */ if (nf_ct_expect_related(exp) == 0) { @@ -371,7 +371,7 @@ static int expect_t120(struct sk_buff **pskb, } /****************************************************************************/ -static int process_h245_channel(struct sk_buff **pskb, +static int process_h245_channel(struct sk_buff *skb, struct nf_conn *ct, enum ip_conntrack_info ctinfo, unsigned char **data, int dataoff, @@ -381,7 +381,7 @@ static int process_h245_channel(struct sk_buff **pskb, if (channel->options & eH2250LogicalChannelParameters_mediaChannel) { /* RTP */ - ret = expect_rtp_rtcp(pskb, ct, ctinfo, data, dataoff, + ret = expect_rtp_rtcp(skb, ct, ctinfo, data, dataoff, &channel->mediaChannel); if (ret < 0) return -1; @@ -390,7 +390,7 @@ static int process_h245_channel(struct sk_buff **pskb, if (channel-> options & eH2250LogicalChannelParameters_mediaControlChannel) { /* RTCP */ - ret = expect_rtp_rtcp(pskb, ct, ctinfo, data, dataoff, + ret = expect_rtp_rtcp(skb, ct, ctinfo, data, dataoff, &channel->mediaControlChannel); if (ret < 0) return -1; @@ -400,7 +400,7 @@ static int process_h245_channel(struct sk_buff **pskb, } 
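The get_tpkt_data() hunks above walk TPKT records out of the reassembled TCP stream before each one is handed to the H.245 decoder. TPKT (RFC 1006) prefixes every record with four bytes: version 3, a reserved byte, and a 16-bit big-endian length that counts the header itself. A minimal framing loop over an in-memory byte buffer:

#include <stdio.h>
#include <stddef.h>

int main(void)
{
        /* two TPKT records: 4-byte header plus payload, lengths 6 and 7 */
        const unsigned char stream[] = {
                3, 0, 0, 6, 'h', '1',
                3, 0, 0, 7, 'h', '2', '!',
        };
        size_t off = 0, len = sizeof(stream);

        while (len - off >= 4) {
                if (stream[off] != 3)   /* TPKT version must be 3 */
                        break;
                size_t tpktlen = (size_t)(stream[off + 2] << 8) | stream[off + 3];
                if (tpktlen < 4 || tpktlen > len - off)
                        break;          /* malformed or truncated record */
                printf("TPKT payload, %zu bytes: %.*s\n", tpktlen - 4,
                       (int)(tpktlen - 4), (const char *)&stream[off + 4]);
                off += tpktlen;         /* advance to the next record */
        }
        return 0;
}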
/****************************************************************************/ -static int process_olc(struct sk_buff **pskb, struct nf_conn *ct, +static int process_olc(struct sk_buff *skb, struct nf_conn *ct, enum ip_conntrack_info ctinfo, unsigned char **data, int dataoff, OpenLogicalChannel *olc) @@ -412,7 +412,7 @@ static int process_olc(struct sk_buff **pskb, struct nf_conn *ct, if (olc->forwardLogicalChannelParameters.multiplexParameters.choice == eOpenLogicalChannel_forwardLogicalChannelParameters_multiplexParameters_h2250LogicalChannelParameters) { - ret = process_h245_channel(pskb, ct, ctinfo, data, dataoff, + ret = process_h245_channel(skb, ct, ctinfo, data, dataoff, &olc-> forwardLogicalChannelParameters. multiplexParameters. @@ -430,7 +430,7 @@ static int process_olc(struct sk_buff **pskb, struct nf_conn *ct, eOpenLogicalChannel_reverseLogicalChannelParameters_multiplexParameters_h2250LogicalChannelParameters)) { ret = - process_h245_channel(pskb, ct, ctinfo, data, dataoff, + process_h245_channel(skb, ct, ctinfo, data, dataoff, &olc-> reverseLogicalChannelParameters. multiplexParameters. @@ -448,7 +448,7 @@ static int process_olc(struct sk_buff **pskb, struct nf_conn *ct, t120.choice == eDataProtocolCapability_separateLANStack && olc->separateStack.networkAddress.choice == eNetworkAccessParameters_networkAddress_localAreaAddress) { - ret = expect_t120(pskb, ct, ctinfo, data, dataoff, + ret = expect_t120(skb, ct, ctinfo, data, dataoff, &olc->separateStack.networkAddress. localAreaAddress); if (ret < 0) @@ -459,7 +459,7 @@ static int process_olc(struct sk_buff **pskb, struct nf_conn *ct, } /****************************************************************************/ -static int process_olca(struct sk_buff **pskb, struct nf_conn *ct, +static int process_olca(struct sk_buff *skb, struct nf_conn *ct, enum ip_conntrack_info ctinfo, unsigned char **data, int dataoff, OpenLogicalChannelAck *olca) @@ -477,7 +477,7 @@ static int process_olca(struct sk_buff **pskb, struct nf_conn *ct, choice == eOpenLogicalChannelAck_reverseLogicalChannelParameters_multiplexParameters_h2250LogicalChannelParameters)) { - ret = process_h245_channel(pskb, ct, ctinfo, data, dataoff, + ret = process_h245_channel(skb, ct, ctinfo, data, dataoff, &olca-> reverseLogicalChannelParameters. multiplexParameters. @@ -496,7 +496,7 @@ static int process_olca(struct sk_buff **pskb, struct nf_conn *ct, if (ack->options & eH2250LogicalChannelAckParameters_mediaChannel) { /* RTP */ - ret = expect_rtp_rtcp(pskb, ct, ctinfo, data, dataoff, + ret = expect_rtp_rtcp(skb, ct, ctinfo, data, dataoff, &ack->mediaChannel); if (ret < 0) return -1; @@ -505,7 +505,7 @@ static int process_olca(struct sk_buff **pskb, struct nf_conn *ct, if (ack->options & eH2250LogicalChannelAckParameters_mediaControlChannel) { /* RTCP */ - ret = expect_rtp_rtcp(pskb, ct, ctinfo, data, dataoff, + ret = expect_rtp_rtcp(skb, ct, ctinfo, data, dataoff, &ack->mediaControlChannel); if (ret < 0) return -1; @@ -515,7 +515,7 @@ static int process_olca(struct sk_buff **pskb, struct nf_conn *ct, if ((olca->options & eOpenLogicalChannelAck_separateStack) && olca->separateStack.networkAddress.choice == eNetworkAccessParameters_networkAddress_localAreaAddress) { - ret = expect_t120(pskb, ct, ctinfo, data, dataoff, + ret = expect_t120(skb, ct, ctinfo, data, dataoff, &olca->separateStack.networkAddress. 
localAreaAddress); if (ret < 0) @@ -526,7 +526,7 @@ static int process_olca(struct sk_buff **pskb, struct nf_conn *ct, } /****************************************************************************/ -static int process_h245(struct sk_buff **pskb, struct nf_conn *ct, +static int process_h245(struct sk_buff *skb, struct nf_conn *ct, enum ip_conntrack_info ctinfo, unsigned char **data, int dataoff, MultimediaSystemControlMessage *mscm) @@ -535,7 +535,7 @@ static int process_h245(struct sk_buff **pskb, struct nf_conn *ct, case eMultimediaSystemControlMessage_request: if (mscm->request.choice == eRequestMessage_openLogicalChannel) { - return process_olc(pskb, ct, ctinfo, data, dataoff, + return process_olc(skb, ct, ctinfo, data, dataoff, &mscm->request.openLogicalChannel); } pr_debug("nf_ct_h323: H.245 Request %d\n", @@ -544,7 +544,7 @@ static int process_h245(struct sk_buff **pskb, struct nf_conn *ct, case eMultimediaSystemControlMessage_response: if (mscm->response.choice == eResponseMessage_openLogicalChannelAck) { - return process_olca(pskb, ct, ctinfo, data, dataoff, + return process_olca(skb, ct, ctinfo, data, dataoff, &mscm->response. openLogicalChannelAck); } @@ -560,7 +560,7 @@ static int process_h245(struct sk_buff **pskb, struct nf_conn *ct, } /****************************************************************************/ -static int h245_help(struct sk_buff **pskb, unsigned int protoff, +static int h245_help(struct sk_buff *skb, unsigned int protoff, struct nf_conn *ct, enum ip_conntrack_info ctinfo) { static MultimediaSystemControlMessage mscm; @@ -574,12 +574,12 @@ static int h245_help(struct sk_buff **pskb, unsigned int protoff, ctinfo != IP_CT_ESTABLISHED + IP_CT_IS_REPLY) { return NF_ACCEPT; } - pr_debug("nf_ct_h245: skblen = %u\n", (*pskb)->len); + pr_debug("nf_ct_h245: skblen = %u\n", skb->len); spin_lock_bh(&nf_h323_lock); /* Process each TPKT */ - while (get_tpkt_data(pskb, protoff, ct, ctinfo, + while (get_tpkt_data(skb, protoff, ct, ctinfo, &data, &datalen, &dataoff)) { pr_debug("nf_ct_h245: TPKT len=%d ", datalen); NF_CT_DUMP_TUPLE(&ct->tuplehash[CTINFO2DIR(ctinfo)].tuple); @@ -596,7 +596,7 @@ static int h245_help(struct sk_buff **pskb, unsigned int protoff, } /* Process H.245 signal */ - if (process_h245(pskb, ct, ctinfo, &data, dataoff, &mscm) < 0) + if (process_h245(skb, ct, ctinfo, &data, dataoff, &mscm) < 0) goto drop; } @@ -654,7 +654,7 @@ int get_h225_addr(struct nf_conn *ct, unsigned char *data, } /****************************************************************************/ -static int expect_h245(struct sk_buff **pskb, struct nf_conn *ct, +static int expect_h245(struct sk_buff *skb, struct nf_conn *ct, enum ip_conntrack_info ctinfo, unsigned char **data, int dataoff, TransportAddress *taddr) @@ -687,7 +687,7 @@ static int expect_h245(struct sk_buff **pskb, struct nf_conn *ct, (nat_h245 = rcu_dereference(nat_h245_hook)) && ct->status & IPS_NAT_MASK) { /* NAT needed */ - ret = nat_h245(pskb, ct, ctinfo, data, dataoff, taddr, + ret = nat_h245(skb, ct, ctinfo, data, dataoff, taddr, port, exp); } else { /* Conntrack only */ if (nf_ct_expect_related(exp) == 0) { @@ -758,7 +758,7 @@ static int callforward_do_filter(union nf_conntrack_address *src, } /****************************************************************************/ -static int expect_callforwarding(struct sk_buff **pskb, +static int expect_callforwarding(struct sk_buff *skb, struct nf_conn *ct, enum ip_conntrack_info ctinfo, unsigned char **data, int dataoff, @@ -798,7 +798,7 @@ static int 
expect_callforwarding(struct sk_buff **pskb, (nat_callforwarding = rcu_dereference(nat_callforwarding_hook)) && ct->status & IPS_NAT_MASK) { /* Need NAT */ - ret = nat_callforwarding(pskb, ct, ctinfo, data, dataoff, + ret = nat_callforwarding(skb, ct, ctinfo, data, dataoff, taddr, port, exp); } else { /* Conntrack only */ if (nf_ct_expect_related(exp) == 0) { @@ -814,7 +814,7 @@ static int expect_callforwarding(struct sk_buff **pskb, } /****************************************************************************/ -static int process_setup(struct sk_buff **pskb, struct nf_conn *ct, +static int process_setup(struct sk_buff *skb, struct nf_conn *ct, enum ip_conntrack_info ctinfo, unsigned char **data, int dataoff, Setup_UUIE *setup) @@ -829,7 +829,7 @@ static int process_setup(struct sk_buff **pskb, struct nf_conn *ct, pr_debug("nf_ct_q931: Setup\n"); if (setup->options & eSetup_UUIE_h245Address) { - ret = expect_h245(pskb, ct, ctinfo, data, dataoff, + ret = expect_h245(skb, ct, ctinfo, data, dataoff, &setup->h245Address); if (ret < 0) return -1; @@ -846,7 +846,7 @@ static int process_setup(struct sk_buff **pskb, struct nf_conn *ct, NIP6(*(struct in6_addr *)&addr), ntohs(port), NIP6(*(struct in6_addr *)&ct->tuplehash[!dir].tuple.src.u3), ntohs(ct->tuplehash[!dir].tuple.src.u.tcp.port)); - ret = set_h225_addr(pskb, data, dataoff, + ret = set_h225_addr(skb, data, dataoff, &setup->destCallSignalAddress, &ct->tuplehash[!dir].tuple.src.u3, ct->tuplehash[!dir].tuple.src.u.tcp.port); @@ -864,7 +864,7 @@ static int process_setup(struct sk_buff **pskb, struct nf_conn *ct, NIP6(*(struct in6_addr *)&addr), ntohs(port), NIP6(*(struct in6_addr *)&ct->tuplehash[!dir].tuple.dst.u3), ntohs(ct->tuplehash[!dir].tuple.dst.u.tcp.port)); - ret = set_h225_addr(pskb, data, dataoff, + ret = set_h225_addr(skb, data, dataoff, &setup->sourceCallSignalAddress, &ct->tuplehash[!dir].tuple.dst.u3, ct->tuplehash[!dir].tuple.dst.u.tcp.port); @@ -874,7 +874,7 @@ static int process_setup(struct sk_buff **pskb, struct nf_conn *ct, if (setup->options & eSetup_UUIE_fastStart) { for (i = 0; i < setup->fastStart.count; i++) { - ret = process_olc(pskb, ct, ctinfo, data, dataoff, + ret = process_olc(skb, ct, ctinfo, data, dataoff, &setup->fastStart.item[i]); if (ret < 0) return -1; @@ -885,7 +885,7 @@ static int process_setup(struct sk_buff **pskb, struct nf_conn *ct, } /****************************************************************************/ -static int process_callproceeding(struct sk_buff **pskb, +static int process_callproceeding(struct sk_buff *skb, struct nf_conn *ct, enum ip_conntrack_info ctinfo, unsigned char **data, int dataoff, @@ -897,7 +897,7 @@ static int process_callproceeding(struct sk_buff **pskb, pr_debug("nf_ct_q931: CallProceeding\n"); if (callproc->options & eCallProceeding_UUIE_h245Address) { - ret = expect_h245(pskb, ct, ctinfo, data, dataoff, + ret = expect_h245(skb, ct, ctinfo, data, dataoff, &callproc->h245Address); if (ret < 0) return -1; @@ -905,7 +905,7 @@ static int process_callproceeding(struct sk_buff **pskb, if (callproc->options & eCallProceeding_UUIE_fastStart) { for (i = 0; i < callproc->fastStart.count; i++) { - ret = process_olc(pskb, ct, ctinfo, data, dataoff, + ret = process_olc(skb, ct, ctinfo, data, dataoff, &callproc->fastStart.item[i]); if (ret < 0) return -1; @@ -916,7 +916,7 @@ static int process_callproceeding(struct sk_buff **pskb, } /****************************************************************************/ -static int process_connect(struct sk_buff **pskb, struct nf_conn *ct, 
+static int process_connect(struct sk_buff *skb, struct nf_conn *ct, enum ip_conntrack_info ctinfo, unsigned char **data, int dataoff, Connect_UUIE *connect) @@ -927,7 +927,7 @@ static int process_connect(struct sk_buff **pskb, struct nf_conn *ct, pr_debug("nf_ct_q931: Connect\n"); if (connect->options & eConnect_UUIE_h245Address) { - ret = expect_h245(pskb, ct, ctinfo, data, dataoff, + ret = expect_h245(skb, ct, ctinfo, data, dataoff, &connect->h245Address); if (ret < 0) return -1; @@ -935,7 +935,7 @@ static int process_connect(struct sk_buff **pskb, struct nf_conn *ct, if (connect->options & eConnect_UUIE_fastStart) { for (i = 0; i < connect->fastStart.count; i++) { - ret = process_olc(pskb, ct, ctinfo, data, dataoff, + ret = process_olc(skb, ct, ctinfo, data, dataoff, &connect->fastStart.item[i]); if (ret < 0) return -1; @@ -946,7 +946,7 @@ static int process_connect(struct sk_buff **pskb, struct nf_conn *ct, } /****************************************************************************/ -static int process_alerting(struct sk_buff **pskb, struct nf_conn *ct, +static int process_alerting(struct sk_buff *skb, struct nf_conn *ct, enum ip_conntrack_info ctinfo, unsigned char **data, int dataoff, Alerting_UUIE *alert) @@ -957,7 +957,7 @@ static int process_alerting(struct sk_buff **pskb, struct nf_conn *ct, pr_debug("nf_ct_q931: Alerting\n"); if (alert->options & eAlerting_UUIE_h245Address) { - ret = expect_h245(pskb, ct, ctinfo, data, dataoff, + ret = expect_h245(skb, ct, ctinfo, data, dataoff, &alert->h245Address); if (ret < 0) return -1; @@ -965,7 +965,7 @@ static int process_alerting(struct sk_buff **pskb, struct nf_conn *ct, if (alert->options & eAlerting_UUIE_fastStart) { for (i = 0; i < alert->fastStart.count; i++) { - ret = process_olc(pskb, ct, ctinfo, data, dataoff, + ret = process_olc(skb, ct, ctinfo, data, dataoff, &alert->fastStart.item[i]); if (ret < 0) return -1; @@ -976,7 +976,7 @@ static int process_alerting(struct sk_buff **pskb, struct nf_conn *ct, } /****************************************************************************/ -static int process_facility(struct sk_buff **pskb, struct nf_conn *ct, +static int process_facility(struct sk_buff *skb, struct nf_conn *ct, enum ip_conntrack_info ctinfo, unsigned char **data, int dataoff, Facility_UUIE *facility) @@ -988,7 +988,7 @@ static int process_facility(struct sk_buff **pskb, struct nf_conn *ct, if (facility->reason.choice == eFacilityReason_callForwarded) { if (facility->options & eFacility_UUIE_alternativeAddress) - return expect_callforwarding(pskb, ct, ctinfo, data, + return expect_callforwarding(skb, ct, ctinfo, data, dataoff, &facility-> alternativeAddress); @@ -996,7 +996,7 @@ static int process_facility(struct sk_buff **pskb, struct nf_conn *ct, } if (facility->options & eFacility_UUIE_h245Address) { - ret = expect_h245(pskb, ct, ctinfo, data, dataoff, + ret = expect_h245(skb, ct, ctinfo, data, dataoff, &facility->h245Address); if (ret < 0) return -1; @@ -1004,7 +1004,7 @@ static int process_facility(struct sk_buff **pskb, struct nf_conn *ct, if (facility->options & eFacility_UUIE_fastStart) { for (i = 0; i < facility->fastStart.count; i++) { - ret = process_olc(pskb, ct, ctinfo, data, dataoff, + ret = process_olc(skb, ct, ctinfo, data, dataoff, &facility->fastStart.item[i]); if (ret < 0) return -1; @@ -1015,7 +1015,7 @@ static int process_facility(struct sk_buff **pskb, struct nf_conn *ct, } /****************************************************************************/ -static int process_progress(struct sk_buff 
**pskb, struct nf_conn *ct, +static int process_progress(struct sk_buff *skb, struct nf_conn *ct, enum ip_conntrack_info ctinfo, unsigned char **data, int dataoff, Progress_UUIE *progress) @@ -1026,7 +1026,7 @@ static int process_progress(struct sk_buff **pskb, struct nf_conn *ct, pr_debug("nf_ct_q931: Progress\n"); if (progress->options & eProgress_UUIE_h245Address) { - ret = expect_h245(pskb, ct, ctinfo, data, dataoff, + ret = expect_h245(skb, ct, ctinfo, data, dataoff, &progress->h245Address); if (ret < 0) return -1; @@ -1034,7 +1034,7 @@ static int process_progress(struct sk_buff **pskb, struct nf_conn *ct, if (progress->options & eProgress_UUIE_fastStart) { for (i = 0; i < progress->fastStart.count; i++) { - ret = process_olc(pskb, ct, ctinfo, data, dataoff, + ret = process_olc(skb, ct, ctinfo, data, dataoff, &progress->fastStart.item[i]); if (ret < 0) return -1; @@ -1045,7 +1045,7 @@ static int process_progress(struct sk_buff **pskb, struct nf_conn *ct, } /****************************************************************************/ -static int process_q931(struct sk_buff **pskb, struct nf_conn *ct, +static int process_q931(struct sk_buff *skb, struct nf_conn *ct, enum ip_conntrack_info ctinfo, unsigned char **data, int dataoff, Q931 *q931) { @@ -1055,28 +1055,28 @@ static int process_q931(struct sk_buff **pskb, struct nf_conn *ct, switch (pdu->h323_message_body.choice) { case eH323_UU_PDU_h323_message_body_setup: - ret = process_setup(pskb, ct, ctinfo, data, dataoff, + ret = process_setup(skb, ct, ctinfo, data, dataoff, &pdu->h323_message_body.setup); break; case eH323_UU_PDU_h323_message_body_callProceeding: - ret = process_callproceeding(pskb, ct, ctinfo, data, dataoff, + ret = process_callproceeding(skb, ct, ctinfo, data, dataoff, &pdu->h323_message_body. 
callProceeding); break; case eH323_UU_PDU_h323_message_body_connect: - ret = process_connect(pskb, ct, ctinfo, data, dataoff, + ret = process_connect(skb, ct, ctinfo, data, dataoff, &pdu->h323_message_body.connect); break; case eH323_UU_PDU_h323_message_body_alerting: - ret = process_alerting(pskb, ct, ctinfo, data, dataoff, + ret = process_alerting(skb, ct, ctinfo, data, dataoff, &pdu->h323_message_body.alerting); break; case eH323_UU_PDU_h323_message_body_facility: - ret = process_facility(pskb, ct, ctinfo, data, dataoff, + ret = process_facility(skb, ct, ctinfo, data, dataoff, &pdu->h323_message_body.facility); break; case eH323_UU_PDU_h323_message_body_progress: - ret = process_progress(pskb, ct, ctinfo, data, dataoff, + ret = process_progress(skb, ct, ctinfo, data, dataoff, &pdu->h323_message_body.progress); break; default: @@ -1090,7 +1090,7 @@ static int process_q931(struct sk_buff **pskb, struct nf_conn *ct, if (pdu->options & eH323_UU_PDU_h245Control) { for (i = 0; i < pdu->h245Control.count; i++) { - ret = process_h245(pskb, ct, ctinfo, data, dataoff, + ret = process_h245(skb, ct, ctinfo, data, dataoff, &pdu->h245Control.item[i]); if (ret < 0) return -1; @@ -1101,7 +1101,7 @@ static int process_q931(struct sk_buff **pskb, struct nf_conn *ct, } /****************************************************************************/ -static int q931_help(struct sk_buff **pskb, unsigned int protoff, +static int q931_help(struct sk_buff *skb, unsigned int protoff, struct nf_conn *ct, enum ip_conntrack_info ctinfo) { static Q931 q931; @@ -1115,12 +1115,12 @@ static int q931_help(struct sk_buff **pskb, unsigned int protoff, ctinfo != IP_CT_ESTABLISHED + IP_CT_IS_REPLY) { return NF_ACCEPT; } - pr_debug("nf_ct_q931: skblen = %u\n", (*pskb)->len); + pr_debug("nf_ct_q931: skblen = %u\n", skb->len); spin_lock_bh(&nf_h323_lock); /* Process each TPKT */ - while (get_tpkt_data(pskb, protoff, ct, ctinfo, + while (get_tpkt_data(skb, protoff, ct, ctinfo, &data, &datalen, &dataoff)) { pr_debug("nf_ct_q931: TPKT len=%d ", datalen); NF_CT_DUMP_TUPLE(&ct->tuplehash[CTINFO2DIR(ctinfo)].tuple); @@ -1136,7 +1136,7 @@ static int q931_help(struct sk_buff **pskb, unsigned int protoff, } /* Process Q.931 signal */ - if (process_q931(pskb, ct, ctinfo, &data, dataoff, &q931) < 0) + if (process_q931(skb, ct, ctinfo, &data, dataoff, &q931) < 0) goto drop; } @@ -1177,20 +1177,20 @@ static struct nf_conntrack_helper nf_conntrack_helper_q931[] __read_mostly = { }; /****************************************************************************/ -static unsigned char *get_udp_data(struct sk_buff **pskb, unsigned int protoff, +static unsigned char *get_udp_data(struct sk_buff *skb, unsigned int protoff, int *datalen) { struct udphdr _uh, *uh; int dataoff; - uh = skb_header_pointer(*pskb, protoff, sizeof(_uh), &_uh); + uh = skb_header_pointer(skb, protoff, sizeof(_uh), &_uh); if (uh == NULL) return NULL; dataoff = protoff + sizeof(_uh); - if (dataoff >= (*pskb)->len) + if (dataoff >= skb->len) return NULL; - *datalen = (*pskb)->len - dataoff; - return skb_header_pointer(*pskb, dataoff, *datalen, h323_buffer); + *datalen = skb->len - dataoff; + return skb_header_pointer(skb, dataoff, *datalen, h323_buffer); } /****************************************************************************/ @@ -1227,7 +1227,7 @@ static int set_expect_timeout(struct nf_conntrack_expect *exp, } /****************************************************************************/ -static int expect_q931(struct sk_buff **pskb, struct nf_conn *ct, +static int 
expect_q931(struct sk_buff *skb, struct nf_conn *ct, enum ip_conntrack_info ctinfo, unsigned char **data, TransportAddress *taddr, int count) @@ -1265,7 +1265,7 @@ static int expect_q931(struct sk_buff **pskb, struct nf_conn *ct, nat_q931 = rcu_dereference(nat_q931_hook); if (nat_q931 && ct->status & IPS_NAT_MASK) { /* Need NAT */ - ret = nat_q931(pskb, ct, ctinfo, data, taddr, i, port, exp); + ret = nat_q931(skb, ct, ctinfo, data, taddr, i, port, exp); } else { /* Conntrack only */ if (nf_ct_expect_related(exp) == 0) { pr_debug("nf_ct_ras: expect Q.931 "); @@ -1283,7 +1283,7 @@ static int expect_q931(struct sk_buff **pskb, struct nf_conn *ct, } /****************************************************************************/ -static int process_grq(struct sk_buff **pskb, struct nf_conn *ct, +static int process_grq(struct sk_buff *skb, struct nf_conn *ct, enum ip_conntrack_info ctinfo, unsigned char **data, GatekeeperRequest *grq) { @@ -1293,13 +1293,13 @@ static int process_grq(struct sk_buff **pskb, struct nf_conn *ct, set_ras_addr = rcu_dereference(set_ras_addr_hook); if (set_ras_addr && ct->status & IPS_NAT_MASK) /* NATed */ - return set_ras_addr(pskb, ct, ctinfo, data, + return set_ras_addr(skb, ct, ctinfo, data, &grq->rasAddress, 1); return 0; } /****************************************************************************/ -static int process_gcf(struct sk_buff **pskb, struct nf_conn *ct, +static int process_gcf(struct sk_buff *skb, struct nf_conn *ct, enum ip_conntrack_info ctinfo, unsigned char **data, GatekeeperConfirm *gcf) { @@ -1343,7 +1343,7 @@ static int process_gcf(struct sk_buff **pskb, struct nf_conn *ct, } /****************************************************************************/ -static int process_rrq(struct sk_buff **pskb, struct nf_conn *ct, +static int process_rrq(struct sk_buff *skb, struct nf_conn *ct, enum ip_conntrack_info ctinfo, unsigned char **data, RegistrationRequest *rrq) { @@ -1353,7 +1353,7 @@ static int process_rrq(struct sk_buff **pskb, struct nf_conn *ct, pr_debug("nf_ct_ras: RRQ\n"); - ret = expect_q931(pskb, ct, ctinfo, data, + ret = expect_q931(skb, ct, ctinfo, data, rrq->callSignalAddress.item, rrq->callSignalAddress.count); if (ret < 0) @@ -1361,7 +1361,7 @@ static int process_rrq(struct sk_buff **pskb, struct nf_conn *ct, set_ras_addr = rcu_dereference(set_ras_addr_hook); if (set_ras_addr && ct->status & IPS_NAT_MASK) { - ret = set_ras_addr(pskb, ct, ctinfo, data, + ret = set_ras_addr(skb, ct, ctinfo, data, rrq->rasAddress.item, rrq->rasAddress.count); if (ret < 0) @@ -1378,7 +1378,7 @@ static int process_rrq(struct sk_buff **pskb, struct nf_conn *ct, } /****************************************************************************/ -static int process_rcf(struct sk_buff **pskb, struct nf_conn *ct, +static int process_rcf(struct sk_buff *skb, struct nf_conn *ct, enum ip_conntrack_info ctinfo, unsigned char **data, RegistrationConfirm *rcf) { @@ -1392,7 +1392,7 @@ static int process_rcf(struct sk_buff **pskb, struct nf_conn *ct, set_sig_addr = rcu_dereference(set_sig_addr_hook); if (set_sig_addr && ct->status & IPS_NAT_MASK) { - ret = set_sig_addr(pskb, ct, ctinfo, data, + ret = set_sig_addr(skb, ct, ctinfo, data, rcf->callSignalAddress.item, rcf->callSignalAddress.count); if (ret < 0) @@ -1407,7 +1407,7 @@ static int process_rcf(struct sk_buff **pskb, struct nf_conn *ct, if (info->timeout > 0) { pr_debug("nf_ct_ras: set RAS connection timeout to " "%u seconds\n", info->timeout); - nf_ct_refresh(ct, *pskb, info->timeout * HZ); + nf_ct_refresh(ct, 
skb, info->timeout * HZ); /* Set expect timeout */ read_lock_bh(&nf_conntrack_lock); @@ -1427,7 +1427,7 @@ static int process_rcf(struct sk_buff **pskb, struct nf_conn *ct, } /****************************************************************************/ -static int process_urq(struct sk_buff **pskb, struct nf_conn *ct, +static int process_urq(struct sk_buff *skb, struct nf_conn *ct, enum ip_conntrack_info ctinfo, unsigned char **data, UnregistrationRequest *urq) { @@ -1440,7 +1440,7 @@ static int process_urq(struct sk_buff **pskb, struct nf_conn *ct, set_sig_addr = rcu_dereference(set_sig_addr_hook); if (set_sig_addr && ct->status & IPS_NAT_MASK) { - ret = set_sig_addr(pskb, ct, ctinfo, data, + ret = set_sig_addr(skb, ct, ctinfo, data, urq->callSignalAddress.item, urq->callSignalAddress.count); if (ret < 0) @@ -1453,13 +1453,13 @@ static int process_urq(struct sk_buff **pskb, struct nf_conn *ct, info->sig_port[!dir] = 0; /* Give it 30 seconds for UCF or URJ */ - nf_ct_refresh(ct, *pskb, 30 * HZ); + nf_ct_refresh(ct, skb, 30 * HZ); return 0; } /****************************************************************************/ -static int process_arq(struct sk_buff **pskb, struct nf_conn *ct, +static int process_arq(struct sk_buff *skb, struct nf_conn *ct, enum ip_conntrack_info ctinfo, unsigned char **data, AdmissionRequest *arq) { @@ -1479,7 +1479,7 @@ static int process_arq(struct sk_buff **pskb, struct nf_conn *ct, port == info->sig_port[dir] && set_h225_addr && ct->status & IPS_NAT_MASK) { /* Answering ARQ */ - return set_h225_addr(pskb, data, 0, + return set_h225_addr(skb, data, 0, &arq->destCallSignalAddress, &ct->tuplehash[!dir].tuple.dst.u3, info->sig_port[!dir]); @@ -1491,7 +1491,7 @@ static int process_arq(struct sk_buff **pskb, struct nf_conn *ct, !memcmp(&addr, &ct->tuplehash[dir].tuple.src.u3, sizeof(addr)) && set_h225_addr && ct->status & IPS_NAT_MASK) { /* Calling ARQ */ - return set_h225_addr(pskb, data, 0, + return set_h225_addr(skb, data, 0, &arq->srcCallSignalAddress, &ct->tuplehash[!dir].tuple.dst.u3, port); @@ -1501,7 +1501,7 @@ static int process_arq(struct sk_buff **pskb, struct nf_conn *ct, } /****************************************************************************/ -static int process_acf(struct sk_buff **pskb, struct nf_conn *ct, +static int process_acf(struct sk_buff *skb, struct nf_conn *ct, enum ip_conntrack_info ctinfo, unsigned char **data, AdmissionConfirm *acf) { @@ -1522,7 +1522,7 @@ static int process_acf(struct sk_buff **pskb, struct nf_conn *ct, /* Answering ACF */ set_sig_addr = rcu_dereference(set_sig_addr_hook); if (set_sig_addr && ct->status & IPS_NAT_MASK) - return set_sig_addr(pskb, ct, ctinfo, data, + return set_sig_addr(skb, ct, ctinfo, data, &acf->destCallSignalAddress, 1); return 0; } @@ -1548,7 +1548,7 @@ static int process_acf(struct sk_buff **pskb, struct nf_conn *ct, } /****************************************************************************/ -static int process_lrq(struct sk_buff **pskb, struct nf_conn *ct, +static int process_lrq(struct sk_buff *skb, struct nf_conn *ct, enum ip_conntrack_info ctinfo, unsigned char **data, LocationRequest *lrq) { @@ -1558,13 +1558,13 @@ static int process_lrq(struct sk_buff **pskb, struct nf_conn *ct, set_ras_addr = rcu_dereference(set_ras_addr_hook); if (set_ras_addr && ct->status & IPS_NAT_MASK) - return set_ras_addr(pskb, ct, ctinfo, data, + return set_ras_addr(skb, ct, ctinfo, data, &lrq->replyAddress, 1); return 0; } /****************************************************************************/ 
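The hunks above and below this point all make the same mechanical change: conntrack helpers, NAT hooks and xtables targets now receive struct sk_buff *skb instead of struct sk_buff **pskb. For context, a minimal sketch of the two calling conventions (hypothetical helpers old_style_help() and new_style_help() with a made-up EXTRA byte count, not code from this patch) shows why the extra indirection existed and why in-place reallocation makes it unnecessary:

#include <linux/skbuff.h>
#include <linux/netfilter.h>	/* NF_ACCEPT, NF_DROP */
#include <net/sock.h>		/* skb_set_owner_w() */

#define EXTRA 4			/* hypothetical number of bytes to append */

/* Old convention: growing the packet could replace the skb wholesale,
 * so helpers took struct sk_buff ** and wrote the new pointer back. */
static int old_style_help(struct sk_buff **pskb)
{
	if (skb_tailroom(*pskb) < EXTRA) {
		struct sk_buff *nskb;

		nskb = skb_copy_expand(*pskb, skb_headroom(*pskb),
				       EXTRA, GFP_ATOMIC);
		if (nskb == NULL)
			return NF_DROP;
		if ((*pskb)->sk)
			skb_set_owner_w(nskb, (*pskb)->sk);
		kfree_skb(*pskb);
		*pskb = nskb;		/* caller must observe the new skb */
	}
	skb_put(*pskb, EXTRA);
	return NF_ACCEPT;
}

/* New convention: pskb_expand_head() enlarges the data area in place,
 * so the struct sk_buff itself is never replaced, a plain pointer
 * suffices, and every (*pskb)-> dereference becomes a simple skb->. */
static int new_style_help(struct sk_buff *skb)
{
	if (skb_tailroom(skb) < EXTRA &&
	    pskb_expand_head(skb, 0, EXTRA - skb_tailroom(skb), GFP_ATOMIC))
		return NF_DROP;
	skb_put(skb, EXTRA);
	return NF_ACCEPT;
}

The same substitution appears verbatim in the nfqnl_mangle() and tcpmss_mangle_packet() hunks further down, where the skb_copy_expand()/skb_set_owner_w()/kfree_skb() sequence collapses into a single pskb_expand_head() call.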
-static int process_lcf(struct sk_buff **pskb, struct nf_conn *ct, +static int process_lcf(struct sk_buff *skb, struct nf_conn *ct, enum ip_conntrack_info ctinfo, unsigned char **data, LocationConfirm *lcf) { @@ -1603,7 +1603,7 @@ static int process_lcf(struct sk_buff **pskb, struct nf_conn *ct, } /****************************************************************************/ -static int process_irr(struct sk_buff **pskb, struct nf_conn *ct, +static int process_irr(struct sk_buff *skb, struct nf_conn *ct, enum ip_conntrack_info ctinfo, unsigned char **data, InfoRequestResponse *irr) { @@ -1615,7 +1615,7 @@ static int process_irr(struct sk_buff **pskb, struct nf_conn *ct, set_ras_addr = rcu_dereference(set_ras_addr_hook); if (set_ras_addr && ct->status & IPS_NAT_MASK) { - ret = set_ras_addr(pskb, ct, ctinfo, data, + ret = set_ras_addr(skb, ct, ctinfo, data, &irr->rasAddress, 1); if (ret < 0) return -1; @@ -1623,7 +1623,7 @@ static int process_irr(struct sk_buff **pskb, struct nf_conn *ct, set_sig_addr = rcu_dereference(set_sig_addr_hook); if (set_sig_addr && ct->status & IPS_NAT_MASK) { - ret = set_sig_addr(pskb, ct, ctinfo, data, + ret = set_sig_addr(skb, ct, ctinfo, data, irr->callSignalAddress.item, irr->callSignalAddress.count); if (ret < 0) @@ -1634,40 +1634,40 @@ static int process_irr(struct sk_buff **pskb, struct nf_conn *ct, } /****************************************************************************/ -static int process_ras(struct sk_buff **pskb, struct nf_conn *ct, +static int process_ras(struct sk_buff *skb, struct nf_conn *ct, enum ip_conntrack_info ctinfo, unsigned char **data, RasMessage *ras) { switch (ras->choice) { case eRasMessage_gatekeeperRequest: - return process_grq(pskb, ct, ctinfo, data, + return process_grq(skb, ct, ctinfo, data, &ras->gatekeeperRequest); case eRasMessage_gatekeeperConfirm: - return process_gcf(pskb, ct, ctinfo, data, + return process_gcf(skb, ct, ctinfo, data, &ras->gatekeeperConfirm); case eRasMessage_registrationRequest: - return process_rrq(pskb, ct, ctinfo, data, + return process_rrq(skb, ct, ctinfo, data, &ras->registrationRequest); case eRasMessage_registrationConfirm: - return process_rcf(pskb, ct, ctinfo, data, + return process_rcf(skb, ct, ctinfo, data, &ras->registrationConfirm); case eRasMessage_unregistrationRequest: - return process_urq(pskb, ct, ctinfo, data, + return process_urq(skb, ct, ctinfo, data, &ras->unregistrationRequest); case eRasMessage_admissionRequest: - return process_arq(pskb, ct, ctinfo, data, + return process_arq(skb, ct, ctinfo, data, &ras->admissionRequest); case eRasMessage_admissionConfirm: - return process_acf(pskb, ct, ctinfo, data, + return process_acf(skb, ct, ctinfo, data, &ras->admissionConfirm); case eRasMessage_locationRequest: - return process_lrq(pskb, ct, ctinfo, data, + return process_lrq(skb, ct, ctinfo, data, &ras->locationRequest); case eRasMessage_locationConfirm: - return process_lcf(pskb, ct, ctinfo, data, + return process_lcf(skb, ct, ctinfo, data, &ras->locationConfirm); case eRasMessage_infoRequestResponse: - return process_irr(pskb, ct, ctinfo, data, + return process_irr(skb, ct, ctinfo, data, &ras->infoRequestResponse); default: pr_debug("nf_ct_ras: RAS message %d\n", ras->choice); @@ -1678,7 +1678,7 @@ static int process_ras(struct sk_buff **pskb, struct nf_conn *ct, } /****************************************************************************/ -static int ras_help(struct sk_buff **pskb, unsigned int protoff, +static int ras_help(struct sk_buff *skb, unsigned int protoff, struct 
nf_conn *ct, enum ip_conntrack_info ctinfo) { static RasMessage ras; @@ -1686,12 +1686,12 @@ static int ras_help(struct sk_buff **pskb, unsigned int protoff, int datalen = 0; int ret; - pr_debug("nf_ct_ras: skblen = %u\n", (*pskb)->len); + pr_debug("nf_ct_ras: skblen = %u\n", skb->len); spin_lock_bh(&nf_h323_lock); /* Get UDP data */ - data = get_udp_data(pskb, protoff, &datalen); + data = get_udp_data(skb, protoff, &datalen); if (data == NULL) goto accept; pr_debug("nf_ct_ras: RAS message len=%d ", datalen); @@ -1707,7 +1707,7 @@ static int ras_help(struct sk_buff **pskb, unsigned int protoff, } /* Process RAS message */ - if (process_ras(pskb, ct, ctinfo, &data, &ras) < 0) + if (process_ras(skb, ct, ctinfo, &data, &ras) < 0) goto drop; accept: diff --git a/net/netfilter/nf_conntrack_irc.c b/net/netfilter/nf_conntrack_irc.c index 1562ca97a34..dfaed4ba83c 100644 --- a/net/netfilter/nf_conntrack_irc.c +++ b/net/netfilter/nf_conntrack_irc.c @@ -30,7 +30,7 @@ static unsigned int dcc_timeout __read_mostly = 300; static char *irc_buffer; static DEFINE_SPINLOCK(irc_buffer_lock); -unsigned int (*nf_nat_irc_hook)(struct sk_buff **pskb, +unsigned int (*nf_nat_irc_hook)(struct sk_buff *skb, enum ip_conntrack_info ctinfo, unsigned int matchoff, unsigned int matchlen, @@ -89,7 +89,7 @@ static int parse_dcc(char *data, char *data_end, u_int32_t *ip, return 0; } -static int help(struct sk_buff **pskb, unsigned int protoff, +static int help(struct sk_buff *skb, unsigned int protoff, struct nf_conn *ct, enum ip_conntrack_info ctinfo) { unsigned int dataoff; @@ -116,22 +116,22 @@ static int help(struct sk_buff **pskb, unsigned int protoff, return NF_ACCEPT; /* Not a full tcp header? */ - th = skb_header_pointer(*pskb, protoff, sizeof(_tcph), &_tcph); + th = skb_header_pointer(skb, protoff, sizeof(_tcph), &_tcph); if (th == NULL) return NF_ACCEPT; /* No data? 
*/ dataoff = protoff + th->doff*4; - if (dataoff >= (*pskb)->len) + if (dataoff >= skb->len) return NF_ACCEPT; spin_lock_bh(&irc_buffer_lock); - ib_ptr = skb_header_pointer(*pskb, dataoff, (*pskb)->len - dataoff, + ib_ptr = skb_header_pointer(skb, dataoff, skb->len - dataoff, irc_buffer); BUG_ON(ib_ptr == NULL); data = ib_ptr; - data_limit = ib_ptr + (*pskb)->len - dataoff; + data_limit = ib_ptr + skb->len - dataoff; /* strlen("\1DCC SENT t AAAAAAAA P\1\n")=24 * 5+MINMATCHLEN+strlen("t AAAAAAAA P\1\n")=14 */ @@ -143,7 +143,7 @@ static int help(struct sk_buff **pskb, unsigned int protoff, data += 5; /* we have at least (19+MINMATCHLEN)-5 bytes valid data left */ - iph = ip_hdr(*pskb); + iph = ip_hdr(skb); pr_debug("DCC found in master %u.%u.%u.%u:%u %u.%u.%u.%u:%u\n", NIPQUAD(iph->saddr), ntohs(th->source), NIPQUAD(iph->daddr), ntohs(th->dest)); @@ -193,7 +193,7 @@ static int help(struct sk_buff **pskb, unsigned int protoff, nf_nat_irc = rcu_dereference(nf_nat_irc_hook); if (nf_nat_irc && ct->status & IPS_NAT_MASK) - ret = nf_nat_irc(pskb, ctinfo, + ret = nf_nat_irc(skb, ctinfo, addr_beg_p - ib_ptr, addr_end_p - addr_beg_p, exp); diff --git a/net/netfilter/nf_conntrack_netbios_ns.c b/net/netfilter/nf_conntrack_netbios_ns.c index 1d59fabeb5f..9810d81e2a0 100644 --- a/net/netfilter/nf_conntrack_netbios_ns.c +++ b/net/netfilter/nf_conntrack_netbios_ns.c @@ -42,17 +42,17 @@ static unsigned int timeout __read_mostly = 3; module_param(timeout, uint, 0400); MODULE_PARM_DESC(timeout, "timeout for master connection/replies in seconds"); -static int help(struct sk_buff **pskb, unsigned int protoff, +static int help(struct sk_buff *skb, unsigned int protoff, struct nf_conn *ct, enum ip_conntrack_info ctinfo) { struct nf_conntrack_expect *exp; - struct iphdr *iph = ip_hdr(*pskb); - struct rtable *rt = (struct rtable *)(*pskb)->dst; + struct iphdr *iph = ip_hdr(skb); + struct rtable *rt = (struct rtable *)skb->dst; struct in_device *in_dev; __be32 mask = 0; /* we're only interested in locally generated packets */ - if ((*pskb)->sk == NULL) + if (skb->sk == NULL) goto out; if (rt == NULL || !(rt->rt_flags & RTCF_BROADCAST)) goto out; @@ -91,7 +91,7 @@ static int help(struct sk_buff **pskb, unsigned int protoff, nf_ct_expect_related(exp); nf_ct_expect_put(exp); - nf_ct_refresh(ct, *pskb, timeout * HZ); + nf_ct_refresh(ct, skb, timeout * HZ); out: return NF_ACCEPT; } diff --git a/net/netfilter/nf_conntrack_pptp.c b/net/netfilter/nf_conntrack_pptp.c index b0804199ab5..099b6df3e2b 100644 --- a/net/netfilter/nf_conntrack_pptp.c +++ b/net/netfilter/nf_conntrack_pptp.c @@ -41,14 +41,14 @@ MODULE_ALIAS("ip_conntrack_pptp"); static DEFINE_SPINLOCK(nf_pptp_lock); int -(*nf_nat_pptp_hook_outbound)(struct sk_buff **pskb, +(*nf_nat_pptp_hook_outbound)(struct sk_buff *skb, struct nf_conn *ct, enum ip_conntrack_info ctinfo, struct PptpControlHeader *ctlh, union pptp_ctrl_union *pptpReq) __read_mostly; EXPORT_SYMBOL_GPL(nf_nat_pptp_hook_outbound); int -(*nf_nat_pptp_hook_inbound)(struct sk_buff **pskb, +(*nf_nat_pptp_hook_inbound)(struct sk_buff *skb, struct nf_conn *ct, enum ip_conntrack_info ctinfo, struct PptpControlHeader *ctlh, union pptp_ctrl_union *pptpReq) __read_mostly; @@ -254,7 +254,7 @@ out_unexpect_orig: } static inline int -pptp_inbound_pkt(struct sk_buff **pskb, +pptp_inbound_pkt(struct sk_buff *skb, struct PptpControlHeader *ctlh, union pptp_ctrl_union *pptpReq, unsigned int reqlen, @@ -367,7 +367,7 @@ pptp_inbound_pkt(struct sk_buff **pskb, nf_nat_pptp_inbound = 
rcu_dereference(nf_nat_pptp_hook_inbound); if (nf_nat_pptp_inbound && ct->status & IPS_NAT_MASK) - return nf_nat_pptp_inbound(pskb, ct, ctinfo, ctlh, pptpReq); + return nf_nat_pptp_inbound(skb, ct, ctinfo, ctlh, pptpReq); return NF_ACCEPT; invalid: @@ -380,7 +380,7 @@ invalid: } static inline int -pptp_outbound_pkt(struct sk_buff **pskb, +pptp_outbound_pkt(struct sk_buff *skb, struct PptpControlHeader *ctlh, union pptp_ctrl_union *pptpReq, unsigned int reqlen, @@ -462,7 +462,7 @@ pptp_outbound_pkt(struct sk_buff **pskb, nf_nat_pptp_outbound = rcu_dereference(nf_nat_pptp_hook_outbound); if (nf_nat_pptp_outbound && ct->status & IPS_NAT_MASK) - return nf_nat_pptp_outbound(pskb, ct, ctinfo, ctlh, pptpReq); + return nf_nat_pptp_outbound(skb, ct, ctinfo, ctlh, pptpReq); return NF_ACCEPT; invalid: @@ -492,7 +492,7 @@ static const unsigned int pptp_msg_size[] = { /* track caller id inside control connection, call expect_related */ static int -conntrack_pptp_help(struct sk_buff **pskb, unsigned int protoff, +conntrack_pptp_help(struct sk_buff *skb, unsigned int protoff, struct nf_conn *ct, enum ip_conntrack_info ctinfo) { @@ -502,7 +502,7 @@ conntrack_pptp_help(struct sk_buff **pskb, unsigned int protoff, struct pptp_pkt_hdr _pptph, *pptph; struct PptpControlHeader _ctlh, *ctlh; union pptp_ctrl_union _pptpReq, *pptpReq; - unsigned int tcplen = (*pskb)->len - protoff; + unsigned int tcplen = skb->len - protoff; unsigned int datalen, reqlen, nexthdr_off; int oldsstate, oldcstate; int ret; @@ -514,12 +514,12 @@ conntrack_pptp_help(struct sk_buff **pskb, unsigned int protoff, return NF_ACCEPT; nexthdr_off = protoff; - tcph = skb_header_pointer(*pskb, nexthdr_off, sizeof(_tcph), &_tcph); + tcph = skb_header_pointer(skb, nexthdr_off, sizeof(_tcph), &_tcph); BUG_ON(!tcph); nexthdr_off += tcph->doff * 4; datalen = tcplen - tcph->doff * 4; - pptph = skb_header_pointer(*pskb, nexthdr_off, sizeof(_pptph), &_pptph); + pptph = skb_header_pointer(skb, nexthdr_off, sizeof(_pptph), &_pptph); if (!pptph) { pr_debug("no full PPTP header, can't track\n"); return NF_ACCEPT; @@ -534,7 +534,7 @@ conntrack_pptp_help(struct sk_buff **pskb, unsigned int protoff, return NF_ACCEPT; } - ctlh = skb_header_pointer(*pskb, nexthdr_off, sizeof(_ctlh), &_ctlh); + ctlh = skb_header_pointer(skb, nexthdr_off, sizeof(_ctlh), &_ctlh); if (!ctlh) return NF_ACCEPT; nexthdr_off += sizeof(_ctlh); @@ -547,7 +547,7 @@ conntrack_pptp_help(struct sk_buff **pskb, unsigned int protoff, if (reqlen > sizeof(*pptpReq)) reqlen = sizeof(*pptpReq); - pptpReq = skb_header_pointer(*pskb, nexthdr_off, reqlen, &_pptpReq); + pptpReq = skb_header_pointer(skb, nexthdr_off, reqlen, &_pptpReq); if (!pptpReq) return NF_ACCEPT; @@ -560,11 +560,11 @@ conntrack_pptp_help(struct sk_buff **pskb, unsigned int protoff, * established from PNS->PAC. 
However, RFC makes no guarantee */ if (dir == IP_CT_DIR_ORIGINAL) /* client -> server (PNS -> PAC) */ - ret = pptp_outbound_pkt(pskb, ctlh, pptpReq, reqlen, ct, + ret = pptp_outbound_pkt(skb, ctlh, pptpReq, reqlen, ct, ctinfo); else /* server -> client (PAC -> PNS) */ - ret = pptp_inbound_pkt(pskb, ctlh, pptpReq, reqlen, ct, + ret = pptp_inbound_pkt(skb, ctlh, pptpReq, reqlen, ct, ctinfo); pr_debug("sstate: %d->%d, cstate: %d->%d\n", oldsstate, info->sstate, oldcstate, info->cstate); diff --git a/net/netfilter/nf_conntrack_proto_generic.c b/net/netfilter/nf_conntrack_proto_generic.c index d8b501878d9..13f81917964 100644 --- a/net/netfilter/nf_conntrack_proto_generic.c +++ b/net/netfilter/nf_conntrack_proto_generic.c @@ -70,7 +70,6 @@ static int new(struct nf_conn *conntrack, const struct sk_buff *skb, static struct ctl_table_header *generic_sysctl_header; static struct ctl_table generic_sysctl_table[] = { { - .ctl_name = NET_NF_CONNTRACK_GENERIC_TIMEOUT, .procname = "nf_conntrack_generic_timeout", .data = &nf_ct_generic_timeout, .maxlen = sizeof(unsigned int), @@ -84,7 +83,6 @@ static struct ctl_table generic_sysctl_table[] = { #ifdef CONFIG_NF_CONNTRACK_PROC_COMPAT static struct ctl_table generic_compat_sysctl_table[] = { { - .ctl_name = NET_IPV4_NF_CONNTRACK_GENERIC_TIMEOUT, .procname = "ip_conntrack_generic_timeout", .data = &nf_ct_generic_timeout, .maxlen = sizeof(unsigned int), diff --git a/net/netfilter/nf_conntrack_proto_sctp.c b/net/netfilter/nf_conntrack_proto_sctp.c index 04192acc7c4..cb046751059 100644 --- a/net/netfilter/nf_conntrack_proto_sctp.c +++ b/net/netfilter/nf_conntrack_proto_sctp.c @@ -476,7 +476,6 @@ static unsigned int sctp_sysctl_table_users; static struct ctl_table_header *sctp_sysctl_header; static struct ctl_table sctp_sysctl_table[] = { { - .ctl_name = NET_NF_CONNTRACK_SCTP_TIMEOUT_CLOSED, .procname = "nf_conntrack_sctp_timeout_closed", .data = &nf_ct_sctp_timeout_closed, .maxlen = sizeof(unsigned int), @@ -484,7 +483,6 @@ static struct ctl_table sctp_sysctl_table[] = { .proc_handler = &proc_dointvec_jiffies, }, { - .ctl_name = NET_NF_CONNTRACK_SCTP_TIMEOUT_COOKIE_WAIT, .procname = "nf_conntrack_sctp_timeout_cookie_wait", .data = &nf_ct_sctp_timeout_cookie_wait, .maxlen = sizeof(unsigned int), @@ -492,7 +490,6 @@ static struct ctl_table sctp_sysctl_table[] = { .proc_handler = &proc_dointvec_jiffies, }, { - .ctl_name = NET_NF_CONNTRACK_SCTP_TIMEOUT_COOKIE_ECHOED, .procname = "nf_conntrack_sctp_timeout_cookie_echoed", .data = &nf_ct_sctp_timeout_cookie_echoed, .maxlen = sizeof(unsigned int), @@ -500,7 +497,6 @@ static struct ctl_table sctp_sysctl_table[] = { .proc_handler = &proc_dointvec_jiffies, }, { - .ctl_name = NET_NF_CONNTRACK_SCTP_TIMEOUT_ESTABLISHED, .procname = "nf_conntrack_sctp_timeout_established", .data = &nf_ct_sctp_timeout_established, .maxlen = sizeof(unsigned int), @@ -508,7 +504,6 @@ static struct ctl_table sctp_sysctl_table[] = { .proc_handler = &proc_dointvec_jiffies, }, { - .ctl_name = NET_NF_CONNTRACK_SCTP_TIMEOUT_SHUTDOWN_SENT, .procname = "nf_conntrack_sctp_timeout_shutdown_sent", .data = &nf_ct_sctp_timeout_shutdown_sent, .maxlen = sizeof(unsigned int), @@ -516,7 +511,6 @@ static struct ctl_table sctp_sysctl_table[] = { .proc_handler = &proc_dointvec_jiffies, }, { - .ctl_name = NET_NF_CONNTRACK_SCTP_TIMEOUT_SHUTDOWN_RECD, .procname = "nf_conntrack_sctp_timeout_shutdown_recd", .data = &nf_ct_sctp_timeout_shutdown_recd, .maxlen = sizeof(unsigned int), @@ -524,7 +518,6 @@ static struct ctl_table sctp_sysctl_table[] = { .proc_handler = 
&proc_dointvec_jiffies, }, { - .ctl_name = NET_NF_CONNTRACK_SCTP_TIMEOUT_SHUTDOWN_ACK_SENT, .procname = "nf_conntrack_sctp_timeout_shutdown_ack_sent", .data = &nf_ct_sctp_timeout_shutdown_ack_sent, .maxlen = sizeof(unsigned int), @@ -539,7 +532,6 @@ static struct ctl_table sctp_sysctl_table[] = { #ifdef CONFIG_NF_CONNTRACK_PROC_COMPAT static struct ctl_table sctp_compat_sysctl_table[] = { { - .ctl_name = NET_IPV4_NF_CONNTRACK_SCTP_TIMEOUT_CLOSED, .procname = "ip_conntrack_sctp_timeout_closed", .data = &nf_ct_sctp_timeout_closed, .maxlen = sizeof(unsigned int), @@ -547,7 +539,6 @@ static struct ctl_table sctp_compat_sysctl_table[] = { .proc_handler = &proc_dointvec_jiffies, }, { - .ctl_name = NET_IPV4_NF_CONNTRACK_SCTP_TIMEOUT_COOKIE_WAIT, .procname = "ip_conntrack_sctp_timeout_cookie_wait", .data = &nf_ct_sctp_timeout_cookie_wait, .maxlen = sizeof(unsigned int), @@ -555,7 +546,6 @@ static struct ctl_table sctp_compat_sysctl_table[] = { .proc_handler = &proc_dointvec_jiffies, }, { - .ctl_name = NET_IPV4_NF_CONNTRACK_SCTP_TIMEOUT_COOKIE_ECHOED, .procname = "ip_conntrack_sctp_timeout_cookie_echoed", .data = &nf_ct_sctp_timeout_cookie_echoed, .maxlen = sizeof(unsigned int), @@ -563,7 +553,6 @@ static struct ctl_table sctp_compat_sysctl_table[] = { .proc_handler = &proc_dointvec_jiffies, }, { - .ctl_name = NET_IPV4_NF_CONNTRACK_SCTP_TIMEOUT_ESTABLISHED, .procname = "ip_conntrack_sctp_timeout_established", .data = &nf_ct_sctp_timeout_established, .maxlen = sizeof(unsigned int), @@ -571,7 +560,6 @@ static struct ctl_table sctp_compat_sysctl_table[] = { .proc_handler = &proc_dointvec_jiffies, }, { - .ctl_name = NET_IPV4_NF_CONNTRACK_SCTP_TIMEOUT_SHUTDOWN_SENT, .procname = "ip_conntrack_sctp_timeout_shutdown_sent", .data = &nf_ct_sctp_timeout_shutdown_sent, .maxlen = sizeof(unsigned int), @@ -579,7 +567,6 @@ static struct ctl_table sctp_compat_sysctl_table[] = { .proc_handler = &proc_dointvec_jiffies, }, { - .ctl_name = NET_IPV4_NF_CONNTRACK_SCTP_TIMEOUT_SHUTDOWN_RECD, .procname = "ip_conntrack_sctp_timeout_shutdown_recd", .data = &nf_ct_sctp_timeout_shutdown_recd, .maxlen = sizeof(unsigned int), @@ -587,7 +574,6 @@ static struct ctl_table sctp_compat_sysctl_table[] = { .proc_handler = &proc_dointvec_jiffies, }, { - .ctl_name = NET_IPV4_NF_CONNTRACK_SCTP_TIMEOUT_SHUTDOWN_ACK_SENT, .procname = "ip_conntrack_sctp_timeout_shutdown_ack_sent", .data = &nf_ct_sctp_timeout_shutdown_ack_sent, .maxlen = sizeof(unsigned int), diff --git a/net/netfilter/nf_conntrack_proto_tcp.c b/net/netfilter/nf_conntrack_proto_tcp.c index c7075345971..7a3f64c1aca 100644 --- a/net/netfilter/nf_conntrack_proto_tcp.c +++ b/net/netfilter/nf_conntrack_proto_tcp.c @@ -834,10 +834,12 @@ static int tcp_packet(struct nf_conn *conntrack, case TCP_CONNTRACK_SYN_SENT: if (old_state < TCP_CONNTRACK_TIME_WAIT) break; - if (conntrack->proto.tcp.seen[!dir].flags & - IP_CT_TCP_FLAG_CLOSE_INIT) { - /* Attempt to reopen a closed connection. - * Delete this connection and look up again. */ + if ((conntrack->proto.tcp.seen[!dir].flags & + IP_CT_TCP_FLAG_CLOSE_INIT) + || (conntrack->proto.tcp.last_dir == dir + && conntrack->proto.tcp.last_index == TCP_RST_SET)) { + /* Attempt to reopen a closed/aborted connection. + * Delete this connection and look up again. 
*/ write_unlock_bh(&tcp_lock); if (del_timer(&conntrack->timeout)) conntrack->timeout.function((unsigned long) @@ -925,6 +927,7 @@ static int tcp_packet(struct nf_conn *conntrack, in_window: /* From now on we have got in-window packets */ conntrack->proto.tcp.last_index = index; + conntrack->proto.tcp.last_dir = dir; pr_debug("tcp_conntracks: "); NF_CT_DUMP_TUPLE(tuple); @@ -1162,7 +1165,6 @@ static unsigned int tcp_sysctl_table_users; static struct ctl_table_header *tcp_sysctl_header; static struct ctl_table tcp_sysctl_table[] = { { - .ctl_name = NET_NF_CONNTRACK_TCP_TIMEOUT_SYN_SENT, .procname = "nf_conntrack_tcp_timeout_syn_sent", .data = &nf_ct_tcp_timeout_syn_sent, .maxlen = sizeof(unsigned int), @@ -1170,7 +1172,6 @@ static struct ctl_table tcp_sysctl_table[] = { .proc_handler = &proc_dointvec_jiffies, }, { - .ctl_name = NET_NF_CONNTRACK_TCP_TIMEOUT_SYN_RECV, .procname = "nf_conntrack_tcp_timeout_syn_recv", .data = &nf_ct_tcp_timeout_syn_recv, .maxlen = sizeof(unsigned int), @@ -1178,7 +1179,6 @@ static struct ctl_table tcp_sysctl_table[] = { .proc_handler = &proc_dointvec_jiffies, }, { - .ctl_name = NET_NF_CONNTRACK_TCP_TIMEOUT_ESTABLISHED, .procname = "nf_conntrack_tcp_timeout_established", .data = &nf_ct_tcp_timeout_established, .maxlen = sizeof(unsigned int), @@ -1186,7 +1186,6 @@ static struct ctl_table tcp_sysctl_table[] = { .proc_handler = &proc_dointvec_jiffies, }, { - .ctl_name = NET_NF_CONNTRACK_TCP_TIMEOUT_FIN_WAIT, .procname = "nf_conntrack_tcp_timeout_fin_wait", .data = &nf_ct_tcp_timeout_fin_wait, .maxlen = sizeof(unsigned int), @@ -1194,7 +1193,6 @@ static struct ctl_table tcp_sysctl_table[] = { .proc_handler = &proc_dointvec_jiffies, }, { - .ctl_name = NET_NF_CONNTRACK_TCP_TIMEOUT_CLOSE_WAIT, .procname = "nf_conntrack_tcp_timeout_close_wait", .data = &nf_ct_tcp_timeout_close_wait, .maxlen = sizeof(unsigned int), @@ -1202,7 +1200,6 @@ static struct ctl_table tcp_sysctl_table[] = { .proc_handler = &proc_dointvec_jiffies, }, { - .ctl_name = NET_NF_CONNTRACK_TCP_TIMEOUT_LAST_ACK, .procname = "nf_conntrack_tcp_timeout_last_ack", .data = &nf_ct_tcp_timeout_last_ack, .maxlen = sizeof(unsigned int), @@ -1210,7 +1207,6 @@ static struct ctl_table tcp_sysctl_table[] = { .proc_handler = &proc_dointvec_jiffies, }, { - .ctl_name = NET_NF_CONNTRACK_TCP_TIMEOUT_TIME_WAIT, .procname = "nf_conntrack_tcp_timeout_time_wait", .data = &nf_ct_tcp_timeout_time_wait, .maxlen = sizeof(unsigned int), @@ -1218,7 +1214,6 @@ static struct ctl_table tcp_sysctl_table[] = { .proc_handler = &proc_dointvec_jiffies, }, { - .ctl_name = NET_NF_CONNTRACK_TCP_TIMEOUT_CLOSE, .procname = "nf_conntrack_tcp_timeout_close", .data = &nf_ct_tcp_timeout_close, .maxlen = sizeof(unsigned int), @@ -1226,7 +1221,6 @@ static struct ctl_table tcp_sysctl_table[] = { .proc_handler = &proc_dointvec_jiffies, }, { - .ctl_name = NET_NF_CONNTRACK_TCP_TIMEOUT_MAX_RETRANS, .procname = "nf_conntrack_tcp_timeout_max_retrans", .data = &nf_ct_tcp_timeout_max_retrans, .maxlen = sizeof(unsigned int), @@ -1265,7 +1259,6 @@ static struct ctl_table tcp_sysctl_table[] = { #ifdef CONFIG_NF_CONNTRACK_PROC_COMPAT static struct ctl_table tcp_compat_sysctl_table[] = { { - .ctl_name = NET_IPV4_NF_CONNTRACK_TCP_TIMEOUT_SYN_SENT, .procname = "ip_conntrack_tcp_timeout_syn_sent", .data = &nf_ct_tcp_timeout_syn_sent, .maxlen = sizeof(unsigned int), @@ -1273,7 +1266,6 @@ static struct ctl_table tcp_compat_sysctl_table[] = { .proc_handler = &proc_dointvec_jiffies, }, { - .ctl_name = NET_IPV4_NF_CONNTRACK_TCP_TIMEOUT_SYN_RECV, .procname = 
"ip_conntrack_tcp_timeout_syn_recv", .data = &nf_ct_tcp_timeout_syn_recv, .maxlen = sizeof(unsigned int), @@ -1281,7 +1273,6 @@ static struct ctl_table tcp_compat_sysctl_table[] = { .proc_handler = &proc_dointvec_jiffies, }, { - .ctl_name = NET_IPV4_NF_CONNTRACK_TCP_TIMEOUT_ESTABLISHED, .procname = "ip_conntrack_tcp_timeout_established", .data = &nf_ct_tcp_timeout_established, .maxlen = sizeof(unsigned int), @@ -1289,7 +1280,6 @@ static struct ctl_table tcp_compat_sysctl_table[] = { .proc_handler = &proc_dointvec_jiffies, }, { - .ctl_name = NET_IPV4_NF_CONNTRACK_TCP_TIMEOUT_FIN_WAIT, .procname = "ip_conntrack_tcp_timeout_fin_wait", .data = &nf_ct_tcp_timeout_fin_wait, .maxlen = sizeof(unsigned int), @@ -1297,7 +1287,6 @@ static struct ctl_table tcp_compat_sysctl_table[] = { .proc_handler = &proc_dointvec_jiffies, }, { - .ctl_name = NET_IPV4_NF_CONNTRACK_TCP_TIMEOUT_CLOSE_WAIT, .procname = "ip_conntrack_tcp_timeout_close_wait", .data = &nf_ct_tcp_timeout_close_wait, .maxlen = sizeof(unsigned int), @@ -1305,7 +1294,6 @@ static struct ctl_table tcp_compat_sysctl_table[] = { .proc_handler = &proc_dointvec_jiffies, }, { - .ctl_name = NET_IPV4_NF_CONNTRACK_TCP_TIMEOUT_LAST_ACK, .procname = "ip_conntrack_tcp_timeout_last_ack", .data = &nf_ct_tcp_timeout_last_ack, .maxlen = sizeof(unsigned int), @@ -1313,7 +1301,6 @@ static struct ctl_table tcp_compat_sysctl_table[] = { .proc_handler = &proc_dointvec_jiffies, }, { - .ctl_name = NET_IPV4_NF_CONNTRACK_TCP_TIMEOUT_TIME_WAIT, .procname = "ip_conntrack_tcp_timeout_time_wait", .data = &nf_ct_tcp_timeout_time_wait, .maxlen = sizeof(unsigned int), @@ -1321,7 +1308,6 @@ static struct ctl_table tcp_compat_sysctl_table[] = { .proc_handler = &proc_dointvec_jiffies, }, { - .ctl_name = NET_IPV4_NF_CONNTRACK_TCP_TIMEOUT_CLOSE, .procname = "ip_conntrack_tcp_timeout_close", .data = &nf_ct_tcp_timeout_close, .maxlen = sizeof(unsigned int), @@ -1329,7 +1315,6 @@ static struct ctl_table tcp_compat_sysctl_table[] = { .proc_handler = &proc_dointvec_jiffies, }, { - .ctl_name = NET_IPV4_NF_CONNTRACK_TCP_TIMEOUT_MAX_RETRANS, .procname = "ip_conntrack_tcp_timeout_max_retrans", .data = &nf_ct_tcp_timeout_max_retrans, .maxlen = sizeof(unsigned int), diff --git a/net/netfilter/nf_conntrack_proto_udp.c b/net/netfilter/nf_conntrack_proto_udp.c index ba80e1a1ea1..b3e7ecb080e 100644 --- a/net/netfilter/nf_conntrack_proto_udp.c +++ b/net/netfilter/nf_conntrack_proto_udp.c @@ -146,7 +146,6 @@ static unsigned int udp_sysctl_table_users; static struct ctl_table_header *udp_sysctl_header; static struct ctl_table udp_sysctl_table[] = { { - .ctl_name = NET_NF_CONNTRACK_UDP_TIMEOUT, .procname = "nf_conntrack_udp_timeout", .data = &nf_ct_udp_timeout, .maxlen = sizeof(unsigned int), @@ -154,7 +153,6 @@ static struct ctl_table udp_sysctl_table[] = { .proc_handler = &proc_dointvec_jiffies, }, { - .ctl_name = NET_NF_CONNTRACK_UDP_TIMEOUT_STREAM, .procname = "nf_conntrack_udp_timeout_stream", .data = &nf_ct_udp_timeout_stream, .maxlen = sizeof(unsigned int), @@ -168,7 +166,6 @@ static struct ctl_table udp_sysctl_table[] = { #ifdef CONFIG_NF_CONNTRACK_PROC_COMPAT static struct ctl_table udp_compat_sysctl_table[] = { { - .ctl_name = NET_IPV4_NF_CONNTRACK_UDP_TIMEOUT, .procname = "ip_conntrack_udp_timeout", .data = &nf_ct_udp_timeout, .maxlen = sizeof(unsigned int), @@ -176,7 +173,6 @@ static struct ctl_table udp_compat_sysctl_table[] = { .proc_handler = &proc_dointvec_jiffies, }, { - .ctl_name = NET_IPV4_NF_CONNTRACK_UDP_TIMEOUT_STREAM, .procname = "ip_conntrack_udp_timeout_stream", .data = 
&nf_ct_udp_timeout_stream, .maxlen = sizeof(unsigned int), diff --git a/net/netfilter/nf_conntrack_sane.c b/net/netfilter/nf_conntrack_sane.c index 355d371bac9..b5a16c6e21c 100644 --- a/net/netfilter/nf_conntrack_sane.c +++ b/net/netfilter/nf_conntrack_sane.c @@ -56,7 +56,7 @@ struct sane_reply_net_start { /* other fields aren't interesting for conntrack */ }; -static int help(struct sk_buff **pskb, +static int help(struct sk_buff *skb, unsigned int protoff, struct nf_conn *ct, enum ip_conntrack_info ctinfo) @@ -80,19 +80,19 @@ static int help(struct sk_buff **pskb, return NF_ACCEPT; /* Not a full tcp header? */ - th = skb_header_pointer(*pskb, protoff, sizeof(_tcph), &_tcph); + th = skb_header_pointer(skb, protoff, sizeof(_tcph), &_tcph); if (th == NULL) return NF_ACCEPT; /* No data? */ dataoff = protoff + th->doff * 4; - if (dataoff >= (*pskb)->len) + if (dataoff >= skb->len) return NF_ACCEPT; - datalen = (*pskb)->len - dataoff; + datalen = skb->len - dataoff; spin_lock_bh(&nf_sane_lock); - sb_ptr = skb_header_pointer(*pskb, dataoff, datalen, sane_buffer); + sb_ptr = skb_header_pointer(skb, dataoff, datalen, sane_buffer); BUG_ON(sb_ptr == NULL); if (dir == IP_CT_DIR_ORIGINAL) { diff --git a/net/netfilter/nf_conntrack_sip.c b/net/netfilter/nf_conntrack_sip.c index d449fa47491..8f8b5a48df3 100644 --- a/net/netfilter/nf_conntrack_sip.c +++ b/net/netfilter/nf_conntrack_sip.c @@ -36,13 +36,13 @@ static unsigned int sip_timeout __read_mostly = SIP_TIMEOUT; module_param(sip_timeout, uint, 0600); MODULE_PARM_DESC(sip_timeout, "timeout for the master SIP session"); -unsigned int (*nf_nat_sip_hook)(struct sk_buff **pskb, +unsigned int (*nf_nat_sip_hook)(struct sk_buff *skb, enum ip_conntrack_info ctinfo, struct nf_conn *ct, const char **dptr) __read_mostly; EXPORT_SYMBOL_GPL(nf_nat_sip_hook); -unsigned int (*nf_nat_sdp_hook)(struct sk_buff **pskb, +unsigned int (*nf_nat_sdp_hook)(struct sk_buff *skb, enum ip_conntrack_info ctinfo, struct nf_conntrack_expect *exp, const char *dptr) __read_mostly; @@ -363,7 +363,7 @@ int ct_sip_get_info(struct nf_conn *ct, } EXPORT_SYMBOL_GPL(ct_sip_get_info); -static int set_expected_rtp(struct sk_buff **pskb, +static int set_expected_rtp(struct sk_buff *skb, struct nf_conn *ct, enum ip_conntrack_info ctinfo, union nf_conntrack_address *addr, @@ -385,7 +385,7 @@ static int set_expected_rtp(struct sk_buff **pskb, nf_nat_sdp = rcu_dereference(nf_nat_sdp_hook); if (nf_nat_sdp && ct->status & IPS_NAT_MASK) - ret = nf_nat_sdp(pskb, ctinfo, exp, dptr); + ret = nf_nat_sdp(skb, ctinfo, exp, dptr); else { if (nf_ct_expect_related(exp) != 0) ret = NF_DROP; @@ -397,7 +397,7 @@ static int set_expected_rtp(struct sk_buff **pskb, return ret; } -static int sip_help(struct sk_buff **pskb, +static int sip_help(struct sk_buff *skb, unsigned int protoff, struct nf_conn *ct, enum ip_conntrack_info ctinfo) @@ -414,13 +414,13 @@ static int sip_help(struct sk_buff **pskb, /* No Data ? 
*/ dataoff = protoff + sizeof(struct udphdr); - if (dataoff >= (*pskb)->len) + if (dataoff >= skb->len) return NF_ACCEPT; - nf_ct_refresh(ct, *pskb, sip_timeout * HZ); + nf_ct_refresh(ct, skb, sip_timeout * HZ); - if (!skb_is_nonlinear(*pskb)) - dptr = (*pskb)->data + dataoff; + if (!skb_is_nonlinear(skb)) + dptr = skb->data + dataoff; else { pr_debug("Copy of skbuff not supported yet.\n"); goto out; @@ -428,13 +428,13 @@ static int sip_help(struct sk_buff **pskb, nf_nat_sip = rcu_dereference(nf_nat_sip_hook); if (nf_nat_sip && ct->status & IPS_NAT_MASK) { - if (!nf_nat_sip(pskb, ctinfo, ct, &dptr)) { + if (!nf_nat_sip(skb, ctinfo, ct, &dptr)) { ret = NF_DROP; goto out; } } - datalen = (*pskb)->len - dataoff; + datalen = skb->len - dataoff; if (datalen < sizeof("SIP/2.0 200") - 1) goto out; @@ -464,7 +464,7 @@ static int sip_help(struct sk_buff **pskb, ret = NF_DROP; goto out; } - ret = set_expected_rtp(pskb, ct, ctinfo, &addr, + ret = set_expected_rtp(skb, ct, ctinfo, &addr, htons(port), dptr); } } diff --git a/net/netfilter/nf_conntrack_tftp.c b/net/netfilter/nf_conntrack_tftp.c index cc19506cf2f..e894aa1ff3a 100644 --- a/net/netfilter/nf_conntrack_tftp.c +++ b/net/netfilter/nf_conntrack_tftp.c @@ -29,12 +29,12 @@ static int ports_c; module_param_array(ports, ushort, &ports_c, 0400); MODULE_PARM_DESC(ports, "Port numbers of TFTP servers"); -unsigned int (*nf_nat_tftp_hook)(struct sk_buff **pskb, +unsigned int (*nf_nat_tftp_hook)(struct sk_buff *skb, enum ip_conntrack_info ctinfo, struct nf_conntrack_expect *exp) __read_mostly; EXPORT_SYMBOL_GPL(nf_nat_tftp_hook); -static int tftp_help(struct sk_buff **pskb, +static int tftp_help(struct sk_buff *skb, unsigned int protoff, struct nf_conn *ct, enum ip_conntrack_info ctinfo) @@ -46,7 +46,7 @@ static int tftp_help(struct sk_buff **pskb, int family = ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.l3num; typeof(nf_nat_tftp_hook) nf_nat_tftp; - tfh = skb_header_pointer(*pskb, protoff + sizeof(struct udphdr), + tfh = skb_header_pointer(skb, protoff + sizeof(struct udphdr), sizeof(_tftph), &_tftph); if (tfh == NULL) return NF_ACCEPT; @@ -70,7 +70,7 @@ static int tftp_help(struct sk_buff **pskb, nf_nat_tftp = rcu_dereference(nf_nat_tftp_hook); if (nf_nat_tftp && ct->status & IPS_NAT_MASK) - ret = nf_nat_tftp(pskb, ctinfo, exp); + ret = nf_nat_tftp(skb, ctinfo, exp); else if (nf_ct_expect_related(exp) != 0) ret = NF_DROP; nf_ct_expect_put(exp); diff --git a/net/netfilter/nf_internals.h b/net/netfilter/nf_internals.h index 0df7fff196a..196269c1e58 100644 --- a/net/netfilter/nf_internals.h +++ b/net/netfilter/nf_internals.h @@ -14,7 +14,7 @@ /* core.c */ extern unsigned int nf_iterate(struct list_head *head, - struct sk_buff **skb, + struct sk_buff *skb, int hook, const struct net_device *indev, const struct net_device *outdev, diff --git a/net/netfilter/nf_queue.c b/net/netfilter/nf_queue.c index a481a349f7b..0cef1433d66 100644 --- a/net/netfilter/nf_queue.c +++ b/net/netfilter/nf_queue.c @@ -256,14 +256,14 @@ void nf_reinject(struct sk_buff *skb, struct nf_info *info, if (verdict == NF_ACCEPT) { afinfo = nf_get_afinfo(info->pf); - if (!afinfo || afinfo->reroute(&skb, info) < 0) + if (!afinfo || afinfo->reroute(skb, info) < 0) verdict = NF_DROP; } if (verdict == NF_ACCEPT) { next_hook: verdict = nf_iterate(&nf_hooks[info->pf][info->hook], - &skb, info->hook, + skb, info->hook, info->indev, info->outdev, &elem, info->okfn, INT_MIN); } diff --git a/net/netfilter/nfnetlink_queue.c b/net/netfilter/nfnetlink_queue.c index 49f0480afe0..3ceeffcf6f9 100644 --- 
a/net/netfilter/nfnetlink_queue.c +++ b/net/netfilter/nfnetlink_queue.c @@ -617,6 +617,7 @@ static int nfqnl_mangle(void *data, int data_len, struct nfqnl_queue_entry *e) { int diff; + int err; diff = data_len - e->skb->len; if (diff < 0) { @@ -626,25 +627,18 @@ nfqnl_mangle(void *data, int data_len, struct nfqnl_queue_entry *e) if (data_len > 0xFFFF) return -EINVAL; if (diff > skb_tailroom(e->skb)) { - struct sk_buff *newskb; - - newskb = skb_copy_expand(e->skb, - skb_headroom(e->skb), - diff, - GFP_ATOMIC); - if (newskb == NULL) { + err = pskb_expand_head(e->skb, 0, + diff - skb_tailroom(e->skb), + GFP_ATOMIC); + if (err) { printk(KERN_WARNING "nf_queue: OOM " "in mangle, dropping packet\n"); - return -ENOMEM; + return err; } - if (e->skb->sk) - skb_set_owner_w(newskb, e->skb->sk); - kfree_skb(e->skb); - e->skb = newskb; } skb_put(e->skb, diff); } - if (!skb_make_writable(&e->skb, data_len)) + if (!skb_make_writable(e->skb, data_len)) return -ENOMEM; skb_copy_to_linear_data(e->skb, data, data_len); e->skb->ip_summed = CHECKSUM_NONE; diff --git a/net/netfilter/xt_CLASSIFY.c b/net/netfilter/xt_CLASSIFY.c index 07a1b966500..77eeae658d4 100644 --- a/net/netfilter/xt_CLASSIFY.c +++ b/net/netfilter/xt_CLASSIFY.c @@ -27,7 +27,7 @@ MODULE_ALIAS("ipt_CLASSIFY"); MODULE_ALIAS("ip6t_CLASSIFY"); static unsigned int -target(struct sk_buff **pskb, +target(struct sk_buff *skb, const struct net_device *in, const struct net_device *out, unsigned int hooknum, @@ -36,7 +36,7 @@ target(struct sk_buff **pskb, { const struct xt_classify_target_info *clinfo = targinfo; - (*pskb)->priority = clinfo->priority; + skb->priority = clinfo->priority; return XT_CONTINUE; } diff --git a/net/netfilter/xt_CONNMARK.c b/net/netfilter/xt_CONNMARK.c index 7043c2757e0..856793e8db7 100644 --- a/net/netfilter/xt_CONNMARK.c +++ b/net/netfilter/xt_CONNMARK.c @@ -23,7 +23,7 @@ #include <linux/ip.h> #include <net/checksum.h> -MODULE_AUTHOR("Henrik Nordstrom <hno@marasytems.com>"); +MODULE_AUTHOR("Henrik Nordstrom <hno@marasystems.com>"); MODULE_DESCRIPTION("IP tables CONNMARK matching module"); MODULE_LICENSE("GPL"); MODULE_ALIAS("ipt_CONNMARK"); @@ -34,7 +34,7 @@ MODULE_ALIAS("ip6t_CONNMARK"); #include <net/netfilter/nf_conntrack_ecache.h> static unsigned int -target(struct sk_buff **pskb, +target(struct sk_buff *skb, const struct net_device *in, const struct net_device *out, unsigned int hooknum, @@ -48,28 +48,28 @@ target(struct sk_buff **pskb, u_int32_t mark; u_int32_t newmark; - ct = nf_ct_get(*pskb, &ctinfo); + ct = nf_ct_get(skb, &ctinfo); if (ct) { switch(markinfo->mode) { case XT_CONNMARK_SET: newmark = (ct->mark & ~markinfo->mask) | markinfo->mark; if (newmark != ct->mark) { ct->mark = newmark; - nf_conntrack_event_cache(IPCT_MARK, *pskb); + nf_conntrack_event_cache(IPCT_MARK, skb); } break; case XT_CONNMARK_SAVE: newmark = (ct->mark & ~markinfo->mask) | - ((*pskb)->mark & markinfo->mask); + (skb->mark & markinfo->mask); if (ct->mark != newmark) { ct->mark = newmark; - nf_conntrack_event_cache(IPCT_MARK, *pskb); + nf_conntrack_event_cache(IPCT_MARK, skb); } break; case XT_CONNMARK_RESTORE: - mark = (*pskb)->mark; + mark = skb->mark; diff = (ct->mark ^ mark) & markinfo->mask; - (*pskb)->mark = mark ^ diff; + skb->mark = mark ^ diff; break; } } diff --git a/net/netfilter/xt_CONNSECMARK.c b/net/netfilter/xt_CONNSECMARK.c index 63d73138c1b..021b5c8d20e 100644 --- a/net/netfilter/xt_CONNSECMARK.c +++ b/net/netfilter/xt_CONNSECMARK.c @@ -61,12 +61,11 @@ static void secmark_restore(struct sk_buff *skb) } } -static unsigned int 
target(struct sk_buff **pskb, const struct net_device *in, +static unsigned int target(struct sk_buff *skb, const struct net_device *in, const struct net_device *out, unsigned int hooknum, const struct xt_target *target, const void *targinfo) { - struct sk_buff *skb = *pskb; const struct xt_connsecmark_target_info *info = targinfo; switch (info->mode) { diff --git a/net/netfilter/xt_DSCP.c b/net/netfilter/xt_DSCP.c index 798ab731009..6322a933ab7 100644 --- a/net/netfilter/xt_DSCP.c +++ b/net/netfilter/xt_DSCP.c @@ -25,7 +25,7 @@ MODULE_LICENSE("GPL"); MODULE_ALIAS("ipt_DSCP"); MODULE_ALIAS("ip6t_DSCP"); -static unsigned int target(struct sk_buff **pskb, +static unsigned int target(struct sk_buff *skb, const struct net_device *in, const struct net_device *out, unsigned int hooknum, @@ -33,20 +33,20 @@ static unsigned int target(struct sk_buff **pskb, const void *targinfo) { const struct xt_DSCP_info *dinfo = targinfo; - u_int8_t dscp = ipv4_get_dsfield(ip_hdr(*pskb)) >> XT_DSCP_SHIFT; + u_int8_t dscp = ipv4_get_dsfield(ip_hdr(skb)) >> XT_DSCP_SHIFT; if (dscp != dinfo->dscp) { - if (!skb_make_writable(pskb, sizeof(struct iphdr))) + if (!skb_make_writable(skb, sizeof(struct iphdr))) return NF_DROP; - ipv4_change_dsfield(ip_hdr(*pskb), (__u8)(~XT_DSCP_MASK), + ipv4_change_dsfield(ip_hdr(skb), (__u8)(~XT_DSCP_MASK), dinfo->dscp << XT_DSCP_SHIFT); } return XT_CONTINUE; } -static unsigned int target6(struct sk_buff **pskb, +static unsigned int target6(struct sk_buff *skb, const struct net_device *in, const struct net_device *out, unsigned int hooknum, @@ -54,13 +54,13 @@ static unsigned int target6(struct sk_buff **pskb, const void *targinfo) { const struct xt_DSCP_info *dinfo = targinfo; - u_int8_t dscp = ipv6_get_dsfield(ipv6_hdr(*pskb)) >> XT_DSCP_SHIFT; + u_int8_t dscp = ipv6_get_dsfield(ipv6_hdr(skb)) >> XT_DSCP_SHIFT; if (dscp != dinfo->dscp) { - if (!skb_make_writable(pskb, sizeof(struct ipv6hdr))) + if (!skb_make_writable(skb, sizeof(struct ipv6hdr))) return NF_DROP; - ipv6_change_dsfield(ipv6_hdr(*pskb), (__u8)(~XT_DSCP_MASK), + ipv6_change_dsfield(ipv6_hdr(skb), (__u8)(~XT_DSCP_MASK), dinfo->dscp << XT_DSCP_SHIFT); } return XT_CONTINUE; diff --git a/net/netfilter/xt_MARK.c b/net/netfilter/xt_MARK.c index f30fe0baf7d..bc6503d77d7 100644 --- a/net/netfilter/xt_MARK.c +++ b/net/netfilter/xt_MARK.c @@ -22,7 +22,7 @@ MODULE_ALIAS("ipt_MARK"); MODULE_ALIAS("ip6t_MARK"); static unsigned int -target_v0(struct sk_buff **pskb, +target_v0(struct sk_buff *skb, const struct net_device *in, const struct net_device *out, unsigned int hooknum, @@ -31,12 +31,12 @@ target_v0(struct sk_buff **pskb, { const struct xt_mark_target_info *markinfo = targinfo; - (*pskb)->mark = markinfo->mark; + skb->mark = markinfo->mark; return XT_CONTINUE; } static unsigned int -target_v1(struct sk_buff **pskb, +target_v1(struct sk_buff *skb, const struct net_device *in, const struct net_device *out, unsigned int hooknum, @@ -52,15 +52,15 @@ target_v1(struct sk_buff **pskb, break; case XT_MARK_AND: - mark = (*pskb)->mark & markinfo->mark; + mark = skb->mark & markinfo->mark; break; case XT_MARK_OR: - mark = (*pskb)->mark | markinfo->mark; + mark = skb->mark | markinfo->mark; break; } - (*pskb)->mark = mark; + skb->mark = mark; return XT_CONTINUE; } diff --git a/net/netfilter/xt_NFLOG.c b/net/netfilter/xt_NFLOG.c index d3594c7ccb2..9fb449ffbf8 100644 --- a/net/netfilter/xt_NFLOG.c +++ b/net/netfilter/xt_NFLOG.c @@ -20,7 +20,7 @@ MODULE_ALIAS("ipt_NFLOG"); MODULE_ALIAS("ip6t_NFLOG"); static unsigned int -nflog_target(struct 
sk_buff **pskb, +nflog_target(struct sk_buff *skb, const struct net_device *in, const struct net_device *out, unsigned int hooknum, const struct xt_target *target, const void *targinfo) @@ -33,7 +33,7 @@ nflog_target(struct sk_buff **pskb, li.u.ulog.group = info->group; li.u.ulog.qthreshold = info->threshold; - nf_log_packet(target->family, hooknum, *pskb, in, out, &li, + nf_log_packet(target->family, hooknum, skb, in, out, &li, "%s", info->prefix); return XT_CONTINUE; } diff --git a/net/netfilter/xt_NFQUEUE.c b/net/netfilter/xt_NFQUEUE.c index 13f59f3e8c3..c3984e9f766 100644 --- a/net/netfilter/xt_NFQUEUE.c +++ b/net/netfilter/xt_NFQUEUE.c @@ -24,7 +24,7 @@ MODULE_ALIAS("ip6t_NFQUEUE"); MODULE_ALIAS("arpt_NFQUEUE"); static unsigned int -target(struct sk_buff **pskb, +target(struct sk_buff *skb, const struct net_device *in, const struct net_device *out, unsigned int hooknum, diff --git a/net/netfilter/xt_NOTRACK.c b/net/netfilter/xt_NOTRACK.c index fec1aefb1c3..4976ce18661 100644 --- a/net/netfilter/xt_NOTRACK.c +++ b/net/netfilter/xt_NOTRACK.c @@ -12,7 +12,7 @@ MODULE_ALIAS("ipt_NOTRACK"); MODULE_ALIAS("ip6t_NOTRACK"); static unsigned int -target(struct sk_buff **pskb, +target(struct sk_buff *skb, const struct net_device *in, const struct net_device *out, unsigned int hooknum, @@ -20,16 +20,16 @@ target(struct sk_buff **pskb, const void *targinfo) { /* Previously seen (loopback)? Ignore. */ - if ((*pskb)->nfct != NULL) + if (skb->nfct != NULL) return XT_CONTINUE; /* Attach fake conntrack entry. If there is a real ct entry corresponding to this packet, it'll hang around till timing out. We don't deal with it for performance reasons. JK */ - (*pskb)->nfct = &nf_conntrack_untracked.ct_general; - (*pskb)->nfctinfo = IP_CT_NEW; - nf_conntrack_get((*pskb)->nfct); + skb->nfct = &nf_conntrack_untracked.ct_general; + skb->nfctinfo = IP_CT_NEW; + nf_conntrack_get(skb->nfct); return XT_CONTINUE; } diff --git a/net/netfilter/xt_SECMARK.c b/net/netfilter/xt_SECMARK.c index c83779a941a..235806eb6ec 100644 --- a/net/netfilter/xt_SECMARK.c +++ b/net/netfilter/xt_SECMARK.c @@ -28,7 +28,7 @@ MODULE_ALIAS("ip6t_SECMARK"); static u8 mode; -static unsigned int target(struct sk_buff **pskb, const struct net_device *in, +static unsigned int target(struct sk_buff *skb, const struct net_device *in, const struct net_device *out, unsigned int hooknum, const struct xt_target *target, const void *targinfo) @@ -47,7 +47,7 @@ static unsigned int target(struct sk_buff **pskb, const struct net_device *in, BUG(); } - (*pskb)->secmark = secmark; + skb->secmark = secmark; return XT_CONTINUE; } diff --git a/net/netfilter/xt_TCPMSS.c b/net/netfilter/xt_TCPMSS.c index d40f7e4b128..07435a602b1 100644 --- a/net/netfilter/xt_TCPMSS.c +++ b/net/netfilter/xt_TCPMSS.c @@ -39,7 +39,7 @@ optlen(const u_int8_t *opt, unsigned int offset) } static int -tcpmss_mangle_packet(struct sk_buff **pskb, +tcpmss_mangle_packet(struct sk_buff *skb, const struct xt_tcpmss_info *info, unsigned int tcphoff, unsigned int minlen) @@ -50,11 +50,11 @@ tcpmss_mangle_packet(struct sk_buff **pskb, u16 newmss; u8 *opt; - if (!skb_make_writable(pskb, (*pskb)->len)) + if (!skb_make_writable(skb, skb->len)) return -1; - tcplen = (*pskb)->len - tcphoff; - tcph = (struct tcphdr *)(skb_network_header(*pskb) + tcphoff); + tcplen = skb->len - tcphoff; + tcph = (struct tcphdr *)(skb_network_header(skb) + tcphoff); /* Since it passed flags test in tcp match, we know it is not a fragment, and has data >= tcp header length. 
SYN @@ -64,19 +64,19 @@ tcpmss_mangle_packet(struct sk_buff **pskb, if (tcplen != tcph->doff*4) { if (net_ratelimit()) printk(KERN_ERR "xt_TCPMSS: bad length (%u bytes)\n", - (*pskb)->len); + skb->len); return -1; } if (info->mss == XT_TCPMSS_CLAMP_PMTU) { - if (dst_mtu((*pskb)->dst) <= minlen) { + if (dst_mtu(skb->dst) <= minlen) { if (net_ratelimit()) printk(KERN_ERR "xt_TCPMSS: " "unknown or invalid path-MTU (%u)\n", - dst_mtu((*pskb)->dst)); + dst_mtu(skb->dst)); return -1; } - newmss = dst_mtu((*pskb)->dst) - minlen; + newmss = dst_mtu(skb->dst) - minlen; } else newmss = info->mss; @@ -95,7 +95,7 @@ tcpmss_mangle_packet(struct sk_buff **pskb, opt[i+2] = (newmss & 0xff00) >> 8; opt[i+3] = newmss & 0x00ff; - nf_proto_csum_replace2(&tcph->check, *pskb, + nf_proto_csum_replace2(&tcph->check, skb, htons(oldmss), htons(newmss), 0); return 0; } @@ -104,57 +104,53 @@ tcpmss_mangle_packet(struct sk_buff **pskb, /* * MSS Option not found ?! add it.. */ - if (skb_tailroom((*pskb)) < TCPOLEN_MSS) { - struct sk_buff *newskb; - - newskb = skb_copy_expand(*pskb, skb_headroom(*pskb), - TCPOLEN_MSS, GFP_ATOMIC); - if (!newskb) + if (skb_tailroom(skb) < TCPOLEN_MSS) { + if (pskb_expand_head(skb, 0, + TCPOLEN_MSS - skb_tailroom(skb), + GFP_ATOMIC)) return -1; - kfree_skb(*pskb); - *pskb = newskb; - tcph = (struct tcphdr *)(skb_network_header(*pskb) + tcphoff); + tcph = (struct tcphdr *)(skb_network_header(skb) + tcphoff); } - skb_put((*pskb), TCPOLEN_MSS); + skb_put(skb, TCPOLEN_MSS); opt = (u_int8_t *)tcph + sizeof(struct tcphdr); memmove(opt + TCPOLEN_MSS, opt, tcplen - sizeof(struct tcphdr)); - nf_proto_csum_replace2(&tcph->check, *pskb, + nf_proto_csum_replace2(&tcph->check, skb, htons(tcplen), htons(tcplen + TCPOLEN_MSS), 1); opt[0] = TCPOPT_MSS; opt[1] = TCPOLEN_MSS; opt[2] = (newmss & 0xff00) >> 8; opt[3] = newmss & 0x00ff; - nf_proto_csum_replace4(&tcph->check, *pskb, 0, *((__be32 *)opt), 0); + nf_proto_csum_replace4(&tcph->check, skb, 0, *((__be32 *)opt), 0); oldval = ((__be16 *)tcph)[6]; tcph->doff += TCPOLEN_MSS/4; - nf_proto_csum_replace2(&tcph->check, *pskb, + nf_proto_csum_replace2(&tcph->check, skb, oldval, ((__be16 *)tcph)[6], 0); return TCPOLEN_MSS; } static unsigned int -xt_tcpmss_target4(struct sk_buff **pskb, +xt_tcpmss_target4(struct sk_buff *skb, const struct net_device *in, const struct net_device *out, unsigned int hooknum, const struct xt_target *target, const void *targinfo) { - struct iphdr *iph = ip_hdr(*pskb); + struct iphdr *iph = ip_hdr(skb); __be16 newlen; int ret; - ret = tcpmss_mangle_packet(pskb, targinfo, iph->ihl * 4, + ret = tcpmss_mangle_packet(skb, targinfo, iph->ihl * 4, sizeof(*iph) + sizeof(struct tcphdr)); if (ret < 0) return NF_DROP; if (ret > 0) { - iph = ip_hdr(*pskb); + iph = ip_hdr(skb); newlen = htons(ntohs(iph->tot_len) + ret); nf_csum_replace2(&iph->check, iph->tot_len, newlen); iph->tot_len = newlen; @@ -164,30 +160,30 @@ xt_tcpmss_target4(struct sk_buff **pskb, #if defined(CONFIG_IP6_NF_IPTABLES) || defined(CONFIG_IP6_NF_IPTABLES_MODULE) static unsigned int -xt_tcpmss_target6(struct sk_buff **pskb, +xt_tcpmss_target6(struct sk_buff *skb, const struct net_device *in, const struct net_device *out, unsigned int hooknum, const struct xt_target *target, const void *targinfo) { - struct ipv6hdr *ipv6h = ipv6_hdr(*pskb); + struct ipv6hdr *ipv6h = ipv6_hdr(skb); u8 nexthdr; int tcphoff; int ret; nexthdr = ipv6h->nexthdr; - tcphoff = ipv6_skip_exthdr(*pskb, sizeof(*ipv6h), &nexthdr); + tcphoff = ipv6_skip_exthdr(skb, sizeof(*ipv6h), &nexthdr); if (tcphoff < 
0) { WARN_ON(1); return NF_DROP; } - ret = tcpmss_mangle_packet(pskb, targinfo, tcphoff, + ret = tcpmss_mangle_packet(skb, targinfo, tcphoff, sizeof(*ipv6h) + sizeof(struct tcphdr)); if (ret < 0) return NF_DROP; if (ret > 0) { - ipv6h = ipv6_hdr(*pskb); + ipv6h = ipv6_hdr(skb); ipv6h->payload_len = htons(ntohs(ipv6h->payload_len) + ret); } return XT_CONTINUE; diff --git a/net/netfilter/xt_TRACE.c b/net/netfilter/xt_TRACE.c index 4df2dedcc0b..26c5d08ab2c 100644 --- a/net/netfilter/xt_TRACE.c +++ b/net/netfilter/xt_TRACE.c @@ -10,14 +10,14 @@ MODULE_ALIAS("ipt_TRACE"); MODULE_ALIAS("ip6t_TRACE"); static unsigned int -target(struct sk_buff **pskb, +target(struct sk_buff *skb, const struct net_device *in, const struct net_device *out, unsigned int hooknum, const struct xt_target *target, const void *targinfo) { - (*pskb)->nf_trace = 1; + skb->nf_trace = 1; return XT_CONTINUE; } diff --git a/net/netfilter/xt_connbytes.c b/net/netfilter/xt_connbytes.c index af79423bc8e..9ec50139b9a 100644 --- a/net/netfilter/xt_connbytes.c +++ b/net/netfilter/xt_connbytes.c @@ -2,13 +2,13 @@ * GPL (C) 2002 Martin Devera (devik@cdi.cz). */ #include <linux/module.h> +#include <linux/bitops.h> #include <linux/skbuff.h> #include <linux/netfilter/x_tables.h> #include <linux/netfilter/xt_connbytes.h> #include <net/netfilter/nf_conntrack.h> #include <asm/div64.h> -#include <asm/bitops.h> MODULE_LICENSE("GPL"); MODULE_AUTHOR("Harald Welte <laforge@netfilter.org>"); diff --git a/net/netfilter/xt_connmark.c b/net/netfilter/xt_connmark.c index 1071fc54d6d..9f67920af41 100644 --- a/net/netfilter/xt_connmark.c +++ b/net/netfilter/xt_connmark.c @@ -25,7 +25,7 @@ #include <linux/netfilter/x_tables.h> #include <linux/netfilter/xt_connmark.h> -MODULE_AUTHOR("Henrik Nordstrom <hno@marasytems.com>"); +MODULE_AUTHOR("Henrik Nordstrom <hno@marasystems.com>"); MODULE_DESCRIPTION("IP tables connmark match module"); MODULE_LICENSE("GPL"); MODULE_ALIAS("ipt_connmark"); diff --git a/net/netfilter/xt_limit.c b/net/netfilter/xt_limit.c index 4fcca797150..f263a77e57b 100644 --- a/net/netfilter/xt_limit.c +++ b/net/netfilter/xt_limit.c @@ -1,5 +1,5 @@ -/* (C) 1999 Jérôme de Vivie <devivie@info.enserb.u-bordeaux.fr> - * (C) 1999 Hervé Eychenne <eychenne@info.enserb.u-bordeaux.fr> +/* (C) 1999 Jérôme de Vivie <devivie@info.enserb.u-bordeaux.fr> + * (C) 1999 Hervé Eychenne <eychenne@info.enserb.u-bordeaux.fr> * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as diff --git a/net/netfilter/xt_sctp.c b/net/netfilter/xt_sctp.c index f907770fd4e..3358273a47b 100644 --- a/net/netfilter/xt_sctp.c +++ b/net/netfilter/xt_sctp.c @@ -42,21 +42,21 @@ match_flags(const struct xt_sctp_flag_info *flag_info, static inline bool match_packet(const struct sk_buff *skb, unsigned int offset, - const u_int32_t *chunkmap, - int chunk_match_type, - const struct xt_sctp_flag_info *flag_info, - const int flag_count, + const struct xt_sctp_info *info, bool *hotdrop) { u_int32_t chunkmapcopy[256 / sizeof (u_int32_t)]; sctp_chunkhdr_t _sch, *sch; + int chunk_match_type = info->chunk_match_type; + const struct xt_sctp_flag_info *flag_info = info->flag_info; + int flag_count = info->flag_count; #ifdef DEBUG_SCTP int i = 0; #endif if (chunk_match_type == SCTP_CHUNK_MATCH_ALL) - SCTP_CHUNKMAP_COPY(chunkmapcopy, chunkmap); + SCTP_CHUNKMAP_COPY(chunkmapcopy, info->chunkmap); do { sch = skb_header_pointer(skb, offset, sizeof(_sch), &_sch); @@ -73,7 +73,7 @@ match_packet(const struct sk_buff 
*skb, duprintf("skb->len: %d\toffset: %d\n", skb->len, offset); - if (SCTP_CHUNKMAP_IS_SET(chunkmap, sch->type)) { + if (SCTP_CHUNKMAP_IS_SET(info->chunkmap, sch->type)) { switch (chunk_match_type) { case SCTP_CHUNK_MATCH_ANY: if (match_flags(flag_info, flag_count, @@ -104,7 +104,7 @@ match_packet(const struct sk_buff *skb, switch (chunk_match_type) { case SCTP_CHUNK_MATCH_ALL: - return SCTP_CHUNKMAP_IS_CLEAR(chunkmap); + return SCTP_CHUNKMAP_IS_CLEAR(info->chunkmap); case SCTP_CHUNK_MATCH_ANY: return false; case SCTP_CHUNK_MATCH_ONLY: @@ -148,9 +148,7 @@ match(const struct sk_buff *skb, && ntohs(sh->dest) <= info->dpts[1], XT_SCTP_DEST_PORTS, info->flags, info->invflags) && SCCHECK(match_packet(skb, protoff + sizeof (sctp_sctphdr_t), - info->chunkmap, info->chunk_match_type, - info->flag_info, info->flag_count, - hotdrop), + info, hotdrop), XT_SCTP_CHUNK_TYPES, info->flags, info->invflags); } diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index c776bcd9f82..98e313e5e59 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -1378,6 +1378,8 @@ netlink_kernel_create(struct net *net, int unit, unsigned int groups, nl_table[unit].cb_mutex = cb_mutex; nl_table[unit].module = module; nl_table[unit].registered = 1; + } else { + kfree(listeners); } netlink_table_ungrab(); diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index e11000a8e95..d0936506b73 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -1623,11 +1623,6 @@ static struct vm_operations_struct packet_mmap_ops = { .close =packet_mm_close, }; -static inline struct page *pg_vec_endpage(char *one_pg_vec, unsigned int order) -{ - return virt_to_page(one_pg_vec + (PAGE_SIZE << order) - 1); -} - static void free_pg_vec(char **pg_vec, unsigned int order, unsigned int len) { int i; diff --git a/net/rfkill/rfkill-input.c b/net/rfkill/rfkill-input.c index eaabf087c59..d1e9d68f8ba 100644 --- a/net/rfkill/rfkill-input.c +++ b/net/rfkill/rfkill-input.c @@ -146,18 +146,18 @@ static void rfkill_disconnect(struct input_handle *handle) static const struct input_device_id rfkill_ids[] = { { .flags = INPUT_DEVICE_ID_MATCH_EVBIT | INPUT_DEVICE_ID_MATCH_KEYBIT, - .evbit = { BIT(EV_KEY) }, - .keybit = { [LONG(KEY_WLAN)] = BIT(KEY_WLAN) }, + .evbit = { BIT_MASK(EV_KEY) }, + .keybit = { [BIT_WORD(KEY_WLAN)] = BIT_MASK(KEY_WLAN) }, }, { .flags = INPUT_DEVICE_ID_MATCH_EVBIT | INPUT_DEVICE_ID_MATCH_KEYBIT, - .evbit = { BIT(EV_KEY) }, - .keybit = { [LONG(KEY_BLUETOOTH)] = BIT(KEY_BLUETOOTH) }, + .evbit = { BIT_MASK(EV_KEY) }, + .keybit = { [BIT_WORD(KEY_BLUETOOTH)] = BIT_MASK(KEY_BLUETOOTH) }, }, { .flags = INPUT_DEVICE_ID_MATCH_EVBIT | INPUT_DEVICE_ID_MATCH_KEYBIT, - .evbit = { BIT(EV_KEY) }, - .keybit = { [LONG(KEY_UWB)] = BIT(KEY_UWB) }, + .evbit = { BIT_MASK(EV_KEY) }, + .keybit = { [BIT_WORD(KEY_UWB)] = BIT_MASK(KEY_UWB) }, }, { } }; diff --git a/net/rxrpc/af_rxrpc.c b/net/rxrpc/af_rxrpc.c index 0803f305ed0..c680017f5c8 100644 --- a/net/rxrpc/af_rxrpc.c +++ b/net/rxrpc/af_rxrpc.c @@ -14,6 +14,7 @@ #include <linux/skbuff.h> #include <linux/poll.h> #include <linux/proc_fs.h> +#include <linux/key-type.h> #include <net/net_namespace.h> #include <net/sock.h> #include <net/af_rxrpc.h> diff --git a/net/rxrpc/ar-key.c b/net/rxrpc/ar-key.c index 7e049ff6ae6..9a8ff684da7 100644 --- a/net/rxrpc/ar-key.c +++ b/net/rxrpc/ar-key.c @@ -15,7 +15,7 @@ #include <linux/module.h> #include <linux/net.h> #include <linux/skbuff.h> -#include <linux/key.h> +#include <linux/key-type.h> #include <linux/crypto.h> #include 
<net/sock.h> #include <net/af_rxrpc.h> @@ -40,7 +40,6 @@ struct key_type key_type_rxrpc = { .destroy = rxrpc_destroy, .describe = rxrpc_describe, }; - EXPORT_SYMBOL(key_type_rxrpc); /* @@ -330,5 +329,32 @@ error: _leave(" = -ENOMEM [ins %d]", ret); return -ENOMEM; } - EXPORT_SYMBOL(rxrpc_get_server_data_key); + +/** + * rxrpc_get_null_key - Generate a null RxRPC key + * @keyname: The name to give the key. + * + * Generate a null RxRPC key that can be used to indicate anonymous security is + * required for a particular domain. + */ +struct key *rxrpc_get_null_key(const char *keyname) +{ + struct key *key; + int ret; + + key = key_alloc(&key_type_rxrpc, keyname, 0, 0, current, + KEY_POS_SEARCH, KEY_ALLOC_NOT_IN_QUOTA); + if (IS_ERR(key)) + return key; + + ret = key_instantiate_and_link(key, NULL, 0, NULL, NULL); + if (ret < 0) { + key_revoke(key); + key_put(key); + return ERR_PTR(ret); + } + + return key; +} +EXPORT_SYMBOL(rxrpc_get_null_key); diff --git a/net/sched/Kconfig b/net/sched/Kconfig index 92435a882fa..9c15c4888d1 100644 --- a/net/sched/Kconfig +++ b/net/sched/Kconfig @@ -2,9 +2,7 @@ # Traffic control configuration. # -menu "QoS and/or fair queueing" - -config NET_SCHED +menuconfig NET_SCHED bool "QoS and/or fair queueing" select NET_SCH_FIFO ---help--- @@ -41,9 +39,6 @@ config NET_SCHED The available schedulers are listed in the following questions; you can say Y to as many as you like. If unsure, say N now. -config NET_SCH_FIFO - bool - if NET_SCHED comment "Queueing/Scheduling" @@ -500,4 +495,5 @@ config NET_CLS_IND endif # NET_SCHED -endmenu +config NET_SCH_FIFO + bool diff --git a/net/sched/act_ipt.c b/net/sched/act_ipt.c index 6b407ece953..fa006e06ce3 100644 --- a/net/sched/act_ipt.c +++ b/net/sched/act_ipt.c @@ -202,11 +202,7 @@ static int tcf_ipt(struct sk_buff *skb, struct tc_action *a, /* yes, we have to worry about both in and out dev worry later - danger - this API seems to have changed from earlier kernels */ - - /* iptables targets take a double skb pointer in case the skb - * needs to be replaced. We don't own the skb, so this must not - * happen. The pskb_expand_head above should make sure of this */ - ret = ipt->tcfi_t->u.kernel.target->target(&skb, skb->dev, NULL, + ret = ipt->tcfi_t->u.kernel.target->target(skb, skb->dev, NULL, ipt->tcfi_hook, ipt->tcfi_t->u.kernel.target, ipt->tcfi_t->data); diff --git a/net/sched/em_meta.c b/net/sched/em_meta.c index e9989610712..ceda8890ab0 100644 --- a/net/sched/em_meta.c +++ b/net/sched/em_meta.c @@ -55,7 +55,7 @@ * ppp0..9. * * NOTE: Certain meta values depend on other subsystems and are - * only available if that subsytem is enabled in the kernel. + * only available if that subsystem is enabled in the kernel. 
*/ #include <linux/module.h> diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c index 95ae11956f3..fa1a6f45dc4 100644 --- a/net/sched/sch_generic.c +++ b/net/sched/sch_generic.c @@ -249,10 +249,11 @@ static void dev_watchdog_down(struct net_device *dev) */ void netif_carrier_on(struct net_device *dev) { - if (test_and_clear_bit(__LINK_STATE_NOCARRIER, &dev->state)) + if (test_and_clear_bit(__LINK_STATE_NOCARRIER, &dev->state)) { linkwatch_fire_event(dev); - if (netif_running(dev)) - __netdev_watchdog_up(dev); + if (netif_running(dev)) + __netdev_watchdog_up(dev); + } } /** @@ -555,6 +556,7 @@ void dev_deactivate(struct net_device *dev) { struct Qdisc *qdisc; struct sk_buff *skb; + int running; spin_lock_bh(&dev->queue_lock); qdisc = dev->qdisc; @@ -570,12 +572,31 @@ void dev_deactivate(struct net_device *dev) dev_watchdog_down(dev); - /* Wait for outstanding dev_queue_xmit calls. */ + /* Wait for outstanding qdisc-less dev_queue_xmit calls. */ synchronize_rcu(); /* Wait for outstanding qdisc_run calls. */ - while (test_bit(__LINK_STATE_QDISC_RUNNING, &dev->state)) - yield(); + do { + while (test_bit(__LINK_STATE_QDISC_RUNNING, &dev->state)) + yield(); + + /* + * Double-check inside queue lock to ensure that all effects + * of the queue run are visible when we return. + */ + spin_lock_bh(&dev->queue_lock); + running = test_bit(__LINK_STATE_QDISC_RUNNING, &dev->state); + spin_unlock_bh(&dev->queue_lock); + + /* + * The running flag should never be set at this point because + * we've already set dev->qdisc to noop_qdisc *inside* the same + * pair of spin locks. That is, if any qdisc_run starts after + * our initial test it should see the noop_qdisc and then + * clear the RUNNING bit before dropping the queue lock. So + * if it is set here then we've found a bug. + */ + } while (WARN_ON_ONCE(running)); } void dev_init_scheduler(struct net_device *dev) diff --git a/net/sched/sch_ingress.c b/net/sched/sch_ingress.c index 2d32fd27496..3f8335e6ea2 100644 --- a/net/sched/sch_ingress.c +++ b/net/sched/sch_ingress.c @@ -205,20 +205,19 @@ static unsigned int ingress_drop(struct Qdisc *sch) #ifndef CONFIG_NET_CLS_ACT #ifdef CONFIG_NETFILTER static unsigned int -ing_hook(unsigned int hook, struct sk_buff **pskb, +ing_hook(unsigned int hook, struct sk_buff *skb, const struct net_device *indev, const struct net_device *outdev, int (*okfn)(struct sk_buff *)) { struct Qdisc *q; - struct sk_buff *skb = *pskb; struct net_device *dev = skb->dev; int fwres=NF_ACCEPT; DPRINTK("ing_hook: skb %s dev=%s len=%u\n", skb->sk ? "(owned)" : "(unowned)", - skb->dev ? (*pskb)->dev->name : "(no dev)", + skb->dev ? 
skb->dev->name : "(no dev)", skb->len); if (dev->qdisc_ingress) { diff --git a/net/sched/sch_teql.c b/net/sched/sch_teql.c index be57cf317a7..421281d9dd1 100644 --- a/net/sched/sch_teql.c +++ b/net/sched/sch_teql.c @@ -266,7 +266,7 @@ static int teql_master_xmit(struct sk_buff *skb, struct net_device *dev) int busy; int nores; int len = skb->len; - int subq = skb->queue_mapping; + int subq = skb_get_queue_mapping(skb); struct sk_buff *skb_res = NULL; start = master->slaves; @@ -284,7 +284,7 @@ restart: if (slave->qdisc_sleeping != q) continue; if (netif_queue_stopped(slave) || - netif_subqueue_stopped(slave, subq) || + __netif_subqueue_stopped(slave, subq) || !netif_running(slave)) { busy = 1; continue; @@ -294,7 +294,7 @@ restart: case 0: if (netif_tx_trylock(slave)) { if (!netif_queue_stopped(slave) && - !netif_subqueue_stopped(slave, subq) && + !__netif_subqueue_stopped(slave, subq) && slave->hard_start_xmit(skb, slave) == 0) { netif_tx_unlock(slave); master->slaves = NEXT_SLAVE(q); diff --git a/net/sctp/auth.c b/net/sctp/auth.c index 78181072471..cbd64b216cc 100644 --- a/net/sctp/auth.c +++ b/net/sctp/auth.c @@ -726,7 +726,8 @@ void sctp_auth_calculate_hmac(const struct sctp_association *asoc, /* set up scatter list */ end = skb_tail_pointer(skb); - sg.page = virt_to_page(auth); + sg_init_table(&sg, 1); + sg_set_page(&sg, virt_to_page(auth)); sg.offset = (unsigned long)(auth) % PAGE_SIZE; sg.length = end - (unsigned char *)auth; diff --git a/net/sctp/ipv6.c b/net/sctp/ipv6.c index 9de3ddaa276..eb4deaf5891 100644 --- a/net/sctp/ipv6.c +++ b/net/sctp/ipv6.c @@ -954,9 +954,9 @@ static struct inet_protosw sctpv6_stream_protosw = { .flags = SCTP_PROTOSW_FLAG, }; -static int sctp6_rcv(struct sk_buff **pskb) +static int sctp6_rcv(struct sk_buff *skb) { - return sctp_rcv(*pskb) ? -1 : 0; + return sctp_rcv(skb) ? -1 : 0; } static struct inet6_protocol sctpv6_protocol = { diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c index 81b26c5ffd4..f5cd96f5fe7 100644 --- a/net/sctp/protocol.c +++ b/net/sctp/protocol.c @@ -1228,7 +1228,6 @@ SCTP_STATIC __init int sctp_init(void) if (status) goto err_v6_add_protocol; - __unsafe(THIS_MODULE); status = 0; out: return status; diff --git a/net/sctp/sm_make_chunk.c b/net/sctp/sm_make_chunk.c index f983a369d4e..658476c4d58 100644 --- a/net/sctp/sm_make_chunk.c +++ b/net/sctp/sm_make_chunk.c @@ -56,7 +56,7 @@ #include <linux/ipv6.h> #include <linux/net.h> #include <linux/inet.h> -#include <asm/scatterlist.h> +#include <linux/scatterlist.h> #include <linux/crypto.h> #include <net/sock.h> @@ -1513,7 +1513,8 @@ static sctp_cookie_param_t *sctp_pack_cookie(const struct sctp_endpoint *ep, struct hash_desc desc; /* Sign the message. */ - sg.page = virt_to_page(&cookie->c); + sg_init_table(&sg, 1); + sg_set_page(&sg, virt_to_page(&cookie->c)); sg.offset = (unsigned long)(&cookie->c) % PAGE_SIZE; sg.length = bodysize; keylen = SCTP_SECRET_SIZE; @@ -1585,7 +1586,8 @@ struct sctp_association *sctp_unpack_cookie( /* Check the signature. 
*/ keylen = SCTP_SECRET_SIZE; - sg.page = virt_to_page(bear_cookie); + sg_init_table(&sg, 1); + sg_set_page(&sg, virt_to_page(bear_cookie)); sg.offset = (unsigned long)(bear_cookie) % PAGE_SIZE; sg.length = bodysize; key = (char *)ep->secret_key[ep->current_key]; diff --git a/net/socket.c b/net/socket.c index 379b3a39075..540013ea862 100644 --- a/net/socket.c +++ b/net/socket.c @@ -258,7 +258,7 @@ static void sock_destroy_inode(struct inode *inode) container_of(inode, struct socket_alloc, vfs_inode)); } -static void init_once(void *foo, struct kmem_cache *cachep, unsigned long flags) +static void init_once(struct kmem_cache *cachep, void *foo) { struct socket_alloc *ei = (struct socket_alloc *)foo; @@ -364,26 +364,26 @@ static int sock_alloc_fd(struct file **filep) static int sock_attach_fd(struct socket *sock, struct file *file) { + struct dentry *dentry; struct qstr name = { .name = "" }; - file->f_path.dentry = d_alloc(sock_mnt->mnt_sb->s_root, &name); - if (unlikely(!file->f_path.dentry)) + dentry = d_alloc(sock_mnt->mnt_sb->s_root, &name); + if (unlikely(!dentry)) return -ENOMEM; - file->f_path.dentry->d_op = &sockfs_dentry_operations; + dentry->d_op = &sockfs_dentry_operations; /* * We don't want to push this dentry into global dentry hash table. * We pretend dentry is already hashed, by unsetting DCACHE_UNHASHED * This permits a working /proc/$pid/fd/XXX on sockets */ - file->f_path.dentry->d_flags &= ~DCACHE_UNHASHED; - d_instantiate(file->f_path.dentry, SOCK_INODE(sock)); - file->f_path.mnt = mntget(sock_mnt); - file->f_mapping = file->f_path.dentry->d_inode->i_mapping; + dentry->d_flags &= ~DCACHE_UNHASHED; + d_instantiate(dentry, SOCK_INODE(sock)); sock->file = file; - file->f_op = SOCK_INODE(sock)->i_fop = &socket_file_ops; - file->f_mode = FMODE_READ | FMODE_WRITE; + init_file(file, sock_mnt, dentry, FMODE_READ | FMODE_WRITE, + &socket_file_ops); + SOCK_INODE(sock)->i_fop = &socket_file_ops; file->f_flags = O_RDWR; file->f_pos = 0; file->private_data = sock; diff --git a/net/sunrpc/Makefile b/net/sunrpc/Makefile index 8ebfc4db7f5..5c69a725e53 100644 --- a/net/sunrpc/Makefile +++ b/net/sunrpc/Makefile @@ -5,6 +5,7 @@ obj-$(CONFIG_SUNRPC) += sunrpc.o obj-$(CONFIG_SUNRPC_GSS) += auth_gss/ +obj-$(CONFIG_SUNRPC_XPRT_RDMA) += xprtrdma/ sunrpc-y := clnt.o xprt.o socklib.o xprtsock.o sched.o \ auth.o auth_null.o auth_unix.o \ diff --git a/net/sunrpc/auth_gss/gss_krb5_crypto.c b/net/sunrpc/auth_gss/gss_krb5_crypto.c index bfb6a29633d..32be431affc 100644 --- a/net/sunrpc/auth_gss/gss_krb5_crypto.c +++ b/net/sunrpc/auth_gss/gss_krb5_crypto.c @@ -197,9 +197,9 @@ encryptor(struct scatterlist *sg, void *data) int i = (page_pos + outbuf->page_base) >> PAGE_CACHE_SHIFT; in_page = desc->pages[i]; } else { - in_page = sg->page; + in_page = sg_page(sg); } - desc->infrags[desc->fragno].page = in_page; + sg_set_page(&desc->infrags[desc->fragno], in_page); desc->fragno++; desc->fraglen += sg->length; desc->pos += sg->length; @@ -215,11 +215,11 @@ encryptor(struct scatterlist *sg, void *data) if (ret) return ret; if (fraglen) { - desc->outfrags[0].page = sg->page; + sg_set_page(&desc->outfrags[0], sg_page(sg)); desc->outfrags[0].offset = sg->offset + sg->length - fraglen; desc->outfrags[0].length = fraglen; desc->infrags[0] = desc->outfrags[0]; - desc->infrags[0].page = in_page; + sg_set_page(&desc->infrags[0], in_page); desc->fragno = 1; desc->fraglen = fraglen; } else { @@ -287,7 +287,7 @@ decryptor(struct scatterlist *sg, void *data) if (ret) return ret; if (fraglen) { - desc->frags[0].page =
sg->page; + sg_set_page(&desc->frags[0], sg_page(sg)); desc->frags[0].offset = sg->offset + sg->length - fraglen; desc->frags[0].length = fraglen; desc->fragno = 1; diff --git a/net/sunrpc/auth_gss/gss_krb5_wrap.c b/net/sunrpc/auth_gss/gss_krb5_wrap.c index 42b3220bed3..8bd074df27d 100644 --- a/net/sunrpc/auth_gss/gss_krb5_wrap.c +++ b/net/sunrpc/auth_gss/gss_krb5_wrap.c @@ -42,7 +42,7 @@ gss_krb5_remove_padding(struct xdr_buf *buf, int blocksize) { u8 *ptr; u8 pad; - int len = buf->len; + size_t len = buf->len; if (len <= buf->head[0].iov_len) { pad = *(u8 *)(buf->head[0].iov_base + len - 1); @@ -53,9 +53,9 @@ gss_krb5_remove_padding(struct xdr_buf *buf, int blocksize) } else len -= buf->head[0].iov_len; if (len <= buf->page_len) { - int last = (buf->page_base + len - 1) + unsigned int last = (buf->page_base + len - 1) >>PAGE_CACHE_SHIFT; - int offset = (buf->page_base + len - 1) + unsigned int offset = (buf->page_base + len - 1) & (PAGE_CACHE_SIZE - 1); ptr = kmap_atomic(buf->pages[last], KM_USER0); pad = *(ptr + offset); diff --git a/net/sunrpc/auth_gss/svcauth_gss.c b/net/sunrpc/auth_gss/svcauth_gss.c index 7da7050f06c..73940df6c46 100644 --- a/net/sunrpc/auth_gss/svcauth_gss.c +++ b/net/sunrpc/auth_gss/svcauth_gss.c @@ -631,7 +631,8 @@ svc_safe_putnetobj(struct kvec *resv, struct xdr_netobj *o) return 0; } -/* Verify the checksum on the header and return SVC_OK on success. +/* + * Verify the checksum on the header and return SVC_OK on success. * Otherwise, return SVC_DROP (in the case of a bad sequence number) * or return SVC_DENIED and indicate error in authp. */ @@ -961,6 +962,78 @@ gss_write_init_verf(struct svc_rqst *rqstp, struct rsi *rsip) } /* + * Having read the cred already and found we're in the context + * initiation case, read the verifier and initiate (or check the results + * of) upcalls to userspace for help with context initiation. If + * the upcall results are available, write the verifier and result. + * Otherwise, drop the request pending an answer to the upcall. 
+ */ +static int svcauth_gss_handle_init(struct svc_rqst *rqstp, + struct rpc_gss_wire_cred *gc, __be32 *authp) +{ + struct kvec *argv = &rqstp->rq_arg.head[0]; + struct kvec *resv = &rqstp->rq_res.head[0]; + struct xdr_netobj tmpobj; + struct rsi *rsip, rsikey; + + /* Read the verifier; should be NULL: */ + *authp = rpc_autherr_badverf; + if (argv->iov_len < 2 * 4) + return SVC_DENIED; + if (svc_getnl(argv) != RPC_AUTH_NULL) + return SVC_DENIED; + if (svc_getnl(argv) != 0) + return SVC_DENIED; + + /* Marshal context handle and token for upcall: */ + *authp = rpc_autherr_badcred; + if (gc->gc_proc == RPC_GSS_PROC_INIT && gc->gc_ctx.len != 0) + return SVC_DENIED; + memset(&rsikey, 0, sizeof(rsikey)); + if (dup_netobj(&rsikey.in_handle, &gc->gc_ctx)) + return SVC_DROP; + *authp = rpc_autherr_badverf; + if (svc_safe_getnetobj(argv, &tmpobj)) { + kfree(rsikey.in_handle.data); + return SVC_DENIED; + } + if (dup_netobj(&rsikey.in_token, &tmpobj)) { + kfree(rsikey.in_handle.data); + return SVC_DROP; + } + + /* Perform upcall, or find upcall result: */ + rsip = rsi_lookup(&rsikey); + rsi_free(&rsikey); + if (!rsip) + return SVC_DROP; + switch (cache_check(&rsi_cache, &rsip->h, &rqstp->rq_chandle)) { + case -EAGAIN: + case -ETIMEDOUT: + case -ENOENT: + /* No upcall result: */ + return SVC_DROP; + case 0: + /* Got an answer to the upcall; use it: */ + if (gss_write_init_verf(rqstp, rsip)) + return SVC_DROP; + if (resv->iov_len + 4 > PAGE_SIZE) + return SVC_DROP; + svc_putnl(resv, RPC_SUCCESS); + if (svc_safe_putnetobj(resv, &rsip->out_handle)) + return SVC_DROP; + if (resv->iov_len + 3 * 4 > PAGE_SIZE) + return SVC_DROP; + svc_putnl(resv, rsip->major_status); + svc_putnl(resv, rsip->minor_status); + svc_putnl(resv, GSS_SEQ_WIN); + if (svc_safe_putnetobj(resv, &rsip->out_token)) + return SVC_DROP; + } + return SVC_COMPLETE; +} + +/* * Accept an rpcsec packet. * If context establishment, punt to user space * If data exchange, verify/decrypt @@ -974,11 +1047,9 @@ svcauth_gss_accept(struct svc_rqst *rqstp, __be32 *authp) struct kvec *argv = &rqstp->rq_arg.head[0]; struct kvec *resv = &rqstp->rq_res.head[0]; u32 crlen; - struct xdr_netobj tmpobj; struct gss_svc_data *svcdata = rqstp->rq_auth_data; struct rpc_gss_wire_cred *gc; struct rsc *rsci = NULL; - struct rsi *rsip, rsikey; __be32 *rpcstart; __be32 *reject_stat = resv->iov_base + resv->iov_len; int ret; @@ -1023,30 +1094,14 @@ svcauth_gss_accept(struct svc_rqst *rqstp, __be32 *authp) if ((gc->gc_proc != RPC_GSS_PROC_DATA) && (rqstp->rq_proc != 0)) goto auth_err; - /* - * We've successfully parsed the credential. Let's check out the - * verifier. An AUTH_NULL verifier is allowed (and required) for - * INIT and CONTINUE_INIT requests. AUTH_RPCSEC_GSS is required for - * PROC_DATA and PROC_DESTROY. - * - * AUTH_NULL verifier is 0 (AUTH_NULL), 0 (length). - * AUTH_RPCSEC_GSS verifier is: - * 6 (AUTH_RPCSEC_GSS), length, checksum. - * checksum is calculated over rpcheader from xid up to here.
- */ *authp = rpc_autherr_badverf; switch (gc->gc_proc) { case RPC_GSS_PROC_INIT: case RPC_GSS_PROC_CONTINUE_INIT: - if (argv->iov_len < 2 * 4) - goto auth_err; - if (svc_getnl(argv) != RPC_AUTH_NULL) - goto auth_err; - if (svc_getnl(argv) != 0) - goto auth_err; - break; + return svcauth_gss_handle_init(rqstp, gc, authp); case RPC_GSS_PROC_DATA: case RPC_GSS_PROC_DESTROY: + /* Look up the context, and check the verifier: */ *authp = rpcsec_gsserr_credproblem; rsci = gss_svc_searchbyctx(&gc->gc_ctx); if (!rsci) @@ -1067,51 +1122,6 @@ svcauth_gss_accept(struct svc_rqst *rqstp, __be32 *authp) /* now act upon the command: */ switch (gc->gc_proc) { - case RPC_GSS_PROC_INIT: - case RPC_GSS_PROC_CONTINUE_INIT: - *authp = rpc_autherr_badcred; - if (gc->gc_proc == RPC_GSS_PROC_INIT && gc->gc_ctx.len != 0) - goto auth_err; - memset(&rsikey, 0, sizeof(rsikey)); - if (dup_netobj(&rsikey.in_handle, &gc->gc_ctx)) - goto drop; - *authp = rpc_autherr_badverf; - if (svc_safe_getnetobj(argv, &tmpobj)) { - kfree(rsikey.in_handle.data); - goto auth_err; - } - if (dup_netobj(&rsikey.in_token, &tmpobj)) { - kfree(rsikey.in_handle.data); - goto drop; - } - - rsip = rsi_lookup(&rsikey); - rsi_free(&rsikey); - if (!rsip) { - goto drop; - } - switch(cache_check(&rsi_cache, &rsip->h, &rqstp->rq_chandle)) { - case -EAGAIN: - case -ETIMEDOUT: - case -ENOENT: - goto drop; - case 0: - if (gss_write_init_verf(rqstp, rsip)) - goto drop; - if (resv->iov_len + 4 > PAGE_SIZE) - goto drop; - svc_putnl(resv, RPC_SUCCESS); - if (svc_safe_putnetobj(resv, &rsip->out_handle)) - goto drop; - if (resv->iov_len + 3 * 4 > PAGE_SIZE) - goto drop; - svc_putnl(resv, rsip->major_status); - svc_putnl(resv, rsip->minor_status); - svc_putnl(resv, GSS_SEQ_WIN); - if (svc_safe_putnetobj(resv, &rsip->out_token)) - goto drop; - } - goto complete; case RPC_GSS_PROC_DESTROY: if (gss_write_verf(rqstp, rsci->mechctx, gc->gc_seq)) goto auth_err; @@ -1158,7 +1168,7 @@ svcauth_gss_accept(struct svc_rqst *rqstp, __be32 *authp) goto out; } auth_err: - /* Restore write pointer to original value: */ + /* Restore write pointer to its original value: */ xdr_ressize_check(rqstp, reject_stat); ret = SVC_DENIED; goto out; diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index 52429b1ffcc..76be83ee4b0 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -127,7 +127,14 @@ static struct rpc_clnt * rpc_new_client(struct rpc_xprt *xprt, char *servname, s struct rpc_clnt *clnt = NULL; struct rpc_auth *auth; int err; - int len; + size_t len; + + /* sanity check the name before trying to print it */ + err = -EINVAL; + len = strlen(servname); + if (len > RPC_MAXNETNAMELEN) + goto out_no_rpciod; + len++; dprintk("RPC: creating %s client for %s (xprt %p)\n", program->name, servname, xprt); @@ -148,7 +155,6 @@ static struct rpc_clnt * rpc_new_client(struct rpc_xprt *xprt, char *servname, s clnt->cl_parent = clnt; clnt->cl_server = clnt->cl_inline_name; - len = strlen(servname) + 1; if (len > sizeof(clnt->cl_inline_name)) { char *buf = kmalloc(len, GFP_KERNEL); if (buf != 0) @@ -234,8 +240,8 @@ struct rpc_clnt *rpc_create(struct rpc_create_args *args) { struct rpc_xprt *xprt; struct rpc_clnt *clnt; - struct rpc_xprtsock_create xprtargs = { - .proto = args->protocol, + struct xprt_create xprtargs = { + .ident = args->protocol, .srcaddr = args->saddress, .dstaddr = args->address, .addrlen = args->addrsize, @@ -253,7 +259,7 @@ struct rpc_clnt *rpc_create(struct rpc_create_args *args) */ if (args->servername == NULL) { struct sockaddr_in *addr = - (struct sockaddr_in *) 
&args->address; + (struct sockaddr_in *) args->address; snprintf(servername, sizeof(servername), NIPQUAD_FMT, NIPQUAD(addr->sin_addr.s_addr)); args->servername = servername; @@ -269,9 +275,6 @@ struct rpc_clnt *rpc_create(struct rpc_create_args *args) if (args->flags & RPC_CLNT_CREATE_NONPRIVPORT) xprt->resvport = 0; - dprintk("RPC: creating %s client for %s (xprt %p)\n", - args->program->name, args->servername, xprt); - clnt = rpc_new_client(xprt, args->servername, args->program, args->version, args->authflavor); if (IS_ERR(clnt)) @@ -439,7 +442,7 @@ rpc_release_client(struct rpc_clnt *clnt) */ struct rpc_clnt *rpc_bind_new_program(struct rpc_clnt *old, struct rpc_program *program, - int vers) + u32 vers) { struct rpc_clnt *clnt; struct rpc_version *version; @@ -843,8 +846,7 @@ call_allocate(struct rpc_task *task) dprintk("RPC: %5u rpc_buffer allocation failed\n", task->tk_pid); if (RPC_IS_ASYNC(task) || !signalled()) { - xprt_release(task); - task->tk_action = call_reserve; + task->tk_action = call_allocate; rpc_delay(task, HZ>>4); return; } @@ -871,6 +873,7 @@ rpc_xdr_buf_init(struct xdr_buf *buf, void *start, size_t len) buf->head[0].iov_len = len; buf->tail[0].iov_len = 0; buf->page_len = 0; + buf->flags = 0; buf->len = 0; buf->buflen = len; } @@ -937,7 +940,7 @@ call_bind(struct rpc_task *task) static void call_bind_status(struct rpc_task *task) { - int status = -EACCES; + int status = -EIO; if (task->tk_status >= 0) { dprint_status(task); @@ -947,9 +950,20 @@ call_bind_status(struct rpc_task *task) } switch (task->tk_status) { + case -EAGAIN: + dprintk("RPC: %5u rpcbind waiting for another request " + "to finish\n", task->tk_pid); + /* avoid busy-waiting here -- could be a network outage. */ + rpc_delay(task, 5*HZ); + goto retry_timeout; case -EACCES: dprintk("RPC: %5u remote rpcbind: RPC program/version " "unavailable\n", task->tk_pid); + /* fail immediately if this is an RPC ping */ + if (task->tk_msg.rpc_proc->p_proc == 0) { + status = -EOPNOTSUPP; + break; + } rpc_delay(task, 3*HZ); goto retry_timeout; case -ETIMEDOUT: @@ -957,6 +971,7 @@ call_bind_status(struct rpc_task *task) task->tk_pid); goto retry_timeout; case -EPFNOSUPPORT: + /* server doesn't support any rpcbind version we know of */ dprintk("RPC: %5u remote rpcbind service unavailable\n", task->tk_pid); break; @@ -969,7 +984,6 @@ call_bind_status(struct rpc_task *task) default: dprintk("RPC: %5u unrecognized rpcbind error (%d)\n", task->tk_pid, -task->tk_status); - status = -EIO; } rpc_exit(task, status); @@ -1257,7 +1271,6 @@ call_refresh(struct rpc_task *task) { dprint_status(task); - xprt_release(task); /* Must do to obtain new XID */ task->tk_action = call_refreshresult; task->tk_status = 0; task->tk_client->cl_stats->rpcauthrefresh++; @@ -1375,6 +1388,8 @@ call_verify(struct rpc_task *task) dprintk("RPC: %5u %s: retry stale creds\n", task->tk_pid, __FUNCTION__); rpcauth_invalcred(task); + /* Ensure we obtain a new XID! */ + xprt_release(task); task->tk_action = call_refresh; goto out_retry; case RPC_AUTH_BADCRED: @@ -1523,13 +1538,18 @@ void rpc_show_tasks(void) spin_lock(&clnt->cl_lock); list_for_each_entry(t, &clnt->cl_tasks, tk_task) { const char *rpc_waitq = "none"; + int proc; + + if (t->tk_msg.rpc_proc) + proc = t->tk_msg.rpc_proc->p_proc; + else + proc = -1; if (RPC_IS_QUEUED(t)) rpc_waitq = rpc_qname(t->u.tk_wait.rpc_waitq); printk("%5u %04d %04x %6d %8p %6d %8p %8ld %8s %8p %8p\n", - t->tk_pid, - (t->tk_msg.rpc_proc ? 
t->tk_msg.rpc_proc->p_proc : -1), + t->tk_pid, proc, t->tk_flags, t->tk_status, t->tk_client, (t->tk_client ? t->tk_client->cl_prog : 0), diff --git a/net/sunrpc/rpc_pipe.c b/net/sunrpc/rpc_pipe.c index 669e12a4ed1..18f0a8dcc09 100644 --- a/net/sunrpc/rpc_pipe.c +++ b/net/sunrpc/rpc_pipe.c @@ -14,7 +14,7 @@ #include <linux/pagemap.h> #include <linux/mount.h> #include <linux/namei.h> -#include <linux/dnotify.h> +#include <linux/fsnotify.h> #include <linux/kernel.h> #include <asm/ioctls.h> @@ -329,6 +329,7 @@ rpc_show_info(struct seq_file *m, void *v) clnt->cl_prog, clnt->cl_vers); seq_printf(m, "address: %s\n", rpc_peeraddr2str(clnt, RPC_DISPLAY_ADDR)); seq_printf(m, "protocol: %s\n", rpc_peeraddr2str(clnt, RPC_DISPLAY_PROTO)); + seq_printf(m, "port: %s\n", rpc_peeraddr2str(clnt, RPC_DISPLAY_PORT)); return 0; } @@ -585,6 +586,7 @@ rpc_populate(struct dentry *parent, if (S_ISDIR(mode)) inc_nlink(dir); d_add(dentry, inode); + fsnotify_create(dir, dentry); } mutex_unlock(&dir->i_mutex); return 0; @@ -606,7 +608,7 @@ __rpc_mkdir(struct inode *dir, struct dentry *dentry) inode->i_ino = iunique(dir->i_sb, 100); d_instantiate(dentry, inode); inc_nlink(dir); - inode_dir_notify(dir, DN_CREATE); + fsnotify_mkdir(dir, dentry); return 0; out_err: printk(KERN_WARNING "%s: %s failed to allocate inode for dentry %s\n", @@ -748,7 +750,7 @@ rpc_mkpipe(struct dentry *parent, const char *name, void *private, struct rpc_pi rpci->flags = flags; rpci->ops = ops; rpci->nkern_readwriters = 1; - inode_dir_notify(dir, DN_CREATE); + fsnotify_create(dir, dentry); dget(dentry); out: mutex_unlock(&dir->i_mutex); @@ -840,7 +842,7 @@ static struct file_system_type rpc_pipe_fs_type = { }; static void -init_once(void * foo, struct kmem_cache * cachep, unsigned long flags) +init_once(struct kmem_cache * cachep, void *foo) { struct rpc_inode *rpci = (struct rpc_inode *) foo; diff --git a/net/sunrpc/rpcb_clnt.c b/net/sunrpc/rpcb_clnt.c index d1740dbab99..a05493aedb6 100644 --- a/net/sunrpc/rpcb_clnt.c +++ b/net/sunrpc/rpcb_clnt.c @@ -16,11 +16,14 @@ #include <linux/types.h> #include <linux/socket.h> +#include <linux/in.h> +#include <linux/in6.h> #include <linux/kernel.h> #include <linux/errno.h> #include <linux/sunrpc/clnt.h> #include <linux/sunrpc/sched.h> +#include <linux/sunrpc/xprtsock.h> #ifdef RPC_DEBUG # define RPCDBG_FACILITY RPCDBG_BIND @@ -91,26 +94,6 @@ enum { #define RPCB_MAXADDRLEN (128u) /* - * r_netid - * - * Quoting RFC 3530, section 2.2: - * - * For TCP over IPv4 the value of r_netid is the string "tcp". For UDP - * over IPv4 the value of r_netid is the string "udp". - * - * ... - * - * For TCP over IPv6 the value of r_netid is the string "tcp6". For UDP - * over IPv6 the value of r_netid is the string "udp6". - */ -#define RPCB_NETID_UDP "\165\144\160" /* "udp" */ -#define RPCB_NETID_TCP "\164\143\160" /* "tcp" */ -#define RPCB_NETID_UDP6 "\165\144\160\066" /* "udp6" */ -#define RPCB_NETID_TCP6 "\164\143\160\066" /* "tcp6" */ - -#define RPCB_MAXNETIDLEN (4u) - -/* * r_owner * * The "owner" is allowed to unset a service in the rpcbind database. 
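The rpcb_clnt.c changes above drop the hard-coded netid strings in favor of the generic "universal address" form from RFC 3530, in which the port is appended to the host address as two extra dot-separated octets; rpcb_decode_getaddr() later in this patch recovers it by scanning the string backwards. A minimal user-space sketch of that decoding, with a hypothetical uaddr_port() helper standing in for (but not taken from) the kernel routine:

	#include <stdio.h>
	#include <string.h>

	/* Illustration only: "192.168.1.5.0.111" carries host 192.168.1.5
	 * and port 0 * 256 + 111 = 111 (the rpcbind port itself). */
	static int uaddr_port(const char *uaddr)
	{
		const char *p = uaddr + strlen(uaddr);
		unsigned int hi, lo;
		int dots = 0;

		/* walk backwards until the last two dots are found */
		while (p > uaddr && dots < 2)
			if (*--p == '.')
				dots++;
		if (dots < 2)
			return -1;	/* malformed: no port octets present */
		if (sscanf(p, ".%u.%u", &hi, &lo) != 2 || hi > 255 || lo > 255)
			return -1;
		return (int)((hi << 8) | lo);
	}

	int main(void)
	{
		printf("%d\n", uaddr_port("192.168.1.5.0.111"));	/* prints 111 */
		return 0;
	}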
@@ -120,7 +103,7 @@ enum { #define RPCB_MAXOWNERLEN sizeof(RPCB_OWNER_STRING) static void rpcb_getport_done(struct rpc_task *, void *); -extern struct rpc_program rpcb_program; +static struct rpc_program rpcb_program; struct rpcbind_args { struct rpc_xprt * r_xprt; @@ -137,10 +120,13 @@ struct rpcbind_args { static struct rpc_procinfo rpcb_procedures2[]; static struct rpc_procinfo rpcb_procedures3[]; -static struct rpcb_info { +struct rpcb_info { int rpc_vers; struct rpc_procinfo * rpc_proc; -} rpcb_next_version[]; +}; + +static struct rpcb_info rpcb_next_version[]; +static struct rpcb_info rpcb_next_version6[]; static void rpcb_getport_prepare(struct rpc_task *task, void *calldata) { @@ -190,7 +176,17 @@ static struct rpc_clnt *rpcb_create(char *hostname, struct sockaddr *srvaddr, RPC_CLNT_CREATE_INTR), }; - ((struct sockaddr_in *)srvaddr)->sin_port = htons(RPCBIND_PORT); + switch (srvaddr->sa_family) { + case AF_INET: + ((struct sockaddr_in *)srvaddr)->sin_port = htons(RPCBIND_PORT); + break; + case AF_INET6: + ((struct sockaddr_in6 *)srvaddr)->sin6_port = htons(RPCBIND_PORT); + break; + default: + return NULL; + } + if (!privileged) args.flags |= RPC_CLNT_CREATE_NONPRIVPORT; return rpc_create(&args); @@ -234,7 +230,7 @@ int rpcb_register(u32 prog, u32 vers, int prot, unsigned short port, int *okay) prog, vers, prot, port); rpcb_clnt = rpcb_create("localhost", (struct sockaddr *) &sin, - IPPROTO_UDP, 2, 1); + XPRT_TRANSPORT_UDP, 2, 1); if (IS_ERR(rpcb_clnt)) return PTR_ERR(rpcb_clnt); @@ -316,6 +312,7 @@ void rpcb_getport_async(struct rpc_task *task) struct rpc_task *child; struct sockaddr addr; int status; + struct rpcb_info *info; dprintk("RPC: %5u %s(%s, %u, %u, %d)\n", task->tk_pid, __FUNCTION__, @@ -325,7 +322,7 @@ void rpcb_getport_async(struct rpc_task *task) BUG_ON(clnt->cl_parent != clnt); if (xprt_test_and_set_binding(xprt)) { - status = -EACCES; /* tell caller to check again */ + status = -EAGAIN; /* tell caller to check again */ dprintk("RPC: %5u %s: waiting for another binder\n", task->tk_pid, __FUNCTION__); goto bailout_nowake; @@ -343,18 +340,43 @@ void rpcb_getport_async(struct rpc_task *task) goto bailout_nofree; } - if (rpcb_next_version[xprt->bind_index].rpc_proc == NULL) { + rpc_peeraddr(clnt, (void *)&addr, sizeof(addr)); + + /* Don't ever use rpcbind v2 for AF_INET6 requests */ + switch (addr.sa_family) { + case AF_INET: + info = rpcb_next_version; + break; + case AF_INET6: + info = rpcb_next_version6; + break; + default: + status = -EAFNOSUPPORT; + dprintk("RPC: %5u %s: bad address family\n", + task->tk_pid, __FUNCTION__); + goto bailout_nofree; + } + if (info[xprt->bind_index].rpc_proc == NULL) { xprt->bind_index = 0; - status = -EACCES; /* tell caller to try again later */ + status = -EPFNOSUPPORT; dprintk("RPC: %5u %s: no more getport versions available\n", task->tk_pid, __FUNCTION__); goto bailout_nofree; } - bind_version = rpcb_next_version[xprt->bind_index].rpc_vers; + bind_version = info[xprt->bind_index].rpc_vers; dprintk("RPC: %5u %s: trying rpcbind version %u\n", task->tk_pid, __FUNCTION__, bind_version); + rpcb_clnt = rpcb_create(clnt->cl_server, &addr, xprt->prot, + bind_version, 0); + if (IS_ERR(rpcb_clnt)) { + status = PTR_ERR(rpcb_clnt); + dprintk("RPC: %5u %s: rpcb_create failed, error %ld\n", + task->tk_pid, __FUNCTION__, PTR_ERR(rpcb_clnt)); + goto bailout_nofree; + } + map = kzalloc(sizeof(struct rpcbind_args), GFP_ATOMIC); if (!map) { status = -ENOMEM; @@ -367,28 +389,19 @@ void rpcb_getport_async(struct rpc_task *task) map->r_prot = xprt->prot; 
map->r_port = 0; map->r_xprt = xprt_get(xprt); - map->r_netid = (xprt->prot == IPPROTO_TCP) ? RPCB_NETID_TCP : - RPCB_NETID_UDP; - memcpy(&map->r_addr, rpc_peeraddr2str(clnt, RPC_DISPLAY_ADDR), - sizeof(map->r_addr)); + map->r_netid = rpc_peeraddr2str(clnt, RPC_DISPLAY_NETID); + memcpy(map->r_addr, + rpc_peeraddr2str(rpcb_clnt, RPC_DISPLAY_UNIVERSAL_ADDR), + sizeof(map->r_addr)); map->r_owner = RPCB_OWNER_STRING; /* ignored for GETADDR */ - rpc_peeraddr(clnt, (void *)&addr, sizeof(addr)); - rpcb_clnt = rpcb_create(clnt->cl_server, &addr, xprt->prot, bind_version, 0); - if (IS_ERR(rpcb_clnt)) { - status = PTR_ERR(rpcb_clnt); - dprintk("RPC: %5u %s: rpcb_create failed, error %ld\n", - task->tk_pid, __FUNCTION__, PTR_ERR(rpcb_clnt)); - goto bailout; - } - child = rpc_run_task(rpcb_clnt, RPC_TASK_ASYNC, &rpcb_getport_ops, map); rpc_release_client(rpcb_clnt); if (IS_ERR(child)) { status = -EIO; dprintk("RPC: %5u %s: rpc_run_task failed\n", task->tk_pid, __FUNCTION__); - goto bailout_nofree; + goto bailout; } rpc_put_task(child); @@ -403,6 +416,7 @@ bailout_nofree: bailout_nowake: task->tk_status = status; } +EXPORT_SYMBOL_GPL(rpcb_getport_async); /* * Rpcbind child task calls this callback via tk_exit. @@ -413,6 +427,10 @@ static void rpcb_getport_done(struct rpc_task *child, void *data) struct rpc_xprt *xprt = map->r_xprt; int status = child->tk_status; + /* Garbage reply: retry with a lesser rpcbind version */ + if (status == -EIO) + status = -EPROTONOSUPPORT; + /* rpcbind server doesn't support this rpcbind protocol version */ if (status == -EPROTONOSUPPORT) xprt->bind_index++; @@ -490,16 +508,24 @@ static int rpcb_decode_getaddr(struct rpc_rqst *req, __be32 *p, unsigned short *portp) { char *addr; - int addr_len, c, i, f, first, val; + u32 addr_len; + int c, i, f, first, val; *portp = 0; - addr_len = (unsigned int) ntohl(*p++); - if (addr_len > RPCB_MAXADDRLEN) /* sanity */ - return -EINVAL; - - dprintk("RPC: rpcb_decode_getaddr returned string: '%s'\n", - (char *) p); - + addr_len = ntohl(*p++); + + /* + * Simple sanity check. The smallest possible universal + * address is an IPv4 address string containing 11 bytes. + */ + if (addr_len < 11 || addr_len > RPCB_MAXADDRLEN) + goto out_err; + + /* + * Start at the end and walk backwards until the first dot + * is encountered. When the second dot is found, we have + * both parts of the port number. + */ addr = (char *)p; val = 0; first = 1; @@ -521,8 +547,19 @@ static int rpcb_decode_getaddr(struct rpc_rqst *req, __be32 *p, } } + /* + * Simple sanity check. If we never saw a dot in the reply, + * then this was probably just garbage. 
+ */ + if (first) + goto out_err; + dprintk("RPC: rpcb_decode_getaddr port=%u\n", *portp); return 0; + +out_err: + dprintk("RPC: rpcbind server returned malformed reply\n"); + return -EIO; } #define RPCB_program_sz (1u) @@ -531,7 +568,7 @@ static int rpcb_decode_getaddr(struct rpc_rqst *req, __be32 *p, #define RPCB_port_sz (1u) #define RPCB_boolean_sz (1u) -#define RPCB_netid_sz (1+XDR_QUADLEN(RPCB_MAXNETIDLEN)) +#define RPCB_netid_sz (1+XDR_QUADLEN(RPCBIND_MAXNETIDLEN)) #define RPCB_addr_sz (1+XDR_QUADLEN(RPCB_MAXADDRLEN)) #define RPCB_ownerstring_sz (1+XDR_QUADLEN(RPCB_MAXOWNERLEN)) @@ -593,6 +630,14 @@ static struct rpcb_info rpcb_next_version[] = { { 0, NULL }, }; +static struct rpcb_info rpcb_next_version6[] = { +#ifdef CONFIG_SUNRPC_BIND34 + { 4, &rpcb_procedures4[RPCBPROC_GETVERSADDR] }, + { 3, &rpcb_procedures3[RPCBPROC_GETADDR] }, +#endif + { 0, NULL }, +}; + static struct rpc_version rpcb_version2 = { .number = 2, .nrprocs = RPCB_HIGHPROC_2, @@ -621,7 +666,7 @@ static struct rpc_version *rpcb_version[] = { static struct rpc_stat rpcb_stats; -struct rpc_program rpcb_program = { +static struct rpc_program rpcb_program = { .name = "rpcbind", .number = RPCBIND_PROGRAM, .nrvers = ARRAY_SIZE(rpcb_version), diff --git a/net/sunrpc/sched.c b/net/sunrpc/sched.c index 954d7ec86c7..c98873f39ae 100644 --- a/net/sunrpc/sched.c +++ b/net/sunrpc/sched.c @@ -777,6 +777,7 @@ void *rpc_malloc(struct rpc_task *task, size_t size) task->tk_pid, size, buf); return &buf->data; } +EXPORT_SYMBOL_GPL(rpc_malloc); /** * rpc_free - free buffer allocated via rpc_malloc @@ -802,6 +803,7 @@ void rpc_free(void *buffer) else kfree(buf); } +EXPORT_SYMBOL_GPL(rpc_free); /* * Creation and deletion of RPC task structures @@ -845,7 +847,7 @@ void rpc_init_task(struct rpc_task *task, struct rpc_clnt *clnt, int flags, cons task->tk_start = jiffies; dprintk("RPC: new task initialized, procpid %u\n", - current->pid); + task_pid_nr(current)); } static struct rpc_task * diff --git a/net/sunrpc/socklib.c b/net/sunrpc/socklib.c index 1d377d1ab7f..97ac45f034d 100644 --- a/net/sunrpc/socklib.c +++ b/net/sunrpc/socklib.c @@ -34,6 +34,7 @@ size_t xdr_skb_read_bits(struct xdr_skb_reader *desc, void *to, size_t len) desc->offset += len; return len; } +EXPORT_SYMBOL_GPL(xdr_skb_read_bits); /** * xdr_skb_read_and_csum_bits - copy and checksum from skb to buffer @@ -137,6 +138,7 @@ copy_tail: out: return copied; } +EXPORT_SYMBOL_GPL(xdr_partial_copy_from_skb); /** * csum_partial_copy_to_xdr - checksum and copy data @@ -179,3 +181,4 @@ no_checksum: return -1; return 0; } +EXPORT_SYMBOL_GPL(csum_partial_copy_to_xdr); diff --git a/net/sunrpc/sunrpc_syms.c b/net/sunrpc/sunrpc_syms.c index 384c4ad5ab8..33d89e842c8 100644 --- a/net/sunrpc/sunrpc_syms.c +++ b/net/sunrpc/sunrpc_syms.c @@ -20,7 +20,7 @@ #include <linux/sunrpc/auth.h> #include <linux/workqueue.h> #include <linux/sunrpc/rpc_pipe_fs.h> - +#include <linux/sunrpc/xprtsock.h> /* RPC scheduler */ EXPORT_SYMBOL(rpc_execute); diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c index 55ea6df069d..a4a6bf7deaa 100644 --- a/net/sunrpc/svc.c +++ b/net/sunrpc/svc.c @@ -777,6 +777,30 @@ svc_register(struct svc_serv *serv, int proto, unsigned short port) } /* + * Printk the given error with the address of the client that caused it. + */ +static int +__attribute__ ((format (printf, 2, 3))) +svc_printk(struct svc_rqst *rqstp, const char *fmt, ...) 
+{ + va_list args; + int r; + char buf[RPC_MAX_ADDRBUFLEN]; + + if (!net_ratelimit()) + return 0; + + printk(KERN_WARNING "svc: %s: ", + svc_print_addr(rqstp, buf, sizeof(buf))); + + va_start(args, fmt); + r = vprintk(fmt, args); + va_end(args); + + return r; +} + +/* * Process the RPC request. */ int @@ -963,14 +987,13 @@ svc_process(struct svc_rqst *rqstp) return 0; err_short_len: - if (net_ratelimit()) - printk("svc: short len %Zd, dropping request\n", argv->iov_len); + svc_printk(rqstp, "short len %Zd, dropping request\n", + argv->iov_len); goto dropit; /* drop request */ err_bad_dir: - if (net_ratelimit()) - printk("svc: bad direction %d, dropping request\n", dir); + svc_printk(rqstp, "bad direction %d, dropping request\n", dir); serv->sv_stats->rpcbadfmt++; goto dropit; /* drop request */ @@ -1000,8 +1023,7 @@ err_bad_prog: goto sendit; err_bad_vers: - if (net_ratelimit()) - printk("svc: unknown version (%d for prog %d, %s)\n", + svc_printk(rqstp, "unknown version (%d for prog %d, %s)\n", vers, prog, progp->pg_name); serv->sv_stats->rpcbadfmt++; @@ -1011,16 +1033,14 @@ err_bad_vers: goto sendit; err_bad_proc: - if (net_ratelimit()) - printk("svc: unknown procedure (%d)\n", proc); + svc_printk(rqstp, "unknown procedure (%d)\n", proc); serv->sv_stats->rpcbadfmt++; svc_putnl(resv, RPC_PROC_UNAVAIL); goto sendit; err_garbage: - if (net_ratelimit()) - printk("svc: failed to decode args\n"); + svc_printk(rqstp, "failed to decode args\n"); rpc_stat = rpc_garbage_args; err_bad: diff --git a/net/sunrpc/sysctl.c b/net/sunrpc/sysctl.c index 738db32a287..864b541bbf5 100644 --- a/net/sunrpc/sysctl.c +++ b/net/sunrpc/sysctl.c @@ -114,7 +114,6 @@ done: static ctl_table debug_table[] = { { - .ctl_name = CTL_RPCDEBUG, .procname = "rpc_debug", .data = &rpc_debug, .maxlen = sizeof(int), @@ -122,7 +121,6 @@ static ctl_table debug_table[] = { .proc_handler = &proc_dodebug }, { - .ctl_name = CTL_NFSDEBUG, .procname = "nfs_debug", .data = &nfs_debug, .maxlen = sizeof(int), @@ -130,7 +128,6 @@ static ctl_table debug_table[] = { .proc_handler = &proc_dodebug }, { - .ctl_name = CTL_NFSDDEBUG, .procname = "nfsd_debug", .data = &nfsd_debug, .maxlen = sizeof(int), @@ -138,7 +135,6 @@ static ctl_table debug_table[] = { .proc_handler = &proc_dodebug }, { - .ctl_name = CTL_NLMDEBUG, .procname = "nlm_debug", .data = &nlm_debug, .maxlen = sizeof(int), diff --git a/net/sunrpc/timer.c b/net/sunrpc/timer.c index 8142fdb8a93..31becbf0926 100644 --- a/net/sunrpc/timer.c +++ b/net/sunrpc/timer.c @@ -17,6 +17,7 @@ #include <linux/types.h> #include <linux/unistd.h> +#include <linux/module.h> #include <linux/sunrpc/clnt.h> @@ -40,6 +41,7 @@ rpc_init_rtt(struct rpc_rtt *rt, unsigned long timeo) rt->ntimeouts[i] = 0; } } +EXPORT_SYMBOL_GPL(rpc_init_rtt); /* * NB: When computing the smoothed RTT and standard deviation, @@ -75,6 +77,7 @@ rpc_update_rtt(struct rpc_rtt *rt, unsigned timer, long m) if (*sdrtt < RPC_RTO_MIN) *sdrtt = RPC_RTO_MIN; } +EXPORT_SYMBOL_GPL(rpc_update_rtt); /* * Estimate rto for an nfs rpc sent via an unreliable datagram.
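rpc_update_rtt() and rpc_calc_rto(), exported above for the new pluggable transports, follow the classic Van Jacobson estimator: the smoothed RTT is kept scaled by 8 and the mean deviation by 4, so each update needs only shifts and adds, and adding the still-scaled deviation to the timeout weights it by 4, as in TCP. A stand-alone sketch of the arithmetic, with hypothetical rtt_update()/rtt_rto() helpers that mirror, but are not, the kernel functions:

	#include <stdio.h>

	struct rtt_est {
		long srtt;	/* smoothed RTT, scaled << 3 */
		long sdrtt;	/* smoothed mean deviation, scaled << 2 */
	};

	static void rtt_update(struct rtt_est *e, long m)	/* m: RTT sample */
	{
		long err = m - (e->srtt >> 3);

		e->srtt += err;				/* srtt = 7/8 srtt + m/8 */
		if (err < 0)
			err = -err;
		e->sdrtt += err - (e->sdrtt >> 2);	/* dev = 3/4 dev + |err|/4 */
	}

	static long rtt_rto(const struct rtt_est *e)
	{
		/* the deviation is added while still scaled: srtt + 4 * dev */
		return (e->srtt >> 3) + e->sdrtt;
	}

	int main(void)
	{
		struct rtt_est e = { .srtt = 100 << 3, .sdrtt = 10 << 2 };

		rtt_update(&e, 120);			/* one 120-tick sample */
		printf("rto = %ld\n", rtt_rto(&e));	/* 102 + 50 = 152 */
		return 0;
	}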
@@ -103,3 +106,4 @@ rpc_calc_rto(struct rpc_rtt *rt, unsigned timer) return res; } +EXPORT_SYMBOL_GPL(rpc_calc_rto); diff --git a/net/sunrpc/xdr.c b/net/sunrpc/xdr.c index 6a59180e166..3d1f7cdf9dd 100644 --- a/net/sunrpc/xdr.c +++ b/net/sunrpc/xdr.c @@ -1059,7 +1059,7 @@ xdr_process_buf(struct xdr_buf *buf, unsigned int offset, unsigned int len, do { if (thislen > page_len) thislen = page_len; - sg->page = buf->pages[i]; + sg_set_page(sg, buf->pages[i]); sg->offset = page_offset; sg->length = thislen; ret = actor(sg, data); diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c index c8c2edccad7..282a9a2ec90 100644 --- a/net/sunrpc/xprt.c +++ b/net/sunrpc/xprt.c @@ -62,6 +62,9 @@ static inline void do_xprt_reserve(struct rpc_task *); static void xprt_connect_status(struct rpc_task *task); static int __xprt_get_cong(struct rpc_xprt *, struct rpc_task *); +static spinlock_t xprt_list_lock = SPIN_LOCK_UNLOCKED; +static LIST_HEAD(xprt_list); + /* * The transport code maintains an estimate on the maximum number of out- * standing RPC requests, using a smoothed version of the congestion @@ -81,6 +84,78 @@ static int __xprt_get_cong(struct rpc_xprt *, struct rpc_task *); #define RPCXPRT_CONGESTED(xprt) ((xprt)->cong >= (xprt)->cwnd) /** + * xprt_register_transport - register a transport implementation + * @transport: transport to register + * + * If a transport implementation is loaded as a kernel module, it can + * call this interface to make itself known to the RPC client. + * + * Returns: + * 0: transport successfully registered + * -EEXIST: transport already registered + * -EINVAL: transport module being unloaded + */ +int xprt_register_transport(struct xprt_class *transport) +{ + struct xprt_class *t; + int result; + + result = -EEXIST; + spin_lock(&xprt_list_lock); + list_for_each_entry(t, &xprt_list, list) { + /* don't register the same transport class twice */ + if (t->ident == transport->ident) + goto out; + } + + result = -EINVAL; + if (try_module_get(THIS_MODULE)) { + list_add_tail(&transport->list, &xprt_list); + printk(KERN_INFO "RPC: Registered %s transport module.\n", + transport->name); + result = 0; + } + +out: + spin_unlock(&xprt_list_lock); + return result; +} +EXPORT_SYMBOL_GPL(xprt_register_transport); + +/** + * xprt_unregister_transport - unregister a transport implementation + * @transport: transport to unregister + * + * Returns: + * 0: transport successfully unregistered + * -ENOENT: transport never registered + */ +int xprt_unregister_transport(struct xprt_class *transport) +{ + struct xprt_class *t; + int result; + + result = 0; + spin_lock(&xprt_list_lock); + list_for_each_entry(t, &xprt_list, list) { + if (t == transport) { + printk(KERN_INFO + "RPC: Unregistered %s transport module.\n", + transport->name); + list_del_init(&transport->list); + module_put(THIS_MODULE); + goto out; + } + } + result = -ENOENT; + +out: + spin_unlock(&xprt_list_lock); + return result; +} +EXPORT_SYMBOL_GPL(xprt_unregister_transport); + +/** * xprt_reserve_xprt - serialize write access to transports * @task: task that is requesting access to the transport * @@ -118,6 +193,7 @@ out_sleep: rpc_sleep_on(&xprt->sending, task, NULL, NULL); return 0; } +EXPORT_SYMBOL_GPL(xprt_reserve_xprt); static void xprt_clear_locked(struct rpc_xprt *xprt) { @@ -167,6 +243,7 @@ out_sleep: rpc_sleep_on(&xprt->sending, task, NULL, NULL); return 0; } +EXPORT_SYMBOL_GPL(xprt_reserve_xprt_cong); static inline int xprt_lock_write(struct rpc_xprt *xprt, struct rpc_task *task) { @@ -246,6 +323,7 @@ void
xprt_release_xprt(struct rpc_xprt *xprt, struct rpc_task *task) __xprt_lock_write_next(xprt); } } +EXPORT_SYMBOL_GPL(xprt_release_xprt); /** * xprt_release_xprt_cong - allow other requests to use a transport @@ -262,6 +340,7 @@ void xprt_release_xprt_cong(struct rpc_xprt *xprt, struct rpc_task *task) __xprt_lock_write_next_cong(xprt); } } +EXPORT_SYMBOL_GPL(xprt_release_xprt_cong); static inline void xprt_release_write(struct rpc_xprt *xprt, struct rpc_task *task) { @@ -314,6 +393,7 @@ void xprt_release_rqst_cong(struct rpc_task *task) { __xprt_put_cong(task->tk_xprt, task->tk_rqstp); } +EXPORT_SYMBOL_GPL(xprt_release_rqst_cong); /** * xprt_adjust_cwnd - adjust transport congestion window @@ -345,6 +425,7 @@ void xprt_adjust_cwnd(struct rpc_task *task, int result) xprt->cwnd = cwnd; __xprt_put_cong(xprt, req); } +EXPORT_SYMBOL_GPL(xprt_adjust_cwnd); /** * xprt_wake_pending_tasks - wake all tasks on a transport's pending queue @@ -359,6 +440,7 @@ void xprt_wake_pending_tasks(struct rpc_xprt *xprt, int status) else rpc_wake_up(&xprt->pending); } +EXPORT_SYMBOL_GPL(xprt_wake_pending_tasks); /** * xprt_wait_for_buffer_space - wait for transport output buffer to clear @@ -373,6 +455,7 @@ void xprt_wait_for_buffer_space(struct rpc_task *task) task->tk_timeout = req->rq_timeout; rpc_sleep_on(&xprt->pending, task, NULL, NULL); } +EXPORT_SYMBOL_GPL(xprt_wait_for_buffer_space); /** * xprt_write_space - wake the task waiting for transport output buffer space @@ -393,6 +476,7 @@ void xprt_write_space(struct rpc_xprt *xprt) } spin_unlock_bh(&xprt->transport_lock); } +EXPORT_SYMBOL_GPL(xprt_write_space); /** * xprt_set_retrans_timeout_def - set a request's retransmit timeout @@ -406,6 +490,7 @@ void xprt_set_retrans_timeout_def(struct rpc_task *task) { task->tk_timeout = task->tk_rqstp->rq_timeout; } +EXPORT_SYMBOL_GPL(xprt_set_retrans_timeout_def); /* * xprt_set_retrans_timeout_rtt - set a request's retransmit timeout @@ -425,6 +510,7 @@ void xprt_set_retrans_timeout_rtt(struct rpc_task *task) if (task->tk_timeout > max_timeout || task->tk_timeout == 0) task->tk_timeout = max_timeout; } +EXPORT_SYMBOL_GPL(xprt_set_retrans_timeout_rtt); static void xprt_reset_majortimeo(struct rpc_rqst *req) { @@ -500,6 +586,7 @@ void xprt_disconnect(struct rpc_xprt *xprt) xprt_wake_pending_tasks(xprt, -ENOTCONN); spin_unlock_bh(&xprt->transport_lock); } +EXPORT_SYMBOL_GPL(xprt_disconnect); static void xprt_init_autodisconnect(unsigned long data) @@ -610,6 +697,7 @@ struct rpc_rqst *xprt_lookup_rqst(struct rpc_xprt *xprt, __be32 xid) xprt->stat.bad_xids++; return NULL; } +EXPORT_SYMBOL_GPL(xprt_lookup_rqst); /** * xprt_update_rtt - update an RPC client's RTT state after receiving a reply @@ -629,6 +717,7 @@ void xprt_update_rtt(struct rpc_task *task) rpc_set_timeo(rtt, timer, req->rq_ntrans - 1); } } +EXPORT_SYMBOL_GPL(xprt_update_rtt); /** * xprt_complete_rqst - called when reply processing is complete @@ -653,6 +742,7 @@ void xprt_complete_rqst(struct rpc_task *task, int copied) req->rq_received = req->rq_private_buf.len = copied; rpc_wake_up_task(task); } +EXPORT_SYMBOL_GPL(xprt_complete_rqst); static void xprt_timer(struct rpc_task *task) { @@ -889,23 +979,25 @@ void xprt_set_timeout(struct rpc_timeout *to, unsigned int retr, unsigned long i * @args: rpc transport creation arguments * */ -struct rpc_xprt *xprt_create_transport(struct rpc_xprtsock_create *args) +struct rpc_xprt *xprt_create_transport(struct xprt_create *args) { struct rpc_xprt *xprt; struct rpc_rqst *req; + struct xprt_class *t; - switch 
(args->proto) { - case IPPROTO_UDP: - xprt = xs_setup_udp(args); - break; - case IPPROTO_TCP: - xprt = xs_setup_tcp(args); - break; - default: - printk(KERN_ERR "RPC: unrecognized transport protocol: %d\n", - args->proto); - return ERR_PTR(-EIO); + spin_lock(&xprt_list_lock); + list_for_each_entry(t, &xprt_list, list) { + if (t->ident == args->ident) { + spin_unlock(&xprt_list_lock); + goto found; + } } + spin_unlock(&xprt_list_lock); + printk(KERN_ERR "RPC: transport (%d) not supported\n", args->ident); + return ERR_PTR(-EIO); + +found: + xprt = t->setup(args); if (IS_ERR(xprt)) { dprintk("RPC: xprt_create_transport: failed, %ld\n", -PTR_ERR(xprt)); diff --git a/net/sunrpc/xprtrdma/Makefile b/net/sunrpc/xprtrdma/Makefile new file mode 100644 index 00000000000..264f0feeb51 --- /dev/null +++ b/net/sunrpc/xprtrdma/Makefile @@ -0,0 +1,3 @@ +obj-$(CONFIG_SUNRPC_XPRT_RDMA) += xprtrdma.o + +xprtrdma-y := transport.o rpc_rdma.o verbs.o diff --git a/net/sunrpc/xprtrdma/rpc_rdma.c b/net/sunrpc/xprtrdma/rpc_rdma.c new file mode 100644 index 00000000000..12db6358042 --- /dev/null +++ b/net/sunrpc/xprtrdma/rpc_rdma.c @@ -0,0 +1,868 @@ +/* + * Copyright (c) 2003-2007 Network Appliance, Inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the BSD-type + * license below: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials provided + * with the distribution. + * + * Neither the name of the Network Appliance, Inc. nor the names of + * its contributors may be used to endorse or promote products + * derived from this software without specific prior written + * permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +/* + * rpc_rdma.c + * + * This file contains the guts of the RPC RDMA protocol, and + * does marshaling/unmarshaling, etc. It is also where interfacing + * to the Linux RPC framework lives. 
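+ *
+ * In rough outline, the paths below fit together as follows (a
+ * sketch of the call flow within this file, for orientation only):
+ *
+ *	rpcrdma_marshal_req(rqst)
+ *	    -> choose read/write/reply chunk types for the message
+ *	    -> rpcrdma_inline_pullup()	copy small sends "pure inline"
+ *	    -> rpcrdma_create_chunks()	register memory, encode chunks
+ *
+ *	rpcrdma_reply_handler(rep)
+ *	    -> rpcrdma_count_chunks()	tally bytes the server RDMA'd
+ *	    -> rpcrdma_inline_fixup()	scatter inline data into iovs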
+ */ + +#include "xprt_rdma.h" + +#include <linux/highmem.h> + +#ifdef RPC_DEBUG +# define RPCDBG_FACILITY RPCDBG_TRANS +#endif + +enum rpcrdma_chunktype { + rpcrdma_noch = 0, + rpcrdma_readch, + rpcrdma_areadch, + rpcrdma_writech, + rpcrdma_replych +}; + +#ifdef RPC_DEBUG +static const char transfertypes[][12] = { + "pure inline", /* no chunks */ + " read chunk", /* some argument via rdma read */ + "*read chunk", /* entire request via rdma read */ + "write chunk", /* some result via rdma write */ + "reply chunk" /* entire reply via rdma write */ +}; +#endif + +/* + * Chunk assembly from upper layer xdr_buf. + * + * Prepare the passed-in xdr_buf into representation as RPC/RDMA chunk + * elements. Segments are then coalesced when registered, if possible + * within the selected memreg mode. + * + * Note, this routine is never called if the connection's memory + * registration strategy is 0 (bounce buffers). + */ + +static int +rpcrdma_convert_iovs(struct xdr_buf *xdrbuf, int pos, + enum rpcrdma_chunktype type, struct rpcrdma_mr_seg *seg, int nsegs) +{ + int len, n = 0, p; + + if (pos == 0 && xdrbuf->head[0].iov_len) { + seg[n].mr_page = NULL; + seg[n].mr_offset = xdrbuf->head[0].iov_base; + seg[n].mr_len = xdrbuf->head[0].iov_len; + pos += xdrbuf->head[0].iov_len; + ++n; + } + + if (xdrbuf->page_len && (xdrbuf->pages[0] != NULL)) { + if (n == nsegs) + return 0; + seg[n].mr_page = xdrbuf->pages[0]; + seg[n].mr_offset = (void *)(unsigned long) xdrbuf->page_base; + seg[n].mr_len = min_t(u32, + PAGE_SIZE - xdrbuf->page_base, xdrbuf->page_len); + len = xdrbuf->page_len - seg[n].mr_len; + pos += len; + ++n; + p = 1; + while (len > 0) { + if (n == nsegs) + return 0; + seg[n].mr_page = xdrbuf->pages[p]; + seg[n].mr_offset = NULL; + seg[n].mr_len = min_t(u32, PAGE_SIZE, len); + len -= seg[n].mr_len; + ++n; + ++p; + } + } + + if (pos < xdrbuf->len && xdrbuf->tail[0].iov_len) { + if (n == nsegs) + return 0; + seg[n].mr_page = NULL; + seg[n].mr_offset = xdrbuf->tail[0].iov_base; + seg[n].mr_len = xdrbuf->tail[0].iov_len; + pos += xdrbuf->tail[0].iov_len; + ++n; + } + + if (pos < xdrbuf->len) + dprintk("RPC: %s: marshaled only %d of %d\n", + __func__, pos, xdrbuf->len); + + return n; +} + +/* + * Create read/write chunk lists, and reply chunks, for RDMA + * + * Assume check against THRESHOLD has been done, and chunks are required. + * Assume only encoding one list entry for read|write chunks. The NFSv3 + * protocol is simple enough to allow this as it only has a single "bulk + * result" in each procedure - complicated NFSv4 COMPOUNDs are not. (The + * RDMA/Sessions NFSv4 proposal addresses this for future v4 revs.) + * + * When used for a single reply chunk (which is a special write + * chunk used for the entire reply, rather than just the data), it + * is used primarily for READDIR and READLINK which would otherwise + * be severely size-limited by a small rdma inline read max. The server + * response will come back as an RDMA Write, followed by a message + * of type RDMA_NOMSG carrying the xid and length. As a result, reply + * chunks do not provide data alignment, however they do not require + * "fixup" (moving the response to the upper layer buffer) either. + * + * Encoding key for single-list chunks (HLOO = Handle32 Length32 Offset64): + * + * Read chunklist (a linked list): + * N elements, position P (same P for all chunks of same arg!): + * 1 - PHLOO - 1 - PHLOO - ... - 1 - PHLOO - 0 + * + * Write chunklist (a list of (one) counted array): + * N elements: + * 1 - N - HLOO - HLOO - ... 
- HLOO - 0 + * + * Reply chunk (a counted array): + * N elements: + * 1 - N - HLOO - HLOO - ... - HLOO + */ + +static unsigned int +rpcrdma_create_chunks(struct rpc_rqst *rqst, struct xdr_buf *target, + struct rpcrdma_msg *headerp, enum rpcrdma_chunktype type) +{ + struct rpcrdma_req *req = rpcr_to_rdmar(rqst); + struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(rqst->rq_task->tk_xprt); + int nsegs, nchunks = 0; + int pos; + struct rpcrdma_mr_seg *seg = req->rl_segments; + struct rpcrdma_read_chunk *cur_rchunk = NULL; + struct rpcrdma_write_array *warray = NULL; + struct rpcrdma_write_chunk *cur_wchunk = NULL; + u32 *iptr = headerp->rm_body.rm_chunks; + + if (type == rpcrdma_readch || type == rpcrdma_areadch) { + /* a read chunk - server will RDMA Read our memory */ + cur_rchunk = (struct rpcrdma_read_chunk *) iptr; + } else { + /* a write or reply chunk - server will RDMA Write our memory */ + *iptr++ = xdr_zero; /* encode a NULL read chunk list */ + if (type == rpcrdma_replych) + *iptr++ = xdr_zero; /* a NULL write chunk list */ + warray = (struct rpcrdma_write_array *) iptr; + cur_wchunk = (struct rpcrdma_write_chunk *) (warray + 1); + } + + if (type == rpcrdma_replych || type == rpcrdma_areadch) + pos = 0; + else + pos = target->head[0].iov_len; + + nsegs = rpcrdma_convert_iovs(target, pos, type, seg, RPCRDMA_MAX_SEGS); + if (nsegs == 0) + return 0; + + do { + /* bind/register the memory, then build chunk from result. */ + int n = rpcrdma_register_external(seg, nsegs, + cur_wchunk != NULL, r_xprt); + if (n <= 0) + goto out; + if (cur_rchunk) { /* read */ + cur_rchunk->rc_discrim = xdr_one; + /* all read chunks have the same "position" */ + cur_rchunk->rc_position = htonl(pos); + cur_rchunk->rc_target.rs_handle = htonl(seg->mr_rkey); + cur_rchunk->rc_target.rs_length = htonl(seg->mr_len); + xdr_encode_hyper( + (u32 *)&cur_rchunk->rc_target.rs_offset, + seg->mr_base); + dprintk("RPC: %s: read chunk " + "elem %d@0x%llx:0x%x pos %d (%s)\n", __func__, + seg->mr_len, seg->mr_base, seg->mr_rkey, pos, + n < nsegs ? "more" : "last"); + cur_rchunk++; + r_xprt->rx_stats.read_chunk_count++; + } else { /* write/reply */ + cur_wchunk->wc_target.rs_handle = htonl(seg->mr_rkey); + cur_wchunk->wc_target.rs_length = htonl(seg->mr_len); + xdr_encode_hyper( + (u32 *)&cur_wchunk->wc_target.rs_offset, + seg->mr_base); + dprintk("RPC: %s: %s chunk " + "elem %d@0x%llx:0x%x (%s)\n", __func__, + (type == rpcrdma_replych) ? "reply" : "write", + seg->mr_len, seg->mr_base, seg->mr_rkey, + n < nsegs ? "more" : "last"); + cur_wchunk++; + if (type == rpcrdma_replych) + r_xprt->rx_stats.reply_chunk_count++; + else + r_xprt->rx_stats.write_chunk_count++; + r_xprt->rx_stats.total_rdma_request += seg->mr_len; + } + nchunks++; + seg += n; + nsegs -= n; + } while (nsegs); + + /* success. all failures return above */ + req->rl_nchunks = nchunks; + + BUG_ON(nchunks == 0); + + /* + * finish off header. If write, marshal discrim and nchunks. + */ + if (cur_rchunk) { + iptr = (u32 *) cur_rchunk; + *iptr++ = xdr_zero; /* finish the read chunk list */ + *iptr++ = xdr_zero; /* encode a NULL write chunk list */ + *iptr++ = xdr_zero; /* encode a NULL reply chunk */ + } else { + warray->wc_discrim = xdr_one; + warray->wc_nchunks = htonl(nchunks); + iptr = (u32 *) cur_wchunk; + if (type == rpcrdma_writech) { + *iptr++ = xdr_zero; /* finish the write chunk list */ + *iptr++ = xdr_zero; /* encode a NULL reply chunk */ + } + } + + /* + * Return header size. 
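+ *
+ * Illustrative arithmetic (using the encoding key above): with no
+ * chunks the header is 28 bytes, i.e. four 32-bit words of fixed
+ * header plus three empty chunk lists.  With a single read chunk,
+ * iptr lands 16 + 24 + 12 = 52 bytes past headerp: the fixed
+ * header, one discrim/position/HLOO entry, and the three xdr_zero
+ * words terminating the read, write and reply lists.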
+ */ + return (unsigned char *)iptr - (unsigned char *)headerp; + +out: + for (pos = 0; nchunks--;) + pos += rpcrdma_deregister_external( + &req->rl_segments[pos], r_xprt, NULL); + return 0; +} + +/* + * Copy write data inline. + * This function is used for "small" requests. Data which is passed + * to RPC via iovecs (or page list) is copied directly into the + * pre-registered memory buffer for this request. For small amounts + * of data, this is efficient. The cutoff value is tunable. + */ +static int +rpcrdma_inline_pullup(struct rpc_rqst *rqst, int pad) +{ + int i, npages, curlen; + int copy_len; + unsigned char *srcp, *destp; + struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(rqst->rq_xprt); + + destp = rqst->rq_svec[0].iov_base; + curlen = rqst->rq_svec[0].iov_len; + destp += curlen; + /* + * Do optional padding where it makes sense. Alignment of write + * payload can help the server, if our setting is accurate. + */ + pad -= (curlen + 36/*sizeof(struct rpcrdma_msg_padded)*/); + if (pad < 0 || rqst->rq_slen - curlen < RPCRDMA_INLINE_PAD_THRESH) + pad = 0; /* don't pad this request */ + + dprintk("RPC: %s: pad %d destp 0x%p len %d hdrlen %d\n", + __func__, pad, destp, rqst->rq_slen, curlen); + + copy_len = rqst->rq_snd_buf.page_len; + r_xprt->rx_stats.pullup_copy_count += copy_len; + npages = PAGE_ALIGN(rqst->rq_snd_buf.page_base+copy_len) >> PAGE_SHIFT; + for (i = 0; copy_len && i < npages; i++) { + if (i == 0) + curlen = PAGE_SIZE - rqst->rq_snd_buf.page_base; + else + curlen = PAGE_SIZE; + if (curlen > copy_len) + curlen = copy_len; + dprintk("RPC: %s: page %d destp 0x%p len %d curlen %d\n", + __func__, i, destp, copy_len, curlen); + srcp = kmap_atomic(rqst->rq_snd_buf.pages[i], + KM_SKB_SUNRPC_DATA); + if (i == 0) + memcpy(destp, srcp+rqst->rq_snd_buf.page_base, curlen); + else + memcpy(destp, srcp, curlen); + kunmap_atomic(srcp, KM_SKB_SUNRPC_DATA); + rqst->rq_svec[0].iov_len += curlen; + destp += curlen; + copy_len -= curlen; + } + if (rqst->rq_snd_buf.tail[0].iov_len) { + curlen = rqst->rq_snd_buf.tail[0].iov_len; + if (destp != rqst->rq_snd_buf.tail[0].iov_base) { + memcpy(destp, + rqst->rq_snd_buf.tail[0].iov_base, curlen); + r_xprt->rx_stats.pullup_copy_count += curlen; + } + dprintk("RPC: %s: tail destp 0x%p len %d curlen %d\n", + __func__, destp, copy_len, curlen); + rqst->rq_svec[0].iov_len += curlen; + } + /* header now contains entire send message */ + return pad; +} + +/* + * Marshal a request: the primary job of this routine is to choose + * the transfer modes. See comments below. + * + * Uses multiple RDMA IOVs for a request: + * [0] -- RPC RDMA header, which uses memory from the *start* of the + * preregistered buffer that already holds the RPC data in + * its middle. + * [1] -- the RPC header/data, marshaled by RPC and the NFS protocol. + * [2] -- optional padding. + * [3] -- if padded, header only in [1] and data here. + */ + +int +rpcrdma_marshal_req(struct rpc_rqst *rqst) +{ + struct rpc_xprt *xprt = rqst->rq_task->tk_xprt; + struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt); + struct rpcrdma_req *req = rpcr_to_rdmar(rqst); + char *base; + size_t hdrlen, rpclen, padlen; + enum rpcrdma_chunktype rtype, wtype; + struct rpcrdma_msg *headerp; + + /* + * rpclen gets amount of data in first buffer, which is the + * pre-registered buffer. 
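+ * (That buffer is the rl_xdr_buf region handed out by
+ * xprt_rdma_allocate() below, so the RPC header already sits in
+ * registered memory.)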
+ */ + base = rqst->rq_svec[0].iov_base; + rpclen = rqst->rq_svec[0].iov_len; + + /* build RDMA header in private area at front */ + headerp = (struct rpcrdma_msg *) req->rl_base; + /* don't htonl XID, it's already done in request */ + headerp->rm_xid = rqst->rq_xid; + headerp->rm_vers = xdr_one; + headerp->rm_credit = htonl(r_xprt->rx_buf.rb_max_requests); + headerp->rm_type = __constant_htonl(RDMA_MSG); + + /* + * Chunks needed for results? + * + * o If the expected result is under the inline threshold, all ops + * return as inline (but see later). + * o Large non-read ops return as a single reply chunk. + * o Large read ops return data as write chunk(s), header as inline. + * + * Note: the NFS code sending down multiple result segments implies + * the op is one of read, readdir[plus], readlink or NFSv4 getacl. + */ + + /* + * This code can handle read chunks, write chunks OR reply + * chunks -- only one type. If the request is too big to fit + * inline, then we will choose read chunks. If the request is + * a READ, then use write chunks to separate the file data + * into pages; otherwise use reply chunks. + */ + if (rqst->rq_rcv_buf.buflen <= RPCRDMA_INLINE_READ_THRESHOLD(rqst)) + wtype = rpcrdma_noch; + else if (rqst->rq_rcv_buf.page_len == 0) + wtype = rpcrdma_replych; + else if (rqst->rq_rcv_buf.flags & XDRBUF_READ) + wtype = rpcrdma_writech; + else + wtype = rpcrdma_replych; + + /* + * Chunks needed for arguments? + * + * o If the total request is under the inline threshold, all ops + * are sent as inline. + * o Large non-write ops are sent with the entire message as a + * single read chunk (protocol 0-position special case). + * o Large write ops transmit data as read chunk(s), header as + * inline. + * + * Note: the NFS code sending down multiple argument segments + * implies the op is a write. + * TBD check NFSv4 setacl + */ + if (rqst->rq_snd_buf.len <= RPCRDMA_INLINE_WRITE_THRESHOLD(rqst)) + rtype = rpcrdma_noch; + else if (rqst->rq_snd_buf.page_len == 0) + rtype = rpcrdma_areadch; + else + rtype = rpcrdma_readch; + + /* The following simplification is not true forever */ + if (rtype != rpcrdma_noch && wtype == rpcrdma_replych) + wtype = rpcrdma_noch; + BUG_ON(rtype != rpcrdma_noch && wtype != rpcrdma_noch); + + if (r_xprt->rx_ia.ri_memreg_strategy == RPCRDMA_BOUNCEBUFFERS && + (rtype != rpcrdma_noch || wtype != rpcrdma_noch)) { + /* forced to "pure inline"? */ + dprintk("RPC: %s: too much data (%d/%d) for inline\n", + __func__, rqst->rq_rcv_buf.len, rqst->rq_snd_buf.len); + return -1; + } + + hdrlen = 28; /*sizeof *headerp;*/ + padlen = 0; + + /* + * Pull up any extra send data into the preregistered buffer. + * When padding is in use and applies to the transfer, insert + * it and change the message type. 
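+ *
+ * For example (illustrative): when padding applies, the message
+ * goes out as RDMA_MSGP and four send iovs are built below --
+ * [0] RPC/RDMA header, [1] RPC header, [2] the preregistered
+ * zeroed pad buffer, [3] the remaining write data -- versus two
+ * iovs for a plain RDMA_MSG.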
+ */ + if (rtype == rpcrdma_noch) { + + padlen = rpcrdma_inline_pullup(rqst, + RPCRDMA_INLINE_PAD_VALUE(rqst)); + + if (padlen) { + headerp->rm_type = __constant_htonl(RDMA_MSGP); + headerp->rm_body.rm_padded.rm_align = + htonl(RPCRDMA_INLINE_PAD_VALUE(rqst)); + headerp->rm_body.rm_padded.rm_thresh = + __constant_htonl(RPCRDMA_INLINE_PAD_THRESH); + headerp->rm_body.rm_padded.rm_pempty[0] = xdr_zero; + headerp->rm_body.rm_padded.rm_pempty[1] = xdr_zero; + headerp->rm_body.rm_padded.rm_pempty[2] = xdr_zero; + hdrlen += 2 * sizeof(u32); /* extra words in padhdr */ + BUG_ON(wtype != rpcrdma_noch); + + } else { + headerp->rm_body.rm_nochunks.rm_empty[0] = xdr_zero; + headerp->rm_body.rm_nochunks.rm_empty[1] = xdr_zero; + headerp->rm_body.rm_nochunks.rm_empty[2] = xdr_zero; + /* new length after pullup */ + rpclen = rqst->rq_svec[0].iov_len; + /* + * Currently we try to not actually use read inline. + * Reply chunks have the desirable property that + * they land, packed, directly in the target buffers + * without headers, so they require no fixup. The + * additional RDMA Write op sends the same amount + * of data, streams on-the-wire and adds no overhead + * on receive. Therefore, we request a reply chunk + * for non-writes wherever feasible and efficient. + */ + if (wtype == rpcrdma_noch && + r_xprt->rx_ia.ri_memreg_strategy > RPCRDMA_REGISTER) + wtype = rpcrdma_replych; + } + } + + /* + * Marshal chunks. This routine will return the header length + * consumed by marshaling. + */ + if (rtype != rpcrdma_noch) { + hdrlen = rpcrdma_create_chunks(rqst, + &rqst->rq_snd_buf, headerp, rtype); + wtype = rtype; /* simplify dprintk */ + + } else if (wtype != rpcrdma_noch) { + hdrlen = rpcrdma_create_chunks(rqst, + &rqst->rq_rcv_buf, headerp, wtype); + } + + if (hdrlen == 0) + return -1; + + dprintk("RPC: %s: %s: hdrlen %zd rpclen %zd padlen %zd\n" + " headerp 0x%p base 0x%p lkey 0x%x\n", + __func__, transfertypes[wtype], hdrlen, rpclen, padlen, + headerp, base, req->rl_iov.lkey); + + /* + * initialize send_iov's - normally only two: rdma chunk header and + * single preregistered RPC header buffer, but if padding is present, + * then use a preregistered (and zeroed) pad buffer between the RPC + * header and any write data. In all non-rdma cases, any following + * data has been copied into the RPC header buffer. + */ + req->rl_send_iov[0].addr = req->rl_iov.addr; + req->rl_send_iov[0].length = hdrlen; + req->rl_send_iov[0].lkey = req->rl_iov.lkey; + + req->rl_send_iov[1].addr = req->rl_iov.addr + (base - req->rl_base); + req->rl_send_iov[1].length = rpclen; + req->rl_send_iov[1].lkey = req->rl_iov.lkey; + + req->rl_niovs = 2; + + if (padlen) { + struct rpcrdma_ep *ep = &r_xprt->rx_ep; + + req->rl_send_iov[2].addr = ep->rep_pad.addr; + req->rl_send_iov[2].length = padlen; + req->rl_send_iov[2].lkey = ep->rep_pad.lkey; + + req->rl_send_iov[3].addr = req->rl_send_iov[1].addr + rpclen; + req->rl_send_iov[3].length = rqst->rq_slen - rpclen; + req->rl_send_iov[3].lkey = req->rl_iov.lkey; + + req->rl_niovs = 4; + } + + return 0; +} + +/* + * Chase down a received write or reply chunklist to get length + * RDMA'd by server. See map at rpcrdma_create_chunks()! 
:-) + */ +static int +rpcrdma_count_chunks(struct rpcrdma_rep *rep, int max, int wrchunk, u32 **iptrp) +{ + unsigned int i, total_len; + struct rpcrdma_write_chunk *cur_wchunk; + + i = ntohl(**iptrp); /* get array count */ + if (i > max) + return -1; + cur_wchunk = (struct rpcrdma_write_chunk *) (*iptrp + 1); + total_len = 0; + while (i--) { + struct rpcrdma_segment *seg = &cur_wchunk->wc_target; + ifdebug(FACILITY) { + u64 off; + xdr_decode_hyper((u32 *)&seg->rs_offset, &off); + dprintk("RPC: %s: chunk %d@0x%llx:0x%x\n", + __func__, + ntohl(seg->rs_length), + off, + ntohl(seg->rs_handle)); + } + total_len += ntohl(seg->rs_length); + ++cur_wchunk; + } + /* check and adjust for properly terminated write chunk */ + if (wrchunk) { + u32 *w = (u32 *) cur_wchunk; + if (*w++ != xdr_zero) + return -1; + cur_wchunk = (struct rpcrdma_write_chunk *) w; + } + if ((char *) cur_wchunk > rep->rr_base + rep->rr_len) + return -1; + + *iptrp = (u32 *) cur_wchunk; + return total_len; +} + +/* + * Scatter inline received data back into provided iov's. + */ +static void +rpcrdma_inline_fixup(struct rpc_rqst *rqst, char *srcp, int copy_len) +{ + int i, npages, curlen, olen; + char *destp; + + curlen = rqst->rq_rcv_buf.head[0].iov_len; + if (curlen > copy_len) { /* write chunk header fixup */ + curlen = copy_len; + rqst->rq_rcv_buf.head[0].iov_len = curlen; + } + + dprintk("RPC: %s: srcp 0x%p len %d hdrlen %d\n", + __func__, srcp, copy_len, curlen); + + /* Shift pointer for first receive segment only */ + rqst->rq_rcv_buf.head[0].iov_base = srcp; + srcp += curlen; + copy_len -= curlen; + + olen = copy_len; + i = 0; + rpcx_to_rdmax(rqst->rq_xprt)->rx_stats.fixup_copy_count += olen; + if (copy_len && rqst->rq_rcv_buf.page_len) { + npages = PAGE_ALIGN(rqst->rq_rcv_buf.page_base + + rqst->rq_rcv_buf.page_len) >> PAGE_SHIFT; + for (; i < npages; i++) { + if (i == 0) + curlen = PAGE_SIZE - rqst->rq_rcv_buf.page_base; + else + curlen = PAGE_SIZE; + if (curlen > copy_len) + curlen = copy_len; + dprintk("RPC: %s: page %d" + " srcp 0x%p len %d curlen %d\n", + __func__, i, srcp, copy_len, curlen); + destp = kmap_atomic(rqst->rq_rcv_buf.pages[i], + KM_SKB_SUNRPC_DATA); + if (i == 0) + memcpy(destp + rqst->rq_rcv_buf.page_base, + srcp, curlen); + else + memcpy(destp, srcp, curlen); + flush_dcache_page(rqst->rq_rcv_buf.pages[i]); + kunmap_atomic(destp, KM_SKB_SUNRPC_DATA); + srcp += curlen; + copy_len -= curlen; + if (copy_len == 0) + break; + } + rqst->rq_rcv_buf.page_len = olen - copy_len; + } else + rqst->rq_rcv_buf.page_len = 0; + + if (copy_len && rqst->rq_rcv_buf.tail[0].iov_len) { + curlen = copy_len; + if (curlen > rqst->rq_rcv_buf.tail[0].iov_len) + curlen = rqst->rq_rcv_buf.tail[0].iov_len; + if (rqst->rq_rcv_buf.tail[0].iov_base != srcp) + memcpy(rqst->rq_rcv_buf.tail[0].iov_base, srcp, curlen); + dprintk("RPC: %s: tail srcp 0x%p len %d curlen %d\n", + __func__, srcp, copy_len, curlen); + rqst->rq_rcv_buf.tail[0].iov_len = curlen; + copy_len -= curlen; ++i; + } else + rqst->rq_rcv_buf.tail[0].iov_len = 0; + + if (copy_len) + dprintk("RPC: %s: %d bytes in" + " %d extra segments (%d lost)\n", + __func__, olen, i, copy_len); + + /* TBD avoid a warning from call_decode() */ + rqst->rq_private_buf = rqst->rq_rcv_buf; +} + +/* + * This function is called when an async event is posted to + * the connection which changes the connection state. All it + * does at this point is mark the connection up/down, the rpc + * timers do the rest. 
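+ *
+ * (ep->rep_connected carries the signal: > 0 marks the transport
+ * connected and wakes pending tasks with status 0, while a negative
+ * errno -- e.g. the -EIO posted by the QP/CQ error upcalls in
+ * verbs.c -- clears the connected state and wakes the tasks with
+ * that error.)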
+ */ +void +rpcrdma_conn_func(struct rpcrdma_ep *ep) +{ + struct rpc_xprt *xprt = ep->rep_xprt; + + spin_lock_bh(&xprt->transport_lock); + if (ep->rep_connected > 0) { + if (!xprt_test_and_set_connected(xprt)) + xprt_wake_pending_tasks(xprt, 0); + } else { + if (xprt_test_and_clear_connected(xprt)) + xprt_wake_pending_tasks(xprt, ep->rep_connected); + } + spin_unlock_bh(&xprt->transport_lock); +} + +/* + * This function is called when memory window unbind which we are waiting + * for completes. Just use rr_func (zeroed by upcall) to signal completion. + */ +static void +rpcrdma_unbind_func(struct rpcrdma_rep *rep) +{ + wake_up(&rep->rr_unbind); +} + +/* + * Called as a tasklet to do req/reply match and complete a request + * Errors must result in the RPC task either being awakened, or + * allowed to timeout, to discover the errors at that time. + */ +void +rpcrdma_reply_handler(struct rpcrdma_rep *rep) +{ + struct rpcrdma_msg *headerp; + struct rpcrdma_req *req; + struct rpc_rqst *rqst; + struct rpc_xprt *xprt = rep->rr_xprt; + struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt); + u32 *iptr; + int i, rdmalen, status; + + /* Check status. If bad, signal disconnect and return rep to pool */ + if (rep->rr_len == ~0U) { + rpcrdma_recv_buffer_put(rep); + if (r_xprt->rx_ep.rep_connected == 1) { + r_xprt->rx_ep.rep_connected = -EIO; + rpcrdma_conn_func(&r_xprt->rx_ep); + } + return; + } + if (rep->rr_len < 28) { + dprintk("RPC: %s: short/invalid reply\n", __func__); + goto repost; + } + headerp = (struct rpcrdma_msg *) rep->rr_base; + if (headerp->rm_vers != xdr_one) { + dprintk("RPC: %s: invalid version %d\n", + __func__, ntohl(headerp->rm_vers)); + goto repost; + } + + /* Get XID and try for a match. */ + spin_lock(&xprt->transport_lock); + rqst = xprt_lookup_rqst(xprt, headerp->rm_xid); + if (rqst == NULL) { + spin_unlock(&xprt->transport_lock); + dprintk("RPC: %s: reply 0x%p failed " + "to match any request xid 0x%08x len %d\n", + __func__, rep, headerp->rm_xid, rep->rr_len); +repost: + r_xprt->rx_stats.bad_reply_count++; + rep->rr_func = rpcrdma_reply_handler; + if (rpcrdma_ep_post_recv(&r_xprt->rx_ia, &r_xprt->rx_ep, rep)) + rpcrdma_recv_buffer_put(rep); + + return; + } + + /* get request object */ + req = rpcr_to_rdmar(rqst); + + dprintk("RPC: %s: reply 0x%p completes request 0x%p\n" + " RPC request 0x%p xid 0x%08x\n", + __func__, rep, req, rqst, headerp->rm_xid); + + BUG_ON(!req || req->rl_reply); + + /* from here on, the reply is no longer an orphan */ + req->rl_reply = rep; + + /* check for expected message types */ + /* The order of some of these tests is important. 
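+ *
+ * (Unpacking the RDMA_MSG case below: chunks[0] must be an empty
+ * read list; a non-empty write list in chunks[1] is legal only if
+ * we offered chunks (req->rl_nchunks != 0); and when chunks[1] is
+ * empty, chunks[2] must be empty too, since an inline reply cannot
+ * also carry a reply chunk.)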
*/ + switch (headerp->rm_type) { + case __constant_htonl(RDMA_MSG): + /* never expect read chunks */ + /* never expect reply chunks (two ways to check) */ + /* never expect write chunks without having offered RDMA */ + if (headerp->rm_body.rm_chunks[0] != xdr_zero || + (headerp->rm_body.rm_chunks[1] == xdr_zero && + headerp->rm_body.rm_chunks[2] != xdr_zero) || + (headerp->rm_body.rm_chunks[1] != xdr_zero && + req->rl_nchunks == 0)) + goto badheader; + if (headerp->rm_body.rm_chunks[1] != xdr_zero) { + /* count any expected write chunks in read reply */ + /* start at write chunk array count */ + iptr = &headerp->rm_body.rm_chunks[2]; + rdmalen = rpcrdma_count_chunks(rep, + req->rl_nchunks, 1, &iptr); + /* check for validity, and no reply chunk after */ + if (rdmalen < 0 || *iptr++ != xdr_zero) + goto badheader; + rep->rr_len -= + ((unsigned char *)iptr - (unsigned char *)headerp); + status = rep->rr_len + rdmalen; + r_xprt->rx_stats.total_rdma_reply += rdmalen; + } else { + /* else ordinary inline */ + iptr = (u32 *)((unsigned char *)headerp + 28); + rep->rr_len -= 28; /*sizeof *headerp;*/ + status = rep->rr_len; + } + /* Fix up the rpc results for upper layer */ + rpcrdma_inline_fixup(rqst, (char *)iptr, rep->rr_len); + break; + + case __constant_htonl(RDMA_NOMSG): + /* never expect read or write chunks, always reply chunks */ + if (headerp->rm_body.rm_chunks[0] != xdr_zero || + headerp->rm_body.rm_chunks[1] != xdr_zero || + headerp->rm_body.rm_chunks[2] != xdr_one || + req->rl_nchunks == 0) + goto badheader; + iptr = (u32 *)((unsigned char *)headerp + 28); + rdmalen = rpcrdma_count_chunks(rep, req->rl_nchunks, 0, &iptr); + if (rdmalen < 0) + goto badheader; + r_xprt->rx_stats.total_rdma_reply += rdmalen; + /* Reply chunk buffer already is the reply vector - no fixup. */ + status = rdmalen; + break; + +badheader: + default: + dprintk("%s: invalid rpcrdma reply header (type %d):" + " chunks[012] == %d %d %d" + " expected chunks <= %d\n", + __func__, ntohl(headerp->rm_type), + headerp->rm_body.rm_chunks[0], + headerp->rm_body.rm_chunks[1], + headerp->rm_body.rm_chunks[2], + req->rl_nchunks); + status = -EIO; + r_xprt->rx_stats.bad_reply_count++; + break; + } + + /* If using mw bind, start the deregister process now. */ + /* (Note: if mr_free(), cannot perform it here, in tasklet context) */ + if (req->rl_nchunks) switch (r_xprt->rx_ia.ri_memreg_strategy) { + case RPCRDMA_MEMWINDOWS: + for (i = 0; req->rl_nchunks-- > 1;) + i += rpcrdma_deregister_external( + &req->rl_segments[i], r_xprt, NULL); + /* Optionally wait (not here) for unbinds to complete */ + rep->rr_func = rpcrdma_unbind_func; + (void) rpcrdma_deregister_external(&req->rl_segments[i], + r_xprt, rep); + break; + case RPCRDMA_MEMWINDOWS_ASYNC: + for (i = 0; req->rl_nchunks--;) + i += rpcrdma_deregister_external(&req->rl_segments[i], + r_xprt, NULL); + break; + default: + break; + } + + dprintk("RPC: %s: xprt_complete_rqst(0x%p, 0x%p, %d)\n", + __func__, xprt, rqst, status); + xprt_complete_rqst(rqst->rq_task, status); + spin_unlock(&xprt->transport_lock); +} diff --git a/net/sunrpc/xprtrdma/transport.c b/net/sunrpc/xprtrdma/transport.c new file mode 100644 index 00000000000..dc55cc974c9 --- /dev/null +++ b/net/sunrpc/xprtrdma/transport.c @@ -0,0 +1,800 @@ +/* + * Copyright (c) 2003-2007 Network Appliance, Inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. 
You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the BSD-type + * license below: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials provided + * with the distribution. + * + * Neither the name of the Network Appliance, Inc. nor the names of + * its contributors may be used to endorse or promote products + * derived from this software without specific prior written + * permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +/* + * transport.c + * + * This file contains the top-level implementation of an RPC RDMA + * transport. + * + * Naming convention: functions beginning with xprt_ are part of the + * transport switch. All others are RPC RDMA internal. + */ + +#include <linux/module.h> +#include <linux/init.h> +#include <linux/seq_file.h> + +#include "xprt_rdma.h" + +#ifdef RPC_DEBUG +# define RPCDBG_FACILITY RPCDBG_TRANS +#endif + +MODULE_LICENSE("Dual BSD/GPL"); + +MODULE_DESCRIPTION("RPC/RDMA Transport for Linux kernel NFS"); +MODULE_AUTHOR("Network Appliance, Inc."); + +/* + * tunables + */ + +static unsigned int xprt_rdma_slot_table_entries = RPCRDMA_DEF_SLOT_TABLE; +static unsigned int xprt_rdma_max_inline_read = RPCRDMA_DEF_INLINE; +static unsigned int xprt_rdma_max_inline_write = RPCRDMA_DEF_INLINE; +static unsigned int xprt_rdma_inline_write_padding; +#if !RPCRDMA_PERSISTENT_REGISTRATION +static unsigned int xprt_rdma_memreg_strategy = RPCRDMA_REGISTER; /* FMR? 
*/ +#else +static unsigned int xprt_rdma_memreg_strategy = RPCRDMA_ALLPHYSICAL; +#endif + +#ifdef RPC_DEBUG + +static unsigned int min_slot_table_size = RPCRDMA_MIN_SLOT_TABLE; +static unsigned int max_slot_table_size = RPCRDMA_MAX_SLOT_TABLE; +static unsigned int zero; +static unsigned int max_padding = PAGE_SIZE; +static unsigned int min_memreg = RPCRDMA_BOUNCEBUFFERS; +static unsigned int max_memreg = RPCRDMA_LAST - 1; + +static struct ctl_table_header *sunrpc_table_header; + +static ctl_table xr_tunables_table[] = { + { + .ctl_name = CTL_SLOTTABLE_RDMA, + .procname = "rdma_slot_table_entries", + .data = &xprt_rdma_slot_table_entries, + .maxlen = sizeof(unsigned int), + .mode = 0644, + .proc_handler = &proc_dointvec_minmax, + .strategy = &sysctl_intvec, + .extra1 = &min_slot_table_size, + .extra2 = &max_slot_table_size + }, + { + .ctl_name = CTL_RDMA_MAXINLINEREAD, + .procname = "rdma_max_inline_read", + .data = &xprt_rdma_max_inline_read, + .maxlen = sizeof(unsigned int), + .mode = 0644, + .proc_handler = &proc_dointvec, + .strategy = &sysctl_intvec, + }, + { + .ctl_name = CTL_RDMA_MAXINLINEWRITE, + .procname = "rdma_max_inline_write", + .data = &xprt_rdma_max_inline_write, + .maxlen = sizeof(unsigned int), + .mode = 0644, + .proc_handler = &proc_dointvec, + .strategy = &sysctl_intvec, + }, + { + .ctl_name = CTL_RDMA_WRITEPADDING, + .procname = "rdma_inline_write_padding", + .data = &xprt_rdma_inline_write_padding, + .maxlen = sizeof(unsigned int), + .mode = 0644, + .proc_handler = &proc_dointvec_minmax, + .strategy = &sysctl_intvec, + .extra1 = &zero, + .extra2 = &max_padding, + }, + { + .ctl_name = CTL_RDMA_MEMREG, + .procname = "rdma_memreg_strategy", + .data = &xprt_rdma_memreg_strategy, + .maxlen = sizeof(unsigned int), + .mode = 0644, + .proc_handler = &proc_dointvec_minmax, + .strategy = &sysctl_intvec, + .extra1 = &min_memreg, + .extra2 = &max_memreg, + }, + { + .ctl_name = 0, + }, +}; + +static ctl_table sunrpc_table[] = { + { + .ctl_name = CTL_SUNRPC, + .procname = "sunrpc", + .mode = 0555, + .child = xr_tunables_table + }, + { + .ctl_name = 0, + }, +}; + +#endif + +static struct rpc_xprt_ops xprt_rdma_procs; /* forward reference */ + +static void +xprt_rdma_format_addresses(struct rpc_xprt *xprt) +{ + struct sockaddr_in *addr = (struct sockaddr_in *) + &rpcx_to_rdmad(xprt).addr; + char *buf; + + buf = kzalloc(20, GFP_KERNEL); + if (buf) + snprintf(buf, 20, NIPQUAD_FMT, NIPQUAD(addr->sin_addr.s_addr)); + xprt->address_strings[RPC_DISPLAY_ADDR] = buf; + + buf = kzalloc(8, GFP_KERNEL); + if (buf) + snprintf(buf, 8, "%u", ntohs(addr->sin_port)); + xprt->address_strings[RPC_DISPLAY_PORT] = buf; + + xprt->address_strings[RPC_DISPLAY_PROTO] = "rdma"; + + buf = kzalloc(48, GFP_KERNEL); + if (buf) + snprintf(buf, 48, "addr="NIPQUAD_FMT" port=%u proto=%s", + NIPQUAD(addr->sin_addr.s_addr), + ntohs(addr->sin_port), "rdma"); + xprt->address_strings[RPC_DISPLAY_ALL] = buf; + + buf = kzalloc(10, GFP_KERNEL); + if (buf) + snprintf(buf, 10, "%02x%02x%02x%02x", + NIPQUAD(addr->sin_addr.s_addr)); + xprt->address_strings[RPC_DISPLAY_HEX_ADDR] = buf; + + buf = kzalloc(8, GFP_KERNEL); + if (buf) + snprintf(buf, 8, "%4hx", ntohs(addr->sin_port)); + xprt->address_strings[RPC_DISPLAY_HEX_PORT] = buf; + + buf = kzalloc(30, GFP_KERNEL); + if (buf) + snprintf(buf, 30, NIPQUAD_FMT".%u.%u", + NIPQUAD(addr->sin_addr.s_addr), + ntohs(addr->sin_port) >> 8, + ntohs(addr->sin_port) & 0xff); + xprt->address_strings[RPC_DISPLAY_UNIVERSAL_ADDR] = buf; + + /* netid */ + xprt->address_strings[RPC_DISPLAY_NETID] = 
"rdma"; +} + +static void +xprt_rdma_free_addresses(struct rpc_xprt *xprt) +{ + kfree(xprt->address_strings[RPC_DISPLAY_ADDR]); + kfree(xprt->address_strings[RPC_DISPLAY_PORT]); + kfree(xprt->address_strings[RPC_DISPLAY_ALL]); + kfree(xprt->address_strings[RPC_DISPLAY_HEX_ADDR]); + kfree(xprt->address_strings[RPC_DISPLAY_HEX_PORT]); + kfree(xprt->address_strings[RPC_DISPLAY_UNIVERSAL_ADDR]); +} + +static void +xprt_rdma_connect_worker(struct work_struct *work) +{ + struct rpcrdma_xprt *r_xprt = + container_of(work, struct rpcrdma_xprt, rdma_connect.work); + struct rpc_xprt *xprt = &r_xprt->xprt; + int rc = 0; + + if (!xprt->shutdown) { + xprt_clear_connected(xprt); + + dprintk("RPC: %s: %sconnect\n", __func__, + r_xprt->rx_ep.rep_connected != 0 ? "re" : ""); + rc = rpcrdma_ep_connect(&r_xprt->rx_ep, &r_xprt->rx_ia); + if (rc) + goto out; + } + goto out_clear; + +out: + xprt_wake_pending_tasks(xprt, rc); + +out_clear: + dprintk("RPC: %s: exit\n", __func__); + xprt_clear_connecting(xprt); +} + +/* + * xprt_rdma_destroy + * + * Destroy the xprt. + * Free all memory associated with the object, including its own. + * NOTE: none of the *destroy methods free memory for their top-level + * objects, even though they may have allocated it (they do free + * private memory). It's up to the caller to handle it. In this + * case (RDMA transport), all structure memory is inlined with the + * struct rpcrdma_xprt. + */ +static void +xprt_rdma_destroy(struct rpc_xprt *xprt) +{ + struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt); + int rc; + + dprintk("RPC: %s: called\n", __func__); + + cancel_delayed_work(&r_xprt->rdma_connect); + flush_scheduled_work(); + + xprt_clear_connected(xprt); + + rpcrdma_buffer_destroy(&r_xprt->rx_buf); + rc = rpcrdma_ep_destroy(&r_xprt->rx_ep, &r_xprt->rx_ia); + if (rc) + dprintk("RPC: %s: rpcrdma_ep_destroy returned %i\n", + __func__, rc); + rpcrdma_ia_close(&r_xprt->rx_ia); + + xprt_rdma_free_addresses(xprt); + + kfree(xprt->slot); + xprt->slot = NULL; + kfree(xprt); + + dprintk("RPC: %s: returning\n", __func__); + + module_put(THIS_MODULE); +} + +/** + * xprt_setup_rdma - Set up transport to use RDMA + * + * @args: rpc transport arguments + */ +static struct rpc_xprt * +xprt_setup_rdma(struct xprt_create *args) +{ + struct rpcrdma_create_data_internal cdata; + struct rpc_xprt *xprt; + struct rpcrdma_xprt *new_xprt; + struct rpcrdma_ep *new_ep; + struct sockaddr_in *sin; + int rc; + + if (args->addrlen > sizeof(xprt->addr)) { + dprintk("RPC: %s: address too large\n", __func__); + return ERR_PTR(-EBADF); + } + + xprt = kzalloc(sizeof(struct rpcrdma_xprt), GFP_KERNEL); + if (xprt == NULL) { + dprintk("RPC: %s: couldn't allocate rpcrdma_xprt\n", + __func__); + return ERR_PTR(-ENOMEM); + } + + xprt->max_reqs = xprt_rdma_slot_table_entries; + xprt->slot = kcalloc(xprt->max_reqs, + sizeof(struct rpc_rqst), GFP_KERNEL); + if (xprt->slot == NULL) { + kfree(xprt); + dprintk("RPC: %s: couldn't allocate %d slots\n", + __func__, xprt->max_reqs); + return ERR_PTR(-ENOMEM); + } + + /* 60 second timeout, no retries */ + xprt_set_timeout(&xprt->timeout, 0, 60UL * HZ); + xprt->bind_timeout = (60U * HZ); + xprt->connect_timeout = (60U * HZ); + xprt->reestablish_timeout = (5U * HZ); + xprt->idle_timeout = (5U * 60 * HZ); + + xprt->resvport = 0; /* privileged port not needed */ + xprt->tsh_size = 0; /* RPC-RDMA handles framing */ + xprt->max_payload = RPCRDMA_MAX_DATA_SEGS * PAGE_SIZE; + xprt->ops = &xprt_rdma_procs; + + /* + * Set up RDMA-specific connect data. 
+ */ + + /* Put server RDMA address in local cdata */ + memcpy(&cdata.addr, args->dstaddr, args->addrlen); + + /* Ensure xprt->addr holds valid server TCP (not RDMA) + * address, for any side protocols which peek at it */ + xprt->prot = IPPROTO_TCP; + xprt->addrlen = args->addrlen; + memcpy(&xprt->addr, &cdata.addr, xprt->addrlen); + + sin = (struct sockaddr_in *)&cdata.addr; + if (ntohs(sin->sin_port) != 0) + xprt_set_bound(xprt); + + dprintk("RPC: %s: %u.%u.%u.%u:%u\n", __func__, + NIPQUAD(sin->sin_addr.s_addr), ntohs(sin->sin_port)); + + /* Set max requests */ + cdata.max_requests = xprt->max_reqs; + + /* Set some length limits */ + cdata.rsize = RPCRDMA_MAX_SEGS * PAGE_SIZE; /* RDMA write max */ + cdata.wsize = RPCRDMA_MAX_SEGS * PAGE_SIZE; /* RDMA read max */ + + cdata.inline_wsize = xprt_rdma_max_inline_write; + if (cdata.inline_wsize > cdata.wsize) + cdata.inline_wsize = cdata.wsize; + + cdata.inline_rsize = xprt_rdma_max_inline_read; + if (cdata.inline_rsize > cdata.rsize) + cdata.inline_rsize = cdata.rsize; + + cdata.padding = xprt_rdma_inline_write_padding; + + /* + * Create new transport instance, which includes initialized + * o ia + * o endpoint + * o buffers + */ + + new_xprt = rpcx_to_rdmax(xprt); + + rc = rpcrdma_ia_open(new_xprt, (struct sockaddr *) &cdata.addr, + xprt_rdma_memreg_strategy); + if (rc) + goto out1; + + /* + * initialize and create ep + */ + new_xprt->rx_data = cdata; + new_ep = &new_xprt->rx_ep; + new_ep->rep_remote_addr = cdata.addr; + + rc = rpcrdma_ep_create(&new_xprt->rx_ep, + &new_xprt->rx_ia, &new_xprt->rx_data); + if (rc) + goto out2; + + /* + * Allocate pre-registered send and receive buffers for headers and + * any inline data. Also specify any padding which will be provided + * from a preregistered zero buffer. + */ + rc = rpcrdma_buffer_create(&new_xprt->rx_buf, new_ep, &new_xprt->rx_ia, + &new_xprt->rx_data); + if (rc) + goto out3; + + /* + * Register a callback for connection events. This is necessary because + * connection loss notification is async. We also catch connection loss + * when reaping receives. 
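+ *
+ * (Wiring sketch: rep_func points at rpcrdma_conn_func(), which the
+ * verbs.c event upcalls invoke to flip the xprt's connected state
+ * under transport_lock; the delayed work set up here re-runs
+ * rpcrdma_ep_connect() from xprt_rdma_connect_worker() on both
+ * connect and reconnect.)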
+ */ + INIT_DELAYED_WORK(&new_xprt->rdma_connect, xprt_rdma_connect_worker); + new_ep->rep_func = rpcrdma_conn_func; + new_ep->rep_xprt = xprt; + + xprt_rdma_format_addresses(xprt); + + if (!try_module_get(THIS_MODULE)) + goto out4; + + return xprt; + +out4: + xprt_rdma_free_addresses(xprt); + rc = -EINVAL; +out3: + (void) rpcrdma_ep_destroy(new_ep, &new_xprt->rx_ia); +out2: + rpcrdma_ia_close(&new_xprt->rx_ia); +out1: + kfree(xprt->slot); + kfree(xprt); + return ERR_PTR(rc); +} + +/* + * Close a connection, during shutdown or timeout/reconnect + */ +static void +xprt_rdma_close(struct rpc_xprt *xprt) +{ + struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt); + + dprintk("RPC: %s: closing\n", __func__); + xprt_disconnect(xprt); + (void) rpcrdma_ep_disconnect(&r_xprt->rx_ep, &r_xprt->rx_ia); +} + +static void +xprt_rdma_set_port(struct rpc_xprt *xprt, u16 port) +{ + struct sockaddr_in *sap; + + sap = (struct sockaddr_in *)&xprt->addr; + sap->sin_port = htons(port); + sap = (struct sockaddr_in *)&rpcx_to_rdmad(xprt).addr; + sap->sin_port = htons(port); + dprintk("RPC: %s: %u\n", __func__, port); +} + +static void +xprt_rdma_connect(struct rpc_task *task) +{ + struct rpc_xprt *xprt = (struct rpc_xprt *)task->tk_xprt; + struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt); + + if (!xprt_test_and_set_connecting(xprt)) { + if (r_xprt->rx_ep.rep_connected != 0) { + /* Reconnect */ + schedule_delayed_work(&r_xprt->rdma_connect, + xprt->reestablish_timeout); + } else { + schedule_delayed_work(&r_xprt->rdma_connect, 0); + if (!RPC_IS_ASYNC(task)) + flush_scheduled_work(); + } + } +} + +static int +xprt_rdma_reserve_xprt(struct rpc_task *task) +{ + struct rpc_xprt *xprt = task->tk_xprt; + struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt); + int credits = atomic_read(&r_xprt->rx_buf.rb_credits); + + /* == RPC_CWNDSCALE @ init, but *after* setup */ + if (r_xprt->rx_buf.rb_cwndscale == 0UL) { + r_xprt->rx_buf.rb_cwndscale = xprt->cwnd; + dprintk("RPC: %s: cwndscale %lu\n", __func__, + r_xprt->rx_buf.rb_cwndscale); + BUG_ON(r_xprt->rx_buf.rb_cwndscale <= 0); + } + xprt->cwnd = credits * r_xprt->rx_buf.rb_cwndscale; + return xprt_reserve_xprt_cong(task); +} + +/* + * The RDMA allocate/free functions need the task structure as a place + * to hide the struct rpcrdma_req, which is necessary for the actual send/recv + * sequence. For this reason, the recv buffers are attached to send + * buffers for portions of the RPC. Note that the RPC layer allocates + * both send and receive buffers in the same call. We may register + * the receive buffer portion when using reply chunks. + */ +static void * +xprt_rdma_allocate(struct rpc_task *task, size_t size) +{ + struct rpc_xprt *xprt = task->tk_xprt; + struct rpcrdma_req *req, *nreq; + + req = rpcrdma_buffer_get(&rpcx_to_rdmax(xprt)->rx_buf); + BUG_ON(NULL == req); + + if (size > req->rl_size) { + dprintk("RPC: %s: size %zd too large for buffer[%zd]: " + "prog %d vers %d proc %d\n", + __func__, size, req->rl_size, + task->tk_client->cl_prog, task->tk_client->cl_vers, + task->tk_msg.rpc_proc->p_proc); + /* + * Outgoing length shortage. Our inline write max must have + * been configured to perform direct i/o. + * + * This is therefore a large metadata operation, and the + * allocate call was made on the maximum possible message, + * e.g. containing long filename(s) or symlink data. In + * fact, while these metadata operations *might* carry + * large outgoing payloads, they rarely *do*. However, we + * have to commit to the request here, so reallocate and + * register it now. 
The data path will never require this + * reallocation. + * + * If the allocation or registration fails, the RPC framework + * will (doggedly) retry. + */ + if (rpcx_to_rdmax(xprt)->rx_ia.ri_memreg_strategy == + RPCRDMA_BOUNCEBUFFERS) { + /* forced to "pure inline" */ + dprintk("RPC: %s: too much data (%zd) for inline " + "(r/w max %d/%d)\n", __func__, size, + rpcx_to_rdmad(xprt).inline_rsize, + rpcx_to_rdmad(xprt).inline_wsize); + size = req->rl_size; + rpc_exit(task, -EIO); /* fail the operation */ + rpcx_to_rdmax(xprt)->rx_stats.failed_marshal_count++; + goto out; + } + if (task->tk_flags & RPC_TASK_SWAPPER) + nreq = kmalloc(sizeof *req + size, GFP_ATOMIC); + else + nreq = kmalloc(sizeof *req + size, GFP_NOFS); + if (nreq == NULL) + goto outfail; + + if (rpcrdma_register_internal(&rpcx_to_rdmax(xprt)->rx_ia, + nreq->rl_base, size + sizeof(struct rpcrdma_req) + - offsetof(struct rpcrdma_req, rl_base), + &nreq->rl_handle, &nreq->rl_iov)) { + kfree(nreq); + goto outfail; + } + rpcx_to_rdmax(xprt)->rx_stats.hardway_register_count += size; + nreq->rl_size = size; + nreq->rl_niovs = 0; + nreq->rl_nchunks = 0; + nreq->rl_buffer = (struct rpcrdma_buffer *)req; + nreq->rl_reply = req->rl_reply; + memcpy(nreq->rl_segments, + req->rl_segments, sizeof nreq->rl_segments); + /* flag the swap with an unused field */ + nreq->rl_iov.length = 0; + req->rl_reply = NULL; + req = nreq; + } + dprintk("RPC: %s: size %zd, request 0x%p\n", __func__, size, req); +out: + return req->rl_xdr_buf; + +outfail: + rpcrdma_buffer_put(req); + rpcx_to_rdmax(xprt)->rx_stats.failed_marshal_count++; + return NULL; +} + +/* + * This function returns all RDMA resources to the pool. + */ +static void +xprt_rdma_free(void *buffer) +{ + struct rpcrdma_req *req; + struct rpcrdma_xprt *r_xprt; + struct rpcrdma_rep *rep; + int i; + + if (buffer == NULL) + return; + + req = container_of(buffer, struct rpcrdma_req, rl_xdr_buf[0]); + r_xprt = container_of(req->rl_buffer, struct rpcrdma_xprt, rx_buf); + rep = req->rl_reply; + + dprintk("RPC: %s: called on 0x%p%s\n", + __func__, rep, (rep && rep->rr_func) ? " (with waiter)" : ""); + + /* + * Finish the deregistration. When using mw bind, this was + * begun in rpcrdma_reply_handler(). In all other modes, we + * do it here, in thread context. The process is considered + * complete when the rr_func vector becomes NULL - this + * was put in place during rpcrdma_reply_handler() - the wait + * call below will not block if the dereg is "done". If + * interrupted, our framework will clean up. + */ + for (i = 0; req->rl_nchunks;) { + --req->rl_nchunks; + i += rpcrdma_deregister_external( + &req->rl_segments[i], r_xprt, NULL); + } + + if (rep && wait_event_interruptible(rep->rr_unbind, !rep->rr_func)) { + rep->rr_func = NULL; /* abandon the callback */ + req->rl_reply = NULL; + } + + if (req->rl_iov.length == 0) { /* see allocate above */ + struct rpcrdma_req *oreq = (struct rpcrdma_req *)req->rl_buffer; + oreq->rl_reply = req->rl_reply; + (void) rpcrdma_deregister_internal(&r_xprt->rx_ia, + req->rl_handle, + &req->rl_iov); + kfree(req); + req = oreq; + } + + /* Put back request+reply buffers */ + rpcrdma_buffer_put(req); +} + +/* + * send_request invokes the meat of RPC RDMA. It must do the following: + * 1. Marshal the RPC request into an RPC RDMA request, which means + * putting a header in front of data, and creating IOVs for RDMA + * from those in the request. + * 2. In marshaling, detect opportunities for RDMA, and use them. + * 3. 
Post a recv message to set up asynch completion, then send + * the request (rpcrdma_ep_post). + * 4. No partial sends are possible in the RPC-RDMA protocol (as in UDP). + */ + +static int +xprt_rdma_send_request(struct rpc_task *task) +{ + struct rpc_rqst *rqst = task->tk_rqstp; + struct rpc_xprt *xprt = task->tk_xprt; + struct rpcrdma_req *req = rpcr_to_rdmar(rqst); + struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt); + + /* marshal the send itself */ + if (req->rl_niovs == 0 && rpcrdma_marshal_req(rqst) != 0) { + r_xprt->rx_stats.failed_marshal_count++; + dprintk("RPC: %s: rpcrdma_marshal_req failed\n", + __func__); + return -EIO; + } + + if (req->rl_reply == NULL) /* e.g. reconnection */ + rpcrdma_recv_buffer_get(req); + + if (req->rl_reply) { + req->rl_reply->rr_func = rpcrdma_reply_handler; + /* this need only be done once, but... */ + req->rl_reply->rr_xprt = xprt; + } + + if (rpcrdma_ep_post(&r_xprt->rx_ia, &r_xprt->rx_ep, req)) { + xprt_disconnect(xprt); + return -ENOTCONN; /* implies disconnect */ + } + + rqst->rq_bytes_sent = 0; + return 0; +} + +static void xprt_rdma_print_stats(struct rpc_xprt *xprt, struct seq_file *seq) +{ + struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt); + long idle_time = 0; + + if (xprt_connected(xprt)) + idle_time = (long)(jiffies - xprt->last_used) / HZ; + + seq_printf(seq, + "\txprt:\trdma %u %lu %lu %lu %ld %lu %lu %lu %Lu %Lu " + "%lu %lu %lu %Lu %Lu %Lu %Lu %lu %lu %lu\n", + + 0, /* need a local port? */ + xprt->stat.bind_count, + xprt->stat.connect_count, + xprt->stat.connect_time, + idle_time, + xprt->stat.sends, + xprt->stat.recvs, + xprt->stat.bad_xids, + xprt->stat.req_u, + xprt->stat.bklog_u, + + r_xprt->rx_stats.read_chunk_count, + r_xprt->rx_stats.write_chunk_count, + r_xprt->rx_stats.reply_chunk_count, + r_xprt->rx_stats.total_rdma_request, + r_xprt->rx_stats.total_rdma_reply, + r_xprt->rx_stats.pullup_copy_count, + r_xprt->rx_stats.fixup_copy_count, + r_xprt->rx_stats.hardway_register_count, + r_xprt->rx_stats.failed_marshal_count, + r_xprt->rx_stats.bad_reply_count); +} + +/* + * Plumbing for rpc transport switch and kernel module + */ + +static struct rpc_xprt_ops xprt_rdma_procs = { + .reserve_xprt = xprt_rdma_reserve_xprt, + .release_xprt = xprt_release_xprt_cong, /* sunrpc/xprt.c */ + .release_request = xprt_release_rqst_cong, /* ditto */ + .set_retrans_timeout = xprt_set_retrans_timeout_def, /* ditto */ + .rpcbind = rpcb_getport_async, /* sunrpc/rpcb_clnt.c */ + .set_port = xprt_rdma_set_port, + .connect = xprt_rdma_connect, + .buf_alloc = xprt_rdma_allocate, + .buf_free = xprt_rdma_free, + .send_request = xprt_rdma_send_request, + .close = xprt_rdma_close, + .destroy = xprt_rdma_destroy, + .print_stats = xprt_rdma_print_stats +}; + +static struct xprt_class xprt_rdma = { + .list = LIST_HEAD_INIT(xprt_rdma.list), + .name = "rdma", + .owner = THIS_MODULE, + .ident = XPRT_TRANSPORT_RDMA, + .setup = xprt_setup_rdma, +}; + +static void __exit xprt_rdma_cleanup(void) +{ + int rc; + + dprintk("RPCRDMA Module Removed, deregister RPC RDMA transport\n"); +#ifdef RPC_DEBUG + if (sunrpc_table_header) { + unregister_sysctl_table(sunrpc_table_header); + sunrpc_table_header = NULL; + } +#endif + rc = xprt_unregister_transport(&xprt_rdma); + if (rc) + dprintk("RPC: %s: xprt_unregister returned %i\n", + __func__, rc); +} + +static int __init xprt_rdma_init(void) +{ + int rc; + + rc = xprt_register_transport(&xprt_rdma); + + if (rc) + return rc; + + dprintk(KERN_INFO "RPCRDMA Module Init, register RPC RDMA transport\n"); + + dprintk(KERN_INFO 
"Defaults:\n"); + dprintk(KERN_INFO "\tSlots %d\n" + "\tMaxInlineRead %d\n\tMaxInlineWrite %d\n", + xprt_rdma_slot_table_entries, + xprt_rdma_max_inline_read, xprt_rdma_max_inline_write); + dprintk(KERN_INFO "\tPadding %d\n\tMemreg %d\n", + xprt_rdma_inline_write_padding, xprt_rdma_memreg_strategy); + +#ifdef RPC_DEBUG + if (!sunrpc_table_header) + sunrpc_table_header = register_sysctl_table(sunrpc_table); +#endif + return 0; +} + +module_init(xprt_rdma_init); +module_exit(xprt_rdma_cleanup); diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c new file mode 100644 index 00000000000..44b0fb942e8 --- /dev/null +++ b/net/sunrpc/xprtrdma/verbs.c @@ -0,0 +1,1627 @@ +/* + * Copyright (c) 2003-2007 Network Appliance, Inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the BSD-type + * license below: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials provided + * with the distribution. + * + * Neither the name of the Network Appliance, Inc. nor the names of + * its contributors may be used to endorse or promote products + * derived from this software without specific prior written + * permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +/* + * verbs.c + * + * Encapsulates the major functions managing: + * o adapters + * o endpoints + * o connections + * o buffer memory + */ + +#include <linux/pci.h> /* for Tavor hack below */ + +#include "xprt_rdma.h" + +/* + * Globals/Macros + */ + +#ifdef RPC_DEBUG +# define RPCDBG_FACILITY RPCDBG_TRANS +#endif + +/* + * internal functions + */ + +/* + * handle replies in tasklet context, using a single, global list + * rdma tasklet function -- just turn around and call the func + * for all replies on the list + */ + +static DEFINE_SPINLOCK(rpcrdma_tk_lock_g); +static LIST_HEAD(rpcrdma_tasklets_g); + +static void +rpcrdma_run_tasklet(unsigned long data) +{ + struct rpcrdma_rep *rep; + void (*func)(struct rpcrdma_rep *); + unsigned long flags; + + data = data; + spin_lock_irqsave(&rpcrdma_tk_lock_g, flags); + while (!list_empty(&rpcrdma_tasklets_g)) { + rep = list_entry(rpcrdma_tasklets_g.next, + struct rpcrdma_rep, rr_list); + list_del(&rep->rr_list); + func = rep->rr_func; + rep->rr_func = NULL; + spin_unlock_irqrestore(&rpcrdma_tk_lock_g, flags); + + if (func) + func(rep); + else + rpcrdma_recv_buffer_put(rep); + + spin_lock_irqsave(&rpcrdma_tk_lock_g, flags); + } + spin_unlock_irqrestore(&rpcrdma_tk_lock_g, flags); +} + +static DECLARE_TASKLET(rpcrdma_tasklet_g, rpcrdma_run_tasklet, 0UL); + +static inline void +rpcrdma_schedule_tasklet(struct rpcrdma_rep *rep) +{ + unsigned long flags; + + spin_lock_irqsave(&rpcrdma_tk_lock_g, flags); + list_add_tail(&rep->rr_list, &rpcrdma_tasklets_g); + spin_unlock_irqrestore(&rpcrdma_tk_lock_g, flags); + tasklet_schedule(&rpcrdma_tasklet_g); +} + +static void +rpcrdma_qp_async_error_upcall(struct ib_event *event, void *context) +{ + struct rpcrdma_ep *ep = context; + + dprintk("RPC: %s: QP error %X on device %s ep %p\n", + __func__, event->event, event->device->name, context); + if (ep->rep_connected == 1) { + ep->rep_connected = -EIO; + ep->rep_func(ep); + wake_up_all(&ep->rep_connect_wait); + } +} + +static void +rpcrdma_cq_async_error_upcall(struct ib_event *event, void *context) +{ + struct rpcrdma_ep *ep = context; + + dprintk("RPC: %s: CQ error %X on device %s ep %p\n", + __func__, event->event, event->device->name, context); + if (ep->rep_connected == 1) { + ep->rep_connected = -EIO; + ep->rep_func(ep); + wake_up_all(&ep->rep_connect_wait); + } +} + +static inline +void rpcrdma_event_process(struct ib_wc *wc) +{ + struct rpcrdma_rep *rep = + (struct rpcrdma_rep *)(unsigned long) wc->wr_id; + + dprintk("RPC: %s: event rep %p status %X opcode %X length %u\n", + __func__, rep, wc->status, wc->opcode, wc->byte_len); + + if (!rep) /* send or bind completion that we don't care about */ + return; + + if (IB_WC_SUCCESS != wc->status) { + dprintk("RPC: %s: %s WC status %X, connection lost\n", + __func__, (wc->opcode & IB_WC_RECV) ? 
"recv" : "send", + wc->status); + rep->rr_len = ~0U; + rpcrdma_schedule_tasklet(rep); + return; + } + + switch (wc->opcode) { + case IB_WC_RECV: + rep->rr_len = wc->byte_len; + ib_dma_sync_single_for_cpu( + rdmab_to_ia(rep->rr_buffer)->ri_id->device, + rep->rr_iov.addr, rep->rr_len, DMA_FROM_DEVICE); + /* Keep (only) the most recent credits, after check validity */ + if (rep->rr_len >= 16) { + struct rpcrdma_msg *p = + (struct rpcrdma_msg *) rep->rr_base; + unsigned int credits = ntohl(p->rm_credit); + if (credits == 0) { + dprintk("RPC: %s: server" + " dropped credits to 0!\n", __func__); + /* don't deadlock */ + credits = 1; + } else if (credits > rep->rr_buffer->rb_max_requests) { + dprintk("RPC: %s: server" + " over-crediting: %d (%d)\n", + __func__, credits, + rep->rr_buffer->rb_max_requests); + credits = rep->rr_buffer->rb_max_requests; + } + atomic_set(&rep->rr_buffer->rb_credits, credits); + } + /* fall through */ + case IB_WC_BIND_MW: + rpcrdma_schedule_tasklet(rep); + break; + default: + dprintk("RPC: %s: unexpected WC event %X\n", + __func__, wc->opcode); + break; + } +} + +static inline int +rpcrdma_cq_poll(struct ib_cq *cq) +{ + struct ib_wc wc; + int rc; + + for (;;) { + rc = ib_poll_cq(cq, 1, &wc); + if (rc < 0) { + dprintk("RPC: %s: ib_poll_cq failed %i\n", + __func__, rc); + return rc; + } + if (rc == 0) + break; + + rpcrdma_event_process(&wc); + } + + return 0; +} + +/* + * rpcrdma_cq_event_upcall + * + * This upcall handles recv, send, bind and unbind events. + * It is reentrant but processes single events in order to maintain + * ordering of receives to keep server credits. + * + * It is the responsibility of the scheduled tasklet to return + * recv buffers to the pool. NOTE: this affects synchronization of + * connection shutdown. That is, the structures required for + * the completion of the reply handler must remain intact until + * all memory has been reclaimed. + * + * Note that send events are suppressed and do not result in an upcall. 
+ */ +static void +rpcrdma_cq_event_upcall(struct ib_cq *cq, void *context) +{ + int rc; + + rc = rpcrdma_cq_poll(cq); + if (rc) + return; + + rc = ib_req_notify_cq(cq, IB_CQ_NEXT_COMP); + if (rc) { + dprintk("RPC: %s: ib_req_notify_cq failed %i\n", + __func__, rc); + return; + } + + rpcrdma_cq_poll(cq); +} + +#ifdef RPC_DEBUG +static const char * const conn[] = { + "address resolved", + "address error", + "route resolved", + "route error", + "connect request", + "connect response", + "connect error", + "unreachable", + "rejected", + "established", + "disconnected", + "device removal" +}; +#endif + +static int +rpcrdma_conn_upcall(struct rdma_cm_id *id, struct rdma_cm_event *event) +{ + struct rpcrdma_xprt *xprt = id->context; + struct rpcrdma_ia *ia = &xprt->rx_ia; + struct rpcrdma_ep *ep = &xprt->rx_ep; + struct sockaddr_in *addr = (struct sockaddr_in *) &ep->rep_remote_addr; + struct ib_qp_attr attr; + struct ib_qp_init_attr iattr; + int connstate = 0; + + switch (event->event) { + case RDMA_CM_EVENT_ADDR_RESOLVED: + case RDMA_CM_EVENT_ROUTE_RESOLVED: + complete(&ia->ri_done); + break; + case RDMA_CM_EVENT_ADDR_ERROR: + ia->ri_async_rc = -EHOSTUNREACH; + dprintk("RPC: %s: CM address resolution error, ep 0x%p\n", + __func__, ep); + complete(&ia->ri_done); + break; + case RDMA_CM_EVENT_ROUTE_ERROR: + ia->ri_async_rc = -ENETUNREACH; + dprintk("RPC: %s: CM route resolution error, ep 0x%p\n", + __func__, ep); + complete(&ia->ri_done); + break; + case RDMA_CM_EVENT_ESTABLISHED: + connstate = 1; + ib_query_qp(ia->ri_id->qp, &attr, + IB_QP_MAX_QP_RD_ATOMIC | IB_QP_MAX_DEST_RD_ATOMIC, + &iattr); + dprintk("RPC: %s: %d responder resources" + " (%d initiator)\n", + __func__, attr.max_dest_rd_atomic, attr.max_rd_atomic); + goto connected; + case RDMA_CM_EVENT_CONNECT_ERROR: + connstate = -ENOTCONN; + goto connected; + case RDMA_CM_EVENT_UNREACHABLE: + connstate = -ENETDOWN; + goto connected; + case RDMA_CM_EVENT_REJECTED: + connstate = -ECONNREFUSED; + goto connected; + case RDMA_CM_EVENT_DISCONNECTED: + connstate = -ECONNABORTED; + goto connected; + case RDMA_CM_EVENT_DEVICE_REMOVAL: + connstate = -ENODEV; +connected: + dprintk("RPC: %s: %s: %u.%u.%u.%u:%u" + " (ep 0x%p event 0x%x)\n", + __func__, + (event->event <= 11) ? conn[event->event] : + "unknown connection error", + NIPQUAD(addr->sin_addr.s_addr), + ntohs(addr->sin_port), + ep, event->event); + atomic_set(&rpcx_to_rdmax(ep->rep_xprt)->rx_buf.rb_credits, 1); + dprintk("RPC: %s: %sconnected\n", + __func__, connstate > 0 ? 
"" : "dis"); + ep->rep_connected = connstate; + ep->rep_func(ep); + wake_up_all(&ep->rep_connect_wait); + break; + default: + ia->ri_async_rc = -EINVAL; + dprintk("RPC: %s: unexpected CM event %X\n", + __func__, event->event); + complete(&ia->ri_done); + break; + } + + return 0; +} + +static struct rdma_cm_id * +rpcrdma_create_id(struct rpcrdma_xprt *xprt, + struct rpcrdma_ia *ia, struct sockaddr *addr) +{ + struct rdma_cm_id *id; + int rc; + + id = rdma_create_id(rpcrdma_conn_upcall, xprt, RDMA_PS_TCP); + if (IS_ERR(id)) { + rc = PTR_ERR(id); + dprintk("RPC: %s: rdma_create_id() failed %i\n", + __func__, rc); + return id; + } + + ia->ri_async_rc = 0; + rc = rdma_resolve_addr(id, NULL, addr, RDMA_RESOLVE_TIMEOUT); + if (rc) { + dprintk("RPC: %s: rdma_resolve_addr() failed %i\n", + __func__, rc); + goto out; + } + wait_for_completion(&ia->ri_done); + rc = ia->ri_async_rc; + if (rc) + goto out; + + ia->ri_async_rc = 0; + rc = rdma_resolve_route(id, RDMA_RESOLVE_TIMEOUT); + if (rc) { + dprintk("RPC: %s: rdma_resolve_route() failed %i\n", + __func__, rc); + goto out; + } + wait_for_completion(&ia->ri_done); + rc = ia->ri_async_rc; + if (rc) + goto out; + + return id; + +out: + rdma_destroy_id(id); + return ERR_PTR(rc); +} + +/* + * Drain any cq, prior to teardown. + */ +static void +rpcrdma_clean_cq(struct ib_cq *cq) +{ + struct ib_wc wc; + int count = 0; + + while (1 == ib_poll_cq(cq, 1, &wc)) + ++count; + + if (count) + dprintk("RPC: %s: flushed %d events (last 0x%x)\n", + __func__, count, wc.opcode); +} + +/* + * Exported functions. + */ + +/* + * Open and initialize an Interface Adapter. + * o initializes fields of struct rpcrdma_ia, including + * interface and provider attributes and protection zone. + */ +int +rpcrdma_ia_open(struct rpcrdma_xprt *xprt, struct sockaddr *addr, int memreg) +{ + int rc; + struct rpcrdma_ia *ia = &xprt->rx_ia; + + init_completion(&ia->ri_done); + + ia->ri_id = rpcrdma_create_id(xprt, ia, addr); + if (IS_ERR(ia->ri_id)) { + rc = PTR_ERR(ia->ri_id); + goto out1; + } + + ia->ri_pd = ib_alloc_pd(ia->ri_id->device); + if (IS_ERR(ia->ri_pd)) { + rc = PTR_ERR(ia->ri_pd); + dprintk("RPC: %s: ib_alloc_pd() failed %i\n", + __func__, rc); + goto out2; + } + + /* + * Optionally obtain an underlying physical identity mapping in + * order to do a memory window-based bind. This base registration + * is protected from remote access - that is enabled only by binding + * for the specific bytes targeted during each RPC operation, and + * revoked after the corresponding completion similar to a storage + * adapter. + */ + if (memreg > RPCRDMA_REGISTER) { + int mem_priv = IB_ACCESS_LOCAL_WRITE; + switch (memreg) { +#if RPCRDMA_PERSISTENT_REGISTRATION + case RPCRDMA_ALLPHYSICAL: + mem_priv |= IB_ACCESS_REMOTE_WRITE; + mem_priv |= IB_ACCESS_REMOTE_READ; + break; +#endif + case RPCRDMA_MEMWINDOWS_ASYNC: + case RPCRDMA_MEMWINDOWS: + mem_priv |= IB_ACCESS_MW_BIND; + break; + default: + break; + } + ia->ri_bind_mem = ib_get_dma_mr(ia->ri_pd, mem_priv); + if (IS_ERR(ia->ri_bind_mem)) { + printk(KERN_ALERT "%s: ib_get_dma_mr for " + "phys register failed with %lX\n\t" + "Will continue with degraded performance\n", + __func__, PTR_ERR(ia->ri_bind_mem)); + memreg = RPCRDMA_REGISTER; + ia->ri_bind_mem = NULL; + } + } + + /* Else will do memory reg/dereg for each chunk */ + ia->ri_memreg_strategy = memreg; + + return 0; +out2: + rdma_destroy_id(ia->ri_id); +out1: + return rc; +} + +/* + * Clean up/close an IA. + * o if event handles and PD have been initialized, free them. 
+ * o close the IA + */ +void +rpcrdma_ia_close(struct rpcrdma_ia *ia) +{ + int rc; + + dprintk("RPC: %s: entering\n", __func__); + if (ia->ri_bind_mem != NULL) { + rc = ib_dereg_mr(ia->ri_bind_mem); + dprintk("RPC: %s: ib_dereg_mr returned %i\n", + __func__, rc); + } + if (ia->ri_id != NULL && !IS_ERR(ia->ri_id) && ia->ri_id->qp) + rdma_destroy_qp(ia->ri_id); + if (ia->ri_pd != NULL && !IS_ERR(ia->ri_pd)) { + rc = ib_dealloc_pd(ia->ri_pd); + dprintk("RPC: %s: ib_dealloc_pd returned %i\n", + __func__, rc); + } + if (ia->ri_id != NULL && !IS_ERR(ia->ri_id)) + rdma_destroy_id(ia->ri_id); +} + +/* + * Create unconnected endpoint. + */ +int +rpcrdma_ep_create(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia, + struct rpcrdma_create_data_internal *cdata) +{ + struct ib_device_attr devattr; + int rc; + + rc = ib_query_device(ia->ri_id->device, &devattr); + if (rc) { + dprintk("RPC: %s: ib_query_device failed %d\n", + __func__, rc); + return rc; + } + + /* check provider's send/recv wr limits */ + if (cdata->max_requests > devattr.max_qp_wr) + cdata->max_requests = devattr.max_qp_wr; + + ep->rep_attr.event_handler = rpcrdma_qp_async_error_upcall; + ep->rep_attr.qp_context = ep; + /* send_cq and recv_cq initialized below */ + ep->rep_attr.srq = NULL; + ep->rep_attr.cap.max_send_wr = cdata->max_requests; + switch (ia->ri_memreg_strategy) { + case RPCRDMA_MEMWINDOWS_ASYNC: + case RPCRDMA_MEMWINDOWS: + /* Add room for mw_binds+unbinds - overkill! */ + ep->rep_attr.cap.max_send_wr++; + ep->rep_attr.cap.max_send_wr *= (2 * RPCRDMA_MAX_SEGS); + if (ep->rep_attr.cap.max_send_wr > devattr.max_qp_wr) + return -EINVAL; + break; + default: + break; + } + ep->rep_attr.cap.max_recv_wr = cdata->max_requests; + ep->rep_attr.cap.max_send_sge = (cdata->padding ? 4 : 2); + ep->rep_attr.cap.max_recv_sge = 1; + ep->rep_attr.cap.max_inline_data = 0; + ep->rep_attr.sq_sig_type = IB_SIGNAL_REQ_WR; + ep->rep_attr.qp_type = IB_QPT_RC; + ep->rep_attr.port_num = ~0; + + dprintk("RPC: %s: requested max: dtos: send %d recv %d; " + "iovs: send %d recv %d\n", + __func__, + ep->rep_attr.cap.max_send_wr, + ep->rep_attr.cap.max_recv_wr, + ep->rep_attr.cap.max_send_sge, + ep->rep_attr.cap.max_recv_sge); + + /* set trigger for requesting send completion */ + ep->rep_cqinit = ep->rep_attr.cap.max_send_wr/2 /* - 1*/; + switch (ia->ri_memreg_strategy) { + case RPCRDMA_MEMWINDOWS_ASYNC: + case RPCRDMA_MEMWINDOWS: + ep->rep_cqinit -= RPCRDMA_MAX_SEGS; + break; + default: + break; + } + if (ep->rep_cqinit <= 2) + ep->rep_cqinit = 0; + INIT_CQCOUNT(ep); + ep->rep_ia = ia; + init_waitqueue_head(&ep->rep_connect_wait); + + /* + * Create a single cq for receive dto and mw_bind (only ever + * care about unbind, really). Send completions are suppressed. + * Use single threaded tasklet upcalls to maintain ordering. 
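+ *
+ * The CQ depth passed to ib_create_cq() below must cover every
+ * receive and every signalled send that can be outstanding at once,
+ * plus one slot of slack; e.g. assuming a max_requests of 32 and no
+ * memory windows:
+ *
+ *	cqe = max_recv_wr + max_send_wr + 1 = 32 + 32 + 1 = 65
+ *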
+ */ + ep->rep_cq = ib_create_cq(ia->ri_id->device, rpcrdma_cq_event_upcall, + rpcrdma_cq_async_error_upcall, NULL, + ep->rep_attr.cap.max_recv_wr + + ep->rep_attr.cap.max_send_wr + 1, 0); + if (IS_ERR(ep->rep_cq)) { + rc = PTR_ERR(ep->rep_cq); + dprintk("RPC: %s: ib_create_cq failed: %i\n", + __func__, rc); + goto out1; + } + + rc = ib_req_notify_cq(ep->rep_cq, IB_CQ_NEXT_COMP); + if (rc) { + dprintk("RPC: %s: ib_req_notify_cq failed: %i\n", + __func__, rc); + goto out2; + } + + ep->rep_attr.send_cq = ep->rep_cq; + ep->rep_attr.recv_cq = ep->rep_cq; + + /* Initialize cma parameters */ + + /* RPC/RDMA does not use private data */ + ep->rep_remote_cma.private_data = NULL; + ep->rep_remote_cma.private_data_len = 0; + + /* Client offers RDMA Read but does not initiate */ + switch (ia->ri_memreg_strategy) { + case RPCRDMA_BOUNCEBUFFERS: + ep->rep_remote_cma.responder_resources = 0; + break; + case RPCRDMA_MTHCAFMR: + case RPCRDMA_REGISTER: + ep->rep_remote_cma.responder_resources = cdata->max_requests * + (RPCRDMA_MAX_DATA_SEGS / 8); + break; + case RPCRDMA_MEMWINDOWS: + case RPCRDMA_MEMWINDOWS_ASYNC: +#if RPCRDMA_PERSISTENT_REGISTRATION + case RPCRDMA_ALLPHYSICAL: +#endif + ep->rep_remote_cma.responder_resources = cdata->max_requests * + (RPCRDMA_MAX_DATA_SEGS / 2); + break; + default: + break; + } + if (ep->rep_remote_cma.responder_resources > devattr.max_qp_rd_atom) + ep->rep_remote_cma.responder_resources = devattr.max_qp_rd_atom; + ep->rep_remote_cma.initiator_depth = 0; + + ep->rep_remote_cma.retry_count = 7; + ep->rep_remote_cma.flow_control = 0; + ep->rep_remote_cma.rnr_retry_count = 0; + + return 0; + +out2: + if (ib_destroy_cq(ep->rep_cq)) + ; +out1: + return rc; +} + +/* + * rpcrdma_ep_destroy + * + * Disconnect and destroy endpoint. After this, the only + * valid operations on the ep are to free it (if dynamically + * allocated) or re-create it. + * + * The caller's error handling must be sure to not leak the endpoint + * if this function fails. + */ +int +rpcrdma_ep_destroy(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia) +{ + int rc; + + dprintk("RPC: %s: entering, connected is %d\n", + __func__, ep->rep_connected); + + if (ia->ri_id->qp) { + rc = rpcrdma_ep_disconnect(ep, ia); + if (rc) + dprintk("RPC: %s: rpcrdma_ep_disconnect" + " returned %i\n", __func__, rc); + } + + ep->rep_func = NULL; + + /* padding - could be done in rpcrdma_buffer_destroy... */ + if (ep->rep_pad_mr) { + rpcrdma_deregister_internal(ia, ep->rep_pad_mr, &ep->rep_pad); + ep->rep_pad_mr = NULL; + } + + if (ia->ri_id->qp) { + rdma_destroy_qp(ia->ri_id); + ia->ri_id->qp = NULL; + } + + rpcrdma_clean_cq(ep->rep_cq); + rc = ib_destroy_cq(ep->rep_cq); + if (rc) + dprintk("RPC: %s: ib_destroy_cq returned %i\n", + __func__, rc); + + return rc; +} + +/* + * Connect unconnected endpoint. + */ +int +rpcrdma_ep_connect(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia) +{ + struct rdma_cm_id *id; + int rc = 0; + int retry_count = 0; + int reconnect = (ep->rep_connected != 0); + + if (reconnect) { + struct rpcrdma_xprt *xprt; +retry: + rc = rpcrdma_ep_disconnect(ep, ia); + if (rc && rc != -ENOTCONN) + dprintk("RPC: %s: rpcrdma_ep_disconnect" + " status %i\n", __func__, rc); + rpcrdma_clean_cq(ep->rep_cq); + + xprt = container_of(ia, struct rpcrdma_xprt, rx_ia); + id = rpcrdma_create_id(xprt, ia, + (struct sockaddr *)&xprt->rx_data.addr); + if (IS_ERR(id)) { + rc = PTR_ERR(id); + goto out; + } + /* TEMP TEMP TEMP - fail if new device: + * Deregister/remarshal *all* requests! + * Close and recreate adapter, pd, etc! 
+ * Re-determine all attributes still sane! + * More stuff I haven't thought of! + * Rrrgh! + */ + if (ia->ri_id->device != id->device) { + printk("RPC: %s: can't reconnect on " + "different device!\n", __func__); + rdma_destroy_id(id); + rc = -ENETDOWN; + goto out; + } + /* END TEMP */ + rdma_destroy_id(ia->ri_id); + ia->ri_id = id; + } + + rc = rdma_create_qp(ia->ri_id, ia->ri_pd, &ep->rep_attr); + if (rc) { + dprintk("RPC: %s: rdma_create_qp failed %i\n", + __func__, rc); + goto out; + } + +/* XXX Tavor device performs badly with 2K MTU! */ +if (strnicmp(ia->ri_id->device->dma_device->bus->name, "pci", 3) == 0) { + struct pci_dev *pcid = to_pci_dev(ia->ri_id->device->dma_device); + if (pcid->device == PCI_DEVICE_ID_MELLANOX_TAVOR && + (pcid->vendor == PCI_VENDOR_ID_MELLANOX || + pcid->vendor == PCI_VENDOR_ID_TOPSPIN)) { + struct ib_qp_attr attr = { + .path_mtu = IB_MTU_1024 + }; + rc = ib_modify_qp(ia->ri_id->qp, &attr, IB_QP_PATH_MTU); + } +} + + /* Theoretically a client initiator_depth > 0 is not needed, + * but many peers fail to complete the connection unless they + * == responder_resources! */ + if (ep->rep_remote_cma.initiator_depth != + ep->rep_remote_cma.responder_resources) + ep->rep_remote_cma.initiator_depth = + ep->rep_remote_cma.responder_resources; + + ep->rep_connected = 0; + + rc = rdma_connect(ia->ri_id, &ep->rep_remote_cma); + if (rc) { + dprintk("RPC: %s: rdma_connect() failed with %i\n", + __func__, rc); + goto out; + } + + if (reconnect) + return 0; + + wait_event_interruptible(ep->rep_connect_wait, ep->rep_connected != 0); + + /* + * Check state. A non-peer reject indicates no listener + * (ECONNREFUSED), which may be a transient state. All + * others indicate a transport condition which has already + * undergone a best-effort. + */ + if (ep->rep_connected == -ECONNREFUSED + && ++retry_count <= RDMA_CONNECT_RETRY_MAX) { + dprintk("RPC: %s: non-peer_reject, retry\n", __func__); + goto retry; + } + if (ep->rep_connected <= 0) { + /* Sometimes, the only way to reliably connect to remote + * CMs is to use same nonzero values for ORD and IRD. */ + ep->rep_remote_cma.initiator_depth = + ep->rep_remote_cma.responder_resources; + if (ep->rep_remote_cma.initiator_depth == 0) + ++ep->rep_remote_cma.initiator_depth; + if (ep->rep_remote_cma.responder_resources == 0) + ++ep->rep_remote_cma.responder_resources; + if (retry_count++ == 0) + goto retry; + rc = ep->rep_connected; + } else { + dprintk("RPC: %s: connected\n", __func__); + } + +out: + if (rc) + ep->rep_connected = rc; + return rc; +} + +/* + * rpcrdma_ep_disconnect + * + * This is separate from destroy to facilitate the ability + * to reconnect without recreating the endpoint. + * + * This call is not reentrant, and must not be made in parallel + * on the same endpoint. + */ +int +rpcrdma_ep_disconnect(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia) +{ + int rc; + + rpcrdma_clean_cq(ep->rep_cq); + rc = rdma_disconnect(ia->ri_id); + if (!rc) { + /* returns without wait if not connected */ + wait_event_interruptible(ep->rep_connect_wait, + ep->rep_connected != 1); + dprintk("RPC: %s: after wait, %sconnected\n", __func__, + (ep->rep_connected == 1) ? 
"still " : "dis"); + } else { + dprintk("RPC: %s: rdma_disconnect %i\n", __func__, rc); + ep->rep_connected = rc; + } + return rc; +} + +/* + * Initialize buffer memory + */ +int +rpcrdma_buffer_create(struct rpcrdma_buffer *buf, struct rpcrdma_ep *ep, + struct rpcrdma_ia *ia, struct rpcrdma_create_data_internal *cdata) +{ + char *p; + size_t len; + int i, rc; + + buf->rb_max_requests = cdata->max_requests; + spin_lock_init(&buf->rb_lock); + atomic_set(&buf->rb_credits, 1); + + /* Need to allocate: + * 1. arrays for send and recv pointers + * 2. arrays of struct rpcrdma_req to fill in pointers + * 3. array of struct rpcrdma_rep for replies + * 4. padding, if any + * 5. mw's, if any + * Send/recv buffers in req/rep need to be registered + */ + + len = buf->rb_max_requests * + (sizeof(struct rpcrdma_req *) + sizeof(struct rpcrdma_rep *)); + len += cdata->padding; + switch (ia->ri_memreg_strategy) { + case RPCRDMA_MTHCAFMR: + /* TBD we are perhaps overallocating here */ + len += (buf->rb_max_requests + 1) * RPCRDMA_MAX_SEGS * + sizeof(struct rpcrdma_mw); + break; + case RPCRDMA_MEMWINDOWS_ASYNC: + case RPCRDMA_MEMWINDOWS: + len += (buf->rb_max_requests + 1) * RPCRDMA_MAX_SEGS * + sizeof(struct rpcrdma_mw); + break; + default: + break; + } + + /* allocate 1, 4 and 5 in one shot */ + p = kzalloc(len, GFP_KERNEL); + if (p == NULL) { + dprintk("RPC: %s: req_t/rep_t/pad kzalloc(%zd) failed\n", + __func__, len); + rc = -ENOMEM; + goto out; + } + buf->rb_pool = p; /* for freeing it later */ + + buf->rb_send_bufs = (struct rpcrdma_req **) p; + p = (char *) &buf->rb_send_bufs[buf->rb_max_requests]; + buf->rb_recv_bufs = (struct rpcrdma_rep **) p; + p = (char *) &buf->rb_recv_bufs[buf->rb_max_requests]; + + /* + * Register the zeroed pad buffer, if any. + */ + if (cdata->padding) { + rc = rpcrdma_register_internal(ia, p, cdata->padding, + &ep->rep_pad_mr, &ep->rep_pad); + if (rc) + goto out; + } + p += cdata->padding; + + /* + * Allocate the fmr's, or mw's for mw_bind chunk registration. + * We "cycle" the mw's in order to minimize rkey reuse, + * and also reduce unbind-to-bind collision. + */ + INIT_LIST_HEAD(&buf->rb_mws); + switch (ia->ri_memreg_strategy) { + case RPCRDMA_MTHCAFMR: + { + struct rpcrdma_mw *r = (struct rpcrdma_mw *)p; + struct ib_fmr_attr fa = { + RPCRDMA_MAX_DATA_SEGS, 1, PAGE_SHIFT + }; + /* TBD we are perhaps overallocating here */ + for (i = (buf->rb_max_requests+1) * RPCRDMA_MAX_SEGS; i; i--) { + r->r.fmr = ib_alloc_fmr(ia->ri_pd, + IB_ACCESS_REMOTE_WRITE | IB_ACCESS_REMOTE_READ, + &fa); + if (IS_ERR(r->r.fmr)) { + rc = PTR_ERR(r->r.fmr); + dprintk("RPC: %s: ib_alloc_fmr" + " failed %i\n", __func__, rc); + goto out; + } + list_add(&r->mw_list, &buf->rb_mws); + ++r; + } + } + break; + case RPCRDMA_MEMWINDOWS_ASYNC: + case RPCRDMA_MEMWINDOWS: + { + struct rpcrdma_mw *r = (struct rpcrdma_mw *)p; + /* Allocate one extra request's worth, for full cycling */ + for (i = (buf->rb_max_requests+1) * RPCRDMA_MAX_SEGS; i; i--) { + r->r.mw = ib_alloc_mw(ia->ri_pd); + if (IS_ERR(r->r.mw)) { + rc = PTR_ERR(r->r.mw); + dprintk("RPC: %s: ib_alloc_mw" + " failed %i\n", __func__, rc); + goto out; + } + list_add(&r->mw_list, &buf->rb_mws); + ++r; + } + } + break; + default: + break; + } + + /* + * Allocate/init the request/reply buffers. Doing this + * using kmalloc for now -- one for each buf. 
+ */ + for (i = 0; i < buf->rb_max_requests; i++) { + struct rpcrdma_req *req; + struct rpcrdma_rep *rep; + + len = cdata->inline_wsize + sizeof(struct rpcrdma_req); + /* RPC layer requests *double* size + 1K RPC_SLACK_SPACE! */ + /* Typical ~2400b, so rounding up saves work later */ + if (len < 4096) + len = 4096; + req = kmalloc(len, GFP_KERNEL); + if (req == NULL) { + dprintk("RPC: %s: request buffer %d alloc" + " failed\n", __func__, i); + rc = -ENOMEM; + goto out; + } + memset(req, 0, sizeof(struct rpcrdma_req)); + buf->rb_send_bufs[i] = req; + buf->rb_send_bufs[i]->rl_buffer = buf; + + rc = rpcrdma_register_internal(ia, req->rl_base, + len - offsetof(struct rpcrdma_req, rl_base), + &buf->rb_send_bufs[i]->rl_handle, + &buf->rb_send_bufs[i]->rl_iov); + if (rc) + goto out; + + buf->rb_send_bufs[i]->rl_size = len-sizeof(struct rpcrdma_req); + + len = cdata->inline_rsize + sizeof(struct rpcrdma_rep); + rep = kmalloc(len, GFP_KERNEL); + if (rep == NULL) { + dprintk("RPC: %s: reply buffer %d alloc failed\n", + __func__, i); + rc = -ENOMEM; + goto out; + } + memset(rep, 0, sizeof(struct rpcrdma_rep)); + buf->rb_recv_bufs[i] = rep; + buf->rb_recv_bufs[i]->rr_buffer = buf; + init_waitqueue_head(&rep->rr_unbind); + + rc = rpcrdma_register_internal(ia, rep->rr_base, + len - offsetof(struct rpcrdma_rep, rr_base), + &buf->rb_recv_bufs[i]->rr_handle, + &buf->rb_recv_bufs[i]->rr_iov); + if (rc) + goto out; + + } + dprintk("RPC: %s: max_requests %d\n", + __func__, buf->rb_max_requests); + /* done */ + return 0; +out: + rpcrdma_buffer_destroy(buf); + return rc; +} + +/* + * Unregister and destroy buffer memory. Need to deal with + * partial initialization, so it's callable from failed create. + * Must be called before destroying endpoint, as registrations + * reference it. + */ +void +rpcrdma_buffer_destroy(struct rpcrdma_buffer *buf) +{ + int rc, i; + struct rpcrdma_ia *ia = rdmab_to_ia(buf); + + /* clean up in reverse order from create + * 1. recv mr memory (mr free, then kfree) + * 1a. bind mw memory + * 2. send mr memory (mr free, then kfree) + * 3. padding (if any) [moved to rpcrdma_ep_destroy] + * 4. arrays + */ + dprintk("RPC: %s: entering\n", __func__); + + for (i = 0; i < buf->rb_max_requests; i++) { + if (buf->rb_recv_bufs && buf->rb_recv_bufs[i]) { + rpcrdma_deregister_internal(ia, + buf->rb_recv_bufs[i]->rr_handle, + &buf->rb_recv_bufs[i]->rr_iov); + kfree(buf->rb_recv_bufs[i]); + } + if (buf->rb_send_bufs && buf->rb_send_bufs[i]) { + while (!list_empty(&buf->rb_mws)) { + struct rpcrdma_mw *r; + r = list_entry(buf->rb_mws.next, + struct rpcrdma_mw, mw_list); + list_del(&r->mw_list); + switch (ia->ri_memreg_strategy) { + case RPCRDMA_MTHCAFMR: + rc = ib_dealloc_fmr(r->r.fmr); + if (rc) + dprintk("RPC: %s:" + " ib_dealloc_fmr" + " failed %i\n", + __func__, rc); + break; + case RPCRDMA_MEMWINDOWS_ASYNC: + case RPCRDMA_MEMWINDOWS: + rc = ib_dealloc_mw(r->r.mw); + if (rc) + dprintk("RPC: %s:" + " ib_dealloc_mw" + " failed %i\n", + __func__, rc); + break; + default: + break; + } + } + rpcrdma_deregister_internal(ia, + buf->rb_send_bufs[i]->rl_handle, + &buf->rb_send_bufs[i]->rl_iov); + kfree(buf->rb_send_bufs[i]); + } + } + + kfree(buf->rb_pool); +} + +/* + * Get a set of request/reply buffers. + * + * Reply buffer (if needed) is attached to send buffer upon return. + * Rule: + * rb_send_index and rb_recv_index MUST always be pointing to the + * *next* available buffer (non-NULL). They are incremented after + * removing buffers, and decremented *before* returning them. 
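+ *
+ * For example, with rb_max_requests = 4 and two requests in use:
+ *
+ *	rb_send_bufs[] = { NULL, NULL, req2, req3 }  rb_send_index == 2
+ *
+ * a get hands out slot 2 and bumps the index to 3; the matching put
+ * decrements the index back to 2 before storing the request there.
+ *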
+ */ +struct rpcrdma_req * +rpcrdma_buffer_get(struct rpcrdma_buffer *buffers) +{ + struct rpcrdma_req *req; + unsigned long flags; + + spin_lock_irqsave(&buffers->rb_lock, flags); + if (buffers->rb_send_index == buffers->rb_max_requests) { + spin_unlock_irqrestore(&buffers->rb_lock, flags); + dprintk("RPC: %s: out of request buffers\n", __func__); + return ((struct rpcrdma_req *)NULL); + } + + req = buffers->rb_send_bufs[buffers->rb_send_index]; + if (buffers->rb_send_index < buffers->rb_recv_index) { + dprintk("RPC: %s: %d extra receives outstanding (ok)\n", + __func__, + buffers->rb_recv_index - buffers->rb_send_index); + req->rl_reply = NULL; + } else { + req->rl_reply = buffers->rb_recv_bufs[buffers->rb_recv_index]; + buffers->rb_recv_bufs[buffers->rb_recv_index++] = NULL; + } + buffers->rb_send_bufs[buffers->rb_send_index++] = NULL; + if (!list_empty(&buffers->rb_mws)) { + int i = RPCRDMA_MAX_SEGS - 1; + do { + struct rpcrdma_mw *r; + r = list_entry(buffers->rb_mws.next, + struct rpcrdma_mw, mw_list); + list_del(&r->mw_list); + req->rl_segments[i].mr_chunk.rl_mw = r; + } while (--i >= 0); + } + spin_unlock_irqrestore(&buffers->rb_lock, flags); + return req; +} + +/* + * Put request/reply buffers back into pool. + * Pre-decrement counter/array index. + */ +void +rpcrdma_buffer_put(struct rpcrdma_req *req) +{ + struct rpcrdma_buffer *buffers = req->rl_buffer; + struct rpcrdma_ia *ia = rdmab_to_ia(buffers); + int i; + unsigned long flags; + + BUG_ON(req->rl_nchunks != 0); + spin_lock_irqsave(&buffers->rb_lock, flags); + buffers->rb_send_bufs[--buffers->rb_send_index] = req; + req->rl_niovs = 0; + if (req->rl_reply) { + buffers->rb_recv_bufs[--buffers->rb_recv_index] = req->rl_reply; + init_waitqueue_head(&req->rl_reply->rr_unbind); + req->rl_reply->rr_func = NULL; + req->rl_reply = NULL; + } + switch (ia->ri_memreg_strategy) { + case RPCRDMA_MTHCAFMR: + case RPCRDMA_MEMWINDOWS_ASYNC: + case RPCRDMA_MEMWINDOWS: + /* + * Cycle mw's back in reverse order, and "spin" them. + * This delays and scrambles reuse as much as possible. + */ + i = 1; + do { + struct rpcrdma_mw **mw; + mw = &req->rl_segments[i].mr_chunk.rl_mw; + list_add_tail(&(*mw)->mw_list, &buffers->rb_mws); + *mw = NULL; + } while (++i < RPCRDMA_MAX_SEGS); + list_add_tail(&req->rl_segments[0].mr_chunk.rl_mw->mw_list, + &buffers->rb_mws); + req->rl_segments[0].mr_chunk.rl_mw = NULL; + break; + default: + break; + } + spin_unlock_irqrestore(&buffers->rb_lock, flags); +} + +/* + * Recover reply buffers from pool. + * This happens when recovering from error conditions. + * Post-increment counter/array index. + */ +void +rpcrdma_recv_buffer_get(struct rpcrdma_req *req) +{ + struct rpcrdma_buffer *buffers = req->rl_buffer; + unsigned long flags; + + if (req->rl_iov.length == 0) /* special case xprt_rdma_allocate() */ + buffers = ((struct rpcrdma_req *) buffers)->rl_buffer; + spin_lock_irqsave(&buffers->rb_lock, flags); + if (buffers->rb_recv_index < buffers->rb_max_requests) { + req->rl_reply = buffers->rb_recv_bufs[buffers->rb_recv_index]; + buffers->rb_recv_bufs[buffers->rb_recv_index++] = NULL; + } + spin_unlock_irqrestore(&buffers->rb_lock, flags); +} + +/* + * Put reply buffers back into pool when not attached to + * request. This happens in error conditions, and when + * aborting unbinds. Pre-decrement counter/array index. 
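+ *
+ * This pairs with rpcrdma_recv_buffer_get() above to preserve the
+ * index invariant on the receive array, e.g.
+ *
+ *	rpcrdma_recv_buffer_get(req);	rb_recv_index: n becomes n + 1
+ *	rpcrdma_recv_buffer_put(rep);	rb_recv_index: n + 1 back to n
+ *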
+ */ +void +rpcrdma_recv_buffer_put(struct rpcrdma_rep *rep) +{ + struct rpcrdma_buffer *buffers = rep->rr_buffer; + unsigned long flags; + + rep->rr_func = NULL; + spin_lock_irqsave(&buffers->rb_lock, flags); + buffers->rb_recv_bufs[--buffers->rb_recv_index] = rep; + spin_unlock_irqrestore(&buffers->rb_lock, flags); +} + +/* + * Wrappers for internal-use kmalloc memory registration, used by buffer code. + */ + +int +rpcrdma_register_internal(struct rpcrdma_ia *ia, void *va, int len, + struct ib_mr **mrp, struct ib_sge *iov) +{ + struct ib_phys_buf ipb; + struct ib_mr *mr; + int rc; + + /* + * All memory passed here was kmalloc'ed, therefore phys-contiguous. + */ + iov->addr = ib_dma_map_single(ia->ri_id->device, + va, len, DMA_BIDIRECTIONAL); + iov->length = len; + + if (ia->ri_bind_mem != NULL) { + *mrp = NULL; + iov->lkey = ia->ri_bind_mem->lkey; + return 0; + } + + ipb.addr = iov->addr; + ipb.size = iov->length; + mr = ib_reg_phys_mr(ia->ri_pd, &ipb, 1, + IB_ACCESS_LOCAL_WRITE, &iov->addr); + + dprintk("RPC: %s: phys convert: 0x%llx " + "registered 0x%llx length %d\n", + __func__, (unsigned long long)ipb.addr, + (unsigned long long)iov->addr, len); + + if (IS_ERR(mr)) { + *mrp = NULL; + rc = PTR_ERR(mr); + dprintk("RPC: %s: failed with %i\n", __func__, rc); + } else { + *mrp = mr; + iov->lkey = mr->lkey; + rc = 0; + } + + return rc; +} + +int +rpcrdma_deregister_internal(struct rpcrdma_ia *ia, + struct ib_mr *mr, struct ib_sge *iov) +{ + int rc; + + ib_dma_unmap_single(ia->ri_id->device, + iov->addr, iov->length, DMA_BIDIRECTIONAL); + + if (NULL == mr) + return 0; + + rc = ib_dereg_mr(mr); + if (rc) + dprintk("RPC: %s: ib_dereg_mr failed %i\n", __func__, rc); + return rc; +} + +/* + * Wrappers for chunk registration, shared by read/write chunk code. + */ + +static void +rpcrdma_map_one(struct rpcrdma_ia *ia, struct rpcrdma_mr_seg *seg, int writing) +{ + seg->mr_dir = writing ? DMA_FROM_DEVICE : DMA_TO_DEVICE; + seg->mr_dmalen = seg->mr_len; + if (seg->mr_page) + seg->mr_dma = ib_dma_map_page(ia->ri_id->device, + seg->mr_page, offset_in_page(seg->mr_offset), + seg->mr_dmalen, seg->mr_dir); + else + seg->mr_dma = ib_dma_map_single(ia->ri_id->device, + seg->mr_offset, + seg->mr_dmalen, seg->mr_dir); +} + +static void +rpcrdma_unmap_one(struct rpcrdma_ia *ia, struct rpcrdma_mr_seg *seg) +{ + if (seg->mr_page) + ib_dma_unmap_page(ia->ri_id->device, + seg->mr_dma, seg->mr_dmalen, seg->mr_dir); + else + ib_dma_unmap_single(ia->ri_id->device, + seg->mr_dma, seg->mr_dmalen, seg->mr_dir); +} + +int +rpcrdma_register_external(struct rpcrdma_mr_seg *seg, + int nsegs, int writing, struct rpcrdma_xprt *r_xprt) +{ + struct rpcrdma_ia *ia = &r_xprt->rx_ia; + int mem_priv = (writing ? 
IB_ACCESS_REMOTE_WRITE : + IB_ACCESS_REMOTE_READ); + struct rpcrdma_mr_seg *seg1 = seg; + int i; + int rc = 0; + + switch (ia->ri_memreg_strategy) { + +#if RPCRDMA_PERSISTENT_REGISTRATION + case RPCRDMA_ALLPHYSICAL: + rpcrdma_map_one(ia, seg, writing); + seg->mr_rkey = ia->ri_bind_mem->rkey; + seg->mr_base = seg->mr_dma; + seg->mr_nsegs = 1; + nsegs = 1; + break; +#endif + + /* Registration using fast memory registration */ + case RPCRDMA_MTHCAFMR: + { + u64 physaddrs[RPCRDMA_MAX_DATA_SEGS]; + int len, pageoff = offset_in_page(seg->mr_offset); + seg1->mr_offset -= pageoff; /* start of page */ + seg1->mr_len += pageoff; + len = -pageoff; + if (nsegs > RPCRDMA_MAX_DATA_SEGS) + nsegs = RPCRDMA_MAX_DATA_SEGS; + for (i = 0; i < nsegs;) { + rpcrdma_map_one(ia, seg, writing); + physaddrs[i] = seg->mr_dma; + len += seg->mr_len; + ++seg; + ++i; + /* Check for holes */ + if ((i < nsegs && offset_in_page(seg->mr_offset)) || + offset_in_page((seg-1)->mr_offset+(seg-1)->mr_len)) + break; + } + nsegs = i; + rc = ib_map_phys_fmr(seg1->mr_chunk.rl_mw->r.fmr, + physaddrs, nsegs, seg1->mr_dma); + if (rc) { + dprintk("RPC: %s: failed ib_map_phys_fmr " + "%u@0x%llx+%i (%d)... status %i\n", __func__, + len, (unsigned long long)seg1->mr_dma, + pageoff, nsegs, rc); + while (nsegs--) + rpcrdma_unmap_one(ia, --seg); + } else { + seg1->mr_rkey = seg1->mr_chunk.rl_mw->r.fmr->rkey; + seg1->mr_base = seg1->mr_dma + pageoff; + seg1->mr_nsegs = nsegs; + seg1->mr_len = len; + } + } + break; + + /* Registration using memory windows */ + case RPCRDMA_MEMWINDOWS_ASYNC: + case RPCRDMA_MEMWINDOWS: + { + struct ib_mw_bind param; + rpcrdma_map_one(ia, seg, writing); + param.mr = ia->ri_bind_mem; + param.wr_id = 0ULL; /* no send cookie */ + param.addr = seg->mr_dma; + param.length = seg->mr_len; + param.send_flags = 0; + param.mw_access_flags = mem_priv; + + DECR_CQCOUNT(&r_xprt->rx_ep); + rc = ib_bind_mw(ia->ri_id->qp, + seg->mr_chunk.rl_mw->r.mw, &param); + if (rc) { + dprintk("RPC: %s: failed ib_bind_mw " + "%u@0x%llx status %i\n", + __func__, seg->mr_len, + (unsigned long long)seg->mr_dma, rc); + rpcrdma_unmap_one(ia, seg); + } else { + seg->mr_rkey = seg->mr_chunk.rl_mw->r.mw->rkey; + seg->mr_base = param.addr; + seg->mr_nsegs = 1; + nsegs = 1; + } + } + break; + + /* Default registration each time */ + default: + { + struct ib_phys_buf ipb[RPCRDMA_MAX_DATA_SEGS]; + int len = 0; + if (nsegs > RPCRDMA_MAX_DATA_SEGS) + nsegs = RPCRDMA_MAX_DATA_SEGS; + for (i = 0; i < nsegs;) { + rpcrdma_map_one(ia, seg, writing); + ipb[i].addr = seg->mr_dma; + ipb[i].size = seg->mr_len; + len += seg->mr_len; + ++seg; + ++i; + /* Check for holes */ + if ((i < nsegs && offset_in_page(seg->mr_offset)) || + offset_in_page((seg-1)->mr_offset+(seg-1)->mr_len)) + break; + } + nsegs = i; + seg1->mr_base = seg1->mr_dma; + seg1->mr_chunk.rl_mr = ib_reg_phys_mr(ia->ri_pd, + ipb, nsegs, mem_priv, &seg1->mr_base); + if (IS_ERR(seg1->mr_chunk.rl_mr)) { + rc = PTR_ERR(seg1->mr_chunk.rl_mr); + dprintk("RPC: %s: failed ib_reg_phys_mr " + "%u@0x%llx (%d)...
status %i\n", + __func__, len, + (unsigned long long)seg1->mr_dma, nsegs, rc); + while (nsegs--) + rpcrdma_unmap_one(ia, --seg); + } else { + seg1->mr_rkey = seg1->mr_chunk.rl_mr->rkey; + seg1->mr_nsegs = nsegs; + seg1->mr_len = len; + } + } + break; + } + if (rc) + return -1; + + return nsegs; +} + +int +rpcrdma_deregister_external(struct rpcrdma_mr_seg *seg, + struct rpcrdma_xprt *r_xprt, void *r) +{ + struct rpcrdma_ia *ia = &r_xprt->rx_ia; + struct rpcrdma_mr_seg *seg1 = seg; + int nsegs = seg->mr_nsegs, rc; + + switch (ia->ri_memreg_strategy) { + +#if RPCRDMA_PERSISTENT_REGISTRATION + case RPCRDMA_ALLPHYSICAL: + BUG_ON(nsegs != 1); + rpcrdma_unmap_one(ia, seg); + rc = 0; + break; +#endif + + case RPCRDMA_MTHCAFMR: + { + LIST_HEAD(l); + list_add(&seg->mr_chunk.rl_mw->r.fmr->list, &l); + rc = ib_unmap_fmr(&l); + while (seg1->mr_nsegs--) + rpcrdma_unmap_one(ia, seg++); + } + if (rc) + dprintk("RPC: %s: failed ib_unmap_fmr," + " status %i\n", __func__, rc); + break; + + case RPCRDMA_MEMWINDOWS_ASYNC: + case RPCRDMA_MEMWINDOWS: + { + struct ib_mw_bind param; + BUG_ON(nsegs != 1); + param.mr = ia->ri_bind_mem; + param.addr = 0ULL; /* unbind */ + param.length = 0; + param.mw_access_flags = 0; + if (r) { + param.wr_id = (u64) (unsigned long) r; + param.send_flags = IB_SEND_SIGNALED; + INIT_CQCOUNT(&r_xprt->rx_ep); + } else { + param.wr_id = 0ULL; + param.send_flags = 0; + DECR_CQCOUNT(&r_xprt->rx_ep); + } + rc = ib_bind_mw(ia->ri_id->qp, + seg->mr_chunk.rl_mw->r.mw, &param); + rpcrdma_unmap_one(ia, seg); + } + if (rc) + dprintk("RPC: %s: failed ib_(un)bind_mw," + " status %i\n", __func__, rc); + else + r = NULL; /* will upcall on completion */ + break; + + default: + rc = ib_dereg_mr(seg1->mr_chunk.rl_mr); + seg1->mr_chunk.rl_mr = NULL; + while (seg1->mr_nsegs--) + rpcrdma_unmap_one(ia, seg++); + if (rc) + dprintk("RPC: %s: failed ib_dereg_mr," + " status %i\n", __func__, rc); + break; + } + if (r) { + struct rpcrdma_rep *rep = r; + void (*func)(struct rpcrdma_rep *) = rep->rr_func; + rep->rr_func = NULL; + func(rep); /* dereg done, callback now */ + } + return nsegs; +} + +/* + * Prepost any receive buffer, then post send. + * + * Receive buffer is donated to hardware, reclaimed upon recv completion.
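+ *
+ * Stripped of the DMA syncs and completion-signalling bookkeeping,
+ * the body below reduces to:
+ *
+ *	if (req->rl_reply)
+ *		rpcrdma_ep_post_recv(ia, ep, req->rl_reply);
+ *	ib_post_send(ia->ri_id->qp, &send_wr, &send_wr_fail);
+ *
+ * so a receive is always on the queue before the send that will
+ * provoke the matching reply.
+ *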
+ */ +int +rpcrdma_ep_post(struct rpcrdma_ia *ia, + struct rpcrdma_ep *ep, + struct rpcrdma_req *req) +{ + struct ib_send_wr send_wr, *send_wr_fail; + struct rpcrdma_rep *rep = req->rl_reply; + int rc; + + if (rep) { + rc = rpcrdma_ep_post_recv(ia, ep, rep); + if (rc) + goto out; + req->rl_reply = NULL; + } + + send_wr.next = NULL; + send_wr.wr_id = 0ULL; /* no send cookie */ + send_wr.sg_list = req->rl_send_iov; + send_wr.num_sge = req->rl_niovs; + send_wr.opcode = IB_WR_SEND; + send_wr.imm_data = 0; + if (send_wr.num_sge == 4) /* no need to sync any pad (constant) */ + ib_dma_sync_single_for_device(ia->ri_id->device, + req->rl_send_iov[3].addr, req->rl_send_iov[3].length, + DMA_TO_DEVICE); + ib_dma_sync_single_for_device(ia->ri_id->device, + req->rl_send_iov[1].addr, req->rl_send_iov[1].length, + DMA_TO_DEVICE); + ib_dma_sync_single_for_device(ia->ri_id->device, + req->rl_send_iov[0].addr, req->rl_send_iov[0].length, + DMA_TO_DEVICE); + + if (DECR_CQCOUNT(ep) > 0) + send_wr.send_flags = 0; + else { /* Provider must take a send completion every now and then */ + INIT_CQCOUNT(ep); + send_wr.send_flags = IB_SEND_SIGNALED; + } + + rc = ib_post_send(ia->ri_id->qp, &send_wr, &send_wr_fail); + if (rc) + dprintk("RPC: %s: ib_post_send returned %i\n", __func__, + rc); +out: + return rc; +} + +/* + * (Re)post a receive buffer. + */ +int +rpcrdma_ep_post_recv(struct rpcrdma_ia *ia, + struct rpcrdma_ep *ep, + struct rpcrdma_rep *rep) +{ + struct ib_recv_wr recv_wr, *recv_wr_fail; + int rc; + + recv_wr.next = NULL; + recv_wr.wr_id = (u64) (unsigned long) rep; + recv_wr.sg_list = &rep->rr_iov; + recv_wr.num_sge = 1; + + ib_dma_sync_single_for_cpu(ia->ri_id->device, + rep->rr_iov.addr, rep->rr_iov.length, DMA_BIDIRECTIONAL); + + DECR_CQCOUNT(ep); + rc = ib_post_recv(ia->ri_id->qp, &recv_wr, &recv_wr_fail); + + if (rc) + dprintk("RPC: %s: ib_post_recv returned %i\n", __func__, + rc); + return rc; +} diff --git a/net/sunrpc/xprtrdma/xprt_rdma.h b/net/sunrpc/xprtrdma/xprt_rdma.h new file mode 100644 index 00000000000..2427822f8bd --- /dev/null +++ b/net/sunrpc/xprtrdma/xprt_rdma.h @@ -0,0 +1,330 @@ +/* + * Copyright (c) 2003-2007 Network Appliance, Inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the BSD-type + * license below: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials provided + * with the distribution. + * + * Neither the name of the Network Appliance, Inc. nor the names of + * its contributors may be used to endorse or promote products + * derived from this software without specific prior written + * permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef _LINUX_SUNRPC_XPRT_RDMA_H +#define _LINUX_SUNRPC_XPRT_RDMA_H + +#include <linux/wait.h> /* wait_queue_head_t, etc */ +#include <linux/spinlock.h> /* spinlock_t, etc */ +#include <asm/atomic.h> /* atomic_t, etc */ + +#include <rdma/rdma_cm.h> /* RDMA connection api */ +#include <rdma/ib_verbs.h> /* RDMA verbs api */ + +#include <linux/sunrpc/clnt.h> /* rpc_xprt */ +#include <linux/sunrpc/rpc_rdma.h> /* RPC/RDMA protocol */ +#include <linux/sunrpc/xprtrdma.h> /* xprt parameters */ + +/* + * Interface Adapter -- one per transport instance + */ +struct rpcrdma_ia { + struct rdma_cm_id *ri_id; + struct ib_pd *ri_pd; + struct ib_mr *ri_bind_mem; + struct completion ri_done; + int ri_async_rc; + enum rpcrdma_memreg ri_memreg_strategy; +}; + +/* + * RDMA Endpoint -- one per transport instance + */ + +struct rpcrdma_ep { + atomic_t rep_cqcount; + int rep_cqinit; + int rep_connected; + struct rpcrdma_ia *rep_ia; + struct ib_cq *rep_cq; + struct ib_qp_init_attr rep_attr; + wait_queue_head_t rep_connect_wait; + struct ib_sge rep_pad; /* holds zeroed pad */ + struct ib_mr *rep_pad_mr; /* holds zeroed pad */ + void (*rep_func)(struct rpcrdma_ep *); + struct rpc_xprt *rep_xprt; /* for rep_func */ + struct rdma_conn_param rep_remote_cma; + struct sockaddr_storage rep_remote_addr; +}; + +#define INIT_CQCOUNT(ep) atomic_set(&(ep)->rep_cqcount, (ep)->rep_cqinit) +#define DECR_CQCOUNT(ep) atomic_sub_return(1, &(ep)->rep_cqcount) + +/* + * struct rpcrdma_rep -- this structure encapsulates state required to recv + * and complete a reply, asynchronously. It needs several pieces of + * state: + * o recv buffer (posted to provider) + * o ib_sge (also donated to provider) + * o status of reply (length, success or not) + * o bookkeeping state to get run by tasklet (list, etc) + * + * These are allocated during initialization, per-transport instance; + * however, the tasklet execution list itself is global, as it should + * always be pretty short. + * + * N of these are associated with a transport instance, and stored in + * struct rpcrdma_buffer. N is the max number of outstanding requests.
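+ *
+ * In sketch form, one pass through the life cycle of a rep:
+ *
+ *	rpcrdma_ep_post_recv(ia, ep, rep);	donated to the provider
+ *	(receive completion fires)
+ *	rep->rr_len = wc->byte_len;		filled in by the upcall
+ *	rpcrdma_schedule_tasklet(rep);		queued on the global list
+ *	rep->rr_func(rep);			reply handler, in softirq
+ *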
+ */ + +/* temporary static scatter/gather max */ +#define RPCRDMA_MAX_DATA_SEGS (8) /* max scatter/gather */ +#define RPCRDMA_MAX_SEGS (RPCRDMA_MAX_DATA_SEGS + 2) /* head+tail = 2 */ +#define MAX_RPCRDMAHDR (\ + /* max supported RPC/RDMA header */ \ + sizeof(struct rpcrdma_msg) + (2 * sizeof(u32)) + \ + (sizeof(struct rpcrdma_read_chunk) * RPCRDMA_MAX_SEGS) + sizeof(u32)) + +struct rpcrdma_buffer; + +struct rpcrdma_rep { + unsigned int rr_len; /* actual received reply length */ + struct rpcrdma_buffer *rr_buffer; /* home base for this structure */ + struct rpc_xprt *rr_xprt; /* needed for request/reply matching */ + void (*rr_func)(struct rpcrdma_rep *);/* called by tasklet in softint */ + struct list_head rr_list; /* tasklet list */ + wait_queue_head_t rr_unbind; /* optional unbind wait */ + struct ib_sge rr_iov; /* for posting */ + struct ib_mr *rr_handle; /* handle for mem in rr_iov */ + char rr_base[MAX_RPCRDMAHDR]; /* minimal inline receive buffer */ +}; + +/* + * struct rpcrdma_req -- structure central to the request/reply sequence. + * + * N of these are associated with a transport instance, and stored in + * struct rpcrdma_buffer. N is the max number of outstanding requests. + * + * It includes pre-registered buffer memory for send AND recv. + * The recv buffer, however, is not owned by this structure, and + * is "donated" to the hardware when a recv is posted. When a + * reply is handled, the recv buffer used is given back to the + * struct rpcrdma_req associated with the request. + * + * In addition to the basic memory, this structure includes an array + * of iovs for send operations. The reason is that the iovs passed to + * ib_post_{send,recv} must not be modified until the work request + * completes. + * + * NOTES: + * o RPCRDMA_MAX_SEGS is the max number of addressable chunk elements we + * marshal. The number needed varies depending on the iov lists that + * are passed to us, the memory registration mode we are in, and if + * physical addressing is used, the layout.
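+ *
+ * For instance, an 8-page payload (32KB with 4KB pages) can occupy
+ * all RPCRDMA_MAX_DATA_SEGS data segments, and the xdr_buf head and
+ * tail iovs can each add one more -- hence RPCRDMA_MAX_SEGS being
+ * 8 + 2 = 10.
+ *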
+ */ + +struct rpcrdma_mr_seg { /* chunk descriptors */ + union { /* chunk memory handles */ + struct ib_mr *rl_mr; /* if registered directly */ + struct rpcrdma_mw { /* if registered from region */ + union { + struct ib_mw *mw; + struct ib_fmr *fmr; + } r; + struct list_head mw_list; + } *rl_mw; + } mr_chunk; + u64 mr_base; /* registration result */ + u32 mr_rkey; /* registration result */ + u32 mr_len; /* length of chunk or segment */ + int mr_nsegs; /* number of segments in chunk or 0 */ + enum dma_data_direction mr_dir; /* segment mapping direction */ + dma_addr_t mr_dma; /* segment mapping address */ + size_t mr_dmalen; /* segment mapping length */ + struct page *mr_page; /* owning page, if any */ + char *mr_offset; /* kva if no page, else offset */ +}; + +struct rpcrdma_req { + size_t rl_size; /* actual length of buffer */ + unsigned int rl_niovs; /* 0, 2 or 4 */ + unsigned int rl_nchunks; /* non-zero if chunks */ + struct rpcrdma_buffer *rl_buffer; /* home base for this structure */ + struct rpcrdma_rep *rl_reply;/* holder for reply buffer */ + struct rpcrdma_mr_seg rl_segments[RPCRDMA_MAX_SEGS];/* chunk segments */ + struct ib_sge rl_send_iov[4]; /* for active requests */ + struct ib_sge rl_iov; /* for posting */ + struct ib_mr *rl_handle; /* handle for mem in rl_iov */ + char rl_base[MAX_RPCRDMAHDR]; /* start of actual buffer */ + __u32 rl_xdr_buf[0]; /* start of returned rpc rq_buffer */ +}; +#define rpcr_to_rdmar(r) \ + container_of((r)->rq_buffer, struct rpcrdma_req, rl_xdr_buf[0]) + +/* + * struct rpcrdma_buffer -- holds list/queue of pre-registered memory for + * inline requests/replies, and client/server credits. + * + * One of these is associated with a transport instance + */ +struct rpcrdma_buffer { + spinlock_t rb_lock; /* protects indexes */ + atomic_t rb_credits; /* most recent server credits */ + unsigned long rb_cwndscale; /* cached framework rpc_cwndscale */ + int rb_max_requests;/* client max requests */ + struct list_head rb_mws; /* optional memory windows/fmrs */ + int rb_send_index; + struct rpcrdma_req **rb_send_bufs; + int rb_recv_index; + struct rpcrdma_rep **rb_recv_bufs; + char *rb_pool; +}; +#define rdmab_to_ia(b) (&container_of((b), struct rpcrdma_xprt, rx_buf)->rx_ia) + +/* + * Internal structure for transport instance creation. This + * exists primarily for modularity. 
+ * + * This data should be set with mount options + */ +struct rpcrdma_create_data_internal { + struct sockaddr_storage addr; /* RDMA server address */ + unsigned int max_requests; /* max requests (slots) in flight */ + unsigned int rsize; /* mount rsize - max read hdr+data */ + unsigned int wsize; /* mount wsize - max write hdr+data */ + unsigned int inline_rsize; /* max non-rdma read data payload */ + unsigned int inline_wsize; /* max non-rdma write data payload */ + unsigned int padding; /* non-rdma write header padding */ +}; + +#define RPCRDMA_INLINE_READ_THRESHOLD(rq) \ + (rpcx_to_rdmad(rq->rq_task->tk_xprt).inline_rsize) + +#define RPCRDMA_INLINE_WRITE_THRESHOLD(rq)\ + (rpcx_to_rdmad(rq->rq_task->tk_xprt).inline_wsize) + +#define RPCRDMA_INLINE_PAD_VALUE(rq)\ + rpcx_to_rdmad(rq->rq_task->tk_xprt).padding + +/* + * Statistics for RPCRDMA + */ +struct rpcrdma_stats { + unsigned long read_chunk_count; + unsigned long write_chunk_count; + unsigned long reply_chunk_count; + + unsigned long long total_rdma_request; + unsigned long long total_rdma_reply; + + unsigned long long pullup_copy_count; + unsigned long long fixup_copy_count; + unsigned long hardway_register_count; + unsigned long failed_marshal_count; + unsigned long bad_reply_count; +}; + +/* + * RPCRDMA transport -- encapsulates the structures above for + * integration with RPC. + * + * The contained structures are embedded, not pointers, + * for convenience. This structure need not be visible externally. + * + * It is allocated and initialized during mount, and released + * during unmount. + */ +struct rpcrdma_xprt { + struct rpc_xprt xprt; + struct rpcrdma_ia rx_ia; + struct rpcrdma_ep rx_ep; + struct rpcrdma_buffer rx_buf; + struct rpcrdma_create_data_internal rx_data; + struct delayed_work rdma_connect; + struct rpcrdma_stats rx_stats; +}; + +#define rpcx_to_rdmax(x) container_of(x, struct rpcrdma_xprt, xprt) +#define rpcx_to_rdmad(x) (rpcx_to_rdmax(x)->rx_data) + +/* + * Interface Adapter calls - xprtrdma/verbs.c + */ +int rpcrdma_ia_open(struct rpcrdma_xprt *, struct sockaddr *, int); +void rpcrdma_ia_close(struct rpcrdma_ia *); + +/* + * Endpoint calls - xprtrdma/verbs.c + */ +int rpcrdma_ep_create(struct rpcrdma_ep *, struct rpcrdma_ia *, + struct rpcrdma_create_data_internal *); +int rpcrdma_ep_destroy(struct rpcrdma_ep *, struct rpcrdma_ia *); +int rpcrdma_ep_connect(struct rpcrdma_ep *, struct rpcrdma_ia *); +int rpcrdma_ep_disconnect(struct rpcrdma_ep *, struct rpcrdma_ia *); + +int rpcrdma_ep_post(struct rpcrdma_ia *, struct rpcrdma_ep *, + struct rpcrdma_req *); +int rpcrdma_ep_post_recv(struct rpcrdma_ia *, struct rpcrdma_ep *, + struct rpcrdma_rep *); + +/* + * Buffer calls - xprtrdma/verbs.c + */ +int rpcrdma_buffer_create(struct rpcrdma_buffer *, struct rpcrdma_ep *, + struct rpcrdma_ia *, + struct rpcrdma_create_data_internal *); +void rpcrdma_buffer_destroy(struct rpcrdma_buffer *); + +struct rpcrdma_req *rpcrdma_buffer_get(struct rpcrdma_buffer *); +void rpcrdma_buffer_put(struct rpcrdma_req *); +void rpcrdma_recv_buffer_get(struct rpcrdma_req *); +void rpcrdma_recv_buffer_put(struct rpcrdma_rep *); + +int rpcrdma_register_internal(struct rpcrdma_ia *, void *, int, + struct ib_mr **, struct ib_sge *); +int rpcrdma_deregister_internal(struct rpcrdma_ia *, + struct ib_mr *, struct ib_sge *); + +int rpcrdma_register_external(struct rpcrdma_mr_seg *, + int, int, struct rpcrdma_xprt *); +int rpcrdma_deregister_external(struct rpcrdma_mr_seg *, + struct rpcrdma_xprt *, void *); + +/* + * RPC/RDMA connection 
management calls - xprtrdma/rpc_rdma.c + */ +void rpcrdma_conn_func(struct rpcrdma_ep *); +void rpcrdma_reply_handler(struct rpcrdma_rep *); + +/* + * RPC/RDMA protocol calls - xprtrdma/rpc_rdma.c + */ +int rpcrdma_marshal_req(struct rpc_rqst *); + +#endif /* _LINUX_SUNRPC_XPRT_RDMA_H */ diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index 282efd447a6..02298f529da 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -13,10 +13,14 @@ * (C) 1999 Trond Myklebust <trond.myklebust@fys.uio.no> * * IP socket transport implementation, (C) 2005 Chuck Lever <cel@netapp.com> + * + * IPv6 support contributed by Gilles Quillard, Bull Open Source, 2005. + * <gilles.quillard@bull.net> */ #include <linux/types.h> #include <linux/slab.h> +#include <linux/module.h> #include <linux/capability.h> #include <linux/pagemap.h> #include <linux/errno.h> @@ -28,6 +32,7 @@ #include <linux/tcp.h> #include <linux/sunrpc/clnt.h> #include <linux/sunrpc/sched.h> +#include <linux/sunrpc/xprtsock.h> #include <linux/file.h> #include <net/sock.h> @@ -260,14 +265,29 @@ struct sock_xprt { #define TCP_RCV_COPY_XID (1UL << 2) #define TCP_RCV_COPY_DATA (1UL << 3) -static void xs_format_peer_addresses(struct rpc_xprt *xprt) +static inline struct sockaddr *xs_addr(struct rpc_xprt *xprt) +{ + return (struct sockaddr *) &xprt->addr; +} + +static inline struct sockaddr_in *xs_addr_in(struct rpc_xprt *xprt) { - struct sockaddr_in *addr = (struct sockaddr_in *) &xprt->addr; + return (struct sockaddr_in *) &xprt->addr; +} + +static inline struct sockaddr_in6 *xs_addr_in6(struct rpc_xprt *xprt) +{ + return (struct sockaddr_in6 *) &xprt->addr; +} + +static void xs_format_ipv4_peer_addresses(struct rpc_xprt *xprt) +{ + struct sockaddr_in *addr = xs_addr_in(xprt); char *buf; buf = kzalloc(20, GFP_KERNEL); if (buf) { - snprintf(buf, 20, "%u.%u.%u.%u", + snprintf(buf, 20, NIPQUAD_FMT, NIPQUAD(addr->sin_addr.s_addr)); } xprt->address_strings[RPC_DISPLAY_ADDR] = buf; @@ -279,26 +299,123 @@ static void xs_format_peer_addresses(struct rpc_xprt *xprt) } xprt->address_strings[RPC_DISPLAY_PORT] = buf; - if (xprt->prot == IPPROTO_UDP) - xprt->address_strings[RPC_DISPLAY_PROTO] = "udp"; - else - xprt->address_strings[RPC_DISPLAY_PROTO] = "tcp"; + buf = kzalloc(8, GFP_KERNEL); + if (buf) { + if (xprt->prot == IPPROTO_UDP) + snprintf(buf, 8, "udp"); + else + snprintf(buf, 8, "tcp"); + } + xprt->address_strings[RPC_DISPLAY_PROTO] = buf; buf = kzalloc(48, GFP_KERNEL); if (buf) { - snprintf(buf, 48, "addr=%u.%u.%u.%u port=%u proto=%s", + snprintf(buf, 48, "addr="NIPQUAD_FMT" port=%u proto=%s", NIPQUAD(addr->sin_addr.s_addr), ntohs(addr->sin_port), xprt->prot == IPPROTO_UDP ? "udp" : "tcp"); } xprt->address_strings[RPC_DISPLAY_ALL] = buf; + + buf = kzalloc(10, GFP_KERNEL); + if (buf) { + snprintf(buf, 10, "%02x%02x%02x%02x", + NIPQUAD(addr->sin_addr.s_addr)); + } + xprt->address_strings[RPC_DISPLAY_HEX_ADDR] = buf; + + buf = kzalloc(8, GFP_KERNEL); + if (buf) { + snprintf(buf, 8, "%4hx", + ntohs(addr->sin_port)); + } + xprt->address_strings[RPC_DISPLAY_HEX_PORT] = buf; + + buf = kzalloc(30, GFP_KERNEL); + if (buf) { + snprintf(buf, 30, NIPQUAD_FMT".%u.%u", + NIPQUAD(addr->sin_addr.s_addr), + ntohs(addr->sin_port) >> 8, + ntohs(addr->sin_port) & 0xff); + } + xprt->address_strings[RPC_DISPLAY_UNIVERSAL_ADDR] = buf; + + xprt->address_strings[RPC_DISPLAY_NETID] = + kstrdup(xprt->prot == IPPROTO_UDP ? 
+ RPCBIND_NETID_UDP : RPCBIND_NETID_TCP, GFP_KERNEL); +} + +static void xs_format_ipv6_peer_addresses(struct rpc_xprt *xprt) +{ + struct sockaddr_in6 *addr = xs_addr_in6(xprt); + char *buf; + + buf = kzalloc(40, GFP_KERNEL); + if (buf) { + snprintf(buf, 40, NIP6_FMT, + NIP6(addr->sin6_addr)); + } + xprt->address_strings[RPC_DISPLAY_ADDR] = buf; + + buf = kzalloc(8, GFP_KERNEL); + if (buf) { + snprintf(buf, 8, "%u", + ntohs(addr->sin6_port)); + } + xprt->address_strings[RPC_DISPLAY_PORT] = buf; + + buf = kzalloc(8, GFP_KERNEL); + if (buf) { + if (xprt->prot == IPPROTO_UDP) + snprintf(buf, 8, "udp"); + else + snprintf(buf, 8, "tcp"); + } + xprt->address_strings[RPC_DISPLAY_PROTO] = buf; + + buf = kzalloc(64, GFP_KERNEL); + if (buf) { + snprintf(buf, 64, "addr="NIP6_FMT" port=%u proto=%s", + NIP6(addr->sin6_addr), + ntohs(addr->sin6_port), + xprt->prot == IPPROTO_UDP ? "udp" : "tcp"); + } + xprt->address_strings[RPC_DISPLAY_ALL] = buf; + + buf = kzalloc(36, GFP_KERNEL); + if (buf) { + snprintf(buf, 36, NIP6_SEQFMT, + NIP6(addr->sin6_addr)); + } + xprt->address_strings[RPC_DISPLAY_HEX_ADDR] = buf; + + buf = kzalloc(8, GFP_KERNEL); + if (buf) { + snprintf(buf, 8, "%4hx", + ntohs(addr->sin6_port)); + } + xprt->address_strings[RPC_DISPLAY_HEX_PORT] = buf; + + buf = kzalloc(50, GFP_KERNEL); + if (buf) { + snprintf(buf, 50, NIP6_FMT".%u.%u", + NIP6(addr->sin6_addr), + ntohs(addr->sin6_port) >> 8, + ntohs(addr->sin6_port) & 0xff); + } + xprt->address_strings[RPC_DISPLAY_UNIVERSAL_ADDR] = buf; + + xprt->address_strings[RPC_DISPLAY_NETID] = + kstrdup(xprt->prot == IPPROTO_UDP ? + RPCBIND_NETID_UDP6 : RPCBIND_NETID_TCP6, GFP_KERNEL); } static void xs_free_peer_addresses(struct rpc_xprt *xprt) { - kfree(xprt->address_strings[RPC_DISPLAY_ADDR]); - kfree(xprt->address_strings[RPC_DISPLAY_PORT]); - kfree(xprt->address_strings[RPC_DISPLAY_ALL]); + int i; + + for (i = 0; i < RPC_DISPLAY_MAX; i++) + kfree(xprt->address_strings[i]); } #define XS_SENDMSG_FLAGS (MSG_DONTWAIT | MSG_NOSIGNAL) @@ -463,19 +580,20 @@ static int xs_udp_send_request(struct rpc_task *task) req->rq_xtime = jiffies; status = xs_sendpages(transport->sock, - (struct sockaddr *) &xprt->addr, + xs_addr(xprt), xprt->addrlen, xdr, req->rq_bytes_sent); dprintk("RPC: xs_udp_send_request(%u) = %d\n", xdr->len - req->rq_bytes_sent, status); - if (likely(status >= (int) req->rq_slen)) - return 0; - - /* Still some bytes left; set up for a retry later. */ - if (status > 0) + if (status >= 0) { + task->tk_bytes_sent += status; + if (status >= req->rq_slen) + return 0; + /* Still some bytes left; set up for a retry later. 
*/ status = -EAGAIN; + } switch (status) { case -ENETUNREACH: @@ -523,7 +641,8 @@ static int xs_tcp_send_request(struct rpc_task *task) struct rpc_xprt *xprt = req->rq_xprt; struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt); struct xdr_buf *xdr = &req->rq_snd_buf; - int status, retry = 0; + int status; + unsigned int retry = 0; xs_encode_tcp_record_marker(&req->rq_snd_buf); @@ -661,6 +780,7 @@ static void xs_destroy(struct rpc_xprt *xprt) xs_free_peer_addresses(xprt); kfree(xprt->slot); kfree(xprt); + module_put(THIS_MODULE); } static inline struct rpc_xprt *xprt_from_sock(struct sock *sk) @@ -1139,14 +1259,23 @@ static unsigned short xs_get_random_port(void) */ static void xs_set_port(struct rpc_xprt *xprt, unsigned short port) { - struct sockaddr_in *sap = (struct sockaddr_in *) &xprt->addr; + struct sockaddr *addr = xs_addr(xprt); dprintk("RPC: setting port for xprt %p to %u\n", xprt, port); - sap->sin_port = htons(port); + switch (addr->sa_family) { + case AF_INET: + ((struct sockaddr_in *)addr)->sin_port = htons(port); + break; + case AF_INET6: + ((struct sockaddr_in6 *)addr)->sin6_port = htons(port); + break; + default: + BUG(); + } } -static int xs_bind(struct sock_xprt *transport, struct socket *sock) +static int xs_bind4(struct sock_xprt *transport, struct socket *sock) { struct sockaddr_in myaddr = { .sin_family = AF_INET, @@ -1174,8 +1303,42 @@ static int xs_bind(struct sock_xprt *transport, struct socket *sock) else port--; } while (err == -EADDRINUSE && port != transport->port); - dprintk("RPC: xs_bind "NIPQUAD_FMT":%u: %s (%d)\n", - NIPQUAD(myaddr.sin_addr), port, err ? "failed" : "ok", err); + dprintk("RPC: %s "NIPQUAD_FMT":%u: %s (%d)\n", + __FUNCTION__, NIPQUAD(myaddr.sin_addr), + port, err ? "failed" : "ok", err); + return err; +} + +static int xs_bind6(struct sock_xprt *transport, struct socket *sock) +{ + struct sockaddr_in6 myaddr = { + .sin6_family = AF_INET6, + }; + struct sockaddr_in6 *sa; + int err; + unsigned short port = transport->port; + + if (!transport->xprt.resvport) + port = 0; + sa = (struct sockaddr_in6 *)&transport->addr; + myaddr.sin6_addr = sa->sin6_addr; + do { + myaddr.sin6_port = htons(port); + err = kernel_bind(sock, (struct sockaddr *) &myaddr, + sizeof(myaddr)); + if (!transport->xprt.resvport) + break; + if (err == 0) { + transport->port = port; + break; + } + if (port <= xprt_min_resvport) + port = xprt_max_resvport; + else + port--; + } while (err == -EADDRINUSE && port != transport->port); + dprintk("RPC: xs_bind6 "NIP6_FMT":%u: %s (%d)\n", + NIP6(myaddr.sin6_addr), port, err ? 
"failed" : "ok", err); return err; } @@ -1183,38 +1346,69 @@ static int xs_bind(struct sock_xprt *transport, struct socket *sock) static struct lock_class_key xs_key[2]; static struct lock_class_key xs_slock_key[2]; -static inline void xs_reclassify_socket(struct socket *sock) +static inline void xs_reclassify_socket4(struct socket *sock) { struct sock *sk = sock->sk; + BUG_ON(sock_owned_by_user(sk)); - switch (sk->sk_family) { - case AF_INET: - sock_lock_init_class_and_name(sk, "slock-AF_INET-NFS", - &xs_slock_key[0], "sk_lock-AF_INET-NFS", &xs_key[0]); - break; + sock_lock_init_class_and_name(sk, "slock-AF_INET-RPC", + &xs_slock_key[0], "sk_lock-AF_INET-RPC", &xs_key[0]); +} - case AF_INET6: - sock_lock_init_class_and_name(sk, "slock-AF_INET6-NFS", - &xs_slock_key[1], "sk_lock-AF_INET6-NFS", &xs_key[1]); - break; +static inline void xs_reclassify_socket6(struct socket *sock) +{ + struct sock *sk = sock->sk; - default: - BUG(); - } + BUG_ON(sock_owned_by_user(sk)); + sock_lock_init_class_and_name(sk, "slock-AF_INET6-RPC", + &xs_slock_key[1], "sk_lock-AF_INET6-RPC", &xs_key[1]); } #else -static inline void xs_reclassify_socket(struct socket *sock) +static inline void xs_reclassify_socket4(struct socket *sock) +{ +} + +static inline void xs_reclassify_socket6(struct socket *sock) { } #endif +static void xs_udp_finish_connecting(struct rpc_xprt *xprt, struct socket *sock) +{ + struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt); + + if (!transport->inet) { + struct sock *sk = sock->sk; + + write_lock_bh(&sk->sk_callback_lock); + + sk->sk_user_data = xprt; + transport->old_data_ready = sk->sk_data_ready; + transport->old_state_change = sk->sk_state_change; + transport->old_write_space = sk->sk_write_space; + sk->sk_data_ready = xs_udp_data_ready; + sk->sk_write_space = xs_udp_write_space; + sk->sk_no_check = UDP_CSUM_NORCV; + sk->sk_allocation = GFP_ATOMIC; + + xprt_set_connected(xprt); + + /* Reset to new socket */ + transport->sock = sock; + transport->inet = sk; + + write_unlock_bh(&sk->sk_callback_lock); + } + xs_udp_do_set_buffer_size(xprt); +} + /** - * xs_udp_connect_worker - set up a UDP socket + * xs_udp_connect_worker4 - set up a UDP socket * @work: RPC transport to connect * * Invoked by a work queue tasklet. */ -static void xs_udp_connect_worker(struct work_struct *work) +static void xs_udp_connect_worker4(struct work_struct *work) { struct sock_xprt *transport = container_of(work, struct sock_xprt, connect_worker.work); @@ -1232,9 +1426,9 @@ static void xs_udp_connect_worker(struct work_struct *work) dprintk("RPC: can't create UDP transport socket (%d).\n", -err); goto out; } - xs_reclassify_socket(sock); + xs_reclassify_socket4(sock); - if (xs_bind(transport, sock)) { + if (xs_bind4(transport, sock)) { sock_release(sock); goto out; } @@ -1242,29 +1436,48 @@ static void xs_udp_connect_worker(struct work_struct *work) dprintk("RPC: worker connecting xprt %p to address: %s\n", xprt, xprt->address_strings[RPC_DISPLAY_ALL]); - if (!transport->inet) { - struct sock *sk = sock->sk; + xs_udp_finish_connecting(xprt, sock); + status = 0; +out: + xprt_wake_pending_tasks(xprt, status); + xprt_clear_connecting(xprt); +} - write_lock_bh(&sk->sk_callback_lock); +/** + * xs_udp_connect_worker6 - set up a UDP socket + * @work: RPC transport to connect + * + * Invoked by a work queue tasklet. 
+ */ +static void xs_udp_connect_worker6(struct work_struct *work) +{ + struct sock_xprt *transport = + container_of(work, struct sock_xprt, connect_worker.work); + struct rpc_xprt *xprt = &transport->xprt; + struct socket *sock = transport->sock; + int err, status = -EIO; - sk->sk_user_data = xprt; - transport->old_data_ready = sk->sk_data_ready; - transport->old_state_change = sk->sk_state_change; - transport->old_write_space = sk->sk_write_space; - sk->sk_data_ready = xs_udp_data_ready; - sk->sk_write_space = xs_udp_write_space; - sk->sk_no_check = UDP_CSUM_NORCV; - sk->sk_allocation = GFP_ATOMIC; + if (xprt->shutdown || !xprt_bound(xprt)) + goto out; - xprt_set_connected(xprt); + /* Start by resetting any existing state */ + xs_close(xprt); - /* Reset to new socket */ - transport->sock = sock; - transport->inet = sk; + if ((err = sock_create_kern(PF_INET6, SOCK_DGRAM, IPPROTO_UDP, &sock)) < 0) { + dprintk("RPC: can't create UDP transport socket (%d).\n", -err); + goto out; + } + xs_reclassify_socket6(sock); - write_unlock_bh(&sk->sk_callback_lock); + if (xs_bind6(transport, sock) < 0) { + sock_release(sock); + goto out; } - xs_udp_do_set_buffer_size(xprt); + + dprintk("RPC: worker connecting xprt %p to address: %s\n", + xprt, xprt->address_strings[RPC_DISPLAY_ALL]); + + xs_udp_finish_connecting(xprt, sock); status = 0; out: xprt_wake_pending_tasks(xprt, status); @@ -1295,13 +1508,52 @@ static void xs_tcp_reuse_connection(struct rpc_xprt *xprt) result); } +static int xs_tcp_finish_connecting(struct rpc_xprt *xprt, struct socket *sock) +{ + struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt); + + if (!transport->inet) { + struct sock *sk = sock->sk; + + write_lock_bh(&sk->sk_callback_lock); + + sk->sk_user_data = xprt; + transport->old_data_ready = sk->sk_data_ready; + transport->old_state_change = sk->sk_state_change; + transport->old_write_space = sk->sk_write_space; + sk->sk_data_ready = xs_tcp_data_ready; + sk->sk_state_change = xs_tcp_state_change; + sk->sk_write_space = xs_tcp_write_space; + sk->sk_allocation = GFP_ATOMIC; + + /* socket options */ + sk->sk_userlocks |= SOCK_BINDPORT_LOCK; + sock_reset_flag(sk, SOCK_LINGER); + tcp_sk(sk)->linger2 = 0; + tcp_sk(sk)->nonagle |= TCP_NAGLE_OFF; + + xprt_clear_connected(xprt); + + /* Reset to new socket */ + transport->sock = sock; + transport->inet = sk; + + write_unlock_bh(&sk->sk_callback_lock); + } + + /* Tell the socket layer to start connecting... */ + xprt->stat.connect_count++; + xprt->stat.connect_start = jiffies; + return kernel_connect(sock, xs_addr(xprt), xprt->addrlen, O_NONBLOCK); +} + /** - * xs_tcp_connect_worker - connect a TCP socket to a remote endpoint + * xs_tcp_connect_worker4 - connect a TCP socket to a remote endpoint * @work: RPC transport to connect * * Invoked by a work queue tasklet. 
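 * (Editorial aside, not part of the patch: the v4 and v6 workers are kept
 *  deliberately parallel; they differ only in socket family and in which
 *  of the xs_bind4()/xs_bind6() and xs_reclassify_socket4()/6() helpers
 *  they call.)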
*/ -static void xs_tcp_connect_worker(struct work_struct *work) +static void xs_tcp_connect_worker4(struct work_struct *work) { struct sock_xprt *transport = container_of(work, struct sock_xprt, connect_worker.work); @@ -1315,13 +1567,12 @@ static void xs_tcp_connect_worker(struct work_struct *work) if (!sock) { /* start from scratch */ if ((err = sock_create_kern(PF_INET, SOCK_STREAM, IPPROTO_TCP, &sock)) < 0) { - dprintk("RPC: can't create TCP transport " - "socket (%d).\n", -err); + dprintk("RPC: can't create TCP transport socket (%d).\n", -err); goto out; } - xs_reclassify_socket(sock); + xs_reclassify_socket4(sock); - if (xs_bind(transport, sock)) { + if (xs_bind4(transport, sock) < 0) { sock_release(sock); goto out; } @@ -1332,43 +1583,70 @@ static void xs_tcp_connect_worker(struct work_struct *work) dprintk("RPC: worker connecting xprt %p to address: %s\n", xprt, xprt->address_strings[RPC_DISPLAY_ALL]); - if (!transport->inet) { - struct sock *sk = sock->sk; - - write_lock_bh(&sk->sk_callback_lock); + status = xs_tcp_finish_connecting(xprt, sock); + dprintk("RPC: %p connect status %d connected %d sock state %d\n", + xprt, -status, xprt_connected(xprt), + sock->sk->sk_state); + if (status < 0) { + switch (status) { + case -EINPROGRESS: + case -EALREADY: + goto out_clear; + case -ECONNREFUSED: + case -ECONNRESET: + /* retry with existing socket, after a delay */ + break; + default: + /* get rid of existing socket, and retry */ + xs_close(xprt); + break; + } + } +out: + xprt_wake_pending_tasks(xprt, status); +out_clear: + xprt_clear_connecting(xprt); +} - sk->sk_user_data = xprt; - transport->old_data_ready = sk->sk_data_ready; - transport->old_state_change = sk->sk_state_change; - transport->old_write_space = sk->sk_write_space; - sk->sk_data_ready = xs_tcp_data_ready; - sk->sk_state_change = xs_tcp_state_change; - sk->sk_write_space = xs_tcp_write_space; - sk->sk_allocation = GFP_ATOMIC; +/** + * xs_tcp_connect_worker6 - connect a TCP socket to a remote endpoint + * @work: RPC transport to connect + * + * Invoked by a work queue tasklet. + */ +static void xs_tcp_connect_worker6(struct work_struct *work) +{ + struct sock_xprt *transport = + container_of(work, struct sock_xprt, connect_worker.work); + struct rpc_xprt *xprt = &transport->xprt; + struct socket *sock = transport->sock; + int err, status = -EIO; - /* socket options */ - sk->sk_userlocks |= SOCK_BINDPORT_LOCK; - sock_reset_flag(sk, SOCK_LINGER); - tcp_sk(sk)->linger2 = 0; - tcp_sk(sk)->nonagle |= TCP_NAGLE_OFF; + if (xprt->shutdown || !xprt_bound(xprt)) + goto out; - xprt_clear_connected(xprt); + if (!sock) { + /* start from scratch */ + if ((err = sock_create_kern(PF_INET6, SOCK_STREAM, IPPROTO_TCP, &sock)) < 0) { + dprintk("RPC: can't create TCP transport socket (%d).\n", -err); + goto out; + } + xs_reclassify_socket6(sock); - /* Reset to new socket */ - transport->sock = sock; - transport->inet = sk; + if (xs_bind6(transport, sock) < 0) { + sock_release(sock); + goto out; + } + } else + /* "close" the socket, preserving the local port */ + xs_tcp_reuse_connection(xprt); - write_unlock_bh(&sk->sk_callback_lock); - } + dprintk("RPC: worker connecting xprt %p to address: %s\n", + xprt, xprt->address_strings[RPC_DISPLAY_ALL]); - /* Tell the socket layer to start connecting... 
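 * (Editorial aside, not part of the patch: this removed tail is the code
 *  hoisted into xs_tcp_finish_connecting() above; the nonblocking
 *  kernel_connect() there typically returns -EINPROGRESS, which the
 *  status switch below treats as a connection still in progress.)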
*/ - xprt->stat.connect_count++; - xprt->stat.connect_start = jiffies; - status = kernel_connect(sock, (struct sockaddr *) &xprt->addr, - xprt->addrlen, O_NONBLOCK); + status = xs_tcp_finish_connecting(xprt, sock); dprintk("RPC: %p connect status %d connected %d sock state %d\n", - xprt, -status, xprt_connected(xprt), - sock->sk->sk_state); + xprt, -status, xprt_connected(xprt), sock->sk->sk_state); if (status < 0) { switch (status) { case -EINPROGRESS: @@ -1508,7 +1786,8 @@ static struct rpc_xprt_ops xs_tcp_ops = { .print_stats = xs_tcp_print_stats, }; -static struct rpc_xprt *xs_setup_xprt(struct rpc_xprtsock_create *args, unsigned int slot_table_size) +static struct rpc_xprt *xs_setup_xprt(struct xprt_create *args, + unsigned int slot_table_size) { struct rpc_xprt *xprt; struct sock_xprt *new; @@ -1549,8 +1828,9 @@ static struct rpc_xprt *xs_setup_xprt(struct rpc_xprtsock_create *args, unsigned * @args: rpc transport creation arguments * */ -struct rpc_xprt *xs_setup_udp(struct rpc_xprtsock_create *args) +struct rpc_xprt *xs_setup_udp(struct xprt_create *args) { + struct sockaddr *addr = args->dstaddr; struct rpc_xprt *xprt; struct sock_xprt *transport; @@ -1559,15 +1839,11 @@ struct rpc_xprt *xs_setup_udp(struct rpc_xprtsock_create *args) return xprt; transport = container_of(xprt, struct sock_xprt, xprt); - if (ntohs(((struct sockaddr_in *)args->dstaddr)->sin_port) != 0) - xprt_set_bound(xprt); - xprt->prot = IPPROTO_UDP; xprt->tsh_size = 0; /* XXX: header size can vary due to auth type, IPv6, etc. */ xprt->max_payload = (1U << 16) - (MAX_HEADER << 3); - INIT_DELAYED_WORK(&transport->connect_worker, xs_udp_connect_worker); xprt->bind_timeout = XS_BIND_TO; xprt->connect_timeout = XS_UDP_CONN_TO; xprt->reestablish_timeout = XS_UDP_REEST_TO; @@ -1580,11 +1856,37 @@ struct rpc_xprt *xs_setup_udp(struct rpc_xprtsock_create *args) else xprt_set_timeout(&xprt->timeout, 5, 5 * HZ); - xs_format_peer_addresses(xprt); + switch (addr->sa_family) { + case AF_INET: + if (((struct sockaddr_in *)addr)->sin_port != htons(0)) + xprt_set_bound(xprt); + + INIT_DELAYED_WORK(&transport->connect_worker, + xs_udp_connect_worker4); + xs_format_ipv4_peer_addresses(xprt); + break; + case AF_INET6: + if (((struct sockaddr_in6 *)addr)->sin6_port != htons(0)) + xprt_set_bound(xprt); + + INIT_DELAYED_WORK(&transport->connect_worker, + xs_udp_connect_worker6); + xs_format_ipv6_peer_addresses(xprt); + break; + default: + kfree(xprt); + return ERR_PTR(-EAFNOSUPPORT); + } + dprintk("RPC: set up transport to address %s\n", xprt->address_strings[RPC_DISPLAY_ALL]); - return xprt; + if (try_module_get(THIS_MODULE)) + return xprt; + + kfree(xprt->slot); + kfree(xprt); + return ERR_PTR(-EINVAL); } /** @@ -1592,8 +1894,9 @@ struct rpc_xprt *xs_setup_udp(struct rpc_xprtsock_create *args) * @args: rpc transport creation arguments * */ -struct rpc_xprt *xs_setup_tcp(struct rpc_xprtsock_create *args) +struct rpc_xprt *xs_setup_tcp(struct xprt_create *args) { + struct sockaddr *addr = args->dstaddr; struct rpc_xprt *xprt; struct sock_xprt *transport; @@ -1602,14 +1905,10 @@ struct rpc_xprt *xs_setup_tcp(struct rpc_xprtsock_create *args) return xprt; transport = container_of(xprt, struct sock_xprt, xprt); - if (ntohs(((struct sockaddr_in *)args->dstaddr)->sin_port) != 0) - xprt_set_bound(xprt); - xprt->prot = IPPROTO_TCP; xprt->tsh_size = sizeof(rpc_fraghdr) / sizeof(u32); xprt->max_payload = RPC_MAX_FRAGMENT_SIZE; - INIT_DELAYED_WORK(&transport->connect_worker, xs_tcp_connect_worker); xprt->bind_timeout = XS_BIND_TO; 
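
/*
 * Editorial sketch, not part of the patch: the sa_family dispatch used by
 * xs_setup_udp() above and by the xs_setup_tcp() body that continues
 * below, reduced to a self-contained userspace example. All names in this
 * sketch are illustrative, not kernel symbols.
 */
#include <errno.h>
#include <stdio.h>
#include <string.h>
#include <netinet/in.h>
#include <sys/socket.h>

/* Returns 1 if the destination port is already known (the transport can be
 * marked bound at setup time), 0 if not, -EAFNOSUPPORT otherwise. */
static int dst_is_bound(const struct sockaddr *addr)
{
	switch (addr->sa_family) {
	case AF_INET:
		return ((const struct sockaddr_in *)addr)->sin_port != 0;
	case AF_INET6:
		return ((const struct sockaddr_in6 *)addr)->sin6_port != 0;
	default:
		return -EAFNOSUPPORT;	/* mirror the kernel's early reject */
	}
}

int main(void)
{
	struct sockaddr_in sin;

	memset(&sin, 0, sizeof(sin));
	sin.sin_family = AF_INET;
	sin.sin_port = htons(2049);	/* hypothetical NFS destination */
	printf("bound=%d\n", dst_is_bound((struct sockaddr *)&sin));
	return 0;
}
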
xprt->connect_timeout = XS_TCP_CONN_TO; xprt->reestablish_timeout = XS_TCP_INIT_REEST_TO; @@ -1622,15 +1921,55 @@ struct rpc_xprt *xs_setup_tcp(struct rpc_xprtsock_create *args) else xprt_set_timeout(&xprt->timeout, 2, 60 * HZ); - xs_format_peer_addresses(xprt); + switch (addr->sa_family) { + case AF_INET: + if (((struct sockaddr_in *)addr)->sin_port != htons(0)) + xprt_set_bound(xprt); + + INIT_DELAYED_WORK(&transport->connect_worker, xs_tcp_connect_worker4); + xs_format_ipv4_peer_addresses(xprt); + break; + case AF_INET6: + if (((struct sockaddr_in6 *)addr)->sin6_port != htons(0)) + xprt_set_bound(xprt); + + INIT_DELAYED_WORK(&transport->connect_worker, xs_tcp_connect_worker6); + xs_format_ipv6_peer_addresses(xprt); + break; + default: + kfree(xprt); + return ERR_PTR(-EAFNOSUPPORT); + } + dprintk("RPC: set up transport to address %s\n", xprt->address_strings[RPC_DISPLAY_ALL]); - return xprt; + if (try_module_get(THIS_MODULE)) + return xprt; + + kfree(xprt->slot); + kfree(xprt); + return ERR_PTR(-EINVAL); } +static struct xprt_class xs_udp_transport = { + .list = LIST_HEAD_INIT(xs_udp_transport.list), + .name = "udp", + .owner = THIS_MODULE, + .ident = IPPROTO_UDP, + .setup = xs_setup_udp, +}; + +static struct xprt_class xs_tcp_transport = { + .list = LIST_HEAD_INIT(xs_tcp_transport.list), + .name = "tcp", + .owner = THIS_MODULE, + .ident = IPPROTO_TCP, + .setup = xs_setup_tcp, +}; + /** - * init_socket_xprt - set up xprtsock's sysctls + * init_socket_xprt - set up xprtsock's sysctls, register with RPC client * */ int init_socket_xprt(void) @@ -1640,11 +1979,14 @@ int init_socket_xprt(void) sunrpc_table_header = register_sysctl_table(sunrpc_table); #endif + xprt_register_transport(&xs_udp_transport); + xprt_register_transport(&xs_tcp_transport); + return 0; } /** - * cleanup_socket_xprt - remove xprtsock's sysctls + * cleanup_socket_xprt - remove xprtsock's sysctls, unregister * */ void cleanup_socket_xprt(void) @@ -1655,4 +1997,7 @@ void cleanup_socket_xprt(void) sunrpc_table_header = NULL; } #endif + + xprt_unregister_transport(&xs_udp_transport); + xprt_unregister_transport(&xs_tcp_transport); } diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index 2b57eaf66ab..9163ec526c2 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -334,7 +334,7 @@ static void unix_write_space(struct sock *sk) read_lock(&sk->sk_callback_lock); if (unix_writable(sk)) { if (sk->sk_sleep && waitqueue_active(sk->sk_sleep)) - wake_up_interruptible(sk->sk_sleep); + wake_up_interruptible_sync(sk->sk_sleep); sk_wake_async(sk, 2, POLL_OUT); } read_unlock(&sk->sk_callback_lock); @@ -483,7 +483,7 @@ static int unix_listen(struct socket *sock, int backlog) sk->sk_max_ack_backlog = backlog; sk->sk_state = TCP_LISTEN; /* set credentials so connect can copy them */ - sk->sk_peercred.pid = current->tgid; + sk->sk_peercred.pid = task_tgid_vnr(current); sk->sk_peercred.uid = current->euid; sk->sk_peercred.gid = current->egid; err = 0; @@ -1133,7 +1133,7 @@ restart: unix_peer(newsk) = sk; newsk->sk_state = TCP_ESTABLISHED; newsk->sk_type = sk->sk_type; - newsk->sk_peercred.pid = current->tgid; + newsk->sk_peercred.pid = task_tgid_vnr(current); newsk->sk_peercred.uid = current->euid; newsk->sk_peercred.gid = current->egid; newu = unix_sk(newsk); @@ -1194,7 +1194,7 @@ static int unix_socketpair(struct socket *socka, struct socket *sockb) sock_hold(skb); unix_peer(ska)=skb; unix_peer(skb)=ska; - ska->sk_peercred.pid = skb->sk_peercred.pid = current->tgid; + ska->sk_peercred.pid = skb->sk_peercred.pid = 
task_tgid_vnr(current); ska->sk_peercred.uid = skb->sk_peercred.uid = current->euid; ska->sk_peercred.gid = skb->sk_peercred.gid = current->egid; @@ -1639,7 +1639,7 @@ static int unix_dgram_recvmsg(struct kiocb *iocb, struct socket *sock, if (!skb) goto out_unlock; - wake_up_interruptible(&u->peer_wait); + wake_up_interruptible_sync(&u->peer_wait); if (msg->msg_name) unix_copy_addr(msg, skb->sk); diff --git a/net/xfrm/xfrm_algo.c b/net/xfrm/xfrm_algo.c index 5ced62c19c6..313d4bed3aa 100644 --- a/net/xfrm/xfrm_algo.c +++ b/net/xfrm/xfrm_algo.c @@ -13,6 +13,7 @@ #include <linux/kernel.h> #include <linux/pfkeyv2.h> #include <linux/crypto.h> +#include <linux/scatterlist.h> #include <net/xfrm.h> #if defined(CONFIG_INET_AH) || defined(CONFIG_INET_AH_MODULE) || defined(CONFIG_INET6_AH) || defined(CONFIG_INET6_AH_MODULE) #include <net/ah.h> @@ -552,7 +553,7 @@ int skb_icv_walk(const struct sk_buff *skb, struct hash_desc *desc, if (copy > len) copy = len; - sg.page = virt_to_page(skb->data + offset); + sg_set_page(&sg, virt_to_page(skb->data + offset)); sg.offset = (unsigned long)(skb->data + offset) % PAGE_SIZE; sg.length = copy; @@ -577,7 +578,7 @@ int skb_icv_walk(const struct sk_buff *skb, struct hash_desc *desc, if (copy > len) copy = len; - sg.page = frag->page; + sg_set_page(&sg, frag->page); sg.offset = frag->page_offset + offset-start; sg.length = copy; diff --git a/net/xfrm/xfrm_input.c b/net/xfrm/xfrm_input.c index 113f4442998..cb97fda1b6d 100644 --- a/net/xfrm/xfrm_input.c +++ b/net/xfrm/xfrm_input.c @@ -49,13 +49,16 @@ EXPORT_SYMBOL(secpath_dup); int xfrm_parse_spi(struct sk_buff *skb, u8 nexthdr, __be32 *spi, __be32 *seq) { int offset, offset_seq; + int hlen; switch (nexthdr) { case IPPROTO_AH: + hlen = sizeof(struct ip_auth_hdr); offset = offsetof(struct ip_auth_hdr, spi); offset_seq = offsetof(struct ip_auth_hdr, seq_no); break; case IPPROTO_ESP: + hlen = sizeof(struct ip_esp_hdr); offset = offsetof(struct ip_esp_hdr, spi); offset_seq = offsetof(struct ip_esp_hdr, seq_no); break; @@ -69,7 +72,7 @@ int xfrm_parse_spi(struct sk_buff *skb, u8 nexthdr, __be32 *spi, __be32 *seq) return 1; } - if (!pskb_may_pull(skb, 16)) + if (!pskb_may_pull(skb, hlen)) return -EINVAL; *spi = *(__be32*)(skb_transport_header(skb) + offset); diff --git a/net/xfrm/xfrm_output.c b/net/xfrm/xfrm_output.c index 0eb3377602e..f4bfd6c4565 100644 --- a/net/xfrm/xfrm_output.c +++ b/net/xfrm/xfrm_output.c @@ -63,7 +63,7 @@ int xfrm_output(struct sk_buff *skb) xfrm_replay_notify(x, XFRM_REPLAY_UPDATE); } - err = x->mode->output(x, skb); + err = x->outer_mode->output(x, skb); if (err) goto error; @@ -82,7 +82,7 @@ int xfrm_output(struct sk_buff *skb) } dst = skb->dst; x = dst->xfrm; - } while (x && (x->props.mode != XFRM_MODE_TUNNEL)); + } while (x && !(x->outer_mode->flags & XFRM_MODE_FLAG_TUNNEL)); err = 0; diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index af27c193697..b702bd8a389 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -49,8 +49,6 @@ static DEFINE_SPINLOCK(xfrm_policy_gc_lock); static struct xfrm_policy_afinfo *xfrm_policy_get_afinfo(unsigned short family); static void xfrm_policy_put_afinfo(struct xfrm_policy_afinfo *afinfo); -static struct xfrm_policy_afinfo *xfrm_policy_lock_afinfo(unsigned int family); -static void xfrm_policy_unlock_afinfo(struct xfrm_policy_afinfo *afinfo); static inline int __xfrm4_selector_match(struct xfrm_selector *sel, struct flowi *fl) @@ -86,72 +84,6 @@ int xfrm_selector_match(struct xfrm_selector *sel, struct flowi *fl, return 0; } -int 
xfrm_register_type(struct xfrm_type *type, unsigned short family) -{ - struct xfrm_policy_afinfo *afinfo = xfrm_policy_lock_afinfo(family); - struct xfrm_type **typemap; - int err = 0; - - if (unlikely(afinfo == NULL)) - return -EAFNOSUPPORT; - typemap = afinfo->type_map; - - if (likely(typemap[type->proto] == NULL)) - typemap[type->proto] = type; - else - err = -EEXIST; - xfrm_policy_unlock_afinfo(afinfo); - return err; -} -EXPORT_SYMBOL(xfrm_register_type); - -int xfrm_unregister_type(struct xfrm_type *type, unsigned short family) -{ - struct xfrm_policy_afinfo *afinfo = xfrm_policy_lock_afinfo(family); - struct xfrm_type **typemap; - int err = 0; - - if (unlikely(afinfo == NULL)) - return -EAFNOSUPPORT; - typemap = afinfo->type_map; - - if (unlikely(typemap[type->proto] != type)) - err = -ENOENT; - else - typemap[type->proto] = NULL; - xfrm_policy_unlock_afinfo(afinfo); - return err; -} -EXPORT_SYMBOL(xfrm_unregister_type); - -struct xfrm_type *xfrm_get_type(u8 proto, unsigned short family) -{ - struct xfrm_policy_afinfo *afinfo; - struct xfrm_type **typemap; - struct xfrm_type *type; - int modload_attempted = 0; - -retry: - afinfo = xfrm_policy_get_afinfo(family); - if (unlikely(afinfo == NULL)) - return NULL; - typemap = afinfo->type_map; - - type = typemap[proto]; - if (unlikely(type && !try_module_get(type->owner))) - type = NULL; - if (!type && !modload_attempted) { - xfrm_policy_put_afinfo(afinfo); - request_module("xfrm-type-%d-%d", - (int) family, (int) proto); - modload_attempted = 1; - goto retry; - } - - xfrm_policy_put_afinfo(afinfo); - return type; -} - int xfrm_dst_lookup(struct xfrm_dst **dst, struct flowi *fl, unsigned short family) { @@ -170,94 +102,6 @@ int xfrm_dst_lookup(struct xfrm_dst **dst, struct flowi *fl, } EXPORT_SYMBOL(xfrm_dst_lookup); -void xfrm_put_type(struct xfrm_type *type) -{ - module_put(type->owner); -} - -int xfrm_register_mode(struct xfrm_mode *mode, int family) -{ - struct xfrm_policy_afinfo *afinfo; - struct xfrm_mode **modemap; - int err; - - if (unlikely(mode->encap >= XFRM_MODE_MAX)) - return -EINVAL; - - afinfo = xfrm_policy_lock_afinfo(family); - if (unlikely(afinfo == NULL)) - return -EAFNOSUPPORT; - - err = -EEXIST; - modemap = afinfo->mode_map; - if (likely(modemap[mode->encap] == NULL)) { - modemap[mode->encap] = mode; - err = 0; - } - - xfrm_policy_unlock_afinfo(afinfo); - return err; -} -EXPORT_SYMBOL(xfrm_register_mode); - -int xfrm_unregister_mode(struct xfrm_mode *mode, int family) -{ - struct xfrm_policy_afinfo *afinfo; - struct xfrm_mode **modemap; - int err; - - if (unlikely(mode->encap >= XFRM_MODE_MAX)) - return -EINVAL; - - afinfo = xfrm_policy_lock_afinfo(family); - if (unlikely(afinfo == NULL)) - return -EAFNOSUPPORT; - - err = -ENOENT; - modemap = afinfo->mode_map; - if (likely(modemap[mode->encap] == mode)) { - modemap[mode->encap] = NULL; - err = 0; - } - - xfrm_policy_unlock_afinfo(afinfo); - return err; -} -EXPORT_SYMBOL(xfrm_unregister_mode); - -struct xfrm_mode *xfrm_get_mode(unsigned int encap, int family) -{ - struct xfrm_policy_afinfo *afinfo; - struct xfrm_mode *mode; - int modload_attempted = 0; - - if (unlikely(encap >= XFRM_MODE_MAX)) - return NULL; - -retry: - afinfo = xfrm_policy_get_afinfo(family); - if (unlikely(afinfo == NULL)) - return NULL; - - mode = afinfo->mode_map[encap]; - if (unlikely(mode && !try_module_get(mode->owner))) - mode = NULL; - if (!mode && !modload_attempted) { - xfrm_policy_put_afinfo(afinfo); - request_module("xfrm-mode-%d-%d", family, encap); - modload_attempted = 1; - goto retry; - 
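	/* (Editorial aside, not part of the patch: this lookup, then
	 *  request_module("xfrm-mode-%d-%d"), then single-retry pattern is
	 *  deleted here and re-added essentially unchanged as the static
	 *  xfrm_get_mode() in net/xfrm/xfrm_state.c later in this diff.) */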
} - - xfrm_policy_put_afinfo(afinfo); - return mode; -} - -void xfrm_put_mode(struct xfrm_mode *mode) -{ - module_put(mode->owner); -} - static inline unsigned long make_jiffies(long secs) { if (secs >= (MAX_SCHEDULE_TIMEOUT-1)/HZ) @@ -2096,7 +1940,8 @@ int xfrm_bundle_ok(struct xfrm_policy *pol, struct xfrm_dst *first, if (xdst->genid != dst->xfrm->genid) return 0; - if (strict && fl && dst->xfrm->props.mode != XFRM_MODE_TUNNEL && + if (strict && fl && + !(dst->xfrm->outer_mode->flags & XFRM_MODE_FLAG_TUNNEL) && !xfrm_state_addr_flow_check(dst->xfrm, fl, family)) return 0; @@ -2213,23 +2058,6 @@ static void xfrm_policy_put_afinfo(struct xfrm_policy_afinfo *afinfo) read_unlock(&xfrm_policy_afinfo_lock); } -static struct xfrm_policy_afinfo *xfrm_policy_lock_afinfo(unsigned int family) -{ - struct xfrm_policy_afinfo *afinfo; - if (unlikely(family >= NPROTO)) - return NULL; - write_lock_bh(&xfrm_policy_afinfo_lock); - afinfo = xfrm_policy_afinfo[family]; - if (unlikely(!afinfo)) - write_unlock_bh(&xfrm_policy_afinfo_lock); - return afinfo; -} - -static void xfrm_policy_unlock_afinfo(struct xfrm_policy_afinfo *afinfo) -{ - write_unlock_bh(&xfrm_policy_afinfo_lock); -} - static int xfrm_dev_event(struct notifier_block *this, unsigned long event, void *ptr) { struct net_device *dev = ptr; @@ -2464,7 +2292,8 @@ static int xfrm_policy_migrate(struct xfrm_policy *pol, if (!migrate_tmpl_match(mp, &pol->xfrm_vec[i])) continue; n++; - if (pol->xfrm_vec[i].mode != XFRM_MODE_TUNNEL) + if (pol->xfrm_vec[i].mode != XFRM_MODE_TUNNEL && + pol->xfrm_vec[i].mode != XFRM_MODE_BEET) continue; /* update endpoints */ memcpy(&pol->xfrm_vec[i].id.daddr, &mp->new_daddr, diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c index 344f0a6abec..224b44e31a0 100644 --- a/net/xfrm/xfrm_state.c +++ b/net/xfrm/xfrm_state.c @@ -57,6 +57,9 @@ static unsigned int xfrm_state_hashmax __read_mostly = 1 * 1024 * 1024; static unsigned int xfrm_state_num; static unsigned int xfrm_state_genid; +static struct xfrm_state_afinfo *xfrm_state_get_afinfo(unsigned int family); +static void xfrm_state_put_afinfo(struct xfrm_state_afinfo *afinfo); + static inline unsigned int xfrm_dst_hash(xfrm_address_t *daddr, xfrm_address_t *saddr, u32 reqid, @@ -187,6 +190,184 @@ int __xfrm_state_delete(struct xfrm_state *x); int km_query(struct xfrm_state *x, struct xfrm_tmpl *t, struct xfrm_policy *pol); void km_state_expired(struct xfrm_state *x, int hard, u32 pid); +static struct xfrm_state_afinfo *xfrm_state_lock_afinfo(unsigned int family) +{ + struct xfrm_state_afinfo *afinfo; + if (unlikely(family >= NPROTO)) + return NULL; + write_lock_bh(&xfrm_state_afinfo_lock); + afinfo = xfrm_state_afinfo[family]; + if (unlikely(!afinfo)) + write_unlock_bh(&xfrm_state_afinfo_lock); + return afinfo; +} + +static void xfrm_state_unlock_afinfo(struct xfrm_state_afinfo *afinfo) +{ + write_unlock_bh(&xfrm_state_afinfo_lock); +} + +int xfrm_register_type(struct xfrm_type *type, unsigned short family) +{ + struct xfrm_state_afinfo *afinfo = xfrm_state_lock_afinfo(family); + struct xfrm_type **typemap; + int err = 0; + + if (unlikely(afinfo == NULL)) + return -EAFNOSUPPORT; + typemap = afinfo->type_map; + + if (likely(typemap[type->proto] == NULL)) + typemap[type->proto] = type; + else + err = -EEXIST; + xfrm_state_unlock_afinfo(afinfo); + return err; +} +EXPORT_SYMBOL(xfrm_register_type); + +int xfrm_unregister_type(struct xfrm_type *type, unsigned short family) +{ + struct xfrm_state_afinfo *afinfo = xfrm_state_lock_afinfo(family); + struct xfrm_type 
**typemap; + int err = 0; + + if (unlikely(afinfo == NULL)) + return -EAFNOSUPPORT; + typemap = afinfo->type_map; + + if (unlikely(typemap[type->proto] != type)) + err = -ENOENT; + else + typemap[type->proto] = NULL; + xfrm_state_unlock_afinfo(afinfo); + return err; +} +EXPORT_SYMBOL(xfrm_unregister_type); + +static struct xfrm_type *xfrm_get_type(u8 proto, unsigned short family) +{ + struct xfrm_state_afinfo *afinfo; + struct xfrm_type **typemap; + struct xfrm_type *type; + int modload_attempted = 0; + +retry: + afinfo = xfrm_state_get_afinfo(family); + if (unlikely(afinfo == NULL)) + return NULL; + typemap = afinfo->type_map; + + type = typemap[proto]; + if (unlikely(type && !try_module_get(type->owner))) + type = NULL; + if (!type && !modload_attempted) { + xfrm_state_put_afinfo(afinfo); + request_module("xfrm-type-%d-%d", family, proto); + modload_attempted = 1; + goto retry; + } + + xfrm_state_put_afinfo(afinfo); + return type; +} + +static void xfrm_put_type(struct xfrm_type *type) +{ + module_put(type->owner); +} + +int xfrm_register_mode(struct xfrm_mode *mode, int family) +{ + struct xfrm_state_afinfo *afinfo; + struct xfrm_mode **modemap; + int err; + + if (unlikely(mode->encap >= XFRM_MODE_MAX)) + return -EINVAL; + + afinfo = xfrm_state_lock_afinfo(family); + if (unlikely(afinfo == NULL)) + return -EAFNOSUPPORT; + + err = -EEXIST; + modemap = afinfo->mode_map; + if (modemap[mode->encap]) + goto out; + + err = -ENOENT; + if (!try_module_get(afinfo->owner)) + goto out; + + mode->afinfo = afinfo; + modemap[mode->encap] = mode; + err = 0; + +out: + xfrm_state_unlock_afinfo(afinfo); + return err; +} +EXPORT_SYMBOL(xfrm_register_mode); + +int xfrm_unregister_mode(struct xfrm_mode *mode, int family) +{ + struct xfrm_state_afinfo *afinfo; + struct xfrm_mode **modemap; + int err; + + if (unlikely(mode->encap >= XFRM_MODE_MAX)) + return -EINVAL; + + afinfo = xfrm_state_lock_afinfo(family); + if (unlikely(afinfo == NULL)) + return -EAFNOSUPPORT; + + err = -ENOENT; + modemap = afinfo->mode_map; + if (likely(modemap[mode->encap] == mode)) { + modemap[mode->encap] = NULL; + module_put(mode->afinfo->owner); + err = 0; + } + + xfrm_state_unlock_afinfo(afinfo); + return err; +} +EXPORT_SYMBOL(xfrm_unregister_mode); + +static struct xfrm_mode *xfrm_get_mode(unsigned int encap, int family) +{ + struct xfrm_state_afinfo *afinfo; + struct xfrm_mode *mode; + int modload_attempted = 0; + + if (unlikely(encap >= XFRM_MODE_MAX)) + return NULL; + +retry: + afinfo = xfrm_state_get_afinfo(family); + if (unlikely(afinfo == NULL)) + return NULL; + + mode = afinfo->mode_map[encap]; + if (unlikely(mode && !try_module_get(mode->owner))) + mode = NULL; + if (!mode && !modload_attempted) { + xfrm_state_put_afinfo(afinfo); + request_module("xfrm-mode-%d-%d", family, encap); + modload_attempted = 1; + goto retry; + } + + xfrm_state_put_afinfo(afinfo); + return mode; +} + +static void xfrm_put_mode(struct xfrm_mode *mode) +{ + module_put(mode->owner); +} + static void xfrm_state_gc_destroy(struct xfrm_state *x) { del_timer_sync(&x->timer); @@ -196,8 +377,10 @@ static void xfrm_state_gc_destroy(struct xfrm_state *x) kfree(x->calg); kfree(x->encap); kfree(x->coaddr); - if (x->mode) - xfrm_put_mode(x->mode); + if (x->inner_mode) + xfrm_put_mode(x->inner_mode); + if (x->outer_mode) + xfrm_put_mode(x->outer_mode); if (x->type) { x->type->destructor(x); xfrm_put_type(x->type); @@ -1699,7 +1882,7 @@ int xfrm_state_unregister_afinfo(struct xfrm_state_afinfo *afinfo) } EXPORT_SYMBOL(xfrm_state_unregister_afinfo); -struct 
xfrm_state_afinfo *xfrm_state_get_afinfo(unsigned short family) +static struct xfrm_state_afinfo *xfrm_state_get_afinfo(unsigned int family) { struct xfrm_state_afinfo *afinfo; if (unlikely(family >= NPROTO)) @@ -1711,14 +1894,11 @@ struct xfrm_state_afinfo *xfrm_state_get_afinfo(unsigned short family) return afinfo; } -void xfrm_state_put_afinfo(struct xfrm_state_afinfo *afinfo) +static void xfrm_state_put_afinfo(struct xfrm_state_afinfo *afinfo) { read_unlock(&xfrm_state_afinfo_lock); } -EXPORT_SYMBOL(xfrm_state_get_afinfo); -EXPORT_SYMBOL(xfrm_state_put_afinfo); - /* Temporarily located here until net/xfrm/xfrm_tunnel.c is created */ void xfrm_state_delete_tunnel(struct xfrm_state *x) { @@ -1769,6 +1949,14 @@ int xfrm_init_state(struct xfrm_state *x) goto error; err = -EPROTONOSUPPORT; + x->inner_mode = xfrm_get_mode(x->props.mode, x->sel.family); + if (x->inner_mode == NULL) + goto error; + + if (!(x->inner_mode->flags & XFRM_MODE_FLAG_TUNNEL) && + family != x->sel.family) + goto error; + x->type = xfrm_get_type(x->id.proto, family); if (x->type == NULL) goto error; @@ -1777,8 +1965,8 @@ int xfrm_init_state(struct xfrm_state *x) if (err) goto error; - x->mode = xfrm_get_mode(x->props.mode, family); - if (x->mode == NULL) + x->outer_mode = xfrm_get_mode(x->props.mode, family); + if (x->outer_mode == NULL) goto error; x->km.state = XFRM_STATE_VALID;
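
The tail of this hunk has xfrm_init_state() take two mode references, inner and outer, which the xfrm_state_gc_destroy() change earlier in the hunk releases. A self-contained sketch of that paired acquire/release discipline follows; the toy types and names are illustrative only, and the explicit unwind on failure is shown here for clarity (the kernel instead defers cleanup to state destruction).

#include <stdio.h>

/* Toy stand-ins for struct xfrm_mode and module refcounting. */
struct mode { const char *name; int refcnt; };

static struct mode *mode_get(struct mode *m)
{
	if (m)
		m->refcnt++;		/* plays the role of try_module_get() */
	return m;
}

static void mode_put(struct mode *m)
{
	if (m)
		m->refcnt--;		/* plays the role of module_put() */
}

struct state { struct mode *inner_mode, *outer_mode; };

/* Acquire both references or, on failure, none. */
static int state_init(struct state *x, struct mode *inner, struct mode *outer)
{
	x->inner_mode = mode_get(inner);
	if (!x->inner_mode)
		return -1;
	x->outer_mode = mode_get(outer);
	if (!x->outer_mode) {
		mode_put(x->inner_mode);
		x->inner_mode = NULL;
		return -1;
	}
	return 0;
}

/* Release whatever state_init() took, as xfrm_state_gc_destroy() does. */
static void state_destroy(struct state *x)
{
	mode_put(x->inner_mode);
	mode_put(x->outer_mode);
}

int main(void)
{
	struct mode transport = { "transport", 0 }, tunnel = { "tunnel", 0 };
	struct state x;

	if (state_init(&x, &transport, &tunnel) == 0)
		state_destroy(&x);
	printf("refcnts after destroy: %d %d\n",
	       transport.refcnt, tunnel.refcnt);
	return 0;
}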