diff options
Diffstat (limited to 'net')
247 files changed, 7849 insertions, 2798 deletions
diff --git a/net/Kconfig b/net/Kconfig index 7dfc9492069..915657832d9 100644 --- a/net/Kconfig +++ b/net/Kconfig @@ -37,6 +37,7 @@ config NETDEBUG source "net/packet/Kconfig" source "net/unix/Kconfig" source "net/xfrm/Kconfig" +source "net/iucv/Kconfig" config INET bool "TCP/IP networking" diff --git a/net/Makefile b/net/Makefile index ad4d14f4bb2..4854ac50631 100644 --- a/net/Makefile +++ b/net/Makefile @@ -47,6 +47,7 @@ obj-$(CONFIG_IP_SCTP) += sctp/ obj-$(CONFIG_IEEE80211) += ieee80211/ obj-$(CONFIG_TIPC) += tipc/ obj-$(CONFIG_NETLABEL) += netlabel/ +obj-$(CONFIG_IUCV) += iucv/ ifeq ($(CONFIG_NET),y) obj-$(CONFIG_SYSCTL) += sysctl_net.o diff --git a/net/atm/common.c b/net/atm/common.c index fbabff49446..a2878e92c3a 100644 --- a/net/atm/common.c +++ b/net/atm/common.c @@ -816,7 +816,8 @@ static void __exit atm_exit(void) proto_unregister(&vcc_proto); } -module_init(atm_init); +subsys_initcall(atm_init); + module_exit(atm_exit); MODULE_LICENSE("GPL"); diff --git a/net/atm/proc.c b/net/atm/proc.c index 91fe5f53ff1..739866bfe9e 100644 --- a/net/atm/proc.c +++ b/net/atm/proc.c @@ -393,7 +393,7 @@ static ssize_t proc_dev_atm_read(struct file *file, char __user *buf, if (count == 0) return 0; page = get_zeroed_page(GFP_KERNEL); if (!page) return -ENOMEM; - dev = PDE(file->f_dentry->d_inode)->data; + dev = PDE(file->f_path.dentry->d_inode)->data; if (!dev->ops->proc_read) length = -EINVAL; else { diff --git a/net/ax25/af_ax25.c b/net/ax25/af_ax25.c index 6cabf6d8a75..42233df2b09 100644 --- a/net/ax25/af_ax25.c +++ b/net/ax25/af_ax25.c @@ -1088,8 +1088,8 @@ out: /* * FIXME: nonblock behaviour looks like it may have a bug. */ -static int ax25_connect(struct socket *sock, struct sockaddr *uaddr, - int addr_len, int flags) +static int __must_check ax25_connect(struct socket *sock, + struct sockaddr *uaddr, int addr_len, int flags) { struct sock *sk = sock->sk; ax25_cb *ax25 = ax25_sk(sk), *ax25t; diff --git a/net/ax25/ax25_addr.c b/net/ax25/ax25_addr.c index 5f0896ad004..97a49c79c60 100644 --- a/net/ax25/ax25_addr.c +++ b/net/ax25/ax25_addr.c @@ -29,17 +29,26 @@ #include <linux/interrupt.h> /* - * The null address is defined as a callsign of all spaces with an - * SSID of zero. + * The default broadcast address of an interface is QST-0; the default address + * is LINUX-1. The null address is defined as a callsign of all spaces with + * an SSID of zero. */ -ax25_address null_ax25_address = {{0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x00}}; +const ax25_address ax25_bcast = + {{'Q' << 1, 'S' << 1, 'T' << 1, ' ' << 1, ' ' << 1, ' ' << 1, 0 << 1}}; +const ax25_address ax25_defaddr = + {{'L' << 1, 'I' << 1, 'N' << 1, 'U' << 1, 'X' << 1, ' ' << 1, 1 << 1}}; +const ax25_address null_ax25_address = + {{' ' << 1, ' ' << 1, ' ' << 1, ' ' << 1, ' ' << 1, ' ' << 1, 0 << 1}}; + +EXPORT_SYMBOL_GPL(ax25_bcast); +EXPORT_SYMBOL_GPL(ax25_defaddr); EXPORT_SYMBOL(null_ax25_address); /* * ax25 -> ascii conversion */ -char *ax2asc(char *buf, ax25_address *a) +char *ax2asc(char *buf, const ax25_address *a) { char c, *s; int n; @@ -72,9 +81,9 @@ EXPORT_SYMBOL(ax2asc); /* * ascii -> ax25 conversion */ -void asc2ax(ax25_address *addr, char *callsign) +void asc2ax(ax25_address *addr, const char *callsign) { - char *s; + const char *s; int n; for (s = callsign, n = 0; n < 6; n++) { @@ -107,7 +116,7 @@ EXPORT_SYMBOL(asc2ax); /* * Compare two ax.25 addresses */ -int ax25cmp(ax25_address *a, ax25_address *b) +int ax25cmp(const ax25_address *a, const ax25_address *b) { int ct = 0; @@ -128,7 +137,7 @@ EXPORT_SYMBOL(ax25cmp); /* * Compare two AX.25 digipeater paths. */ -int ax25digicmp(ax25_digi *digi1, ax25_digi *digi2) +int ax25digicmp(const ax25_digi *digi1, const ax25_digi *digi2) { int i; @@ -149,7 +158,9 @@ int ax25digicmp(ax25_digi *digi1, ax25_digi *digi2) * Given an AX.25 address pull of to, from, digi list, command/response and the start of data * */ -unsigned char *ax25_addr_parse(unsigned char *buf, int len, ax25_address *src, ax25_address *dest, ax25_digi *digi, int *flags, int *dama) +const unsigned char *ax25_addr_parse(const unsigned char *buf, int len, + ax25_address *src, ax25_address *dest, ax25_digi *digi, int *flags, + int *dama) { int d = 0; @@ -204,7 +215,8 @@ unsigned char *ax25_addr_parse(unsigned char *buf, int len, ax25_address *src, a /* * Assemble an AX.25 header from the bits */ -int ax25_addr_build(unsigned char *buf, ax25_address *src, ax25_address *dest, ax25_digi *d, int flag, int modulus) +int ax25_addr_build(unsigned char *buf, const ax25_address *src, + const ax25_address *dest, const ax25_digi *d, int flag, int modulus) { int len = 0; int ct = 0; @@ -261,7 +273,7 @@ int ax25_addr_build(unsigned char *buf, ax25_address *src, ax25_address *dest, a return len; } -int ax25_addr_size(ax25_digi *dp) +int ax25_addr_size(const ax25_digi *dp) { if (dp == NULL) return 2 * AX25_ADDR_LEN; @@ -272,7 +284,7 @@ int ax25_addr_size(ax25_digi *dp) /* * Reverse Digipeat List. May not pass both parameters as same struct */ -void ax25_digi_invert(ax25_digi *in, ax25_digi *out) +void ax25_digi_invert(const ax25_digi *in, ax25_digi *out) { int ct; diff --git a/net/ax25/ax25_iface.c b/net/ax25/ax25_iface.c index 07ac0207eb6..aff3e652c2d 100644 --- a/net/ax25/ax25_iface.c +++ b/net/ax25/ax25_iface.c @@ -29,17 +29,10 @@ #include <linux/mm.h> #include <linux/interrupt.h> -static struct protocol_struct { - struct protocol_struct *next; - unsigned int pid; - int (*func)(struct sk_buff *, ax25_cb *); -} *protocol_list = NULL; +static struct ax25_protocol *protocol_list; static DEFINE_RWLOCK(protocol_list_lock); -static struct linkfail_struct { - struct linkfail_struct *next; - void (*func)(ax25_cb *, int); -} *linkfail_list = NULL; +static HLIST_HEAD(ax25_linkfail_list); static DEFINE_SPINLOCK(linkfail_lock); static struct listen_struct { @@ -49,36 +42,23 @@ static struct listen_struct { } *listen_list = NULL; static DEFINE_SPINLOCK(listen_lock); -int ax25_protocol_register(unsigned int pid, - int (*func)(struct sk_buff *, ax25_cb *)) +/* + * Do not register the internal protocols AX25_P_TEXT, AX25_P_SEGMENT, + * AX25_P_IP or AX25_P_ARP ... + */ +void ax25_register_pid(struct ax25_protocol *ap) { - struct protocol_struct *protocol; - - if (pid == AX25_P_TEXT || pid == AX25_P_SEGMENT) - return 0; -#ifdef CONFIG_INET - if (pid == AX25_P_IP || pid == AX25_P_ARP) - return 0; -#endif - if ((protocol = kmalloc(sizeof(*protocol), GFP_ATOMIC)) == NULL) - return 0; - - protocol->pid = pid; - protocol->func = func; - write_lock_bh(&protocol_list_lock); - protocol->next = protocol_list; - protocol_list = protocol; + ap->next = protocol_list; + protocol_list = ap; write_unlock_bh(&protocol_list_lock); - - return 1; } -EXPORT_SYMBOL(ax25_protocol_register); +EXPORT_SYMBOL_GPL(ax25_register_pid); void ax25_protocol_release(unsigned int pid) { - struct protocol_struct *s, *protocol; + struct ax25_protocol *s, *protocol; write_lock_bh(&protocol_list_lock); protocol = protocol_list; @@ -110,54 +90,19 @@ void ax25_protocol_release(unsigned int pid) EXPORT_SYMBOL(ax25_protocol_release); -int ax25_linkfail_register(void (*func)(ax25_cb *, int)) +void ax25_linkfail_register(struct ax25_linkfail *lf) { - struct linkfail_struct *linkfail; - - if ((linkfail = kmalloc(sizeof(*linkfail), GFP_ATOMIC)) == NULL) - return 0; - - linkfail->func = func; - spin_lock_bh(&linkfail_lock); - linkfail->next = linkfail_list; - linkfail_list = linkfail; + hlist_add_head(&lf->lf_node, &ax25_linkfail_list); spin_unlock_bh(&linkfail_lock); - - return 1; } EXPORT_SYMBOL(ax25_linkfail_register); -void ax25_linkfail_release(void (*func)(ax25_cb *, int)) +void ax25_linkfail_release(struct ax25_linkfail *lf) { - struct linkfail_struct *s, *linkfail; - spin_lock_bh(&linkfail_lock); - linkfail = linkfail_list; - if (linkfail == NULL) { - spin_unlock_bh(&linkfail_lock); - return; - } - - if (linkfail->func == func) { - linkfail_list = linkfail->next; - spin_unlock_bh(&linkfail_lock); - kfree(linkfail); - return; - } - - while (linkfail != NULL && linkfail->next != NULL) { - if (linkfail->next->func == func) { - s = linkfail->next; - linkfail->next = linkfail->next->next; - spin_unlock_bh(&linkfail_lock); - kfree(s); - return; - } - - linkfail = linkfail->next; - } + hlist_del_init(&lf->lf_node); spin_unlock_bh(&linkfail_lock); } @@ -171,7 +116,7 @@ int ax25_listen_register(ax25_address *callsign, struct net_device *dev) return 0; if ((listen = kmalloc(sizeof(*listen), GFP_ATOMIC)) == NULL) - return 0; + return -ENOMEM; listen->callsign = *callsign; listen->dev = dev; @@ -181,7 +126,7 @@ int ax25_listen_register(ax25_address *callsign, struct net_device *dev) listen_list = listen; spin_unlock_bh(&listen_lock); - return 1; + return 0; } EXPORT_SYMBOL(ax25_listen_register); @@ -223,7 +168,7 @@ EXPORT_SYMBOL(ax25_listen_release); int (*ax25_protocol_function(unsigned int pid))(struct sk_buff *, ax25_cb *) { int (*res)(struct sk_buff *, ax25_cb *) = NULL; - struct protocol_struct *protocol; + struct ax25_protocol *protocol; read_lock(&protocol_list_lock); for (protocol = protocol_list; protocol != NULL; protocol = protocol->next) @@ -242,7 +187,8 @@ int ax25_listen_mine(ax25_address *callsign, struct net_device *dev) spin_lock_bh(&listen_lock); for (listen = listen_list; listen != NULL; listen = listen->next) - if (ax25cmp(&listen->callsign, callsign) == 0 && (listen->dev == dev || listen->dev == NULL)) { + if (ax25cmp(&listen->callsign, callsign) == 0 && + (listen->dev == dev || listen->dev == NULL)) { spin_unlock_bh(&listen_lock); return 1; } @@ -253,17 +199,18 @@ int ax25_listen_mine(ax25_address *callsign, struct net_device *dev) void ax25_link_failed(ax25_cb *ax25, int reason) { - struct linkfail_struct *linkfail; + struct ax25_linkfail *lf; + struct hlist_node *node; spin_lock_bh(&linkfail_lock); - for (linkfail = linkfail_list; linkfail != NULL; linkfail = linkfail->next) - (linkfail->func)(ax25, reason); + hlist_for_each_entry(lf, node, &ax25_linkfail_list, lf_node) + lf->func(ax25, reason); spin_unlock_bh(&linkfail_lock); } int ax25_protocol_is_registered(unsigned int pid) { - struct protocol_struct *protocol; + struct ax25_protocol *protocol; int res = 0; read_lock_bh(&protocol_list_lock); diff --git a/net/ax25/ax25_route.c b/net/ax25/ax25_route.c index 8580356ace5..0a0381622b1 100644 --- a/net/ax25/ax25_route.c +++ b/net/ax25/ax25_route.c @@ -71,7 +71,7 @@ void ax25_rt_device_down(struct net_device *dev) write_unlock(&ax25_route_lock); } -static int ax25_rt_add(struct ax25_routes_struct *route) +static int __must_check ax25_rt_add(struct ax25_routes_struct *route) { ax25_route *ax25_rt; ax25_dev *ax25_dev; diff --git a/net/bluetooth/cmtp/capi.c b/net/bluetooth/cmtp/capi.c index be04e9fb11f..ab166b48ce8 100644 --- a/net/bluetooth/cmtp/capi.c +++ b/net/bluetooth/cmtp/capi.c @@ -196,6 +196,9 @@ static void cmtp_recv_interopmsg(struct cmtp_session *session, struct sk_buff *s switch (CAPIMSG_SUBCOMMAND(skb->data)) { case CAPI_CONF: + if (skb->len < CAPI_MSG_BASELEN + 10) + break; + func = CAPIMSG_U16(skb->data, CAPI_MSG_BASELEN + 5); info = CAPIMSG_U16(skb->data, CAPI_MSG_BASELEN + 8); @@ -226,6 +229,9 @@ static void cmtp_recv_interopmsg(struct cmtp_session *session, struct sk_buff *s break; case CAPI_FUNCTION_GET_PROFILE: + if (skb->len < CAPI_MSG_BASELEN + 11 + sizeof(capi_profile)) + break; + controller = CAPIMSG_U16(skb->data, CAPI_MSG_BASELEN + 11); msgnum = CAPIMSG_MSGID(skb->data); @@ -246,17 +252,26 @@ static void cmtp_recv_interopmsg(struct cmtp_session *session, struct sk_buff *s break; case CAPI_FUNCTION_GET_MANUFACTURER: + if (skb->len < CAPI_MSG_BASELEN + 15) + break; + controller = CAPIMSG_U32(skb->data, CAPI_MSG_BASELEN + 10); if (!info && ctrl) { + int len = min_t(uint, CAPI_MANUFACTURER_LEN, + skb->data[CAPI_MSG_BASELEN + 14]); + + memset(ctrl->manu, 0, CAPI_MANUFACTURER_LEN); strncpy(ctrl->manu, - skb->data + CAPI_MSG_BASELEN + 15, - skb->data[CAPI_MSG_BASELEN + 14]); + skb->data + CAPI_MSG_BASELEN + 15, len); } break; case CAPI_FUNCTION_GET_VERSION: + if (skb->len < CAPI_MSG_BASELEN + 32) + break; + controller = CAPIMSG_U32(skb->data, CAPI_MSG_BASELEN + 12); if (!info && ctrl) { @@ -269,13 +284,18 @@ static void cmtp_recv_interopmsg(struct cmtp_session *session, struct sk_buff *s break; case CAPI_FUNCTION_GET_SERIAL_NUMBER: + if (skb->len < CAPI_MSG_BASELEN + 17) + break; + controller = CAPIMSG_U32(skb->data, CAPI_MSG_BASELEN + 12); if (!info && ctrl) { + int len = min_t(uint, CAPI_SERIAL_LEN, + skb->data[CAPI_MSG_BASELEN + 16]); + memset(ctrl->serial, 0, CAPI_SERIAL_LEN); strncpy(ctrl->serial, - skb->data + CAPI_MSG_BASELEN + 17, - skb->data[CAPI_MSG_BASELEN + 16]); + skb->data + CAPI_MSG_BASELEN + 17, len); } break; @@ -284,14 +304,18 @@ static void cmtp_recv_interopmsg(struct cmtp_session *session, struct sk_buff *s break; case CAPI_IND: + if (skb->len < CAPI_MSG_BASELEN + 6) + break; + func = CAPIMSG_U16(skb->data, CAPI_MSG_BASELEN + 3); if (func == CAPI_FUNCTION_LOOPBACK) { + int len = min_t(uint, skb->len - CAPI_MSG_BASELEN - 6, + skb->data[CAPI_MSG_BASELEN + 5]); appl = CAPIMSG_APPID(skb->data); msgnum = CAPIMSG_MSGID(skb->data); cmtp_send_interopmsg(session, CAPI_RESP, appl, msgnum, func, - skb->data + CAPI_MSG_BASELEN + 6, - skb->data[CAPI_MSG_BASELEN + 5]); + skb->data + CAPI_MSG_BASELEN + 6, len); } break; @@ -309,6 +333,9 @@ void cmtp_recv_capimsg(struct cmtp_session *session, struct sk_buff *skb) BT_DBG("session %p skb %p len %d", session, skb, skb->len); + if (skb->len < CAPI_MSG_BASELEN) + return; + if (CAPIMSG_COMMAND(skb->data) == CAPI_INTEROPERABILITY) { cmtp_recv_interopmsg(session, skb); return; diff --git a/net/bluetooth/hci_sock.c b/net/bluetooth/hci_sock.c index 711a085eca5..dbf98c49dba 100644 --- a/net/bluetooth/hci_sock.c +++ b/net/bluetooth/hci_sock.c @@ -123,10 +123,10 @@ void hci_send_to_sock(struct hci_dev *hdev, struct sk_buff *skb) if (flt->opcode && ((evt == HCI_EV_CMD_COMPLETE && flt->opcode != - get_unaligned((__u16 *)(skb->data + 3))) || + get_unaligned((__le16 *)(skb->data + 3))) || (evt == HCI_EV_CMD_STATUS && flt->opcode != - get_unaligned((__u16 *)(skb->data + 4))))) + get_unaligned((__le16 *)(skb->data + 4))))) continue; } diff --git a/net/bluetooth/hci_sysfs.c b/net/bluetooth/hci_sysfs.c index d4c935692cc..801d687ea4e 100644 --- a/net/bluetooth/hci_sysfs.c +++ b/net/bluetooth/hci_sysfs.c @@ -242,7 +242,7 @@ static void add_conn(struct work_struct *work) struct hci_conn *conn = container_of(work, struct hci_conn, work); int i; - if (device_register(&conn->dev) < 0) { + if (device_add(&conn->dev) < 0) { BT_ERR("Failed to register connection device"); return; } @@ -272,6 +272,8 @@ void hci_conn_add_sysfs(struct hci_conn *conn) dev_set_drvdata(&conn->dev, conn); + device_initialize(&conn->dev); + INIT_WORK(&conn->work, add_conn); schedule_work(&conn->work); @@ -287,6 +289,9 @@ void hci_conn_del_sysfs(struct hci_conn *conn) { BT_DBG("conn %p", conn); + if (!device_is_registered(&conn->dev)) + return; + INIT_WORK(&conn->work, del_conn); schedule_work(&conn->work); diff --git a/net/bluetooth/hidp/hidp.h b/net/bluetooth/hidp/hidp.h index c2775f587d2..c8dfacd40a0 100644 --- a/net/bluetooth/hidp/hidp.h +++ b/net/bluetooth/hidp/hidp.h @@ -86,7 +86,7 @@ struct hidp_connadd_req { int intr_sock; // Connteted interrupt socket __u16 parser; __u16 rd_size; - __u8 *rd_data; + __u8 __user *rd_data; __u8 country; __u8 subclass; __u16 vendor; diff --git a/net/bluetooth/hidp/sock.c b/net/bluetooth/hidp/sock.c index 407fba43c1b..93cf9e58617 100644 --- a/net/bluetooth/hidp/sock.c +++ b/net/bluetooth/hidp/sock.c @@ -189,7 +189,7 @@ static int hidp_sock_compat_ioctl(struct socket *sock, unsigned int cmd, unsigne uca = compat_alloc_user_space(sizeof(*uca)); - if (copy_from_user(&ca, (void *) arg, sizeof(ca))) + if (copy_from_user(&ca, (void __user *) arg, sizeof(ca))) return -EFAULT; if (put_user(ca.ctrl_sock, &uca->ctrl_sock) || diff --git a/net/bluetooth/l2cap.c b/net/bluetooth/l2cap.c index 29a8fa4d372..f8c25d50015 100644 --- a/net/bluetooth/l2cap.c +++ b/net/bluetooth/l2cap.c @@ -585,6 +585,12 @@ static int l2cap_sock_bind(struct socket *sock, struct sockaddr *addr, int addr_ goto done; } + if (la->l2_psm > 0 && btohs(la->l2_psm) < 0x1001 && + !capable(CAP_NET_BIND_SERVICE)) { + err = -EACCES; + goto done; + } + write_lock_bh(&l2cap_sk_list.lock); if (la->l2_psm && __l2cap_get_sock_by_addr(la->l2_psm, &la->l2_bdaddr)) { @@ -2150,8 +2156,8 @@ static ssize_t l2cap_sysfs_show(struct class *dev, char *buf) str += sprintf(str, "%s %s %d %d 0x%4.4x 0x%4.4x %d %d 0x%x\n", batostr(&bt_sk(sk)->src), batostr(&bt_sk(sk)->dst), - sk->sk_state, pi->psm, pi->scid, pi->dcid, pi->imtu, - pi->omtu, pi->link_mode); + sk->sk_state, btohs(pi->psm), pi->scid, pi->dcid, + pi->imtu, pi->omtu, pi->link_mode); } read_unlock_bh(&l2cap_sk_list.lock); diff --git a/net/bluetooth/rfcomm/sock.c b/net/bluetooth/rfcomm/sock.c index 544d65b7baa..cb7e855f082 100644 --- a/net/bluetooth/rfcomm/sock.c +++ b/net/bluetooth/rfcomm/sock.c @@ -557,7 +557,6 @@ static int rfcomm_sock_sendmsg(struct kiocb *iocb, struct socket *sock, struct sock *sk = sock->sk; struct rfcomm_dlc *d = rfcomm_pi(sk)->dlc; struct sk_buff *skb; - int err; int sent = 0; if (msg->msg_flags & MSG_OOB) @@ -572,6 +571,7 @@ static int rfcomm_sock_sendmsg(struct kiocb *iocb, struct socket *sock, while (len) { size_t size = min_t(size_t, len, d->mtu); + int err; skb = sock_alloc_send_skb(sk, size + RFCOMM_SKB_RESERVE, msg->msg_flags & MSG_DONTWAIT, &err); @@ -582,13 +582,16 @@ static int rfcomm_sock_sendmsg(struct kiocb *iocb, struct socket *sock, err = memcpy_fromiovec(skb_put(skb, size), msg->msg_iov, size); if (err) { kfree_skb(skb); - sent = err; + if (sent == 0) + sent = err; break; } err = rfcomm_dlc_send(d, skb); if (err < 0) { kfree_skb(skb); + if (sent == 0) + sent = err; break; } @@ -598,7 +601,7 @@ static int rfcomm_sock_sendmsg(struct kiocb *iocb, struct socket *sock, release_sock(sk); - return sent ? sent : err; + return sent; } static long rfcomm_sock_data_wait(struct sock *sk, long timeo) diff --git a/net/bluetooth/rfcomm/tty.c b/net/bluetooth/rfcomm/tty.c index 1fb5d42f37a..eb2b52484c7 100644 --- a/net/bluetooth/rfcomm/tty.c +++ b/net/bluetooth/rfcomm/tty.c @@ -697,9 +697,13 @@ static int rfcomm_tty_write_room(struct tty_struct *tty) BT_DBG("tty %p", tty); + if (!dev || !dev->dlc) + return 0; + room = rfcomm_room(dev->dlc) - atomic_read(&dev->wmem_alloc); if (room < 0) room = 0; + return room; } @@ -752,9 +756,9 @@ static int rfcomm_tty_ioctl(struct tty_struct *tty, struct file *filp, unsigned return -ENOIOCTLCMD; } -static void rfcomm_tty_set_termios(struct tty_struct *tty, struct termios *old) +static void rfcomm_tty_set_termios(struct tty_struct *tty, struct ktermios *old) { - struct termios *new = (struct termios *) tty->termios; + struct ktermios *new = tty->termios; int old_baud_rate = tty_termios_baud_rate(old); int new_baud_rate = tty_termios_baud_rate(new); @@ -915,12 +919,14 @@ static void rfcomm_tty_unthrottle(struct tty_struct *tty) static int rfcomm_tty_chars_in_buffer(struct tty_struct *tty) { struct rfcomm_dev *dev = (struct rfcomm_dev *) tty->driver_data; - struct rfcomm_dlc *dlc = dev->dlc; BT_DBG("tty %p dev %p", tty, dev); - if (!skb_queue_empty(&dlc->tx_queue)) - return dlc->mtu; + if (!dev || !dev->dlc) + return 0; + + if (!skb_queue_empty(&dev->dlc->tx_queue)) + return dev->dlc->mtu; return 0; } @@ -928,11 +934,12 @@ static int rfcomm_tty_chars_in_buffer(struct tty_struct *tty) static void rfcomm_tty_flush_buffer(struct tty_struct *tty) { struct rfcomm_dev *dev = (struct rfcomm_dev *) tty->driver_data; - if (!dev) - return; BT_DBG("tty %p dev %p", tty, dev); + if (!dev || !dev->dlc) + return; + skb_queue_purge(&dev->dlc->tx_queue); if (test_bit(TTY_DO_WRITE_WAKEUP, &tty->flags) && tty->ldisc.write_wakeup) @@ -952,11 +959,12 @@ static void rfcomm_tty_wait_until_sent(struct tty_struct *tty, int timeout) static void rfcomm_tty_hangup(struct tty_struct *tty) { struct rfcomm_dev *dev = (struct rfcomm_dev *) tty->driver_data; - if (!dev) - return; BT_DBG("tty %p dev %p", tty, dev); + if (!dev) + return; + rfcomm_tty_flush_buffer(tty); if (test_bit(RFCOMM_RELEASE_ONHUP, &dev->flags)) diff --git a/net/bridge/br_if.c b/net/bridge/br_if.c index 55bb2634c08..2b7c2c7dad4 100644 --- a/net/bridge/br_if.c +++ b/net/bridge/br_if.c @@ -286,7 +286,7 @@ static struct net_bridge_port *new_nbp(struct net_bridge *br, kobject_init(&p->kobj); kobject_set_name(&p->kobj, SYSFS_BRIDGE_PORT_ATTR); p->kobj.ktype = &brport_ktype; - p->kobj.parent = &(dev->class_dev.kobj); + p->kobj.parent = &(dev->dev.kobj); p->kobj.kset = NULL; return p; diff --git a/net/bridge/br_netfilter.c b/net/bridge/br_netfilter.c index bd221ad52ea..a25fa8cb528 100644 --- a/net/bridge/br_netfilter.c +++ b/net/bridge/br_netfilter.c @@ -61,9 +61,6 @@ static int brnf_filter_vlan_tagged __read_mostly = 1; #define brnf_filter_vlan_tagged 1 #endif -int brnf_deferred_hooks; -EXPORT_SYMBOL_GPL(brnf_deferred_hooks); - static __be16 inline vlan_proto(const struct sk_buff *skb) { return vlan_eth_hdr(skb)->h_vlan_encapsulated_proto; @@ -685,110 +682,50 @@ static unsigned int br_nf_forward_arp(unsigned int hook, struct sk_buff **pskb, return NF_STOLEN; } -/* PF_BRIDGE/LOCAL_OUT ***********************************************/ -static int br_nf_local_out_finish(struct sk_buff *skb) -{ - if (skb->protocol == htons(ETH_P_8021Q)) { - skb_push(skb, VLAN_HLEN); - skb->nh.raw -= VLAN_HLEN; - } - - NF_HOOK_THRESH(PF_BRIDGE, NF_BR_LOCAL_OUT, skb, NULL, skb->dev, - br_forward_finish, NF_BR_PRI_FIRST + 1); - - return 0; -} - -/* This function sees both locally originated IP packets and forwarded +/* PF_BRIDGE/LOCAL_OUT *********************************************** + * + * This function sees both locally originated IP packets and forwarded * IP packets (in both cases the destination device is a bridge * device). It also sees bridged-and-DNAT'ed packets. - * To be able to filter on the physical bridge devices (with the physdev - * module), we steal packets destined to a bridge device away from the - * PF_INET/FORWARD and PF_INET/OUTPUT hook functions, and give them back later, - * when we have determined the real output device. This is done in here. * * If (nf_bridge->mask & BRNF_BRIDGED_DNAT) then the packet is bridged * and we fake the PF_BRIDGE/FORWARD hook. The function br_nf_forward() * will then fake the PF_INET/FORWARD hook. br_nf_local_out() has priority * NF_BR_PRI_FIRST, so no relevant PF_BRIDGE/INPUT functions have been nor * will be executed. - * Otherwise, if nf_bridge->physindev is NULL, the bridge-nf code never touched - * this packet before, and so the packet was locally originated. We fake - * the PF_INET/LOCAL_OUT hook. - * Finally, if nf_bridge->physindev isn't NULL, then the packet was IP routed, - * so we fake the PF_INET/FORWARD hook. ip_sabotage_out() makes sure - * even routed packets that didn't arrive on a bridge interface have their - * nf_bridge->physindev set. */ + */ static unsigned int br_nf_local_out(unsigned int hook, struct sk_buff **pskb, const struct net_device *in, const struct net_device *out, int (*okfn)(struct sk_buff *)) { - struct net_device *realindev, *realoutdev; + struct net_device *realindev; struct sk_buff *skb = *pskb; struct nf_bridge_info *nf_bridge; - int pf; if (!skb->nf_bridge) return NF_ACCEPT; - if (skb->protocol == htons(ETH_P_IP) || IS_VLAN_IP(skb)) - pf = PF_INET; - else - pf = PF_INET6; - nf_bridge = skb->nf_bridge; - nf_bridge->physoutdev = skb->dev; - realindev = nf_bridge->physindev; + if (!(nf_bridge->mask & BRNF_BRIDGED_DNAT)) + return NF_ACCEPT; /* Bridged, take PF_BRIDGE/FORWARD. * (see big note in front of br_nf_pre_routing_finish) */ - if (nf_bridge->mask & BRNF_BRIDGED_DNAT) { - if (nf_bridge->mask & BRNF_PKT_TYPE) { - skb->pkt_type = PACKET_OTHERHOST; - nf_bridge->mask ^= BRNF_PKT_TYPE; - } - if (skb->protocol == htons(ETH_P_8021Q)) { - skb_push(skb, VLAN_HLEN); - skb->nh.raw -= VLAN_HLEN; - } + nf_bridge->physoutdev = skb->dev; + realindev = nf_bridge->physindev; - NF_HOOK(PF_BRIDGE, NF_BR_FORWARD, skb, realindev, - skb->dev, br_forward_finish); - goto out; + if (nf_bridge->mask & BRNF_PKT_TYPE) { + skb->pkt_type = PACKET_OTHERHOST; + nf_bridge->mask ^= BRNF_PKT_TYPE; } - realoutdev = bridge_parent(skb->dev); - if (!realoutdev) - return NF_DROP; - -#if defined(CONFIG_VLAN_8021Q) || defined(CONFIG_VLAN_8021Q_MODULE) - /* iptables should match -o br0.x */ - if (nf_bridge->netoutdev) - realoutdev = nf_bridge->netoutdev; -#endif if (skb->protocol == htons(ETH_P_8021Q)) { - skb_pull(skb, VLAN_HLEN); - (*pskb)->nh.raw += VLAN_HLEN; - } - /* IP forwarded traffic has a physindev, locally - * generated traffic hasn't. */ - if (realindev != NULL) { - if (!(nf_bridge->mask & BRNF_DONT_TAKE_PARENT)) { - struct net_device *parent = bridge_parent(realindev); - if (parent) - realindev = parent; - } - - NF_HOOK_THRESH(pf, NF_IP_FORWARD, skb, realindev, - realoutdev, br_nf_local_out_finish, - NF_IP_PRI_BRIDGE_SABOTAGE_FORWARD + 1); - } else { - NF_HOOK_THRESH(pf, NF_IP_LOCAL_OUT, skb, realindev, - realoutdev, br_nf_local_out_finish, - NF_IP_PRI_BRIDGE_SABOTAGE_LOCAL_OUT + 1); + skb_push(skb, VLAN_HLEN); + skb->nh.raw -= VLAN_HLEN; } -out: + NF_HOOK(PF_BRIDGE, NF_BR_FORWARD, skb, realindev, skb->dev, + br_forward_finish); return NF_STOLEN; } @@ -894,69 +831,6 @@ static unsigned int ip_sabotage_in(unsigned int hook, struct sk_buff **pskb, return NF_ACCEPT; } -/* Postpone execution of PF_INET(6)/FORWARD, PF_INET(6)/LOCAL_OUT - * and PF_INET(6)/POST_ROUTING until we have done the forwarding - * decision in the bridge code and have determined nf_bridge->physoutdev. */ -static unsigned int ip_sabotage_out(unsigned int hook, struct sk_buff **pskb, - const struct net_device *in, - const struct net_device *out, - int (*okfn)(struct sk_buff *)) -{ - struct sk_buff *skb = *pskb; - - if ((out->hard_start_xmit == br_dev_xmit && - okfn != br_nf_forward_finish && - okfn != br_nf_local_out_finish && okfn != br_nf_dev_queue_xmit) -#if defined(CONFIG_VLAN_8021Q) || defined(CONFIG_VLAN_8021Q_MODULE) - || ((out->priv_flags & IFF_802_1Q_VLAN) && - VLAN_DEV_INFO(out)->real_dev->hard_start_xmit == br_dev_xmit) -#endif - ) { - struct nf_bridge_info *nf_bridge; - - if (!skb->nf_bridge) { -#ifdef CONFIG_SYSCTL - /* This code is executed while in the IP(v6) stack, - the version should be 4 or 6. We can't use - skb->protocol because that isn't set on - PF_INET(6)/LOCAL_OUT. */ - struct iphdr *ip = skb->nh.iph; - - if (ip->version == 4 && !brnf_call_iptables) - return NF_ACCEPT; - else if (ip->version == 6 && !brnf_call_ip6tables) - return NF_ACCEPT; - else if (!brnf_deferred_hooks) - return NF_ACCEPT; -#endif - if (hook == NF_IP_POST_ROUTING) - return NF_ACCEPT; - if (!nf_bridge_alloc(skb)) - return NF_DROP; - } - - nf_bridge = skb->nf_bridge; - - /* This frame will arrive on PF_BRIDGE/LOCAL_OUT and we - * will need the indev then. For a brouter, the real indev - * can be a bridge port, so we make sure br_nf_local_out() - * doesn't use the bridge parent of the indev by using - * the BRNF_DONT_TAKE_PARENT mask. */ - if (hook == NF_IP_FORWARD && nf_bridge->physindev == NULL) { - nf_bridge->mask |= BRNF_DONT_TAKE_PARENT; - nf_bridge->physindev = (struct net_device *)in; - } -#if defined(CONFIG_VLAN_8021Q) || defined(CONFIG_VLAN_8021Q_MODULE) - /* the iptables outdev is br0.x, not br0 */ - if (out->priv_flags & IFF_802_1Q_VLAN) - nf_bridge->netoutdev = (struct net_device *)out; -#endif - return NF_STOP; - } - - return NF_ACCEPT; -} - /* For br_nf_local_out we need (prio = NF_BR_PRI_FIRST), to insure that innocent * PF_BRIDGE/NF_BR_LOCAL_OUT functions don't get bridged traffic as input. * For br_nf_post_routing, we need (prio = NF_BR_PRI_LAST), because @@ -1002,36 +876,6 @@ static struct nf_hook_ops br_nf_ops[] = { .pf = PF_INET6, .hooknum = NF_IP6_PRE_ROUTING, .priority = NF_IP6_PRI_FIRST, }, - { .hook = ip_sabotage_out, - .owner = THIS_MODULE, - .pf = PF_INET, - .hooknum = NF_IP_FORWARD, - .priority = NF_IP_PRI_BRIDGE_SABOTAGE_FORWARD, }, - { .hook = ip_sabotage_out, - .owner = THIS_MODULE, - .pf = PF_INET6, - .hooknum = NF_IP6_FORWARD, - .priority = NF_IP6_PRI_BRIDGE_SABOTAGE_FORWARD, }, - { .hook = ip_sabotage_out, - .owner = THIS_MODULE, - .pf = PF_INET, - .hooknum = NF_IP_LOCAL_OUT, - .priority = NF_IP_PRI_BRIDGE_SABOTAGE_LOCAL_OUT, }, - { .hook = ip_sabotage_out, - .owner = THIS_MODULE, - .pf = PF_INET6, - .hooknum = NF_IP6_LOCAL_OUT, - .priority = NF_IP6_PRI_BRIDGE_SABOTAGE_LOCAL_OUT, }, - { .hook = ip_sabotage_out, - .owner = THIS_MODULE, - .pf = PF_INET, - .hooknum = NF_IP_POST_ROUTING, - .priority = NF_IP_PRI_FIRST, }, - { .hook = ip_sabotage_out, - .owner = THIS_MODULE, - .pf = PF_INET6, - .hooknum = NF_IP6_POST_ROUTING, - .priority = NF_IP6_PRI_FIRST, }, }; #ifdef CONFIG_SYSCTL @@ -1105,44 +949,29 @@ static ctl_table brnf_net_table[] = { }; #endif -int br_netfilter_init(void) +int __init br_netfilter_init(void) { - int i; - - for (i = 0; i < ARRAY_SIZE(br_nf_ops); i++) { - int ret; - - if ((ret = nf_register_hook(&br_nf_ops[i])) >= 0) - continue; - - while (i--) - nf_unregister_hook(&br_nf_ops[i]); + int ret; + ret = nf_register_hooks(br_nf_ops, ARRAY_SIZE(br_nf_ops)); + if (ret < 0) return ret; - } - #ifdef CONFIG_SYSCTL brnf_sysctl_header = register_sysctl_table(brnf_net_table, 0); if (brnf_sysctl_header == NULL) { printk(KERN_WARNING "br_netfilter: can't register to sysctl.\n"); - for (i = 0; i < ARRAY_SIZE(br_nf_ops); i++) - nf_unregister_hook(&br_nf_ops[i]); - return -EFAULT; + nf_unregister_hooks(br_nf_ops, ARRAY_SIZE(br_nf_ops)); + return -ENOMEM; } #endif - printk(KERN_NOTICE "Bridge firewalling registered\n"); - return 0; } void br_netfilter_fini(void) { - int i; - - for (i = ARRAY_SIZE(br_nf_ops) - 1; i >= 0; i--) - nf_unregister_hook(&br_nf_ops[i]); + nf_unregister_hooks(br_nf_ops, ARRAY_SIZE(br_nf_ops)); #ifdef CONFIG_SYSCTL unregister_sysctl_table(brnf_sysctl_header); #endif diff --git a/net/bridge/br_netlink.c b/net/bridge/br_netlink.c index a9139682c49..7d68b24b565 100644 --- a/net/bridge/br_netlink.c +++ b/net/bridge/br_netlink.c @@ -45,7 +45,7 @@ static int br_fill_ifinfo(struct sk_buff *skb, const struct net_bridge_port *por nlh = nlmsg_put(skb, pid, seq, event, sizeof(*hdr), flags); if (nlh == NULL) - return -ENOBUFS; + return -EMSGSIZE; hdr = nlmsg_data(nlh); hdr->ifi_family = AF_BRIDGE; @@ -72,7 +72,8 @@ static int br_fill_ifinfo(struct sk_buff *skb, const struct net_bridge_port *por return nlmsg_end(skb, nlh); nla_put_failure: - return nlmsg_cancel(skb, nlh); + nlmsg_cancel(skb, nlh); + return -EMSGSIZE; } /* @@ -89,9 +90,12 @@ void br_ifinfo_notify(int event, struct net_bridge_port *port) goto errout; err = br_fill_ifinfo(skb, port, 0, 0, event, 0); - /* failure implies BUG in br_nlmsg_size() */ - BUG_ON(err < 0); - + if (err < 0) { + /* -EMSGSIZE implies BUG in br_nlmsg_size() */ + WARN_ON(err == -EMSGSIZE); + kfree_skb(skb); + goto errout; + } err = rtnl_notify(skb, 0, RTNLGRP_LINK, NULL, GFP_ATOMIC); errout: if (err < 0) diff --git a/net/bridge/br_sysfs_br.c b/net/bridge/br_sysfs_br.c index de9d1a9473f..ce10464716a 100644 --- a/net/bridge/br_sysfs_br.c +++ b/net/bridge/br_sysfs_br.c @@ -21,18 +21,17 @@ #include "br_private.h" -#define to_class_dev(obj) container_of(obj,struct class_device,kobj) -#define to_net_dev(class) container_of(class, struct net_device, class_dev) +#define to_dev(obj) container_of(obj, struct device, kobj) #define to_bridge(cd) ((struct net_bridge *)(to_net_dev(cd)->priv)) /* * Common code for storing bridge parameters. */ -static ssize_t store_bridge_parm(struct class_device *cd, +static ssize_t store_bridge_parm(struct device *d, const char *buf, size_t len, void (*set)(struct net_bridge *, unsigned long)) { - struct net_bridge *br = to_bridge(cd); + struct net_bridge *br = to_bridge(d); char *endp; unsigned long val; @@ -50,9 +49,10 @@ static ssize_t store_bridge_parm(struct class_device *cd, } -static ssize_t show_forward_delay(struct class_device *cd, char *buf) +static ssize_t show_forward_delay(struct device *d, + struct device_attribute *attr, char *buf) { - struct net_bridge *br = to_bridge(cd); + struct net_bridge *br = to_bridge(d); return sprintf(buf, "%lu\n", jiffies_to_clock_t(br->forward_delay)); } @@ -64,18 +64,20 @@ static void set_forward_delay(struct net_bridge *br, unsigned long val) br->bridge_forward_delay = delay; } -static ssize_t store_forward_delay(struct class_device *cd, const char *buf, - size_t len) +static ssize_t store_forward_delay(struct device *d, + struct device_attribute *attr, + const char *buf, size_t len) { - return store_bridge_parm(cd, buf, len, set_forward_delay); + return store_bridge_parm(d, buf, len, set_forward_delay); } -static CLASS_DEVICE_ATTR(forward_delay, S_IRUGO | S_IWUSR, - show_forward_delay, store_forward_delay); +static DEVICE_ATTR(forward_delay, S_IRUGO | S_IWUSR, + show_forward_delay, store_forward_delay); -static ssize_t show_hello_time(struct class_device *cd, char *buf) +static ssize_t show_hello_time(struct device *d, struct device_attribute *attr, + char *buf) { return sprintf(buf, "%lu\n", - jiffies_to_clock_t(to_bridge(cd)->hello_time)); + jiffies_to_clock_t(to_bridge(d)->hello_time)); } static void set_hello_time(struct net_bridge *br, unsigned long val) @@ -86,19 +88,20 @@ static void set_hello_time(struct net_bridge *br, unsigned long val) br->bridge_hello_time = t; } -static ssize_t store_hello_time(struct class_device *cd, const char *buf, +static ssize_t store_hello_time(struct device *d, + struct device_attribute *attr, const char *buf, size_t len) { - return store_bridge_parm(cd, buf, len, set_hello_time); + return store_bridge_parm(d, buf, len, set_hello_time); } +static DEVICE_ATTR(hello_time, S_IRUGO | S_IWUSR, show_hello_time, + store_hello_time); -static CLASS_DEVICE_ATTR(hello_time, S_IRUGO | S_IWUSR, show_hello_time, - store_hello_time); - -static ssize_t show_max_age(struct class_device *cd, char *buf) +static ssize_t show_max_age(struct device *d, struct device_attribute *attr, + char *buf) { return sprintf(buf, "%lu\n", - jiffies_to_clock_t(to_bridge(cd)->max_age)); + jiffies_to_clock_t(to_bridge(d)->max_age)); } static void set_max_age(struct net_bridge *br, unsigned long val) @@ -109,18 +112,17 @@ static void set_max_age(struct net_bridge *br, unsigned long val) br->bridge_max_age = t; } -static ssize_t store_max_age(struct class_device *cd, const char *buf, - size_t len) +static ssize_t store_max_age(struct device *d, struct device_attribute *attr, + const char *buf, size_t len) { - return store_bridge_parm(cd, buf, len, set_max_age); + return store_bridge_parm(d, buf, len, set_max_age); } +static DEVICE_ATTR(max_age, S_IRUGO | S_IWUSR, show_max_age, store_max_age); -static CLASS_DEVICE_ATTR(max_age, S_IRUGO | S_IWUSR, show_max_age, - store_max_age); - -static ssize_t show_ageing_time(struct class_device *cd, char *buf) +static ssize_t show_ageing_time(struct device *d, + struct device_attribute *attr, char *buf) { - struct net_bridge *br = to_bridge(cd); + struct net_bridge *br = to_bridge(d); return sprintf(buf, "%lu\n", jiffies_to_clock_t(br->ageing_time)); } @@ -129,17 +131,19 @@ static void set_ageing_time(struct net_bridge *br, unsigned long val) br->ageing_time = clock_t_to_jiffies(val); } -static ssize_t store_ageing_time(struct class_device *cd, const char *buf, - size_t len) +static ssize_t store_ageing_time(struct device *d, + struct device_attribute *attr, + const char *buf, size_t len) { - return store_bridge_parm(cd, buf, len, set_ageing_time); + return store_bridge_parm(d, buf, len, set_ageing_time); } +static DEVICE_ATTR(ageing_time, S_IRUGO | S_IWUSR, show_ageing_time, + store_ageing_time); -static CLASS_DEVICE_ATTR(ageing_time, S_IRUGO | S_IWUSR, show_ageing_time, - store_ageing_time); -static ssize_t show_stp_state(struct class_device *cd, char *buf) +static ssize_t show_stp_state(struct device *d, + struct device_attribute *attr, char *buf) { - struct net_bridge *br = to_bridge(cd); + struct net_bridge *br = to_bridge(d); return sprintf(buf, "%d\n", br->stp_enabled); } @@ -148,18 +152,19 @@ static void set_stp_state(struct net_bridge *br, unsigned long val) br->stp_enabled = val; } -static ssize_t store_stp_state(struct class_device *cd, - const char *buf, size_t len) +static ssize_t store_stp_state(struct device *d, + struct device_attribute *attr, const char *buf, + size_t len) { - return store_bridge_parm(cd, buf, len, set_stp_state); + return store_bridge_parm(d, buf, len, set_stp_state); } +static DEVICE_ATTR(stp_state, S_IRUGO | S_IWUSR, show_stp_state, + store_stp_state); -static CLASS_DEVICE_ATTR(stp_state, S_IRUGO | S_IWUSR, show_stp_state, - store_stp_state); - -static ssize_t show_priority(struct class_device *cd, char *buf) +static ssize_t show_priority(struct device *d, struct device_attribute *attr, + char *buf) { - struct net_bridge *br = to_bridge(cd); + struct net_bridge *br = to_bridge(d); return sprintf(buf, "%d\n", (br->bridge_id.prio[0] << 8) | br->bridge_id.prio[1]); } @@ -169,92 +174,107 @@ static void set_priority(struct net_bridge *br, unsigned long val) br_stp_set_bridge_priority(br, (u16) val); } -static ssize_t store_priority(struct class_device *cd, +static ssize_t store_priority(struct device *d, struct device_attribute *attr, const char *buf, size_t len) { - return store_bridge_parm(cd, buf, len, set_priority); + return store_bridge_parm(d, buf, len, set_priority); } -static CLASS_DEVICE_ATTR(priority, S_IRUGO | S_IWUSR, show_priority, - store_priority); +static DEVICE_ATTR(priority, S_IRUGO | S_IWUSR, show_priority, store_priority); -static ssize_t show_root_id(struct class_device *cd, char *buf) +static ssize_t show_root_id(struct device *d, struct device_attribute *attr, + char *buf) { - return br_show_bridge_id(buf, &to_bridge(cd)->designated_root); + return br_show_bridge_id(buf, &to_bridge(d)->designated_root); } -static CLASS_DEVICE_ATTR(root_id, S_IRUGO, show_root_id, NULL); +static DEVICE_ATTR(root_id, S_IRUGO, show_root_id, NULL); -static ssize_t show_bridge_id(struct class_device *cd, char *buf) +static ssize_t show_bridge_id(struct device *d, struct device_attribute *attr, + char *buf) { - return br_show_bridge_id(buf, &to_bridge(cd)->bridge_id); + return br_show_bridge_id(buf, &to_bridge(d)->bridge_id); } -static CLASS_DEVICE_ATTR(bridge_id, S_IRUGO, show_bridge_id, NULL); +static DEVICE_ATTR(bridge_id, S_IRUGO, show_bridge_id, NULL); -static ssize_t show_root_port(struct class_device *cd, char *buf) +static ssize_t show_root_port(struct device *d, struct device_attribute *attr, + char *buf) { - return sprintf(buf, "%d\n", to_bridge(cd)->root_port); + return sprintf(buf, "%d\n", to_bridge(d)->root_port); } -static CLASS_DEVICE_ATTR(root_port, S_IRUGO, show_root_port, NULL); +static DEVICE_ATTR(root_port, S_IRUGO, show_root_port, NULL); -static ssize_t show_root_path_cost(struct class_device *cd, char *buf) +static ssize_t show_root_path_cost(struct device *d, + struct device_attribute *attr, char *buf) { - return sprintf(buf, "%d\n", to_bridge(cd)->root_path_cost); + return sprintf(buf, "%d\n", to_bridge(d)->root_path_cost); } -static CLASS_DEVICE_ATTR(root_path_cost, S_IRUGO, show_root_path_cost, NULL); +static DEVICE_ATTR(root_path_cost, S_IRUGO, show_root_path_cost, NULL); -static ssize_t show_topology_change(struct class_device *cd, char *buf) +static ssize_t show_topology_change(struct device *d, + struct device_attribute *attr, char *buf) { - return sprintf(buf, "%d\n", to_bridge(cd)->topology_change); + return sprintf(buf, "%d\n", to_bridge(d)->topology_change); } -static CLASS_DEVICE_ATTR(topology_change, S_IRUGO, show_topology_change, NULL); +static DEVICE_ATTR(topology_change, S_IRUGO, show_topology_change, NULL); -static ssize_t show_topology_change_detected(struct class_device *cd, char *buf) +static ssize_t show_topology_change_detected(struct device *d, + struct device_attribute *attr, + char *buf) { - struct net_bridge *br = to_bridge(cd); + struct net_bridge *br = to_bridge(d); return sprintf(buf, "%d\n", br->topology_change_detected); } -static CLASS_DEVICE_ATTR(topology_change_detected, S_IRUGO, show_topology_change_detected, NULL); +static DEVICE_ATTR(topology_change_detected, S_IRUGO, + show_topology_change_detected, NULL); -static ssize_t show_hello_timer(struct class_device *cd, char *buf) +static ssize_t show_hello_timer(struct device *d, + struct device_attribute *attr, char *buf) { - struct net_bridge *br = to_bridge(cd); + struct net_bridge *br = to_bridge(d); return sprintf(buf, "%ld\n", br_timer_value(&br->hello_timer)); } -static CLASS_DEVICE_ATTR(hello_timer, S_IRUGO, show_hello_timer, NULL); +static DEVICE_ATTR(hello_timer, S_IRUGO, show_hello_timer, NULL); -static ssize_t show_tcn_timer(struct class_device *cd, char *buf) +static ssize_t show_tcn_timer(struct device *d, struct device_attribute *attr, + char *buf) { - struct net_bridge *br = to_bridge(cd); + struct net_bridge *br = to_bridge(d); return sprintf(buf, "%ld\n", br_timer_value(&br->tcn_timer)); } -static CLASS_DEVICE_ATTR(tcn_timer, S_IRUGO, show_tcn_timer, NULL); +static DEVICE_ATTR(tcn_timer, S_IRUGO, show_tcn_timer, NULL); -static ssize_t show_topology_change_timer(struct class_device *cd, char *buf) +static ssize_t show_topology_change_timer(struct device *d, + struct device_attribute *attr, + char *buf) { - struct net_bridge *br = to_bridge(cd); + struct net_bridge *br = to_bridge(d); return sprintf(buf, "%ld\n", br_timer_value(&br->topology_change_timer)); } -static CLASS_DEVICE_ATTR(topology_change_timer, S_IRUGO, show_topology_change_timer, NULL); +static DEVICE_ATTR(topology_change_timer, S_IRUGO, show_topology_change_timer, + NULL); -static ssize_t show_gc_timer(struct class_device *cd, char *buf) +static ssize_t show_gc_timer(struct device *d, struct device_attribute *attr, + char *buf) { - struct net_bridge *br = to_bridge(cd); + struct net_bridge *br = to_bridge(d); return sprintf(buf, "%ld\n", br_timer_value(&br->gc_timer)); } -static CLASS_DEVICE_ATTR(gc_timer, S_IRUGO, show_gc_timer, NULL); +static DEVICE_ATTR(gc_timer, S_IRUGO, show_gc_timer, NULL); -static ssize_t show_group_addr(struct class_device *cd, char *buf) +static ssize_t show_group_addr(struct device *d, + struct device_attribute *attr, char *buf) { - struct net_bridge *br = to_bridge(cd); + struct net_bridge *br = to_bridge(d); return sprintf(buf, "%x:%x:%x:%x:%x:%x\n", br->group_addr[0], br->group_addr[1], br->group_addr[2], br->group_addr[3], br->group_addr[4], br->group_addr[5]); } -static ssize_t store_group_addr(struct class_device *cd, const char *buf, - size_t len) +static ssize_t store_group_addr(struct device *d, + struct device_attribute *attr, + const char *buf, size_t len) { - struct net_bridge *br = to_bridge(cd); + struct net_bridge *br = to_bridge(d); unsigned new_addr[6]; int i; @@ -286,28 +306,28 @@ static ssize_t store_group_addr(struct class_device *cd, const char *buf, return len; } -static CLASS_DEVICE_ATTR(group_addr, S_IRUGO | S_IWUSR, - show_group_addr, store_group_addr); +static DEVICE_ATTR(group_addr, S_IRUGO | S_IWUSR, + show_group_addr, store_group_addr); static struct attribute *bridge_attrs[] = { - &class_device_attr_forward_delay.attr, - &class_device_attr_hello_time.attr, - &class_device_attr_max_age.attr, - &class_device_attr_ageing_time.attr, - &class_device_attr_stp_state.attr, - &class_device_attr_priority.attr, - &class_device_attr_bridge_id.attr, - &class_device_attr_root_id.attr, - &class_device_attr_root_path_cost.attr, - &class_device_attr_root_port.attr, - &class_device_attr_topology_change.attr, - &class_device_attr_topology_change_detected.attr, - &class_device_attr_hello_timer.attr, - &class_device_attr_tcn_timer.attr, - &class_device_attr_topology_change_timer.attr, - &class_device_attr_gc_timer.attr, - &class_device_attr_group_addr.attr, + &dev_attr_forward_delay.attr, + &dev_attr_hello_time.attr, + &dev_attr_max_age.attr, + &dev_attr_ageing_time.attr, + &dev_attr_stp_state.attr, + &dev_attr_priority.attr, + &dev_attr_bridge_id.attr, + &dev_attr_root_id.attr, + &dev_attr_root_path_cost.attr, + &dev_attr_root_port.attr, + &dev_attr_topology_change.attr, + &dev_attr_topology_change_detected.attr, + &dev_attr_hello_timer.attr, + &dev_attr_tcn_timer.attr, + &dev_attr_topology_change_timer.attr, + &dev_attr_gc_timer.attr, + &dev_attr_group_addr.attr, NULL }; @@ -325,8 +345,8 @@ static struct attribute_group bridge_group = { static ssize_t brforward_read(struct kobject *kobj, char *buf, loff_t off, size_t count) { - struct class_device *cdev = to_class_dev(kobj); - struct net_bridge *br = to_bridge(cdev); + struct device *dev = to_dev(kobj); + struct net_bridge *br = to_bridge(dev); int n; /* must read whole records */ @@ -363,7 +383,7 @@ static struct bin_attribute bridge_forward = { */ int br_sysfs_addbr(struct net_device *dev) { - struct kobject *brobj = &dev->class_dev.kobj; + struct kobject *brobj = &dev->dev.kobj; struct net_bridge *br = netdev_priv(dev); int err; @@ -395,9 +415,9 @@ int br_sysfs_addbr(struct net_device *dev) } return 0; out3: - sysfs_remove_bin_file(&dev->class_dev.kobj, &bridge_forward); + sysfs_remove_bin_file(&dev->dev.kobj, &bridge_forward); out2: - sysfs_remove_group(&dev->class_dev.kobj, &bridge_group); + sysfs_remove_group(&dev->dev.kobj, &bridge_group); out1: return err; @@ -405,7 +425,7 @@ int br_sysfs_addbr(struct net_device *dev) void br_sysfs_delbr(struct net_device *dev) { - struct kobject *kobj = &dev->class_dev.kobj; + struct kobject *kobj = &dev->dev.kobj; struct net_bridge *br = netdev_priv(dev); kobject_unregister(&br->ifobj); diff --git a/net/bridge/br_sysfs_if.c b/net/bridge/br_sysfs_if.c index c51c9e42aeb..0bc2aef8f9f 100644 --- a/net/bridge/br_sysfs_if.c +++ b/net/bridge/br_sysfs_if.c @@ -211,7 +211,7 @@ int br_sysfs_addif(struct net_bridge_port *p) struct brport_attribute **a; int err; - err = sysfs_create_link(&p->kobj, &br->dev->class_dev.kobj, + err = sysfs_create_link(&p->kobj, &br->dev->dev.kobj, SYSFS_BRIDGE_PORT_LINK); if (err) goto out2; diff --git a/net/bridge/netfilter/ebt_ip.c b/net/bridge/netfilter/ebt_ip.c index e4c642448e1..6afa4d017d4 100644 --- a/net/bridge/netfilter/ebt_ip.c +++ b/net/bridge/netfilter/ebt_ip.c @@ -93,6 +93,7 @@ static int ebt_ip_check(const char *tablename, unsigned int hookmask, return -EINVAL; if (info->protocol != IPPROTO_TCP && info->protocol != IPPROTO_UDP && + info->protocol != IPPROTO_UDPLITE && info->protocol != IPPROTO_SCTP && info->protocol != IPPROTO_DCCP) return -EINVAL; diff --git a/net/bridge/netfilter/ebt_log.c b/net/bridge/netfilter/ebt_log.c index a184f879f25..985df82e427 100644 --- a/net/bridge/netfilter/ebt_log.c +++ b/net/bridge/netfilter/ebt_log.c @@ -96,6 +96,7 @@ ebt_log_packet(unsigned int pf, unsigned int hooknum, NIPQUAD(ih->daddr), ih->tos, ih->protocol); if (ih->protocol == IPPROTO_TCP || ih->protocol == IPPROTO_UDP || + ih->protocol == IPPROTO_UDPLITE || ih->protocol == IPPROTO_SCTP || ih->protocol == IPPROTO_DCCP) { struct tcpudphdr _ports, *pptr; diff --git a/net/bridge/netfilter/ebtables.c b/net/bridge/netfilter/ebtables.c index bee558a4180..6c84ccb8c9d 100644 --- a/net/bridge/netfilter/ebtables.c +++ b/net/bridge/netfilter/ebtables.c @@ -610,7 +610,7 @@ ebt_check_entry(struct ebt_entry *e, struct ebt_table_info *newinfo, struct ebt_entry_target *t; struct ebt_target *target; unsigned int i, j, hook = 0, hookmask = 0; - size_t gap = e->next_offset - e->target_offset; + size_t gap; int ret; /* don't mess with the struct ebt_entries */ @@ -660,6 +660,7 @@ ebt_check_entry(struct ebt_entry *e, struct ebt_table_info *newinfo, if (ret != 0) goto cleanup_watchers; t = (struct ebt_entry_target *)(((char *)e) + e->target_offset); + gap = e->next_offset - e->target_offset; target = find_target_lock(t->u.name, &ret, &ebt_mutex); if (!target) goto cleanup_watchers; diff --git a/net/core/dev.c b/net/core/dev.c index e660cb57e42..1e94a1b9a0f 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -751,7 +751,7 @@ int dev_change_name(struct net_device *dev, char *newname) else strlcpy(dev->name, newname, IFNAMSIZ); - err = class_device_rename(&dev->class_dev, dev->name); + err = device_rename(&dev->dev, dev->name); if (!err) { hlist_del(&dev->name_hlist); hlist_add_head(&dev->name_hlist, dev_name_hash(dev->name)); @@ -3221,8 +3221,8 @@ void free_netdev(struct net_device *dev) BUG_ON(dev->reg_state != NETREG_UNREGISTERED); dev->reg_state = NETREG_RELEASED; - /* will free via class release */ - class_device_put(&dev->class_dev); + /* will free via device release */ + put_device(&dev->dev); #else kfree((char *)dev - dev->padded); #endif @@ -3247,7 +3247,7 @@ void synchronize_net(void) * unregister_netdev() instead of this. */ -int unregister_netdevice(struct net_device *dev) +void unregister_netdevice(struct net_device *dev) { struct net_device *d, **dp; @@ -3258,7 +3258,9 @@ int unregister_netdevice(struct net_device *dev) if (dev->reg_state == NETREG_UNINITIALIZED) { printk(KERN_DEBUG "unregister_netdevice: device %s/%p never " "was registered\n", dev->name, dev); - return -ENODEV; + + WARN_ON(1); + return; } BUG_ON(dev->reg_state != NETREG_REGISTERED); @@ -3280,11 +3282,7 @@ int unregister_netdevice(struct net_device *dev) break; } } - if (!d) { - printk(KERN_ERR "unregister net_device: '%s' not found\n", - dev->name); - return -ENODEV; - } + BUG_ON(!d); dev->reg_state = NETREG_UNREGISTERING; @@ -3316,7 +3314,6 @@ int unregister_netdevice(struct net_device *dev) synchronize_net(); dev_put(dev); - return 0; } /** diff --git a/net/core/dst.c b/net/core/dst.c index 836ec660692..1a53fb39b7e 100644 --- a/net/core/dst.c +++ b/net/core/dst.c @@ -99,7 +99,14 @@ static void dst_run_gc(unsigned long dummy) printk("dst_total: %d/%d %ld\n", atomic_read(&dst_total), delayed, dst_gc_timer_expires); #endif - mod_timer(&dst_gc_timer, jiffies + dst_gc_timer_expires); + /* if the next desired timer is more than 4 seconds in the future + * then round the timer to whole seconds + */ + if (dst_gc_timer_expires > 4*HZ) + mod_timer(&dst_gc_timer, + round_jiffies(jiffies + dst_gc_timer_expires)); + else + mod_timer(&dst_gc_timer, jiffies + dst_gc_timer_expires); out: spin_unlock(&dst_lock); diff --git a/net/core/fib_rules.c b/net/core/fib_rules.c index 1df6cd4568d..215f1bff048 100644 --- a/net/core/fib_rules.c +++ b/net/core/fib_rules.c @@ -331,7 +331,7 @@ static int fib_nl_fill_rule(struct sk_buff *skb, struct fib_rule *rule, nlh = nlmsg_put(skb, pid, seq, type, sizeof(*frh), flags); if (nlh == NULL) - return -1; + return -EMSGSIZE; frh = nlmsg_data(nlh); frh->table = rule->table; @@ -359,7 +359,8 @@ static int fib_nl_fill_rule(struct sk_buff *skb, struct fib_rule *rule, return nlmsg_end(skb, nlh); nla_put_failure: - return nlmsg_cancel(skb, nlh); + nlmsg_cancel(skb, nlh); + return -EMSGSIZE; } int fib_rules_dump(struct sk_buff *skb, struct netlink_callback *cb, int family) @@ -405,9 +406,12 @@ static void notify_rule_change(int event, struct fib_rule *rule, goto errout; err = fib_nl_fill_rule(skb, rule, pid, nlh->nlmsg_seq, event, 0, ops); - /* failure implies BUG in fib_rule_nlmsg_size() */ - BUG_ON(err < 0); - + if (err < 0) { + /* -EMSGSIZE implies BUG in fib_rule_nlmsg_size() */ + WARN_ON(err == -EMSGSIZE); + kfree_skb(skb); + goto errout; + } err = rtnl_notify(skb, pid, ops->nlgroup, nlh, GFP_KERNEL); errout: if (err < 0) diff --git a/net/core/flow.c b/net/core/flow.c index d137f971f97..5d25697920b 100644 --- a/net/core/flow.c +++ b/net/core/flow.c @@ -231,22 +231,16 @@ nocache: err = resolver(key, family, dir, &obj, &obj_ref); - if (fle) { - if (err) { - /* Force security policy check on next lookup */ - *head = fle->next; - flow_entry_kill(cpu, fle); - } else { - fle->genid = atomic_read(&flow_cache_genid); - - if (fle->object) - atomic_dec(fle->object_ref); - - fle->object = obj; - fle->object_ref = obj_ref; - if (obj) - atomic_inc(fle->object_ref); - } + if (fle && !err) { + fle->genid = atomic_read(&flow_cache_genid); + + if (fle->object) + atomic_dec(fle->object_ref); + + fle->object = obj; + fle->object_ref = obj_ref; + if (obj) + atomic_inc(fle->object_ref); } local_bh_enable(); diff --git a/net/core/neighbour.c b/net/core/neighbour.c index 0ab1987b934..054d46493d2 100644 --- a/net/core/neighbour.c +++ b/net/core/neighbour.c @@ -577,9 +577,10 @@ void neigh_destroy(struct neighbour *neigh) while ((hh = neigh->hh) != NULL) { neigh->hh = hh->hh_next; hh->hh_next = NULL; - write_lock_bh(&hh->hh_lock); + + write_seqlock_bh(&hh->hh_lock); hh->hh_output = neigh_blackhole; - write_unlock_bh(&hh->hh_lock); + write_sequnlock_bh(&hh->hh_lock); if (atomic_dec_and_test(&hh->hh_refcnt)) kfree(hh); } @@ -695,7 +696,10 @@ next_elt: if (!expire) expire = 1; - mod_timer(&tbl->gc_timer, now + expire); + if (expire>HZ) + mod_timer(&tbl->gc_timer, round_jiffies(now + expire)); + else + mod_timer(&tbl->gc_timer, now + expire); write_unlock(&tbl->lock); } @@ -897,9 +901,9 @@ static void neigh_update_hhs(struct neighbour *neigh) if (update) { for (hh = neigh->hh; hh; hh = hh->hh_next) { - write_lock_bh(&hh->hh_lock); + write_seqlock_bh(&hh->hh_lock); update(hh, neigh->dev, neigh->ha); - write_unlock_bh(&hh->hh_lock); + write_sequnlock_bh(&hh->hh_lock); } } } @@ -1089,7 +1093,7 @@ static void neigh_hh_init(struct neighbour *n, struct dst_entry *dst, break; if (!hh && (hh = kzalloc(sizeof(*hh), GFP_ATOMIC)) != NULL) { - rwlock_init(&hh->hh_lock); + seqlock_init(&hh->hh_lock); hh->hh_type = protocol; atomic_set(&hh->hh_refcnt, 0); hh->hh_next = NULL; @@ -1636,7 +1640,7 @@ static int neightbl_fill_info(struct sk_buff *skb, struct neigh_table *tbl, nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ndtmsg), flags); if (nlh == NULL) - return -ENOBUFS; + return -EMSGSIZE; ndtmsg = nlmsg_data(nlh); @@ -1705,7 +1709,8 @@ static int neightbl_fill_info(struct sk_buff *skb, struct neigh_table *tbl, nla_put_failure: read_unlock_bh(&tbl->lock); - return nlmsg_cancel(skb, nlh); + nlmsg_cancel(skb, nlh); + return -EMSGSIZE; } static int neightbl_fill_param_info(struct sk_buff *skb, @@ -1719,7 +1724,7 @@ static int neightbl_fill_param_info(struct sk_buff *skb, nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ndtmsg), flags); if (nlh == NULL) - return -ENOBUFS; + return -EMSGSIZE; ndtmsg = nlmsg_data(nlh); @@ -1736,7 +1741,8 @@ static int neightbl_fill_param_info(struct sk_buff *skb, return nlmsg_end(skb, nlh); errout: read_unlock_bh(&tbl->lock); - return nlmsg_cancel(skb, nlh); + nlmsg_cancel(skb, nlh); + return -EMSGSIZE; } static inline struct neigh_parms *lookup_neigh_params(struct neigh_table *tbl, @@ -1954,7 +1960,7 @@ static int neigh_fill_info(struct sk_buff *skb, struct neighbour *neigh, nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ndm), flags); if (nlh == NULL) - return -ENOBUFS; + return -EMSGSIZE; ndm = nlmsg_data(nlh); ndm->ndm_family = neigh->ops->family; @@ -1986,7 +1992,8 @@ static int neigh_fill_info(struct sk_buff *skb, struct neighbour *neigh, return nlmsg_end(skb, nlh); nla_put_failure: - return nlmsg_cancel(skb, nlh); + nlmsg_cancel(skb, nlh); + return -EMSGSIZE; } @@ -2428,9 +2435,12 @@ static void __neigh_notify(struct neighbour *n, int type, int flags) goto errout; err = neigh_fill_info(skb, n, 0, 0, type, flags); - /* failure implies BUG in neigh_nlmsg_size() */ - BUG_ON(err < 0); - + if (err < 0) { + /* -EMSGSIZE implies BUG in neigh_nlmsg_size() */ + WARN_ON(err == -EMSGSIZE); + kfree_skb(skb); + goto errout; + } err = rtnl_notify(skb, 0, RTNLGRP_NEIGH, NULL, GFP_ATOMIC); errout: if (err < 0) diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c index f47f319bb7d..44db095a8f7 100644 --- a/net/core/net-sysfs.c +++ b/net/core/net-sysfs.c @@ -18,9 +18,6 @@ #include <linux/wireless.h> #include <net/iw_handler.h> -#define to_class_dev(obj) container_of(obj,struct class_device,kobj) -#define to_net_dev(class) container_of(class, struct net_device, class_dev) - static const char fmt_hex[] = "%#x\n"; static const char fmt_long_hex[] = "%#lx\n"; static const char fmt_dec[] = "%d\n"; @@ -32,10 +29,11 @@ static inline int dev_isalive(const struct net_device *dev) } /* use same locking rules as GIF* ioctl's */ -static ssize_t netdev_show(const struct class_device *cd, char *buf, +static ssize_t netdev_show(const struct device *dev, + struct device_attribute *attr, char *buf, ssize_t (*format)(const struct net_device *, char *)) { - struct net_device *net = to_net_dev(cd); + struct net_device *net = to_net_dev(dev); ssize_t ret = -EINVAL; read_lock(&dev_base_lock); @@ -52,14 +50,15 @@ static ssize_t format_##field(const struct net_device *net, char *buf) \ { \ return sprintf(buf, format_string, net->field); \ } \ -static ssize_t show_##field(struct class_device *cd, char *buf) \ +static ssize_t show_##field(struct device *dev, \ + struct device_attribute *attr, char *buf) \ { \ - return netdev_show(cd, buf, format_##field); \ + return netdev_show(dev, attr, buf, format_##field); \ } /* use same locking and permission rules as SIF* ioctl's */ -static ssize_t netdev_store(struct class_device *dev, +static ssize_t netdev_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t len, int (*set)(struct net_device *, unsigned long)) { @@ -104,7 +103,8 @@ static ssize_t format_addr(char *buf, const unsigned char *addr, int len) return cp - buf; } -static ssize_t show_address(struct class_device *dev, char *buf) +static ssize_t show_address(struct device *dev, struct device_attribute *attr, + char *buf) { struct net_device *net = to_net_dev(dev); ssize_t ret = -EINVAL; @@ -116,7 +116,8 @@ static ssize_t show_address(struct class_device *dev, char *buf) return ret; } -static ssize_t show_broadcast(struct class_device *dev, char *buf) +static ssize_t show_broadcast(struct device *dev, + struct device_attribute *attr, char *buf) { struct net_device *net = to_net_dev(dev); if (dev_isalive(net)) @@ -124,7 +125,8 @@ static ssize_t show_broadcast(struct class_device *dev, char *buf) return -EINVAL; } -static ssize_t show_carrier(struct class_device *dev, char *buf) +static ssize_t show_carrier(struct device *dev, + struct device_attribute *attr, char *buf) { struct net_device *netdev = to_net_dev(dev); if (netif_running(netdev)) { @@ -133,7 +135,8 @@ static ssize_t show_carrier(struct class_device *dev, char *buf) return -EINVAL; } -static ssize_t show_dormant(struct class_device *dev, char *buf) +static ssize_t show_dormant(struct device *dev, + struct device_attribute *attr, char *buf) { struct net_device *netdev = to_net_dev(dev); @@ -153,7 +156,8 @@ static const char *operstates[] = { "up" }; -static ssize_t show_operstate(struct class_device *dev, char *buf) +static ssize_t show_operstate(struct device *dev, + struct device_attribute *attr, char *buf) { const struct net_device *netdev = to_net_dev(dev); unsigned char operstate; @@ -178,9 +182,10 @@ static int change_mtu(struct net_device *net, unsigned long new_mtu) return dev_set_mtu(net, (int) new_mtu); } -static ssize_t store_mtu(struct class_device *dev, const char *buf, size_t len) +static ssize_t store_mtu(struct device *dev, struct device_attribute *attr, + const char *buf, size_t len) { - return netdev_store(dev, buf, len, change_mtu); + return netdev_store(dev, attr, buf, len, change_mtu); } NETDEVICE_SHOW(flags, fmt_hex); @@ -190,9 +195,10 @@ static int change_flags(struct net_device *net, unsigned long new_flags) return dev_change_flags(net, (unsigned) new_flags); } -static ssize_t store_flags(struct class_device *dev, const char *buf, size_t len) +static ssize_t store_flags(struct device *dev, struct device_attribute *attr, + const char *buf, size_t len) { - return netdev_store(dev, buf, len, change_flags); + return netdev_store(dev, attr, buf, len, change_flags); } NETDEVICE_SHOW(tx_queue_len, fmt_ulong); @@ -203,9 +209,11 @@ static int change_tx_queue_len(struct net_device *net, unsigned long new_len) return 0; } -static ssize_t store_tx_queue_len(struct class_device *dev, const char *buf, size_t len) +static ssize_t store_tx_queue_len(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t len) { - return netdev_store(dev, buf, len, change_tx_queue_len); + return netdev_store(dev, attr, buf, len, change_tx_queue_len); } NETDEVICE_SHOW(weight, fmt_dec); @@ -216,12 +224,13 @@ static int change_weight(struct net_device *net, unsigned long new_weight) return 0; } -static ssize_t store_weight(struct class_device *dev, const char *buf, size_t len) +static ssize_t store_weight(struct device *dev, struct device_attribute *attr, + const char *buf, size_t len) { - return netdev_store(dev, buf, len, change_weight); + return netdev_store(dev, attr, buf, len, change_weight); } -static struct class_device_attribute net_class_attributes[] = { +static struct device_attribute net_class_attributes[] = { __ATTR(addr_len, S_IRUGO, show_addr_len, NULL), __ATTR(iflink, S_IRUGO, show_iflink, NULL), __ATTR(ifindex, S_IRUGO, show_ifindex, NULL), @@ -242,10 +251,11 @@ static struct class_device_attribute net_class_attributes[] = { }; /* Show a given an attribute in the statistics group */ -static ssize_t netstat_show(const struct class_device *cd, char *buf, +static ssize_t netstat_show(const struct device *d, + struct device_attribute *attr, char *buf, unsigned long offset) { - struct net_device *dev = to_net_dev(cd); + struct net_device *dev = to_net_dev(d); struct net_device_stats *stats; ssize_t ret = -EINVAL; @@ -265,12 +275,13 @@ static ssize_t netstat_show(const struct class_device *cd, char *buf, /* generate a read-only statistics attribute */ #define NETSTAT_ENTRY(name) \ -static ssize_t show_##name(struct class_device *cd, char *buf) \ +static ssize_t show_##name(struct device *d, \ + struct device_attribute *attr, char *buf) \ { \ - return netstat_show(cd, buf, \ + return netstat_show(d, attr, buf, \ offsetof(struct net_device_stats, name)); \ } \ -static CLASS_DEVICE_ATTR(name, S_IRUGO, show_##name, NULL) +static DEVICE_ATTR(name, S_IRUGO, show_##name, NULL) NETSTAT_ENTRY(rx_packets); NETSTAT_ENTRY(tx_packets); @@ -297,29 +308,29 @@ NETSTAT_ENTRY(rx_compressed); NETSTAT_ENTRY(tx_compressed); static struct attribute *netstat_attrs[] = { - &class_device_attr_rx_packets.attr, - &class_device_attr_tx_packets.attr, - &class_device_attr_rx_bytes.attr, - &class_device_attr_tx_bytes.attr, - &class_device_attr_rx_errors.attr, - &class_device_attr_tx_errors.attr, - &class_device_attr_rx_dropped.attr, - &class_device_attr_tx_dropped.attr, - &class_device_attr_multicast.attr, - &class_device_attr_collisions.attr, - &class_device_attr_rx_length_errors.attr, - &class_device_attr_rx_over_errors.attr, - &class_device_attr_rx_crc_errors.attr, - &class_device_attr_rx_frame_errors.attr, - &class_device_attr_rx_fifo_errors.attr, - &class_device_attr_rx_missed_errors.attr, - &class_device_attr_tx_aborted_errors.attr, - &class_device_attr_tx_carrier_errors.attr, - &class_device_attr_tx_fifo_errors.attr, - &class_device_attr_tx_heartbeat_errors.attr, - &class_device_attr_tx_window_errors.attr, - &class_device_attr_rx_compressed.attr, - &class_device_attr_tx_compressed.attr, + &dev_attr_rx_packets.attr, + &dev_attr_tx_packets.attr, + &dev_attr_rx_bytes.attr, + &dev_attr_tx_bytes.attr, + &dev_attr_rx_errors.attr, + &dev_attr_tx_errors.attr, + &dev_attr_rx_dropped.attr, + &dev_attr_tx_dropped.attr, + &dev_attr_multicast.attr, + &dev_attr_collisions.attr, + &dev_attr_rx_length_errors.attr, + &dev_attr_rx_over_errors.attr, + &dev_attr_rx_crc_errors.attr, + &dev_attr_rx_frame_errors.attr, + &dev_attr_rx_fifo_errors.attr, + &dev_attr_rx_missed_errors.attr, + &dev_attr_tx_aborted_errors.attr, + &dev_attr_tx_carrier_errors.attr, + &dev_attr_tx_fifo_errors.attr, + &dev_attr_tx_heartbeat_errors.attr, + &dev_attr_tx_window_errors.attr, + &dev_attr_rx_compressed.attr, + &dev_attr_tx_compressed.attr, NULL }; @@ -331,11 +342,11 @@ static struct attribute_group netstat_group = { #ifdef WIRELESS_EXT /* helper function that does all the locking etc for wireless stats */ -static ssize_t wireless_show(struct class_device *cd, char *buf, +static ssize_t wireless_show(struct device *d, char *buf, ssize_t (*format)(const struct iw_statistics *, char *)) { - struct net_device *dev = to_net_dev(cd); + struct net_device *dev = to_net_dev(d); const struct iw_statistics *iw = NULL; ssize_t ret = -EINVAL; @@ -358,11 +369,12 @@ static ssize_t format_iw_##name(const struct iw_statistics *iw, char *buf) \ { \ return sprintf(buf, format_string, iw->field); \ } \ -static ssize_t show_iw_##name(struct class_device *cd, char *buf) \ +static ssize_t show_iw_##name(struct device *d, \ + struct device_attribute *attr, char *buf) \ { \ - return wireless_show(cd, buf, format_iw_##name); \ + return wireless_show(d, buf, format_iw_##name); \ } \ -static CLASS_DEVICE_ATTR(name, S_IRUGO, show_iw_##name, NULL) +static DEVICE_ATTR(name, S_IRUGO, show_iw_##name, NULL) WIRELESS_SHOW(status, status, fmt_hex); WIRELESS_SHOW(link, qual.qual, fmt_dec); @@ -376,16 +388,16 @@ WIRELESS_SHOW(retries, discard.retries, fmt_dec); WIRELESS_SHOW(beacon, miss.beacon, fmt_dec); static struct attribute *wireless_attrs[] = { - &class_device_attr_status.attr, - &class_device_attr_link.attr, - &class_device_attr_level.attr, - &class_device_attr_noise.attr, - &class_device_attr_nwid.attr, - &class_device_attr_crypt.attr, - &class_device_attr_fragment.attr, - &class_device_attr_retries.attr, - &class_device_attr_misc.attr, - &class_device_attr_beacon.attr, + &dev_attr_status.attr, + &dev_attr_link.attr, + &dev_attr_level.attr, + &dev_attr_noise.attr, + &dev_attr_nwid.attr, + &dev_attr_crypt.attr, + &dev_attr_fragment.attr, + &dev_attr_retries.attr, + &dev_attr_misc.attr, + &dev_attr_beacon.attr, NULL }; @@ -396,10 +408,10 @@ static struct attribute_group wireless_group = { #endif #ifdef CONFIG_HOTPLUG -static int netdev_uevent(struct class_device *cd, char **envp, +static int netdev_uevent(struct device *d, char **envp, int num_envp, char *buf, int size) { - struct net_device *dev = to_net_dev(cd); + struct net_device *dev = to_net_dev(d); int i = 0; int n; @@ -419,12 +431,11 @@ static int netdev_uevent(struct class_device *cd, char **envp, /* * netdev_release -- destroy and free a dead device. - * Called when last reference to class_device kobject is gone. + * Called when last reference to device kobject is gone. */ -static void netdev_release(struct class_device *cd) +static void netdev_release(struct device *d) { - struct net_device *dev - = container_of(cd, struct net_device, class_dev); + struct net_device *dev = to_net_dev(d); BUG_ON(dev->reg_state != NETREG_RELEASED); @@ -433,31 +444,31 @@ static void netdev_release(struct class_device *cd) static struct class net_class = { .name = "net", - .release = netdev_release, - .class_dev_attrs = net_class_attributes, + .dev_release = netdev_release, + .dev_attrs = net_class_attributes, #ifdef CONFIG_HOTPLUG - .uevent = netdev_uevent, + .dev_uevent = netdev_uevent, #endif }; void netdev_unregister_sysfs(struct net_device * net) { - class_device_del(&(net->class_dev)); + device_del(&(net->dev)); } /* Create sysfs entries for network device. */ int netdev_register_sysfs(struct net_device *net) { - struct class_device *class_dev = &(net->class_dev); + struct device *dev = &(net->dev); struct attribute_group **groups = net->sysfs_groups; - class_device_initialize(class_dev); - class_dev->class = &net_class; - class_dev->class_data = net; - class_dev->groups = groups; + device_initialize(dev); + dev->class = &net_class; + dev->platform_data = net; + dev->groups = groups; BUILD_BUG_ON(BUS_ID_SIZE < IFNAMSIZ); - strlcpy(class_dev->class_id, net->name, BUS_ID_SIZE); + strlcpy(dev->bus_id, net->name, BUS_ID_SIZE); if (net->get_stats) *groups++ = &netstat_group; @@ -467,7 +478,7 @@ int netdev_register_sysfs(struct net_device *net) *groups++ = &wireless_group; #endif - return class_device_add(class_dev); + return device_add(dev); } int netdev_sysfs_init(void) diff --git a/net/core/netpoll.c b/net/core/netpoll.c index b3c559b9ac3..823215d8e90 100644 --- a/net/core/netpoll.c +++ b/net/core/netpoll.c @@ -55,6 +55,7 @@ static void queue_process(struct work_struct *work) struct netpoll_info *npinfo = container_of(work, struct netpoll_info, tx_work.work); struct sk_buff *skb; + unsigned long flags; while ((skb = skb_dequeue(&npinfo->txq))) { struct net_device *dev = skb->dev; @@ -64,15 +65,19 @@ static void queue_process(struct work_struct *work) continue; } - netif_tx_lock_bh(dev); + local_irq_save(flags); + netif_tx_lock(dev); if (netif_queue_stopped(dev) || dev->hard_start_xmit(skb, dev) != NETDEV_TX_OK) { skb_queue_head(&npinfo->txq, skb); - netif_tx_unlock_bh(dev); + netif_tx_unlock(dev); + local_irq_restore(flags); schedule_delayed_work(&npinfo->tx_work, HZ/10); return; } + netif_tx_unlock(dev); + local_irq_restore(flags); } } @@ -242,22 +247,28 @@ static void netpoll_send_skb(struct netpoll *np, struct sk_buff *skb) /* don't get messages out of order, and no recursion */ if (skb_queue_len(&npinfo->txq) == 0 && - npinfo->poll_owner != smp_processor_id() && - netif_tx_trylock(dev)) { - /* try until next clock tick */ - for (tries = jiffies_to_usecs(1)/USEC_PER_POLL; tries > 0; --tries) { - if (!netif_queue_stopped(dev)) - status = dev->hard_start_xmit(skb, dev); + npinfo->poll_owner != smp_processor_id()) { + unsigned long flags; - if (status == NETDEV_TX_OK) - break; + local_irq_save(flags); + if (netif_tx_trylock(dev)) { + /* try until next clock tick */ + for (tries = jiffies_to_usecs(1)/USEC_PER_POLL; + tries > 0; --tries) { + if (!netif_queue_stopped(dev)) + status = dev->hard_start_xmit(skb, dev); - /* tickle device maybe there is some cleanup */ - netpoll_poll(np); + if (status == NETDEV_TX_OK) + break; + + /* tickle device maybe there is some cleanup */ + netpoll_poll(np); - udelay(USEC_PER_POLL); + udelay(USEC_PER_POLL); + } + netif_tx_unlock(dev); } - netif_tx_unlock(dev); + local_irq_restore(flags); } if (status != NETDEV_TX_OK) { @@ -330,6 +341,7 @@ static void arp_reply(struct sk_buff *skb) unsigned char *arp_ptr; int size, type = ARPOP_REPLY, ptype = ETH_P_ARP; __be32 sip, tip; + unsigned char *sha; struct sk_buff *send_skb; struct netpoll *np = NULL; @@ -356,9 +368,14 @@ static void arp_reply(struct sk_buff *skb) arp->ar_op != htons(ARPOP_REQUEST)) return; - arp_ptr = (unsigned char *)(arp+1) + skb->dev->addr_len; + arp_ptr = (unsigned char *)(arp+1); + /* save the location of the src hw addr */ + sha = arp_ptr; + arp_ptr += skb->dev->addr_len; memcpy(&sip, arp_ptr, 4); - arp_ptr += 4 + skb->dev->addr_len; + arp_ptr += 4; + /* if we actually cared about dst hw addr, it would get copied here */ + arp_ptr += skb->dev->addr_len; memcpy(&tip, arp_ptr, 4); /* Should we ignore arp? */ @@ -381,7 +398,7 @@ static void arp_reply(struct sk_buff *skb) if (np->dev->hard_header && np->dev->hard_header(send_skb, skb->dev, ptype, - np->remote_mac, np->local_mac, + sha, np->local_mac, send_skb->len) < 0) { kfree_skb(send_skb); return; @@ -405,7 +422,7 @@ static void arp_reply(struct sk_buff *skb) arp_ptr += np->dev->addr_len; memcpy(arp_ptr, &tip, 4); arp_ptr += 4; - memcpy(arp_ptr, np->remote_mac, np->dev->addr_len); + memcpy(arp_ptr, sha, np->dev->addr_len); arp_ptr += np->dev->addr_len; memcpy(arp_ptr, &sip, 4); diff --git a/net/core/pktgen.c b/net/core/pktgen.c index 1897a3a385d..04d4b93c68e 100644 --- a/net/core/pktgen.c +++ b/net/core/pktgen.c @@ -148,6 +148,7 @@ #include <linux/seq_file.h> #include <linux/wait.h> #include <linux/etherdevice.h> +#include <linux/kthread.h> #include <net/checksum.h> #include <net/ipv6.h> #include <net/addrconf.h> @@ -360,8 +361,7 @@ struct pktgen_thread { spinlock_t if_lock; struct list_head if_list; /* All device here */ struct list_head th_list; - int removed; - char name[32]; + struct task_struct *tsk; char result[512]; u32 max_before_softirq; /* We'll call do_softirq to prevent starvation. */ @@ -1689,7 +1689,7 @@ static int pktgen_thread_show(struct seq_file *seq, void *v) BUG_ON(!t); seq_printf(seq, "Name: %s max_before_softirq: %d\n", - t->name, t->max_before_softirq); + t->tsk->comm, t->max_before_softirq); seq_printf(seq, "Running: "); @@ -3112,7 +3112,7 @@ static void pktgen_rem_thread(struct pktgen_thread *t) { /* Remove from the thread list */ - remove_proc_entry(t->name, pg_proc_dir); + remove_proc_entry(t->tsk->comm, pg_proc_dir); mutex_lock(&pktgen_thread_lock); @@ -3260,58 +3260,40 @@ out:; * Main loop of the thread goes here */ -static void pktgen_thread_worker(struct pktgen_thread *t) +static int pktgen_thread_worker(void *arg) { DEFINE_WAIT(wait); + struct pktgen_thread *t = arg; struct pktgen_dev *pkt_dev = NULL; int cpu = t->cpu; - sigset_t tmpsig; u32 max_before_softirq; u32 tx_since_softirq = 0; - daemonize("pktgen/%d", cpu); - - /* Block all signals except SIGKILL, SIGSTOP and SIGTERM */ - - spin_lock_irq(¤t->sighand->siglock); - tmpsig = current->blocked; - siginitsetinv(¤t->blocked, - sigmask(SIGKILL) | sigmask(SIGSTOP) | sigmask(SIGTERM)); - - recalc_sigpending(); - spin_unlock_irq(¤t->sighand->siglock); - - /* Migrate to the right CPU */ - set_cpus_allowed(current, cpumask_of_cpu(cpu)); - if (smp_processor_id() != cpu) - BUG(); + BUG_ON(smp_processor_id() != cpu); init_waitqueue_head(&t->queue); - t->control &= ~(T_TERMINATE); - t->control &= ~(T_RUN); - t->control &= ~(T_STOP); - t->control &= ~(T_REMDEVALL); - t->control &= ~(T_REMDEV); - t->pid = current->pid; PG_DEBUG(printk("pktgen: starting pktgen/%d: pid=%d\n", cpu, current->pid)); max_before_softirq = t->max_before_softirq; - __set_current_state(TASK_INTERRUPTIBLE); - mb(); + set_current_state(TASK_INTERRUPTIBLE); - while (1) { - - __set_current_state(TASK_RUNNING); + while (!kthread_should_stop()) { + pkt_dev = next_to_run(t); - /* - * Get next dev to xmit -- if any. - */ + if (!pkt_dev && + (t->control & (T_STOP | T_RUN | T_REMDEVALL | T_REMDEV)) + == 0) { + prepare_to_wait(&(t->queue), &wait, + TASK_INTERRUPTIBLE); + schedule_timeout(HZ / 10); + finish_wait(&(t->queue), &wait); + } - pkt_dev = next_to_run(t); + __set_current_state(TASK_RUNNING); if (pkt_dev) { @@ -3329,21 +3311,8 @@ static void pktgen_thread_worker(struct pktgen_thread *t) do_softirq(); tx_since_softirq = 0; } - } else { - prepare_to_wait(&(t->queue), &wait, TASK_INTERRUPTIBLE); - schedule_timeout(HZ / 10); - finish_wait(&(t->queue), &wait); } - /* - * Back from sleep, either due to the timeout or signal. - * We check if we have any "posted" work for us. - */ - - if (t->control & T_TERMINATE || signal_pending(current)) - /* we received a request to terminate ourself */ - break; - if (t->control & T_STOP) { pktgen_stop(t); t->control &= ~(T_STOP); @@ -3364,20 +3333,19 @@ static void pktgen_thread_worker(struct pktgen_thread *t) t->control &= ~(T_REMDEV); } - if (need_resched()) - schedule(); + set_current_state(TASK_INTERRUPTIBLE); } - PG_DEBUG(printk("pktgen: %s stopping all device\n", t->name)); + PG_DEBUG(printk("pktgen: %s stopping all device\n", t->tsk->comm)); pktgen_stop(t); - PG_DEBUG(printk("pktgen: %s removing all device\n", t->name)); + PG_DEBUG(printk("pktgen: %s removing all device\n", t->tsk->comm)); pktgen_rem_all_ifs(t); - PG_DEBUG(printk("pktgen: %s removing thread.\n", t->name)); + PG_DEBUG(printk("pktgen: %s removing thread.\n", t->tsk->comm)); pktgen_rem_thread(t); - t->removed = 1; + return 0; } static struct pktgen_dev *pktgen_find_dev(struct pktgen_thread *t, @@ -3495,37 +3463,11 @@ static int pktgen_add_device(struct pktgen_thread *t, const char *ifname) return add_dev_to_thread(t, pkt_dev); } -static struct pktgen_thread *__init pktgen_find_thread(const char *name) +static int __init pktgen_create_thread(int cpu) { struct pktgen_thread *t; - - mutex_lock(&pktgen_thread_lock); - - list_for_each_entry(t, &pktgen_threads, th_list) - if (strcmp(t->name, name) == 0) { - mutex_unlock(&pktgen_thread_lock); - return t; - } - - mutex_unlock(&pktgen_thread_lock); - return NULL; -} - -static int __init pktgen_create_thread(const char *name, int cpu) -{ - int err; - struct pktgen_thread *t = NULL; struct proc_dir_entry *pe; - - if (strlen(name) > 31) { - printk("pktgen: ERROR: Thread name cannot be more than 31 characters.\n"); - return -EINVAL; - } - - if (pktgen_find_thread(name)) { - printk("pktgen: ERROR: thread: %s already exists\n", name); - return -EINVAL; - } + struct task_struct *p; t = kzalloc(sizeof(struct pktgen_thread), GFP_KERNEL); if (!t) { @@ -3533,14 +3475,29 @@ static int __init pktgen_create_thread(const char *name, int cpu) return -ENOMEM; } - strcpy(t->name, name); spin_lock_init(&t->if_lock); t->cpu = cpu; - pe = create_proc_entry(t->name, 0600, pg_proc_dir); + INIT_LIST_HEAD(&t->if_list); + + list_add_tail(&t->th_list, &pktgen_threads); + + p = kthread_create(pktgen_thread_worker, t, "kpktgend_%d", cpu); + if (IS_ERR(p)) { + printk("pktgen: kernel_thread() failed for cpu %d\n", t->cpu); + list_del(&t->th_list); + kfree(t); + return PTR_ERR(p); + } + kthread_bind(p, cpu); + t->tsk = p; + + pe = create_proc_entry(t->tsk->comm, 0600, pg_proc_dir); if (!pe) { printk("pktgen: cannot create %s/%s procfs entry.\n", - PG_PROC_DIR, t->name); + PG_PROC_DIR, t->tsk->comm); + kthread_stop(p); + list_del(&t->th_list); kfree(t); return -EINVAL; } @@ -3548,21 +3505,7 @@ static int __init pktgen_create_thread(const char *name, int cpu) pe->proc_fops = &pktgen_thread_fops; pe->data = t; - INIT_LIST_HEAD(&t->if_list); - - list_add_tail(&t->th_list, &pktgen_threads); - - t->removed = 0; - - err = kernel_thread((void *)pktgen_thread_worker, (void *)t, - CLONE_FS | CLONE_FILES | CLONE_SIGHAND); - if (err < 0) { - printk("pktgen: kernel_thread() failed for cpu %d\n", t->cpu); - remove_proc_entry(t->name, pg_proc_dir); - list_del(&t->th_list); - kfree(t); - return err; - } + wake_up_process(p); return 0; } @@ -3643,10 +3586,8 @@ static int __init pg_init(void) for_each_online_cpu(cpu) { int err; - char buf[30]; - sprintf(buf, "kpktgend_%i", cpu); - err = pktgen_create_thread(buf, cpu); + err = pktgen_create_thread(cpu); if (err) printk("pktgen: WARNING: Cannot create thread for cpu %d (%d)\n", cpu, err); @@ -3674,9 +3615,8 @@ static void __exit pg_cleanup(void) list_for_each_safe(q, n, &pktgen_threads) { t = list_entry(q, struct pktgen_thread, th_list); - t->control |= (T_TERMINATE); - - wait_event_interruptible_timeout(queue, (t->removed == 1), HZ); + kthread_stop(t->tsk); + kfree(t); } /* Un-register us from receiving netdevice events */ diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index e76539a5eb5..9bf9ae05f15 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -320,7 +320,7 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev, nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ifm), flags); if (nlh == NULL) - return -ENOBUFS; + return -EMSGSIZE; ifm = nlmsg_data(nlh); ifm->ifi_family = AF_UNSPEC; @@ -384,7 +384,8 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev, return nlmsg_end(skb, nlh); nla_put_failure: - return nlmsg_cancel(skb, nlh); + nlmsg_cancel(skb, nlh); + return -EMSGSIZE; } static int rtnl_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb) @@ -633,9 +634,12 @@ static int rtnl_getlink(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg) err = rtnl_fill_ifinfo(nskb, dev, iw, iw_buf_len, RTM_NEWLINK, NETLINK_CB(skb).pid, nlh->nlmsg_seq, 0, 0); - /* failure impilies BUG in if_nlmsg_size or wireless_rtnetlink_get */ - BUG_ON(err < 0); - + if (err < 0) { + /* -EMSGSIZE implies BUG in if_nlmsg_size */ + WARN_ON(err == -EMSGSIZE); + kfree_skb(nskb); + goto errout; + } err = rtnl_unicast(nskb, NETLINK_CB(skb).pid); errout: kfree(iw_buf); @@ -678,9 +682,12 @@ void rtmsg_ifinfo(int type, struct net_device *dev, unsigned change) goto errout; err = rtnl_fill_ifinfo(skb, dev, NULL, 0, type, 0, 0, change, 0); - /* failure implies BUG in if_nlmsg_size() */ - BUG_ON(err < 0); - + if (err < 0) { + /* -EMSGSIZE implies BUG in if_nlmsg_size() */ + WARN_ON(err == -EMSGSIZE); + kfree_skb(skb); + goto errout; + } err = rtnl_notify(skb, 0, RTNLGRP_LINK, NULL, GFP_KERNEL); errout: if (err < 0) diff --git a/net/core/skbuff.c b/net/core/skbuff.c index de7801d589e..f3404ae9f19 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -268,7 +268,7 @@ nodata: struct sk_buff *__netdev_alloc_skb(struct net_device *dev, unsigned int length, gfp_t gfp_mask) { - int node = dev->class_dev.dev ? dev_to_node(dev->class_dev.dev) : -1; + int node = dev->dev.parent ? dev_to_node(dev->dev.parent) : -1; struct sk_buff *skb; skb = __alloc_skb(length + NET_SKB_PAD, gfp_mask, 0, node); diff --git a/net/dccp/ackvec.c b/net/dccp/ackvec.c index 1f4727ddbdb..a086c6312d3 100644 --- a/net/dccp/ackvec.c +++ b/net/dccp/ackvec.c @@ -223,7 +223,7 @@ static inline int dccp_ackvec_set_buf_head_state(struct dccp_ackvec *av, gap = -new_head; } new_head += DCCP_MAX_ACKVEC_LEN; - } + } av->dccpav_buf_head = new_head; @@ -336,7 +336,7 @@ out_duplicate: void dccp_ackvector_print(const u64 ackno, const unsigned char *vector, int len) { dccp_pr_debug_cat("ACK vector len=%d, ackno=%llu |", len, - (unsigned long long)ackno); + (unsigned long long)ackno); while (len--) { const u8 state = (*vector & DCCP_ACKVEC_STATE_MASK) >> 6; diff --git a/net/dccp/ccid.h b/net/dccp/ccid.h index bcc2d12ae81..c65cb2453e4 100644 --- a/net/dccp/ccid.h +++ b/net/dccp/ccid.h @@ -43,8 +43,6 @@ struct ccid_operations { unsigned char* value); int (*ccid_hc_rx_insert_options)(struct sock *sk, struct sk_buff *skb); - int (*ccid_hc_tx_insert_options)(struct sock *sk, - struct sk_buff *skb); void (*ccid_hc_tx_packet_recv)(struct sock *sk, struct sk_buff *skb); int (*ccid_hc_tx_parse_options)(struct sock *sk, @@ -146,14 +144,6 @@ static inline int ccid_hc_rx_parse_options(struct ccid *ccid, struct sock *sk, return rc; } -static inline int ccid_hc_tx_insert_options(struct ccid *ccid, struct sock *sk, - struct sk_buff *skb) -{ - if (ccid->ccid_ops->ccid_hc_tx_insert_options != NULL) - return ccid->ccid_ops->ccid_hc_tx_insert_options(sk, skb); - return 0; -} - static inline int ccid_hc_rx_insert_options(struct ccid *ccid, struct sock *sk, struct sk_buff *skb) { diff --git a/net/dccp/ccids/ccid2.c b/net/dccp/ccids/ccid2.c index 2555be8f479..fd38b05d6f7 100644 --- a/net/dccp/ccids/ccid2.c +++ b/net/dccp/ccids/ccid2.c @@ -351,7 +351,7 @@ static void ccid2_hc_tx_packet_sent(struct sock *sk, int more, unsigned int len) while (seqp != hctx->ccid2hctx_seqh) { ccid2_pr_debug("out seq=%llu acked=%d time=%lu\n", - (unsigned long long)seqp->ccid2s_seq, + (unsigned long long)seqp->ccid2s_seq, seqp->ccid2s_acked, seqp->ccid2s_sent); seqp = seqp->ccid2s_next; } @@ -473,7 +473,7 @@ static inline void ccid2_new_ack(struct sock *sk, /* first measurement */ if (hctx->ccid2hctx_srtt == -1) { ccid2_pr_debug("R: %lu Time=%lu seq=%llu\n", - r, jiffies, + r, jiffies, (unsigned long long)seqp->ccid2s_seq); ccid2_change_srtt(hctx, r); hctx->ccid2hctx_rttvar = r >> 1; @@ -518,8 +518,8 @@ static inline void ccid2_new_ack(struct sock *sk, hctx->ccid2hctx_lastrtt = jiffies; ccid2_pr_debug("srtt: %ld rttvar: %ld rto: %ld (HZ=%d) R=%lu\n", - hctx->ccid2hctx_srtt, hctx->ccid2hctx_rttvar, - hctx->ccid2hctx_rto, HZ, r); + hctx->ccid2hctx_srtt, hctx->ccid2hctx_rttvar, + hctx->ccid2hctx_rto, HZ, r); hctx->ccid2hctx_sent = 0; } @@ -667,9 +667,9 @@ static void ccid2_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb) /* new packet received or marked */ if (state != DCCP_ACKVEC_STATE_NOT_RECEIVED && !seqp->ccid2s_acked) { - if (state == + if (state == DCCP_ACKVEC_STATE_ECN_MARKED) { - ccid2_congestion_event(hctx, + ccid2_congestion_event(hctx, seqp); } else ccid2_new_ack(sk, seqp, diff --git a/net/dccp/ccids/ccid3.c b/net/dccp/ccids/ccid3.c index 66a27b9688c..5c452a3ec4d 100644 --- a/net/dccp/ccids/ccid3.c +++ b/net/dccp/ccids/ccid3.c @@ -41,27 +41,6 @@ #include "lib/tfrc.h" #include "ccid3.h" -/* - * Reason for maths here is to avoid 32 bit overflow when a is big. - * With this we get close to the limit. - */ -static u32 usecs_div(const u32 a, const u32 b) -{ - const u32 div = a < (UINT_MAX / (USEC_PER_SEC / 10)) ? 10 : - a < (UINT_MAX / (USEC_PER_SEC / 50)) ? 50 : - a < (UINT_MAX / (USEC_PER_SEC / 100)) ? 100 : - a < (UINT_MAX / (USEC_PER_SEC / 500)) ? 500 : - a < (UINT_MAX / (USEC_PER_SEC / 1000)) ? 1000 : - a < (UINT_MAX / (USEC_PER_SEC / 5000)) ? 5000 : - a < (UINT_MAX / (USEC_PER_SEC / 10000)) ? 10000 : - a < (UINT_MAX / (USEC_PER_SEC / 50000)) ? 50000 : - 100000; - const u32 tmp = a * (USEC_PER_SEC / div); - return (b >= 2 * div) ? tmp / (b / div) : tmp; -} - - - #ifdef CONFIG_IP_DCCP_CCID3_DEBUG static int ccid3_debug; #define ccid3_pr_debug(format, a...) DCCP_PR_DEBUG(ccid3_debug, format, ##a) @@ -108,8 +87,9 @@ static inline void ccid3_update_send_time(struct ccid3_hc_tx_sock *hctx) { timeval_sub_usecs(&hctx->ccid3hctx_t_nom, hctx->ccid3hctx_t_ipi); - /* Calculate new t_ipi (inter packet interval) by t_ipi = s / X_inst */ - hctx->ccid3hctx_t_ipi = usecs_div(hctx->ccid3hctx_s, hctx->ccid3hctx_x); + /* Calculate new t_ipi = s / X_inst (X_inst is in 64 * bytes/second) */ + hctx->ccid3hctx_t_ipi = scaled_div(hctx->ccid3hctx_s, + hctx->ccid3hctx_x >> 6); /* Update nominal send time with regard to the new t_ipi */ timeval_add_usecs(&hctx->ccid3hctx_t_nom, hctx->ccid3hctx_t_ipi); @@ -128,40 +108,44 @@ static inline void ccid3_update_send_time(struct ccid3_hc_tx_sock *hctx) * X = max(min(2 * X, 2 * X_recv), s / R); * tld = now; * + * Note: X and X_recv are both stored in units of 64 * bytes/second, to support + * fine-grained resolution of sending rates. This requires scaling by 2^6 + * throughout the code. Only X_calc is unscaled (in bytes/second). + * * If X has changed, we also update the scheduled send time t_now, * the inter-packet interval t_ipi, and the delta value. - */ + */ static void ccid3_hc_tx_update_x(struct sock *sk, struct timeval *now) { struct ccid3_hc_tx_sock *hctx = ccid3_hc_tx_sk(sk); - const __u32 old_x = hctx->ccid3hctx_x; + const __u64 old_x = hctx->ccid3hctx_x; if (hctx->ccid3hctx_p > 0) { - hctx->ccid3hctx_x_calc = tfrc_calc_x(hctx->ccid3hctx_s, - hctx->ccid3hctx_rtt, - hctx->ccid3hctx_p); - hctx->ccid3hctx_x = max_t(u32, min(hctx->ccid3hctx_x_calc, - hctx->ccid3hctx_x_recv * 2), - hctx->ccid3hctx_s / TFRC_T_MBI); - - } else if (timeval_delta(now, &hctx->ccid3hctx_t_ld) >= - hctx->ccid3hctx_rtt) { - hctx->ccid3hctx_x = max(min(hctx->ccid3hctx_x_recv, - hctx->ccid3hctx_x ) * 2, - usecs_div(hctx->ccid3hctx_s, - hctx->ccid3hctx_rtt) ); + + hctx->ccid3hctx_x = min(((__u64)hctx->ccid3hctx_x_calc) << 6, + hctx->ccid3hctx_x_recv * 2); + hctx->ccid3hctx_x = max(hctx->ccid3hctx_x, + (((__u64)hctx->ccid3hctx_s) << 6) / + TFRC_T_MBI); + + } else if (timeval_delta(now, &hctx->ccid3hctx_t_ld) - + (suseconds_t)hctx->ccid3hctx_rtt >= 0) { + + hctx->ccid3hctx_x = + max(2 * min(hctx->ccid3hctx_x, hctx->ccid3hctx_x_recv), + scaled_div(((__u64)hctx->ccid3hctx_s) << 6, + hctx->ccid3hctx_rtt)); hctx->ccid3hctx_t_ld = *now; - } else - ccid3_pr_debug("Not changing X\n"); + } if (hctx->ccid3hctx_x != old_x) ccid3_update_send_time(hctx); } /* - * Track the mean packet size `s' (cf. RFC 4342, 5.3 and RFC 3448, 4.1) - * @len: DCCP packet payload size in bytes + * Track the mean packet size `s' (cf. RFC 4342, 5.3 and RFC 3448, 4.1) + * @len: DCCP packet payload size in bytes */ static inline void ccid3_hc_tx_update_s(struct ccid3_hc_tx_sock *hctx, int len) { @@ -178,6 +162,33 @@ static inline void ccid3_hc_tx_update_s(struct ccid3_hc_tx_sock *hctx, int len) */ } +/* + * Update Window Counter using the algorithm from [RFC 4342, 8.1]. + * The algorithm is not applicable if RTT < 4 microseconds. + */ +static inline void ccid3_hc_tx_update_win_count(struct ccid3_hc_tx_sock *hctx, + struct timeval *now) +{ + suseconds_t delta; + u32 quarter_rtts; + + if (unlikely(hctx->ccid3hctx_rtt < 4)) /* avoid divide-by-zero */ + return; + + delta = timeval_delta(now, &hctx->ccid3hctx_t_last_win_count); + DCCP_BUG_ON(delta < 0); + + quarter_rtts = (u32)delta / (hctx->ccid3hctx_rtt / 4); + + if (quarter_rtts > 0) { + hctx->ccid3hctx_t_last_win_count = *now; + hctx->ccid3hctx_last_win_count += min_t(u32, quarter_rtts, 5); + hctx->ccid3hctx_last_win_count &= 0xF; /* mod 16 */ + + ccid3_pr_debug("now at %#X\n", hctx->ccid3hctx_last_win_count); + } +} + static void ccid3_hc_tx_no_feedback_timer(unsigned long data) { struct sock *sk = (struct sock *)data; @@ -191,20 +202,20 @@ static void ccid3_hc_tx_no_feedback_timer(unsigned long data) goto restart_timer; } - ccid3_pr_debug("%s, sk=%p, state=%s\n", dccp_role(sk), sk, + ccid3_pr_debug("%s(%p, state=%s) - entry \n", dccp_role(sk), sk, ccid3_tx_state_name(hctx->ccid3hctx_state)); - + switch (hctx->ccid3hctx_state) { case TFRC_SSTATE_NO_FBACK: /* RFC 3448, 4.4: Halve send rate directly */ - hctx->ccid3hctx_x = min_t(u32, hctx->ccid3hctx_x / 2, - hctx->ccid3hctx_s / TFRC_T_MBI); + hctx->ccid3hctx_x = max(hctx->ccid3hctx_x / 2, + (((__u64)hctx->ccid3hctx_s) << 6) / + TFRC_T_MBI); - ccid3_pr_debug("%s, sk=%p, state=%s, updated tx rate to %d " - "bytes/s\n", - dccp_role(sk), sk, + ccid3_pr_debug("%s(%p, state=%s), updated tx rate to %u " + "bytes/s\n", dccp_role(sk), sk, ccid3_tx_state_name(hctx->ccid3hctx_state), - hctx->ccid3hctx_x); + (unsigned)(hctx->ccid3hctx_x >> 6)); /* The value of R is still undefined and so we can not recompute * the timout value. Keep initial value as per [RFC 4342, 5]. */ t_nfb = TFRC_INITIAL_TIMEOUT; @@ -213,34 +224,46 @@ static void ccid3_hc_tx_no_feedback_timer(unsigned long data) case TFRC_SSTATE_FBACK: /* * Check if IDLE since last timeout and recv rate is less than - * 4 packets per RTT + * 4 packets (in units of 64*bytes/sec) per RTT */ if (!hctx->ccid3hctx_idle || - (hctx->ccid3hctx_x_recv >= - 4 * usecs_div(hctx->ccid3hctx_s, hctx->ccid3hctx_rtt))) { + (hctx->ccid3hctx_x_recv >= 4 * + scaled_div(((__u64)hctx->ccid3hctx_s) << 6, + hctx->ccid3hctx_rtt))) { struct timeval now; - ccid3_pr_debug("%s, sk=%p, state=%s, not idle\n", + ccid3_pr_debug("%s(%p, state=%s), not idle\n", dccp_role(sk), sk, - ccid3_tx_state_name(hctx->ccid3hctx_state)); - /* Halve sending rate */ + ccid3_tx_state_name(hctx->ccid3hctx_state)); - /* If (p == 0 || X_calc > 2 * X_recv) + /* + * Modify the cached value of X_recv [RFC 3448, 4.4] + * + * If (p == 0 || X_calc > 2 * X_recv) * X_recv = max(X_recv / 2, s / (2 * t_mbi)); * Else * X_recv = X_calc / 4; + * + * Note that X_recv is scaled by 2^6 while X_calc is not */ BUG_ON(hctx->ccid3hctx_p && !hctx->ccid3hctx_x_calc); if (hctx->ccid3hctx_p == 0 || - hctx->ccid3hctx_x_calc > 2 * hctx->ccid3hctx_x_recv) - hctx->ccid3hctx_x_recv = max_t(u32, hctx->ccid3hctx_x_recv / 2, - hctx->ccid3hctx_s / (2 * TFRC_T_MBI)); - else - hctx->ccid3hctx_x_recv = hctx->ccid3hctx_x_calc / 4; - - /* Update sending rate */ - dccp_timestamp(sk, &now); + (hctx->ccid3hctx_x_calc > + (hctx->ccid3hctx_x_recv >> 5))) { + + hctx->ccid3hctx_x_recv = + max(hctx->ccid3hctx_x_recv / 2, + (((__u64)hctx->ccid3hctx_s) << 6) / + (2 * TFRC_T_MBI)); + + if (hctx->ccid3hctx_p == 0) + dccp_timestamp(sk, &now); + } else { + hctx->ccid3hctx_x_recv = hctx->ccid3hctx_x_calc; + hctx->ccid3hctx_x_recv <<= 4; + } + /* Now recalculate X [RFC 3448, 4.3, step (4)] */ ccid3_hc_tx_update_x(sk, &now); } /* @@ -251,7 +274,7 @@ static void ccid3_hc_tx_no_feedback_timer(unsigned long data) t_nfb = max(hctx->ccid3hctx_t_rto, 2 * hctx->ccid3hctx_t_ipi); break; case TFRC_SSTATE_NO_SENT: - DCCP_BUG("Illegal %s state NO_SENT, sk=%p", dccp_role(sk), sk); + DCCP_BUG("%s(%p) - Illegal state NO_SENT", dccp_role(sk), sk); /* fall through */ case TFRC_SSTATE_TERM: goto out; @@ -277,9 +300,8 @@ static int ccid3_hc_tx_send_packet(struct sock *sk, struct sk_buff *skb) { struct dccp_sock *dp = dccp_sk(sk); struct ccid3_hc_tx_sock *hctx = ccid3_hc_tx_sk(sk); - struct dccp_tx_hist_entry *new_packet; struct timeval now; - long delay; + suseconds_t delay; BUG_ON(hctx == NULL); @@ -291,34 +313,21 @@ static int ccid3_hc_tx_send_packet(struct sock *sk, struct sk_buff *skb) if (unlikely(skb->len == 0)) return -EBADMSG; - /* See if last packet allocated was not sent */ - new_packet = dccp_tx_hist_head(&hctx->ccid3hctx_hist); - if (new_packet == NULL || new_packet->dccphtx_sent) { - new_packet = dccp_tx_hist_entry_new(ccid3_tx_hist, - GFP_ATOMIC); - - if (unlikely(new_packet == NULL)) { - DCCP_WARN("%s, sk=%p, not enough mem to add to history," - "send refused\n", dccp_role(sk), sk); - return -ENOBUFS; - } - - dccp_tx_hist_add_entry(&hctx->ccid3hctx_hist, new_packet); - } - dccp_timestamp(sk, &now); switch (hctx->ccid3hctx_state) { case TFRC_SSTATE_NO_SENT: sk_reset_timer(sk, &hctx->ccid3hctx_no_feedback_timer, - jiffies + usecs_to_jiffies(TFRC_INITIAL_TIMEOUT)); + (jiffies + + usecs_to_jiffies(TFRC_INITIAL_TIMEOUT))); hctx->ccid3hctx_last_win_count = 0; hctx->ccid3hctx_t_last_win_count = now; ccid3_hc_tx_set_state(sk, TFRC_SSTATE_NO_FBACK); - /* Set initial sending rate to 1 packet per second */ + /* Set initial sending rate X/s to 1pps (X is scaled by 2^6) */ ccid3_hc_tx_update_s(hctx, skb->len); - hctx->ccid3hctx_x = hctx->ccid3hctx_s; + hctx->ccid3hctx_x = hctx->ccid3hctx_s; + hctx->ccid3hctx_x <<= 6; /* First timeout, according to [RFC 3448, 4.2], is 1 second */ hctx->ccid3hctx_t_ipi = USEC_PER_SEC; @@ -332,77 +341,57 @@ static int ccid3_hc_tx_send_packet(struct sock *sk, struct sk_buff *skb) case TFRC_SSTATE_FBACK: delay = timeval_delta(&hctx->ccid3hctx_t_nom, &now); /* - * Scheduling of packet transmissions [RFC 3448, 4.6] + * Scheduling of packet transmissions [RFC 3448, 4.6] * * if (t_now > t_nom - delta) * // send the packet now * else * // send the packet in (t_nom - t_now) milliseconds. */ - if (delay - (long)hctx->ccid3hctx_delta >= 0) + if (delay - (suseconds_t)hctx->ccid3hctx_delta >= 0) return delay / 1000L; + + ccid3_hc_tx_update_win_count(hctx, &now); break; case TFRC_SSTATE_TERM: - DCCP_BUG("Illegal %s state TERM, sk=%p", dccp_role(sk), sk); + DCCP_BUG("%s(%p) - Illegal state TERM", dccp_role(sk), sk); return -EINVAL; } /* prepare to send now (add options etc.) */ dp->dccps_hc_tx_insert_options = 1; - new_packet->dccphtx_ccval = DCCP_SKB_CB(skb)->dccpd_ccval = - hctx->ccid3hctx_last_win_count; + DCCP_SKB_CB(skb)->dccpd_ccval = hctx->ccid3hctx_last_win_count; + + /* set the nominal send time for the next following packet */ timeval_add_usecs(&hctx->ccid3hctx_t_nom, hctx->ccid3hctx_t_ipi); return 0; } -static void ccid3_hc_tx_packet_sent(struct sock *sk, int more, unsigned int len) +static void ccid3_hc_tx_packet_sent(struct sock *sk, int more, + unsigned int len) { - const struct dccp_sock *dp = dccp_sk(sk); struct ccid3_hc_tx_sock *hctx = ccid3_hc_tx_sk(sk); struct timeval now; - unsigned long quarter_rtt; struct dccp_tx_hist_entry *packet; BUG_ON(hctx == NULL); - dccp_timestamp(sk, &now); - ccid3_hc_tx_update_s(hctx, len); - packet = dccp_tx_hist_head(&hctx->ccid3hctx_hist); + packet = dccp_tx_hist_entry_new(ccid3_tx_hist, GFP_ATOMIC); if (unlikely(packet == NULL)) { - DCCP_WARN("packet doesn't exist in history!\n"); - return; - } - if (unlikely(packet->dccphtx_sent)) { - DCCP_WARN("no unsent packet in history!\n"); + DCCP_CRIT("packet history - out of memory!"); return; } - packet->dccphtx_tstamp = now; - packet->dccphtx_seqno = dp->dccps_gss; - /* - * Check if win_count have changed - * Algorithm in "8.1. Window Counter Value" in RFC 4342. - */ - quarter_rtt = timeval_delta(&now, &hctx->ccid3hctx_t_last_win_count); - if (likely(hctx->ccid3hctx_rtt > 8)) - quarter_rtt /= hctx->ccid3hctx_rtt / 4; - - if (quarter_rtt > 0) { - hctx->ccid3hctx_t_last_win_count = now; - hctx->ccid3hctx_last_win_count = (hctx->ccid3hctx_last_win_count + - min_t(unsigned long, quarter_rtt, 5)) % 16; - ccid3_pr_debug("%s, sk=%p, window changed from " - "%u to %u!\n", - dccp_role(sk), sk, - packet->dccphtx_ccval, - hctx->ccid3hctx_last_win_count); - } + dccp_tx_hist_add_entry(&hctx->ccid3hctx_hist, packet); - hctx->ccid3hctx_idle = 0; - packet->dccphtx_rtt = hctx->ccid3hctx_rtt; - packet->dccphtx_sent = 1; + dccp_timestamp(sk, &now); + packet->dccphtx_tstamp = now; + packet->dccphtx_seqno = dccp_sk(sk)->dccps_gss; + packet->dccphtx_rtt = hctx->ccid3hctx_rtt; + packet->dccphtx_sent = 1; + hctx->ccid3hctx_idle = 0; } static void ccid3_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb) @@ -414,7 +403,7 @@ static void ccid3_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb) struct timeval now; unsigned long t_nfb; u32 pinv; - long r_sample, t_elapsed; + suseconds_t r_sample, t_elapsed; BUG_ON(hctx == NULL); @@ -430,44 +419,44 @@ static void ccid3_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb) case TFRC_SSTATE_FBACK: /* get packet from history to look up t_recvdata */ packet = dccp_tx_hist_find_entry(&hctx->ccid3hctx_hist, - DCCP_SKB_CB(skb)->dccpd_ack_seq); + DCCP_SKB_CB(skb)->dccpd_ack_seq); if (unlikely(packet == NULL)) { DCCP_WARN("%s(%p), seqno %llu(%s) doesn't exist " "in history!\n", dccp_role(sk), sk, (unsigned long long)DCCP_SKB_CB(skb)->dccpd_ack_seq, - dccp_packet_name(DCCP_SKB_CB(skb)->dccpd_type)); + dccp_packet_name(DCCP_SKB_CB(skb)->dccpd_type)); return; } - /* Update receive rate */ + /* Update receive rate in units of 64 * bytes/second */ hctx->ccid3hctx_x_recv = opt_recv->ccid3or_receive_rate; + hctx->ccid3hctx_x_recv <<= 6; /* Update loss event rate */ pinv = opt_recv->ccid3or_loss_event_rate; - if (pinv == ~0U || pinv == 0) + if (pinv == ~0U || pinv == 0) /* see RFC 4342, 8.5 */ hctx->ccid3hctx_p = 0; - else - hctx->ccid3hctx_p = 1000000 / pinv; + else /* can not exceed 100% */ + hctx->ccid3hctx_p = 1000000 / pinv; dccp_timestamp(sk, &now); /* * Calculate new round trip sample as per [RFC 3448, 4.3] by - * R_sample = (now - t_recvdata) - t_elapsed + * R_sample = (now - t_recvdata) - t_elapsed */ r_sample = timeval_delta(&now, &packet->dccphtx_tstamp); t_elapsed = dp->dccps_options_received.dccpor_elapsed_time * 10; - if (unlikely(r_sample <= 0)) { - DCCP_WARN("WARNING: R_sample (%ld) <= 0!\n", r_sample); - r_sample = 0; - } else if (unlikely(r_sample <= t_elapsed)) - DCCP_WARN("WARNING: r_sample=%ldus <= t_elapsed=%ldus\n", - r_sample, t_elapsed); + DCCP_BUG_ON(r_sample < 0); + if (unlikely(r_sample <= t_elapsed)) + DCCP_WARN("WARNING: r_sample=%dus <= t_elapsed=%dus\n", + (int)r_sample, (int)t_elapsed); else r_sample -= t_elapsed; + CCID3_RTT_SANITY_CHECK(r_sample); - /* Update RTT estimate by + /* Update RTT estimate by * If (No feedback recv) * R = R_sample; * Else @@ -476,34 +465,46 @@ static void ccid3_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb) * q is a constant, RFC 3448 recomments 0.9 */ if (hctx->ccid3hctx_state == TFRC_SSTATE_NO_FBACK) { - /* Use Larger Initial Windows [RFC 4342, sec. 5] - * We deviate in that we use `s' instead of `MSS'. */ - u16 w_init = max( 4 * hctx->ccid3hctx_s, - max(2 * hctx->ccid3hctx_s, 4380)); + /* + * Larger Initial Windows [RFC 4342, sec. 5] + * We deviate in that we use `s' instead of `MSS'. + */ + __u64 w_init = min(4 * hctx->ccid3hctx_s, + max(2 * hctx->ccid3hctx_s, 4380)); hctx->ccid3hctx_rtt = r_sample; - hctx->ccid3hctx_x = usecs_div(w_init, r_sample); + hctx->ccid3hctx_x = scaled_div(w_init << 6, r_sample); hctx->ccid3hctx_t_ld = now; ccid3_update_send_time(hctx); - ccid3_pr_debug("%s(%p), s=%u, w_init=%u, " - "R_sample=%ldus, X=%u\n", dccp_role(sk), - sk, hctx->ccid3hctx_s, w_init, r_sample, - hctx->ccid3hctx_x); + ccid3_pr_debug("%s(%p), s=%u, w_init=%llu, " + "R_sample=%dus, X=%u\n", dccp_role(sk), + sk, hctx->ccid3hctx_s, + (unsigned long long)w_init, + (int)r_sample, + (unsigned)(hctx->ccid3hctx_x >> 6)); ccid3_hc_tx_set_state(sk, TFRC_SSTATE_FBACK); } else { hctx->ccid3hctx_rtt = (9 * hctx->ccid3hctx_rtt + - (u32)r_sample ) / 10; - + (u32)r_sample) / 10; + + /* Update sending rate (step 4 of [RFC 3448, 4.3]) */ + if (hctx->ccid3hctx_p > 0) + hctx->ccid3hctx_x_calc = + tfrc_calc_x(hctx->ccid3hctx_s, + hctx->ccid3hctx_rtt, + hctx->ccid3hctx_p); ccid3_hc_tx_update_x(sk, &now); - ccid3_pr_debug("%s(%p), RTT=%uus (sample=%ldus), s=%u, " - "p=%u, X_calc=%u, X=%u\n", dccp_role(sk), - sk, hctx->ccid3hctx_rtt, r_sample, + ccid3_pr_debug("%s(%p), RTT=%uus (sample=%dus), s=%u, " + "p=%u, X_calc=%u, X_recv=%u, X=%u\n", + dccp_role(sk), + sk, hctx->ccid3hctx_rtt, (int)r_sample, hctx->ccid3hctx_s, hctx->ccid3hctx_p, hctx->ccid3hctx_x_calc, - hctx->ccid3hctx_x); + (unsigned)(hctx->ccid3hctx_x_recv >> 6), + (unsigned)(hctx->ccid3hctx_x >> 6)); } /* unschedule no feedback timer */ @@ -513,57 +514,48 @@ static void ccid3_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb) dccp_tx_hist_purge_older(ccid3_tx_hist, &hctx->ccid3hctx_hist, packet); /* - * As we have calculated new ipi, delta, t_nom it is possible that - * we now can send a packet, so wake up dccp_wait_for_ccid + * As we have calculated new ipi, delta, t_nom it is possible + * that we now can send a packet, so wake up dccp_wait_for_ccid */ sk->sk_write_space(sk); /* * Update timeout interval for the nofeedback timer. * We use a configuration option to increase the lower bound. - * This can help avoid triggering the nofeedback timer too often - * ('spinning') on LANs with small RTTs. + * This can help avoid triggering the nofeedback timer too + * often ('spinning') on LANs with small RTTs. */ hctx->ccid3hctx_t_rto = max_t(u32, 4 * hctx->ccid3hctx_rtt, CONFIG_IP_DCCP_CCID3_RTO * - (USEC_PER_SEC/1000) ); + (USEC_PER_SEC/1000)); /* * Schedule no feedback timer to expire in * max(t_RTO, 2 * s/X) = max(t_RTO, 2 * t_ipi) */ t_nfb = max(hctx->ccid3hctx_t_rto, 2 * hctx->ccid3hctx_t_ipi); - - ccid3_pr_debug("%s, sk=%p, Scheduled no feedback timer to " + + ccid3_pr_debug("%s(%p), Scheduled no feedback timer to " "expire in %lu jiffies (%luus)\n", - dccp_role(sk), sk, - usecs_to_jiffies(t_nfb), t_nfb); + dccp_role(sk), + sk, usecs_to_jiffies(t_nfb), t_nfb); - sk_reset_timer(sk, &hctx->ccid3hctx_no_feedback_timer, + sk_reset_timer(sk, &hctx->ccid3hctx_no_feedback_timer, jiffies + usecs_to_jiffies(t_nfb)); /* set idle flag */ - hctx->ccid3hctx_idle = 1; + hctx->ccid3hctx_idle = 1; break; case TFRC_SSTATE_NO_SENT: - if (dccp_sk(sk)->dccps_role == DCCP_ROLE_CLIENT) - DCCP_WARN("Illegal ACK received - no packet sent\n"); + /* + * XXX when implementing bidirectional rx/tx check this again + */ + DCCP_WARN("Illegal ACK received - no packet sent\n"); /* fall through */ case TFRC_SSTATE_TERM: /* ignore feedback when closing */ break; } } -static int ccid3_hc_tx_insert_options(struct sock *sk, struct sk_buff *skb) -{ - const struct ccid3_hc_tx_sock *hctx = ccid3_hc_tx_sk(sk); - - BUG_ON(hctx == NULL); - - if (sk->sk_state == DCCP_OPEN || sk->sk_state == DCCP_PARTOPEN) - DCCP_SKB_CB(skb)->dccpd_ccval = hctx->ccid3hctx_last_win_count; - return 0; -} - static int ccid3_hc_tx_parse_options(struct sock *sk, unsigned char option, unsigned char len, u16 idx, unsigned char *value) @@ -588,13 +580,14 @@ static int ccid3_hc_tx_parse_options(struct sock *sk, unsigned char option, switch (option) { case TFRC_OPT_LOSS_EVENT_RATE: if (unlikely(len != 4)) { - DCCP_WARN("%s, sk=%p, invalid len %d " + DCCP_WARN("%s(%p), invalid len %d " "for TFRC_OPT_LOSS_EVENT_RATE\n", dccp_role(sk), sk, len); rc = -EINVAL; } else { - opt_recv->ccid3or_loss_event_rate = ntohl(*(__be32 *)value); - ccid3_pr_debug("%s, sk=%p, LOSS_EVENT_RATE=%u\n", + opt_recv->ccid3or_loss_event_rate = + ntohl(*(__be32 *)value); + ccid3_pr_debug("%s(%p), LOSS_EVENT_RATE=%u\n", dccp_role(sk), sk, opt_recv->ccid3or_loss_event_rate); } @@ -602,20 +595,21 @@ static int ccid3_hc_tx_parse_options(struct sock *sk, unsigned char option, case TFRC_OPT_LOSS_INTERVALS: opt_recv->ccid3or_loss_intervals_idx = idx; opt_recv->ccid3or_loss_intervals_len = len; - ccid3_pr_debug("%s, sk=%p, LOSS_INTERVALS=(%u, %u)\n", + ccid3_pr_debug("%s(%p), LOSS_INTERVALS=(%u, %u)\n", dccp_role(sk), sk, opt_recv->ccid3or_loss_intervals_idx, opt_recv->ccid3or_loss_intervals_len); break; case TFRC_OPT_RECEIVE_RATE: if (unlikely(len != 4)) { - DCCP_WARN("%s, sk=%p, invalid len %d " + DCCP_WARN("%s(%p), invalid len %d " "for TFRC_OPT_RECEIVE_RATE\n", dccp_role(sk), sk, len); rc = -EINVAL; } else { - opt_recv->ccid3or_receive_rate = ntohl(*(__be32 *)value); - ccid3_pr_debug("%s, sk=%p, RECEIVE_RATE=%u\n", + opt_recv->ccid3or_receive_rate = + ntohl(*(__be32 *)value); + ccid3_pr_debug("%s(%p), RECEIVE_RATE=%u\n", dccp_role(sk), sk, opt_recv->ccid3or_receive_rate); } @@ -630,10 +624,12 @@ static int ccid3_hc_tx_init(struct ccid *ccid, struct sock *sk) struct ccid3_hc_tx_sock *hctx = ccid_priv(ccid); hctx->ccid3hctx_s = 0; + hctx->ccid3hctx_rtt = 0; hctx->ccid3hctx_state = TFRC_SSTATE_NO_SENT; INIT_LIST_HEAD(&hctx->ccid3hctx_hist); - hctx->ccid3hctx_no_feedback_timer.function = ccid3_hc_tx_no_feedback_timer; + hctx->ccid3hctx_no_feedback_timer.function = + ccid3_hc_tx_no_feedback_timer; hctx->ccid3hctx_no_feedback_timer.data = (unsigned long)sk; init_timer(&hctx->ccid3hctx_no_feedback_timer); @@ -698,8 +694,9 @@ static void ccid3_hc_rx_send_feedback(struct sock *sk) struct dccp_sock *dp = dccp_sk(sk); struct dccp_rx_hist_entry *packet; struct timeval now; + suseconds_t delta; - ccid3_pr_debug("%s, sk=%p\n", dccp_role(sk), sk); + ccid3_pr_debug("%s(%p) - entry \n", dccp_role(sk), sk); dccp_timestamp(sk, &now); @@ -707,21 +704,21 @@ static void ccid3_hc_rx_send_feedback(struct sock *sk) case TFRC_RSTATE_NO_DATA: hcrx->ccid3hcrx_x_recv = 0; break; - case TFRC_RSTATE_DATA: { - const u32 delta = timeval_delta(&now, - &hcrx->ccid3hcrx_tstamp_last_feedback); - hcrx->ccid3hcrx_x_recv = usecs_div(hcrx->ccid3hcrx_bytes_recv, - delta); - } + case TFRC_RSTATE_DATA: + delta = timeval_delta(&now, + &hcrx->ccid3hcrx_tstamp_last_feedback); + DCCP_BUG_ON(delta < 0); + hcrx->ccid3hcrx_x_recv = + scaled_div32(hcrx->ccid3hcrx_bytes_recv, delta); break; case TFRC_RSTATE_TERM: - DCCP_BUG("Illegal %s state TERM, sk=%p", dccp_role(sk), sk); + DCCP_BUG("%s(%p) - Illegal state TERM", dccp_role(sk), sk); return; } packet = dccp_rx_hist_find_data_packet(&hcrx->ccid3hcrx_hist); if (unlikely(packet == NULL)) { - DCCP_WARN("%s, sk=%p, no data packet in history!\n", + DCCP_WARN("%s(%p), no data packet in history!\n", dccp_role(sk), sk); return; } @@ -730,13 +727,19 @@ static void ccid3_hc_rx_send_feedback(struct sock *sk) hcrx->ccid3hcrx_ccval_last_counter = packet->dccphrx_ccval; hcrx->ccid3hcrx_bytes_recv = 0; - /* Convert to multiples of 10us */ - hcrx->ccid3hcrx_elapsed_time = - timeval_delta(&now, &packet->dccphrx_tstamp) / 10; + /* Elapsed time information [RFC 4340, 13.2] in units of 10 * usecs */ + delta = timeval_delta(&now, &packet->dccphrx_tstamp); + DCCP_BUG_ON(delta < 0); + hcrx->ccid3hcrx_elapsed_time = delta / 10; + if (hcrx->ccid3hcrx_p == 0) - hcrx->ccid3hcrx_pinv = ~0; - else + hcrx->ccid3hcrx_pinv = ~0U; /* see RFC 4342, 8.5 */ + else if (hcrx->ccid3hcrx_p > 1000000) { + DCCP_WARN("p (%u) > 100%%\n", hcrx->ccid3hcrx_p); + hcrx->ccid3hcrx_pinv = 1; /* use 100% in this case */ + } else hcrx->ccid3hcrx_pinv = 1000000 / hcrx->ccid3hcrx_p; + dp->dccps_hc_rx_insert_options = 1; dccp_send_ack(sk); } @@ -764,9 +767,9 @@ static int ccid3_hc_rx_insert_options(struct sock *sk, struct sk_buff *skb) hcrx->ccid3hcrx_elapsed_time)) || dccp_insert_option_timestamp(sk, skb) || dccp_insert_option(sk, skb, TFRC_OPT_LOSS_EVENT_RATE, - &pinv, sizeof(pinv)) || + &pinv, sizeof(pinv)) || dccp_insert_option(sk, skb, TFRC_OPT_RECEIVE_RATE, - &x_recv, sizeof(x_recv))) + &x_recv, sizeof(x_recv))) return -1; return 0; @@ -780,12 +783,13 @@ static u32 ccid3_hc_rx_calc_first_li(struct sock *sk) { struct ccid3_hc_rx_sock *hcrx = ccid3_hc_rx_sk(sk); struct dccp_rx_hist_entry *entry, *next, *tail = NULL; - u32 rtt, delta, x_recv, fval, p, tmp2; + u32 x_recv, p; + suseconds_t rtt, delta; struct timeval tstamp = { 0, }; int interval = 0; int win_count = 0; int step = 0; - u64 tmp1; + u64 fval; list_for_each_entry_safe(entry, next, &hcrx->ccid3hcrx_hist, dccphrx_node) { @@ -810,13 +814,13 @@ static u32 ccid3_hc_rx_calc_first_li(struct sock *sk) } if (unlikely(step == 0)) { - DCCP_WARN("%s, sk=%p, packet history has no data packets!\n", + DCCP_WARN("%s(%p), packet history has no data packets!\n", dccp_role(sk), sk); return ~0; } if (unlikely(interval == 0)) { - DCCP_WARN("%s, sk=%p, Could not find a win_count interval > 0." + DCCP_WARN("%s(%p), Could not find a win_count interval > 0." "Defaulting to 1\n", dccp_role(sk), sk); interval = 1; } @@ -825,41 +829,51 @@ found: DCCP_CRIT("tail is null\n"); return ~0; } - rtt = timeval_delta(&tstamp, &tail->dccphrx_tstamp) * 4 / interval; - ccid3_pr_debug("%s, sk=%p, approximated RTT to %uus\n", - dccp_role(sk), sk, rtt); - if (rtt == 0) { - DCCP_WARN("RTT==0, setting to 1\n"); - rtt = 1; + delta = timeval_delta(&tstamp, &tail->dccphrx_tstamp); + DCCP_BUG_ON(delta < 0); + + rtt = delta * 4 / interval; + ccid3_pr_debug("%s(%p), approximated RTT to %dus\n", + dccp_role(sk), sk, (int)rtt); + + /* + * Determine the length of the first loss interval via inverse lookup. + * Assume that X_recv can be computed by the throughput equation + * s + * X_recv = -------- + * R * fval + * Find some p such that f(p) = fval; return 1/p [RFC 3448, 6.3.1]. + */ + if (rtt == 0) { /* would result in divide-by-zero */ + DCCP_WARN("RTT==0\n"); + return ~0; } dccp_timestamp(sk, &tstamp); delta = timeval_delta(&tstamp, &hcrx->ccid3hcrx_tstamp_last_feedback); - x_recv = usecs_div(hcrx->ccid3hcrx_bytes_recv, delta); - - if (x_recv == 0) - x_recv = hcrx->ccid3hcrx_x_recv; - - tmp1 = (u64)x_recv * (u64)rtt; - do_div(tmp1,10000000); - tmp2 = (u32)tmp1; - - if (!tmp2) { - DCCP_CRIT("tmp2 = 0, x_recv = %u, rtt =%u\n", x_recv, rtt); - return ~0; + DCCP_BUG_ON(delta <= 0); + + x_recv = scaled_div32(hcrx->ccid3hcrx_bytes_recv, delta); + if (x_recv == 0) { /* would also trigger divide-by-zero */ + DCCP_WARN("X_recv==0\n"); + if ((x_recv = hcrx->ccid3hcrx_x_recv) == 0) { + DCCP_BUG("stored value of X_recv is zero"); + return ~0; + } } - fval = (hcrx->ccid3hcrx_s * 100000) / tmp2; - /* do not alter order above or you will get overflow on 32 bit */ + fval = scaled_div(hcrx->ccid3hcrx_s, rtt); + fval = scaled_div32(fval, x_recv); p = tfrc_calc_x_reverse_lookup(fval); - ccid3_pr_debug("%s, sk=%p, receive rate=%u bytes/s, implied " + + ccid3_pr_debug("%s(%p), receive rate=%u bytes/s, implied " "loss rate=%u\n", dccp_role(sk), sk, x_recv, p); if (p == 0) return ~0; else - return 1000000 / p; + return 1000000 / p; } static void ccid3_hc_rx_update_li(struct sock *sk, u64 seq_loss, u8 win_loss) @@ -913,7 +927,8 @@ static int ccid3_hc_rx_detect_loss(struct sock *sk, struct dccp_rx_hist_entry *packet) { struct ccid3_hc_rx_sock *hcrx = ccid3_hc_rx_sk(sk); - struct dccp_rx_hist_entry *rx_hist = dccp_rx_hist_head(&hcrx->ccid3hcrx_hist); + struct dccp_rx_hist_entry *rx_hist = + dccp_rx_hist_head(&hcrx->ccid3hcrx_hist); u64 seqno = packet->dccphrx_seqno; u64 tmp_seqno; int loss = 0; @@ -941,7 +956,7 @@ static int ccid3_hc_rx_detect_loss(struct sock *sk, dccp_inc_seqno(&tmp_seqno); while (dccp_rx_hist_find_entry(&hcrx->ccid3hcrx_hist, tmp_seqno, &ccval)) { - hcrx->ccid3hcrx_seqno_nonloss = tmp_seqno; + hcrx->ccid3hcrx_seqno_nonloss = tmp_seqno; hcrx->ccid3hcrx_ccval_nonloss = ccval; dccp_inc_seqno(&tmp_seqno); } @@ -967,7 +982,8 @@ static void ccid3_hc_rx_packet_recv(struct sock *sk, struct sk_buff *skb) const struct dccp_options_received *opt_recv; struct dccp_rx_hist_entry *packet; struct timeval now; - u32 p_prev, rtt_prev, r_sample, t_elapsed; + u32 p_prev, rtt_prev; + suseconds_t r_sample, t_elapsed; int loss, payload_size; BUG_ON(hcrx == NULL); @@ -987,11 +1003,13 @@ static void ccid3_hc_rx_packet_recv(struct sock *sk, struct sk_buff *skb) r_sample = timeval_usecs(&now); t_elapsed = opt_recv->dccpor_elapsed_time * 10; + DCCP_BUG_ON(r_sample < 0); if (unlikely(r_sample <= t_elapsed)) - DCCP_WARN("r_sample=%uus, t_elapsed=%uus\n", - r_sample, t_elapsed); + DCCP_WARN("r_sample=%ldus, t_elapsed=%ldus\n", + (long)r_sample, (long)t_elapsed); else r_sample -= t_elapsed; + CCID3_RTT_SANITY_CHECK(r_sample); if (hcrx->ccid3hcrx_state == TFRC_RSTATE_NO_DATA) hcrx->ccid3hcrx_rtt = r_sample; @@ -1000,8 +1018,8 @@ static void ccid3_hc_rx_packet_recv(struct sock *sk, struct sk_buff *skb) r_sample / 10; if (rtt_prev != hcrx->ccid3hcrx_rtt) - ccid3_pr_debug("%s, New RTT=%uus, elapsed time=%u\n", - dccp_role(sk), hcrx->ccid3hcrx_rtt, + ccid3_pr_debug("%s(%p), New RTT=%uus, elapsed time=%u\n", + dccp_role(sk), sk, hcrx->ccid3hcrx_rtt, opt_recv->dccpor_elapsed_time); break; case DCCP_PKT_DATA: @@ -1013,7 +1031,7 @@ static void ccid3_hc_rx_packet_recv(struct sock *sk, struct sk_buff *skb) packet = dccp_rx_hist_entry_new(ccid3_rx_hist, sk, opt_recv->dccpor_ndp, skb, GFP_ATOMIC); if (unlikely(packet == NULL)) { - DCCP_WARN("%s, sk=%p, Not enough mem to add rx packet " + DCCP_WARN("%s(%p), Not enough mem to add rx packet " "to history, consider it lost!\n", dccp_role(sk), sk); return; } @@ -1028,9 +1046,8 @@ static void ccid3_hc_rx_packet_recv(struct sock *sk, struct sk_buff *skb) switch (hcrx->ccid3hcrx_state) { case TFRC_RSTATE_NO_DATA: - ccid3_pr_debug("%s, sk=%p(%s), skb=%p, sending initial " - "feedback\n", - dccp_role(sk), sk, + ccid3_pr_debug("%s(%p, state=%s), skb=%p, sending initial " + "feedback\n", dccp_role(sk), sk, dccp_state_name(sk->sk_state), skb); ccid3_hc_rx_send_feedback(sk); ccid3_hc_rx_set_state(sk, TFRC_RSTATE_DATA); @@ -1041,19 +1058,19 @@ static void ccid3_hc_rx_packet_recv(struct sock *sk, struct sk_buff *skb) break; dccp_timestamp(sk, &now); - if (timeval_delta(&now, &hcrx->ccid3hcrx_tstamp_last_ack) >= - hcrx->ccid3hcrx_rtt) { + if ((timeval_delta(&now, &hcrx->ccid3hcrx_tstamp_last_ack) - + (suseconds_t)hcrx->ccid3hcrx_rtt) >= 0) { hcrx->ccid3hcrx_tstamp_last_ack = now; ccid3_hc_rx_send_feedback(sk); } return; case TFRC_RSTATE_TERM: - DCCP_BUG("Illegal %s state TERM, sk=%p", dccp_role(sk), sk); + DCCP_BUG("%s(%p) - Illegal state TERM", dccp_role(sk), sk); return; } /* Dealing with packet loss */ - ccid3_pr_debug("%s, sk=%p(%s), data loss! Reacting...\n", + ccid3_pr_debug("%s(%p, state=%s), data loss! Reacting...\n", dccp_role(sk), sk, dccp_state_name(sk->sk_state)); p_prev = hcrx->ccid3hcrx_p; @@ -1078,7 +1095,7 @@ static int ccid3_hc_rx_init(struct ccid *ccid, struct sock *sk) { struct ccid3_hc_rx_sock *hcrx = ccid_priv(ccid); - ccid3_pr_debug("%s, sk=%p\n", dccp_role(sk), sk); + ccid3_pr_debug("entry\n"); hcrx->ccid3hcrx_state = TFRC_RSTATE_NO_DATA; INIT_LIST_HEAD(&hcrx->ccid3hcrx_hist); @@ -1086,7 +1103,7 @@ static int ccid3_hc_rx_init(struct ccid *ccid, struct sock *sk) dccp_timestamp(sk, &hcrx->ccid3hcrx_tstamp_last_ack); hcrx->ccid3hcrx_tstamp_last_feedback = hcrx->ccid3hcrx_tstamp_last_ack; hcrx->ccid3hcrx_s = 0; - hcrx->ccid3hcrx_rtt = 5000; /* XXX 5ms for now... */ + hcrx->ccid3hcrx_rtt = 0; return 0; } @@ -1115,9 +1132,9 @@ static void ccid3_hc_rx_get_info(struct sock *sk, struct tcp_info *info) BUG_ON(hcrx == NULL); - info->tcpi_ca_state = hcrx->ccid3hcrx_state; - info->tcpi_options |= TCPI_OPT_TIMESTAMPS; - info->tcpi_rcv_rtt = hcrx->ccid3hcrx_rtt; + info->tcpi_ca_state = hcrx->ccid3hcrx_state; + info->tcpi_options |= TCPI_OPT_TIMESTAMPS; + info->tcpi_rcv_rtt = hcrx->ccid3hcrx_rtt; } static void ccid3_hc_tx_get_info(struct sock *sk, struct tcp_info *info) @@ -1198,7 +1215,6 @@ static struct ccid_operations ccid3 = { .ccid_hc_tx_send_packet = ccid3_hc_tx_send_packet, .ccid_hc_tx_packet_sent = ccid3_hc_tx_packet_sent, .ccid_hc_tx_packet_recv = ccid3_hc_tx_packet_recv, - .ccid_hc_tx_insert_options = ccid3_hc_tx_insert_options, .ccid_hc_tx_parse_options = ccid3_hc_tx_parse_options, .ccid_hc_rx_obj_size = sizeof(struct ccid3_hc_rx_sock), .ccid_hc_rx_init = ccid3_hc_rx_init, @@ -1210,7 +1226,7 @@ static struct ccid_operations ccid3 = { .ccid_hc_rx_getsockopt = ccid3_hc_rx_getsockopt, .ccid_hc_tx_getsockopt = ccid3_hc_tx_getsockopt, }; - + #ifdef CONFIG_IP_DCCP_CCID3_DEBUG module_param(ccid3_debug, int, 0444); MODULE_PARM_DESC(ccid3_debug, "Enable debug messages"); @@ -1233,7 +1249,7 @@ static __init int ccid3_module_init(void) goto out_free_tx; rc = ccid_register(&ccid3); - if (rc != 0) + if (rc != 0) goto out_free_loss_interval_history; out: return rc; diff --git a/net/dccp/ccids/ccid3.h b/net/dccp/ccids/ccid3.h index 07596d704ef..15776a88c09 100644 --- a/net/dccp/ccids/ccid3.h +++ b/net/dccp/ccids/ccid3.h @@ -51,6 +51,16 @@ /* Parameter t_mbi from [RFC 3448, 4.3]: backoff interval in seconds */ #define TFRC_T_MBI 64 +/* What we think is a reasonable upper limit on RTT values */ +#define CCID3_SANE_RTT_MAX ((suseconds_t)(4 * USEC_PER_SEC)) + +#define CCID3_RTT_SANITY_CHECK(rtt) do { \ + if (rtt > CCID3_SANE_RTT_MAX) { \ + DCCP_CRIT("RTT (%d) too large, substituting %d", \ + (int)rtt, (int)CCID3_SANE_RTT_MAX); \ + rtt = CCID3_SANE_RTT_MAX; \ + } } while (0) + enum ccid3_options { TFRC_OPT_LOSS_EVENT_RATE = 192, TFRC_OPT_LOSS_INTERVALS = 193, @@ -67,7 +77,7 @@ struct ccid3_options_received { /* TFRC sender states */ enum ccid3_hc_tx_states { - TFRC_SSTATE_NO_SENT = 1, + TFRC_SSTATE_NO_SENT = 1, TFRC_SSTATE_NO_FBACK, TFRC_SSTATE_FBACK, TFRC_SSTATE_TERM, @@ -75,23 +85,23 @@ enum ccid3_hc_tx_states { /** struct ccid3_hc_tx_sock - CCID3 sender half-connection socket * - * @ccid3hctx_x - Current sending rate - * @ccid3hctx_x_recv - Receive rate - * @ccid3hctx_x_calc - Calculated send rate (RFC 3448, 3.1) + * @ccid3hctx_x - Current sending rate in 64 * bytes per second + * @ccid3hctx_x_recv - Receive rate in 64 * bytes per second + * @ccid3hctx_x_calc - Calculated rate in bytes per second * @ccid3hctx_rtt - Estimate of current round trip time in usecs * @ccid3hctx_p - Current loss event rate (0-1) scaled by 1000000 - * @ccid3hctx_s - Packet size - * @ccid3hctx_t_rto - Retransmission Timeout (RFC 3448, 3.1) - * @ccid3hctx_t_ipi - Interpacket (send) interval (RFC 3448, 4.6) + * @ccid3hctx_s - Packet size in bytes + * @ccid3hctx_t_rto - Nofeedback Timer setting in usecs + * @ccid3hctx_t_ipi - Interpacket (send) interval (RFC 3448, 4.6) in usecs * @ccid3hctx_state - Sender state, one of %ccid3_hc_tx_states * @ccid3hctx_last_win_count - Last window counter sent * @ccid3hctx_t_last_win_count - Timestamp of earliest packet - * with last_win_count value sent + * with last_win_count value sent * @ccid3hctx_no_feedback_timer - Handle to no feedback timer * @ccid3hctx_idle - Flag indicating that sender is idling * @ccid3hctx_t_ld - Time last doubled during slow start * @ccid3hctx_t_nom - Nominal send time of next packet - * @ccid3hctx_delta - Send timer delta + * @ccid3hctx_delta - Send timer delta (RFC 3448, 4.6) in usecs * @ccid3hctx_hist - Packet history * @ccid3hctx_options_received - Parsed set of retrieved options */ @@ -105,7 +115,7 @@ struct ccid3_hc_tx_sock { #define ccid3hctx_t_rto ccid3hctx_tfrc.tfrctx_rto #define ccid3hctx_t_ipi ccid3hctx_tfrc.tfrctx_ipi u16 ccid3hctx_s; - enum ccid3_hc_tx_states ccid3hctx_state:8; + enum ccid3_hc_tx_states ccid3hctx_state:8; u8 ccid3hctx_last_win_count; u8 ccid3hctx_idle; struct timeval ccid3hctx_t_last_win_count; @@ -119,7 +129,7 @@ struct ccid3_hc_tx_sock { /* TFRC receiver states */ enum ccid3_hc_rx_states { - TFRC_RSTATE_NO_DATA = 1, + TFRC_RSTATE_NO_DATA = 1, TFRC_RSTATE_DATA, TFRC_RSTATE_TERM = 127, }; @@ -147,18 +157,18 @@ struct ccid3_hc_rx_sock { #define ccid3hcrx_x_recv ccid3hcrx_tfrc.tfrcrx_x_recv #define ccid3hcrx_rtt ccid3hcrx_tfrc.tfrcrx_rtt #define ccid3hcrx_p ccid3hcrx_tfrc.tfrcrx_p - u64 ccid3hcrx_seqno_nonloss:48, + u64 ccid3hcrx_seqno_nonloss:48, ccid3hcrx_ccval_nonloss:4, ccid3hcrx_ccval_last_counter:4; enum ccid3_hc_rx_states ccid3hcrx_state:8; - u32 ccid3hcrx_bytes_recv; - struct timeval ccid3hcrx_tstamp_last_feedback; - struct timeval ccid3hcrx_tstamp_last_ack; + u32 ccid3hcrx_bytes_recv; + struct timeval ccid3hcrx_tstamp_last_feedback; + struct timeval ccid3hcrx_tstamp_last_ack; struct list_head ccid3hcrx_hist; struct list_head ccid3hcrx_li_hist; - u16 ccid3hcrx_s; - u32 ccid3hcrx_pinv; - u32 ccid3hcrx_elapsed_time; + u16 ccid3hcrx_s; + u32 ccid3hcrx_pinv; + u32 ccid3hcrx_elapsed_time; }; static inline struct ccid3_hc_tx_sock *ccid3_hc_tx_sk(const struct sock *sk) diff --git a/net/dccp/ccids/lib/packet_history.c b/net/dccp/ccids/lib/packet_history.c index b876c9c81c6..2e8ef42721e 100644 --- a/net/dccp/ccids/lib/packet_history.c +++ b/net/dccp/ccids/lib/packet_history.c @@ -36,9 +36,100 @@ #include <linux/module.h> #include <linux/string.h> - #include "packet_history.h" +/* + * Transmitter History Routines + */ +struct dccp_tx_hist *dccp_tx_hist_new(const char *name) +{ + struct dccp_tx_hist *hist = kmalloc(sizeof(*hist), GFP_ATOMIC); + static const char dccp_tx_hist_mask[] = "tx_hist_%s"; + char *slab_name; + + if (hist == NULL) + goto out; + + slab_name = kmalloc(strlen(name) + sizeof(dccp_tx_hist_mask) - 1, + GFP_ATOMIC); + if (slab_name == NULL) + goto out_free_hist; + + sprintf(slab_name, dccp_tx_hist_mask, name); + hist->dccptxh_slab = kmem_cache_create(slab_name, + sizeof(struct dccp_tx_hist_entry), + 0, SLAB_HWCACHE_ALIGN, + NULL, NULL); + if (hist->dccptxh_slab == NULL) + goto out_free_slab_name; +out: + return hist; +out_free_slab_name: + kfree(slab_name); +out_free_hist: + kfree(hist); + hist = NULL; + goto out; +} + +EXPORT_SYMBOL_GPL(dccp_tx_hist_new); + +void dccp_tx_hist_delete(struct dccp_tx_hist *hist) +{ + const char* name = kmem_cache_name(hist->dccptxh_slab); + + kmem_cache_destroy(hist->dccptxh_slab); + kfree(name); + kfree(hist); +} + +EXPORT_SYMBOL_GPL(dccp_tx_hist_delete); + +struct dccp_tx_hist_entry * + dccp_tx_hist_find_entry(const struct list_head *list, const u64 seq) +{ + struct dccp_tx_hist_entry *packet = NULL, *entry; + + list_for_each_entry(entry, list, dccphtx_node) + if (entry->dccphtx_seqno == seq) { + packet = entry; + break; + } + + return packet; +} + +EXPORT_SYMBOL_GPL(dccp_tx_hist_find_entry); + +void dccp_tx_hist_purge(struct dccp_tx_hist *hist, struct list_head *list) +{ + struct dccp_tx_hist_entry *entry, *next; + + list_for_each_entry_safe(entry, next, list, dccphtx_node) { + list_del_init(&entry->dccphtx_node); + dccp_tx_hist_entry_delete(hist, entry); + } +} + +EXPORT_SYMBOL_GPL(dccp_tx_hist_purge); + +void dccp_tx_hist_purge_older(struct dccp_tx_hist *hist, + struct list_head *list, + struct dccp_tx_hist_entry *packet) +{ + struct dccp_tx_hist_entry *next; + + list_for_each_entry_safe_continue(packet, next, list, dccphtx_node) { + list_del_init(&packet->dccphtx_node); + dccp_tx_hist_entry_delete(hist, packet); + } +} + +EXPORT_SYMBOL_GPL(dccp_tx_hist_purge_older); + +/* + * Receiver History Routines + */ struct dccp_rx_hist *dccp_rx_hist_new(const char *name) { struct dccp_rx_hist *hist = kmalloc(sizeof(*hist), GFP_ATOMIC); @@ -83,18 +174,24 @@ void dccp_rx_hist_delete(struct dccp_rx_hist *hist) EXPORT_SYMBOL_GPL(dccp_rx_hist_delete); -void dccp_rx_hist_purge(struct dccp_rx_hist *hist, struct list_head *list) +int dccp_rx_hist_find_entry(const struct list_head *list, const u64 seq, + u8 *ccval) { - struct dccp_rx_hist_entry *entry, *next; + struct dccp_rx_hist_entry *packet = NULL, *entry; - list_for_each_entry_safe(entry, next, list, dccphrx_node) { - list_del_init(&entry->dccphrx_node); - kmem_cache_free(hist->dccprxh_slab, entry); - } -} + list_for_each_entry(entry, list, dccphrx_node) + if (entry->dccphrx_seqno == seq) { + packet = entry; + break; + } -EXPORT_SYMBOL_GPL(dccp_rx_hist_purge); + if (packet) + *ccval = packet->dccphrx_ccval; + return packet != NULL; +} + +EXPORT_SYMBOL_GPL(dccp_rx_hist_find_entry); struct dccp_rx_hist_entry * dccp_rx_hist_find_data_packet(const struct list_head *list) { @@ -184,110 +281,18 @@ void dccp_rx_hist_add_packet(struct dccp_rx_hist *hist, EXPORT_SYMBOL_GPL(dccp_rx_hist_add_packet); -struct dccp_tx_hist *dccp_tx_hist_new(const char *name) -{ - struct dccp_tx_hist *hist = kmalloc(sizeof(*hist), GFP_ATOMIC); - static const char dccp_tx_hist_mask[] = "tx_hist_%s"; - char *slab_name; - - if (hist == NULL) - goto out; - - slab_name = kmalloc(strlen(name) + sizeof(dccp_tx_hist_mask) - 1, - GFP_ATOMIC); - if (slab_name == NULL) - goto out_free_hist; - - sprintf(slab_name, dccp_tx_hist_mask, name); - hist->dccptxh_slab = kmem_cache_create(slab_name, - sizeof(struct dccp_tx_hist_entry), - 0, SLAB_HWCACHE_ALIGN, - NULL, NULL); - if (hist->dccptxh_slab == NULL) - goto out_free_slab_name; -out: - return hist; -out_free_slab_name: - kfree(slab_name); -out_free_hist: - kfree(hist); - hist = NULL; - goto out; -} - -EXPORT_SYMBOL_GPL(dccp_tx_hist_new); - -void dccp_tx_hist_delete(struct dccp_tx_hist *hist) -{ - const char* name = kmem_cache_name(hist->dccptxh_slab); - - kmem_cache_destroy(hist->dccptxh_slab); - kfree(name); - kfree(hist); -} - -EXPORT_SYMBOL_GPL(dccp_tx_hist_delete); - -struct dccp_tx_hist_entry * - dccp_tx_hist_find_entry(const struct list_head *list, const u64 seq) -{ - struct dccp_tx_hist_entry *packet = NULL, *entry; - - list_for_each_entry(entry, list, dccphtx_node) - if (entry->dccphtx_seqno == seq) { - packet = entry; - break; - } - - return packet; -} - -EXPORT_SYMBOL_GPL(dccp_tx_hist_find_entry); - -int dccp_rx_hist_find_entry(const struct list_head *list, const u64 seq, - u8 *ccval) -{ - struct dccp_rx_hist_entry *packet = NULL, *entry; - - list_for_each_entry(entry, list, dccphrx_node) - if (entry->dccphrx_seqno == seq) { - packet = entry; - break; - } - - if (packet) - *ccval = packet->dccphrx_ccval; - - return packet != NULL; -} - -EXPORT_SYMBOL_GPL(dccp_rx_hist_find_entry); - -void dccp_tx_hist_purge_older(struct dccp_tx_hist *hist, - struct list_head *list, - struct dccp_tx_hist_entry *packet) +void dccp_rx_hist_purge(struct dccp_rx_hist *hist, struct list_head *list) { - struct dccp_tx_hist_entry *next; + struct dccp_rx_hist_entry *entry, *next; - list_for_each_entry_safe_continue(packet, next, list, dccphtx_node) { - list_del_init(&packet->dccphtx_node); - dccp_tx_hist_entry_delete(hist, packet); + list_for_each_entry_safe(entry, next, list, dccphrx_node) { + list_del_init(&entry->dccphrx_node); + kmem_cache_free(hist->dccprxh_slab, entry); } } -EXPORT_SYMBOL_GPL(dccp_tx_hist_purge_older); - -void dccp_tx_hist_purge(struct dccp_tx_hist *hist, struct list_head *list) -{ - struct dccp_tx_hist_entry *entry, *next; - - list_for_each_entry_safe(entry, next, list, dccphtx_node) { - list_del_init(&entry->dccphtx_node); - dccp_tx_hist_entry_delete(hist, entry); - } -} +EXPORT_SYMBOL_GPL(dccp_rx_hist_purge); -EXPORT_SYMBOL_GPL(dccp_tx_hist_purge); MODULE_AUTHOR("Ian McDonald <ian.mcdonald@jandi.co.nz>, " "Arnaldo Carvalho de Melo <acme@ghostprotocols.net>"); diff --git a/net/dccp/ccids/lib/packet_history.h b/net/dccp/ccids/lib/packet_history.h index 9a8bcf224aa..1f960c19ea1 100644 --- a/net/dccp/ccids/lib/packet_history.h +++ b/net/dccp/ccids/lib/packet_history.h @@ -49,43 +49,27 @@ #define TFRC_WIN_COUNT_PER_RTT 4 #define TFRC_WIN_COUNT_LIMIT 16 +/* + * Transmitter History data structures and declarations + */ struct dccp_tx_hist_entry { struct list_head dccphtx_node; u64 dccphtx_seqno:48, - dccphtx_ccval:4, dccphtx_sent:1; u32 dccphtx_rtt; struct timeval dccphtx_tstamp; }; -struct dccp_rx_hist_entry { - struct list_head dccphrx_node; - u64 dccphrx_seqno:48, - dccphrx_ccval:4, - dccphrx_type:4; - u32 dccphrx_ndp; /* In fact it is from 8 to 24 bits */ - struct timeval dccphrx_tstamp; -}; - struct dccp_tx_hist { struct kmem_cache *dccptxh_slab; }; extern struct dccp_tx_hist *dccp_tx_hist_new(const char *name); -extern void dccp_tx_hist_delete(struct dccp_tx_hist *hist); - -struct dccp_rx_hist { - struct kmem_cache *dccprxh_slab; -}; - -extern struct dccp_rx_hist *dccp_rx_hist_new(const char *name); -extern void dccp_rx_hist_delete(struct dccp_rx_hist *hist); -extern struct dccp_rx_hist_entry * - dccp_rx_hist_find_data_packet(const struct list_head *list); +extern void dccp_tx_hist_delete(struct dccp_tx_hist *hist); static inline struct dccp_tx_hist_entry * - dccp_tx_hist_entry_new(struct dccp_tx_hist *hist, - const gfp_t prio) + dccp_tx_hist_entry_new(struct dccp_tx_hist *hist, + const gfp_t prio) { struct dccp_tx_hist_entry *entry = kmem_cache_alloc(hist->dccptxh_slab, prio); @@ -96,18 +80,20 @@ static inline struct dccp_tx_hist_entry * return entry; } -static inline void dccp_tx_hist_entry_delete(struct dccp_tx_hist *hist, - struct dccp_tx_hist_entry *entry) +static inline struct dccp_tx_hist_entry * + dccp_tx_hist_head(struct list_head *list) { - if (entry != NULL) - kmem_cache_free(hist->dccptxh_slab, entry); + struct dccp_tx_hist_entry *head = NULL; + + if (!list_empty(list)) + head = list_entry(list->next, struct dccp_tx_hist_entry, + dccphtx_node); + return head; } extern struct dccp_tx_hist_entry * dccp_tx_hist_find_entry(const struct list_head *list, const u64 seq); -extern int dccp_rx_hist_find_entry(const struct list_head *list, const u64 seq, - u8 *ccval); static inline void dccp_tx_hist_add_entry(struct list_head *list, struct dccp_tx_hist_entry *entry) @@ -115,30 +101,45 @@ static inline void dccp_tx_hist_add_entry(struct list_head *list, list_add(&entry->dccphtx_node, list); } +static inline void dccp_tx_hist_entry_delete(struct dccp_tx_hist *hist, + struct dccp_tx_hist_entry *entry) +{ + if (entry != NULL) + kmem_cache_free(hist->dccptxh_slab, entry); +} + +extern void dccp_tx_hist_purge(struct dccp_tx_hist *hist, + struct list_head *list); + extern void dccp_tx_hist_purge_older(struct dccp_tx_hist *hist, struct list_head *list, struct dccp_tx_hist_entry *next); -extern void dccp_tx_hist_purge(struct dccp_tx_hist *hist, - struct list_head *list); +/* + * Receiver History data structures and declarations + */ +struct dccp_rx_hist_entry { + struct list_head dccphrx_node; + u64 dccphrx_seqno:48, + dccphrx_ccval:4, + dccphrx_type:4; + u32 dccphrx_ndp; /* In fact it is from 8 to 24 bits */ + struct timeval dccphrx_tstamp; +}; -static inline struct dccp_tx_hist_entry * - dccp_tx_hist_head(struct list_head *list) -{ - struct dccp_tx_hist_entry *head = NULL; +struct dccp_rx_hist { + struct kmem_cache *dccprxh_slab; +}; - if (!list_empty(list)) - head = list_entry(list->next, struct dccp_tx_hist_entry, - dccphtx_node); - return head; -} +extern struct dccp_rx_hist *dccp_rx_hist_new(const char *name); +extern void dccp_rx_hist_delete(struct dccp_rx_hist *hist); static inline struct dccp_rx_hist_entry * - dccp_rx_hist_entry_new(struct dccp_rx_hist *hist, - const struct sock *sk, - const u32 ndp, - const struct sk_buff *skb, - const gfp_t prio) + dccp_rx_hist_entry_new(struct dccp_rx_hist *hist, + const struct sock *sk, + const u32 ndp, + const struct sk_buff *skb, + const gfp_t prio) { struct dccp_rx_hist_entry *entry = kmem_cache_alloc(hist->dccprxh_slab, prio); @@ -156,18 +157,8 @@ static inline struct dccp_rx_hist_entry * return entry; } -static inline void dccp_rx_hist_entry_delete(struct dccp_rx_hist *hist, - struct dccp_rx_hist_entry *entry) -{ - if (entry != NULL) - kmem_cache_free(hist->dccprxh_slab, entry); -} - -extern void dccp_rx_hist_purge(struct dccp_rx_hist *hist, - struct list_head *list); - static inline struct dccp_rx_hist_entry * - dccp_rx_hist_head(struct list_head *list) + dccp_rx_hist_head(struct list_head *list) { struct dccp_rx_hist_entry *head = NULL; @@ -177,6 +168,27 @@ static inline struct dccp_rx_hist_entry * return head; } +extern int dccp_rx_hist_find_entry(const struct list_head *list, const u64 seq, + u8 *ccval); +extern struct dccp_rx_hist_entry * + dccp_rx_hist_find_data_packet(const struct list_head *list); + +extern void dccp_rx_hist_add_packet(struct dccp_rx_hist *hist, + struct list_head *rx_list, + struct list_head *li_list, + struct dccp_rx_hist_entry *packet, + u64 nonloss_seqno); + +static inline void dccp_rx_hist_entry_delete(struct dccp_rx_hist *hist, + struct dccp_rx_hist_entry *entry) +{ + if (entry != NULL) + kmem_cache_free(hist->dccprxh_slab, entry); +} + +extern void dccp_rx_hist_purge(struct dccp_rx_hist *hist, + struct list_head *list); + static inline int dccp_rx_hist_entry_data_packet(const struct dccp_rx_hist_entry *entry) { @@ -184,12 +196,6 @@ static inline int entry->dccphrx_type == DCCP_PKT_DATAACK; } -extern void dccp_rx_hist_add_packet(struct dccp_rx_hist *hist, - struct list_head *rx_list, - struct list_head *li_list, - struct dccp_rx_hist_entry *packet, - u64 nonloss_seqno); - extern u64 dccp_rx_hist_detect_loss(struct list_head *rx_list, struct list_head *li_list, u8 *win_loss); diff --git a/net/dccp/ccids/lib/tfrc.h b/net/dccp/ccids/lib/tfrc.h index 45f30f59ea2..faf5f7e219e 100644 --- a/net/dccp/ccids/lib/tfrc.h +++ b/net/dccp/ccids/lib/tfrc.h @@ -13,8 +13,29 @@ * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. */ - #include <linux/types.h> +#include <asm/div64.h> + +/* integer-arithmetic divisions of type (a * 1000000)/b */ +static inline u64 scaled_div(u64 a, u32 b) +{ + BUG_ON(b==0); + a *= 1000000; + do_div(a, b); + return a; +} + +static inline u32 scaled_div32(u64 a, u32 b) +{ + u64 result = scaled_div(a, b); + + if (result > UINT_MAX) { + DCCP_CRIT("Overflow: a(%llu)/b(%u) > ~0U", + (unsigned long long)a, b); + return UINT_MAX; + } + return result; +} extern u32 tfrc_calc_x(u16 s, u32 R, u32 p); extern u32 tfrc_calc_x_reverse_lookup(u32 fvalue); diff --git a/net/dccp/ccids/lib/tfrc_equation.c b/net/dccp/ccids/lib/tfrc_equation.c index ddac2c511e2..90009fd77e1 100644 --- a/net/dccp/ccids/lib/tfrc_equation.c +++ b/net/dccp/ccids/lib/tfrc_equation.c @@ -13,7 +13,6 @@ */ #include <linux/module.h> -#include <asm/div64.h> #include "../../dccp.h" #include "tfrc.h" @@ -616,15 +615,12 @@ static inline u32 tfrc_binsearch(u32 fval, u8 small) * @R: RTT scaled by 1000000 (i.e., microseconds) * @p: loss ratio estimate scaled by 1000000 * Returns X_calc in bytes per second (not scaled). - * - * Note: DO NOT alter this code unless you run test cases against it, - * as the code has been optimized to stop underflow/overflow. */ u32 tfrc_calc_x(u16 s, u32 R, u32 p) { - int index; + u16 index; u32 f; - u64 tmp1, tmp2; + u64 result; /* check against invalid parameters and divide-by-zero */ BUG_ON(p > 1000000); /* p must not exceed 100% */ @@ -650,15 +646,17 @@ u32 tfrc_calc_x(u16 s, u32 R, u32 p) f = tfrc_calc_x_lookup[index][0]; } - /* The following computes X = s/(R*f(p)) in bytes per second. Since f(p) - * and R are both scaled by 1000000, we need to multiply by 1000000^2. - * ==> DO NOT alter this unless you test against overflow on 32 bit */ - tmp1 = ((u64)s * 100000000); - tmp2 = ((u64)R * (u64)f); - do_div(tmp2, 10000); - do_div(tmp1, tmp2); - - return (u32)tmp1; + /* + * Compute X = s/(R*f(p)) in bytes per second. + * Since f(p) and R are both scaled by 1000000, we need to multiply by + * 1000000^2. To avoid overflow, the result is computed in two stages. + * This works under almost all reasonable operational conditions, for a + * wide range of parameters. Yet, should some strange combination of + * parameters result in overflow, the use of scaled_div32 will catch + * this and return UINT_MAX - which is a logically adequate consequence. + */ + result = scaled_div(s, R); + return scaled_div32(result, f); } EXPORT_SYMBOL_GPL(tfrc_calc_x); diff --git a/net/dccp/dccp.h b/net/dccp/dccp.h index 68886986c8e..a0900bf98e6 100644 --- a/net/dccp/dccp.h +++ b/net/dccp/dccp.h @@ -80,8 +80,6 @@ extern void dccp_time_wait(struct sock *sk, int state, int timeo); #define DCCP_RTO_MAX ((unsigned)(120 * HZ)) /* FIXME: using TCP value */ -#define DCCP_XMIT_TIMEO 30000 /* Time/msecs for blocking transmit per packet */ - /* sysctl variables for DCCP */ extern int sysctl_dccp_request_retries; extern int sysctl_dccp_retries1; @@ -434,6 +432,7 @@ static inline void timeval_sub_usecs(struct timeval *tv, tv->tv_sec--; tv->tv_usec += USEC_PER_SEC; } + DCCP_BUG_ON(tv->tv_sec < 0); } #ifdef CONFIG_SYSCTL diff --git a/net/dccp/feat.c b/net/dccp/feat.c index 4dc487f27a1..95b6927ec65 100644 --- a/net/dccp/feat.c +++ b/net/dccp/feat.c @@ -329,7 +329,7 @@ static void dccp_feat_empty_confirm(struct dccp_minisock *dmsk, switch (type) { case DCCPO_CHANGE_L: opt->dccpop_type = DCCPO_CONFIRM_R; break; case DCCPO_CHANGE_R: opt->dccpop_type = DCCPO_CONFIRM_L; break; - default: DCCP_WARN("invalid type %d\n", type); return; + default: DCCP_WARN("invalid type %d\n", type); return; } opt->dccpop_feat = feature; @@ -427,7 +427,7 @@ int dccp_feat_confirm_recv(struct sock *sk, u8 type, u8 feature, switch (type) { case DCCPO_CONFIRM_L: t = DCCPO_CHANGE_R; break; case DCCPO_CONFIRM_R: t = DCCPO_CHANGE_L; break; - default: DCCP_WARN("invalid type %d\n", type); + default: DCCP_WARN("invalid type %d\n", type); return 1; } @@ -610,7 +610,7 @@ const char *dccp_feat_typename(const u8 type) case DCCPO_CHANGE_R: return("ChangeR"); case DCCPO_CONFIRM_R: return("ConfirmR"); /* the following case must not appear in feature negotation */ - default: dccp_pr_debug("unknown type %d [BUG!]\n", type); + default: dccp_pr_debug("unknown type %d [BUG!]\n", type); } return NULL; } diff --git a/net/dccp/input.c b/net/dccp/input.c index 7371a2f3acf..565bc80557c 100644 --- a/net/dccp/input.c +++ b/net/dccp/input.c @@ -1,6 +1,6 @@ /* * net/dccp/input.c - * + * * An implementation of the DCCP protocol * Arnaldo Carvalho de Melo <acme@conectiva.com.br> * @@ -82,7 +82,7 @@ static int dccp_check_seqno(struct sock *sk, struct sk_buff *skb) * Otherwise, * Drop packet and return */ - if (dh->dccph_type == DCCP_PKT_SYNC || + if (dh->dccph_type == DCCP_PKT_SYNC || dh->dccph_type == DCCP_PKT_SYNCACK) { if (between48(DCCP_SKB_CB(skb)->dccpd_ack_seq, dp->dccps_awl, dp->dccps_awh) && @@ -185,8 +185,8 @@ static int __dccp_rcv_established(struct sock *sk, struct sk_buff *skb, dccp_rcv_close(sk, skb); return 0; case DCCP_PKT_REQUEST: - /* Step 7 - * or (S.is_server and P.type == Response) + /* Step 7 + * or (S.is_server and P.type == Response) * or (S.is_client and P.type == Request) * or (S.state >= OPEN and P.type == Request * and P.seqno >= S.OSR) @@ -248,8 +248,18 @@ int dccp_rcv_established(struct sock *sk, struct sk_buff *skb, DCCP_ACKVEC_STATE_RECEIVED)) goto discard; - ccid_hc_rx_packet_recv(dp->dccps_hc_rx_ccid, sk, skb); - ccid_hc_tx_packet_recv(dp->dccps_hc_tx_ccid, sk, skb); + /* + * Deliver to the CCID module in charge. + * FIXME: Currently DCCP operates one-directional only, i.e. a listening + * server is not at the same time a connecting client. There is + * not much sense in delivering to both rx/tx sides at the moment + * (only one is active at a time); when moving to bidirectional + * service, this needs to be revised. + */ + if (dccp_sk(sk)->dccps_role == DCCP_ROLE_SERVER) + ccid_hc_rx_packet_recv(dp->dccps_hc_rx_ccid, sk, skb); + else + ccid_hc_tx_packet_recv(dp->dccps_hc_tx_ccid, sk, skb); return __dccp_rcv_established(sk, skb, dh, len); discard: @@ -264,7 +274,7 @@ static int dccp_rcv_request_sent_state_process(struct sock *sk, const struct dccp_hdr *dh, const unsigned len) { - /* + /* * Step 4: Prepare sequence numbers in REQUEST * If S.state == REQUEST, * If (P.type == Response or P.type == Reset) @@ -332,7 +342,7 @@ static int dccp_rcv_request_sent_state_process(struct sock *sk, * from the Response * / * S.state := PARTOPEN * Set PARTOPEN timer - * Continue with S.state == PARTOPEN + * Continue with S.state == PARTOPEN * / * Step 12 will send the Ack completing the * three-way handshake * / */ @@ -363,7 +373,7 @@ static int dccp_rcv_request_sent_state_process(struct sock *sk, */ __kfree_skb(skb); return 0; - } + } dccp_send_ack(sk); return -1; } @@ -371,7 +381,7 @@ static int dccp_rcv_request_sent_state_process(struct sock *sk, out_invalid_packet: /* dccp_v4_do_rcv will send a reset */ DCCP_SKB_CB(skb)->dccpd_reset_code = DCCP_RESET_CODE_PACKET_ERROR; - return 1; + return 1; } static int dccp_rcv_respond_partopen_state_process(struct sock *sk, @@ -478,14 +488,17 @@ int dccp_rcv_state_process(struct sock *sk, struct sk_buff *skb, if (dcb->dccpd_ack_seq != DCCP_PKT_WITHOUT_ACK_SEQ) dccp_event_ack_recv(sk, skb); - if (dccp_msk(sk)->dccpms_send_ack_vector && + if (dccp_msk(sk)->dccpms_send_ack_vector && dccp_ackvec_add(dp->dccps_hc_rx_ackvec, sk, - DCCP_SKB_CB(skb)->dccpd_seq, - DCCP_ACKVEC_STATE_RECEIVED)) - goto discard; + DCCP_SKB_CB(skb)->dccpd_seq, + DCCP_ACKVEC_STATE_RECEIVED)) + goto discard; - ccid_hc_rx_packet_recv(dp->dccps_hc_rx_ccid, sk, skb); - ccid_hc_tx_packet_recv(dp->dccps_hc_tx_ccid, sk, skb); + /* XXX see the comments in dccp_rcv_established about this */ + if (dccp_sk(sk)->dccps_role == DCCP_ROLE_SERVER) + ccid_hc_rx_packet_recv(dp->dccps_hc_rx_ccid, sk, skb); + else + ccid_hc_tx_packet_recv(dp->dccps_hc_tx_ccid, sk, skb); } /* @@ -567,7 +580,7 @@ int dccp_rcv_state_process(struct sock *sk, struct sk_buff *skb, } } - if (!queued) { + if (!queued) { discard: __kfree_skb(skb); } diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c index ff81679c9f1..fa2c982d430 100644 --- a/net/dccp/ipv4.c +++ b/net/dccp/ipv4.c @@ -72,7 +72,7 @@ int dccp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) tmp = ip_route_connect(&rt, nexthop, inet->saddr, RT_CONN_FLAGS(sk), sk->sk_bound_dev_if, IPPROTO_DCCP, - inet->sport, usin->sin_port, sk); + inet->sport, usin->sin_port, sk, 1); if (tmp < 0) return tmp; @@ -157,7 +157,7 @@ static inline void dccp_do_pmtu_discovery(struct sock *sk, /* We don't check in the destentry if pmtu discovery is forbidden * on this route. We just assume that no packet_to_big packets * are send back when pmtu discovery is not active. - * There is a small race when the user changes this flag in the + * There is a small race when the user changes this flag in the * route, but I think that's acceptable. */ if ((dst = __sk_dst_check(sk, 0)) == NULL) @@ -467,7 +467,7 @@ static struct dst_entry* dccp_v4_route_skb(struct sock *sk, .uli_u = { .ports = { .sport = dccp_hdr(skb)->dccph_dport, .dport = dccp_hdr(skb)->dccph_sport } - } + } }; security_skb_classify_flow(skb, &fl); @@ -595,7 +595,7 @@ int dccp_v4_conn_request(struct sock *sk, struct sk_buff *skb) struct inet_request_sock *ireq; struct request_sock *req; struct dccp_request_sock *dreq; - const __be32 service = dccp_hdr_request(skb)->dccph_req_service; + const __be32 service = dccp_hdr_request(skb)->dccph_req_service; struct dccp_skb_cb *dcb = DCCP_SKB_CB(skb); __u8 reset_code = DCCP_RESET_CODE_TOO_BUSY; @@ -609,7 +609,7 @@ int dccp_v4_conn_request(struct sock *sk, struct sk_buff *skb) if (dccp_bad_service_code(sk, service)) { reset_code = DCCP_RESET_CODE_BAD_SERVICE_CODE; goto drop; - } + } /* * TW buckets are converted to open requests without * limitations, they conserve resources and peer is @@ -644,7 +644,7 @@ int dccp_v4_conn_request(struct sock *sk, struct sk_buff *skb) ireq->rmt_addr = skb->nh.iph->saddr; ireq->opt = NULL; - /* + /* * Step 3: Process LISTEN state * * Set S.ISR, S.GSR, S.SWL, S.SWH from packet or Init Cookie @@ -846,15 +846,15 @@ static int dccp_v4_rcv(struct sk_buff *skb) } /* Step 2: - * Look up flow ID in table and get corresponding socket */ + * Look up flow ID in table and get corresponding socket */ sk = __inet_lookup(&dccp_hashinfo, skb->nh.iph->saddr, dh->dccph_sport, skb->nh.iph->daddr, dh->dccph_dport, inet_iif(skb)); - /* + /* * Step 2: - * If no socket ... + * If no socket ... */ if (sk == NULL) { dccp_pr_debug("failed to look up flow ID in table and " @@ -862,9 +862,9 @@ static int dccp_v4_rcv(struct sk_buff *skb) goto no_dccp_socket; } - /* + /* * Step 2: - * ... or S.state == TIMEWAIT, + * ... or S.state == TIMEWAIT, * Generate Reset(No Connection) unless P.type == Reset * Drop packet and return */ @@ -876,8 +876,8 @@ static int dccp_v4_rcv(struct sk_buff *skb) /* * RFC 4340, sec. 9.2.1: Minimum Checksum Coverage - * o if MinCsCov = 0, only packets with CsCov = 0 are accepted - * o if MinCsCov > 0, also accept packets with CsCov >= MinCsCov + * o if MinCsCov = 0, only packets with CsCov = 0 are accepted + * o if MinCsCov > 0, also accept packets with CsCov >= MinCsCov */ min_cov = dccp_sk(sk)->dccps_pcrlen; if (dh->dccph_cscov && (min_cov == 0 || dh->dccph_cscov < min_cov)) { @@ -900,7 +900,7 @@ no_dccp_socket: goto discard_it; /* * Step 2: - * If no socket ... + * If no socket ... * Generate Reset(No Connection) unless P.type == Reset * Drop packet and return */ diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c index c7aaa2574f5..79140b3e592 100644 --- a/net/dccp/ipv6.c +++ b/net/dccp/ipv6.c @@ -77,7 +77,7 @@ static inline void dccp_v6_send_check(struct sock *sk, int unused_value, } static inline __u32 secure_dccpv6_sequence_number(__be32 *saddr, __be32 *daddr, - __be16 sport, __be16 dport ) + __be16 sport, __be16 dport ) { return secure_tcpv6_sequence_number(saddr, daddr, sport, dport); } @@ -329,7 +329,7 @@ static void dccp_v6_ctl_send_reset(struct sock *sk, struct sk_buff *rxskb) skb = alloc_skb(dccp_v6_ctl_socket->sk->sk_prot->max_header, GFP_ATOMIC); if (skb == NULL) - return; + return; skb_reserve(skb, dccp_v6_ctl_socket->sk->sk_prot->max_header); @@ -353,7 +353,7 @@ static void dccp_v6_ctl_send_reset(struct sock *sk, struct sk_buff *rxskb) dccp_csum_outgoing(skb); dh->dccph_checksum = dccp_v6_csum_finish(skb, &rxskb->nh.ipv6h->saddr, - &rxskb->nh.ipv6h->daddr); + &rxskb->nh.ipv6h->daddr); memset(&fl, 0, sizeof(fl)); ipv6_addr_copy(&fl.fl6_dst, &rxskb->nh.ipv6h->saddr); @@ -424,7 +424,7 @@ static int dccp_v6_conn_request(struct sock *sk, struct sk_buff *skb) struct dccp_request_sock *dreq; struct inet6_request_sock *ireq6; struct ipv6_pinfo *np = inet6_sk(sk); - const __be32 service = dccp_hdr_request(skb)->dccph_req_service; + const __be32 service = dccp_hdr_request(skb)->dccph_req_service; struct dccp_skb_cb *dcb = DCCP_SKB_CB(skb); __u8 reset_code = DCCP_RESET_CODE_TOO_BUSY; @@ -437,7 +437,7 @@ static int dccp_v6_conn_request(struct sock *sk, struct sk_buff *skb) if (dccp_bad_service_code(sk, service)) { reset_code = DCCP_RESET_CODE_BAD_SERVICE_CODE; goto drop; - } + } /* * There are no SYN attacks on IPv6, yet... */ @@ -787,7 +787,7 @@ static int dccp_v6_do_rcv(struct sock *sk, struct sk_buff *skb) * otherwise we just shortcircuit this and continue with * the new socket.. */ - if (nsk != sk) { + if (nsk != sk) { if (dccp_child_process(sk, nsk, skb)) goto reset; if (opt_skb != NULL) @@ -843,14 +843,14 @@ static int dccp_v6_rcv(struct sk_buff **pskb) DCCP_SKB_CB(skb)->dccpd_ack_seq = dccp_hdr_ack_seq(skb); /* Step 2: - * Look up flow ID in table and get corresponding socket */ + * Look up flow ID in table and get corresponding socket */ sk = __inet6_lookup(&dccp_hashinfo, &skb->nh.ipv6h->saddr, dh->dccph_sport, &skb->nh.ipv6h->daddr, ntohs(dh->dccph_dport), inet6_iif(skb)); /* * Step 2: - * If no socket ... + * If no socket ... */ if (sk == NULL) { dccp_pr_debug("failed to look up flow ID in table and " @@ -860,7 +860,7 @@ static int dccp_v6_rcv(struct sk_buff **pskb) /* * Step 2: - * ... or S.state == TIMEWAIT, + * ... or S.state == TIMEWAIT, * Generate Reset(No Connection) unless P.type == Reset * Drop packet and return */ @@ -872,8 +872,8 @@ static int dccp_v6_rcv(struct sk_buff **pskb) /* * RFC 4340, sec. 9.2.1: Minimum Checksum Coverage - * o if MinCsCov = 0, only packets with CsCov = 0 are accepted - * o if MinCsCov > 0, also accept packets with CsCov >= MinCsCov + * o if MinCsCov = 0, only packets with CsCov = 0 are accepted + * o if MinCsCov > 0, also accept packets with CsCov >= MinCsCov */ min_cov = dccp_sk(sk)->dccps_pcrlen; if (dh->dccph_cscov && (min_cov == 0 || dh->dccph_cscov < min_cov)) { @@ -893,7 +893,7 @@ no_dccp_socket: goto discard_it; /* * Step 2: - * If no socket ... + * If no socket ... * Generate Reset(No Connection) unless P.type == Reset * Drop packet and return */ @@ -1041,7 +1041,7 @@ static int dccp_v6_connect(struct sock *sk, struct sockaddr *uaddr, if (final_p) ipv6_addr_copy(&fl.fl6_dst, final_p); - err = xfrm_lookup(&dst, &fl, sk, 0); + err = xfrm_lookup(&dst, &fl, sk, 1); if (err < 0) goto failure; diff --git a/net/dccp/minisocks.c b/net/dccp/minisocks.c index 4c9e26775f7..6656bb497c7 100644 --- a/net/dccp/minisocks.c +++ b/net/dccp/minisocks.c @@ -182,7 +182,7 @@ out_free: EXPORT_SYMBOL_GPL(dccp_create_openreq_child); -/* +/* * Process an incoming packet for RESPOND sockets represented * as an request_sock. */ diff --git a/net/dccp/options.c b/net/dccp/options.c index f398b43bc05..c03ba61eb6d 100644 --- a/net/dccp/options.c +++ b/net/dccp/options.c @@ -557,11 +557,6 @@ int dccp_insert_options(struct sock *sk, struct sk_buff *skb) return -1; dp->dccps_hc_rx_insert_options = 0; } - if (dp->dccps_hc_tx_insert_options) { - if (ccid_hc_tx_insert_options(dp->dccps_hc_tx_ccid, sk, skb)) - return -1; - dp->dccps_hc_tx_insert_options = 0; - } /* Feature negotiation */ /* Data packets can't do feat negotiation */ diff --git a/net/dccp/output.c b/net/dccp/output.c index 400c30b6fca..3435542e965 100644 --- a/net/dccp/output.c +++ b/net/dccp/output.c @@ -1,6 +1,6 @@ /* * net/dccp/output.c - * + * * An implementation of the DCCP protocol * Arnaldo Carvalho de Melo <acme@conectiva.com.br> * @@ -124,7 +124,7 @@ static int dccp_transmit_skb(struct sock *sk, struct sk_buff *skb) DCCP_INC_STATS(DCCP_MIB_OUTSEGS); memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt)); - err = icsk->icsk_af_ops->queue_xmit(skb, sk, 0); + err = icsk->icsk_af_ops->queue_xmit(skb, 0); return net_xmit_eval(err); } return -ENOBUFS; @@ -175,14 +175,12 @@ void dccp_write_space(struct sock *sk) /** * dccp_wait_for_ccid - Wait for ccid to tell us we can send a packet * @sk: socket to wait for - * @timeo: for how long */ -static int dccp_wait_for_ccid(struct sock *sk, struct sk_buff *skb, - long *timeo) +static int dccp_wait_for_ccid(struct sock *sk, struct sk_buff *skb) { struct dccp_sock *dp = dccp_sk(sk); DEFINE_WAIT(wait); - long delay; + unsigned long delay; int rc; while (1) { @@ -190,8 +188,6 @@ static int dccp_wait_for_ccid(struct sock *sk, struct sk_buff *skb, if (sk->sk_err) goto do_error; - if (!*timeo) - goto do_nonblock; if (signal_pending(current)) goto do_interrupted; @@ -199,12 +195,9 @@ static int dccp_wait_for_ccid(struct sock *sk, struct sk_buff *skb, if (rc <= 0) break; delay = msecs_to_jiffies(rc); - if (delay > *timeo || delay < 0) - goto do_nonblock; - sk->sk_write_pending++; release_sock(sk); - *timeo -= schedule_timeout(delay); + schedule_timeout(delay); lock_sock(sk); sk->sk_write_pending--; } @@ -215,11 +208,8 @@ out: do_error: rc = -EPIPE; goto out; -do_nonblock: - rc = -EAGAIN; - goto out; do_interrupted: - rc = sock_intr_errno(*timeo); + rc = -EINTR; goto out; } @@ -240,8 +230,6 @@ void dccp_write_xmit(struct sock *sk, int block) { struct dccp_sock *dp = dccp_sk(sk); struct sk_buff *skb; - long timeo = DCCP_XMIT_TIMEO; /* If a packet is taking longer than - this we have other issues */ while ((skb = skb_peek(&sk->sk_write_queue))) { int err = ccid_hc_tx_send_packet(dp->dccps_hc_tx_ccid, sk, skb); @@ -251,11 +239,9 @@ void dccp_write_xmit(struct sock *sk, int block) sk_reset_timer(sk, &dp->dccps_xmit_timer, msecs_to_jiffies(err)+jiffies); break; - } else { - err = dccp_wait_for_ccid(sk, skb, &timeo); - timeo = DCCP_XMIT_TIMEO; - } - if (err) + } else + err = dccp_wait_for_ccid(sk, skb); + if (err && err != -EINTR) DCCP_BUG("err=%d after dccp_wait_for_ccid", err); } @@ -281,8 +267,10 @@ void dccp_write_xmit(struct sock *sk, int block) if (err) DCCP_BUG("err=%d after ccid_hc_tx_packet_sent", err); - } else + } else { + dccp_pr_debug("packet discarded\n"); kfree(skb); + } } } @@ -350,7 +338,6 @@ EXPORT_SYMBOL_GPL(dccp_make_response); static struct sk_buff *dccp_make_reset(struct sock *sk, struct dst_entry *dst, const enum dccp_reset_codes code) - { struct dccp_hdr *dh; struct dccp_sock *dp = dccp_sk(sk); @@ -409,7 +396,7 @@ int dccp_send_reset(struct sock *sk, enum dccp_reset_codes code) code); if (skb != NULL) { memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt)); - err = inet_csk(sk)->icsk_af_ops->queue_xmit(skb, sk, 0); + err = inet_csk(sk)->icsk_af_ops->queue_xmit(skb, 0); return net_xmit_eval(err); } } @@ -431,14 +418,14 @@ static inline void dccp_connect_init(struct sock *sk) dccp_sync_mss(sk, dst_mtu(dst)); - /* + /* * SWL and AWL are initially adjusted so that they are not less than * the initial Sequence Numbers received and sent, respectively: * SWL := max(GSR + 1 - floor(W/4), ISR), * AWL := max(GSS - W' + 1, ISS). * These adjustments MUST be applied only at the beginning of the * connection. - */ + */ dccp_update_gss(sk, dp->dccps_iss); dccp_set_seqno(&dp->dccps_awl, max48(dp->dccps_awl, dp->dccps_iss)); diff --git a/net/dccp/proto.c b/net/dccp/proto.c index 5ec47d9ee44..48438565d70 100644 --- a/net/dccp/proto.c +++ b/net/dccp/proto.c @@ -196,7 +196,7 @@ int dccp_init_sock(struct sock *sk, const __u8 ctl_sock_initialized) sk, GFP_KERNEL); dp->dccps_hc_tx_ccid = ccid_hc_tx_new(dmsk->dccpms_tx_ccid, sk, GFP_KERNEL); - if (unlikely(dp->dccps_hc_rx_ccid == NULL || + if (unlikely(dp->dccps_hc_rx_ccid == NULL || dp->dccps_hc_tx_ccid == NULL)) { ccid_hc_rx_delete(dp->dccps_hc_rx_ccid, sk); ccid_hc_tx_delete(dp->dccps_hc_tx_ccid, sk); @@ -390,7 +390,7 @@ static int dccp_setsockopt_service(struct sock *sk, const __be32 service, struct dccp_sock *dp = dccp_sk(sk); struct dccp_service_list *sl = NULL; - if (service == DCCP_SERVICE_INVALID_VALUE || + if (service == DCCP_SERVICE_INVALID_VALUE || optlen > DCCP_SERVICE_LIST_MAX_LEN * sizeof(u32)) return -EINVAL; @@ -830,7 +830,7 @@ EXPORT_SYMBOL_GPL(inet_dccp_listen); static const unsigned char dccp_new_state[] = { /* current state: new state: action: */ [0] = DCCP_CLOSED, - [DCCP_OPEN] = DCCP_CLOSING | DCCP_ACTION_FIN, + [DCCP_OPEN] = DCCP_CLOSING | DCCP_ACTION_FIN, [DCCP_REQUESTING] = DCCP_CLOSED, [DCCP_PARTOPEN] = DCCP_CLOSING | DCCP_ACTION_FIN, [DCCP_LISTEN] = DCCP_CLOSED, @@ -1024,7 +1024,6 @@ static int __init dccp_init(void) do { dccp_hashinfo.ehash_size = (1UL << ehash_order) * PAGE_SIZE / sizeof(struct inet_ehash_bucket); - dccp_hashinfo.ehash_size >>= 1; while (dccp_hashinfo.ehash_size & (dccp_hashinfo.ehash_size - 1)) dccp_hashinfo.ehash_size--; @@ -1037,9 +1036,10 @@ static int __init dccp_init(void) goto out_free_bind_bucket_cachep; } - for (i = 0; i < (dccp_hashinfo.ehash_size << 1); i++) { + for (i = 0; i < dccp_hashinfo.ehash_size; i++) { rwlock_init(&dccp_hashinfo.ehash[i].lock); INIT_HLIST_HEAD(&dccp_hashinfo.ehash[i].chain); + INIT_HLIST_HEAD(&dccp_hashinfo.ehash[i].twchain); } bhash_order = ehash_order; diff --git a/net/dccp/timer.c b/net/dccp/timer.c index e8f519e7f48..e5348f369c6 100644 --- a/net/dccp/timer.c +++ b/net/dccp/timer.c @@ -1,6 +1,6 @@ /* * net/dccp/timer.c - * + * * An implementation of the DCCP protocol * Arnaldo Carvalho de Melo <acme@conectiva.com.br> * @@ -102,13 +102,13 @@ static void dccp_retransmit_timer(struct sock *sk) * sk->sk_send_head has to have one skb with * DCCP_SKB_CB(skb)->dccpd_type set to one of the retransmittable DCCP * packet types. The only packets eligible for retransmission are: - * -- Requests in client-REQUEST state (sec. 8.1.1) - * -- Acks in client-PARTOPEN state (sec. 8.1.5) - * -- CloseReq in server-CLOSEREQ state (sec. 8.3) - * -- Close in node-CLOSING state (sec. 8.3) */ + * -- Requests in client-REQUEST state (sec. 8.1.1) + * -- Acks in client-PARTOPEN state (sec. 8.1.5) + * -- CloseReq in server-CLOSEREQ state (sec. 8.3) + * -- Close in node-CLOSING state (sec. 8.3) */ BUG_TRAP(sk->sk_send_head != NULL); - /* + /* * More than than 4MSL (8 minutes) has passed, a RESET(aborted) was * sent, no need to retransmit, this sock is dead. */ @@ -200,7 +200,7 @@ static void dccp_keepalive_timer(unsigned long data) /* Only process if socket is not in use. */ bh_lock_sock(sk); if (sock_owned_by_user(sk)) { - /* Try again later. */ + /* Try again later. */ inet_csk_reset_keepalive_timer(sk, HZ / 20); goto out; } diff --git a/net/decnet/dn_dev.c b/net/decnet/dn_dev.c index 0b9d4c95515..90b3dfd72b4 100644 --- a/net/decnet/dn_dev.c +++ b/net/decnet/dn_dev.c @@ -167,8 +167,7 @@ static int dn_forwarding_proc(ctl_table *, int, struct file *, void __user *, size_t *, loff_t *); static int dn_forwarding_sysctl(ctl_table *table, int __user *name, int nlen, void __user *oldval, size_t __user *oldlenp, - void __user *newval, size_t newlen, - void **context); + void __user *newval, size_t newlen); static struct dn_dev_sysctl_table { struct ctl_table_header *sysctl_header; @@ -347,8 +346,7 @@ static int dn_forwarding_proc(ctl_table *table, int write, static int dn_forwarding_sysctl(ctl_table *table, int __user *name, int nlen, void __user *oldval, size_t __user *oldlenp, - void __user *newval, size_t newlen, - void **context) + void __user *newval, size_t newlen) { #ifdef CONFIG_DECNET_ROUTER struct net_device *dev = table->extra1; @@ -751,7 +749,7 @@ static int dn_nl_fill_ifaddr(struct sk_buff *skb, struct dn_ifaddr *ifa, nlh = nlmsg_put(skb, pid, seq, event, sizeof(*ifm), flags); if (nlh == NULL) - return -ENOBUFS; + return -EMSGSIZE; ifm = nlmsg_data(nlh); ifm->ifa_family = AF_DECnet; @@ -770,7 +768,8 @@ static int dn_nl_fill_ifaddr(struct sk_buff *skb, struct dn_ifaddr *ifa, return nlmsg_end(skb, nlh); nla_put_failure: - return nlmsg_cancel(skb, nlh); + nlmsg_cancel(skb, nlh); + return -EMSGSIZE; } static void dn_ifaddr_notify(int event, struct dn_ifaddr *ifa) @@ -783,9 +782,12 @@ static void dn_ifaddr_notify(int event, struct dn_ifaddr *ifa) goto errout; err = dn_nl_fill_ifaddr(skb, ifa, 0, 0, event, 0); - /* failure implies BUG in dn_ifaddr_nlmsg_size() */ - BUG_ON(err < 0); - + if (err < 0) { + /* -EMSGSIZE implies BUG in dn_ifaddr_nlmsg_size() */ + WARN_ON(err == -EMSGSIZE); + kfree_skb(skb); + goto errout; + } err = rtnl_notify(skb, 0, RTNLGRP_DECnet_IFADDR, NULL, GFP_KERNEL); errout: if (err < 0) @@ -1147,16 +1149,23 @@ struct dn_dev *dn_dev_create(struct net_device *dev, int *err) init_timer(&dn_db->timer); dn_db->uptime = jiffies; + + dn_db->neigh_parms = neigh_parms_alloc(dev, &dn_neigh_table); + if (!dn_db->neigh_parms) { + dev->dn_ptr = NULL; + kfree(dn_db); + return NULL; + } + if (dn_db->parms.up) { if (dn_db->parms.up(dev) < 0) { + neigh_parms_release(&dn_neigh_table, dn_db->neigh_parms); dev->dn_ptr = NULL; kfree(dn_db); return NULL; } } - dn_db->neigh_parms = neigh_parms_alloc(dev, &dn_neigh_table); - dn_dev_sysctl_register(dev, &dn_db->parms); dn_dev_set_timer(dev); diff --git a/net/decnet/dn_table.c b/net/decnet/dn_table.c index 13b2421991b..c1f0cc1b1c6 100644 --- a/net/decnet/dn_table.c +++ b/net/decnet/dn_table.c @@ -350,7 +350,7 @@ static int dn_fib_dump_info(struct sk_buff *skb, u32 pid, u32 seq, int event, nlmsg_failure: rtattr_failure: skb_trim(skb, b - skb->data); - return -1; + return -EMSGSIZE; } @@ -368,9 +368,12 @@ static void dn_rtmsg_fib(int event, struct dn_fib_node *f, int z, u32 tb_id, err = dn_fib_dump_info(skb, pid, nlh->nlmsg_seq, event, tb_id, f->fn_type, f->fn_scope, &f->fn_key, z, DN_FIB_INFO(f), 0); - /* failure implies BUG in dn_fib_nlmsg_size() */ - BUG_ON(err < 0); - + if (err < 0) { + /* -EMSGSIZE implies BUG in dn_fib_nlmsg_size() */ + WARN_ON(err == -EMSGSIZE); + kfree_skb(skb); + goto errout; + } err = rtnl_notify(skb, pid, RTNLGRP_DECnet_ROUTE, nlh, GFP_KERNEL); errout: if (err < 0) diff --git a/net/decnet/sysctl_net_decnet.c b/net/decnet/sysctl_net_decnet.c index e246f054f36..a4065eb1341 100644 --- a/net/decnet/sysctl_net_decnet.c +++ b/net/decnet/sysctl_net_decnet.c @@ -134,8 +134,7 @@ static int parse_addr(__le16 *addr, char *str) static int dn_node_address_strategy(ctl_table *table, int __user *name, int nlen, void __user *oldval, size_t __user *oldlenp, - void __user *newval, size_t newlen, - void **context) + void __user *newval, size_t newlen) { size_t len; __le16 addr; @@ -220,8 +219,7 @@ static int dn_node_address_handler(ctl_table *table, int write, static int dn_def_dev_strategy(ctl_table *table, int __user *name, int nlen, void __user *oldval, size_t __user *oldlenp, - void __user *newval, size_t newlen, - void **context) + void __user *newval, size_t newlen) { size_t len; struct net_device *dev; diff --git a/net/ieee80211/softmac/ieee80211softmac_assoc.c b/net/ieee80211/softmac/ieee80211softmac_assoc.c index eec1a1dd91d..a824852909e 100644 --- a/net/ieee80211/softmac/ieee80211softmac_assoc.c +++ b/net/ieee80211/softmac/ieee80211softmac_assoc.c @@ -167,7 +167,7 @@ static void ieee80211softmac_assoc_notify_scan(struct net_device *dev, int event_type, void *context) { struct ieee80211softmac_device *mac = ieee80211_priv(dev); - ieee80211softmac_assoc_work((void*)mac); + ieee80211softmac_assoc_work(&mac->associnfo.work.work); } static void @@ -177,7 +177,7 @@ ieee80211softmac_assoc_notify_auth(struct net_device *dev, int event_type, void switch (event_type) { case IEEE80211SOFTMAC_EVENT_AUTHENTICATED: - ieee80211softmac_assoc_work((void*)mac); + ieee80211softmac_assoc_work(&mac->associnfo.work.work); break; case IEEE80211SOFTMAC_EVENT_AUTH_FAILED: case IEEE80211SOFTMAC_EVENT_AUTH_TIMEOUT: @@ -438,7 +438,7 @@ ieee80211softmac_try_reassoc(struct ieee80211softmac_device *mac) spin_lock_irqsave(&mac->lock, flags); mac->associnfo.associating = 1; - schedule_work(&mac->associnfo.work); + schedule_delayed_work(&mac->associnfo.work, 0); spin_unlock_irqrestore(&mac->lock, flags); } diff --git a/net/ieee80211/softmac/ieee80211softmac_wx.c b/net/ieee80211/softmac/ieee80211softmac_wx.c index 480d72c7a42..fb58e03b3fb 100644 --- a/net/ieee80211/softmac/ieee80211softmac_wx.c +++ b/net/ieee80211/softmac/ieee80211softmac_wx.c @@ -265,6 +265,12 @@ ieee80211softmac_wx_get_rate(struct net_device *net_dev, int err = -EINVAL; spin_lock_irqsave(&mac->lock, flags); + + if (unlikely(!mac->running)) { + err = -ENODEV; + goto out_unlock; + } + switch (mac->txrates.default_rate) { case IEEE80211_CCK_RATE_1MB: data->bitrate.value = 1000000; @@ -463,7 +469,7 @@ ieee80211softmac_wx_get_genie(struct net_device *dev, err = -E2BIG; } spin_unlock_irqrestore(&mac->lock, flags); - mutex_lock(&mac->associnfo.mutex); + mutex_unlock(&mac->associnfo.mutex); return err; } diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index 1144900d37f..5750a2b2a0d 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -305,7 +305,7 @@ lookup_protocol: sk->sk_reuse = 1; inet = inet_sk(sk); - inet->is_icsk = INET_PROTOSW_ICSK & answer_flags; + inet->is_icsk = (INET_PROTOSW_ICSK & answer_flags) != 0; if (SOCK_RAW == sock->type) { inet->num = protocol; @@ -1007,7 +1007,7 @@ static int inet_sk_reselect_saddr(struct sock *sk) RT_CONN_FLAGS(sk), sk->sk_bound_dev_if, sk->sk_protocol, - inet->sport, inet->dport, sk); + inet->sport, inet->dport, sk, 0); if (err) return err; diff --git a/net/ipv4/datagram.c b/net/ipv4/datagram.c index 7b068a89195..0072d79f0c2 100644 --- a/net/ipv4/datagram.c +++ b/net/ipv4/datagram.c @@ -49,7 +49,7 @@ int ip4_datagram_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) err = ip_route_connect(&rt, usin->sin_addr.s_addr, saddr, RT_CONN_FLAGS(sk), oif, sk->sk_protocol, - inet->sport, usin->sin_port, sk); + inet->sport, usin->sin_port, sk, 1); if (err) return err; if ((rt->rt_flags & RTCF_BROADCAST) && !sock_flag(sk, SOCK_BROADCAST)) { diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c index 2fd899160f8..c4020364096 100644 --- a/net/ipv4/devinet.c +++ b/net/ipv4/devinet.c @@ -165,9 +165,8 @@ struct in_device *inetdev_init(struct net_device *dev) NET_IPV4_NEIGH, "ipv4", NULL, NULL); #endif - /* Account for reference dev->ip_ptr */ + /* Account for reference dev->ip_ptr (below) */ in_dev_hold(in_dev); - rcu_assign_pointer(dev->ip_ptr, in_dev); #ifdef CONFIG_SYSCTL devinet_sysctl_register(in_dev, &in_dev->cnf); @@ -175,6 +174,9 @@ struct in_device *inetdev_init(struct net_device *dev) ip_mc_init_dev(in_dev); if (dev->flags & IFF_UP) ip_mc_up(in_dev); + + /* we can receive as soon as ip_ptr is set -- do this last */ + rcu_assign_pointer(dev->ip_ptr, in_dev); out: return in_dev; out_kfree: @@ -1138,7 +1140,7 @@ static int inet_fill_ifaddr(struct sk_buff *skb, struct in_ifaddr *ifa, nlh = nlmsg_put(skb, pid, seq, event, sizeof(*ifm), flags); if (nlh == NULL) - return -ENOBUFS; + return -EMSGSIZE; ifm = nlmsg_data(nlh); ifm->ifa_family = AF_INET; @@ -1165,7 +1167,8 @@ static int inet_fill_ifaddr(struct sk_buff *skb, struct in_ifaddr *ifa, return nlmsg_end(skb, nlh); nla_put_failure: - return nlmsg_cancel(skb, nlh); + nlmsg_cancel(skb, nlh); + return -EMSGSIZE; } static int inet_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb) @@ -1223,9 +1226,12 @@ static void rtmsg_ifa(int event, struct in_ifaddr* ifa, struct nlmsghdr *nlh, goto errout; err = inet_fill_ifaddr(skb, ifa, pid, seq, event, 0); - /* failure implies BUG in inet_nlmsg_size() */ - BUG_ON(err < 0); - + if (err < 0) { + /* -EMSGSIZE implies BUG in inet_nlmsg_size() */ + WARN_ON(err == -EMSGSIZE); + kfree_skb(skb); + goto errout; + } err = rtnl_notify(skb, pid, RTNLGRP_IPV4_IFADDR, nlh, GFP_KERNEL); errout: if (err < 0) @@ -1303,8 +1309,7 @@ int ipv4_doint_and_flush(ctl_table *ctl, int write, int ipv4_doint_and_flush_strategy(ctl_table *table, int __user *name, int nlen, void __user *oldval, size_t __user *oldlenp, - void __user *newval, size_t newlen, - void **context) + void __user *newval, size_t newlen) { int *valp = table->data; int new; diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c index e63b8a98fb4..be1028c9933 100644 --- a/net/ipv4/fib_semantics.c +++ b/net/ipv4/fib_semantics.c @@ -314,9 +314,12 @@ void rtmsg_fib(int event, __be32 key, struct fib_alias *fa, err = fib_dump_info(skb, info->pid, seq, event, tb_id, fa->fa_type, fa->fa_scope, key, dst_len, fa->fa_tos, fa->fa_info, 0); - /* failure implies BUG in fib_nlmsg_size() */ - BUG_ON(err < 0); - + if (err < 0) { + /* -EMSGSIZE implies BUG in fib_nlmsg_size() */ + WARN_ON(err == -EMSGSIZE); + kfree_skb(skb); + goto errout; + } err = rtnl_notify(skb, info->pid, RTNLGRP_IPV4_ROUTE, info->nlh, GFP_KERNEL); errout: @@ -960,7 +963,7 @@ int fib_dump_info(struct sk_buff *skb, u32 pid, u32 seq, int event, nlh = nlmsg_put(skb, pid, seq, event, sizeof(*rtm), flags); if (nlh == NULL) - return -ENOBUFS; + return -EMSGSIZE; rtm = nlmsg_data(nlh); rtm->rtm_family = AF_INET; @@ -1031,7 +1034,8 @@ int fib_dump_info(struct sk_buff *skb, u32 pid, u32 seq, int event, return nlmsg_end(skb, nlh); nla_put_failure: - return nlmsg_cancel(skb, nlh); + nlmsg_cancel(skb, nlh); + return -EMSGSIZE; } /* diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c index cfb249cc0a5..1e589b91605 100644 --- a/net/ipv4/fib_trie.c +++ b/net/ipv4/fib_trie.c @@ -1989,6 +1989,10 @@ static struct node *fib_trie_get_next(struct fib_trie_iter *iter) unsigned cindex = iter->index; struct tnode *p; + /* A single entry routing table */ + if (!tn) + return NULL; + pr_debug("get_next iter={node=%p index=%d depth=%d}\n", iter->tnode, iter->index, iter->depth); rescan: @@ -2037,11 +2041,18 @@ static struct node *fib_trie_get_first(struct fib_trie_iter *iter, if(!iter) return NULL; - if (n && IS_TNODE(n)) { - iter->tnode = (struct tnode *) n; - iter->trie = t; - iter->index = 0; - iter->depth = 1; + if (n) { + if (IS_TNODE(n)) { + iter->tnode = (struct tnode *) n; + iter->trie = t; + iter->index = 0; + iter->depth = 1; + } else { + iter->tnode = NULL; + iter->trie = t; + iter->index = 0; + iter->depth = 0; + } return n; } return NULL; @@ -2279,16 +2290,17 @@ static int fib_trie_seq_show(struct seq_file *seq, void *v) if (v == SEQ_START_TOKEN) return 0; + if (!NODE_PARENT(n)) { + if (iter->trie == trie_local) + seq_puts(seq, "<local>:\n"); + else + seq_puts(seq, "<main>:\n"); + } + if (IS_TNODE(n)) { struct tnode *tn = (struct tnode *) n; __be32 prf = htonl(MASK_PFX(tn->key, tn->pos)); - if (!NODE_PARENT(n)) { - if (iter->trie == trie_local) - seq_puts(seq, "<local>:\n"); - else - seq_puts(seq, "<main>:\n"); - } seq_indent(seq, iter->depth-1); seq_printf(seq, " +-- %d.%d.%d.%d/%d %d %d %d\n", NIPQUAD(prf), tn->pos, tn->bits, tn->full_children, diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c index 0017ccb01d6..024ae56cab2 100644 --- a/net/ipv4/igmp.c +++ b/net/ipv4/igmp.c @@ -455,6 +455,8 @@ static struct sk_buff *add_grec(struct sk_buff *skb, struct ip_mc_list *pmc, skb = add_grhead(skb, pmc, type, &pgr); first = 0; } + if (!skb) + return NULL; psrc = (__be32 *)skb_put(skb, sizeof(__be32)); *psrc = psf->sf_inaddr; scount++; stotal++; diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c index 77761ac4f7b..8aa7d51e688 100644 --- a/net/ipv4/inet_diag.c +++ b/net/ipv4/inet_diag.c @@ -153,7 +153,7 @@ static int inet_csk_diag_fill(struct sock *sk, rtattr_failure: nlmsg_failure: skb_trim(skb, b - skb->data); - return -1; + return -EMSGSIZE; } static int inet_twsk_diag_fill(struct inet_timewait_sock *tw, @@ -209,7 +209,7 @@ static int inet_twsk_diag_fill(struct inet_timewait_sock *tw, return skb->len; nlmsg_failure: skb_trim(skb, previous_tail - skb->data); - return -1; + return -EMSGSIZE; } static int sk_diag_fill(struct sock *sk, struct sk_buff *skb, @@ -274,11 +274,14 @@ static int inet_diag_get_exact(struct sk_buff *in_skb, if (!rep) goto out; - if (sk_diag_fill(sk, rep, req->idiag_ext, - NETLINK_CB(in_skb).pid, - nlh->nlmsg_seq, 0, nlh) <= 0) - BUG(); - + err = sk_diag_fill(sk, rep, req->idiag_ext, + NETLINK_CB(in_skb).pid, + nlh->nlmsg_seq, 0, nlh); + if (err < 0) { + WARN_ON(err == -EMSGSIZE); + kfree_skb(rep); + goto out; + } err = netlink_unicast(idiagnl, rep, NETLINK_CB(in_skb).pid, MSG_DONTWAIT); if (err > 0) @@ -775,7 +778,7 @@ next_normal: struct inet_timewait_sock *tw; inet_twsk_for_each(tw, node, - &hashinfo->ehash[i + hashinfo->ehash_size].chain) { + &head->twchain) { if (num < s_num) goto next_dying; diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c index 8c79c8a4ea5..150ace18dc7 100644 --- a/net/ipv4/inet_hashtables.c +++ b/net/ipv4/inet_hashtables.c @@ -212,7 +212,7 @@ static int __inet_check_established(struct inet_timewait_death_row *death_row, write_lock(&head->lock); /* Check TIME-WAIT sockets first. */ - sk_for_each(sk2, node, &(head + hinfo->ehash_size)->chain) { + sk_for_each(sk2, node, &head->twchain) { tw = inet_twsk(sk2); if (INET_TW_MATCH(sk2, hash, acookie, saddr, daddr, ports, dif)) { diff --git a/net/ipv4/inet_timewait_sock.c b/net/ipv4/inet_timewait_sock.c index 9f414e35c48..a73cf93cee3 100644 --- a/net/ipv4/inet_timewait_sock.c +++ b/net/ipv4/inet_timewait_sock.c @@ -78,8 +78,8 @@ void __inet_twsk_hashdance(struct inet_timewait_sock *tw, struct sock *sk, if (__sk_del_node_init(sk)) sock_prot_dec_use(sk->sk_prot); - /* Step 3: Hash TW into TIMEWAIT half of established hash table. */ - inet_twsk_add_node(tw, &(ehead + hashinfo->ehash_size)->chain); + /* Step 3: Hash TW into TIMEWAIT chain. */ + inet_twsk_add_node(tw, &ehead->twchain); atomic_inc(&tw->tw_refcnt); write_unlock(&ehead->lock); diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c index 74046efdf87..8ce00d3703d 100644 --- a/net/ipv4/ip_fragment.c +++ b/net/ipv4/ip_fragment.c @@ -565,7 +565,7 @@ static void ip_frag_queue(struct ipq *qp, struct sk_buff *skb) } else { struct sk_buff *free_it = next; - /* Old fragmnet is completely overridden with + /* Old fragment is completely overridden with * new one drop it. */ next = next->next; diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c index 476cb6084c7..51c83500790 100644 --- a/net/ipv4/ip_gre.c +++ b/net/ipv4/ip_gre.c @@ -1008,7 +1008,8 @@ ipgre_tunnel_ioctl (struct net_device *dev, struct ifreq *ifr, int cmd) goto done; dev = t->dev; } - err = unregister_netdevice(dev); + unregister_netdevice(dev); + err = 0; break; default: diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index a35209d517a..a0f2008584b 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -164,7 +164,6 @@ EXPORT_SYMBOL_GPL(ip_build_and_send_pkt); static inline int ip_finish_output2(struct sk_buff *skb) { struct dst_entry *dst = skb->dst; - struct hh_cache *hh = dst->hh; struct net_device *dev = dst->dev; int hh_len = LL_RESERVED_SPACE(dev); @@ -183,16 +182,9 @@ static inline int ip_finish_output2(struct sk_buff *skb) skb = skb2; } - if (hh) { - int hh_alen; - - read_lock_bh(&hh->hh_lock); - hh_alen = HH_DATA_ALIGN(hh->hh_len); - memcpy(skb->data - hh_alen, hh->hh_data, hh_alen); - read_unlock_bh(&hh->hh_lock); - skb_push(skb, hh->hh_len); - return hh->hh_output(skb); - } else if (dst->neighbour) + if (dst->hh) + return neigh_hh_output(dst->hh, skb); + else if (dst->neighbour) return dst->neighbour->output(skb); if (net_ratelimit()) @@ -289,8 +281,9 @@ int ip_output(struct sk_buff *skb) !(IPCB(skb)->flags & IPSKB_REROUTED)); } -int ip_queue_xmit(struct sk_buff *skb, struct sock *sk, int ipfragok) +int ip_queue_xmit(struct sk_buff *skb, int ipfragok) { + struct sock *sk = skb->sk; struct inet_sock *inet = inet_sk(sk); struct ip_options *opt = inet->opt; struct rtable *rt; diff --git a/net/ipv4/ipip.c b/net/ipv4/ipip.c index 9d719d664e5..da8bbd20c7e 100644 --- a/net/ipv4/ipip.c +++ b/net/ipv4/ipip.c @@ -754,7 +754,8 @@ ipip_tunnel_ioctl (struct net_device *dev, struct ifreq *ifr, int cmd) goto done; dev = t->dev; } - err = unregister_netdevice(dev); + unregister_netdevice(dev); + err = 0; break; default: diff --git a/net/ipv4/ipvs/ip_vs_sync.c b/net/ipv4/ipvs/ip_vs_sync.c index 91a075edd68..7ea2d981a93 100644 --- a/net/ipv4/ipvs/ip_vs_sync.c +++ b/net/ipv4/ipvs/ip_vs_sync.c @@ -657,7 +657,7 @@ static void sync_master_loop(void) if (stop_master_sync) break; - ssleep(1); + msleep_interruptible(1000); } /* clean up the sync_buff queue */ @@ -714,7 +714,7 @@ static void sync_backup_loop(void) if (stop_backup_sync) break; - ssleep(1); + msleep_interruptible(1000); } /* release the sending multicast socket */ @@ -826,7 +826,7 @@ static int fork_sync_thread(void *startup) if ((pid = kernel_thread(sync_thread, startup, 0)) < 0) { IP_VS_ERR("could not create sync_thread due to %d... " "retrying.\n", pid); - ssleep(1); + msleep_interruptible(1000); goto repeat; } @@ -849,10 +849,12 @@ int start_sync_thread(int state, char *mcast_ifn, __u8 syncid) ip_vs_sync_state |= state; if (state == IP_VS_STATE_MASTER) { - strlcpy(ip_vs_master_mcast_ifn, mcast_ifn, sizeof(ip_vs_master_mcast_ifn)); + strlcpy(ip_vs_master_mcast_ifn, mcast_ifn, + sizeof(ip_vs_master_mcast_ifn)); ip_vs_master_syncid = syncid; } else { - strlcpy(ip_vs_backup_mcast_ifn, mcast_ifn, sizeof(ip_vs_backup_mcast_ifn)); + strlcpy(ip_vs_backup_mcast_ifn, mcast_ifn, + sizeof(ip_vs_backup_mcast_ifn)); ip_vs_backup_syncid = syncid; } @@ -860,7 +862,7 @@ int start_sync_thread(int state, char *mcast_ifn, __u8 syncid) if ((pid = kernel_thread(fork_sync_thread, &startup, 0)) < 0) { IP_VS_ERR("could not create fork_sync_thread due to %d... " "retrying.\n", pid); - ssleep(1); + msleep_interruptible(1000); goto repeat; } @@ -880,7 +882,8 @@ int stop_sync_thread(int state) IP_VS_DBG(7, "%s: pid %d\n", __FUNCTION__, current->pid); IP_VS_INFO("stopping sync thread %d ...\n", - (state == IP_VS_STATE_MASTER) ? sync_master_pid : sync_backup_pid); + (state == IP_VS_STATE_MASTER) ? + sync_master_pid : sync_backup_pid); __set_current_state(TASK_UNINTERRUPTIBLE); add_wait_queue(&stop_sync_wait, &wait); diff --git a/net/ipv4/netfilter.c b/net/ipv4/netfilter.c index a68966059b5..c47ce7076bd 100644 --- a/net/ipv4/netfilter.c +++ b/net/ipv4/netfilter.c @@ -15,16 +15,19 @@ int ip_route_me_harder(struct sk_buff **pskb, unsigned addr_type) struct flowi fl = {}; struct dst_entry *odst; unsigned int hh_len; + unsigned int type; + type = inet_addr_type(iph->saddr); if (addr_type == RTN_UNSPEC) - addr_type = inet_addr_type(iph->saddr); + addr_type = type; /* some non-standard hacks like ipt_REJECT.c:send_reset() can cause * packets with foreign saddr to appear on the NF_IP_LOCAL_OUT hook. */ if (addr_type == RTN_LOCAL) { fl.nl_u.ip4_u.daddr = iph->daddr; - fl.nl_u.ip4_u.saddr = iph->saddr; + if (type == RTN_LOCAL) + fl.nl_u.ip4_u.saddr = iph->saddr; fl.nl_u.ip4_u.tos = RT_TOS(iph->tos); fl.oif = (*pskb)->sk ? (*pskb)->sk->sk_bound_dev_if : 0; fl.mark = (*pskb)->mark; diff --git a/net/ipv4/netfilter/Kconfig b/net/ipv4/netfilter/Kconfig index 363df9976c9..9b08e7ad71b 100644 --- a/net/ipv4/netfilter/Kconfig +++ b/net/ipv4/netfilter/Kconfig @@ -6,8 +6,8 @@ menu "IP: Netfilter Configuration" depends on INET && NETFILTER config NF_CONNTRACK_IPV4 - tristate "IPv4 connection tracking support (required for NAT) (EXPERIMENTAL)" - depends on EXPERIMENTAL && NF_CONNTRACK + tristate "IPv4 connection tracking support (required for NAT)" + depends on NF_CONNTRACK ---help--- Connection tracking keeps a record of what packets have passed through your machine, in order to figure out how they are related @@ -361,32 +361,6 @@ config IP_NF_TARGET_ULOG To compile it as a module, choose M here. If unsure, say N. -config IP_NF_TARGET_TCPMSS - tristate "TCPMSS target support" - depends on IP_NF_IPTABLES - ---help--- - This option adds a `TCPMSS' target, which allows you to alter the - MSS value of TCP SYN packets, to control the maximum size for that - connection (usually limiting it to your outgoing interface's MTU - minus 40). - - This is used to overcome criminally braindead ISPs or servers which - block ICMP Fragmentation Needed packets. The symptoms of this - problem are that everything works fine from your Linux - firewall/router, but machines behind it can never exchange large - packets: - 1) Web browsers connect, then hang with no data received. - 2) Small mail works fine, but large emails hang. - 3) ssh works fine, but scp hangs after initial handshaking. - - Workaround: activate this option and add a rule to your firewall - configuration like: - - iptables -A FORWARD -p tcp --tcp-flags SYN,RST SYN \ - -j TCPMSS --clamp-mss-to-pmtu - - To compile it as a module, choose M here. If unsure, say N. - # NAT + specific targets: ip_conntrack config IP_NF_NAT tristate "Full NAT" @@ -401,7 +375,7 @@ config IP_NF_NAT # NAT + specific targets: nf_conntrack config NF_NAT tristate "Full NAT" - depends on IP_NF_IPTABLES && NF_CONNTRACK + depends on IP_NF_IPTABLES && NF_CONNTRACK_IPV4 help The Full NAT option allows masquerading, port forwarding and other forms of full Network Address Port Translation. It is controlled by diff --git a/net/ipv4/netfilter/Makefile b/net/ipv4/netfilter/Makefile index 15e741aeb29..6625ec68180 100644 --- a/net/ipv4/netfilter/Makefile +++ b/net/ipv4/netfilter/Makefile @@ -4,6 +4,14 @@ # objects for the standalone - connection tracking / NAT ip_conntrack-objs := ip_conntrack_standalone.o ip_conntrack_core.o ip_conntrack_proto_generic.o ip_conntrack_proto_tcp.o ip_conntrack_proto_udp.o ip_conntrack_proto_icmp.o +# objects for l3 independent conntrack +nf_conntrack_ipv4-objs := nf_conntrack_l3proto_ipv4.o nf_conntrack_proto_icmp.o +ifeq ($(CONFIG_NF_CONNTRACK_PROC_COMPAT),y) +ifeq ($(CONFIG_PROC_FS),y) +nf_conntrack_ipv4-objs += nf_conntrack_l3proto_ipv4_compat.o +endif +endif + ip_nat-objs := ip_nat_core.o ip_nat_helper.o ip_nat_proto_unknown.o ip_nat_proto_tcp.o ip_nat_proto_udp.o ip_nat_proto_icmp.o nf_nat-objs := nf_nat_core.o nf_nat_helper.o nf_nat_proto_unknown.o nf_nat_proto_tcp.o nf_nat_proto_udp.o nf_nat_proto_icmp.o ifneq ($(CONFIG_NF_NAT),) @@ -20,6 +28,8 @@ ip_nat_h323-objs := ip_nat_helper_h323.o # connection tracking obj-$(CONFIG_IP_NF_CONNTRACK) += ip_conntrack.o +obj-$(CONFIG_NF_CONNTRACK_IPV4) += nf_conntrack_ipv4.o + obj-$(CONFIG_IP_NF_NAT) += ip_nat.o obj-$(CONFIG_NF_NAT) += nf_nat.o @@ -93,7 +103,6 @@ obj-$(CONFIG_IP_NF_TARGET_SAME) += ipt_SAME.o obj-$(CONFIG_IP_NF_NAT_SNMP_BASIC) += ip_nat_snmp_basic.o obj-$(CONFIG_IP_NF_TARGET_LOG) += ipt_LOG.o obj-$(CONFIG_IP_NF_TARGET_ULOG) += ipt_ULOG.o -obj-$(CONFIG_IP_NF_TARGET_TCPMSS) += ipt_TCPMSS.o obj-$(CONFIG_IP_NF_TARGET_CLUSTERIP) += ipt_CLUSTERIP.o obj-$(CONFIG_IP_NF_TARGET_TTL) += ipt_TTL.o @@ -106,13 +115,3 @@ obj-$(CONFIG_IP_NF_ARPFILTER) += arptable_filter.o obj-$(CONFIG_IP_NF_QUEUE) += ip_queue.o -# objects for l3 independent conntrack -nf_conntrack_ipv4-objs := nf_conntrack_l3proto_ipv4.o nf_conntrack_proto_icmp.o -ifeq ($(CONFIG_NF_CONNTRACK_PROC_COMPAT),y) -ifeq ($(CONFIG_PROC_FS),y) -nf_conntrack_ipv4-objs += nf_conntrack_l3proto_ipv4_compat.o -endif -endif - -# l3 independent conntrack -obj-$(CONFIG_NF_CONNTRACK_IPV4) += nf_conntrack_ipv4.o diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c index 71b76ade00e..9aa22398b3d 100644 --- a/net/ipv4/netfilter/arp_tables.c +++ b/net/ipv4/netfilter/arp_tables.c @@ -358,6 +358,7 @@ static int mark_source_chains(struct xt_table_info *newinfo, for (;;) { struct arpt_standard_target *t = (void *)arpt_get_target(e); + int visited = e->comefrom & (1 << hook); if (e->comefrom & (1 << NF_ARP_NUMHOOKS)) { printk("arptables: loop hook %u pos %u %08X.\n", @@ -368,11 +369,11 @@ static int mark_source_chains(struct xt_table_info *newinfo, |= ((1 << hook) | (1 << NF_ARP_NUMHOOKS)); /* Unconditional return/END. */ - if (e->target_offset == sizeof(struct arpt_entry) + if ((e->target_offset == sizeof(struct arpt_entry) && (strcmp(t->target.u.user.name, ARPT_STANDARD_TARGET) == 0) && t->verdict < 0 - && unconditional(&e->arp)) { + && unconditional(&e->arp)) || visited) { unsigned int oldpos, size; if (t->verdict < -NF_MAX_VERDICT - 1) { diff --git a/net/ipv4/netfilter/ip_conntrack_netlink.c b/net/ipv4/netfilter/ip_conntrack_netlink.c index 5fcf91d617c..7f70b0886b8 100644 --- a/net/ipv4/netfilter/ip_conntrack_netlink.c +++ b/net/ipv4/netfilter/ip_conntrack_netlink.c @@ -374,9 +374,11 @@ static int ctnetlink_conntrack_event(struct notifier_block *this, && ctnetlink_dump_helpinfo(skb, ct) < 0) goto nfattr_failure; +#ifdef CONFIG_IP_NF_CONNTRACK_MARK if ((events & IPCT_MARK || ct->mark) && ctnetlink_dump_mark(skb, ct) < 0) goto nfattr_failure; +#endif if (events & IPCT_COUNTER_FILLING && (ctnetlink_dump_counters(skb, ct, IP_CT_DIR_ORIGINAL) < 0 || @@ -959,7 +961,7 @@ ctnetlink_create_conntrack(struct nfattr *cda[], if (cda[CTA_PROTOINFO-1]) { err = ctnetlink_change_protoinfo(ct, cda); if (err < 0) - return err; + goto err; } #if defined(CONFIG_IP_NF_CONNTRACK_MARK) diff --git a/net/ipv4/netfilter/ip_conntrack_proto_tcp.c b/net/ipv4/netfilter/ip_conntrack_proto_tcp.c index 06e4e8a6dd9..c34f48fe547 100644 --- a/net/ipv4/netfilter/ip_conntrack_proto_tcp.c +++ b/net/ipv4/netfilter/ip_conntrack_proto_tcp.c @@ -50,12 +50,9 @@ static DEFINE_RWLOCK(tcp_lock); If it's non-zero, we mark only out of window RST segments as INVALID. */ int ip_ct_tcp_be_liberal __read_mostly = 0; -/* When connection is picked up from the middle, how many packets are required - to pass in each direction when we assume we are in sync - if any side uses - window scaling, we lost the game. - If it is set to zero, we disable picking up already established +/* If it is set to zero, we disable picking up already established connections. */ -int ip_ct_tcp_loose __read_mostly = 3; +int ip_ct_tcp_loose __read_mostly = 1; /* Max number of the retransmitted packets without receiving an (acceptable) ACK from the destination. If this number is reached, a shorter timer @@ -694,11 +691,10 @@ static int tcp_in_window(struct ip_ct_tcp *state, before(sack, receiver->td_end + 1), after(ack, receiver->td_end - MAXACKWINDOW(sender))); - if (sender->loose || receiver->loose || - (before(seq, sender->td_maxend + 1) && - after(end, sender->td_end - receiver->td_maxwin - 1) && - before(sack, receiver->td_end + 1) && - after(ack, receiver->td_end - MAXACKWINDOW(sender)))) { + if (before(seq, sender->td_maxend + 1) && + after(end, sender->td_end - receiver->td_maxwin - 1) && + before(sack, receiver->td_end + 1) && + after(ack, receiver->td_end - MAXACKWINDOW(sender))) { /* * Take into account window scaling (RFC 1323). */ @@ -743,15 +739,13 @@ static int tcp_in_window(struct ip_ct_tcp *state, state->retrans = 0; } } - /* - * Close the window of disabled window tracking :-) - */ - if (sender->loose) - sender->loose--; - res = 1; } else { - if (LOG_INVALID(IPPROTO_TCP)) + res = 0; + if (sender->flags & IP_CT_TCP_FLAG_BE_LIBERAL || + ip_ct_tcp_be_liberal) + res = 1; + if (!res && LOG_INVALID(IPPROTO_TCP)) nf_log_packet(PF_INET, 0, skb, NULL, NULL, NULL, "ip_ct_tcp: %s ", before(seq, sender->td_maxend + 1) ? @@ -762,8 +756,6 @@ static int tcp_in_window(struct ip_ct_tcp *state, : "ACK is over the upper bound (ACKed data not seen yet)" : "SEQ is under the lower bound (already ACKed data retransmitted)" : "SEQ is over the upper bound (over the window of the receiver)"); - - res = ip_ct_tcp_be_liberal; } DEBUGP("tcp_in_window: res=%i sender end=%u maxend=%u maxwin=%u " @@ -1105,8 +1097,6 @@ static int tcp_new(struct ip_conntrack *conntrack, tcp_options(skb, iph, th, &conntrack->proto.tcp.seen[0]); conntrack->proto.tcp.seen[1].flags = 0; - conntrack->proto.tcp.seen[0].loose = - conntrack->proto.tcp.seen[1].loose = 0; } else if (ip_ct_tcp_loose == 0) { /* Don't try to pick up connections. */ return 0; @@ -1127,11 +1117,11 @@ static int tcp_new(struct ip_conntrack *conntrack, conntrack->proto.tcp.seen[0].td_maxwin; conntrack->proto.tcp.seen[0].td_scale = 0; - /* We assume SACK. Should we assume window scaling too? */ + /* We assume SACK and liberal window checking to handle + * window scaling */ conntrack->proto.tcp.seen[0].flags = - conntrack->proto.tcp.seen[1].flags = IP_CT_TCP_FLAG_SACK_PERM; - conntrack->proto.tcp.seen[0].loose = - conntrack->proto.tcp.seen[1].loose = ip_ct_tcp_loose; + conntrack->proto.tcp.seen[1].flags = IP_CT_TCP_FLAG_SACK_PERM | + IP_CT_TCP_FLAG_BE_LIBERAL; } conntrack->proto.tcp.seen[1].td_end = 0; diff --git a/net/ipv4/netfilter/ip_conntrack_sip.c b/net/ipv4/netfilter/ip_conntrack_sip.c index 3a26d63eed8..11c588a10e6 100644 --- a/net/ipv4/netfilter/ip_conntrack_sip.c +++ b/net/ipv4/netfilter/ip_conntrack_sip.c @@ -283,10 +283,16 @@ static int skp_epaddr_len(const char *dptr, const char *limit, int *shift) { int s = *shift; - for (; dptr <= limit && *dptr != '@'; dptr++) + /* Search for @, but stop at the end of the line. + * We are inside a sip: URI, so we don't need to worry about + * continuation lines. */ + while (dptr <= limit && + *dptr != '@' && *dptr != '\r' && *dptr != '\n') { (*shift)++; + dptr++; + } - if (*dptr == '@') { + if (dptr <= limit && *dptr == '@') { dptr++; (*shift)++; } else diff --git a/net/ipv4/netfilter/ip_nat_core.c b/net/ipv4/netfilter/ip_nat_core.c index 9d1a5175dcd..5e08c2bf887 100644 --- a/net/ipv4/netfilter/ip_nat_core.c +++ b/net/ipv4/netfilter/ip_nat_core.c @@ -246,8 +246,9 @@ get_unique_tuple(struct ip_conntrack_tuple *tuple, if (maniptype == IP_NAT_MANIP_SRC) { if (find_appropriate_src(orig_tuple, tuple, range)) { DEBUGP("get_unique_tuple: Found current src map\n"); - if (!ip_nat_used_tuple(tuple, conntrack)) - return; + if (!(range->flags & IP_NAT_RANGE_PROTO_RANDOM)) + if (!ip_nat_used_tuple(tuple, conntrack)) + return; } } @@ -261,6 +262,13 @@ get_unique_tuple(struct ip_conntrack_tuple *tuple, proto = ip_nat_proto_find_get(orig_tuple->dst.protonum); + /* Change protocol info to have some randomization */ + if (range->flags & IP_NAT_RANGE_PROTO_RANDOM) { + proto->unique_tuple(tuple, range, maniptype, conntrack); + ip_nat_proto_put(proto); + return; + } + /* Only bother mapping if it's not already in range and unique */ if ((!(range->flags & IP_NAT_RANGE_PROTO_SPECIFIED) || proto->in_range(tuple, maniptype, &range->min, &range->max)) diff --git a/net/ipv4/netfilter/ip_nat_helper.c b/net/ipv4/netfilter/ip_nat_helper.c index ee80feb4b2a..2e5c4bc52a6 100644 --- a/net/ipv4/netfilter/ip_nat_helper.c +++ b/net/ipv4/netfilter/ip_nat_helper.c @@ -183,7 +183,7 @@ ip_nat_mangle_tcp_packet(struct sk_buff **pskb, datalen = (*pskb)->len - iph->ihl*4; if ((*pskb)->ip_summed != CHECKSUM_PARTIAL) { tcph->check = 0; - tcph->check = tcp_v4_check(tcph, datalen, + tcph->check = tcp_v4_check(datalen, iph->saddr, iph->daddr, csum_partial((char *)tcph, datalen, 0)); diff --git a/net/ipv4/netfilter/ip_nat_proto_tcp.c b/net/ipv4/netfilter/ip_nat_proto_tcp.c index b586d18b3fb..14ff24f53a7 100644 --- a/net/ipv4/netfilter/ip_nat_proto_tcp.c +++ b/net/ipv4/netfilter/ip_nat_proto_tcp.c @@ -8,6 +8,7 @@ #include <linux/types.h> #include <linux/init.h> +#include <linux/random.h> #include <linux/netfilter.h> #include <linux/ip.h> #include <linux/tcp.h> @@ -75,6 +76,10 @@ tcp_unique_tuple(struct ip_conntrack_tuple *tuple, range_size = ntohs(range->max.tcp.port) - min + 1; } + /* Start from random port to avoid prediction */ + if (range->flags & IP_NAT_RANGE_PROTO_RANDOM) + port = net_random(); + for (i = 0; i < range_size; i++, port++) { *portptr = htons(min + port % range_size); if (!ip_nat_used_tuple(tuple, conntrack)) { diff --git a/net/ipv4/netfilter/ip_nat_proto_udp.c b/net/ipv4/netfilter/ip_nat_proto_udp.c index 5ced0877b32..dfd52167289 100644 --- a/net/ipv4/netfilter/ip_nat_proto_udp.c +++ b/net/ipv4/netfilter/ip_nat_proto_udp.c @@ -8,6 +8,7 @@ #include <linux/types.h> #include <linux/init.h> +#include <linux/random.h> #include <linux/netfilter.h> #include <linux/ip.h> #include <linux/udp.h> @@ -74,6 +75,10 @@ udp_unique_tuple(struct ip_conntrack_tuple *tuple, range_size = ntohs(range->max.udp.port) - min + 1; } + /* Start from random port to avoid prediction */ + if (range->flags & IP_NAT_RANGE_PROTO_RANDOM) + port = net_random(); + for (i = 0; i < range_size; i++, port++) { *portptr = htons(min + port % range_size); if (!ip_nat_used_tuple(tuple, conntrack)) diff --git a/net/ipv4/netfilter/ip_nat_rule.c b/net/ipv4/netfilter/ip_nat_rule.c index a176aa3031e..e1c8a05f3dc 100644 --- a/net/ipv4/netfilter/ip_nat_rule.c +++ b/net/ipv4/netfilter/ip_nat_rule.c @@ -86,7 +86,7 @@ static struct } }; -static struct ipt_table nat_table = { +static struct xt_table nat_table = { .name = "nat", .valid_hooks = NAT_VALID_HOOKS, .lock = RW_LOCK_UNLOCKED, @@ -99,7 +99,7 @@ static unsigned int ipt_snat_target(struct sk_buff **pskb, const struct net_device *in, const struct net_device *out, unsigned int hooknum, - const struct ipt_target *target, + const struct xt_target *target, const void *targinfo) { struct ip_conntrack *ct; @@ -141,7 +141,7 @@ static unsigned int ipt_dnat_target(struct sk_buff **pskb, const struct net_device *in, const struct net_device *out, unsigned int hooknum, - const struct ipt_target *target, + const struct xt_target *target, const void *targinfo) { struct ip_conntrack *ct; @@ -166,7 +166,7 @@ static unsigned int ipt_dnat_target(struct sk_buff **pskb, static int ipt_snat_checkentry(const char *tablename, const void *entry, - const struct ipt_target *target, + const struct xt_target *target, void *targinfo, unsigned int hook_mask) { @@ -182,7 +182,7 @@ static int ipt_snat_checkentry(const char *tablename, static int ipt_dnat_checkentry(const char *tablename, const void *entry, - const struct ipt_target *target, + const struct xt_target *target, void *targinfo, unsigned int hook_mask) { @@ -193,6 +193,10 @@ static int ipt_dnat_checkentry(const char *tablename, printk("DNAT: multiple ranges no longer supported\n"); return 0; } + if (mr->range[0].flags & IP_NAT_RANGE_PROTO_RANDOM) { + printk("DNAT: port randomization not supported\n"); + return 0; + } return 1; } @@ -257,8 +261,9 @@ int ip_nat_rule_find(struct sk_buff **pskb, return ret; } -static struct ipt_target ipt_snat_reg = { +static struct xt_target ipt_snat_reg = { .name = "SNAT", + .family = AF_INET, .target = ipt_snat_target, .targetsize = sizeof(struct ip_nat_multi_range_compat), .table = "nat", @@ -266,8 +271,9 @@ static struct ipt_target ipt_snat_reg = { .checkentry = ipt_snat_checkentry, }; -static struct ipt_target ipt_dnat_reg = { +static struct xt_target ipt_dnat_reg = { .name = "DNAT", + .family = AF_INET, .target = ipt_dnat_target, .targetsize = sizeof(struct ip_nat_multi_range_compat), .table = "nat", @@ -282,27 +288,27 @@ int __init ip_nat_rule_init(void) ret = ipt_register_table(&nat_table, &nat_initial_table.repl); if (ret != 0) return ret; - ret = ipt_register_target(&ipt_snat_reg); + ret = xt_register_target(&ipt_snat_reg); if (ret != 0) goto unregister_table; - ret = ipt_register_target(&ipt_dnat_reg); + ret = xt_register_target(&ipt_dnat_reg); if (ret != 0) goto unregister_snat; return ret; unregister_snat: - ipt_unregister_target(&ipt_snat_reg); + xt_unregister_target(&ipt_snat_reg); unregister_table: - ipt_unregister_table(&nat_table); + xt_unregister_table(&nat_table); return ret; } void ip_nat_rule_cleanup(void) { - ipt_unregister_target(&ipt_dnat_reg); - ipt_unregister_target(&ipt_snat_reg); + xt_unregister_target(&ipt_dnat_reg); + xt_unregister_target(&ipt_snat_reg); ipt_unregister_table(&nat_table); } diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c index 0ff2956d35e..5a7b3a34138 100644 --- a/net/ipv4/netfilter/ip_tables.c +++ b/net/ipv4/netfilter/ip_tables.c @@ -216,7 +216,7 @@ ipt_do_table(struct sk_buff **pskb, unsigned int hook, const struct net_device *in, const struct net_device *out, - struct ipt_table *table) + struct xt_table *table) { static const char nulldevname[IFNAMSIZ] __attribute__((aligned(sizeof(long)))); u_int16_t offset; @@ -384,6 +384,7 @@ mark_source_chains(struct xt_table_info *newinfo, for (;;) { struct ipt_standard_target *t = (void *)ipt_get_target(e); + int visited = e->comefrom & (1 << hook); if (e->comefrom & (1 << NF_IP_NUMHOOKS)) { printk("iptables: loop hook %u pos %u %08X.\n", @@ -394,11 +395,11 @@ mark_source_chains(struct xt_table_info *newinfo, |= ((1 << hook) | (1 << NF_IP_NUMHOOKS)); /* Unconditional return/END. */ - if (e->target_offset == sizeof(struct ipt_entry) + if ((e->target_offset == sizeof(struct ipt_entry) && (strcmp(t->target.u.user.name, IPT_STANDARD_TARGET) == 0) && t->verdict < 0 - && unconditional(&e->ip)) { + && unconditional(&e->ip)) || visited) { unsigned int oldpos, size; if (t->verdict < -NF_MAX_VERDICT - 1) { @@ -484,39 +485,68 @@ cleanup_match(struct ipt_entry_match *m, unsigned int *i) } static inline int -check_match(struct ipt_entry_match *m, +check_entry(struct ipt_entry *e, const char *name) +{ + struct ipt_entry_target *t; + + if (!ip_checkentry(&e->ip)) { + duprintf("ip_tables: ip check failed %p %s.\n", e, name); + return -EINVAL; + } + + if (e->target_offset + sizeof(struct ipt_entry_target) > e->next_offset) + return -EINVAL; + + t = ipt_get_target(e); + if (e->target_offset + t->u.target_size > e->next_offset) + return -EINVAL; + + return 0; +} + +static inline int check_match(struct ipt_entry_match *m, const char *name, + const struct ipt_ip *ip, unsigned int hookmask) +{ + struct xt_match *match; + int ret; + + match = m->u.kernel.match; + ret = xt_check_match(match, AF_INET, m->u.match_size - sizeof(*m), + name, hookmask, ip->proto, + ip->invflags & IPT_INV_PROTO); + if (!ret && m->u.kernel.match->checkentry + && !m->u.kernel.match->checkentry(name, ip, match, m->data, + hookmask)) { + duprintf("ip_tables: check failed for `%s'.\n", + m->u.kernel.match->name); + ret = -EINVAL; + } + return ret; +} + +static inline int +find_check_match(struct ipt_entry_match *m, const char *name, const struct ipt_ip *ip, unsigned int hookmask, unsigned int *i) { - struct ipt_match *match; + struct xt_match *match; int ret; match = try_then_request_module(xt_find_match(AF_INET, m->u.user.name, m->u.user.revision), "ipt_%s", m->u.user.name); if (IS_ERR(match) || !match) { - duprintf("check_match: `%s' not found\n", m->u.user.name); + duprintf("find_check_match: `%s' not found\n", m->u.user.name); return match ? PTR_ERR(match) : -ENOENT; } m->u.kernel.match = match; - ret = xt_check_match(match, AF_INET, m->u.match_size - sizeof(*m), - name, hookmask, ip->proto, - ip->invflags & IPT_INV_PROTO); + ret = check_match(m, name, ip, hookmask); if (ret) goto err; - if (m->u.kernel.match->checkentry - && !m->u.kernel.match->checkentry(name, ip, match, m->data, - hookmask)) { - duprintf("ip_tables: check failed for `%s'.\n", - m->u.kernel.match->name); - ret = -EINVAL; - goto err; - } - (*i)++; return 0; err: @@ -524,60 +554,62 @@ err: return ret; } -static struct ipt_target ipt_standard_target; +static inline int check_target(struct ipt_entry *e, const char *name) +{ + struct ipt_entry_target *t; + struct xt_target *target; + int ret; + + t = ipt_get_target(e); + target = t->u.kernel.target; + ret = xt_check_target(target, AF_INET, t->u.target_size - sizeof(*t), + name, e->comefrom, e->ip.proto, + e->ip.invflags & IPT_INV_PROTO); + if (!ret && t->u.kernel.target->checkentry + && !t->u.kernel.target->checkentry(name, e, target, + t->data, e->comefrom)) { + duprintf("ip_tables: check failed for `%s'.\n", + t->u.kernel.target->name); + ret = -EINVAL; + } + return ret; +} static inline int -check_entry(struct ipt_entry *e, const char *name, unsigned int size, +find_check_entry(struct ipt_entry *e, const char *name, unsigned int size, unsigned int *i) { struct ipt_entry_target *t; - struct ipt_target *target; + struct xt_target *target; int ret; unsigned int j; - if (!ip_checkentry(&e->ip)) { - duprintf("ip_tables: ip check failed %p %s.\n", e, name); - return -EINVAL; - } - - if (e->target_offset + sizeof(struct ipt_entry_target) > e->next_offset) - return -EINVAL; + ret = check_entry(e, name); + if (ret) + return ret; j = 0; - ret = IPT_MATCH_ITERATE(e, check_match, name, &e->ip, e->comefrom, &j); + ret = IPT_MATCH_ITERATE(e, find_check_match, name, &e->ip, + e->comefrom, &j); if (ret != 0) goto cleanup_matches; t = ipt_get_target(e); - ret = -EINVAL; - if (e->target_offset + t->u.target_size > e->next_offset) - goto cleanup_matches; target = try_then_request_module(xt_find_target(AF_INET, t->u.user.name, t->u.user.revision), "ipt_%s", t->u.user.name); if (IS_ERR(target) || !target) { - duprintf("check_entry: `%s' not found\n", t->u.user.name); + duprintf("find_check_entry: `%s' not found\n", t->u.user.name); ret = target ? PTR_ERR(target) : -ENOENT; goto cleanup_matches; } t->u.kernel.target = target; - ret = xt_check_target(target, AF_INET, t->u.target_size - sizeof(*t), - name, e->comefrom, e->ip.proto, - e->ip.invflags & IPT_INV_PROTO); + ret = check_target(e, name); if (ret) goto err; - if (t->u.kernel.target->checkentry - && !t->u.kernel.target->checkentry(name, e, target, t->data, - e->comefrom)) { - duprintf("ip_tables: check failed for `%s'.\n", - t->u.kernel.target->name); - ret = -EINVAL; - goto err; - } - (*i)++; return 0; err: @@ -712,7 +744,7 @@ translate_table(const char *name, /* Finally, each sanity check must pass */ i = 0; ret = IPT_ENTRY_ITERATE(entry0, newinfo->size, - check_entry, name, size, &i); + find_check_entry, name, size, &i); if (ret != 0) { IPT_ENTRY_ITERATE(entry0, newinfo->size, @@ -786,7 +818,7 @@ get_counters(const struct xt_table_info *t, } } -static inline struct xt_counters * alloc_counters(struct ipt_table *table) +static inline struct xt_counters * alloc_counters(struct xt_table *table) { unsigned int countersize; struct xt_counters *counters; @@ -811,7 +843,7 @@ static inline struct xt_counters * alloc_counters(struct ipt_table *table) static int copy_entries_to_user(unsigned int total_size, - struct ipt_table *table, + struct xt_table *table, void __user *userptr) { unsigned int off, num; @@ -887,13 +919,13 @@ copy_entries_to_user(unsigned int total_size, #ifdef CONFIG_COMPAT struct compat_delta { struct compat_delta *next; - u_int16_t offset; + unsigned int offset; short delta; }; static struct compat_delta *compat_offsets = NULL; -static int compat_add_offset(u_int16_t offset, short delta) +static int compat_add_offset(unsigned int offset, short delta) { struct compat_delta *tmp; @@ -925,7 +957,7 @@ static void compat_flush_offsets(void) } } -static short compat_calc_jump(u_int16_t offset) +static short compat_calc_jump(unsigned int offset) { struct compat_delta *tmp; short delta; @@ -965,7 +997,7 @@ static int compat_calc_entry(struct ipt_entry *e, struct xt_table_info *info, void *base, struct xt_table_info *newinfo) { struct ipt_entry_target *t; - u_int16_t entry_offset; + unsigned int entry_offset; int off, i, ret; off = 0; @@ -1014,7 +1046,7 @@ static int compat_table_info(struct xt_table_info *info, static int get_info(void __user *user, int *len, int compat) { char name[IPT_TABLE_MAXNAMELEN]; - struct ipt_table *t; + struct xt_table *t; int ret; if (*len != sizeof(struct ipt_getinfo)) { @@ -1075,7 +1107,7 @@ get_entries(struct ipt_get_entries __user *uptr, int *len) { int ret; struct ipt_get_entries get; - struct ipt_table *t; + struct xt_table *t; if (*len < sizeof(get)) { duprintf("get_entries: %u < %d\n", *len, @@ -1119,7 +1151,7 @@ __do_replace(const char *name, unsigned int valid_hooks, void __user *counters_ptr) { int ret; - struct ipt_table *t; + struct xt_table *t; struct xt_table_info *oldinfo; struct xt_counters *counters; void *loc_cpu_old_entry; @@ -1270,7 +1302,7 @@ do_add_counters(void __user *user, unsigned int len, int compat) char *name; int size; void *ptmp; - struct ipt_table *t; + struct xt_table *t; struct xt_table_info *private; int ret = 0; void *loc_cpu_entry; @@ -1405,7 +1437,7 @@ compat_check_calc_match(struct ipt_entry_match *m, unsigned int hookmask, int *size, int *i) { - struct ipt_match *match; + struct xt_match *match; match = try_then_request_module(xt_find_match(AF_INET, m->u.user.name, m->u.user.revision), @@ -1434,8 +1466,8 @@ check_compat_entry_size_and_hooks(struct ipt_entry *e, const char *name) { struct ipt_entry_target *t; - struct ipt_target *target; - u_int16_t entry_offset; + struct xt_target *target; + unsigned int entry_offset; int ret, off, h, j; duprintf("check_compat_entry_size_and_hooks %p\n", e); @@ -1452,14 +1484,9 @@ check_compat_entry_size_and_hooks(struct ipt_entry *e, return -EINVAL; } - if (!ip_checkentry(&e->ip)) { - duprintf("ip_tables: ip check failed %p %s.\n", e, name); - return -EINVAL; - } - - if (e->target_offset + sizeof(struct compat_xt_entry_target) > - e->next_offset) - return -EINVAL; + ret = check_entry(e, name); + if (ret) + return ret; off = 0; entry_offset = (void *)e - (void *)base; @@ -1470,15 +1497,13 @@ check_compat_entry_size_and_hooks(struct ipt_entry *e, goto cleanup_matches; t = ipt_get_target(e); - ret = -EINVAL; - if (e->target_offset + t->u.target_size > e->next_offset) - goto cleanup_matches; target = try_then_request_module(xt_find_target(AF_INET, t->u.user.name, t->u.user.revision), "ipt_%s", t->u.user.name); if (IS_ERR(target) || !target) { - duprintf("check_entry: `%s' not found\n", t->u.user.name); + duprintf("check_compat_entry_size_and_hooks: `%s' not found\n", + t->u.user.name); ret = target ? PTR_ERR(target) : -ENOENT; goto cleanup_matches; } @@ -1525,7 +1550,7 @@ static int compat_copy_entry_from_user(struct ipt_entry *e, void **dstptr, struct xt_table_info *newinfo, unsigned char *base) { struct ipt_entry_target *t; - struct ipt_target *target; + struct xt_target *target; struct ipt_entry *de; unsigned int origsize; int ret, h; @@ -1555,57 +1580,15 @@ static int compat_copy_entry_from_user(struct ipt_entry *e, void **dstptr, return ret; } -static inline int compat_check_match(struct ipt_entry_match *m, const char *name, - const struct ipt_ip *ip, unsigned int hookmask) -{ - struct ipt_match *match; - int ret; - - match = m->u.kernel.match; - ret = xt_check_match(match, AF_INET, m->u.match_size - sizeof(*m), - name, hookmask, ip->proto, - ip->invflags & IPT_INV_PROTO); - if (!ret && m->u.kernel.match->checkentry - && !m->u.kernel.match->checkentry(name, ip, match, m->data, - hookmask)) { - duprintf("ip_tables: compat: check failed for `%s'.\n", - m->u.kernel.match->name); - ret = -EINVAL; - } - return ret; -} - -static inline int compat_check_target(struct ipt_entry *e, const char *name) -{ - struct ipt_entry_target *t; - struct ipt_target *target; - int ret; - - t = ipt_get_target(e); - target = t->u.kernel.target; - ret = xt_check_target(target, AF_INET, t->u.target_size - sizeof(*t), - name, e->comefrom, e->ip.proto, - e->ip.invflags & IPT_INV_PROTO); - if (!ret && t->u.kernel.target->checkentry - && !t->u.kernel.target->checkentry(name, e, target, - t->data, e->comefrom)) { - duprintf("ip_tables: compat: check failed for `%s'.\n", - t->u.kernel.target->name); - ret = -EINVAL; - } - return ret; -} - static inline int compat_check_entry(struct ipt_entry *e, const char *name) { int ret; - ret = IPT_MATCH_ITERATE(e, compat_check_match, name, &e->ip, - e->comefrom); + ret = IPT_MATCH_ITERATE(e, check_match, name, &e->ip, e->comefrom); if (ret) return ret; - return compat_check_target(e, name); + return check_target(e, name); } static int @@ -1812,7 +1795,7 @@ struct compat_ipt_get_entries }; static int compat_copy_entries_to_user(unsigned int total_size, - struct ipt_table *table, void __user *userptr) + struct xt_table *table, void __user *userptr) { unsigned int off, num; struct compat_ipt_entry e; @@ -1886,7 +1869,7 @@ compat_get_entries(struct compat_ipt_get_entries __user *uptr, int *len) { int ret; struct compat_ipt_get_entries get; - struct ipt_table *t; + struct xt_table *t; if (*len < sizeof(get)) { @@ -2069,7 +2052,7 @@ int ipt_register_table(struct xt_table *table, const struct ipt_replace *repl) return 0; } -void ipt_unregister_table(struct ipt_table *table) +void ipt_unregister_table(struct xt_table *table) { struct xt_table_info *private; void *loc_cpu_entry; @@ -2141,7 +2124,7 @@ icmp_checkentry(const char *tablename, } /* The built-in targets: standard (NULL) and error. */ -static struct ipt_target ipt_standard_target = { +static struct xt_target ipt_standard_target = { .name = IPT_STANDARD_TARGET, .targetsize = sizeof(int), .family = AF_INET, @@ -2152,7 +2135,7 @@ static struct ipt_target ipt_standard_target = { #endif }; -static struct ipt_target ipt_error_target = { +static struct xt_target ipt_error_target = { .name = IPT_ERROR_TARGET, .target = ipt_error, .targetsize = IPT_FUNCTION_MAXNAMELEN, @@ -2175,7 +2158,7 @@ static struct nf_sockopt_ops ipt_sockopts = { #endif }; -static struct ipt_match icmp_matchstruct = { +static struct xt_match icmp_matchstruct = { .name = "icmp", .match = icmp_match, .matchsize = sizeof(struct ipt_icmp), diff --git a/net/ipv4/netfilter/ipt_CLUSTERIP.c b/net/ipv4/netfilter/ipt_CLUSTERIP.c index 09836506223..343c2abdc1a 100644 --- a/net/ipv4/netfilter/ipt_CLUSTERIP.c +++ b/net/ipv4/netfilter/ipt_CLUSTERIP.c @@ -26,6 +26,7 @@ #include <linux/netfilter_arp.h> +#include <linux/netfilter/x_tables.h> #include <linux/netfilter_ipv4/ip_tables.h> #include <linux/netfilter_ipv4/ipt_CLUSTERIP.h> #include <net/netfilter/nf_conntrack_compat.h> @@ -247,6 +248,7 @@ clusterip_hashfn(struct sk_buff *skb, struct clusterip_config *config) switch (iph->protocol) { case IPPROTO_TCP: case IPPROTO_UDP: + case IPPROTO_UDPLITE: case IPPROTO_SCTP: case IPPROTO_DCCP: case IPPROTO_ICMP: @@ -329,7 +331,7 @@ target(struct sk_buff **pskb, if ((*pskb)->nh.iph->protocol == IPPROTO_ICMP && (ctinfo == IP_CT_RELATED || ctinfo == IP_CT_RELATED+IP_CT_IS_REPLY)) - return IPT_CONTINUE; + return XT_CONTINUE; /* ip_conntrack_icmp guarantees us that we only have ICMP_ECHO, * TIMESTAMP, INFO_REQUEST or ADDRESS type icmp packets from here @@ -367,7 +369,7 @@ target(struct sk_buff **pskb, * actually a unicast IP packet. TCP doesn't like PACKET_MULTICAST */ (*pskb)->pkt_type = PACKET_HOST; - return IPT_CONTINUE; + return XT_CONTINUE; } static int @@ -447,6 +449,12 @@ checkentry(const char *tablename, cipinfo->config = config; } + if (nf_ct_l3proto_try_module_get(target->family) < 0) { + printk(KERN_WARNING "can't load conntrack support for " + "proto=%d\n", target->family); + return 0; + } + return 1; } @@ -460,10 +468,13 @@ static void destroy(const struct xt_target *target, void *targinfo) clusterip_config_entry_put(cipinfo->config); clusterip_config_put(cipinfo->config); + + nf_ct_l3proto_module_put(target->family); } -static struct ipt_target clusterip_tgt = { +static struct xt_target clusterip_tgt = { .name = "CLUSTERIP", + .family = AF_INET, .target = target, .targetsize = sizeof(struct ipt_clusterip_tgt_info), .checkentry = checkentry, @@ -683,7 +694,7 @@ static ssize_t clusterip_proc_write(struct file *file, const char __user *input, { #define PROC_WRITELEN 10 char buffer[PROC_WRITELEN+1]; - struct proc_dir_entry *pde = PDE(file->f_dentry->d_inode); + struct proc_dir_entry *pde = PDE(file->f_path.dentry->d_inode); struct clusterip_config *c = pde->data; unsigned long nodenum; @@ -719,7 +730,7 @@ static int __init ipt_clusterip_init(void) { int ret; - ret = ipt_register_target(&clusterip_tgt); + ret = xt_register_target(&clusterip_tgt); if (ret < 0) return ret; @@ -745,7 +756,7 @@ cleanup_hook: nf_unregister_hook(&cip_arp_ops); #endif /* CONFIG_PROC_FS */ cleanup_target: - ipt_unregister_target(&clusterip_tgt); + xt_unregister_target(&clusterip_tgt); return ret; } @@ -757,7 +768,7 @@ static void __exit ipt_clusterip_fini(void) remove_proc_entry(clusterip_procdir->name, clusterip_procdir->parent); #endif nf_unregister_hook(&cip_arp_ops); - ipt_unregister_target(&clusterip_tgt); + xt_unregister_target(&clusterip_tgt); } module_init(ipt_clusterip_init); diff --git a/net/ipv4/netfilter/ipt_ECN.c b/net/ipv4/netfilter/ipt_ECN.c index b55d670a24d..b5ca5938d1f 100644 --- a/net/ipv4/netfilter/ipt_ECN.c +++ b/net/ipv4/netfilter/ipt_ECN.c @@ -9,12 +9,14 @@ * ipt_ECN.c,v 1.5 2002/08/18 19:36:51 laforge Exp */ +#include <linux/in.h> #include <linux/module.h> #include <linux/skbuff.h> #include <linux/ip.h> #include <linux/tcp.h> #include <net/checksum.h> +#include <linux/netfilter/x_tables.h> #include <linux/netfilter_ipv4/ip_tables.h> #include <linux/netfilter_ipv4/ipt_ECN.h> @@ -95,7 +97,7 @@ target(struct sk_buff **pskb, if (!set_ect_tcp(pskb, einfo)) return NF_DROP; - return IPT_CONTINUE; + return XT_CONTINUE; } static int @@ -119,7 +121,7 @@ checkentry(const char *tablename, return 0; } if ((einfo->operation & (IPT_ECN_OP_SET_ECE|IPT_ECN_OP_SET_CWR)) - && (e->ip.proto != IPPROTO_TCP || (e->ip.invflags & IPT_INV_PROTO))) { + && (e->ip.proto != IPPROTO_TCP || (e->ip.invflags & XT_INV_PROTO))) { printk(KERN_WARNING "ECN: cannot use TCP operations on a " "non-tcp rule\n"); return 0; @@ -127,8 +129,9 @@ checkentry(const char *tablename, return 1; } -static struct ipt_target ipt_ecn_reg = { +static struct xt_target ipt_ecn_reg = { .name = "ECN", + .family = AF_INET, .target = target, .targetsize = sizeof(struct ipt_ECN_info), .table = "mangle", @@ -138,12 +141,12 @@ static struct ipt_target ipt_ecn_reg = { static int __init ipt_ecn_init(void) { - return ipt_register_target(&ipt_ecn_reg); + return xt_register_target(&ipt_ecn_reg); } static void __exit ipt_ecn_fini(void) { - ipt_unregister_target(&ipt_ecn_reg); + xt_unregister_target(&ipt_ecn_reg); } module_init(ipt_ecn_init); diff --git a/net/ipv4/netfilter/ipt_LOG.c b/net/ipv4/netfilter/ipt_LOG.c index c96de16fefa..f68370ffb43 100644 --- a/net/ipv4/netfilter/ipt_LOG.c +++ b/net/ipv4/netfilter/ipt_LOG.c @@ -20,7 +20,7 @@ #include <net/route.h> #include <linux/netfilter.h> -#include <linux/netfilter_ipv4/ip_tables.h> +#include <linux/netfilter/x_tables.h> #include <linux/netfilter_ipv4/ipt_LOG.h> MODULE_LICENSE("GPL"); @@ -432,7 +432,7 @@ ipt_log_target(struct sk_buff **pskb, ipt_log_packet(PF_INET, hooknum, *pskb, in, out, &li, loginfo->prefix); - return IPT_CONTINUE; + return XT_CONTINUE; } static int ipt_log_checkentry(const char *tablename, @@ -455,8 +455,9 @@ static int ipt_log_checkentry(const char *tablename, return 1; } -static struct ipt_target ipt_log_reg = { +static struct xt_target ipt_log_reg = { .name = "LOG", + .family = AF_INET, .target = ipt_log_target, .targetsize = sizeof(struct ipt_log_info), .checkentry = ipt_log_checkentry, @@ -471,8 +472,11 @@ static struct nf_logger ipt_log_logger ={ static int __init ipt_log_init(void) { - if (ipt_register_target(&ipt_log_reg)) - return -EINVAL; + int ret; + + ret = xt_register_target(&ipt_log_reg); + if (ret < 0) + return ret; if (nf_log_register(PF_INET, &ipt_log_logger) < 0) { printk(KERN_WARNING "ipt_LOG: not logging via system console " "since somebody else already registered for PF_INET\n"); @@ -486,7 +490,7 @@ static int __init ipt_log_init(void) static void __exit ipt_log_fini(void) { nf_log_unregister_logger(&ipt_log_logger); - ipt_unregister_target(&ipt_log_reg); + xt_unregister_target(&ipt_log_reg); } module_init(ipt_log_init); diff --git a/net/ipv4/netfilter/ipt_MASQUERADE.c b/net/ipv4/netfilter/ipt_MASQUERADE.c index 28b9233956b..91c42efcd53 100644 --- a/net/ipv4/netfilter/ipt_MASQUERADE.c +++ b/net/ipv4/netfilter/ipt_MASQUERADE.c @@ -25,7 +25,7 @@ #else #include <linux/netfilter_ipv4/ip_nat_rule.h> #endif -#include <linux/netfilter_ipv4/ip_tables.h> +#include <linux/netfilter/x_tables.h> MODULE_LICENSE("GPL"); MODULE_AUTHOR("Netfilter Core Team <coreteam@netfilter.org>"); @@ -127,10 +127,13 @@ masquerade_target(struct sk_buff **pskb, static inline int device_cmp(struct ip_conntrack *i, void *ifindex) { + int ret; #ifdef CONFIG_NF_NAT_NEEDED struct nf_conn_nat *nat = nfct_nat(i); + + if (!nat) + return 0; #endif - int ret; read_lock_bh(&masq_lock); #ifdef CONFIG_NF_NAT_NEEDED @@ -187,8 +190,9 @@ static struct notifier_block masq_inet_notifier = { .notifier_call = masq_inet_event, }; -static struct ipt_target masquerade = { +static struct xt_target masquerade = { .name = "MASQUERADE", + .family = AF_INET, .target = masquerade_target, .targetsize = sizeof(struct ip_nat_multi_range_compat), .table = "nat", @@ -201,7 +205,7 @@ static int __init ipt_masquerade_init(void) { int ret; - ret = ipt_register_target(&masquerade); + ret = xt_register_target(&masquerade); if (ret == 0) { /* Register for device down reports */ @@ -215,7 +219,7 @@ static int __init ipt_masquerade_init(void) static void __exit ipt_masquerade_fini(void) { - ipt_unregister_target(&masquerade); + xt_unregister_target(&masquerade); unregister_netdevice_notifier(&masq_dev_notifier); unregister_inetaddr_notifier(&masq_inet_notifier); } diff --git a/net/ipv4/netfilter/ipt_NETMAP.c b/net/ipv4/netfilter/ipt_NETMAP.c index 9390e90f2b2..b4acc241d89 100644 --- a/net/ipv4/netfilter/ipt_NETMAP.c +++ b/net/ipv4/netfilter/ipt_NETMAP.c @@ -15,6 +15,7 @@ #include <linux/netdevice.h> #include <linux/netfilter.h> #include <linux/netfilter_ipv4.h> +#include <linux/netfilter/x_tables.h> #ifdef CONFIG_NF_NAT_NEEDED #include <net/netfilter/nf_nat_rule.h> #else @@ -88,8 +89,9 @@ target(struct sk_buff **pskb, return ip_nat_setup_info(ct, &newrange, hooknum); } -static struct ipt_target target_module = { +static struct xt_target target_module = { .name = MODULENAME, + .family = AF_INET, .target = target, .targetsize = sizeof(struct ip_nat_multi_range_compat), .table = "nat", @@ -101,12 +103,12 @@ static struct ipt_target target_module = { static int __init ipt_netmap_init(void) { - return ipt_register_target(&target_module); + return xt_register_target(&target_module); } static void __exit ipt_netmap_fini(void) { - ipt_unregister_target(&target_module); + xt_unregister_target(&target_module); } module_init(ipt_netmap_init); diff --git a/net/ipv4/netfilter/ipt_REDIRECT.c b/net/ipv4/netfilter/ipt_REDIRECT.c index 462eceb3a1b..54cd021aa5a 100644 --- a/net/ipv4/netfilter/ipt_REDIRECT.c +++ b/net/ipv4/netfilter/ipt_REDIRECT.c @@ -18,6 +18,7 @@ #include <net/protocol.h> #include <net/checksum.h> #include <linux/netfilter_ipv4.h> +#include <linux/netfilter/x_tables.h> #ifdef CONFIG_NF_NAT_NEEDED #include <net/netfilter/nf_nat_rule.h> #else @@ -104,8 +105,9 @@ redirect_target(struct sk_buff **pskb, return ip_nat_setup_info(ct, &newrange, hooknum); } -static struct ipt_target redirect_reg = { +static struct xt_target redirect_reg = { .name = "REDIRECT", + .family = AF_INET, .target = redirect_target, .targetsize = sizeof(struct ip_nat_multi_range_compat), .table = "nat", @@ -116,12 +118,12 @@ static struct ipt_target redirect_reg = { static int __init ipt_redirect_init(void) { - return ipt_register_target(&redirect_reg); + return xt_register_target(&redirect_reg); } static void __exit ipt_redirect_fini(void) { - ipt_unregister_target(&redirect_reg); + xt_unregister_target(&redirect_reg); } module_init(ipt_redirect_init); diff --git a/net/ipv4/netfilter/ipt_REJECT.c b/net/ipv4/netfilter/ipt_REJECT.c index f0319e5ee43..e4a1ddb386a 100644 --- a/net/ipv4/netfilter/ipt_REJECT.c +++ b/net/ipv4/netfilter/ipt_REJECT.c @@ -22,6 +22,7 @@ #include <net/tcp.h> #include <net/route.h> #include <net/dst.h> +#include <linux/netfilter/x_tables.h> #include <linux/netfilter_ipv4/ip_tables.h> #include <linux/netfilter_ipv4/ipt_REJECT.h> #ifdef CONFIG_BRIDGE_NETFILTER @@ -116,7 +117,7 @@ static void send_reset(struct sk_buff *oldskb, int hook) /* Adjust TCP checksum */ tcph->check = 0; - tcph->check = tcp_v4_check(tcph, sizeof(struct tcphdr), + tcph->check = tcp_v4_check(sizeof(struct tcphdr), nskb->nh.iph->saddr, nskb->nh.iph->daddr, csum_partial((char *)tcph, @@ -230,7 +231,7 @@ static int check(const char *tablename, } else if (rejinfo->with == IPT_TCP_RESET) { /* Must specify that it's a TCP packet */ if (e->ip.proto != IPPROTO_TCP - || (e->ip.invflags & IPT_INV_PROTO)) { + || (e->ip.invflags & XT_INV_PROTO)) { DEBUGP("REJECT: TCP_RESET invalid for non-tcp\n"); return 0; } @@ -238,8 +239,9 @@ static int check(const char *tablename, return 1; } -static struct ipt_target ipt_reject_reg = { +static struct xt_target ipt_reject_reg = { .name = "REJECT", + .family = AF_INET, .target = reject, .targetsize = sizeof(struct ipt_reject_info), .table = "filter", @@ -251,12 +253,12 @@ static struct ipt_target ipt_reject_reg = { static int __init ipt_reject_init(void) { - return ipt_register_target(&ipt_reject_reg); + return xt_register_target(&ipt_reject_reg); } static void __exit ipt_reject_fini(void) { - ipt_unregister_target(&ipt_reject_reg); + xt_unregister_target(&ipt_reject_reg); } module_init(ipt_reject_init); diff --git a/net/ipv4/netfilter/ipt_SAME.c b/net/ipv4/netfilter/ipt_SAME.c index 3dcf2941133..a1cdd1262de 100644 --- a/net/ipv4/netfilter/ipt_SAME.c +++ b/net/ipv4/netfilter/ipt_SAME.c @@ -34,6 +34,7 @@ #include <net/protocol.h> #include <net/checksum.h> #include <linux/netfilter_ipv4.h> +#include <linux/netfilter/x_tables.h> #ifdef CONFIG_NF_NAT_NEEDED #include <net/netfilter/nf_nat_rule.h> #else @@ -186,8 +187,9 @@ same_target(struct sk_buff **pskb, return ip_nat_setup_info(ct, &newrange, hooknum); } -static struct ipt_target same_reg = { +static struct xt_target same_reg = { .name = "SAME", + .family = AF_INET, .target = same_target, .targetsize = sizeof(struct ipt_same_info), .table = "nat", @@ -199,12 +201,12 @@ static struct ipt_target same_reg = { static int __init ipt_same_init(void) { - return ipt_register_target(&same_reg); + return xt_register_target(&same_reg); } static void __exit ipt_same_fini(void) { - ipt_unregister_target(&same_reg); + xt_unregister_target(&same_reg); } module_init(ipt_same_init); diff --git a/net/ipv4/netfilter/ipt_TCPMSS.c b/net/ipv4/netfilter/ipt_TCPMSS.c deleted file mode 100644 index 93eb5c3c188..00000000000 --- a/net/ipv4/netfilter/ipt_TCPMSS.c +++ /dev/null @@ -1,207 +0,0 @@ -/* - * This is a module which is used for setting the MSS option in TCP packets. - * - * Copyright (C) 2000 Marc Boucher <marc@mbsi.ca> - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - */ - -#include <linux/module.h> -#include <linux/skbuff.h> - -#include <linux/ip.h> -#include <net/tcp.h> - -#include <linux/netfilter_ipv4/ip_tables.h> -#include <linux/netfilter_ipv4/ipt_TCPMSS.h> - -MODULE_LICENSE("GPL"); -MODULE_AUTHOR("Marc Boucher <marc@mbsi.ca>"); -MODULE_DESCRIPTION("iptables TCP MSS modification module"); - -static inline unsigned int -optlen(const u_int8_t *opt, unsigned int offset) -{ - /* Beware zero-length options: make finite progress */ - if (opt[offset] <= TCPOPT_NOP || opt[offset+1] == 0) - return 1; - else - return opt[offset+1]; -} - -static unsigned int -ipt_tcpmss_target(struct sk_buff **pskb, - const struct net_device *in, - const struct net_device *out, - unsigned int hooknum, - const struct xt_target *target, - const void *targinfo) -{ - const struct ipt_tcpmss_info *tcpmssinfo = targinfo; - struct tcphdr *tcph; - struct iphdr *iph; - u_int16_t tcplen, newmss; - __be16 newtotlen, oldval; - unsigned int i; - u_int8_t *opt; - - if (!skb_make_writable(pskb, (*pskb)->len)) - return NF_DROP; - - iph = (*pskb)->nh.iph; - tcplen = (*pskb)->len - iph->ihl*4; - tcph = (void *)iph + iph->ihl*4; - - /* Since it passed flags test in tcp match, we know it is is - not a fragment, and has data >= tcp header length. SYN - packets should not contain data: if they did, then we risk - running over MTU, sending Frag Needed and breaking things - badly. --RR */ - if (tcplen != tcph->doff*4) { - if (net_ratelimit()) - printk(KERN_ERR - "ipt_tcpmss_target: bad length (%d bytes)\n", - (*pskb)->len); - return NF_DROP; - } - - if (tcpmssinfo->mss == IPT_TCPMSS_CLAMP_PMTU) { - if (dst_mtu((*pskb)->dst) <= sizeof(struct iphdr) + - sizeof(struct tcphdr)) { - if (net_ratelimit()) - printk(KERN_ERR "ipt_tcpmss_target: " - "unknown or invalid path-MTU (%d)\n", - dst_mtu((*pskb)->dst)); - return NF_DROP; /* or IPT_CONTINUE ?? */ - } - - newmss = dst_mtu((*pskb)->dst) - sizeof(struct iphdr) - - sizeof(struct tcphdr); - } else - newmss = tcpmssinfo->mss; - - opt = (u_int8_t *)tcph; - for (i = sizeof(struct tcphdr); i < tcph->doff*4; i += optlen(opt, i)) { - if (opt[i] == TCPOPT_MSS && tcph->doff*4 - i >= TCPOLEN_MSS && - opt[i+1] == TCPOLEN_MSS) { - u_int16_t oldmss; - - oldmss = (opt[i+2] << 8) | opt[i+3]; - - if (tcpmssinfo->mss == IPT_TCPMSS_CLAMP_PMTU && - oldmss <= newmss) - return IPT_CONTINUE; - - opt[i+2] = (newmss & 0xff00) >> 8; - opt[i+3] = (newmss & 0x00ff); - - nf_proto_csum_replace2(&tcph->check, *pskb, - htons(oldmss), htons(newmss), 0); - return IPT_CONTINUE; - } - } - - /* - * MSS Option not found ?! add it.. - */ - if (skb_tailroom((*pskb)) < TCPOLEN_MSS) { - struct sk_buff *newskb; - - newskb = skb_copy_expand(*pskb, skb_headroom(*pskb), - TCPOLEN_MSS, GFP_ATOMIC); - if (!newskb) - return NF_DROP; - kfree_skb(*pskb); - *pskb = newskb; - iph = (*pskb)->nh.iph; - tcph = (void *)iph + iph->ihl*4; - } - - skb_put((*pskb), TCPOLEN_MSS); - - opt = (u_int8_t *)tcph + sizeof(struct tcphdr); - memmove(opt + TCPOLEN_MSS, opt, tcplen - sizeof(struct tcphdr)); - - nf_proto_csum_replace2(&tcph->check, *pskb, - htons(tcplen), htons(tcplen + TCPOLEN_MSS), 1); - opt[0] = TCPOPT_MSS; - opt[1] = TCPOLEN_MSS; - opt[2] = (newmss & 0xff00) >> 8; - opt[3] = (newmss & 0x00ff); - - nf_proto_csum_replace4(&tcph->check, *pskb, 0, *((__be32 *)opt), 0); - - oldval = ((__be16 *)tcph)[6]; - tcph->doff += TCPOLEN_MSS/4; - nf_proto_csum_replace2(&tcph->check, *pskb, - oldval, ((__be16 *)tcph)[6], 0); - - newtotlen = htons(ntohs(iph->tot_len) + TCPOLEN_MSS); - nf_csum_replace2(&iph->check, iph->tot_len, newtotlen); - iph->tot_len = newtotlen; - return IPT_CONTINUE; -} - -#define TH_SYN 0x02 - -static inline int find_syn_match(const struct ipt_entry_match *m) -{ - const struct ipt_tcp *tcpinfo = (const struct ipt_tcp *)m->data; - - if (strcmp(m->u.kernel.match->name, "tcp") == 0 && - tcpinfo->flg_cmp & TH_SYN && - !(tcpinfo->invflags & IPT_TCP_INV_FLAGS)) - return 1; - - return 0; -} - -/* Must specify -p tcp --syn/--tcp-flags SYN */ -static int -ipt_tcpmss_checkentry(const char *tablename, - const void *e_void, - const struct xt_target *target, - void *targinfo, - unsigned int hook_mask) -{ - const struct ipt_tcpmss_info *tcpmssinfo = targinfo; - const struct ipt_entry *e = e_void; - - if (tcpmssinfo->mss == IPT_TCPMSS_CLAMP_PMTU && - (hook_mask & ~((1 << NF_IP_FORWARD) | - (1 << NF_IP_LOCAL_OUT) | - (1 << NF_IP_POST_ROUTING))) != 0) { - printk("TCPMSS: path-MTU clamping only supported in " - "FORWARD, OUTPUT and POSTROUTING hooks\n"); - return 0; - } - - if (IPT_MATCH_ITERATE(e, find_syn_match)) - return 1; - printk("TCPMSS: Only works on TCP SYN packets\n"); - return 0; -} - -static struct ipt_target ipt_tcpmss_reg = { - .name = "TCPMSS", - .target = ipt_tcpmss_target, - .targetsize = sizeof(struct ipt_tcpmss_info), - .proto = IPPROTO_TCP, - .checkentry = ipt_tcpmss_checkentry, - .me = THIS_MODULE, -}; - -static int __init ipt_tcpmss_init(void) -{ - return ipt_register_target(&ipt_tcpmss_reg); -} - -static void __exit ipt_tcpmss_fini(void) -{ - ipt_unregister_target(&ipt_tcpmss_reg); -} - -module_init(ipt_tcpmss_init); -module_exit(ipt_tcpmss_fini); diff --git a/net/ipv4/netfilter/ipt_TOS.c b/net/ipv4/netfilter/ipt_TOS.c index 18e74ac4d42..29b05a6bd10 100644 --- a/net/ipv4/netfilter/ipt_TOS.c +++ b/net/ipv4/netfilter/ipt_TOS.c @@ -13,7 +13,7 @@ #include <linux/ip.h> #include <net/checksum.h> -#include <linux/netfilter_ipv4/ip_tables.h> +#include <linux/netfilter/x_tables.h> #include <linux/netfilter_ipv4/ipt_TOS.h> MODULE_LICENSE("GPL"); @@ -40,7 +40,7 @@ target(struct sk_buff **pskb, iph->tos = (iph->tos & IPTOS_PREC_MASK) | tosinfo->tos; nf_csum_replace2(&iph->check, htons(oldtos), htons(iph->tos)); } - return IPT_CONTINUE; + return XT_CONTINUE; } static int @@ -63,8 +63,9 @@ checkentry(const char *tablename, return 1; } -static struct ipt_target ipt_tos_reg = { +static struct xt_target ipt_tos_reg = { .name = "TOS", + .family = AF_INET, .target = target, .targetsize = sizeof(struct ipt_tos_target_info), .table = "mangle", @@ -74,12 +75,12 @@ static struct ipt_target ipt_tos_reg = { static int __init ipt_tos_init(void) { - return ipt_register_target(&ipt_tos_reg); + return xt_register_target(&ipt_tos_reg); } static void __exit ipt_tos_fini(void) { - ipt_unregister_target(&ipt_tos_reg); + xt_unregister_target(&ipt_tos_reg); } module_init(ipt_tos_init); diff --git a/net/ipv4/netfilter/ipt_TTL.c b/net/ipv4/netfilter/ipt_TTL.c index fffe5ca82e9..d2b6fa3f9dc 100644 --- a/net/ipv4/netfilter/ipt_TTL.c +++ b/net/ipv4/netfilter/ipt_TTL.c @@ -12,7 +12,7 @@ #include <linux/ip.h> #include <net/checksum.h> -#include <linux/netfilter_ipv4/ip_tables.h> +#include <linux/netfilter/x_tables.h> #include <linux/netfilter_ipv4/ipt_TTL.h> MODULE_AUTHOR("Harald Welte <laforge@netfilter.org>"); @@ -59,7 +59,7 @@ ipt_ttl_target(struct sk_buff **pskb, iph->ttl = new_ttl; } - return IPT_CONTINUE; + return XT_CONTINUE; } static int ipt_ttl_checkentry(const char *tablename, @@ -80,8 +80,9 @@ static int ipt_ttl_checkentry(const char *tablename, return 1; } -static struct ipt_target ipt_TTL = { +static struct xt_target ipt_TTL = { .name = "TTL", + .family = AF_INET, .target = ipt_ttl_target, .targetsize = sizeof(struct ipt_TTL_info), .table = "mangle", @@ -91,12 +92,12 @@ static struct ipt_target ipt_TTL = { static int __init ipt_ttl_init(void) { - return ipt_register_target(&ipt_TTL); + return xt_register_target(&ipt_TTL); } static void __exit ipt_ttl_fini(void) { - ipt_unregister_target(&ipt_TTL); + xt_unregister_target(&ipt_TTL); } module_init(ipt_ttl_init); diff --git a/net/ipv4/netfilter/ipt_ULOG.c b/net/ipv4/netfilter/ipt_ULOG.c index dbd34783a64..7af57a3a1f3 100644 --- a/net/ipv4/netfilter/ipt_ULOG.c +++ b/net/ipv4/netfilter/ipt_ULOG.c @@ -57,7 +57,7 @@ #include <linux/mm.h> #include <linux/moduleparam.h> #include <linux/netfilter.h> -#include <linux/netfilter_ipv4/ip_tables.h> +#include <linux/netfilter/x_tables.h> #include <linux/netfilter_ipv4/ipt_ULOG.h> #include <net/sock.h> #include <linux/bitops.h> @@ -132,7 +132,6 @@ static void ulog_send(unsigned int nlgroupnum) ub->qlen = 0; ub->skb = NULL; ub->lastnlh = NULL; - } @@ -314,7 +313,7 @@ static unsigned int ipt_ulog_target(struct sk_buff **pskb, ipt_ulog_packet(hooknum, *pskb, in, out, loginfo, NULL); - return IPT_CONTINUE; + return XT_CONTINUE; } static void ipt_logfn(unsigned int pf, @@ -363,8 +362,9 @@ static int ipt_ulog_checkentry(const char *tablename, return 1; } -static struct ipt_target ipt_ulog_reg = { +static struct xt_target ipt_ulog_reg = { .name = "ULOG", + .family = AF_INET, .target = ipt_ulog_target, .targetsize = sizeof(struct ipt_ulog_info), .checkentry = ipt_ulog_checkentry, @@ -379,7 +379,7 @@ static struct nf_logger ipt_ulog_logger = { static int __init ipt_ulog_init(void) { - int i; + int ret, i; DEBUGP("ipt_ULOG: init module\n"); @@ -400,9 +400,10 @@ static int __init ipt_ulog_init(void) if (!nflognl) return -ENOMEM; - if (ipt_register_target(&ipt_ulog_reg) != 0) { + ret = xt_register_target(&ipt_ulog_reg); + if (ret < 0) { sock_release(nflognl->sk_socket); - return -EINVAL; + return ret; } if (nflog) nf_log_register(PF_INET, &ipt_ulog_logger); @@ -419,7 +420,7 @@ static void __exit ipt_ulog_fini(void) if (nflog) nf_log_unregister_logger(&ipt_ulog_logger); - ipt_unregister_target(&ipt_ulog_reg); + xt_unregister_target(&ipt_ulog_reg); sock_release(nflognl->sk_socket); /* remove pending timers and free allocated skb's */ @@ -435,7 +436,6 @@ static void __exit ipt_ulog_fini(void) ub->skb = NULL; } } - } module_init(ipt_ulog_init); diff --git a/net/ipv4/netfilter/ipt_addrtype.c b/net/ipv4/netfilter/ipt_addrtype.c index 7b60eb74788..648f555c4d1 100644 --- a/net/ipv4/netfilter/ipt_addrtype.c +++ b/net/ipv4/netfilter/ipt_addrtype.c @@ -16,7 +16,7 @@ #include <net/route.h> #include <linux/netfilter_ipv4/ipt_addrtype.h> -#include <linux/netfilter_ipv4/ip_tables.h> +#include <linux/netfilter/x_tables.h> MODULE_LICENSE("GPL"); MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>"); @@ -44,8 +44,9 @@ static int match(const struct sk_buff *skb, return ret; } -static struct ipt_match addrtype_match = { +static struct xt_match addrtype_match = { .name = "addrtype", + .family = AF_INET, .match = match, .matchsize = sizeof(struct ipt_addrtype_info), .me = THIS_MODULE @@ -53,12 +54,12 @@ static struct ipt_match addrtype_match = { static int __init ipt_addrtype_init(void) { - return ipt_register_match(&addrtype_match); + return xt_register_match(&addrtype_match); } static void __exit ipt_addrtype_fini(void) { - ipt_unregister_match(&addrtype_match); + xt_unregister_match(&addrtype_match); } module_init(ipt_addrtype_init); diff --git a/net/ipv4/netfilter/ipt_ah.c b/net/ipv4/netfilter/ipt_ah.c index 1798f86bc53..42f41224a43 100644 --- a/net/ipv4/netfilter/ipt_ah.c +++ b/net/ipv4/netfilter/ipt_ah.c @@ -6,12 +6,13 @@ * published by the Free Software Foundation. */ +#include <linux/in.h> #include <linux/module.h> #include <linux/skbuff.h> #include <linux/ip.h> #include <linux/netfilter_ipv4/ipt_ah.h> -#include <linux/netfilter_ipv4/ip_tables.h> +#include <linux/netfilter/x_tables.h> MODULE_LICENSE("GPL"); MODULE_AUTHOR("Yon Uriarte <yon@astaro.de>"); @@ -86,8 +87,9 @@ checkentry(const char *tablename, return 1; } -static struct ipt_match ah_match = { +static struct xt_match ah_match = { .name = "ah", + .family = AF_INET, .match = match, .matchsize = sizeof(struct ipt_ah), .proto = IPPROTO_AH, @@ -97,12 +99,12 @@ static struct ipt_match ah_match = { static int __init ipt_ah_init(void) { - return ipt_register_match(&ah_match); + return xt_register_match(&ah_match); } static void __exit ipt_ah_fini(void) { - ipt_unregister_match(&ah_match); + xt_unregister_match(&ah_match); } module_init(ipt_ah_init); diff --git a/net/ipv4/netfilter/ipt_ecn.c b/net/ipv4/netfilter/ipt_ecn.c index dafbdec0efc..37508b2cfea 100644 --- a/net/ipv4/netfilter/ipt_ecn.c +++ b/net/ipv4/netfilter/ipt_ecn.c @@ -9,10 +9,13 @@ * published by the Free Software Foundation. */ +#include <linux/in.h> +#include <linux/ip.h> #include <linux/module.h> #include <linux/skbuff.h> #include <linux/tcp.h> +#include <linux/netfilter/x_tables.h> #include <linux/netfilter_ipv4/ip_tables.h> #include <linux/netfilter_ipv4/ipt_ecn.h> @@ -109,8 +112,9 @@ static int checkentry(const char *tablename, const void *ip_void, return 1; } -static struct ipt_match ecn_match = { +static struct xt_match ecn_match = { .name = "ecn", + .family = AF_INET, .match = match, .matchsize = sizeof(struct ipt_ecn_info), .checkentry = checkentry, @@ -119,12 +123,12 @@ static struct ipt_match ecn_match = { static int __init ipt_ecn_init(void) { - return ipt_register_match(&ecn_match); + return xt_register_match(&ecn_match); } static void __exit ipt_ecn_fini(void) { - ipt_unregister_match(&ecn_match); + xt_unregister_match(&ecn_match); } module_init(ipt_ecn_init); diff --git a/net/ipv4/netfilter/ipt_iprange.c b/net/ipv4/netfilter/ipt_iprange.c index 5202edd8d33..05de593be94 100644 --- a/net/ipv4/netfilter/ipt_iprange.c +++ b/net/ipv4/netfilter/ipt_iprange.c @@ -10,7 +10,7 @@ #include <linux/module.h> #include <linux/skbuff.h> #include <linux/ip.h> -#include <linux/netfilter_ipv4/ip_tables.h> +#include <linux/netfilter/x_tables.h> #include <linux/netfilter_ipv4/ipt_iprange.h> MODULE_LICENSE("GPL"); @@ -63,22 +63,22 @@ match(const struct sk_buff *skb, return 1; } -static struct ipt_match iprange_match = { +static struct xt_match iprange_match = { .name = "iprange", + .family = AF_INET, .match = match, .matchsize = sizeof(struct ipt_iprange_info), - .destroy = NULL, .me = THIS_MODULE }; static int __init ipt_iprange_init(void) { - return ipt_register_match(&iprange_match); + return xt_register_match(&iprange_match); } static void __exit ipt_iprange_fini(void) { - ipt_unregister_match(&iprange_match); + xt_unregister_match(&iprange_match); } module_init(ipt_iprange_init); diff --git a/net/ipv4/netfilter/ipt_owner.c b/net/ipv4/netfilter/ipt_owner.c index 78c336f12a9..9f496ac834b 100644 --- a/net/ipv4/netfilter/ipt_owner.c +++ b/net/ipv4/netfilter/ipt_owner.c @@ -15,7 +15,7 @@ #include <net/sock.h> #include <linux/netfilter_ipv4/ipt_owner.h> -#include <linux/netfilter_ipv4/ip_tables.h> +#include <linux/netfilter/x_tables.h> MODULE_LICENSE("GPL"); MODULE_AUTHOR("Marc Boucher <marc@mbsi.ca>"); @@ -68,8 +68,9 @@ checkentry(const char *tablename, return 1; } -static struct ipt_match owner_match = { +static struct xt_match owner_match = { .name = "owner", + .family = AF_INET, .match = match, .matchsize = sizeof(struct ipt_owner_info), .hooks = (1 << NF_IP_LOCAL_OUT) | (1 << NF_IP_POST_ROUTING), @@ -79,12 +80,12 @@ static struct ipt_match owner_match = { static int __init ipt_owner_init(void) { - return ipt_register_match(&owner_match); + return xt_register_match(&owner_match); } static void __exit ipt_owner_fini(void) { - ipt_unregister_match(&owner_match); + xt_unregister_match(&owner_match); } module_init(ipt_owner_init); diff --git a/net/ipv4/netfilter/ipt_recent.c b/net/ipv4/netfilter/ipt_recent.c index 126db44e71a..6b97b679617 100644 --- a/net/ipv4/netfilter/ipt_recent.c +++ b/net/ipv4/netfilter/ipt_recent.c @@ -12,6 +12,7 @@ * Copyright 2002-2003, Stephen Frost, 2.5.x port by laforge@netfilter.org */ #include <linux/init.h> +#include <linux/ip.h> #include <linux/moduleparam.h> #include <linux/proc_fs.h> #include <linux/seq_file.h> @@ -24,7 +25,7 @@ #include <linux/skbuff.h> #include <linux/inet.h> -#include <linux/netfilter_ipv4/ip_tables.h> +#include <linux/netfilter/x_tables.h> #include <linux/netfilter_ipv4/ipt_recent.h> MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>"); @@ -401,7 +402,7 @@ static int recent_seq_open(struct inode *inode, struct file *file) static ssize_t recent_proc_write(struct file *file, const char __user *input, size_t size, loff_t *loff) { - struct proc_dir_entry *pde = PDE(file->f_dentry->d_inode); + struct proc_dir_entry *pde = PDE(file->f_path.dentry->d_inode); struct recent_table *t = pde->data; struct recent_entry *e; char buf[sizeof("+255.255.255.255")], *c = buf; @@ -462,8 +463,9 @@ static struct file_operations recent_fops = { }; #endif /* CONFIG_PROC_FS */ -static struct ipt_match recent_match = { +static struct xt_match recent_match = { .name = "recent", + .family = AF_INET, .match = ipt_recent_match, .matchsize = sizeof(struct ipt_recent_info), .checkentry = ipt_recent_checkentry, @@ -479,13 +481,13 @@ static int __init ipt_recent_init(void) return -EINVAL; ip_list_hash_size = 1 << fls(ip_list_tot); - err = ipt_register_match(&recent_match); + err = xt_register_match(&recent_match); #ifdef CONFIG_PROC_FS if (err) return err; proc_dir = proc_mkdir("ipt_recent", proc_net); if (proc_dir == NULL) { - ipt_unregister_match(&recent_match); + xt_unregister_match(&recent_match); err = -ENOMEM; } #endif @@ -495,7 +497,7 @@ static int __init ipt_recent_init(void) static void __exit ipt_recent_exit(void) { BUG_ON(!list_empty(&tables)); - ipt_unregister_match(&recent_match); + xt_unregister_match(&recent_match); #ifdef CONFIG_PROC_FS remove_proc_entry("ipt_recent", proc_net); #endif diff --git a/net/ipv4/netfilter/ipt_tos.c b/net/ipv4/netfilter/ipt_tos.c index 5549c39c785..5d33b51d49d 100644 --- a/net/ipv4/netfilter/ipt_tos.c +++ b/net/ipv4/netfilter/ipt_tos.c @@ -8,11 +8,12 @@ * published by the Free Software Foundation. */ +#include <linux/ip.h> #include <linux/module.h> #include <linux/skbuff.h> #include <linux/netfilter_ipv4/ipt_tos.h> -#include <linux/netfilter_ipv4/ip_tables.h> +#include <linux/netfilter/x_tables.h> MODULE_LICENSE("GPL"); MODULE_DESCRIPTION("iptables TOS match module"); @@ -32,8 +33,9 @@ match(const struct sk_buff *skb, return (skb->nh.iph->tos == info->tos) ^ info->invert; } -static struct ipt_match tos_match = { +static struct xt_match tos_match = { .name = "tos", + .family = AF_INET, .match = match, .matchsize = sizeof(struct ipt_tos_info), .me = THIS_MODULE, @@ -41,12 +43,12 @@ static struct ipt_match tos_match = { static int __init ipt_multiport_init(void) { - return ipt_register_match(&tos_match); + return xt_register_match(&tos_match); } static void __exit ipt_multiport_fini(void) { - ipt_unregister_match(&tos_match); + xt_unregister_match(&tos_match); } module_init(ipt_multiport_init); diff --git a/net/ipv4/netfilter/ipt_ttl.c b/net/ipv4/netfilter/ipt_ttl.c index a5243bdb87d..d5cd984e5ed 100644 --- a/net/ipv4/netfilter/ipt_ttl.c +++ b/net/ipv4/netfilter/ipt_ttl.c @@ -9,11 +9,12 @@ * published by the Free Software Foundation. */ +#include <linux/ip.h> #include <linux/module.h> #include <linux/skbuff.h> #include <linux/netfilter_ipv4/ipt_ttl.h> -#include <linux/netfilter_ipv4/ip_tables.h> +#include <linux/netfilter/x_tables.h> MODULE_AUTHOR("Harald Welte <laforge@netfilter.org>"); MODULE_DESCRIPTION("IP tables TTL matching module"); @@ -48,8 +49,9 @@ static int match(const struct sk_buff *skb, return 0; } -static struct ipt_match ttl_match = { +static struct xt_match ttl_match = { .name = "ttl", + .family = AF_INET, .match = match, .matchsize = sizeof(struct ipt_ttl_info), .me = THIS_MODULE, @@ -57,13 +59,12 @@ static struct ipt_match ttl_match = { static int __init ipt_ttl_init(void) { - return ipt_register_match(&ttl_match); + return xt_register_match(&ttl_match); } static void __exit ipt_ttl_fini(void) { - ipt_unregister_match(&ttl_match); - + xt_unregister_match(&ttl_match); } module_init(ipt_ttl_init); diff --git a/net/ipv4/netfilter/iptable_filter.c b/net/ipv4/netfilter/iptable_filter.c index e2e7dd8d790..51053cb42f4 100644 --- a/net/ipv4/netfilter/iptable_filter.c +++ b/net/ipv4/netfilter/iptable_filter.c @@ -74,7 +74,7 @@ static struct } }; -static struct ipt_table packet_filter = { +static struct xt_table packet_filter = { .name = "filter", .valid_hooks = FILTER_VALID_HOOKS, .lock = RW_LOCK_UNLOCKED, diff --git a/net/ipv4/netfilter/iptable_mangle.c b/net/ipv4/netfilter/iptable_mangle.c index af293988944..a532e4d8433 100644 --- a/net/ipv4/netfilter/iptable_mangle.c +++ b/net/ipv4/netfilter/iptable_mangle.c @@ -103,7 +103,7 @@ static struct } }; -static struct ipt_table packet_mangler = { +static struct xt_table packet_mangler = { .name = "mangle", .valid_hooks = MANGLE_VALID_HOOKS, .lock = RW_LOCK_UNLOCKED, diff --git a/net/ipv4/netfilter/iptable_raw.c b/net/ipv4/netfilter/iptable_raw.c index bcbeb4aeacd..5277550fa6b 100644 --- a/net/ipv4/netfilter/iptable_raw.c +++ b/net/ipv4/netfilter/iptable_raw.c @@ -79,7 +79,7 @@ static struct } }; -static struct ipt_table packet_raw = { +static struct xt_table packet_raw = { .name = "raw", .valid_hooks = RAW_VALID_HOOKS, .lock = RW_LOCK_UNLOCKED, diff --git a/net/ipv4/netfilter/nf_nat_core.c b/net/ipv4/netfilter/nf_nat_core.c index 86a92272b05..998b2557692 100644 --- a/net/ipv4/netfilter/nf_nat_core.c +++ b/net/ipv4/netfilter/nf_nat_core.c @@ -254,8 +254,9 @@ get_unique_tuple(struct nf_conntrack_tuple *tuple, if (maniptype == IP_NAT_MANIP_SRC) { if (find_appropriate_src(orig_tuple, tuple, range)) { DEBUGP("get_unique_tuple: Found current src map\n"); - if (!nf_nat_used_tuple(tuple, ct)) - return; + if (!(range->flags & IP_NAT_RANGE_PROTO_RANDOM)) + if (!nf_nat_used_tuple(tuple, ct)) + return; } } @@ -269,6 +270,13 @@ get_unique_tuple(struct nf_conntrack_tuple *tuple, proto = nf_nat_proto_find_get(orig_tuple->dst.protonum); + /* Change protocol info to have some randomization */ + if (range->flags & IP_NAT_RANGE_PROTO_RANDOM) { + proto->unique_tuple(tuple, range, maniptype, ct); + nf_nat_proto_put(proto); + return; + } + /* Only bother mapping if it's not already in range and unique */ if ((!(range->flags & IP_NAT_RANGE_PROTO_SPECIFIED) || proto->in_range(tuple, maniptype, &range->min, &range->max)) && diff --git a/net/ipv4/netfilter/nf_nat_helper.c b/net/ipv4/netfilter/nf_nat_helper.c index 98fbfc84d18..dc6738bdfab 100644 --- a/net/ipv4/netfilter/nf_nat_helper.c +++ b/net/ipv4/netfilter/nf_nat_helper.c @@ -176,7 +176,7 @@ nf_nat_mangle_tcp_packet(struct sk_buff **pskb, datalen = (*pskb)->len - iph->ihl*4; if ((*pskb)->ip_summed != CHECKSUM_PARTIAL) { tcph->check = 0; - tcph->check = tcp_v4_check(tcph, datalen, + tcph->check = tcp_v4_check(datalen, iph->saddr, iph->daddr, csum_partial((char *)tcph, datalen, 0)); diff --git a/net/ipv4/netfilter/nf_nat_pptp.c b/net/ipv4/netfilter/nf_nat_pptp.c index 0ae45b79a4e..5df4fcae3ab 100644 --- a/net/ipv4/netfilter/nf_nat_pptp.c +++ b/net/ipv4/netfilter/nf_nat_pptp.c @@ -72,9 +72,9 @@ static void pptp_nat_expected(struct nf_conn *ct, DEBUGP("we are PAC->PNS\n"); /* build tuple for PNS->PAC */ t.src.l3num = AF_INET; - t.src.u3.ip = master->tuplehash[exp->dir].tuple.src.u3.ip; + t.src.u3.ip = master->tuplehash[!exp->dir].tuple.src.u3.ip; t.src.u.gre.key = nat_pptp_info->pns_call_id; - t.dst.u3.ip = master->tuplehash[exp->dir].tuple.dst.u3.ip; + t.dst.u3.ip = master->tuplehash[!exp->dir].tuple.dst.u3.ip; t.dst.u.gre.key = nat_pptp_info->pac_call_id; t.dst.protonum = IPPROTO_GRE; } diff --git a/net/ipv4/netfilter/nf_nat_proto_tcp.c b/net/ipv4/netfilter/nf_nat_proto_tcp.c index 7e26a7e9bee..439164c7a62 100644 --- a/net/ipv4/netfilter/nf_nat_proto_tcp.c +++ b/net/ipv4/netfilter/nf_nat_proto_tcp.c @@ -8,6 +8,7 @@ #include <linux/types.h> #include <linux/init.h> +#include <linux/random.h> #include <linux/ip.h> #include <linux/tcp.h> @@ -75,6 +76,9 @@ tcp_unique_tuple(struct nf_conntrack_tuple *tuple, range_size = ntohs(range->max.tcp.port) - min + 1; } + if (range->flags & IP_NAT_RANGE_PROTO_RANDOM) + port = net_random(); + for (i = 0; i < range_size; i++, port++) { *portptr = htons(min + port % range_size); if (!nf_nat_used_tuple(tuple, ct)) diff --git a/net/ipv4/netfilter/nf_nat_proto_udp.c b/net/ipv4/netfilter/nf_nat_proto_udp.c index ab0ce4c8699..8cae6e063bb 100644 --- a/net/ipv4/netfilter/nf_nat_proto_udp.c +++ b/net/ipv4/netfilter/nf_nat_proto_udp.c @@ -8,6 +8,7 @@ #include <linux/types.h> #include <linux/init.h> +#include <linux/random.h> #include <linux/ip.h> #include <linux/udp.h> @@ -73,6 +74,9 @@ udp_unique_tuple(struct nf_conntrack_tuple *tuple, range_size = ntohs(range->max.udp.port) - min + 1; } + if (range->flags & IP_NAT_RANGE_PROTO_RANDOM) + port = net_random(); + for (i = 0; i < range_size; i++, port++) { *portptr = htons(min + port % range_size); if (!nf_nat_used_tuple(tuple, ct)) diff --git a/net/ipv4/netfilter/nf_nat_rule.c b/net/ipv4/netfilter/nf_nat_rule.c index b868ee0195d..7f95b4e2eb3 100644 --- a/net/ipv4/netfilter/nf_nat_rule.c +++ b/net/ipv4/netfilter/nf_nat_rule.c @@ -119,7 +119,7 @@ static struct } }; -static struct ipt_table nat_table = { +static struct xt_table nat_table = { .name = "nat", .valid_hooks = NAT_VALID_HOOKS, .lock = RW_LOCK_UNLOCKED, @@ -226,6 +226,10 @@ static int ipt_dnat_checkentry(const char *tablename, printk("DNAT: multiple ranges no longer supported\n"); return 0; } + if (mr->range[0].flags & IP_NAT_RANGE_PROTO_RANDOM) { + printk("DNAT: port randomization not supported\n"); + return 0; + } return 1; } @@ -290,7 +294,7 @@ int nf_nat_rule_find(struct sk_buff **pskb, return ret; } -static struct ipt_target ipt_snat_reg = { +static struct xt_target ipt_snat_reg = { .name = "SNAT", .target = ipt_snat_target, .targetsize = sizeof(struct nf_nat_multi_range_compat), diff --git a/net/ipv4/netfilter/nf_nat_standalone.c b/net/ipv4/netfilter/nf_nat_standalone.c index 730a7a44c88..5a964a167c1 100644 --- a/net/ipv4/netfilter/nf_nat_standalone.c +++ b/net/ipv4/netfilter/nf_nat_standalone.c @@ -32,12 +32,6 @@ #define DEBUGP(format, args...) #endif -#define HOOKNAME(hooknum) ((hooknum) == NF_IP_POST_ROUTING ? "POST_ROUTING" \ - : ((hooknum) == NF_IP_PRE_ROUTING ? "PRE_ROUTING" \ - : ((hooknum) == NF_IP_LOCAL_OUT ? "LOCAL_OUT" \ - : ((hooknum) == NF_IP_LOCAL_IN ? "LOCAL_IN" \ - : "*ERROR*"))) - #ifdef CONFIG_XFRM static void nat_decode_session(struct sk_buff *skb, struct flowi *fl) { @@ -123,7 +117,7 @@ nf_nat_fn(unsigned int hooknum, nat = nfct_nat(ct); if (!nat) - return NF_DROP; + return NF_ACCEPT; switch (ctinfo) { case IP_CT_RELATED: diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c index a6c63bbd9dd..fed6a1e7af9 100644 --- a/net/ipv4/raw.c +++ b/net/ipv4/raw.c @@ -489,7 +489,7 @@ static int raw_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, } security_sk_classify_flow(sk, &fl); - err = ip_route_output_flow(&rt, &fl, sk, !(msg->msg_flags&MSG_DONTWAIT)); + err = ip_route_output_flow(&rt, &fl, sk, 1); } if (err) goto done; diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 11c167118e8..baee304a3cb 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -1325,7 +1325,8 @@ void ip_rt_send_redirect(struct sk_buff *skb) /* Check for load limit; set rate_last to the latest sent * redirect. */ - if (time_after(jiffies, + if (rt->u.dst.rate_tokens == 0 || + time_after(jiffies, (rt->u.dst.rate_last + (ip_rt_redirect_load << rt->u.dst.rate_tokens)))) { icmp_send(skb, ICMP_REDIRECT, ICMP_REDIR_HOST, rt->rt_gateway); @@ -2634,7 +2635,7 @@ static int rt_fill_info(struct sk_buff *skb, u32 pid, u32 seq, int event, nlh = nlmsg_put(skb, pid, seq, event, sizeof(*r), flags); if (nlh == NULL) - return -ENOBUFS; + return -EMSGSIZE; r = nlmsg_data(nlh); r->rtm_family = AF_INET; @@ -2717,7 +2718,8 @@ static int rt_fill_info(struct sk_buff *skb, u32 pid, u32 seq, int event, return nlmsg_end(skb, nlh); nla_put_failure: - return nlmsg_cancel(skb, nlh); + nlmsg_cancel(skb, nlh); + return -EMSGSIZE; } int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void *arg) @@ -2872,8 +2874,7 @@ static int ipv4_sysctl_rtcache_flush_strategy(ctl_table *table, void __user *oldval, size_t __user *oldlenp, void __user *newval, - size_t newlen, - void **context) + size_t newlen) { int delay; if (newlen != sizeof(int)) diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c index dfcf47f10f8..fabf69a9108 100644 --- a/net/ipv4/sysctl_net_ipv4.c +++ b/net/ipv4/sysctl_net_ipv4.c @@ -51,8 +51,7 @@ int ipv4_sysctl_forward(ctl_table *ctl, int write, struct file * filp, static int ipv4_sysctl_forward_strategy(ctl_table *table, int __user *name, int nlen, void __user *oldval, size_t __user *oldlenp, - void __user *newval, size_t newlen, - void **context) + void __user *newval, size_t newlen) { int *valp = table->data; int new; @@ -111,8 +110,7 @@ static int proc_tcp_congestion_control(ctl_table *ctl, int write, struct file * static int sysctl_tcp_congestion_control(ctl_table *table, int __user *name, int nlen, void __user *oldval, size_t __user *oldlenp, - void __user *newval, size_t newlen, - void **context) + void __user *newval, size_t newlen) { char val[TCP_CA_NAME_MAX]; ctl_table tbl = { @@ -122,8 +120,7 @@ static int sysctl_tcp_congestion_control(ctl_table *table, int __user *name, int ret; tcp_get_default_congestion_control(val); - ret = sysctl_string(&tbl, name, nlen, oldval, oldlenp, newval, newlen, - context); + ret = sysctl_string(&tbl, name, nlen, oldval, oldlenp, newval, newlen); if (ret == 0 && newval && newlen) ret = tcp_set_default_congestion_control(val); return ret; @@ -169,8 +166,8 @@ static int proc_allowed_congestion_control(ctl_table *ctl, static int strategy_allowed_congestion_control(ctl_table *table, int __user *name, int nlen, void __user *oldval, size_t __user *oldlenp, - void __user *newval, size_t newlen, - void **context) + void __user *newval, + size_t newlen) { ctl_table tbl = { .maxlen = TCP_CA_BUF_MAX }; int ret; @@ -180,8 +177,7 @@ static int strategy_allowed_congestion_control(ctl_table *table, int __user *nam return -ENOMEM; tcp_get_available_congestion_control(tbl.data, tbl.maxlen); - ret = sysctl_string(&tbl, name, nlen, oldval, oldlenp, newval, newlen, - context); + ret = sysctl_string(&tbl, name, nlen, oldval, oldlenp, newval, newlen); if (ret == 0 && newval && newlen) ret = tcp_set_allowed_congestion_control(tbl.data); kfree(tbl.data); diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 090c690627e..5bd43d7294f 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -2364,8 +2364,9 @@ struct tcp_md5sig_pool *__tcp_get_md5sig_pool(int cpu) EXPORT_SYMBOL(__tcp_get_md5sig_pool); -void __tcp_put_md5sig_pool(void) { - __tcp_free_md5sig_pool(tcp_md5sig_pool); +void __tcp_put_md5sig_pool(void) +{ + tcp_free_md5sig_pool(); } EXPORT_SYMBOL(__tcp_put_md5sig_pool); @@ -2414,10 +2415,11 @@ void __init tcp_init(void) &tcp_hashinfo.ehash_size, NULL, 0); - tcp_hashinfo.ehash_size = (1 << tcp_hashinfo.ehash_size) >> 1; - for (i = 0; i < (tcp_hashinfo.ehash_size << 1); i++) { + tcp_hashinfo.ehash_size = 1 << tcp_hashinfo.ehash_size; + for (i = 0; i < tcp_hashinfo.ehash_size; i++) { rwlock_init(&tcp_hashinfo.ehash[i].lock); INIT_HLIST_HEAD(&tcp_hashinfo.ehash[i].chain); + INIT_HLIST_HEAD(&tcp_hashinfo.ehash[i].twchain); } tcp_hashinfo.bhash = @@ -2474,7 +2476,7 @@ void __init tcp_init(void) printk(KERN_INFO "TCP: Hash tables configured " "(established %d bind %d)\n", - tcp_hashinfo.ehash_size << 1, tcp_hashinfo.bhash_size); + tcp_hashinfo.ehash_size, tcp_hashinfo.bhash_size); tcp_register_congestion_control(&tcp_reno); } diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index c701f6abbfc..c6109895bb5 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -936,28 +936,58 @@ tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb, u32 prior_snd_ struct tcp_sock *tp = tcp_sk(sk); unsigned char *ptr = ack_skb->h.raw + TCP_SKB_CB(ack_skb)->sacked; struct tcp_sack_block_wire *sp = (struct tcp_sack_block_wire *)(ptr+2); + struct sk_buff *cached_skb; int num_sacks = (ptr[1] - TCPOLEN_SACK_BASE)>>3; int reord = tp->packets_out; int prior_fackets; u32 lost_retrans = 0; int flag = 0; int dup_sack = 0; + int cached_fack_count; int i; + int first_sack_index; if (!tp->sacked_out) tp->fackets_out = 0; prior_fackets = tp->fackets_out; + /* Check for D-SACK. */ + if (before(ntohl(sp[0].start_seq), TCP_SKB_CB(ack_skb)->ack_seq)) { + dup_sack = 1; + tp->rx_opt.sack_ok |= 4; + NET_INC_STATS_BH(LINUX_MIB_TCPDSACKRECV); + } else if (num_sacks > 1 && + !after(ntohl(sp[0].end_seq), ntohl(sp[1].end_seq)) && + !before(ntohl(sp[0].start_seq), ntohl(sp[1].start_seq))) { + dup_sack = 1; + tp->rx_opt.sack_ok |= 4; + NET_INC_STATS_BH(LINUX_MIB_TCPDSACKOFORECV); + } + + /* D-SACK for already forgotten data... + * Do dumb counting. */ + if (dup_sack && + !after(ntohl(sp[0].end_seq), prior_snd_una) && + after(ntohl(sp[0].end_seq), tp->undo_marker)) + tp->undo_retrans--; + + /* Eliminate too old ACKs, but take into + * account more or less fresh ones, they can + * contain valid SACK info. + */ + if (before(TCP_SKB_CB(ack_skb)->ack_seq, prior_snd_una - tp->max_window)) + return 0; + /* SACK fastpath: * if the only SACK change is the increase of the end_seq of * the first block then only apply that SACK block * and use retrans queue hinting otherwise slowpath */ flag = 1; - for (i = 0; i< num_sacks; i++) { - __u32 start_seq = ntohl(sp[i].start_seq); - __u32 end_seq = ntohl(sp[i].end_seq); + for (i = 0; i < num_sacks; i++) { + __be32 start_seq = sp[i].start_seq; + __be32 end_seq = sp[i].end_seq; - if (i == 0){ + if (i == 0) { if (tp->recv_sack_cache[i].start_seq != start_seq) flag = 0; } else { @@ -967,39 +997,14 @@ tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb, u32 prior_snd_ } tp->recv_sack_cache[i].start_seq = start_seq; tp->recv_sack_cache[i].end_seq = end_seq; - - /* Check for D-SACK. */ - if (i == 0) { - u32 ack = TCP_SKB_CB(ack_skb)->ack_seq; - - if (before(start_seq, ack)) { - dup_sack = 1; - tp->rx_opt.sack_ok |= 4; - NET_INC_STATS_BH(LINUX_MIB_TCPDSACKRECV); - } else if (num_sacks > 1 && - !after(end_seq, ntohl(sp[1].end_seq)) && - !before(start_seq, ntohl(sp[1].start_seq))) { - dup_sack = 1; - tp->rx_opt.sack_ok |= 4; - NET_INC_STATS_BH(LINUX_MIB_TCPDSACKOFORECV); - } - - /* D-SACK for already forgotten data... - * Do dumb counting. */ - if (dup_sack && - !after(end_seq, prior_snd_una) && - after(end_seq, tp->undo_marker)) - tp->undo_retrans--; - - /* Eliminate too old ACKs, but take into - * account more or less fresh ones, they can - * contain valid SACK info. - */ - if (before(ack, prior_snd_una - tp->max_window)) - return 0; - } + } + /* Clear the rest of the cache sack blocks so they won't match mistakenly. */ + for (; i < ARRAY_SIZE(tp->recv_sack_cache); i++) { + tp->recv_sack_cache[i].start_seq = 0; + tp->recv_sack_cache[i].end_seq = 0; } + first_sack_index = 0; if (flag) num_sacks = 1; else { @@ -1011,10 +1016,15 @@ tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb, u32 prior_snd_ for (j = 0; j < i; j++){ if (after(ntohl(sp[j].start_seq), ntohl(sp[j+1].start_seq))){ - sp[j].start_seq = htonl(tp->recv_sack_cache[j+1].start_seq); - sp[j].end_seq = htonl(tp->recv_sack_cache[j+1].end_seq); - sp[j+1].start_seq = htonl(tp->recv_sack_cache[j].start_seq); - sp[j+1].end_seq = htonl(tp->recv_sack_cache[j].end_seq); + struct tcp_sack_block_wire tmp; + + tmp = sp[j]; + sp[j] = sp[j+1]; + sp[j+1] = tmp; + + /* Track where the first SACK block goes to */ + if (j == first_sack_index) + first_sack_index = j+1; } } @@ -1024,20 +1034,22 @@ tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb, u32 prior_snd_ /* clear flag as used for different purpose in following code */ flag = 0; + /* Use SACK fastpath hint if valid */ + cached_skb = tp->fastpath_skb_hint; + cached_fack_count = tp->fastpath_cnt_hint; + if (!cached_skb) { + cached_skb = sk->sk_write_queue.next; + cached_fack_count = 0; + } + for (i=0; i<num_sacks; i++, sp++) { struct sk_buff *skb; __u32 start_seq = ntohl(sp->start_seq); __u32 end_seq = ntohl(sp->end_seq); int fack_count; - /* Use SACK fastpath hint if valid */ - if (tp->fastpath_skb_hint) { - skb = tp->fastpath_skb_hint; - fack_count = tp->fastpath_cnt_hint; - } else { - skb = sk->sk_write_queue.next; - fack_count = 0; - } + skb = cached_skb; + fack_count = cached_fack_count; /* Event "B" in the comment above. */ if (after(end_seq, tp->high_seq)) @@ -1047,8 +1059,12 @@ tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb, u32 prior_snd_ int in_sack, pcount; u8 sacked; - tp->fastpath_skb_hint = skb; - tp->fastpath_cnt_hint = fack_count; + cached_skb = skb; + cached_fack_count = fack_count; + if (i == first_sack_index) { + tp->fastpath_skb_hint = skb; + tp->fastpath_cnt_hint = fack_count; + } /* The retransmission queue is always in order, so * we can short-circuit the walk early. @@ -4420,9 +4436,11 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb, * But, this leaves one open to an easy denial of * service attack, and SYN cookies can't defend * against this problem. So, we drop the data - * in the interest of security over speed. + * in the interest of security over speed unless + * it's still in use. */ - goto discard; + kfree_skb(skb); + return 0; } goto discard; diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index a1222d6968c..f51d6404c61 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -191,7 +191,7 @@ int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) tmp = ip_route_connect(&rt, nexthop, inet->saddr, RT_CONN_FLAGS(sk), sk->sk_bound_dev_if, IPPROTO_TCP, - inet->sport, usin->sin_port, sk); + inet->sport, usin->sin_port, sk, 1); if (tmp < 0) return tmp; @@ -502,11 +502,11 @@ void tcp_v4_send_check(struct sock *sk, int len, struct sk_buff *skb) struct tcphdr *th = skb->h.th; if (skb->ip_summed == CHECKSUM_PARTIAL) { - th->check = ~tcp_v4_check(th, len, - inet->saddr, inet->daddr, 0); + th->check = ~tcp_v4_check(len, inet->saddr, + inet->daddr, 0); skb->csum_offset = offsetof(struct tcphdr, check); } else { - th->check = tcp_v4_check(th, len, inet->saddr, inet->daddr, + th->check = tcp_v4_check(len, inet->saddr, inet->daddr, csum_partial((char *)th, th->doff << 2, skb->csum)); @@ -525,7 +525,7 @@ int tcp_v4_gso_send_check(struct sk_buff *skb) th = skb->h.th; th->check = 0; - th->check = ~tcp_v4_check(th, skb->len, iph->saddr, iph->daddr, 0); + th->check = ~tcp_v4_check(skb->len, iph->saddr, iph->daddr, 0); skb->csum_offset = offsetof(struct tcphdr, check); skb->ip_summed = CHECKSUM_PARTIAL; return 0; @@ -648,7 +648,7 @@ static void tcp_v4_send_ack(struct tcp_timewait_sock *twsk, TCPOLEN_TIMESTAMP); rep.opt[1] = htonl(tcp_time_stamp); rep.opt[2] = htonl(ts); - arg.iov[0].iov_len = TCPOLEN_TSTAMP_ALIGNED; + arg.iov[0].iov_len += TCPOLEN_TSTAMP_ALIGNED; } /* Swap the send and the receive. */ @@ -747,7 +747,7 @@ static int tcp_v4_send_synack(struct sock *sk, struct request_sock *req, if (skb) { struct tcphdr *th = skb->h.th; - th->check = tcp_v4_check(th, skb->len, + th->check = tcp_v4_check(skb->len, ireq->loc_addr, ireq->rmt_addr, csum_partial((char *)th, skb->len, @@ -928,6 +928,7 @@ int tcp_v4_md5_do_del(struct sock *sk, __be32 addr) if (tp->md5sig_info->entries4 == 0) { kfree(tp->md5sig_info->keys4); tp->md5sig_info->keys4 = NULL; + tp->md5sig_info->alloced4 = 0; } else if (tp->md5sig_info->entries4 != i) { /* Need to do some manipulation */ memcpy(&tp->md5sig_info->keys4[i], @@ -1185,7 +1186,7 @@ done_opts: return 0; if (hash_expected && !hash_location) { - LIMIT_NETDEBUG(KERN_INFO "MD5 Hash NOT expected but found " + LIMIT_NETDEBUG(KERN_INFO "MD5 Hash expected but NOT found " "(" NIPQUAD_FMT ", %d)->(" NIPQUAD_FMT ", %d)\n", NIPQUAD(iph->saddr), ntohs(th->source), NIPQUAD(iph->daddr), ntohs(th->dest)); @@ -1513,7 +1514,7 @@ static struct sock *tcp_v4_hnd_req(struct sock *sk, struct sk_buff *skb) static __sum16 tcp_v4_checksum_init(struct sk_buff *skb) { if (skb->ip_summed == CHECKSUM_COMPLETE) { - if (!tcp_v4_check(skb->h.th, skb->len, skb->nh.iph->saddr, + if (!tcp_v4_check(skb->len, skb->nh.iph->saddr, skb->nh.iph->daddr, skb->csum)) { skb->ip_summed = CHECKSUM_UNNECESSARY; return 0; @@ -2050,7 +2051,7 @@ static void *established_get_first(struct seq_file *seq) } st->state = TCP_SEQ_STATE_TIME_WAIT; inet_twsk_for_each(tw, node, - &tcp_hashinfo.ehash[st->bucket + tcp_hashinfo.ehash_size].chain) { + &tcp_hashinfo.ehash[st->bucket].twchain) { if (tw->tw_family != st->family) { continue; } @@ -2106,7 +2107,7 @@ get_tw: } st->state = TCP_SEQ_STATE_TIME_WAIT; - tw = tw_head(&tcp_hashinfo.ehash[st->bucket + tcp_hashinfo.ehash_size].chain); + tw = tw_head(&tcp_hashinfo.ehash[st->bucket].twchain); goto get_tw; found: cur = sk; diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 32c1a972fa3..58b7111523f 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -467,6 +467,7 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it, th = (struct tcphdr *) skb_push(skb, tcp_header_size); skb->h.th = th; + skb_set_owner_w(skb, sk); /* Build TCP header and checksum it. */ th->source = inet->sport; @@ -540,7 +541,7 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it, if (after(tcb->end_seq, tp->snd_nxt) || tcb->seq == tcb->end_seq) TCP_INC_STATS(TCP_MIB_OUTSEGS); - err = icsk->icsk_af_ops->queue_xmit(skb, sk, 0); + err = icsk->icsk_af_ops->queue_xmit(skb, 0); if (likely(err <= 0)) return err; @@ -964,7 +965,8 @@ static inline unsigned int tcp_cwnd_test(struct tcp_sock *tp, struct sk_buff *sk u32 in_flight, cwnd; /* Don't be strict about the congestion window for the final FIN. */ - if (TCP_SKB_CB(skb)->flags & TCPCB_FLAG_FIN) + if ((TCP_SKB_CB(skb)->flags & TCPCB_FLAG_FIN) && + tcp_skb_pcount(skb) == 1) return 1; in_flight = tcp_packets_in_flight(tp); @@ -1650,7 +1652,8 @@ static void tcp_retrans_try_collapse(struct sock *sk, struct sk_buff *skb, int m memcpy(skb_put(skb, next_skb_size), next_skb->data, next_skb_size); - skb->ip_summed = next_skb->ip_summed; + if (next_skb->ip_summed == CHECKSUM_PARTIAL) + skb->ip_summed = CHECKSUM_PARTIAL; if (skb->ip_summed != CHECKSUM_PARTIAL) skb->csum = csum_block_add(skb->csum, next_skb->csum, skb_size); diff --git a/net/ipv4/tcp_probe.c b/net/ipv4/tcp_probe.c index f230eeecf09..41c15784818 100644 --- a/net/ipv4/tcp_probe.c +++ b/net/ipv4/tcp_probe.c @@ -30,7 +30,7 @@ #include <net/tcp.h> -MODULE_AUTHOR("Stephen Hemminger <shemminger@osdl.org>"); +MODULE_AUTHOR("Stephen Hemminger <shemminger@linux-foundation.org>"); MODULE_DESCRIPTION("TCP cwnd snooper"); MODULE_LICENSE("GPL"); diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 035915fc9ed..8b54c68a0d1 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -165,11 +165,14 @@ int __udp_lib_get_port(struct sock *sk, unsigned short snum, goto gotit; } size = 0; - sk_for_each(sk2, node, head) - if (++size < best_size_so_far) { - best_size_so_far = size; - best = result; - } + sk_for_each(sk2, node, head) { + if (++size >= best_size_so_far) + goto next; + } + best_size_so_far = size; + best = result; + next: + ; } result = best; for(i = 0; i < (1 << 16) / UDP_HTABLE_SIZE; i++, result += UDP_HTABLE_SIZE) { @@ -626,7 +629,7 @@ int udp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, { .sport = inet->sport, .dport = dport } } }; security_sk_classify_flow(sk, &fl); - err = ip_route_output_flow(&rt, &fl, sk, !(msg->msg_flags&MSG_DONTWAIT)); + err = ip_route_output_flow(&rt, &fl, sk, 1); if (err) goto out; diff --git a/net/ipv4/xfrm4_mode_tunnel.c b/net/ipv4/xfrm4_mode_tunnel.c index e23c21d31a5..e54c5494c88 100644 --- a/net/ipv4/xfrm4_mode_tunnel.c +++ b/net/ipv4/xfrm4_mode_tunnel.c @@ -23,6 +23,12 @@ static inline void ipip_ecn_decapsulate(struct sk_buff *skb) IP_ECN_set_ce(inner_iph); } +static inline void ipip6_ecn_decapsulate(struct iphdr *iph, struct sk_buff *skb) +{ + if (INET_ECN_is_ce(iph->tos)) + IP6_ECN_set_ce(skb->nh.ipv6h); +} + /* Add encapsulation header. * * The top IP header will be constructed per RFC 2401. The following fields @@ -36,6 +42,7 @@ static inline void ipip_ecn_decapsulate(struct sk_buff *skb) static int xfrm4_tunnel_output(struct xfrm_state *x, struct sk_buff *skb) { struct dst_entry *dst = skb->dst; + struct xfrm_dst *xdst = (struct xfrm_dst*)dst; struct iphdr *iph, *top_iph; int flags; @@ -48,15 +55,27 @@ static int xfrm4_tunnel_output(struct xfrm_state *x, struct sk_buff *skb) top_iph->ihl = 5; top_iph->version = 4; + flags = x->props.flags; + /* DS disclosed */ - top_iph->tos = INET_ECN_encapsulate(iph->tos, iph->tos); + if (xdst->route->ops->family == AF_INET) { + top_iph->protocol = IPPROTO_IPIP; + top_iph->tos = INET_ECN_encapsulate(iph->tos, iph->tos); + top_iph->frag_off = (flags & XFRM_STATE_NOPMTUDISC) ? + 0 : (iph->frag_off & htons(IP_DF)); + } +#if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE) + else { + struct ipv6hdr *ipv6h = (struct ipv6hdr*)iph; + top_iph->protocol = IPPROTO_IPV6; + top_iph->tos = INET_ECN_encapsulate(iph->tos, ipv6_get_dsfield(ipv6h)); + top_iph->frag_off = 0; + } +#endif - flags = x->props.flags; if (flags & XFRM_STATE_NOECN) IP_ECN_clear(top_iph); - top_iph->frag_off = (flags & XFRM_STATE_NOPMTUDISC) ? - 0 : (iph->frag_off & htons(IP_DF)); if (!top_iph->frag_off) __ip_select_ident(top_iph, dst->child, 0); @@ -64,7 +83,6 @@ static int xfrm4_tunnel_output(struct xfrm_state *x, struct sk_buff *skb) top_iph->saddr = x->props.saddr.a4; top_iph->daddr = x->id.daddr.a4; - top_iph->protocol = IPPROTO_IPIP; memset(&(IPCB(skb)->opt), 0, sizeof(struct ip_options)); return 0; @@ -75,8 +93,16 @@ static int xfrm4_tunnel_input(struct xfrm_state *x, struct sk_buff *skb) struct iphdr *iph = skb->nh.iph; int err = -EINVAL; - if (iph->protocol != IPPROTO_IPIP) - goto out; + switch(iph->protocol){ + case IPPROTO_IPIP: +#if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE) + case IPPROTO_IPV6: + break; +#endif + default: + goto out; + } + if (!pskb_may_pull(skb, sizeof(struct iphdr))) goto out; @@ -84,10 +110,19 @@ static int xfrm4_tunnel_input(struct xfrm_state *x, struct sk_buff *skb) (err = pskb_expand_head(skb, 0, 0, GFP_ATOMIC))) goto out; - if (x->props.flags & XFRM_STATE_DECAP_DSCP) - ipv4_copy_dscp(iph, skb->h.ipiph); - if (!(x->props.flags & XFRM_STATE_NOECN)) - ipip_ecn_decapsulate(skb); + if (iph->protocol == IPPROTO_IPIP) { + if (x->props.flags & XFRM_STATE_DECAP_DSCP) + ipv4_copy_dscp(iph, skb->h.ipiph); + if (!(x->props.flags & XFRM_STATE_NOECN)) + ipip_ecn_decapsulate(skb); + } +#if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE) + else { + if (!(x->props.flags & XFRM_STATE_NOECN)) + ipip6_ecn_decapsulate(iph, skb); + skb->protocol = htons(ETH_P_IPV6); + } +#endif skb->mac.raw = memmove(skb->data - skb->mac_len, skb->mac.raw, skb->mac_len); skb->nh.raw = skb->data; diff --git a/net/ipv4/xfrm4_policy.c b/net/ipv4/xfrm4_policy.c index fb9f69c616f..699f27ce62a 100644 --- a/net/ipv4/xfrm4_policy.c +++ b/net/ipv4/xfrm4_policy.c @@ -72,13 +72,11 @@ __xfrm4_bundle_create(struct xfrm_policy *policy, struct xfrm_state **xfrm, int struct dst_entry *dst, *dst_prev; struct rtable *rt0 = (struct rtable*)(*dst_p); struct rtable *rt = rt0; - __be32 remote = fl->fl4_dst; - __be32 local = fl->fl4_src; struct flowi fl_tunnel = { .nl_u = { .ip4_u = { - .saddr = local, - .daddr = remote, + .saddr = fl->fl4_src, + .daddr = fl->fl4_dst, .tos = fl->fl4_tos } } @@ -94,7 +92,6 @@ __xfrm4_bundle_create(struct xfrm_policy *policy, struct xfrm_state **xfrm, int for (i = 0; i < nx; i++) { struct dst_entry *dst1 = dst_alloc(&xfrm4_dst_ops); struct xfrm_dst *xdst; - int tunnel = 0; if (unlikely(dst1 == NULL)) { err = -ENOBUFS; @@ -116,19 +113,28 @@ __xfrm4_bundle_create(struct xfrm_policy *policy, struct xfrm_state **xfrm, int dst1->next = dst_prev; dst_prev = dst1; - if (xfrm[i]->props.mode != XFRM_MODE_TRANSPORT) { - remote = xfrm[i]->id.daddr.a4; - local = xfrm[i]->props.saddr.a4; - tunnel = 1; - } + header_len += xfrm[i]->props.header_len; trailer_len += xfrm[i]->props.trailer_len; - if (tunnel) { - fl_tunnel.fl4_src = local; - fl_tunnel.fl4_dst = remote; + if (xfrm[i]->props.mode == XFRM_MODE_TUNNEL) { + unsigned short encap_family = xfrm[i]->props.family; + switch(encap_family) { + case AF_INET: + fl_tunnel.fl4_dst = xfrm[i]->id.daddr.a4; + fl_tunnel.fl4_src = xfrm[i]->props.saddr.a4; + break; +#if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE) + case AF_INET6: + ipv6_addr_copy(&fl_tunnel.fl6_dst, (struct in6_addr*)&xfrm[i]->id.daddr.a6); + ipv6_addr_copy(&fl_tunnel.fl6_src, (struct in6_addr*)&xfrm[i]->props.saddr.a6); + break; +#endif + default: + BUG_ON(1); + } err = xfrm_dst_lookup((struct xfrm_dst **)&rt, - &fl_tunnel, AF_INET); + &fl_tunnel, encap_family); if (err) goto error; } else @@ -145,6 +151,7 @@ __xfrm4_bundle_create(struct xfrm_policy *policy, struct xfrm_state **xfrm, int i = 0; for (; dst_prev != &rt->u.dst; dst_prev = dst_prev->child) { struct xfrm_dst *x = (struct xfrm_dst*)dst_prev; + struct xfrm_state_afinfo *afinfo; x->u.rt.fl = *fl; dst_prev->xfrm = xfrm[i++]; @@ -162,8 +169,18 @@ __xfrm4_bundle_create(struct xfrm_policy *policy, struct xfrm_state **xfrm, int /* Copy neighbout for reachability confirmation */ dst_prev->neighbour = neigh_clone(rt->u.dst.neighbour); dst_prev->input = rt->u.dst.input; - dst_prev->output = xfrm4_output; - if (rt->peer) + /* XXX: When IPv6 module can be unloaded, we should manage reference + * to xfrm6_output in afinfo->output. Miyazawa + * */ + afinfo = xfrm_state_get_afinfo(dst_prev->xfrm->props.family); + if (!afinfo) { + dst = *dst_p; + err = -EAFNOSUPPORT; + goto error; + } + dst_prev->output = afinfo->output; + xfrm_state_put_afinfo(afinfo); + if (dst_prev->xfrm->props.family == AF_INET && rt->peer) atomic_inc(&rt->peer->refcnt); x->u.rt.peer = rt->peer; /* Sheit... I remember I did this right. Apparently, @@ -274,7 +291,7 @@ static void xfrm4_dst_destroy(struct dst_entry *dst) if (likely(xdst->u.rt.idev)) in_dev_put(xdst->u.rt.idev); - if (likely(xdst->u.rt.peer)) + if (dst->xfrm->props.family == AF_INET && likely(xdst->u.rt.peer)) inet_putpeer(xdst->u.rt.peer); xfrm_dst_destroy(xdst); } diff --git a/net/ipv4/xfrm4_state.c b/net/ipv4/xfrm4_state.c index 3cc3df0c6ec..93e2c061cdd 100644 --- a/net/ipv4/xfrm4_state.c +++ b/net/ipv4/xfrm4_state.c @@ -51,6 +51,7 @@ static struct xfrm_state_afinfo xfrm4_state_afinfo = { .family = AF_INET, .init_flags = xfrm4_init_flags, .init_tempsel = __xfrm4_init_tempsel, + .output = xfrm4_output, }; void __init xfrm4_state_init(void) diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index a5e8d207a51..fe5e1d83387 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -341,6 +341,7 @@ void in6_dev_finish_destroy(struct inet6_dev *idev) static struct inet6_dev * ipv6_add_dev(struct net_device *dev) { struct inet6_dev *ndev; + struct in6_addr maddr; ASSERT_RTNL(); @@ -413,8 +414,6 @@ static struct inet6_dev * ipv6_add_dev(struct net_device *dev) if (netif_carrier_ok(dev)) ndev->if_flags |= IF_READY; - /* protected by rtnl_lock */ - rcu_assign_pointer(dev->ip6_ptr, ndev); ipv6_mc_init_dev(ndev); ndev->tstamp = jiffies; @@ -425,6 +424,13 @@ static struct inet6_dev * ipv6_add_dev(struct net_device *dev) NULL); addrconf_sysctl_register(ndev, &ndev->cnf); #endif + /* protected by rtnl_lock */ + rcu_assign_pointer(dev->ip6_ptr, ndev); + + /* Join all-node multicast group */ + ipv6_addr_all_nodes(&maddr); + ipv6_dev_mc_inc(dev, &maddr); + return ndev; } @@ -3111,7 +3117,7 @@ static int inet6_fill_ifaddr(struct sk_buff *skb, struct inet6_ifaddr *ifa, nlh = nlmsg_put(skb, pid, seq, event, sizeof(struct ifaddrmsg), flags); if (nlh == NULL) - return -ENOBUFS; + return -EMSGSIZE; put_ifaddrmsg(nlh, ifa->prefix_len, ifa->flags, rt_scope(ifa->scope), ifa->idev->dev->ifindex); @@ -3131,8 +3137,10 @@ static int inet6_fill_ifaddr(struct sk_buff *skb, struct inet6_ifaddr *ifa, } if (nla_put(skb, IFA_ADDRESS, 16, &ifa->addr) < 0 || - put_cacheinfo(skb, ifa->cstamp, ifa->tstamp, preferred, valid) < 0) - return nlmsg_cancel(skb, nlh); + put_cacheinfo(skb, ifa->cstamp, ifa->tstamp, preferred, valid) < 0) { + nlmsg_cancel(skb, nlh); + return -EMSGSIZE; + } return nlmsg_end(skb, nlh); } @@ -3149,13 +3157,15 @@ static int inet6_fill_ifmcaddr(struct sk_buff *skb, struct ifmcaddr6 *ifmca, nlh = nlmsg_put(skb, pid, seq, event, sizeof(struct ifaddrmsg), flags); if (nlh == NULL) - return -ENOBUFS; + return -EMSGSIZE; put_ifaddrmsg(nlh, 128, IFA_F_PERMANENT, scope, ifindex); if (nla_put(skb, IFA_MULTICAST, 16, &ifmca->mca_addr) < 0 || put_cacheinfo(skb, ifmca->mca_cstamp, ifmca->mca_tstamp, - INFINITY_LIFE_TIME, INFINITY_LIFE_TIME) < 0) - return nlmsg_cancel(skb, nlh); + INFINITY_LIFE_TIME, INFINITY_LIFE_TIME) < 0) { + nlmsg_cancel(skb, nlh); + return -EMSGSIZE; + } return nlmsg_end(skb, nlh); } @@ -3172,13 +3182,15 @@ static int inet6_fill_ifacaddr(struct sk_buff *skb, struct ifacaddr6 *ifaca, nlh = nlmsg_put(skb, pid, seq, event, sizeof(struct ifaddrmsg), flags); if (nlh == NULL) - return -ENOBUFS; + return -EMSGSIZE; put_ifaddrmsg(nlh, 128, IFA_F_PERMANENT, scope, ifindex); if (nla_put(skb, IFA_ANYCAST, 16, &ifaca->aca_addr) < 0 || put_cacheinfo(skb, ifaca->aca_cstamp, ifaca->aca_tstamp, - INFINITY_LIFE_TIME, INFINITY_LIFE_TIME) < 0) - return nlmsg_cancel(skb, nlh); + INFINITY_LIFE_TIME, INFINITY_LIFE_TIME) < 0) { + nlmsg_cancel(skb, nlh); + return -EMSGSIZE; + } return nlmsg_end(skb, nlh); } @@ -3328,9 +3340,12 @@ static int inet6_rtm_getaddr(struct sk_buff *in_skb, struct nlmsghdr* nlh, err = inet6_fill_ifaddr(skb, ifa, NETLINK_CB(in_skb).pid, nlh->nlmsg_seq, RTM_NEWADDR, 0); - /* failure implies BUG in inet6_ifaddr_msgsize() */ - BUG_ON(err < 0); - + if (err < 0) { + /* -EMSGSIZE implies BUG in inet6_ifaddr_msgsize() */ + WARN_ON(err == -EMSGSIZE); + kfree_skb(skb); + goto errout_ifa; + } err = rtnl_unicast(skb, NETLINK_CB(in_skb).pid); errout_ifa: in6_ifa_put(ifa); @@ -3348,9 +3363,12 @@ static void inet6_ifa_notify(int event, struct inet6_ifaddr *ifa) goto errout; err = inet6_fill_ifaddr(skb, ifa, 0, 0, event, 0); - /* failure implies BUG in inet6_ifaddr_msgsize() */ - BUG_ON(err < 0); - + if (err < 0) { + /* -EMSGSIZE implies BUG in inet6_ifaddr_msgsize() */ + WARN_ON(err == -EMSGSIZE); + kfree_skb(skb); + goto errout; + } err = rtnl_notify(skb, 0, RTNLGRP_IPV6_IFADDR, NULL, GFP_ATOMIC); errout: if (err < 0) @@ -3387,7 +3405,7 @@ static void inline ipv6_store_devconf(struct ipv6_devconf *cnf, #ifdef CONFIG_IPV6_ROUTER_PREF array[DEVCONF_ACCEPT_RA_RTR_PREF] = cnf->accept_ra_rtr_pref; array[DEVCONF_RTR_PROBE_INTERVAL] = cnf->rtr_probe_interval; -#ifdef CONFIV_IPV6_ROUTE_INFO +#ifdef CONFIG_IPV6_ROUTE_INFO array[DEVCONF_ACCEPT_RA_RT_INFO_MAX_PLEN] = cnf->accept_ra_rt_info_max_plen; #endif #endif @@ -3420,7 +3438,7 @@ static int inet6_fill_ifinfo(struct sk_buff *skb, struct inet6_dev *idev, nlh = nlmsg_put(skb, pid, seq, event, sizeof(*hdr), flags); if (nlh == NULL) - return -ENOBUFS; + return -EMSGSIZE; hdr = nlmsg_data(nlh); hdr->ifi_family = AF_INET6; @@ -3463,7 +3481,8 @@ static int inet6_fill_ifinfo(struct sk_buff *skb, struct inet6_dev *idev, return nlmsg_end(skb, nlh); nla_put_failure: - return nlmsg_cancel(skb, nlh); + nlmsg_cancel(skb, nlh); + return -EMSGSIZE; } static int inet6_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb) @@ -3501,9 +3520,12 @@ void inet6_ifinfo_notify(int event, struct inet6_dev *idev) goto errout; err = inet6_fill_ifinfo(skb, idev, 0, 0, event, 0); - /* failure implies BUG in inet6_if_nlmsg_size() */ - BUG_ON(err < 0); - + if (err < 0) { + /* -EMSGSIZE implies BUG in inet6_if_nlmsg_size() */ + WARN_ON(err == -EMSGSIZE); + kfree_skb(skb); + goto errout; + } err = rtnl_notify(skb, 0, RTNLGRP_IPV6_IFADDR, NULL, GFP_ATOMIC); errout: if (err < 0) @@ -3527,7 +3549,7 @@ static int inet6_fill_prefix(struct sk_buff *skb, struct inet6_dev *idev, nlh = nlmsg_put(skb, pid, seq, event, sizeof(*pmsg), flags); if (nlh == NULL) - return -ENOBUFS; + return -EMSGSIZE; pmsg = nlmsg_data(nlh); pmsg->prefix_family = AF_INET6; @@ -3552,7 +3574,8 @@ static int inet6_fill_prefix(struct sk_buff *skb, struct inet6_dev *idev, return nlmsg_end(skb, nlh); nla_put_failure: - return nlmsg_cancel(skb, nlh); + nlmsg_cancel(skb, nlh); + return -EMSGSIZE; } static void inet6_prefix_notify(int event, struct inet6_dev *idev, @@ -3566,9 +3589,12 @@ static void inet6_prefix_notify(int event, struct inet6_dev *idev, goto errout; err = inet6_fill_prefix(skb, idev, pinfo, 0, 0, event, 0); - /* failure implies BUG in inet6_prefix_nlmsg_size() */ - BUG_ON(err < 0); - + if (err < 0) { + /* -EMSGSIZE implies BUG in inet6_prefix_nlmsg_size() */ + WARN_ON(err == -EMSGSIZE); + kfree_skb(skb); + goto errout; + } err = rtnl_notify(skb, 0, RTNLGRP_IPV6_PREFIX, NULL, GFP_ATOMIC); errout: if (err < 0) @@ -3656,8 +3682,7 @@ static int addrconf_sysctl_forward_strategy(ctl_table *table, int __user *name, int nlen, void __user *oldval, size_t __user *oldlenp, - void __user *newval, size_t newlen, - void **context) + void __user *newval, size_t newlen) { int *valp = table->data; int new; @@ -3893,7 +3918,7 @@ static struct addrconf_sysctl_table .proc_handler = &proc_dointvec_jiffies, .strategy = &sysctl_jiffies, }, -#ifdef CONFIV_IPV6_ROUTE_INFO +#ifdef CONFIG_IPV6_ROUTE_INFO { .ctl_name = NET_IPV6_ACCEPT_RA_RT_INFO_MAX_PLEN, .procname = "accept_ra_rt_info_max_plen", diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c index e5cd83b2205..0e0e4262f4d 100644 --- a/net/ipv6/af_inet6.c +++ b/net/ipv6/af_inet6.c @@ -171,7 +171,7 @@ lookup_protocol: sk->sk_reuse = 1; inet = inet_sk(sk); - inet->is_icsk = INET_PROTOSW_ICSK & answer_flags; + inet->is_icsk = (INET_PROTOSW_ICSK & answer_flags) != 0; if (SOCK_RAW == sock->type) { inet->num = protocol; diff --git a/net/ipv6/datagram.c b/net/ipv6/datagram.c index 5c94fea90e9..ecde30140f4 100644 --- a/net/ipv6/datagram.c +++ b/net/ipv6/datagram.c @@ -178,7 +178,7 @@ ipv4_connected: if (final_p) ipv6_addr_copy(&fl.fl6_dst, final_p); - if ((err = xfrm_lookup(&dst, &fl, sk, 0)) < 0) + if ((err = xfrm_lookup(&dst, &fl, sk, 1)) < 0) goto out; /* source address lookup done in ip6_dst_lookup */ diff --git a/net/ipv6/inet6_connection_sock.c b/net/ipv6/inet6_connection_sock.c index c700302ad51..116f94a4907 100644 --- a/net/ipv6/inet6_connection_sock.c +++ b/net/ipv6/inet6_connection_sock.c @@ -139,8 +139,9 @@ void inet6_csk_addr2sockaddr(struct sock *sk, struct sockaddr * uaddr) EXPORT_SYMBOL_GPL(inet6_csk_addr2sockaddr); -int inet6_csk_xmit(struct sk_buff *skb, struct sock *sk, int ipfragok) +int inet6_csk_xmit(struct sk_buff *skb, int ipfragok) { + struct sock *sk = skb->sk; struct inet_sock *inet = inet_sk(sk); struct ipv6_pinfo *np = inet6_sk(sk); struct flowi fl; diff --git a/net/ipv6/inet6_hashtables.c b/net/ipv6/inet6_hashtables.c index b7e5bae0e34..e61116949be 100644 --- a/net/ipv6/inet6_hashtables.c +++ b/net/ipv6/inet6_hashtables.c @@ -79,7 +79,7 @@ struct sock *__inet6_lookup_established(struct inet_hashinfo *hashinfo, goto hit; /* You sunk my battleship! */ } /* Must check for a TIME_WAIT'er before going to listener hash. */ - sk_for_each(sk, node, &(head + hashinfo->ehash_size)->chain) { + sk_for_each(sk, node, &head->twchain) { const struct inet_timewait_sock *tw = inet_twsk(sk); if(*((__portpair *)&(tw->tw_dport)) == ports && @@ -183,7 +183,7 @@ static int __inet6_check_established(struct inet_timewait_death_row *death_row, write_lock(&head->lock); /* Check TIME-WAIT sockets first. */ - sk_for_each(sk2, node, &(head + hinfo->ehash_size)->chain) { + sk_for_each(sk2, node, &head->twchain) { const struct inet6_timewait_sock *tw6 = inet6_twsk(sk2); tw = inet_twsk(sk2); diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index e9212c7ff5c..7b7bd44fbf4 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -72,20 +72,11 @@ static __inline__ void ipv6_select_ident(struct sk_buff *skb, struct frag_hdr *f static inline int ip6_output_finish(struct sk_buff *skb) { - struct dst_entry *dst = skb->dst; - struct hh_cache *hh = dst->hh; - - if (hh) { - int hh_alen; - - read_lock_bh(&hh->hh_lock); - hh_alen = HH_DATA_ALIGN(hh->hh_len); - memcpy(skb->data - hh_alen, hh->hh_data, hh_alen); - read_unlock_bh(&hh->hh_lock); - skb_push(skb, hh->hh_len); - return hh->hh_output(skb); - } else if (dst->neighbour) + + if (dst->hh) + return neigh_hh_output(dst->hh, skb); + else if (dst->neighbour) return dst->neighbour->output(skb); IP6_INC_STATS_BH(ip6_dst_idev(dst), IPSTATS_MIB_OUTNOROUTES); diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c index 8d918348f5b..2b9e3bb7da6 100644 --- a/net/ipv6/ip6_tunnel.c +++ b/net/ipv6/ip6_tunnel.c @@ -999,7 +999,8 @@ ip6ip6_tnl_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) break; dev = t->dev; } - err = unregister_netdevice(dev); + err = 0; + unregister_netdevice(dev); break; default: err = -EINVAL; diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c index 1eafcfc95e8..352690e2ab8 100644 --- a/net/ipv6/ipv6_sockglue.c +++ b/net/ipv6/ipv6_sockglue.c @@ -978,12 +978,27 @@ static int do_ipv6_getsockopt(struct sock *sk, int level, int optname, break; case IPV6_UNICAST_HOPS: - val = np->hop_limit; - break; - case IPV6_MULTICAST_HOPS: - val = np->mcast_hops; + { + struct dst_entry *dst; + + if (optname == IPV6_UNICAST_HOPS) + val = np->hop_limit; + else + val = np->mcast_hops; + + dst = sk_dst_get(sk); + if (dst) { + if (val < 0) + val = dst_metric(dst, RTAX_HOPLIMIT); + if (val < 0) + val = ipv6_get_hoplimit(dst->dev); + dst_release(dst); + } + if (val < 0) + val = ipv6_devconf.hop_limit; break; + } case IPV6_MULTICAST_LOOP: val = np->mc_loop; diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c index a1c231a04ac..e3ec2169583 100644 --- a/net/ipv6/mcast.c +++ b/net/ipv6/mcast.c @@ -1582,6 +1582,8 @@ static struct sk_buff *add_grec(struct sk_buff *skb, struct ifmcaddr6 *pmc, skb = add_grhead(skb, pmc, type, &pgr); first = 0; } + if (!skb) + return NULL; psrc = (struct in6_addr *)skb_put(skb, sizeof(*psrc)); *psrc = psf->sf_addr; scount++; stotal++; @@ -2258,8 +2260,6 @@ void ipv6_mc_up(struct inet6_dev *idev) void ipv6_mc_init_dev(struct inet6_dev *idev) { - struct in6_addr maddr; - write_lock_bh(&idev->lock); rwlock_init(&idev->mc_lock); idev->mc_gq_running = 0; @@ -2275,10 +2275,6 @@ void ipv6_mc_init_dev(struct inet6_dev *idev) idev->mc_maxdelay = IGMP6_UNSOLICITED_IVAL; idev->mc_v1_seen = 0; write_unlock_bh(&idev->lock); - - /* Add all-nodes address. */ - ipv6_addr_all_nodes(&maddr); - ipv6_dev_mc_inc(idev->dev, &maddr); } /* diff --git a/net/ipv6/mip6.c b/net/ipv6/mip6.c index be7dd7db65d..681bb077eac 100644 --- a/net/ipv6/mip6.c +++ b/net/ipv6/mip6.c @@ -89,7 +89,6 @@ static int mip6_mh_len(int type) int mip6_mh_filter(struct sock *sk, struct sk_buff *skb) { struct ip6_mh *mh; - int mhlen; if (!pskb_may_pull(skb, (skb->h.raw - skb->data) + 8) || !pskb_may_pull(skb, (skb->h.raw - skb->data) + ((skb->h.raw[1] + 1) << 3))) @@ -103,31 +102,6 @@ int mip6_mh_filter(struct sock *sk, struct sk_buff *skb) mip6_param_prob(skb, 0, (&mh->ip6mh_hdrlen) - skb->nh.raw); return -1; } - mhlen = (mh->ip6mh_hdrlen + 1) << 3; - - if (skb->ip_summed == CHECKSUM_COMPLETE) { - skb->ip_summed = CHECKSUM_UNNECESSARY; - if (csum_ipv6_magic(&skb->nh.ipv6h->saddr, - &skb->nh.ipv6h->daddr, - mhlen, IPPROTO_MH, - skb->csum)) { - LIMIT_NETDEBUG(KERN_DEBUG "mip6: MH hw checksum failed\n"); - skb->ip_summed = CHECKSUM_NONE; - } - } - if (skb->ip_summed == CHECKSUM_NONE) { - if (csum_ipv6_magic(&skb->nh.ipv6h->saddr, - &skb->nh.ipv6h->daddr, - mhlen, IPPROTO_MH, - skb_checksum(skb, 0, mhlen, 0))) { - LIMIT_NETDEBUG(KERN_DEBUG "mip6: MH checksum failed " - "[" NIP6_FMT " > " NIP6_FMT "]\n", - NIP6(skb->nh.ipv6h->saddr), - NIP6(skb->nh.ipv6h->daddr)); - return -1; - } - skb->ip_summed = CHECKSUM_UNNECESSARY; - } if (mh->ip6mh_proto != IPPROTO_NONE) { LIMIT_NETDEBUG(KERN_DEBUG "mip6: MH invalid payload proto = %d\n", diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c index 56ea9283730..39bb658f3c4 100644 --- a/net/ipv6/ndisc.c +++ b/net/ipv6/ndisc.c @@ -1413,6 +1413,13 @@ void ndisc_send_redirect(struct sk_buff *skb, struct neighbour *neigh, return; } + if (!ipv6_addr_equal(&skb->nh.ipv6h->daddr, target) && + !(ipv6_addr_type(target) & IPV6_ADDR_LINKLOCAL)) { + ND_PRINTK2(KERN_WARNING + "ICMPv6 Redirect: target address is not link-local.\n"); + return; + } + ndisc_flow_init(&fl, NDISC_REDIRECT, &saddr_buf, &skb->nh.ipv6h->saddr, dev->ifindex); @@ -1667,8 +1674,7 @@ int ndisc_ifinfo_sysctl_change(struct ctl_table *ctl, int write, struct file * f static int ndisc_ifinfo_sysctl_strategy(ctl_table *ctl, int __user *name, int nlen, void __user *oldval, size_t __user *oldlenp, - void __user *newval, size_t newlen, - void **context) + void __user *newval, size_t newlen) { struct net_device *dev = ctl->extra1; struct inet6_dev *idev; @@ -1681,14 +1687,12 @@ static int ndisc_ifinfo_sysctl_strategy(ctl_table *ctl, int __user *name, switch (ctl->ctl_name) { case NET_NEIGH_REACHABLE_TIME: ret = sysctl_jiffies(ctl, name, nlen, - oldval, oldlenp, newval, newlen, - context); + oldval, oldlenp, newval, newlen); break; case NET_NEIGH_RETRANS_TIME_MS: case NET_NEIGH_REACHABLE_TIME_MS: ret = sysctl_ms_jiffies(ctl, name, nlen, - oldval, oldlenp, newval, newlen, - context); + oldval, oldlenp, newval, newlen); break; default: ret = 0; diff --git a/net/ipv6/netfilter/Kconfig b/net/ipv6/netfilter/Kconfig index fc3e5eb4bc3..cd549aea84f 100644 --- a/net/ipv6/netfilter/Kconfig +++ b/net/ipv6/netfilter/Kconfig @@ -7,7 +7,7 @@ menu "IPv6: Netfilter Configuration (EXPERIMENTAL)" config NF_CONNTRACK_IPV6 tristate "IPv6 connection tracking support (EXPERIMENTAL)" - depends on EXPERIMENTAL && NF_CONNTRACK + depends on INET && IPV6 && EXPERIMENTAL && NF_CONNTRACK ---help--- Connection tracking keeps a record of what packets have passed through your machine, in order to figure out how they are related @@ -21,6 +21,7 @@ config NF_CONNTRACK_IPV6 config IP6_NF_QUEUE tristate "IP6 Userspace queueing via NETLINK (OBSOLETE)" + depends on INET && IPV6 && NETFILTER && EXPERIMENTAL ---help--- This option adds a queue handler to the kernel for IPv6 @@ -41,7 +42,7 @@ config IP6_NF_QUEUE config IP6_NF_IPTABLES tristate "IP6 tables support (required for filtering)" - depends on NETFILTER_XTABLES + depends on INET && IPV6 && EXPERIMENTAL && NETFILTER_XTABLES help ip6tables is a general, extensible packet identification framework. Currently only the packet filtering and packet mangling subsystem @@ -113,6 +114,14 @@ config IP6_NF_MATCH_AH To compile it as a module, choose M here. If unsure, say N. +config IP6_NF_MATCH_MH + tristate "MH match support" + depends on IP6_NF_IPTABLES + help + This module allows one to match MH packets. + + To compile it as a module, choose M here. If unsure, say N. + config IP6_NF_MATCH_EUI64 tristate "EUI64 address check" depends on IP6_NF_IPTABLES diff --git a/net/ipv6/netfilter/Makefile b/net/ipv6/netfilter/Makefile index ac1dfebde17..4513eab7739 100644 --- a/net/ipv6/netfilter/Makefile +++ b/net/ipv6/netfilter/Makefile @@ -19,6 +19,7 @@ obj-$(CONFIG_IP6_NF_TARGET_LOG) += ip6t_LOG.o obj-$(CONFIG_IP6_NF_RAW) += ip6table_raw.o obj-$(CONFIG_IP6_NF_MATCH_HL) += ip6t_hl.o obj-$(CONFIG_IP6_NF_TARGET_REJECT) += ip6t_REJECT.o +obj-$(CONFIG_IP6_NF_MATCH_MH) += ip6t_mh.o # objects for l3 independent conntrack nf_conntrack_ipv6-objs := nf_conntrack_l3proto_ipv6.o nf_conntrack_proto_icmpv6.o nf_conntrack_reasm.o diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c index 4eec4b3988b..7083e1cfb2f 100644 --- a/net/ipv6/netfilter/ip6_tables.c +++ b/net/ipv6/netfilter/ip6_tables.c @@ -413,6 +413,7 @@ mark_source_chains(struct xt_table_info *newinfo, unsigned int pos = newinfo->hook_entry[hook]; struct ip6t_entry *e = (struct ip6t_entry *)(entry0 + pos); + int visited = e->comefrom & (1 << hook); if (!(valid_hooks & (1 << hook))) continue; @@ -433,11 +434,11 @@ mark_source_chains(struct xt_table_info *newinfo, |= ((1 << hook) | (1 << NF_IP6_NUMHOOKS)); /* Unconditional return/END. */ - if (e->target_offset == sizeof(struct ip6t_entry) + if ((e->target_offset == sizeof(struct ip6t_entry) && (strcmp(t->target.u.user.name, IP6T_STANDARD_TARGET) == 0) && t->verdict < 0 - && unconditional(&e->ipv6)) { + && unconditional(&e->ipv6)) || visited) { unsigned int oldpos, size; if (t->verdict < -NF_MAX_VERDICT - 1) { @@ -529,7 +530,7 @@ check_match(struct ip6t_entry_match *m, unsigned int hookmask, unsigned int *i) { - struct ip6t_match *match; + struct xt_match *match; int ret; match = try_then_request_module(xt_find_match(AF_INET6, m->u.user.name, @@ -563,14 +564,14 @@ err: return ret; } -static struct ip6t_target ip6t_standard_target; +static struct xt_target ip6t_standard_target; static inline int check_entry(struct ip6t_entry *e, const char *name, unsigned int size, unsigned int *i) { struct ip6t_entry_target *t; - struct ip6t_target *target; + struct xt_target *target; int ret; unsigned int j; @@ -1347,13 +1348,13 @@ icmp6_checkentry(const char *tablename, } /* The built-in targets: standard (NULL) and error. */ -static struct ip6t_target ip6t_standard_target = { +static struct xt_target ip6t_standard_target = { .name = IP6T_STANDARD_TARGET, .targetsize = sizeof(int), .family = AF_INET6, }; -static struct ip6t_target ip6t_error_target = { +static struct xt_target ip6t_error_target = { .name = IP6T_ERROR_TARGET, .target = ip6t_error, .targetsize = IP6T_FUNCTION_MAXNAMELEN, @@ -1370,7 +1371,7 @@ static struct nf_sockopt_ops ip6t_sockopts = { .get = do_ip6t_get_ctl, }; -static struct ip6t_match icmp6_matchstruct = { +static struct xt_match icmp6_matchstruct = { .name = "icmp6", .match = &icmp6_match, .matchsize = sizeof(struct ip6t_icmp), diff --git a/net/ipv6/netfilter/ip6t_HL.c b/net/ipv6/netfilter/ip6t_HL.c index 435750f664d..04e500172fb 100644 --- a/net/ipv6/netfilter/ip6t_HL.c +++ b/net/ipv6/netfilter/ip6t_HL.c @@ -9,12 +9,13 @@ #include <linux/module.h> #include <linux/skbuff.h> #include <linux/ip.h> +#include <linux/ipv6.h> -#include <linux/netfilter_ipv6/ip6_tables.h> +#include <linux/netfilter/x_tables.h> #include <linux/netfilter_ipv6/ip6t_HL.h> MODULE_AUTHOR("Maciej Soltysiak <solt@dns.toxicfilms.tv>"); -MODULE_DESCRIPTION("IP tables Hop Limit modification module"); +MODULE_DESCRIPTION("IP6 tables Hop Limit modification module"); MODULE_LICENSE("GPL"); static unsigned int ip6t_hl_target(struct sk_buff **pskb, @@ -52,10 +53,9 @@ static unsigned int ip6t_hl_target(struct sk_buff **pskb, break; } - if (new_hl != ip6h->hop_limit) - ip6h->hop_limit = new_hl; + ip6h->hop_limit = new_hl; - return IP6T_CONTINUE; + return XT_CONTINUE; } static int ip6t_hl_checkentry(const char *tablename, @@ -79,8 +79,9 @@ static int ip6t_hl_checkentry(const char *tablename, return 1; } -static struct ip6t_target ip6t_HL = { +static struct xt_target ip6t_HL = { .name = "HL", + .family = AF_INET6, .target = ip6t_hl_target, .targetsize = sizeof(struct ip6t_HL_info), .table = "mangle", @@ -90,12 +91,12 @@ static struct ip6t_target ip6t_HL = { static int __init ip6t_hl_init(void) { - return ip6t_register_target(&ip6t_HL); + return xt_register_target(&ip6t_HL); } static void __exit ip6t_hl_fini(void) { - ip6t_unregister_target(&ip6t_HL); + xt_unregister_target(&ip6t_HL); } module_init(ip6t_hl_init); diff --git a/net/ipv6/netfilter/ip6t_LOG.c b/net/ipv6/netfilter/ip6t_LOG.c index 33b1faa90d7..5587a77b884 100644 --- a/net/ipv6/netfilter/ip6t_LOG.c +++ b/net/ipv6/netfilter/ip6t_LOG.c @@ -21,6 +21,7 @@ #include <net/tcp.h> #include <net/ipv6.h> #include <linux/netfilter.h> +#include <linux/netfilter/x_tables.h> #include <linux/netfilter_ipv6/ip6_tables.h> MODULE_AUTHOR("Jan Rekorajski <baggins@pld.org.pl>"); @@ -442,7 +443,7 @@ ip6t_log_target(struct sk_buff **pskb, ip6t_log_packet(PF_INET6, hooknum, *pskb, in, out, &li, loginfo->prefix); - return IP6T_CONTINUE; + return XT_CONTINUE; } @@ -466,8 +467,9 @@ static int ip6t_log_checkentry(const char *tablename, return 1; } -static struct ip6t_target ip6t_log_reg = { +static struct xt_target ip6t_log_reg = { .name = "LOG", + .family = AF_INET6, .target = ip6t_log_target, .targetsize = sizeof(struct ip6t_log_info), .checkentry = ip6t_log_checkentry, @@ -482,8 +484,11 @@ static struct nf_logger ip6t_logger = { static int __init ip6t_log_init(void) { - if (ip6t_register_target(&ip6t_log_reg)) - return -EINVAL; + int ret; + + ret = xt_register_target(&ip6t_log_reg); + if (ret < 0) + return ret; if (nf_log_register(PF_INET6, &ip6t_logger) < 0) { printk(KERN_WARNING "ip6t_LOG: not logging via system console " "since somebody else already registered for PF_INET6\n"); @@ -497,7 +502,7 @@ static int __init ip6t_log_init(void) static void __exit ip6t_log_fini(void) { nf_log_unregister_logger(&ip6t_logger); - ip6t_unregister_target(&ip6t_log_reg); + xt_unregister_target(&ip6t_log_reg); } module_init(ip6t_log_init); diff --git a/net/ipv6/netfilter/ip6t_REJECT.c b/net/ipv6/netfilter/ip6t_REJECT.c index 311eae82feb..278349c1879 100644 --- a/net/ipv6/netfilter/ip6t_REJECT.c +++ b/net/ipv6/netfilter/ip6t_REJECT.c @@ -26,6 +26,7 @@ #include <net/ip6_fib.h> #include <net/ip6_route.h> #include <net/flow.h> +#include <linux/netfilter/x_tables.h> #include <linux/netfilter_ipv6/ip6_tables.h> #include <linux/netfilter_ipv6/ip6t_REJECT.h> @@ -234,7 +235,7 @@ static int check(const char *tablename, } else if (rejinfo->with == IP6T_TCP_RESET) { /* Must specify that it's a TCP packet */ if (e->ipv6.proto != IPPROTO_TCP - || (e->ipv6.invflags & IP6T_INV_PROTO)) { + || (e->ipv6.invflags & XT_INV_PROTO)) { DEBUGP("ip6t_REJECT: TCP_RESET illegal for non-tcp\n"); return 0; } @@ -242,8 +243,9 @@ static int check(const char *tablename, return 1; } -static struct ip6t_target ip6t_reject_reg = { +static struct xt_target ip6t_reject_reg = { .name = "REJECT", + .family = AF_INET6, .target = reject6_target, .targetsize = sizeof(struct ip6t_reject_info), .table = "filter", @@ -255,12 +257,12 @@ static struct ip6t_target ip6t_reject_reg = { static int __init ip6t_reject_init(void) { - return ip6t_register_target(&ip6t_reject_reg); + return xt_register_target(&ip6t_reject_reg); } static void __exit ip6t_reject_fini(void) { - ip6t_unregister_target(&ip6t_reject_reg); + xt_unregister_target(&ip6t_reject_reg); } module_init(ip6t_reject_init); diff --git a/net/ipv6/netfilter/ip6t_ah.c b/net/ipv6/netfilter/ip6t_ah.c index 46486645eb7..456c76adcbf 100644 --- a/net/ipv6/netfilter/ip6t_ah.c +++ b/net/ipv6/netfilter/ip6t_ah.c @@ -15,6 +15,7 @@ #include <net/checksum.h> #include <net/ipv6.h> +#include <linux/netfilter/x_tables.h> #include <linux/netfilter_ipv6/ip6_tables.h> #include <linux/netfilter_ipv6/ip6t_ah.h> @@ -118,8 +119,9 @@ checkentry(const char *tablename, return 1; } -static struct ip6t_match ah_match = { +static struct xt_match ah_match = { .name = "ah", + .family = AF_INET6, .match = match, .matchsize = sizeof(struct ip6t_ah), .checkentry = checkentry, @@ -128,12 +130,12 @@ static struct ip6t_match ah_match = { static int __init ip6t_ah_init(void) { - return ip6t_register_match(&ah_match); + return xt_register_match(&ah_match); } static void __exit ip6t_ah_fini(void) { - ip6t_unregister_match(&ah_match); + xt_unregister_match(&ah_match); } module_init(ip6t_ah_init); diff --git a/net/ipv6/netfilter/ip6t_eui64.c b/net/ipv6/netfilter/ip6t_eui64.c index 4f6b84c8f4a..967bed71d4a 100644 --- a/net/ipv6/netfilter/ip6t_eui64.c +++ b/net/ipv6/netfilter/ip6t_eui64.c @@ -12,6 +12,7 @@ #include <linux/ipv6.h> #include <linux/if_ether.h> +#include <linux/netfilter/x_tables.h> #include <linux/netfilter_ipv6/ip6_tables.h> MODULE_DESCRIPTION("IPv6 EUI64 address checking match"); @@ -61,8 +62,9 @@ match(const struct sk_buff *skb, return 0; } -static struct ip6t_match eui64_match = { +static struct xt_match eui64_match = { .name = "eui64", + .family = AF_INET6, .match = match, .matchsize = sizeof(int), .hooks = (1 << NF_IP6_PRE_ROUTING) | (1 << NF_IP6_LOCAL_IN) | @@ -72,12 +74,12 @@ static struct ip6t_match eui64_match = { static int __init ip6t_eui64_init(void) { - return ip6t_register_match(&eui64_match); + return xt_register_match(&eui64_match); } static void __exit ip6t_eui64_fini(void) { - ip6t_unregister_match(&eui64_match); + xt_unregister_match(&eui64_match); } module_init(ip6t_eui64_init); diff --git a/net/ipv6/netfilter/ip6t_frag.c b/net/ipv6/netfilter/ip6t_frag.c index cd22eaaccdc..5a5da71321b 100644 --- a/net/ipv6/netfilter/ip6t_frag.c +++ b/net/ipv6/netfilter/ip6t_frag.c @@ -14,6 +14,7 @@ #include <net/checksum.h> #include <net/ipv6.h> +#include <linux/netfilter/x_tables.h> #include <linux/netfilter_ipv6/ip6_tables.h> #include <linux/netfilter_ipv6/ip6t_frag.h> @@ -135,8 +136,9 @@ checkentry(const char *tablename, return 1; } -static struct ip6t_match frag_match = { +static struct xt_match frag_match = { .name = "frag", + .family = AF_INET6, .match = match, .matchsize = sizeof(struct ip6t_frag), .checkentry = checkentry, @@ -145,12 +147,12 @@ static struct ip6t_match frag_match = { static int __init ip6t_frag_init(void) { - return ip6t_register_match(&frag_match); + return xt_register_match(&frag_match); } static void __exit ip6t_frag_fini(void) { - ip6t_unregister_match(&frag_match); + xt_unregister_match(&frag_match); } module_init(ip6t_frag_init); diff --git a/net/ipv6/netfilter/ip6t_hbh.c b/net/ipv6/netfilter/ip6t_hbh.c index 3f25babe044..d2373c7cd35 100644 --- a/net/ipv6/netfilter/ip6t_hbh.c +++ b/net/ipv6/netfilter/ip6t_hbh.c @@ -16,6 +16,7 @@ #include <asm/byteorder.h> +#include <linux/netfilter/x_tables.h> #include <linux/netfilter_ipv6/ip6_tables.h> #include <linux/netfilter_ipv6/ip6t_opts.h> diff --git a/net/ipv6/netfilter/ip6t_hl.c b/net/ipv6/netfilter/ip6t_hl.c index 44a729e17c4..601cc1211c6 100644 --- a/net/ipv6/netfilter/ip6t_hl.c +++ b/net/ipv6/netfilter/ip6t_hl.c @@ -8,11 +8,12 @@ * published by the Free Software Foundation. */ +#include <linux/ipv6.h> #include <linux/module.h> #include <linux/skbuff.h> #include <linux/netfilter_ipv6/ip6t_hl.h> -#include <linux/netfilter_ipv6/ip6_tables.h> +#include <linux/netfilter/x_tables.h> MODULE_AUTHOR("Maciej Soltysiak <solt@dns.toxicfilms.tv>"); MODULE_DESCRIPTION("IP tables Hop Limit matching module"); @@ -48,8 +49,9 @@ static int match(const struct sk_buff *skb, return 0; } -static struct ip6t_match hl_match = { +static struct xt_match hl_match = { .name = "hl", + .family = AF_INET6, .match = match, .matchsize = sizeof(struct ip6t_hl_info), .me = THIS_MODULE, @@ -57,13 +59,12 @@ static struct ip6t_match hl_match = { static int __init ip6t_hl_init(void) { - return ip6t_register_match(&hl_match); + return xt_register_match(&hl_match); } static void __exit ip6t_hl_fini(void) { - ip6t_unregister_match(&hl_match); - + xt_unregister_match(&hl_match); } module_init(ip6t_hl_init); diff --git a/net/ipv6/netfilter/ip6t_ipv6header.c b/net/ipv6/netfilter/ip6t_ipv6header.c index 3093c398002..26ac084adef 100644 --- a/net/ipv6/netfilter/ip6t_ipv6header.c +++ b/net/ipv6/netfilter/ip6t_ipv6header.c @@ -18,6 +18,7 @@ #include <net/checksum.h> #include <net/ipv6.h> +#include <linux/netfilter/x_tables.h> #include <linux/netfilter_ipv6/ip6_tables.h> #include <linux/netfilter_ipv6/ip6t_ipv6header.h> @@ -140,8 +141,9 @@ ipv6header_checkentry(const char *tablename, return 1; } -static struct ip6t_match ip6t_ipv6header_match = { +static struct xt_match ip6t_ipv6header_match = { .name = "ipv6header", + .family = AF_INET6, .match = &ipv6header_match, .matchsize = sizeof(struct ip6t_ipv6header_info), .checkentry = &ipv6header_checkentry, @@ -151,12 +153,12 @@ static struct ip6t_match ip6t_ipv6header_match = { static int __init ipv6header_init(void) { - return ip6t_register_match(&ip6t_ipv6header_match); + return xt_register_match(&ip6t_ipv6header_match); } static void __exit ipv6header_exit(void) { - ip6t_unregister_match(&ip6t_ipv6header_match); + xt_unregister_match(&ip6t_ipv6header_match); } module_init(ipv6header_init); diff --git a/net/ipv6/netfilter/ip6t_mh.c b/net/ipv6/netfilter/ip6t_mh.c new file mode 100644 index 00000000000..2c7efc6a506 --- /dev/null +++ b/net/ipv6/netfilter/ip6t_mh.c @@ -0,0 +1,108 @@ +/* + * Copyright (C)2006 USAGI/WIDE Project + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * Author: + * Masahide NAKAMURA @USAGI <masahide.nakamura.cz@hitachi.com> + * + * Based on net/netfilter/xt_tcpudp.c + * + */ +#include <linux/types.h> +#include <linux/module.h> +#include <net/ip.h> +#include <linux/ipv6.h> +#include <net/ipv6.h> +#include <net/mip6.h> + +#include <linux/netfilter/x_tables.h> +#include <linux/netfilter_ipv6/ip6t_mh.h> + +MODULE_DESCRIPTION("ip6t_tables match for MH"); +MODULE_LICENSE("GPL"); + +#ifdef DEBUG_IP_FIREWALL_USER +#define duprintf(format, args...) printk(format , ## args) +#else +#define duprintf(format, args...) +#endif + +/* Returns 1 if the type is matched by the range, 0 otherwise */ +static inline int +type_match(u_int8_t min, u_int8_t max, u_int8_t type, int invert) +{ + int ret; + + ret = (type >= min && type <= max) ^ invert; + return ret; +} + +static int +match(const struct sk_buff *skb, + const struct net_device *in, + const struct net_device *out, + const struct xt_match *match, + const void *matchinfo, + int offset, + unsigned int protoff, + int *hotdrop) +{ + struct ip6_mh _mh, *mh; + const struct ip6t_mh *mhinfo = matchinfo; + + /* Must not be a fragment. */ + if (offset) + return 0; + + mh = skb_header_pointer(skb, protoff, sizeof(_mh), &_mh); + if (mh == NULL) { + /* We've been asked to examine this packet, and we + can't. Hence, no choice but to drop. */ + duprintf("Dropping evil MH tinygram.\n"); + *hotdrop = 1; + return 0; + } + + return type_match(mhinfo->types[0], mhinfo->types[1], mh->ip6mh_type, + !!(mhinfo->invflags & IP6T_MH_INV_TYPE)); +} + +/* Called when user tries to insert an entry of this type. */ +static int +mh_checkentry(const char *tablename, + const void *entry, + const struct xt_match *match, + void *matchinfo, + unsigned int hook_mask) +{ + const struct ip6t_mh *mhinfo = matchinfo; + + /* Must specify no unknown invflags */ + return !(mhinfo->invflags & ~IP6T_MH_INV_MASK); +} + +static struct xt_match mh_match = { + .name = "mh", + .family = AF_INET6, + .checkentry = mh_checkentry, + .match = match, + .matchsize = sizeof(struct ip6t_mh), + .proto = IPPROTO_MH, + .me = THIS_MODULE, +}; + +static int __init ip6t_mh_init(void) +{ + return xt_register_match(&mh_match); +} + +static void __exit ip6t_mh_fini(void) +{ + xt_unregister_match(&mh_match); +} + +module_init(ip6t_mh_init); +module_exit(ip6t_mh_fini); diff --git a/net/ipv6/netfilter/ip6t_owner.c b/net/ipv6/netfilter/ip6t_owner.c index 4eb9bbc4ebc..43738bba00b 100644 --- a/net/ipv6/netfilter/ip6t_owner.c +++ b/net/ipv6/netfilter/ip6t_owner.c @@ -16,6 +16,7 @@ #include <linux/netfilter_ipv6/ip6t_owner.h> #include <linux/netfilter_ipv6/ip6_tables.h> +#include <linux/netfilter/x_tables.h> MODULE_AUTHOR("Marc Boucher <marc@mbsi.ca>"); MODULE_DESCRIPTION("IP6 tables owner matching module"); @@ -69,8 +70,9 @@ checkentry(const char *tablename, return 1; } -static struct ip6t_match owner_match = { +static struct xt_match owner_match = { .name = "owner", + .family = AF_INET6, .match = match, .matchsize = sizeof(struct ip6t_owner_info), .hooks = (1 << NF_IP6_LOCAL_OUT) | (1 << NF_IP6_POST_ROUTING), @@ -80,12 +82,12 @@ static struct ip6t_match owner_match = { static int __init ip6t_owner_init(void) { - return ip6t_register_match(&owner_match); + return xt_register_match(&owner_match); } static void __exit ip6t_owner_fini(void) { - ip6t_unregister_match(&owner_match); + xt_unregister_match(&owner_match); } module_init(ip6t_owner_init); diff --git a/net/ipv6/netfilter/ip6t_rt.c b/net/ipv6/netfilter/ip6t_rt.c index 54d7d14134f..81ab00d8c18 100644 --- a/net/ipv6/netfilter/ip6t_rt.c +++ b/net/ipv6/netfilter/ip6t_rt.c @@ -16,6 +16,7 @@ #include <asm/byteorder.h> +#include <linux/netfilter/x_tables.h> #include <linux/netfilter_ipv6/ip6_tables.h> #include <linux/netfilter_ipv6/ip6t_rt.h> @@ -221,8 +222,9 @@ checkentry(const char *tablename, return 1; } -static struct ip6t_match rt_match = { +static struct xt_match rt_match = { .name = "rt", + .family = AF_INET6, .match = match, .matchsize = sizeof(struct ip6t_rt), .checkentry = checkentry, @@ -231,12 +233,12 @@ static struct ip6t_match rt_match = { static int __init ip6t_rt_init(void) { - return ip6t_register_match(&rt_match); + return xt_register_match(&rt_match); } static void __exit ip6t_rt_fini(void) { - ip6t_unregister_match(&rt_match); + xt_unregister_match(&rt_match); } module_init(ip6t_rt_init); diff --git a/net/ipv6/netfilter/ip6table_filter.c b/net/ipv6/netfilter/ip6table_filter.c index 2fc07c74dec..112a21d0c6d 100644 --- a/net/ipv6/netfilter/ip6table_filter.c +++ b/net/ipv6/netfilter/ip6table_filter.c @@ -19,25 +19,6 @@ MODULE_DESCRIPTION("ip6tables filter table"); #define FILTER_VALID_HOOKS ((1 << NF_IP6_LOCAL_IN) | (1 << NF_IP6_FORWARD) | (1 << NF_IP6_LOCAL_OUT)) -/* Standard entry. */ -struct ip6t_standard -{ - struct ip6t_entry entry; - struct ip6t_standard_target target; -}; - -struct ip6t_error_target -{ - struct ip6t_entry_target target; - char errorname[IP6T_FUNCTION_MAXNAMELEN]; -}; - -struct ip6t_error -{ - struct ip6t_entry entry; - struct ip6t_error_target target; -}; - static struct { struct ip6t_replace repl; @@ -92,7 +73,7 @@ static struct } }; -static struct ip6t_table packet_filter = { +static struct xt_table packet_filter = { .name = "filter", .valid_hooks = FILTER_VALID_HOOKS, .lock = RW_LOCK_UNLOCKED, diff --git a/net/ipv6/netfilter/ip6table_mangle.c b/net/ipv6/netfilter/ip6table_mangle.c index 6250e86a6dd..5f5aa0e5147 100644 --- a/net/ipv6/netfilter/ip6table_mangle.c +++ b/net/ipv6/netfilter/ip6table_mangle.c @@ -29,25 +29,6 @@ MODULE_DESCRIPTION("ip6tables mangle table"); #define DEBUGP(x, args...) #endif -/* Standard entry. */ -struct ip6t_standard -{ - struct ip6t_entry entry; - struct ip6t_standard_target target; -}; - -struct ip6t_error_target -{ - struct ip6t_entry_target target; - char errorname[IP6T_FUNCTION_MAXNAMELEN]; -}; - -struct ip6t_error -{ - struct ip6t_entry entry; - struct ip6t_error_target target; -}; - static struct { struct ip6t_replace repl; @@ -122,7 +103,7 @@ static struct } }; -static struct ip6t_table packet_mangler = { +static struct xt_table packet_mangler = { .name = "mangle", .valid_hooks = MANGLE_VALID_HOOKS, .lock = RW_LOCK_UNLOCKED, diff --git a/net/ipv6/netfilter/ip6table_raw.c b/net/ipv6/netfilter/ip6table_raw.c index b4154da575c..277bf34638b 100644 --- a/net/ipv6/netfilter/ip6table_raw.c +++ b/net/ipv6/netfilter/ip6table_raw.c @@ -14,25 +14,6 @@ #define DEBUGP(x, args...) #endif -/* Standard entry. */ -struct ip6t_standard -{ - struct ip6t_entry entry; - struct ip6t_standard_target target; -}; - -struct ip6t_error_target -{ - struct ip6t_entry_target target; - char errorname[IP6T_FUNCTION_MAXNAMELEN]; -}; - -struct ip6t_error -{ - struct ip6t_entry entry; - struct ip6t_error_target target; -}; - static struct { struct ip6t_replace repl; diff --git a/net/ipv6/netfilter/nf_conntrack_reasm.c b/net/ipv6/netfilter/nf_conntrack_reasm.c index 37e5fca923a..d9c15402ba6 100644 --- a/net/ipv6/netfilter/nf_conntrack_reasm.c +++ b/net/ipv6/netfilter/nf_conntrack_reasm.c @@ -835,6 +835,8 @@ void nf_ct_frag6_output(unsigned int hooknum, struct sk_buff *skb, s->nfct_reasm = skb; s2 = s->next; + s->next = NULL; + NF_HOOK_THRESH(PF_INET6, hooknum, s, in, out, okfn, NF_IP6_PRI_CONNTRACK_DEFRAG + 1); s = s2; diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c index 4ae1b19ada5..c2d8059e754 100644 --- a/net/ipv6/raw.c +++ b/net/ipv6/raw.c @@ -815,7 +815,7 @@ static int rawv6_sendmsg(struct kiocb *iocb, struct sock *sk, if (final_p) ipv6_addr_copy(&fl.fl6_dst, final_p); - if ((err = xfrm_lookup(&dst, &fl, sk, 0)) < 0) + if ((err = xfrm_lookup(&dst, &fl, sk, 1)) < 0) goto out; if (hlimit < 0) { @@ -1094,10 +1094,19 @@ static void rawv6_close(struct sock *sk, long timeout) static int rawv6_init_sk(struct sock *sk) { - if (inet_sk(sk)->num == IPPROTO_ICMPV6) { - struct raw6_sock *rp = raw6_sk(sk); + struct raw6_sock *rp = raw6_sk(sk); + + switch (inet_sk(sk)->num) { + case IPPROTO_ICMPV6: rp->checksum = 1; rp->offset = 2; + break; + case IPPROTO_MH: + rp->checksum = 1; + rp->offset = 4; + break; + default: + break; } return(0); } diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 9f80518aacb..19c906f6efa 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -311,12 +311,21 @@ static inline void rt6_probe(struct rt6_info *rt) static int inline rt6_check_dev(struct rt6_info *rt, int oif) { struct net_device *dev = rt->rt6i_dev; - if (!oif || dev->ifindex == oif) + int ret = 0; + + if (!oif) return 2; - if ((dev->flags & IFF_LOOPBACK) && - rt->rt6i_idev && rt->rt6i_idev->dev->ifindex == oif) - return 1; - return 0; + if (dev->flags & IFF_LOOPBACK) { + if (!WARN_ON(rt->rt6i_idev == NULL) && + rt->rt6i_idev->dev->ifindex == oif) + ret = 1; + else + return 0; + } + if (dev->ifindex == oif) + return 2; + + return ret; } static int inline rt6_check_neigh(struct rt6_info *rt) @@ -494,7 +503,7 @@ do { \ goto out; \ pn = fn->parent; \ if (FIB6_SUBTREE(pn) && FIB6_SUBTREE(pn) != fn) \ - fn = fib6_lookup(pn->subtree, NULL, saddr); \ + fn = fib6_lookup(FIB6_SUBTREE(pn), NULL, saddr); \ else \ fn = pn; \ if (fn->fn_flags & RTN_RTINFO) \ @@ -2017,6 +2026,7 @@ static inline size_t rt6_nlmsg_size(void) + nla_total_size(4) /* RTA_IIF */ + nla_total_size(4) /* RTA_OIF */ + nla_total_size(4) /* RTA_PRIORITY */ + + RTAX_MAX * nla_total_size(4) /* RTA_METRICS */ + nla_total_size(sizeof(struct rta_cacheinfo)); } @@ -2039,7 +2049,7 @@ static int rt6_fill_node(struct sk_buff *skb, struct rt6_info *rt, nlh = nlmsg_put(skb, pid, seq, type, sizeof(*rtm), flags); if (nlh == NULL) - return -ENOBUFS; + return -EMSGSIZE; rtm = nlmsg_data(nlh); rtm->rtm_family = AF_INET6; @@ -2110,7 +2120,8 @@ static int rt6_fill_node(struct sk_buff *skb, struct rt6_info *rt, return nlmsg_end(skb, nlh); nla_put_failure: - return nlmsg_cancel(skb, nlh); + nlmsg_cancel(skb, nlh); + return -EMSGSIZE; } int rt6_dump_route(struct rt6_info *rt, void *p_arg) @@ -2221,9 +2232,12 @@ void inet6_rt_notify(int event, struct rt6_info *rt, struct nl_info *info) goto errout; err = rt6_fill_node(skb, rt, NULL, NULL, 0, event, pid, seq, 0, 0); - /* failure implies BUG in rt6_nlmsg_size() */ - BUG_ON(err < 0); - + if (err < 0) { + /* -EMSGSIZE implies BUG in rt6_nlmsg_size() */ + WARN_ON(err == -EMSGSIZE); + kfree_skb(skb); + goto errout; + } err = rtnl_notify(skb, pid, RTNLGRP_IPV6_ROUTE, nlh, gfp_any()); errout: if (err < 0) diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c index 77b7b091143..47cfeadac6d 100644 --- a/net/ipv6/sit.c +++ b/net/ipv6/sit.c @@ -686,7 +686,8 @@ ipip6_tunnel_ioctl (struct net_device *dev, struct ifreq *ifr, int cmd) goto done; dev = t->dev; } - err = unregister_netdevice(dev); + unregister_netdevice(dev); + err = 0; break; default: diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index c25e930c2c6..dcb7b00a737 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -265,7 +265,7 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr, if (final_p) ipv6_addr_copy(&fl.fl6_dst, final_p); - if ((err = xfrm_lookup(&dst, &fl, sk, 0)) < 0) + if ((err = xfrm_lookup(&dst, &fl, sk, 1)) < 0) goto failure; if (saddr == NULL) { diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index f52a5c3cc0a..15e5195549c 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -736,7 +736,7 @@ do_udp_sendmsg: if (final_p) ipv6_addr_copy(&fl.fl6_dst, final_p); - if ((err = xfrm_lookup(&dst, &fl, sk, 0)) < 0) + if ((err = xfrm_lookup(&dst, &fl, sk, 1)) < 0) goto out; if (hlimit < 0) { diff --git a/net/ipv6/xfrm6_mode_tunnel.c b/net/ipv6/xfrm6_mode_tunnel.c index 5e7d8a7d641..0bc866c0d83 100644 --- a/net/ipv6/xfrm6_mode_tunnel.c +++ b/net/ipv6/xfrm6_mode_tunnel.c @@ -25,6 +25,12 @@ static inline void ipip6_ecn_decapsulate(struct sk_buff *skb) IP6_ECN_set_ce(inner_iph); } +static inline void ip6ip_ecn_decapsulate(struct sk_buff *skb) +{ + if (INET_ECN_is_ce(ipv6_get_dsfield(skb->nh.ipv6h))) + IP_ECN_set_ce(skb->h.ipiph); +} + /* Add encapsulation header. * * The top IP header will be constructed per RFC 2401. The following fields @@ -40,6 +46,7 @@ static inline void ipip6_ecn_decapsulate(struct sk_buff *skb) static int xfrm6_tunnel_output(struct xfrm_state *x, struct sk_buff *skb) { struct dst_entry *dst = skb->dst; + struct xfrm_dst *xdst = (struct xfrm_dst*)dst; struct ipv6hdr *iph, *top_iph; int dsfield; @@ -52,16 +59,24 @@ static int xfrm6_tunnel_output(struct xfrm_state *x, struct sk_buff *skb) skb->h.ipv6h = top_iph + 1; top_iph->version = 6; - top_iph->priority = iph->priority; - top_iph->flow_lbl[0] = iph->flow_lbl[0]; - top_iph->flow_lbl[1] = iph->flow_lbl[1]; - top_iph->flow_lbl[2] = iph->flow_lbl[2]; + if (xdst->route->ops->family == AF_INET6) { + top_iph->priority = iph->priority; + top_iph->flow_lbl[0] = iph->flow_lbl[0]; + top_iph->flow_lbl[1] = iph->flow_lbl[1]; + top_iph->flow_lbl[2] = iph->flow_lbl[2]; + top_iph->nexthdr = IPPROTO_IPV6; + } else { + top_iph->priority = 0; + top_iph->flow_lbl[0] = 0; + top_iph->flow_lbl[1] = 0; + top_iph->flow_lbl[2] = 0; + top_iph->nexthdr = IPPROTO_IPIP; + } dsfield = ipv6_get_dsfield(top_iph); dsfield = INET_ECN_encapsulate(dsfield, dsfield); if (x->props.flags & XFRM_STATE_NOECN) dsfield &= ~INET_ECN_MASK; ipv6_change_dsfield(top_iph, 0, dsfield); - top_iph->nexthdr = IPPROTO_IPV6; top_iph->hop_limit = dst_metric(dst->child, RTAX_HOPLIMIT); ipv6_addr_copy(&top_iph->saddr, (struct in6_addr *)&x->props.saddr); ipv6_addr_copy(&top_iph->daddr, (struct in6_addr *)&x->id.daddr); @@ -72,7 +87,8 @@ static int xfrm6_tunnel_input(struct xfrm_state *x, struct sk_buff *skb) { int err = -EINVAL; - if (skb->nh.raw[IP6CB(skb)->nhoff] != IPPROTO_IPV6) + if (skb->nh.raw[IP6CB(skb)->nhoff] != IPPROTO_IPV6 + && skb->nh.raw[IP6CB(skb)->nhoff] != IPPROTO_IPIP) goto out; if (!pskb_may_pull(skb, sizeof(struct ipv6hdr))) goto out; @@ -81,10 +97,16 @@ static int xfrm6_tunnel_input(struct xfrm_state *x, struct sk_buff *skb) (err = pskb_expand_head(skb, 0, 0, GFP_ATOMIC))) goto out; - if (x->props.flags & XFRM_STATE_DECAP_DSCP) - ipv6_copy_dscp(skb->nh.ipv6h, skb->h.ipv6h); - if (!(x->props.flags & XFRM_STATE_NOECN)) - ipip6_ecn_decapsulate(skb); + if (skb->nh.raw[IP6CB(skb)->nhoff] == IPPROTO_IPV6) { + if (x->props.flags & XFRM_STATE_DECAP_DSCP) + ipv6_copy_dscp(skb->nh.ipv6h, skb->h.ipv6h); + if (!(x->props.flags & XFRM_STATE_NOECN)) + ipip6_ecn_decapsulate(skb); + } else { + if (!(x->props.flags & XFRM_STATE_NOECN)) + ip6ip_ecn_decapsulate(skb); + skb->protocol = htons(ETH_P_IP); + } skb->mac.raw = memmove(skb->data - skb->mac_len, skb->mac.raw, skb->mac_len); skb->nh.raw = skb->data; diff --git a/net/ipv6/xfrm6_policy.c b/net/ipv6/xfrm6_policy.c index 8dffd4daae9..59480e92177 100644 --- a/net/ipv6/xfrm6_policy.c +++ b/net/ipv6/xfrm6_policy.c @@ -131,13 +131,11 @@ __xfrm6_bundle_create(struct xfrm_policy *policy, struct xfrm_state **xfrm, int struct dst_entry *dst, *dst_prev; struct rt6_info *rt0 = (struct rt6_info*)(*dst_p); struct rt6_info *rt = rt0; - struct in6_addr *remote = &fl->fl6_dst; - struct in6_addr *local = &fl->fl6_src; struct flowi fl_tunnel = { .nl_u = { .ip6_u = { - .saddr = *local, - .daddr = *remote + .saddr = fl->fl6_src, + .daddr = fl->fl6_dst, } } }; @@ -153,7 +151,6 @@ __xfrm6_bundle_create(struct xfrm_policy *policy, struct xfrm_state **xfrm, int for (i = 0; i < nx; i++) { struct dst_entry *dst1 = dst_alloc(&xfrm6_dst_ops); struct xfrm_dst *xdst; - int tunnel = 0; if (unlikely(dst1 == NULL)) { err = -ENOBUFS; @@ -177,19 +174,27 @@ __xfrm6_bundle_create(struct xfrm_policy *policy, struct xfrm_state **xfrm, int dst1->next = dst_prev; dst_prev = dst1; - if (xfrm[i]->props.mode != XFRM_MODE_TRANSPORT) { - remote = __xfrm6_bundle_addr_remote(xfrm[i], remote); - local = __xfrm6_bundle_addr_local(xfrm[i], local); - tunnel = 1; - } + __xfrm6_bundle_len_inc(&header_len, &nfheader_len, xfrm[i]); trailer_len += xfrm[i]->props.trailer_len; - if (tunnel) { - ipv6_addr_copy(&fl_tunnel.fl6_dst, remote); - ipv6_addr_copy(&fl_tunnel.fl6_src, local); + if (xfrm[i]->props.mode == XFRM_MODE_TUNNEL) { + unsigned short encap_family = xfrm[i]->props.family; + switch(encap_family) { + case AF_INET: + fl_tunnel.fl4_dst = xfrm[i]->id.daddr.a4; + fl_tunnel.fl4_src = xfrm[i]->props.saddr.a4; + break; + case AF_INET6: + ipv6_addr_copy(&fl_tunnel.fl6_dst, (struct in6_addr*)&xfrm[i]->id.daddr.a6); + ipv6_addr_copy(&fl_tunnel.fl6_src, (struct in6_addr*)&xfrm[i]->props.saddr.a6); + break; + default: + BUG_ON(1); + } + err = xfrm_dst_lookup((struct xfrm_dst **) &rt, - &fl_tunnel, AF_INET6); + &fl_tunnel, encap_family); if (err) goto error; } else @@ -208,6 +213,7 @@ __xfrm6_bundle_create(struct xfrm_policy *policy, struct xfrm_state **xfrm, int i = 0; for (; dst_prev != &rt->u.dst; dst_prev = dst_prev->child) { struct xfrm_dst *x = (struct xfrm_dst*)dst_prev; + struct xfrm_state_afinfo *afinfo; dst_prev->xfrm = xfrm[i++]; dst_prev->dev = rt->u.dst.dev; @@ -224,7 +230,17 @@ __xfrm6_bundle_create(struct xfrm_policy *policy, struct xfrm_state **xfrm, int /* Copy neighbour for reachability confirmation */ dst_prev->neighbour = neigh_clone(rt->u.dst.neighbour); dst_prev->input = rt->u.dst.input; - dst_prev->output = xfrm6_output; + /* XXX: When IPv4 is implemented as module and can be unloaded, + * we should manage reference to xfrm4_output in afinfo->output. + * Miyazawa + */ + afinfo = xfrm_state_get_afinfo(dst_prev->xfrm->props.family); + if (!afinfo) { + dst = *dst_p; + goto error; + }; + dst_prev->output = afinfo->output; + xfrm_state_put_afinfo(afinfo); /* Sheit... I remember I did this right. Apparently, * it was magically lost, so this code needs audit */ x->u.rt6.rt6i_flags = rt0->rt6i_flags&(RTCF_BROADCAST|RTCF_MULTICAST|RTCF_LOCAL); diff --git a/net/ipv6/xfrm6_state.c b/net/ipv6/xfrm6_state.c index 9ddaa9d4153..60ad5f074e0 100644 --- a/net/ipv6/xfrm6_state.c +++ b/net/ipv6/xfrm6_state.c @@ -171,6 +171,7 @@ static struct xfrm_state_afinfo xfrm6_state_afinfo = { .init_tempsel = __xfrm6_init_tempsel, .tmpl_sort = __xfrm6_tmpl_sort, .state_sort = __xfrm6_state_sort, + .output = xfrm6_output, }; void __init xfrm6_state_init(void) diff --git a/net/ipx/af_ipx.c b/net/ipx/af_ipx.c index 76c661566df..89f283c51df 100644 --- a/net/ipx/af_ipx.c +++ b/net/ipx/af_ipx.c @@ -2035,19 +2035,27 @@ static void __exit ipx_proto_finito(void) ipxitf_cleanup(); - unregister_snap_client(pSNAP_datalink); - pSNAP_datalink = NULL; + if (pSNAP_datalink) { + unregister_snap_client(pSNAP_datalink); + pSNAP_datalink = NULL; + } - unregister_8022_client(p8022_datalink); - p8022_datalink = NULL; + if (p8022_datalink) { + unregister_8022_client(p8022_datalink); + p8022_datalink = NULL; + } dev_remove_pack(&ipx_8023_packet_type); - destroy_8023_client(p8023_datalink); - p8023_datalink = NULL; + if (p8023_datalink) { + destroy_8023_client(p8023_datalink); + p8023_datalink = NULL; + } dev_remove_pack(&ipx_dix_packet_type); - destroy_EII_client(pEII_datalink); - pEII_datalink = NULL; + if (pEII_datalink) { + destroy_EII_client(pEII_datalink); + pEII_datalink = NULL; + } proto_unregister(&ipx_proto); sock_unregister(ipx_family_ops.family); diff --git a/net/irda/ircomm/ircomm_tty_ioctl.c b/net/irda/ircomm/ircomm_tty_ioctl.c index 197e3e7ed7e..75e39ea599d 100644 --- a/net/irda/ircomm/ircomm_tty_ioctl.c +++ b/net/irda/ircomm/ircomm_tty_ioctl.c @@ -146,7 +146,7 @@ static void ircomm_tty_change_speed(struct ircomm_tty_cb *self) * do something rational. */ void ircomm_tty_set_termios(struct tty_struct *tty, - struct termios *old_termios) + struct ktermios *old_termios) { struct ircomm_tty_cb *self = (struct ircomm_tty_cb *) tty->driver_data; unsigned int cflag = tty->termios->c_cflag; diff --git a/net/irda/irias_object.c b/net/irda/irias_object.c index b1ee99a59c0..2a571b43ebe 100644 --- a/net/irda/irias_object.c +++ b/net/irda/irias_object.c @@ -91,6 +91,12 @@ struct ias_object *irias_new_object( char *name, int id) obj->magic = IAS_OBJECT_MAGIC; obj->name = strndup(name, IAS_MAX_CLASSNAME); + if (!obj->name) { + IRDA_WARNING("%s(), Unable to allocate name!\n", + __FUNCTION__); + kfree(obj); + return NULL; + } obj->id = id; /* Locking notes : the attrib spinlock has lower precendence @@ -101,6 +107,7 @@ struct ias_object *irias_new_object( char *name, int id) if (obj->attribs == NULL) { IRDA_WARNING("%s(), Unable to allocate attribs!\n", __FUNCTION__); + kfree(obj->name); kfree(obj); return NULL; } @@ -357,6 +364,15 @@ void irias_add_integer_attrib(struct ias_object *obj, char *name, int value, /* Insert value */ attrib->value = irias_new_integer_value(value); + if (!attrib->name || !attrib->value) { + IRDA_WARNING("%s: Unable to allocate attribute!\n", + __FUNCTION__); + if (attrib->value) + irias_delete_value(attrib->value); + kfree(attrib->name); + kfree(attrib); + return; + } irias_add_attrib(obj, attrib, owner); } @@ -391,6 +407,15 @@ void irias_add_octseq_attrib(struct ias_object *obj, char *name, __u8 *octets, attrib->name = strndup(name, IAS_MAX_ATTRIBNAME); attrib->value = irias_new_octseq_value( octets, len); + if (!attrib->name || !attrib->value) { + IRDA_WARNING("%s: Unable to allocate attribute!\n", + __FUNCTION__); + if (attrib->value) + irias_delete_value(attrib->value); + kfree(attrib->name); + kfree(attrib); + return; + } irias_add_attrib(obj, attrib, owner); } @@ -424,6 +449,15 @@ void irias_add_string_attrib(struct ias_object *obj, char *name, char *value, attrib->name = strndup(name, IAS_MAX_ATTRIBNAME); attrib->value = irias_new_string_value(value); + if (!attrib->name || !attrib->value) { + IRDA_WARNING("%s: Unable to allocate attribute!\n", + __FUNCTION__); + if (attrib->value) + irias_delete_value(attrib->value); + kfree(attrib->name); + kfree(attrib); + return; + } irias_add_attrib(obj, attrib, owner); } @@ -473,6 +507,12 @@ struct ias_value *irias_new_string_value(char *string) value->type = IAS_STRING; value->charset = CS_ASCII; value->t.string = strndup(string, IAS_MAX_STRING); + if (!value->t.string) { + IRDA_WARNING("%s: Unable to kmalloc!\n", __FUNCTION__); + kfree(value); + return NULL; + } + value->len = strlen(value->t.string); return value; diff --git a/net/irda/irlan/irlan_common.c b/net/irda/irlan/irlan_common.c index 2bb04ac0932..310776dd610 100644 --- a/net/irda/irlan/irlan_common.c +++ b/net/irda/irlan/irlan_common.c @@ -144,12 +144,18 @@ static int __init irlan_init(void) /* Register with IrLMP as a client */ ckey = irlmp_register_client(hints, &irlan_client_discovery_indication, NULL, NULL); - + if (!ckey) + goto err_ckey; + /* Register with IrLMP as a service */ - skey = irlmp_register_service(hints); + skey = irlmp_register_service(hints); + if (!skey) + goto err_skey; /* Start the master IrLAN instance (the only one for now) */ - new = irlan_open(DEV_ADDR_ANY, DEV_ADDR_ANY); + new = irlan_open(DEV_ADDR_ANY, DEV_ADDR_ANY); + if (!new) + goto err_open; /* The master will only open its (listen) control TSAP */ irlan_provider_open_ctrl_tsap(new); @@ -158,6 +164,17 @@ static int __init irlan_init(void) irlmp_discovery_request(DISCOVERY_DEFAULT_SLOTS); return 0; + +err_open: + irlmp_unregister_service(skey); +err_skey: + irlmp_unregister_client(ckey); +err_ckey: +#ifdef CONFIG_PROC_FS + remove_proc_entry("irlan", proc_irda); +#endif /* CONFIG_PROC_FS */ + + return -ENOMEM; } static void __exit irlan_cleanup(void) diff --git a/net/iucv/Kconfig b/net/iucv/Kconfig new file mode 100644 index 00000000000..f8fcc3d1032 --- /dev/null +++ b/net/iucv/Kconfig @@ -0,0 +1,15 @@ +config IUCV + tristate "IUCV support (VM only)" + depends on S390 + help + Select this option if you want to use inter-user communication under + VM or VIF sockets. If you run on z/VM, say "Y" to enable a fast + communication link between VM guests. + +config AFIUCV + tristate "AF_IUCV support (VM only)" + depends on IUCV + help + Select this option if you want to use inter-user communication under + VM or VIF sockets. If you run on z/VM, say "Y" to enable a fast + communication link between VM guests. diff --git a/net/iucv/Makefile b/net/iucv/Makefile new file mode 100644 index 00000000000..7bfdc853267 --- /dev/null +++ b/net/iucv/Makefile @@ -0,0 +1,6 @@ +# +# Makefile for IUCV +# + +obj-$(CONFIG_IUCV) += iucv.o +obj-$(CONFIG_AFIUCV) += af_iucv.o diff --git a/net/iucv/af_iucv.c b/net/iucv/af_iucv.c new file mode 100644 index 00000000000..acc94214bde --- /dev/null +++ b/net/iucv/af_iucv.c @@ -0,0 +1,1077 @@ +/* + * linux/net/iucv/af_iucv.c + * + * IUCV protocol stack for Linux on zSeries + * + * Copyright 2006 IBM Corporation + * + * Author(s): Jennifer Hunt <jenhunt@us.ibm.com> + */ + +#include <linux/module.h> +#include <linux/types.h> +#include <linux/list.h> +#include <linux/errno.h> +#include <linux/kernel.h> +#include <linux/sched.h> +#include <linux/slab.h> +#include <linux/skbuff.h> +#include <linux/init.h> +#include <linux/poll.h> +#include <net/sock.h> +#include <asm/ebcdic.h> +#include <asm/cpcmd.h> +#include <linux/kmod.h> + +#include <net/iucv/iucv.h> +#include <net/iucv/af_iucv.h> + +#define CONFIG_IUCV_SOCK_DEBUG 1 + +#define IPRMDATA 0x80 +#define VERSION "1.0" + +static char iucv_userid[80]; + +static struct proto_ops iucv_sock_ops; + +static struct proto iucv_proto = { + .name = "AF_IUCV", + .owner = THIS_MODULE, + .obj_size = sizeof(struct iucv_sock), +}; + +/* Call Back functions */ +static void iucv_callback_rx(struct iucv_path *, struct iucv_message *); +static void iucv_callback_txdone(struct iucv_path *, struct iucv_message *); +static void iucv_callback_connack(struct iucv_path *, u8 ipuser[16]); +static int iucv_callback_connreq(struct iucv_path *, u8 ipvmid[8], u8 ipuser[16]); +static void iucv_callback_connrej(struct iucv_path *, u8 ipuser[16]); + +static struct iucv_sock_list iucv_sk_list = { + .lock = RW_LOCK_UNLOCKED, + .autobind_name = ATOMIC_INIT(0) +}; + +static struct iucv_handler af_iucv_handler = { + .path_pending = iucv_callback_connreq, + .path_complete = iucv_callback_connack, + .path_severed = iucv_callback_connrej, + .message_pending = iucv_callback_rx, + .message_complete = iucv_callback_txdone +}; + +static inline void high_nmcpy(unsigned char *dst, char *src) +{ + memcpy(dst, src, 8); +} + +static inline void low_nmcpy(unsigned char *dst, char *src) +{ + memcpy(&dst[8], src, 8); +} + +/* Timers */ +static void iucv_sock_timeout(unsigned long arg) +{ + struct sock *sk = (struct sock *)arg; + + bh_lock_sock(sk); + sk->sk_err = ETIMEDOUT; + sk->sk_state_change(sk); + bh_unlock_sock(sk); + + iucv_sock_kill(sk); + sock_put(sk); +} + +static void iucv_sock_clear_timer(struct sock *sk) +{ + sk_stop_timer(sk, &sk->sk_timer); +} + +static void iucv_sock_init_timer(struct sock *sk) +{ + init_timer(&sk->sk_timer); + sk->sk_timer.function = iucv_sock_timeout; + sk->sk_timer.data = (unsigned long)sk; +} + +static struct sock *__iucv_get_sock_by_name(char *nm) +{ + struct sock *sk; + struct hlist_node *node; + + sk_for_each(sk, node, &iucv_sk_list.head) + if (!memcmp(&iucv_sk(sk)->src_name, nm, 8)) + return sk; + + return NULL; +} + +static void iucv_sock_destruct(struct sock *sk) +{ + skb_queue_purge(&sk->sk_receive_queue); + skb_queue_purge(&sk->sk_write_queue); +} + +/* Cleanup Listen */ +static void iucv_sock_cleanup_listen(struct sock *parent) +{ + struct sock *sk; + + /* Close non-accepted connections */ + while ((sk = iucv_accept_dequeue(parent, NULL))) { + iucv_sock_close(sk); + iucv_sock_kill(sk); + } + + parent->sk_state = IUCV_CLOSED; + sock_set_flag(parent, SOCK_ZAPPED); +} + +/* Kill socket */ +static void iucv_sock_kill(struct sock *sk) +{ + if (!sock_flag(sk, SOCK_ZAPPED) || sk->sk_socket) + return; + + iucv_sock_unlink(&iucv_sk_list, sk); + sock_set_flag(sk, SOCK_DEAD); + sock_put(sk); +} + +/* Close an IUCV socket */ +static void iucv_sock_close(struct sock *sk) +{ + unsigned char user_data[16]; + struct iucv_sock *iucv = iucv_sk(sk); + int err; + + iucv_sock_clear_timer(sk); + lock_sock(sk); + + switch(sk->sk_state) { + case IUCV_LISTEN: + iucv_sock_cleanup_listen(sk); + break; + + case IUCV_CONNECTED: + case IUCV_DISCONN: + err = 0; + if (iucv->path) { + low_nmcpy(user_data, iucv->src_name); + high_nmcpy(user_data, iucv->dst_name); + ASCEBC(user_data, sizeof(user_data)); + err = iucv_path_sever(iucv->path, user_data); + iucv_path_free(iucv->path); + iucv->path = NULL; + } + + sk->sk_state = IUCV_CLOSED; + sk->sk_state_change(sk); + sk->sk_err = ECONNRESET; + sk->sk_state_change(sk); + + skb_queue_purge(&iucv->send_skb_q); + + sock_set_flag(sk, SOCK_ZAPPED); + break; + + default: + sock_set_flag(sk, SOCK_ZAPPED); + break; + }; + + release_sock(sk); + iucv_sock_kill(sk); +} + +static void iucv_sock_init(struct sock *sk, struct sock *parent) +{ + if (parent) + sk->sk_type = parent->sk_type; +} + +static struct sock *iucv_sock_alloc(struct socket *sock, int proto, gfp_t prio) +{ + struct sock *sk; + + sk = sk_alloc(PF_IUCV, prio, &iucv_proto, 1); + if (!sk) + return NULL; + + sock_init_data(sock, sk); + INIT_LIST_HEAD(&iucv_sk(sk)->accept_q); + skb_queue_head_init(&iucv_sk(sk)->send_skb_q); + iucv_sk(sk)->send_tag = 0; + + sk->sk_destruct = iucv_sock_destruct; + sk->sk_sndtimeo = IUCV_CONN_TIMEOUT; + sk->sk_allocation = GFP_DMA; + + sock_reset_flag(sk, SOCK_ZAPPED); + + sk->sk_protocol = proto; + sk->sk_state = IUCV_OPEN; + + iucv_sock_init_timer(sk); + + iucv_sock_link(&iucv_sk_list, sk); + return sk; +} + +/* Create an IUCV socket */ +static int iucv_sock_create(struct socket *sock, int protocol) +{ + struct sock *sk; + + if (sock->type != SOCK_STREAM) + return -ESOCKTNOSUPPORT; + + sock->state = SS_UNCONNECTED; + sock->ops = &iucv_sock_ops; + + sk = iucv_sock_alloc(sock, protocol, GFP_KERNEL); + if (!sk) + return -ENOMEM; + + iucv_sock_init(sk, NULL); + + return 0; +} + +void iucv_sock_link(struct iucv_sock_list *l, struct sock *sk) +{ + write_lock_bh(&l->lock); + sk_add_node(sk, &l->head); + write_unlock_bh(&l->lock); +} + +void iucv_sock_unlink(struct iucv_sock_list *l, struct sock *sk) +{ + write_lock_bh(&l->lock); + sk_del_node_init(sk); + write_unlock_bh(&l->lock); +} + +void iucv_accept_enqueue(struct sock *parent, struct sock *sk) +{ + sock_hold(sk); + list_add_tail(&iucv_sk(sk)->accept_q, &iucv_sk(parent)->accept_q); + iucv_sk(sk)->parent = parent; + parent->sk_ack_backlog++; +} + +void iucv_accept_unlink(struct sock *sk) +{ + list_del_init(&iucv_sk(sk)->accept_q); + iucv_sk(sk)->parent->sk_ack_backlog--; + iucv_sk(sk)->parent = NULL; + sock_put(sk); +} + +struct sock *iucv_accept_dequeue(struct sock *parent, struct socket *newsock) +{ + struct iucv_sock *isk, *n; + struct sock *sk; + + list_for_each_entry_safe(isk, n, &iucv_sk(parent)->accept_q, accept_q){ + sk = (struct sock *) isk; + lock_sock(sk); + + if (sk->sk_state == IUCV_CLOSED) { + release_sock(sk); + iucv_accept_unlink(sk); + continue; + } + + if (sk->sk_state == IUCV_CONNECTED || + sk->sk_state == IUCV_SEVERED || + !newsock) { + iucv_accept_unlink(sk); + if (newsock) + sock_graft(sk, newsock); + + if (sk->sk_state == IUCV_SEVERED) + sk->sk_state = IUCV_DISCONN; + + release_sock(sk); + return sk; + } + + release_sock(sk); + } + return NULL; +} + +int iucv_sock_wait_state(struct sock *sk, int state, int state2, + unsigned long timeo) +{ + DECLARE_WAITQUEUE(wait, current); + int err = 0; + + add_wait_queue(sk->sk_sleep, &wait); + while (sk->sk_state != state && sk->sk_state != state2) { + set_current_state(TASK_INTERRUPTIBLE); + + if (!timeo) { + err = -EAGAIN; + break; + } + + if (signal_pending(current)) { + err = sock_intr_errno(timeo); + break; + } + + release_sock(sk); + timeo = schedule_timeout(timeo); + lock_sock(sk); + + err = sock_error(sk); + if (err) + break; + } + set_current_state(TASK_RUNNING); + remove_wait_queue(sk->sk_sleep, &wait); + return err; +} + +/* Bind an unbound socket */ +static int iucv_sock_bind(struct socket *sock, struct sockaddr *addr, + int addr_len) +{ + struct sockaddr_iucv *sa = (struct sockaddr_iucv *) addr; + struct sock *sk = sock->sk; + struct iucv_sock *iucv; + int err; + + /* Verify the input sockaddr */ + if (!addr || addr->sa_family != AF_IUCV) + return -EINVAL; + + lock_sock(sk); + if (sk->sk_state != IUCV_OPEN) { + err = -EBADFD; + goto done; + } + + write_lock_bh(&iucv_sk_list.lock); + + iucv = iucv_sk(sk); + if (__iucv_get_sock_by_name(sa->siucv_name)) { + err = -EADDRINUSE; + goto done_unlock; + } + if (iucv->path) { + err = 0; + goto done_unlock; + } + + /* Bind the socket */ + memcpy(iucv->src_name, sa->siucv_name, 8); + + /* Copy the user id */ + memcpy(iucv->src_user_id, iucv_userid, 8); + sk->sk_state = IUCV_BOUND; + err = 0; + +done_unlock: + /* Release the socket list lock */ + write_unlock_bh(&iucv_sk_list.lock); +done: + release_sock(sk); + return err; +} + +/* Automatically bind an unbound socket */ +static int iucv_sock_autobind(struct sock *sk) +{ + struct iucv_sock *iucv = iucv_sk(sk); + char query_buffer[80]; + char name[12]; + int err = 0; + + /* Set the userid and name */ + cpcmd("QUERY USERID", query_buffer, sizeof(query_buffer), &err); + if (unlikely(err)) + return -EPROTO; + + memcpy(iucv->src_user_id, query_buffer, 8); + + write_lock_bh(&iucv_sk_list.lock); + + sprintf(name, "%08x", atomic_inc_return(&iucv_sk_list.autobind_name)); + while (__iucv_get_sock_by_name(name)) { + sprintf(name, "%08x", + atomic_inc_return(&iucv_sk_list.autobind_name)); + } + + write_unlock_bh(&iucv_sk_list.lock); + + memcpy(&iucv->src_name, name, 8); + + return err; +} + +/* Connect an unconnected socket */ +static int iucv_sock_connect(struct socket *sock, struct sockaddr *addr, + int alen, int flags) +{ + struct sockaddr_iucv *sa = (struct sockaddr_iucv *) addr; + struct sock *sk = sock->sk; + struct iucv_sock *iucv; + unsigned char user_data[16]; + int err; + + if (addr->sa_family != AF_IUCV || alen < sizeof(struct sockaddr_iucv)) + return -EINVAL; + + if (sk->sk_state != IUCV_OPEN && sk->sk_state != IUCV_BOUND) + return -EBADFD; + + if (sk->sk_type != SOCK_STREAM) + return -EINVAL; + + iucv = iucv_sk(sk); + + if (sk->sk_state == IUCV_OPEN) { + err = iucv_sock_autobind(sk); + if (unlikely(err)) + return err; + } + + lock_sock(sk); + + /* Set the destination information */ + memcpy(iucv_sk(sk)->dst_user_id, sa->siucv_user_id, 8); + memcpy(iucv_sk(sk)->dst_name, sa->siucv_name, 8); + + high_nmcpy(user_data, sa->siucv_name); + low_nmcpy(user_data, iucv_sk(sk)->src_name); + ASCEBC(user_data, sizeof(user_data)); + + iucv = iucv_sk(sk); + /* Create path. */ + iucv->path = iucv_path_alloc(IUCV_QUEUELEN_DEFAULT, + IPRMDATA, GFP_KERNEL); + err = iucv_path_connect(iucv->path, &af_iucv_handler, + sa->siucv_user_id, NULL, user_data, sk); + if (err) { + iucv_path_free(iucv->path); + iucv->path = NULL; + err = -ECONNREFUSED; + goto done; + } + + if (sk->sk_state != IUCV_CONNECTED) { + err = iucv_sock_wait_state(sk, IUCV_CONNECTED, IUCV_DISCONN, + sock_sndtimeo(sk, flags & O_NONBLOCK)); + } + + if (sk->sk_state == IUCV_DISCONN) { + release_sock(sk); + return -ECONNREFUSED; + } +done: + release_sock(sk); + return err; +} + +/* Move a socket into listening state. */ +static int iucv_sock_listen(struct socket *sock, int backlog) +{ + struct sock *sk = sock->sk; + int err; + + lock_sock(sk); + + err = -EINVAL; + if (sk->sk_state != IUCV_BOUND || sock->type != SOCK_STREAM) + goto done; + + sk->sk_max_ack_backlog = backlog; + sk->sk_ack_backlog = 0; + sk->sk_state = IUCV_LISTEN; + err = 0; + +done: + release_sock(sk); + return err; +} + +/* Accept a pending connection */ +static int iucv_sock_accept(struct socket *sock, struct socket *newsock, + int flags) +{ + DECLARE_WAITQUEUE(wait, current); + struct sock *sk = sock->sk, *nsk; + long timeo; + int err = 0; + + lock_sock(sk); + + if (sk->sk_state != IUCV_LISTEN) { + err = -EBADFD; + goto done; + } + + timeo = sock_rcvtimeo(sk, flags & O_NONBLOCK); + + /* Wait for an incoming connection */ + add_wait_queue_exclusive(sk->sk_sleep, &wait); + while (!(nsk = iucv_accept_dequeue(sk, newsock))){ + set_current_state(TASK_INTERRUPTIBLE); + if (!timeo) { + err = -EAGAIN; + break; + } + + release_sock(sk); + timeo = schedule_timeout(timeo); + lock_sock(sk); + + if (sk->sk_state != IUCV_LISTEN) { + err = -EBADFD; + break; + } + + if (signal_pending(current)) { + err = sock_intr_errno(timeo); + break; + } + } + + set_current_state(TASK_RUNNING); + remove_wait_queue(sk->sk_sleep, &wait); + + if (err) + goto done; + + newsock->state = SS_CONNECTED; + +done: + release_sock(sk); + return err; +} + +static int iucv_sock_getname(struct socket *sock, struct sockaddr *addr, + int *len, int peer) +{ + struct sockaddr_iucv *siucv = (struct sockaddr_iucv *) addr; + struct sock *sk = sock->sk; + + addr->sa_family = AF_IUCV; + *len = sizeof(struct sockaddr_iucv); + + if (peer) { + memcpy(siucv->siucv_user_id, iucv_sk(sk)->dst_user_id, 8); + memcpy(siucv->siucv_name, &iucv_sk(sk)->dst_name, 8); + } else { + memcpy(siucv->siucv_user_id, iucv_sk(sk)->src_user_id, 8); + memcpy(siucv->siucv_name, iucv_sk(sk)->src_name, 8); + } + memset(&siucv->siucv_port, 0, sizeof(siucv->siucv_port)); + memset(&siucv->siucv_addr, 0, sizeof(siucv->siucv_addr)); + memset(siucv->siucv_nodeid, 0, sizeof(siucv->siucv_nodeid)); + + return 0; +} + +static int iucv_sock_sendmsg(struct kiocb *iocb, struct socket *sock, + struct msghdr *msg, size_t len) +{ + struct sock *sk = sock->sk; + struct iucv_sock *iucv = iucv_sk(sk); + struct sk_buff *skb; + struct iucv_message txmsg; + int err; + + err = sock_error(sk); + if (err) + return err; + + if (msg->msg_flags & MSG_OOB) + return -EOPNOTSUPP; + + lock_sock(sk); + + if (sk->sk_shutdown & SEND_SHUTDOWN) { + err = -EPIPE; + goto out; + } + + if (sk->sk_state == IUCV_CONNECTED){ + if(!(skb = sock_alloc_send_skb(sk, len, + msg->msg_flags & MSG_DONTWAIT, + &err))) + return err; + + if (memcpy_fromiovec(skb_put(skb, len), msg->msg_iov, len)){ + err = -EFAULT; + goto fail; + } + + txmsg.class = 0; + txmsg.tag = iucv->send_tag++; + memcpy(skb->cb, &txmsg.tag, 4); + skb_queue_tail(&iucv->send_skb_q, skb); + err = iucv_message_send(iucv->path, &txmsg, 0, 0, + (void *) skb->data, skb->len); + if (err) { + if (err == 3) + printk(KERN_ERR "AF_IUCV msg limit exceeded\n"); + skb_unlink(skb, &iucv->send_skb_q); + err = -EPIPE; + goto fail; + } + + } else { + err = -ENOTCONN; + goto out; + } + + release_sock(sk); + return len; + +fail: + kfree_skb(skb); +out: + release_sock(sk); + return err; +} + +static int iucv_sock_recvmsg(struct kiocb *iocb, struct socket *sock, + struct msghdr *msg, size_t len, int flags) +{ + int noblock = flags & MSG_DONTWAIT; + struct sock *sk = sock->sk; + int target, copied = 0; + struct sk_buff *skb; + int err = 0; + + if (flags & (MSG_OOB)) + return -EOPNOTSUPP; + + target = sock_rcvlowat(sk, flags & MSG_WAITALL, len); + + skb = skb_recv_datagram(sk, flags, noblock, &err); + if (!skb) { + if (sk->sk_shutdown & RCV_SHUTDOWN) + return 0; + return err; + } + + copied = min_t(unsigned int, skb->len, len); + + if (memcpy_toiovec(msg->msg_iov, skb->data, copied)) { + skb_queue_head(&sk->sk_receive_queue, skb); + if (copied == 0) + return -EFAULT; + } + + len -= copied; + + /* Mark read part of skb as used */ + if (!(flags & MSG_PEEK)) { + skb_pull(skb, copied); + + if (skb->len) { + skb_queue_head(&sk->sk_receive_queue, skb); + goto done; + } + + kfree_skb(skb); + } else + skb_queue_head(&sk->sk_receive_queue, skb); + +done: + return err ? : copied; +} + +static inline unsigned int iucv_accept_poll(struct sock *parent) +{ + struct iucv_sock *isk, *n; + struct sock *sk; + + list_for_each_entry_safe(isk, n, &iucv_sk(parent)->accept_q, accept_q){ + sk = (struct sock *) isk; + + if (sk->sk_state == IUCV_CONNECTED) + return POLLIN | POLLRDNORM; + } + + return 0; +} + +unsigned int iucv_sock_poll(struct file *file, struct socket *sock, + poll_table *wait) +{ + struct sock *sk = sock->sk; + unsigned int mask = 0; + + poll_wait(file, sk->sk_sleep, wait); + + if (sk->sk_state == IUCV_LISTEN) + return iucv_accept_poll(sk); + + if (sk->sk_err || !skb_queue_empty(&sk->sk_error_queue)) + mask |= POLLERR; + + if (sk->sk_shutdown & RCV_SHUTDOWN) + mask |= POLLRDHUP; + + if (sk->sk_shutdown == SHUTDOWN_MASK) + mask |= POLLHUP; + + if (!skb_queue_empty(&sk->sk_receive_queue) || + (sk->sk_shutdown & RCV_SHUTDOWN)) + mask |= POLLIN | POLLRDNORM; + + if (sk->sk_state == IUCV_CLOSED) + mask |= POLLHUP; + + if (sock_writeable(sk)) + mask |= POLLOUT | POLLWRNORM | POLLWRBAND; + else + set_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags); + + return mask; +} + +static int iucv_sock_shutdown(struct socket *sock, int how) +{ + struct sock *sk = sock->sk; + struct iucv_sock *iucv = iucv_sk(sk); + struct iucv_message txmsg; + int err = 0; + u8 prmmsg[8] = {0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01}; + + how++; + + if ((how & ~SHUTDOWN_MASK) || !how) + return -EINVAL; + + lock_sock(sk); + switch(sk->sk_state) { + case IUCV_CLOSED: + err = -ENOTCONN; + goto fail; + + default: + sk->sk_shutdown |= how; + break; + } + + if (how == SEND_SHUTDOWN || how == SHUTDOWN_MASK) { + txmsg.class = 0; + txmsg.tag = 0; + err = iucv_message_send(iucv->path, &txmsg, IUCV_IPRMDATA, 0, + (void *) prmmsg, 8); + if (err) { + switch(err) { + case 1: + err = -ENOTCONN; + break; + case 2: + err = -ECONNRESET; + break; + default: + err = -ENOTCONN; + break; + } + } + } + + if (how == RCV_SHUTDOWN || how == SHUTDOWN_MASK) { + err = iucv_path_quiesce(iucv_sk(sk)->path, NULL); + if (err) + err = -ENOTCONN; + + skb_queue_purge(&sk->sk_receive_queue); + } + + /* Wake up anyone sleeping in poll */ + sk->sk_state_change(sk); + +fail: + release_sock(sk); + return err; +} + +static int iucv_sock_release(struct socket *sock) +{ + struct sock *sk = sock->sk; + int err = 0; + + if (!sk) + return 0; + + iucv_sock_close(sk); + + /* Unregister with IUCV base support */ + if (iucv_sk(sk)->path) { + iucv_path_sever(iucv_sk(sk)->path, NULL); + iucv_path_free(iucv_sk(sk)->path); + iucv_sk(sk)->path = NULL; + } + + if (sock_flag(sk, SOCK_LINGER) && sk->sk_lingertime){ + lock_sock(sk); + err = iucv_sock_wait_state(sk, IUCV_CLOSED, 0, + sk->sk_lingertime); + release_sock(sk); + } + + sock_orphan(sk); + iucv_sock_kill(sk); + return err; +} + +/* Callback wrappers - called from iucv base support */ +static int iucv_callback_connreq(struct iucv_path *path, + u8 ipvmid[8], u8 ipuser[16]) +{ + unsigned char user_data[16]; + unsigned char nuser_data[16]; + unsigned char src_name[8]; + struct hlist_node *node; + struct sock *sk, *nsk; + struct iucv_sock *iucv, *niucv; + int err; + + memcpy(src_name, ipuser, 8); + EBCASC(src_name, 8); + /* Find out if this path belongs to af_iucv. */ + read_lock(&iucv_sk_list.lock); + iucv = NULL; + sk_for_each(sk, node, &iucv_sk_list.head) + if (sk->sk_state == IUCV_LISTEN && + !memcmp(&iucv_sk(sk)->src_name, src_name, 8)) { + /* + * Found a listening socket with + * src_name == ipuser[0-7]. + */ + iucv = iucv_sk(sk); + break; + } + read_unlock(&iucv_sk_list.lock); + if (!iucv) + /* No socket found, not one of our paths. */ + return -EINVAL; + + bh_lock_sock(sk); + + /* Check if parent socket is listening */ + low_nmcpy(user_data, iucv->src_name); + high_nmcpy(user_data, iucv->dst_name); + ASCEBC(user_data, sizeof(user_data)); + if (sk->sk_state != IUCV_LISTEN) { + err = iucv_path_sever(path, user_data); + goto fail; + } + + /* Check for backlog size */ + if (sk_acceptq_is_full(sk)) { + err = iucv_path_sever(path, user_data); + goto fail; + } + + /* Create the new socket */ + nsk = iucv_sock_alloc(NULL, SOCK_STREAM, GFP_ATOMIC); + if (!nsk){ + err = iucv_path_sever(path, user_data); + goto fail; + } + + niucv = iucv_sk(nsk); + iucv_sock_init(nsk, sk); + + /* Set the new iucv_sock */ + memcpy(niucv->dst_name, ipuser + 8, 8); + EBCASC(niucv->dst_name, 8); + memcpy(niucv->dst_user_id, ipvmid, 8); + memcpy(niucv->src_name, iucv->src_name, 8); + memcpy(niucv->src_user_id, iucv->src_user_id, 8); + niucv->path = path; + + /* Call iucv_accept */ + high_nmcpy(nuser_data, ipuser + 8); + memcpy(nuser_data + 8, niucv->src_name, 8); + ASCEBC(nuser_data + 8, 8); + + path->msglim = IUCV_QUEUELEN_DEFAULT; + err = iucv_path_accept(path, &af_iucv_handler, nuser_data, nsk); + if (err){ + err = iucv_path_sever(path, user_data); + goto fail; + } + + iucv_accept_enqueue(sk, nsk); + + /* Wake up accept */ + nsk->sk_state = IUCV_CONNECTED; + sk->sk_data_ready(sk, 1); + err = 0; +fail: + bh_unlock_sock(sk); + return 0; +} + +static void iucv_callback_connack(struct iucv_path *path, u8 ipuser[16]) +{ + struct sock *sk = path->private; + + sk->sk_state = IUCV_CONNECTED; + sk->sk_state_change(sk); +} + +static void iucv_callback_rx(struct iucv_path *path, struct iucv_message *msg) +{ + struct sock *sk = path->private; + struct sk_buff *skb; + int rc; + + if (sk->sk_shutdown & RCV_SHUTDOWN) + return; + + skb = alloc_skb(msg->length, GFP_ATOMIC | GFP_DMA); + if (!skb) { + iucv_message_reject(path, msg); + return; + } + + if (msg->flags & IPRMDATA) { + skb->data = NULL; + skb->len = 0; + } else { + rc = iucv_message_receive(path, msg, 0, skb->data, + msg->length, NULL); + if (rc) { + kfree_skb(skb); + return; + } + + skb->h.raw = skb->data; + skb->nh.raw = skb->data; + skb->len = msg->length; + } + + if (sock_queue_rcv_skb(sk, skb)) + kfree_skb(skb); +} + +static void iucv_callback_txdone(struct iucv_path *path, + struct iucv_message *msg) +{ + struct sock *sk = path->private; + struct sk_buff *this; + struct sk_buff_head *list = &iucv_sk(sk)->send_skb_q; + struct sk_buff *list_skb = list->next; + unsigned long flags; + + spin_lock_irqsave(&list->lock, flags); + + do { + this = list_skb; + list_skb = list_skb->next; + } while (memcmp(&msg->tag, this->cb, 4)); + + spin_unlock_irqrestore(&list->lock, flags); + + skb_unlink(this, &iucv_sk(sk)->send_skb_q); + kfree_skb(this); +} + +static void iucv_callback_connrej(struct iucv_path *path, u8 ipuser[16]) +{ + struct sock *sk = path->private; + + if (!list_empty(&iucv_sk(sk)->accept_q)) + sk->sk_state = IUCV_SEVERED; + else + sk->sk_state = IUCV_DISCONN; + + sk->sk_state_change(sk); +} + +static struct proto_ops iucv_sock_ops = { + .family = PF_IUCV, + .owner = THIS_MODULE, + .release = iucv_sock_release, + .bind = iucv_sock_bind, + .connect = iucv_sock_connect, + .listen = iucv_sock_listen, + .accept = iucv_sock_accept, + .getname = iucv_sock_getname, + .sendmsg = iucv_sock_sendmsg, + .recvmsg = iucv_sock_recvmsg, + .poll = iucv_sock_poll, + .ioctl = sock_no_ioctl, + .mmap = sock_no_mmap, + .socketpair = sock_no_socketpair, + .shutdown = iucv_sock_shutdown, + .setsockopt = sock_no_setsockopt, + .getsockopt = sock_no_getsockopt +}; + +static struct net_proto_family iucv_sock_family_ops = { + .family = AF_IUCV, + .owner = THIS_MODULE, + .create = iucv_sock_create, +}; + +static int afiucv_init(void) +{ + int err; + + if (!MACHINE_IS_VM) { + printk(KERN_ERR "AF_IUCV connection needs VM as base\n"); + err = -EPROTONOSUPPORT; + goto out; + } + cpcmd("QUERY USERID", iucv_userid, sizeof(iucv_userid), &err); + if (unlikely(err)) { + printk(KERN_ERR "AF_IUCV needs the VM userid\n"); + err = -EPROTONOSUPPORT; + goto out; + } + + err = iucv_register(&af_iucv_handler, 0); + if (err) + goto out; + err = proto_register(&iucv_proto, 0); + if (err) + goto out_iucv; + err = sock_register(&iucv_sock_family_ops); + if (err) + goto out_proto; + printk(KERN_INFO "AF_IUCV lowlevel driver initialized\n"); + return 0; + +out_proto: + proto_unregister(&iucv_proto); +out_iucv: + iucv_unregister(&af_iucv_handler, 0); +out: + return err; +} + +static void __exit afiucv_exit(void) +{ + sock_unregister(PF_IUCV); + proto_unregister(&iucv_proto); + iucv_unregister(&af_iucv_handler, 0); + + printk(KERN_INFO "AF_IUCV lowlevel driver unloaded\n"); +} + +module_init(afiucv_init); +module_exit(afiucv_exit); + +MODULE_AUTHOR("Jennifer Hunt <jenhunt@us.ibm.com>"); +MODULE_DESCRIPTION("IUCV Sockets ver " VERSION); +MODULE_VERSION(VERSION); +MODULE_LICENSE("GPL"); +MODULE_ALIAS_NETPROTO(PF_IUCV); diff --git a/net/iucv/iucv.c b/net/iucv/iucv.c new file mode 100644 index 00000000000..1b10d576f22 --- /dev/null +++ b/net/iucv/iucv.c @@ -0,0 +1,1619 @@ +/* + * IUCV base infrastructure. + * + * Copyright 2001, 2006 IBM Deutschland Entwicklung GmbH, IBM Corporation + * Author(s): + * Original source: + * Alan Altmark (Alan_Altmark@us.ibm.com) Sept. 2000 + * Xenia Tkatschow (xenia@us.ibm.com) + * 2Gb awareness and general cleanup: + * Fritz Elfert (elfert@de.ibm.com, felfert@millenux.com) + * Rewritten for af_iucv: + * Martin Schwidefsky <schwidefsky@de.ibm.com> + * + * Documentation used: + * The original source + * CP Programming Service, IBM document # SC24-5760 + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2, or (at your option) + * any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + */ + +#include <linux/module.h> +#include <linux/moduleparam.h> + +#include <linux/spinlock.h> +#include <linux/kernel.h> +#include <linux/slab.h> +#include <linux/init.h> +#include <linux/interrupt.h> +#include <linux/list.h> +#include <linux/errno.h> +#include <linux/err.h> +#include <linux/device.h> +#include <linux/cpu.h> +#include <net/iucv/iucv.h> +#include <asm/atomic.h> +#include <asm/ebcdic.h> +#include <asm/io.h> +#include <asm/s390_ext.h> +#include <asm/s390_rdev.h> +#include <asm/smp.h> + +/* + * FLAGS: + * All flags are defined in the field IPFLAGS1 of each function + * and can be found in CP Programming Services. + * IPSRCCLS - Indicates you have specified a source class. + * IPTRGCLS - Indicates you have specified a target class. + * IPFGPID - Indicates you have specified a pathid. + * IPFGMID - Indicates you have specified a message ID. + * IPNORPY - Indicates a one-way message. No reply expected. + * IPALL - Indicates that all paths are affected. + */ +#define IUCV_IPSRCCLS 0x01 +#define IUCV_IPTRGCLS 0x01 +#define IUCV_IPFGPID 0x02 +#define IUCV_IPFGMID 0x04 +#define IUCV_IPNORPY 0x10 +#define IUCV_IPALL 0x80 + +static int iucv_bus_match (struct device *dev, struct device_driver *drv) +{ + return 0; +} + +struct bus_type iucv_bus = { + .name = "iucv", + .match = iucv_bus_match, +}; + +struct device *iucv_root; +static int iucv_available; + +/* General IUCV interrupt structure */ +struct iucv_irq_data { + u16 ippathid; + u8 ipflags1; + u8 iptype; + u32 res2[8]; +}; + +struct iucv_work { + struct list_head list; + struct iucv_irq_data data; +}; + +static LIST_HEAD(iucv_work_queue); +static DEFINE_SPINLOCK(iucv_work_lock); + +static struct iucv_irq_data *iucv_irq_data; +static cpumask_t iucv_buffer_cpumask = CPU_MASK_NONE; +static cpumask_t iucv_irq_cpumask = CPU_MASK_NONE; + +static void iucv_tasklet_handler(unsigned long); +static DECLARE_TASKLET(iucv_tasklet, iucv_tasklet_handler,0); + +enum iucv_command_codes { + IUCV_QUERY = 0, + IUCV_RETRIEVE_BUFFER = 2, + IUCV_SEND = 4, + IUCV_RECEIVE = 5, + IUCV_REPLY = 6, + IUCV_REJECT = 8, + IUCV_PURGE = 9, + IUCV_ACCEPT = 10, + IUCV_CONNECT = 11, + IUCV_DECLARE_BUFFER = 12, + IUCV_QUIESCE = 13, + IUCV_RESUME = 14, + IUCV_SEVER = 15, + IUCV_SETMASK = 16, +}; + +/* + * Error messages that are used with the iucv_sever function. They get + * converted to EBCDIC. + */ +static char iucv_error_no_listener[16] = "NO LISTENER"; +static char iucv_error_no_memory[16] = "NO MEMORY"; +static char iucv_error_pathid[16] = "INVALID PATHID"; + +/* + * iucv_handler_list: List of registered handlers. + */ +static LIST_HEAD(iucv_handler_list); + +/* + * iucv_path_table: an array of iucv_path structures. + */ +static struct iucv_path **iucv_path_table; +static unsigned long iucv_max_pathid; + +/* + * iucv_lock: spinlock protecting iucv_handler_list and iucv_pathid_table + */ +static DEFINE_SPINLOCK(iucv_table_lock); + +/* + * iucv_tasklet_cpu: contains the number of the cpu executing the tasklet. + * Needed for iucv_path_sever called from tasklet. + */ +static int iucv_tasklet_cpu = -1; + +/* + * Mutex and wait queue for iucv_register/iucv_unregister. + */ +static DEFINE_MUTEX(iucv_register_mutex); + +/* + * Counter for number of non-smp capable handlers. + */ +static int iucv_nonsmp_handler; + +/* + * IUCV control data structure. Used by iucv_path_accept, iucv_path_connect, + * iucv_path_quiesce and iucv_path_sever. + */ +struct iucv_cmd_control { + u16 ippathid; + u8 ipflags1; + u8 iprcode; + u16 ipmsglim; + u16 res1; + u8 ipvmid[8]; + u8 ipuser[16]; + u8 iptarget[8]; +} __attribute__ ((packed,aligned(8))); + +/* + * Data in parameter list iucv structure. Used by iucv_message_send, + * iucv_message_send2way and iucv_message_reply. + */ +struct iucv_cmd_dpl { + u16 ippathid; + u8 ipflags1; + u8 iprcode; + u32 ipmsgid; + u32 iptrgcls; + u8 iprmmsg[8]; + u32 ipsrccls; + u32 ipmsgtag; + u32 ipbfadr2; + u32 ipbfln2f; + u32 res; +} __attribute__ ((packed,aligned(8))); + +/* + * Data in buffer iucv structure. Used by iucv_message_receive, + * iucv_message_reject, iucv_message_send, iucv_message_send2way + * and iucv_declare_cpu. + */ +struct iucv_cmd_db { + u16 ippathid; + u8 ipflags1; + u8 iprcode; + u32 ipmsgid; + u32 iptrgcls; + u32 ipbfadr1; + u32 ipbfln1f; + u32 ipsrccls; + u32 ipmsgtag; + u32 ipbfadr2; + u32 ipbfln2f; + u32 res; +} __attribute__ ((packed,aligned(8))); + +/* + * Purge message iucv structure. Used by iucv_message_purge. + */ +struct iucv_cmd_purge { + u16 ippathid; + u8 ipflags1; + u8 iprcode; + u32 ipmsgid; + u8 ipaudit[3]; + u8 res1[5]; + u32 res2; + u32 ipsrccls; + u32 ipmsgtag; + u32 res3[3]; +} __attribute__ ((packed,aligned(8))); + +/* + * Set mask iucv structure. Used by iucv_enable_cpu. + */ +struct iucv_cmd_set_mask { + u8 ipmask; + u8 res1[2]; + u8 iprcode; + u32 res2[9]; +} __attribute__ ((packed,aligned(8))); + +union iucv_param { + struct iucv_cmd_control ctrl; + struct iucv_cmd_dpl dpl; + struct iucv_cmd_db db; + struct iucv_cmd_purge purge; + struct iucv_cmd_set_mask set_mask; +}; + +/* + * Anchor for per-cpu IUCV command parameter block. + */ +static union iucv_param *iucv_param; + +/** + * iucv_call_b2f0 + * @code: identifier of IUCV call to CP. + * @parm: pointer to a struct iucv_parm block + * + * Calls CP to execute IUCV commands. + * + * Returns the result of the CP IUCV call. + */ +static inline int iucv_call_b2f0(int command, union iucv_param *parm) +{ + register unsigned long reg0 asm ("0"); + register unsigned long reg1 asm ("1"); + int ccode; + + reg0 = command; + reg1 = virt_to_phys(parm); + asm volatile( + " .long 0xb2f01000\n" + " ipm %0\n" + " srl %0,28\n" + : "=d" (ccode), "=m" (*parm), "+d" (reg0), "+a" (reg1) + : "m" (*parm) : "cc"); + return (ccode == 1) ? parm->ctrl.iprcode : ccode; +} + +/** + * iucv_query_maxconn + * + * Determines the maximum number of connections that may be established. + * + * Returns the maximum number of connections or -EPERM is IUCV is not + * available. + */ +static int iucv_query_maxconn(void) +{ + register unsigned long reg0 asm ("0"); + register unsigned long reg1 asm ("1"); + void *param; + int ccode; + + param = kzalloc(sizeof(union iucv_param), GFP_KERNEL|GFP_DMA); + if (!param) + return -ENOMEM; + reg0 = IUCV_QUERY; + reg1 = (unsigned long) param; + asm volatile ( + " .long 0xb2f01000\n" + " ipm %0\n" + " srl %0,28\n" + : "=d" (ccode), "+d" (reg0), "+d" (reg1) : : "cc"); + if (ccode == 0) + iucv_max_pathid = reg0; + kfree(param); + return ccode ? -EPERM : 0; +} + +/** + * iucv_allow_cpu + * @data: unused + * + * Allow iucv interrupts on this cpu. + */ +static void iucv_allow_cpu(void *data) +{ + int cpu = smp_processor_id(); + union iucv_param *parm; + + /* + * Enable all iucv interrupts. + * ipmask contains bits for the different interrupts + * 0x80 - Flag to allow nonpriority message pending interrupts + * 0x40 - Flag to allow priority message pending interrupts + * 0x20 - Flag to allow nonpriority message completion interrupts + * 0x10 - Flag to allow priority message completion interrupts + * 0x08 - Flag to allow IUCV control interrupts + */ + parm = percpu_ptr(iucv_param, smp_processor_id()); + memset(parm, 0, sizeof(union iucv_param)); + parm->set_mask.ipmask = 0xf8; + iucv_call_b2f0(IUCV_SETMASK, parm); + + /* Set indication that iucv interrupts are allowed for this cpu. */ + cpu_set(cpu, iucv_irq_cpumask); +} + +/** + * iucv_block_cpu + * @data: unused + * + * Block iucv interrupts on this cpu. + */ +static void iucv_block_cpu(void *data) +{ + int cpu = smp_processor_id(); + union iucv_param *parm; + + /* Disable all iucv interrupts. */ + parm = percpu_ptr(iucv_param, smp_processor_id()); + memset(parm, 0, sizeof(union iucv_param)); + iucv_call_b2f0(IUCV_SETMASK, parm); + + /* Clear indication that iucv interrupts are allowed for this cpu. */ + cpu_clear(cpu, iucv_irq_cpumask); +} + +/** + * iucv_declare_cpu + * @data: unused + * + * Declare a interupt buffer on this cpu. + */ +static void iucv_declare_cpu(void *data) +{ + int cpu = smp_processor_id(); + union iucv_param *parm; + int rc; + + if (cpu_isset(cpu, iucv_buffer_cpumask)) + return; + + /* Declare interrupt buffer. */ + parm = percpu_ptr(iucv_param, cpu); + memset(parm, 0, sizeof(union iucv_param)); + parm->db.ipbfadr1 = virt_to_phys(percpu_ptr(iucv_irq_data, cpu)); + rc = iucv_call_b2f0(IUCV_DECLARE_BUFFER, parm); + if (rc) { + char *err = "Unknown"; + switch(rc) { + case 0x03: + err = "Directory error"; + break; + case 0x0a: + err = "Invalid length"; + break; + case 0x13: + err = "Buffer already exists"; + break; + case 0x3e: + err = "Buffer overlap"; + break; + case 0x5c: + err = "Paging or storage error"; + break; + } + printk(KERN_WARNING "iucv_register: iucv_declare_buffer " + "on cpu %i returned error 0x%02x (%s)\n", cpu, rc, err); + return; + } + + /* Set indication that an iucv buffer exists for this cpu. */ + cpu_set(cpu, iucv_buffer_cpumask); + + if (iucv_nonsmp_handler == 0 || cpus_empty(iucv_irq_cpumask)) + /* Enable iucv interrupts on this cpu. */ + iucv_allow_cpu(NULL); + else + /* Disable iucv interrupts on this cpu. */ + iucv_block_cpu(NULL); +} + +/** + * iucv_retrieve_cpu + * @data: unused + * + * Retrieve interrupt buffer on this cpu. + */ +static void iucv_retrieve_cpu(void *data) +{ + int cpu = smp_processor_id(); + union iucv_param *parm; + + if (!cpu_isset(cpu, iucv_buffer_cpumask)) + return; + + /* Block iucv interrupts. */ + iucv_block_cpu(NULL); + + /* Retrieve interrupt buffer. */ + parm = percpu_ptr(iucv_param, cpu); + iucv_call_b2f0(IUCV_RETRIEVE_BUFFER, parm); + + /* Clear indication that an iucv buffer exists for this cpu. */ + cpu_clear(cpu, iucv_buffer_cpumask); +} + +/** + * iucv_setmask_smp + * + * Allow iucv interrupts on all cpus. + */ +static void iucv_setmask_mp(void) +{ + int cpu; + + for_each_online_cpu(cpu) + /* Enable all cpus with a declared buffer. */ + if (cpu_isset(cpu, iucv_buffer_cpumask) && + !cpu_isset(cpu, iucv_irq_cpumask)) + smp_call_function_on(iucv_allow_cpu, NULL, 0, 1, cpu); +} + +/** + * iucv_setmask_up + * + * Allow iucv interrupts on a single cpus. + */ +static void iucv_setmask_up(void) +{ + cpumask_t cpumask; + int cpu; + + /* Disable all cpu but the first in cpu_irq_cpumask. */ + cpumask = iucv_irq_cpumask; + cpu_clear(first_cpu(iucv_irq_cpumask), cpumask); + for_each_cpu_mask(cpu, cpumask) + smp_call_function_on(iucv_block_cpu, NULL, 0, 1, cpu); +} + +/** + * iucv_enable + * + * This function makes iucv ready for use. It allocates the pathid + * table, declares an iucv interrupt buffer and enables the iucv + * interrupts. Called when the first user has registered an iucv + * handler. + */ +static int iucv_enable(void) +{ + size_t alloc_size; + int cpu, rc; + + rc = -ENOMEM; + alloc_size = iucv_max_pathid * sizeof(struct iucv_path); + iucv_path_table = kzalloc(alloc_size, GFP_KERNEL); + if (!iucv_path_table) + goto out; + /* Declare per cpu buffers. */ + rc = -EIO; + for_each_online_cpu(cpu) + smp_call_function_on(iucv_declare_cpu, NULL, 0, 1, cpu); + if (cpus_empty(iucv_buffer_cpumask)) + /* No cpu could declare an iucv buffer. */ + goto out_path; + return 0; + +out_path: + kfree(iucv_path_table); +out: + return rc; +} + +/** + * iucv_disable + * + * This function shuts down iucv. It disables iucv interrupts, retrieves + * the iucv interrupt buffer and frees the pathid table. Called after the + * last user unregister its iucv handler. + */ +static void iucv_disable(void) +{ + on_each_cpu(iucv_retrieve_cpu, NULL, 0, 1); + kfree(iucv_path_table); +} + +#ifdef CONFIG_HOTPLUG_CPU +static int __cpuinit iucv_cpu_notify(struct notifier_block *self, + unsigned long action, void *hcpu) +{ + cpumask_t cpumask; + long cpu = (long) hcpu; + + switch (action) { + case CPU_UP_PREPARE: + if (!percpu_populate(iucv_irq_data, + sizeof(struct iucv_irq_data), + GFP_KERNEL|GFP_DMA, cpu)) + return NOTIFY_BAD; + if (!percpu_populate(iucv_param, sizeof(union iucv_param), + GFP_KERNEL|GFP_DMA, cpu)) { + percpu_depopulate(iucv_irq_data, cpu); + return NOTIFY_BAD; + } + break; + case CPU_UP_CANCELED: + case CPU_DEAD: + percpu_depopulate(iucv_param, cpu); + percpu_depopulate(iucv_irq_data, cpu); + break; + case CPU_ONLINE: + case CPU_DOWN_FAILED: + smp_call_function_on(iucv_declare_cpu, NULL, 0, 1, cpu); + break; + case CPU_DOWN_PREPARE: + cpumask = iucv_buffer_cpumask; + cpu_clear(cpu, cpumask); + if (cpus_empty(cpumask)) + /* Can't offline last IUCV enabled cpu. */ + return NOTIFY_BAD; + smp_call_function_on(iucv_retrieve_cpu, NULL, 0, 1, cpu); + if (cpus_empty(iucv_irq_cpumask)) + smp_call_function_on(iucv_allow_cpu, NULL, 0, 1, + first_cpu(iucv_buffer_cpumask)); + break; + } + return NOTIFY_OK; +} + +static struct notifier_block iucv_cpu_notifier = { + .notifier_call = iucv_cpu_notify, +}; +#endif + +/** + * iucv_sever_pathid + * @pathid: path identification number. + * @userdata: 16-bytes of user data. + * + * Sever an iucv path to free up the pathid. Used internally. + */ +static int iucv_sever_pathid(u16 pathid, u8 userdata[16]) +{ + union iucv_param *parm; + + parm = percpu_ptr(iucv_param, smp_processor_id()); + memset(parm, 0, sizeof(union iucv_param)); + if (userdata) + memcpy(parm->ctrl.ipuser, userdata, sizeof(parm->ctrl.ipuser)); + parm->ctrl.ippathid = pathid; + return iucv_call_b2f0(IUCV_SEVER, parm); +} + +/** + * __iucv_cleanup_pathid + * @dummy: unused dummy argument + * + * Nop function called via smp_call_function to force work items from + * pending external iucv interrupts to the work queue. + */ +static void __iucv_cleanup_pathid(void *dummy) +{ +} + +/** + * iucv_cleanup_pathid + * @pathid: 16 bit pathid + * + * Function called after a path has been severed to find all remaining + * work items for the now stale pathid. The caller needs to hold the + * iucv_table_lock. + */ +static void iucv_cleanup_pathid(u16 pathid) +{ + struct iucv_work *p, *n; + + /* + * Path is severed, the pathid can be reused immediatly on + * a iucv connect or a connection pending interrupt. + * iucv_path_connect and connection pending interrupt will + * wait until the iucv_table_lock is released before the + * recycled pathid enters the system. + * Force remaining interrupts to the work queue, then + * scan the work queue for items of this path. + */ + smp_call_function(__iucv_cleanup_pathid, NULL, 0, 1); + spin_lock_irq(&iucv_work_lock); + list_for_each_entry_safe(p, n, &iucv_work_queue, list) { + /* Remove work items for pathid except connection pending */ + if (p->data.ippathid == pathid && p->data.iptype != 0x01) { + list_del(&p->list); + kfree(p); + } + } + spin_unlock_irq(&iucv_work_lock); +} + +/** + * iucv_register: + * @handler: address of iucv handler structure + * @smp: != 0 indicates that the handler can deal with out of order messages + * + * Registers a driver with IUCV. + * + * Returns 0 on success, -ENOMEM if the memory allocation for the pathid + * table failed, or -EIO if IUCV_DECLARE_BUFFER failed on all cpus. + */ +int iucv_register(struct iucv_handler *handler, int smp) +{ + int rc; + + if (!iucv_available) + return -ENOSYS; + mutex_lock(&iucv_register_mutex); + if (!smp) + iucv_nonsmp_handler++; + if (list_empty(&iucv_handler_list)) { + rc = iucv_enable(); + if (rc) + goto out_mutex; + } else if (!smp && iucv_nonsmp_handler == 1) + iucv_setmask_up(); + INIT_LIST_HEAD(&handler->paths); + + spin_lock_irq(&iucv_table_lock); + list_add_tail(&handler->list, &iucv_handler_list); + spin_unlock_irq(&iucv_table_lock); + rc = 0; +out_mutex: + mutex_unlock(&iucv_register_mutex); + return rc; +} + +/** + * iucv_unregister + * @handler: address of iucv handler structure + * @smp: != 0 indicates that the handler can deal with out of order messages + * + * Unregister driver from IUCV. + */ +void iucv_unregister(struct iucv_handler *handler, int smp) +{ + struct iucv_path *p, *n; + + mutex_lock(&iucv_register_mutex); + spin_lock_bh(&iucv_table_lock); + /* Remove handler from the iucv_handler_list. */ + list_del_init(&handler->list); + /* Sever all pathids still refering to the handler. */ + list_for_each_entry_safe(p, n, &handler->paths, list) { + iucv_sever_pathid(p->pathid, NULL); + iucv_path_table[p->pathid] = NULL; + list_del(&p->list); + iucv_cleanup_pathid(p->pathid); + iucv_path_free(p); + } + spin_unlock_bh(&iucv_table_lock); + if (!smp) + iucv_nonsmp_handler--; + if (list_empty(&iucv_handler_list)) + iucv_disable(); + else if (!smp && iucv_nonsmp_handler == 0) + iucv_setmask_mp(); + mutex_unlock(&iucv_register_mutex); +} + +/** + * iucv_path_accept + * @path: address of iucv path structure + * @handler: address of iucv handler structure + * @userdata: 16 bytes of data reflected to the communication partner + * @private: private data passed to interrupt handlers for this path + * + * This function is issued after the user received a connection pending + * external interrupt and now wishes to complete the IUCV communication path. + * + * Returns the result of the CP IUCV call. + */ +int iucv_path_accept(struct iucv_path *path, struct iucv_handler *handler, + u8 userdata[16], void *private) +{ + union iucv_param *parm; + int rc; + + local_bh_disable(); + /* Prepare parameter block. */ + parm = percpu_ptr(iucv_param, smp_processor_id()); + memset(parm, 0, sizeof(union iucv_param)); + parm->ctrl.ippathid = path->pathid; + parm->ctrl.ipmsglim = path->msglim; + if (userdata) + memcpy(parm->ctrl.ipuser, userdata, sizeof(parm->ctrl.ipuser)); + parm->ctrl.ipflags1 = path->flags; + + rc = iucv_call_b2f0(IUCV_ACCEPT, parm); + if (!rc) { + path->private = private; + path->msglim = parm->ctrl.ipmsglim; + path->flags = parm->ctrl.ipflags1; + } + local_bh_enable(); + return rc; +} + +/** + * iucv_path_connect + * @path: address of iucv path structure + * @handler: address of iucv handler structure + * @userid: 8-byte user identification + * @system: 8-byte target system identification + * @userdata: 16 bytes of data reflected to the communication partner + * @private: private data passed to interrupt handlers for this path + * + * This function establishes an IUCV path. Although the connect may complete + * successfully, you are not able to use the path until you receive an IUCV + * Connection Complete external interrupt. + * + * Returns the result of the CP IUCV call. + */ +int iucv_path_connect(struct iucv_path *path, struct iucv_handler *handler, + u8 userid[8], u8 system[8], u8 userdata[16], + void *private) +{ + union iucv_param *parm; + int rc; + + preempt_disable(); + if (iucv_tasklet_cpu != smp_processor_id()) + spin_lock_bh(&iucv_table_lock); + parm = percpu_ptr(iucv_param, smp_processor_id()); + memset(parm, 0, sizeof(union iucv_param)); + parm->ctrl.ipmsglim = path->msglim; + parm->ctrl.ipflags1 = path->flags; + if (userid) { + memcpy(parm->ctrl.ipvmid, userid, sizeof(parm->ctrl.ipvmid)); + ASCEBC(parm->ctrl.ipvmid, sizeof(parm->ctrl.ipvmid)); + EBC_TOUPPER(parm->ctrl.ipvmid, sizeof(parm->ctrl.ipvmid)); + } + if (system) { + memcpy(parm->ctrl.iptarget, system, + sizeof(parm->ctrl.iptarget)); + ASCEBC(parm->ctrl.iptarget, sizeof(parm->ctrl.iptarget)); + EBC_TOUPPER(parm->ctrl.iptarget, sizeof(parm->ctrl.iptarget)); + } + if (userdata) + memcpy(parm->ctrl.ipuser, userdata, sizeof(parm->ctrl.ipuser)); + + rc = iucv_call_b2f0(IUCV_CONNECT, parm); + if (!rc) { + if (parm->ctrl.ippathid < iucv_max_pathid) { + path->pathid = parm->ctrl.ippathid; + path->msglim = parm->ctrl.ipmsglim; + path->flags = parm->ctrl.ipflags1; + path->handler = handler; + path->private = private; + list_add_tail(&path->list, &handler->paths); + iucv_path_table[path->pathid] = path; + } else { + iucv_sever_pathid(parm->ctrl.ippathid, + iucv_error_pathid); + rc = -EIO; + } + } + if (iucv_tasklet_cpu != smp_processor_id()) + spin_unlock_bh(&iucv_table_lock); + preempt_enable(); + return rc; +} + +/** + * iucv_path_quiesce: + * @path: address of iucv path structure + * @userdata: 16 bytes of data reflected to the communication partner + * + * This function temporarily suspends incoming messages on an IUCV path. + * You can later reactivate the path by invoking the iucv_resume function. + * + * Returns the result from the CP IUCV call. + */ +int iucv_path_quiesce(struct iucv_path *path, u8 userdata[16]) +{ + union iucv_param *parm; + int rc; + + local_bh_disable(); + parm = percpu_ptr(iucv_param, smp_processor_id()); + memset(parm, 0, sizeof(union iucv_param)); + if (userdata) + memcpy(parm->ctrl.ipuser, userdata, sizeof(parm->ctrl.ipuser)); + parm->ctrl.ippathid = path->pathid; + rc = iucv_call_b2f0(IUCV_QUIESCE, parm); + local_bh_enable(); + return rc; +} + +/** + * iucv_path_resume: + * @path: address of iucv path structure + * @userdata: 16 bytes of data reflected to the communication partner + * + * This function resumes incoming messages on an IUCV path that has + * been stopped with iucv_path_quiesce. + * + * Returns the result from the CP IUCV call. + */ +int iucv_path_resume(struct iucv_path *path, u8 userdata[16]) +{ + union iucv_param *parm; + int rc; + + local_bh_disable(); + parm = percpu_ptr(iucv_param, smp_processor_id()); + memset(parm, 0, sizeof(union iucv_param)); + if (userdata) + memcpy(parm->ctrl.ipuser, userdata, sizeof(parm->ctrl.ipuser)); + parm->ctrl.ippathid = path->pathid; + rc = iucv_call_b2f0(IUCV_RESUME, parm); + local_bh_enable(); + return rc; +} + +/** + * iucv_path_sever + * @path: address of iucv path structure + * @userdata: 16 bytes of data reflected to the communication partner + * + * This function terminates an IUCV path. + * + * Returns the result from the CP IUCV call. + */ +int iucv_path_sever(struct iucv_path *path, u8 userdata[16]) +{ + int rc; + + + preempt_disable(); + if (iucv_tasklet_cpu != smp_processor_id()) + spin_lock_bh(&iucv_table_lock); + rc = iucv_sever_pathid(path->pathid, userdata); + if (!rc) { + iucv_path_table[path->pathid] = NULL; + list_del_init(&path->list); + iucv_cleanup_pathid(path->pathid); + } + if (iucv_tasklet_cpu != smp_processor_id()) + spin_unlock_bh(&iucv_table_lock); + preempt_enable(); + return rc; +} + +/** + * iucv_message_purge + * @path: address of iucv path structure + * @msg: address of iucv msg structure + * @srccls: source class of message + * + * Cancels a message you have sent. + * + * Returns the result from the CP IUCV call. + */ +int iucv_message_purge(struct iucv_path *path, struct iucv_message *msg, + u32 srccls) +{ + union iucv_param *parm; + int rc; + + local_bh_disable(); + parm = percpu_ptr(iucv_param, smp_processor_id()); + memset(parm, 0, sizeof(union iucv_param)); + parm->purge.ippathid = path->pathid; + parm->purge.ipmsgid = msg->id; + parm->purge.ipsrccls = srccls; + parm->purge.ipflags1 = IUCV_IPSRCCLS | IUCV_IPFGMID | IUCV_IPFGPID; + rc = iucv_call_b2f0(IUCV_PURGE, parm); + if (!rc) { + msg->audit = (*(u32 *) &parm->purge.ipaudit) >> 8; + msg->tag = parm->purge.ipmsgtag; + } + local_bh_enable(); + return rc; +} + +/** + * iucv_message_receive + * @path: address of iucv path structure + * @msg: address of iucv msg structure + * @flags: how the message is received (IUCV_IPBUFLST) + * @buffer: address of data buffer or address of struct iucv_array + * @size: length of data buffer + * @residual: + * + * This function receives messages that are being sent to you over + * established paths. This function will deal with RMDATA messages + * embedded in struct iucv_message as well. + * + * Returns the result from the CP IUCV call. + */ +int iucv_message_receive(struct iucv_path *path, struct iucv_message *msg, + u8 flags, void *buffer, size_t size, size_t *residual) +{ + union iucv_param *parm; + struct iucv_array *array; + u8 *rmmsg; + size_t copy; + int rc; + + if (msg->flags & IUCV_IPRMDATA) { + /* + * Message is 8 bytes long and has been stored to the + * message descriptor itself. + */ + rc = (size < 8) ? 5 : 0; + if (residual) + *residual = abs(size - 8); + rmmsg = msg->rmmsg; + if (flags & IUCV_IPBUFLST) { + /* Copy to struct iucv_array. */ + size = (size < 8) ? size : 8; + for (array = buffer; size > 0; array++) { + copy = min_t(size_t, size, array->length); + memcpy((u8 *)(addr_t) array->address, + rmmsg, copy); + rmmsg += copy; + size -= copy; + } + } else { + /* Copy to direct buffer. */ + memcpy(buffer, rmmsg, min_t(size_t, size, 8)); + } + return 0; + } + + local_bh_disable(); + parm = percpu_ptr(iucv_param, smp_processor_id()); + memset(parm, 0, sizeof(union iucv_param)); + parm->db.ipbfadr1 = (u32)(addr_t) buffer; + parm->db.ipbfln1f = (u32) size; + parm->db.ipmsgid = msg->id; + parm->db.ippathid = path->pathid; + parm->db.iptrgcls = msg->class; + parm->db.ipflags1 = (flags | IUCV_IPFGPID | + IUCV_IPFGMID | IUCV_IPTRGCLS); + rc = iucv_call_b2f0(IUCV_RECEIVE, parm); + if (!rc || rc == 5) { + msg->flags = parm->db.ipflags1; + if (residual) + *residual = parm->db.ipbfln1f; + } + local_bh_enable(); + return rc; +} + +/** + * iucv_message_reject + * @path: address of iucv path structure + * @msg: address of iucv msg structure + * + * The reject function refuses a specified message. Between the time you + * are notified of a message and the time that you complete the message, + * the message may be rejected. + * + * Returns the result from the CP IUCV call. + */ +int iucv_message_reject(struct iucv_path *path, struct iucv_message *msg) +{ + union iucv_param *parm; + int rc; + + local_bh_disable(); + parm = percpu_ptr(iucv_param, smp_processor_id()); + memset(parm, 0, sizeof(union iucv_param)); + parm->db.ippathid = path->pathid; + parm->db.ipmsgid = msg->id; + parm->db.iptrgcls = msg->class; + parm->db.ipflags1 = (IUCV_IPTRGCLS | IUCV_IPFGMID | IUCV_IPFGPID); + rc = iucv_call_b2f0(IUCV_REJECT, parm); + local_bh_enable(); + return rc; +} + +/** + * iucv_message_reply + * @path: address of iucv path structure + * @msg: address of iucv msg structure + * @flags: how the reply is sent (IUCV_IPRMDATA, IUCV_IPPRTY, IUCV_IPBUFLST) + * @reply: address of reply data buffer or address of struct iucv_array + * @size: length of reply data buffer + * + * This function responds to the two-way messages that you receive. You + * must identify completely the message to which you wish to reply. ie, + * pathid, msgid, and trgcls. Prmmsg signifies the data is moved into + * the parameter list. + * + * Returns the result from the CP IUCV call. + */ +int iucv_message_reply(struct iucv_path *path, struct iucv_message *msg, + u8 flags, void *reply, size_t size) +{ + union iucv_param *parm; + int rc; + + local_bh_disable(); + parm = percpu_ptr(iucv_param, smp_processor_id()); + memset(parm, 0, sizeof(union iucv_param)); + if (flags & IUCV_IPRMDATA) { + parm->dpl.ippathid = path->pathid; + parm->dpl.ipflags1 = flags; + parm->dpl.ipmsgid = msg->id; + parm->dpl.iptrgcls = msg->class; + memcpy(parm->dpl.iprmmsg, reply, min_t(size_t, size, 8)); + } else { + parm->db.ipbfadr1 = (u32)(addr_t) reply; + parm->db.ipbfln1f = (u32) size; + parm->db.ippathid = path->pathid; + parm->db.ipflags1 = flags; + parm->db.ipmsgid = msg->id; + parm->db.iptrgcls = msg->class; + } + rc = iucv_call_b2f0(IUCV_REPLY, parm); + local_bh_enable(); + return rc; +} + +/** + * iucv_message_send + * @path: address of iucv path structure + * @msg: address of iucv msg structure + * @flags: how the message is sent (IUCV_IPRMDATA, IUCV_IPPRTY, IUCV_IPBUFLST) + * @srccls: source class of message + * @buffer: address of send buffer or address of struct iucv_array + * @size: length of send buffer + * + * This function transmits data to another application. Data to be + * transmitted is in a buffer and this is a one-way message and the + * receiver will not reply to the message. + * + * Returns the result from the CP IUCV call. + */ +int iucv_message_send(struct iucv_path *path, struct iucv_message *msg, + u8 flags, u32 srccls, void *buffer, size_t size) +{ + union iucv_param *parm; + int rc; + + local_bh_disable(); + parm = percpu_ptr(iucv_param, smp_processor_id()); + memset(parm, 0, sizeof(union iucv_param)); + if (flags & IUCV_IPRMDATA) { + /* Message of 8 bytes can be placed into the parameter list. */ + parm->dpl.ippathid = path->pathid; + parm->dpl.ipflags1 = flags | IUCV_IPNORPY; + parm->dpl.iptrgcls = msg->class; + parm->dpl.ipsrccls = srccls; + parm->dpl.ipmsgtag = msg->tag; + memcpy(parm->dpl.iprmmsg, buffer, 8); + } else { + parm->db.ipbfadr1 = (u32)(addr_t) buffer; + parm->db.ipbfln1f = (u32) size; + parm->db.ippathid = path->pathid; + parm->db.ipflags1 = flags | IUCV_IPNORPY; + parm->db.iptrgcls = msg->class; + parm->db.ipsrccls = srccls; + parm->db.ipmsgtag = msg->tag; + } + rc = iucv_call_b2f0(IUCV_SEND, parm); + if (!rc) + msg->id = parm->db.ipmsgid; + local_bh_enable(); + return rc; +} + +/** + * iucv_message_send2way + * @path: address of iucv path structure + * @msg: address of iucv msg structure + * @flags: how the message is sent and the reply is received + * (IUCV_IPRMDATA, IUCV_IPBUFLST, IUCV_IPPRTY, IUCV_ANSLST) + * @srccls: source class of message + * @buffer: address of send buffer or address of struct iucv_array + * @size: length of send buffer + * @ansbuf: address of answer buffer or address of struct iucv_array + * @asize: size of reply buffer + * + * This function transmits data to another application. Data to be + * transmitted is in a buffer. The receiver of the send is expected to + * reply to the message and a buffer is provided into which IUCV moves + * the reply to this message. + * + * Returns the result from the CP IUCV call. + */ +int iucv_message_send2way(struct iucv_path *path, struct iucv_message *msg, + u8 flags, u32 srccls, void *buffer, size_t size, + void *answer, size_t asize, size_t *residual) +{ + union iucv_param *parm; + int rc; + + local_bh_disable(); + parm = percpu_ptr(iucv_param, smp_processor_id()); + memset(parm, 0, sizeof(union iucv_param)); + if (flags & IUCV_IPRMDATA) { + parm->dpl.ippathid = path->pathid; + parm->dpl.ipflags1 = path->flags; /* priority message */ + parm->dpl.iptrgcls = msg->class; + parm->dpl.ipsrccls = srccls; + parm->dpl.ipmsgtag = msg->tag; + parm->dpl.ipbfadr2 = (u32)(addr_t) answer; + parm->dpl.ipbfln2f = (u32) asize; + memcpy(parm->dpl.iprmmsg, buffer, 8); + } else { + parm->db.ippathid = path->pathid; + parm->db.ipflags1 = path->flags; /* priority message */ + parm->db.iptrgcls = msg->class; + parm->db.ipsrccls = srccls; + parm->db.ipmsgtag = msg->tag; + parm->db.ipbfadr1 = (u32)(addr_t) buffer; + parm->db.ipbfln1f = (u32) size; + parm->db.ipbfadr2 = (u32)(addr_t) answer; + parm->db.ipbfln2f = (u32) asize; + } + rc = iucv_call_b2f0(IUCV_SEND, parm); + if (!rc) + msg->id = parm->db.ipmsgid; + local_bh_enable(); + return rc; +} + +/** + * iucv_path_pending + * @data: Pointer to external interrupt buffer + * + * Process connection pending work item. Called from tasklet while holding + * iucv_table_lock. + */ +struct iucv_path_pending { + u16 ippathid; + u8 ipflags1; + u8 iptype; + u16 ipmsglim; + u16 res1; + u8 ipvmid[8]; + u8 ipuser[16]; + u32 res3; + u8 ippollfg; + u8 res4[3]; +} __attribute__ ((packed)); + +static void iucv_path_pending(struct iucv_irq_data *data) +{ + struct iucv_path_pending *ipp = (void *) data; + struct iucv_handler *handler; + struct iucv_path *path; + char *error; + + BUG_ON(iucv_path_table[ipp->ippathid]); + /* New pathid, handler found. Create a new path struct. */ + error = iucv_error_no_memory; + path = iucv_path_alloc(ipp->ipmsglim, ipp->ipflags1, GFP_ATOMIC); + if (!path) + goto out_sever; + path->pathid = ipp->ippathid; + iucv_path_table[path->pathid] = path; + EBCASC(ipp->ipvmid, 8); + + /* Call registered handler until one is found that wants the path. */ + list_for_each_entry(handler, &iucv_handler_list, list) { + if (!handler->path_pending) + continue; + /* + * Add path to handler to allow a call to iucv_path_sever + * inside the path_pending function. If the handler returns + * an error remove the path from the handler again. + */ + list_add(&path->list, &handler->paths); + path->handler = handler; + if (!handler->path_pending(path, ipp->ipvmid, ipp->ipuser)) + return; + list_del(&path->list); + path->handler = NULL; + } + /* No handler wanted the path. */ + iucv_path_table[path->pathid] = NULL; + iucv_path_free(path); + error = iucv_error_no_listener; +out_sever: + iucv_sever_pathid(ipp->ippathid, error); +} + +/** + * iucv_path_complete + * @data: Pointer to external interrupt buffer + * + * Process connection complete work item. Called from tasklet while holding + * iucv_table_lock. + */ +struct iucv_path_complete { + u16 ippathid; + u8 ipflags1; + u8 iptype; + u16 ipmsglim; + u16 res1; + u8 res2[8]; + u8 ipuser[16]; + u32 res3; + u8 ippollfg; + u8 res4[3]; +} __attribute__ ((packed)); + +static void iucv_path_complete(struct iucv_irq_data *data) +{ + struct iucv_path_complete *ipc = (void *) data; + struct iucv_path *path = iucv_path_table[ipc->ippathid]; + + BUG_ON(!path || !path->handler); + if (path->handler->path_complete) + path->handler->path_complete(path, ipc->ipuser); +} + +/** + * iucv_path_severed + * @data: Pointer to external interrupt buffer + * + * Process connection severed work item. Called from tasklet while holding + * iucv_table_lock. + */ +struct iucv_path_severed { + u16 ippathid; + u8 res1; + u8 iptype; + u32 res2; + u8 res3[8]; + u8 ipuser[16]; + u32 res4; + u8 ippollfg; + u8 res5[3]; +} __attribute__ ((packed)); + +static void iucv_path_severed(struct iucv_irq_data *data) +{ + struct iucv_path_severed *ips = (void *) data; + struct iucv_path *path = iucv_path_table[ips->ippathid]; + + BUG_ON(!path || !path->handler); + if (path->handler->path_severed) + path->handler->path_severed(path, ips->ipuser); + else { + iucv_sever_pathid(path->pathid, NULL); + iucv_path_table[path->pathid] = NULL; + list_del_init(&path->list); + iucv_cleanup_pathid(path->pathid); + iucv_path_free(path); + } +} + +/** + * iucv_path_quiesced + * @data: Pointer to external interrupt buffer + * + * Process connection quiesced work item. Called from tasklet while holding + * iucv_table_lock. + */ +struct iucv_path_quiesced { + u16 ippathid; + u8 res1; + u8 iptype; + u32 res2; + u8 res3[8]; + u8 ipuser[16]; + u32 res4; + u8 ippollfg; + u8 res5[3]; +} __attribute__ ((packed)); + +static void iucv_path_quiesced(struct iucv_irq_data *data) +{ + struct iucv_path_quiesced *ipq = (void *) data; + struct iucv_path *path = iucv_path_table[ipq->ippathid]; + + BUG_ON(!path || !path->handler); + if (path->handler->path_quiesced) + path->handler->path_quiesced(path, ipq->ipuser); +} + +/** + * iucv_path_resumed + * @data: Pointer to external interrupt buffer + * + * Process connection resumed work item. Called from tasklet while holding + * iucv_table_lock. + */ +struct iucv_path_resumed { + u16 ippathid; + u8 res1; + u8 iptype; + u32 res2; + u8 res3[8]; + u8 ipuser[16]; + u32 res4; + u8 ippollfg; + u8 res5[3]; +} __attribute__ ((packed)); + +static void iucv_path_resumed(struct iucv_irq_data *data) +{ + struct iucv_path_resumed *ipr = (void *) data; + struct iucv_path *path = iucv_path_table[ipr->ippathid]; + + BUG_ON(!path || !path->handler); + if (path->handler->path_resumed) + path->handler->path_resumed(path, ipr->ipuser); +} + +/** + * iucv_message_complete + * @data: Pointer to external interrupt buffer + * + * Process message complete work item. Called from tasklet while holding + * iucv_table_lock. + */ +struct iucv_message_complete { + u16 ippathid; + u8 ipflags1; + u8 iptype; + u32 ipmsgid; + u32 ipaudit; + u8 iprmmsg[8]; + u32 ipsrccls; + u32 ipmsgtag; + u32 res; + u32 ipbfln2f; + u8 ippollfg; + u8 res2[3]; +} __attribute__ ((packed)); + +static void iucv_message_complete(struct iucv_irq_data *data) +{ + struct iucv_message_complete *imc = (void *) data; + struct iucv_path *path = iucv_path_table[imc->ippathid]; + struct iucv_message msg; + + BUG_ON(!path || !path->handler); + if (path->handler->message_complete) { + msg.flags = imc->ipflags1; + msg.id = imc->ipmsgid; + msg.audit = imc->ipaudit; + memcpy(msg.rmmsg, imc->iprmmsg, 8); + msg.class = imc->ipsrccls; + msg.tag = imc->ipmsgtag; + msg.length = imc->ipbfln2f; + path->handler->message_complete(path, &msg); + } +} + +/** + * iucv_message_pending + * @data: Pointer to external interrupt buffer + * + * Process message pending work item. Called from tasklet while holding + * iucv_table_lock. + */ +struct iucv_message_pending { + u16 ippathid; + u8 ipflags1; + u8 iptype; + u32 ipmsgid; + u32 iptrgcls; + union { + u32 iprmmsg1_u32; + u8 iprmmsg1[4]; + } ln1msg1; + union { + u32 ipbfln1f; + u8 iprmmsg2[4]; + } ln1msg2; + u32 res1[3]; + u32 ipbfln2f; + u8 ippollfg; + u8 res2[3]; +} __attribute__ ((packed)); + +static void iucv_message_pending(struct iucv_irq_data *data) +{ + struct iucv_message_pending *imp = (void *) data; + struct iucv_path *path = iucv_path_table[imp->ippathid]; + struct iucv_message msg; + + BUG_ON(!path || !path->handler); + if (path->handler->message_pending) { + msg.flags = imp->ipflags1; + msg.id = imp->ipmsgid; + msg.class = imp->iptrgcls; + if (imp->ipflags1 & IUCV_IPRMDATA) { + memcpy(msg.rmmsg, imp->ln1msg1.iprmmsg1, 8); + msg.length = 8; + } else + msg.length = imp->ln1msg2.ipbfln1f; + msg.reply_size = imp->ipbfln2f; + path->handler->message_pending(path, &msg); + } +} + +/** + * iucv_tasklet_handler: + * + * This tasklet loops over the queue of irq buffers created by + * iucv_external_interrupt, calls the appropriate action handler + * and then frees the buffer. + */ +static void iucv_tasklet_handler(unsigned long ignored) +{ + typedef void iucv_irq_fn(struct iucv_irq_data *); + static iucv_irq_fn *irq_fn[] = { + [0x01] = iucv_path_pending, + [0x02] = iucv_path_complete, + [0x03] = iucv_path_severed, + [0x04] = iucv_path_quiesced, + [0x05] = iucv_path_resumed, + [0x06] = iucv_message_complete, + [0x07] = iucv_message_complete, + [0x08] = iucv_message_pending, + [0x09] = iucv_message_pending, + }; + struct iucv_work *p; + + /* Serialize tasklet, iucv_path_sever and iucv_path_connect. */ + spin_lock(&iucv_table_lock); + iucv_tasklet_cpu = smp_processor_id(); + + spin_lock_irq(&iucv_work_lock); + while (!list_empty(&iucv_work_queue)) { + p = list_entry(iucv_work_queue.next, struct iucv_work, list); + list_del_init(&p->list); + spin_unlock_irq(&iucv_work_lock); + irq_fn[p->data.iptype](&p->data); + kfree(p); + spin_lock_irq(&iucv_work_lock); + } + spin_unlock_irq(&iucv_work_lock); + + iucv_tasklet_cpu = -1; + spin_unlock(&iucv_table_lock); +} + +/** + * iucv_external_interrupt + * @code: irq code + * + * Handles external interrupts coming in from CP. + * Places the interrupt buffer on a queue and schedules iucv_tasklet_handler(). + */ +static void iucv_external_interrupt(u16 code) +{ + struct iucv_irq_data *p; + struct iucv_work *work; + + p = percpu_ptr(iucv_irq_data, smp_processor_id()); + if (p->ippathid >= iucv_max_pathid) { + printk(KERN_WARNING "iucv_do_int: Got interrupt with " + "pathid %d > max_connections (%ld)\n", + p->ippathid, iucv_max_pathid - 1); + iucv_sever_pathid(p->ippathid, iucv_error_no_listener); + return; + } + if (p->iptype < 0x01 || p->iptype > 0x09) { + printk(KERN_ERR "iucv_do_int: unknown iucv interrupt\n"); + return; + } + work = kmalloc(sizeof(struct iucv_work), GFP_ATOMIC); + if (!work) { + printk(KERN_WARNING "iucv_external_interrupt: out of memory\n"); + return; + } + memcpy(&work->data, p, sizeof(work->data)); + spin_lock(&iucv_work_lock); + list_add_tail(&work->list, &iucv_work_queue); + spin_unlock(&iucv_work_lock); + tasklet_schedule(&iucv_tasklet); +} + +/** + * iucv_init + * + * Allocates and initializes various data structures. + */ +static int iucv_init(void) +{ + int rc; + + if (!MACHINE_IS_VM) { + rc = -EPROTONOSUPPORT; + goto out; + } + rc = iucv_query_maxconn(); + if (rc) + goto out; + rc = register_external_interrupt (0x4000, iucv_external_interrupt); + if (rc) + goto out; + rc = bus_register(&iucv_bus); + if (rc) + goto out_int; + iucv_root = s390_root_dev_register("iucv"); + if (IS_ERR(iucv_root)) { + rc = PTR_ERR(iucv_root); + goto out_bus; + } + /* Note: GFP_DMA used used to get memory below 2G */ + iucv_irq_data = percpu_alloc(sizeof(struct iucv_irq_data), + GFP_KERNEL|GFP_DMA); + if (!iucv_irq_data) { + rc = -ENOMEM; + goto out_root; + } + /* Allocate parameter blocks. */ + iucv_param = percpu_alloc(sizeof(union iucv_param), + GFP_KERNEL|GFP_DMA); + if (!iucv_param) { + rc = -ENOMEM; + goto out_extint; + } + register_hotcpu_notifier(&iucv_cpu_notifier); + ASCEBC(iucv_error_no_listener, 16); + ASCEBC(iucv_error_no_memory, 16); + ASCEBC(iucv_error_pathid, 16); + iucv_available = 1; + return 0; + +out_extint: + percpu_free(iucv_irq_data); +out_root: + s390_root_dev_unregister(iucv_root); +out_bus: + bus_unregister(&iucv_bus); +out_int: + unregister_external_interrupt(0x4000, iucv_external_interrupt); +out: + return rc; +} + +/** + * iucv_exit + * + * Frees everything allocated from iucv_init. + */ +static void iucv_exit(void) +{ + struct iucv_work *p, *n; + + spin_lock_irq(&iucv_work_lock); + list_for_each_entry_safe(p, n, &iucv_work_queue, list) + kfree(p); + spin_unlock_irq(&iucv_work_lock); + unregister_hotcpu_notifier(&iucv_cpu_notifier); + percpu_free(iucv_param); + percpu_free(iucv_irq_data); + s390_root_dev_unregister(iucv_root); + bus_unregister(&iucv_bus); + unregister_external_interrupt(0x4000, iucv_external_interrupt); +} + +subsys_initcall(iucv_init); +module_exit(iucv_exit); + +/** + * Export all public stuff + */ +EXPORT_SYMBOL (iucv_bus); +EXPORT_SYMBOL (iucv_root); +EXPORT_SYMBOL (iucv_register); +EXPORT_SYMBOL (iucv_unregister); +EXPORT_SYMBOL (iucv_path_accept); +EXPORT_SYMBOL (iucv_path_connect); +EXPORT_SYMBOL (iucv_path_quiesce); +EXPORT_SYMBOL (iucv_path_sever); +EXPORT_SYMBOL (iucv_message_purge); +EXPORT_SYMBOL (iucv_message_receive); +EXPORT_SYMBOL (iucv_message_reject); +EXPORT_SYMBOL (iucv_message_reply); +EXPORT_SYMBOL (iucv_message_send); +EXPORT_SYMBOL (iucv_message_send2way); + +MODULE_AUTHOR("(C) 2001 IBM Corp. by Fritz Elfert (felfert@millenux.com)"); +MODULE_DESCRIPTION("Linux for S/390 IUCV lowlevel driver"); +MODULE_LICENSE("GPL"); diff --git a/net/key/af_key.c b/net/key/af_key.c index 5dd5094659a..b4e444063d1 100644 --- a/net/key/af_key.c +++ b/net/key/af_key.c @@ -2345,6 +2345,196 @@ out: return err; } +#ifdef CONFIG_NET_KEY_MIGRATE +static int pfkey_sockaddr_pair_size(sa_family_t family) +{ + switch (family) { + case AF_INET: + return PFKEY_ALIGN8(sizeof(struct sockaddr_in) * 2); +#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) + case AF_INET6: + return PFKEY_ALIGN8(sizeof(struct sockaddr_in6) * 2); +#endif + default: + return 0; + } + /* NOTREACHED */ +} + +static int parse_sockaddr_pair(struct sadb_x_ipsecrequest *rq, + xfrm_address_t *saddr, xfrm_address_t *daddr, + u16 *family) +{ + struct sockaddr *sa = (struct sockaddr *)(rq + 1); + if (rq->sadb_x_ipsecrequest_len < + pfkey_sockaddr_pair_size(sa->sa_family)) + return -EINVAL; + + switch (sa->sa_family) { + case AF_INET: + { + struct sockaddr_in *sin; + sin = (struct sockaddr_in *)sa; + if ((sin+1)->sin_family != AF_INET) + return -EINVAL; + memcpy(&saddr->a4, &sin->sin_addr, sizeof(saddr->a4)); + sin++; + memcpy(&daddr->a4, &sin->sin_addr, sizeof(daddr->a4)); + *family = AF_INET; + break; + } +#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) + case AF_INET6: + { + struct sockaddr_in6 *sin6; + sin6 = (struct sockaddr_in6 *)sa; + if ((sin6+1)->sin6_family != AF_INET6) + return -EINVAL; + memcpy(&saddr->a6, &sin6->sin6_addr, + sizeof(saddr->a6)); + sin6++; + memcpy(&daddr->a6, &sin6->sin6_addr, + sizeof(daddr->a6)); + *family = AF_INET6; + break; + } +#endif + default: + return -EINVAL; + } + + return 0; +} + +static int ipsecrequests_to_migrate(struct sadb_x_ipsecrequest *rq1, int len, + struct xfrm_migrate *m) +{ + int err; + struct sadb_x_ipsecrequest *rq2; + + if (len <= sizeof(struct sadb_x_ipsecrequest) || + len < rq1->sadb_x_ipsecrequest_len) + return -EINVAL; + + /* old endoints */ + err = parse_sockaddr_pair(rq1, &m->old_saddr, &m->old_daddr, + &m->old_family); + if (err) + return err; + + rq2 = (struct sadb_x_ipsecrequest *)((u8 *)rq1 + rq1->sadb_x_ipsecrequest_len); + len -= rq1->sadb_x_ipsecrequest_len; + + if (len <= sizeof(struct sadb_x_ipsecrequest) || + len < rq2->sadb_x_ipsecrequest_len) + return -EINVAL; + + /* new endpoints */ + err = parse_sockaddr_pair(rq2, &m->new_saddr, &m->new_daddr, + &m->new_family); + if (err) + return err; + + if (rq1->sadb_x_ipsecrequest_proto != rq2->sadb_x_ipsecrequest_proto || + rq1->sadb_x_ipsecrequest_mode != rq2->sadb_x_ipsecrequest_mode || + rq1->sadb_x_ipsecrequest_reqid != rq2->sadb_x_ipsecrequest_reqid) + return -EINVAL; + + m->proto = rq1->sadb_x_ipsecrequest_proto; + m->mode = rq1->sadb_x_ipsecrequest_mode - 1; + m->reqid = rq1->sadb_x_ipsecrequest_reqid; + + return ((int)(rq1->sadb_x_ipsecrequest_len + + rq2->sadb_x_ipsecrequest_len)); +} + +static int pfkey_migrate(struct sock *sk, struct sk_buff *skb, + struct sadb_msg *hdr, void **ext_hdrs) +{ + int i, len, ret, err = -EINVAL; + u8 dir; + struct sadb_address *sa; + struct sadb_x_policy *pol; + struct sadb_x_ipsecrequest *rq; + struct xfrm_selector sel; + struct xfrm_migrate m[XFRM_MAX_DEPTH]; + + if (!present_and_same_family(ext_hdrs[SADB_EXT_ADDRESS_SRC - 1], + ext_hdrs[SADB_EXT_ADDRESS_DST - 1]) || + !ext_hdrs[SADB_X_EXT_POLICY - 1]) { + err = -EINVAL; + goto out; + } + + pol = ext_hdrs[SADB_X_EXT_POLICY - 1]; + if (!pol) { + err = -EINVAL; + goto out; + } + + if (pol->sadb_x_policy_dir >= IPSEC_DIR_MAX) { + err = -EINVAL; + goto out; + } + + dir = pol->sadb_x_policy_dir - 1; + memset(&sel, 0, sizeof(sel)); + + /* set source address info of selector */ + sa = ext_hdrs[SADB_EXT_ADDRESS_SRC - 1]; + sel.family = pfkey_sadb_addr2xfrm_addr(sa, &sel.saddr); + sel.prefixlen_s = sa->sadb_address_prefixlen; + sel.proto = pfkey_proto_to_xfrm(sa->sadb_address_proto); + sel.sport = ((struct sockaddr_in *)(sa + 1))->sin_port; + if (sel.sport) + sel.sport_mask = ~0; + + /* set destination address info of selector */ + sa = ext_hdrs[SADB_EXT_ADDRESS_DST - 1], + pfkey_sadb_addr2xfrm_addr(sa, &sel.daddr); + sel.prefixlen_d = sa->sadb_address_prefixlen; + sel.proto = pfkey_proto_to_xfrm(sa->sadb_address_proto); + sel.dport = ((struct sockaddr_in *)(sa + 1))->sin_port; + if (sel.dport) + sel.dport_mask = ~0; + + rq = (struct sadb_x_ipsecrequest *)(pol + 1); + + /* extract ipsecrequests */ + i = 0; + len = pol->sadb_x_policy_len * 8 - sizeof(struct sadb_x_policy); + + while (len > 0 && i < XFRM_MAX_DEPTH) { + ret = ipsecrequests_to_migrate(rq, len, &m[i]); + if (ret < 0) { + err = ret; + goto out; + } else { + rq = (struct sadb_x_ipsecrequest *)((u8 *)rq + ret); + len -= ret; + i++; + } + } + + if (!i || len > 0) { + err = -EINVAL; + goto out; + } + + return xfrm_migrate(&sel, dir, XFRM_POLICY_TYPE_MAIN, m, i); + + out: + return err; +} +#else +static int pfkey_migrate(struct sock *sk, struct sk_buff *skb, + struct sadb_msg *hdr, void **ext_hdrs) +{ + return -ENOPROTOOPT; +} +#endif + + static int pfkey_spdget(struct sock *sk, struct sk_buff *skb, struct sadb_msg *hdr, void **ext_hdrs) { unsigned int dir; @@ -2473,6 +2663,7 @@ static pfkey_handler pfkey_funcs[SADB_MAX + 1] = { [SADB_X_SPDFLUSH] = pfkey_spdflush, [SADB_X_SPDSETIDX] = pfkey_spdadd, [SADB_X_SPDDELETE2] = pfkey_spdget, + [SADB_X_MIGRATE] = pfkey_migrate, }; static int pfkey_process(struct sock *sk, struct sk_buff *skb, struct sadb_msg *hdr) @@ -3118,6 +3309,236 @@ static int pfkey_send_new_mapping(struct xfrm_state *x, xfrm_address_t *ipaddr, return pfkey_broadcast(skb, GFP_ATOMIC, BROADCAST_REGISTERED, NULL); } +#ifdef CONFIG_NET_KEY_MIGRATE +static int set_sadb_address(struct sk_buff *skb, int sasize, int type, + struct xfrm_selector *sel) +{ + struct sadb_address *addr; + struct sockaddr_in *sin; +#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) + struct sockaddr_in6 *sin6; +#endif + addr = (struct sadb_address *)skb_put(skb, sizeof(struct sadb_address) + sasize); + addr->sadb_address_len = (sizeof(struct sadb_address) + sasize)/8; + addr->sadb_address_exttype = type; + addr->sadb_address_proto = sel->proto; + addr->sadb_address_reserved = 0; + + switch (type) { + case SADB_EXT_ADDRESS_SRC: + if (sel->family == AF_INET) { + addr->sadb_address_prefixlen = sel->prefixlen_s; + sin = (struct sockaddr_in *)(addr + 1); + sin->sin_family = AF_INET; + memcpy(&sin->sin_addr.s_addr, &sel->saddr, + sizeof(sin->sin_addr.s_addr)); + sin->sin_port = 0; + memset(sin->sin_zero, 0, sizeof(sin->sin_zero)); + } +#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) + else if (sel->family == AF_INET6) { + addr->sadb_address_prefixlen = sel->prefixlen_s; + sin6 = (struct sockaddr_in6 *)(addr + 1); + sin6->sin6_family = AF_INET6; + sin6->sin6_port = 0; + sin6->sin6_flowinfo = 0; + sin6->sin6_scope_id = 0; + memcpy(&sin6->sin6_addr.s6_addr, &sel->saddr, + sizeof(sin6->sin6_addr.s6_addr)); + } +#endif + break; + case SADB_EXT_ADDRESS_DST: + if (sel->family == AF_INET) { + addr->sadb_address_prefixlen = sel->prefixlen_d; + sin = (struct sockaddr_in *)(addr + 1); + sin->sin_family = AF_INET; + memcpy(&sin->sin_addr.s_addr, &sel->daddr, + sizeof(sin->sin_addr.s_addr)); + sin->sin_port = 0; + memset(sin->sin_zero, 0, sizeof(sin->sin_zero)); + } +#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) + else if (sel->family == AF_INET6) { + addr->sadb_address_prefixlen = sel->prefixlen_d; + sin6 = (struct sockaddr_in6 *)(addr + 1); + sin6->sin6_family = AF_INET6; + sin6->sin6_port = 0; + sin6->sin6_flowinfo = 0; + sin6->sin6_scope_id = 0; + memcpy(&sin6->sin6_addr.s6_addr, &sel->daddr, + sizeof(sin6->sin6_addr.s6_addr)); + } +#endif + break; + default: + return -EINVAL; + } + + return 0; +} + +static int set_ipsecrequest(struct sk_buff *skb, + uint8_t proto, uint8_t mode, int level, + uint32_t reqid, uint8_t family, + xfrm_address_t *src, xfrm_address_t *dst) +{ + struct sadb_x_ipsecrequest *rq; + struct sockaddr_in *sin; +#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) + struct sockaddr_in6 *sin6; +#endif + int size_req; + + size_req = sizeof(struct sadb_x_ipsecrequest) + + pfkey_sockaddr_pair_size(family); + + rq = (struct sadb_x_ipsecrequest *)skb_put(skb, size_req); + memset(rq, 0, size_req); + rq->sadb_x_ipsecrequest_len = size_req; + rq->sadb_x_ipsecrequest_proto = proto; + rq->sadb_x_ipsecrequest_mode = mode; + rq->sadb_x_ipsecrequest_level = level; + rq->sadb_x_ipsecrequest_reqid = reqid; + + switch (family) { + case AF_INET: + sin = (struct sockaddr_in *)(rq + 1); + sin->sin_family = AF_INET; + memcpy(&sin->sin_addr.s_addr, src, + sizeof(sin->sin_addr.s_addr)); + sin++; + sin->sin_family = AF_INET; + memcpy(&sin->sin_addr.s_addr, dst, + sizeof(sin->sin_addr.s_addr)); + break; +#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) + case AF_INET6: + sin6 = (struct sockaddr_in6 *)(rq + 1); + sin6->sin6_family = AF_INET6; + sin6->sin6_port = 0; + sin6->sin6_flowinfo = 0; + sin6->sin6_scope_id = 0; + memcpy(&sin6->sin6_addr.s6_addr, src, + sizeof(sin6->sin6_addr.s6_addr)); + sin6++; + sin6->sin6_family = AF_INET6; + sin6->sin6_port = 0; + sin6->sin6_flowinfo = 0; + sin6->sin6_scope_id = 0; + memcpy(&sin6->sin6_addr.s6_addr, dst, + sizeof(sin6->sin6_addr.s6_addr)); + break; +#endif + default: + return -EINVAL; + } + + return 0; +} +#endif + +#ifdef CONFIG_NET_KEY_MIGRATE +static int pfkey_send_migrate(struct xfrm_selector *sel, u8 dir, u8 type, + struct xfrm_migrate *m, int num_bundles) +{ + int i; + int sasize_sel; + int size = 0; + int size_pol = 0; + struct sk_buff *skb; + struct sadb_msg *hdr; + struct sadb_x_policy *pol; + struct xfrm_migrate *mp; + + if (type != XFRM_POLICY_TYPE_MAIN) + return 0; + + if (num_bundles <= 0 || num_bundles > XFRM_MAX_DEPTH) + return -EINVAL; + + /* selector */ + sasize_sel = pfkey_sockaddr_size(sel->family); + if (!sasize_sel) + return -EINVAL; + size += (sizeof(struct sadb_address) + sasize_sel) * 2; + + /* policy info */ + size_pol += sizeof(struct sadb_x_policy); + + /* ipsecrequests */ + for (i = 0, mp = m; i < num_bundles; i++, mp++) { + /* old locator pair */ + size_pol += sizeof(struct sadb_x_ipsecrequest) + + pfkey_sockaddr_pair_size(mp->old_family); + /* new locator pair */ + size_pol += sizeof(struct sadb_x_ipsecrequest) + + pfkey_sockaddr_pair_size(mp->new_family); + } + + size += sizeof(struct sadb_msg) + size_pol; + + /* alloc buffer */ + skb = alloc_skb(size, GFP_ATOMIC); + if (skb == NULL) + return -ENOMEM; + + hdr = (struct sadb_msg *)skb_put(skb, sizeof(struct sadb_msg)); + hdr->sadb_msg_version = PF_KEY_V2; + hdr->sadb_msg_type = SADB_X_MIGRATE; + hdr->sadb_msg_satype = pfkey_proto2satype(m->proto); + hdr->sadb_msg_len = size / 8; + hdr->sadb_msg_errno = 0; + hdr->sadb_msg_reserved = 0; + hdr->sadb_msg_seq = 0; + hdr->sadb_msg_pid = 0; + + /* selector src */ + set_sadb_address(skb, sasize_sel, SADB_EXT_ADDRESS_SRC, sel); + + /* selector dst */ + set_sadb_address(skb, sasize_sel, SADB_EXT_ADDRESS_DST, sel); + + /* policy information */ + pol = (struct sadb_x_policy *)skb_put(skb, sizeof(struct sadb_x_policy)); + pol->sadb_x_policy_len = size_pol / 8; + pol->sadb_x_policy_exttype = SADB_X_EXT_POLICY; + pol->sadb_x_policy_type = IPSEC_POLICY_IPSEC; + pol->sadb_x_policy_dir = dir + 1; + pol->sadb_x_policy_id = 0; + pol->sadb_x_policy_priority = 0; + + for (i = 0, mp = m; i < num_bundles; i++, mp++) { + /* old ipsecrequest */ + if (set_ipsecrequest(skb, mp->proto, mp->mode + 1, + (mp->reqid ? IPSEC_LEVEL_UNIQUE : IPSEC_LEVEL_REQUIRE), + mp->reqid, mp->old_family, + &mp->old_saddr, &mp->old_daddr) < 0) { + return -EINVAL; + } + + /* new ipsecrequest */ + if (set_ipsecrequest(skb, mp->proto, mp->mode + 1, + (mp->reqid ? IPSEC_LEVEL_UNIQUE : IPSEC_LEVEL_REQUIRE), + mp->reqid, mp->new_family, + &mp->new_saddr, &mp->new_daddr) < 0) { + return -EINVAL; + } + } + + /* broadcast migrate message to sockets */ + pfkey_broadcast(skb, GFP_ATOMIC, BROADCAST_ALL, NULL); + + return 0; +} +#else +static int pfkey_send_migrate(struct xfrm_selector *sel, u8 dir, u8 type, + struct xfrm_migrate *m, int num_bundles) +{ + return -ENOPROTOOPT; +} +#endif + static int pfkey_sendmsg(struct kiocb *kiocb, struct socket *sock, struct msghdr *msg, size_t len) { @@ -3287,6 +3708,7 @@ static struct xfrm_mgr pfkeyv2_mgr = .compile_policy = pfkey_compile_policy, .new_mapping = pfkey_send_new_mapping, .notify_policy = pfkey_send_policy_notify, + .migrate = pfkey_send_migrate, }; static void __exit ipsec_pfkey_exit(void) diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig index 3a66878a182..748f7f00909 100644 --- a/net/netfilter/Kconfig +++ b/net/netfilter/Kconfig @@ -1,5 +1,5 @@ menu "Core Netfilter Configuration" - depends on NET && NETFILTER + depends on NET && INET && NETFILTER config NETFILTER_NETLINK tristate "Netfilter netlink interface" @@ -44,8 +44,7 @@ choice depends on NF_CONNTRACK_ENABLED config NF_CONNTRACK_SUPPORT - bool "Layer 3 Independent Connection tracking (EXPERIMENTAL)" - depends on EXPERIMENTAL + bool "Layer 3 Independent Connection tracking" help Layer 3 independent connection tracking is experimental scheme which generalize ip_conntrack to support other layer 3 protocols. @@ -122,7 +121,7 @@ config NF_CONNTRACK_EVENTS config NF_CT_PROTO_GRE tristate - depends on EXPERIMENTAL && NF_CONNTRACK + depends on NF_CONNTRACK config NF_CT_PROTO_SCTP tristate 'SCTP protocol connection tracking support (EXPERIMENTAL)' @@ -136,8 +135,8 @@ config NF_CT_PROTO_SCTP Documentation/modules.txt. If unsure, say `N'. config NF_CONNTRACK_AMANDA - tristate "Amanda backup protocol support (EXPERIMENTAL)" - depends on EXPERIMENTAL && NF_CONNTRACK + tristate "Amanda backup protocol support" + depends on NF_CONNTRACK select TEXTSEARCH select TEXTSEARCH_KMP help @@ -151,8 +150,8 @@ config NF_CONNTRACK_AMANDA To compile it as a module, choose M here. If unsure, say N. config NF_CONNTRACK_FTP - tristate "FTP protocol support (EXPERIMENTAL)" - depends on EXPERIMENTAL && NF_CONNTRACK + tristate "FTP protocol support" + depends on NF_CONNTRACK help Tracking FTP connections is problematic: special helpers are required for tracking them, and doing masquerading and other forms @@ -166,7 +165,7 @@ config NF_CONNTRACK_FTP config NF_CONNTRACK_H323 tristate "H.323 protocol support (EXPERIMENTAL)" - depends on EXPERIMENTAL && NF_CONNTRACK + depends on EXPERIMENTAL && NF_CONNTRACK && (IPV6 || IPV6=n) help H.323 is a VoIP signalling protocol from ITU-T. As one of the most important VoIP protocols, it is widely used by voice hardware and @@ -184,8 +183,8 @@ config NF_CONNTRACK_H323 To compile it as a module, choose M here. If unsure, say N. config NF_CONNTRACK_IRC - tristate "IRC protocol support (EXPERIMENTAL)" - depends on EXPERIMENTAL && NF_CONNTRACK + tristate "IRC protocol support" + depends on NF_CONNTRACK help There is a commonly-used extension to IRC called Direct Client-to-Client Protocol (DCC). This enables users to send @@ -218,8 +217,8 @@ config NF_CONNTRACK_NETBIOS_NS To compile it as a module, choose M here. If unsure, say N. config NF_CONNTRACK_PPTP - tristate "PPtP protocol support (EXPERIMENTAL)" - depends on EXPERIMENTAL && NF_CONNTRACK + tristate "PPtP protocol support" + depends on NF_CONNTRACK select NF_CT_PROTO_GRE help This module adds support for PPTP (Point to Point Tunnelling @@ -236,6 +235,19 @@ config NF_CONNTRACK_PPTP To compile it as a module, choose M here. If unsure, say N. +config NF_CONNTRACK_SANE + tristate "SANE protocol support (EXPERIMENTAL)" + depends on EXPERIMENTAL && NF_CONNTRACK + help + SANE is a protocol for remote access to scanners as implemented + by the 'saned' daemon. Like FTP, it uses separate control and + data connections. + + With this module you can support SANE on a connection tracking + firewall. + + To compile it as a module, choose M here. If unsure, say N. + config NF_CONNTRACK_SIP tristate "SIP protocol support (EXPERIMENTAL)" depends on EXPERIMENTAL && NF_CONNTRACK @@ -249,8 +261,8 @@ config NF_CONNTRACK_SIP To compile it as a module, choose M here. If unsure, say N. config NF_CONNTRACK_TFTP - tristate "TFTP protocol support (EXPERIMENTAL)" - depends on EXPERIMENTAL && NF_CONNTRACK + tristate "TFTP protocol support" + depends on NF_CONNTRACK help TFTP connection tracking helper, this is required depending on how restrictive your ruleset is. @@ -383,6 +395,32 @@ config NETFILTER_XT_TARGET_CONNSECMARK To compile it as a module, choose M here. If unsure, say N. +config NETFILTER_XT_TARGET_TCPMSS + tristate '"TCPMSS" target support' + depends on NETFILTER_XTABLES && (IPV6 || IPV6=n) + ---help--- + This option adds a `TCPMSS' target, which allows you to alter the + MSS value of TCP SYN packets, to control the maximum size for that + connection (usually limiting it to your outgoing interface's MTU + minus 40). + + This is used to overcome criminally braindead ISPs or servers which + block ICMP Fragmentation Needed packets. The symptoms of this + problem are that everything works fine from your Linux + firewall/router, but machines behind it can never exchange large + packets: + 1) Web browsers connect, then hang with no data received. + 2) Small mail works fine, but large emails hang. + 3) ssh works fine, but scp hangs after initial handshaking. + + Workaround: activate this option and add a rule to your firewall + configuration like: + + iptables -A FORWARD -p tcp --tcp-flags SYN,RST SYN \ + -j TCPMSS --clamp-mss-to-pmtu + + To compile it as a module, choose M here. If unsure, say N. + config NETFILTER_XT_MATCH_COMMENT tristate '"comment" match support' depends on NETFILTER_XTABLES @@ -629,7 +667,7 @@ config NETFILTER_XT_MATCH_TCPMSS config NETFILTER_XT_MATCH_HASHLIMIT tristate '"hashlimit" match support' - depends on NETFILTER_XTABLES + depends on NETFILTER_XTABLES && (IP6_NF_IPTABLES || IP6_NF_IPTABLES=n) help This option adds a `hashlimit' match. diff --git a/net/netfilter/Makefile b/net/netfilter/Makefile index 5dc5574f7e9..b2b5c7566b2 100644 --- a/net/netfilter/Makefile +++ b/net/netfilter/Makefile @@ -29,6 +29,7 @@ obj-$(CONFIG_NF_CONNTRACK_H323) += nf_conntrack_h323.o obj-$(CONFIG_NF_CONNTRACK_IRC) += nf_conntrack_irc.o obj-$(CONFIG_NF_CONNTRACK_NETBIOS_NS) += nf_conntrack_netbios_ns.o obj-$(CONFIG_NF_CONNTRACK_PPTP) += nf_conntrack_pptp.o +obj-$(CONFIG_NF_CONNTRACK_SANE) += nf_conntrack_sane.o obj-$(CONFIG_NF_CONNTRACK_SIP) += nf_conntrack_sip.o obj-$(CONFIG_NF_CONNTRACK_TFTP) += nf_conntrack_tftp.o @@ -44,6 +45,7 @@ obj-$(CONFIG_NETFILTER_XT_TARGET_NFQUEUE) += xt_NFQUEUE.o obj-$(CONFIG_NETFILTER_XT_TARGET_NFLOG) += xt_NFLOG.o obj-$(CONFIG_NETFILTER_XT_TARGET_NOTRACK) += xt_NOTRACK.o obj-$(CONFIG_NETFILTER_XT_TARGET_SECMARK) += xt_SECMARK.o +obj-$(CONFIG_NETFILTER_XT_TARGET_TCPMSS) += xt_TCPMSS.o obj-$(CONFIG_NETFILTER_XT_TARGET_CONNSECMARK) += xt_CONNSECMARK.o # matches diff --git a/net/netfilter/nf_conntrack_netbios_ns.c b/net/netfilter/nf_conntrack_netbios_ns.c index a5b234e444d..2a48efdf0d6 100644 --- a/net/netfilter/nf_conntrack_netbios_ns.c +++ b/net/netfilter/nf_conntrack_netbios_ns.c @@ -89,6 +89,7 @@ static int help(struct sk_buff **pskb, unsigned int protoff, exp->expectfn = NULL; exp->flags = NF_CT_EXPECT_PERMANENT; + exp->helper = NULL; nf_conntrack_expect_related(exp); nf_conntrack_expect_put(exp); diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c index bd1d2de75e4..c64f029f705 100644 --- a/net/netfilter/nf_conntrack_netlink.c +++ b/net/netfilter/nf_conntrack_netlink.c @@ -389,9 +389,11 @@ static int ctnetlink_conntrack_event(struct notifier_block *this, && ctnetlink_dump_helpinfo(skb, ct) < 0) goto nfattr_failure; +#ifdef CONFIG_NF_CONNTRACK_MARK if ((events & IPCT_MARK || ct->mark) && ctnetlink_dump_mark(skb, ct) < 0) goto nfattr_failure; +#endif if (events & IPCT_COUNTER_FILLING && (ctnetlink_dump_counters(skb, ct, IP_CT_DIR_ORIGINAL) < 0 || @@ -981,7 +983,7 @@ ctnetlink_create_conntrack(struct nfattr *cda[], if (cda[CTA_PROTOINFO-1]) { err = ctnetlink_change_protoinfo(ct, cda); if (err < 0) - return err; + goto err; } #if defined(CONFIG_NF_CONNTRACK_MARK) diff --git a/net/netfilter/nf_conntrack_pptp.c b/net/netfilter/nf_conntrack_pptp.c index f0ff00e0d05..c59df3bc2bb 100644 --- a/net/netfilter/nf_conntrack_pptp.c +++ b/net/netfilter/nf_conntrack_pptp.c @@ -113,7 +113,7 @@ static void pptp_expectfn(struct nf_conn *ct, rcu_read_lock(); nf_nat_pptp_expectfn = rcu_dereference(nf_nat_pptp_hook_expectfn); - if (nf_nat_pptp_expectfn && ct->status & IPS_NAT_MASK) + if (nf_nat_pptp_expectfn && ct->master->status & IPS_NAT_MASK) nf_nat_pptp_expectfn(ct, exp); else { struct nf_conntrack_tuple inv_t; diff --git a/net/netfilter/nf_conntrack_proto_tcp.c b/net/netfilter/nf_conntrack_proto_tcp.c index 626b0011dd8..6fccdcf43e0 100644 --- a/net/netfilter/nf_conntrack_proto_tcp.c +++ b/net/netfilter/nf_conntrack_proto_tcp.c @@ -60,12 +60,9 @@ static DEFINE_RWLOCK(tcp_lock); If it's non-zero, we mark only out of window RST segments as INVALID. */ int nf_ct_tcp_be_liberal __read_mostly = 0; -/* When connection is picked up from the middle, how many packets are required - to pass in each direction when we assume we are in sync - if any side uses - window scaling, we lost the game. - If it is set to zero, we disable picking up already established +/* If it is set to zero, we disable picking up already established connections. */ -int nf_ct_tcp_loose __read_mostly = 3; +int nf_ct_tcp_loose __read_mostly = 1; /* Max number of the retransmitted packets without receiving an (acceptable) ACK from the destination. If this number is reached, a shorter timer @@ -650,11 +647,10 @@ static int tcp_in_window(struct ip_ct_tcp *state, before(sack, receiver->td_end + 1), after(ack, receiver->td_end - MAXACKWINDOW(sender))); - if (sender->loose || receiver->loose || - (before(seq, sender->td_maxend + 1) && - after(end, sender->td_end - receiver->td_maxwin - 1) && - before(sack, receiver->td_end + 1) && - after(ack, receiver->td_end - MAXACKWINDOW(sender)))) { + if (before(seq, sender->td_maxend + 1) && + after(end, sender->td_end - receiver->td_maxwin - 1) && + before(sack, receiver->td_end + 1) && + after(ack, receiver->td_end - MAXACKWINDOW(sender))) { /* * Take into account window scaling (RFC 1323). */ @@ -699,15 +695,13 @@ static int tcp_in_window(struct ip_ct_tcp *state, state->retrans = 0; } } - /* - * Close the window of disabled window tracking :-) - */ - if (sender->loose) - sender->loose--; - res = 1; } else { - if (LOG_INVALID(IPPROTO_TCP)) + res = 0; + if (sender->flags & IP_CT_TCP_FLAG_BE_LIBERAL || + nf_ct_tcp_be_liberal) + res = 1; + if (!res && LOG_INVALID(IPPROTO_TCP)) nf_log_packet(pf, 0, skb, NULL, NULL, NULL, "nf_ct_tcp: %s ", before(seq, sender->td_maxend + 1) ? @@ -718,8 +712,6 @@ static int tcp_in_window(struct ip_ct_tcp *state, : "ACK is over the upper bound (ACKed data not seen yet)" : "SEQ is under the lower bound (already ACKed data retransmitted)" : "SEQ is over the upper bound (over the window of the receiver)"); - - res = nf_ct_tcp_be_liberal; } DEBUGP("tcp_in_window: res=%i sender end=%u maxend=%u maxwin=%u " @@ -1063,8 +1055,6 @@ static int tcp_new(struct nf_conn *conntrack, tcp_options(skb, dataoff, th, &conntrack->proto.tcp.seen[0]); conntrack->proto.tcp.seen[1].flags = 0; - conntrack->proto.tcp.seen[0].loose = - conntrack->proto.tcp.seen[1].loose = 0; } else if (nf_ct_tcp_loose == 0) { /* Don't try to pick up connections. */ return 0; @@ -1085,11 +1075,11 @@ static int tcp_new(struct nf_conn *conntrack, conntrack->proto.tcp.seen[0].td_maxwin; conntrack->proto.tcp.seen[0].td_scale = 0; - /* We assume SACK. Should we assume window scaling too? */ + /* We assume SACK and liberal window checking to handle + * window scaling */ conntrack->proto.tcp.seen[0].flags = - conntrack->proto.tcp.seen[1].flags = IP_CT_TCP_FLAG_SACK_PERM; - conntrack->proto.tcp.seen[0].loose = - conntrack->proto.tcp.seen[1].loose = nf_ct_tcp_loose; + conntrack->proto.tcp.seen[1].flags = IP_CT_TCP_FLAG_SACK_PERM | + IP_CT_TCP_FLAG_BE_LIBERAL; } conntrack->proto.tcp.seen[1].td_end = 0; diff --git a/net/netfilter/nf_conntrack_sane.c b/net/netfilter/nf_conntrack_sane.c new file mode 100644 index 00000000000..eb2d1dc46d4 --- /dev/null +++ b/net/netfilter/nf_conntrack_sane.c @@ -0,0 +1,242 @@ +/* SANE connection tracking helper + * (SANE = Scanner Access Now Easy) + * For documentation about the SANE network protocol see + * http://www.sane-project.org/html/doc015.html + */ + +/* Copyright (C) 2007 Red Hat, Inc. + * Author: Michal Schmidt <mschmidt@redhat.com> + * Based on the FTP conntrack helper (net/netfilter/nf_conntrack_ftp.c): + * (C) 1999-2001 Paul `Rusty' Russell + * (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org> + * (C) 2003,2004 USAGI/WIDE Project <http://www.linux-ipv6.org> + * (C) 2003 Yasuyuki Kozakai @USAGI <yasuyuki.kozakai@toshiba.co.jp> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include <linux/module.h> +#include <linux/moduleparam.h> +#include <linux/netfilter.h> +#include <linux/in.h> +#include <linux/tcp.h> +#include <net/netfilter/nf_conntrack.h> +#include <net/netfilter/nf_conntrack_helper.h> +#include <net/netfilter/nf_conntrack_expect.h> +#include <linux/netfilter/nf_conntrack_sane.h> + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Michal Schmidt <mschmidt@redhat.com>"); +MODULE_DESCRIPTION("SANE connection tracking helper"); + +static char *sane_buffer; + +static DEFINE_SPINLOCK(nf_sane_lock); + +#define MAX_PORTS 8 +static u_int16_t ports[MAX_PORTS]; +static unsigned int ports_c; +module_param_array(ports, ushort, &ports_c, 0400); + +#if 0 +#define DEBUGP printk +#else +#define DEBUGP(format, args...) +#endif + +struct sane_request { + __be32 RPC_code; +#define SANE_NET_START 7 /* RPC code */ + + __be32 handle; +}; + +struct sane_reply_net_start { + __be32 status; +#define SANE_STATUS_SUCCESS 0 + + __be16 zero; + __be16 port; + /* other fields aren't interesting for conntrack */ +}; + +static int help(struct sk_buff **pskb, + unsigned int protoff, + struct nf_conn *ct, + enum ip_conntrack_info ctinfo) +{ + unsigned int dataoff, datalen; + struct tcphdr _tcph, *th; + char *sb_ptr; + int ret = NF_ACCEPT; + int dir = CTINFO2DIR(ctinfo); + struct nf_ct_sane_master *ct_sane_info; + struct nf_conntrack_expect *exp; + struct nf_conntrack_tuple *tuple; + struct sane_request *req; + struct sane_reply_net_start *reply; + int family = ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.l3num; + + ct_sane_info = &nfct_help(ct)->help.ct_sane_info; + /* Until there's been traffic both ways, don't look in packets. */ + if (ctinfo != IP_CT_ESTABLISHED && + ctinfo != IP_CT_ESTABLISHED+IP_CT_IS_REPLY) + return NF_ACCEPT; + + /* Not a full tcp header? */ + th = skb_header_pointer(*pskb, protoff, sizeof(_tcph), &_tcph); + if (th == NULL) + return NF_ACCEPT; + + /* No data? */ + dataoff = protoff + th->doff * 4; + if (dataoff >= (*pskb)->len) + return NF_ACCEPT; + + datalen = (*pskb)->len - dataoff; + + spin_lock_bh(&nf_sane_lock); + sb_ptr = skb_header_pointer(*pskb, dataoff, datalen, sane_buffer); + BUG_ON(sb_ptr == NULL); + + if (dir == IP_CT_DIR_ORIGINAL) { + if (datalen != sizeof(struct sane_request)) + goto out; + + req = (struct sane_request *)sb_ptr; + if (req->RPC_code != htonl(SANE_NET_START)) { + /* Not an interesting command */ + ct_sane_info->state = SANE_STATE_NORMAL; + goto out; + } + + /* We're interested in the next reply */ + ct_sane_info->state = SANE_STATE_START_REQUESTED; + goto out; + } + + /* Is it a reply to an uninteresting command? */ + if (ct_sane_info->state != SANE_STATE_START_REQUESTED) + goto out; + + /* It's a reply to SANE_NET_START. */ + ct_sane_info->state = SANE_STATE_NORMAL; + + if (datalen < sizeof(struct sane_reply_net_start)) { + DEBUGP("nf_ct_sane: NET_START reply too short\n"); + goto out; + } + + reply = (struct sane_reply_net_start *)sb_ptr; + if (reply->status != htonl(SANE_STATUS_SUCCESS)) { + /* saned refused the command */ + DEBUGP("nf_ct_sane: unsuccessful SANE_STATUS = %u\n", + ntohl(reply->status)); + goto out; + } + + /* Invalid saned reply? Ignore it. */ + if (reply->zero != 0) + goto out; + + exp = nf_conntrack_expect_alloc(ct); + if (exp == NULL) { + ret = NF_DROP; + goto out; + } + + tuple = &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple; + nf_conntrack_expect_init(exp, family, + &tuple->src.u3, &tuple->dst.u3, + IPPROTO_TCP, + NULL, &reply->port); + + DEBUGP("nf_ct_sane: expect: "); + NF_CT_DUMP_TUPLE(&exp->tuple); + NF_CT_DUMP_TUPLE(&exp->mask); + + /* Can't expect this? Best to drop packet now. */ + if (nf_conntrack_expect_related(exp) != 0) + ret = NF_DROP; + + nf_conntrack_expect_put(exp); + +out: + spin_unlock_bh(&nf_sane_lock); + return ret; +} + +static struct nf_conntrack_helper sane[MAX_PORTS][2]; +static char sane_names[MAX_PORTS][2][sizeof("sane-65535")]; + +/* don't make this __exit, since it's called from __init ! */ +static void nf_conntrack_sane_fini(void) +{ + int i, j; + + for (i = 0; i < ports_c; i++) { + for (j = 0; j < 2; j++) { + DEBUGP("nf_ct_sane: unregistering helper for pf: %d " + "port: %d\n", + sane[i][j].tuple.src.l3num, ports[i]); + nf_conntrack_helper_unregister(&sane[i][j]); + } + } + + kfree(sane_buffer); +} + +static int __init nf_conntrack_sane_init(void) +{ + int i, j = -1, ret = 0; + char *tmpname; + + sane_buffer = kmalloc(65536, GFP_KERNEL); + if (!sane_buffer) + return -ENOMEM; + + if (ports_c == 0) + ports[ports_c++] = SANE_PORT; + + /* FIXME should be configurable whether IPv4 and IPv6 connections + are tracked or not - YK */ + for (i = 0; i < ports_c; i++) { + sane[i][0].tuple.src.l3num = PF_INET; + sane[i][1].tuple.src.l3num = PF_INET6; + for (j = 0; j < 2; j++) { + sane[i][j].tuple.src.u.tcp.port = htons(ports[i]); + sane[i][j].tuple.dst.protonum = IPPROTO_TCP; + sane[i][j].mask.src.u.tcp.port = 0xFFFF; + sane[i][j].mask.dst.protonum = 0xFF; + sane[i][j].max_expected = 1; + sane[i][j].timeout = 5 * 60; /* 5 Minutes */ + sane[i][j].me = THIS_MODULE; + sane[i][j].help = help; + tmpname = &sane_names[i][j][0]; + if (ports[i] == SANE_PORT) + sprintf(tmpname, "sane"); + else + sprintf(tmpname, "sane-%d", ports[i]); + sane[i][j].name = tmpname; + + DEBUGP("nf_ct_sane: registering helper for pf: %d " + "port: %d\n", + sane[i][j].tuple.src.l3num, ports[i]); + ret = nf_conntrack_helper_register(&sane[i][j]); + if (ret) { + printk(KERN_ERR "nf_ct_sane: failed to " + "register helper for pf: %d port: %d\n", + sane[i][j].tuple.src.l3num, ports[i]); + nf_conntrack_sane_fini(); + return ret; + } + } + } + + return 0; +} + +module_init(nf_conntrack_sane_init); +module_exit(nf_conntrack_sane_fini); diff --git a/net/netfilter/nf_conntrack_sip.c b/net/netfilter/nf_conntrack_sip.c index eb2a2411f97..9dec1153467 100644 --- a/net/netfilter/nf_conntrack_sip.c +++ b/net/netfilter/nf_conntrack_sip.c @@ -303,10 +303,16 @@ static int skp_epaddr_len(struct nf_conn *ct, const char *dptr, { int s = *shift; - for (; dptr <= limit && *dptr != '@'; dptr++) + /* Search for @, but stop at the end of the line. + * We are inside a sip: URI, so we don't need to worry about + * continuation lines. */ + while (dptr <= limit && + *dptr != '@' && *dptr != '\r' && *dptr != '\n') { (*shift)++; + dptr++; + } - if (*dptr == '@') { + if (dptr <= limit && *dptr == '@') { dptr++; (*shift)++; } else diff --git a/net/netfilter/xt_CLASSIFY.c b/net/netfilter/xt_CLASSIFY.c index 50de965bb10..195e92990da 100644 --- a/net/netfilter/xt_CLASSIFY.c +++ b/net/netfilter/xt_CLASSIFY.c @@ -33,9 +33,7 @@ target(struct sk_buff **pskb, { const struct xt_classify_target_info *clinfo = targinfo; - if ((*pskb)->priority != clinfo->priority) - (*pskb)->priority = clinfo->priority; - + (*pskb)->priority = clinfo->priority; return XT_CONTINUE; } diff --git a/net/netfilter/xt_CONNMARK.c b/net/netfilter/xt_CONNMARK.c index b5548239d41..795c058b16a 100644 --- a/net/netfilter/xt_CONNMARK.c +++ b/net/netfilter/xt_CONNMARK.c @@ -61,7 +61,7 @@ target(struct sk_buff **pskb, #else nf_conntrack_event_cache(IPCT_MARK, *pskb); #endif - } + } break; case XT_CONNMARK_SAVE: newmark = (*ctmark & ~markinfo->mask) | @@ -78,8 +78,7 @@ target(struct sk_buff **pskb, case XT_CONNMARK_RESTORE: mark = (*pskb)->mark; diff = (*ctmark ^ mark) & markinfo->mask; - if (diff != 0) - (*pskb)->mark = mark ^ diff; + (*pskb)->mark = mark ^ diff; break; } } @@ -96,6 +95,11 @@ checkentry(const char *tablename, { struct xt_connmark_target_info *matchinfo = targinfo; + if (nf_ct_l3proto_try_module_get(target->family) < 0) { + printk(KERN_WARNING "can't load conntrack support for " + "proto=%d\n", target->family); + return 0; + } if (matchinfo->mode == XT_CONNMARK_RESTORE) { if (strcmp(tablename, "mangle") != 0) { printk(KERN_WARNING "CONNMARK: restore can only be " @@ -111,6 +115,12 @@ checkentry(const char *tablename, return 1; } +static void +destroy(const struct xt_target *target, void *targinfo) +{ + nf_ct_l3proto_module_put(target->family); +} + #ifdef CONFIG_COMPAT struct compat_xt_connmark_target_info { compat_ulong_t mark, mask; @@ -147,6 +157,7 @@ static struct xt_target xt_connmark_target[] = { .name = "CONNMARK", .family = AF_INET, .checkentry = checkentry, + .destroy = destroy, .target = target, .targetsize = sizeof(struct xt_connmark_target_info), #ifdef CONFIG_COMPAT @@ -160,6 +171,7 @@ static struct xt_target xt_connmark_target[] = { .name = "CONNMARK", .family = AF_INET6, .checkentry = checkentry, + .destroy = destroy, .target = target, .targetsize = sizeof(struct xt_connmark_target_info), .me = THIS_MODULE @@ -168,7 +180,6 @@ static struct xt_target xt_connmark_target[] = { static int __init xt_connmark_init(void) { - need_conntrack(); return xt_register_targets(xt_connmark_target, ARRAY_SIZE(xt_connmark_target)); } diff --git a/net/netfilter/xt_CONNSECMARK.c b/net/netfilter/xt_CONNSECMARK.c index 46738626667..1ab0db641f9 100644 --- a/net/netfilter/xt_CONNSECMARK.c +++ b/net/netfilter/xt_CONNSECMARK.c @@ -41,8 +41,7 @@ static void secmark_save(struct sk_buff *skb) connsecmark = nf_ct_get_secmark(skb, &ctinfo); if (connsecmark && !*connsecmark) - if (*connsecmark != skb->secmark) - *connsecmark = skb->secmark; + *connsecmark = skb->secmark; } } @@ -58,8 +57,7 @@ static void secmark_restore(struct sk_buff *skb) connsecmark = nf_ct_get_secmark(skb, &ctinfo); if (connsecmark && *connsecmark) - if (skb->secmark != *connsecmark) - skb->secmark = *connsecmark; + skb->secmark = *connsecmark; } } @@ -93,6 +91,11 @@ static int checkentry(const char *tablename, const void *entry, { struct xt_connsecmark_target_info *info = targinfo; + if (nf_ct_l3proto_try_module_get(target->family) < 0) { + printk(KERN_WARNING "can't load conntrack support for " + "proto=%d\n", target->family); + return 0; + } switch (info->mode) { case CONNSECMARK_SAVE: case CONNSECMARK_RESTORE: @@ -106,11 +109,18 @@ static int checkentry(const char *tablename, const void *entry, return 1; } +static void +destroy(const struct xt_target *target, void *targinfo) +{ + nf_ct_l3proto_module_put(target->family); +} + static struct xt_target xt_connsecmark_target[] = { { .name = "CONNSECMARK", .family = AF_INET, .checkentry = checkentry, + .destroy = destroy, .target = target, .targetsize = sizeof(struct xt_connsecmark_target_info), .table = "mangle", @@ -120,6 +130,7 @@ static struct xt_target xt_connsecmark_target[] = { .name = "CONNSECMARK", .family = AF_INET6, .checkentry = checkentry, + .destroy = destroy, .target = target, .targetsize = sizeof(struct xt_connsecmark_target_info), .table = "mangle", @@ -129,7 +140,6 @@ static struct xt_target xt_connsecmark_target[] = { static int __init xt_connsecmark_init(void) { - need_conntrack(); return xt_register_targets(xt_connsecmark_target, ARRAY_SIZE(xt_connsecmark_target)); } diff --git a/net/netfilter/xt_MARK.c b/net/netfilter/xt_MARK.c index 0b48547e8d6..cfc45af357d 100644 --- a/net/netfilter/xt_MARK.c +++ b/net/netfilter/xt_MARK.c @@ -31,9 +31,7 @@ target_v0(struct sk_buff **pskb, { const struct xt_mark_target_info *markinfo = targinfo; - if((*pskb)->mark != markinfo->mark) - (*pskb)->mark = markinfo->mark; - + (*pskb)->mark = markinfo->mark; return XT_CONTINUE; } @@ -62,9 +60,7 @@ target_v1(struct sk_buff **pskb, break; } - if((*pskb)->mark != mark) - (*pskb)->mark = mark; - + (*pskb)->mark = mark; return XT_CONTINUE; } diff --git a/net/netfilter/xt_SECMARK.c b/net/netfilter/xt_SECMARK.c index add75219629..f1131c3a9db 100644 --- a/net/netfilter/xt_SECMARK.c +++ b/net/netfilter/xt_SECMARK.c @@ -47,9 +47,7 @@ static unsigned int target(struct sk_buff **pskb, const struct net_device *in, BUG(); } - if ((*pskb)->secmark != secmark) - (*pskb)->secmark = secmark; - + (*pskb)->secmark = secmark; return XT_CONTINUE; } diff --git a/net/netfilter/xt_TCPMSS.c b/net/netfilter/xt_TCPMSS.c new file mode 100644 index 00000000000..db7e38c08de --- /dev/null +++ b/net/netfilter/xt_TCPMSS.c @@ -0,0 +1,296 @@ +/* + * This is a module which is used for setting the MSS option in TCP packets. + * + * Copyright (C) 2000 Marc Boucher <marc@mbsi.ca> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include <linux/module.h> +#include <linux/skbuff.h> +#include <linux/ip.h> +#include <linux/ipv6.h> +#include <linux/tcp.h> +#include <net/ipv6.h> +#include <net/tcp.h> + +#include <linux/netfilter_ipv4/ip_tables.h> +#include <linux/netfilter_ipv6/ip6_tables.h> +#include <linux/netfilter/x_tables.h> +#include <linux/netfilter/xt_tcpudp.h> +#include <linux/netfilter/xt_TCPMSS.h> + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Marc Boucher <marc@mbsi.ca>"); +MODULE_DESCRIPTION("x_tables TCP MSS modification module"); +MODULE_ALIAS("ipt_TCPMSS"); +MODULE_ALIAS("ip6t_TCPMSS"); + +static inline unsigned int +optlen(const u_int8_t *opt, unsigned int offset) +{ + /* Beware zero-length options: make finite progress */ + if (opt[offset] <= TCPOPT_NOP || opt[offset+1] == 0) + return 1; + else + return opt[offset+1]; +} + +static int +tcpmss_mangle_packet(struct sk_buff **pskb, + const struct xt_tcpmss_info *info, + unsigned int tcphoff, + unsigned int minlen) +{ + struct tcphdr *tcph; + unsigned int tcplen, i; + __be16 oldval; + u16 newmss; + u8 *opt; + + if (!skb_make_writable(pskb, (*pskb)->len)) + return -1; + + tcplen = (*pskb)->len - tcphoff; + tcph = (struct tcphdr *)((*pskb)->nh.raw + tcphoff); + + /* Since it passed flags test in tcp match, we know it is is + not a fragment, and has data >= tcp header length. SYN + packets should not contain data: if they did, then we risk + running over MTU, sending Frag Needed and breaking things + badly. --RR */ + if (tcplen != tcph->doff*4) { + if (net_ratelimit()) + printk(KERN_ERR "xt_TCPMSS: bad length (%u bytes)\n", + (*pskb)->len); + return -1; + } + + if (info->mss == XT_TCPMSS_CLAMP_PMTU) { + if (dst_mtu((*pskb)->dst) <= minlen) { + if (net_ratelimit()) + printk(KERN_ERR "xt_TCPMSS: " + "unknown or invalid path-MTU (%u)\n", + dst_mtu((*pskb)->dst)); + return -1; + } + newmss = dst_mtu((*pskb)->dst) - minlen; + } else + newmss = info->mss; + + opt = (u_int8_t *)tcph; + for (i = sizeof(struct tcphdr); i < tcph->doff*4; i += optlen(opt, i)) { + if (opt[i] == TCPOPT_MSS && tcph->doff*4 - i >= TCPOLEN_MSS && + opt[i+1] == TCPOLEN_MSS) { + u_int16_t oldmss; + + oldmss = (opt[i+2] << 8) | opt[i+3]; + + if (info->mss == XT_TCPMSS_CLAMP_PMTU && + oldmss <= newmss) + return 0; + + opt[i+2] = (newmss & 0xff00) >> 8; + opt[i+3] = (newmss & 0x00ff); + + nf_proto_csum_replace2(&tcph->check, *pskb, + htons(oldmss), htons(newmss), 0); + return 0; + } + } + + /* + * MSS Option not found ?! add it.. + */ + if (skb_tailroom((*pskb)) < TCPOLEN_MSS) { + struct sk_buff *newskb; + + newskb = skb_copy_expand(*pskb, skb_headroom(*pskb), + TCPOLEN_MSS, GFP_ATOMIC); + if (!newskb) + return -1; + kfree_skb(*pskb); + *pskb = newskb; + tcph = (struct tcphdr *)((*pskb)->nh.raw + tcphoff); + } + + skb_put((*pskb), TCPOLEN_MSS); + + opt = (u_int8_t *)tcph + sizeof(struct tcphdr); + memmove(opt + TCPOLEN_MSS, opt, tcplen - sizeof(struct tcphdr)); + + nf_proto_csum_replace2(&tcph->check, *pskb, + htons(tcplen), htons(tcplen + TCPOLEN_MSS), 1); + opt[0] = TCPOPT_MSS; + opt[1] = TCPOLEN_MSS; + opt[2] = (newmss & 0xff00) >> 8; + opt[3] = (newmss & 0x00ff); + + nf_proto_csum_replace4(&tcph->check, *pskb, 0, *((__be32 *)opt), 0); + + oldval = ((__be16 *)tcph)[6]; + tcph->doff += TCPOLEN_MSS/4; + nf_proto_csum_replace2(&tcph->check, *pskb, + oldval, ((__be16 *)tcph)[6], 0); + return TCPOLEN_MSS; +} + +static unsigned int +xt_tcpmss_target4(struct sk_buff **pskb, + const struct net_device *in, + const struct net_device *out, + unsigned int hooknum, + const struct xt_target *target, + const void *targinfo) +{ + struct iphdr *iph = (*pskb)->nh.iph; + __be16 newlen; + int ret; + + ret = tcpmss_mangle_packet(pskb, targinfo, iph->ihl * 4, + sizeof(*iph) + sizeof(struct tcphdr)); + if (ret < 0) + return NF_DROP; + if (ret > 0) { + iph = (*pskb)->nh.iph; + newlen = htons(ntohs(iph->tot_len) + ret); + nf_csum_replace2(&iph->check, iph->tot_len, newlen); + iph->tot_len = newlen; + } + return XT_CONTINUE; +} + +#if defined(CONFIG_IP6_NF_IPTABLES) || defined(CONFIG_IP6_NF_IPTABLES_MODULE) +static unsigned int +xt_tcpmss_target6(struct sk_buff **pskb, + const struct net_device *in, + const struct net_device *out, + unsigned int hooknum, + const struct xt_target *target, + const void *targinfo) +{ + struct ipv6hdr *ipv6h = (*pskb)->nh.ipv6h; + u8 nexthdr; + int tcphoff; + int ret; + + nexthdr = ipv6h->nexthdr; + tcphoff = ipv6_skip_exthdr(*pskb, sizeof(*ipv6h), &nexthdr); + if (tcphoff < 0) { + WARN_ON(1); + return NF_DROP; + } + ret = tcpmss_mangle_packet(pskb, targinfo, tcphoff, + sizeof(*ipv6h) + sizeof(struct tcphdr)); + if (ret < 0) + return NF_DROP; + if (ret > 0) { + ipv6h = (*pskb)->nh.ipv6h; + ipv6h->payload_len = htons(ntohs(ipv6h->payload_len) + ret); + } + return XT_CONTINUE; +} +#endif + +#define TH_SYN 0x02 + +/* Must specify -p tcp --syn */ +static inline int find_syn_match(const struct xt_entry_match *m) +{ + const struct xt_tcp *tcpinfo = (const struct xt_tcp *)m->data; + + if (strcmp(m->u.kernel.match->name, "tcp") == 0 && + tcpinfo->flg_cmp & TH_SYN && + !(tcpinfo->invflags & XT_TCP_INV_FLAGS)) + return 1; + + return 0; +} + +static int +xt_tcpmss_checkentry4(const char *tablename, + const void *entry, + const struct xt_target *target, + void *targinfo, + unsigned int hook_mask) +{ + const struct xt_tcpmss_info *info = targinfo; + const struct ipt_entry *e = entry; + + if (info->mss == XT_TCPMSS_CLAMP_PMTU && + (hook_mask & ~((1 << NF_IP_FORWARD) | + (1 << NF_IP_LOCAL_OUT) | + (1 << NF_IP_POST_ROUTING))) != 0) { + printk("xt_TCPMSS: path-MTU clamping only supported in " + "FORWARD, OUTPUT and POSTROUTING hooks\n"); + return 0; + } + if (IPT_MATCH_ITERATE(e, find_syn_match)) + return 1; + printk("xt_TCPMSS: Only works on TCP SYN packets\n"); + return 0; +} + +#if defined(CONFIG_IP6_NF_IPTABLES) || defined(CONFIG_IP6_NF_IPTABLES_MODULE) +static int +xt_tcpmss_checkentry6(const char *tablename, + const void *entry, + const struct xt_target *target, + void *targinfo, + unsigned int hook_mask) +{ + const struct xt_tcpmss_info *info = targinfo; + const struct ip6t_entry *e = entry; + + if (info->mss == XT_TCPMSS_CLAMP_PMTU && + (hook_mask & ~((1 << NF_IP6_FORWARD) | + (1 << NF_IP6_LOCAL_OUT) | + (1 << NF_IP6_POST_ROUTING))) != 0) { + printk("xt_TCPMSS: path-MTU clamping only supported in " + "FORWARD, OUTPUT and POSTROUTING hooks\n"); + return 0; + } + if (IP6T_MATCH_ITERATE(e, find_syn_match)) + return 1; + printk("xt_TCPMSS: Only works on TCP SYN packets\n"); + return 0; +} +#endif + +static struct xt_target xt_tcpmss_reg[] = { + { + .family = AF_INET, + .name = "TCPMSS", + .checkentry = xt_tcpmss_checkentry4, + .target = xt_tcpmss_target4, + .targetsize = sizeof(struct xt_tcpmss_info), + .proto = IPPROTO_TCP, + .me = THIS_MODULE, + }, +#if defined(CONFIG_IP6_NF_IPTABLES) || defined(CONFIG_IP6_NF_IPTABLES_MODULE) + { + .family = AF_INET6, + .name = "TCPMSS", + .checkentry = xt_tcpmss_checkentry6, + .target = xt_tcpmss_target6, + .targetsize = sizeof(struct xt_tcpmss_info), + .proto = IPPROTO_TCP, + .me = THIS_MODULE, + }, +#endif +}; + +static int __init xt_tcpmss_init(void) +{ + return xt_register_targets(xt_tcpmss_reg, ARRAY_SIZE(xt_tcpmss_reg)); +} + +static void __exit xt_tcpmss_fini(void) +{ + xt_unregister_targets(xt_tcpmss_reg, ARRAY_SIZE(xt_tcpmss_reg)); +} + +module_init(xt_tcpmss_init); +module_exit(xt_tcpmss_fini); diff --git a/net/netfilter/xt_connbytes.c b/net/netfilter/xt_connbytes.c index dcc497ea818..5e32dfa2668 100644 --- a/net/netfilter/xt_connbytes.c +++ b/net/netfilter/xt_connbytes.c @@ -52,6 +52,8 @@ match(const struct sk_buff *skb, { const struct xt_connbytes_info *sinfo = matchinfo; u_int64_t what = 0; /* initialize to make gcc happy */ + u_int64_t bytes = 0; + u_int64_t pkts = 0; const struct ip_conntrack_counter *counters; if (!(counters = nf_ct_get_counters(skb))) @@ -89,29 +91,22 @@ match(const struct sk_buff *skb, case XT_CONNBYTES_AVGPKT: switch (sinfo->direction) { case XT_CONNBYTES_DIR_ORIGINAL: - what = div64_64(counters[IP_CT_DIR_ORIGINAL].bytes, - counters[IP_CT_DIR_ORIGINAL].packets); + bytes = counters[IP_CT_DIR_ORIGINAL].bytes; + pkts = counters[IP_CT_DIR_ORIGINAL].packets; break; case XT_CONNBYTES_DIR_REPLY: - what = div64_64(counters[IP_CT_DIR_REPLY].bytes, - counters[IP_CT_DIR_REPLY].packets); + bytes = counters[IP_CT_DIR_REPLY].bytes; + pkts = counters[IP_CT_DIR_REPLY].packets; break; case XT_CONNBYTES_DIR_BOTH: - { - u_int64_t bytes; - u_int64_t pkts; - bytes = counters[IP_CT_DIR_ORIGINAL].bytes + - counters[IP_CT_DIR_REPLY].bytes; - pkts = counters[IP_CT_DIR_ORIGINAL].packets+ - counters[IP_CT_DIR_REPLY].packets; - - /* FIXME_THEORETICAL: what to do if sum - * overflows ? */ - - what = div64_64(bytes, pkts); - } + bytes = counters[IP_CT_DIR_ORIGINAL].bytes + + counters[IP_CT_DIR_REPLY].bytes; + pkts = counters[IP_CT_DIR_ORIGINAL].packets + + counters[IP_CT_DIR_REPLY].packets; break; } + if (pkts != 0) + what = div64_64(bytes, pkts); break; } @@ -139,15 +134,28 @@ static int check(const char *tablename, sinfo->direction != XT_CONNBYTES_DIR_BOTH) return 0; + if (nf_ct_l3proto_try_module_get(match->family) < 0) { + printk(KERN_WARNING "can't load conntrack support for " + "proto=%d\n", match->family); + return 0; + } + return 1; } +static void +destroy(const struct xt_match *match, void *matchinfo) +{ + nf_ct_l3proto_module_put(match->family); +} + static struct xt_match xt_connbytes_match[] = { { .name = "connbytes", .family = AF_INET, .checkentry = check, .match = match, + .destroy = destroy, .matchsize = sizeof(struct xt_connbytes_info), .me = THIS_MODULE }, @@ -156,6 +164,7 @@ static struct xt_match xt_connbytes_match[] = { .family = AF_INET6, .checkentry = check, .match = match, + .destroy = destroy, .matchsize = sizeof(struct xt_connbytes_info), .me = THIS_MODULE }, diff --git a/net/netfilter/xt_connmark.c b/net/netfilter/xt_connmark.c index a8f03057dbd..36c2defff23 100644 --- a/net/netfilter/xt_connmark.c +++ b/net/netfilter/xt_connmark.c @@ -63,22 +63,18 @@ checkentry(const char *tablename, printk(KERN_WARNING "connmark: only support 32bit mark\n"); return 0; } -#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE) if (nf_ct_l3proto_try_module_get(match->family) < 0) { - printk(KERN_WARNING "can't load nf_conntrack support for " + printk(KERN_WARNING "can't load conntrack support for " "proto=%d\n", match->family); return 0; } -#endif return 1; } static void destroy(const struct xt_match *match, void *matchinfo) { -#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE) nf_ct_l3proto_module_put(match->family); -#endif } #ifdef CONFIG_COMPAT @@ -140,7 +136,6 @@ static struct xt_match xt_connmark_match[] = { static int __init xt_connmark_init(void) { - need_conntrack(); return xt_register_matches(xt_connmark_match, ARRAY_SIZE(xt_connmark_match)); } diff --git a/net/netfilter/xt_conntrack.c b/net/netfilter/xt_conntrack.c index 0ea501a2fda..3dc2357b8de 100644 --- a/net/netfilter/xt_conntrack.c +++ b/net/netfilter/xt_conntrack.c @@ -20,6 +20,7 @@ #include <linux/netfilter/x_tables.h> #include <linux/netfilter/xt_conntrack.h> +#include <net/netfilter/nf_conntrack_compat.h> MODULE_LICENSE("GPL"); MODULE_AUTHOR("Marc Boucher <marc@mbsi.ca>"); @@ -228,21 +229,17 @@ checkentry(const char *tablename, void *matchinfo, unsigned int hook_mask) { -#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE) if (nf_ct_l3proto_try_module_get(match->family) < 0) { - printk(KERN_WARNING "can't load nf_conntrack support for " + printk(KERN_WARNING "can't load conntrack support for " "proto=%d\n", match->family); return 0; } -#endif return 1; } static void destroy(const struct xt_match *match, void *matchinfo) { -#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE) nf_ct_l3proto_module_put(match->family); -#endif } static struct xt_match conntrack_match = { @@ -257,7 +254,6 @@ static struct xt_match conntrack_match = { static int __init xt_conntrack_init(void) { - need_conntrack(); return xt_register_match(&conntrack_match); } diff --git a/net/netfilter/xt_hashlimit.c b/net/netfilter/xt_hashlimit.c index a5a6e192ac2..bd1f7a2048d 100644 --- a/net/netfilter/xt_hashlimit.c +++ b/net/netfilter/xt_hashlimit.c @@ -414,6 +414,7 @@ hashlimit_init_dst(struct xt_hashlimit_htable *hinfo, struct dsthash_dst *dst, switch (nexthdr) { case IPPROTO_TCP: case IPPROTO_UDP: + case IPPROTO_UDPLITE: case IPPROTO_SCTP: case IPPROTO_DCCP: ports = skb_header_pointer(skb, protoff, sizeof(_ports), @@ -745,7 +746,7 @@ static int __init xt_hashlimit_init(void) } hashlimit_procdir6 = proc_mkdir("ip6t_hashlimit", proc_net); if (!hashlimit_procdir6) { - printk(KERN_ERR "xt_hashlimit: tnable to create proc dir " + printk(KERN_ERR "xt_hashlimit: unable to create proc dir " "entry\n"); goto err4; } diff --git a/net/netfilter/xt_helper.c b/net/netfilter/xt_helper.c index 5d7818b73e3..04bc32ba719 100644 --- a/net/netfilter/xt_helper.c +++ b/net/netfilter/xt_helper.c @@ -24,6 +24,7 @@ #endif #include <linux/netfilter/x_tables.h> #include <linux/netfilter/xt_helper.h> +#include <net/netfilter/nf_conntrack_compat.h> MODULE_LICENSE("GPL"); MODULE_AUTHOR("Martin Josefsson <gandalf@netfilter.org>"); @@ -143,13 +144,11 @@ static int check(const char *tablename, { struct xt_helper_info *info = matchinfo; -#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE) if (nf_ct_l3proto_try_module_get(match->family) < 0) { - printk(KERN_WARNING "can't load nf_conntrack support for " + printk(KERN_WARNING "can't load conntrack support for " "proto=%d\n", match->family); return 0; } -#endif info->name[29] = '\0'; return 1; } @@ -157,9 +156,7 @@ static int check(const char *tablename, static void destroy(const struct xt_match *match, void *matchinfo) { -#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE) nf_ct_l3proto_module_put(match->family); -#endif } static struct xt_match xt_helper_match[] = { @@ -185,7 +182,6 @@ static struct xt_match xt_helper_match[] = { static int __init xt_helper_init(void) { - need_conntrack(); return xt_register_matches(xt_helper_match, ARRAY_SIZE(xt_helper_match)); } diff --git a/net/netfilter/xt_physdev.c b/net/netfilter/xt_physdev.c index fd8f954cded..b9b3ffc5451 100644 --- a/net/netfilter/xt_physdev.c +++ b/net/netfilter/xt_physdev.c @@ -113,20 +113,16 @@ checkentry(const char *tablename, if (!(info->bitmask & XT_PHYSDEV_OP_MASK) || info->bitmask & ~XT_PHYSDEV_OP_MASK) return 0; - if (brnf_deferred_hooks == 0 && - info->bitmask & XT_PHYSDEV_OP_OUT && + if (info->bitmask & XT_PHYSDEV_OP_OUT && (!(info->bitmask & XT_PHYSDEV_OP_BRIDGED) || info->invert & XT_PHYSDEV_OP_BRIDGED) && hook_mask & ((1 << NF_IP_LOCAL_OUT) | (1 << NF_IP_FORWARD) | (1 << NF_IP_POST_ROUTING))) { printk(KERN_WARNING "physdev match: using --physdev-out in the " "OUTPUT, FORWARD and POSTROUTING chains for non-bridged " - "traffic is deprecated and breaks other things, it will " - "be removed in January 2007. See Documentation/" - "feature-removal-schedule.txt for details. This doesn't " - "affect you in case you're using it for purely bridged " - "traffic.\n"); - brnf_deferred_hooks = 1; + "traffic is not supported anymore.\n"); + if (hook_mask & (1 << NF_IP_LOCAL_OUT)) + return 0; } return 1; } diff --git a/net/netfilter/xt_state.c b/net/netfilter/xt_state.c index d9010b16a1f..df37b912163 100644 --- a/net/netfilter/xt_state.c +++ b/net/netfilter/xt_state.c @@ -50,22 +50,18 @@ static int check(const char *tablename, void *matchinfo, unsigned int hook_mask) { -#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE) if (nf_ct_l3proto_try_module_get(match->family) < 0) { - printk(KERN_WARNING "can't load nf_conntrack support for " + printk(KERN_WARNING "can't load conntrack support for " "proto=%d\n", match->family); return 0; } -#endif return 1; } static void destroy(const struct xt_match *match, void *matchinfo) { -#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE) nf_ct_l3proto_module_put(match->family); -#endif } static struct xt_match xt_state_match[] = { @@ -91,7 +87,6 @@ static struct xt_match xt_state_match[] = { static int __init xt_state_init(void) { - need_conntrack(); return xt_register_matches(xt_state_match, ARRAY_SIZE(xt_state_match)); } diff --git a/net/netlabel/netlabel_cipso_v4.c b/net/netlabel/netlabel_cipso_v4.c index 743b05734a4..73e0ff469bf 100644 --- a/net/netlabel/netlabel_cipso_v4.c +++ b/net/netlabel/netlabel_cipso_v4.c @@ -130,12 +130,12 @@ static int netlbl_cipsov4_add_common(struct genl_info *info, nla_for_each_nested(nla, info->attrs[NLBL_CIPSOV4_A_TAGLST], nla_rem) if (nla->nla_type == NLBL_CIPSOV4_A_TAG) { - if (iter > CIPSO_V4_TAG_MAXCNT) + if (iter >= CIPSO_V4_TAG_MAXCNT) return -EINVAL; doi_def->tags[iter++] = nla_get_u8(nla); } - if (iter < CIPSO_V4_TAG_MAXCNT) - doi_def->tags[iter] = CIPSO_V4_TAG_INVALID; + while (iter < CIPSO_V4_TAG_MAXCNT) + doi_def->tags[iter++] = CIPSO_V4_TAG_INVALID; return 0; } @@ -162,6 +162,7 @@ static int netlbl_cipsov4_add_std(struct genl_info *info) struct nlattr *nla_b; int nla_a_rem; int nla_b_rem; + u32 iter; if (!info->attrs[NLBL_CIPSOV4_A_TAGLST] || !info->attrs[NLBL_CIPSOV4_A_MLSLVLLST]) @@ -185,20 +186,31 @@ static int netlbl_cipsov4_add_std(struct genl_info *info) ret_val = netlbl_cipsov4_add_common(info, doi_def); if (ret_val != 0) goto add_std_failure; + ret_val = -EINVAL; nla_for_each_nested(nla_a, info->attrs[NLBL_CIPSOV4_A_MLSLVLLST], nla_a_rem) if (nla_a->nla_type == NLBL_CIPSOV4_A_MLSLVL) { + if (nla_validate_nested(nla_a, + NLBL_CIPSOV4_A_MAX, + netlbl_cipsov4_genl_policy) != 0) + goto add_std_failure; nla_for_each_nested(nla_b, nla_a, nla_b_rem) switch (nla_b->nla_type) { case NLBL_CIPSOV4_A_MLSLVLLOC: + if (nla_get_u32(nla_b) > + CIPSO_V4_MAX_LOC_LVLS) + goto add_std_failure; if (nla_get_u32(nla_b) >= doi_def->map.std->lvl.local_size) doi_def->map.std->lvl.local_size = nla_get_u32(nla_b) + 1; break; case NLBL_CIPSOV4_A_MLSLVLREM: + if (nla_get_u32(nla_b) > + CIPSO_V4_MAX_REM_LVLS) + goto add_std_failure; if (nla_get_u32(nla_b) >= doi_def->map.std->lvl.cipso_size) doi_def->map.std->lvl.cipso_size = @@ -206,9 +218,6 @@ static int netlbl_cipsov4_add_std(struct genl_info *info) break; } } - if (doi_def->map.std->lvl.local_size > CIPSO_V4_MAX_LOC_LVLS || - doi_def->map.std->lvl.cipso_size > CIPSO_V4_MAX_REM_LVLS) - goto add_std_failure; doi_def->map.std->lvl.local = kcalloc(doi_def->map.std->lvl.local_size, sizeof(u32), GFP_KERNEL); @@ -223,6 +232,10 @@ static int netlbl_cipsov4_add_std(struct genl_info *info) ret_val = -ENOMEM; goto add_std_failure; } + for (iter = 0; iter < doi_def->map.std->lvl.local_size; iter++) + doi_def->map.std->lvl.local[iter] = CIPSO_V4_INV_LVL; + for (iter = 0; iter < doi_def->map.std->lvl.cipso_size; iter++) + doi_def->map.std->lvl.cipso[iter] = CIPSO_V4_INV_LVL; nla_for_each_nested(nla_a, info->attrs[NLBL_CIPSOV4_A_MLSLVLLST], nla_a_rem) @@ -230,11 +243,6 @@ static int netlbl_cipsov4_add_std(struct genl_info *info) struct nlattr *lvl_loc; struct nlattr *lvl_rem; - if (nla_validate_nested(nla_a, - NLBL_CIPSOV4_A_MAX, - netlbl_cipsov4_genl_policy) != 0) - goto add_std_failure; - lvl_loc = nla_find_nested(nla_a, NLBL_CIPSOV4_A_MLSLVLLOC); lvl_rem = nla_find_nested(nla_a, @@ -264,12 +272,18 @@ static int netlbl_cipsov4_add_std(struct genl_info *info) nla_for_each_nested(nla_b, nla_a, nla_b_rem) switch (nla_b->nla_type) { case NLBL_CIPSOV4_A_MLSCATLOC: + if (nla_get_u32(nla_b) > + CIPSO_V4_MAX_LOC_CATS) + goto add_std_failure; if (nla_get_u32(nla_b) >= doi_def->map.std->cat.local_size) doi_def->map.std->cat.local_size = nla_get_u32(nla_b) + 1; break; case NLBL_CIPSOV4_A_MLSCATREM: + if (nla_get_u32(nla_b) > + CIPSO_V4_MAX_REM_CATS) + goto add_std_failure; if (nla_get_u32(nla_b) >= doi_def->map.std->cat.cipso_size) doi_def->map.std->cat.cipso_size = @@ -277,9 +291,6 @@ static int netlbl_cipsov4_add_std(struct genl_info *info) break; } } - if (doi_def->map.std->cat.local_size > CIPSO_V4_MAX_LOC_CATS || - doi_def->map.std->cat.cipso_size > CIPSO_V4_MAX_REM_CATS) - goto add_std_failure; doi_def->map.std->cat.local = kcalloc( doi_def->map.std->cat.local_size, sizeof(u32), @@ -296,6 +307,10 @@ static int netlbl_cipsov4_add_std(struct genl_info *info) ret_val = -ENOMEM; goto add_std_failure; } + for (iter = 0; iter < doi_def->map.std->cat.local_size; iter++) + doi_def->map.std->cat.local[iter] = CIPSO_V4_INV_CAT; + for (iter = 0; iter < doi_def->map.std->cat.cipso_size; iter++) + doi_def->map.std->cat.cipso[iter] = CIPSO_V4_INV_CAT; nla_for_each_nested(nla_a, info->attrs[NLBL_CIPSOV4_A_MLSCATLST], nla_a_rem) diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index 3baafb10f8f..383dd4e82ee 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -472,8 +472,7 @@ static int netlink_release(struct socket *sock) NETLINK_URELEASE, &n); } - if (nlk->module) - module_put(nlk->module); + module_put(nlk->module); netlink_table_grab(); if (nlk->flags & NETLINK_KERNEL_SOCKET) { @@ -699,7 +698,7 @@ static struct sock *netlink_getsockbypid(struct sock *ssk, u32 pid) struct sock *netlink_getsockbyfilp(struct file *filp) { - struct inode *inode = filp->f_dentry->d_inode; + struct inode *inode = filp->f_path.dentry->d_inode; struct sock *sock; if (!S_ISSOCK(inode->i_mode)) diff --git a/net/netrom/af_netrom.c b/net/netrom/af_netrom.c index 1d50f801f18..43bbe2c9e49 100644 --- a/net/netrom/af_netrom.c +++ b/net/netrom/af_netrom.c @@ -1377,6 +1377,15 @@ static struct notifier_block nr_dev_notifier = { static struct net_device **dev_nr; +static struct ax25_protocol nr_pid = { + .pid = AX25_P_NETROM, + .func = nr_route_frame +}; + +static struct ax25_linkfail nr_linkfail_notifier = { + .func = nr_link_failed, +}; + static int __init nr_proto_init(void) { int i; @@ -1424,8 +1433,8 @@ static int __init nr_proto_init(void) register_netdevice_notifier(&nr_dev_notifier); - ax25_protocol_register(AX25_P_NETROM, nr_route_frame); - ax25_linkfail_register(nr_link_failed); + ax25_register_pid(&nr_pid); + ax25_linkfail_register(&nr_linkfail_notifier); #ifdef CONFIG_SYSCTL nr_register_sysctl(); @@ -1474,7 +1483,7 @@ static void __exit nr_exit(void) nr_unregister_sysctl(); #endif - ax25_linkfail_release(nr_link_failed); + ax25_linkfail_release(&nr_linkfail_notifier); ax25_protocol_release(AX25_P_NETROM); unregister_netdevice_notifier(&nr_dev_notifier); diff --git a/net/netrom/nr_dev.c b/net/netrom/nr_dev.c index 9b8eb54971a..4700d5225b7 100644 --- a/net/netrom/nr_dev.c +++ b/net/netrom/nr_dev.c @@ -128,25 +128,37 @@ static int nr_header(struct sk_buff *skb, struct net_device *dev, unsigned short return -37; } -static int nr_set_mac_address(struct net_device *dev, void *addr) +static int __must_check nr_set_mac_address(struct net_device *dev, void *addr) { struct sockaddr *sa = addr; + int err; + + if (!memcmp(dev->dev_addr, sa->sa_data, dev->addr_len)) + return 0; + + if (dev->flags & IFF_UP) { + err = ax25_listen_register((ax25_address *)sa->sa_data, NULL); + if (err) + return err; - if (dev->flags & IFF_UP) ax25_listen_release((ax25_address *)dev->dev_addr, NULL); + } memcpy(dev->dev_addr, sa->sa_data, dev->addr_len); - if (dev->flags & IFF_UP) - ax25_listen_register((ax25_address *)dev->dev_addr, NULL); - return 0; } static int nr_open(struct net_device *dev) { + int err; + + err = ax25_listen_register((ax25_address *)dev->dev_addr, NULL); + if (err) + return err; + netif_start_queue(dev); - ax25_listen_register((ax25_address *)dev->dev_addr, NULL); + return 0; } diff --git a/net/netrom/nr_route.c b/net/netrom/nr_route.c index 0096105bcd4..8f88964099e 100644 --- a/net/netrom/nr_route.c +++ b/net/netrom/nr_route.c @@ -87,8 +87,9 @@ static void nr_remove_neigh(struct nr_neigh *); * Add a new route to a node, and in the process add the node and the * neighbour if it is new. */ -static int nr_add_node(ax25_address *nr, const char *mnemonic, ax25_address *ax25, - ax25_digi *ax25_digi, struct net_device *dev, int quality, int obs_count) +static int __must_check nr_add_node(ax25_address *nr, const char *mnemonic, + ax25_address *ax25, ax25_digi *ax25_digi, struct net_device *dev, + int quality, int obs_count) { struct nr_node *nr_node; struct nr_neigh *nr_neigh; @@ -406,7 +407,8 @@ static int nr_del_node(ax25_address *callsign, ax25_address *neighbour, struct n /* * Lock a neighbour with a quality. */ -static int nr_add_neigh(ax25_address *callsign, ax25_digi *ax25_digi, struct net_device *dev, unsigned int quality) +static int __must_check nr_add_neigh(ax25_address *callsign, + ax25_digi *ax25_digi, struct net_device *dev, unsigned int quality) { struct nr_neigh *nr_neigh; @@ -777,9 +779,13 @@ int nr_route_frame(struct sk_buff *skb, ax25_cb *ax25) nr_src = (ax25_address *)(skb->data + 0); nr_dest = (ax25_address *)(skb->data + 7); - if (ax25 != NULL) - nr_add_node(nr_src, "", &ax25->dest_addr, ax25->digipeat, - ax25->ax25_dev->dev, 0, sysctl_netrom_obsolescence_count_initialiser); + if (ax25 != NULL) { + ret = nr_add_node(nr_src, "", &ax25->dest_addr, ax25->digipeat, + ax25->ax25_dev->dev, 0, + sysctl_netrom_obsolescence_count_initialiser); + if (ret) + return ret; + } if ((dev = nr_dev_get(nr_dest)) != NULL) { /* Its for me */ if (ax25 == NULL) /* Its from me */ @@ -844,6 +850,7 @@ int nr_route_frame(struct sk_buff *skb, ax25_cb *ax25) ret = (nr_neigh->ax25 != NULL); nr_node_unlock(nr_node); nr_node_put(nr_node); + return ret; } diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index da73e8a8c18..a6fa48788e8 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -60,6 +60,7 @@ #include <linux/netdevice.h> #include <linux/if_packet.h> #include <linux/wireless.h> +#include <linux/kernel.h> #include <linux/kmod.h> #include <net/ip.h> #include <net/protocol.h> @@ -200,7 +201,8 @@ struct packet_sock { #endif struct packet_type prot_hook; spinlock_t bind_lock; - char running; /* prot_hook is attached*/ + unsigned int running:1, /* prot_hook is attached*/ + auxdata:1; int ifindex; /* bound device */ __be16 num; #ifdef CONFIG_PACKET_MULTICAST @@ -214,6 +216,16 @@ struct packet_sock { #endif }; +struct packet_skb_cb { + unsigned int origlen; + union { + struct sockaddr_pkt pkt; + struct sockaddr_ll ll; + } sa; +}; + +#define PACKET_SKB_CB(__skb) ((struct packet_skb_cb *)((__skb)->cb)) + #ifdef CONFIG_PACKET_MMAP static inline char *packet_lookup_frame(struct packet_sock *po, unsigned int position) @@ -293,7 +305,7 @@ static int packet_rcv_spkt(struct sk_buff *skb, struct net_device *dev, struct /* drop conntrack reference */ nf_reset(skb); - spkt = (struct sockaddr_pkt*)skb->cb; + spkt = &PACKET_SKB_CB(skb)->sa.pkt; skb_push(skb, skb->data-skb->mac.raw); @@ -359,6 +371,10 @@ static int packet_sendmsg_spkt(struct kiocb *iocb, struct socket *sock, if (dev == NULL) goto out_unlock; + err = -ENETDOWN; + if (!(dev->flags & IFF_UP)) + goto out_unlock; + /* * You may not queue a frame bigger than the mtu. This is the lowest level * raw protocol and you must do your own fragmentation at this level. @@ -407,10 +423,6 @@ static int packet_sendmsg_spkt(struct kiocb *iocb, struct socket *sock, if (err) goto out_free; - err = -ENETDOWN; - if (!(dev->flags & IFF_UP)) - goto out_free; - /* * Now send it */ @@ -428,24 +440,18 @@ out_unlock: } #endif -static inline int run_filter(struct sk_buff *skb, struct sock *sk, - unsigned *snaplen) +static inline unsigned int run_filter(struct sk_buff *skb, struct sock *sk, + unsigned int res) { struct sk_filter *filter; - int err = 0; rcu_read_lock_bh(); filter = rcu_dereference(sk->sk_filter); - if (filter != NULL) { - err = sk_run_filter(skb, filter->insns, filter->len); - if (!err) - err = -EPERM; - else if (*snaplen > err) - *snaplen = err; - } + if (filter != NULL) + res = sk_run_filter(skb, filter->insns, filter->len); rcu_read_unlock_bh(); - return err; + return res; } /* @@ -467,7 +473,7 @@ static int packet_rcv(struct sk_buff *skb, struct net_device *dev, struct packet struct packet_sock *po; u8 * skb_head = skb->data; int skb_len = skb->len; - unsigned snaplen; + unsigned int snaplen, res; if (skb->pkt_type == PACKET_LOOPBACK) goto drop; @@ -495,8 +501,11 @@ static int packet_rcv(struct sk_buff *skb, struct net_device *dev, struct packet snaplen = skb->len; - if (run_filter(skb, sk, &snaplen) < 0) + res = run_filter(skb, sk, snaplen); + if (!res) goto drop_n_restore; + if (snaplen > res) + snaplen = res; if (atomic_read(&sk->sk_rmem_alloc) + skb->truesize >= (unsigned)sk->sk_rcvbuf) @@ -515,7 +524,10 @@ static int packet_rcv(struct sk_buff *skb, struct net_device *dev, struct packet skb = nskb; } - sll = (struct sockaddr_ll*)skb->cb; + BUILD_BUG_ON(sizeof(*PACKET_SKB_CB(skb)) + MAX_ADDR_LEN - 8 > + sizeof(skb->cb)); + + sll = &PACKET_SKB_CB(skb)->sa.ll; sll->sll_family = AF_PACKET; sll->sll_hatype = dev->type; sll->sll_protocol = skb->protocol; @@ -526,6 +538,8 @@ static int packet_rcv(struct sk_buff *skb, struct net_device *dev, struct packet if (dev->hard_header_parse) sll->sll_halen = dev->hard_header_parse(skb, sll->sll_addr); + PACKET_SKB_CB(skb)->origlen = skb->len; + if (pskb_trim(skb, snaplen)) goto drop_n_acct; @@ -568,7 +582,7 @@ static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev, struct packe struct tpacket_hdr *h; u8 * skb_head = skb->data; int skb_len = skb->len; - unsigned snaplen; + unsigned int snaplen, res; unsigned long status = TP_STATUS_LOSING|TP_STATUS_USER; unsigned short macoff, netoff; struct sk_buff *copy_skb = NULL; @@ -585,15 +599,19 @@ static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev, struct packe else if (skb->pkt_type == PACKET_OUTGOING) { /* Special case: outgoing packets have ll header at head */ skb_pull(skb, skb->nh.raw - skb->data); - if (skb->ip_summed == CHECKSUM_PARTIAL) - status |= TP_STATUS_CSUMNOTREADY; } } + if (skb->ip_summed == CHECKSUM_PARTIAL) + status |= TP_STATUS_CSUMNOTREADY; + snaplen = skb->len; - if (run_filter(skb, sk, &snaplen) < 0) + res = run_filter(skb, sk, snaplen); + if (!res) goto drop_n_restore; + if (snaplen > res) + snaplen = res; if (sk->sk_type == SOCK_DGRAM) { macoff = netoff = TPACKET_ALIGN(TPACKET_HDRLEN) + 16; @@ -738,6 +756,10 @@ static int packet_sendmsg(struct kiocb *iocb, struct socket *sock, if (sock->type == SOCK_RAW) reserve = dev->hard_header_len; + err = -ENETDOWN; + if (!(dev->flags & IFF_UP)) + goto out_unlock; + err = -EMSGSIZE; if (len > dev->mtu+reserve) goto out_unlock; @@ -770,10 +792,6 @@ static int packet_sendmsg(struct kiocb *iocb, struct socket *sock, skb->dev = dev; skb->priority = sk->sk_priority; - err = -ENETDOWN; - if (!(dev->flags & IFF_UP)) - goto out_free; - /* * Now send it */ @@ -1092,7 +1110,7 @@ static int packet_recvmsg(struct kiocb *iocb, struct socket *sock, * it in now. */ - sll = (struct sockaddr_ll*)skb->cb; + sll = &PACKET_SKB_CB(skb)->sa.ll; if (sock->type == SOCK_PACKET) msg->msg_namelen = sizeof(struct sockaddr_pkt); else @@ -1117,7 +1135,22 @@ static int packet_recvmsg(struct kiocb *iocb, struct socket *sock, sock_recv_timestamp(msg, sk, skb); if (msg->msg_name) - memcpy(msg->msg_name, skb->cb, msg->msg_namelen); + memcpy(msg->msg_name, &PACKET_SKB_CB(skb)->sa, + msg->msg_namelen); + + if (pkt_sk(sk)->auxdata) { + struct tpacket_auxdata aux; + + aux.tp_status = TP_STATUS_USER; + if (skb->ip_summed == CHECKSUM_PARTIAL) + aux.tp_status |= TP_STATUS_CSUMNOTREADY; + aux.tp_len = PACKET_SKB_CB(skb)->origlen; + aux.tp_snaplen = skb->len; + aux.tp_mac = 0; + aux.tp_net = skb->nh.raw - skb->data; + + put_cmsg(msg, SOL_PACKET, PACKET_AUXDATA, sizeof(aux), &aux); + } /* * Free or return the buffer as appropriate. Again this @@ -1317,6 +1350,7 @@ static int packet_setsockopt(struct socket *sock, int level, int optname, char __user *optval, int optlen) { struct sock *sk = sock->sk; + struct packet_sock *po = pkt_sk(sk); int ret; if (level != SOL_PACKET) @@ -1369,6 +1403,18 @@ packet_setsockopt(struct socket *sock, int level, int optname, char __user *optv return 0; } #endif + case PACKET_AUXDATA: + { + int val; + + if (optlen < sizeof(val)) + return -EINVAL; + if (copy_from_user(&val, optval, sizeof(val))) + return -EFAULT; + + po->auxdata = !!val; + return 0; + } default: return -ENOPROTOOPT; } @@ -1378,8 +1424,11 @@ static int packet_getsockopt(struct socket *sock, int level, int optname, char __user *optval, int __user *optlen) { int len; + int val; struct sock *sk = sock->sk; struct packet_sock *po = pkt_sk(sk); + void *data; + struct tpacket_stats st; if (level != SOL_PACKET) return -ENOPROTOOPT; @@ -1392,9 +1441,6 @@ static int packet_getsockopt(struct socket *sock, int level, int optname, switch(optname) { case PACKET_STATISTICS: - { - struct tpacket_stats st; - if (len > sizeof(struct tpacket_stats)) len = sizeof(struct tpacket_stats); spin_lock_bh(&sk->sk_receive_queue.lock); @@ -1403,16 +1449,23 @@ static int packet_getsockopt(struct socket *sock, int level, int optname, spin_unlock_bh(&sk->sk_receive_queue.lock); st.tp_packets += st.tp_drops; - if (copy_to_user(optval, &st, len)) - return -EFAULT; + data = &st; + break; + case PACKET_AUXDATA: + if (len > sizeof(int)) + len = sizeof(int); + val = po->auxdata; + + data = &val; break; - } default: return -ENOPROTOOPT; } if (put_user(len, optlen)) return -EFAULT; + if (copy_to_user(optval, data, len)) + return -EFAULT; return 0; } diff --git a/net/rose/af_rose.c b/net/rose/af_rose.c index 08a54285565..9e279464c9d 100644 --- a/net/rose/af_rose.c +++ b/net/rose/af_rose.c @@ -1314,7 +1314,8 @@ static int rose_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) if (copy_from_user(&rose_callsign, argp, sizeof(ax25_address))) return -EFAULT; if (ax25cmp(&rose_callsign, &null_ax25_address) != 0) - ax25_listen_register(&rose_callsign, NULL); + return ax25_listen_register(&rose_callsign, NULL); + return 0; case SIOCRSGL2CALL: @@ -1481,6 +1482,15 @@ static struct notifier_block rose_dev_notifier = { static struct net_device **dev_rose; +static struct ax25_protocol rose_pid = { + .pid = AX25_P_ROSE, + .func = rose_route_frame +}; + +static struct ax25_linkfail rose_linkfail_notifier = { + .func = rose_link_failed +}; + static int __init rose_proto_init(void) { int i; @@ -1530,8 +1540,8 @@ static int __init rose_proto_init(void) sock_register(&rose_family_ops); register_netdevice_notifier(&rose_dev_notifier); - ax25_protocol_register(AX25_P_ROSE, rose_route_frame); - ax25_linkfail_register(rose_link_failed); + ax25_register_pid(&rose_pid); + ax25_linkfail_register(&rose_linkfail_notifier); #ifdef CONFIG_SYSCTL rose_register_sysctl(); @@ -1579,7 +1589,7 @@ static void __exit rose_exit(void) rose_rt_free(); ax25_protocol_release(AX25_P_ROSE); - ax25_linkfail_release(rose_link_failed); + ax25_linkfail_release(&rose_linkfail_notifier); if (ax25cmp(&rose_callsign, &null_ax25_address) != 0) ax25_listen_release(&rose_callsign, NULL); diff --git a/net/rose/rose_dev.c b/net/rose/rose_dev.c index 7c279e2659e..50824d345fa 100644 --- a/net/rose/rose_dev.c +++ b/net/rose/rose_dev.c @@ -93,20 +93,34 @@ static int rose_rebuild_header(struct sk_buff *skb) static int rose_set_mac_address(struct net_device *dev, void *addr) { struct sockaddr *sa = addr; + int err; - rose_del_loopback_node((rose_address *)dev->dev_addr); + if (!memcpy(dev->dev_addr, sa->sa_data, dev->addr_len)) + return 0; - memcpy(dev->dev_addr, sa->sa_data, dev->addr_len); + if (dev->flags & IFF_UP) { + err = rose_add_loopback_node((rose_address *)dev->dev_addr); + if (err) + return err; + + rose_del_loopback_node((rose_address *)dev->dev_addr); + } - rose_add_loopback_node((rose_address *)dev->dev_addr); + memcpy(dev->dev_addr, sa->sa_data, dev->addr_len); return 0; } static int rose_open(struct net_device *dev) { + int err; + + err = rose_add_loopback_node((rose_address *)dev->dev_addr); + if (err) + return err; + netif_start_queue(dev); - rose_add_loopback_node((rose_address *)dev->dev_addr); + return 0; } diff --git a/net/rose/rose_loopback.c b/net/rose/rose_loopback.c index 103b4d38f88..3e41bd93ab9 100644 --- a/net/rose/rose_loopback.c +++ b/net/rose/rose_loopback.c @@ -79,7 +79,8 @@ static void rose_loopback_timer(unsigned long param) skb->h.raw = skb->data; - if ((sk = rose_find_socket(lci_o, rose_loopback_neigh)) != NULL) { + sk = rose_find_socket(lci_o, &rose_loopback_neigh); + if (sk) { if (rose_process_rx_frame(sk, skb) == 0) kfree_skb(skb); continue; @@ -87,7 +88,7 @@ static void rose_loopback_timer(unsigned long param) if (frametype == ROSE_CALL_REQUEST) { if ((dev = rose_dev_get(dest)) != NULL) { - if (rose_rx_call_request(skb, dev, rose_loopback_neigh, lci_o) == 0) + if (rose_rx_call_request(skb, dev, &rose_loopback_neigh, lci_o) == 0) kfree_skb(skb); } else { kfree_skb(skb); diff --git a/net/rose/rose_route.c b/net/rose/rose_route.c index 7252344779a..8028c0d425d 100644 --- a/net/rose/rose_route.c +++ b/net/rose/rose_route.c @@ -46,13 +46,13 @@ static DEFINE_SPINLOCK(rose_neigh_list_lock); static struct rose_route *rose_route_list; static DEFINE_SPINLOCK(rose_route_list_lock); -struct rose_neigh *rose_loopback_neigh; +struct rose_neigh rose_loopback_neigh; /* * Add a new route to a node, and in the process add the node and the * neighbour if it is new. */ -static int rose_add_node(struct rose_route_struct *rose_route, +static int __must_check rose_add_node(struct rose_route_struct *rose_route, struct net_device *dev) { struct rose_node *rose_node, *rose_tmpn, *rose_tmpp; @@ -361,33 +361,30 @@ out: /* * Add the loopback neighbour. */ -int rose_add_loopback_neigh(void) +void rose_add_loopback_neigh(void) { - if ((rose_loopback_neigh = kmalloc(sizeof(struct rose_neigh), GFP_ATOMIC)) == NULL) - return -ENOMEM; + struct rose_neigh *sn = &rose_loopback_neigh; - rose_loopback_neigh->callsign = null_ax25_address; - rose_loopback_neigh->digipeat = NULL; - rose_loopback_neigh->ax25 = NULL; - rose_loopback_neigh->dev = NULL; - rose_loopback_neigh->count = 0; - rose_loopback_neigh->use = 0; - rose_loopback_neigh->dce_mode = 1; - rose_loopback_neigh->loopback = 1; - rose_loopback_neigh->number = rose_neigh_no++; - rose_loopback_neigh->restarted = 1; + sn->callsign = null_ax25_address; + sn->digipeat = NULL; + sn->ax25 = NULL; + sn->dev = NULL; + sn->count = 0; + sn->use = 0; + sn->dce_mode = 1; + sn->loopback = 1; + sn->number = rose_neigh_no++; + sn->restarted = 1; - skb_queue_head_init(&rose_loopback_neigh->queue); + skb_queue_head_init(&sn->queue); - init_timer(&rose_loopback_neigh->ftimer); - init_timer(&rose_loopback_neigh->t0timer); + init_timer(&sn->ftimer); + init_timer(&sn->t0timer); spin_lock_bh(&rose_neigh_list_lock); - rose_loopback_neigh->next = rose_neigh_list; - rose_neigh_list = rose_loopback_neigh; + sn->next = rose_neigh_list; + rose_neigh_list = sn; spin_unlock_bh(&rose_neigh_list_lock); - - return 0; } /* @@ -421,13 +418,13 @@ int rose_add_loopback_node(rose_address *address) rose_node->mask = 10; rose_node->count = 1; rose_node->loopback = 1; - rose_node->neighbour[0] = rose_loopback_neigh; + rose_node->neighbour[0] = &rose_loopback_neigh; /* Insert at the head of list. Address is always mask=10 */ rose_node->next = rose_node_list; rose_node_list = rose_node; - rose_loopback_neigh->count++; + rose_loopback_neigh.count++; out: spin_unlock_bh(&rose_node_list_lock); @@ -458,7 +455,7 @@ void rose_del_loopback_node(rose_address *address) rose_remove_node(rose_node); - rose_loopback_neigh->count--; + rose_loopback_neigh.count--; out: spin_unlock_bh(&rose_node_list_lock); diff --git a/net/sched/act_ipt.c b/net/sched/act_ipt.c index a9608064a4c..4c68c718f5e 100644 --- a/net/sched/act_ipt.c +++ b/net/sched/act_ipt.c @@ -52,10 +52,11 @@ static struct tcf_hashinfo ipt_hash_info = { static int ipt_init_target(struct ipt_entry_target *t, char *table, unsigned int hook) { - struct ipt_target *target; + struct xt_target *target; int ret = 0; - target = xt_find_target(AF_INET, t->u.user.name, t->u.user.revision); + target = xt_request_find_target(AF_INET, t->u.user.name, + t->u.user.revision); if (!target) return -ENOENT; @@ -63,9 +64,10 @@ static int ipt_init_target(struct ipt_entry_target *t, char *table, unsigned int ret = xt_check_target(target, AF_INET, t->u.target_size - sizeof(*t), table, hook, 0, 0); - if (ret) + if (ret) { + module_put(t->u.kernel.target->me); return ret; - + } if (t->u.kernel.target->checkentry && !t->u.kernel.target->checkentry(table, NULL, t->u.kernel.target, t->data, diff --git a/net/sched/sch_cbq.c b/net/sched/sch_cbq.c index ba82dfab604..f79a4f3d0a9 100644 --- a/net/sched/sch_cbq.c +++ b/net/sched/sch_cbq.c @@ -371,8 +371,6 @@ static void cbq_deactivate_class(struct cbq_class *this) return; } } - - cl = cl_prev->next_alive; return; } } while ((cl_prev = cl) != q->active[prio]); @@ -1258,6 +1256,8 @@ static unsigned int cbq_drop(struct Qdisc* sch) do { if (cl->q->ops->drop && (len = cl->q->ops->drop(cl->q))) { sch->q.qlen--; + if (!cl->q->q.qlen) + cbq_deactivate_class(cl); return len; } } while ((cl = cl->next_alive) != cl_head); @@ -1685,8 +1685,7 @@ static int cbq_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new, #endif } sch_tree_lock(sch); - *old = cl->q; - cl->q = new; + *old = xchg(&cl->q, new); qdisc_tree_decrease_qlen(*old, (*old)->q.qlen); qdisc_reset(*old); sch_tree_unlock(sch); @@ -1704,6 +1703,14 @@ cbq_leaf(struct Qdisc *sch, unsigned long arg) return cl ? cl->q : NULL; } +static void cbq_qlen_notify(struct Qdisc *sch, unsigned long arg) +{ + struct cbq_class *cl = (struct cbq_class *)arg; + + if (cl->q->q.qlen == 0) + cbq_deactivate_class(cl); +} + static unsigned long cbq_get(struct Qdisc *sch, u32 classid) { struct cbq_sched_data *q = qdisc_priv(sch); @@ -1988,12 +1995,17 @@ static int cbq_delete(struct Qdisc *sch, unsigned long arg) { struct cbq_sched_data *q = qdisc_priv(sch); struct cbq_class *cl = (struct cbq_class*)arg; + unsigned int qlen; if (cl->filters || cl->children || cl == &q->link) return -EBUSY; sch_tree_lock(sch); + qlen = cl->q->q.qlen; + qdisc_reset(cl->q); + qdisc_tree_decrease_qlen(cl->q, qlen); + if (cl->next_alive) cbq_deactivate_class(cl); @@ -2084,6 +2096,7 @@ static void cbq_walk(struct Qdisc *sch, struct qdisc_walker *arg) static struct Qdisc_class_ops cbq_class_ops = { .graft = cbq_graft, .leaf = cbq_leaf, + .qlen_notify = cbq_qlen_notify, .get = cbq_get, .put = cbq_put, .change = cbq_change_class, diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c index bc116bd6937..3b6e6a78092 100644 --- a/net/sched/sch_generic.c +++ b/net/sched/sch_generic.c @@ -209,7 +209,7 @@ static void dev_watchdog(unsigned long arg) dev->name); dev->tx_timeout(dev); } - if (!mod_timer(&dev->watchdog_timer, jiffies + dev->watchdog_timeo)) + if (!mod_timer(&dev->watchdog_timer, round_jiffies(jiffies + dev->watchdog_timeo))) dev_hold(dev); } } diff --git a/net/sched/sch_htb.c b/net/sched/sch_htb.c index 215e68c2b61..15f23c5511a 100644 --- a/net/sched/sch_htb.c +++ b/net/sched/sch_htb.c @@ -147,6 +147,10 @@ struct htb_class { psched_tdiff_t mbuffer; /* max wait time */ long tokens, ctokens; /* current number of tokens */ psched_time_t t_c; /* checkpoint time */ + + int prio; /* For parent to leaf return possible here */ + int quantum; /* we do backup. Finally full replacement */ + /* of un.leaf originals should be done. */ }; /* TODO: maybe compute rate when size is too large .. or drop ? */ @@ -1271,6 +1275,38 @@ static void htb_destroy_filters(struct tcf_proto **fl) } } +static inline int htb_parent_last_child(struct htb_class *cl) +{ + if (!cl->parent) + /* the root class */ + return 0; + + if (!(cl->parent->children.next == &cl->sibling && + cl->parent->children.prev == &cl->sibling)) + /* not the last child */ + return 0; + + return 1; +} + +static void htb_parent_to_leaf(struct htb_class *cl, struct Qdisc *new_q) +{ + struct htb_class *parent = cl->parent; + + BUG_TRAP(!cl->level && cl->un.leaf.q && !cl->prio_activity); + + parent->level = 0; + memset(&parent->un.inner, 0, sizeof(parent->un.inner)); + INIT_LIST_HEAD(&parent->un.leaf.drop_list); + parent->un.leaf.q = new_q ? new_q : &noop_qdisc; + parent->un.leaf.quantum = parent->quantum; + parent->un.leaf.prio = parent->prio; + parent->tokens = parent->buffer; + parent->ctokens = parent->cbuffer; + PSCHED_GET_TIME(parent->t_c); + parent->cmode = HTB_CAN_SEND; +} + static void htb_destroy_class(struct Qdisc *sch, struct htb_class *cl) { struct htb_sched *q = qdisc_priv(sch); @@ -1328,6 +1364,8 @@ static int htb_delete(struct Qdisc *sch, unsigned long arg) struct htb_sched *q = qdisc_priv(sch); struct htb_class *cl = (struct htb_class *)arg; unsigned int qlen; + struct Qdisc *new_q = NULL; + int last_child = 0; // TODO: why don't allow to delete subtree ? references ? does // tc subsys quarantee us that in htb_destroy it holds no class @@ -1335,6 +1373,12 @@ static int htb_delete(struct Qdisc *sch, unsigned long arg) if (!list_empty(&cl->children) || cl->filter_cnt) return -EBUSY; + if (!cl->level && htb_parent_last_child(cl)) { + new_q = qdisc_create_dflt(sch->dev, &pfifo_qdisc_ops, + cl->parent->classid); + last_child = 1; + } + sch_tree_lock(sch); /* delete from hash and active; remainder in destroy_class */ @@ -1349,6 +1393,9 @@ static int htb_delete(struct Qdisc *sch, unsigned long arg) if (cl->prio_activity) htb_deactivate(q, cl); + if (last_child) + htb_parent_to_leaf(cl, new_q); + if (--cl->refcnt == 0) htb_destroy_class(sch, cl); @@ -1483,6 +1530,10 @@ static int htb_change_class(struct Qdisc *sch, u32 classid, cl->un.leaf.quantum = hopt->quantum; if ((cl->un.leaf.prio = hopt->prio) >= TC_HTB_NUMPRIO) cl->un.leaf.prio = TC_HTB_NUMPRIO - 1; + + /* backup for htb_parent_to_leaf */ + cl->quantum = cl->un.leaf.quantum; + cl->prio = cl->un.leaf.prio; } cl->buffer = hopt->buffer; diff --git a/net/sched/sch_prio.c b/net/sched/sch_prio.c index 2567b4c96c1..000e043ebd6 100644 --- a/net/sched/sch_prio.c +++ b/net/sched/sch_prio.c @@ -372,6 +372,20 @@ static int prio_dump_class(struct Qdisc *sch, unsigned long cl, struct sk_buff * return 0; } +static int prio_dump_class_stats(struct Qdisc *sch, unsigned long cl, + struct gnet_dump *d) +{ + struct prio_sched_data *q = qdisc_priv(sch); + struct Qdisc *cl_q; + + cl_q = q->queues[cl - 1]; + if (gnet_stats_copy_basic(d, &cl_q->bstats) < 0 || + gnet_stats_copy_queue(d, &cl_q->qstats) < 0) + return -1; + + return 0; +} + static void prio_walk(struct Qdisc *sch, struct qdisc_walker *arg) { struct prio_sched_data *q = qdisc_priv(sch); @@ -414,6 +428,7 @@ static struct Qdisc_class_ops prio_class_ops = { .bind_tcf = prio_bind, .unbind_tcf = prio_put, .dump = prio_dump_class, + .dump_stats = prio_dump_class_stats, }; static struct Qdisc_ops prio_qdisc_ops = { diff --git a/net/sched/sch_sfq.c b/net/sched/sch_sfq.c index 459cda258a5..82844801e42 100644 --- a/net/sched/sch_sfq.c +++ b/net/sched/sch_sfq.c @@ -143,6 +143,7 @@ static unsigned sfq_hash(struct sfq_sched_data *q, struct sk_buff *skb) if (!(iph->frag_off&htons(IP_MF|IP_OFFSET)) && (iph->protocol == IPPROTO_TCP || iph->protocol == IPPROTO_UDP || + iph->protocol == IPPROTO_UDPLITE || iph->protocol == IPPROTO_SCTP || iph->protocol == IPPROTO_DCCP || iph->protocol == IPPROTO_ESP)) @@ -156,6 +157,7 @@ static unsigned sfq_hash(struct sfq_sched_data *q, struct sk_buff *skb) h2 = iph->saddr.s6_addr32[3]^iph->nexthdr; if (iph->nexthdr == IPPROTO_TCP || iph->nexthdr == IPPROTO_UDP || + iph->nexthdr == IPPROTO_UDPLITE || iph->nexthdr == IPPROTO_SCTP || iph->nexthdr == IPPROTO_DCCP || iph->nexthdr == IPPROTO_ESP) diff --git a/net/sctp/associola.c b/net/sctp/associola.c index ad0057db0f9..5db95caed0a 100644 --- a/net/sctp/associola.c +++ b/net/sctp/associola.c @@ -298,6 +298,7 @@ static struct sctp_association *sctp_association_init(struct sctp_association *a asoc->default_flags = sp->default_flags; asoc->default_context = sp->default_context; asoc->default_timetolive = sp->default_timetolive; + asoc->default_rcv_context = sp->default_rcv_context; return asoc; diff --git a/net/sctp/ipv6.c b/net/sctp/ipv6.c index 3c3e560087c..ef36be073a1 100644 --- a/net/sctp/ipv6.c +++ b/net/sctp/ipv6.c @@ -78,8 +78,44 @@ #include <asm/uaccess.h> +/* Event handler for inet6 address addition/deletion events. */ +static int sctp_inet6addr_event(struct notifier_block *this, unsigned long ev, + void *ptr) +{ + struct inet6_ifaddr *ifa = (struct inet6_ifaddr *)ptr; + struct sctp_sockaddr_entry *addr; + struct list_head *pos, *temp; + + switch (ev) { + case NETDEV_UP: + addr = kmalloc(sizeof(struct sctp_sockaddr_entry), GFP_ATOMIC); + if (addr) { + addr->a.v6.sin6_family = AF_INET6; + addr->a.v6.sin6_port = 0; + memcpy(&addr->a.v6.sin6_addr, &ifa->addr, + sizeof(struct in6_addr)); + addr->a.v6.sin6_scope_id = ifa->idev->dev->ifindex; + list_add_tail(&addr->list, &sctp_local_addr_list); + } + break; + case NETDEV_DOWN: + list_for_each_safe(pos, temp, &sctp_local_addr_list) { + addr = list_entry(pos, struct sctp_sockaddr_entry, list); + if (ipv6_addr_equal(&addr->a.v6.sin6_addr, &ifa->addr)) { + list_del(pos); + kfree(addr); + break; + } + } + + break; + } + + return NOTIFY_DONE; +} + static struct notifier_block sctp_inet6addr_notifier = { - .notifier_call = sctp_inetaddr_event, + .notifier_call = sctp_inet6addr_event, }; /* ICMP error handler. */ diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c index f2ba8615895..0ef48126b11 100644 --- a/net/sctp/protocol.c +++ b/net/sctp/protocol.c @@ -163,7 +163,7 @@ static void sctp_v4_copy_addrlist(struct list_head *addrlist, /* Extract our IP addresses from the system and stash them in the * protocol structure. */ -static void __sctp_get_local_addr_list(void) +static void sctp_get_local_addr_list(void) { struct net_device *dev; struct list_head *pos; @@ -179,17 +179,8 @@ static void __sctp_get_local_addr_list(void) read_unlock(&dev_base_lock); } -static void sctp_get_local_addr_list(void) -{ - unsigned long flags; - - sctp_spin_lock_irqsave(&sctp_local_addr_lock, flags); - __sctp_get_local_addr_list(); - sctp_spin_unlock_irqrestore(&sctp_local_addr_lock, flags); -} - /* Free the existing local addresses. */ -static void __sctp_free_local_addr_list(void) +static void sctp_free_local_addr_list(void) { struct sctp_sockaddr_entry *addr; struct list_head *pos, *temp; @@ -201,27 +192,15 @@ static void __sctp_free_local_addr_list(void) } } -/* Free the existing local addresses. */ -static void sctp_free_local_addr_list(void) -{ - unsigned long flags; - - sctp_spin_lock_irqsave(&sctp_local_addr_lock, flags); - __sctp_free_local_addr_list(); - sctp_spin_unlock_irqrestore(&sctp_local_addr_lock, flags); -} - /* Copy the local addresses which are valid for 'scope' into 'bp'. */ int sctp_copy_local_addr_list(struct sctp_bind_addr *bp, sctp_scope_t scope, gfp_t gfp, int copy_flags) { struct sctp_sockaddr_entry *addr; int error = 0; - struct list_head *pos; - unsigned long flags; + struct list_head *pos, *temp; - sctp_spin_lock_irqsave(&sctp_local_addr_lock, flags); - list_for_each(pos, &sctp_local_addr_list) { + list_for_each_safe(pos, temp, &sctp_local_addr_list) { addr = list_entry(pos, struct sctp_sockaddr_entry, list); if (sctp_in_scope(&addr->a, scope)) { /* Now that the address is in scope, check to see if @@ -242,7 +221,6 @@ int sctp_copy_local_addr_list(struct sctp_bind_addr *bp, sctp_scope_t scope, } end_copy: - sctp_spin_unlock_irqrestore(&sctp_local_addr_lock, flags); return error; } @@ -622,18 +600,36 @@ static void sctp_v4_seq_dump_addr(struct seq_file *seq, union sctp_addr *addr) seq_printf(seq, "%d.%d.%d.%d ", NIPQUAD(addr->v4.sin_addr)); } -/* Event handler for inet address addition/deletion events. - * Basically, whenever there is an event, we re-build our local address list. - */ -int sctp_inetaddr_event(struct notifier_block *this, unsigned long ev, - void *ptr) +/* Event handler for inet address addition/deletion events. */ +static int sctp_inetaddr_event(struct notifier_block *this, unsigned long ev, + void *ptr) { - unsigned long flags; + struct in_ifaddr *ifa = (struct in_ifaddr *)ptr; + struct sctp_sockaddr_entry *addr; + struct list_head *pos, *temp; - sctp_spin_lock_irqsave(&sctp_local_addr_lock, flags); - __sctp_free_local_addr_list(); - __sctp_get_local_addr_list(); - sctp_spin_unlock_irqrestore(&sctp_local_addr_lock, flags); + switch (ev) { + case NETDEV_UP: + addr = kmalloc(sizeof(struct sctp_sockaddr_entry), GFP_ATOMIC); + if (addr) { + addr->a.v4.sin_family = AF_INET; + addr->a.v4.sin_port = 0; + addr->a.v4.sin_addr.s_addr = ifa->ifa_local; + list_add_tail(&addr->list, &sctp_local_addr_list); + } + break; + case NETDEV_DOWN: + list_for_each_safe(pos, temp, &sctp_local_addr_list) { + addr = list_entry(pos, struct sctp_sockaddr_entry, list); + if (addr->a.v4.sin_addr.s_addr == ifa->ifa_local) { + list_del(pos); + kfree(addr); + break; + } + } + + break; + } return NOTIFY_DONE; } @@ -808,7 +804,7 @@ static inline int sctp_v4_xmit(struct sk_buff *skb, NIPQUAD(((struct rtable *)skb->dst)->rt_dst)); SCTP_INC_STATS(SCTP_MIB_OUTSCTPPACKS); - return ip_queue_xmit(skb, skb->sk, ipfragok); + return ip_queue_xmit(skb, ipfragok); } static struct sctp_af sctp_ipv4_specific; @@ -1172,13 +1168,12 @@ SCTP_STATIC __init int sctp_init(void) /* Initialize the local address list. */ INIT_LIST_HEAD(&sctp_local_addr_list); - spin_lock_init(&sctp_local_addr_lock); + + sctp_get_local_addr_list(); /* Register notifier for inet address additions/deletions. */ register_inetaddr_notifier(&sctp_inetaddr_notifier); - sctp_get_local_addr_list(); - __unsafe(THIS_MODULE); status = 0; out: @@ -1263,6 +1258,7 @@ module_exit(sctp_exit); * __stringify doesn't likes enums, so use IPPROTO_SCTP value (132) directly. */ MODULE_ALIAS("net-pf-" __stringify(PF_INET) "-proto-132"); +MODULE_ALIAS("net-pf-" __stringify(PF_INET6) "-proto-132"); MODULE_AUTHOR("Linux Kernel SCTP developers <lksctp-developers@lists.sourceforge.net>"); MODULE_DESCRIPTION("Support for the SCTP protocol (RFC2960)"); MODULE_LICENSE("GPL"); diff --git a/net/sctp/sm_make_chunk.c b/net/sctp/sm_make_chunk.c index 30927d3a597..0b1ddb1005a 100644 --- a/net/sctp/sm_make_chunk.c +++ b/net/sctp/sm_make_chunk.c @@ -124,8 +124,8 @@ void sctp_init_cause(struct sctp_chunk *chunk, __be16 cause_code, padlen = len % 4; err.length = htons(len); len += padlen; - sctp_addto_chunk(chunk, sizeof(sctp_errhdr_t), &err); - chunk->subh.err_hdr = sctp_addto_chunk(chunk, paylen, payload); + chunk->subh.err_hdr = sctp_addto_chunk(chunk, sizeof(sctp_errhdr_t), &err); + sctp_addto_chunk(chunk, paylen, payload); } /* 3.3.2 Initiation (INIT) (1) @@ -184,7 +184,7 @@ struct sctp_chunk *sctp_make_init(const struct sctp_association *asoc, struct sctp_sock *sp; sctp_supported_addrs_param_t sat; __be16 types[2]; - sctp_adaption_ind_param_t aiparam; + sctp_adaptation_ind_param_t aiparam; /* RFC 2960 3.3.2 Initiation (INIT) (1) * @@ -249,9 +249,9 @@ struct sctp_chunk *sctp_make_init(const struct sctp_association *asoc, sctp_addto_chunk(retval, sizeof(ecap_param), &ecap_param); if (sctp_prsctp_enable) sctp_addto_chunk(retval, sizeof(prsctp_param), &prsctp_param); - aiparam.param_hdr.type = SCTP_PARAM_ADAPTION_LAYER_IND; + aiparam.param_hdr.type = SCTP_PARAM_ADAPTATION_LAYER_IND; aiparam.param_hdr.length = htons(sizeof(aiparam)); - aiparam.adaption_ind = htonl(sp->adaption_ind); + aiparam.adaptation_ind = htonl(sp->adaptation_ind); sctp_addto_chunk(retval, sizeof(aiparam), &aiparam); nodata: kfree(addrs.v); @@ -269,7 +269,7 @@ struct sctp_chunk *sctp_make_init_ack(const struct sctp_association *asoc, sctp_cookie_param_t *cookie; int cookie_len; size_t chunksize; - sctp_adaption_ind_param_t aiparam; + sctp_adaptation_ind_param_t aiparam; retval = NULL; @@ -323,9 +323,9 @@ struct sctp_chunk *sctp_make_init_ack(const struct sctp_association *asoc, if (asoc->peer.prsctp_capable) sctp_addto_chunk(retval, sizeof(prsctp_param), &prsctp_param); - aiparam.param_hdr.type = SCTP_PARAM_ADAPTION_LAYER_IND; + aiparam.param_hdr.type = SCTP_PARAM_ADAPTATION_LAYER_IND; aiparam.param_hdr.length = htons(sizeof(aiparam)); - aiparam.adaption_ind = htonl(sctp_sk(asoc->base.sk)->adaption_ind); + aiparam.adaptation_ind = htonl(sctp_sk(asoc->base.sk)->adaptation_ind); sctp_addto_chunk(retval, sizeof(aiparam), &aiparam); /* We need to remove the const qualifier at this point. */ @@ -1300,8 +1300,8 @@ static sctp_cookie_param_t *sctp_pack_cookie(const struct sctp_endpoint *ep, /* Remember PR-SCTP capability. */ cookie->c.prsctp_capable = asoc->peer.prsctp_capable; - /* Save adaption indication in the cookie. */ - cookie->c.adaption_ind = asoc->peer.adaption_ind; + /* Save adaptation indication in the cookie. */ + cookie->c.adaptation_ind = asoc->peer.adaptation_ind; /* Set an expiration time for the cookie. */ do_gettimeofday(&cookie->c.expiration); @@ -1512,7 +1512,7 @@ no_hmac: retval->addip_serial = retval->c.initial_tsn; retval->adv_peer_ack_point = retval->ctsn_ack_point; retval->peer.prsctp_capable = retval->c.prsctp_capable; - retval->peer.adaption_ind = retval->c.adaption_ind; + retval->peer.adaptation_ind = retval->c.adaptation_ind; /* The INIT stuff will be done by the side effects. */ return retval; @@ -1562,7 +1562,7 @@ static int sctp_process_missing_param(const struct sctp_association *asoc, if (*errp) { report.num_missing = htonl(1); report.type = paramtype; - sctp_init_cause(*errp, SCTP_ERROR_INV_PARAM, + sctp_init_cause(*errp, SCTP_ERROR_MISS_PARAM, &report, sizeof(report)); } @@ -1743,7 +1743,7 @@ static int sctp_verify_param(const struct sctp_association *asoc, case SCTP_PARAM_HEARTBEAT_INFO: case SCTP_PARAM_UNRECOGNIZED_PARAMETERS: case SCTP_PARAM_ECN_CAPABLE: - case SCTP_PARAM_ADAPTION_LAYER_IND: + case SCTP_PARAM_ADAPTATION_LAYER_IND: break; case SCTP_PARAM_HOST_NAME_ADDRESS: @@ -1775,7 +1775,9 @@ int sctp_verify_init(const struct sctp_association *asoc, /* Verify stream values are non-zero. */ if ((0 == peer_init->init_hdr.num_outbound_streams) || - (0 == peer_init->init_hdr.num_inbound_streams)) { + (0 == peer_init->init_hdr.num_inbound_streams) || + (0 == peer_init->init_hdr.init_tag) || + (SCTP_DEFAULT_MINWINDOW > ntohl(peer_init->init_hdr.a_rwnd))) { sctp_process_inv_mandatory(asoc, chunk, errp); return 0; @@ -2098,8 +2100,8 @@ static int sctp_process_param(struct sctp_association *asoc, asoc->peer.ecn_capable = 1; break; - case SCTP_PARAM_ADAPTION_LAYER_IND: - asoc->peer.adaption_ind = param.aind->adaption_ind; + case SCTP_PARAM_ADAPTATION_LAYER_IND: + asoc->peer.adaptation_ind = param.aind->adaptation_ind; break; case SCTP_PARAM_FWD_TSN_SUPPORT: diff --git a/net/sctp/sm_sideeffect.c b/net/sctp/sm_sideeffect.c index 7bbc6156e45..6db77d1329f 100644 --- a/net/sctp/sm_sideeffect.c +++ b/net/sctp/sm_sideeffect.c @@ -217,7 +217,7 @@ static int sctp_gen_sack(struct sctp_association *asoc, int force, asoc->peer.sack_needed = 0; - error = sctp_outq_tail(&asoc->outqueue, sack); + sctp_add_cmd_sf(commands, SCTP_CMD_REPLY, SCTP_CHUNK(sack)); /* Stop the SACK timer. */ sctp_add_cmd_sf(commands, SCTP_CMD_TIMER_STOP, @@ -621,7 +621,13 @@ static void sctp_cmd_transport_on(sctp_cmd_seq_t *cmds, /* The receiver of the HEARTBEAT ACK should also perform an * RTT measurement for that destination transport address * using the time value carried in the HEARTBEAT ACK chunk. + * If the transport's rto_pending variable has been cleared, + * it was most likely due to a retransmit. However, we want + * to re-enable it to properly update the rto. */ + if (t->rto_pending == 0) + t->rto_pending = 1; + hbinfo = (sctp_sender_hb_info_t *) chunk->skb->data; sctp_transport_update_rto(t, (jiffies - hbinfo->sent_at)); diff --git a/net/sctp/sm_statefuns.c b/net/sctp/sm_statefuns.c index 27cc444aaf1..fbbc9e6a3b7 100644 --- a/net/sctp/sm_statefuns.c +++ b/net/sctp/sm_statefuns.c @@ -440,7 +440,6 @@ sctp_disposition_t sctp_sf_do_5_1C_ack(const struct sctp_endpoint *ep, { struct sctp_chunk *chunk = arg; sctp_init_chunk_t *initchunk; - __u32 init_tag; struct sctp_chunk *err_chunk; struct sctp_packet *packet; sctp_error_t error; @@ -462,24 +461,6 @@ sctp_disposition_t sctp_sf_do_5_1C_ack(const struct sctp_endpoint *ep, /* Grab the INIT header. */ chunk->subh.init_hdr = (sctp_inithdr_t *) chunk->skb->data; - init_tag = ntohl(chunk->subh.init_hdr->init_tag); - - /* Verification Tag: 3.3.3 - * If the value of the Initiate Tag in a received INIT ACK - * chunk is found to be 0, the receiver MUST treat it as an - * error and close the association by transmitting an ABORT. - */ - if (!init_tag) { - struct sctp_chunk *reply = sctp_make_abort(asoc, chunk, 0); - if (!reply) - goto nomem; - - sctp_add_cmd_sf(commands, SCTP_CMD_REPLY, SCTP_CHUNK(reply)); - return sctp_stop_t1_and_abort(commands, SCTP_ERROR_INV_PARAM, - ECONNREFUSED, asoc, - chunk->transport); - } - /* Verify the INIT chunk before processing it. */ err_chunk = NULL; if (!sctp_verify_init(asoc, chunk->chunk_hdr->type, @@ -550,9 +531,6 @@ sctp_disposition_t sctp_sf_do_5_1C_ack(const struct sctp_endpoint *ep, SCTP_CHUNK(err_chunk)); return SCTP_DISPOSITION_CONSUME; - -nomem: - return SCTP_DISPOSITION_NOMEM; } /* @@ -688,12 +666,12 @@ sctp_disposition_t sctp_sf_do_5_1D_ce(const struct sctp_endpoint *ep, goto nomem_ev; /* Sockets API Draft Section 5.3.1.6 - * When a peer sends a Adaption Layer Indication parameter , SCTP + * When a peer sends a Adaptation Layer Indication parameter , SCTP * delivers this notification to inform the application that of the - * peers requested adaption layer. + * peers requested adaptation layer. */ - if (new_asoc->peer.adaption_ind) { - ai_ev = sctp_ulpevent_make_adaption_indication(new_asoc, + if (new_asoc->peer.adaptation_ind) { + ai_ev = sctp_ulpevent_make_adaptation_indication(new_asoc, GFP_ATOMIC); if (!ai_ev) goto nomem_aiev; @@ -820,12 +798,12 @@ sctp_disposition_t sctp_sf_do_5_1E_ca(const struct sctp_endpoint *ep, sctp_add_cmd_sf(commands, SCTP_CMD_EVENT_ULP, SCTP_ULPEVENT(ev)); /* Sockets API Draft Section 5.3.1.6 - * When a peer sends a Adaption Layer Indication parameter , SCTP + * When a peer sends a Adaptation Layer Indication parameter , SCTP * delivers this notification to inform the application that of the - * peers requested adaption layer. + * peers requested adaptation layer. */ - if (asoc->peer.adaption_ind) { - ev = sctp_ulpevent_make_adaption_indication(asoc, GFP_ATOMIC); + if (asoc->peer.adaptation_ind) { + ev = sctp_ulpevent_make_adaptation_indication(asoc, GFP_ATOMIC); if (!ev) goto nomem; @@ -1553,6 +1531,28 @@ sctp_disposition_t sctp_sf_do_5_2_2_dupinit(const struct sctp_endpoint *ep, } +/* + * Unexpected INIT-ACK handler. + * + * Section 5.2.3 + * If an INIT ACK received by an endpoint in any state other than the + * COOKIE-WAIT state, the endpoint should discard the INIT ACK chunk. + * An unexpected INIT ACK usually indicates the processing of an old or + * duplicated INIT chunk. +*/ +sctp_disposition_t sctp_sf_do_5_2_3_initack(const struct sctp_endpoint *ep, + const struct sctp_association *asoc, + const sctp_subtype_t type, + void *arg, sctp_cmd_seq_t *commands) +{ + /* Per the above section, we'll discard the chunk if we have an + * endpoint. If this is an OOTB INIT-ACK, treat it as such. + */ + if (ep == sctp_sk((sctp_get_ctl_sock()))->ep) + return sctp_sf_ootb(ep, asoc, type, arg, commands); + else + return sctp_sf_discard_chunk(ep, asoc, type, arg, commands); +} /* Unexpected COOKIE-ECHO handler for peer restart (Table 2, action 'A') * @@ -1698,12 +1698,12 @@ static sctp_disposition_t sctp_sf_do_dupcook_b(const struct sctp_endpoint *ep, sctp_add_cmd_sf(commands, SCTP_CMD_EVENT_ULP, SCTP_ULPEVENT(ev)); /* Sockets API Draft Section 5.3.1.6 - * When a peer sends a Adaption Layer Indication parameter , SCTP + * When a peer sends a Adaptation Layer Indication parameter , SCTP * delivers this notification to inform the application that of the - * peers requested adaption layer. + * peers requested adaptation layer. */ - if (asoc->peer.adaption_ind) { - ev = sctp_ulpevent_make_adaption_indication(asoc, GFP_ATOMIC); + if (asoc->peer.adaptation_ind) { + ev = sctp_ulpevent_make_adaptation_indication(asoc, GFP_ATOMIC); if (!ev) goto nomem_ev; @@ -1791,12 +1791,12 @@ static sctp_disposition_t sctp_sf_do_dupcook_d(const struct sctp_endpoint *ep, goto nomem; /* Sockets API Draft Section 5.3.1.6 - * When a peer sends a Adaption Layer Indication parameter, + * When a peer sends a Adaptation Layer Indication parameter, * SCTP delivers this notification to inform the application - * that of the peers requested adaption layer. + * that of the peers requested adaptation layer. */ - if (asoc->peer.adaption_ind) { - ai_ev = sctp_ulpevent_make_adaption_indication(asoc, + if (asoc->peer.adaptation_ind) { + ai_ev = sctp_ulpevent_make_adaptation_indication(asoc, GFP_ATOMIC); if (!ai_ev) goto nomem; diff --git a/net/sctp/sm_statetable.c b/net/sctp/sm_statetable.c index 733dd87b3a7..5f6cc7aa661 100644 --- a/net/sctp/sm_statetable.c +++ b/net/sctp/sm_statetable.c @@ -152,7 +152,7 @@ const sctp_sm_table_entry_t *sctp_sm_lookup_event(sctp_event_t event_type, /* SCTP_STATE_EMPTY */ \ TYPE_SCTP_FUNC(sctp_sf_ootb), \ /* SCTP_STATE_CLOSED */ \ - TYPE_SCTP_FUNC(sctp_sf_discard_chunk), \ + TYPE_SCTP_FUNC(sctp_sf_do_5_2_3_initack), \ /* SCTP_STATE_COOKIE_WAIT */ \ TYPE_SCTP_FUNC(sctp_sf_do_5_1C_ack), \ /* SCTP_STATE_COOKIE_ECHOED */ \ diff --git a/net/sctp/socket.c b/net/sctp/socket.c index 1e8132b8c4d..388d0fb1a37 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -2731,17 +2731,57 @@ static int sctp_setsockopt_peer_primary_addr(struct sock *sk, char __user *optva return err; } -static int sctp_setsockopt_adaption_layer(struct sock *sk, char __user *optval, +static int sctp_setsockopt_adaptation_layer(struct sock *sk, char __user *optval, int optlen) { - struct sctp_setadaption adaption; + struct sctp_setadaptation adaptation; - if (optlen != sizeof(struct sctp_setadaption)) + if (optlen != sizeof(struct sctp_setadaptation)) return -EINVAL; - if (copy_from_user(&adaption, optval, optlen)) + if (copy_from_user(&adaptation, optval, optlen)) return -EFAULT; - sctp_sk(sk)->adaption_ind = adaption.ssb_adaption_ind; + sctp_sk(sk)->adaptation_ind = adaptation.ssb_adaptation_ind; + + return 0; +} + +/* + * 7.1.29. Set or Get the default context (SCTP_CONTEXT) + * + * The context field in the sctp_sndrcvinfo structure is normally only + * used when a failed message is retrieved holding the value that was + * sent down on the actual send call. This option allows the setting of + * a default context on an association basis that will be received on + * reading messages from the peer. This is especially helpful in the + * one-2-many model for an application to keep some reference to an + * internal state machine that is processing messages on the + * association. Note that the setting of this value only effects + * received messages from the peer and does not effect the value that is + * saved with outbound messages. + */ +static int sctp_setsockopt_context(struct sock *sk, char __user *optval, + int optlen) +{ + struct sctp_assoc_value params; + struct sctp_sock *sp; + struct sctp_association *asoc; + + if (optlen != sizeof(struct sctp_assoc_value)) + return -EINVAL; + if (copy_from_user(¶ms, optval, optlen)) + return -EFAULT; + + sp = sctp_sk(sk); + + if (params.assoc_id != 0) { + asoc = sctp_id2assoc(sk, params.assoc_id); + if (!asoc) + return -EINVAL; + asoc->default_rcv_context = params.assoc_value; + } else { + sp->default_rcv_context = params.assoc_value; + } return 0; } @@ -2854,8 +2894,11 @@ SCTP_STATIC int sctp_setsockopt(struct sock *sk, int level, int optname, case SCTP_MAXSEG: retval = sctp_setsockopt_maxseg(sk, optval, optlen); break; - case SCTP_ADAPTION_LAYER: - retval = sctp_setsockopt_adaption_layer(sk, optval, optlen); + case SCTP_ADAPTATION_LAYER: + retval = sctp_setsockopt_adaptation_layer(sk, optval, optlen); + break; + case SCTP_CONTEXT: + retval = sctp_setsockopt_context(sk, optval, optlen); break; default: @@ -3016,6 +3059,8 @@ SCTP_STATIC int sctp_init_sock(struct sock *sk) sp->default_context = 0; sp->default_timetolive = 0; + sp->default_rcv_context = 0; + /* Initialize default setup parameters. These parameters * can be modified with the SCTP_INITMSG socket option or * overridden by the SCTP_INIT CMSG. @@ -3078,7 +3123,7 @@ SCTP_STATIC int sctp_init_sock(struct sock *sk) /* User specified fragmentation limit. */ sp->user_frag = 0; - sp->adaption_ind = 0; + sp->adaptation_ind = 0; sp->pf = sctp_get_pf_specific(sk->sk_family); @@ -3821,10 +3866,9 @@ static int sctp_getsockopt_local_addrs_num_old(struct sock *sk, int len, sctp_assoc_t id; struct sctp_bind_addr *bp; struct sctp_association *asoc; - struct list_head *pos; + struct list_head *pos, *temp; struct sctp_sockaddr_entry *addr; rwlock_t *addr_lock; - unsigned long flags; int cnt = 0; if (len != sizeof(sctp_assoc_t)) @@ -3859,8 +3903,7 @@ static int sctp_getsockopt_local_addrs_num_old(struct sock *sk, int len, addr = list_entry(bp->address_list.next, struct sctp_sockaddr_entry, list); if (sctp_is_any(&addr->a)) { - sctp_spin_lock_irqsave(&sctp_local_addr_lock, flags); - list_for_each(pos, &sctp_local_addr_list) { + list_for_each_safe(pos, temp, &sctp_local_addr_list) { addr = list_entry(pos, struct sctp_sockaddr_entry, list); @@ -3869,8 +3912,6 @@ static int sctp_getsockopt_local_addrs_num_old(struct sock *sk, int len, continue; cnt++; } - sctp_spin_unlock_irqrestore(&sctp_local_addr_lock, - flags); } else { cnt = 1; } @@ -3892,15 +3933,13 @@ done: static int sctp_copy_laddrs_to_user_old(struct sock *sk, __u16 port, int max_addrs, void __user *to) { - struct list_head *pos; + struct list_head *pos, *next; struct sctp_sockaddr_entry *addr; - unsigned long flags; union sctp_addr temp; int cnt = 0; int addrlen; - sctp_spin_lock_irqsave(&sctp_local_addr_lock, flags); - list_for_each(pos, &sctp_local_addr_list) { + list_for_each_safe(pos, next, &sctp_local_addr_list) { addr = list_entry(pos, struct sctp_sockaddr_entry, list); if ((PF_INET == sk->sk_family) && (AF_INET6 == addr->a.sa.sa_family)) @@ -3909,16 +3948,13 @@ static int sctp_copy_laddrs_to_user_old(struct sock *sk, __u16 port, int max_add sctp_get_pf_specific(sk->sk_family)->addr_v4map(sctp_sk(sk), &temp); addrlen = sctp_get_af_specific(temp.sa.sa_family)->sockaddr_len; - if (copy_to_user(to, &temp, addrlen)) { - sctp_spin_unlock_irqrestore(&sctp_local_addr_lock, - flags); + if (copy_to_user(to, &temp, addrlen)) return -EFAULT; - } + to += addrlen; cnt ++; if (cnt >= max_addrs) break; } - sctp_spin_unlock_irqrestore(&sctp_local_addr_lock, flags); return cnt; } @@ -3926,15 +3962,13 @@ static int sctp_copy_laddrs_to_user_old(struct sock *sk, __u16 port, int max_add static int sctp_copy_laddrs_to_user(struct sock *sk, __u16 port, void __user **to, size_t space_left) { - struct list_head *pos; + struct list_head *pos, *next; struct sctp_sockaddr_entry *addr; - unsigned long flags; union sctp_addr temp; int cnt = 0; int addrlen; - sctp_spin_lock_irqsave(&sctp_local_addr_lock, flags); - list_for_each(pos, &sctp_local_addr_list) { + list_for_each_safe(pos, next, &sctp_local_addr_list) { addr = list_entry(pos, struct sctp_sockaddr_entry, list); if ((PF_INET == sk->sk_family) && (AF_INET6 == addr->a.sa.sa_family)) @@ -3945,16 +3979,13 @@ static int sctp_copy_laddrs_to_user(struct sock *sk, __u16 port, addrlen = sctp_get_af_specific(temp.sa.sa_family)->sockaddr_len; if(space_left<addrlen) return -ENOMEM; - if (copy_to_user(*to, &temp, addrlen)) { - sctp_spin_unlock_irqrestore(&sctp_local_addr_lock, - flags); + if (copy_to_user(*to, &temp, addrlen)) return -EFAULT; - } + *to += addrlen; cnt ++; space_left -= addrlen; } - sctp_spin_unlock_irqrestore(&sctp_local_addr_lock, flags); return cnt; } @@ -4179,21 +4210,21 @@ static int sctp_getsockopt_primary_addr(struct sock *sk, int len, } /* - * 7.1.11 Set Adaption Layer Indicator (SCTP_ADAPTION_LAYER) + * 7.1.11 Set Adaptation Layer Indicator (SCTP_ADAPTATION_LAYER) * - * Requests that the local endpoint set the specified Adaption Layer + * Requests that the local endpoint set the specified Adaptation Layer * Indication parameter for all future INIT and INIT-ACK exchanges. */ -static int sctp_getsockopt_adaption_layer(struct sock *sk, int len, +static int sctp_getsockopt_adaptation_layer(struct sock *sk, int len, char __user *optval, int __user *optlen) { - struct sctp_setadaption adaption; + struct sctp_setadaptation adaptation; - if (len != sizeof(struct sctp_setadaption)) + if (len != sizeof(struct sctp_setadaptation)) return -EINVAL; - adaption.ssb_adaption_ind = sctp_sk(sk)->adaption_ind; - if (copy_to_user(optval, &adaption, len)) + adaptation.ssb_adaptation_ind = sctp_sk(sk)->adaptation_ind; + if (copy_to_user(optval, &adaptation, len)) return -EFAULT; return 0; @@ -4435,6 +4466,42 @@ static int sctp_getsockopt_mappedv4(struct sock *sk, int len, } /* + * 7.1.29. Set or Get the default context (SCTP_CONTEXT) + * (chapter and verse is quoted at sctp_setsockopt_context()) + */ +static int sctp_getsockopt_context(struct sock *sk, int len, + char __user *optval, int __user *optlen) +{ + struct sctp_assoc_value params; + struct sctp_sock *sp; + struct sctp_association *asoc; + + if (len != sizeof(struct sctp_assoc_value)) + return -EINVAL; + + if (copy_from_user(¶ms, optval, len)) + return -EFAULT; + + sp = sctp_sk(sk); + + if (params.assoc_id != 0) { + asoc = sctp_id2assoc(sk, params.assoc_id); + if (!asoc) + return -EINVAL; + params.assoc_value = asoc->default_rcv_context; + } else { + params.assoc_value = sp->default_rcv_context; + } + + if (put_user(len, optlen)) + return -EFAULT; + if (copy_to_user(optval, ¶ms, len)) + return -EFAULT; + + return 0; +} + +/* * 7.1.17 Set the maximum fragrmentation size (SCTP_MAXSEG) * * This socket option specifies the maximum size to put in any outgoing @@ -4568,10 +4635,13 @@ SCTP_STATIC int sctp_getsockopt(struct sock *sk, int level, int optname, retval = sctp_getsockopt_peer_addr_info(sk, len, optval, optlen); break; - case SCTP_ADAPTION_LAYER: - retval = sctp_getsockopt_adaption_layer(sk, len, optval, + case SCTP_ADAPTATION_LAYER: + retval = sctp_getsockopt_adaptation_layer(sk, len, optval, optlen); break; + case SCTP_CONTEXT: + retval = sctp_getsockopt_context(sk, len, optval, optlen); + break; default: retval = -ENOPROTOOPT; break; diff --git a/net/sctp/ulpevent.c b/net/sctp/ulpevent.c index e255a709f1b..445e07a7ac4 100644 --- a/net/sctp/ulpevent.c +++ b/net/sctp/ulpevent.c @@ -609,31 +609,31 @@ fail: return NULL; } -/* Create and initialize a SCTP_ADAPTION_INDICATION notification. +/* Create and initialize a SCTP_ADAPTATION_INDICATION notification. * * Socket Extensions for SCTP - * 5.3.1.6 SCTP_ADAPTION_INDICATION + * 5.3.1.6 SCTP_ADAPTATION_INDICATION */ -struct sctp_ulpevent *sctp_ulpevent_make_adaption_indication( +struct sctp_ulpevent *sctp_ulpevent_make_adaptation_indication( const struct sctp_association *asoc, gfp_t gfp) { struct sctp_ulpevent *event; - struct sctp_adaption_event *sai; + struct sctp_adaptation_event *sai; struct sk_buff *skb; - event = sctp_ulpevent_new(sizeof(struct sctp_adaption_event), + event = sctp_ulpevent_new(sizeof(struct sctp_adaptation_event), MSG_NOTIFICATION, gfp); if (!event) goto fail; skb = sctp_event2skb(event); - sai = (struct sctp_adaption_event *) - skb_put(skb, sizeof(struct sctp_adaption_event)); + sai = (struct sctp_adaptation_event *) + skb_put(skb, sizeof(struct sctp_adaptation_event)); - sai->sai_type = SCTP_ADAPTION_INDICATION; + sai->sai_type = SCTP_ADAPTATION_INDICATION; sai->sai_flags = 0; - sai->sai_length = sizeof(struct sctp_adaption_event); - sai->sai_adaption_ind = asoc->peer.adaption_ind; + sai->sai_length = sizeof(struct sctp_adaptation_event); + sai->sai_adaptation_ind = asoc->peer.adaptation_ind; sctp_ulpevent_set_owner(event, asoc); sai->sai_assoc_id = sctp_assoc2id(asoc); @@ -849,8 +849,10 @@ void sctp_ulpevent_read_sndrcvinfo(const struct sctp_ulpevent *event, */ sinfo.sinfo_assoc_id = sctp_assoc2id(event->asoc); + /* context value that is set via SCTP_CONTEXT socket option. */ + sinfo.sinfo_context = event->asoc->default_rcv_context; + /* These fields are not used while receiving. */ - sinfo.sinfo_context = 0; sinfo.sinfo_timetolive = 0; put_cmsg(msghdr, IPPROTO_SCTP, SCTP_SNDRCV, diff --git a/net/socket.c b/net/socket.c index 29ea1de43ec..5f374e1ff52 100644 --- a/net/socket.c +++ b/net/socket.c @@ -362,20 +362,20 @@ static int sock_attach_fd(struct socket *sock, struct file *file) this.name = name; this.hash = 0; - file->f_dentry = d_alloc(sock_mnt->mnt_sb->s_root, &this); - if (unlikely(!file->f_dentry)) + file->f_path.dentry = d_alloc(sock_mnt->mnt_sb->s_root, &this); + if (unlikely(!file->f_path.dentry)) return -ENOMEM; - file->f_dentry->d_op = &sockfs_dentry_operations; + file->f_path.dentry->d_op = &sockfs_dentry_operations; /* * We dont want to push this dentry into global dentry hash table. * We pretend dentry is already hashed, by unsetting DCACHE_UNHASHED * This permits a working /proc/$pid/fd/XXX on sockets */ - file->f_dentry->d_flags &= ~DCACHE_UNHASHED; - d_instantiate(file->f_dentry, SOCK_INODE(sock)); - file->f_vfsmnt = mntget(sock_mnt); - file->f_mapping = file->f_dentry->d_inode->i_mapping; + file->f_path.dentry->d_flags &= ~DCACHE_UNHASHED; + d_instantiate(file->f_path.dentry, SOCK_INODE(sock)); + file->f_path.mnt = mntget(sock_mnt); + file->f_mapping = file->f_path.dentry->d_inode->i_mapping; sock->file = file; file->f_op = SOCK_INODE(sock)->i_fop = &socket_file_ops; @@ -407,24 +407,11 @@ int sock_map_fd(struct socket *sock) static struct socket *sock_from_file(struct file *file, int *err) { - struct inode *inode; - struct socket *sock; - if (file->f_op == &socket_file_ops) return file->private_data; /* set in sock_map_fd */ - inode = file->f_dentry->d_inode; - if (!S_ISSOCK(inode->i_mode)) { - *err = -ENOTSOCK; - return NULL; - } - - sock = SOCKET_I(inode); - if (sock->file != file) { - printk(KERN_ERR "socki_lookup: socket file changed!\n"); - sock->file = file; - } - return sock; + *err = -ENOTSOCK; + return NULL; } /** @@ -1527,8 +1514,9 @@ asmlinkage long sys_sendto(int fd, void __user *buff, size_t len, struct file *sock_file; sock_file = fget_light(fd, &fput_needed); + err = -EBADF; if (!sock_file) - return -EBADF; + goto out; sock = sock_from_file(sock_file, &err); if (!sock) @@ -1555,6 +1543,7 @@ asmlinkage long sys_sendto(int fd, void __user *buff, size_t len, out_put: fput_light(sock_file, fput_needed); +out: return err; } @@ -1586,12 +1575,13 @@ asmlinkage long sys_recvfrom(int fd, void __user *ubuf, size_t size, int fput_needed; sock_file = fget_light(fd, &fput_needed); + err = -EBADF; if (!sock_file) - return -EBADF; + goto out; sock = sock_from_file(sock_file, &err); if (!sock) - goto out; + goto out_put; msg.msg_control = NULL; msg.msg_controllen = 0; @@ -1610,8 +1600,9 @@ asmlinkage long sys_recvfrom(int fd, void __user *ubuf, size_t size, if (err2 < 0) err = err2; } -out: +out_put: fput_light(sock_file, fput_needed); +out: return err; } diff --git a/net/sunrpc/auth_gss/auth_gss.c b/net/sunrpc/auth_gss/auth_gss.c index a02ecc1f230..e1a104abb78 100644 --- a/net/sunrpc/auth_gss/auth_gss.c +++ b/net/sunrpc/auth_gss/auth_gss.c @@ -501,7 +501,7 @@ gss_pipe_downcall(struct file *filp, const char __user *src, size_t mlen) if (!buf) goto out; - clnt = RPC_I(filp->f_dentry->d_inode)->private; + clnt = RPC_I(filp->f_path.dentry->d_inode)->private; err = -EFAULT; if (copy_from_user(buf, src, mlen)) goto err; diff --git a/net/sunrpc/auth_gss/gss_spkm3_mech.c b/net/sunrpc/auth_gss/gss_spkm3_mech.c index 41465072d0b..8ef3f1c1943 100644 --- a/net/sunrpc/auth_gss/gss_spkm3_mech.c +++ b/net/sunrpc/auth_gss/gss_spkm3_mech.c @@ -228,7 +228,7 @@ static int __init init_spkm3_module(void) status = gss_mech_register(&gss_spkm3_mech); if (status) printk("Failed to register spkm3 gss mechanism!\n"); - return 0; + return status; } static void __exit cleanup_spkm3_module(void) diff --git a/net/sunrpc/auth_gss/svcauth_gss.c b/net/sunrpc/auth_gss/svcauth_gss.c index 700353b330f..066c64a97fd 100644 --- a/net/sunrpc/auth_gss/svcauth_gss.c +++ b/net/sunrpc/auth_gss/svcauth_gss.c @@ -804,19 +804,19 @@ unwrap_integ_data(struct xdr_buf *buf, u32 seq, struct gss_ctx *ctx) integ_len = svc_getnl(&buf->head[0]); if (integ_len & 3) - goto out; + return stat; if (integ_len > buf->len) - goto out; + return stat; if (xdr_buf_subsegment(buf, &integ_buf, 0, integ_len)) BUG(); /* copy out mic... */ if (read_u32_from_xdr_buf(buf, integ_len, &mic.len)) BUG(); if (mic.len > RPC_MAX_AUTH_SIZE) - goto out; + return stat; mic.data = kmalloc(mic.len, GFP_KERNEL); if (!mic.data) - goto out; + return stat; if (read_bytes_from_xdr_buf(buf, integ_len + 4, mic.data, mic.len)) goto out; maj_stat = gss_verify_mic(ctx, &integ_buf, &mic); @@ -826,6 +826,7 @@ unwrap_integ_data(struct xdr_buf *buf, u32 seq, struct gss_ctx *ctx) goto out; stat = 0; out: + kfree(mic.data); return stat; } @@ -1065,7 +1066,7 @@ svcauth_gss_accept(struct svc_rqst *rqstp, __be32 *authp) } switch(cache_check(&rsi_cache, &rsip->h, &rqstp->rq_chandle)) { case -EAGAIN: - goto drop; + case -ETIMEDOUT: case -ENOENT: goto drop; case 0: diff --git a/net/sunrpc/cache.c b/net/sunrpc/cache.c index d96fd466a9a..14274490f92 100644 --- a/net/sunrpc/cache.c +++ b/net/sunrpc/cache.c @@ -34,7 +34,7 @@ #define RPCDBG_FACILITY RPCDBG_CACHE -static void cache_defer_req(struct cache_req *req, struct cache_head *item); +static int cache_defer_req(struct cache_req *req, struct cache_head *item); static void cache_revisit_request(struct cache_head *item); static void cache_init(struct cache_head *h) @@ -185,6 +185,7 @@ static int cache_make_upcall(struct cache_detail *detail, struct cache_head *h); * * Returns 0 if the cache_head can be used, or cache_puts it and returns * -EAGAIN if upcall is pending, + * -ETIMEDOUT if upcall failed and should be retried, * -ENOENT if cache entry was negative */ int cache_check(struct cache_detail *detail, @@ -236,7 +237,8 @@ int cache_check(struct cache_detail *detail, } if (rv == -EAGAIN) - cache_defer_req(rqstp, h); + if (cache_defer_req(rqstp, h) != 0) + rv = -ETIMEDOUT; if (rv) cache_put(h, detail); @@ -523,14 +525,21 @@ static LIST_HEAD(cache_defer_list); static struct list_head cache_defer_hash[DFR_HASHSIZE]; static int cache_defer_cnt; -static void cache_defer_req(struct cache_req *req, struct cache_head *item) +static int cache_defer_req(struct cache_req *req, struct cache_head *item) { struct cache_deferred_req *dreq; int hash = DFR_HASH(item); + if (cache_defer_cnt >= DFR_MAX) { + /* too much in the cache, randomly drop this one, + * or continue and drop the oldest below + */ + if (net_random()&1) + return -ETIMEDOUT; + } dreq = req->defer(req); if (dreq == NULL) - return; + return -ETIMEDOUT; dreq->item = item; dreq->recv_time = get_seconds(); @@ -546,17 +555,8 @@ static void cache_defer_req(struct cache_req *req, struct cache_head *item) /* it is in, now maybe clean up */ dreq = NULL; if (++cache_defer_cnt > DFR_MAX) { - /* too much in the cache, randomly drop - * first or last - */ - if (net_random()&1) - dreq = list_entry(cache_defer_list.next, - struct cache_deferred_req, - recent); - else - dreq = list_entry(cache_defer_list.prev, - struct cache_deferred_req, - recent); + dreq = list_entry(cache_defer_list.prev, + struct cache_deferred_req, recent); list_del(&dreq->recent); list_del(&dreq->hash); cache_defer_cnt--; @@ -571,6 +571,7 @@ static void cache_defer_req(struct cache_req *req, struct cache_head *item) /* must have just been validated... */ cache_revisit_request(item); } + return 0; } static void cache_revisit_request(struct cache_head *item) @@ -670,7 +671,7 @@ cache_read(struct file *filp, char __user *buf, size_t count, loff_t *ppos) { struct cache_reader *rp = filp->private_data; struct cache_request *rq; - struct cache_detail *cd = PDE(filp->f_dentry->d_inode)->data; + struct cache_detail *cd = PDE(filp->f_path.dentry->d_inode)->data; int err; if (count == 0) @@ -747,7 +748,7 @@ cache_write(struct file *filp, const char __user *buf, size_t count, loff_t *ppos) { int err; - struct cache_detail *cd = PDE(filp->f_dentry->d_inode)->data; + struct cache_detail *cd = PDE(filp->f_path.dentry->d_inode)->data; if (count == 0) return 0; @@ -778,7 +779,7 @@ cache_poll(struct file *filp, poll_table *wait) unsigned int mask; struct cache_reader *rp = filp->private_data; struct cache_queue *cq; - struct cache_detail *cd = PDE(filp->f_dentry->d_inode)->data; + struct cache_detail *cd = PDE(filp->f_path.dentry->d_inode)->data; poll_wait(filp, &queue_wait, wait); @@ -1254,7 +1255,7 @@ static struct file_operations content_file_operations = { static ssize_t read_flush(struct file *file, char __user *buf, size_t count, loff_t *ppos) { - struct cache_detail *cd = PDE(file->f_dentry->d_inode)->data; + struct cache_detail *cd = PDE(file->f_path.dentry->d_inode)->data; char tbuf[20]; unsigned long p = *ppos; int len; @@ -1275,7 +1276,7 @@ static ssize_t read_flush(struct file *file, char __user *buf, static ssize_t write_flush(struct file * file, const char __user * buf, size_t count, loff_t *ppos) { - struct cache_detail *cd = PDE(file->f_dentry->d_inode)->data; + struct cache_detail *cd = PDE(file->f_path.dentry->d_inode)->data; char tbuf[20]; char *ep; long flushtime; diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index aba528b9ae7..16c9fbc1db6 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -490,16 +490,14 @@ int rpc_call_sync(struct rpc_clnt *clnt, struct rpc_message *msg, int flags) /* Set up the call info struct and execute the task */ status = task->tk_status; - if (status != 0) { - rpc_release_task(task); + if (status != 0) goto out; - } atomic_inc(&task->tk_count); status = rpc_execute(task); if (status == 0) status = task->tk_status; - rpc_put_task(task); out: + rpc_put_task(task); rpc_restore_sigmask(&oldset); return status; } @@ -537,7 +535,7 @@ rpc_call_async(struct rpc_clnt *clnt, struct rpc_message *msg, int flags, if (status == 0) rpc_execute(task); else - rpc_release_task(task); + rpc_put_task(task); rpc_restore_sigmask(&oldset); return status; diff --git a/net/sunrpc/rpc_pipe.c b/net/sunrpc/rpc_pipe.c index 19703aa9659..89273d35e0c 100644 --- a/net/sunrpc/rpc_pipe.c +++ b/net/sunrpc/rpc_pipe.c @@ -214,7 +214,7 @@ out: static ssize_t rpc_pipe_read(struct file *filp, char __user *buf, size_t len, loff_t *offset) { - struct inode *inode = filp->f_dentry->d_inode; + struct inode *inode = filp->f_path.dentry->d_inode; struct rpc_inode *rpci = RPC_I(inode); struct rpc_pipe_msg *msg; int res = 0; @@ -257,7 +257,7 @@ out_unlock: static ssize_t rpc_pipe_write(struct file *filp, const char __user *buf, size_t len, loff_t *offset) { - struct inode *inode = filp->f_dentry->d_inode; + struct inode *inode = filp->f_path.dentry->d_inode; struct rpc_inode *rpci = RPC_I(inode); int res; @@ -275,7 +275,7 @@ rpc_pipe_poll(struct file *filp, struct poll_table_struct *wait) struct rpc_inode *rpci; unsigned int mask = 0; - rpci = RPC_I(filp->f_dentry->d_inode); + rpci = RPC_I(filp->f_path.dentry->d_inode); poll_wait(filp, &rpci->waitq, wait); mask = POLLOUT | POLLWRNORM; @@ -290,7 +290,7 @@ static int rpc_pipe_ioctl(struct inode *ino, struct file *filp, unsigned int cmd, unsigned long arg) { - struct rpc_inode *rpci = RPC_I(filp->f_dentry->d_inode); + struct rpc_inode *rpci = RPC_I(filp->f_path.dentry->d_inode); int len; switch (cmd) { diff --git a/net/sunrpc/sched.c b/net/sunrpc/sched.c index 79bc4cdf5d4..fc083f0b354 100644 --- a/net/sunrpc/sched.c +++ b/net/sunrpc/sched.c @@ -42,6 +42,7 @@ static mempool_t *rpc_buffer_mempool __read_mostly; static void __rpc_default_timer(struct rpc_task *task); static void rpciod_killall(void); static void rpc_async_schedule(struct work_struct *); +static void rpc_release_task(struct rpc_task *task); /* * RPC tasks sit here while waiting for conditions to improve. @@ -896,7 +897,7 @@ void rpc_put_task(struct rpc_task *task) } EXPORT_SYMBOL(rpc_put_task); -void rpc_release_task(struct rpc_task *task) +static void rpc_release_task(struct rpc_task *task) { #ifdef RPC_DEBUG BUG_ON(task->tk_magic != RPC_TASK_MAGIC_ID); diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c index eb44ec929ca..c1f878131ac 100644 --- a/net/sunrpc/svc.c +++ b/net/sunrpc/svc.c @@ -26,7 +26,6 @@ #include <linux/sunrpc/clnt.h> #define RPCDBG_FACILITY RPCDBG_SVCDSP -#define RPC_PARANOIA 1 /* * Mode for mapping cpus to pools. @@ -308,7 +307,7 @@ __svc_create(struct svc_program *prog, unsigned int bufsize, int npools, serv->sv_nrpools = npools; serv->sv_pools = - kcalloc(sizeof(struct svc_pool), serv->sv_nrpools, + kcalloc(serv->sv_nrpools, sizeof(struct svc_pool), GFP_KERNEL); if (!serv->sv_pools) { kfree(serv); @@ -387,7 +386,7 @@ svc_destroy(struct svc_serv *serv) svsk = list_entry(serv->sv_tempsocks.next, struct svc_sock, sk_list); - svc_delete_socket(svsk); + svc_close_socket(svsk); } if (serv->sv_shutdown) serv->sv_shutdown(serv); @@ -396,7 +395,7 @@ svc_destroy(struct svc_serv *serv) svsk = list_entry(serv->sv_permsocks.next, struct svc_sock, sk_list); - svc_delete_socket(svsk); + svc_close_socket(svsk); } cache_clean_deferred(serv); @@ -872,15 +871,15 @@ svc_process(struct svc_rqst *rqstp) return 0; err_short_len: -#ifdef RPC_PARANOIA - printk("svc: short len %Zd, dropping request\n", argv->iov_len); -#endif + if (net_ratelimit()) + printk("svc: short len %Zd, dropping request\n", argv->iov_len); + goto dropit; /* drop request */ err_bad_dir: -#ifdef RPC_PARANOIA - printk("svc: bad direction %d, dropping request\n", dir); -#endif + if (net_ratelimit()) + printk("svc: bad direction %d, dropping request\n", dir); + serv->sv_stats->rpcbadfmt++; goto dropit; /* drop request */ @@ -909,9 +908,10 @@ err_bad_prog: goto sendit; err_bad_vers: -#ifdef RPC_PARANOIA - printk("svc: unknown version (%d)\n", vers); -#endif + if (net_ratelimit()) + printk("svc: unknown version (%d for prog %d, %s)\n", + vers, prog, progp->pg_name); + serv->sv_stats->rpcbadfmt++; svc_putnl(resv, RPC_PROG_MISMATCH); svc_putnl(resv, progp->pg_lovers); @@ -919,17 +919,17 @@ err_bad_vers: goto sendit; err_bad_proc: -#ifdef RPC_PARANOIA - printk("svc: unknown procedure (%d)\n", proc); -#endif + if (net_ratelimit()) + printk("svc: unknown procedure (%d)\n", proc); + serv->sv_stats->rpcbadfmt++; svc_putnl(resv, RPC_PROC_UNAVAIL); goto sendit; err_garbage: -#ifdef RPC_PARANOIA - printk("svc: failed to decode args\n"); -#endif + if (net_ratelimit()) + printk("svc: failed to decode args\n"); + rpc_stat = rpc_garbage_args; err_bad: serv->sv_stats->rpcbadfmt++; diff --git a/net/sunrpc/svcauth_unix.c b/net/sunrpc/svcauth_unix.c index a0a953a430c..0d1e8fb83b9 100644 --- a/net/sunrpc/svcauth_unix.c +++ b/net/sunrpc/svcauth_unix.c @@ -53,6 +53,10 @@ struct auth_domain *unix_domain_find(char *name) return NULL; kref_init(&new->h.ref); new->h.name = kstrdup(name, GFP_KERNEL); + if (new->h.name == NULL) { + kfree(new); + return NULL; + } new->h.flavour = &svcauth_unix; new->addr_changes = 0; rv = auth_domain_lookup(name, &new->h); @@ -435,6 +439,7 @@ svcauth_unix_set_client(struct svc_rqst *rqstp) default: BUG(); case -EAGAIN: + case -ETIMEDOUT: return SVC_DROP; case -ENOENT: return SVC_DENIED; diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c index 99f54fb6d66..cf93cd1d857 100644 --- a/net/sunrpc/svcsock.c +++ b/net/sunrpc/svcsock.c @@ -62,6 +62,12 @@ * after a clear, the socket must be read/accepted * if this succeeds, it must be set again. * SK_CLOSE can set at any time. It is never cleared. + * sk_inuse contains a bias of '1' until SK_DEAD is set. + * so when sk_inuse hits zero, we know the socket is dead + * and no-one is using it. + * SK_DEAD can only be set while SK_BUSY is held which ensures + * no other thread will be using the socket or will try to + * set SK_DEAD. * */ @@ -70,6 +76,7 @@ static struct svc_sock *svc_setup_socket(struct svc_serv *, struct socket *, int *errp, int pmap_reg); +static void svc_delete_socket(struct svc_sock *svsk); static void svc_udp_data_ready(struct sock *, int); static int svc_udp_recvfrom(struct svc_rqst *); static int svc_udp_sendto(struct svc_rqst *); @@ -329,8 +336,9 @@ void svc_reserve(struct svc_rqst *rqstp, int space) static inline void svc_sock_put(struct svc_sock *svsk) { - if (atomic_dec_and_test(&svsk->sk_inuse) && - test_bit(SK_DEAD, &svsk->sk_flags)) { + if (atomic_dec_and_test(&svsk->sk_inuse)) { + BUG_ON(! test_bit(SK_DEAD, &svsk->sk_flags)); + dprintk("svc: releasing dead socket\n"); if (svsk->sk_sock->file) sockfd_put(svsk->sk_sock); @@ -520,7 +528,7 @@ svc_sock_names(char *buf, struct svc_serv *serv, char *toclose) if (!serv) return 0; - spin_lock(&serv->sv_lock); + spin_lock_bh(&serv->sv_lock); list_for_each_entry(svsk, &serv->sv_permsocks, sk_list) { int onelen = one_sock_name(buf+len, svsk); if (toclose && strcmp(toclose, buf+len) == 0) @@ -528,12 +536,12 @@ svc_sock_names(char *buf, struct svc_serv *serv, char *toclose) else len += onelen; } - spin_unlock(&serv->sv_lock); + spin_unlock_bh(&serv->sv_lock); if (closesk) /* Should unregister with portmap, but you cannot * unregister just one protocol... */ - svc_delete_socket(closesk); + svc_close_socket(closesk); else if (toclose) return -ENOENT; return len; @@ -683,6 +691,11 @@ svc_udp_recvfrom(struct svc_rqst *rqstp) return svc_deferred_recv(rqstp); } + if (test_bit(SK_CLOSE, &svsk->sk_flags)) { + svc_delete_socket(svsk); + return 0; + } + clear_bit(SK_DATA, &svsk->sk_flags); while ((skb = skb_recv_datagram(svsk->sk_sk, 0, 1, &err)) == NULL) { if (err == -EAGAIN) { @@ -1062,15 +1075,19 @@ svc_tcp_recvfrom(struct svc_rqst *rqstp) * bit set in the fragment length header. * But apparently no known nfs clients send fragmented * records. */ - printk(KERN_NOTICE "RPC: bad TCP reclen 0x%08lx (non-terminal)\n", - (unsigned long) svsk->sk_reclen); + if (net_ratelimit()) + printk(KERN_NOTICE "RPC: bad TCP reclen 0x%08lx" + " (non-terminal)\n", + (unsigned long) svsk->sk_reclen); goto err_delete; } svsk->sk_reclen &= 0x7fffffff; dprintk("svc: TCP record, %d bytes\n", svsk->sk_reclen); if (svsk->sk_reclen > serv->sv_max_mesg) { - printk(KERN_NOTICE "RPC: bad TCP reclen 0x%08lx (large)\n", - (unsigned long) svsk->sk_reclen); + if (net_ratelimit()) + printk(KERN_NOTICE "RPC: bad TCP reclen 0x%08lx" + " (large)\n", + (unsigned long) svsk->sk_reclen); goto err_delete; } } @@ -1172,7 +1189,8 @@ svc_tcp_sendto(struct svc_rqst *rqstp) rqstp->rq_sock->sk_server->sv_name, (sent<0)?"got error":"sent only", sent, xbufp->len); - svc_delete_socket(rqstp->rq_sock); + set_bit(SK_CLOSE, &rqstp->rq_sock->sk_flags); + svc_sock_enqueue(rqstp->rq_sock); sent = -EAGAIN; } return sent; @@ -1278,6 +1296,8 @@ svc_recv(struct svc_rqst *rqstp, long timeout) schedule_timeout_uninterruptible(msecs_to_jiffies(500)); rqstp->rq_pages[i] = p; } + rqstp->rq_pages[i++] = NULL; /* this might be seen in nfs_read_actor */ + BUG_ON(pages >= RPCSVC_MAXPAGES); /* Make arg->head point to first page and arg->pages point to rest */ arg = &rqstp->rq_arg; @@ -1489,7 +1509,7 @@ svc_setup_socket(struct svc_serv *serv, struct socket *sock, svsk->sk_odata = inet->sk_data_ready; svsk->sk_owspace = inet->sk_write_space; svsk->sk_server = serv; - atomic_set(&svsk->sk_inuse, 0); + atomic_set(&svsk->sk_inuse, 1); svsk->sk_lastrecv = get_seconds(); spin_lock_init(&svsk->sk_defer_lock); INIT_LIST_HEAD(&svsk->sk_deferred); @@ -1612,7 +1632,7 @@ bummer: /* * Remove a dead socket */ -void +static void svc_delete_socket(struct svc_sock *svsk) { struct svc_serv *serv; @@ -1638,16 +1658,26 @@ svc_delete_socket(struct svc_sock *svsk) * while still attached to a queue, the queue itself * is about to be destroyed (in svc_destroy). */ - if (!test_and_set_bit(SK_DEAD, &svsk->sk_flags)) + if (!test_and_set_bit(SK_DEAD, &svsk->sk_flags)) { + BUG_ON(atomic_read(&svsk->sk_inuse)<2); + atomic_dec(&svsk->sk_inuse); if (test_bit(SK_TEMP, &svsk->sk_flags)) serv->sv_tmpcnt--; + } - /* This atomic_inc should be needed - svc_delete_socket - * should have the semantic of dropping a reference. - * But it doesn't yet.... - */ - atomic_inc(&svsk->sk_inuse); spin_unlock_bh(&serv->sv_lock); +} + +void svc_close_socket(struct svc_sock *svsk) +{ + set_bit(SK_CLOSE, &svsk->sk_flags); + if (test_and_set_bit(SK_BUSY, &svsk->sk_flags)) + /* someone else will have to effect the close */ + return; + + atomic_inc(&svsk->sk_inuse); + svc_delete_socket(svsk); + clear_bit(SK_BUSY, &svsk->sk_flags); svc_sock_put(svsk); } diff --git a/net/tipc/config.c b/net/tipc/config.c index 458a2c46cef..baf55c459c8 100644 --- a/net/tipc/config.c +++ b/net/tipc/config.c @@ -208,7 +208,7 @@ static void cfg_cmd_event(struct tipc_cmd_msg *msg, if (mng.link_subscriptions > 64) break; - sub = (struct subscr_data *)kmalloc(sizeof(*sub), + sub = kmalloc(sizeof(*sub), GFP_ATOMIC); if (sub == NULL) { warn("Memory squeeze; dropped remote link subscription\n"); diff --git a/net/unix/garbage.c b/net/unix/garbage.c index 746c2f4a5fa..f14ad6635fc 100644 --- a/net/unix/garbage.c +++ b/net/unix/garbage.c @@ -96,7 +96,7 @@ atomic_t unix_tot_inflight = ATOMIC_INIT(0); static struct sock *unix_get_socket(struct file *filp) { struct sock *u_sock = NULL; - struct inode *inode = filp->f_dentry->d_inode; + struct inode *inode = filp->f_path.dentry->d_inode; /* * Socket ? diff --git a/net/wanrouter/wanmain.c b/net/wanrouter/wanmain.c index 769cdd62c1b..4d90a179aed 100644 --- a/net/wanrouter/wanmain.c +++ b/net/wanrouter/wanmain.c @@ -86,8 +86,8 @@ static int wanrouter_device_del_if(struct wan_device *wandev, static struct wan_device *wanrouter_find_device(char *name); static int wanrouter_delete_interface(struct wan_device *wandev, char *name); -void lock_adapter_irq(spinlock_t *lock, unsigned long *smp_flags); -void unlock_adapter_irq(spinlock_t *lock, unsigned long *smp_flags); +static void lock_adapter_irq(spinlock_t *lock, unsigned long *smp_flags); +static void unlock_adapter_irq(spinlock_t *lock, unsigned long *smp_flags); @@ -104,8 +104,8 @@ struct wan_device* wanrouter_router_devlist; /* list of registered devices */ * Organize Unique Identifiers for encapsulation/decapsulation */ -static unsigned char wanrouter_oui_ether[] = { 0x00, 0x00, 0x00 }; #if 0 +static unsigned char wanrouter_oui_ether[] = { 0x00, 0x00, 0x00 }; static unsigned char wanrouter_oui_802_2[] = { 0x00, 0x80, 0xC2 }; #endif @@ -246,6 +246,8 @@ int unregister_wan_device(char *name) return 0; } +#if 0 + /* * Encapsulate packet. * @@ -341,6 +343,7 @@ __be16 wanrouter_type_trans(struct sk_buff *skb, struct net_device *dev) return ethertype; } +#endif /* 0 */ /* * WAN device IOCTL. @@ -799,23 +802,19 @@ static int wanrouter_delete_interface(struct wan_device *wandev, char *name) return 0; } -void lock_adapter_irq(spinlock_t *lock, unsigned long *smp_flags) +static void lock_adapter_irq(spinlock_t *lock, unsigned long *smp_flags) { spin_lock_irqsave(lock, *smp_flags); } -void unlock_adapter_irq(spinlock_t *lock, unsigned long *smp_flags) +static void unlock_adapter_irq(spinlock_t *lock, unsigned long *smp_flags) { spin_unlock_irqrestore(lock, *smp_flags); } EXPORT_SYMBOL(register_wan_device); EXPORT_SYMBOL(unregister_wan_device); -EXPORT_SYMBOL(wanrouter_encapsulate); -EXPORT_SYMBOL(wanrouter_type_trans); -EXPORT_SYMBOL(lock_adapter_irq); -EXPORT_SYMBOL(unlock_adapter_irq); MODULE_LICENSE("GPL"); diff --git a/net/x25/Makefile b/net/x25/Makefile index 587a71aa411..a2c34ab6f19 100644 --- a/net/x25/Makefile +++ b/net/x25/Makefile @@ -6,5 +6,5 @@ obj-$(CONFIG_X25) += x25.o x25-y := af_x25.o x25_dev.o x25_facilities.o x25_in.o \ x25_link.o x25_out.o x25_route.o x25_subr.o \ - x25_timer.o x25_proc.o + x25_timer.o x25_proc.o x25_forward.o x25-$(CONFIG_SYSCTL) += sysctl_net_x25.o diff --git a/net/x25/af_x25.c b/net/x25/af_x25.c index 52a2726d327..b37d894358e 100644 --- a/net/x25/af_x25.c +++ b/net/x25/af_x25.c @@ -63,6 +63,7 @@ int sysctl_x25_call_request_timeout = X25_DEFAULT_T21; int sysctl_x25_reset_request_timeout = X25_DEFAULT_T22; int sysctl_x25_clear_request_timeout = X25_DEFAULT_T23; int sysctl_x25_ack_holdback_timeout = X25_DEFAULT_T2; +int sysctl_x25_forward = 0; HLIST_HEAD(x25_list); DEFINE_RWLOCK(x25_list_lock); @@ -484,8 +485,6 @@ out: return sk; } -void x25_init_timers(struct sock *sk); - static int x25_create(struct socket *sock, int protocol) { struct sock *sk; @@ -848,7 +847,7 @@ int x25_rx_call_request(struct sk_buff *skb, struct x25_neigh *nb, struct x25_address source_addr, dest_addr; struct x25_facilities facilities; struct x25_dte_facilities dte_facilities; - int len, rc; + int len, addr_len, rc; /* * Remove the LCI and frame type. @@ -859,7 +858,8 @@ int x25_rx_call_request(struct sk_buff *skb, struct x25_neigh *nb, * Extract the X.25 addresses and convert them to ASCII strings, * and remove them. */ - skb_pull(skb, x25_addr_ntoa(skb->data, &source_addr, &dest_addr)); + addr_len = x25_addr_ntoa(skb->data, &source_addr, &dest_addr); + skb_pull(skb, addr_len); /* * Get the length of the facilities, skip past them for the moment @@ -875,11 +875,28 @@ int x25_rx_call_request(struct sk_buff *skb, struct x25_neigh *nb, sk = x25_find_listener(&source_addr,skb); skb_push(skb,len); + if (sk != NULL && sk_acceptq_is_full(sk)) { + goto out_sock_put; + } + /* - * We can't accept the Call Request. + * We dont have any listeners for this incoming call. + * Try forwarding it. */ - if (sk == NULL || sk_acceptq_is_full(sk)) - goto out_clear_request; + if (sk == NULL) { + skb_push(skb, addr_len + X25_STD_MIN_LEN); + if (sysctl_x25_forward && + x25_forward_call(&dest_addr, nb, skb, lci) > 0) + { + /* Call was forwarded, dont process it any more */ + kfree_skb(skb); + rc = 1; + goto out; + } else { + /* No listeners, can't forward, clear the call */ + goto out_clear_request; + } + } /* * Try to reach a compromise on the requested facilities. @@ -1600,6 +1617,9 @@ void x25_kill_by_neigh(struct x25_neigh *nb) x25_disconnect(s, ENETUNREACH, 0, 0); write_unlock_bh(&x25_list_lock); + + /* Remove any related forwards */ + x25_clear_forward_by_dev(nb->dev); } static int __init x25_init(void) diff --git a/net/x25/sysctl_net_x25.c b/net/x25/sysctl_net_x25.c index aabda59c824..2b2e7fd689f 100644 --- a/net/x25/sysctl_net_x25.c +++ b/net/x25/sysctl_net_x25.c @@ -73,6 +73,14 @@ static struct ctl_table x25_table[] = { .extra1 = &min_timer, .extra2 = &max_timer, }, + { + .ctl_name = NET_X25_FORWARD, + .procname = "x25_forward", + .data = &sysctl_x25_forward, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = &proc_dointvec, + }, { 0, }, }; diff --git a/net/x25/x25_dev.c b/net/x25/x25_dev.c index 47b68a30167..f099fd6a7c0 100644 --- a/net/x25/x25_dev.c +++ b/net/x25/x25_dev.c @@ -56,6 +56,7 @@ static int x25_receive_data(struct sk_buff *skb, struct x25_neigh *nb) sk_add_backlog(sk, skb); } bh_unlock_sock(sk); + sock_put(sk); return queued; } @@ -66,9 +67,18 @@ static int x25_receive_data(struct sk_buff *skb, struct x25_neigh *nb) return x25_rx_call_request(skb, nb, lci); /* - * Its not a Call Request, nor is it a control frame. - * Let caller throw it away. + * Its not a Call Request, nor is it a control frame. + * Can we forward it? */ + + if (x25_forward_data(lci, nb, skb)) { + if (frametype == X25_CLEAR_CONFIRMATION) { + x25_clear_forward_by_lci(lci); + } + kfree_skb(skb); + return 1; + } + /* x25_transmit_clear_request(nb, lci, 0x0D); */ diff --git a/net/x25/x25_facilities.c b/net/x25/x25_facilities.c index 9f42b9c9de3..27f5cc7966f 100644 --- a/net/x25/x25_facilities.c +++ b/net/x25/x25_facilities.c @@ -254,7 +254,7 @@ int x25_negotiate_facilities(struct sk_buff *skb, struct sock *sk, * They want reverse charging, we won't accept it. */ if ((theirs.reverse & 0x01 ) && (ours->reverse & 0x01)) { - SOCK_DEBUG(sk, "X.25: rejecting reverse charging request"); + SOCK_DEBUG(sk, "X.25: rejecting reverse charging request\n"); return -1; } @@ -262,29 +262,29 @@ int x25_negotiate_facilities(struct sk_buff *skb, struct sock *sk, if (theirs.throughput) { if (theirs.throughput < ours->throughput) { - SOCK_DEBUG(sk, "X.25: throughput negotiated down"); + SOCK_DEBUG(sk, "X.25: throughput negotiated down\n"); new->throughput = theirs.throughput; } } if (theirs.pacsize_in && theirs.pacsize_out) { if (theirs.pacsize_in < ours->pacsize_in) { - SOCK_DEBUG(sk, "X.25: packet size inwards negotiated down"); + SOCK_DEBUG(sk, "X.25: packet size inwards negotiated down\n"); new->pacsize_in = theirs.pacsize_in; } if (theirs.pacsize_out < ours->pacsize_out) { - SOCK_DEBUG(sk, "X.25: packet size outwards negotiated down"); + SOCK_DEBUG(sk, "X.25: packet size outwards negotiated down\n"); new->pacsize_out = theirs.pacsize_out; } } if (theirs.winsize_in && theirs.winsize_out) { if (theirs.winsize_in < ours->winsize_in) { - SOCK_DEBUG(sk, "X.25: window size inwards negotiated down"); + SOCK_DEBUG(sk, "X.25: window size inwards negotiated down\n"); new->winsize_in = theirs.winsize_in; } if (theirs.winsize_out < ours->winsize_out) { - SOCK_DEBUG(sk, "X.25: window size outwards negotiated down"); + SOCK_DEBUG(sk, "X.25: window size outwards negotiated down\n"); new->winsize_out = theirs.winsize_out; } } diff --git a/net/x25/x25_forward.c b/net/x25/x25_forward.c new file mode 100644 index 00000000000..d339e0c810a --- /dev/null +++ b/net/x25/x25_forward.c @@ -0,0 +1,163 @@ +/* + * This module: + * This module is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + * History + * 03-01-2007 Added forwarding for x.25 Andrew Hendry + */ +#include <linux/if_arp.h> +#include <linux/init.h> +#include <net/x25.h> + +struct list_head x25_forward_list = LIST_HEAD_INIT(x25_forward_list); +DEFINE_RWLOCK(x25_forward_list_lock); + +int x25_forward_call(struct x25_address *dest_addr, struct x25_neigh *from, + struct sk_buff *skb, int lci) +{ + struct x25_route *rt; + struct x25_neigh *neigh_new = NULL; + struct list_head *entry; + struct x25_forward *x25_frwd, *new_frwd; + struct sk_buff *skbn; + short same_lci = 0; + int rc = 0; + + if ((rt = x25_get_route(dest_addr)) != NULL) { + + if ((neigh_new = x25_get_neigh(rt->dev)) == NULL) { + /* This shouldnt happen, if it occurs somehow + * do something sensible + */ + goto out_put_route; + } + + /* Avoid a loop. This is the normal exit path for a + * system with only one x.25 iface and default route + */ + if (rt->dev == from->dev) { + goto out_put_nb; + } + + /* Remote end sending a call request on an already + * established LCI? It shouldnt happen, just in case.. + */ + read_lock_bh(&x25_forward_list_lock); + list_for_each(entry, &x25_forward_list) { + x25_frwd = list_entry(entry, struct x25_forward, node); + if (x25_frwd->lci == lci) { + printk(KERN_WARNING "X.25: call request for lci which is already registered!, transmitting but not registering new pair\n"); + same_lci = 1; + } + } + read_unlock_bh(&x25_forward_list_lock); + + /* Save the forwarding details for future traffic */ + if (!same_lci){ + if ((new_frwd = kmalloc(sizeof(struct x25_forward), + GFP_ATOMIC)) == NULL){ + rc = -ENOMEM; + goto out_put_nb; + } + new_frwd->lci = lci; + new_frwd->dev1 = rt->dev; + new_frwd->dev2 = from->dev; + write_lock_bh(&x25_forward_list_lock); + list_add(&new_frwd->node, &x25_forward_list); + write_unlock_bh(&x25_forward_list_lock); + } + + /* Forward the call request */ + if ( (skbn = skb_clone(skb, GFP_ATOMIC)) == NULL){ + goto out_put_nb; + } + x25_transmit_link(skbn, neigh_new); + rc = 1; + } + + +out_put_nb: + x25_neigh_put(neigh_new); + +out_put_route: + x25_route_put(rt); + return rc; +} + + +int x25_forward_data(int lci, struct x25_neigh *from, struct sk_buff *skb) { + + struct x25_forward *frwd; + struct list_head *entry; + struct net_device *peer = NULL; + struct x25_neigh *nb; + struct sk_buff *skbn; + int rc = 0; + + read_lock_bh(&x25_forward_list_lock); + list_for_each(entry, &x25_forward_list) { + frwd = list_entry(entry, struct x25_forward, node); + if (frwd->lci == lci) { + /* The call is established, either side can send */ + if (from->dev == frwd->dev1) { + peer = frwd->dev2; + } else { + peer = frwd->dev1; + } + break; + } + } + read_unlock_bh(&x25_forward_list_lock); + + if ( (nb = x25_get_neigh(peer)) == NULL) + goto out; + + if ( (skbn = pskb_copy(skb, GFP_ATOMIC)) == NULL){ + goto out; + + } + x25_transmit_link(skbn, nb); + + x25_neigh_put(nb); + rc = 1; +out: + return rc; +} + +void x25_clear_forward_by_lci(unsigned int lci) +{ + struct x25_forward *fwd; + struct list_head *entry, *tmp; + + write_lock_bh(&x25_forward_list_lock); + + list_for_each_safe(entry, tmp, &x25_forward_list) { + fwd = list_entry(entry, struct x25_forward, node); + if (fwd->lci == lci) { + list_del(&fwd->node); + kfree(fwd); + } + } + write_unlock_bh(&x25_forward_list_lock); +} + + +void x25_clear_forward_by_dev(struct net_device *dev) +{ + struct x25_forward *fwd; + struct list_head *entry, *tmp; + + write_lock_bh(&x25_forward_list_lock); + + list_for_each_safe(entry, tmp, &x25_forward_list) { + fwd = list_entry(entry, struct x25_forward, node); + if ((fwd->dev1 == dev) || (fwd->dev2 == dev)){ + list_del(&fwd->node); + kfree(fwd); + } + } + write_unlock_bh(&x25_forward_list_lock); +} diff --git a/net/x25/x25_proc.c b/net/x25/x25_proc.c index a11837d361d..e0470bd8c2f 100644 --- a/net/x25/x25_proc.c +++ b/net/x25/x25_proc.c @@ -165,6 +165,75 @@ out: return 0; } +static __inline__ struct x25_forward *x25_get_forward_idx(loff_t pos) +{ + struct x25_forward *f; + struct list_head *entry; + + list_for_each(entry, &x25_forward_list) { + f = list_entry(entry, struct x25_forward, node); + if (!pos--) + goto found; + } + + f = NULL; +found: + return f; +} + +static void *x25_seq_forward_start(struct seq_file *seq, loff_t *pos) +{ + loff_t l = *pos; + + read_lock_bh(&x25_forward_list_lock); + return l ? x25_get_forward_idx(--l) : SEQ_START_TOKEN; +} + +static void *x25_seq_forward_next(struct seq_file *seq, void *v, loff_t *pos) +{ + struct x25_forward *f; + + ++*pos; + if (v == SEQ_START_TOKEN) { + f = NULL; + if (!list_empty(&x25_forward_list)) + f = list_entry(x25_forward_list.next, + struct x25_forward, node); + goto out; + } + f = v; + if (f->node.next != &x25_forward_list) + f = list_entry(f->node.next, struct x25_forward, node); + else + f = NULL; +out: + return f; + +} + +static void x25_seq_forward_stop(struct seq_file *seq, void *v) +{ + read_unlock_bh(&x25_forward_list_lock); +} + +static int x25_seq_forward_show(struct seq_file *seq, void *v) +{ + struct x25_forward *f; + + if (v == SEQ_START_TOKEN) { + seq_printf(seq, "lci dev1 dev2\n"); + goto out; + } + + f = v; + + seq_printf(seq, "%d %-10s %-10s\n", + f->lci, f->dev1->name, f->dev2->name); + +out: + return 0; +} + static struct seq_operations x25_seq_route_ops = { .start = x25_seq_route_start, .next = x25_seq_route_next, @@ -179,6 +248,13 @@ static struct seq_operations x25_seq_socket_ops = { .show = x25_seq_socket_show, }; +static struct seq_operations x25_seq_forward_ops = { + .start = x25_seq_forward_start, + .next = x25_seq_forward_next, + .stop = x25_seq_forward_stop, + .show = x25_seq_forward_show, +}; + static int x25_seq_socket_open(struct inode *inode, struct file *file) { return seq_open(file, &x25_seq_socket_ops); @@ -189,6 +265,11 @@ static int x25_seq_route_open(struct inode *inode, struct file *file) return seq_open(file, &x25_seq_route_ops); } +static int x25_seq_forward_open(struct inode *inode, struct file *file) +{ + return seq_open(file, &x25_seq_forward_ops); +} + static struct file_operations x25_seq_socket_fops = { .owner = THIS_MODULE, .open = x25_seq_socket_open, @@ -205,6 +286,14 @@ static struct file_operations x25_seq_route_fops = { .release = seq_release, }; +static struct file_operations x25_seq_forward_fops = { + .owner = THIS_MODULE, + .open = x25_seq_forward_open, + .read = seq_read, + .llseek = seq_lseek, + .release = seq_release, +}; + static struct proc_dir_entry *x25_proc_dir; int __init x25_proc_init(void) @@ -225,9 +314,17 @@ int __init x25_proc_init(void) if (!p) goto out_socket; p->proc_fops = &x25_seq_socket_fops; + + p = create_proc_entry("forward", S_IRUGO, x25_proc_dir); + if (!p) + goto out_forward; + p->proc_fops = &x25_seq_forward_fops; rc = 0; + out: return rc; +out_forward: + remove_proc_entry("socket", x25_proc_dir); out_socket: remove_proc_entry("route", x25_proc_dir); out_route: @@ -237,6 +334,7 @@ out_route: void __exit x25_proc_exit(void) { + remove_proc_entry("forward", x25_proc_dir); remove_proc_entry("route", x25_proc_dir); remove_proc_entry("socket", x25_proc_dir); remove_proc_entry("x25", proc_net); diff --git a/net/x25/x25_route.c b/net/x25/x25_route.c index 2a3fe986b24..883a848bca5 100644 --- a/net/x25/x25_route.c +++ b/net/x25/x25_route.c @@ -119,6 +119,9 @@ void x25_route_device_down(struct net_device *dev) __x25_remove_route(rt); } write_unlock_bh(&x25_route_list_lock); + + /* Remove any related forwarding */ + x25_clear_forward_by_dev(dev); } /* diff --git a/net/xfrm/Kconfig b/net/xfrm/Kconfig index 0faab633258..577a4f821b9 100644 --- a/net/xfrm/Kconfig +++ b/net/xfrm/Kconfig @@ -24,6 +24,17 @@ config XFRM_SUB_POLICY If unsure, say N. +config XFRM_MIGRATE + bool "Transformation migrate database (EXPERIMENTAL)" + depends on XFRM && EXPERIMENTAL + ---help--- + A feature to update locator(s) of a given IPsec security + association dynamically. This feature is required, for + instance, in a Mobile IPv6 environment with IPsec configuration + where mobile nodes change their attachment point to the Internet. + + If unsure, say N. + config NET_KEY tristate "PF_KEY sockets" select XFRM @@ -34,4 +45,19 @@ config NET_KEY Say Y unless you know what you are doing. +config NET_KEY_MIGRATE + bool "PF_KEY MIGRATE (EXPERIMENTAL)" + depends on NET_KEY && EXPERIMENTAL + select XFRM_MIGRATE + ---help--- + Add a PF_KEY MIGRATE message to PF_KEYv2 socket family. + The PF_KEY MIGRATE message is used to dynamically update + locator(s) of a given IPsec security association. + This feature is required, for instance, in a Mobile IPv6 + environment with IPsec configuration where mobile nodes + change their attachment point to the Internet. Detail + information can be found in the internet-draft + <draft-sugimoto-mip6-pfkey-migrate>. + + If unsure, say N. diff --git a/net/xfrm/xfrm_algo.c b/net/xfrm/xfrm_algo.c index 6b381fc0383..248f94814df 100644 --- a/net/xfrm/xfrm_algo.c +++ b/net/xfrm/xfrm_algo.c @@ -266,6 +266,23 @@ static struct xfrm_algo_desc ealg_list[] = { } }, { + .name = "cbc(camellia)", + + .uinfo = { + .encr = { + .blockbits = 128, + .defkeybits = 128, + } + }, + + .desc = { + .sadb_alg_id = SADB_X_EALG_CAMELLIACBC, + .sadb_alg_ivlen = 8, + .sadb_alg_minbits = 128, + .sadb_alg_maxbits = 256 + } +}, +{ .name = "cbc(twofish)", .compat = "twofish", @@ -399,7 +416,8 @@ static struct xfrm_algo_desc *xfrm_get_byname(struct xfrm_algo_desc *list, if (!probe) break; - status = crypto_has_alg(name, type, mask | CRYPTO_ALG_ASYNC); + status = crypto_has_alg(list[i].name, type, + mask | CRYPTO_ALG_ASYNC); if (!status) break; diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index bebd40e5a62..fa7ce060b45 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -650,19 +650,18 @@ int xfrm_policy_insert(int dir, struct xfrm_policy *policy, int excl) struct xfrm_policy *pol; struct xfrm_policy *delpol; struct hlist_head *chain; - struct hlist_node *entry, *newpos, *last; + struct hlist_node *entry, *newpos; struct dst_entry *gc_list; write_lock_bh(&xfrm_policy_lock); chain = policy_hash_bysel(&policy->selector, policy->family, dir); delpol = NULL; newpos = NULL; - last = NULL; hlist_for_each_entry(pol, entry, chain, bydst) { - if (!delpol && - pol->type == policy->type && + if (pol->type == policy->type && !selector_cmp(&pol->selector, &policy->selector) && - xfrm_sec_ctx_match(pol->security, policy->security)) { + xfrm_sec_ctx_match(pol->security, policy->security) && + !WARN_ON(delpol)) { if (excl) { write_unlock_bh(&xfrm_policy_lock); return -EEXIST; @@ -671,17 +670,12 @@ int xfrm_policy_insert(int dir, struct xfrm_policy *policy, int excl) if (policy->priority > pol->priority) continue; } else if (policy->priority >= pol->priority) { - last = &pol->bydst; + newpos = &pol->bydst; continue; } - if (!newpos) - newpos = &pol->bydst; if (delpol) break; - last = &pol->bydst; } - if (!newpos) - newpos = last; if (newpos) hlist_add_after(newpos, &policy->bydst); else @@ -2242,3 +2236,234 @@ void __init xfrm_init(void) xfrm_input_init(); } +#ifdef CONFIG_XFRM_MIGRATE +static int xfrm_migrate_selector_match(struct xfrm_selector *sel_cmp, + struct xfrm_selector *sel_tgt) +{ + if (sel_cmp->proto == IPSEC_ULPROTO_ANY) { + if (sel_tgt->family == sel_cmp->family && + xfrm_addr_cmp(&sel_tgt->daddr, &sel_cmp->daddr, + sel_cmp->family) == 0 && + xfrm_addr_cmp(&sel_tgt->saddr, &sel_cmp->saddr, + sel_cmp->family) == 0 && + sel_tgt->prefixlen_d == sel_cmp->prefixlen_d && + sel_tgt->prefixlen_s == sel_cmp->prefixlen_s) { + return 1; + } + } else { + if (memcmp(sel_tgt, sel_cmp, sizeof(*sel_tgt)) == 0) { + return 1; + } + } + return 0; +} + +static struct xfrm_policy * xfrm_migrate_policy_find(struct xfrm_selector *sel, + u8 dir, u8 type) +{ + struct xfrm_policy *pol, *ret = NULL; + struct hlist_node *entry; + struct hlist_head *chain; + u32 priority = ~0U; + + read_lock_bh(&xfrm_policy_lock); + chain = policy_hash_direct(&sel->daddr, &sel->saddr, sel->family, dir); + hlist_for_each_entry(pol, entry, chain, bydst) { + if (xfrm_migrate_selector_match(sel, &pol->selector) && + pol->type == type) { + ret = pol; + priority = ret->priority; + break; + } + } + chain = &xfrm_policy_inexact[dir]; + hlist_for_each_entry(pol, entry, chain, bydst) { + if (xfrm_migrate_selector_match(sel, &pol->selector) && + pol->type == type && + pol->priority < priority) { + ret = pol; + break; + } + } + + if (ret) + xfrm_pol_hold(ret); + + read_unlock_bh(&xfrm_policy_lock); + + return ret; +} + +static int migrate_tmpl_match(struct xfrm_migrate *m, struct xfrm_tmpl *t) +{ + int match = 0; + + if (t->mode == m->mode && t->id.proto == m->proto && + (m->reqid == 0 || t->reqid == m->reqid)) { + switch (t->mode) { + case XFRM_MODE_TUNNEL: + case XFRM_MODE_BEET: + if (xfrm_addr_cmp(&t->id.daddr, &m->old_daddr, + m->old_family) == 0 && + xfrm_addr_cmp(&t->saddr, &m->old_saddr, + m->old_family) == 0) { + match = 1; + } + break; + case XFRM_MODE_TRANSPORT: + /* in case of transport mode, template does not store + any IP addresses, hence we just compare mode and + protocol */ + match = 1; + break; + default: + break; + } + } + return match; +} + +/* update endpoint address(es) of template(s) */ +static int xfrm_policy_migrate(struct xfrm_policy *pol, + struct xfrm_migrate *m, int num_migrate) +{ + struct xfrm_migrate *mp; + struct dst_entry *dst; + int i, j, n = 0; + + write_lock_bh(&pol->lock); + if (unlikely(pol->dead)) { + /* target policy has been deleted */ + write_unlock_bh(&pol->lock); + return -ENOENT; + } + + for (i = 0; i < pol->xfrm_nr; i++) { + for (j = 0, mp = m; j < num_migrate; j++, mp++) { + if (!migrate_tmpl_match(mp, &pol->xfrm_vec[i])) + continue; + n++; + if (pol->xfrm_vec[i].mode != XFRM_MODE_TUNNEL) + continue; + /* update endpoints */ + memcpy(&pol->xfrm_vec[i].id.daddr, &mp->new_daddr, + sizeof(pol->xfrm_vec[i].id.daddr)); + memcpy(&pol->xfrm_vec[i].saddr, &mp->new_saddr, + sizeof(pol->xfrm_vec[i].saddr)); + pol->xfrm_vec[i].encap_family = mp->new_family; + /* flush bundles */ + while ((dst = pol->bundles) != NULL) { + pol->bundles = dst->next; + dst_free(dst); + } + } + } + + write_unlock_bh(&pol->lock); + + if (!n) + return -ENODATA; + + return 0; +} + +static int xfrm_migrate_check(struct xfrm_migrate *m, int num_migrate) +{ + int i, j; + + if (num_migrate < 1 || num_migrate > XFRM_MAX_DEPTH) + return -EINVAL; + + for (i = 0; i < num_migrate; i++) { + if ((xfrm_addr_cmp(&m[i].old_daddr, &m[i].new_daddr, + m[i].old_family) == 0) && + (xfrm_addr_cmp(&m[i].old_saddr, &m[i].new_saddr, + m[i].old_family) == 0)) + return -EINVAL; + if (xfrm_addr_any(&m[i].new_daddr, m[i].new_family) || + xfrm_addr_any(&m[i].new_saddr, m[i].new_family)) + return -EINVAL; + + /* check if there is any duplicated entry */ + for (j = i + 1; j < num_migrate; j++) { + if (!memcmp(&m[i].old_daddr, &m[j].old_daddr, + sizeof(m[i].old_daddr)) && + !memcmp(&m[i].old_saddr, &m[j].old_saddr, + sizeof(m[i].old_saddr)) && + m[i].proto == m[j].proto && + m[i].mode == m[j].mode && + m[i].reqid == m[j].reqid && + m[i].old_family == m[j].old_family) + return -EINVAL; + } + } + + return 0; +} + +int xfrm_migrate(struct xfrm_selector *sel, u8 dir, u8 type, + struct xfrm_migrate *m, int num_migrate) +{ + int i, err, nx_cur = 0, nx_new = 0; + struct xfrm_policy *pol = NULL; + struct xfrm_state *x, *xc; + struct xfrm_state *x_cur[XFRM_MAX_DEPTH]; + struct xfrm_state *x_new[XFRM_MAX_DEPTH]; + struct xfrm_migrate *mp; + + if ((err = xfrm_migrate_check(m, num_migrate)) < 0) + goto out; + + /* Stage 1 - find policy */ + if ((pol = xfrm_migrate_policy_find(sel, dir, type)) == NULL) { + err = -ENOENT; + goto out; + } + + /* Stage 2 - find and update state(s) */ + for (i = 0, mp = m; i < num_migrate; i++, mp++) { + if ((x = xfrm_migrate_state_find(mp))) { + x_cur[nx_cur] = x; + nx_cur++; + if ((xc = xfrm_state_migrate(x, mp))) { + x_new[nx_new] = xc; + nx_new++; + } else { + err = -ENODATA; + goto restore_state; + } + } + } + + /* Stage 3 - update policy */ + if ((err = xfrm_policy_migrate(pol, m, num_migrate)) < 0) + goto restore_state; + + /* Stage 4 - delete old state(s) */ + if (nx_cur) { + xfrm_states_put(x_cur, nx_cur); + xfrm_states_delete(x_cur, nx_cur); + } + + /* Stage 5 - announce */ + km_migrate(sel, dir, type, m, num_migrate); + + xfrm_pol_put(pol); + + return 0; +out: + return err; + +restore_state: + if (pol) + xfrm_pol_put(pol); + if (nx_cur) + xfrm_states_put(x_cur, nx_cur); + if (nx_new) + xfrm_states_delete(x_new, nx_new); + + return err; +} +EXPORT_SYMBOL(xfrm_migrate); +#endif + diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c index fdb08d9f34a..91b02687db5 100644 --- a/net/xfrm/xfrm_state.c +++ b/net/xfrm/xfrm_state.c @@ -183,9 +183,6 @@ static DEFINE_SPINLOCK(xfrm_state_gc_lock); int __xfrm_state_delete(struct xfrm_state *x); -static struct xfrm_state_afinfo *xfrm_state_get_afinfo(unsigned short family); -static void xfrm_state_put_afinfo(struct xfrm_state_afinfo *afinfo); - int km_query(struct xfrm_state *x, struct xfrm_tmpl *t, struct xfrm_policy *pol); void km_state_expired(struct xfrm_state *x, int hard, u32 pid); @@ -831,6 +828,160 @@ out: } EXPORT_SYMBOL(xfrm_state_add); +#ifdef CONFIG_XFRM_MIGRATE +struct xfrm_state *xfrm_state_clone(struct xfrm_state *orig, int *errp) +{ + int err = -ENOMEM; + struct xfrm_state *x = xfrm_state_alloc(); + if (!x) + goto error; + + memcpy(&x->id, &orig->id, sizeof(x->id)); + memcpy(&x->sel, &orig->sel, sizeof(x->sel)); + memcpy(&x->lft, &orig->lft, sizeof(x->lft)); + x->props.mode = orig->props.mode; + x->props.replay_window = orig->props.replay_window; + x->props.reqid = orig->props.reqid; + x->props.family = orig->props.family; + x->props.saddr = orig->props.saddr; + + if (orig->aalg) { + x->aalg = xfrm_algo_clone(orig->aalg); + if (!x->aalg) + goto error; + } + x->props.aalgo = orig->props.aalgo; + + if (orig->ealg) { + x->ealg = xfrm_algo_clone(orig->ealg); + if (!x->ealg) + goto error; + } + x->props.ealgo = orig->props.ealgo; + + if (orig->calg) { + x->calg = xfrm_algo_clone(orig->calg); + if (!x->calg) + goto error; + } + x->props.calgo = orig->props.calgo; + + if (orig->encap) { + x->encap = kmemdup(orig->encap, sizeof(*x->encap), GFP_KERNEL); + if (!x->encap) + goto error; + } + + if (orig->coaddr) { + x->coaddr = kmemdup(orig->coaddr, sizeof(*x->coaddr), + GFP_KERNEL); + if (!x->coaddr) + goto error; + } + + err = xfrm_init_state(x); + if (err) + goto error; + + x->props.flags = orig->props.flags; + + x->curlft.add_time = orig->curlft.add_time; + x->km.state = orig->km.state; + x->km.seq = orig->km.seq; + + return x; + + error: + if (errp) + *errp = err; + if (x) { + kfree(x->aalg); + kfree(x->ealg); + kfree(x->calg); + kfree(x->encap); + kfree(x->coaddr); + } + kfree(x); + return NULL; +} +EXPORT_SYMBOL(xfrm_state_clone); + +/* xfrm_state_lock is held */ +struct xfrm_state * xfrm_migrate_state_find(struct xfrm_migrate *m) +{ + unsigned int h; + struct xfrm_state *x; + struct hlist_node *entry; + + if (m->reqid) { + h = xfrm_dst_hash(&m->old_daddr, &m->old_saddr, + m->reqid, m->old_family); + hlist_for_each_entry(x, entry, xfrm_state_bydst+h, bydst) { + if (x->props.mode != m->mode || + x->id.proto != m->proto) + continue; + if (m->reqid && x->props.reqid != m->reqid) + continue; + if (xfrm_addr_cmp(&x->id.daddr, &m->old_daddr, + m->old_family) || + xfrm_addr_cmp(&x->props.saddr, &m->old_saddr, + m->old_family)) + continue; + xfrm_state_hold(x); + return x; + } + } else { + h = xfrm_src_hash(&m->old_daddr, &m->old_saddr, + m->old_family); + hlist_for_each_entry(x, entry, xfrm_state_bysrc+h, bysrc) { + if (x->props.mode != m->mode || + x->id.proto != m->proto) + continue; + if (xfrm_addr_cmp(&x->id.daddr, &m->old_daddr, + m->old_family) || + xfrm_addr_cmp(&x->props.saddr, &m->old_saddr, + m->old_family)) + continue; + xfrm_state_hold(x); + return x; + } + } + + return NULL; +} +EXPORT_SYMBOL(xfrm_migrate_state_find); + +struct xfrm_state * xfrm_state_migrate(struct xfrm_state *x, + struct xfrm_migrate *m) +{ + struct xfrm_state *xc; + int err; + + xc = xfrm_state_clone(x, &err); + if (!xc) + return NULL; + + memcpy(&xc->id.daddr, &m->new_daddr, sizeof(xc->id.daddr)); + memcpy(&xc->props.saddr, &m->new_saddr, sizeof(xc->props.saddr)); + + /* add state */ + if (!xfrm_addr_cmp(&x->id.daddr, &m->new_daddr, m->new_family)) { + /* a care is needed when the destination address of the + state is to be updated as it is a part of triplet */ + xfrm_state_insert(xc); + } else { + if ((err = xfrm_state_add(xc)) < 0) + goto error; + } + + return xc; +error: + kfree(xc); + return NULL; +} +EXPORT_SYMBOL(xfrm_state_migrate); +#endif + int xfrm_state_update(struct xfrm_state *x) { struct xfrm_state *x1; @@ -1345,6 +1496,26 @@ void km_policy_expired(struct xfrm_policy *pol, int dir, int hard, u32 pid) } EXPORT_SYMBOL(km_policy_expired); +int km_migrate(struct xfrm_selector *sel, u8 dir, u8 type, + struct xfrm_migrate *m, int num_migrate) +{ + int err = -EINVAL; + int ret; + struct xfrm_mgr *km; + + read_lock(&xfrm_km_lock); + list_for_each_entry(km, &xfrm_km_list, list) { + if (km->migrate) { + ret = km->migrate(sel, dir, type, m, num_migrate); + if (!ret) + err = ret; + } + } + read_unlock(&xfrm_km_lock); + return err; +} +EXPORT_SYMBOL(km_migrate); + int km_report(u8 proto, struct xfrm_selector *sel, xfrm_address_t *addr) { int err = -EINVAL; @@ -1458,7 +1629,7 @@ int xfrm_state_unregister_afinfo(struct xfrm_state_afinfo *afinfo) } EXPORT_SYMBOL(xfrm_state_unregister_afinfo); -static struct xfrm_state_afinfo *xfrm_state_get_afinfo(unsigned short family) +struct xfrm_state_afinfo *xfrm_state_get_afinfo(unsigned short family) { struct xfrm_state_afinfo *afinfo; if (unlikely(family >= NPROTO)) @@ -1470,11 +1641,14 @@ static struct xfrm_state_afinfo *xfrm_state_get_afinfo(unsigned short family) return afinfo; } -static void xfrm_state_put_afinfo(struct xfrm_state_afinfo *afinfo) +void xfrm_state_put_afinfo(struct xfrm_state_afinfo *afinfo) { read_unlock(&xfrm_state_afinfo_lock); } +EXPORT_SYMBOL(xfrm_state_get_afinfo); +EXPORT_SYMBOL(xfrm_state_put_afinfo); + /* Temporarily located here until net/xfrm/xfrm_tunnel.c is created */ void xfrm_state_delete_tunnel(struct xfrm_state *x) { diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c index e5372b11fc8..079a5d31575 100644 --- a/net/xfrm/xfrm_user.c +++ b/net/xfrm/xfrm_user.c @@ -434,18 +434,19 @@ error_no_put: return NULL; } -static int xfrm_add_sa(struct sk_buff *skb, struct nlmsghdr *nlh, void **xfrma) +static int xfrm_add_sa(struct sk_buff *skb, struct nlmsghdr *nlh, + struct rtattr **xfrma) { struct xfrm_usersa_info *p = NLMSG_DATA(nlh); struct xfrm_state *x; int err; struct km_event c; - err = verify_newsa_info(p, (struct rtattr **)xfrma); + err = verify_newsa_info(p, xfrma); if (err) return err; - x = xfrm_state_construct(p, (struct rtattr **)xfrma, &err); + x = xfrm_state_construct(p, xfrma, &err); if (!x) return err; @@ -507,14 +508,15 @@ static struct xfrm_state *xfrm_user_state_lookup(struct xfrm_usersa_id *p, return x; } -static int xfrm_del_sa(struct sk_buff *skb, struct nlmsghdr *nlh, void **xfrma) +static int xfrm_del_sa(struct sk_buff *skb, struct nlmsghdr *nlh, + struct rtattr **xfrma) { struct xfrm_state *x; int err = -ESRCH; struct km_event c; struct xfrm_usersa_id *p = NLMSG_DATA(nlh); - x = xfrm_user_state_lookup(p, (struct rtattr **)xfrma, &err); + x = xfrm_user_state_lookup(p, xfrma, &err); if (x == NULL) return err; @@ -672,14 +674,15 @@ static struct sk_buff *xfrm_state_netlink(struct sk_buff *in_skb, return skb; } -static int xfrm_get_sa(struct sk_buff *skb, struct nlmsghdr *nlh, void **xfrma) +static int xfrm_get_sa(struct sk_buff *skb, struct nlmsghdr *nlh, + struct rtattr **xfrma) { struct xfrm_usersa_id *p = NLMSG_DATA(nlh); struct xfrm_state *x; struct sk_buff *resp_skb; int err = -ESRCH; - x = xfrm_user_state_lookup(p, (struct rtattr **)xfrma, &err); + x = xfrm_user_state_lookup(p, xfrma, &err); if (x == NULL) goto out_noput; @@ -718,7 +721,8 @@ static int verify_userspi_info(struct xfrm_userspi_info *p) return 0; } -static int xfrm_alloc_userspi(struct sk_buff *skb, struct nlmsghdr *nlh, void **xfrma) +static int xfrm_alloc_userspi(struct sk_buff *skb, struct nlmsghdr *nlh, + struct rtattr **xfrma) { struct xfrm_state *x; struct xfrm_userspi_info *p; @@ -1013,7 +1017,8 @@ static struct xfrm_policy *xfrm_policy_construct(struct xfrm_userpolicy_info *p, return NULL; } -static int xfrm_add_policy(struct sk_buff *skb, struct nlmsghdr *nlh, void **xfrma) +static int xfrm_add_policy(struct sk_buff *skb, struct nlmsghdr *nlh, + struct rtattr **xfrma) { struct xfrm_userpolicy_info *p = NLMSG_DATA(nlh); struct xfrm_policy *xp; @@ -1024,11 +1029,11 @@ static int xfrm_add_policy(struct sk_buff *skb, struct nlmsghdr *nlh, void **xfr err = verify_newpolicy_info(p); if (err) return err; - err = verify_sec_ctx_len((struct rtattr **)xfrma); + err = verify_sec_ctx_len(xfrma); if (err) return err; - xp = xfrm_policy_construct(p, (struct rtattr **)xfrma, &err); + xp = xfrm_policy_construct(p, xfrma, &err); if (!xp) return err; @@ -1227,7 +1232,8 @@ static struct sk_buff *xfrm_policy_netlink(struct sk_buff *in_skb, return skb; } -static int xfrm_get_policy(struct sk_buff *skb, struct nlmsghdr *nlh, void **xfrma) +static int xfrm_get_policy(struct sk_buff *skb, struct nlmsghdr *nlh, + struct rtattr **xfrma) { struct xfrm_policy *xp; struct xfrm_userpolicy_id *p; @@ -1239,7 +1245,7 @@ static int xfrm_get_policy(struct sk_buff *skb, struct nlmsghdr *nlh, void **xfr p = NLMSG_DATA(nlh); delete = nlh->nlmsg_type == XFRM_MSG_DELPOLICY; - err = copy_from_user_policy_type(&type, (struct rtattr **)xfrma); + err = copy_from_user_policy_type(&type, xfrma); if (err) return err; @@ -1250,11 +1256,10 @@ static int xfrm_get_policy(struct sk_buff *skb, struct nlmsghdr *nlh, void **xfr if (p->index) xp = xfrm_policy_byid(type, p->dir, p->index, delete); else { - struct rtattr **rtattrs = (struct rtattr **)xfrma; - struct rtattr *rt = rtattrs[XFRMA_SEC_CTX-1]; + struct rtattr *rt = xfrma[XFRMA_SEC_CTX-1]; struct xfrm_policy tmp; - err = verify_sec_ctx_len(rtattrs); + err = verify_sec_ctx_len(xfrma); if (err) return err; @@ -1302,7 +1307,8 @@ out: return err; } -static int xfrm_flush_sa(struct sk_buff *skb, struct nlmsghdr *nlh, void **xfrma) +static int xfrm_flush_sa(struct sk_buff *skb, struct nlmsghdr *nlh, + struct rtattr **xfrma) { struct km_event c; struct xfrm_usersa_flush *p = NLMSG_DATA(nlh); @@ -1367,7 +1373,8 @@ nlmsg_failure: return -1; } -static int xfrm_get_ae(struct sk_buff *skb, struct nlmsghdr *nlh, void **xfrma) +static int xfrm_get_ae(struct sk_buff *skb, struct nlmsghdr *nlh, + struct rtattr **xfrma) { struct xfrm_state *x; struct sk_buff *r_skb; @@ -1415,7 +1422,8 @@ static int xfrm_get_ae(struct sk_buff *skb, struct nlmsghdr *nlh, void **xfrma) return err; } -static int xfrm_new_ae(struct sk_buff *skb, struct nlmsghdr *nlh, void **xfrma) +static int xfrm_new_ae(struct sk_buff *skb, struct nlmsghdr *nlh, + struct rtattr **xfrma) { struct xfrm_state *x; struct km_event c; @@ -1439,7 +1447,7 @@ static int xfrm_new_ae(struct sk_buff *skb, struct nlmsghdr *nlh, void **xfrma) goto out; spin_lock_bh(&x->lock); - err = xfrm_update_ae_params(x,(struct rtattr **)xfrma); + err = xfrm_update_ae_params(x, xfrma); spin_unlock_bh(&x->lock); if (err < 0) goto out; @@ -1455,14 +1463,15 @@ out: return err; } -static int xfrm_flush_policy(struct sk_buff *skb, struct nlmsghdr *nlh, void **xfrma) +static int xfrm_flush_policy(struct sk_buff *skb, struct nlmsghdr *nlh, + struct rtattr **xfrma) { struct km_event c; u8 type = XFRM_POLICY_TYPE_MAIN; int err; struct xfrm_audit audit_info; - err = copy_from_user_policy_type(&type, (struct rtattr **)xfrma); + err = copy_from_user_policy_type(&type, xfrma); if (err) return err; @@ -1477,7 +1486,8 @@ static int xfrm_flush_policy(struct sk_buff *skb, struct nlmsghdr *nlh, void **x return 0; } -static int xfrm_add_pol_expire(struct sk_buff *skb, struct nlmsghdr *nlh, void **xfrma) +static int xfrm_add_pol_expire(struct sk_buff *skb, struct nlmsghdr *nlh, + struct rtattr **xfrma) { struct xfrm_policy *xp; struct xfrm_user_polexpire *up = NLMSG_DATA(nlh); @@ -1485,18 +1495,17 @@ static int xfrm_add_pol_expire(struct sk_buff *skb, struct nlmsghdr *nlh, void * u8 type = XFRM_POLICY_TYPE_MAIN; int err = -ENOENT; - err = copy_from_user_policy_type(&type, (struct rtattr **)xfrma); + err = copy_from_user_policy_type(&type, xfrma); if (err) return err; if (p->index) xp = xfrm_policy_byid(type, p->dir, p->index, 0); else { - struct rtattr **rtattrs = (struct rtattr **)xfrma; - struct rtattr *rt = rtattrs[XFRMA_SEC_CTX-1]; + struct rtattr *rt = xfrma[XFRMA_SEC_CTX-1]; struct xfrm_policy tmp; - err = verify_sec_ctx_len(rtattrs); + err = verify_sec_ctx_len(xfrma); if (err) return err; @@ -1537,7 +1546,8 @@ out: return err; } -static int xfrm_add_sa_expire(struct sk_buff *skb, struct nlmsghdr *nlh, void **xfrma) +static int xfrm_add_sa_expire(struct sk_buff *skb, struct nlmsghdr *nlh, + struct rtattr **xfrma) { struct xfrm_state *x; int err; @@ -1568,7 +1578,8 @@ out: return err; } -static int xfrm_add_acquire(struct sk_buff *skb, struct nlmsghdr *nlh, void **xfrma) +static int xfrm_add_acquire(struct sk_buff *skb, struct nlmsghdr *nlh, + struct rtattr **xfrma) { struct xfrm_policy *xp; struct xfrm_user_tmpl *ut; @@ -1621,6 +1632,176 @@ static int xfrm_add_acquire(struct sk_buff *skb, struct nlmsghdr *nlh, void **xf return 0; } +#ifdef CONFIG_XFRM_MIGRATE +static int verify_user_migrate(struct rtattr **xfrma) +{ + struct rtattr *rt = xfrma[XFRMA_MIGRATE-1]; + struct xfrm_user_migrate *um; + + if (!rt) + return -EINVAL; + + if ((rt->rta_len - sizeof(*rt)) < sizeof(*um)) + return -EINVAL; + + return 0; +} + +static int copy_from_user_migrate(struct xfrm_migrate *ma, + struct rtattr **xfrma, int *num) +{ + struct rtattr *rt = xfrma[XFRMA_MIGRATE-1]; + struct xfrm_user_migrate *um; + int i, num_migrate; + + um = RTA_DATA(rt); + num_migrate = (rt->rta_len - sizeof(*rt)) / sizeof(*um); + + if (num_migrate <= 0 || num_migrate > XFRM_MAX_DEPTH) + return -EINVAL; + + for (i = 0; i < num_migrate; i++, um++, ma++) { + memcpy(&ma->old_daddr, &um->old_daddr, sizeof(ma->old_daddr)); + memcpy(&ma->old_saddr, &um->old_saddr, sizeof(ma->old_saddr)); + memcpy(&ma->new_daddr, &um->new_daddr, sizeof(ma->new_daddr)); + memcpy(&ma->new_saddr, &um->new_saddr, sizeof(ma->new_saddr)); + + ma->proto = um->proto; + ma->mode = um->mode; + ma->reqid = um->reqid; + + ma->old_family = um->old_family; + ma->new_family = um->new_family; + } + + *num = i; + return 0; +} + +static int xfrm_do_migrate(struct sk_buff *skb, struct nlmsghdr *nlh, + struct rtattr **xfrma) +{ + struct xfrm_userpolicy_id *pi = NLMSG_DATA(nlh); + struct xfrm_migrate m[XFRM_MAX_DEPTH]; + u8 type; + int err; + int n = 0; + + err = verify_user_migrate((struct rtattr **)xfrma); + if (err) + return err; + + err = copy_from_user_policy_type(&type, (struct rtattr **)xfrma); + if (err) + return err; + + err = copy_from_user_migrate((struct xfrm_migrate *)m, + (struct rtattr **)xfrma, &n); + if (err) + return err; + + if (!n) + return 0; + + xfrm_migrate(&pi->sel, pi->dir, type, m, n); + + return 0; +} +#else +static int xfrm_do_migrate(struct sk_buff *skb, struct nlmsghdr *nlh, + struct rtattr **xfrma) +{ + return -ENOPROTOOPT; +} +#endif + +#ifdef CONFIG_XFRM_MIGRATE +static int copy_to_user_migrate(struct xfrm_migrate *m, struct sk_buff *skb) +{ + struct xfrm_user_migrate um; + + memset(&um, 0, sizeof(um)); + um.proto = m->proto; + um.mode = m->mode; + um.reqid = m->reqid; + um.old_family = m->old_family; + memcpy(&um.old_daddr, &m->old_daddr, sizeof(um.old_daddr)); + memcpy(&um.old_saddr, &m->old_saddr, sizeof(um.old_saddr)); + um.new_family = m->new_family; + memcpy(&um.new_daddr, &m->new_daddr, sizeof(um.new_daddr)); + memcpy(&um.new_saddr, &m->new_saddr, sizeof(um.new_saddr)); + + RTA_PUT(skb, XFRMA_MIGRATE, sizeof(um), &um); + return 0; + +rtattr_failure: + return -1; +} + +static int build_migrate(struct sk_buff *skb, struct xfrm_migrate *m, + int num_migrate, struct xfrm_selector *sel, + u8 dir, u8 type) +{ + struct xfrm_migrate *mp; + struct xfrm_userpolicy_id *pol_id; + struct nlmsghdr *nlh; + unsigned char *b = skb->tail; + int i; + + nlh = NLMSG_PUT(skb, 0, 0, XFRM_MSG_MIGRATE, sizeof(*pol_id)); + pol_id = NLMSG_DATA(nlh); + nlh->nlmsg_flags = 0; + + /* copy data from selector, dir, and type to the pol_id */ + memset(pol_id, 0, sizeof(*pol_id)); + memcpy(&pol_id->sel, sel, sizeof(pol_id->sel)); + pol_id->dir = dir; + + if (copy_to_user_policy_type(type, skb) < 0) + goto nlmsg_failure; + + for (i = 0, mp = m ; i < num_migrate; i++, mp++) { + if (copy_to_user_migrate(mp, skb) < 0) + goto nlmsg_failure; + } + + nlh->nlmsg_len = skb->tail - b; + return skb->len; +nlmsg_failure: + skb_trim(skb, b - skb->data); + return -1; +} + +static int xfrm_send_migrate(struct xfrm_selector *sel, u8 dir, u8 type, + struct xfrm_migrate *m, int num_migrate) +{ + struct sk_buff *skb; + size_t len; + + len = RTA_SPACE(sizeof(struct xfrm_user_migrate) * num_migrate); + len += NLMSG_SPACE(sizeof(struct xfrm_userpolicy_id)); +#ifdef CONFIG_XFRM_SUB_POLICY + len += RTA_SPACE(sizeof(struct xfrm_userpolicy_type)); +#endif + skb = alloc_skb(len, GFP_ATOMIC); + if (skb == NULL) + return -ENOMEM; + + /* build migrate */ + if (build_migrate(skb, m, num_migrate, sel, dir, type) < 0) + BUG(); + + NETLINK_CB(skb).dst_group = XFRMNLGRP_MIGRATE; + return netlink_broadcast(xfrm_nl, skb, 0, XFRMNLGRP_MIGRATE, + GFP_ATOMIC); +} +#else +static int xfrm_send_migrate(struct xfrm_selector *sel, u8 dir, u8 type, + struct xfrm_migrate *m, int num_migrate) +{ + return -ENOPROTOOPT; +} +#endif #define XMSGSIZE(type) NLMSG_LENGTH(sizeof(struct type)) @@ -1642,12 +1823,13 @@ static const int xfrm_msg_min[XFRM_NR_MSGTYPES] = { [XFRM_MSG_NEWAE - XFRM_MSG_BASE] = XMSGSIZE(xfrm_aevent_id), [XFRM_MSG_GETAE - XFRM_MSG_BASE] = XMSGSIZE(xfrm_aevent_id), [XFRM_MSG_REPORT - XFRM_MSG_BASE] = XMSGSIZE(xfrm_user_report), + [XFRM_MSG_MIGRATE - XFRM_MSG_BASE] = XMSGSIZE(xfrm_userpolicy_id), }; #undef XMSGSIZE static struct xfrm_link { - int (*doit)(struct sk_buff *, struct nlmsghdr *, void **); + int (*doit)(struct sk_buff *, struct nlmsghdr *, struct rtattr **); int (*dump)(struct sk_buff *, struct netlink_callback *); } xfrm_dispatch[XFRM_NR_MSGTYPES] = { [XFRM_MSG_NEWSA - XFRM_MSG_BASE] = { .doit = xfrm_add_sa }, @@ -1668,6 +1850,7 @@ static struct xfrm_link { [XFRM_MSG_FLUSHPOLICY - XFRM_MSG_BASE] = { .doit = xfrm_flush_policy }, [XFRM_MSG_NEWAE - XFRM_MSG_BASE] = { .doit = xfrm_new_ae }, [XFRM_MSG_GETAE - XFRM_MSG_BASE] = { .doit = xfrm_get_ae }, + [XFRM_MSG_MIGRATE - XFRM_MSG_BASE] = { .doit = xfrm_do_migrate }, }; static int xfrm_user_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh, int *errp) @@ -1735,7 +1918,7 @@ static int xfrm_user_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh, int *err if (link->doit == NULL) goto err_einval; - *errp = link->doit(skb, nlh, (void **) &xfrma); + *errp = link->doit(skb, nlh, xfrma); return *errp; @@ -2274,6 +2457,7 @@ static struct xfrm_mgr netlink_mgr = { .compile_policy = xfrm_compile_policy, .notify_policy = xfrm_send_policy_notify, .report = xfrm_send_report, + .migrate = xfrm_send_migrate, }; static int __init xfrm_user_init(void) |