diff options
Diffstat (limited to 'net')
66 files changed, 4934 insertions, 2540 deletions
diff --git a/net/bluetooth/af_bluetooth.c b/net/bluetooth/af_bluetooth.c index 12b43345b54..03532062a46 100644 --- a/net/bluetooth/af_bluetooth.c +++ b/net/bluetooth/af_bluetooth.c @@ -308,12 +308,6 @@ static struct net_proto_family bt_sock_family_ops = { .create = bt_sock_create, }; -extern int hci_sock_init(void); -extern int hci_sock_cleanup(void); - -extern int bt_sysfs_init(void); -extern int bt_sysfs_cleanup(void); - static int __init bt_init(void) { BT_INFO("Core ver %s", VERSION); diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c index 55dc42eac92..cf0df1c8c93 100644 --- a/net/bluetooth/hci_core.c +++ b/net/bluetooth/hci_core.c @@ -87,7 +87,7 @@ int hci_unregister_notifier(struct notifier_block *nb) return notifier_chain_unregister(&hci_notifier, nb); } -void hci_notify(struct hci_dev *hdev, int event) +static void hci_notify(struct hci_dev *hdev, int event) { notifier_call_chain(&hci_notifier, event, hdev); } @@ -1347,7 +1347,7 @@ static inline void hci_scodata_packet(struct hci_dev *hdev, struct sk_buff *skb) kfree_skb(skb); } -void hci_rx_task(unsigned long arg) +static void hci_rx_task(unsigned long arg) { struct hci_dev *hdev = (struct hci_dev *) arg; struct sk_buff *skb; diff --git a/net/bluetooth/hci_sock.c b/net/bluetooth/hci_sock.c index 32ef7975a13..799e448750a 100644 --- a/net/bluetooth/hci_sock.c +++ b/net/bluetooth/hci_sock.c @@ -66,20 +66,20 @@ static struct hci_sec_filter hci_sec_filter = { /* Packet types */ 0x10, /* Events */ - { 0x1000d9fe, 0x0000300c }, + { 0x1000d9fe, 0x0000b00c }, /* Commands */ { { 0x0 }, /* OGF_LINK_CTL */ - { 0xbe000006, 0x00000001, 0x0000, 0x00 }, + { 0xbe000006, 0x00000001, 0x000000, 0x00 }, /* OGF_LINK_POLICY */ - { 0x00005200, 0x00000000, 0x0000, 0x00 }, + { 0x00005200, 0x00000000, 0x000000, 0x00 }, /* OGF_HOST_CTL */ - { 0xaab00200, 0x2b402aaa, 0x0154, 0x00 }, + { 0xaab00200, 0x2b402aaa, 0x020154, 0x00 }, /* OGF_INFO_PARAM */ - { 0x000002be, 0x00000000, 0x0000, 0x00 }, + { 0x000002be, 0x00000000, 0x000000, 0x00 }, /* OGF_STATUS_PARAM */ - { 0x000000ea, 0x00000000, 0x0000, 0x00 } + { 0x000000ea, 0x00000000, 0x000000, 0x00 } } }; diff --git a/net/bluetooth/hidp/core.c b/net/bluetooth/hidp/core.c index de8af5f4239..860444a7fc0 100644 --- a/net/bluetooth/hidp/core.c +++ b/net/bluetooth/hidp/core.c @@ -520,7 +520,7 @@ static int hidp_session(void *arg) if (session->input) { input_unregister_device(session->input); - kfree(session->input); + session->input = NULL; } up_write(&hidp_session_sem); @@ -536,6 +536,8 @@ static inline void hidp_setup_input(struct hidp_session *session, struct hidp_co input->private = session; + input->name = "Bluetooth HID Boot Protocol Device"; + input->id.bustype = BUS_BLUETOOTH; input->id.vendor = req->vendor; input->id.product = req->product; @@ -582,16 +584,15 @@ int hidp_add_connection(struct hidp_connadd_req *req, struct socket *ctrl_sock, return -ENOTUNIQ; session = kmalloc(sizeof(struct hidp_session), GFP_KERNEL); - if (!session) + if (!session) return -ENOMEM; memset(session, 0, sizeof(struct hidp_session)); - session->input = kmalloc(sizeof(struct input_dev), GFP_KERNEL); + session->input = input_allocate_device(); if (!session->input) { kfree(session); return -ENOMEM; } - memset(session->input, 0, sizeof(struct input_dev)); down_write(&hidp_session_sem); @@ -651,8 +652,10 @@ unlink: __hidp_unlink_session(session); - if (session->input) + if (session->input) { input_unregister_device(session->input); + session->input = NULL; /* don't try to free it here */ + } failed: up_write(&hidp_session_sem); diff --git a/net/bluetooth/rfcomm/Makefile b/net/bluetooth/rfcomm/Makefile index aecec45ec68..fe07988a370 100644 --- a/net/bluetooth/rfcomm/Makefile +++ b/net/bluetooth/rfcomm/Makefile @@ -4,5 +4,5 @@ obj-$(CONFIG_BT_RFCOMM) += rfcomm.o -rfcomm-y := core.o sock.o crc.o +rfcomm-y := core.o sock.o rfcomm-$(CONFIG_BT_RFCOMM_TTY) += tty.o diff --git a/net/bluetooth/rfcomm/core.c b/net/bluetooth/rfcomm/core.c index 35adce6482b..c3d56ead840 100644 --- a/net/bluetooth/rfcomm/core.c +++ b/net/bluetooth/rfcomm/core.c @@ -133,6 +133,49 @@ static inline void rfcomm_session_put(struct rfcomm_session *s) /* ---- RFCOMM FCS computation ---- */ +/* reversed, 8-bit, poly=0x07 */ +static unsigned char rfcomm_crc_table[256] = { + 0x00, 0x91, 0xe3, 0x72, 0x07, 0x96, 0xe4, 0x75, + 0x0e, 0x9f, 0xed, 0x7c, 0x09, 0x98, 0xea, 0x7b, + 0x1c, 0x8d, 0xff, 0x6e, 0x1b, 0x8a, 0xf8, 0x69, + 0x12, 0x83, 0xf1, 0x60, 0x15, 0x84, 0xf6, 0x67, + + 0x38, 0xa9, 0xdb, 0x4a, 0x3f, 0xae, 0xdc, 0x4d, + 0x36, 0xa7, 0xd5, 0x44, 0x31, 0xa0, 0xd2, 0x43, + 0x24, 0xb5, 0xc7, 0x56, 0x23, 0xb2, 0xc0, 0x51, + 0x2a, 0xbb, 0xc9, 0x58, 0x2d, 0xbc, 0xce, 0x5f, + + 0x70, 0xe1, 0x93, 0x02, 0x77, 0xe6, 0x94, 0x05, + 0x7e, 0xef, 0x9d, 0x0c, 0x79, 0xe8, 0x9a, 0x0b, + 0x6c, 0xfd, 0x8f, 0x1e, 0x6b, 0xfa, 0x88, 0x19, + 0x62, 0xf3, 0x81, 0x10, 0x65, 0xf4, 0x86, 0x17, + + 0x48, 0xd9, 0xab, 0x3a, 0x4f, 0xde, 0xac, 0x3d, + 0x46, 0xd7, 0xa5, 0x34, 0x41, 0xd0, 0xa2, 0x33, + 0x54, 0xc5, 0xb7, 0x26, 0x53, 0xc2, 0xb0, 0x21, + 0x5a, 0xcb, 0xb9, 0x28, 0x5d, 0xcc, 0xbe, 0x2f, + + 0xe0, 0x71, 0x03, 0x92, 0xe7, 0x76, 0x04, 0x95, + 0xee, 0x7f, 0x0d, 0x9c, 0xe9, 0x78, 0x0a, 0x9b, + 0xfc, 0x6d, 0x1f, 0x8e, 0xfb, 0x6a, 0x18, 0x89, + 0xf2, 0x63, 0x11, 0x80, 0xf5, 0x64, 0x16, 0x87, + + 0xd8, 0x49, 0x3b, 0xaa, 0xdf, 0x4e, 0x3c, 0xad, + 0xd6, 0x47, 0x35, 0xa4, 0xd1, 0x40, 0x32, 0xa3, + 0xc4, 0x55, 0x27, 0xb6, 0xc3, 0x52, 0x20, 0xb1, + 0xca, 0x5b, 0x29, 0xb8, 0xcd, 0x5c, 0x2e, 0xbf, + + 0x90, 0x01, 0x73, 0xe2, 0x97, 0x06, 0x74, 0xe5, + 0x9e, 0x0f, 0x7d, 0xec, 0x99, 0x08, 0x7a, 0xeb, + 0x8c, 0x1d, 0x6f, 0xfe, 0x8b, 0x1a, 0x68, 0xf9, + 0x82, 0x13, 0x61, 0xf0, 0x85, 0x14, 0x66, 0xf7, + + 0xa8, 0x39, 0x4b, 0xda, 0xaf, 0x3e, 0x4c, 0xdd, + 0xa6, 0x37, 0x45, 0xd4, 0xa1, 0x30, 0x42, 0xd3, + 0xb4, 0x25, 0x57, 0xc6, 0xb3, 0x22, 0x50, 0xc1, + 0xba, 0x2b, 0x59, 0xc8, 0xbd, 0x2c, 0x5e, 0xcf +}; + /* CRC on 2 bytes */ #define __crc(data) (rfcomm_crc_table[rfcomm_crc_table[0xff ^ data[0]] ^ data[1]]) diff --git a/net/bluetooth/rfcomm/crc.c b/net/bluetooth/rfcomm/crc.c deleted file mode 100644 index 1011bc4a869..00000000000 --- a/net/bluetooth/rfcomm/crc.c +++ /dev/null @@ -1,71 +0,0 @@ -/* - RFCOMM implementation for Linux Bluetooth stack (BlueZ). - Copyright (C) 2002 Maxim Krasnyansky <maxk@qualcomm.com> - Copyright (C) 2002 Marcel Holtmann <marcel@holtmann.org> - - This program is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License version 2 as - published by the Free Software Foundation; - - THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT OF THIRD PARTY RIGHTS. - IN NO EVENT SHALL THE COPYRIGHT HOLDER(S) AND AUTHOR(S) BE LIABLE FOR ANY - CLAIM, OR ANY SPECIAL INDIRECT OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES - WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN - ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF - OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. - - ALL LIABILITY, INCLUDING LIABILITY FOR INFRINGEMENT OF ANY PATENTS, - COPYRIGHTS, TRADEMARKS OR OTHER RIGHTS, RELATING TO USE OF THIS - SOFTWARE IS DISCLAIMED. -*/ - -/* - * RFCOMM FCS calculation. - * - * $Id: crc.c,v 1.2 2002/09/21 09:54:32 holtmann Exp $ - */ - -/* reversed, 8-bit, poly=0x07 */ -unsigned char rfcomm_crc_table[256] = { - 0x00, 0x91, 0xe3, 0x72, 0x07, 0x96, 0xe4, 0x75, - 0x0e, 0x9f, 0xed, 0x7c, 0x09, 0x98, 0xea, 0x7b, - 0x1c, 0x8d, 0xff, 0x6e, 0x1b, 0x8a, 0xf8, 0x69, - 0x12, 0x83, 0xf1, 0x60, 0x15, 0x84, 0xf6, 0x67, - - 0x38, 0xa9, 0xdb, 0x4a, 0x3f, 0xae, 0xdc, 0x4d, - 0x36, 0xa7, 0xd5, 0x44, 0x31, 0xa0, 0xd2, 0x43, - 0x24, 0xb5, 0xc7, 0x56, 0x23, 0xb2, 0xc0, 0x51, - 0x2a, 0xbb, 0xc9, 0x58, 0x2d, 0xbc, 0xce, 0x5f, - - 0x70, 0xe1, 0x93, 0x02, 0x77, 0xe6, 0x94, 0x05, - 0x7e, 0xef, 0x9d, 0x0c, 0x79, 0xe8, 0x9a, 0x0b, - 0x6c, 0xfd, 0x8f, 0x1e, 0x6b, 0xfa, 0x88, 0x19, - 0x62, 0xf3, 0x81, 0x10, 0x65, 0xf4, 0x86, 0x17, - - 0x48, 0xd9, 0xab, 0x3a, 0x4f, 0xde, 0xac, 0x3d, - 0x46, 0xd7, 0xa5, 0x34, 0x41, 0xd0, 0xa2, 0x33, - 0x54, 0xc5, 0xb7, 0x26, 0x53, 0xc2, 0xb0, 0x21, - 0x5a, 0xcb, 0xb9, 0x28, 0x5d, 0xcc, 0xbe, 0x2f, - - 0xe0, 0x71, 0x03, 0x92, 0xe7, 0x76, 0x04, 0x95, - 0xee, 0x7f, 0x0d, 0x9c, 0xe9, 0x78, 0x0a, 0x9b, - 0xfc, 0x6d, 0x1f, 0x8e, 0xfb, 0x6a, 0x18, 0x89, - 0xf2, 0x63, 0x11, 0x80, 0xf5, 0x64, 0x16, 0x87, - - 0xd8, 0x49, 0x3b, 0xaa, 0xdf, 0x4e, 0x3c, 0xad, - 0xd6, 0x47, 0x35, 0xa4, 0xd1, 0x40, 0x32, 0xa3, - 0xc4, 0x55, 0x27, 0xb6, 0xc3, 0x52, 0x20, 0xb1, - 0xca, 0x5b, 0x29, 0xb8, 0xcd, 0x5c, 0x2e, 0xbf, - - 0x90, 0x01, 0x73, 0xe2, 0x97, 0x06, 0x74, 0xe5, - 0x9e, 0x0f, 0x7d, 0xec, 0x99, 0x08, 0x7a, 0xeb, - 0x8c, 0x1d, 0x6f, 0xfe, 0x8b, 0x1a, 0x68, 0xf9, - 0x82, 0x13, 0x61, 0xf0, 0x85, 0x14, 0x66, 0xf7, - - 0xa8, 0x39, 0x4b, 0xda, 0xaf, 0x3e, 0x4c, 0xdd, - 0xa6, 0x37, 0x45, 0xd4, 0xa1, 0x30, 0x42, 0xd3, - 0xb4, 0x25, 0x57, 0xc6, 0xb3, 0x22, 0x50, 0xc1, - 0xba, 0x2b, 0x59, 0xc8, 0xbd, 0x2c, 0x5e, 0xcf -}; diff --git a/net/core/dev.c b/net/core/dev.c index a44eeef24ed..8d154159527 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -2717,6 +2717,20 @@ int register_netdevice(struct net_device *dev) dev->name); dev->features &= ~NETIF_F_TSO; } + if (dev->features & NETIF_F_UFO) { + if (!(dev->features & NETIF_F_HW_CSUM)) { + printk(KERN_ERR "%s: Dropping NETIF_F_UFO since no " + "NETIF_F_HW_CSUM feature.\n", + dev->name); + dev->features &= ~NETIF_F_UFO; + } + if (!(dev->features & NETIF_F_SG)) { + printk(KERN_ERR "%s: Dropping NETIF_F_UFO since no " + "NETIF_F_SG feature.\n", + dev->name); + dev->features &= ~NETIF_F_UFO; + } + } /* * nil rebuild_header routine, diff --git a/net/core/ethtool.c b/net/core/ethtool.c index 404b761e82c..0350586e919 100644 --- a/net/core/ethtool.c +++ b/net/core/ethtool.c @@ -93,6 +93,20 @@ int ethtool_op_get_perm_addr(struct net_device *dev, struct ethtool_perm_addr *a } +u32 ethtool_op_get_ufo(struct net_device *dev) +{ + return (dev->features & NETIF_F_UFO) != 0; +} + +int ethtool_op_set_ufo(struct net_device *dev, u32 data) +{ + if (data) + dev->features |= NETIF_F_UFO; + else + dev->features &= ~NETIF_F_UFO; + return 0; +} + /* Handlers for each ethtool command */ static int ethtool_get_settings(struct net_device *dev, void __user *useraddr) @@ -483,6 +497,11 @@ static int __ethtool_set_sg(struct net_device *dev, u32 data) return err; } + if (!data && dev->ethtool_ops->set_ufo) { + err = dev->ethtool_ops->set_ufo(dev, 0); + if (err) + return err; + } return dev->ethtool_ops->set_sg(dev, data); } @@ -569,6 +588,32 @@ static int ethtool_set_tso(struct net_device *dev, char __user *useraddr) return dev->ethtool_ops->set_tso(dev, edata.data); } +static int ethtool_get_ufo(struct net_device *dev, char __user *useraddr) +{ + struct ethtool_value edata = { ETHTOOL_GTSO }; + + if (!dev->ethtool_ops->get_ufo) + return -EOPNOTSUPP; + edata.data = dev->ethtool_ops->get_ufo(dev); + if (copy_to_user(useraddr, &edata, sizeof(edata))) + return -EFAULT; + return 0; +} +static int ethtool_set_ufo(struct net_device *dev, char __user *useraddr) +{ + struct ethtool_value edata; + + if (!dev->ethtool_ops->set_ufo) + return -EOPNOTSUPP; + if (copy_from_user(&edata, useraddr, sizeof(edata))) + return -EFAULT; + if (edata.data && !(dev->features & NETIF_F_SG)) + return -EINVAL; + if (edata.data && !(dev->features & NETIF_F_HW_CSUM)) + return -EINVAL; + return dev->ethtool_ops->set_ufo(dev, edata.data); +} + static int ethtool_self_test(struct net_device *dev, char __user *useraddr) { struct ethtool_test test; @@ -854,6 +899,12 @@ int dev_ethtool(struct ifreq *ifr) case ETHTOOL_GPERMADDR: rc = ethtool_get_perm_addr(dev, useraddr); break; + case ETHTOOL_GUFO: + rc = ethtool_get_ufo(dev, useraddr); + break; + case ETHTOOL_SUFO: + rc = ethtool_set_ufo(dev, useraddr); + break; default: rc = -EOPNOTSUPP; } @@ -882,3 +933,5 @@ EXPORT_SYMBOL(ethtool_op_set_sg); EXPORT_SYMBOL(ethtool_op_set_tso); EXPORT_SYMBOL(ethtool_op_set_tx_csum); EXPORT_SYMBOL(ethtool_op_set_tx_hw_csum); +EXPORT_SYMBOL(ethtool_op_set_ufo); +EXPORT_SYMBOL(ethtool_op_get_ufo); diff --git a/net/core/neighbour.c b/net/core/neighbour.c index 1dcf7fa1f0f..e68700f950a 100644 --- a/net/core/neighbour.c +++ b/net/core/neighbour.c @@ -1625,12 +1625,9 @@ static int neightbl_fill_info(struct neigh_table *tbl, struct sk_buff *skb, memset(&ndst, 0, sizeof(ndst)); - for (cpu = 0; cpu < NR_CPUS; cpu++) { + for_each_cpu(cpu) { struct neigh_statistics *st; - if (!cpu_possible(cpu)) - continue; - st = per_cpu_ptr(tbl->stats, cpu); ndst.ndts_allocs += st->allocs; ndst.ndts_destroys += st->destroys; diff --git a/net/core/pktgen.c b/net/core/pktgen.c index 5f043d34669..7fc3e9e28c3 100644 --- a/net/core/pktgen.c +++ b/net/core/pktgen.c @@ -75,7 +75,7 @@ * By design there should only be *one* "controlling" process. In practice * multiple write accesses gives unpredictable result. Understood by "write" * to /proc gives result code thats should be read be the "writer". - * For pratical use this should be no problem. + * For practical use this should be no problem. * * Note when adding devices to a specific CPU there good idea to also assign * /proc/irq/XX/smp_affinity so TX-interrupts gets bound to the same CPU. @@ -96,7 +96,7 @@ * New xmit() return, do_div and misc clean up by Stephen Hemminger * <shemminger@osdl.org> 040923 * - * Rany Dunlap fixed u64 printk compiler waring + * Randy Dunlap fixed u64 printk compiler waring * * Remove FCS from BW calculation. Lennert Buytenhek <buytenh@wantstofly.org> * New time handling. Lennert Buytenhek <buytenh@wantstofly.org> 041213 @@ -137,6 +137,7 @@ #include <linux/ipv6.h> #include <linux/udp.h> #include <linux/proc_fs.h> +#include <linux/seq_file.h> #include <linux/wait.h> #include <net/checksum.h> #include <net/ipv6.h> @@ -151,7 +152,7 @@ #include <asm/timex.h> -#define VERSION "pktgen v2.62: Packet Generator for packet performance testing.\n" +#define VERSION "pktgen v2.63: Packet Generator for packet performance testing.\n" /* #define PG_DEBUG(a) a */ #define PG_DEBUG(a) @@ -177,8 +178,8 @@ #define T_REMDEV (1<<3) /* Remove all devs */ /* Locks */ -#define thread_lock() spin_lock(&_thread_lock) -#define thread_unlock() spin_unlock(&_thread_lock) +#define thread_lock() down(&pktgen_sem) +#define thread_unlock() up(&pktgen_sem) /* If lock -- can be removed after some work */ #define if_lock(t) spin_lock(&(t->if_lock)); @@ -186,7 +187,9 @@ /* Used to help with determining the pkts on receive */ #define PKTGEN_MAGIC 0xbe9be955 -#define PG_PROC_DIR "net/pktgen" +#define PG_PROC_DIR "pktgen" +#define PGCTRL "pgctrl" +static struct proc_dir_entry *pg_proc_dir = NULL; #define MAX_CFLOWS 65536 @@ -202,11 +205,8 @@ struct pktgen_dev { * Try to keep frequent/infrequent used vars. separated. */ - char ifname[32]; - struct proc_dir_entry *proc_ent; + char ifname[IFNAMSIZ]; char result[512]; - /* proc file names */ - char fname[80]; struct pktgen_thread* pg_thread; /* the owner */ struct pktgen_dev *next; /* Used for chaining in the thread's run-queue */ @@ -244,7 +244,7 @@ struct pktgen_dev { __u32 seq_num; int clone_skb; /* Use multiple SKBs during packet gen. If this number - * is greater than 1, then that many coppies of the same + * is greater than 1, then that many copies of the same * packet will be sent before a new packet is allocated. * For instance, if you want to send 1024 identical packets * before creating a new packet, set clone_skb to 1024. @@ -330,8 +330,6 @@ struct pktgen_thread { struct pktgen_dev *if_list; /* All device here */ struct pktgen_thread* next; char name[32]; - char fname[128]; /* name of proc file */ - struct proc_dir_entry *proc_ent; char result[512]; u32 max_before_softirq; /* We'll call do_softirq to prevent starvation. */ @@ -396,7 +394,7 @@ static inline s64 divremdi3(s64 x, s64 y, int type) /* End of hacks to deal with 64-bit math on x86 */ -/** Convert to miliseconds */ +/** Convert to milliseconds */ static inline __u64 tv_to_ms(const struct timeval* tv) { __u64 ms = tv->tv_usec / 1000; @@ -425,7 +423,7 @@ static inline __u64 pg_div64(__u64 n, __u64 base) { __u64 tmp = n; /* - * How do we know if the architectrure we are running on + * How do we know if the architecture we are running on * supports division with 64 bit base? * */ @@ -473,16 +471,6 @@ static inline __u64 tv_diff(const struct timeval* a, const struct timeval* b) static char version[] __initdata = VERSION; -static ssize_t proc_pgctrl_read(struct file* file, char __user * buf, size_t count, loff_t *ppos); -static ssize_t proc_pgctrl_write(struct file* file, const char __user * buf, size_t count, loff_t *ppos); -static int proc_if_read(char *buf , char **start, off_t offset, int len, int *eof, void *data); - -static int proc_thread_read(char *buf , char **start, off_t offset, int len, int *eof, void *data); -static int proc_if_write(struct file *file, const char __user *user_buffer, unsigned long count, void *data); -static int proc_thread_write(struct file *file, const char __user *user_buffer, unsigned long count, void *data); -static int create_proc_dir(void); -static int remove_proc_dir(void); - static int pktgen_remove_device(struct pktgen_thread* t, struct pktgen_dev *i); static int pktgen_add_device(struct pktgen_thread* t, const char* ifname); static struct pktgen_thread* pktgen_find_thread(const char* name); @@ -503,83 +491,41 @@ static int pg_delay_d = 0; static int pg_clone_skb_d = 0; static int debug = 0; -static DEFINE_SPINLOCK(_thread_lock); +static DECLARE_MUTEX(pktgen_sem); static struct pktgen_thread *pktgen_threads = NULL; -static char module_fname[128]; -static struct proc_dir_entry *module_proc_ent = NULL; - static struct notifier_block pktgen_notifier_block = { .notifier_call = pktgen_device_event, }; -static struct file_operations pktgen_fops = { - .read = proc_pgctrl_read, - .write = proc_pgctrl_write, - /* .ioctl = pktgen_ioctl, later maybe */ -}; - /* * /proc handling functions * */ -static struct proc_dir_entry *pg_proc_dir = NULL; -static int proc_pgctrl_read_eof=0; - -static ssize_t proc_pgctrl_read(struct file* file, char __user * buf, - size_t count, loff_t *ppos) +static int pgctrl_show(struct seq_file *seq, void *v) { - char data[200]; - int len = 0; - - if(proc_pgctrl_read_eof) { - proc_pgctrl_read_eof=0; - len = 0; - goto out; - } - - sprintf(data, "%s", VERSION); - - len = strlen(data); - - if(len > count) { - len =-EFAULT; - goto out; - } - - if (copy_to_user(buf, data, len)) { - len =-EFAULT; - goto out; - } - - *ppos += len; - proc_pgctrl_read_eof=1; /* EOF next call */ - - out: - return len; + seq_puts(seq, VERSION); + return 0; } -static ssize_t proc_pgctrl_write(struct file* file,const char __user * buf, - size_t count, loff_t *ppos) +static ssize_t pgctrl_write(struct file* file,const char __user * buf, + size_t count, loff_t *ppos) { - char *data = NULL; int err = 0; + char data[128]; if (!capable(CAP_NET_ADMIN)){ err = -EPERM; goto out; } - data = (void*)vmalloc ((unsigned int)count); + if (count > sizeof(data)) + count = sizeof(data); - if(!data) { - err = -ENOMEM; - goto out; - } if (copy_from_user(data, buf, count)) { - err =-EFAULT; - goto out_free; + err = -EFAULT; + goto out; } data[count-1] = 0; /* Make string */ @@ -594,31 +540,40 @@ static ssize_t proc_pgctrl_write(struct file* file,const char __user * buf, err = count; - out_free: - vfree (data); out: return err; } -static int proc_if_read(char *buf , char **start, off_t offset, - int len, int *eof, void *data) +static int pgctrl_open(struct inode *inode, struct file *file) +{ + return single_open(file, pgctrl_show, PDE(inode)->data); +} + +static struct file_operations pktgen_fops = { + .owner = THIS_MODULE, + .open = pgctrl_open, + .read = seq_read, + .llseek = seq_lseek, + .write = pgctrl_write, + .release = single_release, +}; + +static int pktgen_if_show(struct seq_file *seq, void *v) { - char *p; int i; - struct pktgen_dev *pkt_dev = (struct pktgen_dev*)(data); + struct pktgen_dev *pkt_dev = seq->private; __u64 sa; __u64 stopped; __u64 now = getCurUs(); - p = buf; - p += sprintf(p, "Params: count %llu min_pkt_size: %u max_pkt_size: %u\n", - (unsigned long long) pkt_dev->count, - pkt_dev->min_pkt_size, pkt_dev->max_pkt_size); + seq_printf(seq, "Params: count %llu min_pkt_size: %u max_pkt_size: %u\n", + (unsigned long long) pkt_dev->count, + pkt_dev->min_pkt_size, pkt_dev->max_pkt_size); - p += sprintf(p, " frags: %d delay: %u clone_skb: %d ifname: %s\n", - pkt_dev->nfrags, 1000*pkt_dev->delay_us+pkt_dev->delay_ns, pkt_dev->clone_skb, pkt_dev->ifname); + seq_printf(seq, " frags: %d delay: %u clone_skb: %d ifname: %s\n", + pkt_dev->nfrags, 1000*pkt_dev->delay_us+pkt_dev->delay_ns, pkt_dev->clone_skb, pkt_dev->ifname); - p += sprintf(p, " flows: %u flowlen: %u\n", pkt_dev->cflows, pkt_dev->lflow); + seq_printf(seq, " flows: %u flowlen: %u\n", pkt_dev->cflows, pkt_dev->lflow); if(pkt_dev->flags & F_IPV6) { @@ -626,19 +581,19 @@ static int proc_if_read(char *buf , char **start, off_t offset, fmt_ip6(b1, pkt_dev->in6_saddr.s6_addr); fmt_ip6(b2, pkt_dev->min_in6_saddr.s6_addr); fmt_ip6(b3, pkt_dev->max_in6_saddr.s6_addr); - p += sprintf(p, " saddr: %s min_saddr: %s max_saddr: %s\n", b1, b2, b3); + seq_printf(seq, " saddr: %s min_saddr: %s max_saddr: %s\n", b1, b2, b3); fmt_ip6(b1, pkt_dev->in6_daddr.s6_addr); fmt_ip6(b2, pkt_dev->min_in6_daddr.s6_addr); fmt_ip6(b3, pkt_dev->max_in6_daddr.s6_addr); - p += sprintf(p, " daddr: %s min_daddr: %s max_daddr: %s\n", b1, b2, b3); + seq_printf(seq, " daddr: %s min_daddr: %s max_daddr: %s\n", b1, b2, b3); } else - p += sprintf(p, " dst_min: %s dst_max: %s\n src_min: %s src_max: %s\n", - pkt_dev->dst_min, pkt_dev->dst_max, pkt_dev->src_min, pkt_dev->src_max); + seq_printf(seq," dst_min: %s dst_max: %s\n src_min: %s src_max: %s\n", + pkt_dev->dst_min, pkt_dev->dst_max, pkt_dev->src_min, pkt_dev->src_max); - p += sprintf(p, " src_mac: "); + seq_puts(seq, " src_mac: "); if ((pkt_dev->src_mac[0] == 0) && (pkt_dev->src_mac[1] == 0) && @@ -648,89 +603,89 @@ static int proc_if_read(char *buf , char **start, off_t offset, (pkt_dev->src_mac[5] == 0)) for (i = 0; i < 6; i++) - p += sprintf(p, "%02X%s", pkt_dev->odev->dev_addr[i], i == 5 ? " " : ":"); + seq_printf(seq, "%02X%s", pkt_dev->odev->dev_addr[i], i == 5 ? " " : ":"); else for (i = 0; i < 6; i++) - p += sprintf(p, "%02X%s", pkt_dev->src_mac[i], i == 5 ? " " : ":"); + seq_printf(seq, "%02X%s", pkt_dev->src_mac[i], i == 5 ? " " : ":"); - p += sprintf(p, "dst_mac: "); + seq_printf(seq, "dst_mac: "); for (i = 0; i < 6; i++) - p += sprintf(p, "%02X%s", pkt_dev->dst_mac[i], i == 5 ? "\n" : ":"); + seq_printf(seq, "%02X%s", pkt_dev->dst_mac[i], i == 5 ? "\n" : ":"); - p += sprintf(p, " udp_src_min: %d udp_src_max: %d udp_dst_min: %d udp_dst_max: %d\n", - pkt_dev->udp_src_min, pkt_dev->udp_src_max, pkt_dev->udp_dst_min, - pkt_dev->udp_dst_max); + seq_printf(seq, " udp_src_min: %d udp_src_max: %d udp_dst_min: %d udp_dst_max: %d\n", + pkt_dev->udp_src_min, pkt_dev->udp_src_max, pkt_dev->udp_dst_min, + pkt_dev->udp_dst_max); - p += sprintf(p, " src_mac_count: %d dst_mac_count: %d \n Flags: ", - pkt_dev->src_mac_count, pkt_dev->dst_mac_count); + seq_printf(seq, " src_mac_count: %d dst_mac_count: %d \n Flags: ", + pkt_dev->src_mac_count, pkt_dev->dst_mac_count); if (pkt_dev->flags & F_IPV6) - p += sprintf(p, "IPV6 "); + seq_printf(seq, "IPV6 "); if (pkt_dev->flags & F_IPSRC_RND) - p += sprintf(p, "IPSRC_RND "); + seq_printf(seq, "IPSRC_RND "); if (pkt_dev->flags & F_IPDST_RND) - p += sprintf(p, "IPDST_RND "); + seq_printf(seq, "IPDST_RND "); if (pkt_dev->flags & F_TXSIZE_RND) - p += sprintf(p, "TXSIZE_RND "); + seq_printf(seq, "TXSIZE_RND "); if (pkt_dev->flags & F_UDPSRC_RND) - p += sprintf(p, "UDPSRC_RND "); + seq_printf(seq, "UDPSRC_RND "); if (pkt_dev->flags & F_UDPDST_RND) - p += sprintf(p, "UDPDST_RND "); + seq_printf(seq, "UDPDST_RND "); if (pkt_dev->flags & F_MACSRC_RND) - p += sprintf(p, "MACSRC_RND "); + seq_printf(seq, "MACSRC_RND "); if (pkt_dev->flags & F_MACDST_RND) - p += sprintf(p, "MACDST_RND "); + seq_printf(seq, "MACDST_RND "); - p += sprintf(p, "\n"); + seq_puts(seq, "\n"); sa = pkt_dev->started_at; stopped = pkt_dev->stopped_at; if (pkt_dev->running) stopped = now; /* not really stopped, more like last-running-at */ - p += sprintf(p, "Current:\n pkts-sofar: %llu errors: %llu\n started: %lluus stopped: %lluus idle: %lluus\n", - (unsigned long long) pkt_dev->sofar, - (unsigned long long) pkt_dev->errors, - (unsigned long long) sa, - (unsigned long long) stopped, - (unsigned long long) pkt_dev->idle_acc); + seq_printf(seq, "Current:\n pkts-sofar: %llu errors: %llu\n started: %lluus stopped: %lluus idle: %lluus\n", + (unsigned long long) pkt_dev->sofar, + (unsigned long long) pkt_dev->errors, + (unsigned long long) sa, + (unsigned long long) stopped, + (unsigned long long) pkt_dev->idle_acc); - p += sprintf(p, " seq_num: %d cur_dst_mac_offset: %d cur_src_mac_offset: %d\n", - pkt_dev->seq_num, pkt_dev->cur_dst_mac_offset, pkt_dev->cur_src_mac_offset); + seq_printf(seq, " seq_num: %d cur_dst_mac_offset: %d cur_src_mac_offset: %d\n", + pkt_dev->seq_num, pkt_dev->cur_dst_mac_offset, + pkt_dev->cur_src_mac_offset); if(pkt_dev->flags & F_IPV6) { char b1[128], b2[128]; fmt_ip6(b1, pkt_dev->cur_in6_daddr.s6_addr); fmt_ip6(b2, pkt_dev->cur_in6_saddr.s6_addr); - p += sprintf(p, " cur_saddr: %s cur_daddr: %s\n", b2, b1); + seq_printf(seq, " cur_saddr: %s cur_daddr: %s\n", b2, b1); } else - p += sprintf(p, " cur_saddr: 0x%x cur_daddr: 0x%x\n", - pkt_dev->cur_saddr, pkt_dev->cur_daddr); + seq_printf(seq, " cur_saddr: 0x%x cur_daddr: 0x%x\n", + pkt_dev->cur_saddr, pkt_dev->cur_daddr); - p += sprintf(p, " cur_udp_dst: %d cur_udp_src: %d\n", - pkt_dev->cur_udp_dst, pkt_dev->cur_udp_src); + seq_printf(seq, " cur_udp_dst: %d cur_udp_src: %d\n", + pkt_dev->cur_udp_dst, pkt_dev->cur_udp_src); - p += sprintf(p, " flows: %u\n", pkt_dev->nflows); + seq_printf(seq, " flows: %u\n", pkt_dev->nflows); if (pkt_dev->result[0]) - p += sprintf(p, "Result: %s\n", pkt_dev->result); + seq_printf(seq, "Result: %s\n", pkt_dev->result); else - p += sprintf(p, "Result: Idle\n"); - *eof = 1; + seq_printf(seq, "Result: Idle\n"); - return p - buf; + return 0; } @@ -802,13 +757,14 @@ done_str: return i; } -static int proc_if_write(struct file *file, const char __user *user_buffer, - unsigned long count, void *data) +static ssize_t pktgen_if_write(struct file *file, const char __user *user_buffer, + size_t count, loff_t *offset) { + struct seq_file *seq = (struct seq_file *) file->private_data; + struct pktgen_dev *pkt_dev = seq->private; int i = 0, max, len; char name[16], valstr[32]; unsigned long value = 0; - struct pktgen_dev *pkt_dev = (struct pktgen_dev*)(data); char* pg_result = NULL; int tmp = 0; char buf[128]; @@ -849,7 +805,8 @@ static int proc_if_write(struct file *file, const char __user *user_buffer, if (copy_from_user(tb, user_buffer, count)) return -EFAULT; tb[count] = 0; - printk("pktgen: %s,%lu buffer -:%s:-\n", name, count, tb); + printk("pktgen: %s,%lu buffer -:%s:-\n", name, + (unsigned long) count, tb); } if (!strcmp(name, "min_pkt_size")) { @@ -1335,92 +1292,98 @@ static int proc_if_write(struct file *file, const char __user *user_buffer, return -EINVAL; } -static int proc_thread_read(char *buf , char **start, off_t offset, - int len, int *eof, void *data) +static int pktgen_if_open(struct inode *inode, struct file *file) { - char *p; - struct pktgen_thread *t = (struct pktgen_thread*)(data); - struct pktgen_dev *pkt_dev = NULL; + return single_open(file, pktgen_if_show, PDE(inode)->data); +} +static struct file_operations pktgen_if_fops = { + .owner = THIS_MODULE, + .open = pktgen_if_open, + .read = seq_read, + .llseek = seq_lseek, + .write = pktgen_if_write, + .release = single_release, +}; - if (!t) { - printk("pktgen: ERROR: could not find thread in proc_thread_read\n"); - return -EINVAL; - } +static int pktgen_thread_show(struct seq_file *seq, void *v) +{ + struct pktgen_thread *t = seq->private; + struct pktgen_dev *pkt_dev = NULL; + + BUG_ON(!t); - p = buf; - p += sprintf(p, "Name: %s max_before_softirq: %d\n", + seq_printf(seq, "Name: %s max_before_softirq: %d\n", t->name, t->max_before_softirq); - p += sprintf(p, "Running: "); + seq_printf(seq, "Running: "); if_lock(t); for(pkt_dev = t->if_list;pkt_dev; pkt_dev = pkt_dev->next) if(pkt_dev->running) - p += sprintf(p, "%s ", pkt_dev->ifname); + seq_printf(seq, "%s ", pkt_dev->ifname); - p += sprintf(p, "\nStopped: "); + seq_printf(seq, "\nStopped: "); for(pkt_dev = t->if_list;pkt_dev; pkt_dev = pkt_dev->next) if(!pkt_dev->running) - p += sprintf(p, "%s ", pkt_dev->ifname); + seq_printf(seq, "%s ", pkt_dev->ifname); if (t->result[0]) - p += sprintf(p, "\nResult: %s\n", t->result); + seq_printf(seq, "\nResult: %s\n", t->result); else - p += sprintf(p, "\nResult: NA\n"); - - *eof = 1; + seq_printf(seq, "\nResult: NA\n"); if_unlock(t); - return p - buf; + return 0; } -static int proc_thread_write(struct file *file, const char __user *user_buffer, - unsigned long count, void *data) +static ssize_t pktgen_thread_write(struct file *file, + const char __user *user_buffer, + size_t count, loff_t *offset) { + struct seq_file *seq = (struct seq_file *) file->private_data; + struct pktgen_thread *t = seq->private; int i = 0, max, len, ret; char name[40]; - struct pktgen_thread *t; char *pg_result; unsigned long value = 0; - + if (count < 1) { // sprintf(pg_result, "Wrong command format"); return -EINVAL; } - + max = count - i; len = count_trail_chars(&user_buffer[i], max); - if (len < 0) - return len; - + if (len < 0) + return len; + i += len; - + /* Read variable name */ len = strn_len(&user_buffer[i], sizeof(name) - 1); - if (len < 0) - return len; + if (len < 0) + return len; memset(name, 0, sizeof(name)); if (copy_from_user(name, &user_buffer[i], len)) return -EFAULT; i += len; - + max = count -i; len = count_trail_chars(&user_buffer[i], max); - if (len < 0) - return len; - + if (len < 0) + return len; + i += len; - if (debug) - printk("pktgen: t=%s, count=%lu\n", name, count); - + if (debug) + printk("pktgen: t=%s, count=%lu\n", name, + (unsigned long) count); - t = (struct pktgen_thread*)(data); if(!t) { printk("pktgen: ERROR: No thread\n"); ret = -EINVAL; @@ -1474,21 +1437,19 @@ static int proc_thread_write(struct file *file, const char __user *user_buffer, return ret; } -static int create_proc_dir(void) +static int pktgen_thread_open(struct inode *inode, struct file *file) { - pg_proc_dir = proc_mkdir(PG_PROC_DIR, NULL); - - if (!pg_proc_dir) - return -ENODEV; - - return 0; + return single_open(file, pktgen_thread_show, PDE(inode)->data); } -static int remove_proc_dir(void) -{ - remove_proc_entry(PG_PROC_DIR, NULL); - return 0; -} +static struct file_operations pktgen_thread_fops = { + .owner = THIS_MODULE, + .open = pktgen_thread_open, + .read = seq_read, + .llseek = seq_lseek, + .write = pktgen_thread_write, + .release = single_release, +}; /* Think find or remove for NN */ static struct pktgen_dev *__pktgen_NN_threads(const char* ifname, int remove) @@ -1702,7 +1663,7 @@ static void spin(struct pktgen_dev *pkt_dev, __u64 spin_until_us) start = now = getCurUs(); printk(KERN_INFO "sleeping for %d\n", (int)(spin_until_us - now)); while (now < spin_until_us) { - /* TODO: optimise sleeping behavior */ + /* TODO: optimize sleeping behavior */ if (spin_until_us - now > jiffies_to_usecs(1)+1) schedule_timeout_interruptible(1); else if (spin_until_us - now > 100) { @@ -2361,7 +2322,7 @@ static void pktgen_stop_all_threads_ifs(void) pktgen_stop(t); t = t->next; } - thread_unlock(); + thread_unlock(); } static int thread_is_running(struct pktgen_thread *t ) @@ -2552,10 +2513,9 @@ static void pktgen_rem_thread(struct pktgen_thread *t) struct pktgen_thread *tmp = pktgen_threads; - if (strlen(t->fname)) - remove_proc_entry(t->fname, NULL); + remove_proc_entry(t->name, pg_proc_dir); - thread_lock(); + thread_lock(); if (tmp == t) pktgen_threads = tmp->next; @@ -2825,7 +2785,7 @@ static struct pktgen_dev *pktgen_find_dev(struct pktgen_thread *t, const char* i if_lock(t); for(pkt_dev=t->if_list; pkt_dev; pkt_dev = pkt_dev->next ) { - if (strcmp(pkt_dev->ifname, ifname) == 0) { + if (strncmp(pkt_dev->ifname, ifname, IFNAMSIZ) == 0) { break; } } @@ -2864,74 +2824,70 @@ static int add_dev_to_thread(struct pktgen_thread *t, struct pktgen_dev *pkt_dev static int pktgen_add_device(struct pktgen_thread *t, const char* ifname) { struct pktgen_dev *pkt_dev; + struct proc_dir_entry *pe; /* We don't allow a device to be on several threads */ - if( (pkt_dev = __pktgen_NN_threads(ifname, FIND)) == NULL) { - - pkt_dev = kmalloc(sizeof(struct pktgen_dev), GFP_KERNEL); - if (!pkt_dev) - return -ENOMEM; + pkt_dev = __pktgen_NN_threads(ifname, FIND); + if (pkt_dev) { + printk("pktgen: ERROR: interface already used.\n"); + return -EBUSY; + } - memset(pkt_dev, 0, sizeof(struct pktgen_dev)); + pkt_dev = kzalloc(sizeof(struct pktgen_dev), GFP_KERNEL); + if (!pkt_dev) + return -ENOMEM; - pkt_dev->flows = vmalloc(MAX_CFLOWS*sizeof(struct flow_state)); - if (pkt_dev->flows == NULL) { - kfree(pkt_dev); - return -ENOMEM; - } - memset(pkt_dev->flows, 0, MAX_CFLOWS*sizeof(struct flow_state)); - - pkt_dev->min_pkt_size = ETH_ZLEN; - pkt_dev->max_pkt_size = ETH_ZLEN; - pkt_dev->nfrags = 0; - pkt_dev->clone_skb = pg_clone_skb_d; - pkt_dev->delay_us = pg_delay_d / 1000; - pkt_dev->delay_ns = pg_delay_d % 1000; - pkt_dev->count = pg_count_d; - pkt_dev->sofar = 0; - pkt_dev->udp_src_min = 9; /* sink port */ - pkt_dev->udp_src_max = 9; - pkt_dev->udp_dst_min = 9; - pkt_dev->udp_dst_max = 9; - - strncpy(pkt_dev->ifname, ifname, 31); - sprintf(pkt_dev->fname, "%s/%s", PG_PROC_DIR, ifname); - - if (! pktgen_setup_dev(pkt_dev)) { - printk("pktgen: ERROR: pktgen_setup_dev failed.\n"); - if (pkt_dev->flows) - vfree(pkt_dev->flows); - kfree(pkt_dev); - return -ENODEV; - } + pkt_dev->flows = vmalloc(MAX_CFLOWS*sizeof(struct flow_state)); + if (pkt_dev->flows == NULL) { + kfree(pkt_dev); + return -ENOMEM; + } + memset(pkt_dev->flows, 0, MAX_CFLOWS*sizeof(struct flow_state)); - pkt_dev->proc_ent = create_proc_entry(pkt_dev->fname, 0600, NULL); - if (!pkt_dev->proc_ent) { - printk("pktgen: cannot create %s procfs entry.\n", pkt_dev->fname); - if (pkt_dev->flows) - vfree(pkt_dev->flows); - kfree(pkt_dev); - return -EINVAL; - } - pkt_dev->proc_ent->read_proc = proc_if_read; - pkt_dev->proc_ent->write_proc = proc_if_write; - pkt_dev->proc_ent->data = (void*)(pkt_dev); - pkt_dev->proc_ent->owner = THIS_MODULE; + pkt_dev->min_pkt_size = ETH_ZLEN; + pkt_dev->max_pkt_size = ETH_ZLEN; + pkt_dev->nfrags = 0; + pkt_dev->clone_skb = pg_clone_skb_d; + pkt_dev->delay_us = pg_delay_d / 1000; + pkt_dev->delay_ns = pg_delay_d % 1000; + pkt_dev->count = pg_count_d; + pkt_dev->sofar = 0; + pkt_dev->udp_src_min = 9; /* sink port */ + pkt_dev->udp_src_max = 9; + pkt_dev->udp_dst_min = 9; + pkt_dev->udp_dst_max = 9; + + strncpy(pkt_dev->ifname, ifname, IFNAMSIZ); + + if (! pktgen_setup_dev(pkt_dev)) { + printk("pktgen: ERROR: pktgen_setup_dev failed.\n"); + if (pkt_dev->flows) + vfree(pkt_dev->flows); + kfree(pkt_dev); + return -ENODEV; + } + + pe = create_proc_entry(ifname, 0600, pg_proc_dir); + if (!pe) { + printk("pktgen: cannot create %s/%s procfs entry.\n", + PG_PROC_DIR, ifname); + if (pkt_dev->flows) + vfree(pkt_dev->flows); + kfree(pkt_dev); + return -EINVAL; + } + pe->proc_fops = &pktgen_if_fops; + pe->data = pkt_dev; - return add_dev_to_thread(t, pkt_dev); - } - else { - printk("pktgen: ERROR: interface already used.\n"); - return -EBUSY; - } + return add_dev_to_thread(t, pkt_dev); } static struct pktgen_thread *pktgen_find_thread(const char* name) { struct pktgen_thread *t = NULL; - thread_lock(); + thread_lock(); t = pktgen_threads; while (t) { @@ -2947,6 +2903,7 @@ static struct pktgen_thread *pktgen_find_thread(const char* name) static int pktgen_create_thread(const char* name, int cpu) { struct pktgen_thread *t = NULL; + struct proc_dir_entry *pe; if (strlen(name) > 31) { printk("pktgen: ERROR: Thread name cannot be more than 31 characters.\n"); @@ -2958,28 +2915,26 @@ static int pktgen_create_thread(const char* name, int cpu) return -EINVAL; } - t = (struct pktgen_thread*)(kmalloc(sizeof(struct pktgen_thread), GFP_KERNEL)); + t = kzalloc(sizeof(struct pktgen_thread), GFP_KERNEL); if (!t) { printk("pktgen: ERROR: out of memory, can't create new thread.\n"); return -ENOMEM; } - memset(t, 0, sizeof(struct pktgen_thread)); strcpy(t->name, name); spin_lock_init(&t->if_lock); t->cpu = cpu; - sprintf(t->fname, "%s/%s", PG_PROC_DIR, t->name); - t->proc_ent = create_proc_entry(t->fname, 0600, NULL); - if (!t->proc_ent) { - printk("pktgen: cannot create %s procfs entry.\n", t->fname); + pe = create_proc_entry(t->name, 0600, pg_proc_dir); + if (!pe) { + printk("pktgen: cannot create %s/%s procfs entry.\n", + PG_PROC_DIR, t->name); kfree(t); return -EINVAL; } - t->proc_ent->read_proc = proc_thread_read; - t->proc_ent->write_proc = proc_thread_write; - t->proc_ent->data = (void*)(t); - t->proc_ent->owner = THIS_MODULE; + + pe->proc_fops = &pktgen_thread_fops; + pe->data = t; t->next = pktgen_threads; pktgen_threads = t; @@ -3034,8 +2989,7 @@ static int pktgen_remove_device(struct pktgen_thread *t, struct pktgen_dev *pkt_ /* Clean up proc file system */ - if (strlen(pkt_dev->fname)) - remove_proc_entry(pkt_dev->fname, NULL); + remove_proc_entry(pkt_dev->ifname, pg_proc_dir); if (pkt_dev->flows) vfree(pkt_dev->flows); @@ -3046,31 +3000,31 @@ static int pktgen_remove_device(struct pktgen_thread *t, struct pktgen_dev *pkt_ static int __init pg_init(void) { int cpu; - printk(version); + struct proc_dir_entry *pe; - module_fname[0] = 0; + printk(version); - create_proc_dir(); + pg_proc_dir = proc_mkdir(PG_PROC_DIR, proc_net); + if (!pg_proc_dir) + return -ENODEV; + pg_proc_dir->owner = THIS_MODULE; - sprintf(module_fname, "%s/pgctrl", PG_PROC_DIR); - module_proc_ent = create_proc_entry(module_fname, 0600, NULL); - if (!module_proc_ent) { - printk("pktgen: ERROR: cannot create %s procfs entry.\n", module_fname); + pe = create_proc_entry(PGCTRL, 0600, pg_proc_dir); + if (pe == NULL) { + printk("pktgen: ERROR: cannot create %s procfs entry.\n", PGCTRL); + proc_net_remove(PG_PROC_DIR); return -EINVAL; } - module_proc_ent->proc_fops = &pktgen_fops; - module_proc_ent->data = NULL; + pe->proc_fops = &pktgen_fops; + pe->data = NULL; /* Register us to receive netdevice events */ register_netdevice_notifier(&pktgen_notifier_block); - for (cpu = 0; cpu < NR_CPUS ; cpu++) { + for_each_online_cpu(cpu) { char buf[30]; - if (!cpu_online(cpu)) - continue; - sprintf(buf, "kpktgend_%i", cpu); pktgen_create_thread(buf, cpu); } @@ -3095,10 +3049,8 @@ static void __exit pg_cleanup(void) unregister_netdevice_notifier(&pktgen_notifier_block); /* Clean up proc file system */ - - remove_proc_entry(module_fname, NULL); - - remove_proc_dir(); + remove_proc_entry(PGCTRL, pg_proc_dir); + proc_net_remove(PG_PROC_DIR); } diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 02cd4cde211..95501e40100 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -122,6 +122,8 @@ void skb_under_panic(struct sk_buff *skb, int sz, void *here) * __alloc_skb - allocate a network buffer * @size: size to allocate * @gfp_mask: allocation mask + * @fclone: allocate from fclone cache instead of head cache + * and allocate a cloned (child) skb * * Allocate a new &sk_buff. The returned buffer has no headroom and a * tail room of size bytes. The object has a reference count of one. @@ -174,6 +176,8 @@ struct sk_buff *__alloc_skb(unsigned int size, gfp_t gfp_mask, skb_shinfo(skb)->tso_size = 0; skb_shinfo(skb)->tso_segs = 0; skb_shinfo(skb)->frag_list = NULL; + skb_shinfo(skb)->ufo_size = 0; + skb_shinfo(skb)->ip6_frag_id = 0; out: return skb; nodata: @@ -1694,6 +1698,78 @@ unsigned int skb_find_text(struct sk_buff *skb, unsigned int from, return textsearch_find(config, state); } +/** + * skb_append_datato_frags: - append the user data to a skb + * @sk: sock structure + * @skb: skb structure to be appened with user data. + * @getfrag: call back function to be used for getting the user data + * @from: pointer to user message iov + * @length: length of the iov message + * + * Description: This procedure append the user data in the fragment part + * of the skb if any page alloc fails user this procedure returns -ENOMEM + */ +int skb_append_datato_frags(struct sock *sk, struct sk_buff *skb, + int getfrag(void *from, char *to, int offset, + int len, int odd, struct sk_buff *skb), + void *from, int length) +{ + int frg_cnt = 0; + skb_frag_t *frag = NULL; + struct page *page = NULL; + int copy, left; + int offset = 0; + int ret; + + do { + /* Return error if we don't have space for new frag */ + frg_cnt = skb_shinfo(skb)->nr_frags; + if (frg_cnt >= MAX_SKB_FRAGS) + return -EFAULT; + + /* allocate a new page for next frag */ + page = alloc_pages(sk->sk_allocation, 0); + + /* If alloc_page fails just return failure and caller will + * free previous allocated pages by doing kfree_skb() + */ + if (page == NULL) + return -ENOMEM; + + /* initialize the next frag */ + sk->sk_sndmsg_page = page; + sk->sk_sndmsg_off = 0; + skb_fill_page_desc(skb, frg_cnt, page, 0, 0); + skb->truesize += PAGE_SIZE; + atomic_add(PAGE_SIZE, &sk->sk_wmem_alloc); + + /* get the new initialized frag */ + frg_cnt = skb_shinfo(skb)->nr_frags; + frag = &skb_shinfo(skb)->frags[frg_cnt - 1]; + + /* copy the user data to page */ + left = PAGE_SIZE - frag->page_offset; + copy = (length > left)? left : length; + + ret = getfrag(from, (page_address(frag->page) + + frag->page_offset + frag->size), + offset, copy, 0, skb); + if (ret < 0) + return -EFAULT; + + /* copy was successful so update the size parameters */ + sk->sk_sndmsg_off += copy; + frag->size += copy; + skb->len += copy; + skb->data_len += copy; + offset += copy; + length -= copy; + + } while (length > 0); + + return 0; +} + void __init skb_init(void) { skbuff_head_cache = kmem_cache_create("skbuff_head_cache", @@ -1745,3 +1821,4 @@ EXPORT_SYMBOL(skb_prepare_seq_read); EXPORT_SYMBOL(skb_seq_read); EXPORT_SYMBOL(skb_abort_seq_read); EXPORT_SYMBOL(skb_find_text); +EXPORT_SYMBOL(skb_append_datato_frags); diff --git a/net/core/sock.c b/net/core/sock.c index 1c52fe809ed..9602ceb3bac 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -940,7 +940,7 @@ static struct sk_buff *sock_alloc_send_pskb(struct sock *sk, int noblock, int *errcode) { struct sk_buff *skb; - unsigned int gfp_mask; + gfp_t gfp_mask; long timeo; int err; diff --git a/net/dccp/output.c b/net/dccp/output.c index 29250749f16..d59f86f7cea 100644 --- a/net/dccp/output.c +++ b/net/dccp/output.c @@ -495,7 +495,7 @@ void dccp_send_close(struct sock *sk, const int active) { struct dccp_sock *dp = dccp_sk(sk); struct sk_buff *skb; - const unsigned int prio = active ? GFP_KERNEL : GFP_ATOMIC; + const gfp_t prio = active ? GFP_KERNEL : GFP_ATOMIC; skb = alloc_skb(sk->sk_prot->max_header, prio); if (skb == NULL) diff --git a/net/decnet/af_decnet.c b/net/decnet/af_decnet.c index 1186dc44cdf..3f25cadccdd 100644 --- a/net/decnet/af_decnet.c +++ b/net/decnet/af_decnet.c @@ -719,22 +719,9 @@ static int dn_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) if (saddr->sdn_flags & ~SDF_WILD) return -EINVAL; -#if 1 if (!capable(CAP_NET_BIND_SERVICE) && (saddr->sdn_objnum || (saddr->sdn_flags & SDF_WILD))) return -EACCES; -#else - /* - * Maybe put the default actions in the default security ops for - * dn_prot_sock ? Would be nice if the capable call would go there - * too. - */ - if (security_dn_prot_sock(saddr) && - !capable(CAP_NET_BIND_SERVICE) || - saddr->sdn_objnum || (saddr->sdn_flags & SDF_WILD)) - return -EACCES; -#endif - if (!(saddr->sdn_flags & SDF_WILD)) { if (dn_ntohs(saddr->sdn_nodeaddrl)) { diff --git a/net/ieee80211/Makefile b/net/ieee80211/Makefile index a6ccac5baea..f988417121d 100644 --- a/net/ieee80211/Makefile +++ b/net/ieee80211/Makefile @@ -7,5 +7,6 @@ ieee80211-objs := \ ieee80211_module.o \ ieee80211_tx.o \ ieee80211_rx.o \ - ieee80211_wx.o + ieee80211_wx.o \ + ieee80211_geo.o diff --git a/net/ieee80211/ieee80211_crypt.c b/net/ieee80211/ieee80211_crypt.c index 61a9d92e455..f3b6aa3be63 100644 --- a/net/ieee80211/ieee80211_crypt.c +++ b/net/ieee80211/ieee80211_crypt.c @@ -41,6 +41,12 @@ void ieee80211_crypt_deinit_entries(struct ieee80211_device *ieee, int force) { struct list_head *ptr, *n; struct ieee80211_crypt_data *entry; + unsigned long flags; + + spin_lock_irqsave(&ieee->lock, flags); + + if (list_empty(&ieee->crypt_deinit_list)) + goto unlock; for (ptr = ieee->crypt_deinit_list.next, n = ptr->next; ptr != &ieee->crypt_deinit_list; ptr = n, n = ptr->next) { @@ -57,6 +63,18 @@ void ieee80211_crypt_deinit_entries(struct ieee80211_device *ieee, int force) } kfree(entry); } + unlock: + spin_unlock_irqrestore(&ieee->lock, flags); +} + +/* After this, crypt_deinit_list won't accept new members */ +void ieee80211_crypt_quiescing(struct ieee80211_device *ieee) +{ + unsigned long flags; + + spin_lock_irqsave(&ieee->lock, flags); + ieee->crypt_quiesced = 1; + spin_unlock_irqrestore(&ieee->lock, flags); } void ieee80211_crypt_deinit_handler(unsigned long data) @@ -64,16 +82,16 @@ void ieee80211_crypt_deinit_handler(unsigned long data) struct ieee80211_device *ieee = (struct ieee80211_device *)data; unsigned long flags; - spin_lock_irqsave(&ieee->lock, flags); ieee80211_crypt_deinit_entries(ieee, 0); - if (!list_empty(&ieee->crypt_deinit_list)) { + + spin_lock_irqsave(&ieee->lock, flags); + if (!list_empty(&ieee->crypt_deinit_list) && !ieee->crypt_quiesced) { printk(KERN_DEBUG "%s: entries remaining in delayed crypt " "deletion list\n", ieee->dev->name); ieee->crypt_deinit_timer.expires = jiffies + HZ; add_timer(&ieee->crypt_deinit_timer); } spin_unlock_irqrestore(&ieee->lock, flags); - } void ieee80211_crypt_delayed_deinit(struct ieee80211_device *ieee, @@ -93,10 +111,12 @@ void ieee80211_crypt_delayed_deinit(struct ieee80211_device *ieee, * locking. */ spin_lock_irqsave(&ieee->lock, flags); - list_add(&tmp->list, &ieee->crypt_deinit_list); - if (!timer_pending(&ieee->crypt_deinit_timer)) { - ieee->crypt_deinit_timer.expires = jiffies + HZ; - add_timer(&ieee->crypt_deinit_timer); + if (!ieee->crypt_quiesced) { + list_add(&tmp->list, &ieee->crypt_deinit_list); + if (!timer_pending(&ieee->crypt_deinit_timer)) { + ieee->crypt_deinit_timer.expires = jiffies + HZ; + add_timer(&ieee->crypt_deinit_timer); + } } spin_unlock_irqrestore(&ieee->lock, flags); } @@ -191,18 +211,18 @@ static void ieee80211_crypt_null_deinit(void *priv) } static struct ieee80211_crypto_ops ieee80211_crypt_null = { - .name = "NULL", - .init = ieee80211_crypt_null_init, - .deinit = ieee80211_crypt_null_deinit, - .encrypt_mpdu = NULL, - .decrypt_mpdu = NULL, - .encrypt_msdu = NULL, - .decrypt_msdu = NULL, - .set_key = NULL, - .get_key = NULL, - .extra_prefix_len = 0, - .extra_postfix_len = 0, - .owner = THIS_MODULE, + .name = "NULL", + .init = ieee80211_crypt_null_init, + .deinit = ieee80211_crypt_null_deinit, + .encrypt_mpdu = NULL, + .decrypt_mpdu = NULL, + .encrypt_msdu = NULL, + .decrypt_msdu = NULL, + .set_key = NULL, + .get_key = NULL, + .extra_mpdu_prefix_len = 0, + .extra_mpdu_postfix_len = 0, + .owner = THIS_MODULE, }; static int __init ieee80211_crypto_init(void) @@ -249,6 +269,7 @@ static void __exit ieee80211_crypto_deinit(void) EXPORT_SYMBOL(ieee80211_crypt_deinit_entries); EXPORT_SYMBOL(ieee80211_crypt_deinit_handler); EXPORT_SYMBOL(ieee80211_crypt_delayed_deinit); +EXPORT_SYMBOL(ieee80211_crypt_quiescing); EXPORT_SYMBOL(ieee80211_register_crypto_ops); EXPORT_SYMBOL(ieee80211_unregister_crypto_ops); diff --git a/net/ieee80211/ieee80211_crypt_ccmp.c b/net/ieee80211/ieee80211_crypt_ccmp.c index 8fc13f45971..05a853c1301 100644 --- a/net/ieee80211/ieee80211_crypt_ccmp.c +++ b/net/ieee80211/ieee80211_crypt_ccmp.c @@ -119,7 +119,7 @@ static inline void xor_block(u8 * b, u8 * a, size_t len) } static void ccmp_init_blocks(struct crypto_tfm *tfm, - struct ieee80211_hdr *hdr, + struct ieee80211_hdr_4addr *hdr, u8 * pn, size_t dlen, u8 * b0, u8 * auth, u8 * s0) { u8 *pos, qc = 0; @@ -191,26 +191,18 @@ static void ccmp_init_blocks(struct crypto_tfm *tfm, ieee80211_ccmp_aes_encrypt(tfm, b0, s0); } -static int ieee80211_ccmp_encrypt(struct sk_buff *skb, int hdr_len, void *priv) +static int ieee80211_ccmp_hdr(struct sk_buff *skb, int hdr_len, void *priv) { struct ieee80211_ccmp_data *key = priv; - int data_len, i, blocks, last, len; - u8 *pos, *mic; - struct ieee80211_hdr *hdr; - u8 *b0 = key->tx_b0; - u8 *b = key->tx_b; - u8 *e = key->tx_e; - u8 *s0 = key->tx_s0; + int i; + u8 *pos; - if (skb_headroom(skb) < CCMP_HDR_LEN || - skb_tailroom(skb) < CCMP_MIC_LEN || skb->len < hdr_len) + if (skb_headroom(skb) < CCMP_HDR_LEN || skb->len < hdr_len) return -1; - data_len = skb->len - hdr_len; pos = skb_push(skb, CCMP_HDR_LEN); memmove(pos, pos + CCMP_HDR_LEN, hdr_len); pos += hdr_len; - mic = skb_put(skb, CCMP_MIC_LEN); i = CCMP_PN_LEN - 1; while (i >= 0) { @@ -229,7 +221,31 @@ static int ieee80211_ccmp_encrypt(struct sk_buff *skb, int hdr_len, void *priv) *pos++ = key->tx_pn[1]; *pos++ = key->tx_pn[0]; - hdr = (struct ieee80211_hdr *)skb->data; + return CCMP_HDR_LEN; +} + +static int ieee80211_ccmp_encrypt(struct sk_buff *skb, int hdr_len, void *priv) +{ + struct ieee80211_ccmp_data *key = priv; + int data_len, i, blocks, last, len; + u8 *pos, *mic; + struct ieee80211_hdr_4addr *hdr; + u8 *b0 = key->tx_b0; + u8 *b = key->tx_b; + u8 *e = key->tx_e; + u8 *s0 = key->tx_s0; + + if (skb_tailroom(skb) < CCMP_MIC_LEN || skb->len < hdr_len) + return -1; + + data_len = skb->len - hdr_len; + len = ieee80211_ccmp_hdr(skb, hdr_len, priv); + if (len < 0) + return -1; + + pos = skb->data + hdr_len + CCMP_HDR_LEN; + mic = skb_put(skb, CCMP_MIC_LEN); + hdr = (struct ieee80211_hdr_4addr *)skb->data; ccmp_init_blocks(key->tfm, hdr, key->tx_pn, data_len, b0, b, s0); blocks = (data_len + AES_BLOCK_LEN - 1) / AES_BLOCK_LEN; @@ -258,7 +274,7 @@ static int ieee80211_ccmp_decrypt(struct sk_buff *skb, int hdr_len, void *priv) { struct ieee80211_ccmp_data *key = priv; u8 keyidx, *pos; - struct ieee80211_hdr *hdr; + struct ieee80211_hdr_4addr *hdr; u8 *b0 = key->rx_b0; u8 *b = key->rx_b; u8 *a = key->rx_a; @@ -272,7 +288,7 @@ static int ieee80211_ccmp_decrypt(struct sk_buff *skb, int hdr_len, void *priv) return -1; } - hdr = (struct ieee80211_hdr *)skb->data; + hdr = (struct ieee80211_hdr_4addr *)skb->data; pos = skb->data + hdr_len; keyidx = pos[3]; if (!(keyidx & (1 << 5))) { @@ -426,19 +442,20 @@ static char *ieee80211_ccmp_print_stats(char *p, void *priv) } static struct ieee80211_crypto_ops ieee80211_crypt_ccmp = { - .name = "CCMP", - .init = ieee80211_ccmp_init, - .deinit = ieee80211_ccmp_deinit, - .encrypt_mpdu = ieee80211_ccmp_encrypt, - .decrypt_mpdu = ieee80211_ccmp_decrypt, - .encrypt_msdu = NULL, - .decrypt_msdu = NULL, - .set_key = ieee80211_ccmp_set_key, - .get_key = ieee80211_ccmp_get_key, - .print_stats = ieee80211_ccmp_print_stats, - .extra_prefix_len = CCMP_HDR_LEN, - .extra_postfix_len = CCMP_MIC_LEN, - .owner = THIS_MODULE, + .name = "CCMP", + .init = ieee80211_ccmp_init, + .deinit = ieee80211_ccmp_deinit, + .build_iv = ieee80211_ccmp_hdr, + .encrypt_mpdu = ieee80211_ccmp_encrypt, + .decrypt_mpdu = ieee80211_ccmp_decrypt, + .encrypt_msdu = NULL, + .decrypt_msdu = NULL, + .set_key = ieee80211_ccmp_set_key, + .get_key = ieee80211_ccmp_get_key, + .print_stats = ieee80211_ccmp_print_stats, + .extra_mpdu_prefix_len = CCMP_HDR_LEN, + .extra_mpdu_postfix_len = CCMP_MIC_LEN, + .owner = THIS_MODULE, }; static int __init ieee80211_crypto_ccmp_init(void) diff --git a/net/ieee80211/ieee80211_crypt_tkip.c b/net/ieee80211/ieee80211_crypt_tkip.c index d4f9164be1a..2e34f29b795 100644 --- a/net/ieee80211/ieee80211_crypt_tkip.c +++ b/net/ieee80211/ieee80211_crypt_tkip.c @@ -59,8 +59,24 @@ struct ieee80211_tkip_data { /* scratch buffers for virt_to_page() (crypto API) */ u8 rx_hdr[16], tx_hdr[16]; + + unsigned long flags; }; +static unsigned long ieee80211_tkip_set_flags(unsigned long flags, void *priv) +{ + struct ieee80211_tkip_data *_priv = priv; + unsigned long old_flags = _priv->flags; + _priv->flags = flags; + return old_flags; +} + +static unsigned long ieee80211_tkip_get_flags(void *priv) +{ + struct ieee80211_tkip_data *_priv = priv; + return _priv->flags; +} + static void *ieee80211_tkip_init(int key_idx) { struct ieee80211_tkip_data *priv; @@ -69,6 +85,7 @@ static void *ieee80211_tkip_init(int key_idx) if (priv == NULL) goto fail; memset(priv, 0, sizeof(*priv)); + priv->key_idx = key_idx; priv->tfm_arc4 = crypto_alloc_tfm("arc4", 0); @@ -255,25 +272,27 @@ static void tkip_mixing_phase2(u8 * WEPSeed, const u8 * TK, const u16 * TTAK, #endif } -static int ieee80211_tkip_encrypt(struct sk_buff *skb, int hdr_len, void *priv) +static u8 *ieee80211_tkip_hdr(struct sk_buff *skb, int hdr_len, void *priv) { struct ieee80211_tkip_data *tkey = priv; int len; - u8 rc4key[16], *pos, *icv; - struct ieee80211_hdr *hdr; + u8 *rc4key, *pos, *icv; + struct ieee80211_hdr_4addr *hdr; u32 crc; - struct scatterlist sg; - if (skb_headroom(skb) < 8 || skb_tailroom(skb) < 4 || - skb->len < hdr_len) - return -1; + hdr = (struct ieee80211_hdr_4addr *)skb->data; + + if (skb_headroom(skb) < 8 || skb->len < hdr_len) + return NULL; - hdr = (struct ieee80211_hdr *)skb->data; if (!tkey->tx_phase1_done) { tkip_mixing_phase1(tkey->tx_ttak, tkey->key, hdr->addr2, tkey->tx_iv32); tkey->tx_phase1_done = 1; } + rc4key = kmalloc(16, GFP_ATOMIC); + if (!rc4key) + return NULL; tkip_mixing_phase2(rc4key, tkey->key, tkey->tx_ttak, tkey->tx_iv16); len = skb->len - hdr_len; @@ -282,9 +301,9 @@ static int ieee80211_tkip_encrypt(struct sk_buff *skb, int hdr_len, void *priv) pos += hdr_len; icv = skb_put(skb, 4); - *pos++ = rc4key[0]; - *pos++ = rc4key[1]; - *pos++ = rc4key[2]; + *pos++ = *rc4key; + *pos++ = *(rc4key + 1); + *pos++ = *(rc4key + 2); *pos++ = (tkey->key_idx << 6) | (1 << 5) /* Ext IV included */ ; *pos++ = tkey->tx_iv32 & 0xff; *pos++ = (tkey->tx_iv32 >> 8) & 0xff; @@ -297,6 +316,38 @@ static int ieee80211_tkip_encrypt(struct sk_buff *skb, int hdr_len, void *priv) icv[2] = crc >> 16; icv[3] = crc >> 24; + return rc4key; +} + +static int ieee80211_tkip_encrypt(struct sk_buff *skb, int hdr_len, void *priv) +{ + struct ieee80211_tkip_data *tkey = priv; + int len; + const u8 *rc4key; + u8 *pos; + struct scatterlist sg; + + if (tkey->flags & IEEE80211_CRYPTO_TKIP_COUNTERMEASURES) { + if (net_ratelimit()) { + struct ieee80211_hdr_4addr *hdr = + (struct ieee80211_hdr_4addr *)skb->data; + printk(KERN_DEBUG "TKIP countermeasures: dropped " + "TX packet to " MAC_FMT "\n", + MAC_ARG(hdr->addr1)); + } + return -1; + } + + if (skb_tailroom(skb) < 4 || skb->len < hdr_len) + return -1; + + len = skb->len - hdr_len; + pos = skb->data + hdr_len; + + rc4key = ieee80211_tkip_hdr(skb, hdr_len, priv); + if (!rc4key) + return -1; + crypto_cipher_setkey(tkey->tfm_arc4, rc4key, 16); sg.page = virt_to_page(pos); sg.offset = offset_in_page(pos); @@ -319,16 +370,26 @@ static int ieee80211_tkip_decrypt(struct sk_buff *skb, int hdr_len, void *priv) u8 keyidx, *pos; u32 iv32; u16 iv16; - struct ieee80211_hdr *hdr; + struct ieee80211_hdr_4addr *hdr; u8 icv[4]; u32 crc; struct scatterlist sg; int plen; + hdr = (struct ieee80211_hdr_4addr *)skb->data; + + if (tkey->flags & IEEE80211_CRYPTO_TKIP_COUNTERMEASURES) { + if (net_ratelimit()) { + printk(KERN_DEBUG "TKIP countermeasures: dropped " + "received packet from " MAC_FMT "\n", + MAC_ARG(hdr->addr2)); + } + return -1; + } + if (skb->len < hdr_len + 8 + 4) return -1; - hdr = (struct ieee80211_hdr *)skb->data; pos = skb->data + hdr_len; keyidx = pos[3]; if (!(keyidx & (1 << 5))) { @@ -441,9 +502,9 @@ static int michael_mic(struct ieee80211_tkip_data *tkey, u8 * key, u8 * hdr, static void michael_mic_hdr(struct sk_buff *skb, u8 * hdr) { - struct ieee80211_hdr *hdr11; + struct ieee80211_hdr_4addr *hdr11; - hdr11 = (struct ieee80211_hdr *)skb->data; + hdr11 = (struct ieee80211_hdr_4addr *)skb->data; switch (le16_to_cpu(hdr11->frame_ctl) & (IEEE80211_FCTL_FROMDS | IEEE80211_FCTL_TODS)) { case IEEE80211_FCTL_TODS: @@ -490,9 +551,9 @@ static int ieee80211_michael_mic_add(struct sk_buff *skb, int hdr_len, return 0; } -#if WIRELESS_EXT >= 18 static void ieee80211_michael_mic_failure(struct net_device *dev, - struct ieee80211_hdr *hdr, int keyidx) + struct ieee80211_hdr_4addr *hdr, + int keyidx) { union iwreq_data wrqu; struct iw_michaelmicfailure ev; @@ -510,28 +571,6 @@ static void ieee80211_michael_mic_failure(struct net_device *dev, wrqu.data.length = sizeof(ev); wireless_send_event(dev, IWEVMICHAELMICFAILURE, &wrqu, (char *)&ev); } -#elif WIRELESS_EXT >= 15 -static void ieee80211_michael_mic_failure(struct net_device *dev, - struct ieee80211_hdr *hdr, int keyidx) -{ - union iwreq_data wrqu; - char buf[128]; - - /* TODO: needed parameters: count, keyid, key type, TSC */ - sprintf(buf, "MLME-MICHAELMICFAILURE.indication(keyid=%d %scast addr=" - MAC_FMT ")", keyidx, hdr->addr1[0] & 0x01 ? "broad" : "uni", - MAC_ARG(hdr->addr2)); - memset(&wrqu, 0, sizeof(wrqu)); - wrqu.data.length = strlen(buf); - wireless_send_event(dev, IWEVCUSTOM, &wrqu, buf); -} -#else /* WIRELESS_EXT >= 15 */ -static inline void ieee80211_michael_mic_failure(struct net_device *dev, - struct ieee80211_hdr *hdr, - int keyidx) -{ -} -#endif /* WIRELESS_EXT >= 15 */ static int ieee80211_michael_mic_verify(struct sk_buff *skb, int keyidx, int hdr_len, void *priv) @@ -547,8 +586,8 @@ static int ieee80211_michael_mic_verify(struct sk_buff *skb, int keyidx, skb->data + hdr_len, skb->len - 8 - hdr_len, mic)) return -1; if (memcmp(mic, skb->data + skb->len - 8, 8) != 0) { - struct ieee80211_hdr *hdr; - hdr = (struct ieee80211_hdr *)skb->data; + struct ieee80211_hdr_4addr *hdr; + hdr = (struct ieee80211_hdr_4addr *)skb->data; printk(KERN_DEBUG "%s: Michael MIC verification failed for " "MSDU from " MAC_FMT " keyidx=%d\n", skb->dev ? skb->dev->name : "N/A", MAC_ARG(hdr->addr2), @@ -654,19 +693,22 @@ static char *ieee80211_tkip_print_stats(char *p, void *priv) } static struct ieee80211_crypto_ops ieee80211_crypt_tkip = { - .name = "TKIP", - .init = ieee80211_tkip_init, - .deinit = ieee80211_tkip_deinit, - .encrypt_mpdu = ieee80211_tkip_encrypt, - .decrypt_mpdu = ieee80211_tkip_decrypt, - .encrypt_msdu = ieee80211_michael_mic_add, - .decrypt_msdu = ieee80211_michael_mic_verify, - .set_key = ieee80211_tkip_set_key, - .get_key = ieee80211_tkip_get_key, - .print_stats = ieee80211_tkip_print_stats, - .extra_prefix_len = 4 + 4, /* IV + ExtIV */ - .extra_postfix_len = 8 + 4, /* MIC + ICV */ - .owner = THIS_MODULE, + .name = "TKIP", + .init = ieee80211_tkip_init, + .deinit = ieee80211_tkip_deinit, + .encrypt_mpdu = ieee80211_tkip_encrypt, + .decrypt_mpdu = ieee80211_tkip_decrypt, + .encrypt_msdu = ieee80211_michael_mic_add, + .decrypt_msdu = ieee80211_michael_mic_verify, + .set_key = ieee80211_tkip_set_key, + .get_key = ieee80211_tkip_get_key, + .print_stats = ieee80211_tkip_print_stats, + .extra_mpdu_prefix_len = 4 + 4, /* IV + ExtIV */ + .extra_mpdu_postfix_len = 4, /* ICV */ + .extra_msdu_postfix_len = 8, /* MIC */ + .get_flags = ieee80211_tkip_get_flags, + .set_flags = ieee80211_tkip_set_flags, + .owner = THIS_MODULE, }; static int __init ieee80211_crypto_tkip_init(void) diff --git a/net/ieee80211/ieee80211_crypt_wep.c b/net/ieee80211/ieee80211_crypt_wep.c index b4d2514a090..7c08ed2f262 100644 --- a/net/ieee80211/ieee80211_crypt_wep.c +++ b/net/ieee80211/ieee80211_crypt_wep.c @@ -229,19 +229,19 @@ static char *prism2_wep_print_stats(char *p, void *priv) } static struct ieee80211_crypto_ops ieee80211_crypt_wep = { - .name = "WEP", - .init = prism2_wep_init, - .deinit = prism2_wep_deinit, - .encrypt_mpdu = prism2_wep_encrypt, - .decrypt_mpdu = prism2_wep_decrypt, - .encrypt_msdu = NULL, - .decrypt_msdu = NULL, - .set_key = prism2_wep_set_key, - .get_key = prism2_wep_get_key, - .print_stats = prism2_wep_print_stats, - .extra_prefix_len = 4, /* IV */ - .extra_postfix_len = 4, /* ICV */ - .owner = THIS_MODULE, + .name = "WEP", + .init = prism2_wep_init, + .deinit = prism2_wep_deinit, + .encrypt_mpdu = prism2_wep_encrypt, + .decrypt_mpdu = prism2_wep_decrypt, + .encrypt_msdu = NULL, + .decrypt_msdu = NULL, + .set_key = prism2_wep_set_key, + .get_key = prism2_wep_get_key, + .print_stats = prism2_wep_print_stats, + .extra_mpdu_prefix_len = 4, /* IV */ + .extra_mpdu_postfix_len = 4, /* ICV */ + .owner = THIS_MODULE, }; static int __init ieee80211_crypto_wep_init(void) diff --git a/net/ieee80211/ieee80211_geo.c b/net/ieee80211/ieee80211_geo.c new file mode 100644 index 00000000000..c4b54ef8f6d --- /dev/null +++ b/net/ieee80211/ieee80211_geo.c @@ -0,0 +1,141 @@ +/****************************************************************************** + + Copyright(c) 2005 Intel Corporation. All rights reserved. + + This program is free software; you can redistribute it and/or modify it + under the terms of version 2 of the GNU General Public License as + published by the Free Software Foundation. + + This program is distributed in the hope that it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + more details. + + You should have received a copy of the GNU General Public License along with + this program; if not, write to the Free Software Foundation, Inc., 59 + Temple Place - Suite 330, Boston, MA 02111-1307, USA. + + The full GNU General Public License is included in this distribution in the + file called LICENSE. + + Contact Information: + James P. Ketrenos <ipw2100-admin@linux.intel.com> + Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497 + +******************************************************************************/ +#include <linux/compiler.h> +#include <linux/config.h> +#include <linux/errno.h> +#include <linux/if_arp.h> +#include <linux/in6.h> +#include <linux/in.h> +#include <linux/ip.h> +#include <linux/kernel.h> +#include <linux/module.h> +#include <linux/netdevice.h> +#include <linux/proc_fs.h> +#include <linux/skbuff.h> +#include <linux/slab.h> +#include <linux/tcp.h> +#include <linux/types.h> +#include <linux/version.h> +#include <linux/wireless.h> +#include <linux/etherdevice.h> +#include <asm/uaccess.h> + +#include <net/ieee80211.h> + +int ieee80211_is_valid_channel(struct ieee80211_device *ieee, u8 channel) +{ + int i; + + /* Driver needs to initialize the geography map before using + * these helper functions */ + BUG_ON(ieee->geo.bg_channels == 0 && ieee->geo.a_channels == 0); + + if (ieee->freq_band & IEEE80211_24GHZ_BAND) + for (i = 0; i < ieee->geo.bg_channels; i++) + /* NOTE: If G mode is currently supported but + * this is a B only channel, we don't see it + * as valid. */ + if ((ieee->geo.bg[i].channel == channel) && + (!(ieee->mode & IEEE_G) || + !(ieee->geo.bg[i].flags & IEEE80211_CH_B_ONLY))) + return IEEE80211_24GHZ_BAND; + + if (ieee->freq_band & IEEE80211_52GHZ_BAND) + for (i = 0; i < ieee->geo.a_channels; i++) + if (ieee->geo.a[i].channel == channel) + return IEEE80211_52GHZ_BAND; + + return 0; +} + +int ieee80211_channel_to_index(struct ieee80211_device *ieee, u8 channel) +{ + int i; + + /* Driver needs to initialize the geography map before using + * these helper functions */ + BUG_ON(ieee->geo.bg_channels == 0 && ieee->geo.a_channels == 0); + + if (ieee->freq_band & IEEE80211_24GHZ_BAND) + for (i = 0; i < ieee->geo.bg_channels; i++) + if (ieee->geo.bg[i].channel == channel) + return i; + + if (ieee->freq_band & IEEE80211_52GHZ_BAND) + for (i = 0; i < ieee->geo.a_channels; i++) + if (ieee->geo.a[i].channel == channel) + return i; + + return -1; +} + +u8 ieee80211_freq_to_channel(struct ieee80211_device * ieee, u32 freq) +{ + int i; + + /* Driver needs to initialize the geography map before using + * these helper functions */ + BUG_ON(ieee->geo.bg_channels == 0 && ieee->geo.a_channels == 0); + + freq /= 100000; + + if (ieee->freq_band & IEEE80211_24GHZ_BAND) + for (i = 0; i < ieee->geo.bg_channels; i++) + if (ieee->geo.bg[i].freq == freq) + return ieee->geo.bg[i].channel; + + if (ieee->freq_band & IEEE80211_52GHZ_BAND) + for (i = 0; i < ieee->geo.a_channels; i++) + if (ieee->geo.a[i].freq == freq) + return ieee->geo.a[i].channel; + + return 0; +} + +int ieee80211_set_geo(struct ieee80211_device *ieee, + const struct ieee80211_geo *geo) +{ + memcpy(ieee->geo.name, geo->name, 3); + ieee->geo.name[3] = '\0'; + ieee->geo.bg_channels = geo->bg_channels; + ieee->geo.a_channels = geo->a_channels; + memcpy(ieee->geo.bg, geo->bg, geo->bg_channels * + sizeof(struct ieee80211_channel)); + memcpy(ieee->geo.a, geo->a, ieee->geo.a_channels * + sizeof(struct ieee80211_channel)); + return 0; +} + +const struct ieee80211_geo *ieee80211_get_geo(struct ieee80211_device *ieee) +{ + return &ieee->geo; +} + +EXPORT_SYMBOL(ieee80211_is_valid_channel); +EXPORT_SYMBOL(ieee80211_freq_to_channel); +EXPORT_SYMBOL(ieee80211_channel_to_index); +EXPORT_SYMBOL(ieee80211_set_geo); +EXPORT_SYMBOL(ieee80211_get_geo); diff --git a/net/ieee80211/ieee80211_module.c b/net/ieee80211/ieee80211_module.c index 6059e9e3712..f66d792cd20 100644 --- a/net/ieee80211/ieee80211_module.c +++ b/net/ieee80211/ieee80211_module.c @@ -1,6 +1,6 @@ /******************************************************************************* - Copyright(c) 2004 Intel Corporation. All rights reserved. + Copyright(c) 2004-2005 Intel Corporation. All rights reserved. Portions of this file are based on the WEP enablement code provided by the Host AP project hostap-drivers v0.1.3 @@ -53,12 +53,15 @@ #include <net/ieee80211.h> -MODULE_DESCRIPTION("802.11 data/management/control stack"); -MODULE_AUTHOR - ("Copyright (C) 2004 Intel Corporation <jketreno@linux.intel.com>"); -MODULE_LICENSE("GPL"); +#define DRV_DESCRIPTION "802.11 data/management/control stack" +#define DRV_NAME "ieee80211" +#define DRV_VERSION IEEE80211_VERSION +#define DRV_COPYRIGHT "Copyright (C) 2004-2005 Intel Corporation <jketreno@linux.intel.com>" -#define DRV_NAME "ieee80211" +MODULE_VERSION(DRV_VERSION); +MODULE_DESCRIPTION(DRV_DESCRIPTION); +MODULE_AUTHOR(DRV_COPYRIGHT); +MODULE_LICENSE("GPL"); static inline int ieee80211_networks_allocate(struct ieee80211_device *ieee) { @@ -126,26 +129,34 @@ struct net_device *alloc_ieee80211(int sizeof_priv) /* Default fragmentation threshold is maximum payload size */ ieee->fts = DEFAULT_FTS; + ieee->rts = DEFAULT_FTS; ieee->scan_age = DEFAULT_MAX_SCAN_AGE; ieee->open_wep = 1; /* Default to enabling full open WEP with host based encrypt/decrypt */ ieee->host_encrypt = 1; ieee->host_decrypt = 1; + ieee->host_mc_decrypt = 1; + + /* Host fragementation in Open mode. Default is enabled. + * Note: host fragmentation is always enabled if host encryption + * is enabled. For cards can do hardware encryption, they must do + * hardware fragmentation as well. So we don't need a variable + * like host_enc_frag. */ + ieee->host_open_frag = 1; ieee->ieee802_1x = 1; /* Default to supporting 802.1x */ INIT_LIST_HEAD(&ieee->crypt_deinit_list); init_timer(&ieee->crypt_deinit_timer); ieee->crypt_deinit_timer.data = (unsigned long)ieee; ieee->crypt_deinit_timer.function = ieee80211_crypt_deinit_handler; + ieee->crypt_quiesced = 0; spin_lock_init(&ieee->lock); ieee->wpa_enabled = 0; - ieee->tkip_countermeasures = 0; ieee->drop_unencrypted = 0; ieee->privacy_invoked = 0; - ieee->ieee802_1x = 1; return dev; @@ -161,6 +172,7 @@ void free_ieee80211(struct net_device *dev) int i; + ieee80211_crypt_quiescing(ieee); del_timer_sync(&ieee->crypt_deinit_timer); ieee80211_crypt_deinit_entries(ieee, 1); @@ -195,38 +207,26 @@ static int show_debug_level(char *page, char **start, off_t offset, static int store_debug_level(struct file *file, const char __user * buffer, unsigned long count, void *data) { - char buf[] = "0x00000000"; - char *p = (char *)buf; + char buf[] = "0x00000000\n"; + unsigned long len = min((unsigned long)sizeof(buf) - 1, count); unsigned long val; - if (count > sizeof(buf) - 1) - count = sizeof(buf) - 1; - - if (copy_from_user(buf, buffer, count)) + if (copy_from_user(buf, buffer, len)) return count; - buf[count] = 0; - /* - * what a FPOS... What, sscanf(buf, "%i", &val) would be too - * scary? - */ - if (p[1] == 'x' || p[1] == 'X' || p[0] == 'x' || p[0] == 'X') { - p++; - if (p[0] == 'x' || p[0] == 'X') - p++; - val = simple_strtoul(p, &p, 16); - } else - val = simple_strtoul(p, &p, 10); - if (p == buf) + buf[len] = 0; + if (sscanf(buf, "%li", &val) != 1) printk(KERN_INFO DRV_NAME ": %s is not in hex or decimal form.\n", buf); else ieee80211_debug_level = val; - return strlen(buf); + return strnlen(buf, len); } +#endif /* CONFIG_IEEE80211_DEBUG */ static int __init ieee80211_init(void) { +#ifdef CONFIG_IEEE80211_DEBUG struct proc_dir_entry *e; ieee80211_debug_level = debug; @@ -246,26 +246,33 @@ static int __init ieee80211_init(void) e->read_proc = show_debug_level; e->write_proc = store_debug_level; e->data = NULL; +#endif /* CONFIG_IEEE80211_DEBUG */ + + printk(KERN_INFO DRV_NAME ": " DRV_DESCRIPTION ", " DRV_VERSION "\n"); + printk(KERN_INFO DRV_NAME ": " DRV_COPYRIGHT "\n"); return 0; } static void __exit ieee80211_exit(void) { +#ifdef CONFIG_IEEE80211_DEBUG if (ieee80211_proc) { remove_proc_entry("debug_level", ieee80211_proc); remove_proc_entry(DRV_NAME, proc_net); ieee80211_proc = NULL; } +#endif /* CONFIG_IEEE80211_DEBUG */ } +#ifdef CONFIG_IEEE80211_DEBUG #include <linux/moduleparam.h> module_param(debug, int, 0444); MODULE_PARM_DESC(debug, "debug output mask"); +#endif /* CONFIG_IEEE80211_DEBUG */ module_exit(ieee80211_exit); module_init(ieee80211_init); -#endif const char *escape_essid(const char *essid, u8 essid_len) { diff --git a/net/ieee80211/ieee80211_rx.c b/net/ieee80211/ieee80211_rx.c index f7dcd854139..ce694cf5c16 100644 --- a/net/ieee80211/ieee80211_rx.c +++ b/net/ieee80211/ieee80211_rx.c @@ -5,7 +5,7 @@ * Copyright (c) 2001-2002, SSH Communications Security Corp and Jouni Malinen * <jkmaline@cc.hut.fi> * Copyright (c) 2002-2003, Jouni Malinen <jkmaline@cc.hut.fi> - * Copyright (c) 2004, Intel Corporation + * Copyright (c) 2004-2005, Intel Corporation * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as @@ -87,7 +87,7 @@ static struct ieee80211_frag_entry *ieee80211_frag_cache_find(struct /* Called only as a tasklet (software IRQ) */ static struct sk_buff *ieee80211_frag_cache_get(struct ieee80211_device *ieee, - struct ieee80211_hdr *hdr) + struct ieee80211_hdr_4addr *hdr) { struct sk_buff *skb = NULL; u16 sc; @@ -101,7 +101,7 @@ static struct sk_buff *ieee80211_frag_cache_get(struct ieee80211_device *ieee, if (frag == 0) { /* Reserve enough space to fit maximum frame length */ skb = dev_alloc_skb(ieee->dev->mtu + - sizeof(struct ieee80211_hdr) + + sizeof(struct ieee80211_hdr_4addr) + 8 /* LLC */ + 2 /* alignment */ + 8 /* WEP */ + ETH_ALEN /* WDS */ ); @@ -138,7 +138,7 @@ static struct sk_buff *ieee80211_frag_cache_get(struct ieee80211_device *ieee, /* Called only as a tasklet (software IRQ) */ static int ieee80211_frag_cache_invalidate(struct ieee80211_device *ieee, - struct ieee80211_hdr *hdr) + struct ieee80211_hdr_4addr *hdr) { u16 sc; unsigned int seq; @@ -176,7 +176,7 @@ ieee80211_rx_frame_mgmt(struct ieee80211_device *ieee, struct sk_buff *skb, ieee->dev->name); return 0; /* - hostap_update_sta_ps(ieee, (struct hostap_ieee80211_hdr *) + hostap_update_sta_ps(ieee, (struct hostap_ieee80211_hdr_4addr *) skb->data);*/ } @@ -232,13 +232,13 @@ static int ieee80211_is_eapol_frame(struct ieee80211_device *ieee, { struct net_device *dev = ieee->dev; u16 fc, ethertype; - struct ieee80211_hdr *hdr; + struct ieee80211_hdr_3addr *hdr; u8 *pos; if (skb->len < 24) return 0; - hdr = (struct ieee80211_hdr *)skb->data; + hdr = (struct ieee80211_hdr_3addr *)skb->data; fc = le16_to_cpu(hdr->frame_ctl); /* check that the frame is unicast frame to us */ @@ -271,26 +271,15 @@ static inline int ieee80211_rx_frame_decrypt(struct ieee80211_device *ieee, struct sk_buff *skb, struct ieee80211_crypt_data *crypt) { - struct ieee80211_hdr *hdr; + struct ieee80211_hdr_3addr *hdr; int res, hdrlen; if (crypt == NULL || crypt->ops->decrypt_mpdu == NULL) return 0; - hdr = (struct ieee80211_hdr *)skb->data; + hdr = (struct ieee80211_hdr_3addr *)skb->data; hdrlen = ieee80211_get_hdrlen(le16_to_cpu(hdr->frame_ctl)); -#ifdef CONFIG_IEEE80211_CRYPT_TKIP - if (ieee->tkip_countermeasures && strcmp(crypt->ops->name, "TKIP") == 0) { - if (net_ratelimit()) { - printk(KERN_DEBUG "%s: TKIP countermeasures: dropped " - "received packet from " MAC_FMT "\n", - ieee->dev->name, MAC_ARG(hdr->addr2)); - } - return -1; - } -#endif - atomic_inc(&crypt->refcnt); res = crypt->ops->decrypt_mpdu(skb, hdrlen, crypt->priv); atomic_dec(&crypt->refcnt); @@ -314,13 +303,13 @@ ieee80211_rx_frame_decrypt_msdu(struct ieee80211_device *ieee, struct sk_buff *skb, int keyidx, struct ieee80211_crypt_data *crypt) { - struct ieee80211_hdr *hdr; + struct ieee80211_hdr_3addr *hdr; int res, hdrlen; if (crypt == NULL || crypt->ops->decrypt_msdu == NULL) return 0; - hdr = (struct ieee80211_hdr *)skb->data; + hdr = (struct ieee80211_hdr_3addr *)skb->data; hdrlen = ieee80211_get_hdrlen(le16_to_cpu(hdr->frame_ctl)); atomic_inc(&crypt->refcnt); @@ -343,7 +332,7 @@ int ieee80211_rx(struct ieee80211_device *ieee, struct sk_buff *skb, struct ieee80211_rx_stats *rx_stats) { struct net_device *dev = ieee->dev; - struct ieee80211_hdr *hdr; + struct ieee80211_hdr_4addr *hdr; size_t hdrlen; u16 fc, type, stype, sc; struct net_device_stats *stats; @@ -363,7 +352,7 @@ int ieee80211_rx(struct ieee80211_device *ieee, struct sk_buff *skb, struct ieee80211_crypt_data *crypt = NULL; int keyidx = 0; - hdr = (struct ieee80211_hdr *)skb->data; + hdr = (struct ieee80211_hdr_4addr *)skb->data; stats = &ieee->stats; if (skb->len < 10) { @@ -378,35 +367,51 @@ int ieee80211_rx(struct ieee80211_device *ieee, struct sk_buff *skb, frag = WLAN_GET_SEQ_FRAG(sc); hdrlen = ieee80211_get_hdrlen(fc); -#ifdef NOT_YET -#if WIRELESS_EXT > 15 /* Put this code here so that we avoid duplicating it in all * Rx paths. - Jean II */ #ifdef IW_WIRELESS_SPY /* defined in iw_handler.h */ /* If spy monitoring on */ - if (iface->spy_data.spy_number > 0) { + if (ieee->spy_data.spy_number > 0) { struct iw_quality wstats; - wstats.level = rx_stats->signal; - wstats.noise = rx_stats->noise; - wstats.updated = 6; /* No qual value */ + + wstats.updated = 0; + if (rx_stats->mask & IEEE80211_STATMASK_RSSI) { + wstats.level = rx_stats->rssi; + wstats.updated |= IW_QUAL_LEVEL_UPDATED; + } else + wstats.updated |= IW_QUAL_LEVEL_INVALID; + + if (rx_stats->mask & IEEE80211_STATMASK_NOISE) { + wstats.noise = rx_stats->noise; + wstats.updated |= IW_QUAL_NOISE_UPDATED; + } else + wstats.updated |= IW_QUAL_NOISE_INVALID; + + if (rx_stats->mask & IEEE80211_STATMASK_SIGNAL) { + wstats.qual = rx_stats->signal; + wstats.updated |= IW_QUAL_QUAL_UPDATED; + } else + wstats.updated |= IW_QUAL_QUAL_INVALID; + /* Update spy records */ - wireless_spy_update(dev, hdr->addr2, &wstats); + wireless_spy_update(ieee->dev, hdr->addr2, &wstats); } #endif /* IW_WIRELESS_SPY */ -#endif /* WIRELESS_EXT > 15 */ + +#ifdef NOT_YET hostap_update_rx_stats(local->ap, hdr, rx_stats); #endif -#if WIRELESS_EXT > 15 if (ieee->iw_mode == IW_MODE_MONITOR) { ieee80211_monitor_rx(ieee, skb, rx_stats); stats->rx_packets++; stats->rx_bytes += skb->len; return 1; } -#endif - if (ieee->host_decrypt) { + if ((is_multicast_ether_addr(hdr->addr1) || + is_broadcast_ether_addr(hdr->addr2)) ? ieee->host_mc_decrypt : + ieee->host_decrypt) { int idx = 0; if (skb->len >= hdrlen + 3) idx = skb->data[hdrlen + 3] >> 6; @@ -531,6 +536,9 @@ int ieee80211_rx(struct ieee80211_device *ieee, struct sk_buff *skb, /* Nullfunc frames may have PS-bit set, so they must be passed to * hostap_handle_sta_rx() before being dropped here. */ + + stype &= ~IEEE80211_STYPE_QOS_DATA; + if (stype != IEEE80211_STYPE_DATA && stype != IEEE80211_STYPE_DATA_CFACK && stype != IEEE80211_STYPE_DATA_CFPOLL && @@ -549,7 +557,7 @@ int ieee80211_rx(struct ieee80211_device *ieee, struct sk_buff *skb, (keyidx = ieee80211_rx_frame_decrypt(ieee, skb, crypt)) < 0) goto rx_dropped; - hdr = (struct ieee80211_hdr *)skb->data; + hdr = (struct ieee80211_hdr_4addr *)skb->data; /* skb: hdr + (possibly fragmented) plaintext payload */ // PR: FIXME: hostap has additional conditions in the "if" below: @@ -603,7 +611,7 @@ int ieee80211_rx(struct ieee80211_device *ieee, struct sk_buff *skb, /* this was the last fragment and the frame will be * delivered, so remove skb from fragment cache */ skb = frag_skb; - hdr = (struct ieee80211_hdr *)skb->data; + hdr = (struct ieee80211_hdr_4addr *)skb->data; ieee80211_frag_cache_invalidate(ieee, hdr); } @@ -613,7 +621,7 @@ int ieee80211_rx(struct ieee80211_device *ieee, struct sk_buff *skb, ieee80211_rx_frame_decrypt_msdu(ieee, skb, keyidx, crypt)) goto rx_dropped; - hdr = (struct ieee80211_hdr *)skb->data; + hdr = (struct ieee80211_hdr_4addr *)skb->data; if (crypt && !(fc & IEEE80211_FCTL_PROTECTED) && !ieee->open_wep) { if ( /*ieee->ieee802_1x && */ ieee80211_is_eapol_frame(ieee, skb)) { @@ -755,69 +763,179 @@ int ieee80211_rx(struct ieee80211_device *ieee, struct sk_buff *skb, #define MGMT_FRAME_FIXED_PART_LENGTH 0x24 -static inline int ieee80211_is_ofdm_rate(u8 rate) +static u8 qos_oui[QOS_OUI_LEN] = { 0x00, 0x50, 0xF2 }; + +/* +* Make ther structure we read from the beacon packet has +* the right values +*/ +static int ieee80211_verify_qos_info(struct ieee80211_qos_information_element + *info_element, int sub_type) { - switch (rate & ~IEEE80211_BASIC_RATE_MASK) { - case IEEE80211_OFDM_RATE_6MB: - case IEEE80211_OFDM_RATE_9MB: - case IEEE80211_OFDM_RATE_12MB: - case IEEE80211_OFDM_RATE_18MB: - case IEEE80211_OFDM_RATE_24MB: - case IEEE80211_OFDM_RATE_36MB: - case IEEE80211_OFDM_RATE_48MB: - case IEEE80211_OFDM_RATE_54MB: - return 1; - } + + if (info_element->qui_subtype != sub_type) + return -1; + if (memcmp(info_element->qui, qos_oui, QOS_OUI_LEN)) + return -1; + if (info_element->qui_type != QOS_OUI_TYPE) + return -1; + if (info_element->version != QOS_VERSION_1) + return -1; + return 0; } -static inline int ieee80211_network_init(struct ieee80211_device *ieee, - struct ieee80211_probe_response - *beacon, - struct ieee80211_network *network, - struct ieee80211_rx_stats *stats) +/* + * Parse a QoS parameter element + */ +static int ieee80211_read_qos_param_element(struct ieee80211_qos_parameter_info + *element_param, struct ieee80211_info_element + *info_element) { -#ifdef CONFIG_IEEE80211_DEBUG - char rates_str[64]; - char *p; -#endif - struct ieee80211_info_element *info_element; - u16 left; - u8 i; + int ret = 0; + u16 size = sizeof(struct ieee80211_qos_parameter_info) - 2; - /* Pull out fixed field data */ - memcpy(network->bssid, beacon->header.addr3, ETH_ALEN); - network->capability = beacon->capability; - network->last_scanned = jiffies; - network->time_stamp[0] = beacon->time_stamp[0]; - network->time_stamp[1] = beacon->time_stamp[1]; - network->beacon_interval = beacon->beacon_interval; - /* Where to pull this? beacon->listen_interval; */ - network->listen_interval = 0x0A; - network->rates_len = network->rates_ex_len = 0; - network->last_associate = 0; - network->ssid_len = 0; - network->flags = 0; - network->atim_window = 0; + if ((info_element == NULL) || (element_param == NULL)) + return -1; - if (stats->freq == IEEE80211_52GHZ_BAND) { - /* for A band (No DS info) */ - network->channel = stats->received_channel; + if (info_element->id == QOS_ELEMENT_ID && info_element->len == size) { + memcpy(element_param->info_element.qui, info_element->data, + info_element->len); + element_param->info_element.elementID = info_element->id; + element_param->info_element.length = info_element->len; } else - network->flags |= NETWORK_HAS_CCK; + ret = -1; + if (ret == 0) + ret = ieee80211_verify_qos_info(&element_param->info_element, + QOS_OUI_PARAM_SUB_TYPE); + return ret; +} - network->wpa_ie_len = 0; - network->rsn_ie_len = 0; +/* + * Parse a QoS information element + */ +static int ieee80211_read_qos_info_element(struct + ieee80211_qos_information_element + *element_info, struct ieee80211_info_element + *info_element) +{ + int ret = 0; + u16 size = sizeof(struct ieee80211_qos_information_element) - 2; + + if (element_info == NULL) + return -1; + if (info_element == NULL) + return -1; + + if ((info_element->id == QOS_ELEMENT_ID) && (info_element->len == size)) { + memcpy(element_info->qui, info_element->data, + info_element->len); + element_info->elementID = info_element->id; + element_info->length = info_element->len; + } else + ret = -1; + + if (ret == 0) + ret = ieee80211_verify_qos_info(element_info, + QOS_OUI_INFO_SUB_TYPE); + return ret; +} + +/* + * Write QoS parameters from the ac parameters. + */ +static int ieee80211_qos_convert_ac_to_parameters(struct + ieee80211_qos_parameter_info + *param_elm, struct + ieee80211_qos_parameters + *qos_param) +{ + int rc = 0; + int i; + struct ieee80211_qos_ac_parameter *ac_params; + u32 txop; + u8 cw_min; + u8 cw_max; + + for (i = 0; i < QOS_QUEUE_NUM; i++) { + ac_params = &(param_elm->ac_params_record[i]); + + qos_param->aifs[i] = (ac_params->aci_aifsn) & 0x0F; + qos_param->aifs[i] -= (qos_param->aifs[i] < 2) ? 0 : 2; + + cw_min = ac_params->ecw_min_max & 0x0F; + qos_param->cw_min[i] = (u16) ((1 << cw_min) - 1); + + cw_max = (ac_params->ecw_min_max & 0xF0) >> 4; + qos_param->cw_max[i] = (u16) ((1 << cw_max) - 1); + + qos_param->flag[i] = + (ac_params->aci_aifsn & 0x10) ? 0x01 : 0x00; + + txop = le16_to_cpu(ac_params->tx_op_limit) * 32; + qos_param->tx_op_limit[i] = (u16) txop; + } + return rc; +} + +/* + * we have a generic data element which it may contain QoS information or + * parameters element. check the information element length to decide + * which type to read + */ +static int ieee80211_parse_qos_info_param_IE(struct ieee80211_info_element + *info_element, + struct ieee80211_network *network) +{ + int rc = 0; + struct ieee80211_qos_parameters *qos_param = NULL; + struct ieee80211_qos_information_element qos_info_element; + + rc = ieee80211_read_qos_info_element(&qos_info_element, info_element); + + if (rc == 0) { + network->qos_data.param_count = qos_info_element.ac_info & 0x0F; + network->flags |= NETWORK_HAS_QOS_INFORMATION; + } else { + struct ieee80211_qos_parameter_info param_element; + + rc = ieee80211_read_qos_param_element(¶m_element, + info_element); + if (rc == 0) { + qos_param = &(network->qos_data.parameters); + ieee80211_qos_convert_ac_to_parameters(¶m_element, + qos_param); + network->flags |= NETWORK_HAS_QOS_PARAMETERS; + network->qos_data.param_count = + param_element.info_element.ac_info & 0x0F; + } + } + + if (rc == 0) { + IEEE80211_DEBUG_QOS("QoS is supported\n"); + network->qos_data.supported = 1; + } + return rc; +} + +static int ieee80211_parse_info_param(struct ieee80211_info_element + *info_element, u16 length, + struct ieee80211_network *network) +{ + u8 i; +#ifdef CONFIG_IEEE80211_DEBUG + char rates_str[64]; + char *p; +#endif - info_element = &beacon->info_element; - left = stats->len - ((void *)info_element - (void *)beacon); - while (left >= sizeof(struct ieee80211_info_element_hdr)) { - if (sizeof(struct ieee80211_info_element_hdr) + - info_element->len > left) { - IEEE80211_DEBUG_SCAN - ("SCAN: parse failed: info_element->len + 2 > left : info_element->len+2=%Zd left=%d.\n", - info_element->len + - sizeof(struct ieee80211_info_element), left); + while (length >= sizeof(*info_element)) { + if (sizeof(*info_element) + info_element->len > length) { + IEEE80211_DEBUG_MGMT("Info elem: parse failed: " + "info_element->len + 2 > left : " + "info_element->len+2=%zd left=%d, id=%d.\n", + info_element->len + + sizeof(*info_element), + length, info_element->id); return 1; } @@ -837,7 +955,7 @@ static inline int ieee80211_network_init(struct ieee80211_device *ieee, memset(network->ssid + network->ssid_len, 0, IW_ESSID_MAX_SIZE - network->ssid_len); - IEEE80211_DEBUG_SCAN("MFIE_TYPE_SSID: '%s' len=%d.\n", + IEEE80211_DEBUG_MGMT("MFIE_TYPE_SSID: '%s' len=%d.\n", network->ssid, network->ssid_len); break; @@ -845,15 +963,14 @@ static inline int ieee80211_network_init(struct ieee80211_device *ieee, #ifdef CONFIG_IEEE80211_DEBUG p = rates_str; #endif - network->rates_len = - min(info_element->len, MAX_RATES_LENGTH); + network->rates_len = min(info_element->len, + MAX_RATES_LENGTH); for (i = 0; i < network->rates_len; i++) { network->rates[i] = info_element->data[i]; #ifdef CONFIG_IEEE80211_DEBUG - p += snprintf(p, - sizeof(rates_str) - (p - - rates_str), - "%02X ", network->rates[i]); + p += snprintf(p, sizeof(rates_str) - + (p - rates_str), "%02X ", + network->rates[i]); #endif if (ieee80211_is_ofdm_rate (info_element->data[i])) { @@ -865,7 +982,7 @@ static inline int ieee80211_network_init(struct ieee80211_device *ieee, } } - IEEE80211_DEBUG_SCAN("MFIE_TYPE_RATES: '%s' (%d)\n", + IEEE80211_DEBUG_MGMT("MFIE_TYPE_RATES: '%s' (%d)\n", rates_str, network->rates_len); break; @@ -873,15 +990,14 @@ static inline int ieee80211_network_init(struct ieee80211_device *ieee, #ifdef CONFIG_IEEE80211_DEBUG p = rates_str; #endif - network->rates_ex_len = - min(info_element->len, MAX_RATES_EX_LENGTH); + network->rates_ex_len = min(info_element->len, + MAX_RATES_EX_LENGTH); for (i = 0; i < network->rates_ex_len; i++) { network->rates_ex[i] = info_element->data[i]; #ifdef CONFIG_IEEE80211_DEBUG - p += snprintf(p, - sizeof(rates_str) - (p - - rates_str), - "%02X ", network->rates[i]); + p += snprintf(p, sizeof(rates_str) - + (p - rates_str), "%02X ", + network->rates[i]); #endif if (ieee80211_is_ofdm_rate (info_element->data[i])) { @@ -893,40 +1009,51 @@ static inline int ieee80211_network_init(struct ieee80211_device *ieee, } } - IEEE80211_DEBUG_SCAN("MFIE_TYPE_RATES_EX: '%s' (%d)\n", + IEEE80211_DEBUG_MGMT("MFIE_TYPE_RATES_EX: '%s' (%d)\n", rates_str, network->rates_ex_len); break; case MFIE_TYPE_DS_SET: - IEEE80211_DEBUG_SCAN("MFIE_TYPE_DS_SET: %d\n", + IEEE80211_DEBUG_MGMT("MFIE_TYPE_DS_SET: %d\n", info_element->data[0]); - if (stats->freq == IEEE80211_24GHZ_BAND) - network->channel = info_element->data[0]; + network->channel = info_element->data[0]; break; case MFIE_TYPE_FH_SET: - IEEE80211_DEBUG_SCAN("MFIE_TYPE_FH_SET: ignored\n"); + IEEE80211_DEBUG_MGMT("MFIE_TYPE_FH_SET: ignored\n"); break; case MFIE_TYPE_CF_SET: - IEEE80211_DEBUG_SCAN("MFIE_TYPE_CF_SET: ignored\n"); + IEEE80211_DEBUG_MGMT("MFIE_TYPE_CF_SET: ignored\n"); break; case MFIE_TYPE_TIM: - IEEE80211_DEBUG_SCAN("MFIE_TYPE_TIM: ignored\n"); + IEEE80211_DEBUG_MGMT("MFIE_TYPE_TIM: ignored\n"); + break; + + case MFIE_TYPE_ERP_INFO: + network->erp_value = info_element->data[0]; + IEEE80211_DEBUG_MGMT("MFIE_TYPE_ERP_SET: %d\n", + network->erp_value); break; case MFIE_TYPE_IBSS_SET: - IEEE80211_DEBUG_SCAN("MFIE_TYPE_IBSS_SET: ignored\n"); + network->atim_window = info_element->data[0]; + IEEE80211_DEBUG_MGMT("MFIE_TYPE_IBSS_SET: %d\n", + network->atim_window); break; case MFIE_TYPE_CHALLENGE: - IEEE80211_DEBUG_SCAN("MFIE_TYPE_CHALLENGE: ignored\n"); + IEEE80211_DEBUG_MGMT("MFIE_TYPE_CHALLENGE: ignored\n"); break; case MFIE_TYPE_GENERIC: - IEEE80211_DEBUG_SCAN("MFIE_TYPE_GENERIC: %d bytes\n", + IEEE80211_DEBUG_MGMT("MFIE_TYPE_GENERIC: %d bytes\n", info_element->len); + if (!ieee80211_parse_qos_info_param_IE(info_element, + network)) + break; + if (info_element->len >= 4 && info_element->data[0] == 0x00 && info_element->data[1] == 0x50 && @@ -940,7 +1067,7 @@ static inline int ieee80211_network_init(struct ieee80211_device *ieee, break; case MFIE_TYPE_RSN: - IEEE80211_DEBUG_SCAN("MFIE_TYPE_RSN: %d bytes\n", + IEEE80211_DEBUG_MGMT("MFIE_TYPE_RSN: %d bytes\n", info_element->len); network->rsn_ie_len = min(info_element->len + 2, MAX_WPA_IE_LEN); @@ -948,18 +1075,127 @@ static inline int ieee80211_network_init(struct ieee80211_device *ieee, network->rsn_ie_len); break; + case MFIE_TYPE_QOS_PARAMETER: + printk(KERN_ERR + "QoS Error need to parse QOS_PARAMETER IE\n"); + break; + default: - IEEE80211_DEBUG_SCAN("unsupported IE %d\n", + IEEE80211_DEBUG_MGMT("unsupported IE %d\n", info_element->id); break; } - left -= sizeof(struct ieee80211_info_element_hdr) + - info_element->len; - info_element = (struct ieee80211_info_element *) - &info_element->data[info_element->len]; + length -= sizeof(*info_element) + info_element->len; + info_element = + (struct ieee80211_info_element *)&info_element-> + data[info_element->len]; + } + + return 0; +} + +static int ieee80211_handle_assoc_resp(struct ieee80211_device *ieee, struct ieee80211_assoc_response + *frame, struct ieee80211_rx_stats *stats) +{ + struct ieee80211_network network_resp; + struct ieee80211_network *network = &network_resp; + struct net_device *dev = ieee->dev; + + network->flags = 0; + network->qos_data.active = 0; + network->qos_data.supported = 0; + network->qos_data.param_count = 0; + network->qos_data.old_param_count = 0; + + //network->atim_window = le16_to_cpu(frame->aid) & (0x3FFF); + network->atim_window = le16_to_cpu(frame->aid); + network->listen_interval = le16_to_cpu(frame->status); + memcpy(network->bssid, frame->header.addr3, ETH_ALEN); + network->capability = le16_to_cpu(frame->capability); + network->last_scanned = jiffies; + network->rates_len = network->rates_ex_len = 0; + network->last_associate = 0; + network->ssid_len = 0; + network->erp_value = + (network->capability & WLAN_CAPABILITY_IBSS) ? 0x3 : 0x0; + + if (stats->freq == IEEE80211_52GHZ_BAND) { + /* for A band (No DS info) */ + network->channel = stats->received_channel; + } else + network->flags |= NETWORK_HAS_CCK; + + network->wpa_ie_len = 0; + network->rsn_ie_len = 0; + + if (ieee80211_parse_info_param + (frame->info_element, stats->len - sizeof(*frame), network)) + return 1; + + network->mode = 0; + if (stats->freq == IEEE80211_52GHZ_BAND) + network->mode = IEEE_A; + else { + if (network->flags & NETWORK_HAS_OFDM) + network->mode |= IEEE_G; + if (network->flags & NETWORK_HAS_CCK) + network->mode |= IEEE_B; } + if (ieee80211_is_empty_essid(network->ssid, network->ssid_len)) + network->flags |= NETWORK_EMPTY_ESSID; + + memcpy(&network->stats, stats, sizeof(network->stats)); + + if (ieee->handle_assoc_response != NULL) + ieee->handle_assoc_response(dev, frame, network); + + return 0; +} + +/***************************************************/ + +static inline int ieee80211_network_init(struct ieee80211_device *ieee, struct ieee80211_probe_response + *beacon, + struct ieee80211_network *network, + struct ieee80211_rx_stats *stats) +{ + network->qos_data.active = 0; + network->qos_data.supported = 0; + network->qos_data.param_count = 0; + network->qos_data.old_param_count = 0; + + /* Pull out fixed field data */ + memcpy(network->bssid, beacon->header.addr3, ETH_ALEN); + network->capability = le16_to_cpu(beacon->capability); + network->last_scanned = jiffies; + network->time_stamp[0] = le32_to_cpu(beacon->time_stamp[0]); + network->time_stamp[1] = le32_to_cpu(beacon->time_stamp[1]); + network->beacon_interval = le16_to_cpu(beacon->beacon_interval); + /* Where to pull this? beacon->listen_interval; */ + network->listen_interval = 0x0A; + network->rates_len = network->rates_ex_len = 0; + network->last_associate = 0; + network->ssid_len = 0; + network->flags = 0; + network->atim_window = 0; + network->erp_value = (network->capability & WLAN_CAPABILITY_IBSS) ? + 0x3 : 0x0; + + if (stats->freq == IEEE80211_52GHZ_BAND) { + /* for A band (No DS info) */ + network->channel = stats->received_channel; + } else + network->flags |= NETWORK_HAS_CCK; + + network->wpa_ie_len = 0; + network->rsn_ie_len = 0; + + if (ieee80211_parse_info_param + (beacon->info_element, stats->len - sizeof(*beacon), network)) + return 1; + network->mode = 0; if (stats->freq == IEEE80211_52GHZ_BAND) network->mode = IEEE_A; @@ -1002,6 +1238,9 @@ static inline int is_same_network(struct ieee80211_network *src, static inline void update_network(struct ieee80211_network *dst, struct ieee80211_network *src) { + int qos_active; + u8 old_param; + memcpy(&dst->stats, &src->stats, sizeof(struct ieee80211_rx_stats)); dst->capability = src->capability; memcpy(dst->rates, src->rates, src->rates_len); @@ -1017,6 +1256,7 @@ static inline void update_network(struct ieee80211_network *dst, dst->beacon_interval = src->beacon_interval; dst->listen_interval = src->listen_interval; dst->atim_window = src->atim_window; + dst->erp_value = src->erp_value; memcpy(dst->wpa_ie, src->wpa_ie, src->wpa_ie_len); dst->wpa_ie_len = src->wpa_ie_len; @@ -1024,22 +1264,48 @@ static inline void update_network(struct ieee80211_network *dst, dst->rsn_ie_len = src->rsn_ie_len; dst->last_scanned = jiffies; + qos_active = src->qos_data.active; + old_param = dst->qos_data.old_param_count; + if (dst->flags & NETWORK_HAS_QOS_MASK) + memcpy(&dst->qos_data, &src->qos_data, + sizeof(struct ieee80211_qos_data)); + else { + dst->qos_data.supported = src->qos_data.supported; + dst->qos_data.param_count = src->qos_data.param_count; + } + + if (dst->qos_data.supported == 1) { + if (dst->ssid_len) + IEEE80211_DEBUG_QOS + ("QoS the network %s is QoS supported\n", + dst->ssid); + else + IEEE80211_DEBUG_QOS + ("QoS the network is QoS supported\n"); + } + dst->qos_data.active = qos_active; + dst->qos_data.old_param_count = old_param; + /* dst->last_associate is not overwritten */ } +static inline int is_beacon(int fc) +{ + return (WLAN_FC_GET_STYPE(le16_to_cpu(fc)) == IEEE80211_STYPE_BEACON); +} + static inline void ieee80211_process_probe_response(struct ieee80211_device - *ieee, - struct + *ieee, struct ieee80211_probe_response - *beacon, - struct ieee80211_rx_stats + *beacon, struct ieee80211_rx_stats *stats) { + struct net_device *dev = ieee->dev; struct ieee80211_network network; struct ieee80211_network *target; struct ieee80211_network *oldest = NULL; #ifdef CONFIG_IEEE80211_DEBUG - struct ieee80211_info_element *info_element = &beacon->info_element; + struct ieee80211_info_element *info_element = beacon->info_element; #endif unsigned long flags; @@ -1070,10 +1336,10 @@ static inline void ieee80211_process_probe_response(struct ieee80211_device escape_essid(info_element->data, info_element->len), MAC_ARG(beacon->header.addr3), - WLAN_FC_GET_STYPE(beacon->header. - frame_ctl) == - IEEE80211_STYPE_PROBE_RESP ? - "PROBE RESPONSE" : "BEACON"); + is_beacon(le16_to_cpu + (beacon->header. + frame_ctl)) ? + "BEACON" : "PROBE RESPONSE"); return; } @@ -1122,10 +1388,10 @@ static inline void ieee80211_process_probe_response(struct ieee80211_device escape_essid(network.ssid, network.ssid_len), MAC_ARG(network.bssid), - WLAN_FC_GET_STYPE(beacon->header. - frame_ctl) == - IEEE80211_STYPE_PROBE_RESP ? - "PROBE RESPONSE" : "BEACON"); + is_beacon(le16_to_cpu + (beacon->header. + frame_ctl)) ? + "BEACON" : "PROBE RESPONSE"); #endif memcpy(target, &network, sizeof(*target)); list_add_tail(&target->list, &ieee->network_list); @@ -1134,34 +1400,60 @@ static inline void ieee80211_process_probe_response(struct ieee80211_device escape_essid(target->ssid, target->ssid_len), MAC_ARG(target->bssid), - WLAN_FC_GET_STYPE(beacon->header. - frame_ctl) == - IEEE80211_STYPE_PROBE_RESP ? - "PROBE RESPONSE" : "BEACON"); + is_beacon(le16_to_cpu + (beacon->header. + frame_ctl)) ? + "BEACON" : "PROBE RESPONSE"); update_network(target, &network); } spin_unlock_irqrestore(&ieee->lock, flags); + + if (is_beacon(le16_to_cpu(beacon->header.frame_ctl))) { + if (ieee->handle_beacon != NULL) + ieee->handle_beacon(dev, beacon, &network); + } else { + if (ieee->handle_probe_response != NULL) + ieee->handle_probe_response(dev, beacon, &network); + } } void ieee80211_rx_mgt(struct ieee80211_device *ieee, - struct ieee80211_hdr *header, + struct ieee80211_hdr_4addr *header, struct ieee80211_rx_stats *stats) { - switch (WLAN_FC_GET_STYPE(header->frame_ctl)) { + switch (WLAN_FC_GET_STYPE(le16_to_cpu(header->frame_ctl))) { case IEEE80211_STYPE_ASSOC_RESP: IEEE80211_DEBUG_MGMT("received ASSOCIATION RESPONSE (%d)\n", - WLAN_FC_GET_STYPE(header->frame_ctl)); + WLAN_FC_GET_STYPE(le16_to_cpu + (header->frame_ctl))); + ieee80211_handle_assoc_resp(ieee, + (struct ieee80211_assoc_response *) + header, stats); break; case IEEE80211_STYPE_REASSOC_RESP: IEEE80211_DEBUG_MGMT("received REASSOCIATION RESPONSE (%d)\n", - WLAN_FC_GET_STYPE(header->frame_ctl)); + WLAN_FC_GET_STYPE(le16_to_cpu + (header->frame_ctl))); + break; + + case IEEE80211_STYPE_PROBE_REQ: + IEEE80211_DEBUG_MGMT("recieved auth (%d)\n", + WLAN_FC_GET_STYPE(le16_to_cpu + (header->frame_ctl))); + + if (ieee->handle_probe_request != NULL) + ieee->handle_probe_request(ieee->dev, + (struct + ieee80211_probe_request *) + header, stats); break; case IEEE80211_STYPE_PROBE_RESP: IEEE80211_DEBUG_MGMT("received PROBE RESPONSE (%d)\n", - WLAN_FC_GET_STYPE(header->frame_ctl)); + WLAN_FC_GET_STYPE(le16_to_cpu + (header->frame_ctl))); IEEE80211_DEBUG_SCAN("Probe response\n"); ieee80211_process_probe_response(ieee, (struct @@ -1171,20 +1463,46 @@ void ieee80211_rx_mgt(struct ieee80211_device *ieee, case IEEE80211_STYPE_BEACON: IEEE80211_DEBUG_MGMT("received BEACON (%d)\n", - WLAN_FC_GET_STYPE(header->frame_ctl)); + WLAN_FC_GET_STYPE(le16_to_cpu + (header->frame_ctl))); IEEE80211_DEBUG_SCAN("Beacon\n"); ieee80211_process_probe_response(ieee, (struct ieee80211_probe_response *) header, stats); break; + case IEEE80211_STYPE_AUTH: + IEEE80211_DEBUG_MGMT("recieved auth (%d)\n", + WLAN_FC_GET_STYPE(le16_to_cpu + (header->frame_ctl))); + + if (ieee->handle_auth != NULL) + ieee->handle_auth(ieee->dev, + (struct ieee80211_auth *)header); + break; + + case IEEE80211_STYPE_DISASSOC: + if (ieee->handle_disassoc != NULL) + ieee->handle_disassoc(ieee->dev, + (struct ieee80211_disassoc *) + header); + break; + + case IEEE80211_STYPE_DEAUTH: + printk("DEAUTH from AP\n"); + if (ieee->handle_deauth != NULL) + ieee->handle_deauth(ieee->dev, (struct ieee80211_auth *) + header); + break; default: IEEE80211_DEBUG_MGMT("received UNKNOWN (%d)\n", - WLAN_FC_GET_STYPE(header->frame_ctl)); + WLAN_FC_GET_STYPE(le16_to_cpu + (header->frame_ctl))); IEEE80211_WARNING("%s: Unknown management packet: %d\n", ieee->dev->name, - WLAN_FC_GET_STYPE(header->frame_ctl)); + WLAN_FC_GET_STYPE(le16_to_cpu + (header->frame_ctl))); break; } } diff --git a/net/ieee80211/ieee80211_tx.c b/net/ieee80211/ieee80211_tx.c index eed07bbbe6b..95ccbadbf55 100644 --- a/net/ieee80211/ieee80211_tx.c +++ b/net/ieee80211/ieee80211_tx.c @@ -1,6 +1,6 @@ /****************************************************************************** - Copyright(c) 2003 - 2004 Intel Corporation. All rights reserved. + Copyright(c) 2003 - 2005 Intel Corporation. All rights reserved. This program is free software; you can redistribute it and/or modify it under the terms of version 2 of the GNU General Public License as @@ -128,7 +128,7 @@ payload of each frame is reduced to 492 bytes. static u8 P802_1H_OUI[P80211_OUI_LEN] = { 0x00, 0x00, 0xf8 }; static u8 RFC1042_OUI[P80211_OUI_LEN] = { 0x00, 0x00, 0x00 }; -static inline int ieee80211_put_snap(u8 * data, u16 h_proto) +static inline int ieee80211_copy_snap(u8 * data, u16 h_proto) { struct ieee80211_snap_hdr *snap; u8 *oui; @@ -157,31 +157,14 @@ static inline int ieee80211_encrypt_fragment(struct ieee80211_device *ieee, struct ieee80211_crypt_data *crypt = ieee->crypt[ieee->tx_keyidx]; int res; -#ifdef CONFIG_IEEE80211_CRYPT_TKIP - struct ieee80211_hdr *header; - - if (ieee->tkip_countermeasures && - crypt && crypt->ops && strcmp(crypt->ops->name, "TKIP") == 0) { - header = (struct ieee80211_hdr *)frag->data; - if (net_ratelimit()) { - printk(KERN_DEBUG "%s: TKIP countermeasures: dropped " - "TX packet to " MAC_FMT "\n", - ieee->dev->name, MAC_ARG(header->addr1)); - } + if (crypt == NULL) return -1; - } -#endif + /* To encrypt, frame format is: * IV (4 bytes), clear payload (including SNAP), ICV (4 bytes) */ - - // PR: FIXME: Copied from hostap. Check fragmentation/MSDU/MPDU encryption. - /* Host-based IEEE 802.11 fragmentation for TX is not yet supported, so - * call both MSDU and MPDU encryption functions from here. */ atomic_inc(&crypt->refcnt); res = 0; - if (crypt->ops->encrypt_msdu) - res = crypt->ops->encrypt_msdu(frag, hdr_len, crypt->priv); - if (res == 0 && crypt->ops->encrypt_mpdu) + if (crypt->ops && crypt->ops->encrypt_mpdu) res = crypt->ops->encrypt_mpdu(frag, hdr_len, crypt->priv); atomic_dec(&crypt->refcnt); @@ -207,7 +190,7 @@ void ieee80211_txb_free(struct ieee80211_txb *txb) } static struct ieee80211_txb *ieee80211_alloc_txb(int nr_frags, int txb_size, - gfp_t gfp_mask) + int headroom, gfp_t gfp_mask) { struct ieee80211_txb *txb; int i; @@ -221,11 +204,13 @@ static struct ieee80211_txb *ieee80211_alloc_txb(int nr_frags, int txb_size, txb->frag_size = txb_size; for (i = 0; i < nr_frags; i++) { - txb->fragments[i] = dev_alloc_skb(txb_size); + txb->fragments[i] = __dev_alloc_skb(txb_size + headroom, + gfp_mask); if (unlikely(!txb->fragments[i])) { i--; break; } + skb_reserve(txb->fragments[i], headroom); } if (unlikely(i != nr_frags)) { while (i >= 0) @@ -236,25 +221,31 @@ static struct ieee80211_txb *ieee80211_alloc_txb(int nr_frags, int txb_size, return txb; } -/* SKBs are added to the ieee->tx_queue. */ +/* Incoming skb is converted to a txb which consists of + * a block of 802.11 fragment packets (stored as skbs) */ int ieee80211_xmit(struct sk_buff *skb, struct net_device *dev) { struct ieee80211_device *ieee = netdev_priv(dev); struct ieee80211_txb *txb = NULL; - struct ieee80211_hdr *frag_hdr; - int i, bytes_per_frag, nr_frags, bytes_last_frag, frag_size; + struct ieee80211_hdr_3addr *frag_hdr; + int i, bytes_per_frag, nr_frags, bytes_last_frag, frag_size, + rts_required; unsigned long flags; struct net_device_stats *stats = &ieee->stats; - int ether_type, encrypt; + int ether_type, encrypt, host_encrypt, host_encrypt_msdu, host_build_iv; int bytes, fc, hdr_len; struct sk_buff *skb_frag; - struct ieee80211_hdr header = { /* Ensure zero initialized */ + struct ieee80211_hdr_3addr header = { /* Ensure zero initialized */ .duration_id = 0, .seq_ctl = 0 }; u8 dest[ETH_ALEN], src[ETH_ALEN]; - struct ieee80211_crypt_data *crypt; + int priority = skb->priority; + int snapped = 0; + + if (ieee->is_queue_full && (*ieee->is_queue_full) (dev, priority)) + return NETDEV_TX_BUSY; spin_lock_irqsave(&ieee->lock, flags); @@ -276,7 +267,11 @@ int ieee80211_xmit(struct sk_buff *skb, struct net_device *dev) crypt = ieee->crypt[ieee->tx_keyidx]; encrypt = !(ether_type == ETH_P_PAE && ieee->ieee802_1x) && - ieee->host_encrypt && crypt && crypt->ops; + ieee->sec.encrypt; + + host_encrypt = ieee->host_encrypt && encrypt && crypt; + host_encrypt_msdu = ieee->host_encrypt_msdu && encrypt && crypt; + host_build_iv = ieee->host_build_iv && encrypt && crypt; if (!encrypt && ieee->ieee802_1x && ieee->drop_unencrypted && ether_type != ETH_P_PAE) { @@ -285,8 +280,8 @@ int ieee80211_xmit(struct sk_buff *skb, struct net_device *dev) } /* Save source and destination addresses */ - memcpy(&dest, skb->data, ETH_ALEN); - memcpy(&src, skb->data + ETH_ALEN, ETH_ALEN); + memcpy(dest, skb->data, ETH_ALEN); + memcpy(src, skb->data + ETH_ALEN, ETH_ALEN); /* Advance the SKB to the start of the payload */ skb_pull(skb, sizeof(struct ethhdr)); @@ -294,7 +289,7 @@ int ieee80211_xmit(struct sk_buff *skb, struct net_device *dev) /* Determine total amount of storage required for TXB packets */ bytes = skb->len + SNAP_SIZE + sizeof(u16); - if (encrypt) + if (host_encrypt) fc = IEEE80211_FTYPE_DATA | IEEE80211_STYPE_DATA | IEEE80211_FCTL_PROTECTED; else @@ -302,70 +297,144 @@ int ieee80211_xmit(struct sk_buff *skb, struct net_device *dev) if (ieee->iw_mode == IW_MODE_INFRA) { fc |= IEEE80211_FCTL_TODS; - /* To DS: Addr1 = BSSID, Addr2 = SA, - Addr3 = DA */ - memcpy(&header.addr1, ieee->bssid, ETH_ALEN); - memcpy(&header.addr2, &src, ETH_ALEN); - memcpy(&header.addr3, &dest, ETH_ALEN); + /* To DS: Addr1 = BSSID, Addr2 = SA, Addr3 = DA */ + memcpy(header.addr1, ieee->bssid, ETH_ALEN); + memcpy(header.addr2, src, ETH_ALEN); + memcpy(header.addr3, dest, ETH_ALEN); } else if (ieee->iw_mode == IW_MODE_ADHOC) { - /* not From/To DS: Addr1 = DA, Addr2 = SA, - Addr3 = BSSID */ - memcpy(&header.addr1, dest, ETH_ALEN); - memcpy(&header.addr2, src, ETH_ALEN); - memcpy(&header.addr3, ieee->bssid, ETH_ALEN); + /* not From/To DS: Addr1 = DA, Addr2 = SA, Addr3 = BSSID */ + memcpy(header.addr1, dest, ETH_ALEN); + memcpy(header.addr2, src, ETH_ALEN); + memcpy(header.addr3, ieee->bssid, ETH_ALEN); } header.frame_ctl = cpu_to_le16(fc); hdr_len = IEEE80211_3ADDR_LEN; - /* Determine fragmentation size based on destination (multicast - * and broadcast are not fragmented) */ - if (is_multicast_ether_addr(dest) || is_broadcast_ether_addr(dest)) - frag_size = MAX_FRAG_THRESHOLD; - else - frag_size = ieee->fts; + /* Encrypt msdu first on the whole data packet. */ + if ((host_encrypt || host_encrypt_msdu) && + crypt && crypt->ops && crypt->ops->encrypt_msdu) { + int res = 0; + int len = bytes + hdr_len + crypt->ops->extra_msdu_prefix_len + + crypt->ops->extra_msdu_postfix_len; + struct sk_buff *skb_new = dev_alloc_skb(len); + + if (unlikely(!skb_new)) + goto failed; + + skb_reserve(skb_new, crypt->ops->extra_msdu_prefix_len); + memcpy(skb_put(skb_new, hdr_len), &header, hdr_len); + snapped = 1; + ieee80211_copy_snap(skb_put(skb_new, SNAP_SIZE + sizeof(u16)), + ether_type); + memcpy(skb_put(skb_new, skb->len), skb->data, skb->len); + res = crypt->ops->encrypt_msdu(skb_new, hdr_len, crypt->priv); + if (res < 0) { + IEEE80211_ERROR("msdu encryption failed\n"); + dev_kfree_skb_any(skb_new); + goto failed; + } + dev_kfree_skb_any(skb); + skb = skb_new; + bytes += crypt->ops->extra_msdu_prefix_len + + crypt->ops->extra_msdu_postfix_len; + skb_pull(skb, hdr_len); + } - /* Determine amount of payload per fragment. Regardless of if - * this stack is providing the full 802.11 header, one will - * eventually be affixed to this fragment -- so we must account for - * it when determining the amount of payload space. */ - bytes_per_frag = frag_size - IEEE80211_3ADDR_LEN; - if (ieee->config & - (CFG_IEEE80211_COMPUTE_FCS | CFG_IEEE80211_RESERVE_FCS)) - bytes_per_frag -= IEEE80211_FCS_LEN; - - /* Each fragment may need to have room for encryptiong pre/postfix */ - if (encrypt) - bytes_per_frag -= crypt->ops->extra_prefix_len + - crypt->ops->extra_postfix_len; - - /* Number of fragments is the total bytes_per_frag / - * payload_per_fragment */ - nr_frags = bytes / bytes_per_frag; - bytes_last_frag = bytes % bytes_per_frag; - if (bytes_last_frag) + if (host_encrypt || ieee->host_open_frag) { + /* Determine fragmentation size based on destination (multicast + * and broadcast are not fragmented) */ + if (is_multicast_ether_addr(dest) || + is_broadcast_ether_addr(dest)) + frag_size = MAX_FRAG_THRESHOLD; + else + frag_size = ieee->fts; + + /* Determine amount of payload per fragment. Regardless of if + * this stack is providing the full 802.11 header, one will + * eventually be affixed to this fragment -- so we must account + * for it when determining the amount of payload space. */ + bytes_per_frag = frag_size - IEEE80211_3ADDR_LEN; + if (ieee->config & + (CFG_IEEE80211_COMPUTE_FCS | CFG_IEEE80211_RESERVE_FCS)) + bytes_per_frag -= IEEE80211_FCS_LEN; + + /* Each fragment may need to have room for encryptiong + * pre/postfix */ + if (host_encrypt) + bytes_per_frag -= crypt->ops->extra_mpdu_prefix_len + + crypt->ops->extra_mpdu_postfix_len; + + /* Number of fragments is the total + * bytes_per_frag / payload_per_fragment */ + nr_frags = bytes / bytes_per_frag; + bytes_last_frag = bytes % bytes_per_frag; + if (bytes_last_frag) + nr_frags++; + else + bytes_last_frag = bytes_per_frag; + } else { + nr_frags = 1; + bytes_per_frag = bytes_last_frag = bytes; + frag_size = bytes + IEEE80211_3ADDR_LEN; + } + + rts_required = (frag_size > ieee->rts + && ieee->config & CFG_IEEE80211_RTS); + if (rts_required) nr_frags++; - else - bytes_last_frag = bytes_per_frag; /* When we allocate the TXB we allocate enough space for the reserve * and full fragment bytes (bytes_per_frag doesn't include prefix, * postfix, header, FCS, etc.) */ - txb = ieee80211_alloc_txb(nr_frags, frag_size, GFP_ATOMIC); + txb = ieee80211_alloc_txb(nr_frags, frag_size, + ieee->tx_headroom, GFP_ATOMIC); if (unlikely(!txb)) { printk(KERN_WARNING "%s: Could not allocate TXB\n", ieee->dev->name); goto failed; } txb->encrypted = encrypt; - txb->payload_size = bytes; + if (host_encrypt) + txb->payload_size = frag_size * (nr_frags - 1) + + bytes_last_frag; + else + txb->payload_size = bytes; + + if (rts_required) { + skb_frag = txb->fragments[0]; + frag_hdr = + (struct ieee80211_hdr_3addr *)skb_put(skb_frag, hdr_len); + + /* + * Set header frame_ctl to the RTS. + */ + header.frame_ctl = + cpu_to_le16(IEEE80211_FTYPE_CTL | IEEE80211_STYPE_RTS); + memcpy(frag_hdr, &header, hdr_len); - for (i = 0; i < nr_frags; i++) { + /* + * Restore header frame_ctl to the original data setting. + */ + header.frame_ctl = cpu_to_le16(fc); + + if (ieee->config & + (CFG_IEEE80211_COMPUTE_FCS | CFG_IEEE80211_RESERVE_FCS)) + skb_put(skb_frag, 4); + + txb->rts_included = 1; + i = 1; + } else + i = 0; + + for (; i < nr_frags; i++) { skb_frag = txb->fragments[i]; - if (encrypt) - skb_reserve(skb_frag, crypt->ops->extra_prefix_len); + if (host_encrypt || host_build_iv) + skb_reserve(skb_frag, + crypt->ops->extra_mpdu_prefix_len); - frag_hdr = (struct ieee80211_hdr *)skb_put(skb_frag, hdr_len); + frag_hdr = + (struct ieee80211_hdr_3addr *)skb_put(skb_frag, hdr_len); memcpy(frag_hdr, &header, hdr_len); /* If this is not the last fragment, then add the MOREFRAGS @@ -379,11 +448,10 @@ int ieee80211_xmit(struct sk_buff *skb, struct net_device *dev) bytes = bytes_last_frag; } - /* Put a SNAP header on the first fragment */ - if (i == 0) { - ieee80211_put_snap(skb_put - (skb_frag, SNAP_SIZE + sizeof(u16)), - ether_type); + if (i == 0 && !snapped) { + ieee80211_copy_snap(skb_put + (skb_frag, SNAP_SIZE + sizeof(u16)), + ether_type); bytes -= SNAP_SIZE + sizeof(u16); } @@ -394,8 +462,19 @@ int ieee80211_xmit(struct sk_buff *skb, struct net_device *dev) /* Encryption routine will move the header forward in order * to insert the IV between the header and the payload */ - if (encrypt) + if (host_encrypt) ieee80211_encrypt_fragment(ieee, skb_frag, hdr_len); + else if (host_build_iv) { + struct ieee80211_crypt_data *crypt; + + crypt = ieee->crypt[ieee->tx_keyidx]; + atomic_inc(&crypt->refcnt); + if (crypt->ops->build_iv) + crypt->ops->build_iv(skb_frag, hdr_len, + crypt->priv); + atomic_dec(&crypt->refcnt); + } + if (ieee->config & (CFG_IEEE80211_COMPUTE_FCS | CFG_IEEE80211_RESERVE_FCS)) skb_put(skb_frag, 4); @@ -407,11 +486,20 @@ int ieee80211_xmit(struct sk_buff *skb, struct net_device *dev) dev_kfree_skb_any(skb); if (txb) { - if ((*ieee->hard_start_xmit) (txb, dev) == 0) { + int ret = (*ieee->hard_start_xmit) (txb, dev, priority); + if (ret == 0) { stats->tx_packets++; stats->tx_bytes += txb->payload_size; return 0; } + + if (ret == NETDEV_TX_BUSY) { + printk(KERN_ERR "%s: NETDEV_TX_BUSY returned; " + "driver should report queue full via " + "ieee_device->is_queue_full.\n", + ieee->dev->name); + } + ieee80211_txb_free(txb); } @@ -422,7 +510,72 @@ int ieee80211_xmit(struct sk_buff *skb, struct net_device *dev) netif_stop_queue(dev); stats->tx_errors++; return 1; +} + +/* Incoming 802.11 strucure is converted to a TXB + * a block of 802.11 fragment packets (stored as skbs) */ +int ieee80211_tx_frame(struct ieee80211_device *ieee, + struct ieee80211_hdr *frame, int len) +{ + struct ieee80211_txb *txb = NULL; + unsigned long flags; + struct net_device_stats *stats = &ieee->stats; + struct sk_buff *skb_frag; + int priority = -1; + + spin_lock_irqsave(&ieee->lock, flags); + /* If there is no driver handler to take the TXB, dont' bother + * creating it... */ + if (!ieee->hard_start_xmit) { + printk(KERN_WARNING "%s: No xmit handler.\n", ieee->dev->name); + goto success; + } + + if (unlikely(len < 24)) { + printk(KERN_WARNING "%s: skb too small (%d).\n", + ieee->dev->name, len); + goto success; + } + + /* When we allocate the TXB we allocate enough space for the reserve + * and full fragment bytes (bytes_per_frag doesn't include prefix, + * postfix, header, FCS, etc.) */ + txb = ieee80211_alloc_txb(1, len, ieee->tx_headroom, GFP_ATOMIC); + if (unlikely(!txb)) { + printk(KERN_WARNING "%s: Could not allocate TXB\n", + ieee->dev->name); + goto failed; + } + txb->encrypted = 0; + txb->payload_size = len; + + skb_frag = txb->fragments[0]; + + memcpy(skb_put(skb_frag, len), frame, len); + + if (ieee->config & + (CFG_IEEE80211_COMPUTE_FCS | CFG_IEEE80211_RESERVE_FCS)) + skb_put(skb_frag, 4); + + success: + spin_unlock_irqrestore(&ieee->lock, flags); + + if (txb) { + if ((*ieee->hard_start_xmit) (txb, ieee->dev, priority) == 0) { + stats->tx_packets++; + stats->tx_bytes += txb->payload_size; + return 0; + } + ieee80211_txb_free(txb); + } + return 0; + + failed: + spin_unlock_irqrestore(&ieee->lock, flags); + stats->tx_errors++; + return 1; } +EXPORT_SYMBOL(ieee80211_tx_frame); EXPORT_SYMBOL(ieee80211_txb_free); diff --git a/net/ieee80211/ieee80211_wx.c b/net/ieee80211/ieee80211_wx.c index 94882f39b07..1ce7af9bec3 100644 --- a/net/ieee80211/ieee80211_wx.c +++ b/net/ieee80211/ieee80211_wx.c @@ -1,6 +1,6 @@ /****************************************************************************** - Copyright(c) 2004 Intel Corporation. All rights reserved. + Copyright(c) 2004-2005 Intel Corporation. All rights reserved. Portions of this file are based on the WEP enablement code provided by the Host AP project hostap-drivers v0.1.3 @@ -32,6 +32,7 @@ #include <linux/kmod.h> #include <linux/module.h> +#include <linux/jiffies.h> #include <net/ieee80211.h> #include <linux/wireless.h> @@ -140,18 +141,41 @@ static inline char *ipw2100_translate_scan(struct ieee80211_device *ieee, start = iwe_stream_add_point(start, stop, &iwe, custom); /* Add quality statistics */ - /* TODO: Fix these values... */ iwe.cmd = IWEVQUAL; - iwe.u.qual.qual = network->stats.signal; - iwe.u.qual.level = network->stats.rssi; - iwe.u.qual.noise = network->stats.noise; - iwe.u.qual.updated = network->stats.mask & IEEE80211_STATMASK_WEMASK; - if (!(network->stats.mask & IEEE80211_STATMASK_RSSI)) - iwe.u.qual.updated |= IW_QUAL_LEVEL_INVALID; - if (!(network->stats.mask & IEEE80211_STATMASK_NOISE)) + iwe.u.qual.updated = IW_QUAL_QUAL_UPDATED | IW_QUAL_LEVEL_UPDATED | + IW_QUAL_NOISE_UPDATED; + + if (!(network->stats.mask & IEEE80211_STATMASK_RSSI)) { + iwe.u.qual.updated |= IW_QUAL_QUAL_INVALID | + IW_QUAL_LEVEL_INVALID; + iwe.u.qual.qual = 0; + iwe.u.qual.level = 0; + } else { + iwe.u.qual.level = network->stats.rssi; + if (ieee->perfect_rssi == ieee->worst_rssi) + iwe.u.qual.qual = 100; + else + iwe.u.qual.qual = + (100 * + (ieee->perfect_rssi - ieee->worst_rssi) * + (ieee->perfect_rssi - ieee->worst_rssi) - + (ieee->perfect_rssi - network->stats.rssi) * + (15 * (ieee->perfect_rssi - ieee->worst_rssi) + + 62 * (ieee->perfect_rssi - network->stats.rssi))) / + ((ieee->perfect_rssi - ieee->worst_rssi) * + (ieee->perfect_rssi - ieee->worst_rssi)); + if (iwe.u.qual.qual > 100) + iwe.u.qual.qual = 100; + else if (iwe.u.qual.qual < 1) + iwe.u.qual.qual = 0; + } + + if (!(network->stats.mask & IEEE80211_STATMASK_NOISE)) { iwe.u.qual.updated |= IW_QUAL_NOISE_INVALID; - if (!(network->stats.mask & IEEE80211_STATMASK_SIGNAL)) - iwe.u.qual.updated |= IW_QUAL_QUAL_INVALID; + iwe.u.qual.noise = 0; + } else { + iwe.u.qual.noise = network->stats.noise; + } start = iwe_stream_add_event(start, stop, &iwe, IW_EV_QUAL_LEN); @@ -162,7 +186,7 @@ static inline char *ipw2100_translate_scan(struct ieee80211_device *ieee, if (iwe.u.data.length) start = iwe_stream_add_point(start, stop, &iwe, custom); - if (ieee->wpa_enabled && network->wpa_ie_len) { + if (network->wpa_ie_len) { char buf[MAX_WPA_IE_LEN * 2 + 30]; u8 *p = buf; @@ -177,7 +201,7 @@ static inline char *ipw2100_translate_scan(struct ieee80211_device *ieee, start = iwe_stream_add_point(start, stop, &iwe, buf); } - if (ieee->wpa_enabled && network->rsn_ie_len) { + if (network->rsn_ie_len) { char buf[MAX_WPA_IE_LEN * 2 + 30]; u8 *p = buf; @@ -197,8 +221,8 @@ static inline char *ipw2100_translate_scan(struct ieee80211_device *ieee, iwe.cmd = IWEVCUSTOM; p = custom; p += snprintf(p, MAX_CUSTOM_LEN - (p - custom), - " Last beacon: %lums ago", - (jiffies - network->last_scanned) / (HZ / 100)); + " Last beacon: %dms ago", + jiffies_to_msecs(jiffies - network->last_scanned)); iwe.u.data.length = p - custom; if (iwe.u.data.length) start = iwe_stream_add_point(start, stop, &iwe, custom); @@ -228,13 +252,13 @@ int ieee80211_wx_get_scan(struct ieee80211_device *ieee, ev = ipw2100_translate_scan(ieee, ev, stop, network); else IEEE80211_DEBUG_SCAN("Not showing network '%s (" - MAC_FMT ")' due to age (%lums).\n", + MAC_FMT ")' due to age (%dms).\n", escape_essid(network->ssid, network->ssid_len), MAC_ARG(network->bssid), - (jiffies - - network->last_scanned) / (HZ / - 100)); + jiffies_to_msecs(jiffies - + network-> + last_scanned)); } spin_unlock_irqrestore(&ieee->lock, flags); @@ -258,6 +282,7 @@ int ieee80211_wx_set_encode(struct ieee80211_device *ieee, }; int i, key, key_provided, len; struct ieee80211_crypt_data **crypt; + int host_crypto = ieee->host_encrypt || ieee->host_decrypt; IEEE80211_DEBUG_WX("SET_ENCODE\n"); @@ -298,15 +323,17 @@ int ieee80211_wx_set_encode(struct ieee80211_device *ieee, if (i == WEP_KEYS) { sec.enabled = 0; + sec.encrypt = 0; sec.level = SEC_LEVEL_0; - sec.flags |= SEC_ENABLED | SEC_LEVEL; + sec.flags |= SEC_ENABLED | SEC_LEVEL | SEC_ENCRYPT; } goto done; } sec.enabled = 1; - sec.flags |= SEC_ENABLED; + sec.encrypt = 1; + sec.flags |= SEC_ENABLED | SEC_ENCRYPT; if (*crypt != NULL && (*crypt)->ops != NULL && strcmp((*crypt)->ops->name, "WEP") != 0) { @@ -315,7 +342,7 @@ int ieee80211_wx_set_encode(struct ieee80211_device *ieee, ieee80211_crypt_delayed_deinit(ieee, crypt); } - if (*crypt == NULL) { + if (*crypt == NULL && host_crypto) { struct ieee80211_crypt_data *new_crypt; /* take WEP into use */ @@ -355,49 +382,56 @@ int ieee80211_wx_set_encode(struct ieee80211_device *ieee, key, escape_essid(sec.keys[key], len), erq->length, len); sec.key_sizes[key] = len; - (*crypt)->ops->set_key(sec.keys[key], len, NULL, - (*crypt)->priv); + if (*crypt) + (*crypt)->ops->set_key(sec.keys[key], len, NULL, + (*crypt)->priv); sec.flags |= (1 << key); /* This ensures a key will be activated if no key is * explicitely set */ if (key == sec.active_key) sec.flags |= SEC_ACTIVE_KEY; + } else { - len = (*crypt)->ops->get_key(sec.keys[key], WEP_KEY_LEN, - NULL, (*crypt)->priv); - if (len == 0) { - /* Set a default key of all 0 */ - IEEE80211_DEBUG_WX("Setting key %d to all zero.\n", - key); - memset(sec.keys[key], 0, 13); - (*crypt)->ops->set_key(sec.keys[key], 13, NULL, - (*crypt)->priv); - sec.key_sizes[key] = 13; - sec.flags |= (1 << key); + if (host_crypto) { + len = (*crypt)->ops->get_key(sec.keys[key], WEP_KEY_LEN, + NULL, (*crypt)->priv); + if (len == 0) { + /* Set a default key of all 0 */ + IEEE80211_DEBUG_WX("Setting key %d to all " + "zero.\n", key); + memset(sec.keys[key], 0, 13); + (*crypt)->ops->set_key(sec.keys[key], 13, NULL, + (*crypt)->priv); + sec.key_sizes[key] = 13; + sec.flags |= (1 << key); + } } - /* No key data - just set the default TX key index */ if (key_provided) { - IEEE80211_DEBUG_WX - ("Setting key %d to default Tx key.\n", key); + IEEE80211_DEBUG_WX("Setting key %d to default Tx " + "key.\n", key); ieee->tx_keyidx = key; sec.active_key = key; sec.flags |= SEC_ACTIVE_KEY; } } - - done: - ieee->open_wep = !(erq->flags & IW_ENCODE_RESTRICTED); - sec.auth_mode = ieee->open_wep ? WLAN_AUTH_OPEN : WLAN_AUTH_SHARED_KEY; - sec.flags |= SEC_AUTH_MODE; - IEEE80211_DEBUG_WX("Auth: %s\n", sec.auth_mode == WLAN_AUTH_OPEN ? - "OPEN" : "SHARED KEY"); + if (erq->flags & (IW_ENCODE_OPEN | IW_ENCODE_RESTRICTED)) { + ieee->open_wep = !(erq->flags & IW_ENCODE_RESTRICTED); + sec.auth_mode = ieee->open_wep ? WLAN_AUTH_OPEN : + WLAN_AUTH_SHARED_KEY; + sec.flags |= SEC_AUTH_MODE; + IEEE80211_DEBUG_WX("Auth: %s\n", + sec.auth_mode == WLAN_AUTH_OPEN ? + "OPEN" : "SHARED KEY"); + } /* For now we just support WEP, so only set that security level... * TODO: When WPA is added this is one place that needs to change */ sec.flags |= SEC_LEVEL; sec.level = SEC_LEVEL_1; /* 40 and 104 bit WEP */ + sec.encode_alg[key] = SEC_ALG_WEP; + done: if (ieee->set_security) ieee->set_security(dev, &sec); @@ -422,6 +456,7 @@ int ieee80211_wx_get_encode(struct ieee80211_device *ieee, struct iw_point *erq = &(wrqu->encoding); int len, key; struct ieee80211_crypt_data *crypt; + struct ieee80211_security *sec = &ieee->sec; IEEE80211_DEBUG_WX("GET_ENCODE\n"); @@ -436,23 +471,16 @@ int ieee80211_wx_get_encode(struct ieee80211_device *ieee, crypt = ieee->crypt[key]; erq->flags = key + 1; - if (crypt == NULL || crypt->ops == NULL) { + if (!sec->enabled) { erq->length = 0; erq->flags |= IW_ENCODE_DISABLED; return 0; } - if (strcmp(crypt->ops->name, "WEP") != 0) { - /* only WEP is supported with wireless extensions, so just - * report that encryption is used */ - erq->length = 0; - erq->flags |= IW_ENCODE_ENABLED; - return 0; - } + len = sec->key_sizes[key]; + memcpy(keybuf, sec->keys[key], len); - len = crypt->ops->get_key(keybuf, WEP_KEY_LEN, NULL, crypt->priv); erq->length = (len >= 0 ? len : 0); - erq->flags |= IW_ENCODE_ENABLED; if (ieee->open_wep) @@ -463,6 +491,240 @@ int ieee80211_wx_get_encode(struct ieee80211_device *ieee, return 0; } +int ieee80211_wx_set_encodeext(struct ieee80211_device *ieee, + struct iw_request_info *info, + union iwreq_data *wrqu, char *extra) +{ + struct net_device *dev = ieee->dev; + struct iw_point *encoding = &wrqu->encoding; + struct iw_encode_ext *ext = (struct iw_encode_ext *)extra; + int i, idx, ret = 0; + int group_key = 0; + const char *alg, *module; + struct ieee80211_crypto_ops *ops; + struct ieee80211_crypt_data **crypt; + + struct ieee80211_security sec = { + .flags = 0, + }; + + idx = encoding->flags & IW_ENCODE_INDEX; + if (idx) { + if (idx < 1 || idx > WEP_KEYS) + return -EINVAL; + idx--; + } else + idx = ieee->tx_keyidx; + + if (ext->ext_flags & IW_ENCODE_EXT_GROUP_KEY) { + crypt = &ieee->crypt[idx]; + group_key = 1; + } else { + if (idx != 0) + return -EINVAL; + if (ieee->iw_mode == IW_MODE_INFRA) + crypt = &ieee->crypt[idx]; + else + return -EINVAL; + } + + sec.flags |= SEC_ENABLED | SEC_ENCRYPT; + if ((encoding->flags & IW_ENCODE_DISABLED) || + ext->alg == IW_ENCODE_ALG_NONE) { + if (*crypt) + ieee80211_crypt_delayed_deinit(ieee, crypt); + + for (i = 0; i < WEP_KEYS; i++) + if (ieee->crypt[i] != NULL) + break; + + if (i == WEP_KEYS) { + sec.enabled = 0; + sec.encrypt = 0; + sec.level = SEC_LEVEL_0; + sec.flags |= SEC_LEVEL; + } + goto done; + } + + sec.enabled = 1; + sec.encrypt = 1; + + if (group_key ? !ieee->host_mc_decrypt : + !(ieee->host_encrypt || ieee->host_decrypt || + ieee->host_encrypt_msdu)) + goto skip_host_crypt; + + switch (ext->alg) { + case IW_ENCODE_ALG_WEP: + alg = "WEP"; + module = "ieee80211_crypt_wep"; + break; + case IW_ENCODE_ALG_TKIP: + alg = "TKIP"; + module = "ieee80211_crypt_tkip"; + break; + case IW_ENCODE_ALG_CCMP: + alg = "CCMP"; + module = "ieee80211_crypt_ccmp"; + break; + default: + IEEE80211_DEBUG_WX("%s: unknown crypto alg %d\n", + dev->name, ext->alg); + ret = -EINVAL; + goto done; + } + + ops = ieee80211_get_crypto_ops(alg); + if (ops == NULL) { + request_module(module); + ops = ieee80211_get_crypto_ops(alg); + } + if (ops == NULL) { + IEEE80211_DEBUG_WX("%s: unknown crypto alg %d\n", + dev->name, ext->alg); + ret = -EINVAL; + goto done; + } + + if (*crypt == NULL || (*crypt)->ops != ops) { + struct ieee80211_crypt_data *new_crypt; + + ieee80211_crypt_delayed_deinit(ieee, crypt); + + new_crypt = (struct ieee80211_crypt_data *) + kmalloc(sizeof(*new_crypt), GFP_KERNEL); + if (new_crypt == NULL) { + ret = -ENOMEM; + goto done; + } + memset(new_crypt, 0, sizeof(struct ieee80211_crypt_data)); + new_crypt->ops = ops; + if (new_crypt->ops && try_module_get(new_crypt->ops->owner)) + new_crypt->priv = new_crypt->ops->init(idx); + if (new_crypt->priv == NULL) { + kfree(new_crypt); + ret = -EINVAL; + goto done; + } + *crypt = new_crypt; + } + + if (ext->key_len > 0 && (*crypt)->ops->set_key && + (*crypt)->ops->set_key(ext->key, ext->key_len, ext->rx_seq, + (*crypt)->priv) < 0) { + IEEE80211_DEBUG_WX("%s: key setting failed\n", dev->name); + ret = -EINVAL; + goto done; + } + + skip_host_crypt: + if (ext->ext_flags & IW_ENCODE_EXT_SET_TX_KEY) { + ieee->tx_keyidx = idx; + sec.active_key = idx; + sec.flags |= SEC_ACTIVE_KEY; + } + + if (ext->alg != IW_ENCODE_ALG_NONE) { + memcpy(sec.keys[idx], ext->key, ext->key_len); + sec.key_sizes[idx] = ext->key_len; + sec.flags |= (1 << idx); + if (ext->alg == IW_ENCODE_ALG_WEP) { + sec.encode_alg[idx] = SEC_ALG_WEP; + sec.flags |= SEC_LEVEL; + sec.level = SEC_LEVEL_1; + } else if (ext->alg == IW_ENCODE_ALG_TKIP) { + sec.encode_alg[idx] = SEC_ALG_TKIP; + sec.flags |= SEC_LEVEL; + sec.level = SEC_LEVEL_2; + } else if (ext->alg == IW_ENCODE_ALG_CCMP) { + sec.encode_alg[idx] = SEC_ALG_CCMP; + sec.flags |= SEC_LEVEL; + sec.level = SEC_LEVEL_3; + } + /* Don't set sec level for group keys. */ + if (group_key) + sec.flags &= ~SEC_LEVEL; + } + done: + if (ieee->set_security) + ieee->set_security(ieee->dev, &sec); + + /* + * Do not reset port if card is in Managed mode since resetting will + * generate new IEEE 802.11 authentication which may end up in looping + * with IEEE 802.1X. If your hardware requires a reset after WEP + * configuration (for example... Prism2), implement the reset_port in + * the callbacks structures used to initialize the 802.11 stack. + */ + if (ieee->reset_on_keychange && + ieee->iw_mode != IW_MODE_INFRA && + ieee->reset_port && ieee->reset_port(dev)) { + IEEE80211_DEBUG_WX("%s: reset_port failed\n", dev->name); + return -EINVAL; + } + + return ret; +} + +int ieee80211_wx_get_encodeext(struct ieee80211_device *ieee, + struct iw_request_info *info, + union iwreq_data *wrqu, char *extra) +{ + struct iw_point *encoding = &wrqu->encoding; + struct iw_encode_ext *ext = (struct iw_encode_ext *)extra; + struct ieee80211_security *sec = &ieee->sec; + int idx, max_key_len; + + max_key_len = encoding->length - sizeof(*ext); + if (max_key_len < 0) + return -EINVAL; + + idx = encoding->flags & IW_ENCODE_INDEX; + if (idx) { + if (idx < 1 || idx > WEP_KEYS) + return -EINVAL; + idx--; + } else + idx = ieee->tx_keyidx; + + if (!ext->ext_flags & IW_ENCODE_EXT_GROUP_KEY) + if (idx != 0 || ieee->iw_mode != IW_MODE_INFRA) + return -EINVAL; + + encoding->flags = idx + 1; + memset(ext, 0, sizeof(*ext)); + + if (!sec->enabled) { + ext->alg = IW_ENCODE_ALG_NONE; + ext->key_len = 0; + encoding->flags |= IW_ENCODE_DISABLED; + } else { + if (sec->encode_alg[idx] == SEC_ALG_WEP) + ext->alg = IW_ENCODE_ALG_WEP; + else if (sec->encode_alg[idx] == SEC_ALG_TKIP) + ext->alg = IW_ENCODE_ALG_TKIP; + else if (sec->encode_alg[idx] == SEC_ALG_CCMP) + ext->alg = IW_ENCODE_ALG_CCMP; + else + return -EINVAL; + + ext->key_len = sec->key_sizes[idx]; + memcpy(ext->key, sec->keys[idx], ext->key_len); + encoding->flags |= IW_ENCODE_ENABLED; + if (ext->key_len && + (ext->alg == IW_ENCODE_ALG_TKIP || + ext->alg == IW_ENCODE_ALG_CCMP)) + ext->ext_flags |= IW_ENCODE_EXT_TX_SEQ_VALID; + + } + + return 0; +} + +EXPORT_SYMBOL(ieee80211_wx_set_encodeext); +EXPORT_SYMBOL(ieee80211_wx_get_encodeext); + EXPORT_SYMBOL(ieee80211_wx_get_scan); EXPORT_SYMBOL(ieee80211_wx_set_encode); EXPORT_SYMBOL(ieee80211_wx_get_encode); diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c index 74f2207e131..4ec4b2ca6ab 100644 --- a/net/ipv4/devinet.c +++ b/net/ipv4/devinet.c @@ -715,6 +715,7 @@ int devinet_ioctl(unsigned int cmd, void __user *arg) break; ret = 0; if (ifa->ifa_mask != sin->sin_addr.s_addr) { + u32 old_mask = ifa->ifa_mask; inet_del_ifa(in_dev, ifap, 0); ifa->ifa_mask = sin->sin_addr.s_addr; ifa->ifa_prefixlen = inet_mask_len(ifa->ifa_mask); @@ -728,7 +729,7 @@ int devinet_ioctl(unsigned int cmd, void __user *arg) if ((dev->flags & IFF_BROADCAST) && (ifa->ifa_prefixlen < 31) && (ifa->ifa_broadcast == - (ifa->ifa_local|~ifa->ifa_mask))) { + (ifa->ifa_local|~old_mask))) { ifa->ifa_broadcast = (ifa->ifa_local | ~sin->sin_addr.s_addr); } diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c index 0093ea08c7f..66247f38b37 100644 --- a/net/ipv4/fib_trie.c +++ b/net/ipv4/fib_trie.c @@ -2404,7 +2404,7 @@ static int fib_route_seq_show(struct seq_file *seq, void *v) prefix = htonl(l->key); list_for_each_entry_rcu(fa, &li->falh, fa_list) { - const struct fib_info *fi = rcu_dereference(fa->fa_info); + const struct fib_info *fi = fa->fa_info; unsigned flags = fib_flag_trans(fa->fa_type, mask, fi); if (fa->fa_type == RTN_BROADCAST diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c index 90dca711ac9..175e093ec56 100644 --- a/net/ipv4/icmp.c +++ b/net/ipv4/icmp.c @@ -1108,12 +1108,9 @@ void __init icmp_init(struct net_proto_family *ops) struct inet_sock *inet; int i; - for (i = 0; i < NR_CPUS; i++) { + for_each_cpu(i) { int err; - if (!cpu_possible(i)) - continue; - err = sock_create_kern(PF_INET, SOCK_RAW, IPPROTO_ICMP, &per_cpu(__icmp_socket, i)); diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index 1ad5202e556..17758234a3e 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -275,7 +275,8 @@ int ip_output(struct sk_buff *skb) { IP_INC_STATS(IPSTATS_MIB_OUTREQUESTS); - if (skb->len > dst_mtu(skb->dst) && !skb_shinfo(skb)->tso_size) + if (skb->len > dst_mtu(skb->dst) && + !(skb_shinfo(skb)->ufo_size || skb_shinfo(skb)->tso_size)) return ip_fragment(skb, ip_finish_output); else return ip_finish_output(skb); @@ -688,6 +689,60 @@ csum_page(struct page *page, int offset, int copy) return csum; } +inline int ip_ufo_append_data(struct sock *sk, + int getfrag(void *from, char *to, int offset, int len, + int odd, struct sk_buff *skb), + void *from, int length, int hh_len, int fragheaderlen, + int transhdrlen, int mtu,unsigned int flags) +{ + struct sk_buff *skb; + int err; + + /* There is support for UDP fragmentation offload by network + * device, so create one single skb packet containing complete + * udp datagram + */ + if ((skb = skb_peek_tail(&sk->sk_write_queue)) == NULL) { + skb = sock_alloc_send_skb(sk, + hh_len + fragheaderlen + transhdrlen + 20, + (flags & MSG_DONTWAIT), &err); + + if (skb == NULL) + return err; + + /* reserve space for Hardware header */ + skb_reserve(skb, hh_len); + + /* create space for UDP/IP header */ + skb_put(skb,fragheaderlen + transhdrlen); + + /* initialize network header pointer */ + skb->nh.raw = skb->data; + + /* initialize protocol header pointer */ + skb->h.raw = skb->data + fragheaderlen; + + skb->ip_summed = CHECKSUM_HW; + skb->csum = 0; + sk->sk_sndmsg_off = 0; + } + + err = skb_append_datato_frags(sk,skb, getfrag, from, + (length - transhdrlen)); + if (!err) { + /* specify the length of each IP datagram fragment*/ + skb_shinfo(skb)->ufo_size = (mtu - fragheaderlen); + __skb_queue_tail(&sk->sk_write_queue, skb); + + return 0; + } + /* There is not enough support do UFO , + * so follow normal path + */ + kfree_skb(skb); + return err; +} + /* * ip_append_data() and ip_append_page() can make one large IP datagram * from many pieces of data. Each pieces will be holded on the socket @@ -777,6 +832,15 @@ int ip_append_data(struct sock *sk, csummode = CHECKSUM_HW; inet->cork.length += length; + if (((length > mtu) && (sk->sk_protocol == IPPROTO_UDP)) && + (rt->u.dst.dev->features & NETIF_F_UFO)) { + + if(ip_ufo_append_data(sk, getfrag, from, length, hh_len, + fragheaderlen, transhdrlen, mtu, flags)) + goto error; + + return 0; + } /* So, what's going on in the loop below? * @@ -1008,14 +1072,23 @@ ssize_t ip_append_page(struct sock *sk, struct page *page, return -EINVAL; inet->cork.length += size; + if ((sk->sk_protocol == IPPROTO_UDP) && + (rt->u.dst.dev->features & NETIF_F_UFO)) + skb_shinfo(skb)->ufo_size = (mtu - fragheaderlen); + while (size > 0) { int i; - /* Check if the remaining data fits into current packet. */ - len = mtu - skb->len; - if (len < size) - len = maxfraglen - skb->len; + if (skb_shinfo(skb)->ufo_size) + len = size; + else { + + /* Check if the remaining data fits into current packet. */ + len = mtu - skb->len; + if (len < size) + len = maxfraglen - skb->len; + } if (len <= 0) { struct sk_buff *skb_prev; char *data; @@ -1023,10 +1096,7 @@ ssize_t ip_append_page(struct sock *sk, struct page *page, int alloclen; skb_prev = skb; - if (skb_prev) - fraggap = skb_prev->len - maxfraglen; - else - fraggap = 0; + fraggap = skb_prev->len - maxfraglen; alloclen = fragheaderlen + hh_len + fraggap + 15; skb = sock_wmalloc(sk, alloclen, 1, sk->sk_allocation); diff --git a/net/ipv4/netfilter/ip_conntrack_core.c b/net/ipv4/netfilter/ip_conntrack_core.c index 07a80b56e8d..422ab68ee7f 100644 --- a/net/ipv4/netfilter/ip_conntrack_core.c +++ b/net/ipv4/netfilter/ip_conntrack_core.c @@ -50,7 +50,7 @@ #include <linux/netfilter_ipv4/ip_conntrack_core.h> #include <linux/netfilter_ipv4/listhelp.h> -#define IP_CONNTRACK_VERSION "2.3" +#define IP_CONNTRACK_VERSION "2.4" #if 0 #define DEBUGP printk @@ -148,16 +148,20 @@ DEFINE_PER_CPU(struct ip_conntrack_stat, ip_conntrack_stat); static int ip_conntrack_hash_rnd_initted; static unsigned int ip_conntrack_hash_rnd; -static u_int32_t -hash_conntrack(const struct ip_conntrack_tuple *tuple) +static u_int32_t __hash_conntrack(const struct ip_conntrack_tuple *tuple, + unsigned int size, unsigned int rnd) { -#if 0 - dump_tuple(tuple); -#endif return (jhash_3words(tuple->src.ip, (tuple->dst.ip ^ tuple->dst.protonum), (tuple->src.u.all | (tuple->dst.u.all << 16)), - ip_conntrack_hash_rnd) % ip_conntrack_htable_size); + rnd) % size); +} + +static u_int32_t +hash_conntrack(const struct ip_conntrack_tuple *tuple) +{ + return __hash_conntrack(tuple, ip_conntrack_htable_size, + ip_conntrack_hash_rnd); } int @@ -1341,14 +1345,13 @@ static int kill_all(struct ip_conntrack *i, void *data) return 1; } -static void free_conntrack_hash(void) +static void free_conntrack_hash(struct list_head *hash, int vmalloced,int size) { - if (ip_conntrack_vmalloc) - vfree(ip_conntrack_hash); + if (vmalloced) + vfree(hash); else - free_pages((unsigned long)ip_conntrack_hash, - get_order(sizeof(struct list_head) - * ip_conntrack_htable_size)); + free_pages((unsigned long)hash, + get_order(sizeof(struct list_head) * size)); } void ip_conntrack_flush() @@ -1378,12 +1381,83 @@ void ip_conntrack_cleanup(void) ip_conntrack_flush(); kmem_cache_destroy(ip_conntrack_cachep); kmem_cache_destroy(ip_conntrack_expect_cachep); - free_conntrack_hash(); + free_conntrack_hash(ip_conntrack_hash, ip_conntrack_vmalloc, + ip_conntrack_htable_size); nf_unregister_sockopt(&so_getorigdst); } -static int hashsize; -module_param(hashsize, int, 0400); +static struct list_head *alloc_hashtable(int size, int *vmalloced) +{ + struct list_head *hash; + unsigned int i; + + *vmalloced = 0; + hash = (void*)__get_free_pages(GFP_KERNEL, + get_order(sizeof(struct list_head) + * size)); + if (!hash) { + *vmalloced = 1; + printk(KERN_WARNING"ip_conntrack: falling back to vmalloc.\n"); + hash = vmalloc(sizeof(struct list_head) * size); + } + + if (hash) + for (i = 0; i < size; i++) + INIT_LIST_HEAD(&hash[i]); + + return hash; +} + +int set_hashsize(const char *val, struct kernel_param *kp) +{ + int i, bucket, hashsize, vmalloced; + int old_vmalloced, old_size; + int rnd; + struct list_head *hash, *old_hash; + struct ip_conntrack_tuple_hash *h; + + /* On boot, we can set this without any fancy locking. */ + if (!ip_conntrack_htable_size) + return param_set_int(val, kp); + + hashsize = simple_strtol(val, NULL, 0); + if (!hashsize) + return -EINVAL; + + hash = alloc_hashtable(hashsize, &vmalloced); + if (!hash) + return -ENOMEM; + + /* We have to rehash for the new table anyway, so we also can + * use a new random seed */ + get_random_bytes(&rnd, 4); + + write_lock_bh(&ip_conntrack_lock); + for (i = 0; i < ip_conntrack_htable_size; i++) { + while (!list_empty(&ip_conntrack_hash[i])) { + h = list_entry(ip_conntrack_hash[i].next, + struct ip_conntrack_tuple_hash, list); + list_del(&h->list); + bucket = __hash_conntrack(&h->tuple, hashsize, rnd); + list_add_tail(&h->list, &hash[bucket]); + } + } + old_size = ip_conntrack_htable_size; + old_vmalloced = ip_conntrack_vmalloc; + old_hash = ip_conntrack_hash; + + ip_conntrack_htable_size = hashsize; + ip_conntrack_vmalloc = vmalloced; + ip_conntrack_hash = hash; + ip_conntrack_hash_rnd = rnd; + write_unlock_bh(&ip_conntrack_lock); + + free_conntrack_hash(old_hash, old_vmalloced, old_size); + return 0; +} + +module_param_call(hashsize, set_hashsize, param_get_uint, + &ip_conntrack_htable_size, 0600); int __init ip_conntrack_init(void) { @@ -1392,9 +1466,7 @@ int __init ip_conntrack_init(void) /* Idea from tcp.c: use 1/16384 of memory. On i386: 32MB * machine has 256 buckets. >= 1GB machines have 8192 buckets. */ - if (hashsize) { - ip_conntrack_htable_size = hashsize; - } else { + if (!ip_conntrack_htable_size) { ip_conntrack_htable_size = (((num_physpages << PAGE_SHIFT) / 16384) / sizeof(struct list_head)); @@ -1416,20 +1488,8 @@ int __init ip_conntrack_init(void) return ret; } - /* AK: the hash table is twice as big than needed because it - uses list_head. it would be much nicer to caches to use a - single pointer list head here. */ - ip_conntrack_vmalloc = 0; - ip_conntrack_hash - =(void*)__get_free_pages(GFP_KERNEL, - get_order(sizeof(struct list_head) - *ip_conntrack_htable_size)); - if (!ip_conntrack_hash) { - ip_conntrack_vmalloc = 1; - printk(KERN_WARNING "ip_conntrack: falling back to vmalloc.\n"); - ip_conntrack_hash = vmalloc(sizeof(struct list_head) - * ip_conntrack_htable_size); - } + ip_conntrack_hash = alloc_hashtable(ip_conntrack_htable_size, + &ip_conntrack_vmalloc); if (!ip_conntrack_hash) { printk(KERN_ERR "Unable to create ip_conntrack_hash\n"); goto err_unreg_sockopt; @@ -1461,9 +1521,6 @@ int __init ip_conntrack_init(void) ip_ct_protos[IPPROTO_ICMP] = &ip_conntrack_protocol_icmp; write_unlock_bh(&ip_conntrack_lock); - for (i = 0; i < ip_conntrack_htable_size; i++) - INIT_LIST_HEAD(&ip_conntrack_hash[i]); - /* For use by ipt_REJECT */ ip_ct_attach = ip_conntrack_attach; @@ -1478,7 +1535,8 @@ int __init ip_conntrack_init(void) err_free_conntrack_slab: kmem_cache_destroy(ip_conntrack_cachep); err_free_hash: - free_conntrack_hash(); + free_conntrack_hash(ip_conntrack_hash, ip_conntrack_vmalloc, + ip_conntrack_htable_size); err_unreg_sockopt: nf_unregister_sockopt(&so_getorigdst); diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c index f7943ba1f43..a65e508fbd4 100644 --- a/net/ipv4/proc.c +++ b/net/ipv4/proc.c @@ -90,9 +90,7 @@ fold_field(void *mib[], int offt) unsigned long res = 0; int i; - for (i = 0; i < NR_CPUS; i++) { - if (!cpu_possible(i)) - continue; + for_each_cpu(i) { res += *(((unsigned long *) per_cpu_ptr(mib[0], i)) + offt); res += *(((unsigned long *) per_cpu_ptr(mib[1], i)) + offt); } diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c index b7185fb3377..23e540365a1 100644 --- a/net/ipv6/icmp.c +++ b/net/ipv6/icmp.c @@ -700,10 +700,7 @@ int __init icmpv6_init(struct net_proto_family *ops) struct sock *sk; int err, i, j; - for (i = 0; i < NR_CPUS; i++) { - if (!cpu_possible(i)) - continue; - + for_each_cpu(i) { err = sock_create_kern(PF_INET6, SOCK_RAW, IPPROTO_ICMPV6, &per_cpu(__icmpv6_socket, i)); if (err < 0) { @@ -749,9 +746,7 @@ void icmpv6_cleanup(void) { int i; - for (i = 0; i < NR_CPUS; i++) { - if (!cpu_possible(i)) - continue; + for_each_cpu(i) { sock_release(per_cpu(__icmpv6_socket, i)); } inet6_del_protocol(&icmpv6_protocol, IPPROTO_ICMPV6); diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index 563b442ffab..614296a920c 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -147,7 +147,8 @@ static int ip6_output2(struct sk_buff *skb) int ip6_output(struct sk_buff *skb) { - if (skb->len > dst_mtu(skb->dst) || dst_allfrag(skb->dst)) + if ((skb->len > dst_mtu(skb->dst) && !skb_shinfo(skb)->ufo_size) || + dst_allfrag(skb->dst)) return ip6_fragment(skb, ip6_output2); else return ip6_output2(skb); @@ -768,6 +769,65 @@ out_err_release: *dst = NULL; return err; } +inline int ip6_ufo_append_data(struct sock *sk, + int getfrag(void *from, char *to, int offset, int len, + int odd, struct sk_buff *skb), + void *from, int length, int hh_len, int fragheaderlen, + int transhdrlen, int mtu,unsigned int flags) + +{ + struct sk_buff *skb; + int err; + + /* There is support for UDP large send offload by network + * device, so create one single skb packet containing complete + * udp datagram + */ + if ((skb = skb_peek_tail(&sk->sk_write_queue)) == NULL) { + skb = sock_alloc_send_skb(sk, + hh_len + fragheaderlen + transhdrlen + 20, + (flags & MSG_DONTWAIT), &err); + if (skb == NULL) + return -ENOMEM; + + /* reserve space for Hardware header */ + skb_reserve(skb, hh_len); + + /* create space for UDP/IP header */ + skb_put(skb,fragheaderlen + transhdrlen); + + /* initialize network header pointer */ + skb->nh.raw = skb->data; + + /* initialize protocol header pointer */ + skb->h.raw = skb->data + fragheaderlen; + + skb->ip_summed = CHECKSUM_HW; + skb->csum = 0; + sk->sk_sndmsg_off = 0; + } + + err = skb_append_datato_frags(sk,skb, getfrag, from, + (length - transhdrlen)); + if (!err) { + struct frag_hdr fhdr; + + /* specify the length of each IP datagram fragment*/ + skb_shinfo(skb)->ufo_size = (mtu - fragheaderlen) - + sizeof(struct frag_hdr); + ipv6_select_ident(skb, &fhdr); + skb_shinfo(skb)->ip6_frag_id = fhdr.identification; + __skb_queue_tail(&sk->sk_write_queue, skb); + + return 0; + } + /* There is not enough support do UPD LSO, + * so follow normal path + */ + kfree_skb(skb); + + return err; +} int ip6_append_data(struct sock *sk, int getfrag(void *from, char *to, int offset, int len, int odd, struct sk_buff *skb), @@ -860,6 +920,15 @@ int ip6_append_data(struct sock *sk, int getfrag(void *from, char *to, */ inet->cork.length += length; + if (((length > mtu) && (sk->sk_protocol == IPPROTO_UDP)) && + (rt->u.dst.dev->features & NETIF_F_UFO)) { + + if(ip6_ufo_append_data(sk, getfrag, from, length, hh_len, + fragheaderlen, transhdrlen, mtu, flags)) + goto error; + + return 0; + } if ((skb = skb_peek_tail(&sk->sk_write_queue)) == NULL) goto alloc_new_skb; diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c index 39a96c76810..c4f2a0ef748 100644 --- a/net/ipv6/mcast.c +++ b/net/ipv6/mcast.c @@ -164,7 +164,7 @@ static int ip6_mc_leave_src(struct sock *sk, struct ipv6_mc_socklist *iml, #define MLDV2_MASK(value, nb) ((nb)>=32 ? (value) : ((1<<(nb))-1) & (value)) #define MLDV2_EXP(thresh, nbmant, nbexp, value) \ ((value) < (thresh) ? (value) : \ - ((MLDV2_MASK(value, nbmant) | (1<<(nbmant+nbexp))) << \ + ((MLDV2_MASK(value, nbmant) | (1<<(nbmant))) << \ (MLDV2_MASK((value) >> (nbmant), nbexp) + (nbexp)))) #define MLDV2_QQIC(value) MLDV2_EXP(0x80, 4, 3, value) diff --git a/net/ipv6/proc.c b/net/ipv6/proc.c index 334a5967831..50a13e75d70 100644 --- a/net/ipv6/proc.c +++ b/net/ipv6/proc.c @@ -140,9 +140,7 @@ fold_field(void *mib[], int offt) unsigned long res = 0; int i; - for (i = 0; i < NR_CPUS; i++) { - if (!cpu_possible(i)) - continue; + for_each_cpu(i) { res += *(((unsigned long *)per_cpu_ptr(mib[0], i)) + offt); res += *(((unsigned long *)per_cpu_ptr(mib[1], i)) + offt); } diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index 678c3f2c0d0..5ca283537bc 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -740,11 +740,8 @@ int netlink_attachskb(struct sock *sk, struct sk_buff *skb, int nonblock, long t int netlink_sendskb(struct sock *sk, struct sk_buff *skb, int protocol) { - struct netlink_sock *nlk; int len = skb->len; - nlk = nlk_sk(sk); - skb_queue_tail(&sk->sk_receive_queue, skb); sk->sk_data_ready(sk, len); sock_put(sk); @@ -827,7 +824,7 @@ struct netlink_broadcast_data { int failure; int congested; int delivered; - unsigned int allocation; + gfp_t allocation; struct sk_buff *skb, *skb2; }; diff --git a/net/rose/rose_route.c b/net/rose/rose_route.c index e556d92c0bc..b18fe504301 100644 --- a/net/rose/rose_route.c +++ b/net/rose/rose_route.c @@ -727,7 +727,7 @@ int rose_rt_ioctl(unsigned int cmd, void __user *arg) } if (rose_route.mask > 10) /* Mask can't be more than 10 digits */ return -EINVAL; - if (rose_route.ndigis > 8) /* No more than 8 digipeats */ + if (rose_route.ndigis > AX25_MAX_DIGIS) return -EINVAL; err = rose_add_node(&rose_route, dev); dev_put(dev); diff --git a/net/sctp/proc.c b/net/sctp/proc.c index b74f7772b57..6e4dc28874d 100644 --- a/net/sctp/proc.c +++ b/net/sctp/proc.c @@ -69,9 +69,7 @@ fold_field(void *mib[], int nr) unsigned long res = 0; int i; - for (i = 0; i < NR_CPUS; i++) { - if (!cpu_possible(i)) - continue; + for_each_cpu(i) { res += *((unsigned long *) (((void *) per_cpu_ptr(mib[0], i)) + sizeof (unsigned long) * nr)); diff --git a/net/sunrpc/Makefile b/net/sunrpc/Makefile index 46a2ce00a29..cdcab9ca4c6 100644 --- a/net/sunrpc/Makefile +++ b/net/sunrpc/Makefile @@ -6,7 +6,7 @@ obj-$(CONFIG_SUNRPC) += sunrpc.o obj-$(CONFIG_SUNRPC_GSS) += auth_gss/ -sunrpc-y := clnt.o xprt.o sched.o \ +sunrpc-y := clnt.o xprt.o socklib.o xprtsock.o sched.o \ auth.o auth_null.o auth_unix.o \ svc.o svcsock.o svcauth.o svcauth_unix.o \ pmap_clnt.o timer.o xdr.o \ diff --git a/net/sunrpc/auth.c b/net/sunrpc/auth.c index 505e2d4b3d6..a415d99c394 100644 --- a/net/sunrpc/auth.c +++ b/net/sunrpc/auth.c @@ -11,7 +11,6 @@ #include <linux/module.h> #include <linux/slab.h> #include <linux/errno.h> -#include <linux/socket.h> #include <linux/sunrpc/clnt.h> #include <linux/spinlock.h> diff --git a/net/sunrpc/auth_gss/Makefile b/net/sunrpc/auth_gss/Makefile index fe1b874084b..f3431a7e33d 100644 --- a/net/sunrpc/auth_gss/Makefile +++ b/net/sunrpc/auth_gss/Makefile @@ -10,7 +10,7 @@ auth_rpcgss-objs := auth_gss.o gss_generic_token.o \ obj-$(CONFIG_RPCSEC_GSS_KRB5) += rpcsec_gss_krb5.o rpcsec_gss_krb5-objs := gss_krb5_mech.o gss_krb5_seal.o gss_krb5_unseal.o \ - gss_krb5_seqnum.o + gss_krb5_seqnum.o gss_krb5_wrap.o obj-$(CONFIG_RPCSEC_GSS_SPKM3) += rpcsec_gss_spkm3.o diff --git a/net/sunrpc/auth_gss/auth_gss.c b/net/sunrpc/auth_gss/auth_gss.c index 2f7b867161d..f44f46f1d8e 100644 --- a/net/sunrpc/auth_gss/auth_gss.c +++ b/net/sunrpc/auth_gss/auth_gss.c @@ -42,9 +42,8 @@ #include <linux/init.h> #include <linux/types.h> #include <linux/slab.h> -#include <linux/socket.h> -#include <linux/in.h> #include <linux/sched.h> +#include <linux/pagemap.h> #include <linux/sunrpc/clnt.h> #include <linux/sunrpc/auth.h> #include <linux/sunrpc/auth_gss.h> @@ -846,10 +845,8 @@ gss_marshal(struct rpc_task *task, u32 *p) /* We compute the checksum for the verifier over the xdr-encoded bytes * starting with the xid and ending at the end of the credential: */ - iov.iov_base = req->rq_snd_buf.head[0].iov_base; - if (task->tk_client->cl_xprt->stream) - /* See clnt.c:call_header() */ - iov.iov_base += 4; + iov.iov_base = xprt_skip_transport_header(task->tk_xprt, + req->rq_snd_buf.head[0].iov_base); iov.iov_len = (u8 *)p - (u8 *)iov.iov_base; xdr_buf_from_iov(&iov, &verf_buf); @@ -857,9 +854,7 @@ gss_marshal(struct rpc_task *task, u32 *p) *p++ = htonl(RPC_AUTH_GSS); mic.data = (u8 *)(p + 1); - maj_stat = gss_get_mic(ctx->gc_gss_ctx, - GSS_C_QOP_DEFAULT, - &verf_buf, &mic); + maj_stat = gss_get_mic(ctx->gc_gss_ctx, &verf_buf, &mic); if (maj_stat == GSS_S_CONTEXT_EXPIRED) { cred->cr_flags &= ~RPCAUTH_CRED_UPTODATE; } else if (maj_stat != 0) { @@ -890,10 +885,8 @@ static u32 * gss_validate(struct rpc_task *task, u32 *p) { struct rpc_cred *cred = task->tk_msg.rpc_cred; - struct gss_cred *gss_cred = container_of(cred, struct gss_cred, - gc_base); struct gss_cl_ctx *ctx = gss_cred_get_ctx(cred); - u32 seq, qop_state; + u32 seq; struct kvec iov; struct xdr_buf verf_buf; struct xdr_netobj mic; @@ -914,23 +907,14 @@ gss_validate(struct rpc_task *task, u32 *p) mic.data = (u8 *)p; mic.len = len; - maj_stat = gss_verify_mic(ctx->gc_gss_ctx, &verf_buf, &mic, &qop_state); + maj_stat = gss_verify_mic(ctx->gc_gss_ctx, &verf_buf, &mic); if (maj_stat == GSS_S_CONTEXT_EXPIRED) cred->cr_flags &= ~RPCAUTH_CRED_UPTODATE; if (maj_stat) goto out_bad; - switch (gss_cred->gc_service) { - case RPC_GSS_SVC_NONE: - /* verifier data, flavor, length: */ - task->tk_auth->au_rslack = XDR_QUADLEN(len) + 2; - break; - case RPC_GSS_SVC_INTEGRITY: - /* verifier data, flavor, length, length, sequence number: */ - task->tk_auth->au_rslack = XDR_QUADLEN(len) + 4; - break; - case RPC_GSS_SVC_PRIVACY: - goto out_bad; - } + /* We leave it to unwrap to calculate au_rslack. For now we just + * calculate the length of the verifier: */ + task->tk_auth->au_verfsize = XDR_QUADLEN(len) + 2; gss_put_ctx(ctx); dprintk("RPC: %4u GSS gss_validate: gss_verify_mic succeeded.\n", task->tk_pid); @@ -975,8 +959,7 @@ gss_wrap_req_integ(struct rpc_cred *cred, struct gss_cl_ctx *ctx, p = iov->iov_base + iov->iov_len; mic.data = (u8 *)(p + 1); - maj_stat = gss_get_mic(ctx->gc_gss_ctx, - GSS_C_QOP_DEFAULT, &integ_buf, &mic); + maj_stat = gss_get_mic(ctx->gc_gss_ctx, &integ_buf, &mic); status = -EIO; /* XXX? */ if (maj_stat == GSS_S_CONTEXT_EXPIRED) cred->cr_flags &= ~RPCAUTH_CRED_UPTODATE; @@ -990,6 +973,113 @@ gss_wrap_req_integ(struct rpc_cred *cred, struct gss_cl_ctx *ctx, return 0; } +static void +priv_release_snd_buf(struct rpc_rqst *rqstp) +{ + int i; + + for (i=0; i < rqstp->rq_enc_pages_num; i++) + __free_page(rqstp->rq_enc_pages[i]); + kfree(rqstp->rq_enc_pages); +} + +static int +alloc_enc_pages(struct rpc_rqst *rqstp) +{ + struct xdr_buf *snd_buf = &rqstp->rq_snd_buf; + int first, last, i; + + if (snd_buf->page_len == 0) { + rqstp->rq_enc_pages_num = 0; + return 0; + } + + first = snd_buf->page_base >> PAGE_CACHE_SHIFT; + last = (snd_buf->page_base + snd_buf->page_len - 1) >> PAGE_CACHE_SHIFT; + rqstp->rq_enc_pages_num = last - first + 1 + 1; + rqstp->rq_enc_pages + = kmalloc(rqstp->rq_enc_pages_num * sizeof(struct page *), + GFP_NOFS); + if (!rqstp->rq_enc_pages) + goto out; + for (i=0; i < rqstp->rq_enc_pages_num; i++) { + rqstp->rq_enc_pages[i] = alloc_page(GFP_NOFS); + if (rqstp->rq_enc_pages[i] == NULL) + goto out_free; + } + rqstp->rq_release_snd_buf = priv_release_snd_buf; + return 0; +out_free: + for (i--; i >= 0; i--) { + __free_page(rqstp->rq_enc_pages[i]); + } +out: + return -EAGAIN; +} + +static inline int +gss_wrap_req_priv(struct rpc_cred *cred, struct gss_cl_ctx *ctx, + kxdrproc_t encode, struct rpc_rqst *rqstp, u32 *p, void *obj) +{ + struct xdr_buf *snd_buf = &rqstp->rq_snd_buf; + u32 offset; + u32 maj_stat; + int status; + u32 *opaque_len; + struct page **inpages; + int first; + int pad; + struct kvec *iov; + char *tmp; + + opaque_len = p++; + offset = (u8 *)p - (u8 *)snd_buf->head[0].iov_base; + *p++ = htonl(rqstp->rq_seqno); + + status = encode(rqstp, p, obj); + if (status) + return status; + + status = alloc_enc_pages(rqstp); + if (status) + return status; + first = snd_buf->page_base >> PAGE_CACHE_SHIFT; + inpages = snd_buf->pages + first; + snd_buf->pages = rqstp->rq_enc_pages; + snd_buf->page_base -= first << PAGE_CACHE_SHIFT; + /* Give the tail its own page, in case we need extra space in the + * head when wrapping: */ + if (snd_buf->page_len || snd_buf->tail[0].iov_len) { + tmp = page_address(rqstp->rq_enc_pages[rqstp->rq_enc_pages_num - 1]); + memcpy(tmp, snd_buf->tail[0].iov_base, snd_buf->tail[0].iov_len); + snd_buf->tail[0].iov_base = tmp; + } + maj_stat = gss_wrap(ctx->gc_gss_ctx, offset, snd_buf, inpages); + /* RPC_SLACK_SPACE should prevent this ever happening: */ + BUG_ON(snd_buf->len > snd_buf->buflen); + status = -EIO; + /* We're assuming that when GSS_S_CONTEXT_EXPIRED, the encryption was + * done anyway, so it's safe to put the request on the wire: */ + if (maj_stat == GSS_S_CONTEXT_EXPIRED) + cred->cr_flags &= ~RPCAUTH_CRED_UPTODATE; + else if (maj_stat) + return status; + + *opaque_len = htonl(snd_buf->len - offset); + /* guess whether we're in the head or the tail: */ + if (snd_buf->page_len || snd_buf->tail[0].iov_len) + iov = snd_buf->tail; + else + iov = snd_buf->head; + p = iov->iov_base + iov->iov_len; + pad = 3 - ((snd_buf->len - offset - 1) & 3); + memset(p, 0, pad); + iov->iov_len += pad; + snd_buf->len += pad; + + return 0; +} + static int gss_wrap_req(struct rpc_task *task, kxdrproc_t encode, void *rqstp, u32 *p, void *obj) @@ -1017,6 +1107,8 @@ gss_wrap_req(struct rpc_task *task, rqstp, p, obj); break; case RPC_GSS_SVC_PRIVACY: + status = gss_wrap_req_priv(cred, ctx, encode, + rqstp, p, obj); break; } out: @@ -1054,8 +1146,7 @@ gss_unwrap_resp_integ(struct rpc_cred *cred, struct gss_cl_ctx *ctx, if (xdr_buf_read_netobj(rcv_buf, &mic, mic_offset)) return status; - maj_stat = gss_verify_mic(ctx->gc_gss_ctx, &integ_buf, - &mic, NULL); + maj_stat = gss_verify_mic(ctx->gc_gss_ctx, &integ_buf, &mic); if (maj_stat == GSS_S_CONTEXT_EXPIRED) cred->cr_flags &= ~RPCAUTH_CRED_UPTODATE; if (maj_stat != GSS_S_COMPLETE) @@ -1063,6 +1154,35 @@ gss_unwrap_resp_integ(struct rpc_cred *cred, struct gss_cl_ctx *ctx, return 0; } +static inline int +gss_unwrap_resp_priv(struct rpc_cred *cred, struct gss_cl_ctx *ctx, + struct rpc_rqst *rqstp, u32 **p) +{ + struct xdr_buf *rcv_buf = &rqstp->rq_rcv_buf; + u32 offset; + u32 opaque_len; + u32 maj_stat; + int status = -EIO; + + opaque_len = ntohl(*(*p)++); + offset = (u8 *)(*p) - (u8 *)rcv_buf->head[0].iov_base; + if (offset + opaque_len > rcv_buf->len) + return status; + /* remove padding: */ + rcv_buf->len = offset + opaque_len; + + maj_stat = gss_unwrap(ctx->gc_gss_ctx, offset, rcv_buf); + if (maj_stat == GSS_S_CONTEXT_EXPIRED) + cred->cr_flags &= ~RPCAUTH_CRED_UPTODATE; + if (maj_stat != GSS_S_COMPLETE) + return status; + if (ntohl(*(*p)++) != rqstp->rq_seqno) + return status; + + return 0; +} + + static int gss_unwrap_resp(struct rpc_task *task, kxdrproc_t decode, void *rqstp, u32 *p, void *obj) @@ -1071,6 +1191,9 @@ gss_unwrap_resp(struct rpc_task *task, struct gss_cred *gss_cred = container_of(cred, struct gss_cred, gc_base); struct gss_cl_ctx *ctx = gss_cred_get_ctx(cred); + u32 *savedp = p; + struct kvec *head = ((struct rpc_rqst *)rqstp)->rq_rcv_buf.head; + int savedlen = head->iov_len; int status = -EIO; if (ctx->gc_proc != RPC_GSS_PROC_DATA) @@ -1084,8 +1207,14 @@ gss_unwrap_resp(struct rpc_task *task, goto out; break; case RPC_GSS_SVC_PRIVACY: + status = gss_unwrap_resp_priv(cred, ctx, rqstp, &p); + if (status) + goto out; break; } + /* take into account extra slack for integrity and privacy cases: */ + task->tk_auth->au_rslack = task->tk_auth->au_verfsize + (p - savedp) + + (savedlen - head->iov_len); out_decode: status = decode(rqstp, p, obj); out: diff --git a/net/sunrpc/auth_gss/gss_krb5_crypto.c b/net/sunrpc/auth_gss/gss_krb5_crypto.c index ee6ae74cd1b..3f3d5437f02 100644 --- a/net/sunrpc/auth_gss/gss_krb5_crypto.c +++ b/net/sunrpc/auth_gss/gss_krb5_crypto.c @@ -139,17 +139,91 @@ buf_to_sg(struct scatterlist *sg, char *ptr, int len) { sg->length = len; } +static int +process_xdr_buf(struct xdr_buf *buf, int offset, int len, + int (*actor)(struct scatterlist *, void *), void *data) +{ + int i, page_len, thislen, page_offset, ret = 0; + struct scatterlist sg[1]; + + if (offset >= buf->head[0].iov_len) { + offset -= buf->head[0].iov_len; + } else { + thislen = buf->head[0].iov_len - offset; + if (thislen > len) + thislen = len; + buf_to_sg(sg, buf->head[0].iov_base + offset, thislen); + ret = actor(sg, data); + if (ret) + goto out; + offset = 0; + len -= thislen; + } + if (len == 0) + goto out; + + if (offset >= buf->page_len) { + offset -= buf->page_len; + } else { + page_len = buf->page_len - offset; + if (page_len > len) + page_len = len; + len -= page_len; + page_offset = (offset + buf->page_base) & (PAGE_CACHE_SIZE - 1); + i = (offset + buf->page_base) >> PAGE_CACHE_SHIFT; + thislen = PAGE_CACHE_SIZE - page_offset; + do { + if (thislen > page_len) + thislen = page_len; + sg->page = buf->pages[i]; + sg->offset = page_offset; + sg->length = thislen; + ret = actor(sg, data); + if (ret) + goto out; + page_len -= thislen; + i++; + page_offset = 0; + thislen = PAGE_CACHE_SIZE; + } while (page_len != 0); + offset = 0; + } + if (len == 0) + goto out; + + if (offset < buf->tail[0].iov_len) { + thislen = buf->tail[0].iov_len - offset; + if (thislen > len) + thislen = len; + buf_to_sg(sg, buf->tail[0].iov_base + offset, thislen); + ret = actor(sg, data); + len -= thislen; + } + if (len != 0) + ret = -EINVAL; +out: + return ret; +} + +static int +checksummer(struct scatterlist *sg, void *data) +{ + struct crypto_tfm *tfm = (struct crypto_tfm *)data; + + crypto_digest_update(tfm, sg, 1); + + return 0; +} + /* checksum the plaintext data and hdrlen bytes of the token header */ s32 make_checksum(s32 cksumtype, char *header, int hdrlen, struct xdr_buf *body, - struct xdr_netobj *cksum) + int body_offset, struct xdr_netobj *cksum) { char *cksumname; struct crypto_tfm *tfm = NULL; /* XXX add to ctx? */ struct scatterlist sg[1]; u32 code = GSS_S_FAILURE; - int len, thislen, offset; - int i; switch (cksumtype) { case CKSUMTYPE_RSA_MD5: @@ -169,33 +243,8 @@ make_checksum(s32 cksumtype, char *header, int hdrlen, struct xdr_buf *body, crypto_digest_init(tfm); buf_to_sg(sg, header, hdrlen); crypto_digest_update(tfm, sg, 1); - if (body->head[0].iov_len) { - buf_to_sg(sg, body->head[0].iov_base, body->head[0].iov_len); - crypto_digest_update(tfm, sg, 1); - } - - len = body->page_len; - if (len != 0) { - offset = body->page_base & (PAGE_CACHE_SIZE - 1); - i = body->page_base >> PAGE_CACHE_SHIFT; - thislen = PAGE_CACHE_SIZE - offset; - do { - if (thislen > len) - thislen = len; - sg->page = body->pages[i]; - sg->offset = offset; - sg->length = thislen; - crypto_digest_update(tfm, sg, 1); - len -= thislen; - i++; - offset = 0; - thislen = PAGE_CACHE_SIZE; - } while(len != 0); - } - if (body->tail[0].iov_len) { - buf_to_sg(sg, body->tail[0].iov_base, body->tail[0].iov_len); - crypto_digest_update(tfm, sg, 1); - } + process_xdr_buf(body, body_offset, body->len - body_offset, + checksummer, tfm); crypto_digest_final(tfm, cksum->data); code = 0; out: @@ -204,3 +253,154 @@ out: } EXPORT_SYMBOL(make_checksum); + +struct encryptor_desc { + u8 iv[8]; /* XXX hard-coded blocksize */ + struct crypto_tfm *tfm; + int pos; + struct xdr_buf *outbuf; + struct page **pages; + struct scatterlist infrags[4]; + struct scatterlist outfrags[4]; + int fragno; + int fraglen; +}; + +static int +encryptor(struct scatterlist *sg, void *data) +{ + struct encryptor_desc *desc = data; + struct xdr_buf *outbuf = desc->outbuf; + struct page *in_page; + int thislen = desc->fraglen + sg->length; + int fraglen, ret; + int page_pos; + + /* Worst case is 4 fragments: head, end of page 1, start + * of page 2, tail. Anything more is a bug. */ + BUG_ON(desc->fragno > 3); + desc->infrags[desc->fragno] = *sg; + desc->outfrags[desc->fragno] = *sg; + + page_pos = desc->pos - outbuf->head[0].iov_len; + if (page_pos >= 0 && page_pos < outbuf->page_len) { + /* pages are not in place: */ + int i = (page_pos + outbuf->page_base) >> PAGE_CACHE_SHIFT; + in_page = desc->pages[i]; + } else { + in_page = sg->page; + } + desc->infrags[desc->fragno].page = in_page; + desc->fragno++; + desc->fraglen += sg->length; + desc->pos += sg->length; + + fraglen = thislen & 7; /* XXX hardcoded blocksize */ + thislen -= fraglen; + + if (thislen == 0) + return 0; + + ret = crypto_cipher_encrypt_iv(desc->tfm, desc->outfrags, desc->infrags, + thislen, desc->iv); + if (ret) + return ret; + if (fraglen) { + desc->outfrags[0].page = sg->page; + desc->outfrags[0].offset = sg->offset + sg->length - fraglen; + desc->outfrags[0].length = fraglen; + desc->infrags[0] = desc->outfrags[0]; + desc->infrags[0].page = in_page; + desc->fragno = 1; + desc->fraglen = fraglen; + } else { + desc->fragno = 0; + desc->fraglen = 0; + } + return 0; +} + +int +gss_encrypt_xdr_buf(struct crypto_tfm *tfm, struct xdr_buf *buf, int offset, + struct page **pages) +{ + int ret; + struct encryptor_desc desc; + + BUG_ON((buf->len - offset) % crypto_tfm_alg_blocksize(tfm) != 0); + + memset(desc.iv, 0, sizeof(desc.iv)); + desc.tfm = tfm; + desc.pos = offset; + desc.outbuf = buf; + desc.pages = pages; + desc.fragno = 0; + desc.fraglen = 0; + + ret = process_xdr_buf(buf, offset, buf->len - offset, encryptor, &desc); + return ret; +} + +EXPORT_SYMBOL(gss_encrypt_xdr_buf); + +struct decryptor_desc { + u8 iv[8]; /* XXX hard-coded blocksize */ + struct crypto_tfm *tfm; + struct scatterlist frags[4]; + int fragno; + int fraglen; +}; + +static int +decryptor(struct scatterlist *sg, void *data) +{ + struct decryptor_desc *desc = data; + int thislen = desc->fraglen + sg->length; + int fraglen, ret; + + /* Worst case is 4 fragments: head, end of page 1, start + * of page 2, tail. Anything more is a bug. */ + BUG_ON(desc->fragno > 3); + desc->frags[desc->fragno] = *sg; + desc->fragno++; + desc->fraglen += sg->length; + + fraglen = thislen & 7; /* XXX hardcoded blocksize */ + thislen -= fraglen; + + if (thislen == 0) + return 0; + + ret = crypto_cipher_decrypt_iv(desc->tfm, desc->frags, desc->frags, + thislen, desc->iv); + if (ret) + return ret; + if (fraglen) { + desc->frags[0].page = sg->page; + desc->frags[0].offset = sg->offset + sg->length - fraglen; + desc->frags[0].length = fraglen; + desc->fragno = 1; + desc->fraglen = fraglen; + } else { + desc->fragno = 0; + desc->fraglen = 0; + } + return 0; +} + +int +gss_decrypt_xdr_buf(struct crypto_tfm *tfm, struct xdr_buf *buf, int offset) +{ + struct decryptor_desc desc; + + /* XXXJBF: */ + BUG_ON((buf->len - offset) % crypto_tfm_alg_blocksize(tfm) != 0); + + memset(desc.iv, 0, sizeof(desc.iv)); + desc.tfm = tfm; + desc.fragno = 0; + desc.fraglen = 0; + return process_xdr_buf(buf, offset, buf->len - offset, decryptor, &desc); +} + +EXPORT_SYMBOL(gss_decrypt_xdr_buf); diff --git a/net/sunrpc/auth_gss/gss_krb5_mech.c b/net/sunrpc/auth_gss/gss_krb5_mech.c index 606a8a82caf..5f1f806a0b1 100644 --- a/net/sunrpc/auth_gss/gss_krb5_mech.c +++ b/net/sunrpc/auth_gss/gss_krb5_mech.c @@ -39,7 +39,6 @@ #include <linux/types.h> #include <linux/slab.h> #include <linux/sunrpc/auth.h> -#include <linux/in.h> #include <linux/sunrpc/gss_krb5.h> #include <linux/sunrpc/xdr.h> #include <linux/crypto.h> @@ -191,43 +190,12 @@ gss_delete_sec_context_kerberos(void *internal_ctx) { kfree(kctx); } -static u32 -gss_verify_mic_kerberos(struct gss_ctx *ctx, - struct xdr_buf *message, - struct xdr_netobj *mic_token, - u32 *qstate) { - u32 maj_stat = 0; - int qop_state; - struct krb5_ctx *kctx = ctx->internal_ctx_id; - - maj_stat = krb5_read_token(kctx, mic_token, message, &qop_state, - KG_TOK_MIC_MSG); - if (!maj_stat && qop_state) - *qstate = qop_state; - - dprintk("RPC: gss_verify_mic_kerberos returning %d\n", maj_stat); - return maj_stat; -} - -static u32 -gss_get_mic_kerberos(struct gss_ctx *ctx, - u32 qop, - struct xdr_buf *message, - struct xdr_netobj *mic_token) { - u32 err = 0; - struct krb5_ctx *kctx = ctx->internal_ctx_id; - - err = krb5_make_token(kctx, qop, message, mic_token, KG_TOK_MIC_MSG); - - dprintk("RPC: gss_get_mic_kerberos returning %d\n",err); - - return err; -} - static struct gss_api_ops gss_kerberos_ops = { .gss_import_sec_context = gss_import_sec_context_kerberos, .gss_get_mic = gss_get_mic_kerberos, .gss_verify_mic = gss_verify_mic_kerberos, + .gss_wrap = gss_wrap_kerberos, + .gss_unwrap = gss_unwrap_kerberos, .gss_delete_sec_context = gss_delete_sec_context_kerberos, }; @@ -242,6 +210,11 @@ static struct pf_desc gss_kerberos_pfs[] = { .service = RPC_GSS_SVC_INTEGRITY, .name = "krb5i", }, + [2] = { + .pseudoflavor = RPC_AUTH_GSS_KRB5P, + .service = RPC_GSS_SVC_PRIVACY, + .name = "krb5p", + }, }; static struct gss_api_mech gss_kerberos_mech = { diff --git a/net/sunrpc/auth_gss/gss_krb5_seal.c b/net/sunrpc/auth_gss/gss_krb5_seal.c index afeeb8715a7..13f8ae97945 100644 --- a/net/sunrpc/auth_gss/gss_krb5_seal.c +++ b/net/sunrpc/auth_gss/gss_krb5_seal.c @@ -70,22 +70,13 @@ # define RPCDBG_FACILITY RPCDBG_AUTH #endif -static inline int -gss_krb5_padding(int blocksize, int length) { - /* Most of the code is block-size independent but in practice we - * use only 8: */ - BUG_ON(blocksize != 8); - return 8 - (length & 7); -} - u32 -krb5_make_token(struct krb5_ctx *ctx, int qop_req, - struct xdr_buf *text, struct xdr_netobj *token, - int toktype) +gss_get_mic_kerberos(struct gss_ctx *gss_ctx, struct xdr_buf *text, + struct xdr_netobj *token) { + struct krb5_ctx *ctx = gss_ctx->internal_ctx_id; s32 checksum_type; struct xdr_netobj md5cksum = {.len = 0, .data = NULL}; - int blocksize = 0, tmsglen; unsigned char *ptr, *krb5_hdr, *msg_start; s32 now; @@ -93,9 +84,6 @@ krb5_make_token(struct krb5_ctx *ctx, int qop_req, now = get_seconds(); - if (qop_req != 0) - goto out_err; - switch (ctx->signalg) { case SGN_ALG_DES_MAC_MD5: checksum_type = CKSUMTYPE_RSA_MD5; @@ -111,21 +99,13 @@ krb5_make_token(struct krb5_ctx *ctx, int qop_req, goto out_err; } - if (toktype == KG_TOK_WRAP_MSG) { - blocksize = crypto_tfm_alg_blocksize(ctx->enc); - tmsglen = blocksize + text->len - + gss_krb5_padding(blocksize, blocksize + text->len); - } else { - tmsglen = 0; - } - - token->len = g_token_size(&ctx->mech_used, 22 + tmsglen); + token->len = g_token_size(&ctx->mech_used, 22); ptr = token->data; - g_make_token_header(&ctx->mech_used, 22 + tmsglen, &ptr); + g_make_token_header(&ctx->mech_used, 22, &ptr); - *ptr++ = (unsigned char) ((toktype>>8)&0xff); - *ptr++ = (unsigned char) (toktype&0xff); + *ptr++ = (unsigned char) ((KG_TOK_MIC_MSG>>8)&0xff); + *ptr++ = (unsigned char) (KG_TOK_MIC_MSG&0xff); /* ptr now at byte 2 of header described in rfc 1964, section 1.2.1: */ krb5_hdr = ptr - 2; @@ -133,17 +113,9 @@ krb5_make_token(struct krb5_ctx *ctx, int qop_req, *(u16 *)(krb5_hdr + 2) = htons(ctx->signalg); memset(krb5_hdr + 4, 0xff, 4); - if (toktype == KG_TOK_WRAP_MSG) - *(u16 *)(krb5_hdr + 4) = htons(ctx->sealalg); - if (toktype == KG_TOK_WRAP_MSG) { - /* XXX removing support for now */ - goto out_err; - } else { /* Sign only. */ - if (make_checksum(checksum_type, krb5_hdr, 8, text, - &md5cksum)) + if (make_checksum(checksum_type, krb5_hdr, 8, text, 0, &md5cksum)) goto out_err; - } switch (ctx->signalg) { case SGN_ALG_DES_MAC_MD5: diff --git a/net/sunrpc/auth_gss/gss_krb5_unseal.c b/net/sunrpc/auth_gss/gss_krb5_unseal.c index 8767fc53183..2030475d98e 100644 --- a/net/sunrpc/auth_gss/gss_krb5_unseal.c +++ b/net/sunrpc/auth_gss/gss_krb5_unseal.c @@ -68,21 +68,14 @@ #endif -/* message_buffer is an input if toktype is MIC and an output if it is WRAP: - * If toktype is MIC: read_token is a mic token, and message_buffer is the - * data that the mic was supposedly taken over. - * If toktype is WRAP: read_token is a wrap token, and message_buffer is used - * to return the decrypted data. - */ +/* read_token is a mic token, and message_buffer is the data that the mic was + * supposedly taken over. */ -/* XXX will need to change prototype and/or just split into a separate function - * when we add privacy (because read_token will be in pages too). */ u32 -krb5_read_token(struct krb5_ctx *ctx, - struct xdr_netobj *read_token, - struct xdr_buf *message_buffer, - int *qop_state, int toktype) +gss_verify_mic_kerberos(struct gss_ctx *gss_ctx, + struct xdr_buf *message_buffer, struct xdr_netobj *read_token) { + struct krb5_ctx *ctx = gss_ctx->internal_ctx_id; int signalg; int sealalg; s32 checksum_type; @@ -100,16 +93,12 @@ krb5_read_token(struct krb5_ctx *ctx, read_token->len)) goto out; - if ((*ptr++ != ((toktype>>8)&0xff)) || (*ptr++ != (toktype&0xff))) + if ((*ptr++ != ((KG_TOK_MIC_MSG>>8)&0xff)) || + (*ptr++ != ( KG_TOK_MIC_MSG &0xff)) ) goto out; /* XXX sanity-check bodysize?? */ - if (toktype == KG_TOK_WRAP_MSG) { - /* XXX gone */ - goto out; - } - /* get the sign and seal algorithms */ signalg = ptr[0] + (ptr[1] << 8); @@ -120,14 +109,7 @@ krb5_read_token(struct krb5_ctx *ctx, if ((ptr[4] != 0xff) || (ptr[5] != 0xff)) goto out; - if (((toktype != KG_TOK_WRAP_MSG) && (sealalg != 0xffff)) || - ((toktype == KG_TOK_WRAP_MSG) && (sealalg == 0xffff))) - goto out; - - /* in the current spec, there is only one valid seal algorithm per - key type, so a simple comparison is ok */ - - if ((toktype == KG_TOK_WRAP_MSG) && !(sealalg == ctx->sealalg)) + if (sealalg != 0xffff) goto out; /* there are several mappings of seal algorithms to sign algorithms, @@ -154,7 +136,7 @@ krb5_read_token(struct krb5_ctx *ctx, switch (signalg) { case SGN_ALG_DES_MAC_MD5: ret = make_checksum(checksum_type, ptr - 2, 8, - message_buffer, &md5cksum); + message_buffer, 0, &md5cksum); if (ret) goto out; @@ -175,9 +157,6 @@ krb5_read_token(struct krb5_ctx *ctx, /* it got through unscathed. Make sure the context is unexpired */ - if (qop_state) - *qop_state = GSS_C_QOP_DEFAULT; - now = get_seconds(); ret = GSS_S_CONTEXT_EXPIRED; diff --git a/net/sunrpc/auth_gss/gss_krb5_wrap.c b/net/sunrpc/auth_gss/gss_krb5_wrap.c new file mode 100644 index 00000000000..af777cf9f25 --- /dev/null +++ b/net/sunrpc/auth_gss/gss_krb5_wrap.c @@ -0,0 +1,363 @@ +#include <linux/types.h> +#include <linux/slab.h> +#include <linux/jiffies.h> +#include <linux/sunrpc/gss_krb5.h> +#include <linux/random.h> +#include <linux/pagemap.h> +#include <asm/scatterlist.h> +#include <linux/crypto.h> + +#ifdef RPC_DEBUG +# define RPCDBG_FACILITY RPCDBG_AUTH +#endif + +static inline int +gss_krb5_padding(int blocksize, int length) +{ + /* Most of the code is block-size independent but currently we + * use only 8: */ + BUG_ON(blocksize != 8); + return 8 - (length & 7); +} + +static inline void +gss_krb5_add_padding(struct xdr_buf *buf, int offset, int blocksize) +{ + int padding = gss_krb5_padding(blocksize, buf->len - offset); + char *p; + struct kvec *iov; + + if (buf->page_len || buf->tail[0].iov_len) + iov = &buf->tail[0]; + else + iov = &buf->head[0]; + p = iov->iov_base + iov->iov_len; + iov->iov_len += padding; + buf->len += padding; + memset(p, padding, padding); +} + +static inline int +gss_krb5_remove_padding(struct xdr_buf *buf, int blocksize) +{ + u8 *ptr; + u8 pad; + int len = buf->len; + + if (len <= buf->head[0].iov_len) { + pad = *(u8 *)(buf->head[0].iov_base + len - 1); + if (pad > buf->head[0].iov_len) + return -EINVAL; + buf->head[0].iov_len -= pad; + goto out; + } else + len -= buf->head[0].iov_len; + if (len <= buf->page_len) { + int last = (buf->page_base + len - 1) + >>PAGE_CACHE_SHIFT; + int offset = (buf->page_base + len - 1) + & (PAGE_CACHE_SIZE - 1); + ptr = kmap_atomic(buf->pages[last], KM_SKB_SUNRPC_DATA); + pad = *(ptr + offset); + kunmap_atomic(ptr, KM_SKB_SUNRPC_DATA); + goto out; + } else + len -= buf->page_len; + BUG_ON(len > buf->tail[0].iov_len); + pad = *(u8 *)(buf->tail[0].iov_base + len - 1); +out: + /* XXX: NOTE: we do not adjust the page lengths--they represent + * a range of data in the real filesystem page cache, and we need + * to know that range so the xdr code can properly place read data. + * However adjusting the head length, as we do above, is harmless. + * In the case of a request that fits into a single page, the server + * also uses length and head length together to determine the original + * start of the request to copy the request for deferal; so it's + * easier on the server if we adjust head and tail length in tandem. + * It's not really a problem that we don't fool with the page and + * tail lengths, though--at worst badly formed xdr might lead the + * server to attempt to parse the padding. + * XXX: Document all these weird requirements for gss mechanism + * wrap/unwrap functions. */ + if (pad > blocksize) + return -EINVAL; + if (buf->len > pad) + buf->len -= pad; + else + return -EINVAL; + return 0; +} + +static inline void +make_confounder(char *p, int blocksize) +{ + static u64 i = 0; + u64 *q = (u64 *)p; + + /* rfc1964 claims this should be "random". But all that's really + * necessary is that it be unique. And not even that is necessary in + * our case since our "gssapi" implementation exists only to support + * rpcsec_gss, so we know that the only buffers we will ever encrypt + * already begin with a unique sequence number. Just to hedge my bets + * I'll make a half-hearted attempt at something unique, but ensuring + * uniqueness would mean worrying about atomicity and rollover, and I + * don't care enough. */ + + BUG_ON(blocksize != 8); + *q = i++; +} + +/* Assumptions: the head and tail of inbuf are ours to play with. + * The pages, however, may be real pages in the page cache and we replace + * them with scratch pages from **pages before writing to them. */ +/* XXX: obviously the above should be documentation of wrap interface, + * and shouldn't be in this kerberos-specific file. */ + +/* XXX factor out common code with seal/unseal. */ + +u32 +gss_wrap_kerberos(struct gss_ctx *ctx, int offset, + struct xdr_buf *buf, struct page **pages) +{ + struct krb5_ctx *kctx = ctx->internal_ctx_id; + s32 checksum_type; + struct xdr_netobj md5cksum = {.len = 0, .data = NULL}; + int blocksize = 0, plainlen; + unsigned char *ptr, *krb5_hdr, *msg_start; + s32 now; + int headlen; + struct page **tmp_pages; + + dprintk("RPC: gss_wrap_kerberos\n"); + + now = get_seconds(); + + switch (kctx->signalg) { + case SGN_ALG_DES_MAC_MD5: + checksum_type = CKSUMTYPE_RSA_MD5; + break; + default: + dprintk("RPC: gss_krb5_seal: kctx->signalg %d not" + " supported\n", kctx->signalg); + goto out_err; + } + if (kctx->sealalg != SEAL_ALG_NONE && kctx->sealalg != SEAL_ALG_DES) { + dprintk("RPC: gss_krb5_seal: kctx->sealalg %d not supported\n", + kctx->sealalg); + goto out_err; + } + + blocksize = crypto_tfm_alg_blocksize(kctx->enc); + gss_krb5_add_padding(buf, offset, blocksize); + BUG_ON((buf->len - offset) % blocksize); + plainlen = blocksize + buf->len - offset; + + headlen = g_token_size(&kctx->mech_used, 22 + plainlen) - + (buf->len - offset); + + ptr = buf->head[0].iov_base + offset; + /* shift data to make room for header. */ + /* XXX Would be cleverer to encrypt while copying. */ + /* XXX bounds checking, slack, etc. */ + memmove(ptr + headlen, ptr, buf->head[0].iov_len - offset); + buf->head[0].iov_len += headlen; + buf->len += headlen; + BUG_ON((buf->len - offset - headlen) % blocksize); + + g_make_token_header(&kctx->mech_used, 22 + plainlen, &ptr); + + + *ptr++ = (unsigned char) ((KG_TOK_WRAP_MSG>>8)&0xff); + *ptr++ = (unsigned char) (KG_TOK_WRAP_MSG&0xff); + + /* ptr now at byte 2 of header described in rfc 1964, section 1.2.1: */ + krb5_hdr = ptr - 2; + msg_start = krb5_hdr + 24; + /* XXXJBF: */ BUG_ON(buf->head[0].iov_base + offset + headlen != msg_start + blocksize); + + *(u16 *)(krb5_hdr + 2) = htons(kctx->signalg); + memset(krb5_hdr + 4, 0xff, 4); + *(u16 *)(krb5_hdr + 4) = htons(kctx->sealalg); + + make_confounder(msg_start, blocksize); + + /* XXXJBF: UGH!: */ + tmp_pages = buf->pages; + buf->pages = pages; + if (make_checksum(checksum_type, krb5_hdr, 8, buf, + offset + headlen - blocksize, &md5cksum)) + goto out_err; + buf->pages = tmp_pages; + + switch (kctx->signalg) { + case SGN_ALG_DES_MAC_MD5: + if (krb5_encrypt(kctx->seq, NULL, md5cksum.data, + md5cksum.data, md5cksum.len)) + goto out_err; + memcpy(krb5_hdr + 16, + md5cksum.data + md5cksum.len - KRB5_CKSUM_LENGTH, + KRB5_CKSUM_LENGTH); + + dprintk("RPC: make_seal_token: cksum data: \n"); + print_hexl((u32 *) (krb5_hdr + 16), KRB5_CKSUM_LENGTH, 0); + break; + default: + BUG(); + } + + kfree(md5cksum.data); + + /* XXX would probably be more efficient to compute checksum + * and encrypt at the same time: */ + if ((krb5_make_seq_num(kctx->seq, kctx->initiate ? 0 : 0xff, + kctx->seq_send, krb5_hdr + 16, krb5_hdr + 8))) + goto out_err; + + if (gss_encrypt_xdr_buf(kctx->enc, buf, offset + headlen - blocksize, + pages)) + goto out_err; + + kctx->seq_send++; + + return ((kctx->endtime < now) ? GSS_S_CONTEXT_EXPIRED : GSS_S_COMPLETE); +out_err: + if (md5cksum.data) kfree(md5cksum.data); + return GSS_S_FAILURE; +} + +u32 +gss_unwrap_kerberos(struct gss_ctx *ctx, int offset, struct xdr_buf *buf) +{ + struct krb5_ctx *kctx = ctx->internal_ctx_id; + int signalg; + int sealalg; + s32 checksum_type; + struct xdr_netobj md5cksum = {.len = 0, .data = NULL}; + s32 now; + int direction; + s32 seqnum; + unsigned char *ptr; + int bodysize; + u32 ret = GSS_S_DEFECTIVE_TOKEN; + void *data_start, *orig_start; + int data_len; + int blocksize; + + dprintk("RPC: gss_unwrap_kerberos\n"); + + ptr = (u8 *)buf->head[0].iov_base + offset; + if (g_verify_token_header(&kctx->mech_used, &bodysize, &ptr, + buf->len - offset)) + goto out; + + if ((*ptr++ != ((KG_TOK_WRAP_MSG>>8)&0xff)) || + (*ptr++ != (KG_TOK_WRAP_MSG &0xff)) ) + goto out; + + /* XXX sanity-check bodysize?? */ + + /* get the sign and seal algorithms */ + + signalg = ptr[0] + (ptr[1] << 8); + sealalg = ptr[2] + (ptr[3] << 8); + + /* Sanity checks */ + + if ((ptr[4] != 0xff) || (ptr[5] != 0xff)) + goto out; + + if (sealalg == 0xffff) + goto out; + + /* in the current spec, there is only one valid seal algorithm per + key type, so a simple comparison is ok */ + + if (sealalg != kctx->sealalg) + goto out; + + /* there are several mappings of seal algorithms to sign algorithms, + but few enough that we can try them all. */ + + if ((kctx->sealalg == SEAL_ALG_NONE && signalg > 1) || + (kctx->sealalg == SEAL_ALG_1 && signalg != SGN_ALG_3) || + (kctx->sealalg == SEAL_ALG_DES3KD && + signalg != SGN_ALG_HMAC_SHA1_DES3_KD)) + goto out; + + if (gss_decrypt_xdr_buf(kctx->enc, buf, + ptr + 22 - (unsigned char *)buf->head[0].iov_base)) + goto out; + + /* compute the checksum of the message */ + + /* initialize the the cksum */ + switch (signalg) { + case SGN_ALG_DES_MAC_MD5: + checksum_type = CKSUMTYPE_RSA_MD5; + break; + default: + ret = GSS_S_DEFECTIVE_TOKEN; + goto out; + } + + switch (signalg) { + case SGN_ALG_DES_MAC_MD5: + ret = make_checksum(checksum_type, ptr - 2, 8, buf, + ptr + 22 - (unsigned char *)buf->head[0].iov_base, &md5cksum); + if (ret) + goto out; + + ret = krb5_encrypt(kctx->seq, NULL, md5cksum.data, + md5cksum.data, md5cksum.len); + if (ret) + goto out; + + if (memcmp(md5cksum.data + 8, ptr + 14, 8)) { + ret = GSS_S_BAD_SIG; + goto out; + } + break; + default: + ret = GSS_S_DEFECTIVE_TOKEN; + goto out; + } + + /* it got through unscathed. Make sure the context is unexpired */ + + now = get_seconds(); + + ret = GSS_S_CONTEXT_EXPIRED; + if (now > kctx->endtime) + goto out; + + /* do sequencing checks */ + + ret = GSS_S_BAD_SIG; + if ((ret = krb5_get_seq_num(kctx->seq, ptr + 14, ptr + 6, &direction, + &seqnum))) + goto out; + + if ((kctx->initiate && direction != 0xff) || + (!kctx->initiate && direction != 0)) + goto out; + + /* Copy the data back to the right position. XXX: Would probably be + * better to copy and encrypt at the same time. */ + + blocksize = crypto_tfm_alg_blocksize(kctx->enc); + data_start = ptr + 22 + blocksize; + orig_start = buf->head[0].iov_base + offset; + data_len = (buf->head[0].iov_base + buf->head[0].iov_len) - data_start; + memmove(orig_start, data_start, data_len); + buf->head[0].iov_len -= (data_start - orig_start); + buf->len -= (data_start - orig_start); + + ret = GSS_S_DEFECTIVE_TOKEN; + if (gss_krb5_remove_padding(buf, blocksize)) + goto out; + + ret = GSS_S_COMPLETE; +out: + if (md5cksum.data) kfree(md5cksum.data); + return ret; +} diff --git a/net/sunrpc/auth_gss/gss_mech_switch.c b/net/sunrpc/auth_gss/gss_mech_switch.c index 9dfb68377d6..b048bf672da 100644 --- a/net/sunrpc/auth_gss/gss_mech_switch.c +++ b/net/sunrpc/auth_gss/gss_mech_switch.c @@ -35,7 +35,6 @@ #include <linux/types.h> #include <linux/slab.h> -#include <linux/socket.h> #include <linux/module.h> #include <linux/sunrpc/msg_prot.h> #include <linux/sunrpc/gss_asn1.h> @@ -251,13 +250,11 @@ gss_import_sec_context(const void *input_token, size_t bufsize, u32 gss_get_mic(struct gss_ctx *context_handle, - u32 qop, struct xdr_buf *message, struct xdr_netobj *mic_token) { return context_handle->mech_type->gm_ops ->gss_get_mic(context_handle, - qop, message, mic_token); } @@ -267,16 +264,34 @@ gss_get_mic(struct gss_ctx *context_handle, u32 gss_verify_mic(struct gss_ctx *context_handle, struct xdr_buf *message, - struct xdr_netobj *mic_token, - u32 *qstate) + struct xdr_netobj *mic_token) { return context_handle->mech_type->gm_ops ->gss_verify_mic(context_handle, message, - mic_token, - qstate); + mic_token); } +u32 +gss_wrap(struct gss_ctx *ctx_id, + int offset, + struct xdr_buf *buf, + struct page **inpages) +{ + return ctx_id->mech_type->gm_ops + ->gss_wrap(ctx_id, offset, buf, inpages); +} + +u32 +gss_unwrap(struct gss_ctx *ctx_id, + int offset, + struct xdr_buf *buf) +{ + return ctx_id->mech_type->gm_ops + ->gss_unwrap(ctx_id, offset, buf); +} + + /* gss_delete_sec_context: free all resources associated with context_handle. * Note this differs from the RFC 2744-specified prototype in that we don't * bother returning an output token, since it would never be used anyway. */ diff --git a/net/sunrpc/auth_gss/gss_spkm3_mech.c b/net/sunrpc/auth_gss/gss_spkm3_mech.c index 6c97d61baa9..39b3edc1469 100644 --- a/net/sunrpc/auth_gss/gss_spkm3_mech.c +++ b/net/sunrpc/auth_gss/gss_spkm3_mech.c @@ -224,18 +224,13 @@ gss_delete_sec_context_spkm3(void *internal_ctx) { static u32 gss_verify_mic_spkm3(struct gss_ctx *ctx, struct xdr_buf *signbuf, - struct xdr_netobj *checksum, - u32 *qstate) { + struct xdr_netobj *checksum) +{ u32 maj_stat = 0; - int qop_state = 0; struct spkm3_ctx *sctx = ctx->internal_ctx_id; dprintk("RPC: gss_verify_mic_spkm3 calling spkm3_read_token\n"); - maj_stat = spkm3_read_token(sctx, checksum, signbuf, &qop_state, - SPKM_MIC_TOK); - - if (!maj_stat && qop_state) - *qstate = qop_state; + maj_stat = spkm3_read_token(sctx, checksum, signbuf, SPKM_MIC_TOK); dprintk("RPC: gss_verify_mic_spkm3 returning %d\n", maj_stat); return maj_stat; @@ -243,15 +238,15 @@ gss_verify_mic_spkm3(struct gss_ctx *ctx, static u32 gss_get_mic_spkm3(struct gss_ctx *ctx, - u32 qop, struct xdr_buf *message_buffer, - struct xdr_netobj *message_token) { + struct xdr_netobj *message_token) +{ u32 err = 0; struct spkm3_ctx *sctx = ctx->internal_ctx_id; dprintk("RPC: gss_get_mic_spkm3\n"); - err = spkm3_make_token(sctx, qop, message_buffer, + err = spkm3_make_token(sctx, message_buffer, message_token, SPKM_MIC_TOK); return err; } @@ -264,8 +259,8 @@ static struct gss_api_ops gss_spkm3_ops = { }; static struct pf_desc gss_spkm3_pfs[] = { - {RPC_AUTH_GSS_SPKM, 0, RPC_GSS_SVC_NONE, "spkm3"}, - {RPC_AUTH_GSS_SPKMI, 0, RPC_GSS_SVC_INTEGRITY, "spkm3i"}, + {RPC_AUTH_GSS_SPKM, RPC_GSS_SVC_NONE, "spkm3"}, + {RPC_AUTH_GSS_SPKMI, RPC_GSS_SVC_INTEGRITY, "spkm3i"}, }; static struct gss_api_mech gss_spkm3_mech = { diff --git a/net/sunrpc/auth_gss/gss_spkm3_seal.c b/net/sunrpc/auth_gss/gss_spkm3_seal.c index 25339868d46..148201e929d 100644 --- a/net/sunrpc/auth_gss/gss_spkm3_seal.c +++ b/net/sunrpc/auth_gss/gss_spkm3_seal.c @@ -51,7 +51,7 @@ */ u32 -spkm3_make_token(struct spkm3_ctx *ctx, int qop_req, +spkm3_make_token(struct spkm3_ctx *ctx, struct xdr_buf * text, struct xdr_netobj * token, int toktype) { @@ -68,8 +68,6 @@ spkm3_make_token(struct spkm3_ctx *ctx, int qop_req, dprintk("RPC: spkm3_make_token\n"); now = jiffies; - if (qop_req != 0) - goto out_err; if (ctx->ctx_id.len != 16) { dprintk("RPC: spkm3_make_token BAD ctx_id.len %d\n", diff --git a/net/sunrpc/auth_gss/gss_spkm3_unseal.c b/net/sunrpc/auth_gss/gss_spkm3_unseal.c index 65ce81bf0bc..c3c0d958610 100644 --- a/net/sunrpc/auth_gss/gss_spkm3_unseal.c +++ b/net/sunrpc/auth_gss/gss_spkm3_unseal.c @@ -52,7 +52,7 @@ u32 spkm3_read_token(struct spkm3_ctx *ctx, struct xdr_netobj *read_token, /* checksum */ struct xdr_buf *message_buffer, /* signbuf */ - int *qop_state, int toktype) + int toktype) { s32 code; struct xdr_netobj wire_cksum = {.len =0, .data = NULL}; diff --git a/net/sunrpc/auth_gss/svcauth_gss.c b/net/sunrpc/auth_gss/svcauth_gss.c index e3308195374..e4ada15ed85 100644 --- a/net/sunrpc/auth_gss/svcauth_gss.c +++ b/net/sunrpc/auth_gss/svcauth_gss.c @@ -566,8 +566,7 @@ gss_verify_header(struct svc_rqst *rqstp, struct rsc *rsci, if (rqstp->rq_deferred) /* skip verification of revisited request */ return SVC_OK; - if (gss_verify_mic(ctx_id, &rpchdr, &checksum, NULL) - != GSS_S_COMPLETE) { + if (gss_verify_mic(ctx_id, &rpchdr, &checksum) != GSS_S_COMPLETE) { *authp = rpcsec_gsserr_credproblem; return SVC_DENIED; } @@ -604,7 +603,7 @@ gss_write_verf(struct svc_rqst *rqstp, struct gss_ctx *ctx_id, u32 seq) xdr_buf_from_iov(&iov, &verf_data); p = rqstp->rq_res.head->iov_base + rqstp->rq_res.head->iov_len; mic.data = (u8 *)(p + 1); - maj_stat = gss_get_mic(ctx_id, 0, &verf_data, &mic); + maj_stat = gss_get_mic(ctx_id, &verf_data, &mic); if (maj_stat != GSS_S_COMPLETE) return -1; *p++ = htonl(mic.len); @@ -710,7 +709,7 @@ unwrap_integ_data(struct xdr_buf *buf, u32 seq, struct gss_ctx *ctx) goto out; if (read_bytes_from_xdr_buf(buf, integ_len + 4, mic.data, mic.len)) goto out; - maj_stat = gss_verify_mic(ctx, &integ_buf, &mic, NULL); + maj_stat = gss_verify_mic(ctx, &integ_buf, &mic); if (maj_stat != GSS_S_COMPLETE) goto out; if (ntohl(svc_getu32(&buf->head[0])) != seq) @@ -1012,7 +1011,7 @@ svcauth_gss_release(struct svc_rqst *rqstp) resv = &resbuf->tail[0]; } mic.data = (u8 *)resv->iov_base + resv->iov_len + 4; - if (gss_get_mic(gsd->rsci->mechctx, 0, &integ_buf, &mic)) + if (gss_get_mic(gsd->rsci->mechctx, &integ_buf, &mic)) goto out_err; svc_putu32(resv, htonl(mic.len)); memset(mic.data + mic.len, 0, diff --git a/net/sunrpc/auth_null.c b/net/sunrpc/auth_null.c index 9b72d3abf82..f56767aaa92 100644 --- a/net/sunrpc/auth_null.c +++ b/net/sunrpc/auth_null.c @@ -7,9 +7,7 @@ */ #include <linux/types.h> -#include <linux/socket.h> #include <linux/module.h> -#include <linux/in.h> #include <linux/utsname.h> #include <linux/sunrpc/clnt.h> #include <linux/sched.h> diff --git a/net/sunrpc/auth_unix.c b/net/sunrpc/auth_unix.c index 4ff297a9b15..890fb5ea0dc 100644 --- a/net/sunrpc/auth_unix.c +++ b/net/sunrpc/auth_unix.c @@ -9,8 +9,6 @@ #include <linux/types.h> #include <linux/sched.h> #include <linux/module.h> -#include <linux/socket.h> -#include <linux/in.h> #include <linux/sunrpc/clnt.h> #include <linux/sunrpc/auth.h> diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index f17e6153b68..702ede309b0 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -1,5 +1,5 @@ /* - * linux/net/sunrpc/rpcclnt.c + * linux/net/sunrpc/clnt.c * * This file contains the high-level RPC interface. * It is modeled as a finite state machine to support both synchronous @@ -27,7 +27,6 @@ #include <linux/types.h> #include <linux/mm.h> #include <linux/slab.h> -#include <linux/in.h> #include <linux/utsname.h> #include <linux/sunrpc/clnt.h> @@ -53,6 +52,7 @@ static void call_allocate(struct rpc_task *task); static void call_encode(struct rpc_task *task); static void call_decode(struct rpc_task *task); static void call_bind(struct rpc_task *task); +static void call_bind_status(struct rpc_task *task); static void call_transmit(struct rpc_task *task); static void call_status(struct rpc_task *task); static void call_refresh(struct rpc_task *task); @@ -517,15 +517,8 @@ void rpc_setbufsize(struct rpc_clnt *clnt, unsigned int sndsize, unsigned int rcvsize) { struct rpc_xprt *xprt = clnt->cl_xprt; - - xprt->sndsize = 0; - if (sndsize) - xprt->sndsize = sndsize + RPC_SLACK_SPACE; - xprt->rcvsize = 0; - if (rcvsize) - xprt->rcvsize = rcvsize + RPC_SLACK_SPACE; - if (xprt_connected(xprt)) - xprt_sock_setbufsize(xprt); + if (xprt->ops->set_buffer_size) + xprt->ops->set_buffer_size(xprt, sndsize, rcvsize); } /* @@ -685,13 +678,11 @@ call_allocate(struct rpc_task *task) static void call_encode(struct rpc_task *task) { - struct rpc_clnt *clnt = task->tk_client; struct rpc_rqst *req = task->tk_rqstp; struct xdr_buf *sndbuf = &req->rq_snd_buf; struct xdr_buf *rcvbuf = &req->rq_rcv_buf; unsigned int bufsiz; kxdrproc_t encode; - int status; u32 *p; dprintk("RPC: %4d call_encode (status %d)\n", @@ -719,11 +710,15 @@ call_encode(struct rpc_task *task) rpc_exit(task, -EIO); return; } - if (encode && (status = rpcauth_wrap_req(task, encode, req, p, - task->tk_msg.rpc_argp)) < 0) { - printk(KERN_WARNING "%s: can't encode arguments: %d\n", - clnt->cl_protname, -status); - rpc_exit(task, status); + if (encode == NULL) + return; + + task->tk_status = rpcauth_wrap_req(task, encode, req, p, + task->tk_msg.rpc_argp); + if (task->tk_status == -ENOMEM) { + /* XXX: Is this sane? */ + rpc_delay(task, 3*HZ); + task->tk_status = -EAGAIN; } } @@ -734,43 +729,95 @@ static void call_bind(struct rpc_task *task) { struct rpc_clnt *clnt = task->tk_client; - struct rpc_xprt *xprt = clnt->cl_xprt; - - dprintk("RPC: %4d call_bind xprt %p %s connected\n", task->tk_pid, - xprt, (xprt_connected(xprt) ? "is" : "is not")); - task->tk_action = (xprt_connected(xprt)) ? call_transmit : call_connect; + dprintk("RPC: %4d call_bind (status %d)\n", + task->tk_pid, task->tk_status); + task->tk_action = call_connect; if (!clnt->cl_port) { - task->tk_action = call_connect; - task->tk_timeout = RPC_CONNECT_TIMEOUT; + task->tk_action = call_bind_status; + task->tk_timeout = task->tk_xprt->bind_timeout; rpc_getport(task, clnt); } } /* - * 4a. Connect to the RPC server (TCP case) + * 4a. Sort out bind result + */ +static void +call_bind_status(struct rpc_task *task) +{ + int status = -EACCES; + + if (task->tk_status >= 0) { + dprintk("RPC: %4d call_bind_status (status %d)\n", + task->tk_pid, task->tk_status); + task->tk_status = 0; + task->tk_action = call_connect; + return; + } + + switch (task->tk_status) { + case -EACCES: + dprintk("RPC: %4d remote rpcbind: RPC program/version unavailable\n", + task->tk_pid); + rpc_delay(task, 3*HZ); + goto retry_bind; + case -ETIMEDOUT: + dprintk("RPC: %4d rpcbind request timed out\n", + task->tk_pid); + if (RPC_IS_SOFT(task)) { + status = -EIO; + break; + } + goto retry_bind; + case -EPFNOSUPPORT: + dprintk("RPC: %4d remote rpcbind service unavailable\n", + task->tk_pid); + break; + case -EPROTONOSUPPORT: + dprintk("RPC: %4d remote rpcbind version 2 unavailable\n", + task->tk_pid); + break; + default: + dprintk("RPC: %4d unrecognized rpcbind error (%d)\n", + task->tk_pid, -task->tk_status); + status = -EIO; + break; + } + + rpc_exit(task, status); + return; + +retry_bind: + task->tk_status = 0; + task->tk_action = call_bind; + return; +} + +/* + * 4b. Connect to the RPC server */ static void call_connect(struct rpc_task *task) { - struct rpc_clnt *clnt = task->tk_client; + struct rpc_xprt *xprt = task->tk_xprt; - dprintk("RPC: %4d call_connect status %d\n", - task->tk_pid, task->tk_status); + dprintk("RPC: %4d call_connect xprt %p %s connected\n", + task->tk_pid, xprt, + (xprt_connected(xprt) ? "is" : "is not")); - if (xprt_connected(clnt->cl_xprt)) { - task->tk_action = call_transmit; - return; + task->tk_action = call_transmit; + if (!xprt_connected(xprt)) { + task->tk_action = call_connect_status; + if (task->tk_status < 0) + return; + xprt_connect(task); } - task->tk_action = call_connect_status; - if (task->tk_status < 0) - return; - xprt_connect(task); } /* - * 4b. Sort out connect result + * 4c. Sort out connect result */ static void call_connect_status(struct rpc_task *task) @@ -778,6 +825,9 @@ call_connect_status(struct rpc_task *task) struct rpc_clnt *clnt = task->tk_client; int status = task->tk_status; + dprintk("RPC: %5u call_connect_status (status %d)\n", + task->tk_pid, task->tk_status); + task->tk_status = 0; if (status >= 0) { clnt->cl_stats->netreconn++; @@ -785,17 +835,19 @@ call_connect_status(struct rpc_task *task) return; } - /* Something failed: we may have to rebind */ + /* Something failed: remote service port may have changed */ if (clnt->cl_autobind) clnt->cl_port = 0; + switch (status) { case -ENOTCONN: case -ETIMEDOUT: case -EAGAIN: - task->tk_action = (clnt->cl_port == 0) ? call_bind : call_connect; + task->tk_action = call_bind; break; default: rpc_exit(task, -EIO); + break; } } @@ -815,10 +867,12 @@ call_transmit(struct rpc_task *task) if (task->tk_status != 0) return; /* Encode here so that rpcsec_gss can use correct sequence number. */ - if (!task->tk_rqstp->rq_bytes_sent) + if (task->tk_rqstp->rq_bytes_sent == 0) { call_encode(task); - if (task->tk_status < 0) - return; + /* Did the encode result in an error condition? */ + if (task->tk_status != 0) + goto out_nosend; + } xprt_transmit(task); if (task->tk_status < 0) return; @@ -826,6 +880,10 @@ call_transmit(struct rpc_task *task) task->tk_action = NULL; rpc_wake_up_task(task); } + return; +out_nosend: + /* release socket write lock before attempting to handle error */ + xprt_abort_transmit(task); } /* @@ -1020,13 +1078,12 @@ static u32 * call_header(struct rpc_task *task) { struct rpc_clnt *clnt = task->tk_client; - struct rpc_xprt *xprt = clnt->cl_xprt; struct rpc_rqst *req = task->tk_rqstp; u32 *p = req->rq_svec[0].iov_base; /* FIXME: check buffer size? */ - if (xprt->stream) - *p++ = 0; /* fill in later */ + + p = xprt_skip_transport_header(task->tk_xprt, p); *p++ = req->rq_xid; /* XID */ *p++ = htonl(RPC_CALL); /* CALL */ *p++ = htonl(RPC_VERSION); /* RPC version */ diff --git a/net/sunrpc/pmap_clnt.c b/net/sunrpc/pmap_clnt.c index 4e81f276692..a398575f94b 100644 --- a/net/sunrpc/pmap_clnt.c +++ b/net/sunrpc/pmap_clnt.c @@ -26,7 +26,7 @@ #define PMAP_GETPORT 3 static struct rpc_procinfo pmap_procedures[]; -static struct rpc_clnt * pmap_create(char *, struct sockaddr_in *, int); +static struct rpc_clnt * pmap_create(char *, struct sockaddr_in *, int, int); static void pmap_getport_done(struct rpc_task *); static struct rpc_program pmap_program; static DEFINE_SPINLOCK(pmap_lock); @@ -65,7 +65,7 @@ rpc_getport(struct rpc_task *task, struct rpc_clnt *clnt) map->pm_binding = 1; spin_unlock(&pmap_lock); - pmap_clnt = pmap_create(clnt->cl_server, sap, map->pm_prot); + pmap_clnt = pmap_create(clnt->cl_server, sap, map->pm_prot, 0); if (IS_ERR(pmap_clnt)) { task->tk_status = PTR_ERR(pmap_clnt); goto bailout; @@ -112,7 +112,7 @@ rpc_getport_external(struct sockaddr_in *sin, __u32 prog, __u32 vers, int prot) NIPQUAD(sin->sin_addr.s_addr), prog, vers, prot); sprintf(hostname, "%u.%u.%u.%u", NIPQUAD(sin->sin_addr.s_addr)); - pmap_clnt = pmap_create(hostname, sin, prot); + pmap_clnt = pmap_create(hostname, sin, prot, 0); if (IS_ERR(pmap_clnt)) return PTR_ERR(pmap_clnt); @@ -171,7 +171,7 @@ rpc_register(u32 prog, u32 vers, int prot, unsigned short port, int *okay) sin.sin_family = AF_INET; sin.sin_addr.s_addr = htonl(INADDR_LOOPBACK); - pmap_clnt = pmap_create("localhost", &sin, IPPROTO_UDP); + pmap_clnt = pmap_create("localhost", &sin, IPPROTO_UDP, 1); if (IS_ERR(pmap_clnt)) { error = PTR_ERR(pmap_clnt); dprintk("RPC: couldn't create pmap client. Error = %d\n", error); @@ -198,7 +198,7 @@ rpc_register(u32 prog, u32 vers, int prot, unsigned short port, int *okay) } static struct rpc_clnt * -pmap_create(char *hostname, struct sockaddr_in *srvaddr, int proto) +pmap_create(char *hostname, struct sockaddr_in *srvaddr, int proto, int privileged) { struct rpc_xprt *xprt; struct rpc_clnt *clnt; @@ -208,6 +208,8 @@ pmap_create(char *hostname, struct sockaddr_in *srvaddr, int proto) if (IS_ERR(xprt)) return (struct rpc_clnt *)xprt; xprt->addr.sin_port = htons(RPC_PMAP_PORT); + if (!privileged) + xprt->resvport = 0; /* printk("pmap: create clnt\n"); */ clnt = rpc_new_client(xprt, hostname, diff --git a/net/sunrpc/rpc_pipe.c b/net/sunrpc/rpc_pipe.c index ded6c63f11e..4f188d0a5d1 100644 --- a/net/sunrpc/rpc_pipe.c +++ b/net/sunrpc/rpc_pipe.c @@ -76,25 +76,35 @@ int rpc_queue_upcall(struct inode *inode, struct rpc_pipe_msg *msg) { struct rpc_inode *rpci = RPC_I(inode); - int res = 0; + int res = -EPIPE; down(&inode->i_sem); + if (rpci->ops == NULL) + goto out; if (rpci->nreaders) { list_add_tail(&msg->list, &rpci->pipe); rpci->pipelen += msg->len; + res = 0; } else if (rpci->flags & RPC_PIPE_WAIT_FOR_OPEN) { if (list_empty(&rpci->pipe)) schedule_delayed_work(&rpci->queue_timeout, RPC_UPCALL_TIMEOUT); list_add_tail(&msg->list, &rpci->pipe); rpci->pipelen += msg->len; - } else - res = -EPIPE; + res = 0; + } +out: up(&inode->i_sem); wake_up(&rpci->waitq); return res; } +static inline void +rpc_inode_setowner(struct inode *inode, void *private) +{ + RPC_I(inode)->private = private; +} + static void rpc_close_pipes(struct inode *inode) { @@ -111,15 +121,10 @@ rpc_close_pipes(struct inode *inode) rpci->ops->release_pipe(inode); rpci->ops = NULL; } + rpc_inode_setowner(inode, NULL); up(&inode->i_sem); } -static inline void -rpc_inode_setowner(struct inode *inode, void *private) -{ - RPC_I(inode)->private = private; -} - static struct inode * rpc_alloc_inode(struct super_block *sb) { @@ -501,7 +506,6 @@ repeat: dentry = dvec[--n]; if (dentry->d_inode) { rpc_close_pipes(dentry->d_inode); - rpc_inode_setowner(dentry->d_inode, NULL); simple_unlink(dir, dentry); } dput(dentry); @@ -576,10 +580,8 @@ __rpc_rmdir(struct inode *dir, struct dentry *dentry) int error; shrink_dcache_parent(dentry); - if (dentry->d_inode) { + if (dentry->d_inode) rpc_close_pipes(dentry->d_inode); - rpc_inode_setowner(dentry->d_inode, NULL); - } if ((error = simple_rmdir(dir, dentry)) != 0) return error; if (!error) { @@ -732,7 +734,6 @@ rpc_unlink(char *path) d_drop(dentry); if (dentry->d_inode) { rpc_close_pipes(dentry->d_inode); - rpc_inode_setowner(dentry->d_inode, NULL); error = simple_unlink(dir, dentry); } dput(dentry); diff --git a/net/sunrpc/socklib.c b/net/sunrpc/socklib.c new file mode 100644 index 00000000000..8f97e90f36c --- /dev/null +++ b/net/sunrpc/socklib.c @@ -0,0 +1,175 @@ +/* + * linux/net/sunrpc/socklib.c + * + * Common socket helper routines for RPC client and server + * + * Copyright (C) 1995, 1996 Olaf Kirch <okir@monad.swb.de> + */ + +#include <linux/types.h> +#include <linux/pagemap.h> +#include <linux/udp.h> +#include <linux/sunrpc/xdr.h> + + +/** + * skb_read_bits - copy some data bits from skb to internal buffer + * @desc: sk_buff copy helper + * @to: copy destination + * @len: number of bytes to copy + * + * Possibly called several times to iterate over an sk_buff and copy + * data out of it. + */ +static size_t skb_read_bits(skb_reader_t *desc, void *to, size_t len) +{ + if (len > desc->count) + len = desc->count; + if (skb_copy_bits(desc->skb, desc->offset, to, len)) + return 0; + desc->count -= len; + desc->offset += len; + return len; +} + +/** + * skb_read_and_csum_bits - copy and checksum from skb to buffer + * @desc: sk_buff copy helper + * @to: copy destination + * @len: number of bytes to copy + * + * Same as skb_read_bits, but calculate a checksum at the same time. + */ +static size_t skb_read_and_csum_bits(skb_reader_t *desc, void *to, size_t len) +{ + unsigned int csum2, pos; + + if (len > desc->count) + len = desc->count; + pos = desc->offset; + csum2 = skb_copy_and_csum_bits(desc->skb, pos, to, len, 0); + desc->csum = csum_block_add(desc->csum, csum2, pos); + desc->count -= len; + desc->offset += len; + return len; +} + +/** + * xdr_partial_copy_from_skb - copy data out of an skb + * @xdr: target XDR buffer + * @base: starting offset + * @desc: sk_buff copy helper + * @copy_actor: virtual method for copying data + * + */ +ssize_t xdr_partial_copy_from_skb(struct xdr_buf *xdr, unsigned int base, skb_reader_t *desc, skb_read_actor_t copy_actor) +{ + struct page **ppage = xdr->pages; + unsigned int len, pglen = xdr->page_len; + ssize_t copied = 0; + int ret; + + len = xdr->head[0].iov_len; + if (base < len) { + len -= base; + ret = copy_actor(desc, (char *)xdr->head[0].iov_base + base, len); + copied += ret; + if (ret != len || !desc->count) + goto out; + base = 0; + } else + base -= len; + + if (unlikely(pglen == 0)) + goto copy_tail; + if (unlikely(base >= pglen)) { + base -= pglen; + goto copy_tail; + } + if (base || xdr->page_base) { + pglen -= base; + base += xdr->page_base; + ppage += base >> PAGE_CACHE_SHIFT; + base &= ~PAGE_CACHE_MASK; + } + do { + char *kaddr; + + /* ACL likes to be lazy in allocating pages - ACLs + * are small by default but can get huge. */ + if (unlikely(*ppage == NULL)) { + *ppage = alloc_page(GFP_ATOMIC); + if (unlikely(*ppage == NULL)) { + if (copied == 0) + copied = -ENOMEM; + goto out; + } + } + + len = PAGE_CACHE_SIZE; + kaddr = kmap_atomic(*ppage, KM_SKB_SUNRPC_DATA); + if (base) { + len -= base; + if (pglen < len) + len = pglen; + ret = copy_actor(desc, kaddr + base, len); + base = 0; + } else { + if (pglen < len) + len = pglen; + ret = copy_actor(desc, kaddr, len); + } + flush_dcache_page(*ppage); + kunmap_atomic(kaddr, KM_SKB_SUNRPC_DATA); + copied += ret; + if (ret != len || !desc->count) + goto out; + ppage++; + } while ((pglen -= len) != 0); +copy_tail: + len = xdr->tail[0].iov_len; + if (base < len) + copied += copy_actor(desc, (char *)xdr->tail[0].iov_base + base, len - base); +out: + return copied; +} + +/** + * csum_partial_copy_to_xdr - checksum and copy data + * @xdr: target XDR buffer + * @skb: source skb + * + * We have set things up such that we perform the checksum of the UDP + * packet in parallel with the copies into the RPC client iovec. -DaveM + */ +int csum_partial_copy_to_xdr(struct xdr_buf *xdr, struct sk_buff *skb) +{ + skb_reader_t desc; + + desc.skb = skb; + desc.offset = sizeof(struct udphdr); + desc.count = skb->len - desc.offset; + + if (skb->ip_summed == CHECKSUM_UNNECESSARY) + goto no_checksum; + + desc.csum = csum_partial(skb->data, desc.offset, skb->csum); + if (xdr_partial_copy_from_skb(xdr, 0, &desc, skb_read_and_csum_bits) < 0) + return -1; + if (desc.offset != skb->len) { + unsigned int csum2; + csum2 = skb_checksum(skb, desc.offset, skb->len - desc.offset, 0); + desc.csum = csum_block_add(desc.csum, csum2, desc.offset); + } + if (desc.count) + return -1; + if ((unsigned short)csum_fold(desc.csum)) + return -1; + return 0; +no_checksum: + if (xdr_partial_copy_from_skb(xdr, 0, &desc, skb_read_bits) < 0) + return -1; + if (desc.count) + return -1; + return 0; +} diff --git a/net/sunrpc/sunrpc_syms.c b/net/sunrpc/sunrpc_syms.c index ed48ff022d3..2387e7b823f 100644 --- a/net/sunrpc/sunrpc_syms.c +++ b/net/sunrpc/sunrpc_syms.c @@ -10,7 +10,6 @@ #include <linux/module.h> #include <linux/types.h> -#include <linux/socket.h> #include <linux/sched.h> #include <linux/uio.h> #include <linux/unistd.h> diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c index 691dea4a58e..f16e7cdd615 100644 --- a/net/sunrpc/svcsock.c +++ b/net/sunrpc/svcsock.c @@ -548,9 +548,6 @@ svc_write_space(struct sock *sk) /* * Receive a datagram from a UDP socket. */ -extern int -csum_partial_copy_to_xdr(struct xdr_buf *xdr, struct sk_buff *skb); - static int svc_udp_recvfrom(struct svc_rqst *rqstp) { diff --git a/net/sunrpc/sysctl.c b/net/sunrpc/sysctl.c index 1b9616a12e2..d0c9f460e41 100644 --- a/net/sunrpc/sysctl.c +++ b/net/sunrpc/sysctl.c @@ -119,8 +119,18 @@ done: return 0; } +unsigned int xprt_udp_slot_table_entries = RPC_DEF_SLOT_TABLE; +unsigned int xprt_tcp_slot_table_entries = RPC_DEF_SLOT_TABLE; +unsigned int xprt_min_resvport = RPC_DEF_MIN_RESVPORT; +EXPORT_SYMBOL(xprt_min_resvport); +unsigned int xprt_max_resvport = RPC_DEF_MAX_RESVPORT; +EXPORT_SYMBOL(xprt_max_resvport); + + static unsigned int min_slot_table_size = RPC_MIN_SLOT_TABLE; static unsigned int max_slot_table_size = RPC_MAX_SLOT_TABLE; +static unsigned int xprt_min_resvport_limit = RPC_MIN_RESVPORT; +static unsigned int xprt_max_resvport_limit = RPC_MAX_RESVPORT; static ctl_table debug_table[] = { { @@ -177,6 +187,28 @@ static ctl_table debug_table[] = { .extra1 = &min_slot_table_size, .extra2 = &max_slot_table_size }, + { + .ctl_name = CTL_MIN_RESVPORT, + .procname = "min_resvport", + .data = &xprt_min_resvport, + .maxlen = sizeof(unsigned int), + .mode = 0644, + .proc_handler = &proc_dointvec_minmax, + .strategy = &sysctl_intvec, + .extra1 = &xprt_min_resvport_limit, + .extra2 = &xprt_max_resvport_limit + }, + { + .ctl_name = CTL_MAX_RESVPORT, + .procname = "max_resvport", + .data = &xprt_max_resvport, + .maxlen = sizeof(unsigned int), + .mode = 0644, + .proc_handler = &proc_dointvec_minmax, + .strategy = &sysctl_intvec, + .extra1 = &xprt_min_resvport_limit, + .extra2 = &xprt_max_resvport_limit + }, { .ctl_name = 0 } }; diff --git a/net/sunrpc/xdr.c b/net/sunrpc/xdr.c index fde16f40a58..32df43372ee 100644 --- a/net/sunrpc/xdr.c +++ b/net/sunrpc/xdr.c @@ -6,15 +6,12 @@ * Copyright (C) 1995, 1996 Olaf Kirch <okir@monad.swb.de> */ +#include <linux/module.h> #include <linux/types.h> -#include <linux/socket.h> #include <linux/string.h> #include <linux/kernel.h> #include <linux/pagemap.h> #include <linux/errno.h> -#include <linux/in.h> -#include <linux/net.h> -#include <net/sock.h> #include <linux/sunrpc/xdr.h> #include <linux/sunrpc/msg_prot.h> @@ -176,178 +173,6 @@ xdr_inline_pages(struct xdr_buf *xdr, unsigned int offset, xdr->buflen += len; } -ssize_t -xdr_partial_copy_from_skb(struct xdr_buf *xdr, unsigned int base, - skb_reader_t *desc, - skb_read_actor_t copy_actor) -{ - struct page **ppage = xdr->pages; - unsigned int len, pglen = xdr->page_len; - ssize_t copied = 0; - int ret; - - len = xdr->head[0].iov_len; - if (base < len) { - len -= base; - ret = copy_actor(desc, (char *)xdr->head[0].iov_base + base, len); - copied += ret; - if (ret != len || !desc->count) - goto out; - base = 0; - } else - base -= len; - - if (pglen == 0) - goto copy_tail; - if (base >= pglen) { - base -= pglen; - goto copy_tail; - } - if (base || xdr->page_base) { - pglen -= base; - base += xdr->page_base; - ppage += base >> PAGE_CACHE_SHIFT; - base &= ~PAGE_CACHE_MASK; - } - do { - char *kaddr; - - /* ACL likes to be lazy in allocating pages - ACLs - * are small by default but can get huge. */ - if (unlikely(*ppage == NULL)) { - *ppage = alloc_page(GFP_ATOMIC); - if (unlikely(*ppage == NULL)) { - if (copied == 0) - copied = -ENOMEM; - goto out; - } - } - - len = PAGE_CACHE_SIZE; - kaddr = kmap_atomic(*ppage, KM_SKB_SUNRPC_DATA); - if (base) { - len -= base; - if (pglen < len) - len = pglen; - ret = copy_actor(desc, kaddr + base, len); - base = 0; - } else { - if (pglen < len) - len = pglen; - ret = copy_actor(desc, kaddr, len); - } - flush_dcache_page(*ppage); - kunmap_atomic(kaddr, KM_SKB_SUNRPC_DATA); - copied += ret; - if (ret != len || !desc->count) - goto out; - ppage++; - } while ((pglen -= len) != 0); -copy_tail: - len = xdr->tail[0].iov_len; - if (base < len) - copied += copy_actor(desc, (char *)xdr->tail[0].iov_base + base, len - base); -out: - return copied; -} - - -int -xdr_sendpages(struct socket *sock, struct sockaddr *addr, int addrlen, - struct xdr_buf *xdr, unsigned int base, int msgflags) -{ - struct page **ppage = xdr->pages; - unsigned int len, pglen = xdr->page_len; - int err, ret = 0; - ssize_t (*sendpage)(struct socket *, struct page *, int, size_t, int); - - len = xdr->head[0].iov_len; - if (base < len || (addr != NULL && base == 0)) { - struct kvec iov = { - .iov_base = xdr->head[0].iov_base + base, - .iov_len = len - base, - }; - struct msghdr msg = { - .msg_name = addr, - .msg_namelen = addrlen, - .msg_flags = msgflags, - }; - if (xdr->len > len) - msg.msg_flags |= MSG_MORE; - - if (iov.iov_len != 0) - err = kernel_sendmsg(sock, &msg, &iov, 1, iov.iov_len); - else - err = kernel_sendmsg(sock, &msg, NULL, 0, 0); - if (ret == 0) - ret = err; - else if (err > 0) - ret += err; - if (err != iov.iov_len) - goto out; - base = 0; - } else - base -= len; - - if (pglen == 0) - goto copy_tail; - if (base >= pglen) { - base -= pglen; - goto copy_tail; - } - if (base || xdr->page_base) { - pglen -= base; - base += xdr->page_base; - ppage += base >> PAGE_CACHE_SHIFT; - base &= ~PAGE_CACHE_MASK; - } - - sendpage = sock->ops->sendpage ? : sock_no_sendpage; - do { - int flags = msgflags; - - len = PAGE_CACHE_SIZE; - if (base) - len -= base; - if (pglen < len) - len = pglen; - - if (pglen != len || xdr->tail[0].iov_len != 0) - flags |= MSG_MORE; - - /* Hmm... We might be dealing with highmem pages */ - if (PageHighMem(*ppage)) - sendpage = sock_no_sendpage; - err = sendpage(sock, *ppage, base, len, flags); - if (ret == 0) - ret = err; - else if (err > 0) - ret += err; - if (err != len) - goto out; - base = 0; - ppage++; - } while ((pglen -= len) != 0); -copy_tail: - len = xdr->tail[0].iov_len; - if (base < len) { - struct kvec iov = { - .iov_base = xdr->tail[0].iov_base + base, - .iov_len = len - base, - }; - struct msghdr msg = { - .msg_flags = msgflags, - }; - err = kernel_sendmsg(sock, &msg, &iov, 1, iov.iov_len); - if (ret == 0) - ret = err; - else if (err > 0) - ret += err; - } -out: - return ret; -} - /* * Helper routines for doing 'memmove' like operations on a struct xdr_buf diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c index 3c654e06b08..6dda3860351 100644 --- a/net/sunrpc/xprt.c +++ b/net/sunrpc/xprt.c @@ -10,12 +10,12 @@ * one is available. Otherwise, it sleeps on the backlog queue * (xprt_reserve). * - Next, the caller puts together the RPC message, stuffs it into - * the request struct, and calls xprt_call(). - * - xprt_call transmits the message and installs the caller on the - * socket's wait list. At the same time, it installs a timer that + * the request struct, and calls xprt_transmit(). + * - xprt_transmit sends the message and installs the caller on the + * transport's wait list. At the same time, it installs a timer that * is run after the packet's timeout has expired. * - When a packet arrives, the data_ready handler walks the list of - * pending requests for that socket. If a matching XID is found, the + * pending requests for that transport. If a matching XID is found, the * caller is woken up, and the timer removed. * - When no reply arrives within the timeout interval, the timer is * fired by the kernel and runs xprt_timer(). It either adjusts the @@ -33,36 +33,17 @@ * * Copyright (C) 1995-1997, Olaf Kirch <okir@monad.swb.de> * - * TCP callback races fixes (C) 1998 Red Hat Software <alan@redhat.com> - * TCP send fixes (C) 1998 Red Hat Software <alan@redhat.com> - * TCP NFS related read + write fixes - * (C) 1999 Dave Airlie, University of Limerick, Ireland <airlied@linux.ie> - * - * Rewrite of larges part of the code in order to stabilize TCP stuff. - * Fix behaviour when socket buffer is full. - * (C) 1999 Trond Myklebust <trond.myklebust@fys.uio.no> + * Transport switch API copyright (C) 2005, Chuck Lever <cel@netapp.com> */ +#include <linux/module.h> + #include <linux/types.h> -#include <linux/slab.h> -#include <linux/capability.h> -#include <linux/sched.h> -#include <linux/errno.h> -#include <linux/socket.h> -#include <linux/in.h> -#include <linux/net.h> -#include <linux/mm.h> -#include <linux/udp.h> -#include <linux/tcp.h> -#include <linux/sunrpc/clnt.h> -#include <linux/file.h> +#include <linux/interrupt.h> #include <linux/workqueue.h> #include <linux/random.h> -#include <net/sock.h> -#include <net/checksum.h> -#include <net/udp.h> -#include <net/tcp.h> +#include <linux/sunrpc/clnt.h> /* * Local variables @@ -73,81 +54,90 @@ # define RPCDBG_FACILITY RPCDBG_XPRT #endif -#define XPRT_MAX_BACKOFF (8) -#define XPRT_IDLE_TIMEOUT (5*60*HZ) -#define XPRT_MAX_RESVPORT (800) - /* * Local functions */ static void xprt_request_init(struct rpc_task *, struct rpc_xprt *); static inline void do_xprt_reserve(struct rpc_task *); -static void xprt_disconnect(struct rpc_xprt *); static void xprt_connect_status(struct rpc_task *task); -static struct rpc_xprt * xprt_setup(int proto, struct sockaddr_in *ap, - struct rpc_timeout *to); -static struct socket *xprt_create_socket(struct rpc_xprt *, int, int); -static void xprt_bind_socket(struct rpc_xprt *, struct socket *); static int __xprt_get_cong(struct rpc_xprt *, struct rpc_task *); -static int xprt_clear_backlog(struct rpc_xprt *xprt); - -#ifdef RPC_DEBUG_DATA /* - * Print the buffer contents (first 128 bytes only--just enough for - * diropres return). + * The transport code maintains an estimate on the maximum number of out- + * standing RPC requests, using a smoothed version of the congestion + * avoidance implemented in 44BSD. This is basically the Van Jacobson + * congestion algorithm: If a retransmit occurs, the congestion window is + * halved; otherwise, it is incremented by 1/cwnd when + * + * - a reply is received and + * - a full number of requests are outstanding and + * - the congestion window hasn't been updated recently. */ -static void -xprt_pktdump(char *msg, u32 *packet, unsigned int count) -{ - u8 *buf = (u8 *) packet; - int j; - - dprintk("RPC: %s\n", msg); - for (j = 0; j < count && j < 128; j += 4) { - if (!(j & 31)) { - if (j) - dprintk("\n"); - dprintk("0x%04x ", j); - } - dprintk("%02x%02x%02x%02x ", - buf[j], buf[j+1], buf[j+2], buf[j+3]); - } - dprintk("\n"); -} -#else -static inline void -xprt_pktdump(char *msg, u32 *packet, unsigned int count) -{ - /* NOP */ -} -#endif +#define RPC_CWNDSHIFT (8U) +#define RPC_CWNDSCALE (1U << RPC_CWNDSHIFT) +#define RPC_INITCWND RPC_CWNDSCALE +#define RPC_MAXCWND(xprt) ((xprt)->max_reqs << RPC_CWNDSHIFT) -/* - * Look up RPC transport given an INET socket +#define RPCXPRT_CONGESTED(xprt) ((xprt)->cong >= (xprt)->cwnd) + +/** + * xprt_reserve_xprt - serialize write access to transports + * @task: task that is requesting access to the transport + * + * This prevents mixing the payload of separate requests, and prevents + * transport connects from colliding with writes. No congestion control + * is provided. */ -static inline struct rpc_xprt * -xprt_from_sock(struct sock *sk) +int xprt_reserve_xprt(struct rpc_task *task) { - return (struct rpc_xprt *) sk->sk_user_data; + struct rpc_xprt *xprt = task->tk_xprt; + struct rpc_rqst *req = task->tk_rqstp; + + if (test_and_set_bit(XPRT_LOCKED, &xprt->state)) { + if (task == xprt->snd_task) + return 1; + if (task == NULL) + return 0; + goto out_sleep; + } + xprt->snd_task = task; + if (req) { + req->rq_bytes_sent = 0; + req->rq_ntrans++; + } + return 1; + +out_sleep: + dprintk("RPC: %4d failed to lock transport %p\n", + task->tk_pid, xprt); + task->tk_timeout = 0; + task->tk_status = -EAGAIN; + if (req && req->rq_ntrans) + rpc_sleep_on(&xprt->resend, task, NULL, NULL); + else + rpc_sleep_on(&xprt->sending, task, NULL, NULL); + return 0; } /* - * Serialize write access to sockets, in order to prevent different - * requests from interfering with each other. - * Also prevents TCP socket connects from colliding with writes. + * xprt_reserve_xprt_cong - serialize write access to transports + * @task: task that is requesting access to the transport + * + * Same as xprt_reserve_xprt, but Van Jacobson congestion control is + * integrated into the decision of whether a request is allowed to be + * woken up and given access to the transport. */ -static int -__xprt_lock_write(struct rpc_xprt *xprt, struct rpc_task *task) +int xprt_reserve_xprt_cong(struct rpc_task *task) { + struct rpc_xprt *xprt = task->tk_xprt; struct rpc_rqst *req = task->tk_rqstp; - if (test_and_set_bit(XPRT_LOCKED, &xprt->sockstate)) { + if (test_and_set_bit(XPRT_LOCKED, &xprt->state)) { if (task == xprt->snd_task) return 1; goto out_sleep; } - if (xprt->nocong || __xprt_get_cong(xprt, task)) { + if (__xprt_get_cong(xprt, task)) { xprt->snd_task = task; if (req) { req->rq_bytes_sent = 0; @@ -156,10 +146,10 @@ __xprt_lock_write(struct rpc_xprt *xprt, struct rpc_task *task) return 1; } smp_mb__before_clear_bit(); - clear_bit(XPRT_LOCKED, &xprt->sockstate); + clear_bit(XPRT_LOCKED, &xprt->state); smp_mb__after_clear_bit(); out_sleep: - dprintk("RPC: %4d failed to lock socket %p\n", task->tk_pid, xprt); + dprintk("RPC: %4d failed to lock transport %p\n", task->tk_pid, xprt); task->tk_timeout = 0; task->tk_status = -EAGAIN; if (req && req->rq_ntrans) @@ -169,26 +159,52 @@ out_sleep: return 0; } -static inline int -xprt_lock_write(struct rpc_xprt *xprt, struct rpc_task *task) +static inline int xprt_lock_write(struct rpc_xprt *xprt, struct rpc_task *task) { int retval; - spin_lock_bh(&xprt->sock_lock); - retval = __xprt_lock_write(xprt, task); - spin_unlock_bh(&xprt->sock_lock); + spin_lock_bh(&xprt->transport_lock); + retval = xprt->ops->reserve_xprt(task); + spin_unlock_bh(&xprt->transport_lock); return retval; } +static void __xprt_lock_write_next(struct rpc_xprt *xprt) +{ + struct rpc_task *task; + struct rpc_rqst *req; -static void -__xprt_lock_write_next(struct rpc_xprt *xprt) + if (test_and_set_bit(XPRT_LOCKED, &xprt->state)) + return; + + task = rpc_wake_up_next(&xprt->resend); + if (!task) { + task = rpc_wake_up_next(&xprt->sending); + if (!task) + goto out_unlock; + } + + req = task->tk_rqstp; + xprt->snd_task = task; + if (req) { + req->rq_bytes_sent = 0; + req->rq_ntrans++; + } + return; + +out_unlock: + smp_mb__before_clear_bit(); + clear_bit(XPRT_LOCKED, &xprt->state); + smp_mb__after_clear_bit(); +} + +static void __xprt_lock_write_next_cong(struct rpc_xprt *xprt) { struct rpc_task *task; - if (test_and_set_bit(XPRT_LOCKED, &xprt->sockstate)) + if (test_and_set_bit(XPRT_LOCKED, &xprt->state)) return; - if (!xprt->nocong && RPCXPRT_CONGESTED(xprt)) + if (RPCXPRT_CONGESTED(xprt)) goto out_unlock; task = rpc_wake_up_next(&xprt->resend); if (!task) { @@ -196,7 +212,7 @@ __xprt_lock_write_next(struct rpc_xprt *xprt) if (!task) goto out_unlock; } - if (xprt->nocong || __xprt_get_cong(xprt, task)) { + if (__xprt_get_cong(xprt, task)) { struct rpc_rqst *req = task->tk_rqstp; xprt->snd_task = task; if (req) { @@ -207,87 +223,52 @@ __xprt_lock_write_next(struct rpc_xprt *xprt) } out_unlock: smp_mb__before_clear_bit(); - clear_bit(XPRT_LOCKED, &xprt->sockstate); + clear_bit(XPRT_LOCKED, &xprt->state); smp_mb__after_clear_bit(); } -/* - * Releases the socket for use by other requests. +/** + * xprt_release_xprt - allow other requests to use a transport + * @xprt: transport with other tasks potentially waiting + * @task: task that is releasing access to the transport + * + * Note that "task" can be NULL. No congestion control is provided. */ -static void -__xprt_release_write(struct rpc_xprt *xprt, struct rpc_task *task) +void xprt_release_xprt(struct rpc_xprt *xprt, struct rpc_task *task) { if (xprt->snd_task == task) { xprt->snd_task = NULL; smp_mb__before_clear_bit(); - clear_bit(XPRT_LOCKED, &xprt->sockstate); + clear_bit(XPRT_LOCKED, &xprt->state); smp_mb__after_clear_bit(); __xprt_lock_write_next(xprt); } } -static inline void -xprt_release_write(struct rpc_xprt *xprt, struct rpc_task *task) -{ - spin_lock_bh(&xprt->sock_lock); - __xprt_release_write(xprt, task); - spin_unlock_bh(&xprt->sock_lock); -} - -/* - * Write data to socket. +/** + * xprt_release_xprt_cong - allow other requests to use a transport + * @xprt: transport with other tasks potentially waiting + * @task: task that is releasing access to the transport + * + * Note that "task" can be NULL. Another task is awoken to use the + * transport if the transport's congestion window allows it. */ -static inline int -xprt_sendmsg(struct rpc_xprt *xprt, struct rpc_rqst *req) +void xprt_release_xprt_cong(struct rpc_xprt *xprt, struct rpc_task *task) { - struct socket *sock = xprt->sock; - struct xdr_buf *xdr = &req->rq_snd_buf; - struct sockaddr *addr = NULL; - int addrlen = 0; - unsigned int skip; - int result; - - if (!sock) - return -ENOTCONN; - - xprt_pktdump("packet data:", - req->rq_svec->iov_base, - req->rq_svec->iov_len); - - /* For UDP, we need to provide an address */ - if (!xprt->stream) { - addr = (struct sockaddr *) &xprt->addr; - addrlen = sizeof(xprt->addr); + if (xprt->snd_task == task) { + xprt->snd_task = NULL; + smp_mb__before_clear_bit(); + clear_bit(XPRT_LOCKED, &xprt->state); + smp_mb__after_clear_bit(); + __xprt_lock_write_next_cong(xprt); } - /* Dont repeat bytes */ - skip = req->rq_bytes_sent; - - clear_bit(SOCK_ASYNC_NOSPACE, &sock->flags); - result = xdr_sendpages(sock, addr, addrlen, xdr, skip, MSG_DONTWAIT); - - dprintk("RPC: xprt_sendmsg(%d) = %d\n", xdr->len - skip, result); - - if (result >= 0) - return result; +} - switch (result) { - case -ECONNREFUSED: - /* When the server has died, an ICMP port unreachable message - * prompts ECONNREFUSED. - */ - case -EAGAIN: - break; - case -ECONNRESET: - case -ENOTCONN: - case -EPIPE: - /* connection broken */ - if (xprt->stream) - result = -ENOTCONN; - break; - default: - printk(KERN_NOTICE "RPC: sendmsg returned error %d\n", -result); - } - return result; +static inline void xprt_release_write(struct rpc_xprt *xprt, struct rpc_task *task) +{ + spin_lock_bh(&xprt->transport_lock); + xprt->ops->release_xprt(xprt, task); + spin_unlock_bh(&xprt->transport_lock); } /* @@ -321,26 +302,40 @@ __xprt_put_cong(struct rpc_xprt *xprt, struct rpc_rqst *req) return; req->rq_cong = 0; xprt->cong -= RPC_CWNDSCALE; - __xprt_lock_write_next(xprt); + __xprt_lock_write_next_cong(xprt); } -/* - * Adjust RPC congestion window +/** + * xprt_release_rqst_cong - housekeeping when request is complete + * @task: RPC request that recently completed + * + * Useful for transports that require congestion control. + */ +void xprt_release_rqst_cong(struct rpc_task *task) +{ + __xprt_put_cong(task->tk_xprt, task->tk_rqstp); +} + +/** + * xprt_adjust_cwnd - adjust transport congestion window + * @task: recently completed RPC request used to adjust window + * @result: result code of completed RPC request + * * We use a time-smoothed congestion estimator to avoid heavy oscillation. */ -static void -xprt_adjust_cwnd(struct rpc_xprt *xprt, int result) +void xprt_adjust_cwnd(struct rpc_task *task, int result) { - unsigned long cwnd; + struct rpc_rqst *req = task->tk_rqstp; + struct rpc_xprt *xprt = task->tk_xprt; + unsigned long cwnd = xprt->cwnd; - cwnd = xprt->cwnd; if (result >= 0 && cwnd <= xprt->cong) { /* The (cwnd >> 1) term makes sure * the result gets rounded properly. */ cwnd += (RPC_CWNDSCALE * RPC_CWNDSCALE + (cwnd >> 1)) / cwnd; if (cwnd > RPC_MAXCWND(xprt)) cwnd = RPC_MAXCWND(xprt); - __xprt_lock_write_next(xprt); + __xprt_lock_write_next_cong(xprt); } else if (result == -ETIMEDOUT) { cwnd >>= 1; if (cwnd < RPC_CWNDSCALE) @@ -349,11 +344,89 @@ xprt_adjust_cwnd(struct rpc_xprt *xprt, int result) dprintk("RPC: cong %ld, cwnd was %ld, now %ld\n", xprt->cong, xprt->cwnd, cwnd); xprt->cwnd = cwnd; + __xprt_put_cong(xprt, req); +} + +/** + * xprt_wake_pending_tasks - wake all tasks on a transport's pending queue + * @xprt: transport with waiting tasks + * @status: result code to plant in each task before waking it + * + */ +void xprt_wake_pending_tasks(struct rpc_xprt *xprt, int status) +{ + if (status < 0) + rpc_wake_up_status(&xprt->pending, status); + else + rpc_wake_up(&xprt->pending); +} + +/** + * xprt_wait_for_buffer_space - wait for transport output buffer to clear + * @task: task to be put to sleep + * + */ +void xprt_wait_for_buffer_space(struct rpc_task *task) +{ + struct rpc_rqst *req = task->tk_rqstp; + struct rpc_xprt *xprt = req->rq_xprt; + + task->tk_timeout = req->rq_timeout; + rpc_sleep_on(&xprt->pending, task, NULL, NULL); +} + +/** + * xprt_write_space - wake the task waiting for transport output buffer space + * @xprt: transport with waiting tasks + * + * Can be called in a soft IRQ context, so xprt_write_space never sleeps. + */ +void xprt_write_space(struct rpc_xprt *xprt) +{ + if (unlikely(xprt->shutdown)) + return; + + spin_lock_bh(&xprt->transport_lock); + if (xprt->snd_task) { + dprintk("RPC: write space: waking waiting task on xprt %p\n", + xprt); + rpc_wake_up_task(xprt->snd_task); + } + spin_unlock_bh(&xprt->transport_lock); +} + +/** + * xprt_set_retrans_timeout_def - set a request's retransmit timeout + * @task: task whose timeout is to be set + * + * Set a request's retransmit timeout based on the transport's + * default timeout parameters. Used by transports that don't adjust + * the retransmit timeout based on round-trip time estimation. + */ +void xprt_set_retrans_timeout_def(struct rpc_task *task) +{ + task->tk_timeout = task->tk_rqstp->rq_timeout; } /* - * Reset the major timeout value + * xprt_set_retrans_timeout_rtt - set a request's retransmit timeout + * @task: task whose timeout is to be set + * + * Set a request's retransmit timeout using the RTT estimator. */ +void xprt_set_retrans_timeout_rtt(struct rpc_task *task) +{ + int timer = task->tk_msg.rpc_proc->p_timer; + struct rpc_rtt *rtt = task->tk_client->cl_rtt; + struct rpc_rqst *req = task->tk_rqstp; + unsigned long max_timeout = req->rq_xprt->timeout.to_maxval; + + task->tk_timeout = rpc_calc_rto(rtt, timer); + task->tk_timeout <<= rpc_ntimeo(rtt, timer) + req->rq_retries; + if (task->tk_timeout > max_timeout || task->tk_timeout == 0) + task->tk_timeout = max_timeout; +} + static void xprt_reset_majortimeo(struct rpc_rqst *req) { struct rpc_timeout *to = &req->rq_xprt->timeout; @@ -368,8 +441,10 @@ static void xprt_reset_majortimeo(struct rpc_rqst *req) req->rq_majortimeo += jiffies; } -/* - * Adjust timeout values etc for next retransmit +/** + * xprt_adjust_timeout - adjust timeout values for next retransmit + * @req: RPC request containing parameters to use for the adjustment + * */ int xprt_adjust_timeout(struct rpc_rqst *req) { @@ -391,9 +466,9 @@ int xprt_adjust_timeout(struct rpc_rqst *req) req->rq_retries = 0; xprt_reset_majortimeo(req); /* Reset the RTT counters == "slow start" */ - spin_lock_bh(&xprt->sock_lock); + spin_lock_bh(&xprt->transport_lock); rpc_init_rtt(req->rq_task->tk_client->cl_rtt, to->to_initval); - spin_unlock_bh(&xprt->sock_lock); + spin_unlock_bh(&xprt->transport_lock); pprintk("RPC: %lu timeout\n", jiffies); status = -ETIMEDOUT; } @@ -405,133 +480,52 @@ int xprt_adjust_timeout(struct rpc_rqst *req) return status; } -/* - * Close down a transport socket - */ -static void -xprt_close(struct rpc_xprt *xprt) -{ - struct socket *sock = xprt->sock; - struct sock *sk = xprt->inet; - - if (!sk) - return; - - write_lock_bh(&sk->sk_callback_lock); - xprt->inet = NULL; - xprt->sock = NULL; - - sk->sk_user_data = NULL; - sk->sk_data_ready = xprt->old_data_ready; - sk->sk_state_change = xprt->old_state_change; - sk->sk_write_space = xprt->old_write_space; - write_unlock_bh(&sk->sk_callback_lock); - - sk->sk_no_check = 0; - - sock_release(sock); -} - -static void -xprt_socket_autoclose(void *args) +static void xprt_autoclose(void *args) { struct rpc_xprt *xprt = (struct rpc_xprt *)args; xprt_disconnect(xprt); - xprt_close(xprt); + xprt->ops->close(xprt); xprt_release_write(xprt, NULL); } -/* - * Mark a transport as disconnected +/** + * xprt_disconnect - mark a transport as disconnected + * @xprt: transport to flag for disconnect + * */ -static void -xprt_disconnect(struct rpc_xprt *xprt) +void xprt_disconnect(struct rpc_xprt *xprt) { dprintk("RPC: disconnected transport %p\n", xprt); - spin_lock_bh(&xprt->sock_lock); + spin_lock_bh(&xprt->transport_lock); xprt_clear_connected(xprt); - rpc_wake_up_status(&xprt->pending, -ENOTCONN); - spin_unlock_bh(&xprt->sock_lock); + xprt_wake_pending_tasks(xprt, -ENOTCONN); + spin_unlock_bh(&xprt->transport_lock); } -/* - * Used to allow disconnection when we've been idle - */ static void xprt_init_autodisconnect(unsigned long data) { struct rpc_xprt *xprt = (struct rpc_xprt *)data; - spin_lock(&xprt->sock_lock); + spin_lock(&xprt->transport_lock); if (!list_empty(&xprt->recv) || xprt->shutdown) goto out_abort; - if (test_and_set_bit(XPRT_LOCKED, &xprt->sockstate)) + if (test_and_set_bit(XPRT_LOCKED, &xprt->state)) goto out_abort; - spin_unlock(&xprt->sock_lock); - /* Let keventd close the socket */ - if (test_bit(XPRT_CONNECTING, &xprt->sockstate) != 0) + spin_unlock(&xprt->transport_lock); + if (xprt_connecting(xprt)) xprt_release_write(xprt, NULL); else schedule_work(&xprt->task_cleanup); return; out_abort: - spin_unlock(&xprt->sock_lock); -} - -static void xprt_socket_connect(void *args) -{ - struct rpc_xprt *xprt = (struct rpc_xprt *)args; - struct socket *sock = xprt->sock; - int status = -EIO; - - if (xprt->shutdown || xprt->addr.sin_port == 0) - goto out; - - /* - * Start by resetting any existing state - */ - xprt_close(xprt); - sock = xprt_create_socket(xprt, xprt->prot, xprt->resvport); - if (sock == NULL) { - /* couldn't create socket or bind to reserved port; - * this is likely a permanent error, so cause an abort */ - goto out; - } - xprt_bind_socket(xprt, sock); - xprt_sock_setbufsize(xprt); - - status = 0; - if (!xprt->stream) - goto out; - - /* - * Tell the socket layer to start connecting... - */ - status = sock->ops->connect(sock, (struct sockaddr *) &xprt->addr, - sizeof(xprt->addr), O_NONBLOCK); - dprintk("RPC: %p connect status %d connected %d sock state %d\n", - xprt, -status, xprt_connected(xprt), sock->sk->sk_state); - if (status < 0) { - switch (status) { - case -EINPROGRESS: - case -EALREADY: - goto out_clear; - } - } -out: - if (status < 0) - rpc_wake_up_status(&xprt->pending, status); - else - rpc_wake_up(&xprt->pending); -out_clear: - smp_mb__before_clear_bit(); - clear_bit(XPRT_CONNECTING, &xprt->sockstate); - smp_mb__after_clear_bit(); + spin_unlock(&xprt->transport_lock); } -/* - * Attempt to connect a TCP socket. +/** + * xprt_connect - schedule a transport connect operation + * @task: RPC task that is requesting the connect * */ void xprt_connect(struct rpc_task *task) @@ -552,37 +546,19 @@ void xprt_connect(struct rpc_task *task) if (!xprt_lock_write(xprt, task)) return; if (xprt_connected(xprt)) - goto out_write; + xprt_release_write(xprt, task); + else { + if (task->tk_rqstp) + task->tk_rqstp->rq_bytes_sent = 0; - if (task->tk_rqstp) - task->tk_rqstp->rq_bytes_sent = 0; - - task->tk_timeout = RPC_CONNECT_TIMEOUT; - rpc_sleep_on(&xprt->pending, task, xprt_connect_status, NULL); - if (!test_and_set_bit(XPRT_CONNECTING, &xprt->sockstate)) { - /* Note: if we are here due to a dropped connection - * we delay reconnecting by RPC_REESTABLISH_TIMEOUT/HZ - * seconds - */ - if (xprt->sock != NULL) - schedule_delayed_work(&xprt->sock_connect, - RPC_REESTABLISH_TIMEOUT); - else { - schedule_work(&xprt->sock_connect); - if (!RPC_IS_ASYNC(task)) - flush_scheduled_work(); - } + task->tk_timeout = xprt->connect_timeout; + rpc_sleep_on(&xprt->pending, task, xprt_connect_status, NULL); + xprt->ops->connect(task); } return; - out_write: - xprt_release_write(xprt, task); } -/* - * We arrive here when awoken from waiting on connection establishment. - */ -static void -xprt_connect_status(struct rpc_task *task) +static void xprt_connect_status(struct rpc_task *task) { struct rpc_xprt *xprt = task->tk_xprt; @@ -592,31 +568,42 @@ xprt_connect_status(struct rpc_task *task) return; } - /* if soft mounted, just cause this RPC to fail */ - if (RPC_IS_SOFT(task)) - task->tk_status = -EIO; - switch (task->tk_status) { case -ECONNREFUSED: case -ECONNRESET: + dprintk("RPC: %4d xprt_connect_status: server %s refused connection\n", + task->tk_pid, task->tk_client->cl_server); + break; case -ENOTCONN: - return; + dprintk("RPC: %4d xprt_connect_status: connection broken\n", + task->tk_pid); + break; case -ETIMEDOUT: - dprintk("RPC: %4d xprt_connect_status: timed out\n", + dprintk("RPC: %4d xprt_connect_status: connect attempt timed out\n", task->tk_pid); break; default: - printk(KERN_ERR "RPC: error %d connecting to server %s\n", - -task->tk_status, task->tk_client->cl_server); + dprintk("RPC: %4d xprt_connect_status: error %d connecting to server %s\n", + task->tk_pid, -task->tk_status, task->tk_client->cl_server); + xprt_release_write(xprt, task); + task->tk_status = -EIO; + return; + } + + /* if soft mounted, just cause this RPC to fail */ + if (RPC_IS_SOFT(task)) { + xprt_release_write(xprt, task); + task->tk_status = -EIO; } - xprt_release_write(xprt, task); } -/* - * Look up the RPC request corresponding to a reply, and then lock it. +/** + * xprt_lookup_rqst - find an RPC request corresponding to an XID + * @xprt: transport on which the original request was transmitted + * @xid: RPC XID of incoming reply + * */ -static inline struct rpc_rqst * -xprt_lookup_rqst(struct rpc_xprt *xprt, u32 xid) +struct rpc_rqst *xprt_lookup_rqst(struct rpc_xprt *xprt, u32 xid) { struct list_head *pos; struct rpc_rqst *req = NULL; @@ -631,556 +618,68 @@ xprt_lookup_rqst(struct rpc_xprt *xprt, u32 xid) return req; } -/* - * Complete reply received. - * The TCP code relies on us to remove the request from xprt->pending. - */ -static void -xprt_complete_rqst(struct rpc_xprt *xprt, struct rpc_rqst *req, int copied) -{ - struct rpc_task *task = req->rq_task; - struct rpc_clnt *clnt = task->tk_client; - - /* Adjust congestion window */ - if (!xprt->nocong) { - unsigned timer = task->tk_msg.rpc_proc->p_timer; - xprt_adjust_cwnd(xprt, copied); - __xprt_put_cong(xprt, req); - if (timer) { - if (req->rq_ntrans == 1) - rpc_update_rtt(clnt->cl_rtt, timer, - (long)jiffies - req->rq_xtime); - rpc_set_timeo(clnt->cl_rtt, timer, req->rq_ntrans - 1); - } - } - -#ifdef RPC_PROFILE - /* Profile only reads for now */ - if (copied > 1024) { - static unsigned long nextstat; - static unsigned long pkt_rtt, pkt_len, pkt_cnt; - - pkt_cnt++; - pkt_len += req->rq_slen + copied; - pkt_rtt += jiffies - req->rq_xtime; - if (time_before(nextstat, jiffies)) { - printk("RPC: %lu %ld cwnd\n", jiffies, xprt->cwnd); - printk("RPC: %ld %ld %ld %ld stat\n", - jiffies, pkt_cnt, pkt_len, pkt_rtt); - pkt_rtt = pkt_len = pkt_cnt = 0; - nextstat = jiffies + 5 * HZ; - } - } -#endif - - dprintk("RPC: %4d has input (%d bytes)\n", task->tk_pid, copied); - list_del_init(&req->rq_list); - req->rq_received = req->rq_private_buf.len = copied; - - /* ... and wake up the process. */ - rpc_wake_up_task(task); - return; -} - -static size_t -skb_read_bits(skb_reader_t *desc, void *to, size_t len) -{ - if (len > desc->count) - len = desc->count; - if (skb_copy_bits(desc->skb, desc->offset, to, len)) - return 0; - desc->count -= len; - desc->offset += len; - return len; -} - -static size_t -skb_read_and_csum_bits(skb_reader_t *desc, void *to, size_t len) -{ - unsigned int csum2, pos; - - if (len > desc->count) - len = desc->count; - pos = desc->offset; - csum2 = skb_copy_and_csum_bits(desc->skb, pos, to, len, 0); - desc->csum = csum_block_add(desc->csum, csum2, pos); - desc->count -= len; - desc->offset += len; - return len; -} - -/* - * We have set things up such that we perform the checksum of the UDP - * packet in parallel with the copies into the RPC client iovec. -DaveM - */ -int -csum_partial_copy_to_xdr(struct xdr_buf *xdr, struct sk_buff *skb) -{ - skb_reader_t desc; - - desc.skb = skb; - desc.offset = sizeof(struct udphdr); - desc.count = skb->len - desc.offset; - - if (skb->ip_summed == CHECKSUM_UNNECESSARY) - goto no_checksum; - - desc.csum = csum_partial(skb->data, desc.offset, skb->csum); - if (xdr_partial_copy_from_skb(xdr, 0, &desc, skb_read_and_csum_bits) < 0) - return -1; - if (desc.offset != skb->len) { - unsigned int csum2; - csum2 = skb_checksum(skb, desc.offset, skb->len - desc.offset, 0); - desc.csum = csum_block_add(desc.csum, csum2, desc.offset); - } - if (desc.count) - return -1; - if ((unsigned short)csum_fold(desc.csum)) - return -1; - return 0; -no_checksum: - if (xdr_partial_copy_from_skb(xdr, 0, &desc, skb_read_bits) < 0) - return -1; - if (desc.count) - return -1; - return 0; -} - -/* - * Input handler for RPC replies. Called from a bottom half and hence - * atomic. - */ -static void -udp_data_ready(struct sock *sk, int len) -{ - struct rpc_task *task; - struct rpc_xprt *xprt; - struct rpc_rqst *rovr; - struct sk_buff *skb; - int err, repsize, copied; - u32 _xid, *xp; - - read_lock(&sk->sk_callback_lock); - dprintk("RPC: udp_data_ready...\n"); - if (!(xprt = xprt_from_sock(sk))) { - printk("RPC: udp_data_ready request not found!\n"); - goto out; - } - - dprintk("RPC: udp_data_ready client %p\n", xprt); - - if ((skb = skb_recv_datagram(sk, 0, 1, &err)) == NULL) - goto out; - - if (xprt->shutdown) - goto dropit; - - repsize = skb->len - sizeof(struct udphdr); - if (repsize < 4) { - printk("RPC: impossible RPC reply size %d!\n", repsize); - goto dropit; - } - - /* Copy the XID from the skb... */ - xp = skb_header_pointer(skb, sizeof(struct udphdr), - sizeof(_xid), &_xid); - if (xp == NULL) - goto dropit; - - /* Look up and lock the request corresponding to the given XID */ - spin_lock(&xprt->sock_lock); - rovr = xprt_lookup_rqst(xprt, *xp); - if (!rovr) - goto out_unlock; - task = rovr->rq_task; - - dprintk("RPC: %4d received reply\n", task->tk_pid); - - if ((copied = rovr->rq_private_buf.buflen) > repsize) - copied = repsize; - - /* Suck it into the iovec, verify checksum if not done by hw. */ - if (csum_partial_copy_to_xdr(&rovr->rq_private_buf, skb)) - goto out_unlock; - - /* Something worked... */ - dst_confirm(skb->dst); - - xprt_complete_rqst(xprt, rovr, copied); - - out_unlock: - spin_unlock(&xprt->sock_lock); - dropit: - skb_free_datagram(sk, skb); - out: - read_unlock(&sk->sk_callback_lock); -} - -/* - * Copy from an skb into memory and shrink the skb. - */ -static inline size_t -tcp_copy_data(skb_reader_t *desc, void *p, size_t len) -{ - if (len > desc->count) - len = desc->count; - if (skb_copy_bits(desc->skb, desc->offset, p, len)) { - dprintk("RPC: failed to copy %zu bytes from skb. %zu bytes remain\n", - len, desc->count); - return 0; - } - desc->offset += len; - desc->count -= len; - dprintk("RPC: copied %zu bytes from skb. %zu bytes remain\n", - len, desc->count); - return len; -} - -/* - * TCP read fragment marker - */ -static inline void -tcp_read_fraghdr(struct rpc_xprt *xprt, skb_reader_t *desc) -{ - size_t len, used; - char *p; - - p = ((char *) &xprt->tcp_recm) + xprt->tcp_offset; - len = sizeof(xprt->tcp_recm) - xprt->tcp_offset; - used = tcp_copy_data(desc, p, len); - xprt->tcp_offset += used; - if (used != len) - return; - xprt->tcp_reclen = ntohl(xprt->tcp_recm); - if (xprt->tcp_reclen & 0x80000000) - xprt->tcp_flags |= XPRT_LAST_FRAG; - else - xprt->tcp_flags &= ~XPRT_LAST_FRAG; - xprt->tcp_reclen &= 0x7fffffff; - xprt->tcp_flags &= ~XPRT_COPY_RECM; - xprt->tcp_offset = 0; - /* Sanity check of the record length */ - if (xprt->tcp_reclen < 4) { - printk(KERN_ERR "RPC: Invalid TCP record fragment length\n"); - xprt_disconnect(xprt); - } - dprintk("RPC: reading TCP record fragment of length %d\n", - xprt->tcp_reclen); -} - -static void -tcp_check_recm(struct rpc_xprt *xprt) -{ - dprintk("RPC: xprt = %p, tcp_copied = %lu, tcp_offset = %u, tcp_reclen = %u, tcp_flags = %lx\n", - xprt, xprt->tcp_copied, xprt->tcp_offset, xprt->tcp_reclen, xprt->tcp_flags); - if (xprt->tcp_offset == xprt->tcp_reclen) { - xprt->tcp_flags |= XPRT_COPY_RECM; - xprt->tcp_offset = 0; - if (xprt->tcp_flags & XPRT_LAST_FRAG) { - xprt->tcp_flags &= ~XPRT_COPY_DATA; - xprt->tcp_flags |= XPRT_COPY_XID; - xprt->tcp_copied = 0; - } - } -} - -/* - * TCP read xid - */ -static inline void -tcp_read_xid(struct rpc_xprt *xprt, skb_reader_t *desc) -{ - size_t len, used; - char *p; - - len = sizeof(xprt->tcp_xid) - xprt->tcp_offset; - dprintk("RPC: reading XID (%Zu bytes)\n", len); - p = ((char *) &xprt->tcp_xid) + xprt->tcp_offset; - used = tcp_copy_data(desc, p, len); - xprt->tcp_offset += used; - if (used != len) - return; - xprt->tcp_flags &= ~XPRT_COPY_XID; - xprt->tcp_flags |= XPRT_COPY_DATA; - xprt->tcp_copied = 4; - dprintk("RPC: reading reply for XID %08x\n", - ntohl(xprt->tcp_xid)); - tcp_check_recm(xprt); -} - -/* - * TCP read and complete request - */ -static inline void -tcp_read_request(struct rpc_xprt *xprt, skb_reader_t *desc) -{ - struct rpc_rqst *req; - struct xdr_buf *rcvbuf; - size_t len; - ssize_t r; - - /* Find and lock the request corresponding to this xid */ - spin_lock(&xprt->sock_lock); - req = xprt_lookup_rqst(xprt, xprt->tcp_xid); - if (!req) { - xprt->tcp_flags &= ~XPRT_COPY_DATA; - dprintk("RPC: XID %08x request not found!\n", - ntohl(xprt->tcp_xid)); - spin_unlock(&xprt->sock_lock); - return; - } - - rcvbuf = &req->rq_private_buf; - len = desc->count; - if (len > xprt->tcp_reclen - xprt->tcp_offset) { - skb_reader_t my_desc; - - len = xprt->tcp_reclen - xprt->tcp_offset; - memcpy(&my_desc, desc, sizeof(my_desc)); - my_desc.count = len; - r = xdr_partial_copy_from_skb(rcvbuf, xprt->tcp_copied, - &my_desc, tcp_copy_data); - desc->count -= r; - desc->offset += r; - } else - r = xdr_partial_copy_from_skb(rcvbuf, xprt->tcp_copied, - desc, tcp_copy_data); - - if (r > 0) { - xprt->tcp_copied += r; - xprt->tcp_offset += r; - } - if (r != len) { - /* Error when copying to the receive buffer, - * usually because we weren't able to allocate - * additional buffer pages. All we can do now - * is turn off XPRT_COPY_DATA, so the request - * will not receive any additional updates, - * and time out. - * Any remaining data from this record will - * be discarded. - */ - xprt->tcp_flags &= ~XPRT_COPY_DATA; - dprintk("RPC: XID %08x truncated request\n", - ntohl(xprt->tcp_xid)); - dprintk("RPC: xprt = %p, tcp_copied = %lu, tcp_offset = %u, tcp_reclen = %u\n", - xprt, xprt->tcp_copied, xprt->tcp_offset, xprt->tcp_reclen); - goto out; - } - - dprintk("RPC: XID %08x read %Zd bytes\n", - ntohl(xprt->tcp_xid), r); - dprintk("RPC: xprt = %p, tcp_copied = %lu, tcp_offset = %u, tcp_reclen = %u\n", - xprt, xprt->tcp_copied, xprt->tcp_offset, xprt->tcp_reclen); - - if (xprt->tcp_copied == req->rq_private_buf.buflen) - xprt->tcp_flags &= ~XPRT_COPY_DATA; - else if (xprt->tcp_offset == xprt->tcp_reclen) { - if (xprt->tcp_flags & XPRT_LAST_FRAG) - xprt->tcp_flags &= ~XPRT_COPY_DATA; - } - -out: - if (!(xprt->tcp_flags & XPRT_COPY_DATA)) { - dprintk("RPC: %4d received reply complete\n", - req->rq_task->tk_pid); - xprt_complete_rqst(xprt, req, xprt->tcp_copied); - } - spin_unlock(&xprt->sock_lock); - tcp_check_recm(xprt); -} - -/* - * TCP discard extra bytes from a short read - */ -static inline void -tcp_read_discard(struct rpc_xprt *xprt, skb_reader_t *desc) -{ - size_t len; - - len = xprt->tcp_reclen - xprt->tcp_offset; - if (len > desc->count) - len = desc->count; - desc->count -= len; - desc->offset += len; - xprt->tcp_offset += len; - dprintk("RPC: discarded %Zu bytes\n", len); - tcp_check_recm(xprt); -} - -/* - * TCP record receive routine - * We first have to grab the record marker, then the XID, then the data. +/** + * xprt_update_rtt - update an RPC client's RTT state after receiving a reply + * @task: RPC request that recently completed + * */ -static int -tcp_data_recv(read_descriptor_t *rd_desc, struct sk_buff *skb, - unsigned int offset, size_t len) -{ - struct rpc_xprt *xprt = rd_desc->arg.data; - skb_reader_t desc = { - .skb = skb, - .offset = offset, - .count = len, - .csum = 0 - }; - - dprintk("RPC: tcp_data_recv\n"); - do { - /* Read in a new fragment marker if necessary */ - /* Can we ever really expect to get completely empty fragments? */ - if (xprt->tcp_flags & XPRT_COPY_RECM) { - tcp_read_fraghdr(xprt, &desc); - continue; - } - /* Read in the xid if necessary */ - if (xprt->tcp_flags & XPRT_COPY_XID) { - tcp_read_xid(xprt, &desc); - continue; - } - /* Read in the request data */ - if (xprt->tcp_flags & XPRT_COPY_DATA) { - tcp_read_request(xprt, &desc); - continue; - } - /* Skip over any trailing bytes on short reads */ - tcp_read_discard(xprt, &desc); - } while (desc.count); - dprintk("RPC: tcp_data_recv done\n"); - return len - desc.count; -} - -static void tcp_data_ready(struct sock *sk, int bytes) +void xprt_update_rtt(struct rpc_task *task) { - struct rpc_xprt *xprt; - read_descriptor_t rd_desc; - - read_lock(&sk->sk_callback_lock); - dprintk("RPC: tcp_data_ready...\n"); - if (!(xprt = xprt_from_sock(sk))) { - printk("RPC: tcp_data_ready socket info not found!\n"); - goto out; - } - if (xprt->shutdown) - goto out; - - /* We use rd_desc to pass struct xprt to tcp_data_recv */ - rd_desc.arg.data = xprt; - rd_desc.count = 65536; - tcp_read_sock(sk, &rd_desc, tcp_data_recv); -out: - read_unlock(&sk->sk_callback_lock); -} - -static void -tcp_state_change(struct sock *sk) -{ - struct rpc_xprt *xprt; + struct rpc_rqst *req = task->tk_rqstp; + struct rpc_rtt *rtt = task->tk_client->cl_rtt; + unsigned timer = task->tk_msg.rpc_proc->p_timer; - read_lock(&sk->sk_callback_lock); - if (!(xprt = xprt_from_sock(sk))) - goto out; - dprintk("RPC: tcp_state_change client %p...\n", xprt); - dprintk("RPC: state %x conn %d dead %d zapped %d\n", - sk->sk_state, xprt_connected(xprt), - sock_flag(sk, SOCK_DEAD), - sock_flag(sk, SOCK_ZAPPED)); - - switch (sk->sk_state) { - case TCP_ESTABLISHED: - spin_lock_bh(&xprt->sock_lock); - if (!xprt_test_and_set_connected(xprt)) { - /* Reset TCP record info */ - xprt->tcp_offset = 0; - xprt->tcp_reclen = 0; - xprt->tcp_copied = 0; - xprt->tcp_flags = XPRT_COPY_RECM | XPRT_COPY_XID; - rpc_wake_up(&xprt->pending); - } - spin_unlock_bh(&xprt->sock_lock); - break; - case TCP_SYN_SENT: - case TCP_SYN_RECV: - break; - default: - xprt_disconnect(xprt); - break; + if (timer) { + if (req->rq_ntrans == 1) + rpc_update_rtt(rtt, timer, + (long)jiffies - req->rq_xtime); + rpc_set_timeo(rtt, timer, req->rq_ntrans - 1); } - out: - read_unlock(&sk->sk_callback_lock); } -/* - * Called when more output buffer space is available for this socket. - * We try not to wake our writers until they can make "significant" - * progress, otherwise we'll waste resources thrashing sock_sendmsg - * with a bunch of small requests. +/** + * xprt_complete_rqst - called when reply processing is complete + * @task: RPC request that recently completed + * @copied: actual number of bytes received from the transport + * + * Caller holds transport lock. */ -static void -xprt_write_space(struct sock *sk) +void xprt_complete_rqst(struct rpc_task *task, int copied) { - struct rpc_xprt *xprt; - struct socket *sock; - - read_lock(&sk->sk_callback_lock); - if (!(xprt = xprt_from_sock(sk)) || !(sock = sk->sk_socket)) - goto out; - if (xprt->shutdown) - goto out; - - /* Wait until we have enough socket memory */ - if (xprt->stream) { - /* from net/core/stream.c:sk_stream_write_space */ - if (sk_stream_wspace(sk) < sk_stream_min_wspace(sk)) - goto out; - } else { - /* from net/core/sock.c:sock_def_write_space */ - if (!sock_writeable(sk)) - goto out; - } + struct rpc_rqst *req = task->tk_rqstp; - if (!test_and_clear_bit(SOCK_NOSPACE, &sock->flags)) - goto out; + dprintk("RPC: %5u xid %08x complete (%d bytes received)\n", + task->tk_pid, ntohl(req->rq_xid), copied); - spin_lock_bh(&xprt->sock_lock); - if (xprt->snd_task) - rpc_wake_up_task(xprt->snd_task); - spin_unlock_bh(&xprt->sock_lock); -out: - read_unlock(&sk->sk_callback_lock); + list_del_init(&req->rq_list); + req->rq_received = req->rq_private_buf.len = copied; + rpc_wake_up_task(task); } -/* - * RPC receive timeout handler. - */ -static void -xprt_timer(struct rpc_task *task) +static void xprt_timer(struct rpc_task *task) { - struct rpc_rqst *req = task->tk_rqstp; + struct rpc_rqst *req = task->tk_rqstp; struct rpc_xprt *xprt = req->rq_xprt; - spin_lock(&xprt->sock_lock); - if (req->rq_received) - goto out; - - xprt_adjust_cwnd(req->rq_xprt, -ETIMEDOUT); - __xprt_put_cong(xprt, req); + dprintk("RPC: %4d xprt_timer\n", task->tk_pid); - dprintk("RPC: %4d xprt_timer (%s request)\n", - task->tk_pid, req ? "pending" : "backlogged"); - - task->tk_status = -ETIMEDOUT; -out: + spin_lock(&xprt->transport_lock); + if (!req->rq_received) { + if (xprt->ops->timer) + xprt->ops->timer(task); + task->tk_status = -ETIMEDOUT; + } task->tk_timeout = 0; rpc_wake_up_task(task); - spin_unlock(&xprt->sock_lock); + spin_unlock(&xprt->transport_lock); } -/* - * Place the actual RPC call. - * We have to copy the iovec because sendmsg fiddles with its contents. +/** + * xprt_prepare_transmit - reserve the transport before sending a request + * @task: RPC task about to send a request + * */ -int -xprt_prepare_transmit(struct rpc_task *task) +int xprt_prepare_transmit(struct rpc_task *task) { struct rpc_rqst *req = task->tk_rqstp; struct rpc_xprt *xprt = req->rq_xprt; @@ -1191,12 +690,12 @@ xprt_prepare_transmit(struct rpc_task *task) if (xprt->shutdown) return -EIO; - spin_lock_bh(&xprt->sock_lock); + spin_lock_bh(&xprt->transport_lock); if (req->rq_received && !req->rq_bytes_sent) { err = req->rq_received; goto out_unlock; } - if (!__xprt_lock_write(xprt, task)) { + if (!xprt->ops->reserve_xprt(task)) { err = -EAGAIN; goto out_unlock; } @@ -1206,39 +705,42 @@ xprt_prepare_transmit(struct rpc_task *task) goto out_unlock; } out_unlock: - spin_unlock_bh(&xprt->sock_lock); + spin_unlock_bh(&xprt->transport_lock); return err; } void -xprt_transmit(struct rpc_task *task) +xprt_abort_transmit(struct rpc_task *task) +{ + struct rpc_xprt *xprt = task->tk_xprt; + + xprt_release_write(xprt, task); +} + +/** + * xprt_transmit - send an RPC request on a transport + * @task: controlling RPC task + * + * We have to copy the iovec because sendmsg fiddles with its contents. + */ +void xprt_transmit(struct rpc_task *task) { - struct rpc_clnt *clnt = task->tk_client; struct rpc_rqst *req = task->tk_rqstp; struct rpc_xprt *xprt = req->rq_xprt; - int status, retry = 0; - + int status; dprintk("RPC: %4d xprt_transmit(%u)\n", task->tk_pid, req->rq_slen); - /* set up everything as needed. */ - /* Write the record marker */ - if (xprt->stream) { - u32 *marker = req->rq_svec[0].iov_base; - - *marker = htonl(0x80000000|(req->rq_slen-sizeof(*marker))); - } - smp_rmb(); if (!req->rq_received) { if (list_empty(&req->rq_list)) { - spin_lock_bh(&xprt->sock_lock); + spin_lock_bh(&xprt->transport_lock); /* Update the softirq receive buffer */ memcpy(&req->rq_private_buf, &req->rq_rcv_buf, sizeof(req->rq_private_buf)); /* Add request to the receive list */ list_add_tail(&req->rq_list, &xprt->recv); - spin_unlock_bh(&xprt->sock_lock); + spin_unlock_bh(&xprt->transport_lock); xprt_reset_majortimeo(req); /* Turn off autodisconnect */ del_singleshot_timer_sync(&xprt->timer); @@ -1246,40 +748,19 @@ xprt_transmit(struct rpc_task *task) } else if (!req->rq_bytes_sent) return; - /* Continue transmitting the packet/record. We must be careful - * to cope with writespace callbacks arriving _after_ we have - * called xprt_sendmsg(). - */ - while (1) { - req->rq_xtime = jiffies; - status = xprt_sendmsg(xprt, req); - - if (status < 0) - break; - - if (xprt->stream) { - req->rq_bytes_sent += status; - - /* If we've sent the entire packet, immediately - * reset the count of bytes sent. */ - if (req->rq_bytes_sent >= req->rq_slen) { - req->rq_bytes_sent = 0; - goto out_receive; - } - } else { - if (status >= req->rq_slen) - goto out_receive; - status = -EAGAIN; - break; - } - - dprintk("RPC: %4d xmit incomplete (%d left of %d)\n", - task->tk_pid, req->rq_slen - req->rq_bytes_sent, - req->rq_slen); - - status = -EAGAIN; - if (retry++ > 50) - break; + status = xprt->ops->send_request(task); + if (status == 0) { + dprintk("RPC: %4d xmit complete\n", task->tk_pid); + spin_lock_bh(&xprt->transport_lock); + xprt->ops->set_retrans_timeout(task); + /* Don't race with disconnect */ + if (!xprt_connected(xprt)) + task->tk_status = -ENOTCONN; + else if (!req->rq_received) + rpc_sleep_on(&xprt->pending, task, NULL, xprt_timer); + xprt->ops->release_xprt(xprt, task); + spin_unlock_bh(&xprt->transport_lock); + return; } /* Note: at this point, task->tk_sleeping has not yet been set, @@ -1289,60 +770,19 @@ xprt_transmit(struct rpc_task *task) task->tk_status = status; switch (status) { - case -EAGAIN: - if (test_bit(SOCK_ASYNC_NOSPACE, &xprt->sock->flags)) { - /* Protect against races with xprt_write_space */ - spin_lock_bh(&xprt->sock_lock); - /* Don't race with disconnect */ - if (!xprt_connected(xprt)) - task->tk_status = -ENOTCONN; - else if (test_bit(SOCK_NOSPACE, &xprt->sock->flags)) { - task->tk_timeout = req->rq_timeout; - rpc_sleep_on(&xprt->pending, task, NULL, NULL); - } - spin_unlock_bh(&xprt->sock_lock); - return; - } - /* Keep holding the socket if it is blocked */ - rpc_delay(task, HZ>>4); - return; case -ECONNREFUSED: - task->tk_timeout = RPC_REESTABLISH_TIMEOUT; rpc_sleep_on(&xprt->sending, task, NULL, NULL); + case -EAGAIN: case -ENOTCONN: return; default: - if (xprt->stream) - xprt_disconnect(xprt); + break; } xprt_release_write(xprt, task); return; - out_receive: - dprintk("RPC: %4d xmit complete\n", task->tk_pid); - /* Set the task's receive timeout value */ - spin_lock_bh(&xprt->sock_lock); - if (!xprt->nocong) { - int timer = task->tk_msg.rpc_proc->p_timer; - task->tk_timeout = rpc_calc_rto(clnt->cl_rtt, timer); - task->tk_timeout <<= rpc_ntimeo(clnt->cl_rtt, timer) + req->rq_retries; - if (task->tk_timeout > xprt->timeout.to_maxval || task->tk_timeout == 0) - task->tk_timeout = xprt->timeout.to_maxval; - } else - task->tk_timeout = req->rq_timeout; - /* Don't race with disconnect */ - if (!xprt_connected(xprt)) - task->tk_status = -ENOTCONN; - else if (!req->rq_received) - rpc_sleep_on(&xprt->pending, task, NULL, xprt_timer); - __xprt_release_write(xprt, task); - spin_unlock_bh(&xprt->sock_lock); } -/* - * Reserve an RPC call slot. - */ -static inline void -do_xprt_reserve(struct rpc_task *task) +static inline void do_xprt_reserve(struct rpc_task *task) { struct rpc_xprt *xprt = task->tk_xprt; @@ -1362,22 +802,25 @@ do_xprt_reserve(struct rpc_task *task) rpc_sleep_on(&xprt->backlog, task, NULL, NULL); } -void -xprt_reserve(struct rpc_task *task) +/** + * xprt_reserve - allocate an RPC request slot + * @task: RPC task requesting a slot allocation + * + * If no more slots are available, place the task on the transport's + * backlog queue. + */ +void xprt_reserve(struct rpc_task *task) { struct rpc_xprt *xprt = task->tk_xprt; task->tk_status = -EIO; if (!xprt->shutdown) { - spin_lock(&xprt->xprt_lock); + spin_lock(&xprt->reserve_lock); do_xprt_reserve(task); - spin_unlock(&xprt->xprt_lock); + spin_unlock(&xprt->reserve_lock); } } -/* - * Allocate a 'unique' XID - */ static inline u32 xprt_alloc_xid(struct rpc_xprt *xprt) { return xprt->xid++; @@ -1388,11 +831,7 @@ static inline void xprt_init_xid(struct rpc_xprt *xprt) get_random_bytes(&xprt->xid, sizeof(xprt->xid)); } -/* - * Initialize RPC request - */ -static void -xprt_request_init(struct rpc_task *task, struct rpc_xprt *xprt) +static void xprt_request_init(struct rpc_task *task, struct rpc_xprt *xprt) { struct rpc_rqst *req = task->tk_rqstp; @@ -1400,128 +839,104 @@ xprt_request_init(struct rpc_task *task, struct rpc_xprt *xprt) req->rq_task = task; req->rq_xprt = xprt; req->rq_xid = xprt_alloc_xid(xprt); + req->rq_release_snd_buf = NULL; dprintk("RPC: %4d reserved req %p xid %08x\n", task->tk_pid, req, ntohl(req->rq_xid)); } -/* - * Release an RPC call slot +/** + * xprt_release - release an RPC request slot + * @task: task which is finished with the slot + * */ -void -xprt_release(struct rpc_task *task) +void xprt_release(struct rpc_task *task) { struct rpc_xprt *xprt = task->tk_xprt; struct rpc_rqst *req; if (!(req = task->tk_rqstp)) return; - spin_lock_bh(&xprt->sock_lock); - __xprt_release_write(xprt, task); - __xprt_put_cong(xprt, req); + spin_lock_bh(&xprt->transport_lock); + xprt->ops->release_xprt(xprt, task); + if (xprt->ops->release_request) + xprt->ops->release_request(task); if (!list_empty(&req->rq_list)) list_del(&req->rq_list); xprt->last_used = jiffies; if (list_empty(&xprt->recv) && !xprt->shutdown) - mod_timer(&xprt->timer, xprt->last_used + XPRT_IDLE_TIMEOUT); - spin_unlock_bh(&xprt->sock_lock); + mod_timer(&xprt->timer, + xprt->last_used + xprt->idle_timeout); + spin_unlock_bh(&xprt->transport_lock); task->tk_rqstp = NULL; + if (req->rq_release_snd_buf) + req->rq_release_snd_buf(req); memset(req, 0, sizeof(*req)); /* mark unused */ dprintk("RPC: %4d release request %p\n", task->tk_pid, req); - spin_lock(&xprt->xprt_lock); + spin_lock(&xprt->reserve_lock); list_add(&req->rq_list, &xprt->free); - xprt_clear_backlog(xprt); - spin_unlock(&xprt->xprt_lock); -} - -/* - * Set default timeout parameters - */ -static void -xprt_default_timeout(struct rpc_timeout *to, int proto) -{ - if (proto == IPPROTO_UDP) - xprt_set_timeout(to, 5, 5 * HZ); - else - xprt_set_timeout(to, 5, 60 * HZ); + rpc_wake_up_next(&xprt->backlog); + spin_unlock(&xprt->reserve_lock); } -/* - * Set constant timeout +/** + * xprt_set_timeout - set constant RPC timeout + * @to: RPC timeout parameters to set up + * @retr: number of retries + * @incr: amount of increase after each retry + * */ -void -xprt_set_timeout(struct rpc_timeout *to, unsigned int retr, unsigned long incr) +void xprt_set_timeout(struct rpc_timeout *to, unsigned int retr, unsigned long incr) { to->to_initval = to->to_increment = incr; - to->to_maxval = incr * retr; + to->to_maxval = to->to_initval + (incr * retr); to->to_retries = retr; to->to_exponential = 0; } -unsigned int xprt_udp_slot_table_entries = RPC_DEF_SLOT_TABLE; -unsigned int xprt_tcp_slot_table_entries = RPC_DEF_SLOT_TABLE; - -/* - * Initialize an RPC client - */ -static struct rpc_xprt * -xprt_setup(int proto, struct sockaddr_in *ap, struct rpc_timeout *to) +static struct rpc_xprt *xprt_setup(int proto, struct sockaddr_in *ap, struct rpc_timeout *to) { + int result; struct rpc_xprt *xprt; - unsigned int entries; - size_t slot_table_size; struct rpc_rqst *req; - dprintk("RPC: setting up %s transport...\n", - proto == IPPROTO_UDP? "UDP" : "TCP"); - - entries = (proto == IPPROTO_TCP)? - xprt_tcp_slot_table_entries : xprt_udp_slot_table_entries; - if ((xprt = kmalloc(sizeof(struct rpc_xprt), GFP_KERNEL)) == NULL) return ERR_PTR(-ENOMEM); memset(xprt, 0, sizeof(*xprt)); /* Nnnngh! */ - xprt->max_reqs = entries; - slot_table_size = entries * sizeof(xprt->slot[0]); - xprt->slot = kmalloc(slot_table_size, GFP_KERNEL); - if (xprt->slot == NULL) { - kfree(xprt); - return ERR_PTR(-ENOMEM); - } - memset(xprt->slot, 0, slot_table_size); xprt->addr = *ap; - xprt->prot = proto; - xprt->stream = (proto == IPPROTO_TCP)? 1 : 0; - if (xprt->stream) { - xprt->cwnd = RPC_MAXCWND(xprt); - xprt->nocong = 1; - xprt->max_payload = (1U << 31) - 1; - } else { - xprt->cwnd = RPC_INITCWND; - xprt->max_payload = (1U << 16) - (MAX_HEADER << 3); + + switch (proto) { + case IPPROTO_UDP: + result = xs_setup_udp(xprt, to); + break; + case IPPROTO_TCP: + result = xs_setup_tcp(xprt, to); + break; + default: + printk(KERN_ERR "RPC: unrecognized transport protocol: %d\n", + proto); + result = -EIO; + break; + } + if (result) { + kfree(xprt); + return ERR_PTR(result); } - spin_lock_init(&xprt->sock_lock); - spin_lock_init(&xprt->xprt_lock); - init_waitqueue_head(&xprt->cong_wait); + + spin_lock_init(&xprt->transport_lock); + spin_lock_init(&xprt->reserve_lock); INIT_LIST_HEAD(&xprt->free); INIT_LIST_HEAD(&xprt->recv); - INIT_WORK(&xprt->sock_connect, xprt_socket_connect, xprt); - INIT_WORK(&xprt->task_cleanup, xprt_socket_autoclose, xprt); + INIT_WORK(&xprt->task_cleanup, xprt_autoclose, xprt); init_timer(&xprt->timer); xprt->timer.function = xprt_init_autodisconnect; xprt->timer.data = (unsigned long) xprt; xprt->last_used = jiffies; - xprt->port = XPRT_MAX_RESVPORT; - - /* Set timeout parameters */ - if (to) { - xprt->timeout = *to; - } else - xprt_default_timeout(&xprt->timeout, xprt->prot); + xprt->cwnd = RPC_INITCWND; rpc_init_wait_queue(&xprt->pending, "xprt_pending"); rpc_init_wait_queue(&xprt->sending, "xprt_sending"); @@ -1529,139 +944,25 @@ xprt_setup(int proto, struct sockaddr_in *ap, struct rpc_timeout *to) rpc_init_priority_wait_queue(&xprt->backlog, "xprt_backlog"); /* initialize free list */ - for (req = &xprt->slot[entries-1]; req >= &xprt->slot[0]; req--) + for (req = &xprt->slot[xprt->max_reqs-1]; req >= &xprt->slot[0]; req--) list_add(&req->rq_list, &xprt->free); xprt_init_xid(xprt); - /* Check whether we want to use a reserved port */ - xprt->resvport = capable(CAP_NET_BIND_SERVICE) ? 1 : 0; - dprintk("RPC: created transport %p with %u slots\n", xprt, xprt->max_reqs); return xprt; } -/* - * Bind to a reserved port - */ -static inline int xprt_bindresvport(struct rpc_xprt *xprt, struct socket *sock) -{ - struct sockaddr_in myaddr = { - .sin_family = AF_INET, - }; - int err, port; - - /* Were we already bound to a given port? Try to reuse it */ - port = xprt->port; - do { - myaddr.sin_port = htons(port); - err = sock->ops->bind(sock, (struct sockaddr *) &myaddr, - sizeof(myaddr)); - if (err == 0) { - xprt->port = port; - return 0; - } - if (--port == 0) - port = XPRT_MAX_RESVPORT; - } while (err == -EADDRINUSE && port != xprt->port); - - printk("RPC: Can't bind to reserved port (%d).\n", -err); - return err; -} - -static void -xprt_bind_socket(struct rpc_xprt *xprt, struct socket *sock) -{ - struct sock *sk = sock->sk; - - if (xprt->inet) - return; - - write_lock_bh(&sk->sk_callback_lock); - sk->sk_user_data = xprt; - xprt->old_data_ready = sk->sk_data_ready; - xprt->old_state_change = sk->sk_state_change; - xprt->old_write_space = sk->sk_write_space; - if (xprt->prot == IPPROTO_UDP) { - sk->sk_data_ready = udp_data_ready; - sk->sk_no_check = UDP_CSUM_NORCV; - xprt_set_connected(xprt); - } else { - tcp_sk(sk)->nonagle = 1; /* disable Nagle's algorithm */ - sk->sk_data_ready = tcp_data_ready; - sk->sk_state_change = tcp_state_change; - xprt_clear_connected(xprt); - } - sk->sk_write_space = xprt_write_space; - - /* Reset to new socket */ - xprt->sock = sock; - xprt->inet = sk; - write_unlock_bh(&sk->sk_callback_lock); - - return; -} - -/* - * Set socket buffer length - */ -void -xprt_sock_setbufsize(struct rpc_xprt *xprt) -{ - struct sock *sk = xprt->inet; - - if (xprt->stream) - return; - if (xprt->rcvsize) { - sk->sk_userlocks |= SOCK_RCVBUF_LOCK; - sk->sk_rcvbuf = xprt->rcvsize * xprt->max_reqs * 2; - } - if (xprt->sndsize) { - sk->sk_userlocks |= SOCK_SNDBUF_LOCK; - sk->sk_sndbuf = xprt->sndsize * xprt->max_reqs * 2; - sk->sk_write_space(sk); - } -} - -/* - * Datastream sockets are created here, but xprt_connect will create - * and connect stream sockets. - */ -static struct socket * xprt_create_socket(struct rpc_xprt *xprt, int proto, int resvport) -{ - struct socket *sock; - int type, err; - - dprintk("RPC: xprt_create_socket(%s %d)\n", - (proto == IPPROTO_UDP)? "udp" : "tcp", proto); - - type = (proto == IPPROTO_UDP)? SOCK_DGRAM : SOCK_STREAM; - - if ((err = sock_create_kern(PF_INET, type, proto, &sock)) < 0) { - printk("RPC: can't create socket (%d).\n", -err); - return NULL; - } - - /* If the caller has the capability, bind to a reserved port */ - if (resvport && xprt_bindresvport(xprt, sock) < 0) { - printk("RPC: can't bind to reserved port.\n"); - goto failed; - } - - return sock; - -failed: - sock_release(sock); - return NULL; -} - -/* - * Create an RPC client transport given the protocol and peer address. +/** + * xprt_create_proto - create an RPC client transport + * @proto: requested transport protocol + * @sap: remote peer's address + * @to: timeout parameters for new transport + * */ -struct rpc_xprt * -xprt_create_proto(int proto, struct sockaddr_in *sap, struct rpc_timeout *to) +struct rpc_xprt *xprt_create_proto(int proto, struct sockaddr_in *sap, struct rpc_timeout *to) { struct rpc_xprt *xprt; @@ -1673,46 +974,26 @@ xprt_create_proto(int proto, struct sockaddr_in *sap, struct rpc_timeout *to) return xprt; } -/* - * Prepare for transport shutdown. - */ -static void -xprt_shutdown(struct rpc_xprt *xprt) +static void xprt_shutdown(struct rpc_xprt *xprt) { xprt->shutdown = 1; rpc_wake_up(&xprt->sending); rpc_wake_up(&xprt->resend); - rpc_wake_up(&xprt->pending); + xprt_wake_pending_tasks(xprt, -EIO); rpc_wake_up(&xprt->backlog); - wake_up(&xprt->cong_wait); del_timer_sync(&xprt->timer); - - /* synchronously wait for connect worker to finish */ - cancel_delayed_work(&xprt->sock_connect); - flush_scheduled_work(); } -/* - * Clear the xprt backlog queue - */ -static int -xprt_clear_backlog(struct rpc_xprt *xprt) { - rpc_wake_up_next(&xprt->backlog); - wake_up(&xprt->cong_wait); - return 1; -} - -/* - * Destroy an RPC transport, killing off all requests. +/** + * xprt_destroy - destroy an RPC transport, killing off all requests. + * @xprt: transport to destroy + * */ -int -xprt_destroy(struct rpc_xprt *xprt) +int xprt_destroy(struct rpc_xprt *xprt) { dprintk("RPC: destroying transport %p\n", xprt); xprt_shutdown(xprt); - xprt_disconnect(xprt); - xprt_close(xprt); - kfree(xprt->slot); + xprt->ops->destroy(xprt); kfree(xprt); return 0; diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c new file mode 100644 index 00000000000..2e1529217e6 --- /dev/null +++ b/net/sunrpc/xprtsock.c @@ -0,0 +1,1252 @@ +/* + * linux/net/sunrpc/xprtsock.c + * + * Client-side transport implementation for sockets. + * + * TCP callback races fixes (C) 1998 Red Hat Software <alan@redhat.com> + * TCP send fixes (C) 1998 Red Hat Software <alan@redhat.com> + * TCP NFS related read + write fixes + * (C) 1999 Dave Airlie, University of Limerick, Ireland <airlied@linux.ie> + * + * Rewrite of larges part of the code in order to stabilize TCP stuff. + * Fix behaviour when socket buffer is full. + * (C) 1999 Trond Myklebust <trond.myklebust@fys.uio.no> + * + * IP socket transport implementation, (C) 2005 Chuck Lever <cel@netapp.com> + */ + +#include <linux/types.h> +#include <linux/slab.h> +#include <linux/capability.h> +#include <linux/sched.h> +#include <linux/pagemap.h> +#include <linux/errno.h> +#include <linux/socket.h> +#include <linux/in.h> +#include <linux/net.h> +#include <linux/mm.h> +#include <linux/udp.h> +#include <linux/tcp.h> +#include <linux/sunrpc/clnt.h> +#include <linux/file.h> + +#include <net/sock.h> +#include <net/checksum.h> +#include <net/udp.h> +#include <net/tcp.h> + +/* + * How many times to try sending a request on a socket before waiting + * for the socket buffer to clear. + */ +#define XS_SENDMSG_RETRY (10U) + +/* + * Time out for an RPC UDP socket connect. UDP socket connects are + * synchronous, but we set a timeout anyway in case of resource + * exhaustion on the local host. + */ +#define XS_UDP_CONN_TO (5U * HZ) + +/* + * Wait duration for an RPC TCP connection to be established. Solaris + * NFS over TCP uses 60 seconds, for example, which is in line with how + * long a server takes to reboot. + */ +#define XS_TCP_CONN_TO (60U * HZ) + +/* + * Wait duration for a reply from the RPC portmapper. + */ +#define XS_BIND_TO (60U * HZ) + +/* + * Delay if a UDP socket connect error occurs. This is most likely some + * kind of resource problem on the local host. + */ +#define XS_UDP_REEST_TO (2U * HZ) + +/* + * The reestablish timeout allows clients to delay for a bit before attempting + * to reconnect to a server that just dropped our connection. + * + * We implement an exponential backoff when trying to reestablish a TCP + * transport connection with the server. Some servers like to drop a TCP + * connection when they are overworked, so we start with a short timeout and + * increase over time if the server is down or not responding. + */ +#define XS_TCP_INIT_REEST_TO (3U * HZ) +#define XS_TCP_MAX_REEST_TO (5U * 60 * HZ) + +/* + * TCP idle timeout; client drops the transport socket if it is idle + * for this long. Note that we also timeout UDP sockets to prevent + * holding port numbers when there is no RPC traffic. + */ +#define XS_IDLE_DISC_TO (5U * 60 * HZ) + +#ifdef RPC_DEBUG +# undef RPC_DEBUG_DATA +# define RPCDBG_FACILITY RPCDBG_TRANS +#endif + +#ifdef RPC_DEBUG_DATA +static void xs_pktdump(char *msg, u32 *packet, unsigned int count) +{ + u8 *buf = (u8 *) packet; + int j; + + dprintk("RPC: %s\n", msg); + for (j = 0; j < count && j < 128; j += 4) { + if (!(j & 31)) { + if (j) + dprintk("\n"); + dprintk("0x%04x ", j); + } + dprintk("%02x%02x%02x%02x ", + buf[j], buf[j+1], buf[j+2], buf[j+3]); + } + dprintk("\n"); +} +#else +static inline void xs_pktdump(char *msg, u32 *packet, unsigned int count) +{ + /* NOP */ +} +#endif + +#define XS_SENDMSG_FLAGS (MSG_DONTWAIT | MSG_NOSIGNAL) + +static inline int xs_send_head(struct socket *sock, struct sockaddr *addr, int addrlen, struct xdr_buf *xdr, unsigned int base, unsigned int len) +{ + struct kvec iov = { + .iov_base = xdr->head[0].iov_base + base, + .iov_len = len - base, + }; + struct msghdr msg = { + .msg_name = addr, + .msg_namelen = addrlen, + .msg_flags = XS_SENDMSG_FLAGS, + }; + + if (xdr->len > len) + msg.msg_flags |= MSG_MORE; + + if (likely(iov.iov_len)) + return kernel_sendmsg(sock, &msg, &iov, 1, iov.iov_len); + return kernel_sendmsg(sock, &msg, NULL, 0, 0); +} + +static int xs_send_tail(struct socket *sock, struct xdr_buf *xdr, unsigned int base, unsigned int len) +{ + struct kvec iov = { + .iov_base = xdr->tail[0].iov_base + base, + .iov_len = len - base, + }; + struct msghdr msg = { + .msg_flags = XS_SENDMSG_FLAGS, + }; + + return kernel_sendmsg(sock, &msg, &iov, 1, iov.iov_len); +} + +/** + * xs_sendpages - write pages directly to a socket + * @sock: socket to send on + * @addr: UDP only -- address of destination + * @addrlen: UDP only -- length of destination address + * @xdr: buffer containing this request + * @base: starting position in the buffer + * + */ +static inline int xs_sendpages(struct socket *sock, struct sockaddr *addr, int addrlen, struct xdr_buf *xdr, unsigned int base) +{ + struct page **ppage = xdr->pages; + unsigned int len, pglen = xdr->page_len; + int err, ret = 0; + ssize_t (*sendpage)(struct socket *, struct page *, int, size_t, int); + + if (unlikely(!sock)) + return -ENOTCONN; + + clear_bit(SOCK_ASYNC_NOSPACE, &sock->flags); + + len = xdr->head[0].iov_len; + if (base < len || (addr != NULL && base == 0)) { + err = xs_send_head(sock, addr, addrlen, xdr, base, len); + if (ret == 0) + ret = err; + else if (err > 0) + ret += err; + if (err != (len - base)) + goto out; + base = 0; + } else + base -= len; + + if (unlikely(pglen == 0)) + goto copy_tail; + if (unlikely(base >= pglen)) { + base -= pglen; + goto copy_tail; + } + if (base || xdr->page_base) { + pglen -= base; + base += xdr->page_base; + ppage += base >> PAGE_CACHE_SHIFT; + base &= ~PAGE_CACHE_MASK; + } + + sendpage = sock->ops->sendpage ? : sock_no_sendpage; + do { + int flags = XS_SENDMSG_FLAGS; + + len = PAGE_CACHE_SIZE; + if (base) + len -= base; + if (pglen < len) + len = pglen; + + if (pglen != len || xdr->tail[0].iov_len != 0) + flags |= MSG_MORE; + + /* Hmm... We might be dealing with highmem pages */ + if (PageHighMem(*ppage)) + sendpage = sock_no_sendpage; + err = sendpage(sock, *ppage, base, len, flags); + if (ret == 0) + ret = err; + else if (err > 0) + ret += err; + if (err != len) + goto out; + base = 0; + ppage++; + } while ((pglen -= len) != 0); +copy_tail: + len = xdr->tail[0].iov_len; + if (base < len) { + err = xs_send_tail(sock, xdr, base, len); + if (ret == 0) + ret = err; + else if (err > 0) + ret += err; + } +out: + return ret; +} + +/** + * xs_nospace - place task on wait queue if transmit was incomplete + * @task: task to put to sleep + * + */ +static void xs_nospace(struct rpc_task *task) +{ + struct rpc_rqst *req = task->tk_rqstp; + struct rpc_xprt *xprt = req->rq_xprt; + + dprintk("RPC: %4d xmit incomplete (%u left of %u)\n", + task->tk_pid, req->rq_slen - req->rq_bytes_sent, + req->rq_slen); + + if (test_bit(SOCK_ASYNC_NOSPACE, &xprt->sock->flags)) { + /* Protect against races with write_space */ + spin_lock_bh(&xprt->transport_lock); + + /* Don't race with disconnect */ + if (!xprt_connected(xprt)) + task->tk_status = -ENOTCONN; + else if (test_bit(SOCK_NOSPACE, &xprt->sock->flags)) + xprt_wait_for_buffer_space(task); + + spin_unlock_bh(&xprt->transport_lock); + } else + /* Keep holding the socket if it is blocked */ + rpc_delay(task, HZ>>4); +} + +/** + * xs_udp_send_request - write an RPC request to a UDP socket + * @task: address of RPC task that manages the state of an RPC request + * + * Return values: + * 0: The request has been sent + * EAGAIN: The socket was blocked, please call again later to + * complete the request + * ENOTCONN: Caller needs to invoke connect logic then call again + * other: Some other error occured, the request was not sent + */ +static int xs_udp_send_request(struct rpc_task *task) +{ + struct rpc_rqst *req = task->tk_rqstp; + struct rpc_xprt *xprt = req->rq_xprt; + struct xdr_buf *xdr = &req->rq_snd_buf; + int status; + + xs_pktdump("packet data:", + req->rq_svec->iov_base, + req->rq_svec->iov_len); + + req->rq_xtime = jiffies; + status = xs_sendpages(xprt->sock, (struct sockaddr *) &xprt->addr, + sizeof(xprt->addr), xdr, req->rq_bytes_sent); + + dprintk("RPC: xs_udp_send_request(%u) = %d\n", + xdr->len - req->rq_bytes_sent, status); + + if (likely(status >= (int) req->rq_slen)) + return 0; + + /* Still some bytes left; set up for a retry later. */ + if (status > 0) + status = -EAGAIN; + + switch (status) { + case -ENETUNREACH: + case -EPIPE: + case -ECONNREFUSED: + /* When the server has died, an ICMP port unreachable message + * prompts ECONNREFUSED. */ + break; + case -EAGAIN: + xs_nospace(task); + break; + default: + dprintk("RPC: sendmsg returned unrecognized error %d\n", + -status); + break; + } + + return status; +} + +static inline void xs_encode_tcp_record_marker(struct xdr_buf *buf) +{ + u32 reclen = buf->len - sizeof(rpc_fraghdr); + rpc_fraghdr *base = buf->head[0].iov_base; + *base = htonl(RPC_LAST_STREAM_FRAGMENT | reclen); +} + +/** + * xs_tcp_send_request - write an RPC request to a TCP socket + * @task: address of RPC task that manages the state of an RPC request + * + * Return values: + * 0: The request has been sent + * EAGAIN: The socket was blocked, please call again later to + * complete the request + * ENOTCONN: Caller needs to invoke connect logic then call again + * other: Some other error occured, the request was not sent + * + * XXX: In the case of soft timeouts, should we eventually give up + * if sendmsg is not able to make progress? + */ +static int xs_tcp_send_request(struct rpc_task *task) +{ + struct rpc_rqst *req = task->tk_rqstp; + struct rpc_xprt *xprt = req->rq_xprt; + struct xdr_buf *xdr = &req->rq_snd_buf; + int status, retry = 0; + + xs_encode_tcp_record_marker(&req->rq_snd_buf); + + xs_pktdump("packet data:", + req->rq_svec->iov_base, + req->rq_svec->iov_len); + + /* Continue transmitting the packet/record. We must be careful + * to cope with writespace callbacks arriving _after_ we have + * called sendmsg(). */ + while (1) { + req->rq_xtime = jiffies; + status = xs_sendpages(xprt->sock, NULL, 0, xdr, + req->rq_bytes_sent); + + dprintk("RPC: xs_tcp_send_request(%u) = %d\n", + xdr->len - req->rq_bytes_sent, status); + + if (unlikely(status < 0)) + break; + + /* If we've sent the entire packet, immediately + * reset the count of bytes sent. */ + req->rq_bytes_sent += status; + if (likely(req->rq_bytes_sent >= req->rq_slen)) { + req->rq_bytes_sent = 0; + return 0; + } + + status = -EAGAIN; + if (retry++ > XS_SENDMSG_RETRY) + break; + } + + switch (status) { + case -EAGAIN: + xs_nospace(task); + break; + case -ECONNREFUSED: + case -ECONNRESET: + case -ENOTCONN: + case -EPIPE: + status = -ENOTCONN; + break; + default: + dprintk("RPC: sendmsg returned unrecognized error %d\n", + -status); + xprt_disconnect(xprt); + break; + } + + return status; +} + +/** + * xs_close - close a socket + * @xprt: transport + * + * This is used when all requests are complete; ie, no DRC state remains + * on the server we want to save. + */ +static void xs_close(struct rpc_xprt *xprt) +{ + struct socket *sock = xprt->sock; + struct sock *sk = xprt->inet; + + if (!sk) + return; + + dprintk("RPC: xs_close xprt %p\n", xprt); + + write_lock_bh(&sk->sk_callback_lock); + xprt->inet = NULL; + xprt->sock = NULL; + + sk->sk_user_data = NULL; + sk->sk_data_ready = xprt->old_data_ready; + sk->sk_state_change = xprt->old_state_change; + sk->sk_write_space = xprt->old_write_space; + write_unlock_bh(&sk->sk_callback_lock); + + sk->sk_no_check = 0; + + sock_release(sock); +} + +/** + * xs_destroy - prepare to shutdown a transport + * @xprt: doomed transport + * + */ +static void xs_destroy(struct rpc_xprt *xprt) +{ + dprintk("RPC: xs_destroy xprt %p\n", xprt); + + cancel_delayed_work(&xprt->connect_worker); + flush_scheduled_work(); + + xprt_disconnect(xprt); + xs_close(xprt); + kfree(xprt->slot); +} + +static inline struct rpc_xprt *xprt_from_sock(struct sock *sk) +{ + return (struct rpc_xprt *) sk->sk_user_data; +} + +/** + * xs_udp_data_ready - "data ready" callback for UDP sockets + * @sk: socket with data to read + * @len: how much data to read + * + */ +static void xs_udp_data_ready(struct sock *sk, int len) +{ + struct rpc_task *task; + struct rpc_xprt *xprt; + struct rpc_rqst *rovr; + struct sk_buff *skb; + int err, repsize, copied; + u32 _xid, *xp; + + read_lock(&sk->sk_callback_lock); + dprintk("RPC: xs_udp_data_ready...\n"); + if (!(xprt = xprt_from_sock(sk))) + goto out; + + if ((skb = skb_recv_datagram(sk, 0, 1, &err)) == NULL) + goto out; + + if (xprt->shutdown) + goto dropit; + + repsize = skb->len - sizeof(struct udphdr); + if (repsize < 4) { + dprintk("RPC: impossible RPC reply size %d!\n", repsize); + goto dropit; + } + + /* Copy the XID from the skb... */ + xp = skb_header_pointer(skb, sizeof(struct udphdr), + sizeof(_xid), &_xid); + if (xp == NULL) + goto dropit; + + /* Look up and lock the request corresponding to the given XID */ + spin_lock(&xprt->transport_lock); + rovr = xprt_lookup_rqst(xprt, *xp); + if (!rovr) + goto out_unlock; + task = rovr->rq_task; + + if ((copied = rovr->rq_private_buf.buflen) > repsize) + copied = repsize; + + /* Suck it into the iovec, verify checksum if not done by hw. */ + if (csum_partial_copy_to_xdr(&rovr->rq_private_buf, skb)) + goto out_unlock; + + /* Something worked... */ + dst_confirm(skb->dst); + + xprt_adjust_cwnd(task, copied); + xprt_update_rtt(task); + xprt_complete_rqst(task, copied); + + out_unlock: + spin_unlock(&xprt->transport_lock); + dropit: + skb_free_datagram(sk, skb); + out: + read_unlock(&sk->sk_callback_lock); +} + +static inline size_t xs_tcp_copy_data(skb_reader_t *desc, void *p, size_t len) +{ + if (len > desc->count) + len = desc->count; + if (skb_copy_bits(desc->skb, desc->offset, p, len)) { + dprintk("RPC: failed to copy %zu bytes from skb. %zu bytes remain\n", + len, desc->count); + return 0; + } + desc->offset += len; + desc->count -= len; + dprintk("RPC: copied %zu bytes from skb. %zu bytes remain\n", + len, desc->count); + return len; +} + +static inline void xs_tcp_read_fraghdr(struct rpc_xprt *xprt, skb_reader_t *desc) +{ + size_t len, used; + char *p; + + p = ((char *) &xprt->tcp_recm) + xprt->tcp_offset; + len = sizeof(xprt->tcp_recm) - xprt->tcp_offset; + used = xs_tcp_copy_data(desc, p, len); + xprt->tcp_offset += used; + if (used != len) + return; + + xprt->tcp_reclen = ntohl(xprt->tcp_recm); + if (xprt->tcp_reclen & RPC_LAST_STREAM_FRAGMENT) + xprt->tcp_flags |= XPRT_LAST_FRAG; + else + xprt->tcp_flags &= ~XPRT_LAST_FRAG; + xprt->tcp_reclen &= RPC_FRAGMENT_SIZE_MASK; + + xprt->tcp_flags &= ~XPRT_COPY_RECM; + xprt->tcp_offset = 0; + + /* Sanity check of the record length */ + if (unlikely(xprt->tcp_reclen < 4)) { + dprintk("RPC: invalid TCP record fragment length\n"); + xprt_disconnect(xprt); + return; + } + dprintk("RPC: reading TCP record fragment of length %d\n", + xprt->tcp_reclen); +} + +static void xs_tcp_check_recm(struct rpc_xprt *xprt) +{ + dprintk("RPC: xprt = %p, tcp_copied = %lu, tcp_offset = %u, tcp_reclen = %u, tcp_flags = %lx\n", + xprt, xprt->tcp_copied, xprt->tcp_offset, xprt->tcp_reclen, xprt->tcp_flags); + if (xprt->tcp_offset == xprt->tcp_reclen) { + xprt->tcp_flags |= XPRT_COPY_RECM; + xprt->tcp_offset = 0; + if (xprt->tcp_flags & XPRT_LAST_FRAG) { + xprt->tcp_flags &= ~XPRT_COPY_DATA; + xprt->tcp_flags |= XPRT_COPY_XID; + xprt->tcp_copied = 0; + } + } +} + +static inline void xs_tcp_read_xid(struct rpc_xprt *xprt, skb_reader_t *desc) +{ + size_t len, used; + char *p; + + len = sizeof(xprt->tcp_xid) - xprt->tcp_offset; + dprintk("RPC: reading XID (%Zu bytes)\n", len); + p = ((char *) &xprt->tcp_xid) + xprt->tcp_offset; + used = xs_tcp_copy_data(desc, p, len); + xprt->tcp_offset += used; + if (used != len) + return; + xprt->tcp_flags &= ~XPRT_COPY_XID; + xprt->tcp_flags |= XPRT_COPY_DATA; + xprt->tcp_copied = 4; + dprintk("RPC: reading reply for XID %08x\n", + ntohl(xprt->tcp_xid)); + xs_tcp_check_recm(xprt); +} + +static inline void xs_tcp_read_request(struct rpc_xprt *xprt, skb_reader_t *desc) +{ + struct rpc_rqst *req; + struct xdr_buf *rcvbuf; + size_t len; + ssize_t r; + + /* Find and lock the request corresponding to this xid */ + spin_lock(&xprt->transport_lock); + req = xprt_lookup_rqst(xprt, xprt->tcp_xid); + if (!req) { + xprt->tcp_flags &= ~XPRT_COPY_DATA; + dprintk("RPC: XID %08x request not found!\n", + ntohl(xprt->tcp_xid)); + spin_unlock(&xprt->transport_lock); + return; + } + + rcvbuf = &req->rq_private_buf; + len = desc->count; + if (len > xprt->tcp_reclen - xprt->tcp_offset) { + skb_reader_t my_desc; + + len = xprt->tcp_reclen - xprt->tcp_offset; + memcpy(&my_desc, desc, sizeof(my_desc)); + my_desc.count = len; + r = xdr_partial_copy_from_skb(rcvbuf, xprt->tcp_copied, + &my_desc, xs_tcp_copy_data); + desc->count -= r; + desc->offset += r; + } else + r = xdr_partial_copy_from_skb(rcvbuf, xprt->tcp_copied, + desc, xs_tcp_copy_data); + + if (r > 0) { + xprt->tcp_copied += r; + xprt->tcp_offset += r; + } + if (r != len) { + /* Error when copying to the receive buffer, + * usually because we weren't able to allocate + * additional buffer pages. All we can do now + * is turn off XPRT_COPY_DATA, so the request + * will not receive any additional updates, + * and time out. + * Any remaining data from this record will + * be discarded. + */ + xprt->tcp_flags &= ~XPRT_COPY_DATA; + dprintk("RPC: XID %08x truncated request\n", + ntohl(xprt->tcp_xid)); + dprintk("RPC: xprt = %p, tcp_copied = %lu, tcp_offset = %u, tcp_reclen = %u\n", + xprt, xprt->tcp_copied, xprt->tcp_offset, xprt->tcp_reclen); + goto out; + } + + dprintk("RPC: XID %08x read %Zd bytes\n", + ntohl(xprt->tcp_xid), r); + dprintk("RPC: xprt = %p, tcp_copied = %lu, tcp_offset = %u, tcp_reclen = %u\n", + xprt, xprt->tcp_copied, xprt->tcp_offset, xprt->tcp_reclen); + + if (xprt->tcp_copied == req->rq_private_buf.buflen) + xprt->tcp_flags &= ~XPRT_COPY_DATA; + else if (xprt->tcp_offset == xprt->tcp_reclen) { + if (xprt->tcp_flags & XPRT_LAST_FRAG) + xprt->tcp_flags &= ~XPRT_COPY_DATA; + } + +out: + if (!(xprt->tcp_flags & XPRT_COPY_DATA)) + xprt_complete_rqst(req->rq_task, xprt->tcp_copied); + spin_unlock(&xprt->transport_lock); + xs_tcp_check_recm(xprt); +} + +static inline void xs_tcp_read_discard(struct rpc_xprt *xprt, skb_reader_t *desc) +{ + size_t len; + + len = xprt->tcp_reclen - xprt->tcp_offset; + if (len > desc->count) + len = desc->count; + desc->count -= len; + desc->offset += len; + xprt->tcp_offset += len; + dprintk("RPC: discarded %Zu bytes\n", len); + xs_tcp_check_recm(xprt); +} + +static int xs_tcp_data_recv(read_descriptor_t *rd_desc, struct sk_buff *skb, unsigned int offset, size_t len) +{ + struct rpc_xprt *xprt = rd_desc->arg.data; + skb_reader_t desc = { + .skb = skb, + .offset = offset, + .count = len, + .csum = 0 + }; + + dprintk("RPC: xs_tcp_data_recv started\n"); + do { + /* Read in a new fragment marker if necessary */ + /* Can we ever really expect to get completely empty fragments? */ + if (xprt->tcp_flags & XPRT_COPY_RECM) { + xs_tcp_read_fraghdr(xprt, &desc); + continue; + } + /* Read in the xid if necessary */ + if (xprt->tcp_flags & XPRT_COPY_XID) { + xs_tcp_read_xid(xprt, &desc); + continue; + } + /* Read in the request data */ + if (xprt->tcp_flags & XPRT_COPY_DATA) { + xs_tcp_read_request(xprt, &desc); + continue; + } + /* Skip over any trailing bytes on short reads */ + xs_tcp_read_discard(xprt, &desc); + } while (desc.count); + dprintk("RPC: xs_tcp_data_recv done\n"); + return len - desc.count; +} + +/** + * xs_tcp_data_ready - "data ready" callback for TCP sockets + * @sk: socket with data to read + * @bytes: how much data to read + * + */ +static void xs_tcp_data_ready(struct sock *sk, int bytes) +{ + struct rpc_xprt *xprt; + read_descriptor_t rd_desc; + + read_lock(&sk->sk_callback_lock); + dprintk("RPC: xs_tcp_data_ready...\n"); + if (!(xprt = xprt_from_sock(sk))) + goto out; + if (xprt->shutdown) + goto out; + + /* We use rd_desc to pass struct xprt to xs_tcp_data_recv */ + rd_desc.arg.data = xprt; + rd_desc.count = 65536; + tcp_read_sock(sk, &rd_desc, xs_tcp_data_recv); +out: + read_unlock(&sk->sk_callback_lock); +} + +/** + * xs_tcp_state_change - callback to handle TCP socket state changes + * @sk: socket whose state has changed + * + */ +static void xs_tcp_state_change(struct sock *sk) +{ + struct rpc_xprt *xprt; + + read_lock(&sk->sk_callback_lock); + if (!(xprt = xprt_from_sock(sk))) + goto out; + dprintk("RPC: xs_tcp_state_change client %p...\n", xprt); + dprintk("RPC: state %x conn %d dead %d zapped %d\n", + sk->sk_state, xprt_connected(xprt), + sock_flag(sk, SOCK_DEAD), + sock_flag(sk, SOCK_ZAPPED)); + + switch (sk->sk_state) { + case TCP_ESTABLISHED: + spin_lock_bh(&xprt->transport_lock); + if (!xprt_test_and_set_connected(xprt)) { + /* Reset TCP record info */ + xprt->tcp_offset = 0; + xprt->tcp_reclen = 0; + xprt->tcp_copied = 0; + xprt->tcp_flags = XPRT_COPY_RECM | XPRT_COPY_XID; + xprt->reestablish_timeout = XS_TCP_INIT_REEST_TO; + xprt_wake_pending_tasks(xprt, 0); + } + spin_unlock_bh(&xprt->transport_lock); + break; + case TCP_SYN_SENT: + case TCP_SYN_RECV: + break; + default: + xprt_disconnect(xprt); + break; + } + out: + read_unlock(&sk->sk_callback_lock); +} + +/** + * xs_udp_write_space - callback invoked when socket buffer space + * becomes available + * @sk: socket whose state has changed + * + * Called when more output buffer space is available for this socket. + * We try not to wake our writers until they can make "significant" + * progress, otherwise we'll waste resources thrashing kernel_sendmsg + * with a bunch of small requests. + */ +static void xs_udp_write_space(struct sock *sk) +{ + read_lock(&sk->sk_callback_lock); + + /* from net/core/sock.c:sock_def_write_space */ + if (sock_writeable(sk)) { + struct socket *sock; + struct rpc_xprt *xprt; + + if (unlikely(!(sock = sk->sk_socket))) + goto out; + if (unlikely(!(xprt = xprt_from_sock(sk)))) + goto out; + if (unlikely(!test_and_clear_bit(SOCK_NOSPACE, &sock->flags))) + goto out; + + xprt_write_space(xprt); + } + + out: + read_unlock(&sk->sk_callback_lock); +} + +/** + * xs_tcp_write_space - callback invoked when socket buffer space + * becomes available + * @sk: socket whose state has changed + * + * Called when more output buffer space is available for this socket. + * We try not to wake our writers until they can make "significant" + * progress, otherwise we'll waste resources thrashing kernel_sendmsg + * with a bunch of small requests. + */ +static void xs_tcp_write_space(struct sock *sk) +{ + read_lock(&sk->sk_callback_lock); + + /* from net/core/stream.c:sk_stream_write_space */ + if (sk_stream_wspace(sk) >= sk_stream_min_wspace(sk)) { + struct socket *sock; + struct rpc_xprt *xprt; + + if (unlikely(!(sock = sk->sk_socket))) + goto out; + if (unlikely(!(xprt = xprt_from_sock(sk)))) + goto out; + if (unlikely(!test_and_clear_bit(SOCK_NOSPACE, &sock->flags))) + goto out; + + xprt_write_space(xprt); + } + + out: + read_unlock(&sk->sk_callback_lock); +} + +static void xs_udp_do_set_buffer_size(struct rpc_xprt *xprt) +{ + struct sock *sk = xprt->inet; + + if (xprt->rcvsize) { + sk->sk_userlocks |= SOCK_RCVBUF_LOCK; + sk->sk_rcvbuf = xprt->rcvsize * xprt->max_reqs * 2; + } + if (xprt->sndsize) { + sk->sk_userlocks |= SOCK_SNDBUF_LOCK; + sk->sk_sndbuf = xprt->sndsize * xprt->max_reqs * 2; + sk->sk_write_space(sk); + } +} + +/** + * xs_udp_set_buffer_size - set send and receive limits + * @xprt: generic transport + * @sndsize: requested size of send buffer, in bytes + * @rcvsize: requested size of receive buffer, in bytes + * + * Set socket send and receive buffer size limits. + */ +static void xs_udp_set_buffer_size(struct rpc_xprt *xprt, size_t sndsize, size_t rcvsize) +{ + xprt->sndsize = 0; + if (sndsize) + xprt->sndsize = sndsize + 1024; + xprt->rcvsize = 0; + if (rcvsize) + xprt->rcvsize = rcvsize + 1024; + + xs_udp_do_set_buffer_size(xprt); +} + +/** + * xs_udp_timer - called when a retransmit timeout occurs on a UDP transport + * @task: task that timed out + * + * Adjust the congestion window after a retransmit timeout has occurred. + */ +static void xs_udp_timer(struct rpc_task *task) +{ + xprt_adjust_cwnd(task, -ETIMEDOUT); +} + +static int xs_bindresvport(struct rpc_xprt *xprt, struct socket *sock) +{ + struct sockaddr_in myaddr = { + .sin_family = AF_INET, + }; + int err; + unsigned short port = xprt->port; + + do { + myaddr.sin_port = htons(port); + err = sock->ops->bind(sock, (struct sockaddr *) &myaddr, + sizeof(myaddr)); + if (err == 0) { + xprt->port = port; + dprintk("RPC: xs_bindresvport bound to port %u\n", + port); + return 0; + } + if (port <= xprt_min_resvport) + port = xprt_max_resvport; + else + port--; + } while (err == -EADDRINUSE && port != xprt->port); + + dprintk("RPC: can't bind to reserved port (%d).\n", -err); + return err; +} + +/** + * xs_udp_connect_worker - set up a UDP socket + * @args: RPC transport to connect + * + * Invoked by a work queue tasklet. + */ +static void xs_udp_connect_worker(void *args) +{ + struct rpc_xprt *xprt = (struct rpc_xprt *) args; + struct socket *sock = xprt->sock; + int err, status = -EIO; + + if (xprt->shutdown || xprt->addr.sin_port == 0) + goto out; + + dprintk("RPC: xs_udp_connect_worker for xprt %p\n", xprt); + + /* Start by resetting any existing state */ + xs_close(xprt); + + if ((err = sock_create_kern(PF_INET, SOCK_DGRAM, IPPROTO_UDP, &sock)) < 0) { + dprintk("RPC: can't create UDP transport socket (%d).\n", -err); + goto out; + } + + if (xprt->resvport && xs_bindresvport(xprt, sock) < 0) { + sock_release(sock); + goto out; + } + + if (!xprt->inet) { + struct sock *sk = sock->sk; + + write_lock_bh(&sk->sk_callback_lock); + + sk->sk_user_data = xprt; + xprt->old_data_ready = sk->sk_data_ready; + xprt->old_state_change = sk->sk_state_change; + xprt->old_write_space = sk->sk_write_space; + sk->sk_data_ready = xs_udp_data_ready; + sk->sk_write_space = xs_udp_write_space; + sk->sk_no_check = UDP_CSUM_NORCV; + + xprt_set_connected(xprt); + + /* Reset to new socket */ + xprt->sock = sock; + xprt->inet = sk; + + write_unlock_bh(&sk->sk_callback_lock); + } + xs_udp_do_set_buffer_size(xprt); + status = 0; +out: + xprt_wake_pending_tasks(xprt, status); + xprt_clear_connecting(xprt); +} + +/* + * We need to preserve the port number so the reply cache on the server can + * find our cached RPC replies when we get around to reconnecting. + */ +static void xs_tcp_reuse_connection(struct rpc_xprt *xprt) +{ + int result; + struct socket *sock = xprt->sock; + struct sockaddr any; + + dprintk("RPC: disconnecting xprt %p to reuse port\n", xprt); + + /* + * Disconnect the transport socket by doing a connect operation + * with AF_UNSPEC. This should return immediately... + */ + memset(&any, 0, sizeof(any)); + any.sa_family = AF_UNSPEC; + result = sock->ops->connect(sock, &any, sizeof(any), 0); + if (result) + dprintk("RPC: AF_UNSPEC connect return code %d\n", + result); +} + +/** + * xs_tcp_connect_worker - connect a TCP socket to a remote endpoint + * @args: RPC transport to connect + * + * Invoked by a work queue tasklet. + */ +static void xs_tcp_connect_worker(void *args) +{ + struct rpc_xprt *xprt = (struct rpc_xprt *)args; + struct socket *sock = xprt->sock; + int err, status = -EIO; + + if (xprt->shutdown || xprt->addr.sin_port == 0) + goto out; + + dprintk("RPC: xs_tcp_connect_worker for xprt %p\n", xprt); + + if (!xprt->sock) { + /* start from scratch */ + if ((err = sock_create_kern(PF_INET, SOCK_STREAM, IPPROTO_TCP, &sock)) < 0) { + dprintk("RPC: can't create TCP transport socket (%d).\n", -err); + goto out; + } + + if (xprt->resvport && xs_bindresvport(xprt, sock) < 0) { + sock_release(sock); + goto out; + } + } else + /* "close" the socket, preserving the local port */ + xs_tcp_reuse_connection(xprt); + + if (!xprt->inet) { + struct sock *sk = sock->sk; + + write_lock_bh(&sk->sk_callback_lock); + + sk->sk_user_data = xprt; + xprt->old_data_ready = sk->sk_data_ready; + xprt->old_state_change = sk->sk_state_change; + xprt->old_write_space = sk->sk_write_space; + sk->sk_data_ready = xs_tcp_data_ready; + sk->sk_state_change = xs_tcp_state_change; + sk->sk_write_space = xs_tcp_write_space; + + /* socket options */ + sk->sk_userlocks |= SOCK_BINDPORT_LOCK; + sock_reset_flag(sk, SOCK_LINGER); + tcp_sk(sk)->linger2 = 0; + tcp_sk(sk)->nonagle |= TCP_NAGLE_OFF; + + xprt_clear_connected(xprt); + + /* Reset to new socket */ + xprt->sock = sock; + xprt->inet = sk; + + write_unlock_bh(&sk->sk_callback_lock); + } + + /* Tell the socket layer to start connecting... */ + status = sock->ops->connect(sock, (struct sockaddr *) &xprt->addr, + sizeof(xprt->addr), O_NONBLOCK); + dprintk("RPC: %p connect status %d connected %d sock state %d\n", + xprt, -status, xprt_connected(xprt), sock->sk->sk_state); + if (status < 0) { + switch (status) { + case -EINPROGRESS: + case -EALREADY: + goto out_clear; + case -ECONNREFUSED: + case -ECONNRESET: + /* retry with existing socket, after a delay */ + break; + default: + /* get rid of existing socket, and retry */ + xs_close(xprt); + break; + } + } +out: + xprt_wake_pending_tasks(xprt, status); +out_clear: + xprt_clear_connecting(xprt); +} + +/** + * xs_connect - connect a socket to a remote endpoint + * @task: address of RPC task that manages state of connect request + * + * TCP: If the remote end dropped the connection, delay reconnecting. + * + * UDP socket connects are synchronous, but we use a work queue anyway + * to guarantee that even unprivileged user processes can set up a + * socket on a privileged port. + * + * If a UDP socket connect fails, the delay behavior here prevents + * retry floods (hard mounts). + */ +static void xs_connect(struct rpc_task *task) +{ + struct rpc_xprt *xprt = task->tk_xprt; + + if (xprt_test_and_set_connecting(xprt)) + return; + + if (xprt->sock != NULL) { + dprintk("RPC: xs_connect delayed xprt %p for %lu seconds\n", + xprt, xprt->reestablish_timeout / HZ); + schedule_delayed_work(&xprt->connect_worker, + xprt->reestablish_timeout); + xprt->reestablish_timeout <<= 1; + if (xprt->reestablish_timeout > XS_TCP_MAX_REEST_TO) + xprt->reestablish_timeout = XS_TCP_MAX_REEST_TO; + } else { + dprintk("RPC: xs_connect scheduled xprt %p\n", xprt); + schedule_work(&xprt->connect_worker); + + /* flush_scheduled_work can sleep... */ + if (!RPC_IS_ASYNC(task)) + flush_scheduled_work(); + } +} + +static struct rpc_xprt_ops xs_udp_ops = { + .set_buffer_size = xs_udp_set_buffer_size, + .reserve_xprt = xprt_reserve_xprt_cong, + .release_xprt = xprt_release_xprt_cong, + .connect = xs_connect, + .send_request = xs_udp_send_request, + .set_retrans_timeout = xprt_set_retrans_timeout_rtt, + .timer = xs_udp_timer, + .release_request = xprt_release_rqst_cong, + .close = xs_close, + .destroy = xs_destroy, +}; + +static struct rpc_xprt_ops xs_tcp_ops = { + .reserve_xprt = xprt_reserve_xprt, + .release_xprt = xprt_release_xprt, + .connect = xs_connect, + .send_request = xs_tcp_send_request, + .set_retrans_timeout = xprt_set_retrans_timeout_def, + .close = xs_close, + .destroy = xs_destroy, +}; + +/** + * xs_setup_udp - Set up transport to use a UDP socket + * @xprt: transport to set up + * @to: timeout parameters + * + */ +int xs_setup_udp(struct rpc_xprt *xprt, struct rpc_timeout *to) +{ + size_t slot_table_size; + + dprintk("RPC: setting up udp-ipv4 transport...\n"); + + xprt->max_reqs = xprt_udp_slot_table_entries; + slot_table_size = xprt->max_reqs * sizeof(xprt->slot[0]); + xprt->slot = kmalloc(slot_table_size, GFP_KERNEL); + if (xprt->slot == NULL) + return -ENOMEM; + memset(xprt->slot, 0, slot_table_size); + + xprt->prot = IPPROTO_UDP; + xprt->port = xprt_max_resvport; + xprt->tsh_size = 0; + xprt->resvport = capable(CAP_NET_BIND_SERVICE) ? 1 : 0; + /* XXX: header size can vary due to auth type, IPv6, etc. */ + xprt->max_payload = (1U << 16) - (MAX_HEADER << 3); + + INIT_WORK(&xprt->connect_worker, xs_udp_connect_worker, xprt); + xprt->bind_timeout = XS_BIND_TO; + xprt->connect_timeout = XS_UDP_CONN_TO; + xprt->reestablish_timeout = XS_UDP_REEST_TO; + xprt->idle_timeout = XS_IDLE_DISC_TO; + + xprt->ops = &xs_udp_ops; + + if (to) + xprt->timeout = *to; + else + xprt_set_timeout(&xprt->timeout, 5, 5 * HZ); + + return 0; +} + +/** + * xs_setup_tcp - Set up transport to use a TCP socket + * @xprt: transport to set up + * @to: timeout parameters + * + */ +int xs_setup_tcp(struct rpc_xprt *xprt, struct rpc_timeout *to) +{ + size_t slot_table_size; + + dprintk("RPC: setting up tcp-ipv4 transport...\n"); + + xprt->max_reqs = xprt_tcp_slot_table_entries; + slot_table_size = xprt->max_reqs * sizeof(xprt->slot[0]); + xprt->slot = kmalloc(slot_table_size, GFP_KERNEL); + if (xprt->slot == NULL) + return -ENOMEM; + memset(xprt->slot, 0, slot_table_size); + + xprt->prot = IPPROTO_TCP; + xprt->port = xprt_max_resvport; + xprt->tsh_size = sizeof(rpc_fraghdr) / sizeof(u32); + xprt->resvport = capable(CAP_NET_BIND_SERVICE) ? 1 : 0; + xprt->max_payload = RPC_MAX_FRAGMENT_SIZE; + + INIT_WORK(&xprt->connect_worker, xs_tcp_connect_worker, xprt); + xprt->bind_timeout = XS_BIND_TO; + xprt->connect_timeout = XS_TCP_CONN_TO; + xprt->reestablish_timeout = XS_TCP_INIT_REEST_TO; + xprt->idle_timeout = XS_IDLE_DISC_TO; + + xprt->ops = &xs_tcp_ops; + + if (to) + xprt->timeout = *to; + else + xprt_set_timeout(&xprt->timeout, 2, 60 * HZ); + + return 0; +} diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index cbb0ba34a60..0db9e57013f 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -1192,46 +1192,6 @@ int xfrm_bundle_ok(struct xfrm_dst *first, struct flowi *fl, int family) EXPORT_SYMBOL(xfrm_bundle_ok); -/* Well... that's _TASK_. We need to scan through transformation - * list and figure out what mss tcp should generate in order to - * final datagram fit to mtu. Mama mia... :-) - * - * Apparently, some easy way exists, but we used to choose the most - * bizarre ones. :-) So, raising Kalashnikov... tra-ta-ta. - * - * Consider this function as something like dark humour. :-) - */ -static int xfrm_get_mss(struct dst_entry *dst, u32 mtu) -{ - int res = mtu - dst->header_len; - - for (;;) { - struct dst_entry *d = dst; - int m = res; - - do { - struct xfrm_state *x = d->xfrm; - if (x) { - spin_lock_bh(&x->lock); - if (x->km.state == XFRM_STATE_VALID && - x->type && x->type->get_max_size) - m = x->type->get_max_size(d->xfrm, m); - else - m += x->props.header_len; - spin_unlock_bh(&x->lock); - } - } while ((d = d->child) != NULL); - - if (m <= mtu) - break; - res -= (m - mtu); - if (res < 88) - return mtu; - } - - return res + dst->header_len; -} - int xfrm_policy_register_afinfo(struct xfrm_policy_afinfo *afinfo) { int err = 0; @@ -1252,8 +1212,6 @@ int xfrm_policy_register_afinfo(struct xfrm_policy_afinfo *afinfo) dst_ops->negative_advice = xfrm_negative_advice; if (likely(dst_ops->link_failure == NULL)) dst_ops->link_failure = xfrm_link_failure; - if (likely(dst_ops->get_mss == NULL)) - dst_ops->get_mss = xfrm_get_mss; if (likely(afinfo->garbage_collect == NULL)) afinfo->garbage_collect = __xfrm_garbage_collect; xfrm_policy_afinfo[afinfo->family] = afinfo; @@ -1281,7 +1239,6 @@ int xfrm_policy_unregister_afinfo(struct xfrm_policy_afinfo *afinfo) dst_ops->check = NULL; dst_ops->negative_advice = NULL; dst_ops->link_failure = NULL; - dst_ops->get_mss = NULL; afinfo->garbage_collect = NULL; } } diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c index 9d206c282cf..8b9a4747417 100644 --- a/net/xfrm/xfrm_state.c +++ b/net/xfrm/xfrm_state.c @@ -1026,6 +1026,12 @@ void xfrm_state_delete_tunnel(struct xfrm_state *x) } EXPORT_SYMBOL(xfrm_state_delete_tunnel); +/* + * This function is NOT optimal. For example, with ESP it will give an + * MTU that's usually two bytes short of being optimal. However, it will + * usually give an answer that's a multiple of 4 provided the input is + * also a multiple of 4. + */ int xfrm_state_mtu(struct xfrm_state *x, int mtu) { int res = mtu; |